diff --git a/Documentation/devicetree/bindings/arm/mali-bifrost.txt b/Documentation/devicetree/bindings/arm/mali-bifrost.txt index 3a3cd8837142..dd8f733ce71b 100644 --- a/Documentation/devicetree/bindings/arm/mali-bifrost.txt +++ b/Documentation/devicetree/bindings/arm/mali-bifrost.txt @@ -1,5 +1,5 @@ # -# (C) COPYRIGHT 2013-2019 ARM Limited. All rights reserved. +# (C) COPYRIGHT 2013-2020 ARM Limited. All rights reserved. # # This program is free software and is provided to you under the terms of the # GNU General Public License version 2 as published by the Free Software @@ -19,13 +19,14 @@ # # -* ARM Mali Midgard devices +* ARM Mali Midgard / Bifrost devices Required properties: - compatible : Should be mali, replacing digits with x from the back, -until malitxx, ending with arm,mali-midgard, the latter not optional. +until malitxx, and it must end with one of: "arm,malit6xx" or +"arm,mali-midgard" or "arm,mali-bifrost" - reg : Physical base address of the device and length of the register area. - interrupts : Contains the three IRQ lines required by T-6xx devices - interrupt-names : Contains the names of IRQ resources in the order they were @@ -107,13 +108,22 @@ for details. model is not found in the registered models list. If no model is specified here, a gpu-id based model is picked if available, otherwise the default model is used. - mali-simple-power-model: Default model used on mali -- protected-mode-switcher : Phandle to device implemented protected mode switching functionality. -Refer to Documentation/devicetree/bindings/arm/smc-protected-mode-switcher.txt for one implementation. - idvs-group-size : Override the IDVS group size value. Tasks are sent to cores in groups of N + 1, so i.e. 0xF means 16 tasks. Valid values are between 0 to 0x3F (including). 
- l2-size : Override L2 cache size on GPU that supports it - l2-hash : Override L2 hash function on GPU that supports it +- arbiter_if : Phandle to the arbif platform device, used to provide KBASE with an interface + to the Arbiter. This is required when using arbitration; setting to a non-NULL + value will enable arbitration. + If arbitration is in use, then there should be no external GPU control. + When arbiter_if is in use then the following must not be present: + - power_model (no IPA allowed with arbitration) + - #cooling-cells + - operating-points-v2 (no dvfs in kbase with arbitration) + - system-coherency with a value of 1 (no full coherency with arbitration) + + Example for a Mali GPU with 1 clock and no regulators: diff --git a/drivers/gpu/arm/bifrost/Kbuild b/drivers/gpu/arm/bifrost/Kbuild index ab12718387a2..443a9b85fdee 100644 --- a/drivers/gpu/arm/bifrost/Kbuild +++ b/drivers/gpu/arm/bifrost/Kbuild @@ -1,5 +1,5 @@ # -# (C) COPYRIGHT 2012-2018 ARM Limited. All rights reserved. +# (C) COPYRIGHT 2012-2020 ARM Limited. All rights reserved. # # This program is free software and is provided to you under the terms of the # GNU General Public License version 2 as published by the Free Software @@ -20,9 +20,12 @@ # # Driver version string which is returned to userspace via an ioctl -MALI_RELEASE_NAME ?= "r21p0-01rel0" +MALI_RELEASE_NAME ?= "r25p1-01bet0" # Paths required for build + +# make $(src) an absolute path if it isn't already, by prefixing $(srctree) +src:=$(if $(patsubst /%,,$(src)),$(srctree)/$(src),$(src)) KBASE_PATH = $(src) KBASE_PLATFORM_PATH = $(KBASE_PATH)/platform_dummy UMP_PATH = $(src)/../../../base @@ -34,6 +37,14 @@ MALI_UNIT_TEST ?= 0 MALI_KERNEL_TEST_API ?= 0 MALI_COVERAGE ?= 0 CONFIG_MALI_PLATFORM_NAME ?= "devicetree" +# Experimental features (corresponding -D definition should be appended to +# DEFINES below, e.g. 
for MALI_EXPERIMENTAL_FEATURE, +# -DMALI_EXPERIMENTAL_FEATURE=$(MALI_EXPERIMENTAL_FEATURE) should be appended) +# +# Experimental features must default to disabled, e.g.: +# MALI_EXPERIMENTAL_FEATURE ?= 0 +MALI_JIT_PRESSURE_LIMIT ?= 0 +MALI_INCREMENTAL_RENDERING ?= 0 # Set up our defines, which will be passed to gcc DEFINES = \ @@ -42,7 +53,9 @@ DEFINES = \ -DMALI_KERNEL_TEST_API=$(MALI_KERNEL_TEST_API) \ -DMALI_UNIT_TEST=$(MALI_UNIT_TEST) \ -DMALI_COVERAGE=$(MALI_COVERAGE) \ - -DMALI_RELEASE_NAME=\"$(MALI_RELEASE_NAME)\" + -DMALI_RELEASE_NAME=\"$(MALI_RELEASE_NAME)\" \ + -DMALI_JIT_PRESSURE_LIMIT=$(MALI_JIT_PRESSURE_LIMIT) \ + -DMALI_INCREMENTAL_RENDERING=$(MALI_INCREMENTAL_RENDERING) ifeq ($(KBUILD_EXTMOD),) # in-tree @@ -61,21 +74,16 @@ ccflags-y += $(DEFINES) -I$(KBASE_PATH) -I$(KBASE_PLATFORM_PATH) -I$(UMP_PATH) subdir-ccflags-y += $(DEFINES) -I$(KBASE_PATH) -I$(KBASE_PLATFORM_PATH) -I$(UMP_PATH) -I$(srctree)/include/linux SRC := \ - mali_kbase_device.c \ + context/mali_kbase_context.c \ + debug/mali_kbase_debug_ktrace.c \ + device/mali_kbase_device.c \ mali_kbase_cache_policy.c \ mali_kbase_mem.c \ mali_kbase_mem_pool_group.c \ - mali_kbase_mmu.c \ mali_kbase_native_mgm.c \ mali_kbase_ctx_sched.c \ - mali_kbase_jd.c \ - mali_kbase_jd_debugfs.c \ mali_kbase_jm.c \ mali_kbase_gpuprops.c \ - mali_kbase_js.c \ - mali_kbase_js_ctx_attr.c \ - mali_kbase_event.c \ - mali_kbase_context.c \ mali_kbase_pm.c \ mali_kbase_config.c \ mali_kbase_vinstr.c \ @@ -86,38 +94,63 @@ SRC := \ mali_kbase_hwcnt_types.c \ mali_kbase_hwcnt_virtualizer.c \ mali_kbase_softjobs.c \ - mali_kbase_10969_workaround.c \ mali_kbase_hw.c \ mali_kbase_debug.c \ mali_kbase_gpu_memory_debugfs.c \ mali_kbase_mem_linux.c \ mali_kbase_core_linux.c \ mali_kbase_mem_profile_debugfs.c \ - mali_kbase_mmu_mode_lpae.c \ - mali_kbase_mmu_mode_aarch64.c \ + mmu/mali_kbase_mmu.c \ + mmu/mali_kbase_mmu_hw_direct.c \ + mmu/mali_kbase_mmu_mode_lpae.c \ + mmu/mali_kbase_mmu_mode_aarch64.c \ 
mali_kbase_disjoint_events.c \ mali_kbase_debug_mem_view.c \ - mali_kbase_debug_job_fault.c \ mali_kbase_smc.c \ mali_kbase_mem_pool.c \ mali_kbase_mem_pool_debugfs.c \ mali_kbase_debugfs_helper.c \ - mali_kbase_timeline.c \ - mali_kbase_timeline_io.c \ - mali_kbase_tlstream.c \ - mali_kbase_tracepoints.c \ mali_kbase_strings.c \ mali_kbase_as_fault_debugfs.c \ mali_kbase_regs_history_debugfs.c \ - thirdparty/mali_kbase_mmap.c + thirdparty/mali_kbase_mmap.c \ + tl/mali_kbase_timeline.c \ + tl/mali_kbase_timeline_io.c \ + tl/mali_kbase_tlstream.c \ + tl/mali_kbase_tracepoints.c \ + gpu/mali_kbase_gpu.c +ifeq ($(MALI_USE_CSF),1) + SRC += \ + debug/backend/mali_kbase_debug_ktrace_csf.c \ + device/backend/mali_kbase_device_csf.c \ + gpu/backend/mali_kbase_gpu_fault_csf.c \ + tl/backend/mali_kbase_timeline_csf.c \ + mmu/backend/mali_kbase_mmu_csf.c \ + context/backend/mali_kbase_context_csf.c +else + SRC += \ + mali_kbase_dummy_job_wa.c \ + mali_kbase_debug_job_fault.c \ + mali_kbase_event.c \ + mali_kbase_jd.c \ + mali_kbase_jd_debugfs.c \ + mali_kbase_js.c \ + mali_kbase_js_ctx_attr.c \ + debug/backend/mali_kbase_debug_ktrace_jm.c \ + device/backend/mali_kbase_device_jm.c \ + gpu/backend/mali_kbase_gpu_fault_jm.c \ + tl/backend/mali_kbase_timeline_jm.c \ + mmu/backend/mali_kbase_mmu_jm.c \ + context/backend/mali_kbase_context_jm.c +endif ifeq ($(CONFIG_MALI_CINSTR_GWT),y) SRC += mali_kbase_gwt.c endif ifeq ($(MALI_UNIT_TEST),1) - SRC += mali_kbase_timeline_test.c + SRC += tl/mali_kbase_timeline_test.c endif ifeq ($(MALI_CUSTOMER_RELEASE),0) @@ -125,7 +158,8 @@ ifeq ($(MALI_CUSTOMER_RELEASE),0) endif -ccflags-y += -I$(KBASE_PATH) +ccflags-y += -I$(KBASE_PATH) -I$(KBASE_PATH)/debug \ + -I$(KBASE_PATH)/debug/backend # Tell the Linux build system from which .o file to create the kernel module obj-$(CONFIG_MALI_BIFROST) += bifrost_kbase.o @@ -147,6 +181,14 @@ endif ifeq ($(MALI_USE_CSF),1) include $(src)/csf/Kbuild +else +# empty +endif + +ifeq 
($(CONFIG_MALI_ARBITER_SUPPORT),y) + include $(src)/arbiter/Kbuild +else +# empty endif bifrost_kbase-$(CONFIG_MALI_BIFROST_DMA_FENCE) += \ diff --git a/drivers/gpu/arm/bifrost/Kconfig b/drivers/gpu/arm/bifrost/Kconfig index a846b0482074..ccb16671047a 100644 --- a/drivers/gpu/arm/bifrost/Kconfig +++ b/drivers/gpu/arm/bifrost/Kconfig @@ -1,5 +1,5 @@ # -# (C) COPYRIGHT 2012-2019 ARM Limited. All rights reserved. +# (C) COPYRIGHT 2012-2020 ARM Limited. All rights reserved. # # This program is free software and is provided to you under the terms of the # GNU General Public License version 2 as published by the Free Software @@ -49,6 +49,7 @@ config MALI_BIFROST_DVFS config MALI_BIFROST_ENABLE_TRACE bool "Enable kbase tracing" depends on MALI_BIFROST + default y if MALI_BIFROST_DEBUG default n help Enables tracing in kbase. Trace log available through @@ -83,6 +84,17 @@ config MALI_PLATFORM_NAME include in the build. 'platform/$(MALI_PLATFORM_NAME)/Kbuild' must exist. +config MALI_ARBITER_SUPPORT + bool "Enable arbiter support for Mali" + depends on MALI_BIFROST + default n + help + Enable support for the arbiter interface in the driver. + This allows an external arbiter to manage driver access + to GPU hardware in a virtualized environment. + + If unsure, say N. 
+ # MALI_BIFROST_EXPERT configuration options menuconfig MALI_BIFROST_EXPERT @@ -155,6 +167,7 @@ config MALI_BIFROST_ERROR_INJECT config MALI_BIFROST_SYSTEM_TRACE bool "Enable system event tracing support" depends on MALI_BIFROST && MALI_BIFROST_EXPERT + default y if MALI_BIFROST_DEBUG default n help Choose this option to enable system trace events for each @@ -209,13 +222,17 @@ config MALI_DMA_BUF_MAP_ON_DEMAND config MALI_DMA_BUF_LEGACY_COMPAT bool "Enable legacy compatibility cache flush on dma-buf map" depends on MALI_BIFROST && !MALI_DMA_BUF_MAP_ON_DEMAND - default y + default n help This option enables compatibility with legacy dma-buf mapping behavior, then the dma-buf is mapped on import, by adding cache maintenance where MALI_DMA_BUF_MAP_ON_DEMAND would do the mapping, including a cache flush. + This option might work-around issues related to missing cache + flushes in other drivers. This only has an effect for clients using + UK 11.18 or older. For later UK versions it is not possible. + config MALI_HW_ERRATA_1485982_NOT_AFFECTED bool "Disable workaround for BASE_HW_ISSUE_GPU2017_1336" depends on MALI_BIFROST && MALI_BIFROST_EXPERT @@ -239,6 +256,14 @@ config MALI_HW_ERRATA_1485982_USE_CLOCK_ALTERNATIVE tree using the property, opp-mali-errata-1485982. Otherwise the slowest clock will be selected. +config MALI_GEM5_BUILD + bool "Enable build of Mali kernel driver for GEM5" + depends on MALI_BIFROST + default n + help + This option is to do a Mali GEM5 build. + If unsure, say N. + # Instrumentation options. config MALI_JOB_DUMP @@ -264,5 +289,20 @@ config MALI_BIFROST_PRFCNT_SET_SECONDARY If unsure, say N. +config MALI_PRFCNT_SET_SECONDARY_VIA_DEBUG_FS + bool "Use secondary set of performance counters" + depends on MALI_BIFROST && MALI_BIFROST_EXPERT && !MALI_BIFROST_PRFCNT_SET_SECONDARY && DEBUG_FS + default n + help + Select this option to make the secondary set of performance counters + available at runtime via debugfs. 
Kernel features that depend on + access to the primary set of counters may become unavailable. + + This feature is unsupported and unstable, and may break at any time. + Enabling this option will prevent power management from working + optimally and may cause instrumentation tools to return bogus results. + + If unsure, say N. + source "drivers/gpu/arm/midgard/platform/Kconfig" # source "drivers/gpu/arm/midgard/tests/Kconfig" diff --git a/drivers/gpu/arm/bifrost/Mconfig b/drivers/gpu/arm/bifrost/Mconfig index dc6dfac135ab..25d384325a77 100644 --- a/drivers/gpu/arm/bifrost/Mconfig +++ b/drivers/gpu/arm/bifrost/Mconfig @@ -1,5 +1,5 @@ # -# (C) COPYRIGHT 2012-2019 ARM Limited. All rights reserved. +# (C) COPYRIGHT 2012-2020 ARM Limited. All rights reserved. # # This program is free software and is provided to you under the terms of the # GNU General Public License version 2 as published by the Free Software @@ -39,7 +39,7 @@ config MALI_BIFROST_DVFS config MALI_BIFROST_ENABLE_TRACE bool "Enable kbase tracing" - depends on MALI_BIFROST + default y if MALI_BIFROST_DEBUG default n help Enables tracing in kbase. Trace log available through @@ -81,6 +81,17 @@ config MALI_PLATFORM_NAME When PLATFORM_CUSTOM is set, this needs to be set manually to pick up the desired platform files. +config MALI_ARBITER_SUPPORT + bool "Enable arbiter support for Mali" + depends on MALI_BIFROST + default n + help + Enable support for the arbiter interface in the driver. + This allows an external arbiter to manage driver access + to GPU hardware in a virtualized environment. + + If unsure, say N. + # MALI_BIFROST_EXPERT configuration options menuconfig MALI_BIFROST_EXPERT @@ -167,10 +178,11 @@ config MALI_BIFROST_ERROR_INJECT config MALI_BIFROST_SYSTEM_TRACE bool "Enable system event tracing support" depends on MALI_BIFROST && MALI_BIFROST_EXPERT + default y if MALI_BIFROST_DEBUG default n help Choose this option to enable system trace events for each - kbase event. 
This is typically used for debugging but has + kbase event. This is typically used for debugging but has minimal overhead when not in use. Enable only if you know what you are doing. @@ -213,7 +225,7 @@ config MALI_DMA_BUF_MAP_ON_DEMAND config MALI_DMA_BUF_LEGACY_COMPAT bool "Enable legacy compatibility cache flush on dma-buf map" depends on MALI_BIFROST && !MALI_DMA_BUF_MAP_ON_DEMAND - default y + default n help This option enables compatibility with legacy dma-buf mapping behavior, then the dma-buf is mapped on import, by adding cache @@ -249,6 +261,14 @@ config MALI_HW_ERRATA_1485982_USE_CLOCK_ALTERNATIVE tree using the property, opp-mali-errata-1485982. Otherwise the slowest clock will be selected. +config MALI_GEM5_BUILD + bool "Enable build of Mali kernel driver for GEM5" + depends on MALI_BIFROST + default n + help + This option is to do a Mali GEM5 build. + If unsure, say N. + # Instrumentation options. # config MALI_JOB_DUMP exists in the Kernel Kconfig but is configured using CINSTR_JOB_DUMP in Mconfig. diff --git a/drivers/gpu/arm/bifrost/platform/meson/Kbuild b/drivers/gpu/arm/bifrost/arbiter/Kbuild similarity index 81% rename from drivers/gpu/arm/bifrost/platform/meson/Kbuild rename to drivers/gpu/arm/bifrost/arbiter/Kbuild index 243415b3c53f..98e47bed223a 100644 --- a/drivers/gpu/arm/bifrost/platform/meson/Kbuild +++ b/drivers/gpu/arm/bifrost/arbiter/Kbuild @@ -1,5 +1,5 @@ # -# (C) COPYRIGHT 2012-2017, 2019 ARM Limited. All rights reserved. +# (C) COPYRIGHT 2019-2020 ARM Limited. All rights reserved. 
# # This program is free software and is provided to you under the terms of the # GNU General Public License version 2 as published by the Free Software @@ -20,5 +20,5 @@ # mali_kbase-y += \ - $(MALI_PLATFORM_DIR)/mali_kbase_config_meson.o \ - $(MALI_PLATFORM_DIR)/mali_kbase_runtime_pm.o + arbiter/mali_kbase_arbif.o \ + arbiter/mali_kbase_arbiter_pm.o diff --git a/drivers/gpu/arm/bifrost/arbiter/mali_kbase_arbif.c b/drivers/gpu/arm/bifrost/arbiter/mali_kbase_arbif.c new file mode 100644 index 000000000000..d193cb99d881 --- /dev/null +++ b/drivers/gpu/arm/bifrost/arbiter/mali_kbase_arbif.c @@ -0,0 +1,175 @@ +// SPDX-License-Identifier: GPL-2.0 + +/* + * + * (C) COPYRIGHT 2019-2020 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. 
+ * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +/** + * @file mali_kbase_arbif.c + * Mali arbiter interface APIs to share GPU between Virtual Machines + */ + +#include +#include "mali_kbase_arbif.h" +#include +#include +#include +#include "mali_kbase_arbiter_interface.h" + +static void on_gpu_stop(struct device *dev) +{ + struct kbase_device *kbdev = dev_get_drvdata(dev); + + KBASE_TLSTREAM_TL_EVENT_ARB_STOP_REQUESTED(kbdev, kbdev); + kbase_arbiter_pm_vm_event(kbdev, KBASE_VM_GPU_STOP_EVT); +} + +static void on_gpu_granted(struct device *dev) +{ + struct kbase_device *kbdev = dev_get_drvdata(dev); + + KBASE_TLSTREAM_TL_EVENT_ARB_GRANTED(kbdev, kbdev); + kbase_arbiter_pm_vm_event(kbdev, KBASE_VM_GPU_GRANTED_EVT); +} + +static void on_gpu_lost(struct device *dev) +{ + struct kbase_device *kbdev = dev_get_drvdata(dev); + + kbase_arbiter_pm_vm_event(kbdev, KBASE_VM_GPU_LOST_EVT); +} + +int kbase_arbif_init(struct kbase_device *kbdev) +{ +#ifdef CONFIG_OF + struct arbiter_if_arb_vm_ops ops; + struct arbiter_if_dev *arb_if; + struct device_node *arbiter_if_node; + struct platform_device *pdev; + int err; + + dev_dbg(kbdev->dev, "%s\n", __func__); + + arbiter_if_node = of_parse_phandle(kbdev->dev->of_node, + "arbiter_if", 0); + if (!arbiter_if_node) { + dev_dbg(kbdev->dev, "No arbiter_if in Device Tree\n"); + /* no arbiter interface defined in device tree */ + kbdev->arb.arb_dev = NULL; + kbdev->arb.arb_if = NULL; + return 0; + } + + pdev = of_find_device_by_node(arbiter_if_node); + if (!pdev) { + dev_err(kbdev->dev, "Failed to find arbiter_if device\n"); + return -EPROBE_DEFER; + } + + if (!pdev->dev.driver || !try_module_get(pdev->dev.driver->owner)) { + dev_err(kbdev->dev, "arbiter_if driver not available\n"); + return -EPROBE_DEFER; + } + kbdev->arb.arb_dev = &pdev->dev; + arb_if = platform_get_drvdata(pdev); + if (!arb_if) { + dev_err(kbdev->dev, "arbiter_if driver not ready\n"); + module_put(pdev->dev.driver->owner); + return -EPROBE_DEFER; + } + + 
kbdev->arb.arb_if = arb_if; + ops.arb_vm_gpu_stop = on_gpu_stop; + ops.arb_vm_gpu_granted = on_gpu_granted; + ops.arb_vm_gpu_lost = on_gpu_lost; + + /* register kbase arbiter_if callbacks */ + if (arb_if->vm_ops.vm_arb_register_dev) { + err = arb_if->vm_ops.vm_arb_register_dev(arb_if, + kbdev->dev, &ops); + if (err) { + dev_err(kbdev->dev, "Arbiter registration failed.\n"); + module_put(pdev->dev.driver->owner); + return err; + } + } +#else /* CONFIG_OF */ + dev_dbg(kbdev->dev, "No arbiter without Device Tree support\n"); + kbdev->arb.arb_dev = NULL; + kbdev->arb.arb_if = NULL; +#endif + return 0; +} + +void kbase_arbif_destroy(struct kbase_device *kbdev) +{ + struct arbiter_if_dev *arb_if = kbdev->arb.arb_if; + + if (arb_if && arb_if->vm_ops.vm_arb_unregister_dev) { + dev_dbg(kbdev->dev, "%s\n", __func__); + arb_if->vm_ops.vm_arb_unregister_dev(kbdev->arb.arb_if); + } + kbdev->arb.arb_if = NULL; + if (kbdev->arb.arb_dev) + module_put(kbdev->arb.arb_dev->driver->owner); + kbdev->arb.arb_dev = NULL; +} + +void kbase_arbif_gpu_request(struct kbase_device *kbdev) +{ + struct arbiter_if_dev *arb_if = kbdev->arb.arb_if; + + if (arb_if && arb_if->vm_ops.vm_arb_gpu_request) { + dev_dbg(kbdev->dev, "%s\n", __func__); + arb_if->vm_ops.vm_arb_gpu_request(arb_if); + } +} + +void kbase_arbif_gpu_stopped(struct kbase_device *kbdev, u8 gpu_required) +{ + struct arbiter_if_dev *arb_if = kbdev->arb.arb_if; + + if (arb_if && arb_if->vm_ops.vm_arb_gpu_stopped) { + dev_dbg(kbdev->dev, "%s\n", __func__); + KBASE_TLSTREAM_TL_EVENT_ARB_STOPPED(kbdev, kbdev); + arb_if->vm_ops.vm_arb_gpu_stopped(arb_if, gpu_required); + } +} + +void kbase_arbif_gpu_active(struct kbase_device *kbdev) +{ + struct arbiter_if_dev *arb_if = kbdev->arb.arb_if; + + if (arb_if && arb_if->vm_ops.vm_arb_gpu_active) { + dev_dbg(kbdev->dev, "%s\n", __func__); + arb_if->vm_ops.vm_arb_gpu_active(arb_if); + } +} + +void kbase_arbif_gpu_idle(struct kbase_device *kbdev) +{ + struct arbiter_if_dev *arb_if = 
kbdev->arb.arb_if; + + if (arb_if && arb_if->vm_ops.vm_arb_gpu_idle) { + dev_dbg(kbdev->dev, "vm_arb_gpu_idle\n"); + arb_if->vm_ops.vm_arb_gpu_idle(arb_if); + } +} diff --git a/drivers/gpu/arm/bifrost/arbiter/mali_kbase_arbif.h b/drivers/gpu/arm/bifrost/arbiter/mali_kbase_arbif.h new file mode 100644 index 000000000000..e7e9de76c94c --- /dev/null +++ b/drivers/gpu/arm/bifrost/arbiter/mali_kbase_arbif.h @@ -0,0 +1,133 @@ +/* + * + * (C) COPYRIGHT ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + *//* SPDX-License-Identifier: GPL-2.0 */ + +/* + * + * (C) COPYRIGHT 2019-2020 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * + */ + +/** + * @file + * Mali arbiter interface APIs to share GPU between Virtual Machines + */ + +#ifndef _MALI_KBASE_ARBIF_H_ +#define _MALI_KBASE_ARBIF_H_ + +/** + * enum kbase_arbif_evt - Internal Arbiter event. + * + * @KBASE_VM_GPU_INITIALIZED_EVT: KBase has finished initializing + * and can be stopped + * @KBASE_VM_GPU_STOP_EVT: Stop message received from Arbiter + * @KBASE_VM_GPU_GRANTED_EVT: Grant message received from Arbiter + * @KBASE_VM_GPU_LOST_EVT: Lost message received from Arbiter + * @KBASE_VM_GPU_IDLE_EVENT: KBase has transitioned into an inactive state. + * @KBASE_VM_REF_EVENT: KBase has transitioned into an active state. + * @KBASE_VM_OS_SUSPEND_EVENT: KBase is suspending + * @KBASE_VM_OS_RESUME_EVENT: KBase is resuming + */ +enum kbase_arbif_evt { + KBASE_VM_GPU_INITIALIZED_EVT = 1, + KBASE_VM_GPU_STOP_EVT, + KBASE_VM_GPU_GRANTED_EVT, + KBASE_VM_GPU_LOST_EVT, + KBASE_VM_GPU_IDLE_EVENT, + KBASE_VM_REF_EVENT, + KBASE_VM_OS_SUSPEND_EVENT, + KBASE_VM_OS_RESUME_EVENT, +}; + +/** + * kbase_arbif_init() - Initialize the arbiter interface functionality. + * @kbdev: The kbase device structure for the device (must be a valid pointer) + * + * Initializes the arbiter interface and also determines + * if Arbiter functionality is required. + * + * Return: 0 if the Arbiter interface was successfully initialized or the + * Arbiter was not required; an error code (e.g. -EPROBE_DEFER) otherwise. + */ +int kbase_arbif_init(struct kbase_device *kbdev); + +/** + * kbase_arbif_destroy() - Clean up the arbiter interface functionality. 
+ * @kbdev: The kbase device structure for the device (must be a valid pointer) + * + * Cleans up the arbiter interface functionality and resets the reference count + * of the arbif module used + */ +void kbase_arbif_destroy(struct kbase_device *kbdev); + +/** + * kbase_arbif_gpu_request() - Send GPU request message to the arbiter + * @kbdev: The kbase device structure for the device (must be a valid pointer) + * + * Sends a message to Arbiter to request GPU access. + */ +void kbase_arbif_gpu_request(struct kbase_device *kbdev); + +/** + * kbase_arbif_gpu_stopped() - Send GPU stopped message to the arbiter + * @kbdev: The kbase device structure for the device (must be a valid pointer) + * @gpu_required: true if GPU access is still required + * (Arbiter will automatically send another grant message) + * + * Sends a message to Arbiter to notify that the GPU has stopped. + * @note Once this call has been made, KBase must not attempt to access the GPU + * until the #KBASE_VM_GPU_GRANTED_EVT event has been received. + */ +void kbase_arbif_gpu_stopped(struct kbase_device *kbdev, u8 gpu_required); + +/** + * kbase_arbif_gpu_active() - Send a GPU active message to the arbiter + * @kbdev: The kbase device structure for the device (must be a valid pointer) + * + * Sends a message to Arbiter to report that KBase has gone active. + */ +void kbase_arbif_gpu_active(struct kbase_device *kbdev); + +/** + * kbase_arbif_gpu_idle() - Send a GPU idle message to the arbiter + * @kbdev: The kbase device structure for the device (must be a valid pointer) + * + * Sends a message to Arbiter to report that KBase has gone idle. 
+ */ +void kbase_arbif_gpu_idle(struct kbase_device *kbdev); + +#endif /* _MALI_KBASE_ARBIF_H_ */ diff --git a/drivers/gpu/arm/bifrost/arbiter/mali_kbase_arbiter_defs.h b/drivers/gpu/arm/bifrost/arbiter/mali_kbase_arbiter_defs.h new file mode 100644 index 000000000000..1f53cbf1a286 --- /dev/null +++ b/drivers/gpu/arm/bifrost/arbiter/mali_kbase_arbiter_defs.h @@ -0,0 +1,95 @@ +/* + * + * (C) COPYRIGHT ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + *//* SPDX-License-Identifier: GPL-2.0 */ + +/* + * + * (C) COPYRIGHT 2019-2020 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. 
+ * + * + */ + +/** + * @file + * Mali structure definitions to support the arbitration feature + */ + +#ifndef _MALI_KBASE_ARBITER_DEFS_H_ +#define _MALI_KBASE_ARBITER_DEFS_H_ + +#include "mali_kbase_arbiter_pm.h" + +/** + * struct kbase_arbiter_vm_state - Struct representing the state and containing the + * data of pm work + * @kbdev: Pointer to kbase device structure (must be a valid pointer) + * @vm_state_lock: The lock protecting the VM state when arbiter is used. + * This lock must also be held whenever the VM state is being + * transitioned + * @vm_state_wait: Wait queue set when GPU is granted + * @vm_state: Current state of VM + * @vm_arb_wq: Work queue for resuming or stopping work on the GPU for use + * with the Arbiter + * @vm_suspend_work: Work item for vm_arb_wq to stop current work on GPU + * @vm_resume_work: Work item for vm_arb_wq to resume current work on GPU + * @vm_arb_starting: Work queue resume in progress + * @vm_arb_stopping: Work queue suspend in progress + * @vm_arb_users_waiting: Count of users waiting for GPU + */ +struct kbase_arbiter_vm_state { + struct kbase_device *kbdev; + struct mutex vm_state_lock; + wait_queue_head_t vm_state_wait; + enum kbase_vm_state vm_state; + struct workqueue_struct *vm_arb_wq; + struct work_struct vm_suspend_work; + struct work_struct vm_resume_work; + bool vm_arb_starting; + bool vm_arb_stopping; + int vm_arb_users_waiting; +}; + +/** + * struct kbase_arbiter_device - Represents an instance of the arbiter device, + * allocated from the probe method of Mali driver + * @arb_if: Pointer to the arbiter interface device + * @arb_dev: Pointer to the arbiter device + */ +struct kbase_arbiter_device { + struct arbiter_if_dev *arb_if; + struct device *arb_dev; +}; + +#endif /* _MALI_KBASE_ARBITER_DEFS_H_ */ diff --git a/drivers/gpu/arm/bifrost/arbiter/mali_kbase_arbiter_interface.h b/drivers/gpu/arm/bifrost/arbiter/mali_kbase_arbiter_interface.h new file mode 100644 index 000000000000..5d5d8a7d2cff --- /dev/null +++ 
b/drivers/gpu/arm/bifrost/arbiter/mali_kbase_arbiter_interface.h @@ -0,0 +1,181 @@ +/* + * + * (C) COPYRIGHT ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + *//* SPDX-License-Identifier: GPL-2.0 */ + +/* + * + * (C) COPYRIGHT 2019-2020 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * + */ + +/** + * @file + * Defines the Mali arbiter interface + */ + +#ifndef _MALI_KBASE_ARBITER_INTERFACE_H_ +#define _MALI_KBASE_ARBITER_INTERFACE_H_ + +/** + * @brief Mali arbiter interface version + * + * This specifies the current version of the configuration interface. 
Whenever + * the arbiter interface changes, so that integration effort is required, the + * version number will be increased. Each configuration must make an effort + * to check that it implements the correct version. + * + * Version history: + * 1 - Added the Mali arbiter configuration interface. + * 2 - Strip out reference code from header + * 3 - Removed DVFS utilization interface (DVFS moved to arbiter side) + */ +#define MALI_KBASE_ARBITER_INTERFACE_VERSION 3 + +struct arbiter_if_dev; + +/** + * struct arbiter_if_arb_vm_ops - Interface to communicate messages to VM + * + * This struct contains callbacks used to deliver messages + * from the arbiter to the corresponding VM. + * + * Note that calls into these callbacks may have synchronous calls back into + * the arbiter arbiter_if_vm_arb_ops callbacks below. + * For example vm_arb_gpu_stopped() may be called as a side effect of + * arb_vm_gpu_stop() being called here. + */ +struct arbiter_if_arb_vm_ops { + /** + * arb_vm_gpu_stop() - Ask VM to stop using GPU + * @dev: The arbif kernel module device. + * + * Informs KBase to stop using the GPU as soon as possible. + * @Note: Once the driver is no longer using the GPU, a call to + * vm_arb_gpu_stopped is expected by the arbiter. + */ + void (*arb_vm_gpu_stop)(struct device *dev); + + /** + * arb_vm_gpu_granted() - GPU has been granted to VM + * @dev: The arbif kernel module device. + * + * Informs KBase that the GPU can now be used by the VM. + */ + void (*arb_vm_gpu_granted)(struct device *dev); + + /** + * arb_vm_gpu_lost() - VM has lost the GPU + * @dev: The arbif kernel module device. + * + * This is called if KBase takes too long to respond to the arbiter + * stop request. + * Once this is called, KBase will assume that access to the GPU + * has been lost and will fail all running jobs and reset its + * internal state. + * If successful, will respond with a vm_arb_gpu_stopped message. 
+ */ + void (*arb_vm_gpu_lost)(struct device *dev); +}; + +/** + * struct arbiter_if_vm_arb_ops - Interface to communicate messages to arbiter + * + * This struct contains callbacks used to request operations + * from the VM to the arbiter + * + * Note that we must not make any synchronous calls back in to the VM + * (via arbiter_if_arb_vm_ops above) in the context of these callbacks. + */ +struct arbiter_if_vm_arb_ops { + /** + * vm_arb_register_dev() - Register VM device driver callbacks. + * @arbif_dev: The arbiter interface we are registering device callbacks + * @dev: The device structure to supply in the callbacks. + * @ops: The callbacks that the device driver supports + * (none are optional). + */ + int (*vm_arb_register_dev)(struct arbiter_if_dev *arbif_dev, + struct device *dev, struct arbiter_if_arb_vm_ops *ops); + + /** + * vm_arb_unregister_dev() - Unregister VM device driver callbacks. + * @arbif_dev: The arbiter interface we are unregistering from. + */ + void (*vm_arb_unregister_dev)(struct arbiter_if_dev *arbif_dev); + + /** + * vm_arb_gpu_request() - Ask the arbiter interface for GPU access. + * @arbif_dev: The arbiter interface we want to issue the request. + */ + void (*vm_arb_gpu_request)(struct arbiter_if_dev *arbif_dev); + + /** + * vm_arb_gpu_active() - Inform arbiter that the driver has gone active + * @arbif_dev: The arbiter interface device. + */ + void (*vm_arb_gpu_active)(struct arbiter_if_dev *arbif_dev); + + /** + * vm_arb_gpu_idle() - Inform the arbiter that the driver has gone idle + * @arbif_dev: The arbiter interface device. + */ + void (*vm_arb_gpu_idle)(struct arbiter_if_dev *arbif_dev); + + /** + * vm_arb_gpu_stopped() - Inform the arbiter that the driver has stopped + * using the GPU + * @arbif_dev: The arbiter interface device. + * @gpu_required: The GPU is still needed to do more work. 
+ */ + void (*vm_arb_gpu_stopped)(struct arbiter_if_dev *arbif_dev, + u8 gpu_required); +}; + +/** + * struct arbiter_if_dev - Arbiter Interface + * @vm_ops: Callback functions for connecting KBase with + * arbiter interface device. + * @priv_data: Internal arbif data not used by KBASE. + * + * Arbiter Interface Kernel Module State used for linking KBase + * with an arbiter interface platform device + */ +struct arbiter_if_dev { + struct arbiter_if_vm_arb_ops vm_ops; + void *priv_data; +}; + +#endif /* _MALI_KBASE_ARBITER_INTERFACE_H_ */ diff --git a/drivers/gpu/arm/bifrost/arbiter/mali_kbase_arbiter_pm.c b/drivers/gpu/arm/bifrost/arbiter/mali_kbase_arbiter_pm.c new file mode 100644 index 000000000000..6c35e165009b --- /dev/null +++ b/drivers/gpu/arm/bifrost/arbiter/mali_kbase_arbiter_pm.c @@ -0,0 +1,645 @@ +// SPDX-License-Identifier: GPL-2.0 + +/* + * + * (C) COPYRIGHT 2019-2020 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. 
+ * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +/** + * @file mali_kbase_arbiter_pm.c + * Mali arbiter power manager state machine and APIs + */ + +#include +#include +#include +#include +#include +#include +#include + +static void kbase_arbiter_pm_vm_wait_gpu_assignment(struct kbase_device *kbdev); +static inline bool kbase_arbiter_pm_vm_gpu_assigned_lockheld( + struct kbase_device *kbdev); + +static inline const char *kbase_arbiter_pm_vm_state_str( + enum kbase_vm_state state) +{ + switch (state) { + case KBASE_VM_STATE_INITIALIZING: + return "KBASE_VM_STATE_INITIALIZING"; + case KBASE_VM_STATE_INITIALIZING_WITH_GPU: + return "KBASE_VM_STATE_INITIALIZING_WITH_GPU"; + case KBASE_VM_STATE_SUSPENDED: + return "KBASE_VM_STATE_SUSPENDED"; + case KBASE_VM_STATE_STOPPED: + return "KBASE_VM_STATE_STOPPED"; + case KBASE_VM_STATE_STOPPED_GPU_REQUESTED: + return "KBASE_VM_STATE_STOPPED_GPU_REQUESTED"; + case KBASE_VM_STATE_STARTING: + return "KBASE_VM_STATE_STARTING"; + case KBASE_VM_STATE_IDLE: + return "KBASE_VM_STATE_IDLE"; + case KBASE_VM_STATE_ACTIVE: + return "KBASE_VM_STATE_ACTIVE"; + case KBASE_VM_STATE_STOPPING_IDLE: + return "KBASE_VM_STATE_STOPPING_IDLE"; + case KBASE_VM_STATE_STOPPING_ACTIVE: + return "KBASE_VM_STATE_STOPPING_ACTIVE"; + case KBASE_VM_STATE_SUSPEND_PENDING: + return "KBASE_VM_STATE_SUSPEND_PENDING"; + case KBASE_VM_STATE_SUSPEND_WAIT_FOR_GRANT: + return "KBASE_VM_STATE_SUSPEND_WAIT_FOR_GRANT"; + default: + KBASE_DEBUG_ASSERT(false); + return "[UnknownState]"; + } +} + +static inline const char *kbase_arbiter_pm_vm_event_str( + enum kbase_arbif_evt evt) +{ + switch (evt) { + case KBASE_VM_GPU_INITIALIZED_EVT: + return "KBASE_VM_GPU_INITIALIZED_EVT"; + case KBASE_VM_GPU_STOP_EVT: + return "KBASE_VM_GPU_STOP_EVT"; + case KBASE_VM_GPU_GRANTED_EVT: + return "KBASE_VM_GPU_GRANTED_EVT"; + case KBASE_VM_GPU_LOST_EVT: + return "KBASE_VM_GPU_LOST_EVT"; + case KBASE_VM_OS_SUSPEND_EVENT: + return "KBASE_VM_OS_SUSPEND_EVENT"; + case 
KBASE_VM_OS_RESUME_EVENT: + return "KBASE_VM_OS_RESUME_EVENT"; + case KBASE_VM_GPU_IDLE_EVENT: + return "KBASE_VM_GPU_IDLE_EVENT"; + case KBASE_VM_REF_EVENT: + return "KBASE_VM_REF_EVENT"; + default: + KBASE_DEBUG_ASSERT(false); + return "[UnknownEvent]"; + } +} + +static void kbase_arbiter_pm_vm_set_state(struct kbase_device *kbdev, + enum kbase_vm_state new_state) +{ + struct kbase_arbiter_vm_state *arb_vm_state = kbdev->pm.arb_vm_state; + + dev_dbg(kbdev->dev, "VM set_state %s -> %s", + kbase_arbiter_pm_vm_state_str(arb_vm_state->vm_state), + kbase_arbiter_pm_vm_state_str(new_state)); + lockdep_assert_held(&arb_vm_state->vm_state_lock); + arb_vm_state->vm_state = new_state; + wake_up(&arb_vm_state->vm_state_wait); +} + +static void kbase_arbiter_pm_suspend_wq(struct work_struct *data) +{ + struct kbase_arbiter_vm_state *arb_vm_state = container_of(data, + struct kbase_arbiter_vm_state, + vm_suspend_work); + struct kbase_device *kbdev = arb_vm_state->kbdev; + + mutex_lock(&arb_vm_state->vm_state_lock); + dev_dbg(kbdev->dev, ">%s\n", __func__); + if (arb_vm_state->vm_state == KBASE_VM_STATE_STOPPING_IDLE || + arb_vm_state->vm_state == + KBASE_VM_STATE_STOPPING_ACTIVE || + arb_vm_state->vm_state == + KBASE_VM_STATE_SUSPEND_PENDING) { + mutex_unlock(&arb_vm_state->vm_state_lock); + dev_dbg(kbdev->dev, ">kbase_pm_driver_suspend\n"); + kbase_pm_driver_suspend(kbdev); + dev_dbg(kbdev->dev, "vm_state_lock); + } + mutex_unlock(&arb_vm_state->vm_state_lock); + dev_dbg(kbdev->dev, "<%s\n", __func__); +} + +static void kbase_arbiter_pm_resume_wq(struct work_struct *data) +{ + struct kbase_arbiter_vm_state *arb_vm_state = container_of(data, + struct kbase_arbiter_vm_state, + vm_resume_work); + struct kbase_device *kbdev = arb_vm_state->kbdev; + + mutex_lock(&arb_vm_state->vm_state_lock); + dev_dbg(kbdev->dev, ">%s\n", __func__); + arb_vm_state->vm_arb_starting = true; + if (arb_vm_state->vm_state == KBASE_VM_STATE_STARTING) { + mutex_unlock(&arb_vm_state->vm_state_lock); + 
dev_dbg(kbdev->dev, ">kbase_pm_driver_resume\n"); + kbase_pm_driver_resume(kbdev, true); + dev_dbg(kbdev->dev, "vm_state_lock); + } else if (arb_vm_state->vm_state == KBASE_VM_STATE_STOPPING_ACTIVE) { + kbase_arbiter_pm_vm_stopped(kbdev); + } + arb_vm_state->vm_arb_starting = false; + mutex_unlock(&arb_vm_state->vm_state_lock); + dev_dbg(kbdev->dev, "<%s\n", __func__); +} + +int kbase_arbiter_pm_early_init(struct kbase_device *kbdev) +{ + int err; + struct kbase_arbiter_vm_state *arb_vm_state = NULL; + + arb_vm_state = kmalloc(sizeof(struct kbase_arbiter_vm_state), + GFP_KERNEL); + if (arb_vm_state == NULL) + return -ENOMEM; + + arb_vm_state->kbdev = kbdev; + arb_vm_state->vm_state = KBASE_VM_STATE_INITIALIZING; + + mutex_init(&arb_vm_state->vm_state_lock); + init_waitqueue_head(&arb_vm_state->vm_state_wait); + arb_vm_state->vm_arb_wq = alloc_ordered_workqueue("kbase_vm_arb_wq", + WQ_HIGHPRI); + if (!arb_vm_state->vm_arb_wq) { + dev_err(kbdev->dev, "Failed to allocate vm_arb workqueue\n"); + return -ENOMEM; + } + INIT_WORK(&arb_vm_state->vm_suspend_work, kbase_arbiter_pm_suspend_wq); + INIT_WORK(&arb_vm_state->vm_resume_work, kbase_arbiter_pm_resume_wq); + arb_vm_state->vm_arb_starting = false; + arb_vm_state->vm_arb_users_waiting = 0; + kbdev->pm.arb_vm_state = arb_vm_state; + + err = kbase_arbif_init(kbdev); + if (err) { + goto arbif_init_fail; + } + if (kbdev->arb.arb_if) { + kbase_arbif_gpu_request(kbdev); + dev_dbg(kbdev->dev, "Waiting for initial GPU assignment...\n"); + wait_event(arb_vm_state->vm_state_wait, + arb_vm_state->vm_state == + KBASE_VM_STATE_INITIALIZING_WITH_GPU); + dev_dbg(kbdev->dev, + "Waiting for initial GPU assignment - done\n"); + } + return 0; + +arbif_init_fail: + destroy_workqueue(arb_vm_state->vm_arb_wq); + kfree(arb_vm_state); + kbdev->pm.arb_vm_state = NULL; + return err; +} + +void kbase_arbiter_pm_early_term(struct kbase_device *kbdev) +{ + struct kbase_arbiter_vm_state *arb_vm_state = kbdev->pm.arb_vm_state; + + 
mutex_lock(&arb_vm_state->vm_state_lock); + if (arb_vm_state->vm_state > KBASE_VM_STATE_STOPPED_GPU_REQUESTED) + kbase_arbif_gpu_stopped(kbdev, false); + + mutex_unlock(&arb_vm_state->vm_state_lock); + kbase_arbif_destroy(kbdev); + destroy_workqueue(arb_vm_state->vm_arb_wq); + arb_vm_state->vm_arb_wq = NULL; + kfree(kbdev->pm.arb_vm_state); + kbdev->pm.arb_vm_state = NULL; +} + +void kbase_arbiter_pm_release_interrupts(struct kbase_device *kbdev) +{ + struct kbase_arbiter_vm_state *arb_vm_state = kbdev->pm.arb_vm_state; + + mutex_lock(&arb_vm_state->vm_state_lock); + if (!kbdev->arb.arb_if || + arb_vm_state->vm_state > + KBASE_VM_STATE_STOPPED_GPU_REQUESTED) + kbase_release_interrupts(kbdev); + + mutex_unlock(&arb_vm_state->vm_state_lock); +} + +void kbase_arbiter_pm_vm_stopped(struct kbase_device *kbdev) +{ + bool request_gpu = false; + struct kbase_arbiter_vm_state *arb_vm_state = kbdev->pm.arb_vm_state; + + lockdep_assert_held(&arb_vm_state->vm_state_lock); + + if (arb_vm_state->vm_arb_users_waiting > 0 && + arb_vm_state->vm_state == KBASE_VM_STATE_STOPPING_IDLE) + kbase_arbiter_pm_vm_set_state(kbdev, + KBASE_VM_STATE_STOPPING_ACTIVE); + + dev_dbg(kbdev->dev, "%s %s\n", __func__, + kbase_arbiter_pm_vm_state_str(arb_vm_state->vm_state)); + kbase_release_interrupts(kbdev); + switch (arb_vm_state->vm_state) { + case KBASE_VM_STATE_STOPPING_ACTIVE: + request_gpu = true; + kbase_arbiter_pm_vm_set_state(kbdev, + KBASE_VM_STATE_STOPPED_GPU_REQUESTED); + break; + case KBASE_VM_STATE_STOPPING_IDLE: + kbase_arbiter_pm_vm_set_state(kbdev, KBASE_VM_STATE_STOPPED); + break; + case KBASE_VM_STATE_SUSPEND_PENDING: + kbase_arbiter_pm_vm_set_state(kbdev, KBASE_VM_STATE_SUSPENDED); + break; + default: + dev_warn(kbdev->dev, "unexpected pm_stop VM state %u", + arb_vm_state->vm_state); + break; + } + + kbase_arbif_gpu_stopped(kbdev, request_gpu); +} + +static void kbase_arbiter_pm_vm_gpu_start(struct kbase_device *kbdev) +{ + struct kbase_arbiter_vm_state *arb_vm_state = 
kbdev->pm.arb_vm_state; + + lockdep_assert_held(&arb_vm_state->vm_state_lock); + switch (arb_vm_state->vm_state) { + case KBASE_VM_STATE_INITIALIZING: + kbase_arbiter_pm_vm_set_state(kbdev, + KBASE_VM_STATE_INITIALIZING_WITH_GPU); + break; + case KBASE_VM_STATE_STOPPED_GPU_REQUESTED: + kbase_arbiter_pm_vm_set_state(kbdev, KBASE_VM_STATE_STARTING); + kbase_install_interrupts(kbdev); + queue_work(arb_vm_state->vm_arb_wq, + &arb_vm_state->vm_resume_work); + break; + case KBASE_VM_STATE_SUSPEND_WAIT_FOR_GRANT: + kbase_arbif_gpu_stopped(kbdev, false); + kbase_arbiter_pm_vm_set_state(kbdev, KBASE_VM_STATE_SUSPENDED); + break; + default: + dev_warn(kbdev->dev, + "GPU_GRANTED when not expected - state %s\n", + kbase_arbiter_pm_vm_state_str(arb_vm_state->vm_state)); + break; + } +} + +static void kbase_arbiter_pm_vm_gpu_stop(struct kbase_device *kbdev) +{ + struct kbase_arbiter_vm_state *arb_vm_state = kbdev->pm.arb_vm_state; + + lockdep_assert_held(&arb_vm_state->vm_state_lock); + if (arb_vm_state->vm_state == KBASE_VM_STATE_INITIALIZING_WITH_GPU) { + mutex_unlock(&arb_vm_state->vm_state_lock); + kbase_arbiter_pm_vm_wait_gpu_assignment(kbdev); + mutex_lock(&arb_vm_state->vm_state_lock); + } + + switch (arb_vm_state->vm_state) { + case KBASE_VM_STATE_IDLE: + kbase_arbiter_pm_vm_set_state(kbdev, + KBASE_VM_STATE_STOPPING_IDLE); + queue_work(arb_vm_state->vm_arb_wq, + &arb_vm_state->vm_suspend_work); + break; + case KBASE_VM_STATE_ACTIVE: + kbase_arbiter_pm_vm_set_state(kbdev, + KBASE_VM_STATE_STOPPING_ACTIVE); + queue_work(arb_vm_state->vm_arb_wq, + &arb_vm_state->vm_suspend_work); + break; + case KBASE_VM_STATE_STARTING: + dev_dbg(kbdev->dev, "Got GPU_STOP event while STARTING."); + kbase_arbiter_pm_vm_set_state(kbdev, + KBASE_VM_STATE_STOPPING_ACTIVE); + if (arb_vm_state->vm_arb_starting) + queue_work(arb_vm_state->vm_arb_wq, + &arb_vm_state->vm_suspend_work); + break; + case KBASE_VM_STATE_SUSPEND_PENDING: + /* Suspend finishes with a stop so nothing else to do */ + 
break; + default: + dev_warn(kbdev->dev, "GPU_STOP when not expected - state %s\n", + kbase_arbiter_pm_vm_state_str(arb_vm_state->vm_state)); + break; + } +} + +static void kbase_gpu_lost(struct kbase_device *kbdev) +{ + struct kbase_arbiter_vm_state *arb_vm_state = kbdev->pm.arb_vm_state; + + lockdep_assert_held(&arb_vm_state->vm_state_lock); + + switch (arb_vm_state->vm_state) { + case KBASE_VM_STATE_STARTING: + case KBASE_VM_STATE_ACTIVE: + case KBASE_VM_STATE_IDLE: + dev_warn(kbdev->dev, "GPU lost in state %s", + kbase_arbiter_pm_vm_state_str(arb_vm_state->vm_state)); + kbase_arbiter_pm_vm_gpu_stop(kbdev); + mutex_unlock(&arb_vm_state->vm_state_lock); + kbase_pm_handle_gpu_lost(kbdev); + mutex_lock(&arb_vm_state->vm_state_lock); + break; + case KBASE_VM_STATE_STOPPING_IDLE: + case KBASE_VM_STATE_STOPPING_ACTIVE: + case KBASE_VM_STATE_SUSPEND_PENDING: + dev_info(kbdev->dev, "GPU lost while stopping"); + mutex_unlock(&arb_vm_state->vm_state_lock); + kbase_pm_handle_gpu_lost(kbdev); + mutex_lock(&arb_vm_state->vm_state_lock); + break; + case KBASE_VM_STATE_SUSPENDED: + case KBASE_VM_STATE_STOPPED: + case KBASE_VM_STATE_STOPPED_GPU_REQUESTED: + dev_info(kbdev->dev, "GPU lost while already stopped"); + break; + case KBASE_VM_STATE_SUSPEND_WAIT_FOR_GRANT: + dev_info(kbdev->dev, "GPU lost while waiting to suspend"); + kbase_arbiter_pm_vm_set_state(kbdev, KBASE_VM_STATE_SUSPENDED); + break; + default: + break; + } + +} + +static inline bool kbase_arbiter_pm_vm_os_suspend_ready_state( + struct kbase_device *kbdev) +{ + switch (kbdev->pm.arb_vm_state->vm_state) { + case KBASE_VM_STATE_SUSPENDED: + case KBASE_VM_STATE_STOPPED: + case KBASE_VM_STATE_IDLE: + case KBASE_VM_STATE_ACTIVE: + return true; + default: + return false; + } +} + +static void kbase_arbiter_pm_vm_os_prepare_suspend(struct kbase_device *kbdev) +{ + struct kbase_arbiter_vm_state *arb_vm_state = kbdev->pm.arb_vm_state; + enum kbase_vm_state prev_state; + + 
lockdep_assert_held(&arb_vm_state->vm_state_lock); + if (kbdev->arb.arb_if) { + if (kbdev->pm.arb_vm_state->vm_state == + KBASE_VM_STATE_SUSPENDED) + return; + } + /* Block suspend OS function until we are in a stable state + * with vm_state_lock + */ + while (!kbase_arbiter_pm_vm_os_suspend_ready_state(kbdev)) { + prev_state = arb_vm_state->vm_state; + switch (arb_vm_state->vm_state) { + case KBASE_VM_STATE_STOPPING_ACTIVE: + case KBASE_VM_STATE_STOPPING_IDLE: + kbase_arbiter_pm_vm_set_state(kbdev, + KBASE_VM_STATE_SUSPEND_PENDING); + break; + case KBASE_VM_STATE_STOPPED_GPU_REQUESTED: + kbase_arbiter_pm_vm_set_state(kbdev, + KBASE_VM_STATE_SUSPEND_WAIT_FOR_GRANT); + break; + case KBASE_VM_STATE_STARTING: + if (!arb_vm_state->vm_arb_starting) { + kbase_arbiter_pm_vm_set_state(kbdev, + KBASE_VM_STATE_SUSPEND_PENDING); + kbase_arbiter_pm_vm_stopped(kbdev); + } + break; + default: + break; + } + mutex_unlock(&arb_vm_state->vm_state_lock); + wait_event(arb_vm_state->vm_state_wait, + arb_vm_state->vm_state != prev_state); + mutex_lock(&arb_vm_state->vm_state_lock); + } + + switch (arb_vm_state->vm_state) { + case KBASE_VM_STATE_STOPPED: + kbase_arbiter_pm_vm_set_state(kbdev, + KBASE_VM_STATE_SUSPENDED); + break; + case KBASE_VM_STATE_IDLE: + case KBASE_VM_STATE_ACTIVE: + kbase_arbiter_pm_vm_set_state(kbdev, + KBASE_VM_STATE_SUSPEND_PENDING); + mutex_unlock(&arb_vm_state->vm_state_lock); + /* Ensure resume has completed fully before starting suspend */ + flush_work(&arb_vm_state->vm_resume_work); + kbase_pm_driver_suspend(kbdev); + mutex_lock(&arb_vm_state->vm_state_lock); + break; + case KBASE_VM_STATE_SUSPENDED: + break; + default: + KBASE_DEBUG_ASSERT_MSG(false, "Unexpected state to suspend"); + break; + } +} + +static void kbase_arbiter_pm_vm_os_resume(struct kbase_device *kbdev) +{ + struct kbase_arbiter_vm_state *arb_vm_state = kbdev->pm.arb_vm_state; + + lockdep_assert_held(&arb_vm_state->vm_state_lock); + KBASE_DEBUG_ASSERT_MSG(arb_vm_state->vm_state == + 
KBASE_VM_STATE_SUSPENDED, + "Unexpected state to resume"); + + kbase_arbiter_pm_vm_set_state(kbdev, + KBASE_VM_STATE_STOPPED_GPU_REQUESTED); + kbase_arbif_gpu_request(kbdev); + + /* Release lock and block resume OS function until we have + * asynchronously received the GRANT message from the Arbiter and + * fully resumed + */ + mutex_unlock(&arb_vm_state->vm_state_lock); + kbase_arbiter_pm_vm_wait_gpu_assignment(kbdev); + flush_work(&arb_vm_state->vm_resume_work); + mutex_lock(&arb_vm_state->vm_state_lock); +} + +void kbase_arbiter_pm_vm_event(struct kbase_device *kbdev, + enum kbase_arbif_evt evt) +{ + struct kbase_arbiter_vm_state *arb_vm_state = kbdev->pm.arb_vm_state; + + if (!kbdev->arb.arb_if) + return; + + mutex_lock(&arb_vm_state->vm_state_lock); + dev_dbg(kbdev->dev, "%s %s\n", __func__, + kbase_arbiter_pm_vm_event_str(evt)); + + switch (evt) { + case KBASE_VM_GPU_GRANTED_EVT: + kbase_arbiter_pm_vm_gpu_start(kbdev); + break; + case KBASE_VM_GPU_STOP_EVT: + kbase_arbiter_pm_vm_gpu_stop(kbdev); + break; + case KBASE_VM_GPU_LOST_EVT: + dev_info(kbdev->dev, "KBASE_ARBIF_GPU_LOST_EVT!"); + kbase_gpu_lost(kbdev); + break; + case KBASE_VM_OS_SUSPEND_EVENT: + kbase_arbiter_pm_vm_os_prepare_suspend(kbdev); + break; + case KBASE_VM_OS_RESUME_EVENT: + kbase_arbiter_pm_vm_os_resume(kbdev); + break; + case KBASE_VM_GPU_IDLE_EVENT: + switch (arb_vm_state->vm_state) { + case KBASE_VM_STATE_ACTIVE: + kbase_arbiter_pm_vm_set_state(kbdev, + KBASE_VM_STATE_IDLE); + kbase_arbif_gpu_idle(kbdev); + break; + default: + break; + } + break; + + case KBASE_VM_REF_EVENT: + switch (arb_vm_state->vm_state) { + case KBASE_VM_STATE_STARTING: + KBASE_TLSTREAM_TL_EVENT_ARB_STARTED(kbdev, kbdev); + /* FALL THROUGH */ + case KBASE_VM_STATE_IDLE: + kbase_arbiter_pm_vm_set_state(kbdev, + KBASE_VM_STATE_ACTIVE); + kbase_arbif_gpu_active(kbdev); + break; + case KBASE_VM_STATE_STOPPING_IDLE: + kbase_arbiter_pm_vm_set_state(kbdev, + KBASE_VM_STATE_STOPPING_ACTIVE); + break; + default: + break; + 
} + break; + + case KBASE_VM_GPU_INITIALIZED_EVT: + lockdep_assert_held(&kbdev->pm.lock); + if (kbdev->pm.active_count > 0) { + kbase_arbiter_pm_vm_set_state(kbdev, + KBASE_VM_STATE_ACTIVE); + kbase_arbif_gpu_active(kbdev); + } else { + kbase_arbiter_pm_vm_set_state(kbdev, + KBASE_VM_STATE_IDLE); + kbase_arbif_gpu_idle(kbdev); + } + break; + + default: + dev_alert(kbdev->dev, "Got Unknown Event!"); + break; + } + mutex_unlock(&arb_vm_state->vm_state_lock); +} + +static void kbase_arbiter_pm_vm_wait_gpu_assignment(struct kbase_device *kbdev) +{ + struct kbase_arbiter_vm_state *arb_vm_state = kbdev->pm.arb_vm_state; + + dev_dbg(kbdev->dev, "Waiting for GPU assignment...\n"); + wait_event(arb_vm_state->vm_state_wait, + arb_vm_state->vm_state == KBASE_VM_STATE_IDLE || + arb_vm_state->vm_state == KBASE_VM_STATE_ACTIVE); + dev_dbg(kbdev->dev, "Waiting for GPU assignment - done\n"); +} + +static inline bool kbase_arbiter_pm_vm_gpu_assigned_lockheld( + struct kbase_device *kbdev) +{ + struct kbase_arbiter_vm_state *arb_vm_state = kbdev->pm.arb_vm_state; + + lockdep_assert_held(&arb_vm_state->vm_state_lock); + return (arb_vm_state->vm_state == KBASE_VM_STATE_IDLE || + arb_vm_state->vm_state == KBASE_VM_STATE_ACTIVE); +} + +int kbase_arbiter_pm_ctx_active_handle_suspend(struct kbase_device *kbdev, + enum kbase_pm_suspend_handler suspend_handler) +{ + struct kbasep_js_device_data *js_devdata = &kbdev->js_data; + struct kbase_arbiter_vm_state *arb_vm_state = kbdev->pm.arb_vm_state; + + if (kbdev->arb.arb_if) { + mutex_lock(&arb_vm_state->vm_state_lock); + while (!kbase_arbiter_pm_vm_gpu_assigned_lockheld(kbdev)) { + /* Update VM state since we have GPU work to do */ + if (arb_vm_state->vm_state == + KBASE_VM_STATE_STOPPING_IDLE) + kbase_arbiter_pm_vm_set_state(kbdev, + KBASE_VM_STATE_STOPPING_ACTIVE); + else if (arb_vm_state->vm_state == + KBASE_VM_STATE_STOPPED) { + kbase_arbiter_pm_vm_set_state(kbdev, + KBASE_VM_STATE_STOPPED_GPU_REQUESTED); + kbase_arbif_gpu_request(kbdev); 
+ } else if (arb_vm_state->vm_state == + KBASE_VM_STATE_INITIALIZING_WITH_GPU) + break; + + if (suspend_handler != + KBASE_PM_SUSPEND_HANDLER_NOT_POSSIBLE) { + if (suspend_handler == + KBASE_PM_SUSPEND_HANDLER_VM_GPU_GRANTED + || + kbdev->pm.active_count > 0) + break; + + mutex_unlock(&arb_vm_state->vm_state_lock); + mutex_unlock(&kbdev->pm.lock); + mutex_unlock(&js_devdata->runpool_mutex); + return 1; + } + + if (arb_vm_state->vm_state == + KBASE_VM_STATE_INITIALIZING_WITH_GPU) + break; + + /* Need to synchronously wait for GPU assignment */ + arb_vm_state->vm_arb_users_waiting++; + mutex_unlock(&arb_vm_state->vm_state_lock); + mutex_unlock(&kbdev->pm.lock); + mutex_unlock(&js_devdata->runpool_mutex); + kbase_arbiter_pm_vm_wait_gpu_assignment(kbdev); + mutex_lock(&js_devdata->runpool_mutex); + mutex_lock(&kbdev->pm.lock); + mutex_lock(&arb_vm_state->vm_state_lock); + arb_vm_state->vm_arb_users_waiting--; + } + mutex_unlock(&arb_vm_state->vm_state_lock); + } + return 0; +} diff --git a/drivers/gpu/arm/bifrost/arbiter/mali_kbase_arbiter_pm.h b/drivers/gpu/arm/bifrost/arbiter/mali_kbase_arbiter_pm.h new file mode 100644 index 000000000000..3c49eb1948c5 --- /dev/null +++ b/drivers/gpu/arm/bifrost/arbiter/mali_kbase_arbiter_pm.h @@ -0,0 +1,159 @@ +/* + * + * (C) COPYRIGHT ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
/**
 * enum kbase_vm_state - Current PM Arbitration state.
 *
 * @KBASE_VM_STATE_INITIALIZING: Special state before arbiter is initialized.
 * @KBASE_VM_STATE_INITIALIZING_WITH_GPU: Initialization after GPU
 *                                        has been granted.
 * @KBASE_VM_STATE_SUSPENDED: KBase is suspended by OS and GPU is not assigned.
 * @KBASE_VM_STATE_STOPPED: GPU is not assigned to KBase and is not required.
 * @KBASE_VM_STATE_STOPPED_GPU_REQUESTED: GPU is not assigned to KBase
 *                                        but a request has been made.
 * @KBASE_VM_STATE_STARTING: GPU is assigned and KBase is getting ready to run.
 * @KBASE_VM_STATE_IDLE: GPU is assigned but KBase has no work to do
 * @KBASE_VM_STATE_ACTIVE: GPU is assigned and KBase is busy using it
 * @KBASE_VM_STATE_SUSPEND_PENDING: OS is going into suspend mode.
 * @KBASE_VM_STATE_SUSPEND_WAIT_FOR_GRANT: OS is going into suspend mode but GPU
 *                                         has already been requested.
 *                                         In this situation we must wait for
 *                                         the Arbiter to send a GRANTED message
 *                                         and respond immediately with
 *                                         a STOPPED message before entering
 *                                         the suspend mode.
 * @KBASE_VM_STATE_STOPPING_IDLE: Arbiter has sent a stop message and there
 *                                is currently no work to do on the GPU.
 * @KBASE_VM_STATE_STOPPING_ACTIVE: Arbiter has sent a stop message when
 *                                  KBase has work to do.
 *
 * The numeric order of the enumerators is significant: the arbiter PM code
 * compares states with ">" against KBASE_VM_STATE_STOPPED_GPU_REQUESTED.
 * Do not reorder or insert values without auditing those comparisons.
 */
enum kbase_vm_state {
	KBASE_VM_STATE_INITIALIZING = 0,
	KBASE_VM_STATE_INITIALIZING_WITH_GPU = 1,
	KBASE_VM_STATE_SUSPENDED = 2,
	KBASE_VM_STATE_STOPPED = 3,
	KBASE_VM_STATE_STOPPED_GPU_REQUESTED = 4,
	KBASE_VM_STATE_STARTING = 5,
	KBASE_VM_STATE_IDLE = 6,
	KBASE_VM_STATE_ACTIVE = 7,
	KBASE_VM_STATE_SUSPEND_PENDING = 8,
	KBASE_VM_STATE_SUSPEND_WAIT_FOR_GRANT = 9,
	KBASE_VM_STATE_STOPPING_IDLE = 10,
	KBASE_VM_STATE_STOPPING_ACTIVE = 11
};
/**
 * kbase_arbiter_pm_vm_event() - Dispatch VM event to the state machine
 * @kbdev: The kbase device structure for the device (must be a valid pointer)
 * @event: The event to dispatch
 *
 * The state machine function. Receives events and transitions states
 * according to the event received and the current state.
 */
void kbase_arbiter_pm_vm_event(struct kbase_device *kbdev,
	enum kbase_arbif_evt event);
+ */ +void kbase_arbiter_pm_vm_stopped(struct kbase_device *kbdev); + +#endif /*_MALI_KBASE_ARBITER_PM_H_ */ diff --git a/drivers/gpu/arm/bifrost/backend/gpu/Kbuild b/drivers/gpu/arm/bifrost/backend/gpu/Kbuild index 21bc56f6a936..f0090e97dd48 100644 --- a/drivers/gpu/arm/bifrost/backend/gpu/Kbuild +++ b/drivers/gpu/arm/bifrost/backend/gpu/Kbuild @@ -1,5 +1,5 @@ # -# (C) COPYRIGHT 2014-2018 ARM Limited. All rights reserved. +# (C) COPYRIGHT 2014-2019 ARM Limited. All rights reserved. # # This program is free software and is provided to you under the terms of the # GNU General Public License version 2 as published by the Free Software @@ -22,16 +22,10 @@ BACKEND += \ backend/gpu/mali_kbase_cache_policy_backend.c \ backend/gpu/mali_kbase_device_hw.c \ - backend/gpu/mali_kbase_gpu.c \ backend/gpu/mali_kbase_gpuprops_backend.c \ - backend/gpu/mali_kbase_debug_job_fault_backend.c \ backend/gpu/mali_kbase_irq_linux.c \ backend/gpu/mali_kbase_instr_backend.c \ - backend/gpu/mali_kbase_jm_as.c \ - backend/gpu/mali_kbase_jm_hw.c \ - backend/gpu/mali_kbase_jm_rb.c \ backend/gpu/mali_kbase_js_backend.c \ - backend/gpu/mali_kbase_mmu_hw_direct.c \ backend/gpu/mali_kbase_pm_backend.c \ backend/gpu/mali_kbase_pm_driver.c \ backend/gpu/mali_kbase_pm_metrics.c \ @@ -42,6 +36,16 @@ BACKEND += \ backend/gpu/mali_kbase_time.c \ backend/gpu/mali_kbase_l2_mmu_config.c +ifeq ($(MALI_USE_CSF),1) +# empty +else + BACKEND += \ + backend/gpu/mali_kbase_jm_as.c \ + backend/gpu/mali_kbase_debug_job_fault_backend.c \ + backend/gpu/mali_kbase_jm_hw.c \ + backend/gpu/mali_kbase_jm_rb.c +endif + ifeq ($(MALI_CUSTOMER_RELEASE),0) BACKEND += \ backend/gpu/mali_kbase_pm_always_on_demand.c diff --git a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_debug_job_fault_backend.c b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_debug_job_fault_backend.c index 450f6e750a0c..b05844ef4f50 100644 --- a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_debug_job_fault_backend.c +++ 
b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_debug_job_fault_backend.c @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2012-2015,2018 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2012-2015,2018-2019 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -70,6 +70,8 @@ static int mmu_reg_snapshot[] = { static int as_reg_snapshot[] = { AS_TRANSTAB_LO, AS_TRANSTAB_HI, + AS_TRANSCFG_LO, + AS_TRANSCFG_HI, AS_MEMATTR_LO, AS_MEMATTR_HI, AS_FAULTSTATUS, diff --git a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_devfreq.c b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_devfreq.c index aac9172537cd..35362c37e661 100644 --- a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_devfreq.c +++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_devfreq.c @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2014-2019 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014-2020 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -21,7 +21,7 @@ */ #include -#include +#include #include #include diff --git a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_device_hw.c b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_device_hw.c index 958d8d5c58be..55cff3e2f5b6 100644 --- a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_device_hw.c +++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_device_hw.c @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2014-2016, 2018-2019 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014-2016, 2018-2020 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -25,15 +25,15 @@ * */ #include +#include #include #include #include -#include #include +#include #if !defined(CONFIG_MALI_BIFROST_NO_MALI) - #ifdef CONFIG_DEBUG_FS int kbase_io_history_resize(struct kbase_io_history *h, u16 new_size) @@ -143,8 +143,8 @@ void kbase_io_history_dump(struct kbase_device *kbdev) &h->buf[(h->count - iters + i) % h->size]; char const access = (io->addr & 1) ? 'w' : 'r'; - dev_err(kbdev->dev, "%6i: %c: reg 0x%p val %08x\n", i, access, - (void *)(io->addr & ~0x1), io->value); + dev_err(kbdev->dev, "%6i: %c: reg 0x%016lx val %08x\n", i, + access, (unsigned long)(io->addr & ~0x1), io->value); } spin_unlock_irqrestore(&h->lock, flags); @@ -190,6 +190,15 @@ u32 kbase_reg_read(struct kbase_device *kbdev, u32 offset) } KBASE_EXPORT_TEST_API(kbase_reg_read); + +bool kbase_is_gpu_lost(struct kbase_device *kbdev) +{ + u32 val; + + val = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_ID)); + + return val == 0; +} #endif /* !defined(CONFIG_MALI_BIFROST_NO_MALI) */ /** @@ -203,23 +212,19 @@ KBASE_EXPORT_TEST_API(kbase_reg_read); */ static void kbase_report_gpu_fault(struct kbase_device *kbdev, int multiple) { - u32 gpu_id = kbdev->gpu_props.props.raw_props.gpu_id; - u32 status = kbase_reg_read(kbdev, - GPU_CONTROL_REG(GPU_FAULTSTATUS)); + u32 status = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_FAULTSTATUS)); u64 address = (u64) kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_FAULTADDRESS_HI)) << 32; address |= kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_FAULTADDRESS_LO)); - if ((gpu_id & GPU_ID2_PRODUCT_MODEL) != GPU_ID2_PRODUCT_TULX) { - dev_warn(kbdev->dev, "GPU Fault 0x%08x (%s) at 0x%016llx", - status, - kbase_exception_name(kbdev, status & 0xFF), - address); - if (multiple) - dev_warn(kbdev->dev, "There were multiple GPU faults - some have not been reported\n"); - } + dev_warn(kbdev->dev, "GPU Fault 0x%08x 
(%s) at 0x%016llx", + status, + kbase_gpu_exception_name(status & 0xFF), + address); + if (multiple) + dev_warn(kbdev->dev, "There were multiple GPU faults - some have not been reported\n"); } static bool kbase_gpu_fault_interrupt(struct kbase_device *kbdev, int multiple) @@ -249,7 +254,7 @@ void kbase_gpu_start_cache_clean_nolock(struct kbase_device *kbdev) kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), irq_mask | CLEAN_CACHES_COMPLETED); - KBASE_TRACE_ADD(kbdev, CORE_GPU_CLEAN_INV_CACHES, NULL, NULL, 0u, 0); + KBASE_KTRACE_ADD(kbdev, CORE_GPU_CLEAN_INV_CACHES, NULL, 0); kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND), GPU_COMMAND_CLEAN_INV_CACHES); @@ -284,7 +289,7 @@ static void kbase_clean_caches_done(struct kbase_device *kbdev) if (kbdev->cache_clean_queued) { kbdev->cache_clean_queued = false; - KBASE_TRACE_ADD(kbdev, CORE_GPU_CLEAN_INV_CACHES, NULL, NULL, 0u, 0); + KBASE_KTRACE_ADD(kbdev, CORE_GPU_CLEAN_INV_CACHES, NULL, 0); kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND), GPU_COMMAND_CLEAN_INV_CACHES); } else { @@ -299,25 +304,45 @@ static void kbase_clean_caches_done(struct kbase_device *kbdev) spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); } -void kbase_gpu_wait_cache_clean(struct kbase_device *kbdev) +static inline bool get_cache_clean_flag(struct kbase_device *kbdev) { + bool cache_clean_in_progress; unsigned long flags; spin_lock_irqsave(&kbdev->hwaccess_lock, flags); - while (kbdev->cache_clean_in_progress) { - spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + cache_clean_in_progress = kbdev->cache_clean_in_progress; + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + + return cache_clean_in_progress; +} + +void kbase_gpu_wait_cache_clean(struct kbase_device *kbdev) +{ + while (get_cache_clean_flag(kbdev)) { wait_event_interruptible(kbdev->cache_clean_wait, !kbdev->cache_clean_in_progress); - spin_lock_irqsave(&kbdev->hwaccess_lock, flags); } - spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); +} + +int 
kbase_gpu_wait_cache_clean_timeout(struct kbase_device *kbdev, + unsigned int wait_timeout_ms) +{ + long remaining = msecs_to_jiffies(wait_timeout_ms); + + while (remaining && get_cache_clean_flag(kbdev)) { + remaining = wait_event_timeout(kbdev->cache_clean_wait, + !kbdev->cache_clean_in_progress, + remaining); + } + + return (remaining ? 0 : -ETIMEDOUT); } void kbase_gpu_interrupt(struct kbase_device *kbdev, u32 val) { bool clear_gpu_fault = false; - KBASE_TRACE_ADD(kbdev, CORE_GPU_IRQ, NULL, NULL, 0u, val); + KBASE_KTRACE_ADD(kbdev, CORE_GPU_IRQ, NULL, val); if (val & GPU_FAULT) clear_gpu_fault = kbase_gpu_fault_interrupt(kbdev, val & MULTIPLE_GPU_FAULTS); @@ -328,7 +353,7 @@ void kbase_gpu_interrupt(struct kbase_device *kbdev, u32 val) if (val & PRFCNT_SAMPLE_COMPLETED) kbase_instr_hwcnt_sample_done(kbdev); - KBASE_TRACE_ADD(kbdev, CORE_GPU_IRQ_CLEAR, NULL, NULL, 0u, val); + KBASE_KTRACE_ADD(kbdev, CORE_GPU_IRQ_CLEAR, NULL, val); kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_CLEAR), val); /* kbase_pm_check_transitions (called by kbase_pm_power_changed) must @@ -358,5 +383,5 @@ void kbase_gpu_interrupt(struct kbase_device *kbdev, u32 val) } - KBASE_TRACE_ADD(kbdev, CORE_GPU_IRQ_DONE, NULL, NULL, 0u, val); + KBASE_KTRACE_ADD(kbdev, CORE_GPU_IRQ_DONE, NULL, val); } diff --git a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_device_internal.h b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_device_internal.h index c62f1e57d795..2e1d0112172e 100644 --- a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_device_internal.h +++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_device_internal.h @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2014,2019 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014,2019-2020 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -50,6 +50,18 @@ void kbase_reg_write(struct kbase_device *kbdev, u32 offset, u32 value); */ u32 kbase_reg_read(struct kbase_device *kbdev, u32 offset); +/** + * kbase_is_gpu_lost() - Has the GPU been lost. + * @kbdev: Kbase device pointer + * + * This function will return true if the GPU has been lost. + * When this happens register reads will be zero. A zero GPU_ID is + * invalid so this is used to detect GPU_LOST + * + * Return: True if GPU LOST + */ +bool kbase_is_gpu_lost(struct kbase_device *kbdev); + /** * kbase_gpu_start_cache_clean - Start a cache clean * @kbdev: Kbase device @@ -76,6 +88,22 @@ void kbase_gpu_start_cache_clean_nolock(struct kbase_device *kbdev); */ void kbase_gpu_wait_cache_clean(struct kbase_device *kbdev); +/** + * kbase_gpu_wait_cache_clean_timeout - Wait for certain time for cache + * cleaning to finish + * @kbdev: Kbase device + * @wait_timeout_ms: Time, in milliseconds, to wait for cache clean to complete. + * + * This function will take hwaccess_lock, and may sleep. This is supposed to be + * called from paths (like GPU reset) where an indefinite wait for the completion + * of cache clean operation can cause deadlock, as the operation may never + * complete. + * + * Return: 0 if successful or a negative error code on failure. + */ +int kbase_gpu_wait_cache_clean_timeout(struct kbase_device *kbdev, + unsigned int wait_timeout_ms); + /** * kbase_gpu_cache_clean_wait_complete - Called after the cache cleaning is * finished. Would also be called after diff --git a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_gpu.c b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_gpu.c deleted file mode 100644 index 9c8c856af44c..000000000000 --- a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_gpu.c +++ /dev/null @@ -1,162 +0,0 @@ -/* - * - * (C) COPYRIGHT 2014-2019 ARM Limited.
All rights reserved. - * - * This program is free software and is provided to you under the terms of the - * GNU General Public License version 2 as published by the Free Software - * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, you can access it online at - * http://www.gnu.org/licenses/gpl-2.0.html. - * - * SPDX-License-Identifier: GPL-2.0 - * - */ - - -/* - * Register-based HW access backend APIs - */ -#include -#include -#include -#include -#include -#include -#include - -int kbase_backend_early_init(struct kbase_device *kbdev) -{ - int err; - - err = kbasep_platform_device_init(kbdev); - if (err) - return err; - - err = kbase_pm_runtime_init(kbdev); - if (err) - goto fail_runtime_pm; - - /* Ensure we can access the GPU registers */ - kbase_pm_register_access_enable(kbdev); - - /* Find out GPU properties based on the GPU feature registers */ - kbase_gpuprops_set(kbdev); - - /* We're done accessing the GPU registers for now. 
*/ - kbase_pm_register_access_disable(kbdev); - - err = kbase_install_interrupts(kbdev); - if (err) - goto fail_interrupts; - - return 0; - -fail_interrupts: - kbase_pm_runtime_term(kbdev); -fail_runtime_pm: - kbasep_platform_device_term(kbdev); - - return err; -} - -void kbase_backend_early_term(struct kbase_device *kbdev) -{ - kbase_release_interrupts(kbdev); - kbase_pm_runtime_term(kbdev); - kbasep_platform_device_term(kbdev); -} - -int kbase_backend_late_init(struct kbase_device *kbdev) -{ - int err; - - err = kbase_hwaccess_pm_init(kbdev); - if (err) - return err; - - err = kbase_reset_gpu_init(kbdev); - if (err) - goto fail_reset_gpu_init; - - err = kbase_hwaccess_pm_powerup(kbdev, PM_HW_ISSUES_DETECT); - if (err) - goto fail_pm_powerup; - - err = kbase_backend_timer_init(kbdev); - if (err) - goto fail_timer; - -#ifdef CONFIG_MALI_BIFROST_DEBUG -#ifndef CONFIG_MALI_BIFROST_NO_MALI - if (kbasep_common_test_interrupt_handlers(kbdev) != 0) { - dev_err(kbdev->dev, "Interrupt assigment check failed.\n"); - err = -EINVAL; - goto fail_interrupt_test; - } -#endif /* !CONFIG_MALI_BIFROST_NO_MALI */ -#endif /* CONFIG_MALI_BIFROST_DEBUG */ - - err = kbase_job_slot_init(kbdev); - if (err) - goto fail_job_slot; - - /* Do the initialisation of devfreq. - * Devfreq needs backend_timer_init() for completion of its - * initialisation and it also needs to catch the first callback - * occurence of the runtime_suspend event for maintaining state - * coherence with the backend power management, hence needs to be - * placed before the kbase_pm_context_idle(). 
- */ - err = kbase_backend_devfreq_init(kbdev); - if (err) - goto fail_devfreq_init; - - /* Idle the GPU and/or cores, if the policy wants it to */ - kbase_pm_context_idle(kbdev); - - /* Update gpuprops with L2_FEATURES if applicable */ - kbase_gpuprops_update_l2_features(kbdev); - - init_waitqueue_head(&kbdev->hwaccess.backend.reset_wait); - - return 0; - -fail_devfreq_init: - kbase_job_slot_term(kbdev); -fail_job_slot: - -#ifdef CONFIG_MALI_BIFROST_DEBUG -#ifndef CONFIG_MALI_BIFROST_NO_MALI -fail_interrupt_test: -#endif /* !CONFIG_MALI_BIFROST_NO_MALI */ -#endif /* CONFIG_MALI_BIFROST_DEBUG */ - - kbase_backend_timer_term(kbdev); -fail_timer: - kbase_hwaccess_pm_halt(kbdev); -fail_pm_powerup: - kbase_reset_gpu_term(kbdev); -fail_reset_gpu_init: - kbase_hwaccess_pm_term(kbdev); - - return err; -} - -void kbase_backend_late_term(struct kbase_device *kbdev) -{ - kbase_backend_devfreq_term(kbdev); - kbase_job_slot_halt(kbdev); - kbase_job_slot_term(kbdev); - kbase_backend_timer_term(kbdev); - kbase_hwaccess_pm_halt(kbdev); - kbase_reset_gpu_term(kbdev); - kbase_hwaccess_pm_term(kbdev); -} diff --git a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_gpuprops_backend.c b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_gpuprops_backend.c index 29018b22f52a..352afa11907a 100644 --- a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_gpuprops_backend.c +++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_gpuprops_backend.c @@ -1,6 +1,7 @@ +// SPDX-License-Identifier: GPL-2.0 /* * - * (C) COPYRIGHT 2014-2019 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014-2020 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -20,8 +21,6 @@ * */ - - /* * Base kernel property query backend APIs */ @@ -31,79 +30,93 @@ #include #include -void kbase_backend_gpuprops_get(struct kbase_device *kbdev, +int kbase_backend_gpuprops_get(struct kbase_device *kbdev, struct kbase_gpuprops_regdump *regdump) { int i; + struct kbase_gpuprops_regdump registers; /* Fill regdump with the content of the relevant registers */ - regdump->gpu_id = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_ID)); + registers.gpu_id = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_ID)); - regdump->l2_features = kbase_reg_read(kbdev, + registers.l2_features = kbase_reg_read(kbdev, GPU_CONTROL_REG(L2_FEATURES)); - regdump->core_features = kbase_reg_read(kbdev, + registers.core_features = kbase_reg_read(kbdev, GPU_CONTROL_REG(CORE_FEATURES)); - regdump->tiler_features = kbase_reg_read(kbdev, + registers.tiler_features = kbase_reg_read(kbdev, GPU_CONTROL_REG(TILER_FEATURES)); - regdump->mem_features = kbase_reg_read(kbdev, + registers.mem_features = kbase_reg_read(kbdev, GPU_CONTROL_REG(MEM_FEATURES)); - regdump->mmu_features = kbase_reg_read(kbdev, + registers.mmu_features = kbase_reg_read(kbdev, GPU_CONTROL_REG(MMU_FEATURES)); - regdump->as_present = kbase_reg_read(kbdev, + registers.as_present = kbase_reg_read(kbdev, GPU_CONTROL_REG(AS_PRESENT)); - regdump->js_present = kbase_reg_read(kbdev, + registers.js_present = kbase_reg_read(kbdev, GPU_CONTROL_REG(JS_PRESENT)); for (i = 0; i < GPU_MAX_JOB_SLOTS; i++) - regdump->js_features[i] = kbase_reg_read(kbdev, + registers.js_features[i] = kbase_reg_read(kbdev, GPU_CONTROL_REG(JS_FEATURES_REG(i))); for (i = 0; i < BASE_GPU_NUM_TEXTURE_FEATURES_REGISTERS; i++) - regdump->texture_features[i] = kbase_reg_read(kbdev, + registers.texture_features[i] = kbase_reg_read(kbdev, GPU_CONTROL_REG(TEXTURE_FEATURES_REG(i))); - regdump->thread_max_threads 
= kbase_reg_read(kbdev, + registers.thread_max_threads = kbase_reg_read(kbdev, GPU_CONTROL_REG(THREAD_MAX_THREADS)); - regdump->thread_max_workgroup_size = kbase_reg_read(kbdev, + registers.thread_max_workgroup_size = kbase_reg_read(kbdev, GPU_CONTROL_REG(THREAD_MAX_WORKGROUP_SIZE)); - regdump->thread_max_barrier_size = kbase_reg_read(kbdev, + registers.thread_max_barrier_size = kbase_reg_read(kbdev, GPU_CONTROL_REG(THREAD_MAX_BARRIER_SIZE)); - regdump->thread_features = kbase_reg_read(kbdev, + registers.thread_features = kbase_reg_read(kbdev, GPU_CONTROL_REG(THREAD_FEATURES)); - regdump->thread_tls_alloc = kbase_reg_read(kbdev, + registers.thread_tls_alloc = kbase_reg_read(kbdev, GPU_CONTROL_REG(THREAD_TLS_ALLOC)); - regdump->shader_present_lo = kbase_reg_read(kbdev, + registers.shader_present_lo = kbase_reg_read(kbdev, GPU_CONTROL_REG(SHADER_PRESENT_LO)); - regdump->shader_present_hi = kbase_reg_read(kbdev, + registers.shader_present_hi = kbase_reg_read(kbdev, GPU_CONTROL_REG(SHADER_PRESENT_HI)); - regdump->tiler_present_lo = kbase_reg_read(kbdev, + registers.tiler_present_lo = kbase_reg_read(kbdev, GPU_CONTROL_REG(TILER_PRESENT_LO)); - regdump->tiler_present_hi = kbase_reg_read(kbdev, + registers.tiler_present_hi = kbase_reg_read(kbdev, GPU_CONTROL_REG(TILER_PRESENT_HI)); - regdump->l2_present_lo = kbase_reg_read(kbdev, + registers.l2_present_lo = kbase_reg_read(kbdev, GPU_CONTROL_REG(L2_PRESENT_LO)); - regdump->l2_present_hi = kbase_reg_read(kbdev, + registers.l2_present_hi = kbase_reg_read(kbdev, GPU_CONTROL_REG(L2_PRESENT_HI)); - regdump->stack_present_lo = kbase_reg_read(kbdev, + registers.stack_present_lo = kbase_reg_read(kbdev, GPU_CONTROL_REG(STACK_PRESENT_LO)); - regdump->stack_present_hi = kbase_reg_read(kbdev, + registers.stack_present_hi = kbase_reg_read(kbdev, GPU_CONTROL_REG(STACK_PRESENT_HI)); + + if (!kbase_is_gpu_lost(kbdev)) { + *regdump = registers; + return 0; + } else + return -EIO; } -void kbase_backend_gpuprops_get_features(struct 
kbase_device *kbdev, +int kbase_backend_gpuprops_get_features(struct kbase_device *kbdev, struct kbase_gpuprops_regdump *regdump) { if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_COHERENCY_REG)) { + u32 coherency_features; + /* Ensure we can access the GPU registers */ kbase_pm_register_access_enable(kbdev); - regdump->coherency_features = kbase_reg_read(kbdev, + coherency_features = kbase_reg_read(kbdev, GPU_CONTROL_REG(COHERENCY_FEATURES)); + if (kbase_is_gpu_lost(kbdev)) + return -EIO; + + regdump->coherency_features = coherency_features; + /* We're done accessing the GPU registers for now. */ kbase_pm_register_access_disable(kbdev); } else { @@ -112,14 +125,22 @@ void kbase_backend_gpuprops_get_features(struct kbase_device *kbdev, COHERENCY_FEATURE_BIT(COHERENCY_NONE) | COHERENCY_FEATURE_BIT(COHERENCY_ACE_LITE); } + + return 0; } -void kbase_backend_gpuprops_get_l2_features(struct kbase_device *kbdev, +int kbase_backend_gpuprops_get_l2_features(struct kbase_device *kbdev, struct kbase_gpuprops_regdump *regdump) { if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_L2_CONFIG)) { - regdump->l2_features = kbase_reg_read(kbdev, + u32 l2_features = kbase_reg_read(kbdev, GPU_CONTROL_REG(L2_FEATURES)); - } -} + if (kbase_is_gpu_lost(kbdev)) + return -EIO; + + regdump->l2_features = l2_features; + } + + return 0; +} diff --git a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_instr_backend.c b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_instr_backend.c index 4e216d32257e..8b320c7ba950 100644 --- a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_instr_backend.c +++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_instr_backend.c @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2014-2019 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014-2020 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -27,11 +27,12 @@ */ #include -#include +#include #include #include #include + int kbase_instr_hwcnt_enable_internal(struct kbase_device *kbdev, struct kbase_context *kctx, struct kbase_instr_hwcnt_enable *enable) @@ -70,7 +71,11 @@ int kbase_instr_hwcnt_enable_internal(struct kbase_device *kbdev, /* Configure */ prfcnt_config = kctx->as_nr << PRFCNT_CONFIG_AS_SHIFT; +#ifdef CONFIG_MALI_PRFCNT_SET_SECONDARY_VIA_DEBUG_FS + if (kbdev->hwcnt.backend.use_secondary_override) +#else if (enable->use_secondary) +#endif prfcnt_config |= 1 << PRFCNT_CONFIG_SETSELECT_SHIFT; kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_CONFIG), @@ -80,16 +85,17 @@ int kbase_instr_hwcnt_enable_internal(struct kbase_device *kbdev, enable->dump_buffer & 0xFFFFFFFF); kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_BASE_HI), enable->dump_buffer >> 32); + kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_JM_EN), enable->jm_bm); + kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_SHADER_EN), enable->shader_bm); kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_MMU_L2_EN), enable->mmu_l2_bm); - /* Due to PRLAM-8186 we need to disable the Tiler before we enable the - * HW counter dump. 
*/ + kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_TILER_EN), - enable->tiler_bm); + enable->tiler_bm); kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_CONFIG), prfcnt_config | PRFCNT_CONFIG_MODE_MANUAL); @@ -198,6 +204,7 @@ int kbase_instr_hwcnt_request_dump(struct kbase_context *kctx) */ kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_DUMPING; + /* Reconfigure the dump address */ kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_BASE_LO), kbdev->hwcnt.addr & 0xFFFFFFFF); @@ -205,8 +212,9 @@ int kbase_instr_hwcnt_request_dump(struct kbase_context *kctx) kbdev->hwcnt.addr >> 32); /* Start dumping */ - KBASE_TRACE_ADD(kbdev, CORE_GPU_PRFCNT_SAMPLE, NULL, NULL, - kbdev->hwcnt.addr, 0); + KBASE_KTRACE_ADD(kbdev, CORE_GPU_PRFCNT_SAMPLE, NULL, + kbdev->hwcnt.addr); + kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND), GPU_COMMAND_PRFCNT_SAMPLE); @@ -216,6 +224,8 @@ int kbase_instr_hwcnt_request_dump(struct kbase_context *kctx) unlock: spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags); + + return err; } KBASE_EXPORT_SYMBOL(kbase_instr_hwcnt_request_dump); @@ -277,6 +287,7 @@ void kbasep_cache_clean_worker(struct work_struct *data) spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags); } + void kbase_instr_hwcnt_sample_done(struct kbase_device *kbdev) { unsigned long flags; @@ -348,7 +359,7 @@ int kbase_instr_hwcnt_clear(struct kbase_context *kctx) goto out; /* Clear the counters */ - KBASE_TRACE_ADD(kbdev, CORE_GPU_PRFCNT_CLEAR, NULL, NULL, 0u, 0); + KBASE_KTRACE_ADD(kbdev, CORE_GPU_PRFCNT_CLEAR, NULL, 0); kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND), GPU_COMMAND_PRFCNT_CLEAR); @@ -369,8 +380,14 @@ int kbase_instr_backend_init(struct kbase_device *kbdev) init_waitqueue_head(&kbdev->hwcnt.backend.wait); INIT_WORK(&kbdev->hwcnt.backend.cache_clean_work, kbasep_cache_clean_worker); + + kbdev->hwcnt.backend.triggered = 0; +#ifdef CONFIG_MALI_PRFCNT_SET_SECONDARY_VIA_DEBUG_FS + kbdev->hwcnt.backend.use_secondary_override = false; +#endif + kbdev->hwcnt.backend.cache_clean_wq = 
alloc_workqueue("Mali cache cleaning workqueue", 0, 1); if (NULL == kbdev->hwcnt.backend.cache_clean_wq) @@ -383,3 +400,12 @@ void kbase_instr_backend_term(struct kbase_device *kbdev) { destroy_workqueue(kbdev->hwcnt.backend.cache_clean_wq); } + +#ifdef CONFIG_MALI_PRFCNT_SET_SECONDARY_VIA_DEBUG_FS +void kbase_instr_backend_debugfs_init(struct kbase_device *kbdev) +{ + debugfs_create_bool("hwcnt_use_secondary", S_IRUGO | S_IWUSR, + kbdev->mali_debugfs_directory, + &kbdev->hwcnt.backend.use_secondary_override); +} +#endif diff --git a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_instr_defs.h b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_instr_defs.h index c9fb7593a936..99309685c4ff 100644 --- a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_instr_defs.h +++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_instr_defs.h @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2014, 2016, 2018 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014, 2016, 2018, 2019-2020 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -47,6 +47,9 @@ enum kbase_instr_state { struct kbase_instr_backend { wait_queue_head_t wait; int triggered; +#ifdef CONFIG_MALI_PRFCNT_SET_SECONDARY_VIA_DEBUG_FS + bool use_secondary_override; +#endif enum kbase_instr_state state; struct workqueue_struct *cache_clean_wq; diff --git a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_irq_linux.c b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_irq_linux.c index 4bbeecbb4834..8cf3c1e6790e 100644 --- a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_irq_linux.c +++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_irq_linux.c @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2014-2016,2018-2019 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014-2016,2018-2020 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -79,8 +79,6 @@ static irqreturn_t kbase_job_irq_handler(int irq, void *data) return IRQ_HANDLED; } -KBASE_EXPORT_TEST_API(kbase_job_irq_handler); - static irqreturn_t kbase_mmu_irq_handler(int irq, void *data) { unsigned long flags; @@ -153,8 +151,6 @@ static irqreturn_t kbase_gpu_irq_handler(int irq, void *data) return IRQ_HANDLED; } -KBASE_EXPORT_TEST_API(kbase_gpu_irq_handler); - static irq_handler_t kbase_handler_table[] = { [JOB_IRQ_TAG] = kbase_job_irq_handler, [MMU_IRQ_TAG] = kbase_mmu_irq_handler, @@ -166,6 +162,35 @@ static irq_handler_t kbase_handler_table[] = { #define MMU_IRQ_HANDLER MMU_IRQ_TAG #define GPU_IRQ_HANDLER GPU_IRQ_TAG +/** + * kbase_gpu_irq_test_handler - Variant (for test) of kbase_gpu_irq_handler() + * @irq: IRQ number + * @data: Data associated with this IRQ (i.e. kbdev) + * @val: Value of the GPU_CONTROL_REG(GPU_IRQ_STATUS) + * + * Handle the GPU device interrupt source requests reflected in the + * given source bit-pattern. The test code caller is responsible for + * undertaking the required device power maintenance.
+ * + * Return: IRQ_HANDLED if the requests are from the GPU device, + * IRQ_NONE otherwise + */ +irqreturn_t kbase_gpu_irq_test_handler(int irq, void *data, u32 val) +{ + struct kbase_device *kbdev = kbase_untag(data); + + if (!val) + return IRQ_NONE; + + dev_dbg(kbdev->dev, "%s: irq %d irqstatus 0x%x\n", __func__, irq, val); + + kbase_gpu_interrupt(kbdev, val); + + return IRQ_HANDLED; +} + +KBASE_EXPORT_TEST_API(kbase_gpu_irq_test_handler); + /** * kbase_set_custom_irq_handler - Set a custom IRQ handler * @kbdev: Device for which the handler is to be registered diff --git a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_jm_as.c b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_jm_as.c index c8153ba4c121..bb4f548e9a4d 100644 --- a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_jm_as.c +++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_jm_as.c @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2014-2018 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014-2020 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -148,8 +148,7 @@ int kbase_backend_find_and_release_free_address_space( */ if (as_kctx && !kbase_ctx_flag(as_kctx, KCTX_PRIVILEGED) && atomic_read(&as_kctx->refcount) == 1) { - if (!kbasep_js_runpool_retain_ctx_nolock(kbdev, - as_kctx)) { + if (!kbase_ctx_sched_inc_refcount_nolock(as_kctx)) { WARN(1, "Failed to retain active context\n"); spin_unlock_irqrestore(&kbdev->hwaccess_lock, @@ -236,7 +235,7 @@ bool kbase_backend_use_ctx(struct kbase_device *kbdev, if (kbase_ctx_flag(kctx, KCTX_PRIVILEGED)) { /* We need to retain it to keep the corresponding address space */ - kbasep_js_runpool_retain_ctx_nolock(kbdev, kctx); + kbase_ctx_sched_retain_ctx_refcount(kctx); } return true; diff --git a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_jm_defs.h b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_jm_defs.h index b4d2ae1cc4e8..7cda61ac6cdb 100644 --- a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_jm_defs.h +++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_jm_defs.h @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2014-2016, 2018 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014-2016, 2018-2019 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -62,9 +62,6 @@ struct slot_rb { /** * struct kbase_backend_data - GPU backend specific data for HW access layer * @slot_rb: Slot ringbuffers - * @rmu_workaround_flag: When PRLAM-8987 is present, this flag determines - * whether slots 0/1 or slot 2 are currently being - * pulled from * @scheduling_timer: The timer tick used for rescheduling jobs * @timer_running: Is the timer running? The runpool_mutex must be * held whilst modifying this. 
@@ -83,8 +80,6 @@ struct slot_rb { struct kbase_backend_data { struct slot_rb slot_rb[BASE_JM_MAX_NR_SLOTS]; - bool rmu_workaround_flag; - struct hrtimer scheduling_timer; bool timer_running; diff --git a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_jm_hw.c b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_jm_hw.c index b3093a6014ec..4255e0b373bb 100644 --- a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_jm_hw.c +++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_jm_hw.c @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2010-2019 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2010-2020 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -26,8 +26,9 @@ #include #include -#include -#include +#include +#include +#include #include #include #include @@ -37,17 +38,8 @@ #include #include -#define beenthere(kctx, f, a...) \ - dev_dbg(kctx->kbdev->dev, "%s:" f, __func__, ##a) - static void kbasep_try_reset_gpu_early_locked(struct kbase_device *kbdev); -static inline int kbasep_jm_is_js_free(struct kbase_device *kbdev, int js, - struct kbase_context *kctx) -{ - return !kbase_reg_read(kbdev, JOB_SLOT_REG(js, JS_COMMAND_NEXT)); -} - static u64 kbase_job_write_affinity(struct kbase_device *kbdev, base_jd_core_req core_req, int js) @@ -87,6 +79,17 @@ static u64 kbase_job_write_affinity(struct kbase_device *kbdev, kbdev->pm.debug_core_mask[js]; } + if (unlikely(!affinity)) { +#ifdef CONFIG_MALI_BIFROST_DEBUG + u64 shaders_ready = + kbase_pm_get_ready_cores(kbdev, KBASE_PM_CORE_SHADER); + + WARN_ON(!(shaders_ready & kbdev->pm.backend.shaders_avail)); +#endif + + affinity = kbdev->pm.backend.shaders_avail; + } + kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_AFFINITY_NEXT_LO), affinity & 0xFFFFFFFF); kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_AFFINITY_NEXT_HI), @@ -95,13 +98,90 @@ static u64 kbase_job_write_affinity(struct kbase_device *kbdev, return 
affinity; } +/** + * select_job_chain() - Select which job chain to submit to the GPU + * @katom: Pointer to the atom about to be submitted to the GPU + * + * Selects one of the fragment job chains attached to the special atom at the + * end of a renderpass, or returns the address of the single job chain attached + * to any other type of atom. + * + * Which job chain is selected depends upon whether the tiling phase of the + * renderpass completed normally or was soft-stopped because it used too + * much memory. It also depends upon whether one of the fragment job chains + * has already been run as part of the same renderpass. + * + * Return: GPU virtual address of the selected job chain + */ +static u64 select_job_chain(struct kbase_jd_atom *katom) +{ + struct kbase_context *const kctx = katom->kctx; + u64 jc = katom->jc; + struct kbase_jd_renderpass *rp; + + lockdep_assert_held(&kctx->kbdev->hwaccess_lock); + + if (!(katom->core_req & BASE_JD_REQ_END_RENDERPASS)) + return jc; + + compiletime_assert((1ull << (sizeof(katom->renderpass_id) * 8)) <= + ARRAY_SIZE(kctx->jctx.renderpasses), + "Should check invalid access to renderpasses"); + + rp = &kctx->jctx.renderpasses[katom->renderpass_id]; + /* We can read a subset of renderpass state without holding + * higher-level locks (but not end_katom, for example). + * If the end-of-renderpass atom is running with as-yet indeterminate + * OOM state then assume that the start atom was not soft-stopped. + */ + switch (rp->state) { + case KBASE_JD_RP_OOM: + /* Tiling ran out of memory. + * Start of incremental rendering, used once. + */ + jc = katom->jc_fragment.norm_read_forced_write; + break; + case KBASE_JD_RP_START: + case KBASE_JD_RP_PEND_OOM: + /* Tiling completed successfully first time. + * Single-iteration rendering, used once. + */ + jc = katom->jc_fragment.norm_read_norm_write; + break; + case KBASE_JD_RP_RETRY_OOM: + /* Tiling ran out of memory again. 
+ * Continuation of incremental rendering, used as + * many times as required. + */ + jc = katom->jc_fragment.forced_read_forced_write; + break; + case KBASE_JD_RP_RETRY: + case KBASE_JD_RP_RETRY_PEND_OOM: + /* Tiling completed successfully this time. + * End of incremental rendering, used once. + */ + jc = katom->jc_fragment.forced_read_norm_write; + break; + default: + WARN_ON(1); + break; + } + + dev_dbg(kctx->kbdev->dev, + "Selected job chain 0x%llx for end atom %p in state %d\n", + jc, (void *)katom, (int)rp->state); + + katom->jc = jc; + return jc; +} + void kbase_job_hw_submit(struct kbase_device *kbdev, struct kbase_jd_atom *katom, int js) { struct kbase_context *kctx; u32 cfg; - u64 jc_head = katom->jc; + u64 const jc_head = select_job_chain(katom); u64 affinity; KBASE_DEBUG_ASSERT(kbdev); @@ -112,6 +192,9 @@ void kbase_job_hw_submit(struct kbase_device *kbdev, /* Command register must be available */ KBASE_DEBUG_ASSERT(kbasep_jm_is_js_free(kbdev, js, kctx)); + dev_dbg(kctx->kbdev->dev, "Write JS_HEAD_NEXT 0x%llx for atom %p\n", + jc_head, (void *)katom); + kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_HEAD_NEXT_LO), jc_head & 0xFFFFFFFF); kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_HEAD_NEXT_HI), @@ -142,8 +225,8 @@ void kbase_job_hw_submit(struct kbase_device *kbdev, cfg |= JS_CONFIG_THREAD_PRI(8); - if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_PROTECTED_MODE) && - (katom->atom_flags & KBASE_KATOM_FLAG_PROTECTED)) + if ((katom->atom_flags & KBASE_KATOM_FLAG_PROTECTED) || + (katom->core_req & BASE_JD_REQ_END_RENDERPASS)) cfg |= JS_CONFIG_DISABLE_DESCRIPTOR_WR_BK; if (kbase_hw_has_feature(kbdev, @@ -175,7 +258,7 @@ void kbase_job_hw_submit(struct kbase_device *kbdev, dev_dbg(kbdev->dev, "JS: Submitting atom %p from ctx %p to js[%d] with head=0x%llx", katom, kctx, js, jc_head); - KBASE_TRACE_ADD_SLOT_INFO(kbdev, JM_SUBMIT, kctx, katom, jc_head, js, + KBASE_KTRACE_ADD_JM_SLOT_INFO(kbdev, JM_SUBMIT, kctx, katom, jc_head, js, (u32)affinity); 
KBASE_TLSTREAM_AUX_EVENT_JOB_SLOT(kbdev, kctx, @@ -207,6 +290,10 @@ void kbase_job_hw_submit(struct kbase_device *kbdev, kbdev->hwaccess.backend.slot_rb[js].last_context = katom->kctx; } #endif + + trace_sysgraph_gpu(SGR_SUBMIT, kctx->id, + kbase_jd_atom_id(kctx, katom), js); + kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_COMMAND_NEXT), JS_COMMAND_START); } @@ -274,7 +361,7 @@ void kbase_job_done(struct kbase_device *kbdev, u32 done) KBASE_DEBUG_ASSERT(kbdev); - KBASE_TRACE_ADD(kbdev, JM_IRQ, NULL, NULL, 0, done); + KBASE_KTRACE_ADD_JM(kbdev, JM_IRQ, NULL, NULL, 0, done); end_timestamp = ktime_get(); @@ -459,7 +546,7 @@ void kbase_job_done(struct kbase_device *kbdev, u32 done) */ kbasep_try_reset_gpu_early_locked(kbdev); } - KBASE_TRACE_ADD(kbdev, JM_IRQ_END, NULL, NULL, 0, count); + KBASE_KTRACE_ADD_JM(kbdev, JM_IRQ_END, NULL, NULL, 0, count); } void kbasep_job_slot_soft_or_hard_stop_do_action(struct kbase_device *kbdev, @@ -468,7 +555,7 @@ void kbasep_job_slot_soft_or_hard_stop_do_action(struct kbase_device *kbdev, base_jd_core_req core_reqs, struct kbase_jd_atom *target_katom) { -#if KBASE_TRACE_ENABLE +#if KBASE_KTRACE_ENABLE u32 status_reg_before; u64 job_in_head_before; u32 status_reg_after; @@ -496,7 +583,7 @@ void kbasep_job_slot_soft_or_hard_stop_do_action(struct kbase_device *kbdev, /* We are about to issue a soft stop, so mark the atom as having * been soft stopped */ - target_katom->atom_flags |= KBASE_KATOM_FLAG_BEEN_SOFT_STOPPPED; + target_katom->atom_flags |= KBASE_KATOM_FLAG_BEEN_SOFT_STOPPED; /* Mark the point where we issue the soft-stop command */ KBASE_TLSTREAM_TL_EVENT_ATOM_SOFTSTOP_ISSUE(kbdev, target_katom); @@ -524,7 +611,7 @@ void kbasep_job_slot_soft_or_hard_stop_do_action(struct kbase_device *kbdev, kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_COMMAND), action); -#if KBASE_TRACE_ENABLE +#if KBASE_KTRACE_ENABLE status_reg_after = kbase_reg_read(kbdev, JOB_SLOT_REG(js, JS_STATUS)); if (status_reg_after == BASE_JD_EVENT_ACTIVE) { struct 
kbase_jd_atom *head; @@ -534,36 +621,28 @@ void kbasep_job_slot_soft_or_hard_stop_do_action(struct kbase_device *kbdev, head_kctx = head->kctx; if (status_reg_before == BASE_JD_EVENT_ACTIVE) - KBASE_TRACE_ADD_SLOT(kbdev, JM_CHECK_HEAD, head_kctx, - head, job_in_head_before, js); + KBASE_KTRACE_ADD_JM_SLOT(kbdev, JM_CHECK_HEAD, head_kctx, head, job_in_head_before, js); else - KBASE_TRACE_ADD_SLOT(kbdev, JM_CHECK_HEAD, NULL, NULL, - 0, js); + KBASE_KTRACE_ADD_JM_SLOT(kbdev, JM_CHECK_HEAD, NULL, NULL, 0, js); switch (action) { case JS_COMMAND_SOFT_STOP: - KBASE_TRACE_ADD_SLOT(kbdev, JM_SOFTSTOP, head_kctx, - head, head->jc, js); + KBASE_KTRACE_ADD_JM_SLOT(kbdev, JM_SOFTSTOP, head_kctx, head, head->jc, js); break; case JS_COMMAND_SOFT_STOP_0: - KBASE_TRACE_ADD_SLOT(kbdev, JM_SOFTSTOP_0, head_kctx, - head, head->jc, js); + KBASE_KTRACE_ADD_JM_SLOT(kbdev, JM_SOFTSTOP_0, head_kctx, head, head->jc, js); break; case JS_COMMAND_SOFT_STOP_1: - KBASE_TRACE_ADD_SLOT(kbdev, JM_SOFTSTOP_1, head_kctx, - head, head->jc, js); + KBASE_KTRACE_ADD_JM_SLOT(kbdev, JM_SOFTSTOP_1, head_kctx, head, head->jc, js); break; case JS_COMMAND_HARD_STOP: - KBASE_TRACE_ADD_SLOT(kbdev, JM_HARDSTOP, head_kctx, - head, head->jc, js); + KBASE_KTRACE_ADD_JM_SLOT(kbdev, JM_HARDSTOP, head_kctx, head, head->jc, js); break; case JS_COMMAND_HARD_STOP_0: - KBASE_TRACE_ADD_SLOT(kbdev, JM_HARDSTOP_0, head_kctx, - head, head->jc, js); + KBASE_KTRACE_ADD_JM_SLOT(kbdev, JM_HARDSTOP_0, head_kctx, head, head->jc, js); break; case JS_COMMAND_HARD_STOP_1: - KBASE_TRACE_ADD_SLOT(kbdev, JM_HARDSTOP_1, head_kctx, - head, head->jc, js); + KBASE_KTRACE_ADD_JM_SLOT(kbdev, JM_HARDSTOP_1, head_kctx, head, head->jc, js); break; default: BUG(); @@ -571,36 +650,28 @@ void kbasep_job_slot_soft_or_hard_stop_do_action(struct kbase_device *kbdev, } } else { if (status_reg_before == BASE_JD_EVENT_ACTIVE) - KBASE_TRACE_ADD_SLOT(kbdev, JM_CHECK_HEAD, NULL, NULL, - job_in_head_before, js); + KBASE_KTRACE_ADD_JM_SLOT(kbdev, JM_CHECK_HEAD, 
NULL, NULL, job_in_head_before, js); else - KBASE_TRACE_ADD_SLOT(kbdev, JM_CHECK_HEAD, NULL, NULL, - 0, js); + KBASE_KTRACE_ADD_JM_SLOT(kbdev, JM_CHECK_HEAD, NULL, NULL, 0, js); switch (action) { case JS_COMMAND_SOFT_STOP: - KBASE_TRACE_ADD_SLOT(kbdev, JM_SOFTSTOP, NULL, NULL, 0, - js); + KBASE_KTRACE_ADD_JM_SLOT(kbdev, JM_SOFTSTOP, NULL, NULL, 0, js); break; case JS_COMMAND_SOFT_STOP_0: - KBASE_TRACE_ADD_SLOT(kbdev, JM_SOFTSTOP_0, NULL, NULL, - 0, js); + KBASE_KTRACE_ADD_JM_SLOT(kbdev, JM_SOFTSTOP_0, NULL, NULL, 0, js); break; case JS_COMMAND_SOFT_STOP_1: - KBASE_TRACE_ADD_SLOT(kbdev, JM_SOFTSTOP_1, NULL, NULL, - 0, js); + KBASE_KTRACE_ADD_JM_SLOT(kbdev, JM_SOFTSTOP_1, NULL, NULL, 0, js); break; case JS_COMMAND_HARD_STOP: - KBASE_TRACE_ADD_SLOT(kbdev, JM_HARDSTOP, NULL, NULL, 0, - js); + KBASE_KTRACE_ADD_JM_SLOT(kbdev, JM_HARDSTOP, NULL, NULL, 0, js); break; case JS_COMMAND_HARD_STOP_0: - KBASE_TRACE_ADD_SLOT(kbdev, JM_HARDSTOP_0, NULL, NULL, - 0, js); + KBASE_KTRACE_ADD_JM_SLOT(kbdev, JM_HARDSTOP_0, NULL, NULL, 0, js); break; case JS_COMMAND_HARD_STOP_1: - KBASE_TRACE_ADD_SLOT(kbdev, JM_HARDSTOP_1, NULL, NULL, - 0, js); + KBASE_KTRACE_ADD_JM_SLOT(kbdev, JM_HARDSTOP_1, NULL, NULL, 0, js); break; default: BUG(); @@ -660,6 +731,70 @@ void kbase_job_slot_ctx_priority_check_locked(struct kbase_context *kctx, } } +static int softstop_start_rp_nolock( + struct kbase_context *kctx, struct kbase_va_region *reg) +{ + struct kbase_device *const kbdev = kctx->kbdev; + struct kbase_jd_atom *katom; + struct kbase_jd_renderpass *rp; + + lockdep_assert_held(&kbdev->hwaccess_lock); + + katom = kbase_gpu_inspect(kbdev, 1, 0); + + if (!katom) { + dev_dbg(kctx->kbdev->dev, "No atom on job slot\n"); + return -ESRCH; + } + + if (!(katom->core_req & BASE_JD_REQ_START_RENDERPASS)) { + dev_dbg(kctx->kbdev->dev, + "Atom %p on job slot is not start RP\n", (void *)katom); + return -EPERM; + } + + compiletime_assert((1ull << (sizeof(katom->renderpass_id) * 8)) <= + 
ARRAY_SIZE(kctx->jctx.renderpasses), + "Should check invalid access to renderpasses"); + + rp = &kctx->jctx.renderpasses[katom->renderpass_id]; + if (WARN_ON(rp->state != KBASE_JD_RP_START && + rp->state != KBASE_JD_RP_RETRY)) + return -EINVAL; + + dev_dbg(kctx->kbdev->dev, "OOM in state %d with region %p\n", + (int)rp->state, (void *)reg); + + if (WARN_ON(katom != rp->start_katom)) + return -EINVAL; + + dev_dbg(kctx->kbdev->dev, "Adding region %p to list %p\n", + (void *)reg, (void *)&rp->oom_reg_list); + list_move_tail(®->link, &rp->oom_reg_list); + dev_dbg(kctx->kbdev->dev, "Added region to list\n"); + + rp->state = (rp->state == KBASE_JD_RP_START ? + KBASE_JD_RP_PEND_OOM : KBASE_JD_RP_RETRY_PEND_OOM); + + kbase_job_slot_softstop(kbdev, 1, katom); + + return 0; +} + +int kbase_job_slot_softstop_start_rp(struct kbase_context *const kctx, + struct kbase_va_region *const reg) +{ + struct kbase_device *const kbdev = kctx->kbdev; + int err; + unsigned long flags; + + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + err = softstop_start_rp_nolock(kctx, reg); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + + return err; +} + void kbase_jm_wait_for_zero_jobs(struct kbase_context *kctx) { struct kbase_device *kbdev = kctx->kbdev; @@ -749,6 +884,9 @@ KBASE_EXPORT_TEST_API(kbase_job_slot_term); void kbase_job_slot_softstop_swflags(struct kbase_device *kbdev, int js, struct kbase_jd_atom *target_katom, u32 sw_flags) { + dev_dbg(kbdev->dev, "Soft-stop atom %p with flags 0x%x (s:%d)\n", + target_katom, sw_flags, js); + KBASE_DEBUG_ASSERT(!(sw_flags & JS_COMMAND_MASK)); kbase_backend_soft_hard_stop_slot(kbdev, NULL, js, target_katom, JS_COMMAND_SOFT_STOP | sw_flags); @@ -907,7 +1045,7 @@ static void kbasep_reset_timeout_worker(struct work_struct *data) KBASE_RESET_GPU_SILENT) silent = true; - KBASE_TRACE_ADD(kbdev, JM_BEGIN_RESET_WORKER, NULL, NULL, 0u, 0); + KBASE_KTRACE_ADD_JM(kbdev, JM_BEGIN_RESET_WORKER, NULL, NULL, 0u, 0); /* Disable GPU hardware counters. 
* This call will block until counters are disabled. @@ -1035,7 +1173,7 @@ static void kbasep_reset_timeout_worker(struct work_struct *data) dev_err(kbdev->dev, "Reset complete"); /* Try submitting some jobs to restart processing */ - KBASE_TRACE_ADD(kbdev, JM_SUBMIT_AFTER_RESET, NULL, NULL, 0u, 0); + KBASE_KTRACE_ADD_JM(kbdev, JM_SUBMIT_AFTER_RESET, NULL, NULL, 0u, 0); kbase_js_sched_all(kbdev); /* Process any pending slot updates */ @@ -1050,7 +1188,7 @@ static void kbasep_reset_timeout_worker(struct work_struct *data) kbase_hwcnt_context_enable(kbdev->hwcnt_gpu_ctx); spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); - KBASE_TRACE_ADD(kbdev, JM_END_RESET_WORKER, NULL, NULL, 0u, 0); + KBASE_KTRACE_ADD_JM(kbdev, JM_END_RESET_WORKER, NULL, NULL, 0u, 0); } static enum hrtimer_restart kbasep_reset_timer_callback(struct hrtimer *timer) @@ -1281,4 +1419,3 @@ void kbase_reset_gpu_term(struct kbase_device *kbdev) { destroy_workqueue(kbdev->hwaccess.backend.reset_workq); } - diff --git a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_jm_internal.h b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_jm_internal.h index 452ddee35581..1419b5987eff 100644 --- a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_jm_internal.h +++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_jm_internal.h @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2011-2018 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2011-2016, 2018-2020 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -34,6 +34,7 @@ #include #include +#include /** * kbase_job_submit_nolock() - Submit a job to a certain job-slot @@ -70,6 +71,13 @@ static inline char *kbasep_make_job_slot_string(int js, char *js_string, } #endif +static inline int kbasep_jm_is_js_free(struct kbase_device *kbdev, int js, + struct kbase_context *kctx) +{ + return !kbase_reg_read(kbdev, JOB_SLOT_REG(js, JS_COMMAND_NEXT)); +} + + /** * kbase_job_hw_submit() - Submit a job to the GPU * @kbdev: Device pointer diff --git a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_jm_rb.c b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_jm_rb.c index a1f90f5b8c6a..ec7bcb19f8d1 100644 --- a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_jm_rb.c +++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_jm_rb.c @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2014-2019 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014-2020 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -26,12 +26,12 @@ */ #include +#include #include #include #include -#include +#include #include -#include #include #include #include @@ -299,9 +299,6 @@ static void kbase_gpu_release_atom(struct kbase_device *kbdev, case KBASE_ATOM_GPU_RB_READY: /* ***FALLTHROUGH: TRANSITION TO LOWER STATE*** */ - case KBASE_ATOM_GPU_RB_WAITING_AFFINITY: - /* ***FALLTHROUGH: TRANSITION TO LOWER STATE*** */ - case KBASE_ATOM_GPU_RB_WAITING_FOR_CORE_AVAILABLE: break; @@ -617,11 +614,15 @@ static int kbase_jm_enter_protected_mode(struct kbase_device *kbdev, return -EAGAIN; } - if (kbase_pm_get_ready_cores(kbdev, KBASE_PM_CORE_L2) || - kbase_pm_get_trans_cores(kbdev, KBASE_PM_CORE_L2)) { + if (kbase_pm_get_ready_cores(kbdev, + KBASE_PM_CORE_L2) || + kbase_pm_get_trans_cores(kbdev, + KBASE_PM_CORE_L2) || + kbase_is_gpu_lost(kbdev)) { /* - * The L2 is still powered, wait for all the users to - * finish with it before doing the actual reset. + * The L2 is still powered, wait for all + * the users to finish with it before doing + * the actual reset. 
*/ return -EAGAIN; } @@ -809,7 +810,11 @@ void kbase_backend_slot_update(struct kbase_device *kbdev) lockdep_assert_held(&kbdev->hwaccess_lock); +#ifdef CONFIG_MALI_ARBITER_SUPPORT + if (kbase_reset_gpu_is_active(kbdev) || kbase_is_gpu_lost(kbdev)) +#else if (kbase_reset_gpu_is_active(kbdev)) +#endif return; for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) { @@ -834,8 +839,7 @@ void kbase_backend_slot_update(struct kbase_device *kbdev) break; case KBASE_ATOM_GPU_RB_WAITING_BLOCKED: - if (katom[idx]->atom_flags & - KBASE_KATOM_FLAG_X_DEP_BLOCKED) + if (kbase_js_atom_blocked_on_x_dep(katom[idx])) break; katom[idx]->gpu_rb_state = @@ -930,12 +934,6 @@ void kbase_backend_slot_update(struct kbase_device *kbdev) if (!cores_ready) break; - katom[idx]->gpu_rb_state = - KBASE_ATOM_GPU_RB_WAITING_AFFINITY; - - /* ***TRANSITION TO HIGHER STATE*** */ - /* fallthrough */ - case KBASE_ATOM_GPU_RB_WAITING_AFFINITY: katom[idx]->gpu_rb_state = KBASE_ATOM_GPU_RB_READY; @@ -967,11 +965,11 @@ void kbase_backend_slot_update(struct kbase_device *kbdev) other_slots_busy(kbdev, js)) break; - if ((kbdev->serialize_jobs & - KBASE_SERIALIZE_RESET) && - kbase_reset_gpu_is_active(kbdev)) +#ifdef CONFIG_MALI_GEM5_BUILD + if (!kbasep_jm_is_js_free(kbdev, js, + katom[idx]->kctx)) break; - +#endif /* Check if this job needs the cycle counter * enabled before submission */ if (katom[idx]->core_req & BASE_JD_REQ_PERMON) @@ -1015,6 +1013,8 @@ void kbase_backend_run_atom(struct kbase_device *kbdev, struct kbase_jd_atom *katom) { lockdep_assert_held(&kbdev->hwaccess_lock); + dev_dbg(kbdev->dev, "Backend running atom %p\n", (void *)katom); + kbase_gpu_enqueue_atom(kbdev, katom); kbase_backend_slot_update(kbdev); } @@ -1073,6 +1073,10 @@ void kbase_gpu_complete_hw(struct kbase_device *kbdev, int js, struct kbase_jd_atom *katom = kbase_gpu_inspect(kbdev, js, 0); struct kbase_context *kctx = katom->kctx; + dev_dbg(kbdev->dev, + "Atom %p completed on hw with code 0x%x and job_tail 0x%llx (s:%d)\n", + 
(void *)katom, completion_code, job_tail, js); + lockdep_assert_held(&kbdev->hwaccess_lock); /* @@ -1125,12 +1129,11 @@ void kbase_gpu_complete_hw(struct kbase_device *kbdev, int js, if (!kbase_ctx_flag(katom->kctx, KCTX_DYING)) dev_warn(kbdev->dev, "error detected from slot %d, job status 0x%08x (%s)", js, completion_code, - kbase_exception_name - (kbdev, + kbase_gpu_exception_name( completion_code)); -#if KBASE_TRACE_DUMP_ON_JOB_SLOT_ERROR != 0 - KBASE_TRACE_DUMP(kbdev); +#if KBASE_KTRACE_DUMP_ON_JOB_SLOT_ERROR != 0 + KBASE_KTRACE_DUMP(kbdev); #endif kbasep_js_clear_submit_allowed(js_devdata, katom->kctx); @@ -1184,23 +1187,22 @@ void kbase_gpu_complete_hw(struct kbase_device *kbdev, int js, } } - KBASE_TRACE_ADD_SLOT_INFO(kbdev, JM_JOB_DONE, kctx, katom, katom->jc, - js, completion_code); + KBASE_KTRACE_ADD_JM_SLOT_INFO(kbdev, JM_JOB_DONE, kctx, katom, katom->jc, js, completion_code); if (job_tail != 0 && job_tail != katom->jc) { - bool was_updated = (job_tail != katom->jc); + /* Some of the job has been executed */ + dev_dbg(kbdev->dev, + "Update job chain address of atom %p to resume from 0x%llx\n", + (void *)katom, job_tail); - /* Some of the job has been executed, so we update the job chain - * address to where we should resume from */ katom->jc = job_tail; - if (was_updated) - KBASE_TRACE_ADD_SLOT(kbdev, JM_UPDATE_HEAD, katom->kctx, - katom, job_tail, js); + KBASE_KTRACE_ADD_JM_SLOT(kbdev, JM_UPDATE_HEAD, katom->kctx, + katom, job_tail, js); } /* Only update the event code for jobs that weren't cancelled */ if (katom->event_code != BASE_JD_EVENT_JOB_CANCELLED) - katom->event_code = (base_jd_event_code)completion_code; + katom->event_code = (enum base_jd_event_code)completion_code; /* Complete the job, and start new ones * @@ -1250,8 +1252,9 @@ void kbase_gpu_complete_hw(struct kbase_device *kbdev, int js, katom = kbase_jm_complete(kbdev, katom, end_timestamp); if (katom) { - /* Cross-slot dependency has now become runnable. Try to submit - * it. 
*/ + dev_dbg(kbdev->dev, + "Cross-slot dependency %p has become runnable.\n", + (void *)katom); /* Check if there are lower priority jobs to soft stop */ kbase_job_slot_ctx_priority_check_locked(kctx, katom); diff --git a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_js_backend.c b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_js_backend.c index fef995c2cf01..4ffc1232d306 100644 --- a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_js_backend.c +++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_js_backend.c @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2014-2019 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014-2020 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -301,8 +301,7 @@ void kbase_backend_ctx_count_changed(struct kbase_device *kbdev) HR_TIMER_DELAY_NSEC(js_devdata->scheduling_period_ns), HRTIMER_MODE_REL); - KBASE_TRACE_ADD(kbdev, JS_POLICY_TIMER_START, NULL, NULL, 0u, - 0u); + KBASE_KTRACE_ADD_JM(kbdev, JS_POLICY_TIMER_START, NULL, NULL, 0u, 0u); } } @@ -313,7 +312,6 @@ int kbase_backend_timer_init(struct kbase_device *kbdev) hrtimer_init(&backend->scheduling_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); backend->scheduling_timer.function = timer_callback; - backend->timer_running = false; return 0; diff --git a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_l2_mmu_config.c b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_l2_mmu_config.c index 916916ddb48c..e67d12b1ba3d 100644 --- a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_l2_mmu_config.c +++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_l2_mmu_config.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 /* * - * (C) COPYRIGHT 2019 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2019-2020 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -21,7 +21,6 @@ * */ - #include #include #include @@ -76,7 +75,7 @@ static const struct l2_mmu_config_limit limits[] = { {KBASE_3BIT_AID_32, GENMASK(17, 15), 15} }, }; -void kbase_set_mmu_quirks(struct kbase_device *kbdev) +int kbase_set_mmu_quirks(struct kbase_device *kbdev) { /* All older GPUs had 2 bits for both fields, this is a default */ struct l2_mmu_config_limit limit = { @@ -101,18 +100,23 @@ void kbase_set_mmu_quirks(struct kbase_device *kbdev) mmu_config = kbase_reg_read(kbdev, GPU_CONTROL_REG(L2_MMU_CONFIG)); + if (kbase_is_gpu_lost(kbdev)) + return -EIO; + mmu_config &= ~(limit.read.mask | limit.write.mask); /* Can't use FIELD_PREP() macro here as the mask isn't constant */ mmu_config |= (limit.read.value << limit.read.shift) | - (limit.write.value << limit.write.shift); + (limit.write.value << limit.write.shift); kbdev->hw_quirks_mmu = mmu_config; if (kbdev->system_coherency == COHERENCY_ACE) { /* Allow memory configuration disparity to be ignored, - * we optimize the use of shared memory and thus we - * expect some disparity in the memory configuration. - */ + * we optimize the use of shared memory and thus we + * expect some disparity in the memory configuration. + */ kbdev->hw_quirks_mmu |= L2_MMU_CONFIG_ALLOW_SNOOP_DISPARITY; } + + return 0; } diff --git a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_l2_mmu_config.h b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_l2_mmu_config.h index 25636ee8488d..0c779ac80d27 100644 --- a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_l2_mmu_config.h +++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_l2_mmu_config.h @@ -20,13 +20,22 @@ * *//* SPDX-License-Identifier: GPL-2.0 */ /* - * (C) COPYRIGHT 2019 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2019-2020 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software * Foundation, and any use by you of this program is subject to the terms * of such GNU licence. * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * */ #ifndef _KBASE_L2_MMU_CONFIG_H_ @@ -38,7 +47,9 @@ * * Use this function to initialise the hw_quirks_mmu field, for instance to set * the MAX_READS and MAX_WRITES to sane defaults for each GPU. + * + * Return: Zero for success or a Linux error code */ -void kbase_set_mmu_quirks(struct kbase_device *kbdev); +int kbase_set_mmu_quirks(struct kbase_device *kbdev); #endif /* _KBASE_L2_MMU_CONFIG_H */ diff --git a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_mmu_hw_direct.h b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_mmu_hw_direct.h deleted file mode 100644 index 0a3fa7ec9bf9..000000000000 --- a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_mmu_hw_direct.h +++ /dev/null @@ -1,62 +0,0 @@ -/* - * - * (C) COPYRIGHT 2014-2015, 2019 ARM Limited. All rights reserved. - * - * This program is free software and is provided to you under the terms of the - * GNU General Public License version 2 as published by the Free Software - * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. 
- * - * You should have received a copy of the GNU General Public License - * along with this program; if not, you can access it online at - * http://www.gnu.org/licenses/gpl-2.0.html. - * - * SPDX-License-Identifier: GPL-2.0 - * - */ - -/* - * Interface file for the direct implementation for MMU hardware access - * - * Direct MMU hardware interface - * - * This module provides the interface(s) that are required by the direct - * register access implementation of the MMU hardware interface - */ - -#ifndef _KBASE_MMU_HW_DIRECT_H_ -#define _KBASE_MMU_HW_DIRECT_H_ - -#include - -/** - * kbase_mmu_interrupt - Process an MMU interrupt. - * - * Process the MMU interrupt that was reported by the &kbase_device. - * - * @kbdev: Pointer to the kbase device for which the interrupt happened. - * @irq_stat: Value of the MMU_IRQ_STATUS register. - */ -void kbase_mmu_interrupt(struct kbase_device *kbdev, u32 irq_stat); - -/** - * kbase_mmu_bus_fault_interrupt - Process a bus fault interrupt. - * - * Process the bus fault interrupt that was reported for a particular GPU - * address space. - * - * @kbdev: Pointer to the kbase device for which bus fault was reported. - * @status: Value of the GPU_FAULTSTATUS register. - * @as_nr: GPU address space for which the bus fault occurred. - * - * Return: zero if the operation was successful, non-zero otherwise. - */ -int kbase_mmu_bus_fault_interrupt(struct kbase_device *kbdev, - u32 status, u32 as_nr); - -#endif /* _KBASE_MMU_HW_DIRECT_H_ */ diff --git a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_backend.c b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_backend.c index be3348870151..948080b991e8 100644 --- a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_backend.c +++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_backend.c @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2010-2019 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2010-2020 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -26,16 +26,18 @@ */ #include -#include +#include #include #include #include -#include #include -#include #include +#include +#include #include +#include +#include static void kbase_pm_gpu_poweroff_wait_wq(struct work_struct *data); static void kbase_pm_hwcnt_disable_worker(struct work_struct *data); @@ -138,6 +140,9 @@ int kbase_hwaccess_pm_init(struct kbase_device *kbdev) kbdev->pm.backend.ca_cores_enabled = ~0ull; kbdev->pm.backend.gpu_powered = false; kbdev->pm.suspending = false; +#ifdef CONFIG_MALI_ARBITER_SUPPORT + kbdev->pm.gpu_lost = false; +#endif #ifdef CONFIG_MALI_BIFROST_DEBUG kbdev->pm.backend.driver_ready_for_irqs = false; #endif /* CONFIG_MALI_BIFROST_DEBUG */ @@ -244,7 +249,6 @@ static void kbase_pm_gpu_poweroff_wait_wq(struct work_struct *data) pm.backend.gpu_poweroff_wait_work); struct kbase_pm_device_data *pm = &kbdev->pm; struct kbase_pm_backend_data *backend = &pm->backend; - struct kbasep_js_device_data *js_devdata = &kbdev->js_data; unsigned long flags; /* Wait for power transitions to complete. We do this with no locks held @@ -252,8 +256,7 @@ static void kbase_pm_gpu_poweroff_wait_wq(struct work_struct *data) */ kbase_pm_wait_for_desired_state(kbdev); - mutex_lock(&js_devdata->runpool_mutex); - mutex_lock(&kbdev->pm.lock); + kbase_pm_lock(kbdev); if (!backend->poweron_required) { unsigned long flags; @@ -271,11 +274,9 @@ static void kbase_pm_gpu_poweroff_wait_wq(struct work_struct *data) * process. Interrupts are disabled so no more faults * should be generated at this point. */ - mutex_unlock(&kbdev->pm.lock); - mutex_unlock(&js_devdata->runpool_mutex); + kbase_pm_unlock(kbdev); kbase_flush_mmu_wqs(kbdev); - mutex_lock(&js_devdata->runpool_mutex); - mutex_lock(&kbdev->pm.lock); + kbase_pm_lock(kbdev); /* Turn off clock now that fault have been handled. 
We * dropped locks so poweron_required may have changed - @@ -301,8 +302,7 @@ static void kbase_pm_gpu_poweroff_wait_wq(struct work_struct *data) } spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); - mutex_unlock(&kbdev->pm.lock); - mutex_unlock(&js_devdata->runpool_mutex); + kbase_pm_unlock(kbdev); wake_up(&kbdev->pm.backend.poweroff_wait); } @@ -517,14 +517,12 @@ KBASE_EXPORT_TEST_API(kbase_pm_wait_for_poweroff_complete); int kbase_hwaccess_pm_powerup(struct kbase_device *kbdev, unsigned int flags) { - struct kbasep_js_device_data *js_devdata = &kbdev->js_data; unsigned long irq_flags; int ret; KBASE_DEBUG_ASSERT(kbdev != NULL); - mutex_lock(&js_devdata->runpool_mutex); - mutex_lock(&kbdev->pm.lock); + kbase_pm_lock(kbdev); /* A suspend won't happen during startup/insmod */ KBASE_DEBUG_ASSERT(!kbase_pm_is_suspending(kbdev)); @@ -533,8 +531,7 @@ int kbase_hwaccess_pm_powerup(struct kbase_device *kbdev, * them. */ ret = kbase_pm_init_hw(kbdev, flags); if (ret) { - mutex_unlock(&kbdev->pm.lock); - mutex_unlock(&js_devdata->runpool_mutex); + kbase_pm_unlock(kbdev); return ret; } @@ -564,8 +561,7 @@ int kbase_hwaccess_pm_powerup(struct kbase_device *kbdev, /* Turn on the GPU and any cores needed by the policy */ kbase_pm_do_poweron(kbdev, false); - mutex_unlock(&kbdev->pm.lock); - mutex_unlock(&js_devdata->runpool_mutex); + kbase_pm_unlock(kbdev); return 0; } @@ -615,7 +611,7 @@ void kbase_pm_power_changed(struct kbase_device *kbdev) spin_lock_irqsave(&kbdev->hwaccess_lock, flags); kbase_pm_update_state(kbdev); - kbase_backend_slot_update(kbdev); + kbase_backend_slot_update(kbdev); spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); } @@ -627,6 +623,11 @@ void kbase_pm_set_debug_core_mask(struct kbase_device *kbdev, lockdep_assert_held(&kbdev->hwaccess_lock); lockdep_assert_held(&kbdev->pm.lock); + if (kbase_dummy_job_wa_enabled(kbdev)) { + dev_warn(kbdev->dev, "Change of core mask not supported for slot 0 as dummy job WA is enabled"); + new_core_mask_js0 = 
kbdev->pm.debug_core_mask[0]; + } + kbdev->pm.debug_core_mask[0] = new_core_mask_js0; kbdev->pm.debug_core_mask[1] = new_core_mask_js1; kbdev->pm.debug_core_mask[2] = new_core_mask_js2; @@ -648,20 +649,16 @@ void kbase_hwaccess_pm_gpu_idle(struct kbase_device *kbdev) void kbase_hwaccess_pm_suspend(struct kbase_device *kbdev) { - struct kbasep_js_device_data *js_devdata = &kbdev->js_data; - /* Force power off the GPU and all cores (regardless of policy), only * after the PM active count reaches zero (otherwise, we risk turning it * off prematurely) */ - mutex_lock(&js_devdata->runpool_mutex); - mutex_lock(&kbdev->pm.lock); + kbase_pm_lock(kbdev); kbase_pm_do_poweroff(kbdev); kbase_backend_timer_suspend(kbdev); - mutex_unlock(&kbdev->pm.lock); - mutex_unlock(&js_devdata->runpool_mutex); + kbase_pm_unlock(kbdev); kbase_pm_wait_for_poweroff_complete(kbdev); @@ -671,16 +668,80 @@ void kbase_hwaccess_pm_suspend(struct kbase_device *kbdev) void kbase_hwaccess_pm_resume(struct kbase_device *kbdev) { - struct kbasep_js_device_data *js_devdata = &kbdev->js_data; - - mutex_lock(&js_devdata->runpool_mutex); - mutex_lock(&kbdev->pm.lock); + kbase_pm_lock(kbdev); kbdev->pm.suspending = false; +#ifdef CONFIG_MALI_ARBITER_SUPPORT + kbdev->pm.gpu_lost = false; +#endif kbase_pm_do_poweron(kbdev, true); kbase_backend_timer_resume(kbdev); - mutex_unlock(&kbdev->pm.lock); - mutex_unlock(&js_devdata->runpool_mutex); + kbase_pm_unlock(kbdev); } + +#ifdef CONFIG_MALI_ARBITER_SUPPORT +void kbase_pm_handle_gpu_lost(struct kbase_device *kbdev) +{ + unsigned long flags; + struct kbase_pm_backend_data *backend = &kbdev->pm.backend; + ktime_t end_timestamp = ktime_get(); + + /* Full GPU reset will have been done by hypervisor, so cancel */ + atomic_set(&kbdev->hwaccess.backend.reset_gpu, + KBASE_RESET_GPU_NOT_PENDING); + hrtimer_cancel(&kbdev->hwaccess.backend.reset_timer); + + /* GPU is no longer mapped to VM. 
So no interrupts will be received + * and Mali registers have been replaced by dummy RAM + */ + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + spin_lock(&kbdev->mmu_mask_change); + kbdev->irq_reset_flush = true; + spin_unlock(&kbdev->mmu_mask_change); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + kbase_synchronize_irqs(kbdev); + kbase_flush_mmu_wqs(kbdev); + kbdev->irq_reset_flush = false; + + /* Clear all jobs running on the GPU */ + mutex_lock(&kbdev->pm.lock); + kbdev->pm.gpu_lost = true; + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + kbdev->protected_mode = false; + if (!kbdev->pm.backend.protected_entry_transition_override) + kbase_backend_reset(kbdev, &end_timestamp); + kbase_pm_metrics_update(kbdev, NULL); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + + /* Cancel any pending HWC dumps */ + spin_lock_irqsave(&kbdev->hwcnt.lock, flags); + kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_IDLE; + kbdev->hwcnt.backend.triggered = 1; + wake_up(&kbdev->hwcnt.backend.wait); + spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags); + + /* Wait for all threads keeping GPU active to complete */ + mutex_unlock(&kbdev->pm.lock); + wait_event(kbdev->pm.zero_active_count_wait, + kbdev->pm.active_count == 0); + mutex_lock(&kbdev->pm.lock); + + /* Update state to GPU off */ + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + kbdev->pm.backend.shaders_desired = false; + kbdev->pm.backend.l2_desired = false; + backend->l2_state = KBASE_L2_OFF; + backend->shaders_state = KBASE_SHADERS_OFF_CORESTACK_OFF; + kbdev->pm.backend.gpu_powered = false; + backend->poweroff_wait_in_progress = false; + KBASE_KTRACE_ADD(kbdev, PM_WAKE_WAITERS, NULL, 0); + wake_up(&kbdev->pm.backend.gpu_in_desired_state_wait); + kbase_gpu_cache_clean_wait_complete(kbdev); + backend->poweroff_wait_in_progress = false; + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + wake_up(&kbdev->pm.backend.poweroff_wait); + mutex_unlock(&kbdev->pm.lock); +} +#endif /* 
CONFIG_MALI_ARBITER_SUPPORT */ diff --git a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_ca.c b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_ca.c index a0b413ab844d..dc22b6e25bb8 100644 --- a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_ca.c +++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_ca.c @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2013-2018 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2013-2018, 2020 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -30,6 +30,7 @@ #ifdef MALI_BIFROST_NO_MALI #include #endif +#include int kbase_pm_ca_init(struct kbase_device *kbdev) { @@ -64,6 +65,11 @@ void kbase_devfreq_set_core_mask(struct kbase_device *kbdev, u64 core_mask) goto unlock; } + if (kbase_dummy_job_wa_enabled(kbdev)) { + dev_err(kbdev->dev, "Dynamic core scaling not supported as dummy job WA is enabled"); + goto unlock; + } + pm_backend->ca_cores_enabled = core_mask; kbase_pm_update_state(kbdev); diff --git a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_defs.h b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_defs.h index 45ea1de9fbbc..abe9713fd9af 100644 --- a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_defs.h +++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_defs.h @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2014-2019 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014-2020 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -301,9 +301,17 @@ union kbase_pm_policy_data { * @l2_always_on: If true, disable powering down of l2 cache. * @shaders_state: The current state of the shader state machine. * @shaders_avail: This is updated by the state machine when it is in a state - * where it can handle changes to the core availability. 
This - * is internal to the shader state machine and should *not* be - * modified elsewhere. + * where it can write to the SHADER_PWRON or PWROFF registers + * to have the same set of available cores as specified by + * @shaders_desired_mask. So it would eventually have the same + * value as @shaders_desired_mask and would precisely indicate + * the cores that are currently available. This is internal to + * shader state machine and should *not* be modified elsewhere. + * @shaders_desired_mask: This is updated by the state machine when it is in + * a state where it can handle changes to the core + * availability (either by DVFS or sysfs). This is + * internal to the shader state machine and should + * *not* be modified elsewhere. * @shaders_desired: True if the PM active count or power policy requires the * shader cores to be on. This is used as an input to the * shader power state machine. The current state of the @@ -401,6 +409,7 @@ struct kbase_pm_backend_data { enum kbase_l2_core_state l2_state; enum kbase_shader_core_state shaders_state; u64 shaders_avail; + u64 shaders_desired_mask; bool l2_desired; bool l2_always_on; bool shaders_desired; diff --git a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_driver.c b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_driver.c index ff4f574d06db..6b821f7d7134 100644 --- a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_driver.c +++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_driver.c @@ -1,6 +1,7 @@ +// SPDX-License-Identifier: GPL-2.0 /* * - * (C) COPYRIGHT 2010-2019 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2010-2020 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -20,16 +21,14 @@ * */ - - /* * Base kernel Power Management hardware control */ #include #include -#include -#include +#include +#include #include #include #include @@ -42,6 +41,10 @@ #include #include #include +#include +#ifdef CONFIG_MALI_ARBITER_SUPPORT +#include +#endif /* CONFIG_MALI_ARBITER_SUPPORT */ #include @@ -280,16 +283,13 @@ static void kbase_pm_invoke(struct kbase_device *kbdev, if (action == ACTION_PWRON) switch (core_type) { case KBASE_PM_CORE_SHADER: - KBASE_TRACE_ADD(kbdev, PM_PWRON, NULL, NULL, 0u, - lo); + KBASE_KTRACE_ADD(kbdev, PM_PWRON, NULL, cores); break; case KBASE_PM_CORE_TILER: - KBASE_TRACE_ADD(kbdev, PM_PWRON_TILER, NULL, - NULL, 0u, lo); + KBASE_KTRACE_ADD(kbdev, PM_PWRON_TILER, NULL, cores); break; case KBASE_PM_CORE_L2: - KBASE_TRACE_ADD(kbdev, PM_PWRON_L2, NULL, NULL, - 0u, lo); + KBASE_KTRACE_ADD(kbdev, PM_PWRON_L2, NULL, cores); break; default: break; @@ -297,16 +297,13 @@ static void kbase_pm_invoke(struct kbase_device *kbdev, else if (action == ACTION_PWROFF) switch (core_type) { case KBASE_PM_CORE_SHADER: - KBASE_TRACE_ADD(kbdev, PM_PWROFF, NULL, NULL, - 0u, lo); + KBASE_KTRACE_ADD(kbdev, PM_PWROFF, NULL, cores); break; case KBASE_PM_CORE_TILER: - KBASE_TRACE_ADD(kbdev, PM_PWROFF_TILER, NULL, - NULL, 0u, lo); + KBASE_KTRACE_ADD(kbdev, PM_PWROFF_TILER, NULL, cores); break; case KBASE_PM_CORE_L2: - KBASE_TRACE_ADD(kbdev, PM_PWROFF_L2, NULL, NULL, - 0u, lo); + KBASE_KTRACE_ADD(kbdev, PM_PWROFF_L2, NULL, cores); /* disable snoops before L2 is turned off */ kbase_pm_cache_snoop_disable(kbdev); break; @@ -315,11 +312,18 @@ static void kbase_pm_invoke(struct kbase_device *kbdev, } } - if (lo != 0) - kbase_reg_write(kbdev, GPU_CONTROL_REG(reg), lo); - - if (hi != 0) - kbase_reg_write(kbdev, GPU_CONTROL_REG(reg + 4), hi); + if (kbase_dummy_job_wa_enabled(kbdev) && + action == 
ACTION_PWRON && + core_type == KBASE_PM_CORE_SHADER && + !(kbdev->dummy_job_wa.flags & + KBASE_DUMMY_JOB_WA_FLAG_LOGICAL_SHADER_POWER)) { + kbase_dummy_job_wa_execute(kbdev, cores); + } else { + if (lo != 0) + kbase_reg_write(kbdev, GPU_CONTROL_REG(reg), lo); + if (hi != 0) + kbase_reg_write(kbdev, GPU_CONTROL_REG(reg + 4), hi); + } } /** @@ -436,16 +440,13 @@ u64 kbase_pm_get_ready_cores(struct kbase_device *kbdev, switch (type) { case KBASE_PM_CORE_SHADER: - KBASE_TRACE_ADD(kbdev, PM_CORES_POWERED, NULL, NULL, 0u, - (u32) result); + KBASE_KTRACE_ADD(kbdev, PM_CORES_POWERED, NULL, result); break; case KBASE_PM_CORE_TILER: - KBASE_TRACE_ADD(kbdev, PM_CORES_POWERED_TILER, NULL, NULL, 0u, - (u32) result); + KBASE_KTRACE_ADD(kbdev, PM_CORES_POWERED_TILER, NULL, result); break; case KBASE_PM_CORE_L2: - KBASE_TRACE_ADD(kbdev, PM_CORES_POWERED_L2, NULL, NULL, 0u, - (u32) result); + KBASE_KTRACE_ADD(kbdev, PM_CORES_POWERED_L2, NULL, result); break; default: break; @@ -535,7 +536,7 @@ static const char *kbase_l2_core_state_to_string(enum kbase_l2_core_state state) return strings[state]; } -static u64 kbase_pm_l2_update_state(struct kbase_device *kbdev) +static int kbase_pm_l2_update_state(struct kbase_device *kbdev) { struct kbase_pm_backend_data *backend = &kbdev->pm.backend; u64 l2_present = kbdev->gpu_props.props.raw_props.l2_present; @@ -555,6 +556,13 @@ static u64 kbase_pm_l2_update_state(struct kbase_device *kbdev) u64 tiler_ready = kbase_pm_get_ready_cores(kbdev, KBASE_PM_CORE_TILER); + /* + * kbase_pm_get_ready_cores and kbase_pm_get_trans_cores + * are vulnerable to corruption if gpu is lost + */ + if (kbase_is_gpu_lost(kbdev)) + return -EIO; + /* mask off ready from trans in case transitions finished * between the register reads */ @@ -592,10 +600,7 @@ static u64 kbase_pm_l2_update_state(struct kbase_device *kbdev) case KBASE_L2_PEND_ON: if (!l2_trans && l2_ready == l2_present && !tiler_trans && tiler_ready == tiler_present) { - KBASE_TRACE_ADD(kbdev, - 
PM_CORES_CHANGE_AVAILABLE_TILER, - NULL, NULL, 0u, - (u32)tiler_ready); + KBASE_KTRACE_ADD(kbdev, PM_CORES_CHANGE_AVAILABLE_TILER, NULL, tiler_ready); /* * Ensure snoops are enabled after L2 is powered * up. Note that kbase keeps track of the snoop @@ -765,8 +770,7 @@ static u64 kbase_pm_l2_update_state(struct kbase_device *kbdev) kbase_gpu_start_cache_clean_nolock( kbdev); - KBASE_TRACE_ADD(kbdev, PM_CORES_CHANGE_AVAILABLE_TILER, - NULL, NULL, 0u, 0u); + KBASE_KTRACE_ADD(kbdev, PM_CORES_CHANGE_AVAILABLE_TILER, NULL, 0u); backend->l2_state = KBASE_L2_PEND_OFF; break; @@ -811,8 +815,6 @@ static u64 kbase_pm_l2_update_state(struct kbase_device *kbdev) &kbdev->pm.backend.gpu_poweroff_wait_work); } - if (backend->l2_state == KBASE_L2_ON) - return l2_present; return 0; } @@ -889,13 +891,14 @@ static const char *kbase_shader_core_state_to_string( return strings[state]; } -static void kbase_pm_shaders_update_state(struct kbase_device *kbdev) +static int kbase_pm_shaders_update_state(struct kbase_device *kbdev) { struct kbase_pm_backend_data *backend = &kbdev->pm.backend; struct kbasep_pm_tick_timer_state *stt = &kbdev->pm.backend.shader_tick_timer; enum kbase_shader_core_state prev_state; u64 stacks_avail = 0; + int err = 0; lockdep_assert_held(&kbdev->hwaccess_lock); @@ -917,6 +920,15 @@ static void kbase_pm_shaders_update_state(struct kbase_device *kbdev) stacks_ready = kbase_pm_get_ready_cores(kbdev, KBASE_PM_CORE_STACK); } + /* + * kbase_pm_get_ready_cores and kbase_pm_get_trans_cores + * are vulnerable to corruption if gpu is lost + */ + if (kbase_is_gpu_lost(kbdev)) { + err = -EIO; + break; + } + /* mask off ready from trans in case transitions finished * between the register reads */ @@ -931,7 +943,8 @@ static void kbase_pm_shaders_update_state(struct kbase_device *kbdev) * except at certain points where we can handle it, * i.e. off and SHADERS_ON_CORESTACK_ON. 
*/ - backend->shaders_avail = kbase_pm_ca_get_core_mask(kbdev); + backend->shaders_desired_mask = + kbase_pm_ca_get_core_mask(kbdev); backend->pm_shaders_core_mask = 0; if (backend->shaders_desired && @@ -958,6 +971,8 @@ static void kbase_pm_shaders_update_state(struct kbase_device *kbdev) case KBASE_SHADERS_OFF_CORESTACK_PEND_ON: if (!stacks_trans && stacks_ready == stacks_avail) { + backend->shaders_avail = + backend->shaders_desired_mask; kbase_pm_invoke(kbdev, KBASE_PM_CORE_SHADER, backend->shaders_avail, ACTION_PWRON); @@ -967,9 +982,7 @@ static void kbase_pm_shaders_update_state(struct kbase_device *kbdev) case KBASE_SHADERS_PEND_ON_CORESTACK_ON: if (!shaders_trans && shaders_ready == backend->shaders_avail) { - KBASE_TRACE_ADD(kbdev, - PM_CORES_CHANGE_AVAILABLE, - NULL, NULL, 0u, (u32)shaders_ready); + KBASE_KTRACE_ADD(kbdev, PM_CORES_CHANGE_AVAILABLE, NULL, shaders_ready); backend->pm_shaders_core_mask = shaders_ready; backend->hwcnt_desired = true; if (backend->hwcnt_disabled) { @@ -983,11 +996,12 @@ static void kbase_pm_shaders_update_state(struct kbase_device *kbdev) break; case KBASE_SHADERS_ON_CORESTACK_ON: - backend->shaders_avail = kbase_pm_ca_get_core_mask(kbdev); + backend->shaders_desired_mask = + kbase_pm_ca_get_core_mask(kbdev); /* If shaders to change state, trigger a counter dump */ if (!backend->shaders_desired || - (backend->shaders_avail != shaders_ready)) { + (backend->shaders_desired_mask != shaders_ready)) { backend->hwcnt_desired = false; if (!backend->hwcnt_disabled) kbase_pm_trigger_hwcnt_disable(kbdev); @@ -997,7 +1011,7 @@ static void kbase_pm_shaders_update_state(struct kbase_device *kbdev) break; case KBASE_SHADERS_ON_CORESTACK_ON_RECHECK: - backend->shaders_avail = + backend->shaders_desired_mask = kbase_pm_ca_get_core_mask(kbdev); if (!backend->hwcnt_disabled) { @@ -1005,6 +1019,10 @@ static void kbase_pm_shaders_update_state(struct kbase_device *kbdev) ; } else if (!backend->shaders_desired) { if 
(kbdev->pm.backend.protected_transition_override || +#ifdef CONFIG_MALI_ARBITER_SUPPORT + kbase_pm_is_suspending(kbdev) || + kbase_pm_is_gpu_lost(kbdev) || +#endif /* CONFIG_MALI_ARBITER_SUPPORT */ !stt->configured_ticks || WARN_ON(stt->cancel_queued)) { backend->shaders_state = KBASE_SHADERS_WAIT_FINISHED_CORESTACK_ON; @@ -1031,19 +1049,20 @@ static void kbase_pm_shaders_update_state(struct kbase_device *kbdev) backend->shaders_state = KBASE_SHADERS_WAIT_OFF_CORESTACK_ON; } - } else if (backend->shaders_avail & ~shaders_ready) { + } else if (backend->shaders_desired_mask & ~shaders_ready) { /* set cores ready but not available to * meet KBASE_SHADERS_PEND_ON_CORESTACK_ON * check pass */ - backend->shaders_avail |= shaders_ready; + backend->shaders_avail = + (backend->shaders_desired_mask | shaders_ready); kbase_pm_invoke(kbdev, KBASE_PM_CORE_SHADER, backend->shaders_avail & ~shaders_ready, ACTION_PWRON); backend->shaders_state = KBASE_SHADERS_PEND_ON_CORESTACK_ON; - } else if (shaders_ready & ~backend->shaders_avail) { + } else if (shaders_ready & ~backend->shaders_desired_mask) { backend->shaders_state = KBASE_SHADERS_WAIT_GPU_IDLE; } else { @@ -1063,6 +1082,11 @@ static void kbase_pm_shaders_update_state(struct kbase_device *kbdev) backend->shaders_state = KBASE_SHADERS_ON_CORESTACK_ON_RECHECK; } else if (stt->remaining_ticks == 0) { backend->shaders_state = KBASE_SHADERS_WAIT_FINISHED_CORESTACK_ON; +#ifdef CONFIG_MALI_ARBITER_SUPPORT + } else if (kbase_pm_is_suspending(kbdev) || + kbase_pm_is_gpu_lost(kbdev)) { + backend->shaders_state = KBASE_SHADERS_WAIT_FINISHED_CORESTACK_ON; +#endif /* CONFIG_MALI_ARBITER_SUPPORT */ } break; @@ -1104,21 +1128,24 @@ static void kbase_pm_shaders_update_state(struct kbase_device *kbdev) * meet KBASE_SHADERS_PEND_ON_CORESTACK_ON * check pass */ - backend->shaders_avail &= shaders_ready; + + /* shaders_desired_mask shall be a subset of + * shaders_ready + */ + WARN_ON(backend->shaders_desired_mask & ~shaders_ready); + 
WARN_ON(!(backend->shaders_desired_mask & shaders_ready)); + + backend->shaders_avail = + backend->shaders_desired_mask; kbase_pm_invoke(kbdev, KBASE_PM_CORE_SHADER, shaders_ready & ~backend->shaders_avail, ACTION_PWROFF); backend->shaders_state = KBASE_SHADERS_PEND_ON_CORESTACK_ON; - KBASE_TRACE_ADD(kbdev, - PM_CORES_CHANGE_AVAILABLE, - NULL, NULL, 0u, - (u32)(shaders_ready & ~backend->shaders_avail)); + KBASE_KTRACE_ADD(kbdev, PM_CORES_CHANGE_AVAILABLE, NULL, (shaders_ready & ~backend->shaders_avail)); } else { kbase_pm_invoke(kbdev, KBASE_PM_CORE_SHADER, shaders_ready, ACTION_PWROFF); - KBASE_TRACE_ADD(kbdev, - PM_CORES_CHANGE_AVAILABLE, - NULL, NULL, 0u, 0u); + KBASE_KTRACE_ADD(kbdev, PM_CORES_CHANGE_AVAILABLE, NULL, 0u); backend->shaders_state = KBASE_SHADERS_PEND_OFF_CORESTACK_ON; } @@ -1167,6 +1194,8 @@ static void kbase_pm_shaders_update_state(struct kbase_device *kbdev) backend->shaders_state)); } while (backend->shaders_state != prev_state); + + return err; } static bool kbase_pm_is_in_desired_state_nolock(struct kbase_device *kbdev) @@ -1259,24 +1288,29 @@ void kbase_pm_update_state(struct kbase_device *kbdev) if (!kbdev->pm.backend.gpu_powered) return; /* Do nothing if the GPU is off */ - kbase_pm_l2_update_state(kbdev); - kbase_pm_shaders_update_state(kbdev); + if (kbase_pm_l2_update_state(kbdev)) + return; + + if (kbase_pm_shaders_update_state(kbdev)) + return; /* If the shaders just turned off, re-invoke the L2 state machine, in * case it was waiting for the shaders to turn off before powering down * the L2. 
*/ if (prev_shaders_state != KBASE_SHADERS_OFF_CORESTACK_OFF && - kbdev->pm.backend.shaders_state == KBASE_SHADERS_OFF_CORESTACK_OFF) - kbase_pm_l2_update_state(kbdev); + kbdev->pm.backend.shaders_state == + KBASE_SHADERS_OFF_CORESTACK_OFF) { + if (kbase_pm_l2_update_state(kbdev)) + return; + } if (kbase_pm_is_in_desired_state_nolock(kbdev)) { - KBASE_TRACE_ADD(kbdev, PM_DESIRED_REACHED, NULL, NULL, - true, kbdev->pm.backend.shaders_avail); + KBASE_KTRACE_ADD(kbdev, PM_DESIRED_REACHED, NULL, kbdev->pm.backend.shaders_avail); kbase_pm_trace_power_state(kbdev); - KBASE_TRACE_ADD(kbdev, PM_WAKE_WAITERS, NULL, NULL, 0u, 0); + KBASE_KTRACE_ADD(kbdev, PM_WAKE_WAITERS, NULL, 0); wake_up(&kbdev->pm.backend.gpu_in_desired_state_wait); } } @@ -1555,7 +1589,7 @@ void kbase_pm_clock_on(struct kbase_device *kbdev, bool is_resume) kbdev->poweroff_pending = false; - KBASE_TRACE_ADD(kbdev, PM_GPU_ON, NULL, NULL, 0u, 0u); + KBASE_KTRACE_ADD(kbdev, PM_GPU_ON, NULL, 0u); if (is_resume && kbdev->pm.backend.callback_power_resume) { kbdev->pm.backend.callback_power_resume(kbdev); @@ -1580,6 +1614,15 @@ void kbase_pm_clock_on(struct kbase_device *kbdev, bool is_resume) spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); mutex_unlock(&kbdev->mmu_hw_mutex); + if (kbdev->dummy_job_wa.flags & + KBASE_DUMMY_JOB_WA_FLAG_LOGICAL_SHADER_POWER) { + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + kbase_dummy_job_wa_execute(kbdev, + kbase_pm_get_present_cores(kbdev, + KBASE_PM_CORE_SHADER)); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + } + /* Enable the interrupts */ kbase_pm_enable_interrupts(kbdev); @@ -1609,7 +1652,7 @@ bool kbase_pm_clock_off(struct kbase_device *kbdev) return true; } - KBASE_TRACE_ADD(kbdev, PM_GPU_OFF, NULL, NULL, 0u, 0u); + KBASE_KTRACE_ADD(kbdev, PM_GPU_OFF, NULL, 0u); /* Disable interrupts. 
This also clears any outstanding interrupts */ kbase_pm_disable_interrupts(kbdev); @@ -1630,6 +1673,9 @@ bool kbase_pm_clock_off(struct kbase_device *kbdev) /* The GPU power may be turned off from this point */ kbdev->pm.backend.gpu_powered = false; spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); +#ifdef CONFIG_MALI_ARBITER_SUPPORT + kbase_arbiter_pm_vm_event(kbdev, KBASE_VM_GPU_IDLE_EVENT); +#endif /* CONFIG_MALI_ARBITER_SUPPORT */ if (kbdev->pm.backend.callback_power_off) kbdev->pm.backend.callback_power_off(kbdev); @@ -1683,10 +1729,11 @@ static enum hrtimer_restart kbasep_reset_timeout(struct hrtimer *timer) return HRTIMER_NORESTART; } -static void kbase_set_jm_quirks(struct kbase_device *kbdev, const u32 prod_id) +static int kbase_set_jm_quirks(struct kbase_device *kbdev, const u32 prod_id) { - kbdev->hw_quirks_jm = kbase_reg_read(kbdev, + u32 hw_quirks_jm = kbase_reg_read(kbdev, GPU_CONTROL_REG(JM_CONFIG)); + if (GPU_ID2_MODEL_MATCH_VALUE(prod_id) == GPU_ID2_PRODUCT_TMIX) { /* Only for tMIx */ u32 coherency_features; @@ -1699,11 +1746,17 @@ static void kbase_set_jm_quirks(struct kbase_device *kbdev, const u32 prod_id) */ if (coherency_features == COHERENCY_FEATURE_BIT(COHERENCY_ACE)) { - kbdev->hw_quirks_jm |= (COHERENCY_ACE_LITE | + hw_quirks_jm |= (COHERENCY_ACE_LITE | COHERENCY_ACE) << JM_FORCE_COHERENCY_FEATURES_SHIFT; } } + + if (kbase_is_gpu_lost(kbdev)) + return -EIO; + + kbdev->hw_quirks_jm = hw_quirks_jm; + if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_IDVS_GROUP_SIZE)) { int default_idvs_group_size = 0xF; u32 tmp; @@ -1712,53 +1765,71 @@ static void kbase_set_jm_quirks(struct kbase_device *kbdev, const u32 prod_id) "idvs-group-size", &tmp)) tmp = default_idvs_group_size; - if (tmp > JM_MAX_IDVS_GROUP_SIZE) { + if (tmp > IDVS_GROUP_MAX_SIZE) { dev_err(kbdev->dev, "idvs-group-size of %d is too large. 
Maximum value is %d", - tmp, JM_MAX_IDVS_GROUP_SIZE); + tmp, IDVS_GROUP_MAX_SIZE); tmp = default_idvs_group_size; } - kbdev->hw_quirks_jm |= tmp << JM_IDVS_GROUP_SIZE_SHIFT; + kbdev->hw_quirks_jm |= tmp << IDVS_GROUP_SIZE_SHIFT; } #define MANUAL_POWER_CONTROL ((u32)(1 << 8)) if (corestack_driver_control) kbdev->hw_quirks_jm |= MANUAL_POWER_CONTROL; + + return 0; } -static void kbase_set_sc_quirks(struct kbase_device *kbdev, const u32 prod_id) +static int kbase_set_sc_quirks(struct kbase_device *kbdev, const u32 prod_id) { - kbdev->hw_quirks_sc = kbase_reg_read(kbdev, + u32 hw_quirks_sc = kbase_reg_read(kbdev, GPU_CONTROL_REG(SHADER_CONFIG)); + if (kbase_is_gpu_lost(kbdev)) + return -EIO; + if (prod_id < 0x750 || prod_id == 0x6956) /* T60x, T62x, T72x */ - kbdev->hw_quirks_sc |= SC_LS_ATTR_CHECK_DISABLE; + hw_quirks_sc |= SC_LS_ATTR_CHECK_DISABLE; else if (prod_id >= 0x750 && prod_id <= 0x880) /* T76x, T8xx */ - kbdev->hw_quirks_sc |= SC_LS_ALLOW_ATTR_TYPES; + hw_quirks_sc |= SC_LS_ALLOW_ATTR_TYPES; if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_TTRX_2968_TTRX_3162)) - kbdev->hw_quirks_sc |= SC_VAR_ALGORITHM; + hw_quirks_sc |= SC_VAR_ALGORITHM; if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_TLS_HASHING)) - kbdev->hw_quirks_sc |= SC_TLS_HASH_ENABLE; + hw_quirks_sc |= SC_TLS_HASH_ENABLE; + + kbdev->hw_quirks_sc = hw_quirks_sc; + + return 0; } -static void kbase_set_tiler_quirks(struct kbase_device *kbdev) +static int kbase_set_tiler_quirks(struct kbase_device *kbdev) { - kbdev->hw_quirks_tiler = kbase_reg_read(kbdev, + u32 hw_quirks_tiler = kbase_reg_read(kbdev, GPU_CONTROL_REG(TILER_CONFIG)); + + if (kbase_is_gpu_lost(kbdev)) + return -EIO; + /* Set tiler clock gate override if required */ if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_T76X_3953)) - kbdev->hw_quirks_tiler |= TC_CLOCK_GATE_OVERRIDE; + hw_quirks_tiler |= TC_CLOCK_GATE_OVERRIDE; + + kbdev->hw_quirks_tiler = hw_quirks_tiler; + + return 0; } -static void kbase_pm_hw_issues_detect(struct kbase_device *kbdev) 
+static int kbase_pm_hw_issues_detect(struct kbase_device *kbdev) { struct device_node *np = kbdev->dev->of_node; const u32 gpu_id = kbdev->gpu_props.props.raw_props.gpu_id; const u32 prod_id = (gpu_id & GPU_ID_VERSION_PRODUCT_ID) >> GPU_ID_VERSION_PRODUCT_ID_SHIFT; + int error = 0; kbdev->hw_quirks_jm = 0; kbdev->hw_quirks_sc = 0; @@ -1771,7 +1842,9 @@ static void kbase_pm_hw_issues_detect(struct kbase_device *kbdev) "Found quirks_jm = [0x%x] in Devicetree\n", kbdev->hw_quirks_jm); } else { - kbase_set_jm_quirks(kbdev, prod_id); + error = kbase_set_jm_quirks(kbdev, prod_id); + if (error) + return error; } if (!of_property_read_u32(np, "quirks_sc", @@ -1780,7 +1853,9 @@ static void kbase_pm_hw_issues_detect(struct kbase_device *kbdev) "Found quirks_sc = [0x%x] in Devicetree\n", kbdev->hw_quirks_sc); } else { - kbase_set_sc_quirks(kbdev, prod_id); + error = kbase_set_sc_quirks(kbdev, prod_id); + if (error) + return error; } if (!of_property_read_u32(np, "quirks_tiler", @@ -1789,7 +1864,9 @@ static void kbase_pm_hw_issues_detect(struct kbase_device *kbdev) "Found quirks_tiler = [0x%x] in Devicetree\n", kbdev->hw_quirks_tiler); } else { - kbase_set_tiler_quirks(kbdev); + error = kbase_set_tiler_quirks(kbdev); + if (error) + return error; } if (!of_property_read_u32(np, "quirks_mmu", @@ -1798,8 +1875,10 @@ static void kbase_pm_hw_issues_detect(struct kbase_device *kbdev) "Found quirks_mmu = [0x%x] in Devicetree\n", kbdev->hw_quirks_mmu); } else { - kbase_set_mmu_quirks(kbdev); + error = kbase_set_mmu_quirks(kbdev); } + + return error; } static void kbase_pm_hw_issues_apply(struct kbase_device *kbdev) @@ -1861,7 +1940,7 @@ static int kbase_pm_do_reset(struct kbase_device *kbdev) struct kbasep_reset_timeout_data rtdata; int ret; - KBASE_TRACE_ADD(kbdev, CORE_GPU_SOFT_RESET, NULL, NULL, 0u, 0); + KBASE_KTRACE_ADD(kbdev, CORE_GPU_SOFT_RESET, NULL, 0); KBASE_TLSTREAM_JD_GPU_SOFT_RESET(kbdev, kbdev); @@ -1902,8 +1981,9 @@ static int kbase_pm_do_reset(struct kbase_device 
*kbdev) /* No interrupt has been received - check if the RAWSTAT register says * the reset has completed */ - if (kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_RAWSTAT)) & - RESET_COMPLETED) { + if ((kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_RAWSTAT)) & + RESET_COMPLETED) + || kbase_is_gpu_lost(kbdev)) { /* The interrupt is set in the RAWSTAT; this suggests that the * interrupts are not getting to the CPU */ dev_err(kbdev->dev, "Reset interrupt didn't reach CPU. Check interrupt assignments.\n"); @@ -1916,7 +1996,7 @@ static int kbase_pm_do_reset(struct kbase_device *kbdev) * reset */ dev_err(kbdev->dev, "Failed to soft-reset GPU (timed out after %d ms), now attempting a hard reset\n", RESET_TIMEOUT); - KBASE_TRACE_ADD(kbdev, CORE_GPU_HARD_RESET, NULL, NULL, 0u, 0); + KBASE_KTRACE_ADD(kbdev, CORE_GPU_HARD_RESET, NULL, 0); kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND), GPU_COMMAND_HARD_RESET); @@ -1944,29 +2024,20 @@ static int kbase_pm_do_reset(struct kbase_device *kbdev) return -EINVAL; } -static int kbasep_protected_mode_enable(struct protected_mode_device *pdev) +int kbase_pm_protected_mode_enable(struct kbase_device *const kbdev) { - struct kbase_device *kbdev = pdev->data; - kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND), GPU_COMMAND_SET_PROTECTED_MODE); return 0; } -static int kbasep_protected_mode_disable(struct protected_mode_device *pdev) +int kbase_pm_protected_mode_disable(struct kbase_device *const kbdev) { - struct kbase_device *kbdev = pdev->data; - lockdep_assert_held(&kbdev->pm.lock); return kbase_pm_do_reset(kbdev); } -struct protected_mode_ops kbase_native_protected_ops = { - .protected_mode_enable = kbasep_protected_mode_enable, - .protected_mode_disable = kbasep_protected_mode_disable -}; - int kbase_pm_init_hw(struct kbase_device *kbdev, unsigned int flags) { unsigned long irq_flags; @@ -1994,16 +2065,12 @@ int kbase_pm_init_hw(struct kbase_device *kbdev, unsigned int flags) /* The cores should be made unavailable due to the reset 
*/ spin_lock_irqsave(&kbdev->hwaccess_lock, irq_flags); if (kbdev->pm.backend.shaders_state != KBASE_SHADERS_OFF_CORESTACK_OFF) - KBASE_TRACE_ADD(kbdev, PM_CORES_CHANGE_AVAILABLE, NULL, - NULL, 0u, (u32)0u); + KBASE_KTRACE_ADD(kbdev, PM_CORES_CHANGE_AVAILABLE, NULL, 0u); spin_unlock_irqrestore(&kbdev->hwaccess_lock, irq_flags); /* Soft reset the GPU */ - if (kbdev->protected_mode_support) - err = kbdev->protected_ops->protected_mode_disable( - kbdev->protected_dev); - else - err = kbase_pm_do_reset(kbdev); + err = kbdev->protected_ops->protected_mode_disable( + kbdev->protected_dev); spin_lock_irqsave(&kbdev->hwaccess_lock, irq_flags); kbdev->protected_mode = false; @@ -2012,19 +2079,18 @@ int kbase_pm_init_hw(struct kbase_device *kbdev, unsigned int flags) if (err) goto exit; - if (flags & PM_HW_ISSUES_DETECT) - kbase_pm_hw_issues_detect(kbdev); + if (flags & PM_HW_ISSUES_DETECT) { + err = kbase_pm_hw_issues_detect(kbdev); + if (err) + goto exit; + } kbase_pm_hw_issues_apply(kbdev); kbase_cache_set_coherency_mode(kbdev, kbdev->system_coherency); /* Sanity check protected mode was left after reset */ - if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_PROTECTED_MODE)) { - u32 gpu_status = kbase_reg_read(kbdev, - GPU_CONTROL_REG(GPU_STATUS)); - - WARN_ON(gpu_status & GPU_STATUS_PROTECTED_MODE_ACTIVE); - } + WARN_ON(kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_STATUS)) & + GPU_STATUS_PROTECTED_MODE_ACTIVE); /* If cycle counter was in use re-enable it, enable_irqs will only be * false when called from kbase_pm_powerup */ diff --git a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_internal.h b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_internal.h index 6cab4535442d..f8da114003f1 100644 --- a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_internal.h +++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_internal.h @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2010-2019 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2010-2020 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -682,4 +682,29 @@ extern bool corestack_driver_control; * Return: true if l2 need to power on */ bool kbase_pm_is_l2_desired(struct kbase_device *kbdev); + +/** + * kbase_pm_lock - Lock all necessary mutexes to perform PM actions + * + * @kbdev: Device pointer + * + * This function locks correct mutexes independent of GPU architecture. + */ +static inline void kbase_pm_lock(struct kbase_device *kbdev) +{ + mutex_lock(&kbdev->js_data.runpool_mutex); + mutex_lock(&kbdev->pm.lock); +} + +/** + * kbase_pm_unlock - Unlock mutexes locked by kbase_pm_lock + * + * @kbdev: Device pointer + */ +static inline void kbase_pm_unlock(struct kbase_device *kbdev) +{ + mutex_unlock(&kbdev->pm.lock); + mutex_unlock(&kbdev->js_data.runpool_mutex); +} + #endif /* _KBASE_BACKEND_PM_INTERNAL_H_ */ diff --git a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_metrics.c b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_metrics.c index 2706d160325e..519fc41a272b 100644 --- a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_metrics.c +++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_metrics.c @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2011-2019 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2011-2020 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -31,6 +31,7 @@ #include #include #include +#include /* When VSync is being hit aim for utilisation between 70-90% */ #define KBASE_PM_VSYNC_MIN_UTILISATION 70 @@ -284,8 +285,11 @@ static void kbase_pm_metrics_active_calc(struct kbase_device *kbdev) active_cl_ctx[device_nr] = 1; } else { kbdev->pm.backend.metrics.active_gl_ctx[js] = 1; + trace_sysgraph(SGR_ACTIVE, 0, js); } kbdev->pm.backend.metrics.gpu_active = true; + } else { + trace_sysgraph(SGR_INACTIVE, 0, js); } } } diff --git a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_policy.c b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_policy.c index f0e2d22879d3..8e7b3de9a945 100644 --- a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_policy.c +++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_policy.c @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2010-2019 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2010-2020 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -25,7 +25,7 @@ */ #include -#include +#include #include #include @@ -102,6 +102,8 @@ void kbase_pm_update_active(struct kbase_device *kbdev) * when there are contexts active */ KBASE_DEBUG_ASSERT(pm->active_count == 0); + pm->backend.poweron_required = false; + /* Request power off */ if (pm->backend.gpu_powered) { spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); @@ -157,8 +159,7 @@ void kbase_pm_update_cores_state_nolock(struct kbase_device *kbdev) shaders_desired = kbdev->pm.backend.pm_current_policy->shaders_needed(kbdev); if (kbdev->pm.backend.shaders_desired != shaders_desired) { - KBASE_TRACE_ADD(kbdev, PM_CORES_CHANGE_DESIRED, NULL, NULL, 0u, - (u32)kbdev->pm.backend.shaders_desired); + KBASE_KTRACE_ADD(kbdev, PM_CORES_CHANGE_DESIRED, NULL, kbdev->pm.backend.shaders_desired); kbdev->pm.backend.shaders_desired = shaders_desired; kbase_pm_update_state(kbdev); @@ -199,22 +200,20 @@ KBASE_EXPORT_TEST_API(kbase_pm_get_policy); void kbase_pm_set_policy(struct kbase_device *kbdev, const struct kbase_pm_policy *new_policy) { - struct kbasep_js_device_data *js_devdata = &kbdev->js_data; const struct kbase_pm_policy *old_policy; unsigned long flags; KBASE_DEBUG_ASSERT(kbdev != NULL); KBASE_DEBUG_ASSERT(new_policy != NULL); - KBASE_TRACE_ADD(kbdev, PM_SET_POLICY, NULL, NULL, 0u, new_policy->id); + KBASE_KTRACE_ADD(kbdev, PM_SET_POLICY, NULL, new_policy->id); /* During a policy change we pretend the GPU is active */ /* A suspend won't happen here, because we're in a syscall from a * userspace thread */ kbase_pm_context_active(kbdev); - mutex_lock(&js_devdata->runpool_mutex); - mutex_lock(&kbdev->pm.lock); + kbase_pm_lock(kbdev); /* Remove the policy to prevent IRQ handlers from working on it */ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); @@ -222,13 +221,11 @@ void kbase_pm_set_policy(struct kbase_device 
*kbdev, kbdev->pm.backend.pm_current_policy = NULL; spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); - KBASE_TRACE_ADD(kbdev, PM_CURRENT_POLICY_TERM, NULL, NULL, 0u, - old_policy->id); + KBASE_KTRACE_ADD(kbdev, PM_CURRENT_POLICY_TERM, NULL, old_policy->id); if (old_policy->term) old_policy->term(kbdev); - KBASE_TRACE_ADD(kbdev, PM_CURRENT_POLICY_INIT, NULL, NULL, 0u, - new_policy->id); + KBASE_KTRACE_ADD(kbdev, PM_CURRENT_POLICY_INIT, NULL, new_policy->id); if (new_policy->init) new_policy->init(kbdev); @@ -242,8 +239,7 @@ void kbase_pm_set_policy(struct kbase_device *kbdev, kbase_pm_update_active(kbdev); kbase_pm_update_cores_state(kbdev); - mutex_unlock(&kbdev->pm.lock); - mutex_unlock(&js_devdata->runpool_mutex); + kbase_pm_unlock(kbdev); /* Now the policy change is finished, we release our fake context active * reference */ diff --git a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_shader_states.h b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_shader_states.h index 2f573de5ab3a..2bd9e4798e93 100644 --- a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_shader_states.h +++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_shader_states.h @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2019 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2018-2019 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software diff --git a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_time.c b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_time.c index 057bf10082eb..cb105186d798 100644 --- a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_time.c +++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_time.c @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2014-2016,2018-2019 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014-2016,2018-2020 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -26,7 +26,7 @@ #include void kbase_backend_get_gpu_time(struct kbase_device *kbdev, u64 *cycle_counter, - u64 *system_time, struct timespec *ts) + u64 *system_time, struct timespec64 *ts) { u32 hi1, hi2; @@ -60,7 +60,11 @@ void kbase_backend_get_gpu_time(struct kbase_device *kbdev, u64 *cycle_counter, /* Record the CPU's idea of current time */ if (ts != NULL) - getrawmonotonic(ts); +#if (KERNEL_VERSION(4, 17, 0) > LINUX_VERSION_CODE) + *ts = ktime_to_timespec64(ktime_get_raw()); +#else + ktime_get_raw_ts64(ts); +#endif kbase_pm_release_gpu_cycle_counter(kbdev); } diff --git a/drivers/gpu/arm/bifrost/build.bp b/drivers/gpu/arm/bifrost/build.bp index 64eeed295e3c..a74677522f8e 100644 --- a/drivers/gpu/arm/bifrost/build.bp +++ b/drivers/gpu/arm/bifrost/build.bp @@ -1,13 +1,16 @@ /* - * Copyright: - * ---------------------------------------------------------------------------- - * This confidential and proprietary software may be used only as authorized - * by a licensing agreement from ARM Limited. - * (C) COPYRIGHT 2017-2019 ARM Limited, ALL RIGHTS RESERVED - * The entire notice above must be reproduced on all authorized copies and - * copies may only be made to the extent permitted by a licensing agreement - * from ARM Limited. - * ---------------------------------------------------------------------------- + * + * (C) COPYRIGHT 2017-2020 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. 
+ * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + * */ /* Kernel-side tests may include mali_kbase's headers. Therefore any config @@ -34,9 +37,6 @@ bob_defaults { buslog: { kbuild_options: ["CONFIG_MALI_BUSLOG=y"], }, - cinstr_job_dump: { - kbuild_options: ["CONFIG_MALI_JOB_DUMP=y"], - }, cinstr_vector_dump: { kbuild_options: ["CONFIG_MALI_VECTOR_DUMP=y"], }, @@ -46,6 +46,9 @@ bob_defaults { mali_gator_support: { kbuild_options: ["CONFIG_MALI_BIFROST_GATOR_SUPPORT=y"], }, + mali_midgard_enable_trace: { + kbuild_options: ["CONFIG_MALI_BIFROST_ENABLE_TRACE=y"], + }, mali_system_trace: { kbuild_options: ["CONFIG_MALI_BIFROST_SYSTEM_TRACE=y"], }, @@ -61,6 +64,12 @@ bob_defaults { mali_dma_buf_legacy_compat: { kbuild_options: ["CONFIG_MALI_DMA_BUF_LEGACY_COMPAT=y"], }, + mali_arbiter_support: { + kbuild_options: ["CONFIG_MALI_ARBITER_SUPPORT=y"], + }, + mali_gem5_build: { + kbuild_options: ["CONFIG_MALI_GEM5_BUILD=y"], + }, kbuild_options: [ "MALI_UNIT_TEST={{.unit_test_code}}", "MALI_CUSTOMER_RELEASE={{.release}}", @@ -79,6 +88,8 @@ bob_kernel_module { "backend/gpu/*.c", "backend/gpu/*.h", "backend/gpu/Kbuild", + "context/*.c", + "context/*.h", "ipa/*.c", "ipa/*.h", "ipa/Kbuild", @@ -87,6 +98,16 @@ bob_kernel_module { "platform/*/*.h", "platform/*/Kbuild", "thirdparty/*.c", + "debug/*.c", + "debug/*.h", + "device/*.c", + "device/*.h", + "gpu/*.c", + "gpu/*.h", + "tl/*.c", + "tl/*.h", + "mmu/*.c", + "mmu/*.h", ], kbuild_options: [ "CONFIG_MALI_KUTF=n", @@ -111,6 +132,9 @@ bob_kernel_module { cinstr_secondary_hwc: { kbuild_options: ["CONFIG_MALI_BIFROST_PRFCNT_SET_SECONDARY=y"], }, + cinstr_secondary_hwc_via_debug_fs: { + kbuild_options: ["CONFIG_MALI_PRFCNT_SET_SECONDARY_VIA_DEBUG_FS=y"], + }, mali_2mb_alloc: { kbuild_options: ["CONFIG_MALI_2MB_ALLOC=y"], }, @@ -120,11 +144,39 @@ bob_kernel_module { 
mali_hw_errata_1485982_use_clock_alternative: { kbuild_options: ["CONFIG_MALI_HW_ERRATA_1485982_USE_CLOCK_ALTERNATIVE=y"], }, + gpu_has_job_manager: { + srcs: [ + "context/backend/*_jm.c", + "debug/backend/*_jm.c", + "debug/backend/*_jm.h", + "device/backend/*_jm.c", + "gpu/backend/*_jm.c", + "gpu/backend/*_jm.h", + "jm/*.h", + "tl/backend/*_jm.c", + "mmu/backend/*_jm.c", + ], + }, gpu_has_csf: { srcs: [ + "context/backend/*_csf.c", "csf/*.c", "csf/*.h", "csf/Kbuild", + "debug/backend/*_csf.c", + "debug/backend/*_csf.h", + "device/backend/*_csf.c", + "gpu/backend/*_csf.c", + "gpu/backend/*_csf.h", + "tl/backend/*_csf.c", + "mmu/backend/*_csf.c", + ], + }, + mali_arbiter_support: { + srcs: [ + "arbiter/*.c", + "arbiter/*.h", + "arbiter/Kbuild", ], }, defaults: ["mali_kbase_shared_config_defaults"], diff --git a/drivers/gpu/arm/bifrost/context/backend/mali_kbase_context_jm.c b/drivers/gpu/arm/bifrost/context/backend/mali_kbase_context_jm.c new file mode 100644 index 000000000000..2cd2551b433e --- /dev/null +++ b/drivers/gpu/arm/bifrost/context/backend/mali_kbase_context_jm.c @@ -0,0 +1,213 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * + * (C) COPYRIGHT 2019-2020 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. 
+ * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +/* + * Base kernel context APIs for Job Manager GPUs + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#ifdef CONFIG_DEBUG_FS +#include +#include + +void kbase_context_debugfs_init(struct kbase_context *const kctx) +{ + kbase_debug_mem_view_init(kctx); + kbase_mem_pool_debugfs_init(kctx->kctx_dentry, kctx); + kbase_jit_debugfs_init(kctx); + kbasep_jd_debugfs_ctx_init(kctx); + kbase_debug_job_fault_context_init(kctx); +} +KBASE_EXPORT_SYMBOL(kbase_context_debugfs_init); + +void kbase_context_debugfs_term(struct kbase_context *const kctx) +{ + debugfs_remove_recursive(kctx->kctx_dentry); + kbase_debug_job_fault_context_term(kctx); +} +KBASE_EXPORT_SYMBOL(kbase_context_debugfs_term); +#else +void kbase_context_debugfs_init(struct kbase_context *const kctx) +{ + CSTD_UNUSED(kctx); +} +KBASE_EXPORT_SYMBOL(kbase_context_debugfs_init); + +void kbase_context_debugfs_term(struct kbase_context *const kctx) +{ + CSTD_UNUSED(kctx); +} +KBASE_EXPORT_SYMBOL(kbase_context_debugfs_term); +#endif /* CONFIG_DEBUG_FS */ + +static int kbase_context_kbase_timer_setup(struct kbase_context *kctx) +{ + kbase_timer_setup(&kctx->soft_job_timeout, + kbasep_soft_job_timeout_worker); + + return 0; +} + +static int kbase_context_submit_check(struct kbase_context *kctx) +{ + struct kbasep_js_kctx_info *js_kctx_info = &kctx->jctx.sched_info; + unsigned long irq_flags = 0; + + base_context_create_flags const flags = kctx->create_flags; + + mutex_lock(&js_kctx_info->ctx.jsctx_mutex); + spin_lock_irqsave(&kctx->kbdev->hwaccess_lock, irq_flags); + + /* Translate the flags */ + if ((flags & BASE_CONTEXT_SYSTEM_MONITOR_SUBMIT_DISABLED) == 0) + kbase_ctx_flag_clear(kctx, KCTX_SUBMIT_DISABLED); + + spin_unlock_irqrestore(&kctx->kbdev->hwaccess_lock, irq_flags); + mutex_unlock(&js_kctx_info->ctx.jsctx_mutex); + + return 0; +} + +static const struct kbase_context_init context_init[] = { + 
{kbase_context_common_init, kbase_context_common_term, NULL}, + {kbase_context_mem_pool_group_init, kbase_context_mem_pool_group_term, + "Memory pool goup initialization failed"}, + {kbase_mem_evictable_init, kbase_mem_evictable_deinit, + "Memory evictable initialization failed"}, + {kbasep_js_kctx_init, kbasep_js_kctx_term, + "JS kctx initialization failed"}, + {kbase_jd_init, kbase_jd_exit, + "JD initialization failed"}, + {kbase_event_init, kbase_event_cleanup, + "Event initialization failed"}, + {kbase_dma_fence_init, kbase_dma_fence_term, + "DMA fence initialization failed"}, + {kbase_context_mmu_init, kbase_context_mmu_term, + "MMU initialization failed"}, + {kbase_context_mem_alloc_page, kbase_context_mem_pool_free, + "Memory alloc page failed"}, + {kbase_region_tracker_init, kbase_region_tracker_term, + "Region tracker initialization failed"}, + {kbase_sticky_resource_init, kbase_context_sticky_resource_term, + "Sticky resource initialization failed"}, + {kbase_jit_init, kbase_jit_term, + "JIT initialization failed"}, + {kbase_context_kbase_timer_setup, NULL, NULL}, + {kbase_context_submit_check, NULL, NULL}, +}; + +static void kbase_context_term_partial( + struct kbase_context *kctx, + unsigned int i) +{ + while (i-- > 0) { + if (context_init[i].term) + context_init[i].term(kctx); + } +} + +struct kbase_context *kbase_create_context(struct kbase_device *kbdev, + bool is_compat, + base_context_create_flags const flags, + unsigned long const api_version, + struct file *const filp) +{ + struct kbase_context *kctx; + unsigned int i = 0; + + if (WARN_ON(!kbdev)) + return NULL; + + /* Validate flags */ + if (WARN_ON(flags != (flags & BASEP_CONTEXT_CREATE_KERNEL_FLAGS))) + return NULL; + + /* zero-inited as lot of code assume it's zero'ed out on create */ + kctx = vzalloc(sizeof(*kctx)); + if (WARN_ON(!kctx)) + return NULL; + + kctx->kbdev = kbdev; + kctx->api_version = api_version; + kctx->filp = filp; + kctx->create_flags = flags; + + if (is_compat) + 
kbase_ctx_flag_set(kctx, KCTX_COMPAT); +#if defined(CONFIG_64BIT) + else + kbase_ctx_flag_set(kctx, KCTX_FORCE_SAME_VA); +#endif /* defined(CONFIG_64BIT) */ + + for (i = 0; i < ARRAY_SIZE(context_init); i++) { + int err = context_init[i].init(kctx); + + if (err) { + dev_err(kbdev->dev, "%s error = %d\n", + context_init[i].err_mes, err); + kbase_context_term_partial(kctx, i); + return NULL; + } + } + + return kctx; +} +KBASE_EXPORT_SYMBOL(kbase_create_context); + +void kbase_destroy_context(struct kbase_context *kctx) +{ + struct kbase_device *kbdev; + + if (WARN_ON(!kctx)) + return; + + kbdev = kctx->kbdev; + if (WARN_ON(!kbdev)) + return; + + /* Ensure the core is powered up for the destroy process + * A suspend won't happen here, because we're in a syscall + * from a userspace thread. + */ + kbase_pm_context_active(kbdev); + + kbase_mem_pool_group_mark_dying(&kctx->mem_pools); + + kbase_jd_zap_context(kctx); + flush_workqueue(kctx->jctx.job_done_wq); + + kbase_context_term_partial(kctx, ARRAY_SIZE(context_init)); + + kbase_pm_context_idle(kbdev); +} +KBASE_EXPORT_SYMBOL(kbase_destroy_context); diff --git a/drivers/gpu/arm/bifrost/context/mali_kbase_context.c b/drivers/gpu/arm/bifrost/context/mali_kbase_context.c new file mode 100644 index 000000000000..93fe43147536 --- /dev/null +++ b/drivers/gpu/arm/bifrost/context/mali_kbase_context.c @@ -0,0 +1,210 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * + * (C) COPYRIGHT 2019-2020 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +/* + * Base kernel context APIs + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +int kbase_context_common_init(struct kbase_context *kctx) +{ + const unsigned long cookies_mask = KBASE_COOKIE_MASK; + + /* creating a context is considered a disjoint event */ + kbase_disjoint_event(kctx->kbdev); + + kctx->as_nr = KBASEP_AS_NR_INVALID; + + atomic_set(&kctx->refcount, 0); + + spin_lock_init(&kctx->mm_update_lock); + kctx->process_mm = NULL; + atomic_set(&kctx->nonmapped_pages, 0); + atomic_set(&kctx->permanent_mapped_pages, 0); + kctx->tgid = current->tgid; + kctx->pid = current->pid; + + atomic_set(&kctx->used_pages, 0); + + mutex_init(&kctx->reg_lock); + + spin_lock_init(&kctx->mem_partials_lock); + INIT_LIST_HEAD(&kctx->mem_partials); + + spin_lock_init(&kctx->waiting_soft_jobs_lock); + INIT_LIST_HEAD(&kctx->waiting_soft_jobs); + + init_waitqueue_head(&kctx->event_queue); + atomic_set(&kctx->event_count, 0); + atomic_set(&kctx->event_closed, false); + + bitmap_copy(kctx->cookies, &cookies_mask, BITS_PER_LONG); + +#ifdef CONFIG_GPU_TRACEPOINTS + atomic_set(&kctx->jctx.work_id, 0); +#endif + + kctx->id = atomic_add_return(1, &(kctx->kbdev->ctx_num)) - 1; + + mutex_init(&kctx->legacy_hwcnt_lock); + + mutex_lock(&kctx->kbdev->kctx_list_lock); + list_add(&kctx->kctx_list_link, &kctx->kbdev->kctx_list); + + KBASE_TLSTREAM_TL_KBASE_NEW_CTX(kctx->kbdev, kctx->id, + kctx->kbdev->gpu_props.props.raw_props.gpu_id); + KBASE_TLSTREAM_TL_NEW_CTX(kctx->kbdev, kctx, kctx->id, + (u32)(kctx->tgid)); + mutex_unlock(&kctx->kbdev->kctx_list_lock); + + return 0; +} + +void kbase_context_common_term(struct kbase_context *kctx) +{ + unsigned long flags; + int pages; + + 
mutex_lock(&kctx->kbdev->mmu_hw_mutex); + spin_lock_irqsave(&kctx->kbdev->hwaccess_lock, flags); + kbase_ctx_sched_remove_ctx(kctx); + spin_unlock_irqrestore(&kctx->kbdev->hwaccess_lock, flags); + mutex_unlock(&kctx->kbdev->mmu_hw_mutex); + + pages = atomic_read(&kctx->used_pages); + if (pages != 0) + dev_warn(kctx->kbdev->dev, + "%s: %d pages in use!\n", __func__, pages); + + WARN_ON(atomic_read(&kctx->nonmapped_pages) != 0); + + mutex_lock(&kctx->kbdev->kctx_list_lock); + + KBASE_TLSTREAM_TL_KBASE_DEL_CTX(kctx->kbdev, kctx->id); + + KBASE_TLSTREAM_TL_DEL_CTX(kctx->kbdev, kctx); + list_del(&kctx->kctx_list_link); + mutex_unlock(&kctx->kbdev->kctx_list_lock); + + KBASE_KTRACE_ADD(kctx->kbdev, CORE_CTX_DESTROY, kctx, 0u); + + /* Flush the timeline stream, so the user can see the termination + * tracepoints being fired. + * The "if" statement below is for optimization. It is safe to call + * kbase_timeline_streams_flush when timeline is disabled. + */ + if (atomic_read(&kctx->kbdev->timeline_flags) != 0) + kbase_timeline_streams_flush(kctx->kbdev->timeline); + + vfree(kctx); +} + +int kbase_context_mem_pool_group_init(struct kbase_context *kctx) +{ + return kbase_mem_pool_group_init(&kctx->mem_pools, + kctx->kbdev, + &kctx->kbdev->mem_pool_defaults, + &kctx->kbdev->mem_pools); +} + +void kbase_context_mem_pool_group_term(struct kbase_context *kctx) +{ + kbase_mem_pool_group_term(&kctx->mem_pools); +} + +int kbase_context_mmu_init(struct kbase_context *kctx) +{ + kbase_mmu_init(kctx->kbdev, + &kctx->mmu, kctx, + base_context_mmu_group_id_get(kctx->create_flags)); + + return 0; +} + +void kbase_context_mmu_term(struct kbase_context *kctx) +{ + kbase_mmu_term(kctx->kbdev, &kctx->mmu); +} + +int kbase_context_mem_alloc_page(struct kbase_context *kctx) +{ + struct page *p; + + p = kbase_mem_alloc_page(&kctx->mem_pools.small[KBASE_MEM_GROUP_SINK]); + if (!p) + return -ENOMEM; + + kctx->aliasing_sink_page = as_tagged(page_to_phys(p)); + + return 0; +} + +void 
kbase_context_mem_pool_free(struct kbase_context *kctx) +{ + /* drop the aliasing sink page now that it can't be mapped anymore */ + kbase_mem_pool_free( + &kctx->mem_pools.small[KBASE_MEM_GROUP_SINK], + as_page(kctx->aliasing_sink_page), + false); +} + +void kbase_context_sticky_resource_term(struct kbase_context *kctx) +{ + unsigned long pending_regions_to_clean; + + kbase_gpu_vm_lock(kctx); + kbase_sticky_resource_term(kctx); + + /* free pending region setups */ + pending_regions_to_clean = KBASE_COOKIE_MASK; + bitmap_andnot(&pending_regions_to_clean, &pending_regions_to_clean, + kctx->cookies, BITS_PER_LONG); + while (pending_regions_to_clean) { + unsigned int cookie = find_first_bit(&pending_regions_to_clean, + BITS_PER_LONG); + + if (!WARN_ON(!kctx->pending_regions[cookie])) { + dev_dbg(kctx->kbdev->dev, "Freeing pending unmapped region\n"); + kbase_mem_phy_alloc_put( + kctx->pending_regions[cookie]->cpu_alloc); + kbase_mem_phy_alloc_put( + kctx->pending_regions[cookie]->gpu_alloc); + kfree(kctx->pending_regions[cookie]); + + kctx->pending_regions[cookie] = NULL; + } + + bitmap_clear(&pending_regions_to_clean, cookie, 1); + } + kbase_gpu_vm_unlock(kctx); +} diff --git a/drivers/gpu/arm/bifrost/mali_kbase_context.h b/drivers/gpu/arm/bifrost/context/mali_kbase_context.h similarity index 80% rename from drivers/gpu/arm/bifrost/mali_kbase_context.h rename to drivers/gpu/arm/bifrost/context/mali_kbase_context.h index 5037b4e69b96..e4ed8944bdd2 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_context.h +++ b/drivers/gpu/arm/bifrost/context/mali_kbase_context.h @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2011-2017, 2019 ARM Limited. All rights reserved. + * (C) COPYRIGHT ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -18,6 +18,16 @@ * * SPDX-License-Identifier: GPL-2.0 * + *//* SPDX-License-Identifier: GPL-2.0 */ +/* + * + * (C) COPYRIGHT 2011-2017, 2019 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * */ #ifndef _KBASE_CONTEXT_H_ @@ -25,6 +35,28 @@ #include +/** + * kbase_context_debugfs_init - Initialize the kctx platform + * specific debugfs + * + * @kctx: kbase context + * + * This initializes some debugfs interfaces specific to the platform the source + * is compiled for. + */ +void kbase_context_debugfs_init(struct kbase_context *const kctx); + +/** + * kbase_context_debugfs_term - Terminate the kctx platform + * specific debugfs + * + * @kctx: kbase context + * + * This terminates some debugfs interfaces specific to the platform the source + * is compiled for. + */ +void kbase_context_debugfs_term(struct kbase_context *const kctx); + /** * kbase_create_context() - Create a kernel base context. * diff --git a/drivers/gpu/arm/bifrost/context/mali_kbase_context_internal.h b/drivers/gpu/arm/bifrost/context/mali_kbase_context_internal.h new file mode 100644 index 000000000000..818cdbea960d --- /dev/null +++ b/drivers/gpu/arm/bifrost/context/mali_kbase_context_internal.h @@ -0,0 +1,60 @@ +/* + * + * (C) COPYRIGHT ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + *//* SPDX-License-Identifier: GPL-2.0 */ +/* + * (C) COPYRIGHT 2019 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + */ + +#include + +typedef int kbase_context_init_method(struct kbase_context *kctx); +typedef void kbase_context_term_method(struct kbase_context *kctx); + +/** + * struct kbase_context_init - Context init/term methods. + * @init: Function pointer to an initialise method. + * @term: Function pointer to a terminate method. + * @err_mes: Error message to be printed when init method fails. 
+ */ +struct kbase_context_init { + kbase_context_init_method *init; + kbase_context_term_method *term; + char *err_mes; +}; + +int kbase_context_common_init(struct kbase_context *kctx); +void kbase_context_common_term(struct kbase_context *kctx); + +int kbase_context_mem_pool_group_init(struct kbase_context *kctx); +void kbase_context_mem_pool_group_term(struct kbase_context *kctx); + +int kbase_context_mmu_init(struct kbase_context *kctx); +void kbase_context_mmu_term(struct kbase_context *kctx); + +int kbase_context_mem_alloc_page(struct kbase_context *kctx); +void kbase_context_mem_pool_free(struct kbase_context *kctx); + +void kbase_context_sticky_resource_term(struct kbase_context *kctx); diff --git a/drivers/gpu/arm/bifrost/debug/backend/mali_kbase_debug_ktrace_codes_jm.h b/drivers/gpu/arm/bifrost/debug/backend/mali_kbase_debug_ktrace_codes_jm.h new file mode 100644 index 000000000000..d534f3006c9b --- /dev/null +++ b/drivers/gpu/arm/bifrost/debug/backend/mali_kbase_debug_ktrace_codes_jm.h @@ -0,0 +1,170 @@ +/* + * + * (C) COPYRIGHT 2011-2015,2018-2020 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. 
+ * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +/* + * ***** IMPORTANT: THIS IS NOT A NORMAL HEADER FILE ***** + * ***** DO NOT INCLUDE DIRECTLY ***** + * ***** THE LACK OF HEADER GUARDS IS INTENTIONAL ***** + */ + +/* + * The purpose of this header file is just to contain a list of trace code + * identifiers + * + * IMPORTANT: THIS FILE MUST NOT BE USED FOR ANY OTHER PURPOSE OTHER THAN THAT + * DESCRIBED IN mali_kbase_debug_ktrace_codes.h + */ + +#if 0 /* Dummy section to avoid breaking formatting */ +int dummy_array[] = { +#endif + + /* + * Job Slot management events + */ + /* info_val==irq rawstat at start */ + KBASE_KTRACE_CODE_MAKE_CODE(JM_IRQ), + /* info_val==jobs processed */ + KBASE_KTRACE_CODE_MAKE_CODE(JM_IRQ_END), + /* In the following: + * + * - ctx is set if a corresponding job found (NULL otherwise, e.g. some + * soft-stop cases) + * - uatom==kernel-side mapped uatom address (for correlation with + * user-side) + */ + /* info_val==exit code; gpu_addr==chain gpuaddr */ + KBASE_KTRACE_CODE_MAKE_CODE(JM_JOB_DONE), + /* gpu_addr==JS_HEAD_NEXT written, info_val==lower 32 bits of + * affinity + */ + KBASE_KTRACE_CODE_MAKE_CODE(JM_SUBMIT), + /* gpu_addr is as follows: + * - If JS_STATUS active after soft-stop, val==gpu addr written to + * JS_HEAD on submit + * - otherwise gpu_addr==0 + */ + KBASE_KTRACE_CODE_MAKE_CODE(JM_SOFTSTOP), + KBASE_KTRACE_CODE_MAKE_CODE(JM_SOFTSTOP_0), + KBASE_KTRACE_CODE_MAKE_CODE(JM_SOFTSTOP_1), + /* gpu_addr==JS_HEAD read */ + KBASE_KTRACE_CODE_MAKE_CODE(JM_HARDSTOP), + /* gpu_addr==JS_HEAD read */ + KBASE_KTRACE_CODE_MAKE_CODE(JM_HARDSTOP_0), + /* gpu_addr==JS_HEAD read */ + KBASE_KTRACE_CODE_MAKE_CODE(JM_HARDSTOP_1), + /* gpu_addr==JS_TAIL read */ + KBASE_KTRACE_CODE_MAKE_CODE(JM_UPDATE_HEAD), + /* gpu_addr is as follows: + * - If JS_STATUS active before soft-stop, val==JS_HEAD + * - otherwise gpu_addr==0 + */ + /* gpu_addr==JS_HEAD read */ + KBASE_KTRACE_CODE_MAKE_CODE(JM_CHECK_HEAD), + 
KBASE_KTRACE_CODE_MAKE_CODE(JM_FLUSH_WORKQS), + KBASE_KTRACE_CODE_MAKE_CODE(JM_FLUSH_WORKQS_DONE), + /* info_val == is_scheduled */ + KBASE_KTRACE_CODE_MAKE_CODE(JM_ZAP_NON_SCHEDULED), + /* info_val == is_scheduled */ + KBASE_KTRACE_CODE_MAKE_CODE(JM_ZAP_SCHEDULED), + KBASE_KTRACE_CODE_MAKE_CODE(JM_ZAP_DONE), + /* info_val == nr jobs submitted */ + KBASE_KTRACE_CODE_MAKE_CODE(JM_SLOT_SOFT_OR_HARD_STOP), + /* gpu_addr==JS_HEAD_NEXT last written */ + KBASE_KTRACE_CODE_MAKE_CODE(JM_SLOT_EVICT), + KBASE_KTRACE_CODE_MAKE_CODE(JM_SUBMIT_AFTER_RESET), + KBASE_KTRACE_CODE_MAKE_CODE(JM_BEGIN_RESET_WORKER), + KBASE_KTRACE_CODE_MAKE_CODE(JM_END_RESET_WORKER), + /* + * Job dispatch events + */ + /* gpu_addr==value to write into JS_HEAD */ + KBASE_KTRACE_CODE_MAKE_CODE(JD_DONE), + /* gpu_addr==value to write into JS_HEAD */ + KBASE_KTRACE_CODE_MAKE_CODE(JD_DONE_WORKER), + /* gpu_addr==value to write into JS_HEAD */ + KBASE_KTRACE_CODE_MAKE_CODE(JD_DONE_WORKER_END), + /* gpu_addr==value to write into JS_HEAD */ + KBASE_KTRACE_CODE_MAKE_CODE(JD_DONE_TRY_RUN_NEXT_JOB), + /* gpu_addr==0, info_val==0, uatom==0 */ + KBASE_KTRACE_CODE_MAKE_CODE(JD_ZAP_CONTEXT), + /* gpu_addr==value to write into JS_HEAD */ + KBASE_KTRACE_CODE_MAKE_CODE(JD_CANCEL), + /* gpu_addr==value to write into JS_HEAD */ + KBASE_KTRACE_CODE_MAKE_CODE(JD_CANCEL_WORKER), + /* + * Scheduler Core events + */ + /* gpu_addr==value to write into JS_HEAD */ + KBASE_KTRACE_CODE_MAKE_CODE(JS_ADD_JOB), + /* gpu_addr==last value written/would be written to JS_HEAD */ + KBASE_KTRACE_CODE_MAKE_CODE(JS_REMOVE_JOB), + KBASE_KTRACE_CODE_MAKE_CODE(JS_TRY_SCHEDULE_HEAD_CTX), + /* gpu_addr==value to write into JS_HEAD */ + KBASE_KTRACE_CODE_MAKE_CODE(JS_JOB_DONE_TRY_RUN_NEXT_JOB), + /* gpu_addr==value to write into JS_HEAD */ + KBASE_KTRACE_CODE_MAKE_CODE(JS_JOB_DONE_RETRY_NEEDED), + KBASE_KTRACE_CODE_MAKE_CODE(JS_AFFINITY_SUBMIT_TO_BLOCKED), + /* info_val == lower 32 bits of affinity */ + 
KBASE_KTRACE_CODE_MAKE_CODE(JS_AFFINITY_CURRENT), + /* info_val == lower 32 bits of affinity */ + KBASE_KTRACE_CODE_MAKE_CODE(JS_CORE_REF_REQUEST_CORES_FAILED), + /* info_val == lower 32 bits of affinity */ + KBASE_KTRACE_CODE_MAKE_CODE(JS_CORE_REF_REGISTER_INUSE_FAILED), + /* info_val == lower 32 bits of rechecked affinity */ + KBASE_KTRACE_CODE_MAKE_CODE(JS_CORE_REF_REQUEST_ON_RECHECK_FAILED), + /* info_val == lower 32 bits of rechecked affinity */ + KBASE_KTRACE_CODE_MAKE_CODE(JS_CORE_REF_REGISTER_ON_RECHECK_FAILED), + /* info_val == lower 32 bits of affinity */ + KBASE_KTRACE_CODE_MAKE_CODE(JS_CORE_REF_AFFINITY_WOULD_VIOLATE), + /* info_val == the ctx attribute now on ctx */ + KBASE_KTRACE_CODE_MAKE_CODE(JS_CTX_ATTR_NOW_ON_CTX), + /* info_val == the ctx attribute now on runpool */ + KBASE_KTRACE_CODE_MAKE_CODE(JS_CTX_ATTR_NOW_ON_RUNPOOL), + /* info_val == the ctx attribute now off ctx */ + KBASE_KTRACE_CODE_MAKE_CODE(JS_CTX_ATTR_NOW_OFF_CTX), + /* info_val == the ctx attribute now off runpool */ + KBASE_KTRACE_CODE_MAKE_CODE(JS_CTX_ATTR_NOW_OFF_RUNPOOL), + /* + * Scheduler Policy events + */ + KBASE_KTRACE_CODE_MAKE_CODE(JS_POLICY_INIT_CTX), + KBASE_KTRACE_CODE_MAKE_CODE(JS_POLICY_TERM_CTX), + /* info_val == whether it was evicted */ + KBASE_KTRACE_CODE_MAKE_CODE(JS_POLICY_TRY_EVICT_CTX), + KBASE_KTRACE_CODE_MAKE_CODE(JS_POLICY_FOREACH_CTX_JOBS), + KBASE_KTRACE_CODE_MAKE_CODE(JS_POLICY_ENQUEUE_CTX), + KBASE_KTRACE_CODE_MAKE_CODE(JS_POLICY_DEQUEUE_HEAD_CTX), + KBASE_KTRACE_CODE_MAKE_CODE(JS_POLICY_RUNPOOL_ADD_CTX), + KBASE_KTRACE_CODE_MAKE_CODE(JS_POLICY_RUNPOOL_REMOVE_CTX), + KBASE_KTRACE_CODE_MAKE_CODE(JS_POLICY_DEQUEUE_JOB), + KBASE_KTRACE_CODE_MAKE_CODE(JS_POLICY_DEQUEUE_JOB_IRQ), + /* gpu_addr==JS_HEAD to write if the job were run */ + KBASE_KTRACE_CODE_MAKE_CODE(JS_POLICY_ENQUEUE_JOB), + KBASE_KTRACE_CODE_MAKE_CODE(JS_POLICY_TIMER_START), + KBASE_KTRACE_CODE_MAKE_CODE(JS_POLICY_TIMER_END), + +#if 0 /* Dummy section to avoid breaking formatting */ +}; 
+#endif + +/* ***** THE LACK OF HEADER GUARDS IS INTENTIONAL ***** */ diff --git a/drivers/gpu/arm/bifrost/debug/backend/mali_kbase_debug_ktrace_defs_jm.h b/drivers/gpu/arm/bifrost/debug/backend/mali_kbase_debug_ktrace_defs_jm.h new file mode 100644 index 000000000000..55b66adff7c7 --- /dev/null +++ b/drivers/gpu/arm/bifrost/debug/backend/mali_kbase_debug_ktrace_defs_jm.h @@ -0,0 +1,75 @@ +/* + * + * (C) COPYRIGHT 2020 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. 
+ * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +#ifndef _KBASE_DEBUG_KTRACE_DEFS_JM_H_ +#define _KBASE_DEBUG_KTRACE_DEFS_JM_H_ + +/** + * DOC: KTrace version history, JM variant + * 1.0: + * - Original version (implicit, header did not carry version information) + * 2.0: + * - Introduced version information into the header + * - some changes of parameter names in header + * - trace now uses all 64-bits of info_val + * - Non-JM specific parts moved to using info_val instead of refcount/gpu_addr + */ +#define KBASE_KTRACE_VERSION_MAJOR 2 +#define KBASE_KTRACE_VERSION_MINOR 0 + +/* indicates if the trace message has a valid refcount member */ +#define KBASE_KTRACE_FLAG_JM_REFCOUNT (((kbase_ktrace_flag_t)1) << 0) +/* indicates if the trace message has a valid jobslot member */ +#define KBASE_KTRACE_FLAG_JM_JOBSLOT (((kbase_ktrace_flag_t)1) << 1) +/* indicates if the trace message has valid atom related info. */ +#define KBASE_KTRACE_FLAG_JM_ATOM (((kbase_ktrace_flag_t)1) << 2) + + +/** + * struct kbase_ktrace_backend - backend specific part of a trace message + * + * @atom_udata: Copy of the user data sent for the atom in base_jd_submit. + * Only valid if KBASE_KTRACE_FLAG_JM_ATOM is set in @flags + * @gpu_addr: GPU address, usually of the job-chain represented by an atom. + * @atom_number: id of the atom for which trace message was added. Only valid + * if KBASE_KTRACE_FLAG_JM_ATOM is set in @flags + * @code: Identifies the event, refer to enum kbase_ktrace_code. + * @flags: indicates information about the trace message itself. Used + * during dumping of the message. + * @jobslot: job-slot for which trace message was added, valid only for + * job-slot management events. + * @refcount: reference count for the context, valid for certain events + * related to scheduler core and policy. 
+ */ +struct kbase_ktrace_backend { + /* Place 64 and 32-bit members together */ + u64 atom_udata[2]; /* Only valid for KBASE_KTRACE_FLAG_JM_ATOM */ + u64 gpu_addr; + int atom_number; /* Only valid for KBASE_KTRACE_FLAG_JM_ATOM */ + /* Pack smaller members together */ + kbase_ktrace_code_t code; + kbase_ktrace_flag_t flags; + u8 jobslot; + u8 refcount; +}; + +#endif /* _KBASE_DEBUG_KTRACE_DEFS_JM_H_ */ diff --git a/drivers/gpu/arm/bifrost/debug/backend/mali_kbase_debug_ktrace_jm.c b/drivers/gpu/arm/bifrost/debug/backend/mali_kbase_debug_ktrace_jm.c new file mode 100644 index 000000000000..e651a09fba4d --- /dev/null +++ b/drivers/gpu/arm/bifrost/debug/backend/mali_kbase_debug_ktrace_jm.c @@ -0,0 +1,113 @@ +/* + * + * (C) COPYRIGHT 2020 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. 
+ * + * SPDX-License-Identifier: GPL-2.0 + * + */ +#include +#include "debug/mali_kbase_debug_ktrace_internal.h" +#include "debug/backend/mali_kbase_debug_ktrace_jm.h" + +#if KBASE_KTRACE_TARGET_RBUF + +void kbasep_ktrace_backend_format_header(char *buffer, int sz, s32 *written) +{ + *written += MAX(snprintf(buffer + *written, MAX(sz - *written, 0), + "katom,gpu_addr,jobslot,refcount"), 0); +} + +void kbasep_ktrace_backend_format_msg(struct kbase_ktrace_msg *trace_msg, + char *buffer, int sz, s32 *written) +{ + /* katom */ + if (trace_msg->backend.flags & KBASE_KTRACE_FLAG_JM_ATOM) + *written += MAX(snprintf(buffer + *written, + MAX(sz - *written, 0), + "atom %d (ud: 0x%llx 0x%llx)", + trace_msg->backend.atom_number, + trace_msg->backend.atom_udata[0], + trace_msg->backend.atom_udata[1]), 0); + + /* gpu_addr */ + if (trace_msg->backend.flags & KBASE_KTRACE_FLAG_BACKEND) + *written += MAX(snprintf(buffer + *written, + MAX(sz - *written, 0), + ",%.8llx,", trace_msg->backend.gpu_addr), 0); + else + *written += MAX(snprintf(buffer + *written, + MAX(sz - *written, 0), + ",,"), 0); + + /* jobslot */ + if (trace_msg->backend.flags & KBASE_KTRACE_FLAG_JM_JOBSLOT) + *written += MAX(snprintf(buffer + *written, + MAX(sz - *written, 0), + "%d", trace_msg->backend.jobslot), 0); + + *written += MAX(snprintf(buffer + *written, MAX(sz - *written, 0), + ","), 0); + + /* refcount */ + if (trace_msg->backend.flags & KBASE_KTRACE_FLAG_JM_REFCOUNT) + *written += MAX(snprintf(buffer + *written, + MAX(sz - *written, 0), + "%d", trace_msg->backend.refcount), 0); +} + +void kbasep_ktrace_add_jm(struct kbase_device *kbdev, + enum kbase_ktrace_code code, struct kbase_context *kctx, + struct kbase_jd_atom *katom, u64 gpu_addr, + kbase_ktrace_flag_t flags, int refcount, int jobslot, + u64 info_val) +{ + unsigned long irqflags; + struct kbase_ktrace_msg *trace_msg; + + spin_lock_irqsave(&kbdev->ktrace.lock, irqflags); + + /* Reserve and update indices */ + trace_msg = 
kbasep_ktrace_reserve(&kbdev->ktrace); + + /* Fill the common part of the message (including backend.flags) */ + kbasep_ktrace_msg_init(&kbdev->ktrace, trace_msg, code, kctx, flags, + info_val); + + /* Indicate to the common code that backend-specific parts will be + * valid + */ + trace_msg->backend.flags |= KBASE_KTRACE_FLAG_BACKEND; + + /* Fill the JM-specific parts of the message */ + if (katom) { + trace_msg->backend.flags |= KBASE_KTRACE_FLAG_JM_ATOM; + + trace_msg->backend.atom_number = kbase_jd_atom_id(katom->kctx, katom); + trace_msg->backend.atom_udata[0] = katom->udata.blob[0]; + trace_msg->backend.atom_udata[1] = katom->udata.blob[1]; + } + + trace_msg->backend.gpu_addr = gpu_addr; + trace_msg->backend.jobslot = jobslot; + /* Clamp refcount */ + trace_msg->backend.refcount = MIN((unsigned int)refcount, 0xFF); + + /* Done */ + spin_unlock_irqrestore(&kbdev->ktrace.lock, irqflags); +} + +#endif /* KBASE_KTRACE_TARGET_RBUF */ diff --git a/drivers/gpu/arm/bifrost/debug/backend/mali_kbase_debug_ktrace_jm.h b/drivers/gpu/arm/bifrost/debug/backend/mali_kbase_debug_ktrace_jm.h new file mode 100644 index 000000000000..cf3dc1e274e9 --- /dev/null +++ b/drivers/gpu/arm/bifrost/debug/backend/mali_kbase_debug_ktrace_jm.h @@ -0,0 +1,362 @@ +/* + * + * (C) COPYRIGHT 2020 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +#ifndef _KBASE_DEBUG_KTRACE_JM_H_ +#define _KBASE_DEBUG_KTRACE_JM_H_ + +/* + * KTrace target for internal ringbuffer + */ +#if KBASE_KTRACE_TARGET_RBUF +/** + * kbasep_ktrace_add_jm - internal function to add trace about Job Management + * @kbdev: kbase device + * @code: trace code + * @kctx: kbase context, or NULL if no context + * @katom: kbase atom, or NULL if no atom + * @gpu_addr: GPU address, usually related to @katom + * @flags: flags about the message + * @refcount: reference count information to add to the trace + * @jobslot: jobslot information to add to the trace + * @info_val: generic information about @code to add to the trace + * + * PRIVATE: do not use directly. Use KBASE_KTRACE_ADD_JM() instead. + */ +void kbasep_ktrace_add_jm(struct kbase_device *kbdev, + enum kbase_ktrace_code code, struct kbase_context *kctx, + struct kbase_jd_atom *katom, u64 gpu_addr, + kbase_ktrace_flag_t flags, int refcount, int jobslot, + u64 info_val); + +#define KBASE_KTRACE_RBUF_ADD_JM_SLOT(kbdev, code, kctx, katom, gpu_addr, \ + jobslot) \ + kbasep_ktrace_add_jm(kbdev, KBASE_KTRACE_CODE(code), kctx, katom, \ + gpu_addr, KBASE_KTRACE_FLAG_JM_JOBSLOT, 0, jobslot, 0) + +#define KBASE_KTRACE_RBUF_ADD_JM_SLOT_INFO(kbdev, code, kctx, katom, gpu_addr, \ + jobslot, info_val) \ + kbasep_ktrace_add_jm(kbdev, KBASE_KTRACE_CODE(code), kctx, katom, \ + gpu_addr, KBASE_KTRACE_FLAG_JM_JOBSLOT, 0, jobslot, \ + info_val) + +#define KBASE_KTRACE_RBUF_ADD_JM_REFCOUNT(kbdev, code, kctx, katom, gpu_addr, \ + refcount) \ + kbasep_ktrace_add_jm(kbdev, KBASE_KTRACE_CODE(code), kctx, katom, \ + gpu_addr, KBASE_KTRACE_FLAG_JM_REFCOUNT, refcount, 0, 0) +#define KBASE_KTRACE_RBUF_ADD_JM_REFCOUNT_INFO(kbdev, code, kctx, katom, \ + gpu_addr, refcount, info_val) \ + 
kbasep_ktrace_add_jm(kbdev, KBASE_KTRACE_CODE(code), kctx, katom, \
+		gpu_addr, KBASE_KTRACE_FLAG_JM_REFCOUNT, refcount, 0, \
+		info_val)
+
+#define KBASE_KTRACE_RBUF_ADD_JM(kbdev, code, kctx, katom, gpu_addr, info_val) \
+	kbasep_ktrace_add_jm(kbdev, KBASE_KTRACE_CODE(code), kctx, katom, \
+		gpu_addr, 0, 0, 0, info_val)
+
+#else /* KBASE_KTRACE_TARGET_RBUF */
+/*
+ * Ringbuffer target disabled: each macro below expands to a statement that
+ * only references its arguments. Every parameter (including refcount) has to
+ * be named, otherwise a local captured by the calling KBASE_KTRACE_ADD_JM_*()
+ * macro is left unreferenced.
+ * NOTE(review): assumes CSTD_UNUSED() merely void-casts its argument - confirm.
+ */
+#define KBASE_KTRACE_RBUF_ADD_JM_SLOT(kbdev, code, kctx, katom, gpu_addr, \
+		jobslot) \
+	do {\
+		CSTD_UNUSED(kbdev);\
+		CSTD_NOP(code);\
+		CSTD_UNUSED(kctx);\
+		CSTD_UNUSED(katom);\
+		CSTD_UNUSED(gpu_addr);\
+		CSTD_UNUSED(jobslot);\
+		CSTD_NOP(0);\
+	} while (0)
+
+#define KBASE_KTRACE_RBUF_ADD_JM_SLOT_INFO(kbdev, code, kctx, katom, gpu_addr, \
+		jobslot, info_val) \
+	do {\
+		CSTD_UNUSED(kbdev);\
+		CSTD_NOP(code);\
+		CSTD_UNUSED(kctx);\
+		CSTD_UNUSED(katom);\
+		CSTD_UNUSED(gpu_addr);\
+		CSTD_UNUSED(jobslot);\
+		CSTD_UNUSED(info_val);\
+		CSTD_NOP(0);\
+	} while (0)
+
+#define KBASE_KTRACE_RBUF_ADD_JM_REFCOUNT(kbdev, code, kctx, katom, gpu_addr, \
+		refcount) \
+	do {\
+		CSTD_UNUSED(kbdev);\
+		CSTD_NOP(code);\
+		CSTD_UNUSED(kctx);\
+		CSTD_UNUSED(katom);\
+		CSTD_UNUSED(gpu_addr);\
+		CSTD_UNUSED(refcount);\
+		CSTD_NOP(0);\
+	} while (0)
+
+#define KBASE_KTRACE_RBUF_ADD_JM_REFCOUNT_INFO(kbdev, code, kctx, katom, \
+		gpu_addr, refcount, info_val) \
+	do {\
+		CSTD_UNUSED(kbdev);\
+		CSTD_NOP(code);\
+		CSTD_UNUSED(kctx);\
+		CSTD_UNUSED(katom);\
+		CSTD_UNUSED(gpu_addr);\
+		CSTD_UNUSED(refcount);\
+		CSTD_UNUSED(info_val);\
+		CSTD_NOP(0);\
+	} while (0)
+
+#define KBASE_KTRACE_RBUF_ADD_JM(kbdev, code, kctx, katom, gpu_addr, \
+		info_val)\
+	do {\
+		CSTD_UNUSED(kbdev);\
+		CSTD_NOP(code);\
+		CSTD_UNUSED(kctx);\
+		CSTD_UNUSED(katom);\
+		CSTD_UNUSED(gpu_addr);\
+		CSTD_UNUSED(info_val);\
+		CSTD_NOP(0);\
+	} while (0)
+#endif /* KBASE_KTRACE_TARGET_RBUF */
+
+/*
+ * KTrace target for Linux's ftrace
+ */
+#if KBASE_KTRACE_TARGET_FTRACE
+/*
+ * Each variant forwards exactly two values to the trace_mali_<code>()
+ * tracepoint: the slot, refcount or GPU address, followed by info_val
+ * (0 when the variant carries no info_val).
+ */
+#define KBASE_KTRACE_FTRACE_ADD_JM_SLOT(kbdev, code, kctx, katom, gpu_addr, \
+		jobslot) \
+	trace_mali_##code(jobslot, 0)
+
+#define KBASE_KTRACE_FTRACE_ADD_JM_SLOT_INFO(kbdev, code, kctx, katom, \
+		gpu_addr, jobslot, info_val) \
+	trace_mali_##code(jobslot, info_val)
+
+#define KBASE_KTRACE_FTRACE_ADD_JM_REFCOUNT(kbdev, code, kctx, katom, \
+		gpu_addr, refcount) \
+	trace_mali_##code(refcount, 0)
+
+#define KBASE_KTRACE_FTRACE_ADD_JM_REFCOUNT_INFO(kbdev, code, kctx, katom, \
+		gpu_addr, refcount, info_val) \
+	trace_mali_##code(refcount, info_val)
+
+#define KBASE_KTRACE_FTRACE_ADD_JM(kbdev, code, kctx, katom, gpu_addr, \
+		info_val) \
+	trace_mali_##code(gpu_addr, info_val)
+#else /* KBASE_KTRACE_TARGET_FTRACE */
+/*
+ * ftrace target disabled: as for the ringbuffer no-op variants, every
+ * parameter (including refcount) is referenced so that nothing captured by
+ * the caller is left unused.
+ */
+#define KBASE_KTRACE_FTRACE_ADD_JM_SLOT(kbdev, code, kctx, katom, gpu_addr, \
+		jobslot) \
+	do {\
+		CSTD_UNUSED(kbdev);\
+		CSTD_NOP(code);\
+		CSTD_UNUSED(kctx);\
+		CSTD_UNUSED(katom);\
+		CSTD_UNUSED(gpu_addr);\
+		CSTD_UNUSED(jobslot);\
+		CSTD_NOP(0);\
+	} while (0)
+
+#define KBASE_KTRACE_FTRACE_ADD_JM_SLOT_INFO(kbdev, code, kctx, katom, \
+		gpu_addr, jobslot, info_val) \
+	do {\
+		CSTD_UNUSED(kbdev);\
+		CSTD_NOP(code);\
+		CSTD_UNUSED(kctx);\
+		CSTD_UNUSED(katom);\
+		CSTD_UNUSED(gpu_addr);\
+		CSTD_UNUSED(jobslot);\
+		CSTD_UNUSED(info_val);\
+		CSTD_NOP(0);\
+	} while (0)
+
+#define KBASE_KTRACE_FTRACE_ADD_JM_REFCOUNT(kbdev, code, kctx, katom, \
+		gpu_addr, refcount) \
+	do {\
+		CSTD_UNUSED(kbdev);\
+		CSTD_NOP(code);\
+		CSTD_UNUSED(kctx);\
+		CSTD_UNUSED(katom);\
+		CSTD_UNUSED(gpu_addr);\
+		CSTD_UNUSED(refcount);\
+		CSTD_NOP(0);\
+	} while (0)
+
+#define KBASE_KTRACE_FTRACE_ADD_JM_REFCOUNT_INFO(kbdev, code, kctx, katom, \
+		gpu_addr, refcount, info_val) \
+	do {\
+		CSTD_UNUSED(kbdev);\
+		CSTD_NOP(code);\
+		CSTD_UNUSED(kctx);\
+		CSTD_UNUSED(katom);\
+		CSTD_UNUSED(gpu_addr);\
+		CSTD_UNUSED(refcount);\
+		CSTD_UNUSED(info_val);\
+		CSTD_NOP(0);\
+	} while (0)
+
+#define KBASE_KTRACE_FTRACE_ADD_JM(kbdev, code, kctx, katom, gpu_addr, \
+		info_val)\
+	do {\
+		CSTD_UNUSED(kbdev);\
+		CSTD_NOP(code);\
+		CSTD_UNUSED(kctx);\
+		CSTD_UNUSED(katom);\
+		CSTD_UNUSED(gpu_addr);\
+		
CSTD_UNUSED(info_val);\ + CSTD_NOP(0);\ + } while (0) +#endif /* KBASE_KTRACE_TARGET_FTRACE */ + +/* + * Master set of macros to route KTrace to any of the targets + */ + +/** + * KBASE_KTRACE_ADD_JM_SLOT - Add trace values about a job-slot + * @kbdev: kbase device + * @code: trace code + * @kctx: kbase context, or NULL if no context + * @katom: kbase atom, or NULL if no atom + * @gpu_addr: GPU address, usually related to @katom + * @jobslot: jobslot information to add to the trace + * + * Note: Any functions called through this macro will still be evaluated in + * Release builds (CONFIG_MALI_BIFROST_DEBUG not defined). Therefore, when + * KBASE_KTRACE_ENABLE == 0 any functions called to get the parameters supplied + * to this macro must: + * a) be static or static inline, and + * b) just return 0 and have no other statements present in the body. + */ +#define KBASE_KTRACE_ADD_JM_SLOT(kbdev, code, kctx, katom, gpu_addr, \ + jobslot) \ + do { \ + /* capture values that could come from non-pure function calls */ \ + u64 __gpu_addr = gpu_addr; \ + int __jobslot = jobslot; \ + KBASE_KTRACE_RBUF_ADD_JM_SLOT(kbdev, code, kctx, katom, __gpu_addr, __jobslot); \ + KBASE_KTRACE_FTRACE_ADD_JM_SLOT(kbdev, code, kctx, katom, __gpu_addr, __jobslot); \ + } while (0) + +/** + * KBASE_KTRACE_ADD_JM_SLOT_INFO - Add trace values about a job-slot, with info + * @kbdev: kbase device + * @code: trace code + * @kctx: kbase context, or NULL if no context + * @katom: kbase atom, or NULL if no atom + * @gpu_addr: GPU address, usually related to @katom + * @jobslot: jobslot information to add to the trace + * @info_val: generic information about @code to add to the trace + * + * Note: Any functions called through this macro will still be evaluated in + * Release builds (CONFIG_MALI_BIFROST_DEBUG not defined). 
Therefore, when + * KBASE_KTRACE_ENABLE == 0 any functions called to get the parameters supplied + * to this macro must: + * a) be static or static inline, and + * b) just return 0 and have no other statements present in the body. + */ +#define KBASE_KTRACE_ADD_JM_SLOT_INFO(kbdev, code, kctx, katom, gpu_addr, \ + jobslot, info_val) \ + do { \ + /* capture values that could come from non-pure function calls */ \ + u64 __gpu_addr = gpu_addr; \ + int __jobslot = jobslot; \ + u64 __info_val = info_val; \ + KBASE_KTRACE_RBUF_ADD_JM_SLOT_INFO(kbdev, code, kctx, katom, __gpu_addr, __jobslot, __info_val); \ + KBASE_KTRACE_FTRACE_ADD_JM_SLOT_INFO(kbdev, code, kctx, katom, __gpu_addr, __jobslot, __info_val); \ + } while (0) + +/** + * KBASE_KTRACE_ADD_JM_REFCOUNT - Add trace values about a kctx refcount + * @kbdev: kbase device + * @code: trace code + * @kctx: kbase context, or NULL if no context + * @katom: kbase atom, or NULL if no atom + * @gpu_addr: GPU address, usually related to @katom + * @refcount: reference count information to add to the trace + * + * Note: Any functions called through this macro will still be evaluated in + * Release builds (CONFIG_MALI_BIFROST_DEBUG not defined). Therefore, when + * KBASE_KTRACE_ENABLE == 0 any functions called to get the parameters supplied + * to this macro must: + * a) be static or static inline, and + * b) just return 0 and have no other statements present in the body. 
+ */ +#define KBASE_KTRACE_ADD_JM_REFCOUNT(kbdev, code, kctx, katom, gpu_addr, \ + refcount) \ + do { \ + /* capture values that could come from non-pure function calls */ \ + u64 __gpu_addr = gpu_addr; \ + int __refcount = refcount; \ + KBASE_KTRACE_RBUF_ADD_JM_REFCOUNT(kbdev, code, kctx, katom, __gpu_addr, __refcount); \ + KBASE_KTRACE_FTRACE_ADD_JM_REFCOUNT(kbdev, code, kctx, katom, __gpu_addr, __refcount); \ + } while (0) + +/** + * KBASE_KTRACE_ADD_JM_REFCOUNT_INFO - Add trace values about a kctx refcount, + * and info + * @kbdev: kbase device + * @code: trace code + * @kctx: kbase context, or NULL if no context + * @katom: kbase atom, or NULL if no atom + * @gpu_addr: GPU address, usually related to @katom + * @refcount: reference count information to add to the trace + * @info_val: generic information about @code to add to the trace + * + * Note: Any functions called through this macro will still be evaluated in + * Release builds (CONFIG_MALI_BIFROST_DEBUG not defined). Therefore, when + * KBASE_KTRACE_ENABLE == 0 any functions called to get the parameters supplied + * to this macro must: + * a) be static or static inline, and + * b) just return 0 and have no other statements present in the body. 
+ */
+#define KBASE_KTRACE_ADD_JM_REFCOUNT_INFO(kbdev, code, kctx, katom, \
+		gpu_addr, refcount, info_val) \
+	do { \
+		/* capture values that could come from non-pure function calls */ \
+		u64 __gpu_addr = gpu_addr; \
+		int __refcount = refcount; \
+		u64 __info_val = info_val; \
+		/* must route to the *_REFCOUNT_INFO variants: the plain \
+		 * *_REFCOUNT macros take no info_val argument \
+		 */ \
+		KBASE_KTRACE_RBUF_ADD_JM_REFCOUNT_INFO(kbdev, code, kctx, katom, __gpu_addr, __refcount, __info_val); \
+		KBASE_KTRACE_FTRACE_ADD_JM_REFCOUNT_INFO(kbdev, code, kctx, katom, __gpu_addr, __refcount, __info_val); \
+	} while (0)
+
+/**
+ * KBASE_KTRACE_ADD_JM - Add trace values (no slot or refcount)
+ * @kbdev:    kbase device
+ * @code:     trace code
+ * @kctx:     kbase context, or NULL if no context
+ * @katom:    kbase atom, or NULL if no atom
+ * @gpu_addr: GPU address, usually related to @katom
+ * @info_val: generic information about @code to add to the trace
+ *
+ * Note: Any functions called through this macro will still be evaluated in
+ * Release builds (CONFIG_MALI_BIFROST_DEBUG not defined). Therefore, when
+ * KBASE_KTRACE_ENABLE == 0 any functions called to get the parameters supplied
+ * to this macro must:
+ * a) be static or static inline, and
+ * b) just return 0 and have no other statements present in the body.
+ */ +#define KBASE_KTRACE_ADD_JM(kbdev, code, kctx, katom, gpu_addr, info_val) \ + do { \ + /* capture values that could come from non-pure function calls */ \ + u64 __gpu_addr = gpu_addr; \ + u64 __info_val = info_val; \ + KBASE_KTRACE_RBUF_ADD_JM(kbdev, code, kctx, katom, __gpu_addr, __info_val); \ + KBASE_KTRACE_FTRACE_ADD_JM(kbdev, code, kctx, katom, __gpu_addr, __info_val); \ + } while (0) + +#endif /* _KBASE_DEBUG_KTRACE_JM_H_ */ diff --git a/drivers/gpu/arm/bifrost/debug/backend/mali_kbase_debug_linux_ktrace_jm.h b/drivers/gpu/arm/bifrost/debug/backend/mali_kbase_debug_linux_ktrace_jm.h new file mode 100644 index 000000000000..d964e5ade3b8 --- /dev/null +++ b/drivers/gpu/arm/bifrost/debug/backend/mali_kbase_debug_linux_ktrace_jm.h @@ -0,0 +1,150 @@ +/* + * + * (C) COPYRIGHT 2014,2018,2020 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. 
+ * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +/* + * NOTE: This must **only** be included through mali_linux_trace.h, + * otherwise it will fail to setup tracepoints correctly + */ + +#if !defined(_KBASE_DEBUG_LINUX_KTRACE_JM_H_) || defined(TRACE_HEADER_MULTI_READ) +#define _KBASE_DEBUG_LINUX_KTRACE_JM_H_ + +DECLARE_EVENT_CLASS(mali_jm_slot_template, + TP_PROTO(int jobslot, u64 info_val), + TP_ARGS(jobslot, info_val), + TP_STRUCT__entry( + __field(unsigned int, jobslot) + __field(u64, info_val) + ), + TP_fast_assign( + __entry->jobslot = jobslot; + __entry->info_val = info_val; + ), + TP_printk("jobslot=%u info=0x%llx", __entry->jobslot, __entry->info_val) +); + +#define DEFINE_MALI_JM_SLOT_EVENT(name) \ +DEFINE_EVENT(mali_jm_slot_template, mali_##name, \ + TP_PROTO(int jobslot, u64 info_val), \ + TP_ARGS(jobslot, info_val)) +DEFINE_MALI_JM_SLOT_EVENT(JM_SUBMIT); +DEFINE_MALI_JM_SLOT_EVENT(JM_JOB_DONE); +DEFINE_MALI_JM_SLOT_EVENT(JM_UPDATE_HEAD); +DEFINE_MALI_JM_SLOT_EVENT(JM_CHECK_HEAD); +DEFINE_MALI_JM_SLOT_EVENT(JM_SOFTSTOP); +DEFINE_MALI_JM_SLOT_EVENT(JM_SOFTSTOP_0); +DEFINE_MALI_JM_SLOT_EVENT(JM_SOFTSTOP_1); +DEFINE_MALI_JM_SLOT_EVENT(JM_HARDSTOP); +DEFINE_MALI_JM_SLOT_EVENT(JM_HARDSTOP_0); +DEFINE_MALI_JM_SLOT_EVENT(JM_HARDSTOP_1); +DEFINE_MALI_JM_SLOT_EVENT(JM_SLOT_SOFT_OR_HARD_STOP); +DEFINE_MALI_JM_SLOT_EVENT(JM_SLOT_EVICT); +DEFINE_MALI_JM_SLOT_EVENT(JM_BEGIN_RESET_WORKER); +DEFINE_MALI_JM_SLOT_EVENT(JM_END_RESET_WORKER); +DEFINE_MALI_JM_SLOT_EVENT(JS_CORE_REF_REGISTER_ON_RECHECK_FAILED); +DEFINE_MALI_JM_SLOT_EVENT(JS_AFFINITY_SUBMIT_TO_BLOCKED); +DEFINE_MALI_JM_SLOT_EVENT(JS_AFFINITY_CURRENT); +DEFINE_MALI_JM_SLOT_EVENT(JD_DONE_TRY_RUN_NEXT_JOB); +DEFINE_MALI_JM_SLOT_EVENT(JS_CORE_REF_REQUEST_CORES_FAILED); +DEFINE_MALI_JM_SLOT_EVENT(JS_CORE_REF_REGISTER_INUSE_FAILED); +DEFINE_MALI_JM_SLOT_EVENT(JS_CORE_REF_REQUEST_ON_RECHECK_FAILED); +DEFINE_MALI_JM_SLOT_EVENT(JS_CORE_REF_AFFINITY_WOULD_VIOLATE); 
+DEFINE_MALI_JM_SLOT_EVENT(JS_JOB_DONE_TRY_RUN_NEXT_JOB); +DEFINE_MALI_JM_SLOT_EVENT(JS_JOB_DONE_RETRY_NEEDED); +DEFINE_MALI_JM_SLOT_EVENT(JS_POLICY_DEQUEUE_JOB); +DEFINE_MALI_JM_SLOT_EVENT(JS_POLICY_DEQUEUE_JOB_IRQ); +#undef DEFINE_MALI_JM_SLOT_EVENT + +DECLARE_EVENT_CLASS(mali_jm_refcount_template, + TP_PROTO(int refcount, u64 info_val), + TP_ARGS(refcount, info_val), + TP_STRUCT__entry( + __field(unsigned int, refcount) + __field(u64, info_val) + ), + TP_fast_assign( + __entry->refcount = refcount; + __entry->info_val = info_val; + ), + TP_printk("refcount=%u info=0x%llx", __entry->refcount, __entry->info_val) +); + +#define DEFINE_MALI_JM_REFCOUNT_EVENT(name) \ +DEFINE_EVENT(mali_jm_refcount_template, mali_##name, \ + TP_PROTO(int refcount, u64 info_val), \ + TP_ARGS(refcount, info_val)) +DEFINE_MALI_JM_REFCOUNT_EVENT(JS_ADD_JOB); +DEFINE_MALI_JM_REFCOUNT_EVENT(JS_REMOVE_JOB); +DEFINE_MALI_JM_REFCOUNT_EVENT(JS_TRY_SCHEDULE_HEAD_CTX); +DEFINE_MALI_JM_REFCOUNT_EVENT(JS_POLICY_INIT_CTX); +DEFINE_MALI_JM_REFCOUNT_EVENT(JS_POLICY_TERM_CTX); +DEFINE_MALI_JM_REFCOUNT_EVENT(JS_POLICY_ENQUEUE_CTX); +DEFINE_MALI_JM_REFCOUNT_EVENT(JS_POLICY_DEQUEUE_HEAD_CTX); +DEFINE_MALI_JM_REFCOUNT_EVENT(JS_POLICY_TRY_EVICT_CTX); +DEFINE_MALI_JM_REFCOUNT_EVENT(JS_POLICY_RUNPOOL_ADD_CTX); +DEFINE_MALI_JM_REFCOUNT_EVENT(JS_POLICY_RUNPOOL_REMOVE_CTX); +DEFINE_MALI_JM_REFCOUNT_EVENT(JS_POLICY_FOREACH_CTX_JOBS); +#undef DEFINE_MALI_JM_REFCOUNT_EVENT + +DECLARE_EVENT_CLASS(mali_jm_add_template, + TP_PROTO(u64 gpu_addr, u64 info_val), + TP_ARGS(gpu_addr, info_val), + TP_STRUCT__entry( + __field(u64, gpu_addr) + __field(u64, info_val) + ), + TP_fast_assign( + __entry->gpu_addr = gpu_addr; + __entry->info_val = info_val; + ), + TP_printk("gpu_addr=0x%llx info=0x%llx", __entry->gpu_addr, __entry->info_val) +); + +#define DEFINE_MALI_JM_ADD_EVENT(name) \ +DEFINE_EVENT(mali_jm_add_template, mali_##name, \ + TP_PROTO(u64 gpu_addr, u64 info_val), \ + TP_ARGS(gpu_addr, info_val)) 
+DEFINE_MALI_JM_ADD_EVENT(JD_DONE_WORKER); +DEFINE_MALI_JM_ADD_EVENT(JD_DONE_WORKER_END); +DEFINE_MALI_JM_ADD_EVENT(JD_CANCEL_WORKER); +DEFINE_MALI_JM_ADD_EVENT(JD_DONE); +DEFINE_MALI_JM_ADD_EVENT(JD_CANCEL); +DEFINE_MALI_JM_ADD_EVENT(JD_ZAP_CONTEXT); +DEFINE_MALI_JM_ADD_EVENT(JM_IRQ); +DEFINE_MALI_JM_ADD_EVENT(JM_IRQ_END); +DEFINE_MALI_JM_ADD_EVENT(JM_FLUSH_WORKQS); +DEFINE_MALI_JM_ADD_EVENT(JM_FLUSH_WORKQS_DONE); +DEFINE_MALI_JM_ADD_EVENT(JM_ZAP_NON_SCHEDULED); +DEFINE_MALI_JM_ADD_EVENT(JM_ZAP_SCHEDULED); +DEFINE_MALI_JM_ADD_EVENT(JM_ZAP_DONE); +DEFINE_MALI_JM_ADD_EVENT(JM_SUBMIT_AFTER_RESET); +DEFINE_MALI_JM_ADD_EVENT(JM_JOB_COMPLETE); +DEFINE_MALI_JM_ADD_EVENT(JS_CTX_ATTR_NOW_ON_RUNPOOL); +DEFINE_MALI_JM_ADD_EVENT(JS_CTX_ATTR_NOW_OFF_RUNPOOL); +DEFINE_MALI_JM_ADD_EVENT(JS_CTX_ATTR_NOW_ON_CTX); +DEFINE_MALI_JM_ADD_EVENT(JS_CTX_ATTR_NOW_OFF_CTX); +DEFINE_MALI_JM_ADD_EVENT(JS_POLICY_TIMER_END); +DEFINE_MALI_JM_ADD_EVENT(JS_POLICY_TIMER_START); +DEFINE_MALI_JM_ADD_EVENT(JS_POLICY_ENQUEUE_JOB); +#undef DEFINE_MALI_JM_ADD_EVENT + +#endif /* !defined(_KBASE_DEBUG_LINUX_KTRACE_JM_H_) || defined(TRACE_HEADER_MULTI_READ)*/ diff --git a/drivers/gpu/arm/bifrost/debug/mali_kbase_debug_ktrace.c b/drivers/gpu/arm/bifrost/debug/mali_kbase_debug_ktrace.c new file mode 100644 index 000000000000..6322abb11ac8 --- /dev/null +++ b/drivers/gpu/arm/bifrost/debug/mali_kbase_debug_ktrace.c @@ -0,0 +1,342 @@ +/* + * + * (C) COPYRIGHT 2020 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+#include <mali_kbase.h>
+#include "debug/mali_kbase_debug_ktrace_internal.h"
+
+/*
+ * Set up KTrace for @kbdev. With the ringbuffer target enabled this allocates
+ * KBASE_KTRACE_SIZE messages and initialises the ringbuffer lock; otherwise it
+ * does nothing.
+ *
+ * Return: 0 on success, -ENOMEM if the ringbuffer cannot be allocated.
+ */
+int kbase_ktrace_init(struct kbase_device *kbdev)
+{
+#if KBASE_KTRACE_TARGET_RBUF
+	struct kbase_ktrace_msg *rbuf;
+
+	/* See also documentation of enum kbase_ktrace_code */
+	compiletime_assert(sizeof(kbase_ktrace_code_t) == sizeof(unsigned long long) ||
+			KBASE_KTRACE_CODE_COUNT <= (1ull << (sizeof(kbase_ktrace_code_t) * BITS_PER_BYTE)),
+			"kbase_ktrace_code_t not wide enough for KBASE_KTRACE_CODE_COUNT");
+
+	rbuf = kmalloc_array(KBASE_KTRACE_SIZE, sizeof(*rbuf), GFP_KERNEL);
+
+	/* Allocation failure is an out-of-memory condition, not a bad argument */
+	if (!rbuf)
+		return -ENOMEM;
+
+	kbdev->ktrace.rbuf = rbuf;
+	spin_lock_init(&kbdev->ktrace.lock);
+#endif /* KBASE_KTRACE_TARGET_RBUF */
+	return 0;
+}
+
+/* Release the ringbuffer allocated by kbase_ktrace_init(), if any. */
+void kbase_ktrace_term(struct kbase_device *kbdev)
+{
+#if KBASE_KTRACE_TARGET_RBUF
+	kfree(kbdev->ktrace.rbuf);
+#endif /* KBASE_KTRACE_TARGET_RBUF */
+}
+
+/*
+ * Adapter for callers that only carry an opaque pointer: @param is treated as
+ * the kbase device and its trace is dumped through KBASE_KTRACE_DUMP().
+ */
+void kbase_ktrace_hook_wrapper(void *param)
+{
+	struct kbase_device *kbdev = (struct kbase_device *)param;
+
+	KBASE_KTRACE_DUMP(kbdev);
+}
+
+#if KBASE_KTRACE_TARGET_RBUF
+
+/*
+ * Printable name of every trace code, produced by stringifying each entry of
+ * the shared codes header; indexed by the code stored in a trace message.
+ */
+static const char * const kbasep_ktrace_code_string[] = {
+	/*
+	 * IMPORTANT: USE OF SPECIAL #INCLUDE OF NON-STANDARD HEADER FILE
+	 * THIS MUST BE USED AT THE START OF THE ARRAY
+	 */
+#define KBASE_KTRACE_CODE_MAKE_CODE(X) # X
+#include "debug/mali_kbase_debug_ktrace_codes.h"
+#undef KBASE_KTRACE_CODE_MAKE_CODE
+};
+
+/*
+ * Write the CSV column header into @buffer (capacity @sz) starting at offset
+ * @written: the common columns, then the backend's columns, then info_val and
+ * the ktrace version.
+ */
+static void kbasep_ktrace_format_header(char *buffer, int sz, s32 written)
+{
+	written += MAX(snprintf(buffer + written, MAX(sz - written, 0),
+			"secs,thread_id,cpu,code,kctx,"), 0);
+
+	kbasep_ktrace_backend_format_header(buffer, sz, &written);
+
+	written += MAX(snprintf(buffer + written, MAX(sz - written, 0),
+			",info_val,ktrace_version=%u.%u",
+			KBASE_KTRACE_VERSION_MAJOR,
+			
KBASE_KTRACE_VERSION_MINOR), 0); + + buffer[sz - 1] = 0; +} + +static void kbasep_ktrace_format_msg(struct kbase_ktrace_msg *trace_msg, + char *buffer, int sz) +{ + s32 written = 0; + + /* Initial part of message: + * + * secs,thread_id,cpu,code,kctx, + */ + written += MAX(snprintf(buffer + written, MAX(sz - written, 0), + "%d.%.6d,%d,%d,%s,%p,", + (int)trace_msg->timestamp.tv_sec, + (int)(trace_msg->timestamp.tv_nsec / 1000), + trace_msg->thread_id, trace_msg->cpu, + kbasep_ktrace_code_string[trace_msg->backend.code], + trace_msg->kctx), 0); + + /* Backend parts */ + kbasep_ktrace_backend_format_msg(trace_msg, buffer, sz, + &written); + + /* Rest of message: + * + * ,info_val + * + * Note that the last column is empty, it's simply to hold the ktrace + * version in the header + */ + written += MAX(snprintf(buffer + written, MAX(sz - written, 0), + ",0x%.16llx", + (unsigned long long)trace_msg->info_val), 0); + buffer[sz - 1] = 0; +} + +static void kbasep_ktrace_dump_msg(struct kbase_device *kbdev, + struct kbase_ktrace_msg *trace_msg) +{ + char buffer[KTRACE_DUMP_MESSAGE_SIZE]; + + lockdep_assert_held(&kbdev->ktrace.lock); + + kbasep_ktrace_format_msg(trace_msg, buffer, sizeof(buffer)); + dev_dbg(kbdev->dev, "%s", buffer); +} + +struct kbase_ktrace_msg *kbasep_ktrace_reserve(struct kbase_ktrace *ktrace) +{ + struct kbase_ktrace_msg *trace_msg; + + lockdep_assert_held(&ktrace->lock); + + trace_msg = &ktrace->rbuf[ktrace->next_in]; + + /* Update the ringbuffer indices */ + ktrace->next_in = (ktrace->next_in + 1) & KBASE_KTRACE_MASK; + if (ktrace->next_in == ktrace->first_out) + ktrace->first_out = (ktrace->first_out + 1) & KBASE_KTRACE_MASK; + + return trace_msg; +} +void kbasep_ktrace_msg_init(struct kbase_ktrace *ktrace, + struct kbase_ktrace_msg *trace_msg, enum kbase_ktrace_code code, + struct kbase_context *kctx, kbase_ktrace_flag_t flags, + u64 info_val) +{ + lockdep_assert_held(&ktrace->lock); + + trace_msg->thread_id = task_pid_nr(current); + trace_msg->cpu = 
task_cpu(current); + + ktime_get_real_ts64(&trace_msg->timestamp); + + trace_msg->kctx = kctx; + + trace_msg->info_val = info_val; + trace_msg->backend.code = code; + trace_msg->backend.flags = flags; +} + +void kbasep_ktrace_add(struct kbase_device *kbdev, enum kbase_ktrace_code code, + struct kbase_context *kctx, kbase_ktrace_flag_t flags, + u64 info_val) +{ + unsigned long irqflags; + struct kbase_ktrace_msg *trace_msg; + + spin_lock_irqsave(&kbdev->ktrace.lock, irqflags); + + /* Reserve and update indices */ + trace_msg = kbasep_ktrace_reserve(&kbdev->ktrace); + + /* Fill the common part of the message (including backend.flags) */ + kbasep_ktrace_msg_init(&kbdev->ktrace, trace_msg, code, kctx, flags, + info_val); + + /* Done */ + spin_unlock_irqrestore(&kbdev->ktrace.lock, irqflags); +} + +static void kbasep_ktrace_clear_locked(struct kbase_device *kbdev) +{ + lockdep_assert_held(&kbdev->ktrace.lock); + kbdev->ktrace.first_out = kbdev->ktrace.next_in; +} +void kbasep_ktrace_clear(struct kbase_device *kbdev) +{ + unsigned long flags; + + spin_lock_irqsave(&kbdev->ktrace.lock, flags); + kbasep_ktrace_clear_locked(kbdev); + spin_unlock_irqrestore(&kbdev->ktrace.lock, flags); +} + +void kbasep_ktrace_dump(struct kbase_device *kbdev) +{ + unsigned long flags; + u32 start; + u32 end; + char buffer[KTRACE_DUMP_MESSAGE_SIZE] = "Dumping trace:\n"; + + kbasep_ktrace_format_header(buffer, sizeof(buffer), strlen(buffer)); + dev_dbg(kbdev->dev, "%s", buffer); + + spin_lock_irqsave(&kbdev->ktrace.lock, flags); + start = kbdev->ktrace.first_out; + end = kbdev->ktrace.next_in; + + while (start != end) { + struct kbase_ktrace_msg *trace_msg = &kbdev->ktrace.rbuf[start]; + + kbasep_ktrace_dump_msg(kbdev, trace_msg); + + start = (start + 1) & KBASE_KTRACE_MASK; + } + dev_dbg(kbdev->dev, "TRACE_END"); + + kbasep_ktrace_clear_locked(kbdev); + + spin_unlock_irqrestore(&kbdev->ktrace.lock, flags); +} + +#ifdef CONFIG_DEBUG_FS +struct trace_seq_state { + struct kbase_ktrace_msg 
trace_buf[KBASE_KTRACE_SIZE]; + u32 start; + u32 end; +}; + +/* + * Map a seq_file position onto the ringbuffer snapshot held in @state. + * + * Position 0 is reserved for the header line, so position N (N >= 1) selects + * the (N - 1)th message counting from state->start. Returns NULL once @pos + * is past the last message recorded before state->end. + */ +static void *kbasep_ktrace_seq_entry(struct trace_seq_state *state, loff_t pos) +{ + /* Indices wrap around the ringbuffer, so mask the distance */ + u32 count = (state->end - state->start) & KBASE_KTRACE_MASK; + + if (pos < 1 || pos > count) + return NULL; + + return &state->trace_buf[(state->start + (u32)(pos - 1)) & + KBASE_KTRACE_MASK]; +} + +static void *kbasep_ktrace_seq_start(struct seq_file *s, loff_t *pos) +{ + struct trace_seq_state *state = s->private; + + if (*pos == 0) + /* See Documentation/filesystems/seq_file.txt */ + return SEQ_START_TOKEN; + + return kbasep_ktrace_seq_entry(state, *pos); +} + +static void kbasep_ktrace_seq_stop(struct seq_file *s, void *data) +{ +} + +static void *kbasep_ktrace_seq_next(struct seq_file *s, void *data, loff_t *pos) +{ + struct trace_seq_state *state = s->private; + + /* + * seq_file expects next() to advance *pos on every call, including the + * step from the header token to the first message. + */ + (*pos)++; + + return kbasep_ktrace_seq_entry(state, *pos); +} + +static int kbasep_ktrace_seq_show(struct seq_file *s, void *data) +{ + struct kbase_ktrace_msg *trace_msg = data; + char buffer[KTRACE_DUMP_MESSAGE_SIZE]; + + /* If this is the start, print a header */ + if (data == SEQ_START_TOKEN) + kbasep_ktrace_format_header(buffer, sizeof(buffer), 0); + else + kbasep_ktrace_format_msg(trace_msg, buffer, sizeof(buffer)); + + seq_printf(s, "%s\n", buffer); + return 0; +} + +static const struct seq_operations kbasep_ktrace_seq_ops = { + .start = kbasep_ktrace_seq_start, + .next = kbasep_ktrace_seq_next, + .stop = kbasep_ktrace_seq_stop, + .show = kbasep_ktrace_seq_show, +}; + +static int kbasep_ktrace_debugfs_open(struct inode *inode, struct file *file) +{ + struct kbase_device *kbdev = inode->i_private; + unsigned long flags; + + struct trace_seq_state *state; + + state = __seq_open_private(file, &kbasep_ktrace_seq_ops, + sizeof(*state)); + if (!state) + return -ENOMEM; + + spin_lock_irqsave(&kbdev->ktrace.lock, flags); + state->start = kbdev->ktrace.first_out; + state->end = kbdev->ktrace.next_in; + 
memcpy(state->trace_buf, kbdev->ktrace.rbuf, sizeof(state->trace_buf)); + spin_unlock_irqrestore(&kbdev->ktrace.lock, flags); + + return 0; +} + +static const struct file_operations kbasep_ktrace_debugfs_fops = { + .owner = THIS_MODULE, + .open = kbasep_ktrace_debugfs_open, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release_private, +}; + +void kbase_ktrace_debugfs_init(struct kbase_device *kbdev) +{ + debugfs_create_file("mali_trace", 0444, + kbdev->mali_debugfs_directory, kbdev, + &kbasep_ktrace_debugfs_fops); +} +#endif /* CONFIG_DEBUG_FS */ + +#else /* KBASE_KTRACE_TARGET_RBUF */ + +#ifdef CONFIG_DEBUG_FS +void kbase_ktrace_debugfs_init(struct kbase_device *kbdev) +{ + CSTD_UNUSED(kbdev); +} +#endif /* CONFIG_DEBUG_FS */ +#endif /* KBASE_KTRACE_TARGET_RBUF */ diff --git a/drivers/gpu/arm/bifrost/debug/mali_kbase_debug_ktrace.h b/drivers/gpu/arm/bifrost/debug/mali_kbase_debug_ktrace.h new file mode 100644 index 000000000000..9b1905cd32b7 --- /dev/null +++ b/drivers/gpu/arm/bifrost/debug/mali_kbase_debug_ktrace.h @@ -0,0 +1,219 @@ +/* + * + * (C) COPYRIGHT 2020 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. 
+ * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +/* + * DOC: Kbase's own trace, 'KTrace' + * + * Low overhead trace specific to kbase, aimed at: + * - common use-cases for tracing kbase specific functionality to do with + * running work on the GPU + * - easy 1-line addition of new types of trace + * + * KTrace can be recorded in one or more of the following targets: + * - KBASE_KTRACE_TARGET_RBUF: low overhead ringbuffer protected by an + * irq-spinlock, output available via dev_dbg() and debugfs file + * - KBASE_KTRACE_TARGET_FTRACE: ftrace based tracepoints under 'mali' events + */ + +#ifndef _KBASE_DEBUG_KTRACE_H_ +#define _KBASE_DEBUG_KTRACE_H_ + +#include "debug/backend/mali_kbase_debug_ktrace_jm.h" + +/** + * kbase_ktrace_init - initialize kbase ktrace. + * @kbdev: kbase device + */ +int kbase_ktrace_init(struct kbase_device *kbdev); + +/** + * kbase_ktrace_term - terminate kbase ktrace. + * @kbdev: kbase device + */ +void kbase_ktrace_term(struct kbase_device *kbdev); + +/** + * kbase_ktrace_hook_wrapper - wrapper so that dumping ktrace can be done via a + * callback. + * @param: kbase device, cast to void pointer + */ +void kbase_ktrace_hook_wrapper(void *param); + +#ifdef CONFIG_DEBUG_FS +/** + * kbase_ktrace_debugfs_init - initialize kbase ktrace for debugfs usage, if + * the selected targets support it. + * @kbdev: kbase device + * + * There is no matching 'term' call, debugfs_remove_recursive() is sufficient. + */ +void kbase_ktrace_debugfs_init(struct kbase_device *kbdev); +#endif /* CONFIG_DEBUG_FS */ + +/* + * KTrace target for internal ringbuffer + */ +#if KBASE_KTRACE_TARGET_RBUF +/** + * kbasep_ktrace_add - internal function to add trace to the ringbuffer. + * @kbdev: kbase device + * @code: ktrace code + * @kctx: kbase context, or NULL if no context + * @flags: flags about the message + * @info_val: generic information about @code to add to the trace + * + * PRIVATE: do not use directly. Use KBASE_KTRACE_ADD() instead. 
+ */ +void kbasep_ktrace_add(struct kbase_device *kbdev, enum kbase_ktrace_code code, + struct kbase_context *kctx, kbase_ktrace_flag_t flags, + u64 info_val); + +/** + * kbasep_ktrace_clear - clear the trace ringbuffer + * @kbdev: kbase device + * + * PRIVATE: do not use directly. Use KBASE_KTRACE_CLEAR() instead. + */ +void kbasep_ktrace_clear(struct kbase_device *kbdev); + +/** + * kbasep_ktrace_dump - dump ktrace ringbuffer to dev_dbg(), then clear it + * @kbdev: kbase device + * + * PRIVATE: do not use directly. Use KBASE_KTRACE_DUMP() instead. + */ +void kbasep_ktrace_dump(struct kbase_device *kbdev); + +#define KBASE_KTRACE_RBUF_ADD(kbdev, code, kctx, info_val) \ + kbasep_ktrace_add(kbdev, KBASE_KTRACE_CODE(code), kctx, 0, \ + info_val) + +#define KBASE_KTRACE_RBUF_CLEAR(kbdev) \ + kbasep_ktrace_clear(kbdev) + +#define KBASE_KTRACE_RBUF_DUMP(kbdev) \ + kbasep_ktrace_dump(kbdev) + +#else /* KBASE_KTRACE_TARGET_RBUF */ + +#define KBASE_KTRACE_RBUF_ADD(kbdev, code, kctx, info_val) \ + do { \ + CSTD_UNUSED(kbdev); \ + CSTD_NOP(code); \ + CSTD_UNUSED(kctx); \ + CSTD_UNUSED(info_val); \ + CSTD_NOP(0); \ + } while (0) + +#define KBASE_KTRACE_RBUF_CLEAR(kbdev) \ + do { \ + CSTD_UNUSED(kbdev); \ + CSTD_NOP(0); \ + } while (0) +#define KBASE_KTRACE_RBUF_DUMP(kbdev) \ + do { \ + CSTD_UNUSED(kbdev); \ + CSTD_NOP(0); \ + } while (0) +#endif /* KBASE_KTRACE_TARGET_RBUF */ + +/* + * KTrace target for Linux's ftrace + */ +#if KBASE_KTRACE_TARGET_FTRACE +#include "mali_linux_trace.h" + +#define KBASE_KTRACE_FTRACE_ADD(kbdev, code, kctx, info_val) \ + trace_mali_##code(info_val) + +#else /* KBASE_KTRACE_TARGET_FTRACE */ +#define KBASE_KTRACE_FTRACE_ADD(kbdev, code, kctx, info_val) \ + do { \ + CSTD_UNUSED(kbdev); \ + CSTD_NOP(code); \ + CSTD_UNUSED(kctx); \ + CSTD_UNUSED(info_val); \ + CSTD_NOP(0); \ + } while (0) +#endif /* KBASE_KTRACE_TARGET_FTRACE */ + +/* No 'clear' implementation for ftrace yet */ +#define KBASE_KTRACE_FTRACE_CLEAR(kbdev) \ + do { \ + 
CSTD_UNUSED(kbdev); \ + CSTD_NOP(0); \ + } while (0) + +/* No 'dump' implementation for ftrace yet */ +#define KBASE_KTRACE_FTRACE_DUMP(kbdev) \ + do { \ + CSTD_UNUSED(kbdev); \ + CSTD_NOP(0); \ + } while (0) + +/* + * Master set of macros to route KTrace to any of the targets + */ + +/** + * KBASE_KTRACE_ADD - Add trace values + * @kbdev: kbase device + * @code: trace code + * @kctx: kbase context, or NULL if no context + * @info_val: generic information about @code to add to the trace + * + * Note: Any functions called through this macro will still be evaluated in + * Release builds (CONFIG_MALI_BIFROST_DEBUG not defined). Therefore, when + * KBASE_KTRACE_ENABLE == 0 any functions called to get the parameters supplied + * to this macro must: + * a) be static or static inline, and + * b) just return 0 and have no other statements present in the body. + */ +#define KBASE_KTRACE_ADD(kbdev, code, kctx, info_val) \ + do { \ + /* capture values that could come from non-pure function calls */ \ + u64 __info_val = info_val; \ + KBASE_KTRACE_RBUF_ADD(kbdev, code, kctx, __info_val); \ + KBASE_KTRACE_FTRACE_ADD(kbdev, code, kctx, __info_val); \ + } while (0) + +/** + * KBASE_KTRACE_CLEAR - Clear the trace, if applicable to the target(s) + * @kbdev: kbase device + */ +#define KBASE_KTRACE_CLEAR(kbdev) \ + do { \ + KBASE_KTRACE_RBUF_CLEAR(kbdev); \ + KBASE_KTRACE_FTRACE_CLEAR(kbdev); \ + } while (0) + +/** + * KBASE_KTRACE_DUMP - Dump the trace, if applicable to the target(s) + * @kbdev: kbase device + */ +#define KBASE_KTRACE_DUMP(kbdev) \ + do { \ + KBASE_KTRACE_RBUF_DUMP(kbdev); \ + KBASE_KTRACE_FTRACE_DUMP(kbdev); \ + } while (0) + +#endif /* _KBASE_DEBUG_KTRACE_H_ */ diff --git a/drivers/gpu/arm/bifrost/debug/mali_kbase_debug_ktrace_codes.h b/drivers/gpu/arm/bifrost/debug/mali_kbase_debug_ktrace_codes.h new file mode 100644 index 000000000000..364ed6091e6e --- /dev/null +++ b/drivers/gpu/arm/bifrost/debug/mali_kbase_debug_ktrace_codes.h @@ -0,0 +1,158 @@ +/* + * + * (C) 
COPYRIGHT 2011-2015,2018-2020 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +/* + * ***** IMPORTANT: THIS IS NOT A NORMAL HEADER FILE ***** + * ***** DO NOT INCLUDE DIRECTLY ***** + * ***** THE LACK OF HEADER GUARDS IS INTENTIONAL ***** + */ + +/* + * The purpose of this header file is just to contain a list of trace code + * identifiers + * + * Each identifier is wrapped in a macro, so that its string form and enum form + * can be created + * + * Each macro is separated with a comma, to allow insertion into an array + * initializer or enum definition block. + * + * This allows automatic creation of an enum and a corresponding array of + * strings + * + * Before #including, the includer MUST #define KBASE_KTRACE_CODE_MAKE_CODE. + * After #including, the includer MUST #undef KBASE_KTRACE_CODE_MAKE_CODE. 
+ * + * e.g.: + * #define KBASE_KTRACE_CODE( X ) KBASE_KTRACE_CODE_ ## X + * typedef enum + * { + * #define KBASE_KTRACE_CODE_MAKE_CODE( X ) KBASE_KTRACE_CODE( X ) + * #include "mali_kbase_debug_ktrace_codes.h" + * #undef KBASE_KTRACE_CODE_MAKE_CODE + * } kbase_ktrace_code; + * + * IMPORTANT: THIS FILE MUST NOT BE USED FOR ANY OTHER PURPOSE OTHER THAN THE ABOVE + * + * + * The use of the macro here is: + * - KBASE_KTRACE_CODE_MAKE_CODE( X ) + * + * Which produces: + * - For an enum, KBASE_KTRACE_CODE_X + * - For a string, "X" + * + * + * For example: + * - KBASE_KTRACE_CODE_MAKE_CODE( JM_JOB_COMPLETE ) expands to: + * - KBASE_KTRACE_CODE_JM_JOB_COMPLETE for the enum + * - "JM_JOB_COMPLETE" for the string + * - To use it to trace an event, do: + * - KBASE_KTRACE_ADD( kbdev, JM_JOB_COMPLETE, subcode, kctx, uatom, val ); + */ + +#if 0 /* Dummy section to avoid breaking formatting */ +int dummy_array[] = { +#endif + + /* + * Core events + */ + /* no info_val */ + KBASE_KTRACE_CODE_MAKE_CODE(CORE_CTX_DESTROY), + /* no info_val */ + KBASE_KTRACE_CODE_MAKE_CODE(CORE_CTX_HWINSTR_TERM), + /* info_val == GPU_IRQ_STATUS register */ + KBASE_KTRACE_CODE_MAKE_CODE(CORE_GPU_IRQ), + /* info_val == bits cleared */ + KBASE_KTRACE_CODE_MAKE_CODE(CORE_GPU_IRQ_CLEAR), + /* info_val == GPU_IRQ_STATUS register */ + KBASE_KTRACE_CODE_MAKE_CODE(CORE_GPU_IRQ_DONE), + KBASE_KTRACE_CODE_MAKE_CODE(CORE_GPU_SOFT_RESET), + KBASE_KTRACE_CODE_MAKE_CODE(CORE_GPU_HARD_RESET), + KBASE_KTRACE_CODE_MAKE_CODE(CORE_GPU_PRFCNT_CLEAR), + /* info_val == dump address */ + KBASE_KTRACE_CODE_MAKE_CODE(CORE_GPU_PRFCNT_SAMPLE), + KBASE_KTRACE_CODE_MAKE_CODE(CORE_GPU_CLEAN_INV_CACHES), + + /* + * Power Management Events + */ + KBASE_KTRACE_CODE_MAKE_CODE(PM_JOB_SUBMIT_AFTER_POWERING_UP), + KBASE_KTRACE_CODE_MAKE_CODE(PM_JOB_SUBMIT_AFTER_POWERED_UP), + KBASE_KTRACE_CODE_MAKE_CODE(PM_PWRON), + KBASE_KTRACE_CODE_MAKE_CODE(PM_PWRON_TILER), + KBASE_KTRACE_CODE_MAKE_CODE(PM_PWRON_L2), + 
KBASE_KTRACE_CODE_MAKE_CODE(PM_PWROFF), + KBASE_KTRACE_CODE_MAKE_CODE(PM_PWROFF_TILER), + KBASE_KTRACE_CODE_MAKE_CODE(PM_PWROFF_L2), + KBASE_KTRACE_CODE_MAKE_CODE(PM_CORES_POWERED), + KBASE_KTRACE_CODE_MAKE_CODE(PM_CORES_POWERED_TILER), + KBASE_KTRACE_CODE_MAKE_CODE(PM_CORES_POWERED_L2), + KBASE_KTRACE_CODE_MAKE_CODE(PM_CORES_CHANGE_DESIRED), + KBASE_KTRACE_CODE_MAKE_CODE(PM_CORES_CHANGE_DESIRED_TILER), + KBASE_KTRACE_CODE_MAKE_CODE(PM_CORES_CHANGE_AVAILABLE), + KBASE_KTRACE_CODE_MAKE_CODE(PM_CORES_CHANGE_AVAILABLE_TILER), + KBASE_KTRACE_CODE_MAKE_CODE(PM_CORES_AVAILABLE), + KBASE_KTRACE_CODE_MAKE_CODE(PM_CORES_AVAILABLE_TILER), + KBASE_KTRACE_CODE_MAKE_CODE(PM_DESIRED_REACHED), + KBASE_KTRACE_CODE_MAKE_CODE(PM_DESIRED_REACHED_TILER), + KBASE_KTRACE_CODE_MAKE_CODE(PM_RELEASE_CHANGE_SHADER_NEEDED), + KBASE_KTRACE_CODE_MAKE_CODE(PM_RELEASE_CHANGE_TILER_NEEDED), + KBASE_KTRACE_CODE_MAKE_CODE(PM_REQUEST_CHANGE_SHADER_NEEDED), + KBASE_KTRACE_CODE_MAKE_CODE(PM_REQUEST_CHANGE_TILER_NEEDED), + KBASE_KTRACE_CODE_MAKE_CODE(PM_WAKE_WAITERS), + /* info_val == kbdev->pm.active_count*/ + KBASE_KTRACE_CODE_MAKE_CODE(PM_CONTEXT_ACTIVE), + /* info_val == kbdev->pm.active_count*/ + KBASE_KTRACE_CODE_MAKE_CODE(PM_CONTEXT_IDLE), + KBASE_KTRACE_CODE_MAKE_CODE(PM_GPU_ON), + KBASE_KTRACE_CODE_MAKE_CODE(PM_GPU_OFF), + /* info_val == policy number, or -1 for "Already changing" */ + KBASE_KTRACE_CODE_MAKE_CODE(PM_SET_POLICY), + KBASE_KTRACE_CODE_MAKE_CODE(PM_CA_SET_POLICY), + /* info_val == policy number */ + KBASE_KTRACE_CODE_MAKE_CODE(PM_CURRENT_POLICY_INIT), + /* info_val == policy number */ + KBASE_KTRACE_CODE_MAKE_CODE(PM_CURRENT_POLICY_TERM), + + /* + * Context Scheduler events + */ + /* info_val == kctx->refcount */ + KBASE_KTRACE_CODE_MAKE_CODE(SCHED_RETAIN_CTX_NOLOCK), + /* info_val == kctx->refcount */ + KBASE_KTRACE_CODE_MAKE_CODE(SCHED_RELEASE_CTX), + + +#include "debug/backend/mali_kbase_debug_ktrace_codes_jm.h" + /* + * Unused code just to make it easier to not have a comma at 
the end. + * All other codes MUST come before this + */ + KBASE_KTRACE_CODE_MAKE_CODE(DUMMY) + +#if 0 /* Dummy section to avoid breaking formatting */ +}; +#endif + +/* ***** THE LACK OF HEADER GUARDS IS INTENTIONAL ***** */ diff --git a/drivers/gpu/arm/bifrost/debug/mali_kbase_debug_ktrace_defs.h b/drivers/gpu/arm/bifrost/debug/mali_kbase_debug_ktrace_defs.h new file mode 100644 index 000000000000..eda31c7afbda --- /dev/null +++ b/drivers/gpu/arm/bifrost/debug/mali_kbase_debug_ktrace_defs.h @@ -0,0 +1,152 @@ +/* + * + * (C) COPYRIGHT 2020 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. 
+ * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +#ifndef _KBASE_DEBUG_KTRACE_DEFS_H_ +#define _KBASE_DEBUG_KTRACE_DEFS_H_ + +/* Enable SW tracing when set */ +#if defined(CONFIG_MALI_BIFROST_ENABLE_TRACE) || defined(CONFIG_MALI_BIFROST_SYSTEM_TRACE) +#define KBASE_KTRACE_ENABLE 1 +#endif + +#ifndef KBASE_KTRACE_ENABLE +#ifdef CONFIG_MALI_BIFROST_DEBUG +#define KBASE_KTRACE_ENABLE 1 +#else /* CONFIG_MALI_BIFROST_DEBUG */ +#define KBASE_KTRACE_ENABLE 0 +#endif /* CONFIG_MALI_BIFROST_DEBUG */ +#endif /* KBASE_KTRACE_ENABLE */ + +/* Select targets for recording of trace: + * + */ +#if KBASE_KTRACE_ENABLE + +#ifdef CONFIG_MALI_BIFROST_SYSTEM_TRACE +#define KBASE_KTRACE_TARGET_FTRACE 1 +#else /* CONFIG_MALI_BIFROST_SYSTEM_TRACE */ +#define KBASE_KTRACE_TARGET_FTRACE 0 +#endif /* CONFIG_MALI_BIFROST_SYSTEM_TRACE */ + +#ifdef CONFIG_MALI_BIFROST_ENABLE_TRACE +#define KBASE_KTRACE_TARGET_RBUF 1 +#else /* CONFIG_MALI_BIFROST_ENABLE_TRACE*/ +#define KBASE_KTRACE_TARGET_RBUF 0 +#endif /* CONFIG_MALI_BIFROST_ENABLE_TRACE */ + +#else /* KBASE_KTRACE_ENABLE */ +#define KBASE_KTRACE_TARGET_FTRACE 0 +#define KBASE_KTRACE_TARGET_RBUF 0 +#endif /* KBASE_KTRACE_ENABLE */ + +/* + * NOTE: KBASE_KTRACE_VERSION_MAJOR, KBASE_KTRACE_VERSION_MINOR are kept in + * the backend, since updates can be made to one backend in a way that doesn't + * affect the other. + * + * However, modifying the common part could require both backend versions to be + * updated. + */ + +#if KBASE_KTRACE_TARGET_RBUF +typedef u8 kbase_ktrace_flag_t; +typedef u8 kbase_ktrace_code_t; + +/* + * struct kbase_ktrace_backend - backend specific part of a trace message + * + * At the very least, this must contain a kbase_ktrace_code_t 'code' member and + * a kbase_ktrace_flag_t 'flags' member + */ +struct kbase_ktrace_backend; + +#include "debug/backend/mali_kbase_debug_ktrace_defs_jm.h" + +/* Indicates if the trace message has backend related info. 
+ * + * If not set, consider the &kbase_ktrace_backend part of a &kbase_ktrace_msg + * as uninitialized, apart from the mandatory parts: + * - code + * - flags + */ +#define KBASE_KTRACE_FLAG_BACKEND (((kbase_ktrace_flag_t)1) << 7) + +#define KBASE_KTRACE_SHIFT 8 /* 256 entries */ +#define KBASE_KTRACE_SIZE (1 << KBASE_KTRACE_SHIFT) +#define KBASE_KTRACE_MASK ((1 << KBASE_KTRACE_SHIFT)-1) + +#define KBASE_KTRACE_CODE(X) KBASE_KTRACE_CODE_ ## X + +/* Note: compiletime_assert() about this against kbase_ktrace_code_t is in + * kbase_ktrace_init() + */ +enum kbase_ktrace_code { + /* + * IMPORTANT: USE OF SPECIAL #INCLUDE OF NON-STANDARD HEADER FILE + * THIS MUST BE USED AT THE START OF THE ENUM + */ +#define KBASE_KTRACE_CODE_MAKE_CODE(X) KBASE_KTRACE_CODE(X) +#include +#undef KBASE_KTRACE_CODE_MAKE_CODE + /* Comma on its own, to extend the list */ + , + /* Must be the last in the enum */ + KBASE_KTRACE_CODE_COUNT +}; + +/** + * struct kbase_ktrace_msg - object representing a trace message added to the + * trace ringbuffer &kbase_ktrace.rbuf + * @timestamp: CPU timestamp at which the trace message was added. + * @thread_id: id of the thread in the context of which trace message was + * added. + * @cpu: indicates which CPU the @thread_id was scheduled on when the + * trace message was added. + * @kctx: Pointer to the kbase context for which the trace message was + * added. Will be NULL for certain trace messages associated with + * the &kbase_device itself, such as power management events. + * Will point to the appropriate context corresponding to + * backend-specific events. + * @info_val: value specific to the type of event being traced. Refer to the + * specific code in enum kbase_ktrace_code + * @backend: backend-specific trace information. 
All backends must implement + * a minimum common set of members + */ +struct kbase_ktrace_msg { + struct timespec64 timestamp; + u32 thread_id; + u32 cpu; + void *kctx; + u64 info_val; + + struct kbase_ktrace_backend backend; +}; + +struct kbase_ktrace { + spinlock_t lock; + u16 first_out; + u16 next_in; + struct kbase_ktrace_msg *rbuf; +}; + +#endif /* KBASE_KTRACE_TARGET_RBUF */ +#endif /* _KBASE_DEBUG_KTRACE_DEFS_H_ */ diff --git a/drivers/gpu/arm/bifrost/debug/mali_kbase_debug_ktrace_internal.h b/drivers/gpu/arm/bifrost/debug/mali_kbase_debug_ktrace_internal.h new file mode 100644 index 000000000000..e450760e3426 --- /dev/null +++ b/drivers/gpu/arm/bifrost/debug/mali_kbase_debug_ktrace_internal.h @@ -0,0 +1,89 @@ +/* + * + * (C) COPYRIGHT 2020 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. 
+ * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +#ifndef _KBASE_DEBUG_KTRACE_INTERNAL_H_ +#define _KBASE_DEBUG_KTRACE_INTERNAL_H_ + +#if KBASE_KTRACE_TARGET_RBUF + +#define KTRACE_DUMP_MESSAGE_SIZE 256 + +/** + * kbasep_ktrace_backend_format_header - format the backend part of the header + * @buffer: buffer to write to + * @sz: size of @buffer in bytes + * @written: pointer to storage for updating bytes written so far to @buffer + * + * The backend must format only the non-common backend specific parts of the + * header. It must format them as though they were standalone. The caller will + * handle adding any delimiters around this. + */ +void kbasep_ktrace_backend_format_header(char *buffer, int sz, s32 *written); + +/** + * kbasep_ktrace_backend_format_msg - format the backend part of the message + * @trace_msg: ktrace message + * @buffer: buffer to write to + * @sz: size of @buffer in bytes + * @written: pointer to storage for updating bytes written so far to @buffer + * + * The backend must format only the non-common backend specific parts of the + * message. It must format them as though they were standalone. The caller will + * handle adding any delimiters around this. + * + * A caller may have the flags member of @trace_msg with + * %KBASE_KTRACE_FLAG_BACKEND clear. The backend must handle that setting + * appropriately. + */ +void kbasep_ktrace_backend_format_msg(struct kbase_ktrace_msg *trace_msg, + char *buffer, int sz, s32 *written); + + +/** + * kbasep_ktrace_reserve - internal function to reserve space for a ktrace + * message + * @ktrace: kbase device's ktrace + * + * This may also empty the oldest entry in the ringbuffer to make space. 
+ */ +struct kbase_ktrace_msg *kbasep_ktrace_reserve(struct kbase_ktrace *ktrace); + +/** + * kbasep_ktrace_msg_init - internal function to initialize just the common + * part of a ktrace message + * @ktrace: kbase device's ktrace + * @trace_msg: ktrace message to initialize + * @code: ktrace code + * @kctx: kbase context, or NULL if no context + * @flags: flags about the message + * @info_val: generic information about @code to add to the trace + * + * The common part includes the mandatory parts of the backend part + */ +void kbasep_ktrace_msg_init(struct kbase_ktrace *ktrace, + struct kbase_ktrace_msg *trace_msg, enum kbase_ktrace_code code, + struct kbase_context *kctx, kbase_ktrace_flag_t flags, + u64 info_val); + +#endif /* KBASE_KTRACE_TARGET_RBUF */ + +#endif /* _KBASE_DEBUG_KTRACE_INTERNAL_H_ */ diff --git a/drivers/gpu/arm/bifrost/debug/mali_kbase_debug_linux_ktrace.h b/drivers/gpu/arm/bifrost/debug/mali_kbase_debug_linux_ktrace.h new file mode 100644 index 000000000000..18e4f7c4f069 --- /dev/null +++ b/drivers/gpu/arm/bifrost/debug/mali_kbase_debug_linux_ktrace.h @@ -0,0 +1,99 @@ +/* + * + * (C) COPYRIGHT 2014,2018,2020 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. 
+ * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +/* + * NOTE: This must **only** be included through mali_linux_trace.h, + * otherwise it will fail to setup tracepoints correctly + */ + +#if !defined(_KBASE_DEBUG_LINUX_KTRACE_H_) || defined(TRACE_HEADER_MULTI_READ) +#define _KBASE_DEBUG_LINUX_KTRACE_H_ + +#if KBASE_KTRACE_TARGET_FTRACE + +DECLARE_EVENT_CLASS(mali_add_template, + TP_PROTO(u64 info_val), + TP_ARGS(info_val), + TP_STRUCT__entry( + __field(u64, info_val) + ), + TP_fast_assign( + __entry->info_val = info_val; + ), + TP_printk("info=0x%llx", __entry->info_val) +); + +#define DEFINE_MALI_ADD_EVENT(name) \ +DEFINE_EVENT(mali_add_template, mali_##name, \ + TP_PROTO(u64 info_val), \ + TP_ARGS(info_val)) +DEFINE_MALI_ADD_EVENT(CORE_CTX_DESTROY); +DEFINE_MALI_ADD_EVENT(CORE_CTX_HWINSTR_TERM); +DEFINE_MALI_ADD_EVENT(CORE_GPU_IRQ); +DEFINE_MALI_ADD_EVENT(CORE_GPU_IRQ_CLEAR); +DEFINE_MALI_ADD_EVENT(CORE_GPU_IRQ_DONE); +DEFINE_MALI_ADD_EVENT(CORE_GPU_SOFT_RESET); +DEFINE_MALI_ADD_EVENT(CORE_GPU_HARD_RESET); +DEFINE_MALI_ADD_EVENT(CORE_GPU_PRFCNT_SAMPLE); +DEFINE_MALI_ADD_EVENT(CORE_GPU_PRFCNT_CLEAR); +DEFINE_MALI_ADD_EVENT(CORE_GPU_CLEAN_INV_CACHES); +DEFINE_MALI_ADD_EVENT(PM_CORES_CHANGE_DESIRED); +DEFINE_MALI_ADD_EVENT(PM_JOB_SUBMIT_AFTER_POWERING_UP); +DEFINE_MALI_ADD_EVENT(PM_JOB_SUBMIT_AFTER_POWERED_UP); +DEFINE_MALI_ADD_EVENT(PM_PWRON); +DEFINE_MALI_ADD_EVENT(PM_PWRON_TILER); +DEFINE_MALI_ADD_EVENT(PM_PWRON_L2); +DEFINE_MALI_ADD_EVENT(PM_PWROFF); +DEFINE_MALI_ADD_EVENT(PM_PWROFF_TILER); +DEFINE_MALI_ADD_EVENT(PM_PWROFF_L2); +DEFINE_MALI_ADD_EVENT(PM_CORES_POWERED); +DEFINE_MALI_ADD_EVENT(PM_CORES_POWERED_TILER); +DEFINE_MALI_ADD_EVENT(PM_CORES_POWERED_L2); +DEFINE_MALI_ADD_EVENT(PM_DESIRED_REACHED); +DEFINE_MALI_ADD_EVENT(PM_DESIRED_REACHED_TILER); +DEFINE_MALI_ADD_EVENT(PM_REQUEST_CHANGE_SHADER_NEEDED); +DEFINE_MALI_ADD_EVENT(PM_REQUEST_CHANGE_TILER_NEEDED); +DEFINE_MALI_ADD_EVENT(PM_RELEASE_CHANGE_SHADER_NEEDED); 
+DEFINE_MALI_ADD_EVENT(PM_RELEASE_CHANGE_TILER_NEEDED); +DEFINE_MALI_ADD_EVENT(PM_CORES_AVAILABLE); +DEFINE_MALI_ADD_EVENT(PM_CORES_AVAILABLE_TILER); +DEFINE_MALI_ADD_EVENT(PM_CORES_CHANGE_AVAILABLE); +DEFINE_MALI_ADD_EVENT(PM_CORES_CHANGE_AVAILABLE_TILER); +DEFINE_MALI_ADD_EVENT(PM_GPU_ON); +DEFINE_MALI_ADD_EVENT(PM_GPU_OFF); +DEFINE_MALI_ADD_EVENT(PM_SET_POLICY); +DEFINE_MALI_ADD_EVENT(PM_CURRENT_POLICY_INIT); +DEFINE_MALI_ADD_EVENT(PM_CURRENT_POLICY_TERM); +DEFINE_MALI_ADD_EVENT(PM_CA_SET_POLICY); +DEFINE_MALI_ADD_EVENT(PM_CONTEXT_ACTIVE); +DEFINE_MALI_ADD_EVENT(PM_CONTEXT_IDLE); +DEFINE_MALI_ADD_EVENT(PM_WAKE_WAITERS); +DEFINE_MALI_ADD_EVENT(SCHED_RETAIN_CTX_NOLOCK); +DEFINE_MALI_ADD_EVENT(SCHED_RELEASE_CTX); + +#undef DEFINE_MALI_ADD_EVENT + +#include "mali_kbase_debug_linux_ktrace_jm.h" + +#endif /* KBASE_KTRACE_TARGET_FTRACE */ + +#endif /* !defined(_KBASE_DEBUG_LINUX_KTRACE_H_) || defined(TRACE_HEADER_MULTI_READ) */ diff --git a/drivers/gpu/arm/bifrost/device/backend/mali_kbase_device_jm.c b/drivers/gpu/arm/bifrost/device/backend/mali_kbase_device_jm.c new file mode 100644 index 000000000000..45cbab74b00f --- /dev/null +++ b/drivers/gpu/arm/bifrost/device/backend/mali_kbase_device_jm.c @@ -0,0 +1,260 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * + * (C) COPYRIGHT 2019-2020 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +#include "../mali_kbase_device_internal.h" +#include "../mali_kbase_device.h" + +#include +#include +#include +#include + +#ifdef CONFIG_MALI_BIFROST_NO_MALI +#include +#endif + +#ifdef CONFIG_MALI_ARBITER_SUPPORT +#include +#endif + +#include +#include +#include +#include +#include +#include + +/** + * kbase_backend_late_init - Perform any backend-specific initialization. + * @kbdev: Device pointer + * + * Return: 0 on success, or an error code on failure. + */ +static int kbase_backend_late_init(struct kbase_device *kbdev) +{ + int err; + + err = kbase_hwaccess_pm_init(kbdev); + if (err) + return err; + + err = kbase_reset_gpu_init(kbdev); + if (err) + goto fail_reset_gpu_init; + + err = kbase_hwaccess_pm_powerup(kbdev, PM_HW_ISSUES_DETECT); + if (err) + goto fail_pm_powerup; + + err = kbase_backend_timer_init(kbdev); + if (err) + goto fail_timer; + +#ifdef CONFIG_MALI_BIFROST_DEBUG +#ifndef CONFIG_MALI_BIFROST_NO_MALI + if (kbasep_common_test_interrupt_handlers(kbdev) != 0) { + dev_err(kbdev->dev, "Interrupt assignment check failed.\n"); + err = -EINVAL; + goto fail_interrupt_test; + } +#endif /* !CONFIG_MALI_BIFROST_NO_MALI */ +#endif /* CONFIG_MALI_BIFROST_DEBUG */ + + err = kbase_job_slot_init(kbdev); + if (err) + goto fail_job_slot; + + /* Do the initialisation of devfreq. + * Devfreq needs backend_timer_init() for completion of its + * initialisation and it also needs to catch the first callback + * occurrence of the runtime_suspend event for maintaining state + * coherence with the backend power management, hence needs to be + * placed before the kbase_pm_context_idle(). 
+ */ + err = kbase_backend_devfreq_init(kbdev); + if (err) + goto fail_devfreq_init; + + /* Idle the GPU and/or cores, if the policy wants it to */ + kbase_pm_context_idle(kbdev); + + /* Update gpuprops with L2_FEATURES if applicable */ + err = kbase_gpuprops_update_l2_features(kbdev); + if (err) + goto fail_update_l2_features; + + init_waitqueue_head(&kbdev->hwaccess.backend.reset_wait); + + return 0; + +fail_update_l2_features: + /* devfreq is already up at this point, so it must be torn down too */ + kbase_backend_devfreq_term(kbdev); +fail_devfreq_init: + kbase_job_slot_term(kbdev); +fail_job_slot: + +#ifdef CONFIG_MALI_BIFROST_DEBUG +#ifndef CONFIG_MALI_BIFROST_NO_MALI +fail_interrupt_test: +#endif /* !CONFIG_MALI_BIFROST_NO_MALI */ +#endif /* CONFIG_MALI_BIFROST_DEBUG */ + + kbase_backend_timer_term(kbdev); +fail_timer: + kbase_hwaccess_pm_halt(kbdev); +fail_pm_powerup: + kbase_reset_gpu_term(kbdev); +fail_reset_gpu_init: + kbase_hwaccess_pm_term(kbdev); + + return err; +} + +/** + * kbase_backend_late_term - Perform any backend-specific termination. + * @kbdev: Device pointer + */ +static void kbase_backend_late_term(struct kbase_device *kbdev) +{ + kbase_backend_devfreq_term(kbdev); + kbase_job_slot_halt(kbdev); + kbase_job_slot_term(kbdev); + kbase_backend_timer_term(kbdev); + kbase_hwaccess_pm_halt(kbdev); + kbase_reset_gpu_term(kbdev); + kbase_hwaccess_pm_term(kbdev); +} + +static const struct kbase_device_init dev_init[] = { +#ifdef CONFIG_MALI_BIFROST_NO_MALI + {kbase_gpu_device_create, kbase_gpu_device_destroy, + "Dummy model initialization failed"}, +#else + {assign_irqs, NULL, + "IRQ search failed"}, + {registers_map, registers_unmap, + "Register map failed"}, +#endif + {kbase_device_io_history_init, kbase_device_io_history_term, + "Register access history initialization failed"}, + {kbase_device_pm_init, kbase_device_pm_term, + "Power management initialization failed"}, + {kbase_device_early_init, kbase_device_early_term, + "Early device initialization failed"}, + {kbase_device_populate_max_freq, NULL, + "Populating max frequency failed"}, + 
{kbase_device_misc_init, kbase_device_misc_term, + "Miscellaneous device initialization failed"}, + {kbase_ctx_sched_init, kbase_ctx_sched_term, + "Context scheduler initialization failed"}, + {kbase_mem_init, kbase_mem_term, + "Memory subsystem initialization failed"}, + {kbase_device_coherency_init, NULL, + "Device coherency init failed"}, + {kbase_protected_mode_init, kbase_protected_mode_term, + "Protected mode subsystem initialization failed"}, + {kbase_device_list_init, kbase_device_list_term, + "Device list setup failed"}, + {kbasep_js_devdata_init, kbasep_js_devdata_term, + "Job JS devdata initialization failed"}, + {kbase_device_timeline_init, kbase_device_timeline_term, + "Timeline stream initialization failed"}, + {kbase_device_hwcnt_backend_gpu_init, + kbase_device_hwcnt_backend_gpu_term, + "GPU hwcnt backend creation failed"}, + {kbase_device_hwcnt_context_init, kbase_device_hwcnt_context_term, + "GPU hwcnt context initialization failed"}, + {kbase_device_hwcnt_virtualizer_init, + kbase_device_hwcnt_virtualizer_term, + "GPU hwcnt virtualizer initialization failed"}, + {kbase_device_vinstr_init, kbase_device_vinstr_term, + "Virtual instrumentation initialization failed"}, + {kbase_backend_late_init, kbase_backend_late_term, + "Late backend initialization failed"}, +#ifdef MALI_KBASE_BUILD + {kbase_debug_job_fault_dev_init, kbase_debug_job_fault_dev_term, + "Job fault debug initialization failed"}, + {kbase_device_debugfs_init, kbase_device_debugfs_term, + "DebugFS initialization failed"}, + /* Sysfs init needs to happen before registering the device with + * misc_register(), otherwise it causes a race condition between + * registering the device and a uevent event being generated for + * userspace, causing udev rules to run which might expect certain + * sysfs attributes present. As a result of the race condition + * we avoid, some Mali sysfs entries may have appeared to udev + * to not exist. 
+ * For more information, see + * https://www.kernel.org/doc/Documentation/driver-model/device.txt, the + * paragraph that starts with "Word of warning", currently the + * second-last paragraph. + */ + {kbase_sysfs_init, kbase_sysfs_term, "SysFS group creation failed"}, + {kbase_device_misc_register, kbase_device_misc_deregister, + "Misc device registration failed"}, +#ifdef CONFIG_MALI_BUSLOG + {buslog_init, buslog_term, "Bus log client registration failed"}, +#endif + {kbase_gpuprops_populate_user_buffer, kbase_gpuprops_free_user_buffer, + "GPU property population failed"}, +#endif + {kbase_dummy_job_wa_load, kbase_dummy_job_wa_cleanup, + "Dummy job workaround load failed"}, +}; + +static void kbase_device_term_partial(struct kbase_device *kbdev, + unsigned int i) +{ + while (i-- > 0) { + if (dev_init[i].term) + dev_init[i].term(kbdev); + } +} + +void kbase_device_term(struct kbase_device *kbdev) +{ + kbase_device_term_partial(kbdev, ARRAY_SIZE(dev_init)); + kbasep_js_devdata_halt(kbdev); + kbase_mem_halt(kbdev); +} + +int kbase_device_init(struct kbase_device *kbdev) +{ + int err = 0; + unsigned int i = 0; + + dev_info(kbdev->dev, "Kernel DDK version %s", MALI_RELEASE_NAME); + + kbase_device_id_init(kbdev); + kbase_disjoint_init(kbdev); + + for (i = 0; i < ARRAY_SIZE(dev_init); i++) { + err = dev_init[i].init(kbdev); + if (err) { + dev_err(kbdev->dev, "%s error = %d\n", + dev_init[i].err_mes, err); + kbase_device_term_partial(kbdev, i); + break; + } + } + + return err; +} diff --git a/drivers/gpu/arm/bifrost/device/mali_kbase_device.c b/drivers/gpu/arm/bifrost/device/mali_kbase_device.c new file mode 100644 index 000000000000..76f14e5aa2da --- /dev/null +++ b/drivers/gpu/arm/bifrost/device/mali_kbase_device.c @@ -0,0 +1,429 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * + * (C) COPYRIGHT 2010-2020 ARM Limited. All rights reserved. 
+ * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + + + +/* + * Base kernel device APIs + */ + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#include +#include "mali_kbase_vinstr.h" +#include "mali_kbase_hwcnt_context.h" +#include "mali_kbase_hwcnt_virtualizer.h" + +#include "mali_kbase_device.h" +#include "mali_kbase_device_internal.h" +#include "backend/gpu/mali_kbase_pm_internal.h" +#include "backend/gpu/mali_kbase_irq_internal.h" + +#ifdef CONFIG_MALI_ARBITER_SUPPORT +#include "arbiter/mali_kbase_arbiter_pm.h" +#endif /* CONFIG_MALI_ARBITER_SUPPORT */ + +/* NOTE: Magic - 0x45435254 (TRCE in ASCII). + * Supports tracing feature provided in the base module. + * Please keep it in sync with the value of base module. + */ +#define TRACE_BUFFER_HEADER_SPECIAL 0x45435254 + +/* Number of register accesses for the buffer that we allocate during + * initialization time. The buffer size can be changed later via debugfs. 
+ */
+#define KBASEP_DEFAULT_REGISTER_HISTORY_SIZE ((u16)512)
+
+static DEFINE_MUTEX(kbase_dev_list_lock);
+static LIST_HEAD(kbase_dev_list);
+static int kbase_dev_nr;
+
+/* Allocate a zero-filled struct kbase_device (kzalloc, GFP_KERNEL). */
+struct kbase_device *kbase_device_alloc(void)
+{
+	return kzalloc(sizeof(struct kbase_device), GFP_KERNEL);
+}
+
+/**
+ * kbase_device_as_init - Set up one hardware address space.
+ * @kbdev:	Device pointer
+ * @i:		Index into kbdev->as[]
+ *
+ * Records the index, clears the stored bus/page fault addresses, allocates
+ * the address space's workqueue and initialises its two fault work items.
+ *
+ * Return: 0 on success, -EINVAL if the workqueue could not be allocated.
+ */
+static int kbase_device_as_init(struct kbase_device *kbdev, int i)
+{
+	kbdev->as[i].number = i;
+	kbdev->as[i].bf_data.addr = 0ULL;
+	kbdev->as[i].pf_data.addr = 0ULL;
+
+	kbdev->as[i].pf_wq = alloc_workqueue("mali_mmu%d", 0, 1, i);
+	if (!kbdev->as[i].pf_wq)
+		return -EINVAL;
+
+	INIT_WORK(&kbdev->as[i].work_pagefault, page_fault_worker);
+	INIT_WORK(&kbdev->as[i].work_busfault, bus_fault_worker);
+
+	return 0;
+}
+
+/* Destroy the workqueue created by kbase_device_as_init() for index @i. */
+static void kbase_device_as_term(struct kbase_device *kbdev, int i)
+{
+	destroy_workqueue(kbdev->as[i].pf_wq);
+}
+
+/**
+ * kbase_device_all_as_init - Set up every hardware address space.
+ * @kbdev:	Device pointer
+ *
+ * Initialises indices 0 .. nr_hw_address_spaces - 1. If index i fails, the
+ * indices 0 .. i - 1 that were already set up are torn down again.
+ *
+ * Return: 0 on success, or the error from kbase_device_as_init().
+ */
+static int kbase_device_all_as_init(struct kbase_device *kbdev)
+{
+	int i, err;
+
+	for (i = 0; i < kbdev->nr_hw_address_spaces; i++) {
+		err = kbase_device_as_init(kbdev, i);
+		if (err)
+			goto free_workqs;
+	}
+
+	return 0;
+
+free_workqs:
+	/* Index i is the one that failed, so its workqueue was never
+	 * allocated: start the unwind at i - 1 and go all the way down to 0.
+	 */
+	while (i-- > 0)
+		kbase_device_as_term(kbdev, i);
+
+	return err;
+}
+
+/* Tear down every hardware address space set up by
+ * kbase_device_all_as_init().
+ */
+static void kbase_device_all_as_term(struct kbase_device *kbdev)
+{
+	int i;
+
+	for (i = 0; i < kbdev->nr_hw_address_spaces; i++)
+		kbase_device_as_term(kbdev, i);
+}
+
+/**
+ * kbase_device_misc_init - Miscellaneous device initialisation.
+ * @kbdev:	Device pointer
+ *
+ * Initialises MMU locks, reads the optional snoop SMC properties (ARM64),
+ * sets the HW issue/feature masks and GPU property features, configures the
+ * DMA masks from the MMU physical address width, sets up the address spaces,
+ * ktrace and the instrumentation backend, and selects the MMU mode.
+ *
+ * Return: 0 on success, or an error code on failure. ktrace and the address
+ * spaces are torn down again if a later step fails.
+ */
+int kbase_device_misc_init(struct kbase_device * const kbdev)
+{
+	int err;
+#ifdef CONFIG_ARM64
+	struct device_node *np = NULL;
+#endif /* CONFIG_ARM64 */
+
+	spin_lock_init(&kbdev->mmu_mask_change);
+	mutex_init(&kbdev->mmu_hw_mutex);
+#ifdef CONFIG_ARM64
+	kbdev->cci_snoop_enabled = false;
+	np = kbdev->dev->of_node;
+	if (np != NULL) {
+		if (of_property_read_u32(np, "snoop_enable_smc",
+					&kbdev->snoop_enable_smc))
+			kbdev->snoop_enable_smc = 0;
+		if (of_property_read_u32(np, "snoop_disable_smc",
+					&kbdev->snoop_disable_smc))
+			kbdev->snoop_disable_smc = 0;
+		/* Either both or none of the calls should be provided. */
+		if (!((kbdev->snoop_disable_smc == 0
+			&& kbdev->snoop_enable_smc == 0)
+			|| (kbdev->snoop_disable_smc != 0
+			&& kbdev->snoop_enable_smc != 0))) {
+			WARN_ON(1);
+			err = -EINVAL;
+			goto fail;
+		}
+	}
+#endif /* CONFIG_ARM64 */
+	/* Get the list of workarounds for issues on the current HW
+	 * (identified by the GPU_ID register)
+	 */
+	err = kbase_hw_set_issues_mask(kbdev);
+	if (err)
+		goto fail;
+
+	/* Set the list of features available on the current HW
+	 * (identified by the GPU_ID register)
+	 */
+	kbase_hw_set_features_mask(kbdev);
+
+	err = kbase_gpuprops_set_features(kbdev);
+	if (err)
+		goto fail;
+
+	/* On Linux 4.0+, dma coherency is determined from device tree */
+#if defined(CONFIG_ARM64) && LINUX_VERSION_CODE < KERNEL_VERSION(4, 0, 0)
+	set_dma_ops(kbdev->dev, &noncoherent_swiotlb_dma_ops);
+#endif
+
+	/* Workaround a pre-3.13 Linux issue, where dma_mask is NULL when our
+	 * device structure was created by device-tree
+	 */
+	if (!kbdev->dev->dma_mask)
+		kbdev->dev->dma_mask = &kbdev->dev->coherent_dma_mask;
+
+	err = dma_set_mask(kbdev->dev,
+			DMA_BIT_MASK(kbdev->gpu_props.mmu.pa_bits));
+	if (err)
+		goto dma_set_mask_failed;
+
+	err = dma_set_coherent_mask(kbdev->dev,
+			DMA_BIT_MASK(kbdev->gpu_props.mmu.pa_bits));
+	if (err)
+		goto dma_set_mask_failed;
+
+	kbdev->nr_hw_address_spaces = kbdev->gpu_props.num_address_spaces;
+
+	err = kbase_device_all_as_init(kbdev);
+	if (err)
+		goto as_init_failed;
+
+	spin_lock_init(&kbdev->hwcnt.lock);
+
+	err = kbase_ktrace_init(kbdev);
+	if (err)
+		goto term_as;
+
+	init_waitqueue_head(&kbdev->cache_clean_wait);
+
+	kbase_debug_assert_register_hook(&kbase_ktrace_hook_wrapper, kbdev);
+
+	atomic_set(&kbdev->ctx_num, 0);
+
+	err = kbase_instr_backend_init(kbdev);
+	if (err)
+		goto term_trace;
+
+	kbdev->pm.dvfs_period = DEFAULT_PM_DVFS_PERIOD;
+
+	kbdev->reset_timeout_ms = DEFAULT_RESET_TIMEOUT_MS;
+
+	if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_AARCH64_MMU))
+		kbdev->mmu_mode = kbase_mmu_mode_get_aarch64();
+	else
+		kbdev->mmu_mode = kbase_mmu_mode_get_lpae();
+
+	mutex_init(&kbdev->kctx_list_lock);
+	INIT_LIST_HEAD(&kbdev->kctx_list);
+
+	spin_lock_init(&kbdev->hwaccess_lock);
+
+	return 0;
+term_trace:
+	kbase_ktrace_term(kbdev);
+term_as:
+	kbase_device_all_as_term(kbdev);
+as_init_failed:
+dma_set_mask_failed:
+fail:
+	return err;
+}
+
+/* Undo kbase_device_misc_init(): warns if contexts are still listed, then
+ * terminates the instrumentation backend, ktrace and the address spaces.
+ */
+void kbase_device_misc_term(struct kbase_device *kbdev)
+{
+	KBASE_DEBUG_ASSERT(kbdev);
+
+	WARN_ON(!list_empty(&kbdev->kctx_list));
+
+#if KBASE_KTRACE_ENABLE
+	kbase_debug_assert_register_hook(NULL, NULL);
+#endif
+
+	kbase_instr_backend_term(kbdev);
+
+	kbase_ktrace_term(kbdev);
+
+	kbase_device_all_as_term(kbdev);
+}
+
+/* Free a device structure obtained from kbase_device_alloc(). */
+void kbase_device_free(struct kbase_device *kbdev)
+{
+	kfree(kbdev);
+}
+
+/* Name the device "<kbase_drv_name><kbase_dev_nr>" and record the number as
+ * its id. The counter itself is advanced by kbase_increment_device_id().
+ */
+void kbase_device_id_init(struct kbase_device *kbdev)
+{
+	scnprintf(kbdev->devname, DEVNAME_SIZE, "%s%d", kbase_drv_name,
+			kbase_dev_nr);
+	kbdev->id = kbase_dev_nr;
+}
+
+/* Advance the device number used for the next kbase_device_id_init(). */
+void kbase_increment_device_id(void)
+{
+	kbase_dev_nr++;
+}
+
+/* The functions below are thin init/term wrappers that bind a subsystem's
+ * constructor/destructor to the matching fields of struct kbase_device.
+ */
+int kbase_device_hwcnt_backend_gpu_init(struct kbase_device *kbdev)
+{
+	return kbase_hwcnt_backend_gpu_create(kbdev, &kbdev->hwcnt_gpu_iface);
+}
+
+void kbase_device_hwcnt_backend_gpu_term(struct kbase_device *kbdev)
+{
+	kbase_hwcnt_backend_gpu_destroy(&kbdev->hwcnt_gpu_iface);
+}
+
+int kbase_device_hwcnt_context_init(struct kbase_device *kbdev)
+{
+	return kbase_hwcnt_context_init(&kbdev->hwcnt_gpu_iface,
+			&kbdev->hwcnt_gpu_ctx);
+}
+
+void kbase_device_hwcnt_context_term(struct kbase_device *kbdev)
+{
+	kbase_hwcnt_context_term(kbdev->hwcnt_gpu_ctx);
+}
+
+int kbase_device_hwcnt_virtualizer_init(struct kbase_device *kbdev)
+{
+	return kbase_hwcnt_virtualizer_init(kbdev->hwcnt_gpu_ctx,
+			KBASE_HWCNT_GPU_VIRTUALIZER_DUMP_THRESHOLD_NS,
+			&kbdev->hwcnt_gpu_virt);
+}
+
+void kbase_device_hwcnt_virtualizer_term(struct kbase_device *kbdev)
+{
+	kbase_hwcnt_virtualizer_term(kbdev->hwcnt_gpu_virt);
+}
+
+int kbase_device_timeline_init(struct kbase_device *kbdev)
+{
+	atomic_set(&kbdev->timeline_flags, 0);
+	return kbase_timeline_init(&kbdev->timeline, &kbdev->timeline_flags);
+}
+
+void kbase_device_timeline_term(struct kbase_device *kbdev)
+{
+	kbase_timeline_term(kbdev->timeline);
+}
+
+int kbase_device_vinstr_init(struct kbase_device *kbdev)
+{
+	return kbase_vinstr_init(kbdev->hwcnt_gpu_virt, &kbdev->vinstr_ctx);
+}
+
+void kbase_device_vinstr_term(struct kbase_device *kbdev)
+{
+	kbase_vinstr_term(kbdev->vinstr_ctx);
+}
+
+int kbase_device_io_history_init(struct kbase_device *kbdev)
+{
+	return kbase_io_history_init(&kbdev->io_history,
+			KBASEP_DEFAULT_REGISTER_HISTORY_SIZE);
+}
+
+void kbase_device_io_history_term(struct kbase_device *kbdev)
+{
+	kbase_io_history_term(&kbdev->io_history);
+}
+
+int kbase_device_misc_register(struct kbase_device *kbdev)
+{
+	return misc_register(&kbdev->mdev);
+}
+
+void kbase_device_misc_deregister(struct kbase_device *kbdev)
+{
+	misc_deregister(&kbdev->mdev);
+}
+
+/* Add @kbdev to the global device list while holding the list lock. */
+int kbase_device_list_init(struct kbase_device *kbdev)
+{
+	const struct list_head *dev_list;
+
+	dev_list = kbase_device_get_list();
+	list_add(&kbdev->entry, &kbase_dev_list);
+	kbase_device_put_list(dev_list);
+
+	return 0;
+}
+
+/* Remove @kbdev from the global device list while holding the list lock. */
+void kbase_device_list_term(struct kbase_device *kbdev)
+{
+	const struct list_head *dev_list;
+
+	dev_list = kbase_device_get_list();
+	list_del(&kbdev->entry);
+	kbase_device_put_list(dev_list);
+}
+
+/* Lock kbase_dev_list_lock and return the device list head. The lock stays
+ * held until kbase_device_put_list() is called.
+ */
+const struct list_head *kbase_device_get_list(void)
+{
+	mutex_lock(&kbase_dev_list_lock);
+	return &kbase_dev_list;
+}
+KBASE_EXPORT_TEST_API(kbase_device_get_list);
+
+/* Release the lock taken by kbase_device_get_list(); @dev_list is unused. */
+void kbase_device_put_list(const struct list_head *dev_list)
+{
+	mutex_unlock(&kbase_dev_list_lock);
+}
+KBASE_EXPORT_TEST_API(kbase_device_put_list);
+
+/* Platform init, runtime PM init, GPU property read-out and interrupt
+ * installation. Steps that succeeded are undone if a later one fails.
+ */
+int kbase_device_early_init(struct kbase_device *kbdev)
+{
+	int err;
+
+	err = kbasep_platform_device_init(kbdev);
+	if (err)
+		return err;
+
+	err = kbase_pm_runtime_init(kbdev);
+	if (err)
+		goto fail_runtime_pm;
+
+	/* Ensure we can access the GPU registers */
+	kbase_pm_register_access_enable(kbdev);
+
+	/* Find out GPU properties based on the GPU feature registers */
+	kbase_gpuprops_set(kbdev);
+
+	/* We're done accessing the GPU registers for now. */
+	kbase_pm_register_access_disable(kbdev);
+
+	err = kbase_install_interrupts(kbdev);
+	if (err)
+		goto fail_interrupts;
+
+	return 0;
+
+fail_interrupts:
+	kbase_pm_runtime_term(kbdev);
+fail_runtime_pm:
+	kbasep_platform_device_term(kbdev);
+
+	return err;
+}
+
+/* Undo kbase_device_early_init(). With arbiter support built in and an
+ * arbiter interface present, interrupts are released through the arbiter PM
+ * path instead of directly.
+ */
+void kbase_device_early_term(struct kbase_device *kbdev)
+{
+#ifdef CONFIG_MALI_ARBITER_SUPPORT
+	if (kbdev->arb.arb_if)
+		kbase_arbiter_pm_release_interrupts(kbdev);
+	else
+		kbase_release_interrupts(kbdev);
+#else
+	kbase_release_interrupts(kbdev);
+#endif /* CONFIG_MALI_ARBITER_SUPPORT */
+	kbase_pm_runtime_term(kbdev);
+	kbasep_platform_device_term(kbdev);
+}
diff --git a/drivers/gpu/arm/bifrost/device/mali_kbase_device.h b/drivers/gpu/arm/bifrost/device/mali_kbase_device.h
new file mode 100644
index 000000000000..16f1d7098688
--- /dev/null
+++ b/drivers/gpu/arm/bifrost/device/mali_kbase_device.h
@@ -0,0 +1,71 @@
+/*
+ *
+ * (C) COPYRIGHT 2019-2020 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+#include
+
+/**
+ * kbase_device_get_list - get device list.
+ *
+ * Get access to device list.
+ *
+ * Return: Pointer to the linked list head.
+ */
+const struct list_head *kbase_device_get_list(void);
+
+/**
+ * kbase_device_put_list - put device list.
+ *
+ * @dev_list: head of linked list containing device list.
+ *
+ * Put access to the device list.
+ */
+void kbase_device_put_list(const struct list_head *dev_list);
+
+/**
+ * kbase_increment_device_id - increment device id.
+ *
+ * Used to increment device id on successful initialization of the device.
+ */
+void kbase_increment_device_id(void);
+
+/**
+ * kbase_device_init - Device initialisation.
+ *
+ * This is called from device probe to initialise various other
+ * components needed.
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ *
+ * Return: 0 on success and non-zero value on failure.
+ */
+int kbase_device_init(struct kbase_device *kbdev);
+
+/**
+ * kbase_device_term - Device termination.
+ *
+ * This is called from device remove to terminate various components that
+ * were initialised during kbase_device_init.
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ *
+ */
+void kbase_device_term(struct kbase_device *kbdev);
diff --git a/drivers/gpu/arm/bifrost/device/mali_kbase_device_internal.h b/drivers/gpu/arm/bifrost/device/mali_kbase_device_internal.h
new file mode 100644
index 000000000000..9f96db050bfe
--- /dev/null
+++ b/drivers/gpu/arm/bifrost/device/mali_kbase_device_internal.h
@@ -0,0 +1,78 @@
+/*
+ *
+ * (C) COPYRIGHT 2019 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+#include
+
+typedef int kbase_device_init_method(struct kbase_device *kbdev);
+typedef void kbase_device_term_method(struct kbase_device *kbdev);
+
+/**
+ * struct kbase_device_init - Device init/term methods.
+ * @init: Function pointer to an initialise method.
+ * @term: Function pointer to a terminate method; may be NULL when the
+ *        matching init method has nothing to undo.
+ * @err_mes: Error message to be printed when init method fails.
+ */
+struct kbase_device_init {
+	kbase_device_init_method *init;
+	kbase_device_term_method *term;
+	char *err_mes;
+};
+
+int kbase_device_vinstr_init(struct kbase_device *kbdev);
+void kbase_device_vinstr_term(struct kbase_device *kbdev);
+
+int kbase_device_timeline_init(struct kbase_device *kbdev);
+void kbase_device_timeline_term(struct kbase_device *kbdev);
+
+int kbase_device_hwcnt_backend_gpu_init(struct kbase_device *kbdev);
+void kbase_device_hwcnt_backend_gpu_term(struct kbase_device *kbdev);
+
+int kbase_device_hwcnt_context_init(struct kbase_device *kbdev);
+void kbase_device_hwcnt_context_term(struct kbase_device *kbdev);
+
+int kbase_device_hwcnt_virtualizer_init(struct kbase_device *kbdev);
+void kbase_device_hwcnt_virtualizer_term(struct kbase_device *kbdev);
+
+int kbase_device_list_init(struct kbase_device *kbdev);
+void kbase_device_list_term(struct kbase_device *kbdev);
+
+int kbase_device_io_history_init(struct kbase_device *kbdev);
+void kbase_device_io_history_term(struct kbase_device *kbdev);
+
+int kbase_device_misc_register(struct kbase_device *kbdev);
+void kbase_device_misc_deregister(struct kbase_device *kbdev);
+
+void kbase_device_id_init(struct kbase_device *kbdev);
+
+/**
+ * kbase_device_early_init - Perform any device-specific initialization.
+ * @kbdev:	Device pointer
+ *
+ * Return: 0 on success, or an error code on failure.
+ */
+int kbase_device_early_init(struct kbase_device *kbdev);
+
+/**
+ * kbase_device_early_term - Perform any device-specific termination.
+ * @kbdev:	Device pointer
+ */
+void kbase_device_early_term(struct kbase_device *kbdev);
diff --git a/drivers/gpu/arm/bifrost/gpu/backend/mali_kbase_gpu_fault_jm.c b/drivers/gpu/arm/bifrost/gpu/backend/mali_kbase_gpu_fault_jm.c
new file mode 100644
index 000000000000..63132dc80fa5
--- /dev/null
+++ b/drivers/gpu/arm/bifrost/gpu/backend/mali_kbase_gpu_fault_jm.c
@@ -0,0 +1,181 @@
+/*
+ *
+ * (C) COPYRIGHT 2019 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+#include
+
+#include "../mali_kbase_gpu_fault.h"
+
+/**
+ * kbase_gpu_exception_name - Map an exception code to a printable name.
+ * @exception_code: exception code to look up
+ *
+ * The codes are grouped as non-fault status codes, job exceptions, GPU
+ * exceptions and MMU exceptions. Several MMU code ranges share one name.
+ *
+ * Return: pointer to a constant string; "UNKNOWN" for any code that is not
+ * listed below. Never NULL.
+ */
+const char *kbase_gpu_exception_name(u32 const exception_code)
+{
+	const char *e;
+
+	switch (exception_code) {
+		/* Non-Fault Status code */
+	case 0x00:
+		e = "NOT_STARTED/IDLE/OK";
+		break;
+	case 0x01:
+		e = "DONE";
+		break;
+	case 0x02:
+		e = "INTERRUPTED";
+		break;
+	case 0x03:
+		e = "STOPPED";
+		break;
+	case 0x04:
+		e = "TERMINATED";
+		break;
+	case 0x08:
+		e = "ACTIVE";
+		break;
+		/* Job exceptions */
+	case 0x40:
+		e = "JOB_CONFIG_FAULT";
+		break;
+	case 0x41:
+		e = "JOB_POWER_FAULT";
+		break;
+	case 0x42:
+		e = "JOB_READ_FAULT";
+		break;
+	case 0x43:
+		e = "JOB_WRITE_FAULT";
+		break;
+	case 0x44:
+		e = "JOB_AFFINITY_FAULT";
+		break;
+	case 0x48:
+		e = "JOB_BUS_FAULT";
+		break;
+	case 0x50:
+		e = "INSTR_INVALID_PC";
+		break;
+	case 0x51:
+		e = "INSTR_INVALID_ENC";
+		break;
+	case 0x52:
+		e = "INSTR_TYPE_MISMATCH";
+		break;
+	case 0x53:
+		e = "INSTR_OPERAND_FAULT";
+		break;
+	case 0x54:
+		e = "INSTR_TLS_FAULT";
+		break;
+	case 0x55:
+		e = "INSTR_BARRIER_FAULT";
+		break;
+	case 0x56:
+		e = "INSTR_ALIGN_FAULT";
+		break;
+	case 0x58:
+		e = "DATA_INVALID_FAULT";
+		break;
+	case 0x59:
+		e = "TILE_RANGE_FAULT";
+		break;
+	case 0x5A:
+		e = "ADDR_RANGE_FAULT";
+		break;
+	case 0x60:
+		e = "OUT_OF_MEMORY";
+		break;
+		/* GPU exceptions */
+	case 0x80:
+		e = "DELAYED_BUS_FAULT";
+		break;
+	case 0x88:
+		e = "SHAREABILITY_FAULT";
+		break;
+		/* MMU exceptions */
+	case 0xC0:
+	case 0xC1:
+	case 0xC2:
+	case 0xC3:
+	case 0xC4:
+	case 0xC5:
+	case 0xC6:
+	case 0xC7:
+		e = "TRANSLATION_FAULT";
+		break;
+	case 0xC8:
+		e = "PERMISSION_FAULT";
+		break;
+	case 0xC9:
+	case 0xCA:
+	case 0xCB:
+	case 0xCC:
+	case 0xCD:
+	case 0xCE:
+	case 0xCF:
+		e = "PERMISSION_FAULT";
+		break;
+	case 0xD0:
+	case 0xD1:
+	case 0xD2:
+	case 0xD3:
+	case 0xD4:
+	case 0xD5:
+	case 0xD6:
+	case 0xD7:
+		e = "TRANSTAB_BUS_FAULT";
+		break;
+	case 0xD8:
+		e = "ACCESS_FLAG";
+		break;
+	case 0xD9:
+	case 0xDA:
+	case 0xDB:
+	case 0xDC:
+	case 0xDD:
+	case 0xDE:
+	case 0xDF:
+		e = "ACCESS_FLAG";
+		break;
+	case 0xE0:
+	case 0xE1:
+	case 0xE2:
+	case 0xE3:
+	case 0xE4:
+	case 0xE5:
+	case 0xE6:
+	case 0xE7:
+		e = "ADDRESS_SIZE_FAULT";
+		break;
+	case 0xE8:
+	case 0xE9:
+	case 0xEA:
+	case 0xEB:
+	case 0xEC:
+	case 0xED:
+	case 0xEE:
+	case 0xEF:
+		e = "MEMORY_ATTRIBUTES_FAULT";
+		break;
+	default:
+		e = "UNKNOWN";
+		break;
+	};
+
+	return e;
+}
diff --git a/drivers/gpu/arm/bifrost/mali_midg_regmap_jm.h b/drivers/gpu/arm/bifrost/gpu/backend/mali_kbase_gpu_regmap_jm.h
similarity index 85%
rename from drivers/gpu/arm/bifrost/mali_midg_regmap_jm.h
rename to drivers/gpu/arm/bifrost/gpu/backend/mali_kbase_gpu_regmap_jm.h
index 58e4d08deb86..258ff33348fe 100644
--- a/drivers/gpu/arm/bifrost/mali_midg_regmap_jm.h
+++ b/drivers/gpu/arm/bifrost/gpu/backend/mali_kbase_gpu_regmap_jm.h
@@ -20,15 +20,69 @@
  *
  */
 
-#ifndef _MIDG_REGMAP_JM_H_
-#define _MIDG_REGMAP_JM_H_
+#ifndef _KBASE_GPU_REGMAP_JM_H_
+#define _KBASE_GPU_REGMAP_JM_H_
+
+
+/* Set to implementation defined, outer caching */
+#define AS_MEMATTR_AARCH64_OUTER_IMPL_DEF 0x88ull
+/* Set to write back memory, outer caching */
+#define AS_MEMATTR_AARCH64_OUTER_WA 0x8Dull
+/* Set to inner non-cacheable, outer-non-cacheable
+ * Setting defined by the alloc bits is ignored, but set to a valid encoding:
+ * - no-alloc on read
+ * - no alloc on write
+ */
+#define AS_MEMATTR_AARCH64_NON_CACHEABLE 0x4Cull
+
+/* Symbols for default MEMATTR to use
+ * Default is - HW implementation defined caching
+ */
+#define AS_MEMATTR_INDEX_DEFAULT 0
+#define AS_MEMATTR_INDEX_DEFAULT_ACE 3
+
+/* HW implementation defined caching */
+#define AS_MEMATTR_INDEX_IMPL_DEF_CACHE_POLICY 0
+/* Force cache on */
+#define AS_MEMATTR_INDEX_FORCE_TO_CACHE_ALL 1
+/* Write-alloc */
+#define AS_MEMATTR_INDEX_WRITE_ALLOC 2
+/* Outer coherent, inner implementation defined policy */
+#define AS_MEMATTR_INDEX_OUTER_IMPL_DEF 3
+/* Outer coherent, write alloc
inner */ +#define AS_MEMATTR_INDEX_OUTER_WA 4 +/* Normal memory, inner non-cacheable, outer non-cacheable (ARMv8 mode only) */ +#define AS_MEMATTR_INDEX_NON_CACHEABLE 5 /* GPU control registers */ #define CORE_FEATURES 0x008 /* (RO) Shader Core Features */ #define JS_PRESENT 0x01C /* (RO) Job slots present */ -#define LATEST_FLUSH 0x038 /* (RO) Flush ID of latest clean-and-invalidate operation */ -#define GROUPS_L2_COHERENT (1 << 0) /* Cores groups are l2 coherent */ +#define LATEST_FLUSH 0x038 /* (RO) Flush ID of latest + * clean-and-invalidate operation + */ + +#define PRFCNT_BASE_LO 0x060 /* (RW) Performance counter memory + * region base address, low word + */ +#define PRFCNT_BASE_HI 0x064 /* (RW) Performance counter memory + * region base address, high word + */ +#define PRFCNT_CONFIG 0x068 /* (RW) Performance counter + * configuration + */ +#define PRFCNT_JM_EN 0x06C /* (RW) Performance counter enable + * flags for Job Manager + */ +#define PRFCNT_SHADER_EN 0x070 /* (RW) Performance counter enable + * flags for shader cores + */ +#define PRFCNT_TILER_EN 0x074 /* (RW) Performance counter enable + * flags for tiler + */ +#define PRFCNT_MMU_L2_EN 0x07C /* (RW) Performance counter enable + * flags for MMU/L2 cache + */ #define JS0_FEATURES 0x0C0 /* (RO) Features of job slot 0 */ #define JS1_FEATURES 0x0C4 /* (RO) Features of job slot 1 */ @@ -140,7 +194,7 @@ * The values are separated to avoid dependency of userspace and kernel code. 
*/ -/* Group of values representing the job status insead a particular fault */ +/* Group of values representing the job status instead of a particular fault */ #define JS_STATUS_NO_EXCEPTION_BASE 0x00 #define JS_STATUS_INTERRUPTED (JS_STATUS_NO_EXCEPTION_BASE + 0x02) /* 0x02 means INTERRUPTED */ #define JS_STATUS_STOPPED (JS_STATUS_NO_EXCEPTION_BASE + 0x03) /* 0x03 means STOPPED */ @@ -192,8 +246,6 @@ #define JM_JOB_THROTTLE_LIMIT_SHIFT (3) #define JM_MAX_JOB_THROTTLE_LIMIT (0x3F) #define JM_FORCE_COHERENCY_FEATURES_SHIFT (2) -#define JM_IDVS_GROUP_SIZE_SHIFT (16) -#define JM_MAX_IDVS_GROUP_SIZE (0x3F) /* GPU_COMMAND values */ #define GPU_COMMAND_NOP 0x00 /* No operation, nothing happens */ @@ -207,4 +259,4 @@ #define GPU_COMMAND_CLEAN_INV_CACHES 0x08 /* Clean and invalidate all caches */ #define GPU_COMMAND_SET_PROTECTED_MODE 0x09 /* Places the GPU in protected mode */ -#endif /* _MIDG_REGMAP_JM_H_ */ +#endif /* _KBASE_GPU_REGMAP_JM_H_ */ diff --git a/drivers/gpu/arm/bifrost/platform/meson/mali_kbase_config_platform.h b/drivers/gpu/arm/bifrost/gpu/mali_kbase_gpu.c similarity index 53% rename from drivers/gpu/arm/bifrost/platform/meson/mali_kbase_config_platform.h rename to drivers/gpu/arm/bifrost/gpu/mali_kbase_gpu.c index c02886fd2187..3128db4cabfc 100644 --- a/drivers/gpu/arm/bifrost/platform/meson/mali_kbase_config_platform.h +++ b/drivers/gpu/arm/bifrost/gpu/mali_kbase_gpu.c @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2014-2017, 2019 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2019 ARM Limited. All rights reserved. 
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -20,27 +20,22 @@
  *
  */
 
-/**
- * Power management configuration
- *
- * Attached value: pointer to @ref kbase_pm_callback_conf
- * Default value: See @ref kbase_pm_callback_conf
- */
-#define POWER_MANAGEMENT_CALLBACKS (&pm_callbacks)
+#include
+#include
 
-/**
- * Platform specific configuration functions
- *
- * Attached value: pointer to @ref kbase_platform_funcs_conf
- * Default value: See @ref kbase_platform_funcs_conf
- */
-#define PLATFORM_FUNCS (NULL)
-
-extern struct kbase_pm_callback_conf pm_callbacks;
-
-/**
- * Autosuspend delay
- *
- * The delay time (in milliseconds) to be used for autosuspend
- */
-#define AUTO_SUSPEND_DELAY (100)
+/* Translate the access-type field extracted from @fault_status by
+ * AS_FAULTSTATUS_ACCESS_TYPE_GET() into "ATOMIC", "READ", "WRITE" or
+ * "EXECUTE". Any other field value triggers WARN_ON() and returns NULL, so
+ * callers must be prepared for a NULL result.
+ */
+const char *kbase_gpu_access_type_name(u32 fault_status)
+{
+	switch (AS_FAULTSTATUS_ACCESS_TYPE_GET(fault_status)) {
+	case AS_FAULTSTATUS_ACCESS_TYPE_ATOMIC:
+		return "ATOMIC";
+	case AS_FAULTSTATUS_ACCESS_TYPE_READ:
+		return "READ";
+	case AS_FAULTSTATUS_ACCESS_TYPE_WRITE:
+		return "WRITE";
+	case AS_FAULTSTATUS_ACCESS_TYPE_EX:
+		return "EXECUTE";
+	default:
+		WARN_ON(1);
+		return NULL;
+	}
+}
diff --git a/drivers/gpu/arm/bifrost/platform/meson/mali_kbase_config_meson.c b/drivers/gpu/arm/bifrost/gpu/mali_kbase_gpu.h
similarity index 66%
rename from drivers/gpu/arm/bifrost/platform/meson/mali_kbase_config_meson.c
rename to drivers/gpu/arm/bifrost/gpu/mali_kbase_gpu.h
index 8de7a8852029..9516e56eda01 100644
--- a/drivers/gpu/arm/bifrost/platform/meson/mali_kbase_config_meson.c
+++ b/drivers/gpu/arm/bifrost/gpu/mali_kbase_gpu.h
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2015, 2017, 2019 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2019 ARM Limited. All rights reserved.
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -20,22 +20,12 @@ * */ -#include +#ifndef _KBASE_GPU_H_ +#define _KBASE_GPU_H_ -static struct kbase_platform_config dummy_platform_config; +#include "mali_kbase_gpu_regmap.h" +#include "mali_kbase_gpu_fault.h" +#include "mali_kbase_gpu_coherency.h" +#include "mali_kbase_gpu_id.h" -struct kbase_platform_config *kbase_get_platform_config(void) -{ - return &dummy_platform_config; -} - -#ifndef CONFIG_OF -int kbase_platform_register(void) -{ - return 0; -} - -void kbase_platform_unregister(void) -{ -} -#endif +#endif /* _KBASE_GPU_H_ */ diff --git a/drivers/gpu/arm/bifrost/mali_midg_coherency.h b/drivers/gpu/arm/bifrost/gpu/mali_kbase_gpu_coherency.h similarity index 84% rename from drivers/gpu/arm/bifrost/mali_midg_coherency.h rename to drivers/gpu/arm/bifrost/gpu/mali_kbase_gpu_coherency.h index 29d5df38c92b..bb2b1613aa47 100644 --- a/drivers/gpu/arm/bifrost/mali_midg_coherency.h +++ b/drivers/gpu/arm/bifrost/gpu/mali_kbase_gpu_coherency.h @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2015 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2015-2020 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -20,12 +20,12 @@ * */ -#ifndef _MIDG_COHERENCY_H_ -#define _MIDG_COHERENCY_H_ +#ifndef _KBASE_GPU_COHERENCY_H_ +#define _KBASE_GPU_COHERENCY_H_ #define COHERENCY_ACE_LITE 0 #define COHERENCY_ACE 1 #define COHERENCY_NONE 31 #define COHERENCY_FEATURE_BIT(x) (1 << (x)) -#endif /* _MIDG_COHERENCY_H_ */ +#endif /* _KBASE_GPU_COHERENCY_H_ */ diff --git a/drivers/gpu/arm/bifrost/gpu/mali_kbase_gpu_fault.h b/drivers/gpu/arm/bifrost/gpu/mali_kbase_gpu_fault.h new file mode 100644 index 000000000000..b59b9d15f945 --- /dev/null +++ b/drivers/gpu/arm/bifrost/gpu/mali_kbase_gpu_fault.h @@ -0,0 +1,59 @@ +/* + * + * (C) COPYRIGHT 2019-2020 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +#ifndef _KBASE_GPU_FAULT_H_ +#define _KBASE_GPU_FAULT_H_ + +/** Returns the name associated with a Mali exception code + * + * @exception_code: exception code + * + * This function is called from the interrupt handler when a GPU fault occurs. 
+ * + * Return: name associated with the exception code + */ +const char *kbase_gpu_exception_name(u32 exception_code); + +/** Returns the name associated with a Mali fatal exception code + * + * @fatal_exception_code: fatal exception code + * + * This function is called from the interrupt handler when a GPU fatal + * exception occurs. + * + * Return: name associated with the fatal exception code + */ +const char *kbase_gpu_fatal_exception_name(u32 const fatal_exception_code); + +/** + * kbase_gpu_access_type_name - Convert MMU_AS_CONTROL.FAULTSTATUS.ACCESS_TYPE + * into string. + * @fault_status: value of FAULTSTATUS register. + * + * After MMU fault, this function can be used to get readable information about + * access_type of the MMU fault. + * + * Return: String of the access type. + */ +const char *kbase_gpu_access_type_name(u32 fault_status); + +#endif /* _KBASE_GPU_FAULT_H_ */ diff --git a/drivers/gpu/arm/bifrost/mali_kbase_gpu_id.h b/drivers/gpu/arm/bifrost/gpu/mali_kbase_gpu_id.h similarity index 94% rename from drivers/gpu/arm/bifrost/mali_kbase_gpu_id.h rename to drivers/gpu/arm/bifrost/gpu/mali_kbase_gpu_id.h index a38e88693862..9f3d6b1d5b51 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_gpu_id.h +++ b/drivers/gpu/arm/bifrost/gpu/mali_kbase_gpu_id.h @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2015-2019 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2015-2020 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -19,6 +19,7 @@ * SPDX-License-Identifier: GPL-2.0 * */ + #ifndef _KBASE_GPU_ID_H_ #define _KBASE_GPU_ID_H_ @@ -97,12 +98,14 @@ #define GPU_ID2_PRODUCT_TNAX GPU_ID2_MODEL_MAKE(9, 1) #define GPU_ID2_PRODUCT_TBEX GPU_ID2_MODEL_MAKE(9, 2) #define GPU_ID2_PRODUCT_LBEX GPU_ID2_MODEL_MAKE(9, 4) -#define GPU_ID2_PRODUCT_TULX GPU_ID2_MODEL_MAKE(10, 0) #define GPU_ID2_PRODUCT_TDUX GPU_ID2_MODEL_MAKE(10, 1) #define GPU_ID2_PRODUCT_TODX GPU_ID2_MODEL_MAKE(10, 2) -#define GPU_ID2_PRODUCT_TIDX GPU_ID2_MODEL_MAKE(10, 3) +#define GPU_ID2_PRODUCT_TGRX GPU_ID2_MODEL_MAKE(10, 3) #define GPU_ID2_PRODUCT_TVAX GPU_ID2_MODEL_MAKE(10, 4) -#define GPU_ID2_PRODUCT_LODX GPU_ID2_MODEL_MAKE(10, 5) +#define GPU_ID2_PRODUCT_LODX GPU_ID2_MODEL_MAKE(10, 7) +#define GPU_ID2_PRODUCT_TTUX GPU_ID2_MODEL_MAKE(11, 2) +#define GPU_ID2_PRODUCT_LTUX GPU_ID2_MODEL_MAKE(11, 3) +#define GPU_ID2_PRODUCT_TE2X GPU_ID2_MODEL_MAKE(11, 1) /* Helper macro to create a GPU_ID assuming valid values for id, major, minor, status */ diff --git a/drivers/gpu/arm/bifrost/mali_midg_regmap.h b/drivers/gpu/arm/bifrost/gpu/mali_kbase_gpu_regmap.h similarity index 88% rename from drivers/gpu/arm/bifrost/mali_midg_regmap.h rename to drivers/gpu/arm/bifrost/gpu/mali_kbase_gpu_regmap.h index 6d5f24308dab..51024308fafe 100644 --- a/drivers/gpu/arm/bifrost/mali_midg_regmap.h +++ b/drivers/gpu/arm/bifrost/gpu/mali_kbase_gpu_regmap.h @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2010-2019 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2010-2020 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -20,12 +20,12 @@ * */ -#ifndef _MIDG_REGMAP_H_ -#define _MIDG_REGMAP_H_ +#ifndef _KBASE_GPU_REGMAP_H_ +#define _KBASE_GPU_REGMAP_H_ -#include "mali_midg_coherency.h" +#include "mali_kbase_gpu_coherency.h" #include "mali_kbase_gpu_id.h" -#include "mali_midg_regmap_jm.h" +#include "backend/mali_kbase_gpu_regmap_jm.h" /* Begin Register Offsets */ /* GPU control registers */ @@ -54,18 +54,15 @@ #define L2_CONFIG 0x048 /* (RW) Level 2 cache configuration */ +#define GROUPS_L2_COHERENT (1 << 0) /* Cores groups are l2 coherent */ +#define SUPER_L2_COHERENT (1 << 1) /* Shader cores within a core + * supergroup are l2 coherent + */ + #define PWR_KEY 0x050 /* (WO) Power manager key register */ #define PWR_OVERRIDE0 0x054 /* (RW) Power manager override settings */ #define PWR_OVERRIDE1 0x058 /* (RW) Power manager override settings */ -#define PRFCNT_BASE_LO 0x060 /* (RW) Performance counter memory region base address, low word */ -#define PRFCNT_BASE_HI 0x064 /* (RW) Performance counter memory region base address, high word */ -#define PRFCNT_CONFIG 0x068 /* (RW) Performance counter configuration */ -#define PRFCNT_JM_EN 0x06C /* (RW) Performance counter enable flags for Job Manager */ -#define PRFCNT_SHADER_EN 0x070 /* (RW) Performance counter enable flags for shader cores */ -#define PRFCNT_TILER_EN 0x074 /* (RW) Performance counter enable flags for tiler */ -#define PRFCNT_MMU_L2_EN 0x07C /* (RW) Performance counter enable flags for MMU/L2 cache */ - #define CYCLE_COUNT_LO 0x090 /* (RO) Cycle counter, low word */ #define CYCLE_COUNT_HI 0x094 /* (RO) Cycle counter, high word */ #define TIMESTAMP_LO 0x098 /* (RO) Global time stamp counter, low word */ @@ -235,9 +232,17 @@ #define PRFCNT_SAMPLE_COMPLETED (1 << 16) /* Set when a performance count sample has completed. 
*/ #define CLEAN_CACHES_COMPLETED (1 << 17) /* Set when a cache clean operation has completed. */ -#define GPU_IRQ_REG_ALL (GPU_FAULT | MULTIPLE_GPU_FAULTS | RESET_COMPLETED \ +/* Include POWER_CHANGED_SINGLE in debug builds for use in irq latency test. + */ +#define GPU_IRQ_REG_COMMON (GPU_FAULT | MULTIPLE_GPU_FAULTS | RESET_COMPLETED \ | POWER_CHANGED_ALL | PRFCNT_SAMPLE_COMPLETED) +#ifdef CONFIG_MALI_BIFROST_DEBUG +#define GPU_IRQ_REG_ALL (GPU_IRQ_REG_COMMON | POWER_CHANGED_SINGLE) +#else /* CONFIG_MALI_BIFROST_DEBUG */ +#define GPU_IRQ_REG_ALL (GPU_IRQ_REG_COMMON) +#endif /* CONFIG_MALI_BIFROST_DEBUG */ + /* * MMU_IRQ_RAWSTAT register values. Values are valid also for * MMU_IRQ_CLEAR, MMU_IRQ_MASK, MMU_IRQ_STATUS registers. @@ -281,11 +286,25 @@ #define AS_FAULTSTATUS_EXCEPTION_CODE_ADDRESS_SIZE_FAULT (0x4<<3) #define AS_FAULTSTATUS_EXCEPTION_CODE_MEMORY_ATTRIBUTES_FAULT (0x5<<3) -#define AS_FAULTSTATUS_ACCESS_TYPE_MASK (0x3<<8) -#define AS_FAULTSTATUS_ACCESS_TYPE_ATOMIC (0x0<<8) -#define AS_FAULTSTATUS_ACCESS_TYPE_EX (0x1<<8) -#define AS_FAULTSTATUS_ACCESS_TYPE_READ (0x2<<8) -#define AS_FAULTSTATUS_ACCESS_TYPE_WRITE (0x3<<8) +#define AS_FAULTSTATUS_EXCEPTION_TYPE_SHIFT 0 +#define AS_FAULTSTATUS_EXCEPTION_TYPE_MASK (0xFF << AS_FAULTSTATUS_EXCEPTION_TYPE_SHIFT) +#define AS_FAULTSTATUS_EXCEPTION_TYPE_GET(reg_val) \ + (((reg_val)&AS_FAULTSTATUS_EXCEPTION_TYPE_MASK) >> AS_FAULTSTATUS_EXCEPTION_TYPE_SHIFT) + +#define AS_FAULTSTATUS_ACCESS_TYPE_SHIFT 8 +#define AS_FAULTSTATUS_ACCESS_TYPE_MASK (0x3 << AS_FAULTSTATUS_ACCESS_TYPE_SHIFT) +#define AS_FAULTSTATUS_ACCESS_TYPE_GET(reg_val) \ + (((reg_val)&AS_FAULTSTATUS_ACCESS_TYPE_MASK) >> AS_FAULTSTATUS_ACCESS_TYPE_SHIFT) + +#define AS_FAULTSTATUS_ACCESS_TYPE_ATOMIC (0x0) +#define AS_FAULTSTATUS_ACCESS_TYPE_EX (0x1) +#define AS_FAULTSTATUS_ACCESS_TYPE_READ (0x2) +#define AS_FAULTSTATUS_ACCESS_TYPE_WRITE (0x3) + +#define AS_FAULTSTATUS_SOURCE_ID_SHIFT 16 +#define AS_FAULTSTATUS_SOURCE_ID_MASK (0xFFFF << 
AS_FAULTSTATUS_SOURCE_ID_SHIFT) +#define AS_FAULTSTATUS_SOURCE_ID_GET(reg_val) \ + (((reg_val)&AS_FAULTSTATUS_SOURCE_ID_MASK) >> AS_FAULTSTATUS_SOURCE_ID_SHIFT) /* * Begin MMU TRANSCFG register values @@ -353,17 +372,6 @@ /* Inner write-alloc cache setup, no outer caching */ #define AS_MEMATTR_WRITE_ALLOC 0x8Dull -/* Set to implementation defined, outer caching */ -#define AS_MEMATTR_AARCH64_OUTER_IMPL_DEF 0x88ull -/* Set to write back memory, outer caching */ -#define AS_MEMATTR_AARCH64_OUTER_WA 0x8Dull -/* Set to inner non-cacheable, outer-non-cacheable - * Setting defined by the alloc bits is ignored, but set to a valid encoding: - * - no-alloc on read - * - no alloc on write - */ -#define AS_MEMATTR_AARCH64_NON_CACHEABLE 0x4Cull - /* Use GPU implementation-defined caching policy. */ #define AS_MEMATTR_LPAE_IMPL_DEF_CACHE_POLICY 0x48ull /* The attribute set to force all resources to be cached. */ @@ -380,24 +388,6 @@ */ #define AS_MEMATTR_LPAE_NON_CACHEABLE_RESERVED -/* Symbols for default MEMATTR to use - * Default is - HW implementation defined caching */ -#define AS_MEMATTR_INDEX_DEFAULT 0 -#define AS_MEMATTR_INDEX_DEFAULT_ACE 3 - -/* HW implementation defined caching */ -#define AS_MEMATTR_INDEX_IMPL_DEF_CACHE_POLICY 0 -/* Force cache on */ -#define AS_MEMATTR_INDEX_FORCE_TO_CACHE_ALL 1 -/* Write-alloc */ -#define AS_MEMATTR_INDEX_WRITE_ALLOC 2 -/* Outer coherent, inner implementation defined policy */ -#define AS_MEMATTR_INDEX_OUTER_IMPL_DEF 3 -/* Outer coherent, write alloc inner */ -#define AS_MEMATTR_INDEX_OUTER_WA 4 -/* Normal memory, inner non-cacheable, outer non-cacheable (ARMv8 mode only) */ -#define AS_MEMATTR_INDEX_NON_CACHEABLE 5 - /* L2_MMU_CONFIG register */ #define L2_MMU_CONFIG_ALLOW_SNOOP_DISPARITY_SHIFT (23) #define L2_MMU_CONFIG_ALLOW_SNOOP_DISPARITY (0x1 << L2_MMU_CONFIG_ALLOW_SNOOP_DISPARITY_SHIFT) @@ -440,5 +430,8 @@ #define L2_CONFIG_HASH_MASK (0xFFul << L2_CONFIG_HASH_SHIFT) /* End L2_CONFIG register */ +/* IDVS_GROUP register */ 
+#define IDVS_GROUP_SIZE_SHIFT (16) +#define IDVS_GROUP_MAX_SIZE (0x3F) -#endif /* _MIDG_REGMAP_H_ */ +#endif /* _KBASE_GPU_REGMAP_H_ */ diff --git a/drivers/gpu/arm/bifrost/jm/mali_base_jm_kernel.h b/drivers/gpu/arm/bifrost/jm/mali_base_jm_kernel.h new file mode 100644 index 000000000000..879a436152d9 --- /dev/null +++ b/drivers/gpu/arm/bifrost/jm/mali_base_jm_kernel.h @@ -0,0 +1,1001 @@ +/* + * + * (C) COPYRIGHT 2020 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ +#ifndef _BASE_JM_KERNEL_H_ +#define _BASE_JM_KERNEL_H_ + +/* Memory allocation, access/hint flags. + * + * See base_mem_alloc_flags. + */ + +/* IN */ +/* Read access CPU side + */ +#define BASE_MEM_PROT_CPU_RD ((base_mem_alloc_flags)1 << 0) + +/* Write access CPU side + */ +#define BASE_MEM_PROT_CPU_WR ((base_mem_alloc_flags)1 << 1) + +/* Read access GPU side + */ +#define BASE_MEM_PROT_GPU_RD ((base_mem_alloc_flags)1 << 2) + +/* Write access GPU side + */ +#define BASE_MEM_PROT_GPU_WR ((base_mem_alloc_flags)1 << 3) + +/* Execute allowed on the GPU side + */ +#define BASE_MEM_PROT_GPU_EX ((base_mem_alloc_flags)1 << 4) + +/* Will be permanently mapped in kernel space. + * Flag is only allowed on allocations originating from kbase. 
+ */ +#define BASEP_MEM_PERMANENT_KERNEL_MAPPING ((base_mem_alloc_flags)1 << 5) + +/* The allocation will completely reside within the same 4GB chunk in the GPU + * virtual space. + * Since this flag is primarily required only for the TLS memory which will + * not be used to contain executable code and also not used for Tiler heap, + * it can't be used along with BASE_MEM_PROT_GPU_EX and TILER_ALIGN_TOP flags. + */ +#define BASE_MEM_GPU_VA_SAME_4GB_PAGE ((base_mem_alloc_flags)1 << 6) + +/* Userspace is not allowed to free this memory. + * Flag is only allowed on allocations originating from kbase. + */ +#define BASEP_MEM_NO_USER_FREE ((base_mem_alloc_flags)1 << 7) + +#define BASE_MEM_RESERVED_BIT_8 ((base_mem_alloc_flags)1 << 8) + +/* Grow backing store on GPU Page Fault + */ +#define BASE_MEM_GROW_ON_GPF ((base_mem_alloc_flags)1 << 9) + +/* Page coherence Outer shareable, if available + */ +#define BASE_MEM_COHERENT_SYSTEM ((base_mem_alloc_flags)1 << 10) + +/* Page coherence Inner shareable + */ +#define BASE_MEM_COHERENT_LOCAL ((base_mem_alloc_flags)1 << 11) + +/* Should be cached on the CPU + */ +#define BASE_MEM_CACHED_CPU ((base_mem_alloc_flags)1 << 12) + +/* IN/OUT */ +/* Must have same VA on both the GPU and the CPU + */ +#define BASE_MEM_SAME_VA ((base_mem_alloc_flags)1 << 13) + +/* OUT */ +/* Must call mmap to acquire a GPU address for the allocation + */ +#define BASE_MEM_NEED_MMAP ((base_mem_alloc_flags)1 << 14) + +/* IN */ +/* Page coherence Outer shareable, required. + */ +#define BASE_MEM_COHERENT_SYSTEM_REQUIRED ((base_mem_alloc_flags)1 << 15) + +/* Protected memory + */ +#define BASE_MEM_PROTECTED ((base_mem_alloc_flags)1 << 16) + +/* Not needed physical memory + */ +#define BASE_MEM_DONT_NEED ((base_mem_alloc_flags)1 << 17) + +/* Must use shared CPU/GPU zone (SAME_VA zone) but doesn't require the + * addresses to be the same + */ +#define BASE_MEM_IMPORT_SHARED ((base_mem_alloc_flags)1 << 18) + +/** + * Bit 19 is reserved. 
+ * + * Do not remove, use the next unreserved bit for new flags + */ +#define BASE_MEM_RESERVED_BIT_19 ((base_mem_alloc_flags)1 << 19) + +/** + * Memory starting from the end of the initial commit is aligned to 'extent' + * pages, where 'extent' must be a power of 2 and no more than + * BASE_MEM_TILER_ALIGN_TOP_EXTENT_MAX_PAGES + */ +#define BASE_MEM_TILER_ALIGN_TOP ((base_mem_alloc_flags)1 << 20) + +/* Should be uncached on the GPU, will work only for GPUs using AARCH64 mmu + * mode. Some components within the GPU might only be able to access memory + * that is GPU cacheable. Refer to the specific GPU implementation for more + * details. The 3 shareability flags will be ignored for GPU uncached memory. + * If used while importing USER_BUFFER type memory, then the import will fail + * if the memory is not aligned to GPU and CPU cache line width. + */ +#define BASE_MEM_UNCACHED_GPU ((base_mem_alloc_flags)1 << 21) + +/* + * Bits [22:25] for group_id (0~15). + * + * base_mem_group_id_set() should be used to pack a memory group ID into a + * base_mem_alloc_flags value instead of accessing the bits directly. + * base_mem_group_id_get() should be used to extract the memory group ID from + * a base_mem_alloc_flags value. + */ +#define BASEP_MEM_GROUP_ID_SHIFT 22 +#define BASE_MEM_GROUP_ID_MASK \ + ((base_mem_alloc_flags)0xF << BASEP_MEM_GROUP_ID_SHIFT) + +/* Must do CPU cache maintenance when imported memory is mapped/unmapped + * on GPU. Currently applicable to dma-buf type only. + */ +#define BASE_MEM_IMPORT_SYNC_ON_MAP_UNMAP ((base_mem_alloc_flags)1 << 26) + +/* Use the GPU VA chosen by the kernel client */ +#define BASE_MEM_FLAG_MAP_FIXED ((base_mem_alloc_flags)1 << 27) + +/* Number of bits used as flags for base memory management + * + * Must be kept in sync with the base_mem_alloc_flags flags + */ +#define BASE_MEM_FLAGS_NR_BITS 28 + +/* A mask of all the flags which are only valid for allocations within kbase, + * and may not be passed from user space. 
+ */ +#define BASEP_MEM_FLAGS_KERNEL_ONLY \ + (BASEP_MEM_PERMANENT_KERNEL_MAPPING | BASEP_MEM_NO_USER_FREE | \ + BASE_MEM_FLAG_MAP_FIXED) + +/* A mask for all output bits, excluding IN/OUT bits. + */ +#define BASE_MEM_FLAGS_OUTPUT_MASK BASE_MEM_NEED_MMAP + +/* A mask for all input bits, including IN/OUT bits. + */ +#define BASE_MEM_FLAGS_INPUT_MASK \ + (((1 << BASE_MEM_FLAGS_NR_BITS) - 1) & ~BASE_MEM_FLAGS_OUTPUT_MASK) + +/* A mask of all currently reserved flags + */ +#define BASE_MEM_FLAGS_RESERVED \ + (BASE_MEM_RESERVED_BIT_8 | BASE_MEM_RESERVED_BIT_19) + +#define BASEP_MEM_INVALID_HANDLE (0ull << 12) +#define BASE_MEM_MMU_DUMP_HANDLE (1ull << 12) +#define BASE_MEM_TRACE_BUFFER_HANDLE (2ull << 12) +#define BASE_MEM_MAP_TRACKING_HANDLE (3ull << 12) +#define BASEP_MEM_WRITE_ALLOC_PAGES_HANDLE (4ull << 12) +/* reserved handles ..-47<<PAGE_SHIFT> for future special handles */ +#define BASE_MEM_COOKIE_BASE (64ul << 12) +#define BASE_MEM_FIRST_FREE_ADDRESS ((BITS_PER_LONG << 12) + \ + BASE_MEM_COOKIE_BASE) + +/** + * typedef base_context_create_flags - Flags to pass to ::base_context_init. + * + * Flags can be ORed together to enable multiple things. + * + * These share the same space as BASEP_CONTEXT_FLAG_*, and so must + * not collide with them. + */ +typedef u32 base_context_create_flags; + +/* No flags set */ +#define BASE_CONTEXT_CREATE_FLAG_NONE ((base_context_create_flags)0) + +/* Base context is embedded in a cctx object (flag used for CINSTR + * software counter macros) + */ +#define BASE_CONTEXT_CCTX_EMBEDDED ((base_context_create_flags)1 << 0) + +/* Base context is a 'System Monitor' context for Hardware counters. + * + * One important side effect of this is that job submission is disabled.
+ */ +#define BASE_CONTEXT_SYSTEM_MONITOR_SUBMIT_DISABLED \ + ((base_context_create_flags)1 << 1) + +/* Bit-shift used to encode a memory group ID in base_context_create_flags + */ +#define BASEP_CONTEXT_MMU_GROUP_ID_SHIFT (3) + +/* Bitmask used to encode a memory group ID in base_context_create_flags + */ +#define BASEP_CONTEXT_MMU_GROUP_ID_MASK \ + ((base_context_create_flags)0xF << BASEP_CONTEXT_MMU_GROUP_ID_SHIFT) + +/* Bitpattern describing the base_context_create_flags that can be + * passed to the kernel + */ +#define BASEP_CONTEXT_CREATE_KERNEL_FLAGS \ + (BASE_CONTEXT_SYSTEM_MONITOR_SUBMIT_DISABLED | \ + BASEP_CONTEXT_MMU_GROUP_ID_MASK) + +/* Bitpattern describing the ::base_context_create_flags that can be + * passed to base_context_init() + */ +#define BASEP_CONTEXT_CREATE_ALLOWED_FLAGS \ + (BASE_CONTEXT_CCTX_EMBEDDED | BASEP_CONTEXT_CREATE_KERNEL_FLAGS) + +/* + * Private flags used on the base context + * + * These start at bit 31, and run down to zero. + * + * They share the same space as base_context_create_flags, and so must + * not collide with them. + */ + +/* Private flag tracking whether job descriptor dumping is disabled */ +#define BASEP_CONTEXT_FLAG_JOB_DUMP_DISABLED \ + ((base_context_create_flags)(1 << 31)) + +/* Enable additional tracepoints for latency measurements (TL_ATOM_READY, + * TL_ATOM_DONE, TL_ATOM_PRIO_CHANGE, TL_ATOM_EVENT_POST) + */ +#define BASE_TLSTREAM_ENABLE_LATENCY_TRACEPOINTS (1 << 0) + +/* Indicate that job dumping is enabled. This could affect certain timers + * to account for the performance impact. + */ +#define BASE_TLSTREAM_JOB_DUMPING_ENABLED (1 << 1) + +#define BASE_TLSTREAM_FLAGS_MASK (BASE_TLSTREAM_ENABLE_LATENCY_TRACEPOINTS | \ + BASE_TLSTREAM_JOB_DUMPING_ENABLED) +/* + * Dependency stuff, keep it private for now. May want to expose it if + * we decide to make the number of semaphores a configurable + * option. + */ +#define BASE_JD_ATOM_COUNT 256 + +/* Maximum number of concurrent render passes. 
+ */ +#define BASE_JD_RP_COUNT (256) + +/* Set/reset values for a software event */ +#define BASE_JD_SOFT_EVENT_SET ((unsigned char)1) +#define BASE_JD_SOFT_EVENT_RESET ((unsigned char)0) + +/** + * struct base_jd_udata - Per-job data + * + * This structure is used to store per-job data, and is completely unused + * by the Base driver. It can be used to store things such as callback + * function pointer, data to handle job completion. It is guaranteed to be + * untouched by the Base driver. + * + * @blob: per-job data array + */ +struct base_jd_udata { + u64 blob[2]; +}; + +/** + * typedef base_jd_dep_type - Job dependency type. + * + * A flags field will be inserted into the atom structure to specify whether a + * dependency is a data or ordering dependency (by putting it before/after + * 'core_req' in the structure it should be possible to add without changing + * the structure size). + * When the flag is set for a particular dependency to signal that it is an + * ordering only dependency then errors will not be propagated. + */ +typedef u8 base_jd_dep_type; + +#define BASE_JD_DEP_TYPE_INVALID (0) /**< Invalid dependency */ +#define BASE_JD_DEP_TYPE_DATA (1U << 0) /**< Data dependency */ +#define BASE_JD_DEP_TYPE_ORDER (1U << 1) /**< Order dependency */ + +/** + * typedef base_jd_core_req - Job chain hardware requirements. + * + * A job chain must specify what GPU features it needs to allow the + * driver to schedule the job correctly. Not specifying the + * correct settings can/will cause an early job termination. Multiple + * values can be ORed together to specify multiple requirements. + * Special case is ::BASE_JD_REQ_DEP, which is used to express complex + * dependencies, and that doesn't execute anything on the hardware.
+ */ +typedef u32 base_jd_core_req; + +/* Requirements that come from the HW */ + +/* No requirement, dependency only + */ +#define BASE_JD_REQ_DEP ((base_jd_core_req)0) + +/* Requires fragment shaders + */ +#define BASE_JD_REQ_FS ((base_jd_core_req)1 << 0) + +/* Requires compute shaders + * + * This covers any of the following GPU job types: + * - Vertex Shader Job + * - Geometry Shader Job + * - An actual Compute Shader Job + * + * Compare this with BASE_JD_REQ_ONLY_COMPUTE, which specifies that the + * job is specifically just the "Compute Shader" job type, and not the "Vertex + * Shader" nor the "Geometry Shader" job type. + */ +#define BASE_JD_REQ_CS ((base_jd_core_req)1 << 1) + +/* Requires tiling */ +#define BASE_JD_REQ_T ((base_jd_core_req)1 << 2) + +/* Requires cache flushes */ +#define BASE_JD_REQ_CF ((base_jd_core_req)1 << 3) + +/* Requires value writeback */ +#define BASE_JD_REQ_V ((base_jd_core_req)1 << 4) + +/* SW-only requirements - the HW does not expose these as part of the job slot + * capabilities + */ + +/* Requires fragment job with AFBC encoding */ +#define BASE_JD_REQ_FS_AFBC ((base_jd_core_req)1 << 13) + +/* SW-only requirement: coalesce completion events. + * If this bit is set then completion of this atom will not cause an event to + * be sent to userspace, whether successful or not; completion events will be + * deferred until an atom completes which does not have this bit set. + * + * This bit may not be used in combination with BASE_JD_REQ_EXTERNAL_RESOURCES. + */ +#define BASE_JD_REQ_EVENT_COALESCE ((base_jd_core_req)1 << 5) + +/* SW Only requirement: the job chain requires a coherent core group. We don't + * mind which coherent core group is used. + */ +#define BASE_JD_REQ_COHERENT_GROUP ((base_jd_core_req)1 << 6) + +/* SW Only requirement: The performance counters should be enabled only when + * they are needed, to reduce power consumption. 
+ */ +#define BASE_JD_REQ_PERMON ((base_jd_core_req)1 << 7) + +/* SW Only requirement: External resources are referenced by this atom. + * + * This bit may not be used in combination with BASE_JD_REQ_EVENT_COALESCE and + * BASE_JD_REQ_SOFT_EVENT_WAIT. + */ +#define BASE_JD_REQ_EXTERNAL_RESOURCES ((base_jd_core_req)1 << 8) + +/* SW Only requirement: Software defined job. Jobs with this bit set will not be + * submitted to the hardware but will cause some action to happen within the + * driver + */ +#define BASE_JD_REQ_SOFT_JOB ((base_jd_core_req)1 << 9) + +#define BASE_JD_REQ_SOFT_DUMP_CPU_GPU_TIME (BASE_JD_REQ_SOFT_JOB | 0x1) +#define BASE_JD_REQ_SOFT_FENCE_TRIGGER (BASE_JD_REQ_SOFT_JOB | 0x2) +#define BASE_JD_REQ_SOFT_FENCE_WAIT (BASE_JD_REQ_SOFT_JOB | 0x3) + +/* 0x4 RESERVED for now */ + +/* SW only requirement: event wait/trigger job. + * + * - BASE_JD_REQ_SOFT_EVENT_WAIT: this job will block until the event is set. + * - BASE_JD_REQ_SOFT_EVENT_SET: this job sets the event, thus unblocks the + * other waiting jobs. It completes immediately. + * - BASE_JD_REQ_SOFT_EVENT_RESET: this job resets the event, making it + * possible for other jobs to wait upon. It completes immediately. + */ +#define BASE_JD_REQ_SOFT_EVENT_WAIT (BASE_JD_REQ_SOFT_JOB | 0x5) +#define BASE_JD_REQ_SOFT_EVENT_SET (BASE_JD_REQ_SOFT_JOB | 0x6) +#define BASE_JD_REQ_SOFT_EVENT_RESET (BASE_JD_REQ_SOFT_JOB | 0x7) + +#define BASE_JD_REQ_SOFT_DEBUG_COPY (BASE_JD_REQ_SOFT_JOB | 0x8) + +/* SW only requirement: Just In Time allocation + * + * This job requests a single or multiple just-in-time allocations through a + * list of base_jit_alloc_info structure which is passed via the jc element of + * the atom. The number of base_jit_alloc_info structures present in the + * list is passed via the nr_extres element of the atom + * + * It should be noted that the id entry in base_jit_alloc_info must not + * be reused until it has been released via BASE_JD_REQ_SOFT_JIT_FREE. 
+ * + * Should this soft job fail it is expected that a BASE_JD_REQ_SOFT_JIT_FREE + * soft job to free the JIT allocation is still made. + * + * The job will complete immediately. + */ +#define BASE_JD_REQ_SOFT_JIT_ALLOC (BASE_JD_REQ_SOFT_JOB | 0x9) + +/* SW only requirement: Just In Time free + * + * This job requests a single or multiple just-in-time allocations created by + * BASE_JD_REQ_SOFT_JIT_ALLOC to be freed. The ID list of the just-in-time + * allocations is passed via the jc element of the atom. + * + * The job will complete immediately. + */ +#define BASE_JD_REQ_SOFT_JIT_FREE (BASE_JD_REQ_SOFT_JOB | 0xa) + +/* SW only requirement: Map external resource + * + * This job requests external resource(s) are mapped once the dependencies + * of the job have been satisfied. The list of external resources is + * passed via the jc element of the atom which is a pointer to a + * base_external_resource_list. + */ +#define BASE_JD_REQ_SOFT_EXT_RES_MAP (BASE_JD_REQ_SOFT_JOB | 0xb) + +/* SW only requirement: Unmap external resource + * + * This job requests external resource(s) are unmapped once the dependencies + * of the job have been satisfied. The list of external resources is + * passed via the jc element of the atom which is a pointer to a + * base_external_resource_list. + */ +#define BASE_JD_REQ_SOFT_EXT_RES_UNMAP (BASE_JD_REQ_SOFT_JOB | 0xc) + +/* HW Requirement: Requires Compute shaders (but not Vertex or Geometry Shaders) + * + * This indicates that the Job Chain contains GPU jobs of the 'Compute + * Shaders' type. + * + * In contrast to BASE_JD_REQ_CS, this does not indicate that the Job + * Chain contains 'Geometry Shader' or 'Vertex Shader' jobs.
+ */ +#define BASE_JD_REQ_ONLY_COMPUTE ((base_jd_core_req)1 << 10) + +/* HW Requirement: Use the base_jd_atom::device_nr field to specify a + * particular core group + * + * If both BASE_JD_REQ_COHERENT_GROUP and this flag are set, this flag + * takes priority + * + * This is only guaranteed to work for BASE_JD_REQ_ONLY_COMPUTE atoms. + * + * If the core availability policy is keeping the required core group turned + * off, then the job will fail with a BASE_JD_EVENT_PM_EVENT error code. + */ +#define BASE_JD_REQ_SPECIFIC_COHERENT_GROUP ((base_jd_core_req)1 << 11) + +/* SW Flag: If this bit is set then the successful completion of this atom + * will not cause an event to be sent to userspace + */ +#define BASE_JD_REQ_EVENT_ONLY_ON_FAILURE ((base_jd_core_req)1 << 12) + +/* SW Flag: If this bit is set then completion of this atom will not cause an + * event to be sent to userspace, whether successful or not. + */ +#define BASEP_JD_REQ_EVENT_NEVER ((base_jd_core_req)1 << 14) + +/* SW Flag: Skip GPU cache clean and invalidation before starting a GPU job. + * + * If this bit is set then the GPU's cache will not be cleaned and invalidated + * until a GPU job starts which does not have this bit set or a job completes + * which does not have the BASE_JD_REQ_SKIP_CACHE_END bit set. Do not use + * if the CPU may have written to memory addressed by the job since the last job + * without this bit set was submitted. + */ +#define BASE_JD_REQ_SKIP_CACHE_START ((base_jd_core_req)1 << 15) + +/* SW Flag: Skip GPU cache clean and invalidation after a GPU job completes. + * + * If this bit is set then the GPU's cache will not be cleaned and invalidated + * until a GPU job completes which does not have this bit set or a job starts + * which does not have the BASE_JD_REQ_SKIP_CACHE_START bit set. Do not use + * if the CPU may read from or partially overwrite memory addressed by the job + * before the next job without this bit set completes. 
+ */ +#define BASE_JD_REQ_SKIP_CACHE_END ((base_jd_core_req)1 << 16) + +/* Request the atom be executed on a specific job slot. + * + * When this flag is specified, it takes precedence over any existing job slot + * selection logic. + */ +#define BASE_JD_REQ_JOB_SLOT ((base_jd_core_req)1 << 17) + +/* SW-only requirement: The atom is the start of a renderpass. + * + * If this bit is set then the job chain will be soft-stopped if it causes the + * GPU to write beyond the end of the physical pages backing the tiler heap, and + * committing more memory to the heap would exceed an internal threshold. It may + * be resumed after running one of the job chains attached to an atom with + * BASE_JD_REQ_END_RENDERPASS set and the same renderpass ID. It may be + * resumed multiple times until it completes without memory usage exceeding the + * threshold. + * + * Usually used with BASE_JD_REQ_T. + */ +#define BASE_JD_REQ_START_RENDERPASS ((base_jd_core_req)1 << 18) + +/* SW-only requirement: The atom is the end of a renderpass. + * + * If this bit is set then the atom incorporates the CPU address of a + * base_jd_fragment object instead of the GPU address of a job chain. + * + * Which job chain is run depends upon whether the atom with the same renderpass + * ID and the BASE_JD_REQ_START_RENDERPASS bit set completed normally or + * was soft-stopped when it exceeded an upper threshold for tiler heap memory + * usage. + * + * It also depends upon whether one of the job chains attached to the atom has + * already been run as part of the same renderpass (in which case it would have + * written unresolved multisampled and otherwise-discarded output to temporary + * buffers that need to be read back). The job chain for doing a forced read and + * forced write (from/to temporary buffers) is run as many times as necessary. + * + * Usually used with BASE_JD_REQ_FS. 
+ */ +#define BASE_JD_REQ_END_RENDERPASS ((base_jd_core_req)1 << 19) + +/* These requirement bits are currently unused in base_jd_core_req + */ +#define BASEP_JD_REQ_RESERVED \ + (~(BASE_JD_REQ_ATOM_TYPE | BASE_JD_REQ_EXTERNAL_RESOURCES | \ + BASE_JD_REQ_EVENT_ONLY_ON_FAILURE | BASEP_JD_REQ_EVENT_NEVER | \ + BASE_JD_REQ_EVENT_COALESCE | \ + BASE_JD_REQ_COHERENT_GROUP | BASE_JD_REQ_SPECIFIC_COHERENT_GROUP | \ + BASE_JD_REQ_FS_AFBC | BASE_JD_REQ_PERMON | \ + BASE_JD_REQ_SKIP_CACHE_START | BASE_JD_REQ_SKIP_CACHE_END | \ + BASE_JD_REQ_JOB_SLOT | BASE_JD_REQ_START_RENDERPASS | \ + BASE_JD_REQ_END_RENDERPASS)) + +/* Mask of all bits in base_jd_core_req that control the type of the atom. + * + * This allows dependency only atoms to have flags set + */ +#define BASE_JD_REQ_ATOM_TYPE \ + (BASE_JD_REQ_FS | BASE_JD_REQ_CS | BASE_JD_REQ_T | BASE_JD_REQ_CF | \ + BASE_JD_REQ_V | BASE_JD_REQ_SOFT_JOB | BASE_JD_REQ_ONLY_COMPUTE) + +/** + * Mask of all bits in base_jd_core_req that control the type of a soft job. + */ +#define BASE_JD_REQ_SOFT_JOB_TYPE (BASE_JD_REQ_SOFT_JOB | 0x1f) + +/* Returns non-zero value if core requirements passed define a soft job or + * a dependency only job. + */ +#define BASE_JD_REQ_SOFT_JOB_OR_DEP(core_req) \ + (((core_req) & BASE_JD_REQ_SOFT_JOB) || \ + ((core_req) & BASE_JD_REQ_ATOM_TYPE) == BASE_JD_REQ_DEP) + +/** + * enum kbase_jd_atom_state + * + * @KBASE_JD_ATOM_STATE_UNUSED: Atom is not used. + * @KBASE_JD_ATOM_STATE_QUEUED: Atom is queued in JD. + * @KBASE_JD_ATOM_STATE_IN_JS: Atom has been given to JS (is runnable/running). + * @KBASE_JD_ATOM_STATE_HW_COMPLETED: Atom has been completed, but not yet + * handed back to job dispatcher for + * dependency resolution. + * @KBASE_JD_ATOM_STATE_COMPLETED: Atom has been completed, but not yet handed + * back to userspace. 
+ */ +enum kbase_jd_atom_state { + KBASE_JD_ATOM_STATE_UNUSED, + KBASE_JD_ATOM_STATE_QUEUED, + KBASE_JD_ATOM_STATE_IN_JS, + KBASE_JD_ATOM_STATE_HW_COMPLETED, + KBASE_JD_ATOM_STATE_COMPLETED +}; + +/** + * typedef base_atom_id - Type big enough to store an atom number in. + */ +typedef u8 base_atom_id; + +/** + * struct base_dependency - An atom number paired with a dependency type. + * + * @atom_id: An atom number + * @dependency_type: Dependency type + */ +struct base_dependency { + base_atom_id atom_id; + base_jd_dep_type dependency_type; +}; + +/** + * struct base_jd_fragment - Set of GPU fragment job chains used for rendering. + * + * @norm_read_norm_write: Job chain for full rendering. + * GPU address of a fragment job chain to render in the + * circumstance where the tiler job chain did not exceed + * its memory usage threshold and no fragment job chain + * was previously run for the same renderpass. + * It is used no more than once per renderpass. + * @norm_read_forced_write: Job chain for starting incremental + * rendering. + * GPU address of a fragment job chain to render in + * the circumstance where the tiler job chain exceeded + * its memory usage threshold for the first time and + * no fragment job chain was previously run for the + * same renderpass. + * Writes unresolved multisampled and normally- + * discarded output to temporary buffers that must be + * read back by a subsequent forced_read job chain + * before the renderpass is complete. + * It is used no more than once per renderpass. + * @forced_read_forced_write: Job chain for continuing incremental + * rendering. + * GPU address of a fragment job chain to render in + * the circumstance where the tiler job chain + * exceeded its memory usage threshold again + * and a fragment job chain was previously run for + * the same renderpass. + * Reads unresolved multisampled and + * normally-discarded output from temporary buffers + * written by a previous forced_write job chain and + * writes the same to temporary buffers again.
+ * It is used as many times as required until + * rendering completes. + * @forced_read_norm_write: Job chain for ending incremental rendering. + * GPU address of a fragment job chain to render in the + * circumstance where the tiler job chain did not + * exceed its memory usage threshold this time and a + * fragment job chain was previously run for the same + * renderpass. + * Reads unresolved multisampled and normally-discarded + * output from temporary buffers written by a previous + * forced_write job chain in order to complete a + * renderpass. + * It is used no more than once per renderpass. + * + * This structure is referenced by the main atom structure if + * BASE_JD_REQ_END_RENDERPASS is set in the base_jd_core_req. + */ +struct base_jd_fragment { + u64 norm_read_norm_write; + u64 norm_read_forced_write; + u64 forced_read_forced_write; + u64 forced_read_norm_write; +}; + +/** + * typedef base_jd_prio - Base Atom priority. + * + * Only certain priority levels are actually implemented, as specified by the + * BASE_JD_PRIO_<...> definitions below. It is undefined to use a priority + * level that is not one of those defined below. + * + * Priority levels only affect scheduling after the atoms have had dependencies + * resolved. For example, a low priority atom that has had its dependencies + * resolved might run before a higher priority atom that has not had its + * dependencies resolved. + * + * In general, fragment atoms do not affect non-fragment atoms with + * lower priorities, and vice versa. One exception is that there is only one + * priority value for each context. So a high-priority (e.g.) fragment atom + * could increase its context priority, causing its non-fragment atoms to also + * be scheduled sooner. 
+ * + * The atoms are scheduled as follows with respect to their priorities: + * * Let atoms 'X' and 'Y' be for the same job slot who have dependencies + * resolved, and atom 'X' has a higher priority than atom 'Y' + * * If atom 'Y' is currently running on the HW, then it is interrupted to + * allow atom 'X' to run soon after + * * If instead neither atom 'Y' nor atom 'X' are running, then when choosing + * the next atom to run, atom 'X' will always be chosen instead of atom 'Y' + * * Any two atoms that have the same priority could run in any order with + * respect to each other. That is, there is no ordering constraint between + * atoms of the same priority. + * + * The sysfs file 'js_ctx_scheduling_mode' is used to control how atoms are + * scheduled between contexts. The default value, 0, will cause higher-priority + * atoms to be scheduled first, regardless of their context. The value 1 will + * use a round-robin algorithm when deciding which context's atoms to schedule + * next, so higher-priority atoms can only preempt lower priority atoms within + * the same context. See KBASE_JS_SYSTEM_PRIORITY_MODE and + * KBASE_JS_PROCESS_LOCAL_PRIORITY_MODE for more details. + */ +typedef u8 base_jd_prio; + +/* Medium atom priority. This is a priority higher than BASE_JD_PRIO_LOW */ +#define BASE_JD_PRIO_MEDIUM ((base_jd_prio)0) +/* High atom priority. This is a priority higher than BASE_JD_PRIO_MEDIUM and + * BASE_JD_PRIO_LOW + */ +#define BASE_JD_PRIO_HIGH ((base_jd_prio)1) +/* Low atom priority. */ +#define BASE_JD_PRIO_LOW ((base_jd_prio)2) + +/* Count of the number of priority levels. This itself is not a valid + * base_jd_prio setting + */ +#define BASE_JD_NR_PRIO_LEVELS 3 + +/** + * struct base_jd_atom_v2 - Node of a dependency graph used to submit a + * GPU job chain or soft-job to the kernel driver. + * + * @jc: GPU address of a job chain or (if BASE_JD_REQ_END_RENDERPASS + * is set in the base_jd_core_req) the CPU address of a + * base_jd_fragment object. 
+ * @udata: User data. + * @extres_list: List of external resources. + * @nr_extres: Number of external resources or JIT allocations. + * @jit_id: Zero-terminated array of IDs of just-in-time memory + * allocations written to by the atom. When the atom + * completes, the value stored at the + * &struct_base_jit_alloc_info.heap_info_gpu_addr of + * each allocation is read in order to enforce an + * overall physical memory usage limit. + * @pre_dep: Pre-dependencies. One needs to use a SETTER function to assign + * this field; this is done in order to reduce the possibility of + * improper assignment of a dependency field. + * @atom_number: Unique number to identify the atom. + * @prio: Atom priority. Refer to base_jd_prio for more details. + * @device_nr: Core group when BASE_JD_REQ_SPECIFIC_COHERENT_GROUP + * specified. + * @jobslot: Job slot to use when BASE_JD_REQ_JOB_SLOT is specified. + * @core_req: Core requirements. + * @renderpass_id: Renderpass identifier used to associate an atom that has + * BASE_JD_REQ_START_RENDERPASS set in its core requirements + * with an atom that has BASE_JD_REQ_END_RENDERPASS set. + * @padding: Unused. Must be zero. + * + * This structure has changed since UK 10.2 for which base_jd_core_req was a + * u16 value. + * + * In UK 10.3 a core_req field of a u32 type was added to the end of the + * structure, and the place in the structure previously occupied by u16 + * core_req was kept but renamed to compat_core_req. + * + * From UK 11.20 - compat_core_req is now occupied by u8 jit_id[2]. + * Compatibility with UK 10.x from UK 11.y is not handled because + * the major version increase prevents this. + * + * For UK 11.20 jit_id[2] must be initialized to zero. 
+ */ +struct base_jd_atom_v2 { + u64 jc; + struct base_jd_udata udata; + u64 extres_list; + u16 nr_extres; + u8 jit_id[2]; + struct base_dependency pre_dep[2]; + base_atom_id atom_number; + base_jd_prio prio; + u8 device_nr; + u8 jobslot; + base_jd_core_req core_req; + u8 renderpass_id; + u8 padding[7]; +}; + +/* Job chain event code bits + * Defines the bits used to create ::base_jd_event_code + */ +enum { + BASE_JD_SW_EVENT_KERNEL = (1u << 15), /* Kernel side event */ + BASE_JD_SW_EVENT = (1u << 14), /* SW defined event */ + /* Event indicates success (SW events only) */ + BASE_JD_SW_EVENT_SUCCESS = (1u << 13), + BASE_JD_SW_EVENT_JOB = (0u << 11), /* Job related event */ + BASE_JD_SW_EVENT_BAG = (1u << 11), /* Bag related event */ + BASE_JD_SW_EVENT_INFO = (2u << 11), /* Misc/info event */ + BASE_JD_SW_EVENT_RESERVED = (3u << 11), /* Reserved event type */ + /* Mask to extract the type from an event code */ + BASE_JD_SW_EVENT_TYPE_MASK = (3u << 11) +}; + +/** + * enum base_jd_event_code - Job chain event codes + * + * @BASE_JD_EVENT_RANGE_HW_NONFAULT_START: Start of hardware non-fault status + * codes. + * Obscurely, BASE_JD_EVENT_TERMINATED + * indicates a real fault, because the + * job was hard-stopped. + * @BASE_JD_EVENT_NOT_STARTED: Can't be seen by userspace, treated as + * 'previous job done'. + * @BASE_JD_EVENT_STOPPED: Can't be seen by userspace, becomes + * TERMINATED, DONE or JOB_CANCELLED. + * @BASE_JD_EVENT_TERMINATED: This is actually a fault status code - the job + * was hard stopped. + * @BASE_JD_EVENT_ACTIVE: Can't be seen by userspace, jobs only returned on + * complete/fail/cancel. + * @BASE_JD_EVENT_RANGE_HW_NONFAULT_END: End of hardware non-fault status codes. + * Obscurely, BASE_JD_EVENT_TERMINATED + * indicates a real fault, + * because the job was hard-stopped. + * @BASE_JD_EVENT_RANGE_HW_FAULT_OR_SW_ERROR_START: Start of hardware fault and + * software error status codes. 
+ * @BASE_JD_EVENT_RANGE_HW_FAULT_OR_SW_ERROR_END: End of hardware fault and + * software error status codes. + * @BASE_JD_EVENT_RANGE_SW_SUCCESS_START: Start of software success status + * codes. + * @BASE_JD_EVENT_RANGE_SW_SUCCESS_END: End of software success status codes. + * @BASE_JD_EVENT_RANGE_KERNEL_ONLY_START: Start of kernel-only status codes. + * Such codes are never returned to + * user-space. + * @BASE_JD_EVENT_RANGE_KERNEL_ONLY_END: End of kernel-only status codes. + * + * HW and low-level SW events are represented by event codes. + * The status of jobs which succeeded is also represented by + * an event code (see @BASE_JD_EVENT_DONE). + * Events are usually reported as part of a &struct base_jd_event. + * + * The event codes are encoded in the following way: + * * 10:0 - subtype + * * 12:11 - type + * * 13 - SW success (only valid if the SW bit is set) + * * 14 - SW event (HW event if not set) + * * 15 - Kernel event (should never be seen in userspace) + * + * Events are split up into ranges as follows: + * * BASE_JD_EVENT_RANGE_<description>_START + * * BASE_JD_EVENT_RANGE_<description>_END + * + * code is in <description>'s range when: + * BASE_JD_EVENT_RANGE_<description>_START <= code < + * BASE_JD_EVENT_RANGE_<description>_END + * + * Ranges can be asserted for adjacency by testing that the END of the previous + * is equal to the START of the next. This is useful for optimizing some tests + * for range. + * + * A limitation is that the last member of this enum must explicitly be handled + * (with an assert-unreachable statement) in switch statements that use + * variables of this type. Otherwise, the compiler warns that we have not + * handled that enum value. 
+ */ +enum base_jd_event_code { + /* HW defined exceptions */ + BASE_JD_EVENT_RANGE_HW_NONFAULT_START = 0, + + /* non-fatal exceptions */ + BASE_JD_EVENT_NOT_STARTED = 0x00, + BASE_JD_EVENT_DONE = 0x01, + BASE_JD_EVENT_STOPPED = 0x03, + BASE_JD_EVENT_TERMINATED = 0x04, + BASE_JD_EVENT_ACTIVE = 0x08, + + BASE_JD_EVENT_RANGE_HW_NONFAULT_END = 0x40, + BASE_JD_EVENT_RANGE_HW_FAULT_OR_SW_ERROR_START = 0x40, + + /* job exceptions */ + BASE_JD_EVENT_JOB_CONFIG_FAULT = 0x40, + BASE_JD_EVENT_JOB_POWER_FAULT = 0x41, + BASE_JD_EVENT_JOB_READ_FAULT = 0x42, + BASE_JD_EVENT_JOB_WRITE_FAULT = 0x43, + BASE_JD_EVENT_JOB_AFFINITY_FAULT = 0x44, + BASE_JD_EVENT_JOB_BUS_FAULT = 0x48, + BASE_JD_EVENT_INSTR_INVALID_PC = 0x50, + BASE_JD_EVENT_INSTR_INVALID_ENC = 0x51, + BASE_JD_EVENT_INSTR_TYPE_MISMATCH = 0x52, + BASE_JD_EVENT_INSTR_OPERAND_FAULT = 0x53, + BASE_JD_EVENT_INSTR_TLS_FAULT = 0x54, + BASE_JD_EVENT_INSTR_BARRIER_FAULT = 0x55, + BASE_JD_EVENT_INSTR_ALIGN_FAULT = 0x56, + BASE_JD_EVENT_DATA_INVALID_FAULT = 0x58, + BASE_JD_EVENT_TILE_RANGE_FAULT = 0x59, + BASE_JD_EVENT_STATE_FAULT = 0x5A, + BASE_JD_EVENT_OUT_OF_MEMORY = 0x60, + BASE_JD_EVENT_UNKNOWN = 0x7F, + + /* GPU exceptions */ + BASE_JD_EVENT_DELAYED_BUS_FAULT = 0x80, + BASE_JD_EVENT_SHAREABILITY_FAULT = 0x88, + + /* MMU exceptions */ + BASE_JD_EVENT_TRANSLATION_FAULT_LEVEL1 = 0xC1, + BASE_JD_EVENT_TRANSLATION_FAULT_LEVEL2 = 0xC2, + BASE_JD_EVENT_TRANSLATION_FAULT_LEVEL3 = 0xC3, + BASE_JD_EVENT_TRANSLATION_FAULT_LEVEL4 = 0xC4, + BASE_JD_EVENT_PERMISSION_FAULT = 0xC8, + BASE_JD_EVENT_TRANSTAB_BUS_FAULT_LEVEL1 = 0xD1, + BASE_JD_EVENT_TRANSTAB_BUS_FAULT_LEVEL2 = 0xD2, + BASE_JD_EVENT_TRANSTAB_BUS_FAULT_LEVEL3 = 0xD3, + BASE_JD_EVENT_TRANSTAB_BUS_FAULT_LEVEL4 = 0xD4, + BASE_JD_EVENT_ACCESS_FLAG = 0xD8, + + /* SW defined exceptions */ + BASE_JD_EVENT_MEM_GROWTH_FAILED = + BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_JOB | 0x000, + BASE_JD_EVENT_TIMED_OUT = + BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_JOB | 0x001, + BASE_JD_EVENT_JOB_CANCELLED = + 
BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_JOB | 0x002, + BASE_JD_EVENT_JOB_INVALID = + BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_JOB | 0x003, + BASE_JD_EVENT_PM_EVENT = + BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_JOB | 0x004, + + BASE_JD_EVENT_BAG_INVALID = + BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_BAG | 0x003, + + BASE_JD_EVENT_RANGE_HW_FAULT_OR_SW_ERROR_END = BASE_JD_SW_EVENT | + BASE_JD_SW_EVENT_RESERVED | 0x3FF, + + BASE_JD_EVENT_RANGE_SW_SUCCESS_START = BASE_JD_SW_EVENT | + BASE_JD_SW_EVENT_SUCCESS | 0x000, + + BASE_JD_EVENT_PROGRESS_REPORT = BASE_JD_SW_EVENT | + BASE_JD_SW_EVENT_SUCCESS | BASE_JD_SW_EVENT_JOB | 0x000, + BASE_JD_EVENT_BAG_DONE = BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_SUCCESS | + BASE_JD_SW_EVENT_BAG | 0x000, + BASE_JD_EVENT_DRV_TERMINATED = BASE_JD_SW_EVENT | + BASE_JD_SW_EVENT_SUCCESS | BASE_JD_SW_EVENT_INFO | 0x000, + + BASE_JD_EVENT_RANGE_SW_SUCCESS_END = BASE_JD_SW_EVENT | + BASE_JD_SW_EVENT_SUCCESS | BASE_JD_SW_EVENT_RESERVED | 0x3FF, + + BASE_JD_EVENT_RANGE_KERNEL_ONLY_START = BASE_JD_SW_EVENT | + BASE_JD_SW_EVENT_KERNEL | 0x000, + BASE_JD_EVENT_REMOVED_FROM_NEXT = BASE_JD_SW_EVENT | + BASE_JD_SW_EVENT_KERNEL | BASE_JD_SW_EVENT_JOB | 0x000, + BASE_JD_EVENT_END_RP_DONE = BASE_JD_SW_EVENT | + BASE_JD_SW_EVENT_KERNEL | BASE_JD_SW_EVENT_JOB | 0x001, + + BASE_JD_EVENT_RANGE_KERNEL_ONLY_END = BASE_JD_SW_EVENT | + BASE_JD_SW_EVENT_KERNEL | BASE_JD_SW_EVENT_RESERVED | 0x3FF +}; + +/** + * struct base_jd_event_v2 - Event reporting structure + * + * @event_code: event code. + * @atom_number: the atom number that has completed. + * @udata: user data. + * + * This structure is used by the kernel driver to report information + * about GPU events. They can either be HW-specific events or low-level + * SW events, such as job-chain completion. + * + * The event code contains an event type field which can be extracted + * by ANDing with BASE_JD_SW_EVENT_TYPE_MASK. 
+ */ +struct base_jd_event_v2 { + enum base_jd_event_code event_code; + base_atom_id atom_number; + struct base_jd_udata udata; +}; + +/** + * struct base_dump_cpu_gpu_counters - Structure for + * BASE_JD_REQ_SOFT_DUMP_CPU_GPU_COUNTERS + * jobs. + * + * This structure is stored into the memory pointed to by the @jc field + * of &struct base_jd_atom_v2. + * + * It must not occupy the same CPU cache line(s) as any neighboring data. + * This is to avoid cases where access to pages containing the structure + * is shared between cached and un-cached memory regions, which would + * cause memory corruption. + */ + +struct base_dump_cpu_gpu_counters { + u64 system_time; + u64 cycle_counter; + u64 sec; + u32 usec; + u8 padding[36]; +}; + +#endif /* _BASE_JM_KERNEL_H_ */ diff --git a/drivers/gpu/arm/bifrost/jm/mali_kbase_jm_defs.h b/drivers/gpu/arm/bifrost/jm/mali_kbase_jm_defs.h new file mode 100644 index 000000000000..5f767109a511 --- /dev/null +++ b/drivers/gpu/arm/bifrost/jm/mali_kbase_jm_defs.h @@ -0,0 +1,818 @@ +/* + * + * (C) COPYRIGHT 2020 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + + + +/* + * Definitions (types, defines, etcs) specific to Job Manager Kbase. + * They are placed here to allow the hierarchy of header files to work. 
+ */ + +#ifndef _KBASE_JM_DEFS_H_ +#define _KBASE_JM_DEFS_H_ + +#include "mali_kbase_js_defs.h" + +/* Dump Job slot trace on error (only active if KBASE_KTRACE_ENABLE != 0) */ +#define KBASE_KTRACE_DUMP_ON_JOB_SLOT_ERROR 1 + +/* + * Number of milliseconds before resetting the GPU when a job cannot be "zapped" + * from the hardware. Note that the time is actually + * ZAP_TIMEOUT+SOFT_STOP_RESET_TIMEOUT between the context zap starting and + * the GPU actually being reset to give other contexts time for their jobs + * to be soft-stopped and removed from the hardware before resetting. + */ +#define ZAP_TIMEOUT 1000 + +/* + * Prevent soft-stops from occurring in scheduling situations + * + * This is not due to HW issues, but when scheduling is desired to be more + * predictable. + * + * Therefore, soft stop may still be disabled due to HW issues. + * + * Soft stop will still be used for non-scheduling purposes e.g. when + * terminating a context. + * + * if not in use, define this value to 0 instead of being undefined. + */ +#define KBASE_DISABLE_SCHEDULING_SOFT_STOPS 0 + +/* + * Prevent hard-stops from occurring in scheduling situations + * + * This is not due to HW issues, but when scheduling is desired to be more + * predictable. + * + * Hard stop will still be used for non-scheduling purposes e.g. when + * terminating a context. + * + * if not in use, define this value to 0 instead of being undefined. + */ +#define KBASE_DISABLE_SCHEDULING_HARD_STOPS 0 + +/* Atom has been previously soft-stopped */ +#define KBASE_KATOM_FLAG_BEEN_SOFT_STOPPED (1<<1) +/* Atom has been previously retried to execute */ +#define KBASE_KATOM_FLAGS_RERUN (1<<2) +/* Atom submitted with JOB_CHAIN_FLAG bit set in JS_CONFIG_NEXT register, helps + * to disambiguate short-running job chains during soft/hard stopping of jobs + */ +#define KBASE_KATOM_FLAGS_JOBCHAIN (1<<3) +/* Atom has been previously hard-stopped. 
*/ +#define KBASE_KATOM_FLAG_BEEN_HARD_STOPPED (1<<4) +/* Atom has caused us to enter disjoint state */ +#define KBASE_KATOM_FLAG_IN_DISJOINT (1<<5) +/* Atom blocked on cross-slot dependency */ +#define KBASE_KATOM_FLAG_X_DEP_BLOCKED (1<<7) +/* Atom has fail dependency on cross-slot dependency */ +#define KBASE_KATOM_FLAG_FAIL_BLOCKER (1<<8) +/* Atom is currently in the list of atoms blocked on cross-slot dependencies */ +#define KBASE_KATOM_FLAG_JSCTX_IN_X_DEP_LIST (1<<9) +/* Atom is currently holding a context reference */ +#define KBASE_KATOM_FLAG_HOLDING_CTX_REF (1<<10) +/* Atom requires GPU to be in protected mode */ +#define KBASE_KATOM_FLAG_PROTECTED (1<<11) +/* Atom has been stored in runnable_tree */ +#define KBASE_KATOM_FLAG_JSCTX_IN_TREE (1<<12) +/* Atom is waiting for L2 caches to power up in order to enter protected mode */ +#define KBASE_KATOM_FLAG_HOLDING_L2_REF_PROT (1<<13) + +/* SW related flags about types of JS_COMMAND action + * NOTE: These must be masked off by JS_COMMAND_MASK + */ + +/* This command causes a disjoint event */ +#define JS_COMMAND_SW_CAUSES_DISJOINT 0x100 + +/* Bitmask of all SW related flags */ +#define JS_COMMAND_SW_BITS (JS_COMMAND_SW_CAUSES_DISJOINT) + +#if (JS_COMMAND_SW_BITS & JS_COMMAND_MASK) +#error "JS_COMMAND_SW_BITS not masked off by JS_COMMAND_MASK." \ + "Must update JS_COMMAND_SW_<..> bitmasks" +#endif + +/* Soft-stop command that causes a Disjoint event. 
This of course isn't + * entirely masked off by JS_COMMAND_MASK + */ +#define JS_COMMAND_SOFT_STOP_WITH_SW_DISJOINT \ + (JS_COMMAND_SW_CAUSES_DISJOINT | JS_COMMAND_SOFT_STOP) + +#define KBASEP_ATOM_ID_INVALID BASE_JD_ATOM_COUNT + +/* Serialize atoms within a slot (ie only one atom per job slot) */ +#define KBASE_SERIALIZE_INTRA_SLOT (1 << 0) +/* Serialize atoms between slots (ie only one job slot running at any time) */ +#define KBASE_SERIALIZE_INTER_SLOT (1 << 1) +/* Reset the GPU after each atom completion */ +#define KBASE_SERIALIZE_RESET (1 << 2) + +#ifdef CONFIG_DEBUG_FS +/** + * struct base_job_fault_event - keeps track of the atom which faulted or which + * completed after the faulty atom but before the + * debug data for faulty atom was dumped. + * + * @event_code: event code for the atom, should != BASE_JD_EVENT_DONE for + * the atom which faulted. + * @katom: pointer to the atom for which job fault occurred or which + * completed after the faulty atom. + * @job_fault_work: work item, queued only for the faulty atom, which waits for + * the dumping to get completed and then does the bottom half + * of job done for the atoms which followed the faulty atom. + * @head: List head used to store the atom in the global list of + * faulty atoms or context specific list of atoms which got + * completed during the dump. + * @reg_offset: offset of the register to be dumped next, only applicable + * for the faulty atom. + */ +struct base_job_fault_event { + + u32 event_code; + struct kbase_jd_atom *katom; + struct work_struct job_fault_work; + struct list_head head; + int reg_offset; +}; +#endif + +/** + * struct kbase_jd_atom_dependency - Contains the dependency info for an atom. + * @atom: pointer to the dependee atom. + * @dep_type: type of dependency on the dependee @atom, i.e. order or data + * dependency. BASE_JD_DEP_TYPE_INVALID indicates no dependency. 
+ */ +struct kbase_jd_atom_dependency { + struct kbase_jd_atom *atom; + u8 dep_type; +}; + +/** + * kbase_jd_katom_dep_atom - Retrieves a read-only reference to the + * dependee atom. + * @dep: pointer to the dependency info structure. + * + * Return: readonly reference to dependee atom. + */ +static inline const struct kbase_jd_atom * +kbase_jd_katom_dep_atom(const struct kbase_jd_atom_dependency *dep) +{ + LOCAL_ASSERT(dep != NULL); + + return (const struct kbase_jd_atom *)(dep->atom); +} + +/** + * kbase_jd_katom_dep_type - Retrieves the dependency type info + * + * @dep: pointer to the dependency info structure. + * + * Return: the type of dependency there is on the dependee atom. + */ +static inline u8 kbase_jd_katom_dep_type( + const struct kbase_jd_atom_dependency *dep) +{ + LOCAL_ASSERT(dep != NULL); + + return dep->dep_type; +} + +/** + * kbase_jd_katom_dep_set - sets up the dependency info structure + * as per the values passed. + * @const_dep: pointer to the dependency info structure to be setup. + * @a: pointer to the dependee atom. + * @type: type of dependency there is on the dependee atom. + */ +static inline void kbase_jd_katom_dep_set( + const struct kbase_jd_atom_dependency *const_dep, + struct kbase_jd_atom *a, u8 type) +{ + struct kbase_jd_atom_dependency *dep; + + LOCAL_ASSERT(const_dep != NULL); + + dep = (struct kbase_jd_atom_dependency *)const_dep; + + dep->atom = a; + dep->dep_type = type; +} + +/** + * kbase_jd_katom_dep_clear - resets the dependency info structure + * + * @const_dep: pointer to the dependency info structure to be setup. 
+ */ +static inline void kbase_jd_katom_dep_clear( + const struct kbase_jd_atom_dependency *const_dep) +{ + struct kbase_jd_atom_dependency *dep; + + LOCAL_ASSERT(const_dep != NULL); + + dep = (struct kbase_jd_atom_dependency *)const_dep; + + dep->atom = NULL; + dep->dep_type = BASE_JD_DEP_TYPE_INVALID; +} + +/** + * enum kbase_atom_gpu_rb_state - The state of an atom, pertinent after it + * becomes runnable, with respect to job slot + * ringbuffer/fifo. + * @KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB: Atom not currently present in slot fifo, + * which implies that either atom has not become + * runnable due to dependency or has completed + * the execution on GPU. + * @KBASE_ATOM_GPU_RB_WAITING_BLOCKED: Atom has been added to slot fifo but is + * blocked due to cross slot dependency, + * can't be submitted to GPU. + * @KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_PREV: Atom has been added to slot + * fifo but is waiting for the completion of + * previously added atoms in current & other + * slots, as their protected mode requirements + * do not match with the current atom. + * @KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_TRANSITION: Atom is in slot fifo + * and is waiting for completion of protected + * mode transition, needed before the atom is + * submitted to GPU. + * @KBASE_ATOM_GPU_RB_WAITING_FOR_CORE_AVAILABLE: Atom is in slot fifo but is + * waiting for the cores, which are needed to + * execute the job chain represented by the atom, + * to become available + * @KBASE_ATOM_GPU_RB_READY: Atom is in slot fifo and can be submitted to + * GPU. + * @KBASE_ATOM_GPU_RB_SUBMITTED: Atom is in slot fifo and has been submitted + * to GPU. + * @KBASE_ATOM_GPU_RB_RETURN_TO_JS: Atom must be returned to JS due to some + * failure, but only after the previously added + * atoms in fifo have completed or have also + * been returned to JS. 
+ */ +enum kbase_atom_gpu_rb_state { + KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB, + KBASE_ATOM_GPU_RB_WAITING_BLOCKED, + KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_PREV, + KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_TRANSITION, + KBASE_ATOM_GPU_RB_WAITING_FOR_CORE_AVAILABLE, + KBASE_ATOM_GPU_RB_READY, + KBASE_ATOM_GPU_RB_SUBMITTED, + KBASE_ATOM_GPU_RB_RETURN_TO_JS = -1 +}; + +/** + * enum kbase_atom_enter_protected_state - The state of an atom with respect to + * the preparation for GPU's entry into protected mode, + * becomes pertinent only after atom's state with respect + * to slot ringbuffer is + * KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_TRANSITION + * @KBASE_ATOM_ENTER_PROTECTED_CHECK: Starting state. Check if there are any + * atoms currently submitted to GPU and protected mode + * transition is not already in progress. + * @KBASE_ATOM_ENTER_PROTECTED_HWCNT: Wait for hardware counter context to + * become disabled before entry into protected mode. + * @KBASE_ATOM_ENTER_PROTECTED_IDLE_L2: Wait for the L2 to become idle in + * preparation for the coherency change. L2 shall be + * powered down and GPU shall come out of fully + * coherent mode before entering protected mode. + * @KBASE_ATOM_ENTER_PROTECTED_SET_COHERENCY: Prepare coherency change; + * for BASE_HW_ISSUE_TGOX_R1_1234 also request L2 power on + * so that coherency register contains correct value when + * GPU enters protected mode. + * @KBASE_ATOM_ENTER_PROTECTED_FINISHED: End state; for + * BASE_HW_ISSUE_TGOX_R1_1234 check + * that L2 is powered up and switch GPU to protected mode. + */ +enum kbase_atom_enter_protected_state { + /* + * NOTE: The integer value of this must match + * KBASE_ATOM_EXIT_PROTECTED_CHECK. 
+ */ + KBASE_ATOM_ENTER_PROTECTED_CHECK = 0, + KBASE_ATOM_ENTER_PROTECTED_HWCNT, + KBASE_ATOM_ENTER_PROTECTED_IDLE_L2, + KBASE_ATOM_ENTER_PROTECTED_SET_COHERENCY, + KBASE_ATOM_ENTER_PROTECTED_FINISHED, +}; + +/** + * enum kbase_atom_exit_protected_state - The state of an atom with respect to + * the preparation for GPU's exit from protected mode, + * becomes pertinent only after atom's state with respect + * to slot ringbuffer is + * KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_TRANSITION + * @KBASE_ATOM_EXIT_PROTECTED_CHECK: Starting state. Check if there are any + * atoms currently submitted to GPU and protected mode + * transition is not already in progress. + * @KBASE_ATOM_EXIT_PROTECTED_IDLE_L2: Wait for the L2 to become idle in + * preparation for the reset, as exiting protected mode + * requires a reset. + * @KBASE_ATOM_EXIT_PROTECTED_RESET: Issue the reset to trigger exit from + * protected mode + * @KBASE_ATOM_EXIT_PROTECTED_RESET_WAIT: End state, Wait for the reset to + * complete + */ +enum kbase_atom_exit_protected_state { + /* + * NOTE: The integer value of this must match + * KBASE_ATOM_ENTER_PROTECTED_CHECK. + */ + KBASE_ATOM_EXIT_PROTECTED_CHECK = 0, + KBASE_ATOM_EXIT_PROTECTED_IDLE_L2, + KBASE_ATOM_EXIT_PROTECTED_RESET, + KBASE_ATOM_EXIT_PROTECTED_RESET_WAIT, +}; + +/** + * struct kbase_ext_res - Contains the info for external resources referred + * by an atom, which have been mapped on GPU side. + * @gpu_address: Start address of the memory region allocated for + * the resource from GPU virtual address space. + * @alloc: pointer to physical pages tracking object, set on + * mapping the external resource on GPU side. + */ +struct kbase_ext_res { + u64 gpu_address; + struct kbase_mem_phy_alloc *alloc; +}; + +/** + * struct kbase_jd_atom - object representing the atom, containing the complete + * state and attributes of an atom. 
+ * @work: work item for the bottom half processing of the atom, + * by JD or JS, after it got executed on GPU or the + * input fence got signaled + * @start_timestamp: time at which the atom was submitted to the GPU, by + * updating the JS_HEAD_NEXTn register. + * @udata: copy of the user data sent for the atom in + * base_jd_submit. + * @kctx: Pointer to the base context with which the atom is + * associated. + * @dep_head: Array of 2 list heads, pointing to the two list of + * atoms + * which are blocked due to dependency on this atom. + * @dep_item: Array of 2 list heads, used to store the atom in the + * list of other atoms depending on the same dependee + * atom. + * @dep: Array containing the dependency info for the 2 atoms + * on which the atom depends upon. + * @jd_item: List head used during job dispatch job_done + * processing - as dependencies may not be entirely + * resolved at this point, + * we need to use a separate list head. + * @in_jd_list: flag set to true if atom's @jd_item is currently on + * a list, prevents atom being processed twice. + * @jit_ids: Zero-terminated array of IDs of just-in-time memory + * allocations written to by the atom. When the atom + * completes, the value stored at the + * &struct_base_jit_alloc_info.heap_info_gpu_addr of + * each allocation is read in order to enforce an + * overall physical memory usage limit. + * @nr_extres: number of external resources referenced by the atom. + * @extres: pointer to the location containing info about + * @nr_extres external resources referenced by the atom. + * @device_nr: indicates the coregroup with which the atom is + * associated, when + * BASE_JD_REQ_SPECIFIC_COHERENT_GROUP specified. + * @jc: GPU address of the job-chain. + * @softjob_data: Copy of data read from the user space buffer that @jc + * points to. 
+ * @fence: Stores either an input or output sync fence, + * depending on soft-job type + * @sync_waiter: Pointer to the sync fence waiter structure passed to + * the callback function on signaling of the input + * fence. + * @dma_fence: object containing pointers to both input & output + * fences and other related members used for explicit + * sync through soft jobs and for the implicit + * synchronization required on access to external + * resources. + * @event_code: Event code for the job chain represented by the atom, + * both HW and low-level SW events are represented by + * event codes. + * @core_req: bitmask of BASE_JD_REQ_* flags specifying either + * Hw or Sw requirements for the job chain represented + * by the atom. + * @ticks: Number of scheduling ticks for which atom has been + * running on the GPU. + * @sched_priority: Priority of the atom for Job scheduling, as per the + * KBASE_JS_ATOM_SCHED_PRIO_*. + * @completed: Wait queue to wait upon for the completion of atom. + * @status: Indicates at high level at what stage the atom is in, + * as per KBASE_JD_ATOM_STATE_*, that whether it is not + * in use or it is queued in JD or given to JS or + * submitted to Hw or it completed the execution on Hw. + * @work_id: used for GPU tracepoints, it is a snapshot of the + * 'work_id' counter in kbase_jd_context which is + * incremented on every call to base_jd_submit. + * @slot_nr: Job slot chosen for the atom. + * @atom_flags: bitmask of KBASE_KATOM_FLAG* flags capturing the + * exact low level state of the atom. + * @gpu_rb_state: one of the KBASE_ATOM_GPU_RB_* states, precisely + * tracking atom's state after it has entered + * Job scheduler on becoming runnable. Atom + * could be blocked due to cross slot dependency + * or waiting for the shader cores to become available + * or waiting for protected mode transitions to + * complete. 
+ * @need_cache_flush_cores_retained: flag indicating that manual flush of GPU + * cache is needed for the atom and the shader cores + * used for atom have been kept on. + * @blocked: flag indicating that atom's resubmission to GPU is + * blocked till the work item is scheduled to return the + * atom to JS. + * @pre_dep: Pointer to atom that this atom has same-slot + * dependency on + * @post_dep: Pointer to atom that has same-slot dependency on + * this atom + * @x_pre_dep: Pointer to atom that this atom has cross-slot + * dependency on + * @x_post_dep: Pointer to atom that has cross-slot dependency on + * this atom + * @flush_id: The GPU's flush count recorded at the time of + * submission, + * used for the cache flush optimization + * @fault_event: Info for dumping the debug data on Job fault. + * @queue: List head used for 4 different purposes : + * Adds atom to the list of dma-buf fence waiting atoms. + * Adds atom to the list of atoms blocked due to cross + * slot dependency. + * Adds atom to the list of softjob atoms for which JIT + * allocation has been deferred + * Adds atom to the list of softjob atoms waiting for + * the signaling of fence. + * @jit_node: Used to keep track of all JIT free/alloc jobs in + * submission order + * @jit_blocked: Flag indicating that JIT allocation requested through + * softjob atom will be reattempted after the impending + * free of other active JIT allocations. + * @will_fail_event_code: If non-zero, this indicates that the atom will fail + * with the set event_code when the atom is processed. + * Used for special handling of atoms, which have a data + * dependency on the failed atoms. + * @protected_state: State of the atom, as per + * KBASE_ATOM_(ENTER|EXIT)_PROTECTED_*, + * when transitioning into or out of protected mode. + * Atom will be either entering or exiting the + * protected mode. + * @runnable_tree_node: The node added to context's job slot specific rb tree + * when the atom becomes runnable. 
+ * @age: Age of atom relative to other atoms in the context, + * is snapshot of the age_count counter in kbase + * context. + */ +struct kbase_jd_atom { + struct work_struct work; + ktime_t start_timestamp; + + struct base_jd_udata udata; + struct kbase_context *kctx; + + struct list_head dep_head[2]; + struct list_head dep_item[2]; + const struct kbase_jd_atom_dependency dep[2]; + struct list_head jd_item; + bool in_jd_list; + +#if MALI_JIT_PRESSURE_LIMIT + u8 jit_ids[2]; +#endif /* MALI_JIT_PRESSURE_LIMIT */ + + u16 nr_extres; + struct kbase_ext_res *extres; + + u32 device_nr; + u64 jc; + void *softjob_data; +#if defined(CONFIG_SYNC) + struct sync_fence *fence; + struct sync_fence_waiter sync_waiter; +#endif /* CONFIG_SYNC */ +#if defined(CONFIG_MALI_BIFROST_DMA_FENCE) || defined(CONFIG_SYNC_FILE) + struct { + /* Use the functions/API defined in mali_kbase_fence.h + * when working with this sub struct + */ +#if defined(CONFIG_SYNC_FILE) + /* Input fence */ +#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE) + struct fence *fence_in; +#else + struct dma_fence *fence_in; +#endif +#endif + /* This points to the dma-buf output fence for this atom. If + * this is NULL then there is no fence for this atom and the + * following fields related to dma_fence may have invalid data. + * + * The context and seqno fields contain the details for this + * fence. + * + * This fence is signaled when the katom is completed, + * regardless of the event_code of the katom (signal also on + * failure). + */ +#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE) + struct fence *fence; +#else + struct dma_fence *fence; +#endif + /* The dma-buf fence context number for this atom. A unique + * context number is allocated to each katom in the context on + * context creation. + */ + unsigned int context; + /* The dma-buf fence sequence number for this atom. This is + * increased every time this katom uses dma-buf fence. 
+ */ + atomic_t seqno; + /* This contains a list of all callbacks set up to wait on + * other fences. This atom must be held back from JS until all + * these callbacks have been called and dep_count has reached + * 0. The initial value of dep_count must be equal to the + * number of callbacks on this list. + * + * This list is protected by jctx.lock. Callbacks are added to + * this list when the atom is built and the waits are set up. + * All the callbacks then stay on the list until all callbacks + * have been called and the atom is queued, or cancelled, and + * then all callbacks are taken off the list and freed. + */ + struct list_head callbacks; + /* Atomic counter of number of outstanding dma-buf fence + * dependencies for this atom. When dep_count reaches 0 the + * atom may be queued. + * + * The special value "-1" may only be set after the count + * reaches 0, while holding jctx.lock. This indicates that the + * atom has been handled, either queued in JS or cancelled. + * + * If anyone but the dma-fence worker sets this to -1 they must + * ensure that any potentially queued worker must have + * completed before allowing the atom to be marked as unused. + * This can be done by flushing the fence work queue: + * kctx->dma_fence.wq. 
+ */ + atomic_t dep_count; + } dma_fence; +#endif /* CONFIG_MALI_BIFROST_DMA_FENCE || CONFIG_SYNC_FILE */ + + /* Note: refer to kbasep_js_atom_retained_state, which will take a copy + * of some of the following members + */ + enum base_jd_event_code event_code; + base_jd_core_req core_req; + u8 jobslot; + u8 renderpass_id; + struct base_jd_fragment jc_fragment; + + u32 ticks; + int sched_priority; + + wait_queue_head_t completed; + enum kbase_jd_atom_state status; +#ifdef CONFIG_GPU_TRACEPOINTS + int work_id; +#endif + int slot_nr; + + u32 atom_flags; + + int retry_count; + + enum kbase_atom_gpu_rb_state gpu_rb_state; + + bool need_cache_flush_cores_retained; + + atomic_t blocked; + + struct kbase_jd_atom *pre_dep; + struct kbase_jd_atom *post_dep; + + struct kbase_jd_atom *x_pre_dep; + struct kbase_jd_atom *x_post_dep; + + u32 flush_id; + +#ifdef CONFIG_DEBUG_FS + struct base_job_fault_event fault_event; +#endif + struct list_head queue; + + struct list_head jit_node; + bool jit_blocked; + + enum base_jd_event_code will_fail_event_code; + + union { + enum kbase_atom_enter_protected_state enter; + enum kbase_atom_exit_protected_state exit; + } protected_state; + + struct rb_node runnable_tree_node; + + u32 age; +}; + +static inline bool kbase_jd_katom_is_protected( + const struct kbase_jd_atom *katom) +{ + return (bool)(katom->atom_flags & KBASE_KATOM_FLAG_PROTECTED); +} + +/* + * Theory of operations: + * + * Atom objects are statically allocated within the context structure. + * + * Each atom is the head of two lists, one for the "left" set of dependencies, + * one for the "right" set. + */ + +#define KBASE_JD_DEP_QUEUE_SIZE 256 + +/** + * enum kbase_jd_renderpass_state - State of a renderpass + * @KBASE_JD_RP_COMPLETE: Unused or completed renderpass. Can only transition to + * START. + * @KBASE_JD_RP_START: Renderpass making a first attempt at tiling. + * Can transition to PEND_OOM or COMPLETE.
+ * @KBASE_JD_RP_PEND_OOM: Renderpass whose first attempt at tiling used too much + * memory and has a soft-stop pending. Can transition to + * OOM or COMPLETE. + * @KBASE_JD_RP_OOM: Renderpass whose first attempt at tiling used too much + * memory and therefore switched to incremental + * rendering. The fragment job chain is forced to run. + * Can only transition to RETRY. + * @KBASE_JD_RP_RETRY: Renderpass making a second or subsequent attempt at + * tiling. Can transition to RETRY_PEND_OOM or COMPLETE. + * @KBASE_JD_RP_RETRY_PEND_OOM: Renderpass whose second or subsequent attempt at + * tiling used too much memory again and has a + * soft-stop pending. Can transition to RETRY_OOM + * or COMPLETE. + * @KBASE_JD_RP_RETRY_OOM: Renderpass whose second or subsequent attempt at + * tiling used too much memory again. The fragment job + * chain is forced to run. Can only transition to RETRY. + * + * A state machine is used to control incremental rendering. + */ +enum kbase_jd_renderpass_state { + KBASE_JD_RP_COMPLETE, /* COMPLETE => START */ + KBASE_JD_RP_START, /* START => PEND_OOM or COMPLETE */ + KBASE_JD_RP_PEND_OOM, /* PEND_OOM => OOM or COMPLETE */ + KBASE_JD_RP_OOM, /* OOM => RETRY */ + KBASE_JD_RP_RETRY, /* RETRY => RETRY_PEND_OOM or + * COMPLETE + */ + KBASE_JD_RP_RETRY_PEND_OOM, /* RETRY_PEND_OOM => RETRY_OOM or + * COMPLETE + */ + KBASE_JD_RP_RETRY_OOM, /* RETRY_OOM => RETRY */ +}; + +/** + * struct kbase_jd_renderpass - Data for a renderpass + * @state: Current state of the renderpass. If KBASE_JD_RP_COMPLETE then + * all other members are invalid. + * Both the job dispatcher context and hwaccess_lock must be + * locked to modify this so that it can be read with either + * (or both) locked. + * @start_katom: Address of the atom that is the start of a renderpass. + * Both the job dispatcher context and hwaccess_lock must be + * locked to modify this so that it can be read with either + * (or both) locked. 
+ * @end_katom: Address of the atom that is the end of a renderpass, or NULL + * if that atom hasn't been added to the job scheduler yet. + * The job dispatcher context and hwaccess_lock must be + * locked to modify this so that it can be read with either + * (or both) locked. + * @oom_reg_list: A list of region structures which triggered out-of-memory. + * The hwaccess_lock must be locked to access this. + * + * Atoms tagged with BASE_JD_REQ_START_RENDERPASS or BASE_JD_REQ_END_RENDERPASS + * are associated with an object of this type, which is created and maintained + * by kbase to keep track of each renderpass. + */ +struct kbase_jd_renderpass { + enum kbase_jd_renderpass_state state; + struct kbase_jd_atom *start_katom; + struct kbase_jd_atom *end_katom; + struct list_head oom_reg_list; +}; + +/** + * struct kbase_jd_context - per context object encapsulating all the + * Job dispatcher related state. + * @lock: lock to serialize the updates made to the + * Job dispatcher state and kbase_jd_atom objects. + * @sched_info: Structure encapsulating all the Job scheduling + * info. + * @atoms: Array of the objects representing atoms, + * containing the complete state and attributes + * of an atom. + * @renderpasses: Array of renderpass state for incremental + * rendering, indexed by user-specified renderpass + * ID. + * @job_nr: Tracks the number of atoms being processed by the + * kbase. This includes atoms that are not tracked by + * scheduler: 'not ready to run' & 'dependency-only' + * jobs. + * @zero_jobs_wait: Waitq that reflects whether there are no jobs + * (including SW-only dependency jobs). This is set + * when no jobs are present on the ctx, and clear + * when there are jobs. + * This must be updated atomically with @job_nr. + * note: Job Dispatcher knows about more jobs than + * the Job Scheduler as it is unaware of jobs that + * are blocked on dependencies and SW-only dependency + * jobs. 
This waitq can be waited upon to find out + * when the context jobs are all done/cancelled + * (including those that might've been blocked + * on dependencies) - and so, whether it can be + * terminated. However, it should only be terminated + * once it is not present in the run-pool. + * Since the waitq is only set under @lock, + * the waiter should also briefly obtain and drop + * @lock to guarantee that the setter has completed + * its work on the kbase_context. + * @job_done_wq: Workqueue to which the per atom work item is + * queued for bottom half processing when the + * atom completes + * execution on GPU or the input fence gets signaled. + * @tb_lock: Lock to serialize the write access made to @tb + * to store the register access trace messages. + * @tb: Pointer to the Userspace accessible buffer storing + * the trace messages for register read/write + * accesses made by the Kbase. The buffer is filled + * in circular fashion. + * @tb_wrap_offset: Offset to the end location in the trace buffer, + * the write pointer is moved to the beginning on + * reaching this offset. + * @work_id: atomic variable used for GPU tracepoints, + * incremented on every call to base_jd_submit. + * @jit_atoms_head: A list of the just-in-time memory soft-jobs, both + * allocate & free, in submission order, protected + * by kbase_jd_context.lock. + * @jit_pending_alloc: A list of just-in-time memory allocation + * soft-jobs which will be reattempted after the + * impending free of other active allocations.
+ */ +struct kbase_jd_context { + struct mutex lock; + struct kbasep_js_kctx_info sched_info; + struct kbase_jd_atom atoms[BASE_JD_ATOM_COUNT]; + struct kbase_jd_renderpass renderpasses[BASE_JD_RP_COUNT]; + struct workqueue_struct *job_done_wq; + + wait_queue_head_t zero_jobs_wait; + spinlock_t tb_lock; + u32 *tb; + u32 job_nr; + size_t tb_wrap_offset; + +#ifdef CONFIG_GPU_TRACEPOINTS + atomic_t work_id; +#endif + + struct list_head jit_atoms_head; + struct list_head jit_pending_alloc; +}; + +/** + * struct jsctx_queue - JS context atom queue + * @runnable_tree: Root of RB-tree containing currently runnable atoms on this + * job slot. + * @x_dep_head: Head item of the linked list of atoms blocked on cross-slot + * dependencies. Atoms on this list will be moved to the + * runnable_tree when the blocking atom completes. + * + * hwaccess_lock must be held when accessing this structure. + */ +struct jsctx_queue { + struct rb_root runnable_tree; + struct list_head x_dep_head; +}; + +#endif /* _KBASE_JM_DEFS_H_ */ diff --git a/drivers/gpu/arm/bifrost/jm/mali_kbase_jm_ioctl.h b/drivers/gpu/arm/bifrost/jm/mali_kbase_jm_ioctl.h new file mode 100644 index 000000000000..408e98e33519 --- /dev/null +++ b/drivers/gpu/arm/bifrost/jm/mali_kbase_jm_ioctl.h @@ -0,0 +1,136 @@ +/* + * + * (C) COPYRIGHT 2020 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +#ifndef _KBASE_JM_IOCTL_H_ +#define _KBASE_JM_IOCTL_H_ + +#include +#include + +/* + * 11.1: + * - Add BASE_MEM_TILER_ALIGN_TOP under base_mem_alloc_flags + * 11.2: + * - KBASE_MEM_QUERY_FLAGS can return KBASE_REG_PF_GROW and KBASE_REG_PROTECTED, + * which some user-side clients prior to 11.2 might fault if they received + * them + * 11.3: + * - New ioctls KBASE_IOCTL_STICKY_RESOURCE_MAP and + * KBASE_IOCTL_STICKY_RESOURCE_UNMAP + * 11.4: + * - New ioctl KBASE_IOCTL_MEM_FIND_GPU_START_AND_OFFSET + * 11.5: + * - New ioctl: KBASE_IOCTL_MEM_JIT_INIT (old ioctl renamed to _OLD) + * 11.6: + * - Added flags field to base_jit_alloc_info structure, which can be used to + * specify pseudo chunked tiler alignment for JIT allocations. + * 11.7: + * - Removed UMP support + * 11.8: + * - Added BASE_MEM_UNCACHED_GPU under base_mem_alloc_flags + * 11.9: + * - Added BASE_MEM_PERMANENT_KERNEL_MAPPING and BASE_MEM_FLAGS_KERNEL_ONLY + * under base_mem_alloc_flags + * 11.10: + * - Enabled the use of nr_extres field of base_jd_atom_v2 structure for + * JIT_ALLOC and JIT_FREE type softjobs to enable multiple JIT allocations + * with one softjob. + * 11.11: + * - Added BASE_MEM_GPU_VA_SAME_4GB_PAGE under base_mem_alloc_flags + * 11.12: + * - Removed ioctl: KBASE_IOCTL_GET_PROFILING_CONTROLS + * 11.13: + * - New ioctl: KBASE_IOCTL_MEM_EXEC_INIT + * 11.14: + * - Add BASE_MEM_GROUP_ID_MASK, base_mem_group_id_get, base_mem_group_id_set + * under base_mem_alloc_flags + * 11.15: + * - Added BASEP_CONTEXT_MMU_GROUP_ID_MASK under base_context_create_flags. + * - Require KBASE_IOCTL_SET_FLAGS before BASE_MEM_MAP_TRACKING_HANDLE can be + * passed to mmap(). + * 11.16: + * - Extended ioctl KBASE_IOCTL_MEM_SYNC to accept imported dma-buf. 
+ * - Modified (backwards compatible) ioctl KBASE_IOCTL_MEM_IMPORT behavior for + * dma-buf. Now, buffers are mapped on GPU when first imported, no longer + * requiring external resource or sticky resource tracking. UNLESS, + * CONFIG_MALI_DMA_BUF_MAP_ON_DEMAND is enabled. + * 11.17: + * - Added BASE_JD_REQ_JOB_SLOT. + * - Reused padding field in base_jd_atom_v2 to pass job slot number. + * - New ioctl: KBASE_IOCTL_GET_CPU_GPU_TIMEINFO + * 11.18: + * - Added BASE_MEM_IMPORT_SYNC_ON_MAP_UNMAP under base_mem_alloc_flags + * 11.19: + * - Extended base_jd_atom_v2 to allow a renderpass ID to be specified. + * 11.20: + * - Added new phys_pages member to kbase_ioctl_mem_jit_init for + * KBASE_IOCTL_MEM_JIT_INIT, previous variants of this renamed to use _10_2 + * (replacing '_OLD') and _11_5 suffixes + * - Replaced compat_core_req (deprecated in 10.3) with jit_id[2] in + * base_jd_atom_v2. It must currently be initialized to zero. + * - Added heap_info_gpu_addr to base_jit_alloc_info, and + * BASE_JIT_ALLOC_HEAP_INFO_IS_SIZE allowable in base_jit_alloc_info's + * flags member. Previous variants of this structure are kept and given _10_2 + * and _11_5 suffixes. 
+ * - The above changes are checked for safe values in usual builds + * 11.21: + * - v2.0 of mali_trace debugfs file, which now versions the file separately + */ +#define BASE_UK_VERSION_MAJOR 11 +#define BASE_UK_VERSION_MINOR 21 + +/** + * struct kbase_ioctl_job_submit - Submit jobs/atoms to the kernel + * + * @addr: Memory address of an array of struct base_jd_atom_v2 + * @nr_atoms: Number of entries in the array + * @stride: sizeof(struct base_jd_atom_v2) + */ +struct kbase_ioctl_job_submit { + __u64 addr; + __u32 nr_atoms; + __u32 stride; +}; + +#define KBASE_IOCTL_JOB_SUBMIT \ + _IOW(KBASE_IOCTL_TYPE, 2, struct kbase_ioctl_job_submit) + +#define KBASE_IOCTL_POST_TERM \ + _IO(KBASE_IOCTL_TYPE, 4) + +/** + * struct kbase_ioctl_soft_event_update - Update the status of a soft-event + * @event: GPU address of the event which has been updated + * @new_status: The new status to set + * @flags: Flags for future expansion + */ +struct kbase_ioctl_soft_event_update { + __u64 event; + __u32 new_status; + __u32 flags; +}; + +#define KBASE_IOCTL_SOFT_EVENT_UPDATE \ + _IOW(KBASE_IOCTL_TYPE, 28, struct kbase_ioctl_soft_event_update) + + +#endif /* _KBASE_JM_IOCTL_H_ */ diff --git a/drivers/gpu/arm/bifrost/jm/mali_kbase_jm_js.h b/drivers/gpu/arm/bifrost/jm/mali_kbase_jm_js.h new file mode 100644 index 000000000000..6c222ceae8ee --- /dev/null +++ b/drivers/gpu/arm/bifrost/jm/mali_kbase_jm_js.h @@ -0,0 +1,892 @@ +/* + * + * (C) COPYRIGHT 2020 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +/* + * Job Scheduler Interface. + * These interfaces are Internal to KBase. + */ + +#ifndef _KBASE_JM_JS_H_ +#define _KBASE_JM_JS_H_ + +#include "mali_kbase_js_ctx_attr.h" + +/** + * kbasep_js_devdata_init - Initialize the Job Scheduler + * + * The struct kbasep_js_device_data sub-structure of kbdev must be zero + * initialized before passing to the kbasep_js_devdata_init() function. This is + * to give efficient error path code. + */ +int kbasep_js_devdata_init(struct kbase_device * const kbdev); + +/** + * kbasep_js_devdata_halt - Halt the Job Scheduler. + * + * It is safe to call this on kbdev even if the kbasep_js_device_data + * sub-structure was never initialized/failed initialization, to give efficient + * error-path code. + * + * For this to work, the struct kbasep_js_device_data sub-structure of kbdev + * must be zero initialized before passing to the kbasep_js_devdata_init() + * function. This is to give efficient error path code. + * + * It is a programming error to call this whilst there are still kbase_context + * structures registered with this scheduler. + * + */ +void kbasep_js_devdata_halt(struct kbase_device *kbdev); + +/** + * kbasep_js_devdata_term - Terminate the Job Scheduler + * + * It is safe to call this on kbdev even if the kbasep_js_device_data + * sub-structure was never initialized/failed initialization, to give efficient + * error-path code. + * + * For this to work, the struct kbasep_js_device_data sub-structure of kbdev + * must be zero initialized before passing to the kbasep_js_devdata_init() + * function. This is to give efficient error path code. + * + * It is a programming error to call this whilst there are still kbase_context + * structures registered with this scheduler. 
+ */ +void kbasep_js_devdata_term(struct kbase_device *kbdev); + +/** + * kbasep_js_kctx_init - Initialize the Scheduling Component of a + * struct kbase_context on the Job Scheduler. + * + * This effectively registers a struct kbase_context with a Job Scheduler. + * + * It does not register any jobs owned by the struct kbase_context with + * the scheduler. Those must be separately registered by kbasep_js_add_job(). + * + * The struct kbase_context must be zero initialized before passing to the + * kbasep_js_kctx_init() function. This is to give efficient error path code. + */ +int kbasep_js_kctx_init(struct kbase_context *const kctx); + +/** + * kbasep_js_kctx_term - Terminate the Scheduling Component of a + * struct kbase_context on the Job Scheduler + * + * This effectively de-registers a struct kbase_context from its Job Scheduler + * + * It is safe to call this on a struct kbase_context that has never had or + * failed initialization of its jctx.sched_info member, to give efficient + * error-path code. + * + * For this to work, the struct kbase_context must be zero initialized before + * passing to the kbasep_js_kctx_init() function. + * + * It is a Programming Error to call this whilst there are still jobs + * registered with this context. + */ +void kbasep_js_kctx_term(struct kbase_context *kctx); + +/** + * kbasep_js_add_job - Add a job chain to the Job Scheduler, + * and take necessary actions to + * schedule the context/run the job.
+ * + * This atomically does the following: + * * Update the numbers of jobs information + * * Add the job to the run pool if necessary (part of init_job) + * + * Once this is done, then an appropriate action is taken: + * * If the ctx is scheduled, it attempts to start the next job (which might be + * this added job) + * * Otherwise, and if this is the first job on the context, it enqueues it on + * the Policy Queue + * + * The Policy's Queue can be updated by this in the following ways: + * * In the above case that this is the first job on the context + * * If the context is high priority and the context is not scheduled, then it + * could cause the Policy to schedule out a low-priority context, allowing + * this context to be scheduled in. + * + * If the context is already scheduled on the RunPool, then adding a job to it + * is guaranteed not to update the Policy Queue. And so, the caller is + * guaranteed to not need to try scheduling a context from the Run Pool - it + * can safely assert that the result is false. + * + * It is a programming error to have more than U32_MAX jobs in flight at a time. + * + * The following locking conditions are made on the caller: + * * it must not hold kbasep_js_kctx_info::ctx::jsctx_mutex. + * * it must not hold hwaccess_lock (as this will be obtained internally) + * * it must not hold kbasep_js_device_data::runpool_mutex (as this will be + * obtained internally) + * * it must not hold kbasep_jd_device_data::queue_mutex (again, it's used + * internally). + * + * Return: true indicates that the Policy Queue was updated, and so the + * caller will need to try scheduling a context onto the Run Pool, + * false indicates that no updates were made to the Policy Queue, + * so no further action is required from the caller. This is always returned + * when the context is currently scheduled. 
+ */ +bool kbasep_js_add_job(struct kbase_context *kctx, struct kbase_jd_atom *atom); + +/** + * kbasep_js_remove_job - Remove a job chain from the Job Scheduler, + * except for its 'retained state'. + * + * Completely removing a job requires several calls: + * * kbasep_js_copy_atom_retained_state(), to capture the 'retained state' of + * the atom + * * kbasep_js_remove_job(), to partially remove the atom from the Job Scheduler + * * kbasep_js_runpool_release_ctx_and_katom_retained_state(), to release the + * remaining state held as part of the job having been run. + * + * In the common case of atoms completing normally, this set of actions is more + * optimal for spinlock purposes than having kbasep_js_remove_job() handle all + * of the actions. + * + * In the case of canceling atoms, it is easier to call + * kbasep_js_remove_cancelled_job(), which handles all the necessary actions. + * + * It is a programming error to call this when: + * * @atom is not a job belonging to kctx. + * * @atom has already been removed from the Job Scheduler. + * * @atom is still in the runpool + * + * Do not use this for removing jobs being killed by kbase_jd_cancel() - use + * kbasep_js_remove_cancelled_job() instead. + * + * The following locking conditions are made on the caller: + * * it must hold kbasep_js_kctx_info::ctx::jsctx_mutex. + * + */ +void kbasep_js_remove_job(struct kbase_device *kbdev, + struct kbase_context *kctx, struct kbase_jd_atom *atom); + +/** + * kbasep_js_remove_cancelled_job - Completely remove a job chain from the + * Job Scheduler, in the case + * where the job chain was cancelled. + * + * This is a variant of kbasep_js_remove_job() that takes care of removing all + * of the retained state too. This is generally useful for cancelled atoms, + * which need not be handled in an optimal way. + * + * It is a programming error to call this when: + * * @katom is not a job belonging to kctx. + * * @katom has already been removed from the Job Scheduler. 
+ * * @katom is still in the runpool: + * * it is not being killed with kbasep_jd_cancel() + * + * The following locking conditions are made on the caller: + * * it must hold kbasep_js_kctx_info::ctx::jsctx_mutex. + * * it must not hold the hwaccess_lock (as this will be obtained + * internally) + * * it must not hold kbasep_js_device_data::runpool_mutex (as this could be + * obtained internally) + * + * Return: true indicates that ctx attributes have changed and the caller + * should call kbase_js_sched_all() to try to run more jobs and + * false otherwise. + */ +bool kbasep_js_remove_cancelled_job(struct kbase_device *kbdev, + struct kbase_context *kctx, + struct kbase_jd_atom *katom); + +/** + * kbasep_js_runpool_requeue_or_kill_ctx - Handling the requeuing/killing of a + * context that was evicted from the + * policy queue or runpool. + * + * This should be used whenever handing off a context that has been evicted + * from the policy queue or the runpool: + * * If the context is not dying and has jobs, it gets re-added to the policy + * queue + * * Otherwise, it is not added + * + * In addition, if the context is dying the jobs are killed asynchronously. + * + * In all cases, the Power Manager active reference is released + * (kbase_pm_context_idle()) whenever the has_pm_ref parameter is true. + * has_pm_ref must be set to false whenever the context was not previously in + * the runpool and does not hold a Power Manager active refcount. Note that + * contexts in a rollback of kbasep_js_try_schedule_head_ctx() might have an + * active refcount even though they weren't in the runpool. + * + * The following locking conditions are made on the caller: + * * it must hold kbasep_js_kctx_info::ctx::jsctx_mutex.
+ * * it must not hold kbasep_jd_device_data::queue_mutex (as this will be + * obtained internally) + */ +void kbasep_js_runpool_requeue_or_kill_ctx(struct kbase_device *kbdev, + struct kbase_context *kctx, bool has_pm_ref); + +/** + * kbasep_js_runpool_release_ctx - Release a refcount of a context being busy, + * allowing it to be scheduled out. + * + * When the refcount reaches zero, the context might be scheduled out + * (depending on whether the Scheduling Policy has deemed it so, or if it has + * run out of jobs). + * + * If the context does get scheduled out, then the following actions will be + * taken as part of descheduling a context: + * For the context being descheduled: + * * If the context is in the process of dying (all the jobs are being + * removed from it), then descheduling also kills off any jobs remaining in the + * context. + * * If the context is not dying, and any jobs remain after descheduling the + * context then it is re-enqueued to the Policy's Queue. + * * Otherwise, the context is still known to the scheduler, but remains absent + * from the Policy Queue until a job is next added to it. + * * In all descheduling cases, the Power Manager active reference (obtained + * during kbasep_js_try_schedule_head_ctx()) is released + * (kbase_pm_context_idle()). + * + * Whilst the context is being descheduled, this also handles actions that + * cause more atoms to be run: + * * Attempt submitting atoms when the Context Attributes on the Runpool have + * changed. This is because the context being scheduled out could mean that + * there are more opportunities to run atoms. + * * Attempt submitting to a slot that was previously blocked due to affinity + * restrictions. This is usually only necessary when releasing a context + * happens as part of completing a previous job, but is harmless nonetheless. + * * Attempt scheduling in a new context (if one is available), and if + * necessary, running a job from that new context. 
+ * + * Unlike retaining a context in the runpool, this function cannot be called + * from IRQ context. + * + * It is a programming error to call this on a kctx that is not currently + * scheduled, or that already has a zero refcount. + * + * The following locking conditions are made on the caller: + * * it must not hold the hwaccess_lock, because it will be used internally. + * * it must not hold kbasep_js_kctx_info::ctx::jsctx_mutex. + * * it must not hold kbasep_js_device_data::runpool_mutex (as this will be + * obtained internally) + * * it must not hold the kbase_device::mmu_hw_mutex (as this will be + * obtained internally) + * * it must not hold kbasep_jd_device_data::queue_mutex (as this will be + * obtained internally) + * + */ +void kbasep_js_runpool_release_ctx(struct kbase_device *kbdev, + struct kbase_context *kctx); + +/** + * kbasep_js_runpool_release_ctx_and_katom_retained_state - Variant of + * kbasep_js_runpool_release_ctx() that handles additional + * actions from completing an atom. + * + * This is usually called as part of completing an atom and releasing the + * refcount on the context held by the atom. + * + * Therefore, the extra actions carried out are part of handling actions queued + * on a completed atom, namely: + * * Releasing the atom's context attributes + * * Retrying the submission on a particular slot, because we couldn't submit + * on that slot from an IRQ handler. + * + * The locking conditions of this function are the same as those for + * kbasep_js_runpool_release_ctx() + */ +void kbasep_js_runpool_release_ctx_and_katom_retained_state( + struct kbase_device *kbdev, + struct kbase_context *kctx, + struct kbasep_js_atom_retained_state *katom_retained_state); + +/** + * kbasep_js_runpool_release_ctx_nolock - Variant of + * kbasep_js_runpool_release_ctx() that assumes that + * kbasep_js_device_data::runpool_mutex and + * kbasep_js_kctx_info::ctx::jsctx_mutex are held by the caller, and does not + * attempt to schedule new contexts. 
+ */ +void kbasep_js_runpool_release_ctx_nolock(struct kbase_device *kbdev, + struct kbase_context *kctx); + +/** + * kbasep_js_schedule_privileged_ctx - Schedule in a privileged context + * + * This schedules a context in regardless of the context priority. + * If the runpool is full, a context will be forced out of the runpool and the + * function will wait for the new context to be scheduled in. + * The context will be kept scheduled in (and the corresponding address space + * reserved) until kbasep_js_release_privileged_ctx is called. + * + * The following locking conditions are made on the caller: + * * it must not hold the hwaccess_lock, because it will be used internally. + * * it must not hold kbasep_js_device_data::runpool_mutex (as this will be + * obtained internally) + * * it must not hold the kbase_device::mmu_hw_mutex (as this will be + * obtained internally) + * * it must not hold kbasep_jd_device_data::queue_mutex (again, it's used + * internally). + * * it must not hold kbasep_js_kctx_info::ctx::jsctx_mutex, because it will + * be used internally. + * + */ +void kbasep_js_schedule_privileged_ctx(struct kbase_device *kbdev, + struct kbase_context *kctx); + +/** + * kbasep_js_release_privileged_ctx - Release a privileged context, + * allowing it to be scheduled out. + * + * See kbasep_js_runpool_release_ctx for potential side effects. + * + * The following locking conditions are made on the caller: + * * it must not hold the hwaccess_lock, because it will be used internally. + * * it must not hold kbasep_js_kctx_info::ctx::jsctx_mutex.
+ * * it must not hold kbasep_js_device_data::runpool_mutex (as this will be + * obtained internally) + * * it must not hold the kbase_device::mmu_hw_mutex (as this will be + * obtained internally) + * + */ +void kbasep_js_release_privileged_ctx(struct kbase_device *kbdev, + struct kbase_context *kctx); + +/** + * kbase_js_try_run_jobs - Try to submit the next job on each slot + * + * The following locks may be used: + * * kbasep_js_device_data::runpool_mutex + * * hwaccess_lock + */ +void kbase_js_try_run_jobs(struct kbase_device *kbdev); + +/** + * kbasep_js_suspend - Suspend the job scheduler during a Power Management + * Suspend event. + * + * Causes all contexts to be removed from the runpool, and prevents any + * contexts from (re)entering the runpool. + * + * This does not handle suspending the one privileged context: the caller must + * instead do this by suspending the GPU HW Counter Instrumentation. + * + * This will eventually cause all Power Management active references held by + * contexts on the runpool to be released, without running any more atoms. + * + * The caller must then wait for all Power Management active refcount to become + * zero before completing the suspend. + * + * The emptying mechanism may take some time to complete, since it can wait for + * jobs to complete naturally instead of forcing them to end quickly. However, + * this is bounded by the Job Scheduler's Job Timeouts. Hence, this + * function is guaranteed to complete in a finite time. + */ +void kbasep_js_suspend(struct kbase_device *kbdev); + +/** + * kbasep_js_resume - Resume the Job Scheduler after a Power Management + * Resume event. + * + * This restores the actions from kbasep_js_suspend(): + * * Schedules contexts back into the runpool + * * Resumes running atoms on the GPU + */ +void kbasep_js_resume(struct kbase_device *kbdev); + +/** + * kbase_js_dep_resolved_submit - Submit an atom to the job scheduler.
+ * + * @kctx: Context pointer + * @katom: Pointer to the atom to submit + * + * The atom is enqueued on the context's ringbuffer. The caller must have + * ensured that all dependencies can be represented in the ringbuffer. + * + * Caller must hold jctx->lock + * + * Return: true if the context requires to be enqueued, otherwise false. + */ +bool kbase_js_dep_resolved_submit(struct kbase_context *kctx, + struct kbase_jd_atom *katom); + +/** + * jsctx_ll_flush_to_rb() - Pushes atoms from the linked list to ringbuffer. + * @kctx: Context Pointer + * @prio: Priority (specifies the queue together with js). + * @js: Job slot (specifies the queue together with prio). + * + * Pushes all possible atoms from the linked list to the ringbuffer. + * Number of atoms are limited to free space in the ringbuffer and + * number of available atoms in the linked list. + * + */ +void jsctx_ll_flush_to_rb(struct kbase_context *kctx, int prio, int js); + +/** + * kbase_js_pull - Pull an atom from a context in the job scheduler for + * execution. + * + * @kctx: Context to pull from + * @js: Job slot to pull from + * + * The atom will not be removed from the ringbuffer at this stage. + * + * The HW access lock must be held when calling this function. + * + * Return: a pointer to an atom, or NULL if there are no atoms for this + * slot that can be currently run. + */ +struct kbase_jd_atom *kbase_js_pull(struct kbase_context *kctx, int js); + +/** + * kbase_js_unpull - Return an atom to the job scheduler ringbuffer. + * + * @kctx: Context pointer + * @katom: Pointer to the atom to unpull + * + * An atom is 'unpulled' if execution is stopped but intended to be returned to + * later. The most common reason for this is that the atom has been + * soft-stopped. Another reason is if an end-of-renderpass atom completed + * but will need to be run again as part of the same renderpass.
+ * + * Note that if multiple atoms are to be 'unpulled', they must be returned in + * the reverse order to which they were originally pulled. It is a programming + * error to return atoms in any other order. + * + * The HW access lock must be held when calling this function. + * + */ +void kbase_js_unpull(struct kbase_context *kctx, struct kbase_jd_atom *katom); + +/** + * kbase_js_complete_atom_wq - Complete an atom from jd_done_worker(), + * removing it from the job + * scheduler ringbuffer. + * @kctx: Context pointer + * @katom: Pointer to the atom to complete + * + * If the atom failed then all dependee atoms marked for failure propagation + * will also fail. + * + * Return: true if the context is now idle (no jobs pulled) false otherwise. + */ +bool kbase_js_complete_atom_wq(struct kbase_context *kctx, + struct kbase_jd_atom *katom); + +/** + * kbase_js_complete_atom - Complete an atom. + * + * @katom: Pointer to the atom to complete + * @end_timestamp: The time that the atom completed (may be NULL) + * + * Most of the work required to complete an atom will be performed by + * jd_done_worker(). + * + * The HW access lock must be held when calling this function. + * + * Return: an atom that has now been unblocked and can now be run, or NULL + * if none + */ +struct kbase_jd_atom *kbase_js_complete_atom(struct kbase_jd_atom *katom, + ktime_t *end_timestamp); + +/** + * kbase_js_atom_blocked_on_x_dep - Decide whether to ignore a cross-slot + * dependency + * @katom: Pointer to an atom in the slot ringbuffer + * + * A cross-slot dependency is ignored if necessary to unblock incremental + * rendering. If the atom at the start of a renderpass used too much memory + * and was soft-stopped then the atom at the end of a renderpass is submitted + * to hardware regardless of its dependency on the start-of-renderpass atom. + * This can happen multiple times for the same pair of atoms.
+ * + * Return: true to block the atom or false to allow it to be submitted to + * hardware. + */ +bool kbase_js_atom_blocked_on_x_dep(struct kbase_jd_atom *katom); + +/** + * kbase_js_sched - Submit atoms from all available contexts. + * + * @kbdev: Device pointer + * @js_mask: Mask of job slots to submit to + * + * This will attempt to submit as many jobs as possible to the provided job + * slots. It will exit when either all job slots are full, or all contexts have + * been used. + * + */ +void kbase_js_sched(struct kbase_device *kbdev, int js_mask); + +/** + * kbase_js_zap_context - Attempt to deschedule a context that is being + * destroyed + * @kctx: Context pointer + * + * This will attempt to remove a context from any internal job scheduler queues + * and perform any other actions to ensure a context will not be submitted + * from. + * + * If the context is currently scheduled, then the caller must wait for all + * pending jobs to complete before taking any further action. + */ +void kbase_js_zap_context(struct kbase_context *kctx); + +/** + * kbase_js_is_atom_valid - Validate an atom + * + * @kbdev: Device pointer + * @katom: Atom to validate + * + * This will determine whether the atom can be scheduled onto the GPU. Atoms + * with invalid combinations of core requirements will be rejected. + * + * Return: true if atom is valid false otherwise. + */ +bool kbase_js_is_atom_valid(struct kbase_device *kbdev, + struct kbase_jd_atom *katom); + +/** + * kbase_js_set_timeouts - update all JS timeouts with user specified data + * + * @kbdev: Device pointer + * + * Timeouts are specified through the 'js_timeouts' sysfs file. If a timeout is + * set to a positive number then that becomes the new value used, if a timeout + * is negative then the default is set.
+ */ +void kbase_js_set_timeouts(struct kbase_device *kbdev); + +/** + * kbase_js_set_ctx_priority - set the context priority + * + * @kctx: Context pointer + * @new_priority: New priority value for the Context + * + * The context priority is set to a new value and it is moved to the + * pullable/unpullable list as per the new priority. + */ +void kbase_js_set_ctx_priority(struct kbase_context *kctx, int new_priority); + + +/** + * kbase_js_update_ctx_priority - update the context priority + * + * @kctx: Context pointer + * + * The context priority gets updated as per the priority of atoms currently in + * use for that context, but only if system priority mode for context scheduling + * is being used. + */ +void kbase_js_update_ctx_priority(struct kbase_context *kctx); + +/* + * Helpers follow + */ + +/** + * kbasep_js_is_submit_allowed - Check that a context is allowed to submit + * jobs on this policy + * + * The purpose of this abstraction is to hide the underlying data size, + * and wrap up the long repeated line of code. + * + * As with any bool, never test the return value with true. + * + * The caller must hold hwaccess_lock. + */ +static inline bool kbasep_js_is_submit_allowed( + struct kbasep_js_device_data *js_devdata, + struct kbase_context *kctx) +{ + u16 test_bit; + bool is_allowed; + + /* Ensure context really is scheduled in */ + KBASE_DEBUG_ASSERT(kctx->as_nr != KBASEP_AS_NR_INVALID); + KBASE_DEBUG_ASSERT(kbase_ctx_flag(kctx, KCTX_SCHEDULED)); + + test_bit = (u16) (1u << kctx->as_nr); + + is_allowed = (bool) (js_devdata->runpool_irq.submit_allowed & test_bit); + dev_dbg(kctx->kbdev->dev, "JS: submit %s allowed on %p (as=%d)", + is_allowed ? "is" : "isn't", (void *)kctx, kctx->as_nr); + return is_allowed; +} + +/** + * kbasep_js_set_submit_allowed - Allow a context to submit jobs on this policy + * + * The purpose of this abstraction is to hide the underlying data size, + * and wrap up the long repeated line of code. 
+ * + * The caller must hold hwaccess_lock. + */ +static inline void kbasep_js_set_submit_allowed( + struct kbasep_js_device_data *js_devdata, + struct kbase_context *kctx) +{ + u16 set_bit; + + /* Ensure context really is scheduled in */ + KBASE_DEBUG_ASSERT(kctx->as_nr != KBASEP_AS_NR_INVALID); + KBASE_DEBUG_ASSERT(kbase_ctx_flag(kctx, KCTX_SCHEDULED)); + + set_bit = (u16) (1u << kctx->as_nr); + + dev_dbg(kctx->kbdev->dev, "JS: Setting Submit Allowed on %p (as=%d)", + kctx, kctx->as_nr); + + js_devdata->runpool_irq.submit_allowed |= set_bit; +} + +/** + * kbasep_js_clear_submit_allowed - Prevent a context from submitting more + * jobs on this policy + * + * The purpose of this abstraction is to hide the underlying data size, + * and wrap up the long repeated line of code. + * + * The caller must hold hwaccess_lock. + */ +static inline void kbasep_js_clear_submit_allowed( + struct kbasep_js_device_data *js_devdata, + struct kbase_context *kctx) +{ + u16 clear_bit; + u16 clear_mask; + + /* Ensure context really is scheduled in */ + KBASE_DEBUG_ASSERT(kctx->as_nr != KBASEP_AS_NR_INVALID); + KBASE_DEBUG_ASSERT(kbase_ctx_flag(kctx, KCTX_SCHEDULED)); + + clear_bit = (u16) (1u << kctx->as_nr); + clear_mask = ~clear_bit; + + dev_dbg(kctx->kbdev->dev, "JS: Clearing Submit Allowed on %p (as=%d)", + kctx, kctx->as_nr); + + js_devdata->runpool_irq.submit_allowed &= clear_mask; +} + +/** + * Create an initial 'invalid' atom retained state, that requires no + * atom-related work to be done on releasing with + * kbasep_js_runpool_release_ctx_and_katom_retained_state() + */ +static inline void kbasep_js_atom_retained_state_init_invalid( + struct kbasep_js_atom_retained_state *retained_state) +{ + retained_state->event_code = BASE_JD_EVENT_NOT_STARTED; + retained_state->core_req = + KBASEP_JS_ATOM_RETAINED_STATE_CORE_REQ_INVALID; +} + +/** + * Copy atom state that can be made available after jd_done_nolock() is called + * on that atom. 
+ */ +static inline void kbasep_js_atom_retained_state_copy( + struct kbasep_js_atom_retained_state *retained_state, + const struct kbase_jd_atom *katom) +{ + retained_state->event_code = katom->event_code; + retained_state->core_req = katom->core_req; + retained_state->sched_priority = katom->sched_priority; + retained_state->device_nr = katom->device_nr; +} + +/** + * kbasep_js_has_atom_finished - Determine whether an atom has finished + * (given its retained state), + * and so should be given back to + * userspace/removed from the system. + * + * @katom_retained_state: the retained state of the atom to check + * + * Reasons for an atom not finishing include: + * * Being soft-stopped (and so, the atom should be resubmitted sometime later) + * * It is an end of renderpass atom that was run to consume the output of a + * start-of-renderpass atom that was soft-stopped because it used too much + * memory. In this case, it will have to be run again later. + * + * Return: false if the atom has not finished, true otherwise. + */ +static inline bool kbasep_js_has_atom_finished( + const struct kbasep_js_atom_retained_state *katom_retained_state) +{ + return (bool) (katom_retained_state->event_code != + BASE_JD_EVENT_STOPPED && + katom_retained_state->event_code != + BASE_JD_EVENT_REMOVED_FROM_NEXT && + katom_retained_state->event_code != + BASE_JD_EVENT_END_RP_DONE); +} + +/** + * kbasep_js_atom_retained_state_is_valid - Determine whether a struct + * kbasep_js_atom_retained_state + * is valid + * @katom_retained_state the atom's retained state to check + * + * An invalid struct kbasep_js_atom_retained_state is allowed, and indicates + * that the code should just ignore it. + * + * Return: false if the retained state is invalid, true otherwise. 
+ */ +static inline bool kbasep_js_atom_retained_state_is_valid( + const struct kbasep_js_atom_retained_state *katom_retained_state) +{ + return (bool) (katom_retained_state->core_req != + KBASEP_JS_ATOM_RETAINED_STATE_CORE_REQ_INVALID); +} + +/** + * kbase_js_runpool_inc_context_count - Increment number of running contexts. + * + * The following locking conditions are made on the caller: + * * The caller must hold the kbasep_js_kctx_info::ctx::jsctx_mutex. + * * The caller must hold the kbasep_js_device_data::runpool_mutex + */ +static inline void kbase_js_runpool_inc_context_count( + struct kbase_device *kbdev, + struct kbase_context *kctx) +{ + struct kbasep_js_device_data *js_devdata; + struct kbasep_js_kctx_info *js_kctx_info; + + KBASE_DEBUG_ASSERT(kbdev != NULL); + KBASE_DEBUG_ASSERT(kctx != NULL); + + js_devdata = &kbdev->js_data; + js_kctx_info = &kctx->jctx.sched_info; + + lockdep_assert_held(&js_kctx_info->ctx.jsctx_mutex); + lockdep_assert_held(&js_devdata->runpool_mutex); + + /* Track total contexts */ + KBASE_DEBUG_ASSERT(js_devdata->nr_all_contexts_running < S8_MAX); + ++(js_devdata->nr_all_contexts_running); + + if (!kbase_ctx_flag(kctx, KCTX_SUBMIT_DISABLED)) { + /* Track contexts that can submit jobs */ + KBASE_DEBUG_ASSERT(js_devdata->nr_user_contexts_running < + S8_MAX); + ++(js_devdata->nr_user_contexts_running); + } +} + +/** + * kbase_js_runpool_dec_context_count - decrement number of running contexts. + * + * The following locking conditions are made on the caller: + * * The caller must hold the kbasep_js_kctx_info::ctx::jsctx_mutex. 
+ * * The caller must hold the kbasep_js_device_data::runpool_mutex + */ +static inline void kbase_js_runpool_dec_context_count( + struct kbase_device *kbdev, + struct kbase_context *kctx) +{ + struct kbasep_js_device_data *js_devdata; + struct kbasep_js_kctx_info *js_kctx_info; + + KBASE_DEBUG_ASSERT(kbdev != NULL); + KBASE_DEBUG_ASSERT(kctx != NULL); + + js_devdata = &kbdev->js_data; + js_kctx_info = &kctx->jctx.sched_info; + + lockdep_assert_held(&js_kctx_info->ctx.jsctx_mutex); + lockdep_assert_held(&js_devdata->runpool_mutex); + + /* Track total contexts */ + --(js_devdata->nr_all_contexts_running); + KBASE_DEBUG_ASSERT(js_devdata->nr_all_contexts_running >= 0); + + if (!kbase_ctx_flag(kctx, KCTX_SUBMIT_DISABLED)) { + /* Track contexts that can submit jobs */ + --(js_devdata->nr_user_contexts_running); + KBASE_DEBUG_ASSERT(js_devdata->nr_user_contexts_running >= 0); + } +} + +/** + * kbase_js_sched_all - Submit atoms from all available contexts to all + * job slots. + * + * @kbdev: Device pointer + * + * This will attempt to submit as many jobs as possible. It will exit when + * either all job slots are full, or all contexts have been used. + */ +static inline void kbase_js_sched_all(struct kbase_device *kbdev) +{ + kbase_js_sched(kbdev, (1 << kbdev->gpu_props.num_job_slots) - 1); +} + +extern const int +kbasep_js_atom_priority_to_relative[BASE_JD_NR_PRIO_LEVELS]; + +extern const base_jd_prio +kbasep_js_relative_priority_to_atom[KBASE_JS_ATOM_SCHED_PRIO_COUNT]; + +/** + * kbasep_js_atom_prio_to_sched_prio(): - Convert atom priority (base_jd_prio) + * to relative ordering + * @atom_prio: Priority ID to translate. + * + * Atom priority values for @ref base_jd_prio cannot be compared directly to + * find out which are higher or lower. + * + * This function will convert base_jd_prio values for successively lower + * priorities into a monotonically increasing sequence. That is, the lower the + * base_jd_prio priority, the higher the value produced by this function. 
This + * is in accordance with how the rest of the kernel treats priority. + * + * The mapping is 1:1 and the size of the valid input range is the same as the + * size of the valid output range, i.e. + * KBASE_JS_ATOM_SCHED_PRIO_COUNT == BASE_JD_NR_PRIO_LEVELS + * + * Note This must be kept in sync with BASE_JD_PRIO_<...> definitions + * + * Return: On success: a value in the inclusive range + * 0..KBASE_JS_ATOM_SCHED_PRIO_COUNT-1. On failure: + * KBASE_JS_ATOM_SCHED_PRIO_INVALID + */ +static inline int kbasep_js_atom_prio_to_sched_prio(base_jd_prio atom_prio) +{ + if (atom_prio >= BASE_JD_NR_PRIO_LEVELS) + return KBASE_JS_ATOM_SCHED_PRIO_INVALID; + + return kbasep_js_atom_priority_to_relative[atom_prio]; +} + +static inline base_jd_prio kbasep_js_sched_prio_to_atom_prio(int sched_prio) +{ + unsigned int prio_idx; + + KBASE_DEBUG_ASSERT(sched_prio >= 0 && + sched_prio < KBASE_JS_ATOM_SCHED_PRIO_COUNT); + + prio_idx = (unsigned int)sched_prio; + + return kbasep_js_relative_priority_to_atom[prio_idx]; +} + +#endif /* _KBASE_JM_JS_H_ */ diff --git a/drivers/gpu/arm/bifrost/mali_kbase_js_defs.h b/drivers/gpu/arm/bifrost/jm/mali_kbase_js_defs.h similarity index 97% rename from drivers/gpu/arm/bifrost/mali_kbase_js_defs.h rename to drivers/gpu/arm/bifrost/jm/mali_kbase_js_defs.h index 4b130660043c..900ecd2c1b8d 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_js_defs.h +++ b/drivers/gpu/arm/bifrost/jm/mali_kbase_js_defs.h @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2011-2018 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2011-2018, 2020 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -21,7 +21,6 @@ */ - /** * @file mali_kbase_js.h * Job Scheduler Type Definitions @@ -51,12 +50,6 @@ struct kbase_jd_atom; typedef u32 kbase_context_flags; -struct kbasep_atom_req { - base_jd_core_req core_req; - kbase_context_flags ctx_req; - u32 device_nr; -}; - /** Callback function run on all of a context's jobs registered with the Job * Scheduler */ typedef void (*kbasep_js_ctx_job_cb)(struct kbase_device *kbdev, struct kbase_jd_atom *katom); @@ -245,24 +238,6 @@ struct kbasep_js_device_data { s8 slot_affinity_refcount[BASE_JM_MAX_NR_SLOTS][64]; } runpool_irq; - /** - * Run Pool mutex, for managing contexts within the runpool. - * Unless otherwise specified, you must hold this lock whilst accessing any - * members that follow - * - * In addition, this is used to access: - * - the kbasep_js_kctx_info::runpool substructure - */ - struct mutex runpool_mutex; - - /** - * Queue Lock, used to access the Policy's queue of contexts independently - * of the Run Pool. - * - * Of course, you don't need the Run Pool lock to access this. - */ - struct mutex queue_mutex; - /** * Scheduling semaphore. This must be held when calling * kbase_jm_kick() @@ -299,9 +274,6 @@ struct kbasep_js_device_data { u32 gpu_reset_ticks_dumping; /*< Value for JS_RESET_TICKS_DUMPING */ u32 ctx_timeslice_ns; /**< Value for JS_CTX_TIMESLICE_NS */ - /**< Value for JS_SOFT_JOB_TIMEOUT */ - atomic_t soft_job_timeout_ms; - /** List of suspended soft jobs */ struct list_head suspended_soft_jobs_list; @@ -321,6 +293,27 @@ struct kbasep_js_device_data { /* Number of contexts that can either be pulled from or are currently * running */ atomic_t nr_contexts_runnable; + + /** Value for JS_SOFT_JOB_TIMEOUT */ + atomic_t soft_job_timeout_ms; + + /** + * Queue Lock, used to access the Policy's queue of contexts + * independently of the Run Pool. 
+ * + * Of course, you don't need the Run Pool lock to access this. + */ + struct mutex queue_mutex; + + /** + * Run Pool mutex, for managing contexts within the runpool. + * Unless otherwise specified, you must hold this lock whilst accessing + * any members that follow + * + * In addition, this is used to access: + * * the kbasep_js_kctx_info::runpool substructure + */ + struct mutex runpool_mutex; }; /** diff --git a/drivers/gpu/arm/bifrost/mali_base_hwconfig_features.h b/drivers/gpu/arm/bifrost/mali_base_hwconfig_features.h index c25f870097b7..6885f8d58066 100644 --- a/drivers/gpu/arm/bifrost/mali_base_hwconfig_features.h +++ b/drivers/gpu/arm/bifrost/mali_base_hwconfig_features.h @@ -48,7 +48,6 @@ enum base_hw_feature { BASE_HW_FEATURE_BRNDOUT_KILL, BASE_HW_FEATURE_WARPING, BASE_HW_FEATURE_FLUSH_REDUCTION, - BASE_HW_FEATURE_PROTECTED_MODE, BASE_HW_FEATURE_COHERENCY_REG, BASE_HW_FEATURE_PROTECTED_DEBUG_MODE, BASE_HW_FEATURE_AARCH64_MMU, @@ -85,7 +84,6 @@ static const enum base_hw_feature base_hw_features_tMIx[] = { BASE_HW_FEATURE_TEST4_DATUM_MODE, BASE_HW_FEATURE_THREAD_GROUP_SPLIT, BASE_HW_FEATURE_FLUSH_REDUCTION, - BASE_HW_FEATURE_PROTECTED_MODE, BASE_HW_FEATURE_COHERENCY_REG, BASE_HW_FEATURE_AARCH64_MMU, BASE_HW_FEATURE_END @@ -112,7 +110,6 @@ static const enum base_hw_feature base_hw_features_tHEx[] = { BASE_HW_FEATURE_TEST4_DATUM_MODE, BASE_HW_FEATURE_THREAD_GROUP_SPLIT, BASE_HW_FEATURE_FLUSH_REDUCTION, - BASE_HW_FEATURE_PROTECTED_MODE, BASE_HW_FEATURE_PROTECTED_DEBUG_MODE, BASE_HW_FEATURE_COHERENCY_REG, BASE_HW_FEATURE_AARCH64_MMU, @@ -140,7 +137,6 @@ static const enum base_hw_feature base_hw_features_tSIx[] = { BASE_HW_FEATURE_TEST4_DATUM_MODE, BASE_HW_FEATURE_THREAD_GROUP_SPLIT, BASE_HW_FEATURE_FLUSH_REDUCTION, - BASE_HW_FEATURE_PROTECTED_MODE, BASE_HW_FEATURE_PROTECTED_DEBUG_MODE, BASE_HW_FEATURE_COHERENCY_REG, BASE_HW_FEATURE_AARCH64_MMU, @@ -168,7 +164,6 @@ static const enum base_hw_feature base_hw_features_tDVx[] = { 
BASE_HW_FEATURE_TEST4_DATUM_MODE, BASE_HW_FEATURE_THREAD_GROUP_SPLIT, BASE_HW_FEATURE_FLUSH_REDUCTION, - BASE_HW_FEATURE_PROTECTED_MODE, BASE_HW_FEATURE_PROTECTED_DEBUG_MODE, BASE_HW_FEATURE_COHERENCY_REG, BASE_HW_FEATURE_AARCH64_MMU, @@ -196,7 +191,6 @@ static const enum base_hw_feature base_hw_features_tNOx[] = { BASE_HW_FEATURE_TEST4_DATUM_MODE, BASE_HW_FEATURE_THREAD_GROUP_SPLIT, BASE_HW_FEATURE_FLUSH_REDUCTION, - BASE_HW_FEATURE_PROTECTED_MODE, BASE_HW_FEATURE_PROTECTED_DEBUG_MODE, BASE_HW_FEATURE_COHERENCY_REG, BASE_HW_FEATURE_AARCH64_MMU, @@ -226,7 +220,6 @@ static const enum base_hw_feature base_hw_features_tGOx[] = { BASE_HW_FEATURE_TEST4_DATUM_MODE, BASE_HW_FEATURE_THREAD_GROUP_SPLIT, BASE_HW_FEATURE_FLUSH_REDUCTION, - BASE_HW_FEATURE_PROTECTED_MODE, BASE_HW_FEATURE_PROTECTED_DEBUG_MODE, BASE_HW_FEATURE_COHERENCY_REG, BASE_HW_FEATURE_AARCH64_MMU, @@ -255,7 +248,6 @@ static const enum base_hw_feature base_hw_features_tTRx[] = { BASE_HW_FEATURE_T7XX_PAIRING_RULES, BASE_HW_FEATURE_TEST4_DATUM_MODE, BASE_HW_FEATURE_FLUSH_REDUCTION, - BASE_HW_FEATURE_PROTECTED_MODE, BASE_HW_FEATURE_PROTECTED_DEBUG_MODE, BASE_HW_FEATURE_COHERENCY_REG, BASE_HW_FEATURE_AARCH64_MMU, @@ -284,7 +276,6 @@ static const enum base_hw_feature base_hw_features_tNAx[] = { BASE_HW_FEATURE_T7XX_PAIRING_RULES, BASE_HW_FEATURE_TEST4_DATUM_MODE, BASE_HW_FEATURE_FLUSH_REDUCTION, - BASE_HW_FEATURE_PROTECTED_MODE, BASE_HW_FEATURE_PROTECTED_DEBUG_MODE, BASE_HW_FEATURE_COHERENCY_REG, BASE_HW_FEATURE_AARCH64_MMU, @@ -313,7 +304,6 @@ static const enum base_hw_feature base_hw_features_tBEx[] = { BASE_HW_FEATURE_T7XX_PAIRING_RULES, BASE_HW_FEATURE_TEST4_DATUM_MODE, BASE_HW_FEATURE_FLUSH_REDUCTION, - BASE_HW_FEATURE_PROTECTED_MODE, BASE_HW_FEATURE_PROTECTED_DEBUG_MODE, BASE_HW_FEATURE_COHERENCY_REG, BASE_HW_FEATURE_AARCH64_MMU, @@ -323,35 +313,6 @@ static const enum base_hw_feature base_hw_features_tBEx[] = { BASE_HW_FEATURE_END }; -static const enum base_hw_feature base_hw_features_tULx[] = { - 
BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION, - BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS, - BASE_HW_FEATURE_XAFFINITY, - BASE_HW_FEATURE_WARPING, - BASE_HW_FEATURE_INTERPIPE_REG_ALIASING, - BASE_HW_FEATURE_32_BIT_UNIFORM_ADDRESS, - BASE_HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL, - BASE_HW_FEATURE_BRNDOUT_CC, - BASE_HW_FEATURE_BRNDOUT_KILL, - BASE_HW_FEATURE_LD_ST_LEA_TEX, - BASE_HW_FEATURE_LD_ST_TILEBUFFER, - BASE_HW_FEATURE_LINEAR_FILTER_FLOAT, - BASE_HW_FEATURE_MRT, - BASE_HW_FEATURE_MSAA_16X, - BASE_HW_FEATURE_NEXT_INSTRUCTION_TYPE, - BASE_HW_FEATURE_OUT_OF_ORDER_EXEC, - BASE_HW_FEATURE_T7XX_PAIRING_RULES, - BASE_HW_FEATURE_TEST4_DATUM_MODE, - BASE_HW_FEATURE_FLUSH_REDUCTION, - BASE_HW_FEATURE_PROTECTED_MODE, - BASE_HW_FEATURE_PROTECTED_DEBUG_MODE, - BASE_HW_FEATURE_COHERENCY_REG, - BASE_HW_FEATURE_AARCH64_MMU, - BASE_HW_FEATURE_L2_CONFIG, - BASE_HW_FEATURE_CLEAN_ONLY_SAFE, - BASE_HW_FEATURE_END -}; - static const enum base_hw_feature base_hw_features_tDUx[] = { BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION, BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS, @@ -372,7 +333,6 @@ static const enum base_hw_feature base_hw_features_tDUx[] = { BASE_HW_FEATURE_T7XX_PAIRING_RULES, BASE_HW_FEATURE_TEST4_DATUM_MODE, BASE_HW_FEATURE_FLUSH_REDUCTION, - BASE_HW_FEATURE_PROTECTED_MODE, BASE_HW_FEATURE_PROTECTED_DEBUG_MODE, BASE_HW_FEATURE_COHERENCY_REG, BASE_HW_FEATURE_AARCH64_MMU, @@ -402,7 +362,6 @@ static const enum base_hw_feature base_hw_features_tODx[] = { BASE_HW_FEATURE_T7XX_PAIRING_RULES, BASE_HW_FEATURE_TEST4_DATUM_MODE, BASE_HW_FEATURE_FLUSH_REDUCTION, - BASE_HW_FEATURE_PROTECTED_MODE, BASE_HW_FEATURE_PROTECTED_DEBUG_MODE, BASE_HW_FEATURE_COHERENCY_REG, BASE_HW_FEATURE_AARCH64_MMU, @@ -411,7 +370,7 @@ static const enum base_hw_feature base_hw_features_tODx[] = { BASE_HW_FEATURE_END }; -static const enum base_hw_feature base_hw_features_tIDx[] = { +static const enum base_hw_feature base_hw_features_tGRx[] = { BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION, 
BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS, BASE_HW_FEATURE_XAFFINITY, @@ -431,7 +390,6 @@ static const enum base_hw_feature base_hw_features_tIDx[] = { BASE_HW_FEATURE_T7XX_PAIRING_RULES, BASE_HW_FEATURE_TEST4_DATUM_MODE, BASE_HW_FEATURE_FLUSH_REDUCTION, - BASE_HW_FEATURE_PROTECTED_MODE, BASE_HW_FEATURE_PROTECTED_DEBUG_MODE, BASE_HW_FEATURE_COHERENCY_REG, BASE_HW_FEATURE_AARCH64_MMU, @@ -460,7 +418,6 @@ static const enum base_hw_feature base_hw_features_tVAx[] = { BASE_HW_FEATURE_T7XX_PAIRING_RULES, BASE_HW_FEATURE_TEST4_DATUM_MODE, BASE_HW_FEATURE_FLUSH_REDUCTION, - BASE_HW_FEATURE_PROTECTED_MODE, BASE_HW_FEATURE_PROTECTED_DEBUG_MODE, BASE_HW_FEATURE_COHERENCY_REG, BASE_HW_FEATURE_AARCH64_MMU, @@ -469,4 +426,61 @@ static const enum base_hw_feature base_hw_features_tVAx[] = { BASE_HW_FEATURE_END }; +static const enum base_hw_feature base_hw_features_tTUx[] = { + BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION, + BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS, + BASE_HW_FEATURE_XAFFINITY, + BASE_HW_FEATURE_WARPING, + BASE_HW_FEATURE_INTERPIPE_REG_ALIASING, + BASE_HW_FEATURE_32_BIT_UNIFORM_ADDRESS, + BASE_HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL, + BASE_HW_FEATURE_BRNDOUT_CC, + BASE_HW_FEATURE_BRNDOUT_KILL, + BASE_HW_FEATURE_LD_ST_LEA_TEX, + BASE_HW_FEATURE_LD_ST_TILEBUFFER, + BASE_HW_FEATURE_LINEAR_FILTER_FLOAT, + BASE_HW_FEATURE_MRT, + BASE_HW_FEATURE_MSAA_16X, + BASE_HW_FEATURE_NEXT_INSTRUCTION_TYPE, + BASE_HW_FEATURE_OUT_OF_ORDER_EXEC, + BASE_HW_FEATURE_T7XX_PAIRING_RULES, + BASE_HW_FEATURE_TEST4_DATUM_MODE, + BASE_HW_FEATURE_FLUSH_REDUCTION, + BASE_HW_FEATURE_PROTECTED_DEBUG_MODE, + BASE_HW_FEATURE_COHERENCY_REG, + BASE_HW_FEATURE_AARCH64_MMU, + BASE_HW_FEATURE_L2_CONFIG, + BASE_HW_FEATURE_CLEAN_ONLY_SAFE, + BASE_HW_FEATURE_END +}; + +static const enum base_hw_feature base_hw_features_tE2x[] = { + BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION, + BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS, + BASE_HW_FEATURE_XAFFINITY, + BASE_HW_FEATURE_WARPING, + BASE_HW_FEATURE_INTERPIPE_REG_ALIASING, + 
BASE_HW_FEATURE_32_BIT_UNIFORM_ADDRESS, + BASE_HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL, + BASE_HW_FEATURE_BRNDOUT_CC, + BASE_HW_FEATURE_BRNDOUT_KILL, + BASE_HW_FEATURE_LD_ST_LEA_TEX, + BASE_HW_FEATURE_LD_ST_TILEBUFFER, + BASE_HW_FEATURE_LINEAR_FILTER_FLOAT, + BASE_HW_FEATURE_MRT, + BASE_HW_FEATURE_MSAA_16X, + BASE_HW_FEATURE_NEXT_INSTRUCTION_TYPE, + BASE_HW_FEATURE_OUT_OF_ORDER_EXEC, + BASE_HW_FEATURE_T7XX_PAIRING_RULES, + BASE_HW_FEATURE_TEST4_DATUM_MODE, + BASE_HW_FEATURE_FLUSH_REDUCTION, + BASE_HW_FEATURE_PROTECTED_DEBUG_MODE, + BASE_HW_FEATURE_COHERENCY_REG, + BASE_HW_FEATURE_AARCH64_MMU, + BASE_HW_FEATURE_IDVS_GROUP_SIZE, + BASE_HW_FEATURE_L2_CONFIG, + BASE_HW_FEATURE_CLEAN_ONLY_SAFE, + BASE_HW_FEATURE_END +}; + #endif /* _BASE_HWCONFIG_FEATURES_H_ */ diff --git a/drivers/gpu/arm/bifrost/mali_base_hwconfig_issues.h b/drivers/gpu/arm/bifrost/mali_base_hwconfig_issues.h index 1a28bb1ea982..3966069178c1 100644 --- a/drivers/gpu/arm/bifrost/mali_base_hwconfig_issues.h +++ b/drivers/gpu/arm/bifrost/mali_base_hwconfig_issues.h @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2014-2019 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014-2020 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -53,7 +53,12 @@ enum base_hw_issue { BASE_HW_ISSUE_TTRX_2968_TTRX_3162, BASE_HW_ISSUE_TTRX_3076, BASE_HW_ISSUE_TTRX_921, + BASE_HW_ISSUE_TTRX_3414, BASE_HW_ISSUE_GPU2017_1336, + BASE_HW_ISSUE_TTRX_3083, + BASE_HW_ISSUE_TTRX_3470, + BASE_HW_ISSUE_TTRX_3464, + BASE_HW_ISSUE_TTRX_3485, BASE_HW_ISSUE_END }; @@ -206,6 +211,7 @@ static const enum base_hw_issue base_hw_issues_tSIx_r0p0[] = { BASE_HW_ISSUE_TSIX_1792, BASE_HW_ISSUE_TTRX_921, BASE_HW_ISSUE_GPU2017_1336, + BASE_HW_ISSUE_TTRX_3464, BASE_HW_ISSUE_END }; @@ -218,6 +224,7 @@ static const enum base_hw_issue base_hw_issues_tSIx_r0p1[] = { BASE_HW_ISSUE_TSIX_1792, BASE_HW_ISSUE_TTRX_921, BASE_HW_ISSUE_GPU2017_1336, + BASE_HW_ISSUE_TTRX_3464, BASE_HW_ISSUE_END }; @@ -229,6 +236,7 @@ static const enum base_hw_issue base_hw_issues_tSIx_r1p0[] = { BASE_HW_ISSUE_TSIX_2033, BASE_HW_ISSUE_TTRX_921, BASE_HW_ISSUE_GPU2017_1336, + BASE_HW_ISSUE_TTRX_3464, BASE_HW_ISSUE_END }; @@ -239,6 +247,7 @@ static const enum base_hw_issue base_hw_issues_tSIx_r1p1[] = { BASE_HW_ISSUE_TSIX_2033, BASE_HW_ISSUE_TTRX_921, BASE_HW_ISSUE_GPU2017_1336, + BASE_HW_ISSUE_TTRX_3464, BASE_HW_ISSUE_END }; @@ -248,6 +257,7 @@ static const enum base_hw_issue base_hw_issues_model_tSIx[] = { BASE_HW_ISSUE_TMIX_8133, BASE_HW_ISSUE_TSIX_1116, BASE_HW_ISSUE_TSIX_2033, + BASE_HW_ISSUE_TTRX_3464, BASE_HW_ISSUE_END }; @@ -258,6 +268,7 @@ static const enum base_hw_issue base_hw_issues_tDVx_r0p0[] = { BASE_HW_ISSUE_TSIX_2033, BASE_HW_ISSUE_TTRX_921, BASE_HW_ISSUE_GPU2017_1336, + BASE_HW_ISSUE_TTRX_3464, BASE_HW_ISSUE_END }; @@ -267,6 +278,7 @@ static const enum base_hw_issue base_hw_issues_model_tDVx[] = { BASE_HW_ISSUE_TMIX_8133, BASE_HW_ISSUE_TSIX_1116, BASE_HW_ISSUE_TSIX_2033, + BASE_HW_ISSUE_TTRX_3464, BASE_HW_ISSUE_END }; @@ -278,6 +290,7 @@ static const enum base_hw_issue base_hw_issues_tNOx_r0p0[] = { 
BASE_HW_ISSUE_TNOX_1194, BASE_HW_ISSUE_TTRX_921, BASE_HW_ISSUE_GPU2017_1336, + BASE_HW_ISSUE_TTRX_3464, BASE_HW_ISSUE_END }; @@ -287,6 +300,7 @@ static const enum base_hw_issue base_hw_issues_model_tNOx[] = { BASE_HW_ISSUE_TMIX_8133, BASE_HW_ISSUE_TSIX_1116, BASE_HW_ISSUE_TSIX_2033, + BASE_HW_ISSUE_TTRX_3464, BASE_HW_ISSUE_END }; @@ -298,6 +312,7 @@ static const enum base_hw_issue base_hw_issues_tGOx_r0p0[] = { BASE_HW_ISSUE_TNOX_1194, BASE_HW_ISSUE_TTRX_921, BASE_HW_ISSUE_GPU2017_1336, + BASE_HW_ISSUE_TTRX_3464, BASE_HW_ISSUE_END }; @@ -309,6 +324,7 @@ static const enum base_hw_issue base_hw_issues_tGOx_r1p0[] = { BASE_HW_ISSUE_TGOX_R1_1234, BASE_HW_ISSUE_TTRX_921, BASE_HW_ISSUE_GPU2017_1336, + BASE_HW_ISSUE_TTRX_3464, BASE_HW_ISSUE_END }; @@ -318,6 +334,7 @@ static const enum base_hw_issue base_hw_issues_model_tGOx[] = { BASE_HW_ISSUE_TMIX_8133, BASE_HW_ISSUE_TSIX_1116, BASE_HW_ISSUE_TSIX_2033, + BASE_HW_ISSUE_TTRX_3464, BASE_HW_ISSUE_END }; @@ -328,7 +345,12 @@ static const enum base_hw_issue base_hw_issues_tTRx_r0p0[] = { BASE_HW_ISSUE_TTRX_2968_TTRX_3162, BASE_HW_ISSUE_TTRX_3076, BASE_HW_ISSUE_TTRX_921, + BASE_HW_ISSUE_TTRX_3414, BASE_HW_ISSUE_GPU2017_1336, + BASE_HW_ISSUE_TTRX_3083, + BASE_HW_ISSUE_TTRX_3470, + BASE_HW_ISSUE_TTRX_3464, + BASE_HW_ISSUE_TTRX_3485, BASE_HW_ISSUE_END }; @@ -339,7 +361,27 @@ static const enum base_hw_issue base_hw_issues_tTRx_r0p1[] = { BASE_HW_ISSUE_TTRX_2968_TTRX_3162, BASE_HW_ISSUE_TTRX_3076, BASE_HW_ISSUE_TTRX_921, + BASE_HW_ISSUE_TTRX_3414, BASE_HW_ISSUE_GPU2017_1336, + BASE_HW_ISSUE_TTRX_3083, + BASE_HW_ISSUE_TTRX_3470, + BASE_HW_ISSUE_TTRX_3464, + BASE_HW_ISSUE_TTRX_3485, + BASE_HW_ISSUE_END +}; + +static const enum base_hw_issue base_hw_issues_tTRx_r0p2[] = { + BASE_HW_ISSUE_9435, + BASE_HW_ISSUE_TSIX_2033, + BASE_HW_ISSUE_TTRX_1337, + BASE_HW_ISSUE_TTRX_2968_TTRX_3162, + BASE_HW_ISSUE_TTRX_3076, + BASE_HW_ISSUE_TTRX_921, + BASE_HW_ISSUE_TTRX_3414, + BASE_HW_ISSUE_GPU2017_1336, + BASE_HW_ISSUE_TTRX_3083, + 
BASE_HW_ISSUE_TTRX_3470, + BASE_HW_ISSUE_TTRX_3464, BASE_HW_ISSUE_END }; @@ -348,6 +390,10 @@ static const enum base_hw_issue base_hw_issues_model_tTRx[] = { BASE_HW_ISSUE_9435, BASE_HW_ISSUE_TSIX_2033, BASE_HW_ISSUE_TTRX_1337, + BASE_HW_ISSUE_TTRX_3414, + BASE_HW_ISSUE_TTRX_3083, + BASE_HW_ISSUE_TTRX_3470, + BASE_HW_ISSUE_TTRX_3464, BASE_HW_ISSUE_END }; @@ -358,7 +404,12 @@ static const enum base_hw_issue base_hw_issues_tNAx_r0p0[] = { BASE_HW_ISSUE_TTRX_2968_TTRX_3162, BASE_HW_ISSUE_TTRX_3076, BASE_HW_ISSUE_TTRX_921, + BASE_HW_ISSUE_TTRX_3414, BASE_HW_ISSUE_GPU2017_1336, + BASE_HW_ISSUE_TTRX_3083, + BASE_HW_ISSUE_TTRX_3470, + BASE_HW_ISSUE_TTRX_3464, + BASE_HW_ISSUE_TTRX_3485, BASE_HW_ISSUE_END }; @@ -369,7 +420,11 @@ static const enum base_hw_issue base_hw_issues_tNAx_r0p1[] = { BASE_HW_ISSUE_TTRX_2968_TTRX_3162, BASE_HW_ISSUE_TTRX_3076, BASE_HW_ISSUE_TTRX_921, + BASE_HW_ISSUE_TTRX_3414, BASE_HW_ISSUE_GPU2017_1336, + BASE_HW_ISSUE_TTRX_3083, + BASE_HW_ISSUE_TTRX_3470, + BASE_HW_ISSUE_TTRX_3464, BASE_HW_ISSUE_END }; @@ -378,6 +433,10 @@ static const enum base_hw_issue base_hw_issues_model_tNAx[] = { BASE_HW_ISSUE_9435, BASE_HW_ISSUE_TSIX_2033, BASE_HW_ISSUE_TTRX_1337, + BASE_HW_ISSUE_TTRX_3414, + BASE_HW_ISSUE_TTRX_3083, + BASE_HW_ISSUE_TTRX_3470, + BASE_HW_ISSUE_TTRX_3464, BASE_HW_ISSUE_END }; @@ -387,6 +446,24 @@ static const enum base_hw_issue base_hw_issues_tBEx_r0p0[] = { BASE_HW_ISSUE_TTRX_1337, BASE_HW_ISSUE_TTRX_2968_TTRX_3162, BASE_HW_ISSUE_TTRX_921, + BASE_HW_ISSUE_TTRX_3414, + BASE_HW_ISSUE_TTRX_3083, + BASE_HW_ISSUE_TTRX_3470, + BASE_HW_ISSUE_TTRX_3464, + BASE_HW_ISSUE_TTRX_3485, + BASE_HW_ISSUE_END +}; + +static const enum base_hw_issue base_hw_issues_tBEx_r0p1[] = { + BASE_HW_ISSUE_9435, + BASE_HW_ISSUE_TSIX_2033, + BASE_HW_ISSUE_TTRX_1337, + BASE_HW_ISSUE_TTRX_2968_TTRX_3162, + BASE_HW_ISSUE_TTRX_921, + BASE_HW_ISSUE_TTRX_3414, + BASE_HW_ISSUE_TTRX_3083, + BASE_HW_ISSUE_TTRX_3470, + BASE_HW_ISSUE_TTRX_3464, BASE_HW_ISSUE_END }; @@ -396,6 +473,23 @@ 
static const enum base_hw_issue base_hw_issues_tBEx_r1p0[] = { BASE_HW_ISSUE_TTRX_1337, BASE_HW_ISSUE_TTRX_2968_TTRX_3162, BASE_HW_ISSUE_TTRX_921, + BASE_HW_ISSUE_TTRX_3414, + BASE_HW_ISSUE_TTRX_3083, + BASE_HW_ISSUE_TTRX_3470, + BASE_HW_ISSUE_TTRX_3464, + BASE_HW_ISSUE_END +}; + +static const enum base_hw_issue base_hw_issues_tBEx_r1p1[] = { + BASE_HW_ISSUE_9435, + BASE_HW_ISSUE_TSIX_2033, + BASE_HW_ISSUE_TTRX_1337, + BASE_HW_ISSUE_TTRX_2968_TTRX_3162, + BASE_HW_ISSUE_TTRX_921, + BASE_HW_ISSUE_TTRX_3414, + BASE_HW_ISSUE_TTRX_3083, + BASE_HW_ISSUE_TTRX_3470, + BASE_HW_ISSUE_TTRX_3464, BASE_HW_ISSUE_END }; @@ -404,22 +498,37 @@ static const enum base_hw_issue base_hw_issues_model_tBEx[] = { BASE_HW_ISSUE_9435, BASE_HW_ISSUE_TSIX_2033, BASE_HW_ISSUE_TTRX_1337, + BASE_HW_ISSUE_TTRX_3414, + BASE_HW_ISSUE_TTRX_3083, + BASE_HW_ISSUE_TTRX_3470, + BASE_HW_ISSUE_TTRX_3464, BASE_HW_ISSUE_END }; -static const enum base_hw_issue base_hw_issues_tULx_r0p0[] = { +static const enum base_hw_issue base_hw_issues_lBEx_r1p0[] = { BASE_HW_ISSUE_9435, BASE_HW_ISSUE_TSIX_2033, BASE_HW_ISSUE_TTRX_1337, + BASE_HW_ISSUE_TTRX_2968_TTRX_3162, BASE_HW_ISSUE_TTRX_921, + BASE_HW_ISSUE_TTRX_3414, + BASE_HW_ISSUE_TTRX_3083, + BASE_HW_ISSUE_TTRX_3470, + BASE_HW_ISSUE_TTRX_3464, + BASE_HW_ISSUE_TTRX_3485, BASE_HW_ISSUE_END }; -static const enum base_hw_issue base_hw_issues_model_tULx[] = { - BASE_HW_ISSUE_5736, +static const enum base_hw_issue base_hw_issues_lBEx_r1p1[] = { BASE_HW_ISSUE_9435, BASE_HW_ISSUE_TSIX_2033, BASE_HW_ISSUE_TTRX_1337, + BASE_HW_ISSUE_TTRX_2968_TTRX_3162, + BASE_HW_ISSUE_TTRX_921, + BASE_HW_ISSUE_TTRX_3414, + BASE_HW_ISSUE_TTRX_3083, + BASE_HW_ISSUE_TTRX_3470, + BASE_HW_ISSUE_TTRX_3464, BASE_HW_ISSUE_END }; @@ -428,6 +537,8 @@ static const enum base_hw_issue base_hw_issues_tDUx_r0p0[] = { BASE_HW_ISSUE_TSIX_2033, BASE_HW_ISSUE_TTRX_1337, BASE_HW_ISSUE_TTRX_921, + BASE_HW_ISSUE_TTRX_3414, + BASE_HW_ISSUE_TTRX_3083, BASE_HW_ISSUE_END }; @@ -436,6 +547,8 @@ static const enum 
base_hw_issue base_hw_issues_model_tDUx[] = { BASE_HW_ISSUE_9435, BASE_HW_ISSUE_TSIX_2033, BASE_HW_ISSUE_TTRX_1337, + BASE_HW_ISSUE_TTRX_3414, + BASE_HW_ISSUE_TTRX_3083, BASE_HW_ISSUE_END }; @@ -454,14 +567,14 @@ static const enum base_hw_issue base_hw_issues_model_tODx[] = { BASE_HW_ISSUE_END }; -static const enum base_hw_issue base_hw_issues_tIDx_r0p0[] = { +static const enum base_hw_issue base_hw_issues_tGRx_r0p0[] = { BASE_HW_ISSUE_9435, BASE_HW_ISSUE_TSIX_2033, BASE_HW_ISSUE_TTRX_1337, BASE_HW_ISSUE_END }; -static const enum base_hw_issue base_hw_issues_model_tIDx[] = { +static const enum base_hw_issue base_hw_issues_model_tGRx[] = { BASE_HW_ISSUE_5736, BASE_HW_ISSUE_9435, BASE_HW_ISSUE_TSIX_2033, @@ -484,4 +597,39 @@ static const enum base_hw_issue base_hw_issues_model_tVAx[] = { BASE_HW_ISSUE_END }; +static const enum base_hw_issue base_hw_issues_tTUx_r0p0[] = { + BASE_HW_ISSUE_9435, + BASE_HW_ISSUE_TSIX_2033, + BASE_HW_ISSUE_TTRX_1337, + BASE_HW_ISSUE_END +}; + +static const enum base_hw_issue base_hw_issues_model_tTUx[] = { + BASE_HW_ISSUE_5736, + BASE_HW_ISSUE_9435, + BASE_HW_ISSUE_TSIX_2033, + BASE_HW_ISSUE_TTRX_1337, + BASE_HW_ISSUE_END +}; + +static const enum base_hw_issue base_hw_issues_tE2x_r0p0[] = { + BASE_HW_ISSUE_9435, + BASE_HW_ISSUE_TSIX_2033, + BASE_HW_ISSUE_TTRX_1337, + BASE_HW_ISSUE_TTRX_921, + BASE_HW_ISSUE_TTRX_3414, + BASE_HW_ISSUE_TTRX_3083, + BASE_HW_ISSUE_END +}; + +static const enum base_hw_issue base_hw_issues_model_tE2x[] = { + BASE_HW_ISSUE_5736, + BASE_HW_ISSUE_9435, + BASE_HW_ISSUE_TSIX_2033, + BASE_HW_ISSUE_TTRX_1337, + BASE_HW_ISSUE_TTRX_3414, + BASE_HW_ISSUE_TTRX_3083, + BASE_HW_ISSUE_END +}; + #endif /* _BASE_HWCONFIG_ISSUES_H_ */ diff --git a/drivers/gpu/arm/bifrost/mali_base_kernel.h b/drivers/gpu/arm/bifrost/mali_base_kernel.h index bd42e5c63a87..1e2744d0fe8a 100644 --- a/drivers/gpu/arm/bifrost/mali_base_kernel.h +++ b/drivers/gpu/arm/bifrost/mali_base_kernel.h @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2010-2019 ARM 
Limited. All rights reserved. + * (C) COPYRIGHT 2010-2020 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -29,26 +29,15 @@ #ifndef _BASE_KERNEL_H_ #define _BASE_KERNEL_H_ -typedef struct base_mem_handle { +struct base_mem_handle { struct { u64 handle; } basep; -} base_mem_handle; +}; #include "mali_base_mem_priv.h" -#include "mali_midg_coherency.h" -#include "mali_kbase_gpu_id.h" - -/* - * Dependency stuff, keep it private for now. May want to expose it if - * we decide to make the number of semaphores a configurable - * option. - */ -#define BASE_JD_ATOM_COUNT 256 - -/* Set/reset values for a software event */ -#define BASE_JD_SOFT_EVENT_SET ((unsigned char)1) -#define BASE_JD_SOFT_EVENT_RESET ((unsigned char)0) +#include "gpu/mali_kbase_gpu_coherency.h" +#include "gpu/mali_kbase_gpu_id.h" #define BASE_GPU_NUM_TEXTURE_FEATURES_REGISTERS 4 @@ -76,16 +65,6 @@ typedef struct base_mem_handle { #endif #endif -/** - * @addtogroup base_user_api User-side Base APIs - * @{ - */ - -/** - * @addtogroup base_user_api_memory User-side Base Memory APIs - * @{ - */ - /* Physical memory group ID for normal usage. */ #define BASE_MEM_GROUP_DEFAULT (0) @@ -108,185 +87,6 @@ typedef struct base_mem_handle { */ typedef u32 base_mem_alloc_flags; -/* Memory allocation, access/hint flags. - * - * See base_mem_alloc_flags. 
- */ - -/* IN */ -/* Read access CPU side - */ -#define BASE_MEM_PROT_CPU_RD ((base_mem_alloc_flags)1 << 0) - -/* Write access CPU side - */ -#define BASE_MEM_PROT_CPU_WR ((base_mem_alloc_flags)1 << 1) - -/* Read access GPU side - */ -#define BASE_MEM_PROT_GPU_RD ((base_mem_alloc_flags)1 << 2) - -/* Write access GPU side - */ -#define BASE_MEM_PROT_GPU_WR ((base_mem_alloc_flags)1 << 3) - -/* Execute allowed on the GPU side - */ -#define BASE_MEM_PROT_GPU_EX ((base_mem_alloc_flags)1 << 4) - -/* Will be permanently mapped in kernel space. - * Flag is only allowed on allocations originating from kbase. - */ -#define BASEP_MEM_PERMANENT_KERNEL_MAPPING ((base_mem_alloc_flags)1 << 5) - -/* The allocation will completely reside within the same 4GB chunk in the GPU - * virtual space. - * Since this flag is primarily required only for the TLS memory which will - * not be used to contain executable code and also not used for Tiler heap, - * it can't be used along with BASE_MEM_PROT_GPU_EX and TILER_ALIGN_TOP flags. - */ -#define BASE_MEM_GPU_VA_SAME_4GB_PAGE ((base_mem_alloc_flags)1 << 6) - -/* Userspace is not allowed to free this memory. - * Flag is only allowed on allocations originating from kbase. 
- */ -#define BASEP_MEM_NO_USER_FREE ((base_mem_alloc_flags)1 << 7) - -#define BASE_MEM_RESERVED_BIT_8 ((base_mem_alloc_flags)1 << 8) - -/* Grow backing store on GPU Page Fault - */ -#define BASE_MEM_GROW_ON_GPF ((base_mem_alloc_flags)1 << 9) - -/* Page coherence Outer shareable, if available - */ -#define BASE_MEM_COHERENT_SYSTEM ((base_mem_alloc_flags)1 << 10) - -/* Page coherence Inner shareable - */ -#define BASE_MEM_COHERENT_LOCAL ((base_mem_alloc_flags)1 << 11) - -/* Should be cached on the CPU - */ -#define BASE_MEM_CACHED_CPU ((base_mem_alloc_flags)1 << 12) - -/* IN/OUT */ -/* Must have same VA on both the GPU and the CPU - */ -#define BASE_MEM_SAME_VA ((base_mem_alloc_flags)1 << 13) - -/* OUT */ -/* Must call mmap to acquire a GPU address for the alloc - */ -#define BASE_MEM_NEED_MMAP ((base_mem_alloc_flags)1 << 14) - -/* IN */ -/* Page coherence Outer shareable, required. - */ -#define BASE_MEM_COHERENT_SYSTEM_REQUIRED ((base_mem_alloc_flags)1 << 15) - -/* Protected memory - */ -#define BASE_MEM_PROTECTED ((base_mem_alloc_flags)1 << 16) - -/* Not needed physical memory - */ -#define BASE_MEM_DONT_NEED ((base_mem_alloc_flags)1 << 17) - -/* Must use shared CPU/GPU zone (SAME_VA zone) but doesn't require the - * addresses to be the same - */ -#define BASE_MEM_IMPORT_SHARED ((base_mem_alloc_flags)1 << 18) - -/** - * Bit 19 is reserved. - * - * Do not remove, use the next unreserved bit for new flags - */ -#define BASE_MEM_RESERVED_BIT_19 ((base_mem_alloc_flags)1 << 19) -#define BASE_MEM_MAYBE_RESERVED_BIT_19 BASE_MEM_RESERVED_BIT_19 - -/** - * Memory starting from the end of the initial commit is aligned to 'extent' - * pages, where 'extent' must be a power of 2 and no more than - * BASE_MEM_TILER_ALIGN_TOP_EXTENT_MAX_PAGES - */ -#define BASE_MEM_TILER_ALIGN_TOP ((base_mem_alloc_flags)1 << 20) - -/* Should be uncached on the GPU, will work only for GPUs using AARCH64 mmu mode. 
- * Some components within the GPU might only be able to access memory that is - * GPU cacheable. Refer to the specific GPU implementation for more details. - * The 3 shareability flags will be ignored for GPU uncached memory. - * If used while importing USER_BUFFER type memory, then the import will fail - * if the memory is not aligned to GPU and CPU cache line width. - */ -#define BASE_MEM_UNCACHED_GPU ((base_mem_alloc_flags)1 << 21) - -/* - * Bits [22:25] for group_id (0~15). - * - * base_mem_group_id_set() should be used to pack a memory group ID into a - * base_mem_alloc_flags value instead of accessing the bits directly. - * base_mem_group_id_get() should be used to extract the memory group ID from - * a base_mem_alloc_flags value. - */ -#define BASEP_MEM_GROUP_ID_SHIFT 22 -#define BASE_MEM_GROUP_ID_MASK \ - ((base_mem_alloc_flags)0xF << BASEP_MEM_GROUP_ID_SHIFT) - -/** - * Number of bits used as flags for base memory management - * - * Must be kept in sync with the base_mem_alloc_flags flags - */ -#define BASE_MEM_FLAGS_NR_BITS 26 - -/* A mask for all output bits, excluding IN/OUT bits. - */ -#define BASE_MEM_FLAGS_OUTPUT_MASK BASE_MEM_NEED_MMAP - -/* A mask for all input bits, including IN/OUT bits. - */ -#define BASE_MEM_FLAGS_INPUT_MASK \ - (((1 << BASE_MEM_FLAGS_NR_BITS) - 1) & ~BASE_MEM_FLAGS_OUTPUT_MASK) - -/** - * base_mem_group_id_get() - Get group ID from flags - * @flags: Flags to pass to base_mem_alloc - * - * This inline function extracts the encoded group ID from flags - * and converts it into numeric value (0~15). 
- * - * Return: group ID(0~15) extracted from the parameter - */ -static inline int base_mem_group_id_get(base_mem_alloc_flags flags) -{ - LOCAL_ASSERT((flags & ~BASE_MEM_FLAGS_INPUT_MASK) == 0); - return (int)((flags & BASE_MEM_GROUP_ID_MASK) >> - BASEP_MEM_GROUP_ID_SHIFT); -} - -/** - * base_mem_group_id_set() - Set group ID into base_mem_alloc_flags - * @id: group ID(0~15) you want to encode - * - * This inline function encodes specific group ID into base_mem_alloc_flags. - * Parameter 'id' should lie in-between 0 to 15. - * - * Return: base_mem_alloc_flags with the group ID (id) encoded - * - * The return value can be combined with other flags against base_mem_alloc - * to identify a specific memory group. - */ -static inline base_mem_alloc_flags base_mem_group_id_set(int id) -{ - LOCAL_ASSERT(id >= 0); - LOCAL_ASSERT(id < BASE_MEM_GROUP_COUNT); - - return ((base_mem_alloc_flags)id << BASEP_MEM_GROUP_ID_SHIFT) & - BASE_MEM_GROUP_ID_MASK; -} - /* A mask for all the flags which are modifiable via the base_mem_set_flags * interface. */ @@ -294,18 +94,6 @@ static inline base_mem_alloc_flags base_mem_group_id_set(int id) (BASE_MEM_DONT_NEED | BASE_MEM_COHERENT_SYSTEM | \ BASE_MEM_COHERENT_LOCAL) - -/* A mask of all currently reserved flags - */ -#define BASE_MEM_FLAGS_RESERVED \ - (BASE_MEM_RESERVED_BIT_8 | BASE_MEM_MAYBE_RESERVED_BIT_19) - -/* A mask of all the flags which are only valid for allocations within kbase, - * and may not be passed from user space. - */ -#define BASEP_MEM_FLAGS_KERNEL_ONLY \ - (BASEP_MEM_PERMANENT_KERNEL_MAPPING | BASEP_MEM_NO_USER_FREE) - /* A mask of all the flags that can be returned via the base_mem_get_flags() * interface. */ @@ -331,14 +119,14 @@ static inline base_mem_alloc_flags base_mem_group_id_set(int id) * as future releases from ARM might include other new types * which could clash with your custom types. 
*/ -typedef enum base_mem_import_type { +enum base_mem_import_type { BASE_MEM_IMPORT_TYPE_INVALID = 0, /** * Import type with value 1 is deprecated. */ BASE_MEM_IMPORT_TYPE_UMM = 2, BASE_MEM_IMPORT_TYPE_USER_BUFFER = 3 -} base_mem_import_type; +}; /** * struct base_mem_import_user_buffer - Handle of an imported user buffer @@ -354,45 +142,12 @@ struct base_mem_import_user_buffer { u64 length; }; -/** - * @brief Invalid memory handle. - * - * Return value from functions returning @ref base_mem_handle on error. - * - * @warning @ref base_mem_handle_new_invalid must be used instead of this macro - * in C++ code or other situations where compound literals cannot be used. - */ -#define BASE_MEM_INVALID_HANDLE ((base_mem_handle) { {BASEP_MEM_INVALID_HANDLE} }) - -/** - * @brief Special write-alloc memory handle. - * - * A special handle is used to represent a region where a special page is mapped - * with a write-alloc cache setup, typically used when the write result of the - * GPU isn't needed, but the GPU must write anyway. - * - * @warning @ref base_mem_handle_new_write_alloc must be used instead of this macro - * in C++ code or other situations where compound literals cannot be used. 
- */ -#define BASE_MEM_WRITE_ALLOC_PAGES_HANDLE ((base_mem_handle) { {BASEP_MEM_WRITE_ALLOC_PAGES_HANDLE} }) - -#define BASEP_MEM_INVALID_HANDLE (0ull << 12) -#define BASE_MEM_MMU_DUMP_HANDLE (1ull << 12) -#define BASE_MEM_TRACE_BUFFER_HANDLE (2ull << 12) -#define BASE_MEM_MAP_TRACKING_HANDLE (3ull << 12) -#define BASEP_MEM_WRITE_ALLOC_PAGES_HANDLE (4ull << 12) -/* reserved handles ..-47< for future special handles */ -#define BASE_MEM_COOKIE_BASE (64ul << 12) -#define BASE_MEM_FIRST_FREE_ADDRESS ((BITS_PER_LONG << 12) + \ - BASE_MEM_COOKIE_BASE) - /* Mask to detect 4GB boundary alignment */ #define BASE_MEM_MASK_4GB 0xfffff000UL /* Mask to detect 4GB boundary (in page units) alignment */ #define BASE_MEM_PFN_MASK_4GB (BASE_MEM_MASK_4GB >> LOCAL_PAGE_SHIFT) -/** - * Limit on the 'extent' parameter for an allocation with the +/* Limit on the 'extent' parameter for an allocation with the * BASE_MEM_TILER_ALIGN_TOP flag set * * This is the same as the maximum limit for a Buffer Descriptor's chunk size @@ -408,82 +163,22 @@ struct base_mem_import_user_buffer { /* Maximum size allowed in a single KBASE_IOCTL_MEM_ALLOC call */ #define KBASE_MEM_ALLOC_MAX_SIZE ((8ull << 30) >> PAGE_SHIFT) /* 8 GB */ - /** - * @addtogroup base_user_api_memory_defered User-side Base Defered Memory Coherency APIs - * @{ - */ - -/** - * @brief a basic memory operation (sync-set). + * struct base_fence - Cross-device synchronisation fence. * - * The content of this structure is private, and should only be used - * by the accessors. + * A fence is used to signal when the GPU has finished accessing a resource that + * may be shared with other devices, and also to delay work done asynchronously + * by the GPU until other devices have finished accessing a shared resource. */ -typedef struct base_syncset { - struct basep_syncset basep_sset; -} base_syncset; - -/** @} end group base_user_api_memory_defered */ - -/** - * Handle to represent imported memory object. 
- * Simple opague handle to imported memory, can't be used - * with anything but base_external_resource_init to bind to an atom. - */ -typedef struct base_import_handle { - struct { - u64 handle; - } basep; -} base_import_handle; - -/** @} end group base_user_api_memory */ - -/** - * @addtogroup base_user_api_job_dispatch User-side Base Job Dispatcher APIs - * @{ - */ - -typedef int platform_fence_type; -#define INVALID_PLATFORM_FENCE ((platform_fence_type)-1) - -/** - * Base stream handle. - * - * References an underlying base stream object. - */ -typedef struct base_stream { - struct { - int fd; - } basep; -} base_stream; - -/** - * Base fence handle. - * - * References an underlying base fence object. - */ -typedef struct base_fence { +struct base_fence { struct { int fd; int stream_fd; } basep; -} base_fence; +}; /** - * @brief Per-job data - * - * This structure is used to store per-job data, and is completely unused - * by the Base driver. It can be used to store things such as callback - * function pointer, data to handle job completion. It is guaranteed to be - * untouched by the Base driver. - */ -typedef struct base_jd_udata { - u64 blob[2]; /**< per-job data array */ -} base_jd_udata; - -/** - * @brief Memory aliasing info + * struct base_mem_aliasing_info - Memory aliasing info * * Describes a memory handle to be aliased. * A subset of the handle can be chosen for aliasing, given an offset and a @@ -497,25 +192,98 @@ typedef struct base_jd_udata { * Offset must be within the size of the handle. * Offset+length must not overrun the size of the handle. * - * @handle Handle to alias, can be BASE_MEM_WRITE_ALLOC_PAGES_HANDLE - * @offset Offset within the handle to start aliasing from, in pages. - * Not used with BASE_MEM_WRITE_ALLOC_PAGES_HANDLE. - * @length Length to alias, in pages. For BASE_MEM_WRITE_ALLOC_PAGES_HANDLE - * specifies the number of times the special page is needed. 
+ * @handle: Handle to alias, can be BASE_MEM_WRITE_ALLOC_PAGES_HANDLE + * @offset: Offset within the handle to start aliasing from, in pages. + * Not used with BASE_MEM_WRITE_ALLOC_PAGES_HANDLE. + * @length: Length to alias, in pages. For BASE_MEM_WRITE_ALLOC_PAGES_HANDLE + * specifies the number of times the special page is needed. */ struct base_mem_aliasing_info { - base_mem_handle handle; + struct base_mem_handle handle; u64 offset; u64 length; }; -/** - * Similar to BASE_MEM_TILER_ALIGN_TOP, memory starting from the end of the +/* Maximum percentage of just-in-time memory allocation trimming to perform + * on free. + */ +#define BASE_JIT_MAX_TRIM_LEVEL (100) + +/* Maximum number of concurrent just-in-time memory allocations. + */ +#define BASE_JIT_ALLOC_COUNT (255) + +/* Similar to BASE_MEM_TILER_ALIGN_TOP, memory starting from the end of the * initial commit is aligned to 'extent' pages, where 'extent' must be a power * of 2 and no more than BASE_MEM_TILER_ALIGN_TOP_EXTENT_MAX_PAGES */ #define BASE_JIT_ALLOC_MEM_TILER_ALIGN_TOP (1 << 0) +/** + * If set, the heap info address points to a u32 holding the used size in bytes; + * otherwise it points to a u64 holding the lowest address of unused memory. + */ +#define BASE_JIT_ALLOC_HEAP_INFO_IS_SIZE (1 << 1) + +/** + * Valid set of just-in-time memory allocation flags + * + * Note: BASE_JIT_ALLOC_HEAP_INFO_IS_SIZE cannot be set if heap_info_gpu_addr + * in %base_jit_alloc_info is 0 (atom with BASE_JIT_ALLOC_HEAP_INFO_IS_SIZE set + * and heap_info_gpu_addr being 0 will be rejected). 
+ */ +#define BASE_JIT_ALLOC_VALID_FLAGS \ + (BASE_JIT_ALLOC_MEM_TILER_ALIGN_TOP | BASE_JIT_ALLOC_HEAP_INFO_IS_SIZE) + +/* base_jit_alloc_info in use for kernel driver versions 10.2 to early 11.5 + * + * jit_version is 1 + * + * Due to the lack of padding specified, user clients between 32 and 64-bit + * may have assumed a different size of the struct + * + * An array of structures was not supported + */ +struct base_jit_alloc_info_10_2 { + u64 gpu_alloc_addr; + u64 va_pages; + u64 commit_pages; + u64 extent; + u8 id; +}; + +/* base_jit_alloc_info introduced by kernel driver version 11.5, and in use up + * to 11.19 + * + * This structure had a number of modifications during and after kernel driver + * version 11.5, but remains size-compatible throughout its version history, and + * with earlier variants compatible with future variants by requiring + * zero-initialization to the unused space in the structure. + * + * jit_version is 2 + * + * Kernel driver version history: + * 11.5: Initial introduction with 'usage_id' and padding[5]. All padding bytes + * must be zero. Kbase minor version was not incremented, so some + * versions of 11.5 do not have this change. + * 11.5: Added 'bin_id' and 'max_allocations', replacing 2 padding bytes (Kbase + * minor version not incremented) + * 11.6: Added 'flags', replacing 1 padding byte + * 11.10: Arrays of this structure are supported + */ +struct base_jit_alloc_info_11_5 { + u64 gpu_alloc_addr; + u64 va_pages; + u64 commit_pages; + u64 extent; + u8 id; + u8 bin_id; + u8 max_allocations; + u8 flags; + u8 padding[2]; + u16 usage_id; +}; + /** * struct base_jit_alloc_info - Structure which describes a JIT allocation * request. @@ -534,13 +302,29 @@ struct base_mem_aliasing_info { * type of JIT allocation. * @max_allocations: The maximum number of allocations allowed within * the bin specified by @bin_id. Should be the same - * for all JIT allocations within the same bin. + * for all allocations within the same bin. 
* @flags: flags specifying the special requirements for - * the JIT allocation. + * the JIT allocation, see + * %BASE_JIT_ALLOC_VALID_FLAGS * @padding: Expansion space - should be initialised to zero * @usage_id: A hint about which allocation should be reused. * The kernel should attempt to use a previous * allocation with the same usage_id + * @heap_info_gpu_addr: Pointer to an object in GPU memory describing + * the actual usage of the region. + * + * jit_version is 3. + * + * When modifications are made to this structure, it is still compatible with + * jit_version 3 when: a) the size is unchanged, and b) new members only + * replace the padding bytes. + * + * Previous jit_version history: + * jit_version == 1, refer to &base_jit_alloc_info_10_2 + * jit_version == 2, refer to &base_jit_alloc_info_11_5 + * + * Kbase version history: + * 11.20: added @heap_info_gpu_addr */ struct base_jit_alloc_info { u64 gpu_alloc_addr; @@ -553,379 +337,17 @@ struct base_jit_alloc_info { u8 flags; u8 padding[2]; u16 usage_id; + u64 heap_info_gpu_addr; }; -/** - * @brief Job dependency type. - * - * A flags field will be inserted into the atom structure to specify whether a dependency is a data or - * ordering dependency (by putting it before/after 'core_req' in the structure it should be possible to add without - * changing the structure size). - * When the flag is set for a particular dependency to signal that it is an ordering only dependency then - * errors will not be propagated. - */ -typedef u8 base_jd_dep_type; - - -#define BASE_JD_DEP_TYPE_INVALID (0) /**< Invalid dependency */ -#define BASE_JD_DEP_TYPE_DATA (1U << 0) /**< Data dependency */ -#define BASE_JD_DEP_TYPE_ORDER (1U << 1) /**< Order dependency */ - -/** - * @brief Job chain hardware requirements. - * - * A job chain must specify what GPU features it needs to allow the - * driver to schedule the job correctly. By not specifying the - * correct settings can/will cause an early job termination. 
Multiple - * values can be ORed together to specify multiple requirements. - * Special case is ::BASE_JD_REQ_DEP, which is used to express complex - * dependencies, and that doesn't execute anything on the hardware. - */ -typedef u32 base_jd_core_req; - -/* Requirements that come from the HW */ - -/** - * No requirement, dependency only - */ -#define BASE_JD_REQ_DEP ((base_jd_core_req)0) - -/** - * Requires fragment shaders - */ -#define BASE_JD_REQ_FS ((base_jd_core_req)1 << 0) - -/** - * Requires compute shaders - * This covers any of the following Midgard Job types: - * - Vertex Shader Job - * - Geometry Shader Job - * - An actual Compute Shader Job - * - * Compare this with @ref BASE_JD_REQ_ONLY_COMPUTE, which specifies that the - * job is specifically just the "Compute Shader" job type, and not the "Vertex - * Shader" nor the "Geometry Shader" job type. - */ -#define BASE_JD_REQ_CS ((base_jd_core_req)1 << 1) -#define BASE_JD_REQ_T ((base_jd_core_req)1 << 2) /**< Requires tiling */ -#define BASE_JD_REQ_CF ((base_jd_core_req)1 << 3) /**< Requires cache flushes */ -#define BASE_JD_REQ_V ((base_jd_core_req)1 << 4) /**< Requires value writeback */ - -/* SW-only requirements - the HW does not expose these as part of the job slot capabilities */ - -/* Requires fragment job with AFBC encoding */ -#define BASE_JD_REQ_FS_AFBC ((base_jd_core_req)1 << 13) - -/** - * SW-only requirement: coalesce completion events. - * If this bit is set then completion of this atom will not cause an event to - * be sent to userspace, whether successful or not; completion events will be - * deferred until an atom completes which does not have this bit set. - * - * This bit may not be used in combination with BASE_JD_REQ_EXTERNAL_RESOURCES. - */ -#define BASE_JD_REQ_EVENT_COALESCE ((base_jd_core_req)1 << 5) - -/** - * SW Only requirement: the job chain requires a coherent core group. We don't - * mind which coherent core group is used. 
- */ -#define BASE_JD_REQ_COHERENT_GROUP ((base_jd_core_req)1 << 6) - -/** - * SW Only requirement: The performance counters should be enabled only when - * they are needed, to reduce power consumption. - */ - -#define BASE_JD_REQ_PERMON ((base_jd_core_req)1 << 7) - -/** - * SW Only requirement: External resources are referenced by this atom. - * When external resources are referenced no syncsets can be bundled with the atom - * but should instead be part of a NULL jobs inserted into the dependency tree. - * The first pre_dep object must be configured for the external resouces to use, - * the second pre_dep object can be used to create other dependencies. - * - * This bit may not be used in combination with BASE_JD_REQ_EVENT_COALESCE and - * BASE_JD_REQ_SOFT_EVENT_WAIT. - */ -#define BASE_JD_REQ_EXTERNAL_RESOURCES ((base_jd_core_req)1 << 8) - -/** - * SW Only requirement: Software defined job. Jobs with this bit set will not be submitted - * to the hardware but will cause some action to happen within the driver - */ -#define BASE_JD_REQ_SOFT_JOB ((base_jd_core_req)1 << 9) - -#define BASE_JD_REQ_SOFT_DUMP_CPU_GPU_TIME (BASE_JD_REQ_SOFT_JOB | 0x1) -#define BASE_JD_REQ_SOFT_FENCE_TRIGGER (BASE_JD_REQ_SOFT_JOB | 0x2) -#define BASE_JD_REQ_SOFT_FENCE_WAIT (BASE_JD_REQ_SOFT_JOB | 0x3) - -/* 0x4 RESERVED for now */ - -/** - * SW only requirement: event wait/trigger job. - * - * - BASE_JD_REQ_SOFT_EVENT_WAIT: this job will block until the event is set. - * - BASE_JD_REQ_SOFT_EVENT_SET: this job sets the event, thus unblocks the - * other waiting jobs. It completes immediately. - * - BASE_JD_REQ_SOFT_EVENT_RESET: this job resets the event, making it - * possible for other jobs to wait upon. It completes immediately. 
- */ -#define BASE_JD_REQ_SOFT_EVENT_WAIT (BASE_JD_REQ_SOFT_JOB | 0x5) -#define BASE_JD_REQ_SOFT_EVENT_SET (BASE_JD_REQ_SOFT_JOB | 0x6) -#define BASE_JD_REQ_SOFT_EVENT_RESET (BASE_JD_REQ_SOFT_JOB | 0x7) - -#define BASE_JD_REQ_SOFT_DEBUG_COPY (BASE_JD_REQ_SOFT_JOB | 0x8) - -/** - * SW only requirement: Just In Time allocation - * - * This job requests a single or multiple JIT allocations through a list - * of @base_jit_alloc_info structure which is passed via the jc element of - * the atom. The number of @base_jit_alloc_info structures present in the - * list is passed via the nr_extres element of the atom - * - * It should be noted that the id entry in @base_jit_alloc_info must not - * be reused until it has been released via @BASE_JD_REQ_SOFT_JIT_FREE. - * - * Should this soft job fail it is expected that a @BASE_JD_REQ_SOFT_JIT_FREE - * soft job to free the JIT allocation is still made. - * - * The job will complete immediately. - */ -#define BASE_JD_REQ_SOFT_JIT_ALLOC (BASE_JD_REQ_SOFT_JOB | 0x9) -/** - * SW only requirement: Just In Time free - * - * This job requests a single or multiple JIT allocations created by - * @BASE_JD_REQ_SOFT_JIT_ALLOC to be freed. The ID list of the JIT - * allocations is passed via the jc element of the atom. - * - * The job will complete immediately. - */ -#define BASE_JD_REQ_SOFT_JIT_FREE (BASE_JD_REQ_SOFT_JOB | 0xa) - -/** - * SW only requirement: Map external resource - * - * This job requests external resource(s) are mapped once the dependencies - * of the job have been satisfied. The list of external resources are - * passed via the jc element of the atom which is a pointer to a - * @base_external_resource_list. - */ -#define BASE_JD_REQ_SOFT_EXT_RES_MAP (BASE_JD_REQ_SOFT_JOB | 0xb) -/** - * SW only requirement: Unmap external resource - * - * This job requests external resource(s) are unmapped once the dependencies - * of the job has been satisfied. 
The list of external resources are - * passed via the jc element of the atom which is a pointer to a - * @base_external_resource_list. - */ -#define BASE_JD_REQ_SOFT_EXT_RES_UNMAP (BASE_JD_REQ_SOFT_JOB | 0xc) - -/** - * HW Requirement: Requires Compute shaders (but not Vertex or Geometry Shaders) - * - * This indicates that the Job Chain contains Midgard Jobs of the 'Compute Shaders' type. - * - * In contrast to @ref BASE_JD_REQ_CS, this does \b not indicate that the Job - * Chain contains 'Geometry Shader' or 'Vertex Shader' jobs. - */ -#define BASE_JD_REQ_ONLY_COMPUTE ((base_jd_core_req)1 << 10) - -/** - * HW Requirement: Use the base_jd_atom::device_nr field to specify a - * particular core group - * - * If both @ref BASE_JD_REQ_COHERENT_GROUP and this flag are set, this flag takes priority - * - * This is only guaranteed to work for @ref BASE_JD_REQ_ONLY_COMPUTE atoms. - * - * If the core availability policy is keeping the required core group turned off, then - * the job will fail with a @ref BASE_JD_EVENT_PM_EVENT error code. - */ -#define BASE_JD_REQ_SPECIFIC_COHERENT_GROUP ((base_jd_core_req)1 << 11) - -/** - * SW Flag: If this bit is set then the successful completion of this atom - * will not cause an event to be sent to userspace - */ -#define BASE_JD_REQ_EVENT_ONLY_ON_FAILURE ((base_jd_core_req)1 << 12) - -/** - * SW Flag: If this bit is set then completion of this atom will not cause an - * event to be sent to userspace, whether successful or not. - */ -#define BASEP_JD_REQ_EVENT_NEVER ((base_jd_core_req)1 << 14) - -/** - * SW Flag: Skip GPU cache clean and invalidation before starting a GPU job. - * - * If this bit is set then the GPU's cache will not be cleaned and invalidated - * until a GPU job starts which does not have this bit set or a job completes - * which does not have the @ref BASE_JD_REQ_SKIP_CACHE_END bit set. 
Do not use if - * the CPU may have written to memory addressed by the job since the last job - * without this bit set was submitted. - */ -#define BASE_JD_REQ_SKIP_CACHE_START ((base_jd_core_req)1 << 15) - -/** - * SW Flag: Skip GPU cache clean and invalidation after a GPU job completes. - * - * If this bit is set then the GPU's cache will not be cleaned and invalidated - * until a GPU job completes which does not have this bit set or a job starts - * which does not have the @ref BASE_JD_REQ_SKIP_CACHE_START bti set. Do not use if - * the CPU may read from or partially overwrite memory addressed by the job - * before the next job without this bit set completes. - */ -#define BASE_JD_REQ_SKIP_CACHE_END ((base_jd_core_req)1 << 16) - -/** - * Request the atom be executed on a specific job slot. - * - * When this flag is specified, it takes precedence over any existing job slot - * selection logic. - */ -#define BASE_JD_REQ_JOB_SLOT ((base_jd_core_req)1 << 17) - -/** - * These requirement bits are currently unused in base_jd_core_req - */ -#define BASEP_JD_REQ_RESERVED \ - (~(BASE_JD_REQ_ATOM_TYPE | BASE_JD_REQ_EXTERNAL_RESOURCES | \ - BASE_JD_REQ_EVENT_ONLY_ON_FAILURE | BASEP_JD_REQ_EVENT_NEVER | \ - BASE_JD_REQ_EVENT_COALESCE | \ - BASE_JD_REQ_COHERENT_GROUP | BASE_JD_REQ_SPECIFIC_COHERENT_GROUP | \ - BASE_JD_REQ_FS_AFBC | BASE_JD_REQ_PERMON | \ - BASE_JD_REQ_SKIP_CACHE_START | BASE_JD_REQ_SKIP_CACHE_END | \ - BASE_JD_REQ_JOB_SLOT)) - -/** - * Mask of all bits in base_jd_core_req that control the type of the atom. - * - * This allows dependency only atoms to have flags set - */ -#define BASE_JD_REQ_ATOM_TYPE \ - (BASE_JD_REQ_FS | BASE_JD_REQ_CS | BASE_JD_REQ_T | BASE_JD_REQ_CF | \ - BASE_JD_REQ_V | BASE_JD_REQ_SOFT_JOB | BASE_JD_REQ_ONLY_COMPUTE) - -/** - * Mask of all bits in base_jd_core_req that control the type of a soft job. 
- */ -#define BASE_JD_REQ_SOFT_JOB_TYPE (BASE_JD_REQ_SOFT_JOB | 0x1f) - -/* - * Returns non-zero value if core requirements passed define a soft job or - * a dependency only job. - */ -#define BASE_JD_REQ_SOFT_JOB_OR_DEP(core_req) \ - ((core_req & BASE_JD_REQ_SOFT_JOB) || \ - (core_req & BASE_JD_REQ_ATOM_TYPE) == BASE_JD_REQ_DEP) - -/* - * Base Atom priority - * - * Only certain priority levels are actually implemented, as specified by the - * BASE_JD_PRIO_<...> definitions below. It is undefined to use a priority - * level that is not one of those defined below. - * - * Priority levels only affect scheduling after the atoms have had dependencies - * resolved. For example, a low priority atom that has had its dependencies - * resolved might run before a higher priority atom that has not had its - * dependencies resolved. - * - * In general, fragment atoms do not affect non-fragment atoms with - * lower priorities, and vice versa. One exception is that there is only one - * priority value for each context. So a high-priority (e.g.) fragment atom - * could increase its context priority, causing its non-fragment atoms to also - * be scheduled sooner. - * - * The atoms are scheduled as follows with respect to their priorities: - * - Let atoms 'X' and 'Y' be for the same job slot who have dependencies - * resolved, and atom 'X' has a higher priority than atom 'Y' - * - If atom 'Y' is currently running on the HW, then it is interrupted to - * allow atom 'X' to run soon after - * - If instead neither atom 'Y' nor atom 'X' are running, then when choosing - * the next atom to run, atom 'X' will always be chosen instead of atom 'Y' - * - Any two atoms that have the same priority could run in any order with - * respect to each other. That is, there is no ordering constraint between - * atoms of the same priority. - * - * The sysfs file 'js_ctx_scheduling_mode' is used to control how atoms are - * scheduled between contexts. 
The default value, 0, will cause higher-priority - * atoms to be scheduled first, regardless of their context. The value 1 will - * use a round-robin algorithm when deciding which context's atoms to schedule - * next, so higher-priority atoms can only preempt lower priority atoms within - * the same context. See KBASE_JS_SYSTEM_PRIORITY_MODE and - * KBASE_JS_PROCESS_LOCAL_PRIORITY_MODE for more details. - */ -typedef u8 base_jd_prio; - -/* Medium atom priority. This is a priority higher than BASE_JD_PRIO_LOW */ -#define BASE_JD_PRIO_MEDIUM ((base_jd_prio)0) -/* High atom priority. This is a priority higher than BASE_JD_PRIO_MEDIUM and - * BASE_JD_PRIO_LOW */ -#define BASE_JD_PRIO_HIGH ((base_jd_prio)1) -/* Low atom priority. */ -#define BASE_JD_PRIO_LOW ((base_jd_prio)2) - -/* Count of the number of priority levels. This itself is not a valid - * base_jd_prio setting */ -#define BASE_JD_NR_PRIO_LEVELS 3 - -enum kbase_jd_atom_state { - /** Atom is not used */ - KBASE_JD_ATOM_STATE_UNUSED, - /** Atom is queued in JD */ - KBASE_JD_ATOM_STATE_QUEUED, - /** Atom has been given to JS (is runnable/running) */ - KBASE_JD_ATOM_STATE_IN_JS, - /** Atom has been completed, but not yet handed back to job dispatcher - * for dependency resolution */ - KBASE_JD_ATOM_STATE_HW_COMPLETED, - /** Atom has been completed, but not yet handed back to userspace */ - KBASE_JD_ATOM_STATE_COMPLETED -}; - -typedef u8 base_atom_id; /**< Type big enough to store an atom number in */ - -struct base_dependency { - base_atom_id atom_id; /**< An atom number */ - base_jd_dep_type dependency_type; /**< Dependency type */ -}; - -/* This structure has changed since UK 10.2 for which base_jd_core_req was a u16 value. - * In order to keep the size of the structure same, padding field has been adjusted - * accordingly and core_req field of a u32 type (to which UK 10.3 base_jd_core_req defines) - * is added at the end of the structure. 
Place in the structure previously occupied by u16 core_req - * is kept but renamed to compat_core_req and as such it can be used in ioctl call for job submission - * as long as UK 10.2 legacy is supported. Once when this support ends, this field can be left - * for possible future use. */ -typedef struct base_jd_atom_v2 { - u64 jc; /**< job-chain GPU address */ - struct base_jd_udata udata; /**< user data */ - u64 extres_list; /**< list of external resources */ - u16 nr_extres; /**< nr of external resources or JIT allocations */ - u16 compat_core_req; /**< core requirements which correspond to the legacy support for UK 10.2 */ - struct base_dependency pre_dep[2]; /**< pre-dependencies, one need to use SETTER function to assign this field, - this is done in order to reduce possibility of improper assigment of a dependency field */ - base_atom_id atom_number; /**< unique number to identify the atom */ - base_jd_prio prio; /**< Atom priority. Refer to @ref base_jd_prio for more details */ - u8 device_nr; /**< coregroup when BASE_JD_REQ_SPECIFIC_COHERENT_GROUP specified */ - u8 jobslot; /**< Job slot to use when BASE_JD_REQ_JOB_SLOT is specified */ - base_jd_core_req core_req; /**< core requirements */ -} base_jd_atom_v2; - -typedef enum base_external_resource_access { +enum base_external_resource_access { BASE_EXT_RES_ACCESS_SHARED, BASE_EXT_RES_ACCESS_EXCLUSIVE -} base_external_resource_access; +}; -typedef struct base_external_resource { +struct base_external_resource { u64 ext_resource; -} base_external_resource; +}; /** @@ -952,351 +374,32 @@ struct base_jd_debug_copy_buffer { struct base_external_resource extres; }; -/** - * @brief Setter for a dependency structure - * - * @param[in] dep The kbase jd atom dependency to be initialized. - * @param id The atom_id to be assigned. - * @param dep_type The dep_type to be assigned. 
- * - */ -static inline void base_jd_atom_dep_set(struct base_dependency *dep, - base_atom_id id, base_jd_dep_type dep_type) -{ - LOCAL_ASSERT(dep != NULL); - - /* - * make sure we don't set not allowed combinations - * of atom_id/dependency_type. - */ - LOCAL_ASSERT((id == 0 && dep_type == BASE_JD_DEP_TYPE_INVALID) || - (id > 0 && dep_type != BASE_JD_DEP_TYPE_INVALID)); - - dep->atom_id = id; - dep->dependency_type = dep_type; -} - -/** - * @brief Make a copy of a dependency structure - * - * @param[in,out] dep The kbase jd atom dependency to be written. - * @param[in] from The dependency to make a copy from. - * - */ -static inline void base_jd_atom_dep_copy(struct base_dependency *dep, - const struct base_dependency *from) -{ - LOCAL_ASSERT(dep != NULL); - - base_jd_atom_dep_set(dep, from->atom_id, from->dependency_type); -} - -/** - * @brief Soft-atom fence trigger setup. - * - * Sets up an atom to be a SW-only atom signaling a fence - * when it reaches the run state. - * - * Using the existing base dependency system the fence can - * be set to trigger when a GPU job has finished. - * - * The base fence object must not be terminated until the atom - * has been submitted to @ref base_jd_submit and @ref base_jd_submit - * has returned. - * - * @a fence must be a valid fence set up with @a base_fence_init. - * Calling this function with a uninitialized fence results in undefined behavior. - * - * @param[out] atom A pre-allocated atom to configure as a fence trigger SW atom - * @param[in] fence The base fence object to trigger. - * - * @pre @p fence must reference a @ref base_fence successfully initialized by - * calling @ref base_fence_init. - * @pre @p fence was @e not initialized by calling @ref base_fence_import, nor - * is it associated with a fence-trigger job that was already submitted - * by calling @ref base_jd_submit. - * @post @p atom can be submitted by calling @ref base_jd_submit. 
- */ -static inline void base_jd_fence_trigger_setup_v2(struct base_jd_atom_v2 *atom, struct base_fence *fence) -{ - LOCAL_ASSERT(atom); - LOCAL_ASSERT(fence); - LOCAL_ASSERT(fence->basep.fd == INVALID_PLATFORM_FENCE); - LOCAL_ASSERT(fence->basep.stream_fd >= 0); - atom->jc = (uintptr_t) fence; - atom->core_req = BASE_JD_REQ_SOFT_FENCE_TRIGGER; -} - -/** - * @brief Soft-atom fence wait setup. - * - * Sets up an atom to be a SW-only atom waiting on a fence. - * When the fence becomes triggered the atom becomes runnable - * and completes immediately. - * - * Using the existing base dependency system the fence can - * be set to block a GPU job until it has been triggered. - * - * The base fence object must not be terminated until the atom - * has been submitted to @ref base_jd_submit and - * @ref base_jd_submit has returned. - * - * @param[out] atom A pre-allocated atom to configure as a fence wait SW atom - * @param[in] fence The base fence object to wait on - * - * @pre @p fence must reference a @ref base_fence successfully initialized by - * calling @ref base_fence_import, or it must be associated with a - * fence-trigger job that was already submitted by calling - * @ref base_jd_submit. - * @post @p atom can be submitted by calling @ref base_jd_submit. - */ -static inline void base_jd_fence_wait_setup_v2(struct base_jd_atom_v2 *atom, struct base_fence *fence) -{ - LOCAL_ASSERT(atom); - LOCAL_ASSERT(fence); - LOCAL_ASSERT(fence->basep.fd >= 0); - atom->jc = (uintptr_t) fence; - atom->core_req = BASE_JD_REQ_SOFT_FENCE_WAIT; -} - -/** - * @brief External resource info initialization. - * - * Sets up an external resource object to reference - * a memory allocation and the type of access requested. - * - * @param[in] res The resource object to initialize - * @param handle The handle to the imported memory object, must be - * obtained by calling @ref base_mem_as_import_handle(). 
- * @param access The type of access requested - */ -static inline void base_external_resource_init(struct base_external_resource *res, struct base_import_handle handle, base_external_resource_access access) -{ - u64 address; - - address = handle.basep.handle; - - LOCAL_ASSERT(res != NULL); - LOCAL_ASSERT(0 == (address & LOCAL_PAGE_LSB)); - LOCAL_ASSERT(access == BASE_EXT_RES_ACCESS_SHARED || access == BASE_EXT_RES_ACCESS_EXCLUSIVE); - - res->ext_resource = address | (access & LOCAL_PAGE_LSB); -} - -/** - * @brief Job chain event code bits - * Defines the bits used to create ::base_jd_event_code - */ -enum { - BASE_JD_SW_EVENT_KERNEL = (1u << 15), /**< Kernel side event */ - BASE_JD_SW_EVENT = (1u << 14), /**< SW defined event */ - BASE_JD_SW_EVENT_SUCCESS = (1u << 13), /**< Event idicates success (SW events only) */ - BASE_JD_SW_EVENT_JOB = (0u << 11), /**< Job related event */ - BASE_JD_SW_EVENT_BAG = (1u << 11), /**< Bag related event */ - BASE_JD_SW_EVENT_INFO = (2u << 11), /**< Misc/info event */ - BASE_JD_SW_EVENT_RESERVED = (3u << 11), /**< Reserved event type */ - BASE_JD_SW_EVENT_TYPE_MASK = (3u << 11) /**< Mask to extract the type from an event code */ -}; - -/** - * @brief Job chain event codes - * - * HW and low-level SW events are represented by event codes. - * The status of jobs which succeeded are also represented by - * an event code (see ::BASE_JD_EVENT_DONE). - * Events are usually reported as part of a ::base_jd_event. 
- * - * The event codes are encoded in the following way: - * @li 10:0 - subtype - * @li 12:11 - type - * @li 13 - SW success (only valid if the SW bit is set) - * @li 14 - SW event (HW event if not set) - * @li 15 - Kernel event (should never be seen in userspace) - * - * Events are split up into ranges as follows: - * - BASE_JD_EVENT_RANGE_\_START - * - BASE_JD_EVENT_RANGE_\_END - * - * \a code is in \'s range when: - * - BASE_JD_EVENT_RANGE_\_START <= code < BASE_JD_EVENT_RANGE_\_END - * - * Ranges can be asserted for adjacency by testing that the END of the previous - * is equal to the START of the next. This is useful for optimizing some tests - * for range. - * - * A limitation is that the last member of this enum must explicitly be handled - * (with an assert-unreachable statement) in switch statements that use - * variables of this type. Otherwise, the compiler warns that we have not - * handled that enum value. - */ -typedef enum base_jd_event_code { - /* HW defined exceptions */ - - /** Start of HW Non-fault status codes - * - * @note Obscurely, BASE_JD_EVENT_TERMINATED indicates a real fault, - * because the job was hard-stopped - */ - BASE_JD_EVENT_RANGE_HW_NONFAULT_START = 0, - - /* non-fatal exceptions */ - BASE_JD_EVENT_NOT_STARTED = 0x00, /**< Can't be seen by userspace, treated as 'previous job done' */ - BASE_JD_EVENT_DONE = 0x01, - BASE_JD_EVENT_STOPPED = 0x03, /**< Can't be seen by userspace, becomes TERMINATED, DONE or JOB_CANCELLED */ - BASE_JD_EVENT_TERMINATED = 0x04, /**< This is actually a fault status code - the job was hard stopped */ - BASE_JD_EVENT_ACTIVE = 0x08, /**< Can't be seen by userspace, jobs only returned on complete/fail/cancel */ - - /** End of HW Non-fault status codes - * - * @note Obscurely, BASE_JD_EVENT_TERMINATED indicates a real fault, - * because the job was hard-stopped - */ - BASE_JD_EVENT_RANGE_HW_NONFAULT_END = 0x40, - - /** Start of HW fault and SW Error status codes */ - 
BASE_JD_EVENT_RANGE_HW_FAULT_OR_SW_ERROR_START = 0x40, - - /* job exceptions */ - BASE_JD_EVENT_JOB_CONFIG_FAULT = 0x40, - BASE_JD_EVENT_JOB_POWER_FAULT = 0x41, - BASE_JD_EVENT_JOB_READ_FAULT = 0x42, - BASE_JD_EVENT_JOB_WRITE_FAULT = 0x43, - BASE_JD_EVENT_JOB_AFFINITY_FAULT = 0x44, - BASE_JD_EVENT_JOB_BUS_FAULT = 0x48, - BASE_JD_EVENT_INSTR_INVALID_PC = 0x50, - BASE_JD_EVENT_INSTR_INVALID_ENC = 0x51, - BASE_JD_EVENT_INSTR_TYPE_MISMATCH = 0x52, - BASE_JD_EVENT_INSTR_OPERAND_FAULT = 0x53, - BASE_JD_EVENT_INSTR_TLS_FAULT = 0x54, - BASE_JD_EVENT_INSTR_BARRIER_FAULT = 0x55, - BASE_JD_EVENT_INSTR_ALIGN_FAULT = 0x56, - BASE_JD_EVENT_DATA_INVALID_FAULT = 0x58, - BASE_JD_EVENT_TILE_RANGE_FAULT = 0x59, - BASE_JD_EVENT_STATE_FAULT = 0x5A, - BASE_JD_EVENT_OUT_OF_MEMORY = 0x60, - BASE_JD_EVENT_UNKNOWN = 0x7F, - - /* GPU exceptions */ - BASE_JD_EVENT_DELAYED_BUS_FAULT = 0x80, - BASE_JD_EVENT_SHAREABILITY_FAULT = 0x88, - - /* MMU exceptions */ - BASE_JD_EVENT_TRANSLATION_FAULT_LEVEL1 = 0xC1, - BASE_JD_EVENT_TRANSLATION_FAULT_LEVEL2 = 0xC2, - BASE_JD_EVENT_TRANSLATION_FAULT_LEVEL3 = 0xC3, - BASE_JD_EVENT_TRANSLATION_FAULT_LEVEL4 = 0xC4, - BASE_JD_EVENT_PERMISSION_FAULT = 0xC8, - BASE_JD_EVENT_TRANSTAB_BUS_FAULT_LEVEL1 = 0xD1, - BASE_JD_EVENT_TRANSTAB_BUS_FAULT_LEVEL2 = 0xD2, - BASE_JD_EVENT_TRANSTAB_BUS_FAULT_LEVEL3 = 0xD3, - BASE_JD_EVENT_TRANSTAB_BUS_FAULT_LEVEL4 = 0xD4, - BASE_JD_EVENT_ACCESS_FLAG = 0xD8, - - /* SW defined exceptions */ - BASE_JD_EVENT_MEM_GROWTH_FAILED = BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_JOB | 0x000, - BASE_JD_EVENT_TIMED_OUT = BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_JOB | 0x001, - BASE_JD_EVENT_JOB_CANCELLED = BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_JOB | 0x002, - BASE_JD_EVENT_JOB_INVALID = BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_JOB | 0x003, - BASE_JD_EVENT_PM_EVENT = BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_JOB | 0x004, - - BASE_JD_EVENT_BAG_INVALID = BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_BAG | 0x003, - - /** End of HW fault and SW Error status codes */ - 
BASE_JD_EVENT_RANGE_HW_FAULT_OR_SW_ERROR_END = BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_RESERVED | 0x3FF, - - /** Start of SW Success status codes */ - BASE_JD_EVENT_RANGE_SW_SUCCESS_START = BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_SUCCESS | 0x000, - - BASE_JD_EVENT_PROGRESS_REPORT = BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_SUCCESS | BASE_JD_SW_EVENT_JOB | 0x000, - BASE_JD_EVENT_BAG_DONE = BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_SUCCESS | BASE_JD_SW_EVENT_BAG | 0x000, - BASE_JD_EVENT_DRV_TERMINATED = BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_SUCCESS | BASE_JD_SW_EVENT_INFO | 0x000, - - /** End of SW Success status codes */ - BASE_JD_EVENT_RANGE_SW_SUCCESS_END = BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_SUCCESS | BASE_JD_SW_EVENT_RESERVED | 0x3FF, - - /** Start of Kernel-only status codes. Such codes are never returned to user-space */ - BASE_JD_EVENT_RANGE_KERNEL_ONLY_START = BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_KERNEL | 0x000, - BASE_JD_EVENT_REMOVED_FROM_NEXT = BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_KERNEL | BASE_JD_SW_EVENT_JOB | 0x000, - - /** End of Kernel-only status codes. */ - BASE_JD_EVENT_RANGE_KERNEL_ONLY_END = BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_KERNEL | BASE_JD_SW_EVENT_RESERVED | 0x3FF -} base_jd_event_code; - -/** - * @brief Event reporting structure - * - * This structure is used by the kernel driver to report information - * about GPU events. The can either be HW-specific events or low-level - * SW events, such as job-chain completion. - * - * The event code contains an event type field which can be extracted - * by ANDing with ::BASE_JD_SW_EVENT_TYPE_MASK. - * - * Based on the event type base_jd_event::data holds: - * @li ::BASE_JD_SW_EVENT_JOB : the offset in the ring-buffer for the completed - * job-chain - * @li ::BASE_JD_SW_EVENT_BAG : The address of the ::base_jd_bag that has - * been completed (ie all contained job-chains have been completed). 
- * @li ::BASE_JD_SW_EVENT_INFO : base_jd_event::data not used - */ -typedef struct base_jd_event_v2 { - base_jd_event_code event_code; /**< event code */ - base_atom_id atom_number; /**< the atom number that has completed */ - struct base_jd_udata udata; /**< user data */ -} base_jd_event_v2; - -/** - * @brief Structure for BASE_JD_REQ_SOFT_DUMP_CPU_GPU_COUNTERS jobs. - * - * This structure is stored into the memory pointed to by the @c jc field - * of @ref base_jd_atom. - * - * It must not occupy the same CPU cache line(s) as any neighboring data. - * This is to avoid cases where access to pages containing the structure - * is shared between cached and un-cached memory regions, which would - * cause memory corruption. - */ - -typedef struct base_dump_cpu_gpu_counters { - u64 system_time; - u64 cycle_counter; - u64 sec; - u32 usec; - u8 padding[36]; -} base_dump_cpu_gpu_counters; - -/** @} end group base_user_api_job_dispatch */ - #define GPU_MAX_JOB_SLOTS 16 /** - * @page page_base_user_api_gpuprops User-side Base GPU Property Query API + * User-side Base GPU Property Queries * - * The User-side Base GPU Property Query API encapsulates two + * The User-side Base GPU Property Query interface encapsulates two * sub-modules: * - * - @ref base_user_api_gpuprops_dyn "Dynamic GPU Properties" - * - @ref base_plat_config_gpuprops "Base Platform Config GPU Properties" + * - "Dynamic GPU Properties" + * - "Base Platform Config GPU Properties" * - * There is a related third module outside of Base, which is owned by the MIDG - * module: - * - @ref gpu_props_static "Midgard Compile-time GPU Properties" - * - * Base only deals with properties that vary between different Midgard + * Base only deals with properties that vary between different GPU * implementations - the Dynamic GPU properties and the Platform Config * properties. * - * For properties that are constant for the Midgard Architecture, refer to the - * MIDG module. 
However, we will discuss their relevance here just to - * provide background information. + * For properties that are constant for the GPU Architecture, refer to the + * GPU module. However, we will discuss their relevance here just to + * provide background information. * - * @section sec_base_user_api_gpuprops_about About the GPU Properties in Base and MIDG modules + * About the GPU Properties in Base and GPU modules * - * The compile-time properties (Platform Config, Midgard Compile-time + * The compile-time properties (Platform Config, GPU Compile-time * properties) are exposed as pre-processor macros. * * Complementing the compile-time properties are the Dynamic GPU - * Properties, which act as a conduit for the Midgard Configuration + * Properties, which act as a conduit for the GPU Configuration * Discovery. * * In general, the dynamic properties are present to verify that the platform @@ -1305,13 +408,13 @@ typedef struct base_dump_cpu_gpu_counters { * * As a consistent guide across the entire DDK, the choice for dynamic or * compile-time should consider the following, in order: - * -# Can the code be written so that it doesn't need to know the + * 1. Can the code be written so that it doesn't need to know the * implementation limits at all? - * -# If you need the limits, get the information from the Dynamic Property + * 2. If you need the limits, get the information from the Dynamic Property * lookup. This should be done once as you fetch the context, and then cached * as part of the context data structure, so it's cheap to access. - * -# If there's a clear and arguable inefficiency in using Dynamic Properties, - * then use a Compile-Time Property (Platform Config, or Midgard Compile-time + * 3. If there's a clear and arguable inefficiency in using Dynamic Properties, + * then use a Compile-Time Property (Platform Config, or GPU Compile-time * property). 
Examples of where this might be sensible follow: * - Part of a critical inner-loop * - Frequent re-use throughout the driver, causing significant extra load @@ -1324,25 +427,25 @@ typedef struct base_dump_cpu_gpu_counters { * devices would benefit much more from a single DDK binary, instead of * insignificant micro-optimizations. * - * @section sec_base_user_api_gpuprops_dyn Dynamic GPU Properties + * Dynamic GPU Properties * * Dynamic GPU properties are presented in two sets: - * -# the commonly used properties in @ref base_gpu_props, which have been + * 1. the commonly used properties in @ref base_gpu_props, which have been * unpacked from GPU register bitfields. - * -# The full set of raw, unprocessed properties in @ref gpu_raw_gpu_props - * (also a member of @ref base_gpu_props). All of these are presented in + * 2. The full set of raw, unprocessed properties in gpu_raw_gpu_props + * (also a member of base_gpu_props). All of these are presented in * the packed form, as presented by the GPU registers themselves. * - * @usecase The raw properties in @ref gpu_raw_gpu_props are necessary to + * The raw properties in gpu_raw_gpu_props are necessary to * allow a user of the Mali Tools (e.g. PAT) to determine "Why is this device * behaving differently?". In this case, all information about the - * configuration is potentially useful, but it does not need to be processed - * by the driver. Instead, the raw registers can be processed by the Mali + * configuration is potentially useful, but it does not need to be processed + * by the driver. Instead, the raw registers can be processed by the Mali * Tools software on the host PC. * - * The properties returned extend the Midgard Configuration Discovery - * registers. For example, GPU clock speed is not specified in the Midgard - * Architecture, but is necessary for OpenCL's clGetDeviceInfo() function. + * The properties returned extend the GPU Configuration Discovery + * registers. 
For example, GPU clock speed is not specified in the GPU + * Architecture, but is necessary for OpenCL's clGetDeviceInfo() function. * * The GPU properties are obtained by a call to * base_get_gpu_props(). This simply returns a pointer to a const @@ -1356,12 +459,12 @@ typedef struct base_dump_cpu_gpu_counters { * base_context. * * - * @section sec_base_user_api_gpuprops_kernel Kernel Operation + * Kernel Operation * * During Base Context Create time, user-side makes a single kernel call: * - A call to fill user memory with GPU information structures * - * The kernel-side will fill the provided the entire processed @ref base_gpu_props + * The kernel-side will fill the provided the entire processed base_gpu_props * structure, because this information is required in both * user and kernel side; it does not make sense to decode it twice. * @@ -1370,7 +473,8 @@ typedef struct base_dump_cpu_gpu_counters { * be known kernel-side, to support chains that specify a 'Only Coherent Group' * SW requirement, or 'Only Coherent Group with Tiler' SW requirement. * - * @section sec_base_user_api_gpuprops_cocalc Coherency Group calculation + * Coherency Group calculation + * * Creation of the coherent group data is done at device-driver startup, and so * is one-time. This will most likely involve a loop with CLZ, shifting, and * bit clearing on the L2_PRESENT mask, depending on whether the @@ -1384,16 +488,6 @@ typedef struct base_dump_cpu_gpu_counters { * 16 coherent groups, since core groups are typically 4 cores. 
*/ -/** - * @addtogroup base_user_api_gpuprops User-side Base GPU Property Query APIs - * @{ - */ - -/** - * @addtogroup base_user_api_gpuprops_dyn Dynamic HW Properties - * @{ - */ - #define BASE_GPU_NUM_TEXTURE_FEATURES_REGISTERS 4 #define BASE_MAX_COHERENT_GROUPS 16 @@ -1498,7 +592,7 @@ struct mali_base_gpu_thread_props { }; /** - * @brief descriptor for a coherent group + * struct mali_base_gpu_coherent_group - descriptor for a coherent group * * \c core_mask exposes all cores in that coherent group, and \c num_cores * provides a cached population-count for that mask. @@ -1515,7 +609,7 @@ struct mali_base_gpu_coherent_group { }; /** - * @brief Coherency group information + * struct mali_base_gpu_coherent_group_info - Coherency group information * * Note that the sizes of the members could be reduced. However, the \c group * member might be 8-byte aligned to ensure the u64 core_mask is 8-byte @@ -1540,7 +634,7 @@ struct mali_base_gpu_coherent_group_info { u32 num_core_groups; /** - * Coherency features of the memory, accessed by @ref gpu_mem_features + * Coherency features of the memory, accessed by gpu_mem_features * methods */ u32 coherency; @@ -1554,18 +648,18 @@ struct mali_base_gpu_coherent_group_info { }; /** - * A complete description of the GPU's Hardware Configuration Discovery - * registers. + * struct gpu_raw_gpu_props - A complete description of the GPU's Hardware + * Configuration Discovery registers. * * The information is presented inefficiently for access. For frequent access, * the values should be better expressed in an unpacked form in the * base_gpu_props structure. * - * @usecase The raw properties in @ref gpu_raw_gpu_props are necessary to + * The raw properties in gpu_raw_gpu_props are necessary to * allow a user of the Mali Tools (e.g. PAT) to determine "Why is this device * behaving differently?". In this case, all information about the - * configuration is potentially useful, but it does not need to be processed - * by the driver. 
Instead, the raw registers can be processed by the Mali + * configuration is potentially useful, but it does not need to be processed + * by the driver. Instead, the raw registers can be processed by the Mali * Tools software on the host PC. * */ @@ -1604,95 +698,65 @@ struct gpu_raw_gpu_props { }; /** - * Return structure for base_get_gpu_props(). + * struct base_gpu_props - Return structure for base_get_gpu_props(). * * NOTE: the raw_props member in this data structure contains the register * values from which the value of the other members are derived. The derived * members exist to allow for efficient access and/or shielding the details * of the layout of the registers. * + * @unused_1: Keep for backwards compatibility. + * @raw_props: This member is large, likely to be 128 bytes. + * @coherency_info: This must be last member of the structure. */ -typedef struct base_gpu_props { +struct base_gpu_props { struct mali_base_gpu_core_props core_props; struct mali_base_gpu_l2_cache_props l2_props; - u64 unused_1; /* keep for backwards compatibility */ + u64 unused_1; struct mali_base_gpu_tiler_props tiler_props; struct mali_base_gpu_thread_props thread_props; - - /** This member is large, likely to be 128 bytes */ struct gpu_raw_gpu_props raw_props; - - /** This must be last member of the structure */ struct mali_base_gpu_coherent_group_info coherency_info; -} base_gpu_props; +}; -/** @} end group base_user_api_gpuprops_dyn */ - -/** @} end group base_user_api_gpuprops */ +#include "jm/mali_base_jm_kernel.h" /** - * @addtogroup base_user_api_core User-side Base core APIs - * @{ + * base_mem_group_id_get() - Get group ID from flags + * @flags: Flags to pass to base_mem_alloc + * + * This inline function extracts the encoded group ID from flags + * and converts it into numeric value (0~15). 
+ * + * Return: group ID(0~15) extracted from the parameter */ +static inline int base_mem_group_id_get(base_mem_alloc_flags flags) +{ + LOCAL_ASSERT((flags & ~BASE_MEM_FLAGS_INPUT_MASK) == 0); + return (int)((flags & BASE_MEM_GROUP_ID_MASK) >> + BASEP_MEM_GROUP_ID_SHIFT); +} /** - * Flags to pass to ::base_context_init. - * Flags can be ORed together to enable multiple things. + * base_mem_group_id_set() - Set group ID into base_mem_alloc_flags + * @id: group ID(0~15) you want to encode * - * These share the same space as BASEP_CONTEXT_FLAG_*, and so must - * not collide with them. - */ -typedef u32 base_context_create_flags; - -/** No flags set */ -#define BASE_CONTEXT_CREATE_FLAG_NONE ((base_context_create_flags)0) - -/** Base context is embedded in a cctx object (flag used for CINSTR - * software counter macros) - */ -#define BASE_CONTEXT_CCTX_EMBEDDED ((base_context_create_flags)1 << 0) - -/** Base context is a 'System Monitor' context for Hardware counters. + * This inline function encodes specific group ID into base_mem_alloc_flags. + * Parameter 'id' should lie in-between 0 to 15. * - * One important side effect of this is that job submission is disabled. 
- */ -#define BASE_CONTEXT_SYSTEM_MONITOR_SUBMIT_DISABLED \ - ((base_context_create_flags)1 << 1) - - -/* Bit-shift used to encode a memory group ID in base_context_create_flags - */ -#define BASEP_CONTEXT_MMU_GROUP_ID_SHIFT (3) - -/* Bitmask used to encode a memory group ID in base_context_create_flags - */ -#define BASEP_CONTEXT_MMU_GROUP_ID_MASK \ - ((base_context_create_flags)0xF << BASEP_CONTEXT_MMU_GROUP_ID_SHIFT) - -/* Bitpattern describing the base_context_create_flags that can be - * passed to the kernel - */ -#define BASEP_CONTEXT_CREATE_KERNEL_FLAGS \ - (BASE_CONTEXT_SYSTEM_MONITOR_SUBMIT_DISABLED | \ - BASEP_CONTEXT_MMU_GROUP_ID_MASK) - -/* Bitpattern describing the ::base_context_create_flags that can be - * passed to base_context_init() - */ -#define BASEP_CONTEXT_CREATE_ALLOWED_FLAGS \ - (BASE_CONTEXT_CCTX_EMBEDDED | BASEP_CONTEXT_CREATE_KERNEL_FLAGS) - -/* - * Private flags used on the base context + * Return: base_mem_alloc_flags with the group ID (id) encoded * - * These start at bit 31, and run down to zero. - * - * They share the same space as @ref base_context_create_flags, and so must - * not collide with them. + * The return value can be combined with other flags against base_mem_alloc + * to identify a specific memory group. 
*/ -/** Private flag tracking whether job descriptor dumping is disabled */ -#define BASEP_CONTEXT_FLAG_JOB_DUMP_DISABLED \ - ((base_context_create_flags)(1 << 31)) +static inline base_mem_alloc_flags base_mem_group_id_set(int id) +{ + LOCAL_ASSERT(id >= 0); + LOCAL_ASSERT(id < BASE_MEM_GROUP_COUNT); + + return ((base_mem_alloc_flags)id << BASEP_MEM_GROUP_ID_SHIFT) & + BASE_MEM_GROUP_ID_MASK; +} /** * base_context_mmu_group_id_set - Encode a memory group ID in @@ -1732,45 +796,7 @@ static inline int base_context_mmu_group_id_get( BASEP_CONTEXT_MMU_GROUP_ID_SHIFT); } -/** @} end group base_user_api_core */ - -/** @} end group base_user_api */ - -/** - * @addtogroup base_plat_config_gpuprops Base Platform Config GPU Properties - * @{ - * - * C Pre-processor macros are exposed here to do with Platform - * Config. - * - * These include: - * - GPU Properties that are constant on a particular Midgard Family - * Implementation e.g. Maximum samples per pixel on Mali-T600. - * - General platform config for the GPU, such as the GPU major and minor - * revison. - */ - -/** @} end group base_plat_config_gpuprops */ - -/** - * @addtogroup base_api Base APIs - * @{ - */ - -/** @} end group base_api */ - -/* Enable additional tracepoints for latency measurements (TL_ATOM_READY, - * TL_ATOM_DONE, TL_ATOM_PRIO_CHANGE, TL_ATOM_EVENT_POST) */ -#define BASE_TLSTREAM_ENABLE_LATENCY_TRACEPOINTS (1 << 0) - -/* Indicate that job dumping is enabled. This could affect certain timers - * to account for the performance impact. */ -#define BASE_TLSTREAM_JOB_DUMPING_ENABLED (1 << 1) - -#define BASE_TLSTREAM_FLAGS_MASK (BASE_TLSTREAM_ENABLE_LATENCY_TRACEPOINTS | \ - BASE_TLSTREAM_JOB_DUMPING_ENABLED) - -/** +/* * A number of bit flags are defined for requesting cpu_gpu_timeinfo. These * flags are also used, where applicable, for specifying which fields * are valid following the request operation. 
@@ -1794,5 +820,4 @@ static inline int base_context_mmu_group_id_get( BASE_TIMEINFO_KERNEL_SOURCE_FLAG | \ BASE_TIMEINFO_USER_SOURCE_FLAG) - #endif /* _BASE_KERNEL_H_ */ diff --git a/drivers/gpu/arm/bifrost/mali_base_mem_priv.h b/drivers/gpu/arm/bifrost/mali_base_mem_priv.h index 52c8a4f7d2d8..844a025b715d 100644 --- a/drivers/gpu/arm/bifrost/mali_base_mem_priv.h +++ b/drivers/gpu/arm/bifrost/mali_base_mem_priv.h @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2010-2014 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2010-2015, 2020 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -47,7 +47,7 @@ * - offset is ignored. */ struct basep_syncset { - base_mem_handle mem_handle; + struct base_mem_handle mem_handle; u64 user_addr; u64 size; u8 type; diff --git a/drivers/gpu/arm/bifrost/mali_kbase.h b/drivers/gpu/arm/bifrost/mali_kbase.h index 53495f2fc0dd..dd49495c7133 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase.h +++ b/drivers/gpu/arm/bifrost/mali_kbase.h @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2010-2019 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2010-2020 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -56,19 +56,21 @@ */ #include "mali_kbase_defs.h" -#include "mali_kbase_context.h" +#include "debug/mali_kbase_debug_ktrace.h" +#include "context/mali_kbase_context.h" #include "mali_kbase_strings.h" #include "mali_kbase_mem_lowlevel.h" -#include "mali_kbase_js.h" #include "mali_kbase_utility.h" #include "mali_kbase_mem.h" +#include "mmu/mali_kbase_mmu.h" #include "mali_kbase_gpu_memory_debugfs.h" #include "mali_kbase_mem_profile_debugfs.h" +#include "mali_kbase_gpuprops.h" +#include "mali_kbase_ioctl.h" #include "mali_kbase_debug_job_fault.h" #include "mali_kbase_jd_debugfs.h" -#include "mali_kbase_gpuprops.h" #include "mali_kbase_jm.h" -#include "mali_kbase_ioctl.h" +#include "mali_kbase_js.h" #include "ipa/mali_kbase_ipa.h" @@ -76,6 +78,8 @@ #include #endif +#include "mali_linux_trace.h" + #ifndef u64_to_user_ptr /* Introduced in Linux v4.6 */ @@ -97,16 +101,8 @@ struct kbase_device *kbase_device_alloc(void); * been setup before calling kbase_device_init */ -/* -* API to acquire device list semaphore and return pointer -* to the device list head -*/ -const struct list_head *kbase_dev_list_get(void); -/* API to release the device list semaphore */ -void kbase_dev_list_put(const struct list_head *dev_list); - -int kbase_device_init(struct kbase_device * const kbdev); -void kbase_device_term(struct kbase_device *kbdev); +int kbase_device_misc_init(struct kbase_device *kbdev); +void kbase_device_misc_term(struct kbase_device *kbdev); void kbase_device_free(struct kbase_device *kbdev); int kbase_device_has_feature(struct kbase_device *kbdev, u32 feature); @@ -156,6 +152,60 @@ unsigned long kbase_context_get_unmapped_area(struct kbase_context *kctx, const unsigned long addr, const unsigned long len, const unsigned long pgoff, const unsigned long flags); + +int assign_irqs(struct kbase_device *kbdev); + +int 
kbase_sysfs_init(struct kbase_device *kbdev); +void kbase_sysfs_term(struct kbase_device *kbdev); + + +int kbase_protected_mode_init(struct kbase_device *kbdev); +void kbase_protected_mode_term(struct kbase_device *kbdev); + +/** + * kbase_device_pm_init() - Performs power management initialization and + * Verifies device tree configurations. + * @kbdev: The kbase device structure for the device (must be a valid pointer) + * + * Return: 0 if successful, otherwise a standard Linux error code + */ +int kbase_device_pm_init(struct kbase_device *kbdev); + +/** + * kbase_device_pm_term() - Performs power management deinitialization and + * Free resources. + * @kbdev: The kbase device structure for the device (must be a valid pointer) + * + * Clean up all the resources + */ +void kbase_device_pm_term(struct kbase_device *kbdev); + + +int power_control_init(struct kbase_device *kbdev); +void power_control_term(struct kbase_device *kbdev); + +#ifdef CONFIG_DEBUG_FS +void kbase_device_debugfs_term(struct kbase_device *kbdev); +int kbase_device_debugfs_init(struct kbase_device *kbdev); +#else /* CONFIG_DEBUG_FS */ +static inline int kbase_device_debugfs_init(struct kbase_device *kbdev) +{ + return 0; +} + +static inline void kbase_device_debugfs_term(struct kbase_device *kbdev) { } +#endif /* CONFIG_DEBUG_FS */ + +int registers_map(struct kbase_device *kbdev); +void registers_unmap(struct kbase_device *kbdev); + +int kbase_device_coherency_init(struct kbase_device *kbdev); + +#ifdef CONFIG_MALI_BUSLOG +int buslog_init(struct kbase_device *kbdev); +void buslog_term(struct kbase_device *kbdev); +#endif + int kbase_jd_init(struct kbase_context *kctx); void kbase_jd_exit(struct kbase_context *kctx); @@ -200,9 +250,6 @@ void kbase_jd_zap_context(struct kbase_context *kctx); bool jd_done_nolock(struct kbase_jd_atom *katom, struct list_head *completed_jobs_ctx); void kbase_jd_free_external_resources(struct kbase_jd_atom *katom); -bool jd_submit_atom(struct kbase_context *kctx, - 
const struct base_jd_atom_v2 *user_atom, - struct kbase_jd_atom *katom); void kbase_jd_dep_clear_locked(struct kbase_jd_atom *katom); /** @@ -232,6 +279,22 @@ void kbase_job_done(struct kbase_device *kbdev, u32 done); void kbase_job_slot_ctx_priority_check_locked(struct kbase_context *kctx, struct kbase_jd_atom *katom); +/** + * kbase_job_slot_softstop_start_rp() - Soft-stop the atom at the start + * of a renderpass. + * @kctx: Pointer to a kernel base context. + * @reg: Reference of a growable GPU memory region in the same context. + * Takes ownership of the reference if successful. + * + * Used to switch to incremental rendering if we have nearly run out of + * virtual address space in a growable memory region and the atom currently + * executing on a job slot is the tiler job chain at the start of a renderpass. + * + * Return 0 if successful, otherwise a negative error code. + */ +int kbase_job_slot_softstop_start_rp(struct kbase_context *kctx, + struct kbase_va_region *reg); + void kbase_job_slot_softstop(struct kbase_device *kbdev, int js, struct kbase_jd_atom *target_katom); void kbase_job_slot_softstop_swflags(struct kbase_device *kbdev, int js, @@ -243,6 +306,7 @@ void kbase_job_check_enter_disjoint(struct kbase_device *kbdev, u32 action, void kbase_job_check_leave_disjoint(struct kbase_device *kbdev, struct kbase_jd_atom *target_katom); + void kbase_event_post(struct kbase_context *ctx, struct kbase_jd_atom *event); int kbase_event_dequeue(struct kbase_context *ctx, struct base_jd_event_v2 *uevent); int kbase_event_pending(struct kbase_context *ctx); @@ -262,6 +326,16 @@ void kbase_event_wakeup(struct kbase_context *kctx); */ int kbasep_jit_alloc_validate(struct kbase_context *kctx, struct base_jit_alloc_info *info); + +/** + * kbase_jit_retry_pending_alloc() - Retry blocked just-in-time memory + * allocations. + * + * @kctx: Pointer to the kbase context within which the just-in-time + * memory allocations are to be retried. 
+ */ +void kbase_jit_retry_pending_alloc(struct kbase_context *kctx); + /** * kbase_free_user_buffer() - Free memory allocated for struct * @kbase_debug_copy_buffer. @@ -288,23 +362,6 @@ static inline void kbase_free_user_buffer( } } -/** - * kbase_mem_copy_from_extres_page() - Copy pages from external resources. - * - * @kctx: kbase context within which the copying is to take place. - * @extres_pages: Pointer to the pages which correspond to the external - * resources from which the copying will take place. - * @pages: Pointer to the pages to which the content is to be - * copied from the provided external resources. - * @nr_pages: Number of pages to copy. - * @target_page_nr: Number of target pages which will be used for copying. - * @offset: Offset into the target pages from which the copying - * is to be performed. - * @to_copy: Size of the chunk to be copied, in bytes. - */ -void kbase_mem_copy_from_extres_page(struct kbase_context *kctx, - void *extres_page, struct page **pages, unsigned int nr_pages, - unsigned int *target_page_nr, size_t offset, size_t *to_copy); /** * kbase_mem_copy_from_extres() - Copy from external resources. * @@ -333,18 +390,6 @@ void kbasep_complete_triggered_soft_events(struct kbase_context *kctx, u64 evt); void kbasep_as_do_poke(struct work_struct *work); -/** Returns the name associated with a Mali exception code - * - * This function is called from the interrupt handler when a GPU fault occurs. - * It reports the details of the fault using KBASE_DEBUG_PRINT_WARN. - * - * @param[in] kbdev The kbase device that the GPU fault occurred from. 
- * @param[in] exception_code exception code - * @return name associated with the exception code - */ -const char *kbase_exception_name(struct kbase_device *kbdev, - u32 exception_code); - /** * Check whether a system suspend is in progress, or has already been suspended * @@ -360,6 +405,24 @@ static inline bool kbase_pm_is_suspending(struct kbase_device *kbdev) return kbdev->pm.suspending; } +#ifdef CONFIG_MALI_ARBITER_SUPPORT +/* + * Check whether a gpu lost is in progress + * + * @kbdev: The kbase device structure for the device (must be a valid pointer) + * + * Indicates whether a gpu lost has been received and jobs are no longer + * being scheduled + * + * Return: true if gpu is lost + * Return: false otherwise + */ +static inline bool kbase_pm_is_gpu_lost(struct kbase_device *kbdev) +{ + return kbdev->pm.gpu_lost; +} +#endif + /** * kbase_pm_is_active - Determine whether the GPU is active * @@ -446,6 +509,8 @@ static inline struct kbase_jd_atom *kbase_jd_atom_from_id( * and during context creation. * * @param kbdev The kbase device + * + * This function does not return a value. */ void kbase_disjoint_init(struct kbase_device *kbdev); @@ -507,181 +572,6 @@ void kbase_disjoint_state_down(struct kbase_device *kbdev); #define UINT64_MAX ((uint64_t)0xFFFFFFFFFFFFFFFFULL) #endif -#if KBASE_TRACE_ENABLE -void kbasep_trace_debugfs_init(struct kbase_device *kbdev); - -#ifndef CONFIG_MALI_BIFROST_SYSTEM_TRACE -/** Add trace values about a job-slot - * - * @note Any functions called through this macro will still be evaluated in - * Release builds (CONFIG_MALI_BIFROST_DEBUG not defined). Therefore, when KBASE_TRACE_ENABLE == 0 any - * functions called to get the parameters supplied to this macro must: - * - be static or static inline - * - must just return 0 and have no other statements present in the body.
- */ -#define KBASE_TRACE_ADD_SLOT(kbdev, code, ctx, katom, gpu_addr, jobslot) \ - kbasep_trace_add(kbdev, KBASE_TRACE_CODE(code), ctx, katom, gpu_addr, \ - KBASE_TRACE_FLAG_JOBSLOT, 0, jobslot, 0) - -/** Add trace values about a job-slot, with info - * - * @note Any functions called through this macro will still be evaluated in - * Release builds (CONFIG_MALI_BIFROST_DEBUG not defined). Therefore, when KBASE_TRACE_ENABLE == 0 any - * functions called to get the parameters supplied to this macro must: - * - be static or static inline - * - must just return 0 and have no other statements present in the body. - */ -#define KBASE_TRACE_ADD_SLOT_INFO(kbdev, code, ctx, katom, gpu_addr, jobslot, info_val) \ - kbasep_trace_add(kbdev, KBASE_TRACE_CODE(code), ctx, katom, gpu_addr, \ - KBASE_TRACE_FLAG_JOBSLOT, 0, jobslot, info_val) - -/** Add trace values about a ctx refcount - * - * @note Any functions called through this macro will still be evaluated in - * Release builds (CONFIG_MALI_BIFROST_DEBUG not defined). Therefore, when KBASE_TRACE_ENABLE == 0 any - * functions called to get the parameters supplied to this macro must: - * - be static or static inline - * - must just return 0 and have no other statements present in the body. - */ -#define KBASE_TRACE_ADD_REFCOUNT(kbdev, code, ctx, katom, gpu_addr, refcount) \ - kbasep_trace_add(kbdev, KBASE_TRACE_CODE(code), ctx, katom, gpu_addr, \ - KBASE_TRACE_FLAG_REFCOUNT, refcount, 0, 0) -/** Add trace values about a ctx refcount, and info - * - * @note Any functions called through this macro will still be evaluated in - * Release builds (CONFIG_MALI_BIFROST_DEBUG not defined). Therefore, when KBASE_TRACE_ENABLE == 0 any - * functions called to get the parameters supplied to this macro must: - * - be static or static inline - * - must just return 0 and have no other statements present in the body. 
- */ -#define KBASE_TRACE_ADD_REFCOUNT_INFO(kbdev, code, ctx, katom, gpu_addr, refcount, info_val) \ - kbasep_trace_add(kbdev, KBASE_TRACE_CODE(code), ctx, katom, gpu_addr, \ - KBASE_TRACE_FLAG_REFCOUNT, refcount, 0, info_val) - -/** Add trace values (no slot or refcount) - * - * @note Any functions called through this macro will still be evaluated in - * Release builds (CONFIG_MALI_BIFROST_DEBUG not defined). Therefore, when KBASE_TRACE_ENABLE == 0 any - * functions called to get the parameters supplied to this macro must: - * - be static or static inline - * - must just return 0 and have no other statements present in the body. - */ -#define KBASE_TRACE_ADD(kbdev, code, ctx, katom, gpu_addr, info_val) \ - kbasep_trace_add(kbdev, KBASE_TRACE_CODE(code), ctx, katom, gpu_addr, \ - 0, 0, 0, info_val) - -/** Clear the trace */ -#define KBASE_TRACE_CLEAR(kbdev) \ - kbasep_trace_clear(kbdev) - -/** Dump the slot trace */ -#define KBASE_TRACE_DUMP(kbdev) \ - kbasep_trace_dump(kbdev) - -/** PRIVATE - do not use directly. Use KBASE_TRACE_ADD() instead */ -void kbasep_trace_add(struct kbase_device *kbdev, enum kbase_trace_code code, void *ctx, struct kbase_jd_atom *katom, u64 gpu_addr, u8 flags, int refcount, int jobslot, unsigned long info_val); -/** PRIVATE - do not use directly. 
Use KBASE_TRACE_CLEAR() instead */ -void kbasep_trace_clear(struct kbase_device *kbdev); -#else /* #ifndef CONFIG_MALI_BIFROST_SYSTEM_TRACE */ -/* Dispatch kbase trace events as system trace events */ -#include -#define KBASE_TRACE_ADD_SLOT(kbdev, code, ctx, katom, gpu_addr, jobslot)\ - trace_mali_##code(jobslot, 0) - -#define KBASE_TRACE_ADD_SLOT_INFO(kbdev, code, ctx, katom, gpu_addr, jobslot, info_val)\ - trace_mali_##code(jobslot, info_val) - -#define KBASE_TRACE_ADD_REFCOUNT(kbdev, code, ctx, katom, gpu_addr, refcount)\ - trace_mali_##code(refcount, 0) - -#define KBASE_TRACE_ADD_REFCOUNT_INFO(kbdev, code, ctx, katom, gpu_addr, refcount, info_val)\ - trace_mali_##code(refcount, info_val) - -#define KBASE_TRACE_ADD(kbdev, code, ctx, katom, gpu_addr, info_val)\ - trace_mali_##code(gpu_addr, info_val) - -#define KBASE_TRACE_CLEAR(kbdev)\ - do {\ - CSTD_UNUSED(kbdev);\ - CSTD_NOP(0);\ - } while (0) -#define KBASE_TRACE_DUMP(kbdev)\ - do {\ - CSTD_UNUSED(kbdev);\ - CSTD_NOP(0);\ - } while (0) - -#endif /* #ifndef CONFIG_MALI_BIFROST_SYSTEM_TRACE */ -#else -#define KBASE_TRACE_ADD_SLOT(kbdev, code, ctx, katom, gpu_addr, jobslot)\ - do {\ - CSTD_UNUSED(kbdev);\ - CSTD_NOP(code);\ - CSTD_UNUSED(ctx);\ - CSTD_UNUSED(katom);\ - CSTD_UNUSED(gpu_addr);\ - CSTD_UNUSED(jobslot);\ - } while (0) - -#define KBASE_TRACE_ADD_SLOT_INFO(kbdev, code, ctx, katom, gpu_addr, jobslot, info_val)\ - do {\ - CSTD_UNUSED(kbdev);\ - CSTD_NOP(code);\ - CSTD_UNUSED(ctx);\ - CSTD_UNUSED(katom);\ - CSTD_UNUSED(gpu_addr);\ - CSTD_UNUSED(jobslot);\ - CSTD_UNUSED(info_val);\ - CSTD_NOP(0);\ - } while (0) - -#define KBASE_TRACE_ADD_REFCOUNT(kbdev, code, ctx, katom, gpu_addr, refcount)\ - do {\ - CSTD_UNUSED(kbdev);\ - CSTD_NOP(code);\ - CSTD_UNUSED(ctx);\ - CSTD_UNUSED(katom);\ - CSTD_UNUSED(gpu_addr);\ - CSTD_UNUSED(refcount);\ - CSTD_NOP(0);\ - } while (0) - -#define KBASE_TRACE_ADD_REFCOUNT_INFO(kbdev, code, ctx, katom, gpu_addr, refcount, info_val)\ - do {\ - CSTD_UNUSED(kbdev);\ - 
CSTD_NOP(code);\ - CSTD_UNUSED(ctx);\ - CSTD_UNUSED(katom);\ - CSTD_UNUSED(gpu_addr);\ - CSTD_UNUSED(info_val);\ - CSTD_NOP(0);\ - } while (0) - -#define KBASE_TRACE_ADD(kbdev, code, subcode, ctx, katom, val)\ - do {\ - CSTD_UNUSED(kbdev);\ - CSTD_NOP(code);\ - CSTD_UNUSED(subcode);\ - CSTD_UNUSED(ctx);\ - CSTD_UNUSED(katom);\ - CSTD_UNUSED(val);\ - CSTD_NOP(0);\ - } while (0) - -#define KBASE_TRACE_CLEAR(kbdev)\ - do {\ - CSTD_UNUSED(kbdev);\ - CSTD_NOP(0);\ - } while (0) -#define KBASE_TRACE_DUMP(kbdev)\ - do {\ - CSTD_UNUSED(kbdev);\ - CSTD_NOP(0);\ - } while (0) -#endif /* KBASE_TRACE_ENABLE */ -/** PRIVATE - do not use directly. Use KBASE_TRACE_DUMP() instead */ -void kbasep_trace_dump(struct kbase_device *kbdev); - #if defined(CONFIG_DEBUG_FS) && !defined(CONFIG_MALI_BIFROST_NO_MALI) /* kbase_io_history_init - initialize data struct for register access history @@ -731,5 +621,4 @@ int kbase_io_history_resize(struct kbase_io_history *h, u16 new_size); #endif /* CONFIG_DEBUG_FS */ - #endif diff --git a/drivers/gpu/arm/bifrost/mali_kbase_10969_workaround.c b/drivers/gpu/arm/bifrost/mali_kbase_10969_workaround.c deleted file mode 100644 index 118511abe053..000000000000 --- a/drivers/gpu/arm/bifrost/mali_kbase_10969_workaround.c +++ /dev/null @@ -1,209 +0,0 @@ -/* - * - * (C) COPYRIGHT 2013-2015,2017-2018 ARM Limited. All rights reserved. - * - * This program is free software and is provided to you under the terms of the - * GNU General Public License version 2 as published by the Free Software - * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. 
- * - * You should have received a copy of the GNU General Public License - * along with this program; if not, you can access it online at - * http://www.gnu.org/licenses/gpl-2.0.html. - * - * SPDX-License-Identifier: GPL-2.0 - * - */ -#include -#include -#include - -/* Mask of X and Y coordinates for the coordinates words in the descriptors*/ -#define X_COORDINATE_MASK 0x00000FFF -#define Y_COORDINATE_MASK 0x0FFF0000 -/* Max number of words needed from the fragment shader job descriptor */ -#define JOB_HEADER_SIZE_IN_WORDS 10 -#define JOB_HEADER_SIZE (JOB_HEADER_SIZE_IN_WORDS*sizeof(u32)) - -/* Word 0: Status Word */ -#define JOB_DESC_STATUS_WORD 0 -/* Word 1: Restart Index */ -#define JOB_DESC_RESTART_INDEX_WORD 1 -/* Word 2: Fault address low word */ -#define JOB_DESC_FAULT_ADDR_LOW_WORD 2 -/* Word 8: Minimum Tile Coordinates */ -#define FRAG_JOB_DESC_MIN_TILE_COORD_WORD 8 -/* Word 9: Maximum Tile Coordinates */ -#define FRAG_JOB_DESC_MAX_TILE_COORD_WORD 9 - -int kbasep_10969_workaround_clamp_coordinates(struct kbase_jd_atom *katom) -{ - struct device *dev = katom->kctx->kbdev->dev; - u32 clamped = 0; - struct kbase_va_region *region; - struct tagged_addr *page_array; - u64 page_index; - u32 offset = katom->jc & (~PAGE_MASK); - u32 *page_1 = NULL; - u32 *page_2 = NULL; - u32 job_header[JOB_HEADER_SIZE_IN_WORDS]; - void *dst = job_header; - u32 minX, minY, maxX, maxY; - u32 restartX, restartY; - struct page *p; - u32 copy_size; - - dev_warn(dev, "Called TILE_RANGE_FAULT workaround clamping function.\n"); - if (!(katom->core_req & BASE_JD_REQ_FS)) - return 0; - - kbase_gpu_vm_lock(katom->kctx); - region = kbase_region_tracker_find_region_enclosing_address(katom->kctx, - katom->jc); - if (kbase_is_region_invalid_or_free(region)) - goto out_unlock; - - page_array = kbase_get_cpu_phy_pages(region); - if (!page_array) - goto out_unlock; - - page_index = (katom->jc >> PAGE_SHIFT) - region->start_pfn; - - p = as_page(page_array[page_index]); - - /* we need the first 10 
words of the fragment shader job descriptor. - * We need to check that the offset + 10 words is less that the page - * size otherwise we need to load the next page. - * page_size_overflow will be equal to 0 in case the whole descriptor - * is within the page > 0 otherwise. - */ - copy_size = MIN(PAGE_SIZE - offset, JOB_HEADER_SIZE); - - page_1 = kmap_atomic(p); - - /* page_1 is a u32 pointer, offset is expressed in bytes */ - page_1 += offset>>2; - - kbase_sync_single_for_cpu(katom->kctx->kbdev, - kbase_dma_addr(p) + offset, - copy_size, DMA_BIDIRECTIONAL); - - memcpy(dst, page_1, copy_size); - - /* The data needed overflows page the dimension, - * need to map the subsequent page */ - if (copy_size < JOB_HEADER_SIZE) { - p = as_page(page_array[page_index + 1]); - page_2 = kmap_atomic(p); - - kbase_sync_single_for_cpu(katom->kctx->kbdev, - kbase_dma_addr(p), - JOB_HEADER_SIZE - copy_size, DMA_BIDIRECTIONAL); - - memcpy(dst + copy_size, page_2, JOB_HEADER_SIZE - copy_size); - } - - /* We managed to correctly map one or two pages (in case of overflow) */ - /* Get Bounding Box data and restart index from fault address low word */ - minX = job_header[FRAG_JOB_DESC_MIN_TILE_COORD_WORD] & X_COORDINATE_MASK; - minY = job_header[FRAG_JOB_DESC_MIN_TILE_COORD_WORD] & Y_COORDINATE_MASK; - maxX = job_header[FRAG_JOB_DESC_MAX_TILE_COORD_WORD] & X_COORDINATE_MASK; - maxY = job_header[FRAG_JOB_DESC_MAX_TILE_COORD_WORD] & Y_COORDINATE_MASK; - restartX = job_header[JOB_DESC_FAULT_ADDR_LOW_WORD] & X_COORDINATE_MASK; - restartY = job_header[JOB_DESC_FAULT_ADDR_LOW_WORD] & Y_COORDINATE_MASK; - - dev_warn(dev, "Before Clamping:\n" - "Jobstatus: %08x\n" - "restartIdx: %08x\n" - "Fault_addr_low: %08x\n" - "minCoordsX: %08x minCoordsY: %08x\n" - "maxCoordsX: %08x maxCoordsY: %08x\n", - job_header[JOB_DESC_STATUS_WORD], - job_header[JOB_DESC_RESTART_INDEX_WORD], - job_header[JOB_DESC_FAULT_ADDR_LOW_WORD], - minX, minY, - maxX, maxY); - - /* Set the restart index to the one which generated 
the fault*/ - job_header[JOB_DESC_RESTART_INDEX_WORD] = - job_header[JOB_DESC_FAULT_ADDR_LOW_WORD]; - - if (restartX < minX) { - job_header[JOB_DESC_RESTART_INDEX_WORD] = (minX) | restartY; - dev_warn(dev, - "Clamping restart X index to minimum. %08x clamped to %08x\n", - restartX, minX); - clamped = 1; - } - if (restartY < minY) { - job_header[JOB_DESC_RESTART_INDEX_WORD] = (minY) | restartX; - dev_warn(dev, - "Clamping restart Y index to minimum. %08x clamped to %08x\n", - restartY, minY); - clamped = 1; - } - if (restartX > maxX) { - job_header[JOB_DESC_RESTART_INDEX_WORD] = (maxX) | restartY; - dev_warn(dev, - "Clamping restart X index to maximum. %08x clamped to %08x\n", - restartX, maxX); - clamped = 1; - } - if (restartY > maxY) { - job_header[JOB_DESC_RESTART_INDEX_WORD] = (maxY) | restartX; - dev_warn(dev, - "Clamping restart Y index to maximum. %08x clamped to %08x\n", - restartY, maxY); - clamped = 1; - } - - if (clamped) { - /* Reset the fault address low word - * and set the job status to STOPPED */ - job_header[JOB_DESC_FAULT_ADDR_LOW_WORD] = 0x0; - job_header[JOB_DESC_STATUS_WORD] = BASE_JD_EVENT_STOPPED; - dev_warn(dev, "After Clamping:\n" - "Jobstatus: %08x\n" - "restartIdx: %08x\n" - "Fault_addr_low: %08x\n" - "minCoordsX: %08x minCoordsY: %08x\n" - "maxCoordsX: %08x maxCoordsY: %08x\n", - job_header[JOB_DESC_STATUS_WORD], - job_header[JOB_DESC_RESTART_INDEX_WORD], - job_header[JOB_DESC_FAULT_ADDR_LOW_WORD], - minX, minY, - maxX, maxY); - - /* Flush CPU cache to update memory for future GPU reads*/ - memcpy(page_1, dst, copy_size); - p = as_page(page_array[page_index]); - - kbase_sync_single_for_device(katom->kctx->kbdev, - kbase_dma_addr(p) + offset, - copy_size, DMA_TO_DEVICE); - - if (copy_size < JOB_HEADER_SIZE) { - memcpy(page_2, dst + copy_size, - JOB_HEADER_SIZE - copy_size); - p = as_page(page_array[page_index + 1]); - - kbase_sync_single_for_device(katom->kctx->kbdev, - kbase_dma_addr(p), - JOB_HEADER_SIZE - copy_size, - DMA_TO_DEVICE); - 
} - } - if (copy_size < JOB_HEADER_SIZE) - kunmap_atomic(page_2); - - kunmap_atomic(page_1); - -out_unlock: - kbase_gpu_vm_unlock(katom->kctx); - return clamped; -} diff --git a/drivers/gpu/arm/bifrost/mali_kbase_10969_workaround.h b/drivers/gpu/arm/bifrost/mali_kbase_10969_workaround.h deleted file mode 100644 index 379a05a1a128..000000000000 --- a/drivers/gpu/arm/bifrost/mali_kbase_10969_workaround.h +++ /dev/null @@ -1,37 +0,0 @@ -/* - * - * (C) COPYRIGHT 2013-2014, 2018 ARM Limited. All rights reserved. - * - * This program is free software and is provided to you under the terms of the - * GNU General Public License version 2 as published by the Free Software - * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, you can access it online at - * http://www.gnu.org/licenses/gpl-2.0.html. - * - * SPDX-License-Identifier: GPL-2.0 - * - */ - -#ifndef _KBASE_10969_WORKAROUND_ -#define _KBASE_10969_WORKAROUND_ - -/** - * kbasep_10969_workaround_clamp_coordinates - Apply the WA to clamp the restart indices - * @katom: atom representing the fragment job for which the WA has to be applied - * - * This workaround is used to solve an HW issue with single iterator GPUs. - * If a fragment job is soft-stopped on the edge of its bounding box, it can happen - * that the restart index is out of bounds and the rerun causes a tile range - * fault. If this happens we try to clamp the restart index to a correct value. 
- */ -int kbasep_10969_workaround_clamp_coordinates(struct kbase_jd_atom *katom); - -#endif /* _KBASE_10969_WORKAROUND_ */ diff --git a/drivers/gpu/arm/bifrost/mali_kbase_as_fault_debugfs.c b/drivers/gpu/arm/bifrost/mali_kbase_as_fault_debugfs.c index 0daae6cedbf3..76bbfffe03a0 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_as_fault_debugfs.c +++ b/drivers/gpu/arm/bifrost/mali_kbase_as_fault_debugfs.c @@ -24,6 +24,7 @@ #include #include +#include #ifdef CONFIG_DEBUG_FS #ifdef CONFIG_MALI_BIFROST_DEBUG @@ -36,7 +37,7 @@ static int kbase_as_fault_read(struct seq_file *sfile, void *data) const struct list_head *kbdev_list; struct kbase_device *kbdev = NULL; - kbdev_list = kbase_dev_list_get(); + kbdev_list = kbase_device_get_list(); list_for_each(entry, kbdev_list) { kbdev = list_entry(entry, struct kbase_device, entry); @@ -53,7 +54,7 @@ static int kbase_as_fault_read(struct seq_file *sfile, void *data) } - kbase_dev_list_put(kbdev_list); + kbase_device_put_list(kbdev_list); return 0; } diff --git a/drivers/gpu/arm/bifrost/mali_kbase_config_defaults.h b/drivers/gpu/arm/bifrost/mali_kbase_config_defaults.h index 6a1083c0bd76..e079281127ab 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_config_defaults.h +++ b/drivers/gpu/arm/bifrost/mali_kbase_config_defaults.h @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2013-2019 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2013-2020 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -33,27 +33,6 @@ /* Include mandatory definitions per platform */ #include -/** -* Boolean indicating whether the driver is configured to be secure at -* a potential loss of performance. -* -* This currently affects only r0p0-15dev0 HW and earlier. 
-* -* On r0p0-15dev0 HW and earlier, there are tradeoffs between security and -* performance: -* -* - When this is set to true, the driver remains fully secure, -* but potentially loses performance compared with setting this to -* false. -* - When set to false, the driver is open to certain security -* attacks. -* -* From r0p0-00rel0 and onwards, there is no security loss by setting -* this to false, and no performance loss by setting it to -* true. -*/ -#define DEFAULT_SECURE_BUT_LOSS_OF_PERFORMANCE false - enum { /** * Use unrestricted Address ID width on the AXI bus. @@ -211,5 +190,24 @@ enum { */ #define DEFAULT_GPU_FREQ_KHZ_MAX (5000) +/** + * Default timeout for task execution on an endpoint + * + * Number of GPU clock cycles before the driver terminates a task that is + * making no forward progress on an endpoint (e.g. shader core). + * Value chosen is equivalent to the time after which a job is hard stopped + * which is 5 seconds (assuming the GPU is usually clocked at ~500 MHZ). + */ +#define DEFAULT_PROGRESS_TIMEOUT ((u64)5 * 500 * 1024 * 1024) + +/** + * Default threshold at which to switch to incremental rendering + * + * Fraction of the maximum size of an allocation that grows on GPU page fault + * that can be used up before the driver switches to incremental rendering, + * in 256ths. 0 means disable incremental rendering. + */ +#define DEFAULT_IR_THRESHOLD (192) + #endif /* _KBASE_CONFIG_DEFAULTS_H_ */ diff --git a/drivers/gpu/arm/bifrost/mali_kbase_context.c b/drivers/gpu/arm/bifrost/mali_kbase_context.c deleted file mode 100644 index 1cd854e5d7cf..000000000000 --- a/drivers/gpu/arm/bifrost/mali_kbase_context.c +++ /dev/null @@ -1,334 +0,0 @@ -/* - * - * (C) COPYRIGHT 2010-2019 ARM Limited. All rights reserved. 
- * - * This program is free software and is provided to you under the terms of the - * GNU General Public License version 2 as published by the Free Software - * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, you can access it online at - * http://www.gnu.org/licenses/gpl-2.0.html. - * - * SPDX-License-Identifier: GPL-2.0 - * - */ - - - -/* - * Base kernel context APIs - */ - -#include -#include -#include -#include -#include -#include -#include - -struct kbase_context * -kbase_create_context(struct kbase_device *kbdev, bool is_compat, - base_context_create_flags const flags, - unsigned long const api_version, - struct file *const filp) -{ - struct kbase_context *kctx; - int err; - struct page *p; - struct kbasep_js_kctx_info *js_kctx_info = NULL; - unsigned long irq_flags = 0; - const unsigned long cookies_mask = KBASE_COOKIE_MASK; - - if (WARN_ON(!kbdev)) - goto out; - - /* Validate flags */ - if (WARN_ON(flags != (flags & BASEP_CONTEXT_CREATE_KERNEL_FLAGS))) - goto out; - - /* zero-inited as lot of code assume it's zero'ed out on create */ - kctx = vzalloc(sizeof(*kctx)); - - if (!kctx) - goto out; - - /* creating a context is considered a disjoint event */ - kbase_disjoint_event(kbdev); - - kctx->kbdev = kbdev; - kctx->as_nr = KBASEP_AS_NR_INVALID; - atomic_set(&kctx->refcount, 0); - if (is_compat) - kbase_ctx_flag_set(kctx, KCTX_COMPAT); -#if defined(CONFIG_64BIT) - else - kbase_ctx_flag_set(kctx, KCTX_FORCE_SAME_VA); -#endif /* !defined(CONFIG_64BIT) */ - - spin_lock_init(&kctx->mm_update_lock); - kctx->process_mm = NULL; - 
atomic_set(&kctx->nonmapped_pages, 0); - atomic_set(&kctx->permanent_mapped_pages, 0); - kctx->slots_pullable = 0; - kctx->tgid = current->tgid; - kctx->pid = current->pid; - - err = kbase_mem_pool_group_init(&kctx->mem_pools, kbdev, - &kbdev->mem_pool_defaults, &kbdev->mem_pools); - if (err) - goto free_kctx; - - err = kbase_mem_evictable_init(kctx); - if (err) - goto free_both_pools; - - atomic_set(&kctx->used_pages, 0); - - err = kbase_jd_init(kctx); - if (err) - goto deinit_evictable; - - err = kbasep_js_kctx_init(kctx); - if (err) - goto free_jd; /* safe to call kbasep_js_kctx_term in this case */ - - err = kbase_event_init(kctx); - if (err) - goto free_jd; - - mutex_init(&kctx->reg_lock); - - spin_lock_init(&kctx->mem_partials_lock); - INIT_LIST_HEAD(&kctx->mem_partials); - - INIT_LIST_HEAD(&kctx->waiting_soft_jobs); - spin_lock_init(&kctx->waiting_soft_jobs_lock); - err = kbase_dma_fence_init(kctx); - if (err) - goto free_event; - - err = kbase_mmu_init(kbdev, &kctx->mmu, kctx, - base_context_mmu_group_id_get(flags)); - if (err) - goto term_dma_fence; - - p = kbase_mem_alloc_page( - &kctx->mem_pools.small[KBASE_MEM_GROUP_SINK]); - if (!p) - goto no_sink_page; - kctx->aliasing_sink_page = as_tagged(page_to_phys(p)); - - init_waitqueue_head(&kctx->event_queue); - - bitmap_copy(kctx->cookies, &cookies_mask, BITS_PER_LONG); - - /* Make sure page 0 is not used... 
*/ - err = kbase_region_tracker_init(kctx); - if (err) - goto no_region_tracker; - - err = kbase_sticky_resource_init(kctx); - if (err) - goto no_sticky; - - err = kbase_jit_init(kctx); - if (err) - goto no_jit; - - -#ifdef CONFIG_GPU_TRACEPOINTS - atomic_set(&kctx->jctx.work_id, 0); -#endif - - kctx->id = atomic_add_return(1, &(kbdev->ctx_num)) - 1; - - mutex_init(&kctx->legacy_hwcnt_lock); - - kbase_timer_setup(&kctx->soft_job_timeout, - kbasep_soft_job_timeout_worker); - - mutex_lock(&kbdev->kctx_list_lock); - list_add(&kctx->kctx_list_link, &kbdev->kctx_list); - KBASE_TLSTREAM_TL_NEW_CTX(kbdev, kctx, kctx->id, (u32)(kctx->tgid)); - mutex_unlock(&kbdev->kctx_list_lock); - - kctx->api_version = api_version; - kctx->filp = filp; - - js_kctx_info = &kctx->jctx.sched_info; - - mutex_lock(&js_kctx_info->ctx.jsctx_mutex); - spin_lock_irqsave(&kctx->kbdev->hwaccess_lock, irq_flags); - - /* Translate the flags */ - if ((flags & BASE_CONTEXT_SYSTEM_MONITOR_SUBMIT_DISABLED) == 0) - kbase_ctx_flag_clear(kctx, KCTX_SUBMIT_DISABLED); - - spin_unlock_irqrestore(&kctx->kbdev->hwaccess_lock, irq_flags); - mutex_unlock(&js_kctx_info->ctx.jsctx_mutex); - - return kctx; - -no_jit: - kbase_gpu_vm_lock(kctx); - kbase_sticky_resource_term(kctx); - kbase_gpu_vm_unlock(kctx); -no_sticky: - kbase_region_tracker_term(kctx); -no_region_tracker: - kbase_mem_pool_free( - &kctx->mem_pools.small[KBASE_MEM_GROUP_SINK], p, false); -no_sink_page: - kbase_mmu_term(kbdev, &kctx->mmu); -term_dma_fence: - kbase_dma_fence_term(kctx); -free_event: - kbase_event_cleanup(kctx); -free_jd: - /* Safe to call this one even when didn't initialize (assuming kctx was sufficiently zeroed) */ - kbasep_js_kctx_term(kctx); - kbase_jd_exit(kctx); -deinit_evictable: - kbase_mem_evictable_deinit(kctx); -free_both_pools: - kbase_mem_pool_group_term(&kctx->mem_pools); -free_kctx: - vfree(kctx); -out: - return NULL; -} -KBASE_EXPORT_SYMBOL(kbase_create_context); - -static void kbase_reg_pending_dtor(struct kbase_device 
*kbdev, - struct kbase_va_region *reg) -{ - dev_dbg(kbdev->dev, "Freeing pending unmapped region\n"); - kbase_mem_phy_alloc_put(reg->cpu_alloc); - kbase_mem_phy_alloc_put(reg->gpu_alloc); - kfree(reg); -} - -void kbase_destroy_context(struct kbase_context *kctx) -{ - struct kbase_device *kbdev; - int pages; - unsigned long pending_regions_to_clean; - unsigned long flags; - struct page *p; - - if (WARN_ON(!kctx)) - return; - - kbdev = kctx->kbdev; - if (WARN_ON(!kbdev)) - return; - - mutex_lock(&kbdev->kctx_list_lock); - KBASE_TLSTREAM_TL_DEL_CTX(kbdev, kctx); - list_del(&kctx->kctx_list_link); - mutex_unlock(&kbdev->kctx_list_lock); - - KBASE_TRACE_ADD(kbdev, CORE_CTX_DESTROY, kctx, NULL, 0u, 0u); - - /* Ensure the core is powered up for the destroy process */ - /* A suspend won't happen here, because we're in a syscall from a userspace - * thread. */ - kbase_pm_context_active(kbdev); - - kbase_mem_pool_group_mark_dying(&kctx->mem_pools); - - kbase_jd_zap_context(kctx); - - /* We have already waited for the jobs to complete (and hereafter there - * can be no more submissions for the context). However the wait could - * have timedout and there could still be work items in flight that - * would do the completion processing of jobs. - * kbase_jd_exit() will destroy the 'job_done_wq'. And destroying the wq - * will cause it do drain and implicitly wait for those work items to - * complete. - */ - kbase_jd_exit(kctx); - -#ifdef CONFIG_DEBUG_FS - /* Removing the rest of the debugfs entries here as we want to keep the - * atom debugfs interface alive until all atoms have completed. This - * is useful for debugging hung contexts. */ - debugfs_remove_recursive(kctx->kctx_dentry); - kbase_debug_job_fault_context_term(kctx); -#endif - - kbase_event_cleanup(kctx); - - - /* - * JIT must be terminated before the code below as it must be called - * without the region lock being held. 
- * The code above ensures no new JIT allocations can be made by - * by the time we get to this point of context tear down. - */ - kbase_jit_term(kctx); - - kbase_gpu_vm_lock(kctx); - - kbase_sticky_resource_term(kctx); - - /* drop the aliasing sink page now that it can't be mapped anymore */ - p = as_page(kctx->aliasing_sink_page); - kbase_mem_pool_free(&kctx->mem_pools.small[KBASE_MEM_GROUP_SINK], - p, false); - - /* free pending region setups */ - pending_regions_to_clean = KBASE_COOKIE_MASK; - bitmap_andnot(&pending_regions_to_clean, &pending_regions_to_clean, - kctx->cookies, BITS_PER_LONG); - while (pending_regions_to_clean) { - unsigned int cookie = find_first_bit(&pending_regions_to_clean, - BITS_PER_LONG); - - BUG_ON(!kctx->pending_regions[cookie]); - - kbase_reg_pending_dtor(kbdev, kctx->pending_regions[cookie]); - - kctx->pending_regions[cookie] = NULL; - bitmap_clear(&pending_regions_to_clean, cookie, 1); - } - - kbase_region_tracker_term(kctx); - kbase_gpu_vm_unlock(kctx); - - /* Safe to call this one even when didn't initialize (assuming kctx was sufficiently zeroed) */ - kbasep_js_kctx_term(kctx); - - kbase_dma_fence_term(kctx); - - mutex_lock(&kbdev->mmu_hw_mutex); - spin_lock_irqsave(&kctx->kbdev->hwaccess_lock, flags); - kbase_ctx_sched_remove_ctx(kctx); - spin_unlock_irqrestore(&kctx->kbdev->hwaccess_lock, flags); - mutex_unlock(&kbdev->mmu_hw_mutex); - - kbase_mmu_term(kbdev, &kctx->mmu); - - pages = atomic_read(&kctx->used_pages); - if (pages != 0) - dev_warn(kbdev->dev, "%s: %d pages in use!\n", __func__, pages); - - kbase_mem_evictable_deinit(kctx); - - kbase_mem_pool_group_term(&kctx->mem_pools); - - WARN_ON(atomic_read(&kctx->nonmapped_pages) != 0); - - vfree(kctx); - - kbase_pm_context_idle(kbdev); -} -KBASE_EXPORT_SYMBOL(kbase_destroy_context); diff --git a/drivers/gpu/arm/bifrost/mali_kbase_core_linux.c b/drivers/gpu/arm/bifrost/mali_kbase_core_linux.c index 9ec53dc500d3..05fcb239a6fd 100644 --- 
a/drivers/gpu/arm/bifrost/mali_kbase_core_linux.c +++ b/drivers/gpu/arm/bifrost/mali_kbase_core_linux.c @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2010-2019 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2010-2020 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -22,7 +22,7 @@ #include #include -#include +#include #include #include #ifdef CONFIG_MALI_BIFROST_DEVFREQ @@ -48,6 +48,9 @@ #include #include #include +#ifdef CONFIG_MALI_PRFCNT_SET_SECONDARY_VIA_DEBUG_FS +#include +#endif #include #include #include @@ -56,10 +59,16 @@ #include "mali_kbase_hwcnt_virtualizer.h" #include "mali_kbase_hwcnt_legacy.h" #include "mali_kbase_vinstr.h" +#ifdef CONFIG_MALI_ARBITER_SUPPORT +#include "arbiter/mali_kbase_arbiter_pm.h" +#endif + +#include "mali_kbase_cs_experimental.h" #ifdef CONFIG_MALI_CINSTR_GWT #include "mali_kbase_gwt.h" #endif +#include "mali_kbase_pm_internal.h" #include #include @@ -100,20 +109,17 @@ #include -#include +#include #include +#include +#include /* GPU IRQ Tags */ #define JOB_IRQ_TAG 0 #define MMU_IRQ_TAG 1 #define GPU_IRQ_TAG 2 -static int kbase_dev_nr; - -static DEFINE_MUTEX(kbase_dev_list_lock); -static LIST_HEAD(kbase_dev_list); - #define KERNEL_SIDE_DDK_VERSION_STRING "K:" MALI_RELEASE_NAME "(GPL)" /** @@ -278,6 +284,8 @@ static void kbase_file_delete(struct kbase_file *const kfile) kctx->legacy_hwcnt_cli = NULL; mutex_unlock(&kctx->legacy_hwcnt_lock); + kbase_context_debugfs_term(kctx); + kbase_destroy_context(kctx); dev_dbg(kbdev->dev, "deleted base context\n"); @@ -345,51 +353,20 @@ enum mali_error { MALI_ERROR_FUNCTION_FAILED, }; -enum { - inited_mem = (1u << 0), - inited_js = (1u << 1), - /* Bit number 2 was earlier assigned to the runtime-pm initialization - * stage (which has been merged with the backend_early stage). 
- */ -#ifdef CONFIG_MALI_BIFROST_DEVFREQ - inited_devfreq = (1u << 3), -#endif /* CONFIG_MALI_BIFROST_DEVFREQ */ - inited_tlstream = (1u << 4), - inited_backend_early = (1u << 5), - inited_hwcnt_gpu_iface = (1u << 6), - inited_hwcnt_gpu_ctx = (1u << 7), - inited_hwcnt_gpu_virt = (1u << 8), - inited_vinstr = (1u << 9), - inited_backend_late = (1u << 10), - inited_device = (1u << 11), - inited_job_fault = (1u << 13), - inited_sysfs_group = (1u << 14), - inited_misc_register = (1u << 15), - inited_get_device = (1u << 16), - inited_dev_list = (1u << 17), - inited_debugfs = (1u << 18), - inited_gpu_device = (1u << 19), - inited_registers_map = (1u << 20), - inited_io_history = (1u << 21), - inited_power_control = (1u << 22), - inited_buslogger = (1u << 23), - inited_protected = (1u << 24), - inited_ctx_sched = (1u << 25) -}; - static struct kbase_device *to_kbase_device(struct device *dev) { return dev_get_drvdata(dev); } -static int assign_irqs(struct platform_device *pdev) +int assign_irqs(struct kbase_device *kbdev) { - struct kbase_device *kbdev = to_kbase_device(&pdev->dev); + struct platform_device *pdev; int i; if (!kbdev) return -ENODEV; + pdev = to_platform_device(kbdev->dev); /* 3 IRQ resources */ for (i = 0; i < 3; i++) { struct resource *irq_res; @@ -402,11 +379,11 @@ static int assign_irqs(struct platform_device *pdev) } #ifdef CONFIG_OF - if (!strncmp(irq_res->name, "JOB", 4)) { + if (!strncasecmp(irq_res->name, "JOB", 4)) { irqtag = JOB_IRQ_TAG; - } else if (!strncmp(irq_res->name, "MMU", 4)) { + } else if (!strncasecmp(irq_res->name, "MMU", 4)) { irqtag = MMU_IRQ_TAG; - } else if (!strncmp(irq_res->name, "GPU", 4)) { + } else if (!strncasecmp(irq_res->name, "GPU", 4)) { irqtag = GPU_IRQ_TAG; } else { dev_err(&pdev->dev, "Invalid irq res name: '%s'\n", @@ -423,30 +400,12 @@ static int assign_irqs(struct platform_device *pdev) return 0; } -/* - * API to acquire device list mutex and - * return pointer to the device list head - */ -const struct list_head 
*kbase_dev_list_get(void) -{ - mutex_lock(&kbase_dev_list_lock); - return &kbase_dev_list; -} -KBASE_EXPORT_TEST_API(kbase_dev_list_get); - -/* API to release the device list mutex */ -void kbase_dev_list_put(const struct list_head *dev_list) -{ - mutex_unlock(&kbase_dev_list_lock); -} -KBASE_EXPORT_TEST_API(kbase_dev_list_put); - /* Find a particular kbase device (as specified by minor number), or find the "first" device if -1 is specified */ struct kbase_device *kbase_find_device(int minor) { struct kbase_device *kbdev = NULL; struct list_head *entry; - const struct list_head *dev_list = kbase_dev_list_get(); + const struct list_head *dev_list = kbase_device_get_list(); list_for_each(entry, dev_list) { struct kbase_device *tmp; @@ -458,7 +417,7 @@ struct kbase_device *kbase_find_device(int minor) break; } } - kbase_dev_list_put(dev_list); + kbase_device_put_list(dev_list); return kbdev; } @@ -628,22 +587,22 @@ static int kbase_file_create_kctx(struct kbase_file *const kfile, /* we don't treat this as a fail - just warn about it */ dev_warn(kbdev->dev, "couldn't create debugfs dir for kctx\n"); } else { +#if (KERNEL_VERSION(4, 7, 0) > LINUX_VERSION_CODE) + /* prevent unprivileged use of debug file system + * in old kernel version + */ + debugfs_create_file("infinite_cache", 0600, kctx->kctx_dentry, + kctx, &kbase_infinite_cache_fops); +#else debugfs_create_file("infinite_cache", 0644, kctx->kctx_dentry, - kctx, &kbase_infinite_cache_fops); - debugfs_create_file("force_same_va", 0600, - kctx->kctx_dentry, kctx, - &kbase_force_same_va_fops); + kctx, &kbase_infinite_cache_fops); +#endif + debugfs_create_file("force_same_va", 0600, kctx->kctx_dentry, + kctx, &kbase_force_same_va_fops); mutex_init(&kctx->mem_profile_lock); - kbasep_jd_debugfs_ctx_init(kctx); - kbase_debug_mem_view_init(kctx); - - kbase_debug_job_fault_context_init(kctx); - - kbase_mem_pool_debugfs_init(kctx->kctx_dentry, kctx); - - kbase_jit_debugfs_init(kctx); + kbase_context_debugfs_init(kctx); } 
#endif /* CONFIG_DEBUG_FS */ @@ -721,12 +680,12 @@ static int kbase_api_set_flags(struct kbase_file *kfile, js_kctx_info = &kctx->jctx.sched_info; mutex_lock(&js_kctx_info->ctx.jsctx_mutex); spin_lock_irqsave(&kctx->kbdev->hwaccess_lock, irq_flags); - /* Translate the flags */ if ((flags->create_flags & BASE_CONTEXT_SYSTEM_MONITOR_SUBMIT_DISABLED) == 0) kbase_ctx_flag_clear(kctx, KCTX_SUBMIT_DISABLED); + spin_unlock_irqrestore(&kctx->kbdev->hwaccess_lock, irq_flags); mutex_unlock(&js_kctx_info->ctx.jsctx_mutex); } @@ -898,7 +857,7 @@ static int kbase_api_get_cpu_gpu_timeinfo(struct kbase_context *kctx, union kbase_ioctl_get_cpu_gpu_timeinfo *timeinfo) { u32 flags = timeinfo->in.request_flags; - struct timespec ts; + struct timespec64 ts; u64 timestamp; u64 cycle_cnt; @@ -967,22 +926,26 @@ static int kbase_api_get_ddk_version(struct kbase_context *kctx, return len; } -/* Defaults for legacy JIT init ioctl */ +/* Defaults for legacy just-in-time memory allocator initialization + * kernel calls + */ #define DEFAULT_MAX_JIT_ALLOCATIONS 255 #define JIT_LEGACY_TRIM_LEVEL (0) /* No trimming */ -static int kbase_api_mem_jit_init_old(struct kbase_context *kctx, - struct kbase_ioctl_mem_jit_init_old *jit_init) +static int kbase_api_mem_jit_init_10_2(struct kbase_context *kctx, + struct kbase_ioctl_mem_jit_init_10_2 *jit_init) { kctx->jit_version = 1; + /* since no phys_pages parameter, use the maximum: va_pages */ return kbase_region_tracker_init_jit(kctx, jit_init->va_pages, DEFAULT_MAX_JIT_ALLOCATIONS, - JIT_LEGACY_TRIM_LEVEL, BASE_MEM_GROUP_DEFAULT); + JIT_LEGACY_TRIM_LEVEL, BASE_MEM_GROUP_DEFAULT, + jit_init->va_pages); } -static int kbase_api_mem_jit_init(struct kbase_context *kctx, - struct kbase_ioctl_mem_jit_init *jit_init) +static int kbase_api_mem_jit_init_11_5(struct kbase_context *kctx, + struct kbase_ioctl_mem_jit_init_11_5 *jit_init) { int i; @@ -996,9 +959,30 @@ static int kbase_api_mem_jit_init(struct kbase_context *kctx, return -EINVAL; } + /* since no 
phys_pages parameter, use the maximum: va_pages */ return kbase_region_tracker_init_jit(kctx, jit_init->va_pages, jit_init->max_allocations, jit_init->trim_level, - jit_init->group_id); + jit_init->group_id, jit_init->va_pages); +} + +static int kbase_api_mem_jit_init(struct kbase_context *kctx, + struct kbase_ioctl_mem_jit_init *jit_init) +{ + int i; + + kctx->jit_version = 3; + + for (i = 0; i < sizeof(jit_init->padding); i++) { + /* Ensure all padding bytes are 0 for potential future + * extension + */ + if (jit_init->padding[i]) + return -EINVAL; + } + + return kbase_region_tracker_init_jit(kctx, jit_init->va_pages, + jit_init->max_allocations, jit_init->trim_level, + jit_init->group_id, jit_init->phys_pages); } static int kbase_api_mem_exec_init(struct kbase_context *kctx, @@ -1430,10 +1414,16 @@ static long kbase_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) struct kbase_ioctl_get_ddk_version, kctx); break; - case KBASE_IOCTL_MEM_JIT_INIT_OLD: - KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_MEM_JIT_INIT_OLD, - kbase_api_mem_jit_init_old, - struct kbase_ioctl_mem_jit_init_old, + case KBASE_IOCTL_MEM_JIT_INIT_10_2: + KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_MEM_JIT_INIT_10_2, + kbase_api_mem_jit_init_10_2, + struct kbase_ioctl_mem_jit_init_10_2, + kctx); + break; + case KBASE_IOCTL_MEM_JIT_INIT_11_5: + KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_MEM_JIT_INIT_11_5, + kbase_api_mem_jit_init_11_5, + struct kbase_ioctl_mem_jit_init_11_5, kctx); break; case KBASE_IOCTL_MEM_JIT_INIT: @@ -1525,12 +1515,14 @@ static long kbase_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) struct kbase_ioctl_mem_profile_add, kctx); break; + case KBASE_IOCTL_SOFT_EVENT_UPDATE: KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_SOFT_EVENT_UPDATE, kbase_api_soft_event_update, struct kbase_ioctl_soft_event_update, kctx); break; + case KBASE_IOCTL_STICKY_RESOURCE_MAP: KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_STICKY_RESOURCE_MAP, kbase_api_sticky_resource_map, @@ -1612,7 +1604,7 @@ static long kbase_ioctl(struct 
file *filp, unsigned int cmd, unsigned long arg) struct kbase_ioctl_tlstream_stats, kctx); break; -#endif +#endif /* MALI_UNIT_TEST */ } dev_warn(kbdev->dev, "Unknown ioctl 0x%x nr:%d", cmd, _IOC_NR(cmd)); @@ -1689,6 +1681,16 @@ void kbase_event_wakeup(struct kbase_context *kctx) KBASE_EXPORT_TEST_API(kbase_event_wakeup); +int kbase_event_pending(struct kbase_context *ctx) +{ + KBASE_DEBUG_ASSERT(ctx); + + return (atomic_read(&ctx->event_count) != 0) || + (atomic_read(&ctx->event_closed) != 0); +} + +KBASE_EXPORT_TEST_API(kbase_event_pending); + static int kbase_mmap(struct file *const filp, struct vm_area_struct *const vma) { struct kbase_file *const kfile = filp->private_data; @@ -2435,11 +2437,16 @@ struct kbasep_debug_command { kbasep_debug_command_func *func; }; +void kbasep_ktrace_dump_wrapper(struct kbase_device *kbdev) +{ + KBASE_KTRACE_DUMP(kbdev); +} + /* Debug commands supported by the driver */ static const struct kbasep_debug_command debug_commands[] = { { .str = "dumptrace", - .func = &kbasep_trace_dump, + .func = &kbasep_ktrace_dump_wrapper, } }; @@ -2561,15 +2568,25 @@ static ssize_t kbase_show_gpuinfo(struct device *dev, { .id = GPU_ID2_PRODUCT_TTRX >> GPU_ID_VERSION_PRODUCT_ID_SHIFT, .name = "Mali-G77" }, { .id = GPU_ID2_PRODUCT_TBEX >> GPU_ID_VERSION_PRODUCT_ID_SHIFT, - .name = "Mali-TBEX" }, + .name = "Mali-G78" }, { .id = GPU_ID2_PRODUCT_LBEX >> GPU_ID_VERSION_PRODUCT_ID_SHIFT, - .name = "Mali-LBEX" }, + .name = "Mali-G68" }, { .id = GPU_ID2_PRODUCT_TNAX >> GPU_ID_VERSION_PRODUCT_ID_SHIFT, - .name = "Mali-TNAX" }, + .name = "Mali-G57" }, { .id = GPU_ID2_PRODUCT_TODX >> GPU_ID_VERSION_PRODUCT_ID_SHIFT, .name = "Mali-TODX" }, + { .id = GPU_ID2_PRODUCT_TGRX >> GPU_ID_VERSION_PRODUCT_ID_SHIFT, + .name = "Mali-TGRX" }, + { .id = GPU_ID2_PRODUCT_TVAX >> GPU_ID_VERSION_PRODUCT_ID_SHIFT, + .name = "Mali-TVAX" }, { .id = GPU_ID2_PRODUCT_LODX >> GPU_ID_VERSION_PRODUCT_ID_SHIFT, .name = "Mali-LODX" }, + { .id = GPU_ID2_PRODUCT_TTUX >> 
GPU_ID_VERSION_PRODUCT_ID_SHIFT, + .name = "Mali-TTUX" }, + { .id = GPU_ID2_PRODUCT_LTUX >> GPU_ID_VERSION_PRODUCT_ID_SHIFT, + .name = "Mali-LTUX" }, + { .id = GPU_ID2_PRODUCT_TE2X >> GPU_ID_VERSION_PRODUCT_ID_SHIFT, + .name = "Mali-TE2X" }, }; const char *product_name = "(Unknown Mali GPU)"; struct kbase_device *kbdev; @@ -3246,80 +3263,45 @@ static void kbasep_protected_mode_hwcnt_disable_worker(struct work_struct *data) spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); } -static int kbasep_protected_mode_init(struct kbase_device *kbdev) +static int kbasep_protected_mode_enable(struct protected_mode_device *pdev) { -#ifdef CONFIG_OF - struct device_node *protected_node; - struct platform_device *pdev; - struct protected_mode_device *protected_dev; -#endif + struct kbase_device *kbdev = pdev->data; - if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_PROTECTED_MODE)) { - /* Use native protected ops */ - kbdev->protected_dev = kzalloc(sizeof(*kbdev->protected_dev), - GFP_KERNEL); - if (!kbdev->protected_dev) - return -ENOMEM; - kbdev->protected_dev->data = kbdev; - kbdev->protected_ops = &kbase_native_protected_ops; - kbdev->protected_mode_support = true; - INIT_WORK(&kbdev->protected_mode_hwcnt_disable_work, - kbasep_protected_mode_hwcnt_disable_worker); - kbdev->protected_mode_hwcnt_desired = true; - kbdev->protected_mode_hwcnt_disabled = false; - return 0; - } + return kbase_pm_protected_mode_enable(kbdev); +} - kbdev->protected_mode_support = false; +static int kbasep_protected_mode_disable(struct protected_mode_device *pdev) +{ + struct kbase_device *kbdev = pdev->data; -#ifdef CONFIG_OF - protected_node = of_parse_phandle(kbdev->dev->of_node, - "protected-mode-switcher", 0); + return kbase_pm_protected_mode_disable(kbdev); +} - if (!protected_node) - protected_node = of_parse_phandle(kbdev->dev->of_node, - "secure-mode-switcher", 0); +static const struct protected_mode_ops kbasep_native_protected_ops = { + .protected_mode_enable = 
kbasep_protected_mode_enable, + .protected_mode_disable = kbasep_protected_mode_disable +}; - if (!protected_node) { - /* If protected_node cannot be looked up then we assume - * protected mode is not supported on this platform. */ - dev_info(kbdev->dev, "Protected mode not available\n"); - return 0; - } - - pdev = of_find_device_by_node(protected_node); - if (!pdev) - return -EINVAL; - - protected_dev = platform_get_drvdata(pdev); - if (!protected_dev) - return -EPROBE_DEFER; - - kbdev->protected_ops = &protected_dev->ops; - kbdev->protected_dev = protected_dev; - - if (kbdev->protected_ops) { - int err; - - /* Make sure protected mode is disabled on startup */ - mutex_lock(&kbdev->pm.lock); - err = kbdev->protected_ops->protected_mode_disable( - kbdev->protected_dev); - mutex_unlock(&kbdev->pm.lock); - - /* protected_mode_disable() returns -EINVAL if not supported */ - kbdev->protected_mode_support = (err != -EINVAL); - } -#endif +int kbase_protected_mode_init(struct kbase_device *kbdev) +{ + /* Use native protected ops */ + kbdev->protected_dev = kzalloc(sizeof(*kbdev->protected_dev), + GFP_KERNEL); + if (!kbdev->protected_dev) + return -ENOMEM; + kbdev->protected_dev->data = kbdev; + kbdev->protected_ops = &kbasep_native_protected_ops; + INIT_WORK(&kbdev->protected_mode_hwcnt_disable_work, + kbasep_protected_mode_hwcnt_disable_worker); + kbdev->protected_mode_hwcnt_desired = true; + kbdev->protected_mode_hwcnt_disabled = false; return 0; } -static void kbasep_protected_mode_term(struct kbase_device *kbdev) +void kbase_protected_mode_term(struct kbase_device *kbdev) { - if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_PROTECTED_MODE)) { - cancel_work_sync(&kbdev->protected_mode_hwcnt_disable_work); - kfree(kbdev->protected_dev); - } + cancel_work_sync(&kbdev->protected_mode_hwcnt_disable_work); + kfree(kbdev->protected_dev); } #ifdef CONFIG_MALI_BIFROST_NO_MALI @@ -3350,9 +3332,9 @@ static int kbase_common_reg_map(struct kbase_device *kbdev) return err; - 
out_ioremap: +out_ioremap: release_mem_region(kbdev->reg_start, kbdev->reg_size); - out_region: +out_region: return err; } @@ -3368,40 +3350,158 @@ static void kbase_common_reg_unmap(struct kbase_device * const kbdev) } #endif /* CONFIG_MALI_BIFROST_NO_MALI */ -static int registers_map(struct kbase_device * const kbdev) +int registers_map(struct kbase_device * const kbdev) { + /* the first memory resource is the physical address of the GPU + * registers. + */ + struct platform_device *pdev = to_platform_device(kbdev->dev); + struct resource *reg_res; + int err; - /* the first memory resource is the physical address of the GPU - * registers */ - struct platform_device *pdev = to_platform_device(kbdev->dev); - struct resource *reg_res; - int err; + reg_res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + if (!reg_res) { + dev_err(kbdev->dev, "Invalid register resource\n"); + return -ENOENT; + } - reg_res = platform_get_resource(pdev, IORESOURCE_MEM, 0); - if (!reg_res) { - dev_err(kbdev->dev, "Invalid register resource\n"); - return -ENOENT; - } - - kbdev->reg_start = reg_res->start; - kbdev->reg_size = resource_size(reg_res); + kbdev->reg_start = reg_res->start; + kbdev->reg_size = resource_size(reg_res); - err = kbase_common_reg_map(kbdev); - if (err) { - dev_err(kbdev->dev, "Failed to map registers\n"); - return err; - } + err = kbase_common_reg_map(kbdev); + if (err) { + dev_err(kbdev->dev, "Failed to map registers\n"); + return err; + } return 0; } -static void registers_unmap(struct kbase_device *kbdev) +void registers_unmap(struct kbase_device *kbdev) { kbase_common_reg_unmap(kbdev); } -static int power_control_init(struct platform_device *pdev) +#if defined(CONFIG_MALI_ARBITER_SUPPORT) && defined(CONFIG_OF) + +static bool kbase_is_pm_enabled(const struct device_node *gpu_node) +{ + const struct device_node *power_model_node; + const void *cooling_cells_node; + const void *operating_point_node; + bool is_pm_enable = false; + + power_model_node = 
of_get_child_by_name(gpu_node, + "power_model"); + if (power_model_node) + is_pm_enable = true; + + cooling_cells_node = of_get_property(gpu_node, + "#cooling-cells", NULL); + if (cooling_cells_node) + is_pm_enable = true; + + operating_point_node = of_get_property(gpu_node, + "operating-points", NULL); + if (operating_point_node) + is_pm_enable = true; + + return is_pm_enable; +} + +static bool kbase_is_pv_enabled(const struct device_node *gpu_node) +{ + const void *arbiter_if_node; + + arbiter_if_node = of_get_property(gpu_node, + "arbiter_if", NULL); + + return arbiter_if_node ? true : false; +} + +static bool kbase_is_full_coherency_enabled(const struct device_node *gpu_node) +{ + const void *coherency_dts; + u32 coherency; + + coherency_dts = of_get_property(gpu_node, + "system-coherency", + NULL); + if (coherency_dts) { + coherency = be32_to_cpup(coherency_dts); + if (coherency == COHERENCY_ACE) + return true; + } + return false; +} + +#endif /* CONFIG_MALI_ARBITER_SUPPORT && CONFIG_OF */ + +int kbase_device_pm_init(struct kbase_device *kbdev) +{ + int err = 0; + +#if defined(CONFIG_MALI_ARBITER_SUPPORT) && defined(CONFIG_OF) + + u32 gpu_id; + u32 product_id; + u32 gpu_model_id; + + if (kbase_is_pv_enabled(kbdev->dev->of_node)) { + if (kbase_is_pm_enabled(kbdev->dev->of_node)) { + /* Arbitration AND power management invalid */ + dev_err(kbdev->dev, "Invalid combination of arbitration AND power management\n"); + return -EPERM; + } + if (kbase_is_full_coherency_enabled(kbdev->dev->of_node)) { + /* Arbitration AND full coherency invalid */ + dev_err(kbdev->dev, "Invalid combination of arbitration AND full coherency\n"); + return -EPERM; + } + err = kbase_arbiter_pm_early_init(kbdev); + if (err == 0) { + /* Check if Arbitration is running on + * supported GPU platform + */ + kbase_pm_register_access_enable(kbdev); + gpu_id = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_ID)); + kbase_pm_register_access_disable(kbdev); + product_id = KBASE_UBFX32(gpu_id, + 
GPU_ID_VERSION_PRODUCT_ID_SHIFT, 16); + gpu_model_id = GPU_ID2_MODEL_MATCH_VALUE(product_id); + + if (gpu_model_id != GPU_ID2_PRODUCT_TGOX + && gpu_model_id != GPU_ID2_PRODUCT_TNOX) { + kbase_arbiter_pm_early_term(kbdev); + dev_err(kbdev->dev, "GPU platform not suitable for arbitration\n"); + return -EPERM; + } + } + } else { + err = power_control_init(kbdev); + } +#else + err = power_control_init(kbdev); +#endif /* CONFIG_MALI_ARBITER_SUPPORT && CONFIG_OF */ + return err; +} + +void kbase_device_pm_term(struct kbase_device *kbdev) +{ +#ifdef CONFIG_MALI_ARBITER_SUPPORT +#ifdef CONFIG_OF + if (kbase_is_pv_enabled(kbdev->dev->of_node)) + kbase_arbiter_pm_early_term(kbdev); + else + power_control_term(kbdev); +#endif /* CONFIG_OF */ +#else + power_control_term(kbdev); +#endif +} + +int power_control_init(struct kbase_device *kbdev) { #if KERNEL_VERSION(3, 18, 0) > LINUX_VERSION_CODE || !defined(CONFIG_OF) /* Power control initialization requires at least the capability to get @@ -3413,7 +3513,7 @@ static int power_control_init(struct platform_device *pdev) */ return 0; #else - struct kbase_device *kbdev = to_kbase_device(&pdev->dev); + struct platform_device *pdev; int err = 0; unsigned int i; #if defined(CONFIG_REGULATOR) @@ -3426,6 +3526,8 @@ static int power_control_init(struct platform_device *pdev) if (!kbdev) return -ENODEV; + pdev = to_platform_device(kbdev->dev); + #if defined(CONFIG_REGULATOR) /* Since the error code EPROBE_DEFER causes the entire probing * procedure to be restarted from scratch at a later time, @@ -3520,7 +3622,7 @@ static int power_control_init(struct platform_device *pdev) return err; } -static void power_control_term(struct kbase_device *kbdev) +void power_control_term(struct kbase_device *kbdev) { unsigned int i; @@ -3565,7 +3667,7 @@ static void power_control_term(struct kbase_device *kbdev) #ifdef MALI_KBASE_BUILD #ifdef CONFIG_DEBUG_FS -static void trigger_quirks_reload(struct kbase_device *kbdev) +static void trigger_reset(struct 
kbase_device *kbdev) { kbase_pm_context_active(kbdev); if (kbase_prepare_to_reset_gpu(kbdev)) @@ -3579,7 +3681,7 @@ static int type##_quirks_set(void *data, u64 val) \ struct kbase_device *kbdev; \ kbdev = (struct kbase_device *)data; \ kbdev->hw_quirks_##type = (u32)val; \ - trigger_quirks_reload(kbdev); \ + trigger_reset(kbdev); \ return 0;\ } \ \ @@ -3598,6 +3700,25 @@ MAKE_QUIRK_ACCESSORS(tiler); MAKE_QUIRK_ACCESSORS(mmu); MAKE_QUIRK_ACCESSORS(jm); +static ssize_t kbase_device_debugfs_reset_write(struct file *file, + const char __user *ubuf, size_t count, loff_t *ppos) +{ + struct kbase_device *kbdev = file->private_data; + CSTD_UNUSED(ubuf); + CSTD_UNUSED(count); + CSTD_UNUSED(ppos); + + trigger_reset(kbdev); + + return count; +} + +static const struct file_operations fops_trigger_reset = { + .owner = THIS_MODULE, + .open = simple_open, + .write = kbase_device_debugfs_reset_write, + .llseek = default_llseek, +}; /** * debugfs_protected_debug_mode_read - "protected_debug_mode" debugfs read @@ -3681,10 +3802,19 @@ static const struct file_operations .release = single_release, }; -static int kbase_device_debugfs_init(struct kbase_device *kbdev) +int kbase_device_debugfs_init(struct kbase_device *kbdev) { struct dentry *debugfs_ctx_defaults_directory; int err; + /* prevent unprivileged use of debug file system + * in old kernel version + */ +#if (KERNEL_VERSION(4, 7, 0) <= LINUX_VERSION_CODE) + /* only for newer kernel version debug file system is safe */ + const mode_t mode = 0644; +#else + const mode_t mode = 0600; +#endif kbdev->mali_debugfs_directory = debugfs_create_dir(kbdev->devname, NULL); @@ -3716,8 +3846,12 @@ static int kbase_device_debugfs_init(struct kbase_device *kbdev) kbasep_regs_history_debugfs_init(kbdev); kbase_debug_job_fault_debugfs_init(kbdev); + kbasep_gpu_memory_debugfs_init(kbdev); kbase_as_fault_debugfs_init(kbdev); +#ifdef CONFIG_MALI_PRFCNT_SET_SECONDARY_VIA_DEBUG_FS + kbase_instr_backend_debugfs_init(kbdev); +#endif /* fops_* variables 
created by invocations of macro * MAKE_QUIRK_ACCESSORS() above. */ debugfs_create_file("quirks_sc", 0644, @@ -3733,16 +3867,16 @@ static int kbase_device_debugfs_init(struct kbase_device *kbdev) kbdev->mali_debugfs_directory, kbdev, &fops_jm_quirks); - debugfs_create_bool("infinite_cache", 0644, + debugfs_create_bool("infinite_cache", mode, debugfs_ctx_defaults_directory, &kbdev->infinite_cache_active_default); - debugfs_create_file("mem_pool_max_size", 0644, + debugfs_create_file("mem_pool_max_size", mode, debugfs_ctx_defaults_directory, &kbdev->mem_pool_defaults.small, &kbase_device_debugfs_mem_pool_max_size_fops); - debugfs_create_file("lp_mem_pool_max_size", 0644, + debugfs_create_file("lp_mem_pool_max_size", mode, debugfs_ctx_defaults_directory, &kbdev->mem_pool_defaults.large, &kbase_device_debugfs_mem_pool_max_size_fops); @@ -3753,13 +3887,15 @@ static int kbase_device_debugfs_init(struct kbase_device *kbdev) &fops_protected_debug_mode); } -#if KBASE_TRACE_ENABLE - kbasep_trace_debugfs_init(kbdev); -#endif /* KBASE_TRACE_ENABLE */ + debugfs_create_file("reset", 0644, + kbdev->mali_debugfs_directory, kbdev, + &fops_trigger_reset); + + kbase_ktrace_debugfs_init(kbdev); #ifdef CONFIG_MALI_BIFROST_DEVFREQ #ifdef CONFIG_DEVFREQ_THERMAL - if (kbdev->inited_subsys & inited_devfreq) + if (kbdev->devfreq) kbase_ipa_debugfs_init(kbdev); #endif /* CONFIG_DEVFREQ_THERMAL */ #endif /* CONFIG_MALI_BIFROST_DEVFREQ */ @@ -3768,7 +3904,6 @@ static int kbase_device_debugfs_init(struct kbase_device *kbdev) kbdev->mali_debugfs_directory, kbdev, &kbasep_serialize_jobs_debugfs_fops); - return 0; out: @@ -3776,29 +3911,25 @@ static int kbase_device_debugfs_init(struct kbase_device *kbdev) return err; } -static void kbase_device_debugfs_term(struct kbase_device *kbdev) +void kbase_device_debugfs_term(struct kbase_device *kbdev) { debugfs_remove_recursive(kbdev->mali_debugfs_directory); } - -#else /* CONFIG_DEBUG_FS */ -static inline int kbase_device_debugfs_init(struct kbase_device 
*kbdev) -{ - return 0; -} - -static inline void kbase_device_debugfs_term(struct kbase_device *kbdev) { } #endif /* CONFIG_DEBUG_FS */ #endif /* MALI_KBASE_BUILD */ -static void kbase_device_coherency_init(struct kbase_device *kbdev, - unsigned prod_id) +int kbase_device_coherency_init(struct kbase_device *kbdev) { #ifdef CONFIG_OF u32 supported_coherency_bitmap = kbdev->gpu_props.props.raw_props.coherency_mode; const void *coherency_override_dts; - u32 override_coherency; + u32 override_coherency, gpu_id; + unsigned int prod_id; + + gpu_id = kbdev->gpu_props.props.raw_props.gpu_id; + gpu_id &= GPU_ID_VERSION_PRODUCT_ID; + prod_id = gpu_id >> GPU_ID_VERSION_PRODUCT_ID_SHIFT; /* Only for tMIx : * (COHERENCY_ACE_LITE | COHERENCY_ACE) was incorrectly @@ -3843,6 +3974,8 @@ static void kbase_device_coherency_init(struct kbase_device *kbdev, kbdev->gpu_props.props.raw_props.coherency_mode = kbdev->system_coherency; + + return 0; } #ifdef CONFIG_MALI_BUSLOG @@ -3859,6 +3992,25 @@ static void kbase_logging_started_cb(void *data) kbase_reset_gpu(kbdev); dev_info(kbdev->dev, "KBASE - Bus logger restarted\n"); } + +int buslog_init(struct kbase_device *kbdev) +{ + int err = 0; + + err = bl_core_client_register(kbdev->devname, + kbase_logging_started_cb, + kbdev, &kbdev->buslogger, + THIS_MODULE, NULL); + if (err == 0) + bl_core_set_threshold(kbdev->buslogger, 1024*1024*1024); + + return err; +} + +void buslog_term(struct kbase_device *kbdev) +{ + bl_core_client_unregister(kbdev->buslogger); +} #endif static struct attribute *kbase_attrs[] = { @@ -3887,151 +4039,35 @@ static const struct attribute_group kbase_attr_group = { .attrs = kbase_attrs, }; +int kbase_sysfs_init(struct kbase_device *kbdev) +{ + int err = 0; + + kbdev->mdev.minor = MISC_DYNAMIC_MINOR; + kbdev->mdev.name = kbdev->devname; + kbdev->mdev.fops = &kbase_fops; + kbdev->mdev.parent = get_device(kbdev->dev); + kbdev->mdev.mode = 0666; + + err = sysfs_create_group(&kbdev->dev->kobj, &kbase_attr_group); + return 
err; +} + +void kbase_sysfs_term(struct kbase_device *kbdev) +{ + sysfs_remove_group(&kbdev->dev->kobj, &kbase_attr_group); + put_device(kbdev->dev); +} + static int kbase_platform_device_remove(struct platform_device *pdev) { struct kbase_device *kbdev = to_kbase_device(&pdev->dev); - const struct list_head *dev_list; if (!kbdev) return -ENODEV; - kfree(kbdev->gpu_props.prop_buffer); - -#ifdef CONFIG_MALI_BUSLOG - if (kbdev->inited_subsys & inited_buslogger) { - bl_core_client_unregister(kbdev->buslogger); - kbdev->inited_subsys &= ~inited_buslogger; - } -#endif - - if (kbdev->inited_subsys & inited_dev_list) { - dev_list = kbase_dev_list_get(); - list_del(&kbdev->entry); - kbase_dev_list_put(dev_list); - kbdev->inited_subsys &= ~inited_dev_list; - } - - if (kbdev->inited_subsys & inited_misc_register) { - misc_deregister(&kbdev->mdev); - kbdev->inited_subsys &= ~inited_misc_register; - } - - if (kbdev->inited_subsys & inited_sysfs_group) { - sysfs_remove_group(&kbdev->dev->kobj, &kbase_attr_group); - kbdev->inited_subsys &= ~inited_sysfs_group; - } - - if (kbdev->inited_subsys & inited_get_device) { - put_device(kbdev->dev); - kbdev->inited_subsys &= ~inited_get_device; - } - -#ifdef MALI_KBASE_BUILD - if (kbdev->inited_subsys & inited_debugfs) { - kbase_device_debugfs_term(kbdev); - kbdev->inited_subsys &= ~inited_debugfs; - } -#endif - - if (kbdev->inited_subsys & inited_job_fault) { - kbase_debug_job_fault_dev_term(kbdev); - kbdev->inited_subsys &= ~inited_job_fault; - } - - - if (kbdev->inited_subsys & inited_backend_late) { - kbase_backend_late_term(kbdev); - kbdev->inited_subsys &= ~inited_backend_late; - } - - if (kbdev->inited_subsys & inited_vinstr) { - kbase_vinstr_term(kbdev->vinstr_ctx); - kbdev->inited_subsys &= ~inited_vinstr; - } - - if (kbdev->inited_subsys & inited_hwcnt_gpu_virt) { - kbase_hwcnt_virtualizer_term(kbdev->hwcnt_gpu_virt); - kbdev->inited_subsys &= ~inited_hwcnt_gpu_virt; - } - - if (kbdev->inited_subsys & inited_hwcnt_gpu_ctx) { - 
kbase_hwcnt_context_term(kbdev->hwcnt_gpu_ctx); - kbdev->inited_subsys &= ~inited_hwcnt_gpu_ctx; - } - - if (kbdev->inited_subsys & inited_hwcnt_gpu_iface) { - kbase_hwcnt_backend_gpu_destroy(&kbdev->hwcnt_gpu_iface); - kbdev->inited_subsys &= ~inited_hwcnt_gpu_iface; - } - - if (kbdev->inited_subsys & inited_tlstream) { - kbase_timeline_term(kbdev->timeline); - kbdev->inited_subsys &= ~inited_tlstream; - } - - /* Bring job and mem sys to a halt before we continue termination */ - - if (kbdev->inited_subsys & inited_js) - kbasep_js_devdata_halt(kbdev); - - if (kbdev->inited_subsys & inited_mem) - kbase_mem_halt(kbdev); - - if (kbdev->inited_subsys & inited_protected) { - kbasep_protected_mode_term(kbdev); - kbdev->inited_subsys &= ~inited_protected; - } - - if (kbdev->inited_subsys & inited_js) { - kbasep_js_devdata_term(kbdev); - kbdev->inited_subsys &= ~inited_js; - } - - if (kbdev->inited_subsys & inited_mem) { - kbase_mem_term(kbdev); - kbdev->inited_subsys &= ~inited_mem; - } - - if (kbdev->inited_subsys & inited_ctx_sched) { - kbase_ctx_sched_term(kbdev); - kbdev->inited_subsys &= ~inited_ctx_sched; - } - - if (kbdev->inited_subsys & inited_device) { - kbase_device_term(kbdev); - kbdev->inited_subsys &= ~inited_device; - } - - if (kbdev->inited_subsys & inited_backend_early) { - kbase_backend_early_term(kbdev); - kbdev->inited_subsys &= ~inited_backend_early; - } - - if (kbdev->inited_subsys & inited_io_history) { - kbase_io_history_term(&kbdev->io_history); - kbdev->inited_subsys &= ~inited_io_history; - } - - if (kbdev->inited_subsys & inited_power_control) { - power_control_term(kbdev); - kbdev->inited_subsys &= ~inited_power_control; - } - - if (kbdev->inited_subsys & inited_registers_map) { - registers_unmap(kbdev); - kbdev->inited_subsys &= ~inited_registers_map; - } - -#ifdef CONFIG_MALI_BIFROST_NO_MALI - if (kbdev->inited_subsys & inited_gpu_device) { - gpu_device_destroy(kbdev); - kbdev->inited_subsys &= ~inited_gpu_device; - } -#endif /* 
CONFIG_MALI_BIFROST_NO_MALI */ - - if (kbdev->inited_subsys != 0) - dev_err(kbdev->dev, "Missing sub system termination\n"); - + kbase_device_term(kbdev); + dev_set_drvdata(kbdev->dev, NULL); kbase_device_free(kbdev); return 0; @@ -4040,10 +4076,8 @@ static int kbase_platform_device_remove(struct platform_device *pdev) void kbase_backend_devfreq_term(struct kbase_device *kbdev) { #ifdef CONFIG_MALI_BIFROST_DEVFREQ - if (kbdev->inited_subsys & inited_devfreq) { + if (kbdev->devfreq) kbase_devfreq_term(kbdev); - kbdev->inited_subsys &= ~inited_devfreq; - } #endif } @@ -4053,295 +4087,50 @@ int kbase_backend_devfreq_init(struct kbase_device *kbdev) /* Devfreq uses hardware counters, so must be initialized after it. */ int err = kbase_devfreq_init(kbdev); - if (!err) - kbdev->inited_subsys |= inited_devfreq; - else + if (err) dev_err(kbdev->dev, "Continuing without devfreq\n"); #endif /* CONFIG_MALI_BIFROST_DEVFREQ */ return 0; } -/* Number of register accesses for the buffer that we allocate during - * initialization time. The buffer size can be changed later via debugfs. 
*/ -#define KBASEP_DEFAULT_REGISTER_HISTORY_SIZE ((u16)512) - static int kbase_platform_device_probe(struct platform_device *pdev) { struct kbase_device *kbdev; - struct mali_base_gpu_core_props *core_props; - u32 gpu_id; - unsigned prod_id; - const struct list_head *dev_list; int err = 0; + mali_kbase_print_cs_experimental(); + kbdev = kbase_device_alloc(); if (!kbdev) { dev_err(&pdev->dev, "Allocate device failed\n"); - kbase_platform_device_remove(pdev); return -ENOMEM; } kbdev->dev = &pdev->dev; dev_set_drvdata(kbdev->dev, kbdev); -#ifdef CONFIG_MALI_BIFROST_NO_MALI - err = gpu_device_create(kbdev); - if (err) { - dev_err(&pdev->dev, "Dummy model initialization failed\n"); - kbase_platform_device_remove(pdev); - return err; - } - kbdev->inited_subsys |= inited_gpu_device; -#endif /* CONFIG_MALI_BIFROST_NO_MALI */ - - err = assign_irqs(pdev); - if (err) { - dev_err(&pdev->dev, "IRQ search failed\n"); - kbase_platform_device_remove(pdev); - return err; - } - - err = registers_map(kbdev); - if (err) { - dev_err(&pdev->dev, "Register map failed\n"); - kbase_platform_device_remove(pdev); - return err; - } - kbdev->inited_subsys |= inited_registers_map; - - err = power_control_init(pdev); - if (err) { - dev_err(&pdev->dev, "Power control initialization failed\n"); - kbase_platform_device_remove(pdev); - return err; - } - kbdev->inited_subsys |= inited_power_control; - - err = kbase_io_history_init(&kbdev->io_history, - KBASEP_DEFAULT_REGISTER_HISTORY_SIZE); - if (err) { - dev_err(&pdev->dev, "Register access history initialization failed\n"); - kbase_platform_device_remove(pdev); - return -ENOMEM; - } - kbdev->inited_subsys |= inited_io_history; - - err = kbase_backend_early_init(kbdev); - if (err) { - dev_err(kbdev->dev, "Early backend initialization failed\n"); - kbase_platform_device_remove(pdev); - return err; - } - kbdev->inited_subsys |= inited_backend_early; - - scnprintf(kbdev->devname, DEVNAME_SIZE, "%s%d", kbase_drv_name, - kbase_dev_nr); - kbdev->id = 
kbase_dev_nr; - - kbase_disjoint_init(kbdev); - - /* obtain max configured gpu frequency, if devfreq is enabled then - * this will be overridden by the highest operating point found - */ - core_props = &(kbdev->gpu_props.props.core_props); -#ifdef GPU_FREQ_KHZ_MAX - core_props->gpu_freq_khz_max = GPU_FREQ_KHZ_MAX; -#else - core_props->gpu_freq_khz_max = DEFAULT_GPU_FREQ_KHZ_MAX; -#endif - err = kbase_device_init(kbdev); + if (err) { - dev_err(kbdev->dev, "Device initialization failed (%d)\n", err); - kbase_platform_device_remove(pdev); - return err; - } - kbdev->inited_subsys |= inited_device; + if (err == -EPROBE_DEFER) + dev_err(kbdev->dev, "Device initialization Deferred\n"); + else + dev_err(kbdev->dev, "Device initialization failed\n"); - err = kbase_ctx_sched_init(kbdev); - if (err) { - dev_err(kbdev->dev, "Context scheduler initialization failed (%d)\n", - err); - kbase_platform_device_remove(pdev); - return err; - } - kbdev->inited_subsys |= inited_ctx_sched; - - err = kbase_mem_init(kbdev); - if (err) { - dev_err(kbdev->dev, "Memory subsystem initialization failed\n"); - kbase_platform_device_remove(pdev); - return err; - } - kbdev->inited_subsys |= inited_mem; - - gpu_id = kbdev->gpu_props.props.raw_props.gpu_id; - gpu_id &= GPU_ID_VERSION_PRODUCT_ID; - prod_id = gpu_id >> GPU_ID_VERSION_PRODUCT_ID_SHIFT; - - kbase_device_coherency_init(kbdev, prod_id); - - err = kbasep_protected_mode_init(kbdev); - if (err) { - dev_err(kbdev->dev, "Protected mode subsystem initialization failed\n"); - kbase_platform_device_remove(pdev); - return err; - } - kbdev->inited_subsys |= inited_protected; - - dev_list = kbase_dev_list_get(); - list_add(&kbdev->entry, &kbase_dev_list); - kbase_dev_list_put(dev_list); - kbdev->inited_subsys |= inited_dev_list; - - err = kbasep_js_devdata_init(kbdev); - if (err) { - dev_err(kbdev->dev, "Job JS devdata initialization failed\n"); - kbase_platform_device_remove(pdev); - return err; - } - kbdev->inited_subsys |= inited_js; - - 
atomic_set(&kbdev->timeline_is_enabled, 0); - err = kbase_timeline_init(&kbdev->timeline, &kbdev->timeline_is_enabled); - if (err) { - dev_err(kbdev->dev, "Timeline stream initialization failed\n"); - kbase_platform_device_remove(pdev); - return err; - } - kbdev->inited_subsys |= inited_tlstream; - - err = kbase_hwcnt_backend_gpu_create(kbdev, &kbdev->hwcnt_gpu_iface); - if (err) { - dev_err(kbdev->dev, "GPU hwcnt backend creation failed\n"); - kbase_platform_device_remove(pdev); - return err; - } - kbdev->inited_subsys |= inited_hwcnt_gpu_iface; - - err = kbase_hwcnt_context_init(&kbdev->hwcnt_gpu_iface, - &kbdev->hwcnt_gpu_ctx); - if (err) { - dev_err(kbdev->dev, - "GPU hwcnt context initialization failed\n"); - kbase_platform_device_remove(pdev); - return err; - } - kbdev->inited_subsys |= inited_hwcnt_gpu_ctx; - - err = kbase_hwcnt_virtualizer_init( - kbdev->hwcnt_gpu_ctx, - KBASE_HWCNT_GPU_VIRTUALIZER_DUMP_THRESHOLD_NS, - &kbdev->hwcnt_gpu_virt); - if (err) { - dev_err(kbdev->dev, - "GPU hwcnt virtualizer initialization failed\n"); - kbase_platform_device_remove(pdev); - return err; - } - kbdev->inited_subsys |= inited_hwcnt_gpu_virt; - - err = kbase_vinstr_init(kbdev->hwcnt_gpu_virt, &kbdev->vinstr_ctx); - if (err) { - dev_err(kbdev->dev, - "Virtual instrumentation initialization failed\n"); - kbase_platform_device_remove(pdev); - return -EINVAL; - } - kbdev->inited_subsys |= inited_vinstr; - - /* The initialization of the devfreq is now embedded inside the - * kbase_backend_late_init(), calling the kbase_backend_devfreq_init() - * before the first trigger of pm_context_idle(). 
*/ - err = kbase_backend_late_init(kbdev); - if (err) { - dev_err(kbdev->dev, "Late backend initialization failed\n"); - kbase_platform_device_remove(pdev); - return err; - } - kbdev->inited_subsys |= inited_backend_late; - - -#ifdef MALI_KBASE_BUILD - err = kbase_debug_job_fault_dev_init(kbdev); - if (err) { - dev_err(kbdev->dev, "Job fault debug initialization failed\n"); - kbase_platform_device_remove(pdev); - return err; - } - kbdev->inited_subsys |= inited_job_fault; - - err = kbase_device_debugfs_init(kbdev); - if (err) { - dev_err(kbdev->dev, "DebugFS initialization failed"); - kbase_platform_device_remove(pdev); - return err; - } - kbdev->inited_subsys |= inited_debugfs; - - kbdev->mdev.minor = MISC_DYNAMIC_MINOR; - kbdev->mdev.name = kbdev->devname; - kbdev->mdev.fops = &kbase_fops; - kbdev->mdev.parent = get_device(kbdev->dev); - kbdev->mdev.mode = 0666; - kbdev->inited_subsys |= inited_get_device; - - /* This needs to happen before registering the device with misc_register(), - * otherwise it causes a race condition between registering the device and a - * uevent event being generated for userspace, causing udev rules to run - * which might expect certain sysfs attributes present. As a result of the - * race condition we avoid, some Mali sysfs entries may have appeared to - * udev to not exist. - - * For more information, see - * https://www.kernel.org/doc/Documentation/driver-model/device.txt, the - * paragraph that starts with "Word of warning", currently the second-last - * paragraph. 
- */ - err = sysfs_create_group(&kbdev->dev->kobj, &kbase_attr_group); - if (err) { - dev_err(&pdev->dev, "SysFS group creation failed\n"); - kbase_platform_device_remove(pdev); - return err; - } - kbdev->inited_subsys |= inited_sysfs_group; - - err = misc_register(&kbdev->mdev); - if (err) { - dev_err(kbdev->dev, "Misc device registration failed for %s\n", - kbdev->devname); - kbase_platform_device_remove(pdev); - return err; - } - kbdev->inited_subsys |= inited_misc_register; - - -#ifdef CONFIG_MALI_BUSLOG - err = bl_core_client_register(kbdev->devname, - kbase_logging_started_cb, - kbdev, &kbdev->buslogger, - THIS_MODULE, NULL); - if (err == 0) { - kbdev->inited_subsys |= inited_buslogger; - bl_core_set_threshold(kbdev->buslogger, 1024*1024*1024); + dev_set_drvdata(kbdev->dev, NULL); + kbase_device_free(kbdev); } else { - dev_warn(kbdev->dev, "Bus log client registration failed\n"); - err = 0; - } -#endif - - err = kbase_gpuprops_populate_user_buffer(kbdev); - if (err) { - dev_err(&pdev->dev, "GPU property population failed"); - kbase_platform_device_remove(pdev); - return err; - } - - dev_info(kbdev->dev, +#ifdef MALI_KBASE_BUILD + dev_info(kbdev->dev, "Probed as %s\n", dev_name(kbdev->mdev.this_device)); - - kbase_dev_nr++; #endif /* MALI_KBASE_BUILD */ + kbase_increment_device_id(); +#ifdef CONFIG_MALI_ARBITER_SUPPORT + mutex_lock(&kbdev->pm.lock); + kbase_arbiter_pm_vm_event(kbdev, KBASE_VM_GPU_INITIALIZED_EVT); + mutex_unlock(&kbdev->pm.lock); +#endif + } return err; } @@ -4369,7 +4158,7 @@ static int kbase_device_suspend(struct device *dev) #if defined(CONFIG_MALI_BIFROST_DEVFREQ) && \ (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 8, 0)) dev_dbg(dev, "Callback %s\n", __func__); - if (kbdev->inited_subsys & inited_devfreq) { + if (kbdev->devfreq) { kbase_devfreq_enqueue_work(kbdev, DEVFREQ_WORK_SUSPEND); flush_workqueue(kbdev->devfreq_queue.workq); } @@ -4398,7 +4187,7 @@ static int kbase_device_resume(struct device *dev) #if defined(CONFIG_MALI_BIFROST_DEVFREQ) 
&& \ (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 8, 0)) dev_dbg(dev, "Callback %s\n", __func__); - if (kbdev->inited_subsys & inited_devfreq) { + if (kbdev->devfreq) { mutex_lock(&kbdev->pm.lock); if (kbdev->pm.active_count > 0) kbase_devfreq_enqueue_work(kbdev, DEVFREQ_WORK_RESUME); @@ -4430,7 +4219,7 @@ static int kbase_device_runtime_suspend(struct device *dev) #if defined(CONFIG_MALI_BIFROST_DEVFREQ) && \ (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 8, 0)) - if (kbdev->inited_subsys & inited_devfreq) + if (kbdev->devfreq) kbase_devfreq_enqueue_work(kbdev, DEVFREQ_WORK_SUSPEND); #endif @@ -4469,7 +4258,7 @@ static int kbase_device_runtime_resume(struct device *dev) #if defined(CONFIG_MALI_BIFROST_DEVFREQ) && \ (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 8, 0)) - if (kbdev->inited_subsys & inited_devfreq) + if (kbdev->devfreq) kbase_devfreq_enqueue_work(kbdev, DEVFREQ_WORK_RESUME); #endif @@ -4525,6 +4314,7 @@ static const struct dev_pm_ops kbase_pm_ops = { static const struct of_device_id kbase_dt_ids[] = { { .compatible = "arm,malit6xx" }, { .compatible = "arm,mali-midgard" }, + { .compatible = "arm,mali-bifrost" }, { /* sentinel */ } }; MODULE_DEVICE_TABLE(of, kbase_dt_ids); @@ -4581,14 +4371,11 @@ MODULE_VERSION(MALI_RELEASE_NAME " (UK version " \ __stringify(BASE_UK_VERSION_MAJOR) "." 
\ __stringify(BASE_UK_VERSION_MINOR) ")"); -#if defined(CONFIG_MALI_BIFROST_GATOR_SUPPORT) || defined(CONFIG_MALI_BIFROST_SYSTEM_TRACE) #define CREATE_TRACE_POINTS -#endif - -#ifdef CONFIG_MALI_BIFROST_GATOR_SUPPORT /* Create the trace points (otherwise we just get code to call a tracepoint) */ #include "mali_linux_trace.h" +#ifdef CONFIG_MALI_BIFROST_GATOR_SUPPORT EXPORT_TRACEPOINT_SYMBOL_GPL(mali_job_slots_event); EXPORT_TRACEPOINT_SYMBOL_GPL(mali_pm_status); EXPORT_TRACEPOINT_SYMBOL_GPL(mali_page_fault_insert_pages); @@ -4617,6 +4404,3 @@ void kbase_trace_mali_total_alloc_pages_change(u32 dev_id, long long int event) trace_mali_total_alloc_pages_change(dev_id, event); } #endif /* CONFIG_MALI_BIFROST_GATOR_SUPPORT */ -#ifdef CONFIG_MALI_BIFROST_SYSTEM_TRACE -#include "mali_linux_kbase_trace.h" -#endif diff --git a/drivers/gpu/arm/bifrost/mali_kbase_cs_experimental.h b/drivers/gpu/arm/bifrost/mali_kbase_cs_experimental.h new file mode 100644 index 000000000000..e1fffc3bd8b7 --- /dev/null +++ b/drivers/gpu/arm/bifrost/mali_kbase_cs_experimental.h @@ -0,0 +1,54 @@ +/* + * + * (C) COPYRIGHT ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + *//* SPDX-License-Identifier: GPL-2.0 */ + +/* + * (C) COPYRIGHT 2019-2020 ARM Limited. All rights reserved. 
+ * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + */ + +#ifndef _KBASE_CS_EXPERIMENTAL_H_ +#define _KBASE_CS_EXPERIMENTAL_H_ + +#include + +/** + * mali_kbase_print_cs_experimental() - Print a string for every Core Services + * experimental feature that is enabled + */ +static inline void mali_kbase_print_cs_experimental(void) +{ +#if MALI_JIT_PRESSURE_LIMIT + pr_info("mali_kbase: JIT_PRESSURE_LIMIT (experimental) enabled"); +#endif /* MALI_JIT_PRESSURE_LIMIT */ +#if MALI_INCREMENTAL_RENDERING + pr_info("mali_kbase: INCREMENTAL_RENDERING (experimental) enabled"); +#endif /* MALI_INCREMENTAL_RENDERING */ +} + +#endif /* _KBASE_CS_EXPERIMENTAL_H_ */ + + diff --git a/drivers/gpu/arm/bifrost/mali_kbase_ctx_sched.c b/drivers/gpu/arm/bifrost/mali_kbase_ctx_sched.c index 35853a3a26b5..cea91bcaf02e 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_ctx_sched.c +++ b/drivers/gpu/arm/bifrost/mali_kbase_ctx_sched.c @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2017-2019 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2017-2020 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -23,7 +23,23 @@ #include #include +#include #include "mali_kbase_ctx_sched.h" +#include "tl/mali_kbase_tracepoints.h" + +/* Helper for ktrace */ +#if KBASE_KTRACE_ENABLE +static int kbase_ktrace_get_ctx_refcnt(struct kbase_context *kctx) +{ + return atomic_read(&kctx->refcount); +} +#else /* KBASE_KTRACE_ENABLE */ +static int kbase_ktrace_get_ctx_refcnt(struct kbase_context *kctx) +{ + CSTD_UNUSED(kctx); + return 0; +} +#endif /* KBASE_KTRACE_ENABLE */ int kbase_ctx_sched_init(struct kbase_device *kbdev) { @@ -106,11 +122,15 @@ int kbase_ctx_sched_retain_ctx(struct kbase_context *kctx) if (prev_kctx) { WARN_ON(atomic_read(&prev_kctx->refcount) != 0); kbase_mmu_disable(prev_kctx); + KBASE_TLSTREAM_TL_KBASE_CTX_UNASSIGN_AS( + kbdev, prev_kctx->id); prev_kctx->as_nr = KBASEP_AS_NR_INVALID; } kctx->as_nr = free_as; kbdev->as_to_kctx[free_as] = kctx; + KBASE_TLSTREAM_TL_KBASE_CTX_ASSIGN_AS( + kbdev, kctx->id, free_as); kbase_mmu_update(kbdev, &kctx->mmu, kctx->as_nr); } @@ -142,11 +162,23 @@ void kbase_ctx_sched_retain_ctx_refcount(struct kbase_context *kctx) void kbase_ctx_sched_release_ctx(struct kbase_context *kctx) { struct kbase_device *const kbdev = kctx->kbdev; + int new_ref_count; lockdep_assert_held(&kbdev->hwaccess_lock); - if (atomic_dec_return(&kctx->refcount) == 0) + new_ref_count = atomic_dec_return(&kctx->refcount); + if (new_ref_count == 0) { kbdev->as_free |= (1u << kctx->as_nr); + if (kbase_ctx_flag(kctx, KCTX_AS_DISABLED_ON_FAULT)) { + KBASE_TLSTREAM_TL_KBASE_CTX_UNASSIGN_AS( + kbdev, kctx->id); + kbdev->as_to_kctx[kctx->as_nr] = NULL; + kctx->as_nr = KBASEP_AS_NR_INVALID; + kbase_ctx_flag_clear(kctx, KCTX_AS_DISABLED_ON_FAULT); + } + } + + KBASE_KTRACE_ADD(kbdev, SCHED_RELEASE_CTX, kctx, new_ref_count); } void kbase_ctx_sched_remove_ctx(struct kbase_context *kctx) @@ -162,6 +194,7 @@ void 
kbase_ctx_sched_remove_ctx(struct kbase_context *kctx) if (kbdev->pm.backend.gpu_powered) kbase_mmu_disable(kctx); + KBASE_TLSTREAM_TL_KBASE_CTX_UNASSIGN_AS(kbdev, kctx->id); kbdev->as_to_kctx[kctx->as_nr] = NULL; kctx->as_nr = KBASEP_AS_NR_INVALID; } @@ -186,15 +219,126 @@ void kbase_ctx_sched_restore_all_as(struct kbase_device *kbdev) kbase_mmu_update(kbdev, &kctx->mmu, kctx->as_nr); + kbase_ctx_flag_clear(kctx, + KCTX_AS_DISABLED_ON_FAULT); } else { /* This context might have been assigned an * AS before, clear it. */ - kbdev->as_to_kctx[kctx->as_nr] = NULL; - kctx->as_nr = KBASEP_AS_NR_INVALID; + if (kctx->as_nr != KBASEP_AS_NR_INVALID) { + KBASE_TLSTREAM_TL_KBASE_CTX_UNASSIGN_AS( + kbdev, kctx->id); + kbdev->as_to_kctx[kctx->as_nr] = NULL; + kctx->as_nr = KBASEP_AS_NR_INVALID; + } } } else { kbase_mmu_disable_as(kbdev, i); } } } + +struct kbase_context *kbase_ctx_sched_as_to_ctx_refcount( + struct kbase_device *kbdev, size_t as_nr) +{ + unsigned long flags; + struct kbase_context *found_kctx = NULL; + + if (WARN_ON(kbdev == NULL)) + return NULL; + + if (WARN_ON(as_nr >= BASE_MAX_NR_AS)) + return NULL; + + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + + found_kctx = kbdev->as_to_kctx[as_nr]; + + if (found_kctx != NULL) + kbase_ctx_sched_retain_ctx_refcount(found_kctx); + + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + + return found_kctx; +} + +struct kbase_context *kbase_ctx_sched_as_to_ctx(struct kbase_device *kbdev, + size_t as_nr) +{ + struct kbase_context *found_kctx; + + if (WARN_ON(kbdev == NULL)) + return NULL; + + if (WARN_ON(as_nr >= BASE_MAX_NR_AS)) + return NULL; + + found_kctx = kbdev->as_to_kctx[as_nr]; + + if (WARN_ON(!found_kctx)) + return NULL; + + if (WARN_ON(atomic_read(&found_kctx->refcount) <= 0)) + return NULL; + + return found_kctx; +} + +bool kbase_ctx_sched_inc_refcount_nolock(struct kbase_context *kctx) +{ + bool result = false; + int as_nr; + + if (WARN_ON(kctx == NULL)) + return result; + + 
lockdep_assert_held(&kctx->kbdev->hwaccess_lock); + + as_nr = kctx->as_nr; + if (atomic_read(&kctx->refcount) > 0) { + KBASE_DEBUG_ASSERT(as_nr >= 0); + + kbase_ctx_sched_retain_ctx_refcount(kctx); + KBASE_KTRACE_ADD(kctx->kbdev, SCHED_RETAIN_CTX_NOLOCK, kctx, + kbase_ktrace_get_ctx_refcnt(kctx)); + result = true; + } + + return result; +} + +bool kbase_ctx_sched_inc_refcount(struct kbase_context *kctx) +{ + unsigned long flags; + bool result = false; + + if (WARN_ON(kctx == NULL)) + return result; + + if (WARN_ON(kctx->kbdev == NULL)) + return result; + + mutex_lock(&kctx->kbdev->mmu_hw_mutex); + spin_lock_irqsave(&kctx->kbdev->hwaccess_lock, flags); + result = kbase_ctx_sched_inc_refcount_nolock(kctx); + spin_unlock_irqrestore(&kctx->kbdev->hwaccess_lock, flags); + mutex_unlock(&kctx->kbdev->mmu_hw_mutex); + + return result; +} + +void kbase_ctx_sched_release_ctx_lock(struct kbase_context *kctx) +{ + unsigned long flags; + + if (WARN_ON(!kctx)) + return; + + spin_lock_irqsave(&kctx->kbdev->hwaccess_lock, flags); + + if (!WARN_ON(kctx->as_nr == KBASEP_AS_NR_INVALID) && + !WARN_ON(atomic_read(&kctx->refcount) <= 0)) + kbase_ctx_sched_release_ctx(kctx); + + spin_unlock_irqrestore(&kctx->kbdev->hwaccess_lock, flags); +} diff --git a/drivers/gpu/arm/bifrost/mali_kbase_ctx_sched.h b/drivers/gpu/arm/bifrost/mali_kbase_ctx_sched.h index ab57a0dc1ca8..1affa719e6dc 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_ctx_sched.h +++ b/drivers/gpu/arm/bifrost/mali_kbase_ctx_sched.h @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2017-2018 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2017-2018, 2020 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -132,4 +132,78 @@ void kbase_ctx_sched_remove_ctx(struct kbase_context *kctx); */ void kbase_ctx_sched_restore_all_as(struct kbase_device *kbdev); +/** + * kbase_ctx_sched_as_to_ctx_refcount - Lookup a context based on its current + * address space and ensure that it stays scheduled in + * @kbdev: The device for which the returned context must belong + * @as_nr: address space assigned to the context of interest + * + * The context is refcounted as being busy to prevent it from scheduling + * out. It must be released with kbase_ctx_sched_release_ctx() when it is no + * longer required to stay scheduled in. + * + * This function can safely be called from IRQ context. + * + * The following locking conditions are made on the caller: + * * it must not hold the kbase_device::hwaccess_lock, because it will be used + * internally. + * + * Return: a valid struct kbase_context on success, which has been refcounted + * as being busy or return NULL on failure, indicating that no context was found + * in as_nr. + */ +struct kbase_context *kbase_ctx_sched_as_to_ctx_refcount( + struct kbase_device *kbdev, size_t as_nr); + +/** + * kbase_ctx_sched_as_to_ctx - Lookup a context based on its current address + * space + * @kbdev: The device for which the returned context must belong + * @as_nr: address space assigned to the context of interest + * + * Return: a valid struct kbase_context on success or NULL on failure, + * indicating that no context was found in as_nr. + */ +struct kbase_context *kbase_ctx_sched_as_to_ctx(struct kbase_device *kbdev, + size_t as_nr); + +/** + * kbase_ctx_sched_inc_refcount_nolock - Refcount a context as being busy, + * preventing it from being scheduled out. 
+ * @kctx: Context to be refcounted + * + * The following locks must be held by the caller: + * * kbase_device::mmu_hw_mutex + * * kbase_device::hwaccess_lock + * + * Return: true if refcount succeeded, and the context will not be scheduled + * out, false if the refcount failed (because the context is being/has been + * scheduled out). + */ +bool kbase_ctx_sched_inc_refcount_nolock(struct kbase_context *kctx); + +/** + * kbase_ctx_sched_inc_refcount - Refcount a context as being busy, preventing + * it from being scheduled out. + * @kctx: Context to be refcounted + * + * The following locking conditions are made on the caller: + * * it must not hold kbase_device::mmu_hw_mutex and + * kbase_device::hwaccess_lock, because they will be used internally. + * + * Return: true if refcount succeeded, and the context will not be scheduled + * out, false if the refcount failed (because the context is being/has been + * scheduled out). + */ +bool kbase_ctx_sched_inc_refcount(struct kbase_context *kctx); + +/** + * kbase_ctx_sched_release_ctx_lock - Release a reference count of a context + * @kctx: Context for which refcount should be decreased + * + * Effectively, this is a wrapper for kbase_ctx_sched_release_ctx, but + * kbase_device::hwaccess_lock is required NOT to be locked. 
+ */ +void kbase_ctx_sched_release_ctx_lock(struct kbase_context *kctx); + #endif /* _KBASE_CTX_SCHED_H_ */ diff --git a/drivers/gpu/arm/bifrost/mali_kbase_debug_mem_view.c b/drivers/gpu/arm/bifrost/mali_kbase_debug_mem_view.c index c091f164b73f..478813705a41 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_debug_mem_view.c +++ b/drivers/gpu/arm/bifrost/mali_kbase_debug_mem_view.c @@ -233,6 +233,12 @@ static int debug_mem_open(struct inode *i, struct file *file) goto out; } + ret = debug_mem_zone_open(&kctx->reg_rbtree_exec, mem_data); + if (0 != ret) { + kbase_gpu_vm_unlock(kctx); + goto out; + } + kbase_gpu_vm_unlock(kctx); ((struct seq_file *)file->private_data)->private = mem_data; diff --git a/drivers/gpu/arm/bifrost/mali_kbase_defs.h b/drivers/gpu/arm/bifrost/mali_kbase_defs.h index 0a3b9514247e..958defbfbe3b 100755 --- a/drivers/gpu/arm/bifrost/mali_kbase_defs.h +++ b/drivers/gpu/arm/bifrost/mali_kbase_defs.h @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2011-2019 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2011-2020 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -36,7 +36,7 @@ #include #include #include -#include +#include #include #include #include @@ -61,12 +61,16 @@ #ifdef CONFIG_DEBUG_FS #include -#endif /* CONFIG_DEBUG_FS */ +#endif /* CONFIG_DEBUG_FS */ #ifdef CONFIG_MALI_BIFROST_DEVFREQ #include #endif /* CONFIG_MALI_BIFROST_DEVFREQ */ +#ifdef CONFIG_MALI_ARBITER_SUPPORT +#include +#endif /* CONFIG_MALI_ARBITER_SUPPORT */ + #include #include #include @@ -76,57 +80,11 @@ #define KBASE_PM_RUNTIME 1 #endif -/** Enable SW tracing when set */ -#ifdef CONFIG_MALI_BIFROST_ENABLE_TRACE -#define KBASE_TRACE_ENABLE 1 -#endif - -#ifndef KBASE_TRACE_ENABLE -#ifdef CONFIG_MALI_BIFROST_DEBUG -#define KBASE_TRACE_ENABLE 1 -#else -#define KBASE_TRACE_ENABLE 0 -#endif /* CONFIG_MALI_BIFROST_DEBUG */ -#endif /* KBASE_TRACE_ENABLE */ - -/** Dump Job slot trace on error (only active if KBASE_TRACE_ENABLE != 0) */ -#define KBASE_TRACE_DUMP_ON_JOB_SLOT_ERROR 1 - -/** - * Number of milliseconds before resetting the GPU when a job cannot be "zapped" from the hardware. - * Note that the time is actually ZAP_TIMEOUT+SOFT_STOP_RESET_TIMEOUT between the context zap starting and the GPU - * actually being reset to give other contexts time for their jobs to be soft-stopped and removed from the hardware - * before resetting. - */ -#define ZAP_TIMEOUT 1000 +#include "debug/mali_kbase_debug_ktrace_defs.h" /** Number of milliseconds before we time out on a GPU soft/hard reset */ #define RESET_TIMEOUT 500 -/** - * Prevent soft-stops from occuring in scheduling situations - * - * This is not due to HW issues, but when scheduling is desired to be more predictable. - * - * Therefore, soft stop may still be disabled due to HW issues. - * - * @note Soft stop will still be used for non-scheduling purposes e.g. when terminating a context. 
- * - * @note if not in use, define this value to 0 instead of \#undef'ing it - */ -#define KBASE_DISABLE_SCHEDULING_SOFT_STOPS 0 - -/** - * Prevent hard-stops from occuring in scheduling situations - * - * This is not due to HW issues, but when scheduling is desired to be more predictable. - * - * @note Hard stop will still be used for non-scheduling purposes e.g. when terminating a context. - * - * @note if not in use, define this value to 0 instead of \#undef'ing it - */ -#define KBASE_DISABLE_SCHEDULING_HARD_STOPS 0 - /** * The maximum number of Job Slots to support in the Hardware. * @@ -155,76 +113,23 @@ /** setting in kbase_context::as_nr that indicates it's invalid */ #define KBASEP_AS_NR_INVALID (-1) -#define KBASE_LOCK_REGION_MAX_SIZE (63) -#define KBASE_LOCK_REGION_MIN_SIZE (15) +/** + * Maximum size in bytes of a MMU lock region, as a logarithm + */ +#define KBASE_LOCK_REGION_MAX_SIZE_LOG2 (64) -#define KBASE_TRACE_SIZE_LOG2 8 /* 256 entries */ -#define KBASE_TRACE_SIZE (1 << KBASE_TRACE_SIZE_LOG2) -#define KBASE_TRACE_MASK ((1 << KBASE_TRACE_SIZE_LOG2)-1) +/** + * Minimum size in bytes of a MMU lock region, as a logarithm + */ +#define KBASE_LOCK_REGION_MIN_SIZE_LOG2 (15) -#include "mali_kbase_js_defs.h" #include "mali_kbase_hwaccess_defs.h" /* Maximum number of pages of memory that require a permanent mapping, per * kbase_context */ -#define KBASE_PERMANENTLY_MAPPED_MEM_LIMIT_PAGES ((1024ul * 1024ul) >> \ +#define KBASE_PERMANENTLY_MAPPED_MEM_LIMIT_PAGES ((32 * 1024ul * 1024ul) >> \ PAGE_SHIFT) - -/** Atom has been previously soft-stoppped */ -#define KBASE_KATOM_FLAG_BEEN_SOFT_STOPPPED (1<<1) -/** Atom has been previously retried to execute */ -#define KBASE_KATOM_FLAGS_RERUN (1<<2) -/* Atom submitted with JOB_CHAIN_FLAG bit set in JS_CONFIG_NEXT register, helps to - * disambiguate short-running job chains during soft/hard stopping of jobs - */ -#define KBASE_KATOM_FLAGS_JOBCHAIN (1<<3) -/** Atom has been previously hard-stopped. 
*/ -#define KBASE_KATOM_FLAG_BEEN_HARD_STOPPED (1<<4) -/** Atom has caused us to enter disjoint state */ -#define KBASE_KATOM_FLAG_IN_DISJOINT (1<<5) -/* Atom blocked on cross-slot dependency */ -#define KBASE_KATOM_FLAG_X_DEP_BLOCKED (1<<7) -/* Atom has fail dependency on cross-slot dependency */ -#define KBASE_KATOM_FLAG_FAIL_BLOCKER (1<<8) -/* Atom is currently in the list of atoms blocked on cross-slot dependencies */ -#define KBASE_KATOM_FLAG_JSCTX_IN_X_DEP_LIST (1<<9) -/* Atom is currently holding a context reference */ -#define KBASE_KATOM_FLAG_HOLDING_CTX_REF (1<<10) -/* Atom requires GPU to be in protected mode */ -#define KBASE_KATOM_FLAG_PROTECTED (1<<11) -/* Atom has been stored in runnable_tree */ -#define KBASE_KATOM_FLAG_JSCTX_IN_TREE (1<<12) -/* Atom is waiting for L2 caches to power up in order to enter protected mode */ -#define KBASE_KATOM_FLAG_HOLDING_L2_REF_PROT (1<<13) - -/* SW related flags about types of JS_COMMAND action - * NOTE: These must be masked off by JS_COMMAND_MASK */ - -/** This command causes a disjoint event */ -#define JS_COMMAND_SW_CAUSES_DISJOINT 0x100 - -/** Bitmask of all SW related flags */ -#define JS_COMMAND_SW_BITS (JS_COMMAND_SW_CAUSES_DISJOINT) - -#if (JS_COMMAND_SW_BITS & JS_COMMAND_MASK) -#error JS_COMMAND_SW_BITS not masked off by JS_COMMAND_MASK. Must update JS_COMMAND_SW_<..> bitmasks -#endif - -/** Soft-stop command that causes a Disjoint event. 
This of course isn't - * entirely masked off by JS_COMMAND_MASK */ -#define JS_COMMAND_SOFT_STOP_WITH_SW_DISJOINT \ - (JS_COMMAND_SW_CAUSES_DISJOINT | JS_COMMAND_SOFT_STOP) - -#define KBASEP_ATOM_ID_INVALID BASE_JD_ATOM_COUNT - -/* Serialize atoms within a slot (ie only one atom per job slot) */ -#define KBASE_SERIALIZE_INTRA_SLOT (1 << 0) -/* Serialize atoms between slots (ie only one job slot running at any time) */ -#define KBASE_SERIALIZE_INTER_SLOT (1 << 1) -/* Reset the GPU after each atom completion */ -#define KBASE_SERIALIZE_RESET (1 << 2) - /* Minimum threshold period for hwcnt dumps between different hwcnt virtualizer * clients, to reduce undesired system load. * If a virtualizer client requests a dump within this threshold period after @@ -252,47 +157,6 @@ struct kbase_as; struct kbase_mmu_setup; struct kbase_ipa_model_vinstr_data; -#ifdef CONFIG_DEBUG_FS -/** - * struct base_job_fault_event - keeps track of the atom which faulted or which - * completed after the faulty atom but before the - * debug data for faulty atom was dumped. - * - * @event_code: event code for the atom, should != BASE_JD_EVENT_DONE for the - * atom which faulted. - * @katom: pointer to the atom for which job fault occurred or which completed - * after the faulty atom. - * @job_fault_work: work item, queued only for the faulty atom, which waits for - * the dumping to get completed and then does the bottom half - * of job done for the atoms which followed the faulty atom. - * @head: List head used to store the atom in the global list of faulty - * atoms or context specific list of atoms which got completed - * during the dump. - * @reg_offset: offset of the register to be dumped next, only applicable for - * the faulty atom. 
- */ -struct base_job_fault_event { - - u32 event_code; - struct kbase_jd_atom *katom; - struct work_struct job_fault_work; - struct list_head head; - int reg_offset; -}; - -#endif - -/** - * struct kbase_jd_atom_dependency - Contains the dependency info for an atom. - * @atom: pointer to the dependee atom. - * @dep_type: type of dependency on the dependee @atom, i.e. order or data - * dependency. BASE_JD_DEP_TYPE_INVALID indicates no dependency. - */ -struct kbase_jd_atom_dependency { - struct kbase_jd_atom *atom; - u8 dep_type; -}; - /** * struct kbase_io_access - holds information about 1 register access * @@ -326,421 +190,6 @@ struct kbase_io_history { struct kbase_io_access *buf; }; -/** - * kbase_jd_katom_dep_atom - Retrieves a read-only reference to the - * dependee atom. - * @dep: pointer to the dependency info structure. - * - * Return: readonly reference to dependee atom. - */ -static inline const struct kbase_jd_atom * -kbase_jd_katom_dep_atom(const struct kbase_jd_atom_dependency *dep) -{ - LOCAL_ASSERT(dep != NULL); - - return (const struct kbase_jd_atom *)(dep->atom); -} - -/** - * kbase_jd_katom_dep_type - Retrieves the dependency type info - * - * @dep: pointer to the dependency info structure. - * - * Return: the type of dependency there is on the dependee atom. - */ -static inline u8 kbase_jd_katom_dep_type(const struct kbase_jd_atom_dependency *dep) -{ - LOCAL_ASSERT(dep != NULL); - - return dep->dep_type; -} - -/** - * kbase_jd_katom_dep_set - sets up the dependency info structure - * as per the values passed. - * @const_dep: pointer to the dependency info structure to be setup. - * @a: pointer to the dependee atom. - * @type: type of dependency there is on the dependee atom. 
- */ -static inline void kbase_jd_katom_dep_set(const struct kbase_jd_atom_dependency *const_dep, - struct kbase_jd_atom *a, u8 type) -{ - struct kbase_jd_atom_dependency *dep; - - LOCAL_ASSERT(const_dep != NULL); - - dep = (struct kbase_jd_atom_dependency *)const_dep; - - dep->atom = a; - dep->dep_type = type; -} - -/** - * kbase_jd_katom_dep_clear - resets the dependency info structure - * - * @const_dep: pointer to the dependency info structure to be setup. - */ -static inline void kbase_jd_katom_dep_clear(const struct kbase_jd_atom_dependency *const_dep) -{ - struct kbase_jd_atom_dependency *dep; - - LOCAL_ASSERT(const_dep != NULL); - - dep = (struct kbase_jd_atom_dependency *)const_dep; - - dep->atom = NULL; - dep->dep_type = BASE_JD_DEP_TYPE_INVALID; -} - -/** - * enum kbase_atom_gpu_rb_state - The state of an atom, pertinent after it becomes - * runnable, with respect to job slot ringbuffer/fifo. - * @KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB: Atom not currently present in slot fifo, which - * implies that either atom has not become runnable - * due to dependency or has completed the execution - * on GPU. - * @KBASE_ATOM_GPU_RB_WAITING_BLOCKED: Atom has been added to slot fifo but is blocked - * due to cross slot dependency, can't be submitted to GPU. - * @KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_PREV: Atom has been added to slot fifo but - * is waiting for the completion of previously added atoms - * in current & other slots, as their protected mode - * requirements do not match with the current atom. - * @KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_TRANSITION: Atom is in slot fifo and is - * waiting for completion of protected mode transition, - * needed before the atom is submitted to GPU. 
- * @KBASE_ATOM_GPU_RB_WAITING_FOR_CORE_AVAILABLE: Atom is in slot fifo but is waiting - * for the cores, which are needed to execute the job - * chain represented by the atom, to become available - * @KBASE_ATOM_GPU_RB_WAITING_AFFINITY: Atom is in slot fifo but is blocked on - * affinity due to rmu workaround for Hw issue 8987. - * @KBASE_ATOM_GPU_RB_READY: Atom is in slot fifo and can be submitted to GPU. - * @KBASE_ATOM_GPU_RB_SUBMITTED: Atom is in slot fifo and has been submitted to GPU. - * @KBASE_ATOM_GPU_RB_RETURN_TO_JS: Atom must be returned to JS due to some failure, - * but only after the previously added atoms in fifo - * have completed or have also been returned to JS. - */ -enum kbase_atom_gpu_rb_state { - KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB, - KBASE_ATOM_GPU_RB_WAITING_BLOCKED, - KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_PREV, - KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_TRANSITION, - KBASE_ATOM_GPU_RB_WAITING_FOR_CORE_AVAILABLE, - KBASE_ATOM_GPU_RB_WAITING_AFFINITY, - KBASE_ATOM_GPU_RB_READY, - KBASE_ATOM_GPU_RB_SUBMITTED, - KBASE_ATOM_GPU_RB_RETURN_TO_JS = -1 -}; - -/** - * enum kbase_atom_enter_protected_state - The state of an atom with respect to the - * preparation for GPU's entry into protected mode, becomes - * pertinent only after atom's state with respect to slot - * ringbuffer is KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_TRANSITION - * @KBASE_ATOM_ENTER_PROTECTED_CHECK: Starting state. Check if there are any atoms - * currently submitted to GPU and protected mode transition is - * not already in progress. - * @KBASE_ATOM_ENTER_PROTECTED_HWCNT: Wait for hardware counter context to - * become disabled before entry into protected mode. - * @KBASE_ATOM_ENTER_PROTECTED_IDLE_L2: Wait for the L2 to become idle in preparation - * for the coherency change. L2 shall be powered down and GPU shall - * come out of fully coherent mode before entering protected mode. 
- * @KBASE_ATOM_ENTER_PROTECTED_SET_COHERENCY: Prepare coherency change; - * for BASE_HW_ISSUE_TGOX_R1_1234 also request L2 power on so that - * coherency register contains correct value when GPU enters - * protected mode. - * @KBASE_ATOM_ENTER_PROTECTED_FINISHED: End state; for BASE_HW_ISSUE_TGOX_R1_1234 check - * that L2 is powered up and switch GPU to protected mode. - */ -enum kbase_atom_enter_protected_state { - /** - * NOTE: The integer value of this must match KBASE_ATOM_EXIT_PROTECTED_CHECK. - */ - KBASE_ATOM_ENTER_PROTECTED_CHECK = 0, - KBASE_ATOM_ENTER_PROTECTED_HWCNT, - KBASE_ATOM_ENTER_PROTECTED_IDLE_L2, - KBASE_ATOM_ENTER_PROTECTED_SET_COHERENCY, - KBASE_ATOM_ENTER_PROTECTED_FINISHED, -}; - -/** - * enum kbase_atom_exit_protected_state - The state of an atom with respect to the - * preparation for GPU's exit from protected mode, becomes - * pertinent only after atom's state with respect to slot - * ringbuffer is KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_TRANSITION - * @KBASE_ATOM_EXIT_PROTECTED_CHECK: Starting state. Check if there are any atoms - * currently submitted to GPU and protected mode transition is - * not already in progress. - * @KBASE_ATOM_EXIT_PROTECTED_IDLE_L2: Wait for the L2 to become idle in preparation - * for the reset, as exiting protected mode requires a reset. - * @KBASE_ATOM_EXIT_PROTECTED_RESET: Issue the reset to trigger exit from protected mode - * @KBASE_ATOM_EXIT_PROTECTED_RESET_WAIT: End state, Wait for the reset to complete - */ -enum kbase_atom_exit_protected_state { - /** - * NOTE: The integer value of this must match KBASE_ATOM_ENTER_PROTECTED_CHECK. - */ - KBASE_ATOM_EXIT_PROTECTED_CHECK = 0, - KBASE_ATOM_EXIT_PROTECTED_IDLE_L2, - KBASE_ATOM_EXIT_PROTECTED_RESET, - KBASE_ATOM_EXIT_PROTECTED_RESET_WAIT, -}; - -/** - * struct kbase_ext_res - Contains the info for external resources referred - * by an atom, which have been mapped on GPU side. 
- * @gpu_address: Start address of the memory region allocated for - * the resource from GPU virtual address space. - * @alloc: pointer to physical pages tracking object, set on - * mapping the external resource on GPU side. - */ -struct kbase_ext_res { - u64 gpu_address; - struct kbase_mem_phy_alloc *alloc; -}; - -/** - * struct kbase_jd_atom - object representing the atom, containing the complete - * state and attributes of an atom. - * @work: work item for the bottom half processing of the atom, - * by JD or JS, after it got executed on GPU or the input - * fence got signaled - * @start_timestamp: time at which the atom was submitted to the GPU, by - * updating the JS_HEAD_NEXTn register. - * @udata: copy of the user data sent for the atom in base_jd_submit. - * @kctx: Pointer to the base context with which the atom is associated. - * @dep_head: Array of 2 list heads, pointing to the two list of atoms - * which are blocked due to dependency on this atom. - * @dep_item: Array of 2 list heads, used to store the atom in the list of - * other atoms depending on the same dependee atom. - * @dep: Array containing the dependency info for the 2 atoms on which - * the atom depends upon. - * @jd_item: List head used during job dispatch job_done processing - as - * dependencies may not be entirely resolved at this point, - * we need to use a separate list head. - * @in_jd_list: flag set to true if atom's @jd_item is currently on a list, - * prevents atom being processed twice. - * @nr_extres: number of external resources referenced by the atom. - * @extres: pointer to the location containing info about @nr_extres - * external resources referenced by the atom. - * @device_nr: indicates the coregroup with which the atom is associated, - * when BASE_JD_REQ_SPECIFIC_COHERENT_GROUP specified. - * @jc: GPU address of the job-chain. - * @softjob_data: Copy of data read from the user space buffer that @jc - * points to. 
- * @fence: Stores either an input or output sync fence, depending - * on soft-job type - * @sync_waiter: Pointer to the sync fence waiter structure passed to the - * callback function on signaling of the input fence. - * @dma_fence: object containing pointers to both input & output fences - * and other related members used for explicit sync through - * soft jobs and for the implicit synchronization required - * on access to external resources. - * @event_code: Event code for the job chain represented by the atom, both - * HW and low-level SW events are represented by event codes. - * @core_req: bitmask of BASE_JD_REQ_* flags specifying either Hw or Sw - * requirements for the job chain represented by the atom. - * @ticks: Number of scheduling ticks for which atom has been running - * on the GPU. - * @sched_priority: Priority of the atom for Job scheduling, as per the - * KBASE_JS_ATOM_SCHED_PRIO_*. - * @completed: Wait queue to wait upon for the completion of atom. - * @status: Indicates at high level at what stage the atom is in, - * as per KBASE_JD_ATOM_STATE_*, that whether it is not in - * use or its queued in JD or given to JS or submitted to Hw - * or it completed the execution on Hw. - * @work_id: used for GPU tracepoints, its a snapshot of the 'work_id' - * counter in kbase_jd_context which is incremented on - * every call to base_jd_submit. - * @slot_nr: Job slot chosen for the atom. - * @atom_flags: bitmask of KBASE_KATOM_FLAG* flags capturing the exact - * low level state of the atom. - * @gpu_rb_state: bitmnask of KBASE_ATOM_GPU_RB_* flags, precisely tracking - * atom's state after it has entered Job scheduler on becoming - * runnable. Atom could be blocked due to cross slot dependency - * or waiting for the shader cores to become available or - * waiting for protected mode transitions to complete. 
- * @need_cache_flush_cores_retained: flag indicating that manual flush of GPU - * cache is needed for the atom and the shader cores used - * for atom have been kept on. - * @blocked: flag indicating that atom's resubmission to GPU is - * blocked till the work item is scheduled to return the - * atom to JS. - * @pre_dep: Pointer to atom that this atom has same-slot dependency on - * @post_dep: Pointer to atom that has same-slot dependency on this atom - * @x_pre_dep: Pointer to atom that this atom has cross-slot dependency on - * @x_post_dep: Pointer to atom that has cross-slot dependency on this atom - * @flush_id: The GPU's flush count recorded at the time of submission, - * used for the cache flush optimisation - * @fault_event: Info for dumping the debug data on Job fault. - * @queue: List head used for 4 different purposes : - * Adds atom to the list of dma-buf fence waiting atoms. - * Adds atom to the list of atoms blocked due to cross - * slot dependency. - * Adds atom to the list of softjob atoms for which JIT - * allocation has been deferred - * Adds atom to the list of softjob atoms waiting for the - * signaling of fence. - * @jit_node: Used to keep track of all JIT free/alloc jobs in submission order - * @jit_blocked: Flag indicating that JIT allocation requested through - * softjob atom will be reattempted after the impending - * free of other active JIT allocations. - * @will_fail_event_code: If non-zero, this indicates that the atom will fail - * with the set event_code when the atom is processed. - * Used for special handling of atoms, which have a data - * dependency on the failed atoms. - * @protected_state: State of the atom, as per KBASE_ATOM_(ENTER|EXIT)_PROTECTED_*, - * when transitioning into or out of protected mode. Atom will - * be either entering or exiting the protected mode. - * @runnable_tree_node: The node added to context's job slot specific rb tree - * when the atom becomes runnable. 
- * @age: Age of atom relative to other atoms in the context, is - * snapshot of the age_count counter in kbase context. - */ -struct kbase_jd_atom { - struct work_struct work; - ktime_t start_timestamp; - - struct base_jd_udata udata; - struct kbase_context *kctx; - - struct list_head dep_head[2]; - struct list_head dep_item[2]; - const struct kbase_jd_atom_dependency dep[2]; - struct list_head jd_item; - bool in_jd_list; - - u16 nr_extres; - struct kbase_ext_res *extres; - - u32 device_nr; - u64 jc; - void *softjob_data; -#if defined(CONFIG_SYNC) - struct sync_fence *fence; - struct sync_fence_waiter sync_waiter; -#endif /* CONFIG_SYNC */ -#if defined(CONFIG_MALI_BIFROST_DMA_FENCE) || defined(CONFIG_SYNC_FILE) - struct { - /* Use the functions/API defined in mali_kbase_fence.h to - * when working with this sub struct */ -#if defined(CONFIG_SYNC_FILE) - /* Input fence */ -#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0)) - struct fence *fence_in; -#else - struct dma_fence *fence_in; -#endif -#endif - /* This points to the dma-buf output fence for this atom. If - * this is NULL then there is no fence for this atom and the - * following fields related to dma_fence may have invalid data. - * - * The context and seqno fields contain the details for this - * fence. - * - * This fence is signaled when the katom is completed, - * regardless of the event_code of the katom (signal also on - * failure). - */ -#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0)) - struct fence *fence; -#else - struct dma_fence *fence; -#endif - /* The dma-buf fence context number for this atom. A unique - * context number is allocated to each katom in the context on - * context creation. - */ - unsigned int context; - /* The dma-buf fence sequence number for this atom. This is - * increased every time this katom uses dma-buf fence. - */ - atomic_t seqno; - /* This contains a list of all callbacks set up to wait on - * other fences. 
This atom must be held back from JS until all - * these callbacks have been called and dep_count have reached - * 0. The initial value of dep_count must be equal to the - * number of callbacks on this list. - * - * This list is protected by jctx.lock. Callbacks are added to - * this list when the atom is built and the wait are set up. - * All the callbacks then stay on the list until all callbacks - * have been called and the atom is queued, or cancelled, and - * then all callbacks are taken off the list and freed. - */ - struct list_head callbacks; - /* Atomic counter of number of outstandind dma-buf fence - * dependencies for this atom. When dep_count reaches 0 the - * atom may be queued. - * - * The special value "-1" may only be set after the count - * reaches 0, while holding jctx.lock. This indicates that the - * atom has been handled, either queued in JS or cancelled. - * - * If anyone but the dma-fence worker sets this to -1 they must - * ensure that any potentially queued worker must have - * completed before allowing the atom to be marked as unused. - * This can be done by flushing the fence work queue: - * kctx->dma_fence.wq. 
- */ - atomic_t dep_count; - } dma_fence; -#endif /* CONFIG_MALI_BIFROST_DMA_FENCE || CONFIG_SYNC_FILE*/ - - /* Note: refer to kbasep_js_atom_retained_state, which will take a copy of some of the following members */ - enum base_jd_event_code event_code; - base_jd_core_req core_req; - u8 jobslot; - - u32 ticks; - int sched_priority; - - wait_queue_head_t completed; - enum kbase_jd_atom_state status; -#ifdef CONFIG_GPU_TRACEPOINTS - int work_id; -#endif - int slot_nr; - - u32 atom_flags; - - int retry_count; - - enum kbase_atom_gpu_rb_state gpu_rb_state; - - bool need_cache_flush_cores_retained; - - atomic_t blocked; - - struct kbase_jd_atom *pre_dep; - struct kbase_jd_atom *post_dep; - - struct kbase_jd_atom *x_pre_dep; - struct kbase_jd_atom *x_post_dep; - - u32 flush_id; - -#ifdef CONFIG_DEBUG_FS - struct base_job_fault_event fault_event; -#endif - - struct list_head queue; - - struct list_head jit_node; - bool jit_blocked; - - enum base_jd_event_code will_fail_event_code; - - union { - enum kbase_atom_enter_protected_state enter; - enum kbase_atom_exit_protected_state exit; - } protected_state; - - struct rb_node runnable_tree_node; - - u32 age; -}; - /** * struct kbase_debug_copy_buffer - information about the buffer to be copied. * @@ -768,83 +217,6 @@ struct kbase_debug_copy_buffer { int nr_extres_pages; }; -static inline bool kbase_jd_katom_is_protected(const struct kbase_jd_atom *katom) -{ - return (bool)(katom->atom_flags & KBASE_KATOM_FLAG_PROTECTED); -} - -/* - * Theory of operations: - * - * Atom objects are statically allocated within the context structure. - * - * Each atom is the head of two lists, one for the "left" set of dependencies, one for the "right" set. - */ - -#define KBASE_JD_DEP_QUEUE_SIZE 256 - -/** - * struct kbase_jd_context - per context object encapsulating all the Job dispatcher - * related state. - * @lock: lock to serialize the updates made to the Job dispatcher - * state and kbase_jd_atom objects. 
- * @sched_info: Structure encapsulating all the Job scheduling info. - * @atoms: Array of the objects representing atoms, containing - * the complete state and attributes of an atom. - * @job_nr: Tracks the number of atoms being processed by the - * kbase. This includes atoms that are not tracked by - * scheduler: 'not ready to run' & 'dependency-only' jobs. - * @zero_jobs_wait: Waitq that reflects whether there are no jobs - * (including SW-only dependency jobs). This is set - * when no jobs are present on the ctx, and clear when - * there are jobs. - * This must be updated atomically with @job_nr. - * note: Job Dispatcher knows about more jobs than the - * Job Scheduler as it is unaware of jobs that are - * blocked on dependencies and SW-only dependency jobs. - * This waitq can be waited upon to find out when the - * context jobs are all done/cancelled (including those - * that might've been blocked on dependencies) - and so, - * whether it can be terminated. However, it should only - * be terminated once it is not present in the run-pool. - * Since the waitq is only set under @lock, the waiter - * should also briefly obtain and drop @lock to guarantee - * that the setter has completed its work on the kbase_context - * @job_done_wq: Workqueue to which the per atom work item is queued - * for bottom half processing when the atom completes - * execution on GPU or the input fence get signaled. - * @tb_lock: Lock to serialize the write access made to @tb to - * to store the register access trace messages. - * @tb: Pointer to the Userspace accessible buffer storing - * the trace messages for register read/write accesses - * made by the Kbase. The buffer is filled in circular - * fashion. - * @tb_wrap_offset: Offset to the end location in the trace buffer, the - * write pointer is moved to the beginning on reaching - * this offset. - * @work_id: atomic variable used for GPU tracepoints, incremented - * on every call to base_jd_submit. 
- */ -struct kbase_jd_context { - struct mutex lock; - struct kbasep_js_kctx_info sched_info; - struct kbase_jd_atom atoms[BASE_JD_ATOM_COUNT]; - - u32 job_nr; - - wait_queue_head_t zero_jobs_wait; - - struct workqueue_struct *job_done_wq; - - spinlock_t tb_lock; - u32 *tb; - size_t tb_wrap_offset; - -#ifdef CONFIG_GPU_TRACEPOINTS - atomic_t work_id; -#endif -}; - struct kbase_device_info { u32 features; }; @@ -919,6 +291,8 @@ struct kbase_mmu_table { struct kbase_context *kctx; }; +#include "jm/mali_kbase_jm_defs.h" + static inline int kbase_as_has_bus_fault(struct kbase_as *as, struct kbase_fault *fault) { @@ -931,74 +305,19 @@ static inline int kbase_as_has_page_fault(struct kbase_as *as, return (fault == &as->pf_data); } -struct kbasep_mem_device { - atomic_t used_pages; /* Tracks usage of OS shared memory. Updated - when OS memory is allocated/freed. */ - -}; - -#define KBASE_TRACE_CODE(X) KBASE_TRACE_CODE_ ## X - -enum kbase_trace_code { - /* IMPORTANT: USE OF SPECIAL #INCLUDE OF NON-STANDARD HEADER FILE - * THIS MUST BE USED AT THE START OF THE ENUM */ -#define KBASE_TRACE_CODE_MAKE_CODE(X) KBASE_TRACE_CODE(X) -#include "mali_kbase_trace_defs.h" -#undef KBASE_TRACE_CODE_MAKE_CODE - /* Comma on its own, to extend the list */ - , - /* Must be the last in the enum */ - KBASE_TRACE_CODE_COUNT -}; - -#define KBASE_TRACE_FLAG_REFCOUNT (((u8)1) << 0) -#define KBASE_TRACE_FLAG_JOBSLOT (((u8)1) << 1) - /** - * struct kbase_trace - object representing a trace message added to trace buffer - * kbase_device::trace_rbuf - * @timestamp: CPU timestamp at which the trace message was added. - * @thread_id: id of the thread in the context of which trace message - * was added. - * @cpu: indicates which CPU the @thread_id was scheduled on when - * the trace message was added. - * @ctx: Pointer to the kbase context for which the trace message - * was added. Will be NULL for certain trace messages like - * for traces added corresponding to power management events. 
- * Will point to the appropriate context corresponding to - * job-slot & context's reference count related events. - * @katom: indicates if the trace message has atom related info. - * @atom_number: id of the atom for which trace message was added. - * Only valid if @katom is true. - * @atom_udata: Copy of the user data sent for the atom in base_jd_submit. - * Only valid if @katom is true. - * @gpu_addr: GPU address of the job-chain represented by atom. Could - * be valid even if @katom is false. - * @info_val: value specific to the type of event being traced. For the - * case where @katom is true, will be set to atom's affinity, - * i.e. bitmask of shader cores chosen for atom's execution. - * @code: Identifies the event, refer enum kbase_trace_code. - * @jobslot: job-slot for which trace message was added, valid only for - * job-slot management events. - * @refcount: reference count for the context, valid for certain events - * related to scheduler core and policy. - * @flags: indicates if info related to @jobslot & @refcount is present - * in the trace message, used during dumping of the message. + * struct kbasep_mem_device - Data stored per device for memory allocation + * + * @used_pages: Tracks usage of OS shared memory. Updated when OS memory is + * allocated/freed. + * @ir_threshold: Fraction of the maximum size of an allocation that grows + * on GPU page fault that can be used before the driver + * switches to incremental rendering, in 1/256ths. + * 0 means disabled. 
*/ -struct kbase_trace { - struct timespec timestamp; - u32 thread_id; - u32 cpu; - void *ctx; - bool katom; - int atom_number; - u64 atom_udata[2]; - u64 gpu_addr; - unsigned long info_val; - u8 code; - u8 jobslot; - u8 refcount; - u8 flags; +struct kbasep_mem_device { + atomic_t used_pages; + atomic_t ir_threshold; }; /** @@ -1025,6 +344,10 @@ struct kbase_pm_device_data { int active_count; /** Flag indicating suspending/suspended */ bool suspending; +#ifdef CONFIG_MALI_ARBITER_SUPPORT + /* Flag indicating gpu lost */ + bool gpu_lost; +#endif /* CONFIG_MALI_ARBITER_SUPPORT */ /* Wait queue set when active_count == 0 */ wait_queue_head_t zero_active_count_wait; @@ -1055,6 +378,13 @@ struct kbase_pm_device_data { u32 dvfs_period; struct kbase_pm_backend_data backend; + +#ifdef CONFIG_MALI_ARBITER_SUPPORT + /** + * The state of the arbiter VM machine + */ + struct kbase_arbiter_vm_state *arb_vm_state; +#endif /* CONFIG_MALI_ARBITER_SUPPORT */ }; /** @@ -1197,8 +527,6 @@ struct kbase_mmu_mode { struct kbase_mmu_mode const *kbase_mmu_mode_get_lpae(void); struct kbase_mmu_mode const *kbase_mmu_mode_get_aarch64(void); - - #define DEVNAME_SIZE 16 /** @@ -1329,8 +657,9 @@ struct kbase_devfreq_queue_info { * kbase_hwcnt_context_enable() with @hwcnt_gpu_ctx. * @hwcnt_gpu_virt: Virtualizer for GPU hardware counters. * @vinstr_ctx: vinstr context created per device. - * @timeline_is_enabled: Non zero, if there is at least one timeline client, - * zero otherwise. + * @timeline_flags: Bitmask defining which sets of timeline tracepoints + * are enabled. If zero, there is no timeline client and + * therefore timeline is disabled. * @timeline: Timeline context created per device. * @trace_lock: Lock to serialize the access to trace buffer. * @trace_first_out: Index/offset in the trace buffer at which the first @@ -1456,7 +785,6 @@ struct kbase_devfreq_queue_info { * enabled. 
* @protected_mode_hwcnt_disable_work: Work item to disable GPU hardware * counters, used if atomic disable is not possible. - * @protected_mode_support: set to true if protected mode is supported. * @buslogger: Pointer to the structure required for interfacing * with the bus logger module to set the size of buffer * used by the module for capturing bus logs. @@ -1520,7 +848,7 @@ struct kbase_device { #endif /* CONFIG_MALI_BIFROST_NO_MALI */ struct kbase_pm_device_data pm; - struct kbasep_js_device_data js_data; + struct kbase_mem_pool_group mem_pools; struct kbasep_mem_device memdev; struct kbase_mmu_mode const *mmu_mode; @@ -1562,16 +890,12 @@ struct kbase_device { struct kbase_hwcnt_virtualizer *hwcnt_gpu_virt; struct kbase_vinstr_context *vinstr_ctx; - atomic_t timeline_is_enabled; + atomic_t timeline_flags; struct kbase_timeline *timeline; -#if KBASE_TRACE_ENABLE - spinlock_t trace_lock; - u16 trace_first_out; - u16 trace_next_in; - struct kbase_trace *trace_rbuf; +#if KBASE_KTRACE_TARGET_RBUF + struct kbase_ktrace ktrace; #endif - u32 reset_timeout_ms; bool cache_clean_in_progress; @@ -1677,7 +1001,7 @@ struct kbase_device { u32 snoop_enable_smc; u32 snoop_disable_smc; - struct protected_mode_ops *protected_ops; + const struct protected_mode_ops *protected_ops; struct protected_mode_device *protected_dev; @@ -1691,8 +1015,6 @@ struct kbase_device { struct work_struct protected_mode_hwcnt_disable_work; - bool protected_mode_support; - #ifdef CONFIG_MALI_BUSLOG struct bus_logger_client *buslogger; #endif @@ -1705,37 +1027,35 @@ struct kbase_device { struct mutex mmu_hw_mutex; + u8 l2_size_override; + u8 l2_hash_override; + + struct kbasep_js_device_data js_data; + + /* See KBASE_JS_*_PRIORITY_MODE for details. 
*/ + u32 js_ctx_scheduling_mode; + /* See KBASE_SERIALIZE_* for details */ u8 serialize_jobs; #ifdef CONFIG_MALI_CINSTR_GWT u8 backup_serialize_jobs; +#endif /* CONFIG_MALI_CINSTR_GWT */ + + + struct { + struct kbase_context *ctx; + u64 jc; + int slot; + u64 flags; + } dummy_job_wa; + +#ifdef CONFIG_MALI_ARBITER_SUPPORT + /* Pointer to the arbiter device */ + struct kbase_arbiter_device arb; #endif - - u8 l2_size_override; - u8 l2_hash_override; - - /* See KBASE_JS_*_PRIORITY_MODE for details. */ - u32 js_ctx_scheduling_mode; - }; -/** - * struct jsctx_queue - JS context atom queue - * @runnable_tree: Root of RB-tree containing currently runnable atoms on this - * job slot. - * @x_dep_head: Head item of the linked list of atoms blocked on cross-slot - * dependencies. Atoms on this list will be moved to the - * runnable_tree when the blocking atom completes. - * - * hwaccess_lock must be held when accessing this structure. - */ -struct jsctx_queue { - struct rb_root runnable_tree; - struct list_head x_dep_head; -}; - - #define KBASE_API_VERSION(major, minor) ((((major) & 0xFFF) << 20) | \ (((minor) & 0xFFF) << 8) | \ ((0 & 0xFF) << 0)) @@ -1844,6 +1164,11 @@ struct kbase_file { * from it for job slot 2. This is reset when the context first goes active or * is re-activated on that slot. * + * @KCTX_AS_DISABLED_ON_FAULT: Set when the GPU address space is disabled for + * the context due to unhandled page(or bus) fault. It is cleared when the + * refcount for the context drops to 0 or on when the address spaces are + * re-enabled on GPU reset or power cycle. + * * All members need to be separate bits. This enum is intended for use in a * bitmask where multiple values get OR-ed together. 
*/ @@ -1863,6 +1188,7 @@ enum kbase_context_flags { KCTX_PULLED_SINCE_ACTIVE_JS0 = 1U << 12, KCTX_PULLED_SINCE_ACTIVE_JS1 = 1U << 13, KCTX_PULLED_SINCE_ACTIVE_JS2 = 1U << 14, + KCTX_AS_DISABLED_ON_FAULT = 1U << 15, }; struct kbase_sub_alloc { @@ -2045,36 +1371,52 @@ struct kbase_sub_alloc { * soft-jobs which have been blocked for more than the * timeout value used for the soft-jobs * @jit_alloc: Array of 256 pointers to GPU memory regions, used for - * for JIT allocations. - * @jit_max_allocations: Maximum number of JIT allocations allowed at once. - * @jit_current_allocations: Current number of in-flight JIT allocations. - * @jit_current_allocations_per_bin: Current number of in-flight JIT allocations per bin - * @jit_version: version number indicating whether userspace is using - * old or new version of interface for JIT allocations - * 1 -> client used KBASE_IOCTL_MEM_JIT_INIT_OLD - * 2 -> client used KBASE_IOCTL_MEM_JIT_INIT + * just-in-time memory allocations. + * @jit_max_allocations: Maximum allowed number of in-flight + * just-in-time memory allocations. + * @jit_current_allocations: Current number of in-flight just-in-time + * memory allocations. + * @jit_current_allocations_per_bin: Current number of in-flight just-in-time + * memory allocations per bin. + * @jit_version: Version number indicating whether userspace is using + * old or new version of interface for just-in-time + * memory allocations. + * 1 -> client used KBASE_IOCTL_MEM_JIT_INIT_10_2 + * 2 -> client used KBASE_IOCTL_MEM_JIT_INIT_11_5 + * 3 -> client used KBASE_IOCTL_MEM_JIT_INIT * @jit_group_id: A memory group ID to be passed to a platform-specific * memory group manager. * Valid range is 0..(MEMORY_GROUP_MANAGER_NR_GROUPS-1). - * @jit_active_head: List containing the JIT allocations which are in use. - * @jit_pool_head: List containing the JIT allocations which have been - * freed up by userpsace and so not being used by them. 
+ * @jit_phys_pages_limit: Limit of physical pages to apply across all + * just-in-time memory allocations, applied to + * @jit_current_phys_pressure. + * @jit_current_phys_pressure: Current 'pressure' on physical pages, which is + * the sum of the worst case estimate of pages that + * could be used (i.e. the + * &struct_kbase_va_region.nr_pages for all in-use + * just-in-time memory regions that have not yet had + * a usage report) and the actual number of pages + * that were used (i.e. the + * &struct_kbase_va_region.used_pages for regions + * that have had a usage report). + * @jit_active_head: List containing the just-in-time memory allocations + * which are in use. + * @jit_pool_head: List containing the just-in-time memory allocations + * which have been freed up by userspace and so not being + * used by them. * Driver caches them to quickly fulfill requests for new * JIT allocations. They are released in case of memory * pressure as they are put on the @evict_list when they * are freed up by userspace. - * @jit_destroy_head: List containing the JIT allocations which were moved to it - * from @jit_pool_head, in the shrinker callback, after freeing - * their backing physical pages. - * @jit_evict_lock: Lock used for operations done on JIT allocations and also - * for accessing @evict_list. - * @jit_work: Work item queued to defer the freeing of memory region when - * JIT allocation is moved to @jit_destroy_head. - * @jit_atoms_head: A list of the JIT soft-jobs, both alloc & free, in submission - * order, protected by kbase_jd_context.lock. - * @jit_pending_alloc: A list of JIT alloc soft-jobs for which allocation will be - * reattempted after the impending free of other active JIT - * allocations. + * @jit_destroy_head: List containing the just-in-time memory allocations + * which were moved to it from @jit_pool_head, in the + * shrinker callback, after freeing their backing + * physical pages. 
+ * @jit_evict_lock: Lock used for operations done on just-in-time memory + * allocations and also for accessing @evict_list. + * @jit_work: Work item queued to defer the freeing of a memory + * region when a just-in-time memory allocation is moved + * to @jit_destroy_head. * @ext_res_meta_head: A list of sticky external resources which were requested to * be mapped on GPU side, through a softjob atom of type * EXT_RES_MAP or STICKY_RESOURCE_MAP ioctl. @@ -2091,7 +1433,8 @@ struct kbase_sub_alloc { * @gwt_snapshot_list: Snapshot of the @gwt_current_list for sending to user space. * @priority: Indicates the context priority. Used along with @atoms_count * for context scheduling, protected by hwaccess_lock. - * @atoms_count: Number of gpu atoms currently in use, per priority + * @atoms_count: Number of GPU atoms currently in use, per priority + * @create_flags: Flags used in context creation. * * A kernel base context is an entity among which the GPU is scheduled. * Each context has its own GPU address space. 
@@ -2123,10 +1466,28 @@ struct kbase_context { struct list_head mem_partials; struct mutex reg_lock; + struct rb_root reg_rbtree_same; struct rb_root reg_rbtree_custom; struct rb_root reg_rbtree_exec; + struct kbase_jd_context jctx; + struct jsctx_queue jsctx_queue + [KBASE_JS_ATOM_SCHED_PRIO_COUNT][BASE_JM_MAX_NR_SLOTS]; + + struct list_head completed_jobs; + atomic_t work_count; + struct timer_list soft_job_timeout; + + atomic_t atoms_pulled; + atomic_t atoms_pulled_slot[BASE_JM_MAX_NR_SLOTS]; + int atoms_pulled_slot_pri[BASE_JM_MAX_NR_SLOTS][ + KBASE_JS_ATOM_SCHED_PRIO_COUNT]; + int priority; + bool blocked_js[BASE_JM_MAX_NR_SLOTS][KBASE_JS_ATOM_SCHED_PRIO_COUNT]; + s16 atoms_count[KBASE_JS_ATOM_SCHED_PRIO_COUNT]; + u32 slots_pullable; + u32 age_count; DECLARE_BITMAP(cookies, BITS_PER_LONG); struct kbase_va_region *pending_regions[BITS_PER_LONG]; @@ -2134,10 +1495,8 @@ struct kbase_context { wait_queue_head_t event_queue; pid_t tgid; pid_t pid; - - struct kbase_jd_context jctx; atomic_t used_pages; - atomic_t nonmapped_pages; + atomic_t nonmapped_pages; atomic_t permanent_mapped_pages; struct kbase_mem_pool_group mem_pools; @@ -2177,61 +1536,37 @@ struct kbase_context { #endif /* CONFIG_DEBUG_FS */ - struct jsctx_queue jsctx_queue - [KBASE_JS_ATOM_SCHED_PRIO_COUNT][BASE_JM_MAX_NR_SLOTS]; - - atomic_t atoms_pulled; - atomic_t atoms_pulled_slot[BASE_JM_MAX_NR_SLOTS]; - int atoms_pulled_slot_pri[BASE_JM_MAX_NR_SLOTS][ - KBASE_JS_ATOM_SCHED_PRIO_COUNT]; - - bool blocked_js[BASE_JM_MAX_NR_SLOTS][KBASE_JS_ATOM_SCHED_PRIO_COUNT]; - - u32 slots_pullable; - - struct work_struct work; - struct kbase_hwcnt_legacy_client *legacy_hwcnt_cli; struct mutex legacy_hwcnt_lock; - struct list_head completed_jobs; - atomic_t work_count; - - struct timer_list soft_job_timeout; - - struct kbase_va_region *jit_alloc[256]; + struct kbase_va_region *jit_alloc[1 + BASE_JIT_ALLOC_COUNT]; u8 jit_max_allocations; u8 jit_current_allocations; u8 jit_current_allocations_per_bin[256]; u8 
jit_version; u8 jit_group_id; +#if MALI_JIT_PRESSURE_LIMIT + u64 jit_phys_pages_limit; + u64 jit_current_phys_pressure; +#endif /* MALI_JIT_PRESSURE_LIMIT */ struct list_head jit_active_head; struct list_head jit_pool_head; struct list_head jit_destroy_head; struct mutex jit_evict_lock; struct work_struct jit_work; - struct list_head jit_atoms_head; - struct list_head jit_pending_alloc; - struct list_head ext_res_meta_head; - u32 age_count; - u8 trim_level; #ifdef CONFIG_MALI_CINSTR_GWT bool gwt_enabled; - bool gwt_was_enabled; - struct list_head gwt_current_list; - struct list_head gwt_snapshot_list; #endif - int priority; - s16 atoms_count[KBASE_JS_ATOM_SCHED_PRIO_COUNT]; + base_context_create_flags create_flags; }; #ifdef CONFIG_MALI_CINSTR_GWT diff --git a/drivers/gpu/arm/bifrost/mali_kbase_device.c b/drivers/gpu/arm/bifrost/mali_kbase_device.c deleted file mode 100644 index 89db1746c5e5..000000000000 --- a/drivers/gpu/arm/bifrost/mali_kbase_device.c +++ /dev/null @@ -1,515 +0,0 @@ -/* - * - * (C) COPYRIGHT 2010-2019 ARM Limited. All rights reserved. - * - * This program is free software and is provided to you under the terms of the - * GNU General Public License version 2 as published by the Free Software - * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, you can access it online at - * http://www.gnu.org/licenses/gpl-2.0.html. 
- * - * SPDX-License-Identifier: GPL-2.0 - * - */ - - - -/* - * Base kernel device APIs - */ - -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include - -/* NOTE: Magic - 0x45435254 (TRCE in ASCII). - * Supports tracing feature provided in the base module. - * Please keep it in sync with the value of base module. - */ -#define TRACE_BUFFER_HEADER_SPECIAL 0x45435254 - -#if KBASE_TRACE_ENABLE -static const char *kbasep_trace_code_string[] = { - /* IMPORTANT: USE OF SPECIAL #INCLUDE OF NON-STANDARD HEADER FILE - * THIS MUST BE USED AT THE START OF THE ARRAY */ -#define KBASE_TRACE_CODE_MAKE_CODE(X) # X -#include "mali_kbase_trace_defs.h" -#undef KBASE_TRACE_CODE_MAKE_CODE -}; -#endif - -#define DEBUG_MESSAGE_SIZE 256 - -static int kbasep_trace_init(struct kbase_device *kbdev); -static void kbasep_trace_term(struct kbase_device *kbdev); -static void kbasep_trace_hook_wrapper(void *param); - -struct kbase_device *kbase_device_alloc(void) -{ - return kzalloc(sizeof(struct kbase_device), GFP_KERNEL); -} - -static int kbase_device_as_init(struct kbase_device *kbdev, int i) -{ - kbdev->as[i].number = i; - kbdev->as[i].bf_data.addr = 0ULL; - kbdev->as[i].pf_data.addr = 0ULL; - - kbdev->as[i].pf_wq = alloc_workqueue("mali_mmu%d", 0, 1, i); - if (!kbdev->as[i].pf_wq) - return -EINVAL; - - INIT_WORK(&kbdev->as[i].work_pagefault, page_fault_worker); - INIT_WORK(&kbdev->as[i].work_busfault, bus_fault_worker); - - return 0; -} - -static void kbase_device_as_term(struct kbase_device *kbdev, int i) -{ - destroy_workqueue(kbdev->as[i].pf_wq); -} - -static int kbase_device_all_as_init(struct kbase_device *kbdev) -{ - int i, err; - - for (i = 0; i < kbdev->nr_hw_address_spaces; i++) { - err = kbase_device_as_init(kbdev, i); - if (err) - goto free_workqs; - } - - return 0; - -free_workqs: - for (; i > 0; i--) - kbase_device_as_term(kbdev, i); - - return err; -} - -static void kbase_device_all_as_term(struct kbase_device *kbdev) -{ 
- int i; - - for (i = 0; i < kbdev->nr_hw_address_spaces; i++) - kbase_device_as_term(kbdev, i); -} - -int kbase_device_init(struct kbase_device * const kbdev) -{ - int err; -#ifdef CONFIG_ARM64 - struct device_node *np = NULL; -#endif /* CONFIG_ARM64 */ - - spin_lock_init(&kbdev->mmu_mask_change); - mutex_init(&kbdev->mmu_hw_mutex); -#ifdef CONFIG_ARM64 - kbdev->cci_snoop_enabled = false; - np = kbdev->dev->of_node; - if (np != NULL) { - if (of_property_read_u32(np, "snoop_enable_smc", - &kbdev->snoop_enable_smc)) - kbdev->snoop_enable_smc = 0; - if (of_property_read_u32(np, "snoop_disable_smc", - &kbdev->snoop_disable_smc)) - kbdev->snoop_disable_smc = 0; - /* Either both or none of the calls should be provided. */ - if (!((kbdev->snoop_disable_smc == 0 - && kbdev->snoop_enable_smc == 0) - || (kbdev->snoop_disable_smc != 0 - && kbdev->snoop_enable_smc != 0))) { - WARN_ON(1); - err = -EINVAL; - goto fail; - } - } -#endif /* CONFIG_ARM64 */ - /* Get the list of workarounds for issues on the current HW - * (identified by the GPU_ID register) - */ - err = kbase_hw_set_issues_mask(kbdev); - if (err) - goto fail; - - /* Set the list of features available on the current HW - * (identified by the GPU_ID register) - */ - kbase_hw_set_features_mask(kbdev); - - kbase_gpuprops_set_features(kbdev); - - /* On Linux 4.0+, dma coherency is determined from device tree */ -#if defined(CONFIG_ARM64) && LINUX_VERSION_CODE < KERNEL_VERSION(4, 0, 0) - set_dma_ops(kbdev->dev, &noncoherent_swiotlb_dma_ops); -#endif - - /* Workaround a pre-3.13 Linux issue, where dma_mask is NULL when our - * device structure was created by device-tree - */ - if (!kbdev->dev->dma_mask) - kbdev->dev->dma_mask = &kbdev->dev->coherent_dma_mask; - - err = dma_set_mask(kbdev->dev, - DMA_BIT_MASK(kbdev->gpu_props.mmu.pa_bits)); - if (err) - goto dma_set_mask_failed; - - err = dma_set_coherent_mask(kbdev->dev, - DMA_BIT_MASK(kbdev->gpu_props.mmu.pa_bits)); - if (err) - goto dma_set_mask_failed; - - 
kbdev->nr_hw_address_spaces = kbdev->gpu_props.num_address_spaces; - - err = kbase_device_all_as_init(kbdev); - if (err) - goto as_init_failed; - - spin_lock_init(&kbdev->hwcnt.lock); - - err = kbasep_trace_init(kbdev); - if (err) - goto term_as; - - init_waitqueue_head(&kbdev->cache_clean_wait); - - kbase_debug_assert_register_hook(&kbasep_trace_hook_wrapper, kbdev); - - atomic_set(&kbdev->ctx_num, 0); - - err = kbase_instr_backend_init(kbdev); - if (err) - goto term_trace; - - kbdev->pm.dvfs_period = DEFAULT_PM_DVFS_PERIOD; - - kbdev->reset_timeout_ms = DEFAULT_RESET_TIMEOUT_MS; - - if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_AARCH64_MMU)) - kbdev->mmu_mode = kbase_mmu_mode_get_aarch64(); - else - kbdev->mmu_mode = kbase_mmu_mode_get_lpae(); - - mutex_init(&kbdev->kctx_list_lock); - INIT_LIST_HEAD(&kbdev->kctx_list); - - return 0; -term_trace: - kbasep_trace_term(kbdev); -term_as: - kbase_device_all_as_term(kbdev); -as_init_failed: -dma_set_mask_failed: -fail: - return err; -} - -void kbase_device_term(struct kbase_device *kbdev) -{ - KBASE_DEBUG_ASSERT(kbdev); - - WARN_ON(!list_empty(&kbdev->kctx_list)); - -#if KBASE_TRACE_ENABLE - kbase_debug_assert_register_hook(NULL, NULL); -#endif - - kbase_instr_backend_term(kbdev); - - kbasep_trace_term(kbdev); - - kbase_device_all_as_term(kbdev); -} - -void kbase_device_free(struct kbase_device *kbdev) -{ - kfree(kbdev); -} - -/* - * Device trace functions - */ -#if KBASE_TRACE_ENABLE - -static int kbasep_trace_init(struct kbase_device *kbdev) -{ - struct kbase_trace *rbuf; - - rbuf = kmalloc_array(KBASE_TRACE_SIZE, sizeof(*rbuf), GFP_KERNEL); - - if (!rbuf) - return -EINVAL; - - kbdev->trace_rbuf = rbuf; - spin_lock_init(&kbdev->trace_lock); - return 0; -} - -static void kbasep_trace_term(struct kbase_device *kbdev) -{ - kfree(kbdev->trace_rbuf); -} - -static void kbasep_trace_format_msg(struct kbase_trace *trace_msg, char *buffer, int len) -{ - s32 written = 0; - - /* Initial part of message */ - written += 
MAX(snprintf(buffer + written, MAX(len - written, 0), "%d.%.6d,%d,%d,%s,%p,", (int)trace_msg->timestamp.tv_sec, (int)(trace_msg->timestamp.tv_nsec / 1000), trace_msg->thread_id, trace_msg->cpu, kbasep_trace_code_string[trace_msg->code], trace_msg->ctx), 0); - - if (trace_msg->katom) - written += MAX(snprintf(buffer + written, MAX(len - written, 0), "atom %d (ud: 0x%llx 0x%llx)", trace_msg->atom_number, trace_msg->atom_udata[0], trace_msg->atom_udata[1]), 0); - - written += MAX(snprintf(buffer + written, MAX(len - written, 0), ",%.8llx,", trace_msg->gpu_addr), 0); - - /* NOTE: Could add function callbacks to handle different message types */ - /* Jobslot present */ - if (trace_msg->flags & KBASE_TRACE_FLAG_JOBSLOT) - written += MAX(snprintf(buffer + written, MAX(len - written, 0), "%d", trace_msg->jobslot), 0); - - written += MAX(snprintf(buffer + written, MAX(len - written, 0), ","), 0); - - /* Refcount present */ - if (trace_msg->flags & KBASE_TRACE_FLAG_REFCOUNT) - written += MAX(snprintf(buffer + written, MAX(len - written, 0), "%d", trace_msg->refcount), 0); - - written += MAX(snprintf(buffer + written, MAX(len - written, 0), ","), 0); - - /* Rest of message */ - written += MAX(snprintf(buffer + written, MAX(len - written, 0), "0x%.8lx", trace_msg->info_val), 0); -} - -static void kbasep_trace_dump_msg(struct kbase_device *kbdev, struct kbase_trace *trace_msg) -{ - char buffer[DEBUG_MESSAGE_SIZE]; - - kbasep_trace_format_msg(trace_msg, buffer, DEBUG_MESSAGE_SIZE); - dev_dbg(kbdev->dev, "%s", buffer); -} - -void kbasep_trace_add(struct kbase_device *kbdev, enum kbase_trace_code code, void *ctx, struct kbase_jd_atom *katom, u64 gpu_addr, u8 flags, int refcount, int jobslot, unsigned long info_val) -{ - unsigned long irqflags; - struct kbase_trace *trace_msg; - - spin_lock_irqsave(&kbdev->trace_lock, irqflags); - - trace_msg = &kbdev->trace_rbuf[kbdev->trace_next_in]; - - /* Fill the message */ - trace_msg->thread_id = task_pid_nr(current); - trace_msg->cpu = 
task_cpu(current); - - getnstimeofday(&trace_msg->timestamp); - - trace_msg->code = code; - trace_msg->ctx = ctx; - - if (NULL == katom) { - trace_msg->katom = false; - } else { - trace_msg->katom = true; - trace_msg->atom_number = kbase_jd_atom_id(katom->kctx, katom); - trace_msg->atom_udata[0] = katom->udata.blob[0]; - trace_msg->atom_udata[1] = katom->udata.blob[1]; - } - - trace_msg->gpu_addr = gpu_addr; - trace_msg->jobslot = jobslot; - trace_msg->refcount = MIN((unsigned int)refcount, 0xFF); - trace_msg->info_val = info_val; - trace_msg->flags = flags; - - /* Update the ringbuffer indices */ - kbdev->trace_next_in = (kbdev->trace_next_in + 1) & KBASE_TRACE_MASK; - if (kbdev->trace_next_in == kbdev->trace_first_out) - kbdev->trace_first_out = (kbdev->trace_first_out + 1) & KBASE_TRACE_MASK; - - /* Done */ - - spin_unlock_irqrestore(&kbdev->trace_lock, irqflags); -} - -void kbasep_trace_clear(struct kbase_device *kbdev) -{ - unsigned long flags; - - spin_lock_irqsave(&kbdev->trace_lock, flags); - kbdev->trace_first_out = kbdev->trace_next_in; - spin_unlock_irqrestore(&kbdev->trace_lock, flags); -} - -void kbasep_trace_dump(struct kbase_device *kbdev) -{ - unsigned long flags; - u32 start; - u32 end; - - dev_dbg(kbdev->dev, "Dumping trace:\nsecs,nthread,cpu,code,ctx,katom,gpu_addr,jobslot,refcount,info_val"); - spin_lock_irqsave(&kbdev->trace_lock, flags); - start = kbdev->trace_first_out; - end = kbdev->trace_next_in; - - while (start != end) { - struct kbase_trace *trace_msg = &kbdev->trace_rbuf[start]; - - kbasep_trace_dump_msg(kbdev, trace_msg); - - start = (start + 1) & KBASE_TRACE_MASK; - } - dev_dbg(kbdev->dev, "TRACE_END"); - - spin_unlock_irqrestore(&kbdev->trace_lock, flags); - - KBASE_TRACE_CLEAR(kbdev); -} - -static void kbasep_trace_hook_wrapper(void *param) -{ - struct kbase_device *kbdev = (struct kbase_device *)param; - - kbasep_trace_dump(kbdev); -} - -#ifdef CONFIG_DEBUG_FS -struct trace_seq_state { - struct kbase_trace 
trace_buf[KBASE_TRACE_SIZE]; - u32 start; - u32 end; -}; - -static void *kbasep_trace_seq_start(struct seq_file *s, loff_t *pos) -{ - struct trace_seq_state *state = s->private; - int i; - - if (*pos > KBASE_TRACE_SIZE) - return NULL; - i = state->start + *pos; - if ((state->end >= state->start && i >= state->end) || - i >= state->end + KBASE_TRACE_SIZE) - return NULL; - - i &= KBASE_TRACE_MASK; - - return &state->trace_buf[i]; -} - -static void kbasep_trace_seq_stop(struct seq_file *s, void *data) -{ -} - -static void *kbasep_trace_seq_next(struct seq_file *s, void *data, loff_t *pos) -{ - struct trace_seq_state *state = s->private; - int i; - - (*pos)++; - - i = (state->start + *pos) & KBASE_TRACE_MASK; - if (i == state->end) - return NULL; - - return &state->trace_buf[i]; -} - -static int kbasep_trace_seq_show(struct seq_file *s, void *data) -{ - struct kbase_trace *trace_msg = data; - char buffer[DEBUG_MESSAGE_SIZE]; - - kbasep_trace_format_msg(trace_msg, buffer, DEBUG_MESSAGE_SIZE); - seq_printf(s, "%s\n", buffer); - return 0; -} - -static const struct seq_operations kbasep_trace_seq_ops = { - .start = kbasep_trace_seq_start, - .next = kbasep_trace_seq_next, - .stop = kbasep_trace_seq_stop, - .show = kbasep_trace_seq_show, -}; - -static int kbasep_trace_debugfs_open(struct inode *inode, struct file *file) -{ - struct kbase_device *kbdev = inode->i_private; - unsigned long flags; - - struct trace_seq_state *state; - - state = __seq_open_private(file, &kbasep_trace_seq_ops, sizeof(*state)); - if (!state) - return -ENOMEM; - - spin_lock_irqsave(&kbdev->trace_lock, flags); - state->start = kbdev->trace_first_out; - state->end = kbdev->trace_next_in; - memcpy(state->trace_buf, kbdev->trace_rbuf, sizeof(state->trace_buf)); - spin_unlock_irqrestore(&kbdev->trace_lock, flags); - - return 0; -} - -static const struct file_operations kbasep_trace_debugfs_fops = { - .owner = THIS_MODULE, - .open = kbasep_trace_debugfs_open, - .read = seq_read, - .llseek = seq_lseek, - 
.release = seq_release_private, -}; - -void kbasep_trace_debugfs_init(struct kbase_device *kbdev) -{ - debugfs_create_file("mali_trace", S_IRUGO, - kbdev->mali_debugfs_directory, kbdev, - &kbasep_trace_debugfs_fops); -} - -#else -void kbasep_trace_debugfs_init(struct kbase_device *kbdev) -{ -} -#endif /* CONFIG_DEBUG_FS */ - -#else /* KBASE_TRACE_ENABLE */ -static int kbasep_trace_init(struct kbase_device *kbdev) -{ - CSTD_UNUSED(kbdev); - return 0; -} - -static void kbasep_trace_term(struct kbase_device *kbdev) -{ - CSTD_UNUSED(kbdev); -} - -static void kbasep_trace_hook_wrapper(void *param) -{ - CSTD_UNUSED(param); -} - -void kbasep_trace_dump(struct kbase_device *kbdev) -{ - CSTD_UNUSED(kbdev); -} -#endif /* KBASE_TRACE_ENABLE */ diff --git a/drivers/gpu/arm/bifrost/mali_kbase_disjoint_events.c b/drivers/gpu/arm/bifrost/mali_kbase_disjoint_events.c index 68eb4ed0715d..b5ac414b1223 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_disjoint_events.c +++ b/drivers/gpu/arm/bifrost/mali_kbase_disjoint_events.c @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2014 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014, 2020 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software diff --git a/drivers/gpu/arm/bifrost/mali_kbase_dma_fence.c b/drivers/gpu/arm/bifrost/mali_kbase_dma_fence.c index 9af59bb56d1c..6a872be1b165 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_dma_fence.c +++ b/drivers/gpu/arm/bifrost/mali_kbase_dma_fence.c @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2011-2017 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2011-2017,2020 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -114,6 +114,8 @@ kbase_dma_fence_unlock_reservations(struct kbase_dma_fence_resv_info *info, ww_acquire_fini(ctx); } + + /** * kbase_dma_fence_queue_work() - Queue work to handle @katom * @katom: Pointer to atom for which to queue work diff --git a/drivers/gpu/arm/bifrost/mali_kbase_dummy_job_wa.c b/drivers/gpu/arm/bifrost/mali_kbase_dummy_job_wa.c new file mode 100644 index 000000000000..188e53bf1abe --- /dev/null +++ b/drivers/gpu/arm/bifrost/mali_kbase_dummy_job_wa.c @@ -0,0 +1,442 @@ +/* + * + * (C) COPYRIGHT 2020 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +/* + * Implementation of the dummy job execution workaround for the GPU hang issue. 
+ */ + +#include +#include +#include + +#include +#include + +#define DUMMY_JOB_WA_BINARY_NAME "valhall-1691526.wa" + +struct wa_header { + u16 signature; /* must be 0x4157, checked by kbase_dummy_job_wa_load() */ + u16 version; /* only version 2 is accepted */ + u32 info_offset; /* byte offset of struct wa_v2_info within the image */ +} __packed; + +struct wa_v2_info { + u64 jc; /* job chain address written to JS_HEAD_NEXT */ + u32 js; /* job slot the WA job is submitted on */ + u32 blob_offset; /* byte offset of the first struct wa_blob, 0 if none */ + u64 flags; /* KBASE_DUMMY_JOB_WA_FLAG_* bits */ +} __packed; + +struct wa_blob { + u64 base; /* page-aligned GPU VA the payload is placed at */ + u32 size; /* payload size in bytes */ + u32 map_flags; /* allocation flags, BASE_MEM_FLAG_MAP_FIXED is added */ + u32 payload_offset; /* byte offset of the payload within the image */ + u32 blob_offset; /* byte offset of the next blob, 0 ends the chain */ +} __packed; + +static bool in_range(const u8 *base, const u8 *end, off_t off, size_t sz) +{ + return off >= 0 && (size_t)off <= (size_t)(end - base) && sz <= (size_t)(end - base) - (size_t)off; /* true only if [off, off + sz) lies inside [base, end); compared in unsigned space after rejecting offsets outside the image, because the old signed/unsigned mix let an offset past the end wrap round and pass */ +} + +static u32 wait_any(struct kbase_device *kbdev, off_t offset, u32 bits) +{ + int loop; + const int timeout = 100; /* number of polls, 10us apart */ + u32 val; + + for (loop = 0; loop < timeout; loop++) { + val = kbase_reg_read(kbdev, offset); + if (val & bits) + break; + udelay(10); + } + + if (loop == timeout) { + dev_err(kbdev->dev, + "Timeout reading register 0x%lx, bits 0x%lx, last read was 0x%lx\n", + (unsigned long)offset, (unsigned long)bits, + (unsigned long)val); + } + + return (val & bits); /* the bits found set, or 0 after a timeout */ +} + +static int wait(struct kbase_device *kbdev, off_t offset, u32 bits, bool set) +{ + int loop; + const int timeout = 100; /* number of polls, 10us apart */ + u32 val; + u32 target = 0; /* wait for all of @bits to clear, unless @set */ + + if (set) + target = bits; + + for (loop = 0; loop < timeout; loop++) { + val = kbase_reg_read(kbdev, (offset)); + if ((val & bits) == target) + break; + + udelay(10); + } + + if (loop == timeout) { + dev_err(kbdev->dev, + "Timeout reading register 0x%lx, bits 0x%lx, last read was 0x%lx\n", + (unsigned long)offset, (unsigned long)bits, + (unsigned long)val); + return -ETIMEDOUT; + } + + return 0; +} + +static inline int run_job(struct kbase_device *kbdev, int as, int slot, + u64 cores, u64 jc) +{ + u32 done; + + /* setup job: job chain address, core affinity and config go into the slot's NEXT registers */ + kbase_reg_write(kbdev, JOB_SLOT_REG(slot, JS_HEAD_NEXT_LO), + jc & U32_MAX); + kbase_reg_write(kbdev, JOB_SLOT_REG(slot, JS_HEAD_NEXT_HI), + jc >> 32); + kbase_reg_write(kbdev, JOB_SLOT_REG(slot, JS_AFFINITY_NEXT_LO), + cores & U32_MAX); + kbase_reg_write(kbdev, JOB_SLOT_REG(slot, JS_AFFINITY_NEXT_HI), +
cores >> 32); + kbase_reg_write(kbdev, JOB_SLOT_REG(slot, JS_CONFIG_NEXT), + JS_CONFIG_DISABLE_DESCRIPTOR_WR_BK | as); + + /* go: issue the start command for the job set up above */ + kbase_reg_write(kbdev, JOB_SLOT_REG(slot, JS_COMMAND_NEXT), + JS_COMMAND_START); + + /* wait for the slot to finish (done, error); bit 'slot' alone being set is the only outcome treated as success */ + done = wait_any(kbdev, JOB_CONTROL_REG(JOB_IRQ_RAWSTAT), + (1ul << (16+slot)) | (1ul << slot)); + kbase_reg_write(kbdev, JOB_CONTROL_REG(JOB_IRQ_CLEAR), done); + + if (done != (1ul << slot)) { + dev_err(kbdev->dev, + "Failed to run WA job on slot %d cores 0x%llx: done 0x%lx\n", + slot, (unsigned long long)cores, + (unsigned long)done); + dev_err(kbdev->dev, "JS_STATUS on failure: 0x%x\n", + kbase_reg_read(kbdev, JOB_SLOT_REG(slot, JS_STATUS))); + + return -EFAULT; + } else { + return 0; + } +} + +/* To be called after power up & MMU init, but before everything else. Runs the WA job on the cores in the @cores bitmask; returns 0 on success, -EFAULT if no WA context is loaded or any run failed */ +int kbase_dummy_job_wa_execute(struct kbase_device *kbdev, u64 cores) +{ + int as; + int slot; + u64 jc; + int failed = 0; + int runs = 0; + u32 old_gpu_mask; + u32 old_job_mask; + + if (!kbdev) + return -EFAULT; + + if (!kbdev->dummy_job_wa.ctx) + return -EFAULT; + + as = kbdev->dummy_job_wa.ctx->as_nr; + slot = kbdev->dummy_job_wa.slot; + jc = kbdev->dummy_job_wa.jc; + + /* mask off all but MMU IRQs: the GPU and job IRQ masks are saved, then zeroed */ + old_gpu_mask = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK)); + old_job_mask = kbase_reg_read(kbdev, JOB_CONTROL_REG(JOB_IRQ_MASK)); + kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), 0); + kbase_reg_write(kbdev, JOB_CONTROL_REG(JOB_IRQ_MASK), 0); + + /* power up requested cores */ + kbase_reg_write(kbdev, SHADER_PWRON_LO, (cores & U32_MAX)); + kbase_reg_write(kbdev, SHADER_PWRON_HI, (cores >> 32)); + + if (kbdev->dummy_job_wa.flags & KBASE_DUMMY_JOB_WA_FLAG_WAIT_POWERUP) { + /* wait for power-ups; a timeout is logged by wait() but not treated as fatal here */ + wait(kbdev, SHADER_READY_LO, (cores & U32_MAX), true); + if (cores >> 32) + wait(kbdev, SHADER_READY_HI, (cores >> 32), true); + } + + if (kbdev->dummy_job_wa.flags & KBASE_DUMMY_JOB_WA_FLAG_SERIALIZE) { + int i; + + /* do
for each requested core */ + for (i = 0; i < sizeof(cores) * 8; i++) { + u64 affinity; + + affinity = 1ull << i; + + if (!(cores & affinity)) + continue; + + if (run_job(kbdev, as, slot, affinity, jc)) + failed++; + runs++; + } + + } else { + if (run_job(kbdev, as, slot, cores, jc)) + failed++; + runs++; + } + + if (kbdev->dummy_job_wa.flags & + KBASE_DUMMY_JOB_WA_FLAG_LOGICAL_SHADER_POWER) { + /* power off shader cores (to reduce any dynamic leakage) */ + kbase_reg_write(kbdev, SHADER_PWROFF_LO, (cores & U32_MAX)); + kbase_reg_write(kbdev, SHADER_PWROFF_HI, (cores >> 32)); + + /* wait for power off complete: the READY and PWRTRANS bits of the requested cores must both clear */ + wait(kbdev, SHADER_READY_LO, (cores & U32_MAX), false); + wait(kbdev, SHADER_PWRTRANS_LO, (cores & U32_MAX), false); + if (cores >> 32) { + wait(kbdev, SHADER_READY_HI, (cores >> 32), false); + wait(kbdev, SHADER_PWRTRANS_HI, (cores >> 32), false); + } + kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_CLEAR), U32_MAX); + } + + /* restore IRQ masks saved on entry */ + kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), old_gpu_mask); + kbase_reg_write(kbdev, JOB_CONTROL_REG(JOB_IRQ_MASK), old_job_mask); + + if (failed) + dev_err(kbdev->dev, + "WA complete with %d failures out of %d runs\n", failed, + runs); + + return failed ?
-EFAULT : 0; +} + +static ssize_t show_dummy_job_wa_info(struct device * const dev, + struct device_attribute * const attr, char * const buf) +{ + struct kbase_device *const kbdev = dev_get_drvdata(dev); + int err; + + if (!kbdev || !kbdev->dummy_job_wa.ctx) + return -ENODEV; + + err = scnprintf(buf, PAGE_SIZE, "slot %u flags %llx\n", + kbdev->dummy_job_wa.slot, kbdev->dummy_job_wa.flags); + + return err; +} + +static DEVICE_ATTR(dummy_job_wa_info, 0444, show_dummy_job_wa_info, NULL); + +static bool wa_blob_load_needed(struct kbase_device *kbdev) +{ + if (of_machine_is_compatible("arm,juno")) + return false; /* the blob is never loaded on "arm,juno" machines */ + + if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_TTRX_3485)) + return true; + + return false; +} + +int kbase_dummy_job_wa_load(struct kbase_device *kbdev) +{ + const struct firmware *firmware; + static const char wa_name[] = DUMMY_JOB_WA_BINARY_NAME; + const u32 signature = 0x4157; + const u32 version = 2; + const u8 *fw_end; + const u8 *fw; + const struct wa_header *header; + const struct wa_v2_info *v2_info; + u32 blob_offset; + int err; + struct kbase_context *kctx; + + if (!wa_blob_load_needed(kbdev)) + return 0; + + /* load the wa blob through the firmware loader */ + err = request_firmware(&firmware, wa_name, kbdev->dev); + + if (err) { + dev_err(kbdev->dev, "WA blob missing.
Please refer to the Arm Mali DDK Valhall Release Notes, " + "Part number DC-06002 or contact support-mali@arm.com - driver probe will be failed"); + return -ENODEV; + } + + kctx = kbase_create_context(kbdev, true, + BASE_CONTEXT_CREATE_FLAG_NONE, 0, + NULL); + + if (!kctx) { + dev_err(kbdev->dev, "Failed to create WA context\n"); + goto no_ctx; + } + + fw = firmware->data; + fw_end = fw + firmware->size; + + dev_dbg(kbdev->dev, "Loaded firmware of size %zu bytes\n", + firmware->size); + + if (!in_range(fw, fw_end, 0, sizeof(*header))) { + dev_err(kbdev->dev, "WA too small\n"); + goto bad_fw; + } + + header = (const struct wa_header *)(fw + 0); + + if (header->signature != signature) { + dev_err(kbdev->dev, "WA signature failure: 0x%lx\n", + (unsigned long)header->signature); + goto bad_fw; + } + + if (header->version != version) { + dev_err(kbdev->dev, "WA version 0x%lx not supported\n", + (unsigned long)header->version); + goto bad_fw; + } + + if (!in_range(fw, fw_end, header->info_offset, sizeof(*v2_info))) { + dev_err(kbdev->dev, "WA info offset out of bounds\n"); + goto bad_fw; + } + + v2_info = (const struct wa_v2_info *)(fw + header->info_offset); + + if (v2_info->flags & ~KBASE_DUMMY_JOB_WA_FLAGS) { + dev_err(kbdev->dev, "Unsupported WA flag(s): 0x%llx\n", + (unsigned long long)v2_info->flags); + goto bad_fw; + } + + kbdev->dummy_job_wa.slot = v2_info->js; + kbdev->dummy_job_wa.jc = v2_info->jc; + kbdev->dummy_job_wa.flags = v2_info->flags; + + blob_offset = v2_info->blob_offset; + + while (blob_offset) { /* walk the blob chain; offset 0 terminates it */ + const struct wa_blob *blob; + size_t nr_pages; + u64 flags; + u64 gpu_va; + struct kbase_va_region *va_region; + + if (!in_range(fw, fw_end, blob_offset, sizeof(*blob))) { + dev_err(kbdev->dev, "Blob offset out-of-range: 0x%lx\n", + (unsigned long)blob_offset); + goto bad_fw; + } + + blob = (const struct wa_blob *)(fw + blob_offset); + if (!in_range(fw, fw_end, blob->payload_offset, blob->size)) { + dev_err(kbdev->dev, "Payload out-of-bounds\n"); + goto
bad_fw; + } + + gpu_va = blob->base; + if (PAGE_ALIGN(gpu_va) != gpu_va) { + dev_err(kbdev->dev, "blob not page aligned\n"); + goto bad_fw; + } + nr_pages = PFN_UP(blob->size); + flags = blob->map_flags | BASE_MEM_FLAG_MAP_FIXED; + + va_region = kbase_mem_alloc(kctx, nr_pages, nr_pages, + 0, &flags, &gpu_va); + + if (!va_region) { + dev_err(kbdev->dev, "Failed to allocate for blob\n"); /* NOTE(review): not fatal here; the chain walk continues and the load can still return 0 */ + } else { + struct kbase_vmap_struct vmap = { 0 }; + const u8 *payload; + void *dst; + + /* copy the payload, through a temporary kernel mapping of the new region */ + payload = fw + blob->payload_offset; + + dst = kbase_vmap(kctx, + va_region->start_pfn << PAGE_SHIFT, + nr_pages << PAGE_SHIFT, &vmap); + + if (dst) { + memcpy(dst, payload, blob->size); + kbase_vunmap(kctx, &vmap); + } else { + dev_err(kbdev->dev, + "Failed to copy payload\n"); + } + + } + blob_offset = blob->blob_offset; /* follow chain */ + } + + release_firmware(firmware); + + kbasep_js_schedule_privileged_ctx(kbdev, kctx); + + kbdev->dummy_job_wa.ctx = kctx; + + err = sysfs_create_file(&kbdev->dev->kobj, + &dev_attr_dummy_job_wa_info.attr); + if (err) + dev_err(kbdev->dev, "SysFS file creation for dummy job wa failed\n"); + + return 0; + +bad_fw: + kbase_destroy_context(kctx); +no_ctx: + release_firmware(firmware); + return -EFAULT; +} + +void kbase_dummy_job_wa_cleanup(struct kbase_device *kbdev) +{ + struct kbase_context *wa_ctx; + + /* Can be safely called even if the file wasn't created on probe */ + sysfs_remove_file(&kbdev->dev->kobj, &dev_attr_dummy_job_wa_info.attr); + + wa_ctx = READ_ONCE(kbdev->dummy_job_wa.ctx); + WRITE_ONCE(kbdev->dummy_job_wa.ctx, NULL); + /* make this write visible before we tear down the ctx */ + smp_mb(); + + if (wa_ctx) { + kbasep_js_release_privileged_ctx(kbdev, wa_ctx); + kbase_destroy_context(wa_ctx); + } +} diff --git a/drivers/gpu/arm/bifrost/mali_kbase_dummy_job_wa.h b/drivers/gpu/arm/bifrost/mali_kbase_dummy_job_wa.h new file mode 100644 index 000000000000..5bbe37df7ed6 --- /dev/null +++
b/drivers/gpu/arm/bifrost/mali_kbase_dummy_job_wa.h @@ -0,0 +1,45 @@ +/* + * + * (C) COPYRIGHT 2020 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +#ifndef _KBASE_DUMMY_JOB_WORKAROUND_ +#define _KBASE_DUMMY_JOB_WORKAROUND_ /* interface to the dummy job workaround for the GPU hang issue */ + +#define KBASE_DUMMY_JOB_WA_FLAG_SERIALIZE (1ull << 0) /* run the WA job once per requested core instead of once for all of them */ +#define KBASE_DUMMY_JOB_WA_FLAG_WAIT_POWERUP (1ull << 1) /* poll SHADER_READY for the requested cores before running the job */ +#define KBASE_DUMMY_JOB_WA_FLAG_LOGICAL_SHADER_POWER (1ull << 2) /* power the requested cores off again once the job has run */ + +#define KBASE_DUMMY_JOB_WA_FLAGS (KBASE_DUMMY_JOB_WA_FLAG_SERIALIZE | \ + KBASE_DUMMY_JOB_WA_FLAG_WAIT_POWERUP | \ + KBASE_DUMMY_JOB_WA_FLAG_LOGICAL_SHADER_POWER) /* every flag kbase_dummy_job_wa_load() accepts; any other bit rejects the blob */ + + +int kbase_dummy_job_wa_load(struct kbase_device *kbdev); /* returns 0 when the WA is loaded or not needed, -ENODEV or -EFAULT on failure */ +void kbase_dummy_job_wa_cleanup(struct kbase_device *kbdev); /* removes the sysfs file and destroys the WA context, if there is one */ +int kbase_dummy_job_wa_execute(struct kbase_device *kbdev, u64 cores); /* runs the WA job on the cores in the @cores bitmask; returns 0 or -EFAULT */ + +static inline bool kbase_dummy_job_wa_enabled(struct kbase_device *kbdev) +{ + return (kbdev->dummy_job_wa.ctx != NULL); /* ctx is set by a successful kbase_dummy_job_wa_load() and cleared by kbase_dummy_job_wa_cleanup() */ +} + + +#endif /* _KBASE_DUMMY_JOB_WORKAROUND_ */ diff --git a/drivers/gpu/arm/bifrost/mali_kbase_event.c b/drivers/gpu/arm/bifrost/mali_kbase_event.c index 721af6963aff..c8b8f22d14f7 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_event.c +++ b/drivers/gpu/arm/bifrost/mali_kbase_event.c @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2010-2016,2018-2019 ARM Limited.
All rights reserved. + * (C) COPYRIGHT 2010-2016,2018-2020 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -24,7 +24,8 @@ #include #include -#include +#include +#include static struct base_jd_udata kbase_event_process(struct kbase_context *kctx, struct kbase_jd_atom *katom) { @@ -44,22 +45,12 @@ static struct base_jd_udata kbase_event_process(struct kbase_context *kctx, stru KBASE_TLSTREAM_TL_DEL_ATOM(kbdev, katom); katom->status = KBASE_JD_ATOM_STATE_UNUSED; - + dev_dbg(kbdev->dev, "Atom %p status to unused\n", (void *)katom); wake_up(&katom->completed); return data; } -int kbase_event_pending(struct kbase_context *ctx) -{ - KBASE_DEBUG_ASSERT(ctx); - - return (atomic_read(&ctx->event_count) != 0) || - (atomic_read(&ctx->event_closed) != 0); -} - -KBASE_EXPORT_TEST_API(kbase_event_pending); - int kbase_event_dequeue(struct kbase_context *ctx, struct base_jd_event_v2 *uevent) { struct kbase_jd_atom *atom; @@ -93,6 +84,7 @@ int kbase_event_dequeue(struct kbase_context *ctx, struct base_jd_event_v2 *ueve dev_dbg(ctx->kbdev->dev, "event dequeuing %p\n", (void *)atom); uevent->event_code = atom->event_code; + uevent->atom_number = (atom - ctx->jctx.atoms); if (atom->core_req & BASE_JD_REQ_EXTERNAL_RESOURCES) @@ -174,22 +166,25 @@ void kbase_event_post(struct kbase_context *ctx, struct kbase_jd_atom *atom) { struct kbase_device *kbdev = ctx->kbdev; + dev_dbg(kbdev->dev, "Posting event for atom %p\n", (void *)atom); + if (atom->core_req & BASE_JD_REQ_EVENT_ONLY_ON_FAILURE) { if (atom->event_code == BASE_JD_EVENT_DONE) { - /* Don't report the event */ + dev_dbg(kbdev->dev, "Suppressing event (atom done)\n"); kbase_event_process_noreport(ctx, atom); return; } } if (atom->core_req & BASEP_JD_REQ_EVENT_NEVER) { - /* Don't report the event */ + dev_dbg(kbdev->dev, "Suppressing event (never)\n"); kbase_event_process_noreport(ctx, atom); 
return; } KBASE_TLSTREAM_TL_ATTRIB_ATOM_STATE(kbdev, atom, TL_ATOM_STATE_POSTED); if (atom->core_req & BASE_JD_REQ_EVENT_COALESCE) { /* Don't report the event until other event(s) have completed */ + dev_dbg(kbdev->dev, "Deferring event (coalesced)\n"); mutex_lock(&ctx->event_mutex); list_add_tail(&atom->dep_item[0], &ctx->event_coalesce_list); ++ctx->event_coalesce_count; @@ -203,8 +198,13 @@ void kbase_event_post(struct kbase_context *ctx, struct kbase_jd_atom *atom) list_add_tail(&atom->dep_item[0], &ctx->event_list); atomic_add(event_count, &ctx->event_count); mutex_unlock(&ctx->event_mutex); + dev_dbg(kbdev->dev, "Reporting %d events\n", event_count); kbase_event_wakeup(ctx); + + /* Post-completion latency */ + trace_sysgraph(SGR_POST, ctx->id, + kbase_jd_atom_id(ctx, atom)); } } KBASE_EXPORT_TEST_API(kbase_event_post); @@ -224,9 +224,7 @@ int kbase_event_init(struct kbase_context *kctx) INIT_LIST_HEAD(&kctx->event_list); INIT_LIST_HEAD(&kctx->event_coalesce_list); mutex_init(&kctx->event_mutex); - atomic_set(&kctx->event_count, 0); kctx->event_coalesce_count = 0; - atomic_set(&kctx->event_closed, false); kctx->event_workq = alloc_workqueue("kbase_event", WQ_MEM_RECLAIM, 1); if (NULL == kctx->event_workq) diff --git a/drivers/gpu/arm/bifrost/mali_kbase_fence.c b/drivers/gpu/arm/bifrost/mali_kbase_fence.c index 96a6ab96095d..7a715b3354be 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_fence.c +++ b/drivers/gpu/arm/bifrost/mali_kbase_fence.c @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2011-2019 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2011-2020 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -87,6 +87,7 @@ const struct dma_fence_ops kbase_fence_ops = { .fence_value_str = kbase_fence_fence_value_str }; + #if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0)) struct fence * kbase_fence_out_new(struct kbase_jd_atom *katom) @@ -210,3 +211,4 @@ kbase_fence_add_callback(struct kbase_jd_atom *katom, return err; } + diff --git a/drivers/gpu/arm/bifrost/mali_kbase_fence.h b/drivers/gpu/arm/bifrost/mali_kbase_fence.h index e0b25b5890df..6079a7dfb2ef 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_fence.h +++ b/drivers/gpu/arm/bifrost/mali_kbase_fence.h @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2010-2018 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2010-2018, 2020 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -87,6 +87,7 @@ struct dma_fence *kbase_fence_out_new(struct kbase_jd_atom *katom); } while (0) #endif + /** * kbase_fence_out_remove() - Removes the output fence from atom * @katom: Atom to remove output fence for @@ -268,6 +269,7 @@ bool kbase_fence_free_callbacks(struct kbase_jd_atom *katom); */ #define kbase_fence_out_get(katom) dma_fence_get((katom)->dma_fence.fence) + /** * kbase_fence_put() - Releases a reference to a fence * @fence: Fence to release reference for. diff --git a/drivers/gpu/arm/bifrost/mali_kbase_gpu_memory_debugfs.c b/drivers/gpu/arm/bifrost/mali_kbase_gpu_memory_debugfs.c index 2c42f5ccdd4e..569abd920fde 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_gpu_memory_debugfs.c +++ b/drivers/gpu/arm/bifrost/mali_kbase_gpu_memory_debugfs.c @@ -21,6 +21,7 @@ */ #include +#include #ifdef CONFIG_DEBUG_FS /** Show callback for the @c gpu_memory debugfs file. 
@@ -40,7 +41,7 @@ static int kbasep_gpu_memory_seq_show(struct seq_file *sfile, void *data) struct list_head *entry; const struct list_head *kbdev_list; - kbdev_list = kbase_dev_list_get(); + kbdev_list = kbase_device_get_list(); list_for_each(entry, kbdev_list) { struct kbase_device *kbdev = NULL; struct kbase_context *kctx; @@ -61,7 +62,7 @@ static int kbasep_gpu_memory_seq_show(struct seq_file *sfile, void *data) } mutex_unlock(&kbdev->kctx_list_lock); } - kbase_dev_list_put(kbdev_list); + kbase_device_put_list(kbdev_list); return 0; } diff --git a/drivers/gpu/arm/bifrost/mali_kbase_gpuprops.c b/drivers/gpu/arm/bifrost/mali_kbase_gpuprops.c index f6b70bd0798c..ae2458f497da 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_gpuprops.c +++ b/drivers/gpu/arm/bifrost/mali_kbase_gpuprops.c @@ -1,6 +1,7 @@ +// SPDX-License-Identifier: GPL-2.0 /* * - * (C) COPYRIGHT 2011-2019 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2011-2020 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -27,30 +28,19 @@ */ #include -#include +#include #include #include +#include #include "mali_kbase_ioctl.h" #include #include #include #include -/** - * KBASE_UBFX32 - Extracts bits from a 32-bit bitfield. - * @value: The value from which to extract bits. - * @offset: The first bit to extract (0 being the LSB). - * @size: The number of bits to extract. - * - * Context: @offset + @size <= 32. - * - * Return: Bits [@offset, @offset + @size) from @value. 
- */ -/* from mali_cdsb.h */ -#define KBASE_UBFX32(value, offset, size) \ - (((u32)(value) >> (u32)(offset)) & (u32)((1ULL << (u32)(size)) - 1)) -static void kbase_gpuprops_construct_coherent_groups(base_gpu_props * const props) +static void kbase_gpuprops_construct_coherent_groups( + struct base_gpu_props * const props) { struct mali_base_gpu_coherent_group *current_group; u64 group_present; @@ -119,22 +109,28 @@ static void kbase_gpuprops_construct_coherent_groups(base_gpu_props * const prop /** * kbase_gpuprops_get_props - Get the GPU configuration - * @gpu_props: The &base_gpu_props structure + * @gpu_props: The &struct base_gpu_props structure * @kbdev: The &struct kbase_device structure for the device * - * Fill the &base_gpu_props structure with values from the GPU configuration - * registers. Only the raw properties are filled in this function + * Fill the &struct base_gpu_props structure with values from the GPU + * configuration registers. Only the raw properties are filled in this function. 
+ * + * Return: Zero on success, Linux error code on failure */ -static void kbase_gpuprops_get_props(base_gpu_props * const gpu_props, struct kbase_device *kbdev) +static int kbase_gpuprops_get_props(struct base_gpu_props * const gpu_props, + struct kbase_device *kbdev) { struct kbase_gpuprops_regdump regdump; int i; + int err; KBASE_DEBUG_ASSERT(NULL != kbdev); KBASE_DEBUG_ASSERT(NULL != gpu_props); /* Dump relevant registers */ - kbase_backend_gpuprops_get(kbdev, &regdump); + err = kbase_backend_gpuprops_get(kbdev, &regdump); + if (err) + return err; gpu_props->raw_props.gpu_id = regdump.gpu_id; gpu_props->raw_props.tiler_features = regdump.tiler_features; @@ -169,9 +165,12 @@ static void kbase_gpuprops_get_props(base_gpu_props * const gpu_props, struct kb gpu_props->raw_props.thread_max_workgroup_size = regdump.thread_max_workgroup_size; gpu_props->raw_props.thread_features = regdump.thread_features; gpu_props->raw_props.thread_tls_alloc = regdump.thread_tls_alloc; + + return 0; } -void kbase_gpuprops_update_core_props_gpu_id(base_gpu_props * const gpu_props) +void kbase_gpuprops_update_core_props_gpu_id( + struct base_gpu_props * const gpu_props) { gpu_props->core_props.version_status = KBASE_UBFX32(gpu_props->raw_props.gpu_id, 0U, 4); @@ -185,13 +184,14 @@ void kbase_gpuprops_update_core_props_gpu_id(base_gpu_props * const gpu_props) /** * kbase_gpuprops_calculate_props - Calculate the derived properties - * @gpu_props: The &base_gpu_props structure + * @gpu_props: The &struct base_gpu_props structure * @kbdev: The &struct kbase_device structure for the device * - * Fill the &base_gpu_props structure with values derived from the GPU + * Fill the &struct base_gpu_props structure with values derived from the GPU * configuration registers */ -static void kbase_gpuprops_calculate_props(base_gpu_props * const gpu_props, struct kbase_device *kbdev) +static void kbase_gpuprops_calculate_props( + struct base_gpu_props * const gpu_props, struct kbase_device *kbdev) { int i; 
u32 gpu_id; @@ -247,8 +247,8 @@ static void kbase_gpuprops_calculate_props(base_gpu_props * const gpu_props, str gpu_props->thread_props.tls_alloc = gpu_props->raw_props.thread_tls_alloc; - /* Workaround for GPU2019HW-509. MIDHARC-2364 was wrongfully applied - * to tDUx GPUs. + /* MIDHARC-2364 was intended for tULx. + * Workaround for the incorrectly applied THREAD_FEATURES to tDUx. */ gpu_id = kbdev->gpu_props.props.raw_props.gpu_id; product_id = gpu_id & GPU_ID_VERSION_PRODUCT_ID; @@ -320,15 +320,18 @@ void kbase_gpuprops_set(struct kbase_device *kbdev) gpu_props->num_job_slots = hweight32(raw->js_present); } -void kbase_gpuprops_set_features(struct kbase_device *kbdev) +int kbase_gpuprops_set_features(struct kbase_device *kbdev) { - base_gpu_props *gpu_props; + struct base_gpu_props *gpu_props; struct kbase_gpuprops_regdump regdump; + int err; gpu_props = &kbdev->gpu_props.props; /* Dump relevant registers */ - kbase_backend_gpuprops_get_features(kbdev, &regdump); + err = kbase_backend_gpuprops_get_features(kbdev, &regdump); + if (err) + return err; /* * Copy the raw value from the register, later this will get turned @@ -340,6 +343,8 @@ void kbase_gpuprops_set_features(struct kbase_device *kbdev) if (!kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_THREAD_GROUP_SPLIT)) gpu_props->thread_props.max_thread_group_split = 0; + + return err; } /* @@ -391,15 +396,17 @@ static bool kbase_read_l2_config_from_dt(struct kbase_device * const kbdev) return false; } -void kbase_gpuprops_update_l2_features(struct kbase_device *kbdev) +int kbase_gpuprops_update_l2_features(struct kbase_device *kbdev) { + int err = 0; + if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_L2_CONFIG)) { struct kbase_gpuprops_regdump regdump; - base_gpu_props *gpu_props = &kbdev->gpu_props.props; + struct base_gpu_props *gpu_props = &kbdev->gpu_props.props; /* Check for L2 cache size & hash overrides */ if (!kbase_read_l2_config_from_dt(kbdev)) - return; + return 0; /* Need L2 to get powered to reflect to 
L2_FEATURES */ kbase_pm_context_active(kbdev); @@ -408,7 +415,9 @@ void kbase_gpuprops_update_l2_features(struct kbase_device *kbdev) kbase_pm_wait_for_l2_powered(kbdev); /* Dump L2_FEATURES register */ - kbase_backend_gpuprops_get_l2_features(kbdev, &regdump); + err = kbase_backend_gpuprops_get_l2_features(kbdev, &regdump); + if (err) + goto idle_gpu; dev_info(kbdev->dev, "Reflected L2_FEATURES is 0x%x\n", regdump.l2_features); @@ -418,9 +427,12 @@ void kbase_gpuprops_update_l2_features(struct kbase_device *kbdev) gpu_props->l2_props.log2_cache_size = KBASE_UBFX32(gpu_props->raw_props.l2_features, 16U, 8); +idle_gpu: /* Let GPU idle */ kbase_pm_context_idle(kbdev); } + + return err; } static struct { @@ -600,3 +612,25 @@ int kbase_gpuprops_populate_user_buffer(struct kbase_device *kbdev) return 0; } + +void kbase_gpuprops_free_user_buffer(struct kbase_device *kbdev) +{ + kfree(kbdev->gpu_props.prop_buffer); +} + +int kbase_device_populate_max_freq(struct kbase_device *kbdev) +{ + struct mali_base_gpu_core_props *core_props; + + /* obtain max configured gpu frequency, if devfreq is enabled then + * this will be overridden by the highest operating point found + */ + core_props = &(kbdev->gpu_props.props.core_props); +#ifdef GPU_FREQ_KHZ_MAX + core_props->gpu_freq_khz_max = GPU_FREQ_KHZ_MAX; +#else + core_props->gpu_freq_khz_max = DEFAULT_GPU_FREQ_KHZ_MAX; +#endif + + return 0; +} diff --git a/drivers/gpu/arm/bifrost/mali_kbase_gpuprops.h b/drivers/gpu/arm/bifrost/mali_kbase_gpuprops.h index 8edba4868ab4..5eee7948381a 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_gpuprops.h +++ b/drivers/gpu/arm/bifrost/mali_kbase_gpuprops.h @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2011-2015,2017,2019 ARM Limited. All rights reserved. + * (C) COPYRIGHT ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -18,6 +18,20 @@ * * SPDX-License-Identifier: GPL-2.0 * + *//* SPDX-License-Identifier: GPL-2.0 */ +/* + * + * (C) COPYRIGHT 2011-2015, 2017, 2019-2020 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + * */ @@ -35,6 +49,20 @@ /* Forward definition - see mali_kbase.h */ struct kbase_device; +/** + * KBASE_UBFX32 - Extracts bits from a 32-bit bitfield. + * @value: The value from which to extract bits. + * @offset: The first bit to extract (0 being the LSB). + * @size: The number of bits to extract. + * + * Context: @offset + @size <= 32. + * + * Return: Bits [@offset, @offset + @size) from @value. + */ +/* from mali_cdsb.h */ +#define KBASE_UBFX32(value, offset, size) \ + (((u32)(value) >> (u32)(offset)) & (u32)((1ULL << (u32)(size)) - 1)) + /** * @brief Set up Kbase GPU properties. * @@ -51,26 +79,48 @@ void kbase_gpuprops_set(struct kbase_device *kbdev); * This function sets up GPU properties that are dependent on the hardware * features bitmask. This function must be preceeded by a call to * kbase_hw_set_features_mask(). + * + * Return: Zero on success, Linux error code on failure */ -void kbase_gpuprops_set_features(struct kbase_device *kbdev); +int kbase_gpuprops_set_features(struct kbase_device *kbdev); /** * kbase_gpuprops_update_l2_features - Update GPU property of L2_FEATURES * @kbdev: Device pointer * * This function updates l2_features and the log2 cache size. 
+ * + * Return: Zero on success, Linux error code for failure */ -void kbase_gpuprops_update_l2_features(struct kbase_device *kbdev); +int kbase_gpuprops_update_l2_features(struct kbase_device *kbdev); /** * kbase_gpuprops_populate_user_buffer - Populate the GPU properties buffer * @kbdev: The kbase device * - * Fills kbdev->gpu_props->prop_buffer with the GPU properties for user - * space to read. + * Fills prop_buffer with the GPU properties for user space to read. */ int kbase_gpuprops_populate_user_buffer(struct kbase_device *kbdev); +/** + * kbase_gpuprops_free_user_buffer - Free the GPU properties buffer. + * @kbdev: kbase device pointer + * + * Free the GPU properties buffer allocated from + * kbase_gpuprops_populate_user_buffer. + */ +void kbase_gpuprops_free_user_buffer(struct kbase_device *kbdev); + +/** + * kbase_device_populate_max_freq - Populate max gpu frequency. + * @kbdev: kbase device pointer + * + * Populate the maximum gpu frequency to be used when devfreq is disabled. + * + * Return: 0 on success and non-zero value on failure. + */ +int kbase_device_populate_max_freq(struct kbase_device *kbdev); + /** * kbase_gpuprops_update_core_props_gpu_id - break down gpu id value * @gpu_props: the &base_gpu_props structure @@ -79,7 +129,7 @@ int kbase_gpuprops_populate_user_buffer(struct kbase_device *kbdev); * separate fields (version_status, minor_revision, major_revision, product_id) * stored in base_gpu_props::core_props. 
*/ -void kbase_gpuprops_update_core_props_gpu_id(base_gpu_props * const gpu_props); - +void kbase_gpuprops_update_core_props_gpu_id( + struct base_gpu_props * const gpu_props); #endif /* _KBASE_GPUPROPS_H_ */ diff --git a/drivers/gpu/arm/bifrost/mali_kbase_gpuprops_types.h b/drivers/gpu/arm/bifrost/mali_kbase_gpuprops_types.h index d7877d1d4a57..ec6f1c39ccb0 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_gpuprops_types.h +++ b/drivers/gpu/arm/bifrost/mali_kbase_gpuprops_types.h @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2011-2018 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2011-2018, 2020 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -89,7 +89,7 @@ struct kbase_gpu_props { struct kbase_gpu_mmu_props mmu; /* Properties shared with userspace */ - base_gpu_props props; + struct base_gpu_props props; u32 prop_buffer_size; void *prop_buffer; diff --git a/drivers/gpu/arm/bifrost/mali_kbase_gwt.c b/drivers/gpu/arm/bifrost/mali_kbase_gwt.c index 75a0820d5560..6a47c9dd3610 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_gwt.c +++ b/drivers/gpu/arm/bifrost/mali_kbase_gwt.c @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2010-2019 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2010-2020 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software diff --git a/drivers/gpu/arm/bifrost/mali_kbase_hw.c b/drivers/gpu/arm/bifrost/mali_kbase_hw.c index c277c0c97f29..f8a9248e3c06 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_hw.c +++ b/drivers/gpu/arm/bifrost/mali_kbase_hw.c @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2012-2019 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2012-2020 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -28,7 +28,7 @@ #include #include -#include +#include "gpu/mali_kbase_gpu_regmap.h" #include "mali_kbase.h" #include "mali_kbase_hw.h" @@ -68,9 +68,6 @@ void kbase_hw_set_features_mask(struct kbase_device *kbdev) case GPU_ID2_PRODUCT_TBEX: features = base_hw_features_tBEx; break; - case GPU_ID2_PRODUCT_TULX: - features = base_hw_features_tULx; - break; case GPU_ID2_PRODUCT_TDUX: features = base_hw_features_tDUx; break; @@ -78,12 +75,20 @@ void kbase_hw_set_features_mask(struct kbase_device *kbdev) case GPU_ID2_PRODUCT_LODX: features = base_hw_features_tODx; break; - case GPU_ID2_PRODUCT_TIDX: - features = base_hw_features_tIDx; + case GPU_ID2_PRODUCT_TGRX: + features = base_hw_features_tGRx; break; case GPU_ID2_PRODUCT_TVAX: features = base_hw_features_tVAx; break; + case GPU_ID2_PRODUCT_TTUX: + /* Fallthrough */ + case GPU_ID2_PRODUCT_LTUX: + features = base_hw_features_tTUx; + break; + case GPU_ID2_PRODUCT_TE2X: + features = base_hw_features_tE2x; + break; default: features = base_hw_features_generic; break; @@ -92,11 +97,11 @@ void kbase_hw_set_features_mask(struct kbase_device *kbdev) for (; *features != BASE_HW_FEATURE_END; features++) set_bit(*features, &kbdev->hw_features_mask[0]); -#if defined(CONFIG_MALI_JOB_DUMP) || defined(CONFIG_MALI_VECTOR_DUMP) +#if defined(CONFIG_MALI_VECTOR_DUMP) /* When dumping is enabled, need to disable flush reduction optimization * for GPUs on which it is safe to have only cache clean operation at * the end of job chain. - * This is required to make job dumping work. There is some discrepancy + * This is required to make vector dump work. There is some discrepancy * in the implementation of flush reduction optimization due to * unclear or ambiguous ARCH spec. 
*/ @@ -177,6 +182,7 @@ static const enum base_hw_issue *kbase_hw_get_issues_for_new_id( {GPU_ID2_VERSION_MAKE(0, 0, 3), base_hw_issues_tTRx_r0p0}, {GPU_ID2_VERSION_MAKE(0, 1, 0), base_hw_issues_tTRx_r0p1}, {GPU_ID2_VERSION_MAKE(0, 1, 1), base_hw_issues_tTRx_r0p1}, + {GPU_ID2_VERSION_MAKE(0, 2, 0), base_hw_issues_tTRx_r0p2}, {U32_MAX, NULL} } }, {GPU_ID2_PRODUCT_TNAX, @@ -189,19 +195,17 @@ static const enum base_hw_issue *kbase_hw_get_issues_for_new_id( {U32_MAX, NULL} } }, {GPU_ID2_PRODUCT_LBEX, - {{GPU_ID2_VERSION_MAKE(1, 0, 0), base_hw_issues_tBEx_r1p0}, + {{GPU_ID2_VERSION_MAKE(1, 0, 0), base_hw_issues_lBEx_r1p0}, + {GPU_ID2_VERSION_MAKE(1, 1, 0), base_hw_issues_lBEx_r1p1}, {U32_MAX, NULL} } }, {GPU_ID2_PRODUCT_TBEX, {{GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tBEx_r0p0}, {GPU_ID2_VERSION_MAKE(0, 0, 3), base_hw_issues_tBEx_r0p0}, + {GPU_ID2_VERSION_MAKE(0, 1, 0), base_hw_issues_tBEx_r0p1}, {GPU_ID2_VERSION_MAKE(1, 0, 0), base_hw_issues_tBEx_r1p0}, {U32_MAX, NULL} } }, - {GPU_ID2_PRODUCT_TULX, - {{GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tULx_r0p0}, - {U32_MAX, NULL} } }, - {GPU_ID2_PRODUCT_TDUX, {{GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tDUx_r0p0}, {U32_MAX, NULL} } }, @@ -214,13 +218,25 @@ static const enum base_hw_issue *kbase_hw_get_issues_for_new_id( {{GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tODx_r0p0}, {U32_MAX, NULL} } }, - {GPU_ID2_PRODUCT_TIDX, - {{GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tIDx_r0p0}, + {GPU_ID2_PRODUCT_TGRX, + {{GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tGRx_r0p0}, {U32_MAX, NULL} } }, {GPU_ID2_PRODUCT_TVAX, {{GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tVAx_r0p0}, {U32_MAX, NULL} } }, + + {GPU_ID2_PRODUCT_TTUX, + {{GPU_ID2_VERSION_MAKE(2, 0, 0), base_hw_issues_tTUx_r0p0}, + {U32_MAX, NULL} } }, + + {GPU_ID2_PRODUCT_LTUX, + {{GPU_ID2_VERSION_MAKE(3, 0, 0), base_hw_issues_tTUx_r0p0}, + {U32_MAX, NULL} } }, + + {GPU_ID2_PRODUCT_TE2X, + {{GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tE2x_r0p0}, + {U32_MAX, 
NULL} } }, }; u32 gpu_id = kbdev->gpu_props.props.raw_props.gpu_id; @@ -358,9 +374,6 @@ int kbase_hw_set_issues_mask(struct kbase_device *kbdev) case GPU_ID2_PRODUCT_TBEX: issues = base_hw_issues_model_tBEx; break; - case GPU_ID2_PRODUCT_TULX: - issues = base_hw_issues_model_tULx; - break; case GPU_ID2_PRODUCT_TDUX: issues = base_hw_issues_model_tDUx; break; @@ -368,12 +381,19 @@ int kbase_hw_set_issues_mask(struct kbase_device *kbdev) case GPU_ID2_PRODUCT_LODX: issues = base_hw_issues_model_tODx; break; - case GPU_ID2_PRODUCT_TIDX: - issues = base_hw_issues_model_tIDx; + case GPU_ID2_PRODUCT_TGRX: + issues = base_hw_issues_model_tGRx; break; case GPU_ID2_PRODUCT_TVAX: issues = base_hw_issues_model_tVAx; break; + case GPU_ID2_PRODUCT_TTUX: + case GPU_ID2_PRODUCT_LTUX: + issues = base_hw_issues_model_tTUx; + break; + case GPU_ID2_PRODUCT_TE2X: + issues = base_hw_issues_model_tE2x; + break; default: dev_err(kbdev->dev, "Unknown GPU ID %x", gpu_id); diff --git a/drivers/gpu/arm/bifrost/mali_kbase_hwaccess_backend.h b/drivers/gpu/arm/bifrost/mali_kbase_hwaccess_backend.h index d5e3d3abdc9e..89df2519ab97 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_hwaccess_backend.h +++ b/drivers/gpu/arm/bifrost/mali_kbase_hwaccess_backend.h @@ -28,34 +28,6 @@ #ifndef _KBASE_HWACCESS_BACKEND_H_ #define _KBASE_HWACCESS_BACKEND_H_ -/** - * kbase_backend_early_init - Perform any backend-specific initialization. - * @kbdev: Device pointer - * - * Return: 0 on success, or an error code on failure. - */ -int kbase_backend_early_init(struct kbase_device *kbdev); - -/** - * kbase_backend_late_init - Perform any backend-specific initialization. - * @kbdev: Device pointer - * - * Return: 0 on success, or an error code on failure. - */ -int kbase_backend_late_init(struct kbase_device *kbdev); - -/** - * kbase_backend_early_term - Perform any backend-specific termination. 
- * @kbdev: Device pointer - */ -void kbase_backend_early_term(struct kbase_device *kbdev); - -/** - * kbase_backend_late_term - Perform any backend-specific termination. - * @kbdev: Device pointer - */ -void kbase_backend_late_term(struct kbase_device *kbdev); - /** * kbase_backend_devfreq_init - Perform backend devfreq related initialization. * @kbdev: Device pointer diff --git a/drivers/gpu/arm/bifrost/mali_kbase_hwaccess_gpuprops.h b/drivers/gpu/arm/bifrost/mali_kbase_hwaccess_gpuprops.h index 62628b612036..3ae0dbe6886d 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_hwaccess_gpuprops.h +++ b/drivers/gpu/arm/bifrost/mali_kbase_hwaccess_gpuprops.h @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2014-2015, 2018, 2019 ARM Limited. All rights reserved. + * (C) COPYRIGHT ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -18,6 +18,20 @@ * * SPDX-License-Identifier: GPL-2.0 * + *//* SPDX-License-Identifier: GPL-2.0 */ +/* + * + * (C) COPYRIGHT 2014-2015, 2018, 2019-2020 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + * */ @@ -35,8 +49,10 @@ * @regdump: Pointer to struct kbase_gpuprops_regdump structure * * The caller should ensure that GPU remains powered-on during this function. 
+ * + * Return: Zero for success or a Linux error code + */ -void kbase_backend_gpuprops_get(struct kbase_device *kbdev, +int kbase_backend_gpuprops_get(struct kbase_device *kbdev, struct kbase_gpuprops_regdump *regdump); /** @@ -47,8 +63,10 @@ void kbase_backend_gpuprops_get(struct kbase_device *kbdev, * * This function reads GPU properties that are dependent on the hardware * features bitmask. It will power-on the GPU if required. + * + * Return: Zero for success or a Linux error code + */ -void kbase_backend_gpuprops_get_features(struct kbase_device *kbdev, +int kbase_backend_gpuprops_get_features(struct kbase_device *kbdev, struct kbase_gpuprops_regdump *regdump); /** @@ -59,8 +77,10 @@ void kbase_backend_gpuprops_get_features(struct kbase_device *kbdev, * * This function reads L2_FEATURES register that is dependent on the hardware * features bitmask. It will power-on the GPU if required. + * + * Return: Zero on success, Linux error code on failure */ -void kbase_backend_gpuprops_get_l2_features(struct kbase_device *kbdev, +int kbase_backend_gpuprops_get_l2_features(struct kbase_device *kbdev, struct kbase_gpuprops_regdump *regdump); diff --git a/drivers/gpu/arm/bifrost/mali_kbase_hwaccess_instr.h b/drivers/gpu/arm/bifrost/mali_kbase_hwaccess_instr.h index d5b90994790b..be85491c18d9 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_hwaccess_instr.h +++ b/drivers/gpu/arm/bifrost/mali_kbase_hwaccess_instr.h @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2014-2015, 2017-2018 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014-2015, 2017-2018, 2020 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -139,4 +139,13 @@ int kbase_instr_backend_init(struct kbase_device *kbdev); */ void kbase_instr_backend_term(struct kbase_device *kbdev); +#ifdef CONFIG_MALI_PRFCNT_SET_SECONDARY_VIA_DEBUG_FS +/** + * kbase_instr_backend_debugfs_init() - Add a debugfs entry for the + * hardware counter set. + * @kbdev: kbase device + */ +void kbase_instr_backend_debugfs_init(struct kbase_device *kbdev); +#endif + #endif /* _KBASE_HWACCESS_INSTR_H_ */ diff --git a/drivers/gpu/arm/bifrost/mali_kbase_hwaccess_jm.h b/drivers/gpu/arm/bifrost/mali_kbase_hwaccess_jm.h index cfda5c4b8129..3d5934e0e0a1 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_hwaccess_jm.h +++ b/drivers/gpu/arm/bifrost/mali_kbase_hwaccess_jm.h @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2014-2019 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014-2020 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -299,9 +299,4 @@ void kbase_job_slot_hardstop(struct kbase_context *kctx, int js, */ bool kbase_gpu_atoms_submitted_any(struct kbase_device *kbdev); -/* Object containing callbacks for enabling/disabling protected mode, used - * on GPU which supports protected mode switching natively. 
- */ -extern struct protected_mode_ops kbase_native_protected_ops; - #endif /* _KBASE_HWACCESS_JM_H_ */ diff --git a/drivers/gpu/arm/bifrost/mali_kbase_hwaccess_pm.h b/drivers/gpu/arm/bifrost/mali_kbase_hwaccess_pm.h index 96c473ac94ef..bbaf6eaf8d88 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_hwaccess_pm.h +++ b/drivers/gpu/arm/bifrost/mali_kbase_hwaccess_pm.h @@ -29,7 +29,7 @@ #ifndef _KBASE_HWACCESS_PM_H_ #define _KBASE_HWACCESS_PM_H_ -#include +#include #include #include @@ -208,4 +208,22 @@ void kbase_pm_set_policy(struct kbase_device *kbdev, int kbase_pm_list_policies(struct kbase_device *kbdev, const struct kbase_pm_policy * const **list); +/** + * kbase_pm_protected_mode_enable - Enable protected mode + * + * @kbdev: Address of the instance of a GPU platform device. + * + * Return: Zero on success or an error code + */ +int kbase_pm_protected_mode_enable(struct kbase_device *kbdev); + +/** + * kbase_pm_protected_mode_disable - Disable protected mode + * + * @kbdev: Address of the instance of a GPU platform device. + * + * Return: Zero on success or an error code + */ +int kbase_pm_protected_mode_disable(struct kbase_device *kbdev); + #endif /* _KBASE_HWACCESS_PM_H_ */ diff --git a/drivers/gpu/arm/bifrost/mali_kbase_hwaccess_time.h b/drivers/gpu/arm/bifrost/mali_kbase_hwaccess_time.h index ca0cd797e5a2..a61e5b9b3e0a 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_hwaccess_time.h +++ b/drivers/gpu/arm/bifrost/mali_kbase_hwaccess_time.h @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2014,2018-2019 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014,2018-2020 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -37,6 +37,6 @@ * time in */ void kbase_backend_get_gpu_time(struct kbase_device *kbdev, u64 *cycle_counter, - u64 *system_time, struct timespec *ts); + u64 *system_time, struct timespec64 *ts); #endif /* _KBASE_BACKEND_TIME_H_ */ diff --git a/drivers/gpu/arm/bifrost/mali_kbase_hwcnt.c b/drivers/gpu/arm/bifrost/mali_kbase_hwcnt.c index 265fc2138bb5..14ec5cb1c0d3 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_hwcnt.c +++ b/drivers/gpu/arm/bifrost/mali_kbase_hwcnt.c @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2018 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2018, 2020 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -683,21 +683,14 @@ bool kbase_hwcnt_context_disable_atomic(struct kbase_hwcnt_context *hctx) if (!WARN_ON(hctx->disable_count == SIZE_MAX)) { /* - * If disable count is non-zero or no counters are enabled, we - * can just bump the disable count. + * If disable count is non-zero, we can just bump the disable + * count. * * Otherwise, we can't disable in an atomic context. */ if (hctx->disable_count != 0) { hctx->disable_count++; atomic_disabled = true; - } else { - WARN_ON(!hctx->accum_inited); - if (!hctx->accum.enable_map_any_enabled) { - hctx->disable_count++; - hctx->accum.state = ACCUM_STATE_DISABLED; - atomic_disabled = true; - } } } diff --git a/drivers/gpu/arm/bifrost/mali_kbase_hwcnt_backend_gpu.c b/drivers/gpu/arm/bifrost/mali_kbase_hwcnt_backend_gpu.c index ec8cff34a763..1fcff38b48dd 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_hwcnt_backend_gpu.c +++ b/drivers/gpu/arm/bifrost/mali_kbase_hwcnt_backend_gpu.c @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2018-2019 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2018-2020 ARM Limited. 
All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -30,6 +30,7 @@ #include "backend/gpu/mali_kbase_model_dummy.h" #endif + /** * struct kbase_hwcnt_backend_gpu_info - Information used to create an instance * of a GPU hardware counter backend. @@ -72,11 +73,8 @@ struct kbase_hwcnt_backend_gpu { static u64 kbasep_hwcnt_backend_gpu_timestamp_ns( struct kbase_hwcnt_backend *backend) { - struct timespec ts; - (void)backend; - getrawmonotonic(&ts); - return (u64)ts.tv_sec * NSEC_PER_SEC + ts.tv_nsec; + return ktime_get_raw_ns(); } /* GPU backend implementation of kbase_hwcnt_backend_dump_enable_nolock_fn */ @@ -324,6 +322,7 @@ static int kbasep_hwcnt_backend_gpu_create( const struct kbase_hwcnt_backend_gpu_info *info, struct kbase_hwcnt_backend_gpu **out_backend) { + int errcode; struct kbase_device *kbdev; struct kbase_hwcnt_backend_gpu *backend = NULL; diff --git a/drivers/gpu/arm/bifrost/mali_kbase_ioctl.h b/drivers/gpu/arm/bifrost/mali_kbase_ioctl.h index 9694d620fada..977b194eb9c4 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_ioctl.h +++ b/drivers/gpu/arm/bifrost/mali_kbase_ioctl.h @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2017-2019 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2017-2020 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -30,62 +30,9 @@ extern "C" { #include #include -#define KBASE_IOCTL_TYPE 0x80 +#include "jm/mali_kbase_jm_ioctl.h" -/* - * 11.1: - * - Add BASE_MEM_TILER_ALIGN_TOP under base_mem_alloc_flags - * 11.2: - * - KBASE_MEM_QUERY_FLAGS can return KBASE_REG_PF_GROW and KBASE_REG_PROTECTED, - * which some user-side clients prior to 11.2 might fault if they received - * them - * 11.3: - * - New ioctls KBASE_IOCTL_STICKY_RESOURCE_MAP and - * KBASE_IOCTL_STICKY_RESOURCE_UNMAP - * 11.4: - * - New ioctl KBASE_IOCTL_MEM_FIND_GPU_START_AND_OFFSET - * 11.5: - * - New ioctl: KBASE_IOCTL_MEM_JIT_INIT (old ioctl renamed to _OLD) - * 11.6: - * - Added flags field to base_jit_alloc_info structure, which can be used to - * specify pseudo chunked tiler alignment for JIT allocations. - * 11.7: - * - Removed UMP support - * 11.8: - * - Added BASE_MEM_UNCACHED_GPU under base_mem_alloc_flags - * 11.9: - * - Added BASE_MEM_PERMANENT_KERNEL_MAPPING and BASE_MEM_FLAGS_KERNEL_ONLY - * under base_mem_alloc_flags - * 11.10: - * - Enabled the use of nr_extres field of base_jd_atom_v2 structure for - * JIT_ALLOC and JIT_FREE type softjobs to enable multiple JIT allocations - * with one softjob. - * 11.11: - * - Added BASE_MEM_GPU_VA_SAME_4GB_PAGE under base_mem_alloc_flags - * 11.12: - * - Removed ioctl: KBASE_IOCTL_GET_PROFILING_CONTROLS - * 11.13: - * - New ioctl: KBASE_IOCTL_MEM_EXEC_INIT - * 11.14: - * - Add BASE_MEM_GROUP_ID_MASK, base_mem_group_id_get, base_mem_group_id_set - * under base_mem_alloc_flags - * 11.15: - * - Added BASEP_CONTEXT_MMU_GROUP_ID_MASK under base_context_create_flags. - * - Require KBASE_IOCTL_SET_FLAGS before BASE_MEM_MAP_TRACKING_HANDLE can be - * passed to mmap(). - * 11.16: - * - Extended ioctl KBASE_IOCTL_MEM_SYNC to accept imported dma-buf. 
- * - Modified (backwards compatible) ioctl KBASE_IOCTL_MEM_IMPORT behavior for - * dma-buf. Now, buffers are mapped on GPU when first imported, no longer - * requiring external resource or sticky resource tracking. UNLESS, - * CONFIG_MALI_DMA_BUF_MAP_ON_DEMAND is enabled. - * 11.17: - * - Added BASE_JD_REQ_JOB_SLOT. - * - Reused padding field in base_jd_atom_v2 to pass job slot number. - * - New ioctl: KBASE_IOCTL_GET_CPU_GPU_TIMEINFO - */ -#define BASE_UK_VERSION_MAJOR 11 -#define BASE_UK_VERSION_MINOR 17 +#define KBASE_IOCTL_TYPE 0x80 /** * struct kbase_ioctl_version_check - Check version compatibility with kernel @@ -113,22 +60,6 @@ struct kbase_ioctl_set_flags { #define KBASE_IOCTL_SET_FLAGS \ _IOW(KBASE_IOCTL_TYPE, 1, struct kbase_ioctl_set_flags) -/** - * struct kbase_ioctl_job_submit - Submit jobs/atoms to the kernel - * - * @addr: Memory address of an array of struct base_jd_atom_v2 - * @nr_atoms: Number of entries in the array - * @stride: sizeof(struct base_jd_atom_v2) - */ -struct kbase_ioctl_job_submit { - __u64 addr; - __u32 nr_atoms; - __u32 stride; -}; - -#define KBASE_IOCTL_JOB_SUBMIT \ - _IOW(KBASE_IOCTL_TYPE, 2, struct kbase_ioctl_job_submit) - /** * struct kbase_ioctl_get_gpuprops - Read GPU properties from the kernel * @@ -164,9 +95,6 @@ struct kbase_ioctl_get_gpuprops { #define KBASE_IOCTL_GET_GPUPROPS \ _IOW(KBASE_IOCTL_TYPE, 3, struct kbase_ioctl_get_gpuprops) -#define KBASE_IOCTL_POST_TERM \ - _IO(KBASE_IOCTL_TYPE, 4) - /** * union kbase_ioctl_mem_alloc - Allocate memory on the GPU * @@ -330,8 +258,9 @@ struct kbase_ioctl_get_ddk_version { _IOW(KBASE_IOCTL_TYPE, 13, struct kbase_ioctl_get_ddk_version) /** - * struct kbase_ioctl_mem_jit_init_old - Initialise the JIT memory allocator - * + * struct kbase_ioctl_mem_jit_init_10_2 - Initialize the just-in-time memory + * allocator (between kernel driver + * version 10.2--11.4) * @va_pages: Number of VA pages to reserve for JIT * * Note that depending on the VA size of the application and GPU, 
the value @@ -340,16 +269,17 @@ struct kbase_ioctl_get_ddk_version { * New code should use KBASE_IOCTL_MEM_JIT_INIT instead, this is kept for * backwards compatibility. */ -struct kbase_ioctl_mem_jit_init_old { +struct kbase_ioctl_mem_jit_init_10_2 { __u64 va_pages; }; -#define KBASE_IOCTL_MEM_JIT_INIT_OLD \ - _IOW(KBASE_IOCTL_TYPE, 14, struct kbase_ioctl_mem_jit_init_old) +#define KBASE_IOCTL_MEM_JIT_INIT_10_2 \ + _IOW(KBASE_IOCTL_TYPE, 14, struct kbase_ioctl_mem_jit_init_10_2) /** - * struct kbase_ioctl_mem_jit_init - Initialise the JIT memory allocator - * + * struct kbase_ioctl_mem_jit_init_11_5 - Initialize the just-in-time memory + * allocator (between kernel driver + * version 11.5--11.19) * @va_pages: Number of VA pages to reserve for JIT * @max_allocations: Maximum number of concurrent allocations * @trim_level: Level of JIT allocation trimming to perform on free (0 - 100%) @@ -358,6 +288,34 @@ struct kbase_ioctl_mem_jit_init_old { * * Note that depending on the VA size of the application and GPU, the value * specified in @va_pages may be ignored. + * + * New code should use KBASE_IOCTL_MEM_JIT_INIT instead, this is kept for + * backwards compatibility. 
+ */ +struct kbase_ioctl_mem_jit_init_11_5 { + __u64 va_pages; + __u8 max_allocations; + __u8 trim_level; + __u8 group_id; + __u8 padding[5]; +}; + +#define KBASE_IOCTL_MEM_JIT_INIT_11_5 \ + _IOW(KBASE_IOCTL_TYPE, 14, struct kbase_ioctl_mem_jit_init_11_5) + +/** + * struct kbase_ioctl_mem_jit_init - Initialize the just-in-time memory + * allocator + * @va_pages: Number of GPU virtual address pages to reserve for just-in-time + * memory allocations + * @max_allocations: Maximum number of concurrent allocations + * @trim_level: Level of JIT allocation trimming to perform on free (0 - 100%) + * @group_id: Group ID to be used for physical allocations + * @padding: Currently unused, must be zero + * @phys_pages: Maximum number of physical pages to allocate just-in-time + * + * Note that depending on the VA size of the application and GPU, the value + * specified in @va_pages may be ignored. */ struct kbase_ioctl_mem_jit_init { __u64 va_pages; @@ -365,6 +323,7 @@ struct kbase_ioctl_mem_jit_init { __u8 trim_level; __u8 group_id; __u8 padding[5]; + __u64 phys_pages; }; #define KBASE_IOCTL_MEM_JIT_INIT \ @@ -582,21 +541,6 @@ struct kbase_ioctl_mem_profile_add { #define KBASE_IOCTL_MEM_PROFILE_ADD \ _IOW(KBASE_IOCTL_TYPE, 27, struct kbase_ioctl_mem_profile_add) -/** - * struct kbase_ioctl_soft_event_update - Update the status of a soft-event - * @event: GPU address of the event which has been updated - * @new_status: The new status to set - * @flags: Flags for future expansion - */ -struct kbase_ioctl_soft_event_update { - __u64 event; - __u32 new_status; - __u32 flags; -}; - -#define KBASE_IOCTL_SOFT_EVENT_UPDATE \ - _IOW(KBASE_IOCTL_TYPE, 28, struct kbase_ioctl_soft_event_update) - /** * struct kbase_ioctl_sticky_resource_map - Permanently map an external resource * @count: Number of resources @@ -693,7 +637,6 @@ union kbase_ioctl_cinstr_gwt_dump { #define KBASE_IOCTL_CINSTR_GWT_DUMP \ _IOWR(KBASE_IOCTL_TYPE, 35, union kbase_ioctl_cinstr_gwt_dump) - /** * struct 
kbase_ioctl_mem_exec_init - Initialise the EXEC_VA memory zone * @@ -706,7 +649,6 @@ struct kbase_ioctl_mem_exec_init { #define KBASE_IOCTL_MEM_EXEC_INIT \ _IOW(KBASE_IOCTL_TYPE, 38, struct kbase_ioctl_mem_exec_init) - /** * union kbase_ioctl_get_cpu_gpu_timeinfo - Request zero or more types of * cpu/gpu time (counter values) @@ -740,7 +682,6 @@ union kbase_ioctl_get_cpu_gpu_timeinfo { #define KBASE_IOCTL_GET_CPU_GPU_TIMEINFO \ _IOWR(KBASE_IOCTL_TYPE, 50, union kbase_ioctl_get_cpu_gpu_timeinfo) - /*************** * test ioctls * ***************/ @@ -782,38 +723,7 @@ struct kbase_ioctl_tlstream_stats { #define KBASE_IOCTL_TLSTREAM_STATS \ _IOR(KBASE_IOCTL_TEST_TYPE, 2, struct kbase_ioctl_tlstream_stats) -/** - * struct kbase_ioctl_cs_event_memory_write - Write an event memory address - * @cpu_addr: Memory address to write - * @value: Value to write - * @padding: Currently unused, must be zero - */ -struct kbase_ioctl_cs_event_memory_write { - __u64 cpu_addr; - __u8 value; - __u8 padding[7]; -}; - -/** - * union kbase_ioctl_cs_event_memory_read - Read an event memory address - * @cpu_addr: Memory address to read - * @value: Value read - * @padding: Currently unused, must be zero - * - * @in: Input parameters - * @out: Output parameters - */ -union kbase_ioctl_cs_event_memory_read { - struct { - __u64 cpu_addr; - } in; - struct { - __u8 value; - __u8 padding[7]; - } out; -}; - -#endif +#endif /* MALI_UNIT_TEST */ /* Customer extension range */ #define KBASE_IOCTL_EXTRA_TYPE (KBASE_IOCTL_TYPE + 2) diff --git a/drivers/gpu/arm/bifrost/mali_kbase_jd.c b/drivers/gpu/arm/bifrost/mali_kbase_jd.c index 77cfa2fc3699..935a7bf9a1e9 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_jd.c +++ b/drivers/gpu/arm/bifrost/mali_kbase_jd.c @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2010-2019 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2010-2020 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -33,9 +33,11 @@ #include #include -#include +#include +#include #include "mali_kbase_dma_fence.h" +#include #define beenthere(kctx, f, a...) dev_dbg(kctx->kbdev->dev, "%s:" f, __func__, ##a) @@ -49,6 +51,12 @@ #define IS_GPU_ATOM(katom) (!((katom->core_req & BASE_JD_REQ_SOFT_JOB) || \ ((katom->core_req & BASE_JD_REQ_ATOM_TYPE) == \ BASE_JD_REQ_DEP))) + +/* Minimum API version that supports the just-in-time memory allocation pressure + * limit feature. + */ +#define MIN_API_VERSION_WITH_JPL KBASE_API_VERSION(11, 20) + /* * This is the kernel side of the API. Only entry points are: * - kbase_jd_submit(): Called from userspace to submit a single bag @@ -76,36 +84,47 @@ get_compat_pointer(struct kbase_context *kctx, const u64 p) * Note that the caller must also check the atom status and * if it is KBASE_JD_ATOM_STATE_COMPLETED must call jd_done_nolock */ -static int jd_run_atom(struct kbase_jd_atom *katom) +static bool jd_run_atom(struct kbase_jd_atom *katom) { struct kbase_context *kctx = katom->kctx; + dev_dbg(kctx->kbdev->dev, "JD run atom %p in kctx %p\n", + (void *)katom, (void *)kctx); + KBASE_DEBUG_ASSERT(katom->status != KBASE_JD_ATOM_STATE_UNUSED); if ((katom->core_req & BASE_JD_REQ_ATOM_TYPE) == BASE_JD_REQ_DEP) { /* Dependency only atom */ + trace_sysgraph(SGR_SUBMIT, kctx->id, + kbase_jd_atom_id(katom->kctx, katom)); katom->status = KBASE_JD_ATOM_STATE_COMPLETED; + dev_dbg(kctx->kbdev->dev, "Atom %p status to completed\n", + (void *)katom); return 0; } else if (katom->core_req & BASE_JD_REQ_SOFT_JOB) { /* Soft-job */ if (katom->will_fail_event_code) { kbase_finish_soft_job(katom); katom->status = KBASE_JD_ATOM_STATE_COMPLETED; + dev_dbg(kctx->kbdev->dev, + "Atom %p status to completed\n", (void *)katom); return 0; } if (kbase_process_soft_job(katom) == 0) { kbase_finish_soft_job(katom); katom->status = 
KBASE_JD_ATOM_STATE_COMPLETED; + dev_dbg(kctx->kbdev->dev, + "Atom %p status to completed\n", (void *)katom); } return 0; } katom->status = KBASE_JD_ATOM_STATE_IN_JS; + dev_dbg(kctx->kbdev->dev, "Atom %p status to in JS\n", (void *)katom); /* Queue an action about whether we should try scheduling a context */ return kbasep_js_add_job(kctx, katom); } -#if defined(CONFIG_MALI_BIFROST_DMA_FENCE) void kbase_jd_dep_clear_locked(struct kbase_jd_atom *katom) { struct kbase_device *kbdev; @@ -136,7 +155,6 @@ void kbase_jd_dep_clear_locked(struct kbase_jd_atom *katom) kbase_js_sched_all(kbdev); } } -#endif void kbase_jd_free_external_resources(struct kbase_jd_atom *katom) { @@ -437,6 +455,9 @@ static inline void jd_resolve_dep(struct list_head *out_list, #endif /* CONFIG_MALI_BIFROST_DMA_FENCE */ if (dep_satisfied) { + trace_sysgraph(SGR_DEP_RES, + dep_atom->kctx->id, + kbase_jd_atom_id(katom->kctx, dep_atom)); dep_atom->in_jd_list = true; list_add_tail(&dep_atom->jd_item, out_list); } @@ -444,8 +465,6 @@ static inline void jd_resolve_dep(struct list_head *out_list, } } -KBASE_EXPORT_TEST_API(jd_resolve_dep); - /** * is_dep_valid - Validate that a dependency is valid for early dependency * submission @@ -525,6 +544,10 @@ static void jd_try_submitting_deps(struct list_head *out_list, #endif /* CONFIG_MALI_BIFROST_DMA_FENCE */ if (dep0_valid && dep1_valid && dep_satisfied) { + trace_sysgraph(SGR_DEP_RES, + dep_atom->kctx->id, + kbase_jd_atom_id(dep_atom->kctx, + dep_atom)); dep_atom->in_jd_list = true; list_add(&dep_atom->jd_item, out_list); } @@ -533,6 +556,148 @@ static void jd_try_submitting_deps(struct list_head *out_list, } } +#if MALI_JIT_PRESSURE_LIMIT +/** + * jd_update_jit_usage - Update just-in-time physical memory usage for an atom. + * + * @katom: An atom that has just finished. + * + * Read back actual just-in-time memory region usage from atoms that provide + * this information, and update the current physical page pressure. 
+ * + * The caller must hold the kbase_jd_context.lock. + */ +static void jd_update_jit_usage(struct kbase_jd_atom *katom) +{ + struct kbase_context *kctx = katom->kctx; + struct kbase_va_region *reg; + struct kbase_vmap_struct mapping; + u64 *ptr; + u64 used_pages; + unsigned int idx; + + lockdep_assert_held(&kctx->jctx.lock); + + /* If this atom wrote to JIT memory, find out how much it has written + * and update the usage information in the region. + */ + for (idx = 0; + idx < ARRAY_SIZE(katom->jit_ids) && katom->jit_ids[idx]; + idx++) { + enum heap_pointer { LOW = 0, HIGH, COUNT }; + size_t size_to_read; + u64 read_val; + + reg = kctx->jit_alloc[katom->jit_ids[idx]]; + + if (!reg) { + dev_warn(kctx->kbdev->dev, + "%s: JIT id[%u]=%u has no region\n", + __func__, idx, katom->jit_ids[idx]); + continue; + } + + if (reg == KBASE_RESERVED_REG_JIT_ALLOC) { + dev_warn(kctx->kbdev->dev, + "%s: JIT id[%u]=%u has failed to allocate a region\n", + __func__, idx, katom->jit_ids[idx]); + continue; + } + + if (!reg->heap_info_gpu_addr) + continue; + + size_to_read = sizeof(*ptr); + if (reg->flags & KBASE_REG_HEAP_INFO_IS_SIZE) + size_to_read = sizeof(u32); + else if (reg->flags & KBASE_REG_TILER_ALIGN_TOP) + size_to_read = sizeof(u64[COUNT]); + + ptr = kbase_vmap(kctx, reg->heap_info_gpu_addr, size_to_read, + &mapping); + + if (!ptr) { + dev_warn(kctx->kbdev->dev, + "%s: JIT id[%u]=%u start=0x%llx unable to map end marker %llx\n", + __func__, idx, katom->jit_ids[idx], + reg->start_pfn << PAGE_SHIFT, + reg->heap_info_gpu_addr); + continue; + } + + if (reg->flags & KBASE_REG_HEAP_INFO_IS_SIZE) { + read_val = READ_ONCE(*(u32 *)ptr); + used_pages = PFN_UP(read_val); + } else { + u64 addr_end; + + if (reg->flags & KBASE_REG_TILER_ALIGN_TOP) { + const unsigned long extent_bytes = reg->extent + << PAGE_SHIFT; + const u64 low_ptr = ptr[LOW]; + const u64 high_ptr = ptr[HIGH]; + + /* As either the low or high pointer could + * consume their partition and move onto the + * next chunk, 
we need to account for both. + * In the case where nothing has been allocated + * from the high pointer the whole chunk could + * be backed unnecessarily - but the granularity + * is the chunk size anyway and any non-zero + * offset of low pointer from the start of the + * chunk would result in the whole chunk being + * backed. + */ + read_val = max(high_ptr, low_ptr); + + /* kbase_check_alloc_sizes() already satisfies + * this, but here to avoid future maintenance + * hazards + */ + WARN_ON(!is_power_of_2(extent_bytes)); + addr_end = ALIGN(read_val, extent_bytes); + } else { + addr_end = read_val = READ_ONCE(*ptr); + } + + if (addr_end >= (reg->start_pfn << PAGE_SHIFT)) + used_pages = PFN_UP(addr_end) - reg->start_pfn; + else + used_pages = reg->used_pages; + } + + trace_mali_jit_report(katom, reg, idx, read_val, used_pages); + kbase_trace_jit_report_gpu_mem(kctx, reg, 0u); + + /* We can never have used more pages than the VA size of the + * region + */ + if (used_pages > reg->nr_pages) { + dev_warn(kctx->kbdev->dev, + "%s: JIT id[%u]=%u start=0x%llx used_pages %llx > %zx (read 0x%llx as %s%s)\n", + __func__, idx, katom->jit_ids[idx], + reg->start_pfn << PAGE_SHIFT, + used_pages, reg->nr_pages, read_val, + (reg->flags & KBASE_REG_HEAP_INFO_IS_SIZE) ? + "size" : "addr", + (reg->flags & KBASE_REG_TILER_ALIGN_TOP) ? + " with align" : ""); + used_pages = reg->nr_pages; + } + /* Note: one real use case has an atom correctly reporting 0 + * pages in use. This happens in normal use-cases but may only + * happen for a few of the application's frames. + */ + + kbase_vunmap(kctx, &mapping); + + kbase_jit_report_update_pressure(kctx, reg, used_pages, 0u); + } + + kbase_jit_retry_pending_alloc(kctx); +} +#endif /* MALI_JIT_PRESSURE_LIMIT */ + /* * Perform the necessary handling of an atom that has finished running * on the GPU. 
@@ -556,6 +721,10 @@ bool jd_done_nolock(struct kbase_jd_atom *katom, KBASE_DEBUG_ASSERT(katom->status != KBASE_JD_ATOM_STATE_UNUSED); +#if MALI_JIT_PRESSURE_LIMIT + jd_update_jit_usage(katom); +#endif /* MALI_JIT_PRESSURE_LIMIT */ + /* This is needed in case an atom is failed due to being invalid, this * can happen *before* the jobs that the atom depends on have completed */ for (i = 0; i < 2; i++) { @@ -566,6 +735,8 @@ bool jd_done_nolock(struct kbase_jd_atom *katom, } katom->status = KBASE_JD_ATOM_STATE_COMPLETED; + dev_dbg(kctx->kbdev->dev, "Atom %p status to completed\n", + (void *)katom); list_add_tail(&katom->jd_item, &completed_jobs); while (!list_empty(&completed_jobs)) { @@ -588,7 +759,12 @@ bool jd_done_nolock(struct kbase_jd_atom *katom, list_del(runnable_jobs.next); node->in_jd_list = false; + dev_dbg(kctx->kbdev->dev, "List node %p has status %d\n", + node, node->status); + KBASE_DEBUG_ASSERT(node->status != KBASE_JD_ATOM_STATE_UNUSED); + if (node->status == KBASE_JD_ATOM_STATE_IN_JS) + continue; if (node->status != KBASE_JD_ATOM_STATE_COMPLETED && !kbase_ctx_flag(kctx, KCTX_DYING)) { @@ -692,16 +868,20 @@ static const char *kbasep_map_core_reqs_to_string(base_jd_core_req core_req) } #endif -bool jd_submit_atom(struct kbase_context *kctx, const struct base_jd_atom_v2 *user_atom, struct kbase_jd_atom *katom) +static bool jd_submit_atom(struct kbase_context *const kctx, + const struct base_jd_atom_v2 *const user_atom, + const struct base_jd_fragment *const user_jc_incr, + struct kbase_jd_atom *const katom) { struct kbase_device *kbdev = kctx->kbdev; struct kbase_jd_context *jctx = &kctx->jctx; int queued = 0; int i; int sched_prio; - bool ret; bool will_fail = false; + dev_dbg(kbdev->dev, "User did JD submit atom %p\n", (void *)katom); + /* Update the TOTAL number of jobs. This includes those not tracked by * the scheduler: 'not ready to run' and 'dependency-only' jobs. 
*/ jctx->job_nr++; @@ -729,6 +909,24 @@ bool jd_submit_atom(struct kbase_context *kctx, const struct base_jd_atom_v2 *us katom->will_fail_event_code = BASE_JD_EVENT_NOT_STARTED; katom->softjob_data = NULL; + trace_sysgraph(SGR_ARRIVE, kctx->id, user_atom->atom_number); + +#if MALI_JIT_PRESSURE_LIMIT + /* Older API version atoms might have random values where jit_id now + * lives, but we must maintain backwards compatibility - handle the + * issue. + */ + if (kctx->api_version < MIN_API_VERSION_WITH_JPL) { + katom->jit_ids[0] = 0; + katom->jit_ids[1] = 0; + } else { + katom->jit_ids[0] = user_atom->jit_id[0]; + katom->jit_ids[1] = user_atom->jit_id[1]; + } +#endif /* MALI_JIT_PRESSURE_LIMIT */ + + katom->renderpass_id = user_atom->renderpass_id; + /* Implicitly sets katom->protected_state.enter as well. */ katom->protected_state.exit = KBASE_ATOM_EXIT_PROTECTED_CHECK; @@ -754,6 +952,9 @@ bool jd_submit_atom(struct kbase_context *kctx, const struct base_jd_atom_v2 *us dep_atom_type != BASE_JD_DEP_TYPE_DATA) { katom->event_code = BASE_JD_EVENT_JOB_CONFIG_FAULT; katom->status = KBASE_JD_ATOM_STATE_COMPLETED; + dev_dbg(kbdev->dev, + "Atom %p status to completed\n", + (void *)katom); /* Wrong dependency setup. Atom will be sent * back to user space. Do not record any @@ -770,8 +971,7 @@ bool jd_submit_atom(struct kbase_context *kctx, const struct base_jd_atom_v2 *us katom, TL_ATOM_STATE_IDLE); - ret = jd_done_nolock(katom, NULL); - goto out; + return jd_done_nolock(katom, NULL); } } } @@ -805,6 +1005,8 @@ bool jd_submit_atom(struct kbase_context *kctx, const struct base_jd_atom_v2 *us /* Atom has completed, propagate the error code if any */ katom->event_code = dep_atom->event_code; katom->status = KBASE_JD_ATOM_STATE_QUEUED; + dev_dbg(kbdev->dev, "Atom %p status to queued\n", + (void *)katom); /* This atom will be sent back to user space. * Do not record any dependencies. 
@@ -840,37 +1042,33 @@ bool jd_submit_atom(struct kbase_context *kctx, const struct base_jd_atom_v2 *us kbase_finish_soft_job(katom); } - ret = jd_done_nolock(katom, NULL); - - goto out; - } else { - - if (katom->core_req & BASE_JD_REQ_SOFT_JOB) { - /* This softjob has failed due to a previous - * dependency, however we should still run the - * prepare & finish functions - */ - if (kbase_prepare_soft_job(katom) != 0) { - katom->event_code = - BASE_JD_EVENT_JOB_INVALID; - ret = jd_done_nolock(katom, NULL); - goto out; - } - } - - katom->will_fail_event_code = katom->event_code; - ret = false; - - goto out; + return jd_done_nolock(katom, NULL); } - } else { - /* These must occur after the above loop to ensure that an atom - * that depends on a previous atom with the same number behaves - * as expected */ - katom->event_code = BASE_JD_EVENT_DONE; - katom->status = KBASE_JD_ATOM_STATE_QUEUED; + + if (katom->core_req & BASE_JD_REQ_SOFT_JOB) { + /* This softjob has failed due to a previous + * dependency, however we should still run the + * prepare & finish functions + */ + if (kbase_prepare_soft_job(katom) != 0) { + katom->event_code = + BASE_JD_EVENT_JOB_INVALID; + return jd_done_nolock(katom, NULL); + } + } + + katom->will_fail_event_code = katom->event_code; + return false; } + /* These must occur after the above loop to ensure that an atom + * that depends on a previous atom with the same number behaves + * as expected + */ + katom->event_code = BASE_JD_EVENT_DONE; + katom->status = KBASE_JD_ATOM_STATE_QUEUED; + dev_dbg(kbdev->dev, "Atom %p status to queued\n", (void *)katom); + /* For invalid priority, be most lenient and choose the default */ sched_prio = kbasep_js_atom_prio_to_sched_prio(user_atom->prio); if (sched_prio == KBASE_JS_ATOM_SCHED_PRIO_INVALID) @@ -886,34 +1084,49 @@ bool jd_submit_atom(struct kbase_context *kctx, const struct base_jd_atom_v2 *us KBASE_TLSTREAM_TL_ATTRIB_ATOM_PRIORITY(kbdev, katom, katom->sched_priority); 
KBASE_TLSTREAM_TL_RET_ATOM_CTX(kbdev, katom, kctx); - /* Reject atoms with job chain = NULL, as these cause issues with soft-stop */ - if (!katom->jc && (katom->core_req & BASE_JD_REQ_ATOM_TYPE) != BASE_JD_REQ_DEP) { - dev_warn(kctx->kbdev->dev, "Rejecting atom with jc = NULL"); +#if !MALI_INCREMENTAL_RENDERING + /* Reject atoms for incremental rendering if not supported */ + if (katom->core_req & + (BASE_JD_REQ_START_RENDERPASS|BASE_JD_REQ_END_RENDERPASS)) { + dev_err(kctx->kbdev->dev, + "Rejecting atom with unsupported core_req 0x%x\n", + katom->core_req); katom->event_code = BASE_JD_EVENT_JOB_INVALID; - ret = jd_done_nolock(katom, NULL); - goto out; + return jd_done_nolock(katom, NULL); + } +#endif /* !MALI_INCREMENTAL_RENDERING */ + + if (katom->core_req & BASE_JD_REQ_END_RENDERPASS) { + WARN_ON(katom->jc != 0); + katom->jc_fragment = *user_jc_incr; + } else if (!katom->jc && + (katom->core_req & BASE_JD_REQ_ATOM_TYPE) != BASE_JD_REQ_DEP) { + /* Reject atoms with job chain = NULL, as these cause issues + * with soft-stop + */ + dev_err(kctx->kbdev->dev, "Rejecting atom with jc = NULL\n"); + katom->event_code = BASE_JD_EVENT_JOB_INVALID; + return jd_done_nolock(katom, NULL); } /* Reject atoms with an invalid device_nr */ if ((katom->core_req & BASE_JD_REQ_SPECIFIC_COHERENT_GROUP) && (katom->device_nr >= kctx->kbdev->gpu_props.num_core_groups)) { - dev_warn(kctx->kbdev->dev, - "Rejecting atom with invalid device_nr %d", + dev_err(kctx->kbdev->dev, + "Rejecting atom with invalid device_nr %d\n", katom->device_nr); katom->event_code = BASE_JD_EVENT_JOB_INVALID; - ret = jd_done_nolock(katom, NULL); - goto out; + return jd_done_nolock(katom, NULL); } /* Reject atoms with invalid core requirements */ if ((katom->core_req & BASE_JD_REQ_EXTERNAL_RESOURCES) && (katom->core_req & BASE_JD_REQ_EVENT_COALESCE)) { - dev_warn(kctx->kbdev->dev, - "Rejecting atom with invalid core requirements"); + dev_err(kctx->kbdev->dev, + "Rejecting atom with invalid core requirements\n"); 
katom->event_code = BASE_JD_EVENT_JOB_INVALID; katom->core_req &= ~BASE_JD_REQ_EVENT_COALESCE; - ret = jd_done_nolock(katom, NULL); - goto out; + return jd_done_nolock(katom, NULL); } /* Reject soft-job atom of certain types from accessing external resources */ @@ -921,11 +1134,10 @@ bool jd_submit_atom(struct kbase_context *kctx, const struct base_jd_atom_v2 *us (((katom->core_req & BASE_JD_REQ_SOFT_JOB_TYPE) == BASE_JD_REQ_SOFT_FENCE_WAIT) || ((katom->core_req & BASE_JD_REQ_SOFT_JOB_TYPE) == BASE_JD_REQ_SOFT_JIT_ALLOC) || ((katom->core_req & BASE_JD_REQ_SOFT_JOB_TYPE) == BASE_JD_REQ_SOFT_JIT_FREE))) { - dev_warn(kctx->kbdev->dev, - "Rejecting soft-job atom accessing external resources"); + dev_err(kctx->kbdev->dev, + "Rejecting soft-job atom accessing external resources\n"); katom->event_code = BASE_JD_EVENT_JOB_INVALID; - ret = jd_done_nolock(katom, NULL); - goto out; + return jd_done_nolock(katom, NULL); } if (katom->core_req & BASE_JD_REQ_EXTERNAL_RESOURCES) { @@ -933,11 +1145,21 @@ bool jd_submit_atom(struct kbase_context *kctx, const struct base_jd_atom_v2 *us if (kbase_jd_pre_external_resources(katom, user_atom) != 0) { /* setup failed (no access, bad resource, unknown resource types, etc.) */ katom->event_code = BASE_JD_EVENT_JOB_INVALID; - ret = jd_done_nolock(katom, NULL); - goto out; + return jd_done_nolock(katom, NULL); } } +#if !MALI_JIT_PRESSURE_LIMIT + if ((kctx->api_version >= MIN_API_VERSION_WITH_JPL) && + (user_atom->jit_id[0] || user_atom->jit_id[1])) { + /* JIT pressure limit is disabled, but we are receiving non-0 + * JIT IDs - atom is invalid. + */ + katom->event_code = BASE_JD_EVENT_JOB_INVALID; + return jd_done_nolock(katom, NULL); + } +#endif /* MALI_JIT_PRESSURE_LIMIT */ + /* Validate the atom. Function will return error if the atom is * malformed. 
* @@ -948,15 +1170,13 @@ bool jd_submit_atom(struct kbase_context *kctx, const struct base_jd_atom_v2 *us if ((katom->core_req & BASE_JD_REQ_SOFT_JOB) == 0) { if (!kbase_js_is_atom_valid(kctx->kbdev, katom)) { katom->event_code = BASE_JD_EVENT_JOB_INVALID; - ret = jd_done_nolock(katom, NULL); - goto out; + return jd_done_nolock(katom, NULL); } } else { /* Soft-job */ if (kbase_prepare_soft_job(katom) != 0) { katom->event_code = BASE_JD_EVENT_JOB_INVALID; - ret = jd_done_nolock(katom, NULL); - goto out; + return jd_done_nolock(katom, NULL); } } @@ -966,39 +1186,38 @@ bool jd_submit_atom(struct kbase_context *kctx, const struct base_jd_atom_v2 *us kbasep_map_core_reqs_to_string(katom->core_req)); #endif - if (queued && !IS_GPU_ATOM(katom)) { - ret = false; - goto out; - } + if (queued && !IS_GPU_ATOM(katom)) + return false; #ifdef CONFIG_MALI_BIFROST_DMA_FENCE - if (kbase_fence_dep_count_read(katom) != -1) { - ret = false; - goto out; - } + if (kbase_fence_dep_count_read(katom) != -1) + return false; + #endif /* CONFIG_MALI_BIFROST_DMA_FENCE */ if (katom->core_req & BASE_JD_REQ_SOFT_JOB) { if (kbase_process_soft_job(katom) == 0) { kbase_finish_soft_job(katom); - ret = jd_done_nolock(katom, NULL); - goto out; + return jd_done_nolock(katom, NULL); } - - ret = false; - } else if ((katom->core_req & BASE_JD_REQ_ATOM_TYPE) != BASE_JD_REQ_DEP) { - katom->status = KBASE_JD_ATOM_STATE_IN_JS; - ret = kbasep_js_add_job(kctx, katom); - /* If job was cancelled then resolve immediately */ - if (katom->event_code == BASE_JD_EVENT_JOB_CANCELLED) - ret = jd_done_nolock(katom, NULL); - } else { - /* This is a pure dependency. 
Resolve it immediately */ - ret = jd_done_nolock(katom, NULL); + return false; } - out: - return ret; + if ((katom->core_req & BASE_JD_REQ_ATOM_TYPE) != BASE_JD_REQ_DEP) { + bool need_to_try_schedule_context; + + katom->status = KBASE_JD_ATOM_STATE_IN_JS; + dev_dbg(kctx->kbdev->dev, "Atom %p status to in JS\n", + (void *)katom); + + need_to_try_schedule_context = kbasep_js_add_job(kctx, katom); + /* If job was cancelled then resolve immediately */ + if (katom->event_code != BASE_JD_EVENT_JOB_CANCELLED) + return need_to_try_schedule_context; + } + + /* This is a pure dependency. Resolve it immediately */ + return jd_done_nolock(katom, NULL); } int kbase_jd_submit(struct kbase_context *kctx, @@ -1021,12 +1240,15 @@ int kbase_jd_submit(struct kbase_context *kctx, beenthere(kctx, "%s", "Enter"); if (kbase_ctx_flag(kctx, KCTX_SUBMIT_DISABLED)) { - dev_err(kbdev->dev, "Attempt to submit to a context that has SUBMIT_DISABLED set on it"); + dev_err(kbdev->dev, "Attempt to submit to a context that has SUBMIT_DISABLED set on it\n"); return -EINVAL; } - if (stride != sizeof(base_jd_atom_v2)) { - dev_err(kbdev->dev, "Stride passed to job_submit doesn't match kernel"); + if (stride != offsetof(struct base_jd_atom_v2, renderpass_id) && + stride != sizeof(struct base_jd_atom_v2)) { + dev_err(kbdev->dev, + "Stride %u passed to job_submit isn't supported by the kernel\n", + stride); return -EINVAL; } @@ -1035,14 +1257,58 @@ int kbase_jd_submit(struct kbase_context *kctx, for (i = 0; i < nr_atoms; i++) { struct base_jd_atom_v2 user_atom; + struct base_jd_fragment user_jc_incr; struct kbase_jd_atom *katom; - if (copy_from_user(&user_atom, user_addr, - sizeof(user_atom)) != 0) { - err = -EINVAL; + if (copy_from_user(&user_atom, user_addr, stride) != 0) { + dev_err(kbdev->dev, + "Invalid atom address %p passed to job_submit\n", + user_addr); + err = -EFAULT; break; } + if (stride == offsetof(struct base_jd_atom_v2, renderpass_id)) { + dev_dbg(kbdev->dev, "No renderpass ID: use 0\n"); + 
user_atom.renderpass_id = 0; + } else { + /* Ensure all padding bytes are 0 for potential future + * extension + */ + size_t j; + + dev_dbg(kbdev->dev, "Renderpass ID is %d\n", + user_atom.renderpass_id); + for (j = 0; j < sizeof(user_atom.padding); j++) { + if (user_atom.padding[j]) { + dev_err(kbdev->dev, + "Bad padding byte %zu: %d\n", + j, user_atom.padding[j]); + err = -EINVAL; + break; + } + } + if (err) + break; + } + + /* In this case 'jc' is the CPU address of a struct + * instead of a GPU address of a job chain. + */ + if (user_atom.core_req & BASE_JD_REQ_END_RENDERPASS) { + if (copy_from_user(&user_jc_incr, + u64_to_user_ptr(user_atom.jc), + sizeof(user_jc_incr))) { + dev_err(kbdev->dev, + "Invalid jc address 0x%llx passed to job_submit\n", + user_atom.jc); + err = -EFAULT; + break; + } + dev_dbg(kbdev->dev, "Copied IR jobchain addresses\n"); + user_atom.jc = 0; + } + user_addr = (void __user *)((uintptr_t) user_addr + stride); mutex_lock(&jctx->lock); @@ -1092,8 +1358,8 @@ while (false) mutex_lock(&jctx->lock); } - need_to_try_schedule_context |= - jd_submit_atom(kctx, &user_atom, katom); + need_to_try_schedule_context |= jd_submit_atom(kctx, &user_atom, + &user_jc_incr, katom); /* Register a completed job as a disjoint event when the GPU is in a disjoint state * (ie. being reset). 
@@ -1133,7 +1399,10 @@ void kbase_jd_done_worker(struct work_struct *data) js_kctx_info = &kctx->jctx.sched_info; js_devdata = &kbdev->js_data; - KBASE_TRACE_ADD(kbdev, JD_DONE_WORKER, kctx, katom, katom->jc, 0); + dev_dbg(kbdev->dev, "Enter atom %p done worker for kctx %p\n", + (void *)katom, (void *)kctx); + + KBASE_KTRACE_ADD_JM(kbdev, JD_DONE_WORKER, kctx, katom, katom->jc, 0); kbase_backend_complete_wq(kbdev, katom); @@ -1152,15 +1421,18 @@ void kbase_jd_done_worker(struct work_struct *data) KBASE_DEBUG_ASSERT(kbase_ctx_flag(kctx, KCTX_SCHEDULED)); if (katom->event_code == BASE_JD_EVENT_STOPPED) { - /* Atom has been promoted to stopped */ unsigned long flags; + dev_dbg(kbdev->dev, "Atom %p has been promoted to stopped\n", + (void *)katom); mutex_unlock(&js_kctx_info->ctx.jsctx_mutex); mutex_unlock(&js_devdata->queue_mutex); spin_lock_irqsave(&kbdev->hwaccess_lock, flags); katom->status = KBASE_JD_ATOM_STATE_IN_JS; + dev_dbg(kctx->kbdev->dev, "Atom %p status to in JS\n", + (void *)katom); kbase_js_unpull(kctx, katom); spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); @@ -1270,7 +1542,10 @@ void kbase_jd_done_worker(struct work_struct *data) if (context_idle) kbase_pm_context_idle(kbdev); - KBASE_TRACE_ADD(kbdev, JD_DONE_WORKER_END, kctx, NULL, cache_jc, 0); + KBASE_KTRACE_ADD_JM(kbdev, JD_DONE_WORKER_END, kctx, NULL, cache_jc, 0); + + dev_dbg(kbdev->dev, "Leave atom %p done worker for kctx %p\n", + (void *)katom, (void *)kctx); } /** @@ -1305,7 +1580,7 @@ static void jd_cancel_worker(struct work_struct *data) jctx = &kctx->jctx; js_kctx_info = &kctx->jctx.sched_info; - KBASE_TRACE_ADD(kbdev, JD_CANCEL_WORKER, kctx, katom, katom->jc, 0); + KBASE_KTRACE_ADD_JM(kbdev, JD_CANCEL_WORKER, kctx, katom, katom->jc, 0); /* This only gets called on contexts that are scheduled out. 
Hence, we must * make sure we don't de-ref the number of running jobs (there aren't @@ -1367,7 +1642,7 @@ void kbase_jd_done(struct kbase_jd_atom *katom, int slot_nr, if (done_code & KBASE_JS_ATOM_DONE_EVICTED_FROM_NEXT) katom->event_code = BASE_JD_EVENT_REMOVED_FROM_NEXT; - KBASE_TRACE_ADD(kbdev, JD_DONE, kctx, katom, katom->jc, 0); + KBASE_KTRACE_ADD_JM(kbdev, JD_DONE, kctx, katom, katom->jc, 0); kbase_job_check_leave_disjoint(kbdev, katom); @@ -1398,7 +1673,8 @@ void kbase_jd_cancel(struct kbase_device *kbdev, struct kbase_jd_atom *katom) kctx = katom->kctx; KBASE_DEBUG_ASSERT(NULL != kctx); - KBASE_TRACE_ADD(kbdev, JD_CANCEL, kctx, katom, katom->jc, 0); + dev_dbg(kbdev->dev, "JD: cancelling atom %p\n", (void *)katom); + KBASE_KTRACE_ADD_JM(kbdev, JD_CANCEL, kctx, katom, katom->jc, 0); /* This should only be done from a context that is not scheduled */ KBASE_DEBUG_ASSERT(!kbase_ctx_flag(kctx, KCTX_SCHEDULED)); @@ -1422,7 +1698,7 @@ void kbase_jd_zap_context(struct kbase_context *kctx) kbdev = kctx->kbdev; - KBASE_TRACE_ADD(kbdev, JD_ZAP_CONTEXT, kctx, NULL, 0u, 0u); + KBASE_KTRACE_ADD_JM(kbdev, JD_ZAP_CONTEXT, kctx, NULL, 0u, 0u); kbase_js_zap_context(kctx); @@ -1494,6 +1770,9 @@ int kbase_jd_init(struct kbase_context *kctx) #endif } + for (i = 0; i < BASE_JD_RP_COUNT; i++) + kctx->jctx.renderpasses[i].state = KBASE_JD_RP_COMPLETE; + mutex_init(&kctx->jctx.lock); init_waitqueue_head(&kctx->jctx.zero_jobs_wait); diff --git a/drivers/gpu/arm/bifrost/mali_kbase_jm.c b/drivers/gpu/arm/bifrost/mali_kbase_jm.c index da78a1670d9b..3f17dd763b97 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_jm.c +++ b/drivers/gpu/arm/bifrost/mali_kbase_jm.c @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2014-2018 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014-2020 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -45,6 +45,9 @@ static bool kbase_jm_next_job(struct kbase_device *kbdev, int js, int i; kctx = kbdev->hwaccess.active_kctx[js]; + dev_dbg(kbdev->dev, + "Trying to run the next %d jobs in kctx %p (s:%d)\n", + nr_jobs_to_submit, (void *)kctx, js); if (!kctx) return true; @@ -58,7 +61,8 @@ static bool kbase_jm_next_job(struct kbase_device *kbdev, int js, kbase_backend_run_atom(kbdev, katom); } - return false; /* Slot ringbuffer should now be full */ + dev_dbg(kbdev->dev, "Slot ringbuffer should now be full (s:%d)\n", js); + return false; } u32 kbase_jm_kick(struct kbase_device *kbdev, u32 js_mask) @@ -66,6 +70,7 @@ u32 kbase_jm_kick(struct kbase_device *kbdev, u32 js_mask) u32 ret_mask = 0; lockdep_assert_held(&kbdev->hwaccess_lock); + dev_dbg(kbdev->dev, "JM kick slot mask 0x%x\n", js_mask); while (js_mask) { int js = ffs(js_mask) - 1; @@ -77,6 +82,7 @@ u32 kbase_jm_kick(struct kbase_device *kbdev, u32 js_mask) js_mask &= ~(1 << js); } + dev_dbg(kbdev->dev, "Can still submit to mask 0x%x\n", ret_mask); return ret_mask; } @@ -111,8 +117,11 @@ void kbase_jm_idle_ctx(struct kbase_device *kbdev, struct kbase_context *kctx) lockdep_assert_held(&kbdev->hwaccess_lock); for (js = 0; js < BASE_JM_MAX_NR_SLOTS; js++) { - if (kbdev->hwaccess.active_kctx[js] == kctx) + if (kbdev->hwaccess.active_kctx[js] == kctx) { + dev_dbg(kbdev->dev, "Marking kctx %p as inactive (s:%d)\n", + (void *)kctx, js); kbdev->hwaccess.active_kctx[js] = NULL; + } } } @@ -121,6 +130,9 @@ struct kbase_jd_atom *kbase_jm_return_atom_to_js(struct kbase_device *kbdev, { lockdep_assert_held(&kbdev->hwaccess_lock); + dev_dbg(kbdev->dev, "Atom %p is returning with event code 0x%x\n", + (void *)katom, katom->event_code); + if (katom->event_code != BASE_JD_EVENT_STOPPED && katom->event_code != BASE_JD_EVENT_REMOVED_FROM_NEXT) { return 
kbase_js_complete_atom(katom, NULL); @@ -137,4 +149,3 @@ struct kbase_jd_atom *kbase_jm_complete(struct kbase_device *kbdev, return kbase_js_complete_atom(katom, end_timestamp); } - diff --git a/drivers/gpu/arm/bifrost/mali_kbase_jm.h b/drivers/gpu/arm/bifrost/mali_kbase_jm.h index c468ea4d20a5..a3c774483256 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_jm.h +++ b/drivers/gpu/arm/bifrost/mali_kbase_jm.h @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2014, 2016 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014, 2016, 2019 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software diff --git a/drivers/gpu/arm/bifrost/mali_kbase_js.c b/drivers/gpu/arm/bifrost/mali_kbase_js.c index e0bac144a002..9b338eb66531 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_js.c +++ b/drivers/gpu/arm/bifrost/mali_kbase_js.c @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2011-2019 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2011-2020 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -27,7 +27,8 @@ */ #include #include -#include +#include +#include #include #include @@ -80,22 +81,19 @@ static int kbase_js_get_slot(struct kbase_device *kbdev, static void kbase_js_foreach_ctx_job(struct kbase_context *kctx, kbasep_js_ctx_job_cb callback); -/* Helper for trace subcodes */ -#if KBASE_TRACE_ENABLE -static int kbasep_js_trace_get_refcnt(struct kbase_device *kbdev, - struct kbase_context *kctx) +/* Helper for ktrace */ +#if KBASE_KTRACE_ENABLE +static int kbase_ktrace_get_ctx_refcnt(struct kbase_context *kctx) { return atomic_read(&kctx->refcount); } -#else /* KBASE_TRACE_ENABLE */ -static int kbasep_js_trace_get_refcnt(struct kbase_device *kbdev, - struct kbase_context *kctx) +#else /* KBASE_KTRACE_ENABLE */ +static int kbase_ktrace_get_ctx_refcnt(struct kbase_context *kctx) { - CSTD_UNUSED(kbdev); CSTD_UNUSED(kctx); return 0; } -#endif /* KBASE_TRACE_ENABLE */ +#endif /* KBASE_KTRACE_ENABLE */ /* * Private functions @@ -138,31 +136,6 @@ static void kbase_js_sync_timers(struct kbase_device *kbdev) mutex_unlock(&kbdev->js_data.runpool_mutex); } -/* Hold the mmu_hw_mutex and hwaccess_lock for this */ -bool kbasep_js_runpool_retain_ctx_nolock(struct kbase_device *kbdev, - struct kbase_context *kctx) -{ - bool result = false; - int as_nr; - - KBASE_DEBUG_ASSERT(kbdev != NULL); - KBASE_DEBUG_ASSERT(kctx != NULL); - - lockdep_assert_held(&kbdev->hwaccess_lock); - - as_nr = kctx->as_nr; - if (atomic_read(&kctx->refcount) > 0) { - KBASE_DEBUG_ASSERT(as_nr >= 0); - - kbase_ctx_sched_retain_ctx_refcount(kctx); - KBASE_TRACE_ADD_REFCOUNT(kbdev, JS_RETAIN_CTX_NOLOCK, kctx, - NULL, 0u, atomic_read(&kctx->refcount)); - result = true; - } - - return result; -} - /** * jsctx_rb_none_to_pull_prio(): - Check if there are no pullable atoms * @kctx: Pointer to kbase context with ring buffer. 
@@ -179,11 +152,18 @@ bool kbasep_js_runpool_retain_ctx_nolock(struct kbase_device *kbdev, static inline bool jsctx_rb_none_to_pull_prio(struct kbase_context *kctx, int js, int prio) { + bool none_to_pull; struct jsctx_queue *rb = &kctx->jsctx_queue[prio][js]; lockdep_assert_held(&kctx->kbdev->hwaccess_lock); - return RB_EMPTY_ROOT(&rb->runnable_tree); + none_to_pull = RB_EMPTY_ROOT(&rb->runnable_tree); + + dev_dbg(kctx->kbdev->dev, + "Slot %d (prio %d) is %spullable in kctx %p\n", + js, prio, none_to_pull ? "not " : "", kctx); + + return none_to_pull; } /** @@ -204,7 +184,8 @@ jsctx_rb_none_to_pull(struct kbase_context *kctx, int js) lockdep_assert_held(&kctx->kbdev->hwaccess_lock); - for (prio = 0; prio < KBASE_JS_ATOM_SCHED_PRIO_COUNT; prio++) { + for (prio = KBASE_JS_ATOM_SCHED_PRIO_HIGH; + prio < KBASE_JS_ATOM_SCHED_PRIO_COUNT; prio++) { if (!jsctx_rb_none_to_pull_prio(kctx, js, prio)) return false; } @@ -244,13 +225,37 @@ jsctx_queue_foreach_prio(struct kbase_context *kctx, int js, int prio, rb_erase(node, &queue->runnable_tree); callback(kctx->kbdev, entry); + + /* Runnable end-of-renderpass atoms can also be in the linked + * list of atoms blocked on cross-slot dependencies. Remove them + * to avoid calling the callback twice. 
+ */ + if (entry->atom_flags & KBASE_KATOM_FLAG_JSCTX_IN_X_DEP_LIST) { + WARN_ON(!(entry->core_req & + BASE_JD_REQ_END_RENDERPASS)); + dev_dbg(kctx->kbdev->dev, + "Del runnable atom %p from X_DEP list\n", + (void *)entry); + + list_del(&entry->queue); + entry->atom_flags &= + ~KBASE_KATOM_FLAG_JSCTX_IN_X_DEP_LIST; + } } while (!list_empty(&queue->x_dep_head)) { struct kbase_jd_atom *entry = list_entry(queue->x_dep_head.next, struct kbase_jd_atom, queue); + WARN_ON(!(entry->atom_flags & + KBASE_KATOM_FLAG_JSCTX_IN_X_DEP_LIST)); + dev_dbg(kctx->kbdev->dev, + "Del blocked atom %p from X_DEP list\n", + (void *)entry); + list_del(queue->x_dep_head.next); + entry->atom_flags &= + ~KBASE_KATOM_FLAG_JSCTX_IN_X_DEP_LIST; callback(kctx->kbdev, entry); } @@ -272,7 +277,8 @@ jsctx_queue_foreach(struct kbase_context *kctx, int js, { int prio; - for (prio = 0; prio < KBASE_JS_ATOM_SCHED_PRIO_COUNT; prio++) + for (prio = KBASE_JS_ATOM_SCHED_PRIO_HIGH; + prio < KBASE_JS_ATOM_SCHED_PRIO_COUNT; prio++) jsctx_queue_foreach_prio(kctx, js, prio, callback); } @@ -294,10 +300,15 @@ jsctx_rb_peek_prio(struct kbase_context *kctx, int js, int prio) struct rb_node *node; lockdep_assert_held(&kctx->kbdev->hwaccess_lock); + dev_dbg(kctx->kbdev->dev, + "Peeking runnable tree of kctx %p for prio %d (s:%d)\n", + (void *)kctx, prio, js); node = rb_first(&rb->runnable_tree); - if (!node) + if (!node) { + dev_dbg(kctx->kbdev->dev, "Tree is empty\n"); return NULL; + } return rb_entry(node, struct kbase_jd_atom, runnable_tree_node); } @@ -322,7 +333,8 @@ jsctx_rb_peek(struct kbase_context *kctx, int js) lockdep_assert_held(&kctx->kbdev->hwaccess_lock); - for (prio = 0; prio < KBASE_JS_ATOM_SCHED_PRIO_COUNT; prio++) { + for (prio = KBASE_JS_ATOM_SCHED_PRIO_HIGH; + prio < KBASE_JS_ATOM_SCHED_PRIO_COUNT; prio++) { struct kbase_jd_atom *katom; katom = jsctx_rb_peek_prio(kctx, js, prio); @@ -351,6 +363,9 @@ jsctx_rb_pull(struct kbase_context *kctx, struct kbase_jd_atom *katom) 
lockdep_assert_held(&kctx->kbdev->hwaccess_lock); + dev_dbg(kctx->kbdev->dev, "Erasing atom %p from runnable tree of kctx %p\n", + (void *)katom, (void *)kctx); + /* Atoms must be pulled in the correct order. */ WARN_ON(katom != jsctx_rb_peek_prio(kctx, js, prio)); @@ -370,6 +385,9 @@ jsctx_tree_add(struct kbase_context *kctx, struct kbase_jd_atom *katom) lockdep_assert_held(&kctx->kbdev->hwaccess_lock); + dev_dbg(kbdev->dev, "Adding atom %p to runnable tree of kctx %p (s:%d)\n", + (void *)katom, (void *)kctx, js); + while (*new) { struct kbase_jd_atom *entry = container_of(*new, struct kbase_jd_atom, runnable_tree_node); @@ -517,7 +535,6 @@ int kbasep_js_devdata_init(struct kbase_device * const kbdev) mutex_init(&jsdd->runpool_mutex); mutex_init(&jsdd->queue_mutex); - spin_lock_init(&kbdev->hwaccess_lock); sema_init(&jsdd->schedule_sem, 1); for (i = 0; i < kbdev->gpu_props.num_job_slots; ++i) { @@ -570,6 +587,7 @@ int kbasep_js_kctx_init(struct kbase_context *const kctx) js_kctx_info = &kctx->jctx.sched_info; + kctx->slots_pullable = 0; js_kctx_info->ctx.nr_jobs = 0; kbase_ctx_flag_clear(kctx, KCTX_SCHEDULED); kbase_ctx_flag_clear(kctx, KCTX_DYING); @@ -660,6 +678,8 @@ static bool kbase_js_ctx_list_add_pullable_nolock(struct kbase_device *kbdev, bool ret = false; lockdep_assert_held(&kbdev->hwaccess_lock); + dev_dbg(kbdev->dev, "Add pullable tail kctx %p (s:%d)\n", + (void *)kctx, js); if (!list_empty(&kctx->jctx.sched_info.ctx.ctx_list_entry[js])) list_del_init(&kctx->jctx.sched_info.ctx.ctx_list_entry[js]); @@ -700,6 +720,8 @@ static bool kbase_js_ctx_list_add_pullable_head_nolock( bool ret = false; lockdep_assert_held(&kbdev->hwaccess_lock); + dev_dbg(kbdev->dev, "Add pullable head kctx %p (s:%d)\n", + (void *)kctx, js); if (!list_empty(&kctx->jctx.sched_info.ctx.ctx_list_entry[js])) list_del_init(&kctx->jctx.sched_info.ctx.ctx_list_entry[js]); @@ -774,6 +796,8 @@ static bool kbase_js_ctx_list_add_unpullable_nolock(struct kbase_device *kbdev, bool ret = false; 
lockdep_assert_held(&kbdev->hwaccess_lock); + dev_dbg(kbdev->dev, "Add unpullable tail kctx %p (s:%d)\n", + (void *)kctx, js); list_move_tail(&kctx->jctx.sched_info.ctx.ctx_list_entry[js], &kbdev->js_data.ctx_list_unpullable[js][kctx->priority]); @@ -864,7 +888,9 @@ static struct kbase_context *kbase_js_ctx_list_pop_head_nolock( jctx.sched_info.ctx.ctx_list_entry[js]); list_del_init(&kctx->jctx.sched_info.ctx.ctx_list_entry[js]); - + dev_dbg(kbdev->dev, + "Popped %p from the pullable queue (s:%d)\n", + (void *)kctx, js); return kctx; } return NULL; @@ -909,31 +935,56 @@ static bool kbase_js_ctx_pullable(struct kbase_context *kctx, int js, { struct kbasep_js_device_data *js_devdata; struct kbase_jd_atom *katom; + struct kbase_device *kbdev = kctx->kbdev; - lockdep_assert_held(&kctx->kbdev->hwaccess_lock); + lockdep_assert_held(&kbdev->hwaccess_lock); - js_devdata = &kctx->kbdev->js_data; + js_devdata = &kbdev->js_data; if (is_scheduled) { - if (!kbasep_js_is_submit_allowed(js_devdata, kctx)) + if (!kbasep_js_is_submit_allowed(js_devdata, kctx)) { + dev_dbg(kbdev->dev, "JS: No submit allowed for kctx %p\n", + (void *)kctx); return false; + } } katom = jsctx_rb_peek(kctx, js); - if (!katom) + if (!katom) { + dev_dbg(kbdev->dev, "JS: No pullable atom in kctx %p (s:%d)\n", + (void *)kctx, js); return false; /* No pullable atoms */ - if (kctx->blocked_js[js][katom->sched_priority]) - return false; - if (atomic_read(&katom->blocked)) - return false; /* next atom blocked */ - if (katom->atom_flags & KBASE_KATOM_FLAG_X_DEP_BLOCKED) { - if (katom->x_pre_dep->gpu_rb_state == - KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB || - katom->x_pre_dep->will_fail_event_code) - return false; - if ((katom->atom_flags & KBASE_KATOM_FLAG_FAIL_BLOCKER) && - kbase_backend_nr_atoms_on_slot(kctx->kbdev, js)) - return false; } + if (kctx->blocked_js[js][katom->sched_priority]) { + dev_dbg(kbdev->dev, + "JS: kctx %p is blocked from submitting atoms at priority %d (s:%d)\n", + (void *)kctx, 
katom->sched_priority, js); + return false; + } + if (atomic_read(&katom->blocked)) { + dev_dbg(kbdev->dev, "JS: Atom %p is blocked in js_ctx_pullable\n", + (void *)katom); + return false; /* next atom blocked */ + } + if (kbase_js_atom_blocked_on_x_dep(katom)) { + if (katom->x_pre_dep->gpu_rb_state == + KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB || + katom->x_pre_dep->will_fail_event_code) { + dev_dbg(kbdev->dev, + "JS: X pre-dep %p is not present in slot FIFO or will fail\n", + (void *)katom->x_pre_dep); + return false; + } + if ((katom->atom_flags & KBASE_KATOM_FLAG_FAIL_BLOCKER) && + kbase_backend_nr_atoms_on_slot(kctx->kbdev, js)) { + dev_dbg(kbdev->dev, + "JS: Atom %p has cross-slot fail dependency and atoms on slot (s:%d)\n", + (void *)katom, js); + return false; + } + } + + dev_dbg(kbdev->dev, "JS: Atom %p is pullable in kctx %p (s:%d)\n", + (void *)katom, (void *)kctx, js); return true; } @@ -955,9 +1006,15 @@ static bool kbase_js_dep_validate(struct kbase_context *kctx, int dep_js = kbase_js_get_slot(kbdev, dep_atom); int dep_prio = dep_atom->sched_priority; + dev_dbg(kbdev->dev, + "Checking dep %d of atom %p (s:%d) on %p (s:%d)\n", + i, (void *)katom, js, (void *)dep_atom, dep_js); + /* Dependent atom must already have been submitted */ if (!(dep_atom->atom_flags & KBASE_KATOM_FLAG_JSCTX_IN_TREE)) { + dev_dbg(kbdev->dev, + "Blocker not submitted yet\n"); ret = false; break; } @@ -965,6 +1022,8 @@ static bool kbase_js_dep_validate(struct kbase_context *kctx, /* Dependencies with different priorities can't be represented in the ringbuffer */ if (prio != dep_prio) { + dev_dbg(kbdev->dev, + "Different atom priorities\n"); ret = false; break; } @@ -973,12 +1032,16 @@ static bool kbase_js_dep_validate(struct kbase_context *kctx, /* Only one same-slot dependency can be * represented in the ringbuffer */ if (has_dep) { + dev_dbg(kbdev->dev, + "Too many same-slot deps\n"); ret = false; break; } /* Each dependee atom can only have one * same-slot dependency */ if 
(dep_atom->post_dep) { + dev_dbg(kbdev->dev, + "Too many same-slot successors\n"); ret = false; break; } @@ -987,12 +1050,16 @@ static bool kbase_js_dep_validate(struct kbase_context *kctx, /* Only one cross-slot dependency can be * represented in the ringbuffer */ if (has_x_dep) { + dev_dbg(kbdev->dev, + "Too many cross-slot deps\n"); ret = false; break; } /* Each dependee atom can only have one * cross-slot dependency */ if (dep_atom->x_post_dep) { + dev_dbg(kbdev->dev, + "Too many cross-slot successors\n"); ret = false; break; } @@ -1000,6 +1067,9 @@ static bool kbase_js_dep_validate(struct kbase_context *kctx, * HW access ringbuffer */ if (dep_atom->gpu_rb_state != KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB) { + dev_dbg(kbdev->dev, + "Blocker already in ringbuffer (state:%d)\n", + dep_atom->gpu_rb_state); ret = false; break; } @@ -1007,6 +1077,9 @@ static bool kbase_js_dep_validate(struct kbase_context *kctx, * completed */ if (dep_atom->status != KBASE_JD_ATOM_STATE_IN_JS) { + dev_dbg(kbdev->dev, + "Blocker already completed (status:%d)\n", + dep_atom->status); ret = false; break; } @@ -1027,6 +1100,11 @@ static bool kbase_js_dep_validate(struct kbase_context *kctx, if (dep_atom) { int dep_js = kbase_js_get_slot(kbdev, dep_atom); + dev_dbg(kbdev->dev, + "Clearing dep %d of atom %p (s:%d) on %p (s:%d)\n", + i, (void *)katom, js, (void *)dep_atom, + dep_js); + if ((js != dep_js) && (dep_atom->status != KBASE_JD_ATOM_STATE_COMPLETED) @@ -1037,6 +1115,10 @@ static bool kbase_js_dep_validate(struct kbase_context *kctx, katom->atom_flags |= KBASE_KATOM_FLAG_X_DEP_BLOCKED; + + dev_dbg(kbdev->dev, "Set X_DEP flag on atom %p\n", + (void *)katom); + katom->x_pre_dep = dep_atom; dep_atom->x_post_dep = katom; if (kbase_jd_katom_dep_type( @@ -1056,6 +1138,10 @@ static bool kbase_js_dep_validate(struct kbase_context *kctx, kbase_jd_katom_dep_clear(&katom->dep[i]); } } + } else { + dev_dbg(kbdev->dev, + "Deps of atom %p (s:%d) could not be represented\n", + (void *)katom, js); } 
return ret; @@ -1095,7 +1181,8 @@ void kbase_js_update_ctx_priority(struct kbase_context *kctx) /* Determine the new priority for context, as per the priority * of currently in-use atoms. */ - for (prio = 0; prio < KBASE_JS_ATOM_SCHED_PRIO_COUNT; prio++) { + for (prio = KBASE_JS_ATOM_SCHED_PRIO_HIGH; + prio < KBASE_JS_ATOM_SCHED_PRIO_COUNT; prio++) { if (kctx->atoms_count[prio]) { new_priority = prio; break; @@ -1106,6 +1193,103 @@ void kbase_js_update_ctx_priority(struct kbase_context *kctx) kbase_js_set_ctx_priority(kctx, new_priority); } +/** + * js_add_start_rp() - Add an atom that starts a renderpass to the job scheduler + * @start_katom: Pointer to the atom to be added. + * Return: 0 if successful or a negative value on failure. + */ +static int js_add_start_rp(struct kbase_jd_atom *const start_katom) +{ + struct kbase_context *const kctx = start_katom->kctx; + struct kbase_jd_renderpass *rp; + struct kbase_device *const kbdev = kctx->kbdev; + unsigned long flags; + + lockdep_assert_held(&kctx->jctx.lock); + + if (WARN_ON(!(start_katom->core_req & BASE_JD_REQ_START_RENDERPASS))) + return -EINVAL; + + if (start_katom->core_req & BASE_JD_REQ_END_RENDERPASS) + return -EINVAL; + + compiletime_assert((1ull << (sizeof(start_katom->renderpass_id) * 8)) <= + ARRAY_SIZE(kctx->jctx.renderpasses), + "Should check invalid access to renderpasses"); + + rp = &kctx->jctx.renderpasses[start_katom->renderpass_id]; + + if (rp->state != KBASE_JD_RP_COMPLETE) + return -EINVAL; + + dev_dbg(kctx->kbdev->dev, "JS add start atom %p of RP %d\n", + (void *)start_katom, start_katom->renderpass_id); + + /* The following members are read when updating the job slot + * ringbuffer/fifo therefore they require additional locking. 
+ */ + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + + rp->state = KBASE_JD_RP_START; + rp->start_katom = start_katom; + rp->end_katom = NULL; + INIT_LIST_HEAD(&rp->oom_reg_list); + + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + + return 0; +} + +/** + * js_add_end_rp() - Add an atom that ends a renderpass to the job scheduler + * @end_katom: Pointer to the atom to be added. + * Return: 0 if successful or a negative value on failure. + */ +static int js_add_end_rp(struct kbase_jd_atom *const end_katom) +{ + struct kbase_context *const kctx = end_katom->kctx; + struct kbase_jd_renderpass *rp; + struct kbase_device *const kbdev = kctx->kbdev; + + lockdep_assert_held(&kctx->jctx.lock); + + if (WARN_ON(!(end_katom->core_req & BASE_JD_REQ_END_RENDERPASS))) + return -EINVAL; + + if (end_katom->core_req & BASE_JD_REQ_START_RENDERPASS) + return -EINVAL; + + compiletime_assert((1ull << (sizeof(end_katom->renderpass_id) * 8)) <= + ARRAY_SIZE(kctx->jctx.renderpasses), + "Should check invalid access to renderpasses"); + + rp = &kctx->jctx.renderpasses[end_katom->renderpass_id]; + + dev_dbg(kbdev->dev, "JS add end atom %p in state %d of RP %d\n", + (void *)end_katom, (int)rp->state, end_katom->renderpass_id); + + if (rp->state == KBASE_JD_RP_COMPLETE) + return -EINVAL; + + if (rp->end_katom == NULL) { + /* We can't be in a retry state until the fragment job chain + * has completed. 
+ */ + unsigned long flags; + + WARN_ON(rp->state == KBASE_JD_RP_RETRY); + WARN_ON(rp->state == KBASE_JD_RP_RETRY_PEND_OOM); + WARN_ON(rp->state == KBASE_JD_RP_RETRY_OOM); + + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + rp->end_katom = end_katom; + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + } else + WARN_ON(rp->end_katom != end_katom); + + return 0; +} + bool kbasep_js_add_job(struct kbase_context *kctx, struct kbase_jd_atom *atom) { @@ -1113,6 +1297,7 @@ bool kbasep_js_add_job(struct kbase_context *kctx, struct kbasep_js_kctx_info *js_kctx_info; struct kbase_device *kbdev; struct kbasep_js_device_data *js_devdata; + int err = 0; bool enqueue_required = false; bool timer_sync = false; @@ -1128,6 +1313,17 @@ bool kbasep_js_add_job(struct kbase_context *kctx, mutex_lock(&js_devdata->queue_mutex); mutex_lock(&js_kctx_info->ctx.jsctx_mutex); + if (atom->core_req & BASE_JD_REQ_START_RENDERPASS) + err = js_add_start_rp(atom); + else if (atom->core_req & BASE_JD_REQ_END_RENDERPASS) + err = js_add_end_rp(atom); + + if (err < 0) { + atom->event_code = BASE_JD_EVENT_JOB_INVALID; + atom->status = KBASE_JD_ATOM_STATE_COMPLETED; + goto out_unlock; + } + /* * Begin Runpool transaction */ @@ -1136,6 +1332,8 @@ bool kbasep_js_add_job(struct kbase_context *kctx, /* Refcount ctx.nr_jobs */ KBASE_DEBUG_ASSERT(js_kctx_info->ctx.nr_jobs < U32_MAX); ++(js_kctx_info->ctx.nr_jobs); + dev_dbg(kbdev->dev, "Add atom %p to kctx %p; now %d in ctx\n", + (void *)atom, (void *)kctx, js_kctx_info->ctx.nr_jobs); /* Lock for state available during IRQ */ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); @@ -1146,10 +1344,14 @@ bool kbasep_js_add_job(struct kbase_context *kctx, if (!kbase_js_dep_validate(kctx, atom)) { /* Dependencies could not be represented */ --(js_kctx_info->ctx.nr_jobs); + dev_dbg(kbdev->dev, + "Remove atom %p from kctx %p; now %d in ctx\n", + (void *)atom, (void *)kctx, js_kctx_info->ctx.nr_jobs); /* Setting atom status back to queued as it still has 
unresolved * dependencies */ atom->status = KBASE_JD_ATOM_STATE_QUEUED; + dev_dbg(kbdev->dev, "Atom %p status to queued\n", (void *)atom); /* Undo the count, as the atom will get added again later but * leave the context priority adjusted or boosted, in case if @@ -1172,8 +1374,8 @@ bool kbasep_js_add_job(struct kbase_context *kctx, enqueue_required = kbase_js_dep_resolved_submit(kctx, atom); - KBASE_TRACE_ADD_REFCOUNT(kbdev, JS_ADD_JOB, kctx, atom, atom->jc, - kbasep_js_trace_get_refcnt(kbdev, kctx)); + KBASE_KTRACE_ADD_JM_REFCOUNT(kbdev, JS_ADD_JOB, kctx, atom, atom->jc, + kbase_ktrace_get_ctx_refcnt(kctx)); /* Context Attribute Refcounting */ kbasep_js_ctx_attr_ctx_retain_atom(kbdev, kctx, atom); @@ -1217,6 +1419,9 @@ bool kbasep_js_add_job(struct kbase_context *kctx, } } out_unlock: + dev_dbg(kbdev->dev, "Enqueue of kctx %p is %srequired\n", + kctx, enqueue_required ? "" : "not "); + mutex_unlock(&js_kctx_info->ctx.jsctx_mutex); mutex_unlock(&js_devdata->queue_mutex); @@ -1236,12 +1441,15 @@ void kbasep_js_remove_job(struct kbase_device *kbdev, js_kctx_info = &kctx->jctx.sched_info; - KBASE_TRACE_ADD_REFCOUNT(kbdev, JS_REMOVE_JOB, kctx, atom, atom->jc, - kbasep_js_trace_get_refcnt(kbdev, kctx)); + KBASE_KTRACE_ADD_JM_REFCOUNT(kbdev, JS_REMOVE_JOB, kctx, atom, atom->jc, + kbase_ktrace_get_ctx_refcnt(kctx)); /* De-refcount ctx.nr_jobs */ KBASE_DEBUG_ASSERT(js_kctx_info->ctx.nr_jobs > 0); --(js_kctx_info->ctx.nr_jobs); + dev_dbg(kbdev->dev, + "Remove atom %p from kctx %p; now %d in ctx\n", + (void *)atom, (void *)kctx, js_kctx_info->ctx.nr_jobs); spin_lock_irqsave(&kbdev->hwaccess_lock, flags); if (--kctx->atoms_count[atom->sched_priority] == 0) @@ -1278,44 +1486,6 @@ bool kbasep_js_remove_cancelled_job(struct kbase_device *kbdev, return attr_state_changed; } -bool kbasep_js_runpool_retain_ctx(struct kbase_device *kbdev, - struct kbase_context *kctx) -{ - unsigned long flags; - bool result; - - KBASE_DEBUG_ASSERT(kbdev != NULL); - - 
mutex_lock(&kbdev->mmu_hw_mutex); - spin_lock_irqsave(&kbdev->hwaccess_lock, flags); - result = kbasep_js_runpool_retain_ctx_nolock(kbdev, kctx); - spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); - mutex_unlock(&kbdev->mmu_hw_mutex); - - return result; -} - -struct kbase_context *kbasep_js_runpool_lookup_ctx(struct kbase_device *kbdev, - int as_nr) -{ - unsigned long flags; - struct kbase_context *found_kctx = NULL; - - KBASE_DEBUG_ASSERT(kbdev != NULL); - KBASE_DEBUG_ASSERT(0 <= as_nr && as_nr < BASE_MAX_NR_AS); - - spin_lock_irqsave(&kbdev->hwaccess_lock, flags); - - found_kctx = kbdev->as_to_kctx[as_nr]; - - if (found_kctx != NULL) - kbase_ctx_sched_retain_ctx_refcount(found_kctx); - - spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); - - return found_kctx; -} - /** * kbasep_js_run_jobs_after_ctx_and_atom_release - Try running more jobs after * releasing a context and/or atom @@ -1358,7 +1528,7 @@ static kbasep_js_release_result kbasep_js_run_jobs_after_ctx_and_atom_release( * run more jobs than before */ result = KBASEP_JS_RELEASE_RESULT_SCHED_ALL; - KBASE_TRACE_ADD_SLOT(kbdev, JD_DONE_TRY_RUN_NEXT_JOB, + KBASE_KTRACE_ADD_JM_SLOT(kbdev, JD_DONE_TRY_RUN_NEXT_JOB, kctx, NULL, 0u, 0); } return result; @@ -1436,10 +1606,10 @@ static kbasep_js_release_result kbasep_js_runpool_release_ctx_internal( runpool_ctx_attr_change |= kbasep_js_ctx_attr_ctx_release_atom( kbdev, kctx, katom_retained_state); - KBASE_TRACE_ADD_REFCOUNT(kbdev, JS_RELEASE_CTX, kctx, NULL, 0u, - new_ref_count); - if (new_ref_count == 2 && kbase_ctx_flag(kctx, KCTX_PRIVILEGED) && +#ifdef CONFIG_MALI_ARBITER_SUPPORT + !kbase_pm_is_gpu_lost(kbdev) && +#endif !kbase_pm_is_suspending(kbdev)) { /* Context is kept scheduled into an address space even when * there are no jobs, in this case we have to handle the @@ -1457,7 +1627,10 @@ static kbasep_js_release_result kbasep_js_runpool_release_ctx_internal( * which was previously acquired by kbasep_js_schedule_ctx(). 
*/ if (new_ref_count == 1 && (!kbasep_js_is_submit_allowed(js_devdata, kctx) || - kbdev->pm.suspending)) { +#ifdef CONFIG_MALI_ARBITER_SUPPORT + kbase_pm_is_gpu_lost(kbdev) || +#endif + kbase_pm_is_suspending(kbdev))) { int num_slots = kbdev->gpu_props.num_job_slots; int slot; @@ -1472,8 +1645,11 @@ static kbasep_js_release_result kbasep_js_runpool_release_ctx_internal( kbase_backend_release_ctx_irq(kbdev, kctx); for (slot = 0; slot < num_slots; slot++) { - if (kbdev->hwaccess.active_kctx[slot] == kctx) + if (kbdev->hwaccess.active_kctx[slot] == kctx) { + dev_dbg(kbdev->dev, "Marking kctx %p as inactive (s:%d)\n", + (void *)kctx, slot); kbdev->hwaccess.active_kctx[slot] = NULL; + } } /* Ctx Attribute handling @@ -1675,6 +1851,8 @@ static bool kbasep_js_schedule_ctx(struct kbase_device *kbdev, bool kctx_suspended = false; int as_nr; + dev_dbg(kbdev->dev, "Scheduling kctx %p (s:%d)\n", kctx, js); + js_devdata = &kbdev->js_data; js_kctx_info = &kctx->jctx.sched_info; @@ -1723,9 +1901,9 @@ static bool kbasep_js_schedule_ctx(struct kbase_device *kbdev, return false; } - KBASE_TRACE_ADD_REFCOUNT(kbdev, JS_TRY_SCHEDULE_HEAD_CTX, kctx, NULL, + KBASE_KTRACE_ADD_JM_REFCOUNT(kbdev, JS_TRY_SCHEDULE_HEAD_CTX, kctx, NULL, 0u, - kbasep_js_trace_get_refcnt(kbdev, kctx)); + kbase_ktrace_get_ctx_refcnt(kctx)); kbase_ctx_flag_set(kctx, KCTX_SCHEDULED); @@ -1762,11 +1940,15 @@ static bool kbasep_js_schedule_ctx(struct kbase_device *kbdev, * kbasep_js_suspend() code will cleanup this context instead (by virtue * of it being called strictly after the suspend flag is set, and will * wait for this lock to drop) */ +#ifdef CONFIG_MALI_ARBITER_SUPPORT + if (kbase_pm_is_suspending(kbdev) || kbase_pm_is_gpu_lost(kbdev)) { +#else if (kbase_pm_is_suspending(kbdev)) { +#endif /* Cause it to leave at some later point */ bool retained; - retained = kbasep_js_runpool_retain_ctx_nolock(kbdev, kctx); + retained = kbase_ctx_sched_inc_refcount_nolock(kctx); KBASE_DEBUG_ASSERT(retained); 
kbasep_js_clear_submit_allowed(js_devdata, kctx); @@ -1808,7 +1990,11 @@ static bool kbase_js_use_ctx(struct kbase_device *kbdev, if (kbase_ctx_flag(kctx, KCTX_SCHEDULED) && kbase_backend_use_ctx_sched(kbdev, kctx, js)) { - /* Context already has ASID - mark as active */ + + dev_dbg(kbdev->dev, + "kctx %p already has ASID - mark as active (s:%d)\n", + (void *)kctx, js); + if (kbdev->hwaccess.active_kctx[js] != kctx) { kbdev->hwaccess.active_kctx[js] = kctx; kbase_ctx_flag_clear(kctx, @@ -1835,9 +2021,30 @@ void kbasep_js_schedule_privileged_ctx(struct kbase_device *kbdev, js_devdata = &kbdev->js_data; js_kctx_info = &kctx->jctx.sched_info; - /* This must never be attempted whilst suspending - i.e. it should only - * happen in response to a syscall from a user-space thread */ - BUG_ON(kbase_pm_is_suspending(kbdev)); +#ifdef CONFIG_MALI_ARBITER_SUPPORT + /* This should only happen in response to a system call + * from a user-space thread. + * In a non-arbitrated environment this can never happen + * whilst suspending. + * + * In an arbitrated environment, user-space threads can run + * while we are suspended (for example GPU not available + * to this VM), however in that case we will block on + * the wait event for KCTX_SCHEDULED, since no context + * can be scheduled until we have the GPU again. + */ + if (kbdev->arb.arb_if == NULL) + if (WARN_ON(kbase_pm_is_suspending(kbdev))) + return; +#else + /* This should only happen in response to a system call + * from a user-space thread. + * In a non-arbitrated environment this can never happen + * whilst suspending. 
+ */ + if (WARN_ON(kbase_pm_is_suspending(kbdev))) + return; +#endif mutex_lock(&js_devdata->queue_mutex); mutex_lock(&js_kctx_info->ctx.jsctx_mutex); @@ -1865,7 +2072,7 @@ void kbasep_js_schedule_privileged_ctx(struct kbase_device *kbdev, } else { /* Already scheduled in - We need to retain it to keep the * corresponding address space */ - WARN_ON(!kbasep_js_runpool_retain_ctx(kbdev, kctx)); + WARN_ON(!kbase_ctx_sched_inc_refcount(kctx)); mutex_unlock(&js_kctx_info->ctx.jsctx_mutex); mutex_unlock(&js_devdata->queue_mutex); } @@ -1898,7 +2105,6 @@ void kbasep_js_suspend(struct kbase_device *kbdev) struct kbasep_js_device_data *js_devdata; int i; u16 retained = 0u; - int nr_privileged_ctx = 0; KBASE_DEBUG_ASSERT(kbdev); KBASE_DEBUG_ASSERT(kbase_pm_is_suspending(kbdev)); @@ -1919,16 +2125,14 @@ void kbasep_js_suspend(struct kbase_device *kbdev) if (kctx && !(kbdev->as_free & (1u << i))) { kbase_ctx_sched_retain_ctx_refcount(kctx); retained |= 1u; - /* We can only cope with up to 1 privileged context - - * the instrumented context. It'll be suspended by - * disabling instrumentation */ - if (kbase_ctx_flag(kctx, KCTX_PRIVILEGED)) { - ++nr_privileged_ctx; - WARN_ON(nr_privileged_ctx != 1); - } + /* This loop will not have an effect on the privileged + * contexts as they would have an extra ref count + * compared to the normal contexts, so they will hold + * on to their address spaces. MMU will be re-enabled for + * them on resume. 
+ */ } } - CSTD_UNUSED(nr_privileged_ctx); spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); @@ -1958,10 +2162,12 @@ void kbasep_js_resume(struct kbase_device *kbdev) mutex_lock(&js_devdata->queue_mutex); for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) { - for (prio = 0; prio < KBASE_JS_ATOM_SCHED_PRIO_COUNT; prio++) { + for (prio = KBASE_JS_ATOM_SCHED_PRIO_HIGH; + prio < KBASE_JS_ATOM_SCHED_PRIO_COUNT; prio++) { struct kbase_context *kctx, *n; unsigned long flags; +#ifndef CONFIG_MALI_ARBITER_SUPPORT spin_lock_irqsave(&kbdev->hwaccess_lock, flags); list_for_each_entry_safe(kctx, n, @@ -1985,9 +2191,13 @@ void kbasep_js_resume(struct kbase_device *kbdev) timer_sync = kbase_js_ctx_list_add_pullable_nolock( kbdev, kctx, js); - spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + + spin_unlock_irqrestore(&kbdev->hwaccess_lock, + flags); + if (timer_sync) kbase_backend_ctx_count_changed(kbdev); + mutex_unlock(&js_devdata->runpool_mutex); mutex_unlock(&js_kctx_info->ctx.jsctx_mutex); @@ -1995,6 +2205,30 @@ void kbasep_js_resume(struct kbase_device *kbdev) spin_lock_irqsave(&kbdev->hwaccess_lock, flags); } spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); +#else + bool timer_sync = false; + + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + + list_for_each_entry_safe(kctx, n, + &kbdev->js_data.ctx_list_unpullable[js][prio], + jctx.sched_info.ctx.ctx_list_entry[js]) { + + if (!kbase_ctx_flag(kctx, KCTX_SCHEDULED) && + kbase_js_ctx_pullable(kctx, js, false)) + timer_sync |= + kbase_js_ctx_list_add_pullable_nolock( + kbdev, kctx, js); + } + + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + + if (timer_sync) { + mutex_lock(&js_devdata->runpool_mutex); + kbase_backend_ctx_count_changed(kbdev); + mutex_unlock(&js_devdata->runpool_mutex); + } +#endif } } mutex_unlock(&js_devdata->queue_mutex); @@ -2041,7 +2275,7 @@ static int kbase_js_get_slot(struct kbase_device *kbdev, bool kbase_js_dep_resolved_submit(struct kbase_context *kctx, struct 
kbase_jd_atom *katom) { - bool enqueue_required; + bool enqueue_required, add_required = true; katom->slot_nr = kbase_js_get_slot(kctx->kbdev, katom); @@ -2055,6 +2289,7 @@ bool kbase_js_dep_resolved_submit(struct kbase_context *kctx, } else { enqueue_required = false; } + if ((katom->atom_flags & KBASE_KATOM_FLAG_X_DEP_BLOCKED) || (katom->pre_dep && (katom->pre_dep->atom_flags & KBASE_KATOM_FLAG_JSCTX_IN_X_DEP_LIST))) { @@ -2062,10 +2297,21 @@ bool kbase_js_dep_resolved_submit(struct kbase_context *kctx, int js = katom->slot_nr; struct jsctx_queue *queue = &kctx->jsctx_queue[prio][js]; + dev_dbg(kctx->kbdev->dev, "Add atom %p to X_DEP list (s:%d)\n", + (void *)katom, js); + list_add_tail(&katom->queue, &queue->x_dep_head); katom->atom_flags |= KBASE_KATOM_FLAG_JSCTX_IN_X_DEP_LIST; - enqueue_required = false; + if (kbase_js_atom_blocked_on_x_dep(katom)) { + enqueue_required = false; + add_required = false; + } } else { + dev_dbg(kctx->kbdev->dev, "Atom %p not added to X_DEP list\n", + (void *)katom); + } + + if (add_required) { /* Check if there are lower priority jobs to soft stop */ kbase_job_slot_ctx_priority_check_locked(kctx, katom); @@ -2074,6 +2320,10 @@ bool kbase_js_dep_resolved_submit(struct kbase_context *kctx, katom->atom_flags |= KBASE_KATOM_FLAG_JSCTX_IN_TREE; } + dev_dbg(kctx->kbdev->dev, + "Enqueue of kctx %p is %srequired to submit atom %p\n", + kctx, enqueue_required ? 
"" : "not ", katom); + return enqueue_required; } @@ -2088,19 +2338,36 @@ bool kbase_js_dep_resolved_submit(struct kbase_context *kctx, */ static void kbase_js_move_to_tree(struct kbase_jd_atom *katom) { - lockdep_assert_held(&katom->kctx->kbdev->hwaccess_lock); + struct kbase_context *const kctx = katom->kctx; + + lockdep_assert_held(&kctx->kbdev->hwaccess_lock); while (katom) { WARN_ON(!(katom->atom_flags & KBASE_KATOM_FLAG_JSCTX_IN_X_DEP_LIST)); - if (!(katom->atom_flags & KBASE_KATOM_FLAG_X_DEP_BLOCKED)) { + if (!kbase_js_atom_blocked_on_x_dep(katom)) { + dev_dbg(kctx->kbdev->dev, + "Del atom %p from X_DEP list in js_move_to_tree\n", + (void *)katom); + list_del(&katom->queue); katom->atom_flags &= ~KBASE_KATOM_FLAG_JSCTX_IN_X_DEP_LIST; - jsctx_tree_add(katom->kctx, katom); - katom->atom_flags |= KBASE_KATOM_FLAG_JSCTX_IN_TREE; + /* For incremental rendering, an end-of-renderpass atom + * may have had its dependency on start-of-renderpass + * ignored and may therefore already be in the tree. + */ + if (!(katom->atom_flags & + KBASE_KATOM_FLAG_JSCTX_IN_TREE)) { + jsctx_tree_add(kctx, katom); + katom->atom_flags |= + KBASE_KATOM_FLAG_JSCTX_IN_TREE; + } } else { + dev_dbg(kctx->kbdev->dev, + "Atom %p blocked on x-dep in js_move_to_tree\n", + (void *)katom); break; } @@ -2142,6 +2409,11 @@ static void kbase_js_evict_deps(struct kbase_context *kctx, KBASE_KATOM_FLAG_JSCTX_IN_X_DEP_LIST))) { /* Remove dependency.*/ x_dep->atom_flags &= ~KBASE_KATOM_FLAG_X_DEP_BLOCKED; + trace_sysgraph(SGR_DEP_RES, kctx->id, + kbase_jd_atom_id(kctx, x_dep)); + + dev_dbg(kctx->kbdev->dev, "Cleared X_DEP flag on atom %p\n", + (void *)x_dep); /* Fail if it had a data dependency. 
*/ if (x_dep->atom_flags & KBASE_KATOM_FLAG_FAIL_BLOCKER) { @@ -2162,22 +2434,41 @@ struct kbase_jd_atom *kbase_js_pull(struct kbase_context *kctx, int js) KBASE_DEBUG_ASSERT(kctx); kbdev = kctx->kbdev; + dev_dbg(kbdev->dev, "JS: pulling an atom from kctx %p (s:%d)\n", + (void *)kctx, js); js_devdata = &kbdev->js_data; lockdep_assert_held(&kbdev->hwaccess_lock); - if (!kbasep_js_is_submit_allowed(js_devdata, kctx)) + if (!kbasep_js_is_submit_allowed(js_devdata, kctx)) { + dev_dbg(kbdev->dev, "JS: No submit allowed for kctx %p\n", + (void *)kctx); return NULL; + } +#ifdef CONFIG_MALI_ARBITER_SUPPORT + if (kbase_pm_is_suspending(kbdev) || kbase_pm_is_gpu_lost(kbdev)) +#else if (kbase_pm_is_suspending(kbdev)) +#endif return NULL; katom = jsctx_rb_peek(kctx, js); - if (!katom) + if (!katom) { + dev_dbg(kbdev->dev, "JS: No pullable atom in kctx %p (s:%d)\n", + (void *)kctx, js); return NULL; - if (kctx->blocked_js[js][katom->sched_priority]) + } + if (kctx->blocked_js[js][katom->sched_priority]) { + dev_dbg(kbdev->dev, + "JS: kctx %p is blocked from submitting atoms at priority %d (s:%d)\n", + (void *)kctx, katom->sched_priority, js); return NULL; - if (atomic_read(&katom->blocked)) + } + if (atomic_read(&katom->blocked)) { + dev_dbg(kbdev->dev, "JS: Atom %p is blocked in js_pull\n", + (void *)katom); return NULL; + } /* Due to ordering restrictions when unpulling atoms on failure, we do * not allow multiple runs of fail-dep atoms from the same context to be @@ -2190,14 +2481,22 @@ struct kbase_jd_atom *kbase_js_pull(struct kbase_context *kctx, int js) return NULL; } - if (katom->atom_flags & KBASE_KATOM_FLAG_X_DEP_BLOCKED) { + if (kbase_js_atom_blocked_on_x_dep(katom)) { if (katom->x_pre_dep->gpu_rb_state == - KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB || - katom->x_pre_dep->will_fail_event_code) + KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB || + katom->x_pre_dep->will_fail_event_code) { + dev_dbg(kbdev->dev, + "JS: X pre-dep %p is not present in slot FIFO or will fail\n", + (void 
*)katom->x_pre_dep); return NULL; + } if ((katom->atom_flags & KBASE_KATOM_FLAG_FAIL_BLOCKER) && - kbase_backend_nr_atoms_on_slot(kbdev, js)) + kbase_backend_nr_atoms_on_slot(kbdev, js)) { + dev_dbg(kbdev->dev, + "JS: Atom %p has cross-slot fail dependency and atoms on slot (s:%d)\n", + (void *)katom, js); return NULL; + } } kbase_ctx_flag_set(kctx, KCTX_PULLED); @@ -2213,15 +2512,220 @@ struct kbase_jd_atom *kbase_js_pull(struct kbase_context *kctx, int js) kctx->atoms_pulled_slot_pri[katom->slot_nr][katom->sched_priority]++; jsctx_rb_pull(kctx, katom); - kbasep_js_runpool_retain_ctx_nolock(kbdev, kctx); + kbase_ctx_sched_retain_ctx_refcount(kctx); katom->atom_flags |= KBASE_KATOM_FLAG_HOLDING_CTX_REF; katom->ticks = 0; + dev_dbg(kbdev->dev, "JS: successfully pulled atom %p from kctx %p (s:%d)\n", + (void *)katom, (void *)kctx, js); + return katom; } +/** + * js_return_of_start_rp() - Handle soft-stop of an atom that starts a + * renderpass + * @start_katom: Pointer to the start-of-renderpass atom that was soft-stopped + * + * This function is called to switch to incremental rendering if the tiler job + * chain at the start of a renderpass has used too much memory. It prevents the + * tiler job being pulled for execution in the job scheduler again until the + * next phase of incremental rendering is complete. + * + * If the end-of-renderpass atom is already in the job scheduler (because a + * previous attempt at tiling used too much memory during the same renderpass) + * then it is unblocked; otherwise, it is run by handing it to the scheduler. 
+ */ +static void js_return_of_start_rp(struct kbase_jd_atom *const start_katom) +{ + struct kbase_context *const kctx = start_katom->kctx; + struct kbase_device *const kbdev = kctx->kbdev; + struct kbase_jd_renderpass *rp; + struct kbase_jd_atom *end_katom; + unsigned long flags; + + lockdep_assert_held(&kctx->jctx.lock); + + if (WARN_ON(!(start_katom->core_req & BASE_JD_REQ_START_RENDERPASS))) + return; + + compiletime_assert((1ull << (sizeof(start_katom->renderpass_id) * 8)) <= + ARRAY_SIZE(kctx->jctx.renderpasses), + "Should check invalid access to renderpasses"); + + rp = &kctx->jctx.renderpasses[start_katom->renderpass_id]; + + if (WARN_ON(rp->start_katom != start_katom)) + return; + + dev_dbg(kctx->kbdev->dev, + "JS return start atom %p in state %d of RP %d\n", + (void *)start_katom, (int)rp->state, + start_katom->renderpass_id); + + if (WARN_ON(rp->state == KBASE_JD_RP_COMPLETE)) + return; + + /* The tiler job might have been soft-stopped for some reason other + * than running out of memory. + */ + if (rp->state == KBASE_JD_RP_START || rp->state == KBASE_JD_RP_RETRY) { + dev_dbg(kctx->kbdev->dev, + "JS return isn't OOM in state %d of RP %d\n", + (int)rp->state, start_katom->renderpass_id); + return; + } + + dev_dbg(kctx->kbdev->dev, + "JS return confirm OOM in state %d of RP %d\n", + (int)rp->state, start_katom->renderpass_id); + + if (WARN_ON(rp->state != KBASE_JD_RP_PEND_OOM && + rp->state != KBASE_JD_RP_RETRY_PEND_OOM)) + return; + + /* Prevent the tiler job being pulled for execution in the + * job scheduler again. + */ + dev_dbg(kbdev->dev, "Blocking start atom %p\n", + (void *)start_katom); + atomic_inc(&start_katom->blocked); + + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + + rp->state = (rp->state == KBASE_JD_RP_PEND_OOM) ? + KBASE_JD_RP_OOM : KBASE_JD_RP_RETRY_OOM; + + /* Was the fragment job chain submitted to kbase yet? 
*/ + end_katom = rp->end_katom; + if (end_katom) { + dev_dbg(kctx->kbdev->dev, "JS return add end atom %p\n", + (void *)end_katom); + + if (rp->state == KBASE_JD_RP_RETRY_OOM) { + /* Allow the end of the renderpass to be pulled for + * execution again to continue incremental rendering. + */ + dev_dbg(kbdev->dev, "Unblocking end atom %p\n", + (void *)end_katom); + atomic_dec(&end_katom->blocked); + WARN_ON(!(end_katom->atom_flags & + KBASE_KATOM_FLAG_JSCTX_IN_TREE)); + WARN_ON(end_katom->status != KBASE_JD_ATOM_STATE_IN_JS); + + kbase_js_ctx_list_add_pullable_nolock(kbdev, kctx, + end_katom->slot_nr); + + /* Expect the fragment job chain to be scheduled without + * further action because this function is called when + * returning an atom to the job scheduler ringbuffer. + */ + end_katom = NULL; + } else { + WARN_ON(end_katom->status != + KBASE_JD_ATOM_STATE_QUEUED && + end_katom->status != KBASE_JD_ATOM_STATE_IN_JS); + } + } + + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + + if (end_katom) + kbase_jd_dep_clear_locked(end_katom); +} + +/** + * js_return_of_end_rp() - Handle completion of an atom that ends a renderpass + * @end_katom: Pointer to the end-of-renderpass atom that was completed + * + * This function is called to continue incremental rendering if the tiler job + * chain at the start of a renderpass used too much memory. It resets the + * mechanism for detecting excessive memory usage then allows the soft-stopped + * tiler job chain to be pulled for execution again. + * + * The start-of-renderpass atom must already have been submitted to kbase. 
+ */ +static void js_return_of_end_rp(struct kbase_jd_atom *const end_katom) +{ + struct kbase_context *const kctx = end_katom->kctx; + struct kbase_device *const kbdev = kctx->kbdev; + struct kbase_jd_renderpass *rp; + struct kbase_jd_atom *start_katom; + unsigned long flags; + + lockdep_assert_held(&kctx->jctx.lock); + + if (WARN_ON(!(end_katom->core_req & BASE_JD_REQ_END_RENDERPASS))) + return; + + compiletime_assert((1ull << (sizeof(end_katom->renderpass_id) * 8)) <= + ARRAY_SIZE(kctx->jctx.renderpasses), + "Should check invalid access to renderpasses"); + + rp = &kctx->jctx.renderpasses[end_katom->renderpass_id]; + + if (WARN_ON(rp->end_katom != end_katom)) + return; + + dev_dbg(kctx->kbdev->dev, + "JS return end atom %p in state %d of RP %d\n", + (void *)end_katom, (int)rp->state, end_katom->renderpass_id); + + if (WARN_ON(rp->state != KBASE_JD_RP_OOM && + rp->state != KBASE_JD_RP_RETRY_OOM)) + return; + + /* Reduce the number of mapped pages in the memory regions that + * triggered out-of-memory last time so that we can detect excessive + * memory usage again. 
+ */ + kbase_gpu_vm_lock(kctx); + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + + while (!list_empty(&rp->oom_reg_list)) { + struct kbase_va_region *reg = + list_first_entry(&rp->oom_reg_list, + struct kbase_va_region, link); + + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + + dev_dbg(kbdev->dev, + "Reset backing to %zu pages for region %p\n", + reg->threshold_pages, (void *)reg); + + if (!WARN_ON(reg->flags & KBASE_REG_VA_FREED)) + kbase_mem_shrink(kctx, reg, reg->threshold_pages); + + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + dev_dbg(kbdev->dev, "Deleting region %p from list\n", + (void *)reg); + list_del_init(&reg->link); + kbase_va_region_alloc_put(kctx, reg); + } + + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + kbase_gpu_vm_unlock(kctx); + + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + rp->state = KBASE_JD_RP_RETRY; + dev_dbg(kbdev->dev, "Changed state to %d for retry\n", rp->state); + + /* Allow the start of the renderpass to be pulled for execution again + * to begin/continue incremental rendering. 
+ */ + start_katom = rp->start_katom; + if (!WARN_ON(!start_katom)) { + dev_dbg(kbdev->dev, "Unblocking start atom %p\n", + (void *)start_katom); + atomic_dec(&start_katom->blocked); + (void)kbase_js_ctx_list_add_pullable_head_nolock(kbdev, kctx, + start_katom->slot_nr); + } + + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); +} static void js_return_worker(struct work_struct *data) { @@ -2239,7 +2743,11 @@ static void js_return_worker(struct work_struct *data) unsigned long flags; base_jd_core_req core_req = katom->core_req; - KBASE_TLSTREAM_TL_EVENT_ATOM_SOFTSTOP_EX(kbdev, katom); + dev_dbg(kbdev->dev, "%s for atom %p with event code 0x%x\n", + __func__, (void *)katom, katom->event_code); + + if (katom->event_code != BASE_JD_EVENT_END_RP_DONE) + KBASE_TLSTREAM_TL_EVENT_ATOM_SOFTSTOP_EX(kbdev, katom); kbase_backend_complete_wq(kbdev, katom); @@ -2251,7 +2759,8 @@ static void js_return_worker(struct work_struct *data) atomic_dec(&kctx->atoms_pulled); atomic_dec(&kctx->atoms_pulled_slot[js]); - atomic_dec(&katom->blocked); + if (katom->event_code != BASE_JD_EVENT_END_RP_DONE) + atomic_dec(&katom->blocked); spin_lock_irqsave(&kbdev->hwaccess_lock, flags); @@ -2276,7 +2785,17 @@ static void js_return_worker(struct work_struct *data) } if (!atomic_read(&kctx->atoms_pulled)) { + dev_dbg(kbdev->dev, + "No atoms currently pulled from context %p\n", + (void *)kctx); + if (!kctx->slots_pullable) { + dev_dbg(kbdev->dev, + "Context %p %s counted as runnable\n", + (void *)kctx, + kbase_ctx_flag(kctx, KCTX_RUNNABLE_REF) ? + "is" : "isn't"); + WARN_ON(!kbase_ctx_flag(kctx, KCTX_RUNNABLE_REF)); kbase_ctx_flag_clear(kctx, KCTX_RUNNABLE_REF); atomic_dec(&kbdev->js_data.nr_contexts_runnable); @@ -2307,6 +2826,11 @@ static void js_return_worker(struct work_struct *data) spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); if (context_idle) { + dev_dbg(kbdev->dev, + "Context %p %s counted as active\n", + (void *)kctx, + kbase_ctx_flag(kctx, KCTX_ACTIVE) ? 
+ "is" : "isn't"); WARN_ON(!kbase_ctx_flag(kctx, KCTX_ACTIVE)); kbase_ctx_flag_clear(kctx, KCTX_ACTIVE); kbase_pm_context_idle(kbdev); @@ -2318,7 +2842,21 @@ static void js_return_worker(struct work_struct *data) mutex_unlock(&js_kctx_info->ctx.jsctx_mutex); mutex_unlock(&js_devdata->queue_mutex); + if (katom->core_req & BASE_JD_REQ_START_RENDERPASS) { + mutex_lock(&kctx->jctx.lock); + js_return_of_start_rp(katom); + mutex_unlock(&kctx->jctx.lock); + } else if (katom->event_code == BASE_JD_EVENT_END_RP_DONE) { + mutex_lock(&kctx->jctx.lock); + js_return_of_end_rp(katom); + mutex_unlock(&kctx->jctx.lock); + } + katom->atom_flags &= ~KBASE_KATOM_FLAG_HOLDING_CTX_REF; + dev_dbg(kbdev->dev, "JS: retained state %s finished", + kbasep_js_has_atom_finished(&retained_state) ? + "has" : "hasn't"); + WARN_ON(kbasep_js_has_atom_finished(&retained_state)); kbasep_js_runpool_release_ctx_and_katom_retained_state(kbdev, kctx, @@ -2327,10 +2865,16 @@ static void js_return_worker(struct work_struct *data) kbase_js_sched_all(kbdev); kbase_backend_complete_wq_post_sched(kbdev, core_req); + + dev_dbg(kbdev->dev, "Leaving %s for atom %p\n", + __func__, (void *)katom); } void kbase_js_unpull(struct kbase_context *kctx, struct kbase_jd_atom *katom) { + dev_dbg(kctx->kbdev->dev, "Unpulling atom %p in kctx %p\n", + (void *)katom, (void *)kctx); + lockdep_assert_held(&kctx->kbdev->hwaccess_lock); jsctx_rb_unpull(kctx, katom); @@ -2346,6 +2890,157 @@ void kbase_js_unpull(struct kbase_context *kctx, struct kbase_jd_atom *katom) queue_work(kctx->jctx.job_done_wq, &katom->work); } +/** + * js_complete_start_rp() - Handle completion of atom that starts a renderpass + * @kctx: Context pointer + * @start_katom: Pointer to the atom that completed + * + * Put any references to virtual memory regions that might have been added by + * kbase_job_slot_softstop_start_rp() because the tiler job chain completed + * despite any pending soft-stop request. 
+ * + * If the atom that just completed was soft-stopped during a previous attempt to + * run it then there should be a blocked end-of-renderpass atom waiting for it, + * which we must unblock to process the output of the tiler job chain. + * + * Return: true if caller should call kbase_backend_ctx_count_changed() + */ +static bool js_complete_start_rp(struct kbase_context *kctx, + struct kbase_jd_atom *const start_katom) +{ + struct kbase_device *const kbdev = kctx->kbdev; + struct kbase_jd_renderpass *rp; + bool timer_sync = false; + + lockdep_assert_held(&kctx->jctx.lock); + + if (WARN_ON(!(start_katom->core_req & BASE_JD_REQ_START_RENDERPASS))) + return false; + + compiletime_assert((1ull << (sizeof(start_katom->renderpass_id) * 8)) <= + ARRAY_SIZE(kctx->jctx.renderpasses), + "Should check invalid access to renderpasses"); + + rp = &kctx->jctx.renderpasses[start_katom->renderpass_id]; + + if (WARN_ON(rp->start_katom != start_katom)) + return false; + + dev_dbg(kctx->kbdev->dev, + "Start atom %p is done in state %d of RP %d\n", + (void *)start_katom, (int)rp->state, + start_katom->renderpass_id); + + if (WARN_ON(rp->state == KBASE_JD_RP_COMPLETE)) + return false; + + if (rp->state == KBASE_JD_RP_PEND_OOM || + rp->state == KBASE_JD_RP_RETRY_PEND_OOM) { + unsigned long flags; + + dev_dbg(kctx->kbdev->dev, + "Start atom %p completed before soft-stop\n", + (void *)start_katom); + + kbase_gpu_vm_lock(kctx); + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + + while (!list_empty(&rp->oom_reg_list)) { + struct kbase_va_region *reg = + list_first_entry(&rp->oom_reg_list, + struct kbase_va_region, link); + + WARN_ON(reg->flags & KBASE_REG_VA_FREED); + dev_dbg(kctx->kbdev->dev, "Deleting region %p from list\n", + (void *)reg); + list_del_init(&reg->link); + kbase_va_region_alloc_put(kctx, reg); + } + + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + kbase_gpu_vm_unlock(kctx); + } else { + dev_dbg(kctx->kbdev->dev, + "Start atom %p did not exceed memory 
threshold\n", + (void *)start_katom); + + WARN_ON(rp->state != KBASE_JD_RP_START && + rp->state != KBASE_JD_RP_RETRY); + } + + if (rp->state == KBASE_JD_RP_RETRY || + rp->state == KBASE_JD_RP_RETRY_PEND_OOM) { + struct kbase_jd_atom *const end_katom = rp->end_katom; + + if (!WARN_ON(!end_katom)) { + unsigned long flags; + + /* Allow the end of the renderpass to be pulled for + * execution again to continue incremental rendering. + */ + dev_dbg(kbdev->dev, "Unblocking end atom %p!\n", + (void *)end_katom); + atomic_dec(&end_katom->blocked); + + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + timer_sync = kbase_js_ctx_list_add_pullable_nolock( + kbdev, kctx, end_katom->slot_nr); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + } + } + + return timer_sync; +} + +/** + * js_complete_end_rp() - Handle final completion of atom that ends a renderpass + * @kctx: Context pointer + * @end_katom: Pointer to the atom that completed for the last time + * + * This function must only be called if the renderpass actually completed + * without the tiler job chain at the start using too much memory; otherwise + * completion of the end-of-renderpass atom is handled similarly to a soft-stop. 
+ */ +static void js_complete_end_rp(struct kbase_context *kctx, + struct kbase_jd_atom *const end_katom) +{ + struct kbase_device *const kbdev = kctx->kbdev; + unsigned long flags; + struct kbase_jd_renderpass *rp; + + lockdep_assert_held(&kctx->jctx.lock); + + if (WARN_ON(!(end_katom->core_req & BASE_JD_REQ_END_RENDERPASS))) + return; + + compiletime_assert((1ull << (sizeof(end_katom->renderpass_id) * 8)) <= + ARRAY_SIZE(kctx->jctx.renderpasses), + "Should check invalid access to renderpasses"); + + rp = &kctx->jctx.renderpasses[end_katom->renderpass_id]; + + if (WARN_ON(rp->end_katom != end_katom)) + return; + + dev_dbg(kbdev->dev, "End atom %p is done in state %d of RP %d\n", + (void *)end_katom, (int)rp->state, end_katom->renderpass_id); + + if (WARN_ON(rp->state == KBASE_JD_RP_COMPLETE) || + WARN_ON(rp->state == KBASE_JD_RP_OOM) || + WARN_ON(rp->state == KBASE_JD_RP_RETRY_OOM)) + return; + + /* Rendering completed without running out of memory. + */ + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + WARN_ON(!list_empty(&rp->oom_reg_list)); + rp->state = KBASE_JD_RP_COMPLETE; + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + + dev_dbg(kbdev->dev, "Renderpass %d is complete\n", + end_katom->renderpass_id); +} + bool kbase_js_complete_atom_wq(struct kbase_context *kctx, struct kbase_jd_atom *katom) { @@ -2361,6 +3056,16 @@ bool kbase_js_complete_atom_wq(struct kbase_context *kctx, kbdev = kctx->kbdev; atom_slot = katom->slot_nr; + dev_dbg(kbdev->dev, "%s for atom %p (s:%d)\n", + __func__, (void *)katom, atom_slot); + + /* Update the incremental rendering state machine. 
+ */ + if (katom->core_req & BASE_JD_REQ_START_RENDERPASS) + timer_sync |= js_complete_start_rp(kctx, katom); + else if (katom->core_req & BASE_JD_REQ_END_RENDERPASS) + js_complete_end_rp(kctx, katom); + js_kctx_info = &kctx->jctx.sched_info; js_devdata = &kbdev->js_data; @@ -2370,6 +3075,9 @@ bool kbase_js_complete_atom_wq(struct kbase_context *kctx, spin_lock_irqsave(&kbdev->hwaccess_lock, flags); if (katom->atom_flags & KBASE_KATOM_FLAG_JSCTX_IN_TREE) { + dev_dbg(kbdev->dev, "Atom %p is in runnable_tree\n", + (void *)katom); + context_idle = !atomic_dec_return(&kctx->atoms_pulled); atomic_dec(&kctx->atoms_pulled_slot[atom_slot]); kctx->atoms_pulled_slot_pri[atom_slot][prio]--; @@ -2386,6 +3094,10 @@ bool kbase_js_complete_atom_wq(struct kbase_context *kctx, * all atoms have now been processed, then unblock the slot */ if (!kctx->atoms_pulled_slot_pri[atom_slot][prio] && kctx->blocked_js[atom_slot][prio]) { + dev_dbg(kbdev->dev, + "kctx %p is no longer blocked from submitting on slot %d at priority %d\n", + (void *)kctx, atom_slot, prio); + kctx->blocked_js[atom_slot][prio] = false; if (kbase_js_ctx_pullable(kctx, atom_slot, true)) timer_sync |= @@ -2436,17 +3148,79 @@ bool kbase_js_complete_atom_wq(struct kbase_context *kctx, /* Mark context as inactive. The pm reference will be dropped later in * jd_done_worker(). */ - if (context_idle) + if (context_idle) { + dev_dbg(kbdev->dev, "kctx %p is no longer active\n", + (void *)kctx); kbase_ctx_flag_clear(kctx, KCTX_ACTIVE); + } spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); if (timer_sync) kbase_backend_ctx_count_changed(kbdev); mutex_unlock(&js_devdata->runpool_mutex); + dev_dbg(kbdev->dev, "Leaving %s\n", __func__); return context_idle; } +/** + * js_end_rp_is_complete() - Check whether an atom that ends a renderpass has + * completed for the last time. + * + * @end_katom: Pointer to the atom that completed on the hardware. 
+ * + * An atom that ends a renderpass may be run on the hardware several times + * before notifying userspace or allowing dependent atoms to be executed. + * + * This function is used to decide whether or not to allow end-of-renderpass + * atom completion. It only returns false if the atom at the start of the + * renderpass was soft-stopped because it used too much memory during the most + * recent attempt at tiling. + * + * Return: True if the atom completed for the last time. + */ +static bool js_end_rp_is_complete(struct kbase_jd_atom *const end_katom) +{ + struct kbase_context *const kctx = end_katom->kctx; + struct kbase_device *const kbdev = kctx->kbdev; + struct kbase_jd_renderpass *rp; + + lockdep_assert_held(&kctx->kbdev->hwaccess_lock); + + if (WARN_ON(!(end_katom->core_req & BASE_JD_REQ_END_RENDERPASS))) + return true; + + compiletime_assert((1ull << (sizeof(end_katom->renderpass_id) * 8)) <= + ARRAY_SIZE(kctx->jctx.renderpasses), + "Should check invalid access to renderpasses"); + + rp = &kctx->jctx.renderpasses[end_katom->renderpass_id]; + + if (WARN_ON(rp->end_katom != end_katom)) + return true; + + dev_dbg(kbdev->dev, + "JS complete end atom %p in state %d of RP %d\n", + (void *)end_katom, (int)rp->state, + end_katom->renderpass_id); + + if (WARN_ON(rp->state == KBASE_JD_RP_COMPLETE)) + return true; + + /* Failure of end-of-renderpass atoms must not return to the + * start of the renderpass. 
+ */ + if (end_katom->event_code != BASE_JD_EVENT_DONE) + return true; + + if (rp->state != KBASE_JD_RP_OOM && + rp->state != KBASE_JD_RP_RETRY_OOM) + return true; + + dev_dbg(kbdev->dev, "Suppressing end atom completion\n"); + return false; +} + struct kbase_jd_atom *kbase_js_complete_atom(struct kbase_jd_atom *katom, ktime_t *end_timestamp) { @@ -2455,14 +3229,23 @@ struct kbase_jd_atom *kbase_js_complete_atom(struct kbase_jd_atom *katom, struct kbase_jd_atom *x_dep = katom->x_post_dep; kbdev = kctx->kbdev; - + dev_dbg(kbdev->dev, "Atom %p complete in kctx %p (post-dep %p)\n", + (void *)katom, (void *)kctx, (void *)x_dep); lockdep_assert_held(&kctx->kbdev->hwaccess_lock); + if ((katom->core_req & BASE_JD_REQ_END_RENDERPASS) && + !js_end_rp_is_complete(katom)) { + katom->event_code = BASE_JD_EVENT_END_RP_DONE; + kbase_js_unpull(kctx, katom); + return NULL; + } + if (katom->will_fail_event_code) katom->event_code = katom->will_fail_event_code; katom->status = KBASE_JD_ATOM_STATE_HW_COMPLETED; + dev_dbg(kbdev->dev, "Atom %p status to HW completed\n", (void *)katom); if (katom->event_code != BASE_JD_EVENT_DONE) { kbase_js_evict_deps(kctx, katom, katom->slot_nr, @@ -2472,28 +3255,111 @@ struct kbase_jd_atom *kbase_js_complete_atom(struct kbase_jd_atom *katom, KBASE_TLSTREAM_AUX_EVENT_JOB_SLOT(kbdev, NULL, katom->slot_nr, 0, TL_JS_EVENT_STOP); + trace_sysgraph_gpu(SGR_COMPLETE, kctx->id, + kbase_jd_atom_id(katom->kctx, katom), katom->slot_nr); + kbase_jd_done(katom, katom->slot_nr, end_timestamp, 0); /* Unblock cross dependency if present */ if (x_dep && (katom->event_code == BASE_JD_EVENT_DONE || - !(x_dep->atom_flags & KBASE_KATOM_FLAG_FAIL_BLOCKER)) && - (x_dep->atom_flags & KBASE_KATOM_FLAG_X_DEP_BLOCKED)) { + !(x_dep->atom_flags & KBASE_KATOM_FLAG_FAIL_BLOCKER)) && + (x_dep->atom_flags & KBASE_KATOM_FLAG_JSCTX_IN_X_DEP_LIST)) { bool was_pullable = kbase_js_ctx_pullable(kctx, x_dep->slot_nr, false); x_dep->atom_flags &= ~KBASE_KATOM_FLAG_X_DEP_BLOCKED; + 
trace_sysgraph(SGR_DEP_RES, kctx->id, + kbase_jd_atom_id(katom->kctx, x_dep)); + dev_dbg(kbdev->dev, "Cleared X_DEP flag on atom %p\n", + (void *)x_dep); + kbase_js_move_to_tree(x_dep); + if (!was_pullable && kbase_js_ctx_pullable(kctx, x_dep->slot_nr, false)) kbase_js_ctx_list_add_pullable_nolock(kbdev, kctx, x_dep->slot_nr); - if (x_dep->atom_flags & KBASE_KATOM_FLAG_JSCTX_IN_TREE) + if (x_dep->atom_flags & KBASE_KATOM_FLAG_JSCTX_IN_TREE) { + dev_dbg(kbdev->dev, "Atom %p is in runnable tree\n", + (void *)x_dep); return x_dep; + } + } else { + dev_dbg(kbdev->dev, + "No cross-slot dep to unblock for atom %p\n", + (void *)katom); } return NULL; } +/** + * kbase_js_atom_blocked_on_x_dep - Decide whether to ignore a cross-slot + * dependency + * @katom: Pointer to an atom in the slot ringbuffer + * + * A cross-slot dependency is ignored if necessary to unblock incremental + * rendering. If the atom at the start of a renderpass used too much memory + * and was soft-stopped then the atom at the end of a renderpass is submitted + * to hardware regardless of its dependency on the start-of-renderpass atom. + * This can happen multiple times for the same pair of atoms. 
+ * + * Return: true to block the atom or false to allow it to be submitted to + * hardware + */ +bool kbase_js_atom_blocked_on_x_dep(struct kbase_jd_atom *const katom) +{ + struct kbase_context *const kctx = katom->kctx; + struct kbase_device *kbdev = kctx->kbdev; + struct kbase_jd_renderpass *rp; + + lockdep_assert_held(&kbdev->hwaccess_lock); + + if (!(katom->atom_flags & + KBASE_KATOM_FLAG_X_DEP_BLOCKED)) { + dev_dbg(kbdev->dev, "Atom %p is not blocked on a cross-slot dependency", + (void *)katom); + return false; + } + + if (!(katom->core_req & BASE_JD_REQ_END_RENDERPASS)) { + dev_dbg(kbdev->dev, "Atom %p is blocked on a cross-slot dependency", + (void *)katom); + return true; + } + + compiletime_assert((1ull << (sizeof(katom->renderpass_id) * 8)) <= + ARRAY_SIZE(kctx->jctx.renderpasses), + "Should check invalid access to renderpasses"); + + rp = &kctx->jctx.renderpasses[katom->renderpass_id]; + /* We can read a subset of renderpass state without holding + * higher-level locks (but not end_katom, for example). + */ + + WARN_ON(rp->state == KBASE_JD_RP_COMPLETE); + + dev_dbg(kbdev->dev, "End atom has cross-slot dep in state %d\n", + (int)rp->state); + + if (rp->state != KBASE_JD_RP_OOM && rp->state != KBASE_JD_RP_RETRY_OOM) + return true; + + /* Tiler ran out of memory so allow the fragment job chain to run + * if it only depends on the tiler job chain. 
+ */ + if (katom->x_pre_dep != rp->start_katom) { + dev_dbg(kbdev->dev, "Dependency is on %p not start atom %p\n", + (void *)katom->x_pre_dep, (void *)rp->start_katom); + return true; + } + + dev_dbg(kbdev->dev, "Ignoring cross-slot dep on atom %p\n", + (void *)katom->x_pre_dep); + + return false; +} + void kbase_js_sched(struct kbase_device *kbdev, int js_mask) { struct kbasep_js_device_data *js_devdata; @@ -2502,6 +3368,9 @@ void kbase_js_sched(struct kbase_device *kbdev, int js_mask) bool ctx_waiting[BASE_JM_MAX_NR_SLOTS]; int js; + dev_dbg(kbdev->dev, "%s kbdev %p mask 0x%x\n", + __func__, (void *)kbdev, (unsigned int)js_mask); + js_devdata = &kbdev->js_data; down(&js_devdata->schedule_sem); @@ -2524,15 +3393,24 @@ void kbase_js_sched(struct kbase_device *kbdev, int js_mask) if (!kctx) { js_mask &= ~(1 << js); - break; /* No contexts on pullable list */ + dev_dbg(kbdev->dev, + "No kctx on pullable list (s:%d)\n", + js); + break; } if (!kbase_ctx_flag(kctx, KCTX_ACTIVE)) { context_idle = true; + dev_dbg(kbdev->dev, + "kctx %p is not active (s:%d)\n", + (void *)kctx, js); + if (kbase_pm_context_active_handle_suspend( kbdev, KBASE_PM_SUSPEND_HANDLER_DONT_INCREASE)) { + dev_dbg(kbdev->dev, + "Suspend pending (s:%d)\n", js); /* Suspend pending - return context to * queue and stop scheduling */ mutex_lock( @@ -2552,7 +3430,11 @@ void kbase_js_sched(struct kbase_device *kbdev, int js_mask) if (!kbase_js_use_ctx(kbdev, kctx, js)) { mutex_lock( &kctx->jctx.sched_info.ctx.jsctx_mutex); - /* Context can not be used at this time */ + + dev_dbg(kbdev->dev, + "kctx %p cannot be used at this time\n", + kctx); + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); if (kbase_js_ctx_pullable(kctx, js, false) || kbase_ctx_flag(kctx, KCTX_PRIVILEGED)) @@ -2582,12 +3464,20 @@ void kbase_js_sched(struct kbase_device *kbdev, int js_mask) kbase_ctx_flag_clear(kctx, KCTX_PULLED); - if (!kbase_jm_kick(kbdev, 1 << js)) - /* No more jobs can be submitted on this slot */ + if 
(!kbase_jm_kick(kbdev, 1 << js)) { + dev_dbg(kbdev->dev, + "No more jobs can be submitted (s:%d)\n", + js); js_mask &= ~(1 << js); - + } if (!kbase_ctx_flag(kctx, KCTX_PULLED)) { - bool pullable = kbase_js_ctx_pullable(kctx, js, + bool pullable; + + dev_dbg(kbdev->dev, + "No atoms pulled from kctx %p (s:%d)\n", + (void *)kctx, js); + + pullable = kbase_js_ctx_pullable(kctx, js, true); /* Failed to pull jobs - push to head of list. @@ -2644,7 +3534,8 @@ void kbase_js_sched(struct kbase_device *kbdev, int js_mask) break; /* Could not run atoms on this slot */ } - /* Push to back of list */ + dev_dbg(kbdev->dev, "Push kctx %p to back of list\n", + (void *)kctx); if (kbase_js_ctx_pullable(kctx, js, true)) timer_sync |= kbase_js_ctx_list_add_pullable_nolock( @@ -2664,8 +3555,11 @@ void kbase_js_sched(struct kbase_device *kbdev, int js_mask) for (js = 0; js < BASE_JM_MAX_NR_SLOTS; js++) { if (kbdev->hwaccess.active_kctx[js] == last_active[js] && - ctx_waiting[js]) + ctx_waiting[js]) { + dev_dbg(kbdev->dev, "Marking kctx %p as inactive (s:%d)\n", + (void *)last_active[js], js); kbdev->hwaccess.active_kctx[js] = NULL; + } } mutex_unlock(&js_devdata->queue_mutex); @@ -2754,8 +3648,7 @@ void kbase_js_zap_context(struct kbase_context *kctx) * back (this already cancels the jobs) */ - KBASE_TRACE_ADD(kbdev, JM_ZAP_NON_SCHEDULED, kctx, NULL, 0u, - kbase_ctx_flag(kctx, KCTX_SCHEDULED)); + KBASE_KTRACE_ADD_JM(kbdev, JM_ZAP_NON_SCHEDULED, kctx, NULL, 0u, kbase_ctx_flag(kctx, KCTX_SCHEDULED)); dev_dbg(kbdev->dev, "Zap: Ctx %p scheduled=0", kctx); @@ -2775,8 +3668,7 @@ void kbase_js_zap_context(struct kbase_context *kctx) /* Case c: didn't evict, but it is scheduled - it's in the Run * Pool */ - KBASE_TRACE_ADD(kbdev, JM_ZAP_SCHEDULED, kctx, NULL, 0u, - kbase_ctx_flag(kctx, KCTX_SCHEDULED)); + KBASE_KTRACE_ADD_JM(kbdev, JM_ZAP_SCHEDULED, kctx, NULL, 0u, kbase_ctx_flag(kctx, KCTX_SCHEDULED)); dev_dbg(kbdev->dev, "Zap: Ctx %p is in RunPool", kctx); /* Disable the ctx from submitting 
any more jobs */ @@ -2787,7 +3679,7 @@ void kbase_js_zap_context(struct kbase_context *kctx) /* Retain and (later) release the context whilst it is is now * disallowed from submitting jobs - ensures that someone * somewhere will be removing the context later on */ - was_retained = kbasep_js_runpool_retain_ctx_nolock(kbdev, kctx); + was_retained = kbase_ctx_sched_inc_refcount_nolock(kctx); /* Since it's scheduled and we have the jsctx_mutex, it must be * retained successfully */ @@ -2811,7 +3703,7 @@ void kbase_js_zap_context(struct kbase_context *kctx) kbasep_js_runpool_release_ctx(kbdev, kctx); } - KBASE_TRACE_ADD(kbdev, JM_ZAP_DONE, kctx, NULL, 0u, 0u); + KBASE_KTRACE_ADD_JM(kbdev, JM_ZAP_DONE, kctx, NULL, 0u, 0u); /* After this, you must wait on both the * kbase_jd_context::zero_jobs_wait and the @@ -2857,7 +3749,7 @@ static void kbase_js_foreach_ctx_job(struct kbase_context *kctx, spin_lock_irqsave(&kbdev->hwaccess_lock, flags); - KBASE_TRACE_ADD_REFCOUNT(kbdev, JS_POLICY_FOREACH_CTX_JOBS, kctx, NULL, + KBASE_KTRACE_ADD_JM_REFCOUNT(kbdev, JS_POLICY_FOREACH_CTX_JOBS, kctx, NULL, 0u, trace_get_refcnt(kbdev, kctx)); /* Invoke callback on jobs on each slot in turn */ diff --git a/drivers/gpu/arm/bifrost/mali_kbase_js.h b/drivers/gpu/arm/bifrost/mali_kbase_js.h index 355da27edc1b..541acd4afed7 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_js.h +++ b/drivers/gpu/arm/bifrost/mali_kbase_js.h @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2011-2018 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2011-2020 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -30,883 +30,11 @@ #ifndef _KBASE_JS_H_ #define _KBASE_JS_H_ -#include "mali_kbase_js_defs.h" -#include "mali_kbase_context.h" +#include "context/mali_kbase_context.h" #include "mali_kbase_defs.h" #include "mali_kbase_debug.h" +#include <mali_kbase_ctx_sched.h> +#include "jm/mali_kbase_jm_js.h" +#include "jm/mali_kbase_js_defs.h" -#include "mali_kbase_js_ctx_attr.h" - -/** - * @addtogroup base_api - * @{ - */ - -/** - * @addtogroup base_kbase_api - * @{ - */ - -/** - * @addtogroup kbase_js Job Scheduler Internal APIs - * @{ - * - * These APIs are Internal to KBase. - */ - -/** - * @brief Initialize the Job Scheduler - * - * The struct kbasep_js_device_data sub-structure of \a kbdev must be zero - * initialized before passing to the kbasep_js_devdata_init() function. This is - * to give efficient error path code. - */ -int kbasep_js_devdata_init(struct kbase_device * const kbdev); - -/** - * @brief Halt the Job Scheduler. - * - * It is safe to call this on \a kbdev even if it the kbasep_js_device_data - * sub-structure was never initialized/failed initialization, to give efficient - * error-path code. - * - * For this to work, the struct kbasep_js_device_data sub-structure of \a kbdev must - * be zero initialized before passing to the kbasep_js_devdata_init() - * function. This is to give efficient error path code. - * - * It is a Programming Error to call this whilst there are still kbase_context - * structures registered with this scheduler. - * - */ -void kbasep_js_devdata_halt(struct kbase_device *kbdev); - -/** - * @brief Terminate the Job Scheduler - * - * It is safe to call this on \a kbdev even if it the kbasep_js_device_data - * sub-structure was never initialized/failed initialization, to give efficient - * error-path code.
- * - * For this to work, the struct kbasep_js_device_data sub-structure of \a kbdev must - * be zero initialized before passing to the kbasep_js_devdata_init() - * function. This is to give efficient error path code. - * - * It is a Programming Error to call this whilst there are still kbase_context - * structures registered with this scheduler. - */ -void kbasep_js_devdata_term(struct kbase_device *kbdev); - -/** - * @brief Initialize the Scheduling Component of a struct kbase_context on the Job Scheduler. - * - * This effectively registers a struct kbase_context with a Job Scheduler. - * - * It does not register any jobs owned by the struct kbase_context with the scheduler. - * Those must be separately registered by kbasep_js_add_job(). - * - * The struct kbase_context must be zero intitialized before passing to the - * kbase_js_init() function. This is to give efficient error path code. - */ -int kbasep_js_kctx_init(struct kbase_context * const kctx); - -/** - * @brief Terminate the Scheduling Component of a struct kbase_context on the Job Scheduler - * - * This effectively de-registers a struct kbase_context from its Job Scheduler - * - * It is safe to call this on a struct kbase_context that has never had or failed - * initialization of its jctx.sched_info member, to give efficient error-path - * code. - * - * For this to work, the struct kbase_context must be zero intitialized before passing - * to the kbase_js_init() function. - * - * It is a Programming Error to call this whilst there are still jobs - * registered with this context. - */ -void kbasep_js_kctx_term(struct kbase_context *kctx); - -/** - * @brief Add a job chain to the Job Scheduler, and take necessary actions to - * schedule the context/run the job. 
- * - * This atomically does the following: - * - Update the numbers of jobs information - * - Add the job to the run pool if necessary (part of init_job) - * - * Once this is done, then an appropriate action is taken: - * - If the ctx is scheduled, it attempts to start the next job (which might be - * this added job) - * - Otherwise, and if this is the first job on the context, it enqueues it on - * the Policy Queue - * - * The Policy's Queue can be updated by this in the following ways: - * - In the above case that this is the first job on the context - * - If the context is high priority and the context is not scheduled, then it - * could cause the Policy to schedule out a low-priority context, allowing - * this context to be scheduled in. - * - * If the context is already scheduled on the RunPool, then adding a job to it - * is guarenteed not to update the Policy Queue. And so, the caller is - * guarenteed to not need to try scheduling a context from the Run Pool - it - * can safely assert that the result is false. - * - * It is a programming error to have more than U32_MAX jobs in flight at a time. - * - * The following locking conditions are made on the caller: - * - it must \em not hold kbasep_js_kctx_info::ctx::jsctx_mutex. - * - it must \em not hold hwaccess_lock (as this will be obtained internally) - * - it must \em not hold kbasep_js_device_data::runpool_mutex (as this will be - * obtained internally) - * - it must \em not hold kbasep_jd_device_data::queue_mutex (again, it's used internally). - * - * @return true indicates that the Policy Queue was updated, and so the - * caller will need to try scheduling a context onto the Run Pool. - * @return false indicates that no updates were made to the Policy Queue, - * so no further action is required from the caller. This is \b always returned - * when the context is currently scheduled. 
- */ -bool kbasep_js_add_job(struct kbase_context *kctx, struct kbase_jd_atom *atom); - -/** - * @brief Remove a job chain from the Job Scheduler, except for its 'retained state'. - * - * Completely removing a job requires several calls: - * - kbasep_js_copy_atom_retained_state(), to capture the 'retained state' of - * the atom - * - kbasep_js_remove_job(), to partially remove the atom from the Job Scheduler - * - kbasep_js_runpool_release_ctx_and_katom_retained_state(), to release the - * remaining state held as part of the job having been run. - * - * In the common case of atoms completing normally, this set of actions is more optimal for spinlock purposes than having kbasep_js_remove_job() handle all of the actions. - * - * In the case of cancelling atoms, it is easier to call kbasep_js_remove_cancelled_job(), which handles all the necessary actions. - * - * It is a programming error to call this when: - * - \a atom is not a job belonging to kctx. - * - \a atom has already been removed from the Job Scheduler. - * - \a atom is still in the runpool - * - * Do not use this for removing jobs being killed by kbase_jd_cancel() - use - * kbasep_js_remove_cancelled_job() instead. - * - * The following locking conditions are made on the caller: - * - it must hold kbasep_js_kctx_info::ctx::jsctx_mutex. - * - */ -void kbasep_js_remove_job(struct kbase_device *kbdev, struct kbase_context *kctx, struct kbase_jd_atom *atom); - -/** - * @brief Completely remove a job chain from the Job Scheduler, in the case - * where the job chain was cancelled. - * - * This is a variant of kbasep_js_remove_job() that takes care of removing all - * of the retained state too. This is generally useful for cancelled atoms, - * which need not be handled in an optimal way. - * - * It is a programming error to call this when: - * - \a atom is not a job belonging to kctx. - * - \a atom has already been removed from the Job Scheduler. 
- * - \a atom is still in the runpool: - * - it is not being killed with kbasep_jd_cancel() - * - * The following locking conditions are made on the caller: - * - it must hold kbasep_js_kctx_info::ctx::jsctx_mutex. - * - it must \em not hold the hwaccess_lock, (as this will be obtained - * internally) - * - it must \em not hold kbasep_js_device_data::runpool_mutex (as this could be - * obtained internally) - * - * @return true indicates that ctx attributes have changed and the caller - * should call kbase_js_sched_all() to try to run more jobs - * @return false otherwise - */ -bool kbasep_js_remove_cancelled_job(struct kbase_device *kbdev, - struct kbase_context *kctx, - struct kbase_jd_atom *katom); - -/** - * @brief Refcount a context as being busy, preventing it from being scheduled - * out. - * - * @note This function can safely be called from IRQ context. - * - * The following locking conditions are made on the caller: - * - it must \em not hold mmu_hw_mutex and hwaccess_lock, because they will be - * used internally. - * - * @return value != false if the retain succeeded, and the context will not be scheduled out. - * @return false if the retain failed (because the context is being/has been scheduled out). - */ -bool kbasep_js_runpool_retain_ctx(struct kbase_device *kbdev, struct kbase_context *kctx); - -/** - * @brief Refcount a context as being busy, preventing it from being scheduled - * out. - * - * @note This function can safely be called from IRQ context. - * - * The following locks must be held by the caller: - * - mmu_hw_mutex, hwaccess_lock - * - * @return value != false if the retain succeeded, and the context will not be scheduled out. - * @return false if the retain failed (because the context is being/has been scheduled out). 
- */ -bool kbasep_js_runpool_retain_ctx_nolock(struct kbase_device *kbdev, struct kbase_context *kctx); - -/** - * @brief Lookup a context in the Run Pool based upon its current address space - * and ensure that is stays scheduled in. - * - * The context is refcounted as being busy to prevent it from scheduling - * out. It must be released with kbasep_js_runpool_release_ctx() when it is no - * longer required to stay scheduled in. - * - * @note This function can safely be called from IRQ context. - * - * The following locking conditions are made on the caller: - * - it must \em not hold the hwaccess_lock, because it will be used internally. - * If the hwaccess_lock is already held, then the caller should use - * kbasep_js_runpool_lookup_ctx_nolock() instead. - * - * @return a valid struct kbase_context on success, which has been refcounted as being busy. - * @return NULL on failure, indicating that no context was found in \a as_nr - */ -struct kbase_context *kbasep_js_runpool_lookup_ctx(struct kbase_device *kbdev, int as_nr); - -/** - * @brief Handling the requeuing/killing of a context that was evicted from the - * policy queue or runpool. - * - * This should be used whenever handing off a context that has been evicted - * from the policy queue or the runpool: - * - If the context is not dying and has jobs, it gets re-added to the policy - * queue - * - Otherwise, it is not added - * - * In addition, if the context is dying the jobs are killed asynchronously. - * - * In all cases, the Power Manager active reference is released - * (kbase_pm_context_idle()) whenever the has_pm_ref parameter is true. \a - * has_pm_ref must be set to false whenever the context was not previously in - * the runpool and does not hold a Power Manager active refcount. Note that - * contexts in a rollback of kbasep_js_try_schedule_head_ctx() might have an - * active refcount even though they weren't in the runpool. 
- * - * The following locking conditions are made on the caller: - * - it must hold kbasep_js_kctx_info::ctx::jsctx_mutex. - * - it must \em not hold kbasep_jd_device_data::queue_mutex (as this will be - * obtained internally) - */ -void kbasep_js_runpool_requeue_or_kill_ctx(struct kbase_device *kbdev, struct kbase_context *kctx, bool has_pm_ref); - -/** - * @brief Release a refcount of a context being busy, allowing it to be - * scheduled out. - * - * When the refcount reaches zero and the context \em might be scheduled out - * (depending on whether the Scheudling Policy has deemed it so, or if it has run - * out of jobs). - * - * If the context does get scheduled out, then The following actions will be - * taken as part of deschduling a context: - * - For the context being descheduled: - * - If the context is in the processing of dying (all the jobs are being - * removed from it), then descheduling also kills off any jobs remaining in the - * context. - * - If the context is not dying, and any jobs remain after descheduling the - * context then it is re-enqueued to the Policy's Queue. - * - Otherwise, the context is still known to the scheduler, but remains absent - * from the Policy Queue until a job is next added to it. - * - In all descheduling cases, the Power Manager active reference (obtained - * during kbasep_js_try_schedule_head_ctx()) is released (kbase_pm_context_idle()). - * - * Whilst the context is being descheduled, this also handles actions that - * cause more atoms to be run: - * - Attempt submitting atoms when the Context Attributes on the Runpool have - * changed. This is because the context being scheduled out could mean that - * there are more opportunities to run atoms. - * - Attempt submitting to a slot that was previously blocked due to affinity - * restrictions. This is usually only necessary when releasing a context - * happens as part of completing a previous job, but is harmless nonetheless. 
- * - Attempt scheduling in a new context (if one is available), and if necessary, - * running a job from that new context. - * - * Unlike retaining a context in the runpool, this function \b cannot be called - * from IRQ context. - * - * It is a programming error to call this on a \a kctx that is not currently - * scheduled, or that already has a zero refcount. - * - * The following locking conditions are made on the caller: - * - it must \em not hold the hwaccess_lock, because it will be used internally. - * - it must \em not hold kbasep_js_kctx_info::ctx::jsctx_mutex. - * - it must \em not hold kbasep_js_device_data::runpool_mutex (as this will be - * obtained internally) - * - it must \em not hold the kbase_device::mmu_hw_mutex (as this will be - * obtained internally) - * - it must \em not hold kbasep_jd_device_data::queue_mutex (as this will be - * obtained internally) - * - */ -void kbasep_js_runpool_release_ctx(struct kbase_device *kbdev, struct kbase_context *kctx); - -/** - * @brief Variant of kbasep_js_runpool_release_ctx() that handles additional - * actions from completing an atom. - * - * This is usually called as part of completing an atom and releasing the - * refcount on the context held by the atom. - * - * Therefore, the extra actions carried out are part of handling actions queued - * on a completed atom, namely: - * - Releasing the atom's context attributes - * - Retrying the submission on a particular slot, because we couldn't submit - * on that slot from an IRQ handler. 
- * - * The locking conditions of this function are the same as those for - * kbasep_js_runpool_release_ctx() - */ -void kbasep_js_runpool_release_ctx_and_katom_retained_state(struct kbase_device *kbdev, struct kbase_context *kctx, struct kbasep_js_atom_retained_state *katom_retained_state); - -/** - * @brief Variant of kbase_js_runpool_release_ctx() that assumes that - * kbasep_js_device_data::runpool_mutex and - * kbasep_js_kctx_info::ctx::jsctx_mutex are held by the caller, and does not - * attempt to schedule new contexts. - */ -void kbasep_js_runpool_release_ctx_nolock(struct kbase_device *kbdev, - struct kbase_context *kctx); - -/** - * @brief Schedule in a privileged context - * - * This schedules a context in regardless of the context priority. - * If the runpool is full, a context will be forced out of the runpool and the function will wait - * for the new context to be scheduled in. - * The context will be kept scheduled in (and the corresponding address space reserved) until - * kbasep_js_release_privileged_ctx is called). - * - * The following locking conditions are made on the caller: - * - it must \em not hold the hwaccess_lock, because it will be used internally. - * - it must \em not hold kbasep_js_device_data::runpool_mutex (as this will be - * obtained internally) - * - it must \em not hold the kbase_device::mmu_hw_mutex (as this will be - * obtained internally) - * - it must \em not hold kbasep_jd_device_data::queue_mutex (again, it's used internally). - * - it must \em not hold kbasep_js_kctx_info::ctx::jsctx_mutex, because it will - * be used internally. - * - */ -void kbasep_js_schedule_privileged_ctx(struct kbase_device *kbdev, struct kbase_context *kctx); - -/** - * @brief Release a privileged context, allowing it to be scheduled out. - * - * See kbasep_js_runpool_release_ctx for potential side effects. 
- * - * The following locking conditions are made on the caller: - * - it must \em not hold the hwaccess_lock, because it will be used internally. - * - it must \em not hold kbasep_js_kctx_info::ctx::jsctx_mutex. - * - it must \em not hold kbasep_js_device_data::runpool_mutex (as this will be - * obtained internally) - * - it must \em not hold the kbase_device::mmu_hw_mutex (as this will be - * obtained internally) - * - */ -void kbasep_js_release_privileged_ctx(struct kbase_device *kbdev, struct kbase_context *kctx); - -/** - * @brief Try to submit the next job on each slot - * - * The following locks may be used: - * - kbasep_js_device_data::runpool_mutex - * - hwaccess_lock - */ -void kbase_js_try_run_jobs(struct kbase_device *kbdev); - -/** - * @brief Suspend the job scheduler during a Power Management Suspend event. - * - * Causes all contexts to be removed from the runpool, and prevents any - * contexts from (re)entering the runpool. - * - * This does not handle suspending the one privileged context: the caller must - * instead do this by by suspending the GPU HW Counter Instrumentation. - * - * This will eventually cause all Power Management active references held by - * contexts on the runpool to be released, without running any more atoms. - * - * The caller must then wait for all Power Mangement active refcount to become - * zero before completing the suspend. - * - * The emptying mechanism may take some time to complete, since it can wait for - * jobs to complete naturally instead of forcing them to end quickly. However, - * this is bounded by the Job Scheduler's Job Timeouts. Hence, this - * function is guaranteed to complete in a finite time. - */ -void kbasep_js_suspend(struct kbase_device *kbdev); - -/** - * @brief Resume the Job Scheduler after a Power Management Resume event. 
- * - * This restores the actions from kbasep_js_suspend(): - * - Schedules contexts back into the runpool - * - Resumes running atoms on the GPU - */ -void kbasep_js_resume(struct kbase_device *kbdev); - -/** - * @brief Submit an atom to the job scheduler. - * - * The atom is enqueued on the context's ringbuffer. The caller must have - * ensured that all dependencies can be represented in the ringbuffer. - * - * Caller must hold jctx->lock - * - * @param[in] kctx Context pointer - * @param[in] atom Pointer to the atom to submit - * - * @return Whether the context requires to be enqueued. */ -bool kbase_js_dep_resolved_submit(struct kbase_context *kctx, - struct kbase_jd_atom *katom); - -/** - * jsctx_ll_flush_to_rb() - Pushes atoms from the linked list to ringbuffer. - * @kctx: Context Pointer - * @prio: Priority (specifies the queue together with js). - * @js: Job slot (specifies the queue together with prio). - * - * Pushes all possible atoms from the linked list to the ringbuffer. - * Number of atoms are limited to free space in the ringbuffer and - * number of available atoms in the linked list. - * - */ -void jsctx_ll_flush_to_rb(struct kbase_context *kctx, int prio, int js); -/** - * @brief Pull an atom from a context in the job scheduler for execution. - * - * The atom will not be removed from the ringbuffer at this stage. - * - * The HW access lock must be held when calling this function. - * - * @param[in] kctx Context to pull from - * @param[in] js Job slot to pull from - * @return Pointer to an atom, or NULL if there are no atoms for this - * slot that can be currently run. - */ -struct kbase_jd_atom *kbase_js_pull(struct kbase_context *kctx, int js); - -/** - * @brief Return an atom to the job scheduler ringbuffer. - * - * An atom is 'unpulled' if execution is stopped but intended to be returned to - * later. The most common reason for this is that the atom has been - * soft-stopped. 
- * - * Note that if multiple atoms are to be 'unpulled', they must be returned in - * the reverse order to which they were originally pulled. It is a programming - * error to return atoms in any other order. - * - * The HW access lock must be held when calling this function. - * - * @param[in] kctx Context pointer - * @param[in] atom Pointer to the atom to unpull - */ -void kbase_js_unpull(struct kbase_context *kctx, struct kbase_jd_atom *katom); - -/** - * @brief Complete an atom from jd_done_worker(), removing it from the job - * scheduler ringbuffer. - * - * If the atom failed then all dependee atoms marked for failure propagation - * will also fail. - * - * @param[in] kctx Context pointer - * @param[in] katom Pointer to the atom to complete - * @return true if the context is now idle (no jobs pulled) - * false otherwise - */ -bool kbase_js_complete_atom_wq(struct kbase_context *kctx, - struct kbase_jd_atom *katom); - -/** - * @brief Complete an atom. - * - * Most of the work required to complete an atom will be performed by - * jd_done_worker(). - * - * The HW access lock must be held when calling this function. - * - * @param[in] katom Pointer to the atom to complete - * @param[in] end_timestamp The time that the atom completed (may be NULL) - * - * Return: Atom that has now been unblocked and can now be run, or NULL if none - */ -struct kbase_jd_atom *kbase_js_complete_atom(struct kbase_jd_atom *katom, - ktime_t *end_timestamp); - -/** - * @brief Submit atoms from all available contexts. - * - * This will attempt to submit as many jobs as possible to the provided job - * slots. It will exit when either all job slots are full, or all contexts have - * been used. 
- * - * @param[in] kbdev Device pointer - * @param[in] js_mask Mask of job slots to submit to - */ -void kbase_js_sched(struct kbase_device *kbdev, int js_mask); - -/** - * kbase_jd_zap_context - Attempt to deschedule a context that is being - * destroyed - * @kctx: Context pointer - * - * This will attempt to remove a context from any internal job scheduler queues - * and perform any other actions to ensure a context will not be submitted - * from. - * - * If the context is currently scheduled, then the caller must wait for all - * pending jobs to complete before taking any further action. - */ -void kbase_js_zap_context(struct kbase_context *kctx); - -/** - * @brief Validate an atom - * - * This will determine whether the atom can be scheduled onto the GPU. Atoms - * with invalid combinations of core requirements will be rejected. - * - * @param[in] kbdev Device pointer - * @param[in] katom Atom to validate - * @return true if atom is valid - * false otherwise - */ -bool kbase_js_is_atom_valid(struct kbase_device *kbdev, - struct kbase_jd_atom *katom); - -/** - * kbase_js_set_timeouts - update all JS timeouts with user specified data - * @kbdev: Device pointer - * - * Timeouts are specified through the 'js_timeouts' sysfs file. If a timeout is - * set to a positive number then that becomes the new value used, if a timeout - * is negative then the default is set. - */ -void kbase_js_set_timeouts(struct kbase_device *kbdev); - -/** - * kbase_js_set_ctx_priority - set the context priority - * @kctx: Context pointer - * @new_priority: New priority value for the Context - * - * The context priority is set to a new value and it is moved to the - * pullable/unpullable list as per the new priority. 
- */ -void kbase_js_set_ctx_priority(struct kbase_context *kctx, int new_priority); - - -/** - * kbase_js_update_ctx_priority - update the context priority - * @kctx: Context pointer - * - * The context priority gets updated as per the priority of atoms currently in - * use for that context, but only if system priority mode for context scheduling - * is being used. - */ -void kbase_js_update_ctx_priority(struct kbase_context *kctx); - -/* - * Helpers follow - */ - -/** - * @brief Check that a context is allowed to submit jobs on this policy - * - * The purpose of this abstraction is to hide the underlying data size, and wrap up - * the long repeated line of code. - * - * As with any bool, never test the return value with true. - * - * The caller must hold hwaccess_lock. - */ -static inline bool kbasep_js_is_submit_allowed(struct kbasep_js_device_data *js_devdata, struct kbase_context *kctx) -{ - u16 test_bit; - - /* Ensure context really is scheduled in */ - KBASE_DEBUG_ASSERT(kctx->as_nr != KBASEP_AS_NR_INVALID); - KBASE_DEBUG_ASSERT(kbase_ctx_flag(kctx, KCTX_SCHEDULED)); - - test_bit = (u16) (1u << kctx->as_nr); - - return (bool) (js_devdata->runpool_irq.submit_allowed & test_bit); -} - -/** - * @brief Allow a context to submit jobs on this policy - * - * The purpose of this abstraction is to hide the underlying data size, and wrap up - * the long repeated line of code. - * - * The caller must hold hwaccess_lock. 
- */ -static inline void kbasep_js_set_submit_allowed(struct kbasep_js_device_data *js_devdata, struct kbase_context *kctx) -{ - u16 set_bit; - - /* Ensure context really is scheduled in */ - KBASE_DEBUG_ASSERT(kctx->as_nr != KBASEP_AS_NR_INVALID); - KBASE_DEBUG_ASSERT(kbase_ctx_flag(kctx, KCTX_SCHEDULED)); - - set_bit = (u16) (1u << kctx->as_nr); - - dev_dbg(kctx->kbdev->dev, "JS: Setting Submit Allowed on %p (as=%d)", - kctx, kctx->as_nr); - - js_devdata->runpool_irq.submit_allowed |= set_bit; -} - -/** - * @brief Prevent a context from submitting more jobs on this policy - * - * The purpose of this abstraction is to hide the underlying data size, and wrap up - * the long repeated line of code. - * - * The caller must hold hwaccess_lock. - */ -static inline void kbasep_js_clear_submit_allowed(struct kbasep_js_device_data *js_devdata, struct kbase_context *kctx) -{ - u16 clear_bit; - u16 clear_mask; - - /* Ensure context really is scheduled in */ - KBASE_DEBUG_ASSERT(kctx->as_nr != KBASEP_AS_NR_INVALID); - KBASE_DEBUG_ASSERT(kbase_ctx_flag(kctx, KCTX_SCHEDULED)); - - clear_bit = (u16) (1u << kctx->as_nr); - clear_mask = ~clear_bit; - - dev_dbg(kctx->kbdev->dev, "JS: Clearing Submit Allowed on %p (as=%d)", - kctx, kctx->as_nr); - - js_devdata->runpool_irq.submit_allowed &= clear_mask; -} - -/** - * Create an initial 'invalid' atom retained state, that requires no - * atom-related work to be done on releasing with - * kbasep_js_runpool_release_ctx_and_katom_retained_state() - */ -static inline void kbasep_js_atom_retained_state_init_invalid(struct kbasep_js_atom_retained_state *retained_state) -{ - retained_state->event_code = BASE_JD_EVENT_NOT_STARTED; - retained_state->core_req = KBASEP_JS_ATOM_RETAINED_STATE_CORE_REQ_INVALID; -} - -/** - * Copy atom state that can be made available after jd_done_nolock() is called - * on that atom. 
- */ -static inline void kbasep_js_atom_retained_state_copy(struct kbasep_js_atom_retained_state *retained_state, const struct kbase_jd_atom *katom) -{ - retained_state->event_code = katom->event_code; - retained_state->core_req = katom->core_req; - retained_state->sched_priority = katom->sched_priority; - retained_state->device_nr = katom->device_nr; -} - -/** - * @brief Determine whether an atom has finished (given its retained state), - * and so should be given back to userspace/removed from the system. - * - * Reasons for an atom not finishing include: - * - Being soft-stopped (and so, the atom should be resubmitted sometime later) - * - * @param[in] katom_retained_state the retained state of the atom to check - * @return false if the atom has not finished - * @return !=false if the atom has finished - */ -static inline bool kbasep_js_has_atom_finished(const struct kbasep_js_atom_retained_state *katom_retained_state) -{ - return (bool) (katom_retained_state->event_code != BASE_JD_EVENT_STOPPED && katom_retained_state->event_code != BASE_JD_EVENT_REMOVED_FROM_NEXT); -} - -/** - * @brief Determine whether a struct kbasep_js_atom_retained_state is valid - * - * An invalid struct kbasep_js_atom_retained_state is allowed, and indicates that the - * code should just ignore it. - * - * @param[in] katom_retained_state the atom's retained state to check - * @return false if the retained state is invalid, and can be ignored - * @return !=false if the retained state is valid - */ -static inline bool kbasep_js_atom_retained_state_is_valid(const struct kbasep_js_atom_retained_state *katom_retained_state) -{ - return (bool) (katom_retained_state->core_req != KBASEP_JS_ATOM_RETAINED_STATE_CORE_REQ_INVALID); -} - -/** - * @brief Variant of kbasep_js_runpool_lookup_ctx() that can be used when the - * context is guaranteed to be already previously retained. 
- * - * It is a programming error to supply the \a as_nr of a context that has not - * been previously retained/has a busy refcount of zero. The only exception is - * when there is no ctx in \a as_nr (NULL returned). - * - * The following locking conditions are made on the caller: - * - it must \em not hold the hwaccess_lock, because it will be used internally. - * - * @return a valid struct kbase_context on success, with a refcount that is guaranteed - * to be non-zero and unmodified by this function. - * @return NULL on failure, indicating that no context was found in \a as_nr - */ -static inline struct kbase_context *kbasep_js_runpool_lookup_ctx_noretain(struct kbase_device *kbdev, int as_nr) -{ - struct kbase_context *found_kctx; - - KBASE_DEBUG_ASSERT(kbdev != NULL); - KBASE_DEBUG_ASSERT(0 <= as_nr && as_nr < BASE_MAX_NR_AS); - - found_kctx = kbdev->as_to_kctx[as_nr]; - KBASE_DEBUG_ASSERT(found_kctx == NULL || - atomic_read(&found_kctx->refcount) > 0); - - return found_kctx; -} - -/* - * The following locking conditions are made on the caller: - * - The caller must hold the kbasep_js_kctx_info::ctx::jsctx_mutex. 
- * - The caller must hold the kbasep_js_device_data::runpool_mutex - */ -static inline void kbase_js_runpool_inc_context_count( - struct kbase_device *kbdev, - struct kbase_context *kctx) -{ - struct kbasep_js_device_data *js_devdata; - struct kbasep_js_kctx_info *js_kctx_info; - - KBASE_DEBUG_ASSERT(kbdev != NULL); - KBASE_DEBUG_ASSERT(kctx != NULL); - - js_devdata = &kbdev->js_data; - js_kctx_info = &kctx->jctx.sched_info; - - lockdep_assert_held(&js_kctx_info->ctx.jsctx_mutex); - lockdep_assert_held(&js_devdata->runpool_mutex); - - /* Track total contexts */ - KBASE_DEBUG_ASSERT(js_devdata->nr_all_contexts_running < S8_MAX); - ++(js_devdata->nr_all_contexts_running); - - if (!kbase_ctx_flag(kctx, KCTX_SUBMIT_DISABLED)) { - /* Track contexts that can submit jobs */ - KBASE_DEBUG_ASSERT(js_devdata->nr_user_contexts_running < - S8_MAX); - ++(js_devdata->nr_user_contexts_running); - } -} - -/* - * The following locking conditions are made on the caller: - * - The caller must hold the kbasep_js_kctx_info::ctx::jsctx_mutex. 
- * - The caller must hold the kbasep_js_device_data::runpool_mutex - */ -static inline void kbase_js_runpool_dec_context_count( - struct kbase_device *kbdev, - struct kbase_context *kctx) -{ - struct kbasep_js_device_data *js_devdata; - struct kbasep_js_kctx_info *js_kctx_info; - - KBASE_DEBUG_ASSERT(kbdev != NULL); - KBASE_DEBUG_ASSERT(kctx != NULL); - - js_devdata = &kbdev->js_data; - js_kctx_info = &kctx->jctx.sched_info; - - lockdep_assert_held(&js_kctx_info->ctx.jsctx_mutex); - lockdep_assert_held(&js_devdata->runpool_mutex); - - /* Track total contexts */ - --(js_devdata->nr_all_contexts_running); - KBASE_DEBUG_ASSERT(js_devdata->nr_all_contexts_running >= 0); - - if (!kbase_ctx_flag(kctx, KCTX_SUBMIT_DISABLED)) { - /* Track contexts that can submit jobs */ - --(js_devdata->nr_user_contexts_running); - KBASE_DEBUG_ASSERT(js_devdata->nr_user_contexts_running >= 0); - } -} - - -/** - * @brief Submit atoms from all available contexts to all job slots. - * - * This will attempt to submit as many jobs as possible. It will exit when - * either all job slots are full, or all contexts have been used. - * - * @param[in] kbdev Device pointer - */ -static inline void kbase_js_sched_all(struct kbase_device *kbdev) -{ - kbase_js_sched(kbdev, (1 << kbdev->gpu_props.num_job_slots) - 1); -} - -extern const int -kbasep_js_atom_priority_to_relative[BASE_JD_NR_PRIO_LEVELS]; - -extern const base_jd_prio -kbasep_js_relative_priority_to_atom[KBASE_JS_ATOM_SCHED_PRIO_COUNT]; - -/** - * kbasep_js_atom_prio_to_sched_prio(): - Convert atom priority (base_jd_prio) - * to relative ordering - * @atom_prio: Priority ID to translate. - * - * Atom priority values for @ref base_jd_prio cannot be compared directly to - * find out which are higher or lower. - * - * This function will convert base_jd_prio values for successively lower - * priorities into a monotonically increasing sequence. That is, the lower the - * base_jd_prio priority, the higher the value produced by this function. 
This - * is in accordance with how the rest of the kernel treates priority. - * - * The mapping is 1:1 and the size of the valid input range is the same as the - * size of the valid output range, i.e. - * KBASE_JS_ATOM_SCHED_PRIO_COUNT == BASE_JD_NR_PRIO_LEVELS - * - * Note This must be kept in sync with BASE_JD_PRIO_<...> definitions - * - * Return: On success: a value in the inclusive range - * 0..KBASE_JS_ATOM_SCHED_PRIO_COUNT-1. On failure: - * KBASE_JS_ATOM_SCHED_PRIO_INVALID - */ -static inline int kbasep_js_atom_prio_to_sched_prio(base_jd_prio atom_prio) -{ - if (atom_prio >= BASE_JD_NR_PRIO_LEVELS) - return KBASE_JS_ATOM_SCHED_PRIO_INVALID; - - return kbasep_js_atom_priority_to_relative[atom_prio]; -} - -static inline base_jd_prio kbasep_js_sched_prio_to_atom_prio(int sched_prio) -{ - unsigned int prio_idx; - - KBASE_DEBUG_ASSERT(0 <= sched_prio - && sched_prio < KBASE_JS_ATOM_SCHED_PRIO_COUNT); - - prio_idx = (unsigned int)sched_prio; - - return kbasep_js_relative_priority_to_atom[prio_idx]; -} - - /** @} *//* end group kbase_js */ - /** @} *//* end group base_kbase_api */ - /** @} *//* end group base_api */ - -#endif /* _KBASE_JS_H_ */ +#endif /* _KBASE_JS_H_ */ diff --git a/drivers/gpu/arm/bifrost/mali_kbase_js_ctx_attr.c b/drivers/gpu/arm/bifrost/mali_kbase_js_ctx_attr.c index 1ff230cc222d..141d04a385cb 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_js_ctx_attr.c +++ b/drivers/gpu/arm/bifrost/mali_kbase_js_ctx_attr.c @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2012-2016, 2018 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2012-2016, 2018, 2020 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -67,7 +67,7 @@ static bool kbasep_js_ctx_attr_runpool_retain_attr(struct kbase_device *kbdev, s if (js_devdata->runpool_irq.ctx_attr_ref_count[attribute] == 1) { /* First refcount indicates a state change */ runpool_state_changed = true; - KBASE_TRACE_ADD(kbdev, JS_CTX_ATTR_NOW_ON_RUNPOOL, kctx, NULL, 0u, attribute); + KBASE_KTRACE_ADD_JM(kbdev, JS_CTX_ATTR_NOW_ON_RUNPOOL, kctx, NULL, 0u, attribute); } } @@ -112,7 +112,7 @@ static bool kbasep_js_ctx_attr_runpool_release_attr(struct kbase_device *kbdev, if (js_devdata->runpool_irq.ctx_attr_ref_count[attribute] == 0) { /* Last de-refcount indicates a state change */ runpool_state_changed = true; - KBASE_TRACE_ADD(kbdev, JS_CTX_ATTR_NOW_OFF_RUNPOOL, kctx, NULL, 0u, attribute); + KBASE_KTRACE_ADD_JM(kbdev, JS_CTX_ATTR_NOW_OFF_RUNPOOL, kctx, NULL, 0u, attribute); } } @@ -149,7 +149,7 @@ static bool kbasep_js_ctx_attr_ctx_retain_attr(struct kbase_device *kbdev, struc if (kbase_ctx_flag(kctx, KCTX_SCHEDULED) && js_kctx_info->ctx.ctx_attr_ref_count[attribute] == 1) { /* Only ref-count the attribute on the runpool for the first time this contexts sees this attribute */ - KBASE_TRACE_ADD(kbdev, JS_CTX_ATTR_NOW_ON_CTX, kctx, NULL, 0u, attribute); + KBASE_KTRACE_ADD_JM(kbdev, JS_CTX_ATTR_NOW_ON_CTX, kctx, NULL, 0u, attribute); runpool_state_changed = kbasep_js_ctx_attr_runpool_retain_attr(kbdev, kctx, attribute); } @@ -185,7 +185,7 @@ static bool kbasep_js_ctx_attr_ctx_release_attr(struct kbase_device *kbdev, stru lockdep_assert_held(&kbdev->hwaccess_lock); /* Only de-ref-count the attribute on the runpool when this is the last ctx-reference to it */ runpool_state_changed = kbasep_js_ctx_attr_runpool_release_attr(kbdev, kctx, attribute); - KBASE_TRACE_ADD(kbdev, JS_CTX_ATTR_NOW_OFF_CTX, kctx, NULL, 0u, attribute); + KBASE_KTRACE_ADD_JM(kbdev, JS_CTX_ATTR_NOW_OFF_CTX, kctx, 
NULL, 0u, attribute); } /* De-ref must happen afterwards, because kbasep_js_ctx_attr_runpool_release() needs to check it too */ diff --git a/drivers/gpu/arm/bifrost/mali_kbase_mem.c b/drivers/gpu/arm/bifrost/mali_kbase_mem.c index 45899833e23f..4a1004b6a45c 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_mem.c +++ b/drivers/gpu/arm/bifrost/mali_kbase_mem.c @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2010-2019 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2010-2020 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -20,10 +20,7 @@ * */ - - /** - * @file mali_kbase_mem.c * Base kernel memory APIs */ #include @@ -38,12 +35,36 @@ #include #include -#include +#include #include #include -#include +#include #include #include +#include +#include + +/* + * Alignment of objects allocated by the GPU inside a just-in-time memory + * region whose size is given by an end address + * + * This is the alignment of objects allocated by the GPU, but possibly not + * fully written to. When taken into account with + * KBASE_GPU_ALLOCATED_OBJECT_MAX_BYTES it gives the maximum number of bytes + * that the JIT memory report size can exceed the actual backed memory size. + */ +#define KBASE_GPU_ALLOCATED_OBJECT_ALIGN_BYTES (128u) + +/* + * Maximum size of objects allocated by the GPU inside a just-in-time memory + * region whose size is given by an end address + * + * This is the maximum size of objects allocated by the GPU, but possibly not + * fully written to. When taken into account with + * KBASE_GPU_ALLOCATED_OBJECT_ALIGN_BYTES it gives the maximum number of bytes + * that the JIT memory report size can exceed the actual backed memory size. 
+ */ +#define KBASE_GPU_ALLOCATED_OBJECT_MAX_BYTES (512u) /* Forward declarations */ @@ -673,9 +694,11 @@ static void kbase_region_tracker_erase_rbtree(struct rb_root *rbtree) void kbase_region_tracker_term(struct kbase_context *kctx) { + kbase_gpu_vm_lock(kctx); kbase_region_tracker_erase_rbtree(&kctx->reg_rbtree_same); kbase_region_tracker_erase_rbtree(&kctx->reg_rbtree_custom); kbase_region_tracker_erase_rbtree(&kctx->reg_rbtree_exec); + kbase_gpu_vm_unlock(kctx); } void kbase_region_tracker_term_rbtree(struct rb_root *rbtree) @@ -813,15 +836,22 @@ static int kbase_region_tracker_init_jit_64(struct kbase_context *kctx, #endif int kbase_region_tracker_init_jit(struct kbase_context *kctx, u64 jit_va_pages, - u8 max_allocations, u8 trim_level, int group_id) + int max_allocations, int trim_level, int group_id, + u64 phys_pages_limit) { int err = 0; - if (trim_level > 100) + if (trim_level < 0 || trim_level > BASE_JIT_MAX_TRIM_LEVEL) return -EINVAL; - if (WARN_ON(group_id >= MEMORY_GROUP_MANAGER_NR_GROUPS) || - WARN_ON(group_id < 0)) + if (group_id < 0 || group_id >= MEMORY_GROUP_MANAGER_NR_GROUPS) + return -EINVAL; + +#if MALI_JIT_PRESSURE_LIMIT + if (phys_pages_limit > jit_va_pages) +#else + if (phys_pages_limit != jit_va_pages) +#endif /* MALI_JIT_PRESSURE_LIMIT */ return -EINVAL; kbase_gpu_vm_lock(kctx); @@ -840,6 +870,11 @@ int kbase_region_tracker_init_jit(struct kbase_context *kctx, u64 jit_va_pages, kctx->trim_level = trim_level; kctx->jit_va = true; kctx->jit_group_id = group_id; +#if MALI_JIT_PRESSURE_LIMIT + kctx->jit_phys_pages_limit = phys_pages_limit; + dev_dbg(kctx->kbdev->dev, "phys_pages_limit set to %llu\n", + phys_pages_limit); +#endif /* MALI_JIT_PRESSURE_LIMIT */ } kbase_gpu_vm_unlock(kctx); @@ -941,6 +976,12 @@ int kbase_mem_init(struct kbase_device *kbdev) /* Initialize memory usage */ atomic_set(&memdev->used_pages, 0); +#ifdef IR_THRESHOLD + atomic_set(&memdev->ir_threshold, IR_THRESHOLD); +#else + atomic_set(&memdev->ir_threshold, 
DEFAULT_IR_THRESHOLD); +#endif + kbdev->mgm_dev = &kbase_native_mgm_dev; #ifdef CONFIG_OF @@ -1015,12 +1056,8 @@ void kbase_mem_term(struct kbase_device *kbdev) if (kbdev->mgm_dev) module_put(kbdev->mgm_dev->owner); } - KBASE_EXPORT_TEST_API(kbase_mem_term); - - - /** * @brief Allocate a free region object. * @@ -1060,6 +1097,7 @@ struct kbase_va_region *kbase_alloc_free_region(struct rb_root *rbtree, new_reg->nr_pages = nr_pages; INIT_LIST_HEAD(&new_reg->jit_node); + INIT_LIST_HEAD(&new_reg->link); return new_reg; } @@ -1114,6 +1152,8 @@ void kbase_free_alloced_region(struct kbase_va_region *reg) if (WARN_ON(kbase_is_region_invalid(reg))) return; + dev_dbg(kctx->kbdev->dev, "Freeing memory region %p\n", + (void *)reg); mutex_lock(&kctx->jit_evict_lock); @@ -1638,6 +1678,8 @@ int kbase_mem_free_region(struct kbase_context *kctx, struct kbase_va_region *re KBASE_DEBUG_ASSERT(NULL != kctx); KBASE_DEBUG_ASSERT(NULL != reg); + dev_dbg(kctx->kbdev->dev, "%s %p in kctx %p\n", + __func__, (void *)reg, (void *)kctx); lockdep_assert_held(&kctx->reg_lock); if (reg->flags & KBASE_REG_NO_USER_FREE) { @@ -1693,6 +1735,8 @@ int kbase_mem_free(struct kbase_context *kctx, u64 gpu_addr) struct kbase_va_region *reg; KBASE_DEBUG_ASSERT(kctx != NULL); + dev_dbg(kctx->kbdev->dev, "%s 0x%llx in kctx %p\n", + __func__, gpu_addr, (void *)kctx); if ((gpu_addr & ~PAGE_MASK) && (gpu_addr >= PAGE_SIZE)) { dev_warn(kctx->kbdev->dev, "kbase_mem_free: gpu_addr parameter is invalid"); @@ -2622,6 +2666,12 @@ bool kbase_check_alloc_flags(unsigned long flags) if ((flags & BASE_MEM_IMPORT_SHARED) == BASE_MEM_IMPORT_SHARED) return false; + /* BASE_MEM_IMPORT_SYNC_ON_MAP_UNMAP is only valid for imported + * memory */ + if ((flags & BASE_MEM_IMPORT_SYNC_ON_MAP_UNMAP) == + BASE_MEM_IMPORT_SYNC_ON_MAP_UNMAP) + return false; + /* Should not combine BASE_MEM_COHERENT_LOCAL with * BASE_MEM_COHERENT_SYSTEM */ if ((flags & (BASE_MEM_COHERENT_LOCAL | BASE_MEM_COHERENT_SYSTEM)) == @@ -2933,8 +2983,75 @@ static 
int kbase_jit_debugfs_phys_get(struct kbase_jit_debugfs_data *data) KBASE_JIT_DEBUGFS_DECLARE(kbase_jit_debugfs_phys_fops, kbase_jit_debugfs_phys_get); +#if MALI_JIT_PRESSURE_LIMIT +static int kbase_jit_debugfs_used_get(struct kbase_jit_debugfs_data *data) +{ + struct kbase_context *kctx = data->kctx; + struct kbase_va_region *reg; + + mutex_lock(&kctx->jctx.lock); + mutex_lock(&kctx->jit_evict_lock); + list_for_each_entry(reg, &kctx->jit_active_head, jit_node) { + data->active_value += reg->used_pages; + } + mutex_unlock(&kctx->jit_evict_lock); + mutex_unlock(&kctx->jctx.lock); + + return 0; +} + +KBASE_JIT_DEBUGFS_DECLARE(kbase_jit_debugfs_used_fops, + kbase_jit_debugfs_used_get); + +static int kbase_mem_jit_trim_pages_from_region(struct kbase_context *kctx, + struct kbase_va_region *reg, size_t pages_needed, + size_t *freed, bool shrink); + +static int kbase_jit_debugfs_trim_get(struct kbase_jit_debugfs_data *data) +{ + struct kbase_context *kctx = data->kctx; + struct kbase_va_region *reg; + + mutex_lock(&kctx->jctx.lock); + kbase_gpu_vm_lock(kctx); + mutex_lock(&kctx->jit_evict_lock); + list_for_each_entry(reg, &kctx->jit_active_head, jit_node) { + int err; + size_t freed = 0u; + + err = kbase_mem_jit_trim_pages_from_region(kctx, reg, + SIZE_MAX, &freed, false); + + if (err) { + /* Failed to calculate, try the next region */ + continue; + } + + data->active_value += freed; + } + mutex_unlock(&kctx->jit_evict_lock); + kbase_gpu_vm_unlock(kctx); + mutex_unlock(&kctx->jctx.lock); + + return 0; +} + +KBASE_JIT_DEBUGFS_DECLARE(kbase_jit_debugfs_trim_fops, + kbase_jit_debugfs_trim_get); +#endif /* MALI_JIT_PRESSURE_LIMIT */ + void kbase_jit_debugfs_init(struct kbase_context *kctx) { + /* prevent unprivileged use of debug file system + * in old kernel version + */ +#if (KERNEL_VERSION(4, 7, 0) <= LINUX_VERSION_CODE) + /* only for newer kernel version debug file system is safe */ + const mode_t mode = 0444; +#else + const mode_t mode = 0400; +#endif + /* Caller 
already ensures this, but we keep the pattern for * maintenance safety. */ @@ -2942,23 +3059,41 @@ void kbase_jit_debugfs_init(struct kbase_context *kctx) WARN_ON(IS_ERR_OR_NULL(kctx->kctx_dentry))) return; + + /* Debugfs entry for getting the number of JIT allocations. */ - debugfs_create_file("mem_jit_count", S_IRUGO, kctx->kctx_dentry, + debugfs_create_file("mem_jit_count", mode, kctx->kctx_dentry, kctx, &kbase_jit_debugfs_count_fops); /* * Debugfs entry for getting the total number of virtual pages * used by JIT allocations. */ - debugfs_create_file("mem_jit_vm", S_IRUGO, kctx->kctx_dentry, + debugfs_create_file("mem_jit_vm", mode, kctx->kctx_dentry, kctx, &kbase_jit_debugfs_vm_fops); /* * Debugfs entry for getting the number of physical pages used * by JIT allocations. */ - debugfs_create_file("mem_jit_phys", S_IRUGO, kctx->kctx_dentry, + debugfs_create_file("mem_jit_phys", mode, kctx->kctx_dentry, kctx, &kbase_jit_debugfs_phys_fops); +#if MALI_JIT_PRESSURE_LIMIT + /* + * Debugfs entry for getting the number of pages used + * by JIT allocations for estimating the physical pressure + * limit. + */ + debugfs_create_file("mem_jit_used", mode, kctx->kctx_dentry, + kctx, &kbase_jit_debugfs_used_fops); + + /* + * Debugfs entry for getting the number of pages that could + * be trimmed to free space for more JIT allocations. 
+ */ + debugfs_create_file("mem_jit_trim", mode, kctx->kctx_dentry, + kctx, &kbase_jit_debugfs_trim_fops); +#endif /* MALI_JIT_PRESSURE_LIMIT */ } #endif /* CONFIG_DEBUG_FS */ @@ -3003,8 +3138,8 @@ int kbase_jit_init(struct kbase_context *kctx) INIT_LIST_HEAD(&kctx->jit_destroy_head); INIT_WORK(&kctx->jit_work, kbase_jit_destroy_worker); - INIT_LIST_HEAD(&kctx->jit_pending_alloc); - INIT_LIST_HEAD(&kctx->jit_atoms_head); + INIT_LIST_HEAD(&kctx->jctx.jit_atoms_head); + INIT_LIST_HEAD(&kctx->jctx.jit_pending_alloc); mutex_unlock(&kctx->jit_evict_lock); kctx->jit_max_allocations = 0; @@ -3019,7 +3154,7 @@ int kbase_jit_init(struct kbase_context *kctx) * the alignment requirements. */ static bool meet_size_and_tiler_align_top_requirements(struct kbase_context *kctx, - struct kbase_va_region *walker, struct base_jit_alloc_info *info) + struct kbase_va_region *walker, const struct base_jit_alloc_info *info) { bool meet_reqs = true; @@ -3036,8 +3171,175 @@ static bool meet_size_and_tiler_align_top_requirements(struct kbase_context *kct return meet_reqs; } +#if MALI_JIT_PRESSURE_LIMIT +/* Function will guarantee *@freed will not exceed @pages_needed + */ +static int kbase_mem_jit_trim_pages_from_region(struct kbase_context *kctx, + struct kbase_va_region *reg, size_t pages_needed, + size_t *freed, bool shrink) +{ + int err = 0; + size_t available_pages = 0u; + const size_t old_pages = kbase_reg_current_backed_size(reg); + size_t new_pages = old_pages; + size_t to_free = 0u; + size_t max_allowed_pages = old_pages; + + lockdep_assert_held(&kctx->jctx.lock); + lockdep_assert_held(&kctx->reg_lock); + + /* Is this a JIT allocation that has been reported on? */ + if (reg->used_pages == reg->nr_pages) + goto out; + + if (!(reg->flags & KBASE_REG_HEAP_INFO_IS_SIZE)) { + /* For address based memory usage calculation, the GPU + * allocates objects of up to size 's', but aligns every object + * to alignment 'a', with a < s. 
+ * + * It also doesn't have to write to all bytes in an object of + * size 's'. + * + * Hence, we can observe the GPU's address for the end of used + * memory being up to (s - a) bytes into the first unallocated + * page. + * + * We allow for this and only warn when it exceeds this bound + * (rounded up to page sized units). Note, this is allowed to + * exceed reg->nr_pages. + */ + max_allowed_pages += PFN_UP( + KBASE_GPU_ALLOCATED_OBJECT_MAX_BYTES - + KBASE_GPU_ALLOCATED_OBJECT_ALIGN_BYTES); + } else if (reg->flags & KBASE_REG_TILER_ALIGN_TOP) { + /* The GPU could report being ready to write to the next + * 'extent' sized chunk, but didn't actually write to it, so we + * can report up to 'extent' size pages more than the backed + * size. + * + * Note, this is allowed to exceed reg->nr_pages. + */ + max_allowed_pages += reg->extent; + + /* Also note that in these GPUs, the GPU may make a large (>1 + * page) initial allocation but not actually write out to all + * of it. Hence it might report that a much higher amount of + * memory was used than actually was written to. This does not + * result in a real warning because on growing this memory we + * round up the size of the allocation up to an 'extent' sized + * chunk, hence automatically bringing the backed size up to + * the reported size. + */ + } + + if (old_pages < reg->used_pages) { + /* Prevent overflow on available_pages, but only report the + * problem if it's in a scenario where used_pages should have + * been consistent with the backed size + * + * Note: In case of a size-based report, this legitimately + * happens in common use-cases: we allow for up to this size of + * memory being used, but depending on the content it doesn't + * have to use all of it. 
+ * + * Hence, we're much more quiet about that in the size-based + * report case - it's not indicating a real problem, it's just + * for information + */ + if (max_allowed_pages < reg->used_pages) { + if (!(reg->flags & KBASE_REG_HEAP_INFO_IS_SIZE)) + dev_warn(kctx->kbdev->dev, + "%s: current backed pages %zu < reported used pages %zu (allowed to be up to %zu) on JIT 0x%llx vapages %zu\n", + __func__, + old_pages, reg->used_pages, + max_allowed_pages, + reg->start_pfn << PAGE_SHIFT, + reg->nr_pages); + else + dev_dbg(kctx->kbdev->dev, + "%s: no need to trim, current backed pages %zu < reported used pages %zu on size-report for JIT 0x%llx vapages %zu\n", + __func__, + old_pages, reg->used_pages, + reg->start_pfn << PAGE_SHIFT, + reg->nr_pages); + } + /* In any case, no error condition to report here, caller can + * try other regions + */ + + goto out; + } + available_pages = old_pages - reg->used_pages; + to_free = min(available_pages, pages_needed); + + if (shrink) { + new_pages -= to_free; + + err = kbase_mem_shrink(kctx, reg, new_pages); + } +out: + trace_mali_jit_trim_from_region(reg, to_free, old_pages, + available_pages, new_pages); + *freed = to_free; + return err; +} + + +/** + * kbase_mem_jit_trim_pages - Trim JIT regions until sufficient pages have been + * freed + * @kctx: Pointer to the kbase context whose active JIT allocations will be + * checked. + * @pages_needed: The maximum number of pages to trim. + * + * This functions checks all active JIT allocations in @kctx for unused pages + * at the end, and trim the backed memory regions of those allocations down to + * the used portion and free the unused pages into the page pool. + * + * Specifying @pages_needed allows us to stop early when there's enough + * physical memory freed to sufficiently bring down the total JIT physical page + * usage (e.g. 
to below the pressure limit) + * + * Return: Total number of successfully freed pages + */ +static size_t kbase_mem_jit_trim_pages(struct kbase_context *kctx, + size_t pages_needed) +{ + struct kbase_va_region *reg, *tmp; + size_t total_freed = 0; + + kbase_gpu_vm_lock(kctx); + mutex_lock(&kctx->jit_evict_lock); + list_for_each_entry_safe(reg, tmp, &kctx->jit_active_head, jit_node) { + int err; + size_t freed = 0u; + + err = kbase_mem_jit_trim_pages_from_region(kctx, reg, + pages_needed, &freed, true); + + if (err) { + /* Failed to trim, try the next region */ + continue; + } + + total_freed += freed; + WARN_ON(freed > pages_needed); + pages_needed -= freed; + if (!pages_needed) + break; + } + mutex_unlock(&kctx->jit_evict_lock); + kbase_gpu_vm_unlock(kctx); + + trace_mali_jit_trim(total_freed); + + return total_freed; +} +#endif /* MALI_JIT_PRESSURE_LIMIT */ + static int kbase_jit_grow(struct kbase_context *kctx, - struct base_jit_alloc_info *info, struct kbase_va_region *reg) + const struct base_jit_alloc_info *info, + struct kbase_va_region *reg) { size_t delta; size_t pages_required; @@ -3190,10 +3492,133 @@ static void trace_jit_stats(struct kbase_context *kctx, max_allocations, alloc_count, va_pages, ph_pages); } -struct kbase_va_region *kbase_jit_allocate(struct kbase_context *kctx, - struct base_jit_alloc_info *info) +#if MALI_JIT_PRESSURE_LIMIT +/** + * get_jit_backed_pressure() - calculate the physical backing of all JIT + * allocations + * + * @kctx: Pointer to the kbase context whose active JIT allocations will be + * checked + * + * Return: number of pages that are committed by JIT allocations + */ +static size_t get_jit_backed_pressure(struct kbase_context *kctx) { - struct kbase_va_region *reg = NULL; + size_t backed_pressure = 0; + int jit_id; + + lockdep_assert_held(&kctx->jctx.lock); + + kbase_gpu_vm_lock(kctx); + for (jit_id = 0; jit_id <= BASE_JIT_ALLOC_COUNT; jit_id++) { + struct kbase_va_region *reg = kctx->jit_alloc[jit_id]; + + if (reg && 
(reg != KBASE_RESERVED_REG_JIT_ALLOC)) { + /* If region has no report, be pessimistic */ + if (reg->used_pages == reg->nr_pages) { + backed_pressure += reg->nr_pages; + } else { + backed_pressure += + kbase_reg_current_backed_size(reg); + } + } + } + kbase_gpu_vm_unlock(kctx); + + return backed_pressure; +} + +/** + * jit_trim_necessary_pages() - calculate and trim the least pages possible to + * satisfy a new JIT allocation + * + * @kctx: Pointer to the kbase context + * @info: Pointer to JIT allocation information for the new allocation + * + * Before allocating a new just-in-time memory region or reusing a previous + * one, ensure that the total JIT physical page usage also will not exceed the + * pressure limit. + * + * If there are no reported-on allocations, then we already guarantee this will + * be the case - because our current pressure then only comes from the va_pages + * of each JIT region, hence JIT physical page usage is guaranteed to be + * bounded by this. + * + * However as soon as JIT allocations become "reported on", the pressure is + * lowered to allow new JIT regions to be allocated. It is after such a point + * that the total JIT physical page usage could (either now or in the future on + * a grow-on-GPU-page-fault) exceed the pressure limit, but only on newly + * allocated JIT regions. Hence, trim any "reported on" regions. + * + * Any pages freed will go into the pool and be allocated from there in + * kbase_mem_alloc(). + */ +static void jit_trim_necessary_pages(struct kbase_context *kctx, + const struct base_jit_alloc_info *info) +{ + size_t backed_pressure = 0; + size_t needed_pages = 0; + + backed_pressure = get_jit_backed_pressure(kctx); + + /* It is possible that this is the case - if this is the first + * allocation after "ignore_pressure_limit" allocation. 
+ */ + if (backed_pressure > kctx->jit_phys_pages_limit) { + needed_pages += + (backed_pressure - kctx->jit_phys_pages_limit) + + info->va_pages; + } else { + size_t backed_diff = + kctx->jit_phys_pages_limit - backed_pressure; + + if (info->va_pages > backed_diff) + needed_pages += info->va_pages - backed_diff; + } + + if (needed_pages) { + size_t trimmed_pages = kbase_mem_jit_trim_pages(kctx, + needed_pages); + + /* This should never happen - we already asserted that + * we are not violating JIT pressure limit in earlier + * checks, which means that in-flight JIT allocations + * must have enough unused pages to satisfy the new + * allocation + */ + WARN_ON(trimmed_pages < needed_pages); + } +} +#endif /* MALI_JIT_PRESSURE_LIMIT */ + +/** + * jit_allow_allocate() - check whether basic conditions are satisfied to allow + * a new JIT allocation + * + * @kctx: Pointer to the kbase context + * @info: Pointer to JIT allocation information for the new allocation + * @ignore_pressure_limit: Flag to indicate whether JIT pressure limit check + * should be ignored + * + * Return: true if allocation can be executed, false otherwise + */ +static bool jit_allow_allocate(struct kbase_context *kctx, + const struct base_jit_alloc_info *info, + bool ignore_pressure_limit) +{ + lockdep_assert_held(&kctx->jctx.lock); + +#if MALI_JIT_PRESSURE_LIMIT + if (likely(!ignore_pressure_limit) && + ((kctx->jit_phys_pages_limit <= kctx->jit_current_phys_pressure) || + (info->va_pages > (kctx->jit_phys_pages_limit - kctx->jit_current_phys_pressure)))) { + dev_dbg(kctx->kbdev->dev, + "Max JIT page allocations limit reached: active pages %llu, max pages %llu\n", + kctx->jit_current_phys_pressure + info->va_pages, + kctx->jit_phys_pages_limit); + return false; + } +#endif /* MALI_JIT_PRESSURE_LIMIT */ if (kctx->jit_current_allocations >= kctx->jit_max_allocations) { /* Too many current allocations */ @@ -3201,8 +3626,9 @@ struct kbase_va_region *kbase_jit_allocate(struct kbase_context *kctx, "Max 
JIT allocations limit reached: active allocations %d, max allocations %d\n", kctx->jit_current_allocations, kctx->jit_max_allocations); - return NULL; + return false; } + if (info->max_allocations > 0 && kctx->jit_current_allocations_per_bin[info->bin_id] >= info->max_allocations) { @@ -3212,9 +3638,28 @@ struct kbase_va_region *kbase_jit_allocate(struct kbase_context *kctx, info->bin_id, kctx->jit_current_allocations_per_bin[info->bin_id], info->max_allocations); - return NULL; + return false; } + return true; +} + +struct kbase_va_region *kbase_jit_allocate(struct kbase_context *kctx, + const struct base_jit_alloc_info *info, + bool ignore_pressure_limit) +{ + struct kbase_va_region *reg = NULL; + + lockdep_assert_held(&kctx->jctx.lock); + + if (!jit_allow_allocate(kctx, info, ignore_pressure_limit)) + return NULL; + +#if MALI_JIT_PRESSURE_LIMIT + if (!ignore_pressure_limit) + jit_trim_necessary_pages(kctx, info); +#endif /* MALI_JIT_PRESSURE_LIMIT */ + mutex_lock(&kctx->jit_evict_lock); /* @@ -3325,7 +3770,10 @@ struct kbase_va_region *kbase_jit_allocate(struct kbase_context *kctx, dev_dbg(kctx->kbdev->dev, "JIT allocation resize failed: va_pages 0x%llx, commit_pages 0x%llx\n", info->va_pages, info->commit_pages); - goto update_failed_unlocked; + mutex_lock(&kctx->jit_evict_lock); + list_move(&reg->jit_node, &kctx->jit_pool_head); + mutex_unlock(&kctx->jit_evict_lock); + return NULL; } } else { /* No suitable JIT allocation was found so create a new one */ @@ -3351,7 +3799,7 @@ struct kbase_va_region *kbase_jit_allocate(struct kbase_context *kctx, dev_dbg(kctx->kbdev->dev, "Failed to allocate JIT memory: va_pages 0x%llx, commit_pages 0x%llx\n", info->va_pages, info->commit_pages); - goto out_unlocked; + return NULL; } mutex_lock(&kctx->jit_evict_lock); @@ -3359,6 +3807,8 @@ struct kbase_va_region *kbase_jit_allocate(struct kbase_context *kctx, mutex_unlock(&kctx->jit_evict_lock); } + trace_mali_jit_alloc(reg, info->id); + kctx->jit_current_allocations++; 
kctx->jit_current_allocations_per_bin[info->bin_id]++; @@ -3366,21 +3816,24 @@ struct kbase_va_region *kbase_jit_allocate(struct kbase_context *kctx, reg->jit_usage_id = info->usage_id; reg->jit_bin_id = info->bin_id; +#if MALI_JIT_PRESSURE_LIMIT + if (info->flags & BASE_JIT_ALLOC_HEAP_INFO_IS_SIZE) + reg->flags = reg->flags | KBASE_REG_HEAP_INFO_IS_SIZE; + reg->heap_info_gpu_addr = info->heap_info_gpu_addr; + kbase_jit_report_update_pressure(kctx, reg, info->va_pages, + KBASE_JIT_REPORT_ON_ALLOC_OR_FREE); +#endif /* MALI_JIT_PRESSURE_LIMIT */ return reg; - -update_failed_unlocked: - mutex_lock(&kctx->jit_evict_lock); - list_move(&reg->jit_node, &kctx->jit_pool_head); - mutex_unlock(&kctx->jit_evict_lock); -out_unlocked: - return NULL; } void kbase_jit_free(struct kbase_context *kctx, struct kbase_va_region *reg) { u64 old_pages; + /* JIT id not immediately available here, so use 0u */ + trace_mali_jit_free(reg, 0u); + /* Get current size of JIT region */ old_pages = kbase_reg_current_backed_size(reg); if (reg->initial_commit < old_pages) { @@ -3391,19 +3844,16 @@ void kbase_jit_free(struct kbase_context *kctx, struct kbase_va_region *reg) div_u64(old_pages * (100 - kctx->trim_level), 100)); u64 delta = old_pages - new_size; - if (delta) { - kbase_mem_shrink_cpu_mapping(kctx, reg, old_pages-delta, - old_pages); - kbase_mem_shrink_gpu_mapping(kctx, reg, old_pages-delta, - old_pages); - - kbase_free_phy_pages_helper(reg->cpu_alloc, delta); - if (reg->cpu_alloc != reg->gpu_alloc) - kbase_free_phy_pages_helper(reg->gpu_alloc, - delta); - } + if (delta) + kbase_mem_shrink(kctx, reg, old_pages - delta); } +#if MALI_JIT_PRESSURE_LIMIT + reg->heap_info_gpu_addr = 0; + kbase_jit_report_update_pressure(kctx, reg, 0, + KBASE_JIT_REPORT_ON_ALLOC_OR_FREE); +#endif /* MALI_JIT_PRESSURE_LIMIT */ + kctx->jit_current_allocations--; kctx->jit_current_allocations_per_bin[reg->jit_bin_id]--; @@ -3522,6 +3972,89 @@ void kbase_jit_term(struct kbase_context *kctx) 
cancel_work_sync(&kctx->jit_work); } +#if MALI_JIT_PRESSURE_LIMIT +void kbase_trace_jit_report_gpu_mem_trace_enabled(struct kbase_context *kctx, + struct kbase_va_region *reg, unsigned int flags) +{ + /* Offset to the location used for a JIT report within the GPU memory + * + * This constants only used for this debugging function - not useful + * anywhere else in kbase + */ + const u64 jit_report_gpu_mem_offset = sizeof(u64)*2; + + u64 addr_start; + struct kbase_vmap_struct mapping; + u64 *ptr; + + if (reg->heap_info_gpu_addr == 0ull) + goto out; + + /* Nothing else to trace in the case the memory just contains the + * size. Other tracepoints already record the relevant area of memory. + */ + if (reg->flags & KBASE_REG_HEAP_INFO_IS_SIZE) + goto out; + + addr_start = reg->heap_info_gpu_addr - jit_report_gpu_mem_offset; + + ptr = kbase_vmap(kctx, addr_start, KBASE_JIT_REPORT_GPU_MEM_SIZE, + &mapping); + if (!ptr) { + dev_warn(kctx->kbdev->dev, + "%s: JIT start=0x%llx unable to map memory near end pointer %llx\n", + __func__, reg->start_pfn << PAGE_SHIFT, + addr_start); + goto out; + } + + trace_mali_jit_report_gpu_mem(addr_start, reg->start_pfn << PAGE_SHIFT, + ptr, flags); + + kbase_vunmap(kctx, &mapping); +out: + return; +} +#endif /* MALI_JIT_PRESSURE_LIMIT */ + +#if MALI_JIT_PRESSURE_LIMIT +void kbase_jit_report_update_pressure(struct kbase_context *kctx, + struct kbase_va_region *reg, u64 new_used_pages, + unsigned int flags) +{ + u64 diff; + + lockdep_assert_held(&kctx->jctx.lock); + + trace_mali_jit_report_pressure(reg, new_used_pages, + kctx->jit_current_phys_pressure + new_used_pages - + reg->used_pages, + flags); + + if (WARN_ON(new_used_pages > reg->nr_pages)) + return; + + if (reg->used_pages > new_used_pages) { + /* We reduced the number of used pages */ + diff = reg->used_pages - new_used_pages; + + if (!WARN_ON(diff > kctx->jit_current_phys_pressure)) + kctx->jit_current_phys_pressure -= diff; + + reg->used_pages = new_used_pages; + } else { + /* We 
increased the number of used pages */ + diff = new_used_pages - reg->used_pages; + + if (!WARN_ON(diff > U64_MAX - kctx->jit_current_phys_pressure)) + kctx->jit_current_phys_pressure += diff; + + reg->used_pages = new_used_pages; + } + +} +#endif /* MALI_JIT_PRESSURE_LIMIT */ + bool kbase_has_exec_va_zone(struct kbase_context *kctx) { bool has_exec_va_zone; @@ -3707,6 +4240,46 @@ static void kbase_jd_user_buf_unmap(struct kbase_context *kctx, alloc->nents = 0; } +int kbase_mem_copy_to_pinned_user_pages(struct page **dest_pages, + void *src_page, size_t *to_copy, unsigned int nr_pages, + unsigned int *target_page_nr, size_t offset) +{ + void *target_page = kmap(dest_pages[*target_page_nr]); + size_t chunk = PAGE_SIZE-offset; + + if (!target_page) { + pr_err("%s: kmap failure", __func__); + return -ENOMEM; + } + + chunk = min(chunk, *to_copy); + + memcpy(target_page + offset, src_page, chunk); + *to_copy -= chunk; + + kunmap(dest_pages[*target_page_nr]); + + *target_page_nr += 1; + if (*target_page_nr >= nr_pages || *to_copy == 0) + return 0; + + target_page = kmap(dest_pages[*target_page_nr]); + if (!target_page) { + pr_err("%s: kmap failure", __func__); + return -ENOMEM; + } + + KBASE_DEBUG_ASSERT(target_page); + + chunk = min(offset, *to_copy); + memcpy(target_page, src_page + PAGE_SIZE-offset, chunk); + *to_copy -= chunk; + + kunmap(dest_pages[*target_page_nr]); + + return 0; +} + struct kbase_mem_phy_alloc *kbase_map_external_resource( struct kbase_context *kctx, struct kbase_va_region *reg, struct mm_struct *locked_mm) diff --git a/drivers/gpu/arm/bifrost/mali_kbase_mem.h b/drivers/gpu/arm/bifrost/mali_kbase_mem.h index 4fb406db0e2b..6e921ece883f 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_mem.h +++ b/drivers/gpu/arm/bifrost/mali_kbase_mem.h @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2010-2019 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2010-2020 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -145,6 +145,7 @@ struct kbase_mem_phy_alloc { struct dma_buf_attachment *dma_attachment; unsigned int current_mapping_usage_count; struct sg_table *sgt; + bool need_sync; } umm; struct { u64 stride; @@ -183,6 +184,19 @@ struct kbase_mem_phy_alloc { */ #define PINNED_ON_IMPORT (1<<31) +/** + * enum kbase_jit_report_flags - Flags for just-in-time memory allocation + * pressure limit functions + * @KBASE_JIT_REPORT_ON_ALLOC_OR_FREE: Notifying about an update happening due + * to a just-in-time memory allocation or free + * + * Used to control flow within pressure limit related functions, or to provide + * extra debugging information + */ +enum kbase_jit_report_flags { + KBASE_JIT_REPORT_ON_ALLOC_OR_FREE = (1u << 0) +}; + static inline void kbase_mem_phy_alloc_gpu_mapped(struct kbase_mem_phy_alloc *alloc) { KBASE_DEBUG_ASSERT(alloc); @@ -235,18 +249,35 @@ static inline struct kbase_mem_phy_alloc *kbase_mem_phy_alloc_put(struct kbase_m /** * A GPU memory region, and attributes for CPU mappings. + * + * @rblink: Node in a red-black tree of memory regions within the same zone of + * the GPU's virtual address space. + * @link: Links to neighboring items in a list of growable memory regions + * that triggered incremental rendering by growing too much. + * @rbtree: Backlink to the red-black tree of memory regions. + * @start_pfn: The Page Frame Number in GPU virtual address space. + * @nr_pages: The size of the region in pages. + * @initial_commit: Initial commit, for aligning the start address and + * correctly growing KBASE_REG_TILER_ALIGN_TOP regions. + * @threshold_pages: If non-zero and the amount of memory committed to a region + * that can grow on page fault exceeds this number of pages + * then the driver switches to incremental rendering. + * @extent: Number of pages allocated on page fault. 
+ * @cpu_alloc: The physical memory we mmap to the CPU when mapping this region. + * @gpu_alloc: The physical memory we mmap to the GPU when mapping this region. + * @jit_node: Links to neighboring regions in the just-in-time memory pool. + * @jit_usage_id: The last just-in-time memory usage ID for this region. + * @jit_bin_id: The just-in-time memory bin this region came from. + * @va_refcnt: Number of users of this region. Protected by reg_lock. */ struct kbase_va_region { struct rb_node rblink; struct list_head link; - - struct rb_root *rbtree; /* Backlink to rb tree */ - - u64 start_pfn; /* The PFN in GPU space */ + struct rb_root *rbtree; + u64 start_pfn; size_t nr_pages; - /* Initial commit, for aligning the start address and correctly growing - * KBASE_REG_TILER_ALIGN_TOP regions */ size_t initial_commit; + size_t threshold_pages; /* Free region */ #define KBASE_REG_FREE (1ul << 0) @@ -331,6 +362,11 @@ struct kbase_va_region { */ #define KBASE_REG_VA_FREED (1ul << 26) +/* If set, the heap info address points to a u32 holding the used size in bytes; + * otherwise it points to a u64 holding the lowest address of unused memory. 
+ */ +#define KBASE_REG_HEAP_INFO_IS_SIZE (1ul << 27) + #define KBASE_REG_ZONE_SAME_VA KBASE_REG_ZONE(0) /* only used with 32-bit clients */ @@ -356,22 +392,47 @@ struct kbase_va_region { unsigned long flags; - - size_t extent; /* nr of pages alloc'd on PF */ - - struct kbase_mem_phy_alloc *cpu_alloc; /* the one alloc object we mmap to the CPU when mapping this region */ - struct kbase_mem_phy_alloc *gpu_alloc; /* the one alloc object we mmap to the GPU when mapping this region */ - - /* List head used to store the region in the JIT allocation pool */ + size_t extent; + struct kbase_mem_phy_alloc *cpu_alloc; + struct kbase_mem_phy_alloc *gpu_alloc; struct list_head jit_node; - /* The last JIT usage ID for this region */ u16 jit_usage_id; - /* The JIT bin this allocation came from */ u8 jit_bin_id; +#if MALI_JIT_PRESSURE_LIMIT + /* Pointer to an object in GPU memory defining an end of an allocated + * region + * + * The object can be one of: + * - u32 value defining the size of the region + * - u64 pointer first unused byte in the region + * + * The interpretation of the object depends on + * BASE_JIT_ALLOC_HEAP_INFO_IS_SIZE flag in jit_info_flags - if it is + * set, the heap info object should be interpreted as size. + */ + u64 heap_info_gpu_addr; - int va_refcnt; /* number of users of this va */ + /* The current estimate of the number of pages used, which in normal + * use is either: + * - the initial estimate == va_pages + * - the actual pages used, as found by a JIT usage report + * + * Note that since the value is calculated from GPU memory after a JIT + * usage report, at any point in time it is allowed to take a random + * value that is no greater than va_pages (e.g. 
it may be greater than + * gpu_alloc->nents) + */ + size_t used_pages; +#endif /* MALI_JIT_PRESSURE_LIMIT */ + + int va_refcnt; }; +/* Special marker for failed JIT allocations that still must be marked as + * in-use + */ +#define KBASE_RESERVED_REG_JIT_ALLOC ((struct kbase_va_region *)-1) + static inline bool kbase_is_region_free(struct kbase_va_region *reg) { return (!reg || reg->flags & KBASE_REG_FREE); @@ -410,6 +471,8 @@ static inline struct kbase_va_region *kbase_va_region_alloc_get( WARN_ON(!region->va_refcnt); /* non-atomic as kctx->reg_lock is held */ + dev_dbg(kctx->kbdev->dev, "va_refcnt %d before get %p\n", + region->va_refcnt, (void *)region); region->va_refcnt++; return region; @@ -425,6 +488,8 @@ static inline struct kbase_va_region *kbase_va_region_alloc_put( /* non-atomic as kctx->reg_lock is held */ region->va_refcnt--; + dev_dbg(kctx->kbdev->dev, "va_refcnt %d after put %p\n", + region->va_refcnt, (void *)region); if (!region->va_refcnt) kbase_region_refcnt_free(region); @@ -904,21 +969,27 @@ struct page *kbase_mem_alloc_page(struct kbase_mem_pool *pool); int kbase_region_tracker_init(struct kbase_context *kctx); /** - * kbase_region_tracker_init_jit - Initialize the JIT region - * @kctx: kbase context - * @jit_va_pages: Size of the JIT region in pages - * @max_allocations: Maximum number of allocations allowed for the JIT region - * @trim_level: Trim level for the JIT region - * @group_id: The physical group ID from which to allocate JIT memory. - * Valid range is 0..(MEMORY_GROUP_MANAGER_NR_GROUPS-1). + * kbase_region_tracker_init_jit - Initialize the just-in-time memory + * allocation region + * @kctx: Kbase context. + * @jit_va_pages: Size of the JIT region in pages. + * @max_allocations: Maximum number of allocations allowed for the JIT region. + * Valid range is 0..%BASE_JIT_ALLOC_COUNT. + * @trim_level: Trim level for the JIT region. + * Valid range is 0..%BASE_JIT_MAX_TRIM_LEVEL. 
+ * @group_id: The physical group ID from which to allocate JIT memory. + * Valid range is 0..(%MEMORY_GROUP_MANAGER_NR_GROUPS-1). + * @phys_pages_limit: Maximum number of physical pages to use to back the JIT + * region. Must not exceed @jit_va_pages. * * Return: 0 if success, negative error code otherwise. */ int kbase_region_tracker_init_jit(struct kbase_context *kctx, u64 jit_va_pages, - u8 max_allocations, u8 trim_level, int group_id); + int max_allocations, int trim_level, int group_id, + u64 phys_pages_limit); /** - * kbase_region_tracker_init_exec - Initialize the EXEC_VA region + * kbase_region_tracker_init_exec - Initialize the GPU-executable memory region * @kctx: kbase context * @exec_va_pages: Size of the JIT region in pages. * It must not be greater than 4 GB. @@ -1008,72 +1079,6 @@ void kbase_gpu_vm_unlock(struct kbase_context *kctx); int kbase_alloc_phy_pages(struct kbase_va_region *reg, size_t vsize, size_t size); -/** - * kbase_mmu_init - Initialise an object representing GPU page tables - * - * The structure should be terminated using kbase_mmu_term() - * - * @kbdev: Instance of GPU platform device, allocated from the probe method. - * @mmut: GPU page tables to be initialized. - * @kctx: Optional kbase context, may be NULL if this set of MMU tables - * is not associated with a context. - * @group_id: The physical group ID from which to allocate GPU page tables. - * Valid range is 0..(MEMORY_GROUP_MANAGER_NR_GROUPS-1). - * - * Return: 0 if successful, otherwise a negative error code. - */ -int kbase_mmu_init(struct kbase_device *kbdev, struct kbase_mmu_table *mmut, - struct kbase_context *kctx, int group_id); -/** - * kbase_mmu_term - Terminate an object representing GPU page tables - * - * This will free any page tables that have been allocated - * - * @kbdev: Instance of GPU platform device, allocated from the probe method. - * @mmut: GPU page tables to be destroyed. 
- */ -void kbase_mmu_term(struct kbase_device *kbdev, struct kbase_mmu_table *mmut); - -/** - * kbase_mmu_create_ate - Create an address translation entry - * - * @kbdev: Instance of GPU platform device, allocated from the probe method. - * @phy: Physical address of the page to be mapped for GPU access. - * @flags: Bitmask of attributes of the GPU memory region being mapped. - * @level: Page table level for which to build an address translation entry. - * @group_id: The physical memory group in which the page was allocated. - * Valid range is 0..(MEMORY_GROUP_MANAGER_NR_GROUPS-1). - * - * This function creates an address translation entry to encode the physical - * address of a page to be mapped for access by the GPU, along with any extra - * attributes required for the GPU memory region. - * - * Return: An address translation entry, either in LPAE or AArch64 format - * (depending on the driver's configuration). - */ -u64 kbase_mmu_create_ate(struct kbase_device *kbdev, - struct tagged_addr phy, unsigned long flags, int level, int group_id); - -int kbase_mmu_insert_pages_no_flush(struct kbase_device *kbdev, - struct kbase_mmu_table *mmut, - const u64 start_vpfn, - struct tagged_addr *phys, size_t nr, - unsigned long flags, int group_id); -int kbase_mmu_insert_pages(struct kbase_device *kbdev, - struct kbase_mmu_table *mmut, u64 vpfn, - struct tagged_addr *phys, size_t nr, - unsigned long flags, int as_nr, int group_id); -int kbase_mmu_insert_single_page(struct kbase_context *kctx, u64 vpfn, - struct tagged_addr phys, size_t nr, - unsigned long flags, int group_id); - -int kbase_mmu_teardown_pages(struct kbase_device *kbdev, - struct kbase_mmu_table *mmut, u64 vpfn, - size_t nr, int as_nr); -int kbase_mmu_update_pages(struct kbase_context *kctx, u64 vpfn, - struct tagged_addr *phys, size_t nr, - unsigned long flags, int const group_id); - /** * @brief Register region and map it on the GPU. 
* @@ -1161,8 +1166,6 @@ int kbase_sync_now(struct kbase_context *kctx, struct basep_syncset *sset); void kbase_sync_single(struct kbase_context *kctx, struct tagged_addr cpu_pa, struct tagged_addr gpu_pa, off_t offset, size_t size, enum kbase_sync_type sync_fn); -void kbase_pre_job_sync(struct kbase_context *kctx, struct base_syncset *syncsets, size_t nr); -void kbase_post_job_sync(struct kbase_context *kctx, struct base_syncset *syncsets, size_t nr); /* OS specific functions */ int kbase_mem_free(struct kbase_context *kctx, u64 gpu_addr); @@ -1388,20 +1391,6 @@ static inline void kbase_clear_dma_addr(struct page *p) ClearPagePrivate(p); } -/** - * kbase_mmu_interrupt_process - Process a bus or page fault. - * @kbdev The kbase_device the fault happened on - * @kctx The kbase_context for the faulting address space if one was found. - * @as The address space that has the fault - * @fault Data relating to the fault - * - * This function will process a fault on a specific address space - */ -void kbase_mmu_interrupt_process(struct kbase_device *kbdev, - struct kbase_context *kctx, struct kbase_as *as, - struct kbase_fault *fault); - - /** * @brief Process a page fault. * @@ -1468,11 +1457,13 @@ int kbase_jit_init(struct kbase_context *kctx); * kbase_jit_allocate - Allocate JIT memory * @kctx: kbase context * @info: JIT allocation information + * @ignore_pressure_limit: Whether the JIT memory pressure limit is ignored * * Return: JIT allocation on success or NULL on failure. 
*/ struct kbase_va_region *kbase_jit_allocate(struct kbase_context *kctx, - struct base_jit_alloc_info *info); + const struct base_jit_alloc_info *info, + bool ignore_pressure_limit); /** * kbase_jit_free - Free a JIT allocation @@ -1506,6 +1497,73 @@ bool kbase_jit_evict(struct kbase_context *kctx); */ void kbase_jit_term(struct kbase_context *kctx); +#if MALI_JIT_PRESSURE_LIMIT +/** + * kbase_trace_jit_report_gpu_mem_trace_enabled - variant of + * kbase_trace_jit_report_gpu_mem() that should only be called once the + * corresponding tracepoint is verified to be enabled + * @kctx: kbase context + * @reg: Just-in-time memory region to trace + * @flags: combination of values from enum kbase_jit_report_flags + */ +void kbase_trace_jit_report_gpu_mem_trace_enabled(struct kbase_context *kctx, + struct kbase_va_region *reg, unsigned int flags); +#endif /* MALI_JIT_PRESSURE_LIMIT */ + +/** + * kbase_trace_jit_report_gpu_mem - Trace information about the GPU memory used + * to make a JIT report + * @kctx: kbase context + * @reg: Just-in-time memory region to trace + * @flags: combination of values from enum kbase_jit_report_flags + * + * Information is traced using the trace_mali_jit_report_gpu_mem() tracepoint. + * + * In case that tracepoint is not enabled, this function should have the same + * low overheads as a tracepoint itself (i.e. use of 'jump labels' to avoid + * conditional branches) + * + * This can take the reg_lock on @kctx, do not use in places where this lock is + * already held. + * + * Note: this has to be a macro because at this stage the tracepoints have not + * been included. Also gives no opportunity for the compiler to mess up + * inlining it. 
+ */ +#if MALI_JIT_PRESSURE_LIMIT +#define kbase_trace_jit_report_gpu_mem(kctx, reg, flags) \ + do { \ + if (trace_mali_jit_report_gpu_mem_enabled()) \ + kbase_trace_jit_report_gpu_mem_trace_enabled( \ + (kctx), (reg), (flags)); \ + } while (0) +#else +#define kbase_trace_jit_report_gpu_mem(kctx, reg, flags) \ + CSTD_NOP(kctx, reg, flags) +#endif /* MALI_JIT_PRESSURE_LIMIT */ + +#if MALI_JIT_PRESSURE_LIMIT +/** + * kbase_jit_report_update_pressure - safely update the JIT physical page + * pressure and JIT region's estimate of used_pages + * @kctx: kbase context, to update the current physical pressure + * @reg: Just-in-time memory region to update with @new_used_pages + * @new_used_pages: new value of number of pages used in the JIT region + * @flags: combination of values from enum kbase_jit_report_flags + * + * Takes care of: + * - correctly updating the pressure given the current reg->used_pages and + * new_used_pages + * - then updating the %kbase_va_region used_pages member + * + * Precondition: + * - new_used_pages <= reg->nr_pages + */ +void kbase_jit_report_update_pressure(struct kbase_context *kctx, + struct kbase_va_region *reg, u64 new_used_pages, + unsigned int flags); +#endif /* MALI_JIT_PRESSURE_LIMIT */ + /** * kbase_has_exec_va_zone - EXEC_VA zone predicate * @@ -1684,4 +1742,30 @@ void kbase_mem_umm_unmap(struct kbase_context *kctx, int kbase_mem_do_sync_imported(struct kbase_context *kctx, struct kbase_va_region *reg, enum kbase_sync_type sync_fn); + +/** + * kbase_mem_copy_to_pinned_user_pages - Memcpy from source input page to + * an unaligned address at a given offset from the start of a target page. + * + * @dest_pages: Pointer to the array of pages to which the content is + * to be copied from the provided @src_page. + * @src_page: Pointer to the page which correspond to the source page + * from which the copying will take place. + * @to_copy: Total number of bytes pending to be copied from + * @src_page to @target_page_nr within @dest_pages. 
+ * This will get decremented by number of bytes we + * managed to copy from source page to target pages. + * @nr_pages: Total number of pages present in @dest_pages. + * @target_page_nr: Target page number to which @src_page needs to be + * copied. This will get incremented by one if + * we are successful in copying from source page. + * @offset: Offset in bytes into the target pages from which the + * copying is to be performed. + * + * Return: 0 on success, or a negative error code. + */ +int kbase_mem_copy_to_pinned_user_pages(struct page **dest_pages, + void *src_page, size_t *to_copy, unsigned int nr_pages, + unsigned int *target_page_nr, size_t offset); + #endif /* _KBASE_MEM_H_ */ diff --git a/drivers/gpu/arm/bifrost/mali_kbase_mem_linux.c b/drivers/gpu/arm/bifrost/mali_kbase_mem_linux.c index 71019c68b640..acaa82b9767b 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_mem_linux.c +++ b/drivers/gpu/arm/bifrost/mali_kbase_mem_linux.c @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2010-2019 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2010-2020 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -46,8 +46,9 @@ #include #include -#include +#include #include +#include #if ((KERNEL_VERSION(5, 3, 0) <= LINUX_VERSION_CODE) || \ (KERNEL_VERSION(5, 0, 0) > LINUX_VERSION_CODE)) @@ -84,6 +85,8 @@ #define KBASE_MEM_ION_SYNC_WORKAROUND #endif +#define IR_THRESHOLD_STEPS (256u) + static int kbase_vmap_phy_pages(struct kbase_context *kctx, struct kbase_va_region *reg, u64 offset_bytes, size_t size, @@ -93,6 +96,10 @@ static void kbase_vunmap_phy_pages(struct kbase_context *kctx, static int kbase_tracking_page_setup(struct kbase_context *kctx, struct vm_area_struct *vma); +static int kbase_mem_shrink_gpu_mapping(struct kbase_context *kctx, + struct kbase_va_region *reg, + u64 new_pages, u64 old_pages); + /* Retrieve the associated region pointer if the GPU address corresponds to * one of the event memory pages. The enclosing region, if found, shouldn't * have been marked as free. 
@@ -278,7 +285,12 @@ struct kbase_va_region *kbase_mem_alloc(struct kbase_context *kctx, dev_dbg(dev, "Allocating %lld va_pages, %lld commit_pages, %lld extent, 0x%llX flags\n", va_pages, commit_pages, extent, *flags); - *gpu_va = 0; /* return 0 on failure */ + if (!(*flags & BASE_MEM_FLAG_MAP_FIXED)) + *gpu_va = 0; /* return 0 on failure */ + else + dev_err(dev, + "Keeping requested GPU VA of 0x%llx\n", + (unsigned long long)*gpu_va); if (!kbase_check_alloc_flags(*flags)) { dev_warn(dev, @@ -334,7 +346,9 @@ struct kbase_va_region *kbase_mem_alloc(struct kbase_context *kctx, zone = KBASE_REG_ZONE_CUSTOM_VA; } - reg = kbase_alloc_free_region(rbtree, 0, va_pages, zone); + reg = kbase_alloc_free_region(rbtree, PFN_DOWN(*gpu_va), + va_pages, zone); + if (!reg) { dev_err(dev, "Failed to allocate free region"); goto no_region; @@ -349,6 +363,15 @@ struct kbase_va_region *kbase_mem_alloc(struct kbase_context *kctx, goto prepare_failed; } + if (*flags & BASE_MEM_GROW_ON_GPF) { + unsigned int const ir_threshold = atomic_read( + &kctx->kbdev->memdev.ir_threshold); + + reg->threshold_pages = ((va_pages * ir_threshold) + + (IR_THRESHOLD_STEPS / 2)) / IR_THRESHOLD_STEPS; + } else + reg->threshold_pages = 0; + if (*flags & (BASE_MEM_GROW_ON_GPF|BASE_MEM_TILER_ALIGN_TOP)) { /* kbase_check_alloc_sizes() already checks extent is valid for * assigning to reg->extent */ @@ -404,7 +427,7 @@ struct kbase_va_region *kbase_mem_alloc(struct kbase_context *kctx, *gpu_va = (u64) cookie; } else /* we control the VA */ { - if (kbase_gpu_mmap(kctx, reg, 0, va_pages, 1) != 0) { + if (kbase_gpu_mmap(kctx, reg, *gpu_va, va_pages, 1) != 0) { dev_warn(dev, "Failed to map memory on GPU"); kbase_gpu_vm_unlock(kctx); goto no_mmap; @@ -1125,7 +1148,8 @@ int kbase_mem_umm_map(struct kbase_context *kctx, alloc->imported.umm.current_mapping_usage_count++; if (alloc->imported.umm.current_mapping_usage_count != 1) { - if (IS_ENABLED(CONFIG_MALI_DMA_BUF_LEGACY_COMPAT)) { + if 
(IS_ENABLED(CONFIG_MALI_DMA_BUF_LEGACY_COMPAT) || + alloc->imported.umm.need_sync) { if (!kbase_is_region_invalid_or_free(reg)) { err = kbase_mem_do_sync_imported(kctx, reg, KBASE_SYNC_TO_DEVICE); @@ -1196,7 +1220,8 @@ void kbase_mem_umm_unmap(struct kbase_context *kctx, { alloc->imported.umm.current_mapping_usage_count--; if (alloc->imported.umm.current_mapping_usage_count) { - if (IS_ENABLED(CONFIG_MALI_DMA_BUF_LEGACY_COMPAT)) { + if (IS_ENABLED(CONFIG_MALI_DMA_BUF_LEGACY_COMPAT) || + alloc->imported.umm.need_sync) { if (!kbase_is_region_invalid_or_free(reg)) { int err = kbase_mem_do_sync_imported(kctx, reg, KBASE_SYNC_TO_CPU); @@ -1261,6 +1286,7 @@ static struct kbase_va_region *kbase_mem_from_umm(struct kbase_context *kctx, struct dma_buf *dma_buf; struct dma_buf_attachment *dma_attachment; bool shared_zone = false; + bool need_sync = false; int group_id; /* 64-bit address range is the max */ @@ -1301,6 +1327,9 @@ static struct kbase_va_region *kbase_mem_from_umm(struct kbase_context *kctx, if (*flags & BASE_MEM_IMPORT_SHARED) shared_zone = true; + if (*flags & BASE_MEM_IMPORT_SYNC_ON_MAP_UNMAP) + need_sync = true; + #ifdef CONFIG_64BIT if (!kbase_ctx_flag(kctx, KCTX_COMPAT)) { /* @@ -1356,6 +1385,7 @@ static struct kbase_va_region *kbase_mem_from_umm(struct kbase_context *kctx, reg->gpu_alloc->imported.umm.dma_buf = dma_buf; reg->gpu_alloc->imported.umm.dma_attachment = dma_attachment; reg->gpu_alloc->imported.umm.current_mapping_usage_count = 0; + reg->gpu_alloc->imported.umm.need_sync = need_sync; reg->extent = 0; if (!IS_ENABLED(CONFIG_MALI_DMA_BUF_MAP_ON_DEMAND)) { @@ -1410,6 +1440,10 @@ static struct kbase_va_region *kbase_mem_from_user_buffer( struct kbase_alloc_import_user_buf *user_buf; struct page **pages = NULL; + /* Flag supported only for dma-buf imported memory */ + if (*flags & BASE_MEM_IMPORT_SYNC_ON_MAP_UNMAP) + return NULL; + if ((address & (cache_line_alignment - 1)) != 0 || (size & (cache_line_alignment - 1)) != 0) { if (*flags & 
BASE_MEM_UNCACHED_GPU) { @@ -1964,9 +1998,22 @@ void kbase_mem_shrink_cpu_mapping(struct kbase_context *kctx, (old_pages - new_pages)<cpu_alloc, delta); - if (reg->cpu_alloc != reg->gpu_alloc) - kbase_free_phy_pages_helper(reg->gpu_alloc, delta); } out_unlock: @@ -2104,6 +2137,43 @@ int kbase_mem_commit(struct kbase_context *kctx, u64 gpu_addr, u64 new_pages) return res; } +int kbase_mem_shrink(struct kbase_context *const kctx, + struct kbase_va_region *const reg, u64 const new_pages) +{ + u64 delta, old_pages; + int err; + + lockdep_assert_held(&kctx->reg_lock); + + if (WARN_ON(!kctx)) + return -EINVAL; + + if (WARN_ON(!reg)) + return -EINVAL; + + old_pages = kbase_reg_current_backed_size(reg); + if (WARN_ON(old_pages < new_pages)) + return -EINVAL; + + delta = old_pages - new_pages; + + /* Update the GPU mapping */ + err = kbase_mem_shrink_gpu_mapping(kctx, reg, + new_pages, old_pages); + if (err >= 0) { + /* Update all CPU mapping(s) */ + kbase_mem_shrink_cpu_mapping(kctx, reg, + new_pages, old_pages); + + kbase_free_phy_pages_helper(reg->cpu_alloc, delta); + if (reg->cpu_alloc != reg->gpu_alloc) + kbase_free_phy_pages_helper(reg->gpu_alloc, delta); + } + + return err; +} + + static void kbase_cpu_vm_open(struct vm_area_struct *vma) { struct kbase_cpu_mapping *map = vma->vm_private_data; @@ -2149,8 +2219,6 @@ static void kbase_cpu_vm_close(struct vm_area_struct *vma) kfree(map); } -KBASE_EXPORT_TEST_API(kbase_cpu_vm_close); - static struct kbase_aliased *get_aliased_alloc(struct vm_area_struct *vma, struct kbase_va_region *reg, pgoff_t *start_off, @@ -2866,6 +2934,20 @@ void kbase_vunmap(struct kbase_context *kctx, struct kbase_vmap_struct *map) } KBASE_EXPORT_TEST_API(kbase_vunmap); +static void kbasep_add_mm_counter(struct mm_struct *mm, int member, long value) +{ +#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 19, 0)) + /* To avoid the build breakage due to an unexported kernel symbol + * 'mm_trace_rss_stat' from later kernels, i.e. 
from V4.19.0 onwards, + * we inline here the equivalent of 'add_mm_counter()' from linux + * kernel V5.4.0~8. + */ + atomic_long_add(value, &mm->rss_stat.count[member]); +#else + add_mm_counter(mm, member, value); +#endif +} + void kbasep_os_process_page_usage_update(struct kbase_context *kctx, int pages) { struct mm_struct *mm; @@ -2875,10 +2957,10 @@ void kbasep_os_process_page_usage_update(struct kbase_context *kctx, int pages) if (mm) { atomic_add(pages, &kctx->nonmapped_pages); #ifdef SPLIT_RSS_COUNTING - add_mm_counter(mm, MM_FILEPAGES, pages); + kbasep_add_mm_counter(mm, MM_FILEPAGES, pages); #else spin_lock(&mm->page_table_lock); - add_mm_counter(mm, MM_FILEPAGES, pages); + kbasep_add_mm_counter(mm, MM_FILEPAGES, pages); spin_unlock(&mm->page_table_lock); #endif } @@ -2903,10 +2985,10 @@ static void kbasep_os_process_page_usage_drain(struct kbase_context *kctx) pages = atomic_xchg(&kctx->nonmapped_pages, 0); #ifdef SPLIT_RSS_COUNTING - add_mm_counter(mm, MM_FILEPAGES, -pages); + kbasep_add_mm_counter(mm, MM_FILEPAGES, -pages); #else spin_lock(&mm->page_table_lock); - add_mm_counter(mm, MM_FILEPAGES, -pages); + kbasep_add_mm_counter(mm, MM_FILEPAGES, -pages); spin_unlock(&mm->page_table_lock); #endif } diff --git a/drivers/gpu/arm/bifrost/mali_kbase_mem_linux.h b/drivers/gpu/arm/bifrost/mali_kbase_mem_linux.h index 02f1c3b4bc5d..cd094b3d10bf 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_mem_linux.h +++ b/drivers/gpu/arm/bifrost/mali_kbase_mem_linux.h @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2010, 2012-2019 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2010, 2012-2020 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -128,6 +128,18 @@ int kbase_mem_flags_change(struct kbase_context *kctx, u64 gpu_addr, unsigned in */ int kbase_mem_commit(struct kbase_context *kctx, u64 gpu_addr, u64 new_pages); +/** + * kbase_mem_shrink - Shrink the physical backing size of a region + * + * @kctx: The kernel context + * @reg: The GPU region + * @new_pages: Number of physical pages to back the region with + * + * Return: 0 on success or error code + */ +int kbase_mem_shrink(struct kbase_context *kctx, + struct kbase_va_region *reg, u64 new_pages); + /** * kbase_context_mmap - Memory map method, gets invoked when mmap system call is * issued on device file /dev/malixx. @@ -333,23 +345,6 @@ void kbase_mem_shrink_cpu_mapping(struct kbase_context *kctx, struct kbase_va_region *reg, u64 new_pages, u64 old_pages); -/** - * kbase_mem_shrink_gpu_mapping - Shrink the GPU mapping of an allocation - * @kctx: Context the region belongs to - * @reg: The GPU region or NULL if there isn't one - * @new_pages: The number of pages after the shrink - * @old_pages: The number of pages before the shrink - * - * Return: 0 on success, negative -errno on error - * - * Unmap the shrunk pages from the GPU mapping. Note that the size of the region - * itself is unmodified as we still need to reserve the VA, only the page tables - * will be modified by this function. 
- */ -int kbase_mem_shrink_gpu_mapping(struct kbase_context *kctx, - struct kbase_va_region *reg, - u64 new_pages, u64 old_pages); - /** * kbase_phy_alloc_mapping_term - Terminate the kernel side mapping of a * physical allocation diff --git a/drivers/gpu/arm/bifrost/mali_kbase_mem_pool_debugfs.c b/drivers/gpu/arm/bifrost/mali_kbase_mem_pool_debugfs.c index edb9cd441ec6..5879fdf85b1d 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_mem_pool_debugfs.c +++ b/drivers/gpu/arm/bifrost/mali_kbase_mem_pool_debugfs.c @@ -169,15 +169,23 @@ static const struct file_operations kbase_mem_pool_debugfs_max_size_fops = { void kbase_mem_pool_debugfs_init(struct dentry *parent, struct kbase_context *kctx) { - debugfs_create_file("mem_pool_size", S_IRUGO | S_IWUSR, parent, + /* prevent unprivileged use of debug file in old kernel version */ +#if (KERNEL_VERSION(4, 7, 0) <= LINUX_VERSION_CODE) + /* only for newer kernel version debug file system is safe */ + const mode_t mode = 0644; +#else + const mode_t mode = 0600; +#endif + + debugfs_create_file("mem_pool_size", mode, parent, &kctx->mem_pools.small, &kbase_mem_pool_debugfs_fops); - debugfs_create_file("mem_pool_max_size", S_IRUGO | S_IWUSR, parent, + debugfs_create_file("mem_pool_max_size", mode, parent, &kctx->mem_pools.small, &kbase_mem_pool_debugfs_max_size_fops); - debugfs_create_file("lp_mem_pool_size", S_IRUGO | S_IWUSR, parent, + debugfs_create_file("lp_mem_pool_size", mode, parent, &kctx->mem_pools.large, &kbase_mem_pool_debugfs_fops); - debugfs_create_file("lp_mem_pool_max_size", S_IRUGO | S_IWUSR, parent, + debugfs_create_file("lp_mem_pool_max_size", mode, parent, &kctx->mem_pools.large, &kbase_mem_pool_debugfs_max_size_fops); } diff --git a/drivers/gpu/arm/bifrost/mali_kbase_mipe_gen_header.h b/drivers/gpu/arm/bifrost/mali_kbase_mipe_gen_header.h index 99475b67479c..ec5212275751 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_mipe_gen_header.h +++ b/drivers/gpu/arm/bifrost/mali_kbase_mipe_gen_header.h @@ -1,6 +1,6 @@ /* 
* - * (C) COPYRIGHT 2019 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2010-2020 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -20,101 +20,198 @@ * */ +/* THIS FILE IS AUTOGENERATED BY mali_trace_generator.py. + * DO NOT EDIT. + */ + +/* clang-format off */ + #include "mali_kbase_mipe_proto.h" /** * This header generates MIPE tracepoint declaration BLOB at * compile time. * - * Before including this header, the following parameters - * must be defined: + * It is intentional that there is no header guard. + * The header could be included multiple times for + * different blobs compilation. * - * MIPE_HEADER_BLOB_VAR_NAME: the name of the variable - * where the result BLOB will be stored. - * - * MIPE_HEADER_TP_LIST: the list of tracepoints to process. - * It should be defined as follows: - * #define MIPE_HEADER_TP_LIST \ - * TP_DESC(FIRST_TRACEPOINT, "Some description", "@II", "first_arg,second_arg") \ - * TP_DESC(SECOND_TRACEPOINT, "Some description", "@II", "first_arg,second_arg") \ - * etc. - * Where the first argument is tracepoints name, the second - * argument is a short tracepoint description, the third argument - * argument types (see MIPE documentation), and the fourth argument - * is comma separated argument names. - * - * MIPE_HEADER_TP_LIST_COUNT: number of entries in MIPE_HEADER_TP_LIST. - * - * MIPE_HEADER_PKT_CLASS: MIPE packet class. + * Before including this header MIPE_HEADER_* parameters must be + * defined. See documentation below: */ +/** + * The name of the variable where the result BLOB will be stored. + */ #if !defined(MIPE_HEADER_BLOB_VAR_NAME) #error "MIPE_HEADER_BLOB_VAR_NAME must be defined!" #endif -#if !defined(MIPE_HEADER_TP_LIST) -#error "MIPE_HEADER_TP_LIST must be defined!" +/** + * A compiler attribute for the BLOB variable. + * + * e.g. 
__attribute__((section("my_section"))) + * + * Default value is no attribute. + */ +#if !defined(MIPE_HEADER_BLOB_VAR_ATTRIBUTE) +#define MIPE_HEADER_BLOB_VAR_ATTRIBUTE #endif -#if !defined(MIPE_HEADER_TP_LIST_COUNT) -#error "MIPE_HEADER_TP_LIST_COUNT must be defined!" +/** + * MIPE stream id. + * + * See enum tl_stream_id. + */ +#if !defined(MIPE_HEADER_STREAM_ID) +#error "MIPE_HEADER_STREAM_ID must be defined!" #endif +/** + * MIPE packet class. + * + * See enum tl_packet_class. + */ #if !defined(MIPE_HEADER_PKT_CLASS) #error "MIPE_HEADER_PKT_CLASS must be defined!" #endif -static const struct { +/** + * The list of tracepoints to process. + * + * It should be defined as follows: + * #define MIPE_HEADER_TRACEPOINT_LIST \ + * TRACEPOINT_DESC(FIRST_TRACEPOINT, "Some description", "@II", "first_arg,second_arg") \ + * TRACEPOINT_DESC(SECOND_TRACEPOINT, "Some description", "@II", "first_arg,second_arg") \ + * etc. + * + * Where the first argument is tracepoints name, the second + * argument is a short tracepoint description, the third argument + * argument types (see MIPE documentation), and the fourth argument + * is comma separated argument names. + */ +#if !defined(MIPE_HEADER_TRACEPOINT_LIST) +#error "MIPE_HEADER_TRACEPOINT_LIST must be defined!" +#endif + +/** + * The number of entries in MIPE_HEADER_TRACEPOINT_LIST. + */ +#if !defined(MIPE_HEADER_TRACEPOINT_LIST_SIZE) +#error "MIPE_HEADER_TRACEPOINT_LIST_SIZE must be defined!" +#endif + +/** + * The list of enums to process. + * + * It should be defined as follows: + * #define MIPE_HEADER_ENUM_LIST \ + * ENUM_DESC(enum_arg_name, enum_value) \ + * ENUM_DESC(enum_arg_name, enum_value) \ + * etc. + * + * Where enum_arg_name is the name of a tracepoint argument being used with + * this enum. enum_value is a valid C enum value. + * + * Default value is an empty list. + */ +#if defined(MIPE_HEADER_ENUM_LIST) + +/** + * Tracepoint message ID used for enums declaration. 
+ */ +#if !defined(MIPE_HEADER_ENUM_MSG_ID) +#error "MIPE_HEADER_ENUM_MSG_ID must be defined!" +#endif + +#else +#define MIPE_HEADER_ENUM_LIST +#endif + +/** + * The MIPE tracepoint declaration BLOB. + */ +const struct +{ u32 _mipe_w0; u32 _mipe_w1; u8 _protocol_version; u8 _pointer_size; u32 _tp_count; -#define TP_DESC(name, desc, arg_types, arg_names) \ - struct { \ - u32 _name; \ - u32 _size_string_name; \ - char _string_name[sizeof(#name)]; \ - u32 _size_desc; \ - char _desc[sizeof(desc)]; \ - u32 _size_arg_types; \ - char _arg_types[sizeof(arg_types)]; \ - u32 _size_arg_names; \ - char _arg_names[sizeof(arg_names)]; \ +#define TRACEPOINT_DESC(name, desc, arg_types, arg_names) \ + struct { \ + u32 _name; \ + u32 _size_string_name; \ + char _string_name[sizeof(#name)]; \ + u32 _size_desc; \ + char _desc[sizeof(desc)]; \ + u32 _size_arg_types; \ + char _arg_types[sizeof(arg_types)]; \ + u32 _size_arg_names; \ + char _arg_names[sizeof(arg_names)]; \ } __attribute__ ((__packed__)) __ ## name; - MIPE_HEADER_TP_LIST -#undef TP_DESC +#define ENUM_DESC(arg_name, value) \ + struct { \ + u32 _msg_id; \ + u32 _arg_name_len; \ + char _arg_name[sizeof(#arg_name)]; \ + u32 _value; \ + u32 _value_str_len; \ + char _value_str[sizeof(#value)]; \ + } __attribute__ ((__packed__)) __ ## arg_name ## _ ## value; -} __attribute__ ((__packed__)) MIPE_HEADER_BLOB_VAR_NAME = { + MIPE_HEADER_TRACEPOINT_LIST + MIPE_HEADER_ENUM_LIST +#undef TRACEPOINT_DESC +#undef ENUM_DESC +} __attribute__((packed)) MIPE_HEADER_BLOB_VAR_NAME MIPE_HEADER_BLOB_VAR_ATTRIBUTE = { ._mipe_w0 = MIPE_PACKET_HEADER_W0( TL_PACKET_FAMILY_TL, MIPE_HEADER_PKT_CLASS, TL_PACKET_TYPE_HEADER, - 1), + MIPE_HEADER_STREAM_ID), ._mipe_w1 = MIPE_PACKET_HEADER_W1( sizeof(MIPE_HEADER_BLOB_VAR_NAME) - PACKET_HEADER_SIZE, 0), ._protocol_version = SWTRACE_VERSION, ._pointer_size = sizeof(void *), - ._tp_count = MIPE_HEADER_TP_LIST_COUNT, -#define TP_DESC(name, desc, arg_types, arg_names) \ - .__ ## name = { \ - ._name = name, \ - 
._size_string_name = sizeof(#name), \ - ._string_name = #name, \ - ._size_desc = sizeof(desc), \ - ._desc = desc, \ - ._size_arg_types = sizeof(arg_types), \ - ._arg_types = arg_types, \ - ._size_arg_names = sizeof(arg_names), \ - ._arg_names = arg_names \ + ._tp_count = MIPE_HEADER_TRACEPOINT_LIST_SIZE, +#define TRACEPOINT_DESC(name, desc, arg_types, arg_names) \ + .__ ## name = { \ + ._name = name, \ + ._size_string_name = sizeof(#name), \ + ._string_name = #name, \ + ._size_desc = sizeof(desc), \ + ._desc = desc, \ + ._size_arg_types = sizeof(arg_types), \ + ._arg_types = arg_types, \ + ._size_arg_names = sizeof(arg_names), \ + ._arg_names = arg_names \ }, - MIPE_HEADER_TP_LIST -#undef TP_DESC +#define ENUM_DESC(arg_name, value) \ + .__ ## arg_name ## _ ## value = { \ + ._msg_id = MIPE_HEADER_ENUM_MSG_ID, \ + ._arg_name_len = sizeof(#arg_name), \ + ._arg_name = #arg_name, \ + ._value = value, \ + ._value_str_len = sizeof(#value), \ + ._value_str = #value \ + }, + + MIPE_HEADER_TRACEPOINT_LIST + MIPE_HEADER_ENUM_LIST +#undef TRACEPOINT_DESC +#undef ENUM_DESC }; #undef MIPE_HEADER_BLOB_VAR_NAME -#undef MIPE_HEADER_TP_LIST -#undef MIPE_HEADER_TP_LIST_COUNT +#undef MIPE_HEADER_BLOB_VAR_ATTRIBUTE +#undef MIPE_HEADER_STREAM_ID #undef MIPE_HEADER_PKT_CLASS +#undef MIPE_HEADER_TRACEPOINT_LIST +#undef MIPE_HEADER_TRACEPOINT_LIST_SIZE +#undef MIPE_HEADER_ENUM_LIST +#undef MIPE_HEADER_ENUM_MSG_ID + +/* clang-format on */ diff --git a/drivers/gpu/arm/bifrost/mali_kbase_mipe_proto.h b/drivers/gpu/arm/bifrost/mali_kbase_mipe_proto.h index 1a0b8b40c6c2..54667cfc6304 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_mipe_proto.h +++ b/drivers/gpu/arm/bifrost/mali_kbase_mipe_proto.h @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2010-2019 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2010-2020 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -20,6 +20,12 @@ * */ +/* THIS FILE IS AUTOGENERATED BY mali_trace_generator.py. + * DO NOT EDIT. + */ + +/* clang-format off */ + #if !defined(_KBASE_MIPE_PROTO_H) #define _KBASE_MIPE_PROTO_H @@ -109,5 +115,13 @@ enum tl_packet_type { TL_PACKET_TYPE_SUMMARY = 2, /* stream's summary */ }; +/* Stream ID types (timeline family). */ +enum tl_stream_id { + TL_STREAM_ID_USER = 0, /* User-space driver Timeline stream. */ + TL_STREAM_ID_KERNEL = 1, /* Kernel-space driver Timeline stream. */ + TL_STREAM_ID_CSFFW = 2, /* CSF firmware driver Timeline stream. */ +}; + #endif /* _KBASE_MIPE_PROTO_H */ +/* clang-format on */ diff --git a/drivers/gpu/arm/bifrost/mali_kbase_pm.c b/drivers/gpu/arm/bifrost/mali_kbase_pm.c index 5699eb8feaf2..b9ed8c31033d 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_pm.c +++ b/drivers/gpu/arm/bifrost/mali_kbase_pm.c @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2010-2018 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2010-2020 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -28,11 +28,16 @@ */ #include -#include +#include #include #include #include +#include + +#ifdef CONFIG_MALI_ARBITER_SUPPORT +#include +#endif /* CONFIG_MALI_ARBITER_SUPPORT */ int kbase_pm_powerup(struct kbase_device *kbdev, unsigned int flags) { @@ -46,27 +51,36 @@ void kbase_pm_halt(struct kbase_device *kbdev) void kbase_pm_context_active(struct kbase_device *kbdev) { - (void)kbase_pm_context_active_handle_suspend(kbdev, KBASE_PM_SUSPEND_HANDLER_NOT_POSSIBLE); + (void)kbase_pm_context_active_handle_suspend(kbdev, + KBASE_PM_SUSPEND_HANDLER_NOT_POSSIBLE); } -int kbase_pm_context_active_handle_suspend(struct kbase_device *kbdev, enum kbase_pm_suspend_handler suspend_handler) +int kbase_pm_context_active_handle_suspend(struct kbase_device *kbdev, + enum kbase_pm_suspend_handler suspend_handler) { - struct kbasep_js_device_data *js_devdata = &kbdev->js_data; int c; KBASE_DEBUG_ASSERT(kbdev != NULL); + dev_dbg(kbdev->dev, "%s - reason = %d, pid = %d\n", __func__, + suspend_handler, current->pid); + kbase_pm_lock(kbdev); - mutex_lock(&js_devdata->runpool_mutex); - mutex_lock(&kbdev->pm.lock); +#ifdef CONFIG_MALI_ARBITER_SUPPORT + if (kbase_arbiter_pm_ctx_active_handle_suspend(kbdev, suspend_handler)) + return 1; + + if (kbase_pm_is_suspending(kbdev) || + kbase_pm_is_gpu_lost(kbdev)) { +#else if (kbase_pm_is_suspending(kbdev)) { +#endif /* CONFIG_MALI_ARBITER_SUPPORT */ switch (suspend_handler) { case KBASE_PM_SUSPEND_HANDLER_DONT_REACTIVATE: if (kbdev->pm.active_count != 0) break; /* FALLTHROUGH */ case KBASE_PM_SUSPEND_HANDLER_DONT_INCREASE: - mutex_unlock(&kbdev->pm.lock); - mutex_unlock(&js_devdata->runpool_mutex); + kbase_pm_unlock(kbdev); return 1; case KBASE_PM_SUSPEND_HANDLER_NOT_POSSIBLE: @@ -77,16 +91,20 @@ int kbase_pm_context_active_handle_suspend(struct kbase_device *kbdev, enum kbas } } c = 
++kbdev->pm.active_count; - KBASE_TRACE_ADD_REFCOUNT(kbdev, PM_CONTEXT_ACTIVE, NULL, NULL, 0u, c); + KBASE_KTRACE_ADD(kbdev, PM_CONTEXT_ACTIVE, NULL, c); if (c == 1) { - /* First context active: Power on the GPU and any cores requested by - * the policy */ + /* First context active: Power on the GPU and + * any cores requested by the policy + */ kbase_hwaccess_pm_gpu_active(kbdev); +#ifdef CONFIG_MALI_ARBITER_SUPPORT + kbase_arbiter_pm_vm_event(kbdev, KBASE_VM_REF_EVENT); +#endif /* CONFIG_MALI_ARBITER_SUPPORT */ } - mutex_unlock(&kbdev->pm.lock); - mutex_unlock(&js_devdata->runpool_mutex); + kbase_pm_unlock(kbdev); + dev_dbg(kbdev->dev, "%s %d\n", __func__, kbdev->pm.active_count); return 0; } @@ -95,17 +113,15 @@ KBASE_EXPORT_TEST_API(kbase_pm_context_active); void kbase_pm_context_idle(struct kbase_device *kbdev) { - struct kbasep_js_device_data *js_devdata = &kbdev->js_data; int c; KBASE_DEBUG_ASSERT(kbdev != NULL); - mutex_lock(&js_devdata->runpool_mutex); - mutex_lock(&kbdev->pm.lock); + kbase_pm_lock(kbdev); c = --kbdev->pm.active_count; - KBASE_TRACE_ADD_REFCOUNT(kbdev, PM_CONTEXT_IDLE, NULL, NULL, 0u, c); + KBASE_KTRACE_ADD(kbdev, PM_CONTEXT_IDLE, NULL, c); KBASE_DEBUG_ASSERT(c >= 0); @@ -113,20 +129,21 @@ void kbase_pm_context_idle(struct kbase_device *kbdev) /* Last context has gone idle */ kbase_hwaccess_pm_gpu_idle(kbdev); - /* Wake up anyone waiting for this to become 0 (e.g. suspend). The - * waiters must synchronize with us by locking the pm.lock after - * waiting. + /* Wake up anyone waiting for this to become 0 (e.g. suspend). + * The waiters must synchronize with us by locking the pm.lock + * after waiting. 
*/ wake_up(&kbdev->pm.zero_active_count_wait); } - mutex_unlock(&kbdev->pm.lock); - mutex_unlock(&js_devdata->runpool_mutex); + kbase_pm_unlock(kbdev); + dev_dbg(kbdev->dev, "%s %d (pid = %d)\n", __func__, + kbdev->pm.active_count, current->pid); } KBASE_EXPORT_TEST_API(kbase_pm_context_idle); -void kbase_pm_suspend(struct kbase_device *kbdev) +void kbase_pm_driver_suspend(struct kbase_device *kbdev) { KBASE_DEBUG_ASSERT(kbdev); @@ -141,13 +158,31 @@ void kbase_pm_suspend(struct kbase_device *kbdev) kbase_hwcnt_context_disable(kbdev->hwcnt_gpu_ctx); mutex_lock(&kbdev->pm.lock); - KBASE_DEBUG_ASSERT(!kbase_pm_is_suspending(kbdev)); + if (WARN_ON(kbase_pm_is_suspending(kbdev))) { + mutex_unlock(&kbdev->pm.lock); + return; + } kbdev->pm.suspending = true; mutex_unlock(&kbdev->pm.lock); - /* From now on, the active count will drop towards zero. Sometimes, it'll - * go up briefly before going down again. However, once it reaches zero it - * will stay there - guaranteeing that we've idled all pm references */ +#ifdef CONFIG_MALI_ARBITER_SUPPORT + if (kbdev->arb.arb_if) { + int i; + unsigned long flags; + + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + kbase_disjoint_state_up(kbdev); + for (i = 0; i < kbdev->gpu_props.num_job_slots; i++) + kbase_job_slot_softstop(kbdev, i, NULL); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + } +#endif /* CONFIG_MALI_ARBITER_SUPPORT */ + + /* From now on, the active count will drop towards zero. Sometimes, + * it'll go up briefly before going down again. However, once + * it reaches zero it will stay there - guaranteeing that we've idled + * all pm references + */ /* Suspend job scheduler and associated components, so that it releases all * the PM active count references */ @@ -155,16 +190,29 @@ void kbase_pm_suspend(struct kbase_device *kbdev) /* Wait for the active count to reach zero. This is not the same as * waiting for a power down, since not all policies power down when this - * reaches zero. 
*/ - wait_event(kbdev->pm.zero_active_count_wait, kbdev->pm.active_count == 0); + * reaches zero. + */ + dev_dbg(kbdev->dev, ">wait_event - waiting for active_count == 0 (pid = %d)\n", + current->pid); + wait_event(kbdev->pm.zero_active_count_wait, + kbdev->pm.active_count == 0); + dev_dbg(kbdev->dev, ">wait_event - waiting done\n"); /* NOTE: We synchronize with anything that was just finishing a - * kbase_pm_context_idle() call by locking the pm.lock below */ - + * kbase_pm_context_idle() call by locking the pm.lock below + */ kbase_hwaccess_pm_suspend(kbdev); + +#ifdef CONFIG_MALI_ARBITER_SUPPORT + if (kbdev->arb.arb_if) { + mutex_lock(&kbdev->pm.arb_vm_state->vm_state_lock); + kbase_arbiter_pm_vm_stopped(kbdev); + mutex_unlock(&kbdev->pm.arb_vm_state->vm_state_lock); + } +#endif /* CONFIG_MALI_ARBITER_SUPPORT */ } -void kbase_pm_resume(struct kbase_device *kbdev) +void kbase_pm_driver_resume(struct kbase_device *kbdev, bool arb_gpu_start) { unsigned long flags; @@ -172,18 +220,28 @@ void kbase_pm_resume(struct kbase_device *kbdev) kbase_hwaccess_pm_resume(kbdev); /* Initial active call, to power on the GPU/cores if needed */ +#ifdef CONFIG_MALI_ARBITER_SUPPORT + (void)kbase_pm_context_active_handle_suspend(kbdev, + (arb_gpu_start ? 
+ KBASE_PM_SUSPEND_HANDLER_VM_GPU_GRANTED : + KBASE_PM_SUSPEND_HANDLER_NOT_POSSIBLE)); +#else kbase_pm_context_active(kbdev); +#endif /* Resume any blocked atoms (which may cause contexts to be scheduled in - * and dependent atoms to run) */ + * and dependent atoms to run) + */ kbase_resume_suspended_soft_jobs(kbdev); /* Resume the Job Scheduler and associated components, and start running - * atoms */ + * atoms + */ kbasep_js_resume(kbdev); /* Matching idle call, to power off the GPU/cores if we didn't actually - * need it and the policy doesn't want it on */ + * need it and the policy doesn't want it on + */ kbase_pm_context_idle(kbdev); /* Re-enable GPU hardware counters */ @@ -194,3 +252,27 @@ void kbase_pm_resume(struct kbase_device *kbdev) /* Resume vinstr */ kbase_vinstr_resume(kbdev->vinstr_ctx); } + +void kbase_pm_suspend(struct kbase_device *kbdev) +{ +#ifdef CONFIG_MALI_ARBITER_SUPPORT + if (kbdev->arb.arb_if) + kbase_arbiter_pm_vm_event(kbdev, KBASE_VM_OS_SUSPEND_EVENT); + else + kbase_pm_driver_suspend(kbdev); +#else + kbase_pm_driver_suspend(kbdev); +#endif /* CONFIG_MALI_ARBITER_SUPPORT */ +} + +void kbase_pm_resume(struct kbase_device *kbdev) +{ +#ifdef CONFIG_MALI_ARBITER_SUPPORT + if (kbdev->arb.arb_if) + kbase_arbiter_pm_vm_event(kbdev, KBASE_VM_OS_RESUME_EVENT); + else + kbase_pm_driver_resume(kbdev, false); +#else + kbase_pm_driver_resume(kbdev, false); +#endif /* CONFIG_MALI_ARBITER_SUPPORT */ +} diff --git a/drivers/gpu/arm/bifrost/mali_kbase_pm.h b/drivers/gpu/arm/bifrost/mali_kbase_pm.h index 59a031467c95..257f959cc5a4 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_pm.h +++ b/drivers/gpu/arm/bifrost/mali_kbase_pm.h @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2010-2015,2018 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2010-2020 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -40,7 +40,8 @@ * * Must be called before any other power management function * - * @param kbdev The kbase device structure for the device (must be a valid pointer) + * @param kbdev The kbase device structure for the device + * (must be a valid pointer) * * @return 0 if the power management framework was successfully initialized. */ @@ -78,8 +79,9 @@ void kbase_pm_term(struct kbase_device *kbdev); /** Increment the count of active contexts. * - * This function should be called when a context is about to submit a job. It informs the active power policy that the - * GPU is going to be in use shortly and the policy is expected to start turning on the GPU. + * This function should be called when a context is about to submit a job. + * It informs the active power policy that the GPU is going to be in use shortly + * and the policy is expected to start turning on the GPU. * * This function will block until the GPU is available. * @@ -98,16 +100,24 @@ void kbase_pm_context_active(struct kbase_device *kbdev); /** Handler codes for doing kbase_pm_context_active_handle_suspend() */ enum kbase_pm_suspend_handler { /** A suspend is not expected/not possible - this is the same as - * kbase_pm_context_active() */ + * kbase_pm_context_active() + */ KBASE_PM_SUSPEND_HANDLER_NOT_POSSIBLE, /** If we're suspending, fail and don't increase the active count */ KBASE_PM_SUSPEND_HANDLER_DONT_INCREASE, - /** If we're suspending, succeed and allow the active count to increase iff - * it didn't go from 0->1 (i.e., we didn't re-activate the GPU). + /** If we're suspending, succeed and allow the active count to increase + * if it didn't go from 0->1 (i.e., we didn't re-activate the GPU). * * This should only be used when there is a bounded time on the activation - * (e.g. 
guarantee it's going to be idled very soon after) */ - KBASE_PM_SUSPEND_HANDLER_DONT_REACTIVATE + * (e.g. guarantee it's going to be idled very soon after) + */ + KBASE_PM_SUSPEND_HANDLER_DONT_REACTIVATE, +#ifdef CONFIG_MALI_ARBITER_SUPPORT + /** Special case when Arbiter has notified we can use GPU. + * Active count should always start at 0 in this case. + */ + KBASE_PM_SUSPEND_HANDLER_VM_GPU_GRANTED, +#endif /* CONFIG_MALI_ARBITER_SUPPORT */ }; /** Suspend 'safe' variant of kbase_pm_context_active() @@ -129,8 +139,9 @@ int kbase_pm_context_active_handle_suspend(struct kbase_device *kbdev, enum kbas /** Decrement the reference count of active contexts. * - * This function should be called when a context becomes idle. After this call the GPU may be turned off by the power - * policy so the calling code should ensure that it does not access the GPU's registers. + * This function should be called when a context becomes idle. + * After this call the GPU may be turned off by the power policy so the calling + * code should ensure that it does not access the GPU's registers. * * @param kbdev The kbase device structure for the device (must be a valid pointer) */ @@ -162,6 +173,8 @@ void kbase_pm_suspend(struct kbase_device *kbdev); * This is called in response to an OS resume event, and calls into the various * kbase components to complete the resume. * + * Also called when using VM arbiter, when GPU access has been granted. 
+ * * @param kbdev The kbase device structure for the device (must be a valid pointer) */ void kbase_pm_resume(struct kbase_device *kbdev); @@ -177,4 +190,55 @@ void kbase_pm_resume(struct kbase_device *kbdev); */ void kbase_pm_vsync_callback(int buffer_updated, void *data); -#endif /* _KBASE_PM_H_ */ +/** + * kbase_pm_driver_suspend() - Put GPU and driver in suspend state + * @param kbdev The kbase device structure for the device + * (must be a valid pointer) + * + * Suspend the GPU and prevent any further register accesses to it from Kernel + * threads. + * + * This is called in response to an OS suspend event, and calls into the various + * kbase components to complete the suspend. + * + * Despite kbase_pm_suspend(), it will ignore to update Arbiter + * status if MALI_ARBITER_SUPPORT is enabled. + * + * @note the mechanisms used here rely on all user-space threads being frozen + * by the OS before we suspend. Otherwise, an IOCTL could occur that powers up + * the GPU e.g. via atom submission. + */ +void kbase_pm_driver_suspend(struct kbase_device *kbdev); + +/** + * kbase_pm_driver_resume() - Put GPU and driver in resume + * @param kbdev The kbase device structure for the device + * (must be a valid pointer) + * + * Resume the GPU, allow register accesses to it, and resume running atoms on + * the GPU. + * + * This is called in response to an OS resume event, and calls into the various + * kbase components to complete the resume. + * + * Also called when using VM arbiter, when GPU access has been granted. + * + * Despite kbase_pm_resume(), it will ignore to update Arbiter + * status if MALI_ARBITER_SUPPORT is enabled. 
+ */ +void kbase_pm_driver_resume(struct kbase_device *kbdev, bool arb_gpu_start); + +#ifdef CONFIG_MALI_ARBITER_SUPPORT +/** + * kbase_pm_handle_gpu_lost() - Handle GPU Lost for the VM + * @kbdev: Device pointer + * + * Handles the case that the Arbiter has forced the GPU away from the VM, + * so that interrupts will not be received and registers are no longer + * accessible because replaced by dummy RAM. + * Kill any running tasks and put the driver into a GPU powered-off state. + */ +void kbase_pm_handle_gpu_lost(struct kbase_device *kbdev); +#endif /* CONFIG_MALI_ARBITER_SUPPORT */ + +#endif /* _KBASE_PM_H_ */ diff --git a/drivers/gpu/arm/bifrost/mali_kbase_regs_history_debugfs.c b/drivers/gpu/arm/bifrost/mali_kbase_regs_history_debugfs.c index d3017c10666e..8a349040d714 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_regs_history_debugfs.c +++ b/drivers/gpu/arm/bifrost/mali_kbase_regs_history_debugfs.c @@ -85,8 +85,8 @@ static int regs_history_show(struct seq_file *sfile, void *data) &h->buf[(h->count - iters + i) % h->size]; char const access = (io->addr & 1) ? 'w' : 'r'; - seq_printf(sfile, "%6i: %c: reg 0x%p val %08x\n", i, access, - (void *)(io->addr & ~0x1), io->value); + seq_printf(sfile, "%6i: %c: reg 0x%016lx val %08x\n", i, access, + (unsigned long)(io->addr & ~0x1), io->value); } spin_unlock_irqrestore(&h->lock, flags); diff --git a/drivers/gpu/arm/bifrost/mali_kbase_smc.c b/drivers/gpu/arm/bifrost/mali_kbase_smc.c index 3470f5800d39..b5c7b1289846 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_smc.c +++ b/drivers/gpu/arm/bifrost/mali_kbase_smc.c @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2015, 2018 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2015, 2018, 2020 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software diff --git a/drivers/gpu/arm/bifrost/mali_kbase_softjobs.c b/drivers/gpu/arm/bifrost/mali_kbase_softjobs.c index 4f8a1e93910a..40e80ae656c1 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_softjobs.c +++ b/drivers/gpu/arm/bifrost/mali_kbase_softjobs.c @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2011-2019 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2011-2020 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -33,7 +33,8 @@ #include #include #include -#include +#include +#include #include #include #include @@ -133,7 +134,7 @@ static int kbase_dump_cpu_gpu_time(struct kbase_jd_atom *katom) { struct kbase_vmap_struct map; void *user_result; - struct timespec ts; + struct timespec64 ts; struct base_dump_cpu_gpu_counters data; u64 system_time; u64 cycle_counter; @@ -719,47 +720,35 @@ static int kbase_debug_copy_prepare(struct kbase_jd_atom *katom) return ret; } -void kbase_mem_copy_from_extres_page(struct kbase_context *kctx, - void *extres_page, struct page **pages, unsigned int nr_pages, - unsigned int *target_page_nr, size_t offset, size_t *to_copy) +#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 6, 0) +static void *dma_buf_kmap_page(struct kbase_mem_phy_alloc *gpu_alloc, + unsigned long page_num, struct page **page) { - void *target_page = kmap(pages[*target_page_nr]); - size_t chunk = PAGE_SIZE-offset; + struct sg_table *sgt = gpu_alloc->imported.umm.sgt; + struct sg_page_iter sg_iter; + unsigned long page_index = 0; - lockdep_assert_held(&kctx->reg_lock); + if (WARN_ON(gpu_alloc->type != KBASE_MEM_TYPE_IMPORTED_UMM)) + return NULL; - if (!target_page) { - *target_page_nr += 1; - dev_warn(kctx->kbdev->dev, "kmap failed in debug_copy job."); - return; + if (!sgt) + return NULL; + + 
if (WARN_ON(page_num >= gpu_alloc->nents)) + return NULL; + + for_each_sg_page(sgt->sgl, &sg_iter, sgt->nents, 0) { + if (page_index == page_num) { + *page = sg_page_iter_page(&sg_iter); + + return kmap(*page); + } + page_index++; } - chunk = min(chunk, *to_copy); - - memcpy(target_page + offset, extres_page, chunk); - *to_copy -= chunk; - - kunmap(pages[*target_page_nr]); - - *target_page_nr += 1; - if (*target_page_nr >= nr_pages) - return; - - target_page = kmap(pages[*target_page_nr]); - if (!target_page) { - *target_page_nr += 1; - dev_warn(kctx->kbdev->dev, "kmap failed in debug_copy job."); - return; - } - - KBASE_DEBUG_ASSERT(target_page); - - chunk = min(offset, *to_copy); - memcpy(target_page, extres_page + PAGE_SIZE-offset, chunk); - *to_copy -= chunk; - - kunmap(pages[*target_page_nr]); + return NULL; } +#endif int kbase_mem_copy_from_extres(struct kbase_context *kctx, struct kbase_debug_copy_buffer *buf_data) @@ -785,22 +774,21 @@ int kbase_mem_copy_from_extres(struct kbase_context *kctx, switch (gpu_alloc->type) { case KBASE_MEM_TYPE_IMPORTED_USER_BUF: { - for (i = 0; i < buf_data->nr_extres_pages; i++) { + for (i = 0; i < buf_data->nr_extres_pages && + target_page_nr < buf_data->nr_pages; i++) { struct page *pg = buf_data->extres_pages[i]; void *extres_page = kmap(pg); - if (extres_page) - kbase_mem_copy_from_extres_page(kctx, - extres_page, pages, + if (extres_page) { + ret = kbase_mem_copy_to_pinned_user_pages( + pages, extres_page, &to_copy, buf_data->nr_pages, - &target_page_nr, - offset, &to_copy); - - kunmap(pg); - if (target_page_nr >= buf_data->nr_pages) - break; + &target_page_nr, offset); + kunmap(pg); + if (ret) + goto out_unlock; + } } - break; } break; case KBASE_MEM_TYPE_IMPORTED_UMM: { @@ -820,20 +808,28 @@ int kbase_mem_copy_from_extres(struct kbase_context *kctx, if (ret) goto out_unlock; - for (i = 0; i < dma_to_copy/PAGE_SIZE; i++) { - + for (i = 0; i < dma_to_copy/PAGE_SIZE && + target_page_nr < buf_data->nr_pages; i++) { +#if 
LINUX_VERSION_CODE >= KERNEL_VERSION(5, 6, 0) + struct page *pg; + void *extres_page = dma_buf_kmap_page(gpu_alloc, i, &pg); +#else void *extres_page = dma_buf_kmap(dma_buf, i); - - if (extres_page) - kbase_mem_copy_from_extres_page(kctx, - extres_page, pages, +#endif + if (extres_page) { + ret = kbase_mem_copy_to_pinned_user_pages( + pages, extres_page, &to_copy, buf_data->nr_pages, - &target_page_nr, - offset, &to_copy); + &target_page_nr, offset); - dma_buf_kunmap(dma_buf, i, extres_page); - if (target_page_nr >= buf_data->nr_pages) - break; +#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 6, 0) + kunmap(pg); +#else + dma_buf_kunmap(dma_buf, i, extres_page); +#endif + if (ret) + goto out_unlock; + } } dma_buf_end_cpu_access(dma_buf, #if LINUX_VERSION_CODE < KERNEL_VERSION(4, 6, 0) && !defined(CONFIG_CHROMEOS) @@ -848,7 +844,6 @@ int kbase_mem_copy_from_extres(struct kbase_context *kctx, out_unlock: kbase_gpu_vm_unlock(kctx); return ret; - } static int kbase_debug_copy(struct kbase_jd_atom *katom) @@ -874,6 +869,7 @@ static int kbase_debug_copy(struct kbase_jd_atom *katom) int kbasep_jit_alloc_validate(struct kbase_context *kctx, struct base_jit_alloc_info *info) { + int j; /* If the ID is zero, then fail the job */ if (info->id == 0) return -EINVAL; @@ -886,46 +882,88 @@ int kbasep_jit_alloc_validate(struct kbase_context *kctx, if ((info->gpu_alloc_addr & KBASEP_JIT_ALLOC_GPU_ADDR_ALIGNMENT) != 0) return -EINVAL; - if (kctx->jit_version == 1) { - /* Old JIT didn't have usage_id, max_allocations, bin_id - * or padding, so force them to zero - */ - info->usage_id = 0; - info->max_allocations = 0; - info->bin_id = 0; - info->flags = 0; - memset(info->padding, 0, sizeof(info->padding)); - } else { - int j; + /* Interface version 2 (introduced with kernel driver version 11.5) + * onward has padding and a flags member to validate. + * + * Note: To support earlier versions the extra bytes will have been set + * to 0 by the caller. 
+ */ - /* Check padding is all zeroed */ - for (j = 0; j < sizeof(info->padding); j++) { - if (info->padding[j] != 0) { - return -EINVAL; - } - } - - /* No bit other than TILER_ALIGN_TOP shall be set */ - if (info->flags & ~BASE_JIT_ALLOC_MEM_TILER_ALIGN_TOP) { + /* Check padding is all zeroed */ + for (j = 0; j < sizeof(info->padding); j++) { + if (info->padding[j] != 0) return -EINVAL; - } } + /* Only valid flags shall be set */ + if (info->flags & ~(BASE_JIT_ALLOC_VALID_FLAGS)) + return -EINVAL; + +#if !MALI_JIT_PRESSURE_LIMIT + /* If just-in-time memory allocation pressure limit feature is disabled, + * heap_info_gpu_addr must be zeroed-out + */ + if (info->heap_info_gpu_addr) + return -EINVAL; +#endif + + /* If BASE_JIT_ALLOC_HEAP_INFO_IS_SIZE is set, heap_info_gpu_addr + * cannot be 0 + */ + if ((info->flags & BASE_JIT_ALLOC_HEAP_INFO_IS_SIZE) && + !info->heap_info_gpu_addr) + return -EINVAL; + return 0; } + +#if (KERNEL_VERSION(3, 18, 63) > LINUX_VERSION_CODE) +#define offsetofend(TYPE, MEMBER) \ + (offsetof(TYPE, MEMBER) + sizeof(((TYPE *)0)->MEMBER)) +#endif + +/* + * Sizes of user data to copy for each just-in-time memory interface version + * + * In interface version 2 onwards this is the same as the struct size, allowing + * copying of arrays of structures from userspace. + * + * In interface version 1 the structure size was variable, and hence arrays of + * structures cannot be supported easily, and were not a feature present in + * version 1 anyway. + */ +static const size_t jit_info_copy_size_for_jit_version[] = { + /* in jit_version 1, the structure did not have any end padding, hence + * it could be a different size on 32 and 64-bit clients. 
We therefore + * do not copy past the last member + */ + [1] = offsetofend(struct base_jit_alloc_info_10_2, id), + [2] = sizeof(struct base_jit_alloc_info_11_5), + [3] = sizeof(struct base_jit_alloc_info) +}; + static int kbase_jit_allocate_prepare(struct kbase_jd_atom *katom) { - __user void *data = (__user void *)(uintptr_t) katom->jc; + __user u8 *data = (__user u8 *)(uintptr_t) katom->jc; struct base_jit_alloc_info *info; struct kbase_context *kctx = katom->kctx; struct kbase_device *kbdev = kctx->kbdev; u32 count; int ret; u32 i; + size_t jit_info_user_copy_size; - /* For backwards compatibility */ - if (katom->nr_extres == 0) + WARN_ON(kctx->jit_version >= + ARRAY_SIZE(jit_info_copy_size_for_jit_version)); + jit_info_user_copy_size = + jit_info_copy_size_for_jit_version[kctx->jit_version]; + WARN_ON(jit_info_user_copy_size > sizeof(*info)); + + /* For backwards compatibility, and to prevent reading more than 1 jit + * info struct on jit version 1 + */ + if (katom->nr_extres == 0 || kctx->jit_version == 1) katom->nr_extres = 1; count = katom->nr_extres; @@ -942,13 +980,21 @@ static int kbase_jit_allocate_prepare(struct kbase_jd_atom *katom) ret = -ENOMEM; goto fail; } - if (copy_from_user(info, data, sizeof(*info)*count) != 0) { - ret = -EINVAL; - goto free_info; - } + katom->softjob_data = info; - for (i = 0; i < count; i++, info++) { + for (i = 0; i < count; i++, info++, data += jit_info_user_copy_size) { + if (copy_from_user(info, data, jit_info_user_copy_size) != 0) { + ret = -EINVAL; + goto free_info; + } + /* Clear any remaining bytes when user struct is smaller than + * kernel struct. 
For jit version 1, this also clears the + * padding bytes + */ + memset(((u8 *)info) + jit_info_user_copy_size, 0, + sizeof(*info) - jit_info_user_copy_size); + ret = kbasep_jit_alloc_validate(kctx, info); if (ret) goto free_info; @@ -961,7 +1007,7 @@ static int kbase_jit_allocate_prepare(struct kbase_jd_atom *katom) katom->jit_blocked = false; lockdep_assert_held(&kctx->jctx.lock); - list_add_tail(&katom->jit_node, &kctx->jit_atoms_head); + list_add_tail(&katom->jit_node, &kctx->jctx.jit_atoms_head); /* * Note: @@ -998,7 +1044,7 @@ static void kbase_jit_add_to_pending_alloc_list(struct kbase_jd_atom *katom) struct list_head *target_list_head = NULL; struct kbase_jd_atom *entry; - list_for_each_entry(entry, &kctx->jit_pending_alloc, queue) { + list_for_each_entry(entry, &kctx->jctx.jit_pending_alloc, queue) { if (katom->age < entry->age) { target_list_head = &entry->queue; break; @@ -1006,7 +1052,7 @@ static void kbase_jit_add_to_pending_alloc_list(struct kbase_jd_atom *katom) } if (target_list_head == NULL) - target_list_head = &kctx->jit_pending_alloc; + target_list_head = &kctx->jctx.jit_pending_alloc; list_add_tail(&katom->queue, target_list_head); } @@ -1021,6 +1067,10 @@ static int kbase_jit_allocate_process(struct kbase_jd_atom *katom) u64 *ptr, new_addr; u32 count = katom->nr_extres; u32 i; + bool ignore_pressure_limit = false; + + trace_sysgraph(SGR_SUBMIT, kctx->id, + kbase_jd_atom_id(kctx, katom)); if (katom->jit_blocked) { list_del(&katom->queue); @@ -1041,6 +1091,15 @@ static int kbase_jit_allocate_process(struct kbase_jd_atom *katom) } } +#if MALI_JIT_PRESSURE_LIMIT + /** + * If this is the only JIT_ALLOC atom in-flight then allow it to exceed + * the defined pressure limit. + */ + if (kctx->jit_current_allocations == 0) + ignore_pressure_limit = true; +#endif /* MALI_JIT_PRESSURE_LIMIT */ + for (i = 0, info = katom->softjob_data; i < count; i++, info++) { if (kctx->jit_alloc[info->id]) { /* The JIT ID is duplicated in this atom. 
Roll back @@ -1052,7 +1111,7 @@ static int kbase_jit_allocate_process(struct kbase_jd_atom *katom) for (j = 0; j < i; j++, info++) { kbase_jit_free(kctx, kctx->jit_alloc[info->id]); kctx->jit_alloc[info->id] = - (struct kbase_va_region *) -1; + KBASE_RESERVED_REG_JIT_ALLOC; } katom->event_code = BASE_JD_EVENT_MEM_GROWTH_FAILED; @@ -1060,17 +1119,14 @@ static int kbase_jit_allocate_process(struct kbase_jd_atom *katom) } /* Create a JIT allocation */ - reg = kbase_jit_allocate(kctx, info); + reg = kbase_jit_allocate(kctx, info, ignore_pressure_limit); if (!reg) { struct kbase_jd_atom *jit_atom; bool can_block = false; lockdep_assert_held(&kctx->jctx.lock); - jit_atom = list_first_entry(&kctx->jit_atoms_head, - struct kbase_jd_atom, jit_node); - - list_for_each_entry(jit_atom, &kctx->jit_atoms_head, jit_node) { + list_for_each_entry(jit_atom, &kctx->jctx.jit_atoms_head, jit_node) { if (jit_atom == katom) break; @@ -1097,7 +1153,7 @@ static int kbase_jit_allocate_process(struct kbase_jd_atom *katom) */ for (; i < count; i++, info++) { kctx->jit_alloc[info->id] = - (struct kbase_va_region *) -1; + KBASE_RESERVED_REG_JIT_ALLOC; } katom->event_code = BASE_JD_EVENT_MEM_GROWTH_FAILED; @@ -1164,6 +1220,9 @@ static int kbase_jit_allocate_process(struct kbase_jd_atom *katom) entry_mmu_flags, info->id, info->commit_pages, info->extent, info->va_pages); kbase_vunmap(kctx, &mapping); + + kbase_trace_jit_report_gpu_mem(kctx, reg, + KBASE_JIT_REPORT_ON_ALLOC_OR_FREE); } katom->event_code = BASE_JD_EVENT_DONE; @@ -1238,7 +1297,7 @@ static int kbase_jit_free_prepare(struct kbase_jd_atom *katom) for (i = 0; i < count; i++) KBASE_TLSTREAM_TL_ATTRIB_ATOM_JITFREEINFO(kbdev, katom, ids[i]); - list_add_tail(&katom->jit_node, &kctx->jit_atoms_head); + list_add_tail(&katom->jit_node, &kctx->jctx.jit_atoms_head); return 0; @@ -1272,7 +1331,7 @@ static void kbase_jit_free_process(struct kbase_jd_atom *katom) } } -static void kbasep_jit_free_finish_worker(struct work_struct *work) +static void 
kbasep_jit_finish_worker(struct work_struct *work) { struct kbase_jd_atom *katom = container_of(work, struct kbase_jd_atom, work); @@ -1288,11 +1347,29 @@ static void kbasep_jit_free_finish_worker(struct work_struct *work) kbase_js_sched_all(kctx->kbdev); } +void kbase_jit_retry_pending_alloc(struct kbase_context *kctx) +{ + LIST_HEAD(jit_pending_alloc_list); + struct list_head *i, *tmp; + + list_splice_tail_init(&kctx->jctx.jit_pending_alloc, + &jit_pending_alloc_list); + + list_for_each_safe(i, tmp, &jit_pending_alloc_list) { + struct kbase_jd_atom *pending_atom = list_entry(i, + struct kbase_jd_atom, queue); + if (kbase_jit_allocate_process(pending_atom) == 0) { + /* Atom has completed */ + INIT_WORK(&pending_atom->work, + kbasep_jit_finish_worker); + queue_work(kctx->jctx.job_done_wq, &pending_atom->work); + } + } +} + static void kbase_jit_free_finish(struct kbase_jd_atom *katom) { - struct list_head *i, *tmp; struct kbase_context *kctx = katom->kctx; - LIST_HEAD(jit_pending_alloc_list); u8 *ids; size_t j; @@ -1303,7 +1380,7 @@ static void kbase_jit_free_finish(struct kbase_jd_atom *katom) return; } - /* Remove this atom from the kctx->jit_atoms_head list */ + /* Remove this atom from the jit_atoms_head list */ list_del(&katom->jit_node); for (j = 0; j != katom->nr_extres; ++j) { @@ -1313,7 +1390,8 @@ static void kbase_jit_free_finish(struct kbase_jd_atom *katom) * still succeed this soft job but don't try and free * the allocation. 
*/ - if (kctx->jit_alloc[ids[j]] != (struct kbase_va_region *) -1) { + if (kctx->jit_alloc[ids[j]] != + KBASE_RESERVED_REG_JIT_ALLOC) { KBASE_TLSTREAM_TL_JIT_USEDPAGES(kctx->kbdev, kctx->jit_alloc[ids[j]]-> gpu_alloc->nents, ids[j]); @@ -1325,18 +1403,7 @@ static void kbase_jit_free_finish(struct kbase_jd_atom *katom) /* Free the list of ids */ kfree(ids); - list_splice_tail_init(&kctx->jit_pending_alloc, &jit_pending_alloc_list); - - list_for_each_safe(i, tmp, &jit_pending_alloc_list) { - struct kbase_jd_atom *pending_atom = list_entry(i, - struct kbase_jd_atom, queue); - if (kbase_jit_allocate_process(pending_atom) == 0) { - /* Atom has completed */ - INIT_WORK(&pending_atom->work, - kbasep_jit_free_finish_worker); - queue_work(kctx->jctx.job_done_wq, &pending_atom->work); - } - } + kbase_jit_retry_pending_alloc(kctx); } static int kbase_ext_res_prepare(struct kbase_jd_atom *katom) @@ -1472,6 +1539,9 @@ int kbase_process_soft_job(struct kbase_jd_atom *katom) KBASE_TLSTREAM_TL_EVENT_ATOM_SOFTJOB_START(kbdev, katom); + trace_sysgraph(SGR_SUBMIT, kctx->id, + kbase_jd_atom_id(kctx, katom)); + switch (katom->core_req & BASE_JD_REQ_SOFT_JOB_TYPE) { case BASE_JD_REQ_SOFT_DUMP_CPU_GPU_TIME: ret = kbase_dump_cpu_gpu_time(katom); @@ -1634,6 +1704,9 @@ int kbase_prepare_soft_job(struct kbase_jd_atom *katom) void kbase_finish_soft_job(struct kbase_jd_atom *katom) { + trace_sysgraph(SGR_COMPLETE, katom->kctx->id, + kbase_jd_atom_id(katom->kctx, katom)); + switch (katom->core_req & BASE_JD_REQ_SOFT_JOB_TYPE) { case BASE_JD_REQ_SOFT_DUMP_CPU_GPU_TIME: /* Nothing to do */ diff --git a/drivers/gpu/arm/bifrost/mali_kbase_sync.h b/drivers/gpu/arm/bifrost/mali_kbase_sync.h index 65b72b9c57cc..37990c25cd91 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_sync.h +++ b/drivers/gpu/arm/bifrost/mali_kbase_sync.h @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2012-2019 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2012-2016, 2018-2019 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -206,6 +206,7 @@ void kbase_sync_fence_info_get(struct dma_fence *fence, */ const char *kbase_sync_status_string(int status); + /* * Internal worker used to continue processing of atom. */ diff --git a/drivers/gpu/arm/bifrost/mali_kbase_sync_common.c b/drivers/gpu/arm/bifrost/mali_kbase_sync_common.c index 03c0df596e06..2e1ede5bdb70 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_sync_common.c +++ b/drivers/gpu/arm/bifrost/mali_kbase_sync_common.c @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2012-2019 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2012-2016, 2018-2019 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software diff --git a/drivers/gpu/arm/bifrost/mali_kbase_trace_defs.h b/drivers/gpu/arm/bifrost/mali_kbase_trace_defs.h deleted file mode 100644 index 77fb8183a3d1..000000000000 --- a/drivers/gpu/arm/bifrost/mali_kbase_trace_defs.h +++ /dev/null @@ -1,261 +0,0 @@ -/* - * - * (C) COPYRIGHT 2011-2015,2018 ARM Limited. All rights reserved. - * - * This program is free software and is provided to you under the terms of the - * GNU General Public License version 2 as published by the Free Software - * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, you can access it online at - * http://www.gnu.org/licenses/gpl-2.0.html. 
- * - * SPDX-License-Identifier: GPL-2.0 - * - */ - - - -/* ***** IMPORTANT: THIS IS NOT A NORMAL HEADER FILE ***** - * ***** DO NOT INCLUDE DIRECTLY ***** - * ***** THE LACK OF HEADER GUARDS IS INTENTIONAL ***** */ - -/* - * The purpose of this header file is just to contain a list of trace code idenitifers - * - * Each identifier is wrapped in a macro, so that its string form and enum form can be created - * - * Each macro is separated with a comma, to allow insertion into an array initializer or enum definition block. - * - * This allows automatic creation of an enum and a corresponding array of strings - * - * Before #including, the includer MUST #define KBASE_TRACE_CODE_MAKE_CODE. - * After #including, the includer MUST #under KBASE_TRACE_CODE_MAKE_CODE. - * - * e.g.: - * #define KBASE_TRACE_CODE( X ) KBASE_TRACE_CODE_ ## X - * typedef enum - * { - * #define KBASE_TRACE_CODE_MAKE_CODE( X ) KBASE_TRACE_CODE( X ) - * #include "mali_kbase_trace_defs.h" - * #undef KBASE_TRACE_CODE_MAKE_CODE - * } kbase_trace_code; - * - * IMPORTANT: THIS FILE MUST NOT BE USED FOR ANY OTHER PURPOSE OTHER THAN THE ABOVE - * - * - * The use of the macro here is: - * - KBASE_TRACE_CODE_MAKE_CODE( X ) - * - * Which produces: - * - For an enum, KBASE_TRACE_CODE_X - * - For a string, "X" - * - * - * For example: - * - KBASE_TRACE_CODE_MAKE_CODE( JM_JOB_COMPLETE ) expands to: - * - KBASE_TRACE_CODE_JM_JOB_COMPLETE for the enum - * - "JM_JOB_COMPLETE" for the string - * - To use it to trace an event, do: - * - KBASE_TRACE_ADD( kbdev, JM_JOB_COMPLETE, subcode, kctx, uatom, val ); - */ - -#if 0 /* Dummy section to avoid breaking formatting */ -int dummy_array[] = { -#endif - -/* - * Core events - */ - /* no info_val, no gpu_addr, no atom */ - KBASE_TRACE_CODE_MAKE_CODE(CORE_CTX_DESTROY), - /* no info_val, no gpu_addr, no atom */ - KBASE_TRACE_CODE_MAKE_CODE(CORE_CTX_HWINSTR_TERM), - /* info_val == GPU_IRQ_STATUS register */ - KBASE_TRACE_CODE_MAKE_CODE(CORE_GPU_IRQ), - /* info_val == bits 
cleared */ - KBASE_TRACE_CODE_MAKE_CODE(CORE_GPU_IRQ_CLEAR), - /* info_val == GPU_IRQ_STATUS register */ - KBASE_TRACE_CODE_MAKE_CODE(CORE_GPU_IRQ_DONE), - KBASE_TRACE_CODE_MAKE_CODE(CORE_GPU_SOFT_RESET), - KBASE_TRACE_CODE_MAKE_CODE(CORE_GPU_HARD_RESET), - KBASE_TRACE_CODE_MAKE_CODE(CORE_GPU_PRFCNT_CLEAR), - /* GPU addr==dump address */ - KBASE_TRACE_CODE_MAKE_CODE(CORE_GPU_PRFCNT_SAMPLE), - KBASE_TRACE_CODE_MAKE_CODE(CORE_GPU_CLEAN_INV_CACHES), -/* - * Job Slot management events - */ - /* info_val==irq rawstat at start */ - KBASE_TRACE_CODE_MAKE_CODE(JM_IRQ), - /* info_val==jobs processed */ - KBASE_TRACE_CODE_MAKE_CODE(JM_IRQ_END), -/* In the following: - * - * - ctx is set if a corresponding job found (NULL otherwise, e.g. some soft-stop cases) - * - uatom==kernel-side mapped uatom address (for correlation with user-side) - */ - /* info_val==exit code; gpu_addr==chain gpuaddr */ - KBASE_TRACE_CODE_MAKE_CODE(JM_JOB_DONE), - /* gpu_addr==JS_HEAD_NEXT written, info_val==lower 32 bits of affinity */ - KBASE_TRACE_CODE_MAKE_CODE(JM_SUBMIT), - /* gpu_addr is as follows: - * - If JS_STATUS active after soft-stop, val==gpu addr written to - * JS_HEAD on submit - * - otherwise gpu_addr==0 */ - KBASE_TRACE_CODE_MAKE_CODE(JM_SOFTSTOP), - KBASE_TRACE_CODE_MAKE_CODE(JM_SOFTSTOP_0), - KBASE_TRACE_CODE_MAKE_CODE(JM_SOFTSTOP_1), - /* gpu_addr==JS_HEAD read */ - KBASE_TRACE_CODE_MAKE_CODE(JM_HARDSTOP), - /* gpu_addr==JS_HEAD read */ - KBASE_TRACE_CODE_MAKE_CODE(JM_HARDSTOP_0), - /* gpu_addr==JS_HEAD read */ - KBASE_TRACE_CODE_MAKE_CODE(JM_HARDSTOP_1), - /* gpu_addr==JS_TAIL read */ - KBASE_TRACE_CODE_MAKE_CODE(JM_UPDATE_HEAD), -/* gpu_addr is as follows: - * - If JS_STATUS active before soft-stop, val==JS_HEAD - * - otherwise gpu_addr==0 - */ - /* gpu_addr==JS_HEAD read */ - KBASE_TRACE_CODE_MAKE_CODE(JM_CHECK_HEAD), - KBASE_TRACE_CODE_MAKE_CODE(JM_FLUSH_WORKQS), - KBASE_TRACE_CODE_MAKE_CODE(JM_FLUSH_WORKQS_DONE), - /* info_val == is_scheduled */ - 
KBASE_TRACE_CODE_MAKE_CODE(JM_ZAP_NON_SCHEDULED), - /* info_val == is_scheduled */ - KBASE_TRACE_CODE_MAKE_CODE(JM_ZAP_SCHEDULED), - KBASE_TRACE_CODE_MAKE_CODE(JM_ZAP_DONE), - /* info_val == nr jobs submitted */ - KBASE_TRACE_CODE_MAKE_CODE(JM_SLOT_SOFT_OR_HARD_STOP), - /* gpu_addr==JS_HEAD_NEXT last written */ - KBASE_TRACE_CODE_MAKE_CODE(JM_SLOT_EVICT), - KBASE_TRACE_CODE_MAKE_CODE(JM_SUBMIT_AFTER_RESET), - KBASE_TRACE_CODE_MAKE_CODE(JM_BEGIN_RESET_WORKER), - KBASE_TRACE_CODE_MAKE_CODE(JM_END_RESET_WORKER), -/* - * Job dispatch events - */ - /* gpu_addr==value to write into JS_HEAD */ - KBASE_TRACE_CODE_MAKE_CODE(JD_DONE), - /* gpu_addr==value to write into JS_HEAD */ - KBASE_TRACE_CODE_MAKE_CODE(JD_DONE_WORKER), - /* gpu_addr==value to write into JS_HEAD */ - KBASE_TRACE_CODE_MAKE_CODE(JD_DONE_WORKER_END), - /* gpu_addr==value to write into JS_HEAD */ - KBASE_TRACE_CODE_MAKE_CODE(JD_DONE_TRY_RUN_NEXT_JOB), - /* gpu_addr==0, info_val==0, uatom==0 */ - KBASE_TRACE_CODE_MAKE_CODE(JD_ZAP_CONTEXT), - /* gpu_addr==value to write into JS_HEAD */ - KBASE_TRACE_CODE_MAKE_CODE(JD_CANCEL), - /* gpu_addr==value to write into JS_HEAD */ - KBASE_TRACE_CODE_MAKE_CODE(JD_CANCEL_WORKER), -/* - * Scheduler Core events - */ - KBASE_TRACE_CODE_MAKE_CODE(JS_RETAIN_CTX_NOLOCK), - /* gpu_addr==value to write into JS_HEAD */ - KBASE_TRACE_CODE_MAKE_CODE(JS_ADD_JOB), - /* gpu_addr==last value written/would be written to JS_HEAD */ - KBASE_TRACE_CODE_MAKE_CODE(JS_REMOVE_JOB), - KBASE_TRACE_CODE_MAKE_CODE(JS_RETAIN_CTX), - KBASE_TRACE_CODE_MAKE_CODE(JS_RELEASE_CTX), - KBASE_TRACE_CODE_MAKE_CODE(JS_TRY_SCHEDULE_HEAD_CTX), - /* gpu_addr==value to write into JS_HEAD */ - KBASE_TRACE_CODE_MAKE_CODE(JS_JOB_DONE_TRY_RUN_NEXT_JOB), - /* gpu_addr==value to write into JS_HEAD */ - KBASE_TRACE_CODE_MAKE_CODE(JS_JOB_DONE_RETRY_NEEDED), - KBASE_TRACE_CODE_MAKE_CODE(JS_AFFINITY_SUBMIT_TO_BLOCKED), - /* info_val == lower 32 bits of affinity */ - KBASE_TRACE_CODE_MAKE_CODE(JS_AFFINITY_CURRENT), - /* 
info_val == lower 32 bits of affinity */ - KBASE_TRACE_CODE_MAKE_CODE(JS_CORE_REF_REQUEST_CORES_FAILED), - /* info_val == lower 32 bits of affinity */ - KBASE_TRACE_CODE_MAKE_CODE(JS_CORE_REF_REGISTER_INUSE_FAILED), - /* info_val == lower 32 bits of rechecked affinity */ - KBASE_TRACE_CODE_MAKE_CODE(JS_CORE_REF_REQUEST_ON_RECHECK_FAILED), - /* info_val == lower 32 bits of rechecked affinity */ - KBASE_TRACE_CODE_MAKE_CODE(JS_CORE_REF_REGISTER_ON_RECHECK_FAILED), - /* info_val == lower 32 bits of affinity */ - KBASE_TRACE_CODE_MAKE_CODE(JS_CORE_REF_AFFINITY_WOULD_VIOLATE), - /* info_val == the ctx attribute now on ctx */ - KBASE_TRACE_CODE_MAKE_CODE(JS_CTX_ATTR_NOW_ON_CTX), - /* info_val == the ctx attribute now on runpool */ - KBASE_TRACE_CODE_MAKE_CODE(JS_CTX_ATTR_NOW_ON_RUNPOOL), - /* info_val == the ctx attribute now off ctx */ - KBASE_TRACE_CODE_MAKE_CODE(JS_CTX_ATTR_NOW_OFF_CTX), - /* info_val == the ctx attribute now off runpool */ - KBASE_TRACE_CODE_MAKE_CODE(JS_CTX_ATTR_NOW_OFF_RUNPOOL), -/* - * Scheduler Policy events - */ - KBASE_TRACE_CODE_MAKE_CODE(JS_POLICY_INIT_CTX), - KBASE_TRACE_CODE_MAKE_CODE(JS_POLICY_TERM_CTX), - /* info_val == whether it was evicted */ - KBASE_TRACE_CODE_MAKE_CODE(JS_POLICY_TRY_EVICT_CTX), - KBASE_TRACE_CODE_MAKE_CODE(JS_POLICY_FOREACH_CTX_JOBS), - KBASE_TRACE_CODE_MAKE_CODE(JS_POLICY_ENQUEUE_CTX), - KBASE_TRACE_CODE_MAKE_CODE(JS_POLICY_DEQUEUE_HEAD_CTX), - KBASE_TRACE_CODE_MAKE_CODE(JS_POLICY_RUNPOOL_ADD_CTX), - KBASE_TRACE_CODE_MAKE_CODE(JS_POLICY_RUNPOOL_REMOVE_CTX), - KBASE_TRACE_CODE_MAKE_CODE(JS_POLICY_DEQUEUE_JOB), - KBASE_TRACE_CODE_MAKE_CODE(JS_POLICY_DEQUEUE_JOB_IRQ), - /* gpu_addr==JS_HEAD to write if the job were run */ - KBASE_TRACE_CODE_MAKE_CODE(JS_POLICY_ENQUEUE_JOB), - KBASE_TRACE_CODE_MAKE_CODE(JS_POLICY_TIMER_START), - KBASE_TRACE_CODE_MAKE_CODE(JS_POLICY_TIMER_END), -/* - * Power Management Events - */ - KBASE_TRACE_CODE_MAKE_CODE(PM_JOB_SUBMIT_AFTER_POWERING_UP), - 
KBASE_TRACE_CODE_MAKE_CODE(PM_JOB_SUBMIT_AFTER_POWERED_UP), - KBASE_TRACE_CODE_MAKE_CODE(PM_PWRON), - KBASE_TRACE_CODE_MAKE_CODE(PM_PWRON_TILER), - KBASE_TRACE_CODE_MAKE_CODE(PM_PWRON_L2), - KBASE_TRACE_CODE_MAKE_CODE(PM_PWROFF), - KBASE_TRACE_CODE_MAKE_CODE(PM_PWROFF_TILER), - KBASE_TRACE_CODE_MAKE_CODE(PM_PWROFF_L2), - KBASE_TRACE_CODE_MAKE_CODE(PM_CORES_POWERED), - KBASE_TRACE_CODE_MAKE_CODE(PM_CORES_POWERED_TILER), - KBASE_TRACE_CODE_MAKE_CODE(PM_CORES_POWERED_L2), - KBASE_TRACE_CODE_MAKE_CODE(PM_CORES_CHANGE_DESIRED), - KBASE_TRACE_CODE_MAKE_CODE(PM_CORES_CHANGE_DESIRED_TILER), - KBASE_TRACE_CODE_MAKE_CODE(PM_CORES_CHANGE_AVAILABLE), - KBASE_TRACE_CODE_MAKE_CODE(PM_CORES_CHANGE_AVAILABLE_TILER), - KBASE_TRACE_CODE_MAKE_CODE(PM_CORES_AVAILABLE), - KBASE_TRACE_CODE_MAKE_CODE(PM_CORES_AVAILABLE_TILER), - /* PM_DESIRED_REACHED: gpu_addr == pm.gpu_in_desired_state */ - KBASE_TRACE_CODE_MAKE_CODE(PM_DESIRED_REACHED), - KBASE_TRACE_CODE_MAKE_CODE(PM_DESIRED_REACHED_TILER), - KBASE_TRACE_CODE_MAKE_CODE(PM_RELEASE_CHANGE_SHADER_NEEDED), - KBASE_TRACE_CODE_MAKE_CODE(PM_RELEASE_CHANGE_TILER_NEEDED), - KBASE_TRACE_CODE_MAKE_CODE(PM_REQUEST_CHANGE_SHADER_NEEDED), - KBASE_TRACE_CODE_MAKE_CODE(PM_REQUEST_CHANGE_TILER_NEEDED), - KBASE_TRACE_CODE_MAKE_CODE(PM_WAKE_WAITERS), - KBASE_TRACE_CODE_MAKE_CODE(PM_CONTEXT_ACTIVE), - KBASE_TRACE_CODE_MAKE_CODE(PM_CONTEXT_IDLE), - KBASE_TRACE_CODE_MAKE_CODE(PM_GPU_ON), - KBASE_TRACE_CODE_MAKE_CODE(PM_GPU_OFF), - /* info_val == policy number, or -1 for "Already changing" */ - KBASE_TRACE_CODE_MAKE_CODE(PM_SET_POLICY), - KBASE_TRACE_CODE_MAKE_CODE(PM_CA_SET_POLICY), - /* info_val == policy number */ - KBASE_TRACE_CODE_MAKE_CODE(PM_CURRENT_POLICY_INIT), - /* info_val == policy number */ - KBASE_TRACE_CODE_MAKE_CODE(PM_CURRENT_POLICY_TERM), -/* Unused code just to make it easier to not have a comma at the end. 
- * All other codes MUST come before this */ - KBASE_TRACE_CODE_MAKE_CODE(DUMMY) - -#if 0 /* Dummy section to avoid breaking formatting */ -}; -#endif - -/* ***** THE LACK OF HEADER GUARDS IS INTENTIONAL ***** */ diff --git a/drivers/gpu/arm/bifrost/mali_kbase_tracepoints.h b/drivers/gpu/arm/bifrost/mali_kbase_tracepoints.h deleted file mode 100644 index 146b67c4cda4..000000000000 --- a/drivers/gpu/arm/bifrost/mali_kbase_tracepoints.h +++ /dev/null @@ -1,2486 +0,0 @@ -/* - * - * (C) COPYRIGHT 2010-2019 ARM Limited. All rights reserved. - * - * This program is free software and is provided to you under the terms of the - * GNU General Public License version 2 as published by the Free Software - * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, you can access it online at - * http://www.gnu.org/licenses/gpl-2.0.html. - * - * SPDX-License-Identifier: GPL-2.0 - * - */ - -/* - * THIS FILE IS AUTOGENERATED BY mali_trace_generator.py. - * DO NOT EDIT. - */ - -#if !defined(_KBASE_TRACEPOINTS_H) -#define _KBASE_TRACEPOINTS_H - -/* Tracepoints are abstract callbacks notifying that some important - * software or hardware event has happened. - * - * In this particular implementation, it results into a MIPE - * timeline event and, in some cases, it also fires an ftrace event - * (a.k.a. Gator events, see details below). 
- */ - -#include "mali_kbase.h" -#include "mali_kbase_gator.h" - -#include -#include - -/* clang-format off */ - -struct kbase_tlstream; - -extern const size_t __obj_stream_offset; -extern const size_t __aux_stream_offset; - -/* This macro dispatches a kbase_tlstream from - * a kbase_device instance. Only AUX or OBJ - * streams can be dispatched. It is aware of - * kbase_timeline binary representation and - * relies on offset variables: - * __obj_stream_offset and __aux_stream_offset. - */ -#define __TL_DISPATCH_STREAM(kbdev, stype) \ - ((struct kbase_tlstream *) \ - ((u8 *)kbdev->timeline + __ ## stype ## _stream_offset)) - -struct tp_desc; - -/* Descriptors of timeline messages transmitted in object events stream. */ -extern const char *obj_desc_header; -extern const size_t obj_desc_header_size; -/* Descriptors of timeline messages transmitted in auxiliary events stream. */ -extern const char *aux_desc_header; -extern const size_t aux_desc_header_size; - -#define TL_ATOM_STATE_IDLE 0 -#define TL_ATOM_STATE_READY 1 -#define TL_ATOM_STATE_DONE 2 -#define TL_ATOM_STATE_POSTED 3 - -#define TL_JS_EVENT_START GATOR_JOB_SLOT_START -#define TL_JS_EVENT_STOP GATOR_JOB_SLOT_STOP -#define TL_JS_EVENT_SOFT_STOP GATOR_JOB_SLOT_SOFT_STOPPED - -#define TLSTREAM_ENABLED (1 << 31) - -void __kbase_tlstream_tl_new_ctx( - struct kbase_tlstream *stream, - const void *ctx, - u32 ctx_nr, - u32 tgid); -void __kbase_tlstream_tl_new_gpu( - struct kbase_tlstream *stream, - const void *gpu, - u32 gpu_id, - u32 core_count); -void __kbase_tlstream_tl_new_lpu( - struct kbase_tlstream *stream, - const void *lpu, - u32 lpu_nr, - u32 lpu_fn); -void __kbase_tlstream_tl_new_atom( - struct kbase_tlstream *stream, - const void *atom, - u32 atom_nr); -void __kbase_tlstream_tl_new_as( - struct kbase_tlstream *stream, - const void *address_space, - u32 as_nr); -void __kbase_tlstream_tl_del_ctx( - struct kbase_tlstream *stream, - const void *ctx); -void __kbase_tlstream_tl_del_atom( - struct 
kbase_tlstream *stream, - const void *atom); -void __kbase_tlstream_tl_lifelink_lpu_gpu( - struct kbase_tlstream *stream, - const void *lpu, - const void *gpu); -void __kbase_tlstream_tl_lifelink_as_gpu( - struct kbase_tlstream *stream, - const void *address_space, - const void *gpu); -void __kbase_tlstream_tl_ret_ctx_lpu( - struct kbase_tlstream *stream, - const void *ctx, - const void *lpu); -void __kbase_tlstream_tl_ret_atom_ctx( - struct kbase_tlstream *stream, - const void *atom, - const void *ctx); -void __kbase_tlstream_tl_ret_atom_lpu( - struct kbase_tlstream *stream, - const void *atom, - const void *lpu, - const char *attrib_match_list); -void __kbase_tlstream_tl_nret_ctx_lpu( - struct kbase_tlstream *stream, - const void *ctx, - const void *lpu); -void __kbase_tlstream_tl_nret_atom_ctx( - struct kbase_tlstream *stream, - const void *atom, - const void *ctx); -void __kbase_tlstream_tl_nret_atom_lpu( - struct kbase_tlstream *stream, - const void *atom, - const void *lpu); -void __kbase_tlstream_tl_ret_as_ctx( - struct kbase_tlstream *stream, - const void *address_space, - const void *ctx); -void __kbase_tlstream_tl_nret_as_ctx( - struct kbase_tlstream *stream, - const void *address_space, - const void *ctx); -void __kbase_tlstream_tl_ret_atom_as( - struct kbase_tlstream *stream, - const void *atom, - const void *address_space); -void __kbase_tlstream_tl_nret_atom_as( - struct kbase_tlstream *stream, - const void *atom, - const void *address_space); -void __kbase_tlstream_tl_attrib_atom_config( - struct kbase_tlstream *stream, - const void *atom, - u64 descriptor, - u64 affinity, - u32 config); -void __kbase_tlstream_tl_attrib_atom_priority( - struct kbase_tlstream *stream, - const void *atom, - u32 prio); -void __kbase_tlstream_tl_attrib_atom_state( - struct kbase_tlstream *stream, - const void *atom, - u32 state); -void __kbase_tlstream_tl_attrib_atom_prioritized( - struct kbase_tlstream *stream, - const void *atom); -void 
__kbase_tlstream_tl_attrib_atom_jit( - struct kbase_tlstream *stream, - const void *atom, - u64 edit_addr, - u64 new_addr, - u32 jit_flags, - u64 mem_flags, - u32 j_id, - u64 com_pgs, - u64 extent, - u64 va_pgs); -void __kbase_tlstream_tl_jit_usedpages( - struct kbase_tlstream *stream, - u64 used_pages, - u32 j_id); -void __kbase_tlstream_tl_attrib_atom_jitallocinfo( - struct kbase_tlstream *stream, - const void *atom, - u64 va_pgs, - u64 com_pgs, - u64 extent, - u32 j_id, - u32 bin_id, - u32 max_allocs, - u32 jit_flags, - u32 usg_id); -void __kbase_tlstream_tl_attrib_atom_jitfreeinfo( - struct kbase_tlstream *stream, - const void *atom, - u32 j_id); -void __kbase_tlstream_tl_attrib_as_config( - struct kbase_tlstream *stream, - const void *address_space, - u64 transtab, - u64 memattr, - u64 transcfg); -void __kbase_tlstream_tl_event_lpu_softstop( - struct kbase_tlstream *stream, - const void *lpu); -void __kbase_tlstream_tl_event_atom_softstop_ex( - struct kbase_tlstream *stream, - const void *atom); -void __kbase_tlstream_tl_event_atom_softstop_issue( - struct kbase_tlstream *stream, - const void *atom); -void __kbase_tlstream_tl_event_atom_softjob_start( - struct kbase_tlstream *stream, - const void *atom); -void __kbase_tlstream_tl_event_atom_softjob_end( - struct kbase_tlstream *stream, - const void *atom); -void __kbase_tlstream_jd_gpu_soft_reset( - struct kbase_tlstream *stream, - const void *gpu); -void __kbase_tlstream_aux_pm_state( - struct kbase_tlstream *stream, - u32 core_type, - u64 core_state_bitset); -void __kbase_tlstream_aux_pagefault( - struct kbase_tlstream *stream, - u32 ctx_nr, - u32 as_nr, - u64 page_cnt_change); -void __kbase_tlstream_aux_pagesalloc( - struct kbase_tlstream *stream, - u32 ctx_nr, - u64 page_cnt); -void __kbase_tlstream_aux_devfreq_target( - struct kbase_tlstream *stream, - u64 target_freq); -void __kbase_tlstream_aux_protected_enter_start( - struct kbase_tlstream *stream, - const void *gpu); -void 
__kbase_tlstream_aux_protected_enter_end( - struct kbase_tlstream *stream, - const void *gpu); -void __kbase_tlstream_aux_protected_leave_start( - struct kbase_tlstream *stream, - const void *gpu); -void __kbase_tlstream_aux_protected_leave_end( - struct kbase_tlstream *stream, - const void *gpu); -void __kbase_tlstream_aux_jit_stats( - struct kbase_tlstream *stream, - u32 ctx_nr, - u32 bid, - u32 max_allocs, - u32 allocs, - u32 va_pages, - u32 ph_pages); -void __kbase_tlstream_aux_event_job_slot( - struct kbase_tlstream *stream, - const void *ctx, - u32 slot_nr, - u32 atom_nr, - u32 event); -void __kbase_tlstream_tl_new_kcpuqueue( - struct kbase_tlstream *stream, - const void *kcpu_queue, - const void *ctx, - u32 kcpuq_num_pending_cmds); -void __kbase_tlstream_tl_ret_kcpuqueue_ctx( - struct kbase_tlstream *stream, - const void *kcpu_queue, - const void *ctx); -void __kbase_tlstream_tl_del_kcpuqueue( - struct kbase_tlstream *stream, - const void *kcpu_queue); -void __kbase_tlstream_tl_nret_kcpuqueue_ctx( - struct kbase_tlstream *stream, - const void *kcpu_queue, - const void *ctx); -void __kbase_tlstream_tl_event_kcpuqueue_enqueue_fence_signal( - struct kbase_tlstream *stream, - const void *kcpu_queue, - const void *fence); -void __kbase_tlstream_tl_event_kcpuqueue_enqueue_fence_wait( - struct kbase_tlstream *stream, - const void *kcpu_queue, - const void *fence); -void __kbase_tlstream_tl_event_array_begin_kcpuqueue_enqueue_cqs_wait( - struct kbase_tlstream *stream, - const void *kcpu_queue); -void __kbase_tlstream_tl_event_array_item_kcpuqueue_enqueue_cqs_wait( - struct kbase_tlstream *stream, - const void *kcpu_queue, - u64 cqs_obj_gpu_addr, - u32 cqs_obj_compare_value); -void __kbase_tlstream_tl_event_array_end_kcpuqueue_enqueue_cqs_wait( - struct kbase_tlstream *stream, - const void *kcpu_queue); -void __kbase_tlstream_tl_event_array_begin_kcpuqueue_enqueue_cqs_set( - struct kbase_tlstream *stream, - const void *kcpu_queue); -void 
__kbase_tlstream_tl_event_array_item_kcpuqueue_enqueue_cqs_set( - struct kbase_tlstream *stream, - const void *kcpu_queue, - u64 cqs_obj_gpu_addr); -void __kbase_tlstream_tl_event_array_end_kcpuqueue_enqueue_cqs_set( - struct kbase_tlstream *stream, - const void *kcpu_queue); -void __kbase_tlstream_tl_event_array_begin_kcpuqueue_enqueue_debugcopy( - struct kbase_tlstream *stream, - const void *kcpu_queue); -void __kbase_tlstream_tl_event_array_item_kcpuqueue_enqueue_debugcopy( - struct kbase_tlstream *stream, - const void *kcpu_queue, - u64 debugcopy_dst_size); -void __kbase_tlstream_tl_event_array_end_kcpuqueue_enqueue_debugcopy( - struct kbase_tlstream *stream, - const void *kcpu_queue); -void __kbase_tlstream_tl_event_kcpuqueue_enqueue_map_import( - struct kbase_tlstream *stream, - const void *kcpu_queue, - u64 map_import_buf_gpu_addr); -void __kbase_tlstream_tl_event_kcpuqueue_enqueue_unmap_import( - struct kbase_tlstream *stream, - const void *kcpu_queue, - u64 map_import_buf_gpu_addr); -void __kbase_tlstream_tl_event_kcpuqueue_enqueue_unmap_import_force( - struct kbase_tlstream *stream, - const void *kcpu_queue, - u64 map_import_buf_gpu_addr); -void __kbase_tlstream_tl_event_array_begin_kcpuqueue_enqueue_jit_alloc( - struct kbase_tlstream *stream, - const void *kcpu_queue); -void __kbase_tlstream_tl_event_array_item_kcpuqueue_enqueue_jit_alloc( - struct kbase_tlstream *stream, - const void *kcpu_queue, - u64 jit_alloc_gpu_alloc_addr_dest, - u64 jit_alloc_va_pages, - u64 jit_alloc_commit_pages, - u64 jit_alloc_extent, - u32 jit_alloc_jit_id, - u32 jit_alloc_bin_id, - u32 jit_alloc_max_allocations, - u32 jit_alloc_flags, - u32 jit_alloc_usage_id); -void __kbase_tlstream_tl_event_array_end_kcpuqueue_enqueue_jit_alloc( - struct kbase_tlstream *stream, - const void *kcpu_queue); -void __kbase_tlstream_tl_event_array_begin_kcpuqueue_enqueue_jit_free( - struct kbase_tlstream *stream, - const void *kcpu_queue); -void 
__kbase_tlstream_tl_event_array_item_kcpuqueue_enqueue_jit_free( - struct kbase_tlstream *stream, - const void *kcpu_queue, - u32 jit_alloc_jit_id); -void __kbase_tlstream_tl_event_array_end_kcpuqueue_enqueue_jit_free( - struct kbase_tlstream *stream, - const void *kcpu_queue); -void __kbase_tlstream_tl_event_kcpuqueue_execute_fence_signal_start( - struct kbase_tlstream *stream, - const void *kcpu_queue); -void __kbase_tlstream_tl_event_kcpuqueue_execute_fence_signal_end( - struct kbase_tlstream *stream, - const void *kcpu_queue); -void __kbase_tlstream_tl_event_kcpuqueue_execute_fence_wait_start( - struct kbase_tlstream *stream, - const void *kcpu_queue); -void __kbase_tlstream_tl_event_kcpuqueue_execute_fence_wait_end( - struct kbase_tlstream *stream, - const void *kcpu_queue); -void __kbase_tlstream_tl_event_kcpuqueue_execute_cqs_wait_start( - struct kbase_tlstream *stream, - const void *kcpu_queue); -void __kbase_tlstream_tl_event_kcpuqueue_execute_cqs_wait_end( - struct kbase_tlstream *stream, - const void *kcpu_queue); -void __kbase_tlstream_tl_event_kcpuqueue_execute_cqs_set_start( - struct kbase_tlstream *stream, - const void *kcpu_queue); -void __kbase_tlstream_tl_event_kcpuqueue_execute_cqs_set_end( - struct kbase_tlstream *stream, - const void *kcpu_queue); -void __kbase_tlstream_tl_event_kcpuqueue_execute_debugcopy_start( - struct kbase_tlstream *stream, - const void *kcpu_queue); -void __kbase_tlstream_tl_event_kcpuqueue_execute_debugcopy_end( - struct kbase_tlstream *stream, - const void *kcpu_queue); -void __kbase_tlstream_tl_event_kcpuqueue_execute_map_import_start( - struct kbase_tlstream *stream, - const void *kcpu_queue); -void __kbase_tlstream_tl_event_kcpuqueue_execute_map_import_end( - struct kbase_tlstream *stream, - const void *kcpu_queue); -void __kbase_tlstream_tl_event_kcpuqueue_execute_unmap_import_start( - struct kbase_tlstream *stream, - const void *kcpu_queue); -void __kbase_tlstream_tl_event_kcpuqueue_execute_unmap_import_end( - 
struct kbase_tlstream *stream, - const void *kcpu_queue); -void __kbase_tlstream_tl_event_kcpuqueue_execute_unmap_import_force_start( - struct kbase_tlstream *stream, - const void *kcpu_queue); -void __kbase_tlstream_tl_event_kcpuqueue_execute_unmap_import_force_end( - struct kbase_tlstream *stream, - const void *kcpu_queue); -void __kbase_tlstream_tl_event_kcpuqueue_execute_jit_alloc_start( - struct kbase_tlstream *stream, - const void *kcpu_queue); -void __kbase_tlstream_tl_event_array_begin_kcpuqueue_execute_jit_alloc_end( - struct kbase_tlstream *stream, - const void *kcpu_queue); -void __kbase_tlstream_tl_event_array_item_kcpuqueue_execute_jit_alloc_end( - struct kbase_tlstream *stream, - const void *kcpu_queue, - u64 jit_alloc_gpu_alloc_addr, - u64 jit_alloc_mmu_flags); -void __kbase_tlstream_tl_event_array_end_kcpuqueue_execute_jit_alloc_end( - struct kbase_tlstream *stream, - const void *kcpu_queue); -void __kbase_tlstream_tl_event_kcpuqueue_execute_jit_free_start( - struct kbase_tlstream *stream, - const void *kcpu_queue); -void __kbase_tlstream_tl_event_array_begin_kcpuqueue_execute_jit_free_end( - struct kbase_tlstream *stream, - const void *kcpu_queue); -void __kbase_tlstream_tl_event_array_item_kcpuqueue_execute_jit_free_end( - struct kbase_tlstream *stream, - const void *kcpu_queue, - u64 jit_free_pages_used); -void __kbase_tlstream_tl_event_array_end_kcpuqueue_execute_jit_free_end( - struct kbase_tlstream *stream, - const void *kcpu_queue); -void __kbase_tlstream_tl_event_kcpuqueue_execute_errorbarrier( - struct kbase_tlstream *stream, - const void *kcpu_queue); - -struct kbase_tlstream; - -/** - * KBASE_TLSTREAM_TL_NEW_CTX - - * object ctx is created - * - * @kbdev: Kbase device - * @ctx: Name of the context object - * @ctx_nr: Kernel context number - * @tgid: Thread Group Id - */ -#define KBASE_TLSTREAM_TL_NEW_CTX( \ - kbdev, \ - ctx, \ - ctx_nr, \ - tgid \ - ) \ - do { \ - int enabled = atomic_read(&kbdev->timeline_is_enabled); \ - if (enabled & 
TLSTREAM_ENABLED) \ - __kbase_tlstream_tl_new_ctx( \ - __TL_DISPATCH_STREAM(kbdev, obj), \ - ctx, ctx_nr, tgid); \ - } while (0) - -/** - * KBASE_TLSTREAM_TL_NEW_GPU - - * object gpu is created - * - * @kbdev: Kbase device - * @gpu: Name of the GPU object - * @gpu_id: Name of the GPU object - * @core_count: Number of cores this GPU hosts - */ -#define KBASE_TLSTREAM_TL_NEW_GPU( \ - kbdev, \ - gpu, \ - gpu_id, \ - core_count \ - ) \ - do { \ - int enabled = atomic_read(&kbdev->timeline_is_enabled); \ - if (enabled & TLSTREAM_ENABLED) \ - __kbase_tlstream_tl_new_gpu( \ - __TL_DISPATCH_STREAM(kbdev, obj), \ - gpu, gpu_id, core_count); \ - } while (0) - -/** - * KBASE_TLSTREAM_TL_NEW_LPU - - * object lpu is created - * - * @kbdev: Kbase device - * @lpu: Name of the Logical Processing Unit object - * @lpu_nr: Sequential number assigned to the newly created LPU - * @lpu_fn: Property describing functional abilities of this LPU - */ -#define KBASE_TLSTREAM_TL_NEW_LPU( \ - kbdev, \ - lpu, \ - lpu_nr, \ - lpu_fn \ - ) \ - do { \ - int enabled = atomic_read(&kbdev->timeline_is_enabled); \ - if (enabled & TLSTREAM_ENABLED) \ - __kbase_tlstream_tl_new_lpu( \ - __TL_DISPATCH_STREAM(kbdev, obj), \ - lpu, lpu_nr, lpu_fn); \ - } while (0) - -/** - * KBASE_TLSTREAM_TL_NEW_ATOM - - * object atom is created - * - * @kbdev: Kbase device - * @atom: Atom identifier - * @atom_nr: Sequential number of an atom - */ -#define KBASE_TLSTREAM_TL_NEW_ATOM( \ - kbdev, \ - atom, \ - atom_nr \ - ) \ - do { \ - int enabled = atomic_read(&kbdev->timeline_is_enabled); \ - if (enabled & TLSTREAM_ENABLED) \ - __kbase_tlstream_tl_new_atom( \ - __TL_DISPATCH_STREAM(kbdev, obj), \ - atom, atom_nr); \ - } while (0) - -/** - * KBASE_TLSTREAM_TL_NEW_AS - - * address space object is created - * - * @kbdev: Kbase device - * @address_space: Name of the address space object - * @as_nr: Address space number - */ -#define KBASE_TLSTREAM_TL_NEW_AS( \ - kbdev, \ - address_space, \ - as_nr \ - ) \ - do { \ - int 
enabled = atomic_read(&kbdev->timeline_is_enabled); \ - if (enabled & TLSTREAM_ENABLED) \ - __kbase_tlstream_tl_new_as( \ - __TL_DISPATCH_STREAM(kbdev, obj), \ - address_space, as_nr); \ - } while (0) - -/** - * KBASE_TLSTREAM_TL_DEL_CTX - - * context is destroyed - * - * @kbdev: Kbase device - * @ctx: Name of the context object - */ -#define KBASE_TLSTREAM_TL_DEL_CTX( \ - kbdev, \ - ctx \ - ) \ - do { \ - int enabled = atomic_read(&kbdev->timeline_is_enabled); \ - if (enabled & TLSTREAM_ENABLED) \ - __kbase_tlstream_tl_del_ctx( \ - __TL_DISPATCH_STREAM(kbdev, obj), \ - ctx); \ - } while (0) - -/** - * KBASE_TLSTREAM_TL_DEL_ATOM - - * atom is destroyed - * - * @kbdev: Kbase device - * @atom: Atom identifier - */ -#define KBASE_TLSTREAM_TL_DEL_ATOM( \ - kbdev, \ - atom \ - ) \ - do { \ - int enabled = atomic_read(&kbdev->timeline_is_enabled); \ - if (enabled & TLSTREAM_ENABLED) \ - __kbase_tlstream_tl_del_atom( \ - __TL_DISPATCH_STREAM(kbdev, obj), \ - atom); \ - } while (0) - -/** - * KBASE_TLSTREAM_TL_LIFELINK_LPU_GPU - - * lpu is deleted with gpu - * - * @kbdev: Kbase device - * @lpu: Name of the Logical Processing Unit object - * @gpu: Name of the GPU object - */ -#define KBASE_TLSTREAM_TL_LIFELINK_LPU_GPU( \ - kbdev, \ - lpu, \ - gpu \ - ) \ - do { \ - int enabled = atomic_read(&kbdev->timeline_is_enabled); \ - if (enabled & TLSTREAM_ENABLED) \ - __kbase_tlstream_tl_lifelink_lpu_gpu( \ - __TL_DISPATCH_STREAM(kbdev, obj), \ - lpu, gpu); \ - } while (0) - -/** - * KBASE_TLSTREAM_TL_LIFELINK_AS_GPU - - * address space is deleted with gpu - * - * @kbdev: Kbase device - * @address_space: Name of the address space object - * @gpu: Name of the GPU object - */ -#define KBASE_TLSTREAM_TL_LIFELINK_AS_GPU( \ - kbdev, \ - address_space, \ - gpu \ - ) \ - do { \ - int enabled = atomic_read(&kbdev->timeline_is_enabled); \ - if (enabled & TLSTREAM_ENABLED) \ - __kbase_tlstream_tl_lifelink_as_gpu( \ - __TL_DISPATCH_STREAM(kbdev, obj), \ - address_space, gpu); \ - } while (0) - 
-/** - * KBASE_TLSTREAM_TL_RET_CTX_LPU - - * context is retained by lpu - * - * @kbdev: Kbase device - * @ctx: Name of the context object - * @lpu: Name of the Logical Processing Unit object - */ -#define KBASE_TLSTREAM_TL_RET_CTX_LPU( \ - kbdev, \ - ctx, \ - lpu \ - ) \ - do { \ - int enabled = atomic_read(&kbdev->timeline_is_enabled); \ - if (enabled & TLSTREAM_ENABLED) \ - __kbase_tlstream_tl_ret_ctx_lpu( \ - __TL_DISPATCH_STREAM(kbdev, obj), \ - ctx, lpu); \ - } while (0) - -/** - * KBASE_TLSTREAM_TL_RET_ATOM_CTX - - * atom is retained by context - * - * @kbdev: Kbase device - * @atom: Atom identifier - * @ctx: Name of the context object - */ -#define KBASE_TLSTREAM_TL_RET_ATOM_CTX( \ - kbdev, \ - atom, \ - ctx \ - ) \ - do { \ - int enabled = atomic_read(&kbdev->timeline_is_enabled); \ - if (enabled & TLSTREAM_ENABLED) \ - __kbase_tlstream_tl_ret_atom_ctx( \ - __TL_DISPATCH_STREAM(kbdev, obj), \ - atom, ctx); \ - } while (0) - -/** - * KBASE_TLSTREAM_TL_RET_ATOM_LPU - - * atom is retained by lpu - * - * @kbdev: Kbase device - * @atom: Atom identifier - * @lpu: Name of the Logical Processing Unit object - * @attrib_match_list: List containing match operator attributes - */ -#define KBASE_TLSTREAM_TL_RET_ATOM_LPU( \ - kbdev, \ - atom, \ - lpu, \ - attrib_match_list \ - ) \ - do { \ - int enabled = atomic_read(&kbdev->timeline_is_enabled); \ - if (enabled & TLSTREAM_ENABLED) \ - __kbase_tlstream_tl_ret_atom_lpu( \ - __TL_DISPATCH_STREAM(kbdev, obj), \ - atom, lpu, attrib_match_list); \ - } while (0) - -/** - * KBASE_TLSTREAM_TL_NRET_CTX_LPU - - * context is released by lpu - * - * @kbdev: Kbase device - * @ctx: Name of the context object - * @lpu: Name of the Logical Processing Unit object - */ -#define KBASE_TLSTREAM_TL_NRET_CTX_LPU( \ - kbdev, \ - ctx, \ - lpu \ - ) \ - do { \ - int enabled = atomic_read(&kbdev->timeline_is_enabled); \ - if (enabled & TLSTREAM_ENABLED) \ - __kbase_tlstream_tl_nret_ctx_lpu( \ - __TL_DISPATCH_STREAM(kbdev, obj), \ - ctx, lpu); \ 
- } while (0) - -/** - * KBASE_TLSTREAM_TL_NRET_ATOM_CTX - - * atom is released by context - * - * @kbdev: Kbase device - * @atom: Atom identifier - * @ctx: Name of the context object - */ -#define KBASE_TLSTREAM_TL_NRET_ATOM_CTX( \ - kbdev, \ - atom, \ - ctx \ - ) \ - do { \ - int enabled = atomic_read(&kbdev->timeline_is_enabled); \ - if (enabled & TLSTREAM_ENABLED) \ - __kbase_tlstream_tl_nret_atom_ctx( \ - __TL_DISPATCH_STREAM(kbdev, obj), \ - atom, ctx); \ - } while (0) - -/** - * KBASE_TLSTREAM_TL_NRET_ATOM_LPU - - * atom is released by lpu - * - * @kbdev: Kbase device - * @atom: Atom identifier - * @lpu: Name of the Logical Processing Unit object - */ -#define KBASE_TLSTREAM_TL_NRET_ATOM_LPU( \ - kbdev, \ - atom, \ - lpu \ - ) \ - do { \ - int enabled = atomic_read(&kbdev->timeline_is_enabled); \ - if (enabled & TLSTREAM_ENABLED) \ - __kbase_tlstream_tl_nret_atom_lpu( \ - __TL_DISPATCH_STREAM(kbdev, obj), \ - atom, lpu); \ - } while (0) - -/** - * KBASE_TLSTREAM_TL_RET_AS_CTX - - * address space is retained by context - * - * @kbdev: Kbase device - * @address_space: Name of the address space object - * @ctx: Name of the context object - */ -#define KBASE_TLSTREAM_TL_RET_AS_CTX( \ - kbdev, \ - address_space, \ - ctx \ - ) \ - do { \ - int enabled = atomic_read(&kbdev->timeline_is_enabled); \ - if (enabled & TLSTREAM_ENABLED) \ - __kbase_tlstream_tl_ret_as_ctx( \ - __TL_DISPATCH_STREAM(kbdev, obj), \ - address_space, ctx); \ - } while (0) - -/** - * KBASE_TLSTREAM_TL_NRET_AS_CTX - - * address space is released by context - * - * @kbdev: Kbase device - * @address_space: Name of the address space object - * @ctx: Name of the context object - */ -#define KBASE_TLSTREAM_TL_NRET_AS_CTX( \ - kbdev, \ - address_space, \ - ctx \ - ) \ - do { \ - int enabled = atomic_read(&kbdev->timeline_is_enabled); \ - if (enabled & TLSTREAM_ENABLED) \ - __kbase_tlstream_tl_nret_as_ctx( \ - __TL_DISPATCH_STREAM(kbdev, obj), \ - address_space, ctx); \ - } while (0) - -/** - * 
KBASE_TLSTREAM_TL_RET_ATOM_AS - - * atom is retained by address space - * - * @kbdev: Kbase device - * @atom: Atom identifier - * @address_space: Name of the address space object - */ -#define KBASE_TLSTREAM_TL_RET_ATOM_AS( \ - kbdev, \ - atom, \ - address_space \ - ) \ - do { \ - int enabled = atomic_read(&kbdev->timeline_is_enabled); \ - if (enabled & TLSTREAM_ENABLED) \ - __kbase_tlstream_tl_ret_atom_as( \ - __TL_DISPATCH_STREAM(kbdev, obj), \ - atom, address_space); \ - } while (0) - -/** - * KBASE_TLSTREAM_TL_NRET_ATOM_AS - - * atom is released by address space - * - * @kbdev: Kbase device - * @atom: Atom identifier - * @address_space: Name of the address space object - */ -#define KBASE_TLSTREAM_TL_NRET_ATOM_AS( \ - kbdev, \ - atom, \ - address_space \ - ) \ - do { \ - int enabled = atomic_read(&kbdev->timeline_is_enabled); \ - if (enabled & TLSTREAM_ENABLED) \ - __kbase_tlstream_tl_nret_atom_as( \ - __TL_DISPATCH_STREAM(kbdev, obj), \ - atom, address_space); \ - } while (0) - -/** - * KBASE_TLSTREAM_TL_ATTRIB_ATOM_CONFIG - - * atom job slot attributes - * - * @kbdev: Kbase device - * @atom: Atom identifier - * @descriptor: Job descriptor address - * @affinity: Job affinity - * @config: Job config - */ -#define KBASE_TLSTREAM_TL_ATTRIB_ATOM_CONFIG( \ - kbdev, \ - atom, \ - descriptor, \ - affinity, \ - config \ - ) \ - do { \ - int enabled = atomic_read(&kbdev->timeline_is_enabled); \ - if (enabled & TLSTREAM_ENABLED) \ - __kbase_tlstream_tl_attrib_atom_config( \ - __TL_DISPATCH_STREAM(kbdev, obj), \ - atom, descriptor, affinity, config); \ - } while (0) - -/** - * KBASE_TLSTREAM_TL_ATTRIB_ATOM_PRIORITY - - * atom priority - * - * @kbdev: Kbase device - * @atom: Atom identifier - * @prio: Atom priority - */ -#define KBASE_TLSTREAM_TL_ATTRIB_ATOM_PRIORITY( \ - kbdev, \ - atom, \ - prio \ - ) \ - do { \ - int enabled = atomic_read(&kbdev->timeline_is_enabled); \ - if (enabled & BASE_TLSTREAM_ENABLE_LATENCY_TRACEPOINTS) \ - 
__kbase_tlstream_tl_attrib_atom_priority( \ - __TL_DISPATCH_STREAM(kbdev, obj), \ - atom, prio); \ - } while (0) - -/** - * KBASE_TLSTREAM_TL_ATTRIB_ATOM_STATE - - * atom state - * - * @kbdev: Kbase device - * @atom: Atom identifier - * @state: Atom state - */ -#define KBASE_TLSTREAM_TL_ATTRIB_ATOM_STATE( \ - kbdev, \ - atom, \ - state \ - ) \ - do { \ - int enabled = atomic_read(&kbdev->timeline_is_enabled); \ - if (enabled & BASE_TLSTREAM_ENABLE_LATENCY_TRACEPOINTS) \ - __kbase_tlstream_tl_attrib_atom_state( \ - __TL_DISPATCH_STREAM(kbdev, obj), \ - atom, state); \ - } while (0) - -/** - * KBASE_TLSTREAM_TL_ATTRIB_ATOM_PRIORITIZED - - * atom caused priority change - * - * @kbdev: Kbase device - * @atom: Atom identifier - */ -#define KBASE_TLSTREAM_TL_ATTRIB_ATOM_PRIORITIZED( \ - kbdev, \ - atom \ - ) \ - do { \ - int enabled = atomic_read(&kbdev->timeline_is_enabled); \ - if (enabled & BASE_TLSTREAM_ENABLE_LATENCY_TRACEPOINTS) \ - __kbase_tlstream_tl_attrib_atom_prioritized( \ - __TL_DISPATCH_STREAM(kbdev, obj), \ - atom); \ - } while (0) - -/** - * KBASE_TLSTREAM_TL_ATTRIB_ATOM_JIT - - * jit done for atom - * - * @kbdev: Kbase device - * @atom: Atom identifier - * @edit_addr: Address edited by jit - * @new_addr: Address placed into the edited location - * @jit_flags: Flags specifying the special requirements for - * the JIT allocation. - * @mem_flags: Flags defining the properties of a memory region - * @j_id: Unique ID provided by the caller, this is used - * to pair allocation and free requests. - * @com_pgs: The minimum number of physical pages which - * should back the allocation. - * @extent: Granularity of physical pages to grow the - * allocation by during a fault. 
- * @va_pgs: The minimum number of virtual pages required - */ -#define KBASE_TLSTREAM_TL_ATTRIB_ATOM_JIT( \ - kbdev, \ - atom, \ - edit_addr, \ - new_addr, \ - jit_flags, \ - mem_flags, \ - j_id, \ - com_pgs, \ - extent, \ - va_pgs \ - ) \ - do { \ - int enabled = atomic_read(&kbdev->timeline_is_enabled); \ - if (enabled & BASE_TLSTREAM_JOB_DUMPING_ENABLED) \ - __kbase_tlstream_tl_attrib_atom_jit( \ - __TL_DISPATCH_STREAM(kbdev, obj), \ - atom, edit_addr, new_addr, jit_flags, mem_flags, j_id, com_pgs, extent, va_pgs); \ - } while (0) - -/** - * KBASE_TLSTREAM_TL_JIT_USEDPAGES - - * used pages for jit - * - * @kbdev: Kbase device - * @used_pages: Number of pages used for jit - * @j_id: Unique ID provided by the caller, this is used - * to pair allocation and free requests. - */ -#define KBASE_TLSTREAM_TL_JIT_USEDPAGES( \ - kbdev, \ - used_pages, \ - j_id \ - ) \ - do { \ - int enabled = atomic_read(&kbdev->timeline_is_enabled); \ - if (enabled & TLSTREAM_ENABLED) \ - __kbase_tlstream_tl_jit_usedpages( \ - __TL_DISPATCH_STREAM(kbdev, obj), \ - used_pages, j_id); \ - } while (0) - -/** - * KBASE_TLSTREAM_TL_ATTRIB_ATOM_JITALLOCINFO - - * Information about JIT allocations - * - * @kbdev: Kbase device - * @atom: Atom identifier - * @va_pgs: The minimum number of virtual pages required - * @com_pgs: The minimum number of physical pages which - * should back the allocation. - * @extent: Granularity of physical pages to grow the - * allocation by during a fault. - * @j_id: Unique ID provided by the caller, this is used - * to pair allocation and free requests. - * @bin_id: The JIT allocation bin, used in conjunction with - * max_allocations to limit the number of each - * type of JIT allocation. - * @max_allocs: Maximum allocations allowed in this bin. - * @jit_flags: Flags specifying the special requirements for - * the JIT allocation. - * @usg_id: A hint about which allocation should be reused. 
- */ -#define KBASE_TLSTREAM_TL_ATTRIB_ATOM_JITALLOCINFO( \ - kbdev, \ - atom, \ - va_pgs, \ - com_pgs, \ - extent, \ - j_id, \ - bin_id, \ - max_allocs, \ - jit_flags, \ - usg_id \ - ) \ - do { \ - int enabled = atomic_read(&kbdev->timeline_is_enabled); \ - if (enabled & TLSTREAM_ENABLED) \ - __kbase_tlstream_tl_attrib_atom_jitallocinfo( \ - __TL_DISPATCH_STREAM(kbdev, obj), \ - atom, va_pgs, com_pgs, extent, j_id, bin_id, max_allocs, jit_flags, usg_id); \ - } while (0) - -/** - * KBASE_TLSTREAM_TL_ATTRIB_ATOM_JITFREEINFO - - * Information about JIT frees - * - * @kbdev: Kbase device - * @atom: Atom identifier - * @j_id: Unique ID provided by the caller, this is used - * to pair allocation and free requests. - */ -#define KBASE_TLSTREAM_TL_ATTRIB_ATOM_JITFREEINFO( \ - kbdev, \ - atom, \ - j_id \ - ) \ - do { \ - int enabled = atomic_read(&kbdev->timeline_is_enabled); \ - if (enabled & TLSTREAM_ENABLED) \ - __kbase_tlstream_tl_attrib_atom_jitfreeinfo( \ - __TL_DISPATCH_STREAM(kbdev, obj), \ - atom, j_id); \ - } while (0) - -/** - * KBASE_TLSTREAM_TL_ATTRIB_AS_CONFIG - - * address space attributes - * - * @kbdev: Kbase device - * @address_space: Name of the address space object - * @transtab: Configuration of the TRANSTAB register - * @memattr: Configuration of the MEMATTR register - * @transcfg: Configuration of the TRANSCFG register (or zero if not present) - */ -#define KBASE_TLSTREAM_TL_ATTRIB_AS_CONFIG( \ - kbdev, \ - address_space, \ - transtab, \ - memattr, \ - transcfg \ - ) \ - do { \ - int enabled = atomic_read(&kbdev->timeline_is_enabled); \ - if (enabled & TLSTREAM_ENABLED) \ - __kbase_tlstream_tl_attrib_as_config( \ - __TL_DISPATCH_STREAM(kbdev, obj), \ - address_space, transtab, memattr, transcfg); \ - } while (0) - -/** - * KBASE_TLSTREAM_TL_EVENT_LPU_SOFTSTOP - - * softstop event on given lpu - * - * @kbdev: Kbase device - * @lpu: Name of the Logical Processing Unit object - */ -#define KBASE_TLSTREAM_TL_EVENT_LPU_SOFTSTOP( \ - kbdev, \ - lpu \ - ) \ 
- do { \ - int enabled = atomic_read(&kbdev->timeline_is_enabled); \ - if (enabled & TLSTREAM_ENABLED) \ - __kbase_tlstream_tl_event_lpu_softstop( \ - __TL_DISPATCH_STREAM(kbdev, obj), \ - lpu); \ - } while (0) - -/** - * KBASE_TLSTREAM_TL_EVENT_ATOM_SOFTSTOP_EX - - * atom softstopped - * - * @kbdev: Kbase device - * @atom: Atom identifier - */ -#define KBASE_TLSTREAM_TL_EVENT_ATOM_SOFTSTOP_EX( \ - kbdev, \ - atom \ - ) \ - do { \ - int enabled = atomic_read(&kbdev->timeline_is_enabled); \ - if (enabled & TLSTREAM_ENABLED) \ - __kbase_tlstream_tl_event_atom_softstop_ex( \ - __TL_DISPATCH_STREAM(kbdev, obj), \ - atom); \ - } while (0) - -/** - * KBASE_TLSTREAM_TL_EVENT_ATOM_SOFTSTOP_ISSUE - - * atom softstop issued - * - * @kbdev: Kbase device - * @atom: Atom identifier - */ -#define KBASE_TLSTREAM_TL_EVENT_ATOM_SOFTSTOP_ISSUE( \ - kbdev, \ - atom \ - ) \ - do { \ - int enabled = atomic_read(&kbdev->timeline_is_enabled); \ - if (enabled & TLSTREAM_ENABLED) \ - __kbase_tlstream_tl_event_atom_softstop_issue( \ - __TL_DISPATCH_STREAM(kbdev, obj), \ - atom); \ - } while (0) - -/** - * KBASE_TLSTREAM_TL_EVENT_ATOM_SOFTJOB_START - - * atom soft job has started - * - * @kbdev: Kbase device - * @atom: Atom identifier - */ -#define KBASE_TLSTREAM_TL_EVENT_ATOM_SOFTJOB_START( \ - kbdev, \ - atom \ - ) \ - do { \ - int enabled = atomic_read(&kbdev->timeline_is_enabled); \ - if (enabled & TLSTREAM_ENABLED) \ - __kbase_tlstream_tl_event_atom_softjob_start( \ - __TL_DISPATCH_STREAM(kbdev, obj), \ - atom); \ - } while (0) - -/** - * KBASE_TLSTREAM_TL_EVENT_ATOM_SOFTJOB_END - - * atom soft job has completed - * - * @kbdev: Kbase device - * @atom: Atom identifier - */ -#define KBASE_TLSTREAM_TL_EVENT_ATOM_SOFTJOB_END( \ - kbdev, \ - atom \ - ) \ - do { \ - int enabled = atomic_read(&kbdev->timeline_is_enabled); \ - if (enabled & TLSTREAM_ENABLED) \ - __kbase_tlstream_tl_event_atom_softjob_end( \ - __TL_DISPATCH_STREAM(kbdev, obj), \ - atom); \ - } while (0) - -/** - * 
KBASE_TLSTREAM_JD_GPU_SOFT_RESET - - * gpu soft reset - * - * @kbdev: Kbase device - * @gpu: Name of the GPU object - */ -#define KBASE_TLSTREAM_JD_GPU_SOFT_RESET( \ - kbdev, \ - gpu \ - ) \ - do { \ - int enabled = atomic_read(&kbdev->timeline_is_enabled); \ - if (enabled & TLSTREAM_ENABLED) \ - __kbase_tlstream_jd_gpu_soft_reset( \ - __TL_DISPATCH_STREAM(kbdev, obj), \ - gpu); \ - } while (0) - -/** - * KBASE_TLSTREAM_AUX_PM_STATE - - * PM state - * - * @kbdev: Kbase device - * @core_type: Core type (shader, tiler, l2 cache, l3 cache) - * @core_state_bitset: 64bits bitmask reporting power state of the cores - * (1-ON, 0-OFF) - */ -#define KBASE_TLSTREAM_AUX_PM_STATE( \ - kbdev, \ - core_type, \ - core_state_bitset \ - ) \ - do { \ - int enabled = atomic_read(&kbdev->timeline_is_enabled); \ - if (enabled & TLSTREAM_ENABLED) \ - __kbase_tlstream_aux_pm_state( \ - __TL_DISPATCH_STREAM(kbdev, aux), \ - core_type, core_state_bitset); \ - } while (0) - -/** - * KBASE_TLSTREAM_AUX_PAGEFAULT - - * Page fault - * - * @kbdev: Kbase device - * @ctx_nr: Kernel context number - * @as_nr: Address space number - * @page_cnt_change: Number of pages to be added - */ -#define KBASE_TLSTREAM_AUX_PAGEFAULT( \ - kbdev, \ - ctx_nr, \ - as_nr, \ - page_cnt_change \ - ) \ - do { \ - int enabled = atomic_read(&kbdev->timeline_is_enabled); \ - if (enabled & TLSTREAM_ENABLED) \ - __kbase_tlstream_aux_pagefault( \ - __TL_DISPATCH_STREAM(kbdev, aux), \ - ctx_nr, as_nr, page_cnt_change); \ - } while (0) - -/** - * KBASE_TLSTREAM_AUX_PAGESALLOC - - * Total alloc pages change - * - * @kbdev: Kbase device - * @ctx_nr: Kernel context number - * @page_cnt: Number of pages used by the context - */ -#define KBASE_TLSTREAM_AUX_PAGESALLOC( \ - kbdev, \ - ctx_nr, \ - page_cnt \ - ) \ - do { \ - int enabled = atomic_read(&kbdev->timeline_is_enabled); \ - if (enabled & TLSTREAM_ENABLED) \ - __kbase_tlstream_aux_pagesalloc( \ - __TL_DISPATCH_STREAM(kbdev, aux), \ - ctx_nr, page_cnt); \ - } while (0) - 
-/** - * KBASE_TLSTREAM_AUX_DEVFREQ_TARGET - - * New device frequency target - * - * @kbdev: Kbase device - * @target_freq: New target frequency - */ -#define KBASE_TLSTREAM_AUX_DEVFREQ_TARGET( \ - kbdev, \ - target_freq \ - ) \ - do { \ - int enabled = atomic_read(&kbdev->timeline_is_enabled); \ - if (enabled & TLSTREAM_ENABLED) \ - __kbase_tlstream_aux_devfreq_target( \ - __TL_DISPATCH_STREAM(kbdev, aux), \ - target_freq); \ - } while (0) - -/** - * KBASE_TLSTREAM_AUX_PROTECTED_ENTER_START - - * enter protected mode start - * - * @kbdev: Kbase device - * @gpu: Name of the GPU object - */ -#define KBASE_TLSTREAM_AUX_PROTECTED_ENTER_START( \ - kbdev, \ - gpu \ - ) \ - do { \ - int enabled = atomic_read(&kbdev->timeline_is_enabled); \ - if (enabled & BASE_TLSTREAM_ENABLE_LATENCY_TRACEPOINTS) \ - __kbase_tlstream_aux_protected_enter_start( \ - __TL_DISPATCH_STREAM(kbdev, aux), \ - gpu); \ - } while (0) - -/** - * KBASE_TLSTREAM_AUX_PROTECTED_ENTER_END - - * enter protected mode end - * - * @kbdev: Kbase device - * @gpu: Name of the GPU object - */ -#define KBASE_TLSTREAM_AUX_PROTECTED_ENTER_END( \ - kbdev, \ - gpu \ - ) \ - do { \ - int enabled = atomic_read(&kbdev->timeline_is_enabled); \ - if (enabled & BASE_TLSTREAM_ENABLE_LATENCY_TRACEPOINTS) \ - __kbase_tlstream_aux_protected_enter_end( \ - __TL_DISPATCH_STREAM(kbdev, aux), \ - gpu); \ - } while (0) - -/** - * KBASE_TLSTREAM_AUX_PROTECTED_LEAVE_START - - * leave protected mode start - * - * @kbdev: Kbase device - * @gpu: Name of the GPU object - */ -#define KBASE_TLSTREAM_AUX_PROTECTED_LEAVE_START( \ - kbdev, \ - gpu \ - ) \ - do { \ - int enabled = atomic_read(&kbdev->timeline_is_enabled); \ - if (enabled & BASE_TLSTREAM_ENABLE_LATENCY_TRACEPOINTS) \ - __kbase_tlstream_aux_protected_leave_start( \ - __TL_DISPATCH_STREAM(kbdev, aux), \ - gpu); \ - } while (0) - -/** - * KBASE_TLSTREAM_AUX_PROTECTED_LEAVE_END - - * leave protected mode end - * - * @kbdev: Kbase device - * @gpu: Name of the GPU object - */ 
-#define KBASE_TLSTREAM_AUX_PROTECTED_LEAVE_END( \ - kbdev, \ - gpu \ - ) \ - do { \ - int enabled = atomic_read(&kbdev->timeline_is_enabled); \ - if (enabled & BASE_TLSTREAM_ENABLE_LATENCY_TRACEPOINTS) \ - __kbase_tlstream_aux_protected_leave_end( \ - __TL_DISPATCH_STREAM(kbdev, aux), \ - gpu); \ - } while (0) - -/** - * KBASE_TLSTREAM_AUX_JIT_STATS - - * per-bin JIT statistics - * - * @kbdev: Kbase device - * @ctx_nr: Kernel context number - * @bid: JIT bin id - * @max_allocs: Maximum allocations allowed in this bin. - * @allocs: Number of active allocations in this bin - * @va_pages: Number of virtual pages allocated in this bin - * @ph_pages: Number of physical pages allocated in this bin - */ -#define KBASE_TLSTREAM_AUX_JIT_STATS( \ - kbdev, \ - ctx_nr, \ - bid, \ - max_allocs, \ - allocs, \ - va_pages, \ - ph_pages \ - ) \ - do { \ - int enabled = atomic_read(&kbdev->timeline_is_enabled); \ - if (enabled & TLSTREAM_ENABLED) \ - __kbase_tlstream_aux_jit_stats( \ - __TL_DISPATCH_STREAM(kbdev, aux), \ - ctx_nr, bid, max_allocs, allocs, va_pages, ph_pages); \ - } while (0) - -/** - * KBASE_TLSTREAM_AUX_EVENT_JOB_SLOT - - * event on a given job slot - * - * @kbdev: Kbase device - * @ctx: Name of the context object - * @slot_nr: Job slot number - * @atom_nr: Sequential number of an atom - * @event: Event type. 
One of TL_JS_EVENT values - */ -#define KBASE_TLSTREAM_AUX_EVENT_JOB_SLOT( \ - kbdev, \ - ctx, \ - slot_nr, \ - atom_nr, \ - event \ - ) \ - do { \ - int enabled = atomic_read(&kbdev->timeline_is_enabled); \ - if (enabled & TLSTREAM_ENABLED) \ - __kbase_tlstream_aux_event_job_slot( \ - __TL_DISPATCH_STREAM(kbdev, aux), \ - ctx, slot_nr, atom_nr, event); \ - } while (0) - -/** - * KBASE_TLSTREAM_TL_NEW_KCPUQUEUE - - * New KCPU Queue - * - * @kbdev: Kbase device - * @kcpu_queue: KCPU queue - * @ctx: Name of the context object - * @kcpuq_num_pending_cmds: Number of commands already enqueued - * in the KCPU queue - */ -#define KBASE_TLSTREAM_TL_NEW_KCPUQUEUE( \ - kbdev, \ - kcpu_queue, \ - ctx, \ - kcpuq_num_pending_cmds \ - ) \ - do { \ - int enabled = atomic_read(&kbdev->timeline_is_enabled); \ - if (enabled & TLSTREAM_ENABLED) \ - __kbase_tlstream_tl_new_kcpuqueue( \ - __TL_DISPATCH_STREAM(kbdev, obj), \ - kcpu_queue, ctx, kcpuq_num_pending_cmds); \ - } while (0) - -/** - * KBASE_TLSTREAM_TL_RET_KCPUQUEUE_CTX - - * Context retains KCPU Queue - * - * @kbdev: Kbase device - * @kcpu_queue: KCPU queue - * @ctx: Name of the context object - */ -#define KBASE_TLSTREAM_TL_RET_KCPUQUEUE_CTX( \ - kbdev, \ - kcpu_queue, \ - ctx \ - ) \ - do { \ - int enabled = atomic_read(&kbdev->timeline_is_enabled); \ - if (enabled & TLSTREAM_ENABLED) \ - __kbase_tlstream_tl_ret_kcpuqueue_ctx( \ - __TL_DISPATCH_STREAM(kbdev, obj), \ - kcpu_queue, ctx); \ - } while (0) - -/** - * KBASE_TLSTREAM_TL_DEL_KCPUQUEUE - - * Delete KCPU Queue - * - * @kbdev: Kbase device - * @kcpu_queue: KCPU queue - */ -#define KBASE_TLSTREAM_TL_DEL_KCPUQUEUE( \ - kbdev, \ - kcpu_queue \ - ) \ - do { \ - int enabled = atomic_read(&kbdev->timeline_is_enabled); \ - if (enabled & TLSTREAM_ENABLED) \ - __kbase_tlstream_tl_del_kcpuqueue( \ - __TL_DISPATCH_STREAM(kbdev, obj), \ - kcpu_queue); \ - } while (0) - -/** - * KBASE_TLSTREAM_TL_NRET_KCPUQUEUE_CTX - - * Context releases KCPU Queue - * - * @kbdev: Kbase device - * 
@kcpu_queue: KCPU queue - * @ctx: Name of the context object - */ -#define KBASE_TLSTREAM_TL_NRET_KCPUQUEUE_CTX( \ - kbdev, \ - kcpu_queue, \ - ctx \ - ) \ - do { \ - int enabled = atomic_read(&kbdev->timeline_is_enabled); \ - if (enabled & TLSTREAM_ENABLED) \ - __kbase_tlstream_tl_nret_kcpuqueue_ctx( \ - __TL_DISPATCH_STREAM(kbdev, obj), \ - kcpu_queue, ctx); \ - } while (0) - -/** - * KBASE_TLSTREAM_TL_EVENT_KCPUQUEUE_ENQUEUE_FENCE_SIGNAL - - * KCPU Queue enqueues Signal on Fence - * - * @kbdev: Kbase device - * @kcpu_queue: KCPU queue - * @fence: Fence object handle - */ -#define KBASE_TLSTREAM_TL_EVENT_KCPUQUEUE_ENQUEUE_FENCE_SIGNAL( \ - kbdev, \ - kcpu_queue, \ - fence \ - ) \ - do { \ - int enabled = atomic_read(&kbdev->timeline_is_enabled); \ - if (enabled & TLSTREAM_ENABLED) \ - __kbase_tlstream_tl_event_kcpuqueue_enqueue_fence_signal( \ - __TL_DISPATCH_STREAM(kbdev, obj), \ - kcpu_queue, fence); \ - } while (0) - -/** - * KBASE_TLSTREAM_TL_EVENT_KCPUQUEUE_ENQUEUE_FENCE_WAIT - - * KCPU Queue enqueues Wait on Fence - * - * @kbdev: Kbase device - * @kcpu_queue: KCPU queue - * @fence: Fence object handle - */ -#define KBASE_TLSTREAM_TL_EVENT_KCPUQUEUE_ENQUEUE_FENCE_WAIT( \ - kbdev, \ - kcpu_queue, \ - fence \ - ) \ - do { \ - int enabled = atomic_read(&kbdev->timeline_is_enabled); \ - if (enabled & TLSTREAM_ENABLED) \ - __kbase_tlstream_tl_event_kcpuqueue_enqueue_fence_wait( \ - __TL_DISPATCH_STREAM(kbdev, obj), \ - kcpu_queue, fence); \ - } while (0) - -/** - * KBASE_TLSTREAM_TL_EVENT_ARRAY_BEGIN_KCPUQUEUE_ENQUEUE_CQS_WAIT - - * Begin array of KCPU Queue enqueues Wait on Cross Queue Sync Object - * - * @kbdev: Kbase device - * @kcpu_queue: KCPU queue - */ -#define KBASE_TLSTREAM_TL_EVENT_ARRAY_BEGIN_KCPUQUEUE_ENQUEUE_CQS_WAIT( \ - kbdev, \ - kcpu_queue \ - ) \ - do { \ - int enabled = atomic_read(&kbdev->timeline_is_enabled); \ - if (enabled & TLSTREAM_ENABLED) \ - __kbase_tlstream_tl_event_array_begin_kcpuqueue_enqueue_cqs_wait( \ - 
__TL_DISPATCH_STREAM(kbdev, obj), \ - kcpu_queue); \ - } while (0) - -/** - * KBASE_TLSTREAM_TL_EVENT_ARRAY_ITEM_KCPUQUEUE_ENQUEUE_CQS_WAIT - - * Array item of KCPU Queue enqueues Wait on Cross Queue Sync Object - * - * @kbdev: Kbase device - * @kcpu_queue: KCPU queue - * @cqs_obj_gpu_addr: CQS Object GPU ptr - * @cqs_obj_compare_value: Semaphore value that should be exceeded - * for the WAIT to pass - */ -#define KBASE_TLSTREAM_TL_EVENT_ARRAY_ITEM_KCPUQUEUE_ENQUEUE_CQS_WAIT( \ - kbdev, \ - kcpu_queue, \ - cqs_obj_gpu_addr, \ - cqs_obj_compare_value \ - ) \ - do { \ - int enabled = atomic_read(&kbdev->timeline_is_enabled); \ - if (enabled & TLSTREAM_ENABLED) \ - __kbase_tlstream_tl_event_array_item_kcpuqueue_enqueue_cqs_wait( \ - __TL_DISPATCH_STREAM(kbdev, obj), \ - kcpu_queue, cqs_obj_gpu_addr, cqs_obj_compare_value); \ - } while (0) - -/** - * KBASE_TLSTREAM_TL_EVENT_ARRAY_END_KCPUQUEUE_ENQUEUE_CQS_WAIT - - * End array of KCPU Queue enqueues Wait on Cross Queue Sync Object - * - * @kbdev: Kbase device - * @kcpu_queue: KCPU queue - */ -#define KBASE_TLSTREAM_TL_EVENT_ARRAY_END_KCPUQUEUE_ENQUEUE_CQS_WAIT( \ - kbdev, \ - kcpu_queue \ - ) \ - do { \ - int enabled = atomic_read(&kbdev->timeline_is_enabled); \ - if (enabled & TLSTREAM_ENABLED) \ - __kbase_tlstream_tl_event_array_end_kcpuqueue_enqueue_cqs_wait( \ - __TL_DISPATCH_STREAM(kbdev, obj), \ - kcpu_queue); \ - } while (0) - -/** - * KBASE_TLSTREAM_TL_EVENT_ARRAY_BEGIN_KCPUQUEUE_ENQUEUE_CQS_SET - - * Begin array of KCPU Queue enqueues Set on Cross Queue Sync Object - * - * @kbdev: Kbase device - * @kcpu_queue: KCPU queue - */ -#define KBASE_TLSTREAM_TL_EVENT_ARRAY_BEGIN_KCPUQUEUE_ENQUEUE_CQS_SET( \ - kbdev, \ - kcpu_queue \ - ) \ - do { \ - int enabled = atomic_read(&kbdev->timeline_is_enabled); \ - if (enabled & TLSTREAM_ENABLED) \ - __kbase_tlstream_tl_event_array_begin_kcpuqueue_enqueue_cqs_set( \ - __TL_DISPATCH_STREAM(kbdev, obj), \ - kcpu_queue); \ - } while (0) - -/** - * 
KBASE_TLSTREAM_TL_EVENT_ARRAY_ITEM_KCPUQUEUE_ENQUEUE_CQS_SET - - * Array item of KCPU Queue enqueues Set on Cross Queue Sync Object - * - * @kbdev: Kbase device - * @kcpu_queue: KCPU queue - * @cqs_obj_gpu_addr: CQS Object GPU ptr - */ -#define KBASE_TLSTREAM_TL_EVENT_ARRAY_ITEM_KCPUQUEUE_ENQUEUE_CQS_SET( \ - kbdev, \ - kcpu_queue, \ - cqs_obj_gpu_addr \ - ) \ - do { \ - int enabled = atomic_read(&kbdev->timeline_is_enabled); \ - if (enabled & TLSTREAM_ENABLED) \ - __kbase_tlstream_tl_event_array_item_kcpuqueue_enqueue_cqs_set( \ - __TL_DISPATCH_STREAM(kbdev, obj), \ - kcpu_queue, cqs_obj_gpu_addr); \ - } while (0) - -/** - * KBASE_TLSTREAM_TL_EVENT_ARRAY_END_KCPUQUEUE_ENQUEUE_CQS_SET - - * End array of KCPU Queue enqueues Set on Cross Queue Sync Object - * - * @kbdev: Kbase device - * @kcpu_queue: KCPU queue - */ -#define KBASE_TLSTREAM_TL_EVENT_ARRAY_END_KCPUQUEUE_ENQUEUE_CQS_SET( \ - kbdev, \ - kcpu_queue \ - ) \ - do { \ - int enabled = atomic_read(&kbdev->timeline_is_enabled); \ - if (enabled & TLSTREAM_ENABLED) \ - __kbase_tlstream_tl_event_array_end_kcpuqueue_enqueue_cqs_set( \ - __TL_DISPATCH_STREAM(kbdev, obj), \ - kcpu_queue); \ - } while (0) - -/** - * KBASE_TLSTREAM_TL_EVENT_ARRAY_BEGIN_KCPUQUEUE_ENQUEUE_DEBUGCOPY - - * Begin array of KCPU Queue enqueues Debug Copy - * - * @kbdev: Kbase device - * @kcpu_queue: KCPU queue - */ -#define KBASE_TLSTREAM_TL_EVENT_ARRAY_BEGIN_KCPUQUEUE_ENQUEUE_DEBUGCOPY( \ - kbdev, \ - kcpu_queue \ - ) \ - do { \ - int enabled = atomic_read(&kbdev->timeline_is_enabled); \ - if (enabled & TLSTREAM_ENABLED) \ - __kbase_tlstream_tl_event_array_begin_kcpuqueue_enqueue_debugcopy( \ - __TL_DISPATCH_STREAM(kbdev, obj), \ - kcpu_queue); \ - } while (0) - -/** - * KBASE_TLSTREAM_TL_EVENT_ARRAY_ITEM_KCPUQUEUE_ENQUEUE_DEBUGCOPY - - * Array item of KCPU Queue enqueues Debug Copy - * - * @kbdev: Kbase device - * @kcpu_queue: KCPU queue - * @debugcopy_dst_size: Debug Copy destination size - */ -#define 
KBASE_TLSTREAM_TL_EVENT_ARRAY_ITEM_KCPUQUEUE_ENQUEUE_DEBUGCOPY( \ - kbdev, \ - kcpu_queue, \ - debugcopy_dst_size \ - ) \ - do { \ - int enabled = atomic_read(&kbdev->timeline_is_enabled); \ - if (enabled & TLSTREAM_ENABLED) \ - __kbase_tlstream_tl_event_array_item_kcpuqueue_enqueue_debugcopy( \ - __TL_DISPATCH_STREAM(kbdev, obj), \ - kcpu_queue, debugcopy_dst_size); \ - } while (0) - -/** - * KBASE_TLSTREAM_TL_EVENT_ARRAY_END_KCPUQUEUE_ENQUEUE_DEBUGCOPY - - * End array of KCPU Queue enqueues Debug Copy - * - * @kbdev: Kbase device - * @kcpu_queue: KCPU queue - */ -#define KBASE_TLSTREAM_TL_EVENT_ARRAY_END_KCPUQUEUE_ENQUEUE_DEBUGCOPY( \ - kbdev, \ - kcpu_queue \ - ) \ - do { \ - int enabled = atomic_read(&kbdev->timeline_is_enabled); \ - if (enabled & TLSTREAM_ENABLED) \ - __kbase_tlstream_tl_event_array_end_kcpuqueue_enqueue_debugcopy( \ - __TL_DISPATCH_STREAM(kbdev, obj), \ - kcpu_queue); \ - } while (0) - -/** - * KBASE_TLSTREAM_TL_EVENT_KCPUQUEUE_ENQUEUE_MAP_IMPORT - - * KCPU Queue enqueues Map Import - * - * @kbdev: Kbase device - * @kcpu_queue: KCPU queue - * @map_import_buf_gpu_addr: Map import buffer GPU ptr - */ -#define KBASE_TLSTREAM_TL_EVENT_KCPUQUEUE_ENQUEUE_MAP_IMPORT( \ - kbdev, \ - kcpu_queue, \ - map_import_buf_gpu_addr \ - ) \ - do { \ - int enabled = atomic_read(&kbdev->timeline_is_enabled); \ - if (enabled & TLSTREAM_ENABLED) \ - __kbase_tlstream_tl_event_kcpuqueue_enqueue_map_import( \ - __TL_DISPATCH_STREAM(kbdev, obj), \ - kcpu_queue, map_import_buf_gpu_addr); \ - } while (0) - -/** - * KBASE_TLSTREAM_TL_EVENT_KCPUQUEUE_ENQUEUE_UNMAP_IMPORT - - * KCPU Queue enqueues Unmap Import - * - * @kbdev: Kbase device - * @kcpu_queue: KCPU queue - * @map_import_buf_gpu_addr: Map import buffer GPU ptr - */ -#define KBASE_TLSTREAM_TL_EVENT_KCPUQUEUE_ENQUEUE_UNMAP_IMPORT( \ - kbdev, \ - kcpu_queue, \ - map_import_buf_gpu_addr \ - ) \ - do { \ - int enabled = atomic_read(&kbdev->timeline_is_enabled); \ - if (enabled & TLSTREAM_ENABLED) \ - 
__kbase_tlstream_tl_event_kcpuqueue_enqueue_unmap_import( \ - __TL_DISPATCH_STREAM(kbdev, obj), \ - kcpu_queue, map_import_buf_gpu_addr); \ - } while (0) - -/** - * KBASE_TLSTREAM_TL_EVENT_KCPUQUEUE_ENQUEUE_UNMAP_IMPORT_FORCE - - * KCPU Queue enqueues Unmap Import ignoring reference count - * - * @kbdev: Kbase device - * @kcpu_queue: KCPU queue - * @map_import_buf_gpu_addr: Map import buffer GPU ptr - */ -#define KBASE_TLSTREAM_TL_EVENT_KCPUQUEUE_ENQUEUE_UNMAP_IMPORT_FORCE( \ - kbdev, \ - kcpu_queue, \ - map_import_buf_gpu_addr \ - ) \ - do { \ - int enabled = atomic_read(&kbdev->timeline_is_enabled); \ - if (enabled & TLSTREAM_ENABLED) \ - __kbase_tlstream_tl_event_kcpuqueue_enqueue_unmap_import_force( \ - __TL_DISPATCH_STREAM(kbdev, obj), \ - kcpu_queue, map_import_buf_gpu_addr); \ - } while (0) - -/** - * KBASE_TLSTREAM_TL_EVENT_ARRAY_BEGIN_KCPUQUEUE_ENQUEUE_JIT_ALLOC - - * Begin array of KCPU Queue enqueues JIT Alloc - * - * @kbdev: Kbase device - * @kcpu_queue: KCPU queue - */ -#define KBASE_TLSTREAM_TL_EVENT_ARRAY_BEGIN_KCPUQUEUE_ENQUEUE_JIT_ALLOC( \ - kbdev, \ - kcpu_queue \ - ) \ - do { \ - int enabled = atomic_read(&kbdev->timeline_is_enabled); \ - if (enabled & TLSTREAM_ENABLED) \ - __kbase_tlstream_tl_event_array_begin_kcpuqueue_enqueue_jit_alloc( \ - __TL_DISPATCH_STREAM(kbdev, obj), \ - kcpu_queue); \ - } while (0) - -/** - * KBASE_TLSTREAM_TL_EVENT_ARRAY_ITEM_KCPUQUEUE_ENQUEUE_JIT_ALLOC - - * Array item of KCPU Queue enqueues JIT Alloc - * - * @kbdev: Kbase device - * @kcpu_queue: KCPU queue - * @jit_alloc_gpu_alloc_addr_dest: The GPU virtual address to write - * the JIT allocated GPU virtual address to - * @jit_alloc_va_pages: The minimum number of virtual pages required - * @jit_alloc_commit_pages: The minimum number of physical pages which - * should back the allocation - * @jit_alloc_extent: Granularity of physical pages to grow the allocation - * by during a fault - * @jit_alloc_jit_id: Unique ID provided by the caller, this is used - * to pair 
allocation and free requests. Zero is not a valid value - * @jit_alloc_bin_id: The JIT allocation bin, used in conjunction with - * max_allocations to limit the number of each type of JIT allocation - * @jit_alloc_max_allocations: The maximum number of allocations - * allowed within the bin specified by bin_id. Should be the same for all - * JIT allocations within the same bin. - * @jit_alloc_flags: Flags specifying the special requirements for the - * JIT allocation - * @jit_alloc_usage_id: A hint about which allocation should be - * reused. The kernel should attempt to use a previous allocation with the same - * usage_id - */ -#define KBASE_TLSTREAM_TL_EVENT_ARRAY_ITEM_KCPUQUEUE_ENQUEUE_JIT_ALLOC( \ - kbdev, \ - kcpu_queue, \ - jit_alloc_gpu_alloc_addr_dest, \ - jit_alloc_va_pages, \ - jit_alloc_commit_pages, \ - jit_alloc_extent, \ - jit_alloc_jit_id, \ - jit_alloc_bin_id, \ - jit_alloc_max_allocations, \ - jit_alloc_flags, \ - jit_alloc_usage_id \ - ) \ - do { \ - int enabled = atomic_read(&kbdev->timeline_is_enabled); \ - if (enabled & TLSTREAM_ENABLED) \ - __kbase_tlstream_tl_event_array_item_kcpuqueue_enqueue_jit_alloc( \ - __TL_DISPATCH_STREAM(kbdev, obj), \ - kcpu_queue, jit_alloc_gpu_alloc_addr_dest, jit_alloc_va_pages, jit_alloc_commit_pages, jit_alloc_extent, jit_alloc_jit_id, jit_alloc_bin_id, jit_alloc_max_allocations, jit_alloc_flags, jit_alloc_usage_id); \ - } while (0) - -/** - * KBASE_TLSTREAM_TL_EVENT_ARRAY_END_KCPUQUEUE_ENQUEUE_JIT_ALLOC - - * End array of KCPU Queue enqueues JIT Alloc - * - * @kbdev: Kbase device - * @kcpu_queue: KCPU queue - */ -#define KBASE_TLSTREAM_TL_EVENT_ARRAY_END_KCPUQUEUE_ENQUEUE_JIT_ALLOC( \ - kbdev, \ - kcpu_queue \ - ) \ - do { \ - int enabled = atomic_read(&kbdev->timeline_is_enabled); \ - if (enabled & TLSTREAM_ENABLED) \ - __kbase_tlstream_tl_event_array_end_kcpuqueue_enqueue_jit_alloc( \ - __TL_DISPATCH_STREAM(kbdev, obj), \ - kcpu_queue); \ - } while (0) - -/** - * 
KBASE_TLSTREAM_TL_EVENT_ARRAY_BEGIN_KCPUQUEUE_ENQUEUE_JIT_FREE - - * Begin array of KCPU Queue enqueues JIT Free - * - * @kbdev: Kbase device - * @kcpu_queue: KCPU queue - */ -#define KBASE_TLSTREAM_TL_EVENT_ARRAY_BEGIN_KCPUQUEUE_ENQUEUE_JIT_FREE( \ - kbdev, \ - kcpu_queue \ - ) \ - do { \ - int enabled = atomic_read(&kbdev->timeline_is_enabled); \ - if (enabled & TLSTREAM_ENABLED) \ - __kbase_tlstream_tl_event_array_begin_kcpuqueue_enqueue_jit_free( \ - __TL_DISPATCH_STREAM(kbdev, obj), \ - kcpu_queue); \ - } while (0) - -/** - * KBASE_TLSTREAM_TL_EVENT_ARRAY_ITEM_KCPUQUEUE_ENQUEUE_JIT_FREE - - * Array item of KCPU Queue enqueues JIT Free - * - * @kbdev: Kbase device - * @kcpu_queue: KCPU queue - * @jit_alloc_jit_id: Unique ID provided by the caller, this is used - * to pair allocation and free requests. Zero is not a valid value - */ -#define KBASE_TLSTREAM_TL_EVENT_ARRAY_ITEM_KCPUQUEUE_ENQUEUE_JIT_FREE( \ - kbdev, \ - kcpu_queue, \ - jit_alloc_jit_id \ - ) \ - do { \ - int enabled = atomic_read(&kbdev->timeline_is_enabled); \ - if (enabled & TLSTREAM_ENABLED) \ - __kbase_tlstream_tl_event_array_item_kcpuqueue_enqueue_jit_free( \ - __TL_DISPATCH_STREAM(kbdev, obj), \ - kcpu_queue, jit_alloc_jit_id); \ - } while (0) - -/** - * KBASE_TLSTREAM_TL_EVENT_ARRAY_END_KCPUQUEUE_ENQUEUE_JIT_FREE - - * End array of KCPU Queue enqueues JIT Free - * - * @kbdev: Kbase device - * @kcpu_queue: KCPU queue - */ -#define KBASE_TLSTREAM_TL_EVENT_ARRAY_END_KCPUQUEUE_ENQUEUE_JIT_FREE( \ - kbdev, \ - kcpu_queue \ - ) \ - do { \ - int enabled = atomic_read(&kbdev->timeline_is_enabled); \ - if (enabled & TLSTREAM_ENABLED) \ - __kbase_tlstream_tl_event_array_end_kcpuqueue_enqueue_jit_free( \ - __TL_DISPATCH_STREAM(kbdev, obj), \ - kcpu_queue); \ - } while (0) - -/** - * KBASE_TLSTREAM_TL_EVENT_KCPUQUEUE_EXECUTE_FENCE_SIGNAL_START - - * KCPU Queue starts a Signal on Fence - * - * @kbdev: Kbase device - * @kcpu_queue: KCPU queue - */ -#define 
KBASE_TLSTREAM_TL_EVENT_KCPUQUEUE_EXECUTE_FENCE_SIGNAL_START( \ - kbdev, \ - kcpu_queue \ - ) \ - do { \ - int enabled = atomic_read(&kbdev->timeline_is_enabled); \ - if (enabled & TLSTREAM_ENABLED) \ - __kbase_tlstream_tl_event_kcpuqueue_execute_fence_signal_start( \ - __TL_DISPATCH_STREAM(kbdev, obj), \ - kcpu_queue); \ - } while (0) - -/** - * KBASE_TLSTREAM_TL_EVENT_KCPUQUEUE_EXECUTE_FENCE_SIGNAL_END - - * KCPU Queue ends a Signal on Fence - * - * @kbdev: Kbase device - * @kcpu_queue: KCPU queue - */ -#define KBASE_TLSTREAM_TL_EVENT_KCPUQUEUE_EXECUTE_FENCE_SIGNAL_END( \ - kbdev, \ - kcpu_queue \ - ) \ - do { \ - int enabled = atomic_read(&kbdev->timeline_is_enabled); \ - if (enabled & TLSTREAM_ENABLED) \ - __kbase_tlstream_tl_event_kcpuqueue_execute_fence_signal_end( \ - __TL_DISPATCH_STREAM(kbdev, obj), \ - kcpu_queue); \ - } while (0) - -/** - * KBASE_TLSTREAM_TL_EVENT_KCPUQUEUE_EXECUTE_FENCE_WAIT_START - - * KCPU Queue starts a Wait on Fence - * - * @kbdev: Kbase device - * @kcpu_queue: KCPU queue - */ -#define KBASE_TLSTREAM_TL_EVENT_KCPUQUEUE_EXECUTE_FENCE_WAIT_START( \ - kbdev, \ - kcpu_queue \ - ) \ - do { \ - int enabled = atomic_read(&kbdev->timeline_is_enabled); \ - if (enabled & TLSTREAM_ENABLED) \ - __kbase_tlstream_tl_event_kcpuqueue_execute_fence_wait_start( \ - __TL_DISPATCH_STREAM(kbdev, obj), \ - kcpu_queue); \ - } while (0) - -/** - * KBASE_TLSTREAM_TL_EVENT_KCPUQUEUE_EXECUTE_FENCE_WAIT_END - - * KCPU Queue ends a Wait on Fence - * - * @kbdev: Kbase device - * @kcpu_queue: KCPU queue - */ -#define KBASE_TLSTREAM_TL_EVENT_KCPUQUEUE_EXECUTE_FENCE_WAIT_END( \ - kbdev, \ - kcpu_queue \ - ) \ - do { \ - int enabled = atomic_read(&kbdev->timeline_is_enabled); \ - if (enabled & TLSTREAM_ENABLED) \ - __kbase_tlstream_tl_event_kcpuqueue_execute_fence_wait_end( \ - __TL_DISPATCH_STREAM(kbdev, obj), \ - kcpu_queue); \ - } while (0) - -/** - * KBASE_TLSTREAM_TL_EVENT_KCPUQUEUE_EXECUTE_CQS_WAIT_START - - * KCPU Queue starts a Wait on an array of Cross 
Queue Sync Objects - * - * @kbdev: Kbase device - * @kcpu_queue: KCPU queue - */ -#define KBASE_TLSTREAM_TL_EVENT_KCPUQUEUE_EXECUTE_CQS_WAIT_START( \ - kbdev, \ - kcpu_queue \ - ) \ - do { \ - int enabled = atomic_read(&kbdev->timeline_is_enabled); \ - if (enabled & TLSTREAM_ENABLED) \ - __kbase_tlstream_tl_event_kcpuqueue_execute_cqs_wait_start( \ - __TL_DISPATCH_STREAM(kbdev, obj), \ - kcpu_queue); \ - } while (0) - -/** - * KBASE_TLSTREAM_TL_EVENT_KCPUQUEUE_EXECUTE_CQS_WAIT_END - - * KCPU Queue ends a Wait on an array of Cross Queue Sync Objects - * - * @kbdev: Kbase device - * @kcpu_queue: KCPU queue - */ -#define KBASE_TLSTREAM_TL_EVENT_KCPUQUEUE_EXECUTE_CQS_WAIT_END( \ - kbdev, \ - kcpu_queue \ - ) \ - do { \ - int enabled = atomic_read(&kbdev->timeline_is_enabled); \ - if (enabled & TLSTREAM_ENABLED) \ - __kbase_tlstream_tl_event_kcpuqueue_execute_cqs_wait_end( \ - __TL_DISPATCH_STREAM(kbdev, obj), \ - kcpu_queue); \ - } while (0) - -/** - * KBASE_TLSTREAM_TL_EVENT_KCPUQUEUE_EXECUTE_CQS_SET_START - - * KCPU Queue starts a Set on an array of Cross Queue Sync Objects - * - * @kbdev: Kbase device - * @kcpu_queue: KCPU queue - */ -#define KBASE_TLSTREAM_TL_EVENT_KCPUQUEUE_EXECUTE_CQS_SET_START( \ - kbdev, \ - kcpu_queue \ - ) \ - do { \ - int enabled = atomic_read(&kbdev->timeline_is_enabled); \ - if (enabled & TLSTREAM_ENABLED) \ - __kbase_tlstream_tl_event_kcpuqueue_execute_cqs_set_start( \ - __TL_DISPATCH_STREAM(kbdev, obj), \ - kcpu_queue); \ - } while (0) - -/** - * KBASE_TLSTREAM_TL_EVENT_KCPUQUEUE_EXECUTE_CQS_SET_END - - * KCPU Queue ends a Set on an array of Cross Queue Sync Objects - * - * @kbdev: Kbase device - * @kcpu_queue: KCPU queue - */ -#define KBASE_TLSTREAM_TL_EVENT_KCPUQUEUE_EXECUTE_CQS_SET_END( \ - kbdev, \ - kcpu_queue \ - ) \ - do { \ - int enabled = atomic_read(&kbdev->timeline_is_enabled); \ - if (enabled & TLSTREAM_ENABLED) \ - __kbase_tlstream_tl_event_kcpuqueue_execute_cqs_set_end( \ - __TL_DISPATCH_STREAM(kbdev, obj), \ - kcpu_queue); 
\ - } while (0) - -/** - * KBASE_TLSTREAM_TL_EVENT_KCPUQUEUE_EXECUTE_DEBUGCOPY_START - - * KCPU Queue starts an array of Debug Copys - * - * @kbdev: Kbase device - * @kcpu_queue: KCPU queue - */ -#define KBASE_TLSTREAM_TL_EVENT_KCPUQUEUE_EXECUTE_DEBUGCOPY_START( \ - kbdev, \ - kcpu_queue \ - ) \ - do { \ - int enabled = atomic_read(&kbdev->timeline_is_enabled); \ - if (enabled & TLSTREAM_ENABLED) \ - __kbase_tlstream_tl_event_kcpuqueue_execute_debugcopy_start( \ - __TL_DISPATCH_STREAM(kbdev, obj), \ - kcpu_queue); \ - } while (0) - -/** - * KBASE_TLSTREAM_TL_EVENT_KCPUQUEUE_EXECUTE_DEBUGCOPY_END - - * KCPU Queue ends an array of Debug Copys - * - * @kbdev: Kbase device - * @kcpu_queue: KCPU queue - */ -#define KBASE_TLSTREAM_TL_EVENT_KCPUQUEUE_EXECUTE_DEBUGCOPY_END( \ - kbdev, \ - kcpu_queue \ - ) \ - do { \ - int enabled = atomic_read(&kbdev->timeline_is_enabled); \ - if (enabled & TLSTREAM_ENABLED) \ - __kbase_tlstream_tl_event_kcpuqueue_execute_debugcopy_end( \ - __TL_DISPATCH_STREAM(kbdev, obj), \ - kcpu_queue); \ - } while (0) - -/** - * KBASE_TLSTREAM_TL_EVENT_KCPUQUEUE_EXECUTE_MAP_IMPORT_START - - * KCPU Queue starts a Map Import - * - * @kbdev: Kbase device - * @kcpu_queue: KCPU queue - */ -#define KBASE_TLSTREAM_TL_EVENT_KCPUQUEUE_EXECUTE_MAP_IMPORT_START( \ - kbdev, \ - kcpu_queue \ - ) \ - do { \ - int enabled = atomic_read(&kbdev->timeline_is_enabled); \ - if (enabled & TLSTREAM_ENABLED) \ - __kbase_tlstream_tl_event_kcpuqueue_execute_map_import_start( \ - __TL_DISPATCH_STREAM(kbdev, obj), \ - kcpu_queue); \ - } while (0) - -/** - * KBASE_TLSTREAM_TL_EVENT_KCPUQUEUE_EXECUTE_MAP_IMPORT_END - - * KCPU Queue ends a Map Import - * - * @kbdev: Kbase device - * @kcpu_queue: KCPU queue - */ -#define KBASE_TLSTREAM_TL_EVENT_KCPUQUEUE_EXECUTE_MAP_IMPORT_END( \ - kbdev, \ - kcpu_queue \ - ) \ - do { \ - int enabled = atomic_read(&kbdev->timeline_is_enabled); \ - if (enabled & TLSTREAM_ENABLED) \ - __kbase_tlstream_tl_event_kcpuqueue_execute_map_import_end( \ - 
__TL_DISPATCH_STREAM(kbdev, obj), \ - kcpu_queue); \ - } while (0) - -/** - * KBASE_TLSTREAM_TL_EVENT_KCPUQUEUE_EXECUTE_UNMAP_IMPORT_START - - * KCPU Queue starts an Unmap Import - * - * @kbdev: Kbase device - * @kcpu_queue: KCPU queue - */ -#define KBASE_TLSTREAM_TL_EVENT_KCPUQUEUE_EXECUTE_UNMAP_IMPORT_START( \ - kbdev, \ - kcpu_queue \ - ) \ - do { \ - int enabled = atomic_read(&kbdev->timeline_is_enabled); \ - if (enabled & TLSTREAM_ENABLED) \ - __kbase_tlstream_tl_event_kcpuqueue_execute_unmap_import_start( \ - __TL_DISPATCH_STREAM(kbdev, obj), \ - kcpu_queue); \ - } while (0) - -/** - * KBASE_TLSTREAM_TL_EVENT_KCPUQUEUE_EXECUTE_UNMAP_IMPORT_END - - * KCPU Queue ends an Unmap Import - * - * @kbdev: Kbase device - * @kcpu_queue: KCPU queue - */ -#define KBASE_TLSTREAM_TL_EVENT_KCPUQUEUE_EXECUTE_UNMAP_IMPORT_END( \ - kbdev, \ - kcpu_queue \ - ) \ - do { \ - int enabled = atomic_read(&kbdev->timeline_is_enabled); \ - if (enabled & TLSTREAM_ENABLED) \ - __kbase_tlstream_tl_event_kcpuqueue_execute_unmap_import_end( \ - __TL_DISPATCH_STREAM(kbdev, obj), \ - kcpu_queue); \ - } while (0) - -/** - * KBASE_TLSTREAM_TL_EVENT_KCPUQUEUE_EXECUTE_UNMAP_IMPORT_FORCE_START - - * KCPU Queue starts an Unmap Import ignoring reference count - * - * @kbdev: Kbase device - * @kcpu_queue: KCPU queue - */ -#define KBASE_TLSTREAM_TL_EVENT_KCPUQUEUE_EXECUTE_UNMAP_IMPORT_FORCE_START( \ - kbdev, \ - kcpu_queue \ - ) \ - do { \ - int enabled = atomic_read(&kbdev->timeline_is_enabled); \ - if (enabled & TLSTREAM_ENABLED) \ - __kbase_tlstream_tl_event_kcpuqueue_execute_unmap_import_force_start( \ - __TL_DISPATCH_STREAM(kbdev, obj), \ - kcpu_queue); \ - } while (0) - -/** - * KBASE_TLSTREAM_TL_EVENT_KCPUQUEUE_EXECUTE_UNMAP_IMPORT_FORCE_END - - * KCPU Queue ends an Unmap Import ignoring reference count - * - * @kbdev: Kbase device - * @kcpu_queue: KCPU queue - */ -#define KBASE_TLSTREAM_TL_EVENT_KCPUQUEUE_EXECUTE_UNMAP_IMPORT_FORCE_END( \ - kbdev, \ - kcpu_queue \ - ) \ - do { \ - int enabled = 
atomic_read(&kbdev->timeline_is_enabled); \ - if (enabled & TLSTREAM_ENABLED) \ - __kbase_tlstream_tl_event_kcpuqueue_execute_unmap_import_force_end( \ - __TL_DISPATCH_STREAM(kbdev, obj), \ - kcpu_queue); \ - } while (0) - -/** - * KBASE_TLSTREAM_TL_EVENT_KCPUQUEUE_EXECUTE_JIT_ALLOC_START - - * KCPU Queue starts an array of JIT Allocs - * - * @kbdev: Kbase device - * @kcpu_queue: KCPU queue - */ -#define KBASE_TLSTREAM_TL_EVENT_KCPUQUEUE_EXECUTE_JIT_ALLOC_START( \ - kbdev, \ - kcpu_queue \ - ) \ - do { \ - int enabled = atomic_read(&kbdev->timeline_is_enabled); \ - if (enabled & TLSTREAM_ENABLED) \ - __kbase_tlstream_tl_event_kcpuqueue_execute_jit_alloc_start( \ - __TL_DISPATCH_STREAM(kbdev, obj), \ - kcpu_queue); \ - } while (0) - -/** - * KBASE_TLSTREAM_TL_EVENT_ARRAY_BEGIN_KCPUQUEUE_EXECUTE_JIT_ALLOC_END - - * Begin array of KCPU Queue ends an array of JIT Allocs - * - * @kbdev: Kbase device - * @kcpu_queue: KCPU queue - */ -#define KBASE_TLSTREAM_TL_EVENT_ARRAY_BEGIN_KCPUQUEUE_EXECUTE_JIT_ALLOC_END( \ - kbdev, \ - kcpu_queue \ - ) \ - do { \ - int enabled = atomic_read(&kbdev->timeline_is_enabled); \ - if (enabled & TLSTREAM_ENABLED) \ - __kbase_tlstream_tl_event_array_begin_kcpuqueue_execute_jit_alloc_end( \ - __TL_DISPATCH_STREAM(kbdev, obj), \ - kcpu_queue); \ - } while (0) - -/** - * KBASE_TLSTREAM_TL_EVENT_ARRAY_ITEM_KCPUQUEUE_EXECUTE_JIT_ALLOC_END - - * Array item of KCPU Queue ends an array of JIT Allocs - * - * @kbdev: Kbase device - * @kcpu_queue: KCPU queue - * @jit_alloc_gpu_alloc_addr: The JIT allocated GPU virtual address - * @jit_alloc_mmu_flags: The MMU flags for the JIT allocation - */ -#define KBASE_TLSTREAM_TL_EVENT_ARRAY_ITEM_KCPUQUEUE_EXECUTE_JIT_ALLOC_END( \ - kbdev, \ - kcpu_queue, \ - jit_alloc_gpu_alloc_addr, \ - jit_alloc_mmu_flags \ - ) \ - do { \ - int enabled = atomic_read(&kbdev->timeline_is_enabled); \ - if (enabled & TLSTREAM_ENABLED) \ - __kbase_tlstream_tl_event_array_item_kcpuqueue_execute_jit_alloc_end( \ - 
__TL_DISPATCH_STREAM(kbdev, obj), \ - kcpu_queue, jit_alloc_gpu_alloc_addr, jit_alloc_mmu_flags); \ - } while (0) - -/** - * KBASE_TLSTREAM_TL_EVENT_ARRAY_END_KCPUQUEUE_EXECUTE_JIT_ALLOC_END - - * End array of KCPU Queue ends an array of JIT Allocs - * - * @kbdev: Kbase device - * @kcpu_queue: KCPU queue - */ -#define KBASE_TLSTREAM_TL_EVENT_ARRAY_END_KCPUQUEUE_EXECUTE_JIT_ALLOC_END( \ - kbdev, \ - kcpu_queue \ - ) \ - do { \ - int enabled = atomic_read(&kbdev->timeline_is_enabled); \ - if (enabled & TLSTREAM_ENABLED) \ - __kbase_tlstream_tl_event_array_end_kcpuqueue_execute_jit_alloc_end( \ - __TL_DISPATCH_STREAM(kbdev, obj), \ - kcpu_queue); \ - } while (0) - -/** - * KBASE_TLSTREAM_TL_EVENT_KCPUQUEUE_EXECUTE_JIT_FREE_START - - * KCPU Queue starts an array of JIT Frees - * - * @kbdev: Kbase device - * @kcpu_queue: KCPU queue - */ -#define KBASE_TLSTREAM_TL_EVENT_KCPUQUEUE_EXECUTE_JIT_FREE_START( \ - kbdev, \ - kcpu_queue \ - ) \ - do { \ - int enabled = atomic_read(&kbdev->timeline_is_enabled); \ - if (enabled & TLSTREAM_ENABLED) \ - __kbase_tlstream_tl_event_kcpuqueue_execute_jit_free_start( \ - __TL_DISPATCH_STREAM(kbdev, obj), \ - kcpu_queue); \ - } while (0) - -/** - * KBASE_TLSTREAM_TL_EVENT_ARRAY_BEGIN_KCPUQUEUE_EXECUTE_JIT_FREE_END - - * Begin array of KCPU Queue ends an array of JIT Frees - * - * @kbdev: Kbase device - * @kcpu_queue: KCPU queue - */ -#define KBASE_TLSTREAM_TL_EVENT_ARRAY_BEGIN_KCPUQUEUE_EXECUTE_JIT_FREE_END( \ - kbdev, \ - kcpu_queue \ - ) \ - do { \ - int enabled = atomic_read(&kbdev->timeline_is_enabled); \ - if (enabled & TLSTREAM_ENABLED) \ - __kbase_tlstream_tl_event_array_begin_kcpuqueue_execute_jit_free_end( \ - __TL_DISPATCH_STREAM(kbdev, obj), \ - kcpu_queue); \ - } while (0) - -/** - * KBASE_TLSTREAM_TL_EVENT_ARRAY_ITEM_KCPUQUEUE_EXECUTE_JIT_FREE_END - - * Array item of KCPU Queue ends an array of JIT Frees - * - * @kbdev: Kbase device - * @kcpu_queue: KCPU queue - * @jit_free_pages_used: The actual number of pages used by the 
JIT - * allocation - */ -#define KBASE_TLSTREAM_TL_EVENT_ARRAY_ITEM_KCPUQUEUE_EXECUTE_JIT_FREE_END( \ - kbdev, \ - kcpu_queue, \ - jit_free_pages_used \ - ) \ - do { \ - int enabled = atomic_read(&kbdev->timeline_is_enabled); \ - if (enabled & TLSTREAM_ENABLED) \ - __kbase_tlstream_tl_event_array_item_kcpuqueue_execute_jit_free_end( \ - __TL_DISPATCH_STREAM(kbdev, obj), \ - kcpu_queue, jit_free_pages_used); \ - } while (0) - -/** - * KBASE_TLSTREAM_TL_EVENT_ARRAY_END_KCPUQUEUE_EXECUTE_JIT_FREE_END - - * End array of KCPU Queue ends an array of JIT Frees - * - * @kbdev: Kbase device - * @kcpu_queue: KCPU queue - */ -#define KBASE_TLSTREAM_TL_EVENT_ARRAY_END_KCPUQUEUE_EXECUTE_JIT_FREE_END( \ - kbdev, \ - kcpu_queue \ - ) \ - do { \ - int enabled = atomic_read(&kbdev->timeline_is_enabled); \ - if (enabled & TLSTREAM_ENABLED) \ - __kbase_tlstream_tl_event_array_end_kcpuqueue_execute_jit_free_end( \ - __TL_DISPATCH_STREAM(kbdev, obj), \ - kcpu_queue); \ - } while (0) - -/** - * KBASE_TLSTREAM_TL_EVENT_KCPUQUEUE_EXECUTE_ERRORBARRIER - - * KCPU Queue executes an Error Barrier - * - * @kbdev: Kbase device - * @kcpu_queue: KCPU queue - */ -#define KBASE_TLSTREAM_TL_EVENT_KCPUQUEUE_EXECUTE_ERRORBARRIER( \ - kbdev, \ - kcpu_queue \ - ) \ - do { \ - int enabled = atomic_read(&kbdev->timeline_is_enabled); \ - if (enabled & TLSTREAM_ENABLED) \ - __kbase_tlstream_tl_event_kcpuqueue_execute_errorbarrier( \ - __TL_DISPATCH_STREAM(kbdev, obj), \ - kcpu_queue); \ - } while (0) - - -/* Gator tracepoints are hooked into TLSTREAM interface. - * When the following tracepoints are called, corresponding - * Gator tracepoint will be called as well. - */ - -#if defined(CONFIG_MALI_BIFROST_GATOR_SUPPORT) -/* `event` is one of TL_JS_EVENT values here. - * The values of TL_JS_EVENT are guaranteed to match - * with corresponding GATOR_JOB_SLOT values. 
- */ -#undef KBASE_TLSTREAM_AUX_EVENT_JOB_SLOT -#define KBASE_TLSTREAM_AUX_EVENT_JOB_SLOT(kbdev, \ - context, slot_nr, atom_nr, event) \ - do { \ - int enabled = atomic_read(&kbdev->timeline_is_enabled); \ - kbase_trace_mali_job_slots_event(kbdev->id, \ - GATOR_MAKE_EVENT(event, slot_nr), \ - context, (u8) atom_nr); \ - if (enabled & TLSTREAM_ENABLED) \ - __kbase_tlstream_aux_event_job_slot( \ - __TL_DISPATCH_STREAM(kbdev, aux), \ - context, slot_nr, atom_nr, event); \ - } while (0) - -#undef KBASE_TLSTREAM_AUX_PM_STATE -#define KBASE_TLSTREAM_AUX_PM_STATE(kbdev, core_type, state) \ - do { \ - int enabled = atomic_read(&kbdev->timeline_is_enabled); \ - kbase_trace_mali_pm_status(kbdev->id, \ - core_type, state); \ - if (enabled & TLSTREAM_ENABLED) \ - __kbase_tlstream_aux_pm_state( \ - __TL_DISPATCH_STREAM(kbdev, aux), \ - core_type, state); \ - } while (0) - -#undef KBASE_TLSTREAM_AUX_PAGEFAULT -#define KBASE_TLSTREAM_AUX_PAGEFAULT(kbdev, \ - ctx_nr, as_nr, page_cnt_change) \ - do { \ - int enabled = atomic_read(&kbdev->timeline_is_enabled); \ - kbase_trace_mali_page_fault_insert_pages(kbdev->id, \ - as_nr, \ - page_cnt_change); \ - if (enabled & TLSTREAM_ENABLED) \ - __kbase_tlstream_aux_pagefault( \ - __TL_DISPATCH_STREAM(kbdev, aux), \ - ctx_nr, as_nr, page_cnt_change); \ - } while (0) - -/* kbase_trace_mali_total_alloc_pages_change is handled differently here. - * We stream the total amount of pages allocated for `kbdev` rather - * than `page_count`, which is per-context. 
- */ -#undef KBASE_TLSTREAM_AUX_PAGESALLOC -#define KBASE_TLSTREAM_AUX_PAGESALLOC(kbdev, ctx_nr, page_cnt) \ - do { \ - int enabled = atomic_read(&kbdev->timeline_is_enabled); \ - u32 global_pages_count = \ - atomic_read(&kbdev->memdev.used_pages); \ - \ - kbase_trace_mali_total_alloc_pages_change(kbdev->id, \ - global_pages_count); \ - if (enabled & TLSTREAM_ENABLED) \ - __kbase_tlstream_aux_pagesalloc( \ - __TL_DISPATCH_STREAM(kbdev, aux), \ - ctx_nr, page_cnt); \ - } while (0) -#endif /* CONFIG_MALI_BIFROST_GATOR_SUPPORT */ - -/* clang-format on */ -#endif diff --git a/drivers/gpu/arm/bifrost/mali_kbase_vinstr.c b/drivers/gpu/arm/bifrost/mali_kbase_vinstr.c index 5e3b74d9d79b..d96b565a966e 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_vinstr.c +++ b/drivers/gpu/arm/bifrost/mali_kbase_vinstr.c @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2011-2019 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2011-2020 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -139,10 +139,7 @@ static const struct file_operations vinstr_client_fops = { */ static u64 kbasep_vinstr_timestamp_ns(void) { - struct timespec ts; - - getrawmonotonic(&ts); - return (u64)ts.tv_sec * NSEC_PER_SEC + ts.tv_nsec; + return ktime_get_raw_ns(); } /** @@ -574,16 +571,6 @@ int kbase_vinstr_hwcnt_reader_setup( if (errcode) goto error; - errcode = anon_inode_getfd( - "[mali_vinstr_desc]", - &vinstr_client_fops, - vcli, - O_RDONLY | O_CLOEXEC); - if (errcode < 0) - goto error; - - fd = errcode; - /* Add the new client. 
No need to reschedule worker, as not periodic */ mutex_lock(&vctx->lock); @@ -592,7 +579,26 @@ int kbase_vinstr_hwcnt_reader_setup( mutex_unlock(&vctx->lock); + /* Expose to user-space only once the client is fully initialized */ + errcode = anon_inode_getfd( + "[mali_vinstr_desc]", + &vinstr_client_fops, + vcli, + O_RDONLY | O_CLOEXEC); + if (errcode < 0) + goto client_installed_error; + + fd = errcode; + return fd; + +client_installed_error: + mutex_lock(&vctx->lock); + + vctx->client_count--; + list_del(&vcli->node); + + mutex_unlock(&vctx->lock); error: kbasep_vinstr_client_destroy(vcli); return errcode; diff --git a/drivers/gpu/arm/bifrost/mali_linux_kbase_trace.h b/drivers/gpu/arm/bifrost/mali_linux_kbase_trace.h deleted file mode 100644 index 6c6a8c6a5b43..000000000000 --- a/drivers/gpu/arm/bifrost/mali_linux_kbase_trace.h +++ /dev/null @@ -1,204 +0,0 @@ -/* - * - * (C) COPYRIGHT 2014,2018 ARM Limited. All rights reserved. - * - * This program is free software and is provided to you under the terms of the - * GNU General Public License version 2 as published by the Free Software - * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, you can access it online at - * http://www.gnu.org/licenses/gpl-2.0.html. 
- * - * SPDX-License-Identifier: GPL-2.0 - * - */ - - -#if !defined(_TRACE_MALI_KBASE_H) || defined(TRACE_HEADER_MULTI_READ) -#define _TRACE_MALI_KBASE_H - -#undef TRACE_SYSTEM -#define TRACE_SYSTEM mali - -#include - -DECLARE_EVENT_CLASS(mali_slot_template, - TP_PROTO(int jobslot, unsigned int info_val), - TP_ARGS(jobslot, info_val), - TP_STRUCT__entry( - __field(unsigned int, jobslot) - __field(unsigned int, info_val) - ), - TP_fast_assign( - __entry->jobslot = jobslot; - __entry->info_val = info_val; - ), - TP_printk("jobslot=%u info=%u", __entry->jobslot, __entry->info_val) -); - -#define DEFINE_MALI_SLOT_EVENT(name) \ -DEFINE_EVENT(mali_slot_template, mali_##name, \ - TP_PROTO(int jobslot, unsigned int info_val), \ - TP_ARGS(jobslot, info_val)) -DEFINE_MALI_SLOT_EVENT(JM_SUBMIT); -DEFINE_MALI_SLOT_EVENT(JM_JOB_DONE); -DEFINE_MALI_SLOT_EVENT(JM_UPDATE_HEAD); -DEFINE_MALI_SLOT_EVENT(JM_CHECK_HEAD); -DEFINE_MALI_SLOT_EVENT(JM_SOFTSTOP); -DEFINE_MALI_SLOT_EVENT(JM_SOFTSTOP_0); -DEFINE_MALI_SLOT_EVENT(JM_SOFTSTOP_1); -DEFINE_MALI_SLOT_EVENT(JM_HARDSTOP); -DEFINE_MALI_SLOT_EVENT(JM_HARDSTOP_0); -DEFINE_MALI_SLOT_EVENT(JM_HARDSTOP_1); -DEFINE_MALI_SLOT_EVENT(JM_SLOT_SOFT_OR_HARD_STOP); -DEFINE_MALI_SLOT_EVENT(JM_SLOT_EVICT); -DEFINE_MALI_SLOT_EVENT(JM_BEGIN_RESET_WORKER); -DEFINE_MALI_SLOT_EVENT(JM_END_RESET_WORKER); -DEFINE_MALI_SLOT_EVENT(JS_CORE_REF_REGISTER_ON_RECHECK_FAILED); -DEFINE_MALI_SLOT_EVENT(JS_AFFINITY_SUBMIT_TO_BLOCKED); -DEFINE_MALI_SLOT_EVENT(JS_AFFINITY_CURRENT); -DEFINE_MALI_SLOT_EVENT(JD_DONE_TRY_RUN_NEXT_JOB); -DEFINE_MALI_SLOT_EVENT(JS_CORE_REF_REQUEST_CORES_FAILED); -DEFINE_MALI_SLOT_EVENT(JS_CORE_REF_REGISTER_INUSE_FAILED); -DEFINE_MALI_SLOT_EVENT(JS_CORE_REF_REQUEST_ON_RECHECK_FAILED); -DEFINE_MALI_SLOT_EVENT(JS_CORE_REF_AFFINITY_WOULD_VIOLATE); -DEFINE_MALI_SLOT_EVENT(JS_JOB_DONE_TRY_RUN_NEXT_JOB); -DEFINE_MALI_SLOT_EVENT(JS_JOB_DONE_RETRY_NEEDED); -DEFINE_MALI_SLOT_EVENT(JS_POLICY_DEQUEUE_JOB); 
-DEFINE_MALI_SLOT_EVENT(JS_POLICY_DEQUEUE_JOB_IRQ); -#undef DEFINE_MALI_SLOT_EVENT - -DECLARE_EVENT_CLASS(mali_refcount_template, - TP_PROTO(int refcount, unsigned int info_val), - TP_ARGS(refcount, info_val), - TP_STRUCT__entry( - __field(unsigned int, refcount) - __field(unsigned int, info_val) - ), - TP_fast_assign( - __entry->refcount = refcount; - __entry->info_val = info_val; - ), - TP_printk("refcount=%u info=%u", __entry->refcount, __entry->info_val) -); - -#define DEFINE_MALI_REFCOUNT_EVENT(name) \ -DEFINE_EVENT(mali_refcount_template, mali_##name, \ - TP_PROTO(int refcount, unsigned int info_val), \ - TP_ARGS(refcount, info_val)) -DEFINE_MALI_REFCOUNT_EVENT(JS_RETAIN_CTX_NOLOCK); -DEFINE_MALI_REFCOUNT_EVENT(JS_ADD_JOB); -DEFINE_MALI_REFCOUNT_EVENT(JS_REMOVE_JOB); -DEFINE_MALI_REFCOUNT_EVENT(JS_RETAIN_CTX); -DEFINE_MALI_REFCOUNT_EVENT(JS_RELEASE_CTX); -DEFINE_MALI_REFCOUNT_EVENT(JS_TRY_SCHEDULE_HEAD_CTX); -DEFINE_MALI_REFCOUNT_EVENT(JS_POLICY_INIT_CTX); -DEFINE_MALI_REFCOUNT_EVENT(JS_POLICY_TERM_CTX); -DEFINE_MALI_REFCOUNT_EVENT(JS_POLICY_ENQUEUE_CTX); -DEFINE_MALI_REFCOUNT_EVENT(JS_POLICY_DEQUEUE_HEAD_CTX); -DEFINE_MALI_REFCOUNT_EVENT(JS_POLICY_TRY_EVICT_CTX); -DEFINE_MALI_REFCOUNT_EVENT(JS_POLICY_RUNPOOL_ADD_CTX); -DEFINE_MALI_REFCOUNT_EVENT(JS_POLICY_RUNPOOL_REMOVE_CTX); -DEFINE_MALI_REFCOUNT_EVENT(JS_POLICY_FOREACH_CTX_JOBS); -DEFINE_MALI_REFCOUNT_EVENT(PM_CONTEXT_ACTIVE); -DEFINE_MALI_REFCOUNT_EVENT(PM_CONTEXT_IDLE); -#undef DEFINE_MALI_REFCOUNT_EVENT - -DECLARE_EVENT_CLASS(mali_add_template, - TP_PROTO(int gpu_addr, unsigned int info_val), - TP_ARGS(gpu_addr, info_val), - TP_STRUCT__entry( - __field(unsigned int, gpu_addr) - __field(unsigned int, info_val) - ), - TP_fast_assign( - __entry->gpu_addr = gpu_addr; - __entry->info_val = info_val; - ), - TP_printk("gpu_addr=%u info=%u", __entry->gpu_addr, __entry->info_val) -); - -#define DEFINE_MALI_ADD_EVENT(name) \ -DEFINE_EVENT(mali_add_template, mali_##name, \ - TP_PROTO(int gpu_addr, unsigned int 
info_val), \ - TP_ARGS(gpu_addr, info_val)) -DEFINE_MALI_ADD_EVENT(CORE_CTX_DESTROY); -DEFINE_MALI_ADD_EVENT(CORE_CTX_HWINSTR_TERM); -DEFINE_MALI_ADD_EVENT(CORE_GPU_IRQ); -DEFINE_MALI_ADD_EVENT(CORE_GPU_IRQ_CLEAR); -DEFINE_MALI_ADD_EVENT(CORE_GPU_IRQ_DONE); -DEFINE_MALI_ADD_EVENT(CORE_GPU_SOFT_RESET); -DEFINE_MALI_ADD_EVENT(CORE_GPU_HARD_RESET); -DEFINE_MALI_ADD_EVENT(CORE_GPU_PRFCNT_SAMPLE); -DEFINE_MALI_ADD_EVENT(CORE_GPU_PRFCNT_CLEAR); -DEFINE_MALI_ADD_EVENT(CORE_GPU_CLEAN_INV_CACHES); -DEFINE_MALI_ADD_EVENT(JD_DONE_WORKER); -DEFINE_MALI_ADD_EVENT(JD_DONE_WORKER_END); -DEFINE_MALI_ADD_EVENT(JD_CANCEL_WORKER); -DEFINE_MALI_ADD_EVENT(JD_DONE); -DEFINE_MALI_ADD_EVENT(JD_CANCEL); -DEFINE_MALI_ADD_EVENT(JD_ZAP_CONTEXT); -DEFINE_MALI_ADD_EVENT(JM_IRQ); -DEFINE_MALI_ADD_EVENT(JM_IRQ_END); -DEFINE_MALI_ADD_EVENT(JM_FLUSH_WORKQS); -DEFINE_MALI_ADD_EVENT(JM_FLUSH_WORKQS_DONE); -DEFINE_MALI_ADD_EVENT(JM_ZAP_NON_SCHEDULED); -DEFINE_MALI_ADD_EVENT(JM_ZAP_SCHEDULED); -DEFINE_MALI_ADD_EVENT(JM_ZAP_DONE); -DEFINE_MALI_ADD_EVENT(JM_SUBMIT_AFTER_RESET); -DEFINE_MALI_ADD_EVENT(JM_JOB_COMPLETE); -DEFINE_MALI_ADD_EVENT(JS_CTX_ATTR_NOW_ON_RUNPOOL); -DEFINE_MALI_ADD_EVENT(JS_CTX_ATTR_NOW_OFF_RUNPOOL); -DEFINE_MALI_ADD_EVENT(JS_CTX_ATTR_NOW_ON_CTX); -DEFINE_MALI_ADD_EVENT(JS_CTX_ATTR_NOW_OFF_CTX); -DEFINE_MALI_ADD_EVENT(JS_POLICY_TIMER_END); -DEFINE_MALI_ADD_EVENT(JS_POLICY_TIMER_START); -DEFINE_MALI_ADD_EVENT(JS_POLICY_ENQUEUE_JOB); -DEFINE_MALI_ADD_EVENT(PM_CORES_CHANGE_DESIRED); -DEFINE_MALI_ADD_EVENT(PM_JOB_SUBMIT_AFTER_POWERING_UP); -DEFINE_MALI_ADD_EVENT(PM_JOB_SUBMIT_AFTER_POWERED_UP); -DEFINE_MALI_ADD_EVENT(PM_PWRON); -DEFINE_MALI_ADD_EVENT(PM_PWRON_TILER); -DEFINE_MALI_ADD_EVENT(PM_PWRON_L2); -DEFINE_MALI_ADD_EVENT(PM_PWROFF); -DEFINE_MALI_ADD_EVENT(PM_PWROFF_TILER); -DEFINE_MALI_ADD_EVENT(PM_PWROFF_L2); -DEFINE_MALI_ADD_EVENT(PM_CORES_POWERED); -DEFINE_MALI_ADD_EVENT(PM_CORES_POWERED_TILER); -DEFINE_MALI_ADD_EVENT(PM_CORES_POWERED_L2); 
-DEFINE_MALI_ADD_EVENT(PM_DESIRED_REACHED); -DEFINE_MALI_ADD_EVENT(PM_DESIRED_REACHED_TILER); -DEFINE_MALI_ADD_EVENT(PM_REQUEST_CHANGE_SHADER_NEEDED); -DEFINE_MALI_ADD_EVENT(PM_REQUEST_CHANGE_TILER_NEEDED); -DEFINE_MALI_ADD_EVENT(PM_RELEASE_CHANGE_SHADER_NEEDED); -DEFINE_MALI_ADD_EVENT(PM_RELEASE_CHANGE_TILER_NEEDED); -DEFINE_MALI_ADD_EVENT(PM_CORES_AVAILABLE); -DEFINE_MALI_ADD_EVENT(PM_CORES_AVAILABLE_TILER); -DEFINE_MALI_ADD_EVENT(PM_CORES_CHANGE_AVAILABLE); -DEFINE_MALI_ADD_EVENT(PM_CORES_CHANGE_AVAILABLE_TILER); -DEFINE_MALI_ADD_EVENT(PM_GPU_ON); -DEFINE_MALI_ADD_EVENT(PM_GPU_OFF); -DEFINE_MALI_ADD_EVENT(PM_SET_POLICY); -DEFINE_MALI_ADD_EVENT(PM_CURRENT_POLICY_INIT); -DEFINE_MALI_ADD_EVENT(PM_CURRENT_POLICY_TERM); -DEFINE_MALI_ADD_EVENT(PM_CA_SET_POLICY); -DEFINE_MALI_ADD_EVENT(PM_WAKE_WAITERS); -#undef DEFINE_MALI_ADD_EVENT - -#endif /* _TRACE_MALI_KBASE_H */ - -#undef TRACE_INCLUDE_PATH -#undef linux -#define TRACE_INCLUDE_PATH . -#undef TRACE_INCLUDE_FILE -#define TRACE_INCLUDE_FILE mali_linux_kbase_trace - -/* This part must be outside protection */ -#include diff --git a/drivers/gpu/arm/bifrost/mali_linux_trace.h b/drivers/gpu/arm/bifrost/mali_linux_trace.h index 96296ac62b94..783fd2ede1a4 100644 --- a/drivers/gpu/arm/bifrost/mali_linux_trace.h +++ b/drivers/gpu/arm/bifrost/mali_linux_trace.h @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2011-2019 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2011-2016, 2018-2020 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -20,15 +20,15 @@ * */ +#undef TRACE_SYSTEM +#define TRACE_SYSTEM mali + #if !defined(_TRACE_MALI_H) || defined(TRACE_HEADER_MULTI_READ) #define _TRACE_MALI_H -#undef TRACE_SYSTEM -#define TRACE_SYSTEM mali -#define TRACE_INCLUDE_FILE mali_linux_trace - #include +#if defined(CONFIG_MALI_BIFROST_GATOR_SUPPORT) #define MALI_JOB_SLOTS_EVENT_CHANGED /** @@ -127,12 +127,406 @@ TRACE_EVENT(mali_total_alloc_pages_change, ), TP_printk("gpu=%u event=%lld", __entry->gpu_id, __entry->event_id) ); +#endif /* CONFIG_MALI_BIFROST_GATOR_SUPPORT */ + +/* + * MMU subsystem tracepoints + */ + +/* Fault status and exception code helpers + * + * Must be macros to allow use by user-side tracepoint tools + * + * bits 0:1 masked off code, and used for the level + * + * Tracepoint files get included more than once - protect against multiple + * definition + */ +#ifndef __TRACE_MALI_MMU_HELPERS +#define __TRACE_MALI_MMU_HELPERS +/* Complex macros should be enclosed in parenthesis. + * + * We need to have those parentheses removed for our arrays of symbolic look-ups + * for __print_symbolic() whilst also being able to use them outside trace code + */ +#define _ENSURE_PARENTHESIS(args...) args + +#define KBASE_MMU_FAULT_CODE_EXCEPTION_NAME_PRINT(code) \ + (!KBASE_MMU_FAULT_CODE_VALID(code) ? "UNKNOWN,level=" : \ + __print_symbolic(((code) & ~3u), \ + KBASE_MMU_FAULT_CODE_SYMBOLIC_STRINGS)) +#define KBASE_MMU_FAULT_CODE_LEVEL(code) \ + (((((code) & ~0x3u) == 0xC4) ? 4 : 0) + ((code) & 0x3u)) + +#define KBASE_MMU_FAULT_STATUS_CODE(status) \ + ((status) & 0xFFu) +#define KBASE_MMU_FAULT_STATUS_DECODED_STRING(status) \ + (((status) & (1u << 10)) ? 
"DECODER_FAULT" : "SLAVE_FAULT") + +#define KBASE_MMU_FAULT_STATUS_EXCEPTION_NAME_PRINT(status) \ + KBASE_MMU_FAULT_CODE_EXCEPTION_NAME_PRINT( \ + KBASE_MMU_FAULT_STATUS_CODE(status)) + +#define KBASE_MMU_FAULT_STATUS_LEVEL(status) \ + KBASE_MMU_FAULT_CODE_LEVEL(KBASE_MMU_FAULT_STATUS_CODE(status)) + +#define KBASE_MMU_FAULT_STATUS_ACCESS(status) \ + ((status) & AS_FAULTSTATUS_ACCESS_TYPE_MASK) +#define KBASE_MMU_FAULT_ACCESS_SYMBOLIC_STRINGS _ENSURE_PARENTHESIS(\ + {AS_FAULTSTATUS_ACCESS_TYPE_ATOMIC, "ATOMIC" }, \ + {AS_FAULTSTATUS_ACCESS_TYPE_EX, "EXECUTE"}, \ + {AS_FAULTSTATUS_ACCESS_TYPE_READ, "READ" }, \ + {AS_FAULTSTATUS_ACCESS_TYPE_WRITE, "WRITE" }) +#define KBASE_MMU_FAULT_STATUS_ACCESS_PRINT(status) \ + __print_symbolic(KBASE_MMU_FAULT_STATUS_ACCESS(status), \ + KBASE_MMU_FAULT_ACCESS_SYMBOLIC_STRINGS) + +#define KBASE_MMU_FAULT_CODE_VALID(code) \ + ((code >= 0xC0 && code <= 0xEF) && \ + (!(code >= 0xC5 && code <= 0xC6)) && \ + (!(code >= 0xCC && code <= 0xCF)) && \ + (!(code >= 0xD4 && code <= 0xD7)) && \ + (!(code >= 0xDC && code <= 0xDF))) +#define KBASE_MMU_FAULT_CODE_SYMBOLIC_STRINGS _ENSURE_PARENTHESIS(\ + {0xC0, "TRANSLATION_FAULT_" }, \ + {0xC4, "TRANSLATION_FAULT(_7==_IDENTITY)_" }, \ + {0xC8, "PERMISSION_FAULT_" }, \ + {0xD0, "TRANSTAB_BUS_FAULT_" }, \ + {0xD8, "ACCESS_FLAG_" }, \ + {0xE0, "ADDRESS_SIZE_FAULT_IN" }, \ + {0xE4, "ADDRESS_SIZE_FAULT_OUT" }, \ + {0xE8, "MEMORY_ATTRIBUTES_FAULT_" }, \ + {0xEC, "MEMORY_ATTRIBUTES_NONCACHEABLE_" }) +#endif /* __TRACE_MALI_MMU_HELPERS */ + +/* trace_mali_mmu_page_fault_grow + * + * Tracepoint about a successful grow of a region due to a GPU page fault + */ +TRACE_EVENT(mali_mmu_page_fault_grow, + TP_PROTO(struct kbase_va_region *reg, struct kbase_fault *fault, + size_t new_pages), + TP_ARGS(reg, fault, new_pages), + TP_STRUCT__entry( + __field(u64, start_addr) + __field(u64, fault_addr) + __field(u64, fault_extra_addr) + __field(size_t, new_pages) + __field(u32, status) + ), + TP_fast_assign( + 
__entry->start_addr = ((u64)reg->start_pfn) << PAGE_SHIFT; + __entry->fault_addr = fault->addr; + __entry->fault_extra_addr = fault->extra_addr; + __entry->new_pages = new_pages; + __entry->status = fault->status; + ), + TP_printk("start=0x%llx fault_addr=0x%llx fault_extra_addr=0x%llx new_pages=%zu raw_fault_status=0x%x decoded_faultstatus=%s exception_type=0x%x,%s%u access_type=0x%x,%s source_id=0x%x", + __entry->start_addr, __entry->fault_addr, + __entry->fault_extra_addr, __entry->new_pages, + __entry->status, + KBASE_MMU_FAULT_STATUS_DECODED_STRING(__entry->status), + KBASE_MMU_FAULT_STATUS_CODE(__entry->status), + KBASE_MMU_FAULT_STATUS_EXCEPTION_NAME_PRINT(__entry->status), + KBASE_MMU_FAULT_STATUS_LEVEL(__entry->status), + KBASE_MMU_FAULT_STATUS_ACCESS(__entry->status) >> 8, + KBASE_MMU_FAULT_STATUS_ACCESS_PRINT(__entry->status), + __entry->status >> 16) +); + + + + +/* + * Just-in-time memory allocation subsystem tracepoints + */ + +/* Just-in-time memory allocation soft-job template. Override the TP_printk + * further if need be. jit_id can be 0. 
+ */ +DECLARE_EVENT_CLASS(mali_jit_softjob_template, + TP_PROTO(struct kbase_va_region *reg, u8 jit_id), + TP_ARGS(reg, jit_id), + TP_STRUCT__entry( + __field(u64, start_addr) + __field(size_t, nr_pages) + __field(size_t, backed_pages) + __field(u8, jit_id) + ), + TP_fast_assign( + __entry->start_addr = ((u64)reg->start_pfn) << PAGE_SHIFT; + __entry->nr_pages = reg->nr_pages; + __entry->backed_pages = kbase_reg_current_backed_size(reg); + __entry->jit_id = jit_id; + ), + TP_printk("jit_id=%u start=0x%llx va_pages=0x%zx backed_size=0x%zx", + __entry->jit_id, __entry->start_addr, __entry->nr_pages, + __entry->backed_pages) +); + +/* trace_mali_jit_alloc() + * + * Tracepoint about a just-in-time memory allocation soft-job successfully + * allocating memory + */ +DEFINE_EVENT(mali_jit_softjob_template, mali_jit_alloc, + TP_PROTO(struct kbase_va_region *reg, u8 jit_id), + TP_ARGS(reg, jit_id)); + +/* trace_mali_jit_free() + * + * Tracepoint about memory that was allocated just-in-time being freed + * (which may happen either on free soft-job, or during rollback error + * paths of an allocation soft-job, etc) + * + * Free doesn't immediately have the just-in-time memory allocation ID so + * it's currently suppressed from the output - set jit_id to 0 + */ +DEFINE_EVENT_PRINT(mali_jit_softjob_template, mali_jit_free, + TP_PROTO(struct kbase_va_region *reg, u8 jit_id), + TP_ARGS(reg, jit_id), + TP_printk("start=0x%llx va_pages=0x%zx backed_size=0x%zx", + __entry->start_addr, __entry->nr_pages, __entry->backed_pages)); + +#if MALI_JIT_PRESSURE_LIMIT +/* trace_mali_jit_report + * + * Tracepoint about the GPU data structure read to form a just-in-time memory + * allocation report, and its calculated physical page usage + */ +TRACE_EVENT(mali_jit_report, + TP_PROTO(struct kbase_jd_atom *katom, struct kbase_va_region *reg, + unsigned int id_idx, u64 read_val, u64 used_pages), + TP_ARGS(katom, reg, id_idx, read_val, used_pages), + TP_STRUCT__entry( + __field(u64, start_addr) + 
__field(u64, read_val) + __field(u64, used_pages) + __field(unsigned long, flags) + __field(u8, id_idx) + __field(u8, jit_id) + ), + TP_fast_assign( + __entry->start_addr = ((u64)reg->start_pfn) << PAGE_SHIFT; + __entry->read_val = read_val; + __entry->used_pages = used_pages; + __entry->flags = reg->flags; + __entry->id_idx = id_idx; + __entry->jit_id = katom->jit_ids[id_idx]; + ), + TP_printk("start=0x%llx jit_ids[%u]=%u read_type='%s' read_val=0x%llx used_pages=%llu", + __entry->start_addr, __entry->id_idx, __entry->jit_id, + __print_symbolic(__entry->flags, + { 0, "address"}, + { KBASE_REG_TILER_ALIGN_TOP, "address with align" }, + { KBASE_REG_HEAP_INFO_IS_SIZE, "size" }, + { KBASE_REG_HEAP_INFO_IS_SIZE | + KBASE_REG_TILER_ALIGN_TOP, + "size with align (invalid)" } + ), + __entry->read_val, __entry->used_pages) +); +#endif /* MALI_JIT_PRESSURE_LIMIT */ + +#if (KERNEL_VERSION(4, 1, 0) <= LINUX_VERSION_CODE) +TRACE_DEFINE_ENUM(KBASE_JIT_REPORT_ON_ALLOC_OR_FREE); +#endif + +#if MALI_JIT_PRESSURE_LIMIT +/* trace_mali_jit_report_pressure + * + * Tracepoint about change in physical memory pressure, due to the information + * about a region changing. 
Examples include: + * - a report on a region that was allocated just-in-time + * - just-in-time allocation of a region + * - free of a region that was allocated just-in-time + */ +TRACE_EVENT(mali_jit_report_pressure, + TP_PROTO(struct kbase_va_region *reg, u64 new_used_pages, + u64 new_pressure, unsigned int flags), + TP_ARGS(reg, new_used_pages, new_pressure, flags), + TP_STRUCT__entry( + __field(u64, start_addr) + __field(u64, used_pages) + __field(u64, new_used_pages) + __field(u64, new_pressure) + __field(unsigned int, flags) + ), + TP_fast_assign( + __entry->start_addr = ((u64)reg->start_pfn) << PAGE_SHIFT; + __entry->used_pages = reg->used_pages; + __entry->new_used_pages = new_used_pages; + __entry->new_pressure = new_pressure; + __entry->flags = flags; + ), + TP_printk("start=0x%llx old_used_pages=%llu new_used_pages=%llu new_pressure=%llu report_flags=%s", + __entry->start_addr, __entry->used_pages, + __entry->new_used_pages, __entry->new_pressure, + __print_flags(__entry->flags, "|", + { KBASE_JIT_REPORT_ON_ALLOC_OR_FREE, + "HAPPENED_ON_ALLOC_OR_FREE" })) +); +#endif /* MALI_JIT_PRESSURE_LIMIT */ + +#ifndef __TRACE_SYSGRAPH_ENUM +#define __TRACE_SYSGRAPH_ENUM +/* Enum of sysgraph message IDs */ +enum sysgraph_msg { + SGR_ARRIVE, + SGR_DEP_RES, + SGR_SUBMIT, + SGR_COMPLETE, + SGR_POST, + SGR_ACTIVE, + SGR_INACTIVE +}; +#endif /* __TRACE_SYSGRAPH_ENUM */ + +/* A template for SYSGRAPH events + * + * Most of the sysgraph events contain only one input argument + * which is atom_id therefore they will be using a common template + */ +TRACE_EVENT(sysgraph, + TP_PROTO(enum sysgraph_msg message, unsigned int proc_id, + unsigned int atom_id), + TP_ARGS(message, proc_id, atom_id), + TP_STRUCT__entry( + __field(unsigned int, proc_id) + __field(enum sysgraph_msg, message) + __field(unsigned int, atom_id) + ), + TP_fast_assign( + __entry->proc_id = proc_id; + __entry->message = message; + __entry->atom_id = atom_id; + ), + TP_printk("msg=%u proc_id=%u, param1=%d\n", 
__entry->message, + __entry->proc_id, __entry->atom_id) +); + +/* A template for SYSGRAPH GPU events + * + * Sysgraph events that record start/complete events + * on GPU also record a js value in addition to the + * atom id. + */ +TRACE_EVENT(sysgraph_gpu, + TP_PROTO(enum sysgraph_msg message, unsigned int proc_id, + unsigned int atom_id, unsigned int js), + TP_ARGS(message, proc_id, atom_id, js), + TP_STRUCT__entry( + __field(unsigned int, proc_id) + __field(enum sysgraph_msg, message) + __field(unsigned int, atom_id) + __field(unsigned int, js) + ), + TP_fast_assign( + __entry->proc_id = proc_id; + __entry->message = message; + __entry->atom_id = atom_id; + __entry->js = js; + ), + TP_printk("msg=%u proc_id=%u, param1=%d, param2=%d\n", + __entry->message, __entry->proc_id, + __entry->atom_id, __entry->js) +); + +/* Tracepoint files get included more than once - protect against multiple + * definition + */ +#undef KBASE_JIT_REPORT_GPU_MEM_SIZE + +/* Size in bytes of the memory surrounding the location used for a just-in-time + * memory allocation report + */ +#define KBASE_JIT_REPORT_GPU_MEM_SIZE (4 * sizeof(u64)) + +/* trace_mali_jit_report_gpu_mem + * + * Tracepoint about the GPU memory nearby the location used for a just-in-time + * memory allocation report + */ +TRACE_EVENT(mali_jit_report_gpu_mem, + TP_PROTO(u64 base_addr, u64 reg_addr, u64 *gpu_mem, unsigned int flags), + TP_ARGS(base_addr, reg_addr, gpu_mem, flags), + TP_STRUCT__entry( + __field(u64, base_addr) + __field(u64, reg_addr) + __array(u64, mem_values, + KBASE_JIT_REPORT_GPU_MEM_SIZE / sizeof(u64)) + __field(unsigned int, flags) + ), + TP_fast_assign( + __entry->base_addr = base_addr; + __entry->reg_addr = reg_addr; + memcpy(__entry->mem_values, gpu_mem, + sizeof(__entry->mem_values)); + __entry->flags = flags; + ), + TP_printk("start=0x%llx read GPU memory base=0x%llx values=%s report_flags=%s", + __entry->reg_addr, __entry->base_addr, + __print_array(__entry->mem_values, + 
ARRAY_SIZE(__entry->mem_values), sizeof(u64)), + __print_flags(__entry->flags, "|", + { KBASE_JIT_REPORT_ON_ALLOC_OR_FREE, + "HAPPENED_ON_ALLOC_OR_FREE" })) +); + +/* trace_mali_jit_trim_from_region + * + * Tracepoint about trimming physical pages from a region + */ +TRACE_EVENT(mali_jit_trim_from_region, + TP_PROTO(struct kbase_va_region *reg, size_t freed_pages, + size_t old_pages, size_t available_pages, size_t new_pages), + TP_ARGS(reg, freed_pages, old_pages, available_pages, new_pages), + TP_STRUCT__entry( + __field(u64, start_addr) + __field(size_t, freed_pages) + __field(size_t, old_pages) + __field(size_t, available_pages) + __field(size_t, new_pages) + ), + TP_fast_assign( + __entry->start_addr = ((u64)reg->start_pfn) << PAGE_SHIFT; + __entry->freed_pages = freed_pages; + __entry->old_pages = old_pages; + __entry->available_pages = available_pages; + __entry->new_pages = new_pages; + ), + TP_printk("start=0x%llx freed_pages=%zu old_pages=%zu available_pages=%zu new_pages=%zu", + __entry->start_addr, __entry->freed_pages, __entry->old_pages, + __entry->available_pages, __entry->new_pages) +); + +/* trace_mali_jit_trim + * + * Tracepoint about total trimmed physical pages + */ +TRACE_EVENT(mali_jit_trim, + TP_PROTO(size_t freed_pages), + TP_ARGS(freed_pages), + TP_STRUCT__entry( + __field(size_t, freed_pages) + ), + TP_fast_assign( + __entry->freed_pages = freed_pages; + ), + TP_printk("freed_pages=%zu", __entry->freed_pages) +); + +#include "mali_kbase_debug_linux_ktrace.h" #endif /* _TRACE_MALI_H */ #undef TRACE_INCLUDE_PATH -#undef linux +/* lwn.net/Articles/383362 suggests this should remain as '.', and instead + * extend CFLAGS + */ #define TRACE_INCLUDE_PATH . 
+#undef TRACE_INCLUDE_FILE +#define TRACE_INCLUDE_FILE mali_linux_trace /* This part must be outside protection */ #include
diff --git a/drivers/gpu/arm/bifrost/mmu/backend/mali_kbase_mmu_jm.c b/drivers/gpu/arm/bifrost/mmu/backend/mali_kbase_mmu_jm.c
new file mode 100644
index 000000000000..2d8fb51d11ac
--- /dev/null
+++ b/drivers/gpu/arm/bifrost/mmu/backend/mali_kbase_mmu_jm.c
@@ -0,0 +1,485 @@
+/*
+ *
+ * (C) COPYRIGHT 2019-2020 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+/**
+ * Base kernel MMU management specific for Job Manager GPU.
+ */
+
+#include
+#include
+#include
+#include
+#include
+#include "../mali_kbase_mmu_internal.h"
+#include "mali_kbase_device_internal.h"
+
+/**
+ * kbase_mmu_get_as_setup - Fill in the address space setup for a page table
+ * @mmut:  MMU table whose pgd provides the translation table base
+ * @setup: Structure to populate (memattr, transtab and transcfg)
+ */
+void kbase_mmu_get_as_setup(struct kbase_mmu_table *mmut,
+		struct kbase_mmu_setup * const setup)
+{
+	/* Set up the required caching policies at the correct indices
+	 * in the memattr register.
+	 */
+	setup->memattr =
+		(AS_MEMATTR_IMPL_DEF_CACHE_POLICY <<
+			(AS_MEMATTR_INDEX_IMPL_DEF_CACHE_POLICY * 8)) |
+		(AS_MEMATTR_FORCE_TO_CACHE_ALL <<
+			(AS_MEMATTR_INDEX_FORCE_TO_CACHE_ALL * 8)) |
+		(AS_MEMATTR_WRITE_ALLOC <<
+			(AS_MEMATTR_INDEX_WRITE_ALLOC * 8)) |
+		(AS_MEMATTR_AARCH64_OUTER_IMPL_DEF <<
+			(AS_MEMATTR_INDEX_OUTER_IMPL_DEF * 8)) |
+		(AS_MEMATTR_AARCH64_OUTER_WA <<
+			(AS_MEMATTR_INDEX_OUTER_WA * 8)) |
+		(AS_MEMATTR_AARCH64_NON_CACHEABLE <<
+			(AS_MEMATTR_INDEX_NON_CACHEABLE * 8));
+
+	setup->transtab = (u64)mmut->pgd & AS_TRANSTAB_BASE_MASK;
+	setup->transcfg = AS_TRANSCFG_ADRMODE_AARCH64_4K;
+}
+
+/**
+ * kbase_gpu_report_bus_fault_and_kill - Report a terminal GPU bus fault
+ * @kctx:  Context the fault is reported against
+ * @as:    Address space in which the fault occurred
+ * @fault: Fault data (status and address) to print
+ *
+ * Prints the decoded fault, disables the MMU for @kctx, then clears and
+ * re-enables the bus fault on @as.
+ */
+void kbase_gpu_report_bus_fault_and_kill(struct kbase_context *kctx,
+		struct kbase_as *as, struct kbase_fault *fault)
+{
+	struct kbase_device *const kbdev = kctx->kbdev;
+	u32 const status = fault->status;
+	u32 const exception_type = (status & 0xFF);
+	u32 const exception_data = (status >> 8) & 0xFFFFFF;
+	int const as_no = as->number;
+	unsigned long flags;
+
+	/* terminal fault, print info about the fault */
+	dev_err(kbdev->dev,
+		"GPU bus fault in AS%d at VA 0x%016llX\n"
+		"raw fault status: 0x%X\n"
+		"exception type 0x%X: %s\n"
+		"exception data 0x%X\n"
+		"pid: %d\n",
+		as_no, fault->addr,
+		status,
+		exception_type, kbase_gpu_exception_name(exception_type),
+		exception_data,
+		kctx->pid);
+
+	/* switch to UNMAPPED mode, will abort all jobs and stop any hw counter
+	 * dumping AS transaction begin
+	 */
+	mutex_lock(&kbdev->mmu_hw_mutex);
+
+	/* Set the MMU into unmapped mode */
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	kbase_mmu_disable(kctx);
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+	mutex_unlock(&kbdev->mmu_hw_mutex);
+	/* AS transaction end */
+
+	kbase_mmu_hw_clear_fault(kbdev, as,
+		KBASE_MMU_FAULT_TYPE_BUS_UNEXPECTED);
+	kbase_mmu_hw_enable_fault(kbdev, as,
+		KBASE_MMU_FAULT_TYPE_BUS_UNEXPECTED);
+}
+
+/**
+ * kbase_mmu_report_fault_and_kill - Report a terminal page fault
+ * @kctx:       Context the fault is reported against
+ * @as:         Address space in which the fault occurred
+ * @reason_str: Reason text printed with the fault details
+ * @fault:      Fault data (status and address) to print
+ *
+ * The caller must ensure it's retained the ctx to prevent it from being
+ * scheduled out whilst it's being worked on.
+ */
+void kbase_mmu_report_fault_and_kill(struct kbase_context *kctx,
+		struct kbase_as *as, const char *reason_str,
+		struct kbase_fault *fault)
+{
+	unsigned long flags;
+	u32 exception_type;
+	u32 access_type;
+	u32 source_id;
+	int as_no;
+	struct kbase_device *kbdev;
+	struct kbasep_js_device_data *js_devdata;
+
+	as_no = as->number;
+	kbdev = kctx->kbdev;
+	js_devdata = &kbdev->js_data;
+
+	/* Make sure the context was active */
+	if (WARN_ON(atomic_read(&kctx->refcount) <= 0))
+		return;
+
+	/* decode the fault status */
+	exception_type = fault->status & 0xFF;
+	access_type = (fault->status >> 8) & 0x3;
+	source_id = (fault->status >> 16);
+
+	/* terminal fault, print info about the fault */
+	dev_err(kbdev->dev,
+		"Unhandled Page fault in AS%d at VA 0x%016llX\n"
+		"Reason: %s\n"
+		"raw fault status: 0x%X\n"
+		"exception type 0x%X: %s\n"
+		"access type 0x%X: %s\n"
+		"source id 0x%X\n"
+		"pid: %d\n",
+		as_no, fault->addr,
+		reason_str,
+		fault->status,
+		exception_type, kbase_gpu_exception_name(exception_type),
+		access_type, kbase_gpu_access_type_name(fault->status),
+		source_id,
+		kctx->pid);
+
+	/* hardware counters dump fault handling */
+	if ((kbdev->hwcnt.kctx) && (kbdev->hwcnt.kctx->as_nr == as_no) &&
+			(kbdev->hwcnt.backend.state ==
+					KBASE_INSTR_STATE_DUMPING)) {
+		if ((fault->addr >= kbdev->hwcnt.addr) &&
+				(fault->addr < (kbdev->hwcnt.addr +
+					kbdev->hwcnt.addr_bytes)))
+			kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_FAULT;
+	}
+
+	/* Stop the kctx from submitting more jobs and cause it to be scheduled
+	 * out/rescheduled - this will occur on releasing the context's refcount
+	 */
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	kbasep_js_clear_submit_allowed(js_devdata, kctx);
+
+	/* Kill any running jobs from the context. Submit is disallowed, so no
+	 * more jobs from this context can appear in the job slots from this
+	 * point on
+	 */
+	kbase_backend_jm_kill_running_jobs_from_kctx(kctx);
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+	/* AS transaction begin */
+	mutex_lock(&kbdev->mmu_hw_mutex);
+
+	/* switch to UNMAPPED mode, will abort all jobs and stop
+	 * any hw counter dumping
+	 */
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	kbase_mmu_disable(kctx);
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+	mutex_unlock(&kbdev->mmu_hw_mutex);
+
+	/* AS transaction end */
+	/* Clear down the fault */
+	kbase_mmu_hw_clear_fault(kbdev, as,
+			KBASE_MMU_FAULT_TYPE_PAGE_UNEXPECTED);
+	kbase_mmu_hw_enable_fault(kbdev, as,
+			KBASE_MMU_FAULT_TYPE_PAGE_UNEXPECTED);
+}
+
+/**
+ * kbase_mmu_interrupt_process - Dispatch the fault of one address space
+ * @kbdev: Device that raised the fault
+ * @kctx:  Context found for the address space, or NULL if there is none
+ * @as:    Address space in which the fault occurred
+ * @fault: Fault data recorded for this address space
+ *
+ * Must be called with kbdev->hwaccess_lock held. Without a context the fault
+ * is cleared and re-enabled directly; otherwise the bus fault or page fault
+ * work item of @as is queued on as->pf_wq.
+ */
+void kbase_mmu_interrupt_process(struct kbase_device *kbdev,
+		struct kbase_context *kctx, struct kbase_as *as,
+		struct kbase_fault *fault)
+{
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	dev_dbg(kbdev->dev,
+		"Entering %s kctx %p, as %p\n",
+		__func__, (void *)kctx, (void *)as);
+
+	if (!kctx) {
+		dev_warn(kbdev->dev, "%s in AS%d at 0x%016llx with no context present! Spurious IRQ or SW Design Error?\n",
+				kbase_as_has_bus_fault(as, fault) ?
+						"Bus error" : "Page fault",
+				as->number, fault->addr);
+
+		/* Since no ctx was found, the MMU must be disabled. */
+		WARN_ON(as->current_setup.transtab);
+
+		if (kbase_as_has_bus_fault(as, fault)) {
+			kbase_mmu_hw_clear_fault(kbdev, as,
+				KBASE_MMU_FAULT_TYPE_BUS_UNEXPECTED);
+			kbase_mmu_hw_enable_fault(kbdev, as,
+				KBASE_MMU_FAULT_TYPE_BUS_UNEXPECTED);
+		} else if (kbase_as_has_page_fault(as, fault)) {
+			kbase_mmu_hw_clear_fault(kbdev, as,
+				KBASE_MMU_FAULT_TYPE_PAGE_UNEXPECTED);
+			kbase_mmu_hw_enable_fault(kbdev, as,
+				KBASE_MMU_FAULT_TYPE_PAGE_UNEXPECTED);
+		}
+
+		return;
+	}
+
+	if (kbase_as_has_bus_fault(as, fault)) {
+		struct kbasep_js_device_data *js_devdata = &kbdev->js_data;
+
+		/*
+		 * hw counters dumping in progress, signal the
+		 * other thread that it failed
+		 */
+		if ((kbdev->hwcnt.kctx == kctx) &&
+		    (kbdev->hwcnt.backend.state ==
+					KBASE_INSTR_STATE_DUMPING))
+			kbdev->hwcnt.backend.state =
+						KBASE_INSTR_STATE_FAULT;
+
+		/*
+		 * Stop the kctx from submitting more jobs and cause it
+		 * to be scheduled out/rescheduled when all references
+		 * to it are released
+		 */
+		kbasep_js_clear_submit_allowed(js_devdata, kctx);
+
+		if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_AARCH64_MMU))
+			dev_warn(kbdev->dev,
+					"Bus error in AS%d at VA=0x%016llx, IPA=0x%016llx\n",
+					as->number, fault->addr,
+					fault->extra_addr);
+		else
+			dev_warn(kbdev->dev, "Bus error in AS%d at 0x%016llx\n",
+					as->number, fault->addr);
+
+		/*
+		 * We need to switch to UNMAPPED mode - but we do this in a
+		 * worker so that we can sleep
+		 */
+		WARN_ON(!queue_work(as->pf_wq, &as->work_busfault));
+		atomic_inc(&kbdev->faults_pending);
+	} else {
+		WARN_ON(!queue_work(as->pf_wq, &as->work_pagefault));
+		atomic_inc(&kbdev->faults_pending);
+	}
+
+	dev_dbg(kbdev->dev,
+		"Leaving %s kctx %p, as %p\n",
+		__func__, (void *)kctx, (void *)as);
+}
+
+/**
+ * validate_protected_page_fault - Check a fault address seen in protected mode
+ * @kbdev: Device that raised the fault
+ *
+ * Prints an error unless protected debug mode is supported and enabled.
+ */
+static void validate_protected_page_fault(struct kbase_device *kbdev)
+{
+	/* GPUs which support (native) protected mode shall not report page
+	 * fault addresses unless it has protected debug mode and protected
+	 * debug mode is turned on
+	 */
+	u32 protected_debug_mode = 0;
+
+	if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_PROTECTED_DEBUG_MODE)) {
+		protected_debug_mode = kbase_reg_read(kbdev,
+				GPU_CONTROL_REG(GPU_STATUS)) & GPU_DBGEN;
+	}
+
+	if (!protected_debug_mode) {
+		/* fault_addr should never be reported in protected mode.
+		 * However, we just continue by printing an error message
+		 */
+		dev_err(kbdev->dev, "Fault address reported in protected mode\n");
+	}
+}
+
+/**
+ * kbase_mmu_interrupt - Handle an MMU interrupt
+ * @kbdev:    Device that raised the interrupt
+ * @irq_stat: MMU IRQ status: page fault bits start at bit 0 and bus fault
+ *            bits at bit MMU_PAGE_FAULT_FLAGS, one bit per address space
+ *
+ * Masks the MMU interrupts, records the fault registers of each flagged
+ * address space and passes it to kbase_mmu_interrupt_process(), then
+ * re-enables the interrupts apart from those of the faults passed on.
+ */
+void kbase_mmu_interrupt(struct kbase_device *kbdev, u32 irq_stat)
+{
+	const int num_as = 16;
+	const int busfault_shift = MMU_PAGE_FAULT_FLAGS;
+	const int pf_shift = 0;
+	const unsigned long as_bit_mask = (1UL << num_as) - 1;
+	unsigned long flags;
+	u32 new_mask;
+	u32 tmp, bf_bits, pf_bits;
+	bool gpu_lost = false;
+
+	/* Check kbdev before its first dereference: the entry trace below
+	 * already reads kbdev->dev
+	 */
+	if (WARN_ON(kbdev == NULL))
+		return;
+
+	dev_dbg(kbdev->dev, "Entering %s irq_stat %u\n",
+		__func__, irq_stat);
+	/* bus faults */
+	bf_bits = (irq_stat >> busfault_shift) & as_bit_mask;
+	/* page faults (note: Ignore ASes with both pf and bf) */
+	pf_bits = ((irq_stat >> pf_shift) & as_bit_mask) & ~bf_bits;
+
+	/* remember current mask */
+	spin_lock_irqsave(&kbdev->mmu_mask_change, flags);
+	new_mask = kbase_reg_read(kbdev, MMU_REG(MMU_IRQ_MASK));
+	/* mask interrupts for now */
+	kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_MASK), 0);
+	spin_unlock_irqrestore(&kbdev->mmu_mask_change, flags);
+
+	while (bf_bits | pf_bits) {
+		struct kbase_as *as;
+		int as_no;
+		struct kbase_context *kctx;
+		struct kbase_fault *fault;
+
+		/*
+		 * the while logic ensures we have a bit set, no need to check
+		 * for not-found here
+		 */
+		as_no = ffs(bf_bits | pf_bits) - 1;
+		as = &kbdev->as[as_no];
+
+		/* find the fault type */
+		if (bf_bits & (1 << as_no))
+			fault = &as->bf_data;
+		else
+			fault = &as->pf_data;
+
+		/*
+		 * Refcount the kctx ASAP - it shouldn't disappear anyway, since
+		 * Bus/Page faults _should_ only occur whilst jobs are running,
+		 * and a job causing the Bus/Page fault shouldn't complete until
+		 * the MMU is updated
+		 */
+		kctx = kbase_ctx_sched_as_to_ctx_refcount(kbdev, as_no);
+
+		/* find faulting address */
+		fault->addr = kbase_reg_read(kbdev, MMU_AS_REG(as_no,
+				AS_FAULTADDRESS_HI));
+		fault->addr <<= 32;
+		fault->addr |= kbase_reg_read(kbdev, MMU_AS_REG(as_no,
+				AS_FAULTADDRESS_LO));
+		/* Mark the fault protected or not */
+		fault->protected_mode = kbdev->protected_mode;
+
+		if (kbdev->protected_mode && fault->addr) {
+			/* check if address reporting is allowed */
+			validate_protected_page_fault(kbdev);
+		}
+
+		/* report the fault to debugfs */
+		kbase_as_fault_debugfs_new(kbdev, as_no);
+
+		/* record the fault status */
+		fault->status = kbase_reg_read(kbdev, MMU_AS_REG(as_no,
+				AS_FAULTSTATUS));
+
+		if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_AARCH64_MMU)) {
+			fault->extra_addr = kbase_reg_read(kbdev,
+					MMU_AS_REG(as_no, AS_FAULTEXTRA_HI));
+			fault->extra_addr <<= 32;
+			fault->extra_addr |= kbase_reg_read(kbdev,
+					MMU_AS_REG(as_no, AS_FAULTEXTRA_LO));
+		}
+
+		/* check if we still have GPU */
+		gpu_lost = kbase_is_gpu_lost(kbdev);
+		if (gpu_lost) {
+			/* NOTE(review): this return leaves MMU_IRQ_MASK
+			 * zeroed; presumably it is reprogrammed once the GPU
+			 * is regained - confirm
+			 */
+			if (kctx)
+				kbasep_js_runpool_release_ctx(kbdev, kctx);
+			return;
+		}
+
+		if (kbase_as_has_bus_fault(as, fault)) {
+			/* Mark bus fault as handled.
+			 * Note that a bus fault is processed first in case
+			 * where both a bus fault and page fault occur.
+			 */
+			bf_bits &= ~(1UL << as_no);
+
+			/* remove the queued BF (and PF) from the mask */
+			new_mask &= ~(MMU_BUS_ERROR(as_no) |
+					MMU_PAGE_FAULT(as_no));
+		} else {
+			/* Mark page fault as handled */
+			pf_bits &= ~(1UL << as_no);
+
+			/* remove the queued PF from the mask */
+			new_mask &= ~MMU_PAGE_FAULT(as_no);
+		}
+
+		/* Process the interrupt for this address space */
+		spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+		kbase_mmu_interrupt_process(kbdev, kctx, as, fault);
+		spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+	}
+
+	/* reenable interrupts */
+	spin_lock_irqsave(&kbdev->mmu_mask_change, flags);
+	tmp = kbase_reg_read(kbdev, MMU_REG(MMU_IRQ_MASK));
+	new_mask |= tmp;
+	kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_MASK), new_mask);
+	spin_unlock_irqrestore(&kbdev->mmu_mask_change, flags);
+
+	dev_dbg(kbdev->dev, "Leaving %s irq_stat %u\n",
+		__func__, irq_stat);
+}
+
+/**
+ * kbase_mmu_switch_to_ir - Switch a region to incremental rendering
+ * @kctx: Context passed on to the soft-stop request
+ * @reg:  Region to switch
+ *
+ * Return: the value returned by kbase_job_slot_softstop_start_rp().
+ */
+int kbase_mmu_switch_to_ir(struct kbase_context *const kctx,
+	struct kbase_va_region *const reg)
+{
+	dev_dbg(kctx->kbdev->dev,
+		"Switching to incremental rendering for region %p\n",
+		(void *)reg);
+	return kbase_job_slot_softstop_start_rp(kctx, reg);
+}
diff --git a/drivers/gpu/arm/bifrost/mali_kbase_mmu.c b/drivers/gpu/arm/bifrost/mmu/mali_kbase_mmu.c similarity index 81% rename from drivers/gpu/arm/bifrost/mali_kbase_mmu.c rename to drivers/gpu/arm/bifrost/mmu/mali_kbase_mmu.c index f30d09b62791..db27832bf251 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_mmu.c +++ b/drivers/gpu/arm/bifrost/mmu/mali_kbase_mmu.c @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2010-2019 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2010-2020 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -20,31 +20,30 @@ * */ - - /** * @file mali_kbase_mmu.c * Base kernel MMU management.
*/ -/* #define DEBUG 1 */ #include #include #include -#include -#include +#include +#include +#include #include +#include #include - -#define beenthere(kctx, f, a...) dev_dbg(kctx->kbdev->dev, "%s:" f, __func__, ##a) - #include #include -#include +#include #include #include #include #include +#include +#include +#include #define KBASE_MMU_PAGE_ENTRIES 512 @@ -107,15 +106,10 @@ static void kbase_mmu_sync_pgd(struct kbase_device *kbdev, * - PGD: Page Directory. * - PTE: Page Table Entry. A 64bit value pointing to the next * level of translation - * - ATE: Address Transation Entry. A 64bit value pointing to + * - ATE: Address Translation Entry. A 64bit value pointing to * a 4kB physical page. */ -static void kbase_mmu_report_fault_and_kill(struct kbase_context *kctx, - struct kbase_as *as, const char *reason_str, - struct kbase_fault *fault); - - static int kbase_mmu_update_pages_no_flush(struct kbase_context *kctx, u64 vpfn, struct tagged_addr *phys, size_t nr, unsigned long flags, int group_id); @@ -146,39 +140,46 @@ static size_t reg_grow_calc_extra_pages(struct kbase_device *kbdev, if (!multiple) { dev_warn(kbdev->dev, - "VA Region 0x%llx extent was 0, allocator needs to set this properly for KBASE_REG_PF_GROW\n", - ((unsigned long long)reg->start_pfn) << PAGE_SHIFT); + "VA Region 0x%llx extent was 0, allocator needs to set this properly for KBASE_REG_PF_GROW\n", + ((unsigned long long)reg->start_pfn) << PAGE_SHIFT); return minimum_extra; } /* Calculate the remainder to subtract from minimum_extra to make it * the desired (rounded down) multiple of the extent. * Depending on reg's flags, the base used for calculating multiples is - * different */ + * different + */ if (reg->flags & KBASE_REG_TILER_ALIGN_TOP) { /* multiple is based from the top of the initial commit, which * has been allocated in such a way that (start_pfn + * initial_commit) is already aligned to multiple. 
Hence the * pfn for the end of committed memory will also be aligned to - * multiple */ + * multiple + */ size_t initial_commit = reg->initial_commit; if (fault_rel_pfn < initial_commit) { /* this case is just to catch in case it's been * recommitted by userspace to be smaller than the - * initial commit */ + * initial commit + */ minimum_extra = initial_commit - reg_current_size; remainder = 0; } else { - /* same as calculating (fault_rel_pfn - initial_commit + 1) */ - size_t pages_after_initial = minimum_extra + reg_current_size - initial_commit; + /* same as calculating + * (fault_rel_pfn - initial_commit + 1) + */ + size_t pages_after_initial = minimum_extra + + reg_current_size - initial_commit; remainder = pages_after_initial % multiple; } } else { /* multiple is based from the current backed size, even if the * current backed size/pfn for end of committed memory are not - * themselves aligned to multiple */ + * themselves aligned to multiple + */ remainder = minimum_extra % multiple; } @@ -292,7 +293,7 @@ static void kbase_gpu_mmu_handle_permission_fault(struct kbase_context *kctx, { struct kbase_fault *fault = &faulting_as->pf_data; - switch (fault->status & AS_FAULTSTATUS_ACCESS_TYPE_MASK) { + switch (AS_FAULTSTATUS_ACCESS_TYPE_GET(fault->status)) { case AS_FAULTSTATUS_ACCESS_TYPE_ATOMIC: case AS_FAULTSTATUS_ACCESS_TYPE_WRITE: kbase_gpu_mmu_handle_write_fault(kctx, faulting_as); @@ -517,6 +518,13 @@ static bool page_fault_try_alloc(struct kbase_context *kctx, return true; } +/* Small wrapper function to factor out GPU-dependent context releasing */ +static void release_ctx(struct kbase_device *kbdev, + struct kbase_context *kctx) +{ + kbasep_js_runpool_release_ctx(kbdev, kctx); +} + void page_fault_worker(struct work_struct *data) { u64 fault_pfn; @@ -535,6 +543,8 @@ void page_fault_worker(struct work_struct *data) bool grow_2mb_pool; struct kbase_sub_alloc *prealloc_sas[2] = { NULL, NULL }; int i; + size_t current_backed_size; + faulting_as = 
container_of(data, struct kbase_as, work_pagefault); fault = &faulting_as->pf_data; @@ -542,12 +552,16 @@ void page_fault_worker(struct work_struct *data) as_no = faulting_as->number; kbdev = container_of(faulting_as, struct kbase_device, as[as_no]); + dev_dbg(kbdev->dev, + "Entering %s %p, fault_pfn %lld, as_no %d\n", + __func__, (void *)data, fault_pfn, as_no); - /* Grab the context that was already refcounted in kbase_mmu_interrupt(). - * Therefore, it cannot be scheduled out of this AS until we explicitly release it + /* Grab the context that was already refcounted in kbase_mmu_interrupt() + * Therefore, it cannot be scheduled out of this AS until we explicitly + * release it */ - kctx = kbasep_js_runpool_lookup_ctx_noretain(kbdev, as_no); - if (WARN_ON(!kctx)) { + kctx = kbase_ctx_sched_as_to_ctx(kbdev, as_no); + if (!kctx) { atomic_dec(&kbdev->faults_pending); return; } @@ -634,8 +648,9 @@ void page_fault_worker(struct work_struct *data) #endif /* CONFIG_MALI_2MB_ALLOC */ page_fault_retry: - /* so we have a translation fault, let's see if it is for growable - * memory */ + /* so we have a translation fault, + * let's see if it is for growable memory + */ kbase_gpu_vm_lock(kctx); region = kbase_region_tracker_find_region_enclosing_address(kctx, @@ -676,16 +691,21 @@ void page_fault_worker(struct work_struct *data) goto fault_done; } - /* find the size we need to grow it by */ - /* we know the result fit in a size_t due to kbase_region_tracker_find_region_enclosing_address - * validating the fault_adress to be within a size_t from the start_pfn */ + /* find the size we need to grow it by + * we know the result fit in a size_t due to + * kbase_region_tracker_find_region_enclosing_address + * validating the fault_address to be within a size_t from the start_pfn + */ fault_rel_pfn = fault_pfn - region->start_pfn; - if (fault_rel_pfn < kbase_reg_current_backed_size(region)) { - dev_dbg(kbdev->dev, "Page fault @ 0x%llx in allocated region 0x%llx-0x%llx of growable 
TMEM: Ignoring", + current_backed_size = kbase_reg_current_backed_size(region); + + if (fault_rel_pfn < current_backed_size) { + dev_dbg(kbdev->dev, + "Page fault @ 0x%llx in allocated region 0x%llx-0x%llx of growable TMEM: Ignoring", fault->addr, region->start_pfn, region->start_pfn + - kbase_reg_current_backed_size(region)); + current_backed_size); mutex_lock(&kbdev->mmu_hw_mutex); @@ -714,9 +734,11 @@ void page_fault_worker(struct work_struct *data) new_pages = reg_grow_calc_extra_pages(kbdev, region, fault_rel_pfn); /* cap to max vsize */ - new_pages = min(new_pages, region->nr_pages - kbase_reg_current_backed_size(region)); + new_pages = min(new_pages, region->nr_pages - current_backed_size); + dev_dbg(kctx->kbdev->dev, "Allocate %zu pages on page fault\n", + new_pages); - if (0 == new_pages) { + if (new_pages == 0) { mutex_lock(&kbdev->mmu_hw_mutex); /* Duplicate of a fault we've already handled, nothing to do */ @@ -746,7 +768,8 @@ void page_fault_worker(struct work_struct *data) u32 op; /* alloc success */ - KBASE_DEBUG_ASSERT(kbase_reg_current_backed_size(region) <= region->nr_pages); + WARN_ON(kbase_reg_current_backed_size(region) > + region->nr_pages); /* set up the new pages */ pfn_offset = kbase_reg_current_backed_size(region) - new_pages; @@ -763,17 +786,44 @@ void page_fault_worker(struct work_struct *data) &kbase_get_gpu_phy_pages(region)[pfn_offset], new_pages, region->flags, region->gpu_alloc->group_id); if (err) { - kbase_free_phy_pages_helper(region->gpu_alloc, new_pages); + kbase_free_phy_pages_helper(region->gpu_alloc, + new_pages); if (region->gpu_alloc != region->cpu_alloc) kbase_free_phy_pages_helper(region->cpu_alloc, new_pages); kbase_gpu_vm_unlock(kctx); - /* The locked VA region will be unlocked and the cache invalidated in here */ + /* The locked VA region will be unlocked and the cache + * invalidated in here + */ kbase_mmu_report_fault_and_kill(kctx, faulting_as, "Page table update failure", fault); goto fault_done; } - 
KBASE_TLSTREAM_AUX_PAGEFAULT(kbdev, kctx->id, as_no, (u64)new_pages); + KBASE_TLSTREAM_AUX_PAGEFAULT(kbdev, kctx->id, as_no, + (u64)new_pages); + trace_mali_mmu_page_fault_grow(region, fault, new_pages); + +#if MALI_INCREMENTAL_RENDERING + /* Switch to incremental rendering if we have nearly run out of + * memory in a JIT memory allocation. + */ + if (region->threshold_pages && + kbase_reg_current_backed_size(region) > + region->threshold_pages) { + + dev_dbg(kctx->kbdev->dev, + "%zu pages exceeded IR threshold %zu\n", + new_pages + current_backed_size, + region->threshold_pages); + + if (kbase_mmu_switch_to_ir(kctx, region) >= 0) { + dev_dbg(kctx->kbdev->dev, + "Get region %p for IR\n", + (void *)region); + kbase_va_region_alloc_get(kctx, region); + } + } +#endif /* AS transaction begin */ mutex_lock(&kbdev->mmu_hw_mutex); @@ -862,6 +912,7 @@ void page_fault_worker(struct work_struct *data) kbase_mmu_report_fault_and_kill(kctx, faulting_as, "Page allocation failure", fault); } else { + dev_dbg(kbdev->dev, "Try again after pool_grow\n"); goto page_fault_retry; } } @@ -874,9 +925,10 @@ void page_fault_worker(struct work_struct *data) * By this point, the fault was handled in some way, * so release the ctx refcount */ - kbasep_js_runpool_release_ctx(kbdev, kctx); + release_ctx(kbdev, kctx); atomic_dec(&kbdev->faults_pending); + dev_dbg(kbdev->dev, "Leaving page_fault_worker %p\n", (void *)data); } static phys_addr_t kbase_mmu_alloc_pgd(struct kbase_device *kbdev, @@ -891,7 +943,7 @@ static phys_addr_t kbase_mmu_alloc_pgd(struct kbase_device *kbdev, return 0; page = kmap(p); - if (NULL == page) + if (page == NULL) goto alloc_free; /* If the MMU tables belong to a context then account the memory usage @@ -951,7 +1003,7 @@ static int mmu_get_next_pgd(struct kbase_device *kbdev, p = pfn_to_page(PFN_DOWN(*pgd)); page = kmap(p); - if (NULL == page) { + if (page == NULL) { dev_warn(kbdev->dev, "%s: kmap failure\n", __func__); return -EINVAL; } @@ -1098,16 +1150,19 @@ int 
kbase_mmu_insert_single_page(struct kbase_context *kctx, u64 vpfn, { phys_addr_t pgd; u64 *pgd_page; - /* In case the insert_single_page only partially completes we need to be - * able to recover */ + /* In case the insert_single_page only partially completes + * we need to be able to recover + */ bool recover_required = false; - u64 recover_vpfn = vpfn; + u64 start_vpfn = vpfn; size_t recover_count = 0; size_t remain = nr; int err; struct kbase_device *kbdev; - KBASE_DEBUG_ASSERT(NULL != kctx); + if (WARN_ON(kctx == NULL)) + return -EINVAL; + /* 64-bit address range is the max */ KBASE_DEBUG_ASSERT(vpfn <= (U64_MAX / PAGE_SIZE)); @@ -1154,11 +1209,12 @@ int kbase_mmu_insert_single_page(struct kbase_context *kctx, u64 vpfn, dev_warn(kbdev->dev, "kbase_mmu_insert_pages: mmu_get_bottom_pgd failure\n"); if (recover_required) { /* Invalidate the pages we have partially - * completed */ + * completed + */ mmu_insert_pages_failure_recovery(kbdev, &kctx->mmu, - recover_vpfn, - recover_vpfn + recover_count); + start_vpfn, + start_vpfn + recover_count); } goto fail_unlock; } @@ -1169,11 +1225,12 @@ int kbase_mmu_insert_single_page(struct kbase_context *kctx, u64 vpfn, dev_warn(kbdev->dev, "kbase_mmu_insert_pages: kmap failure\n"); if (recover_required) { /* Invalidate the pages we have partially - * completed */ + * completed + */ mmu_insert_pages_failure_recovery(kbdev, &kctx->mmu, - recover_vpfn, - recover_vpfn + recover_count); + start_vpfn, + start_vpfn + recover_count); } err = -ENOMEM; goto fail_unlock; @@ -1199,17 +1256,18 @@ int kbase_mmu_insert_single_page(struct kbase_context *kctx, u64 vpfn, kunmap(p); /* We have started modifying the page table. 
* If further pages need inserting and fail we need to undo what - * has already taken place */ + * has already taken place + */ recover_required = true; recover_count += count; } mutex_unlock(&kctx->mmu.mmu_lock); - kbase_mmu_flush_invalidate(kctx, vpfn, nr, false); + kbase_mmu_flush_invalidate(kctx, start_vpfn, nr, false); return 0; fail_unlock: mutex_unlock(&kctx->mmu.mmu_lock); - kbase_mmu_flush_invalidate(kctx, vpfn, nr, false); + kbase_mmu_flush_invalidate(kctx, start_vpfn, nr, false); return err; } @@ -1313,7 +1371,8 @@ int kbase_mmu_insert_pages_no_flush(struct kbase_device *kbdev, "%s: mmu_get_bottom_pgd failure\n", __func__); if (insert_vpfn != start_vpfn) { /* Invalidate the pages we have partially - * completed */ + * completed + */ mmu_insert_pages_failure_recovery(kbdev, mmut, start_vpfn, insert_vpfn); } @@ -1327,7 +1386,8 @@ int kbase_mmu_insert_pages_no_flush(struct kbase_device *kbdev, __func__); if (insert_vpfn != start_vpfn) { /* Invalidate the pages we have partially - * completed */ + * completed + */ mmu_insert_pages_failure_recovery(kbdev, mmut, start_vpfn, insert_vpfn); } @@ -1397,7 +1457,8 @@ int kbase_mmu_insert_pages(struct kbase_device *kbdev, if (mmut->kctx) kbase_mmu_flush_invalidate(mmut->kctx, vpfn, nr, false); else - kbase_mmu_flush_invalidate_no_ctx(kbdev, vpfn, nr, false, as_nr); + kbase_mmu_flush_invalidate_no_ctx(kbdev, vpfn, nr, false, + as_nr); return err; } @@ -1436,8 +1497,8 @@ static void kbase_mmu_flush_invalidate_noretain(struct kbase_context *kctx, vpfn, nr, op, 0); if (err) { /* Flush failed to complete, assume the - * GPU has hung and perform a reset to - * recover */ + * GPU has hung and perform a reset to recover + */ dev_err(kbdev->dev, "Flush for GPU page table update did not complete. 
Issuing GPU soft-reset to recover\n"); if (kbase_prepare_to_reset_gpu_locked(kbdev)) @@ -1509,7 +1570,7 @@ static void kbase_mmu_flush_invalidate(struct kbase_context *kctx, kbdev = kctx->kbdev; mutex_lock(&kbdev->js_data.queue_mutex); - ctx_is_in_runpool = kbasep_js_runpool_retain_ctx(kbdev, kctx); + ctx_is_in_runpool = kbase_ctx_sched_inc_refcount(kctx); mutex_unlock(&kbdev->js_data.queue_mutex); if (ctx_is_in_runpool) { @@ -1518,7 +1579,7 @@ static void kbase_mmu_flush_invalidate(struct kbase_context *kctx, kbase_mmu_flush_invalidate_as(kbdev, &kbdev->as[kctx->as_nr], vpfn, nr, sync); - kbasep_js_runpool_release_ctx(kbdev, kctx); + release_ctx(kbdev, kctx); } } @@ -1547,7 +1608,8 @@ void kbase_mmu_disable(struct kbase_context *kctx) /* ASSERT that the context has a valid as_nr, which is only the case * when it's scheduled in. * - * as_nr won't change because the caller has the hwaccess_lock */ + * as_nr won't change because the caller has the hwaccess_lock + */ KBASE_DEBUG_ASSERT(kctx->as_nr != KBASEP_AS_NR_INVALID); lockdep_assert_held(&kctx->kbdev->hwaccess_lock); @@ -1581,11 +1643,12 @@ int kbase_mmu_teardown_pages(struct kbase_device *kbdev, struct kbase_mmu_table *mmut, u64 vpfn, size_t nr, int as_nr) { phys_addr_t pgd; + u64 start_vpfn = vpfn; size_t requested_nr = nr; struct kbase_mmu_mode const *mmu_mode; int err = -EFAULT; - if (0 == nr) { + if (nr == 0) { /* early out if nothing to do */ return 0; } @@ -1691,9 +1754,11 @@ int kbase_mmu_teardown_pages(struct kbase_device *kbdev, mutex_unlock(&mmut->mmu_lock); if (mmut->kctx) - kbase_mmu_flush_invalidate(mmut->kctx, vpfn, requested_nr, true); + kbase_mmu_flush_invalidate(mmut->kctx, start_vpfn, requested_nr, + true); else - kbase_mmu_flush_invalidate_no_ctx(kbdev, vpfn, requested_nr, true, as_nr); + kbase_mmu_flush_invalidate_no_ctx(kbdev, start_vpfn, requested_nr, + true, as_nr); return err; } @@ -1727,7 +1792,9 @@ static int kbase_mmu_update_pages_no_flush(struct kbase_context *kctx, u64 vpfn, int err; 
struct kbase_device *kbdev; - KBASE_DEBUG_ASSERT(NULL != kctx); + if (WARN_ON(kctx == NULL)) + return -EINVAL; + KBASE_DEBUG_ASSERT(vpfn <= (U64_MAX / PAGE_SIZE)); /* Early out if there is nothing to do */ @@ -1826,9 +1893,11 @@ static void mmu_teardown_level(struct kbase_device *kbdev, pgd_page = kmap_atomic(pfn_to_page(PFN_DOWN(pgd))); /* kmap_atomic should NEVER fail. */ - KBASE_DEBUG_ASSERT(NULL != pgd_page); + if (WARN_ON(pgd_page == NULL)) + return; /* Copy the page to our preallocated buffer so that we can minimize - * kmap_atomic usage */ + * kmap_atomic usage + */ memcpy(pgd_page_buffer, pgd_page, PAGE_SIZE); kunmap_atomic(pgd_page); pgd_page = pgd_page_buffer; @@ -1923,7 +1992,8 @@ void kbase_mmu_term(struct kbase_device *kbdev, struct kbase_mmu_table *mmut) mutex_destroy(&mmut->mmu_lock); } -static size_t kbasep_mmu_dump_level(struct kbase_context *kctx, phys_addr_t pgd, int level, char ** const buffer, size_t *size_left) +static size_t kbasep_mmu_dump_level(struct kbase_context *kctx, phys_addr_t pgd, + int level, char ** const buffer, size_t *size_left) { phys_addr_t target_pgd; u64 *pgd_page; @@ -1933,7 +2003,8 @@ static size_t kbasep_mmu_dump_level(struct kbase_context *kctx, phys_addr_t pgd, struct kbase_device *kbdev; struct kbase_mmu_mode const *mmu_mode; - KBASE_DEBUG_ASSERT(NULL != kctx); + if (WARN_ON(kctx == NULL)) + return 0; lockdep_assert_held(&kctx->mmu.mmu_lock); kbdev = kctx->kbdev; @@ -1946,7 +2017,9 @@ static size_t kbasep_mmu_dump_level(struct kbase_context *kctx, phys_addr_t pgd, } if (*size_left >= size) { - /* A modified physical address that contains the page table level */ + /* A modified physical address that contains + * the page table level + */ u64 m_pgd = pgd | level; /* Put the modified physical address in the output buffer */ @@ -1990,14 +2063,15 @@ void *kbase_mmu_dump(struct kbase_context *kctx, int nr_pages) KBASE_DEBUG_ASSERT(kctx); - if (0 == nr_pages) { + if (nr_pages == 0) { /* can't dump in a 0 sized buffer, early 
out */ return NULL; } size_left = nr_pages * PAGE_SIZE; - KBASE_DEBUG_ASSERT(0 != size_left); + if (WARN_ON(size_left == 0)) + return NULL; kaddr = vmalloc_user(size_left); mutex_lock(&kctx->mmu.mmu_lock); @@ -2038,7 +2112,9 @@ void *kbase_mmu_dump(struct kbase_context *kctx, int nr_pages) size += sizeof(u64); if (size > (nr_pages * PAGE_SIZE)) { - /* The buffer isn't big enough - free the memory and return failure */ + /* The buffer isn't big enough - free the memory and + * return failure + */ goto fail_free; } @@ -2078,8 +2154,8 @@ void bus_fault_worker(struct work_struct *data) * flagging of the bus-fault. Therefore, it cannot be scheduled out of * this AS until we explicitly release it */ - kctx = kbasep_js_runpool_lookup_ctx_noretain(kbdev, as_no); - if (WARN_ON(!kctx)) { + kctx = kbase_ctx_sched_as_to_ctx(kbdev, as_no); + if (!kctx) { atomic_dec(&kbdev->faults_pending); return; } @@ -2089,387 +2165,26 @@ void bus_fault_worker(struct work_struct *data) "Permission failure", fault); kbase_mmu_hw_clear_fault(kbdev, faulting_as, KBASE_MMU_FAULT_TYPE_BUS_UNEXPECTED); - kbasep_js_runpool_release_ctx(kbdev, kctx); + release_ctx(kbdev, kctx); atomic_dec(&kbdev->faults_pending); return; } - /* NOTE: If GPU already powered off for suspend, we don't need to switch to unmapped */ - if (!kbase_pm_context_active_handle_suspend(kbdev, KBASE_PM_SUSPEND_HANDLER_DONT_REACTIVATE)) { - unsigned long flags; - - /* switch to UNMAPPED mode, will abort all jobs and stop any hw counter dumping */ - /* AS transaction begin */ - mutex_lock(&kbdev->mmu_hw_mutex); - - /* Set the MMU into unmapped mode */ - spin_lock_irqsave(&kbdev->hwaccess_lock, flags); - kbase_mmu_disable(kctx); - spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); - - mutex_unlock(&kbdev->mmu_hw_mutex); - /* AS transaction end */ - - kbase_mmu_hw_clear_fault(kbdev, faulting_as, - KBASE_MMU_FAULT_TYPE_BUS_UNEXPECTED); - kbase_mmu_hw_enable_fault(kbdev, faulting_as, - KBASE_MMU_FAULT_TYPE_BUS_UNEXPECTED); - + /* 
NOTE: If GPU already powered off for suspend, + * we don't need to switch to unmapped + */ + if (!kbase_pm_context_active_handle_suspend(kbdev, + KBASE_PM_SUSPEND_HANDLER_DONT_REACTIVATE)) { + kbase_gpu_report_bus_fault_and_kill(kctx, faulting_as, fault); kbase_pm_context_idle(kbdev); } - kbasep_js_runpool_release_ctx(kbdev, kctx); + release_ctx(kbdev, kctx); atomic_dec(&kbdev->faults_pending); } -const char *kbase_exception_name(struct kbase_device *kbdev, u32 exception_code) -{ - const char *e; - - switch (exception_code) { - /* Non-Fault Status code */ - case 0x00: - e = "NOT_STARTED/IDLE/OK"; - break; - case 0x01: - e = "DONE"; - break; - case 0x02: - e = "INTERRUPTED"; - break; - case 0x03: - e = "STOPPED"; - break; - case 0x04: - e = "TERMINATED"; - break; - case 0x08: - e = "ACTIVE"; - break; - /* Job exceptions */ - case 0x40: - e = "JOB_CONFIG_FAULT"; - break; - case 0x41: - e = "JOB_POWER_FAULT"; - break; - case 0x42: - e = "JOB_READ_FAULT"; - break; - case 0x43: - e = "JOB_WRITE_FAULT"; - break; - case 0x44: - e = "JOB_AFFINITY_FAULT"; - break; - case 0x48: - e = "JOB_BUS_FAULT"; - break; - case 0x50: - e = "INSTR_INVALID_PC"; - break; - case 0x51: - e = "INSTR_INVALID_ENC"; - break; - case 0x52: - e = "INSTR_TYPE_MISMATCH"; - break; - case 0x53: - e = "INSTR_OPERAND_FAULT"; - break; - case 0x54: - e = "INSTR_TLS_FAULT"; - break; - case 0x55: - e = "INSTR_BARRIER_FAULT"; - break; - case 0x56: - e = "INSTR_ALIGN_FAULT"; - break; - case 0x58: - e = "DATA_INVALID_FAULT"; - break; - case 0x59: - e = "TILE_RANGE_FAULT"; - break; - case 0x5A: - e = "ADDR_RANGE_FAULT"; - break; - case 0x60: - e = "OUT_OF_MEMORY"; - break; - /* GPU exceptions */ - case 0x80: - e = "DELAYED_BUS_FAULT"; - break; - case 0x88: - e = "SHAREABILITY_FAULT"; - break; - /* MMU exceptions */ - case 0xC0: - case 0xC1: - case 0xC2: - case 0xC3: - case 0xC4: - case 0xC5: - case 0xC6: - case 0xC7: - e = "TRANSLATION_FAULT"; - break; - case 0xC8: - e = "PERMISSION_FAULT"; - break; - case 0xC9: 
- case 0xCA: - case 0xCB: - case 0xCC: - case 0xCD: - case 0xCE: - case 0xCF: - if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_AARCH64_MMU)) - e = "PERMISSION_FAULT"; - else - e = "UNKNOWN"; - break; - case 0xD0: - case 0xD1: - case 0xD2: - case 0xD3: - case 0xD4: - case 0xD5: - case 0xD6: - case 0xD7: - e = "TRANSTAB_BUS_FAULT"; - break; - case 0xD8: - e = "ACCESS_FLAG"; - break; - case 0xD9: - case 0xDA: - case 0xDB: - case 0xDC: - case 0xDD: - case 0xDE: - case 0xDF: - if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_AARCH64_MMU)) - e = "ACCESS_FLAG"; - else - e = "UNKNOWN"; - break; - case 0xE0: - case 0xE1: - case 0xE2: - case 0xE3: - case 0xE4: - case 0xE5: - case 0xE6: - case 0xE7: - if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_AARCH64_MMU)) - e = "ADDRESS_SIZE_FAULT"; - else - e = "UNKNOWN"; - break; - case 0xE8: - case 0xE9: - case 0xEA: - case 0xEB: - case 0xEC: - case 0xED: - case 0xEE: - case 0xEF: - if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_AARCH64_MMU)) - e = "MEMORY_ATTRIBUTES_FAULT"; - else - e = "UNKNOWN"; - break; - default: - e = "UNKNOWN"; - break; - }; - - return e; -} - -static const char *access_type_name(struct kbase_device *kbdev, - u32 fault_status) -{ - switch (fault_status & AS_FAULTSTATUS_ACCESS_TYPE_MASK) { - case AS_FAULTSTATUS_ACCESS_TYPE_ATOMIC: - if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_AARCH64_MMU)) - return "ATOMIC"; - else - return "UNKNOWN"; - case AS_FAULTSTATUS_ACCESS_TYPE_READ: - return "READ"; - case AS_FAULTSTATUS_ACCESS_TYPE_WRITE: - return "WRITE"; - case AS_FAULTSTATUS_ACCESS_TYPE_EX: - return "EXECUTE"; - default: - WARN_ON(1); - return NULL; - } -} - - -/** - * The caller must ensure it's retained the ctx to prevent it from being scheduled out whilst it's being worked on. 
- */ -static void kbase_mmu_report_fault_and_kill(struct kbase_context *kctx, - struct kbase_as *as, const char *reason_str, - struct kbase_fault *fault) -{ - unsigned long flags; - int exception_type; - int access_type; - int source_id; - int as_no; - struct kbase_device *kbdev; - struct kbasep_js_device_data *js_devdata; - - as_no = as->number; - kbdev = kctx->kbdev; - js_devdata = &kbdev->js_data; - - /* ASSERT that the context won't leave the runpool */ - KBASE_DEBUG_ASSERT(atomic_read(&kctx->refcount) > 0); - - /* decode the fault status */ - exception_type = fault->status & 0xFF; - access_type = (fault->status >> 8) & 0x3; - source_id = (fault->status >> 16); - - /* terminal fault, print info about the fault */ - dev_err(kbdev->dev, - "Unhandled Page fault in AS%d at VA 0x%016llX\n" - "Reason: %s\n" - "raw fault status: 0x%X\n" - "decoded fault status: %s\n" - "exception type 0x%X: %s\n" - "access type 0x%X: %s\n" - "source id 0x%X\n" - "pid: %d\n", - as_no, fault->addr, - reason_str, - fault->status, - (fault->status & (1 << 10) ? "DECODER FAULT" : "SLAVE FAULT"), - exception_type, kbase_exception_name(kbdev, exception_type), - access_type, access_type_name(kbdev, fault->status), - source_id, - kctx->pid); - - /* hardware counters dump fault handling */ - if ((kbdev->hwcnt.kctx) && (kbdev->hwcnt.kctx->as_nr == as_no) && - (kbdev->hwcnt.backend.state == - KBASE_INSTR_STATE_DUMPING)) { - if ((fault->addr >= kbdev->hwcnt.addr) && - (fault->addr < (kbdev->hwcnt.addr + - kbdev->hwcnt.addr_bytes))) - kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_FAULT; - } - - /* Stop the kctx from submitting more jobs and cause it to be scheduled - * out/rescheduled - this will occur on releasing the context's refcount */ - spin_lock_irqsave(&kbdev->hwaccess_lock, flags); - kbasep_js_clear_submit_allowed(js_devdata, kctx); - - /* Kill any running jobs from the context. 
Submit is disallowed, so no more jobs from this - * context can appear in the job slots from this point on */ - kbase_backend_jm_kill_running_jobs_from_kctx(kctx); - spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); - - /* AS transaction begin */ - mutex_lock(&kbdev->mmu_hw_mutex); - - /* switch to UNMAPPED mode, will abort all jobs and stop any hw counter dumping */ - spin_lock_irqsave(&kbdev->hwaccess_lock, flags); - kbase_mmu_disable(kctx); - spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); - - mutex_unlock(&kbdev->mmu_hw_mutex); - - - /* AS transaction end */ - /* Clear down the fault */ - kbase_mmu_hw_clear_fault(kbdev, as, - KBASE_MMU_FAULT_TYPE_PAGE_UNEXPECTED); - kbase_mmu_hw_enable_fault(kbdev, as, - KBASE_MMU_FAULT_TYPE_PAGE_UNEXPECTED); -} - -void kbase_mmu_interrupt_process(struct kbase_device *kbdev, - struct kbase_context *kctx, struct kbase_as *as, - struct kbase_fault *fault) -{ - lockdep_assert_held(&kbdev->hwaccess_lock); - - if (!kctx) { - dev_warn(kbdev->dev, "%s in AS%d at 0x%016llx with no context present! Spurious IRQ or SW Design Error?\n", - kbase_as_has_bus_fault(as, fault) ? - "Bus error" : "Page fault", - as->number, fault->addr); - - /* Since no ctx was found, the MMU must be disabled. 
*/ - WARN_ON(as->current_setup.transtab); - - if (kbase_as_has_bus_fault(as, fault)) { - kbase_mmu_hw_clear_fault(kbdev, as, - KBASE_MMU_FAULT_TYPE_BUS_UNEXPECTED); - kbase_mmu_hw_enable_fault(kbdev, as, - KBASE_MMU_FAULT_TYPE_BUS_UNEXPECTED); - } else if (kbase_as_has_page_fault(as, fault)) { - kbase_mmu_hw_clear_fault(kbdev, as, - KBASE_MMU_FAULT_TYPE_PAGE_UNEXPECTED); - kbase_mmu_hw_enable_fault(kbdev, as, - KBASE_MMU_FAULT_TYPE_PAGE_UNEXPECTED); - } - - return; - } - - if (kbase_as_has_bus_fault(as, fault)) { - struct kbasep_js_device_data *js_devdata = &kbdev->js_data; - - /* - * hw counters dumping in progress, signal the - * other thread that it failed - */ - if ((kbdev->hwcnt.kctx == kctx) && - (kbdev->hwcnt.backend.state == - KBASE_INSTR_STATE_DUMPING)) - kbdev->hwcnt.backend.state = - KBASE_INSTR_STATE_FAULT; - - /* - * Stop the kctx from submitting more jobs and cause it - * to be scheduled out/rescheduled when all references - * to it are released - */ - kbasep_js_clear_submit_allowed(js_devdata, kctx); - - if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_AARCH64_MMU)) - dev_warn(kbdev->dev, - "Bus error in AS%d at VA=0x%016llx, IPA=0x%016llx\n", - as->number, fault->addr, - fault->extra_addr); - else - dev_warn(kbdev->dev, "Bus error in AS%d at 0x%016llx\n", - as->number, fault->addr); - - /* - * We need to switch to UNMAPPED mode - but we do this in a - * worker so that we can sleep - */ - WARN_ON(!queue_work(as->pf_wq, &as->work_busfault)); - atomic_inc(&kbdev->faults_pending); - } else { - WARN_ON(!queue_work(as->pf_wq, &as->work_pagefault)); - atomic_inc(&kbdev->faults_pending); - } -} - void kbase_flush_mmu_wqs(struct kbase_device *kbdev) { int i; diff --git a/drivers/gpu/arm/bifrost/mmu/mali_kbase_mmu.h b/drivers/gpu/arm/bifrost/mmu/mali_kbase_mmu.h new file mode 100644 index 000000000000..c9e27b1255c5 --- /dev/null +++ b/drivers/gpu/arm/bifrost/mmu/mali_kbase_mmu.h @@ -0,0 +1,118 @@ +/* + * + * (C) COPYRIGHT 2019 ARM Limited. All rights reserved. 
+ * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +#ifndef _KBASE_MMU_H_ +#define _KBASE_MMU_H_ + +/** + * kbase_mmu_init - Initialise an object representing GPU page tables + * + * The structure should be terminated using kbase_mmu_term() + * + * @kbdev: Instance of GPU platform device, allocated from the probe method. + * @mmut: GPU page tables to be initialized. + * @kctx: Optional kbase context, may be NULL if this set of MMU tables + * is not associated with a context. + * @group_id: The physical group ID from which to allocate GPU page tables. + * Valid range is 0..(MEMORY_GROUP_MANAGER_NR_GROUPS-1). + * + * Return: 0 if successful, otherwise a negative error code. + */ +int kbase_mmu_init(struct kbase_device *kbdev, struct kbase_mmu_table *mmut, + struct kbase_context *kctx, int group_id); + +/** + * kbase_mmu_interrupt - Process an MMU interrupt. + * + * Process the MMU interrupt that was reported by the &kbase_device. + * + * @kbdev: Pointer to the kbase device for which the interrupt happened. + * @irq_stat: Value of the MMU_IRQ_STATUS register. 
+ */ +void kbase_mmu_interrupt(struct kbase_device *kbdev, u32 irq_stat); + +/** + * kbase_mmu_term - Terminate an object representing GPU page tables + * + * This will free any page tables that have been allocated + * + * @kbdev: Instance of GPU platform device, allocated from the probe method. + * @mmut: GPU page tables to be destroyed. + */ +void kbase_mmu_term(struct kbase_device *kbdev, struct kbase_mmu_table *mmut); + +/** + * kbase_mmu_create_ate - Create an address translation entry + * + * @kbdev: Instance of GPU platform device, allocated from the probe method. + * @phy: Physical address of the page to be mapped for GPU access. + * @flags: Bitmask of attributes of the GPU memory region being mapped. + * @level: Page table level for which to build an address translation entry. + * @group_id: The physical memory group in which the page was allocated. + * Valid range is 0..(MEMORY_GROUP_MANAGER_NR_GROUPS-1). + * + * This function creates an address translation entry to encode the physical + * address of a page to be mapped for access by the GPU, along with any extra + * attributes required for the GPU memory region. + * + * Return: An address translation entry, either in LPAE or AArch64 format + * (depending on the driver's configuration). 
+ */ +u64 kbase_mmu_create_ate(struct kbase_device *kbdev, + struct tagged_addr phy, unsigned long flags, int level, int group_id); + +int kbase_mmu_insert_pages_no_flush(struct kbase_device *kbdev, + struct kbase_mmu_table *mmut, + const u64 start_vpfn, + struct tagged_addr *phys, size_t nr, + unsigned long flags, int group_id); +int kbase_mmu_insert_pages(struct kbase_device *kbdev, + struct kbase_mmu_table *mmut, u64 vpfn, + struct tagged_addr *phys, size_t nr, + unsigned long flags, int as_nr, int group_id); +int kbase_mmu_insert_single_page(struct kbase_context *kctx, u64 vpfn, + struct tagged_addr phys, size_t nr, + unsigned long flags, int group_id); + +int kbase_mmu_teardown_pages(struct kbase_device *kbdev, + struct kbase_mmu_table *mmut, u64 vpfn, + size_t nr, int as_nr); +int kbase_mmu_update_pages(struct kbase_context *kctx, u64 vpfn, + struct tagged_addr *phys, size_t nr, + unsigned long flags, int const group_id); + +/** + * kbase_mmu_bus_fault_interrupt - Process a bus fault interrupt. + * + * Process the bus fault interrupt that was reported for a particular GPU + * address space. + * + * @kbdev: Pointer to the kbase device for which bus fault was reported. + * @status: Value of the GPU_FAULTSTATUS register. + * @as_nr: GPU address space for which the bus fault occurred. + * + * Return: zero if the operation was successful, non-zero otherwise. 
+ */ +int kbase_mmu_bus_fault_interrupt(struct kbase_device *kbdev, u32 status, + u32 as_nr); + +#endif /* _KBASE_MMU_H_ */ diff --git a/drivers/gpu/arm/bifrost/mali_kbase_mmu_hw.h b/drivers/gpu/arm/bifrost/mmu/mali_kbase_mmu_hw.h similarity index 55% rename from drivers/gpu/arm/bifrost/mali_kbase_mmu_hw.h rename to drivers/gpu/arm/bifrost/mmu/mali_kbase_mmu_hw.h index f49a1d4da965..e6eef86d7ac0 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_mmu_hw.h +++ b/drivers/gpu/arm/bifrost/mmu/mali_kbase_mmu_hw.h @@ -21,14 +21,8 @@ */ /** - * @file - * Interface file for accessing MMU hardware functionality - */ - -/** - * @page mali_kbase_mmu_hw_page MMU hardware interface + * DOC: Interface file for accessing MMU hardware functionality * - * @section mali_kbase_mmu_hw_intro_sec Introduction * This module provides an abstraction for accessing the functionality provided * by the midgard MMU and thus allows all MMU HW access to be contained within * one common place and allows for different backends (implementations) to @@ -44,16 +38,7 @@ struct kbase_as; struct kbase_context; /** - * @addtogroup base_kbase_api - * @{ - */ - -/** - * @addtogroup mali_kbase_mmu_hw MMU access APIs - * @{ - */ - -/** @brief MMU fault type descriptor. + * enum kbase_mmu_fault_type - MMU fault type descriptor. */ enum kbase_mmu_fault_type { KBASE_MMU_FAULT_TYPE_UNKNOWN = 0, @@ -63,62 +48,60 @@ enum kbase_mmu_fault_type { KBASE_MMU_FAULT_TYPE_BUS_UNEXPECTED }; -/** @brief Configure an address space for use. +/** + * kbase_mmu_hw_configure - Configure an address space for use. + * @kbdev: kbase device to configure. + * @as: address space to configure. * * Configure the MMU using the address space details setup in the - * @ref kbase_context structure. - * - * @param[in] kbdev kbase device to configure. - * @param[in] as address space to configure. + * kbase_context structure. */ void kbase_mmu_hw_configure(struct kbase_device *kbdev, struct kbase_as *as); -/** @brief Issue an operation to the MMU. 
+/** + * kbase_mmu_hw_do_operation - Issue an operation to the MMU. + * @kbdev: kbase device to issue the MMU operation on. + * @as: address space to issue the MMU operation on. + * @vpfn: MMU Virtual Page Frame Number to start the operation on. + * @nr: Number of pages to work on. + * @type: Operation type (written to ASn_COMMAND). + * @handling_irq: Is this operation being called during the handling + * of an interrupt? * * Issue an operation (MMU invalidate, MMU flush, etc) on the address space that - * is associated with the provided @ref kbase_context over the specified range + * is associated with the provided kbase_context over the specified range * - * @param[in] kbdev kbase device to issue the MMU operation on. - * @param[in] as address space to issue the MMU operation on. - * @param[in] vpfn MMU Virtual Page Frame Number to start the - * operation on. - * @param[in] nr Number of pages to work on. - * @param[in] type Operation type (written to ASn_COMMAND). - * @param[in] handling_irq Is this operation being called during the handling - * of an interrupt? - * - * @return Zero if the operation was successful, non-zero otherwise. + * Return: Zero if the operation was successful, non-zero otherwise. */ int kbase_mmu_hw_do_operation(struct kbase_device *kbdev, struct kbase_as *as, u64 vpfn, u32 nr, u32 type, unsigned int handling_irq); -/** @brief Clear a fault that has been previously reported by the MMU. +/** + * kbase_mmu_hw_clear_fault - Clear a fault that has been previously reported by + * the MMU. + * @kbdev: kbase device to clear the fault from. + * @as: address space to clear the fault from. + * @type: The type of fault that needs to be cleared. * * Clear a bus error or page fault that has been reported by the MMU. - * - * @param[in] kbdev kbase device to clear the fault from. - * @param[in] as address space to clear the fault from. - * @param[in] type The type of fault that needs to be cleared. 
*/ void kbase_mmu_hw_clear_fault(struct kbase_device *kbdev, struct kbase_as *as, enum kbase_mmu_fault_type type); -/** @brief Enable fault that has been previously reported by the MMU. +/** + * kbase_mmu_hw_enable_fault - Enable fault that has been previously reported by + * the MMU. + * @kbdev: kbase device to again enable the fault from. + * @as: address space to again enable the fault from. + * @type: The type of fault that needs to be enabled again. * * After a page fault or bus error has been reported by the MMU these * will be disabled. After these are handled this function needs to be * called to enable the page fault or bus error fault again. - * - * @param[in] kbdev kbase device to again enable the fault from. - * @param[in] as address space to again enable the fault from. - * @param[in] type The type of fault that needs to be enabled again. */ void kbase_mmu_hw_enable_fault(struct kbase_device *kbdev, struct kbase_as *as, enum kbase_mmu_fault_type type); -/** @} *//* end group mali_kbase_mmu_hw */ -/** @} *//* end group base_kbase_api */ - #endif /* _KBASE_MMU_HW_H_ */ diff --git a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_mmu_hw_direct.c b/drivers/gpu/arm/bifrost/mmu/mali_kbase_mmu_hw_direct.c similarity index 50% rename from drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_mmu_hw_direct.c rename to drivers/gpu/arm/bifrost/mmu/mali_kbase_mmu_hw_direct.c index 670e1cbf1c9c..f22e73e07398 100644 --- a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_mmu_hw_direct.c +++ b/drivers/gpu/arm/bifrost/mmu/mali_kbase_mmu_hw_direct.c @@ -21,49 +21,68 @@ */ #include - #include #include -#include -#include +#include +#include #include #include -static inline u64 lock_region(struct kbase_device *kbdev, u64 pfn, - u32 num_pages) +/** + * lock_region() - Generate lockaddr to lock memory region in MMU + * @pfn: Starting page frame number of the region to lock + * @num_pages: Number of pages to lock. It must be greater than 0. 
+ * @lockaddr: Address and size of memory region to lock + * + * The lockaddr value is a combination of the starting address and + * the size of the region that encompasses all the memory pages to lock. + * + * The size is expressed as a logarithm: it is represented in a way + * that is compatible with the HW specification and it also determines + * how many of the lowest bits of the address are cleared. + * + * Return: 0 if success, or an error code on failure. + */ +static int lock_region(u64 pfn, u32 num_pages, u64 *lockaddr) { - u64 region; + const u64 lockaddr_base = pfn << PAGE_SHIFT; + u64 lockaddr_size_log2, region_frame_number_start, + region_frame_number_end; - /* can't lock a zero sized range */ - KBASE_DEBUG_ASSERT(num_pages); + if (num_pages == 0) + return -EINVAL; - region = pfn << PAGE_SHIFT; - /* - * fls returns (given the ASSERT above): - * 1 .. 32 - * - * 10 + fls(num_pages) - * results in the range (11 .. 42) + /* The size is expressed as a logarithm and should take into account + * the possibility that some pages might spill into the next region. */ + lockaddr_size_log2 = fls(num_pages) + PAGE_SHIFT - 1; - /* gracefully handle num_pages being zero */ - if (0 == num_pages) { - region |= KBASE_LOCK_REGION_MIN_SIZE; - } else { - u8 region_width; + /* Round up if the number of pages is not a power of 2. */ + if (num_pages != ((u32)1 << (lockaddr_size_log2 - PAGE_SHIFT))) + lockaddr_size_log2 += 1; - region_width = 10 + fls(num_pages); - if (num_pages != (1ul << (region_width - 11))) { - /* not pow2, so must go up to the next pow2 */ - region_width += 1; - } - region_width = MAX(region_width, KBASE_LOCK_REGION_MIN_SIZE); + /* Round up if some memory pages spill into the next region. 
*/ + region_frame_number_start = pfn >> (lockaddr_size_log2 - PAGE_SHIFT); + region_frame_number_end = + (pfn + num_pages - 1) >> (lockaddr_size_log2 - PAGE_SHIFT); - KBASE_DEBUG_ASSERT(region_width <= KBASE_LOCK_REGION_MAX_SIZE); - region |= region_width; - } + if (region_frame_number_start < region_frame_number_end) + lockaddr_size_log2 += 1; - return region; + /* Represent the size according to the HW specification. */ + lockaddr_size_log2 = MAX(lockaddr_size_log2, + KBASE_LOCK_REGION_MIN_SIZE_LOG2); + + if (lockaddr_size_log2 > KBASE_LOCK_REGION_MAX_SIZE_LOG2) + return -EINVAL; + + /* The lowest bits are cleared and then set to size - 1 to represent + * the size in a way that is compatible with the HW specification. + */ + *lockaddr = lockaddr_base & ~((1ull << lockaddr_size_log2) - 1); + *lockaddr |= lockaddr_size_log2 - 1; + + return 0; } static int wait_ready(struct kbase_device *kbdev, @@ -73,7 +92,8 @@ static int wait_ready(struct kbase_device *kbdev, u32 val = kbase_reg_read(kbdev, MMU_AS_REG(as_nr, AS_STATUS)); /* Wait for the MMU status to indicate there is no active command, in - * case one is pending. Do not log remaining register accesses. */ + * case one is pending. Do not log remaining register accesses. 
+ */ while (--max_loops && (val & AS_STATUS_AS_ACTIVE)) val = kbase_reg_read(kbdev, MMU_AS_REG(as_nr, AS_STATUS)); @@ -101,140 +121,6 @@ static int write_cmd(struct kbase_device *kbdev, int as_nr, u32 cmd) return status; } -static void validate_protected_page_fault(struct kbase_device *kbdev) -{ - /* GPUs which support (native) protected mode shall not report page - * fault addresses unless it has protected debug mode and protected - * debug mode is turned on */ - u32 protected_debug_mode = 0; - - if (!kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_PROTECTED_MODE)) - return; - - if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_PROTECTED_DEBUG_MODE)) { - protected_debug_mode = kbase_reg_read(kbdev, - GPU_CONTROL_REG(GPU_STATUS)) & GPU_DBGEN; - } - - if (!protected_debug_mode) { - /* fault_addr should never be reported in protected mode. - * However, we just continue by printing an error message */ - dev_err(kbdev->dev, "Fault address reported in protected mode\n"); - } -} - -void kbase_mmu_interrupt(struct kbase_device *kbdev, u32 irq_stat) -{ - const int num_as = 16; - const int busfault_shift = MMU_PAGE_FAULT_FLAGS; - const int pf_shift = 0; - const unsigned long as_bit_mask = (1UL << num_as) - 1; - unsigned long flags; - u32 new_mask; - u32 tmp; - - /* bus faults */ - u32 bf_bits = (irq_stat >> busfault_shift) & as_bit_mask; - /* page faults (note: Ignore ASes with both pf and bf) */ - u32 pf_bits = ((irq_stat >> pf_shift) & as_bit_mask) & ~bf_bits; - - KBASE_DEBUG_ASSERT(NULL != kbdev); - - /* remember current mask */ - spin_lock_irqsave(&kbdev->mmu_mask_change, flags); - new_mask = kbase_reg_read(kbdev, MMU_REG(MMU_IRQ_MASK)); - /* mask interrupts for now */ - kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_MASK), 0); - spin_unlock_irqrestore(&kbdev->mmu_mask_change, flags); - - while (bf_bits | pf_bits) { - struct kbase_as *as; - int as_no; - struct kbase_context *kctx; - struct kbase_fault *fault; - - /* - * the while logic ensures we have a bit set, no need to check - * 
for not-found here - */ - as_no = ffs(bf_bits | pf_bits) - 1; - as = &kbdev->as[as_no]; - - /* find the fault type */ - if (bf_bits & (1 << as_no)) - fault = &as->bf_data; - else - fault = &as->pf_data; - - /* - * Refcount the kctx ASAP - it shouldn't disappear anyway, since - * Bus/Page faults _should_ only occur whilst jobs are running, - * and a job causing the Bus/Page fault shouldn't complete until - * the MMU is updated - */ - kctx = kbasep_js_runpool_lookup_ctx(kbdev, as_no); - - /* find faulting address */ - fault->addr = kbase_reg_read(kbdev, MMU_AS_REG(as_no, - AS_FAULTADDRESS_HI)); - fault->addr <<= 32; - fault->addr |= kbase_reg_read(kbdev, MMU_AS_REG(as_no, - AS_FAULTADDRESS_LO)); - /* Mark the fault protected or not */ - fault->protected_mode = kbdev->protected_mode; - - if (kbdev->protected_mode && fault->addr) { - /* check if address reporting is allowed */ - validate_protected_page_fault(kbdev); - } - - /* report the fault to debugfs */ - kbase_as_fault_debugfs_new(kbdev, as_no); - - /* record the fault status */ - fault->status = kbase_reg_read(kbdev, MMU_AS_REG(as_no, - AS_FAULTSTATUS)); - - if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_AARCH64_MMU)) { - fault->extra_addr = kbase_reg_read(kbdev, - MMU_AS_REG(as_no, AS_FAULTEXTRA_HI)); - fault->extra_addr <<= 32; - fault->extra_addr |= kbase_reg_read(kbdev, - MMU_AS_REG(as_no, AS_FAULTEXTRA_LO)); - } - - if (kbase_as_has_bus_fault(as, fault)) { - /* Mark bus fault as handled. - * Note that a bus fault is processed first in case - * where both a bus fault and page fault occur. 
- */ - bf_bits &= ~(1UL << as_no); - - /* remove the queued BF (and PF) from the mask */ - new_mask &= ~(MMU_BUS_ERROR(as_no) | - MMU_PAGE_FAULT(as_no)); - } else { - /* Mark page fault as handled */ - pf_bits &= ~(1UL << as_no); - - /* remove the queued PF from the mask */ - new_mask &= ~MMU_PAGE_FAULT(as_no); - } - - /* Process the interrupt for this address space */ - spin_lock_irqsave(&kbdev->hwaccess_lock, flags); - kbase_mmu_interrupt_process(kbdev, kctx, as, fault); - spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); - } - - /* reenable interrupts */ - spin_lock_irqsave(&kbdev->mmu_mask_change, flags); - tmp = kbase_reg_read(kbdev, MMU_REG(MMU_IRQ_MASK)); - new_mask |= tmp; - kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_MASK), new_mask); - spin_unlock_irqrestore(&kbdev->mmu_mask_change, flags); -} - void kbase_mmu_hw_configure(struct kbase_device *kbdev, struct kbase_as *as) { struct kbase_mmu_setup *current_setup = &as->current_setup; @@ -243,8 +129,9 @@ void kbase_mmu_hw_configure(struct kbase_device *kbdev, struct kbase_as *as) if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_AARCH64_MMU)) { transcfg = current_setup->transcfg; - /* Set flag AS_TRANSCFG_PTW_MEMATTR_WRITE_BACK */ - /* Clear PTW_MEMATTR bits */ + /* Set flag AS_TRANSCFG_PTW_MEMATTR_WRITE_BACK + * Clear PTW_MEMATTR bits + */ transcfg &= ~AS_TRANSCFG_PTW_MEMATTR_MASK; /* Enable correct PTW_MEMATTR bits */ transcfg |= AS_TRANSCFG_PTW_MEMATTR_WRITE_BACK; @@ -254,8 +141,9 @@ void kbase_mmu_hw_configure(struct kbase_device *kbdev, struct kbase_as *as) transcfg |= AS_TRANSCFG_R_ALLOCATE; if (kbdev->system_coherency == COHERENCY_ACE) { - /* Set flag AS_TRANSCFG_PTW_SH_OS (outer shareable) */ - /* Clear PTW_SH bits */ + /* Set flag AS_TRANSCFG_PTW_SH_OS (outer shareable) + * Clear PTW_SH bits + */ transcfg = (transcfg & ~AS_TRANSCFG_PTW_SH_MASK); /* Enable correct PTW_SH bits */ transcfg = (transcfg | AS_TRANSCFG_PTW_SH_OS); @@ -300,20 +188,26 @@ int kbase_mmu_hw_do_operation(struct kbase_device *kbdev, 
struct kbase_as *as, /* Unlock doesn't require a lock first */ ret = write_cmd(kbdev, as->number, AS_COMMAND_UNLOCK); } else { - u64 lock_addr = lock_region(kbdev, vpfn, nr); + u64 lock_addr; - /* Lock the region that needs to be updated */ - kbase_reg_write(kbdev, MMU_AS_REG(as->number, AS_LOCKADDR_LO), + ret = lock_region(vpfn, nr, &lock_addr); + + if (!ret) { + /* Lock the region that needs to be updated */ + kbase_reg_write(kbdev, + MMU_AS_REG(as->number, AS_LOCKADDR_LO), lock_addr & 0xFFFFFFFFUL); - kbase_reg_write(kbdev, MMU_AS_REG(as->number, AS_LOCKADDR_HI), + kbase_reg_write(kbdev, + MMU_AS_REG(as->number, AS_LOCKADDR_HI), (lock_addr >> 32) & 0xFFFFFFFFUL); - write_cmd(kbdev, as->number, AS_COMMAND_LOCK); + write_cmd(kbdev, as->number, AS_COMMAND_LOCK); - /* Run the MMU operation */ - write_cmd(kbdev, as->number, op); + /* Run the MMU operation */ + write_cmd(kbdev, as->number, op); - /* Wait for the flush to complete */ - ret = wait_ready(kbdev, as->number); + /* Wait for the flush to complete */ + ret = wait_ready(kbdev, as->number); + } } return ret; @@ -352,8 +246,9 @@ void kbase_mmu_hw_enable_fault(struct kbase_device *kbdev, struct kbase_as *as, unsigned long flags; u32 irq_mask; - /* Enable the page fault IRQ (and bus fault IRQ as well in case one - * occurred) */ + /* Enable the page fault IRQ + * (and bus fault IRQ as well in case one occurred) + */ spin_lock_irqsave(&kbdev->mmu_mask_change, flags); /* diff --git a/drivers/gpu/arm/bifrost/mmu/mali_kbase_mmu_internal.h b/drivers/gpu/arm/bifrost/mmu/mali_kbase_mmu_internal.h new file mode 100644 index 000000000000..28bd341bf082 --- /dev/null +++ b/drivers/gpu/arm/bifrost/mmu/mali_kbase_mmu_internal.h @@ -0,0 +1,63 @@ +/* + * + * (C) COPYRIGHT 2019-2020 ARM Limited. All rights reserved. 
+ * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +#ifndef _KBASE_MMU_INTERNAL_H_ +#define _KBASE_MMU_INTERNAL_H_ + +void kbase_mmu_get_as_setup(struct kbase_mmu_table *mmut, + struct kbase_mmu_setup * const setup); + +void kbase_gpu_report_bus_fault_and_kill(struct kbase_context *kctx, + struct kbase_as *as, struct kbase_fault *fault); + +void kbase_mmu_report_fault_and_kill(struct kbase_context *kctx, + struct kbase_as *as, const char *reason_str, + struct kbase_fault *fault); + +/** + * kbase_mmu_interrupt_process - Process a bus or page fault. + * @kbdev: The kbase_device the fault happened on + * @kctx: The kbase_context for the faulting address space if one was found. + * @as: The address space that has the fault + * @fault: Data relating to the fault + * + * This function will process a fault on a specific address space + */ +void kbase_mmu_interrupt_process(struct kbase_device *kbdev, + struct kbase_context *kctx, struct kbase_as *as, + struct kbase_fault *fault); + +/** + * kbase_mmu_switch_to_ir() - Switch to incremental rendering if possible + * @kctx: The kbase_context for the faulting address space. + * @reg: Reference of a growable GPU memory region in the same context. + * Takes ownership of the reference if successful.
+ * + * Used to switch to incremental rendering if we have nearly run out of + * virtual address space in a growable memory region. + * + * Return: 0 if successful, otherwise a negative error code. + */ +int kbase_mmu_switch_to_ir(struct kbase_context *kctx, + struct kbase_va_region *reg); + +#endif /* _KBASE_MMU_INTERNAL_H_ */ diff --git a/drivers/gpu/arm/bifrost/mali_kbase_mmu_mode_aarch64.c b/drivers/gpu/arm/bifrost/mmu/mali_kbase_mmu_mode_aarch64.c similarity index 84% rename from drivers/gpu/arm/bifrost/mali_kbase_mmu_mode_aarch64.c rename to drivers/gpu/arm/bifrost/mmu/mali_kbase_mmu_mode_aarch64.c index 7b9cc0c3162e..02493e9b2621 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_mmu_mode_aarch64.c +++ b/drivers/gpu/arm/bifrost/mmu/mali_kbase_mmu_mode_aarch64.c @@ -20,10 +20,11 @@ * */ - #include "mali_kbase.h" -#include "mali_midg_regmap.h" +#include #include "mali_kbase_defs.h" +#include +#include #define ENTRY_TYPE_MASK 3ULL /* For valid ATEs bit 1 = ((level == 3) ? 1 : 0). @@ -68,30 +69,6 @@ static inline void page_table_entry_set(u64 *pte, u64 phy) #endif } -static void mmu_get_as_setup(struct kbase_mmu_table *mmut, - struct kbase_mmu_setup * const setup) -{ - /* Set up the required caching policies at the correct indices - * in the memattr register.
- */ - setup->memattr = - (AS_MEMATTR_IMPL_DEF_CACHE_POLICY << - (AS_MEMATTR_INDEX_IMPL_DEF_CACHE_POLICY * 8)) | - (AS_MEMATTR_FORCE_TO_CACHE_ALL << - (AS_MEMATTR_INDEX_FORCE_TO_CACHE_ALL * 8)) | - (AS_MEMATTR_WRITE_ALLOC << - (AS_MEMATTR_INDEX_WRITE_ALLOC * 8)) | - (AS_MEMATTR_AARCH64_OUTER_IMPL_DEF << - (AS_MEMATTR_INDEX_OUTER_IMPL_DEF * 8)) | - (AS_MEMATTR_AARCH64_OUTER_WA << - (AS_MEMATTR_INDEX_OUTER_WA * 8)) | - (AS_MEMATTR_AARCH64_NON_CACHEABLE << - (AS_MEMATTR_INDEX_NON_CACHEABLE * 8)); - - setup->transtab = (u64)mmut->pgd & AS_TRANSTAB_BASE_MASK; - setup->transcfg = AS_TRANSCFG_ADRMODE_AARCH64_4K; -} - static void mmu_update(struct kbase_device *kbdev, struct kbase_mmu_table *mmut, int as_nr) { @@ -104,7 +81,7 @@ static void mmu_update(struct kbase_device *kbdev, struct kbase_mmu_table *mmut, as = &kbdev->as[as_nr]; current_setup = &as->current_setup; - mmu_get_as_setup(mmut, current_setup); + kbase_mmu_get_as_setup(mmut, current_setup); /* Apply the address space setting */ kbase_mmu_hw_configure(kbdev, as); @@ -206,7 +183,7 @@ static void entry_invalidate(u64 *entry) static struct kbase_mmu_mode const aarch64_mode = { .update = mmu_update, - .get_as_setup = mmu_get_as_setup, + .get_as_setup = kbase_mmu_get_as_setup, .disable_as = mmu_disable_as, .pte_to_phy_addr = pte_to_phy_addr, .ate_is_valid = ate_is_valid, diff --git a/drivers/gpu/arm/bifrost/mali_kbase_mmu_mode_lpae.c b/drivers/gpu/arm/bifrost/mmu/mali_kbase_mmu_mode_lpae.c similarity index 98% rename from drivers/gpu/arm/bifrost/mali_kbase_mmu_mode_lpae.c rename to drivers/gpu/arm/bifrost/mmu/mali_kbase_mmu_mode_lpae.c index 7ec90cf2d8cd..91a2d7ac4dcb 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_mmu_mode_lpae.c +++ b/drivers/gpu/arm/bifrost/mmu/mali_kbase_mmu_mode_lpae.c @@ -22,7 +22,7 @@ #include "mali_kbase.h" -#include "mali_midg_regmap.h" +#include #include "mali_kbase_defs.h" #define ENTRY_TYPE_MASK 3ULL @@ -70,7 +70,8 @@ static void mmu_get_as_setup(struct kbase_mmu_table *mmut, struct 
kbase_mmu_setup * const setup) { /* Set up the required caching policies at the correct indices - * in the memattr register. */ + * in the memattr register. + */ setup->memattr = (AS_MEMATTR_LPAE_IMPL_DEF_CACHE_POLICY << (AS_MEMATTR_INDEX_IMPL_DEF_CACHE_POLICY * 8)) | diff --git a/drivers/gpu/arm/bifrost/platform/meson/mali_kbase_runtime_pm.c b/drivers/gpu/arm/bifrost/platform/meson/mali_kbase_runtime_pm.c deleted file mode 100644 index 5b7378db8687..000000000000 --- a/drivers/gpu/arm/bifrost/platform/meson/mali_kbase_runtime_pm.c +++ /dev/null @@ -1,257 +0,0 @@ -/* - * - * (C) COPYRIGHT 2015, 2017-2019 ARM Limited. All rights reserved. - * - * This program is free software and is provided to you under the terms of the - * GNU General Public License version 2 as published by the Free Software - * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, you can access it online at - * http://www.gnu.org/licenses/gpl-2.0.html. 
- * - * SPDX-License-Identifier: GPL-2.0 - * - */ - -#include -#include -#include - -#include -#include -#include -#include -#include -#include - -#include "mali_kbase_config_platform.h" - - -static struct reset_control **resets; -static int nr_resets; - -static int resets_init(struct kbase_device *kbdev) -{ - struct device_node *np; - int i; - int err = 0; - - np = kbdev->dev->of_node; - - nr_resets = of_count_phandle_with_args(np, "resets", "#reset-cells"); - if (nr_resets <= 0) { - dev_err(kbdev->dev, "Failed to get GPU resets from dtb\n"); - return nr_resets; - } - - resets = devm_kcalloc(kbdev->dev, nr_resets, sizeof(*resets), - GFP_KERNEL); - if (!resets) - return -ENOMEM; - - for (i = 0; i < nr_resets; ++i) { - resets[i] = devm_reset_control_get_exclusive_by_index( - kbdev->dev, i); - if (IS_ERR(resets[i])) { - err = PTR_ERR(resets[i]); - nr_resets = i; - break; - } - } - - return err; -} - -static int pm_callback_soft_reset(struct kbase_device *kbdev) -{ - int ret, i; - - if (!resets) { - ret = resets_init(kbdev); - if (ret) - return ret; - } - - for (i = 0; i < nr_resets; ++i) - reset_control_assert(resets[i]); - - udelay(10); - - for (i = 0; i < nr_resets; ++i) - reset_control_deassert(resets[i]); - - udelay(10); - - /* Override Power Management Settings, values from Amlogic KBase */ - kbase_reg_write(kbdev, GPU_CONTROL_REG(PWR_KEY), 0x2968A819); - kbase_reg_write(kbdev, GPU_CONTROL_REG(PWR_OVERRIDE1), - 0xfff | (0x20 << 16)); - - /* - * RESET_COMPLETED interrupt will be raised, so continue with - * the normal soft reset procedure - */ - return 0; -} - -static void enable_gpu_power_control(struct kbase_device *kbdev) -{ - unsigned int i; - -#if defined(CONFIG_REGULATOR) - for (i = 0; i < kbdev->nr_regulators; i++) { - if (WARN_ON_ONCE(kbdev->regulators[i] == NULL)) - ; - else if (!regulator_is_enabled(kbdev->regulators[i])) - WARN_ON(regulator_enable(kbdev->regulators[i])); - } -#endif - - for (i = 0; i < kbdev->nr_clocks; i++) { - if 
(WARN_ON_ONCE(kbdev->clocks[i] == NULL)) - ; - else if (!__clk_is_enabled(kbdev->clocks[i])) - WARN_ON(clk_prepare_enable(kbdev->clocks[i])); - } -} - -static void disable_gpu_power_control(struct kbase_device *kbdev) -{ - unsigned int i; - - for (i = 0; i < kbdev->nr_clocks; i++) { - if (WARN_ON_ONCE(kbdev->clocks[i] == NULL)) - ; - else if (__clk_is_enabled(kbdev->clocks[i])) { - clk_disable_unprepare(kbdev->clocks[i]); - WARN_ON(__clk_is_enabled(kbdev->clocks[i])); - } - - } - -#if defined(CONFIG_REGULATOR) - for (i = 0; i < kbdev->nr_regulators; i++) { - if (WARN_ON_ONCE(kbdev->regulators[i] == NULL)) - ; - else if (regulator_is_enabled(kbdev->regulators[i])) - WARN_ON(regulator_disable(kbdev->regulators[i])); - } -#endif -} - -static int pm_callback_power_on(struct kbase_device *kbdev) -{ - int ret = 1; /* Assume GPU has been powered off */ - int error; - - dev_dbg(kbdev->dev, "%s %p\n", __func__, - (void *)kbdev->dev->pm_domain); - - enable_gpu_power_control(kbdev); - - error = pm_runtime_get_sync(kbdev->dev); - if (error == 1) { - /* - * Let core know that the chip has not been - * powered off, so we can save on re-initialization. 
- */ - ret = 0; - } - - dev_dbg(kbdev->dev, "pm_runtime_get_sync returned %d\n", error); - - return ret; -} - -static void pm_callback_power_off(struct kbase_device *kbdev) -{ - dev_dbg(kbdev->dev, "%s\n", __func__); - - pm_runtime_mark_last_busy(kbdev->dev); - pm_runtime_put_autosuspend(kbdev->dev); - -#ifndef KBASE_PM_RUNTIME - disable_gpu_power_control(kbdev); -#endif -} - -#ifdef KBASE_PM_RUNTIME -static int kbase_device_runtime_init(struct kbase_device *kbdev) -{ - int ret = 0; - - dev_dbg(kbdev->dev, "%s\n", __func__); - - pm_runtime_set_autosuspend_delay(kbdev->dev, AUTO_SUSPEND_DELAY); - pm_runtime_use_autosuspend(kbdev->dev); - - pm_runtime_set_active(kbdev->dev); - pm_runtime_enable(kbdev->dev); - - if (!pm_runtime_enabled(kbdev->dev)) { - dev_warn(kbdev->dev, "pm_runtime not enabled"); - ret = -ENOENT; - } - - return ret; -} - -static void kbase_device_runtime_disable(struct kbase_device *kbdev) -{ - dev_dbg(kbdev->dev, "%s\n", __func__); - pm_runtime_disable(kbdev->dev); -} -#endif - -static int pm_callback_runtime_on(struct kbase_device *kbdev) -{ - dev_dbg(kbdev->dev, "%s\n", __func__); - - enable_gpu_power_control(kbdev); - return 0; -} - -static void pm_callback_runtime_off(struct kbase_device *kbdev) -{ - dev_dbg(kbdev->dev, "%s\n", __func__); - - disable_gpu_power_control(kbdev); -} - -static void pm_callback_resume(struct kbase_device *kbdev) -{ - int ret = pm_callback_runtime_on(kbdev); - - WARN_ON(ret); -} - -static void pm_callback_suspend(struct kbase_device *kbdev) -{ - pm_callback_runtime_off(kbdev); -} - -struct kbase_pm_callback_conf pm_callbacks = { - .power_on_callback = pm_callback_power_on, - .power_off_callback = pm_callback_power_off, - .power_suspend_callback = pm_callback_suspend, - .power_resume_callback = pm_callback_resume, - .soft_reset_callback = pm_callback_soft_reset, -#ifdef KBASE_PM_RUNTIME - .power_runtime_init_callback = kbase_device_runtime_init, - .power_runtime_term_callback = kbase_device_runtime_disable, - 
.power_runtime_on_callback = pm_callback_runtime_on, - .power_runtime_off_callback = pm_callback_runtime_off, -#else /* KBASE_PM_RUNTIME */ - .power_runtime_init_callback = NULL, - .power_runtime_term_callback = NULL, - .power_runtime_on_callback = NULL, - .power_runtime_off_callback = NULL, -#endif /* KBASE_PM_RUNTIME */ -}; diff --git a/drivers/gpu/arm/bifrost/tests/Mconfig b/drivers/gpu/arm/bifrost/tests/Mconfig index f67ac48d7569..bba96b3d9e48 100644 --- a/drivers/gpu/arm/bifrost/tests/Mconfig +++ b/drivers/gpu/arm/bifrost/tests/Mconfig @@ -1,5 +1,5 @@ # -# (C) COPYRIGHT 2018 ARM Limited. All rights reserved. +# (C) COPYRIGHT 2018-2020 ARM Limited. All rights reserved. # # This program is free software and is provided to you under the terms of the # GNU General Public License version 2 as published by the Free Software @@ -30,3 +30,9 @@ config BUILD_CSF_TESTS bool default y if UNIT_TEST_KERNEL_MODULES && GPU_HAS_CSF default n + +config BUILD_ARBIF_TESTS + bool + default y if UNIT_TEST_KERNEL_MODULES && MALI_ARBITER_SUPPORT + default n + diff --git a/drivers/gpu/arm/bifrost/tests/kutf/build.bp b/drivers/gpu/arm/bifrost/tests/kutf/build.bp index f0c7a0ca4950..32eab143e669 100644 --- a/drivers/gpu/arm/bifrost/tests/kutf/build.bp +++ b/drivers/gpu/arm/bifrost/tests/kutf/build.bp @@ -1,13 +1,16 @@ /* - * Copyright: - * ---------------------------------------------------------------------------- - * This confidential and proprietary software may be used only as authorized - * by a licensing agreement from ARM Limited. - * (C) COPYRIGHT 2018-2019 ARM Limited, ALL RIGHTS RESERVED - * The entire notice above must be reproduced on all authorized copies and - * copies may only be made to the extent permitted by a licensing agreement - * from ARM Limited. - * ---------------------------------------------------------------------------- + * + * (C) COPYRIGHT 2018-2020 ARM Limited. All rights reserved. 
+ * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + * */ bob_kernel_module { diff --git a/drivers/gpu/arm/bifrost/tests/kutf/kutf_suite.c b/drivers/gpu/arm/bifrost/tests/kutf/kutf_suite.c index 3307c0e07957..3f15669a2a0a 100644 --- a/drivers/gpu/arm/bifrost/tests/kutf/kutf_suite.c +++ b/drivers/gpu/arm/bifrost/tests/kutf/kutf_suite.c @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2014, 2017-2019 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014, 2017-2020 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -634,6 +634,17 @@ static void kutf_remove_test_variant(struct kutf_test_fixture *test_fix) kfree(test_fix); } +#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 5, 0) +/* Adapting to the upstream debugfs_create_x32() change */ +static int ktufp_u32_get(void *data, u64 *val) +{ + *val = *(u32 *)data; + return 0; +} + +DEFINE_DEBUGFS_ATTRIBUTE(kutfp_fops_x32_ro, ktufp_u32_get, NULL, "0x%08llx\n"); +#endif + void kutf_add_test_with_filters_and_data( struct kutf_suite *suite, unsigned int id, @@ -668,8 +679,13 @@ void kutf_add_test_with_filters_and_data( } test_func->filters = filters; +#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 5, 0) + tmp = debugfs_create_file_unsafe("filters", S_IROTH, test_func->dir, + &test_func->filters, &kutfp_fops_x32_ro); +#else tmp = debugfs_create_x32("filters", S_IROTH, test_func->dir, &test_func->filters); +#endif if (!tmp) { pr_err("Failed to create debugfs file \"filters\" when adding test %s\n", name); goto 
fail_file; diff --git a/drivers/gpu/arm/bifrost/tests/mali_kutf_irq_test/Makefile b/drivers/gpu/arm/bifrost/tests/mali_kutf_irq_test/Makefile index 9218a40f8069..bc4d654a90ca 100644 --- a/drivers/gpu/arm/bifrost/tests/mali_kutf_irq_test/Makefile +++ b/drivers/gpu/arm/bifrost/tests/mali_kutf_irq_test/Makefile @@ -1,5 +1,5 @@ # -# (C) COPYRIGHT 2015, 2017-2018 ARM Limited. All rights reserved. +# (C) COPYRIGHT 2015, 2017-2018, 2020 ARM Limited. All rights reserved. # # This program is free software and is provided to you under the terms of the # GNU General Public License version 2 as published by the Free Software @@ -38,6 +38,8 @@ TEST_CCFLAGS := \ -I$(CURDIR)/../../../ \ -I$(CURDIR)/../../ \ -I$(CURDIR)/../../backend/gpu \ + -I$(CURDIR)/../../debug \ + -I$(CURDIR)/../../debug/backend \ -I$(CURDIR)/ \ -I$(srctree)/drivers/staging/android \ -I$(srctree)/include/linux diff --git a/drivers/gpu/arm/bifrost/tests/mali_kutf_irq_test/build.bp b/drivers/gpu/arm/bifrost/tests/mali_kutf_irq_test/build.bp index 971f09213fa7..90efdcf9ad9c 100644 --- a/drivers/gpu/arm/bifrost/tests/mali_kutf_irq_test/build.bp +++ b/drivers/gpu/arm/bifrost/tests/mali_kutf_irq_test/build.bp @@ -1,13 +1,16 @@ /* - * Copyright: - * ---------------------------------------------------------------------------- - * This confidential and proprietary software may be used only as authorized - * by a licensing agreement from ARM Limited. - * (C) COPYRIGHT 2018-2019 ARM Limited, ALL RIGHTS RESERVED - * The entire notice above must be reproduced on all authorized copies and - * copies may only be made to the extent permitted by a licensing agreement - * from ARM Limited. - * ---------------------------------------------------------------------------- + * + * (C) COPYRIGHT 2018-2020 ARM Limited. All rights reserved. 
+ * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + * */ bob_kernel_module { diff --git a/drivers/gpu/arm/bifrost/tests/mali_kutf_irq_test/mali_kutf_irq_test_main.c b/drivers/gpu/arm/bifrost/tests/mali_kutf_irq_test/mali_kutf_irq_test_main.c index 4181b7f92db6..26b442a82fdd 100644 --- a/drivers/gpu/arm/bifrost/tests/mali_kutf_irq_test/mali_kutf_irq_test_main.c +++ b/drivers/gpu/arm/bifrost/tests/mali_kutf_irq_test/mali_kutf_irq_test_main.c @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2016-2018 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2016-2020 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -26,6 +26,7 @@ #include "mali_kbase.h" #include +#include #include #include @@ -55,11 +56,12 @@ struct kutf_irq_fixture_data { /* ID for the GPU IRQ */ #define GPU_IRQ_HANDLER 2 -#define NR_TEST_IRQS 1000000 +#define NR_TEST_IRQS ((u32)1000000) -/* IRQ for the test to trigger. Currently MULTIPLE_GPU_FAULTS as we would not - * expect to see this in normal use (e.g., when Android is running). */ -#define TEST_IRQ MULTIPLE_GPU_FAULTS +/* IRQ for the test to trigger. 
Currently POWER_CHANGED_SINGLE as it is + * otherwise unused in the DDK + */ +#define TEST_IRQ POWER_CHANGED_SINGLE #define IRQ_TIMEOUT HZ @@ -67,7 +69,7 @@ struct kutf_irq_fixture_data { extern int kbase_set_custom_irq_handler(struct kbase_device *kbdev, irq_handler_t custom_handler, int irq_type); -extern irqreturn_t kbase_gpu_irq_handler(int irq, void *data); +extern irqreturn_t kbase_gpu_irq_test_handler(int irq, void *data, u32 val); static DECLARE_WAIT_QUEUE_HEAD(wait); static bool triggered; @@ -88,25 +90,30 @@ static void *kbase_untag(void *ptr) static irqreturn_t kbase_gpu_irq_custom_handler(int irq, void *data) { struct kbase_device *kbdev = kbase_untag(data); - u32 val; + u32 val = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_STATUS)); + irqreturn_t result; + u64 tval; + bool has_test_irq = val & TEST_IRQ; - val = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_STATUS)); - if (val & TEST_IRQ) { - struct timespec tval; - - getnstimeofday(&tval); - irq_time = SEC_TO_NANO(tval.tv_sec) + (tval.tv_nsec); - - kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_CLEAR), val); - - triggered = true; - wake_up(&wait); - - return IRQ_HANDLED; + if (has_test_irq) { + tval = ktime_get_real_ns(); + /* Clear the test source only here */ + kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_CLEAR), + TEST_IRQ); + /* Remove the test IRQ status bit */ + val = val ^ TEST_IRQ; } - /* Trigger main irq handler */ - return kbase_gpu_irq_handler(irq, data); + result = kbase_gpu_irq_test_handler(irq, data, val); + + if (has_test_irq) { + irq_time = tval; + triggered = true; + wake_up(&wait); + result = IRQ_HANDLED; + } + + return result; } /** @@ -173,33 +180,28 @@ static void mali_kutf_irq_latency(struct kutf_context *context) struct kutf_irq_fixture_data *data = context->fixture; struct kbase_device *kbdev = data->kbdev; u64 min_time = U64_MAX, max_time = 0, average_time = 0; - int i; - bool test_failed = false; + u32 i; + const char *results; /* Force GPU to be powered */ 
kbase_pm_context_active(kbdev); + kbase_pm_wait_for_desired_state(kbdev); kbase_set_custom_irq_handler(kbdev, kbase_gpu_irq_custom_handler, GPU_IRQ_HANDLER); - for (i = 0; i < NR_TEST_IRQS; i++) { - struct timespec tval; - u64 start_time; - int ret; + for (i = 1; i <= NR_TEST_IRQS; i++) { + u64 start_time = ktime_get_real_ns(); triggered = false; - getnstimeofday(&tval); - start_time = SEC_TO_NANO(tval.tv_sec) + (tval.tv_nsec); /* Trigger fake IRQ */ kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_RAWSTAT), TEST_IRQ); - ret = wait_event_timeout(wait, triggered != false, IRQ_TIMEOUT); - - if (ret == 0) { - kutf_test_fail(context, "Timed out waiting for IRQ\n"); - test_failed = true; + if (wait_event_timeout(wait, triggered, IRQ_TIMEOUT) == 0) { + /* Wait extra time to see if it would come */ + wait_event_timeout(wait, triggered, 10 * IRQ_TIMEOUT); break; } @@ -217,14 +219,17 @@ static void mali_kutf_irq_latency(struct kutf_context *context) kbase_pm_context_idle(kbdev); - if (!test_failed) { - const char *results; - + if (i > NR_TEST_IRQS) { do_div(average_time, NR_TEST_IRQS); results = kutf_dsprintf(&context->fixture_pool, "Min latency = %lldns, Max latency = %lldns, Average latency = %lldns\n", min_time, max_time, average_time); kutf_test_pass(context, results); + } else { + results = kutf_dsprintf(&context->fixture_pool, + "Timed out for the %u-th IRQ (loop_limit: %u), triggered late: %d\n", + i, NR_TEST_IRQS, triggered); + kutf_test_fail(context, results); } } diff --git a/drivers/gpu/arm/bifrost/tl/backend/mali_kbase_timeline_jm.c b/drivers/gpu/arm/bifrost/tl/backend/mali_kbase_timeline_jm.c new file mode 100644 index 000000000000..c368ac7288da --- /dev/null +++ b/drivers/gpu/arm/bifrost/tl/backend/mali_kbase_timeline_jm.c @@ -0,0 +1,97 @@ +/* + * + * (C) COPYRIGHT 2019 ARM Limited. All rights reserved. 
+ * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +#include "../mali_kbase_tracepoints.h" +#include "../mali_kbase_timeline.h" +#include "../mali_kbase_timeline_priv.h" + +#include + +void kbase_create_timeline_objects(struct kbase_device *kbdev) +{ + unsigned int lpu_id; + unsigned int as_nr; + struct kbase_context *kctx; + struct kbase_timeline *timeline = kbdev->timeline; + struct kbase_tlstream *summary = + &timeline->streams[TL_STREAM_TYPE_OBJ_SUMMARY]; + + /* Summarize the LPU objects. */ + for (lpu_id = 0; lpu_id < kbdev->gpu_props.num_job_slots; lpu_id++) { + u32 *lpu = + &kbdev->gpu_props.props.raw_props.js_features[lpu_id]; + __kbase_tlstream_tl_new_lpu(summary, lpu, lpu_id, *lpu); + } + + /* Summarize the Address Space objects. */ + for (as_nr = 0; as_nr < kbdev->nr_hw_address_spaces; as_nr++) + __kbase_tlstream_tl_new_as(summary, &kbdev->as[as_nr], as_nr); + + /* Create GPU object and make it retain all LPUs and address spaces. 
*/ + __kbase_tlstream_tl_new_gpu(summary, + kbdev, + kbdev->gpu_props.props.raw_props.gpu_id, + kbdev->gpu_props.num_cores); + + for (lpu_id = 0; lpu_id < kbdev->gpu_props.num_job_slots; lpu_id++) { + void *lpu = + &kbdev->gpu_props.props.raw_props.js_features[lpu_id]; + __kbase_tlstream_tl_lifelink_lpu_gpu(summary, lpu, kbdev); + } + + for (as_nr = 0; as_nr < kbdev->nr_hw_address_spaces; as_nr++) + __kbase_tlstream_tl_lifelink_as_gpu(summary, + &kbdev->as[as_nr], + kbdev); + + /* Lock the context list, to ensure no changes to the list are made + * while we're summarizing the contexts and their contents. + */ + mutex_lock(&kbdev->kctx_list_lock); + + /* For each context in the device... */ + list_for_each_entry(kctx, &kbdev->kctx_list, kctx_list_link) { + /* Summarize the context itself */ + __kbase_tlstream_tl_new_ctx(summary, + kctx, + kctx->id, + (u32)(kctx->tgid)); + }; + + /* Reset body stream buffers while holding the kctx lock. + * This ensures we can't fire both summary and normal tracepoints for + * the same objects. + * If we weren't holding the lock, it's possible that the summarized + * objects could have been created, destroyed, or used after we + * constructed the summary stream tracepoints, but before we reset + * the body stream, resulting in losing those object event tracepoints. + */ + kbase_timeline_streams_body_reset(timeline); + + mutex_unlock(&kbdev->kctx_list_lock); + + /* Static object are placed into summary packet that needs to be + * transmitted first. Flush all streams to make it available to + * user space. 
+ */ + kbase_timeline_streams_flush(timeline); +} \ No newline at end of file diff --git a/drivers/gpu/arm/bifrost/mali_kbase_timeline.c b/drivers/gpu/arm/bifrost/tl/mali_kbase_timeline.c similarity index 72% rename from drivers/gpu/arm/bifrost/mali_kbase_timeline.c rename to drivers/gpu/arm/bifrost/tl/mali_kbase_timeline.c index 4cd1a5ca8670..8c465a4f4425 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_timeline.c +++ b/drivers/gpu/arm/bifrost/tl/mali_kbase_timeline.c @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2015-2019 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2015-2020 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -105,12 +105,12 @@ static void kbasep_timeline_autoflush_timer_callback(struct timer_list *timer) /*****************************************************************************/ int kbase_timeline_init(struct kbase_timeline **timeline, - atomic_t *timeline_is_enabled) + atomic_t *timeline_flags) { enum tl_stream_type i; struct kbase_timeline *result; - if (!timeline || !timeline_is_enabled) + if (!timeline || !timeline_flags) return -EINVAL; result = kzalloc(sizeof(*result), GFP_KERNEL); @@ -129,7 +129,8 @@ int kbase_timeline_init(struct kbase_timeline **timeline, atomic_set(&result->autoflush_timer_active, 0); kbase_timer_setup(&result->autoflush_timer, kbasep_timeline_autoflush_timer_callback); - result->is_enabled = timeline_is_enabled; + result->timeline_flags = timeline_flags; + *timeline = result; return 0; @@ -142,82 +143,13 @@ void kbase_timeline_term(struct kbase_timeline *timeline) if (!timeline) return; + for (i = (enum tl_stream_type)0; i < TL_STREAM_TYPE_COUNT; i++) kbase_tlstream_term(&timeline->streams[i]); kfree(timeline); } -static void kbase_create_timeline_objects(struct kbase_device *kbdev) -{ - unsigned int lpu_id; - unsigned int as_nr; - struct kbase_context *kctx; - struct 
kbase_timeline *timeline = kbdev->timeline; - struct kbase_tlstream *summary = - &timeline->streams[TL_STREAM_TYPE_OBJ_SUMMARY]; - - /* Summarize the LPU objects. */ - for (lpu_id = 0; lpu_id < kbdev->gpu_props.num_job_slots; lpu_id++) { - u32 *lpu = - &kbdev->gpu_props.props.raw_props.js_features[lpu_id]; - __kbase_tlstream_tl_new_lpu(summary, lpu, lpu_id, *lpu); - } - - /* Summarize the Address Space objects. */ - for (as_nr = 0; as_nr < kbdev->nr_hw_address_spaces; as_nr++) - __kbase_tlstream_tl_new_as(summary, &kbdev->as[as_nr], as_nr); - - /* Create GPU object and make it retain all LPUs and address spaces. */ - __kbase_tlstream_tl_new_gpu(summary, - kbdev, - kbdev->gpu_props.props.raw_props.gpu_id, - kbdev->gpu_props.num_cores); - - for (lpu_id = 0; lpu_id < kbdev->gpu_props.num_job_slots; lpu_id++) { - void *lpu = - &kbdev->gpu_props.props.raw_props.js_features[lpu_id]; - __kbase_tlstream_tl_lifelink_lpu_gpu(summary, lpu, kbdev); - } - - for (as_nr = 0; as_nr < kbdev->nr_hw_address_spaces; as_nr++) - __kbase_tlstream_tl_lifelink_as_gpu(summary, - &kbdev->as[as_nr], - kbdev); - - /* Lock the context list, to ensure no changes to the list are made - * while we're summarizing the contexts and their contents. - */ - mutex_lock(&kbdev->kctx_list_lock); - - /* For each context in the device... */ - list_for_each_entry(kctx, &kbdev->kctx_list, kctx_list_link) { - /* Summarize the context itself */ - __kbase_tlstream_tl_new_ctx(summary, - kctx, - kctx->id, - (u32)(kctx->tgid)); - }; - - /* Reset body stream buffers while holding the kctx lock. - * This ensures we can't fire both summary and normal tracepoints for - * the same objects. - * If we weren't holding the lock, it's possible that the summarized - * objects could have been created, destroyed, or used after we - * constructed the summary stream tracepoints, but before we reset - * the body stream, resulting in losing those object event tracepoints. 
- */ - kbase_timeline_streams_body_reset(timeline); - - mutex_unlock(&kbdev->kctx_list_lock); - - /* Static object are placed into summary packet that needs to be - * transmitted first. Flush all streams to make it available to - * user space. - */ - kbase_timeline_streams_flush(timeline); -} - #ifdef CONFIG_MALI_BIFROST_DEVFREQ static void kbase_tlstream_current_devfreq_target(struct kbase_device *kbdev) { @@ -244,10 +176,10 @@ static void kbase_tlstream_current_devfreq_target(struct kbase_device *kbdev) int kbase_timeline_io_acquire(struct kbase_device *kbdev, u32 flags) { int ret; - u32 tlstream_enabled = TLSTREAM_ENABLED | flags; + u32 timeline_flags = TLSTREAM_ENABLED | flags; struct kbase_timeline *timeline = kbdev->timeline; - if (!atomic_cmpxchg(timeline->is_enabled, 0, tlstream_enabled)) { + if (!atomic_cmpxchg(timeline->timeline_flags, 0, timeline_flags)) { int rcode; ret = anon_inode_getfd( @@ -256,7 +188,7 @@ int kbase_timeline_io_acquire(struct kbase_device *kbdev, u32 flags) timeline, O_RDONLY | O_CLOEXEC); if (ret < 0) { - atomic_set(timeline->is_enabled, 0); + atomic_set(timeline->timeline_flags, 0); return ret; } diff --git a/drivers/gpu/arm/bifrost/mali_kbase_timeline.h b/drivers/gpu/arm/bifrost/tl/mali_kbase_timeline.h similarity index 90% rename from drivers/gpu/arm/bifrost/mali_kbase_timeline.h rename to drivers/gpu/arm/bifrost/tl/mali_kbase_timeline.h index d80028802ebb..cd48411b45cf 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_timeline.h +++ b/drivers/gpu/arm/bifrost/tl/mali_kbase_timeline.h @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2015-2019 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2015-2020 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -31,15 +31,15 @@ struct kbase_timeline; /** * kbase_timeline_init - initialize timeline infrastructure in kernel - * @timeline: Newly created instance of kbase_timeline will - * be stored in this pointer. - * @timeline_is_enabled: Timeline status will be written to this variable - * when a client is attached/detached. The variable - * must be valid while timeline instance is valid. + * @timeline: Newly created instance of kbase_timeline will be stored in + * this pointer. + * @timeline_flags: Timeline status will be written to this variable when a + * client is attached/detached. The variable must be valid + * while timeline instance is valid. * Return: zero on success, negative number on error */ int kbase_timeline_init(struct kbase_timeline **timeline, - atomic_t *timeline_is_enabled); + atomic_t *timeline_flags); /** * kbase_timeline_term - terminate timeline infrastructure in kernel diff --git a/drivers/gpu/arm/bifrost/mali_kbase_timeline_io.c b/drivers/gpu/arm/bifrost/tl/mali_kbase_timeline_io.c similarity index 82% rename from drivers/gpu/arm/bifrost/mali_kbase_timeline_io.c rename to drivers/gpu/arm/bifrost/tl/mali_kbase_timeline_io.c index ffcf84a12596..cdde928bbab9 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_timeline_io.c +++ b/drivers/gpu/arm/bifrost/tl/mali_kbase_timeline_io.c @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2019 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2019-2020 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -20,9 +20,9 @@ * */ -#include -#include -#include +#include "mali_kbase_timeline_priv.h" +#include "mali_kbase_tlstream.h" +#include "mali_kbase_tracepoints.h" #include @@ -84,6 +84,43 @@ static int kbasep_timeline_io_packet_pending( return 0; } +/** + * copy_stream_header() - copy timeline stream header. + * + * @buffer: Pointer to the buffer provided by user. + * @size: Maximum amount of data that can be stored in the buffer. + * @copy_len: Pointer to amount of bytes that has been copied already + * within the read system call. + * @hdr: Pointer to the stream header. + * @hdr_size: Header size. + * @hdr_btc: Pointer to the remaining number of bytes to copy. + * + * Returns: 0 if success, -1 otherwise. + */ +static inline int copy_stream_header( + char __user *buffer, size_t size, ssize_t *copy_len, + const char *hdr, + size_t hdr_size, + size_t *hdr_btc) +{ + const size_t offset = hdr_size - *hdr_btc; + const size_t copy_size = MIN(size - *copy_len, *hdr_btc); + + if (!*hdr_btc) + return 0; + + if (WARN_ON(*hdr_btc > hdr_size)) + return -1; + + if (copy_to_user(&buffer[*copy_len], &hdr[offset], copy_size)) + return -1; + + *hdr_btc -= copy_size; + *copy_len += copy_size; + + return 0; +} + /** * kbasep_timeline_copy_header - copy timeline headers to the user * @timeline: Timeline instance @@ -93,51 +130,28 @@ static int kbasep_timeline_io_packet_pending( * within the read system call. * * This helper function checks if timeline headers have not been sent - * to the user, and if so, sends them. @ref copy_len is respectively + * to the user, and if so, sends them. copy_len is respectively * updated. * * Returns: 0 if success, -1 if copy_to_user has failed. 
*/ -static inline int kbasep_timeline_copy_header( +static inline int kbasep_timeline_copy_headers( struct kbase_timeline *timeline, char __user *buffer, size_t size, ssize_t *copy_len) { - if (timeline->obj_header_btc) { - size_t offset = obj_desc_header_size - - timeline->obj_header_btc; + if (copy_stream_header(buffer, size, copy_len, + obj_desc_header, + obj_desc_header_size, + &timeline->obj_header_btc)) + return -1; - size_t header_cp_size = MIN( - size - *copy_len, - timeline->obj_header_btc); - - if (copy_to_user( - &buffer[*copy_len], - &obj_desc_header[offset], - header_cp_size)) - return -1; - - timeline->obj_header_btc -= header_cp_size; - *copy_len += header_cp_size; - } - - if (timeline->aux_header_btc) { - size_t offset = aux_desc_header_size - - timeline->aux_header_btc; - size_t header_cp_size = MIN( - size - *copy_len, - timeline->aux_header_btc); - - if (copy_to_user( - &buffer[*copy_len], - &aux_desc_header[offset], - header_cp_size)) - return -1; - - timeline->aux_header_btc -= header_cp_size; - *copy_len += header_cp_size; - } + if (copy_stream_header(buffer, size, copy_len, + aux_desc_header, + aux_desc_header_size, + &timeline->aux_header_btc)) + return -1; return 0; } @@ -183,7 +197,7 @@ static ssize_t kbasep_timeline_io_read( unsigned int rb_idx; size_t rb_size; - if (kbasep_timeline_copy_header( + if (kbasep_timeline_copy_headers( timeline, buffer, size, &copy_len)) { copy_len = -EFAULT; break; @@ -305,10 +319,11 @@ static int kbasep_timeline_io_release(struct inode *inode, struct file *filp) timeline = (struct kbase_timeline *) filp->private_data; + /* Stop autoflush timer before releasing access to streams.
*/ atomic_set(&timeline->autoflush_timer_active, 0); del_timer_sync(&timeline->autoflush_timer); - atomic_set(timeline->is_enabled, 0); + atomic_set(timeline->timeline_flags, 0); return 0; } diff --git a/drivers/gpu/arm/bifrost/mali_kbase_timeline_priv.h b/drivers/gpu/arm/bifrost/tl/mali_kbase_timeline_priv.h similarity index 89% rename from drivers/gpu/arm/bifrost/mali_kbase_timeline_priv.h rename to drivers/gpu/arm/bifrost/tl/mali_kbase_timeline_priv.h index e4a4a2014260..3596584d85c6 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_timeline_priv.h +++ b/drivers/gpu/arm/bifrost/tl/mali_kbase_timeline_priv.h @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2019 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2019-2020 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -24,7 +24,8 @@ #define _KBASE_TIMELINE_PRIV_H #include -#include +#include "mali_kbase_tlstream.h" + #include #include @@ -39,7 +40,7 @@ * have access to the timeline streams at any given time. * @event_queue: Timeline stream event queue * @bytes_collected: Number of bytes read by user - * @is_enabled: Zero, if timeline is disabled. Timeline stream flags + * @timeline_flags: Zero, if timeline is disabled. Timeline stream flags * otherwise. See kbase_timeline_io_acquire(). 
* @obj_header_btc: Remaining bytes to copy for the object stream header * @aux_header_btc: Remaining bytes to copy for the aux stream header @@ -53,11 +54,13 @@ struct kbase_timeline { #if MALI_UNIT_TEST atomic_t bytes_collected; #endif /* MALI_UNIT_TEST */ - atomic_t *is_enabled; + atomic_t *timeline_flags; size_t obj_header_btc; size_t aux_header_btc; }; extern const struct file_operations kbasep_tlstream_fops; +void kbase_create_timeline_objects(struct kbase_device *kbdev); + #endif /* _KBASE_TIMELINE_PRIV_H */ diff --git a/drivers/gpu/arm/bifrost/mali_kbase_tl_serialize.h b/drivers/gpu/arm/bifrost/tl/mali_kbase_tl_serialize.h similarity index 95% rename from drivers/gpu/arm/bifrost/mali_kbase_tl_serialize.h rename to drivers/gpu/arm/bifrost/tl/mali_kbase_tl_serialize.h index 90808ce07a8d..3e378279cf2c 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_tl_serialize.h +++ b/drivers/gpu/arm/bifrost/tl/mali_kbase_tl_serialize.h @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2019 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2019-2020 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -113,11 +113,9 @@ static inline size_t kbasep_serialize_string( */ static inline size_t kbasep_serialize_timestamp(void *buffer, size_t pos) { - struct timespec ts; u64 timestamp; - getrawmonotonic(&ts); - timestamp = (u64)ts.tv_sec * NSECS_IN_SEC + ts.tv_nsec; + timestamp = ktime_get_raw_ns(); return kbasep_serialize_bytes( buffer, pos, diff --git a/drivers/gpu/arm/bifrost/mali_kbase_tlstream.c b/drivers/gpu/arm/bifrost/tl/mali_kbase_tlstream.c similarity index 93% rename from drivers/gpu/arm/bifrost/mali_kbase_tlstream.c rename to drivers/gpu/arm/bifrost/tl/mali_kbase_tlstream.c index 2a76bc04eb32..bec4be71570e 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_tlstream.c +++ b/drivers/gpu/arm/bifrost/tl/mali_kbase_tlstream.c @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2015-2019 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2015-2020 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -106,20 +106,31 @@ void kbase_tlstream_reset(struct kbase_tlstream *stream) atomic_set(&stream->rbi, 0); } -/* Configuration of timeline streams generated by kernel. - * Kernel emit only streams containing either timeline object events or - * auxiliary events. All streams have stream id value of 1 (as opposed to user - * space streams that have value of 0). - */ +/* Configuration of timeline streams generated by kernel. 
*/ static const struct { enum tl_packet_family pkt_family; enum tl_packet_class pkt_class; enum tl_packet_type pkt_type; - unsigned int stream_id; + enum tl_stream_id stream_id; } tl_stream_cfg[TL_STREAM_TYPE_COUNT] = { - {TL_PACKET_FAMILY_TL, TL_PACKET_CLASS_OBJ, TL_PACKET_TYPE_SUMMARY, 1}, - {TL_PACKET_FAMILY_TL, TL_PACKET_CLASS_OBJ, TL_PACKET_TYPE_BODY, 1}, - {TL_PACKET_FAMILY_TL, TL_PACKET_CLASS_AUX, TL_PACKET_TYPE_BODY, 1} + { + TL_PACKET_FAMILY_TL, + TL_PACKET_CLASS_OBJ, + TL_PACKET_TYPE_SUMMARY, + TL_STREAM_ID_KERNEL, + }, + { + TL_PACKET_FAMILY_TL, + TL_PACKET_CLASS_OBJ, + TL_PACKET_TYPE_BODY, + TL_STREAM_ID_KERNEL, + }, + { + TL_PACKET_FAMILY_TL, + TL_PACKET_CLASS_AUX, + TL_PACKET_TYPE_BODY, + TL_STREAM_ID_KERNEL, + }, }; void kbase_tlstream_init( diff --git a/drivers/gpu/arm/bifrost/mali_kbase_tlstream.h b/drivers/gpu/arm/bifrost/tl/mali_kbase_tlstream.h similarity index 98% rename from drivers/gpu/arm/bifrost/mali_kbase_tlstream.h rename to drivers/gpu/arm/bifrost/tl/mali_kbase_tlstream.h index 57977387e1a2..427bb0969540 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_tlstream.h +++ b/drivers/gpu/arm/bifrost/tl/mali_kbase_tlstream.h @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2015-2019 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2015-2020 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -97,7 +97,6 @@ enum tl_stream_type { TL_STREAM_TYPE_OBJ_SUMMARY = TL_STREAM_TYPE_FIRST, TL_STREAM_TYPE_OBJ, TL_STREAM_TYPE_AUX, - TL_STREAM_TYPE_COUNT }; diff --git a/drivers/gpu/arm/bifrost/mali_kbase_tracepoints.c b/drivers/gpu/arm/bifrost/tl/mali_kbase_tracepoints.c similarity index 75% rename from drivers/gpu/arm/bifrost/mali_kbase_tracepoints.c rename to drivers/gpu/arm/bifrost/tl/mali_kbase_tracepoints.c index 68c0c1fa42f7..e445a3a3d683 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_tracepoints.c +++ b/drivers/gpu/arm/bifrost/tl/mali_kbase_tracepoints.c @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2010-2019 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2010-2020 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -66,56 +66,65 @@ enum tl_msg_id_obj { KBASE_TL_EVENT_ATOM_SOFTSTOP_ISSUE, KBASE_TL_EVENT_ATOM_SOFTJOB_START, KBASE_TL_EVENT_ATOM_SOFTJOB_END, + KBASE_TL_EVENT_ARB_GRANTED, + KBASE_TL_EVENT_ARB_STARTED, + KBASE_TL_EVENT_ARB_STOP_REQUESTED, + KBASE_TL_EVENT_ARB_STOPPED, KBASE_JD_GPU_SOFT_RESET, - KBASE_TL_NEW_KCPUQUEUE, - KBASE_TL_RET_KCPUQUEUE_CTX, - KBASE_TL_DEL_KCPUQUEUE, - KBASE_TL_NRET_KCPUQUEUE_CTX, - KBASE_TL_EVENT_KCPUQUEUE_ENQUEUE_FENCE_SIGNAL, - KBASE_TL_EVENT_KCPUQUEUE_ENQUEUE_FENCE_WAIT, - KBASE_TL_EVENT_ARRAY_BEGIN_KCPUQUEUE_ENQUEUE_CQS_WAIT, - KBASE_TL_EVENT_ARRAY_ITEM_KCPUQUEUE_ENQUEUE_CQS_WAIT, - KBASE_TL_EVENT_ARRAY_END_KCPUQUEUE_ENQUEUE_CQS_WAIT, - KBASE_TL_EVENT_ARRAY_BEGIN_KCPUQUEUE_ENQUEUE_CQS_SET, - KBASE_TL_EVENT_ARRAY_ITEM_KCPUQUEUE_ENQUEUE_CQS_SET, - KBASE_TL_EVENT_ARRAY_END_KCPUQUEUE_ENQUEUE_CQS_SET, - KBASE_TL_EVENT_ARRAY_BEGIN_KCPUQUEUE_ENQUEUE_DEBUGCOPY, - KBASE_TL_EVENT_ARRAY_ITEM_KCPUQUEUE_ENQUEUE_DEBUGCOPY, - 
KBASE_TL_EVENT_ARRAY_END_KCPUQUEUE_ENQUEUE_DEBUGCOPY, - KBASE_TL_EVENT_KCPUQUEUE_ENQUEUE_MAP_IMPORT, - KBASE_TL_EVENT_KCPUQUEUE_ENQUEUE_UNMAP_IMPORT, - KBASE_TL_EVENT_KCPUQUEUE_ENQUEUE_UNMAP_IMPORT_FORCE, - KBASE_TL_EVENT_ARRAY_BEGIN_KCPUQUEUE_ENQUEUE_JIT_ALLOC, - KBASE_TL_EVENT_ARRAY_ITEM_KCPUQUEUE_ENQUEUE_JIT_ALLOC, - KBASE_TL_EVENT_ARRAY_END_KCPUQUEUE_ENQUEUE_JIT_ALLOC, - KBASE_TL_EVENT_ARRAY_BEGIN_KCPUQUEUE_ENQUEUE_JIT_FREE, - KBASE_TL_EVENT_ARRAY_ITEM_KCPUQUEUE_ENQUEUE_JIT_FREE, - KBASE_TL_EVENT_ARRAY_END_KCPUQUEUE_ENQUEUE_JIT_FREE, - KBASE_TL_EVENT_KCPUQUEUE_EXECUTE_FENCE_SIGNAL_START, - KBASE_TL_EVENT_KCPUQUEUE_EXECUTE_FENCE_SIGNAL_END, - KBASE_TL_EVENT_KCPUQUEUE_EXECUTE_FENCE_WAIT_START, - KBASE_TL_EVENT_KCPUQUEUE_EXECUTE_FENCE_WAIT_END, - KBASE_TL_EVENT_KCPUQUEUE_EXECUTE_CQS_WAIT_START, - KBASE_TL_EVENT_KCPUQUEUE_EXECUTE_CQS_WAIT_END, - KBASE_TL_EVENT_KCPUQUEUE_EXECUTE_CQS_SET_START, - KBASE_TL_EVENT_KCPUQUEUE_EXECUTE_CQS_SET_END, - KBASE_TL_EVENT_KCPUQUEUE_EXECUTE_DEBUGCOPY_START, - KBASE_TL_EVENT_KCPUQUEUE_EXECUTE_DEBUGCOPY_END, - KBASE_TL_EVENT_KCPUQUEUE_EXECUTE_MAP_IMPORT_START, - KBASE_TL_EVENT_KCPUQUEUE_EXECUTE_MAP_IMPORT_END, - KBASE_TL_EVENT_KCPUQUEUE_EXECUTE_UNMAP_IMPORT_START, - KBASE_TL_EVENT_KCPUQUEUE_EXECUTE_UNMAP_IMPORT_END, - KBASE_TL_EVENT_KCPUQUEUE_EXECUTE_UNMAP_IMPORT_FORCE_START, - KBASE_TL_EVENT_KCPUQUEUE_EXECUTE_UNMAP_IMPORT_FORCE_END, - KBASE_TL_EVENT_KCPUQUEUE_EXECUTE_JIT_ALLOC_START, - KBASE_TL_EVENT_ARRAY_BEGIN_KCPUQUEUE_EXECUTE_JIT_ALLOC_END, - KBASE_TL_EVENT_ARRAY_ITEM_KCPUQUEUE_EXECUTE_JIT_ALLOC_END, - KBASE_TL_EVENT_ARRAY_END_KCPUQUEUE_EXECUTE_JIT_ALLOC_END, - KBASE_TL_EVENT_KCPUQUEUE_EXECUTE_JIT_FREE_START, - KBASE_TL_EVENT_ARRAY_BEGIN_KCPUQUEUE_EXECUTE_JIT_FREE_END, - KBASE_TL_EVENT_ARRAY_ITEM_KCPUQUEUE_EXECUTE_JIT_FREE_END, - KBASE_TL_EVENT_ARRAY_END_KCPUQUEUE_EXECUTE_JIT_FREE_END, - KBASE_TL_EVENT_KCPUQUEUE_EXECUTE_ERRORBARRIER, + KBASE_TL_KBASE_NEW_DEVICE, + KBASE_TL_KBASE_DEVICE_PROGRAM_CSG, + 
KBASE_TL_KBASE_DEVICE_DEPROGRAM_CSG, + KBASE_TL_KBASE_NEW_CTX, + KBASE_TL_KBASE_DEL_CTX, + KBASE_TL_KBASE_CTX_ASSIGN_AS, + KBASE_TL_KBASE_CTX_UNASSIGN_AS, + KBASE_TL_KBASE_NEW_KCPUQUEUE, + KBASE_TL_KBASE_DEL_KCPUQUEUE, + KBASE_TL_KBASE_KCPUQUEUE_ENQUEUE_FENCE_SIGNAL, + KBASE_TL_KBASE_KCPUQUEUE_ENQUEUE_FENCE_WAIT, + KBASE_TL_KBASE_ARRAY_BEGIN_KCPUQUEUE_ENQUEUE_CQS_WAIT, + KBASE_TL_KBASE_ARRAY_ITEM_KCPUQUEUE_ENQUEUE_CQS_WAIT, + KBASE_TL_KBASE_ARRAY_END_KCPUQUEUE_ENQUEUE_CQS_WAIT, + KBASE_TL_KBASE_ARRAY_BEGIN_KCPUQUEUE_ENQUEUE_CQS_SET, + KBASE_TL_KBASE_ARRAY_ITEM_KCPUQUEUE_ENQUEUE_CQS_SET, + KBASE_TL_KBASE_ARRAY_END_KCPUQUEUE_ENQUEUE_CQS_SET, + KBASE_TL_KBASE_ARRAY_BEGIN_KCPUQUEUE_ENQUEUE_DEBUGCOPY, + KBASE_TL_KBASE_ARRAY_ITEM_KCPUQUEUE_ENQUEUE_DEBUGCOPY, + KBASE_TL_KBASE_ARRAY_END_KCPUQUEUE_ENQUEUE_DEBUGCOPY, + KBASE_TL_KBASE_KCPUQUEUE_ENQUEUE_MAP_IMPORT, + KBASE_TL_KBASE_KCPUQUEUE_ENQUEUE_UNMAP_IMPORT, + KBASE_TL_KBASE_KCPUQUEUE_ENQUEUE_UNMAP_IMPORT_FORCE, + KBASE_TL_KBASE_ARRAY_BEGIN_KCPUQUEUE_ENQUEUE_JIT_ALLOC, + KBASE_TL_KBASE_ARRAY_ITEM_KCPUQUEUE_ENQUEUE_JIT_ALLOC, + KBASE_TL_KBASE_ARRAY_END_KCPUQUEUE_ENQUEUE_JIT_ALLOC, + KBASE_TL_KBASE_ARRAY_BEGIN_KCPUQUEUE_ENQUEUE_JIT_FREE, + KBASE_TL_KBASE_ARRAY_ITEM_KCPUQUEUE_ENQUEUE_JIT_FREE, + KBASE_TL_KBASE_ARRAY_END_KCPUQUEUE_ENQUEUE_JIT_FREE, + KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_FENCE_SIGNAL_START, + KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_FENCE_SIGNAL_END, + KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_FENCE_WAIT_START, + KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_FENCE_WAIT_END, + KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_WAIT_START, + KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_WAIT_END, + KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_SET, + KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_DEBUGCOPY_START, + KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_DEBUGCOPY_END, + KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_MAP_IMPORT_START, + KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_MAP_IMPORT_END, + KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_UNMAP_IMPORT_START, + KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_UNMAP_IMPORT_END, + 
KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_UNMAP_IMPORT_FORCE_START, + KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_UNMAP_IMPORT_FORCE_END, + KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_JIT_ALLOC_START, + KBASE_TL_KBASE_ARRAY_BEGIN_KCPUQUEUE_EXECUTE_JIT_ALLOC_END, + KBASE_TL_KBASE_ARRAY_ITEM_KCPUQUEUE_EXECUTE_JIT_ALLOC_END, + KBASE_TL_KBASE_ARRAY_END_KCPUQUEUE_EXECUTE_JIT_ALLOC_END, + KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_JIT_FREE_START, + KBASE_TL_KBASE_ARRAY_BEGIN_KCPUQUEUE_EXECUTE_JIT_FREE_END, + KBASE_TL_KBASE_ARRAY_ITEM_KCPUQUEUE_EXECUTE_JIT_FREE_END, + KBASE_TL_KBASE_ARRAY_END_KCPUQUEUE_EXECUTE_JIT_FREE_END, + KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_ERRORBARRIER, + KBASE_TL_KBASE_CSFFW_TLSTREAM_OVERFLOW, KBASE_OBJ_MSG_COUNT, }; @@ -134,396 +143,434 @@ enum tl_msg_id_aux { KBASE_AUX_MSG_COUNT, }; -#define OBJ_TL_LIST \ - TP_DESC(KBASE_TL_NEW_CTX, \ +#define OBJ_TP_LIST \ + TRACEPOINT_DESC(KBASE_TL_NEW_CTX, \ "object ctx is created", \ "@pII", \ "ctx,ctx_nr,tgid") \ - TP_DESC(KBASE_TL_NEW_GPU, \ + TRACEPOINT_DESC(KBASE_TL_NEW_GPU, \ "object gpu is created", \ "@pII", \ "gpu,gpu_id,core_count") \ - TP_DESC(KBASE_TL_NEW_LPU, \ + TRACEPOINT_DESC(KBASE_TL_NEW_LPU, \ "object lpu is created", \ "@pII", \ "lpu,lpu_nr,lpu_fn") \ - TP_DESC(KBASE_TL_NEW_ATOM, \ + TRACEPOINT_DESC(KBASE_TL_NEW_ATOM, \ "object atom is created", \ "@pI", \ "atom,atom_nr") \ - TP_DESC(KBASE_TL_NEW_AS, \ + TRACEPOINT_DESC(KBASE_TL_NEW_AS, \ "address space object is created", \ "@pI", \ "address_space,as_nr") \ - TP_DESC(KBASE_TL_DEL_CTX, \ + TRACEPOINT_DESC(KBASE_TL_DEL_CTX, \ "context is destroyed", \ "@p", \ "ctx") \ - TP_DESC(KBASE_TL_DEL_ATOM, \ + TRACEPOINT_DESC(KBASE_TL_DEL_ATOM, \ "atom is destroyed", \ "@p", \ "atom") \ - TP_DESC(KBASE_TL_LIFELINK_LPU_GPU, \ + TRACEPOINT_DESC(KBASE_TL_LIFELINK_LPU_GPU, \ "lpu is deleted with gpu", \ "@pp", \ "lpu,gpu") \ - TP_DESC(KBASE_TL_LIFELINK_AS_GPU, \ + TRACEPOINT_DESC(KBASE_TL_LIFELINK_AS_GPU, \ "address space is deleted with gpu", \ "@pp", \ "address_space,gpu") \ - 
TP_DESC(KBASE_TL_RET_CTX_LPU, \ + TRACEPOINT_DESC(KBASE_TL_RET_CTX_LPU, \ "context is retained by lpu", \ "@pp", \ "ctx,lpu") \ - TP_DESC(KBASE_TL_RET_ATOM_CTX, \ + TRACEPOINT_DESC(KBASE_TL_RET_ATOM_CTX, \ "atom is retained by context", \ "@pp", \ "atom,ctx") \ - TP_DESC(KBASE_TL_RET_ATOM_LPU, \ + TRACEPOINT_DESC(KBASE_TL_RET_ATOM_LPU, \ "atom is retained by lpu", \ "@pps", \ "atom,lpu,attrib_match_list") \ - TP_DESC(KBASE_TL_NRET_CTX_LPU, \ + TRACEPOINT_DESC(KBASE_TL_NRET_CTX_LPU, \ "context is released by lpu", \ "@pp", \ "ctx,lpu") \ - TP_DESC(KBASE_TL_NRET_ATOM_CTX, \ + TRACEPOINT_DESC(KBASE_TL_NRET_ATOM_CTX, \ "atom is released by context", \ "@pp", \ "atom,ctx") \ - TP_DESC(KBASE_TL_NRET_ATOM_LPU, \ + TRACEPOINT_DESC(KBASE_TL_NRET_ATOM_LPU, \ "atom is released by lpu", \ "@pp", \ "atom,lpu") \ - TP_DESC(KBASE_TL_RET_AS_CTX, \ + TRACEPOINT_DESC(KBASE_TL_RET_AS_CTX, \ "address space is retained by context", \ "@pp", \ "address_space,ctx") \ - TP_DESC(KBASE_TL_NRET_AS_CTX, \ + TRACEPOINT_DESC(KBASE_TL_NRET_AS_CTX, \ "address space is released by context", \ "@pp", \ "address_space,ctx") \ - TP_DESC(KBASE_TL_RET_ATOM_AS, \ + TRACEPOINT_DESC(KBASE_TL_RET_ATOM_AS, \ "atom is retained by address space", \ "@pp", \ "atom,address_space") \ - TP_DESC(KBASE_TL_NRET_ATOM_AS, \ + TRACEPOINT_DESC(KBASE_TL_NRET_ATOM_AS, \ "atom is released by address space", \ "@pp", \ "atom,address_space") \ - TP_DESC(KBASE_TL_ATTRIB_ATOM_CONFIG, \ + TRACEPOINT_DESC(KBASE_TL_ATTRIB_ATOM_CONFIG, \ "atom job slot attributes", \ "@pLLI", \ "atom,descriptor,affinity,config") \ - TP_DESC(KBASE_TL_ATTRIB_ATOM_PRIORITY, \ + TRACEPOINT_DESC(KBASE_TL_ATTRIB_ATOM_PRIORITY, \ "atom priority", \ "@pI", \ "atom,prio") \ - TP_DESC(KBASE_TL_ATTRIB_ATOM_STATE, \ + TRACEPOINT_DESC(KBASE_TL_ATTRIB_ATOM_STATE, \ "atom state", \ "@pI", \ "atom,state") \ - TP_DESC(KBASE_TL_ATTRIB_ATOM_PRIORITIZED, \ + TRACEPOINT_DESC(KBASE_TL_ATTRIB_ATOM_PRIORITIZED, \ "atom caused priority change", \ "@p", \ "atom") \ - 
TP_DESC(KBASE_TL_ATTRIB_ATOM_JIT, \ + TRACEPOINT_DESC(KBASE_TL_ATTRIB_ATOM_JIT, \ "jit done for atom", \ "@pLLILILLL", \ "atom,edit_addr,new_addr,jit_flags,mem_flags,j_id,com_pgs,extent,va_pgs") \ - TP_DESC(KBASE_TL_JIT_USEDPAGES, \ + TRACEPOINT_DESC(KBASE_TL_JIT_USEDPAGES, \ "used pages for jit", \ "@LI", \ "used_pages,j_id") \ - TP_DESC(KBASE_TL_ATTRIB_ATOM_JITALLOCINFO, \ + TRACEPOINT_DESC(KBASE_TL_ATTRIB_ATOM_JITALLOCINFO, \ "Information about JIT allocations", \ "@pLLLIIIII", \ "atom,va_pgs,com_pgs,extent,j_id,bin_id,max_allocs,jit_flags,usg_id") \ - TP_DESC(KBASE_TL_ATTRIB_ATOM_JITFREEINFO, \ + TRACEPOINT_DESC(KBASE_TL_ATTRIB_ATOM_JITFREEINFO, \ "Information about JIT frees", \ "@pI", \ "atom,j_id") \ - TP_DESC(KBASE_TL_ATTRIB_AS_CONFIG, \ + TRACEPOINT_DESC(KBASE_TL_ATTRIB_AS_CONFIG, \ "address space attributes", \ "@pLLL", \ "address_space,transtab,memattr,transcfg") \ - TP_DESC(KBASE_TL_EVENT_LPU_SOFTSTOP, \ + TRACEPOINT_DESC(KBASE_TL_EVENT_LPU_SOFTSTOP, \ "softstop event on given lpu", \ "@p", \ "lpu") \ - TP_DESC(KBASE_TL_EVENT_ATOM_SOFTSTOP_EX, \ + TRACEPOINT_DESC(KBASE_TL_EVENT_ATOM_SOFTSTOP_EX, \ "atom softstopped", \ "@p", \ "atom") \ - TP_DESC(KBASE_TL_EVENT_ATOM_SOFTSTOP_ISSUE, \ + TRACEPOINT_DESC(KBASE_TL_EVENT_ATOM_SOFTSTOP_ISSUE, \ "atom softstop issued", \ "@p", \ "atom") \ - TP_DESC(KBASE_TL_EVENT_ATOM_SOFTJOB_START, \ + TRACEPOINT_DESC(KBASE_TL_EVENT_ATOM_SOFTJOB_START, \ "atom soft job has started", \ "@p", \ "atom") \ - TP_DESC(KBASE_TL_EVENT_ATOM_SOFTJOB_END, \ + TRACEPOINT_DESC(KBASE_TL_EVENT_ATOM_SOFTJOB_END, \ "atom soft job has completed", \ "@p", \ "atom") \ - TP_DESC(KBASE_JD_GPU_SOFT_RESET, \ + TRACEPOINT_DESC(KBASE_TL_EVENT_ARB_GRANTED, \ + "Arbiter has granted gpu access", \ + "@p", \ + "gpu") \ + TRACEPOINT_DESC(KBASE_TL_EVENT_ARB_STARTED, \ + "Driver is running again and able to process jobs", \ + "@p", \ + "gpu") \ + TRACEPOINT_DESC(KBASE_TL_EVENT_ARB_STOP_REQUESTED, \ + "Arbiter has requested driver to stop using gpu", \ + 
"@p", \ + "gpu") \ + TRACEPOINT_DESC(KBASE_TL_EVENT_ARB_STOPPED, \ + "Driver has stopped using gpu", \ + "@p", \ + "gpu") \ + TRACEPOINT_DESC(KBASE_JD_GPU_SOFT_RESET, \ "gpu soft reset", \ "@p", \ "gpu") \ - TP_DESC(KBASE_TL_NEW_KCPUQUEUE, \ + TRACEPOINT_DESC(KBASE_TL_KBASE_NEW_DEVICE, \ + "New KBase Device", \ + "@IIII", \ + "kbase_device_id,kbase_device_gpu_core_count,kbase_device_max_num_csgs,kbase_device_as_count") \ + TRACEPOINT_DESC(KBASE_TL_KBASE_DEVICE_PROGRAM_CSG, \ + "CSG is programmed to a slot", \ + "@III", \ + "kbase_device_id,gpu_cmdq_grp_handle,kbase_device_csg_slot_index") \ + TRACEPOINT_DESC(KBASE_TL_KBASE_DEVICE_DEPROGRAM_CSG, \ + "CSG is deprogrammed from a slot", \ + "@II", \ + "kbase_device_id,kbase_device_csg_slot_index") \ + TRACEPOINT_DESC(KBASE_TL_KBASE_NEW_CTX, \ + "New KBase Context", \ + "@II", \ + "kernel_ctx_id,kbase_device_id") \ + TRACEPOINT_DESC(KBASE_TL_KBASE_DEL_CTX, \ + "Delete KBase Context", \ + "@I", \ + "kernel_ctx_id") \ + TRACEPOINT_DESC(KBASE_TL_KBASE_CTX_ASSIGN_AS, \ + "Address Space is assigned to a KBase context", \ + "@II", \ + "kernel_ctx_id,kbase_device_as_index") \ + TRACEPOINT_DESC(KBASE_TL_KBASE_CTX_UNASSIGN_AS, \ + "Address Space is unassigned from a KBase context", \ + "@I", \ + "kernel_ctx_id") \ + TRACEPOINT_DESC(KBASE_TL_KBASE_NEW_KCPUQUEUE, \ "New KCPU Queue", \ - "@ppI", \ - "kcpu_queue,ctx,kcpuq_num_pending_cmds") \ - TP_DESC(KBASE_TL_RET_KCPUQUEUE_CTX, \ - "Context retains KCPU Queue", \ - "@pp", \ - "kcpu_queue,ctx") \ - TP_DESC(KBASE_TL_DEL_KCPUQUEUE, \ + "@pII", \ + "kcpu_queue,kernel_ctx_id,kcpuq_num_pending_cmds") \ + TRACEPOINT_DESC(KBASE_TL_KBASE_DEL_KCPUQUEUE, \ "Delete KCPU Queue", \ "@p", \ "kcpu_queue") \ - TP_DESC(KBASE_TL_NRET_KCPUQUEUE_CTX, \ - "Context releases KCPU Queue", \ - "@pp", \ - "kcpu_queue,ctx") \ - TP_DESC(KBASE_TL_EVENT_KCPUQUEUE_ENQUEUE_FENCE_SIGNAL, \ + TRACEPOINT_DESC(KBASE_TL_KBASE_KCPUQUEUE_ENQUEUE_FENCE_SIGNAL, \ "KCPU Queue enqueues Signal on Fence", \ "@pp", \ 
"kcpu_queue,fence") \ - TP_DESC(KBASE_TL_EVENT_KCPUQUEUE_ENQUEUE_FENCE_WAIT, \ + TRACEPOINT_DESC(KBASE_TL_KBASE_KCPUQUEUE_ENQUEUE_FENCE_WAIT, \ "KCPU Queue enqueues Wait on Fence", \ "@pp", \ "kcpu_queue,fence") \ - TP_DESC(KBASE_TL_EVENT_ARRAY_BEGIN_KCPUQUEUE_ENQUEUE_CQS_WAIT, \ + TRACEPOINT_DESC(KBASE_TL_KBASE_ARRAY_BEGIN_KCPUQUEUE_ENQUEUE_CQS_WAIT, \ "Begin array of KCPU Queue enqueues Wait on Cross Queue Sync Object", \ "@p", \ "kcpu_queue") \ - TP_DESC(KBASE_TL_EVENT_ARRAY_ITEM_KCPUQUEUE_ENQUEUE_CQS_WAIT, \ + TRACEPOINT_DESC(KBASE_TL_KBASE_ARRAY_ITEM_KCPUQUEUE_ENQUEUE_CQS_WAIT, \ "Array item of KCPU Queue enqueues Wait on Cross Queue Sync Object", \ "@pLI", \ "kcpu_queue,cqs_obj_gpu_addr,cqs_obj_compare_value") \ - TP_DESC(KBASE_TL_EVENT_ARRAY_END_KCPUQUEUE_ENQUEUE_CQS_WAIT, \ + TRACEPOINT_DESC(KBASE_TL_KBASE_ARRAY_END_KCPUQUEUE_ENQUEUE_CQS_WAIT, \ "End array of KCPU Queue enqueues Wait on Cross Queue Sync Object", \ "@p", \ "kcpu_queue") \ - TP_DESC(KBASE_TL_EVENT_ARRAY_BEGIN_KCPUQUEUE_ENQUEUE_CQS_SET, \ + TRACEPOINT_DESC(KBASE_TL_KBASE_ARRAY_BEGIN_KCPUQUEUE_ENQUEUE_CQS_SET, \ "Begin array of KCPU Queue enqueues Set on Cross Queue Sync Object", \ "@p", \ "kcpu_queue") \ - TP_DESC(KBASE_TL_EVENT_ARRAY_ITEM_KCPUQUEUE_ENQUEUE_CQS_SET, \ + TRACEPOINT_DESC(KBASE_TL_KBASE_ARRAY_ITEM_KCPUQUEUE_ENQUEUE_CQS_SET, \ "Array item of KCPU Queue enqueues Set on Cross Queue Sync Object", \ "@pL", \ "kcpu_queue,cqs_obj_gpu_addr") \ - TP_DESC(KBASE_TL_EVENT_ARRAY_END_KCPUQUEUE_ENQUEUE_CQS_SET, \ + TRACEPOINT_DESC(KBASE_TL_KBASE_ARRAY_END_KCPUQUEUE_ENQUEUE_CQS_SET, \ "End array of KCPU Queue enqueues Set on Cross Queue Sync Object", \ "@p", \ "kcpu_queue") \ - TP_DESC(KBASE_TL_EVENT_ARRAY_BEGIN_KCPUQUEUE_ENQUEUE_DEBUGCOPY, \ + TRACEPOINT_DESC(KBASE_TL_KBASE_ARRAY_BEGIN_KCPUQUEUE_ENQUEUE_DEBUGCOPY, \ "Begin array of KCPU Queue enqueues Debug Copy", \ "@p", \ "kcpu_queue") \ - TP_DESC(KBASE_TL_EVENT_ARRAY_ITEM_KCPUQUEUE_ENQUEUE_DEBUGCOPY, \ + 
TRACEPOINT_DESC(KBASE_TL_KBASE_ARRAY_ITEM_KCPUQUEUE_ENQUEUE_DEBUGCOPY, \ "Array item of KCPU Queue enqueues Debug Copy", \ "@pL", \ "kcpu_queue,debugcopy_dst_size") \ - TP_DESC(KBASE_TL_EVENT_ARRAY_END_KCPUQUEUE_ENQUEUE_DEBUGCOPY, \ + TRACEPOINT_DESC(KBASE_TL_KBASE_ARRAY_END_KCPUQUEUE_ENQUEUE_DEBUGCOPY, \ "End array of KCPU Queue enqueues Debug Copy", \ "@p", \ "kcpu_queue") \ - TP_DESC(KBASE_TL_EVENT_KCPUQUEUE_ENQUEUE_MAP_IMPORT, \ + TRACEPOINT_DESC(KBASE_TL_KBASE_KCPUQUEUE_ENQUEUE_MAP_IMPORT, \ "KCPU Queue enqueues Map Import", \ "@pL", \ "kcpu_queue,map_import_buf_gpu_addr") \ - TP_DESC(KBASE_TL_EVENT_KCPUQUEUE_ENQUEUE_UNMAP_IMPORT, \ + TRACEPOINT_DESC(KBASE_TL_KBASE_KCPUQUEUE_ENQUEUE_UNMAP_IMPORT, \ "KCPU Queue enqueues Unmap Import", \ "@pL", \ "kcpu_queue,map_import_buf_gpu_addr") \ - TP_DESC(KBASE_TL_EVENT_KCPUQUEUE_ENQUEUE_UNMAP_IMPORT_FORCE, \ + TRACEPOINT_DESC(KBASE_TL_KBASE_KCPUQUEUE_ENQUEUE_UNMAP_IMPORT_FORCE, \ "KCPU Queue enqueues Unmap Import ignoring reference count", \ "@pL", \ "kcpu_queue,map_import_buf_gpu_addr") \ - TP_DESC(KBASE_TL_EVENT_ARRAY_BEGIN_KCPUQUEUE_ENQUEUE_JIT_ALLOC, \ + TRACEPOINT_DESC(KBASE_TL_KBASE_ARRAY_BEGIN_KCPUQUEUE_ENQUEUE_JIT_ALLOC, \ "Begin array of KCPU Queue enqueues JIT Alloc", \ "@p", \ "kcpu_queue") \ - TP_DESC(KBASE_TL_EVENT_ARRAY_ITEM_KCPUQUEUE_ENQUEUE_JIT_ALLOC, \ + TRACEPOINT_DESC(KBASE_TL_KBASE_ARRAY_ITEM_KCPUQUEUE_ENQUEUE_JIT_ALLOC, \ "Array item of KCPU Queue enqueues JIT Alloc", \ "@pLLLLIIIII", \ "kcpu_queue,jit_alloc_gpu_alloc_addr_dest,jit_alloc_va_pages,jit_alloc_commit_pages,jit_alloc_extent,jit_alloc_jit_id,jit_alloc_bin_id,jit_alloc_max_allocations,jit_alloc_flags,jit_alloc_usage_id") \ - TP_DESC(KBASE_TL_EVENT_ARRAY_END_KCPUQUEUE_ENQUEUE_JIT_ALLOC, \ + TRACEPOINT_DESC(KBASE_TL_KBASE_ARRAY_END_KCPUQUEUE_ENQUEUE_JIT_ALLOC, \ "End array of KCPU Queue enqueues JIT Alloc", \ "@p", \ "kcpu_queue") \ - TP_DESC(KBASE_TL_EVENT_ARRAY_BEGIN_KCPUQUEUE_ENQUEUE_JIT_FREE, \ + 
TRACEPOINT_DESC(KBASE_TL_KBASE_ARRAY_BEGIN_KCPUQUEUE_ENQUEUE_JIT_FREE, \ "Begin array of KCPU Queue enqueues JIT Free", \ "@p", \ "kcpu_queue") \ - TP_DESC(KBASE_TL_EVENT_ARRAY_ITEM_KCPUQUEUE_ENQUEUE_JIT_FREE, \ + TRACEPOINT_DESC(KBASE_TL_KBASE_ARRAY_ITEM_KCPUQUEUE_ENQUEUE_JIT_FREE, \ "Array item of KCPU Queue enqueues JIT Free", \ "@pI", \ "kcpu_queue,jit_alloc_jit_id") \ - TP_DESC(KBASE_TL_EVENT_ARRAY_END_KCPUQUEUE_ENQUEUE_JIT_FREE, \ + TRACEPOINT_DESC(KBASE_TL_KBASE_ARRAY_END_KCPUQUEUE_ENQUEUE_JIT_FREE, \ "End array of KCPU Queue enqueues JIT Free", \ "@p", \ "kcpu_queue") \ - TP_DESC(KBASE_TL_EVENT_KCPUQUEUE_EXECUTE_FENCE_SIGNAL_START, \ + TRACEPOINT_DESC(KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_FENCE_SIGNAL_START, \ "KCPU Queue starts a Signal on Fence", \ "@p", \ "kcpu_queue") \ - TP_DESC(KBASE_TL_EVENT_KCPUQUEUE_EXECUTE_FENCE_SIGNAL_END, \ + TRACEPOINT_DESC(KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_FENCE_SIGNAL_END, \ "KCPU Queue ends a Signal on Fence", \ "@p", \ "kcpu_queue") \ - TP_DESC(KBASE_TL_EVENT_KCPUQUEUE_EXECUTE_FENCE_WAIT_START, \ + TRACEPOINT_DESC(KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_FENCE_WAIT_START, \ "KCPU Queue starts a Wait on Fence", \ "@p", \ "kcpu_queue") \ - TP_DESC(KBASE_TL_EVENT_KCPUQUEUE_EXECUTE_FENCE_WAIT_END, \ + TRACEPOINT_DESC(KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_FENCE_WAIT_END, \ "KCPU Queue ends a Wait on Fence", \ "@p", \ "kcpu_queue") \ - TP_DESC(KBASE_TL_EVENT_KCPUQUEUE_EXECUTE_CQS_WAIT_START, \ + TRACEPOINT_DESC(KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_WAIT_START, \ "KCPU Queue starts a Wait on an array of Cross Queue Sync Objects", \ "@p", \ "kcpu_queue") \ - TP_DESC(KBASE_TL_EVENT_KCPUQUEUE_EXECUTE_CQS_WAIT_END, \ + TRACEPOINT_DESC(KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_WAIT_END, \ "KCPU Queue ends a Wait on an array of Cross Queue Sync Objects", \ "@p", \ "kcpu_queue") \ - TP_DESC(KBASE_TL_EVENT_KCPUQUEUE_EXECUTE_CQS_SET_START, \ - "KCPU Queue starts a Set on an array of Cross Queue Sync Objects", \ + 
TRACEPOINT_DESC(KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_SET, \ + "KCPU Queue executes a Set on an array of Cross Queue Sync Objects", \ "@p", \ "kcpu_queue") \ - TP_DESC(KBASE_TL_EVENT_KCPUQUEUE_EXECUTE_CQS_SET_END, \ - "KCPU Queue ends a Set on an array of Cross Queue Sync Objects", \ - "@p", \ - "kcpu_queue") \ - TP_DESC(KBASE_TL_EVENT_KCPUQUEUE_EXECUTE_DEBUGCOPY_START, \ + TRACEPOINT_DESC(KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_DEBUGCOPY_START, \ "KCPU Queue starts an array of Debug Copys", \ "@p", \ "kcpu_queue") \ - TP_DESC(KBASE_TL_EVENT_KCPUQUEUE_EXECUTE_DEBUGCOPY_END, \ + TRACEPOINT_DESC(KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_DEBUGCOPY_END, \ "KCPU Queue ends an array of Debug Copys", \ "@p", \ "kcpu_queue") \ - TP_DESC(KBASE_TL_EVENT_KCPUQUEUE_EXECUTE_MAP_IMPORT_START, \ + TRACEPOINT_DESC(KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_MAP_IMPORT_START, \ "KCPU Queue starts a Map Import", \ "@p", \ "kcpu_queue") \ - TP_DESC(KBASE_TL_EVENT_KCPUQUEUE_EXECUTE_MAP_IMPORT_END, \ + TRACEPOINT_DESC(KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_MAP_IMPORT_END, \ "KCPU Queue ends a Map Import", \ "@p", \ "kcpu_queue") \ - TP_DESC(KBASE_TL_EVENT_KCPUQUEUE_EXECUTE_UNMAP_IMPORT_START, \ + TRACEPOINT_DESC(KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_UNMAP_IMPORT_START, \ "KCPU Queue starts an Unmap Import", \ "@p", \ "kcpu_queue") \ - TP_DESC(KBASE_TL_EVENT_KCPUQUEUE_EXECUTE_UNMAP_IMPORT_END, \ + TRACEPOINT_DESC(KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_UNMAP_IMPORT_END, \ "KCPU Queue ends an Unmap Import", \ "@p", \ "kcpu_queue") \ - TP_DESC(KBASE_TL_EVENT_KCPUQUEUE_EXECUTE_UNMAP_IMPORT_FORCE_START, \ + TRACEPOINT_DESC(KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_UNMAP_IMPORT_FORCE_START, \ "KCPU Queue starts an Unmap Import ignoring reference count", \ "@p", \ "kcpu_queue") \ - TP_DESC(KBASE_TL_EVENT_KCPUQUEUE_EXECUTE_UNMAP_IMPORT_FORCE_END, \ + TRACEPOINT_DESC(KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_UNMAP_IMPORT_FORCE_END, \ "KCPU Queue ends an Unmap Import ignoring reference count", \ "@p", \ "kcpu_queue") \ - 
TP_DESC(KBASE_TL_EVENT_KCPUQUEUE_EXECUTE_JIT_ALLOC_START, \ + TRACEPOINT_DESC(KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_JIT_ALLOC_START, \ "KCPU Queue starts an array of JIT Allocs", \ "@p", \ "kcpu_queue") \ - TP_DESC(KBASE_TL_EVENT_ARRAY_BEGIN_KCPUQUEUE_EXECUTE_JIT_ALLOC_END, \ + TRACEPOINT_DESC(KBASE_TL_KBASE_ARRAY_BEGIN_KCPUQUEUE_EXECUTE_JIT_ALLOC_END, \ "Begin array of KCPU Queue ends an array of JIT Allocs", \ "@p", \ "kcpu_queue") \ - TP_DESC(KBASE_TL_EVENT_ARRAY_ITEM_KCPUQUEUE_EXECUTE_JIT_ALLOC_END, \ + TRACEPOINT_DESC(KBASE_TL_KBASE_ARRAY_ITEM_KCPUQUEUE_EXECUTE_JIT_ALLOC_END, \ "Array item of KCPU Queue ends an array of JIT Allocs", \ "@pLL", \ "kcpu_queue,jit_alloc_gpu_alloc_addr,jit_alloc_mmu_flags") \ - TP_DESC(KBASE_TL_EVENT_ARRAY_END_KCPUQUEUE_EXECUTE_JIT_ALLOC_END, \ + TRACEPOINT_DESC(KBASE_TL_KBASE_ARRAY_END_KCPUQUEUE_EXECUTE_JIT_ALLOC_END, \ "End array of KCPU Queue ends an array of JIT Allocs", \ "@p", \ "kcpu_queue") \ - TP_DESC(KBASE_TL_EVENT_KCPUQUEUE_EXECUTE_JIT_FREE_START, \ + TRACEPOINT_DESC(KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_JIT_FREE_START, \ "KCPU Queue starts an array of JIT Frees", \ "@p", \ "kcpu_queue") \ - TP_DESC(KBASE_TL_EVENT_ARRAY_BEGIN_KCPUQUEUE_EXECUTE_JIT_FREE_END, \ + TRACEPOINT_DESC(KBASE_TL_KBASE_ARRAY_BEGIN_KCPUQUEUE_EXECUTE_JIT_FREE_END, \ "Begin array of KCPU Queue ends an array of JIT Frees", \ "@p", \ "kcpu_queue") \ - TP_DESC(KBASE_TL_EVENT_ARRAY_ITEM_KCPUQUEUE_EXECUTE_JIT_FREE_END, \ + TRACEPOINT_DESC(KBASE_TL_KBASE_ARRAY_ITEM_KCPUQUEUE_EXECUTE_JIT_FREE_END, \ "Array item of KCPU Queue ends an array of JIT Frees", \ "@pL", \ "kcpu_queue,jit_free_pages_used") \ - TP_DESC(KBASE_TL_EVENT_ARRAY_END_KCPUQUEUE_EXECUTE_JIT_FREE_END, \ + TRACEPOINT_DESC(KBASE_TL_KBASE_ARRAY_END_KCPUQUEUE_EXECUTE_JIT_FREE_END, \ "End array of KCPU Queue ends an array of JIT Frees", \ "@p", \ "kcpu_queue") \ - TP_DESC(KBASE_TL_EVENT_KCPUQUEUE_EXECUTE_ERRORBARRIER, \ + TRACEPOINT_DESC(KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_ERRORBARRIER, \ "KCPU Queue executes an 
Error Barrier", \ "@p", \ "kcpu_queue") \ + TRACEPOINT_DESC(KBASE_TL_KBASE_CSFFW_TLSTREAM_OVERFLOW, \ + "An overflow has happened with the CSFFW Timeline stream", \ + "@LL", \ + "csffw_timestamp,csffw_cycle") \ -#define MIPE_HEADER_BLOB_VAR_NAME __obj_desc_header -#define MIPE_HEADER_TP_LIST OBJ_TL_LIST -#define MIPE_HEADER_TP_LIST_COUNT KBASE_OBJ_MSG_COUNT -#define MIPE_HEADER_PKT_CLASS TL_PACKET_CLASS_OBJ +#define MIPE_HEADER_BLOB_VAR_NAME __obj_desc_header +#define MIPE_HEADER_STREAM_ID TL_STREAM_ID_KERNEL +#define MIPE_HEADER_PKT_CLASS TL_PACKET_CLASS_OBJ +#define MIPE_HEADER_TRACEPOINT_LIST OBJ_TP_LIST +#define MIPE_HEADER_TRACEPOINT_LIST_SIZE KBASE_OBJ_MSG_COUNT #include "mali_kbase_mipe_gen_header.h" const char *obj_desc_header = (const char *) &__obj_desc_header; const size_t obj_desc_header_size = sizeof(__obj_desc_header); -#define AUX_TL_LIST \ - TP_DESC(KBASE_AUX_PM_STATE, \ +#define AUX_TP_LIST \ + TRACEPOINT_DESC(KBASE_AUX_PM_STATE, \ "PM state", \ "@IL", \ "core_type,core_state_bitset") \ - TP_DESC(KBASE_AUX_PAGEFAULT, \ + TRACEPOINT_DESC(KBASE_AUX_PAGEFAULT, \ "Page fault", \ "@IIL", \ "ctx_nr,as_nr,page_cnt_change") \ - TP_DESC(KBASE_AUX_PAGESALLOC, \ + TRACEPOINT_DESC(KBASE_AUX_PAGESALLOC, \ "Total alloc pages change", \ "@IL", \ "ctx_nr,page_cnt") \ - TP_DESC(KBASE_AUX_DEVFREQ_TARGET, \ + TRACEPOINT_DESC(KBASE_AUX_DEVFREQ_TARGET, \ "New device frequency target", \ "@L", \ "target_freq") \ - TP_DESC(KBASE_AUX_PROTECTED_ENTER_START, \ + TRACEPOINT_DESC(KBASE_AUX_PROTECTED_ENTER_START, \ "enter protected mode start", \ "@p", \ "gpu") \ - TP_DESC(KBASE_AUX_PROTECTED_ENTER_END, \ + TRACEPOINT_DESC(KBASE_AUX_PROTECTED_ENTER_END, \ "enter protected mode end", \ "@p", \ "gpu") \ - TP_DESC(KBASE_AUX_PROTECTED_LEAVE_START, \ + TRACEPOINT_DESC(KBASE_AUX_PROTECTED_LEAVE_START, \ "leave protected mode start", \ "@p", \ "gpu") \ - TP_DESC(KBASE_AUX_PROTECTED_LEAVE_END, \ + TRACEPOINT_DESC(KBASE_AUX_PROTECTED_LEAVE_END, \ "leave protected mode end", \ "@p", \ 
"gpu") \ - TP_DESC(KBASE_AUX_JIT_STATS, \ + TRACEPOINT_DESC(KBASE_AUX_JIT_STATS, \ "per-bin JIT statistics", \ "@IIIIII", \ "ctx_nr,bid,max_allocs,allocs,va_pages,ph_pages") \ - TP_DESC(KBASE_AUX_EVENT_JOB_SLOT, \ + TRACEPOINT_DESC(KBASE_AUX_EVENT_JOB_SLOT, \ "event on a given job slot", \ "@pIII", \ "ctx,slot_nr,atom_nr,event") \ -#define MIPE_HEADER_BLOB_VAR_NAME __aux_desc_header -#define MIPE_HEADER_TP_LIST AUX_TL_LIST -#define MIPE_HEADER_TP_LIST_COUNT KBASE_AUX_MSG_COUNT -#define MIPE_HEADER_PKT_CLASS TL_PACKET_CLASS_AUX +#define MIPE_HEADER_BLOB_VAR_NAME __aux_desc_header +#define MIPE_HEADER_STREAM_ID TL_STREAM_ID_KERNEL +#define MIPE_HEADER_PKT_CLASS TL_PACKET_CLASS_AUX +#define MIPE_HEADER_TRACEPOINT_LIST AUX_TP_LIST +#define MIPE_HEADER_TRACEPOINT_LIST_SIZE KBASE_AUX_MSG_COUNT #include "mali_kbase_mipe_gen_header.h" @@ -1446,6 +1493,94 @@ void __kbase_tlstream_tl_event_atom_softjob_end( kbase_tlstream_msgbuf_release(stream, acq_flags); } +void __kbase_tlstream_tl_event_arb_granted( + struct kbase_tlstream *stream, + const void *gpu) +{ + const u32 msg_id = KBASE_TL_EVENT_ARB_GRANTED; + const size_t msg_size = sizeof(msg_id) + sizeof(u64) + + sizeof(gpu) + ; + char *buffer; + unsigned long acq_flags; + size_t pos = 0; + + buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + + pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); + pos = kbasep_serialize_timestamp(buffer, pos); + pos = kbasep_serialize_bytes(buffer, + pos, &gpu, sizeof(gpu)); + + kbase_tlstream_msgbuf_release(stream, acq_flags); +} + +void __kbase_tlstream_tl_event_arb_started( + struct kbase_tlstream *stream, + const void *gpu) +{ + const u32 msg_id = KBASE_TL_EVENT_ARB_STARTED; + const size_t msg_size = sizeof(msg_id) + sizeof(u64) + + sizeof(gpu) + ; + char *buffer; + unsigned long acq_flags; + size_t pos = 0; + + buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + + pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); 
+ pos = kbasep_serialize_timestamp(buffer, pos); + pos = kbasep_serialize_bytes(buffer, + pos, &gpu, sizeof(gpu)); + + kbase_tlstream_msgbuf_release(stream, acq_flags); +} + +void __kbase_tlstream_tl_event_arb_stop_requested( + struct kbase_tlstream *stream, + const void *gpu) +{ + const u32 msg_id = KBASE_TL_EVENT_ARB_STOP_REQUESTED; + const size_t msg_size = sizeof(msg_id) + sizeof(u64) + + sizeof(gpu) + ; + char *buffer; + unsigned long acq_flags; + size_t pos = 0; + + buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + + pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); + pos = kbasep_serialize_timestamp(buffer, pos); + pos = kbasep_serialize_bytes(buffer, + pos, &gpu, sizeof(gpu)); + + kbase_tlstream_msgbuf_release(stream, acq_flags); +} + +void __kbase_tlstream_tl_event_arb_stopped( + struct kbase_tlstream *stream, + const void *gpu) +{ + const u32 msg_id = KBASE_TL_EVENT_ARB_STOPPED; + const size_t msg_size = sizeof(msg_id) + sizeof(u64) + + sizeof(gpu) + ; + char *buffer; + unsigned long acq_flags; + size_t pos = 0; + + buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + + pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); + pos = kbasep_serialize_timestamp(buffer, pos); + pos = kbasep_serialize_bytes(buffer, + pos, &gpu, sizeof(gpu)); + + kbase_tlstream_msgbuf_release(stream, acq_flags); +} + void __kbase_tlstream_jd_gpu_soft_reset( struct kbase_tlstream *stream, const void *gpu) @@ -1736,16 +1871,202 @@ void __kbase_tlstream_aux_event_job_slot( kbase_tlstream_msgbuf_release(stream, acq_flags); } -void __kbase_tlstream_tl_new_kcpuqueue( +void __kbase_tlstream_tl_kbase_new_device( + struct kbase_tlstream *stream, + u32 kbase_device_id, + u32 kbase_device_gpu_core_count, + u32 kbase_device_max_num_csgs, + u32 kbase_device_as_count) +{ + const u32 msg_id = KBASE_TL_KBASE_NEW_DEVICE; + const size_t msg_size = sizeof(msg_id) + sizeof(u64) + + sizeof(kbase_device_id) + + 
sizeof(kbase_device_gpu_core_count) + + sizeof(kbase_device_max_num_csgs) + + sizeof(kbase_device_as_count) + ; + char *buffer; + unsigned long acq_flags; + size_t pos = 0; + + buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + + pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); + pos = kbasep_serialize_timestamp(buffer, pos); + pos = kbasep_serialize_bytes(buffer, + pos, &kbase_device_id, sizeof(kbase_device_id)); + pos = kbasep_serialize_bytes(buffer, + pos, &kbase_device_gpu_core_count, sizeof(kbase_device_gpu_core_count)); + pos = kbasep_serialize_bytes(buffer, + pos, &kbase_device_max_num_csgs, sizeof(kbase_device_max_num_csgs)); + pos = kbasep_serialize_bytes(buffer, + pos, &kbase_device_as_count, sizeof(kbase_device_as_count)); + + kbase_tlstream_msgbuf_release(stream, acq_flags); +} + +void __kbase_tlstream_tl_kbase_device_program_csg( + struct kbase_tlstream *stream, + u32 kbase_device_id, + u32 gpu_cmdq_grp_handle, + u32 kbase_device_csg_slot_index) +{ + const u32 msg_id = KBASE_TL_KBASE_DEVICE_PROGRAM_CSG; + const size_t msg_size = sizeof(msg_id) + sizeof(u64) + + sizeof(kbase_device_id) + + sizeof(gpu_cmdq_grp_handle) + + sizeof(kbase_device_csg_slot_index) + ; + char *buffer; + unsigned long acq_flags; + size_t pos = 0; + + buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + + pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); + pos = kbasep_serialize_timestamp(buffer, pos); + pos = kbasep_serialize_bytes(buffer, + pos, &kbase_device_id, sizeof(kbase_device_id)); + pos = kbasep_serialize_bytes(buffer, + pos, &gpu_cmdq_grp_handle, sizeof(gpu_cmdq_grp_handle)); + pos = kbasep_serialize_bytes(buffer, + pos, &kbase_device_csg_slot_index, sizeof(kbase_device_csg_slot_index)); + + kbase_tlstream_msgbuf_release(stream, acq_flags); +} + +void __kbase_tlstream_tl_kbase_device_deprogram_csg( + struct kbase_tlstream *stream, + u32 kbase_device_id, + u32 kbase_device_csg_slot_index) +{ + 
const u32 msg_id = KBASE_TL_KBASE_DEVICE_DEPROGRAM_CSG; + const size_t msg_size = sizeof(msg_id) + sizeof(u64) + + sizeof(kbase_device_id) + + sizeof(kbase_device_csg_slot_index) + ; + char *buffer; + unsigned long acq_flags; + size_t pos = 0; + + buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + + pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); + pos = kbasep_serialize_timestamp(buffer, pos); + pos = kbasep_serialize_bytes(buffer, + pos, &kbase_device_id, sizeof(kbase_device_id)); + pos = kbasep_serialize_bytes(buffer, + pos, &kbase_device_csg_slot_index, sizeof(kbase_device_csg_slot_index)); + + kbase_tlstream_msgbuf_release(stream, acq_flags); +} + +void __kbase_tlstream_tl_kbase_new_ctx( + struct kbase_tlstream *stream, + u32 kernel_ctx_id, + u32 kbase_device_id) +{ + const u32 msg_id = KBASE_TL_KBASE_NEW_CTX; + const size_t msg_size = sizeof(msg_id) + sizeof(u64) + + sizeof(kernel_ctx_id) + + sizeof(kbase_device_id) + ; + char *buffer; + unsigned long acq_flags; + size_t pos = 0; + + buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + + pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); + pos = kbasep_serialize_timestamp(buffer, pos); + pos = kbasep_serialize_bytes(buffer, + pos, &kernel_ctx_id, sizeof(kernel_ctx_id)); + pos = kbasep_serialize_bytes(buffer, + pos, &kbase_device_id, sizeof(kbase_device_id)); + + kbase_tlstream_msgbuf_release(stream, acq_flags); +} + +void __kbase_tlstream_tl_kbase_del_ctx( + struct kbase_tlstream *stream, + u32 kernel_ctx_id) +{ + const u32 msg_id = KBASE_TL_KBASE_DEL_CTX; + const size_t msg_size = sizeof(msg_id) + sizeof(u64) + + sizeof(kernel_ctx_id) + ; + char *buffer; + unsigned long acq_flags; + size_t pos = 0; + + buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + + pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); + pos = kbasep_serialize_timestamp(buffer, pos); + pos = kbasep_serialize_bytes(buffer, + 
pos, &kernel_ctx_id, sizeof(kernel_ctx_id)); + + kbase_tlstream_msgbuf_release(stream, acq_flags); +} + +void __kbase_tlstream_tl_kbase_ctx_assign_as( + struct kbase_tlstream *stream, + u32 kernel_ctx_id, + u32 kbase_device_as_index) +{ + const u32 msg_id = KBASE_TL_KBASE_CTX_ASSIGN_AS; + const size_t msg_size = sizeof(msg_id) + sizeof(u64) + + sizeof(kernel_ctx_id) + + sizeof(kbase_device_as_index) + ; + char *buffer; + unsigned long acq_flags; + size_t pos = 0; + + buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + + pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); + pos = kbasep_serialize_timestamp(buffer, pos); + pos = kbasep_serialize_bytes(buffer, + pos, &kernel_ctx_id, sizeof(kernel_ctx_id)); + pos = kbasep_serialize_bytes(buffer, + pos, &kbase_device_as_index, sizeof(kbase_device_as_index)); + + kbase_tlstream_msgbuf_release(stream, acq_flags); +} + +void __kbase_tlstream_tl_kbase_ctx_unassign_as( + struct kbase_tlstream *stream, + u32 kernel_ctx_id) +{ + const u32 msg_id = KBASE_TL_KBASE_CTX_UNASSIGN_AS; + const size_t msg_size = sizeof(msg_id) + sizeof(u64) + + sizeof(kernel_ctx_id) + ; + char *buffer; + unsigned long acq_flags; + size_t pos = 0; + + buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + + pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); + pos = kbasep_serialize_timestamp(buffer, pos); + pos = kbasep_serialize_bytes(buffer, + pos, &kernel_ctx_id, sizeof(kernel_ctx_id)); + + kbase_tlstream_msgbuf_release(stream, acq_flags); +} + +void __kbase_tlstream_tl_kbase_new_kcpuqueue( struct kbase_tlstream *stream, const void *kcpu_queue, - const void *ctx, + u32 kernel_ctx_id, u32 kcpuq_num_pending_cmds) { - const u32 msg_id = KBASE_TL_NEW_KCPUQUEUE; + const u32 msg_id = KBASE_TL_KBASE_NEW_KCPUQUEUE; const size_t msg_size = sizeof(msg_id) + sizeof(u64) + sizeof(kcpu_queue) - + sizeof(ctx) + + sizeof(kernel_ctx_id) + sizeof(kcpuq_num_pending_cmds) ; char *buffer; @@ -1759,44 
+2080,18 @@ void __kbase_tlstream_tl_new_kcpuqueue( pos = kbasep_serialize_bytes(buffer, pos, &kcpu_queue, sizeof(kcpu_queue)); pos = kbasep_serialize_bytes(buffer, - pos, &ctx, sizeof(ctx)); + pos, &kernel_ctx_id, sizeof(kernel_ctx_id)); pos = kbasep_serialize_bytes(buffer, pos, &kcpuq_num_pending_cmds, sizeof(kcpuq_num_pending_cmds)); kbase_tlstream_msgbuf_release(stream, acq_flags); } -void __kbase_tlstream_tl_ret_kcpuqueue_ctx( - struct kbase_tlstream *stream, - const void *kcpu_queue, - const void *ctx) -{ - const u32 msg_id = KBASE_TL_RET_KCPUQUEUE_CTX; - const size_t msg_size = sizeof(msg_id) + sizeof(u64) - + sizeof(kcpu_queue) - + sizeof(ctx) - ; - char *buffer; - unsigned long acq_flags; - size_t pos = 0; - - buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); - - pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); - pos = kbasep_serialize_timestamp(buffer, pos); - pos = kbasep_serialize_bytes(buffer, - pos, &kcpu_queue, sizeof(kcpu_queue)); - pos = kbasep_serialize_bytes(buffer, - pos, &ctx, sizeof(ctx)); - - kbase_tlstream_msgbuf_release(stream, acq_flags); -} - -void __kbase_tlstream_tl_del_kcpuqueue( +void __kbase_tlstream_tl_kbase_del_kcpuqueue( struct kbase_tlstream *stream, const void *kcpu_queue) { - const u32 msg_id = KBASE_TL_DEL_KCPUQUEUE; + const u32 msg_id = KBASE_TL_KBASE_DEL_KCPUQUEUE; const size_t msg_size = sizeof(msg_id) + sizeof(u64) + sizeof(kcpu_queue) ; @@ -1814,38 +2109,12 @@ void __kbase_tlstream_tl_del_kcpuqueue( kbase_tlstream_msgbuf_release(stream, acq_flags); } -void __kbase_tlstream_tl_nret_kcpuqueue_ctx( - struct kbase_tlstream *stream, - const void *kcpu_queue, - const void *ctx) -{ - const u32 msg_id = KBASE_TL_NRET_KCPUQUEUE_CTX; - const size_t msg_size = sizeof(msg_id) + sizeof(u64) - + sizeof(kcpu_queue) - + sizeof(ctx) - ; - char *buffer; - unsigned long acq_flags; - size_t pos = 0; - - buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); - - pos = 
kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); - pos = kbasep_serialize_timestamp(buffer, pos); - pos = kbasep_serialize_bytes(buffer, - pos, &kcpu_queue, sizeof(kcpu_queue)); - pos = kbasep_serialize_bytes(buffer, - pos, &ctx, sizeof(ctx)); - - kbase_tlstream_msgbuf_release(stream, acq_flags); -} - -void __kbase_tlstream_tl_event_kcpuqueue_enqueue_fence_signal( +void __kbase_tlstream_tl_kbase_kcpuqueue_enqueue_fence_signal( struct kbase_tlstream *stream, const void *kcpu_queue, const void *fence) { - const u32 msg_id = KBASE_TL_EVENT_KCPUQUEUE_ENQUEUE_FENCE_SIGNAL; + const u32 msg_id = KBASE_TL_KBASE_KCPUQUEUE_ENQUEUE_FENCE_SIGNAL; const size_t msg_size = sizeof(msg_id) + sizeof(u64) + sizeof(kcpu_queue) + sizeof(fence) @@ -1866,12 +2135,12 @@ void __kbase_tlstream_tl_event_kcpuqueue_enqueue_fence_signal( kbase_tlstream_msgbuf_release(stream, acq_flags); } -void __kbase_tlstream_tl_event_kcpuqueue_enqueue_fence_wait( +void __kbase_tlstream_tl_kbase_kcpuqueue_enqueue_fence_wait( struct kbase_tlstream *stream, const void *kcpu_queue, const void *fence) { - const u32 msg_id = KBASE_TL_EVENT_KCPUQUEUE_ENQUEUE_FENCE_WAIT; + const u32 msg_id = KBASE_TL_KBASE_KCPUQUEUE_ENQUEUE_FENCE_WAIT; const size_t msg_size = sizeof(msg_id) + sizeof(u64) + sizeof(kcpu_queue) + sizeof(fence) @@ -1892,11 +2161,11 @@ void __kbase_tlstream_tl_event_kcpuqueue_enqueue_fence_wait( kbase_tlstream_msgbuf_release(stream, acq_flags); } -void __kbase_tlstream_tl_event_array_begin_kcpuqueue_enqueue_cqs_wait( +void __kbase_tlstream_tl_kbase_array_begin_kcpuqueue_enqueue_cqs_wait( struct kbase_tlstream *stream, const void *kcpu_queue) { - const u32 msg_id = KBASE_TL_EVENT_ARRAY_BEGIN_KCPUQUEUE_ENQUEUE_CQS_WAIT; + const u32 msg_id = KBASE_TL_KBASE_ARRAY_BEGIN_KCPUQUEUE_ENQUEUE_CQS_WAIT; const size_t msg_size = sizeof(msg_id) + sizeof(u64) + sizeof(kcpu_queue) ; @@ -1914,13 +2183,13 @@ void __kbase_tlstream_tl_event_array_begin_kcpuqueue_enqueue_cqs_wait( 
kbase_tlstream_msgbuf_release(stream, acq_flags); } -void __kbase_tlstream_tl_event_array_item_kcpuqueue_enqueue_cqs_wait( +void __kbase_tlstream_tl_kbase_array_item_kcpuqueue_enqueue_cqs_wait( struct kbase_tlstream *stream, const void *kcpu_queue, u64 cqs_obj_gpu_addr, u32 cqs_obj_compare_value) { - const u32 msg_id = KBASE_TL_EVENT_ARRAY_ITEM_KCPUQUEUE_ENQUEUE_CQS_WAIT; + const u32 msg_id = KBASE_TL_KBASE_ARRAY_ITEM_KCPUQUEUE_ENQUEUE_CQS_WAIT; const size_t msg_size = sizeof(msg_id) + sizeof(u64) + sizeof(kcpu_queue) + sizeof(cqs_obj_gpu_addr) @@ -1944,11 +2213,11 @@ void __kbase_tlstream_tl_event_array_item_kcpuqueue_enqueue_cqs_wait( kbase_tlstream_msgbuf_release(stream, acq_flags); } -void __kbase_tlstream_tl_event_array_end_kcpuqueue_enqueue_cqs_wait( +void __kbase_tlstream_tl_kbase_array_end_kcpuqueue_enqueue_cqs_wait( struct kbase_tlstream *stream, const void *kcpu_queue) { - const u32 msg_id = KBASE_TL_EVENT_ARRAY_END_KCPUQUEUE_ENQUEUE_CQS_WAIT; + const u32 msg_id = KBASE_TL_KBASE_ARRAY_END_KCPUQUEUE_ENQUEUE_CQS_WAIT; const size_t msg_size = sizeof(msg_id) + sizeof(u64) + sizeof(kcpu_queue) ; @@ -1966,11 +2235,11 @@ void __kbase_tlstream_tl_event_array_end_kcpuqueue_enqueue_cqs_wait( kbase_tlstream_msgbuf_release(stream, acq_flags); } -void __kbase_tlstream_tl_event_array_begin_kcpuqueue_enqueue_cqs_set( +void __kbase_tlstream_tl_kbase_array_begin_kcpuqueue_enqueue_cqs_set( struct kbase_tlstream *stream, const void *kcpu_queue) { - const u32 msg_id = KBASE_TL_EVENT_ARRAY_BEGIN_KCPUQUEUE_ENQUEUE_CQS_SET; + const u32 msg_id = KBASE_TL_KBASE_ARRAY_BEGIN_KCPUQUEUE_ENQUEUE_CQS_SET; const size_t msg_size = sizeof(msg_id) + sizeof(u64) + sizeof(kcpu_queue) ; @@ -1988,12 +2257,12 @@ void __kbase_tlstream_tl_event_array_begin_kcpuqueue_enqueue_cqs_set( kbase_tlstream_msgbuf_release(stream, acq_flags); } -void __kbase_tlstream_tl_event_array_item_kcpuqueue_enqueue_cqs_set( +void __kbase_tlstream_tl_kbase_array_item_kcpuqueue_enqueue_cqs_set( struct kbase_tlstream 
*stream, const void *kcpu_queue, u64 cqs_obj_gpu_addr) { - const u32 msg_id = KBASE_TL_EVENT_ARRAY_ITEM_KCPUQUEUE_ENQUEUE_CQS_SET; + const u32 msg_id = KBASE_TL_KBASE_ARRAY_ITEM_KCPUQUEUE_ENQUEUE_CQS_SET; const size_t msg_size = sizeof(msg_id) + sizeof(u64) + sizeof(kcpu_queue) + sizeof(cqs_obj_gpu_addr) @@ -2014,11 +2283,11 @@ void __kbase_tlstream_tl_event_array_item_kcpuqueue_enqueue_cqs_set( kbase_tlstream_msgbuf_release(stream, acq_flags); } -void __kbase_tlstream_tl_event_array_end_kcpuqueue_enqueue_cqs_set( +void __kbase_tlstream_tl_kbase_array_end_kcpuqueue_enqueue_cqs_set( struct kbase_tlstream *stream, const void *kcpu_queue) { - const u32 msg_id = KBASE_TL_EVENT_ARRAY_END_KCPUQUEUE_ENQUEUE_CQS_SET; + const u32 msg_id = KBASE_TL_KBASE_ARRAY_END_KCPUQUEUE_ENQUEUE_CQS_SET; const size_t msg_size = sizeof(msg_id) + sizeof(u64) + sizeof(kcpu_queue) ; @@ -2036,11 +2305,11 @@ void __kbase_tlstream_tl_event_array_end_kcpuqueue_enqueue_cqs_set( kbase_tlstream_msgbuf_release(stream, acq_flags); } -void __kbase_tlstream_tl_event_array_begin_kcpuqueue_enqueue_debugcopy( +void __kbase_tlstream_tl_kbase_array_begin_kcpuqueue_enqueue_debugcopy( struct kbase_tlstream *stream, const void *kcpu_queue) { - const u32 msg_id = KBASE_TL_EVENT_ARRAY_BEGIN_KCPUQUEUE_ENQUEUE_DEBUGCOPY; + const u32 msg_id = KBASE_TL_KBASE_ARRAY_BEGIN_KCPUQUEUE_ENQUEUE_DEBUGCOPY; const size_t msg_size = sizeof(msg_id) + sizeof(u64) + sizeof(kcpu_queue) ; @@ -2058,12 +2327,12 @@ void __kbase_tlstream_tl_event_array_begin_kcpuqueue_enqueue_debugcopy( kbase_tlstream_msgbuf_release(stream, acq_flags); } -void __kbase_tlstream_tl_event_array_item_kcpuqueue_enqueue_debugcopy( +void __kbase_tlstream_tl_kbase_array_item_kcpuqueue_enqueue_debugcopy( struct kbase_tlstream *stream, const void *kcpu_queue, u64 debugcopy_dst_size) { - const u32 msg_id = KBASE_TL_EVENT_ARRAY_ITEM_KCPUQUEUE_ENQUEUE_DEBUGCOPY; + const u32 msg_id = KBASE_TL_KBASE_ARRAY_ITEM_KCPUQUEUE_ENQUEUE_DEBUGCOPY; const size_t msg_size = 
sizeof(msg_id) + sizeof(u64) + sizeof(kcpu_queue) + sizeof(debugcopy_dst_size) @@ -2084,11 +2353,11 @@ void __kbase_tlstream_tl_event_array_item_kcpuqueue_enqueue_debugcopy( kbase_tlstream_msgbuf_release(stream, acq_flags); } -void __kbase_tlstream_tl_event_array_end_kcpuqueue_enqueue_debugcopy( +void __kbase_tlstream_tl_kbase_array_end_kcpuqueue_enqueue_debugcopy( struct kbase_tlstream *stream, const void *kcpu_queue) { - const u32 msg_id = KBASE_TL_EVENT_ARRAY_END_KCPUQUEUE_ENQUEUE_DEBUGCOPY; + const u32 msg_id = KBASE_TL_KBASE_ARRAY_END_KCPUQUEUE_ENQUEUE_DEBUGCOPY; const size_t msg_size = sizeof(msg_id) + sizeof(u64) + sizeof(kcpu_queue) ; @@ -2106,12 +2375,12 @@ void __kbase_tlstream_tl_event_array_end_kcpuqueue_enqueue_debugcopy( kbase_tlstream_msgbuf_release(stream, acq_flags); } -void __kbase_tlstream_tl_event_kcpuqueue_enqueue_map_import( +void __kbase_tlstream_tl_kbase_kcpuqueue_enqueue_map_import( struct kbase_tlstream *stream, const void *kcpu_queue, u64 map_import_buf_gpu_addr) { - const u32 msg_id = KBASE_TL_EVENT_KCPUQUEUE_ENQUEUE_MAP_IMPORT; + const u32 msg_id = KBASE_TL_KBASE_KCPUQUEUE_ENQUEUE_MAP_IMPORT; const size_t msg_size = sizeof(msg_id) + sizeof(u64) + sizeof(kcpu_queue) + sizeof(map_import_buf_gpu_addr) @@ -2132,12 +2401,12 @@ void __kbase_tlstream_tl_event_kcpuqueue_enqueue_map_import( kbase_tlstream_msgbuf_release(stream, acq_flags); } -void __kbase_tlstream_tl_event_kcpuqueue_enqueue_unmap_import( +void __kbase_tlstream_tl_kbase_kcpuqueue_enqueue_unmap_import( struct kbase_tlstream *stream, const void *kcpu_queue, u64 map_import_buf_gpu_addr) { - const u32 msg_id = KBASE_TL_EVENT_KCPUQUEUE_ENQUEUE_UNMAP_IMPORT; + const u32 msg_id = KBASE_TL_KBASE_KCPUQUEUE_ENQUEUE_UNMAP_IMPORT; const size_t msg_size = sizeof(msg_id) + sizeof(u64) + sizeof(kcpu_queue) + sizeof(map_import_buf_gpu_addr) @@ -2158,12 +2427,12 @@ void __kbase_tlstream_tl_event_kcpuqueue_enqueue_unmap_import( kbase_tlstream_msgbuf_release(stream, acq_flags); } -void 
__kbase_tlstream_tl_event_kcpuqueue_enqueue_unmap_import_force( +void __kbase_tlstream_tl_kbase_kcpuqueue_enqueue_unmap_import_force( struct kbase_tlstream *stream, const void *kcpu_queue, u64 map_import_buf_gpu_addr) { - const u32 msg_id = KBASE_TL_EVENT_KCPUQUEUE_ENQUEUE_UNMAP_IMPORT_FORCE; + const u32 msg_id = KBASE_TL_KBASE_KCPUQUEUE_ENQUEUE_UNMAP_IMPORT_FORCE; const size_t msg_size = sizeof(msg_id) + sizeof(u64) + sizeof(kcpu_queue) + sizeof(map_import_buf_gpu_addr) @@ -2184,11 +2453,11 @@ void __kbase_tlstream_tl_event_kcpuqueue_enqueue_unmap_import_force( kbase_tlstream_msgbuf_release(stream, acq_flags); } -void __kbase_tlstream_tl_event_array_begin_kcpuqueue_enqueue_jit_alloc( +void __kbase_tlstream_tl_kbase_array_begin_kcpuqueue_enqueue_jit_alloc( struct kbase_tlstream *stream, const void *kcpu_queue) { - const u32 msg_id = KBASE_TL_EVENT_ARRAY_BEGIN_KCPUQUEUE_ENQUEUE_JIT_ALLOC; + const u32 msg_id = KBASE_TL_KBASE_ARRAY_BEGIN_KCPUQUEUE_ENQUEUE_JIT_ALLOC; const size_t msg_size = sizeof(msg_id) + sizeof(u64) + sizeof(kcpu_queue) ; @@ -2206,7 +2475,7 @@ void __kbase_tlstream_tl_event_array_begin_kcpuqueue_enqueue_jit_alloc( kbase_tlstream_msgbuf_release(stream, acq_flags); } -void __kbase_tlstream_tl_event_array_item_kcpuqueue_enqueue_jit_alloc( +void __kbase_tlstream_tl_kbase_array_item_kcpuqueue_enqueue_jit_alloc( struct kbase_tlstream *stream, const void *kcpu_queue, u64 jit_alloc_gpu_alloc_addr_dest, @@ -2219,7 +2488,7 @@ void __kbase_tlstream_tl_event_array_item_kcpuqueue_enqueue_jit_alloc( u32 jit_alloc_flags, u32 jit_alloc_usage_id) { - const u32 msg_id = KBASE_TL_EVENT_ARRAY_ITEM_KCPUQUEUE_ENQUEUE_JIT_ALLOC; + const u32 msg_id = KBASE_TL_KBASE_ARRAY_ITEM_KCPUQUEUE_ENQUEUE_JIT_ALLOC; const size_t msg_size = sizeof(msg_id) + sizeof(u64) + sizeof(kcpu_queue) + sizeof(jit_alloc_gpu_alloc_addr_dest) @@ -2264,11 +2533,11 @@ void __kbase_tlstream_tl_event_array_item_kcpuqueue_enqueue_jit_alloc( kbase_tlstream_msgbuf_release(stream, acq_flags); } -void 
__kbase_tlstream_tl_event_array_end_kcpuqueue_enqueue_jit_alloc( +void __kbase_tlstream_tl_kbase_array_end_kcpuqueue_enqueue_jit_alloc( struct kbase_tlstream *stream, const void *kcpu_queue) { - const u32 msg_id = KBASE_TL_EVENT_ARRAY_END_KCPUQUEUE_ENQUEUE_JIT_ALLOC; + const u32 msg_id = KBASE_TL_KBASE_ARRAY_END_KCPUQUEUE_ENQUEUE_JIT_ALLOC; const size_t msg_size = sizeof(msg_id) + sizeof(u64) + sizeof(kcpu_queue) ; @@ -2286,11 +2555,11 @@ void __kbase_tlstream_tl_event_array_end_kcpuqueue_enqueue_jit_alloc( kbase_tlstream_msgbuf_release(stream, acq_flags); } -void __kbase_tlstream_tl_event_array_begin_kcpuqueue_enqueue_jit_free( +void __kbase_tlstream_tl_kbase_array_begin_kcpuqueue_enqueue_jit_free( struct kbase_tlstream *stream, const void *kcpu_queue) { - const u32 msg_id = KBASE_TL_EVENT_ARRAY_BEGIN_KCPUQUEUE_ENQUEUE_JIT_FREE; + const u32 msg_id = KBASE_TL_KBASE_ARRAY_BEGIN_KCPUQUEUE_ENQUEUE_JIT_FREE; const size_t msg_size = sizeof(msg_id) + sizeof(u64) + sizeof(kcpu_queue) ; @@ -2308,12 +2577,12 @@ void __kbase_tlstream_tl_event_array_begin_kcpuqueue_enqueue_jit_free( kbase_tlstream_msgbuf_release(stream, acq_flags); } -void __kbase_tlstream_tl_event_array_item_kcpuqueue_enqueue_jit_free( +void __kbase_tlstream_tl_kbase_array_item_kcpuqueue_enqueue_jit_free( struct kbase_tlstream *stream, const void *kcpu_queue, u32 jit_alloc_jit_id) { - const u32 msg_id = KBASE_TL_EVENT_ARRAY_ITEM_KCPUQUEUE_ENQUEUE_JIT_FREE; + const u32 msg_id = KBASE_TL_KBASE_ARRAY_ITEM_KCPUQUEUE_ENQUEUE_JIT_FREE; const size_t msg_size = sizeof(msg_id) + sizeof(u64) + sizeof(kcpu_queue) + sizeof(jit_alloc_jit_id) @@ -2334,11 +2603,11 @@ void __kbase_tlstream_tl_event_array_item_kcpuqueue_enqueue_jit_free( kbase_tlstream_msgbuf_release(stream, acq_flags); } -void __kbase_tlstream_tl_event_array_end_kcpuqueue_enqueue_jit_free( +void __kbase_tlstream_tl_kbase_array_end_kcpuqueue_enqueue_jit_free( struct kbase_tlstream *stream, const void *kcpu_queue) { - const u32 msg_id = 
KBASE_TL_EVENT_ARRAY_END_KCPUQUEUE_ENQUEUE_JIT_FREE; + const u32 msg_id = KBASE_TL_KBASE_ARRAY_END_KCPUQUEUE_ENQUEUE_JIT_FREE; const size_t msg_size = sizeof(msg_id) + sizeof(u64) + sizeof(kcpu_queue) ; @@ -2356,11 +2625,11 @@ void __kbase_tlstream_tl_event_array_end_kcpuqueue_enqueue_jit_free( kbase_tlstream_msgbuf_release(stream, acq_flags); } -void __kbase_tlstream_tl_event_kcpuqueue_execute_fence_signal_start( +void __kbase_tlstream_tl_kbase_kcpuqueue_execute_fence_signal_start( struct kbase_tlstream *stream, const void *kcpu_queue) { - const u32 msg_id = KBASE_TL_EVENT_KCPUQUEUE_EXECUTE_FENCE_SIGNAL_START; + const u32 msg_id = KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_FENCE_SIGNAL_START; const size_t msg_size = sizeof(msg_id) + sizeof(u64) + sizeof(kcpu_queue) ; @@ -2378,11 +2647,11 @@ void __kbase_tlstream_tl_event_kcpuqueue_execute_fence_signal_start( kbase_tlstream_msgbuf_release(stream, acq_flags); } -void __kbase_tlstream_tl_event_kcpuqueue_execute_fence_signal_end( +void __kbase_tlstream_tl_kbase_kcpuqueue_execute_fence_signal_end( struct kbase_tlstream *stream, const void *kcpu_queue) { - const u32 msg_id = KBASE_TL_EVENT_KCPUQUEUE_EXECUTE_FENCE_SIGNAL_END; + const u32 msg_id = KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_FENCE_SIGNAL_END; const size_t msg_size = sizeof(msg_id) + sizeof(u64) + sizeof(kcpu_queue) ; @@ -2400,11 +2669,11 @@ void __kbase_tlstream_tl_event_kcpuqueue_execute_fence_signal_end( kbase_tlstream_msgbuf_release(stream, acq_flags); } -void __kbase_tlstream_tl_event_kcpuqueue_execute_fence_wait_start( +void __kbase_tlstream_tl_kbase_kcpuqueue_execute_fence_wait_start( struct kbase_tlstream *stream, const void *kcpu_queue) { - const u32 msg_id = KBASE_TL_EVENT_KCPUQUEUE_EXECUTE_FENCE_WAIT_START; + const u32 msg_id = KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_FENCE_WAIT_START; const size_t msg_size = sizeof(msg_id) + sizeof(u64) + sizeof(kcpu_queue) ; @@ -2422,11 +2691,11 @@ void __kbase_tlstream_tl_event_kcpuqueue_execute_fence_wait_start( 
kbase_tlstream_msgbuf_release(stream, acq_flags); } -void __kbase_tlstream_tl_event_kcpuqueue_execute_fence_wait_end( +void __kbase_tlstream_tl_kbase_kcpuqueue_execute_fence_wait_end( struct kbase_tlstream *stream, const void *kcpu_queue) { - const u32 msg_id = KBASE_TL_EVENT_KCPUQUEUE_EXECUTE_FENCE_WAIT_END; + const u32 msg_id = KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_FENCE_WAIT_END; const size_t msg_size = sizeof(msg_id) + sizeof(u64) + sizeof(kcpu_queue) ; @@ -2444,11 +2713,11 @@ void __kbase_tlstream_tl_event_kcpuqueue_execute_fence_wait_end( kbase_tlstream_msgbuf_release(stream, acq_flags); } -void __kbase_tlstream_tl_event_kcpuqueue_execute_cqs_wait_start( +void __kbase_tlstream_tl_kbase_kcpuqueue_execute_cqs_wait_start( struct kbase_tlstream *stream, const void *kcpu_queue) { - const u32 msg_id = KBASE_TL_EVENT_KCPUQUEUE_EXECUTE_CQS_WAIT_START; + const u32 msg_id = KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_WAIT_START; const size_t msg_size = sizeof(msg_id) + sizeof(u64) + sizeof(kcpu_queue) ; @@ -2466,11 +2735,11 @@ void __kbase_tlstream_tl_event_kcpuqueue_execute_cqs_wait_start( kbase_tlstream_msgbuf_release(stream, acq_flags); } -void __kbase_tlstream_tl_event_kcpuqueue_execute_cqs_wait_end( +void __kbase_tlstream_tl_kbase_kcpuqueue_execute_cqs_wait_end( struct kbase_tlstream *stream, const void *kcpu_queue) { - const u32 msg_id = KBASE_TL_EVENT_KCPUQUEUE_EXECUTE_CQS_WAIT_END; + const u32 msg_id = KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_WAIT_END; const size_t msg_size = sizeof(msg_id) + sizeof(u64) + sizeof(kcpu_queue) ; @@ -2488,11 +2757,11 @@ void __kbase_tlstream_tl_event_kcpuqueue_execute_cqs_wait_end( kbase_tlstream_msgbuf_release(stream, acq_flags); } -void __kbase_tlstream_tl_event_kcpuqueue_execute_cqs_set_start( +void __kbase_tlstream_tl_kbase_kcpuqueue_execute_cqs_set( struct kbase_tlstream *stream, const void *kcpu_queue) { - const u32 msg_id = KBASE_TL_EVENT_KCPUQUEUE_EXECUTE_CQS_SET_START; + const u32 msg_id = KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_SET; const 
size_t msg_size = sizeof(msg_id) + sizeof(u64) + sizeof(kcpu_queue) ; @@ -2510,11 +2779,11 @@ void __kbase_tlstream_tl_event_kcpuqueue_execute_cqs_set_start( kbase_tlstream_msgbuf_release(stream, acq_flags); } -void __kbase_tlstream_tl_event_kcpuqueue_execute_cqs_set_end( +void __kbase_tlstream_tl_kbase_kcpuqueue_execute_debugcopy_start( struct kbase_tlstream *stream, const void *kcpu_queue) { - const u32 msg_id = KBASE_TL_EVENT_KCPUQUEUE_EXECUTE_CQS_SET_END; + const u32 msg_id = KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_DEBUGCOPY_START; const size_t msg_size = sizeof(msg_id) + sizeof(u64) + sizeof(kcpu_queue) ; @@ -2532,11 +2801,11 @@ void __kbase_tlstream_tl_event_kcpuqueue_execute_cqs_set_end( kbase_tlstream_msgbuf_release(stream, acq_flags); } -void __kbase_tlstream_tl_event_kcpuqueue_execute_debugcopy_start( +void __kbase_tlstream_tl_kbase_kcpuqueue_execute_debugcopy_end( struct kbase_tlstream *stream, const void *kcpu_queue) { - const u32 msg_id = KBASE_TL_EVENT_KCPUQUEUE_EXECUTE_DEBUGCOPY_START; + const u32 msg_id = KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_DEBUGCOPY_END; const size_t msg_size = sizeof(msg_id) + sizeof(u64) + sizeof(kcpu_queue) ; @@ -2554,11 +2823,11 @@ void __kbase_tlstream_tl_event_kcpuqueue_execute_debugcopy_start( kbase_tlstream_msgbuf_release(stream, acq_flags); } -void __kbase_tlstream_tl_event_kcpuqueue_execute_debugcopy_end( +void __kbase_tlstream_tl_kbase_kcpuqueue_execute_map_import_start( struct kbase_tlstream *stream, const void *kcpu_queue) { - const u32 msg_id = KBASE_TL_EVENT_KCPUQUEUE_EXECUTE_DEBUGCOPY_END; + const u32 msg_id = KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_MAP_IMPORT_START; const size_t msg_size = sizeof(msg_id) + sizeof(u64) + sizeof(kcpu_queue) ; @@ -2576,11 +2845,11 @@ void __kbase_tlstream_tl_event_kcpuqueue_execute_debugcopy_end( kbase_tlstream_msgbuf_release(stream, acq_flags); } -void __kbase_tlstream_tl_event_kcpuqueue_execute_map_import_start( +void __kbase_tlstream_tl_kbase_kcpuqueue_execute_map_import_end( struct kbase_tlstream 
*stream, const void *kcpu_queue) { - const u32 msg_id = KBASE_TL_EVENT_KCPUQUEUE_EXECUTE_MAP_IMPORT_START; + const u32 msg_id = KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_MAP_IMPORT_END; const size_t msg_size = sizeof(msg_id) + sizeof(u64) + sizeof(kcpu_queue) ; @@ -2598,11 +2867,11 @@ void __kbase_tlstream_tl_event_kcpuqueue_execute_map_import_start( kbase_tlstream_msgbuf_release(stream, acq_flags); } -void __kbase_tlstream_tl_event_kcpuqueue_execute_map_import_end( +void __kbase_tlstream_tl_kbase_kcpuqueue_execute_unmap_import_start( struct kbase_tlstream *stream, const void *kcpu_queue) { - const u32 msg_id = KBASE_TL_EVENT_KCPUQUEUE_EXECUTE_MAP_IMPORT_END; + const u32 msg_id = KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_UNMAP_IMPORT_START; const size_t msg_size = sizeof(msg_id) + sizeof(u64) + sizeof(kcpu_queue) ; @@ -2620,11 +2889,11 @@ void __kbase_tlstream_tl_event_kcpuqueue_execute_map_import_end( kbase_tlstream_msgbuf_release(stream, acq_flags); } -void __kbase_tlstream_tl_event_kcpuqueue_execute_unmap_import_start( +void __kbase_tlstream_tl_kbase_kcpuqueue_execute_unmap_import_end( struct kbase_tlstream *stream, const void *kcpu_queue) { - const u32 msg_id = KBASE_TL_EVENT_KCPUQUEUE_EXECUTE_UNMAP_IMPORT_START; + const u32 msg_id = KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_UNMAP_IMPORT_END; const size_t msg_size = sizeof(msg_id) + sizeof(u64) + sizeof(kcpu_queue) ; @@ -2642,11 +2911,11 @@ void __kbase_tlstream_tl_event_kcpuqueue_execute_unmap_import_start( kbase_tlstream_msgbuf_release(stream, acq_flags); } -void __kbase_tlstream_tl_event_kcpuqueue_execute_unmap_import_end( +void __kbase_tlstream_tl_kbase_kcpuqueue_execute_unmap_import_force_start( struct kbase_tlstream *stream, const void *kcpu_queue) { - const u32 msg_id = KBASE_TL_EVENT_KCPUQUEUE_EXECUTE_UNMAP_IMPORT_END; + const u32 msg_id = KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_UNMAP_IMPORT_FORCE_START; const size_t msg_size = sizeof(msg_id) + sizeof(u64) + sizeof(kcpu_queue) ; @@ -2664,11 +2933,11 @@ void 
__kbase_tlstream_tl_event_kcpuqueue_execute_unmap_import_end( kbase_tlstream_msgbuf_release(stream, acq_flags); } -void __kbase_tlstream_tl_event_kcpuqueue_execute_unmap_import_force_start( +void __kbase_tlstream_tl_kbase_kcpuqueue_execute_unmap_import_force_end( struct kbase_tlstream *stream, const void *kcpu_queue) { - const u32 msg_id = KBASE_TL_EVENT_KCPUQUEUE_EXECUTE_UNMAP_IMPORT_FORCE_START; + const u32 msg_id = KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_UNMAP_IMPORT_FORCE_END; const size_t msg_size = sizeof(msg_id) + sizeof(u64) + sizeof(kcpu_queue) ; @@ -2686,11 +2955,11 @@ void __kbase_tlstream_tl_event_kcpuqueue_execute_unmap_import_force_start( kbase_tlstream_msgbuf_release(stream, acq_flags); } -void __kbase_tlstream_tl_event_kcpuqueue_execute_unmap_import_force_end( +void __kbase_tlstream_tl_kbase_kcpuqueue_execute_jit_alloc_start( struct kbase_tlstream *stream, const void *kcpu_queue) { - const u32 msg_id = KBASE_TL_EVENT_KCPUQUEUE_EXECUTE_UNMAP_IMPORT_FORCE_END; + const u32 msg_id = KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_JIT_ALLOC_START; const size_t msg_size = sizeof(msg_id) + sizeof(u64) + sizeof(kcpu_queue) ; @@ -2708,11 +2977,11 @@ void __kbase_tlstream_tl_event_kcpuqueue_execute_unmap_import_force_end( kbase_tlstream_msgbuf_release(stream, acq_flags); } -void __kbase_tlstream_tl_event_kcpuqueue_execute_jit_alloc_start( +void __kbase_tlstream_tl_kbase_array_begin_kcpuqueue_execute_jit_alloc_end( struct kbase_tlstream *stream, const void *kcpu_queue) { - const u32 msg_id = KBASE_TL_EVENT_KCPUQUEUE_EXECUTE_JIT_ALLOC_START; + const u32 msg_id = KBASE_TL_KBASE_ARRAY_BEGIN_KCPUQUEUE_EXECUTE_JIT_ALLOC_END; const size_t msg_size = sizeof(msg_id) + sizeof(u64) + sizeof(kcpu_queue) ; @@ -2730,35 +2999,13 @@ void __kbase_tlstream_tl_event_kcpuqueue_execute_jit_alloc_start( kbase_tlstream_msgbuf_release(stream, acq_flags); } -void __kbase_tlstream_tl_event_array_begin_kcpuqueue_execute_jit_alloc_end( - struct kbase_tlstream *stream, - const void *kcpu_queue) -{ - const u32 
msg_id = KBASE_TL_EVENT_ARRAY_BEGIN_KCPUQUEUE_EXECUTE_JIT_ALLOC_END; - const size_t msg_size = sizeof(msg_id) + sizeof(u64) - + sizeof(kcpu_queue) - ; - char *buffer; - unsigned long acq_flags; - size_t pos = 0; - - buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); - - pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); - pos = kbasep_serialize_timestamp(buffer, pos); - pos = kbasep_serialize_bytes(buffer, - pos, &kcpu_queue, sizeof(kcpu_queue)); - - kbase_tlstream_msgbuf_release(stream, acq_flags); -} - -void __kbase_tlstream_tl_event_array_item_kcpuqueue_execute_jit_alloc_end( +void __kbase_tlstream_tl_kbase_array_item_kcpuqueue_execute_jit_alloc_end( struct kbase_tlstream *stream, const void *kcpu_queue, u64 jit_alloc_gpu_alloc_addr, u64 jit_alloc_mmu_flags) { - const u32 msg_id = KBASE_TL_EVENT_ARRAY_ITEM_KCPUQUEUE_EXECUTE_JIT_ALLOC_END; + const u32 msg_id = KBASE_TL_KBASE_ARRAY_ITEM_KCPUQUEUE_EXECUTE_JIT_ALLOC_END; const size_t msg_size = sizeof(msg_id) + sizeof(u64) + sizeof(kcpu_queue) + sizeof(jit_alloc_gpu_alloc_addr) @@ -2782,11 +3029,11 @@ void __kbase_tlstream_tl_event_array_item_kcpuqueue_execute_jit_alloc_end( kbase_tlstream_msgbuf_release(stream, acq_flags); } -void __kbase_tlstream_tl_event_array_end_kcpuqueue_execute_jit_alloc_end( +void __kbase_tlstream_tl_kbase_array_end_kcpuqueue_execute_jit_alloc_end( struct kbase_tlstream *stream, const void *kcpu_queue) { - const u32 msg_id = KBASE_TL_EVENT_ARRAY_END_KCPUQUEUE_EXECUTE_JIT_ALLOC_END; + const u32 msg_id = KBASE_TL_KBASE_ARRAY_END_KCPUQUEUE_EXECUTE_JIT_ALLOC_END; const size_t msg_size = sizeof(msg_id) + sizeof(u64) + sizeof(kcpu_queue) ; @@ -2804,11 +3051,11 @@ void __kbase_tlstream_tl_event_array_end_kcpuqueue_execute_jit_alloc_end( kbase_tlstream_msgbuf_release(stream, acq_flags); } -void __kbase_tlstream_tl_event_kcpuqueue_execute_jit_free_start( +void __kbase_tlstream_tl_kbase_kcpuqueue_execute_jit_free_start( struct kbase_tlstream *stream, const void 
*kcpu_queue) { - const u32 msg_id = KBASE_TL_EVENT_KCPUQUEUE_EXECUTE_JIT_FREE_START; + const u32 msg_id = KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_JIT_FREE_START; const size_t msg_size = sizeof(msg_id) + sizeof(u64) + sizeof(kcpu_queue) ; @@ -2826,11 +3073,11 @@ void __kbase_tlstream_tl_event_kcpuqueue_execute_jit_free_start( kbase_tlstream_msgbuf_release(stream, acq_flags); } -void __kbase_tlstream_tl_event_array_begin_kcpuqueue_execute_jit_free_end( +void __kbase_tlstream_tl_kbase_array_begin_kcpuqueue_execute_jit_free_end( struct kbase_tlstream *stream, const void *kcpu_queue) { - const u32 msg_id = KBASE_TL_EVENT_ARRAY_BEGIN_KCPUQUEUE_EXECUTE_JIT_FREE_END; + const u32 msg_id = KBASE_TL_KBASE_ARRAY_BEGIN_KCPUQUEUE_EXECUTE_JIT_FREE_END; const size_t msg_size = sizeof(msg_id) + sizeof(u64) + sizeof(kcpu_queue) ; @@ -2848,12 +3095,12 @@ void __kbase_tlstream_tl_event_array_begin_kcpuqueue_execute_jit_free_end( kbase_tlstream_msgbuf_release(stream, acq_flags); } -void __kbase_tlstream_tl_event_array_item_kcpuqueue_execute_jit_free_end( +void __kbase_tlstream_tl_kbase_array_item_kcpuqueue_execute_jit_free_end( struct kbase_tlstream *stream, const void *kcpu_queue, u64 jit_free_pages_used) { - const u32 msg_id = KBASE_TL_EVENT_ARRAY_ITEM_KCPUQUEUE_EXECUTE_JIT_FREE_END; + const u32 msg_id = KBASE_TL_KBASE_ARRAY_ITEM_KCPUQUEUE_EXECUTE_JIT_FREE_END; const size_t msg_size = sizeof(msg_id) + sizeof(u64) + sizeof(kcpu_queue) + sizeof(jit_free_pages_used) @@ -2874,11 +3121,11 @@ void __kbase_tlstream_tl_event_array_item_kcpuqueue_execute_jit_free_end( kbase_tlstream_msgbuf_release(stream, acq_flags); } -void __kbase_tlstream_tl_event_array_end_kcpuqueue_execute_jit_free_end( +void __kbase_tlstream_tl_kbase_array_end_kcpuqueue_execute_jit_free_end( struct kbase_tlstream *stream, const void *kcpu_queue) { - const u32 msg_id = KBASE_TL_EVENT_ARRAY_END_KCPUQUEUE_EXECUTE_JIT_FREE_END; + const u32 msg_id = KBASE_TL_KBASE_ARRAY_END_KCPUQUEUE_EXECUTE_JIT_FREE_END; const size_t msg_size = 
sizeof(msg_id) + sizeof(u64) + sizeof(kcpu_queue) ; @@ -2896,11 +3143,11 @@ void __kbase_tlstream_tl_event_array_end_kcpuqueue_execute_jit_free_end( kbase_tlstream_msgbuf_release(stream, acq_flags); } -void __kbase_tlstream_tl_event_kcpuqueue_execute_errorbarrier( +void __kbase_tlstream_tl_kbase_kcpuqueue_execute_errorbarrier( struct kbase_tlstream *stream, const void *kcpu_queue) { - const u32 msg_id = KBASE_TL_EVENT_KCPUQUEUE_EXECUTE_ERRORBARRIER; + const u32 msg_id = KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_ERRORBARRIER; const size_t msg_size = sizeof(msg_id) + sizeof(u64) + sizeof(kcpu_queue) ; @@ -2918,4 +3165,30 @@ void __kbase_tlstream_tl_event_kcpuqueue_execute_errorbarrier( kbase_tlstream_msgbuf_release(stream, acq_flags); } +void __kbase_tlstream_tl_kbase_csffw_tlstream_overflow( + struct kbase_tlstream *stream, + u64 csffw_timestamp, + u64 csffw_cycle) +{ + const u32 msg_id = KBASE_TL_KBASE_CSFFW_TLSTREAM_OVERFLOW; + const size_t msg_size = sizeof(msg_id) + sizeof(u64) + + sizeof(csffw_timestamp) + + sizeof(csffw_cycle) + ; + char *buffer; + unsigned long acq_flags; + size_t pos = 0; + + buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + + pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); + pos = kbasep_serialize_timestamp(buffer, pos); + pos = kbasep_serialize_bytes(buffer, + pos, &csffw_timestamp, sizeof(csffw_timestamp)); + pos = kbasep_serialize_bytes(buffer, + pos, &csffw_cycle, sizeof(csffw_cycle)); + + kbase_tlstream_msgbuf_release(stream, acq_flags); +} + /* clang-format on */ diff --git a/drivers/gpu/arm/bifrost/tl/mali_kbase_tracepoints.h b/drivers/gpu/arm/bifrost/tl/mali_kbase_tracepoints.h new file mode 100644 index 000000000000..ef0454386799 --- /dev/null +++ b/drivers/gpu/arm/bifrost/tl/mali_kbase_tracepoints.h @@ -0,0 +1,2381 @@ +/* + * + * (C) COPYRIGHT 2010-2020 ARM Limited. All rights reserved. 
+ * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +/* + * THIS FILE IS AUTOGENERATED BY mali_trace_generator.py. + * DO NOT EDIT. + */ + +#if !defined(_KBASE_TRACEPOINTS_H) +#define _KBASE_TRACEPOINTS_H + +/* Tracepoints are abstract callbacks notifying that some important + * software or hardware event has happened. + * + * In this particular implementation, it results in a MIPE + * timeline event and, in some cases, it also fires an ftrace event + * (a.k.a. Gator events, see details below). + */ + +#include "mali_kbase.h" +#include "mali_kbase_gator.h" + +#include +#include + +/* clang-format off */ + +struct kbase_tlstream; + +extern const size_t __obj_stream_offset; +extern const size_t __aux_stream_offset; + +/* This macro dispatches a kbase_tlstream from + * a kbase_device instance. Only AUX or OBJ + * streams can be dispatched. It is aware of + * kbase_timeline binary representation and + * relies on offset variables: + * __obj_stream_offset and __aux_stream_offset. + */ +#define __TL_DISPATCH_STREAM(kbdev, stype) \ + ((struct kbase_tlstream *) \ + ((u8 *)kbdev->timeline + __ ## stype ## _stream_offset)) + +struct tp_desc; + +/* Descriptors of timeline messages transmitted in object events stream.
*/ +extern const char *obj_desc_header; +extern const size_t obj_desc_header_size; +/* Descriptors of timeline messages transmitted in auxiliary events stream. */ +extern const char *aux_desc_header; +extern const size_t aux_desc_header_size; + +#define TL_ATOM_STATE_IDLE 0 +#define TL_ATOM_STATE_READY 1 +#define TL_ATOM_STATE_DONE 2 +#define TL_ATOM_STATE_POSTED 3 + +#define TL_JS_EVENT_START GATOR_JOB_SLOT_START +#define TL_JS_EVENT_STOP GATOR_JOB_SLOT_STOP +#define TL_JS_EVENT_SOFT_STOP GATOR_JOB_SLOT_SOFT_STOPPED + +#define TLSTREAM_ENABLED (1 << 31) + +void __kbase_tlstream_tl_new_ctx( + struct kbase_tlstream *stream, + const void *ctx, + u32 ctx_nr, + u32 tgid); +void __kbase_tlstream_tl_new_gpu( + struct kbase_tlstream *stream, + const void *gpu, + u32 gpu_id, + u32 core_count); +void __kbase_tlstream_tl_new_lpu( + struct kbase_tlstream *stream, + const void *lpu, + u32 lpu_nr, + u32 lpu_fn); +void __kbase_tlstream_tl_new_atom( + struct kbase_tlstream *stream, + const void *atom, + u32 atom_nr); +void __kbase_tlstream_tl_new_as( + struct kbase_tlstream *stream, + const void *address_space, + u32 as_nr); +void __kbase_tlstream_tl_del_ctx( + struct kbase_tlstream *stream, + const void *ctx); +void __kbase_tlstream_tl_del_atom( + struct kbase_tlstream *stream, + const void *atom); +void __kbase_tlstream_tl_lifelink_lpu_gpu( + struct kbase_tlstream *stream, + const void *lpu, + const void *gpu); +void __kbase_tlstream_tl_lifelink_as_gpu( + struct kbase_tlstream *stream, + const void *address_space, + const void *gpu); +void __kbase_tlstream_tl_ret_ctx_lpu( + struct kbase_tlstream *stream, + const void *ctx, + const void *lpu); +void __kbase_tlstream_tl_ret_atom_ctx( + struct kbase_tlstream *stream, + const void *atom, + const void *ctx); +void __kbase_tlstream_tl_ret_atom_lpu( + struct kbase_tlstream *stream, + const void *atom, + const void *lpu, + const char *attrib_match_list); +void __kbase_tlstream_tl_nret_ctx_lpu( + struct kbase_tlstream *stream, + const 
void *ctx, + const void *lpu); +void __kbase_tlstream_tl_nret_atom_ctx( + struct kbase_tlstream *stream, + const void *atom, + const void *ctx); +void __kbase_tlstream_tl_nret_atom_lpu( + struct kbase_tlstream *stream, + const void *atom, + const void *lpu); +void __kbase_tlstream_tl_ret_as_ctx( + struct kbase_tlstream *stream, + const void *address_space, + const void *ctx); +void __kbase_tlstream_tl_nret_as_ctx( + struct kbase_tlstream *stream, + const void *address_space, + const void *ctx); +void __kbase_tlstream_tl_ret_atom_as( + struct kbase_tlstream *stream, + const void *atom, + const void *address_space); +void __kbase_tlstream_tl_nret_atom_as( + struct kbase_tlstream *stream, + const void *atom, + const void *address_space); +void __kbase_tlstream_tl_attrib_atom_config( + struct kbase_tlstream *stream, + const void *atom, + u64 descriptor, + u64 affinity, + u32 config); +void __kbase_tlstream_tl_attrib_atom_priority( + struct kbase_tlstream *stream, + const void *atom, + u32 prio); +void __kbase_tlstream_tl_attrib_atom_state( + struct kbase_tlstream *stream, + const void *atom, + u32 state); +void __kbase_tlstream_tl_attrib_atom_prioritized( + struct kbase_tlstream *stream, + const void *atom); +void __kbase_tlstream_tl_attrib_atom_jit( + struct kbase_tlstream *stream, + const void *atom, + u64 edit_addr, + u64 new_addr, + u32 jit_flags, + u64 mem_flags, + u32 j_id, + u64 com_pgs, + u64 extent, + u64 va_pgs); +void __kbase_tlstream_tl_jit_usedpages( + struct kbase_tlstream *stream, + u64 used_pages, + u32 j_id); +void __kbase_tlstream_tl_attrib_atom_jitallocinfo( + struct kbase_tlstream *stream, + const void *atom, + u64 va_pgs, + u64 com_pgs, + u64 extent, + u32 j_id, + u32 bin_id, + u32 max_allocs, + u32 jit_flags, + u32 usg_id); +void __kbase_tlstream_tl_attrib_atom_jitfreeinfo( + struct kbase_tlstream *stream, + const void *atom, + u32 j_id); +void __kbase_tlstream_tl_attrib_as_config( + struct kbase_tlstream *stream, + const void *address_space, + 
u64 transtab, + u64 memattr, + u64 transcfg); +void __kbase_tlstream_tl_event_lpu_softstop( + struct kbase_tlstream *stream, + const void *lpu); +void __kbase_tlstream_tl_event_atom_softstop_ex( + struct kbase_tlstream *stream, + const void *atom); +void __kbase_tlstream_tl_event_atom_softstop_issue( + struct kbase_tlstream *stream, + const void *atom); +void __kbase_tlstream_tl_event_atom_softjob_start( + struct kbase_tlstream *stream, + const void *atom); +void __kbase_tlstream_tl_event_atom_softjob_end( + struct kbase_tlstream *stream, + const void *atom); +void __kbase_tlstream_tl_event_arb_granted( + struct kbase_tlstream *stream, + const void *gpu); +void __kbase_tlstream_tl_event_arb_started( + struct kbase_tlstream *stream, + const void *gpu); +void __kbase_tlstream_tl_event_arb_stop_requested( + struct kbase_tlstream *stream, + const void *gpu); +void __kbase_tlstream_tl_event_arb_stopped( + struct kbase_tlstream *stream, + const void *gpu); +void __kbase_tlstream_jd_gpu_soft_reset( + struct kbase_tlstream *stream, + const void *gpu); +void __kbase_tlstream_aux_pm_state( + struct kbase_tlstream *stream, + u32 core_type, + u64 core_state_bitset); +void __kbase_tlstream_aux_pagefault( + struct kbase_tlstream *stream, + u32 ctx_nr, + u32 as_nr, + u64 page_cnt_change); +void __kbase_tlstream_aux_pagesalloc( + struct kbase_tlstream *stream, + u32 ctx_nr, + u64 page_cnt); +void __kbase_tlstream_aux_devfreq_target( + struct kbase_tlstream *stream, + u64 target_freq); +void __kbase_tlstream_aux_protected_enter_start( + struct kbase_tlstream *stream, + const void *gpu); +void __kbase_tlstream_aux_protected_enter_end( + struct kbase_tlstream *stream, + const void *gpu); +void __kbase_tlstream_aux_protected_leave_start( + struct kbase_tlstream *stream, + const void *gpu); +void __kbase_tlstream_aux_protected_leave_end( + struct kbase_tlstream *stream, + const void *gpu); +void __kbase_tlstream_aux_jit_stats( + struct kbase_tlstream *stream, + u32 ctx_nr, + u32 bid, + 
u32 max_allocs, + u32 allocs, + u32 va_pages, + u32 ph_pages); +void __kbase_tlstream_aux_event_job_slot( + struct kbase_tlstream *stream, + const void *ctx, + u32 slot_nr, + u32 atom_nr, + u32 event); +void __kbase_tlstream_tl_kbase_new_device( + struct kbase_tlstream *stream, + u32 kbase_device_id, + u32 kbase_device_gpu_core_count, + u32 kbase_device_max_num_csgs, + u32 kbase_device_as_count); +void __kbase_tlstream_tl_kbase_device_program_csg( + struct kbase_tlstream *stream, + u32 kbase_device_id, + u32 gpu_cmdq_grp_handle, + u32 kbase_device_csg_slot_index); +void __kbase_tlstream_tl_kbase_device_deprogram_csg( + struct kbase_tlstream *stream, + u32 kbase_device_id, + u32 kbase_device_csg_slot_index); +void __kbase_tlstream_tl_kbase_new_ctx( + struct kbase_tlstream *stream, + u32 kernel_ctx_id, + u32 kbase_device_id); +void __kbase_tlstream_tl_kbase_del_ctx( + struct kbase_tlstream *stream, + u32 kernel_ctx_id); +void __kbase_tlstream_tl_kbase_ctx_assign_as( + struct kbase_tlstream *stream, + u32 kernel_ctx_id, + u32 kbase_device_as_index); +void __kbase_tlstream_tl_kbase_ctx_unassign_as( + struct kbase_tlstream *stream, + u32 kernel_ctx_id); +void __kbase_tlstream_tl_kbase_new_kcpuqueue( + struct kbase_tlstream *stream, + const void *kcpu_queue, + u32 kernel_ctx_id, + u32 kcpuq_num_pending_cmds); +void __kbase_tlstream_tl_kbase_del_kcpuqueue( + struct kbase_tlstream *stream, + const void *kcpu_queue); +void __kbase_tlstream_tl_kbase_kcpuqueue_enqueue_fence_signal( + struct kbase_tlstream *stream, + const void *kcpu_queue, + const void *fence); +void __kbase_tlstream_tl_kbase_kcpuqueue_enqueue_fence_wait( + struct kbase_tlstream *stream, + const void *kcpu_queue, + const void *fence); +void __kbase_tlstream_tl_kbase_array_begin_kcpuqueue_enqueue_cqs_wait( + struct kbase_tlstream *stream, + const void *kcpu_queue); +void __kbase_tlstream_tl_kbase_array_item_kcpuqueue_enqueue_cqs_wait( + struct kbase_tlstream *stream, + const void *kcpu_queue, + u64 
cqs_obj_gpu_addr, + u32 cqs_obj_compare_value); +void __kbase_tlstream_tl_kbase_array_end_kcpuqueue_enqueue_cqs_wait( + struct kbase_tlstream *stream, + const void *kcpu_queue); +void __kbase_tlstream_tl_kbase_array_begin_kcpuqueue_enqueue_cqs_set( + struct kbase_tlstream *stream, + const void *kcpu_queue); +void __kbase_tlstream_tl_kbase_array_item_kcpuqueue_enqueue_cqs_set( + struct kbase_tlstream *stream, + const void *kcpu_queue, + u64 cqs_obj_gpu_addr); +void __kbase_tlstream_tl_kbase_array_end_kcpuqueue_enqueue_cqs_set( + struct kbase_tlstream *stream, + const void *kcpu_queue); +void __kbase_tlstream_tl_kbase_array_begin_kcpuqueue_enqueue_debugcopy( + struct kbase_tlstream *stream, + const void *kcpu_queue); +void __kbase_tlstream_tl_kbase_array_item_kcpuqueue_enqueue_debugcopy( + struct kbase_tlstream *stream, + const void *kcpu_queue, + u64 debugcopy_dst_size); +void __kbase_tlstream_tl_kbase_array_end_kcpuqueue_enqueue_debugcopy( + struct kbase_tlstream *stream, + const void *kcpu_queue); +void __kbase_tlstream_tl_kbase_kcpuqueue_enqueue_map_import( + struct kbase_tlstream *stream, + const void *kcpu_queue, + u64 map_import_buf_gpu_addr); +void __kbase_tlstream_tl_kbase_kcpuqueue_enqueue_unmap_import( + struct kbase_tlstream *stream, + const void *kcpu_queue, + u64 map_import_buf_gpu_addr); +void __kbase_tlstream_tl_kbase_kcpuqueue_enqueue_unmap_import_force( + struct kbase_tlstream *stream, + const void *kcpu_queue, + u64 map_import_buf_gpu_addr); +void __kbase_tlstream_tl_kbase_array_begin_kcpuqueue_enqueue_jit_alloc( + struct kbase_tlstream *stream, + const void *kcpu_queue); +void __kbase_tlstream_tl_kbase_array_item_kcpuqueue_enqueue_jit_alloc( + struct kbase_tlstream *stream, + const void *kcpu_queue, + u64 jit_alloc_gpu_alloc_addr_dest, + u64 jit_alloc_va_pages, + u64 jit_alloc_commit_pages, + u64 jit_alloc_extent, + u32 jit_alloc_jit_id, + u32 jit_alloc_bin_id, + u32 jit_alloc_max_allocations, + u32 jit_alloc_flags, + u32 jit_alloc_usage_id); 
+void __kbase_tlstream_tl_kbase_array_end_kcpuqueue_enqueue_jit_alloc( + struct kbase_tlstream *stream, + const void *kcpu_queue); +void __kbase_tlstream_tl_kbase_array_begin_kcpuqueue_enqueue_jit_free( + struct kbase_tlstream *stream, + const void *kcpu_queue); +void __kbase_tlstream_tl_kbase_array_item_kcpuqueue_enqueue_jit_free( + struct kbase_tlstream *stream, + const void *kcpu_queue, + u32 jit_alloc_jit_id); +void __kbase_tlstream_tl_kbase_array_end_kcpuqueue_enqueue_jit_free( + struct kbase_tlstream *stream, + const void *kcpu_queue); +void __kbase_tlstream_tl_kbase_kcpuqueue_execute_fence_signal_start( + struct kbase_tlstream *stream, + const void *kcpu_queue); +void __kbase_tlstream_tl_kbase_kcpuqueue_execute_fence_signal_end( + struct kbase_tlstream *stream, + const void *kcpu_queue); +void __kbase_tlstream_tl_kbase_kcpuqueue_execute_fence_wait_start( + struct kbase_tlstream *stream, + const void *kcpu_queue); +void __kbase_tlstream_tl_kbase_kcpuqueue_execute_fence_wait_end( + struct kbase_tlstream *stream, + const void *kcpu_queue); +void __kbase_tlstream_tl_kbase_kcpuqueue_execute_cqs_wait_start( + struct kbase_tlstream *stream, + const void *kcpu_queue); +void __kbase_tlstream_tl_kbase_kcpuqueue_execute_cqs_wait_end( + struct kbase_tlstream *stream, + const void *kcpu_queue); +void __kbase_tlstream_tl_kbase_kcpuqueue_execute_cqs_set( + struct kbase_tlstream *stream, + const void *kcpu_queue); +void __kbase_tlstream_tl_kbase_kcpuqueue_execute_debugcopy_start( + struct kbase_tlstream *stream, + const void *kcpu_queue); +void __kbase_tlstream_tl_kbase_kcpuqueue_execute_debugcopy_end( + struct kbase_tlstream *stream, + const void *kcpu_queue); +void __kbase_tlstream_tl_kbase_kcpuqueue_execute_map_import_start( + struct kbase_tlstream *stream, + const void *kcpu_queue); +void __kbase_tlstream_tl_kbase_kcpuqueue_execute_map_import_end( + struct kbase_tlstream *stream, + const void *kcpu_queue); +void 
__kbase_tlstream_tl_kbase_kcpuqueue_execute_unmap_import_start( + struct kbase_tlstream *stream, + const void *kcpu_queue); +void __kbase_tlstream_tl_kbase_kcpuqueue_execute_unmap_import_end( + struct kbase_tlstream *stream, + const void *kcpu_queue); +void __kbase_tlstream_tl_kbase_kcpuqueue_execute_unmap_import_force_start( + struct kbase_tlstream *stream, + const void *kcpu_queue); +void __kbase_tlstream_tl_kbase_kcpuqueue_execute_unmap_import_force_end( + struct kbase_tlstream *stream, + const void *kcpu_queue); +void __kbase_tlstream_tl_kbase_kcpuqueue_execute_jit_alloc_start( + struct kbase_tlstream *stream, + const void *kcpu_queue); +void __kbase_tlstream_tl_kbase_array_begin_kcpuqueue_execute_jit_alloc_end( + struct kbase_tlstream *stream, + const void *kcpu_queue); +void __kbase_tlstream_tl_kbase_array_item_kcpuqueue_execute_jit_alloc_end( + struct kbase_tlstream *stream, + const void *kcpu_queue, + u64 jit_alloc_gpu_alloc_addr, + u64 jit_alloc_mmu_flags); +void __kbase_tlstream_tl_kbase_array_end_kcpuqueue_execute_jit_alloc_end( + struct kbase_tlstream *stream, + const void *kcpu_queue); +void __kbase_tlstream_tl_kbase_kcpuqueue_execute_jit_free_start( + struct kbase_tlstream *stream, + const void *kcpu_queue); +void __kbase_tlstream_tl_kbase_array_begin_kcpuqueue_execute_jit_free_end( + struct kbase_tlstream *stream, + const void *kcpu_queue); +void __kbase_tlstream_tl_kbase_array_item_kcpuqueue_execute_jit_free_end( + struct kbase_tlstream *stream, + const void *kcpu_queue, + u64 jit_free_pages_used); +void __kbase_tlstream_tl_kbase_array_end_kcpuqueue_execute_jit_free_end( + struct kbase_tlstream *stream, + const void *kcpu_queue); +void __kbase_tlstream_tl_kbase_kcpuqueue_execute_errorbarrier( + struct kbase_tlstream *stream, + const void *kcpu_queue); +void __kbase_tlstream_tl_kbase_csffw_tlstream_overflow( + struct kbase_tlstream *stream, + u64 csffw_timestamp, + u64 csffw_cycle); + +struct kbase_tlstream; + +/** + * KBASE_TLSTREAM_TL_NEW_CTX - + * 
object ctx is created + * + * @kbdev: Kbase device + * @ctx: Name of the context object + * @ctx_nr: Kernel context number + * @tgid: Thread Group Id + */ +#define KBASE_TLSTREAM_TL_NEW_CTX( \ + kbdev, \ + ctx, \ + ctx_nr, \ + tgid \ + ) \ + do { \ + int enabled = atomic_read(&kbdev->timeline_flags); \ + if (enabled & TLSTREAM_ENABLED) \ + __kbase_tlstream_tl_new_ctx( \ + __TL_DISPATCH_STREAM(kbdev, obj), \ + ctx, ctx_nr, tgid); \ + } while (0) + +/** + * KBASE_TLSTREAM_TL_NEW_GPU - + * object gpu is created + * + * @kbdev: Kbase device + * @gpu: Name of the GPU object + * @gpu_id: Name of the GPU object + * @core_count: Number of cores this GPU hosts + */ +#define KBASE_TLSTREAM_TL_NEW_GPU( \ + kbdev, \ + gpu, \ + gpu_id, \ + core_count \ + ) \ + do { \ + int enabled = atomic_read(&kbdev->timeline_flags); \ + if (enabled & TLSTREAM_ENABLED) \ + __kbase_tlstream_tl_new_gpu( \ + __TL_DISPATCH_STREAM(kbdev, obj), \ + gpu, gpu_id, core_count); \ + } while (0) + +/** + * KBASE_TLSTREAM_TL_NEW_LPU - + * object lpu is created + * + * @kbdev: Kbase device + * @lpu: Name of the Logical Processing Unit object + * @lpu_nr: Sequential number assigned to the newly created LPU + * @lpu_fn: Property describing functional abilities of this LPU + */ +#define KBASE_TLSTREAM_TL_NEW_LPU( \ + kbdev, \ + lpu, \ + lpu_nr, \ + lpu_fn \ + ) \ + do { \ + int enabled = atomic_read(&kbdev->timeline_flags); \ + if (enabled & TLSTREAM_ENABLED) \ + __kbase_tlstream_tl_new_lpu( \ + __TL_DISPATCH_STREAM(kbdev, obj), \ + lpu, lpu_nr, lpu_fn); \ + } while (0) + +/** + * KBASE_TLSTREAM_TL_NEW_ATOM - + * object atom is created + * + * @kbdev: Kbase device + * @atom: Atom identifier + * @atom_nr: Sequential number of an atom + */ +#define KBASE_TLSTREAM_TL_NEW_ATOM( \ + kbdev, \ + atom, \ + atom_nr \ + ) \ + do { \ + int enabled = atomic_read(&kbdev->timeline_flags); \ + if (enabled & TLSTREAM_ENABLED) \ + __kbase_tlstream_tl_new_atom( \ + __TL_DISPATCH_STREAM(kbdev, obj), \ + atom, atom_nr); \ + } 
while (0) + +/** + * KBASE_TLSTREAM_TL_NEW_AS - + * address space object is created + * + * @kbdev: Kbase device + * @address_space: Name of the address space object + * @as_nr: Address space number + */ +#define KBASE_TLSTREAM_TL_NEW_AS( \ + kbdev, \ + address_space, \ + as_nr \ + ) \ + do { \ + int enabled = atomic_read(&kbdev->timeline_flags); \ + if (enabled & TLSTREAM_ENABLED) \ + __kbase_tlstream_tl_new_as( \ + __TL_DISPATCH_STREAM(kbdev, obj), \ + address_space, as_nr); \ + } while (0) + +/** + * KBASE_TLSTREAM_TL_DEL_CTX - + * context is destroyed + * + * @kbdev: Kbase device + * @ctx: Name of the context object + */ +#define KBASE_TLSTREAM_TL_DEL_CTX( \ + kbdev, \ + ctx \ + ) \ + do { \ + int enabled = atomic_read(&kbdev->timeline_flags); \ + if (enabled & TLSTREAM_ENABLED) \ + __kbase_tlstream_tl_del_ctx( \ + __TL_DISPATCH_STREAM(kbdev, obj), \ + ctx); \ + } while (0) + +/** + * KBASE_TLSTREAM_TL_DEL_ATOM - + * atom is destroyed + * + * @kbdev: Kbase device + * @atom: Atom identifier + */ +#define KBASE_TLSTREAM_TL_DEL_ATOM( \ + kbdev, \ + atom \ + ) \ + do { \ + int enabled = atomic_read(&kbdev->timeline_flags); \ + if (enabled & TLSTREAM_ENABLED) \ + __kbase_tlstream_tl_del_atom( \ + __TL_DISPATCH_STREAM(kbdev, obj), \ + atom); \ + } while (0) + +/** + * KBASE_TLSTREAM_TL_LIFELINK_LPU_GPU - + * lpu is deleted with gpu + * + * @kbdev: Kbase device + * @lpu: Name of the Logical Processing Unit object + * @gpu: Name of the GPU object + */ +#define KBASE_TLSTREAM_TL_LIFELINK_LPU_GPU( \ + kbdev, \ + lpu, \ + gpu \ + ) \ + do { \ + int enabled = atomic_read(&kbdev->timeline_flags); \ + if (enabled & TLSTREAM_ENABLED) \ + __kbase_tlstream_tl_lifelink_lpu_gpu( \ + __TL_DISPATCH_STREAM(kbdev, obj), \ + lpu, gpu); \ + } while (0) + +/** + * KBASE_TLSTREAM_TL_LIFELINK_AS_GPU - + * address space is deleted with gpu + * + * @kbdev: Kbase device + * @address_space: Name of the address space object + * @gpu: Name of the GPU object + */ +#define 
KBASE_TLSTREAM_TL_LIFELINK_AS_GPU( \ + kbdev, \ + address_space, \ + gpu \ + ) \ + do { \ + int enabled = atomic_read(&kbdev->timeline_flags); \ + if (enabled & TLSTREAM_ENABLED) \ + __kbase_tlstream_tl_lifelink_as_gpu( \ + __TL_DISPATCH_STREAM(kbdev, obj), \ + address_space, gpu); \ + } while (0) + +/** + * KBASE_TLSTREAM_TL_RET_CTX_LPU - + * context is retained by lpu + * + * @kbdev: Kbase device + * @ctx: Name of the context object + * @lpu: Name of the Logical Processing Unit object + */ +#define KBASE_TLSTREAM_TL_RET_CTX_LPU( \ + kbdev, \ + ctx, \ + lpu \ + ) \ + do { \ + int enabled = atomic_read(&kbdev->timeline_flags); \ + if (enabled & TLSTREAM_ENABLED) \ + __kbase_tlstream_tl_ret_ctx_lpu( \ + __TL_DISPATCH_STREAM(kbdev, obj), \ + ctx, lpu); \ + } while (0) + +/** + * KBASE_TLSTREAM_TL_RET_ATOM_CTX - + * atom is retained by context + * + * @kbdev: Kbase device + * @atom: Atom identifier + * @ctx: Name of the context object + */ +#define KBASE_TLSTREAM_TL_RET_ATOM_CTX( \ + kbdev, \ + atom, \ + ctx \ + ) \ + do { \ + int enabled = atomic_read(&kbdev->timeline_flags); \ + if (enabled & TLSTREAM_ENABLED) \ + __kbase_tlstream_tl_ret_atom_ctx( \ + __TL_DISPATCH_STREAM(kbdev, obj), \ + atom, ctx); \ + } while (0) + +/** + * KBASE_TLSTREAM_TL_RET_ATOM_LPU - + * atom is retained by lpu + * + * @kbdev: Kbase device + * @atom: Atom identifier + * @lpu: Name of the Logical Processing Unit object + * @attrib_match_list: List containing match operator attributes + */ +#define KBASE_TLSTREAM_TL_RET_ATOM_LPU( \ + kbdev, \ + atom, \ + lpu, \ + attrib_match_list \ + ) \ + do { \ + int enabled = atomic_read(&kbdev->timeline_flags); \ + if (enabled & TLSTREAM_ENABLED) \ + __kbase_tlstream_tl_ret_atom_lpu( \ + __TL_DISPATCH_STREAM(kbdev, obj), \ + atom, lpu, attrib_match_list); \ + } while (0) + +/** + * KBASE_TLSTREAM_TL_NRET_CTX_LPU - + * context is released by lpu + * + * @kbdev: Kbase device + * @ctx: Name of the context object + * @lpu: Name of the Logical Processing Unit 
object + */ +#define KBASE_TLSTREAM_TL_NRET_CTX_LPU( \ + kbdev, \ + ctx, \ + lpu \ + ) \ + do { \ + int enabled = atomic_read(&kbdev->timeline_flags); \ + if (enabled & TLSTREAM_ENABLED) \ + __kbase_tlstream_tl_nret_ctx_lpu( \ + __TL_DISPATCH_STREAM(kbdev, obj), \ + ctx, lpu); \ + } while (0) + +/** + * KBASE_TLSTREAM_TL_NRET_ATOM_CTX - + * atom is released by context + * + * @kbdev: Kbase device + * @atom: Atom identifier + * @ctx: Name of the context object + */ +#define KBASE_TLSTREAM_TL_NRET_ATOM_CTX( \ + kbdev, \ + atom, \ + ctx \ + ) \ + do { \ + int enabled = atomic_read(&kbdev->timeline_flags); \ + if (enabled & TLSTREAM_ENABLED) \ + __kbase_tlstream_tl_nret_atom_ctx( \ + __TL_DISPATCH_STREAM(kbdev, obj), \ + atom, ctx); \ + } while (0) + +/** + * KBASE_TLSTREAM_TL_NRET_ATOM_LPU - + * atom is released by lpu + * + * @kbdev: Kbase device + * @atom: Atom identifier + * @lpu: Name of the Logical Processing Unit object + */ +#define KBASE_TLSTREAM_TL_NRET_ATOM_LPU( \ + kbdev, \ + atom, \ + lpu \ + ) \ + do { \ + int enabled = atomic_read(&kbdev->timeline_flags); \ + if (enabled & TLSTREAM_ENABLED) \ + __kbase_tlstream_tl_nret_atom_lpu( \ + __TL_DISPATCH_STREAM(kbdev, obj), \ + atom, lpu); \ + } while (0) + +/** + * KBASE_TLSTREAM_TL_RET_AS_CTX - + * address space is retained by context + * + * @kbdev: Kbase device + * @address_space: Name of the address space object + * @ctx: Name of the context object + */ +#define KBASE_TLSTREAM_TL_RET_AS_CTX( \ + kbdev, \ + address_space, \ + ctx \ + ) \ + do { \ + int enabled = atomic_read(&kbdev->timeline_flags); \ + if (enabled & TLSTREAM_ENABLED) \ + __kbase_tlstream_tl_ret_as_ctx( \ + __TL_DISPATCH_STREAM(kbdev, obj), \ + address_space, ctx); \ + } while (0) + +/** + * KBASE_TLSTREAM_TL_NRET_AS_CTX - + * address space is released by context + * + * @kbdev: Kbase device + * @address_space: Name of the address space object + * @ctx: Name of the context object + */ +#define KBASE_TLSTREAM_TL_NRET_AS_CTX( \ + kbdev, \ + 
address_space, \ + ctx \ + ) \ + do { \ + int enabled = atomic_read(&kbdev->timeline_flags); \ + if (enabled & TLSTREAM_ENABLED) \ + __kbase_tlstream_tl_nret_as_ctx( \ + __TL_DISPATCH_STREAM(kbdev, obj), \ + address_space, ctx); \ + } while (0) + +/** + * KBASE_TLSTREAM_TL_RET_ATOM_AS - + * atom is retained by address space + * + * @kbdev: Kbase device + * @atom: Atom identifier + * @address_space: Name of the address space object + */ +#define KBASE_TLSTREAM_TL_RET_ATOM_AS( \ + kbdev, \ + atom, \ + address_space \ + ) \ + do { \ + int enabled = atomic_read(&kbdev->timeline_flags); \ + if (enabled & TLSTREAM_ENABLED) \ + __kbase_tlstream_tl_ret_atom_as( \ + __TL_DISPATCH_STREAM(kbdev, obj), \ + atom, address_space); \ + } while (0) + +/** + * KBASE_TLSTREAM_TL_NRET_ATOM_AS - + * atom is released by address space + * + * @kbdev: Kbase device + * @atom: Atom identifier + * @address_space: Name of the address space object + */ +#define KBASE_TLSTREAM_TL_NRET_ATOM_AS( \ + kbdev, \ + atom, \ + address_space \ + ) \ + do { \ + int enabled = atomic_read(&kbdev->timeline_flags); \ + if (enabled & TLSTREAM_ENABLED) \ + __kbase_tlstream_tl_nret_atom_as( \ + __TL_DISPATCH_STREAM(kbdev, obj), \ + atom, address_space); \ + } while (0) + +/** + * KBASE_TLSTREAM_TL_ATTRIB_ATOM_CONFIG - + * atom job slot attributes + * + * @kbdev: Kbase device + * @atom: Atom identifier + * @descriptor: Job descriptor address + * @affinity: Job affinity + * @config: Job config + */ +#define KBASE_TLSTREAM_TL_ATTRIB_ATOM_CONFIG( \ + kbdev, \ + atom, \ + descriptor, \ + affinity, \ + config \ + ) \ + do { \ + int enabled = atomic_read(&kbdev->timeline_flags); \ + if (enabled & TLSTREAM_ENABLED) \ + __kbase_tlstream_tl_attrib_atom_config( \ + __TL_DISPATCH_STREAM(kbdev, obj), \ + atom, descriptor, affinity, config); \ + } while (0) + +/** + * KBASE_TLSTREAM_TL_ATTRIB_ATOM_PRIORITY - + * atom priority + * + * @kbdev: Kbase device + * @atom: Atom identifier + * @prio: Atom priority + */ +#define 
KBASE_TLSTREAM_TL_ATTRIB_ATOM_PRIORITY( \ + kbdev, \ + atom, \ + prio \ + ) \ + do { \ + int enabled = atomic_read(&kbdev->timeline_flags); \ + if (enabled & BASE_TLSTREAM_ENABLE_LATENCY_TRACEPOINTS) \ + __kbase_tlstream_tl_attrib_atom_priority( \ + __TL_DISPATCH_STREAM(kbdev, obj), \ + atom, prio); \ + } while (0) + +/** + * KBASE_TLSTREAM_TL_ATTRIB_ATOM_STATE - + * atom state + * + * @kbdev: Kbase device + * @atom: Atom identifier + * @state: Atom state + */ +#define KBASE_TLSTREAM_TL_ATTRIB_ATOM_STATE( \ + kbdev, \ + atom, \ + state \ + ) \ + do { \ + int enabled = atomic_read(&kbdev->timeline_flags); \ + if (enabled & BASE_TLSTREAM_ENABLE_LATENCY_TRACEPOINTS) \ + __kbase_tlstream_tl_attrib_atom_state( \ + __TL_DISPATCH_STREAM(kbdev, obj), \ + atom, state); \ + } while (0) + +/** + * KBASE_TLSTREAM_TL_ATTRIB_ATOM_PRIORITIZED - + * atom caused priority change + * + * @kbdev: Kbase device + * @atom: Atom identifier + */ +#define KBASE_TLSTREAM_TL_ATTRIB_ATOM_PRIORITIZED( \ + kbdev, \ + atom \ + ) \ + do { \ + int enabled = atomic_read(&kbdev->timeline_flags); \ + if (enabled & BASE_TLSTREAM_ENABLE_LATENCY_TRACEPOINTS) \ + __kbase_tlstream_tl_attrib_atom_prioritized( \ + __TL_DISPATCH_STREAM(kbdev, obj), \ + atom); \ + } while (0) + +/** + * KBASE_TLSTREAM_TL_ATTRIB_ATOM_JIT - + * jit done for atom + * + * @kbdev: Kbase device + * @atom: Atom identifier + * @edit_addr: Address edited by jit + * @new_addr: Address placed into the edited location + * @jit_flags: Flags specifying the special requirements for + * the JIT allocation. + * @mem_flags: Flags defining the properties of a memory region + * @j_id: Unique ID provided by the caller, this is used + * to pair allocation and free requests. + * @com_pgs: The minimum number of physical pages which + * should back the allocation. + * @extent: Granularity of physical pages to grow the + * allocation by during a fault. 
+ * @va_pgs: The minimum number of virtual pages required + */ +#define KBASE_TLSTREAM_TL_ATTRIB_ATOM_JIT( \ + kbdev, \ + atom, \ + edit_addr, \ + new_addr, \ + jit_flags, \ + mem_flags, \ + j_id, \ + com_pgs, \ + extent, \ + va_pgs \ + ) \ + do { \ + int enabled = atomic_read(&kbdev->timeline_flags); \ + if (enabled & BASE_TLSTREAM_JOB_DUMPING_ENABLED) \ + __kbase_tlstream_tl_attrib_atom_jit( \ + __TL_DISPATCH_STREAM(kbdev, obj), \ + atom, edit_addr, new_addr, jit_flags, mem_flags, j_id, com_pgs, extent, va_pgs); \ + } while (0) + +/** + * KBASE_TLSTREAM_TL_JIT_USEDPAGES - + * used pages for jit + * + * @kbdev: Kbase device + * @used_pages: Number of pages used for jit + * @j_id: Unique ID provided by the caller, this is used + * to pair allocation and free requests. + */ +#define KBASE_TLSTREAM_TL_JIT_USEDPAGES( \ + kbdev, \ + used_pages, \ + j_id \ + ) \ + do { \ + int enabled = atomic_read(&kbdev->timeline_flags); \ + if (enabled & TLSTREAM_ENABLED) \ + __kbase_tlstream_tl_jit_usedpages( \ + __TL_DISPATCH_STREAM(kbdev, obj), \ + used_pages, j_id); \ + } while (0) + +/** + * KBASE_TLSTREAM_TL_ATTRIB_ATOM_JITALLOCINFO - + * Information about JIT allocations + * + * @kbdev: Kbase device + * @atom: Atom identifier + * @va_pgs: The minimum number of virtual pages required + * @com_pgs: The minimum number of physical pages which + * should back the allocation. + * @extent: Granularity of physical pages to grow the + * allocation by during a fault. + * @j_id: Unique ID provided by the caller, this is used + * to pair allocation and free requests. + * @bin_id: The JIT allocation bin, used in conjunction with + * max_allocations to limit the number of each + * type of JIT allocation. + * @max_allocs: Maximum allocations allowed in this bin. + * @jit_flags: Flags specifying the special requirements for + * the JIT allocation. + * @usg_id: A hint about which allocation should be reused. 
+ */ +#define KBASE_TLSTREAM_TL_ATTRIB_ATOM_JITALLOCINFO( \ + kbdev, \ + atom, \ + va_pgs, \ + com_pgs, \ + extent, \ + j_id, \ + bin_id, \ + max_allocs, \ + jit_flags, \ + usg_id \ + ) \ + do { \ + int enabled = atomic_read(&kbdev->timeline_flags); \ + if (enabled & TLSTREAM_ENABLED) \ + __kbase_tlstream_tl_attrib_atom_jitallocinfo( \ + __TL_DISPATCH_STREAM(kbdev, obj), \ + atom, va_pgs, com_pgs, extent, j_id, bin_id, max_allocs, jit_flags, usg_id); \ + } while (0) + +/** + * KBASE_TLSTREAM_TL_ATTRIB_ATOM_JITFREEINFO - + * Information about JIT frees + * + * @kbdev: Kbase device + * @atom: Atom identifier + * @j_id: Unique ID provided by the caller, this is used + * to pair allocation and free requests. + */ +#define KBASE_TLSTREAM_TL_ATTRIB_ATOM_JITFREEINFO( \ + kbdev, \ + atom, \ + j_id \ + ) \ + do { \ + int enabled = atomic_read(&kbdev->timeline_flags); \ + if (enabled & TLSTREAM_ENABLED) \ + __kbase_tlstream_tl_attrib_atom_jitfreeinfo( \ + __TL_DISPATCH_STREAM(kbdev, obj), \ + atom, j_id); \ + } while (0) + +/** + * KBASE_TLSTREAM_TL_ATTRIB_AS_CONFIG - + * address space attributes + * + * @kbdev: Kbase device + * @address_space: Name of the address space object + * @transtab: Configuration of the TRANSTAB register + * @memattr: Configuration of the MEMATTR register + * @transcfg: Configuration of the TRANSCFG register (or zero if not present) + */ +#define KBASE_TLSTREAM_TL_ATTRIB_AS_CONFIG( \ + kbdev, \ + address_space, \ + transtab, \ + memattr, \ + transcfg \ + ) \ + do { \ + int enabled = atomic_read(&kbdev->timeline_flags); \ + if (enabled & TLSTREAM_ENABLED) \ + __kbase_tlstream_tl_attrib_as_config( \ + __TL_DISPATCH_STREAM(kbdev, obj), \ + address_space, transtab, memattr, transcfg); \ + } while (0) + +/** + * KBASE_TLSTREAM_TL_EVENT_LPU_SOFTSTOP - + * softstop event on given lpu + * + * @kbdev: Kbase device + * @lpu: Name of the Logical Processing Unit object + */ +#define KBASE_TLSTREAM_TL_EVENT_LPU_SOFTSTOP( \ + kbdev, \ + lpu \ + ) \ + do { \ + int 
enabled = atomic_read(&kbdev->timeline_flags); \ + if (enabled & TLSTREAM_ENABLED) \ + __kbase_tlstream_tl_event_lpu_softstop( \ + __TL_DISPATCH_STREAM(kbdev, obj), \ + lpu); \ + } while (0) + +/** + * KBASE_TLSTREAM_TL_EVENT_ATOM_SOFTSTOP_EX - + * atom softstopped + * + * @kbdev: Kbase device + * @atom: Atom identifier + */ +#define KBASE_TLSTREAM_TL_EVENT_ATOM_SOFTSTOP_EX( \ + kbdev, \ + atom \ + ) \ + do { \ + int enabled = atomic_read(&kbdev->timeline_flags); \ + if (enabled & TLSTREAM_ENABLED) \ + __kbase_tlstream_tl_event_atom_softstop_ex( \ + __TL_DISPATCH_STREAM(kbdev, obj), \ + atom); \ + } while (0) + +/** + * KBASE_TLSTREAM_TL_EVENT_ATOM_SOFTSTOP_ISSUE - + * atom softstop issued + * + * @kbdev: Kbase device + * @atom: Atom identifier + */ +#define KBASE_TLSTREAM_TL_EVENT_ATOM_SOFTSTOP_ISSUE( \ + kbdev, \ + atom \ + ) \ + do { \ + int enabled = atomic_read(&kbdev->timeline_flags); \ + if (enabled & TLSTREAM_ENABLED) \ + __kbase_tlstream_tl_event_atom_softstop_issue( \ + __TL_DISPATCH_STREAM(kbdev, obj), \ + atom); \ + } while (0) + +/** + * KBASE_TLSTREAM_TL_EVENT_ATOM_SOFTJOB_START - + * atom soft job has started + * + * @kbdev: Kbase device + * @atom: Atom identifier + */ +#define KBASE_TLSTREAM_TL_EVENT_ATOM_SOFTJOB_START( \ + kbdev, \ + atom \ + ) \ + do { \ + int enabled = atomic_read(&kbdev->timeline_flags); \ + if (enabled & TLSTREAM_ENABLED) \ + __kbase_tlstream_tl_event_atom_softjob_start( \ + __TL_DISPATCH_STREAM(kbdev, obj), \ + atom); \ + } while (0) + +/** + * KBASE_TLSTREAM_TL_EVENT_ATOM_SOFTJOB_END - + * atom soft job has completed + * + * @kbdev: Kbase device + * @atom: Atom identifier + */ +#define KBASE_TLSTREAM_TL_EVENT_ATOM_SOFTJOB_END( \ + kbdev, \ + atom \ + ) \ + do { \ + int enabled = atomic_read(&kbdev->timeline_flags); \ + if (enabled & TLSTREAM_ENABLED) \ + __kbase_tlstream_tl_event_atom_softjob_end( \ + __TL_DISPATCH_STREAM(kbdev, obj), \ + atom); \ + } while (0) + +/** + * KBASE_TLSTREAM_TL_EVENT_ARB_GRANTED - + * Arbiter has 
granted gpu access + * + * @kbdev: Kbase device + * @gpu: Name of the GPU object + */ +#define KBASE_TLSTREAM_TL_EVENT_ARB_GRANTED( \ + kbdev, \ + gpu \ + ) \ + do { \ + int enabled = atomic_read(&kbdev->timeline_flags); \ + if (enabled & TLSTREAM_ENABLED) \ + __kbase_tlstream_tl_event_arb_granted( \ + __TL_DISPATCH_STREAM(kbdev, obj), \ + gpu); \ + } while (0) + +/** + * KBASE_TLSTREAM_TL_EVENT_ARB_STARTED - + * Driver is running again and able to process jobs + * + * @kbdev: Kbase device + * @gpu: Name of the GPU object + */ +#define KBASE_TLSTREAM_TL_EVENT_ARB_STARTED( \ + kbdev, \ + gpu \ + ) \ + do { \ + int enabled = atomic_read(&kbdev->timeline_flags); \ + if (enabled & TLSTREAM_ENABLED) \ + __kbase_tlstream_tl_event_arb_started( \ + __TL_DISPATCH_STREAM(kbdev, obj), \ + gpu); \ + } while (0) + +/** + * KBASE_TLSTREAM_TL_EVENT_ARB_STOP_REQUESTED - + * Arbiter has requested driver to stop using gpu + * + * @kbdev: Kbase device + * @gpu: Name of the GPU object + */ +#define KBASE_TLSTREAM_TL_EVENT_ARB_STOP_REQUESTED( \ + kbdev, \ + gpu \ + ) \ + do { \ + int enabled = atomic_read(&kbdev->timeline_flags); \ + if (enabled & TLSTREAM_ENABLED) \ + __kbase_tlstream_tl_event_arb_stop_requested( \ + __TL_DISPATCH_STREAM(kbdev, obj), \ + gpu); \ + } while (0) + +/** + * KBASE_TLSTREAM_TL_EVENT_ARB_STOPPED - + * Driver has stopped using gpu + * + * @kbdev: Kbase device + * @gpu: Name of the GPU object + */ +#define KBASE_TLSTREAM_TL_EVENT_ARB_STOPPED( \ + kbdev, \ + gpu \ + ) \ + do { \ + int enabled = atomic_read(&kbdev->timeline_flags); \ + if (enabled & TLSTREAM_ENABLED) \ + __kbase_tlstream_tl_event_arb_stopped( \ + __TL_DISPATCH_STREAM(kbdev, obj), \ + gpu); \ + } while (0) + +/** + * KBASE_TLSTREAM_JD_GPU_SOFT_RESET - + * gpu soft reset + * + * @kbdev: Kbase device + * @gpu: Name of the GPU object + */ +#define KBASE_TLSTREAM_JD_GPU_SOFT_RESET( \ + kbdev, \ + gpu \ + ) \ + do { \ + int enabled = atomic_read(&kbdev->timeline_flags); \ + if (enabled & 
TLSTREAM_ENABLED) \ + __kbase_tlstream_jd_gpu_soft_reset( \ + __TL_DISPATCH_STREAM(kbdev, obj), \ + gpu); \ + } while (0) + +/** + * KBASE_TLSTREAM_AUX_PM_STATE - + * PM state + * + * @kbdev: Kbase device + * @core_type: Core type (shader, tiler, l2 cache, l3 cache) + * @core_state_bitset: 64bits bitmask reporting power state of the cores + * (1-ON, 0-OFF) + */ +#define KBASE_TLSTREAM_AUX_PM_STATE( \ + kbdev, \ + core_type, \ + core_state_bitset \ + ) \ + do { \ + int enabled = atomic_read(&kbdev->timeline_flags); \ + if (enabled & TLSTREAM_ENABLED) \ + __kbase_tlstream_aux_pm_state( \ + __TL_DISPATCH_STREAM(kbdev, aux), \ + core_type, core_state_bitset); \ + } while (0) + +/** + * KBASE_TLSTREAM_AUX_PAGEFAULT - + * Page fault + * + * @kbdev: Kbase device + * @ctx_nr: Kernel context number + * @as_nr: Address space number + * @page_cnt_change: Number of pages to be added + */ +#define KBASE_TLSTREAM_AUX_PAGEFAULT( \ + kbdev, \ + ctx_nr, \ + as_nr, \ + page_cnt_change \ + ) \ + do { \ + int enabled = atomic_read(&kbdev->timeline_flags); \ + if (enabled & TLSTREAM_ENABLED) \ + __kbase_tlstream_aux_pagefault( \ + __TL_DISPATCH_STREAM(kbdev, aux), \ + ctx_nr, as_nr, page_cnt_change); \ + } while (0) + +/** + * KBASE_TLSTREAM_AUX_PAGESALLOC - + * Total alloc pages change + * + * @kbdev: Kbase device + * @ctx_nr: Kernel context number + * @page_cnt: Number of pages used by the context + */ +#define KBASE_TLSTREAM_AUX_PAGESALLOC( \ + kbdev, \ + ctx_nr, \ + page_cnt \ + ) \ + do { \ + int enabled = atomic_read(&kbdev->timeline_flags); \ + if (enabled & TLSTREAM_ENABLED) \ + __kbase_tlstream_aux_pagesalloc( \ + __TL_DISPATCH_STREAM(kbdev, aux), \ + ctx_nr, page_cnt); \ + } while (0) + +/** + * KBASE_TLSTREAM_AUX_DEVFREQ_TARGET - + * New device frequency target + * + * @kbdev: Kbase device + * @target_freq: New target frequency + */ +#define KBASE_TLSTREAM_AUX_DEVFREQ_TARGET( \ + kbdev, \ + target_freq \ + ) \ + do { \ + int enabled = atomic_read(&kbdev->timeline_flags); \ + 
if (enabled & TLSTREAM_ENABLED) \ + __kbase_tlstream_aux_devfreq_target( \ + __TL_DISPATCH_STREAM(kbdev, aux), \ + target_freq); \ + } while (0) + +/** + * KBASE_TLSTREAM_AUX_PROTECTED_ENTER_START - + * enter protected mode start + * + * @kbdev: Kbase device + * @gpu: Name of the GPU object + */ +#define KBASE_TLSTREAM_AUX_PROTECTED_ENTER_START( \ + kbdev, \ + gpu \ + ) \ + do { \ + int enabled = atomic_read(&kbdev->timeline_flags); \ + if (enabled & BASE_TLSTREAM_ENABLE_LATENCY_TRACEPOINTS) \ + __kbase_tlstream_aux_protected_enter_start( \ + __TL_DISPATCH_STREAM(kbdev, aux), \ + gpu); \ + } while (0) + +/** + * KBASE_TLSTREAM_AUX_PROTECTED_ENTER_END - + * enter protected mode end + * + * @kbdev: Kbase device + * @gpu: Name of the GPU object + */ +#define KBASE_TLSTREAM_AUX_PROTECTED_ENTER_END( \ + kbdev, \ + gpu \ + ) \ + do { \ + int enabled = atomic_read(&kbdev->timeline_flags); \ + if (enabled & BASE_TLSTREAM_ENABLE_LATENCY_TRACEPOINTS) \ + __kbase_tlstream_aux_protected_enter_end( \ + __TL_DISPATCH_STREAM(kbdev, aux), \ + gpu); \ + } while (0) + +/** + * KBASE_TLSTREAM_AUX_PROTECTED_LEAVE_START - + * leave protected mode start + * + * @kbdev: Kbase device + * @gpu: Name of the GPU object + */ +#define KBASE_TLSTREAM_AUX_PROTECTED_LEAVE_START( \ + kbdev, \ + gpu \ + ) \ + do { \ + int enabled = atomic_read(&kbdev->timeline_flags); \ + if (enabled & BASE_TLSTREAM_ENABLE_LATENCY_TRACEPOINTS) \ + __kbase_tlstream_aux_protected_leave_start( \ + __TL_DISPATCH_STREAM(kbdev, aux), \ + gpu); \ + } while (0) + +/** + * KBASE_TLSTREAM_AUX_PROTECTED_LEAVE_END - + * leave protected mode end + * + * @kbdev: Kbase device + * @gpu: Name of the GPU object + */ +#define KBASE_TLSTREAM_AUX_PROTECTED_LEAVE_END( \ + kbdev, \ + gpu \ + ) \ + do { \ + int enabled = atomic_read(&kbdev->timeline_flags); \ + if (enabled & BASE_TLSTREAM_ENABLE_LATENCY_TRACEPOINTS) \ + __kbase_tlstream_aux_protected_leave_end( \ + __TL_DISPATCH_STREAM(kbdev, aux), \ + gpu); \ + } while (0) + +/** + * 
KBASE_TLSTREAM_AUX_JIT_STATS - + * per-bin JIT statistics + * + * @kbdev: Kbase device + * @ctx_nr: Kernel context number + * @bid: JIT bin id + * @max_allocs: Maximum allocations allowed in this bin. + * @allocs: Number of active allocations in this bin + * @va_pages: Number of virtual pages allocated in this bin + * @ph_pages: Number of physical pages allocated in this bin + */ +#define KBASE_TLSTREAM_AUX_JIT_STATS( \ + kbdev, \ + ctx_nr, \ + bid, \ + max_allocs, \ + allocs, \ + va_pages, \ + ph_pages \ + ) \ + do { \ + int enabled = atomic_read(&kbdev->timeline_flags); \ + if (enabled & TLSTREAM_ENABLED) \ + __kbase_tlstream_aux_jit_stats( \ + __TL_DISPATCH_STREAM(kbdev, aux), \ + ctx_nr, bid, max_allocs, allocs, va_pages, ph_pages); \ + } while (0) + +/** + * KBASE_TLSTREAM_AUX_EVENT_JOB_SLOT - + * event on a given job slot + * + * @kbdev: Kbase device + * @ctx: Name of the context object + * @slot_nr: Job slot number + * @atom_nr: Sequential number of an atom + * @event: Event type. 
One of TL_JS_EVENT values + */ +#define KBASE_TLSTREAM_AUX_EVENT_JOB_SLOT( \ + kbdev, \ + ctx, \ + slot_nr, \ + atom_nr, \ + event \ + ) \ + do { \ + int enabled = atomic_read(&kbdev->timeline_flags); \ + if (enabled & TLSTREAM_ENABLED) \ + __kbase_tlstream_aux_event_job_slot( \ + __TL_DISPATCH_STREAM(kbdev, aux), \ + ctx, slot_nr, atom_nr, event); \ + } while (0) + +/** + * KBASE_TLSTREAM_TL_KBASE_NEW_DEVICE - + * New KBase Device + * + * @kbdev: Kbase device + * @kbase_device_id: The id of the physical hardware + * @kbase_device_gpu_core_count: The number of gpu cores in the physical hardware + * @kbase_device_max_num_csgs: The max number of CSGs the physical hardware supports + * @kbase_device_as_count: The number of address spaces the physical hardware has available + */ +#define KBASE_TLSTREAM_TL_KBASE_NEW_DEVICE( \ + kbdev, \ + kbase_device_id, \ + kbase_device_gpu_core_count, \ + kbase_device_max_num_csgs, \ + kbase_device_as_count \ + ) \ + do { } while (0) + +/** + * KBASE_TLSTREAM_TL_KBASE_DEVICE_PROGRAM_CSG - + * CSG is programmed to a slot + * + * @kbdev: Kbase device + * @kbase_device_id: The id of the physical hardware + * @gpu_cmdq_grp_handle: GPU Command Queue Group handle which will match userspace + * @kbase_device_csg_slot_index: The index of the slot in the scheduler being programmed + */ +#define KBASE_TLSTREAM_TL_KBASE_DEVICE_PROGRAM_CSG( \ + kbdev, \ + kbase_device_id, \ + gpu_cmdq_grp_handle, \ + kbase_device_csg_slot_index \ + ) \ + do { } while (0) + +/** + * KBASE_TLSTREAM_TL_KBASE_DEVICE_DEPROGRAM_CSG - + * CSG is deprogrammed from a slot + * + * @kbdev: Kbase device + * @kbase_device_id: The id of the physical hardware + * @kbase_device_csg_slot_index: The index of the slot in the scheduler being deprogrammed + */ +#define KBASE_TLSTREAM_TL_KBASE_DEVICE_DEPROGRAM_CSG( \ + kbdev, \ + kbase_device_id, \ + kbase_device_csg_slot_index \ + ) \ + do { } while (0) + +/** + * KBASE_TLSTREAM_TL_KBASE_NEW_CTX - + * New KBase Context + * + * @kbdev: 
Kbase device + * @kernel_ctx_id: Unique ID for the KBase Context + * @kbase_device_id: The id of the physical hardware + */ +#define KBASE_TLSTREAM_TL_KBASE_NEW_CTX( \ + kbdev, \ + kernel_ctx_id, \ + kbase_device_id \ + ) \ + do { } while (0) + +/** + * KBASE_TLSTREAM_TL_KBASE_DEL_CTX - + * Delete KBase Context + * + * @kbdev: Kbase device + * @kernel_ctx_id: Unique ID for the KBase Context + */ +#define KBASE_TLSTREAM_TL_KBASE_DEL_CTX( \ + kbdev, \ + kernel_ctx_id \ + ) \ + do { } while (0) + +/** + * KBASE_TLSTREAM_TL_KBASE_CTX_ASSIGN_AS - + * Address Space is assigned to a KBase context + * + * @kbdev: Kbase device + * @kernel_ctx_id: Unique ID for the KBase Context + * @kbase_device_as_index: The index of the device address space being assigned + */ +#define KBASE_TLSTREAM_TL_KBASE_CTX_ASSIGN_AS( \ + kbdev, \ + kernel_ctx_id, \ + kbase_device_as_index \ + ) \ + do { } while (0) + +/** + * KBASE_TLSTREAM_TL_KBASE_CTX_UNASSIGN_AS - + * Address Space is unassigned from a KBase context + * + * @kbdev: Kbase device + * @kernel_ctx_id: Unique ID for the KBase Context + */ +#define KBASE_TLSTREAM_TL_KBASE_CTX_UNASSIGN_AS( \ + kbdev, \ + kernel_ctx_id \ + ) \ + do { } while (0) + +/** + * KBASE_TLSTREAM_TL_KBASE_NEW_KCPUQUEUE - + * New KCPU Queue + * + * @kbdev: Kbase device + * @kcpu_queue: KCPU queue + * @kernel_ctx_id: Unique ID for the KBase Context + * @kcpuq_num_pending_cmds: Number of commands already enqueued + * in the KCPU queue + */ +#define KBASE_TLSTREAM_TL_KBASE_NEW_KCPUQUEUE( \ + kbdev, \ + kcpu_queue, \ + kernel_ctx_id, \ + kcpuq_num_pending_cmds \ + ) \ + do { } while (0) + +/** + * KBASE_TLSTREAM_TL_KBASE_DEL_KCPUQUEUE - + * Delete KCPU Queue + * + * @kbdev: Kbase device + * @kcpu_queue: KCPU queue + */ +#define KBASE_TLSTREAM_TL_KBASE_DEL_KCPUQUEUE( \ + kbdev, \ + kcpu_queue \ + ) \ + do { } while (0) + +/** + * KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_FENCE_SIGNAL - + * KCPU Queue enqueues Signal on Fence + * + * @kbdev: Kbase device + * 
@kcpu_queue: KCPU queue + * @fence: Fence object handle + */ +#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_FENCE_SIGNAL( \ + kbdev, \ + kcpu_queue, \ + fence \ + ) \ + do { } while (0) + +/** + * KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_FENCE_WAIT - + * KCPU Queue enqueues Wait on Fence + * + * @kbdev: Kbase device + * @kcpu_queue: KCPU queue + * @fence: Fence object handle + */ +#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_FENCE_WAIT( \ + kbdev, \ + kcpu_queue, \ + fence \ + ) \ + do { } while (0) + +/** + * KBASE_TLSTREAM_TL_KBASE_ARRAY_BEGIN_KCPUQUEUE_ENQUEUE_CQS_WAIT - + * Begin array of KCPU Queue enqueues Wait on Cross Queue Sync Object + * + * @kbdev: Kbase device + * @kcpu_queue: KCPU queue + */ +#define KBASE_TLSTREAM_TL_KBASE_ARRAY_BEGIN_KCPUQUEUE_ENQUEUE_CQS_WAIT( \ + kbdev, \ + kcpu_queue \ + ) \ + do { } while (0) + +/** + * KBASE_TLSTREAM_TL_KBASE_ARRAY_ITEM_KCPUQUEUE_ENQUEUE_CQS_WAIT - + * Array item of KCPU Queue enqueues Wait on Cross Queue Sync Object + * + * @kbdev: Kbase device + * @kcpu_queue: KCPU queue + * @cqs_obj_gpu_addr: CQS Object GPU ptr + * @cqs_obj_compare_value: Semaphore value that should be exceeded + * for the WAIT to pass + */ +#define KBASE_TLSTREAM_TL_KBASE_ARRAY_ITEM_KCPUQUEUE_ENQUEUE_CQS_WAIT( \ + kbdev, \ + kcpu_queue, \ + cqs_obj_gpu_addr, \ + cqs_obj_compare_value \ + ) \ + do { } while (0) + +/** + * KBASE_TLSTREAM_TL_KBASE_ARRAY_END_KCPUQUEUE_ENQUEUE_CQS_WAIT - + * End array of KCPU Queue enqueues Wait on Cross Queue Sync Object + * + * @kbdev: Kbase device + * @kcpu_queue: KCPU queue + */ +#define KBASE_TLSTREAM_TL_KBASE_ARRAY_END_KCPUQUEUE_ENQUEUE_CQS_WAIT( \ + kbdev, \ + kcpu_queue \ + ) \ + do { } while (0) + +/** + * KBASE_TLSTREAM_TL_KBASE_ARRAY_BEGIN_KCPUQUEUE_ENQUEUE_CQS_SET - + * Begin array of KCPU Queue enqueues Set on Cross Queue Sync Object + * + * @kbdev: Kbase device + * @kcpu_queue: KCPU queue + */ +#define KBASE_TLSTREAM_TL_KBASE_ARRAY_BEGIN_KCPUQUEUE_ENQUEUE_CQS_SET( \ + kbdev, \ + kcpu_queue 
\ + ) \ + do { } while (0) + +/** + * KBASE_TLSTREAM_TL_KBASE_ARRAY_ITEM_KCPUQUEUE_ENQUEUE_CQS_SET - + * Array item of KCPU Queue enqueues Set on Cross Queue Sync Object + * + * @kbdev: Kbase device + * @kcpu_queue: KCPU queue + * @cqs_obj_gpu_addr: CQS Object GPU ptr + */ +#define KBASE_TLSTREAM_TL_KBASE_ARRAY_ITEM_KCPUQUEUE_ENQUEUE_CQS_SET( \ + kbdev, \ + kcpu_queue, \ + cqs_obj_gpu_addr \ + ) \ + do { } while (0) + +/** + * KBASE_TLSTREAM_TL_KBASE_ARRAY_END_KCPUQUEUE_ENQUEUE_CQS_SET - + * End array of KCPU Queue enqueues Set on Cross Queue Sync Object + * + * @kbdev: Kbase device + * @kcpu_queue: KCPU queue + */ +#define KBASE_TLSTREAM_TL_KBASE_ARRAY_END_KCPUQUEUE_ENQUEUE_CQS_SET( \ + kbdev, \ + kcpu_queue \ + ) \ + do { } while (0) + +/** + * KBASE_TLSTREAM_TL_KBASE_ARRAY_BEGIN_KCPUQUEUE_ENQUEUE_DEBUGCOPY - + * Begin array of KCPU Queue enqueues Debug Copy + * + * @kbdev: Kbase device + * @kcpu_queue: KCPU queue + */ +#define KBASE_TLSTREAM_TL_KBASE_ARRAY_BEGIN_KCPUQUEUE_ENQUEUE_DEBUGCOPY( \ + kbdev, \ + kcpu_queue \ + ) \ + do { } while (0) + +/** + * KBASE_TLSTREAM_TL_KBASE_ARRAY_ITEM_KCPUQUEUE_ENQUEUE_DEBUGCOPY - + * Array item of KCPU Queue enqueues Debug Copy + * + * @kbdev: Kbase device + * @kcpu_queue: KCPU queue + * @debugcopy_dst_size: Debug Copy destination size + */ +#define KBASE_TLSTREAM_TL_KBASE_ARRAY_ITEM_KCPUQUEUE_ENQUEUE_DEBUGCOPY( \ + kbdev, \ + kcpu_queue, \ + debugcopy_dst_size \ + ) \ + do { } while (0) + +/** + * KBASE_TLSTREAM_TL_KBASE_ARRAY_END_KCPUQUEUE_ENQUEUE_DEBUGCOPY - + * End array of KCPU Queue enqueues Debug Copy + * + * @kbdev: Kbase device + * @kcpu_queue: KCPU queue + */ +#define KBASE_TLSTREAM_TL_KBASE_ARRAY_END_KCPUQUEUE_ENQUEUE_DEBUGCOPY( \ + kbdev, \ + kcpu_queue \ + ) \ + do { } while (0) + +/** + * KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_MAP_IMPORT - + * KCPU Queue enqueues Map Import + * + * @kbdev: Kbase device + * @kcpu_queue: KCPU queue + * @map_import_buf_gpu_addr: Map import buffer GPU ptr + */ +#define 
KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_MAP_IMPORT( \ + kbdev, \ + kcpu_queue, \ + map_import_buf_gpu_addr \ + ) \ + do { } while (0) + +/** + * KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_UNMAP_IMPORT - + * KCPU Queue enqueues Unmap Import + * + * @kbdev: Kbase device + * @kcpu_queue: KCPU queue + * @map_import_buf_gpu_addr: Map import buffer GPU ptr + */ +#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_UNMAP_IMPORT( \ + kbdev, \ + kcpu_queue, \ + map_import_buf_gpu_addr \ + ) \ + do { } while (0) + +/** + * KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_UNMAP_IMPORT_FORCE - + * KCPU Queue enqueues Unmap Import ignoring reference count + * + * @kbdev: Kbase device + * @kcpu_queue: KCPU queue + * @map_import_buf_gpu_addr: Map import buffer GPU ptr + */ +#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_UNMAP_IMPORT_FORCE( \ + kbdev, \ + kcpu_queue, \ + map_import_buf_gpu_addr \ + ) \ + do { } while (0) + +/** + * KBASE_TLSTREAM_TL_KBASE_ARRAY_BEGIN_KCPUQUEUE_ENQUEUE_JIT_ALLOC - + * Begin array of KCPU Queue enqueues JIT Alloc + * + * @kbdev: Kbase device + * @kcpu_queue: KCPU queue + */ +#define KBASE_TLSTREAM_TL_KBASE_ARRAY_BEGIN_KCPUQUEUE_ENQUEUE_JIT_ALLOC( \ + kbdev, \ + kcpu_queue \ + ) \ + do { } while (0) + +/** + * KBASE_TLSTREAM_TL_KBASE_ARRAY_ITEM_KCPUQUEUE_ENQUEUE_JIT_ALLOC - + * Array item of KCPU Queue enqueues JIT Alloc + * + * @kbdev: Kbase device + * @kcpu_queue: KCPU queue + * @jit_alloc_gpu_alloc_addr_dest: The GPU virtual address to write + * the JIT allocated GPU virtual address to + * @jit_alloc_va_pages: The minimum number of virtual pages required + * @jit_alloc_commit_pages: The minimum number of physical pages which + * should back the allocation + * @jit_alloc_extent: Granularity of physical pages to grow the allocation + * by during a fault + * @jit_alloc_jit_id: Unique ID provided by the caller, this is used + * to pair allocation and free requests. 
Zero is not a valid value + * @jit_alloc_bin_id: The JIT allocation bin, used in conjunction with + * max_allocations to limit the number of each type of JIT allocation + * @jit_alloc_max_allocations: The maximum number of allocations + * allowed within the bin specified by bin_id. Should be the same for all + * JIT allocations within the same bin. + * @jit_alloc_flags: Flags specifying the special requirements for the + * JIT allocation + * @jit_alloc_usage_id: A hint about which allocation should be + * reused. The kernel should attempt to use a previous allocation with the same + * usage_id + */ +#define KBASE_TLSTREAM_TL_KBASE_ARRAY_ITEM_KCPUQUEUE_ENQUEUE_JIT_ALLOC( \ + kbdev, \ + kcpu_queue, \ + jit_alloc_gpu_alloc_addr_dest, \ + jit_alloc_va_pages, \ + jit_alloc_commit_pages, \ + jit_alloc_extent, \ + jit_alloc_jit_id, \ + jit_alloc_bin_id, \ + jit_alloc_max_allocations, \ + jit_alloc_flags, \ + jit_alloc_usage_id \ + ) \ + do { } while (0) + +/** + * KBASE_TLSTREAM_TL_KBASE_ARRAY_END_KCPUQUEUE_ENQUEUE_JIT_ALLOC - + * End array of KCPU Queue enqueues JIT Alloc + * + * @kbdev: Kbase device + * @kcpu_queue: KCPU queue + */ +#define KBASE_TLSTREAM_TL_KBASE_ARRAY_END_KCPUQUEUE_ENQUEUE_JIT_ALLOC( \ + kbdev, \ + kcpu_queue \ + ) \ + do { } while (0) + +/** + * KBASE_TLSTREAM_TL_KBASE_ARRAY_BEGIN_KCPUQUEUE_ENQUEUE_JIT_FREE - + * Begin array of KCPU Queue enqueues JIT Free + * + * @kbdev: Kbase device + * @kcpu_queue: KCPU queue + */ +#define KBASE_TLSTREAM_TL_KBASE_ARRAY_BEGIN_KCPUQUEUE_ENQUEUE_JIT_FREE( \ + kbdev, \ + kcpu_queue \ + ) \ + do { } while (0) + +/** + * KBASE_TLSTREAM_TL_KBASE_ARRAY_ITEM_KCPUQUEUE_ENQUEUE_JIT_FREE - + * Array item of KCPU Queue enqueues JIT Free + * + * @kbdev: Kbase device + * @kcpu_queue: KCPU queue + * @jit_alloc_jit_id: Unique ID provided by the caller, this is used + * to pair allocation and free requests. 
Zero is not a valid value + */ +#define KBASE_TLSTREAM_TL_KBASE_ARRAY_ITEM_KCPUQUEUE_ENQUEUE_JIT_FREE( \ + kbdev, \ + kcpu_queue, \ + jit_alloc_jit_id \ + ) \ + do { } while (0) + +/** + * KBASE_TLSTREAM_TL_KBASE_ARRAY_END_KCPUQUEUE_ENQUEUE_JIT_FREE - + * End array of KCPU Queue enqueues JIT Free + * + * @kbdev: Kbase device + * @kcpu_queue: KCPU queue + */ +#define KBASE_TLSTREAM_TL_KBASE_ARRAY_END_KCPUQUEUE_ENQUEUE_JIT_FREE( \ + kbdev, \ + kcpu_queue \ + ) \ + do { } while (0) + +/** + * KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_FENCE_SIGNAL_START - + * KCPU Queue starts a Signal on Fence + * + * @kbdev: Kbase device + * @kcpu_queue: KCPU queue + */ +#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_FENCE_SIGNAL_START( \ + kbdev, \ + kcpu_queue \ + ) \ + do { } while (0) + +/** + * KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_FENCE_SIGNAL_END - + * KCPU Queue ends a Signal on Fence + * + * @kbdev: Kbase device + * @kcpu_queue: KCPU queue + */ +#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_FENCE_SIGNAL_END( \ + kbdev, \ + kcpu_queue \ + ) \ + do { } while (0) + +/** + * KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_FENCE_WAIT_START - + * KCPU Queue starts a Wait on Fence + * + * @kbdev: Kbase device + * @kcpu_queue: KCPU queue + */ +#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_FENCE_WAIT_START( \ + kbdev, \ + kcpu_queue \ + ) \ + do { } while (0) + +/** + * KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_FENCE_WAIT_END - + * KCPU Queue ends a Wait on Fence + * + * @kbdev: Kbase device + * @kcpu_queue: KCPU queue + */ +#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_FENCE_WAIT_END( \ + kbdev, \ + kcpu_queue \ + ) \ + do { } while (0) + +/** + * KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_WAIT_START - + * KCPU Queue starts a Wait on an array of Cross Queue Sync Objects + * + * @kbdev: Kbase device + * @kcpu_queue: KCPU queue + */ +#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_WAIT_START( \ + kbdev, \ + kcpu_queue \ + ) \ + do { } while (0) + +/** + * 
KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_WAIT_END - + * KCPU Queue ends a Wait on an array of Cross Queue Sync Objects + * + * @kbdev: Kbase device + * @kcpu_queue: KCPU queue + */ +#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_WAIT_END( \ + kbdev, \ + kcpu_queue \ + ) \ + do { } while (0) + +/** + * KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_SET - + * KCPU Queue executes a Set on an array of Cross Queue Sync Objects + * + * @kbdev: Kbase device + * @kcpu_queue: KCPU queue + */ +#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_SET( \ + kbdev, \ + kcpu_queue \ + ) \ + do { } while (0) + +/** + * KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_DEBUGCOPY_START - + * KCPU Queue starts an array of Debug Copies + * + * @kbdev: Kbase device + * @kcpu_queue: KCPU queue + */ +#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_DEBUGCOPY_START( \ + kbdev, \ + kcpu_queue \ + ) \ + do { } while (0) + +/** + * KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_DEBUGCOPY_END - + * KCPU Queue ends an array of Debug Copies + * + * @kbdev: Kbase device + * @kcpu_queue: KCPU queue + */ +#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_DEBUGCOPY_END( \ + kbdev, \ + kcpu_queue \ + ) \ + do { } while (0) + +/** + * KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_MAP_IMPORT_START - + * KCPU Queue starts a Map Import + * + * @kbdev: Kbase device + * @kcpu_queue: KCPU queue + */ +#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_MAP_IMPORT_START( \ + kbdev, \ + kcpu_queue \ + ) \ + do { } while (0) + +/** + * KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_MAP_IMPORT_END - + * KCPU Queue ends a Map Import + * + * @kbdev: Kbase device + * @kcpu_queue: KCPU queue + */ +#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_MAP_IMPORT_END( \ + kbdev, \ + kcpu_queue \ + ) \ + do { } while (0) + +/** + * KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_UNMAP_IMPORT_START - + * KCPU Queue starts an Unmap Import + * + * @kbdev: Kbase device + * @kcpu_queue: KCPU queue + */ +#define 
KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_UNMAP_IMPORT_START( \ + kbdev, \ + kcpu_queue \ + ) \ + do { } while (0) + +/** + * KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_UNMAP_IMPORT_END - + * KCPU Queue ends an Unmap Import + * + * @kbdev: Kbase device + * @kcpu_queue: KCPU queue + */ +#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_UNMAP_IMPORT_END( \ + kbdev, \ + kcpu_queue \ + ) \ + do { } while (0) + +/** + * KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_UNMAP_IMPORT_FORCE_START - + * KCPU Queue starts an Unmap Import ignoring reference count + * + * @kbdev: Kbase device + * @kcpu_queue: KCPU queue + */ +#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_UNMAP_IMPORT_FORCE_START( \ + kbdev, \ + kcpu_queue \ + ) \ + do { } while (0) + +/** + * KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_UNMAP_IMPORT_FORCE_END - + * KCPU Queue ends an Unmap Import ignoring reference count + * + * @kbdev: Kbase device + * @kcpu_queue: KCPU queue + */ +#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_UNMAP_IMPORT_FORCE_END( \ + kbdev, \ + kcpu_queue \ + ) \ + do { } while (0) + +/** + * KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_JIT_ALLOC_START - + * KCPU Queue starts an array of JIT Allocs + * + * @kbdev: Kbase device + * @kcpu_queue: KCPU queue + */ +#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_JIT_ALLOC_START( \ + kbdev, \ + kcpu_queue \ + ) \ + do { } while (0) + +/** + * KBASE_TLSTREAM_TL_KBASE_ARRAY_BEGIN_KCPUQUEUE_EXECUTE_JIT_ALLOC_END - + * Begin array of KCPU Queue ends an array of JIT Allocs + * + * @kbdev: Kbase device + * @kcpu_queue: KCPU queue + */ +#define KBASE_TLSTREAM_TL_KBASE_ARRAY_BEGIN_KCPUQUEUE_EXECUTE_JIT_ALLOC_END( \ + kbdev, \ + kcpu_queue \ + ) \ + do { } while (0) + +/** + * KBASE_TLSTREAM_TL_KBASE_ARRAY_ITEM_KCPUQUEUE_EXECUTE_JIT_ALLOC_END - + * Array item of KCPU Queue ends an array of JIT Allocs + * + * @kbdev: Kbase device + * @kcpu_queue: KCPU queue + * @jit_alloc_gpu_alloc_addr: The JIT allocated GPU virtual address + * @jit_alloc_mmu_flags: The MMU 
flags for the JIT allocation + */ +#define KBASE_TLSTREAM_TL_KBASE_ARRAY_ITEM_KCPUQUEUE_EXECUTE_JIT_ALLOC_END( \ + kbdev, \ + kcpu_queue, \ + jit_alloc_gpu_alloc_addr, \ + jit_alloc_mmu_flags \ + ) \ + do { } while (0) + +/** + * KBASE_TLSTREAM_TL_KBASE_ARRAY_END_KCPUQUEUE_EXECUTE_JIT_ALLOC_END - + * End array of KCPU Queue ends an array of JIT Allocs + * + * @kbdev: Kbase device + * @kcpu_queue: KCPU queue + */ +#define KBASE_TLSTREAM_TL_KBASE_ARRAY_END_KCPUQUEUE_EXECUTE_JIT_ALLOC_END( \ + kbdev, \ + kcpu_queue \ + ) \ + do { } while (0) + +/** + * KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_JIT_FREE_START - + * KCPU Queue starts an array of JIT Frees + * + * @kbdev: Kbase device + * @kcpu_queue: KCPU queue + */ +#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_JIT_FREE_START( \ + kbdev, \ + kcpu_queue \ + ) \ + do { } while (0) + +/** + * KBASE_TLSTREAM_TL_KBASE_ARRAY_BEGIN_KCPUQUEUE_EXECUTE_JIT_FREE_END - + * Begin array of KCPU Queue ends an array of JIT Frees + * + * @kbdev: Kbase device + * @kcpu_queue: KCPU queue + */ +#define KBASE_TLSTREAM_TL_KBASE_ARRAY_BEGIN_KCPUQUEUE_EXECUTE_JIT_FREE_END( \ + kbdev, \ + kcpu_queue \ + ) \ + do { } while (0) + +/** + * KBASE_TLSTREAM_TL_KBASE_ARRAY_ITEM_KCPUQUEUE_EXECUTE_JIT_FREE_END - + * Array item of KCPU Queue ends an array of JIT Frees + * + * @kbdev: Kbase device + * @kcpu_queue: KCPU queue + * @jit_free_pages_used: The actual number of pages used by the JIT + * allocation + */ +#define KBASE_TLSTREAM_TL_KBASE_ARRAY_ITEM_KCPUQUEUE_EXECUTE_JIT_FREE_END( \ + kbdev, \ + kcpu_queue, \ + jit_free_pages_used \ + ) \ + do { } while (0) + +/** + * KBASE_TLSTREAM_TL_KBASE_ARRAY_END_KCPUQUEUE_EXECUTE_JIT_FREE_END - + * End array of KCPU Queue ends an array of JIT Frees + * + * @kbdev: Kbase device + * @kcpu_queue: KCPU queue + */ +#define KBASE_TLSTREAM_TL_KBASE_ARRAY_END_KCPUQUEUE_EXECUTE_JIT_FREE_END( \ + kbdev, \ + kcpu_queue \ + ) \ + do { } while (0) + +/** + * KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_ERRORBARRIER - + * 
KCPU Queue executes an Error Barrier + * + * @kbdev: Kbase device + * @kcpu_queue: KCPU queue + */ +#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_ERRORBARRIER( \ + kbdev, \ + kcpu_queue \ + ) \ + do { } while (0) + +/** + * KBASE_TLSTREAM_TL_KBASE_CSFFW_TLSTREAM_OVERFLOW - + * An overflow has happened with the CSFFW Timeline stream + * + * @kbdev: Kbase device + * @csffw_timestamp: Timestamp of a CSFFW event + * @csffw_cycle: Cycle number of a CSFFW event + */ +#define KBASE_TLSTREAM_TL_KBASE_CSFFW_TLSTREAM_OVERFLOW( \ + kbdev, \ + csffw_timestamp, \ + csffw_cycle \ + ) \ + do { } while (0) + + +/* Gator tracepoints are hooked into TLSTREAM interface. + * When the following tracepoints are called, corresponding + * Gator tracepoint will be called as well. + */ + +#if defined(CONFIG_MALI_BIFROST_GATOR_SUPPORT) +/* `event` is one of TL_JS_EVENT values here. + * The values of TL_JS_EVENT are guaranteed to match + * with corresponding GATOR_JOB_SLOT values. + */ +#undef KBASE_TLSTREAM_AUX_EVENT_JOB_SLOT +#define KBASE_TLSTREAM_AUX_EVENT_JOB_SLOT(kbdev, \ + context, slot_nr, atom_nr, event) \ + do { \ + int enabled = atomic_read(&kbdev->timeline_flags); \ + kbase_trace_mali_job_slots_event(kbdev->id, \ + GATOR_MAKE_EVENT(event, slot_nr), \ + context, (u8) atom_nr); \ + if (enabled & TLSTREAM_ENABLED) \ + __kbase_tlstream_aux_event_job_slot( \ + __TL_DISPATCH_STREAM(kbdev, aux), \ + context, slot_nr, atom_nr, event); \ + } while (0) + +#undef KBASE_TLSTREAM_AUX_PM_STATE +#define KBASE_TLSTREAM_AUX_PM_STATE(kbdev, core_type, state) \ + do { \ + int enabled = atomic_read(&kbdev->timeline_flags); \ + kbase_trace_mali_pm_status(kbdev->id, \ + core_type, state); \ + if (enabled & TLSTREAM_ENABLED) \ + __kbase_tlstream_aux_pm_state( \ + __TL_DISPATCH_STREAM(kbdev, aux), \ + core_type, state); \ + } while (0) + +#undef KBASE_TLSTREAM_AUX_PAGEFAULT +#define KBASE_TLSTREAM_AUX_PAGEFAULT(kbdev, \ + ctx_nr, as_nr, page_cnt_change) \ + do { \ + int enabled = 
atomic_read(&kbdev->timeline_flags); \ + kbase_trace_mali_page_fault_insert_pages(kbdev->id, \ + as_nr, \ + page_cnt_change); \ + if (enabled & TLSTREAM_ENABLED) \ + __kbase_tlstream_aux_pagefault( \ + __TL_DISPATCH_STREAM(kbdev, aux), \ + ctx_nr, as_nr, page_cnt_change); \ + } while (0) + +/* kbase_trace_mali_total_alloc_pages_change is handled differently here. + * We stream the total amount of pages allocated for `kbdev` rather + * than `page_cnt`, which is per-context. + */ +#undef KBASE_TLSTREAM_AUX_PAGESALLOC +#define KBASE_TLSTREAM_AUX_PAGESALLOC(kbdev, ctx_nr, page_cnt) \ + do { \ + int enabled = atomic_read(&kbdev->timeline_flags); \ + u32 global_pages_count = \ + atomic_read(&kbdev->memdev.used_pages); \ + \ + kbase_trace_mali_total_alloc_pages_change(kbdev->id, \ + global_pages_count); \ + if (enabled & TLSTREAM_ENABLED) \ + __kbase_tlstream_aux_pagesalloc( \ + __TL_DISPATCH_STREAM(kbdev, aux), \ + ctx_nr, page_cnt); \ + } while (0) +#endif /* CONFIG_MALI_BIFROST_GATOR_SUPPORT */ + +/* clang-format on */ +#endif