MALI: rockchip: restore midgard_for_linux/, device driver under it is on DDK r14

The process to get current source code under midgard_for_linux/ :
1. Starting from the tree state at commit 18166b65,
	revert commit "91842c9 MALI: rockchip: upgrade midgard DDK to r18p0-01rel0",
	which upgraded drivers/gpu/arm/midgard/ from DDK r14 to r18.
2. copy directory drivers/gpu/arm/midgard/ to drivers/gpu/arm/midgard_for_linux/.

Changes made to drivers/gpu/arm/midgard/ by commits from the
RK power management group earlier than commit 18166b65
are preserved in the current drivers/gpu/arm/midgard_for_linux/.

Change-Id: I41463a8c160e5d25365d6872eef1049de4a317fb
Signed-off-by: Zhen Chen <chenzhen@rock-chips.com>
This commit is contained in:
Zhen Chen 2019-01-10 09:51:46 +08:00 committed by Tao Huang
parent c3cbe0b00e
commit 0af9a0968c
211 changed files with 68037 additions and 1 deletions

View File

@ -20,7 +20,9 @@
#
obj-$(CONFIG_MALI_MIDGARD) += midgard/
obj-$(CONFIG_MALI_MIDGARD_FOR_LINUX) += midgard_for_linux/
obj-$(CONFIG_MALI_MIDGARD_FOR_ANDROID) += midgard/
obj-$(CONFIG_MALI400) += mali400/

View File

@ -20,6 +20,18 @@
#
#
source "drivers/gpu/arm/mali400/mali/Kconfig"
choice
prompt "Mali Midgard driver"
config MALI_MIDGARD_FOR_ANDROID
bool "Mali Midgard for Android"
config MALI_MIDGARD_FOR_LINUX
bool "Mali Midgard for Linux only"
endchoice
source "drivers/gpu/arm/midgard/Kconfig"
choice

View File

@ -0,0 +1,229 @@
#
# (C) COPYRIGHT 2012,2014 ARM Limited. All rights reserved.
#
# This program is free software and is provided to you under the terms of the
# GNU General Public License version 2 as published by the Free Software
# Foundation, and any use by you of this program is subject to the terms
# of such GNU licence.
#
# A copy of the licence is included with the program, and can also be obtained
# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
# Boston, MA 02110-1301, USA.
#
#
# Driver version string which is returned to userspace via an ioctl
MALI_RELEASE_NAME ?= "r14p0-01rel0"
# Paths required for build
KBASE_PATH = $(src)
KBASE_PLATFORM_PATH = $(KBASE_PATH)/platform_dummy
UMP_PATH = $(src)/../../../base
ifeq ($(CONFIG_MALI_ERROR_INJECTION),y)
MALI_ERROR_INJECT_ON = 1
endif
# Set up defaults if not defined by build system
MALI_CUSTOMER_RELEASE ?= 1
MALI_UNIT_TEST ?= 0
MALI_KERNEL_TEST_API ?= 0
MALI_ERROR_INJECT_ON ?= 0
MALI_MOCK_TEST ?= 0
MALI_COVERAGE ?= 0
MALI_INSTRUMENTATION_LEVEL ?= 0
# This workaround is for what seems to be a compiler bug we observed in
# GCC 4.7 on AOSP 4.3. The bug caused an intermittent failure compiling
# the "_Pragma" syntax, where an error message is returned:
#
# "internal compiler error: unspellable token PRAGMA"
#
# This regression has thus far only been seen on the GCC 4.7 compiler bundled
# with AOSP 4.3.0. So this makefile, intended for in-tree kernel builds
# which are not known to be used with AOSP, is hardcoded to disable the
# workaround, i.e. set the define to 0.
MALI_GCC_WORKAROUND_MIDCOM_4598 ?= 0
# Set up our defines, which will be passed to gcc
DEFINES = \
-DMALI_CUSTOMER_RELEASE=$(MALI_CUSTOMER_RELEASE) \
-DMALI_KERNEL_TEST_API=$(MALI_KERNEL_TEST_API) \
-DMALI_UNIT_TEST=$(MALI_UNIT_TEST) \
-DMALI_ERROR_INJECT_ON=$(MALI_ERROR_INJECT_ON) \
-DMALI_MOCK_TEST=$(MALI_MOCK_TEST) \
-DMALI_COVERAGE=$(MALI_COVERAGE) \
-DMALI_INSTRUMENTATION_LEVEL=$(MALI_INSTRUMENTATION_LEVEL) \
-DMALI_RELEASE_NAME=\"$(MALI_RELEASE_NAME)\" \
-DMALI_GCC_WORKAROUND_MIDCOM_4598=$(MALI_GCC_WORKAROUND_MIDCOM_4598)
ifeq ($(KBUILD_EXTMOD),)
# in-tree
DEFINES +=-DMALI_KBASE_THIRDPARTY_PATH=../../$(src)/platform/$(CONFIG_MALI_PLATFORM_THIRDPARTY_NAME)
else
# out-of-tree
DEFINES +=-DMALI_KBASE_THIRDPARTY_PATH=$(src)/platform/$(CONFIG_MALI_PLATFORM_THIRDPARTY_NAME)
endif
# Add drivers/staging/android to the include search path carried in DEFINES.
DEFINES += -I$(srctree)/drivers/staging/android
# Use our defines when compiling
# ccflags-y applies to objects built from this directory; subdir-ccflags-y
# carries the same flags down into sub-directories.
# NOTE(review): OSK_PATH is not assigned anywhere in the visible part of this
# file, so "-I$(OSK_PATH)" may expand to a bare "-I" - TODO confirm that the
# build environment provides it or that the empty value is harmless.
ccflags-y += $(DEFINES) -I$(KBASE_PATH) -I$(KBASE_PLATFORM_PATH) -I$(UMP_PATH) -I$(srctree)/include/linux
subdir-ccflags-y += $(DEFINES) -I$(KBASE_PATH) -I$(KBASE_PLATFORM_PATH) -I$(OSK_PATH) -I$(UMP_PATH) -I$(srctree)/include/linux
SRC := \
mali_kbase_device.c \
mali_kbase_cache_policy.c \
mali_kbase_mem.c \
mali_kbase_mmu.c \
mali_kbase_ipa.c \
mali_kbase_jd.c \
mali_kbase_jd_debugfs.c \
mali_kbase_jm.c \
mali_kbase_gpuprops.c \
mali_kbase_js.c \
mali_kbase_js_ctx_attr.c \
mali_kbase_event.c \
mali_kbase_context.c \
mali_kbase_pm.c \
mali_kbase_config.c \
mali_kbase_vinstr.c \
mali_kbase_softjobs.c \
mali_kbase_10969_workaround.c \
mali_kbase_hw.c \
mali_kbase_utility.c \
mali_kbase_debug.c \
mali_kbase_trace_timeline.c \
mali_kbase_gpu_memory_debugfs.c \
mali_kbase_mem_linux.c \
mali_kbase_core_linux.c \
mali_kbase_sync.c \
mali_kbase_sync_user.c \
mali_kbase_replay.c \
mali_kbase_mem_profile_debugfs.c \
mali_kbase_mmu_mode_lpae.c \
mali_kbase_mmu_mode_aarch64.c \
mali_kbase_disjoint_events.c \
mali_kbase_gator_api.c \
mali_kbase_debug_mem_view.c \
mali_kbase_debug_job_fault.c \
mali_kbase_smc.c \
mali_kbase_mem_pool.c \
mali_kbase_mem_pool_debugfs.c \
mali_kbase_tlstream.c \
mali_kbase_strings.c \
mali_kbase_as_fault_debugfs.c \
mali_kbase_regs_history_debugfs.c
ifeq ($(MALI_UNIT_TEST),1)
SRC += mali_kbase_tlstream_test.c
endif
ifeq ($(MALI_CUSTOMER_RELEASE),0)
SRC += mali_kbase_regs_dump_debugfs.c
endif
# Job Scheduler Policy: Completely Fair Scheduler
SRC += mali_kbase_js_policy_cfs.c
ccflags-y += -I$(KBASE_PATH)
# Fake platform device support: when enabled, build mali_kbase_platform_fake.c
# plus the configuration sources (and include path) of whichever platform
# option below is selected.
ifeq ($(CONFIG_MALI_PLATFORM_FAKE),y)
SRC += mali_kbase_platform_fake.c
ifeq ($(CONFIG_MALI_PLATFORM_VEXPRESS),y)
SRC += platform/vexpress/mali_kbase_config_vexpress.c \
platform/vexpress/mali_kbase_cpu_vexpress.c
ccflags-y += -I$(src)/platform/vexpress
endif
ifeq ($(CONFIG_MALI_PLATFORM_RTSM_VE),y)
SRC += platform/rtsm_ve/mali_kbase_config_vexpress.c
ccflags-y += -I$(src)/platform/rtsm_ve
endif
ifeq ($(CONFIG_MALI_PLATFORM_JUNO),y)
SRC += platform/juno/mali_kbase_config_vexpress.c
ccflags-y += -I$(src)/platform/juno
endif
ifeq ($(CONFIG_MALI_PLATFORM_JUNO_SOC),y)
SRC += platform/juno_soc/mali_kbase_config_juno_soc.c
ccflags-y += -I$(src)/platform/juno_soc
endif
ifeq ($(CONFIG_MALI_PLATFORM_VEXPRESS_1XV7_A57),y)
SRC += platform/vexpress_1xv7_a57/mali_kbase_config_vexpress.c
ccflags-y += -I$(src)/platform/vexpress_1xv7_a57
endif
ifeq ($(CONFIG_MALI_PLATFORM_VEXPRESS_6XVIRTEX7_10MHZ),y)
SRC += platform/vexpress_6xvirtex7_10mhz/mali_kbase_config_vexpress.c \
platform/vexpress_6xvirtex7_10mhz/mali_kbase_cpu_vexpress.c
ccflags-y += -I$(src)/platform/vexpress_6xvirtex7_10mhz
endif
ifeq ($(CONFIG_MALI_PLATFORM_A7_KIPLING),y)
SRC += platform/a7_kipling/mali_kbase_config_a7_kipling.c \
platform/a7_kipling/mali_kbase_cpu_a7_kipling.c
ccflags-y += -I$(src)/platform/a7_kipling
endif
# Third-party platform, fake-platform variant: a module build (=m) includes
# the platform's own Kbuild, a built-in build (=y) descends into platform/.
# NOTE(review): the selection keys on CONFIG_MALI_MIDGARD here; confirm this
# is the intended symbol for the midgard_for_linux copy of the driver.
ifeq ($(CONFIG_MALI_PLATFORM_THIRDPARTY),y)
# remove begin and end quotes from the Kconfig string type
platform_name := $(shell echo $(CONFIG_MALI_PLATFORM_THIRDPARTY_NAME))
MALI_PLATFORM_THIRDPARTY_DIR := platform/$(platform_name)
ccflags-y += -I$(src)/$(MALI_PLATFORM_THIRDPARTY_DIR)
ifeq ($(CONFIG_MALI_MIDGARD),m)
include $(src)/platform/$(platform_name)/Kbuild
else ifeq ($(CONFIG_MALI_MIDGARD),y)
obj-$(CONFIG_MALI_MIDGARD) += platform/
endif
endif
endif # CONFIG_MALI_PLATFORM_FAKE=y
# Third-party platform, unconditional variant: always includes the platform's
# Kbuild regardless of CONFIG_MALI_PLATFORM_FAKE.
# NOTE(review): this repeats the stanza nested in the PLATFORM_FAKE block
# above; with PLATFORM_FAKE=y and a module build the platform Kbuild is
# included twice and the include path is added twice - confirm this is
# intended.
ifeq ($(CONFIG_MALI_PLATFORM_THIRDPARTY),y)
# remove begin and end quotes from the Kconfig string type
platform_name := $(shell echo $(CONFIG_MALI_PLATFORM_THIRDPARTY_NAME))
MALI_PLATFORM_THIRDPARTY_DIR := platform/$(platform_name)
ccflags-y += -I$(src)/$(MALI_PLATFORM_THIRDPARTY_DIR)
MALI_PLATFORM_DIR := platform/$(platform_name)
include $(src)/platform/$(platform_name)/Kbuild
endif
# Tell the Linux build system from which .o file to create the kernel module
obj-$(CONFIG_MALI_MIDGARD) += midgard_kbase.o
# Tell the Linux build system to enable building of our .c files
midgard_kbase-y := $(SRC:.c=.o)
midgard_kbase-$(CONFIG_MALI_DMA_FENCE) += mali_kbase_dma_fence.o
MALI_BACKEND_PATH ?= backend
CONFIG_MALI_BACKEND ?= gpu
CONFIG_MALI_BACKEND_REAL ?= $(CONFIG_MALI_BACKEND)
ifeq ($(MALI_MOCK_TEST),1)
ifeq ($(CONFIG_MALI_BACKEND_REAL),gpu)
# Test functionality
midgard_kbase-y += tests/internal/src/mock/mali_kbase_pm_driver_mock.o
endif
endif
include $(src)/$(MALI_BACKEND_PATH)/$(CONFIG_MALI_BACKEND_REAL)/Kbuild
midgard_kbase-y += $(BACKEND:.c=.o)
ccflags-y += -I$(src)/$(MALI_BACKEND_PATH)/$(CONFIG_MALI_BACKEND_REAL)
subdir-ccflags-y += -I$(src)/$(MALI_BACKEND_PATH)/$(CONFIG_MALI_BACKEND_REAL)
# Default to devicetree platform if neither a fake platform or a thirdparty
# platform is configured.
ifeq ($(CONFIG_MALI_PLATFORM_THIRDPARTY)$(CONFIG_MALI_PLATFORM_FAKE),)
CONFIG_MALI_PLATFORM_DEVICETREE := y
endif
midgard_kbase-$(CONFIG_MALI_PLATFORM_DEVICETREE) += \
platform/devicetree/mali_kbase_runtime_pm.o \
platform/devicetree/mali_kbase_config_devicetree.o
ccflags-$(CONFIG_MALI_PLATFORM_DEVICETREE) += -I$(src)/platform/devicetree

View File

@ -0,0 +1,225 @@
#
# (C) COPYRIGHT 2012-2015 ARM Limited. All rights reserved.
#
# This program is free software and is provided to you under the terms of the
# GNU General Public License version 2 as published by the Free Software
# Foundation, and any use by you of this program is subject to the terms
# of such GNU licence.
#
# A copy of the licence is included with the program, and can also be obtained
# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
# Boston, MA 02110-1301, USA.
#
#
menuconfig MALI_MIDGARD
tristate "Mali Midgard series support"
select GPU_TRACEPOINTS if ANDROID
default n
help
Enable this option to build support for an ARM Mali Midgard GPU.
To compile this driver as a module, choose M here:
this will generate a single module, called midgard_kbase.
config MALI_GATOR_SUPPORT
bool "Streamline support via Gator"
depends on MALI_MIDGARD
default n
help
Adds diagnostic support for use with the ARM Streamline Performance Analyzer.
You will need the Gator device driver already loaded before loading this driver when enabling
Streamline debug support.
This is a legacy interface required by older versions of Streamline.
config MALI_MIDGARD_DVFS
bool "Enable legacy DVFS"
depends on MALI_MIDGARD && !MALI_DEVFREQ && !MALI_PLATFORM_DEVICETREE
default n
help
Choose this option to enable legacy DVFS in the Mali Midgard DDK.
config MALI_MIDGARD_ENABLE_TRACE
bool "Enable kbase tracing"
depends on MALI_MIDGARD
default n
help
Enables tracing in kbase. Trace log available through
the "mali_trace" debugfs file, when the CONFIG_DEBUG_FS is enabled
config MALI_DEVFREQ
bool "devfreq support for Mali"
depends on MALI_MIDGARD && PM_DEVFREQ
help
Support devfreq for Mali.
Using the devfreq framework and, by default, the simpleondemand
governor, the frequency of Mali will be dynamically selected from the
available OPPs.
config MALI_DMA_FENCE
bool "DMA_BUF fence support for Mali"
depends on MALI_MIDGARD && !KDS
default n
help
Support DMA_BUF fences for Mali.
This option should only be enabled if KDS is not present and
the Linux Kernel has built in support for DMA_BUF fences.
# MALI_EXPERT configuration options
menuconfig MALI_EXPERT
depends on MALI_MIDGARD
bool "Enable Expert Settings"
default n
help
Enabling this option and modifying the default settings may produce a driver with performance or
other limitations.
config MALI_PRFCNT_SET_SECONDARY
bool "Use secondary set of performance counters"
depends on MALI_MIDGARD && MALI_EXPERT
default n
help
Select this option to use secondary set of performance counters. Kernel
features that depend on an access to the primary set of counters may
become unavailable. Enabling this option will prevent power management
from working optimally and may cause instrumentation tools to return
bogus results.
If unsure, say N.
config MALI_PLATFORM_FAKE
bool "Enable fake platform device support"
depends on MALI_MIDGARD && MALI_EXPERT
default n
help
When you start to work with the Mali Midgard series device driver the platform-specific code of
the Linux kernel for your platform may not be complete. In this situation the kernel device driver
supports creating the platform device outside of the Linux platform-specific code.
Enable this option if you would like to use a platform device configuration from within the device driver.
choice
prompt "Platform configuration"
depends on MALI_MIDGARD && MALI_EXPERT
default MALI_PLATFORM_DEVICETREE
help
Select the SOC platform that contains a Mali Midgard GPU
config MALI_PLATFORM_DEVICETREE
bool "Device Tree platform"
depends on OF
help
Select this option to use Device Tree with the Mali driver.
When using this option the Mali driver will get the details of the
GPU hardware from the Device Tree. This means that the same driver
binary can run on multiple platforms as long as all the GPU hardware
details are described in the device tree.
Device Tree is the recommended method for the Mali driver platform
integration.
config MALI_PLATFORM_VEXPRESS
depends on ARCH_VEXPRESS && (ARCH_VEXPRESS_CA9X4 || ARCH_VEXPRESS_CA15X4)
bool "Versatile Express"
config MALI_PLATFORM_VEXPRESS_VIRTEX7_40MHZ
depends on ARCH_VEXPRESS && (ARCH_VEXPRESS_CA9X4 || ARCH_VEXPRESS_CA15X4)
bool "Versatile Express w/Virtex7 @ 40Mhz"
config MALI_PLATFORM_GOLDFISH
depends on ARCH_GOLDFISH
bool "Android Goldfish virtual CPU"
config MALI_PLATFORM_PBX
depends on ARCH_REALVIEW && REALVIEW_EB_A9MP && MACH_REALVIEW_PBX
bool "Realview PBX-A9"
config MALI_PLATFORM_THIRDPARTY
bool "Third Party Platform"
endchoice
config MALI_PLATFORM_THIRDPARTY_NAME
depends on MALI_MIDGARD && MALI_PLATFORM_THIRDPARTY && MALI_EXPERT
string "Third party platform name"
help
Enter the name of a third party platform that is supported. The third party configuration
file must be in midgard/config/tpip/mali_kbase_config_xxx.c where xxx is the name
specified here.
config MALI_DEBUG
bool "Debug build"
depends on MALI_MIDGARD && MALI_EXPERT
default n
help
Select this option for increased checking and reporting of errors.
config MALI_FENCE_DEBUG
bool "Debug sync fence usage"
depends on MALI_MIDGARD && MALI_EXPERT && SYNC
default y if MALI_DEBUG
help
Select this option to enable additional checking and reporting on the
use of sync fences in the Mali driver.
This will add a 3s timeout to all sync fence waits in the Mali
driver, so that when work for Mali has been waiting on a sync fence
for a long time a debug message will be printed, detailing what fence
is causing the block, and which dependent Mali atoms are blocked as a
result of this.
The timeout can be changed at runtime through the js_soft_timeout
device attribute, where the timeout is specified in milliseconds.
config MALI_NO_MALI
bool "No Mali"
depends on MALI_MIDGARD && MALI_EXPERT
default n
help
This can be used to test the driver in a simulated environment
whereby the hardware is not physically present. If the hardware is physically
present it will not be used. This can be used to test the majority of the
driver without needing actual hardware or for software benchmarking.
All calls to the simulated hardware will complete immediately as if the hardware
completed the task.
config MALI_ERROR_INJECT
bool "Error injection"
depends on MALI_MIDGARD && MALI_EXPERT && MALI_NO_MALI
default n
help
Enables insertion of errors to test module failure and recovery mechanisms.
config MALI_TRACE_TIMELINE
bool "Timeline tracing"
depends on MALI_MIDGARD && MALI_EXPERT
default n
help
Enables timeline tracing through the kernel tracepoint system.
config MALI_SYSTEM_TRACE
bool "Enable system event tracing support"
depends on MALI_MIDGARD && MALI_EXPERT
default n
help
Choose this option to enable system trace events for each
kbase event. This is typically used for debugging but has
minimal overhead when not in use. Enable only if you know what
you are doing.
config MALI_GPU_MMU_AARCH64
bool "Use AArch64 page tables"
depends on MALI_MIDGARD && MALI_EXPERT
default n
help
Use AArch64 format page tables for the GPU instead of LPAE-style.
The two formats have the same functionality and performance but a
future GPU may deprecate or remove the legacy LPAE-style format.
The LPAE-style format is supported on all Midgard and current Bifrost
GPUs. Enabling AArch64 format restricts the driver to only supporting
Bifrost GPUs.
If in doubt, say N.
source "drivers/gpu/arm/midgard/platform/Kconfig"

View File

@ -0,0 +1,42 @@
#
# (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved.
#
# This program is free software and is provided to you under the terms of the
# GNU General Public License version 2 as published by the Free Software
# Foundation, and any use by you of this program is subject to the terms
# of such GNU licence.
#
# A copy of the licence is included with the program, and can also be obtained
# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
# Boston, MA 02110-1301, USA.
#
#
# Out-of-tree build wrapper: hands this directory to the kernel build system
# in KDIR as an external module (M=$(CURDIR)).

# Kernel build tree to compile against; defaults to the running kernel's.
KDIR ?= /lib/modules/$(shell uname -r)/build
# Locations of sibling components. Despite the _RELATIVE suffix, each path is
# anchored at $(CURDIR).
BUSLOG_PATH_RELATIVE = $(CURDIR)/../../../..
UMP_PATH_RELATIVE = $(CURDIR)/../../../base/ump
KBASE_PATH_RELATIVE = $(CURDIR)
KDS_PATH_RELATIVE = $(CURDIR)/../../../..
# Module.symvers files of the modules this driver links against; UMP is
# always listed, the others are appended conditionally below.
EXTRA_SYMBOLS = $(UMP_PATH_RELATIVE)/src/Module.symvers
ifeq ($(MALI_UNIT_TEST), 1)
EXTRA_SYMBOLS += $(KBASE_PATH_RELATIVE)/tests/internal/src/kernel_assert_module/linux/Module.symvers
endif
ifeq ($(MALI_BUS_LOG), 1)
#Add bus logger symbols
EXTRA_SYMBOLS += $(BUSLOG_PATH_RELATIVE)/drivers/base/bus_logger/Module.symvers
endif
# GPL driver supports KDS
EXTRA_SYMBOLS += $(KDS_PATH_RELATIVE)/drivers/base/kds/Module.symvers
# we get the symbols from modules using KBUILD_EXTRA_SYMBOLS to prevent warnings about unknown functions
# SCONS_CFLAGS and SCONS_CONFIGS are not set in this file; they are taken
# from the caller's environment or command line if present.
all:
$(MAKE) -C $(KDIR) M=$(CURDIR) EXTRA_CFLAGS="-I$(CURDIR)/../../../../include -I$(CURDIR)/../../../../tests/include $(SCONS_CFLAGS)" $(SCONS_CONFIGS) KBUILD_EXTRA_SYMBOLS="$(EXTRA_SYMBOLS)" modules
clean:
$(MAKE) -C $(KDIR) M=$(CURDIR) clean

View File

@ -0,0 +1,17 @@
#
# (C) COPYRIGHT 2010 ARM Limited. All rights reserved.
#
# This program is free software and is provided to you under the terms of the
# GNU General Public License version 2 as published by the Free Software
# Foundation, and any use by you of this program is subject to the terms
# of such GNU licence.
#
# A copy of the licence is included with the program, and can also be obtained
# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
# Boston, MA 02110-1301, USA.
#
#
EXTRA_CFLAGS += -I$(ROOT) -I$(KBASE_PATH) -I$(OSK_PATH)/src/linux/include -I$(KBASE_PATH)/platform_$(PLATFORM)

View File

@ -0,0 +1,62 @@
#
# (C) COPYRIGHT 2014 ARM Limited. All rights reserved.
#
# This program is free software and is provided to you under the terms of the
# GNU General Public License version 2 as published by the Free Software
# Foundation, and any use by you of this program is subject to the terms
# of such GNU licence.
#
# A copy of the licence is included with the program, and can also be obtained
# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
# Boston, MA 02110-1301, USA.
#
#
# Source list for the "gpu" backend. This fragment is pulled in by the parent
# Kbuild via "include", which then turns $(BACKEND) into objects of
# midgard_kbase; the paths below are therefore relative to the parent
# directory, not to backend/gpu/.

# Files that are always built.
BACKEND += \
backend/gpu/mali_kbase_cache_policy_backend.c \
backend/gpu/mali_kbase_device_hw.c \
backend/gpu/mali_kbase_gpu.c \
backend/gpu/mali_kbase_gpuprops_backend.c \
backend/gpu/mali_kbase_debug_job_fault_backend.c \
backend/gpu/mali_kbase_irq_linux.c \
backend/gpu/mali_kbase_instr_backend.c \
backend/gpu/mali_kbase_jm_as.c \
backend/gpu/mali_kbase_jm_hw.c \
backend/gpu/mali_kbase_jm_rb.c \
backend/gpu/mali_kbase_js_affinity.c \
backend/gpu/mali_kbase_js_backend.c \
backend/gpu/mali_kbase_mmu_hw_direct.c \
backend/gpu/mali_kbase_pm_backend.c \
backend/gpu/mali_kbase_pm_driver.c \
backend/gpu/mali_kbase_pm_metrics.c \
backend/gpu/mali_kbase_pm_ca.c \
backend/gpu/mali_kbase_pm_ca_fixed.c \
backend/gpu/mali_kbase_pm_always_on.c \
backend/gpu/mali_kbase_pm_coarse_demand.c \
backend/gpu/mali_kbase_pm_demand.c \
backend/gpu/mali_kbase_pm_policy.c \
backend/gpu/mali_kbase_time.c
# Extra power-management sources that are only built when
# MALI_CUSTOMER_RELEASE is 0.
ifeq ($(MALI_CUSTOMER_RELEASE),0)
BACKEND += \
backend/gpu/mali_kbase_pm_ca_random.c \
backend/gpu/mali_kbase_pm_demand_always_powered.c \
backend/gpu/mali_kbase_pm_fast_start.c
endif
# devfreq integration, only with CONFIG_MALI_DEVFREQ=y.
ifeq ($(CONFIG_MALI_DEVFREQ),y)
BACKEND += backend/gpu/mali_kbase_devfreq.c
endif
# Model sources built only with CONFIG_MALI_NO_MALI=y.
ifeq ($(CONFIG_MALI_NO_MALI),y)
# Dummy model
BACKEND += backend/gpu/mali_kbase_model_dummy.c
BACKEND += backend/gpu/mali_kbase_model_linux.c
# HW error simulation
BACKEND += backend/gpu/mali_kbase_model_error_generator.c
endif
# Simple power model, only with CONFIG_DEVFREQ_THERMAL=y.
ifeq ($(CONFIG_DEVFREQ_THERMAL),y)
BACKEND += backend/gpu/mali_kbase_power_model_simple.c
endif

View File

@ -0,0 +1,29 @@
/*
*
* (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
* Foundation, and any use by you of this program is subject to the terms
* of such GNU licence.
*
* A copy of the licence is included with the program, and can also be obtained
* from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
* Boston, MA 02110-1301, USA.
*
*/
/*
* Backend specific configuration
*/
#ifndef _KBASE_BACKEND_CONFIG_H_
#define _KBASE_BACKEND_CONFIG_H_
/* Enable GPU reset API */
#define KBASE_GPU_RESET_EN 1
#endif /* _KBASE_BACKEND_CONFIG_H_ */

View File

@ -0,0 +1,29 @@
/*
*
* (C) COPYRIGHT 2015-2016 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
* Foundation, and any use by you of this program is subject to the terms
* of such GNU licence.
*
* A copy of the licence is included with the program, and can also be obtained
* from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
* Boston, MA 02110-1301, USA.
*
*/
#include "backend/gpu/mali_kbase_cache_policy_backend.h"
#include <backend/gpu/mali_kbase_device_internal.h>
/*
 * Record the requested coherency mode in the device structure and, when the
 * hardware reports BASE_HW_FEATURE_COHERENCY_REG, write the same value to the
 * COHERENCY_ENABLE register.
 */
void kbase_cache_set_coherency_mode(struct kbase_device *kbdev,
		u32 mode)
{
	/* The mode is remembered even when there is no register to program. */
	kbdev->current_gpu_coherency_mode = mode;

	if (!kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_COHERENCY_REG))
		return;

	kbase_reg_write(kbdev, COHERENCY_ENABLE, mode, NULL);
}

View File

@ -0,0 +1,34 @@
/*
*
* (C) COPYRIGHT 2015-2016 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
* Foundation, and any use by you of this program is subject to the terms
* of such GNU licence.
*
* A copy of the licence is included with the program, and can also be obtained
* from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
* Boston, MA 02110-1301, USA.
*
*/
#ifndef _KBASE_CACHE_POLICY_BACKEND_H_
#define _KBASE_CACHE_POLICY_BACKEND_H_
#include "mali_kbase.h"
#include "mali_base_kernel.h"
/**
 * kbase_cache_set_coherency_mode() - Sets the system coherency mode
 * in the GPU.
 * @kbdev: Device pointer
 * @mode: Coherency mode. COHERENCY_ACE/ACE_LITE
 *
 * The mode is always stored in kbdev->current_gpu_coherency_mode. It is
 * additionally written to the COHERENCY_ENABLE register when the hardware
 * reports BASE_HW_FEATURE_COHERENCY_REG.
 */
void kbase_cache_set_coherency_mode(struct kbase_device *kbdev,
u32 mode);
#endif /* _KBASE_CACHE_POLICY_BACKEND_H_ */

View File

@ -0,0 +1,157 @@
/*
*
* (C) COPYRIGHT 2012-2015 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
* Foundation, and any use by you of this program is subject to the terms
* of such GNU licence.
*
* A copy of the licence is included with the program, and can also be obtained
* from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
* Boston, MA 02110-1301, USA.
*
*/
#include <mali_kbase.h>
#include <backend/gpu/mali_kbase_device_internal.h>
#include "mali_kbase_debug_job_fault.h"
#ifdef CONFIG_DEBUG_FS
/*
 * Register tables for the job-fault register dump.
 *
 * Each table lists register identifiers that are passed through the macro
 * named in its comment to obtain the value stored in the dump list. The
 * tables are only ever read, so they are const.
 */

/* GPU_CONTROL_REG(r) */
static const int gpu_control_reg_snapshot[] = {
	GPU_ID,
	SHADER_READY_LO,
	SHADER_READY_HI,
	TILER_READY_LO,
	TILER_READY_HI,
	L2_READY_LO,
	L2_READY_HI
};

/* JOB_CONTROL_REG(r) */
static const int job_control_reg_snapshot[] = {
	JOB_IRQ_MASK,
	JOB_IRQ_STATUS
};

/* JOB_SLOT_REG(n,r) - repeated once per job slot */
static const int job_slot_reg_snapshot[] = {
	JS_HEAD_LO,
	JS_HEAD_HI,
	JS_TAIL_LO,
	JS_TAIL_HI,
	JS_AFFINITY_LO,
	JS_AFFINITY_HI,
	JS_CONFIG,
	JS_STATUS,
	JS_HEAD_NEXT_LO,
	JS_HEAD_NEXT_HI,
	JS_AFFINITY_NEXT_LO,
	JS_AFFINITY_NEXT_HI,
	JS_CONFIG_NEXT
};

/* MMU_REG(r) */
static const int mmu_reg_snapshot[] = {
	MMU_IRQ_MASK,
	MMU_IRQ_STATUS
};

/* MMU_AS_REG(n,r) - repeated once per address space */
static const int as_reg_snapshot[] = {
	AS_TRANSTAB_LO,
	AS_TRANSTAB_HI,
	AS_MEMATTR_LO,
	AS_MEMATTR_HI,
	AS_FAULTSTATUS,
	AS_FAULTADDRESS_LO,
	AS_FAULTADDRESS_HI,
	AS_STATUS
};
/**
 * kbase_debug_job_fault_reg_snapshot_init - Build the list of registers to
 * capture on a job fault.
 * @kctx:      Context whose reg_dump buffer is filled in.
 * @reg_range: Size budget for the dump; (reg_range * 2 / 4) is used as the
 *             number of entries reg_dump may hold.
 *
 * reg_dump is written as pairs: the even entry receives the value produced
 * by the register macro, the odd entry is left for the value read later. The
 * list ends with a pair of REGISTER_DUMP_TERMINATION_FLAG entries.
 *
 * The required number of entries is computed before anything is written, so
 * an undersized buffer is reported (WARN) and rejected instead of being
 * overrun.
 *
 * Return: true when the list was written; false when there is no reg_dump
 * buffer or the list would not fit.
 */
bool kbase_debug_job_fault_reg_snapshot_init(struct kbase_context *kctx,
		int reg_range)
{
	/* Element counts derived from the element size, not a literal 4. */
	const int nr_gpu = (int)(sizeof(gpu_control_reg_snapshot) /
			sizeof(gpu_control_reg_snapshot[0]));
	const int nr_job = (int)(sizeof(job_control_reg_snapshot) /
			sizeof(job_control_reg_snapshot[0]));
	const int nr_slot = (int)(sizeof(job_slot_reg_snapshot) /
			sizeof(job_slot_reg_snapshot[0]));
	const int nr_mmu = (int)(sizeof(mmu_reg_snapshot) /
			sizeof(mmu_reg_snapshot[0]));
	const int nr_as = (int)(sizeof(as_reg_snapshot) /
			sizeof(as_reg_snapshot[0]));
	int i, j;
	int offset = 0;
	int slot_number;
	int as_number;
	int needed;

	if (kctx->reg_dump == NULL)
		return false;

	slot_number = kctx->kbdev->gpu_props.num_job_slots;
	as_number = kctx->kbdev->gpu_props.num_address_spaces;

	/*
	 * Two entries per register, plus two more for the terminator pair.
	 * Refuse to proceed if that exceeds the capacity implied by reg_range.
	 */
	needed = 2 * (nr_gpu + nr_job + slot_number * nr_slot +
			nr_mmu + as_number * nr_as);
	if (WARN_ON(needed + 2 > (reg_range * 2 / 4)))
		return false;

	/* get the GPU control registers*/
	for (i = 0; i < nr_gpu; i++) {
		kctx->reg_dump[offset] =
				GPU_CONTROL_REG(gpu_control_reg_snapshot[i]);
		offset += 2;
	}

	/* get the Job control registers*/
	for (i = 0; i < nr_job; i++) {
		kctx->reg_dump[offset] =
				JOB_CONTROL_REG(job_control_reg_snapshot[i]);
		offset += 2;
	}

	/* get the Job Slot registers*/
	for (j = 0; j < slot_number; j++) {
		for (i = 0; i < nr_slot; i++) {
			kctx->reg_dump[offset] =
				JOB_SLOT_REG(j, job_slot_reg_snapshot[i]);
			offset += 2;
		}
	}

	/* get the MMU registers*/
	for (i = 0; i < nr_mmu; i++) {
		kctx->reg_dump[offset] = MMU_REG(mmu_reg_snapshot[i]);
		offset += 2;
	}

	/* get the Address space registers*/
	for (j = 0; j < as_number; j++) {
		for (i = 0; i < nr_as; i++) {
			kctx->reg_dump[offset] =
					MMU_AS_REG(j, as_reg_snapshot[i]);
			offset += 2;
		}
	}

	/* set the termination flag*/
	kctx->reg_dump[offset] = REGISTER_DUMP_TERMINATION_FLAG;
	kctx->reg_dump[offset + 1] = REGISTER_DUMP_TERMINATION_FLAG;

	dev_dbg(kctx->kbdev->dev, "kbase_job_fault_reg_snapshot_init:%d\n",
			offset);

	return true;
}
/*
 * Walk the (register, value) pairs in kctx->reg_dump and store the result of
 * kbase_reg_read() for each register in the slot that follows it. The walk
 * stops at the first even entry equal to REGISTER_DUMP_TERMINATION_FLAG.
 *
 * Returns false when the context has no reg_dump buffer, true otherwise.
 */
bool kbase_job_fault_get_reg_snapshot(struct kbase_context *kctx)
{
	int idx;

	if (!kctx->reg_dump)
		return false;

	for (idx = 0;
	     kctx->reg_dump[idx] != REGISTER_DUMP_TERMINATION_FLAG;
	     idx += 2)
		kctx->reg_dump[idx + 1] = kbase_reg_read(kctx->kbdev,
				kctx->reg_dump[idx], NULL);

	return true;
}
#endif

View File

@ -0,0 +1,344 @@
/*
*
* (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
* Foundation, and any use by you of this program is subject to the terms
* of such GNU licence.
*
* A copy of the licence is included with the program, and can also be obtained
* from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
* Boston, MA 02110-1301, USA.
*
*/
#define ENABLE_DEBUG_LOG
#include "../../platform/rk/custom_log.h"
#include <mali_kbase.h>
#include <mali_kbase_tlstream.h>
#include <mali_kbase_config_defaults.h>
#include <backend/gpu/mali_kbase_pm_internal.h>
#ifdef CONFIG_DEVFREQ_THERMAL
#include <backend/gpu/mali_kbase_power_model_simple.h>
#endif
#include <linux/clk.h>
#include <linux/devfreq.h>
#ifdef CONFIG_DEVFREQ_THERMAL
#include <linux/devfreq_cooling.h>
#endif
#include <linux/version.h>
#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 13, 0)
#include <linux/pm_opp.h>
#else /* Linux >= 3.13 */
/* In 3.13 the OPP include header file, types, and functions were all
* renamed. Use the old filename for the include, and define the new names to
* the old, when an old kernel is detected.
*/
#include <linux/opp.h>
#define dev_pm_opp opp
#define dev_pm_opp_get_voltage opp_get_voltage
#define dev_pm_opp_get_opp_count opp_get_opp_count
#define dev_pm_opp_find_freq_ceil opp_find_freq_ceil
#endif /* Linux >= 3.13 */
#include <soc/rockchip/rockchip_opp_select.h>
/*
 * Descriptor passed to rockchip_register_thermal_notifier() from
 * kbase_devfreq_init(). Its .data member is not set here; it is assigned the
 * devfreq instance in kbase_devfreq_init() just before registration, which
 * is why this object is not const.
 */
static struct thermal_opp_device_data gpu_devdata = {
.type = THERMAL_OPP_TPYE_DEV,
.low_temp_adjust = rockchip_dev_low_temp_adjust,
.high_temp_adjust = rockchip_dev_high_temp_adjust,
};
/**
 * kbase_devfreq_target - devfreq "target" callback: switch the GPU to a new
 * operating point.
 * @dev:         Device whose drvdata is the struct kbase_device.
 * @target_freq: In: requested frequency. Out: frequency actually selected.
 * @flags:       Passed through to devfreq_recommended_opp().
 *
 * The voltage is raised before the clock when the frequency goes up and
 * lowered after the clock when the frequency goes down. When the frequency
 * is unchanged only the voltage is adjusted, and only if a regulator is
 * present and the voltage differs from the current one.
 *
 * Return: 0 on success, otherwise the error from the OPP lookup,
 * regulator_set_voltage() or clk_set_rate().
 */
static int
kbase_devfreq_target(struct device *dev, unsigned long *target_freq, u32 flags)
{
	struct kbase_device *kbdev = dev_get_drvdata(dev);
	struct dev_pm_opp *opp;
	unsigned long freq = *target_freq;
	unsigned long old_freq = kbdev->current_freq;
	unsigned long voltage;
	int err;

	rcu_read_lock();
	opp = devfreq_recommended_opp(dev, &freq, flags);
	if (IS_ERR(opp)) {
		rcu_read_unlock();
		dev_err(dev, "Failed to get opp (%ld)\n", PTR_ERR(opp));
		return PTR_ERR(opp);
	}
	voltage = dev_pm_opp_get_voltage(opp);
	rcu_read_unlock();

	/*
	 * Only update if there is a change of frequency
	 */
	if (old_freq == freq) {
		*target_freq = freq;
#ifdef CONFIG_REGULATOR
		/*
		 * Without a regulator there is nothing to adjust; calling
		 * regulator_set_voltage() with a NULL regulator must be
		 * avoided, as in the other two branches below.
		 */
		if (!kbdev->regulator || kbdev->current_voltage == voltage)
			return 0;
		err = regulator_set_voltage(kbdev->regulator, voltage, INT_MAX);
		if (err) {
			dev_err(dev, "Failed to set voltage (%d)\n", err);
			return err;
		}
		/* Falls through: the remaining bookkeeping below still runs. */
#else
		return 0;
#endif
	}

#ifdef CONFIG_REGULATOR
	/* Frequency going up: raise the voltage first. */
	if (kbdev->regulator && kbdev->current_voltage != voltage &&
			old_freq < freq) {
		err = regulator_set_voltage(kbdev->regulator, voltage, INT_MAX);
		if (err) {
			dev_err(dev, "Failed to increase voltage (%d)\n", err);
			return err;
		}
	}
#endif

	err = clk_set_rate(kbdev->clock, freq);
	if (err) {
		dev_err(dev, "Failed to set clock %lu (target %lu)\n",
				freq, *target_freq);
		return err;
	}

	*target_freq = freq;
	kbdev->current_freq = freq;
	if (kbdev->devfreq)
		kbdev->devfreq->last_status.current_frequency = freq;

#ifdef CONFIG_REGULATOR
	/* Frequency going down: lower the voltage only after the clock. */
	if (kbdev->regulator && kbdev->current_voltage != voltage &&
			old_freq > freq) {
		err = regulator_set_voltage(kbdev->regulator, voltage, INT_MAX);
		if (err) {
			dev_err(dev, "Failed to decrease voltage (%d)\n", err);
			return err;
		}
	}
#endif

	kbdev->current_voltage = voltage;

	kbase_tlstream_aux_devfreq_target((u64)freq);

	kbase_pm_reset_dvfs_utilisation(kbdev);

	return err;
}
/*
 * devfreq "get_cur_freq" callback: report the frequency cached in the
 * kbase_device attached to @dev. Always returns 0.
 */
static int
kbase_devfreq_cur_freq(struct device *dev, unsigned long *freq)
{
	struct kbase_device *gpu = dev_get_drvdata(dev);

	*freq = gpu->current_freq;

	return 0;
}
/*
 * devfreq "get_dev_status" callback: fill @stat with the cached frequency
 * and the total/busy times obtained from kbase_pm_get_dvfs_utilisation().
 * Always returns 0.
 */
static int
kbase_devfreq_status(struct device *dev, struct devfreq_dev_status *stat)
{
	struct kbase_device *gpu = dev_get_drvdata(dev);

	stat->current_frequency = gpu->current_freq;

	kbase_pm_get_dvfs_utilisation(gpu, &stat->total_time,
			&stat->busy_time);

	stat->private_data = NULL;

#ifdef CONFIG_DEVFREQ_THERMAL
#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 3, 0)
	/* On kernels before 4.3, mirror the status into the cooling device. */
	if (gpu->devfreq_cooling)
		memcpy(&gpu->devfreq_cooling->last_status, stat,
				sizeof(*stat));
#endif
#endif

	return 0;
}
/**
 * kbase_devfreq_init_freq_table - Allocate and fill the devfreq frequency
 * table from the device's OPPs.
 * @kbdev: Device whose OPPs are enumerated.
 * @dp:    Profile that receives freq_table and max_state.
 *
 * If fewer OPPs can be enumerated than were counted, a warning is logged and
 * max_state is set to the number actually stored.
 *
 * Return: 0 on success, the negative value from dev_pm_opp_get_opp_count()
 * if counting fails, or -ENOMEM if the table cannot be allocated.
 */
static int kbase_devfreq_init_freq_table(struct kbase_device *kbdev,
		struct devfreq_dev_profile *dp)
{
	int count;
	int i = 0;
	unsigned long freq = 0;
	struct dev_pm_opp *opp;

	rcu_read_lock();
	count = dev_pm_opp_get_opp_count(kbdev->dev);
	rcu_read_unlock();
	if (count < 0)
		return count;

	dp->freq_table = kmalloc_array(count, sizeof(dp->freq_table[0]),
				GFP_KERNEL);
	if (!dp->freq_table)
		return -ENOMEM;

	rcu_read_lock();
	/*
	 * freq is bumped by one after each hit so that the next ceil lookup
	 * starts just above the entry that was stored.
	 */
	for (i = 0; i < count; i++, freq++) {
		opp = dev_pm_opp_find_freq_ceil(kbdev->dev, &freq);
		if (IS_ERR(opp))
			break;

		dp->freq_table[i] = freq;
	}
	rcu_read_unlock();

	if (count != i)
		dev_warn(kbdev->dev, "Unable to enumerate all OPPs (%d!=%d)\n",
				count, i);

	dp->max_state = i;

	return 0;
}
static void kbase_devfreq_term_freq_table(struct kbase_device *kbdev)
{
struct devfreq_dev_profile *dp = kbdev->devfreq->profile;
kfree(dp->freq_table);
}
/*
 * devfreq "exit" callback: release the frequency table of the kbase_device
 * attached to @dev.
 */
static void kbase_devfreq_exit(struct device *dev)
{
	kbase_devfreq_term_freq_table(dev_get_drvdata(dev));
}
/**
 * kbase_devfreq_init - Register the GPU with devfreq
 * @kbdev: Kbase device
 *
 * Fills in kbdev->devfreq_profile, builds the frequency table, adds the
 * devfreq device with the "simple_ondemand" governor, registers the OPP
 * notifier and the Rockchip thermal notifier and, when CONFIG_DEVFREQ_THERMAL
 * is set, initialises the simple power model and registers a cooling device.
 *
 * A missing thermal notifier or a power model reporting -ENODEV is not treated
 * as a failure.
 *
 * Return: 0 on success, -ENODEV if the device has no clock, -EFAULT if the
 * frequency table cannot be built, otherwise the negative error code of the
 * step that failed.
 */
int kbase_devfreq_init(struct kbase_device *kbdev)
{
	struct devfreq_dev_profile *dp;
	unsigned long opp_rate;
	int err;
	int remove_err;

	if (!kbdev->clock)
		return -ENODEV;

	kbdev->current_freq = clk_get_rate(kbdev->clock);
#ifdef CONFIG_REGULATOR
	if (kbdev->regulator)
		kbdev->current_voltage =
			regulator_get_voltage(kbdev->regulator);
#endif

	dp = &kbdev->devfreq_profile;

	dp->initial_freq = kbdev->current_freq;
	/* .KP : set devfreq_dvfs_interval_in_ms */
	dp->polling_ms = 20;
	dp->target = kbase_devfreq_target;
	dp->get_dev_status = kbase_devfreq_status;
	dp->get_cur_freq = kbase_devfreq_cur_freq;
	dp->exit = kbase_devfreq_exit;

	if (kbase_devfreq_init_freq_table(kbdev, dp))
		return -EFAULT;

	kbdev->devfreq = devfreq_add_device(kbdev->dev, dp,
				"simple_ondemand", NULL);
	if (IS_ERR(kbdev->devfreq)) {
		/*
		 * kbdev->devfreq is an error pointer here, so the table must
		 * be freed through dp directly and not through any helper
		 * that dereferences kbdev->devfreq.
		 */
		err = PTR_ERR(kbdev->devfreq);
		kbdev->devfreq = NULL;
		kfree(dp->freq_table);
		dp->freq_table = NULL;
		return err;
	}

	err = devfreq_register_opp_notifier(kbdev->dev, kbdev->devfreq);
	if (err) {
		dev_err(kbdev->dev,
			"Failed to register OPP notifier (%d)\n", err);
		goto opp_notifier_failed;
	}

	opp_rate = kbdev->current_freq;
	rcu_read_lock();
	devfreq_recommended_opp(kbdev->dev, &opp_rate, 0);
	rcu_read_unlock();
	kbdev->devfreq->last_status.current_frequency = opp_rate;

	gpu_devdata.data = kbdev->devfreq;
	kbdev->opp_info = rockchip_register_thermal_notifier(kbdev->dev,
							     &gpu_devdata);
	if (IS_ERR(kbdev->opp_info)) {
		dev_dbg(kbdev->dev, "without thermal notifier\n");
		kbdev->opp_info = NULL;
	}

#ifdef CONFIG_DEVFREQ_THERMAL
	err = kbase_power_model_simple_init(kbdev);
	if (err && err != -ENODEV && err != -EPROBE_DEFER) {
		dev_err(kbdev->dev,
			"Failed to initialize simple power model (%d)\n",
			err);
		goto cooling_failed;
	}
	if (err == -EPROBE_DEFER)
		goto cooling_failed;
	if (err != -ENODEV) {
		kbdev->devfreq_cooling = of_devfreq_cooling_register_power(
				kbdev->dev->of_node,
				kbdev->devfreq,
				&power_model_simple_ops);
		if (IS_ERR_OR_NULL(kbdev->devfreq_cooling)) {
			/*
			 * PTR_ERR(NULL) is 0; make sure a NULL cooling device
			 * is still reported as a failure.
			 */
			err = kbdev->devfreq_cooling ?
				PTR_ERR(kbdev->devfreq_cooling) : -EINVAL;
			kbdev->devfreq_cooling = NULL;
			dev_err(kbdev->dev,
				"Failed to register cooling device (%d)\n",
				err);
			goto cooling_failed;
		}
	} else {
		err = 0;
	}
	I("success initing power_model_simple.");
#endif

	return 0;

#ifdef CONFIG_DEVFREQ_THERMAL
cooling_failed:
	/*
	 * The thermal notifier was handed kbdev->devfreq through gpu_devdata,
	 * so it has to go before the devfreq device is removed. opp_info may
	 * be NULL here, exactly as in kbase_devfreq_term().
	 */
	rockchip_unregister_thermal_notifier(kbdev->opp_info);
	kbdev->opp_info = NULL;
	devfreq_unregister_opp_notifier(kbdev->dev, kbdev->devfreq);
#endif /* CONFIG_DEVFREQ_THERMAL */
opp_notifier_failed:
	remove_err = devfreq_remove_device(kbdev->devfreq);
	if (remove_err)
		dev_err(kbdev->dev, "Failed to terminate devfreq (%d)\n",
			remove_err);
	else
		kbdev->devfreq = NULL;

	return err;
}
/**
 * kbase_devfreq_term - Tear down devfreq support for the GPU
 * @kbdev: Kbase device
 *
 * Unregisters the thermal notifier, the cooling device (if one was
 * registered) and the OPP notifier, then removes the devfreq device.
 * kbdev->devfreq is cleared only when the removal succeeds; otherwise the
 * failure is logged.
 */
void kbase_devfreq_term(struct kbase_device *kbdev)
{
	int remove_rc;

	dev_dbg(kbdev->dev, "Term Mali devfreq\n");

	rockchip_unregister_thermal_notifier(kbdev->opp_info);

#ifdef CONFIG_DEVFREQ_THERMAL
	if (kbdev->devfreq_cooling) {
		devfreq_cooling_unregister(kbdev->devfreq_cooling);
	}
#endif

	devfreq_unregister_opp_notifier(kbdev->dev, kbdev->devfreq);

	remove_rc = devfreq_remove_device(kbdev->devfreq);
	if (!remove_rc) {
		kbdev->devfreq = NULL;
		return;
	}

	dev_err(kbdev->dev, "Failed to terminate devfreq (%d)\n", remove_rc);
}

View File

@ -0,0 +1,24 @@
/*
*
* (C) COPYRIGHT 2014 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
* Foundation, and any use by you of this program is subject to the terms
* of such GNU licence.
*
* A copy of the licence is included with the program, and can also be obtained
* from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
* Boston, MA 02110-1301, USA.
*
*/
#ifndef _BASE_DEVFREQ_H_
#define _BASE_DEVFREQ_H_
/*
 * kbase_devfreq_init - register the GPU with devfreq.
 * Returns 0 on success or a negative error code on failure.
 */
int kbase_devfreq_init(struct kbase_device *kbdev);
/*
 * kbase_devfreq_term - undo kbase_devfreq_init(): unregister the notifiers
 * and remove the devfreq device.
 */
void kbase_devfreq_term(struct kbase_device *kbdev);
#endif /* _BASE_DEVFREQ_H_ */

View File

@ -0,0 +1,255 @@
/*
*
* (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
* Foundation, and any use by you of this program is subject to the terms
* of such GNU licence.
*
* A copy of the licence is included with the program, and can also be obtained
* from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
* Boston, MA 02110-1301, USA.
*
*/
/*
*
*/
#include <mali_kbase.h>
#include <backend/gpu/mali_kbase_instr_internal.h>
#include <backend/gpu/mali_kbase_pm_internal.h>
#include <backend/gpu/mali_kbase_device_internal.h>
#if !defined(CONFIG_MALI_NO_MALI)
#ifdef CONFIG_DEBUG_FS
/**
 * kbase_io_history_resize - Replace the register access history buffer
 * @h:        Pointer to the history data structure
 * @new_size: Number of entries in the new buffer; must not be 0
 *
 * Allocates a new buffer, swaps it in under the history lock with the entry
 * count reset to 0, and frees the previous buffer. Existing entries are
 * discarded.
 *
 * Return: 0 on success, -1 if @new_size is 0 or the allocation fails.
 */
int kbase_io_history_resize(struct kbase_io_history *h, u16 new_size)
{
	struct kbase_io_access *fresh;
	struct kbase_io_access *stale;
	unsigned long irq_flags;

	/* The new size must not be 0 */
	if (new_size == 0)
		return -1;

	fresh = vmalloc(new_size * sizeof(*h->buf));
	if (fresh == NULL)
		return -1;

	spin_lock_irqsave(&h->lock, irq_flags);

	stale = h->buf;

	/* The old data is deliberately not copied over: the dumping logic
	 * uses 'count' both as a counter and as an index into the buffer, and
	 * that index would no longer match the new array. This is a corner
	 * case that does not need to be supported.
	 */
	h->buf = fresh;
	h->size = new_size;
	h->count = 0;

	spin_unlock_irqrestore(&h->lock, irq_flags);

	vfree(stale);

	return 0;
}
/**
 * kbase_io_history_init - Initialise the register access history
 * @h: Pointer to the history data structure
 * @n: Number of entries to allocate
 *
 * Starts with history disabled and an empty buffer, then allocates @n entries
 * through kbase_io_history_resize().
 *
 * Return: 0 on success, -1 if the buffer could not be set up.
 */
int kbase_io_history_init(struct kbase_io_history *h, u16 n)
{
	spin_lock_init(&h->lock);

	h->enabled = false;
	h->buf = NULL;
	h->size = 0;
	h->count = 0;

	return kbase_io_history_resize(h, n) ? -1 : 0;
}
/**
 * kbase_io_history_term - Release the register access history buffer
 * @h: Pointer to the history data structure
 *
 * Frees the buffer and clears the pointer to it.
 */
void kbase_io_history_term(struct kbase_io_history *h)
{
	vfree(h->buf);

	h->buf = NULL;
}
/* kbase_io_history_add - record one register access in the history ring
 *
 * @h: Pointer to the history data structure
 * @addr: Register address
 * @value: The value that was read from or written to the register
 * @write: 1 for a register write, 0 for a read; stored in bit 0 of the
 *         recorded address
 */
static void kbase_io_history_add(struct kbase_io_history *h,
		void __iomem const *addr, u32 value, u8 write)
{
	struct kbase_io_access *slot;
	unsigned long irq_flags;

	spin_lock_irqsave(&h->lock, irq_flags);

	slot = h->buf + (h->count % h->size);
	slot->addr = (uintptr_t)addr | write;
	slot->value = value;

	h->count++;
	/* When the counter wraps to 0, restart it at the buffer size so that
	 * a later dump still covers the whole buffer */
	if (unlikely(h->count == 0))
		h->count = h->size;

	spin_unlock_irqrestore(&h->lock, irq_flags);
}
/**
 * kbase_io_history_dump - Print the recorded register accesses
 * @kbdev: Kbase device pointer
 *
 * Does nothing when history recording is disabled. Otherwise logs, oldest
 * first, up to one buffer's worth of the most recent accesses via dev_err()
 * while holding the history lock.
 */
void kbase_io_history_dump(struct kbase_device *kbdev)
{
	struct kbase_io_history *const h = &kbdev->io_history;
	unsigned long irq_flags;
	size_t iters;
	u16 i;

	if (!h->enabled)
		return;

	spin_lock_irqsave(&h->lock, irq_flags);

	dev_err(kbdev->dev, "Register IO History:");

	/* Never report more entries than have actually been recorded */
	if (h->size > h->count)
		iters = h->count;
	else
		iters = h->size;

	dev_err(kbdev->dev, "Last %zu register accesses of %zu total:\n", iters,
			h->count);

	for (i = 0; i < iters; ++i) {
		struct kbase_io_access *io =
			&h->buf[(h->count - iters + i) % h->size];
		/* Bit 0 of the stored address carries the read/write flag */
		char const access = (io->addr & 1) ? 'w' : 'r';

		dev_err(kbdev->dev, "%6i: %c: reg 0x%p val %08x\n", i, access,
				(void *)(io->addr & ~0x1), io->value);
	}

	spin_unlock_irqrestore(&h->lock, irq_flags);
}
#endif /* CONFIG_DEBUG_FS */
/*
 * kbase_reg_write - write a 32-bit value to the GPU register at @offset.
 *
 * Debug builds assert that the GPU is powered and that @kctx, when given, has
 * a valid address space. After the write, the access is recorded in the IO
 * history (CONFIG_DEBUG_FS, when enabled), logged with dev_dbg() and, if
 * @kctx has a trace buffer, added to the context's register access trace.
 */
void kbase_reg_write(struct kbase_device *kbdev, u16 offset, u32 value,
		struct kbase_context *kctx)
{
	KBASE_DEBUG_ASSERT(kbdev->pm.backend.gpu_powered);
	KBASE_DEBUG_ASSERT(kctx == NULL || kctx->as_nr != KBASEP_AS_NR_INVALID);
	KBASE_DEBUG_ASSERT(kbdev->dev != NULL);

	writel(value, kbdev->reg + offset);

#ifdef CONFIG_DEBUG_FS
	if (unlikely(kbdev->io_history.enabled)) {
		kbase_io_history_add(&kbdev->io_history, kbdev->reg + offset,
				value, 1);
	}
#endif /* CONFIG_DEBUG_FS */

	dev_dbg(kbdev->dev, "w: reg %04x val %08x", offset, value);

	if (kctx && kctx->jctx.tb) {
		kbase_device_trace_register_access(kctx, REG_WRITE, offset,
				value);
	}
}
KBASE_EXPORT_TEST_API(kbase_reg_write);
/*
 * kbase_reg_read - read the 32-bit GPU register at @offset.
 *
 * Debug builds assert that the GPU is powered and that @kctx, when given, has
 * a valid address space. After the read, the access is recorded in the IO
 * history (CONFIG_DEBUG_FS, when enabled), logged with dev_dbg() and, if
 * @kctx has a trace buffer, added to the context's register access trace.
 *
 * Returns the value read from the register.
 */
u32 kbase_reg_read(struct kbase_device *kbdev, u16 offset,
		struct kbase_context *kctx)
{
	u32 val;

	KBASE_DEBUG_ASSERT(kbdev->pm.backend.gpu_powered);
	KBASE_DEBUG_ASSERT(kctx == NULL || kctx->as_nr != KBASEP_AS_NR_INVALID);
	KBASE_DEBUG_ASSERT(kbdev->dev != NULL);

	val = readl(kbdev->reg + offset);

#ifdef CONFIG_DEBUG_FS
	if (unlikely(kbdev->io_history.enabled)) {
		kbase_io_history_add(&kbdev->io_history, kbdev->reg + offset,
				val, 0);
	}
#endif /* CONFIG_DEBUG_FS */

	dev_dbg(kbdev->dev, "r: reg %04x val %08x", offset, val);

	if (kctx && kctx->jctx.tb) {
		kbase_device_trace_register_access(kctx, REG_READ, offset, val);
	}

	return val;
}
KBASE_EXPORT_TEST_API(kbase_reg_read);
#endif /* !defined(CONFIG_MALI_NO_MALI) */
/**
* kbase_report_gpu_fault - Report a GPU fault.
* @kbdev: Kbase device pointer
* @multiple: Zero if only GPU_FAULT was raised, non-zero if MULTIPLE_GPU_FAULTS
* was also set
*
* This function is called from the interrupt handler when a GPU fault occurs.
* It reports the details of the fault using dev_warn().
*/
static void kbase_report_gpu_fault(struct kbase_device *kbdev, int multiple)
{
u32 status;
u64 address;
status = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_FAULTSTATUS), NULL);
address = (u64) kbase_reg_read(kbdev,
GPU_CONTROL_REG(GPU_FAULTADDRESS_HI), NULL) << 32;
address |= kbase_reg_read(kbdev,
GPU_CONTROL_REG(GPU_FAULTADDRESS_LO), NULL);
dev_warn(kbdev->dev, "GPU Fault 0x%08x (%s) at 0x%016llx",
status & 0xFF,
kbase_exception_name(kbdev, status),
address);
if (multiple)
dev_warn(kbdev->dev, "There were multiple GPU faults - some have not been reported\n");
}
/*
 * kbase_gpu_interrupt - dispatch the GPU interrupt sources set in @val.
 *
 * Fault, reset-completed, counter-sample-completed and cache-clean-completed
 * sources are handled first, then every bit in @val is written to
 * GPU_IRQ_CLEAR, and only after that is a power change processed.
 */
void kbase_gpu_interrupt(struct kbase_device *kbdev, u32 val)
{
	KBASE_TRACE_ADD(kbdev, CORE_GPU_IRQ, NULL, NULL, 0u, val);

	if (val & GPU_FAULT) {
		kbase_report_gpu_fault(kbdev, val & MULTIPLE_GPU_FAULTS);
	}

	if (val & RESET_COMPLETED) {
		kbase_pm_reset_done(kbdev);
	}

	if (val & PRFCNT_SAMPLE_COMPLETED) {
		kbase_instr_hwcnt_sample_done(kbdev);
	}

	if (val & CLEAN_CACHES_COMPLETED) {
		kbase_clean_caches_done(kbdev);
	}

	KBASE_TRACE_ADD(kbdev, CORE_GPU_IRQ_CLEAR, NULL, NULL, 0u, val);
	kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_CLEAR), val, NULL);

	/* The power-changed handling has to come after the IRQ has been
	 * cleared: it might trigger further power transitions, and the
	 * interrupt announcing that those have finished must not be missed.
	 */
	if (val & POWER_CHANGED_ALL) {
		kbase_pm_power_changed(kbdev);
	}

	KBASE_TRACE_ADD(kbdev, CORE_GPU_IRQ_DONE, NULL, NULL, 0u, val);
}

View File

@ -0,0 +1,67 @@
/*
*
* (C) COPYRIGHT 2014 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
* Foundation, and any use by you of this program is subject to the terms
* of such GNU licence.
*
* A copy of the licence is included with the program, and can also be obtained
* from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
* Boston, MA 02110-1301, USA.
*
*/
/*
* Backend-specific HW access device APIs
*/
#ifndef _KBASE_DEVICE_INTERNAL_H_
#define _KBASE_DEVICE_INTERNAL_H_
/**
* kbase_reg_write - write to GPU register
* @kbdev: Kbase device pointer
* @offset: Offset of register
* @value: Value to write
* @kctx: Kbase context pointer. May be NULL
*
* Caller must ensure the GPU is powered (@kbdev->pm.backend.gpu_powered is
* true). If @kctx is not NULL then the caller must ensure it is scheduled
* (@kctx->as_nr != KBASEP_AS_NR_INVALID).
*/
void kbase_reg_write(struct kbase_device *kbdev, u16 offset, u32 value,
struct kbase_context *kctx);
/**
* kbase_reg_read - read from GPU register
* @kbdev: Kbase device pointer
* @offset: Offset of register
* @kctx: Kbase context pointer. May be NULL
*
* Caller must ensure the GPU is powered (@kbdev->pm.backend.gpu_powered is
* true). If @kctx is not NULL then the caller must ensure it is scheduled
* (@kctx->as_nr != KBASEP_AS_NR_INVALID).
*
* Return: Value in desired register
*/
u32 kbase_reg_read(struct kbase_device *kbdev, u16 offset,
struct kbase_context *kctx);
/**
* kbase_gpu_interrupt - GPU interrupt handler
* @kbdev: Kbase device pointer
* @val: The value of the GPU IRQ status register which triggered the call
*
* This function is called from the interrupt handler when a GPU irq is to be
* handled.
*/
void kbase_gpu_interrupt(struct kbase_device *kbdev, u32 val);
#endif /* _KBASE_DEVICE_INTERNAL_H_ */

View File

@ -0,0 +1,123 @@
/*
*
* (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
* Foundation, and any use by you of this program is subject to the terms
* of such GNU licence.
*
* A copy of the licence is included with the program, and can also be obtained
* from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
* Boston, MA 02110-1301, USA.
*
*/
/*
* Register-based HW access backend APIs
*/
#include <mali_kbase.h>
#include <mali_kbase_hwaccess_backend.h>
#include <backend/gpu/mali_kbase_irq_internal.h>
#include <backend/gpu/mali_kbase_jm_internal.h>
#include <backend/gpu/mali_kbase_js_internal.h>
#include <backend/gpu/mali_kbase_pm_internal.h>
/**
 * kbase_backend_early_init - First stage of backend initialisation
 * @kbdev: Kbase device pointer
 *
 * Initialises the platform device, reads the GPU properties while register
 * access is enabled, then initialises power management and installs the
 * interrupt handlers. Steps that already succeeded are undone in reverse
 * order when a later one fails.
 *
 * Return: 0 on success, otherwise the error code of the failing step.
 */
int kbase_backend_early_init(struct kbase_device *kbdev)
{
	int err = kbasep_platform_device_init(kbdev);

	if (err)
		return err;

	/* GPU registers are only accessible between enable and disable */
	kbase_pm_register_access_enable(kbdev);
	/* Find out GPU properties based on the GPU feature registers */
	kbase_gpuprops_set(kbdev);
	kbase_pm_register_access_disable(kbdev);

	err = kbase_hwaccess_pm_init(kbdev);
	if (!err) {
		err = kbase_install_interrupts(kbdev);
		if (!err)
			return 0;

		kbase_hwaccess_pm_term(kbdev);
	}

	kbasep_platform_device_term(kbdev);

	return err;
}
/**
 * kbase_backend_early_term - Undo kbase_backend_early_init()
 * @kbdev: Kbase device pointer
 *
 * Releases the interrupts, terminates power management and terminates the
 * platform device, i.e. the reverse of the initialisation order.
 */
void kbase_backend_early_term(struct kbase_device *kbdev)
{
	kbase_release_interrupts(kbdev);

	kbase_hwaccess_pm_term(kbdev);

	kbasep_platform_device_term(kbdev);
}
/**
 * kbase_backend_late_init - Second stage of backend initialisation
 * @kbdev: Kbase device pointer
 *
 * Powers up the GPU, initialises the backend timer, checks the interrupt
 * handler assignment (debug builds with real hardware only), initialises the
 * job slots and finally the reset wait queue. Completed steps are undone in
 * reverse order on failure.
 *
 * Return: 0 on success, -EINVAL if the interrupt check fails, otherwise the
 * error code of the failing step.
 */
int kbase_backend_late_init(struct kbase_device *kbdev)
{
	int err = kbase_hwaccess_pm_powerup(kbdev, PM_HW_ISSUES_DETECT);

	if (err)
		return err;

	err = kbase_backend_timer_init(kbdev);
	if (err)
		goto halt_pm;

#if defined(CONFIG_MALI_DEBUG) && !defined(CONFIG_MALI_NO_MALI)
	if (kbasep_common_test_interrupt_handlers(kbdev) != 0) {
		dev_err(kbdev->dev, "Interrupt assigment check failed.\n");
		err = -EINVAL;
		goto term_timer;
	}
#endif /* CONFIG_MALI_DEBUG && !CONFIG_MALI_NO_MALI */

	err = kbase_job_slot_init(kbdev);
	if (err)
		goto term_timer;

	init_waitqueue_head(&kbdev->hwaccess.backend.reset_wait);

	return 0;

term_timer:
	kbase_backend_timer_term(kbdev);
halt_pm:
	kbase_hwaccess_pm_halt(kbdev);

	return err;
}
/**
 * kbase_backend_late_term - Undo kbase_backend_late_init()
 * @kbdev: Kbase device pointer
 *
 * Halts and terminates the job slots, terminates the backend timer and halts
 * power management.
 */
void kbase_backend_late_term(struct kbase_device *kbdev)
{
	kbase_job_slot_halt(kbdev);
	kbase_job_slot_term(kbdev);

	kbase_backend_timer_term(kbdev);

	kbase_hwaccess_pm_halt(kbdev);
}

View File

@ -0,0 +1,105 @@
/*
*
* (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
* Foundation, and any use by you of this program is subject to the terms
* of such GNU licence.
*
* A copy of the licence is included with the program, and can also be obtained
* from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
* Boston, MA 02110-1301, USA.
*
*/
/*
* Base kernel property query backend APIs
*/
#include <mali_kbase.h>
#include <backend/gpu/mali_kbase_device_internal.h>
#include <backend/gpu/mali_kbase_pm_internal.h>
#include <mali_kbase_hwaccess_gpuprops.h>
/**
 * kbase_backend_gpuprops_get - Snapshot the GPU property registers
 * @kbdev:   Kbase device pointer
 * @regdump: Structure that receives the raw register values
 *
 * Reads each GPU control register listed below with kbase_reg_read() and
 * stores the raw value in the matching field of @regdump. No context is
 * passed to the reads.
 */
void kbase_backend_gpuprops_get(struct kbase_device *kbdev,
struct kbase_gpuprops_regdump *regdump)
{
int i;
/* Fill regdump with the content of the relevant registers */
regdump->gpu_id = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_ID), NULL);
regdump->l2_features = kbase_reg_read(kbdev,
GPU_CONTROL_REG(L2_FEATURES), NULL);
regdump->suspend_size = kbase_reg_read(kbdev,
GPU_CONTROL_REG(SUSPEND_SIZE), NULL);
regdump->tiler_features = kbase_reg_read(kbdev,
GPU_CONTROL_REG(TILER_FEATURES), NULL);
regdump->mem_features = kbase_reg_read(kbdev,
GPU_CONTROL_REG(MEM_FEATURES), NULL);
regdump->mmu_features = kbase_reg_read(kbdev,
GPU_CONTROL_REG(MMU_FEATURES), NULL);
regdump->as_present = kbase_reg_read(kbdev,
GPU_CONTROL_REG(AS_PRESENT), NULL);
regdump->js_present = kbase_reg_read(kbdev,
GPU_CONTROL_REG(JS_PRESENT), NULL);
/* One feature register per job slot */
for (i = 0; i < GPU_MAX_JOB_SLOTS; i++)
regdump->js_features[i] = kbase_reg_read(kbdev,
GPU_CONTROL_REG(JS_FEATURES_REG(i)), NULL);
/* One register per texture feature word */
for (i = 0; i < BASE_GPU_NUM_TEXTURE_FEATURES_REGISTERS; i++)
regdump->texture_features[i] = kbase_reg_read(kbdev,
GPU_CONTROL_REG(TEXTURE_FEATURES_REG(i)), NULL);
regdump->thread_max_threads = kbase_reg_read(kbdev,
GPU_CONTROL_REG(THREAD_MAX_THREADS), NULL);
regdump->thread_max_workgroup_size = kbase_reg_read(kbdev,
GPU_CONTROL_REG(THREAD_MAX_WORKGROUP_SIZE),
NULL);
regdump->thread_max_barrier_size = kbase_reg_read(kbdev,
GPU_CONTROL_REG(THREAD_MAX_BARRIER_SIZE), NULL);
regdump->thread_features = kbase_reg_read(kbdev,
GPU_CONTROL_REG(THREAD_FEATURES), NULL);
/* The *_present values are read as separate low and high 32-bit words */
regdump->shader_present_lo = kbase_reg_read(kbdev,
GPU_CONTROL_REG(SHADER_PRESENT_LO), NULL);
regdump->shader_present_hi = kbase_reg_read(kbdev,
GPU_CONTROL_REG(SHADER_PRESENT_HI), NULL);
regdump->tiler_present_lo = kbase_reg_read(kbdev,
GPU_CONTROL_REG(TILER_PRESENT_LO), NULL);
regdump->tiler_present_hi = kbase_reg_read(kbdev,
GPU_CONTROL_REG(TILER_PRESENT_HI), NULL);
regdump->l2_present_lo = kbase_reg_read(kbdev,
GPU_CONTROL_REG(L2_PRESENT_LO), NULL);
regdump->l2_present_hi = kbase_reg_read(kbdev,
GPU_CONTROL_REG(L2_PRESENT_HI), NULL);
}
/**
 * kbase_backend_gpuprops_get_features - Fill in the coherency features
 * @kbdev:   Kbase device pointer
 * @regdump: Structure whose coherency_features field is set
 *
 * When the hardware has BASE_HW_FEATURE_COHERENCY_REG the value is read from
 * the COHERENCY_FEATURES register, with register access enabled around the
 * read. Otherwise a fixed value advertising COHERENCY_NONE and
 * COHERENCY_ACE_LITE is used.
 */
void kbase_backend_gpuprops_get_features(struct kbase_device *kbdev,
		struct kbase_gpuprops_regdump *regdump)
{
	if (!kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_COHERENCY_REG)) {
		/* Pre COHERENCY_FEATURES we only supported ACE_LITE */
		regdump->coherency_features =
				COHERENCY_FEATURE_BIT(COHERENCY_NONE) |
				COHERENCY_FEATURE_BIT(COHERENCY_ACE_LITE);
		return;
	}

	/* Register access is enabled only for the duration of the read */
	kbase_pm_register_access_enable(kbdev);

	regdump->coherency_features = kbase_reg_read(kbdev,
			GPU_CONTROL_REG(COHERENCY_FEATURES), NULL);

	kbase_pm_register_access_disable(kbdev);
}

View File

@ -0,0 +1,492 @@
/*
*
* (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
* Foundation, and any use by you of this program is subject to the terms
* of such GNU licence.
*
* A copy of the licence is included with the program, and can also be obtained
* from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
* Boston, MA 02110-1301, USA.
*
*/
/*
* GPU backend instrumentation APIs.
*/
#include <mali_kbase.h>
#include <mali_midg_regmap.h>
#include <mali_kbase_hwaccess_instr.h>
#include <backend/gpu/mali_kbase_device_internal.h>
#include <backend/gpu/mali_kbase_pm_internal.h>
#include <backend/gpu/mali_kbase_instr_internal.h>
/**
 * kbasep_instr_hwcnt_cacheclean - Start a cache clean & invalidate
 *
 * @kbdev: Kbase device
 *
 * With the hwcnt lock held: unmasks the CLEAN_CACHES_COMPLETED interrupt
 * (under the hwaccess lock), issues GPU_COMMAND_CLEAN_INV_CACHES and moves
 * the instrumentation state to KBASE_INSTR_STATE_CLEANING. Debug builds
 * assert that the state on entry is KBASE_INSTR_STATE_REQUEST_CLEAN.
 */
static void kbasep_instr_hwcnt_cacheclean(struct kbase_device *kbdev)
{
	unsigned long hwcnt_flags;
	unsigned long hwaccess_flags;
	u32 mask;

	spin_lock_irqsave(&kbdev->hwcnt.lock, hwcnt_flags);

	KBASE_DEBUG_ASSERT(kbdev->hwcnt.backend.state ==
					KBASE_INSTR_STATE_REQUEST_CLEAN);

	/* Unmask the completion interrupt */
	spin_lock_irqsave(&kbdev->hwaccess_lock, hwaccess_flags);
	mask = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), NULL);
	mask |= CLEAN_CACHES_COMPLETED;
	kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), mask, NULL);
	spin_unlock_irqrestore(&kbdev->hwaccess_lock, hwaccess_flags);

	/* Clean and invalidate the caches so that the MMU tables for the dump
	 * buffer are valid */
	KBASE_TRACE_ADD(kbdev, CORE_GPU_CLEAN_INV_CACHES, NULL, NULL, 0u, 0);
	kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND),
					GPU_COMMAND_CLEAN_INV_CACHES, NULL);

	kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_CLEANING;

	spin_unlock_irqrestore(&kbdev->hwcnt.lock, hwcnt_flags);
}
/**
 * kbase_instr_hwcnt_enable_internal - Enable HW counter dumping for a context
 * @kbdev: Kbase device
 * @kctx:  Context that becomes the owner of the instrumentation
 * @setup: Dump buffer address and per-block counter enable bitmaps
 *
 * Validates the dump buffer address, requests all present shader cores,
 * claims the instrumentation for @kctx, waits for a cache clean performed on
 * the cache-clean workqueue, then programs the PRFCNT_* registers and leaves
 * the state machine in KBASE_INSTR_STATE_IDLE.
 *
 * Return: 0 on success; -EINVAL if the dump buffer address is 0 or not
 * 2048-byte aligned, or if instrumentation is not currently disabled.
 */
int kbase_instr_hwcnt_enable_internal(struct kbase_device *kbdev,
struct kbase_context *kctx,
struct kbase_uk_hwcnt_setup *setup)
{
unsigned long flags, pm_flags;
int err = -EINVAL;
u32 irq_mask;
int ret;
u64 shader_cores_needed;
u32 prfcnt_config;
shader_cores_needed = kbase_pm_get_present_cores(kbdev,
KBASE_PM_CORE_SHADER);
/* alignment failure */
if ((setup->dump_buffer == 0ULL) || (setup->dump_buffer & (2048 - 1)))
goto out_err;
/* Override core availability policy to ensure all cores are available
*/
kbase_pm_ca_instr_enable(kbdev);
/* Request the cores early on synchronously - we'll release them on any
* errors (e.g. instrumentation already active) */
kbase_pm_request_cores_sync(kbdev, true, shader_cores_needed);
spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
if (kbdev->hwcnt.backend.state != KBASE_INSTR_STATE_DISABLED) {
/* Instrumentation is already enabled */
spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
goto out_unrequest_cores;
}
/* Enable interrupt */
spin_lock_irqsave(&kbdev->hwaccess_lock, pm_flags);
irq_mask = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), NULL);
kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), irq_mask |
PRFCNT_SAMPLE_COMPLETED, NULL);
spin_unlock_irqrestore(&kbdev->hwaccess_lock, pm_flags);
/* In use, this context is the owner */
kbdev->hwcnt.kctx = kctx;
/* Remember the dump address so we can reprogram it later */
kbdev->hwcnt.addr = setup->dump_buffer;
/* Request the clean */
kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_REQUEST_CLEAN;
kbdev->hwcnt.backend.triggered = 0;
/* Clean&invalidate the caches so we're sure the mmu tables for the dump
* buffer is valid */
ret = queue_work(kbdev->hwcnt.backend.cache_clean_wq,
&kbdev->hwcnt.backend.cache_clean_work);
KBASE_DEBUG_ASSERT(ret);
spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
/* Wait for cacheclean to complete */
wait_event(kbdev->hwcnt.backend.wait,
kbdev->hwcnt.backend.triggered != 0);
KBASE_DEBUG_ASSERT(kbdev->hwcnt.backend.state ==
KBASE_INSTR_STATE_IDLE);
kbase_pm_request_l2_caches(kbdev);
/* Configure */
prfcnt_config = kctx->as_nr << PRFCNT_CONFIG_AS_SHIFT;
#ifdef CONFIG_MALI_PRFCNT_SET_SECONDARY
{
u32 gpu_id = kbdev->gpu_props.props.raw_props.gpu_id;
u32 product_id = (gpu_id & GPU_ID_VERSION_PRODUCT_ID)
>> GPU_ID_VERSION_PRODUCT_ID_SHIFT;
int arch_v6 = GPU_ID_IS_NEW_FORMAT(product_id);
if (arch_v6)
prfcnt_config |= 1 << PRFCNT_CONFIG_SETSELECT_SHIFT;
}
#endif
/* Counters are held in MODE_OFF while the base address and the enable
 * bitmaps are programmed */
kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_CONFIG),
prfcnt_config | PRFCNT_CONFIG_MODE_OFF, kctx);
kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_BASE_LO),
setup->dump_buffer & 0xFFFFFFFF, kctx);
kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_BASE_HI),
setup->dump_buffer >> 32, kctx);
kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_JM_EN),
setup->jm_bm, kctx);
kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_SHADER_EN),
setup->shader_bm, kctx);
kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_MMU_L2_EN),
setup->mmu_l2_bm, kctx);
/* Due to PRLAM-8186 we need to disable the Tiler before we enable the
* HW counter dump. */
if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8186))
kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_TILER_EN), 0,
kctx);
else
kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_TILER_EN),
setup->tiler_bm, kctx);
kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_CONFIG),
prfcnt_config | PRFCNT_CONFIG_MODE_MANUAL, kctx);
/* If HW has PRLAM-8186 we can now re-enable the tiler HW counters dump
*/
if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8186))
kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_TILER_EN),
setup->tiler_bm, kctx);
/* Publish the idle state and wake anyone waiting on 'triggered' */
spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_IDLE;
kbdev->hwcnt.backend.triggered = 1;
wake_up(&kbdev->hwcnt.backend.wait);
spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
err = 0;
dev_dbg(kbdev->dev, "HW counters dumping set-up for context %p", kctx);
return err;
out_unrequest_cores:
/* NOTE(review): this path releases the shader cores but does not call
 * kbase_pm_ca_instr_disable() to undo kbase_pm_ca_instr_enable() above -
 * confirm whether that is intended. */
spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
kbase_pm_unrequest_cores(kbdev, true, shader_cores_needed);
spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
out_err:
return err;
}
/**
 * kbase_instr_hwcnt_disable_internal - Disable HW counter dumping
 * @kctx: Context that currently owns the instrumentation
 *
 * Waits until the state machine is idle, then marks instrumentation disabled,
 * masks the PRFCNT_SAMPLE_COMPLETED interrupt, writes 0 to PRFCNT_CONFIG and
 * releases the shader cores and L2 caches.
 *
 * Return: 0 on success; -EINVAL if instrumentation is not enabled or is owned
 * by a different context.
 */
int kbase_instr_hwcnt_disable_internal(struct kbase_context *kctx)
{
unsigned long flags, pm_flags;
int err = -EINVAL;
u32 irq_mask;
struct kbase_device *kbdev = kctx->kbdev;
while (1) {
spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_DISABLED) {
/* Instrumentation is not enabled */
spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
goto out;
}
if (kbdev->hwcnt.kctx != kctx) {
/* Instrumentation has been setup for another context */
spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
goto out;
}
/* Leaving the loop here keeps hwcnt.lock held for the teardown below */
if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_IDLE)
break;
spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
/* Ongoing dump/setup - wait for its completion */
wait_event(kbdev->hwcnt.backend.wait,
kbdev->hwcnt.backend.triggered != 0);
}
kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_DISABLED;
kbdev->hwcnt.backend.triggered = 0;
/* Disable interrupt */
spin_lock_irqsave(&kbdev->hwaccess_lock, pm_flags);
irq_mask = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), NULL);
kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK),
irq_mask & ~PRFCNT_SAMPLE_COMPLETED, NULL);
/* Disable the counters */
kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_CONFIG), 0, kctx);
kbdev->hwcnt.kctx = NULL;
kbdev->hwcnt.addr = 0ULL;
kbase_pm_ca_instr_disable(kbdev);
kbase_pm_unrequest_cores(kbdev, true,
kbase_pm_get_present_cores(kbdev, KBASE_PM_CORE_SHADER));
kbase_pm_release_l2_caches(kbdev);
/* Locks are dropped in the reverse order of acquisition */
spin_unlock_irqrestore(&kbdev->hwaccess_lock, pm_flags);
spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
dev_dbg(kbdev->dev, "HW counters dumping disabled for context %p",
kctx);
err = 0;
out:
return err;
}
/**
 * kbase_instr_hwcnt_request_dump - Start a HW counter dump
 * @kctx: Context that owns the instrumentation
 *
 * If @kctx is the owner and the state machine is idle, moves to
 * KBASE_INSTR_STATE_DUMPING, reprograms the dump address and issues
 * GPU_COMMAND_PRFCNT_SAMPLE. The function does not wait for the dump to
 * finish.
 *
 * Return: 0 if the dump was started; -EINVAL if another context owns the
 * instrumentation or the state machine is not idle.
 */
int kbase_instr_hwcnt_request_dump(struct kbase_context *kctx)
{
	struct kbase_device *kbdev = kctx->kbdev;
	unsigned long irq_flags;
	int err = -EINVAL;

	spin_lock_irqsave(&kbdev->hwcnt.lock, irq_flags);

	/* Refuse when the instrumentation belongs to another context, or when
	 * the HW counters are disabled, another dump is ongoing, or we're
	 * resetting */
	if (kbdev->hwcnt.kctx != kctx ||
	    kbdev->hwcnt.backend.state != KBASE_INSTR_STATE_IDLE)
		goto out_unlock;

	kbdev->hwcnt.backend.triggered = 0;

	/* Mark that we're dumping - the PF handler can signal that we faulted
	 */
	kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_DUMPING;

	/* Reconfigure the dump address */
	kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_BASE_LO),
					kbdev->hwcnt.addr & 0xFFFFFFFF, NULL);
	kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_BASE_HI),
					kbdev->hwcnt.addr >> 32, NULL);

	/* Start dumping */
	KBASE_TRACE_ADD(kbdev, CORE_GPU_PRFCNT_SAMPLE, NULL, NULL,
					kbdev->hwcnt.addr, 0);
	kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND),
					GPU_COMMAND_PRFCNT_SAMPLE, kctx);

	dev_dbg(kbdev->dev, "HW counters dumping done for context %p", kctx);

	err = 0;

out_unlock:
	spin_unlock_irqrestore(&kbdev->hwcnt.lock, irq_flags);

	return err;
}
KBASE_EXPORT_SYMBOL(kbase_instr_hwcnt_request_dump);
/**
 * kbase_instr_hwcnt_dump_complete - Poll whether a HW counter dump finished
 * @kctx:    Context that owns the instrumentation
 * @success: Output; written only when the function returns true. Set to true
 *           if the state machine is idle, false if the dump faulted.
 *
 * A faulted state is reset to idle as a side effect.
 *
 * Return: true if the state machine is idle or faulted, false otherwise.
 */
bool kbase_instr_hwcnt_dump_complete(struct kbase_context *kctx,
						bool * const success)
{
	struct kbase_device *kbdev = kctx->kbdev;
	unsigned long irq_flags;
	bool complete = false;

	spin_lock_irqsave(&kbdev->hwcnt.lock, irq_flags);

	switch (kbdev->hwcnt.backend.state) {
	case KBASE_INSTR_STATE_IDLE:
		*success = true;
		complete = true;
		break;
	case KBASE_INSTR_STATE_FAULT:
		*success = false;
		complete = true;
		/* The fault has now been reported; return to idle */
		kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_IDLE;
		break;
	default:
		break;
	}

	spin_unlock_irqrestore(&kbdev->hwcnt.lock, irq_flags);

	return complete;
}
KBASE_EXPORT_SYMBOL(kbase_instr_hwcnt_dump_complete);
/**
 * kbasep_cache_clean_worker - Work handler that performs a cache clean
 * @data: Work item embedded in the kbase device (hwcnt.backend.cache_clean_work)
 *
 * Serialised by kbdev->cacheclean_lock. Starts the clean via
 * kbasep_instr_hwcnt_cacheclean(), sleeps until the state leaves
 * KBASE_INSTR_STATE_CLEANING, then sets the state to idle, sets 'triggered'
 * and wakes waiters on hwcnt.backend.wait.
 */
void kbasep_cache_clean_worker(struct work_struct *data)
{
struct kbase_device *kbdev;
unsigned long flags;
kbdev = container_of(data, struct kbase_device,
hwcnt.backend.cache_clean_work);
mutex_lock(&kbdev->cacheclean_lock);
kbasep_instr_hwcnt_cacheclean(kbdev);
spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
/* Wait for our condition, and any reset to complete */
while (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_CLEANING) {
/* The spinlock is dropped while sleeping and re-taken before the
 * state is checked again */
spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
wait_event(kbdev->hwcnt.backend.cache_clean_wait,
kbdev->hwcnt.backend.state !=
KBASE_INSTR_STATE_CLEANING);
spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
}
KBASE_DEBUG_ASSERT(kbdev->hwcnt.backend.state ==
KBASE_INSTR_STATE_CLEANED);
/* All finished and idle */
kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_IDLE;
kbdev->hwcnt.backend.triggered = 1;
wake_up(&kbdev->hwcnt.backend.wait);
spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
mutex_unlock(&kbdev->cacheclean_lock);
}
/**
 * kbase_instr_hwcnt_sample_done - Handle completion of a counter sample
 * @kbdev: Kbase device
 *
 * Called from kbase_gpu_interrupt() when PRFCNT_SAMPLE_COMPLETED is set. In
 * the fault state the waiters are woken immediately; in the dumping state a
 * cache clean is queued on the cache-clean workqueue. Any other state is
 * left untouched.
 */
void kbase_instr_hwcnt_sample_done(struct kbase_device *kbdev)
{
	unsigned long irq_flags;
	int ret;

	spin_lock_irqsave(&kbdev->hwcnt.lock, irq_flags);

	switch (kbdev->hwcnt.backend.state) {
	case KBASE_INSTR_STATE_FAULT:
		kbdev->hwcnt.backend.triggered = 1;
		wake_up(&kbdev->hwcnt.backend.wait);
		break;
	case KBASE_INSTR_STATE_DUMPING:
		/* Always clean and invalidate the cache after a successful dump
		 */
		kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_REQUEST_CLEAN;
		ret = queue_work(kbdev->hwcnt.backend.cache_clean_wq,
					&kbdev->hwcnt.backend.cache_clean_work);
		KBASE_DEBUG_ASSERT(ret);
		break;
	default:
		break;
	}

	spin_unlock_irqrestore(&kbdev->hwcnt.lock, irq_flags);
}
/**
 * kbase_clean_caches_done - Handle the cache-clean-completed interrupt
 * @kbdev: Kbase device
 *
 * Does nothing while instrumentation is disabled. Otherwise masks the
 * CLEAN_CACHES_COMPLETED interrupt and, if a clean was in progress, moves the
 * state to KBASE_INSTR_STATE_CLEANED and wakes the cache-clean waiter.
 */
void kbase_clean_caches_done(struct kbase_device *kbdev)
{
	unsigned long hwcnt_flags;
	unsigned long hwaccess_flags;
	u32 irq_mask;

	/* This first check is made without taking hwcnt.lock */
	if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_DISABLED)
		return;

	spin_lock_irqsave(&kbdev->hwcnt.lock, hwcnt_flags);

	/* Mask the interrupt again */
	spin_lock_irqsave(&kbdev->hwaccess_lock, hwaccess_flags);
	irq_mask = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK),
									NULL);
	kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK),
				irq_mask & ~CLEAN_CACHES_COMPLETED, NULL);
	spin_unlock_irqrestore(&kbdev->hwaccess_lock, hwaccess_flags);

	/* Only wake if we weren't resetting */
	if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_CLEANING) {
		kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_CLEANED;
		wake_up(&kbdev->hwcnt.backend.cache_clean_wait);
	}

	spin_unlock_irqrestore(&kbdev->hwcnt.lock, hwcnt_flags);
}
/**
 * kbase_instr_hwcnt_wait_for_dump - Block until the current dump has finished
 * @kctx: Context that owns the instrumentation
 *
 * Sleeps until 'triggered' becomes non-zero. A faulted state is reset to idle
 * as a side effect.
 *
 * Return: 0 if the dump completed, -EINVAL if it faulted.
 */
int kbase_instr_hwcnt_wait_for_dump(struct kbase_context *kctx)
{
	struct kbase_device *kbdev = kctx->kbdev;
	unsigned long irq_flags;
	int err;

	/* Wait for dump & cacheclean to complete */
	wait_event(kbdev->hwcnt.backend.wait,
					kbdev->hwcnt.backend.triggered != 0);

	spin_lock_irqsave(&kbdev->hwcnt.lock, irq_flags);

	if (kbdev->hwcnt.backend.state != KBASE_INSTR_STATE_FAULT) {
		/* Dump done */
		KBASE_DEBUG_ASSERT(kbdev->hwcnt.backend.state ==
							KBASE_INSTR_STATE_IDLE);
		err = 0;
	} else {
		err = -EINVAL;
		kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_IDLE;
	}

	spin_unlock_irqrestore(&kbdev->hwcnt.lock, irq_flags);

	return err;
}
/**
 * kbase_instr_hwcnt_clear - Clear the HW counters
 * @kctx: Context that owns the instrumentation
 *
 * Issues GPU_COMMAND_PRFCNT_CLEAR when @kctx is the owner and the state
 * machine is idle.
 *
 * Return: 0 if the clear command was issued, -EINVAL otherwise.
 */
int kbase_instr_hwcnt_clear(struct kbase_context *kctx)
{
	struct kbase_device *kbdev = kctx->kbdev;
	unsigned long irq_flags;
	int err = -EINVAL;

	spin_lock_irqsave(&kbdev->hwcnt.lock, irq_flags);

	/* Only proceed for the context previously set up, and only when we're
	 * not already dumping */
	if (kbdev->hwcnt.kctx == kctx &&
	    kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_IDLE) {
		/* Clear the counters */
		KBASE_TRACE_ADD(kbdev, CORE_GPU_PRFCNT_CLEAR, NULL, NULL, 0u, 0);
		kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND),
						GPU_COMMAND_PRFCNT_CLEAR, kctx);
		err = 0;
	}

	spin_unlock_irqrestore(&kbdev->hwcnt.lock, irq_flags);

	return err;
}
KBASE_EXPORT_SYMBOL(kbase_instr_hwcnt_clear);
/**
 * kbase_instr_backend_init - Initialise the instrumentation backend
 * @kbdev: Kbase device
 *
 * Sets the state machine to disabled, initialises both wait queues and the
 * cache-clean work item, and allocates the cache-clean workqueue.
 *
 * Return: 0 on success, -EINVAL if the workqueue cannot be allocated.
 */
int kbase_instr_backend_init(struct kbase_device *kbdev)
{
	kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_DISABLED;
	kbdev->hwcnt.backend.triggered = 0;

	init_waitqueue_head(&kbdev->hwcnt.backend.wait);
	init_waitqueue_head(&kbdev->hwcnt.backend.cache_clean_wait);
	INIT_WORK(&kbdev->hwcnt.backend.cache_clean_work,
					kbasep_cache_clean_worker);

	kbdev->hwcnt.backend.cache_clean_wq =
			alloc_workqueue("Mali cache cleaning workqueue", 0, 1);
	if (!kbdev->hwcnt.backend.cache_clean_wq)
		return -EINVAL;

	return 0;
}
/**
 * kbase_instr_backend_term - Terminate the instrumentation backend
 * @kbdev: Kbase device
 *
 * Destroys the cache-clean workqueue allocated by kbase_instr_backend_init().
 */
void kbase_instr_backend_term(struct kbase_device *kbdev)
{
	struct workqueue_struct *wq = kbdev->hwcnt.backend.cache_clean_wq;

	destroy_workqueue(wq);
}

View File

@ -0,0 +1,58 @@
/*
*
* (C) COPYRIGHT 2014, 2016 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
* Foundation, and any use by you of this program is subject to the terms
* of such GNU licence.
*
* A copy of the licence is included with the program, and can also be obtained
* from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
* Boston, MA 02110-1301, USA.
*
*/
/*
* Backend-specific instrumentation definitions
*/
#ifndef _KBASE_INSTR_DEFS_H_
#define _KBASE_INSTR_DEFS_H_
/*
* Instrumentation State Machine States
*/
enum kbase_instr_state {
/* State where instrumentation is not active */
KBASE_INSTR_STATE_DISABLED = 0,
/* State machine is active and ready for a command. */
KBASE_INSTR_STATE_IDLE,
/* Hardware is currently dumping a frame. */
KBASE_INSTR_STATE_DUMPING,
/* We've requested a clean to occur on a workqueue */
KBASE_INSTR_STATE_REQUEST_CLEAN,
/* Hardware is currently cleaning and invalidating caches. */
KBASE_INSTR_STATE_CLEANING,
/* Cache clean completed, and either a) a dump is complete, or
* b) instrumentation can now be set up. */
KBASE_INSTR_STATE_CLEANED,
/* An error has occurred during DUMPING (page fault). */
KBASE_INSTR_STATE_FAULT
};
/* Structure used for instrumentation and HW counters dumping */
/*
 * NOTE(review): this appears to be the type of kbdev->hwcnt.backend, whose
 * same-named members are initialised in kbase_instr_backend_init() - confirm
 * against the kbase_device definition.
 */
struct kbase_instr_backend {
/* Wait queue head (set up with init_waitqueue_head() at backend init) */
wait_queue_head_t wait;
/* Event flag; reset to 0 at backend init */
int triggered;
/* Current state of the instrumentation state machine */
enum kbase_instr_state state;
/* Wait queue head for cache clean completion (presumably - confirm) */
wait_queue_head_t cache_clean_wait;
/* Workqueue on which cache_clean_work is run (presumably - confirm) */
struct workqueue_struct *cache_clean_wq;
/* Work item whose function is kbasep_cache_clean_worker() */
struct work_struct cache_clean_work;
};
#endif /* _KBASE_INSTR_DEFS_H_ */

View File

@ -0,0 +1,45 @@
/*
*
* (C) COPYRIGHT 2014 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
* Foundation, and any use by you of this program is subject to the terms
* of such GNU licence.
*
* A copy of the licence is included with the program, and can also be obtained
* from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
* Boston, MA 02110-1301, USA.
*
*/
/*
* Backend-specific HW access instrumentation APIs
*/
#ifndef _KBASE_INSTR_INTERNAL_H_
#define _KBASE_INSTR_INTERNAL_H_
/**
 * kbasep_cache_clean_worker() - Work function for handling cache cleaning
 * @data: a &struct work_struct
 *
 * This is the function installed with INIT_WORK() on the backend's
 * cache_clean_work item, so it is invoked from a workqueue rather than called
 * directly.
 */
void kbasep_cache_clean_worker(struct work_struct *data);
/**
* kbase_clean_caches_done() - Cache clean interrupt received
* @kbdev: Kbase device
*/
void kbase_clean_caches_done(struct kbase_device *kbdev);
/**
* kbase_instr_hwcnt_sample_done() - Dump complete interrupt received
* @kbdev: Kbase device
*/
void kbase_instr_hwcnt_sample_done(struct kbase_device *kbdev);
#endif /* _KBASE_INSTR_INTERNAL_H_ */

View File

@ -0,0 +1,39 @@
/*
*
* (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
* Foundation, and any use by you of this program is subject to the terms
* of such GNU licence.
*
* A copy of the licence is included with the program, and can also be obtained
* from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
* Boston, MA 02110-1301, USA.
*
*/
/*
* Backend specific IRQ APIs
*/
#ifndef _KBASE_IRQ_INTERNAL_H_
#define _KBASE_IRQ_INTERNAL_H_
/**
 * kbase_install_interrupts - Request the default handler for every kbase IRQ
 * @kbdev: The kbase device
 *
 * Return: 0 on success, otherwise the error returned by request_irq(); any
 * IRQs requested before the failing one are freed again.
 */
int kbase_install_interrupts(struct kbase_device *kbdev);
/**
 * kbase_release_interrupts - Free every kbase IRQ that has a non-zero number
 * @kbdev: The kbase device
 */
void kbase_release_interrupts(struct kbase_device *kbdev);
/**
 * kbase_synchronize_irqs - Ensure that all IRQ handlers have completed
 * execution
 * @kbdev: The kbase device
 */
void kbase_synchronize_irqs(struct kbase_device *kbdev);
/**
 * kbasep_common_test_interrupt_handlers - Check that the JOB and MMU IRQs
 * reach the CPU
 * @kbdev: The kbase device
 *
 * NOTE(review): the register-based implementation is only compiled when
 * CONFIG_MALI_DEBUG is defined - confirm callers are guarded accordingly.
 *
 * Return: 0 if both interrupts were received, an error code otherwise
 */
int kbasep_common_test_interrupt_handlers(
struct kbase_device * const kbdev);
#endif /* _KBASE_IRQ_INTERNAL_H_ */

View File

@ -0,0 +1,469 @@
/*
*
* (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
* Foundation, and any use by you of this program is subject to the terms
* of such GNU licence.
*
* A copy of the licence is included with the program, and can also be obtained
* from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
* Boston, MA 02110-1301, USA.
*
*/
#include <mali_kbase.h>
#include <backend/gpu/mali_kbase_device_internal.h>
#include <backend/gpu/mali_kbase_irq_internal.h>
#include <linux/interrupt.h>
#if !defined(CONFIG_MALI_NO_MALI)
/* GPU IRQ Tags */
#define JOB_IRQ_TAG 0
#define MMU_IRQ_TAG 1
#define GPU_IRQ_TAG 2
/*
 * OR @tag into the bits of @ptr and return the result as a pointer. The tag is
 * recovered/removed again by kbase_untag(), which clears the two lowest bits.
 */
static void *kbase_tag(void *ptr, u32 tag)
{
	uintptr_t tagged = (uintptr_t) ptr;

	tagged |= tag;

	return (void *) tagged;
}
/*
 * Return @ptr with its two lowest bits cleared, i.e. with any tag applied by
 * kbase_tag() stripped off.
 */
static void *kbase_untag(void *ptr)
{
	uintptr_t bits = (uintptr_t) ptr;

	bits &= ~3;

	return (void *) bits;
}
/*
 * Default handler for the job IRQ. @data is the kbase device pointer with the
 * IRQ tag OR-ed in. Returns IRQ_NONE when the GPU is powered off or the job
 * IRQ status register reads zero; otherwise forwards the status to
 * kbase_job_done() and returns IRQ_HANDLED.
 */
static irqreturn_t kbase_job_irq_handler(int irq, void *data)
{
	struct kbase_device *kbdev = kbase_untag(data);
	irqreturn_t ret = IRQ_NONE;
	unsigned long flags;
	u32 val = 0;

	spin_lock_irqsave(&kbdev->pm.backend.gpu_powered_lock, flags);

	/* If the GPU is turned off the IRQ is not for us: the status register
	 * is not read and val stays 0 */
	if (kbdev->pm.backend.gpu_powered) {
		val = kbase_reg_read(kbdev, JOB_CONTROL_REG(JOB_IRQ_STATUS),
				NULL);

#ifdef CONFIG_MALI_DEBUG
		if (!kbdev->pm.backend.driver_ready_for_irqs)
			dev_warn(kbdev->dev, "%s: irq %d irqstatus 0x%x before driver is ready\n",
					__func__, irq, val);
#endif /* CONFIG_MALI_DEBUG */
	}

	spin_unlock_irqrestore(&kbdev->pm.backend.gpu_powered_lock, flags);

	if (val) {
		dev_dbg(kbdev->dev, "%s: irq %d irqstatus 0x%x\n", __func__, irq, val);

		kbase_job_done(kbdev, val);
		ret = IRQ_HANDLED;
	}

	return ret;
}
KBASE_EXPORT_TEST_API(kbase_job_irq_handler);
/*
 * Default handler for the MMU IRQ. @data is the kbase device pointer with the
 * IRQ tag OR-ed in. kbdev->faults_pending is incremented for as long as a
 * powered GPU's MMU status is being read and processed. Returns IRQ_NONE when
 * the GPU is powered off or the status reads zero, IRQ_HANDLED after the
 * status has been passed to kbase_mmu_interrupt().
 */
static irqreturn_t kbase_mmu_irq_handler(int irq, void *data)
{
	struct kbase_device *kbdev = kbase_untag(data);
	irqreturn_t ret = IRQ_NONE;
	unsigned long flags;
	u32 val;

	spin_lock_irqsave(&kbdev->pm.backend.gpu_powered_lock, flags);

	if (!kbdev->pm.backend.gpu_powered) {
		/* GPU is turned off - IRQ is not for us */
		spin_unlock_irqrestore(&kbdev->pm.backend.gpu_powered_lock,
				flags);
		return ret;
	}

	atomic_inc(&kbdev->faults_pending);

	val = kbase_reg_read(kbdev, MMU_REG(MMU_IRQ_STATUS), NULL);

#ifdef CONFIG_MALI_DEBUG
	if (!kbdev->pm.backend.driver_ready_for_irqs)
		dev_warn(kbdev->dev, "%s: irq %d irqstatus 0x%x before driver is ready\n",
				__func__, irq, val);
#endif /* CONFIG_MALI_DEBUG */

	spin_unlock_irqrestore(&kbdev->pm.backend.gpu_powered_lock, flags);

	if (val) {
		dev_dbg(kbdev->dev, "%s: irq %d irqstatus 0x%x\n", __func__, irq, val);

		kbase_mmu_interrupt(kbdev, val);
		ret = IRQ_HANDLED;
	}

	/* Balance the increment above on both the handled and spurious paths */
	atomic_dec(&kbdev->faults_pending);

	return ret;
}
/*
 * Default handler for the GPU IRQ. @data is the kbase device pointer with the
 * IRQ tag OR-ed in. Returns IRQ_NONE when the GPU is powered off or the GPU
 * IRQ status register reads zero; otherwise forwards the status to
 * kbase_gpu_interrupt() and returns IRQ_HANDLED.
 */
static irqreturn_t kbase_gpu_irq_handler(int irq, void *data)
{
	struct kbase_device *kbdev = kbase_untag(data);
	irqreturn_t ret = IRQ_NONE;
	unsigned long flags;
	u32 val = 0;

	spin_lock_irqsave(&kbdev->pm.backend.gpu_powered_lock, flags);

	/* If the GPU is turned off the IRQ is not for us: the status register
	 * is not read and val stays 0 */
	if (kbdev->pm.backend.gpu_powered) {
		val = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_STATUS),
				NULL);

#ifdef CONFIG_MALI_DEBUG
		/* Unlike the job/MMU handlers this is only a debug message */
		if (!kbdev->pm.backend.driver_ready_for_irqs)
			dev_dbg(kbdev->dev, "%s: irq %d irqstatus 0x%x before driver is ready\n",
					__func__, irq, val);
#endif /* CONFIG_MALI_DEBUG */
	}

	spin_unlock_irqrestore(&kbdev->pm.backend.gpu_powered_lock, flags);

	if (val) {
		dev_dbg(kbdev->dev, "%s: irq %d irqstatus 0x%x\n", __func__, irq, val);

		kbase_gpu_interrupt(kbdev, val);
		ret = IRQ_HANDLED;
	}

	return ret;
}
KBASE_EXPORT_TEST_API(kbase_gpu_irq_handler);
/*
 * Default handler for each IRQ, indexed by the *_IRQ_TAG value. The same index
 * selects the entry in kbdev->irqs[] and is the tag OR-ed into the dev_id
 * cookie (see kbase_tag()) when the IRQ is requested or freed.
 */
static irq_handler_t kbase_handler_table[] = {
[JOB_IRQ_TAG] = kbase_job_irq_handler,
[MMU_IRQ_TAG] = kbase_mmu_irq_handler,
[GPU_IRQ_TAG] = kbase_gpu_irq_handler,
};
#ifdef CONFIG_MALI_DEBUG
#define JOB_IRQ_HANDLER JOB_IRQ_TAG
#define MMU_IRQ_HANDLER MMU_IRQ_TAG
#define GPU_IRQ_HANDLER GPU_IRQ_TAG
/**
 * kbase_set_custom_irq_handler - Set a custom IRQ handler
 * @kbdev: Device for which the handler is to be registered
 * @custom_handler: Handler to be registered, or NULL for the default handler
 * @irq_type: Interrupt type (one of the *_IRQ_HANDLER values)
 *
 * Frees the handler currently registered for @irq_type (if the IRQ number is
 * non-zero) and then requests the IRQ again with @custom_handler. When
 * @custom_handler is NULL the entry from kbase_handler_table[] is registered
 * instead.
 *
 * Return: 0 in case of success, -EINVAL if the IRQ could not be requested
 */
int kbase_set_custom_irq_handler(struct kbase_device *kbdev,
					irq_handler_t custom_handler,
					int irq_type)
{
	irq_handler_t requested_irq_handler;

	KBASE_DEBUG_ASSERT((JOB_IRQ_HANDLER <= irq_type) &&
						(GPU_IRQ_HANDLER >= irq_type));

	/* Release previous handler */
	if (kbdev->irqs[irq_type].irq)
		free_irq(kbdev->irqs[irq_type].irq, kbase_tag(kbdev, irq_type));

	/* Fall back to the default handler when none was supplied */
	if (custom_handler != NULL)
		requested_irq_handler = custom_handler;
	else
		requested_irq_handler = kbase_handler_table[irq_type];

	if (request_irq(kbdev->irqs[irq_type].irq,
			requested_irq_handler,
			kbdev->irqs[irq_type].flags | IRQF_SHARED,
			dev_name(kbdev->dev), kbase_tag(kbdev, irq_type)) == 0)
		return 0;

	dev_err(kbdev->dev, "Can't request interrupt %d (index %d)\n",
			kbdev->irqs[irq_type].irq, irq_type);
#ifdef CONFIG_SPARSE_IRQ
	dev_err(kbdev->dev, "You have CONFIG_SPARSE_IRQ support enabled - is the interrupt number correct for this configuration?\n");
#endif /* CONFIG_SPARSE_IRQ */

	return -EINVAL;
}
KBASE_EXPORT_TEST_API(kbase_set_custom_irq_handler);
/* Test correct interrupt assignment and reception by the CPU */
struct kbasep_irq_test {
/* Timer whose callback, kbasep_test_interrupt_timeout(), flags a timeout */
struct hrtimer timer;
/* Wait queue the test sleeps on until triggered becomes non-zero */
wait_queue_head_t wait;
/* Set to 1 by the test IRQ handlers and by the timeout callback */
int triggered;
/* Set to 1 by the timeout callback only */
u32 timeout;
};
/* Single instance shared by all interrupt tests in this file */
static struct kbasep_irq_test kbasep_irq_test_data;
/* Test timeout, passed to HR_TIMER_DELAY_MSEC() (so presumably milliseconds) */
#define IRQ_TEST_TIMEOUT 500
/*
 * Test handler for the job IRQ. When the GPU is powered and the job IRQ status
 * is non-zero it marks the test as triggered, wakes the waiter and then clears
 * the reported status bits; otherwise it returns IRQ_NONE.
 */
static irqreturn_t kbase_job_irq_test_handler(int irq, void *data)
{
	struct kbase_device *kbdev = kbase_untag(data);
	unsigned long flags;
	u32 val = 0;

	spin_lock_irqsave(&kbdev->pm.backend.gpu_powered_lock, flags);

	/* If the GPU is turned off the IRQ is not for us: val stays 0 */
	if (kbdev->pm.backend.gpu_powered)
		val = kbase_reg_read(kbdev, JOB_CONTROL_REG(JOB_IRQ_STATUS),
				NULL);

	spin_unlock_irqrestore(&kbdev->pm.backend.gpu_powered_lock, flags);

	if (val == 0)
		return IRQ_NONE;

	dev_dbg(kbdev->dev, "%s: irq %d irqstatus 0x%x\n", __func__, irq, val);

	/* Signal the waiting test before acknowledging the interrupt */
	kbasep_irq_test_data.triggered = 1;
	wake_up(&kbasep_irq_test_data.wait);

	kbase_reg_write(kbdev, JOB_CONTROL_REG(JOB_IRQ_CLEAR), val, NULL);

	return IRQ_HANDLED;
}
/*
 * Test handler for the MMU IRQ. When the GPU is powered and the MMU IRQ status
 * is non-zero it marks the test as triggered, wakes the waiter and then clears
 * the reported status bits; otherwise it returns IRQ_NONE.
 */
static irqreturn_t kbase_mmu_irq_test_handler(int irq, void *data)
{
	struct kbase_device *kbdev = kbase_untag(data);
	unsigned long flags;
	u32 val = 0;

	spin_lock_irqsave(&kbdev->pm.backend.gpu_powered_lock, flags);

	/* If the GPU is turned off the IRQ is not for us: val stays 0 */
	if (kbdev->pm.backend.gpu_powered)
		val = kbase_reg_read(kbdev, MMU_REG(MMU_IRQ_STATUS), NULL);

	spin_unlock_irqrestore(&kbdev->pm.backend.gpu_powered_lock, flags);

	if (val == 0)
		return IRQ_NONE;

	dev_dbg(kbdev->dev, "%s: irq %d irqstatus 0x%x\n", __func__, irq, val);

	/* Signal the waiting test before acknowledging the interrupt */
	kbasep_irq_test_data.triggered = 1;
	wake_up(&kbasep_irq_test_data.wait);

	kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_CLEAR), val, NULL);

	return IRQ_HANDLED;
}
/*
 * hrtimer callback for the interrupt test: records that the test timed out,
 * marks it as triggered so the waiter's condition becomes true, and wakes the
 * waiter. The timer is not restarted.
 */
static enum hrtimer_restart kbasep_test_interrupt_timeout(struct hrtimer *timer)
{
	struct kbasep_irq_test *irq_test;

	irq_test = container_of(timer, struct kbasep_irq_test, timer);

	irq_test->timeout = 1;
	irq_test->triggered = 1;
	wake_up(&irq_test->wait);

	return HRTIMER_NORESTART;
}
/*
 * Check that the interrupt selected by @tag reaches the CPU.
 *
 * Only JOB_IRQ_TAG and MMU_IRQ_TAG are tested; any other tag (including
 * GPU_IRQ_TAG) returns 0 immediately. For a tested IRQ with a non-zero IRQ
 * number, the default handler is swapped for a test handler, the interrupt is
 * raised by writing the RAWSTAT register, and the function waits for either
 * the test handler or a timeout timer to set kbasep_irq_test_data.triggered.
 * The default handler and the original IRQ mask are restored afterwards.
 *
 * Return: 0 on success (or when nothing was tested), -EINVAL if a handler
 * could not be installed/restored or the interrupt timed out.
 */
static int kbasep_common_test_interrupt(
struct kbase_device * const kbdev, u32 tag)
{
int err = 0;
irq_handler_t test_handler;
u32 old_mask_val;
u16 mask_offset;
u16 rawstat_offset;
/* Pick the test handler and register offsets for this IRQ */
switch (tag) {
case JOB_IRQ_TAG:
test_handler = kbase_job_irq_test_handler;
rawstat_offset = JOB_CONTROL_REG(JOB_IRQ_RAWSTAT);
mask_offset = JOB_CONTROL_REG(JOB_IRQ_MASK);
break;
case MMU_IRQ_TAG:
test_handler = kbase_mmu_irq_test_handler;
rawstat_offset = MMU_REG(MMU_IRQ_RAWSTAT);
mask_offset = MMU_REG(MMU_IRQ_MASK);
break;
case GPU_IRQ_TAG:
/* already tested by pm_driver - bail out */
default:
return 0;
}
/* store old mask */
old_mask_val = kbase_reg_read(kbdev, mask_offset, NULL);
/* mask interrupts */
kbase_reg_write(kbdev, mask_offset, 0x0, NULL);
if (kbdev->irqs[tag].irq) {
/* release original handler and install test handler */
if (kbase_set_custom_irq_handler(kbdev, test_handler, tag) != 0) {
err = -EINVAL;
} else {
kbasep_irq_test_data.timeout = 0;
hrtimer_init(&kbasep_irq_test_data.timer,
CLOCK_MONOTONIC, HRTIMER_MODE_REL);
kbasep_irq_test_data.timer.function =
kbasep_test_interrupt_timeout;
/* trigger interrupt */
kbase_reg_write(kbdev, mask_offset, 0x1, NULL);
kbase_reg_write(kbdev, rawstat_offset, 0x1, NULL);
hrtimer_start(&kbasep_irq_test_data.timer,
HR_TIMER_DELAY_MSEC(IRQ_TEST_TIMEOUT),
HRTIMER_MODE_REL);
/* Woken by either the test handler or the timeout callback */
wait_event(kbasep_irq_test_data.wait,
kbasep_irq_test_data.triggered != 0);
/* timeout is only set by the timer callback */
if (kbasep_irq_test_data.timeout != 0) {
dev_err(kbdev->dev, "Interrupt %d (index %d) didn't reach CPU.\n",
kbdev->irqs[tag].irq, tag);
err = -EINVAL;
} else {
dev_dbg(kbdev->dev, "Interrupt %d (index %d) reached CPU.\n",
kbdev->irqs[tag].irq, tag);
}
hrtimer_cancel(&kbasep_irq_test_data.timer);
/* Re-arm the flag for the next test */
kbasep_irq_test_data.triggered = 0;
/* mask interrupts */
kbase_reg_write(kbdev, mask_offset, 0x0, NULL);
/* release test handler */
free_irq(kbdev->irqs[tag].irq, kbase_tag(kbdev, tag));
}
/* restore original interrupt */
if (request_irq(kbdev->irqs[tag].irq, kbase_handler_table[tag],
kbdev->irqs[tag].flags | IRQF_SHARED,
dev_name(kbdev->dev), kbase_tag(kbdev, tag))) {
dev_err(kbdev->dev, "Can't restore original interrupt %d (index %d)\n",
kbdev->irqs[tag].irq, tag);
err = -EINVAL;
}
}
/* restore old mask */
kbase_reg_write(kbdev, mask_offset, old_mask_val, NULL);
return err;
}
/*
 * Run the interrupt delivery test for the JOB IRQ and then the MMU IRQ,
 * stopping at the first failure. A PM active reference is held for the
 * duration of the tests. Returns 0 when both tests pass, otherwise the error
 * from the failing test.
 */
int kbasep_common_test_interrupt_handlers(
					struct kbase_device * const kbdev)
{
	int err;

	init_waitqueue_head(&kbasep_irq_test_data.wait);
	kbasep_irq_test_data.triggered = 0;

	/* A suspend won't happen during startup/insmod */
	kbase_pm_context_active(kbdev);

	err = kbasep_common_test_interrupt(kbdev, JOB_IRQ_TAG);
	if (err) {
		dev_err(kbdev->dev, "Interrupt JOB_IRQ didn't reach CPU. Check interrupt assignments.\n");
	} else {
		err = kbasep_common_test_interrupt(kbdev, MMU_IRQ_TAG);
		if (err)
			dev_err(kbdev->dev, "Interrupt MMU_IRQ didn't reach CPU. Check interrupt assignments.\n");
		else
			dev_dbg(kbdev->dev, "Interrupts are correctly assigned.\n");
	}

	kbase_pm_context_idle(kbdev);

	return err;
}
#endif /* CONFIG_MALI_DEBUG */
/*
 * Request every IRQ in kbdev->irqs[] with its default handler from
 * kbase_handler_table[]. If a request fails, the IRQs requested so far are
 * freed again and the request_irq() error is returned. Returns 0 on success.
 */
int kbase_install_interrupts(struct kbase_device *kbdev)
{
	u32 nr = ARRAY_SIZE(kbase_handler_table);
	int err;
	u32 i;

	for (i = 0; i < nr; i++) {
		err = request_irq(kbdev->irqs[i].irq, kbase_handler_table[i],
				kbdev->irqs[i].flags | IRQF_SHARED,
				dev_name(kbdev->dev),
				kbase_tag(kbdev, i));
		if (!err)
			continue;

		dev_err(kbdev->dev, "Can't request interrupt %d (index %d)\n",
				kbdev->irqs[i].irq, i);
#ifdef CONFIG_SPARSE_IRQ
		dev_err(kbdev->dev, "You have CONFIG_SPARSE_IRQ support enabled - is the interrupt number correct for this configuration?\n");
#endif /* CONFIG_SPARSE_IRQ */

		/* Unwind: free only the IRQs requested before the failure */
		while (i-- > 0)
			free_irq(kbdev->irqs[i].irq, kbase_tag(kbdev, i));

		return err;
	}

	return 0;
}
/*
 * Free every IRQ in kbdev->irqs[] that has a non-zero IRQ number, using the
 * same tagged device cookie it was requested with.
 */
void kbase_release_interrupts(struct kbase_device *kbdev)
{
	u32 i;

	for (i = 0; i < ARRAY_SIZE(kbase_handler_table); i++) {
		/* Entries without an IRQ number have nothing to free */
		if (!kbdev->irqs[i].irq)
			continue;

		free_irq(kbdev->irqs[i].irq, kbase_tag(kbdev, i));
	}
}
/*
 * Call synchronize_irq() on every IRQ in kbdev->irqs[] that has a non-zero
 * IRQ number.
 */
void kbase_synchronize_irqs(struct kbase_device *kbdev)
{
	u32 i;

	for (i = 0; i < ARRAY_SIZE(kbase_handler_table); i++) {
		/* Entries without an IRQ number are skipped */
		if (!kbdev->irqs[i].irq)
			continue;

		synchronize_irq(kbdev->irqs[i].irq);
	}
}
#endif /* !defined(CONFIG_MALI_NO_MALI) */

View File

@ -0,0 +1,378 @@
/*
*
* (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
* Foundation, and any use by you of this program is subject to the terms
* of such GNU licence.
*
* A copy of the licence is included with the program, and can also be obtained
* from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
* Boston, MA 02110-1301, USA.
*
*/
/*
* Register backend context / address space management
*/
#include <mali_kbase.h>
#include <mali_kbase_hwaccess_jm.h>
/**
* assign_and_activate_kctx_addr_space - Assign an AS to a context
* @kbdev: Kbase device
* @kctx: Kbase context
* @current_as: Address Space to assign
*
* Assign an Address Space (AS) to a context, and add the context to the Policy.
*
* This includes
* setting up the global runpool_irq structure and the context on the AS,
* Activating the MMU on the AS,
* Allowing jobs to be submitted on the AS.
*
* Context:
* kbasep_js_kctx_info.jsctx_mutex held,
* kbasep_js_device_data.runpool_mutex held,
* AS transaction mutex held,
* Runpool IRQ lock held
*/
static void assign_and_activate_kctx_addr_space(struct kbase_device *kbdev,
						struct kbase_context *kctx,
						struct kbase_as *current_as)
{
	struct kbasep_js_device_data *js_devdata = &kbdev->js_data;
	int as_nr = current_as->number;
	struct kbasep_js_per_as_data *as_data =
			&js_devdata->runpool_irq.per_as_data[as_nr];

	lockdep_assert_held(&kctx->jctx.sched_info.ctx.jsctx_mutex);
	lockdep_assert_held(&js_devdata->runpool_mutex);
	lockdep_assert_held(&kbdev->hwaccess_lock);

	/* Attribute handling */
	kbasep_js_ctx_attr_runpool_retain_ctx(kbdev, kctx);

	/* Record the address space on the context before touching the MMU */
	kctx->as_nr = as_nr;

	/* Activate the address space on the MMU only if the GPU is powered;
	 * otherwise the MMU will be reprogrammed on the next
	 * pm_context_active() */
	if (kbdev->pm.backend.gpu_powered)
		kbase_mmu_update(kctx);

	/* Allow it to run jobs */
	kbasep_js_set_submit_allowed(js_devdata, kctx);

	/* Book-keeping: the AS now belongs to kctx and nobody holds it busy */
	as_data->kctx = kctx;
	as_data->as_busy_refcount = 0;

	kbase_js_runpool_inc_context_count(kbdev, kctx);
}
/**
* release_addr_space - Release an address space
* @kbdev: Kbase device
* @kctx_as_nr: Address space of context to release
* @kctx: Context being released
*
* Context: kbasep_js_device_data.runpool_mutex must be held
*
* Release an address space, making it available for being picked again.
*/
static void release_addr_space(struct kbase_device *kbdev, int kctx_as_nr,
						struct kbase_context *kctx)
{
	struct kbasep_js_device_data *js_devdata = &kbdev->js_data;
	u16 as_bit = (1u << kctx_as_nr);

	lockdep_assert_held(&js_devdata->runpool_mutex);

	/* The address space must not already be free */
	KBASE_DEBUG_ASSERT(!(js_devdata->as_free & as_bit));

	/* Mark the AS as available again and drop the context from the
	 * runpool count */
	js_devdata->as_free |= as_bit;
	kbase_js_runpool_dec_context_count(kbdev, kctx);
}
/*
 * Report whether @kctx can be used without assigning it an address space:
 * returns true if it is the currently active context or if it is recorded
 * against one of the hardware address spaces, false otherwise.
 */
bool kbase_backend_use_ctx_sched(struct kbase_device *kbdev,
						struct kbase_context *kctx)
{
	/* Context is already active */
	bool in_use = (kbdev->hwaccess.active_kctx == kctx);
	int as;

	/* Otherwise look for an address space that already holds it */
	for (as = 0; !in_use && as < kbdev->nr_hw_address_spaces; as++)
		in_use = (kbdev->js_data.runpool_irq.per_as_data[as].kctx ==
				kctx);

	return in_use;
}
/*
 * Detach @kctx from its address space: clears the per-AS context pointer,
 * disallows submission, disables the AS on the MMU if the GPU is powered,
 * marks the AS free and invalidates kctx->as_nr. Warns and does nothing if
 * the context has no AS or the AS is still busy. hwaccess_lock must be held.
 */
void kbase_backend_release_ctx_irq(struct kbase_device *kbdev,
						struct kbase_context *kctx)
{
	struct kbasep_js_per_as_data *as_data;
	int as_nr = kctx->as_nr;

	if (as_nr == KBASEP_AS_NR_INVALID) {
		WARN(1, "Attempting to release context without ASID\n");
		return;
	}

	lockdep_assert_held(&kbdev->hwaccess_lock);

	as_data = &kbdev->js_data.runpool_irq.per_as_data[kctx->as_nr];
	if (as_data->as_busy_refcount != 0) {
		WARN(1, "Attempting to release active ASID\n");
		return;
	}

	/* Release context from address space */
	as_data->kctx = NULL;
	kbasep_js_clear_submit_allowed(&kbdev->js_data, kctx);

	/* De-activate the address space on the MMU only if the GPU is
	 * powered; otherwise the MMU will be reprogrammed on the next
	 * pm_context_active() */
	if (kbdev->pm.backend.gpu_powered)
		kbase_mmu_disable(kctx);

	release_addr_space(kbdev, as_nr, kctx);
	kctx->as_nr = KBASEP_AS_NR_INVALID;
}
/*
 * No-op in this backend: the body is empty, so nothing is done for @kctx
 * here. All release work visible in this file happens in
 * kbase_backend_release_ctx_irq().
 */
void kbase_backend_release_ctx_noirq(struct kbase_device *kbdev,
struct kbase_context *kctx)
{
}
/*
 * Mark address space @as_nr as free again by setting its bit in
 * js_data.as_free. The runpool_mutex must be held.
 */
void kbase_backend_release_free_address_space(struct kbase_device *kbdev,
						int as_nr)
{
	struct kbasep_js_device_data *js_devdata = &kbdev->js_data;

	lockdep_assert_held(&js_devdata->runpool_mutex);

	js_devdata->as_free |= (1 << as_nr);
}
/**
* check_is_runpool_full - check whether the runpool is full for a specified
* context
* @kbdev: Kbase device
* @kctx: Kbase context
*
* If kctx == NULL, then this makes the least restrictive check on the
* runpool. A specific context that is supplied immediately after could fail
* the check, even under the same conditions.
*
* Therefore, once a context is obtained you \b must re-check it with this
* function, since the return value could change to false.
*
* Context:
* In all cases, the caller must hold kbasep_js_device_data.runpool_mutex.
* When kctx != NULL the caller must hold the
* kbasep_js_kctx_info.ctx.jsctx_mutex.
* When kctx == NULL, then the caller need not hold any jsctx_mutex locks (but
* it doesn't do any harm to do so).
*
* Return: true if the runpool is full
*/
static bool check_is_runpool_full(struct kbase_device *kbdev,
						struct kbase_context *kctx)
{
	struct kbasep_js_device_data *js_devdata = &kbdev->js_data;
	bool hw_as_exhausted;

	lockdep_assert_held(&js_devdata->runpool_mutex);

	/* Regardless of whether a context is submitting or not, can't have more
	 * than there are HW address spaces */
	hw_as_exhausted = (js_devdata->nr_all_contexts_running >=
						kbdev->nr_hw_address_spaces);

	/* Without a context, or with one that cannot submit, only the HW
	 * limit applies */
	if (!kctx || kbase_ctx_flag(kctx, KCTX_SUBMIT_DISABLED))
		return hw_as_exhausted;

	lockdep_assert_held(&kctx->jctx.sched_info.ctx.jsctx_mutex);

	/* Contexts that submit might use less of the address spaces
	 * available, due to HW workarounds. In which case, the runpool
	 * is also full when the number of submitting contexts exceeds
	 * the number of submittable address spaces.
	 *
	 * Both checks must be made: can have nr_user_address_spaces ==
	 * nr_hw_address spaces, and at the same time can have
	 * nr_user_contexts_running < nr_all_contexts_running. */
	return hw_as_exhausted ||
		(js_devdata->nr_user_contexts_running >=
						kbdev->nr_user_address_spaces);
}
int kbase_backend_find_free_address_space(struct kbase_device *kbdev,
struct kbase_context *kctx)
{
struct kbasep_js_device_data *js_devdata;
struct kbasep_js_kctx_info *js_kctx_info;
unsigned long flags;
int i;
js_devdata = &kbdev->js_data;
js_kctx_info = &kctx->jctx.sched_info;
mutex_lock(&js_kctx_info->ctx.jsctx_mutex);
mutex_lock(&js_devdata->runpool_mutex);
/* First try to find a free address space */
if (check_is_runpool_full(kbdev, kctx))
i = -1;
else
i = ffs(js_devdata->as_free) - 1;
if (i >= 0 && i < kbdev->nr_hw_address_spaces) {
js_devdata->as_free &= ~(1 << i);
mutex_unlock(&js_devdata->runpool_mutex);
mutex_unlock(&js_kctx_info->ctx.jsctx_mutex);
return i;
}
spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
/* No address space currently free, see if we can release one */
for (i = 0; i < kbdev->nr_hw_address_spaces; i++) {
struct kbasep_js_per_as_data *js_per_as_data;
struct kbasep_js_kctx_info *as_js_kctx_info;
struct kbase_context *as_kctx;
js_per_as_data = &kbdev->js_data.runpool_irq.per_as_data[i];
as_kctx = js_per_as_data->kctx;
as_js_kctx_info = &as_kctx->jctx.sched_info;
/* Don't release privileged or active contexts, or contexts with
* jobs running */
if (as_kctx && !kbase_ctx_flag(as_kctx, KCTX_PRIVILEGED) &&
js_per_as_data->as_busy_refcount == 0) {
if (!kbasep_js_runpool_retain_ctx_nolock(kbdev,
as_kctx)) {
WARN(1, "Failed to retain active context\n");
spin_unlock_irqrestore(&kbdev->hwaccess_lock,
flags);
mutex_unlock(&js_devdata->runpool_mutex);
mutex_unlock(&js_kctx_info->ctx.jsctx_mutex);
return KBASEP_AS_NR_INVALID;
}
kbasep_js_clear_submit_allowed(js_devdata, as_kctx);
/* Drop and retake locks to take the jsctx_mutex on the
* context we're about to release without violating lock
* ordering
*/
spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
mutex_unlock(&js_devdata->runpool_mutex);
mutex_unlock(&js_kctx_info->ctx.jsctx_mutex);
/* Release context from address space */
mutex_lock(&as_js_kctx_info->ctx.jsctx_mutex);
mutex_lock(&js_devdata->runpool_mutex);
kbasep_js_runpool_release_ctx_nolock(kbdev, as_kctx);
if (!kbase_ctx_flag(as_kctx, KCTX_SCHEDULED)) {
kbasep_js_runpool_requeue_or_kill_ctx(kbdev,
as_kctx,
true);
js_devdata->as_free &= ~(1 << i);
mutex_unlock(&js_devdata->runpool_mutex);
mutex_unlock(&as_js_kctx_info->ctx.jsctx_mutex);
return i;
}
/* Context was retained while locks were dropped,
* continue looking for free AS */
mutex_unlock(&js_devdata->runpool_mutex);
mutex_unlock(&as_js_kctx_info->ctx.jsctx_mutex);
mutex_lock(&js_kctx_info->ctx.jsctx_mutex);
mutex_lock(&js_devdata->runpool_mutex);
spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
}
}
spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
mutex_unlock(&js_devdata->runpool_mutex);
mutex_unlock(&js_kctx_info->ctx.jsctx_mutex);
return KBASEP_AS_NR_INVALID;
}
/**
 * kbase_backend_use_ctx - Assign an address space to a context and activate it
 * @kbdev: Kbase device
 * @kctx:  Context to activate; must not be the active context and must not
 *         already have an address space
 * @as_nr: Address space to assign; must not be KBASEP_AS_NR_INVALID
 *
 * The runpool_mutex and the hwaccess_lock must be held by the caller.
 * Privileged contexts are additionally retained in the runpool so that they
 * keep their address space.
 *
 * Return: true if the context was assigned @as_nr, false (after a warning) if
 * the parameters were invalid.
 */
bool kbase_backend_use_ctx(struct kbase_device *kbdev,
				struct kbase_context *kctx,
				int as_nr)
{
	struct kbasep_js_device_data *js_devdata;
	struct kbase_as *new_address_space = NULL;

	js_devdata = &kbdev->js_data;

	if (kbdev->hwaccess.active_kctx == kctx ||
	    kctx->as_nr != KBASEP_AS_NR_INVALID ||
	    as_nr == KBASEP_AS_NR_INVALID) {
		WARN(1, "Invalid parameters to use_ctx()\n");
		return false;
	}

	new_address_space = &kbdev->as[as_nr];

	lockdep_assert_held(&js_devdata->runpool_mutex);
	lockdep_assert_held(&kbdev->hwaccess_lock);

	assign_and_activate_kctx_addr_space(kbdev, kctx, new_address_space);

	if (kbase_ctx_flag(kctx, KCTX_PRIVILEGED)) {
		/* We need to retain it to keep the corresponding address space
		 */
		kbasep_js_runpool_retain_ctx_nolock(kbdev, kctx);
	}

	return true;
}

View File

@ -0,0 +1,123 @@
/*
*
* (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
* Foundation, and any use by you of this program is subject to the terms
* of such GNU licence.
*
* A copy of the licence is included with the program, and can also be obtained
* from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
* Boston, MA 02110-1301, USA.
*
*/
/*
* Register-based HW access backend specific definitions
*/
#ifndef _KBASE_HWACCESS_GPU_DEFS_H_
#define _KBASE_HWACCESS_GPU_DEFS_H_
/* SLOT_RB_SIZE must be < 256 (presumably because the ringbuffer indices in
 * struct slot_rb are u8 - confirm). SLOT_RB_MASK is SLOT_RB_SIZE - 1, which is
 * an all-ones bit mask only while SLOT_RB_SIZE is a power of two. */
#define SLOT_RB_SIZE 2
#define SLOT_RB_MASK (SLOT_RB_SIZE - 1)
/**
 * struct rb_entry - Ringbuffer entry
 * @katom: Atom associated with this entry
 */
struct rb_entry {
struct kbase_jd_atom *katom;
};
/**
 * struct slot_rb - Slot ringbuffer
 * @entries: Ringbuffer entries (SLOT_RB_SIZE of them)
 * @last_context: The last context to submit a job on this slot
 * @read_idx: Current read index of buffer
 * @write_idx: Current write index of buffer
 * @job_chain_flag: Flag used to implement jobchain disambiguation
 */
struct slot_rb {
struct rb_entry entries[SLOT_RB_SIZE];
struct kbase_context *last_context;
u8 read_idx;
u8 write_idx;
u8 job_chain_flag;
};
/**
* struct kbase_backend_data - GPU backend specific data for HW access layer
* @slot_rb: Slot ringbuffers
* @rmu_workaround_flag: When PRLAM-8987 is present, this flag determines
* whether slots 0/1 or slot 2 are currently being
* pulled from
* @scheduling_timer: The timer tick used for rescheduling jobs
* @timer_running: Is the timer running? The runpool_mutex must be
* held whilst modifying this.
* @suspend_timer: Is the timer suspended? Set when a suspend
* occurs and cleared on resume. The runpool_mutex
* must be held whilst modifying this.
* @reset_gpu: Set to a KBASE_RESET_xxx value (see comments)
* @reset_workq: Work queue for performing the reset
* @reset_work: Work item for performing the reset
* @reset_wait: Wait event signalled when the reset is complete
* @reset_timer: Timeout for soft-stops before the reset
* @timeouts_updated: Have timeout values just been updated?
*
* The hwaccess_lock (a spinlock) must be held when accessing this structure
*/
struct kbase_backend_data {
struct slot_rb slot_rb[BASE_JM_MAX_NR_SLOTS];
bool rmu_workaround_flag;
struct hrtimer scheduling_timer;
bool timer_running;
bool suspend_timer;
/* Holds one of the KBASE_RESET_GPU_xxx values defined below */
atomic_t reset_gpu;
/* The GPU reset isn't pending */
#define KBASE_RESET_GPU_NOT_PENDING 0
/* kbase_prepare_to_reset_gpu has been called */
#define KBASE_RESET_GPU_PREPARED 1
/* kbase_reset_gpu has been called - the reset will now definitely happen
* within the timeout period */
#define KBASE_RESET_GPU_COMMITTED 2
/* The GPU reset process is currently occurring (timeout has expired or
* kbasep_try_reset_gpu_early was called) */
#define KBASE_RESET_GPU_HAPPENING 3
/* Reset the GPU silently, used when resetting the GPU as part of normal
* behavior (e.g. when exiting protected mode). */
#define KBASE_RESET_GPU_SILENT 4
struct workqueue_struct *reset_workq;
struct work_struct reset_work;
wait_queue_head_t reset_wait;
struct hrtimer reset_timer;
bool timeouts_updated;
};
/**
* struct kbase_jd_atom_backend - GPU backend specific katom data
*/
struct kbase_jd_atom_backend {
/* No backend-specific per-atom fields are defined for this backend */
};
/**
* struct kbase_context_backend - GPU backend specific context data
*/
struct kbase_context_backend {
/* No backend-specific per-context fields are defined for this backend */
};
#endif /* _KBASE_HWACCESS_GPU_DEFS_H_ */

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,155 @@
/*
*
* (C) COPYRIGHT 2011-2016 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
* Foundation, and any use by you of this program is subject to the terms
* of such GNU licence.
*
* A copy of the licence is included with the program, and can also be obtained
* from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
* Boston, MA 02110-1301, USA.
*
*/
/*
* Job Manager backend-specific low-level APIs.
*/
#ifndef _KBASE_JM_HWACCESS_H_
#define _KBASE_JM_HWACCESS_H_
#include <mali_kbase_hw.h>
#include <mali_kbase_debug.h>
#include <linux/atomic.h>
#include <backend/gpu/mali_kbase_jm_rb.h>
/**
* kbase_job_submit_nolock() - Submit a job to a certain job-slot
* @kbdev: Device pointer
* @katom: Atom to submit
* @js: Job slot to submit on
*
* The caller must check kbasep_jm_is_submit_slots_free() != false before
* calling this.
*
* The following locking conditions are made on the caller:
* - it must hold the hwaccess_lock
*/
void kbase_job_submit_nolock(struct kbase_device *kbdev,
struct kbase_jd_atom *katom, int js);
/**
* kbase_job_done_slot() - Complete the head job on a particular job-slot
* @kbdev: Device pointer
* @s: Job slot
* @completion_code: Completion code of job reported by GPU
* @job_tail: Job tail address reported by GPU
* @end_timestamp: Timestamp of job completion
*/
void kbase_job_done_slot(struct kbase_device *kbdev, int s, u32 completion_code,
u64 job_tail, ktime_t *end_timestamp);
#ifdef CONFIG_GPU_TRACEPOINTS
/*
 * Format the name "job_slot_<js>" into the caller-supplied buffer @js_string
 * and return that same buffer. No bounds checking is performed: the caller
 * must provide a buffer large enough for the formatted name and its
 * terminating NUL.
 */
static inline char *kbasep_make_job_slot_string(int js, char *js_string)
{
	char *const out = js_string;

	sprintf(out, "job_slot_%i", js);

	return out;
}
#endif
/**
* kbase_job_hw_submit() - Submit a job to the GPU
* @kbdev: Device pointer
* @katom: Atom to submit
* @js: Job slot to submit on
*
* The caller must check kbasep_jm_is_submit_slots_free() != false before
* calling this.
*
* The following locking conditions are made on the caller:
* - it must hold the hwaccess_lock
*/
void kbase_job_hw_submit(struct kbase_device *kbdev,
struct kbase_jd_atom *katom,
int js);
/**
* kbasep_job_slot_soft_or_hard_stop_do_action() - Perform a soft or hard stop
* on the specified atom
* @kbdev: Device pointer
* @js: Job slot to stop on
* @action: The action to perform, either JSn_COMMAND_HARD_STOP or
* JSn_COMMAND_SOFT_STOP
* @core_reqs: Core requirements of atom to stop
* @target_katom: Atom to stop
*
* The following locking conditions are made on the caller:
* - it must hold the hwaccess_lock
*/
void kbasep_job_slot_soft_or_hard_stop_do_action(struct kbase_device *kbdev,
int js,
u32 action,
base_jd_core_req core_reqs,
struct kbase_jd_atom *target_katom);
/**
 * kbase_backend_soft_hard_stop_slot() - Soft or hard stop jobs on a given job
 * slot belonging to a given context.
 * @kbdev: Device pointer
 * @kctx: Context pointer. May be NULL
 * @js: Job slot to soft or hard stop
 * @katom: Specific atom to stop. May be NULL
 * @action: The action to perform, either JSn_COMMAND_HARD_STOP or
 * JSn_COMMAND_SOFT_STOP
 *
 * If no context is provided then all jobs on the slot will be soft or hard
 * stopped.
 *
 * If a katom is provided then only that specific atom will be stopped. In this
 * case the kctx parameter is ignored.
 *
 * Jobs that are on the slot but are not yet on the GPU will be unpulled and
 * returned to the job scheduler.
 *
 * Return: true if an atom was stopped, false otherwise
 */
bool kbase_backend_soft_hard_stop_slot(struct kbase_device *kbdev,
struct kbase_context *kctx,
int js,
struct kbase_jd_atom *katom,
u32 action);
/**
* kbase_job_slot_init - Initialise job slot framework
* @kbdev: Device pointer
*
* Called on driver initialisation
*
* Return: 0 on success
*/
int kbase_job_slot_init(struct kbase_device *kbdev);
/**
* kbase_job_slot_halt - Halt the job slot framework
* @kbdev: Device pointer
*
* Should prevent any further job slot processing
*/
void kbase_job_slot_halt(struct kbase_device *kbdev);
/**
* kbase_job_slot_term - Terminate job slot framework
* @kbdev: Device pointer
*
* Called on driver termination
*/
void kbase_job_slot_term(struct kbase_device *kbdev);
#endif /* _KBASE_JM_HWACCESS_H_ */

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,76 @@
/*
*
* (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
* Foundation, and any use by you of this program is subject to the terms
* of such GNU licence.
*
* A copy of the licence is included with the program, and can also be obtained
* from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
* Boston, MA 02110-1301, USA.
*
*/
/*
* Register-based HW access backend specific APIs
*/
#ifndef _KBASE_HWACCESS_GPU_H_
#define _KBASE_HWACCESS_GPU_H_
#include <backend/gpu/mali_kbase_pm_internal.h>
/**
* kbase_gpu_irq_evict - Evict an atom from a NEXT slot
*
* @kbdev: Device pointer
* @js: Job slot to evict from
*
* Evict the atom in the NEXT slot for the specified job slot. This function is
* called from the job complete IRQ handler when the previous job has failed.
*
* Return: true if job evicted from NEXT registers, false otherwise
*/
bool kbase_gpu_irq_evict(struct kbase_device *kbdev, int js);
/**
* kbase_gpu_complete_hw - Complete an atom on job slot js
*
* @kbdev: Device pointer
* @js: Job slot that has completed
* @completion_code: Event code from job that has completed
* @job_tail: The tail address from the hardware if the job has partially
* completed
* @end_timestamp: Time of completion
*/
void kbase_gpu_complete_hw(struct kbase_device *kbdev, int js,
u32 completion_code,
u64 job_tail,
ktime_t *end_timestamp);
/**
* kbase_gpu_inspect - Inspect the contents of the HW access ringbuffer
*
* @kbdev: Device pointer
* @js: Job slot to inspect
* @idx: Index into ringbuffer. 0 is the job currently running on
* the slot, 1 is the job waiting, all other values are invalid.
* Return: The atom at that position in the ringbuffer
* or NULL if no atom present
*/
struct kbase_jd_atom *kbase_gpu_inspect(struct kbase_device *kbdev, int js,
int idx);
/**
* kbase_gpu_dump_slots - Print the contents of the slot ringbuffers
*
* @kbdev: Device pointer
*/
void kbase_gpu_dump_slots(struct kbase_device *kbdev);
#endif /* _KBASE_HWACCESS_GPU_H_ */

View File

@ -0,0 +1,303 @@
/*
*
* (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
* Foundation, and any use by you of this program is subject to the terms
* of such GNU licence.
*
* A copy of the licence is included with the program, and can also be obtained
* from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
* Boston, MA 02110-1301, USA.
*
*/
/*
* Base kernel affinity manager APIs
*/
#include <mali_kbase.h>
#include "mali_kbase_js_affinity.h"
#include "mali_kbase_hw.h"
#include <backend/gpu/mali_kbase_pm_internal.h>
bool kbase_js_can_run_job_on_slot_no_lock(struct kbase_device *kbdev,
						int js)
{
	/*
	 * Reasons for using job slot 2:
	 * - BASE_HW_ISSUE_8987 (slot 2 is entirely used for that purpose)
	 * - Otherwise, atoms with BASE_JD_REQ_COHERENT_GROUP, but:
	 *   - only when no context with KBASEP_JS_CTX_ATTR_COMPUTE_ALL_CORES
	 *     is on the runpool, because atoms running on all cores on slot 1
	 *     could be blocked by those using a coherent group on slot 2
	 *   - and only when there are 2 or more coregroups: with a single
	 *     coregroup, jobs for slot 2 would also be jobs for slot 1 and
	 *     interfere with them, and could block jobs that can only run on
	 *     slot 1 (tiler jobs)
	 */

	/* The workaround allows every slot; slots other than 2 are never
	 * restricted here */
	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8987) || js != 2)
		return true;

	/* From here on js == 2: it is only usable with 2+ coregroups */
	if (kbdev->gpu_props.num_core_groups <= 1)
		return false;

	/* ...and only while no atoms that run on all cores are present. There
	 * is no specific check for BASE_JD_REQ_COHERENT_GROUP atoms - the
	 * policy will sort that out */
	return !kbasep_js_ctx_attr_is_attr_on_runpool(kbdev,
			KBASEP_JS_CTX_ATTR_COMPUTE_ALL_CORES);
}
/*
 * Until a deeper rework of what the job scheduler, power manager and affinity
 * manager implement has been decided on, this function is just an intermediate
 * step that assumes:
 * - all working cores will be powered on when this is called.
 * - the largest current configuration is 2 core groups.
 * - no hardcoded values are used, so the low and high cores in a core split
 *   will be evenly distributed.
 * - odd combinations of core requirements have been filtered out and do not
 *   get to this function (e.g. CS+T+NSS is not supported here).
 * - this function is frequently called and could be optimized, but as the
 *   functionality will likely be modified, optimization has not been
 *   addressed.
 */
bool kbase_js_choose_affinity(u64 * const affinity,
					struct kbase_device *kbdev,
					struct kbase_jd_atom *katom, int js)
{
	const base_jd_core_req core_req = katom->core_req;
	const unsigned int num_core_groups = kbdev->gpu_props.num_core_groups;
	u64 available;
	u64 usable;

	lockdep_assert_held(&kbdev->hwaccess_lock);

	/*
	 * Fail straight away when the core availability policy reports no
	 * cores at all (it is transitioning).
	 */
	available = kbase_pm_ca_get_core_mask(kbdev);
	if (available == 0) {
		*affinity = 0;
		return false;
	}

	KBASE_DEBUG_ASSERT(js >= 0);

	/* Tiler-only atom (T requested, neither FS nor CS) */
	if ((core_req & (BASE_JD_REQ_FS | BASE_JD_REQ_CS | BASE_JD_REQ_T)) ==
								BASE_JD_REQ_T) {
		/* With XAFFINITY only the tiler is enabled (the default, so an
		 * empty mask); without it shader core 0 has to be enabled. */
		*affinity = kbase_hw_has_feature(kbdev,
					BASE_HW_FEATURE_XAFFINITY) ? 0 : 1;
		return true;
	}

	/* Every remaining path restricts the available cores by the debug
	 * core mask of this slot */
	usable = available & kbdev->pm.debug_core_mask[js];

	if (kbdev->gpu_props.num_cores == 1 ||
			!(core_req & (BASE_JD_REQ_COHERENT_GROUP |
				BASE_JD_REQ_SPECIFIC_COHERENT_GROUP))) {
		/* Single core, or no core split required: all usable cores */
		*affinity = usable;
	} else if (js == 0 || num_core_groups == 1) {
		/* js[0] and single-core-group systems just get the first core
		 * group */
		*affinity =
			kbdev->gpu_props.props.coherency_info.group[0].core_mask
			& usable;
	} else {
		/* js[1], js[2] use core groups 0, 1 for dual-core-group
		 * systems */
		const u32 core_group_idx = ((u32) js) - 1;

		KBASE_DEBUG_ASSERT(core_group_idx < num_core_groups);

		*affinity =
			kbdev->gpu_props.props.coherency_info.group[core_group_idx].core_mask
			& usable;

		/* The job specifically targets core group 1 and the core
		 * availability policy keeps that group off: flag the atom */
		if (*affinity == 0 && core_group_idx == 1 &&
				kbdev->pm.backend.cg1_disabled == true)
			katom->event_code = BASE_JD_EVENT_PM_EVENT;
	}

	/*
	 * No cores currently available in the desired core group(s) (core
	 * availability policy is transitioning): fail.
	 */
	if (*affinity == 0)
		return false;

	/* A tiler requirement on hardware without XAFFINITY also needs core 0
	 * (same reason as for tiler-only atoms above) */
	if ((core_req & BASE_JD_REQ_T) &&
			!kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_XAFFINITY))
		*affinity |= 1;

	return true;
}
/*
 * Report whether the per-slot affinity masks in @affinities break the affinity
 * restriction: slots 1 and 2 must not share any core.
 *
 * @kbdev is not used by this implementation.
 */
static inline bool kbase_js_affinity_is_violating(
						struct kbase_device *kbdev,
						u64 *affinities)
{
	KBASE_DEBUG_ASSERT(affinities != NULL);

	/* The two slots involved in Generic thread creation must not have
	 * intersecting affinity. Due to micro-architectural issues, a job in
	 * slot A targeting cores used by slot B could prevent the job in slot
	 * B from making progress until the job in slot A has completed.
	 *
	 * Any core present in both masks is therefore a violation. */
	return (affinities[1] & affinities[2]) != 0;
}
bool kbase_js_affinity_would_violate(struct kbase_device *kbdev, int js,
								u64 affinity)
{
	u64 proposed[BASE_JM_MAX_NR_SLOTS];
	const struct kbasep_js_device_data *js_devdata;

	KBASE_DEBUG_ASSERT(kbdev != NULL);
	KBASE_DEBUG_ASSERT(js < BASE_JM_MAX_NR_SLOTS);
	js_devdata = &kbdev->js_data;

	/* Evaluate the restriction on a private copy of the tracked slot
	 * affinities with @affinity added to slot @js, leaving the tracked
	 * state itself untouched */
	memcpy(proposed, js_devdata->runpool_irq.slot_affinities,
			sizeof(js_devdata->runpool_irq.slot_affinities));
	proposed[js] = proposed[js] | affinity;

	return kbase_js_affinity_is_violating(kbdev, proposed);
}
void kbase_js_affinity_retain_slot_cores(struct kbase_device *kbdev, int js,
								u64 affinity)
{
	struct kbasep_js_device_data *js_devdata;
	u64 remaining;

	KBASE_DEBUG_ASSERT(kbdev != NULL);
	KBASE_DEBUG_ASSERT(js < BASE_JM_MAX_NR_SLOTS);
	js_devdata = &kbdev->js_data;

	KBASE_DEBUG_ASSERT(kbase_js_affinity_would_violate(kbdev, js, affinity)
								== false);

	/* Walk the set bits of @affinity from the highest downwards, taking
	 * one reference per core */
	for (remaining = affinity; remaining != 0; ) {
		const int core = fls64(remaining) - 1;
		const u64 core_bit = 1ULL << core;
		s8 refs;

		refs = ++js_devdata->runpool_irq.slot_affinity_refcount[js][core];

		/* First reference: the core becomes part of the slot's
		 * tracked affinity */
		if (refs == 1)
			js_devdata->runpool_irq.slot_affinities[js] |= core_bit;

		remaining &= ~core_bit;
	}
}
void kbase_js_affinity_release_slot_cores(struct kbase_device *kbdev, int js,
								u64 affinity)
{
	struct kbasep_js_device_data *js_devdata;
	u64 remaining;

	KBASE_DEBUG_ASSERT(kbdev != NULL);
	KBASE_DEBUG_ASSERT(js < BASE_JM_MAX_NR_SLOTS);
	js_devdata = &kbdev->js_data;

	/* Walk the set bits of @affinity from the highest downwards, dropping
	 * one reference per core */
	for (remaining = affinity; remaining != 0; ) {
		const int core = fls64(remaining) - 1;
		const u64 core_bit = 1ULL << core;
		s8 refs;

		/* Releasing a core that was never retained is a caller bug */
		KBASE_DEBUG_ASSERT(
			js_devdata->runpool_irq.slot_affinity_refcount[js][core] > 0);

		refs = --js_devdata->runpool_irq.slot_affinity_refcount[js][core];

		/* Last reference gone: the core leaves the slot's tracked
		 * affinity */
		if (refs == 0)
			js_devdata->runpool_irq.slot_affinities[js] &= ~core_bit;

		remaining &= ~core_bit;
	}
}
#if KBASE_TRACE_ENABLE
void kbase_js_debug_log_current_affinities(struct kbase_device *kbdev)
{
	const struct kbasep_js_device_data *js_devdata;
	int slot = 0;

	KBASE_DEBUG_ASSERT(kbdev != NULL);
	js_devdata = &kbdev->js_data;

	/* Emit one JS_AFFINITY_CURRENT trace entry for each of slots 0..2,
	 * carrying the low 32 bits of the slot's tracked affinity */
	while (slot < 3) {
		KBASE_TRACE_ADD_SLOT_INFO(kbdev, JS_AFFINITY_CURRENT, NULL,
				NULL, 0u, slot,
				(u32) js_devdata->runpool_irq.slot_affinities[slot]);
		++slot;
	}
}
#endif /* KBASE_TRACE_ENABLE */

View File

@ -0,0 +1,129 @@
/*
*
* (C) COPYRIGHT 2011-2016 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
* Foundation, and any use by you of this program is subject to the terms
* of such GNU licence.
*
* A copy of the licence is included with the program, and can also be obtained
* from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
* Boston, MA 02110-1301, USA.
*
*/
/*
* Affinity Manager internal APIs.
*/
#ifndef _KBASE_JS_AFFINITY_H_
#define _KBASE_JS_AFFINITY_H_
/**
* kbase_js_can_run_job_on_slot_no_lock - Decide whether it is possible to
* submit a job to a particular job slot in the current status
*
* @kbdev: The kbase device structure of the device
* @js: Job slot number to check for allowance
*
* Will check if submitting to the given job slot is allowed in the current
* status. For example using job slot 2 while in soft-stoppable state and only
* having 1 coregroup is not allowed by the policy. This function should be
* called prior to submitting a job to a slot to make sure policy rules are not
* violated.
*
* The following locking conditions are made on the caller
* - it must hold hwaccess_lock
*/
bool kbase_js_can_run_job_on_slot_no_lock(struct kbase_device *kbdev, int js);
/**
* kbase_js_choose_affinity - Compute affinity for a given job.
*
* @affinity: Affinity bitmap computed
* @kbdev: The kbase device structure of the device
* @katom: Job chain of which affinity is going to be found
* @js: Slot the job chain is being submitted
*
* Currently assumes an all-on/all-off power management policy.
* Also assumes there is at least one core with tiler available.
*
* Returns true if a valid affinity was chosen, false if
* no cores were available.
*/
bool kbase_js_choose_affinity(u64 * const affinity,
struct kbase_device *kbdev,
struct kbase_jd_atom *katom,
int js);
/**
* kbase_js_affinity_would_violate - Determine whether a proposed affinity on
* job slot @js would cause a violation of affinity restrictions.
*
* @kbdev: Kbase device structure
* @js: The job slot to test
* @affinity: The affinity mask to test
*
* The following locks must be held by the caller
* - hwaccess_lock
*
* Return: true if the affinity would violate the restrictions
*/
bool kbase_js_affinity_would_violate(struct kbase_device *kbdev, int js,
u64 affinity);
/**
* kbase_js_affinity_retain_slot_cores - Affinity tracking: retain cores used by
* a slot
*
* @kbdev: Kbase device structure
* @js: The job slot retaining the cores
* @affinity: The cores to retain
*
* The following locks must be held by the caller
* - hwaccess_lock
*/
void kbase_js_affinity_retain_slot_cores(struct kbase_device *kbdev, int js,
u64 affinity);
/**
* kbase_js_affinity_release_slot_cores - Affinity tracking: release cores used
* by a slot
*
* @kbdev: Kbase device structure
* @js: Job slot
* @affinity: Bit mask of core to be released
*
* Cores must be released as soon as a job is dequeued from a slot's 'submit
* slots', and before another job is submitted to those slots. Otherwise, the
* refcount could exceed the maximum number submittable to a slot,
* %BASE_JM_SUBMIT_SLOTS.
*
* The following locks must be held by the caller
* - hwaccess_lock
*/
void kbase_js_affinity_release_slot_cores(struct kbase_device *kbdev, int js,
u64 affinity);
/**
* kbase_js_debug_log_current_affinities - log the current affinities
*
* @kbdev: Kbase device structure
*
* Output to the Trace log the current tracked affinities on all slots
*/
#if KBASE_TRACE_ENABLE
void kbase_js_debug_log_current_affinities(struct kbase_device *kbdev);
#else /* KBASE_TRACE_ENABLE */
/* Variant used when KBASE_TRACE_ENABLE is not set: there is no trace log to
 * write to, so this is a no-op that lets callers stay unconditional. */
static inline void
kbase_js_debug_log_current_affinities(struct kbase_device *kbdev)
{
}
#endif /* KBASE_TRACE_ENABLE */
#endif /* _KBASE_JS_AFFINITY_H_ */

View File

@ -0,0 +1,357 @@
/*
*
* (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
* Foundation, and any use by you of this program is subject to the terms
* of such GNU licence.
*
* A copy of the licence is included with the program, and can also be obtained
* from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
* Boston, MA 02110-1301, USA.
*
*/
/*
* Register-based HW access backend specific job scheduler APIs
*/
#include <mali_kbase.h>
#include <mali_kbase_hwaccess_jm.h>
#include <backend/gpu/mali_kbase_jm_internal.h>
#include <backend/gpu/mali_kbase_js_internal.h>
/*
* Define for when dumping is enabled.
* This should not be based on the instrumentation level as whether dumping is
* enabled for a particular level is down to the integrator. However this is
* being used for now as otherwise the cinstr headers would be needed.
*/
#define CINSTR_DUMPING_ENABLED (2 == MALI_INSTRUMENTATION_LEVEL)
/*
 * Decide whether the scheduling timer should currently be running.
 *
 * Hold the runpool_mutex for this
 */
static inline bool timer_callback_should_run(struct kbase_device *kbdev)
{
	struct kbase_backend_data *backend = &kbdev->hwaccess.backend;
	s8 nr_running_ctxs;
	s8 nr_compute_ctxs;
	s8 nr_noncompute_ctxs;

	lockdep_assert_held(&kbdev->js_data.runpool_mutex);

	/* Timer must stop if we are suspending */
	if (backend->suspend_timer)
		return false;

	/* nr_contexts_runnable is read here without further locking: the
	 * locking in the caller gives us a barrier that ensures the value is
	 * up-to-date for reading */
	nr_running_ctxs = atomic_read(&kbdev->js_data.nr_contexts_runnable);

#ifdef CONFIG_MALI_DEBUG
	/* Debug support for allowing soft-stop on a single context */
	if (kbdev->js_data.softstop_always)
		return true;
#endif /* CONFIG_MALI_DEBUG */

	/* Normal case: run the timer callback whenever there is at least 1
	 * context */
	if (!kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_9435))
		return (bool) (nr_running_ctxs > 0);

	/* BASE_HW_ISSUE_9435: timeouts would have to be 4x longer (due to
	 * micro-architectural design) to support OpenCL conformance tests, so
	 * only run the timer when there's:
	 * - 2 or more CL contexts
	 * - 1 or more GLES contexts
	 *
	 * NOTE: A context that has both Compute and Non-Compute jobs is
	 * treated as an OpenCL context (hence, we don't check
	 * KBASEP_JS_CTX_ATTR_NON_COMPUTE).
	 */
	nr_compute_ctxs = kbasep_js_ctx_attr_count_on_runpool(kbdev,
						KBASEP_JS_CTX_ATTR_COMPUTE);
	nr_noncompute_ctxs = nr_running_ctxs - nr_compute_ctxs;

	return (bool) (nr_compute_ctxs >= 2 || nr_noncompute_ctxs > 0);
}
/*
 * timer_callback - hrtimer handler of the job scheduler's scheduling timer
 * @timer: The scheduling_timer embedded in struct kbase_backend_data
 *
 * With hwaccess_lock held, visits every job slot and, for the atom at
 * ringbuffer index 0 of that slot (if any), advances the atom's tick count and
 * compares the pre-increment value against the soft-stop, hard-stop and GPU
 * reset thresholds. Which thresholds apply depends on CINSTR_DUMPING_ENABLED
 * and, when dumping is disabled, on whether the atom is compute-only.
 *
 * The timer is not restarted through the return value: while
 * backend->timer_running is set it is re-armed explicitly with
 * hrtimer_start() before the lock is dropped.
 *
 * Return: always HRTIMER_NORESTART
 */
static enum hrtimer_restart timer_callback(struct hrtimer *timer)
{
unsigned long flags;
struct kbase_device *kbdev;
struct kbasep_js_device_data *js_devdata;
struct kbase_backend_data *backend;
int s;
bool reset_needed = false;
KBASE_DEBUG_ASSERT(timer != NULL);
/* Recover the owning device from the embedded timer */
backend = container_of(timer, struct kbase_backend_data,
scheduling_timer);
kbdev = container_of(backend, struct kbase_device, hwaccess.backend);
js_devdata = &kbdev->js_data;
/* Loop through the slots */
spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
for (s = 0; s < kbdev->gpu_props.num_job_slots; s++) {
struct kbase_jd_atom *atom = NULL;
if (kbase_backend_nr_atoms_on_slot(kbdev, s) > 0) {
atom = kbase_gpu_inspect(kbdev, s, 0);
KBASE_DEBUG_ASSERT(atom != NULL);
}
if (atom != NULL) {
/* The current version of the model doesn't support
* Soft-Stop */
if (!kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_5736)) {
/* Post-increment: 'ticks' is the count before this
* timer tick was accounted for */
u32 ticks = atom->sched_info.cfs.ticks++;
#if !CINSTR_DUMPING_ENABLED
u32 soft_stop_ticks, hard_stop_ticks,
gpu_reset_ticks;
/* Compute-only atoms use the *_cl thresholds */
if (atom->core_req & BASE_JD_REQ_ONLY_COMPUTE) {
soft_stop_ticks =
js_devdata->soft_stop_ticks_cl;
hard_stop_ticks =
js_devdata->hard_stop_ticks_cl;
gpu_reset_ticks =
js_devdata->gpu_reset_ticks_cl;
} else {
soft_stop_ticks =
js_devdata->soft_stop_ticks;
hard_stop_ticks =
js_devdata->hard_stop_ticks_ss;
gpu_reset_ticks =
js_devdata->gpu_reset_ticks_ss;
}
/* If timeouts have been changed then ensure
* that atom tick count is not greater than the
* new soft_stop timeout. This ensures that
* atoms do not miss any of the timeouts due to
* races between this worker and the thread
* changing the timeouts. */
if (backend->timeouts_updated &&
ticks > soft_stop_ticks)
ticks = atom->sched_info.cfs.ticks =
soft_stop_ticks;
/* Job is Soft-Stoppable */
if (ticks == soft_stop_ticks) {
int disjoint_threshold =
KBASE_DISJOINT_STATE_INTERLEAVED_CONTEXT_COUNT_THRESHOLD;
u32 softstop_flags = 0u;
/* Job has been scheduled for at least
* js_devdata->soft_stop_ticks ticks.
* Soft stop the slot so we can run
* other jobs.
*/
dev_dbg(kbdev->dev, "Soft-stop");
#if !KBASE_DISABLE_SCHEDULING_SOFT_STOPS
/* nr_user_contexts_running is updated
* with the runpool_mutex, but we can't
* take that here.
*
* However, if it's about to be
* increased then the new context can't
* run any jobs until they take the
* hwaccess_lock, so it's OK to observe
* the older value.
*
* Similarly, if it's about to be
* decreased, the last job from another
* context has already finished, so it's
* not too bad that we observe the older
* value and register a disjoint event
* when we try soft-stopping */
if (js_devdata->nr_user_contexts_running
>= disjoint_threshold)
softstop_flags |=
JS_COMMAND_SW_CAUSES_DISJOINT;
kbase_job_slot_softstop_swflags(kbdev,
s, atom, softstop_flags);
#endif
} else if (ticks == hard_stop_ticks) {
/* Job has been scheduled for at least
* js_devdata->hard_stop_ticks_ss ticks.
* It should have been soft-stopped by
* now. Hard stop the slot.
*/
#if !KBASE_DISABLE_SCHEDULING_HARD_STOPS
int ms =
js_devdata->scheduling_period_ns
/ 1000000u;
dev_warn(kbdev->dev, "JS: Job Hard-Stopped (took more than %lu ticks at %lu ms/tick)",
(unsigned long)ticks,
(unsigned long)ms);
kbase_job_slot_hardstop(atom->kctx, s,
atom);
#endif
} else if (ticks == gpu_reset_ticks) {
/* Job has been scheduled for at least
* js_devdata->gpu_reset_ticks_ss ticks.
* It should have left the GPU by now.
* Signal that the GPU needs to be
* reset.
*/
reset_needed = true;
}
#else /* !CINSTR_DUMPING_ENABLED */
/* NOTE: During CINSTR_DUMPING_ENABLED, we use
* the alternate timeouts, which makes the hard-
* stop and GPU reset timeout much longer. We
* also ensure that we don't soft-stop at all.
*/
if (ticks == js_devdata->soft_stop_ticks) {
/* Job has been scheduled for at least
* js_devdata->soft_stop_ticks. We do
* not soft-stop during
* CINSTR_DUMPING_ENABLED, however.
*/
dev_dbg(kbdev->dev, "Soft-stop");
} else if (ticks ==
js_devdata->hard_stop_ticks_dumping) {
/* Job has been scheduled for at least
* js_devdata->hard_stop_ticks_dumping
* ticks. Hard stop the slot.
*/
#if !KBASE_DISABLE_SCHEDULING_HARD_STOPS
int ms =
js_devdata->scheduling_period_ns
/ 1000000u;
dev_warn(kbdev->dev, "JS: Job Hard-Stopped (took more than %lu ticks at %lu ms/tick)",
(unsigned long)ticks,
(unsigned long)ms);
kbase_job_slot_hardstop(atom->kctx, s,
atom);
#endif
} else if (ticks ==
js_devdata->gpu_reset_ticks_dumping) {
/* Job has been scheduled for at least
* js_devdata->gpu_reset_ticks_dumping
* ticks. It should have left the GPU by
* now. Signal that the GPU needs to be
* reset.
*/
reset_needed = true;
}
#endif /* !CINSTR_DUMPING_ENABLED */
}
}
}
#if KBASE_GPU_RESET_EN
/* One reset request covers all slots that hit their reset threshold */
if (reset_needed) {
dev_err(kbdev->dev, "JS: Job has been on the GPU for too long (JS_RESET_TICKS_SS/DUMPING timeout hit). Issueing GPU soft-reset to resolve.");
if (kbase_prepare_to_reset_gpu_locked(kbdev))
kbase_reset_gpu_locked(kbdev);
}
#endif /* KBASE_GPU_RESET_EN */
/* The timer is re-armed for as long as timer_running is set; that flag
* is changed under hwaccess_lock by kbase_backend_ctx_count_changed() */
if (backend->timer_running)
hrtimer_start(&backend->scheduling_timer,
HR_TIMER_DELAY_NSEC(js_devdata->scheduling_period_ns),
HRTIMER_MODE_REL);
/* Any timeout update has been taken into account by the loop above */
backend->timeouts_updated = false;
spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
return HRTIMER_NORESTART;
}
/*
 * Re-evaluate whether the scheduling timer should run and stop or start it
 * accordingly. The caller must hold the runpool_mutex.
 */
void kbase_backend_ctx_count_changed(struct kbase_device *kbdev)
{
	struct kbase_backend_data *backend = &kbdev->hwaccess.backend;
	struct kbasep_js_device_data *js_devdata = &kbdev->js_data;
	unsigned long irq_flags;

	lockdep_assert_held(&js_devdata->runpool_mutex);

	if (timer_callback_should_run(kbdev) == false) {
		/* Take spinlock to force synchronisation with timer */
		spin_lock_irqsave(&kbdev->hwaccess_lock, irq_flags);
		backend->timer_running = false;
		spin_unlock_irqrestore(&kbdev->hwaccess_lock, irq_flags);

		/* From now on the cleared flag also stops the timer from
		 * requeueing itself. The return value of
		 * timer_callback_should_run() cannot change meanwhile, because
		 * it depends on variables updated with the runpool_mutex held,
		 * which the caller of this must also hold */
		hrtimer_cancel(&backend->scheduling_timer);
	}

	/* Nothing to start if the timer is not wanted or already running */
	if (!timer_callback_should_run(kbdev) || backend->timer_running)
		return;

	/* Take spinlock to force synchronisation with timer */
	spin_lock_irqsave(&kbdev->hwaccess_lock, irq_flags);
	backend->timer_running = true;
	spin_unlock_irqrestore(&kbdev->hwaccess_lock, irq_flags);

	hrtimer_start(&backend->scheduling_timer,
			HR_TIMER_DELAY_NSEC(js_devdata->scheduling_period_ns),
			HRTIMER_MODE_REL);

	KBASE_TRACE_ADD(kbdev, JS_POLICY_TIMER_START, NULL, NULL, 0u,
			0u);
}
/*
 * Set up the scheduling timer as a relative CLOCK_MONOTONIC hrtimer serviced
 * by timer_callback(). The timer is initialised but not started here.
 */
int kbase_backend_timer_init(struct kbase_device *kbdev)
{
	hrtimer_init(&kbdev->hwaccess.backend.scheduling_timer,
			CLOCK_MONOTONIC, HRTIMER_MODE_REL);
	kbdev->hwaccess.backend.scheduling_timer.function = timer_callback;

	kbdev->hwaccess.backend.timer_running = false;

	return 0;
}
/* Cancel the scheduling timer on driver termination. */
void kbase_backend_timer_term(struct kbase_device *kbdev)
{
	hrtimer_cancel(&kbdev->hwaccess.backend.scheduling_timer);
}
/*
 * Mark the timer as suspended, then re-evaluate it so that a running timer is
 * stopped: timer_callback_should_run() returns false while suspend_timer is
 * set.
 */
void kbase_backend_timer_suspend(struct kbase_device *kbdev)
{
	kbdev->hwaccess.backend.suspend_timer = true;

	kbase_backend_ctx_count_changed(kbdev);
}
/*
 * Clear the suspended mark, then re-evaluate the timer; it is only restarted
 * if timer_callback_should_run() now says so.
 */
void kbase_backend_timer_resume(struct kbase_device *kbdev)
{
	kbdev->hwaccess.backend.suspend_timer = false;

	kbase_backend_ctx_count_changed(kbdev);
}
/*
 * Record that the scheduling timeouts have been modified. timer_callback()
 * uses the flag to clamp atom tick counts to the new soft-stop timeout and
 * clears it afterwards.
 */
void kbase_backend_timeouts_changed(struct kbase_device *kbdev)
{
	kbdev->hwaccess.backend.timeouts_updated = true;
}

View File

@ -0,0 +1,69 @@
/*
*
* (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
* Foundation, and any use by you of this program is subject to the terms
* of such GNU licence.
*
* A copy of the licence is included with the program, and can also be obtained
* from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
* Boston, MA 02110-1301, USA.
*
*/
/*
* Register-based HW access backend specific job scheduler APIs
*/
#ifndef _KBASE_JS_BACKEND_H_
#define _KBASE_JS_BACKEND_H_
/**
* kbase_backend_timer_init() - Initialise the JS scheduling timer
* @kbdev: Device pointer
*
* This function should be called at driver initialisation
*
* Return: 0 on success
*/
int kbase_backend_timer_init(struct kbase_device *kbdev);
/**
* kbase_backend_timer_term() - Terminate the JS scheduling timer
* @kbdev: Device pointer
*
* This function should be called at driver termination
*/
void kbase_backend_timer_term(struct kbase_device *kbdev);
/**
* kbase_backend_timer_suspend - Suspend is happening, stop the JS scheduling
* timer
* @kbdev: Device pointer
*
* This function should be called on suspend, after the active count has reached
* zero. This is required as the timer may have been started on job submission
* to the job scheduler, but before jobs are submitted to the GPU.
*
* Caller must hold runpool_mutex.
*/
void kbase_backend_timer_suspend(struct kbase_device *kbdev);
/**
* kbase_backend_timer_resume - Resume is happening, re-evaluate the JS
* scheduling timer
* @kbdev: Device pointer
*
* This function should be called on resume. Note that it is not guaranteed to
* re-start the timer, only to evaluate whether it should be re-started.
*
* Caller must hold runpool_mutex.
*/
void kbase_backend_timer_resume(struct kbase_device *kbdev);
#endif /* _KBASE_JS_BACKEND_H_ */

View File

@ -0,0 +1,409 @@
/*
*
* (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
* Foundation, and any use by you of this program is subject to the terms
* of such GNU licence.
*
* A copy of the licence is included with the program, and can also be obtained
* from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
* Boston, MA 02110-1301, USA.
*
*/
/* #define ENABLE_DEBUG_LOG */
#include "../../platform/rk/custom_log.h"
#include <linux/bitops.h>
#include <mali_kbase.h>
#include <mali_kbase_mem.h>
#include <mali_kbase_mmu_hw.h>
#include <mali_kbase_tlstream.h>
#include <backend/gpu/mali_kbase_device_internal.h>
#include <mali_kbase_as_fault_debugfs.h>
/*
 * Build a lock-region descriptor: the address of page @pfn with a region width
 * code in the low bits. The width is 10 + fls(@num_pages), plus one more when
 * @num_pages is not a power of two so that the region covers the whole range.
 *
 * @kbdev is not used by this implementation.
 */
static inline u64 lock_region(struct kbase_device *kbdev, u64 pfn,
		u32 num_pages)
{
	u64 region;
	u8 region_width;

	/* can't lock a zero sized range */
	KBASE_DEBUG_ASSERT(num_pages);

	region = pfn << PAGE_SHIFT;

	/* gracefully handle num_pages being zero (when the assert above is
	 * compiled out) by using width code 11 */
	if (num_pages == 0)
		return region | 11;

	/*
	 * fls returns 1 .. 32 for a non-zero value, so
	 * 10 + fls(num_pages) is in the range (11 .. 42)
	 */
	region_width = 10 + fls(num_pages);

	/* more than one bit set: not pow2, so must go up to the next pow2 */
	if (num_pages & (num_pages - 1))
		region_width++;

	KBASE_DEBUG_ASSERT(region_width <= KBASE_LOCK_REGION_MAX_SIZE);
	KBASE_DEBUG_ASSERT(region_width >= KBASE_LOCK_REGION_MIN_SIZE);

	return region | region_width;
}
/*
 * Poll AS_STATUS of address space @as_nr until AS_STATUS_AS_ACTIVE is clear,
 * giving up once the KBASE_AS_INACTIVE_MAX_LOOPS budget is used up.
 *
 * Returns 0 when the address space is idle, -1 if the budget ran out.
 */
static int wait_ready(struct kbase_device *kbdev,
		unsigned int as_nr, struct kbase_context *kctx)
{
	unsigned int loops_left = KBASE_AS_INACTIVE_MAX_LOOPS;
	u32 status = kbase_reg_read(kbdev, MMU_AS_REG(as_nr, AS_STATUS), kctx);

	/* Wait for the MMU status to indicate there is no active command, in
	 * case one is pending. Do not log remaining register accesses (hence
	 * the NULL context). */
	for (;;) {
		loops_left--;
		if (loops_left == 0)
			break;
		if (!(status & AS_STATUS_AS_ACTIVE))
			break;

		status = kbase_reg_read(kbdev, MMU_AS_REG(as_nr, AS_STATUS),
				NULL);
	}

	if (loops_left == 0) {
		dev_err(kbdev->dev, "AS_ACTIVE bit stuck\n");
		return -1;
	}

	/* If waiting in loop was performed, log last read value. */
	if (loops_left < KBASE_AS_INACTIVE_MAX_LOOPS - 1)
		kbase_reg_read(kbdev, MMU_AS_REG(as_nr, AS_STATUS), kctx);

	return 0;
}
/*
 * Write @cmd to AS_COMMAND of address space @as_nr once the MMU is ready to
 * accept another command.
 *
 * Returns the result of wait_ready(); the command is only written when that
 * is 0.
 */
static int write_cmd(struct kbase_device *kbdev, int as_nr, u32 cmd,
		struct kbase_context *kctx)
{
	const int status = wait_ready(kbdev, as_nr, kctx);

	if (status != 0)
		return status;

	kbase_reg_write(kbdev, MMU_AS_REG(as_nr, AS_COMMAND), cmd, kctx);

	return 0;
}
static void validate_protected_page_fault(struct kbase_device *kbdev,
struct kbase_context *kctx)
{
/* GPUs which support (native) protected mode shall not report page
* fault addresses unless it has protected debug mode and protected
* debug mode is turned on */
u32 protected_debug_mode = 0;
if (!kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_PROTECTED_MODE))
return;
if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_PROTECTED_DEBUG_MODE)) {
protected_debug_mode = kbase_reg_read(kbdev,
GPU_CONTROL_REG(GPU_STATUS),
kctx) & GPU_DBGEN;
}
if (!protected_debug_mode) {
/* fault_addr should never be reported in protected mode.
* However, we just continue by printing an error message */
dev_err(kbdev->dev, "Fault address reported in protected mode\n");
}
}
/*
 * kbase_mmu_interrupt - Handle an MMU interrupt
 * @kbdev:    Device pointer
 * @irq_stat: MMU interrupt status. One bit per address space: page-fault bits
 *            start at bit 0, bus-fault bits at bit MMU_PAGE_FAULT_FLAGS. Only
 *            16 address spaces are considered.
 *
 * Masks all MMU interrupts, then for each faulting address space (lowest
 * number first) records the fault address, status and type in the struct
 * kbase_as and passes it to kbase_mmu_interrupt_process(). An address space
 * with both a bus fault and a page fault pending is handled as a bus fault
 * only.
 *
 * On exit the interrupt mask is rewritten as the mask saved on entry minus the
 * bits of the faults handed over for processing, OR'ed with whatever is in the
 * mask register at that point.
 */
void kbase_mmu_interrupt(struct kbase_device *kbdev, u32 irq_stat)
{
const int num_as = 16;
const int busfault_shift = MMU_PAGE_FAULT_FLAGS;
const int pf_shift = 0;
const unsigned long as_bit_mask = (1UL << num_as) - 1;
unsigned long flags;
u32 new_mask;
u32 tmp;
/* bus faults */
u32 bf_bits = (irq_stat >> busfault_shift) & as_bit_mask;
/* page faults (note: Ignore ASes with both pf and bf) */
u32 pf_bits = ((irq_stat >> pf_shift) & as_bit_mask) & ~bf_bits;
KBASE_DEBUG_ASSERT(NULL != kbdev);
/* remember current mask */
spin_lock_irqsave(&kbdev->mmu_mask_change, flags);
new_mask = kbase_reg_read(kbdev, MMU_REG(MMU_IRQ_MASK), NULL);
/* mask interrupts for now */
kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_MASK), 0, NULL);
spin_unlock_irqrestore(&kbdev->mmu_mask_change, flags);
while (bf_bits | pf_bits) {
struct kbase_as *as;
int as_no;
struct kbase_context *kctx;
/*
* the while logic ensures we have a bit set, no need to check
* for not-found here
*/
as_no = ffs(bf_bits | pf_bits) - 1;
as = &kbdev->as[as_no];
/*
* Refcount the kctx ASAP - it shouldn't disappear anyway, since
* Bus/Page faults _should_ only occur whilst jobs are running,
* and a job causing the Bus/Page fault shouldn't complete until
* the MMU is updated
*/
kctx = kbasep_js_runpool_lookup_ctx(kbdev, as_no);
if (!kctx) {
/* NOTE(review): breaking out here leaves the fault on this
* address space, and on any higher-numbered one still pending in
* bf_bits/pf_bits, unprocessed, while their interrupt bits stay
* set in new_mask and are re-enabled below - confirm this is
* intended. */
E("fail to lookup ctx, to break out.");
break;
}
/* find faulting address */
as->fault_addr = kbase_reg_read(kbdev,
MMU_AS_REG(as_no,
AS_FAULTADDRESS_HI),
kctx);
as->fault_addr <<= 32;
as->fault_addr |= kbase_reg_read(kbdev,
MMU_AS_REG(as_no,
AS_FAULTADDRESS_LO),
kctx);
/* Mark the fault protected or not */
as->protected_mode = kbdev->protected_mode;
if (kbdev->protected_mode && as->fault_addr)
{
/* check if address reporting is allowed */
validate_protected_page_fault(kbdev, kctx);
}
/* report the fault to debugfs */
kbase_as_fault_debugfs_new(kbdev, as_no);
/* record the fault status */
as->fault_status = kbase_reg_read(kbdev,
MMU_AS_REG(as_no,
AS_FAULTSTATUS),
kctx);
/* find the fault type */
as->fault_type = (bf_bits & (1 << as_no)) ?
KBASE_MMU_FAULT_TYPE_BUS :
KBASE_MMU_FAULT_TYPE_PAGE;
#ifdef CONFIG_MALI_GPU_MMU_AARCH64
as->fault_extra_addr = kbase_reg_read(kbdev,
MMU_AS_REG(as_no, AS_FAULTEXTRA_HI),
kctx);
as->fault_extra_addr <<= 32;
as->fault_extra_addr |= kbase_reg_read(kbdev,
MMU_AS_REG(as_no, AS_FAULTEXTRA_LO),
kctx);
#endif /* CONFIG_MALI_GPU_MMU_AARCH64 */
if (kbase_as_has_bus_fault(as)) {
/* Mark bus fault as handled.
* Note that a bus fault is processed first in case
* where both a bus fault and page fault occur.
*/
bf_bits &= ~(1UL << as_no);
/* remove the queued BF (and PF) from the mask */
new_mask &= ~(MMU_BUS_ERROR(as_no) |
MMU_PAGE_FAULT(as_no));
} else {
/* Mark page fault as handled */
pf_bits &= ~(1UL << as_no);
/* remove the queued PF from the mask */
new_mask &= ~MMU_PAGE_FAULT(as_no);
}
/* Process the interrupt for this address space */
spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
kbase_mmu_interrupt_process(kbdev, kctx, as);
spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
}
/* reenable interrupts, keeping any mask bits that were set in the register
* while the faults were being processed */
spin_lock_irqsave(&kbdev->mmu_mask_change, flags);
tmp = kbase_reg_read(kbdev, MMU_REG(MMU_IRQ_MASK), NULL);
new_mask |= tmp;
kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_MASK), new_mask, NULL);
spin_unlock_irqrestore(&kbdev->mmu_mask_change, flags);
}
/**
 * kbase_mmu_hw_configure - Write an address space's translation setup to HW.
 * @kbdev: kbase device that owns the MMU
 * @as:    address space whose current_setup is programmed
 * @kctx:  context handed through to the register accessors and write_cmd()
 *
 * Programs the TRANSTAB and MEMATTR register pairs from @as->current_setup
 * (preceded by the TRANSCFG pair when CONFIG_MALI_GPU_MMU_AARCH64 is set),
 * reports the configuration to the timeline stream and finally issues
 * AS_COMMAND_UPDATE for the address space.
 */
void kbase_mmu_hw_configure(struct kbase_device *kbdev, struct kbase_as *as,
		struct kbase_context *kctx)
{
	struct kbase_mmu_setup *setup = &as->current_setup;
	u32 transcfg = 0;

#ifdef CONFIG_MALI_GPU_MMU_AARCH64
	/* Low word of TRANSCFG with PTW_MEMATTR replaced by write-back */
	transcfg = setup->transcfg & 0xFFFFFFFFUL;
	transcfg = (transcfg & ~AS_TRANSCFG_PTW_MEMATTR_MASK) |
			AS_TRANSCFG_PTW_MEMATTR_WRITE_BACK;

	if (kbdev->system_coherency == COHERENCY_ACE) {
		/* With ACE coherency, PTW_SH is set to outer shareable */
		transcfg = (transcfg & ~AS_TRANSCFG_PTW_SH_MASK) |
				AS_TRANSCFG_PTW_SH_OS;
	}

	kbase_reg_write(kbdev, MMU_AS_REG(as->number, AS_TRANSCFG_LO),
			transcfg, kctx);
	/* The high word is written straight from the stored setup */
	kbase_reg_write(kbdev, MMU_AS_REG(as->number, AS_TRANSCFG_HI),
			(setup->transcfg >> 32) & 0xFFFFFFFFUL, kctx);
#else /* CONFIG_MALI_GPU_MMU_AARCH64 */
	/* Note: this updates the stored transtab, not just the value written */
	if (kbdev->system_coherency == COHERENCY_ACE)
		setup->transtab |= AS_TRANSTAB_LPAE_SHARE_OUTER;
#endif /* CONFIG_MALI_GPU_MMU_AARCH64 */

	/* Translation table base: low word first, then high word */
	kbase_reg_write(kbdev, MMU_AS_REG(as->number, AS_TRANSTAB_LO),
			setup->transtab & 0xFFFFFFFFUL, kctx);
	kbase_reg_write(kbdev, MMU_AS_REG(as->number, AS_TRANSTAB_HI),
			(setup->transtab >> 32) & 0xFFFFFFFFUL, kctx);

	/* Memory attributes: low word first, then high word */
	kbase_reg_write(kbdev, MMU_AS_REG(as->number, AS_MEMATTR_LO),
			setup->memattr & 0xFFFFFFFFUL, kctx);
	kbase_reg_write(kbdev, MMU_AS_REG(as->number, AS_MEMATTR_HI),
			(setup->memattr >> 32) & 0xFFFFFFFFUL, kctx);

	kbase_tlstream_tl_attrib_as_config(as,
			setup->transtab,
			setup->memattr,
			transcfg);

	/* Issue UPDATE only after all of the registers above are written */
	write_cmd(kbdev, as->number, AS_COMMAND_UPDATE, kctx);
}
/**
 * kbase_mmu_hw_do_operation - Issue an MMU command for an address space.
 * @kbdev:        kbase device; mmu_hw_mutex must be held by the caller
 * @as:           address space the command targets
 * @kctx:         context handed through to the register accessors
 * @vpfn:         first virtual page frame number of the affected region
 * @nr:           number of pages in the affected region
 * @op:           AS_COMMAND_* value to issue
 * @handling_irq: not referenced by this implementation
 *
 * AS_COMMAND_UNLOCK is issued directly. Any other command is preceded by
 * locking the region computed by lock_region() and followed by a wait for the
 * address space to become ready.
 *
 * Return: the result of write_cmd() for AS_COMMAND_UNLOCK, otherwise the
 * result of wait_ready().
 */
int kbase_mmu_hw_do_operation(struct kbase_device *kbdev, struct kbase_as *as,
		struct kbase_context *kctx, u64 vpfn, u32 nr, u32 op,
		unsigned int handling_irq)
{
	u64 lock_addr;
	int ret;

	lockdep_assert_held(&kbdev->mmu_hw_mutex);

	/* Unlock doesn't require a lock first */
	if (op == AS_COMMAND_UNLOCK)
		return write_cmd(kbdev, as->number, AS_COMMAND_UNLOCK, kctx);

	/* Lock the region that needs to be updated */
	lock_addr = lock_region(kbdev, vpfn, nr);
	kbase_reg_write(kbdev, MMU_AS_REG(as->number, AS_LOCKADDR_LO),
			lock_addr & 0xFFFFFFFFUL, kctx);
	kbase_reg_write(kbdev, MMU_AS_REG(as->number, AS_LOCKADDR_HI),
			(lock_addr >> 32) & 0xFFFFFFFFUL, kctx);
	write_cmd(kbdev, as->number, AS_COMMAND_LOCK, kctx);

	/* Run the requested operation and wait for it to complete */
	write_cmd(kbdev, as->number, op, kctx);
	ret = wait_ready(kbdev, as->number, kctx);

	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_9630)) {
		/*
		 * Workaround, see PRLAM-8812: the FLUSH command should be
		 * sufficient, but bus logs show that when multiple page
		 * faults occur within an 8 page region the MMU does not
		 * always re-read the updated page table entries (or reads
		 * them only partially) and then raises the page fault IRQ
		 * again for the same addresses. An UNLOCK flushes the MMU
		 * cache so the updates are re-read. As the region is already
		 * unlocked at this point, two UNLOCK commands are needed to
		 * flush the MMU/uTLB.
		 */
		write_cmd(kbdev, as->number, AS_COMMAND_UNLOCK, kctx);
		write_cmd(kbdev, as->number, AS_COMMAND_UNLOCK, kctx);
	}

	return ret;
}
/**
 * kbase_mmu_hw_clear_fault - Clear the fault IRQ(s) of an address space.
 * @kbdev: kbase device
 * @as:    address space whose fault is being cleared
 * @kctx:  context handed through to kbase_reg_write()
 * @type:  fault type; the bus fault bit is cleared as well for
 *         KBASE_MMU_FAULT_TYPE_BUS and KBASE_MMU_FAULT_TYPE_BUS_UNEXPECTED
 *
 * Runs under the mmu_mask_change spinlock. Nothing is written while
 * kbdev->irq_reset_flush is set.
 */
void kbase_mmu_hw_clear_fault(struct kbase_device *kbdev, struct kbase_as *as,
		struct kbase_context *kctx, enum kbase_mmu_fault_type type)
{
	unsigned long flags;
	u32 pf_bf_mask;

	spin_lock_irqsave(&kbdev->mmu_mask_change, flags);

	/*
	 * While a reset is in-flight the IRQ + bottom half are being flushed,
	 * so nothing may be updated here as it could race with the reset code.
	 */
	if (!kbdev->irq_reset_flush) {
		/* The page fault bit is always cleared */
		pf_bf_mask = MMU_PAGE_FAULT(as->number);

		/* The bus fault bit is added only for bus fault types */
		if (type == KBASE_MMU_FAULT_TYPE_BUS ||
		    type == KBASE_MMU_FAULT_TYPE_BUS_UNEXPECTED)
			pf_bf_mask |= MMU_BUS_ERROR(as->number);

		kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_CLEAR), pf_bf_mask,
				kctx);
	}

	spin_unlock_irqrestore(&kbdev->mmu_mask_change, flags);
}
/**
 * kbase_mmu_hw_enable_fault - Unmask the fault IRQ(s) of an address space.
 * @kbdev: kbase device
 * @as:    address space whose fault IRQ is being enabled
 * @kctx:  context handed through to the register accessors
 * @type:  fault type; the bus fault bit is enabled as well for
 *         KBASE_MMU_FAULT_TYPE_BUS and KBASE_MMU_FAULT_TYPE_BUS_UNEXPECTED
 *
 * Read-modify-writes MMU_IRQ_MASK under the mmu_mask_change spinlock.
 * Nothing is written while kbdev->irq_reset_flush is set.
 */
void kbase_mmu_hw_enable_fault(struct kbase_device *kbdev, struct kbase_as *as,
		struct kbase_context *kctx, enum kbase_mmu_fault_type type)
{
	unsigned long flags;
	u32 irq_mask;

	spin_lock_irqsave(&kbdev->mmu_mask_change, flags);

	/*
	 * While a reset is in-flight the IRQ + bottom half are being flushed,
	 * so nothing may be updated here as it could race with the reset code.
	 */
	if (!kbdev->irq_reset_flush) {
		/* Start from the current mask and add the page fault bit */
		irq_mask = kbase_reg_read(kbdev, MMU_REG(MMU_IRQ_MASK), kctx) |
				MMU_PAGE_FAULT(as->number);

		/* The bus fault bit is added only for bus fault types */
		if (type == KBASE_MMU_FAULT_TYPE_BUS ||
		    type == KBASE_MMU_FAULT_TYPE_BUS_UNEXPECTED)
			irq_mask |= MMU_BUS_ERROR(as->number);

		kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_MASK), irq_mask, kctx);
	}

	spin_unlock_irqrestore(&kbdev->mmu_mask_change, flags);
}

View File

@ -0,0 +1,42 @@
/*
*
* (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
* Foundation, and any use by you of this program is subject to the terms
* of such GNU licence.
*
* A copy of the licence is included with the program, and can also be obtained
* from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
* Boston, MA 02110-1301, USA.
*
*/
/*
* Interface file for the direct implementation for MMU hardware access
*
* Direct MMU hardware interface
*
* This module provides the interface(s) that are required by the direct
* register access implementation of the MMU hardware interface
*/
#ifndef _MALI_KBASE_MMU_HW_DIRECT_H_
#define _MALI_KBASE_MMU_HW_DIRECT_H_
#include <mali_kbase_defs.h>
/**
* kbase_mmu_interrupt - Process an MMU interrupt.
*
* Process the MMU interrupt that was reported by the &kbase_device.
*
* @kbdev: kbase device that reported the MMU interrupt.
* @irq_stat: Value of the MMU_IRQ_STATUS register
*/
void kbase_mmu_interrupt(struct kbase_device *kbdev, u32 irq_stat);
#endif /* _MALI_KBASE_MMU_HW_DIRECT_H_ */

View File

@ -0,0 +1,63 @@
/*
*
* (C) COPYRIGHT 2010-2015 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
* Foundation, and any use by you of this program is subject to the terms
* of such GNU licence.
*
* A copy of the licence is included with the program, and can also be obtained
* from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
* Boston, MA 02110-1301, USA.
*
*/
/*
* "Always on" power management policy
*/
#include <mali_kbase.h>
#include <mali_kbase_pm.h>
/* get_core_mask callback: returns the raw shader_present mask unchanged. */
static u64 always_on_get_core_mask(struct kbase_device *kbdev)
{
return kbdev->gpu_props.props.raw_props.shader_present;
}
/* get_core_active callback: unconditionally true; @kbdev is not consulted. */
static bool always_on_get_core_active(struct kbase_device *kbdev)
{
return true;
}
/* init callback: this policy has nothing to set up. */
static void always_on_init(struct kbase_device *kbdev)
{
CSTD_UNUSED(kbdev);
}
/* term callback: this policy has nothing to tear down. */
static void always_on_term(struct kbase_device *kbdev)
{
CSTD_UNUSED(kbdev);
}
/*
 * The struct kbase_pm_policy structure for the "always on" power policy.
 *
 * This is the static structure that defines the "always on" power policy's
 * callbacks and name. The initializer is positional, so the entries must stay
 * in the order noted in the trailing comments.
 */
const struct kbase_pm_policy kbase_pm_always_on_policy_ops = {
"always_on", /* name */
always_on_init, /* init */
always_on_term, /* term */
always_on_get_core_mask, /* get_core_mask */
always_on_get_core_active, /* get_core_active */
0u, /* flags */
KBASE_PM_POLICY_ID_ALWAYS_ON, /* id */
};
KBASE_EXPORT_TEST_API(kbase_pm_always_on_policy_ops);

View File

@ -0,0 +1,77 @@
/*
*
* (C) COPYRIGHT 2011-2015 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
* Foundation, and any use by you of this program is subject to the terms
* of such GNU licence.
*
* A copy of the licence is included with the program, and can also be obtained
* from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
* Boston, MA 02110-1301, USA.
*
*/
/*
* "Always on" power management policy
*/
#ifndef MALI_KBASE_PM_ALWAYS_ON_H
#define MALI_KBASE_PM_ALWAYS_ON_H
/**
* DOC:
* The "Always on" power management policy has the following
* characteristics:
*
* - When KBase indicates that the GPU will be powered up, but we don't yet
* know which Job Chains are to be run:
* All Shader Cores are powered up, regardless of whether or not they will
* be needed later.
*
* - When KBase indicates that a set of Shader Cores are needed to submit the
* currently queued Job Chains:
* All Shader Cores are kept powered, regardless of whether or not they will
* be needed
*
* - When KBase indicates that the GPU need not be powered:
* The Shader Cores are kept powered, regardless of whether or not they will
* be needed. The GPU itself is also kept powered, even though it is not
* needed.
*
* This policy is automatically overridden during system suspend: the desired
* core state is ignored, and the cores are forced off regardless of what the
* policy requests. After resuming from suspend, new changes to the desired
* core state made by the policy are honored.
*
* Note:
*
* - KBase indicates the GPU will be powered up when it has a User Process that
* has just started to submit Job Chains.
*
* - KBase indicates the GPU need not be powered when all the Job Chains from
* User Processes have finished, and it is waiting for a User Process to
* submit some more Job Chains.
*/
/**
 * struct kbasep_pm_policy_always_on - Private struct for policy instance data
 * @dummy: unused dummy variable
 *
 * This contains data that is private to the particular power policy that is
 * active. The "always on" policy keeps no per-instance state, so the struct
 * only carries a placeholder member.
 */
struct kbasep_pm_policy_always_on {
int dummy;
};
extern const struct kbase_pm_policy kbase_pm_always_on_policy_ops;
#endif /* MALI_KBASE_PM_ALWAYS_ON_H */

View File

@ -0,0 +1,466 @@
/*
*
* (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
* Foundation, and any use by you of this program is subject to the terms
* of such GNU licence.
*
* A copy of the licence is included with the program, and can also be obtained
* from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
* Boston, MA 02110-1301, USA.
*
*/
/*
* GPU backend implementation of base kernel power management APIs
*/
#include <mali_kbase.h>
#include <mali_midg_regmap.h>
#include <mali_kbase_config_defaults.h>
#ifdef CONFIG_MALI_PLATFORM_DEVICETREE
#include <linux/pm_runtime.h>
#endif /* CONFIG_MALI_PLATFORM_DEVICETREE */
#include <mali_kbase_pm.h>
#include <mali_kbase_hwaccess_jm.h>
#include <backend/gpu/mali_kbase_js_internal.h>
#include <backend/gpu/mali_kbase_pm_internal.h>
static void kbase_pm_gpu_poweroff_wait_wq(struct work_struct *data);
void kbase_pm_register_access_enable(struct kbase_device *kbdev)
{
struct kbase_pm_callback_conf *callbacks;
callbacks = (struct kbase_pm_callback_conf *)POWER_MANAGEMENT_CALLBACKS;
if (callbacks)
callbacks->power_on_callback(kbdev);
kbdev->pm.backend.gpu_powered = true;
}
void kbase_pm_register_access_disable(struct kbase_device *kbdev)
{
struct kbase_pm_callback_conf *callbacks;
callbacks = (struct kbase_pm_callback_conf *)POWER_MANAGEMENT_CALLBACKS;
if (callbacks)
callbacks->power_off_callback(kbdev);
kbdev->pm.backend.gpu_powered = false;
}
/**
 * kbase_hwaccess_pm_init - Initialise the power management backend.
 * @kbdev: The kbase device structure for the device (must be a valid pointer)
 *
 * Allocates the power-off wait workqueue, resets the backend state, copies
 * the platform callbacks from POWER_MANAGEMENT_CALLBACKS (or clears them when
 * none are configured) and initialises the metrics, core availability and
 * power policy frameworks.
 *
 * On failure everything that was set up by this function, including the
 * workqueue, is released again before returning.
 * NOTE(review): this assumes callers do not invoke kbase_hwaccess_pm_term()
 * after a failed init - confirm against the caller.
 *
 * Return: 0 on success, -ENOMEM if the workqueue cannot be allocated, the
 * error from kbasep_pm_metrics_init(), or -EINVAL if the core availability
 * or power policy framework fails to initialise.
 */
int kbase_hwaccess_pm_init(struct kbase_device *kbdev)
{
	int ret = 0;
	struct kbase_pm_callback_conf *callbacks;

	KBASE_DEBUG_ASSERT(kbdev != NULL);

	mutex_init(&kbdev->pm.lock);

	kbdev->pm.backend.gpu_poweroff_wait_wq =
			alloc_workqueue("kbase_pm_poweroff_wait",
					WQ_HIGHPRI | WQ_UNBOUND, 1);
	if (!kbdev->pm.backend.gpu_poweroff_wait_wq)
		return -ENOMEM;

	INIT_WORK(&kbdev->pm.backend.gpu_poweroff_wait_work,
			kbase_pm_gpu_poweroff_wait_wq);

	kbdev->pm.backend.gpu_powered = false;
	kbdev->pm.suspending = false;
#ifdef CONFIG_MALI_DEBUG
	kbdev->pm.backend.driver_ready_for_irqs = false;
#endif /* CONFIG_MALI_DEBUG */
	kbdev->pm.backend.gpu_in_desired_state = true;
	init_waitqueue_head(&kbdev->pm.backend.gpu_in_desired_state_wait);

	callbacks = (struct kbase_pm_callback_conf *)POWER_MANAGEMENT_CALLBACKS;
	if (callbacks) {
		kbdev->pm.backend.callback_power_on =
					callbacks->power_on_callback;
		kbdev->pm.backend.callback_power_off =
					callbacks->power_off_callback;
		kbdev->pm.backend.callback_power_suspend =
					callbacks->power_suspend_callback;
		kbdev->pm.backend.callback_power_resume =
					callbacks->power_resume_callback;
		kbdev->pm.callback_power_runtime_init =
					callbacks->power_runtime_init_callback;
		kbdev->pm.callback_power_runtime_term =
					callbacks->power_runtime_term_callback;
		kbdev->pm.backend.callback_power_runtime_on =
					callbacks->power_runtime_on_callback;
		kbdev->pm.backend.callback_power_runtime_off =
					callbacks->power_runtime_off_callback;
		kbdev->pm.backend.callback_power_runtime_idle =
					callbacks->power_runtime_idle_callback;
	} else {
		kbdev->pm.backend.callback_power_on = NULL;
		kbdev->pm.backend.callback_power_off = NULL;
		kbdev->pm.backend.callback_power_suspend = NULL;
		kbdev->pm.backend.callback_power_resume = NULL;
		kbdev->pm.callback_power_runtime_init = NULL;
		kbdev->pm.callback_power_runtime_term = NULL;
		kbdev->pm.backend.callback_power_runtime_on = NULL;
		kbdev->pm.backend.callback_power_runtime_off = NULL;
		kbdev->pm.backend.callback_power_runtime_idle = NULL;
	}

	/* Initialise the metrics subsystem */
	ret = kbasep_pm_metrics_init(kbdev);
	if (ret)
		goto metrics_fail;

	init_waitqueue_head(&kbdev->pm.backend.l2_powered_wait);
	kbdev->pm.backend.l2_powered = 0;

	init_waitqueue_head(&kbdev->pm.backend.reset_done_wait);
	kbdev->pm.backend.reset_done = false;

	init_waitqueue_head(&kbdev->pm.zero_active_count_wait);
	kbdev->pm.active_count = 0;

	spin_lock_init(&kbdev->pm.backend.gpu_cycle_counter_requests_lock);
	spin_lock_init(&kbdev->pm.backend.gpu_powered_lock);

	init_waitqueue_head(&kbdev->pm.backend.poweroff_wait);

	if (kbase_pm_ca_init(kbdev) != 0) {
		ret = -EINVAL;
		goto ca_fail;
	}

	if (kbase_pm_policy_init(kbdev) != 0) {
		ret = -EINVAL;
		goto pm_policy_fail;
	}

	return 0;

	/* Unwind in the reverse order of initialisation */
pm_policy_fail:
	kbase_pm_ca_term(kbdev);
ca_fail:
	kbasep_pm_metrics_term(kbdev);
metrics_fail:
	/* The workqueue was allocated first, so it is released last */
	destroy_workqueue(kbdev->pm.backend.gpu_poweroff_wait_wq);
	return ret;
}
/*
 * Power the GPU on and ask the policy to update the core state.
 *
 * @kbdev:     kbase device; kbdev->pm.lock must be held by the caller
 * @is_resume: passed through unchanged to kbase_pm_clock_on()
 *
 * Does not wait for the cores to reach the desired state.
 */
void kbase_pm_do_poweron(struct kbase_device *kbdev, bool is_resume)
{
lockdep_assert_held(&kbdev->pm.lock);
/* Turn clocks and interrupts on - no-op if we haven't done a previous
* kbase_pm_clock_off() */
kbase_pm_clock_on(kbdev, is_resume);
/* Update core status as required by the policy */
KBASE_TIMELINE_PM_CHECKTRANS(kbdev,
SW_FLOW_PM_CHECKTRANS_PM_DO_POWERON_START);
kbase_pm_update_cores_state(kbdev);
KBASE_TIMELINE_PM_CHECKTRANS(kbdev,
SW_FLOW_PM_CHECKTRANS_PM_DO_POWERON_END);
/* NOTE: We don't wait to reach the desired state, since running atoms
* will wait for that state to be reached anyway */
}
/*
 * Work item that completes a power-off started by kbase_pm_do_poweroff().
 *
 * @data: the gpu_poweroff_wait_work member embedded in struct kbase_device
 *
 * Takes runpool_mutex and then pm.lock. Unless a power-on has been requested
 * in the meantime (backend->poweron_required), it turns the clock off; if
 * kbase_pm_clock_off() reports failure, both locks are dropped so the MMU
 * work queues can be flushed, and the decision is re-evaluated after
 * re-taking them. Finally poweroff_wait_in_progress is cleared under
 * hwaccess_lock and waiters on poweroff_wait are woken.
 */
static void kbase_pm_gpu_poweroff_wait_wq(struct work_struct *data)
{
struct kbase_device *kbdev = container_of(data, struct kbase_device,
pm.backend.gpu_poweroff_wait_work);
struct kbase_pm_device_data *pm = &kbdev->pm;
struct kbase_pm_backend_data *backend = &pm->backend;
struct kbasep_js_device_data *js_devdata = &kbdev->js_data;
unsigned long flags;
/* rk_ext: adaption in DDK r14 for solution_1_for_glitch.
 * The macro below is defined unconditionally, so the synchronous wait
 * for core power transitions in the #else branch is compiled out. The
 * definition is never #undef'd here, so it stays visible for the rest
 * of the translation unit. */
#define NOT_TO_WAIT_CORES_POWER_TRANSITIONS_BEFORE_POWER_OFF_GPU
#ifdef NOT_TO_WAIT_CORES_POWER_TRANSITIONS_BEFORE_POWER_OFF_GPU
#else
/* Wait for power transitions to complete. We do this with no locks held
* so that we don't deadlock with any pending workqueues */
KBASE_TIMELINE_PM_CHECKTRANS(kbdev,
SW_FLOW_PM_CHECKTRANS_PM_DO_POWEROFF_START);
kbase_pm_check_transitions_sync(kbdev);
KBASE_TIMELINE_PM_CHECKTRANS(kbdev,
SW_FLOW_PM_CHECKTRANS_PM_DO_POWEROFF_END);
#endif
/* Lock order: runpool_mutex first, then pm.lock */
mutex_lock(&js_devdata->runpool_mutex);
mutex_lock(&kbdev->pm.lock);
if (!backend->poweron_required) {
/* kbase_pm_do_poweroff() zeroed these bitmaps before queueing this
* work, so any set bit here is unexpected */
WARN_ON(kbdev->l2_available_bitmap ||
kbdev->shader_available_bitmap ||
kbdev->tiler_available_bitmap);
/* Consume any change-state events */
kbase_timeline_pm_check_handle_event(kbdev,
KBASE_TIMELINE_PM_EVENT_GPU_STATE_CHANGED);
/* Disable interrupts and turn the clock off */
if (!kbase_pm_clock_off(kbdev, backend->poweroff_is_suspend)) {
/*
* Page/bus faults are pending, must drop locks to
* process. Interrupts are disabled so no more faults
* should be generated at this point.
*/
mutex_unlock(&kbdev->pm.lock);
mutex_unlock(&js_devdata->runpool_mutex);
kbase_flush_mmu_wqs(kbdev);
mutex_lock(&js_devdata->runpool_mutex);
mutex_lock(&kbdev->pm.lock);
/* Turn off clock now that faults have been handled. We
* dropped locks so poweron_required may have changed -
* power back on if this is the case. */
if (backend->poweron_required)
kbase_pm_clock_on(kbdev, false);
else
WARN_ON(!kbase_pm_clock_off(kbdev,
backend->poweroff_is_suspend));
}
}
/* Publish completion; poweroff_wait_in_progress is only changed under
* hwaccess_lock */
spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
backend->poweroff_wait_in_progress = false;
if (backend->poweron_required) {
/* A power-on was requested while the power-off was pending */
backend->poweron_required = false;
kbase_pm_update_cores_state_nolock(kbdev);
}
spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
mutex_unlock(&kbdev->pm.lock);
mutex_unlock(&js_devdata->runpool_mutex);
/* Release anyone blocked in kbase_pm_wait_for_poweroff_complete() */
wake_up(&kbdev->pm.backend.poweroff_wait);
}
/**
 * kbase_pm_do_poweroff - Start an asynchronous power-off of the GPU.
 * @kbdev:      kbase device; kbdev->pm.lock must be held by the caller
 * @is_suspend: recorded in poweroff_is_suspend for the power-off worker
 *
 * If no power-off is already pending, forces the desired and available core
 * state to zero, flags the power-off as in progress and queues the power-off
 * worker. Does nothing when a power-off is already in progress. Callers that
 * need the power-off to have finished must wait for it separately.
 */
void kbase_pm_do_poweroff(struct kbase_device *kbdev, bool is_suspend)
{
	unsigned long flags;

	lockdep_assert_held(&kbdev->pm.lock);

	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);

	if (kbdev->pm.backend.poweroff_wait_in_progress) {
		/* A power-off is already pending; nothing more to do */
		spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
		return;
	}

	/* Force all cores off */
	kbdev->pm.backend.desired_shader_state = 0;
	kbdev->pm.backend.desired_tiler_state = 0;

	/*
	 * Force all cores to be unavailable, in the situation where
	 * transitions are in progress for some cores but not others, and
	 * kbase_pm_check_transitions_nolock can not immediately power off
	 * the cores.
	 */
	kbdev->shader_available_bitmap = 0;
	kbdev->tiler_available_bitmap = 0;
	kbdev->l2_available_bitmap = 0;

	kbdev->pm.backend.poweroff_wait_in_progress = true;
	kbdev->pm.backend.poweroff_is_suspend = is_suspend;

	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);

	/* Kick off the worker outside the spinlock. Callers will have to wait */
	queue_work(kbdev->pm.backend.gpu_poweroff_wait_wq,
			&kbdev->pm.backend.gpu_poweroff_wait_work);
}
/*
 * Wait condition for the power-off wait queue.
 *
 * Despite its name, this returns true when NO power-off is in progress, i.e.
 * when poweroff_wait_in_progress is false. The flag is sampled under
 * hwaccess_lock.
 */
static bool is_poweroff_in_progress(struct kbase_device *kbdev)
{
	unsigned long flags;
	bool ret;

	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
	ret = !kbdev->pm.backend.poweroff_wait_in_progress;
	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);

	return ret;
}
/*
 * Block until no GPU power-off is pending.
 *
 * Sleeps on poweroff_wait until is_poweroff_in_progress() returns true,
 * which (despite that helper's name) means poweroff_wait_in_progress has
 * been cleared.
 *
 * NOTE(review): the result of wait_event_killable() is discarded, so a
 * caller cannot tell a completed power-off from a wait that was cut short
 * by a fatal signal - confirm this is acceptable for all callers.
 */
void kbase_pm_wait_for_poweroff_complete(struct kbase_device *kbdev)
{
wait_event_killable(kbdev->pm.backend.poweroff_wait,
is_poweroff_in_progress(kbdev));
}
/*
 * Power up the GPU for the first time and enable its interrupts.
 *
 * @kbdev: kbase device (must be a valid pointer)
 * @flags: passed through unchanged to kbase_pm_init_hw()
 *
 * Runs with runpool_mutex and pm.lock held (taken in that order). The active
 * count is set to 1 for the duration of the bring-up and released again via
 * kbase_pm_context_idle() once the locks have been dropped.
 *
 * Return: 0 on success, otherwise the error returned by kbase_pm_init_hw().
 */
int kbase_hwaccess_pm_powerup(struct kbase_device *kbdev,
unsigned int flags)
{
struct kbasep_js_device_data *js_devdata = &kbdev->js_data;
unsigned long irq_flags;
int ret;
KBASE_DEBUG_ASSERT(kbdev != NULL);
mutex_lock(&js_devdata->runpool_mutex);
mutex_lock(&kbdev->pm.lock);
/* A suspend won't happen during startup/insmod */
KBASE_DEBUG_ASSERT(!kbase_pm_is_suspending(kbdev));
/* Power up the GPU, don't enable IRQs as we are not ready to receive
* them. */
ret = kbase_pm_init_hw(kbdev, flags);
if (ret) {
/* Nothing else has been set up yet; just drop the locks */
mutex_unlock(&kbdev->pm.lock);
mutex_unlock(&js_devdata->runpool_mutex);
return ret;
}
kbasep_pm_read_present_cores(kbdev);
/* Default every debug core mask to all present shader cores */
kbdev->pm.debug_core_mask_all = kbdev->pm.debug_core_mask[0] =
kbdev->pm.debug_core_mask[1] =
kbdev->pm.debug_core_mask[2] =
kbdev->gpu_props.props.raw_props.shader_present;
/* Pretend the GPU is active to prevent a power policy turning the GPU
* cores off */
kbdev->pm.active_count = 1;
spin_lock_irqsave(&kbdev->pm.backend.gpu_cycle_counter_requests_lock,
irq_flags);
/* Ensure cycle counter is off */
kbdev->pm.backend.gpu_cycle_counter_requests = 0;
spin_unlock_irqrestore(
&kbdev->pm.backend.gpu_cycle_counter_requests_lock,
irq_flags);
/* We are ready to receive IRQ's now as power policy is set up, so
* enable them now. */
#ifdef CONFIG_MALI_DEBUG
spin_lock_irqsave(&kbdev->pm.backend.gpu_powered_lock, irq_flags);
kbdev->pm.backend.driver_ready_for_irqs = true;
spin_unlock_irqrestore(&kbdev->pm.backend.gpu_powered_lock, irq_flags);
#endif
kbase_pm_enable_interrupts(kbdev);
/* Turn on the GPU and any cores needed by the policy */
kbase_pm_do_poweron(kbdev, false);
mutex_unlock(&kbdev->pm.lock);
mutex_unlock(&js_devdata->runpool_mutex);
/* Idle the GPU and/or cores, if the policy wants it to */
kbase_pm_context_idle(kbdev);
return 0;
}
/*
 * Halt the power management backend.
 *
 * Under pm.lock, cancels any deferred power-off and then requests a
 * (non-suspend) power-off. kbase_pm_do_poweroff() only queues the work, so
 * the power-off itself may not have completed when this returns.
 */
void kbase_hwaccess_pm_halt(struct kbase_device *kbdev)
{
KBASE_DEBUG_ASSERT(kbdev != NULL);
mutex_lock(&kbdev->pm.lock);
kbase_pm_cancel_deferred_poweroff(kbdev);
kbase_pm_do_poweroff(kbdev, false);
mutex_unlock(&kbdev->pm.lock);
}
KBASE_EXPORT_TEST_API(kbase_hwaccess_pm_halt);
/*
 * Tear down the power management backend.
 *
 * Expects (debug-asserts) that the active count and the cycle counter
 * request count are both zero. Terminates the power policy, the core
 * availability framework and the metrics subsystem, then destroys the
 * power-off wait workqueue.
 */
void kbase_hwaccess_pm_term(struct kbase_device *kbdev)
{
KBASE_DEBUG_ASSERT(kbdev != NULL);
KBASE_DEBUG_ASSERT(kbdev->pm.active_count == 0);
KBASE_DEBUG_ASSERT(kbdev->pm.backend.gpu_cycle_counter_requests == 0);
/* Free any resources the policy allocated */
kbase_pm_policy_term(kbdev);
kbase_pm_ca_term(kbdev);
/* Shut down the metrics subsystem */
kbasep_pm_metrics_term(kbdev);
destroy_workqueue(kbdev->pm.backend.gpu_poweroff_wait_wq);
}
/*
 * Re-evaluate core power transitions after a power state change.
 *
 * Takes hwaccess_lock, runs kbase_pm_check_transitions_nolock() and, when it
 * reports that the cores are available, logs the state change to the
 * timeline and updates the job slots via kbase_backend_slot_update().
 */
void kbase_pm_power_changed(struct kbase_device *kbdev)
{
bool cores_are_available;
unsigned long flags;
KBASE_TIMELINE_PM_CHECKTRANS(kbdev,
SW_FLOW_PM_CHECKTRANS_GPU_INTERRUPT_START);
spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
cores_are_available = kbase_pm_check_transitions_nolock(kbdev);
KBASE_TIMELINE_PM_CHECKTRANS(kbdev,
SW_FLOW_PM_CHECKTRANS_GPU_INTERRUPT_END);
if (cores_are_available) {
/* Log timelining information that a change in state has
* completed */
kbase_timeline_pm_handle_event(kbdev,
KBASE_TIMELINE_PM_EVENT_GPU_STATE_CHANGED);
kbase_backend_slot_update(kbdev);
}
spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
}
/**
 * kbase_pm_set_debug_core_mask - Set the per-job-slot debug core masks.
 * @kbdev:             kbase device
 * @new_core_mask_js0: debug core mask for job slot 0
 * @new_core_mask_js1: debug core mask for job slot 1
 * @new_core_mask_js2: debug core mask for job slot 2
 *
 * Stores the three masks and their union, then re-evaluates the core state.
 * No lock is taken here; the call to the _nolock update helper suggests the
 * caller is expected to hold the relevant lock - TODO confirm.
 */
void kbase_pm_set_debug_core_mask(struct kbase_device *kbdev,
		u64 new_core_mask_js0, u64 new_core_mask_js1,
		u64 new_core_mask_js2)
{
	/* Union of all slots */
	kbdev->pm.debug_core_mask_all = new_core_mask_js0 |
			new_core_mask_js1 | new_core_mask_js2;

	kbdev->pm.debug_core_mask[0] = new_core_mask_js0;
	kbdev->pm.debug_core_mask[1] = new_core_mask_js1;
	kbdev->pm.debug_core_mask[2] = new_core_mask_js2;

	kbase_pm_update_cores_state_nolock(kbdev);
}
void kbase_hwaccess_pm_gpu_active(struct kbase_device *kbdev)
{
kbase_pm_update_active(kbdev);
}
void kbase_hwaccess_pm_gpu_idle(struct kbase_device *kbdev)
{
kbase_pm_update_active(kbdev);
}
/*
 * Power the GPU off for system suspend and wait for that to finish.
 *
 * With runpool_mutex and pm.lock held (taken in that order), cancels any
 * deferred power-off, requests a suspend power-off and suspends the backend
 * timer. The wait for the power-off to complete happens after both locks
 * have been released.
 */
void kbase_hwaccess_pm_suspend(struct kbase_device *kbdev)
{
struct kbasep_js_device_data *js_devdata = &kbdev->js_data;
/* Force power off the GPU and all cores (regardless of policy), only
* after the PM active count reaches zero (otherwise, we risk turning it
* off prematurely) */
mutex_lock(&js_devdata->runpool_mutex);
mutex_lock(&kbdev->pm.lock);
kbase_pm_cancel_deferred_poweroff(kbdev);
kbase_pm_do_poweroff(kbdev, true);
kbase_backend_timer_suspend(kbdev);
mutex_unlock(&kbdev->pm.lock);
mutex_unlock(&js_devdata->runpool_mutex);
/* Wait outside the locks: the power-off worker takes both of them */
kbase_pm_wait_for_poweroff_complete(kbdev);
}
/*
 * Power the GPU back on after system suspend.
 *
 * With runpool_mutex and pm.lock held (taken in that order), clears the
 * suspending flag, powers the GPU on with is_resume set and resumes the
 * backend timer.
 */
void kbase_hwaccess_pm_resume(struct kbase_device *kbdev)
{
struct kbasep_js_device_data *js_devdata = &kbdev->js_data;
mutex_lock(&js_devdata->runpool_mutex);
mutex_lock(&kbdev->pm.lock);
kbdev->pm.suspending = false;
kbase_pm_do_poweron(kbdev, true);
kbase_backend_timer_resume(kbdev);
mutex_unlock(&kbdev->pm.lock);
mutex_unlock(&js_devdata->runpool_mutex);
}

View File

@ -0,0 +1,179 @@
/*
*
* (C) COPYRIGHT 2013-2016 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
* Foundation, and any use by you of this program is subject to the terms
* of such GNU licence.
*
* A copy of the licence is included with the program, and can also be obtained
* from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
* Boston, MA 02110-1301, USA.
*
*/
/*
* Base kernel core availability APIs
*/
#include <mali_kbase.h>
#include <mali_kbase_pm.h>
#include <backend/gpu/mali_kbase_pm_internal.h>
/*
 * Core availability policies known to this file. Entry 0 is the policy that
 * kbase_pm_ca_init() installs by default; the random policy is only compiled
 * in when MALI_CUSTOMER_RELEASE is 0.
 */
static const struct kbase_pm_ca_policy *const policy_list[] = {
&kbase_pm_ca_fixed_policy_ops,
#if !MALI_CUSTOMER_RELEASE
&kbase_pm_ca_random_policy_ops
#endif
};
/**
 * POLICY_COUNT - The number of policies available in the system.
 *
 * This is derived from the number of policies listed in policy_list.
 */
#define POLICY_COUNT (sizeof(policy_list)/sizeof(*policy_list))
int kbase_pm_ca_init(struct kbase_device *kbdev)
{
KBASE_DEBUG_ASSERT(kbdev != NULL);
kbdev->pm.backend.ca_current_policy = policy_list[0];
kbdev->pm.backend.ca_current_policy->init(kbdev);
return 0;
}
void kbase_pm_ca_term(struct kbase_device *kbdev)
{
kbdev->pm.backend.ca_current_policy->term(kbdev);
}
int kbase_pm_ca_list_policies(const struct kbase_pm_ca_policy * const **list)
{
if (!list)
return POLICY_COUNT;
*list = policy_list;
return POLICY_COUNT;
}
KBASE_EXPORT_TEST_API(kbase_pm_ca_list_policies);
/*
 * Return the current core availability policy.
 *
 * The pointer is read without taking any lock. kbase_pm_ca_set_policy()
 * temporarily sets it to NULL while switching, so NULL is a possible result.
 */
const struct kbase_pm_ca_policy
*kbase_pm_ca_get_policy(struct kbase_device *kbdev)
{
KBASE_DEBUG_ASSERT(kbdev != NULL);
return kbdev->pm.backend.ca_current_policy;
}
KBASE_EXPORT_TEST_API(kbase_pm_ca_get_policy);
/*
 * Switch to a different core availability policy.
 *
 * @kbdev:      kbase device (must be a valid pointer)
 * @new_policy: policy to install (must be a valid pointer)
 *
 * Holds a PM active reference and pm.lock for the whole switch. The current
 * policy pointer is set to NULL under hwaccess_lock while the old policy's
 * term and the new policy's init callbacks run (both optional), then the new
 * policy is published under hwaccess_lock, the core state is re-evaluated
 * and the new policy is told the current core status.
 */
void kbase_pm_ca_set_policy(struct kbase_device *kbdev,
const struct kbase_pm_ca_policy *new_policy)
{
const struct kbase_pm_ca_policy *old_policy;
unsigned long flags;
KBASE_DEBUG_ASSERT(kbdev != NULL);
KBASE_DEBUG_ASSERT(new_policy != NULL);
KBASE_TRACE_ADD(kbdev, PM_CA_SET_POLICY, NULL, NULL, 0u,
new_policy->id);
/* During a policy change we pretend the GPU is active */
/* A suspend won't happen here, because we're in a syscall from a
* userspace thread */
kbase_pm_context_active(kbdev);
mutex_lock(&kbdev->pm.lock);
/* Remove the policy to prevent IRQ handlers from working on it */
spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
old_policy = kbdev->pm.backend.ca_current_policy;
kbdev->pm.backend.ca_current_policy = NULL;
spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
/* term/init run with pm.lock held but without the hwaccess_lock spinlock */
if (old_policy->term)
old_policy->term(kbdev);
if (new_policy->init)
new_policy->init(kbdev);
spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
kbdev->pm.backend.ca_current_policy = new_policy;
/* If any core power state changes were previously attempted, but
* couldn't be made because the policy was changing (current_policy was
* NULL), then re-try them here. */
kbase_pm_update_cores_state_nolock(kbdev);
/* Unlike term/init above, update_core_status is called unconditionally */
kbdev->pm.backend.ca_current_policy->update_core_status(kbdev,
kbdev->shader_ready_bitmap,
kbdev->shader_transitioning_bitmap);
spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
mutex_unlock(&kbdev->pm.lock);
/* Now the policy change is finished, we release our fake context active
* reference */
kbase_pm_context_idle(kbdev);
}
KBASE_EXPORT_TEST_API(kbase_pm_ca_set_policy);
/*
 * Compute the mask of shader cores that may currently be used.
 *
 * The caller must hold hwaccess_lock. The result is always restricted by
 * debug_core_mask_all. All present cores are reported when instrumentation
 * is enabled or when no policy is installed; otherwise the current policy's
 * get_core_mask callback decides.
 */
u64 kbase_pm_ca_get_core_mask(struct kbase_device *kbdev)
{
	u64 core_mask;

	lockdep_assert_held(&kbdev->hwaccess_lock);

	/*
	 * All cores must be enabled when instrumentation is in use; the same
	 * applies when there is no current policy to ask.
	 */
	if (kbdev->pm.backend.instr_enabled ||
	    kbdev->pm.backend.ca_current_policy == NULL)
		core_mask = kbdev->gpu_props.props.raw_props.shader_present;
	else
		core_mask =
			kbdev->pm.backend.ca_current_policy->get_core_mask(kbdev);

	return core_mask & kbdev->pm.debug_core_mask_all;
}
KBASE_EXPORT_TEST_API(kbase_pm_ca_get_core_mask);
/*
 * Forward the current core power status to the active policy.
 *
 * The caller must hold hwaccess_lock. The update is silently dropped when no
 * policy is installed.
 */
void kbase_pm_ca_update_core_status(struct kbase_device *kbdev, u64 cores_ready,
		u64 cores_transitioning)
{
	lockdep_assert_held(&kbdev->hwaccess_lock);

	if (kbdev->pm.backend.ca_current_policy == NULL)
		return;

	kbdev->pm.backend.ca_current_policy->update_core_status(kbdev,
			cores_ready, cores_transitioning);
}
/*
 * Turn on the instrumentation override and re-evaluate the core state.
 *
 * Takes hwaccess_lock itself, so the caller must not already hold it (unlike
 * kbase_pm_ca_instr_disable(), which expects the caller to hold it).
 */
void kbase_pm_ca_instr_enable(struct kbase_device *kbdev)
{
unsigned long flags;
spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
kbdev->pm.backend.instr_enabled = true;
kbase_pm_update_cores_state_nolock(kbdev);
spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
}
/*
 * Turn off the instrumentation override and re-evaluate the core state.
 *
 * The caller must already hold hwaccess_lock (unlike
 * kbase_pm_ca_instr_enable(), which takes the lock itself).
 */
void kbase_pm_ca_instr_disable(struct kbase_device *kbdev)
{
lockdep_assert_held(&kbdev->hwaccess_lock);
kbdev->pm.backend.instr_enabled = false;
kbase_pm_update_cores_state_nolock(kbdev);
}

View File

@ -0,0 +1,92 @@
/*
*
* (C) COPYRIGHT 2011-2015 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
* Foundation, and any use by you of this program is subject to the terms
* of such GNU licence.
*
* A copy of the licence is included with the program, and can also be obtained
* from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
* Boston, MA 02110-1301, USA.
*
*/
/*
* Base kernel core availability APIs
*/
#ifndef _KBASE_PM_CA_H_
#define _KBASE_PM_CA_H_
/**
* kbase_pm_ca_init - Initialize core availability framework
*
* Must be called before calling any other core availability function
*
* @kbdev: The kbase device structure for the device (must be a valid pointer)
*
* Return: 0 if the core availability framework was successfully initialized,
* -errno otherwise
*/
int kbase_pm_ca_init(struct kbase_device *kbdev);
/**
* kbase_pm_ca_term - Terminate core availability framework
*
* @kbdev: The kbase device structure for the device (must be a valid pointer)
*/
void kbase_pm_ca_term(struct kbase_device *kbdev);
/**
* kbase_pm_ca_get_core_mask - Get currently available shaders core mask
*
* @kbdev: The kbase device structure for the device (must be a valid pointer)
*
* Returns a mask of the currently available shader cores.
* Calls into the core availability policy
*
* Return: The bit mask of available cores
*/
u64 kbase_pm_ca_get_core_mask(struct kbase_device *kbdev);
/**
* kbase_pm_ca_update_core_status - Update core status
*
* @kbdev: The kbase device structure for the device (must be
* a valid pointer)
* @cores_ready: The bit mask of cores ready for job submission
* @cores_transitioning: The bit mask of cores that are transitioning power
* state
*
* Update core availability policy with current core power status
*
* Calls into the core availability policy
*/
void kbase_pm_ca_update_core_status(struct kbase_device *kbdev, u64 cores_ready,
u64 cores_transitioning);
/**
* kbase_pm_ca_instr_enable - Enable override for instrumentation
*
* @kbdev: The kbase device structure for the device (must be a valid pointer)
*
* This overrides the output of the core availability policy, ensuring that all
* cores are available
*/
void kbase_pm_ca_instr_enable(struct kbase_device *kbdev);
/**
* kbase_pm_ca_instr_disable - Disable override for instrumentation
*
* @kbdev: The kbase device structure for the device (must be a valid pointer)
*
* This disables any previously enabled override, and resumes normal policy
* functionality
*/
void kbase_pm_ca_instr_disable(struct kbase_device *kbdev);
#endif /* _KBASE_PM_CA_H_ */

View File

@ -0,0 +1,65 @@
/*
*
* (C) COPYRIGHT 2013-2015 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
* Foundation, and any use by you of this program is subject to the terms
* of such GNU licence.
*
* A copy of the licence is included with the program, and can also be obtained
* from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
* Boston, MA 02110-1301, USA.
*
*/
/*
* A core availability policy that always makes a fixed set of cores available
*/
#include <mali_kbase.h>
#include <mali_kbase_pm.h>
/**
* fixed_init - Init callback of the fixed core availability policy
*
* @kbdev: The kbase device structure for the device (must be a valid pointer)
*
* Clears the ca_in_transition flag held in the PM backend data. The mask
* reported by fixed_get_core_mask() is always the shader_present mask.
*/
static void fixed_init(struct kbase_device *kbdev)
{
kbdev->pm.backend.ca_in_transition = false;
}
/**
* fixed_term - Term callback of the fixed core availability policy
*
* @kbdev: The kbase device structure for the device (unused)
*
* Does nothing; the parameter is only marked as unused.
*/
static void fixed_term(struct kbase_device *kbdev)
{
CSTD_UNUSED(kbdev);
}
/**
 * fixed_get_core_mask - Report the shader cores this policy makes available
 *
 * @kbdev: The kbase device structure for the device (must be a valid pointer)
 *
 * Return: the shader_present mask from the raw GPU properties, unmodified
 */
static u64 fixed_get_core_mask(struct kbase_device *kbdev)
{
	const u64 present = kbdev->gpu_props.props.raw_props.shader_present;

	return present;
}
/**
* fixed_update_core_status - update_core_status callback of the fixed policy
*
* @kbdev: The kbase device structure for the device (unused)
* @cores_ready: Mask of cores ready for job submission (unused)
* @cores_transitioning: Mask of cores transitioning power state (unused)
*
* Does nothing; all three parameters are only marked as unused.
*/
static void fixed_update_core_status(struct kbase_device *kbdev,
u64 cores_ready,
u64 cores_transitioning)
{
CSTD_UNUSED(kbdev);
CSTD_UNUSED(cores_ready);
CSTD_UNUSED(cores_transitioning);
}
/*
* The struct kbase_pm_ca_policy instance for the fixed core availability
* policy.
*
* This is the static structure that defines the fixed policy's name,
* callbacks, flags and id. The initializer is positional, so the entries must
* stay in the member order of struct kbase_pm_ca_policy.
*/
const struct kbase_pm_ca_policy kbase_pm_ca_fixed_policy_ops = {
"fixed", /* name */
fixed_init, /* init */
fixed_term, /* term */
fixed_get_core_mask, /* get_core_mask */
fixed_update_core_status, /* update_core_status */
0u, /* flags */
KBASE_PM_CA_POLICY_ID_FIXED, /* id */
};
KBASE_EXPORT_TEST_API(kbase_pm_ca_fixed_policy_ops);

View File

@ -0,0 +1,40 @@
/*
*
* (C) COPYRIGHT 2013-2015 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
* Foundation, and any use by you of this program is subject to the terms
* of such GNU licence.
*
* A copy of the licence is included with the program, and can also be obtained
* from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
* Boston, MA 02110-1301, USA.
*
*/
/*
* A core availability policy that always makes a fixed set of cores available
*/
#ifndef MALI_KBASE_PM_CA_FIXED_H
#define MALI_KBASE_PM_CA_FIXED_H
/**
* struct kbasep_pm_ca_policy_fixed - Private structure for policy instance data
*
* @dummy: Dummy member - no state is needed
*
* This contains data that is private to the fixed core availability policy.
* It is the "fixed" member of union kbase_pm_ca_policy_data.
*/
struct kbasep_pm_ca_policy_fixed {
/* Placeholder only: the policy keeps no state */
int dummy;
};
extern const struct kbase_pm_ca_policy kbase_pm_ca_fixed_policy_ops;
#endif /* MALI_KBASE_PM_CA_FIXED_H */

View File

@ -0,0 +1,70 @@
/*
*
* (C) COPYRIGHT 2012-2016 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
* Foundation, and any use by you of this program is subject to the terms
* of such GNU licence.
*
* A copy of the licence is included with the program, and can also be obtained
* from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
* Boston, MA 02110-1301, USA.
*
*/
/*
* "Coarse Demand" power management policy
*/
#include <mali_kbase.h>
#include <mali_kbase_pm.h>
/**
 * coarse_demand_get_core_mask - Shader cores the coarse demand policy wants on
 *
 * @kbdev: The kbase device structure for the device (must be a valid pointer)
 *
 * Return: every present shader core while kbdev->pm.active_count is non-zero,
 *         otherwise an empty mask
 */
static u64 coarse_demand_get_core_mask(struct kbase_device *kbdev)
{
	if (kbdev->pm.active_count != 0)
		return kbdev->gpu_props.props.raw_props.shader_present;

	return 0;
}
/**
 * coarse_demand_get_core_active - Decide whether the GPU should be powered
 *
 * @kbdev: The kbase device structure for the device (must be a valid pointer)
 *
 * Return: true if there is an active PM reference, or any shader core or
 *         tiler is still needed or in use; false only when all of those
 *         are zero
 */
static bool coarse_demand_get_core_active(struct kbase_device *kbdev)
{
	/* Any outstanding active reference keeps the GPU powered */
	if (kbdev->pm.active_count != 0)
		return true;

	/* Shader cores that are requested or busy */
	if ((kbdev->shader_needed_bitmap | kbdev->shader_inuse_bitmap) != 0)
		return true;

	/* Tiler that is requested or busy */
	if (kbdev->tiler_needed_cnt != 0 || kbdev->tiler_inuse_cnt != 0)
		return true;

	return false;
}
/**
* coarse_demand_init - Init callback of the coarse demand policy
*
* @kbdev: The kbase device structure for the device (unused)
*
* Does nothing; the parameter is only marked as unused.
*/
static void coarse_demand_init(struct kbase_device *kbdev)
{
CSTD_UNUSED(kbdev);
}
/**
* coarse_demand_term - Term callback of the coarse demand policy
*
* @kbdev: The kbase device structure for the device (unused)
*
* Does nothing; the parameter is only marked as unused.
*/
static void coarse_demand_term(struct kbase_device *kbdev)
{
CSTD_UNUSED(kbdev);
}
/* The struct kbase_pm_policy structure for the coarse demand power policy.
*
* This is the static structure that defines the coarse demand power policy's
* name, callbacks, flags and id. The initializer is positional, so the entries
* must stay in the member order of struct kbase_pm_policy.
*/
const struct kbase_pm_policy kbase_pm_coarse_demand_policy_ops = {
"coarse_demand", /* name */
coarse_demand_init, /* init */
coarse_demand_term, /* term */
coarse_demand_get_core_mask, /* get_core_mask */
coarse_demand_get_core_active, /* get_core_active */
0u, /* flags */
KBASE_PM_POLICY_ID_COARSE_DEMAND, /* id */
};
KBASE_EXPORT_TEST_API(kbase_pm_coarse_demand_policy_ops);

View File

@ -0,0 +1,64 @@
/*
*
* (C) COPYRIGHT 2012-2015 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
* Foundation, and any use by you of this program is subject to the terms
* of such GNU licence.
*
* A copy of the licence is included with the program, and can also be obtained
* from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
* Boston, MA 02110-1301, USA.
*
*/
/*
* "Coarse Demand" power management policy
*/
#ifndef MALI_KBASE_PM_COARSE_DEMAND_H
#define MALI_KBASE_PM_COARSE_DEMAND_H
/**
* DOC:
* The "Coarse" demand power management policy has the following
* characteristics:
* - When KBase indicates that the GPU will be powered up, but we don't yet
* know which Job Chains are to be run:
* - All Shader Cores are powered up, regardless of whether or not they will
* be needed later.
* - When KBase indicates that a set of Shader Cores are needed to submit the
* currently queued Job Chains:
* - All Shader Cores are kept powered, regardless of whether or not they will
* be needed
* - When KBase indicates that the GPU need not be powered:
* - The Shader Cores are powered off, and the GPU itself is powered off too.
*
* @note:
* - KBase indicates the GPU will be powered up when it has a User Process that
* has just started to submit Job Chains.
* - KBase indicates the GPU need not be powered when all the Job Chains from
* User Processes have finished, and it is waiting for a User Process to
* submit some more Job Chains.
*/
/**
* struct kbasep_pm_policy_coarse_demand - Private structure for coarse demand
* policy
*
* This contains data that is private to the coarse demand power policy.
* It is the "coarse_demand" member of union kbase_pm_policy_data.
*
* @dummy: Dummy member - no state needed
*/
struct kbasep_pm_policy_coarse_demand {
/* Placeholder only: the policy keeps no state */
int dummy;
};
extern const struct kbase_pm_policy kbase_pm_coarse_demand_policy_ops;
#endif /* MALI_KBASE_PM_COARSE_DEMAND_H */

View File

@ -0,0 +1,504 @@
/*
*
* (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
* Foundation, and any use by you of this program is subject to the terms
* of such GNU licence.
*
* A copy of the licence is included with the program, and can also be obtained
* from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
* Boston, MA 02110-1301, USA.
*
*/
/*
* Backend-specific Power Manager definitions
*/
#ifndef _KBASE_PM_HWACCESS_DEFS_H_
#define _KBASE_PM_HWACCESS_DEFS_H_
#include "mali_kbase_pm_ca_fixed.h"
#if !MALI_CUSTOMER_RELEASE
#include "mali_kbase_pm_ca_random.h"
#endif
#include "mali_kbase_pm_always_on.h"
#include "mali_kbase_pm_coarse_demand.h"
#include "mali_kbase_pm_demand.h"
#if !MALI_CUSTOMER_RELEASE
#include "mali_kbase_pm_demand_always_powered.h"
#include "mali_kbase_pm_fast_start.h"
#endif
/* Forward definition - see mali_kbase.h */
struct kbase_device;
struct kbase_jd_atom;
/**
* enum kbase_pm_core_type - The types of core in a GPU.
*
* These enumerated values are used in calls to
* - kbase_pm_get_present_cores()
* - kbase_pm_get_active_cores()
* - kbase_pm_get_trans_cores()
* - kbase_pm_get_ready_cores().
*
* They specify which type of core should be acted on. These values are set in
* a manner that allows core_type_to_reg() function to be simpler and more
* efficient: each enumerator takes the value of the matching *_PRESENT_LO
* constant (presumably a register offset - confirm against the register map
* header).
*
* @KBASE_PM_CORE_L2: The L2 cache
* @KBASE_PM_CORE_SHADER: Shader cores
* @KBASE_PM_CORE_TILER: Tiler cores
*/
enum kbase_pm_core_type {
KBASE_PM_CORE_L2 = L2_PRESENT_LO,
KBASE_PM_CORE_SHADER = SHADER_PRESENT_LO,
KBASE_PM_CORE_TILER = TILER_PRESENT_LO
};
/**
* struct kbasep_pm_metrics_data - Metrics data collected for use by the power
* management framework.
*
* @time_period_start: time at which busy/idle measurements started
* @time_busy: number of ns the GPU was busy executing jobs since the
* @time_period_start timestamp.
* @time_idle: number of ns the GPU was not executing jobs since the
* @time_period_start timestamp.
* @prev_busy: busy time in ns of previous time period.
* Updated when metrics are reset.
* @prev_idle: idle time in ns of previous time period.
* Updated when metrics are reset.
* @gpu_active: true when the GPU is executing jobs, false when it is
* not. Updated when the job scheduler informs us a job is submitted
* to or removed from a GPU slot.
* @busy_cl: number of ns the GPU was busy executing CL jobs. Note that
* if two CL jobs were active for 400ns, this value would be updated
* with 800.
* @busy_gl: number of ns the GPU was busy executing GL jobs. Note that
* if two GL jobs were active for 400ns, this value would be updated
* with 800.
* @active_cl_ctx: number of CL jobs active on the GPU. Array is per-device.
* @active_gl_ctx: number of GL jobs active on the GPU. Array is per-slot. As
* GL jobs never run on slot 2 this slot is not recorded.
* @lock: spinlock protecting the kbasep_pm_metrics_data structure
* @timer: timer to regularly make DVFS decisions based on the power
* management metrics. Only present when CONFIG_MALI_MIDGARD_DVFS
* is defined.
* @timer_active: boolean indicating @timer is running. Only present when
* CONFIG_MALI_MIDGARD_DVFS is defined.
* @platform_data: pointer to data controlled by platform specific code
* @kbdev: pointer to kbase device for which metrics are collected
*
*/
struct kbasep_pm_metrics_data {
ktime_t time_period_start;
u32 time_busy;
u32 time_idle;
u32 prev_busy;
u32 prev_idle;
bool gpu_active;
u32 busy_cl[2];
u32 busy_gl;
u32 active_cl_ctx[2];
u32 active_gl_ctx[2]; /* GL jobs can only run on 2 of the 3 job slots */
spinlock_t lock;
#ifdef CONFIG_MALI_MIDGARD_DVFS
struct hrtimer timer;
bool timer_active;
#endif
void *platform_data;
struct kbase_device *kbdev;
};
/**
* union kbase_pm_policy_data - Private data of the power policies
*
* One member per power policy. It is stored as
* kbase_pm_backend_data.pm_policy_data ("Private data for current PM policy").
* The demand_always_powered and fast_start members only exist when
* MALI_CUSTOMER_RELEASE is zero.
*/
union kbase_pm_policy_data {
struct kbasep_pm_policy_always_on always_on;
struct kbasep_pm_policy_coarse_demand coarse_demand;
struct kbasep_pm_policy_demand demand;
#if !MALI_CUSTOMER_RELEASE
struct kbasep_pm_policy_demand_always_powered demand_always_powered;
struct kbasep_pm_policy_fast_start fast_start;
#endif
};
/**
* union kbase_pm_ca_policy_data - Private data of the core availability
* policies
*
* One member per core availability policy. It is stored as
* kbase_pm_backend_data.ca_policy_data ("Private data for current CA policy").
* The random member only exists when MALI_CUSTOMER_RELEASE is zero.
*/
union kbase_pm_ca_policy_data {
struct kbasep_pm_ca_policy_fixed fixed;
#if !MALI_CUSTOMER_RELEASE
struct kbasep_pm_ca_policy_random random;
#endif
};
/**
* struct kbase_pm_backend_data - Data stored per device for power management.
*
* This structure contains data for the power management framework. There is one
* instance of this structure per device in the system.
*
* @ca_current_policy: The policy that is currently actively controlling core
* availability.
* @pm_current_policy: The policy that is currently actively controlling the
* power state.
* @ca_policy_data: Private data for current CA policy
* @pm_policy_data: Private data for current PM policy
* @ca_in_transition: Flag indicating when core availability policy is
* transitioning cores. The core availability policy must
* set this when a change in core availability is occurring.
* power_change_lock must be held when accessing this.
* @reset_done: Flag when a reset is complete
* @reset_done_wait: Wait queue to wait for changes to @reset_done
* @l2_powered_wait: Wait queue for whether the l2 cache has been powered as
* requested
* @l2_powered: State indicating whether all the l2 caches are powered.
* Non-zero indicates they're *all* powered
* Zero indicates that some (or all) are not powered
* @gpu_cycle_counter_requests: The reference count of active gpu cycle counter
* users
* @gpu_cycle_counter_requests_lock: Lock to protect @gpu_cycle_counter_requests
* @desired_shader_state: A bit mask identifying the shader cores that the
* power policy would like to be on. The current state
* of the cores may be different, but there should be
* transitions in progress that will eventually achieve
* this state (assuming that the policy doesn't change
* its mind in the mean time).
* @powering_on_shader_state: A bit mask indicating which shader cores are
* currently in a power-on transition
* @desired_tiler_state: A bit mask identifying the tiler cores that the power
* policy would like to be on. See @desired_shader_state
* @powering_on_tiler_state: A bit mask indicating which tiler core are
* currently in a power-on transition
* @powering_on_l2_state: A bit mask indicating which l2-caches are currently
* in a power-on transition
* @gpu_in_desired_state: This flag is set if the GPU is powered as requested
* by the desired_xxx_state variables
* @gpu_in_desired_state_wait: Wait queue set when @gpu_in_desired_state != 0
* @gpu_powered: Set to true when the GPU is powered and register
* accesses are possible, false otherwise
* @instr_enabled: Set to true when instrumentation is enabled,
* false otherwise
* @cg1_disabled: Set if the policy wants to keep the second core group
* powered off
* @driver_ready_for_irqs: Debug state indicating whether sufficient
* initialization of the driver has occurred to handle
* IRQs
* @gpu_powered_lock: Spinlock that must be held when writing @gpu_powered or
* accessing @driver_ready_for_irqs
* @metrics: Structure to hold metrics for the GPU
* @gpu_poweroff_pending: number of poweroff timer ticks until the GPU is
* powered off
* @shader_poweroff_pending_time: number of poweroff timer ticks until shaders
* and/or tilers are powered off
* @gpu_poweroff_timer: Timer for powering off GPU
* @gpu_poweroff_wq: Workqueue to power off GPU on when timer fires
* @gpu_poweroff_work: Workitem used on @gpu_poweroff_wq
* @shader_poweroff_pending: Bit mask of shaders to be powered off on next
* timer callback
* @tiler_poweroff_pending: Bit mask of tilers to be powered off on next timer
* callback
* @poweroff_timer_needed: true if the poweroff timer is currently required,
* false otherwise
* @poweroff_timer_running: true if the poweroff timer is currently running,
* false otherwise
* power_change_lock should be held when accessing,
* unless there is no way the timer can be running (eg
* hrtimer_cancel() was called immediately before)
* @poweroff_wait_in_progress: true if a wait for GPU power off is in progress.
* hwaccess_lock must be held when accessing
* @poweron_required: true if a GPU power on is required. Should only be set
* when poweroff_wait_in_progress is true, and therefore the
* GPU can not immediately be powered on. pm.lock must be
* held when accessing
* @poweroff_is_suspend: true if the GPU is being powered off due to a suspend
* request. pm.lock must be held when accessing
* @gpu_poweroff_wait_wq: workqueue for waiting for GPU to power off
* @gpu_poweroff_wait_work: work item for use with @gpu_poweroff_wait_wq
* @poweroff_wait: waitqueue for waiting for @gpu_poweroff_wait_work to complete
* @callback_power_on: Callback when the GPU needs to be turned on. See
* &struct kbase_pm_callback_conf
* @callback_power_off: Callback when the GPU may be turned off. See
* &struct kbase_pm_callback_conf
* @callback_power_suspend: Callback when a suspend occurs and the GPU needs to
* be turned off. See &struct kbase_pm_callback_conf
* @callback_power_resume: Callback when a resume occurs and the GPU needs to
* be turned on. See &struct kbase_pm_callback_conf
* @callback_power_runtime_on: Callback when the GPU needs to be turned on. See
* &struct kbase_pm_callback_conf
* @callback_power_runtime_off: Callback when the GPU may be turned off. See
* &struct kbase_pm_callback_conf
* @callback_power_runtime_idle: Optional callback when the GPU may be idle. See
* &struct kbase_pm_callback_conf
*
* Note:
* During an IRQ, @ca_current_policy or @pm_current_policy can be NULL when the
* policy is being changed with kbase_pm_ca_set_policy() or
* kbase_pm_set_policy(). The change is protected under
* kbase_device.pm.power_change_lock. Direct access to this
* from IRQ context must therefore check for NULL. If NULL, then
* kbase_pm_ca_set_policy() or kbase_pm_set_policy() will re-issue the policy
* functions that would have been done under IRQ.
*/
struct kbase_pm_backend_data {
/* Currently selected policies and their private data */
const struct kbase_pm_ca_policy *ca_current_policy;
const struct kbase_pm_policy *pm_current_policy;
union kbase_pm_ca_policy_data ca_policy_data;
union kbase_pm_policy_data pm_policy_data;
bool ca_in_transition;
/* Reset completion and L2 power-up state, each with its wait queue */
bool reset_done;
wait_queue_head_t reset_done_wait;
wait_queue_head_t l2_powered_wait;
int l2_powered;
/* Reference count of GPU cycle counter users and the lock protecting it */
int gpu_cycle_counter_requests;
spinlock_t gpu_cycle_counter_requests_lock;
/* Desired and in-progress (powering on) core state bit masks */
u64 desired_shader_state;
u64 powering_on_shader_state;
u64 desired_tiler_state;
u64 powering_on_tiler_state;
u64 powering_on_l2_state;
bool gpu_in_desired_state;
wait_queue_head_t gpu_in_desired_state_wait;
bool gpu_powered;
bool instr_enabled;
bool cg1_disabled;
/* Debug-only state: this member exists only when CONFIG_MALI_DEBUG is set */
#ifdef CONFIG_MALI_DEBUG
bool driver_ready_for_irqs;
#endif /* CONFIG_MALI_DEBUG */
spinlock_t gpu_powered_lock;
struct kbasep_pm_metrics_data metrics;
/* Deferred power-off: tick counters, timer, workqueue and pending masks */
int gpu_poweroff_pending;
int shader_poweroff_pending_time;
struct hrtimer gpu_poweroff_timer;
struct workqueue_struct *gpu_poweroff_wq;
struct work_struct gpu_poweroff_work;
u64 shader_poweroff_pending;
u64 tiler_poweroff_pending;
bool poweroff_timer_needed;
bool poweroff_timer_running;
/* State used while waiting for a GPU power off to complete */
bool poweroff_wait_in_progress;
bool poweron_required;
bool poweroff_is_suspend;
struct workqueue_struct *gpu_poweroff_wait_wq;
struct work_struct gpu_poweroff_wait_work;
wait_queue_head_t poweroff_wait;
/* Power callbacks; see &struct kbase_pm_callback_conf */
int (*callback_power_on)(struct kbase_device *kbdev);
void (*callback_power_off)(struct kbase_device *kbdev);
void (*callback_power_suspend)(struct kbase_device *kbdev);
void (*callback_power_resume)(struct kbase_device *kbdev);
int (*callback_power_runtime_on)(struct kbase_device *kbdev);
void (*callback_power_runtime_off)(struct kbase_device *kbdev);
int (*callback_power_runtime_idle)(struct kbase_device *kbdev);
};
/**
* enum kbase_pm_policy_id - List of power policy IDs
*
* Values start at 1. Each power policy stores its own value in the id member
* of its struct kbase_pm_policy. The last two enumerators only exist when
* MALI_CUSTOMER_RELEASE is zero.
*/
enum kbase_pm_policy_id {
KBASE_PM_POLICY_ID_DEMAND = 1,
KBASE_PM_POLICY_ID_ALWAYS_ON,
KBASE_PM_POLICY_ID_COARSE_DEMAND,
#if !MALI_CUSTOMER_RELEASE
KBASE_PM_POLICY_ID_DEMAND_ALWAYS_POWERED,
KBASE_PM_POLICY_ID_FAST_START
#endif
};
typedef u32 kbase_pm_policy_flags;
/**
* struct kbase_pm_policy - Power policy structure.
*
* Each power policy exposes a (static) instance of this structure which
* contains function pointers to the policy's methods.
*
* @name: The name of this policy
* @init: Function called when the policy is selected
* @term: Function called when the policy is unselected
* @get_core_mask: Function called to get the current shader core mask
* @get_core_active: Function called to get the current overall GPU power
* state
* @flags: Field indicating flags for this policy
* @id: Field indicating an ID for this policy. This is not
* necessarily the same as its index in the list returned
* by kbase_pm_list_policies().
* It is used purely for debugging.
*/
struct kbase_pm_policy {
/* Policy name string, e.g. "demand" or "coarse_demand" */
char *name;
/**
* Function called when the policy is selected
*
* This should initialize the kbdev->pm.backend.pm_policy_data structure.
* It should not attempt to make any changes to hardware state.
*
* It is undefined what state the cores are in when the function is
* called.
*
* @kbdev: The kbase device structure for the device (must be a
* valid pointer)
*/
void (*init)(struct kbase_device *kbdev);
/**
* Function called when the policy is unselected.
*
* @kbdev: The kbase device structure for the device (must be a
* valid pointer)
*/
void (*term)(struct kbase_device *kbdev);
/**
* Function called to get the current shader core mask
*
* The returned mask should meet or exceed (kbdev->shader_needed_bitmap
* | kbdev->shader_inuse_bitmap).
*
* @kbdev: The kbase device structure for the device (must be a
* valid pointer)
*
* Return: The mask of shader cores to be powered
*/
u64 (*get_core_mask)(struct kbase_device *kbdev);
/**
* Function called to get the current overall GPU power state
*
* This function should consider the state of kbdev->pm.active_count. If
* this count is greater than 0 then there is at least one active
* context on the device and the GPU should be powered. If it is equal
* to 0 then there are no active contexts and the GPU could be powered
* off if desired.
*
* @kbdev: The kbase device structure for the device (must be a
* valid pointer)
*
* Return: true if the GPU should be powered, false otherwise
*/
bool (*get_core_active)(struct kbase_device *kbdev);
/* Flags for this policy */
kbase_pm_policy_flags flags;
/* ID for this policy; used purely for debugging */
enum kbase_pm_policy_id id;
};
/**
* enum kbase_pm_ca_policy_id - List of core availability policy IDs
*
* Values start at 1. Each core availability policy stores its own value in
* the id member of its struct kbase_pm_ca_policy. Both enumerators are defined
* unconditionally, although the random policy's header and private data are
* only included when MALI_CUSTOMER_RELEASE is zero.
*/
enum kbase_pm_ca_policy_id {
KBASE_PM_CA_POLICY_ID_FIXED = 1,
KBASE_PM_CA_POLICY_ID_RANDOM
};
typedef u32 kbase_pm_ca_policy_flags;
/**
* struct kbase_pm_ca_policy - Core availability policy structure.
*
* Each core availability policy exposes a (static) instance of this structure
* which contains function pointers to the policy's methods.
*
* @name: The name of this policy
* @init: Function called when the policy is selected
* @term: Function called when the policy is unselected
* @get_core_mask: Function called to get the current shader core
* availability mask
* @update_core_status: Function called to update the current core status
* @flags: Field indicating flags for this policy
* @id: Field indicating an ID for this policy. This is not
* necessarily the same as its index in the list returned
* by kbase_pm_list_policies().
* It is used purely for debugging.
*/
struct kbase_pm_ca_policy {
/* Policy name string, e.g. "fixed" */
char *name;
/**
* Function called when the policy is selected
*
* This should initialize the kbdev->pm.backend.ca_policy_data structure.
* It should not attempt to make any changes to hardware state.
*
* It is undefined what state the cores are in when the function is
* called.
*
* @kbdev: The kbase device structure for the device (must be a
* valid pointer)
*/
void (*init)(struct kbase_device *kbdev);
/**
* Function called when the policy is unselected.
*
* @kbdev: The kbase device structure for the device (must be a
* valid pointer)
*/
void (*term)(struct kbase_device *kbdev);
/**
* Function called to get the current shader core availability mask
*
* When a change in core availability is occurring, the policy must set
* kbdev->pm.backend.ca_in_transition to true. This is to indicate that
* reporting changes in power state cannot be optimized out, even if
* kbdev->pm.backend.desired_shader_state remains unchanged. This must
* be done by any functions internal to the Core Availability Policy
* that change the return value of kbase_pm_ca_policy::get_core_mask.
*
* @kbdev: The kbase device structure for the device (must be a
* valid pointer)
*
* Return: The current core availability mask
*/
u64 (*get_core_mask)(struct kbase_device *kbdev);
/**
* Function called to update the current core status
*
* If none of the cores in core group 0 are ready or transitioning, then
* the policy must ensure that the next call to get_core_mask does not
* return 0 for all cores in core group 0. It is an error to disable
* core group 0 through the core availability policy.
*
* When a change in core availability has finished, the policy must set
* kbdev->pm.backend.ca_in_transition to false. This is to indicate that
* changes in power state can once again be optimized out when
* kbdev->pm.backend.desired_shader_state is unchanged.
*
* @kbdev: The kbase device structure for the device
* (must be a valid pointer)
* @cores_ready: The mask of cores currently powered and
* ready to run jobs
* @cores_transitioning: The mask of cores currently transitioning
* power state
*/
void (*update_core_status)(struct kbase_device *kbdev, u64 cores_ready,
u64 cores_transitioning);
/* Flags for this policy */
kbase_pm_ca_policy_flags flags;
/**
* Field indicating an ID for this policy. This is not necessarily the
* same as its index in the list returned by kbase_pm_list_policies().
* It is used purely for debugging.
*/
enum kbase_pm_ca_policy_id id;
};
#endif /* _KBASE_PM_HWACCESS_DEFS_H_ */

View File

@ -0,0 +1,73 @@
/*
*
* (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
* Foundation, and any use by you of this program is subject to the terms
* of such GNU licence.
*
* A copy of the licence is included with the program, and can also be obtained
* from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
* Boston, MA 02110-1301, USA.
*
*/
/*
* A simple demand based power management policy
*/
#include <mali_kbase.h>
#include <mali_kbase_pm.h>
/**
 * demand_get_core_mask - Shader cores the demand policy wants powered
 *
 * @kbdev: The kbase device structure for the device (must be a valid pointer)
 *
 * Return: the union of the needed and in-use shader bitmaps while
 *         kbdev->pm.active_count is non-zero, otherwise an empty mask
 */
static u64 demand_get_core_mask(struct kbase_device *kbdev)
{
	if (kbdev->pm.active_count == 0)
		return 0;

	return kbdev->shader_needed_bitmap | kbdev->shader_inuse_bitmap;
}
static bool demand_get_core_active(struct kbase_device *kbdev)
{
if (0 == kbdev->pm.active_count && !(kbdev->shader_needed_bitmap |
kbdev->shader_inuse_bitmap) && !kbdev->tiler_needed_cnt
&& !kbdev->tiler_inuse_cnt)
return false;
return true;
}
/**
* demand_init - Init callback of the demand policy
*
* @kbdev: The kbase device structure for the device (unused)
*
* Does nothing; the parameter is only marked as unused.
*/
static void demand_init(struct kbase_device *kbdev)
{
CSTD_UNUSED(kbdev);
}
/**
* demand_term - Term callback of the demand policy
*
* @kbdev: The kbase device structure for the device (unused)
*
* Does nothing; the parameter is only marked as unused.
*/
static void demand_term(struct kbase_device *kbdev)
{
CSTD_UNUSED(kbdev);
}
/*
* The struct kbase_pm_policy structure for the demand power policy.
*
* This is the static structure that defines the demand power policy's name,
* callbacks, flags and id. The initializer is positional, so the entries must
* stay in the member order of struct kbase_pm_policy.
*/
const struct kbase_pm_policy kbase_pm_demand_policy_ops = {
"demand", /* name */
demand_init, /* init */
demand_term, /* term */
demand_get_core_mask, /* get_core_mask */
demand_get_core_active, /* get_core_active */
0u, /* flags */
KBASE_PM_POLICY_ID_DEMAND, /* id */
};
KBASE_EXPORT_TEST_API(kbase_pm_demand_policy_ops);

View File

@ -0,0 +1,64 @@
/*
*
* (C) COPYRIGHT 2011-2015 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
* Foundation, and any use by you of this program is subject to the terms
* of such GNU licence.
*
* A copy of the licence is included with the program, and can also be obtained
* from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
* Boston, MA 02110-1301, USA.
*
*/
/*
* A simple demand based power management policy
*/
#ifndef MALI_KBASE_PM_DEMAND_H
#define MALI_KBASE_PM_DEMAND_H
/**
* DOC: Demand power management policy
*
* The demand power management policy has the following characteristics:
* - When KBase indicates that the GPU will be powered up, but we don't yet
* know which Job Chains are to be run:
* - The Shader Cores are not powered up
*
* - When KBase indicates that a set of Shader Cores are needed to submit the
* currently queued Job Chains:
* - Only those Shader Cores are powered up
*
* - When KBase indicates that the GPU need not be powered:
* - The Shader Cores are powered off, and the GPU itself is powered off too.
*
* Note:
* - KBase indicates the GPU will be powered up when it has a User Process that
* has just started to submit Job Chains.
*
* - KBase indicates the GPU need not be powered when all the Job Chains from
* User Processes have finished, and it is waiting for a User Process to
* submit some more Job Chains.
*/
/**
* struct kbasep_pm_policy_demand - Private structure for policy instance data
*
* @dummy: No state is needed, a dummy variable
*
* This contains data that is private to the demand power policy.
* It is the "demand" member of union kbase_pm_policy_data.
*/
struct kbasep_pm_policy_demand {
/* Placeholder only: the policy keeps no state */
int dummy;
};
extern const struct kbase_pm_policy kbase_pm_demand_policy_ops;
#endif /* MALI_KBASE_PM_DEMAND_H */

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,550 @@
/*
*
* (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
* Foundation, and any use by you of this program is subject to the terms
* of such GNU licence.
*
* A copy of the licence is included with the program, and can also be obtained
* from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
* Boston, MA 02110-1301, USA.
*
*/
/*
* Power management API definitions used internally by GPU backend
*/
#ifndef _KBASE_BACKEND_PM_INTERNAL_H_
#define _KBASE_BACKEND_PM_INTERNAL_H_
#include <mali_kbase_hwaccess_pm.h>
#include "mali_kbase_pm_ca.h"
#include "mali_kbase_pm_policy.h"
/**
* kbase_pm_dev_idle - The GPU is idle.
*
* The OS may choose to turn off idle devices
*
* @kbdev: The kbase device structure for the device (must be a valid pointer)
*/
void kbase_pm_dev_idle(struct kbase_device *kbdev);
/**
* kbase_pm_dev_activate - The GPU is active.
*
* The OS should avoid opportunistically turning off the GPU while it is active
*
* @kbdev: The kbase device structure for the device (must be a valid pointer)
*/
void kbase_pm_dev_activate(struct kbase_device *kbdev);
/**
* kbase_pm_get_present_cores - Get details of the cores that are present in
* the device.
*
* This function can be called by the active power policy to return a bitmask of
* the cores (of a specified type) present in the GPU device. Only the bitmask
* is returned; no separate core count is reported.
*
* @kbdev: The kbase device structure for the device (must be a valid
* pointer)
* @type: The type of core (see the enum kbase_pm_core_type enumeration)
*
* Return: The bit mask of cores present
*/
u64 kbase_pm_get_present_cores(struct kbase_device *kbdev,
enum kbase_pm_core_type type);
/**
* kbase_pm_get_active_cores - Get details of the cores that are currently
* active in the device.
*
* This function can be called by the active power policy to return a bitmask of
* the cores (of a specified type) that are actively processing work (i.e.
* turned on *and* busy).
*
* @kbdev: The kbase device structure for the device (must be a valid pointer)
* @type: The type of core (see the enum kbase_pm_core_type enumeration)
*
* Return: The bit mask of active cores
*/
u64 kbase_pm_get_active_cores(struct kbase_device *kbdev,
enum kbase_pm_core_type type);
/**
* kbase_pm_get_trans_cores - Get details of the cores that are currently
* transitioning between power states.
*
* This function can be called by the active power policy to return a bitmask of
* the cores (of a specified type) that are currently transitioning between
* power states.
*
* @kbdev: The kbase device structure for the device (must be a valid pointer)
* @type: The type of core (see the enum kbase_pm_core_type enumeration)
*
* Return: The bit mask of transitioning cores
*/
u64 kbase_pm_get_trans_cores(struct kbase_device *kbdev,
enum kbase_pm_core_type type);
/**
* kbase_pm_get_ready_cores - Get details of the cores that are currently
* powered and ready for jobs.
*
* This function can be called by the active power policy to return a bitmask of
* the cores (of a specified type) that are powered and ready for jobs (they may
* or may not be currently executing jobs).
*
* @kbdev: The kbase device structure for the device (must be a valid pointer)
* @type: The type of core (see the enum kbase_pm_core_type enumeration)
*
* Return: The bit mask of ready cores
*/
u64 kbase_pm_get_ready_cores(struct kbase_device *kbdev,
enum kbase_pm_core_type type);
/**
* kbase_pm_clock_on - Turn the clock for the device on, and enable device
* interrupts.
*
* This function can be used by a power policy to turn the clock for the GPU on.
* It should be modified during integration to perform the necessary actions to
* ensure that the GPU is fully powered and clocked.
*
* @kbdev: The kbase device structure for the device (must be a valid
* pointer)
* @is_resume: true if clock on due to resume after suspend, false otherwise
*/
void kbase_pm_clock_on(struct kbase_device *kbdev, bool is_resume);
/**
* kbase_pm_clock_off - Disable device interrupts, and turn the clock for the
* device off.
*
* This function can be used by a power policy to turn the clock for the GPU
* off. It should be modified during integration to perform the necessary
* actions to turn the clock off (if this is possible in the integration).
*
* @kbdev: The kbase device structure for the device (must be a valid
* pointer)
* @is_suspend: true if clock off due to suspend, false otherwise
*
* Return: true if clock was turned off, or
* false if clock can not be turned off due to pending page/bus fault
* workers. Caller must flush MMU workqueues and retry
*/
bool kbase_pm_clock_off(struct kbase_device *kbdev, bool is_suspend);
/**
* kbase_pm_enable_interrupts - Enable interrupts on the device.
*
* Interrupts are also enabled after a call to kbase_pm_clock_on().
*
* @kbdev: The kbase device structure for the device (must be a valid pointer)
*/
void kbase_pm_enable_interrupts(struct kbase_device *kbdev);
/**
* kbase_pm_disable_interrupts - Disable interrupts on the device.
*
* This prevents delivery of Power Management interrupts to the CPU so that
* kbase_pm_check_transitions_nolock() will not be called from the IRQ handler
* until kbase_pm_enable_interrupts() or kbase_pm_clock_on() is called.
*
* Interrupts are also disabled after a call to kbase_pm_clock_off().
*
* @kbdev: The kbase device structure for the device (must be a valid pointer)
*/
void kbase_pm_disable_interrupts(struct kbase_device *kbdev);
/**
* kbase_pm_disable_interrupts_nolock - Version of kbase_pm_disable_interrupts()
* that does not take the hwaccess_lock
*
* Caller must hold the hwaccess_lock.
*
* @kbdev: The kbase device structure for the device (must be a valid pointer)
*/
void kbase_pm_disable_interrupts_nolock(struct kbase_device *kbdev);
/**
* kbase_pm_init_hw - Initialize the hardware.
* @kbdev: The kbase device structure for the device (must be a valid pointer)
* @flags: Flags specifying the type of PM init
*
* This function checks the GPU ID register to ensure that the GPU is supported
* by the driver and performs a reset on the device so that it is in a known
* state before the device is used.
*
* Return: 0 if the device is supported and successfully reset.
*/
int kbase_pm_init_hw(struct kbase_device *kbdev, unsigned int flags);
/**
* kbase_pm_reset_done - The GPU has been reset successfully.
*
* This function must be called by the GPU interrupt handler when the
* RESET_COMPLETED bit is set. It signals to the power management initialization
* code that the GPU has been successfully reset.
*
* @kbdev: The kbase device structure for the device (must be a valid pointer)
*/
void kbase_pm_reset_done(struct kbase_device *kbdev);
/**
* kbase_pm_check_transitions_nolock - Check if there are any power transitions
* to make, and if so start them.
*
* This function will check the desired_xx_state members of
* struct kbase_pm_device_data and the actual status of the hardware to see if
* any power transitions can be made at this time to make the hardware state
* closer to the state desired by the power policy.
*
* The return value can be used to check whether all the desired cores are
* available, and so whether it's worth submitting a job (e.g. from a Power
* Management IRQ).
*
* Note that this still returns true when desired_xx_state has no
* cores. That is: of the no cores desired, none were *un*available. In
* this case, the caller may still need to try submitting jobs. This is because
* the Core Availability Policy might have taken us to an intermediate state
* where no cores are powered, before powering on more cores (e.g. for core
* rotation)
*
* The caller must hold kbase_device.pm.power_change_lock
*
* @kbdev: The kbase device structure for the device (must be a valid pointer)
*
* Return: true when all desired cores are available. That is,
* it's worthwhile for the caller to submit a job.
* false otherwise.
*/
bool kbase_pm_check_transitions_nolock(struct kbase_device *kbdev);
/**
* kbase_pm_check_transitions_sync - Synchronous and locking variant of
* kbase_pm_check_transitions_nolock()
*
* On returning, the desired state at the time of the call will have been met.
*
* There is nothing to stop the core being switched off by calls to
* kbase_pm_release_cores() or kbase_pm_unrequest_cores(). Therefore, the
* caller must have already made a call to
* kbase_pm_request_cores()/kbase_pm_request_cores_sync() previously.
*
* The usual use-case for this is to ensure cores are 'READY' after performing
* a GPU Reset.
*
* Unlike kbase_pm_check_transitions_nolock(), the caller must not hold
* kbase_device.pm.power_change_lock, because this function will take that
* lock itself.
*
* @kbdev: The kbase device structure for the device (must be a valid pointer)
*/
void kbase_pm_check_transitions_sync(struct kbase_device *kbdev);
/**
* kbase_pm_update_cores_state_nolock - Variant of kbase_pm_update_cores_state()
* where the caller must hold
* kbase_device.pm.power_change_lock
*
* @kbdev: The kbase device structure for the device (must be a valid pointer)
*/
void kbase_pm_update_cores_state_nolock(struct kbase_device *kbdev);
/**
* kbase_pm_update_cores_state - Update the desired state of shader cores from
* the Power Policy, and begin any power
* transitions.
*
* This function will update the desired_xx_state members of
* struct kbase_pm_device_data by calling into the current Power Policy. It will
* then begin power transitions to make the hardware achieve the desired shader
* core state.
*
* @kbdev: The kbase device structure for the device (must be a valid pointer)
*/
void kbase_pm_update_cores_state(struct kbase_device *kbdev);
/**
* kbase_pm_cancel_deferred_poweroff - Cancel any pending requests to power off
* the GPU and/or shader cores.
*
* This should be called by any functions which directly power off the GPU.
*
* @kbdev: The kbase device structure for the device (must be a valid pointer)
*/
void kbase_pm_cancel_deferred_poweroff(struct kbase_device *kbdev);
/**
* kbasep_pm_read_present_cores - Read the bitmasks of present cores.
*
* This information is cached to avoid having to perform register reads whenever
* the information is required.
*
* @kbdev: The kbase device structure for the device (must be a valid pointer)
*/
void kbasep_pm_read_present_cores(struct kbase_device *kbdev);
/**
* kbasep_pm_metrics_init - Initialize the metrics gathering framework.
*
* This must be called before other metric gathering APIs are called.
*
* @kbdev: The kbase device structure for the device (must be a valid pointer)
*
* Return: 0 on success, error code on error
*/
int kbasep_pm_metrics_init(struct kbase_device *kbdev);
/**
* kbasep_pm_metrics_term - Terminate the metrics gathering framework.
*
* This must be called when metric gathering is no longer required. It is an
* error to call any metrics gathering function (other than
* kbasep_pm_metrics_init()) after calling this function.
*
* @kbdev: The kbase device structure for the device (must be a valid pointer)
*/
void kbasep_pm_metrics_term(struct kbase_device *kbdev);
/**
* kbase_pm_report_vsync - Function to be called by the frame buffer driver to
* update the vsync metric.
*
* This function should be called by the frame buffer driver to update whether
* the system is hitting the vsync target or not. buffer_updated should be true
* if the vsync corresponded with a new frame being displayed, otherwise it
* should be false. This function does not need to be called every vsync, but
* only when the value of @buffer_updated differs from a previous call.
*
* @kbdev: The kbase device structure for the device (must be a
* valid pointer)
* @buffer_updated: True if the buffer has been updated on this VSync,
* false otherwise
*/
void kbase_pm_report_vsync(struct kbase_device *kbdev, int buffer_updated);
/**
* kbase_pm_get_dvfs_action - Determine whether the DVFS system should change
* the clock speed of the GPU.
*
* @kbdev: The kbase device structure for the device (must be a valid pointer)
*
* This function should be called regularly by the DVFS system to check whether
* the clock speed of the GPU needs updating.
*/
void kbase_pm_get_dvfs_action(struct kbase_device *kbdev);
/**
* kbase_pm_request_gpu_cycle_counter - Mark that the GPU cycle counter is
* needed
*
* If the caller is the first caller then the GPU cycle counters will be enabled
* along with the l2 cache
*
* The GPU must be powered when calling this function (i.e.
* kbase_pm_context_active() must have been called).
*
* @kbdev: The kbase device structure for the device (must be a valid pointer)
*/
void kbase_pm_request_gpu_cycle_counter(struct kbase_device *kbdev);
/**
* kbase_pm_request_gpu_cycle_counter_l2_is_on - Mark GPU cycle counter is
* needed (l2 cache already on)
*
* This is a version of the above function
* (kbase_pm_request_gpu_cycle_counter()) suitable for being called when the
* l2 cache is known to be on and assured to be on until the subsequent call of
* kbase_pm_release_gpu_cycle_counter() such as when a job is submitted. It does
* not sleep and can be called from atomic functions.
*
* The GPU must be powered when calling this function (i.e.
* kbase_pm_context_active() must have been called) and the l2 cache must be
* powered on.
*
* @kbdev: The kbase device structure for the device (must be a valid pointer)
*/
void kbase_pm_request_gpu_cycle_counter_l2_is_on(struct kbase_device *kbdev);
/**
* kbase_pm_release_gpu_cycle_counter - Mark that the GPU cycle counter is no
* longer in use
*
* If the caller is the last caller then the GPU cycle counters will be
* disabled. A request must have been made before a call to this.
*
* Caller must not hold the hwaccess_lock, as it will be taken in this function.
* If the caller is already holding this lock then
* kbase_pm_release_gpu_cycle_counter_nolock() must be used instead.
*
* @kbdev: The kbase device structure for the device (must be a valid pointer)
*/
void kbase_pm_release_gpu_cycle_counter(struct kbase_device *kbdev);
/**
* kbase_pm_release_gpu_cycle_counter_nolock - Version of kbase_pm_release_gpu_cycle_counter()
* that does not take hwaccess_lock
*
* Caller must hold the hwaccess_lock.
*
* @kbdev: The kbase device structure for the device (must be a valid pointer)
*/
void kbase_pm_release_gpu_cycle_counter_nolock(struct kbase_device *kbdev);
/**
* kbase_pm_wait_for_poweroff_complete - Wait for the poweroff workqueue to
* complete
*
* @kbdev: The kbase device structure for the device (must be a valid pointer)
*/
void kbase_pm_wait_for_poweroff_complete(struct kbase_device *kbdev);
/**
* kbase_pm_register_access_enable - Enable access to GPU registers
*
* Enables access to the GPU registers before power management has powered up
* the GPU with kbase_pm_powerup().
*
* Access to registers should be done using kbase_os_reg_read()/write() at this
* stage, not kbase_reg_read()/write().
*
* This results in the power management callbacks provided in the driver
* configuration to get called to turn on power and/or clocks to the GPU. See
* kbase_pm_callback_conf.
*
* This should only be used before power management is powered up with
* kbase_pm_powerup()
*
* @kbdev: The kbase device structure for the device (must be a valid pointer)
*/
void kbase_pm_register_access_enable(struct kbase_device *kbdev);
/**
* kbase_pm_register_access_disable - Disable early register access
*
* Disables access to the GPU registers enabled earlier by a call to
* kbase_pm_register_access_enable().
*
* This results in the power management callbacks provided in the driver
* configuration to get called to turn off power and/or clocks to the GPU. See
* kbase_pm_callback_conf
*
* This should only be used before power management is powered up with
* kbase_pm_powerup()
*
* @kbdev: The kbase device structure for the device (must be a valid pointer)
*/
void kbase_pm_register_access_disable(struct kbase_device *kbdev);
/* NOTE: kbase_pm_is_suspending is in mali_kbase.h, because it is an inline
* function */
/**
* kbase_pm_metrics_is_active - Check if the power management metrics
* collection is active.
*
* Note that this returns whether the power management metrics collection was
* active at the time of calling; it is possible that after the call the metrics
* collection enable may have changed state.
*
* The caller must handle the consequence that the state may have changed.
*
* @kbdev: The kbase device structure for the device (must be a valid pointer)
* Return: true if metrics collection was active else false.
*/
bool kbase_pm_metrics_is_active(struct kbase_device *kbdev);
/**
* kbase_pm_do_poweron - Power on the GPU, and any cores that are requested.
*
* @kbdev: The kbase device structure for the device (must be a valid
* pointer)
* @is_resume: true if power on due to resume after suspend,
* false otherwise
*/
void kbase_pm_do_poweron(struct kbase_device *kbdev, bool is_resume);
/**
* kbase_pm_do_poweroff - Power off the GPU, and any cores that have been
* requested.
*
* @kbdev: The kbase device structure for the device (must be a valid
* pointer)
* @is_suspend: true if power off due to suspend,
* false otherwise
*/
void kbase_pm_do_poweroff(struct kbase_device *kbdev, bool is_suspend);
#ifdef CONFIG_PM_DEVFREQ
void kbase_pm_get_dvfs_utilisation(struct kbase_device *kbdev,
unsigned long *total, unsigned long *busy);
void kbase_pm_reset_dvfs_utilisation(struct kbase_device *kbdev);
#endif
#ifdef CONFIG_MALI_MIDGARD_DVFS
/**
* kbase_platform_dvfs_event - Report utilisation to DVFS code
*
* Function provided by platform specific code when DVFS is enabled to allow
* the power management metrics system to report utilisation.
*
* @kbdev: The kbase device structure for the device (must be a
* valid pointer)
* @utilisation: The current calculated utilisation by the metrics system.
* @util_gl_share: The current calculated gl share of utilisation.
* @util_cl_share: The current calculated cl share of utilisation per core
* group.
* Return: Returns 0 on failure and non zero on success.
*/
int kbase_platform_dvfs_event(struct kbase_device *kbdev, u32 utilisation,
u32 util_gl_share, u32 util_cl_share[2]);
#endif
void kbase_pm_power_changed(struct kbase_device *kbdev);
/**
* kbase_pm_metrics_update - Inform the metrics system that an atom is either
* about to be run or has just completed.
* @kbdev: The kbase device structure for the device (must be a valid pointer)
* @now: Pointer to the timestamp of the change, or NULL to use current time
*
* Caller must hold hwaccess_lock
*/
void kbase_pm_metrics_update(struct kbase_device *kbdev,
ktime_t *now);
/**
* kbase_pm_cache_snoop_enable - Allow CPU snoops on the GPU
* If the GPU does not have coherency this is a no-op
* @kbdev: Device pointer
*
* This function should be called after L2 power up.
*/
void kbase_pm_cache_snoop_enable(struct kbase_device *kbdev);
/**
* kbase_pm_cache_snoop_disable - Prevent CPU snoops on the GPU
* If the GPU does not have coherency this is a no-op
* @kbdev: Device pointer
*
* This function should be called before L2 power off.
*/
void kbase_pm_cache_snoop_disable(struct kbase_device *kbdev);
#endif /* _KBASE_BACKEND_PM_INTERNAL_H_ */

View File

@ -0,0 +1,401 @@
/*
*
* (C) COPYRIGHT 2011-2016 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
* Foundation, and any use by you of this program is subject to the terms
* of such GNU licence.
*
* A copy of the licence is included with the program, and can also be obtained
* from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
* Boston, MA 02110-1301, USA.
*
*/
/*
* Metrics for power management
*/
#include <mali_kbase.h>
#include <mali_kbase_pm.h>
#include <backend/gpu/mali_kbase_pm_internal.h>
#include <backend/gpu/mali_kbase_jm_rb.h>
/* When VSync is being hit aim for utilisation between 70-90% */
#define KBASE_PM_VSYNC_MIN_UTILISATION 70
#define KBASE_PM_VSYNC_MAX_UTILISATION 90
/* Otherwise aim for 10-40% */
#define KBASE_PM_NO_VSYNC_MIN_UTILISATION 10
#define KBASE_PM_NO_VSYNC_MAX_UTILISATION 40
/* Shift used for kbasep_pm_metrics_data.time_busy/idle - units of (1 << 8) ns
* This gives a maximum period between samples of 2^(32+8)/100 ns = slightly
* under 11s. Exceeding this will cause overflow */
#define KBASE_PM_TIME_SHIFT 8
/* Maximum time between sampling of utilization data, without resetting the
* counters. The value is compared against time_busy + time_idle, which are
* accumulated in units of (1 << KBASE_PM_TIME_SHIFT) ns, so 100000 units
* corresponds to roughly 25.6ms (not 100ms). */
#define MALI_UTILIZATION_MAX_PERIOD 100000 /* units of (1 << KBASE_PM_TIME_SHIFT) ns */
#ifdef CONFIG_MALI_MIDGARD_DVFS
/**
 * dvfs_callback - Periodic DVFS sampling timer handler
 * @timer: The hrtimer embedded in struct kbasep_pm_metrics_data
 *
 * Runs one DVFS evaluation for the owning device and, while the metrics
 * timer is still marked active, starts the timer again for another
 * dvfs_period.
 *
 * Return: always HRTIMER_NORESTART; re-arming is done explicitly with
 * hrtimer_start() rather than through the return value.
 */
static enum hrtimer_restart dvfs_callback(struct hrtimer *timer)
{
	struct kbasep_pm_metrics_data *pm_metrics;
	unsigned long irq_flags;

	KBASE_DEBUG_ASSERT(timer != NULL);

	pm_metrics = container_of(timer, struct kbasep_pm_metrics_data, timer);

	kbase_pm_get_dvfs_action(pm_metrics->kbdev);

	/* timer_active is checked under the metrics lock so that a concurrent
	 * kbasep_pm_metrics_term() cannot race with the re-arm */
	spin_lock_irqsave(&pm_metrics->lock, irq_flags);
	if (pm_metrics->timer_active) {
		hrtimer_start(timer,
			HR_TIMER_DELAY_MSEC(pm_metrics->kbdev->pm.dvfs_period),
			HRTIMER_MODE_REL);
	}
	spin_unlock_irqrestore(&pm_metrics->lock, irq_flags);

	return HRTIMER_NORESTART;
}
#endif /* CONFIG_MALI_MIDGARD_DVFS */
/*
 * Initialize the metrics gathering state of @kbdev: link the metrics data
 * back to the device, start a new sampling period at the current time,
 * clear all counters and activity flags and, when CONFIG_MALI_MIDGARD_DVFS
 * is enabled, start the periodic DVFS timer.
 *
 * Always returns 0.
 */
int kbasep_pm_metrics_init(struct kbase_device *kbdev)
{
	KBASE_DEBUG_ASSERT(kbdev != NULL);

	kbdev->pm.backend.metrics.kbdev = kbdev;
	kbdev->pm.backend.metrics.time_period_start = ktime_get();

	spin_lock_init(&kbdev->pm.backend.metrics.lock);

	/* Nothing is running yet */
	kbdev->pm.backend.metrics.gpu_active = false;
	kbdev->pm.backend.metrics.active_gl_ctx[0] = 0;
	kbdev->pm.backend.metrics.active_gl_ctx[1] = 0;
	kbdev->pm.backend.metrics.active_cl_ctx[0] = 0;
	kbdev->pm.backend.metrics.active_cl_ctx[1] = 0;

	/* Counters for the current and the previous sampling period */
	kbdev->pm.backend.metrics.time_busy = 0;
	kbdev->pm.backend.metrics.time_idle = 0;
	kbdev->pm.backend.metrics.prev_busy = 0;
	kbdev->pm.backend.metrics.prev_idle = 0;

	/* Per-type busy counters */
	kbdev->pm.backend.metrics.busy_gl = 0;
	kbdev->pm.backend.metrics.busy_cl[0] = 0;
	kbdev->pm.backend.metrics.busy_cl[1] = 0;

#ifdef CONFIG_MALI_MIDGARD_DVFS
	/* timer_active is set before the timer is started because
	 * dvfs_callback() only re-arms the timer while it is set */
	kbdev->pm.backend.metrics.timer_active = true;
	hrtimer_init(&kbdev->pm.backend.metrics.timer, CLOCK_MONOTONIC,
			HRTIMER_MODE_REL);
	kbdev->pm.backend.metrics.timer.function = dvfs_callback;

	hrtimer_start(&kbdev->pm.backend.metrics.timer,
			HR_TIMER_DELAY_MSEC(kbdev->pm.dvfs_period),
			HRTIMER_MODE_REL);
#endif /* CONFIG_MALI_MIDGARD_DVFS */

	return 0;
}
KBASE_EXPORT_TEST_API(kbasep_pm_metrics_init);
/*
 * Terminate the metrics gathering framework. With CONFIG_MALI_MIDGARD_DVFS
 * this clears timer_active under the metrics lock, so dvfs_callback() stops
 * re-arming itself, and then cancels the DVFS timer. Without that option the
 * function does nothing.
 */
void kbasep_pm_metrics_term(struct kbase_device *kbdev)
{
#ifdef CONFIG_MALI_MIDGARD_DVFS
	unsigned long irq_flags;

	KBASE_DEBUG_ASSERT(kbdev != NULL);

	/* Stop the callback from restarting the timer ... */
	spin_lock_irqsave(&kbdev->pm.backend.metrics.lock, irq_flags);
	kbdev->pm.backend.metrics.timer_active = false;
	spin_unlock_irqrestore(&kbdev->pm.backend.metrics.lock, irq_flags);

	/* ... and only then cancel it, outside of the lock */
	hrtimer_cancel(&kbdev->pm.backend.metrics.timer);
#endif /* CONFIG_MALI_MIDGARD_DVFS */
}
KBASE_EXPORT_TEST_API(kbasep_pm_metrics_term);
/*
 * Account the time elapsed since time_period_start to either the busy or the
 * idle counter, depending on gpu_active, and start a new period at @now.
 * While busy, the same amount is also added to the per-context busy counters
 * whose active flag is set. Times are stored in units of
 * (1 << KBASE_PM_TIME_SHIFT) ns. Nothing is updated if @now lies before the
 * start of the current period.
 *
 * The caller must hold kbdev->pm.backend.metrics.lock.
 */
static void kbase_pm_get_dvfs_utilisation_calc(struct kbase_device *kbdev,
		ktime_t now)
{
	ktime_t elapsed;
	u32 units;

	lockdep_assert_held(&kbdev->pm.backend.metrics.lock);

	elapsed = ktime_sub(now, kbdev->pm.backend.metrics.time_period_start);
	if (ktime_to_ns(elapsed) < 0)
		return;

	units = (u32) (ktime_to_ns(elapsed) >> KBASE_PM_TIME_SHIFT);

	if (!kbdev->pm.backend.metrics.gpu_active) {
		kbdev->pm.backend.metrics.time_idle += units;
	} else {
		kbdev->pm.backend.metrics.time_busy += units;

		/* Compute (CL) contexts are tracked per core group */
		if (kbdev->pm.backend.metrics.active_cl_ctx[0])
			kbdev->pm.backend.metrics.busy_cl[0] += units;
		if (kbdev->pm.backend.metrics.active_cl_ctx[1])
			kbdev->pm.backend.metrics.busy_cl[1] += units;

		/* Both GL flags feed the single busy_gl counter */
		if (kbdev->pm.backend.metrics.active_gl_ctx[0])
			kbdev->pm.backend.metrics.busy_gl += units;
		if (kbdev->pm.backend.metrics.active_gl_ctx[1])
			kbdev->pm.backend.metrics.busy_gl += units;
	}

	kbdev->pm.backend.metrics.time_period_start = now;
}
#if defined(CONFIG_PM_DEVFREQ) || defined(CONFIG_MALI_MIDGARD_DVFS)
/*
 * Close the current sampling period: remember its busy/idle totals as the
 * "previous" values, then zero all counters and restart the period at @now.
 *
 * The caller must hold kbdev->pm.backend.metrics.lock.
 */
static void kbase_pm_reset_dvfs_utilisation_unlocked(struct kbase_device *kbdev,
		ktime_t now)
{
	/* Keep the totals of the period that is ending */
	kbdev->pm.backend.metrics.prev_busy =
			kbdev->pm.backend.metrics.time_busy;
	kbdev->pm.backend.metrics.prev_idle =
			kbdev->pm.backend.metrics.time_idle;

	/* Begin a fresh period */
	kbdev->pm.backend.metrics.time_busy = 0;
	kbdev->pm.backend.metrics.time_idle = 0;
	kbdev->pm.backend.metrics.busy_gl = 0;
	kbdev->pm.backend.metrics.busy_cl[0] = 0;
	kbdev->pm.backend.metrics.busy_cl[1] = 0;
	kbdev->pm.backend.metrics.time_period_start = now;
}
/*
 * Locking wrapper around kbase_pm_reset_dvfs_utilisation_unlocked(): takes
 * the metrics lock and restarts the sampling period at the current time.
 */
void kbase_pm_reset_dvfs_utilisation(struct kbase_device *kbdev)
{
	unsigned long irq_flags;

	spin_lock_irqsave(&kbdev->pm.backend.metrics.lock, irq_flags);

	kbase_pm_reset_dvfs_utilisation_unlocked(kbdev, ktime_get());

	spin_unlock_irqrestore(&kbdev->pm.backend.metrics.lock, irq_flags);
}
/*
 * Report the busy and total time of the current sampling period.
 *
 * @kbdev:     Device pointer
 * @total_out: Receives busy + idle time
 * @busy_out:  Receives busy time
 *
 * Both values are in the units used by the metrics counters, i.e.
 * (1 << KBASE_PM_TIME_SHIFT) ns. If the current period is shorter than half
 * of MALI_UTILIZATION_MAX_PERIOD the totals of the previous period are added
 * to the result. If it has reached MALI_UTILIZATION_MAX_PERIOD the counters
 * are reset after being sampled.
 */
void kbase_pm_get_dvfs_utilisation(struct kbase_device *kbdev,
		unsigned long *total_out, unsigned long *busy_out)
{
	unsigned long irq_flags;
	unsigned long busy_time;
	unsigned long total_time;
	ktime_t now = ktime_get();

	spin_lock_irqsave(&kbdev->pm.backend.metrics.lock, irq_flags);

	/* Bring the counters up to date before sampling them */
	kbase_pm_get_dvfs_utilisation_calc(kbdev, now);

	busy_time = kbdev->pm.backend.metrics.time_busy;
	total_time = busy_time + kbdev->pm.backend.metrics.time_idle;

	if (total_time < (MALI_UTILIZATION_MAX_PERIOD / 2)) {
		/* Short period: pad it with the previous period's data */
		total_time += kbdev->pm.backend.metrics.prev_idle +
				kbdev->pm.backend.metrics.prev_busy;
		busy_time += kbdev->pm.backend.metrics.prev_busy;
	} else if (total_time >= MALI_UTILIZATION_MAX_PERIOD) {
		/* Period is old enough: start a new one */
		kbase_pm_reset_dvfs_utilisation_unlocked(kbdev, now);
	}

	*total_out = total_time;
	*busy_out = busy_time;

	spin_unlock_irqrestore(&kbdev->pm.backend.metrics.lock, irq_flags);
}
#endif
#ifdef CONFIG_MALI_MIDGARD_DVFS
/*
 * Compute the utilisation percentage of the current sampling period and how
 * the busy time splits between GL and the two CL core groups.
 *
 * @kbdev:         Device pointer
 * @util_gl_share: If not NULL, receives the GL share in percent, or -1
 * @util_cl_share: If not NULL, receives the CL share per core group in
 *                 percent, or -1
 * @now:           Timestamp used to bring the counters up to date
 *
 * Returns the utilisation in percent, or -1 when no time has been recorded
 * at all. The shares are set to -1 when there is no data or when none of the
 * per-type busy counters is non-zero.
 *
 * The caller must hold kbdev->pm.backend.metrics.lock.
 */
int kbase_pm_get_dvfs_utilisation_old(struct kbase_device *kbdev,
		int *util_gl_share,
		int util_cl_share[2],
		ktime_t now)
{
	int utilisation;
	int busy;

	kbase_pm_get_dvfs_utilisation_calc(kbdev, now);

	if (kbdev->pm.backend.metrics.time_idle +
			kbdev->pm.backend.metrics.time_busy == 0) {
		/* No data - so we return NOP */
		if (util_gl_share)
			*util_gl_share = -1;
		if (util_cl_share) {
			util_cl_share[0] = -1;
			util_cl_share[1] = -1;
		}
		return -1;
	}

	utilisation = (100 * kbdev->pm.backend.metrics.time_busy) /
			(kbdev->pm.backend.metrics.time_idle +
			 kbdev->pm.backend.metrics.time_busy);

	busy = kbdev->pm.backend.metrics.busy_gl +
			kbdev->pm.backend.metrics.busy_cl[0] +
			kbdev->pm.backend.metrics.busy_cl[1];

	if (busy == 0) {
		/* Nothing to split between GL and CL */
		if (util_gl_share)
			*util_gl_share = -1;
		if (util_cl_share) {
			util_cl_share[0] = -1;
			util_cl_share[1] = -1;
		}
		return utilisation;
	}

	if (util_gl_share)
		*util_gl_share =
			(100 * kbdev->pm.backend.metrics.busy_gl) /
			busy;
	if (util_cl_share) {
		util_cl_share[0] =
			(100 * kbdev->pm.backend.metrics.busy_cl[0]) /
			busy;
		util_cl_share[1] =
			(100 * kbdev->pm.backend.metrics.busy_cl[1]) /
			busy;
	}

	return utilisation;
}
/*
 * Sample the utilisation of the current period, report it to the platform
 * DVFS code through kbase_platform_dvfs_event() and start a new sampling
 * period. If any of the sampled values is unavailable (negative), all of
 * them are reported as 0. Everything happens under the metrics lock.
 */
void kbase_pm_get_dvfs_action(struct kbase_device *kbdev)
{
	unsigned long irq_flags;
	int util_cl_share[2];
	int util_gl_share;
	int utilisation;
	ktime_t now;

	KBASE_DEBUG_ASSERT(kbdev != NULL);

	spin_lock_irqsave(&kbdev->pm.backend.metrics.lock, irq_flags);

	now = ktime_get();

	utilisation = kbase_pm_get_dvfs_utilisation_old(kbdev, &util_gl_share,
			util_cl_share, now);

	if (utilisation < 0 || util_gl_share < 0 || util_cl_share[0] < 0 ||
			util_cl_share[1] < 0) {
		/* Incomplete data: report an idle GPU */
		utilisation = 0;
		util_gl_share = 0;
		util_cl_share[0] = 0;
		util_cl_share[1] = 0;
	}

	kbase_platform_dvfs_event(kbdev, utilisation, util_gl_share,
			util_cl_share);

	kbase_pm_reset_dvfs_utilisation_unlocked(kbdev, now);

	spin_unlock_irqrestore(&kbdev->pm.backend.metrics.lock, irq_flags);
}
/*
 * Return a snapshot of metrics.timer_active, read under the metrics lock.
 * The flag may change again as soon as the lock has been dropped.
 */
bool kbase_pm_metrics_is_active(struct kbase_device *kbdev)
{
	unsigned long irq_flags;
	bool active;

	KBASE_DEBUG_ASSERT(kbdev != NULL);

	spin_lock_irqsave(&kbdev->pm.backend.metrics.lock, irq_flags);
	active = kbdev->pm.backend.metrics.timer_active;
	spin_unlock_irqrestore(&kbdev->pm.backend.metrics.lock, irq_flags);

	return active;
}
KBASE_EXPORT_TEST_API(kbase_pm_metrics_is_active);
#endif /* CONFIG_MALI_MIDGARD_DVFS */
/**
 * kbase_pm_metrics_active_calc - Recompute the PM activity flags from the
 *                                atoms currently submitted to the GPU
 * @kbdev: Device pointer
 *
 * Clears gpu_active and the per-context GL/CL flags, then inspects every job
 * slot and sets them again for each slot that has a submitted atom.
 *
 * The caller must hold kbdev->pm.backend.metrics.lock
 */
static void kbase_pm_metrics_active_calc(struct kbase_device *kbdev)
{
	int js;

	lockdep_assert_held(&kbdev->pm.backend.metrics.lock);

	kbdev->pm.backend.metrics.gpu_active = false;
	kbdev->pm.backend.metrics.active_cl_ctx[0] = 0;
	kbdev->pm.backend.metrics.active_cl_ctx[1] = 0;
	kbdev->pm.backend.metrics.active_gl_ctx[0] = 0;
	kbdev->pm.backend.metrics.active_gl_ctx[1] = 0;

	for (js = 0; js < BASE_JM_MAX_NR_SLOTS; js++) {
		struct kbase_jd_atom *katom = kbase_gpu_inspect(kbdev, js, 0);

		/* The head atom may have just completed; if it is not
		 * running, look at the atom queued behind it instead */
		if (katom && katom->gpu_rb_state != KBASE_ATOM_GPU_RB_SUBMITTED)
			katom = kbase_gpu_inspect(kbdev, js, 1);

		/* Nothing submitted on this slot */
		if (!katom ||
		    katom->gpu_rb_state != KBASE_ATOM_GPU_RB_SUBMITTED)
			continue;

		if (katom->core_req & BASE_JD_REQ_ONLY_COMPUTE) {
			int device_nr = 0;

			if (katom->core_req &
					BASE_JD_REQ_SPECIFIC_COHERENT_GROUP)
				device_nr = katom->device_nr;

			if (!WARN_ON(device_nr >= 2))
				kbdev->pm.backend.metrics.
						active_cl_ctx[device_nr] = 1;
		} else if (!WARN_ON(js >= 2)) {
			/* Slot 2 should not be running non-compute atoms */
			kbdev->pm.backend.metrics.active_gl_ctx[js] = 1;
		}

		kbdev->pm.backend.metrics.gpu_active = true;
	}
}
/*
 * Called when a job is submitted to or removed from a GPU slot.
 *
 * Accounts the time up to @timestamp (or up to the current time when
 * @timestamp is NULL) with the activity state that was valid so far, then
 * recomputes the activity state from the atoms now on the GPU.
 *
 * The caller must hold kbdev->hwaccess_lock; the metrics lock is taken here.
 */
void kbase_pm_metrics_update(struct kbase_device *kbdev, ktime_t *timestamp)
{
	unsigned long irq_flags;
	ktime_t current_time;

	lockdep_assert_held(&kbdev->hwaccess_lock);

	spin_lock_irqsave(&kbdev->pm.backend.metrics.lock, irq_flags);

	/* Fall back to the current time if the caller supplied none */
	if (!timestamp) {
		current_time = ktime_get();
		timestamp = &current_time;
	}

	/* Track how long CL and/or GL jobs have been busy for; this must use
	 * the old activity flags, so it runs before they are recomputed */
	kbase_pm_get_dvfs_utilisation_calc(kbdev, *timestamp);

	kbase_pm_metrics_active_calc(kbdev);

	spin_unlock_irqrestore(&kbdev->pm.backend.metrics.lock, irq_flags);
}

View File

@ -0,0 +1,969 @@
/*
*
* (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
* Foundation, and any use by you of this program is subject to the terms
* of such GNU licence.
*
* A copy of the licence is included with the program, and can also be obtained
* from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
* Boston, MA 02110-1301, USA.
*
*/
/*
* Power policy API implementations
*/
#include <mali_kbase.h>
#include <mali_midg_regmap.h>
#include <mali_kbase_pm.h>
#include <mali_kbase_config_defaults.h>
#include <backend/gpu/mali_kbase_pm_internal.h>
/*
* Table of the power policies built into the driver. Both configurations
* list the same policies; only the order of the first two entries depends on
* CONFIG_MALI_NO_MALI (always_on first when it is set, demand first
* otherwise). The last two policies are only listed when
* MALI_CUSTOMER_RELEASE evaluates to zero.
* NOTE(review): presumably the first entry acts as the default policy -
* confirm against the policy initialisation code.
*/
static const struct kbase_pm_policy *const policy_list[] = {
#ifdef CONFIG_MALI_NO_MALI
&kbase_pm_always_on_policy_ops,
&kbase_pm_demand_policy_ops,
&kbase_pm_coarse_demand_policy_ops,
#if !MALI_CUSTOMER_RELEASE
&kbase_pm_demand_always_powered_policy_ops,
&kbase_pm_fast_start_policy_ops,
#endif
#else /* CONFIG_MALI_NO_MALI */
&kbase_pm_demand_policy_ops,
&kbase_pm_always_on_policy_ops,
&kbase_pm_coarse_demand_policy_ops,
#if !MALI_CUSTOMER_RELEASE
&kbase_pm_demand_always_powered_policy_ops,
&kbase_pm_fast_start_policy_ops,
#endif
#endif /* CONFIG_MALI_NO_MALI */
};
/* The number of policies available in the system.
* This is derived from the number of entries in policy_list.
*/
#define POLICY_COUNT (sizeof(policy_list)/sizeof(*policy_list))
/* Function IDs for looking up Timeline Trace codes in
* kbase_pm_change_state_trace_code. Each value selects one row (first index)
* of that table; there is a START and an END id for requesting cores and for
* releasing cores. */
enum kbase_pm_func_id {
KBASE_PM_FUNC_ID_REQUEST_CORES_START,
KBASE_PM_FUNC_ID_REQUEST_CORES_END,
KBASE_PM_FUNC_ID_RELEASE_CORES_START,
KBASE_PM_FUNC_ID_RELEASE_CORES_END,
/* Note: kbase_pm_unrequest_cores() is on the slow path, and we neither
* expect to hit it nor tend to hit it very much anyway. We can detect
* whether we need more instrumentation by a difference between
* PM_CHECKTRANS events and PM_SEND/HANDLE_EVENT. */
/* Must be the last: number of function IDs, used as the size of the
* first dimension of the trace code table */
KBASE_PM_FUNC_ID_COUNT
};
/* State changes during request/unrequest/release-ing cores.
* The SHADER and TILER values are single-bit flags that can be OR-ed together
* to describe which kinds of core changed state. */
enum {
KBASE_PM_CHANGE_STATE_SHADER = (1u << 0),
KBASE_PM_CHANGE_STATE_TILER = (1u << 1),
/* These two must be last */
/* All valid flag bits */
KBASE_PM_CHANGE_STATE_MASK = (KBASE_PM_CHANGE_STATE_TILER |
KBASE_PM_CHANGE_STATE_SHADER),
/* Number of distinct flag combinations (including 0); used as the size of
* the second dimension of the trace code table */
KBASE_PM_CHANGE_STATE_COUNT = KBASE_PM_CHANGE_STATE_MASK + 1
};
/* Holds a combination of the KBASE_PM_CHANGE_STATE_* flags above */
typedef u32 kbase_pm_change_state;
#ifdef CONFIG_MALI_TRACE_TIMELINE
/* Timeline Trace code lookups for each function.
* The table is indexed as [function id][change-state flags]: the first index
* is an enum kbase_pm_func_id value, the second a combination of
* KBASE_PM_CHANGE_STATE_SHADER and KBASE_PM_CHANGE_STATE_TILER. The entries
* for "no state change" (second index 0) are 0. */
static u32 kbase_pm_change_state_trace_code[KBASE_PM_FUNC_ID_COUNT]
[KBASE_PM_CHANGE_STATE_COUNT] = {
/* kbase_pm_request_cores */
[KBASE_PM_FUNC_ID_REQUEST_CORES_START][0] = 0,
[KBASE_PM_FUNC_ID_REQUEST_CORES_START][KBASE_PM_CHANGE_STATE_SHADER] =
SW_FLOW_PM_CHECKTRANS_PM_REQUEST_CORES_SHADER_START,
[KBASE_PM_FUNC_ID_REQUEST_CORES_START][KBASE_PM_CHANGE_STATE_TILER] =
SW_FLOW_PM_CHECKTRANS_PM_REQUEST_CORES_TILER_START,
[KBASE_PM_FUNC_ID_REQUEST_CORES_START][KBASE_PM_CHANGE_STATE_SHADER |
KBASE_PM_CHANGE_STATE_TILER] =
SW_FLOW_PM_CHECKTRANS_PM_REQUEST_CORES_SHADER_TILER_START,
[KBASE_PM_FUNC_ID_REQUEST_CORES_END][0] = 0,
[KBASE_PM_FUNC_ID_REQUEST_CORES_END][KBASE_PM_CHANGE_STATE_SHADER] =
SW_FLOW_PM_CHECKTRANS_PM_REQUEST_CORES_SHADER_END,
[KBASE_PM_FUNC_ID_REQUEST_CORES_END][KBASE_PM_CHANGE_STATE_TILER] =
SW_FLOW_PM_CHECKTRANS_PM_REQUEST_CORES_TILER_END,
[KBASE_PM_FUNC_ID_REQUEST_CORES_END][KBASE_PM_CHANGE_STATE_SHADER |
KBASE_PM_CHANGE_STATE_TILER] =
SW_FLOW_PM_CHECKTRANS_PM_REQUEST_CORES_SHADER_TILER_END,
/* kbase_pm_release_cores */
[KBASE_PM_FUNC_ID_RELEASE_CORES_START][0] = 0,
[KBASE_PM_FUNC_ID_RELEASE_CORES_START][KBASE_PM_CHANGE_STATE_SHADER] =
SW_FLOW_PM_CHECKTRANS_PM_RELEASE_CORES_SHADER_START,
[KBASE_PM_FUNC_ID_RELEASE_CORES_START][KBASE_PM_CHANGE_STATE_TILER] =
SW_FLOW_PM_CHECKTRANS_PM_RELEASE_CORES_TILER_START,
[KBASE_PM_FUNC_ID_RELEASE_CORES_START][KBASE_PM_CHANGE_STATE_SHADER |
KBASE_PM_CHANGE_STATE_TILER] =
SW_FLOW_PM_CHECKTRANS_PM_RELEASE_CORES_SHADER_TILER_START,
[KBASE_PM_FUNC_ID_RELEASE_CORES_END][0] = 0,
[KBASE_PM_FUNC_ID_RELEASE_CORES_END][KBASE_PM_CHANGE_STATE_SHADER] =
SW_FLOW_PM_CHECKTRANS_PM_RELEASE_CORES_SHADER_END,
[KBASE_PM_FUNC_ID_RELEASE_CORES_END][KBASE_PM_CHANGE_STATE_TILER] =
SW_FLOW_PM_CHECKTRANS_PM_RELEASE_CORES_TILER_END,
[KBASE_PM_FUNC_ID_RELEASE_CORES_END][KBASE_PM_CHANGE_STATE_SHADER |
KBASE_PM_CHANGE_STATE_TILER] =
SW_FLOW_PM_CHECKTRANS_PM_RELEASE_CORES_SHADER_TILER_END
};
/*
 * Emit the PM_CHECKTRANS timeline trace point that belongs to the given
 * function id and core state change.
 *
 * @kbdev:   Device pointer
 * @func_id: Which request/release function and phase is being traced
 * @state:   Non-zero combination of KBASE_PM_CHANGE_STATE_* flags
 */
static inline void kbase_timeline_pm_cores_func(struct kbase_device *kbdev,
		enum kbase_pm_func_id func_id,
		kbase_pm_change_state state)
{
	int code;

	/* Both values are used as indices into the lookup table below */
	KBASE_DEBUG_ASSERT(func_id >= 0 && func_id < KBASE_PM_FUNC_ID_COUNT);
	KBASE_DEBUG_ASSERT(state != 0 && (state & KBASE_PM_CHANGE_STATE_MASK) ==
			state);

	code = kbase_pm_change_state_trace_code[func_id][state];

	KBASE_TIMELINE_PM_CHECKTRANS(kbdev, code);
}
#else /* CONFIG_MALI_TRACE_TIMELINE */
/* Stub used when CONFIG_MALI_TRACE_TIMELINE is not set: tracing the core
* state change is a no-op. */
static inline void kbase_timeline_pm_cores_func(struct kbase_device *kbdev,
enum kbase_pm_func_id func_id, kbase_pm_change_state state)
{
}
#endif /* CONFIG_MALI_TRACE_TIMELINE */
/**
 * kbasep_pm_do_poweroff_cores - Process a poweroff request and power down any
 *                               requested shader cores
 * @kbdev: Device pointer
 *
 * Removes the cores with a pending poweroff from the desired shader and tiler
 * states and clears the pending masks. Power transitions are only checked if
 * this changed a desired state or if ca_in_transition is set.
 *
 * The caller must hold kbdev->hwaccess_lock.
 */
static void kbasep_pm_do_poweroff_cores(struct kbase_device *kbdev)
{
	u64 old_shader_state = kbdev->pm.backend.desired_shader_state;
	u64 old_tiler_state = kbdev->pm.backend.desired_tiler_state;
	bool cores_are_available;

	lockdep_assert_held(&kbdev->hwaccess_lock);

	/* Drop every core that was waiting to be powered off */
	kbdev->pm.backend.desired_shader_state &=
			~kbdev->pm.backend.shader_poweroff_pending;
	kbdev->pm.backend.desired_tiler_state &=
			~kbdev->pm.backend.tiler_poweroff_pending;

	kbdev->pm.backend.shader_poweroff_pending = 0;
	kbdev->pm.backend.tiler_poweroff_pending = 0;

	/* Nothing changed and no transition in flight: nothing to start */
	if (old_shader_state == kbdev->pm.backend.desired_shader_state &&
	    old_tiler_state == kbdev->pm.backend.desired_tiler_state &&
	    !kbdev->pm.backend.ca_in_transition)
		return;

	KBASE_TIMELINE_PM_CHECKTRANS(kbdev,
		SW_FLOW_PM_CHECKTRANS_PM_RELEASE_CORES_DEFERRED_START);
	cores_are_available = kbase_pm_check_transitions_nolock(kbdev);
	KBASE_TIMELINE_PM_CHECKTRANS(kbdev,
		SW_FLOW_PM_CHECKTRANS_PM_RELEASE_CORES_DEFERRED_END);

	/* Don't need 'cores_are_available',
	 * because we don't return anything */
	CSTD_UNUSED(cores_are_available);
}
static enum hrtimer_restart
kbasep_pm_do_gpu_poweroff_callback(struct hrtimer *timer)
{
struct kbase_device *kbdev;
unsigned long flags;
kbdev = container_of(timer, struct kbase_device,
pm.backend.gpu_poweroff_timer);
spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
/* It is safe for this call to do nothing if the work item is already
* queued. The worker function will read the must up-to-date state of
* kbdev->pm.backend.gpu_poweroff_pending under lock.
*
* If a state change occurs while the worker function is processing,
* this call will succeed as a work item can be requeued once it has
* started processing.
*/
if (kbdev->pm.backend.gpu_poweroff_pending)
queue_work(kbdev->pm.backend.gpu_poweroff_wq,
&kbdev->pm.backend.gpu_poweroff_work);
if (kbdev->pm.backend.shader_poweroff_pending ||
kbdev->pm.backend.tiler_poweroff_pending) {
kbdev->pm.backend.shader_poweroff_pending_time--;
KBASE_DEBUG_ASSERT(
kbdev->pm.backend.shader_poweroff_pending_time
>= 0);
if (!kbdev->pm.backend.shader_poweroff_pending_time)
kbasep_pm_do_poweroff_cores(kbdev);
}
if (kbdev->pm.backend.poweroff_timer_needed) {
spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
hrtimer_add_expires(timer, kbdev->pm.gpu_poweroff_time);
return HRTIMER_RESTART;
}
kbdev->pm.backend.poweroff_timer_running = false;
spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
return HRTIMER_NORESTART;
}
static void kbasep_pm_do_gpu_poweroff_wq(struct work_struct *data)
{
unsigned long flags;
struct kbase_device *kbdev;
bool do_poweroff = false;
kbdev = container_of(data, struct kbase_device,
pm.backend.gpu_poweroff_work);
mutex_lock(&kbdev->pm.lock);
if (kbdev->pm.backend.gpu_poweroff_pending == 0) {
mutex_unlock(&kbdev->pm.lock);
return;
}
kbdev->pm.backend.gpu_poweroff_pending--;
if (kbdev->pm.backend.gpu_poweroff_pending > 0) {
mutex_unlock(&kbdev->pm.lock);
return;
}
KBASE_DEBUG_ASSERT(kbdev->pm.backend.gpu_poweroff_pending == 0);
spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
/* Only power off the GPU if a request is still pending */
if (!kbdev->pm.backend.pm_current_policy->get_core_active(kbdev))
do_poweroff = true;
spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
if (do_poweroff) {
kbdev->pm.backend.poweroff_timer_needed = false;
hrtimer_cancel(&kbdev->pm.backend.gpu_poweroff_timer);
kbdev->pm.backend.poweroff_timer_running = false;
/* Power off the GPU */
kbase_pm_do_poweroff(kbdev, false);
}
mutex_unlock(&kbdev->pm.lock);
}
int kbase_pm_policy_init(struct kbase_device *kbdev)
{
struct workqueue_struct *wq;
wq = alloc_workqueue("kbase_pm_do_poweroff",
WQ_HIGHPRI | WQ_UNBOUND, 1);
if (!wq)
return -ENOMEM;
kbdev->pm.backend.gpu_poweroff_wq = wq;
INIT_WORK(&kbdev->pm.backend.gpu_poweroff_work,
kbasep_pm_do_gpu_poweroff_wq);
hrtimer_init(&kbdev->pm.backend.gpu_poweroff_timer,
CLOCK_MONOTONIC, HRTIMER_MODE_REL);
kbdev->pm.backend.gpu_poweroff_timer.function =
kbasep_pm_do_gpu_poweroff_callback;
kbdev->pm.backend.pm_current_policy = policy_list[0];
kbdev->pm.backend.pm_current_policy->init(kbdev);
kbdev->pm.gpu_poweroff_time =
HR_TIMER_DELAY_NSEC(DEFAULT_PM_GPU_POWEROFF_TICK_NS);
kbdev->pm.poweroff_shader_ticks = DEFAULT_PM_POWEROFF_TICK_SHADER;
kbdev->pm.poweroff_gpu_ticks = DEFAULT_PM_POWEROFF_TICK_GPU;
return 0;
}
void kbase_pm_policy_term(struct kbase_device *kbdev)
{
kbdev->pm.backend.pm_current_policy->term(kbdev);
destroy_workqueue(kbdev->pm.backend.gpu_poweroff_wq);
}
void kbase_pm_cancel_deferred_poweroff(struct kbase_device *kbdev)
{
unsigned long flags;
lockdep_assert_held(&kbdev->pm.lock);
kbdev->pm.backend.poweroff_timer_needed = false;
hrtimer_cancel(&kbdev->pm.backend.gpu_poweroff_timer);
spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
kbdev->pm.backend.poweroff_timer_running = false;
/* If wq is already running but is held off by pm.lock, make sure it has
* no effect */
kbdev->pm.backend.gpu_poweroff_pending = 0;
kbdev->pm.backend.shader_poweroff_pending = 0;
kbdev->pm.backend.tiler_poweroff_pending = 0;
kbdev->pm.backend.shader_poweroff_pending_time = 0;
spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
}
/*
 * Update the active power state of the GPU.
 *
 * Asks the current power policy whether the GPU should be active, then either
 * powers it on (cancelling a pending deferred power off) or arranges for it to
 * be powered off - immediately, or through the poweroff timer when
 * poweroff_gpu_ticks is non-zero.
 *
 * The caller must hold kbdev->pm.lock. hwaccess_lock is taken here and is
 * always released before kbase_pm_do_poweron()/kbase_pm_do_poweroff() runs.
 */
void kbase_pm_update_active(struct kbase_device *kbdev)
{
	struct kbase_pm_device_data *pm = &kbdev->pm;
	struct kbase_pm_backend_data *backend = &pm->backend;
	unsigned long irq_flags;

	lockdep_assert_held(&pm->lock);

	/* pm_current_policy will never be NULL while pm.lock is held */
	KBASE_DEBUG_ASSERT(backend->pm_current_policy);

	spin_lock_irqsave(&kbdev->hwaccess_lock, irq_flags);

	if (backend->pm_current_policy->get_core_active(kbdev)) {
		if (backend->gpu_poweroff_pending) {
			/* Cancel any pending power off request */
			backend->gpu_poweroff_pending = 0;

			/* If a request was pending then the GPU was still
			 * powered, so no need to continue */
			if (!kbdev->poweroff_pending) {
				spin_unlock_irqrestore(&kbdev->hwaccess_lock,
						irq_flags);
				return;
			}
		}

		/* Start the poweroff timer if the GPU is about to be powered
		 * and deferred poweroff is configured */
		if (!backend->poweroff_timer_running && !backend->gpu_powered &&
		    (pm->poweroff_gpu_ticks || pm->poweroff_shader_ticks)) {
			backend->poweroff_timer_needed = true;
			backend->poweroff_timer_running = true;
			hrtimer_start(&backend->gpu_poweroff_timer,
					pm->gpu_poweroff_time,
					HRTIMER_MODE_REL);
		}

		/* Power on the GPU and any cores requested by the policy */
		if (backend->poweroff_wait_in_progress) {
			/* A poweroff is still completing: only record that a
			 * power on is required */
			backend->poweron_required = true;
			spin_unlock_irqrestore(&kbdev->hwaccess_lock,
					irq_flags);
			return;
		}

		spin_unlock_irqrestore(&kbdev->hwaccess_lock, irq_flags);
		kbase_pm_do_poweron(kbdev, false);
		return;
	}

	/* It is an error for the power policy to power off the GPU
	 * when there are contexts active */
	KBASE_DEBUG_ASSERT(pm->active_count == 0);

	if (backend->shader_poweroff_pending ||
	    backend->tiler_poweroff_pending) {
		backend->shader_poweroff_pending = 0;
		backend->tiler_poweroff_pending = 0;
		backend->shader_poweroff_pending_time = 0;
	}

	/* GPU already off: nothing to request */
	if (!backend->gpu_powered) {
		spin_unlock_irqrestore(&kbdev->hwaccess_lock, irq_flags);
		return;
	}

	/* No deferral configured: power off the GPU immediately */
	if (!pm->poweroff_gpu_ticks) {
		spin_unlock_irqrestore(&kbdev->hwaccess_lock, irq_flags);
		kbase_pm_do_poweroff(kbdev, false);
		return;
	}

	/* Request power off after poweroff_gpu_ticks timer ticks */
	backend->gpu_poweroff_pending = pm->poweroff_gpu_ticks;
	backend->poweroff_timer_needed = true;

	if (!backend->poweroff_timer_running) {
		/* Start timer if not running (eg if power policy has been
		 * changed from always_on to something else). This will ensure
		 * the GPU is actually powered off */
		backend->poweroff_timer_running = true;
		hrtimer_start(&backend->gpu_poweroff_timer,
				pm->gpu_poweroff_time,
				HRTIMER_MODE_REL);
	}

	spin_unlock_irqrestore(&kbdev->hwaccess_lock, irq_flags);
}
void kbase_pm_update_cores_state_nolock(struct kbase_device *kbdev)
{
u64 desired_bitmap;
u64 desired_tiler_bitmap;
bool cores_are_available;
bool do_poweroff = false;
lockdep_assert_held(&kbdev->hwaccess_lock);
if (kbdev->pm.backend.pm_current_policy == NULL)
return;
if (kbdev->pm.backend.poweroff_wait_in_progress)
return;
if (kbdev->protected_mode_transition && !kbdev->shader_needed_bitmap &&
!kbdev->shader_inuse_bitmap && !kbdev->tiler_needed_cnt
&& !kbdev->tiler_inuse_cnt) {
/* We are trying to change in/out of protected mode - force all
* cores off so that the L2 powers down */
desired_bitmap = 0;
desired_tiler_bitmap = 0;
} else {
desired_bitmap =
kbdev->pm.backend.pm_current_policy->get_core_mask(kbdev);
desired_bitmap &= kbase_pm_ca_get_core_mask(kbdev);
if (kbdev->tiler_needed_cnt > 0 || kbdev->tiler_inuse_cnt > 0)
desired_tiler_bitmap = 1;
else
desired_tiler_bitmap = 0;
if (!kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_XAFFINITY)) {
/* Unless XAFFINITY is supported, enable core 0 if tiler
* required, regardless of core availability */
if (kbdev->tiler_needed_cnt > 0 ||
kbdev->tiler_inuse_cnt > 0)
desired_bitmap |= 1;
}
}
if (kbdev->pm.backend.desired_shader_state != desired_bitmap)
KBASE_TRACE_ADD(kbdev, PM_CORES_CHANGE_DESIRED, NULL, NULL, 0u,
(u32)desired_bitmap);
/* Are any cores being powered on? */
if (~kbdev->pm.backend.desired_shader_state & desired_bitmap ||
~kbdev->pm.backend.desired_tiler_state & desired_tiler_bitmap ||
kbdev->pm.backend.ca_in_transition) {
/* Check if we are powering off any cores before updating shader
* state */
if (kbdev->pm.backend.desired_shader_state & ~desired_bitmap ||
kbdev->pm.backend.desired_tiler_state &
~desired_tiler_bitmap) {
/* Start timer to power off cores */
kbdev->pm.backend.shader_poweroff_pending |=
(kbdev->pm.backend.desired_shader_state &
~desired_bitmap);
kbdev->pm.backend.tiler_poweroff_pending |=
(kbdev->pm.backend.desired_tiler_state &
~desired_tiler_bitmap);
if (kbdev->pm.poweroff_shader_ticks &&
!kbdev->protected_mode_transition)
kbdev->pm.backend.shader_poweroff_pending_time =
kbdev->pm.poweroff_shader_ticks;
else
do_poweroff = true;
}
kbdev->pm.backend.desired_shader_state = desired_bitmap;
kbdev->pm.backend.desired_tiler_state = desired_tiler_bitmap;
/* If any cores are being powered on, transition immediately */
cores_are_available = kbase_pm_check_transitions_nolock(kbdev);
} else if (kbdev->pm.backend.desired_shader_state & ~desired_bitmap ||
kbdev->pm.backend.desired_tiler_state &
~desired_tiler_bitmap) {
/* Start timer to power off cores */
kbdev->pm.backend.shader_poweroff_pending |=
(kbdev->pm.backend.desired_shader_state &
~desired_bitmap);
kbdev->pm.backend.tiler_poweroff_pending |=
(kbdev->pm.backend.desired_tiler_state &
~desired_tiler_bitmap);
if (kbdev->pm.poweroff_shader_ticks &&
!kbdev->protected_mode_transition)
kbdev->pm.backend.shader_poweroff_pending_time =
kbdev->pm.poweroff_shader_ticks;
else
kbasep_pm_do_poweroff_cores(kbdev);
} else if (kbdev->pm.active_count == 0 && desired_bitmap != 0 &&
desired_tiler_bitmap != 0 &&
kbdev->pm.backend.poweroff_timer_needed) {
/* If power policy is keeping cores on despite there being no
* active contexts then disable poweroff timer as it isn't
* required.
* Only reset poweroff_timer_needed if we're not in the middle
* of the power off callback */
kbdev->pm.backend.poweroff_timer_needed = false;
}
/* Ensure timer does not power off wanted cores and make sure to power
* off unwanted cores */
if (kbdev->pm.backend.shader_poweroff_pending ||
kbdev->pm.backend.tiler_poweroff_pending) {
kbdev->pm.backend.shader_poweroff_pending &=
~(kbdev->pm.backend.desired_shader_state &
desired_bitmap);
kbdev->pm.backend.tiler_poweroff_pending &=
~(kbdev->pm.backend.desired_tiler_state &
desired_tiler_bitmap);
if (!kbdev->pm.backend.shader_poweroff_pending &&
!kbdev->pm.backend.tiler_poweroff_pending)
kbdev->pm.backend.shader_poweroff_pending_time = 0;
}
/* Shader poweroff is deferred to the end of the function, to eliminate
* issues caused by the core availability policy recursing into this
* function */
if (do_poweroff)
kbasep_pm_do_poweroff_cores(kbdev);
/* Don't need 'cores_are_available', because we don't return anything */
CSTD_UNUSED(cores_are_available);
}
/*
 * Locked wrapper around kbase_pm_update_cores_state_nolock(): takes
 * hwaccess_lock (IRQ-saving) for the duration of the update.
 */
void kbase_pm_update_cores_state(struct kbase_device *kbdev)
{
	unsigned long irq_flags;

	spin_lock_irqsave(&kbdev->hwaccess_lock, irq_flags);

	kbase_pm_update_cores_state_nolock(kbdev);

	spin_unlock_irqrestore(&kbdev->hwaccess_lock, irq_flags);
}
/*
 * Report the available power policies.
 *
 * @list: If non-NULL, receives a pointer to the policy table
 *
 * Return: the number of policies (POLICY_COUNT), whether or not @list was
 * supplied.
 */
int kbase_pm_list_policies(const struct kbase_pm_policy * const **list)
{
	if (list)
		*list = policy_list;

	return POLICY_COUNT;
}
KBASE_EXPORT_TEST_API(kbase_pm_list_policies);
/*
 * Return the power policy currently installed on the device (the value of
 * pm.backend.pm_current_policy at the time of the call).
 */
const struct kbase_pm_policy *kbase_pm_get_policy(struct kbase_device *kbdev)
{
	const struct kbase_pm_policy *current_policy;

	KBASE_DEBUG_ASSERT(kbdev != NULL);

	current_policy = kbdev->pm.backend.pm_current_policy;

	return current_policy;
}
KBASE_EXPORT_TEST_API(kbase_pm_get_policy);
void kbase_pm_set_policy(struct kbase_device *kbdev,
const struct kbase_pm_policy *new_policy)
{
struct kbasep_js_device_data *js_devdata = &kbdev->js_data;
const struct kbase_pm_policy *old_policy;
unsigned long flags;
KBASE_DEBUG_ASSERT(kbdev != NULL);
KBASE_DEBUG_ASSERT(new_policy != NULL);
KBASE_TRACE_ADD(kbdev, PM_SET_POLICY, NULL, NULL, 0u, new_policy->id);
/* During a policy change we pretend the GPU is active */
/* A suspend won't happen here, because we're in a syscall from a
* userspace thread */
kbase_pm_context_active(kbdev);
mutex_lock(&js_devdata->runpool_mutex);
mutex_lock(&kbdev->pm.lock);
/* Remove the policy to prevent IRQ handlers from working on it */
spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
old_policy = kbdev->pm.backend.pm_current_policy;
kbdev->pm.backend.pm_current_policy = NULL;
spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
KBASE_TRACE_ADD(kbdev, PM_CURRENT_POLICY_TERM, NULL, NULL, 0u,
old_policy->id);
if (old_policy->term)
old_policy->term(kbdev);
KBASE_TRACE_ADD(kbdev, PM_CURRENT_POLICY_INIT, NULL, NULL, 0u,
new_policy->id);
if (new_policy->init)
new_policy->init(kbdev);
spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
kbdev->pm.backend.pm_current_policy = new_policy;
spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
/* If any core power state changes were previously attempted, but
* couldn't be made because the policy was changing (current_policy was
* NULL), then re-try them here. */
kbase_pm_update_active(kbdev);
kbase_pm_update_cores_state(kbdev);
mutex_unlock(&kbdev->pm.lock);
mutex_unlock(&js_devdata->runpool_mutex);
/* Now the policy change is finished, we release our fake context active
* reference */
kbase_pm_context_idle(kbdev);
}
KBASE_EXPORT_TEST_API(kbase_pm_set_policy);
/* Check whether a state change has finished, and trace it as completed */
static void
kbase_pm_trace_check_and_finish_state_change(struct kbase_device *kbdev)
{
if ((kbdev->shader_available_bitmap &
kbdev->pm.backend.desired_shader_state)
== kbdev->pm.backend.desired_shader_state &&
(kbdev->tiler_available_bitmap &
kbdev->pm.backend.desired_tiler_state)
== kbdev->pm.backend.desired_tiler_state)
kbase_timeline_pm_check_handle_event(kbdev,
KBASE_TIMELINE_PM_EVENT_GPU_STATE_CHANGED);
}
/*
 * Mark shader cores (and optionally the tiler) as needed by a job.
 *
 * Each requested core's "needed" reference count is incremented. When any
 * count goes from zero to one the desired core state is re-evaluated, with
 * timeline tracing around the update.
 *
 * The caller must hold kbdev->hwaccess_lock.
 */
void kbase_pm_request_cores(struct kbase_device *kbdev,
		bool tiler_required, u64 shader_cores)
{
	u64 remaining = shader_cores;
	kbase_pm_change_state changed = 0u;

	KBASE_DEBUG_ASSERT(kbdev != NULL);

	lockdep_assert_held(&kbdev->hwaccess_lock);

	/* Walk the mask from the highest set bit down */
	while (remaining) {
		const int core = fls64(remaining) - 1;
		const u64 core_bit = 1ULL << core;

		/* It should be almost impossible for this to overflow. It would
		 * require 2^32 atoms to request a particular core, which would
		 * require 2^24 contexts to submit. This would require an amount
		 * of memory that is impossible on a 32-bit system and extremely
		 * unlikely on a 64-bit system. */
		if (++kbdev->shader_needed_cnt[core] == 1) {
			/* First user of this core */
			kbdev->shader_needed_bitmap |= core_bit;
			changed |= KBASE_PM_CHANGE_STATE_SHADER;
		}

		remaining &= ~core_bit;
	}

	if (tiler_required) {
		if (++kbdev->tiler_needed_cnt == 1)
			changed |= KBASE_PM_CHANGE_STATE_TILER;

		KBASE_DEBUG_ASSERT(kbdev->tiler_needed_cnt != 0);
	}

	if (!changed)
		return;

	KBASE_TRACE_ADD(kbdev, PM_REQUEST_CHANGE_SHADER_NEEDED, NULL,
			NULL, 0u, (u32) kbdev->shader_needed_bitmap);

	kbase_timeline_pm_cores_func(kbdev,
			KBASE_PM_FUNC_ID_REQUEST_CORES_START, changed);
	kbase_pm_update_cores_state_nolock(kbdev);
	kbase_timeline_pm_cores_func(kbdev,
			KBASE_PM_FUNC_ID_REQUEST_CORES_END, changed);
}
KBASE_EXPORT_TEST_API(kbase_pm_request_cores);
/*
 * Undo kbase_pm_request_cores(): drop one "needed" reference from each listed
 * shader core (and from the tiler when @tiler_required is set).
 *
 * When any count reaches zero the desired core state is re-evaluated and any
 * resulting state change is traced as already complete.
 *
 * The caller must hold kbdev->hwaccess_lock.
 */
void kbase_pm_unrequest_cores(struct kbase_device *kbdev,
		bool tiler_required, u64 shader_cores)
{
	kbase_pm_change_state changed = 0u;

	KBASE_DEBUG_ASSERT(kbdev != NULL);

	lockdep_assert_held(&kbdev->hwaccess_lock);

	/* Walk the mask from the highest set bit down */
	while (shader_cores) {
		const int core = fls64(shader_cores) - 1;
		const u64 core_bit = 1ULL << core;

		KBASE_DEBUG_ASSERT(kbdev->shader_needed_cnt[core] > 0);

		if (--kbdev->shader_needed_cnt[core] == 0) {
			/* Last user of this core has gone */
			kbdev->shader_needed_bitmap &= ~core_bit;
			changed |= KBASE_PM_CHANGE_STATE_SHADER;
		}

		shader_cores &= ~core_bit;
	}

	if (tiler_required) {
		KBASE_DEBUG_ASSERT(kbdev->tiler_needed_cnt > 0);

		if (--kbdev->tiler_needed_cnt == 0)
			changed |= KBASE_PM_CHANGE_STATE_TILER;
	}

	if (!changed)
		return;

	KBASE_TRACE_ADD(kbdev, PM_UNREQUEST_CHANGE_SHADER_NEEDED, NULL,
			NULL, 0u, (u32) kbdev->shader_needed_bitmap);

	kbase_pm_update_cores_state_nolock(kbdev);

	/* Trace that any state change effectively completes immediately
	 * - no-one will wait on the state change */
	kbase_pm_trace_check_and_finish_state_change(kbdev);
}
KBASE_EXPORT_TEST_API(kbase_pm_unrequest_cores);
/*
 * Move previously requested cores from "needed" to "in use".
 *
 * Nothing is modified unless every requested shader core (and the tiler, when
 * required) is both desired and currently available.
 *
 * The caller must hold kbdev->hwaccess_lock.
 *
 * Return: KBASE_CORES_READY when the cores were registered as in use;
 * KBASE_CORES_NOT_READY when they are not available yet, or are not desired
 * because a poweroff wait is in progress or no policy is installed;
 * KBASE_NEW_AFFINITY when they are not desired for any other reason.
 */
enum kbase_pm_cores_ready
kbase_pm_register_inuse_cores(struct kbase_device *kbdev,
		bool tiler_required, u64 shader_cores)
{
	struct kbase_pm_backend_data *backend = &kbdev->pm.backend;
	u64 needed_before;	/* Just for tracing */
	u64 inuse_before;	/* Just for tracing */

	lockdep_assert_held(&kbdev->hwaccess_lock);

	needed_before = kbdev->shader_needed_bitmap;
	inuse_before = kbdev->shader_inuse_bitmap;

	/* If desired_shader_state does not contain the requested cores, then
	 * power management is not attempting to power those cores (most
	 * likely due to core availability policy) and a new job affinity must
	 * be chosen */
	if ((backend->desired_shader_state & shader_cores) != shader_cores) {
		if (backend->poweroff_wait_in_progress ||
		    backend->pm_current_policy == NULL)
			return KBASE_CORES_NOT_READY;

		return KBASE_NEW_AFFINITY;
	}

	if ((kbdev->shader_available_bitmap & shader_cores) != shader_cores ||
	    (tiler_required && !kbdev->tiler_available_bitmap)) {
		/* Trace ongoing core transition */
		kbase_timeline_pm_l2_transition_start(kbdev);
		return KBASE_CORES_NOT_READY;
	}

	/* If we started to trace a state change, then trace it as being
	 * finished by now, at the very latest */
	kbase_pm_trace_check_and_finish_state_change(kbdev);
	/* Trace core transition done */
	kbase_timeline_pm_l2_transition_done(kbdev);

	/* Walk the mask from the highest set bit down */
	while (shader_cores) {
		const int core = fls64(shader_cores) - 1;
		const u64 core_bit = 1ULL << core;

		KBASE_DEBUG_ASSERT(kbdev->shader_needed_cnt[core] > 0);

		if (--kbdev->shader_needed_cnt[core] == 0)
			kbdev->shader_needed_bitmap &= ~core_bit;

		/* shader_inuse_cnt should not overflow because there can only
		 * be a very limited number of jobs on the h/w at one time */
		kbdev->shader_inuse_cnt[core]++;
		kbdev->shader_inuse_bitmap |= core_bit;

		shader_cores &= ~core_bit;
	}

	if (tiler_required) {
		KBASE_DEBUG_ASSERT(kbdev->tiler_needed_cnt > 0);

		--kbdev->tiler_needed_cnt;
		kbdev->tiler_inuse_cnt++;

		KBASE_DEBUG_ASSERT(kbdev->tiler_inuse_cnt != 0);
	}

	if (needed_before != kbdev->shader_needed_bitmap)
		KBASE_TRACE_ADD(kbdev, PM_REGISTER_CHANGE_SHADER_NEEDED, NULL,
				NULL, 0u, (u32) kbdev->shader_needed_bitmap);

	if (inuse_before != kbdev->shader_inuse_bitmap)
		KBASE_TRACE_ADD(kbdev, PM_REGISTER_CHANGE_SHADER_INUSE, NULL,
				NULL, 0u, (u32) kbdev->shader_inuse_bitmap);

	return KBASE_CORES_READY;
}
KBASE_EXPORT_TEST_API(kbase_pm_register_inuse_cores);
/*
 * Release cores after a job has run: drop one "in use" reference from each
 * listed shader core (and from the tiler when @tiler_required is set).
 *
 * When any count reaches zero the desired core state is re-evaluated, with
 * timeline tracing around the update, and any resulting state change is
 * traced as complete.
 *
 * The caller must hold kbdev->hwaccess_lock.
 */
void kbase_pm_release_cores(struct kbase_device *kbdev,
		bool tiler_required, u64 shader_cores)
{
	kbase_pm_change_state changed = 0u;

	KBASE_DEBUG_ASSERT(kbdev != NULL);

	lockdep_assert_held(&kbdev->hwaccess_lock);

	/* Walk the mask from the highest set bit down */
	while (shader_cores) {
		const int core = fls64(shader_cores) - 1;
		const u64 core_bit = 1ULL << core;

		KBASE_DEBUG_ASSERT(kbdev->shader_inuse_cnt[core] > 0);

		if (--kbdev->shader_inuse_cnt[core] == 0) {
			/* No job is using this core any more */
			kbdev->shader_inuse_bitmap &= ~core_bit;
			changed |= KBASE_PM_CHANGE_STATE_SHADER;
		}

		shader_cores &= ~core_bit;
	}

	if (tiler_required) {
		KBASE_DEBUG_ASSERT(kbdev->tiler_inuse_cnt > 0);

		if (--kbdev->tiler_inuse_cnt == 0)
			changed |= KBASE_PM_CHANGE_STATE_TILER;
	}

	if (!changed)
		return;

	KBASE_TRACE_ADD(kbdev, PM_RELEASE_CHANGE_SHADER_INUSE, NULL,
			NULL, 0u, (u32) kbdev->shader_inuse_bitmap);

	kbase_timeline_pm_cores_func(kbdev,
			KBASE_PM_FUNC_ID_RELEASE_CORES_START, changed);
	kbase_pm_update_cores_state_nolock(kbdev);
	kbase_timeline_pm_cores_func(kbdev,
			KBASE_PM_FUNC_ID_RELEASE_CORES_END, changed);

	/* Trace that any state change completed immediately */
	kbase_pm_trace_check_and_finish_state_change(kbdev);
}
KBASE_EXPORT_TEST_API(kbase_pm_release_cores);
/*
 * Synchronous variant of kbase_pm_request_cores().
 *
 * Waits for any poweroff to complete, requests the cores with hwaccess_lock
 * held, and then calls kbase_pm_check_transitions_sync() after dropping the
 * lock.
 */
void kbase_pm_request_cores_sync(struct kbase_device *kbdev,
		bool tiler_required,
		u64 shader_cores)
{
	unsigned long irq_flags;

	kbase_pm_wait_for_poweroff_complete(kbdev);

	spin_lock_irqsave(&kbdev->hwaccess_lock, irq_flags);
	kbase_pm_request_cores(kbdev, tiler_required, shader_cores);
	spin_unlock_irqrestore(&kbdev->hwaccess_lock, irq_flags);

	kbase_pm_check_transitions_sync(kbdev);
}
KBASE_EXPORT_TEST_API(kbase_pm_request_cores_sync);
/*
 * Take a reference on the L2 caches and wait until they are powered.
 *
 * The user count is incremented under hwaccess_lock; a transition check is
 * started when this is the first user or the L2 is not currently available.
 * The function then waits on l2_powered_wait until l2_powered == 1.
 */
void kbase_pm_request_l2_caches(struct kbase_device *kbdev)
{
	unsigned long irq_flags;
	u32 users_before;

	spin_lock_irqsave(&kbdev->hwaccess_lock, irq_flags);

	users_before = kbdev->l2_users_count;
	kbdev->l2_users_count++;

	KBASE_DEBUG_ASSERT(kbdev->l2_users_count != 0);

	/* if the GPU is reset while the l2 is on, l2 will be off but
	 * prior_l2_users_count will be > 0. l2_available_bitmap will have been
	 * set to 0 though by kbase_pm_init_hw */
	if (users_before == 0 || !kbdev->l2_available_bitmap)
		kbase_pm_check_transitions_nolock(kbdev);

	spin_unlock_irqrestore(&kbdev->hwaccess_lock, irq_flags);

	wait_event(kbdev->pm.backend.l2_powered_wait,
			kbdev->pm.backend.l2_powered == 1);

	/* Trace that any state change completed immediately */
	kbase_pm_trace_check_and_finish_state_change(kbdev);
}
KBASE_EXPORT_TEST_API(kbase_pm_request_l2_caches);
/*
 * Take a reference on the L2 caches without starting any power transition:
 * only the user count is incremented.
 *
 * The caller must hold kbdev->hwaccess_lock.
 */
void kbase_pm_request_l2_caches_l2_is_on(struct kbase_device *kbdev)
{
	lockdep_assert_held(&kbdev->hwaccess_lock);

	kbdev->l2_users_count += 1;
}
KBASE_EXPORT_TEST_API(kbase_pm_request_l2_caches_l2_is_on);
/*
 * Drop a reference on the L2 caches. When the last user goes away a
 * transition check is run and any resulting state change is traced as
 * complete.
 *
 * The caller must hold kbdev->hwaccess_lock.
 */
void kbase_pm_release_l2_caches(struct kbase_device *kbdev)
{
	lockdep_assert_held(&kbdev->hwaccess_lock);

	KBASE_DEBUG_ASSERT(kbdev->l2_users_count > 0);

	kbdev->l2_users_count--;

	/* Other users remain: leave the power state alone */
	if (kbdev->l2_users_count)
		return;

	kbase_pm_check_transitions_nolock(kbdev);
	/* Trace that any state change completed immediately */
	kbase_pm_trace_check_and_finish_state_change(kbdev);
}
KBASE_EXPORT_TEST_API(kbase_pm_release_l2_caches);

View File

@ -0,0 +1,227 @@
/*
*
* (C) COPYRIGHT 2010-2015 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
* Foundation, and any use by you of this program is subject to the terms
* of such GNU licence.
*
* A copy of the licence is included with the program, and can also be obtained
* from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
* Boston, MA 02110-1301, USA.
*
*/
/*
* Power policy API definitions
*/
#ifndef _KBASE_PM_POLICY_H_
#define _KBASE_PM_POLICY_H_
/**
* kbase_pm_policy_init - Initialize power policy framework
*
* @kbdev: The kbase device structure for the device (must be a valid pointer)
*
* Must be called before calling any other policy function
*
* Return: 0 if the power policy framework was successfully
* initialized, -errno otherwise.
*/
int kbase_pm_policy_init(struct kbase_device *kbdev);
/**
* kbase_pm_policy_term - Terminate power policy framework
*
* @kbdev: The kbase device structure for the device (must be a valid pointer)
*/
void kbase_pm_policy_term(struct kbase_device *kbdev);
/**
* kbase_pm_update_active - Update the active power state of the GPU
*
* @kbdev: The kbase device structure for the device (must be a valid pointer)
*
* Calls into the current power policy
*/
void kbase_pm_update_active(struct kbase_device *kbdev);
/**
* kbase_pm_update_cores - Update the desired core state of the GPU
*
* @kbdev: The kbase device structure for the device (must be a valid pointer)
*
* Calls into the current power policy
*/
void kbase_pm_update_cores(struct kbase_device *kbdev);
/* Result of kbase_pm_register_inuse_cores() */
enum kbase_pm_cores_ready {
/* The requested cores are not immediately ready */
KBASE_CORES_NOT_READY = 0,
/* The requested affinity is not allowed; a new one must be chosen */
KBASE_NEW_AFFINITY = 1,
/* The requested cores are already available */
KBASE_CORES_READY = 2
};
/**
* kbase_pm_request_cores_sync - Synchronous variant of kbase_pm_request_cores()
*
* @kbdev: The kbase device structure for the device
* @tiler_required: true if the tiler is required, false otherwise
* @shader_cores: A bitmask of shader cores which are necessary for the job
*
* When this function returns, the @shader_cores will be in the READY state.
*
* This is a safe variant of kbase_pm_check_transitions_sync(): it handles the
* work of ensuring the requested cores will remain powered until a matching
* call to kbase_pm_unrequest_cores()/kbase_pm_release_cores() (as appropriate)
* is made.
*/
void kbase_pm_request_cores_sync(struct kbase_device *kbdev,
bool tiler_required, u64 shader_cores);
/**
* kbase_pm_request_cores - Mark one or more cores as being required
* for jobs to be submitted
*
* @kbdev: The kbase device structure for the device
* @tiler_required: true if the tiler is required, false otherwise
* @shader_cores: A bitmask of shader cores which are necessary for the job
*
* This function is called by the job scheduler to mark one or more cores as
* being required to submit jobs that are ready to run.
*
* The cores requested are reference counted and a subsequent call to
* kbase_pm_register_inuse_cores() or kbase_pm_unrequest_cores() should be
* made to dereference the cores as being 'needed'.
*
* The active power policy will meet or exceed the requirements of the
* requested cores in the system. Any core transitions needed will be begun
* immediately, but they might not complete/the cores might not be available
* until a Power Management IRQ.
*
* This function returns no value; use kbase_pm_register_inuse_cores() to find
* out whether the requested cores are ready.
*/
void kbase_pm_request_cores(struct kbase_device *kbdev,
bool tiler_required, u64 shader_cores);
/**
* kbase_pm_unrequest_cores - Unmark one or more cores as being required for
* jobs to be submitted.
*
* @kbdev: The kbase device structure for the device
* @tiler_required: true if the tiler is required, false otherwise
* @shader_cores: A bitmask of shader cores (as given to
* kbase_pm_request_cores() )
*
* This function undoes the effect of kbase_pm_request_cores(). It should be
* used when a job is not going to be submitted to the hardware (e.g. the job is
* cancelled before it is enqueued).
*
* The active power policy will meet or exceed the requirements of the
* requested cores in the system. Any core transitions needed will be begun
* immediately, but they might not complete until a Power Management IRQ.
*
* The policy may use this as an indication that it can power down cores.
*/
void kbase_pm_unrequest_cores(struct kbase_device *kbdev,
bool tiler_required, u64 shader_cores);
/**
* kbase_pm_register_inuse_cores - Register a set of cores as in use by a job
*
* @kbdev: The kbase device structure for the device
* @tiler_required: true if the tiler is required, false otherwise
* @shader_cores: A bitmask of shader cores (as given to
* kbase_pm_request_cores() )
*
* This function should be called after kbase_pm_request_cores() when the job
* is about to be submitted to the hardware. It will check that the necessary
* cores are available and if so update the 'needed' and 'inuse' bitmasks to
* reflect that the job is now committed to being run.
*
* If the necessary cores are not currently available then the function will
* return %KBASE_CORES_NOT_READY and have no effect.
*
* Return: %KBASE_CORES_NOT_READY if the cores are not immediately ready,
*
* %KBASE_NEW_AFFINITY if the affinity requested is not allowed,
*
* %KBASE_CORES_READY if the cores requested are already available
*/
enum kbase_pm_cores_ready kbase_pm_register_inuse_cores(
struct kbase_device *kbdev,
bool tiler_required,
u64 shader_cores);
/**
* kbase_pm_release_cores - Release cores after a job has run
*
* @kbdev: The kbase device structure for the device
* @tiler_required: true if the tiler is required, false otherwise
* @shader_cores: A bitmask of shader cores (as given to
* kbase_pm_register_inuse_cores() )
*
* This function should be called when a job has finished running on the
* hardware. A call to kbase_pm_register_inuse_cores() must have previously
* occurred. The reference counts of the specified cores will be decremented
* which may cause the bitmask of 'inuse' cores to be reduced. The power policy
* may then turn off any cores which are no longer 'inuse'.
*/
void kbase_pm_release_cores(struct kbase_device *kbdev,
bool tiler_required, u64 shader_cores);
/**
* kbase_pm_request_l2_caches - Request l2 caches
*
* @kbdev: The kbase device structure for the device (must be a valid pointer)
*
* Request the use of l2 caches for all core groups, power up, wait and prevent
* the power manager from powering down the l2 caches.
*
* This tells the power management that the caches should be powered up, and
* they should remain powered, irrespective of the usage of shader cores. This
* does not return until the l2 caches are powered up.
*
* The caller must call kbase_pm_release_l2_caches() when they are finished
* to allow normal power management of the l2 caches to resume.
*
* This should only be used when power management is active.
*/
void kbase_pm_request_l2_caches(struct kbase_device *kbdev);
/**
* kbase_pm_request_l2_caches_l2_is_on - Request l2 caches but don't power on
*
* @kbdev: The kbase device structure for the device (must be a valid pointer)
*
* Increment the count of l2 users but do not attempt to power on the l2
*
* It is the caller's responsibility to ensure that the l2 is already powered up
* and to eventually call kbase_pm_release_l2_caches()
*/
void kbase_pm_request_l2_caches_l2_is_on(struct kbase_device *kbdev);
/**
* kbase_pm_release_l2_caches - Release l2 caches
*
* @kbdev: The kbase device structure for the device (must be a valid pointer)
*
* Release the use of l2 caches for all core groups and allow the power manager
* to power them down when necessary.
*
* This tells the power management that the caches can be powered down if
* necessary, with respect to the usage of shader cores.
*
* The caller must have called kbase_pm_request_l2_caches() prior to a call
* to this.
*
* This should only be used when power management is active.
*/
void kbase_pm_release_l2_caches(struct kbase_device *kbdev);
#endif /* _KBASE_PM_POLICY_H_ */

View File

@ -0,0 +1,171 @@
/*
*
* (C) COPYRIGHT 2011-2016 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
* Foundation, and any use by you of this program is subject to the terms
* of such GNU licence.
*
* A copy of the licence is included with the program, and can also be obtained
* from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
* Boston, MA 02110-1301, USA.
*
*/
#include <linux/devfreq_cooling.h>
#include <linux/thermal.h>
#include <linux/of.h>
#include <mali_kbase.h>
#include <mali_kbase_defs.h>
#include <backend/gpu/mali_kbase_power_model_simple.h>
/*
* This model is primarily designed for the Juno platform. It may not be
* suitable for other platforms.
*/
/* Temperature substituted when the thermal zone is missing or cannot be read.
 * model_static_power() divides it by 1000 before use (presumably the value is
 * in millidegrees Celsius - TODO confirm against the thermal framework). */
#define FALLBACK_STATIC_TEMPERATURE 55000
/* Coefficient multiplied into the result of model_dynamic_power() */
static u32 dynamic_coefficient;
/* Coefficient multiplied into the result of model_static_power() */
static u32 static_coefficient;
/* Polynomial coefficients of the temperature scaling factor used by
 * model_static_power(): ts[n] multiplies temp^n */
static s32 ts[4];
/* Thermal zone that model_static_power() reads the temperature from */
static struct thermal_zone_device *gpu_tz;
static unsigned long model_static_power(struct devfreq *devfreq,
unsigned long voltage)
{
#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 3, 0)
unsigned long temperature;
#else
int temperature;
#endif
unsigned long temp;
unsigned long temp_squared, temp_cubed, temp_scaling_factor;
const unsigned long voltage_cubed = (voltage * voltage * voltage) >> 10;
if (!IS_ERR_OR_NULL(gpu_tz) && gpu_tz->ops->get_temp) {
int ret;
ret = gpu_tz->ops->get_temp(gpu_tz, &temperature);
if (ret) {
pr_warn_ratelimited("Error reading temperature for gpu thermal zone: %d\n",
ret);
temperature = FALLBACK_STATIC_TEMPERATURE;
}
} else {
temperature = FALLBACK_STATIC_TEMPERATURE;
}
/* Calculate the temperature scaling factor. To be applied to the
* voltage scaled power.
*/
temp = temperature / 1000;
temp_squared = temp * temp;
temp_cubed = temp_squared * temp;
temp_scaling_factor =
(ts[3] * temp_cubed)
+ (ts[2] * temp_squared)
+ (ts[1] * temp)
+ ts[0];
return (((static_coefficient * voltage_cubed) >> 20)
* temp_scaling_factor)
/ 1000000;
}
/*
 * Dynamic power callback of the simple power model.
 *
 * Inputs: freq is in Hz and voltage in mV. The coefficient c
 * (dynamic_coefficient) is in mW/(MHz mV mV).
 *
 * Result: Pdyn (mW) = c (mW/(MHz*mV*mV)) * v (mV) * v (mV) * f (MHz)
 */
static unsigned long model_dynamic_power(struct devfreq *devfreq,
		unsigned long freq,
		unsigned long voltage)
{
	/* Frequency converted from Hz to MHz. */
	const unsigned long mhz = freq / 1000000;
	/* Voltage squared, scaled down by 1000: m*(V*V). */
	const unsigned long volt_sq = (voltage * voltage) / 1000;
	const unsigned long scaled = dynamic_coefficient * volt_sq * mhz;

	return scaled / 1000000; /* mW */
}
/*
 * Power-model callback table exposing the static and dynamic power estimators
 * defined above. The table is a struct devfreq_cooling_ops on kernels older
 * than 4.4 and a struct devfreq_cooling_power from 4.4 onwards.
 */
#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 4, 0)
struct devfreq_cooling_ops power_model_simple_ops = {
#else
struct devfreq_cooling_power power_model_simple_ops = {
#endif
.get_static_power = model_static_power,
.get_dynamic_power = model_dynamic_power,
};
/*
 * kbase_power_model_simple_init - Initialise the simple power model
 * @kbdev: Device pointer
 *
 * Reads the "power_model" child node of the device's device tree node, looks
 * up the thermal zone it names and derives the static/dynamic coefficients
 * and the temperature polynomial used by the power callbacks.
 *
 * The reference taken on the power_model node is dropped on every exit path.
 *
 * Return: 0 on success,
 *         -ENODEV if the power_model node is missing or not compatible with
 *         "arm,mali-simple-power-model",
 *         -EPROBE_DEFER if the named thermal zone is not available,
 *         -EINVAL if a required property is missing or would make the
 *         coefficient calculation divide by zero.
 */
int kbase_power_model_simple_init(struct kbase_device *kbdev)
{
	struct device_node *power_model_node;
	const char *tz_name;
	u32 static_power, dynamic_power;
	u32 voltage, voltage_squared, voltage_cubed, frequency;
	int err = -EINVAL;

	power_model_node = of_get_child_by_name(kbdev->dev->of_node,
			"power_model");
	if (!power_model_node) {
		dev_err(kbdev->dev, "could not find power_model node\n");
		return -ENODEV;
	}

	if (!of_device_is_compatible(power_model_node,
			"arm,mali-simple-power-model")) {
		dev_err(kbdev->dev, "power_model incompatible with simple power model\n");
		err = -ENODEV;
		goto out_put_node;
	}

	if (of_property_read_string(power_model_node, "thermal-zone",
			&tz_name)) {
		dev_err(kbdev->dev, "thermal-zone in power_model not available\n");
		goto out_put_node;
	}

	/* tz_name points into the node's data, so use it before the put. */
	gpu_tz = thermal_zone_get_zone_by_name(tz_name);
	if (IS_ERR(gpu_tz)) {
		pr_warn_ratelimited("Error getting gpu thermal zone (%ld), not yet ready?\n",
				PTR_ERR(gpu_tz));
		gpu_tz = NULL;
		err = -EPROBE_DEFER;
		goto out_put_node;
	}

	if (of_property_read_u32(power_model_node, "static-power",
			&static_power)) {
		dev_err(kbdev->dev, "static-power in power_model not available\n");
		goto out_put_node;
	}

	if (of_property_read_u32(power_model_node, "dynamic-power",
			&dynamic_power)) {
		dev_err(kbdev->dev, "dynamic-power in power_model not available\n");
		goto out_put_node;
	}

	if (of_property_read_u32(power_model_node, "voltage",
			&voltage)) {
		dev_err(kbdev->dev, "voltage in power_model not available\n");
		goto out_put_node;
	}

	if (of_property_read_u32(power_model_node, "frequency",
			&frequency)) {
		dev_err(kbdev->dev, "frequency in power_model not available\n");
		goto out_put_node;
	}

	voltage_squared = (voltage * voltage) / 1000;
	voltage_cubed = voltage * voltage * voltage;

	/*
	 * All three values below are used as divisors; reject device tree
	 * data that would make any of them zero instead of faulting.
	 */
	if (!frequency || !voltage_squared || !(voltage_cubed >> 10)) {
		dev_err(kbdev->dev, "voltage or frequency in power_model is too small\n");
		goto out_put_node;
	}

	static_coefficient = (static_power << 20) / (voltage_cubed >> 10);
	dynamic_coefficient = (((dynamic_power * 1000) / voltage_squared)
			* 1000) / frequency;

	if (of_property_read_u32_array(power_model_node, "ts", (u32 *)ts, 4)) {
		dev_err(kbdev->dev, "ts in power_model not available\n");
		goto out_put_node;
	}

	err = 0;

out_put_node:
	/* Balance the reference taken by of_get_child_by_name(). */
	of_node_put(power_model_node);
	return err;
}

View File

@ -0,0 +1,47 @@
/*
*
* (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
* Foundation, and any use by you of this program is subject to the terms
* of such GNU licence.
*
* A copy of the licence is included with the program, and can also be obtained
* from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
* Boston, MA 02110-1301, USA.
*
*/
#ifndef _BASE_POWER_MODEL_SIMPLE_H_
#define _BASE_POWER_MODEL_SIMPLE_H_
/**
* kbase_power_model_simple_init - Initialise the simple power model
* @kbdev: Device pointer
*
* The simple power model estimates power based on current voltage, temperature,
* and coefficients read from device tree. It does not take utilization into
* account.
*
* The power model requires coefficients from the power_model node in device
* tree. The absence of this node will prevent the model from functioning, but
* should not prevent the rest of the driver from running.
*
* Return: 0 on success
* -ENODEV if the power_model node is not present in device tree
* -EPROBE_DEFER if the thermal zone specified in device tree is not
* currently available
* Any other negative value on failure
*/
int kbase_power_model_simple_init(struct kbase_device *kbdev);
#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 4, 0)
extern struct devfreq_cooling_ops power_model_simple_ops;
#else
extern struct devfreq_cooling_power power_model_simple_ops;
#endif
#endif /* _BASE_POWER_MODEL_SIMPLE_H_ */

View File

@ -0,0 +1,103 @@
/*
*
* (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
* Foundation, and any use by you of this program is subject to the terms
* of such GNU licence.
*
* A copy of the licence is included with the program, and can also be obtained
* from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
* Boston, MA 02110-1301, USA.
*
*/
#include <mali_kbase.h>
#include <mali_kbase_hwaccess_time.h>
#include <backend/gpu/mali_kbase_device_internal.h>
#include <backend/gpu/mali_kbase_pm_internal.h>
/*
 * Sample the GPU cycle counter and GPU timestamp, then record the CPU's raw
 * monotonic time in @ts. The GPU cycle counter is requested for the duration
 * of the sampling and released afterwards.
 */
void kbase_backend_get_gpu_time(struct kbase_device *kbdev, u64 *cycle_counter,
		u64 *system_time, struct timespec *ts)
{
	u32 upper_before, upper_after, lower;

	kbase_pm_request_gpu_cycle_counter(kbdev);

	/*
	 * Each 64-bit value is exposed as two 32-bit registers. Read the
	 * upper half, the lower half, then the upper half again, and retry
	 * if the upper half changed so that a carry from lower to upper
	 * cannot produce a torn value.
	 */
	for (;;) {
		upper_before = kbase_reg_read(kbdev,
				GPU_CONTROL_REG(CYCLE_COUNT_HI), NULL);
		lower = kbase_reg_read(kbdev,
				GPU_CONTROL_REG(CYCLE_COUNT_LO), NULL);
		upper_after = kbase_reg_read(kbdev,
				GPU_CONTROL_REG(CYCLE_COUNT_HI), NULL);
		*cycle_counter = (((u64) upper_before) << 32) | lower;
		if (upper_before == upper_after)
			break;
	}

	/* Same upper/lower/upper sequence for the timestamp registers. */
	for (;;) {
		upper_before = kbase_reg_read(kbdev,
				GPU_CONTROL_REG(TIMESTAMP_HI), NULL);
		lower = kbase_reg_read(kbdev,
				GPU_CONTROL_REG(TIMESTAMP_LO), NULL);
		upper_after = kbase_reg_read(kbdev,
				GPU_CONTROL_REG(TIMESTAMP_HI), NULL);
		*system_time = (((u64) upper_before) << 32) | lower;
		if (upper_before == upper_after)
			break;
	}

	/* Record the CPU's idea of current time */
	getrawmonotonic(ts);

	kbase_pm_release_gpu_cycle_counter(kbdev);
}
/**
* kbase_wait_write_flush - Wait for GPU write flush
* @kctx: Context pointer
*
* Wait 1000 GPU clock cycles. This delay is known to give the GPU time to flush
* its write buffer.
*
* Only in use for BASE_HW_ISSUE_6367
*
* Note : If GPU resets occur then the counters are reset to zero, the delay may
* not be as expected.
*/
#ifndef CONFIG_MALI_NO_MALI
void kbase_wait_write_flush(struct kbase_context *kctx)
{
	struct kbase_device *kbdev = kctx->kbdev;
	u32 start = 0;

	/*
	 * The caller must be holding onto the kctx or the call is from
	 * userspace.
	 */
	kbase_pm_context_active(kbdev);
	kbase_pm_request_gpu_cycle_counter(kbdev);

	for (;;) {
		u32 now = kbase_reg_read(kbdev,
				GPU_CONTROL_REG(CYCLE_COUNT_LO), NULL);

		if (start == 0) {
			/* No reference count yet: remember this sample. */
			start = now;
		} else if ((now - start) > 1000) {
			/* Unsigned subtraction copes with counter wrap. */
			break;
		}
	}

	kbase_pm_release_gpu_cycle_counter(kbdev);
	kbase_pm_context_idle(kbdev);
}
#endif /* CONFIG_MALI_NO_MALI */

View File

@ -0,0 +1,52 @@
/*
*
* (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
* Foundation, and any use by you of this program is subject to the terms
* of such GNU licence.
*
* A copy of the licence is included with the program, and can also be obtained
* from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
* Boston, MA 02110-1301, USA.
*
*/
#ifndef _KBASE_BACKEND_TIME_H_
#define _KBASE_BACKEND_TIME_H_
/**
* kbase_backend_get_gpu_time() - Get current GPU time
* @kbdev: Device pointer
* @cycle_counter: Pointer to u64 to store cycle counter in
* @system_time: Pointer to u64 to store system time in
* @ts: Pointer to struct timespec to store current raw monotonic
* time in
*/
void kbase_backend_get_gpu_time(struct kbase_device *kbdev, u64 *cycle_counter,
u64 *system_time, struct timespec *ts);
/**
* kbase_wait_write_flush() - Wait for GPU write flush
* @kctx: Context pointer
*
* Wait 1000 GPU clock cycles. This delay is known to give the GPU time to flush
* its write buffer.
*
* If GPU resets occur then the counters are reset to zero, the delay may not be
* as expected.
*
* This function is only in use for BASE_HW_ISSUE_6367
*/
#ifdef CONFIG_MALI_NO_MALI
static inline void kbase_wait_write_flush(struct kbase_context *kctx)
{
}
#else
void kbase_wait_write_flush(struct kbase_context *kctx);
#endif
#endif /* _KBASE_BACKEND_TIME_H_ */

View File

@ -0,0 +1,126 @@
#
# (C) COPYRIGHT 2011-2013, 2015 ARM Limited. All rights reserved.
#
# This program is free software and is provided to you under the terms of the
# GNU General Public License version 2 as published by the Free Software
# Foundation, and any use by you of this program is subject to the terms
# of such GNU licence.
#
# A copy of the licence is included with the program, and can also be obtained
# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
# Boston, MA 02110-1301, USA.
#
#
##############################################################################
# This file contains per-module Doxygen configuration. Please do not add
# extra settings to this file without consulting all stakeholders, as they
# may override project-wide settings.
#
# Additionally, when defining aliases, macros, sections etc, use the module
# name as a prefix e.g. gles_my_alias.
##############################################################################
@INCLUDE = ../../bldsys/Doxyfile_common
# The INPUT tag can be used to specify the files and/or directories that contain
# documented source files. You may enter file names like "myfile.cpp" or
# directories like "/usr/src/myproject". Separate the files or directories
# with spaces.
INPUT += ../../kernel/drivers/gpu/arm/midgard/
##############################################################################
# Everything below here is optional, and in most cases not required
##############################################################################
# This tag can be used to specify a number of aliases that acts
# as commands in the documentation. An alias has the form "name=value".
# For example adding "sideeffect=\par Side Effects:\n" will allow you to
# put the command \sideeffect (or @sideeffect) in the documentation, which
# will result in a user-defined paragraph with heading "Side Effects:".
# You can put \n's in the value part of an alias to insert newlines.
ALIASES +=
# The ENABLED_SECTIONS tag can be used to enable conditional
# documentation sections, marked by \if sectionname ... \endif.
ENABLED_SECTIONS +=
# If the value of the INPUT tag contains directories, you can use the
# FILE_PATTERNS tag to specify one or more wildcard pattern (like *.cpp
# and *.h) to filter out the source-files in the directories. If left
# blank the following patterns are tested:
# *.c *.cc *.cxx *.cpp *.c++ *.java *.ii *.ixx *.ipp *.i++ *.inl *.h *.hh *.hxx
# *.hpp *.h++ *.idl *.odl *.cs *.php *.php3 *.inc *.m *.mm *.py *.f90
FILE_PATTERNS +=
# The EXCLUDE tag can be used to specify files and/or directories that should
# be excluded from the INPUT source files. This way you can easily exclude a
# subdirectory from a directory tree whose root is specified with the INPUT tag.
EXCLUDE += ../../kernel/drivers/gpu/arm/midgard/platform ../../kernel/drivers/gpu/arm/midgard/platform_dummy ../../kernel/drivers/gpu/arm/midgard/scripts ../../kernel/drivers/gpu/arm/midgard/tests ../../kernel/drivers/gpu/arm/midgard/Makefile ../../kernel/drivers/gpu/arm/midgard/Makefile.kbase ../../kernel/drivers/gpu/arm/midgard/Kbuild ../../kernel/drivers/gpu/arm/midgard/Kconfig ../../kernel/drivers/gpu/arm/midgard/sconscript ../../kernel/drivers/gpu/arm/midgard/docs ../../kernel/drivers/gpu/arm/midgard/pm_test_script.sh ../../kernel/drivers/gpu/arm/midgard/mali_uk.h ../../kernel/drivers/gpu/arm/midgard/Makefile
# If the value of the INPUT tag contains directories, you can use the
# EXCLUDE_PATTERNS tag to specify one or more wildcard patterns to exclude
# certain files from those directories. Note that the wildcards are matched
# against the file with absolute path, so to exclude all test directories
# for example use the pattern */test/*
EXCLUDE_PATTERNS +=
# The EXCLUDE_SYMBOLS tag can be used to specify one or more symbol names
# (namespaces, classes, functions, etc.) that should be excluded from the
# output. The symbol name can be a fully qualified name, a word, or if the
# wildcard * is used, a substring. Examples: ANamespace, AClass,
# AClass::ANamespace, ANamespace::*Test
EXCLUDE_SYMBOLS +=
# The EXAMPLE_PATH tag can be used to specify one or more files or
# directories that contain example code fragments that are included (see
# the \include command).
EXAMPLE_PATH +=
# The IMAGE_PATH tag can be used to specify one or more files or
# directories that contain images that are included in the documentation (see
# the \image command).
IMAGE_PATH +=
# The INCLUDE_PATH tag can be used to specify one or more directories that
# contain include files that are not input files but should be processed by
# the preprocessor.
INCLUDE_PATH +=
# The PREDEFINED tag can be used to specify one or more macro names that
# are defined before the preprocessor is started (similar to the -D option of
# gcc). The argument of the tag is a list of macros of the form: name
# or name=definition (no spaces). If the definition and the = are
# omitted =1 is assumed. To prevent a macro definition from being
# undefined via #undef or recursively expanded use the := operator
# instead of the = operator.
PREDEFINED +=
# If the MACRO_EXPANSION and EXPAND_ONLY_PREDEF tags are set to YES then
# this tag can be used to specify a list of macro names that should be expanded.
# The macro definition that is found in the sources will be used.
# Use the PREDEFINED tag if you want to use a different macro definition.
EXPAND_AS_DEFINED +=
# The DOTFILE_DIRS tag can be used to specify one or more directories that
# contain dot files that are included in the documentation (see the
# \dotfile command).
DOTFILE_DIRS += ../../kernel/drivers/gpu/arm/midgard/docs

View File

@ -0,0 +1,112 @@
/*
*
* (C) COPYRIGHT 2010 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
* Foundation, and any use by you of this program is subject to the terms
* of such GNU licence.
*
* A copy of the licence is included with the program, and can also be obtained
* from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
* Boston, MA 02110-1301, USA.
*
*/
digraph policy_objects_diagram {
rankdir=LR;
size="12,8";
compound=true;
node [ shape = box ];
subgraph cluster_policy_queues {
low_queue [ shape=record label = "LowP | {<ql>ctx_lo | ... | <qm>ctx_i | ... | <qr>ctx_hi}" ];
queues_middle_sep [ label="" shape=plaintext width=0 height=0 ];
rt_queue [ shape=record label = "RT | {<ql>ctx_lo | ... | <qm>ctx_j | ... | <qr>ctx_hi}" ];
label = "Policy's Queue(s)";
}
call_enqueue [ shape=plaintext label="enqueue_ctx()" ];
{
rank=same;
ordering=out;
call_dequeue [ shape=plaintext label="dequeue_head_ctx()\n+ runpool_add_ctx()" ];
call_ctxfinish [ shape=plaintext label="runpool_remove_ctx()" ];
call_ctxdone [ shape=plaintext label="don't requeue;\n/* ctx has no more jobs */" ];
}
subgraph cluster_runpool {
as0 [ width=2 height = 0.25 label="AS0: Job_1, ..., Job_n" ];
as1 [ width=2 height = 0.25 label="AS1: Job_1, ..., Job_m" ];
as2 [ width=2 height = 0.25 label="AS2: Job_1, ..., Job_p" ];
as3 [ width=2 height = 0.25 label="AS3: Job_1, ..., Job_q" ];
label = "Policy's Run Pool";
}
{
rank=same;
call_jdequeue [ shape=plaintext label="dequeue_job()" ];
sstop_dotfixup [ shape=plaintext label="" width=0 height=0 ];
}
{
rank=same;
ordering=out;
sstop [ shape=ellipse label="SS-Timer expires" ]
jobslots [ shape=record label="Jobslots: | <0>js[0] | <1>js[1] | <2>js[2]" ];
irq [ label="IRQ" shape=ellipse ];
job_finish [ shape=plaintext label="don't requeue;\n/* job done */" ];
}
hstop [ shape=ellipse label="HS-Timer expires" ]
/*
* Edges
*/
call_enqueue -> queues_middle_sep [ lhead=cluster_policy_queues ];
low_queue:qr -> call_dequeue:w;
rt_queue:qr -> call_dequeue:w;
call_dequeue -> as1 [lhead=cluster_runpool];
as1->call_jdequeue [ltail=cluster_runpool];
call_jdequeue->jobslots:0;
call_jdequeue->sstop_dotfixup [ arrowhead=none];
sstop_dotfixup->sstop [label="Spawn SS-Timer"];
sstop->jobslots [label="SoftStop"];
sstop->hstop [label="Spawn HS-Timer"];
hstop->jobslots:ne [label="HardStop"];
as3->call_ctxfinish:ne [ ltail=cluster_runpool ];
call_ctxfinish:sw->rt_queue:qm [ lhead=cluster_policy_queues label="enqueue_ctx()\n/* ctx still has jobs */" ];
call_ctxfinish->call_ctxdone [constraint=false];
call_ctxdone->call_enqueue [weight=0.1 labeldistance=20.0 labelangle=0.0 taillabel="Job submitted to the ctx" style=dotted constraint=false];
{
jobslots->irq [constraint=false];
irq->job_finish [constraint=false];
}
irq->as2 [lhead=cluster_runpool label="requeue_job()\n/* timeslice expired */" ];
}

View File

@ -0,0 +1,63 @@
/*
*
* (C) COPYRIGHT 2010 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
* Foundation, and any use by you of this program is subject to the terms
* of such GNU licence.
*
* A copy of the licence is included with the program, and can also be obtained
* from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
* Boston, MA 02110-1301, USA.
*
*/
digraph policy_objects_diagram {
rankdir=LR
size="6,6"
compound=true;
node [ shape = box ];
call_enqueue [ shape=plaintext label="enqueue ctx" ];
policy_queue [ label="Policy's Queue" ];
{
rank=same;
runpool [ label="Policy's Run Pool" ];
ctx_finish [ label="ctx finished" ];
}
{
rank=same;
jobslots [ shape=record label="Jobslots: | <0>js[0] | <1>js[1] | <2>js[2]" ];
job_finish [ label="Job finished" ];
}
/*
* Edges
*/
call_enqueue -> policy_queue;
policy_queue->runpool [label="dequeue ctx" weight=0.1];
runpool->policy_queue [label="requeue ctx" weight=0.1];
runpool->ctx_finish [ style=dotted ];
runpool->jobslots [label="dequeue job" weight=0.1];
jobslots->runpool [label="requeue job" weight=0.1];
jobslots->job_finish [ style=dotted ];
}

View File

@ -0,0 +1,223 @@
/*
*
* (C) COPYRIGHT 2015-2016 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
* Foundation, and any use by you of this program is subject to the terms
* of such GNU licence.
*
* A copy of the licence is included with the program, and can also be obtained
* from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
* Boston, MA 02110-1301, USA.
*
*/
/* AUTOMATICALLY GENERATED FILE. If you want to amend the issues/features,
* please update base/tools/hwconfig_generator/hwc_{issues,features}.py
* For more information see base/tools/hwconfig_generator/README
*/
#ifndef _BASE_HWCONFIG_FEATURES_H_
#define _BASE_HWCONFIG_FEATURES_H_
/*
 * Identifiers for optional hardware features. The per-GPU tables that follow
 * in this file list the features each GPU has; BASE_HW_FEATURE_END is the
 * last enumerator and is used as the terminator of every such table.
 */
enum base_hw_feature {
BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION,
BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS,
BASE_HW_FEATURE_33BIT_VA,
BASE_HW_FEATURE_XAFFINITY,
BASE_HW_FEATURE_OUT_OF_ORDER_EXEC,
BASE_HW_FEATURE_MRT,
BASE_HW_FEATURE_BRNDOUT_CC,
BASE_HW_FEATURE_INTERPIPE_REG_ALIASING,
BASE_HW_FEATURE_LD_ST_TILEBUFFER,
BASE_HW_FEATURE_MSAA_16X,
BASE_HW_FEATURE_32_BIT_UNIFORM_ADDRESS,
BASE_HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL,
BASE_HW_FEATURE_OPTIMIZED_COVERAGE_MASK,
BASE_HW_FEATURE_T7XX_PAIRING_RULES,
BASE_HW_FEATURE_LD_ST_LEA_TEX,
BASE_HW_FEATURE_LINEAR_FILTER_FLOAT,
BASE_HW_FEATURE_WORKGROUP_ROUND_MULTIPLE_OF_4,
BASE_HW_FEATURE_IMAGES_IN_FRAGMENT_SHADERS,
BASE_HW_FEATURE_TEST4_DATUM_MODE,
BASE_HW_FEATURE_NEXT_INSTRUCTION_TYPE,
BASE_HW_FEATURE_BRNDOUT_KILL,
BASE_HW_FEATURE_WARPING,
BASE_HW_FEATURE_V4,
BASE_HW_FEATURE_FLUSH_REDUCTION,
BASE_HW_FEATURE_PROTECTED_MODE,
BASE_HW_FEATURE_COHERENCY_REG,
BASE_HW_FEATURE_PROTECTED_DEBUG_MODE,
/* Sentinel: must stay last; marks the end of a feature table. */
BASE_HW_FEATURE_END
};
static const enum base_hw_feature base_hw_features_generic[] = {
BASE_HW_FEATURE_END
};
static const enum base_hw_feature base_hw_features_t60x[] = {
BASE_HW_FEATURE_LD_ST_LEA_TEX,
BASE_HW_FEATURE_LINEAR_FILTER_FLOAT,
BASE_HW_FEATURE_V4,
BASE_HW_FEATURE_END
};
static const enum base_hw_feature base_hw_features_t62x[] = {
BASE_HW_FEATURE_LD_ST_LEA_TEX,
BASE_HW_FEATURE_LINEAR_FILTER_FLOAT,
BASE_HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL,
BASE_HW_FEATURE_V4,
BASE_HW_FEATURE_END
};
static const enum base_hw_feature base_hw_features_t72x[] = {
BASE_HW_FEATURE_33BIT_VA,
BASE_HW_FEATURE_32_BIT_UNIFORM_ADDRESS,
BASE_HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL,
BASE_HW_FEATURE_INTERPIPE_REG_ALIASING,
BASE_HW_FEATURE_OPTIMIZED_COVERAGE_MASK,
BASE_HW_FEATURE_T7XX_PAIRING_RULES,
BASE_HW_FEATURE_WORKGROUP_ROUND_MULTIPLE_OF_4,
BASE_HW_FEATURE_WARPING,
BASE_HW_FEATURE_V4,
BASE_HW_FEATURE_END
};
static const enum base_hw_feature base_hw_features_t76x[] = {
BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION,
BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS,
BASE_HW_FEATURE_XAFFINITY,
BASE_HW_FEATURE_32_BIT_UNIFORM_ADDRESS,
BASE_HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL,
BASE_HW_FEATURE_BRNDOUT_CC,
BASE_HW_FEATURE_LD_ST_LEA_TEX,
BASE_HW_FEATURE_LD_ST_TILEBUFFER,
BASE_HW_FEATURE_LINEAR_FILTER_FLOAT,
BASE_HW_FEATURE_MRT,
BASE_HW_FEATURE_MSAA_16X,
BASE_HW_FEATURE_OUT_OF_ORDER_EXEC,
BASE_HW_FEATURE_T7XX_PAIRING_RULES,
BASE_HW_FEATURE_TEST4_DATUM_MODE,
BASE_HW_FEATURE_END
};
static const enum base_hw_feature base_hw_features_tFxx[] = {
BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION,
BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS,
BASE_HW_FEATURE_XAFFINITY,
BASE_HW_FEATURE_32_BIT_UNIFORM_ADDRESS,
BASE_HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL,
BASE_HW_FEATURE_BRNDOUT_CC,
BASE_HW_FEATURE_BRNDOUT_KILL,
BASE_HW_FEATURE_LD_ST_LEA_TEX,
BASE_HW_FEATURE_LD_ST_TILEBUFFER,
BASE_HW_FEATURE_LINEAR_FILTER_FLOAT,
BASE_HW_FEATURE_MRT,
BASE_HW_FEATURE_MSAA_16X,
BASE_HW_FEATURE_NEXT_INSTRUCTION_TYPE,
BASE_HW_FEATURE_OUT_OF_ORDER_EXEC,
BASE_HW_FEATURE_T7XX_PAIRING_RULES,
BASE_HW_FEATURE_TEST4_DATUM_MODE,
BASE_HW_FEATURE_END
};
static const enum base_hw_feature base_hw_features_t83x[] = {
BASE_HW_FEATURE_33BIT_VA,
BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION,
BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS,
BASE_HW_FEATURE_XAFFINITY,
BASE_HW_FEATURE_WARPING,
BASE_HW_FEATURE_INTERPIPE_REG_ALIASING,
BASE_HW_FEATURE_32_BIT_UNIFORM_ADDRESS,
BASE_HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL,
BASE_HW_FEATURE_BRNDOUT_CC,
BASE_HW_FEATURE_BRNDOUT_KILL,
BASE_HW_FEATURE_LD_ST_LEA_TEX,
BASE_HW_FEATURE_LD_ST_TILEBUFFER,
BASE_HW_FEATURE_LINEAR_FILTER_FLOAT,
BASE_HW_FEATURE_MRT,
BASE_HW_FEATURE_NEXT_INSTRUCTION_TYPE,
BASE_HW_FEATURE_OUT_OF_ORDER_EXEC,
BASE_HW_FEATURE_T7XX_PAIRING_RULES,
BASE_HW_FEATURE_TEST4_DATUM_MODE,
BASE_HW_FEATURE_END
};
static const enum base_hw_feature base_hw_features_t82x[] = {
BASE_HW_FEATURE_33BIT_VA,
BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION,
BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS,
BASE_HW_FEATURE_XAFFINITY,
BASE_HW_FEATURE_WARPING,
BASE_HW_FEATURE_INTERPIPE_REG_ALIASING,
BASE_HW_FEATURE_32_BIT_UNIFORM_ADDRESS,
BASE_HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL,
BASE_HW_FEATURE_BRNDOUT_CC,
BASE_HW_FEATURE_BRNDOUT_KILL,
BASE_HW_FEATURE_LD_ST_LEA_TEX,
BASE_HW_FEATURE_LD_ST_TILEBUFFER,
BASE_HW_FEATURE_LINEAR_FILTER_FLOAT,
BASE_HW_FEATURE_MRT,
BASE_HW_FEATURE_NEXT_INSTRUCTION_TYPE,
BASE_HW_FEATURE_OUT_OF_ORDER_EXEC,
BASE_HW_FEATURE_T7XX_PAIRING_RULES,
BASE_HW_FEATURE_TEST4_DATUM_MODE,
BASE_HW_FEATURE_END
};
static const enum base_hw_feature base_hw_features_tMIx[] = {
BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION,
BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS,
BASE_HW_FEATURE_XAFFINITY,
BASE_HW_FEATURE_WARPING,
BASE_HW_FEATURE_INTERPIPE_REG_ALIASING,
BASE_HW_FEATURE_32_BIT_UNIFORM_ADDRESS,
BASE_HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL,
BASE_HW_FEATURE_BRNDOUT_CC,
BASE_HW_FEATURE_BRNDOUT_KILL,
BASE_HW_FEATURE_LD_ST_LEA_TEX,
BASE_HW_FEATURE_LD_ST_TILEBUFFER,
BASE_HW_FEATURE_LINEAR_FILTER_FLOAT,
BASE_HW_FEATURE_MRT,
BASE_HW_FEATURE_MSAA_16X,
BASE_HW_FEATURE_NEXT_INSTRUCTION_TYPE,
BASE_HW_FEATURE_OUT_OF_ORDER_EXEC,
BASE_HW_FEATURE_T7XX_PAIRING_RULES,
BASE_HW_FEATURE_TEST4_DATUM_MODE,
BASE_HW_FEATURE_FLUSH_REDUCTION,
BASE_HW_FEATURE_PROTECTED_MODE,
BASE_HW_FEATURE_COHERENCY_REG,
BASE_HW_FEATURE_END
};
static const enum base_hw_feature base_hw_features_tHEx[] = {
BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION,
BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS,
BASE_HW_FEATURE_XAFFINITY,
BASE_HW_FEATURE_WARPING,
BASE_HW_FEATURE_INTERPIPE_REG_ALIASING,
BASE_HW_FEATURE_32_BIT_UNIFORM_ADDRESS,
BASE_HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL,
BASE_HW_FEATURE_BRNDOUT_CC,
BASE_HW_FEATURE_BRNDOUT_KILL,
BASE_HW_FEATURE_LD_ST_LEA_TEX,
BASE_HW_FEATURE_LD_ST_TILEBUFFER,
BASE_HW_FEATURE_LINEAR_FILTER_FLOAT,
BASE_HW_FEATURE_MRT,
BASE_HW_FEATURE_MSAA_16X,
BASE_HW_FEATURE_NEXT_INSTRUCTION_TYPE,
BASE_HW_FEATURE_OUT_OF_ORDER_EXEC,
BASE_HW_FEATURE_T7XX_PAIRING_RULES,
BASE_HW_FEATURE_TEST4_DATUM_MODE,
BASE_HW_FEATURE_FLUSH_REDUCTION,
BASE_HW_FEATURE_PROTECTED_MODE,
BASE_HW_FEATURE_PROTECTED_DEBUG_MODE,
BASE_HW_FEATURE_COHERENCY_REG,
BASE_HW_FEATURE_END
};
#endif /* _BASE_HWCONFIG_FEATURES_H_ */

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,47 @@
/*
*
* (C) COPYRIGHT 2012-2013 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
* Foundation, and any use by you of this program is subject to the terms
* of such GNU licence.
*
* A copy of the licence is included with the program, and can also be obtained
* from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
* Boston, MA 02110-1301, USA.
*
*/
/**
* @file
* Base cross-process sync API.
*/
#ifndef _BASE_KERNEL_SYNC_H_
#define _BASE_KERNEL_SYNC_H_
#include <linux/ioctl.h>
#define STREAM_IOC_MAGIC '~'
/* Fence insert.
*
* Inserts a fence on the stream operated on.
* Fence can be waited via a base fence wait soft-job
* or triggered via a base fence trigger soft-job.
*
* Fences must be cleaned up with close when no longer needed.
*
* No input/output arguments.
* Returns
* >=0 fd
* <0 error code
*/
#define STREAM_IOC_FENCE_INSERT _IO(STREAM_IOC_MAGIC, 0)
#endif /* _BASE_KERNEL_SYNC_H_ */

View File

@ -0,0 +1,52 @@
/*
*
* (C) COPYRIGHT 2010-2014 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
* Foundation, and any use by you of this program is subject to the terms
* of such GNU licence.
*
* A copy of the licence is included with the program, and can also be obtained
* from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
* Boston, MA 02110-1301, USA.
*
*/
#ifndef _BASE_MEM_PRIV_H_
#define _BASE_MEM_PRIV_H_
#define BASE_SYNCSET_OP_MSYNC (1U << 0)
#define BASE_SYNCSET_OP_CSYNC (1U << 1)
/*
* This structure describes a basic memory coherency operation.
* It can either be:
* @li a sync from CPU to Memory:
* - type = ::BASE_SYNCSET_OP_MSYNC
* - mem_handle = a handle to the memory object on which the operation
* is taking place
* - user_addr = the address of the range to be synced
* - size = the amount of data to be synced, in bytes
* - offset is ignored.
* @li a sync from Memory to CPU:
* - type = ::BASE_SYNCSET_OP_CSYNC
* - mem_handle = a handle to the memory object on which the operation
* is taking place
* - user_addr = the address of the range to be synced
* - size = the amount of data to be synced, in bytes.
* - offset is ignored.
*
* NOTE(review): "offset" is mentioned above but is not a member of the
* structure as declared here; the remark looks stale - confirm.
*/
struct basep_syncset {
/* Handle to the memory object on which the operation takes place. */
base_mem_handle mem_handle;
/* Address of the range to be synced. */
u64 user_addr;
/* Amount of data to be synced, in bytes. */
u64 size;
/* BASE_SYNCSET_OP_MSYNC or BASE_SYNCSET_OP_CSYNC. */
u8 type;
/*
* Explicit padding after the one-byte type; presumably keeps the
* structure size a multiple of 8 bytes - confirm.
*/
u8 padding[7];
};
#endif

View File

@ -0,0 +1,24 @@
/*
*
* (C) COPYRIGHT 2010, 2012-2013, 2015 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
* Foundation, and any use by you of this program is subject to the terms
* of such GNU licence.
*
* A copy of the licence is included with the program, and can also be obtained
* from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
* Boston, MA 02110-1301, USA.
*
*/
#ifndef _BASE_VENDOR_SPEC_FUNC_H_
#define _BASE_VENDOR_SPEC_FUNC_H_
/*
 * Vendor-specific CPU clock speed query. Only the prototype is visible here.
 * NOTE(review): presumably stores the clock speed through the u32 pointer and
 * returns 0 on success; units and error values must be confirmed against the
 * platform implementation.
 */
int kbase_get_vendor_specific_cpu_clock_speed(u32 * const);
#endif /*_BASE_VENDOR_SPEC_FUNC_H_*/

View File

@ -0,0 +1,607 @@
/*
*
* (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
* Foundation, and any use by you of this program is subject to the terms
* of such GNU licence.
*
* A copy of the licence is included with the program, and can also be obtained
* from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
* Boston, MA 02110-1301, USA.
*
*/
#ifndef _KBASE_H_
#define _KBASE_H_
#include <mali_malisw.h>
#include <mali_kbase_debug.h>
#include <asm/page.h>
#include <linux/atomic.h>
#include <linux/highmem.h>
#include <linux/hrtimer.h>
#include <linux/ktime.h>
#include <linux/list.h>
#include <linux/mm_types.h>
#include <linux/mutex.h>
#include <linux/rwsem.h>
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/vmalloc.h>
#include <linux/wait.h>
#include <linux/workqueue.h>
#include "mali_base_kernel.h"
#include <mali_kbase_uku.h>
#include <mali_kbase_linux.h>
/*
* Include mali_kbase_defs.h first as this provides types needed by other local
* header files.
*/
#include "mali_kbase_defs.h"
#include "mali_kbase_context.h"
#include "mali_kbase_strings.h"
#include "mali_kbase_mem_lowlevel.h"
#include "mali_kbase_trace_timeline.h"
#include "mali_kbase_js.h"
#include "mali_kbase_mem.h"
#include "mali_kbase_utility.h"
#include "mali_kbase_gpu_memory_debugfs.h"
#include "mali_kbase_mem_profile_debugfs.h"
#include "mali_kbase_debug_job_fault.h"
#include "mali_kbase_jd_debugfs.h"
#include "mali_kbase_gpuprops.h"
#include "mali_kbase_jm.h"
#include "mali_kbase_vinstr.h"
#include "mali_kbase_ipa.h"
#ifdef CONFIG_GPU_TRACEPOINTS
#include <trace/events/gpu.h>
#endif
/**
* @page page_base_kernel_main Kernel-side Base (KBase) APIs
*
* The Kernel-side Base (KBase) APIs are divided up as follows:
* - @subpage page_kbase_js_policy
*/
/**
* @defgroup base_kbase_api Kernel-side Base (KBase) APIs
*/
struct kbase_device *kbase_device_alloc(void);
/*
* note: the configuration attributes member of kbdev needs to have
* been set up before calling kbase_device_init
*/
/*
* API to acquire device list semaphore and return pointer
* to the device list head
*/
const struct list_head *kbase_dev_list_get(void);
/* API to release the device list semaphore */
void kbase_dev_list_put(const struct list_head *dev_list);
int kbase_device_init(struct kbase_device * const kbdev);
void kbase_device_term(struct kbase_device *kbdev);
void kbase_device_free(struct kbase_device *kbdev);
int kbase_device_has_feature(struct kbase_device *kbdev, u32 feature);
/* Needed for gator integration and for reporting vsync information */
struct kbase_device *kbase_find_device(int minor);
void kbase_release_device(struct kbase_device *kbdev);
void kbase_set_profiling_control(struct kbase_device *kbdev, u32 control, u32 value);
u32 kbase_get_profiling_control(struct kbase_device *kbdev, u32 control);
struct kbase_context *
kbase_create_context(struct kbase_device *kbdev, bool is_compat);
void kbase_destroy_context(struct kbase_context *kctx);
int kbase_jd_init(struct kbase_context *kctx);
void kbase_jd_exit(struct kbase_context *kctx);
/*
 * kbase_jd_submit() has two compile-time variants: when
 * BASE_LEGACY_UK6_SUPPORT is defined it takes an additional uk6_atom
 * argument, otherwise only the context and the submit data.
 */
#ifdef BASE_LEGACY_UK6_SUPPORT
int kbase_jd_submit(struct kbase_context *kctx,
const struct kbase_uk_job_submit *submit_data,
int uk6_atom);
#else
int kbase_jd_submit(struct kbase_context *kctx,
const struct kbase_uk_job_submit *submit_data);
#endif
/**
* kbase_jd_done_worker - Handle a job completion
* @data: a &struct work_struct
*
* This function requeues the job from the runpool (if it was soft-stopped or
* removed from NEXT registers).
*
* Removes it from the system if it finished/failed/was cancelled.
*
* Resolves dependencies to add dependent jobs to the context, potentially
* starting them if necessary (which may add more references to the context)
*
* Releases the reference to the context from the no-longer-running job.
*
* Handles retrying submission outside of IRQ context if it failed from within
* IRQ context.
*/
void kbase_jd_done_worker(struct work_struct *data);
void kbase_jd_done(struct kbase_jd_atom *katom, int slot_nr, ktime_t *end_timestamp,
kbasep_js_atom_done_code done_code);
void kbase_jd_cancel(struct kbase_device *kbdev, struct kbase_jd_atom *katom);
void kbase_jd_zap_context(struct kbase_context *kctx);
bool jd_done_nolock(struct kbase_jd_atom *katom,
struct list_head *completed_jobs_ctx);
void kbase_jd_free_external_resources(struct kbase_jd_atom *katom);
bool jd_submit_atom(struct kbase_context *kctx,
const struct base_jd_atom_v2 *user_atom,
struct kbase_jd_atom *katom);
void kbase_jd_dep_clear_locked(struct kbase_jd_atom *katom);
void kbase_job_done(struct kbase_device *kbdev, u32 done);
void kbase_gpu_cacheclean(struct kbase_device *kbdev,
struct kbase_jd_atom *katom);
/**
* kbase_job_slot_ctx_priority_check_locked(): - Check for lower priority atoms
* and soft stop them
* @kctx: Pointer to context to check.
* @katom: Pointer to priority atom.
*
* Atoms from @kctx on the same job slot as @katom, which have lower priority
* than @katom will be soft stopped and put back in the queue, so that atoms
* with higher priority can run.
*
* The hwaccess_lock must be held when calling this function.
*/
void kbase_job_slot_ctx_priority_check_locked(struct kbase_context *kctx,
struct kbase_jd_atom *katom);
void kbase_job_slot_softstop(struct kbase_device *kbdev, int js,
struct kbase_jd_atom *target_katom);
void kbase_job_slot_softstop_swflags(struct kbase_device *kbdev, int js,
struct kbase_jd_atom *target_katom, u32 sw_flags);
void kbase_job_slot_hardstop(struct kbase_context *kctx, int js,
struct kbase_jd_atom *target_katom);
void kbase_job_check_enter_disjoint(struct kbase_device *kbdev, u32 action,
base_jd_core_req core_reqs, struct kbase_jd_atom *target_katom);
void kbase_job_check_leave_disjoint(struct kbase_device *kbdev,
struct kbase_jd_atom *target_katom);
void kbase_event_post(struct kbase_context *ctx, struct kbase_jd_atom *event);
int kbase_event_dequeue(struct kbase_context *ctx, struct base_jd_event_v2 *uevent);
int kbase_event_pending(struct kbase_context *ctx);
int kbase_event_init(struct kbase_context *kctx);
void kbase_event_close(struct kbase_context *kctx);
void kbase_event_cleanup(struct kbase_context *kctx);
void kbase_event_wakeup(struct kbase_context *kctx);
int kbase_process_soft_job(struct kbase_jd_atom *katom);
int kbase_prepare_soft_job(struct kbase_jd_atom *katom);
void kbase_finish_soft_job(struct kbase_jd_atom *katom);
void kbase_cancel_soft_job(struct kbase_jd_atom *katom);
void kbase_resume_suspended_soft_jobs(struct kbase_device *kbdev);
void kbasep_add_waiting_soft_job(struct kbase_jd_atom *katom);
void kbasep_remove_waiting_soft_job(struct kbase_jd_atom *katom);
int kbase_soft_event_update(struct kbase_context *kctx,
u64 event,
unsigned char new_status);
bool kbase_replay_process(struct kbase_jd_atom *katom);
void kbasep_soft_job_timeout_worker(unsigned long data);
void kbasep_complete_triggered_soft_events(struct kbase_context *kctx, u64 evt);
/* api used internally for register access. Contains validation and tracing */
void kbase_device_trace_register_access(struct kbase_context *kctx, enum kbase_reg_access_type type, u16 reg_offset, u32 reg_value);
int kbase_device_trace_buffer_install(
struct kbase_context *kctx, u32 *tb, size_t size);
void kbase_device_trace_buffer_uninstall(struct kbase_context *kctx);
/* api to be ported per OS, only need to do the raw register access */
void kbase_os_reg_write(struct kbase_device *kbdev, u16 offset, u32 value);
u32 kbase_os_reg_read(struct kbase_device *kbdev, u16 offset);
void kbasep_as_do_poke(struct work_struct *work);
/** Returns the name associated with a Mali exception code
*
* This function is called from the interrupt handler when a GPU fault occurs.
* It reports the details of the fault using KBASE_DEBUG_PRINT_WARN.
*
* @param[in] kbdev The kbase device that the GPU fault occurred from.
* @param[in] exception_code exception code
* @return name associated with the exception code
*/
const char *kbase_exception_name(struct kbase_device *kbdev,
u32 exception_code);
/**
 * kbase_pm_is_suspending - Report whether a system suspend is in progress, or
 *                          has already happened
 * @kbdev: The kbase device to query
 *
 * The caller should ensure that either kbdev->pm.active_count_lock is held, or
 * a dmb was executed recently (to ensure the value read is the most
 * up-to-date one). Without the lock the value could still change after this
 * function returns.
 *
 * Return: false if a suspend is not in progress, true otherwise
 */
static inline bool kbase_pm_is_suspending(struct kbase_device *kbdev)
{
	const bool suspending = kbdev->pm.suspending;

	return suspending;
}
/**
 * kbase_jd_atom_id - Return the atom's ID, as was originally supplied by
 *                    userspace in base_jd_atom_v2::atom_number
 * @kctx:  Context that owns the atom
 * @katom: Atom to look up; must belong to @kctx
 *
 * The ID is the index of @katom inside kctx->jctx.atoms[].
 *
 * Return: index of @katom in the context's atom array
 */
static inline int kbase_jd_atom_id(struct kbase_context *kctx, struct kbase_jd_atom *katom)
{
	int result;

	KBASE_DEBUG_ASSERT(kctx);
	KBASE_DEBUG_ASSERT(katom);
	KBASE_DEBUG_ASSERT(katom->kctx == kctx);

	result = katom - &kctx->jctx.atoms[0];
	/*
	 * Valid IDs are 0 .. BASE_JD_ATOM_COUNT - 1; an index equal to
	 * BASE_JD_ATOM_COUNT would be one past the end of the array.
	 * NOTE(review): assumes jctx.atoms[] holds BASE_JD_ATOM_COUNT
	 * entries - confirm against struct kbase_jd_context.
	 */
	KBASE_DEBUG_ASSERT(result >= 0 && result < BASE_JD_ATOM_COUNT);
	return result;
}
/**
 * kbase_jd_atom_from_id - Look up the atom structure for a given atom ID
 * @kctx: Context pointer
 * @id:   ID of the atom to retrieve, used as an index into the context's
 *        atom array (no range check is performed here)
 *
 * Return: Pointer to the struct kbase_jd_atom associated with the supplied ID
 */
static inline struct kbase_jd_atom *kbase_jd_atom_from_id(
		struct kbase_context *kctx, int id)
{
	struct kbase_jd_atom *atom = &kctx->jctx.atoms[id];

	return atom;
}
/**
* Initialize the disjoint state
*
* The disjoint event count and state are both set to zero.
*
* Disjoint functions usage:
*
* The disjoint event count should be incremented whenever a disjoint event occurs.
*
* There are several cases which are regarded as disjoint behavior. Rather than just increment
* the counter during disjoint events we also increment the counter when jobs may be affected
* by what the GPU is currently doing. To facilitate this we have the concept of disjoint state.
*
* Disjoint state is entered during GPU reset and for the entire time that an atom is replaying
* (as part of the replay workaround). Increasing the disjoint state also increases the count of
* disjoint events.
*
* The disjoint state is then used to increase the count of disjoint events during job submission
* and job completion. Any atom submitted or completed while the disjoint state is greater than
* zero is regarded as a disjoint event.
*
* The disjoint event counter is also incremented immediately whenever a job is soft stopped
* and during context creation.
*
* @param kbdev The kbase device
*/
void kbase_disjoint_init(struct kbase_device *kbdev);
/**
* Increase the count of disjoint events
* called when a disjoint event has happened
*
* @param kbdev The kbase device
*/
void kbase_disjoint_event(struct kbase_device *kbdev);
/**
* Increase the count of disjoint events only if the GPU is in a disjoint state
*
* This should be called when something happens which could be disjoint if the GPU
* is in a disjoint state. The state refcount keeps track of this.
*
* @param kbdev The kbase device
*/
void kbase_disjoint_event_potential(struct kbase_device *kbdev);
/**
* Returns the count of disjoint events
*
* @param kbdev The kbase device
* @return the count of disjoint events
*/
u32 kbase_disjoint_event_get(struct kbase_device *kbdev);
/**
* Increment the refcount state indicating that the GPU is in a disjoint state.
*
* Also increments the disjoint event count (calls @ref kbase_disjoint_event).
* Eventually, after the disjoint state has completed,
* @ref kbase_disjoint_state_down should be called.
*
* @param kbdev The kbase device
*/
void kbase_disjoint_state_up(struct kbase_device *kbdev);
/**
* Decrement the refcount state
*
* Also Increment the disjoint event count (calls @ref kbase_disjoint_event)
*
* Called after @ref kbase_disjoint_state_up once the disjoint state is over
*
* @param kbdev The kbase device
*/
void kbase_disjoint_state_down(struct kbase_device *kbdev);
/**
* If a job is soft stopped and the number of contexts is >= this value
* it is reported as a disjoint event
*/
#define KBASE_DISJOINT_STATE_INTERLEAVED_CONTEXT_COUNT_THRESHOLD 2
#if !defined(UINT64_MAX)
#define UINT64_MAX ((uint64_t)0xFFFFFFFFFFFFFFFFULL)
#endif
#if KBASE_TRACE_ENABLE
void kbasep_trace_debugfs_init(struct kbase_device *kbdev);
#ifndef CONFIG_MALI_SYSTEM_TRACE
/** Add trace values about a job-slot
*
* @note Any functions called through this macro will still be evaluated in
* Release builds (CONFIG_MALI_DEBUG not defined). Therefore, when KBASE_TRACE_ENABLE == 0 any
* functions called to get the parameters supplied to this macro must:
* - be static or static inline
* - must just return 0 and have no other statements present in the body.
*/
#define KBASE_TRACE_ADD_SLOT(kbdev, code, ctx, katom, gpu_addr, jobslot) \
kbasep_trace_add(kbdev, KBASE_TRACE_CODE(code), ctx, katom, gpu_addr, \
KBASE_TRACE_FLAG_JOBSLOT, 0, jobslot, 0)
/** Add trace values about a job-slot, with info
*
* @note Any functions called through this macro will still be evaluated in
* Release builds (CONFIG_MALI_DEBUG not defined). Therefore, when KBASE_TRACE_ENABLE == 0 any
* functions called to get the parameters supplied to this macro must:
* - be static or static inline
* - must just return 0 and have no other statements present in the body.
*/
#define KBASE_TRACE_ADD_SLOT_INFO(kbdev, code, ctx, katom, gpu_addr, jobslot, info_val) \
kbasep_trace_add(kbdev, KBASE_TRACE_CODE(code), ctx, katom, gpu_addr, \
KBASE_TRACE_FLAG_JOBSLOT, 0, jobslot, info_val)
/** Add trace values about a ctx refcount
*
* @note Any functions called through this macro will still be evaluated in
* Release builds (CONFIG_MALI_DEBUG not defined). Therefore, when KBASE_TRACE_ENABLE == 0 any
* functions called to get the parameters supplied to this macro must:
* - be static or static inline
* - must just return 0 and have no other statements present in the body.
*/
#define KBASE_TRACE_ADD_REFCOUNT(kbdev, code, ctx, katom, gpu_addr, refcount) \
kbasep_trace_add(kbdev, KBASE_TRACE_CODE(code), ctx, katom, gpu_addr, \
KBASE_TRACE_FLAG_REFCOUNT, refcount, 0, 0)
/** Add trace values about a ctx refcount, and info
*
* @note Any functions called through this macro will still be evaluated in
* Release builds (CONFIG_MALI_DEBUG not defined). Therefore, when KBASE_TRACE_ENABLE == 0 any
* functions called to get the parameters supplied to this macro must:
* - be static or static inline
* - must just return 0 and have no other statements present in the body.
*/
#define KBASE_TRACE_ADD_REFCOUNT_INFO(kbdev, code, ctx, katom, gpu_addr, refcount, info_val) \
kbasep_trace_add(kbdev, KBASE_TRACE_CODE(code), ctx, katom, gpu_addr, \
KBASE_TRACE_FLAG_REFCOUNT, refcount, 0, info_val)
/** Add trace values (no slot or refcount)
*
* @note Any functions called through this macro will still be evaluated in
* Release builds (CONFIG_MALI_DEBUG not defined). Therefore, when KBASE_TRACE_ENABLE == 0 any
* functions called to get the parameters supplied to this macro must:
* - be static or static inline
* - must just return 0 and have no other statements present in the body.
*/
#define KBASE_TRACE_ADD(kbdev, code, ctx, katom, gpu_addr, info_val) \
kbasep_trace_add(kbdev, KBASE_TRACE_CODE(code), ctx, katom, gpu_addr, \
0, 0, 0, info_val)
/** Clear the trace */
#define KBASE_TRACE_CLEAR(kbdev) \
kbasep_trace_clear(kbdev)
/** Dump the slot trace */
#define KBASE_TRACE_DUMP(kbdev) \
kbasep_trace_dump(kbdev)
/** PRIVATE - do not use directly. Use KBASE_TRACE_ADD() instead */
void kbasep_trace_add(struct kbase_device *kbdev, enum kbase_trace_code code, void *ctx, struct kbase_jd_atom *katom, u64 gpu_addr, u8 flags, int refcount, int jobslot, unsigned long info_val);
/** PRIVATE - do not use directly. Use KBASE_TRACE_CLEAR() instead */
void kbasep_trace_clear(struct kbase_device *kbdev);
#else /* #ifndef CONFIG_MALI_SYSTEM_TRACE */
/* Dispatch kbase trace events as system trace events */
#include <mali_linux_kbase_trace.h>
#define KBASE_TRACE_ADD_SLOT(kbdev, code, ctx, katom, gpu_addr, jobslot)\
trace_mali_##code(jobslot, 0)
#define KBASE_TRACE_ADD_SLOT_INFO(kbdev, code, ctx, katom, gpu_addr, jobslot, info_val)\
trace_mali_##code(jobslot, info_val)
#define KBASE_TRACE_ADD_REFCOUNT(kbdev, code, ctx, katom, gpu_addr, refcount)\
trace_mali_##code(refcount, 0)
#define KBASE_TRACE_ADD_REFCOUNT_INFO(kbdev, code, ctx, katom, gpu_addr, refcount, info_val)\
trace_mali_##code(refcount, info_val)
#define KBASE_TRACE_ADD(kbdev, code, ctx, katom, gpu_addr, info_val)\
trace_mali_##code(gpu_addr, info_val)
#define KBASE_TRACE_CLEAR(kbdev)\
do {\
CSTD_UNUSED(kbdev);\
CSTD_NOP(0);\
} while (0)
#define KBASE_TRACE_DUMP(kbdev)\
do {\
CSTD_UNUSED(kbdev);\
CSTD_NOP(0);\
} while (0)
#endif /* #ifndef CONFIG_MALI_SYSTEM_TRACE */
#else
#define KBASE_TRACE_ADD_SLOT(kbdev, code, ctx, katom, gpu_addr, jobslot)\
do {\
CSTD_UNUSED(kbdev);\
CSTD_NOP(code);\
CSTD_UNUSED(ctx);\
CSTD_UNUSED(katom);\
CSTD_UNUSED(gpu_addr);\
CSTD_UNUSED(jobslot);\
} while (0)
#define KBASE_TRACE_ADD_SLOT_INFO(kbdev, code, ctx, katom, gpu_addr, jobslot, info_val)\
do {\
CSTD_UNUSED(kbdev);\
CSTD_NOP(code);\
CSTD_UNUSED(ctx);\
CSTD_UNUSED(katom);\
CSTD_UNUSED(gpu_addr);\
CSTD_UNUSED(jobslot);\
CSTD_UNUSED(info_val);\
CSTD_NOP(0);\
} while (0)
#define KBASE_TRACE_ADD_REFCOUNT(kbdev, code, ctx, katom, gpu_addr, refcount)\
do {\
CSTD_UNUSED(kbdev);\
CSTD_NOP(code);\
CSTD_UNUSED(ctx);\
CSTD_UNUSED(katom);\
CSTD_UNUSED(gpu_addr);\
CSTD_UNUSED(refcount);\
CSTD_NOP(0);\
} while (0)
/*
 * Trace-disabled variant of KBASE_TRACE_ADD_REFCOUNT_INFO: expands to a no-op
 * statement that still references every argument, so call sites build the
 * same way whether or not tracing is compiled in. @refcount is now referenced
 * like all the other arguments (it was previously left out).
 */
#define KBASE_TRACE_ADD_REFCOUNT_INFO(kbdev, code, ctx, katom, gpu_addr, refcount, info_val)\
	do {\
		CSTD_UNUSED(kbdev);\
		CSTD_NOP(code);\
		CSTD_UNUSED(ctx);\
		CSTD_UNUSED(katom);\
		CSTD_UNUSED(gpu_addr);\
		CSTD_UNUSED(refcount);\
		CSTD_UNUSED(info_val);\
		CSTD_NOP(0);\
	} while (0)
/*
 * Trace-disabled variant of KBASE_TRACE_ADD: a no-op statement that references
 * each argument. Parameter names mirror the trace-enabled definition.
 */
#define KBASE_TRACE_ADD(kbdev, code, ctx, katom, gpu_addr, info_val)\
	do {\
		CSTD_UNUSED(kbdev);\
		CSTD_NOP(code);\
		CSTD_UNUSED(ctx);\
		CSTD_UNUSED(katom);\
		CSTD_UNUSED(gpu_addr);\
		CSTD_UNUSED(info_val);\
		CSTD_NOP(0);\
	} while (0)
#define KBASE_TRACE_CLEAR(kbdev)\
do {\
CSTD_UNUSED(kbdev);\
CSTD_NOP(0);\
} while (0)
#define KBASE_TRACE_DUMP(kbdev)\
do {\
CSTD_UNUSED(kbdev);\
CSTD_NOP(0);\
} while (0)
#endif /* KBASE_TRACE_ENABLE */
/** PRIVATE - do not use directly. Use KBASE_TRACE_DUMP() instead */
void kbasep_trace_dump(struct kbase_device *kbdev);
#ifdef CONFIG_MALI_DEBUG
/**
* kbase_set_driver_inactive - Force driver to go inactive
* @kbdev: Device pointer
* @inactive: true if driver should go inactive, false otherwise
*
* Forcing the driver inactive will cause all future IOCTLs to wait until the
* driver is made active again. This is intended solely for the use of tests
* which require that no jobs are running while the test executes.
*/
void kbase_set_driver_inactive(struct kbase_device *kbdev, bool inactive);
#endif /* CONFIG_MALI_DEBUG */
#if defined(CONFIG_DEBUG_FS) && !defined(CONFIG_MALI_NO_MALI)
/* kbase_io_history_init - initialize data struct for register access history
*
* @h The register history to initialize
* @n The number of register accesses that the buffer could hold
*
* @return 0 if successfully initialized, failure otherwise
*/
int kbase_io_history_init(struct kbase_io_history *h, u16 n);
/* kbase_io_history_term - uninit all resources for the register access history
*
* @h The register history to terminate
*/
void kbase_io_history_term(struct kbase_io_history *h);
/* kbase_io_history_dump - print the register history to the kernel ring buffer
*
* @kbdev Pointer to kbase_device containing the register history to dump
*/
void kbase_io_history_dump(struct kbase_device *kbdev);
/**
* kbase_io_history_resize - resize the register access history buffer.
*
* @h: Pointer to a valid register history to resize
* @new_size: Number of accesses the buffer could hold
*
* A successful resize will clear all recent register accesses.
* If resizing fails for any reason (e.g., could not allocate memory, invalid
* buffer size) then the original buffer will be kept intact.
*
* @return 0 if the buffer was resized, failure otherwise
*/
int kbase_io_history_resize(struct kbase_io_history *h, u16 new_size);
#else /* !(CONFIG_DEBUG_FS && !CONFIG_MALI_NO_MALI) */
/*
 * Register access history is compiled out: kbase_io_history_init() evaluates
 * to 0 and the remaining entry points are mapped onto CSTD_NOP.
 */
#define kbase_io_history_init(...) ((int)0)
#define kbase_io_history_term CSTD_NOP
#define kbase_io_history_dump CSTD_NOP
#define kbase_io_history_resize CSTD_NOP
#endif /* CONFIG_DEBUG_FS && !CONFIG_MALI_NO_MALI */
#endif

View File

@ -0,0 +1,209 @@
/*
*
* (C) COPYRIGHT 2013-2015 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
* Foundation, and any use by you of this program is subject to the terms
* of such GNU licence.
*
* A copy of the licence is included with the program, and can also be obtained
* from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
* Boston, MA 02110-1301, USA.
*
*/
#include <linux/dma-mapping.h>
#include <mali_kbase.h>
#include <mali_kbase_10969_workaround.h>
/* This function is used to work around a HW issue with single iterator GPUs.
* If a fragment job is soft-stopped on the edge of its bounding box, it can
* happen that the restart index is out of bounds and the rerun causes a tile
* range fault. If this happens we try to clamp the restart index to a correct
* value and rerun the job.
*/
/* Mask of X and Y coordinates for the coordinates words in the descriptors*/
#define X_COORDINATE_MASK 0x00000FFF
#define Y_COORDINATE_MASK 0x0FFF0000
/* Max number of words needed from the fragment shader job descriptor */
#define JOB_HEADER_SIZE_IN_WORDS 10
#define JOB_HEADER_SIZE (JOB_HEADER_SIZE_IN_WORDS*sizeof(u32))
/* Word 0: Status Word */
#define JOB_DESC_STATUS_WORD 0
/* Word 1: Restart Index */
#define JOB_DESC_RESTART_INDEX_WORD 1
/* Word 2: Fault address low word */
#define JOB_DESC_FAULT_ADDR_LOW_WORD 2
/* Word 8: Minimum Tile Coordinates */
#define FRAG_JOB_DESC_MIN_TILE_COORD_WORD 8
/* Word 9: Maximum Tile Coordinates */
#define FRAG_JOB_DESC_MAX_TILE_COORD_WORD 9
/**
 * kbasep_10969_workaround_clamp_coordinates - Clamp the restart index of a
 *                                             fragment job to its bounding box
 * @katom: Atom whose job descriptor, located at GPU address katom->jc, is
 *         inspected and possibly patched
 *
 * Reads the first JOB_HEADER_SIZE_IN_WORDS words of the job descriptor (which
 * may straddle two pages), takes the restart coordinates from the fault
 * address low word and clamps them to the minimum/maximum tile coordinates.
 * If anything was clamped, the restart index is rewritten, the fault address
 * low word is cleared, the job status is set to BASE_JD_EVENT_STOPPED and the
 * descriptor is written back and synced for the device.
 *
 * Nothing is done for atoms without BASE_JD_REQ_FS in core_req. The GPU VM
 * lock of the atom's context is held while the descriptor is accessed.
 *
 * Return: 1 if the restart index was clamped and the descriptor updated,
 *         0 otherwise
 */
int kbasep_10969_workaround_clamp_coordinates(struct kbase_jd_atom *katom)
{
	struct device *dev = katom->kctx->kbdev->dev;
	u32 clamped = 0;
	struct kbase_va_region *region;
	phys_addr_t *page_array;
	u64 page_index;
	u32 offset = katom->jc & (~PAGE_MASK);
	u32 *page_1 = NULL;
	u32 *page_2 = NULL;
	u32 job_header[JOB_HEADER_SIZE_IN_WORDS];
	void *dst = job_header;
	u32 minX, minY, maxX, maxY;
	u32 restartX, restartY;
	u32 clampedX, clampedY;
	struct page *p;
	u32 copy_size;

	dev_warn(dev, "Called TILE_RANGE_FAULT workaround clamping function.\n");
	if (!(katom->core_req & BASE_JD_REQ_FS))
		return 0;

	kbase_gpu_vm_lock(katom->kctx);
	region = kbase_region_tracker_find_region_enclosing_address(katom->kctx,
			katom->jc);
	if (!region || (region->flags & KBASE_REG_FREE))
		goto out_unlock;

	page_array = kbase_get_cpu_phy_pages(region);
	if (!page_array)
		goto out_unlock;

	/*
	 * NOTE(review): page_index (and page_index + 1 below) is not checked
	 * against the number of pages actually backing the region - confirm
	 * that the job chain address is always backed when we get here.
	 */
	page_index = (katom->jc >> PAGE_SHIFT) - region->start_pfn;

	p = pfn_to_page(PFN_DOWN(page_array[page_index]));

	/* We need the first 10 words of the fragment shader job descriptor.
	 * copy_size is the part of those words that lies in the first page;
	 * if it is smaller than JOB_HEADER_SIZE the descriptor crosses a page
	 * boundary and the remainder is read from the next page.
	 */
	copy_size = MIN(PAGE_SIZE - offset, JOB_HEADER_SIZE);

	page_1 = kmap_atomic(p);

	/* page_1 is a u32 pointer, offset is expressed in bytes */
	page_1 += offset>>2;

	kbase_sync_single_for_cpu(katom->kctx->kbdev,
			kbase_dma_addr(p) + offset,
			copy_size, DMA_BIDIRECTIONAL);

	memcpy(dst, page_1, copy_size);

	/* The data needed crosses the page boundary,
	 * need to map the subsequent page */
	if (copy_size < JOB_HEADER_SIZE) {
		p = pfn_to_page(PFN_DOWN(page_array[page_index + 1]));
		page_2 = kmap_atomic(p);

		kbase_sync_single_for_cpu(katom->kctx->kbdev,
				kbase_dma_addr(p),
				JOB_HEADER_SIZE - copy_size, DMA_BIDIRECTIONAL);

		memcpy(dst + copy_size, page_2, JOB_HEADER_SIZE - copy_size);
	}

	/* We managed to correctly map one or two pages (in case of overflow) */
	/* Get Bounding Box data and restart index from fault address low word */
	minX = job_header[FRAG_JOB_DESC_MIN_TILE_COORD_WORD] & X_COORDINATE_MASK;
	minY = job_header[FRAG_JOB_DESC_MIN_TILE_COORD_WORD] & Y_COORDINATE_MASK;
	maxX = job_header[FRAG_JOB_DESC_MAX_TILE_COORD_WORD] & X_COORDINATE_MASK;
	maxY = job_header[FRAG_JOB_DESC_MAX_TILE_COORD_WORD] & Y_COORDINATE_MASK;
	restartX = job_header[JOB_DESC_FAULT_ADDR_LOW_WORD] & X_COORDINATE_MASK;
	restartY = job_header[JOB_DESC_FAULT_ADDR_LOW_WORD] & Y_COORDINATE_MASK;

	dev_warn(dev, "Before Clamping:\n"
			"Jobstatus: %08x\n"
			"restartIdx: %08x\n"
			"Fault_addr_low: %08x\n"
			"minCoordsX: %08x minCoordsY: %08x\n"
			"maxCoordsX: %08x maxCoordsY: %08x\n",
			job_header[JOB_DESC_STATUS_WORD],
			job_header[JOB_DESC_RESTART_INDEX_WORD],
			job_header[JOB_DESC_FAULT_ADDR_LOW_WORD],
			minX, minY,
			maxX, maxY);

	/* Set the restart index to the one which generated the fault */
	job_header[JOB_DESC_RESTART_INDEX_WORD] =
			job_header[JOB_DESC_FAULT_ADDR_LOW_WORD];

	/*
	 * Clamp each axis independently and compose the restart index once at
	 * the end. Rebuilding the word inside every branch from the unclamped
	 * value of the other axis would drop an earlier clamp whenever both X
	 * and Y are out of range.
	 */
	clampedX = restartX;
	clampedY = restartY;

	if (restartX < minX) {
		clampedX = minX;
		dev_warn(dev,
			"Clamping restart X index to minimum. %08x clamped to %08x\n",
			restartX, minX);
		clamped = 1;
	}
	if (restartY < minY) {
		clampedY = minY;
		dev_warn(dev,
			"Clamping restart Y index to minimum. %08x clamped to %08x\n",
			restartY, minY);
		clamped = 1;
	}
	if (restartX > maxX) {
		clampedX = maxX;
		dev_warn(dev,
			"Clamping restart X index to maximum. %08x clamped to %08x\n",
			restartX, maxX);
		clamped = 1;
	}
	if (restartY > maxY) {
		clampedY = maxY;
		dev_warn(dev,
			"Clamping restart Y index to maximum. %08x clamped to %08x\n",
			restartY, maxY);
		clamped = 1;
	}

	if (clamped) {
		/* X and Y occupy disjoint bit fields, so OR-ing recombines them */
		job_header[JOB_DESC_RESTART_INDEX_WORD] = clampedX | clampedY;

		/* Reset the fault address low word
		 * and set the job status to STOPPED */
		job_header[JOB_DESC_FAULT_ADDR_LOW_WORD] = 0x0;
		job_header[JOB_DESC_STATUS_WORD] = BASE_JD_EVENT_STOPPED;
		dev_warn(dev, "After Clamping:\n"
				"Jobstatus: %08x\n"
				"restartIdx: %08x\n"
				"Fault_addr_low: %08x\n"
				"minCoordsX: %08x minCoordsY: %08x\n"
				"maxCoordsX: %08x maxCoordsY: %08x\n",
				job_header[JOB_DESC_STATUS_WORD],
				job_header[JOB_DESC_RESTART_INDEX_WORD],
				job_header[JOB_DESC_FAULT_ADDR_LOW_WORD],
				minX, minY,
				maxX, maxY);

		/* Write the patched header back and sync it for the device so
		 * that future GPU reads see the update */
		memcpy(page_1, dst, copy_size);
		p = pfn_to_page(PFN_DOWN(page_array[page_index]));

		kbase_sync_single_for_device(katom->kctx->kbdev,
				kbase_dma_addr(p) + offset,
				copy_size, DMA_TO_DEVICE);

		if (copy_size < JOB_HEADER_SIZE) {
			memcpy(page_2, dst + copy_size,
					JOB_HEADER_SIZE - copy_size);
			p = pfn_to_page(PFN_DOWN(page_array[page_index + 1]));

			kbase_sync_single_for_device(katom->kctx->kbdev,
					kbase_dma_addr(p),
					JOB_HEADER_SIZE - copy_size,
					DMA_TO_DEVICE);
		}
	}

	/* Atomic mappings are released in the reverse order of mapping */
	if (copy_size < JOB_HEADER_SIZE)
		kunmap_atomic(page_2);

	kunmap_atomic(page_1);

out_unlock:
	kbase_gpu_vm_unlock(katom->kctx);
	return clamped;
}

View File

@ -0,0 +1,23 @@
/*
*
* (C) COPYRIGHT 2013-2014 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
* Foundation, and any use by you of this program is subject to the terms
* of such GNU licence.
*
* A copy of the licence is included with the program, and can also be obtained
* from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
* Boston, MA 02110-1301, USA.
*
*/
#ifndef _KBASE_10969_WORKAROUND_
#define _KBASE_10969_WORKAROUND_
int kbasep_10969_workaround_clamp_coordinates(struct kbase_jd_atom *katom);
#endif /* _KBASE_10969_WORKAROUND_ */

View File

@ -0,0 +1,102 @@
/*
*
* (C) COPYRIGHT 2016 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
* Foundation, and any use by you of this program is subject to the terms
* of such GNU licence.
*
* A copy of the licence is included with the program, and can also be obtained
* from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
* Boston, MA 02110-1301, USA.
*
*/
#include <linux/debugfs.h>
#include <mali_kbase.h>
#include <mali_kbase_as_fault_debugfs.h>
#ifdef CONFIG_DEBUG_FS
#ifdef CONFIG_MALI_DEBUG
/*
 * seq_file show callback for one address space file. The address space number
 * is carried in sfile->private. For every registered kbase device that has a
 * pending (not yet read) fault flagged for that address space, the flag is
 * cleared and the last fault address is printed.
 */
static int kbase_as_fault_read(struct seq_file *sfile, void *data)
{
	const uintptr_t as_no = (uintptr_t) sfile->private;
	const struct list_head *kbdev_list = kbase_dev_list_get();
	struct list_head *pos;

	list_for_each(pos, kbdev_list) {
		struct kbase_device *kbdev =
			list_entry(pos, struct kbase_device, entry);

		if (!(kbdev->debugfs_as_read_bitmap & (1ULL << as_no)))
			continue;

		/* don't show this one again until another fault occurs */
		kbdev->debugfs_as_read_bitmap &= ~(1ULL << as_no);

		/* output the last page fault addr */
		seq_printf(sfile, "%llu\n", (u64) kbdev->as[as_no].fault_addr);
	}

	kbase_dev_list_put(kbdev_list);

	return 0;
}
/*
 * open() handler: binds kbase_as_fault_read() as the single-record show
 * callback and hands it the inode's private data.
 */
static int kbase_as_fault_debugfs_open(struct inode *in, struct file *file)
{
	void *as_cookie = in->i_private;

	return single_open(file, kbase_as_fault_read, as_cookie);
}
/* File operations of the per-address-space debugfs files (seq_file based) */
static const struct file_operations as_fault_fops = {
	.open    = kbase_as_fault_debugfs_open,
	.release = single_release,
	.read    = seq_read,
	.llseek  = seq_lseek,
};
#endif /* CONFIG_MALI_DEBUG */
#endif /* CONFIG_DEBUG_FS */
/*
 * Create the "address_spaces" debugfs directory under the device's Mali
 * debugfs directory and populate it with one "as<N>" file per hardware
 * address space. Compiles to an empty function unless both CONFIG_DEBUG_FS
 * and CONFIG_MALI_DEBUG are set.
 */
void kbase_as_fault_debugfs_init(struct kbase_device *kbdev)
{
#ifdef CONFIG_DEBUG_FS
#ifdef CONFIG_MALI_DEBUG
	struct dentry *as_dir;
	char as_name[64];
	uint as_no;

	/* no fault is pending for any address space yet */
	kbdev->debugfs_as_read_bitmap = 0ULL;

	KBASE_DEBUG_ASSERT(kbdev->nr_hw_address_spaces);
	KBASE_DEBUG_ASSERT(sizeof(kbdev->as[0].fault_addr) == sizeof(u64));

	as_dir = debugfs_create_dir("address_spaces",
			kbdev->mali_debugfs_directory);
	if (!as_dir) {
		dev_warn(kbdev->dev, "unable to create address_spaces debugfs directory");
		return;
	}

	for (as_no = 0; as_no < kbdev->nr_hw_address_spaces; as_no++) {
		snprintf(as_name, ARRAY_SIZE(as_name), "as%u", as_no);
		/* the address space number travels as the file's private data */
		debugfs_create_file(as_name, S_IRUGO, as_dir,
				(void *) ((uintptr_t) as_no), &as_fault_fops);
	}
#endif /* CONFIG_MALI_DEBUG */
#endif /* CONFIG_DEBUG_FS */
}

View File

@ -0,0 +1,45 @@
/*
*
* (C) COPYRIGHT 2016 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
* Foundation, and any use by you of this program is subject to the terms
* of such GNU licence.
*
* A copy of the licence is included with the program, and can also be obtained
* from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
* Boston, MA 02110-1301, USA.
*
*/
#ifndef _KBASE_AS_FAULT_DEBUG_FS_H
#define _KBASE_AS_FAULT_DEBUG_FS_H
/**
* kbase_as_fault_debugfs_init() - Add debugfs files for reporting page faults
*
* @kbdev: Pointer to kbase_device
*/
void kbase_as_fault_debugfs_init(struct kbase_device *kbdev);
/**
 * kbase_as_fault_debugfs_new() - Flag a new fault on an address space so that
 *                                it becomes readable through debugfs
 *
 * @kbdev: Pointer to kbase_device
 * @as_no: The address space the fault occurred on
 *
 * Sets bit @as_no in kbdev->debugfs_as_read_bitmap. Does nothing unless both
 * CONFIG_DEBUG_FS and CONFIG_MALI_DEBUG are enabled.
 */
static inline void
kbase_as_fault_debugfs_new(struct kbase_device *kbdev, int as_no)
{
#ifdef CONFIG_DEBUG_FS
#ifdef CONFIG_MALI_DEBUG
	kbdev->debugfs_as_read_bitmap |= (1ULL << as_no);
#endif /* CONFIG_MALI_DEBUG */
#endif /* CONFIG_DEBUG_FS */
}
#endif /*_KBASE_AS_FAULT_DEBUG_FS_H*/

View File

@ -0,0 +1,64 @@
/*
*
* (C) COPYRIGHT 2012-2016 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
* Foundation, and any use by you of this program is subject to the terms
* of such GNU licence.
*
* A copy of the licence is included with the program, and can also be obtained
* from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
* Boston, MA 02110-1301, USA.
*
*/
/*
* Cache Policy API.
*/
#include "mali_kbase_cache_policy.h"
/*
 * Translate region allocation flags into cache flags.
 *
 * The result is KBASE_REG_CPU_CACHED when BASE_MEM_CACHED_CPU is set in
 * @flags and 0 otherwise; @nr_pages does not influence the decision.
 */
u32 kbase_cache_enabled(u32 flags, u32 nr_pages)
{
	CSTD_UNUSED(nr_pages);

	if (!(flags & BASE_MEM_CACHED_CPU))
		return 0;

	return KBASE_REG_CPU_CACHED;
}
/*
 * Sync a single DMA mapping for device access via
 * dma_sync_single_for_device(). When CONFIG_MALI_COH_KERN is set and the
 * device reports COHERENCY_ACE system coherency, the sync is skipped.
 */
void kbase_sync_single_for_device(struct kbase_device *kbdev, dma_addr_t handle,
		size_t size, enum dma_data_direction dir)
{
	/* Check if kernel is using coherency with GPU */
#ifdef CONFIG_MALI_COH_KERN
	const bool hw_coherent = (kbdev->system_coherency == COHERENCY_ACE);
#else
	const bool hw_coherent = false;
#endif /* CONFIG_MALI_COH_KERN */

	if (!hw_coherent)
		dma_sync_single_for_device(kbdev->dev, handle, size, dir);
}
/*
 * Sync a single DMA mapping for CPU access via dma_sync_single_for_cpu().
 * When CONFIG_MALI_COH_KERN is set and the device reports COHERENCY_ACE
 * system coherency, the sync is skipped.
 */
void kbase_sync_single_for_cpu(struct kbase_device *kbdev, dma_addr_t handle,
		size_t size, enum dma_data_direction dir)
{
	/* Check if kernel is using coherency with GPU */
#ifdef CONFIG_MALI_COH_KERN
	const bool hw_coherent = (kbdev->system_coherency == COHERENCY_ACE);
#else
	const bool hw_coherent = false;
#endif /* CONFIG_MALI_COH_KERN */

	if (!hw_coherent)
		dma_sync_single_for_cpu(kbdev->dev, handle, size, dir);
}

View File

@ -0,0 +1,45 @@
/*
*
* (C) COPYRIGHT 2012-2013, 2015 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
* Foundation, and any use by you of this program is subject to the terms
* of such GNU licence.
*
* A copy of the licence is included with the program, and can also be obtained
* from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
* Boston, MA 02110-1301, USA.
*
*/
/*
* Cache Policy API.
*/
#ifndef _KBASE_CACHE_POLICY_H_
#define _KBASE_CACHE_POLICY_H_
#include "mali_kbase.h"
#include "mali_base_kernel.h"
/**
* kbase_cache_enabled - Choose the cache policy for a specific region
* @flags: flags describing attributes of the region
* @nr_pages: total number of pages (backed or not) for the region
*
* Tells whether the CPU and GPU caches should be enabled or not for a specific
* region.
* This function can be modified to customize the cache policy depending on the
* flags and size of the region.
*
* Return: %KBASE_REG_CPU_CACHED if the CPU cache should be enabled for the
* region, 0 otherwise
*/
u32 kbase_cache_enabled(u32 flags, u32 nr_pages);
#endif /* _KBASE_CACHE_POLICY_H_ */

View File

@ -0,0 +1,51 @@
/*
*
* (C) COPYRIGHT 2011-2015 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
* Foundation, and any use by you of this program is subject to the terms
* of such GNU licence.
*
* A copy of the licence is included with the program, and can also be obtained
* from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
* Boston, MA 02110-1301, USA.
*
*/
#include <mali_kbase.h>
#include <mali_kbase_defs.h>
#include <mali_kbase_config_defaults.h>
/*
 * Run the platform specific init hook taken from PLATFORM_FUNCS, if one is
 * provided. Returns the hook's result, or 0 when there is no hook.
 */
int kbasep_platform_device_init(struct kbase_device *kbdev)
{
	struct kbase_platform_funcs_conf *funcs =
		(struct kbase_platform_funcs_conf *)PLATFORM_FUNCS;

	if (!funcs || !funcs->platform_init_func)
		return 0;

	return funcs->platform_init_func(kbdev);
}
/*
 * Run the platform specific termination hook taken from PLATFORM_FUNCS, if
 * one is provided.
 */
void kbasep_platform_device_term(struct kbase_device *kbdev)
{
	struct kbase_platform_funcs_conf *funcs =
		(struct kbase_platform_funcs_conf *)PLATFORM_FUNCS;

	if (!funcs || !funcs->platform_term_func)
		return;

	funcs->platform_term_func(kbdev);
}
/*
 * Default CPU clock speed provider: always reports a fixed value of 100
 * through @clock_speed and returns 0. The value is a placeholder, not a
 * measurement.
 */
int kbase_cpuprops_get_default_clock_speed(u32 * const clock_speed)
{
	enum { KBASE_DEFAULT_CPU_CLOCK_SPEED = 100 };

	KBASE_DEBUG_ASSERT(clock_speed != NULL);

	*clock_speed = KBASE_DEFAULT_CPU_CLOCK_SPEED;

	return 0;
}

View File

@ -0,0 +1,345 @@
/*
*
* (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
* Foundation, and any use by you of this program is subject to the terms
* of such GNU licence.
*
* A copy of the licence is included with the program, and can also be obtained
* from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
* Boston, MA 02110-1301, USA.
*
*/
/**
* @file mali_kbase_config.h
* Configuration API and Attributes for KBase
*/
#ifndef _KBASE_CONFIG_H_
#define _KBASE_CONFIG_H_
#include <asm/page.h>
#include <mali_malisw.h>
#include <mali_kbase_backend_config.h>
/**
* @addtogroup base_api
* @{
*/
/**
* @addtogroup base_kbase_api
* @{
*/
/**
* @addtogroup kbase_config Configuration API and Attributes
* @{
*/
#include <linux/rbtree.h>
/* Forward declaration of struct kbase_device */
struct kbase_device;
/**
* struct kbase_platform_funcs_conf - Specifies platform init/term function pointers
*
* Specifies the function pointers for platform specific initialization and
* termination. By default no functions are required: either member may be
* NULL when no additional platform specific control is necessary.
*/
struct kbase_platform_funcs_conf {
/**
* platform_init_func - platform specific init function pointer
* @kbdev - kbase_device pointer
*
* Returns 0 on success, negative error code otherwise.
*
* Function pointer for platform specific initialization or NULL if no
* initialization function is required. At the point this is called the
* GPU is not active and its power and clocks are in an unknown (platform
* specific) state, as kbase doesn't yet have control of power and clocks.
*
* The platform specific private pointer kbase_device::platform_context
* can be accessed (and possibly initialized) in here.
*/
int (*platform_init_func)(struct kbase_device *kbdev);
/**
* platform_term_func - platform specific termination function pointer
* @kbdev - kbase_device pointer
*
* Function pointer for platform specific termination or NULL if no
* termination function is required. At the point this is called the GPU
* will be idle but still powered and clocked.
*
* The platform specific private pointer kbase_device::platform_context
* can be accessed (and possibly terminated) in here.
*/
void (*platform_term_func)(struct kbase_device *kbdev);
};
/*
* @brief Specifies the callbacks for power management
*
* By default no callbacks will be made and the GPU must not be powered off.
* Every member takes the kbase_device the callback is being made for.
*/
struct kbase_pm_callback_conf {
/** Callback for when the GPU is idle and the power to it can be switched off.
*
* The system integrator can decide whether to either do nothing, just switch off
* the clocks to the GPU, or to completely power down the GPU.
* The platform specific private pointer kbase_device::platform_context can be accessed and modified in here. It is the
* platform \em callback's responsibility to initialize and terminate this pointer if used (see @ref kbase_platform_funcs_conf).
*/
void (*power_off_callback)(struct kbase_device *kbdev);
/** Callback for when the GPU is about to become active and power must be supplied.
*
* This function must not return until the GPU is powered and clocked sufficiently for register access to
* succeed. The return value specifies whether the GPU was powered down since the call to power_off_callback.
* If the GPU state has been lost then this function must return 1, otherwise it should return 0.
* The platform specific private pointer kbase_device::platform_context can be accessed and modified in here. It is the
* platform \em callback's responsibility to initialize and terminate this pointer if used (see @ref kbase_platform_funcs_conf).
*
* The return value of the first call to this function is ignored.
*
* @return 1 if the GPU state may have been lost, 0 otherwise.
*/
int (*power_on_callback)(struct kbase_device *kbdev);
/** Callback for when the system is requesting a suspend and GPU power
* must be switched off.
*
* Note that if this callback is present, then this may be called
* without a preceding call to power_off_callback. Therefore this
* callback must be able to take any action that might otherwise happen
* in power_off_callback.
*
* The platform specific private pointer kbase_device::platform_context
* can be accessed and modified in here. It is the platform \em
* callback's responsibility to initialize and terminate this pointer if
* used (see @ref kbase_platform_funcs_conf).
*/
void (*power_suspend_callback)(struct kbase_device *kbdev);
/** Callback for when the system is resuming from a suspend and GPU
* power must be switched on.
*
* Note that if this callback is present, then this may be called
* without a following call to power_on_callback. Therefore this
* callback must be able to take any action that might otherwise happen
* in power_on_callback.
*
* The platform specific private pointer kbase_device::platform_context
* can be accessed and modified in here. It is the platform \em
* callback's responsibility to initialize and terminate this pointer if
* used (see @ref kbase_platform_funcs_conf).
*/
void (*power_resume_callback)(struct kbase_device *kbdev);
/** Callback for handling runtime power management initialization.
*
* The runtime power management callbacks @ref power_runtime_off_callback and @ref power_runtime_on_callback
* will become active from calls made to the OS from within this function.
* The runtime calls can be triggered by calls from @ref power_off_callback and @ref power_on_callback.
* Note: for linux the kernel must have CONFIG_PM_RUNTIME enabled to use this feature.
*
* @return 0 on success, else int error code.
*/
int (*power_runtime_init_callback)(struct kbase_device *kbdev);
/** Callback for handling runtime power management termination.
*
* The runtime power management callbacks @ref power_runtime_off_callback and @ref power_runtime_on_callback
* should no longer be called by the OS on completion of this function.
* Note: for linux the kernel must have CONFIG_PM_RUNTIME enabled to use this feature.
*/
void (*power_runtime_term_callback)(struct kbase_device *kbdev);
/** Callback for runtime power-off power management callback
*
* For linux this callback will be called by the kernel runtime_suspend callback.
* Note: for linux the kernel must have CONFIG_PM_RUNTIME enabled to use this feature.
*
* This callback is declared void, so it cannot report an error.
*/
void (*power_runtime_off_callback)(struct kbase_device *kbdev);
/** Callback for runtime power-on power management callback
*
* For linux this callback will be called by the kernel runtime_resume callback.
* Note: for linux the kernel must have CONFIG_PM_RUNTIME enabled to use this feature.
*
* NOTE(review): this callback returns int but its meaning is not stated
* here; presumably 0 on success, else an OS error code - confirm against
* the caller.
*/
int (*power_runtime_on_callback)(struct kbase_device *kbdev);
/*
* Optional callback for checking if GPU can be suspended when idle
*
* This callback will be called by the runtime power management core
* when the reference count goes to 0 to provide notification that the
* GPU now seems idle.
*
* If this callback finds that the GPU can't be powered off, or handles
* suspend by powering off directly or queueing up a power off, a
* non-zero value must be returned to prevent the runtime PM core from
* also triggering a suspend.
*
* Returning 0 will cause the runtime PM core to conduct a regular
* autosuspend.
*
* This callback is optional and if not provided regular autosuspend
* will be triggered.
*
* Note: The Linux kernel must have CONFIG_PM_RUNTIME enabled to use
* this feature.
*
* Return 0 if GPU can be suspended, positive value if it can not be
* suspended by runtime PM, else OS error code
*/
int (*power_runtime_idle_callback)(struct kbase_device *kbdev);
};
/**
* kbase_cpuprops_get_default_clock_speed - default for CPU_SPEED_FUNC
* @clock_speed - see kbase_cpu_clk_speed_func for details on the parameters
*
* Returns 0 on success, negative error code otherwise.
*
* Default implementation of CPU_SPEED_FUNC. This function sets clock_speed
* to 100, so will be an underestimate for any real system.
*/
int kbase_cpuprops_get_default_clock_speed(u32 * const clock_speed);
/**
* kbase_cpu_clk_speed_func - Type of the function pointer for CPU_SPEED_FUNC
* @param clock_speed - pointer to store the current CPU clock speed in MHz
*
* Returns 0 on success, otherwise negative error code.
*
* This is mainly used to implement OpenCL's clGetDeviceInfo().
*/
typedef int (*kbase_cpu_clk_speed_func) (u32 *clock_speed);
/**
* kbase_gpu_clk_speed_func - Type of the function pointer for GPU_SPEED_FUNC
* @param clock_speed - pointer to store the current GPU clock speed in MHz
*
* Returns 0 on success, otherwise negative error code.
* When an error is returned the caller assumes maximum GPU speed stored in
* gpu_freq_khz_max.
*
* If the system timer is not available then this function is required
* for the OpenCL queue profiling to return correct timing information.
*
*/
typedef int (*kbase_gpu_clk_speed_func) (u32 *clock_speed);
#ifdef CONFIG_OF
/*
* With CONFIG_OF the platform config has no members.
* NOTE(review): presumably the I/O resources are then taken from the device
* tree rather than from this structure - confirm against the probe code.
*/
struct kbase_platform_config {
};
#else
/*
* @brief Specifies start and end of I/O memory region.
*
* NOTE(review): whether @end is inclusive or one-past-the-end is not visible
* here - confirm against the code that consumes it.
*/
struct kbase_io_memory_region {
u64 start;
u64 end;
};
/*
* @brief Specifies I/O related resources like IRQs and memory region for I/O operations.
*
* Holds one IRQ number each for the job, MMU and GPU interrupts, plus the
* I/O memory region.
*/
struct kbase_io_resources {
u32 job_irq_number;
u32 mmu_irq_number;
u32 gpu_irq_number;
struct kbase_io_memory_region io_memory_region;
};
/*
* Without CONFIG_OF the platform config points at a constant description of
* the I/O resources.
*/
struct kbase_platform_config {
const struct kbase_io_resources *io_resources;
};
#endif /* CONFIG_OF */
/**
* @brief Gets the pointer to platform config.
*
* @return Pointer to the platform config
*/
struct kbase_platform_config *kbase_get_platform_config(void);
/**
* kbasep_platform_device_init - Platform specific call to initialize hardware
* @kbdev: kbase device pointer
*
* Function calls a platform defined routine if specified in the configuration
* attributes. The routine can initialize any hardware and context state that
* is required for the GPU block to function.
*
* Return: 0 if no errors have been found in the config.
* Negative error code otherwise.
*/
int kbasep_platform_device_init(struct kbase_device *kbdev);
/**
* kbasep_platform_device_term - Platform specific call to terminate hardware
* @kbdev: Kbase device pointer
*
* Function calls a platform defined routine if specified in the configuration
* attributes. The routine can destroy any platform specific context state and
* shut down any hardware functionality that is outside of the Power Management
* callbacks.
*
*/
void kbasep_platform_device_term(struct kbase_device *kbdev);
/**
* kbase_platform_early_init - Early initialisation of the platform code
*
* This function will be called when the module is loaded to perform any
* early initialisation required by the platform code, such as reading
* platform specific device tree entries for the GPU.
*
* Return: 0 for success, any other fail causes module initialisation to fail
*/
int kbase_platform_early_init(void);
#ifndef CONFIG_OF
#ifdef CONFIG_MALI_PLATFORM_FAKE
/**
* kbase_platform_fake_register - Register a platform device for the GPU
*
* This can be used to register a platform device on systems where device tree
* is not enabled and the platform initialisation code in the kernel doesn't
* create the GPU device. Where possible device tree should be used instead.
*
* Return: 0 for success, any other fail causes module initialisation to fail
*/
int kbase_platform_fake_register(void);
/**
* kbase_platform_fake_unregister - Unregister a fake platform device
*
* Unregister the platform device created with kbase_platform_fake_register()
*/
void kbase_platform_fake_unregister(void);
#endif
#endif
/** @} *//* end group kbase_config */
/** @} *//* end group base_kbase_api */
/** @} *//* end group base_api */
#endif /* _KBASE_CONFIG_H_ */

View File

@ -0,0 +1,261 @@
/*
*
* (C) COPYRIGHT 2013-2016 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
* Foundation, and any use by you of this program is subject to the terms
* of such GNU licence.
*
* A copy of the licence is included with the program, and can also be obtained
* from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
* Boston, MA 02110-1301, USA.
*
*/
/**
* @file mali_kbase_config_defaults.h
*
* Default values for configuration settings
*
*/
#ifndef _KBASE_CONFIG_DEFAULTS_H_
#define _KBASE_CONFIG_DEFAULTS_H_
/* Include mandatory definitions per platform */
#include <mali_kbase_config_platform.h>
/**
* Irq throttle. It is the minimum desired time in between two
* consecutive gpu interrupts (given in 'us'). The irq throttle
* gpu register will be configured after this, taking into
* account the configured max frequency.
*
* Attached value: number in micro seconds
*/
#define DEFAULT_IRQ_THROTTLE_TIME_US 20
/**
* Default Job Scheduler initial runtime of a context for the CFS Policy,
* in time-slices.
*
* This value is relative to that of the least-run context, and defines
* where in the CFS queue a new context is added. A value of 1 means 'after
* the least-run context has used its timeslice'. Therefore, when all
* contexts consistently use the same amount of time, a value of 1 models a
* FIFO. A value of 0 would model a LIFO.
*
* The value is represented in "numbers of time slices". Multiply this
* value by that defined in @ref DEFAULT_JS_CTX_TIMESLICE_NS to get
* the time value for this in nanoseconds.
*/
#define DEFAULT_JS_CFS_CTX_RUNTIME_INIT_SLICES 1
/**
* Default Job Scheduler minimum runtime value of a context for CFS, in
* time_slices relative to that of the least-run context.
*
* This is a measure of how much preferential treatment is given to a
* context that is not run very often.
*
* Specifically, this value defines how many timeslices such a context is
* (initially) allowed to use at once. Such contexts (e.g. 'interactive'
* processes) will appear near the front of the CFS queue, and can initially
* use more time than contexts that run continuously (e.g. 'batch'
* processes).
*
* This limit \b prevents a "stored-up timeslices" DoS attack, where a ctx
* not run for a long time attacks the system by using a very large initial
* number of timeslices when it finally does run.
*
* @note A value of zero allows not-run-often contexts to get scheduled in
* quickly, but to only use a single timeslice when they get scheduled in.
*/
#define DEFAULT_JS_CFS_CTX_RUNTIME_MIN_SLICES 2
/**
* Boolean indicating whether the driver is configured to be secure at
* a potential loss of performance.
*
* This currently affects only r0p0-15dev0 HW and earlier.
*
* On r0p0-15dev0 HW and earlier, there are tradeoffs between security and
* performance:
*
* - When this is set to true, the driver remains fully secure,
* but potentially loses performance compared with setting this to
* false.
* - When set to false, the driver is open to certain security
* attacks.
*
* From r0p0-00rel0 and onwards, there is no security loss by setting
* this to false, and no performance loss by setting it to
* true.
*/
#define DEFAULT_SECURE_BUT_LOSS_OF_PERFORMANCE false
/*
* Address ID limit settings, used as the values of DEFAULT_ARID_LIMIT and
* DEFAULT_AWID_LIMIT. Each enumerator is the raw register value for that
* limit; note that the encoding is not monotonic in the number of IDs
* (32 -> 0x0, 16 -> 0x3, 8 -> 0x2, 4 -> 0x1).
*/
enum {
/**
* Use unrestricted Address ID width on the AXI bus.
*/
KBASE_AID_32 = 0x0,
/**
* Restrict GPU to a half of maximum Address ID count.
* This will reduce performance, but reduce bus load due to GPU.
*/
KBASE_AID_16 = 0x3,
/**
* Restrict GPU to a quarter of maximum Address ID count.
* This will reduce performance, but reduce bus load due to GPU.
*/
KBASE_AID_8 = 0x2,
/**
* Restrict GPU to an eighth of maximum Address ID count.
* This will reduce performance, but reduce bus load due to GPU.
*/
KBASE_AID_4 = 0x1
};
/**
* Default setting for read Address ID limiting on AXI bus.
*
* Attached value: u32 register value
* KBASE_AID_32 - use the full 32 IDs (5 ID bits)
* KBASE_AID_16 - use 16 IDs (4 ID bits)
* KBASE_AID_8 - use 8 IDs (3 ID bits)
* KBASE_AID_4 - use 4 IDs (2 ID bits)
* Default value: KBASE_AID_32 (no limit). Note hardware implementation
* may limit to a lower value.
*/
#define DEFAULT_ARID_LIMIT KBASE_AID_32
/**
* Default setting for write Address ID limiting on AXI.
*
* Attached value: u32 register value
* KBASE_AID_32 - use the full 32 IDs (5 ID bits)
* KBASE_AID_16 - use 16 IDs (4 ID bits)
* KBASE_AID_8 - use 8 IDs (3 ID bits)
* KBASE_AID_4 - use 4 IDs (2 ID bits)
* Default value: KBASE_AID_32 (no limit). Note hardware implementation
* may limit to a lower value.
*/
#define DEFAULT_AWID_LIMIT KBASE_AID_32
/**
* Default UMP device mapping. A UMP_DEVICE_<device>_SHIFT value which
* defines which UMP device this GPU should be mapped to.
*/
#define DEFAULT_UMP_GPU_DEVICE_SHIFT UMP_DEVICE_Z_SHIFT
/*
* Default period for DVFS sampling
*/
// #define DEFAULT_PM_DVFS_PERIOD 100 /* 100ms */
#define DEFAULT_PM_DVFS_PERIOD 20 /* 20 ms */
/*
* Power Management poweroff tick granularity. This is in nanoseconds to
* allow HR timer support.
*
* On each scheduling tick, the power manager core may decide to:
* -# Power off one or more shader cores
* -# Power off the entire GPU
*/
#define DEFAULT_PM_GPU_POWEROFF_TICK_NS (400000) /* 400us */
/*
* Power Manager number of ticks before shader cores are powered off
*/
#define DEFAULT_PM_POWEROFF_TICK_SHADER (2) /* 400-800us */
/*
* Power Manager number of ticks before GPU is powered off
*/
#define DEFAULT_PM_POWEROFF_TICK_GPU (2) /* 400-800us */
/*
* Default scheduling tick granularity
*/
#define DEFAULT_JS_SCHEDULING_PERIOD_NS (100000000u) /* 100ms */
/*
* Default minimum number of scheduling ticks before jobs are soft-stopped.
*
* This defines the time-slice for a job (which may be different from that of a
* context)
*/
#define DEFAULT_JS_SOFT_STOP_TICKS (1) /* 100ms-200ms */
/*
* Default minimum number of scheduling ticks before CL jobs are soft-stopped.
*/
#define DEFAULT_JS_SOFT_STOP_TICKS_CL (1) /* 100ms-200ms */
/*
* Default minimum number of scheduling ticks before jobs are hard-stopped
*/
#define DEFAULT_JS_HARD_STOP_TICKS_SS (100) /* 10s */
#define DEFAULT_JS_HARD_STOP_TICKS_SS_8408 (300) /* 30s */
/*
* Default minimum number of scheduling ticks before CL jobs are hard-stopped.
*/
#define DEFAULT_JS_HARD_STOP_TICKS_CL (100) /* 10s */
/*
* Default minimum number of scheduling ticks before jobs are hard-stopped
* during dumping
*/
#define DEFAULT_JS_HARD_STOP_TICKS_DUMPING (15000) /* 1500s */
/*
* Default timeout for some software jobs, after which the software event wait
* jobs will be cancelled.
*/
#define DEFAULT_JS_SOFT_JOB_TIMEOUT ((u32)3000) /* 3s */
/*
* Default minimum number of scheduling ticks before the GPU is reset to clear a
* "stuck" job
*/
#define DEFAULT_JS_RESET_TICKS_SS (105) /* 10.5s */
#define DEFAULT_JS_RESET_TICKS_SS_8408 (450) /* 45s */
/*
* Default minimum number of scheduling ticks before the GPU is reset to clear a
* "stuck" CL job.
*/
#define DEFAULT_JS_RESET_TICKS_CL (105) /* 10.5s */
/*
* Default minimum number of scheduling ticks before the GPU is reset to clear a
* "stuck" job during dumping.
*/
#define DEFAULT_JS_RESET_TICKS_DUMPING (15020) /* 1502s */
/*
* Default number of milliseconds given for other jobs on the GPU to be
* soft-stopped when the GPU needs to be reset.
*/
#define DEFAULT_RESET_TIMEOUT_MS (3000) /* 3s */
/*
* Default timeslice that a context is scheduled in for, in nanoseconds.
*
* When a context has used up this amount of time across its jobs, it is
* scheduled out to let another run.
*
* @note the resolution is nanoseconds (ns) here, because that's the format
* often used by the OS.
*/
#define DEFAULT_JS_CTX_TIMESLICE_NS (50000000) /* 50ms */
#endif /* _KBASE_CONFIG_DEFAULTS_H_ */

View File

@ -0,0 +1,321 @@
/*
*
* (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
* Foundation, and any use by you of this program is subject to the terms
* of such GNU licence.
*
* A copy of the licence is included with the program, and can also be obtained
* from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
* Boston, MA 02110-1301, USA.
*
*/
/*
* Base kernel context APIs
*/
#include <mali_kbase.h>
#include <mali_midg_regmap.h>
#include <mali_kbase_mem_linux.h>
/**
* kbase_create_context() - Create a kernel base context.
* @kbdev: Kbase device
* @is_compat: Force creation of a 32-bit context
*
* Allocate and init a kernel base context. The context is allocated zeroed
* with vzalloc() and then initialised step by step. If a step fails, the
* steps completed so far are undone through the chain of labels at the end of
* the function, which run in reverse order of initialisation.
*
* Return: new kbase context, or NULL if the allocation or any init step
* failed. The error code held in the local err is not passed to the caller.
*/
struct kbase_context *
kbase_create_context(struct kbase_device *kbdev, bool is_compat)
{
struct kbase_context *kctx;
int err;
KBASE_DEBUG_ASSERT(kbdev != NULL);
/* zero-initialised because a lot of code assumes the context is zeroed on create */
kctx = vzalloc(sizeof(*kctx));
if (!kctx)
goto out;
/* creating a context is considered a disjoint event */
kbase_disjoint_event(kbdev);
kctx->kbdev = kbdev;
kctx->as_nr = KBASEP_AS_NR_INVALID;
if (is_compat)
kbase_ctx_flag_set(kctx, KCTX_COMPAT);
#ifdef CONFIG_MALI_TRACE_TIMELINE
kctx->timeline.owner_tgid = task_tgid_nr(current);
#endif
atomic_set(&kctx->setup_complete, 0);
atomic_set(&kctx->setup_in_progress, 0);
spin_lock_init(&kctx->mm_update_lock);
kctx->process_mm = NULL;
atomic_set(&kctx->nonmapped_pages, 0);
kctx->slots_pullable = 0;
/* Record the tgid and pid of the task creating the context */
kctx->tgid = current->tgid;
kctx->pid = current->pid;
/*
* NOTE(review): the device-wide pool is passed as the last argument,
* presumably as the pool this per-context pool falls back to - confirm
* against kbase_mem_pool_init().
*/
err = kbase_mem_pool_init(&kctx->mem_pool,
kbdev->mem_pool_max_size_default,
kctx->kbdev, &kbdev->mem_pool);
if (err)
goto free_kctx;
err = kbase_mem_evictable_init(kctx);
if (err)
goto free_pool;
atomic_set(&kctx->used_pages, 0);
err = kbase_jd_init(kctx);
if (err)
goto deinit_evictable;
err = kbasep_js_kctx_init(kctx);
if (err)
goto free_jd; /* safe to call kbasep_js_kctx_term in this case */
err = kbase_event_init(kctx);
if (err)
goto free_jd;
atomic_set(&kctx->drain_pending, 0);
mutex_init(&kctx->reg_lock);
INIT_LIST_HEAD(&kctx->waiting_soft_jobs);
spin_lock_init(&kctx->waiting_soft_jobs_lock);
#ifdef CONFIG_KDS
INIT_LIST_HEAD(&kctx->waiting_kds_resource);
#endif
err = kbase_dma_fence_init(kctx);
if (err)
goto free_event;
err = kbase_mmu_init(kctx);
if (err)
goto term_dma_fence;
/*
* Grow the context's memory pool and retry until kbase_mmu_alloc_pgd()
* returns a non-zero pgd. A failure to grow the pool aborts creation.
*/
do {
err = kbase_mem_pool_grow(&kctx->mem_pool,
MIDGARD_MMU_BOTTOMLEVEL);
if (err)
goto pgd_no_mem;
kctx->pgd = kbase_mmu_alloc_pgd(kctx);
} while (!kctx->pgd);
/* err is not set on this failure; the function returns NULL regardless */
kctx->aliasing_sink_page = kbase_mem_alloc_page(kctx->kbdev);
if (!kctx->aliasing_sink_page)
goto no_sink_page;
init_waitqueue_head(&kctx->event_queue);
kctx->cookies = KBASE_COOKIE_MASK;
/* Make sure page 0 is not used... */
err = kbase_region_tracker_init(kctx);
if (err)
goto no_region_tracker;
err = kbase_sticky_resource_init(kctx);
if (err)
goto no_sticky;
err = kbase_jit_init(kctx);
if (err)
goto no_jit;
#ifdef CONFIG_GPU_TRACEPOINTS
atomic_set(&kctx->jctx.work_id, 0);
#endif
#ifdef CONFIG_MALI_TRACE_TIMELINE
atomic_set(&kctx->timeline.jd_atoms_in_flight, 0);
#endif
/* atomic_add_return() yields the incremented count; the id is the value before it */
kctx->id = atomic_add_return(1, &(kbdev->ctx_num)) - 1;
mutex_init(&kctx->vinstr_cli_lock);
setup_timer(&kctx->soft_job_timeout,
kbasep_soft_job_timeout_worker,
(uintptr_t)kctx);
return kctx;
/*
* Error unwinding: each label undoes the steps that had completed before
* the failing one and then falls through to the labels for earlier steps.
*/
no_jit:
kbase_gpu_vm_lock(kctx);
kbase_sticky_resource_term(kctx);
kbase_gpu_vm_unlock(kctx);
no_sticky:
kbase_region_tracker_term(kctx);
no_region_tracker:
kbase_mem_pool_free(&kctx->mem_pool, kctx->aliasing_sink_page, false);
no_sink_page:
/* VM lock needed for the call to kbase_mmu_free_pgd */
kbase_gpu_vm_lock(kctx);
kbase_mmu_free_pgd(kctx);
kbase_gpu_vm_unlock(kctx);
pgd_no_mem:
kbase_mmu_term(kctx);
term_dma_fence:
kbase_dma_fence_term(kctx);
free_event:
kbase_event_cleanup(kctx);
free_jd:
/* Safe to call this one even when didn't initialize (assuming kctx was sufficiently zeroed) */
kbasep_js_kctx_term(kctx);
kbase_jd_exit(kctx);
deinit_evictable:
kbase_mem_evictable_deinit(kctx);
free_pool:
kbase_mem_pool_term(&kctx->mem_pool);
free_kctx:
vfree(kctx);
out:
return NULL;
}
KBASE_EXPORT_SYMBOL(kbase_create_context);
/*
* Tear down a region that was still pending (set up but never mapped) when
* its context is destroyed: log a debug message, call
* kbase_mem_phy_alloc_put() on both the CPU and GPU allocations of the
* region, then free the region structure itself with kfree().
*/
static void kbase_reg_pending_dtor(struct kbase_va_region *reg)
{
dev_dbg(reg->kctx->kbdev->dev, "Freeing pending unmapped region\n");
kbase_mem_phy_alloc_put(reg->cpu_alloc);
kbase_mem_phy_alloc_put(reg->gpu_alloc);
kfree(reg);
}
/**
* kbase_destroy_context - Destroy a kernel base context.
* @kctx: Context to destroy
*
* Zaps the context's jobs, releases all outstanding regions (including
* regions that were still pending), tears down the per-context subsystems
* and finally frees the context structure with vfree(). @kctx must not be
* used after this function returns.
*/
void kbase_destroy_context(struct kbase_context *kctx)
{
struct kbase_device *kbdev;
int pages;
unsigned long pending_regions_to_clean;
KBASE_DEBUG_ASSERT(NULL != kctx);
kbdev = kctx->kbdev;
KBASE_DEBUG_ASSERT(NULL != kbdev);
KBASE_TRACE_ADD(kbdev, CORE_CTX_DESTROY, kctx, NULL, 0u, 0u);
/* Ensure the core is powered up for the destroy process */
/* A suspend won't happen here, because we're in a syscall from a userspace
* thread. */
kbase_pm_context_active(kbdev);
kbase_jd_zap_context(kctx);
kbase_event_cleanup(kctx);
/*
* JIT must be terminated before the code below as it must be called
* without the region lock being held.
* The code above ensures no new JIT allocations can be made
* by the time we get to this point of context tear down.
*/
kbase_jit_term(kctx);
kbase_gpu_vm_lock(kctx);
kbase_sticky_resource_term(kctx);
/* MMU is disabled as part of scheduling out the context */
kbase_mmu_free_pgd(kctx);
/* drop the aliasing sink page now that it can't be mapped anymore */
kbase_mem_pool_free(&kctx->mem_pool, kctx->aliasing_sink_page, false);
/*
* free pending region setups
*
* NOTE(review): kctx->cookies starts as KBASE_COOKIE_MASK at creation, so
* a cleared bit presumably marks a cookie that is in use; the complement
* within the mask therefore selects the regions still pending.
*/
pending_regions_to_clean = (~kctx->cookies) & KBASE_COOKIE_MASK;
while (pending_regions_to_clean) {
/* index of the lowest set bit, i.e. the next pending cookie */
unsigned int cookie = __ffs(pending_regions_to_clean);
BUG_ON(!kctx->pending_regions[cookie]);
kbase_reg_pending_dtor(kctx->pending_regions[cookie]);
kctx->pending_regions[cookie] = NULL;
pending_regions_to_clean &= ~(1UL << cookie);
}
kbase_region_tracker_term(kctx);
kbase_gpu_vm_unlock(kctx);
/* Safe to call this one even when didn't initialize (assuming kctx was sufficiently zeroed) */
kbasep_js_kctx_term(kctx);
kbase_jd_exit(kctx);
/* NOTE(review): presumably releases the PM reference taken by kbase_pm_context_active() above */
kbase_pm_context_idle(kbdev);
kbase_dma_fence_term(kctx);
kbase_mmu_term(kctx);
/* Pages still accounted to the context at this point are reported, not freed here */
pages = atomic_read(&kctx->used_pages);
if (pages != 0)
dev_warn(kbdev->dev, "%s: %d pages in use!\n", __func__, pages);
kbase_mem_evictable_deinit(kctx);
kbase_mem_pool_term(&kctx->mem_pool);
WARN_ON(atomic_read(&kctx->nonmapped_pages) != 0);
vfree(kctx);
}
KBASE_EXPORT_SYMBOL(kbase_destroy_context);
/**
 * kbase_context_set_create_flags - Set creation flags on a context
 * @kctx: Kbase context
 * @flags: Flags to set
 *
 * Rejects @flags if it contains any bit outside
 * BASE_CONTEXT_CREATE_KERNEL_FLAGS. Otherwise, with the context's jsctx_mutex
 * and the device's hwaccess_lock held, clears KCTX_SUBMIT_DISABLED when
 * BASE_CONTEXT_SYSTEM_MONITOR_SUBMIT_DISABLED is not requested and latches the
 * initial attributes into the Job Scheduler.
 *
 * Return: 0 on success, -EINVAL if @flags contains unsupported bits.
 */
int kbase_context_set_create_flags(struct kbase_context *kctx, u32 flags)
{
	struct kbasep_js_kctx_info *sched_info;
	unsigned long saved_irq_state;

	KBASE_DEBUG_ASSERT(kctx != NULL);

	sched_info = &kctx->jctx.sched_info;

	/* Validate flags: nothing outside the kernel-recognised set is allowed */
	if ((flags & BASE_CONTEXT_CREATE_KERNEL_FLAGS) != flags)
		return -EINVAL;

	mutex_lock(&sched_info->ctx.jsctx_mutex);
	spin_lock_irqsave(&kctx->kbdev->hwaccess_lock, saved_irq_state);

	/* Translate the flags */
	if (!(flags & BASE_CONTEXT_SYSTEM_MONITOR_SUBMIT_DISABLED))
		kbase_ctx_flag_clear(kctx, KCTX_SUBMIT_DISABLED);

	/* Latch the initial attributes into the Job Scheduler */
	kbasep_js_ctx_attr_set_initial_attrs(kctx->kbdev, kctx);

	spin_unlock_irqrestore(&kctx->kbdev->hwaccess_lock, saved_irq_state);
	mutex_unlock(&sched_info->ctx.jsctx_mutex);

	return 0;
}
KBASE_EXPORT_SYMBOL(kbase_context_set_create_flags);

View File

@ -0,0 +1,90 @@
/*
*
* (C) COPYRIGHT 2011-2016 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
* Foundation, and any use by you of this program is subject to the terms
* of such GNU licence.
*
* A copy of the licence is included with the program, and can also be obtained
* from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
* Boston, MA 02110-1301, USA.
*
*/
#ifndef _KBASE_CONTEXT_H_
#define _KBASE_CONTEXT_H_
#include <linux/atomic.h>
int kbase_context_set_create_flags(struct kbase_context *kctx, u32 flags);
/**
 * kbase_ctx_flag - Test whether @flag is set on @kctx
 * @kctx: Pointer to kbase context to check
 * @flag: Flag to check
 *
 * Takes a single atomic snapshot of the context flags and tests @flag
 * against it.
 *
 * Return: true if @flag is set on @kctx, false if not.
 */
static inline bool kbase_ctx_flag(struct kbase_context *kctx,
				  enum kbase_context_flags flag)
{
	const int current_flags = atomic_read(&kctx->flags);

	return (current_flags & flag) != 0;
}
/**
 * kbase_ctx_flag_clear - Clear @flag on @kctx
 * @kctx: Pointer to kbase context
 * @flag: Flag to clear
 *
 * The flag is cleared atomically, so other flags being set or cleared at the
 * same time are not disturbed.
 *
 * Some flags have locking requirements, check the documentation for the
 * respective flags.
 */
static inline void kbase_ctx_flag_clear(struct kbase_context *kctx,
					enum kbase_context_flags flag)
{
#if KERNEL_VERSION(4, 3, 0) > LINUX_VERSION_CODE
	/*
	 * Kernels before 4.3 have neither atomic_andnot() nor atomic_and().
	 * atomic_clear_mask() only ever existed on some architectures and was
	 * removed from arm and arm64 in v3.13.
	 *
	 * So on pre-4.3 kernels clear the flag with a compare-exchange loop:
	 * retry until the value we based the update on is still the one in
	 * memory when the exchange happens.
	 */
	int seen, wanted;

	for (;;) {
		seen = atomic_read(&kctx->flags);
		wanted = seen & ~flag;
		if (atomic_cmpxchg(&kctx->flags, seen, wanted) == seen)
			break;
	}
#else
	atomic_andnot(flag, &kctx->flags);
#endif
}
/**
* kbase_ctx_flag_set - Set @flag on @kctx
* @kctx: Pointer to kbase context
* @flag: Flag to set
*
* Set the @flag on @kctx. This is done atomically, so other flags being
* cleared or set at the same time will be safe.
*
* Some flags have locking requirements, check the documentation for the
* respective flags.
*/
static inline void kbase_ctx_flag_set(struct kbase_context *kctx,
enum kbase_context_flags flag)
{
/* Single atomic OR of the flag into the context's flag word */
atomic_or(flag, &kctx->flags);
}
#endif /* _KBASE_CONTEXT_H_ */

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,39 @@
/*
*
* (C) COPYRIGHT 2012-2014 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
* Foundation, and any use by you of this program is subject to the terms
* of such GNU licence.
*
* A copy of the licence is included with the program, and can also be obtained
* from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
* Boston, MA 02110-1301, USA.
*
*/
#include <mali_kbase.h>
static struct kbasep_debug_assert_cb kbasep_debug_assert_registered_cb = {
NULL,
NULL
};
/*
 * Register the function to be invoked by kbasep_debug_assert_call_hook(),
 * together with the argument it will be called with. Passing a NULL @func
 * leaves no hook registered.
 */
void kbase_debug_assert_register_hook(kbase_debug_assert_hook *func, void *param)
{
	struct kbasep_debug_assert_cb *cb = &kbasep_debug_assert_registered_cb;

	cb->func = func;
	cb->param = param;
}
void kbasep_debug_assert_call_hook(void)
{
if (kbasep_debug_assert_registered_cb.func != NULL)
kbasep_debug_assert_registered_cb.func(kbasep_debug_assert_registered_cb.param);
}
KBASE_EXPORT_SYMBOL(kbasep_debug_assert_call_hook);

View File

@ -0,0 +1,164 @@
/*
*
* (C) COPYRIGHT 2012-2015 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
* Foundation, and any use by you of this program is subject to the terms
* of such GNU licence.
*
* A copy of the licence is included with the program, and can also be obtained
* from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
* Boston, MA 02110-1301, USA.
*
*/
#ifndef _KBASE_DEBUG_H
#define _KBASE_DEBUG_H
#include <linux/bug.h>
/** @brief If equal to 0, a trace containing the file, line, and function will be displayed before each message. */
#define KBASE_DEBUG_SKIP_TRACE 0
/** @brief If different from 0, the trace will only contain the file and line. */
#define KBASE_DEBUG_SKIP_FUNCTION_NAME 0
/** @brief Disable the asserts tests if set to 1. Default is to disable the asserts in release. */
#ifndef KBASE_DEBUG_DISABLE_ASSERTS
#ifdef CONFIG_MALI_DEBUG
#define KBASE_DEBUG_DISABLE_ASSERTS 0
#else
#define KBASE_DEBUG_DISABLE_ASSERTS 1
#endif
#endif /* KBASE_DEBUG_DISABLE_ASSERTS */
/** Function type that is called on a KBASE_DEBUG_ASSERT() or KBASE_DEBUG_ASSERT_MSG() */
typedef void (kbase_debug_assert_hook) (void *);
struct kbasep_debug_assert_cb {
kbase_debug_assert_hook *func;
void *param;
};
/**
 * @def KBASEP_DEBUG_PRINT_TRACE
 * @brief Private macro containing the format of the trace to display before every message
 * @sa KBASE_DEBUG_SKIP_TRACE, KBASE_DEBUG_SKIP_FUNCTION_NAME
 *
 * @def KBASEP_DEBUG_PRINT_FUNCTION
 * @brief Private macro giving the function name to print with the trace.
 *
 * Expands to __func__ only when neither KBASE_DEBUG_SKIP_TRACE nor
 * KBASE_DEBUG_SKIP_FUNCTION_NAME is set, and to an empty string otherwise.
 * It is defined in every configuration because KBASE_DEBUG_ASSERT_MSG()
 * always passes it to KBASEP_DEBUG_ASSERT_OUT().
 */
#if !KBASE_DEBUG_SKIP_TRACE
#define KBASEP_DEBUG_PRINT_TRACE \
	"In file: " __FILE__ " line: " CSTD_STR2(__LINE__)
#else
#define KBASEP_DEBUG_PRINT_TRACE ""
#endif

#if !KBASE_DEBUG_SKIP_TRACE && !KBASE_DEBUG_SKIP_FUNCTION_NAME
#define KBASEP_DEBUG_PRINT_FUNCTION __func__
#else
#define KBASEP_DEBUG_PRINT_FUNCTION ""
#endif
/**
 * @def KBASEP_DEBUG_ASSERT_OUT(trace, function, ...)
 * @brief (Private) system printing function associated to the @see KBASE_DEBUG_ASSERT_MSG event.
 * @param trace location in the code from where the message is printed
 * @param function function from where the message is printed
 * @param ... Format string followed by format arguments.
 * @note function parameter cannot be concatenated with other strings
 */
/* Select the correct system output function.
 * The message body and the trailing newline are emitted with pr_cont() so
 * that they continue the record started by pr_err() instead of each
 * starting a new log record.
 */
#ifdef CONFIG_MALI_DEBUG
#define KBASEP_DEBUG_ASSERT_OUT(trace, function, ...)\
	do { \
		pr_err("Mali<ASSERT>: %s function:%s ", trace, function);\
		pr_cont(__VA_ARGS__);\
		pr_cont("\n");\
	} while (false)
#else
#define KBASEP_DEBUG_ASSERT_OUT(trace, function, ...) CSTD_NOP()
#endif
#ifdef CONFIG_MALI_DEBUG
#define KBASE_CALL_ASSERT_HOOK() kbasep_debug_assert_call_hook()
#else
#define KBASE_CALL_ASSERT_HOOK() CSTD_NOP()
#endif
/**
* @def KBASE_DEBUG_ASSERT(expr)
* @brief Calls @see KBASE_DEBUG_ASSERT_MSG with the stringified @a expr as the message, which is printed if @a expr is false
*
* @note This macro does nothing if the flag @see KBASE_DEBUG_DISABLE_ASSERTS is set to 1
*
* @param expr Boolean expression
*/
#define KBASE_DEBUG_ASSERT(expr) \
KBASE_DEBUG_ASSERT_MSG(expr, #expr)
#if KBASE_DEBUG_DISABLE_ASSERTS
#define KBASE_DEBUG_ASSERT_MSG(expr, ...) CSTD_NOP()
#else
/**
* @def KBASE_DEBUG_ASSERT_MSG(expr, ...)
* @brief Calls @see KBASEP_DEBUG_ASSERT_OUT and prints the given message if @a expr is false
*
* @note This macro does nothing if the flag @see KBASE_DEBUG_DISABLE_ASSERTS is set to 1
*
* @param expr Boolean expression
* @param ... Message to display when @a expr is false, as a format string followed by format arguments.
*/
#define KBASE_DEBUG_ASSERT_MSG(expr, ...) \
do { \
if (!(expr)) { \
KBASEP_DEBUG_ASSERT_OUT(KBASEP_DEBUG_PRINT_TRACE, KBASEP_DEBUG_PRINT_FUNCTION, __VA_ARGS__);\
KBASE_CALL_ASSERT_HOOK();\
BUG();\
} \
} while (false)
#endif /* KBASE_DEBUG_DISABLE_ASSERTS */
/**
* @def KBASE_DEBUG_CODE( X )
* @brief Executes the code inside the macro only in debug mode
*
* @param X Code to compile only in debug mode.
*/
#ifdef CONFIG_MALI_DEBUG
#define KBASE_DEBUG_CODE(X) X
#else
#define KBASE_DEBUG_CODE(X) CSTD_NOP()
#endif /* CONFIG_MALI_DEBUG */
/** @} */
/**
* @brief Register a function to call on ASSERT
*
* Such functions will \b only be called during Debug mode, and for debugging
* features \b only. Do not rely on them to be called in general use.
*
* To disable the hook, supply NULL to \a func.
*
* @note This function is not thread-safe, and should only be used to
* register/deregister once in the module's lifetime.
*
* @param[in] func the function to call when an assert is triggered.
* @param[in] param the parameter to pass to \a func when calling it
*/
void kbase_debug_assert_register_hook(kbase_debug_assert_hook *func, void *param);
/**
* @brief Call a debug assert hook previously registered with kbase_debug_assert_register_hook()
*
* @note This function is not thread-safe with respect to multiple threads
* registering functions and parameters with
* kbase_debug_assert_register_hook(). Otherwise, thread safety is the
* responsibility of the registered hook.
*/
void kbasep_debug_assert_call_hook(void);
#endif /* _KBASE_DEBUG_H */

View File

@ -0,0 +1,502 @@
/*
*
* (C) COPYRIGHT 2012-2016 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
* Foundation, and any use by you of this program is subject to the terms
* of such GNU licence.
*
* A copy of the licence is included with the program, and can also be obtained
* from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
* Boston, MA 02110-1301, USA.
*
*/
#include <mali_kbase.h>
#include <linux/spinlock.h>
#ifdef CONFIG_DEBUG_FS
/* Report whether the device-wide job fault event list is non-empty.
 * The list is sampled while holding job_fault_event_lock.
 */
static bool kbase_is_job_fault_event_pending(struct kbase_device *kbdev)
{
	unsigned long irq_flags;
	bool pending;

	spin_lock_irqsave(&kbdev->job_fault_event_lock, irq_flags);
	pending = !list_empty(&kbdev->job_fault_event_list);
	spin_unlock_irqrestore(&kbdev->job_fault_event_lock, irq_flags);

	return pending;
}
/* Return true when no entry on the device-wide job fault event list refers
 * to an atom of @kctx (this includes the list being empty), false otherwise.
 * The list is walked while holding job_fault_event_lock.
 */
static bool kbase_ctx_has_no_event_pending(struct kbase_context *kctx)
{
	struct kbase_device *kbdev = kctx->kbdev;
	struct base_job_fault_event *entry;
	unsigned long irq_flags;
	bool none_pending = true;

	spin_lock_irqsave(&kbdev->job_fault_event_lock, irq_flags);
	list_for_each_entry(entry, &kbdev->job_fault_event_list, head) {
		if (entry->katom->kctx == kctx) {
			none_pending = false;
			break;
		}
	}
	spin_unlock_irqrestore(&kbdev->job_fault_event_lock, irq_flags);

	return none_pending;
}
/*
 * Wait until a job fault event is queued on @kbdev, then copy the event code
 * and atom pointer of the first queued event into @event. The queued entry
 * itself is left on the list.
 *
 * Return: 0 on success, -ERESTARTSYS if the wait was interrupted.
 */
static int kbase_job_fault_event_wait(struct kbase_device *kbdev,
		struct base_job_fault_event *event)
{
	struct list_head *event_list = &kbdev->job_fault_event_list;
	struct base_job_fault_event *event_in;
	unsigned long flags;

	spin_lock_irqsave(&kbdev->job_fault_event_lock, flags);
	/* The lock is dropped while sleeping, so the list may have been
	 * emptied again between the wake-up and re-taking the lock.
	 * Re-check under the lock and go back to sleep if so, otherwise
	 * list_entry() below would be applied to the empty list head.
	 */
	while (list_empty(event_list)) {
		spin_unlock_irqrestore(&kbdev->job_fault_event_lock, flags);
		if (wait_event_interruptible(kbdev->job_fault_wq,
				kbase_is_job_fault_event_pending(kbdev)))
			return -ERESTARTSYS;
		spin_lock_irqsave(&kbdev->job_fault_event_lock, flags);
	}

	event_in = list_entry(event_list->next,
			struct base_job_fault_event, head);
	event->event_code = event_in->event_code;
	event->katom = event_in->katom;

	spin_unlock_irqrestore(&kbdev->job_fault_event_lock, flags);

	return 0;
}
/* Unlink the first entry of @event_list and return the event that embeds it.
 * The function takes no lock itself and does not check for an empty list.
 */
static struct base_job_fault_event *kbase_job_fault_event_dequeue(
		struct kbase_device *kbdev, struct list_head *event_list)
{
	struct list_head *first = event_list->next;

	list_del(first);

	return list_entry(first, struct base_job_fault_event, head);
}
/* Drain the context's list of atoms that were held back behind a failed atom
 * and run the postponed job-done bottom half for each of them, in queue order.
 * After this the context can be rescheduled.
 */
static void kbase_job_fault_resume_event_cleanup(struct kbase_context *kctx)
{
	struct list_head *held = &kctx->job_fault_resume_event_list;
	struct base_job_fault_event *held_event;

	while (!list_empty(held)) {
		held_event = kbase_job_fault_event_dequeue(kctx->kbdev, held);
		kbase_jd_done_worker(&held_event->katom->work);
	}
}
/* Remove every failed atom (of any context) from the device-wide job fault
 * event list and resume the contexts that were suspended due to a failed job.
 *
 * After each removal the lock is released and job_fault_resume_wq is woken so
 * that waiters can re-evaluate their condition; the lock is then re-taken
 * before the list is inspected again.
 */
static void kbase_job_fault_event_cleanup(struct kbase_device *kbdev)
{
struct list_head *event_list = &kbdev->job_fault_event_list;
unsigned long flags;
spin_lock_irqsave(&kbdev->job_fault_event_lock, flags);
while (!list_empty(event_list)) {
kbase_job_fault_event_dequeue(kbdev, event_list);
/* wake_up() is issued with the event lock dropped */
spin_unlock_irqrestore(&kbdev->job_fault_event_lock, flags);
wake_up(&kbdev->job_fault_resume_wq);
spin_lock_irqsave(&kbdev->job_fault_event_lock, flags);
}
spin_unlock_irqrestore(&kbdev->job_fault_event_lock, flags);
}
/* Work item queued by kbase_job_fault_event_post() for a failed atom.
 *
 * Sleeps until no event of the atom's context remains on the device-wide
 * fault list, then clears the context's fault counter, runs the postponed
 * job-done handler for the failed atom and finally for every atom of the
 * same context that was held back in the meantime.
 *
 * @data: the job_fault_work member embedded in the atom's fault event.
 */
static void kbase_job_fault_resume_worker(struct work_struct *data)
{
struct base_job_fault_event *event = container_of(data,
struct base_job_fault_event, job_fault_work);
struct kbase_context *kctx;
struct kbase_jd_atom *katom;
katom = event->katom;
kctx = katom->kctx;
dev_info(kctx->kbdev->dev, "Job dumping wait\n");
/* When woken up, check whether the queue is empty or the failed atoms
 * on it belong to different contexts. If so, stop waiting: both cases
 * mean the failed job has been dumped. Note that job_fault_event_list
 * should never hold two atoms that belong to the same context.
 */
wait_event(kctx->kbdev->job_fault_resume_wq,
kbase_ctx_has_no_event_pending(kctx));
/* Dump finished: allow new faults of this context to be posted again */
atomic_set(&kctx->job_fault_count, 0);
kbase_jd_done_worker(&katom->work);
/* If following atoms were scheduled during the failed job dump, their
 * job_done_worker was held back. Rerun it now that the dump has
 * finished.
 */
kbase_job_fault_resume_event_cleanup(kctx);
dev_info(kctx->kbdev->dev, "Job dumping finish, resume scheduler\n");
}
/* Fill in the fault event embedded in @atom with @atom and @completion_code
 * and append it to the tail of @event_list. Locking is left to the caller.
 *
 * Return: the event that was queued.
 */
static struct base_job_fault_event *kbase_job_fault_event_queue(
		struct list_head *event_list,
		struct kbase_jd_atom *atom,
		u32 completion_code)
{
	struct base_job_fault_event *fault = &atom->fault_event;

	fault->katom = atom;
	fault->event_code = completion_code;
	list_add_tail(&fault->head, event_list);

	return fault;
}
/* Publish a job fault for @katom:
 *  - queue the atom's fault event on the device-wide list (under the lock),
 *  - wake any reader sleeping on job_fault_wq,
 *  - queue kbase_job_fault_resume_worker() on the resume workqueue; that
 *    worker waits for the event to leave the list before finishing the atom.
 */
static void kbase_job_fault_event_post(struct kbase_device *kbdev,
struct kbase_jd_atom *katom, u32 completion_code)
{
struct base_job_fault_event *event;
unsigned long flags;
spin_lock_irqsave(&kbdev->job_fault_event_lock, flags);
event = kbase_job_fault_event_queue(&kbdev->job_fault_event_list,
katom, completion_code);
spin_unlock_irqrestore(&kbdev->job_fault_event_lock, flags);
wake_up_interruptible(&kbdev->job_fault_wq);
INIT_WORK(&event->job_fault_work, kbase_job_fault_resume_worker);
queue_work(kbdev->job_fault_resume_workq, &event->job_fault_work);
dev_info(katom->kctx->kbdev->dev, "Job fault happen, start dump: %d_%d",
katom->kctx->tgid, katom->kctx->id);
}
/*
 * Decide whether a completed atom has to be held back for job fault dumping.
 *
 * If a dump is already in progress for the atom's context, the atom is parked
 * on the context's resume list. Otherwise, when job fault debugging is
 * enabled and the atom did not complete with BASE_JD_EVENT_DONE, a register
 * snapshot is taken and the fault event is posted so the dump can start.
 *
 * Return: true if the atom was queued for (or behind) a dump, false if it
 * should be completed normally.
 */
bool kbase_debug_job_fault_process(struct kbase_jd_atom *katom,
		u32 completion_code)
{
	struct kbase_context *kctx = katom->kctx;
	struct kbase_device *kbdev = kctx->kbdev;

	/* Only one atom per context is dumped at a time: while a dump is
	 * running, further atoms of the same context are held back. Atoms
	 * of other contexts are not affected by this check.
	 */
	if (atomic_read(&kctx->job_fault_count) > 0) {
		kbase_job_fault_event_queue(&kctx->job_fault_resume_event_list,
				katom, completion_code);
		dev_info(kbdev->dev, "queue:%d\n",
				kbase_jd_atom_id(kctx, katom));
		return true;
	}

	if (kbdev->job_fault_debug != true)
		return false;

	if (completion_code == BASE_JD_EVENT_DONE)
		return false;

	if (kbase_job_fault_get_reg_snapshot(kctx) == false) {
		dev_warn(kbdev->dev, "get reg dump failed\n");
		return false;
	}

	kbase_job_fault_event_post(kbdev, katom, completion_code);
	atomic_inc(&kctx->job_fault_count);
	dev_info(kbdev->dev, "post:%d\n", kbase_jd_atom_id(kctx, katom));

	return true;
}
/* seq_file .show callback: print the next chunk of the register dump of the
 * context that owns the faulting atom.
 *
 * @v is the event copy allocated in debug_job_fault_start(); its reg_offset
 * field is the read cursor into kctx->reg_dump and is advanced here.
 * The dump is laid out as consecutive (address, value) pairs and ends at the
 * first entry equal to REGISTER_DUMP_TERMINATION_FLAG.
 *
 * Return: 0 after printing up to 50 pairs, -1 if there is no dump buffer or
 * the cursor already sits on the termination flag.
 */
static int debug_job_fault_show(struct seq_file *m, void *v)
{
struct kbase_device *kbdev = m->private;
struct base_job_fault_event *event = (struct base_job_fault_event *)v;
struct kbase_context *kctx = event->katom->kctx;
int i;
dev_info(kbdev->dev, "debug job fault seq show:%d_%d, %d",
kctx->tgid, kctx->id, event->reg_offset);
if (kctx->reg_dump == NULL) {
dev_warn(kbdev->dev, "reg dump is NULL");
return -1;
}
if (kctx->reg_dump[event->reg_offset] ==
REGISTER_DUMP_TERMINATION_FLAG) {
/* Return the error here to stop the read. And the
 * following next() will not be called. The stop can
 * get the real event resource and release it
 */
return -1;
}
/* First chunk of a dump: emit the "<tgid>_<id>" header line */
if (event->reg_offset == 0)
seq_printf(m, "%d_%d\n", kctx->tgid, kctx->id);
/* At most 50 address/value pairs are printed per call */
for (i = 0; i < 50; i++) {
if (kctx->reg_dump[event->reg_offset] ==
REGISTER_DUMP_TERMINATION_FLAG) {
break;
}
seq_printf(m, "%08x: %08x\n",
kctx->reg_dump[event->reg_offset],
kctx->reg_dump[1+event->reg_offset]);
event->reg_offset += 2;
}
return 0;
}
/* seq_file .next callback: logs the current cursor and hands back the same
 * event object unchanged. *pos is not advanced here; progress through the
 * dump is tracked by event->reg_offset, which debug_job_fault_show() updates.
 */
static void *debug_job_fault_next(struct seq_file *m, void *v, loff_t *pos)
{
struct kbase_device *kbdev = m->private;
struct base_job_fault_event *event = (struct base_job_fault_event *)v;
dev_info(kbdev->dev, "debug job fault seq next:%d, %d",
event->reg_offset, (int)*pos);
return event;
}
/* seq_file .start callback.
 *
 * Only position 0 starts a dump: a private event copy is allocated, the call
 * blocks until a job fault is available and the fault's code and atom are
 * copied into it. Any other position ends the sequence.
 *
 * Return: the allocated event copy, or NULL if *pos is not 0, the allocation
 * failed, or the wait was interrupted.
 */
static void *debug_job_fault_start(struct seq_file *m, loff_t *pos)
{
	struct kbase_device *kbdev = m->private;
	struct base_job_fault_event *snapshot;

	dev_info(kbdev->dev, "fault job seq start:%d", (int)*pos);

	/* The condition is tricky here. It needs to make sure the
	 * fault hasn't happened and the dumping hasn't been started,
	 * or the dumping has finished
	 */
	if (*pos != 0)
		return NULL;

	snapshot = kmalloc(sizeof(*snapshot), GFP_KERNEL);
	if (!snapshot)
		return NULL;
	snapshot->reg_offset = 0;

	if (kbase_job_fault_event_wait(kbdev, snapshot)) {
		kfree(snapshot);
		return NULL;
	}

	/* The cache flush workaround is called in bottom half of
	 * job done but we delayed it. Now we should clean cache
	 * earlier. Then the GPU memory dump should be correct.
	 */
	if (snapshot->katom->need_cache_flush_cores_retained) {
		kbase_gpu_cacheclean(kbdev, snapshot->katom);
		snapshot->katom->need_cache_flush_cores_retained = 0;
	}

	return snapshot;
}
/* seq_file .stop callback, handled in two stages depending on @v:
 *
 *  - @v != NULL: free the event copy allocated by debug_job_fault_start().
 *  - @v == NULL: remove the first event from the device-wide fault list (if
 *    there is one) and wake job_fault_resume_wq so the resume worker can
 *    finish the held-back atom.
 */
static void debug_job_fault_stop(struct seq_file *m, void *v)
{
struct kbase_device *kbdev = m->private;
/* here we wake up the kbase_jd_done_worker after stop, it needs
 * get the memory dump before the register dump in debug daemon,
 * otherwise, the memory dump may be incorrect.
 */
if (v != NULL) {
kfree(v);
dev_info(kbdev->dev, "debug job fault seq stop stage 1");
} else {
unsigned long flags;
spin_lock_irqsave(&kbdev->job_fault_event_lock, flags);
if (!list_empty(&kbdev->job_fault_event_list)) {
kbase_job_fault_event_dequeue(kbdev,
&kbdev->job_fault_event_list);
wake_up(&kbdev->job_fault_resume_wq);
}
spin_unlock_irqrestore(&kbdev->job_fault_event_lock, flags);
dev_info(kbdev->dev, "debug job fault seq stop stage 2");
}
}
/* seq_file iterator callbacks for the job fault dump file */
static const struct seq_operations ops = {
.start = debug_job_fault_start,
.next = debug_job_fault_next,
.stop = debug_job_fault_stop,
.show = debug_job_fault_show,
};
/* Open handler for the job fault dump file.
 *
 * Sets up the seq_file iterator, attaches the device (taken from the inode's
 * private pointer) to it and enables job fault debugging on the device.
 *
 * Return: 0 on success, or the error returned by seq_open().
 */
static int debug_job_fault_open(struct inode *in, struct file *file)
{
	struct kbase_device *kbdev = in->i_private;
	int err;

	/* file->private_data is only dereferenced once seq_open() succeeded */
	err = seq_open(file, &ops);
	if (err)
		return err;

	((struct seq_file *)file->private_data)->private = kbdev;
	dev_info(kbdev->dev, "debug job fault seq open");

	kbdev->job_fault_debug = true;

	return 0;
}
/* Release handler for the job fault dump file: releases the seq_file state,
 * disables job fault debugging on the device and drains any fault events
 * that were never read.
 *
 * Return: always 0.
 */
static int debug_job_fault_release(struct inode *in, struct file *file)
{
struct kbase_device *kbdev = in->i_private;
seq_release(in, file);
kbdev->job_fault_debug = false;
/* Clean the unprocessed job fault. After that, all the suspended
 * contexts could be rescheduled.
 */
kbase_job_fault_event_cleanup(kbdev);
dev_info(kbdev->dev, "debug job fault seq close");
return 0;
}
/* File operations of the "job_fault" debugfs file; reads go through seq_file */
static const struct file_operations kbasep_debug_job_fault_fops = {
.open = debug_job_fault_open,
.read = seq_read,
.llseek = seq_lseek,
.release = debug_job_fault_release,
};
/*
 * Create the read-only "job_fault" debugfs file in the device's Mali debugfs
 * directory. The device pointer is stored as the file's private data and is
 * picked up again in the open/release handlers.
 * The return value of debugfs_create_file() is not checked.
 */
void kbase_debug_job_fault_debugfs_init(struct kbase_device *kbdev)
{
debugfs_create_file("job_fault", S_IRUGO,
kbdev->mali_debugfs_directory, kbdev,
&kbasep_debug_job_fault_fops);
}
/*
 * Initialize the per-device job fault state: the fault event list and its
 * lock, the two wait queues, and the workqueue on which the resume workers
 * run. Job fault debugging starts out disabled.
 *
 * Return: 0 on success, -ENOMEM if the workqueue could not be allocated.
 */
int kbase_debug_job_fault_dev_init(struct kbase_device *kbdev)
{
INIT_LIST_HEAD(&kbdev->job_fault_event_list);
init_waitqueue_head(&(kbdev->job_fault_wq));
init_waitqueue_head(&(kbdev->job_fault_resume_wq));
spin_lock_init(&kbdev->job_fault_event_lock);
kbdev->job_fault_resume_workq = alloc_workqueue(
"kbase_job_fault_resume_work_queue", WQ_MEM_RECLAIM, 1);
if (!kbdev->job_fault_resume_workq)
return -ENOMEM;
kbdev->job_fault_debug = false;
return 0;
}
/*
 * Release the per-device job fault resources: destroys the resume workqueue
 * allocated by kbase_debug_job_fault_dev_init().
 */
void kbase_debug_job_fault_dev_term(struct kbase_device *kbdev)
{
destroy_workqueue(kbdev->job_fault_resume_workq);
}
/*
 * Initialize the per-context job fault state.
 *
 * The resume list and the fault counter are always initialized, even when
 * the register dump buffer cannot be set up, so that the rest of the job
 * fault code never sees an uninitialized list head or counter. On any
 * failure kctx->reg_dump is left NULL.
 */
void kbase_debug_job_fault_context_init(struct kbase_context *kctx)
{
	/* Set these up first: the early returns below must not skip them */
	INIT_LIST_HEAD(&kctx->job_fault_resume_event_list);
	atomic_set(&kctx->job_fault_count, 0);

	/* We need to allocate double the size of the register range
	 * because this memory keeps both the register address and value
	 */
	kctx->reg_dump = vmalloc(0x4000 * 2);
	if (kctx->reg_dump == NULL)
		return;

	if (kbase_debug_job_fault_reg_snapshot_init(kctx, 0x4000) == false) {
		vfree(kctx->reg_dump);
		kctx->reg_dump = NULL;
	}
}
/*
 * Release the per-context job fault resources: frees the register dump
 * buffer allocated by kbase_debug_job_fault_context_init().
 */
void kbase_debug_job_fault_context_term(struct kbase_context *kctx)
{
vfree(kctx->reg_dump);
}
#else /* CONFIG_DEBUG_FS */
/* Variant used when CONFIG_DEBUG_FS is not set: only marks job fault
 * debugging as disabled. Always returns 0.
 */
int kbase_debug_job_fault_dev_init(struct kbase_device *kbdev)
{
kbdev->job_fault_debug = false;
return 0;
}
/* Variant used when CONFIG_DEBUG_FS is not set: nothing to release. */
void kbase_debug_job_fault_dev_term(struct kbase_device *kbdev)
{
}
#endif /* CONFIG_DEBUG_FS */

View File

@ -0,0 +1,96 @@
/*
*
* (C) COPYRIGHT 2012-2016 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
* Foundation, and any use by you of this program is subject to the terms
* of such GNU licence.
*
* A copy of the licence is included with the program, and can also be obtained
* from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
* Boston, MA 02110-1301, USA.
*
*/
#ifndef _KBASE_DEBUG_JOB_FAULT_H
#define _KBASE_DEBUG_JOB_FAULT_H
#include <linux/debugfs.h>
#include <linux/seq_file.h>
#define REGISTER_DUMP_TERMINATION_FLAG 0xFFFFFFFF
/**
* kbase_debug_job_fault_dev_init - Create the fault event wait queue
* per device and initialize the required lists.
* @kbdev: Device pointer
*
* Return: Zero on success or a negative error code.
*/
int kbase_debug_job_fault_dev_init(struct kbase_device *kbdev);
/**
* kbase_debug_job_fault_debugfs_init - Create the job fault debugfs file
* @kbdev: Device pointer
*/
void kbase_debug_job_fault_debugfs_init(struct kbase_device *kbdev);
/**
* kbase_debug_job_fault_dev_term - Clean up resources created in
* kbase_debug_job_fault_dev_init.
* @kbdev: Device pointer
*/
void kbase_debug_job_fault_dev_term(struct kbase_device *kbdev);
/**
* kbase_debug_job_fault_context_init - Initialize the relevant
* data structure per context
* @kctx: KBase context pointer
*/
void kbase_debug_job_fault_context_init(struct kbase_context *kctx);
/**
* kbase_debug_job_fault_context_term - Release the relevant
* resource per context
* @kctx: KBase context pointer
*/
void kbase_debug_job_fault_context_term(struct kbase_context *kctx);
/**
* kbase_debug_job_fault_process - Process the failed job.
* It will send an event and wake up the job fault waiting queue
* Then create a work queue to wait for job dump finish
* This function should be called in the interrupt handler and before
* jd_done that make sure the jd_done_worker will be delayed until the
* job dump finish
* @katom: The failed atom pointer
* @completion_code: the job status
* @return true if dump is going on
*/
bool kbase_debug_job_fault_process(struct kbase_jd_atom *katom,
u32 completion_code);
/**
* kbase_debug_job_fault_reg_snapshot_init - Set the interested registers
* address during the job fault process, the relevant registers will
* be saved when a job fault happen
* @kctx: KBase context pointer
* @reg_range: Maximum register address space
* @return true if initializing successfully
*/
bool kbase_debug_job_fault_reg_snapshot_init(struct kbase_context *kctx,
int reg_range);
/**
* kbase_job_fault_get_reg_snapshot - Read the interested registers for
* failed job dump
* @kctx: KBase context pointer
* @return true if getting registers successfully
*/
bool kbase_job_fault_get_reg_snapshot(struct kbase_context *kctx);
#endif /*_KBASE_DEBUG_JOB_FAULT_H*/

View File

@ -0,0 +1,279 @@
/*
*
* (C) COPYRIGHT 2013-2016 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
* Foundation, and any use by you of this program is subject to the terms
* of such GNU licence.
*
* A copy of the licence is included with the program, and can also be obtained
* from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
* Boston, MA 02110-1301, USA.
*
*/
/*
* Debugfs interface to dump the memory visible to the GPU
*/
#include "mali_kbase_debug_mem_view.h"
#include "mali_kbase.h"
#include <linux/list.h>
#include <linux/file.h>
#ifdef CONFIG_DEBUG_FS
/* Snapshot of one GPU VA region, taken when the mem_view file is opened. */
struct debug_mem_mapping {
/* Link in debug_mem_data.mapping_list */
struct list_head node;
/* Physical allocation backing the region; a reference is held on it */
struct kbase_mem_phy_alloc *alloc;
/* Copy of the region's flags at open time */
unsigned long flags;
/* First page frame number of the region */
u64 start_pfn;
/* Size of the region in pages */
size_t nr_pages;
};
/* Per-open state of the mem_view file, stored as the seq_file private data. */
struct debug_mem_data {
/* List of struct debug_mem_mapping, in region-tree iteration order */
struct list_head mapping_list;
/* Context whose memory is being dumped */
struct kbase_context *kctx;
};
/* seq_file iterator cursor: identifies one page within one mapping. */
struct debug_mem_seq_off {
/* List node of the mapping currently being walked */
struct list_head *lh;
/* Page index within that mapping */
size_t offset;
};
/* seq_file .start callback: translate the global page position *_pos into a
 * (mapping, page offset) cursor by walking the mapping list.
 *
 * Return: a newly allocated cursor, or NULL if the position lies beyond the
 * last mapping or the allocation failed.
 */
static void *debug_mem_start(struct seq_file *m, loff_t *_pos)
{
	struct debug_mem_data *mem_data = m->private;
	struct debug_mem_mapping *map;
	loff_t remaining = *_pos;

	list_for_each_entry(map, &mem_data->mapping_list, node) {
		struct debug_mem_seq_off *cursor;

		/* Position is past this mapping: skip all of its pages */
		if (remaining >= map->nr_pages) {
			remaining -= map->nr_pages;
			continue;
		}

		cursor = kmalloc(sizeof(*cursor), GFP_KERNEL);
		if (!cursor)
			return NULL;

		cursor->lh = &map->node;
		cursor->offset = remaining;
		return cursor;
	}

	/* Beyond the end */
	return NULL;
}
/* seq_file .stop callback: free the cursor allocated by debug_mem_start(). */
static void debug_mem_stop(struct seq_file *m, void *v)
{
kfree(v);
}
/* seq_file .next callback: advance the cursor by one page, moving on to the
 * first page of the following mapping once the current one is exhausted.
 *
 * Return: the updated cursor, or NULL after the last page of the last
 * mapping (in which case *pos is left untouched).
 */
static void *debug_mem_next(struct seq_file *m, void *v, loff_t *pos)
{
	struct debug_mem_data *mem_data = m->private;
	struct debug_mem_seq_off *cursor = v;
	struct debug_mem_mapping *current_map;

	current_map = list_entry(cursor->lh, struct debug_mem_mapping, node);

	if (cursor->offset >= current_map->nr_pages - 1) {
		/* Last page of this mapping: step to the next one, if any */
		if (list_is_last(cursor->lh, &mem_data->mapping_list))
			return NULL;

		cursor->lh = cursor->lh->next;
		cursor->offset = 0;
	} else {
		cursor->offset++;
	}

	++*pos;
	return cursor;
}
/* seq_file .show callback: hex-dump the page the cursor @v points at.
 *
 * Runs with the context's GPU VM lock held. Pages beyond the allocation's
 * backed entries are reported as "Unbacked page". Otherwise the page is
 * temporarily mapped into kernel space and printed as lines of four 32-bit
 * words, each prefixed with the corresponding GPU address.
 *
 * Return: always 0; a failed vmap() silently produces no output for the page.
 */
static int debug_mem_show(struct seq_file *m, void *v)
{
struct debug_mem_data *mem_data = m->private;
struct debug_mem_seq_off *data = v;
struct debug_mem_mapping *map;
int i, j;
struct page *page;
uint32_t *mapping;
pgprot_t prot = PAGE_KERNEL;
map = list_entry(data->lh, struct debug_mem_mapping, node);
kbase_gpu_vm_lock(mem_data->kctx);
if (data->offset >= map->alloc->nents) {
seq_printf(m, "%016llx: Unbacked page\n\n", (map->start_pfn +
data->offset) << PAGE_SHIFT);
goto out;
}
/* Regions not flagged CPU-cached are mapped write-combined */
if (!(map->flags & KBASE_REG_CPU_CACHED))
prot = pgprot_writecombine(prot);
page = pfn_to_page(PFN_DOWN(map->alloc->pages[data->offset]));
mapping = vmap(&page, 1, VM_MAP, prot);
if (!mapping)
goto out;
/* i and j are byte offsets; each output line covers four words */
for (i = 0; i < PAGE_SIZE; i += 4*sizeof(*mapping)) {
seq_printf(m, "%016llx:", i + ((map->start_pfn +
data->offset) << PAGE_SHIFT));
for (j = 0; j < 4*sizeof(*mapping); j += sizeof(*mapping))
seq_printf(m, " %08x", mapping[(i+j)/sizeof(*mapping)]);
seq_putc(m, '\n');
}
vunmap(mapping);
seq_putc(m, '\n');
out:
kbase_gpu_vm_unlock(mem_data->kctx);
return 0;
}
/* seq_file iterator callbacks for the mem_view file; one record per page */
static const struct seq_operations ops = {
.start = debug_mem_start,
.next = debug_mem_next,
.stop = debug_mem_stop,
.show = debug_mem_show,
};
/* Open handler for the mem_view file.
 *
 * Builds a snapshot of the context's GPU VA regions: with the GPU VM lock
 * held, every region that has a GPU allocation is recorded as a
 * debug_mem_mapping holding its own reference on that allocation. A
 * reference is also taken on the context's file (the inode's private
 * pointer) for as long as the mem_view file stays open.
 *
 * Return: 0 on success, the seq_open() error, or -ENOMEM if an allocation
 * failed (in which case everything acquired so far is released again).
 */
static int debug_mem_open(struct inode *i, struct file *file)
{
struct file *kctx_file = i->i_private;
struct kbase_context *kctx = kctx_file->private_data;
struct rb_node *p;
struct debug_mem_data *mem_data;
int ret;
ret = seq_open(file, &ops);
if (ret)
return ret;
mem_data = kmalloc(sizeof(*mem_data), GFP_KERNEL);
if (!mem_data) {
ret = -ENOMEM;
goto out;
}
mem_data->kctx = kctx;
INIT_LIST_HEAD(&mem_data->mapping_list);
get_file(kctx_file);
kbase_gpu_vm_lock(kctx);
for (p = rb_first(&kctx->reg_rbtree); p; p = rb_next(p)) {
struct kbase_va_region *reg;
struct debug_mem_mapping *mapping;
reg = rb_entry(p, struct kbase_va_region, rblink);
if (reg->gpu_alloc == NULL)
/* Empty region - ignore */
continue;
mapping = kmalloc(sizeof(*mapping), GFP_KERNEL);
if (!mapping) {
ret = -ENOMEM;
kbase_gpu_vm_unlock(kctx);
goto out;
}
mapping->alloc = kbase_mem_phy_alloc_get(reg->gpu_alloc);
mapping->start_pfn = reg->start_pfn;
mapping->nr_pages = reg->nr_pages;
mapping->flags = reg->flags;
list_add_tail(&mapping->node, &mem_data->mapping_list);
}
kbase_gpu_vm_unlock(kctx);
((struct seq_file *)file->private_data)->private = mem_data;
return 0;
out:
/* mem_data is NULL here only when its own allocation failed, i.e. before
 * the file reference was taken and before any mapping was recorded.
 */
if (mem_data) {
while (!list_empty(&mem_data->mapping_list)) {
struct debug_mem_mapping *mapping;
mapping = list_first_entry(&mem_data->mapping_list,
struct debug_mem_mapping, node);
kbase_mem_phy_alloc_put(mapping->alloc);
list_del(&mapping->node);
kfree(mapping);
}
fput(kctx_file);
kfree(mem_data);
}
seq_release(i, file);
return ret;
}
/* Release handler for the mem_view file: releases the seq_file state, drops
 * the allocation reference held by every recorded mapping, frees the
 * snapshot and finally drops the reference on the context's file.
 *
 * Return: always 0.
 */
static int debug_mem_release(struct inode *inode, struct file *file)
{
	struct file *kctx_file = inode->i_private;
	/* The snapshot pointer is read before seq_release() is called */
	struct debug_mem_data *mem_data =
		((struct seq_file *)file->private_data)->private;
	struct list_head *mappings = &mem_data->mapping_list;

	seq_release(inode, file);

	while (!list_empty(mappings)) {
		struct debug_mem_mapping *entry;

		entry = list_first_entry(mappings,
				struct debug_mem_mapping, node);
		kbase_mem_phy_alloc_put(entry->alloc);
		list_del(&entry->node);
		kfree(entry);
	}

	kfree(mem_data);
	fput(kctx_file);

	return 0;
}
/* File operations of the "mem_view" debugfs file; reads go through seq_file */
static const struct file_operations kbase_debug_mem_view_fops = {
.open = debug_mem_open,
.release = debug_mem_release,
.read = seq_read,
.llseek = seq_lseek
};
/**
 * kbase_debug_mem_view_init - Initialise the mem_view debugfs file
 * @kctx_file: The /dev/mali0 file instance for the context
 *
 * This function creates a read-only "mem_view" file in the context's debugfs
 * directory which can be used to get a view of the context's memory as the
 * GPU sees it (i.e. using the GPU's page tables). @kctx_file is stored as
 * the file's private data.
 *
 * The file is cleaned up by a call to debugfs_remove_recursive() deleting the
 * parent directory.
 */
void kbase_debug_mem_view_init(struct file *kctx_file)
{
struct kbase_context *kctx = kctx_file->private_data;
debugfs_create_file("mem_view", S_IRUGO, kctx->kctx_dentry, kctx_file,
&kbase_debug_mem_view_fops);
}
#endif

View File

@ -0,0 +1,25 @@
/*
*
* (C) COPYRIGHT 2013-2014 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
* Foundation, and any use by you of this program is subject to the terms
* of such GNU licence.
*
* A copy of the licence is included with the program, and can also be obtained
* from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
* Boston, MA 02110-1301, USA.
*
*/
#ifndef _KBASE_DEBUG_MEM_VIEW_H
#define _KBASE_DEBUG_MEM_VIEW_H
#include <mali_kbase.h>
void kbase_debug_mem_view_init(struct file *kctx_file);
#endif

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,697 @@
/*
*
* (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
* Foundation, and any use by you of this program is subject to the terms
* of such GNU licence.
*
* A copy of the licence is included with the program, and can also be obtained
* from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
* Boston, MA 02110-1301, USA.
*
*/
/*
* Base kernel device APIs
*/
#include <linux/debugfs.h>
#include <linux/dma-mapping.h>
#include <linux/seq_file.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/of_platform.h>
#include <mali_kbase.h>
#include <mali_kbase_defs.h>
#include <mali_kbase_hwaccess_instr.h>
#include <mali_kbase_hw.h>
#include <mali_kbase_config_defaults.h>
#include <mali_kbase_profiling_gator_api.h>
/* NOTE: Magic - 0x45435254 (TRCE in ASCII).
* Supports tracing feature provided in the base module.
* Please keep it in sync with the value of base module.
*/
#define TRACE_BUFFER_HEADER_SPECIAL 0x45435254
#if KBASE_TRACE_ENABLE
/* Table of trace code names. KBASE_TRACE_CODE_MAKE_CODE() is temporarily
 * defined to stringify its argument while mali_kbase_trace_defs.h is
 * included; presumably that header invokes the macro once per trace code,
 * which would make this array indexable by trace code - confirm against the
 * header.
 */
static const char *kbasep_trace_code_string[] = {
/* IMPORTANT: USE OF SPECIAL #INCLUDE OF NON-STANDARD HEADER FILE
* THIS MUST BE USED AT THE START OF THE ARRAY */
#define KBASE_TRACE_CODE_MAKE_CODE(X) # X
#include "mali_kbase_trace_defs.h"
#undef KBASE_TRACE_CODE_MAKE_CODE
};
#endif
#define DEBUG_MESSAGE_SIZE 256
static int kbasep_trace_init(struct kbase_device *kbdev);
static void kbasep_trace_term(struct kbase_device *kbdev);
static void kbasep_trace_hook_wrapper(void *param);
/* Allocate a zero-initialised kbase_device. Returns NULL on allocation failure. */
struct kbase_device *kbase_device_alloc(void)
{
	struct kbase_device *kbdev;

	kbdev = kzalloc(sizeof(*kbdev), GFP_KERNEL);

	return kbdev;
}
/* Set up the software state of address space @i:
 *  - its number and fault address,
 *  - a "mali_mmu<i>" workqueue plus the page fault and bus fault work items,
 *  - when HW issue 8316 applies, a "mali_mmu<i>_poker" workqueue, the poke
 *    work item and the poke hrtimer.
 *
 * Return: 0 on success, -EINVAL if a workqueue could not be allocated. On
 * failure nothing allocated by this call is left behind.
 */
static int kbase_device_as_init(struct kbase_device *kbdev, int i)
{
/* The name buffers are sized from their format strings, so they have room
 * for an index of at most two digits; snprintf() truncates anything longer.
 */
const char format[] = "mali_mmu%d";
char name[sizeof(format)];
const char poke_format[] = "mali_mmu%d_poker";
char poke_name[sizeof(poke_format)];
/* poke_name is only formatted, and only used, when issue 8316 applies */
if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8316))
snprintf(poke_name, sizeof(poke_name), poke_format, i);
snprintf(name, sizeof(name), format, i);
kbdev->as[i].number = i;
kbdev->as[i].fault_addr = 0ULL;
kbdev->as[i].pf_wq = alloc_workqueue(name, 0, 1);
if (!kbdev->as[i].pf_wq)
return -EINVAL;
INIT_WORK(&kbdev->as[i].work_pagefault, page_fault_worker);
INIT_WORK(&kbdev->as[i].work_busfault, bus_fault_worker);
if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8316)) {
struct hrtimer *poke_timer = &kbdev->as[i].poke_timer;
struct work_struct *poke_work = &kbdev->as[i].poke_work;
kbdev->as[i].poke_wq = alloc_workqueue(poke_name, 0, 1);
if (!kbdev->as[i].poke_wq) {
/* Undo the fault workqueue so the caller has nothing to clean up */
destroy_workqueue(kbdev->as[i].pf_wq);
return -EINVAL;
}
KBASE_DEBUG_ASSERT(!object_is_on_stack(poke_work));
INIT_WORK(poke_work, kbasep_as_do_poke);
hrtimer_init(poke_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
poke_timer->function = kbasep_as_poke_timer_callback;
kbdev->as[i].poke_refcount = 0;
kbdev->as[i].poke_state = 0u;
}
return 0;
}
/* Destroy the workqueues created by kbase_device_as_init() for address
 * space @i; the poke workqueue only exists when HW issue 8316 applies.
 */
static void kbase_device_as_term(struct kbase_device *kbdev, int i)
{
destroy_workqueue(kbdev->as[i].pf_wq);
if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8316))
destroy_workqueue(kbdev->as[i].poke_wq);
}
/* Initialise every hardware address space of @kbdev.
 *
 * Return: 0 on success, or the error from kbase_device_as_init(). On failure
 * all address spaces that had been initialised so far are torn down again.
 */
static int kbase_device_all_as_init(struct kbase_device *kbdev)
{
	int i, err;

	for (i = 0; i < kbdev->nr_hw_address_spaces; i++) {
		err = kbase_device_as_init(kbdev, i);
		if (err)
			goto free_workqs;
	}

	return 0;

free_workqs:
	/* Address space i failed and already cleaned up after itself, so
	 * only indices i-1 down to 0 (inclusive) need to be torn down.
	 */
	while (i-- > 0)
		kbase_device_as_term(kbdev, i);

	return err;
}
/* Tear down every hardware address space of @kbdev, in ascending order. */
static void kbase_device_all_as_term(struct kbase_device *kbdev)
{
	int as_nr = 0;

	while (as_nr < kbdev->nr_hw_address_spaces) {
		kbase_device_as_term(kbdev, as_nr);
		as_nr++;
	}
}
/* Initialise the software state of a kbase device.
 *
 * In order: MMU locks, (arm64 only) the snoop SMC properties from the device
 * tree, HW issue and feature masks, DMA masks, per-address-space state,
 * tracing, profiling controls, the assert hook, the instrumentation backend
 * and finally default timing values and the MMU mode.
 *
 * Return: 0 on success, otherwise a negative error code. On failure the
 * tracing and address-space state set up so far is torn down again.
 */
int kbase_device_init(struct kbase_device * const kbdev)
{
int i, err;
#ifdef CONFIG_ARM64
struct device_node *np = NULL;
#endif /* CONFIG_ARM64 */
spin_lock_init(&kbdev->mmu_mask_change);
mutex_init(&kbdev->mmu_hw_mutex);
#ifdef CONFIG_ARM64
kbdev->cci_snoop_enabled = false;
np = kbdev->dev->of_node;
if (np != NULL) {
/* A property that cannot be read is treated as "not provided" (0) */
if (of_property_read_u32(np, "snoop_enable_smc",
&kbdev->snoop_enable_smc))
kbdev->snoop_enable_smc = 0;
if (of_property_read_u32(np, "snoop_disable_smc",
&kbdev->snoop_disable_smc))
kbdev->snoop_disable_smc = 0;
/* Either both or none of the calls should be provided. */
if (!((kbdev->snoop_disable_smc == 0
&& kbdev->snoop_enable_smc == 0)
|| (kbdev->snoop_disable_smc != 0
&& kbdev->snoop_enable_smc != 0))) {
WARN_ON(1);
err = -EINVAL;
goto fail;
}
}
#endif /* CONFIG_ARM64 */
/* Get the list of workarounds for issues on the current HW
 * (identified by the GPU_ID register)
 */
err = kbase_hw_set_issues_mask(kbdev);
if (err)
goto fail;
/* Set the list of features available on the current HW
 * (identified by the GPU_ID register)
 */
kbase_hw_set_features_mask(kbdev);
kbase_gpuprops_set_features(kbdev);
/* On Linux 4.0+, dma coherency is determined from device tree */
#if defined(CONFIG_ARM64) && LINUX_VERSION_CODE < KERNEL_VERSION(4, 0, 0)
set_dma_ops(kbdev->dev, &noncoherent_swiotlb_dma_ops);
#endif
/* Workaround a pre-3.13 Linux issue, where dma_mask is NULL when our
 * device structure was created by device-tree
 */
if (!kbdev->dev->dma_mask)
kbdev->dev->dma_mask = &kbdev->dev->coherent_dma_mask;
/* Both DMA masks are sized from the GPU MMU's physical address width */
err = dma_set_mask(kbdev->dev,
DMA_BIT_MASK(kbdev->gpu_props.mmu.pa_bits));
if (err)
goto dma_set_mask_failed;
err = dma_set_coherent_mask(kbdev->dev,
DMA_BIT_MASK(kbdev->gpu_props.mmu.pa_bits));
if (err)
goto dma_set_mask_failed;
kbdev->nr_hw_address_spaces = kbdev->gpu_props.num_address_spaces;
err = kbase_device_all_as_init(kbdev);
if (err)
goto as_init_failed;
spin_lock_init(&kbdev->hwcnt.lock);
err = kbasep_trace_init(kbdev);
if (err)
goto term_as;
mutex_init(&kbdev->cacheclean_lock);
#ifdef CONFIG_MALI_TRACE_TIMELINE
for (i = 0; i < BASE_JM_MAX_NR_SLOTS; ++i)
kbdev->timeline.slot_atoms_submitted[i] = 0;
for (i = 0; i <= KBASEP_TIMELINE_PM_EVENT_LAST; ++i)
atomic_set(&kbdev->timeline.pm_event_uid[i], 0);
#endif /* CONFIG_MALI_TRACE_TIMELINE */
/* fbdump profiling controls set to 0 - fbdump not enabled until changed by gator */
for (i = 0; i < FBDUMP_CONTROL_MAX; i++)
kbdev->kbase_profiling_controls[i] = 0;
kbase_debug_assert_register_hook(&kbasep_trace_hook_wrapper, kbdev);
atomic_set(&kbdev->ctx_num, 0);
err = kbase_instr_backend_init(kbdev);
if (err)
goto term_trace;
kbdev->pm.dvfs_period = DEFAULT_PM_DVFS_PERIOD;
kbdev->reset_timeout_ms = DEFAULT_RESET_TIMEOUT_MS;
#ifdef CONFIG_MALI_GPU_MMU_AARCH64
kbdev->mmu_mode = kbase_mmu_mode_get_aarch64();
#else
kbdev->mmu_mode = kbase_mmu_mode_get_lpae();
#endif /* CONFIG_MALI_GPU_MMU_AARCH64 */
#ifdef CONFIG_MALI_DEBUG
init_waitqueue_head(&kbdev->driver_inactive_wait);
#endif /* CONFIG_MALI_DEBUG */
return 0;
/* Error unwinding: each label undoes the steps completed before the jump.
 * NOTE(review): the assert hook registered above is not unregistered on the
 * term_trace path - confirm whether that is intended.
 */
term_trace:
kbasep_trace_term(kbdev);
term_as:
kbase_device_all_as_term(kbdev);
as_init_failed:
dma_set_mask_failed:
fail:
return err;
}
/**
 * kbase_device_term - Undo kbase_device_init()
 * @kbdev: Device to terminate
 *
 * Unregisters the assert hook (when tracing is compiled in), then terminates
 * the instrumentation backend, the trace buffer and all address spaces.
 */
void kbase_device_term(struct kbase_device *kbdev)
{
	KBASE_DEBUG_ASSERT(kbdev);
#if KBASE_TRACE_ENABLE
	/* Stop asserts from dumping a trace buffer that is about to be freed */
	kbase_debug_assert_register_hook(NULL, NULL);
#endif
	kbase_instr_backend_term(kbdev);
	kbasep_trace_term(kbdev);
	kbase_device_all_as_term(kbdev);
}
/**
 * kbase_device_free - Free the memory backing a kbase device structure
 * @kbdev: Device structure to release with kfree()
 */
void kbase_device_free(struct kbase_device *kbdev)
{
	kfree(kbdev);
}
/**
 * kbase_device_trace_buffer_install - Attach a register trace buffer to a context
 * @kctx: Context that will own the buffer
 * @tb:   Buffer of 32-bit words; word 0 receives a magic number and word 1
 *        the header (write offset, wrap counter, transaction flag)
 * @size: Size of @tb in bytes
 *
 * Each trace entry is two 32-bit words. Entry 0 is the header and records are
 * stored from entry 1 onwards, so @size must cover at least two entries. The
 * write offset is tracked in 16 bits, which bounds @size from above.
 *
 * Return: 0 on success, -EINVAL if @size is outside the supported range.
 */
int kbase_device_trace_buffer_install(
		struct kbase_context *kctx, u32 *tb, size_t size)
{
	/* One trace entry is composed of two 32-bit words. */
	const size_t entry_size = 2 * sizeof(u32);
	unsigned long flags;

	KBASE_DEBUG_ASSERT(kctx);
	KBASE_DEBUG_ASSERT(tb);

	/* Interface uses 16-bit value to track last accessed entry. Each entry
	 * is composed of two 32-bit words.
	 * This limits the size that can be handled without an overflow. */
	if (0xFFFF * entry_size < size)
		return -EINVAL;

	/* The header occupies entry 0 and the first record lands in entry 1;
	 * a smaller buffer would be written past its end. */
	if (size < 2 * entry_size)
		return -EINVAL;

	/* set up the header */
	/* magic number in the first 4 bytes */
	tb[0] = TRACE_BUFFER_HEADER_SPECIAL;
	/* Store (write offset = 0, wrap counter = 0, transaction active = no)
	 * write offset 0 means never written.
	 * Offsets 1 to (wrap_offset - 1) used to store values when trace started
	 */
	tb[1] = 0;

	/* install trace buffer */
	spin_lock_irqsave(&kctx->jctx.tb_lock, flags);
	kctx->jctx.tb_wrap_offset = size / 8;
	kctx->jctx.tb = tb;
	spin_unlock_irqrestore(&kctx->jctx.tb_lock, flags);

	return 0;
}
/**
 * kbase_device_trace_buffer_uninstall - Detach the register trace buffer
 * @kctx: Context whose trace buffer pointer and wrap offset are cleared
 *
 * Both fields are reset under jctx.tb_lock. The buffer memory itself is not
 * freed here.
 */
void kbase_device_trace_buffer_uninstall(struct kbase_context *kctx)
{
	unsigned long irq_flags;

	KBASE_DEBUG_ASSERT(kctx);

	spin_lock_irqsave(&kctx->jctx.tb_lock, irq_flags);
	kctx->jctx.tb_wrap_offset = 0;
	kctx->jctx.tb = NULL;
	spin_unlock_irqrestore(&kctx->jctx.tb_lock, irq_flags);
}
/**
 * kbase_device_trace_register_access - Record one register access in the trace buffer
 * @kctx:       Context whose trace buffer (if installed) receives the record
 * @type:       Access type; REG_WRITE sets bit 0 of the first record word
 * @reg_offset: Register offset; its two low bits are masked off
 * @reg_value:  Value stored in the second record word
 *
 * Does nothing if no trace buffer is installed. Header word layout (tb[1]):
 * bit 0 = transaction in progress, bits 1-15 = wrap counter, bits 16-31 =
 * write offset. The header is flagged "in progress" while the record is being
 * written and rewritten with the new offset once it is complete.
 */
void kbase_device_trace_register_access(struct kbase_context *kctx, enum kbase_reg_access_type type, u16 reg_offset, u32 reg_value)
{
	unsigned long flags;

	spin_lock_irqsave(&kctx->jctx.tb_lock, flags);
	if (kctx->jctx.tb) {
		u16 wrap_count;
		u16 write_offset;
		u32 *tb = kctx->jctx.tb;
		u32 header_word;

		header_word = tb[1];
		KBASE_DEBUG_ASSERT(0 == (header_word & 0x1));

		wrap_count = (header_word >> 1) & 0x7FFF;
		write_offset = (header_word >> 16) & 0xFFFF;

		/* mark as transaction in progress */
		tb[1] |= 0x1;
		mb();

		/* calculate new offset */
		write_offset++;
		if (write_offset == kctx->jctx.tb_wrap_offset) {
			/* wrap */
			write_offset = 1;
			wrap_count++;
			wrap_count &= 0x7FFF;	/* 15bit wrap counter */
		}

		/* store the trace entry at the selected offset */
		tb[write_offset * 2 + 0] = (reg_offset & ~0x3) | ((type == REG_WRITE) ? 0x1 : 0x0);
		tb[write_offset * 2 + 1] = reg_value;
		mb();

		/* new header word; shift as u32 because a u16 promoted to int
		 * would overflow once write_offset reaches 0x8000 */
		header_word = ((u32)write_offset << 16) | ((u32)wrap_count << 1) | 0x0;	/* transaction complete */
		tb[1] = header_word;
	}
	spin_unlock_irqrestore(&kctx->jctx.tb_lock, flags);
}
/*
* Device trace functions
*/
#if KBASE_TRACE_ENABLE
static int kbasep_trace_init(struct kbase_device *kbdev)
{
struct kbase_trace *rbuf;
rbuf = kmalloc_array(KBASE_TRACE_SIZE, sizeof(*rbuf), GFP_KERNEL);
if (!rbuf)
return -EINVAL;
kbdev->trace_rbuf = rbuf;
spin_lock_init(&kbdev->trace_lock);
return 0;
}
/**
 * kbasep_trace_term - Free the device trace ring buffer
 * @kbdev: Device whose trace_rbuf is released with kfree()
 */
static void kbasep_trace_term(struct kbase_device *kbdev)
{
	kfree(kbdev->trace_rbuf);
}
static void kbasep_trace_format_msg(struct kbase_trace *trace_msg, char *buffer, int len)
{
s32 written = 0;
/* Initial part of message */
written += MAX(snprintf(buffer + written, MAX(len - written, 0), "%d.%.6d,%d,%d,%s,%p,", (int)trace_msg->timestamp.tv_sec, (int)(trace_msg->timestamp.tv_nsec / 1000), trace_msg->thread_id, trace_msg->cpu, kbasep_trace_code_string[trace_msg->code], trace_msg->ctx), 0);
if (trace_msg->katom)
written += MAX(snprintf(buffer + written, MAX(len - written, 0), "atom %d (ud: 0x%llx 0x%llx)", trace_msg->atom_number, trace_msg->atom_udata[0], trace_msg->atom_udata[1]), 0);
written += MAX(snprintf(buffer + written, MAX(len - written, 0), ",%.8llx,", trace_msg->gpu_addr), 0);
/* NOTE: Could add function callbacks to handle different message types */
/* Jobslot present */
if (trace_msg->flags & KBASE_TRACE_FLAG_JOBSLOT)
written += MAX(snprintf(buffer + written, MAX(len - written, 0), "%d", trace_msg->jobslot), 0);
written += MAX(snprintf(buffer + written, MAX(len - written, 0), ","), 0);
/* Refcount present */
if (trace_msg->flags & KBASE_TRACE_FLAG_REFCOUNT)
written += MAX(snprintf(buffer + written, MAX(len - written, 0), "%d", trace_msg->refcount), 0);
written += MAX(snprintf(buffer + written, MAX(len - written, 0), ","), 0);
/* Rest of message */
written += MAX(snprintf(buffer + written, MAX(len - written, 0), "0x%.8lx", trace_msg->info_val), 0);
}
/**
 * kbasep_trace_dump_msg - Format one trace entry and emit it with dev_dbg()
 * @kbdev:     Device whose struct device is used for the log call
 * @trace_msg: Entry to print
 */
static void kbasep_trace_dump_msg(struct kbase_device *kbdev, struct kbase_trace *trace_msg)
{
	char line[DEBUG_MESSAGE_SIZE];

	kbasep_trace_format_msg(trace_msg, line, DEBUG_MESSAGE_SIZE);

	dev_dbg(kbdev->dev, "%s", line);
}
/**
 * kbasep_trace_add - Append an entry to the device trace ring buffer
 * @kbdev:    Device owning the ring buffer
 * @code:     Trace code of the event
 * @ctx:      Context pointer recorded with the event
 * @katom:    Atom the event refers to, or NULL
 * @gpu_addr: GPU address recorded with the event
 * @flags:    KBASE_TRACE_FLAG_* bits stored with the entry
 * @refcount: Reference count; values above 0xFF are stored as 0xFF
 * @jobslot:  Job slot number recorded with the event
 * @info_val: Free-form value recorded with the event
 *
 * The entry is written under trace_lock. When the ring becomes full the
 * oldest entry is dropped by advancing trace_first_out.
 */
void kbasep_trace_add(struct kbase_device *kbdev, enum kbase_trace_code code, void *ctx, struct kbase_jd_atom *katom, u64 gpu_addr, u8 flags, int refcount, int jobslot, unsigned long info_val)
{
	struct kbase_trace *entry;
	unsigned long irq_state;

	spin_lock_irqsave(&kbdev->trace_lock, irq_state);

	entry = &kbdev->trace_rbuf[kbdev->trace_next_in];

	/* Who, where and when */
	entry->thread_id = task_pid_nr(current);
	entry->cpu = task_cpu(current);
	getnstimeofday(&entry->timestamp);

	/* What */
	entry->code = code;
	entry->ctx = ctx;

	if (katom != NULL) {
		entry->katom = true;
		entry->atom_number = kbase_jd_atom_id(katom->kctx, katom);
		entry->atom_udata[0] = katom->udata.blob[0];
		entry->atom_udata[1] = katom->udata.blob[1];
	} else {
		entry->katom = false;
	}

	entry->gpu_addr = gpu_addr;
	entry->jobslot = jobslot;
	entry->refcount = MIN((unsigned int)refcount, 0xFF);
	entry->info_val = info_val;
	entry->flags = flags;

	/* Advance the producer index; push the consumer index if we caught it */
	kbdev->trace_next_in = (kbdev->trace_next_in + 1) & KBASE_TRACE_MASK;
	if (kbdev->trace_first_out == kbdev->trace_next_in)
		kbdev->trace_first_out = (kbdev->trace_first_out + 1) & KBASE_TRACE_MASK;

	spin_unlock_irqrestore(&kbdev->trace_lock, irq_state);
}
/**
 * kbasep_trace_clear - Discard all entries in the device trace ring buffer
 * @kbdev: Device owning the ring buffer
 *
 * Moves the read index up to the write index under trace_lock.
 */
void kbasep_trace_clear(struct kbase_device *kbdev)
{
	unsigned long flags;

	spin_lock_irqsave(&kbdev->trace_lock, flags);
	kbdev->trace_first_out = kbdev->trace_next_in;
	spin_unlock_irqrestore(&kbdev->trace_lock, flags);
}
/**
 * kbasep_trace_dump - Print every buffered trace entry, then clear the buffer
 * @kbdev: Device owning the ring buffer
 *
 * Entries from trace_first_out up to (not including) trace_next_in are
 * printed with dev_dbg() while trace_lock is held. The buffer is cleared
 * through KBASE_TRACE_CLEAR() after the lock has been dropped.
 */
void kbasep_trace_dump(struct kbase_device *kbdev)
{
	unsigned long irq_state;
	u32 idx;
	u32 stop;

	dev_dbg(kbdev->dev, "Dumping trace:\nsecs,nthread,cpu,code,ctx,katom,gpu_addr,jobslot,refcount,info_val");

	spin_lock_irqsave(&kbdev->trace_lock, irq_state);

	stop = kbdev->trace_next_in;
	for (idx = kbdev->trace_first_out; idx != stop;
			idx = (idx + 1) & KBASE_TRACE_MASK)
		kbasep_trace_dump_msg(kbdev, &kbdev->trace_rbuf[idx]);

	dev_dbg(kbdev->dev, "TRACE_END");

	spin_unlock_irqrestore(&kbdev->trace_lock, irq_state);

	KBASE_TRACE_CLEAR(kbdev);
}
/**
 * kbasep_trace_hook_wrapper - Assert-hook adapter that dumps the trace buffer
 * @param: Opaque hook argument; the struct kbase_device registered with the hook
 */
static void kbasep_trace_hook_wrapper(void *param)
{
	struct kbase_device *kbdev = param;

	kbasep_trace_dump(kbdev);
}
#ifdef CONFIG_DEBUG_FS
/**
 * struct trace_seq_state - Snapshot of the trace ring buffer for one debugfs open
 * @trace_buf: Copy of the device ring buffer taken at open time
 * @start:     Ring index of the oldest entry in the snapshot
 * @end:       Ring index one past the newest entry in the snapshot
 */
struct trace_seq_state {
	struct kbase_trace trace_buf[KBASE_TRACE_SIZE];
	u32 start;
	u32 end;
};
/**
 * kbasep_trace_seq_start - seq_file .start: locate the entry at position *pos
 * @s:   seq_file whose private data is a struct trace_seq_state
 * @pos: Zero-based entry position relative to the snapshot's start index
 *
 * Return: pointer to the snapshot entry, or NULL when *pos is past the last
 * buffered entry.
 */
static void *kbasep_trace_seq_start(struct seq_file *s, loff_t *pos)
{
	struct trace_seq_state *state = s->private;
	int i;

	if (*pos > KBASE_TRACE_SIZE)
		return NULL;
	/* Unwrapped index: may run past KBASE_TRACE_SIZE when the ring wrapped */
	i = state->start + *pos;
	/* First test covers the non-wrapped ring (end >= start); the second
	 * covers the wrapped ring, where end is treated as end + ring size. */
	if ((state->end >= state->start && i >= state->end) ||
			i >= state->end + KBASE_TRACE_SIZE)
		return NULL;

	/* Fold the unwrapped index back into the ring */
	i &= KBASE_TRACE_MASK;

	return &state->trace_buf[i];
}
/**
 * kbasep_trace_seq_stop - seq_file .stop: intentionally empty
 * @s:    seq_file being iterated (unused)
 * @data: Current iterator element (unused)
 */
static void kbasep_trace_seq_stop(struct seq_file *s, void *data)
{
}
/**
 * kbasep_trace_seq_next - seq_file .next: advance to the following entry
 * @s:    seq_file whose private data is a struct trace_seq_state
 * @data: Current element (unused)
 * @pos:  Position counter; incremented by one
 *
 * Return: pointer to the next snapshot entry, or NULL once the snapshot's end
 * index is reached.
 */
static void *kbasep_trace_seq_next(struct seq_file *s, void *data, loff_t *pos)
{
	struct trace_seq_state *snap = s->private;
	int ring_idx;

	*pos += 1;
	ring_idx = (snap->start + *pos) & KBASE_TRACE_MASK;

	return (ring_idx == snap->end) ? NULL : &snap->trace_buf[ring_idx];
}
/**
 * kbasep_trace_seq_show - seq_file .show: print one trace entry as a text line
 * @s:    seq_file to print into
 * @data: Pointer to the struct kbase_trace entry to print
 *
 * Return: always 0.
 */
static int kbasep_trace_seq_show(struct seq_file *s, void *data)
{
	char line[DEBUG_MESSAGE_SIZE];
	struct kbase_trace *entry = data;

	kbasep_trace_format_msg(entry, line, DEBUG_MESSAGE_SIZE);
	seq_printf(s, "%s\n", line);

	return 0;
}
/* seq_file iterator over the trace snapshot taken in kbasep_trace_debugfs_open() */
static const struct seq_operations kbasep_trace_seq_ops = {
	.start = kbasep_trace_seq_start,
	.next = kbasep_trace_seq_next,
	.stop = kbasep_trace_seq_stop,
	.show = kbasep_trace_seq_show,
};
static int kbasep_trace_debugfs_open(struct inode *inode, struct file *file)
{
struct kbase_device *kbdev = inode->i_private;
unsigned long flags;
struct trace_seq_state *state;
state = __seq_open_private(file, &kbasep_trace_seq_ops, sizeof(*state));
if (!state)
return -ENOMEM;
spin_lock_irqsave(&kbdev->trace_lock, flags);
state->start = kbdev->trace_first_out;
state->end = kbdev->trace_next_in;
memcpy(state->trace_buf, kbdev->trace_rbuf, sizeof(state->trace_buf));
spin_unlock_irqrestore(&kbdev->trace_lock, flags);
return 0;
}
/* File operations for the "mali_trace" debugfs file; release frees the
 * private snapshot allocated at open time via seq_release_private(). */
static const struct file_operations kbasep_trace_debugfs_fops = {
	.open = kbasep_trace_debugfs_open,
	.read = seq_read,
	.llseek = seq_lseek,
	.release = seq_release_private,
};
/**
 * kbasep_trace_debugfs_init - Create the read-only "mali_trace" debugfs file
 * @kbdev: Device whose mali_debugfs_directory receives the file; also passed
 *         as the file's private data
 */
void kbasep_trace_debugfs_init(struct kbase_device *kbdev)
{
	debugfs_create_file("mali_trace", S_IRUGO,
			kbdev->mali_debugfs_directory, kbdev,
			&kbasep_trace_debugfs_fops);
}
#else
/* No-op variant used when CONFIG_DEBUG_FS is not set. */
void kbasep_trace_debugfs_init(struct kbase_device *kbdev)
{
}
#endif /* CONFIG_DEBUG_FS */
#else /* KBASE_TRACE_ENABLE */
/* Stub used when KBASE_TRACE_ENABLE is 0: nothing to allocate, always succeeds. */
static int kbasep_trace_init(struct kbase_device *kbdev)
{
	CSTD_UNUSED(kbdev);
	return 0;
}
/* Stub used when KBASE_TRACE_ENABLE is 0: nothing to free. */
static void kbasep_trace_term(struct kbase_device *kbdev)
{
	CSTD_UNUSED(kbdev);
}
/* Stub used when KBASE_TRACE_ENABLE is 0: the assert hook does nothing. */
static void kbasep_trace_hook_wrapper(void *param)
{
	CSTD_UNUSED(param);
}
/* Stub used when KBASE_TRACE_ENABLE is 0: there is no trace buffer to dump. */
void kbasep_trace_dump(struct kbase_device *kbdev)
{
	CSTD_UNUSED(kbdev);
}
#endif /* KBASE_TRACE_ENABLE */
/**
 * kbase_set_profiling_control - Store the value of one profiling control
 * @kbdev:   Device holding the kbase_profiling_controls[] array
 * @control: Control to set; must be one of the four controls handled below
 * @value:   New value for the control
 *
 * An unrecognised @control is reported with dev_err() and otherwise ignored.
 */
void kbase_set_profiling_control(struct kbase_device *kbdev, u32 control, u32 value)
{
	switch (control) {
	case FBDUMP_CONTROL_ENABLE:
		/* fall through */
	case FBDUMP_CONTROL_RATE:
		/* fall through */
	case SW_COUNTER_ENABLE:
		/* fall through */
	case FBDUMP_CONTROL_RESIZE_FACTOR:
		kbdev->kbase_profiling_controls[control] = value;
		break;
	default:
		/* control is unsigned, so print it as such */
		dev_err(kbdev->dev, "Profiling control %u not found\n", control);
		break;
	}
}
/**
 * kbase_get_profiling_control - Read the value of one profiling control
 * @kbdev:   Device holding the kbase_profiling_controls[] array
 * @control: Control to read; must be one of the four controls handled below
 *
 * Return: the stored value, or 0 (after a dev_err() message) when @control is
 * not recognised.
 */
u32 kbase_get_profiling_control(struct kbase_device *kbdev, u32 control)
{
	u32 ret_value = 0;

	switch (control) {
	case FBDUMP_CONTROL_ENABLE:
		/* fall through */
	case FBDUMP_CONTROL_RATE:
		/* fall through */
	case SW_COUNTER_ENABLE:
		/* fall through */
	case FBDUMP_CONTROL_RESIZE_FACTOR:
		ret_value = kbdev->kbase_profiling_controls[control];
		break;
	default:
		/* control is unsigned, so print it as such */
		dev_err(kbdev->dev, "Profiling control %u not found\n", control);
		break;
	}

	return ret_value;
}
/**
 * _mali_profiling_control - Entry point for gator to adjust profiling at runtime
 * @action: Profiling control to change
 * @value:  New value for the control
 *
 * Applies the setting to the device returned by kbase_find_device(-1). Does
 * nothing if no device is found.
 */
void _mali_profiling_control(u32 action, u32 value)
{
	/* find the first i.e. call with -1 */
	struct kbase_device *kbdev = kbase_find_device(-1);

	if (kbdev == NULL)
		return;

	kbase_set_profiling_control(kbdev, action, value);
}
KBASE_EXPORT_SYMBOL(_mali_profiling_control);

View File

@ -0,0 +1,76 @@
/*
*
* (C) COPYRIGHT 2014 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
* Foundation, and any use by you of this program is subject to the terms
* of such GNU licence.
*
* A copy of the licence is included with the program, and can also be obtained
* from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
* Boston, MA 02110-1301, USA.
*
*/
/*
* Base kernel disjoint events helper functions
*/
#include <mali_kbase.h>
/**
 * kbase_disjoint_init - Reset the disjoint event count and state to zero
 * @kbdev: Device whose disjoint_event counters are initialised
 */
void kbase_disjoint_init(struct kbase_device *kbdev)
{
	KBASE_DEBUG_ASSERT(kbdev != NULL);

	atomic_set(&kbdev->disjoint_event.count, 0);
	atomic_set(&kbdev->disjoint_event.state, 0);
}
/**
 * kbase_disjoint_event - Increment the disjoint event count
 * @kbdev: Device whose disjoint_event.count is incremented
 */
void kbase_disjoint_event(struct kbase_device *kbdev)
{
	KBASE_DEBUG_ASSERT(kbdev != NULL);

	atomic_inc(&kbdev->disjoint_event.count);
}
/**
 * kbase_disjoint_state_up - Increment the disjoint state and the event count
 * @kbdev: Device whose disjoint_event counters are updated
 */
void kbase_disjoint_state_up(struct kbase_device *kbdev)
{
	KBASE_DEBUG_ASSERT(kbdev != NULL);

	atomic_inc(&kbdev->disjoint_event.state);

	kbase_disjoint_event(kbdev);
}
/**
 * kbase_disjoint_state_down - Count an event, then decrement the disjoint state
 * @kbdev: Device whose disjoint_event counters are updated
 *
 * The state is asserted to be above zero before it is decremented.
 */
void kbase_disjoint_state_down(struct kbase_device *kbdev)
{
	KBASE_DEBUG_ASSERT(kbdev != NULL);
	KBASE_DEBUG_ASSERT(atomic_read(&kbdev->disjoint_event.state) > 0);

	kbase_disjoint_event(kbdev);

	atomic_dec(&kbdev->disjoint_event.state);
}
/**
 * kbase_disjoint_event_potential - Count an event only while in a disjoint state
 * @kbdev: Device whose disjoint_event counters are consulted
 *
 * The event count is incremented only when the disjoint state is non-zero.
 */
void kbase_disjoint_event_potential(struct kbase_device *kbdev)
{
	KBASE_DEBUG_ASSERT(kbdev != NULL);

	if (!atomic_read(&kbdev->disjoint_event.state))
		return;

	kbase_disjoint_event(kbdev);
}
/**
 * kbase_disjoint_event_get - Read the current disjoint event count
 * @kbdev: Device whose disjoint_event.count is read
 *
 * Return: the current value of the event counter.
 */
u32 kbase_disjoint_event_get(struct kbase_device *kbdev)
{
	KBASE_DEBUG_ASSERT(kbdev != NULL);

	return atomic_read(&kbdev->disjoint_event.count);
}
KBASE_EXPORT_TEST_API(kbase_disjoint_event_get);

View File

@ -0,0 +1,606 @@
/*
*
* (C) COPYRIGHT 2011-2016 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
* Foundation, and any use by you of this program is subject to the terms
* of such GNU licence.
*
* A copy of the licence is included with the program, and can also be obtained
* from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
* Boston, MA 02110-1301, USA.
*
*/
/* Include mali_kbase_dma_fence.h before checking for CONFIG_MALI_DMA_FENCE as
* it will be set there.
*/
#include "mali_kbase_dma_fence.h"
#include <linux/atomic.h>
#include <linux/fence.h>
#include <linux/list.h>
#include <linux/lockdep.h>
#include <linux/mutex.h>
#include <linux/reservation.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/workqueue.h>
#include <linux/ww_mutex.h>
#include <mali_kbase.h>
/* Spin lock protecting all Mali fences as fence->lock. */
static DEFINE_SPINLOCK(kbase_dma_fence_lock);
static void
kbase_dma_fence_work(struct work_struct *pwork);
/**
 * kbase_dma_fence_waiters_add - Append katom to its context's dma-fence wait list
 * @katom: Atom to add to kctx->dma_fence.waiting_resource
 */
static void
kbase_dma_fence_waiters_add(struct kbase_jd_atom *katom)
{
	list_add_tail(&katom->queue, &katom->kctx->dma_fence.waiting_resource);
}
/* Unlink katom from whichever list its queue node is currently on. */
void
kbase_dma_fence_waiters_remove(struct kbase_jd_atom *katom)
{
	list_del(&katom->queue);
}
/* fence_ops .get_driver_name: every Mali fence reports kbase_drv_name. */
static const char *
kbase_dma_fence_get_driver_name(struct fence *fence)
{
	return kbase_drv_name;
}
/* fence_ops .get_timeline_name: every Mali fence reports kbase_timeline_name. */
static const char *
kbase_dma_fence_get_timeline_name(struct fence *fence)
{
	return kbase_timeline_name;
}
/* fence_ops .enable_signaling: nothing to arm, always reports success. */
static bool
kbase_dma_fence_enable_signaling(struct fence *fence)
{
	/* If in the future we need to add code here remember to
	 * get a reference to the fence and release it when signaling
	 * as stated in fence.h
	 */
	return true;
}
/* fence_ops .fence_value_str: print the fence's seqno as an unsigned decimal. */
static void
kbase_dma_fence_fence_value_str(struct fence *fence, char *str, int size)
{
	snprintf(str, size, "%u", fence->seqno);
}
/* Operations shared by every fence created in kbase_dma_fence_new(). */
static const struct fence_ops kbase_dma_fence_ops = {
	.get_driver_name = kbase_dma_fence_get_driver_name,
	.get_timeline_name = kbase_dma_fence_get_timeline_name,
	.enable_signaling = kbase_dma_fence_enable_signaling,
	/* Use the default wait */
	.wait = fence_default_wait,
	.fence_value_str = kbase_dma_fence_fence_value_str,
};
/**
 * kbase_dma_fence_new - Allocate and initialise a Mali fence
 * @context: Fence context number
 * @seqno:   Sequence number within @context
 *
 * The fence uses kbase_dma_fence_ops and the global kbase_dma_fence_lock.
 *
 * Return: the new fence, or NULL if allocation failed.
 */
static struct fence *
kbase_dma_fence_new(unsigned int context, unsigned int seqno)
{
	struct fence *new_fence = kzalloc(sizeof(*new_fence), GFP_KERNEL);

	if (new_fence)
		fence_init(new_fence, &kbase_dma_fence_ops,
			   &kbase_dma_fence_lock, context, seqno);

	return new_fence;
}
/**
 * kbase_dma_fence_lock_reservations - Lock every reservation object in @info
 * @info: Reservation objects to lock
 * @ctx:  Wound/wait acquire context, initialised here
 *
 * On -EDEADLK all locks taken so far are dropped, the contended object is
 * acquired with ww_mutex_lock_slow() and the whole sequence is retried.
 *
 * Return: 0 with all objects locked, or a negative error code with no object
 * locked and @ctx finalised.
 */
static int
kbase_dma_fence_lock_reservations(struct kbase_dma_fence_resv_info *info,
		struct ww_acquire_ctx *ctx)
{
	/* Object already held via the slow path during a retry, if any */
	struct reservation_object *content_res = NULL;
	unsigned int content_res_idx = 0;
	unsigned int r;
	int err = 0;

	ww_acquire_init(ctx, &reservation_ww_class);

retry:
	for (r = 0; r < info->dma_fence_resv_count; r++) {
		/* Already locked by the slow path: skip it, and from now on
		 * treat it like any other lock held in [0, r]. */
		if (info->resv_objs[r] == content_res) {
			content_res = NULL;
			continue;
		}

		err = ww_mutex_lock(&info->resv_objs[r]->lock, ctx);
		if (err)
			goto error;
	}

	ww_acquire_done(ctx);
	return err;

error:
	content_res_idx = r;

	/* Unlock the ones locked so far */
	while (r--)
		ww_mutex_unlock(&info->resv_objs[r]->lock);

	/* Still set only if the loop failed before reaching the slow-path
	 * object, in which case the loop above did not unlock it. */
	if (content_res)
		ww_mutex_unlock(&content_res->lock);

	/* If we deadlock try with lock_slow and retry */
	if (err == -EDEADLK) {
		content_res = info->resv_objs[content_res_idx];
		ww_mutex_lock_slow(&content_res->lock, ctx);
		goto retry;
	}

	/* If we are here the function failed */
	ww_acquire_fini(ctx);
	return err;
}
/**
 * kbase_dma_fence_unlock_reservations - Unlock every reservation object in @info
 * @info: Reservation objects to unlock
 * @ctx:  Wound/wait acquire context, finalised afterwards
 */
static void
kbase_dma_fence_unlock_reservations(struct kbase_dma_fence_resv_info *info,
		struct ww_acquire_ctx *ctx)
{
	unsigned int idx = 0;

	while (idx < info->dma_fence_resv_count) {
		ww_mutex_unlock(&info->resv_objs[idx]->lock);
		idx++;
	}

	ww_acquire_fini(ctx);
}
/**
 * kbase_dma_fence_queue_work() - Queue work to handle @katom
 * @katom: Pointer to atom for which to queue work
 *
 * Schedules kbase_dma_fence_work() on the context's dma-fence workqueue so the
 * fence callbacks get cleaned up and the atom is submitted.
 */
static void
kbase_dma_fence_queue_work(struct kbase_jd_atom *katom)
{
	bool queued;

	INIT_WORK(&katom->work, kbase_dma_fence_work);
	queued = queue_work(katom->kctx->dma_fence.wq, &katom->work);

	/* Warn if work was already queued, that should not happen. */
	WARN_ON(!queued);
}
/**
 * kbase_dma_fence_free_callbacks - Free dma-fence callbacks on a katom
 * @katom: Pointer to katom
 * @queue_worker: Boolean indicating if fence worker is to be queued when
 *                dep_count reaches 0.
 *
 * Frees every entry on the katom's callback list. An entry whose fence has
 * not signaled yet is first removed from that fence, and dep_count is
 * decremented on its behalf.
 *
 * Locking: katom->dma_fence.callbacks list assumes jctx.lock is held.
 */
static void
kbase_dma_fence_free_callbacks(struct kbase_jd_atom *katom, bool queue_worker)
{
	struct kbase_dma_fence_cb *entry, *next_entry;

	lockdep_assert_held(&katom->kctx->jctx.lock);

	list_for_each_entry_safe(entry, next_entry,
			&katom->dma_fence.callbacks, node) {
		/* Cancel callbacks that have not been called yet. */
		if (fence_remove_callback(entry->fence, &entry->fence_cb)) {
			/* Fence had not signaled: account for the callback
			 * that will now never run.
			 */
			int remaining =
				atomic_dec_return(&katom->dma_fence.dep_count);

			/* dep_count went to zero and the caller asked for it:
			 * queue the worker to complete the katom.
			 */
			if (unlikely(queue_worker && remaining == 0))
				kbase_dma_fence_queue_work(katom);
		}

		/* Release the reference taken in
		 * kbase_dma_fence_add_callback().
		 */
		fence_put(entry->fence);
		list_del(&entry->node);
		kfree(entry);
	}
}
/**
 * kbase_dma_fence_cancel_atom() - Cancels waiting on an atom
 * @katom: Katom to cancel
 *
 * Locking: katom->dma_fence.callbacks list assumes jctx.lock is held.
 */
static void
kbase_dma_fence_cancel_atom(struct kbase_jd_atom *katom)
{
	lockdep_assert_held(&katom->kctx->jctx.lock);

	/* Cancel callbacks and clean up. */
	kbase_dma_fence_free_callbacks(katom, false);

	KBASE_DEBUG_ASSERT(atomic_read(&katom->dma_fence.dep_count) == 0);

	/* Mark the atom as handled in case all fences signaled just before
	 * canceling the callbacks and the worker was queued.
	 */
	atomic_set(&katom->dma_fence.dep_count, -1);

	/* Prevent job_done_nolock from being called twice on an atom when
	 * there is a race between job completion and cancellation.
	 */
	if (katom->status != KBASE_JD_ATOM_STATE_QUEUED)
		return;

	/* Wait was cancelled - zap the atom */
	katom->event_code = BASE_JD_EVENT_JOB_CANCELLED;
	if (jd_done_nolock(katom, NULL))
		kbase_js_sched_all(katom->kctx->kbdev);
}
/**
 * kbase_dma_fence_work() - Worker thread called when a fence is signaled
 * @pwork: work_struct containing a pointer to a katom
 *
 * Runs under jctx.lock. If dep_count is zero the atom is marked as handled,
 * taken off the waiting list, its callbacks are freed and it is either
 * completed or has its dependency cleared. Otherwise nothing is done.
 */
static void
kbase_dma_fence_work(struct work_struct *pwork)
{
	struct kbase_jd_atom *katom =
		container_of(pwork, struct kbase_jd_atom, work);
	struct kbase_jd_context *jctx = &katom->kctx->jctx;

	mutex_lock(&jctx->lock);

	if (atomic_read(&katom->dma_fence.dep_count) == 0) {
		atomic_set(&katom->dma_fence.dep_count, -1);

		/* Remove atom from list of dma-fence waiting atoms. */
		kbase_dma_fence_waiters_remove(katom);
		/* Cleanup callbacks. */
		kbase_dma_fence_free_callbacks(katom, false);

		/* Queue atom on GPU, unless it has already completed due to a
		 * failing dependency. Run jd_done_nolock() on the katom if it
		 * is completed.
		 */
		if (unlikely(katom->status == KBASE_JD_ATOM_STATE_COMPLETED))
			jd_done_nolock(katom, NULL);
		else
			kbase_jd_dep_clear_locked(katom);
	}

	mutex_unlock(&jctx->lock);
}
/**
 * kbase_dma_fence_add_callback() - Add callback on @fence to block @katom
 * @katom: Pointer to katom that will be blocked by @fence
 * @fence: Pointer to fence on which to set up the callback
 * @callback: Pointer to function to be called when fence is signaled
 *
 * Caller needs to hold a reference to @fence when calling this function, and
 * the caller is responsible for releasing that reference. When the callback
 * is installed an additional reference to @fence is taken and kept until the
 * callback entry is freed.
 *
 * Return: 0 on success: fence was either already signalled, or callback was
 * set up. Negative error code is returned on error.
 */
static int
kbase_dma_fence_add_callback(struct kbase_jd_atom *katom,
		struct fence *fence,
		fence_func_t callback)
{
	struct kbase_dma_fence_cb *entry;
	int err;

	entry = kmalloc(sizeof(*entry), GFP_KERNEL);
	if (!entry)
		return -ENOMEM;

	entry->fence = fence;
	entry->katom = katom;
	INIT_LIST_HEAD(&entry->node);

	err = fence_add_callback(fence, &entry->fence_cb, callback);
	if (err) {
		/* No callback was installed, so the entry is not needed. */
		kfree(entry);

		/* -ENOENT means the fence has already signaled: not an error */
		return (err == -ENOENT) ? 0 : err;
	}

	/* Get reference to fence that will be kept until callback gets
	 * cleaned up in kbase_dma_fence_free_callbacks().
	 */
	fence_get(fence);
	atomic_inc(&katom->dma_fence.dep_count);
	/* Add callback to katom's list of callbacks */
	list_add(&entry->node, &katom->dma_fence.callbacks);

	return 0;
}
/**
 * kbase_dma_fence_cb - Fence callback: one dependency of an atom has signaled
 * @fence: Fence that signaled
 * @cb:    Callback node, embedded in a struct kbase_dma_fence_cb
 *
 * Decrements the atom's dep_count and queues the worker when it reaches zero.
 */
static void
kbase_dma_fence_cb(struct fence *fence, struct fence_cb *cb)
{
	struct kbase_dma_fence_cb *entry =
		container_of(cb, struct kbase_dma_fence_cb, fence_cb);
	struct kbase_jd_atom *katom = entry->katom;

	/* If the atom is zapped dep_count will be forced to a negative number
	 * preventing this callback from ever scheduling work. Which in turn
	 * would reschedule the atom.
	 */
	if (!atomic_dec_and_test(&katom->dma_fence.dep_count))
		return;

	kbase_dma_fence_queue_work(katom);
}
/**
 * kbase_dma_fence_add_reservation_callback - Make katom wait on a reservation object's fences
 * @katom:     Atom that has to wait
 * @resv:      Reservation object whose fences are waited on
 * @exclusive: When true, wait on the shared fences as well as the exclusive one
 *
 * A callback is installed on the exclusive fence, if there is one, and on
 * every shared fence when @exclusive is set. If installing any callback
 * fails, all callbacks on @katom are cancelled again.
 *
 * Return: 0 on success, a negative error code otherwise.
 */
static int
kbase_dma_fence_add_reservation_callback(struct kbase_jd_atom *katom,
		struct reservation_object *resv,
		bool exclusive)
{
	struct fence *excl = NULL;
	struct fence **shared = NULL;
	unsigned int nr_shared = 0;
	unsigned int idx;
	int err;

	err = reservation_object_get_fences_rcu(resv, &excl, &nr_shared,
			&shared);
	if (err)
		return err;

	if (excl) {
		err = kbase_dma_fence_add_callback(katom, excl,
				kbase_dma_fence_cb);

		/* Release our reference, taken by
		 * reservation_object_get_fences_rcu(), to the fence. We have
		 * set up our callback (if that was possible), and the fence's
		 * owner is responsible for signaling the fence before
		 * allowing it to disappear.
		 */
		fence_put(excl);
	}

	if (!err && exclusive) {
		for (idx = 0; idx < nr_shared; idx++) {
			err = kbase_dma_fence_add_callback(katom, shared[idx],
					kbase_dma_fence_cb);
			if (err)
				break;
		}
	}

	/* Release all our references to the shared fences, taken by
	 * reservation_object_get_fences_rcu(). We have set up our callback (if
	 * that was possible), and the fence's owner is responsible for
	 * signaling the fence before allowing it to disappear.
	 */
	for (idx = 0; idx < nr_shared; idx++)
		fence_put(shared[idx]);
	kfree(shared);

	/* On error, cancel and clean up all callbacks that were set up
	 * before the error.
	 */
	if (err)
		kbase_dma_fence_free_callbacks(katom, false);

	return err;
}
/* Append resv to info->resv_objs unless it is already present, and record in
 * the exclusive bitmap whether exclusive access was requested for it.
 * NOTE(review): no capacity check is done here; presumably the caller sizes
 * resv_objs and the bitmap for the worst case - confirm.
 */
void kbase_dma_fence_add_reservation(struct reservation_object *resv,
		struct kbase_dma_fence_resv_info *info,
		bool exclusive)
{
	const unsigned int slot = info->dma_fence_resv_count;
	unsigned int idx;

	/* Duplicate resource, ignore */
	for (idx = 0; idx < slot; idx++)
		if (info->resv_objs[idx] == resv)
			return;

	info->resv_objs[slot] = resv;
	if (exclusive)
		set_bit(slot, info->dma_fence_excl_bitmap);

	info->dma_fence_resv_count = slot + 1;
}
int kbase_dma_fence_wait(struct kbase_jd_atom *katom,
struct kbase_dma_fence_resv_info *info)
{
int err, i;
struct fence *fence;
struct ww_acquire_ctx ww_ctx;
lockdep_assert_held(&katom->kctx->jctx.lock);
fence = kbase_dma_fence_new(katom->dma_fence.context,
atomic_inc_return(&katom->dma_fence.seqno));
if (!fence) {
err = -ENOMEM;
dev_err(katom->kctx->kbdev->dev,
"Error %d creating fence.\n", err);
return err;
}
katom->dma_fence.fence = fence;
atomic_set(&katom->dma_fence.dep_count, 1);
err = kbase_dma_fence_lock_reservations(info, &ww_ctx);
if (err) {
dev_err(katom->kctx->kbdev->dev,
"Error %d locking reservations.\n", err);
atomic_set(&katom->dma_fence.dep_count, -1);
fence_put(fence);
return err;
}
for (i = 0; i < info->dma_fence_resv_count; i++) {
struct reservation_object *obj = info->resv_objs[i];
if (!test_bit(i, info->dma_fence_excl_bitmap)) {
err = reservation_object_reserve_shared(obj);
if (err) {
dev_err(katom->kctx->kbdev->dev,
"Error %d reserving space for shared fence.\n", err);
goto end;
}
err = kbase_dma_fence_add_reservation_callback(katom, obj, false);
if (err) {
dev_err(katom->kctx->kbdev->dev,
"Error %d adding reservation to callback.\n", err);
goto end;
}
reservation_object_add_shared_fence(obj, katom->dma_fence.fence);
} else {
err = kbase_dma_fence_add_reservation_callback(katom, obj, true);
if (err) {
dev_err(katom->kctx->kbdev->dev,
"Error %d adding reservation to callback.\n", err);
goto end;
}
reservation_object_add_excl_fence(obj, katom->dma_fence.fence);
}
}
end:
kbase_dma_fence_unlock_reservations(info, &ww_ctx);
if (likely(!err)) {
/* Test if the callbacks are already triggered */
if (atomic_dec_and_test(&katom->dma_fence.dep_count)) {
atomic_set(&katom->dma_fence.dep_count, -1);
kbase_dma_fence_free_callbacks(katom, false);
} else {
/* Add katom to the list of dma-buf fence waiting atoms
* only if it is still waiting.
*/
kbase_dma_fence_waiters_add(katom);
}
} else {
/* There was an error, cancel callbacks, set dep_count to -1 to
* indicate that the atom has been handled (the caller will
* kill it for us), signal the fence, free callbacks and the
* fence.
*/
kbase_dma_fence_free_callbacks(katom, false);
atomic_set(&katom->dma_fence.dep_count, -1);
kbase_dma_fence_signal(katom);
}
return err;
}
/* Cancel every atom on kctx's dma-fence waiting list. Each atom is taken off
 * the head of the list and then cancelled, until the list is empty.
 */
void kbase_dma_fence_cancel_all_atoms(struct kbase_context *kctx)
{
	struct list_head *waiting = &kctx->dma_fence.waiting_resource;
	struct kbase_jd_atom *head;

	while (!list_empty(waiting)) {
		head = list_first_entry(waiting, struct kbase_jd_atom, queue);

		kbase_dma_fence_waiters_remove(head);
		kbase_dma_fence_cancel_atom(head);
	}
}
/* Cancel and free all fence callbacks on katom. The worker is queued if this
 * brings dep_count down to zero (queue_worker == true).
 */
void kbase_dma_fence_cancel_callbacks(struct kbase_jd_atom *katom)
{
	/* Cancel callbacks and clean up. */
	kbase_dma_fence_free_callbacks(katom, true);
}
/* Signal and release katom's own fence, if it has one, then free any fence
 * callbacks still attached to the atom. dep_count is expected to be -1.
 */
void kbase_dma_fence_signal(struct kbase_jd_atom *katom)
{
	struct fence *own_fence = katom->dma_fence.fence;

	if (own_fence == NULL)
		return;

	KBASE_DEBUG_ASSERT(atomic_read(&katom->dma_fence.dep_count) == -1);

	/* Signal the atom's fence. */
	fence_signal(own_fence);
	fence_put(own_fence);
	katom->dma_fence.fence = NULL;

	kbase_dma_fence_free_callbacks(katom, false);
}
/* Destroy the context's dma-fence workqueue and clear the pointer to it. */
void kbase_dma_fence_term(struct kbase_context *kctx)
{
	destroy_workqueue(kctx->dma_fence.wq);
	kctx->dma_fence.wq = NULL;
}
/* Initialise the context's dma-fence state: an empty waiting list and an
 * unbound workqueue named after the context's pid.
 *
 * Returns 0 on success, -ENOMEM if the workqueue could not be allocated.
 */
int kbase_dma_fence_init(struct kbase_context *kctx)
{
	INIT_LIST_HEAD(&kctx->dma_fence.waiting_resource);

	kctx->dma_fence.wq = alloc_workqueue("mali-fence-%d",
			WQ_UNBOUND, 1, kctx->pid);

	return kctx->dma_fence.wq ? 0 : -ENOMEM;
}

View File

@ -0,0 +1,150 @@
/*
*
* (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
* Foundation, and any use by you of this program is subject to the terms
* of such GNU licence.
*
* A copy of the licence is included with the program, and can also be obtained
* from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
* Boston, MA 02110-1301, USA.
*
*/
#ifndef _KBASE_DMA_FENCE_H_
#define _KBASE_DMA_FENCE_H_
#ifdef CONFIG_MALI_DMA_FENCE
#include <linux/fence.h>
#include <linux/list.h>
#include <linux/reservation.h>
/* Forward declaration from mali_kbase_defs.h */
struct kbase_jd_atom;
struct kbase_context;
/**
 * struct kbase_dma_fence_cb - Mali dma-fence callback data struct
 * @fence_cb: Callback node registered on @fence
 * @katom:    Pointer to katom that is waiting on this callback
 * @fence:    Pointer to the fence object on which this callback is waiting
 * @node:     List head for linking this callback to the katom
 */
struct kbase_dma_fence_cb {
	struct fence_cb fence_cb;
	struct kbase_jd_atom *katom;
	struct fence *fence;
	struct list_head node;
};
/**
 * struct kbase_dma_fence_resv_info - Structure with list of reservation objects
 * @resv_objs:             Array of reservation objects to attach the
 *                         new fence to.
 * @dma_fence_resv_count:  Number of reservation objects in the array.
 * @dma_fence_excl_bitmap: Bitmap indexed like @resv_objs; a set bit marks
 *                         the object as needing exclusive access.
 *
 * This is used by some functions to pass around a collection of data about
 * reservation objects.
 */
struct kbase_dma_fence_resv_info {
	struct reservation_object **resv_objs;
	unsigned int dma_fence_resv_count;
	unsigned long *dma_fence_excl_bitmap;
};
/**
* kbase_dma_fence_add_reservation() - Adds a resv to the array of resv_objs
* @resv: Reservation object to add to the array.
* @info: Pointer to struct with current reservation info
* @exclusive: Boolean indicating if exclusive access is needed
*
* The function adds a new reservation_object to an existing array of
* reservation_objects. At the same time keeps track of which objects require
* exclusive access in dma_fence_excl_bitmap.
*/
void kbase_dma_fence_add_reservation(struct reservation_object *resv,
struct kbase_dma_fence_resv_info *info,
bool exclusive);
/**
* kbase_dma_fence_wait() - Creates a new fence and attaches it to the resv_objs
* @katom: Katom with the external dependency.
* @info: Pointer to struct with current reservation info
*
* Return: An error code or 0 if succeeds
*/
int kbase_dma_fence_wait(struct kbase_jd_atom *katom,
struct kbase_dma_fence_resv_info *info);
/**
* kbase_dma_fence_cancel_all_atoms() - Cancel all dma-fences blocked atoms on kctx
* @kctx: Pointer to kbase context
*
* This function will cancel and clean up all katoms on @kctx that are waiting
* on dma-buf fences.
*
* Locking: jctx.lock needs to be held when calling this function.
*/
void kbase_dma_fence_cancel_all_atoms(struct kbase_context *kctx);
/**
* kbase_dma_fence_cancel_callbacks() - Cancel only callbacks on katom
* @katom: Pointer to katom whose callbacks are to be canceled
*
* This function cancels all dma-buf fence callbacks on @katom, but does not
* cancel the katom itself.
*
* The caller is responsible for ensuring that jd_done_nolock is called on
* @katom.
*
* Locking: jctx.lock must be held when calling this function.
*/
void kbase_dma_fence_cancel_callbacks(struct kbase_jd_atom *katom);
/**
* kbase_dma_fence_signal() - Signal katom's fence and clean up after wait
* @katom: Pointer to katom to signal and clean up
*
* This function will signal the @katom's fence, if it has one, and clean up
* the callback data from the katom's wait on earlier fences.
*
* Locking: jctx.lock must be held while calling this function.
*/
void kbase_dma_fence_signal(struct kbase_jd_atom *katom);
/**
* kbase_dma_fence_term() - Terminate Mali dma-fence context
* @kctx: kbase context to terminate
*/
void kbase_dma_fence_term(struct kbase_context *kctx);
/**
* kbase_dma_fence_init() - Initialize Mali dma-fence context
* @kctx: kbase context to initialize
*
* Return: presumably 0 on success and an error code otherwise; the stub used
* when CONFIG_MALI_DMA_FENCE is disabled always returns 0 — TODO confirm
* against the real implementation.
*/
int kbase_dma_fence_init(struct kbase_context *kctx);
/**
* kbase_dma_fence_waiters_remove() - Remove katom from dma-fence wait list
* @katom: Pointer to katom to remove from list
*/
void kbase_dma_fence_waiters_remove(struct kbase_jd_atom *katom);
#else /* CONFIG_MALI_DMA_FENCE */
/* Dummy functions for when dma-buf fence isn't enabled. */
/* No-op stand-in for kbase_dma_fence_init(): always reports success (0). */
static inline int kbase_dma_fence_init(struct kbase_context *kctx)
{
return 0;
}
/* No-op stand-in for kbase_dma_fence_term(): there is nothing to tear down. */
static inline void kbase_dma_fence_term(struct kbase_context *kctx) {}
#endif /* CONFIG_MALI_DMA_FENCE */
#endif

View File

@ -0,0 +1,259 @@
/*
*
* (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
* Foundation, and any use by you of this program is subject to the terms
* of such GNU licence.
*
* A copy of the licence is included with the program, and can also be obtained
* from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
* Boston, MA 02110-1301, USA.
*
*/
#include <mali_kbase.h>
#include <mali_kbase_debug.h>
#include <mali_kbase_tlstream.h>
/**
 * kbase_event_process - Retire a completed atom and fetch its user data
 * @kctx:  Context the atom belongs to
 * @katom: Atom in the KBASE_JD_ATOM_STATE_COMPLETED state
 *
 * Copies out the atom's udata, updates the timeline/tlstream bookkeeping,
 * marks the atom as KBASE_JD_ATOM_STATE_UNUSED and wakes anyone waiting on
 * katom->completed.
 *
 * Locking: kctx->jctx.lock must be held (checked via lockdep).
 *
 * Return: copy of @katom's udata, taken before the atom is marked unused.
 */
static struct base_jd_udata kbase_event_process(struct kbase_context *kctx, struct kbase_jd_atom *katom)
{
	struct base_jd_udata data;

	/*
	 * Validate the pointers first: lockdep_assert_held() dereferences
	 * kctx, so asserting afterwards would never catch a NULL context.
	 */
	KBASE_DEBUG_ASSERT(kctx != NULL);
	KBASE_DEBUG_ASSERT(katom != NULL);
	KBASE_DEBUG_ASSERT(katom->status == KBASE_JD_ATOM_STATE_COMPLETED);

	lockdep_assert_held(&kctx->jctx.lock);

	/* Snapshot the user data before the atom slot can be reused. */
	data = katom->udata;

	KBASE_TIMELINE_ATOMS_IN_FLIGHT(kctx, atomic_sub_return(1, &kctx->timeline.jd_atoms_in_flight));

	kbase_tlstream_tl_nret_atom_ctx(katom, kctx);
	kbase_tlstream_tl_del_atom(katom);

	katom->status = KBASE_JD_ATOM_STATE_UNUSED;

	wake_up(&katom->completed);

	return data;
}
/**
 * kbase_event_pending - Tell whether a reader of the event queue would block
 * @ctx: Context pointer
 *
 * Return: 1 if at least one event is queued or the event queue has been
 * closed, 0 otherwise.
 */
int kbase_event_pending(struct kbase_context *ctx)
{
	KBASE_DEBUG_ASSERT(ctx);

	/* Queued events take precedence; the closed flag is only a fallback. */
	if (atomic_read(&ctx->event_count) != 0)
		return 1;

	return atomic_read(&ctx->event_closed) != 0;
}
KBASE_EXPORT_TEST_API(kbase_event_pending);
int kbase_event_dequeue(struct kbase_context *ctx, struct base_jd_event_v2 *uevent)
{
struct kbase_jd_atom *atom;
KBASE_DEBUG_ASSERT(ctx);
mutex_lock(&ctx->event_mutex);
if (list_empty(&ctx->event_list)) {
if (!atomic_read(&ctx->event_closed)) {
mutex_unlock(&ctx->event_mutex);
return -1;
}
/* generate the BASE_JD_EVENT_DRV_TERMINATED message on the fly */
mutex_unlock(&ctx->event_mutex);
uevent->event_code = BASE_JD_EVENT_DRV_TERMINATED;
memset(&uevent->udata, 0, sizeof(uevent->udata));
dev_dbg(ctx->kbdev->dev,
"event system closed, returning BASE_JD_EVENT_DRV_TERMINATED(0x%X)\n",
BASE_JD_EVENT_DRV_TERMINATED);
return 0;
}
/* normal event processing */
atomic_dec(&ctx->event_count);
atom = list_entry(ctx->event_list.next, struct kbase_jd_atom, dep_item[0]);
list_del(ctx->event_list.next);
mutex_unlock(&ctx->event_mutex);
dev_dbg(ctx->kbdev->dev, "event dequeuing %p\n", (void *)atom);
uevent->event_code = atom->event_code;
uevent->atom_number = (atom - ctx->jctx.atoms);
if (atom->core_req & BASE_JD_REQ_EXTERNAL_RESOURCES)
kbase_jd_free_external_resources(atom);
mutex_lock(&ctx->jctx.lock);
uevent->udata = kbase_event_process(ctx, atom);
mutex_unlock(&ctx->jctx.lock);
return 0;
}
KBASE_EXPORT_TEST_API(kbase_event_dequeue);
/**
 * kbase_event_process_noreport_worker - Deferred retirement of an atom that
 *                                       reports no event
 * @data: Work item embedded in the atom (katom->work)
 *
 * Releases the atom's external resources, if it has any, and then retires the
 * atom via kbase_event_process() with jctx.lock held.
 */
static void kbase_event_process_noreport_worker(struct work_struct *data)
{
	struct kbase_jd_atom *atom = container_of(data, struct kbase_jd_atom, work);
	struct kbase_context *owner = atom->kctx;

	if (atom->core_req & BASE_JD_REQ_EXTERNAL_RESOURCES)
		kbase_jd_free_external_resources(atom);

	mutex_lock(&owner->jctx.lock);
	kbase_event_process(owner, atom);
	mutex_unlock(&owner->jctx.lock);
}
/**
 * kbase_event_process_noreport - Retire an atom without reporting an event
 * @kctx:  Context pointer
 * @katom: Atom to be processed
 *
 * An atom without external resources is retired on the spot. An atom that
 * owns external resources is handed to the context's event workqueue instead,
 * in order to avoid locking issues.
 */
static void kbase_event_process_noreport(struct kbase_context *kctx,
		struct kbase_jd_atom *katom)
{
	if (!(katom->core_req & BASE_JD_REQ_EXTERNAL_RESOURCES)) {
		kbase_event_process(kctx, katom);
		return;
	}

	INIT_WORK(&katom->work, kbase_event_process_noreport_worker);
	queue_work(kctx->event_workq, &katom->work);
}
/**
 * kbase_event_coalesce - Flush deferred events onto the main event list
 * @kctx: Context pointer
 *
 * Appends everything on kctx->event_coalesce_list to the tail of
 * kctx->event_list, leaving the coalesce list empty and its counter at zero.
 *
 * kctx->event_list and kctx->event_coalesce_count must be protected by a lock
 * unless this is the last thread using them (and we're about to terminate
 * the lock).
 *
 * Return: The number of pending events moved to the main event list
 */
static int kbase_event_coalesce(struct kbase_context *kctx)
{
	const int moved = kctx->event_coalesce_count;

	kctx->event_coalesce_count = 0;
	list_splice_tail_init(&kctx->event_coalesce_list, &kctx->event_list);

	return moved;
}
void kbase_event_post(struct kbase_context *ctx, struct kbase_jd_atom *atom)
{
if (atom->core_req & BASE_JD_REQ_EVENT_ONLY_ON_FAILURE) {
if (atom->event_code == BASE_JD_EVENT_DONE) {
/* Don't report the event */
kbase_event_process_noreport(ctx, atom);
return;
}
}
if (atom->core_req & BASEP_JD_REQ_EVENT_NEVER) {
/* Don't report the event */
kbase_event_process_noreport(ctx, atom);
return;
}
kbase_tlstream_tl_attrib_atom_state(atom, TL_ATOM_STATE_POSTED);
if (atom->core_req & BASE_JD_REQ_EVENT_COALESCE) {
/* Don't report the event until other event(s) have completed */
mutex_lock(&ctx->event_mutex);
list_add_tail(&atom->dep_item[0], &ctx->event_coalesce_list);
++ctx->event_coalesce_count;
mutex_unlock(&ctx->event_mutex);
} else {
/* Report the event and any pending events now */
int event_count = 1;
mutex_lock(&ctx->event_mutex);
event_count += kbase_event_coalesce(ctx);
list_add_tail(&atom->dep_item[0], &ctx->event_list);
atomic_add(event_count, &ctx->event_count);
mutex_unlock(&ctx->event_mutex);
kbase_event_wakeup(ctx);
}
}
KBASE_EXPORT_TEST_API(kbase_event_post);
/**
* kbase_event_close - Mark the context's event queue as closed
* @kctx: Context pointer
*
* Sets event_closed while holding event_mutex and then calls
* kbase_event_wakeup(). Once the flag is set, kbase_event_pending() reports
* non-zero and kbase_event_dequeue() returns BASE_JD_EVENT_DRV_TERMINATED
* whenever the event list is empty.
*/
void kbase_event_close(struct kbase_context *kctx)
{
mutex_lock(&kctx->event_mutex);
atomic_set(&kctx->event_closed, true);
mutex_unlock(&kctx->event_mutex);
kbase_event_wakeup(kctx);
}
int kbase_event_init(struct kbase_context *kctx)
{
KBASE_DEBUG_ASSERT(kctx);
INIT_LIST_HEAD(&kctx->event_list);
INIT_LIST_HEAD(&kctx->event_coalesce_list);
mutex_init(&kctx->event_mutex);
atomic_set(&kctx->event_count, 0);
kctx->event_coalesce_count = 0;
atomic_set(&kctx->event_closed, false);
kctx->event_workq = alloc_workqueue("kbase_event", WQ_MEM_RECLAIM, 1);
if (NULL == kctx->event_workq)
return -EINVAL;
return 0;
}
KBASE_EXPORT_TEST_API(kbase_event_init);
void kbase_event_cleanup(struct kbase_context *kctx)
{
int event_count;
KBASE_DEBUG_ASSERT(kctx);
KBASE_DEBUG_ASSERT(kctx->event_workq);
flush_workqueue(kctx->event_workq);
destroy_workqueue(kctx->event_workq);
/* We use kbase_event_dequeue to remove the remaining events as that
* deals with all the cleanup needed for the atoms.
*
* Note: use of kctx->event_list without a lock is safe because this must be the last
* thread using it (because we're about to terminate the lock)
*/
event_count = kbase_event_coalesce(kctx);
atomic_add(event_count, &kctx->event_count);
while (!list_empty(&kctx->event_list)) {
struct base_jd_event_v2 event;
kbase_event_dequeue(kctx, &event);
}
}
KBASE_EXPORT_TEST_API(kbase_event_cleanup);

View File

@ -0,0 +1,45 @@
/*
*
* (C) COPYRIGHT 2011-2015 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
* Foundation, and any use by you of this program is subject to the terms
* of such GNU licence.
*
* A copy of the licence is included with the program, and can also be obtained
* from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
* Boston, MA 02110-1301, USA.
*
*/
/* NB taken from gator */
/*
* List of possible actions to be controlled by DS-5 Streamline.
* The following numbers are used by gator to control the frame buffer dumping
* and s/w counter reporting. We cannot use the enums in mali_uk_types.h because
* they are unknown inside gator.
*/
#ifndef _KBASE_GATOR_H_
#define _KBASE_GATOR_H_
#ifdef CONFIG_MALI_GATOR_SUPPORT
/*
* Packs an event descriptor: @type is shifted left by 24 bits and @number by
* 16 bits, and the two are OR-ed together; the low 16 bits are left zero.
*/
#define GATOR_MAKE_EVENT(type, number) (((type) << 24) | ((number) << 16))
/*
* Job-slot event kinds.
* NOTE(review): presumably passed as the @type argument of GATOR_MAKE_EVENT()
* — confirm at the call sites.
*/
#define GATOR_JOB_SLOT_START 1
#define GATOR_JOB_SLOT_STOP 2
#define GATOR_JOB_SLOT_SOFT_STOPPED 3
/*
* Trace hooks that are only declared when CONFIG_MALI_GATOR_SUPPORT is set;
* their definitions are not part of this header.
*/
void kbase_trace_mali_job_slots_event(u32 event, const struct kbase_context *kctx, u8 atom_id);
void kbase_trace_mali_pm_status(u32 event, u64 value);
void kbase_trace_mali_pm_power_off(u32 event, u64 value);
void kbase_trace_mali_pm_power_on(u32 event, u64 value);
void kbase_trace_mali_page_fault_insert_pages(int event, u32 value);
void kbase_trace_mali_mmu_as_in_use(int event);
void kbase_trace_mali_mmu_as_released(int event);
void kbase_trace_mali_total_alloc_pages_change(long long int event);
#endif /* CONFIG_MALI_GATOR_SUPPORT */
#endif /* _KBASE_GATOR_H_ */

View File

@ -0,0 +1,330 @@
/*
*
* (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
* Foundation, and any use by you of this program is subject to the terms
* of such GNU licence.
*
* A copy of the licence is included with the program, and can also be obtained
* from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
* Boston, MA 02110-1301, USA.
*
*/
#include "mali_kbase.h"
#include "mali_kbase_hw.h"
#include "mali_kbase_mem_linux.h"
#include "mali_kbase_gator_api.h"
#include "mali_kbase_gator_hwcnt_names.h"
/* Number of core_mask bits examined per core group when building the v4 layout. */
#define MALI_MAX_CORES_PER_GROUP 4
/* Number of counter blocks laid out per core group on v4 devices. */
#define MALI_MAX_NUM_BLOCKS_PER_GROUP 8
/* Counters per block and bytes per counter, used to compute the dump size. */
#define MALI_COUNTERS_PER_BLOCK 64
#define MALI_BYTES_PER_COUNTER 4
/**
* struct kbase_gator_hwcnt_handles - State behind the opaque handle given to gator
* @kbdev: Device obtained with kbase_find_device(); released in
* kbase_gator_hwcnt_term()
* @vinstr_cli: Client returned by kbase_vinstr_hwcnt_kernel_setup()
* @vinstr_buffer: Dump buffer passed to vinstr and exported to gator as
* kernel_dump_buffer
* @dump_work: Work item that runs dump_worker()
* @dump_complete: Set to 1 by dump_worker() after a successful dump; read and
* cleared by kbase_gator_instr_hwcnt_dump_complete()
* @dump_lock: Held by dump_worker() while it sets @dump_complete
*/
struct kbase_gator_hwcnt_handles {
struct kbase_device *kbdev;
struct kbase_vinstr_client *vinstr_cli;
void *vinstr_buffer;
struct work_struct dump_work;
int dump_complete;
spinlock_t dump_lock;
};
static void dump_worker(struct work_struct *work);
/**
 * kbase_gator_hwcnt_init_names - Look up the counter name table of the GPU
 * @total_counters: Receives the number of entries in the returned table
 *
 * Selects the name table that matches the product ID of the first kbase
 * device. When a table is returned, a reference on this module has been
 * taken; it is dropped by kbase_gator_hwcnt_term_names().
 *
 * @total_counters is written whenever a device was found: it holds the table
 * size, or 0 for an unrecognised product ID. It is left untouched when no
 * device is found.
 *
 * Return: the name table, or NULL if @total_counters is NULL, no device was
 * found, the product ID is not recognised, or the module reference could not
 * be taken.
 */
const char * const *kbase_gator_hwcnt_init_names(uint32_t *total_counters)
{
	const char * const *names = NULL;
	uint32_t nr_names = 0;
	struct kbase_device *kbdev;
	uint32_t product_id;

	if (!total_counters)
		return NULL;

	/* Get the first device - it doesn't matter in this case */
	kbdev = kbase_find_device(-1);
	if (!kbdev)
		return NULL;

	product_id = kbdev->gpu_props.props.core_props.product_id;

	if (GPU_ID_IS_NEW_FORMAT(product_id)) {
		switch (GPU_ID2_MODEL_MATCH_VALUE(product_id)) {
		case GPU_ID2_PRODUCT_TMIX:
			names = hardware_counters_mali_tMIx;
			nr_names = ARRAY_SIZE(hardware_counters_mali_tMIx);
			break;
		case GPU_ID2_PRODUCT_THEX:
			names = hardware_counters_mali_tHEx;
			nr_names = ARRAY_SIZE(hardware_counters_mali_tHEx);
			break;
		default:
			break;
		}
	} else {
		switch (product_id) {
		case GPU_ID_PI_T60X:	/* Mali-T60x */
			names = hardware_counters_mali_t60x;
			nr_names = ARRAY_SIZE(hardware_counters_mali_t60x);
			break;
		case GPU_ID_PI_T62X:	/* Mali-T62x */
			names = hardware_counters_mali_t62x;
			nr_names = ARRAY_SIZE(hardware_counters_mali_t62x);
			break;
		case GPU_ID_PI_T72X:	/* Mali-T72x */
			names = hardware_counters_mali_t72x;
			nr_names = ARRAY_SIZE(hardware_counters_mali_t72x);
			break;
		case GPU_ID_PI_T76X:	/* Mali-T76x */
			names = hardware_counters_mali_t76x;
			nr_names = ARRAY_SIZE(hardware_counters_mali_t76x);
			break;
		case GPU_ID_PI_T82X:	/* Mali-T82x */
			names = hardware_counters_mali_t82x;
			nr_names = ARRAY_SIZE(hardware_counters_mali_t82x);
			break;
		case GPU_ID_PI_T83X:	/* Mali-T83x */
			names = hardware_counters_mali_t83x;
			nr_names = ARRAY_SIZE(hardware_counters_mali_t83x);
			break;
		case GPU_ID_PI_T86X:	/* Mali-T86x */
			names = hardware_counters_mali_t86x;
			nr_names = ARRAY_SIZE(hardware_counters_mali_t86x);
			break;
		case GPU_ID_PI_TFRX:	/* Mali-T88x */
			names = hardware_counters_mali_t88x;
			nr_names = ARRAY_SIZE(hardware_counters_mali_t88x);
			break;
		default:
			break;
		}
	}

	/* No case matched: the table pointer is still at its NULL default. */
	if (!names)
		dev_err(kbdev->dev, "Unrecognized product ID: %u\n",
				product_id);

	/* Release the kbdev reference. */
	kbase_release_device(kbdev);

	*total_counters = nr_names;

	if (!names)
		return NULL;

	/* If we return a string array take a reference on the module (or fail). */
	return try_module_get(THIS_MODULE) ? names : NULL;
}
KBASE_EXPORT_SYMBOL(kbase_gator_hwcnt_init_names);
/**
* kbase_gator_hwcnt_term_names - Drop the module reference taken by
* kbase_gator_hwcnt_init_names()
*
* kbase_gator_hwcnt_init_names() takes the reference only when it returns a
* non-NULL table, so this must be called once per such return and never
* otherwise.
*/
void kbase_gator_hwcnt_term_names(void)
{
/* Release the module reference. */
module_put(THIS_MODULE);
}
KBASE_EXPORT_SYMBOL(kbase_gator_hwcnt_term_names);
/**
 * kbase_gator_hwcnt_init - Create a hardware counter dump context for gator
 * @in_out_info: On entry carries the enable bitmask of each counter block
 *               type; on success it is filled in with the dump buffer, its
 *               size, the GPU id, the core counts and the block layout.
 *
 * Takes a reference on the first kbase device, allocates the dump buffer and
 * the block layout array, and registers a kernel-side vinstr client that
 * dumps into that buffer.
 *
 * On failure everything acquired here is released, and the pointers this
 * function stored in @in_out_info (kernel_dump_buffer, hwc_layout) are reset
 * to NULL so that a subsequent kbase_gator_hwcnt_term() on the same structure
 * cannot free them a second time.
 *
 * Return: opaque handle on success, NULL on error.
 */
struct kbase_gator_hwcnt_handles *kbase_gator_hwcnt_init(struct kbase_gator_hwcnt_info *in_out_info)
{
	struct kbase_gator_hwcnt_handles *hand;
	struct kbase_uk_hwcnt_reader_setup setup;
	uint32_t dump_size = 0, i = 0;

	if (!in_out_info)
		return NULL;

	hand = kzalloc(sizeof(*hand), GFP_KERNEL);
	if (!hand)
		return NULL;

	INIT_WORK(&hand->dump_work, dump_worker);
	spin_lock_init(&hand->dump_lock);

	/* Get the first device */
	hand->kbdev = kbase_find_device(-1);
	if (!hand->kbdev)
		goto free_hand;

	dump_size = kbase_vinstr_dump_size(hand->kbdev);
	hand->vinstr_buffer = kzalloc(dump_size, GFP_KERNEL);
	if (!hand->vinstr_buffer)
		goto release_device;
	in_out_info->kernel_dump_buffer = hand->vinstr_buffer;

	in_out_info->nr_cores = hand->kbdev->gpu_props.num_cores;
	in_out_info->nr_core_groups = hand->kbdev->gpu_props.num_core_groups;
	in_out_info->gpu_id = hand->kbdev->gpu_props.props.core_props.product_id;

	/* If we are using a v4 device (Mali-T6xx or Mali-T72x) */
	if (kbase_hw_has_feature(hand->kbdev, BASE_HW_FEATURE_V4)) {
		uint32_t cg, j;
		uint64_t core_mask;

		/* There are 8 hardware counters blocks per core group */
		in_out_info->hwc_layout = kmalloc_array(in_out_info->nr_core_groups,
				sizeof(*in_out_info->hwc_layout) *
				MALI_MAX_NUM_BLOCKS_PER_GROUP,
				GFP_KERNEL);
		if (!in_out_info->hwc_layout)
			goto free_vinstr_buffer;

		dump_size = in_out_info->nr_core_groups *
			MALI_MAX_NUM_BLOCKS_PER_GROUP *
			MALI_COUNTERS_PER_BLOCK *
			MALI_BYTES_PER_COUNTER;

		for (cg = 0; cg < in_out_info->nr_core_groups; cg++) {
			core_mask = hand->kbdev->gpu_props.props.coherency_info.group[cg].core_mask;

			/* One slot per possible core; absent cores are reserved. */
			for (j = 0; j < MALI_MAX_CORES_PER_GROUP; j++) {
				if (core_mask & (1u << j))
					in_out_info->hwc_layout[i++] = SHADER_BLOCK;
				else
					in_out_info->hwc_layout[i++] = RESERVED_BLOCK;
			}

			in_out_info->hwc_layout[i++] = TILER_BLOCK;
			in_out_info->hwc_layout[i++] = MMU_L2_BLOCK;
			in_out_info->hwc_layout[i++] = RESERVED_BLOCK;

			/* Only the first core group carries the job manager block. */
			if (0 == cg)
				in_out_info->hwc_layout[i++] = JM_BLOCK;
			else
				in_out_info->hwc_layout[i++] = RESERVED_BLOCK;
		}
	/* If we are using any other device */
	} else {
		uint32_t nr_l2, nr_sc_bits, nr_blocks, j;
		uint64_t core_mask;

		nr_l2 = hand->kbdev->gpu_props.props.l2_props.num_l2_slices;

		core_mask = hand->kbdev->gpu_props.props.coherency_info.group[0].core_mask;

		nr_sc_bits = fls64(core_mask);

		/* The job manager and tiler sets of counters
		 * are always present */
		nr_blocks = 2 + nr_sc_bits + nr_l2;

		in_out_info->hwc_layout = kmalloc_array(nr_blocks,
				sizeof(*in_out_info->hwc_layout), GFP_KERNEL);
		if (!in_out_info->hwc_layout)
			goto free_vinstr_buffer;

		dump_size = nr_blocks * MALI_COUNTERS_PER_BLOCK * MALI_BYTES_PER_COUNTER;

		in_out_info->hwc_layout[i++] = JM_BLOCK;
		in_out_info->hwc_layout[i++] = TILER_BLOCK;

		for (j = 0; j < nr_l2; j++)
			in_out_info->hwc_layout[i++] = MMU_L2_BLOCK;

		/* Emits exactly fls64(core_mask) entries, one per mask bit. */
		while (core_mask != 0ull) {
			if ((core_mask & 1ull) != 0ull)
				in_out_info->hwc_layout[i++] = SHADER_BLOCK;
			else
				in_out_info->hwc_layout[i++] = RESERVED_BLOCK;
			core_mask >>= 1;
		}
	}

	in_out_info->nr_hwc_blocks = i;
	in_out_info->size = dump_size;

	/* bitmask[] is indexed by enum hwc_type. */
	setup.jm_bm = in_out_info->bitmask[0];
	setup.tiler_bm = in_out_info->bitmask[1];
	setup.shader_bm = in_out_info->bitmask[2];
	setup.mmu_l2_bm = in_out_info->bitmask[3];
	hand->vinstr_cli = kbase_vinstr_hwcnt_kernel_setup(hand->kbdev->vinstr_ctx,
			&setup, hand->vinstr_buffer);
	if (!hand->vinstr_cli) {
		dev_err(hand->kbdev->dev, "Failed to register gator with vinstr core");
		goto free_layout;
	}

	return hand;

free_layout:
	kfree(in_out_info->hwc_layout);
	/* Don't leave a dangling pointer for kbase_gator_hwcnt_term() to free. */
	in_out_info->hwc_layout = NULL;

free_vinstr_buffer:
	kfree(hand->vinstr_buffer);
	in_out_info->kernel_dump_buffer = NULL;

release_device:
	kbase_release_device(hand->kbdev);

free_hand:
	kfree(hand);

	return NULL;
}
KBASE_EXPORT_SYMBOL(kbase_gator_hwcnt_init);
void kbase_gator_hwcnt_term(struct kbase_gator_hwcnt_info *in_out_info, struct kbase_gator_hwcnt_handles *opaque_handles)
{
if (in_out_info)
kfree(in_out_info->hwc_layout);
if (opaque_handles) {
cancel_work_sync(&opaque_handles->dump_work);
kbase_vinstr_detach_client(opaque_handles->vinstr_cli);
kfree(opaque_handles->vinstr_buffer);
kbase_release_device(opaque_handles->kbdev);
kfree(opaque_handles);
}
}
KBASE_EXPORT_SYMBOL(kbase_gator_hwcnt_term);
/**
 * dump_worker - Work handler that performs one manual counter dump
 * @work: The dump_work member of a struct kbase_gator_hwcnt_handles
 *
 * A non-zero result from kbase_vinstr_hwc_dump() makes the work item requeue
 * itself. A zero result sets dump_complete to 1 under dump_lock.
 */
static void dump_worker(struct work_struct *work)
{
	struct kbase_gator_hwcnt_handles *handles =
		container_of(work, struct kbase_gator_hwcnt_handles, dump_work);

	if (kbase_vinstr_hwc_dump(handles->vinstr_cli,
				BASE_HWCNT_READER_EVENT_MANUAL)) {
		/* Dump did not succeed: try again later. */
		schedule_work(&handles->dump_work);
		return;
	}

	spin_lock_bh(&handles->dump_lock);
	handles->dump_complete = 1;
	spin_unlock_bh(&handles->dump_lock);
}
/**
 * kbase_gator_instr_hwcnt_dump_complete - Fetch and clear the dump-done flag
 * @opaque_handles: Handle returned by kbase_gator_hwcnt_init()
 * @success:        Receives the value of the flag before it was cleared
 *
 * The flag is read and reset without taking dump_lock.
 *
 * Return: the value stored in @success, or 0 if either argument is NULL (in
 * which case @success is not written).
 */
uint32_t kbase_gator_instr_hwcnt_dump_complete(
		struct kbase_gator_hwcnt_handles *opaque_handles,
		uint32_t * const success)
{
	if (!opaque_handles || !success)
		return 0;

	*success = opaque_handles->dump_complete;
	opaque_handles->dump_complete = 0;

	return *success;
}
KBASE_EXPORT_SYMBOL(kbase_gator_instr_hwcnt_dump_complete);
/**
* kbase_gator_instr_hwcnt_dump_irq - Queue a counter dump on the system workqueue
* @opaque_handles: Handle returned by kbase_gator_hwcnt_init(); NULL is ignored
*
* Schedules dump_work and returns without waiting for the dump.
*
* Return: always 0, whether or not the work was queued.
* NOTE(review): the API header describes a non-zero return as "request
* queued"; this implementation never returns non-zero — confirm what gator
* expects before relying on the return value.
*/
uint32_t kbase_gator_instr_hwcnt_dump_irq(struct kbase_gator_hwcnt_handles *opaque_handles)
{
if (opaque_handles)
schedule_work(&opaque_handles->dump_work);
return 0;
}
KBASE_EXPORT_SYMBOL(kbase_gator_instr_hwcnt_dump_irq);

View File

@ -0,0 +1,219 @@
/*
*
* (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
* Foundation, and any use by you of this program is subject to the terms
* of such GNU licence.
*
* A copy of the licence is included with the program, and can also be obtained
* from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
* Boston, MA 02110-1301, USA.
*
*/
#ifndef _KBASE_GATOR_API_H_
#define _KBASE_GATOR_API_H_
/**
* @brief This file describes the API used by Gator to fetch hardware counters.
*/
/* This define is used by the gator kernel module compile to select which DDK
* API calling convention to use. If not defined (legacy DDK) gator assumes
* version 1. The version to DDK release mapping is:
* Version 1 API: DDK versions r1px, r2px
* Version 2 API: DDK versions r3px, r4px
* Version 3 API: DDK version r5p0 and newer
*
* API Usage
* =========
*
* 1] Call kbase_gator_hwcnt_init_names() to return the list of short counter
* names for the GPU present in this device.
*
* 2] Create a kbase_gator_hwcnt_info structure and set the counter enables for
* the counters you want enabled. The enables can all be set for simplicity in
* most use cases, but disabling some will let you minimize bandwidth impact.
*
* 3] Call kbase_gator_hwcnt_init() using the above structure, to create a
* counter context. On successful return the DDK will have populated the
* structure with a variety of useful information.
*
* 4] Call kbase_gator_instr_hwcnt_dump_irq() to queue a non-blocking request
* for a counter dump. If this returns a non-zero value the request has been
* queued, otherwise the driver has been unable to do so (typically because
* another user of the instrumentation exists concurrently).
*
* 5] Call kbase_gator_instr_hwcnt_dump_complete() to test whether the
* previously requested dump has been successful. If this returns non-zero the
* counter dump has resolved, but the value of *success must also be tested as
* the dump may not have been successful. If it returns zero the counter dump
* was abandoned due to the device being busy (typically because another
* user of the instrumentation exists concurrently).
*
* 6] Process the counters stored in the buffer pointed to by ...
*
* kbase_gator_hwcnt_info->kernel_dump_buffer
*
* In pseudo code you can find all of the counters via this approach:
*
*
* hwcnt_info # pointer to kbase_gator_hwcnt_info structure
* hwcnt_name # pointer to name list
*
* u32 * hwcnt_data = (u32*)hwcnt_info->kernel_dump_buffer
*
* # Iterate over each 64-counter block in this GPU configuration
* for( i = 0; i < hwcnt_info->nr_hwc_blocks; i++) {
* hwc_type type = hwcnt_info->hwc_layout[i];
*
* # Skip reserved type blocks - they contain no counters at all
* if( type == RESERVED_BLOCK ) {
* continue;
* }
*
* size_t name_offset = type * 64;
* size_t data_offset = i * 64;
*
* # Iterate over the names of the counters in this block type
* for( j = 0; j < 64; j++) {
* const char * name = hwcnt_name[name_offset+j];
*
* # Skip empty name strings - there is no counter here
* if( name[0] == '\0' ) {
* continue;
* }
*
* u32 data = hwcnt_data[data_offset+j];
*
* printk( "COUNTER: %s DATA: %u\n", name, data );
* }
* }
*
*
* Note that in most implementations you typically want to either SUM or
* AVERAGE multiple instances of the same counter if, for example, you have
* multiple shader cores or multiple L2 caches. The most sensible view for
* analysis is to AVERAGE shader core counters, but SUM L2 cache and MMU
* counters.
*
* 7] Goto 4, repeating until you want to stop collecting counters.
*
* 8] Release the dump resources by calling kbase_gator_hwcnt_term().
*
* 9] Release the name table resources by calling
* kbase_gator_hwcnt_term_names(). This function must only be called if
* init_names() returned a non-NULL value.
**/
#define MALI_DDK_GATOR_API_VERSION 3
/*
* Kind of a 64-counter block in the dump buffer. The value also selects the
* block's entry in kbase_gator_hwcnt_info.bitmask[] and, multiplied by 64,
* its first entry in the counter name table. RESERVED_BLOCK marks a block
* that contains no counters and has no bitmask entry.
*/
enum hwc_type {
JM_BLOCK = 0,
TILER_BLOCK,
SHADER_BLOCK,
MMU_L2_BLOCK,
RESERVED_BLOCK
};
/*
* Exchange structure between gator and kbase: gator fills in 'bitmask' before
* calling kbase_gator_hwcnt_init(), which fills in every other field.
*/
struct kbase_gator_hwcnt_info {
/* Passed from Gator to kbase */
/* the bitmask of enabled hardware counters for each counter block,
* indexed by enum hwc_type (JM, tiler, shader, MMU/L2) */
uint16_t bitmask[4];
/* Passed from kbase to Gator */
/* ptr to counter dump memory */
void *kernel_dump_buffer;
/* size of counter dump memory */
uint32_t size;
/* the ID of the Mali device */
uint32_t gpu_id;
/* the number of shader cores in the GPU */
uint32_t nr_cores;
/* the number of core groups */
uint32_t nr_core_groups;
/* the memory layout of the performance counters: one entry per block,
* nr_hwc_blocks entries in total */
enum hwc_type *hwc_layout;
/* the total number of hardware counter blocks */
uint32_t nr_hwc_blocks;
};
/**
* @brief Opaque block of Mali data which Gator needs to return to the API later.
*/
struct kbase_gator_hwcnt_handles;
/**
* @brief Initialize the resources Gator needs for performance profiling.
*
* @param in_out_info A pointer to a structure containing the enabled counters passed from Gator and all the Mali
* specific information that will be returned to Gator. On entry Gator must have populated the
* 'bitmask' field with the counters it wishes to enable for each class of counter block.
* Each entry in the array corresponds to a single counter class based on the "hwc_type"
* enumeration, and each bit corresponds to an enable for 4 sequential counters (LSB enables
* the first 4 counters in the block, and so on). See the GPU counter array as returned by
* kbase_gator_hwcnt_init_names() for the index values of each counter for the current GPU.
*
* @return Pointer to an opaque handle block on success, NULL on error.
*/
extern struct kbase_gator_hwcnt_handles *kbase_gator_hwcnt_init(struct kbase_gator_hwcnt_info *in_out_info);
/**
* @brief Free all resources once Gator has finished using performance counters.
*
* @param in_out_info A pointer to a structure containing the enabled counters passed from Gator and all the
* Mali specific information that will be returned to Gator.
* @param opaque_handles A wrapper structure for kbase structures.
*/
extern void kbase_gator_hwcnt_term(struct kbase_gator_hwcnt_info *in_out_info, struct kbase_gator_hwcnt_handles *opaque_handles);
/**
* @brief Poll whether a counter dump is successful.
*
* @param opaque_handles A wrapper structure for kbase structures.
* @param[out] success Non-zero on success, zero on failure.
*
* @return Zero if the dump is still pending, non-zero if the dump has completed. Note that a
* completed dump may not have dumped successfully, so the caller must test for both
* a completed and successful dump before processing counters.
*/
extern uint32_t kbase_gator_instr_hwcnt_dump_complete(struct kbase_gator_hwcnt_handles *opaque_handles, uint32_t * const success);
/**
* @brief Request the generation of a new counter dump.
*
* @param opaque_handles A wrapper structure for kbase structures.
*
* @return Zero if the hardware device is busy and cannot handle the request, non-zero otherwise.
*/
extern uint32_t kbase_gator_instr_hwcnt_dump_irq(struct kbase_gator_hwcnt_handles *opaque_handles);
/**
* @brief This function is used to fetch the names table based on the Mali device in use.
*
* @param[out] total_counters The total number of counters short names in the Mali devices' list.
*
* @return Pointer to an array of strings of length *total_counters.
*/
extern const char * const *kbase_gator_hwcnt_init_names(uint32_t *total_counters);
/**
* @brief This function is used to terminate the use of the names table.
*
* This function must only be called if the initial call to kbase_gator_hwcnt_init_names returned a non-NULL value.
*/
extern void kbase_gator_hwcnt_term_names(void);
#endif

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,291 @@
/*
*
* (C) COPYRIGHT 2016 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
* Foundation, and any use by you of this program is subject to the terms
* of such GNU licence.
*
* A copy of the licence is included with the program, and can also be obtained
* from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
* Boston, MA 02110-1301, USA.
*
*/
/*
* This header was autogenerated, it should not be edited.
*/
#ifndef _KBASE_GATOR_HWCNT_NAMES_THEX_H_
#define _KBASE_GATOR_HWCNT_NAMES_THEX_H_
static const char * const hardware_counters_mali_tHEx[] = {
/* Performance counters for the Job Manager */
"",
"",
"",
"",
"THEx_MESSAGES_SENT",
"THEx_MESSAGES_RECEIVED",
"THEx_GPU_ACTIVE",
"THEx_IRQ_ACTIVE",
"THEx_JS0_JOBS",
"THEx_JS0_TASKS",
"THEx_JS0_ACTIVE",
"",
"THEx_JS0_WAIT_READ",
"THEx_JS0_WAIT_ISSUE",
"THEx_JS0_WAIT_DEPEND",
"THEx_JS0_WAIT_FINISH",
"THEx_JS1_JOBS",
"THEx_JS1_TASKS",
"THEx_JS1_ACTIVE",
"",
"THEx_JS1_WAIT_READ",
"THEx_JS1_WAIT_ISSUE",
"THEx_JS1_WAIT_DEPEND",
"THEx_JS1_WAIT_FINISH",
"THEx_JS2_JOBS",
"THEx_JS2_TASKS",
"THEx_JS2_ACTIVE",
"",
"THEx_JS2_WAIT_READ",
"THEx_JS2_WAIT_ISSUE",
"THEx_JS2_WAIT_DEPEND",
"THEx_JS2_WAIT_FINISH",
"",
"",
"",
"",
"",
"",
"",
"",
"",
"",
"",
"",
"",
"",
"",
"",
"",
"",
"",
"",
"",
"",
"",
"",
"",
"",
"",
"",
"",
"",
"",
"",
/* Performance counters for the Tiler */
"",
"",
"",
"",
"THEx_TILER_ACTIVE",
"THEx_JOBS_PROCESSED",
"THEx_TRIANGLES",
"THEx_LINES",
"THEx_POINTS",
"THEx_FRONT_FACING",
"THEx_BACK_FACING",
"THEx_PRIM_VISIBLE",
"THEx_PRIM_CULLED",
"THEx_PRIM_CLIPPED",
"THEx_PRIM_SAT_CULLED",
"",
"",
"THEx_BUS_READ",
"",
"THEx_BUS_WRITE",
"THEx_LOADING_DESC",
"THEx_IDVS_POS_SHAD_REQ",
"THEx_IDVS_POS_SHAD_WAIT",
"THEx_IDVS_POS_SHAD_STALL",
"THEx_IDVS_POS_FIFO_FULL",
"THEx_PREFETCH_STALL",
"THEx_VCACHE_HIT",
"THEx_VCACHE_MISS",
"THEx_VCACHE_LINE_WAIT",
"THEx_VFETCH_POS_READ_WAIT",
"THEx_VFETCH_VERTEX_WAIT",
"THEx_VFETCH_STALL",
"THEx_PRIMASSY_STALL",
"THEx_BBOX_GEN_STALL",
"THEx_IDVS_VBU_HIT",
"THEx_IDVS_VBU_MISS",
"THEx_IDVS_VBU_LINE_DEALLOCATE",
"THEx_IDVS_VAR_SHAD_REQ",
"THEx_IDVS_VAR_SHAD_STALL",
"THEx_BINNER_STALL",
"THEx_ITER_STALL",
"THEx_COMPRESS_MISS",
"THEx_COMPRESS_STALL",
"THEx_PCACHE_HIT",
"THEx_PCACHE_MISS",
"THEx_PCACHE_MISS_STALL",
"THEx_PCACHE_EVICT_STALL",
"THEx_PMGR_PTR_WR_STALL",
"THEx_PMGR_PTR_RD_STALL",
"THEx_PMGR_CMD_WR_STALL",
"THEx_WRBUF_ACTIVE",
"THEx_WRBUF_HIT",
"THEx_WRBUF_MISS",
"THEx_WRBUF_NO_FREE_LINE_STALL",
"THEx_WRBUF_NO_AXI_ID_STALL",
"THEx_WRBUF_AXI_STALL",
"",
"",
"",
"THEx_UTLB_TRANS",
"THEx_UTLB_TRANS_HIT",
"THEx_UTLB_TRANS_STALL",
"THEx_UTLB_TRANS_MISS_DELAY",
"THEx_UTLB_MMU_REQ",
/* Performance counters for the Shader Core */
"",
"",
"",
"",
"THEx_FRAG_ACTIVE",
"THEx_FRAG_PRIMITIVES",
"THEx_FRAG_PRIM_RAST",
"THEx_FRAG_FPK_ACTIVE",
"THEx_FRAG_STARVING",
"THEx_FRAG_WARPS",
"THEx_FRAG_PARTIAL_WARPS",
"THEx_FRAG_QUADS_RAST",
"THEx_FRAG_QUADS_EZS_TEST",
"THEx_FRAG_QUADS_EZS_UPDATE",
"THEx_FRAG_QUADS_EZS_KILL",
"THEx_FRAG_LZS_TEST",
"THEx_FRAG_LZS_KILL",
"",
"THEx_FRAG_PTILES",
"THEx_FRAG_TRANS_ELIM",
"THEx_QUAD_FPK_KILLER",
"",
"THEx_COMPUTE_ACTIVE",
"THEx_COMPUTE_TASKS",
"THEx_COMPUTE_WARPS",
"THEx_COMPUTE_STARVING",
"THEx_EXEC_CORE_ACTIVE",
"THEx_EXEC_ACTIVE",
"THEx_EXEC_INSTR_COUNT",
"THEx_EXEC_INSTR_DIVERGED",
"THEx_EXEC_INSTR_STARVING",
"THEx_ARITH_INSTR_SINGLE_FMA",
"THEx_ARITH_INSTR_DOUBLE",
"THEx_ARITH_INSTR_MSG",
"THEx_ARITH_INSTR_MSG_ONLY",
"THEx_TEX_INSTR",
"THEx_TEX_INSTR_MIPMAP",
"THEx_TEX_INSTR_COMPRESSED",
"THEx_TEX_INSTR_3D",
"THEx_TEX_INSTR_TRILINEAR",
"THEx_TEX_COORD_ISSUE",
"THEx_TEX_COORD_STALL",
"THEx_TEX_STARVE_CACHE",
"THEx_TEX_STARVE_FILTER",
"THEx_LS_MEM_READ_FULL",
"THEx_LS_MEM_READ_SHORT",
"THEx_LS_MEM_WRITE_FULL",
"THEx_LS_MEM_WRITE_SHORT",
"THEx_LS_MEM_ATOMIC",
"THEx_VARY_INSTR",
"THEx_VARY_SLOT_32",
"THEx_VARY_SLOT_16",
"THEx_ATTR_INSTR",
"THEx_ARITH_INSTR_FP_MUL",
"THEx_BEATS_RD_FTC",
"THEx_BEATS_RD_FTC_EXT",
"THEx_BEATS_RD_LSC",
"THEx_BEATS_RD_LSC_EXT",
"THEx_BEATS_RD_TEX",
"THEx_BEATS_RD_TEX_EXT",
"THEx_BEATS_RD_OTHER",
"THEx_BEATS_WR_LSC",
"THEx_BEATS_WR_TIB",
"",
/* Performance counters for the Memory System */
"",
"",
"",
"",
"THEx_MMU_REQUESTS",
"",
"",
"",
"",
"",
"",
"",
"",
"",
"",
"",
"THEx_L2_RD_MSG_IN",
"THEx_L2_RD_MSG_IN_STALL",
"THEx_L2_WR_MSG_IN",
"THEx_L2_WR_MSG_IN_STALL",
"THEx_L2_SNP_MSG_IN",
"THEx_L2_SNP_MSG_IN_STALL",
"THEx_L2_RD_MSG_OUT",
"THEx_L2_RD_MSG_OUT_STALL",
"THEx_L2_WR_MSG_OUT",
"THEx_L2_ANY_LOOKUP",
"THEx_L2_READ_LOOKUP",
"THEx_L2_WRITE_LOOKUP",
"THEx_L2_EXT_SNOOP_LOOKUP",
"THEx_L2_EXT_READ",
"THEx_L2_EXT_READ_NOSNP",
"THEx_L2_EXT_READ_UNIQUE",
"THEx_L2_EXT_READ_BEATS",
"THEx_L2_EXT_AR_STALL",
"THEx_L2_EXT_AR_CNT_Q1",
"THEx_L2_EXT_AR_CNT_Q2",
"THEx_L2_EXT_AR_CNT_Q3",
"THEx_L2_EXT_RRESP_0_127",
"THEx_L2_EXT_RRESP_128_191",
"THEx_L2_EXT_RRESP_192_255",
"THEx_L2_EXT_RRESP_256_319",
"THEx_L2_EXT_RRESP_320_383",
"THEx_L2_EXT_WRITE",
"THEx_L2_EXT_WRITE_NOSNP_FULL",
"THEx_L2_EXT_WRITE_NOSNP_PTL",
"THEx_L2_EXT_WRITE_SNP_FULL",
"THEx_L2_EXT_WRITE_SNP_PTL",
"THEx_L2_EXT_WRITE_BEATS",
"THEx_L2_EXT_W_STALL",
"THEx_L2_EXT_AW_CNT_Q1",
"THEx_L2_EXT_AW_CNT_Q2",
"THEx_L2_EXT_AW_CNT_Q3",
"THEx_L2_EXT_SNOOP",
"THEx_L2_EXT_SNOOP_STALL",
"THEx_L2_EXT_SNOOP_RESP_CLEAN",
"THEx_L2_EXT_SNOOP_RESP_DATA",
"THEx_L2_EXT_SNOOP_INTERNAL",
"",
"",
"",
"",
"",
"",
"",
};
#endif /* _KBASE_GATOR_HWCNT_NAMES_THEX_H_ */

View File

@ -0,0 +1,291 @@
/*
*
* (C) COPYRIGHT 2016 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
* Foundation, and any use by you of this program is subject to the terms
* of such GNU licence.
*
* A copy of the licence is included with the program, and can also be obtained
* from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
* Boston, MA 02110-1301, USA.
*
*/
/*
* This header was autogenerated, it should not be edited.
*/
#ifndef _KBASE_GATOR_HWCNT_NAMES_TMIX_H_
#define _KBASE_GATOR_HWCNT_NAMES_TMIX_H_
/*
 * Hardware counter name table for the tMIx GPU.
 *
 * The table holds 256 entries laid out as four consecutive groups of 64:
 * Job Manager, Tiler, Shader Core and Memory System, in that order. An empty
 * string marks a slot that has no name in this table.
 * NOTE(review): the position of a name presumably matches the position of the
 * counter in a hardware dump, so entries must not be reordered - confirm
 * against the code that consumes this table.
 */
static const char * const hardware_counters_mali_tMIx[] = {
/* Performance counters for the Job Manager (entries 0-63) */
"",
"",
"",
"",
"TMIx_MESSAGES_SENT",
"TMIx_MESSAGES_RECEIVED",
"TMIx_GPU_ACTIVE",
"TMIx_IRQ_ACTIVE",
"TMIx_JS0_JOBS",
"TMIx_JS0_TASKS",
"TMIx_JS0_ACTIVE",
"",
"TMIx_JS0_WAIT_READ",
"TMIx_JS0_WAIT_ISSUE",
"TMIx_JS0_WAIT_DEPEND",
"TMIx_JS0_WAIT_FINISH",
"TMIx_JS1_JOBS",
"TMIx_JS1_TASKS",
"TMIx_JS1_ACTIVE",
"",
"TMIx_JS1_WAIT_READ",
"TMIx_JS1_WAIT_ISSUE",
"TMIx_JS1_WAIT_DEPEND",
"TMIx_JS1_WAIT_FINISH",
"TMIx_JS2_JOBS",
"TMIx_JS2_TASKS",
"TMIx_JS2_ACTIVE",
"",
"TMIx_JS2_WAIT_READ",
"TMIx_JS2_WAIT_ISSUE",
"TMIx_JS2_WAIT_DEPEND",
"TMIx_JS2_WAIT_FINISH",
"",
"",
"",
"",
"",
"",
"",
"",
"",
"",
"",
"",
"",
"",
"",
"",
"",
"",
"",
"",
"",
"",
"",
"",
"",
"",
"",
"",
"",
"",
"",
"",
/* Performance counters for the Tiler (entries 64-127) */
"",
"",
"",
"",
"TMIx_TILER_ACTIVE",
"TMIx_JOBS_PROCESSED",
"TMIx_TRIANGLES",
"TMIx_LINES",
"TMIx_POINTS",
"TMIx_FRONT_FACING",
"TMIx_BACK_FACING",
"TMIx_PRIM_VISIBLE",
"TMIx_PRIM_CULLED",
"TMIx_PRIM_CLIPPED",
"TMIx_PRIM_SAT_CULLED",
"",
"",
"TMIx_BUS_READ",
"",
"TMIx_BUS_WRITE",
"TMIx_LOADING_DESC",
"TMIx_IDVS_POS_SHAD_REQ",
"TMIx_IDVS_POS_SHAD_WAIT",
"TMIx_IDVS_POS_SHAD_STALL",
"TMIx_IDVS_POS_FIFO_FULL",
"TMIx_PREFETCH_STALL",
"TMIx_VCACHE_HIT",
"TMIx_VCACHE_MISS",
"TMIx_VCACHE_LINE_WAIT",
"TMIx_VFETCH_POS_READ_WAIT",
"TMIx_VFETCH_VERTEX_WAIT",
"TMIx_VFETCH_STALL",
"TMIx_PRIMASSY_STALL",
"TMIx_BBOX_GEN_STALL",
"TMIx_IDVS_VBU_HIT",
"TMIx_IDVS_VBU_MISS",
"TMIx_IDVS_VBU_LINE_DEALLOCATE",
"TMIx_IDVS_VAR_SHAD_REQ",
"TMIx_IDVS_VAR_SHAD_STALL",
"TMIx_BINNER_STALL",
"TMIx_ITER_STALL",
"TMIx_COMPRESS_MISS",
"TMIx_COMPRESS_STALL",
"TMIx_PCACHE_HIT",
"TMIx_PCACHE_MISS",
"TMIx_PCACHE_MISS_STALL",
"TMIx_PCACHE_EVICT_STALL",
"TMIx_PMGR_PTR_WR_STALL",
"TMIx_PMGR_PTR_RD_STALL",
"TMIx_PMGR_CMD_WR_STALL",
"TMIx_WRBUF_ACTIVE",
"TMIx_WRBUF_HIT",
"TMIx_WRBUF_MISS",
"TMIx_WRBUF_NO_FREE_LINE_STALL",
"TMIx_WRBUF_NO_AXI_ID_STALL",
"TMIx_WRBUF_AXI_STALL",
"",
"",
"",
"TMIx_UTLB_TRANS",
"TMIx_UTLB_TRANS_HIT",
"TMIx_UTLB_TRANS_STALL",
"TMIx_UTLB_TRANS_MISS_DELAY",
"TMIx_UTLB_MMU_REQ",
/* Performance counters for the Shader Core (entries 128-191) */
"",
"",
"",
"",
"TMIx_FRAG_ACTIVE",
"TMIx_FRAG_PRIMITIVES",
"TMIx_FRAG_PRIM_RAST",
"TMIx_FRAG_FPK_ACTIVE",
"TMIx_FRAG_STARVING",
"TMIx_FRAG_WARPS",
"TMIx_FRAG_PARTIAL_WARPS",
"TMIx_FRAG_QUADS_RAST",
"TMIx_FRAG_QUADS_EZS_TEST",
"TMIx_FRAG_QUADS_EZS_UPDATE",
"TMIx_FRAG_QUADS_EZS_KILL",
"TMIx_FRAG_LZS_TEST",
"TMIx_FRAG_LZS_KILL",
"",
"TMIx_FRAG_PTILES",
"TMIx_FRAG_TRANS_ELIM",
"TMIx_QUAD_FPK_KILLER",
"",
"TMIx_COMPUTE_ACTIVE",
"TMIx_COMPUTE_TASKS",
"TMIx_COMPUTE_WARPS",
"TMIx_COMPUTE_STARVING",
"TMIx_EXEC_CORE_ACTIVE",
"TMIx_EXEC_ACTIVE",
"TMIx_EXEC_INSTR_COUNT",
"TMIx_EXEC_INSTR_DIVERGED",
"TMIx_EXEC_INSTR_STARVING",
"TMIx_ARITH_INSTR_SINGLE_FMA",
"TMIx_ARITH_INSTR_DOUBLE",
"TMIx_ARITH_INSTR_MSG",
"TMIx_ARITH_INSTR_MSG_ONLY",
"TMIx_TEX_INSTR",
"TMIx_TEX_INSTR_MIPMAP",
"TMIx_TEX_INSTR_COMPRESSED",
"TMIx_TEX_INSTR_3D",
"TMIx_TEX_INSTR_TRILINEAR",
"TMIx_TEX_COORD_ISSUE",
"TMIx_TEX_COORD_STALL",
"TMIx_TEX_STARVE_CACHE",
"TMIx_TEX_STARVE_FILTER",
"TMIx_LS_MEM_READ_FULL",
"TMIx_LS_MEM_READ_SHORT",
"TMIx_LS_MEM_WRITE_FULL",
"TMIx_LS_MEM_WRITE_SHORT",
"TMIx_LS_MEM_ATOMIC",
"TMIx_VARY_INSTR",
"TMIx_VARY_SLOT_32",
"TMIx_VARY_SLOT_16",
"TMIx_ATTR_INSTR",
"TMIx_ARITH_INSTR_FP_MUL",
"TMIx_BEATS_RD_FTC",
"TMIx_BEATS_RD_FTC_EXT",
"TMIx_BEATS_RD_LSC",
"TMIx_BEATS_RD_LSC_EXT",
"TMIx_BEATS_RD_TEX",
"TMIx_BEATS_RD_TEX_EXT",
"TMIx_BEATS_RD_OTHER",
"TMIx_BEATS_WR_LSC",
"TMIx_BEATS_WR_TIB",
"",
/* Performance counters for the Memory System (entries 192-255) */
"",
"",
"",
"",
"TMIx_MMU_REQUESTS",
"",
"",
"",
"",
"",
"",
"",
"",
"",
"",
"",
"TMIx_L2_RD_MSG_IN",
"TMIx_L2_RD_MSG_IN_STALL",
"TMIx_L2_WR_MSG_IN",
"TMIx_L2_WR_MSG_IN_STALL",
"TMIx_L2_SNP_MSG_IN",
"TMIx_L2_SNP_MSG_IN_STALL",
"TMIx_L2_RD_MSG_OUT",
"TMIx_L2_RD_MSG_OUT_STALL",
"TMIx_L2_WR_MSG_OUT",
"TMIx_L2_ANY_LOOKUP",
"TMIx_L2_READ_LOOKUP",
"TMIx_L2_WRITE_LOOKUP",
"TMIx_L2_EXT_SNOOP_LOOKUP",
"TMIx_L2_EXT_READ",
"TMIx_L2_EXT_READ_NOSNP",
"TMIx_L2_EXT_READ_UNIQUE",
"TMIx_L2_EXT_READ_BEATS",
"TMIx_L2_EXT_AR_STALL",
"TMIx_L2_EXT_AR_CNT_Q1",
"TMIx_L2_EXT_AR_CNT_Q2",
"TMIx_L2_EXT_AR_CNT_Q3",
"TMIx_L2_EXT_RRESP_0_127",
"TMIx_L2_EXT_RRESP_128_191",
"TMIx_L2_EXT_RRESP_192_255",
"TMIx_L2_EXT_RRESP_256_319",
"TMIx_L2_EXT_RRESP_320_383",
"TMIx_L2_EXT_WRITE",
"TMIx_L2_EXT_WRITE_NOSNP_FULL",
"TMIx_L2_EXT_WRITE_NOSNP_PTL",
"TMIx_L2_EXT_WRITE_SNP_FULL",
"TMIx_L2_EXT_WRITE_SNP_PTL",
"TMIx_L2_EXT_WRITE_BEATS",
"TMIx_L2_EXT_W_STALL",
"TMIx_L2_EXT_AW_CNT_Q1",
"TMIx_L2_EXT_AW_CNT_Q2",
"TMIx_L2_EXT_AW_CNT_Q3",
"TMIx_L2_EXT_SNOOP",
"TMIx_L2_EXT_SNOOP_STALL",
"TMIx_L2_EXT_SNOOP_RESP_CLEAN",
"TMIx_L2_EXT_SNOOP_RESP_DATA",
"TMIx_L2_EXT_SNOOP_INTERNAL",
"",
"",
"",
"",
"",
"",
"",
};
#endif /* _KBASE_GATOR_HWCNT_NAMES_TMIX_H_ */

View File

@ -0,0 +1,113 @@
/*
*
* (C) COPYRIGHT 2015-2016 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
* Foundation, and any use by you of this program is subject to the terms
* of such GNU licence.
*
* A copy of the licence is included with the program, and can also be obtained
* from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
* Boston, MA 02110-1301, USA.
*
*/
#ifndef _KBASE_GPU_ID_H_
#define _KBASE_GPU_ID_H_
/*
 * GPU_ID register (legacy layout): bit position and mask of each field.
 *
 *   bits  0-3   version status
 *   bits  4-11  minor revision
 *   bits 12-15  major revision
 *   bits 16-31  product id
 *
 * The masks are built from unsigned constants. 0xFFFF << 16 cannot be
 * represented in a signed int, so the signed form is undefined behaviour in
 * C and, where it "works", yields a negative mask that sign-extends when it
 * is combined with a 64-bit value.
 */
#define GPU_ID_VERSION_STATUS_SHIFT       0
#define GPU_ID_VERSION_MINOR_SHIFT        4
#define GPU_ID_VERSION_MAJOR_SHIFT        12
#define GPU_ID_VERSION_PRODUCT_ID_SHIFT   16
#define GPU_ID_VERSION_STATUS             (0xFu << GPU_ID_VERSION_STATUS_SHIFT)
#define GPU_ID_VERSION_MINOR              (0xFFu << GPU_ID_VERSION_MINOR_SHIFT)
#define GPU_ID_VERSION_MAJOR              (0xFu << GPU_ID_VERSION_MAJOR_SHIFT)
#define GPU_ID_VERSION_PRODUCT_ID         (0xFFFFu << GPU_ID_VERSION_PRODUCT_ID_SHIFT)
/* Values for GPU_ID_VERSION_PRODUCT_ID bitfield (legacy GPU_ID layout) */
#define GPU_ID_PI_T60X 0x6956
#define GPU_ID_PI_T62X 0x0620
#define GPU_ID_PI_T76X 0x0750
#define GPU_ID_PI_T72X 0x0720
#define GPU_ID_PI_TFRX 0x0880
#define GPU_ID_PI_T86X 0x0860
#define GPU_ID_PI_T82X 0x0820
#define GPU_ID_PI_T83X 0x0830
/* New GPU ID format when PRODUCT_ID is >= 0x1000 (and not 0x6956) */
#define GPU_ID_PI_NEW_FORMAT_START 0x1000
/*
 * True when product_id denotes the new GPU_ID layout: it is at or above
 * GPU_ID_PI_NEW_FORMAT_START and is not the T60X id, which is numerically
 * above the threshold but uses the legacy layout.
 * Note: product_id is evaluated twice, so pass an expression without side
 * effects.
 */
#define GPU_ID_IS_NEW_FORMAT(product_id) ((product_id) != GPU_ID_PI_T60X && \
(product_id) >= \
GPU_ID_PI_NEW_FORMAT_START)
/*
 * GPU_ID register (new layout): bit position and mask of each field.
 *
 *   bits  0-3   version status
 *   bits  4-11  version minor
 *   bits 12-15  version major
 *   bits 16-19  product major
 *   bits 20-23  architecture revision
 *   bits 24-27  architecture minor
 *   bits 28-31  architecture major
 *
 * The masks are built from unsigned constants. 0xF << 28 cannot be
 * represented in a signed int, so the signed form is undefined behaviour in
 * C and, where it "works", yields a negative mask that sign-extends when it
 * is combined with a 64-bit value. GPU_ID2_PRODUCT_MODEL inherits the
 * unsigned type from the masks it combines.
 */
#define GPU_ID2_VERSION_STATUS_SHIFT      0
#define GPU_ID2_VERSION_MINOR_SHIFT       4
#define GPU_ID2_VERSION_MAJOR_SHIFT       12
#define GPU_ID2_PRODUCT_MAJOR_SHIFT       16
#define GPU_ID2_ARCH_REV_SHIFT            20
#define GPU_ID2_ARCH_MINOR_SHIFT          24
#define GPU_ID2_ARCH_MAJOR_SHIFT          28
#define GPU_ID2_VERSION_STATUS            (0xFu << GPU_ID2_VERSION_STATUS_SHIFT)
#define GPU_ID2_VERSION_MINOR             (0xFFu << GPU_ID2_VERSION_MINOR_SHIFT)
#define GPU_ID2_VERSION_MAJOR             (0xFu << GPU_ID2_VERSION_MAJOR_SHIFT)
#define GPU_ID2_PRODUCT_MAJOR             (0xFu << GPU_ID2_PRODUCT_MAJOR_SHIFT)
#define GPU_ID2_ARCH_REV                  (0xFu << GPU_ID2_ARCH_REV_SHIFT)
#define GPU_ID2_ARCH_MINOR                (0xFu << GPU_ID2_ARCH_MINOR_SHIFT)
#define GPU_ID2_ARCH_MAJOR                (0xFu << GPU_ID2_ARCH_MAJOR_SHIFT)
/* Mask selecting the two fields that identify a GPU model */
#define GPU_ID2_PRODUCT_MODEL             (GPU_ID2_ARCH_MAJOR | GPU_ID2_PRODUCT_MAJOR)
/* Helper macro to create a partial GPU_ID (new format) that defines
a product ignoring its version.
Each argument is shifted into place as given; nothing is masked, so callers
must pass values that fit the width of the corresponding field. */
#define GPU_ID2_PRODUCT_MAKE(arch_major, arch_minor, arch_rev, product_major) \
(((arch_major) << GPU_ID2_ARCH_MAJOR_SHIFT) | \
((arch_minor) << GPU_ID2_ARCH_MINOR_SHIFT) | \
((arch_rev) << GPU_ID2_ARCH_REV_SHIFT) | \
((product_major) << GPU_ID2_PRODUCT_MAJOR_SHIFT))
/* Helper macro to create a partial GPU_ID (new format) that specifies the
revision (major, minor, status) of a product.
As above, the arguments are shifted but not masked. */
#define GPU_ID2_VERSION_MAKE(version_major, version_minor, version_status) \
(((version_major) << GPU_ID2_VERSION_MAJOR_SHIFT) | \
((version_minor) << GPU_ID2_VERSION_MINOR_SHIFT) | \
((version_status) << GPU_ID2_VERSION_STATUS_SHIFT))
/* Helper macro to create a complete GPU_ID (new format): the OR of the
product part and the version part built by the two helpers above. */
#define GPU_ID2_MAKE(arch_major, arch_minor, arch_rev, product_major, \
version_major, version_minor, version_status) \
(GPU_ID2_PRODUCT_MAKE(arch_major, arch_minor, arch_rev, \
product_major) | \
GPU_ID2_VERSION_MAKE(version_major, version_minor, \
version_status))
/* Helper macro to create a partial GPU_ID (new format) that identifies
a particular GPU model by its arch_major and product_major. */
#define GPU_ID2_MODEL_MAKE(arch_major, product_major) \
(((arch_major) << GPU_ID2_ARCH_MAJOR_SHIFT) | \
((product_major) << GPU_ID2_PRODUCT_MAJOR_SHIFT))
/* Strip off the non-relevant bits from a product_id value and make it suitable
for comparison against the GPU_ID2_PRODUCT_xxx values which identify a GPU
model. The value is shifted left by GPU_ID2_PRODUCT_MAJOR_SHIFT and then
masked with GPU_ID2_PRODUCT_MODEL.
NOTE(review): this presumably expects the 16-bit product id already extracted
from bits 16-31 of GPU_ID - confirm against callers. */
#define GPU_ID2_MODEL_MATCH_VALUE(product_id) \
(((product_id) << GPU_ID2_PRODUCT_MAJOR_SHIFT) & \
GPU_ID2_PRODUCT_MODEL)
/* GPU models known in the new format: (arch_major, product_major) pairs */
#define GPU_ID2_PRODUCT_TMIX GPU_ID2_MODEL_MAKE(6, 0)
#define GPU_ID2_PRODUCT_THEX GPU_ID2_MODEL_MAKE(6, 1)
/* Values for GPU_ID_VERSION_STATUS field for PRODUCT_ID GPU_ID_PI_T60X */
#define GPU_ID_S_15DEV0 0x1
#define GPU_ID_S_EAC 0x2
/* Helper macro to create a GPU_ID (legacy format) assuming valid values for
id, major, minor, status. Each argument is shifted to the position of its
field and the results are ORed together; nothing is masked or range-checked. */
#define GPU_ID_MAKE(id, major, minor, status) \
(((id) << GPU_ID_VERSION_PRODUCT_ID_SHIFT) | \
((major) << GPU_ID_VERSION_MAJOR_SHIFT) | \
((minor) << GPU_ID_VERSION_MINOR_SHIFT) | \
((status) << GPU_ID_VERSION_STATUS_SHIFT))
#endif /* _KBASE_GPU_ID_H_ */

View File

@ -0,0 +1,97 @@
/*
*
* (C) COPYRIGHT 2012-2016 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
* Foundation, and any use by you of this program is subject to the terms
* of such GNU licence.
*
* A copy of the licence is included with the program, and can also be obtained
* from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
* Boston, MA 02110-1301, USA.
*
*/
#include <mali_kbase.h>
#ifdef CONFIG_DEBUG_FS
/** Show callback for the @c gpu_memory debugfs file.
*
* This function is called to get the contents of the @c gpu_memory debugfs
* file. This is a report of current gpu memory usage.
*
* @param sfile The debugfs entry
* @param data Data associated with the entry
*
* @return 0 if successfully prints data in debugfs entry file
* -1 if it encountered an error
*/
static int kbasep_gpu_memory_seq_show(struct seq_file *sfile, void *data)
{
struct list_head *entry;
const struct list_head *kbdev_list;
kbdev_list = kbase_dev_list_get();
list_for_each(entry, kbdev_list) {
struct kbase_device *kbdev = NULL;
struct kbasep_kctx_list_element *element;
kbdev = list_entry(entry, struct kbase_device, entry);
/* output the total memory usage and cap for this device */
seq_printf(sfile, "%-16s %10u\n",
kbdev->devname,
atomic_read(&(kbdev->memdev.used_pages)));
mutex_lock(&kbdev->kctx_list_lock);
list_for_each_entry(element, &kbdev->kctx_list, link) {
/* output the memory usage and cap for each kctx
* opened on this device */
seq_printf(sfile, " %s-0x%p %10u\n",
"kctx",
element->kctx,
atomic_read(&(element->kctx->used_pages)));
}
mutex_unlock(&kbdev->kctx_list_lock);
}
kbase_dev_list_put(kbdev_list);
return 0;
}
/**
 * kbasep_gpu_memory_debugfs_open - Open callback for the gpu_memory entry
 * @in:   Inode of the debugfs file (not used)
 * @file: File being opened
 *
 * Binds kbasep_gpu_memory_seq_show() to @file through single_open(); no
 * private data is passed to the show callback.
 *
 * Return: the value returned by single_open().
 */
static int kbasep_gpu_memory_debugfs_open(struct inode *in, struct file *file)
{
	return single_open(file, kbasep_gpu_memory_seq_show, NULL);
}
/*
 * File operations for the gpu_memory debugfs entry: opened through the
 * single_open() wrapper above, read and seeked through the seq_file
 * helpers, and released with single_release().
 */
static const struct file_operations kbasep_gpu_memory_debugfs_fops = {
	.open    = kbasep_gpu_memory_debugfs_open,
	.release = single_release,
	.read    = seq_read,
	.llseek  = seq_lseek,
};
/**
 * kbasep_gpu_memory_debugfs_init - Create the gpu_memory debugfs entry
 * @kbdev: Device whose mali_debugfs_directory receives the entry
 *
 * Creates a file named "gpu_memory" with S_IRUGO permissions, backed by
 * kbasep_gpu_memory_debugfs_fops. The result of debugfs_create_file() is
 * not checked.
 */
void kbasep_gpu_memory_debugfs_init(struct kbase_device *kbdev)
{
	debugfs_create_file("gpu_memory", S_IRUGO,
			    kbdev->mali_debugfs_directory, NULL,
			    &kbasep_gpu_memory_debugfs_fops);
}
#else
/**
 * kbasep_gpu_memory_debugfs_init - Stub used when CONFIG_DEBUG_FS is not set
 * @kbdev: Device pointer (not used)
 *
 * Does nothing; it exists so that callers need no conditional compilation.
 */
void kbasep_gpu_memory_debugfs_init(struct kbase_device *kbdev)
{
}
#endif

View File

@ -0,0 +1,37 @@
/*
*
* (C) COPYRIGHT 2012-2014, 2016 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
* Foundation, and any use by you of this program is subject to the terms
* of such GNU licence.
*
* A copy of the licence is included with the program, and can also be obtained
* from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
* Boston, MA 02110-1301, USA.
*
*/
/**
* @file mali_kbase_gpu_memory_debugfs.h
* Header file for gpu_memory entry in debugfs
*
*/
#ifndef _KBASE_GPU_MEMORY_DEBUGFS_H
#define _KBASE_GPU_MEMORY_DEBUGFS_H
#include <linux/debugfs.h>
#include <linux/seq_file.h>
/**
* @brief Initialize gpu_memory debugfs entry
*
* @param kbdev The struct kbase_device structure for the device
*/
void kbasep_gpu_memory_debugfs_init(struct kbase_device *kbdev);
#endif /*_KBASE_GPU_MEMORY_DEBUGFS_H*/

View File

@ -0,0 +1,314 @@
/*
*
* (C) COPYRIGHT 2011-2015 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
* Foundation, and any use by you of this program is subject to the terms
* of such GNU licence.
*
* A copy of the licence is included with the program, and can also be obtained
* from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
* Boston, MA 02110-1301, USA.
*
*/
/*
* Base kernel property query APIs
*/
#include <mali_kbase.h>
#include <mali_midg_regmap.h>
#include <mali_kbase_gpuprops.h>
#include <mali_kbase_config_defaults.h>
#include <mali_kbase_hwaccess_gpuprops.h>
#include <linux/clk.h>
/**
 * KBASE_UBFX32 - Unsigned bitfield extract from a 32-bit value.
 * @value:  The value to extract bits from.
 * @offset: Index of the lowest bit to extract (bit 0 is the LSB).
 * @size:   Width of the field in bits.
 *
 * Context: @offset + @size <= 32.
 *
 * The mask is computed in 64 bits before being narrowed to u32, so a @size
 * of 32 yields a mask of all ones.
 *
 * Return: Bits [@offset, @offset + @size) of @value, right-aligned.
 */
/* from mali_cdsb.h */
#define KBASE_UBFX32(value, offset, size) \
	((u32)((1ULL << (u32)(size)) - 1) & ((u32)(value) >> (u32)(offset)))
int kbase_gpuprops_uk_get_props(struct kbase_context *kctx, struct kbase_uk_gpuprops * const kbase_props)
{
kbase_gpu_clk_speed_func get_gpu_speed_mhz;
u32 gpu_speed_mhz;
int rc = 1;
KBASE_DEBUG_ASSERT(NULL != kctx);
KBASE_DEBUG_ASSERT(NULL != kbase_props);
/* Current GPU speed is requested from the system integrator via the GPU_SPEED_FUNC function.
* If that function fails, or the function is not provided by the system integrator, we report the maximum
* GPU speed as specified by GPU_FREQ_KHZ_MAX.
*/
get_gpu_speed_mhz = (kbase_gpu_clk_speed_func) GPU_SPEED_FUNC;
if (get_gpu_speed_mhz != NULL) {
rc = get_gpu_speed_mhz(&gpu_speed_mhz);
#ifdef CONFIG_MALI_DEBUG
/* Issue a warning message when the reported GPU speed falls outside the min/max range */
if (rc == 0) {
u32 gpu_speed_khz = gpu_speed_mhz * 1000;
if (gpu_speed_khz < kctx->kbdev->gpu_props.props.core_props.gpu_freq_khz_min ||
gpu_speed_khz > kctx->kbdev->gpu_props.props.core_props.gpu_freq_khz_max)
dev_warn(kctx->kbdev->dev, "GPU Speed is outside of min/max range (got %lu Khz, min %lu Khz, max %lu Khz)\n",
(unsigned long)gpu_speed_khz,
(unsigned long)kctx->kbdev->gpu_props.props.core_props.gpu_freq_khz_min,
(unsigned long)kctx->kbdev->gpu_props.props.core_props.gpu_freq_khz_max);
}
#endif /* CONFIG_MALI_DEBUG */
}
if (kctx->kbdev->clock) {
gpu_speed_mhz = clk_get_rate(kctx->kbdev->clock) / 1000000;
rc = 0;
}
if (rc != 0)
gpu_speed_mhz = kctx->kbdev->gpu_props.props.core_props.gpu_freq_khz_max / 1000;
kctx->kbdev->gpu_props.props.core_props.gpu_speed_mhz = gpu_speed_mhz;
memcpy(&kbase_props->props, &kctx->kbdev->gpu_props.props, sizeof(kbase_props->props));
/* Before API 8.2 they expect L3 cache info here, which was always 0 */
if (kctx->api_version < KBASE_API_VERSION(8, 2))
kbase_props->props.raw_props.suspend_size = 0;
return 0;
}
/**
 * kbase_gpuprops_construct_coherent_groups - Fill in the coherency info
 * @props: The &base_gpu_props structure; raw_props must already be populated
 *
 * Derives coherency_info from the raw registers: the coherency word is a copy
 * of mem_features, num_core_groups is the number of bits set in l2_present,
 * and group[] receives one entry per coherent group.
 *
 * Every set bit of the "group present" word (l2_present when the groups are
 * L2 coherent, shader_present otherwise) marks where a group starts. A group
 * covers the bits from its own start bit up to, but not including, the start
 * bit of the next group; the last group extends to bit 63:
 *
 *   mask = (next_start - 1) & ~(this_start - 1)
 *
 * For example, with a group present word of 0x11 the first group has
 * this_start = 0x01 and next_start = 0x10, giving mask 0x0f. For the second
 * group no start bit remains, next_start is 0, (0 - 1) is all ones and the
 * mask is every bit from bit 4 upwards. Each mask is finally restricted to
 * the cores that exist (shader_present).
 *
 * At most BASE_MAX_COHERENT_GROUPS groups are recorded; a warning is printed
 * when more start bits remain.
 */
static void kbase_gpuprops_construct_coherent_groups(base_gpu_props * const props)
{
	u64 remaining;	/* start bits of the groups not yet processed */
	u64 start_bit;	/* lowest set bit of @remaining, 0 when none is left */
	u32 count;

	KBASE_DEBUG_ASSERT(NULL != props);

	props->coherency_info.coherency = props->raw_props.mem_features;
	props->coherency_info.num_core_groups = hweight64(props->raw_props.l2_present);

	/* L2 coherent groups start at L2 slices, L1 coherent ones at cores */
	if (props->coherency_info.coherency & GROUPS_L2_COHERENT)
		remaining = props->raw_props.l2_present;
	else
		remaining = props->raw_props.shader_present;

	start_bit = remaining & ~(remaining - 1);

	for (count = 0; remaining != 0 && count < BASE_MAX_COHERENT_GROUPS; count++) {
		struct mali_base_gpu_coherent_group *grp = &props->coherency_info.group[count];
		u64 this_start = start_bit;
		u64 mask;

		/* Consume this group's start bit and locate the next one */
		remaining -= this_start;
		start_bit = remaining & ~(remaining - 1);
		mask = (start_bit - 1) & ~(this_start - 1);

		/* Keep only the cores that are actually present */
		grp->core_mask = mask & props->raw_props.shader_present;
		grp->num_cores = hweight64(grp->core_mask);
	}

	if (remaining != 0)
		pr_warn("Too many coherent groups (keeping only %d groups).\n", BASE_MAX_COHERENT_GROUPS);

	props->coherency_info.num_groups = count;
}
/**
 * kbase_gpuprops_get_props - Read the raw GPU configuration
 * @gpu_props: The &base_gpu_props structure to fill
 * @kbdev:     The &struct kbase_device structure for the device
 *
 * Obtains a register dump from kbase_backend_gpuprops_get() and copies it
 * into @gpu_props->raw_props. The *_present bitmaps are assembled from their
 * high and low register halves. Only the raw properties are written here;
 * the derived ones are computed elsewhere.
 */
static void kbase_gpuprops_get_props(base_gpu_props * const gpu_props, struct kbase_device *kbdev)
{
	struct kbase_gpuprops_regdump regdump;
	struct gpu_raw_gpu_props *raw;
	int idx;

	KBASE_DEBUG_ASSERT(NULL != kbdev);
	KBASE_DEBUG_ASSERT(NULL != gpu_props);

	raw = &gpu_props->raw_props;

	/* Dump relevant registers */
	kbase_backend_gpuprops_get(kbdev, &regdump);

	/* Registers that are copied through unchanged */
	raw->gpu_id = regdump.gpu_id;
	raw->tiler_features = regdump.tiler_features;
	raw->mem_features = regdump.mem_features;
	raw->mmu_features = regdump.mmu_features;
	raw->l2_features = regdump.l2_features;
	raw->suspend_size = regdump.suspend_size;

	raw->as_present = regdump.as_present;
	raw->js_present = regdump.js_present;

	/* 64-bit presence bitmaps, read as two halves */
	raw->shader_present = ((u64) regdump.shader_present_hi << 32) + regdump.shader_present_lo;
	raw->tiler_present = ((u64) regdump.tiler_present_hi << 32) + regdump.tiler_present_lo;
	raw->l2_present = ((u64) regdump.l2_present_hi << 32) + regdump.l2_present_lo;

	for (idx = 0; idx < GPU_MAX_JOB_SLOTS; idx++)
		raw->js_features[idx] = regdump.js_features[idx];

	for (idx = 0; idx < BASE_GPU_NUM_TEXTURE_FEATURES_REGISTERS; idx++)
		raw->texture_features[idx] = regdump.texture_features[idx];

	raw->thread_max_barrier_size = regdump.thread_max_barrier_size;
	raw->thread_max_threads = regdump.thread_max_threads;
	raw->thread_max_workgroup_size = regdump.thread_max_workgroup_size;
	raw->thread_features = regdump.thread_features;
}
/**
 * kbase_gpuprops_calculate_props - Compute the derived properties
 * @gpu_props: The &base_gpu_props structure; raw_props must already be filled
 * @kbdev:     The &struct kbase_device structure for the device (not used)
 *
 * Decodes the bitfields held in @gpu_props->raw_props into the core, L2,
 * tiler and thread property groups, substitutes the THREAD_*_DEFAULT values
 * where the hardware reports zero, and finally builds the coherent group
 * description.
 */
static void kbase_gpuprops_calculate_props(base_gpu_props * const gpu_props, struct kbase_device *kbdev)
{
	const struct gpu_raw_gpu_props *raw = &gpu_props->raw_props;
	int idx;

	/* Core properties: the fields of GPU_ID plus a few constants */
	gpu_props->core_props.version_status = KBASE_UBFX32(raw->gpu_id, 0U, 4);
	gpu_props->core_props.minor_revision = KBASE_UBFX32(raw->gpu_id, 4U, 8);
	gpu_props->core_props.major_revision = KBASE_UBFX32(raw->gpu_id, 12U, 4);
	gpu_props->core_props.product_id = KBASE_UBFX32(raw->gpu_id, 16U, 16);
	gpu_props->core_props.log2_program_counter_size = KBASE_GPU_PC_SIZE_LOG2;
	gpu_props->core_props.gpu_available_memory_size = totalram_pages << PAGE_SHIFT;

	for (idx = 0; idx < BASE_GPU_NUM_TEXTURE_FEATURES_REGISTERS; idx++)
		gpu_props->core_props.texture_features[idx] = raw->texture_features[idx];

	/* L2 cache properties */
	gpu_props->l2_props.log2_line_size = KBASE_UBFX32(raw->l2_features, 0U, 8);
	gpu_props->l2_props.log2_cache_size = KBASE_UBFX32(raw->l2_features, 16U, 8);

	/* Field with number of l2 slices is added to MEM_FEATURES register
	 * since t76x. Below code assumes that for older GPU reserved bits will
	 * be read as zero. */
	gpu_props->l2_props.num_l2_slices = KBASE_UBFX32(raw->mem_features, 8U, 4) + 1;

	/* Tiler properties */
	gpu_props->tiler_props.bin_size_bytes = 1 << KBASE_UBFX32(raw->tiler_features, 0U, 6);
	gpu_props->tiler_props.max_active_levels = KBASE_UBFX32(raw->tiler_features, 8U, 4);

	/* Thread limits: a register value of zero selects the default */
	gpu_props->thread_props.max_threads =
		raw->thread_max_threads ? raw->thread_max_threads : THREAD_MT_DEFAULT;
	gpu_props->thread_props.max_workgroup_size =
		raw->thread_max_workgroup_size ? raw->thread_max_workgroup_size : THREAD_MWS_DEFAULT;
	gpu_props->thread_props.max_barrier_size =
		raw->thread_max_barrier_size ? raw->thread_max_barrier_size : THREAD_MBS_DEFAULT;

	/* Fields packed into THREAD_FEATURES */
	gpu_props->thread_props.max_registers = KBASE_UBFX32(raw->thread_features, 0U, 16);
	gpu_props->thread_props.max_task_queue = KBASE_UBFX32(raw->thread_features, 16U, 8);
	gpu_props->thread_props.max_thread_group_split = KBASE_UBFX32(raw->thread_features, 24U, 6);
	gpu_props->thread_props.impl_tech = KBASE_UBFX32(raw->thread_features, 30U, 2);

	/* A zero register count means the values were not specified at all:
	 * fall back to the defaults for the three fields together. */
	if (gpu_props->thread_props.max_registers == 0) {
		gpu_props->thread_props.max_registers = THREAD_MR_DEFAULT;
		gpu_props->thread_props.max_task_queue = THREAD_MTQ_DEFAULT;
		gpu_props->thread_props.max_thread_group_split = THREAD_MTGS_DEFAULT;
	}

	/* Initialize the coherent_group structure for each group */
	kbase_gpuprops_construct_coherent_groups(gpu_props);
}
/**
 * kbase_gpuprops_set - Set up the kbase GPU properties
 * @kbdev: The &struct kbase_device structure for the device
 *
 * Reads the raw properties from the hardware, computes the derived
 * properties and then fills in the fields that exist only in
 * &struct kbase_gpu_props: extra L2 details, the memory core-group flag,
 * the MMU address widths and the core / core-group / address-space /
 * job-slot counts.
 */
void kbase_gpuprops_set(struct kbase_device *kbdev)
{
	struct kbase_gpu_props *kprops;
	struct gpu_raw_gpu_props *regs;

	KBASE_DEBUG_ASSERT(NULL != kbdev);

	kprops = &kbdev->gpu_props;
	regs = &kprops->props.raw_props;

	/* Raw values from the hardware first, derived values second */
	kbase_gpuprops_get_props(&kprops->props, kbdev);
	kbase_gpuprops_calculate_props(&kprops->props, kbdev);

	/* kbase-only fields decoded from the feature registers */
	kprops->l2_props.associativity = KBASE_UBFX32(regs->l2_features, 8U, 8);
	kprops->l2_props.external_bus_width = KBASE_UBFX32(regs->l2_features, 24U, 8);
	kprops->mem.core_group = KBASE_UBFX32(regs->mem_features, 0U, 1);
	kprops->mmu.va_bits = KBASE_UBFX32(regs->mmu_features, 0U, 8);
	kprops->mmu.pa_bits = KBASE_UBFX32(regs->mmu_features, 8U, 8);

	/* Counts are the number of bits set in the presence bitmaps */
	kprops->num_cores = hweight64(regs->shader_present);
	kprops->num_core_groups = hweight64(regs->l2_present);
	kprops->num_address_spaces = hweight32(regs->as_present);
	kprops->num_job_slots = hweight32(regs->js_present);
}
/**
 * kbase_gpuprops_set_features - Set up feature-dependent GPU properties
 * @kbdev: Device pointer
 *
 * Obtains a register dump from kbase_backend_gpuprops_get_features() and
 * stores its coherency_features value in raw_props.coherency_mode.
 */
void kbase_gpuprops_set_features(struct kbase_device *kbdev)
{
	struct kbase_gpuprops_regdump regdump;

	/* Dump relevant registers */
	kbase_backend_gpuprops_get_features(kbdev, &regdump);

	/*
	 * Copy the raw value from the register, later this will get turned
	 * into the selected coherency mode.
	 */
	kbdev->gpu_props.props.raw_props.coherency_mode = regdump.coherency_features;
}

View File

@ -0,0 +1,64 @@
/*
*
* (C) COPYRIGHT 2011-2015 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
* Foundation, and any use by you of this program is subject to the terms
* of such GNU licence.
*
* A copy of the licence is included with the program, and can also be obtained
* from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
* Boston, MA 02110-1301, USA.
*
*/
/**
* @file mali_kbase_gpuprops.h
* Base kernel property query APIs
*/
#ifndef _KBASE_GPUPROPS_H_
#define _KBASE_GPUPROPS_H_
#include "mali_kbase_gpuprops_types.h"
/* Forward definition - see mali_kbase.h */
struct kbase_device;
/**
* @brief Set up Kbase GPU properties.
*
* Set up Kbase GPU properties with information from the GPU registers:
* the raw register values, the properties derived from them, and the
* kbase-only fields such as core, core group, address space and job slot
* counts.
*
* @param kbdev The struct kbase_device structure for the device
*/
void kbase_gpuprops_set(struct kbase_device *kbdev);
/**
* kbase_gpuprops_set_features - Set up Kbase GPU properties
* @kbdev: Device pointer
*
* This function sets up GPU properties that are dependent on the hardware
* features bitmask. This function must be preceded by a call to
* kbase_hw_set_features_mask().
*/
void kbase_gpuprops_set_features(struct kbase_device *kbdev);
/**
* @brief Provide GPU properties to userside through UKU call.
*
* Fill the struct kbase_uk_gpuprops with values from GPU configuration registers.
* The current GPU speed stored in the device's properties is refreshed before
* the properties are copied.
*
* @param kctx The struct kbase_context structure
* @param kbase_props A copy of the struct kbase_uk_gpuprops structure from userspace
*
* @return 0 on success. Any other value indicates failure. The current
* implementation always returns 0.
*/
int kbase_gpuprops_uk_get_props(struct kbase_context *kctx, struct kbase_uk_gpuprops * const kbase_props);
#endif /* _KBASE_GPUPROPS_H_ */

Some files were not shown because too many files have changed in this diff Show More