MALI: rockchip: restore midgard_for_linux/, device driver under it is on DDK r14

The process to get current source code under midgard_for_linux/ : 1. On the status of commit 18166b65, revert commit "91842c9 MALI: rockchip: upgrade midgard DDK to r18p0-01rel0", which upgraded drivers/gpu/arm/midgard/ from DDK r14 to r18. 2. copy directory drivers/gpu/arm/midgard/ to drivers/gpu/arm/midgard_for_linux/. It's ensured that changes of commits in drivers/gpu/arm/midgard/ from RK power management group earlier than commit 18166b65 are correspondingly retained in current drivers/gpu/arm/midgard_for_linux/. Change-Id: I41463a8c160e5d25365d6872eef1049de4a317fb Signed-off-by: Zhen Chen <chenzhen@rock-chips.com>
2026-06-07 14:04:54 +02:00 · 2019-01-10 09:51:46 +08:00 · 2019-01-10 09:51:46 +08:00 · 0af9a0968c
commit 0af9a0968c
parent c3cbe0b00e
211 changed files with 68037 additions and 1 deletions
--- a/drivers/gpu/arm/Kbuild
+++ b/drivers/gpu/arm/Kbuild
@ -20,7 +20,9 @@
 #


-obj-$(CONFIG_MALI_MIDGARD) += midgard/
+obj-$(CONFIG_MALI_MIDGARD_FOR_LINUX) += midgard_for_linux/
+
+obj-$(CONFIG_MALI_MIDGARD_FOR_ANDROID) += midgard/

 obj-$(CONFIG_MALI400)      += mali400/

--- a/drivers/gpu/arm/Kconfig
+++ b/drivers/gpu/arm/Kconfig
@ -20,6 +20,18 @@
 #
 #
 source "drivers/gpu/arm/mali400/mali/Kconfig"
+
+choice
+	prompt "Mali Midgard driver"
+
+config MALI_MIDGARD_FOR_ANDROID
+	bool "Mali Midgard for Android"
+
+config MALI_MIDGARD_FOR_LINUX
+	bool "Mali Midgard for Linux only"
+
+endchoice
+
 source "drivers/gpu/arm/midgard/Kconfig"

 choice
--- a/drivers/gpu/arm/midgard_for_linux/Kbuild
+++ b/drivers/gpu/arm/midgard_for_linux/Kbuild
@ -0,0 +1,229 @@
+#
+# (C) COPYRIGHT 2012,2014 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+# Driver version string which is returned to userspace via an ioctl
+MALI_RELEASE_NAME ?= "r14p0-01rel0"
+
+# Paths required for build
+KBASE_PATH = $(src)
+KBASE_PLATFORM_PATH = $(KBASE_PATH)/platform_dummy
+UMP_PATH = $(src)/../../../base
+
+# Turn on error injection. The Kconfig symbol is MALI_ERROR_INJECT;
+# CONFIG_MALI_ERROR_INJECTION is still honoured for builds that pass that
+# spelling on the make command line.
+ifneq ($(filter y,$(CONFIG_MALI_ERROR_INJECT) $(CONFIG_MALI_ERROR_INJECTION)),)
+MALI_ERROR_INJECT_ON = 1
+endif
+
+# Set up defaults if not defined by build system
+MALI_CUSTOMER_RELEASE ?= 1
+MALI_UNIT_TEST ?= 0
+MALI_KERNEL_TEST_API ?= 0
+MALI_ERROR_INJECT_ON ?= 0
+MALI_MOCK_TEST ?= 0
+MALI_COVERAGE ?= 0
+MALI_INSTRUMENTATION_LEVEL ?= 0
+# This workaround is for what seems to be a compiler bug we observed in
+# GCC 4.7 on AOSP 4.3.  The bug caused an intermittent failure compiling
+# the "_Pragma" syntax, where an error message is returned:
+#
+# "internal compiler error: unspellable token PRAGMA"
+#
+# This regression has thus far only been seen on the GCC 4.7 compiler bundled
+# with AOSP 4.3.0.  So this makefile, intended for in-tree kernel builds
+# which are not known to be used with AOSP, is hardcoded to disable the
+# workaround, i.e. set the define to 0.
+MALI_GCC_WORKAROUND_MIDCOM_4598 ?= 0
+
+# Set up our defines, which will be passed to gcc
+DEFINES = \
+	-DMALI_CUSTOMER_RELEASE=$(MALI_CUSTOMER_RELEASE) \
+	-DMALI_KERNEL_TEST_API=$(MALI_KERNEL_TEST_API) \
+	-DMALI_UNIT_TEST=$(MALI_UNIT_TEST) \
+	-DMALI_ERROR_INJECT_ON=$(MALI_ERROR_INJECT_ON) \
+	-DMALI_MOCK_TEST=$(MALI_MOCK_TEST) \
+	-DMALI_COVERAGE=$(MALI_COVERAGE) \
+	-DMALI_INSTRUMENTATION_LEVEL=$(MALI_INSTRUMENTATION_LEVEL) \
+	-DMALI_RELEASE_NAME=\"$(MALI_RELEASE_NAME)\" \
+	-DMALI_GCC_WORKAROUND_MIDCOM_4598=$(MALI_GCC_WORKAROUND_MIDCOM_4598)
+
+ifeq ($(KBUILD_EXTMOD),)
+# in-tree
+DEFINES +=-DMALI_KBASE_THIRDPARTY_PATH=../../$(src)/platform/$(CONFIG_MALI_PLATFORM_THIRDPARTY_NAME)
+else
+# out-of-tree
+DEFINES +=-DMALI_KBASE_THIRDPARTY_PATH=$(src)/platform/$(CONFIG_MALI_PLATFORM_THIRDPARTY_NAME)
+endif
+
+DEFINES += -I$(srctree)/drivers/staging/android
+
+# Use our defines when compiling
+ccflags-y += $(DEFINES) -I$(KBASE_PATH)   -I$(KBASE_PLATFORM_PATH) -I$(UMP_PATH) -I$(srctree)/include/linux
+subdir-ccflags-y += $(DEFINES) -I$(KBASE_PATH)   -I$(KBASE_PLATFORM_PATH) -I$(OSK_PATH) -I$(UMP_PATH) -I$(srctree)/include/linux
+
+SRC := \
+	mali_kbase_device.c \
+	mali_kbase_cache_policy.c \
+	mali_kbase_mem.c \
+	mali_kbase_mmu.c \
+	mali_kbase_ipa.c \
+	mali_kbase_jd.c \
+	mali_kbase_jd_debugfs.c \
+	mali_kbase_jm.c \
+	mali_kbase_gpuprops.c \
+	mali_kbase_js.c \
+	mali_kbase_js_ctx_attr.c \
+	mali_kbase_event.c \
+	mali_kbase_context.c \
+	mali_kbase_pm.c \
+	mali_kbase_config.c \
+	mali_kbase_vinstr.c \
+	mali_kbase_softjobs.c \
+	mali_kbase_10969_workaround.c \
+	mali_kbase_hw.c \
+	mali_kbase_utility.c \
+	mali_kbase_debug.c \
+	mali_kbase_trace_timeline.c \
+	mali_kbase_gpu_memory_debugfs.c \
+	mali_kbase_mem_linux.c \
+	mali_kbase_core_linux.c \
+	mali_kbase_sync.c \
+	mali_kbase_sync_user.c \
+	mali_kbase_replay.c \
+	mali_kbase_mem_profile_debugfs.c \
+	mali_kbase_mmu_mode_lpae.c \
+	mali_kbase_mmu_mode_aarch64.c \
+	mali_kbase_disjoint_events.c \
+	mali_kbase_gator_api.c \
+	mali_kbase_debug_mem_view.c \
+	mali_kbase_debug_job_fault.c \
+	mali_kbase_smc.c \
+	mali_kbase_mem_pool.c \
+	mali_kbase_mem_pool_debugfs.c \
+	mali_kbase_tlstream.c \
+	mali_kbase_strings.c \
+	mali_kbase_as_fault_debugfs.c \
+	mali_kbase_regs_history_debugfs.c
+
+ifeq ($(MALI_UNIT_TEST),1)
+	SRC += mali_kbase_tlstream_test.c
+endif
+
+ifeq ($(MALI_CUSTOMER_RELEASE),0)
+	SRC += mali_kbase_regs_dump_debugfs.c
+endif
+
+
+# Job Scheduler Policy: Completely Fair Scheduler
+SRC += mali_kbase_js_policy_cfs.c
+
+ccflags-y += -I$(KBASE_PATH)
+
+ifeq ($(CONFIG_MALI_PLATFORM_FAKE),y)
+	SRC += mali_kbase_platform_fake.c
+
+	ifeq ($(CONFIG_MALI_PLATFORM_VEXPRESS),y)
+		SRC += platform/vexpress/mali_kbase_config_vexpress.c \
+		platform/vexpress/mali_kbase_cpu_vexpress.c
+		ccflags-y += -I$(src)/platform/vexpress
+	endif
+
+	ifeq ($(CONFIG_MALI_PLATFORM_RTSM_VE),y)
+		SRC += platform/rtsm_ve/mali_kbase_config_vexpress.c
+		ccflags-y += -I$(src)/platform/rtsm_ve
+	endif
+
+	ifeq ($(CONFIG_MALI_PLATFORM_JUNO),y)
+		SRC += platform/juno/mali_kbase_config_vexpress.c
+		ccflags-y += -I$(src)/platform/juno
+	endif
+
+	ifeq ($(CONFIG_MALI_PLATFORM_JUNO_SOC),y)
+		SRC += platform/juno_soc/mali_kbase_config_juno_soc.c
+		ccflags-y += -I$(src)/platform/juno_soc
+	endif
+
+	ifeq ($(CONFIG_MALI_PLATFORM_VEXPRESS_1XV7_A57),y)
+		SRC += platform/vexpress_1xv7_a57/mali_kbase_config_vexpress.c
+		ccflags-y += -I$(src)/platform/vexpress_1xv7_a57
+	endif
+
+	ifeq ($(CONFIG_MALI_PLATFORM_VEXPRESS_6XVIRTEX7_10MHZ),y)
+		SRC += platform/vexpress_6xvirtex7_10mhz/mali_kbase_config_vexpress.c \
+		platform/vexpress_6xvirtex7_10mhz/mali_kbase_cpu_vexpress.c
+		ccflags-y += -I$(src)/platform/vexpress_6xvirtex7_10mhz
+	endif
+
+	ifeq ($(CONFIG_MALI_PLATFORM_A7_KIPLING),y)
+		SRC += platform/a7_kipling/mali_kbase_config_a7_kipling.c \
+		platform/a7_kipling/mali_kbase_cpu_a7_kipling.c
+		ccflags-y += -I$(src)/platform/a7_kipling
+	endif
+
+	ifeq ($(CONFIG_MALI_PLATFORM_THIRDPARTY),y)
+	# remove begin and end quotes from the Kconfig string type
+	platform_name := $(shell echo $(CONFIG_MALI_PLATFORM_THIRDPARTY_NAME))
+	MALI_PLATFORM_THIRDPARTY_DIR := platform/$(platform_name)
+	ccflags-y += -I$(src)/$(MALI_PLATFORM_THIRDPARTY_DIR)
+	ifeq ($(CONFIG_MALI_MIDGARD),m)
+	include  $(src)/platform/$(platform_name)/Kbuild
+	else ifeq ($(CONFIG_MALI_MIDGARD),y)
+	obj-$(CONFIG_MALI_MIDGARD) += platform/
+	endif
+	endif
+endif # CONFIG_MALI_PLATFORM_FAKE=y
+
+ifeq ($(CONFIG_MALI_PLATFORM_THIRDPARTY),y)
+# remove begin and end quotes from the Kconfig string type
+platform_name := $(shell echo $(CONFIG_MALI_PLATFORM_THIRDPARTY_NAME))
+MALI_PLATFORM_THIRDPARTY_DIR := platform/$(platform_name)
+ccflags-y += -I$(src)/$(MALI_PLATFORM_THIRDPARTY_DIR)
+MALI_PLATFORM_DIR := platform/$(platform_name)
+include  $(src)/platform/$(platform_name)/Kbuild
+endif
+
+# Tell the Linux build system from which .o file to create the kernel module
+obj-$(CONFIG_MALI_MIDGARD) += midgard_kbase.o
+
+# Tell the Linux build system to enable building of our .c files
+midgard_kbase-y := $(SRC:.c=.o)
+
+midgard_kbase-$(CONFIG_MALI_DMA_FENCE) += mali_kbase_dma_fence.o
+
+MALI_BACKEND_PATH ?= backend
+CONFIG_MALI_BACKEND ?= gpu
+CONFIG_MALI_BACKEND_REAL ?= $(CONFIG_MALI_BACKEND)
+
+ifeq ($(MALI_MOCK_TEST),1)
+ifeq ($(CONFIG_MALI_BACKEND_REAL),gpu)
+# Test functionality
+midgard_kbase-y += tests/internal/src/mock/mali_kbase_pm_driver_mock.o
+endif
+endif
+
+include  $(src)/$(MALI_BACKEND_PATH)/$(CONFIG_MALI_BACKEND_REAL)/Kbuild
+midgard_kbase-y += $(BACKEND:.c=.o)
+
+ccflags-y += -I$(src)/$(MALI_BACKEND_PATH)/$(CONFIG_MALI_BACKEND_REAL)
+subdir-ccflags-y += -I$(src)/$(MALI_BACKEND_PATH)/$(CONFIG_MALI_BACKEND_REAL)
+
+# Default to devicetree platform if neither a fake platform or a thirdparty
+# platform is configured.
+ifeq ($(CONFIG_MALI_PLATFORM_THIRDPARTY)$(CONFIG_MALI_PLATFORM_FAKE),)
+CONFIG_MALI_PLATFORM_DEVICETREE := y
+endif
+
+midgard_kbase-$(CONFIG_MALI_PLATFORM_DEVICETREE) += \
+	platform/devicetree/mali_kbase_runtime_pm.o \
+	platform/devicetree/mali_kbase_config_devicetree.o
+ccflags-$(CONFIG_MALI_PLATFORM_DEVICETREE) += -I$(src)/platform/devicetree
--- a/drivers/gpu/arm/midgard_for_linux/Kconfig
+++ b/drivers/gpu/arm/midgard_for_linux/Kconfig
@ -0,0 +1,225 @@
+#
+# (C) COPYRIGHT 2012-2015 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+
+menuconfig MALI_MIDGARD
+	tristate "Mali Midgard series support"
+	select GPU_TRACEPOINTS if ANDROID
+	default n
+	help
+	  Enable this option to build support for an ARM Mali Midgard GPU.
+
+	  To compile this driver as a module, choose M here:
+	  this will generate a single module, called midgard_kbase.
+
+config MALI_GATOR_SUPPORT
+	bool "Streamline support via Gator"
+	depends on MALI_MIDGARD
+	default n
+	help
+	  Adds diagnostic support for use with the ARM Streamline Performance Analyzer.
+	  You will need the Gator device driver already loaded before loading this driver when enabling
+	  Streamline debug support.
+	  This is a legacy interface required by older versions of Streamline.
+
+config MALI_MIDGARD_DVFS
+	bool "Enable legacy DVFS"
+	depends on MALI_MIDGARD && !MALI_DEVFREQ && !MALI_PLATFORM_DEVICETREE
+	default n
+	help
+	  Choose this option to enable legacy DVFS in the Mali Midgard DDK.
+
+config MALI_MIDGARD_ENABLE_TRACE
+	bool "Enable kbase tracing"
+	depends on MALI_MIDGARD
+	default n
+	help
+	  Enables tracing in kbase.  Trace log available through
+	  the "mali_trace" debugfs file, when the CONFIG_DEBUG_FS is enabled
+
+config MALI_DEVFREQ
+	bool "devfreq support for Mali"
+	depends on MALI_MIDGARD && PM_DEVFREQ
+	help
+	  Support devfreq for Mali.
+
+	  Using the devfreq framework and, by default, the simpleondemand
+	  governor, the frequency of Mali will be dynamically selected from the
+	  available OPPs.
+
+config MALI_DMA_FENCE
+	bool "DMA_BUF fence support for Mali"
+	depends on MALI_MIDGARD && !KDS
+	default n
+	help
+	  Support DMA_BUF fences for Mali.
+
+	  This option should only be enabled if KDS is not present and
+	  the Linux Kernel has built in support for DMA_BUF fences.
+
+# MALI_EXPERT configuration options
+
+menuconfig MALI_EXPERT
+	depends on MALI_MIDGARD
+	bool "Enable Expert Settings"
+	default n
+	help
+	  Enabling this option and modifying the default settings may produce a driver with performance or
+	  other limitations.
+
+config MALI_PRFCNT_SET_SECONDARY
+	bool "Use secondary set of performance counters"
+	depends on MALI_MIDGARD && MALI_EXPERT
+	default n
+	help
+	  Select this option to use secondary set of performance counters. Kernel
+	  features that depend on an access to the primary set of counters may
+	  become unavailable. Enabling this option will prevent power management
+	  from working optimally and may cause instrumentation tools to return
+	  bogus results.
+
+	  If unsure, say N.
+
+config MALI_PLATFORM_FAKE
+	bool "Enable fake platform device support"
+	depends on MALI_MIDGARD && MALI_EXPERT
+	default n
+	help
+	  When you start to work with the Mali Midgard series device driver the platform-specific code of
+	  the Linux kernel for your platform may not be complete. In this situation the kernel device driver
+	  supports creating the platform device outside of the Linux platform-specific code.
+	  Enable this option if you would like to use a platform device configuration from within the device driver.
+
+choice
+	prompt "Platform configuration"
+	depends on MALI_MIDGARD && MALI_EXPERT
+	default MALI_PLATFORM_DEVICETREE
+	help
+	  Select the SOC platform that contains a Mali Midgard GPU
+
+config MALI_PLATFORM_DEVICETREE
+	bool "Device Tree platform"
+	depends on OF
+	help
+	  Select this option to use Device Tree with the Mali driver.
+
+	  When using this option the Mali driver will get the details of the
+	  GPU hardware from the Device Tree. This means that the same driver
+	  binary can run on multiple platforms as long as all the GPU hardware
+	  details are described in the device tree.
+
+	  Device Tree is the recommended method for the Mali driver platform
+	  integration.
+
+config MALI_PLATFORM_VEXPRESS
+	depends on ARCH_VEXPRESS && (ARCH_VEXPRESS_CA9X4 || ARCH_VEXPRESS_CA15X4)
+	bool "Versatile Express"
+config MALI_PLATFORM_VEXPRESS_VIRTEX7_40MHZ
+	depends on ARCH_VEXPRESS && (ARCH_VEXPRESS_CA9X4 || ARCH_VEXPRESS_CA15X4)
+	bool "Versatile Express w/Virtex7 @ 40Mhz"
+config MALI_PLATFORM_GOLDFISH
+	depends on ARCH_GOLDFISH
+	bool "Android Goldfish virtual CPU"
+config MALI_PLATFORM_PBX
+	depends on ARCH_REALVIEW && REALVIEW_EB_A9MP && MACH_REALVIEW_PBX
+	bool "Realview PBX-A9"
+config MALI_PLATFORM_THIRDPARTY
+	bool "Third Party Platform"
+endchoice
+
+config MALI_PLATFORM_THIRDPARTY_NAME
+	depends on MALI_MIDGARD && MALI_PLATFORM_THIRDPARTY && MALI_EXPERT
+	string "Third party platform name"
+	help
+	  Enter the name of a third party platform that is supported. The third party configuration
+	  file must be in midgard/config/tpip/mali_kbase_config_xxx.c where xxx is the name
+	  specified here.
+
+config MALI_DEBUG
+	bool "Debug build"
+	depends on MALI_MIDGARD && MALI_EXPERT
+	default n
+	help
+	  Select this option for increased checking and reporting of errors.
+
+config MALI_FENCE_DEBUG
+	bool "Debug sync fence usage"
+	depends on MALI_MIDGARD && MALI_EXPERT && SYNC
+	default y if MALI_DEBUG
+	help
+	  Select this option to enable additional checking and reporting on the
+	  use of sync fences in the Mali driver.
+
+	  This will add a 3s timeout to all sync fence waits in the Mali
+	  driver, so that when work for Mali has been waiting on a sync fence
+	  for a long time a debug message will be printed, detailing what fence
+	  is causing the block, and which dependent Mali atoms are blocked as a
+	  result of this.
+
+	  The timeout can be changed at runtime through the js_soft_timeout
+	  device attribute, where the timeout is specified in milliseconds.
+
+config MALI_NO_MALI
+	bool "No Mali"
+	depends on MALI_MIDGARD && MALI_EXPERT
+	default n
+	help
+	  This can be used to test the driver in a simulated environment
+	  whereby the hardware is not physically present. If the hardware is physically
+	  present it will not be used. This can be used to test the majority of the
+	  driver without needing actual hardware or for software benchmarking.
+	  All calls to the simulated hardware will complete immediately as if the hardware
+	  completed the task.
+
+config MALI_ERROR_INJECT
+	bool "Error injection"
+	depends on MALI_MIDGARD && MALI_EXPERT && MALI_NO_MALI
+	default n
+	help
+	  Enables insertion of errors to test module failure and recovery mechanisms.
+
+config MALI_TRACE_TIMELINE
+	bool "Timeline tracing"
+	depends on MALI_MIDGARD && MALI_EXPERT
+	default n
+	help
+	  Enables timeline tracing through the kernel tracepoint system.
+
+config MALI_SYSTEM_TRACE
+	bool "Enable system event tracing support"
+	depends on MALI_MIDGARD && MALI_EXPERT
+	default n
+	help
+	  Choose this option to enable system trace events for each
+	  kbase event.	This is typically used for debugging but has
+	  minimal overhead when not in use. Enable only if you know what
+	  you are doing.
+
+config MALI_GPU_MMU_AARCH64
+	bool "Use AArch64 page tables"
+	depends on MALI_MIDGARD && MALI_EXPERT
+	default n
+	help
+	  Use AArch64 format page tables for the GPU instead of LPAE-style.
+	  The two formats have the same functionality and performance but a
+	  future GPU may deprecate or remove the legacy LPAE-style format.
+
+	  The LPAE-style format is supported on all Midgard and current Bifrost
+	  GPUs. Enabling AArch64 format restricts the driver to only supporting
+	  Bifrost GPUs.
+
+	  If in doubt, say N.
+
+source "drivers/gpu/arm/midgard/platform/Kconfig"
--- a/drivers/gpu/arm/midgard_for_linux/Makefile
+++ b/drivers/gpu/arm/midgard_for_linux/Makefile
@ -0,0 +1,42 @@
+#
+# (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+
+KDIR ?= /lib/modules/$(shell uname -r)/build
+
+BUSLOG_PATH_RELATIVE = $(CURDIR)/../../../..
+UMP_PATH_RELATIVE = $(CURDIR)/../../../base/ump
+KBASE_PATH_RELATIVE = $(CURDIR)
+KDS_PATH_RELATIVE = $(CURDIR)/../../../..
+EXTRA_SYMBOLS = $(UMP_PATH_RELATIVE)/src/Module.symvers
+
+ifeq ($(MALI_UNIT_TEST), 1)
+	EXTRA_SYMBOLS += $(KBASE_PATH_RELATIVE)/tests/internal/src/kernel_assert_module/linux/Module.symvers
+endif
+
+ifeq ($(MALI_BUS_LOG), 1)
+#Add bus logger symbols
+EXTRA_SYMBOLS += $(BUSLOG_PATH_RELATIVE)/drivers/base/bus_logger/Module.symvers
+endif
+
+# GPL driver supports KDS
+EXTRA_SYMBOLS += $(KDS_PATH_RELATIVE)/drivers/base/kds/Module.symvers
+
+# we get the symbols from modules using KBUILD_EXTRA_SYMBOLS to prevent warnings about unknown functions
+all:
+	$(MAKE) -C $(KDIR) M=$(CURDIR) EXTRA_CFLAGS="-I$(CURDIR)/../../../../include -I$(CURDIR)/../../../../tests/include $(SCONS_CFLAGS)" $(SCONS_CONFIGS) KBUILD_EXTRA_SYMBOLS="$(EXTRA_SYMBOLS)" modules
+
+clean:
+	$(MAKE) -C $(KDIR) M=$(CURDIR) clean
--- a/drivers/gpu/arm/midgard_for_linux/Makefile.kbase
+++ b/drivers/gpu/arm/midgard_for_linux/Makefile.kbase
@ -0,0 +1,17 @@
+#
+# (C) COPYRIGHT 2010 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+EXTRA_CFLAGS += -I$(ROOT) -I$(KBASE_PATH) -I$(OSK_PATH)/src/linux/include -I$(KBASE_PATH)/platform_$(PLATFORM)
+
--- a/drivers/gpu/arm/midgard_for_linux/backend/gpu/Kbuild
+++ b/drivers/gpu/arm/midgard_for_linux/backend/gpu/Kbuild
@ -0,0 +1,62 @@
+#
+# (C) COPYRIGHT 2014 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+BACKEND += \
+	backend/gpu/mali_kbase_cache_policy_backend.c \
+	backend/gpu/mali_kbase_device_hw.c \
+	backend/gpu/mali_kbase_gpu.c \
+	backend/gpu/mali_kbase_gpuprops_backend.c \
+	backend/gpu/mali_kbase_debug_job_fault_backend.c \
+	backend/gpu/mali_kbase_irq_linux.c \
+	backend/gpu/mali_kbase_instr_backend.c \
+	backend/gpu/mali_kbase_jm_as.c \
+	backend/gpu/mali_kbase_jm_hw.c \
+	backend/gpu/mali_kbase_jm_rb.c \
+	backend/gpu/mali_kbase_js_affinity.c \
+	backend/gpu/mali_kbase_js_backend.c \
+	backend/gpu/mali_kbase_mmu_hw_direct.c \
+	backend/gpu/mali_kbase_pm_backend.c \
+	backend/gpu/mali_kbase_pm_driver.c \
+	backend/gpu/mali_kbase_pm_metrics.c \
+	backend/gpu/mali_kbase_pm_ca.c \
+	backend/gpu/mali_kbase_pm_ca_fixed.c \
+	backend/gpu/mali_kbase_pm_always_on.c \
+	backend/gpu/mali_kbase_pm_coarse_demand.c \
+	backend/gpu/mali_kbase_pm_demand.c \
+	backend/gpu/mali_kbase_pm_policy.c \
+	backend/gpu/mali_kbase_time.c
+
+ifeq ($(MALI_CUSTOMER_RELEASE),0)
+BACKEND += \
+	backend/gpu/mali_kbase_pm_ca_random.c \
+	backend/gpu/mali_kbase_pm_demand_always_powered.c \
+	backend/gpu/mali_kbase_pm_fast_start.c
+endif
+
+ifeq ($(CONFIG_MALI_DEVFREQ),y)
+BACKEND += backend/gpu/mali_kbase_devfreq.c
+endif
+
+ifeq ($(CONFIG_MALI_NO_MALI),y)
+	# Dummy model
+	BACKEND += backend/gpu/mali_kbase_model_dummy.c
+	BACKEND += backend/gpu/mali_kbase_model_linux.c
+	# HW error simulation
+	BACKEND += backend/gpu/mali_kbase_model_error_generator.c
+endif
+
+ifeq ($(CONFIG_DEVFREQ_THERMAL),y)
+	BACKEND += backend/gpu/mali_kbase_power_model_simple.c
+endif
--- a/drivers/gpu/arm/midgard_for_linux/backend/gpu/mali_kbase_backend_config.h
+++ b/drivers/gpu/arm/midgard_for_linux/backend/gpu/mali_kbase_backend_config.h
@ -0,0 +1,29 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/*
+ * Backend specific configuration
+ */
+
+#ifndef _KBASE_BACKEND_CONFIG_H_
+#define _KBASE_BACKEND_CONFIG_H_
+
+/* Enable GPU reset API */
+#define KBASE_GPU_RESET_EN 1
+
+#endif /* _KBASE_BACKEND_CONFIG_H_ */
+
--- a/drivers/gpu/arm/midgard_for_linux/backend/gpu/mali_kbase_cache_policy_backend.c
+++ b/drivers/gpu/arm/midgard_for_linux/backend/gpu/mali_kbase_cache_policy_backend.c
@ -0,0 +1,29 @@
+/*
+ *
+ * (C) COPYRIGHT 2015-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#include "backend/gpu/mali_kbase_cache_policy_backend.h"
+#include <backend/gpu/mali_kbase_device_internal.h>
+
+/*
+ * Remember the requested coherency mode on the device and, when the hardware
+ * reports BASE_HW_FEATURE_COHERENCY_REG, write it to COHERENCY_ENABLE.
+ */
+void kbase_cache_set_coherency_mode(struct kbase_device *kbdev, u32 mode)
+{
+	kbdev->current_gpu_coherency_mode = mode;
+
+	/* No register to program when the feature is absent. */
+	if (!kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_COHERENCY_REG))
+		return;
+
+	kbase_reg_write(kbdev, COHERENCY_ENABLE, mode, NULL);
+}
+
--- a/drivers/gpu/arm/midgard_for_linux/backend/gpu/mali_kbase_cache_policy_backend.h
+++ b/drivers/gpu/arm/midgard_for_linux/backend/gpu/mali_kbase_cache_policy_backend.h
@ -0,0 +1,34 @@
+/*
+ *
+ * (C) COPYRIGHT 2015-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+#ifndef _KBASE_CACHE_POLICY_BACKEND_H_
+#define _KBASE_CACHE_POLICY_BACKEND_H_
+
+#include "mali_kbase.h"
+#include "mali_base_kernel.h"
+
+/**
+  * kbase_cache_set_coherency_mode() - Sets the system coherency mode
+  *			in the GPU.
+  * @kbdev:	Device pointer
+  * @mode:	Coherency mode. COHERENCY_ACE/ACE_LITE
+  *
+  * The mode is always recorded in kbdev->current_gpu_coherency_mode. It is
+  * written to the COHERENCY_ENABLE register only when the GPU reports
+  * BASE_HW_FEATURE_COHERENCY_REG.
+  */
+void kbase_cache_set_coherency_mode(struct kbase_device *kbdev,
+		u32 mode);
+
+#endif				/* _KBASE_CACHE_POLICY_BACKEND_H_ */
--- a/drivers/gpu/arm/midgard_for_linux/backend/gpu/mali_kbase_debug_job_fault_backend.c
+++ b/drivers/gpu/arm/midgard_for_linux/backend/gpu/mali_kbase_debug_job_fault_backend.c
@ -0,0 +1,157 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#include <mali_kbase.h>
+#include <backend/gpu/mali_kbase_device_internal.h>
+#include "mali_kbase_debug_job_fault.h"
+
+#ifdef CONFIG_DEBUG_FS
+
+/*
+ * GPU_CONTROL_REG(r): register offsets that
+ * kbase_debug_job_fault_reg_snapshot_init() wraps in GPU_CONTROL_REG() to
+ * form the addresses stored in the context's register dump list.
+ */
+static int gpu_control_reg_snapshot[] = {
+	GPU_ID,
+	SHADER_READY_LO,
+	SHADER_READY_HI,
+	TILER_READY_LO,
+	TILER_READY_HI,
+	L2_READY_LO,
+	L2_READY_HI
+};
+
+/*
+ * JOB_CONTROL_REG(r): register offsets wrapped in JOB_CONTROL_REG() when the
+ * dump list is built.
+ */
+static int job_control_reg_snapshot[] = {
+	JOB_IRQ_MASK,
+	JOB_IRQ_STATUS
+};
+
+/*
+ * JOB_SLOT_REG(n,r): per-slot register offsets. The dump list gets one copy
+ * of this set for every job slot reported in gpu_props.num_job_slots.
+ */
+static int job_slot_reg_snapshot[] = {
+	JS_HEAD_LO,
+	JS_HEAD_HI,
+	JS_TAIL_LO,
+	JS_TAIL_HI,
+	JS_AFFINITY_LO,
+	JS_AFFINITY_HI,
+	JS_CONFIG,
+	JS_STATUS,
+	JS_HEAD_NEXT_LO,
+	JS_HEAD_NEXT_HI,
+	JS_AFFINITY_NEXT_LO,
+	JS_AFFINITY_NEXT_HI,
+	JS_CONFIG_NEXT
+};
+
+/*
+ * MMU_REG(r): register offsets wrapped in MMU_REG() when the dump list is
+ * built.
+ */
+static int mmu_reg_snapshot[] = {
+	MMU_IRQ_MASK,
+	MMU_IRQ_STATUS
+};
+
+/*
+ * MMU_AS_REG(n,r): per-address-space register offsets. The dump list gets one
+ * copy of this set for every address space reported in
+ * gpu_props.num_address_spaces.
+ */
+static int as_reg_snapshot[] = {
+	AS_TRANSTAB_LO,
+	AS_TRANSTAB_HI,
+	AS_MEMATTR_LO,
+	AS_MEMATTR_HI,
+	AS_FAULTSTATUS,
+	AS_FAULTADDRESS_LO,
+	AS_FAULTADDRESS_HI,
+	AS_STATUS
+};
+
+/**
+ * kbase_debug_job_fault_reg_snapshot_init - Build the list of registers to
+ *                                           capture when a job faults
+ * @kctx:	Context whose reg_dump buffer is filled in
+ * @reg_range:	Sizing value for kctx->reg_dump; the buffer is treated as
+ *		holding reg_range * 2 / 4 entries
+ *
+ * kctx->reg_dump is laid out as pairs of entries: the first entry of each
+ * pair receives a register address here, the second is left for the value
+ * (see kbase_job_fault_get_reg_snapshot()). The list ends with a pair of
+ * REGISTER_DUMP_TERMINATION_FLAG entries.
+ *
+ * The required size is checked before anything is written, so an undersized
+ * buffer is reported instead of being overrun.
+ *
+ * Return: true when the list was written, false if there is no buffer or it
+ *         is too small.
+ */
+bool kbase_debug_job_fault_reg_snapshot_init(struct kbase_context *kctx,
+		int reg_range)
+{
+	/* Element counts derived from the element type, not a fixed size. */
+	const int gpu_regs = sizeof(gpu_control_reg_snapshot) /
+			sizeof(gpu_control_reg_snapshot[0]);
+	const int job_regs = sizeof(job_control_reg_snapshot) /
+			sizeof(job_control_reg_snapshot[0]);
+	const int slot_regs = sizeof(job_slot_reg_snapshot) /
+			sizeof(job_slot_reg_snapshot[0]);
+	const int mmu_regs = sizeof(mmu_reg_snapshot) /
+			sizeof(mmu_reg_snapshot[0]);
+	const int as_regs = sizeof(as_reg_snapshot) /
+			sizeof(as_reg_snapshot[0]);
+	const int capacity = reg_range * 2 / 4;
+	int i, j;
+	int offset = 0;
+	int slot_number;
+	int as_number;
+	int needed;
+
+	if (kctx->reg_dump == NULL)
+		return false;
+
+	slot_number = kctx->kbdev->gpu_props.num_job_slots;
+	as_number = kctx->kbdev->gpu_props.num_address_spaces;
+
+	/* Two entries per register, plus the terminating pair. */
+	needed = 2 * (gpu_regs + job_regs + slot_number * slot_regs +
+			mmu_regs + as_number * as_regs) + 2;
+	if (WARN_ON(needed > capacity))
+		return false;
+
+	/* get the GPU control registers*/
+	for (i = 0; i < gpu_regs; i++) {
+		kctx->reg_dump[offset] =
+				GPU_CONTROL_REG(gpu_control_reg_snapshot[i]);
+		offset += 2;
+	}
+
+	/* get the Job control registers*/
+	for (i = 0; i < job_regs; i++) {
+		kctx->reg_dump[offset] =
+				JOB_CONTROL_REG(job_control_reg_snapshot[i]);
+		offset += 2;
+	}
+
+	/* get the Job Slot registers*/
+	for (j = 0; j < slot_number; j++) {
+		for (i = 0; i < slot_regs; i++) {
+			kctx->reg_dump[offset] =
+				JOB_SLOT_REG(j, job_slot_reg_snapshot[i]);
+			offset += 2;
+		}
+	}
+
+	/* get the MMU registers*/
+	for (i = 0; i < mmu_regs; i++) {
+		kctx->reg_dump[offset] = MMU_REG(mmu_reg_snapshot[i]);
+		offset += 2;
+	}
+
+	/* get the Address space registers*/
+	for (j = 0; j < as_number; j++) {
+		for (i = 0; i < as_regs; i++) {
+			kctx->reg_dump[offset] =
+					MMU_AS_REG(j, as_reg_snapshot[i]);
+			offset += 2;
+		}
+	}
+
+	/* set the termination flag*/
+	kctx->reg_dump[offset] = REGISTER_DUMP_TERMINATION_FLAG;
+	kctx->reg_dump[offset + 1] = REGISTER_DUMP_TERMINATION_FLAG;
+
+	dev_dbg(kctx->kbdev->dev, "kbase_job_fault_reg_snapshot_init:%d\n",
+			offset);
+
+	return true;
+}
+
+/*
+ * Walk the entry pairs in kctx->reg_dump until the termination flag is found
+ * in an address entry, reading each listed register and storing the result
+ * in the entry that follows its address.
+ *
+ * Returns false when the context has no dump buffer, true otherwise.
+ */
+bool kbase_job_fault_get_reg_snapshot(struct kbase_context *kctx)
+{
+	int idx;
+
+	if (!kctx->reg_dump)
+		return false;
+
+	for (idx = 0;
+	     kctx->reg_dump[idx] != REGISTER_DUMP_TERMINATION_FLAG;
+	     idx += 2)
+		kctx->reg_dump[idx + 1] = kbase_reg_read(kctx->kbdev,
+				kctx->reg_dump[idx], NULL);
+
+	return true;
+}
+
+
+#endif
--- a/drivers/gpu/arm/midgard_for_linux/backend/gpu/mali_kbase_devfreq.c
+++ b/drivers/gpu/arm/midgard_for_linux/backend/gpu/mali_kbase_devfreq.c
@ -0,0 +1,344 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+#define ENABLE_DEBUG_LOG
+#include "../../platform/rk/custom_log.h"
+
+
+#include <mali_kbase.h>
+#include <mali_kbase_tlstream.h>
+#include <mali_kbase_config_defaults.h>
+#include <backend/gpu/mali_kbase_pm_internal.h>
+#ifdef CONFIG_DEVFREQ_THERMAL
+#include <backend/gpu/mali_kbase_power_model_simple.h>
+#endif
+
+#include <linux/clk.h>
+#include <linux/devfreq.h>
+#ifdef CONFIG_DEVFREQ_THERMAL
+#include <linux/devfreq_cooling.h>
+#endif
+
+#include <linux/version.h>
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 13, 0)
+#include <linux/pm_opp.h>
+#else /* Linux >= 3.13 */
+/* In 3.13 the OPP include header file, types, and functions were all
+ * renamed. Use the old filename for the include, and define the new names to
+ * the old, when an old kernel is detected.
+ */
+#include <linux/opp.h>
+#define dev_pm_opp opp
+#define dev_pm_opp_get_voltage opp_get_voltage
+#define dev_pm_opp_get_opp_count opp_get_opp_count
+#define dev_pm_opp_find_freq_ceil opp_find_freq_ceil
+#endif /* Linux >= 3.13 */
+#include <soc/rockchip/rockchip_opp_select.h>
+
+/*
+ * Thermal OPP description passed to rockchip_register_thermal_notifier() by
+ * kbase_devfreq_init(), which points .data at the devfreq instance before
+ * registering.
+ */
+static struct thermal_opp_device_data gpu_devdata = {
+	.type = THERMAL_OPP_TPYE_DEV,
+	.low_temp_adjust = rockchip_dev_low_temp_adjust,
+	.high_temp_adjust = rockchip_dev_high_temp_adjust,
+};
+
+
+/**
+ * kbase_devfreq_target - devfreq callback: move the GPU to a new OPP
+ * @dev:         Device whose driver data is the kbase device
+ * @target_freq: In: requested frequency. Out: frequency actually selected
+ * @flags:       devfreq flags forwarded to devfreq_recommended_opp()
+ *
+ * The voltage is raised before the clock when speeding up and lowered after
+ * the clock when slowing down.
+ *
+ * Return: 0 on success, negative error code otherwise.
+ */
+static int
+kbase_devfreq_target(struct device *dev, unsigned long *target_freq, u32 flags)
+{
+	struct kbase_device *kbdev = dev_get_drvdata(dev);
+	struct dev_pm_opp *opp;
+	unsigned long freq = 0;
+	unsigned long old_freq = kbdev->current_freq;
+	unsigned long voltage;
+	int err;
+
+	freq = *target_freq;
+
+	rcu_read_lock();
+	opp = devfreq_recommended_opp(dev, &freq, flags);
+	if (IS_ERR(opp)) {
+		rcu_read_unlock();
+		dev_err(dev, "Failed to get opp (%ld)\n", PTR_ERR(opp));
+		return PTR_ERR(opp);
+	}
+	voltage = dev_pm_opp_get_voltage(opp);
+	rcu_read_unlock();
+
+	/*
+	 * Only update if there is a change of frequency
+	 */
+	if (old_freq == freq) {
+		*target_freq = freq;
+#ifdef CONFIG_REGULATOR
+		/* kbdev->regulator may be NULL (every other voltage change in
+		 * this file checks for it); without a regulator there is no
+		 * voltage to adjust, so an unchanged frequency means no work.
+		 */
+		if (!kbdev->regulator || kbdev->current_voltage == voltage)
+			return 0;
+		err = regulator_set_voltage(kbdev->regulator, voltage, INT_MAX);
+		if (err) {
+			dev_err(dev, "Failed to set voltage (%d)\n", err);
+			return err;
+		}
+		/* Continue below so the new voltage gets recorded */
+#else
+		return 0;
+#endif
+	}
+
+#ifdef CONFIG_REGULATOR
+	/* Speeding up: the higher voltage must be in place before the clock */
+	if (kbdev->regulator && kbdev->current_voltage != voltage &&
+	    old_freq < freq) {
+		err = regulator_set_voltage(kbdev->regulator, voltage, INT_MAX);
+		if (err) {
+			dev_err(dev, "Failed to increase voltage (%d)\n", err);
+			return err;
+		}
+	}
+#endif
+
+	err = clk_set_rate(kbdev->clock, freq);
+	if (err) {
+		dev_err(dev, "Failed to set clock %lu (target %lu)\n",
+				freq, *target_freq);
+		return err;
+	}
+	*target_freq = freq;
+	kbdev->current_freq = freq;
+	if (kbdev->devfreq)
+		kbdev->devfreq->last_status.current_frequency = freq;
+#ifdef CONFIG_REGULATOR
+	/* Slowing down: the voltage may only drop once the clock is lower */
+	if (kbdev->regulator && kbdev->current_voltage != voltage &&
+	    old_freq > freq) {
+		err = regulator_set_voltage(kbdev->regulator, voltage, INT_MAX);
+		if (err) {
+			dev_err(dev, "Failed to decrease voltage (%d)\n", err);
+			return err;
+		}
+	}
+#endif
+
+	kbdev->current_voltage = voltage;
+
+	kbase_tlstream_aux_devfreq_target((u64)freq);
+
+	kbase_pm_reset_dvfs_utilisation(kbdev);
+
+	return err;
+}
+
+/*
+ * devfreq get_cur_freq callback: report the frequency cached in the kbase
+ * device. Always returns 0.
+ */
+static int
+kbase_devfreq_cur_freq(struct device *dev, unsigned long *freq)
+{
+	const struct kbase_device *kbdev = dev_get_drvdata(dev);
+
+	*freq = kbdev->current_freq;
+	return 0;
+}
+
+/*
+ * devfreq get_dev_status callback: publish the cached frequency together with
+ * the busy/total times obtained from kbase_pm_get_dvfs_utilisation().
+ * Always returns 0.
+ */
+static int
+kbase_devfreq_status(struct device *dev, struct devfreq_dev_status *stat)
+{
+	struct kbase_device *kbdev = dev_get_drvdata(dev);
+
+	stat->private_data = NULL;
+	stat->current_frequency = kbdev->current_freq;
+	kbase_pm_get_dvfs_utilisation(kbdev,
+			&stat->total_time, &stat->busy_time);
+
+#if defined(CONFIG_DEVFREQ_THERMAL) && \
+	LINUX_VERSION_CODE < KERNEL_VERSION(4, 3, 0)
+	/* On kernels before 4.3 the cooling device keeps its own status copy */
+	if (kbdev->devfreq_cooling)
+		memcpy(&kbdev->devfreq_cooling->last_status, stat,
+				sizeof(*stat));
+#endif
+
+	return 0;
+}
+
+/**
+ * kbase_devfreq_init_freq_table - Build the devfreq frequency table from OPPs
+ * @kbdev: Kbase device whose OPPs are enumerated
+ * @dp:    devfreq profile receiving freq_table and max_state
+ *
+ * Return: 0 on success (possibly with fewer entries than OPPs reported, in
+ * which case a warning is logged), negative error code otherwise.
+ */
+static int kbase_devfreq_init_freq_table(struct kbase_device *kbdev,
+		struct devfreq_dev_profile *dp)
+{
+	int count;
+	int i = 0;
+	unsigned long freq = 0;
+	struct dev_pm_opp *opp;
+
+	rcu_read_lock();
+	count = dev_pm_opp_get_opp_count(kbdev->dev);
+	rcu_read_unlock();
+	if (count < 0)
+		return count;
+
+	dp->freq_table = kmalloc_array(count, sizeof(dp->freq_table[0]),
+				GFP_KERNEL);
+	if (!dp->freq_table)
+		return -ENOMEM;
+
+	rcu_read_lock();
+	/* freq++ steps just past the OPP found so the next ceil lookup
+	 * returns the following one
+	 */
+	for (i = 0; i < count; i++, freq++) {
+		opp = dev_pm_opp_find_freq_ceil(kbdev->dev, &freq);
+		if (IS_ERR(opp))
+			break;
+
+		dp->freq_table[i] = freq;
+	}
+	rcu_read_unlock();
+
+	if (count != i)
+		dev_warn(kbdev->dev, "Unable to enumerate all OPPs (%d!=%d)\n",
+				count, i);
+
+	/* Only the entries actually filled in are advertised */
+	dp->max_state = i;
+
+	return 0;
+}
+
+/*
+ * Free the frequency table built by kbase_devfreq_init_freq_table().
+ *
+ * The profile is reached through kbdev->devfreq_profile rather than through
+ * kbdev->devfreq->profile: kbase_devfreq_init() calls this when
+ * devfreq_add_device() failed, i.e. while kbdev->devfreq holds an error
+ * pointer that must not be dereferenced. Both paths name the same object,
+ * since the profile handed to devfreq_add_device() is &kbdev->devfreq_profile.
+ */
+static void kbase_devfreq_term_freq_table(struct kbase_device *kbdev)
+{
+	struct devfreq_dev_profile *dp = &kbdev->devfreq_profile;
+
+	kfree(dp->freq_table);
+	/* Guard against a second free should this be reached again */
+	dp->freq_table = NULL;
+}
+
+/* devfreq profile exit callback: release the frequency table. */
+static void kbase_devfreq_exit(struct device *dev)
+{
+	kbase_devfreq_term_freq_table(dev_get_drvdata(dev));
+}
+
+/**
+ * kbase_devfreq_init - Register the GPU with devfreq
+ * @kbdev: Kbase device pointer
+ *
+ * Fills in the devfreq profile, adds the devfreq device with the
+ * "simple_ondemand" governor, registers the OPP and thermal notifiers and,
+ * with CONFIG_DEVFREQ_THERMAL, the simple power model and cooling device.
+ * A failure to register the thermal notifier is tolerated.
+ *
+ * Return: 0 on success, -ENODEV if the device has no clock, otherwise a
+ * negative error code; everything set up so far is undone on failure.
+ */
+int kbase_devfreq_init(struct kbase_device *kbdev)
+{
+	struct devfreq_dev_profile *dp;
+	unsigned long opp_rate;
+	int err;
+
+	if (!kbdev->clock)
+		return -ENODEV;
+
+	kbdev->current_freq = clk_get_rate(kbdev->clock);
+#ifdef CONFIG_REGULATOR
+	if (kbdev->regulator)
+		kbdev->current_voltage =
+			regulator_get_voltage(kbdev->regulator);
+#endif
+
+	dp = &kbdev->devfreq_profile;
+
+	dp->initial_freq = kbdev->current_freq;
+	/* .KP : set devfreq_dvfs_interval_in_ms */
+	dp->polling_ms = 20;
+	dp->target = kbase_devfreq_target;
+	dp->get_dev_status = kbase_devfreq_status;
+	dp->get_cur_freq = kbase_devfreq_cur_freq;
+	dp->exit = kbase_devfreq_exit;
+
+	/* Hand the real reason (e.g. -ENOMEM) back to the caller */
+	err = kbase_devfreq_init_freq_table(kbdev, dp);
+	if (err)
+		return err;
+
+	kbdev->devfreq = devfreq_add_device(kbdev->dev, dp,
+				"simple_ondemand", NULL);
+	if (IS_ERR(kbdev->devfreq)) {
+		err = PTR_ERR(kbdev->devfreq);
+		/* Do not leave an error pointer behind, and free the table
+		 * through dp: kbdev->devfreq cannot be dereferenced here.
+		 */
+		kbdev->devfreq = NULL;
+		kfree(dp->freq_table);
+		dp->freq_table = NULL;
+		return err;
+	}
+
+	err = devfreq_register_opp_notifier(kbdev->dev, kbdev->devfreq);
+	if (err) {
+		dev_err(kbdev->dev,
+			"Failed to register OPP notifier (%d)\n", err);
+		goto opp_notifier_failed;
+	}
+
+	opp_rate = kbdev->current_freq;
+	rcu_read_lock();
+	devfreq_recommended_opp(kbdev->dev, &opp_rate, 0);
+	rcu_read_unlock();
+	kbdev->devfreq->last_status.current_frequency = opp_rate;
+
+	gpu_devdata.data = kbdev->devfreq;
+	kbdev->opp_info = rockchip_register_thermal_notifier(kbdev->dev,
+							     &gpu_devdata);
+	if (IS_ERR(kbdev->opp_info)) {
+		dev_dbg(kbdev->dev, "without thermal notifier\n");
+		kbdev->opp_info = NULL;
+	}
+#ifdef CONFIG_DEVFREQ_THERMAL
+	err = kbase_power_model_simple_init(kbdev);
+	if (err && err != -ENODEV) {
+		/* Probe deferral is reported to the caller without a log */
+		if (err != -EPROBE_DEFER)
+			dev_err(kbdev->dev,
+				"Failed to initialize simple power model (%d)\n",
+				err);
+		goto cooling_failed;
+	}
+	/* -ENODEV from the power model means: run without a cooling device */
+	if (!err) {
+		kbdev->devfreq_cooling = of_devfreq_cooling_register_power(
+				kbdev->dev->of_node,
+				kbdev->devfreq,
+				&power_model_simple_ops);
+		if (IS_ERR_OR_NULL(kbdev->devfreq_cooling)) {
+			/* PTR_ERR(NULL) is 0, which would turn this failure
+			 * into a successful return.
+			 */
+			err = kbdev->devfreq_cooling ?
+				PTR_ERR(kbdev->devfreq_cooling) : -ENODEV;
+			kbdev->devfreq_cooling = NULL;
+			dev_err(kbdev->dev,
+				"Failed to register cooling device (%d)\n",
+				err);
+			goto cooling_failed;
+		}
+	}
+	I("success initing power_model_simple.");
+#endif
+
+	return 0;
+
+#ifdef CONFIG_DEVFREQ_THERMAL
+cooling_failed:
+	/* gpu_devdata.data refers to the devfreq instance removed below */
+	if (kbdev->opp_info) {
+		rockchip_unregister_thermal_notifier(kbdev->opp_info);
+		kbdev->opp_info = NULL;
+	}
+	devfreq_unregister_opp_notifier(kbdev->dev, kbdev->devfreq);
+#endif /* CONFIG_DEVFREQ_THERMAL */
+opp_notifier_failed:
+	if (devfreq_remove_device(kbdev->devfreq))
+		dev_err(kbdev->dev, "Failed to terminate devfreq (%d)\n", err);
+	else
+		kbdev->devfreq = NULL;
+
+	return err;
+}
+
+/**
+ * kbase_devfreq_term - Undo kbase_devfreq_init()
+ * @kbdev: Kbase device pointer
+ *
+ * Unregisters the thermal notifier, the cooling device (if any) and the OPP
+ * notifier, then removes the devfreq device. kbdev->devfreq is cleared only
+ * when the removal succeeded.
+ */
+void kbase_devfreq_term(struct kbase_device *kbdev)
+{
+	int ret;
+
+	dev_dbg(kbdev->dev, "Term Mali devfreq\n");
+
+	rockchip_unregister_thermal_notifier(kbdev->opp_info);
+#ifdef CONFIG_DEVFREQ_THERMAL
+	if (kbdev->devfreq_cooling != NULL)
+		devfreq_cooling_unregister(kbdev->devfreq_cooling);
+#endif
+	devfreq_unregister_opp_notifier(kbdev->dev, kbdev->devfreq);
+
+	ret = devfreq_remove_device(kbdev->devfreq);
+	if (!ret) {
+		kbdev->devfreq = NULL;
+		return;
+	}
+
+	dev_err(kbdev->dev, "Failed to terminate devfreq (%d)\n", ret);
+}
--- a/drivers/gpu/arm/midgard_for_linux/backend/gpu/mali_kbase_devfreq.h
+++ b/drivers/gpu/arm/midgard_for_linux/backend/gpu/mali_kbase_devfreq.h
@ -0,0 +1,24 @@
+/*
+ *
+ * (C) COPYRIGHT 2014 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#ifndef _BASE_DEVFREQ_H_
+#define _BASE_DEVFREQ_H_
+
+/**
+ * kbase_devfreq_init - Register the GPU with devfreq
+ * @kbdev: Kbase device pointer
+ *
+ * Return: 0 on success, -ENODEV if @kbdev has no clock, otherwise a negative
+ * error code.
+ */
+int kbase_devfreq_init(struct kbase_device *kbdev);
+/**
+ * kbase_devfreq_term - Undo kbase_devfreq_init()
+ * @kbdev: Kbase device pointer
+ */
+void kbase_devfreq_term(struct kbase_device *kbdev);
+
+#endif /* _BASE_DEVFREQ_H_ */
--- a/drivers/gpu/arm/midgard_for_linux/backend/gpu/mali_kbase_device_hw.c
+++ b/drivers/gpu/arm/midgard_for_linux/backend/gpu/mali_kbase_device_hw.c
@ -0,0 +1,255 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+/*
+ *
+ */
+#include <mali_kbase.h>
+#include <backend/gpu/mali_kbase_instr_internal.h>
+#include <backend/gpu/mali_kbase_pm_internal.h>
+
+#include <backend/gpu/mali_kbase_device_internal.h>
+
+#if !defined(CONFIG_MALI_NO_MALI)
+
+
+#ifdef CONFIG_DEBUG_FS
+
+
+/**
+ * kbase_io_history_resize - Replace the register access history buffer
+ * @h:        Pointer to the history data structure
+ * @new_size: Number of entries of the new buffer, must not be 0
+ *
+ * The recorded history is discarded: the access count restarts at 0.
+ *
+ * Return: 0 on success, -1 if @new_size is 0 or the allocation failed.
+ */
+int kbase_io_history_resize(struct kbase_io_history *h, u16 new_size)
+{
+	struct kbase_io_access *fresh;
+	struct kbase_io_access *stale;
+	unsigned long irq_flags;
+
+	/* The new size must not be 0 */
+	if (new_size == 0)
+		return -1;
+
+	fresh = vmalloc(new_size * sizeof(*h->buf));
+	if (fresh == NULL)
+		return -1;
+
+	spin_lock_irqsave(&h->lock, irq_flags);
+
+	/* The old entries are deliberately not copied over: the dump logic
+	 * uses 'count' both as a counter and as an index into the buffer,
+	 * and that index would no longer match the new array. Starting
+	 * afresh is a corner case that does not need better support.
+	 */
+	stale = h->buf;
+	h->buf = fresh;
+	h->size = new_size;
+	h->count = 0;
+
+	spin_unlock_irqrestore(&h->lock, irq_flags);
+
+	/* Release the previous buffer outside of the spinlock */
+	vfree(stale);
+
+	return 0;
+}
+
+
+/**
+ * kbase_io_history_init - Initialise the register access history
+ * @h: Pointer to the history data structure
+ * @n: Number of entries to allocate
+ *
+ * The history starts out disabled and empty.
+ *
+ * Return: 0 on success, -1 if the buffer could not be set up.
+ */
+int kbase_io_history_init(struct kbase_io_history *h, u16 n)
+{
+	spin_lock_init(&h->lock);
+	h->enabled = false;
+	h->buf = NULL;
+	h->size = 0;
+	h->count = 0;
+
+	return kbase_io_history_resize(h, n) ? -1 : 0;
+}
+
+
+/* Free the register access history buffer and forget about it. */
+void kbase_io_history_term(struct kbase_io_history *h)
+{
+	struct kbase_io_access *buf = h->buf;
+
+	h->buf = NULL;
+	vfree(buf);
+}
+
+
+/* kbase_io_history_add - record one register access in the history
+ *
+ * @h: Pointer to the history data structure
+ * @addr: Register address
+ * @value: The value that was read from or written to the register
+ * @write: 1 for a register write, 0 for a read; stored in bit 0 of the
+ *         recorded address
+ */
+static void kbase_io_history_add(struct kbase_io_history *h,
+		void __iomem const *addr, u32 value, u8 write)
+{
+	struct kbase_io_access *slot;
+	unsigned long irq_flags;
+
+	spin_lock_irqsave(&h->lock, irq_flags);
+
+	/* The buffer is used as a ring indexed by the running count */
+	slot = h->buf + (h->count % h->size);
+	slot->value = value;
+	slot->addr = (uintptr_t)addr | write;
+
+	h->count++;
+	/* When the count wraps to 0, restart it at the buffer size so that a
+	 * later dump still covers the entire buffer
+	 */
+	if (unlikely(h->count == 0))
+		h->count = h->size;
+
+	spin_unlock_irqrestore(&h->lock, irq_flags);
+}
+
+
+/**
+ * kbase_io_history_dump - Print the recorded register accesses
+ * @kbdev: Kbase device pointer
+ *
+ * Does nothing while the history is disabled. Otherwise the most recent
+ * accesses (at most one buffer's worth) are printed oldest first with
+ * dev_err(), under the history lock.
+ */
+void kbase_io_history_dump(struct kbase_device *kbdev)
+{
+	struct kbase_io_history *const h = &kbdev->io_history;
+	unsigned long irq_flags;
+	size_t iters;
+	u16 i;
+
+	if (!unlikely(h->enabled))
+		return;
+
+	spin_lock_irqsave(&h->lock, irq_flags);
+
+	dev_err(kbdev->dev, "Register IO History:");
+
+	/* Cannot print more entries than were recorded or than fit */
+	iters = h->count;
+	if (h->size <= h->count)
+		iters = h->size;
+
+	dev_err(kbdev->dev, "Last %zu register accesses of %zu total:\n", iters,
+			h->count);
+	for (i = 0; i < iters; ++i) {
+		const size_t pos = (h->count - iters + i) % h->size;
+		struct kbase_io_access *io = &h->buf[pos];
+		char access = 'r';
+
+		/* Bit 0 of the stored address marks a write */
+		if (io->addr & 1)
+			access = 'w';
+
+		dev_err(kbdev->dev, "%6i: %c: reg 0x%p val %08x\n", i, access,
+				(void *)(io->addr & ~0x1), io->value);
+	}
+
+	spin_unlock_irqrestore(&h->lock, irq_flags);
+}
+
+
+#endif /* CONFIG_DEBUG_FS */
+
+
+/*
+ * Write @value to the GPU register at @offset, record the access in the IO
+ * history when that is enabled (CONFIG_DEBUG_FS) and, for a context with a
+ * trace buffer, in the context's register access trace.
+ */
+void kbase_reg_write(struct kbase_device *kbdev, u16 offset, u32 value,
+						struct kbase_context *kctx)
+{
+	KBASE_DEBUG_ASSERT(kbdev->pm.backend.gpu_powered);
+	KBASE_DEBUG_ASSERT(kctx == NULL || kctx->as_nr != KBASEP_AS_NR_INVALID);
+	KBASE_DEBUG_ASSERT(kbdev->dev != NULL);
+
+	writel(value, kbdev->reg + offset);
+
+#ifdef CONFIG_DEBUG_FS
+	if (unlikely(kbdev->io_history.enabled))
+		kbase_io_history_add(&kbdev->io_history, kbdev->reg + offset,
+				value, 1);
+#endif /* CONFIG_DEBUG_FS */
+	dev_dbg(kbdev->dev, "w: reg %04x val %08x", offset, value);
+
+	/* Tracing needs a context that owns a trace buffer */
+	if (!kctx || !kctx->jctx.tb)
+		return;
+
+	kbase_device_trace_register_access(kctx, REG_WRITE, offset, value);
+}
+
+KBASE_EXPORT_TEST_API(kbase_reg_write);
+
+/*
+ * Read the GPU register at @offset, record the access in the IO history when
+ * that is enabled (CONFIG_DEBUG_FS) and, for a context with a trace buffer,
+ * in the context's register access trace. Returns the value read.
+ */
+u32 kbase_reg_read(struct kbase_device *kbdev, u16 offset,
+						struct kbase_context *kctx)
+{
+	u32 reg_val;
+
+	KBASE_DEBUG_ASSERT(kbdev->pm.backend.gpu_powered);
+	KBASE_DEBUG_ASSERT(kctx == NULL || kctx->as_nr != KBASEP_AS_NR_INVALID);
+	KBASE_DEBUG_ASSERT(kbdev->dev != NULL);
+
+	reg_val = readl(kbdev->reg + offset);
+
+#ifdef CONFIG_DEBUG_FS
+	if (unlikely(kbdev->io_history.enabled))
+		kbase_io_history_add(&kbdev->io_history, kbdev->reg + offset,
+				reg_val, 0);
+#endif /* CONFIG_DEBUG_FS */
+	dev_dbg(kbdev->dev, "r: reg %04x val %08x", offset, reg_val);
+
+	/* Tracing needs a context that owns a trace buffer */
+	if (!kctx || !kctx->jctx.tb)
+		return reg_val;
+
+	kbase_device_trace_register_access(kctx, REG_READ, offset, reg_val);
+	return reg_val;
+}
+
+KBASE_EXPORT_TEST_API(kbase_reg_read);
+#endif /* !defined(CONFIG_MALI_NO_MALI) */
+
+/**
+ * kbase_report_gpu_fault - Report a GPU fault.
+ * @kbdev:    Kbase device pointer
+ * @multiple: Zero if only GPU_FAULT was raised, non-zero if MULTIPLE_GPU_FAULTS
+ *            was also set
+ *
+ * Reads the fault status and the 64-bit fault address from the GPU control
+ * registers and prints them with dev_warn(). Called from the GPU interrupt
+ * handler when a fault is flagged.
+ */
+static void kbase_report_gpu_fault(struct kbase_device *kbdev, int multiple)
+{
+	u32 fault_status;
+	u32 addr_hi;
+	u32 addr_lo;
+	u64 fault_addr;
+
+	fault_status = kbase_reg_read(kbdev,
+			GPU_CONTROL_REG(GPU_FAULTSTATUS), NULL);
+	addr_hi = kbase_reg_read(kbdev,
+			GPU_CONTROL_REG(GPU_FAULTADDRESS_HI), NULL);
+	addr_lo = kbase_reg_read(kbdev,
+			GPU_CONTROL_REG(GPU_FAULTADDRESS_LO), NULL);
+
+	/* Assemble the address from its two 32-bit halves */
+	fault_addr = ((u64)addr_hi << 32) | addr_lo;
+
+	dev_warn(kbdev->dev, "GPU Fault 0x%08x (%s) at 0x%016llx",
+			fault_status & 0xFF,
+			kbase_exception_name(kbdev, fault_status),
+			fault_addr);
+
+	if (!multiple)
+		return;
+
+	dev_warn(kbdev->dev, "There were multiple GPU faults - some have not been reported\n");
+}
+
+/*
+ * Handle a GPU interrupt: dispatch each cause flagged in @val (the GPU IRQ
+ * status value), clear exactly those causes in GPU_IRQ_CLEAR and only then
+ * process power state changes.
+ */
+void kbase_gpu_interrupt(struct kbase_device *kbdev, u32 val)
+{
+	KBASE_TRACE_ADD(kbdev, CORE_GPU_IRQ, NULL, NULL, 0u, val);
+	/* The second argument tells the report whether several faults hit */
+	if (val & GPU_FAULT)
+		kbase_report_gpu_fault(kbdev, val & MULTIPLE_GPU_FAULTS);
+
+	if (val & RESET_COMPLETED)
+		kbase_pm_reset_done(kbdev);
+
+	if (val & PRFCNT_SAMPLE_COMPLETED)
+		kbase_instr_hwcnt_sample_done(kbdev);
+
+	if (val & CLEAN_CACHES_COMPLETED)
+		kbase_clean_caches_done(kbdev);
+
+	/* Acknowledge the causes that were just handled */
+	KBASE_TRACE_ADD(kbdev, CORE_GPU_IRQ_CLEAR, NULL, NULL, 0u, val);
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_CLEAR), val, NULL);
+
+	/* kbase_pm_check_transitions must be called after the IRQ has been
+	 * cleared. This is because it might trigger further power transitions
+	 * and we don't want to miss the interrupt raised to notify us that
+	 * these further transitions have finished.
+	 */
+	if (val & POWER_CHANGED_ALL)
+		kbase_pm_power_changed(kbdev);
+
+	KBASE_TRACE_ADD(kbdev, CORE_GPU_IRQ_DONE, NULL, NULL, 0u, val);
+}
--- a/drivers/gpu/arm/midgard_for_linux/backend/gpu/mali_kbase_device_internal.h
+++ b/drivers/gpu/arm/midgard_for_linux/backend/gpu/mali_kbase_device_internal.h
@ -0,0 +1,67 @@
+/*
+ *
+ * (C) COPYRIGHT 2014 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/*
+ * Backend-specific HW access device APIs
+ */
+
+#ifndef _KBASE_DEVICE_INTERNAL_H_
+#define _KBASE_DEVICE_INTERNAL_H_
+
+/**
+ * kbase_reg_write - write to GPU register
+ * @kbdev:  Kbase device pointer
+ * @offset: Offset of register
+ * @value:  Value to write
+ * @kctx:   Kbase context pointer. May be NULL
+ *
+ * Caller must ensure the GPU is powered (@kbdev->pm.backend.gpu_powered !=
+ * false). If @kctx is not NULL then the caller must ensure it is scheduled
+ * (@kctx->as_nr != KBASEP_AS_NR_INVALID).
+ */
+void kbase_reg_write(struct kbase_device *kbdev, u16 offset, u32 value,
+						struct kbase_context *kctx);
+
+/**
+ * kbase_reg_read - read from GPU register
+ * @kbdev:  Kbase device pointer
+ * @offset: Offset of register
+ * @kctx:   Kbase context pointer. May be NULL
+ *
+ * Caller must ensure the GPU is powered (@kbdev->pm.backend.gpu_powered !=
+ * false). If @kctx is not NULL then the caller must ensure it is scheduled
+ * (@kctx->as_nr != KBASEP_AS_NR_INVALID).
+ *
+ * Return: Value in desired register
+ */
+u32 kbase_reg_read(struct kbase_device *kbdev, u16 offset,
+						struct kbase_context *kctx);
+
+
+/**
+ * kbase_gpu_interrupt - GPU interrupt handler
+ * @kbdev: Kbase device pointer
+ * @val:   The value of the GPU IRQ status register which triggered the call
+ *
+ * This function is called from the interrupt handler when a GPU irq is to be
+ * handled.
+ */
+void kbase_gpu_interrupt(struct kbase_device *kbdev, u32 val);
+
+#endif /* _KBASE_DEVICE_INTERNAL_H_ */
--- a/drivers/gpu/arm/midgard_for_linux/backend/gpu/mali_kbase_gpu.c
+++ b/drivers/gpu/arm/midgard_for_linux/backend/gpu/mali_kbase_gpu.c
@ -0,0 +1,123 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+/*
+ * Register-based HW access backend APIs
+ */
+#include <mali_kbase.h>
+#include <mali_kbase_hwaccess_backend.h>
+#include <backend/gpu/mali_kbase_irq_internal.h>
+#include <backend/gpu/mali_kbase_jm_internal.h>
+#include <backend/gpu/mali_kbase_js_internal.h>
+#include <backend/gpu/mali_kbase_pm_internal.h>
+
+/*
+ * Early backend bring-up: platform init, read-out of the GPU properties,
+ * power management init and interrupt installation. Steps that already
+ * succeeded are undone in reverse order on failure.
+ *
+ * Returns 0 on success or the error code of the failing step.
+ */
+int kbase_backend_early_init(struct kbase_device *kbdev)
+{
+	int ret = kbasep_platform_device_init(kbdev);
+
+	if (ret)
+		return ret;
+
+	/* The GPU registers only need to be accessible while the GPU
+	 * properties are derived from the GPU feature registers
+	 */
+	kbase_pm_register_access_enable(kbdev);
+	kbase_gpuprops_set(kbdev);
+	kbase_pm_register_access_disable(kbdev);
+
+	ret = kbase_hwaccess_pm_init(kbdev);
+	if (ret)
+		goto err_platform_term;
+
+	ret = kbase_install_interrupts(kbdev);
+	if (ret)
+		goto err_pm_term;
+
+	return 0;
+
+err_pm_term:
+	kbase_hwaccess_pm_term(kbdev);
+err_platform_term:
+	kbasep_platform_device_term(kbdev);
+
+	return ret;
+}
+
+/*
+ * Undo kbase_backend_early_init(): release the interrupts, terminate power
+ * management and then the platform device, i.e. the reverse of the init order.
+ */
+void kbase_backend_early_term(struct kbase_device *kbdev)
+{
+	kbase_release_interrupts(kbdev);
+	kbase_hwaccess_pm_term(kbdev);
+	kbasep_platform_device_term(kbdev);
+}
+
+/*
+ * Late backend bring-up: power up the GPU, start the backend timer, verify
+ * the interrupt handlers (debug builds with real hardware only) and set up
+ * the job slots. Steps that already succeeded are undone on failure.
+ *
+ * Returns 0 on success or a negative error code.
+ */
+int kbase_backend_late_init(struct kbase_device *kbdev)
+{
+	int err;
+
+	err = kbase_hwaccess_pm_powerup(kbdev, PM_HW_ISSUES_DETECT);
+	if (err)
+		return err;
+
+	err = kbase_backend_timer_init(kbdev);
+	if (err)
+		goto fail_timer;
+
+#ifdef CONFIG_MALI_DEBUG
+#ifndef CONFIG_MALI_NO_MALI
+	if (kbasep_common_test_interrupt_handlers(kbdev) != 0) {
+		dev_err(kbdev->dev, "Interrupt assigment check failed.\n");
+		err = -EINVAL;
+		goto fail_interrupt_test;
+	}
+#endif /* !CONFIG_MALI_NO_MALI */
+#endif /* CONFIG_MALI_DEBUG */
+
+	err = kbase_job_slot_init(kbdev);
+	if (err)
+		goto fail_job_slot;
+
+	init_waitqueue_head(&kbdev->hwaccess.backend.reset_wait);
+
+	return 0;
+
+	/* Both labels below share the same cleanup: stop the timer, then halt
+	 * power management
+	 */
+fail_job_slot:
+
+#ifdef CONFIG_MALI_DEBUG
+#ifndef CONFIG_MALI_NO_MALI
+fail_interrupt_test:
+#endif /* !CONFIG_MALI_NO_MALI */
+#endif /* CONFIG_MALI_DEBUG */
+
+	kbase_backend_timer_term(kbdev);
+fail_timer:
+	kbase_hwaccess_pm_halt(kbdev);
+
+	return err;
+}
+
+/*
+ * Undo kbase_backend_late_init(): halt and terminate the job slots, stop the
+ * backend timer and halt power management.
+ */
+void kbase_backend_late_term(struct kbase_device *kbdev)
+{
+	kbase_job_slot_halt(kbdev);
+	kbase_job_slot_term(kbdev);
+	kbase_backend_timer_term(kbdev);
+	kbase_hwaccess_pm_halt(kbdev);
+}
+
--- a/drivers/gpu/arm/midgard_for_linux/backend/gpu/mali_kbase_gpuprops_backend.c
+++ b/drivers/gpu/arm/midgard_for_linux/backend/gpu/mali_kbase_gpuprops_backend.c
@ -0,0 +1,105 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/*
+ * Base kernel property query backend APIs
+ */
+
+#include <mali_kbase.h>
+#include <backend/gpu/mali_kbase_device_internal.h>
+#include <backend/gpu/mali_kbase_pm_internal.h>
+#include <mali_kbase_hwaccess_gpuprops.h>
+
+/* Read one GPU register without an associated context. */
+static u32 gpuprops_reg(struct kbase_device *kbdev, u16 offset)
+{
+	return kbase_reg_read(kbdev, offset, NULL);
+}
+
+/**
+ * kbase_backend_gpuprops_get - Dump the GPU property registers
+ * @kbdev:   Kbase device pointer
+ * @regdump: Structure receiving the raw register contents
+ *
+ * The registers are read through kbase_reg_read(), so the GPU has to be
+ * powered when this is called.
+ */
+void kbase_backend_gpuprops_get(struct kbase_device *kbdev,
+					struct kbase_gpuprops_regdump *regdump)
+{
+	int n;
+
+	/* Identification and feature registers */
+	regdump->gpu_id = gpuprops_reg(kbdev, GPU_CONTROL_REG(GPU_ID));
+	regdump->l2_features =
+		gpuprops_reg(kbdev, GPU_CONTROL_REG(L2_FEATURES));
+	regdump->suspend_size =
+		gpuprops_reg(kbdev, GPU_CONTROL_REG(SUSPEND_SIZE));
+	regdump->tiler_features =
+		gpuprops_reg(kbdev, GPU_CONTROL_REG(TILER_FEATURES));
+	regdump->mem_features =
+		gpuprops_reg(kbdev, GPU_CONTROL_REG(MEM_FEATURES));
+	regdump->mmu_features =
+		gpuprops_reg(kbdev, GPU_CONTROL_REG(MMU_FEATURES));
+	regdump->as_present =
+		gpuprops_reg(kbdev, GPU_CONTROL_REG(AS_PRESENT));
+	regdump->js_present =
+		gpuprops_reg(kbdev, GPU_CONTROL_REG(JS_PRESENT));
+
+	/* Per job slot and per texture feature register arrays */
+	for (n = 0; n < GPU_MAX_JOB_SLOTS; n++)
+		regdump->js_features[n] = gpuprops_reg(kbdev,
+				GPU_CONTROL_REG(JS_FEATURES_REG(n)));
+
+	for (n = 0; n < BASE_GPU_NUM_TEXTURE_FEATURES_REGISTERS; n++)
+		regdump->texture_features[n] = gpuprops_reg(kbdev,
+				GPU_CONTROL_REG(TEXTURE_FEATURES_REG(n)));
+
+	/* Thread properties */
+	regdump->thread_max_threads =
+		gpuprops_reg(kbdev, GPU_CONTROL_REG(THREAD_MAX_THREADS));
+	regdump->thread_max_workgroup_size = gpuprops_reg(kbdev,
+				GPU_CONTROL_REG(THREAD_MAX_WORKGROUP_SIZE));
+	regdump->thread_max_barrier_size = gpuprops_reg(kbdev,
+				GPU_CONTROL_REG(THREAD_MAX_BARRIER_SIZE));
+	regdump->thread_features =
+		gpuprops_reg(kbdev, GPU_CONTROL_REG(THREAD_FEATURES));
+
+	/* Core presence masks, low word first */
+	regdump->shader_present_lo =
+		gpuprops_reg(kbdev, GPU_CONTROL_REG(SHADER_PRESENT_LO));
+	regdump->shader_present_hi =
+		gpuprops_reg(kbdev, GPU_CONTROL_REG(SHADER_PRESENT_HI));
+
+	regdump->tiler_present_lo =
+		gpuprops_reg(kbdev, GPU_CONTROL_REG(TILER_PRESENT_LO));
+	regdump->tiler_present_hi =
+		gpuprops_reg(kbdev, GPU_CONTROL_REG(TILER_PRESENT_HI));
+
+	regdump->l2_present_lo =
+		gpuprops_reg(kbdev, GPU_CONTROL_REG(L2_PRESENT_LO));
+	regdump->l2_present_hi =
+		gpuprops_reg(kbdev, GPU_CONTROL_REG(L2_PRESENT_HI));
+}
+
+/**
+ * kbase_backend_gpuprops_get_features - Fill in the coherency features
+ * @kbdev:   Kbase device pointer
+ * @regdump: Structure whose coherency_features member is set
+ *
+ * GPUs with BASE_HW_FEATURE_COHERENCY_REG report the value through the
+ * COHERENCY_FEATURES register; for all others a fixed value is used.
+ */
+void kbase_backend_gpuprops_get_features(struct kbase_device *kbdev,
+					struct kbase_gpuprops_regdump *regdump)
+{
+	if (!kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_COHERENCY_REG)) {
+		/* Pre COHERENCY_FEATURES we only supported ACE_LITE */
+		regdump->coherency_features =
+				COHERENCY_FEATURE_BIT(COHERENCY_NONE) |
+				COHERENCY_FEATURE_BIT(COHERENCY_ACE_LITE);
+		return;
+	}
+
+	/* Register access is enabled just for the duration of this read */
+	kbase_pm_register_access_enable(kbdev);
+
+	regdump->coherency_features = kbase_reg_read(kbdev,
+			GPU_CONTROL_REG(COHERENCY_FEATURES), NULL);
+
+	kbase_pm_register_access_disable(kbdev);
+}
+
--- a/drivers/gpu/arm/midgard_for_linux/backend/gpu/mali_kbase_instr_backend.c
+++ b/drivers/gpu/arm/midgard_for_linux/backend/gpu/mali_kbase_instr_backend.c
@ -0,0 +1,492 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/*
+ * GPU backend instrumentation APIs.
+ */
+
+#include <mali_kbase.h>
+#include <mali_midg_regmap.h>
+#include <mali_kbase_hwaccess_instr.h>
+#include <backend/gpu/mali_kbase_device_internal.h>
+#include <backend/gpu/mali_kbase_pm_internal.h>
+#include <backend/gpu/mali_kbase_instr_internal.h>
+
+/**
+ * kbasep_instr_hwcnt_cacheclean - Start a cache clean & invalidate on the GPU
+ *
+ * @kbdev: Kbase device
+ *
+ * Unmasks the CLEAN_CACHES_COMPLETED interrupt, writes
+ * GPU_COMMAND_CLEAN_INV_CACHES to GPU_COMMAND and moves the instrumentation
+ * state from REQUEST_CLEAN to CLEANING. This function does not wait for the
+ * clean to finish.
+ */
+static void kbasep_instr_hwcnt_cacheclean(struct kbase_device *kbdev)
+{
+	unsigned long hwcnt_flags;
+	unsigned long hwaccess_flags;
+	u32 mask;
+
+	spin_lock_irqsave(&kbdev->hwcnt.lock, hwcnt_flags);
+	KBASE_DEBUG_ASSERT(kbdev->hwcnt.backend.state ==
+					KBASE_INSTR_STATE_REQUEST_CLEAN);
+
+	/* Unmask the clean-complete interrupt */
+	spin_lock_irqsave(&kbdev->hwaccess_lock, hwaccess_flags);
+	mask = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), NULL);
+	mask |= CLEAN_CACHES_COMPLETED;
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), mask, NULL);
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, hwaccess_flags);
+
+	/* Kick off the clean & invalidate so that the MMU tables covering
+	 * the dump buffer are valid */
+	KBASE_TRACE_ADD(kbdev, CORE_GPU_CLEAN_INV_CACHES, NULL, NULL, 0u, 0);
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND),
+			GPU_COMMAND_CLEAN_INV_CACHES, NULL);
+
+	kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_CLEANING;
+	spin_unlock_irqrestore(&kbdev->hwcnt.lock, hwcnt_flags);
+}
+
+/**
+ * kbase_instr_hwcnt_enable_internal - Enable HW counter dumping for a context
+ * @kbdev: Kbase device
+ * @kctx:  Context that becomes the owner of the HW counters
+ * @setup: Dump buffer address and per-block counter enable bitmasks
+ *
+ * Validates the dump buffer address, requests the present shader cores,
+ * unmasks the PRFCNT_SAMPLE_COMPLETED interrupt, queues a cache clean and
+ * waits for it, then programs the PRFCNT_* registers and leaves the
+ * instrumentation state at KBASE_INSTR_STATE_IDLE with 'triggered' set.
+ *
+ * Return: 0 on success; -EINVAL if the dump buffer address is zero or not
+ * 2048-byte aligned, or if instrumentation is not currently disabled.
+ */
+int kbase_instr_hwcnt_enable_internal(struct kbase_device *kbdev,
+					struct kbase_context *kctx,
+					struct kbase_uk_hwcnt_setup *setup)
+{
+	unsigned long flags, pm_flags;
+	int err = -EINVAL;
+	u32 irq_mask;
+	int ret;
+	u64 shader_cores_needed;
+	u32 prfcnt_config;
+
+	shader_cores_needed = kbase_pm_get_present_cores(kbdev,
+							KBASE_PM_CORE_SHADER);
+
+	/* Reject a zero dump buffer address or one that is not 2048-byte
+	 * aligned */
+	if ((setup->dump_buffer == 0ULL) || (setup->dump_buffer & (2048 - 1)))
+		goto out_err;
+
+	/* Override core availability policy to ensure all cores are available
+	 */
+	kbase_pm_ca_instr_enable(kbdev);
+
+	/* Request the cores early on synchronously - we'll release them on any
+	 * errors (e.g. instrumentation already active) */
+	kbase_pm_request_cores_sync(kbdev, true, shader_cores_needed);
+
+	spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
+
+	if (kbdev->hwcnt.backend.state != KBASE_INSTR_STATE_DISABLED) {
+		/* Instrumentation is already enabled */
+		spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
+		goto out_unrequest_cores;
+	}
+
+	/* Enable interrupt */
+	spin_lock_irqsave(&kbdev->hwaccess_lock, pm_flags);
+	irq_mask = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), NULL);
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), irq_mask |
+						PRFCNT_SAMPLE_COMPLETED, NULL);
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, pm_flags);
+
+	/* In use, this context is the owner */
+	kbdev->hwcnt.kctx = kctx;
+	/* Remember the dump address so we can reprogram it later */
+	kbdev->hwcnt.addr = setup->dump_buffer;
+
+	/* Request the clean */
+	kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_REQUEST_CLEAN;
+	kbdev->hwcnt.backend.triggered = 0;
+	/* Clean&invalidate the caches so we're sure the mmu tables for the dump
+	 * buffer is valid */
+	ret = queue_work(kbdev->hwcnt.backend.cache_clean_wq,
+					&kbdev->hwcnt.backend.cache_clean_work);
+	KBASE_DEBUG_ASSERT(ret);
+
+	spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
+
+	/* Wait for cacheclean to complete */
+	wait_event(kbdev->hwcnt.backend.wait,
+					kbdev->hwcnt.backend.triggered != 0);
+
+	KBASE_DEBUG_ASSERT(kbdev->hwcnt.backend.state ==
+							KBASE_INSTR_STATE_IDLE);
+
+	kbase_pm_request_l2_caches(kbdev);
+
+	/* Configure: start from the owning context's address space number */
+	prfcnt_config = kctx->as_nr << PRFCNT_CONFIG_AS_SHIFT;
+#ifdef CONFIG_MALI_PRFCNT_SET_SECONDARY
+	{
+		u32 gpu_id = kbdev->gpu_props.props.raw_props.gpu_id;
+		u32 product_id = (gpu_id & GPU_ID_VERSION_PRODUCT_ID)
+			>> GPU_ID_VERSION_PRODUCT_ID_SHIFT;
+		int arch_v6 = GPU_ID_IS_NEW_FORMAT(product_id);
+
+		/* Set the SETSELECT field only for new-format product IDs */
+		if (arch_v6)
+			prfcnt_config |= 1 << PRFCNT_CONFIG_SETSELECT_SHIFT;
+	}
+#endif
+
+	/* Counters stay in MODE_OFF while the base address and enable masks
+	 * are written; MODE_MANUAL is selected afterwards */
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_CONFIG),
+			prfcnt_config | PRFCNT_CONFIG_MODE_OFF, kctx);
+
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_BASE_LO),
+					setup->dump_buffer & 0xFFFFFFFF, kctx);
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_BASE_HI),
+					setup->dump_buffer >> 32,        kctx);
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_JM_EN),
+					setup->jm_bm,                    kctx);
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_SHADER_EN),
+					setup->shader_bm,                kctx);
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_MMU_L2_EN),
+					setup->mmu_l2_bm,                kctx);
+	/* Due to PRLAM-8186 we need to disable the Tiler before we enable the
+	 * HW counter dump. */
+	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8186))
+		kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_TILER_EN), 0,
+									kctx);
+	else
+		kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_TILER_EN),
+							setup->tiler_bm, kctx);
+
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_CONFIG),
+			prfcnt_config | PRFCNT_CONFIG_MODE_MANUAL, kctx);
+
+	/* If HW has PRLAM-8186 we can now re-enable the tiler HW counters dump
+	 */
+	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8186))
+		kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_TILER_EN),
+							setup->tiler_bm, kctx);
+
+	spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
+
+	/* Publish the idle state and wake anything waiting on 'triggered' */
+	kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_IDLE;
+	kbdev->hwcnt.backend.triggered = 1;
+	wake_up(&kbdev->hwcnt.backend.wait);
+
+	spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
+
+	err = 0;
+
+	dev_dbg(kbdev->dev, "HW counters dumping set-up for context %p", kctx);
+	return err;
+ out_unrequest_cores:
+	/* hwcnt.lock has already been dropped on this path, so 'flags' is
+	 * reused for hwaccess_lock while the core request is undone.
+	 * NOTE(review): kbase_pm_ca_instr_enable() is not reverted here -
+	 * confirm that is intended. */
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	kbase_pm_unrequest_cores(kbdev, true, shader_cores_needed);
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+ out_err:
+	return err;
+}
+
+/*
+ * Disable HW counter dumping for @kctx.
+ *
+ * Waits until the instrumentation state is idle, then marks it disabled,
+ * masks the PRFCNT_SAMPLE_COMPLETED interrupt, clears PRFCNT_CONFIG and
+ * drops the core and L2 requests.
+ *
+ * Returns 0 on success, or -EINVAL when instrumentation is already disabled
+ * or is owned by a different context.
+ */
+int kbase_instr_hwcnt_disable_internal(struct kbase_context *kctx)
+{
+	struct kbase_device *kbdev = kctx->kbdev;
+	unsigned long hwcnt_flags, hwaccess_flags;
+	u32 mask;
+
+	/* Loop until the state is idle; the loop is left with hwcnt.lock
+	 * still held */
+	for (;;) {
+		spin_lock_irqsave(&kbdev->hwcnt.lock, hwcnt_flags);
+
+		/* Not enabled at all, or set up for another context */
+		if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_DISABLED ||
+				kbdev->hwcnt.kctx != kctx) {
+			spin_unlock_irqrestore(&kbdev->hwcnt.lock,
+							hwcnt_flags);
+			return -EINVAL;
+		}
+
+		if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_IDLE)
+			break;
+
+		spin_unlock_irqrestore(&kbdev->hwcnt.lock, hwcnt_flags);
+
+		/* A dump or setup is still in flight - wait for it */
+		wait_event(kbdev->hwcnt.backend.wait,
+				kbdev->hwcnt.backend.triggered != 0);
+	}
+
+	kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_DISABLED;
+	kbdev->hwcnt.backend.triggered = 0;
+
+	spin_lock_irqsave(&kbdev->hwaccess_lock, hwaccess_flags);
+
+	/* Mask the sample-complete interrupt */
+	mask = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), NULL);
+	mask &= ~PRFCNT_SAMPLE_COMPLETED;
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), mask, NULL);
+
+	/* Switch the counters off */
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_CONFIG), 0, kctx);
+
+	/* The counters no longer have an owner or a dump address */
+	kbdev->hwcnt.kctx = NULL;
+	kbdev->hwcnt.addr = 0ULL;
+
+	kbase_pm_ca_instr_disable(kbdev);
+
+	kbase_pm_unrequest_cores(kbdev, true,
+		kbase_pm_get_present_cores(kbdev, KBASE_PM_CORE_SHADER));
+
+	kbase_pm_release_l2_caches(kbdev);
+
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, hwaccess_flags);
+	spin_unlock_irqrestore(&kbdev->hwcnt.lock, hwcnt_flags);
+
+	dev_dbg(kbdev->dev, "HW counters dumping disabled for context %p",
+									kctx);
+
+	return 0;
+}
+
+/*
+ * Ask the hardware to sample the HW counters into the stored dump address.
+ *
+ * Only starts the dump; completion is reported separately.
+ *
+ * Returns 0 when the sample command was issued, or -EINVAL when @kctx is not
+ * the owning context or the instrumentation state is not idle.
+ */
+int kbase_instr_hwcnt_request_dump(struct kbase_context *kctx)
+{
+	struct kbase_device *kbdev = kctx->kbdev;
+	unsigned long irq_flags;
+	int err = -EINVAL;
+
+	spin_lock_irqsave(&kbdev->hwcnt.lock, irq_flags);
+
+	/* Proceed only for the owning context and only from the idle state
+	 * (i.e. not disabled, not already dumping and not resetting) */
+	if (kbdev->hwcnt.kctx == kctx &&
+			kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_IDLE) {
+		kbdev->hwcnt.backend.triggered = 0;
+
+		/* Mark that we're dumping - the PF handler can signal that we
+		 * faulted */
+		kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_DUMPING;
+
+		/* Re-program the dump address, low word then high word */
+		kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_BASE_LO),
+				kbdev->hwcnt.addr & 0xFFFFFFFF, NULL);
+		kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_BASE_HI),
+				kbdev->hwcnt.addr >> 32, NULL);
+
+		/* Issue the sample command */
+		KBASE_TRACE_ADD(kbdev, CORE_GPU_PRFCNT_SAMPLE, NULL, NULL,
+				kbdev->hwcnt.addr, 0);
+		kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND),
+				GPU_COMMAND_PRFCNT_SAMPLE, kctx);
+
+		dev_dbg(kbdev->dev, "HW counters dumping done for context %p", kctx);
+
+		err = 0;
+	}
+
+	spin_unlock_irqrestore(&kbdev->hwcnt.lock, irq_flags);
+	return err;
+}
+KBASE_EXPORT_SYMBOL(kbase_instr_hwcnt_request_dump);
+
+/*
+ * Poll whether the current HW counter dump has finished.
+ *
+ * Returns true when the state is IDLE (*success = true) or FAULT
+ * (*success = false; the state is then reset to IDLE). In any other state
+ * it returns false and leaves *success untouched.
+ */
+bool kbase_instr_hwcnt_dump_complete(struct kbase_context *kctx,
+						bool * const success)
+{
+	struct kbase_device *kbdev = kctx->kbdev;
+	unsigned long irq_flags;
+	bool complete = false;
+
+	spin_lock_irqsave(&kbdev->hwcnt.lock, irq_flags);
+
+	switch (kbdev->hwcnt.backend.state) {
+	case KBASE_INSTR_STATE_IDLE:
+		*success = true;
+		complete = true;
+		break;
+	case KBASE_INSTR_STATE_FAULT:
+		*success = false;
+		complete = true;
+		/* The fault has been reported; go back to idle */
+		kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_IDLE;
+		break;
+	default:
+		/* Still in progress */
+		break;
+	}
+
+	spin_unlock_irqrestore(&kbdev->hwcnt.lock, irq_flags);
+
+	return complete;
+}
+KBASE_EXPORT_SYMBOL(kbase_instr_hwcnt_dump_complete);
+
+/*
+ * Work item handler for hwcnt.backend.cache_clean_work.
+ *
+ * Under cacheclean_lock: issues the cache clean, waits until the state
+ * leaves CLEANING, then sets the state to IDLE, sets 'triggered' and wakes
+ * hwcnt.backend.wait.
+ */
+void kbasep_cache_clean_worker(struct work_struct *data)
+{
+	struct kbase_device *kbdev = container_of(data, struct kbase_device,
+					hwcnt.backend.cache_clean_work);
+	unsigned long irq_flags;
+
+	mutex_lock(&kbdev->cacheclean_lock);
+	kbasep_instr_hwcnt_cacheclean(kbdev);
+
+	spin_lock_irqsave(&kbdev->hwcnt.lock, irq_flags);
+
+	/* Wait for our condition, and any reset to complete. The spinlock is
+	 * dropped around each wait and the state re-checked under it. */
+	for (;;) {
+		if (kbdev->hwcnt.backend.state != KBASE_INSTR_STATE_CLEANING)
+			break;
+
+		spin_unlock_irqrestore(&kbdev->hwcnt.lock, irq_flags);
+		wait_event(kbdev->hwcnt.backend.cache_clean_wait,
+			   kbdev->hwcnt.backend.state !=
+					KBASE_INSTR_STATE_CLEANING);
+		spin_lock_irqsave(&kbdev->hwcnt.lock, irq_flags);
+	}
+
+	KBASE_DEBUG_ASSERT(kbdev->hwcnt.backend.state ==
+					KBASE_INSTR_STATE_CLEANED);
+
+	/* Clean finished: go idle and release whoever waits on 'triggered' */
+	kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_IDLE;
+	kbdev->hwcnt.backend.triggered = 1;
+	wake_up(&kbdev->hwcnt.backend.wait);
+
+	spin_unlock_irqrestore(&kbdev->hwcnt.lock, irq_flags);
+	mutex_unlock(&kbdev->cacheclean_lock);
+}
+
+/*
+ * Handle completion of a HW counter sample.
+ *
+ * In the FAULT state the waiter is released straight away. In the DUMPING
+ * state a cache clean is queued; other states are left untouched.
+ */
+void kbase_instr_hwcnt_sample_done(struct kbase_device *kbdev)
+{
+	unsigned long irq_flags;
+
+	spin_lock_irqsave(&kbdev->hwcnt.lock, irq_flags);
+
+	switch (kbdev->hwcnt.backend.state) {
+	case KBASE_INSTR_STATE_FAULT:
+		kbdev->hwcnt.backend.triggered = 1;
+		wake_up(&kbdev->hwcnt.backend.wait);
+		break;
+	case KBASE_INSTR_STATE_DUMPING: {
+		int ret;
+
+		/* Always clean and invalidate the cache after a successful
+		 * dump */
+		kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_REQUEST_CLEAN;
+		ret = queue_work(kbdev->hwcnt.backend.cache_clean_wq,
+				 &kbdev->hwcnt.backend.cache_clean_work);
+		KBASE_DEBUG_ASSERT(ret);
+		break;
+	}
+	default:
+		break;
+	}
+
+	spin_unlock_irqrestore(&kbdev->hwcnt.lock, irq_flags);
+}
+
+/*
+ * Handle completion of a cache clean.
+ *
+ * Does nothing while instrumentation is disabled. Otherwise masks the
+ * CLEAN_CACHES_COMPLETED interrupt and, if the state is CLEANING, moves it
+ * to CLEANED and wakes hwcnt.backend.cache_clean_wait.
+ */
+void kbase_clean_caches_done(struct kbase_device *kbdev)
+{
+	unsigned long hwcnt_flags;
+	unsigned long hwaccess_flags;
+	u32 mask;
+
+	/* This first check is made without taking hwcnt.lock */
+	if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_DISABLED)
+		return;
+
+	spin_lock_irqsave(&kbdev->hwcnt.lock, hwcnt_flags);
+
+	/* Mask the clean-complete interrupt again */
+	spin_lock_irqsave(&kbdev->hwaccess_lock, hwaccess_flags);
+	mask = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), NULL);
+	mask &= ~CLEAN_CACHES_COMPLETED;
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), mask, NULL);
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, hwaccess_flags);
+
+	/* Only wake if we weren't resetting */
+	if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_CLEANING) {
+		kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_CLEANED;
+		wake_up(&kbdev->hwcnt.backend.cache_clean_wait);
+	}
+
+	spin_unlock_irqrestore(&kbdev->hwcnt.lock, hwcnt_flags);
+}
+
+/*
+ * Block until the pending dump and its cache clean have finished.
+ *
+ * Returns 0 when the dump completed, or -EINVAL when the state is FAULT
+ * (in which case the state is reset to IDLE).
+ */
+int kbase_instr_hwcnt_wait_for_dump(struct kbase_context *kctx)
+{
+	struct kbase_device *kbdev = kctx->kbdev;
+	unsigned long irq_flags;
+	int err = 0;
+
+	/* Sleep until 'triggered' is set */
+	wait_event(kbdev->hwcnt.backend.wait,
+			kbdev->hwcnt.backend.triggered != 0);
+
+	spin_lock_irqsave(&kbdev->hwcnt.lock, irq_flags);
+
+	if (kbdev->hwcnt.backend.state != KBASE_INSTR_STATE_FAULT) {
+		/* Dump done */
+		KBASE_DEBUG_ASSERT(kbdev->hwcnt.backend.state ==
+						KBASE_INSTR_STATE_IDLE);
+	} else {
+		/* Report the fault and return the state to idle */
+		err = -EINVAL;
+		kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_IDLE;
+	}
+
+	spin_unlock_irqrestore(&kbdev->hwcnt.lock, irq_flags);
+
+	return err;
+}
+
+/*
+ * Issue the GPU command that clears the HW counters.
+ *
+ * Returns 0 when the clear command was written, or -EINVAL when @kctx is not
+ * the owning context or the instrumentation state is not idle.
+ */
+int kbase_instr_hwcnt_clear(struct kbase_context *kctx)
+{
+	struct kbase_device *kbdev = kctx->kbdev;
+	unsigned long irq_flags;
+	int err = -EINVAL;
+
+	spin_lock_irqsave(&kbdev->hwcnt.lock, irq_flags);
+
+	/* Must be the context previously set up, and not already dumping */
+	if (kbdev->hwcnt.kctx == kctx &&
+			kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_IDLE) {
+		KBASE_TRACE_ADD(kbdev, CORE_GPU_PRFCNT_CLEAR, NULL, NULL,
+				0u, 0);
+		kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND),
+				GPU_COMMAND_PRFCNT_CLEAR, kctx);
+		err = 0;
+	}
+
+	spin_unlock_irqrestore(&kbdev->hwcnt.lock, irq_flags);
+	return err;
+}
+KBASE_EXPORT_SYMBOL(kbase_instr_hwcnt_clear);
+
+/**
+ * kbase_instr_backend_init - Initialise the instrumentation backend
+ * @kbdev: Kbase device
+ *
+ * Sets the state to KBASE_INSTR_STATE_DISABLED, clears 'triggered',
+ * initialises both wait queues and the cache clean work item, and allocates
+ * the cache clean workqueue.
+ *
+ * Return: 0 on success, -ENOMEM if the workqueue could not be allocated.
+ */
+int kbase_instr_backend_init(struct kbase_device *kbdev)
+{
+	kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_DISABLED;
+
+	init_waitqueue_head(&kbdev->hwcnt.backend.wait);
+	init_waitqueue_head(&kbdev->hwcnt.backend.cache_clean_wait);
+	INIT_WORK(&kbdev->hwcnt.backend.cache_clean_work,
+						kbasep_cache_clean_worker);
+	kbdev->hwcnt.backend.triggered = 0;
+
+	kbdev->hwcnt.backend.cache_clean_wq =
+			alloc_workqueue("Mali cache cleaning workqueue", 0, 1);
+	/* alloc_workqueue() returns NULL when the workqueue cannot be
+	 * allocated; that is an out-of-memory condition, not an invalid
+	 * argument, so report -ENOMEM */
+	if (!kbdev->hwcnt.backend.cache_clean_wq)
+		return -ENOMEM;
+
+	return 0;
+}
+
+/**
+ * kbase_instr_backend_term - Terminate the instrumentation backend
+ * @kbdev: Kbase device
+ *
+ * Destroys the cache clean workqueue allocated by kbase_instr_backend_init().
+ */
+void kbase_instr_backend_term(struct kbase_device *kbdev)
+{
+	destroy_workqueue(kbdev->hwcnt.backend.cache_clean_wq);
+}
+
--- a/drivers/gpu/arm/midgard_for_linux/backend/gpu/mali_kbase_instr_defs.h
+++ b/drivers/gpu/arm/midgard_for_linux/backend/gpu/mali_kbase_instr_defs.h
@ -0,0 +1,58 @@
+/*
+ *
+ * (C) COPYRIGHT 2014, 2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/*
+ * Backend-specific instrumentation definitions
+ */
+
+#ifndef _KBASE_INSTR_DEFS_H_
+#define _KBASE_INSTR_DEFS_H_
+
+/*
+ * Instrumentation State Machine States
+ *
+ * Stored in struct kbase_instr_backend::state.
+ */
+enum kbase_instr_state {
+	/* State where instrumentation is not active */
+	KBASE_INSTR_STATE_DISABLED = 0,
+	/* State machine is active and ready for a command. */
+	KBASE_INSTR_STATE_IDLE,
+	/* Hardware is currently dumping a frame. */
+	KBASE_INSTR_STATE_DUMPING,
+	/* We've requested a clean to occur on a workqueue */
+	KBASE_INSTR_STATE_REQUEST_CLEAN,
+	/* Hardware is currently cleaning and invalidating caches. */
+	KBASE_INSTR_STATE_CLEANING,
+	/* Cache clean completed, and either a) a dump is complete, or
+	 * b) instrumentation can now be setup. */
+	KBASE_INSTR_STATE_CLEANED,
+	/* An error has occurred during DUMPING (page fault). */
+	KBASE_INSTR_STATE_FAULT
+};
+
+/* Structure used for instrumentation and HW counters dumping */
+struct kbase_instr_backend {
+	/* Waiters sleep here until 'triggered' becomes non-zero */
+	wait_queue_head_t wait;
+	/* Wake-up condition for 'wait' */
+	int triggered;
+
+	/* Current state of the instrumentation state machine */
+	enum kbase_instr_state state;
+	/* The cache clean worker sleeps here until 'state' leaves CLEANING */
+	wait_queue_head_t cache_clean_wait;
+	/* Workqueue on which 'cache_clean_work' is queued */
+	struct workqueue_struct *cache_clean_wq;
+	/* Work item whose handler is kbasep_cache_clean_worker() */
+	struct work_struct  cache_clean_work;
+};
+
+#endif /* _KBASE_INSTR_DEFS_H_ */
+
--- a/drivers/gpu/arm/midgard_for_linux/backend/gpu/mali_kbase_instr_internal.h
+++ b/drivers/gpu/arm/midgard_for_linux/backend/gpu/mali_kbase_instr_internal.h
@ -0,0 +1,45 @@
+/*
+ *
+ * (C) COPYRIGHT 2014 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/*
+ * Backend-specific HW access instrumentation APIs
+ */
+
+#ifndef _KBASE_INSTR_INTERNAL_H_
+#define _KBASE_INSTR_INTERNAL_H_
+
+/**
+ * kbasep_cache_clean_worker() - Work item handler that performs a cache clean
+ * @data: the &struct work_struct being run; it is the cache_clean_work
+ *        member embedded in the device's hwcnt.backend
+ */
+void kbasep_cache_clean_worker(struct work_struct *data);
+
+/**
+ * kbase_clean_caches_done() - Cache clean interrupt received
+ * @kbdev: Kbase device
+ *
+ * Has no effect while instrumentation is in the disabled state.
+ */
+void kbase_clean_caches_done(struct kbase_device *kbdev);
+
+/**
+ * kbase_instr_hwcnt_sample_done() - Dump complete interrupt received
+ * @kbdev: Kbase device
+ *
+ * Acts only when the instrumentation state is FAULT or DUMPING.
+ */
+void kbase_instr_hwcnt_sample_done(struct kbase_device *kbdev);
+
+#endif /* _KBASE_INSTR_INTERNAL_H_ */
--- a/drivers/gpu/arm/midgard_for_linux/backend/gpu/mali_kbase_irq_internal.h
+++ b/drivers/gpu/arm/midgard_for_linux/backend/gpu/mali_kbase_irq_internal.h
@ -0,0 +1,39 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/*
+ * Backend specific IRQ APIs
+ */
+
+#ifndef _KBASE_IRQ_INTERNAL_H_
+#define _KBASE_IRQ_INTERNAL_H_
+
+/**
+ * kbase_install_interrupts - Request the default handler for every IRQ line
+ * @kbdev: The kbase device
+ *
+ * Return: 0 on success, otherwise the error returned by request_irq(); any
+ * lines already requested are freed before returning the error.
+ */
+int kbase_install_interrupts(struct kbase_device *kbdev);
+
+/**
+ * kbase_release_interrupts - Free every IRQ line that has a non-zero number
+ * @kbdev: The kbase device
+ */
+void kbase_release_interrupts(struct kbase_device *kbdev);
+
+/**
+ * kbase_synchronize_irqs - Ensure that all IRQ handlers have completed
+ *                          execution
+ * @kbdev: The kbase device
+ *
+ * Calls synchronize_irq() on every IRQ line that has a non-zero number.
+ */
+void kbase_synchronize_irqs(struct kbase_device *kbdev);
+
+/**
+ * kbasep_common_test_interrupt_handlers - Check that the JOB and MMU
+ *                                         interrupts reach the CPU
+ * @kbdev: The kbase device
+ *
+ * The definition is only built when CONFIG_MALI_DEBUG is set.
+ *
+ * Return: 0 if both interrupts were received, -EINVAL otherwise
+ */
+int kbasep_common_test_interrupt_handlers(
+					struct kbase_device * const kbdev);
+
+#endif /* _KBASE_IRQ_INTERNAL_H_ */
--- a/drivers/gpu/arm/midgard_for_linux/backend/gpu/mali_kbase_irq_linux.c
+++ b/drivers/gpu/arm/midgard_for_linux/backend/gpu/mali_kbase_irq_linux.c
@ -0,0 +1,469 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#include <mali_kbase.h>
+#include <backend/gpu/mali_kbase_device_internal.h>
+#include <backend/gpu/mali_kbase_irq_internal.h>
+
+#include <linux/interrupt.h>
+
+#if !defined(CONFIG_MALI_NO_MALI)
+
+/* GPU IRQ Tags */
+#define	JOB_IRQ_TAG	0
+#define MMU_IRQ_TAG	1
+#define GPU_IRQ_TAG	2
+
+/* OR @tag into the pointer value of @ptr and return the result. */
+static void *kbase_tag(void *ptr, u32 tag)
+{
+	uintptr_t tagged = (uintptr_t) ptr;
+
+	tagged |= tag;
+
+	return (void *) tagged;
+}
+
+/* Clear the two lowest bits of @ptr and return the resulting pointer. */
+static void *kbase_untag(void *ptr)
+{
+	uintptr_t bits = (uintptr_t) ptr;
+
+	bits &= ~(uintptr_t) 3;
+
+	return (void *) bits;
+}
+
+/*
+ * Default handler for the job IRQ line.
+ *
+ * Reads JOB_IRQ_STATUS under gpu_powered_lock and passes a non-zero status
+ * to kbase_job_done(). Returns IRQ_NONE when the GPU is powered off or
+ * nothing is pending.
+ */
+static irqreturn_t kbase_job_irq_handler(int irq, void *data)
+{
+	struct kbase_device *kbdev = kbase_untag(data);
+	unsigned long irq_flags;
+	u32 status = 0;
+	bool powered;
+
+	spin_lock_irqsave(&kbdev->pm.backend.gpu_powered_lock, irq_flags);
+
+	/* The status register is only read while the GPU is powered */
+	powered = kbdev->pm.backend.gpu_powered;
+	if (powered) {
+		status = kbase_reg_read(kbdev,
+				JOB_CONTROL_REG(JOB_IRQ_STATUS), NULL);
+
+#ifdef CONFIG_MALI_DEBUG
+		if (!kbdev->pm.backend.driver_ready_for_irqs)
+			dev_warn(kbdev->dev, "%s: irq %d irqstatus 0x%x before driver is ready\n",
+					__func__, irq, status);
+#endif /* CONFIG_MALI_DEBUG */
+	}
+
+	spin_unlock_irqrestore(&kbdev->pm.backend.gpu_powered_lock, irq_flags);
+
+	/* GPU is turned off, or nothing pending - IRQ is not for us */
+	if (!powered || !status)
+		return IRQ_NONE;
+
+	dev_dbg(kbdev->dev, "%s: irq %d irqstatus 0x%x\n", __func__, irq,
+								status);
+
+	kbase_job_done(kbdev, status);
+
+	return IRQ_HANDLED;
+}
+
+KBASE_EXPORT_TEST_API(kbase_job_irq_handler);
+
+/*
+ * Default handler for the MMU IRQ line.
+ *
+ * Reads MMU_IRQ_STATUS under gpu_powered_lock and passes a non-zero status
+ * to kbase_mmu_interrupt(). faults_pending is raised while the interrupt is
+ * being examined and handled. Returns IRQ_NONE when the GPU is powered off
+ * or nothing is pending.
+ */
+static irqreturn_t kbase_mmu_irq_handler(int irq, void *data)
+{
+	struct kbase_device *kbdev = kbase_untag(data);
+	unsigned long irq_flags;
+	u32 status = 0;
+	bool powered;
+
+	spin_lock_irqsave(&kbdev->pm.backend.gpu_powered_lock, irq_flags);
+
+	powered = kbdev->pm.backend.gpu_powered;
+	if (powered) {
+		atomic_inc(&kbdev->faults_pending);
+
+		status = kbase_reg_read(kbdev, MMU_REG(MMU_IRQ_STATUS), NULL);
+
+#ifdef CONFIG_MALI_DEBUG
+		if (!kbdev->pm.backend.driver_ready_for_irqs)
+			dev_warn(kbdev->dev, "%s: irq %d irqstatus 0x%x before driver is ready\n",
+					__func__, irq, status);
+#endif /* CONFIG_MALI_DEBUG */
+	}
+
+	spin_unlock_irqrestore(&kbdev->pm.backend.gpu_powered_lock, irq_flags);
+
+	/* GPU is turned off - IRQ is not for us */
+	if (!powered)
+		return IRQ_NONE;
+
+	/* Nothing pending: undo the increment made above */
+	if (!status) {
+		atomic_dec(&kbdev->faults_pending);
+		return IRQ_NONE;
+	}
+
+	dev_dbg(kbdev->dev, "%s: irq %d irqstatus 0x%x\n", __func__, irq,
+								status);
+
+	kbase_mmu_interrupt(kbdev, status);
+
+	atomic_dec(&kbdev->faults_pending);
+
+	return IRQ_HANDLED;
+}
+
+/*
+ * Default handler for the GPU IRQ line.
+ *
+ * Reads GPU_IRQ_STATUS under gpu_powered_lock and passes a non-zero status
+ * to kbase_gpu_interrupt(). Returns IRQ_NONE when the GPU is powered off or
+ * nothing is pending.
+ */
+static irqreturn_t kbase_gpu_irq_handler(int irq, void *data)
+{
+	struct kbase_device *kbdev = kbase_untag(data);
+	unsigned long irq_flags;
+	u32 status = 0;
+	bool powered;
+
+	spin_lock_irqsave(&kbdev->pm.backend.gpu_powered_lock, irq_flags);
+
+	/* The status register is only read while the GPU is powered */
+	powered = kbdev->pm.backend.gpu_powered;
+	if (powered) {
+		status = kbase_reg_read(kbdev,
+				GPU_CONTROL_REG(GPU_IRQ_STATUS), NULL);
+
+#ifdef CONFIG_MALI_DEBUG
+		if (!kbdev->pm.backend.driver_ready_for_irqs)
+			dev_dbg(kbdev->dev, "%s: irq %d irqstatus 0x%x before driver is ready\n",
+					__func__, irq, status);
+#endif /* CONFIG_MALI_DEBUG */
+	}
+
+	spin_unlock_irqrestore(&kbdev->pm.backend.gpu_powered_lock, irq_flags);
+
+	/* GPU is turned off, or nothing pending - IRQ is not for us */
+	if (!powered || !status)
+		return IRQ_NONE;
+
+	dev_dbg(kbdev->dev, "%s: irq %d irqstatus 0x%x\n", __func__, irq,
+								status);
+
+	kbase_gpu_interrupt(kbdev, status);
+
+	return IRQ_HANDLED;
+}
+
+KBASE_EXPORT_TEST_API(kbase_gpu_irq_handler);
+
+/* Default handler for each IRQ line, indexed by the *_IRQ_TAG values. The
+ * same index is used for kbdev->irqs[] and as the tag passed to kbase_tag().
+ */
+static irq_handler_t kbase_handler_table[] = {
+	[JOB_IRQ_TAG] = kbase_job_irq_handler,
+	[MMU_IRQ_TAG] = kbase_mmu_irq_handler,
+	[GPU_IRQ_TAG] = kbase_gpu_irq_handler,
+};
+
+#ifdef CONFIG_MALI_DEBUG
+#define  JOB_IRQ_HANDLER JOB_IRQ_TAG
+#define  MMU_IRQ_HANDLER MMU_IRQ_TAG
+#define  GPU_IRQ_HANDLER GPU_IRQ_TAG
+
+/**
+ * kbase_set_custom_irq_handler - Install a custom IRQ handler
+ * @kbdev: Device for which the handler is to be registered
+ * @custom_handler: Handler to register, or NULL to register the default
+ *                  handler for @irq_type
+ * @irq_type: Interrupt type (JOB_IRQ_HANDLER .. GPU_IRQ_HANDLER)
+ *
+ * Frees the handler currently registered for the interrupt (when the IRQ
+ * number is non-zero) and then requests the interrupt again with the chosen
+ * handler.
+ *
+ * Return: 0 on success, -EINVAL if request_irq() fails
+ */
+int kbase_set_custom_irq_handler(struct kbase_device *kbdev,
+					irq_handler_t custom_handler,
+					int irq_type)
+{
+	irq_handler_t handler = custom_handler;
+	void *cookie;
+
+	KBASE_DEBUG_ASSERT((JOB_IRQ_HANDLER <= irq_type) &&
+						(GPU_IRQ_HANDLER >= irq_type));
+
+	/* The same tagged device pointer identifies old and new handler */
+	cookie = kbase_tag(kbdev, irq_type);
+
+	/* Release previous handler */
+	if (kbdev->irqs[irq_type].irq)
+		free_irq(kbdev->irqs[irq_type].irq, cookie);
+
+	/* No custom handler given: fall back to the default one */
+	if (handler == NULL)
+		handler = kbase_handler_table[irq_type];
+
+	if (request_irq(kbdev->irqs[irq_type].irq, handler,
+			kbdev->irqs[irq_type].flags | IRQF_SHARED,
+			dev_name(kbdev->dev), cookie) == 0)
+		return 0;
+
+	dev_err(kbdev->dev, "Can't request interrupt %d (index %d)\n",
+			kbdev->irqs[irq_type].irq, irq_type);
+#ifdef CONFIG_SPARSE_IRQ
+	dev_err(kbdev->dev, "You have CONFIG_SPARSE_IRQ support enabled - is the interrupt number correct for this configuration?\n");
+#endif /* CONFIG_SPARSE_IRQ */
+
+	return -EINVAL;
+}
+
+KBASE_EXPORT_TEST_API(kbase_set_custom_irq_handler);
+
+/* Test correct interrupt assignment and reception by the CPU.
+ * State shared between the test driver, the test IRQ handlers and the
+ * timeout timer. */
+struct kbasep_irq_test {
+	/* Timer whose callback flags a timeout */
+	struct hrtimer timer;
+	/* The test sleeps here until 'triggered' becomes non-zero */
+	wait_queue_head_t wait;
+	/* Set to 1 by a test IRQ handler or by the timeout callback */
+	int triggered;
+	/* Set to 1 by the timeout callback only */
+	u32 timeout;
+};
+
+static struct kbasep_irq_test kbasep_irq_test_data;
+
+#define IRQ_TEST_TIMEOUT    500
+
+/*
+ * Test handler for the job IRQ line.
+ *
+ * On a non-zero JOB_IRQ_STATUS it flags the test as triggered, wakes the
+ * waiter and clears the reported bits through JOB_IRQ_CLEAR.
+ */
+static irqreturn_t kbase_job_irq_test_handler(int irq, void *data)
+{
+	struct kbase_device *kbdev = kbase_untag(data);
+	unsigned long irq_flags;
+	u32 status = 0;
+	bool powered;
+
+	spin_lock_irqsave(&kbdev->pm.backend.gpu_powered_lock, irq_flags);
+
+	/* The status register is only read while the GPU is powered */
+	powered = kbdev->pm.backend.gpu_powered;
+	if (powered)
+		status = kbase_reg_read(kbdev,
+				JOB_CONTROL_REG(JOB_IRQ_STATUS), NULL);
+
+	spin_unlock_irqrestore(&kbdev->pm.backend.gpu_powered_lock, irq_flags);
+
+	/* GPU is turned off, or nothing pending - IRQ is not for us */
+	if (!powered || !status)
+		return IRQ_NONE;
+
+	dev_dbg(kbdev->dev, "%s: irq %d irqstatus 0x%x\n", __func__, irq,
+								status);
+
+	/* Tell the waiting test that the interrupt arrived */
+	kbasep_irq_test_data.triggered = 1;
+	wake_up(&kbasep_irq_test_data.wait);
+
+	kbase_reg_write(kbdev, JOB_CONTROL_REG(JOB_IRQ_CLEAR), status, NULL);
+
+	return IRQ_HANDLED;
+}
+
+/*
+ * Test handler for the MMU IRQ line.
+ *
+ * On a non-zero MMU_IRQ_STATUS it flags the test as triggered, wakes the
+ * waiter and clears the reported bits through MMU_IRQ_CLEAR.
+ */
+static irqreturn_t kbase_mmu_irq_test_handler(int irq, void *data)
+{
+	struct kbase_device *kbdev = kbase_untag(data);
+	unsigned long irq_flags;
+	u32 status = 0;
+	bool powered;
+
+	spin_lock_irqsave(&kbdev->pm.backend.gpu_powered_lock, irq_flags);
+
+	/* The status register is only read while the GPU is powered */
+	powered = kbdev->pm.backend.gpu_powered;
+	if (powered)
+		status = kbase_reg_read(kbdev, MMU_REG(MMU_IRQ_STATUS), NULL);
+
+	spin_unlock_irqrestore(&kbdev->pm.backend.gpu_powered_lock, irq_flags);
+
+	/* GPU is turned off, or nothing pending - IRQ is not for us */
+	if (!powered || !status)
+		return IRQ_NONE;
+
+	dev_dbg(kbdev->dev, "%s: irq %d irqstatus 0x%x\n", __func__, irq,
+								status);
+
+	/* Tell the waiting test that the interrupt arrived */
+	kbasep_irq_test_data.triggered = 1;
+	wake_up(&kbasep_irq_test_data.wait);
+
+	kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_CLEAR), status, NULL);
+
+	return IRQ_HANDLED;
+}
+
+/*
+ * hrtimer callback for the interrupt test: records the timeout, releases
+ * the waiter and does not re-arm the timer.
+ */
+static enum hrtimer_restart kbasep_test_interrupt_timeout(struct hrtimer *timer)
+{
+	struct kbasep_irq_test *test;
+
+	test = container_of(timer, struct kbasep_irq_test, timer);
+
+	/* 'timeout' tells the waiter why it was woken */
+	test->timeout = 1;
+	test->triggered = 1;
+	wake_up(&test->wait);
+
+	return HRTIMER_NORESTART;
+}
+
+/*
+ * Check that one interrupt line reaches the CPU.
+ *
+ * @kbdev: Kbase device
+ * @tag:   JOB_IRQ_TAG or MMU_IRQ_TAG; any other value (including
+ *         GPU_IRQ_TAG) returns 0 without testing anything
+ *
+ * Saves and clears the IRQ mask, swaps in the matching test handler, raises
+ * bit 0 through the RAWSTAT register and waits until either the test handler
+ * or the IRQ_TEST_TIMEOUT timer fires. The default handler and the saved
+ * mask are restored afterwards. If kbdev->irqs[tag].irq is zero only the
+ * mask is saved and restored.
+ *
+ * Return: 0 on success or when skipped; -EINVAL if a handler could not be
+ * installed or restored, or if the interrupt timed out.
+ */
+static int kbasep_common_test_interrupt(
+				struct kbase_device * const kbdev, u32 tag)
+{
+	int err = 0;
+	irq_handler_t test_handler;
+
+	u32 old_mask_val;
+	u16 mask_offset;
+	u16 rawstat_offset;
+
+	/* Pick the test handler and the register offsets for this line */
+	switch (tag) {
+	case JOB_IRQ_TAG:
+		test_handler = kbase_job_irq_test_handler;
+		rawstat_offset = JOB_CONTROL_REG(JOB_IRQ_RAWSTAT);
+		mask_offset = JOB_CONTROL_REG(JOB_IRQ_MASK);
+		break;
+	case MMU_IRQ_TAG:
+		test_handler = kbase_mmu_irq_test_handler;
+		rawstat_offset = MMU_REG(MMU_IRQ_RAWSTAT);
+		mask_offset = MMU_REG(MMU_IRQ_MASK);
+		break;
+	case GPU_IRQ_TAG:
+		/* already tested by pm_driver - bail out */
+	default:
+		return 0;
+	}
+
+	/* store old mask */
+	old_mask_val = kbase_reg_read(kbdev, mask_offset, NULL);
+	/* mask interrupts */
+	kbase_reg_write(kbdev, mask_offset, 0x0, NULL);
+
+	if (kbdev->irqs[tag].irq) {
+		/* release original handler and install test handler */
+		if (kbase_set_custom_irq_handler(kbdev, test_handler, tag) != 0) {
+			err = -EINVAL;
+		} else {
+			kbasep_irq_test_data.timeout = 0;
+			hrtimer_init(&kbasep_irq_test_data.timer,
+					CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+			kbasep_irq_test_data.timer.function =
+						kbasep_test_interrupt_timeout;
+
+			/* trigger interrupt */
+			kbase_reg_write(kbdev, mask_offset, 0x1, NULL);
+			kbase_reg_write(kbdev, rawstat_offset, 0x1, NULL);
+
+			hrtimer_start(&kbasep_irq_test_data.timer,
+					HR_TIMER_DELAY_MSEC(IRQ_TEST_TIMEOUT),
+					HRTIMER_MODE_REL);
+
+			/* Woken by the test handler or by the timeout */
+			wait_event(kbasep_irq_test_data.wait,
+					kbasep_irq_test_data.triggered != 0);
+
+			if (kbasep_irq_test_data.timeout != 0) {
+				dev_err(kbdev->dev, "Interrupt %d (index %d) didn't reach CPU.\n",
+						kbdev->irqs[tag].irq, tag);
+				err = -EINVAL;
+			} else {
+				dev_dbg(kbdev->dev, "Interrupt %d (index %d) reached CPU.\n",
+						kbdev->irqs[tag].irq, tag);
+			}
+
+			/* Stop the timer and re-arm the flag for the next
+			 * test */
+			hrtimer_cancel(&kbasep_irq_test_data.timer);
+			kbasep_irq_test_data.triggered = 0;
+
+			/* mask interrupts */
+			kbase_reg_write(kbdev, mask_offset, 0x0, NULL);
+
+			/* release test handler */
+			free_irq(kbdev->irqs[tag].irq, kbase_tag(kbdev, tag));
+		}
+
+		/* restore original interrupt; this also runs when installing
+		 * the test handler failed, because the original handler has
+		 * been freed by then */
+		if (request_irq(kbdev->irqs[tag].irq, kbase_handler_table[tag],
+				kbdev->irqs[tag].flags | IRQF_SHARED,
+				dev_name(kbdev->dev), kbase_tag(kbdev, tag))) {
+			dev_err(kbdev->dev, "Can't restore original interrupt %d (index %d)\n",
+						kbdev->irqs[tag].irq, tag);
+			err = -EINVAL;
+		}
+	}
+	/* restore old mask */
+	kbase_reg_write(kbdev, mask_offset, old_mask_val, NULL);
+
+	return err;
+}
+
+/*
+ * Test the JOB and then the MMU interrupt line, stopping at the first
+ * failure. The device is kept active for the duration of the test.
+ *
+ * Returns 0 when both interrupts were received, otherwise the error from the
+ * failing test.
+ */
+int kbasep_common_test_interrupt_handlers(
+					struct kbase_device * const kbdev)
+{
+	int err;
+
+	kbasep_irq_test_data.triggered = 0;
+	init_waitqueue_head(&kbasep_irq_test_data.wait);
+
+	/* A suspend won't happen during startup/insmod */
+	kbase_pm_context_active(kbdev);
+
+	err = kbasep_common_test_interrupt(kbdev, JOB_IRQ_TAG);
+	if (err) {
+		dev_err(kbdev->dev, "Interrupt JOB_IRQ didn't reach CPU. Check interrupt assignments.\n");
+	} else {
+		err = kbasep_common_test_interrupt(kbdev, MMU_IRQ_TAG);
+		if (err)
+			dev_err(kbdev->dev, "Interrupt MMU_IRQ didn't reach CPU. Check interrupt assignments.\n");
+		else
+			dev_dbg(kbdev->dev, "Interrupts are correctly assigned.\n");
+	}
+
+	kbase_pm_context_idle(kbdev);
+
+	return err;
+}
+#endif /* CONFIG_MALI_DEBUG */
+
+/*
+ * Request the default handler for every entry of kbase_handler_table.
+ *
+ * Returns 0 on success. On the first request_irq() failure the lines already
+ * requested are freed and that error is returned.
+ */
+int kbase_install_interrupts(struct kbase_device *kbdev)
+{
+	const u32 count = ARRAY_SIZE(kbase_handler_table);
+	u32 idx;
+	int err;
+
+	for (idx = 0; idx < count; idx++) {
+		err = request_irq(kbdev->irqs[idx].irq,
+				kbase_handler_table[idx],
+				kbdev->irqs[idx].flags | IRQF_SHARED,
+				dev_name(kbdev->dev),
+				kbase_tag(kbdev, idx));
+		if (err)
+			break;
+	}
+
+	/* Every line was requested */
+	if (idx == count)
+		return 0;
+
+	dev_err(kbdev->dev, "Can't request interrupt %d (index %d)\n",
+			kbdev->irqs[idx].irq, idx);
+#ifdef CONFIG_SPARSE_IRQ
+	dev_err(kbdev->dev, "You have CONFIG_SPARSE_IRQ support enabled - is the interrupt number correct for this configuration?\n");
+#endif /* CONFIG_SPARSE_IRQ */
+
+	/* Unwind the lines requested before the failing one */
+	while (idx-- > 0)
+		free_irq(kbdev->irqs[idx].irq, kbase_tag(kbdev, idx));
+
+	return err;
+}
+
+/* Free every IRQ line of the device that has a non-zero IRQ number. */
+void kbase_release_interrupts(struct kbase_device *kbdev)
+{
+	u32 idx;
+
+	for (idx = 0; idx < ARRAY_SIZE(kbase_handler_table); idx++) {
+		/* Lines without an IRQ number are skipped */
+		if (!kbdev->irqs[idx].irq)
+			continue;
+
+		free_irq(kbdev->irqs[idx].irq, kbase_tag(kbdev, idx));
+	}
+}
+
+/* Call synchronize_irq() on every IRQ line that has a non-zero IRQ number. */
+void kbase_synchronize_irqs(struct kbase_device *kbdev)
+{
+	u32 idx;
+
+	for (idx = 0; idx < ARRAY_SIZE(kbase_handler_table); idx++) {
+		/* Lines without an IRQ number are skipped */
+		if (!kbdev->irqs[idx].irq)
+			continue;
+
+		synchronize_irq(kbdev->irqs[idx].irq);
+	}
+}
+
+#endif /* !defined(CONFIG_MALI_NO_MALI) */
--- a/drivers/gpu/arm/midgard_for_linux/backend/gpu/mali_kbase_jm_as.c
+++ b/drivers/gpu/arm/midgard_for_linux/backend/gpu/mali_kbase_jm_as.c
@ -0,0 +1,378 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+/*
+ * Register backend context / address space management
+ */
+
+#include <mali_kbase.h>
+#include <mali_kbase_hwaccess_jm.h>
+
+/**
+ * assign_and_activate_kctx_addr_space - Bind a context to an address space
+ * @kbdev: Kbase device
+ * @kctx: Kbase context
+ * @current_as: Address Space to assign
+ *
+ * Steps performed, in order:
+ *   retain the context's attributes on the runpool,
+ *   record the AS number in the context,
+ *   program the MMU for the context if the GPU is currently powered,
+ *   allow the context to submit jobs,
+ *   record the context in the per-AS data with a zero busy refcount,
+ *   account for the context in the runpool context count.
+ *
+ * Context (as asserted by the code):
+ *   kbasep_js_kctx_info.ctx.jsctx_mutex held,
+ *   kbasep_js_device_data.runpool_mutex held,
+ *   hwaccess_lock held
+ */
+static void assign_and_activate_kctx_addr_space(struct kbase_device *kbdev,
+						struct kbase_context *kctx,
+						struct kbase_as *current_as)
+{
+	struct kbasep_js_device_data *const devdata = &kbdev->js_data;
+	const int as_index = current_as->number;
+	struct kbasep_js_per_as_data *const as_data =
+			&devdata->runpool_irq.per_as_data[as_index];
+
+	lockdep_assert_held(&kctx->jctx.sched_info.ctx.jsctx_mutex);
+	lockdep_assert_held(&devdata->runpool_mutex);
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	/* Context attributes now count towards the runpool */
+	kbasep_js_ctx_attr_runpool_retain_ctx(kbdev, kctx);
+
+	/* The context owns this address space from here on */
+	kctx->as_nr = as_index;
+
+	/* Only touch the MMU while the GPU is powered; otherwise it will be
+	 * reprogrammed on the next pm_context_active() */
+	if (kbdev->pm.backend.gpu_powered)
+		kbase_mmu_update(kctx);
+
+	/* Jobs from this context may now be submitted */
+	kbasep_js_set_submit_allowed(devdata, kctx);
+
+	/* Per-AS book-keeping */
+	as_data->kctx = kctx;
+	as_data->as_busy_refcount = 0;
+
+	kbase_js_runpool_inc_context_count(kbdev, kctx);
+}
+
+/**
+ * release_addr_space - Return an address space to the free pool
+ * @kbdev: Kbase device
+ * @kctx_as_nr: Address space of context to release
+ * @kctx: Context being released
+ *
+ * Marks the address space as free again in the as_free bitmask and removes
+ * the context from the runpool context count.
+ *
+ * Context: kbasep_js_device_data.runpool_mutex must be held
+ */
+static void release_addr_space(struct kbase_device *kbdev, int kctx_as_nr,
+						struct kbase_context *kctx)
+{
+	struct kbasep_js_device_data *js_devdata = &kbdev->js_data;
+	u16 as_bit = (1u << kctx_as_nr);
+
+	lockdep_assert_held(&js_devdata->runpool_mutex);
+
+	/* Releasing an address space that is already free is a caller bug */
+	KBASE_DEBUG_ASSERT(!(js_devdata->as_free & as_bit));
+
+	js_devdata->as_free |= as_bit;
+
+	kbase_js_runpool_dec_context_count(kbdev, kctx);
+}
+
+/*
+ * Report whether @kctx can be used without assigning it an address space.
+ *
+ * Return: true if @kctx is the active context or is already recorded against
+ * one of the hardware address spaces, false otherwise
+ */
+bool kbase_backend_use_ctx_sched(struct kbase_device *kbdev,
+						struct kbase_context *kctx)
+{
+	int as_idx;
+
+	/* Fast path: this is the context that is already active */
+	if (kbdev->hwaccess.active_kctx == kctx)
+		return true;
+
+	/* Otherwise search the per-AS data for an entry owned by kctx */
+	for (as_idx = 0; as_idx < kbdev->nr_hw_address_spaces; as_idx++) {
+		struct kbasep_js_per_as_data *as_data =
+			&kbdev->js_data.runpool_irq.per_as_data[as_idx];
+
+		if (as_data->kctx == kctx)
+			return true;
+	}
+
+	/* No address space is assigned to this context */
+	return false;
+}
+
+/*
+ * Detach @kctx from the address space it currently occupies.
+ *
+ * Warns and does nothing if the context has no address space, or if the
+ * address space still has a non-zero busy refcount. Otherwise the per-AS
+ * entry is cleared, submission is disallowed for the context, the MMU is
+ * disabled for it if the GPU is powered, and the address space is returned
+ * to the free pool.
+ *
+ * Context: hwaccess_lock held (asserted once the context is known to have an
+ * address space)
+ */
+void kbase_backend_release_ctx_irq(struct kbase_device *kbdev,
+						struct kbase_context *kctx)
+{
+	struct kbasep_js_per_as_data *as_data;
+	const int as_nr = kctx->as_nr;
+
+	if (as_nr == KBASEP_AS_NR_INVALID) {
+		WARN(1, "Attempting to release context without ASID\n");
+		return;
+	}
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	as_data = &kbdev->js_data.runpool_irq.per_as_data[as_nr];
+	if (as_data->as_busy_refcount != 0) {
+		WARN(1, "Attempting to release active ASID\n");
+		return;
+	}
+
+	/* The address space no longer belongs to any context */
+	as_data->kctx = NULL;
+
+	kbasep_js_clear_submit_allowed(&kbdev->js_data, kctx);
+
+	/* Only touch the MMU while the GPU is powered; otherwise it will be
+	 * reprogrammed on the next pm_context_active() */
+	if (kbdev->pm.backend.gpu_powered)
+		kbase_mmu_disable(kctx);
+
+	release_addr_space(kbdev, as_nr, kctx);
+	kctx->as_nr = KBASEP_AS_NR_INVALID;
+}
+
+/*
+ * The register backend does no work in this hook: the body is intentionally
+ * empty and both parameters are unused.
+ */
+void kbase_backend_release_ctx_noirq(struct kbase_device *kbdev,
+						struct kbase_context *kctx)
+{
+}
+
+/*
+ * Mark address space @as_nr as free again in the as_free bitmask.
+ *
+ * Context: kbasep_js_device_data.runpool_mutex must be held
+ */
+void kbase_backend_release_free_address_space(struct kbase_device *kbdev,
+								int as_nr)
+{
+	struct kbasep_js_device_data *const devdata = &kbdev->js_data;
+
+	lockdep_assert_held(&devdata->runpool_mutex);
+
+	devdata->as_free |= (1 << as_nr);
+}
+
+/**
+ * check_is_runpool_full - check whether the runpool is full for a specified
+ * context
+ * @kbdev: Kbase device
+ * @kctx:  Kbase context, or NULL for the least restrictive check
+ *
+ * With kctx == NULL only the total number of running contexts is compared
+ * against the number of HW address spaces. A context supplied afterwards can
+ * still fail the check under the same conditions, so once a context has been
+ * obtained it must be re-checked with this function.
+ *
+ * Context:
+ *   The caller must always hold kbasep_js_device_data.runpool_mutex.
+ *   For a kctx that is allowed to submit, the caller must also hold that
+ *   context's kbasep_js_kctx_info.ctx.jsctx_mutex.
+ *
+ * Return: true if the runpool is full
+ */
+static bool check_is_runpool_full(struct kbase_device *kbdev,
+						struct kbase_context *kctx)
+{
+	struct kbasep_js_device_data *const devdata = &kbdev->js_data;
+	bool full;
+
+	lockdep_assert_held(&devdata->runpool_mutex);
+
+	/* Submitting or not, there can never be more running contexts than
+	 * there are HW address spaces */
+	full = devdata->nr_all_contexts_running >=
+					kbdev->nr_hw_address_spaces;
+
+	/* The stricter test below only concerns contexts that may submit */
+	if (!kctx || kbase_ctx_flag(kctx, KCTX_SUBMIT_DISABLED))
+		return full;
+
+	lockdep_assert_held(&kctx->jctx.sched_info.ctx.jsctx_mutex);
+
+	/* Submitting contexts may be limited to fewer address spaces (HW
+	 * workarounds), so the runpool is also full once the number of
+	 * submitting contexts reaches the number of submittable address
+	 * spaces. Both tests are needed: nr_user_address_spaces can equal
+	 * nr_hw_address_spaces while nr_user_contexts_running is below
+	 * nr_all_contexts_running. */
+	if (devdata->nr_user_contexts_running >=
+					kbdev->nr_user_address_spaces)
+		full = true;
+
+	return full;
+}
+
+/*
+ * kbase_backend_find_free_address_space - Reserve an address space for @kctx
+ * @kbdev: Kbase device
+ * @kctx:  Context that needs an address space
+ *
+ * First tries to take a bit from the as_free mask (unless the runpool is
+ * full for @kctx). If that fails, walks the hardware address spaces looking
+ * for a context that is neither privileged nor busy and tries to evict it.
+ * The locks have to be dropped while the victim's jsctx_mutex is taken, so
+ * the victim may have been retained again in the meantime; in that case the
+ * search continues with the next address space.
+ *
+ * Takes and releases jsctx_mutex, runpool_mutex and hwaccess_lock itself;
+ * none of them is held on return.
+ *
+ * Return: the index of the address space reserved for the caller (its bit is
+ * cleared in as_free), or KBASEP_AS_NR_INVALID if none could be obtained
+ */
+int kbase_backend_find_free_address_space(struct kbase_device *kbdev,
+						struct kbase_context *kctx)
+{
+	struct kbasep_js_device_data *js_devdata;
+	struct kbasep_js_kctx_info *js_kctx_info;
+	unsigned long flags;
+	int i;
+
+	js_devdata = &kbdev->js_data;
+	js_kctx_info = &kctx->jctx.sched_info;
+
+	mutex_lock(&js_kctx_info->ctx.jsctx_mutex);
+	mutex_lock(&js_devdata->runpool_mutex);
+
+	/* First try to find a free address space */
+	if (check_is_runpool_full(kbdev, kctx))
+		i = -1;
+	else
+		i = ffs(js_devdata->as_free) - 1;
+
+	if (i >= 0 && i < kbdev->nr_hw_address_spaces) {
+		/* Reserve it for the caller */
+		js_devdata->as_free &= ~(1 << i);
+
+		mutex_unlock(&js_devdata->runpool_mutex);
+		mutex_unlock(&js_kctx_info->ctx.jsctx_mutex);
+
+		return i;
+	}
+
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+
+	/* No address space currently free, see if we can release one */
+	for (i = 0; i < kbdev->nr_hw_address_spaces; i++) {
+		struct kbasep_js_per_as_data *js_per_as_data;
+		struct kbasep_js_kctx_info *as_js_kctx_info;
+		struct kbase_context *as_kctx;
+
+		js_per_as_data = &kbdev->js_data.runpool_irq.per_as_data[i];
+		as_kctx = js_per_as_data->kctx;
+
+		/* Don't release privileged or active contexts, or contexts with
+		 * jobs running */
+		if (as_kctx && !kbase_ctx_flag(as_kctx, KCTX_PRIVILEGED) &&
+			js_per_as_data->as_busy_refcount == 0) {
+			/* Derive the member address only now that as_kctx is
+			 * known to be non-NULL; an unused address space has
+			 * no context attached */
+			as_js_kctx_info = &as_kctx->jctx.sched_info;
+
+			if (!kbasep_js_runpool_retain_ctx_nolock(kbdev,
+								as_kctx)) {
+				WARN(1, "Failed to retain active context\n");
+
+				spin_unlock_irqrestore(&kbdev->hwaccess_lock,
+						flags);
+				mutex_unlock(&js_devdata->runpool_mutex);
+				mutex_unlock(&js_kctx_info->ctx.jsctx_mutex);
+
+				return KBASEP_AS_NR_INVALID;
+			}
+
+			kbasep_js_clear_submit_allowed(js_devdata, as_kctx);
+
+			/* Drop and retake locks to take the jsctx_mutex on the
+			 * context we're about to release without violating lock
+			 * ordering
+			 */
+			spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+			mutex_unlock(&js_devdata->runpool_mutex);
+			mutex_unlock(&js_kctx_info->ctx.jsctx_mutex);
+
+
+			/* Release context from address space */
+			mutex_lock(&as_js_kctx_info->ctx.jsctx_mutex);
+			mutex_lock(&js_devdata->runpool_mutex);
+
+			kbasep_js_runpool_release_ctx_nolock(kbdev, as_kctx);
+
+			if (!kbase_ctx_flag(as_kctx, KCTX_SCHEDULED)) {
+				kbasep_js_runpool_requeue_or_kill_ctx(kbdev,
+								as_kctx,
+								true);
+
+				/* Keep the freed address space reserved for
+				 * the caller */
+				js_devdata->as_free &= ~(1 << i);
+
+				mutex_unlock(&js_devdata->runpool_mutex);
+				mutex_unlock(&as_js_kctx_info->ctx.jsctx_mutex);
+
+				return i;
+			}
+
+			/* Context was retained while locks were dropped,
+			 * continue looking for free AS */
+
+			mutex_unlock(&js_devdata->runpool_mutex);
+			mutex_unlock(&as_js_kctx_info->ctx.jsctx_mutex);
+
+			mutex_lock(&js_kctx_info->ctx.jsctx_mutex);
+			mutex_lock(&js_devdata->runpool_mutex);
+			spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+		}
+	}
+
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+	mutex_unlock(&js_devdata->runpool_mutex);
+	mutex_unlock(&js_kctx_info->ctx.jsctx_mutex);
+
+	return KBASEP_AS_NR_INVALID;
+}
+
+/*
+ * kbase_backend_use_ctx - Activate @kctx on address space @as_nr
+ * @kbdev: Kbase device
+ * @kctx:  Context to activate; must not be the active context and must not
+ *         already have an address space
+ * @as_nr: Address space to use; must be a valid hardware address space index
+ *
+ * Assigns the address space to the context and activates it. A privileged
+ * context is additionally retained so that it keeps its address space.
+ *
+ * Context: kbasep_js_device_data.runpool_mutex and hwaccess_lock held
+ * (asserted after parameter validation)
+ *
+ * Return: true on success, false (with a warning) if the parameters are
+ * invalid
+ */
+bool kbase_backend_use_ctx(struct kbase_device *kbdev,
+				struct kbase_context *kctx,
+				int as_nr)
+{
+	struct kbasep_js_device_data *js_devdata;
+	struct kbase_as *new_address_space = NULL;
+
+	js_devdata = &kbdev->js_data;
+
+	/* as_nr indexes kbdev->as[] below, so reject anything outside the
+	 * range of hardware address spaces as well as the invalid marker */
+	if (kbdev->hwaccess.active_kctx == kctx ||
+	    kctx->as_nr != KBASEP_AS_NR_INVALID ||
+	    as_nr == KBASEP_AS_NR_INVALID ||
+	    as_nr < 0 || as_nr >= kbdev->nr_hw_address_spaces) {
+		WARN(1, "Invalid parameters to use_ctx()\n");
+		return false;
+	}
+
+	new_address_space = &kbdev->as[as_nr];
+
+	lockdep_assert_held(&js_devdata->runpool_mutex);
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	assign_and_activate_kctx_addr_space(kbdev, kctx, new_address_space);
+
+	if (kbase_ctx_flag(kctx, KCTX_PRIVILEGED)) {
+		/* We need to retain it to keep the corresponding address space
+		 */
+		kbasep_js_runpool_retain_ctx_nolock(kbdev, kctx);
+	}
+
+	return true;
+}
+
--- a/drivers/gpu/arm/midgard_for_linux/backend/gpu/mali_kbase_jm_defs.h
+++ b/drivers/gpu/arm/midgard_for_linux/backend/gpu/mali_kbase_jm_defs.h
@ -0,0 +1,123 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+/*
+ * Register-based HW access backend specific definitions
+ */
+
+#ifndef _KBASE_HWACCESS_GPU_DEFS_H_
+#define _KBASE_HWACCESS_GPU_DEFS_H_
+
+/* SLOT_RB_SIZE must be < 256 */
+#define SLOT_RB_SIZE 2
+#define SLOT_RB_MASK (SLOT_RB_SIZE - 1)
+
+/**
+ * struct rb_entry - Ringbuffer entry
+ * @katom:	Atom associated with this entry
+ *
+ * Element type of the entries[] array in struct slot_rb.
+ */
+struct rb_entry {
+	struct kbase_jd_atom *katom;
+};
+
+/**
+ * struct slot_rb - Slot ringbuffer
+ * @entries:		Ringbuffer entries (SLOT_RB_SIZE of them)
+ * @last_context:	The last context to submit a job on this slot
+ * @read_idx:		Current read index of buffer
+ * @write_idx:		Current write index of buffer
+ * @job_chain_flag:	Flag used to implement jobchain disambiguation
+ *
+ * NOTE(review): the indices are u8, which is presumably why SLOT_RB_SIZE is
+ * required to stay below 256 - confirm against the ringbuffer code.
+ */
+struct slot_rb {
+	struct rb_entry entries[SLOT_RB_SIZE];
+
+	struct kbase_context *last_context;
+
+	u8 read_idx;
+	u8 write_idx;
+
+	u8 job_chain_flag;
+};
+
+/**
+ * struct kbase_backend_data - GPU backend specific data for HW access layer
+ * @slot_rb:			Slot ringbuffers, one per job slot
+ * @rmu_workaround_flag:	When PRLAM-8987 is present, this flag determines
+ *				whether slots 0/1 or slot 2 are currently being
+ *				pulled from
+ * @scheduling_timer:		The timer tick used for rescheduling jobs
+ * @timer_running:		Is the timer running? The runpool_mutex must be
+ *				held whilst modifying this.
+ * @suspend_timer:              Is the timer suspended? Set when a suspend
+ *                              occurs and cleared on resume. The runpool_mutex
+ *                              must be held whilst modifying this.
+ * @reset_gpu:			Set to one of the KBASE_RESET_GPU_xxx values
+ *				defined inside this structure
+ * @reset_workq:		Work queue for performing the reset
+ * @reset_work:			Work item for performing the reset
+ * @reset_wait:			Wait event signalled when the reset is complete
+ * @reset_timer:		Timeout for soft-stops before the reset
+ * @timeouts_updated:           Have timeout values just been updated?
+ *
+ * The hwaccess_lock (a spinlock) must be held when accessing this structure
+ */
+struct kbase_backend_data {
+	struct slot_rb slot_rb[BASE_JM_MAX_NR_SLOTS];
+
+	bool rmu_workaround_flag;
+
+	struct hrtimer scheduling_timer;
+
+	bool timer_running;
+	bool suspend_timer;
+
+	atomic_t reset_gpu;
+
+/* Values stored in reset_gpu. The macros are defined next to the member they
+ * describe; being preprocessor definitions they are not scoped to the
+ * structure. */
+
+/* The GPU reset isn't pending */
+#define KBASE_RESET_GPU_NOT_PENDING     0
+/* kbase_prepare_to_reset_gpu has been called */
+#define KBASE_RESET_GPU_PREPARED        1
+/* kbase_reset_gpu has been called - the reset will now definitely happen
+ * within the timeout period */
+#define KBASE_RESET_GPU_COMMITTED       2
+/* The GPU reset process is currently occurring (timeout has expired or
+ * kbasep_try_reset_gpu_early was called) */
+#define KBASE_RESET_GPU_HAPPENING       3
+/* Reset the GPU silently, used when resetting the GPU as part of normal
+ * behavior (e.g. when exiting protected mode). */
+#define KBASE_RESET_GPU_SILENT          4
+	struct workqueue_struct *reset_workq;
+	struct work_struct reset_work;
+	wait_queue_head_t reset_wait;
+	struct hrtimer reset_timer;
+
+	bool timeouts_updated;
+};
+
+/**
+ * struct kbase_jd_atom_backend - GPU backend specific katom data
+ *
+ * Currently has no members.
+ */
+struct kbase_jd_atom_backend {
+};
+
+/**
+ * struct kbase_context_backend - GPU backend specific context data
+ *
+ * Currently has no members.
+ */
+struct kbase_context_backend {
+};
+
+#endif /* _KBASE_HWACCESS_GPU_DEFS_H_ */
--- a/drivers/gpu/arm/midgard_for_linux/backend/gpu/mali_kbase_jm_hw.c
+++ b/drivers/gpu/arm/midgard_for_linux/backend/gpu/mali_kbase_jm_hw.c
--- a/drivers/gpu/arm/midgard_for_linux/backend/gpu/mali_kbase_jm_internal.h
+++ b/drivers/gpu/arm/midgard_for_linux/backend/gpu/mali_kbase_jm_internal.h
@ -0,0 +1,155 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/*
+ * Job Manager backend-specific low-level APIs.
+ */
+
+#ifndef _KBASE_JM_HWACCESS_H_
+#define _KBASE_JM_HWACCESS_H_
+
+#include <mali_kbase_hw.h>
+#include <mali_kbase_debug.h>
+#include <linux/atomic.h>
+
+#include <backend/gpu/mali_kbase_jm_rb.h>
+
+/**
+ * kbase_job_submit_nolock() - Submit a job to a certain job-slot
+ * @kbdev:	Device pointer
+ * @katom:	Atom to submit
+ * @js:		Job slot to submit on
+ *
+ * The caller must check kbasep_jm_is_submit_slots_free() != false before
+ * calling this.
+ *
+ * The following locking conditions are made on the caller:
+ * - it must hold the hwaccess_lock
+ */
+void kbase_job_submit_nolock(struct kbase_device *kbdev,
+					struct kbase_jd_atom *katom, int js);
+
+/**
+ * kbase_job_done_slot() - Complete the head job on a particular job-slot
+ * @kbdev:		Device pointer
+ * @s:			Job slot
+ * @completion_code:	Completion code of job reported by GPU
+ * @job_tail:		Job tail address reported by GPU
+ * @end_timestamp:	Timestamp of job completion
+ */
+void kbase_job_done_slot(struct kbase_device *kbdev, int s, u32 completion_code,
+					u64 job_tail, ktime_t *end_timestamp);
+
+#ifdef CONFIG_GPU_TRACEPOINTS
+/*
+ * Format "job_slot_<js>" into js_string and return js_string.
+ *
+ * The write uses an unbounded sprintf(), so js_string must be large enough
+ * for the "job_slot_" prefix, the decimal representation of js and the
+ * terminating NUL.
+ */
+static inline char *kbasep_make_job_slot_string(int js, char *js_string)
+{
+	sprintf(js_string, "job_slot_%i", js);
+	return js_string;
+}
+#endif
+
+/**
+ * kbase_job_hw_submit() - Submit a job to the GPU
+ * @kbdev:	Device pointer
+ * @katom:	Atom to submit
+ * @js:		Job slot to submit on
+ *
+ * The caller must check kbasep_jm_is_submit_slots_free() != false before
+ * calling this.
+ *
+ * The following locking conditions are made on the caller:
+ * - it must hold the hwaccess_lock
+ */
+void kbase_job_hw_submit(struct kbase_device *kbdev,
+				struct kbase_jd_atom *katom,
+				int js);
+
+/**
+ * kbasep_job_slot_soft_or_hard_stop_do_action() - Perform a soft or hard stop
+ *						   on the specified atom
+ * @kbdev:		Device pointer
+ * @js:			Job slot to stop on
+ * @action:		The action to perform, either JSn_COMMAND_HARD_STOP or
+ *			JSn_COMMAND_SOFT_STOP
+ * @core_reqs:		Core requirements of atom to stop
+ * @target_katom:	Atom to stop
+ *
+ * The following locking conditions are made on the caller:
+ * - it must hold the hwaccess_lock
+ */
+void kbasep_job_slot_soft_or_hard_stop_do_action(struct kbase_device *kbdev,
+					int js,
+					u32 action,
+					base_jd_core_req core_reqs,
+					struct kbase_jd_atom *target_katom);
+
+/**
+ * kbase_backend_soft_hard_stop_slot() - Soft or hard stop jobs on a given job
+ *					 slot belonging to a given context.
+ * @kbdev:	Device pointer
+ * @kctx:	Context pointer. May be NULL
+ * @katom:	Specific atom to stop. May be NULL
+ * @js:		Job slot to soft or hard stop
+ * @action:	The action to perform, either JSn_COMMAND_HARD_STOP or
+ *		JSn_COMMAND_SOFT_STOP
+ *
+ * If no context is provided then all jobs on the slot will be soft or hard
+ * stopped.
+ *
+ * If a katom is provided then only that specific atom will be stopped. In this
+ * case the kctx parameter is ignored.
+ *
+ * Jobs that are on the slot but are not yet on the GPU will be unpulled and
+ * returned to the job scheduler.
+ *
+ * Return: true if an atom was stopped, false otherwise
+ */
+bool kbase_backend_soft_hard_stop_slot(struct kbase_device *kbdev,
+					struct kbase_context *kctx,
+					int js,
+					struct kbase_jd_atom *katom,
+					u32 action);
+
+/**
+ * kbase_job_slot_init - Initialise job slot framework
+ * @kbdev: Device pointer
+ *
+ * Called on driver initialisation
+ *
+ * Return: 0 on success
+ */
+int kbase_job_slot_init(struct kbase_device *kbdev);
+
+/**
+ * kbase_job_slot_halt - Halt the job slot framework
+ * @kbdev: Device pointer
+ *
+ * Should prevent any further job slot processing
+ */
+void kbase_job_slot_halt(struct kbase_device *kbdev);
+
+/**
+ * kbase_job_slot_term - Terminate job slot framework
+ * @kbdev: Device pointer
+ *
+ * Called on driver termination
+ */
+void kbase_job_slot_term(struct kbase_device *kbdev);
+
+#endif /* _KBASE_JM_HWACCESS_H_ */
--- a/drivers/gpu/arm/midgard_for_linux/backend/gpu/mali_kbase_jm_rb.c
+++ b/drivers/gpu/arm/midgard_for_linux/backend/gpu/mali_kbase_jm_rb.c
--- a/drivers/gpu/arm/midgard_for_linux/backend/gpu/mali_kbase_jm_rb.h
+++ b/drivers/gpu/arm/midgard_for_linux/backend/gpu/mali_kbase_jm_rb.h
@ -0,0 +1,76 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+/*
+ * Register-based HW access backend specific APIs
+ */
+
+#ifndef _KBASE_HWACCESS_GPU_H_
+#define _KBASE_HWACCESS_GPU_H_
+
+#include <backend/gpu/mali_kbase_pm_internal.h>
+
+/**
+ * kbase_gpu_irq_evict - Evict an atom from a NEXT slot
+ *
+ * @kbdev:         Device pointer
+ * @js:            Job slot to evict from
+ *
+ * Evict the atom in the NEXT slot for the specified job slot. This function is
+ * called from the job complete IRQ handler when the previous job has failed.
+ *
+ * Return: true if job evicted from NEXT registers, false otherwise
+ */
+bool kbase_gpu_irq_evict(struct kbase_device *kbdev, int js);
+
+/**
+ * kbase_gpu_complete_hw - Complete an atom on job slot js
+ *
+ * @kbdev:           Device pointer
+ * @js:              Job slot that has completed
+ * @completion_code: Event code from job that has completed
+ * @job_tail:        The tail address from the hardware if the job has partially
+ *                   completed
+ * @end_timestamp:   Time of completion
+ */
+void kbase_gpu_complete_hw(struct kbase_device *kbdev, int js,
+				u32 completion_code,
+				u64 job_tail,
+				ktime_t *end_timestamp);
+
+/**
+ * kbase_gpu_inspect - Inspect the contents of the HW access ringbuffer
+ *
+ * @kbdev:  Device pointer
+ * @js:     Job slot to inspect
+ * @idx:    Index into ringbuffer. 0 is the job currently running on
+ *          the slot, 1 is the job waiting, all other values are invalid.
+ * Return:  The atom at that position in the ringbuffer
+ *          or NULL if no atom present
+ */
+struct kbase_jd_atom *kbase_gpu_inspect(struct kbase_device *kbdev, int js,
+					int idx);
+
+/**
+ * kbase_gpu_dump_slots - Print the contents of the slot ringbuffers
+ *
+ * @kbdev:  Device pointer
+ */
+void kbase_gpu_dump_slots(struct kbase_device *kbdev);
+
+#endif /* _KBASE_HWACCESS_GPU_H_ */
--- a/drivers/gpu/arm/midgard_for_linux/backend/gpu/mali_kbase_js_affinity.c
+++ b/drivers/gpu/arm/midgard_for_linux/backend/gpu/mali_kbase_js_affinity.c
@ -0,0 +1,303 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/*
+ * Base kernel affinity manager APIs
+ */
+
+#include <mali_kbase.h>
+#include "mali_kbase_js_affinity.h"
+#include "mali_kbase_hw.h"
+
+#include <backend/gpu/mali_kbase_pm_internal.h>
+
+
+/*
+ * Decide whether jobs may currently be submitted to job slot @js.
+ *
+ * Every slot other than slot 2 is always allowed. Slot 2 is allowed when:
+ * - BASE_HW_ISSUE_8987 is present (slot 2 is then used entirely for that
+ *   purpose), or
+ * - there are 2 or more core groups AND no context with
+ *   KBASEP_JS_CTX_ATTR_COMPUTE_ALL_CORES is on the runpool.
+ *
+ * Rationale for the second rule: atoms running on all cores on slot 1 could
+ * be blocked by those using a coherent group on slot 2; and with a single
+ * core group, jobs for slot 2 would also be for slot 1, so they would
+ * interfere and could block jobs that can only run on slot 1 (tiler jobs).
+ */
+bool kbase_js_can_run_job_on_slot_no_lock(struct kbase_device *kbdev,
+									int js)
+{
+	/* The workaround claims slot 2, and all other slots are unrestricted */
+	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8987) || js != 2)
+		return true;
+
+	/* From here on js == 2: only usable with 2+ core groups */
+	if (kbdev->gpu_props.num_core_groups <= 1)
+		return false;
+
+	/* ...and only while no context runs atoms on all cores. There is no
+	 * specific check for BASE_JD_REQ_COHERENT_GROUP atoms - the policy
+	 * will sort that out */
+	return !kbasep_js_ctx_attr_is_attr_on_runpool(kbdev,
+				KBASEP_JS_CTX_ATTR_COMPUTE_ALL_CORES);
+}
+
+/*
+ * Compute the shader core affinity mask for @katom on job slot @js and store
+ * it in *@affinity.
+ *
+ * Returns true if a usable affinity was chosen, false if no suitable cores
+ * are currently available (in which case *@affinity is 0).
+ *
+ * As long as it has been decided to have a deeper modification of
+ * what job scheduler, power manager and affinity manager will
+ * implement, this function is just an intermediate step that
+ * assumes:
+ * - all working cores will be powered on when this is called.
+ * - largest current configuration is 2 core groups.
+ * - It has been decided not to have hardcoded values so the low
+ *   and high cores in a core split will be evenly distributed.
+ * - Odd combinations of core requirements have been filtered out
+ *   and do not get to this function (e.g. CS+T+NSS is not
+ *   supported here).
+ * - This function is frequently called and can be optimized,
+ *   (see notes in loops), but as the functionality will likely
+ *   be modified, optimization has not been addressed.
+ *
+ * The caller must hold hwaccess_lock.
+ */
+bool kbase_js_choose_affinity(u64 * const affinity,
+					struct kbase_device *kbdev,
+					struct kbase_jd_atom *katom, int js)
+{
+	base_jd_core_req core_req = katom->core_req;
+	unsigned int num_core_groups = kbdev->gpu_props.num_core_groups;
+	u64 core_availability_mask;
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	core_availability_mask = kbase_pm_ca_get_core_mask(kbdev);
+
+	/*
+	 * If no cores are currently available (core availability policy is
+	 * transitioning) then fail.
+	 */
+	if (0 == core_availability_mask) {
+		*affinity = 0;
+		return false;
+	}
+
+	KBASE_DEBUG_ASSERT(js >= 0);
+
+	/* Tiler-only atom: of FS, CS and T, only T is requested */
+	if ((core_req & (BASE_JD_REQ_FS | BASE_JD_REQ_CS | BASE_JD_REQ_T)) ==
+								BASE_JD_REQ_T) {
+		 /* If the hardware supports XAFFINITY then we'll only enable
+		  * the tiler (which is the default so this is a no-op),
+		  * otherwise enable shader core 0. */
+		if (!kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_XAFFINITY))
+			*affinity = 1;
+		else
+			*affinity = 0;
+
+		return true;
+	}
+
+	if (1 == kbdev->gpu_props.num_cores) {
+		/* trivial case only one core, nothing to do */
+		*affinity = core_availability_mask &
+				kbdev->pm.debug_core_mask[js];
+	} else {
+		if ((core_req & (BASE_JD_REQ_COHERENT_GROUP |
+					BASE_JD_REQ_SPECIFIC_COHERENT_GROUP))) {
+			if (js == 0 || num_core_groups == 1) {
+				/* js[0] and single-core-group systems just get
+				 * the first core group */
+				*affinity =
+				kbdev->gpu_props.props.coherency_info.group[0].core_mask
+						& core_availability_mask &
+						kbdev->pm.debug_core_mask[js];
+			} else {
+				/* js[1], js[2] use core groups 0, 1 for
+				 * dual-core-group systems */
+				u32 core_group_idx = ((u32) js) - 1;
+
+				KBASE_DEBUG_ASSERT(core_group_idx <
+							num_core_groups);
+				*affinity =
+				kbdev->gpu_props.props.coherency_info.group[core_group_idx].core_mask
+						& core_availability_mask &
+						kbdev->pm.debug_core_mask[js];
+
+				/* If the job is specifically targeting core
+				 * group 1 and the core availability policy is
+				 * keeping that core group off, then fail: the
+				 * event code is set here and the zero-affinity
+				 * check below returns false */
+				if (*affinity == 0 && core_group_idx == 1 &&
+						kbdev->pm.backend.cg1_disabled
+								== true)
+					katom->event_code =
+							BASE_JD_EVENT_PM_EVENT;
+			}
+		} else {
+			/* All cores are available when no core split is
+			 * required */
+			*affinity = core_availability_mask &
+					kbdev->pm.debug_core_mask[js];
+		}
+	}
+
+	/*
+	 * If no cores are currently available in the desired core group(s)
+	 * (core availability policy is transitioning) then fail.
+	 */
+	if (*affinity == 0)
+		return false;
+
+	/* Enable core 0 if tiler required for hardware without XAFFINITY
+	 * support (notes above) */
+	if (core_req & BASE_JD_REQ_T) {
+		if (!kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_XAFFINITY))
+			*affinity = *affinity | 1;
+	}
+
+	return true;
+}
+
+/*
+ * Check a set of per-slot affinities for a violation.
+ *
+ * Only slots 1 and 2 - the two slots involved in Generic thread creation -
+ * are compared. Due to micro-architectural issues, a job in one of these
+ * slots targeting cores used by the other could prevent the other slot's job
+ * from making progress until it has completed, so any core common to both
+ * affinities counts as a violation.
+ *
+ * Return: true if affinities[1] and affinities[2] share at least one core
+ */
+static inline bool kbase_js_affinity_is_violating(
+						struct kbase_device *kbdev,
+								u64 *affinities)
+{
+	KBASE_DEBUG_ASSERT(affinities != NULL);
+
+	return (affinities[1] & affinities[2]) != 0;
+}
+
+/*
+ * Check whether adding @affinity to slot @js would create a violation.
+ *
+ * Works on a private copy of the current slot affinities, so the device
+ * state is not modified.
+ *
+ * Return: true if the resulting slot affinities would be violating
+ */
+bool kbase_js_affinity_would_violate(struct kbase_device *kbdev, int js,
+								u64 affinity)
+{
+	u64 trial_affinities[BASE_JM_MAX_NR_SLOTS];
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	KBASE_DEBUG_ASSERT(js < BASE_JM_MAX_NR_SLOTS);
+
+	/* Start from what the slots use right now... */
+	memcpy(trial_affinities, kbdev->js_data.runpool_irq.slot_affinities,
+			sizeof(kbdev->js_data.runpool_irq.slot_affinities));
+
+	/* ...and add the proposed cores to the slot in question */
+	trial_affinities[js] |= affinity;
+
+	return kbase_js_affinity_is_violating(kbdev, trial_affinities);
+}
+
+/*
+ * Take a reference on every core in @affinity for slot @js.
+ *
+ * Each core has a per-slot refcount; a core's bit is set in the slot's
+ * affinity mask when its refcount goes from 0 to 1. In debug builds it is
+ * asserted that retaining these cores does not create an affinity violation.
+ */
+void kbase_js_affinity_retain_slot_cores(struct kbase_device *kbdev, int js,
+								u64 affinity)
+{
+	struct kbasep_js_device_data *devdata;
+	u64 remaining;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	KBASE_DEBUG_ASSERT(js < BASE_JM_MAX_NR_SLOTS);
+	devdata = &kbdev->js_data;
+
+	KBASE_DEBUG_ASSERT(kbase_js_affinity_would_violate(kbdev, js, affinity)
+								== false);
+
+	/* Visit the set bits from the most significant one downwards */
+	for (remaining = affinity; remaining != 0; ) {
+		int core = fls64(remaining) - 1;
+		u64 core_bit = 1ULL << core;
+		s8 refs = ++(devdata->runpool_irq.slot_affinity_refcount[js][core]);
+
+		/* First user of this core on this slot */
+		if (refs == 1)
+			devdata->runpool_irq.slot_affinities[js] |= core_bit;
+
+		remaining &= ~core_bit;
+	}
+}
+
+/*
+ * Drop a reference on every core in @affinity for slot @js.
+ *
+ * A core's bit is cleared from the slot's affinity mask when its per-slot
+ * refcount reaches 0. In debug builds it is asserted that each refcount is
+ * positive before it is decremented.
+ */
+void kbase_js_affinity_release_slot_cores(struct kbase_device *kbdev, int js,
+								u64 affinity)
+{
+	struct kbasep_js_device_data *js_devdata;
+	u64 remaining;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	KBASE_DEBUG_ASSERT(js < BASE_JM_MAX_NR_SLOTS);
+	js_devdata = &kbdev->js_data;
+
+	/* Visit the set bits from the most significant one downwards */
+	for (remaining = affinity; remaining != 0; ) {
+		int bitnum = fls64(remaining) - 1;
+		u64 core_bit = 1ULL << bitnum;
+		s8 refs;
+
+		KBASE_DEBUG_ASSERT(
+		js_devdata->runpool_irq.slot_affinity_refcount[js][bitnum] > 0);
+
+		refs =
+		--(js_devdata->runpool_irq.slot_affinity_refcount[js][bitnum]);
+
+		/* Last user of this core on this slot has gone */
+		if (refs == 0)
+			js_devdata->runpool_irq.slot_affinities[js] &= ~core_bit;
+
+		remaining &= ~core_bit;
+	}
+}
+
+#if KBASE_TRACE_ENABLE
+/*
+ * Emit one JS_AFFINITY_CURRENT trace entry per job slot, carrying that
+ * slot's current affinity mask truncated to 32 bits.
+ *
+ * NOTE(review): the slot count is hard-coded to 3 rather than taken from
+ * BASE_JM_MAX_NR_SLOTS - confirm the two are meant to agree.
+ */
+void kbase_js_debug_log_current_affinities(struct kbase_device *kbdev)
+{
+	struct kbasep_js_device_data *js_devdata;
+	int slot_nr;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	js_devdata = &kbdev->js_data;
+
+	for (slot_nr = 0; slot_nr < 3; ++slot_nr)
+		KBASE_TRACE_ADD_SLOT_INFO(kbdev, JS_AFFINITY_CURRENT, NULL,
+							NULL, 0u, slot_nr,
+			(u32) js_devdata->runpool_irq.slot_affinities[slot_nr]);
+}
+#endif				/* KBASE_TRACE_ENABLE  */
--- a/drivers/gpu/arm/midgard_for_linux/backend/gpu/mali_kbase_js_affinity.h
+++ b/drivers/gpu/arm/midgard_for_linux/backend/gpu/mali_kbase_js_affinity.h
@ -0,0 +1,129 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/*
+ * Affinity Manager internal APIs.
+ */
+
+#ifndef _KBASE_JS_AFFINITY_H_
+#define _KBASE_JS_AFFINITY_H_
+
+/**
+ * kbase_js_can_run_job_on_slot_no_lock - Decide whether it is possible to
+ * submit a job to a particular job slot in the current status
+ *
+ * @kbdev: The kbase device structure of the device
+ * @js:    Job slot number to check for allowance
+ *
+ * Will check if submitting to the given job slot is allowed in the current
+ * status.  For example using job slot 2 while in soft-stoppable state and only
+ * having 1 coregroup is not allowed by the policy. This function should be
+ * called prior to submitting a job to a slot to make sure policy rules are not
+ * violated.
+ *
+ * The following locking conditions are made on the caller
+ * - it must hold hwaccess_lock
+ */
+bool kbase_js_can_run_job_on_slot_no_lock(struct kbase_device *kbdev, int js);
+
+/**
+ * kbase_js_choose_affinity - Compute affinity for a given job.
+ *
+ * @affinity: Affinity bitmap computed
+ * @kbdev:    The kbase device structure of the device
+ * @katom:    Job chain of which affinity is going to be found
+ * @js:       Slot the job chain is being submitted to
+ *
+ * Currently assumes an all-on/all-off power management policy.
+ * Also assumes there is at least one core with tiler available.
+ *
+ * Returns true if a valid affinity was chosen, false if
+ * no cores were available.
+ */
+bool kbase_js_choose_affinity(u64 * const affinity,
+					struct kbase_device *kbdev,
+					struct kbase_jd_atom *katom,
+					int js);
+
+/**
+ * kbase_js_affinity_would_violate - Determine whether a proposed affinity on
+ * job slot @js would cause a violation of affinity restrictions.
+ *
+ * @kbdev:    Kbase device structure
+ * @js:       The job slot to test
+ * @affinity: The affinity mask to test
+ *
+ * The following locks must be held by the caller
+ * - hwaccess_lock
+ *
+ * Return: true if the affinity would violate the restrictions
+ */
+bool kbase_js_affinity_would_violate(struct kbase_device *kbdev, int js,
+								u64 affinity);
+
+/**
+ * kbase_js_affinity_retain_slot_cores - Affinity tracking: retain cores used by
+ *                                       a slot
+ *
+ * @kbdev:    Kbase device structure
+ * @js:       The job slot retaining the cores
+ * @affinity: The cores to retain
+ *
+ * The following locks must be held by the caller
+ * - hwaccess_lock
+ */
+void kbase_js_affinity_retain_slot_cores(struct kbase_device *kbdev, int js,
+								u64 affinity);
+
+/**
+ * kbase_js_affinity_release_slot_cores - Affinity tracking: release cores used
+ *                                        by a slot
+ *
+ * @kbdev:    Kbase device structure
+ * @js:       Job slot
+ * @affinity: Bit mask of core to be released
+ *
+ * Cores must be released as soon as a job is dequeued from a slot's 'submit
+ * slots', and before another job is submitted to those slots. Otherwise, the
+ * refcount could exceed the maximum number submittable to a slot,
+ * %BASE_JM_SUBMIT_SLOTS.
+ *
+ * The following locks must be held by the caller
+ * - hwaccess_lock
+ */
+void kbase_js_affinity_release_slot_cores(struct kbase_device *kbdev, int js,
+								u64 affinity);
+
+/**
+ * kbase_js_debug_log_current_affinities - log the current affinities
+ *
+ * @kbdev:  Kbase device structure
+ *
+ * Output to the Trace log the current tracked affinities on all slots
+ */
+#if KBASE_TRACE_ENABLE
+void kbase_js_debug_log_current_affinities(struct kbase_device *kbdev);
+#else				/*  KBASE_TRACE_ENABLE  */
+static inline void
+kbase_js_debug_log_current_affinities(struct kbase_device *kbdev)
+{
+	/* KBASE_TRACE_ENABLE is off: nothing to log */
+}
+#endif				/*  KBASE_TRACE_ENABLE  */
+
+#endif				/* _KBASE_JS_AFFINITY_H_ */
--- a/drivers/gpu/arm/midgard_for_linux/backend/gpu/mali_kbase_js_backend.c
+++ b/drivers/gpu/arm/midgard_for_linux/backend/gpu/mali_kbase_js_backend.c
@ -0,0 +1,357 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+/*
+ * Register-based HW access backend specific job scheduler APIs
+ */
+
+#include <mali_kbase.h>
+#include <mali_kbase_hwaccess_jm.h>
+#include <backend/gpu/mali_kbase_jm_internal.h>
+#include <backend/gpu/mali_kbase_js_internal.h>
+
+/*
+ * Define for when dumping is enabled.
+ * This should not be based on the instrumentation level as whether dumping is
+ * enabled for a particular level is down to the integrator. However this is
+ * being used for now as otherwise the cinstr headers would be needed.
+ */
+#define CINSTR_DUMPING_ENABLED (2 == MALI_INSTRUMENTATION_LEVEL)
+
+/*
+ * Decide whether the scheduling tick timer should currently be running.
+ *
+ * Returns false while the backend has the timer suspended. Otherwise (debug
+ * softstop_always aside) the answer depends on how many contexts are
+ * runnable, with a stricter rule on hardware affected by
+ * BASE_HW_ISSUE_9435.
+ *
+ * Hold the runpool_mutex for this
+ */
+static inline bool timer_callback_should_run(struct kbase_device *kbdev)
+{
+	struct kbase_backend_data *backend = &kbdev->hwaccess.backend;
+	/* int, not s8: atomic_read() returns an int, and a narrowing store
+	 * would wrap once the count no longer fits in 8 bits */
+	int nr_running_ctxs;
+
+	lockdep_assert_held(&kbdev->js_data.runpool_mutex);
+
+	/* Timer must stop if we are suspending */
+	if (backend->suspend_timer)
+		return false;
+
+	/* nr_contexts_runnable is read while the runpool_mutex is held
+	 * (asserted above); the locking in the caller gives us a barrier that
+	 * ensures the value is up-to-date for reading */
+	nr_running_ctxs = atomic_read(&kbdev->js_data.nr_contexts_runnable);
+
+#ifdef CONFIG_MALI_DEBUG
+	if (kbdev->js_data.softstop_always) {
+		/* Debug support for allowing soft-stop on a single context */
+		return true;
+	}
+#endif				/* CONFIG_MALI_DEBUG */
+
+	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_9435)) {
+		/* Timeouts would have to be 4x longer (due to micro-
+		 * architectural design) to support OpenCL conformance tests, so
+		 * only run the timer when there's:
+		 * - 2 or more CL contexts
+		 * - 1 or more GLES contexts
+		 *
+		 * NOTE: A context that has both Compute and Non-Compute jobs
+		 * will be treated as an OpenCL context (hence, we don't check
+		 * KBASEP_JS_CTX_ATTR_NON_COMPUTE).
+		 */
+		{
+			s8 nr_compute_ctxs =
+				kbasep_js_ctx_attr_count_on_runpool(kbdev,
+						KBASEP_JS_CTX_ATTR_COMPUTE);
+			int nr_noncompute_ctxs = nr_running_ctxs -
+							nr_compute_ctxs;
+
+			return (bool) (nr_compute_ctxs >= 2 ||
+							nr_noncompute_ctxs > 0);
+		}
+	} else {
+		/* Run the timer callback whenever you have at least 1 context
+		 */
+		return (bool) (nr_running_ctxs > 0);
+	}
+}
+
+/*
+ * hrtimer callback for the JS scheduling tick.
+ *
+ * Under hwaccess_lock, looks at the atom at index 0 of each job slot and,
+ * unless BASE_HW_ISSUE_5736 applies, bumps its tick count. When the count
+ * equals one of the configured thresholds the atom is soft-stopped,
+ * hard-stopped, or a GPU reset is flagged. The timer is re-armed by hand
+ * while backend->timer_running is set, which is why the function always
+ * returns HRTIMER_NORESTART.
+ */
+static enum hrtimer_restart timer_callback(struct hrtimer *timer)
+{
+	unsigned long flags;
+	struct kbase_device *kbdev;
+	struct kbasep_js_device_data *js_devdata;
+	struct kbase_backend_data *backend;
+	int s;
+	bool reset_needed = false;
+
+	KBASE_DEBUG_ASSERT(timer != NULL);
+
+	backend = container_of(timer, struct kbase_backend_data,
+							scheduling_timer);
+	kbdev = container_of(backend, struct kbase_device, hwaccess.backend);
+	js_devdata = &kbdev->js_data;
+
+	/* Loop through the slots */
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	for (s = 0; s < kbdev->gpu_props.num_job_slots; s++) {
+		struct kbase_jd_atom *atom = NULL;
+
+		if (kbase_backend_nr_atoms_on_slot(kbdev, s) > 0) {
+			atom = kbase_gpu_inspect(kbdev, s, 0);
+			KBASE_DEBUG_ASSERT(atom != NULL);
+		}
+
+		if (atom != NULL) {
+			/* The current version of the model doesn't support
+			 * Soft-Stop */
+			if (!kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_5736)) {
+				/* Post-increment: 'ticks' holds the count from
+				 * before this tick was added */
+				u32 ticks = atom->sched_info.cfs.ticks++;
+
+#if !CINSTR_DUMPING_ENABLED
+				u32 soft_stop_ticks, hard_stop_ticks,
+								gpu_reset_ticks;
+				if (atom->core_req & BASE_JD_REQ_ONLY_COMPUTE) {
+					soft_stop_ticks =
+						js_devdata->soft_stop_ticks_cl;
+					hard_stop_ticks =
+						js_devdata->hard_stop_ticks_cl;
+					gpu_reset_ticks =
+						js_devdata->gpu_reset_ticks_cl;
+				} else {
+					soft_stop_ticks =
+						js_devdata->soft_stop_ticks;
+					hard_stop_ticks =
+						js_devdata->hard_stop_ticks_ss;
+					gpu_reset_ticks =
+						js_devdata->gpu_reset_ticks_ss;
+				}
+
+				/* If timeouts have been changed then ensure
+				 * that atom tick count is not greater than the
+				 * new soft_stop timeout. This ensures that
+				 * atoms do not miss any of the timeouts due to
+				 * races between this worker and the thread
+				 * changing the timeouts. */
+				if (backend->timeouts_updated &&
+						ticks > soft_stop_ticks)
+					ticks = atom->sched_info.cfs.ticks =
+							soft_stop_ticks;
+
+				/* Job is Soft-Stoppable */
+				if (ticks == soft_stop_ticks) {
+					int disjoint_threshold =
+		KBASE_DISJOINT_STATE_INTERLEAVED_CONTEXT_COUNT_THRESHOLD;
+					u32 softstop_flags = 0u;
+					/* Job has been scheduled for at least
+					 * js_devdata->soft_stop_ticks ticks.
+					 * Soft stop the slot so we can run
+					 * other jobs.
+					 */
+					dev_dbg(kbdev->dev, "Soft-stop");
+#if !KBASE_DISABLE_SCHEDULING_SOFT_STOPS
+					/* nr_user_contexts_running is updated
+					 * with the runpool_mutex, but we can't
+					 * take that here.
+					 *
+					 * However, if it's about to be
+					 * increased then the new context can't
+					 * run any jobs until they take the
+					 * hwaccess_lock, so it's OK to observe
+					 * the older value.
+					 *
+					 * Similarly, if it's about to be
+					 * decreased, the last job from another
+					 * context has already finished, so it's
+					 * not too bad that we observe the older
+					 * value and register a disjoint event
+					 * when we try soft-stopping */
+					if (js_devdata->nr_user_contexts_running
+							>= disjoint_threshold)
+						softstop_flags |=
+						JS_COMMAND_SW_CAUSES_DISJOINT;
+
+					kbase_job_slot_softstop_swflags(kbdev,
+						s, atom, softstop_flags);
+#endif
+				} else if (ticks == hard_stop_ticks) {
+					/* Job has been scheduled for at least
+					 * js_devdata->hard_stop_ticks_ss ticks.
+					 * It should have been soft-stopped by
+					 * now. Hard stop the slot.
+					 */
+#if !KBASE_DISABLE_SCHEDULING_HARD_STOPS
+					int ms =
+						js_devdata->scheduling_period_ns
+								/ 1000000u;
+					dev_warn(kbdev->dev, "JS: Job Hard-Stopped (took more than %lu ticks at %lu ms/tick)",
+							(unsigned long)ticks,
+							(unsigned long)ms);
+					kbase_job_slot_hardstop(atom->kctx, s,
+									atom);
+#endif
+				} else if (ticks == gpu_reset_ticks) {
+					/* Job has been scheduled for at least
+					 * js_devdata->gpu_reset_ticks_ss ticks.
+					 * It should have left the GPU by now.
+					 * Signal that the GPU needs to be
+					 * reset.
+					 */
+					reset_needed = true;
+				}
+#else				/* !CINSTR_DUMPING_ENABLED */
+				/* NOTE: During CINSTR_DUMPING_ENABLED, we use
+				 * the alternate timeouts, which makes the hard-
+				 * stop and GPU reset timeout much longer. We
+				 * also ensure that we don't soft-stop at all.
+				 */
+				if (ticks == js_devdata->soft_stop_ticks) {
+					/* Job has been scheduled for at least
+					 * js_devdata->soft_stop_ticks. We do
+					 * not soft-stop during
+					 * CINSTR_DUMPING_ENABLED, however.
+					 */
+					dev_dbg(kbdev->dev, "Soft-stop");
+				} else if (ticks ==
+					js_devdata->hard_stop_ticks_dumping) {
+					/* Job has been scheduled for at least
+					 * js_devdata->hard_stop_ticks_dumping
+					 * ticks. Hard stop the slot.
+					 */
+#if !KBASE_DISABLE_SCHEDULING_HARD_STOPS
+					int ms =
+						js_devdata->scheduling_period_ns
+								/ 1000000u;
+					dev_warn(kbdev->dev, "JS: Job Hard-Stopped (took more than %lu ticks at %lu ms/tick)",
+							(unsigned long)ticks,
+							(unsigned long)ms);
+					kbase_job_slot_hardstop(atom->kctx, s,
+									atom);
+#endif
+				} else if (ticks ==
+					js_devdata->gpu_reset_ticks_dumping) {
+					/* Job has been scheduled for at least
+					 * js_devdata->gpu_reset_ticks_dumping
+					 * ticks. It should have left the GPU by
+					 * now. Signal that the GPU needs to be
+					 * reset.
+					 */
+					reset_needed = true;
+				}
+#endif				/* !CINSTR_DUMPING_ENABLED */
+			}
+		}
+	}
+#if KBASE_GPU_RESET_EN
+	if (reset_needed) {
+		dev_err(kbdev->dev, "JS: Job has been on the GPU for too long (JS_RESET_TICKS_SS/DUMPING timeout hit). Issueing GPU soft-reset to resolve.");
+
+		if (kbase_prepare_to_reset_gpu_locked(kbdev))
+			kbase_reset_gpu_locked(kbdev);
+	}
+#endif /* KBASE_GPU_RESET_EN */
+	/* the timer is re-issued if there are contexts in the run-pool */
+
+	if (backend->timer_running)
+		hrtimer_start(&backend->scheduling_timer,
+			HR_TIMER_DELAY_NSEC(js_devdata->scheduling_period_ns),
+			HRTIMER_MODE_REL);
+
+	/* Any timeout change has now been applied to the atoms seen above */
+	backend->timeouts_updated = false;
+
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+	/* Re-arming, when wanted, was done explicitly above */
+	return HRTIMER_NORESTART;
+}
+
+/*
+ * Start or stop the scheduling timer to match timer_callback_should_run().
+ * backend->timer_running is only written under hwaccess_lock, the same lock
+ * under which the timer callback decides whether to re-arm itself.
+ * Caller must hold runpool_mutex.
+ */
+void kbase_backend_ctx_count_changed(struct kbase_device *kbdev)
+{
+	struct kbasep_js_device_data *js_devdata = &kbdev->js_data;
+	struct kbase_backend_data *backend = &kbdev->hwaccess.backend;
+	unsigned long flags;
+
+	lockdep_assert_held(&js_devdata->runpool_mutex);
+
+	if (!timer_callback_should_run(kbdev)) {
+		/* Take spinlock to force synchronisation with timer */
+		spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+		backend->timer_running = false;
+		spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+		/* From now on, return value of timer_callback_should_run() will
+		 * also cause the timer to not requeue itself. Its return value
+		 * cannot change, because it depends on variables updated with
+		 * the runpool_mutex held, which the caller of this must also
+		 * hold */
+		hrtimer_cancel(&backend->scheduling_timer);
+	}
+
+	/* Only start the timer if it is wanted and not already running */
+	if (timer_callback_should_run(kbdev) && !backend->timer_running) {
+		/* Take spinlock to force synchronisation with timer */
+		spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+		backend->timer_running = true;
+		spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+		hrtimer_start(&backend->scheduling_timer,
+			HR_TIMER_DELAY_NSEC(js_devdata->scheduling_period_ns),
+							HRTIMER_MODE_REL);
+
+		KBASE_TRACE_ADD(kbdev, JS_POLICY_TIMER_START, NULL, NULL, 0u,
+									0u);
+	}
+}
+
+/*
+ * Prepare the JS scheduling hrtimer and mark it as not running. The timer is
+ * not started here. Always returns 0.
+ */
+int kbase_backend_timer_init(struct kbase_device *kbdev)
+{
+	struct hrtimer *tick_timer = &kbdev->hwaccess.backend.scheduling_timer;
+
+	kbdev->hwaccess.backend.timer_running = false;
+
+	hrtimer_init(tick_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+	tick_timer->function = timer_callback;
+
+	return 0;
+}
+
+/* Cancel the JS scheduling hrtimer. */
+void kbase_backend_timer_term(struct kbase_device *kbdev)
+{
+	hrtimer_cancel(&kbdev->hwaccess.backend.scheduling_timer);
+}
+
+/*
+ * Flag the scheduling timer as suspended, then re-evaluate whether it should
+ * be running.
+ */
+void kbase_backend_timer_suspend(struct kbase_device *kbdev)
+{
+	kbdev->hwaccess.backend.suspend_timer = true;
+
+	kbase_backend_ctx_count_changed(kbdev);
+}
+
+/*
+ * Clear the scheduling timer's suspended flag, then re-evaluate whether it
+ * should be running.
+ */
+void kbase_backend_timer_resume(struct kbase_device *kbdev)
+{
+	kbdev->hwaccess.backend.suspend_timer = false;
+
+	kbase_backend_ctx_count_changed(kbdev);
+}
+
+/*
+ * Record that the scheduling timeouts were changed; the flag is read and
+ * cleared by the timer callback.
+ */
+void kbase_backend_timeouts_changed(struct kbase_device *kbdev)
+{
+	kbdev->hwaccess.backend.timeouts_updated = true;
+}
+
--- a/drivers/gpu/arm/midgard_for_linux/backend/gpu/mali_kbase_js_internal.h
+++ b/drivers/gpu/arm/midgard_for_linux/backend/gpu/mali_kbase_js_internal.h
@ -0,0 +1,69 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+/*
+ * Register-based HW access backend specific job scheduler APIs
+ */
+
+#ifndef _KBASE_JS_BACKEND_H_
+#define _KBASE_JS_BACKEND_H_
+
+/**
+ * kbase_backend_timer_init() - Initialise the JS scheduling timer
+ * @kbdev:	Device pointer
+ *
+ * This function should be called at driver initialisation
+ *
+ * Return: 0 on success
+ */
+int kbase_backend_timer_init(struct kbase_device *kbdev);
+
+/**
+ * kbase_backend_timer_term() - Terminate the JS scheduling timer
+ * @kbdev:	Device pointer
+ *
+ * This function should be called at driver termination
+ */
+void kbase_backend_timer_term(struct kbase_device *kbdev);
+
+/**
+ * kbase_backend_timer_suspend - Suspend is happening, stop the JS scheduling
+ *                               timer
+ * @kbdev: Device pointer
+ *
+ * This function should be called on suspend, after the active count has reached
+ * zero. This is required as the timer may have been started on job submission
+ * to the job scheduler, but before jobs are submitted to the GPU.
+ *
+ * Caller must hold runpool_mutex.
+ */
+void kbase_backend_timer_suspend(struct kbase_device *kbdev);
+
+/**
+ * kbase_backend_timer_resume - Resume is happening, re-evaluate the JS
+ *                              scheduling timer
+ * @kbdev: Device pointer
+ *
+ * This function should be called on resume. Note that it is not guaranteed to
+ * re-start the timer, only evaluate whether it should be re-started.
+ *
+ * Caller must hold runpool_mutex.
+ */
+void kbase_backend_timer_resume(struct kbase_device *kbdev);
+
+#endif /* _KBASE_JS_BACKEND_H_ */
--- a/drivers/gpu/arm/midgard_for_linux/backend/gpu/mali_kbase_mmu_hw_direct.c
+++ b/drivers/gpu/arm/midgard_for_linux/backend/gpu/mali_kbase_mmu_hw_direct.c
@ -0,0 +1,409 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+/* #define ENABLE_DEBUG_LOG */
+#include "../../platform/rk/custom_log.h"
+
+#include <linux/bitops.h>
+
+#include <mali_kbase.h>
+#include <mali_kbase_mem.h>
+#include <mali_kbase_mmu_hw.h>
+#include <mali_kbase_tlstream.h>
+#include <backend/gpu/mali_kbase_device_internal.h>
+#include <mali_kbase_as_fault_debugfs.h>
+
+/*
+ * Build the value for the AS_LOCKADDR registers: the byte address of @pfn
+ * ORed with a region width code derived from @num_pages.
+ */
+static inline u64 lock_region(struct kbase_device *kbdev, u64 pfn,
+		u32 num_pages)
+{
+	const u64 base = pfn << PAGE_SHIFT;
+	u8 width;
+
+	/* can't lock a zero sized range */
+	KBASE_DEBUG_ASSERT(num_pages);
+
+	/* gracefully handle num_pages being zero */
+	if (num_pages == 0)
+		return base | 11;
+
+	/*
+	 * fls returns 1 .. 32 for a non-zero argument, so
+	 * 10 + fls(num_pages) lies in the range (11 .. 42)
+	 */
+	width = 10 + fls(num_pages);
+
+	/* not pow2, so must go up to the next pow2 */
+	if ((1ul << (width - 11)) != num_pages)
+		width += 1;
+
+	KBASE_DEBUG_ASSERT(width <= KBASE_LOCK_REGION_MAX_SIZE);
+	KBASE_DEBUG_ASSERT(width >= KBASE_LOCK_REGION_MIN_SIZE);
+
+	return base | width;
+}
+
+/*
+ * Poll AS_STATUS of address space @as_nr until AS_STATUS_AS_ACTIVE clears or
+ * the poll budget (KBASE_AS_INACTIVE_MAX_LOOPS) runs out.
+ *
+ * Only the first read, and one extra read after a wait, are made with @kctx;
+ * the polling reads pass NULL so they are not logged.
+ *
+ * Return: 0 once the active bit is clear, -1 if it is still set when the
+ * budget is exhausted.
+ */
+static int wait_ready(struct kbase_device *kbdev,
+		unsigned int as_nr, struct kbase_context *kctx)
+{
+	unsigned int max_loops = KBASE_AS_INACTIVE_MAX_LOOPS;
+	u32 val = kbase_reg_read(kbdev, MMU_AS_REG(as_nr, AS_STATUS), kctx);
+
+	/* Wait for the MMU status to indicate there is no active command, in
+	 * case one is pending. Do not log remaining register accesses. */
+	while (--max_loops && (val & AS_STATUS_AS_ACTIVE))
+		val = kbase_reg_read(kbdev, MMU_AS_REG(as_nr, AS_STATUS), NULL);
+
+	/* The loop can end on the counter alone, so also check the last value
+	 * read: if the bit cleared on the final poll this is not a failure */
+	if (max_loops == 0 && (val & AS_STATUS_AS_ACTIVE)) {
+		dev_err(kbdev->dev, "AS_ACTIVE bit stuck\n");
+		return -1;
+	}
+
+	/* If waiting in loop was performed, log last read value. */
+	if (KBASE_AS_INACTIVE_MAX_LOOPS - 1 > max_loops)
+		kbase_reg_read(kbdev, MMU_AS_REG(as_nr, AS_STATUS), kctx);
+
+	return 0;
+}
+
+/*
+ * Write @cmd to AS_COMMAND of address space @as_nr once wait_ready() reports
+ * the MMU idle.
+ *
+ * Return: the result of wait_ready(); the command is only written when that
+ * result is 0.
+ */
+static int write_cmd(struct kbase_device *kbdev, int as_nr, u32 cmd,
+		struct kbase_context *kctx)
+{
+	const int status = wait_ready(kbdev, as_nr, kctx);
+
+	/* MMU never became ready: do not write the command */
+	if (status != 0)
+		return status;
+
+	kbase_reg_write(kbdev, MMU_AS_REG(as_nr, AS_COMMAND), cmd, kctx);
+
+	return 0;
+}
+
+/*
+ * Log an error when a fault address is reported on a GPU that has the
+ * protected mode feature, unless the GPU also has protected debug mode and
+ * GPU_STATUS shows GPU_DBGEN set.
+ */
+static void validate_protected_page_fault(struct kbase_device *kbdev,
+		struct kbase_context *kctx)
+{
+	/* GPUs which support (native) protected mode shall not report page
+	 * fault addresses unless it has protected debug mode and protected
+	 * debug mode is turned on */
+	if (!kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_PROTECTED_MODE))
+		return;
+
+	/* GPU_STATUS is only read when the debug mode feature exists */
+	if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_PROTECTED_DEBUG_MODE) &&
+			(kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_STATUS),
+					kctx) & GPU_DBGEN))
+		return;
+
+	/* fault_addr should never be reported in protected mode.
+	 * However, we just continue by printing an error message */
+	dev_err(kbdev->dev, "Fault address reported in protected mode\n");
+}
+
+/*
+ * Handle the MMU fault bits in @irq_stat.
+ *
+ * The MMU interrupt mask is saved and zeroed on entry. For each address
+ * space with a bus or page fault bit set, the fault address, status and type
+ * are recorded in kbdev->as[] and handed to kbase_mmu_interrupt_process()
+ * under hwaccess_lock. On exit the saved mask, minus the bits for the faults
+ * handed over here, is ORed with whatever mask is then in the register and
+ * written back.
+ */
+void kbase_mmu_interrupt(struct kbase_device *kbdev, u32 irq_stat)
+{
+	const int num_as = 16;
+	const int busfault_shift = MMU_PAGE_FAULT_FLAGS;
+	const int pf_shift = 0;
+	const unsigned long as_bit_mask = (1UL << num_as) - 1;
+	unsigned long flags;
+	u32 new_mask;
+	u32 tmp;
+
+	/* bus faults */
+	u32 bf_bits = (irq_stat >> busfault_shift) & as_bit_mask;
+	/* page faults (note: Ignore ASes with both pf and bf) */
+	u32 pf_bits = ((irq_stat >> pf_shift) & as_bit_mask) & ~bf_bits;
+
+	KBASE_DEBUG_ASSERT(NULL != kbdev);
+
+	/* remember current mask */
+	spin_lock_irqsave(&kbdev->mmu_mask_change, flags);
+	new_mask = kbase_reg_read(kbdev, MMU_REG(MMU_IRQ_MASK), NULL);
+	/* mask interrupts for now */
+	kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_MASK), 0, NULL);
+	spin_unlock_irqrestore(&kbdev->mmu_mask_change, flags);
+
+	while (bf_bits | pf_bits) {
+		struct kbase_as *as;
+		int as_no;
+		struct kbase_context *kctx;
+
+		/*
+		 * the while logic ensures we have a bit set, no need to check
+		 * for not-found here
+		 */
+		as_no = ffs(bf_bits | pf_bits) - 1;
+		as = &kbdev->as[as_no];
+
+		/*
+		 * Refcount the kctx ASAP - it shouldn't disappear anyway, since
+		 * Bus/Page faults _should_ only occur whilst jobs are running,
+		 * and a job causing the Bus/Page fault shouldn't complete until
+		 * the MMU is updated
+		 */
+		kctx = kbasep_js_runpool_lookup_ctx(kbdev, as_no);
+		if (!kctx) {
+			/* NOTE(review): breaking here leaves every fault bit
+			 * still pending in this pass unprocessed, and their
+			 * bits stay set in new_mask when it is written back
+			 * below. */
+			E("fail to lookup ctx, to break out.");
+			break;
+		}
+
+
+		/* find faulting address */
+		as->fault_addr = kbase_reg_read(kbdev,
+						MMU_AS_REG(as_no,
+							AS_FAULTADDRESS_HI),
+						kctx);
+		as->fault_addr <<= 32;
+		as->fault_addr |= kbase_reg_read(kbdev,
+						MMU_AS_REG(as_no,
+							AS_FAULTADDRESS_LO),
+						kctx);
+
+		/* Mark the fault protected or not */
+		as->protected_mode = kbdev->protected_mode;
+
+		if (kbdev->protected_mode && as->fault_addr)
+		{
+			/* check if address reporting is allowed */
+			validate_protected_page_fault(kbdev, kctx);
+		}
+
+		/* report the fault to debugfs */
+		kbase_as_fault_debugfs_new(kbdev, as_no);
+
+		/* record the fault status */
+		as->fault_status = kbase_reg_read(kbdev,
+						  MMU_AS_REG(as_no,
+							AS_FAULTSTATUS),
+						  kctx);
+
+		/* find the fault type */
+		as->fault_type = (bf_bits & (1 << as_no)) ?
+				KBASE_MMU_FAULT_TYPE_BUS :
+				KBASE_MMU_FAULT_TYPE_PAGE;
+
+#ifdef CONFIG_MALI_GPU_MMU_AARCH64
+		as->fault_extra_addr = kbase_reg_read(kbdev,
+				MMU_AS_REG(as_no, AS_FAULTEXTRA_HI),
+				kctx);
+		as->fault_extra_addr <<= 32;
+		as->fault_extra_addr |= kbase_reg_read(kbdev,
+				MMU_AS_REG(as_no, AS_FAULTEXTRA_LO),
+				kctx);
+#endif /* CONFIG_MALI_GPU_MMU_AARCH64 */
+
+		if (kbase_as_has_bus_fault(as)) {
+			/* Mark bus fault as handled.
+			 * Note that a bus fault is processed first in case
+			 * where both a bus fault and page fault occur.
+			 */
+			bf_bits &= ~(1UL << as_no);
+
+			/* remove the queued BF (and PF) from the mask */
+			new_mask &= ~(MMU_BUS_ERROR(as_no) |
+					MMU_PAGE_FAULT(as_no));
+		} else {
+			/* Mark page fault as handled */
+			pf_bits &= ~(1UL << as_no);
+
+			/* remove the queued PF from the mask */
+			new_mask &= ~MMU_PAGE_FAULT(as_no);
+		}
+
+		/* Process the interrupt for this address space */
+		spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+		kbase_mmu_interrupt_process(kbdev, kctx, as);
+		spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+	}
+
+	/* reenable interrupts */
+	spin_lock_irqsave(&kbdev->mmu_mask_change, flags);
+	/* Keep any bits that were set in the mask register in the meantime */
+	tmp = kbase_reg_read(kbdev, MMU_REG(MMU_IRQ_MASK), NULL);
+	new_mask |= tmp;
+	kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_MASK), new_mask, NULL);
+	spin_unlock_irqrestore(&kbdev->mmu_mask_change, flags);
+}
+
+/*
+ * Write the translation setup held in as->current_setup to the registers of
+ * address space as->number, then issue AS_COMMAND_UPDATE.
+ *
+ * With CONFIG_MALI_GPU_MMU_AARCH64 the TRANSCFG registers are programmed as
+ * well; without it, ACE coherency is instead flagged in the stored transtab
+ * value. The return value of write_cmd() is not checked.
+ */
+void kbase_mmu_hw_configure(struct kbase_device *kbdev, struct kbase_as *as,
+		struct kbase_context *kctx)
+{
+	struct kbase_mmu_setup *current_setup = &as->current_setup;
+	u32 transcfg = 0;
+
+#ifdef CONFIG_MALI_GPU_MMU_AARCH64
+	transcfg = current_setup->transcfg & 0xFFFFFFFFUL;
+
+	/* Set flag AS_TRANSCFG_PTW_MEMATTR_WRITE_BACK */
+	/* Clear PTW_MEMATTR bits */
+	transcfg &= ~AS_TRANSCFG_PTW_MEMATTR_MASK;
+	/* Enable correct PTW_MEMATTR bits */
+	transcfg |= AS_TRANSCFG_PTW_MEMATTR_WRITE_BACK;
+
+	if (kbdev->system_coherency == COHERENCY_ACE) {
+		/* Set flag AS_TRANSCFG_PTW_SH_OS (outer shareable) */
+		/* Clear PTW_SH bits */
+		transcfg = (transcfg & ~AS_TRANSCFG_PTW_SH_MASK);
+		/* Enable correct PTW_SH bits */
+		transcfg = (transcfg | AS_TRANSCFG_PTW_SH_OS);
+	}
+
+	kbase_reg_write(kbdev, MMU_AS_REG(as->number, AS_TRANSCFG_LO),
+			transcfg, kctx);
+	kbase_reg_write(kbdev, MMU_AS_REG(as->number, AS_TRANSCFG_HI),
+			(current_setup->transcfg >> 32) & 0xFFFFFFFFUL, kctx);
+
+#else /* CONFIG_MALI_GPU_MMU_AARCH64 */
+
+	/* This modifies the stored setup, not just the register value */
+	if (kbdev->system_coherency == COHERENCY_ACE)
+		current_setup->transtab |= AS_TRANSTAB_LPAE_SHARE_OUTER;
+
+#endif /* CONFIG_MALI_GPU_MMU_AARCH64 */
+
+	/* 64-bit values are written as separate low and high 32-bit halves */
+	kbase_reg_write(kbdev, MMU_AS_REG(as->number, AS_TRANSTAB_LO),
+			current_setup->transtab & 0xFFFFFFFFUL, kctx);
+	kbase_reg_write(kbdev, MMU_AS_REG(as->number, AS_TRANSTAB_HI),
+			(current_setup->transtab >> 32) & 0xFFFFFFFFUL, kctx);
+
+	kbase_reg_write(kbdev, MMU_AS_REG(as->number, AS_MEMATTR_LO),
+			current_setup->memattr & 0xFFFFFFFFUL, kctx);
+	kbase_reg_write(kbdev, MMU_AS_REG(as->number, AS_MEMATTR_HI),
+			(current_setup->memattr >> 32) & 0xFFFFFFFFUL, kctx);
+
+	kbase_tlstream_tl_attrib_as_config(as,
+			current_setup->transtab,
+			current_setup->memattr,
+			transcfg);
+
+	write_cmd(kbdev, as->number, AS_COMMAND_UPDATE, kctx);
+}
+
+/*
+ * Issue MMU command @op on address space @as.
+ *
+ * AS_COMMAND_UNLOCK is written directly. Any other command is preceded by
+ * programming the lock address computed from @vpfn and @nr and issuing
+ * AS_COMMAND_LOCK. Caller must hold kbdev->mmu_hw_mutex.
+ *
+ * Return: for UNLOCK, the result of write_cmd(); otherwise the result of the
+ * wait_ready() that follows @op. The results of the other write_cmd() calls
+ * are not checked. @handling_irq is not used here.
+ */
+int kbase_mmu_hw_do_operation(struct kbase_device *kbdev, struct kbase_as *as,
+		struct kbase_context *kctx, u64 vpfn, u32 nr, u32 op,
+		unsigned int handling_irq)
+{
+	int ret;
+
+	lockdep_assert_held(&kbdev->mmu_hw_mutex);
+
+	if (op == AS_COMMAND_UNLOCK) {
+		/* Unlock doesn't require a lock first */
+		ret = write_cmd(kbdev, as->number, AS_COMMAND_UNLOCK, kctx);
+	} else {
+		u64 lock_addr = lock_region(kbdev, vpfn, nr);
+
+		/* Lock the region that needs to be updated */
+		kbase_reg_write(kbdev, MMU_AS_REG(as->number, AS_LOCKADDR_LO),
+				lock_addr & 0xFFFFFFFFUL, kctx);
+		kbase_reg_write(kbdev, MMU_AS_REG(as->number, AS_LOCKADDR_HI),
+				(lock_addr >> 32) & 0xFFFFFFFFUL, kctx);
+		write_cmd(kbdev, as->number, AS_COMMAND_LOCK, kctx);
+
+		/* Run the MMU operation */
+		write_cmd(kbdev, as->number, op, kctx);
+
+		/* Wait for the flush to complete */
+		ret = wait_ready(kbdev, as->number, kctx);
+
+		if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_9630)) {
+			/* Issue an UNLOCK command to ensure that valid page
+			   tables are re-read by the GPU after an update.
+			   Note that, the FLUSH command should perform all the
+			   actions necessary, however the bus logs show that if
+			   multiple page faults occur within an 8 page region
+			   the MMU does not always re-read the updated page
+			   table entries for later faults or is only partially
+			   read, it subsequently raises the page fault IRQ for
+			   the same addresses, the unlock ensures that the MMU
+			   cache is flushed, so updates can be re-read.  As the
+			   region is now unlocked we need to issue 2 UNLOCK
+			   commands in order to flush the MMU/uTLB,
+			   see PRLAM-8812.
+			 */
+			write_cmd(kbdev, as->number, AS_COMMAND_UNLOCK, kctx);
+			write_cmd(kbdev, as->number, AS_COMMAND_UNLOCK, kctx);
+		}
+	}
+
+	return ret;
+}
+
+/*
+ * Clear the page fault interrupt of address space @as, and its bus fault
+ * interrupt too when @type is one of the bus fault types. Nothing is written
+ * while kbdev->irq_reset_flush is set.
+ */
+void kbase_mmu_hw_clear_fault(struct kbase_device *kbdev, struct kbase_as *as,
+		struct kbase_context *kctx, enum kbase_mmu_fault_type type)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&kbdev->mmu_mask_change, flags);
+
+	/*
+	 * A reset is in-flight and we're flushing the IRQ + bottom half
+	 * so don't update anything as it could race with the reset code.
+	 */
+	if (!kbdev->irq_reset_flush) {
+		/* Clear the page (and bus fault IRQ as well in case one
+		 * occurred) */
+		u32 clear_bits = MMU_PAGE_FAULT(as->number);
+
+		if (type == KBASE_MMU_FAULT_TYPE_BUS ||
+				type == KBASE_MMU_FAULT_TYPE_BUS_UNEXPECTED)
+			clear_bits |= MMU_BUS_ERROR(as->number);
+
+		kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_CLEAR), clear_bits,
+				kctx);
+	}
+
+	spin_unlock_irqrestore(&kbdev->mmu_mask_change, flags);
+}
+
+/*
+ * Set the page fault bit of address space @as in the MMU interrupt mask, and
+ * its bus fault bit too when @type is one of the bus fault types. Nothing is
+ * written while kbdev->irq_reset_flush is set.
+ */
+void kbase_mmu_hw_enable_fault(struct kbase_device *kbdev, struct kbase_as *as,
+		struct kbase_context *kctx, enum kbase_mmu_fault_type type)
+{
+	unsigned long flags;
+
+	/* Enable the page fault IRQ (and bus fault IRQ as well in case one
+	 * occurred) */
+	spin_lock_irqsave(&kbdev->mmu_mask_change, flags);
+
+	/*
+	 * A reset is in-flight and we're flushing the IRQ + bottom half
+	 * so don't update anything as it could race with the reset code.
+	 */
+	if (!kbdev->irq_reset_flush) {
+		u32 enabled = kbase_reg_read(kbdev, MMU_REG(MMU_IRQ_MASK),
+				kctx);
+
+		enabled |= MMU_PAGE_FAULT(as->number);
+
+		if (type == KBASE_MMU_FAULT_TYPE_BUS ||
+				type == KBASE_MMU_FAULT_TYPE_BUS_UNEXPECTED)
+			enabled |= MMU_BUS_ERROR(as->number);
+
+		kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_MASK), enabled, kctx);
+	}
+
+	spin_unlock_irqrestore(&kbdev->mmu_mask_change, flags);
+}
--- a/drivers/gpu/arm/midgard_for_linux/backend/gpu/mali_kbase_mmu_hw_direct.h
+++ b/drivers/gpu/arm/midgard_for_linux/backend/gpu/mali_kbase_mmu_hw_direct.h
@ -0,0 +1,42 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/*
+ * Interface file for the direct implementation for MMU hardware access
+ *
+ * Direct MMU hardware interface
+ *
+ * This module provides the interface(s) that are required by the direct
+ * register access implementation of the MMU hardware interface
+ */
+
+#ifndef _MALI_KBASE_MMU_HW_DIRECT_H_
+#define _MALI_KBASE_MMU_HW_DIRECT_H_
+
+#include <mali_kbase_defs.h>
+
+/**
+ * kbase_mmu_interrupt - Process an MMU interrupt.
+ *
+ * Process the MMU interrupt that was reported by the &kbase_device.
+ *
+ * @kbdev:          kbase device that reported the MMU interrupt.
+ * @irq_stat:       Value of the MMU_IRQ_STATUS register
+ */
+void kbase_mmu_interrupt(struct kbase_device *kbdev, u32 irq_stat);
+
+#endif	/* _MALI_KBASE_MMU_HW_DIRECT_H_ */
--- a/drivers/gpu/arm/midgard_for_linux/backend/gpu/mali_kbase_pm_always_on.c
+++ b/drivers/gpu/arm/midgard_for_linux/backend/gpu/mali_kbase_pm_always_on.c
@ -0,0 +1,63 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/*
+ * "Always on" power management policy
+ */
+
+#include <mali_kbase.h>
+#include <mali_kbase_pm.h>
+
+/*
+ * Core mask requested by the "always on" policy: every shader core listed in
+ * the GPU's raw shader_present property.
+ */
+static u64 always_on_get_core_mask(struct kbase_device *kbdev)
+{
+	const u64 present_cores =
+			kbdev->gpu_props.props.raw_props.shader_present;
+
+	return present_cores;
+}
+
+/*
+ * The "always on" policy reports the GPU as wanted active unconditionally;
+ * @kbdev is not consulted.
+ */
+static bool always_on_get_core_active(struct kbase_device *kbdev)
+{
+	return true;
+}
+
+/* Policy init callback: the "always on" policy has nothing to set up. */
+static void always_on_init(struct kbase_device *kbdev)
+{
+	CSTD_UNUSED(kbdev);
+}
+
+/* Policy term callback: the "always on" policy has nothing to tear down. */
+static void always_on_term(struct kbase_device *kbdev)
+{
+	CSTD_UNUSED(kbdev);
+}
+
+/*
+ * The struct kbase_pm_policy descriptor for the "always on" power policy.
+ *
+ * Static table giving the policy's name, its callbacks and its identifier.
+ */
+const struct kbase_pm_policy kbase_pm_always_on_policy_ops = {
+	.name = "always_on",
+	.init = always_on_init,
+	.term = always_on_term,
+	.get_core_mask = always_on_get_core_mask,
+	.get_core_active = always_on_get_core_active,
+	.flags = 0u,
+	.id = KBASE_PM_POLICY_ID_ALWAYS_ON,
+};
+
+KBASE_EXPORT_TEST_API(kbase_pm_always_on_policy_ops);
--- a/drivers/gpu/arm/midgard_for_linux/backend/gpu/mali_kbase_pm_always_on.h
+++ b/drivers/gpu/arm/midgard_for_linux/backend/gpu/mali_kbase_pm_always_on.h
@ -0,0 +1,77 @@
+
+/*
+ *
+ * (C) COPYRIGHT 2011-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/*
+ * "Always on" power management policy
+ */
+
+#ifndef MALI_KBASE_PM_ALWAYS_ON_H
+#define MALI_KBASE_PM_ALWAYS_ON_H
+
+/**
+ * DOC:
+ * The "Always on" power management policy has the following
+ * characteristics:
+ *
+ * - When KBase indicates that the GPU will be powered up, but we don't yet
+ *   know which Job Chains are to be run:
+ *    All Shader Cores are powered up, regardless of whether or not they will
+ *    be needed later.
+ *
+ * - When KBase indicates that a set of Shader Cores are needed to submit the
+ *   currently queued Job Chains:
+ *    All Shader Cores are kept powered, regardless of whether or not they will
+ *    be needed
+ *
+ * - When KBase indicates that the GPU need not be powered:
+ *    The Shader Cores are kept powered, regardless of whether or not they will
+ *    be needed. The GPU itself is also kept powered, even though it is not
+ *    needed.
+ *
+ * This policy is automatically overridden during system suspend: the desired
+ * core state is ignored, and the cores are forced off regardless of what the
+ * policy requests. After resuming from suspend, new changes to the desired
+ * core state made by the policy are honored.
+ *
+ * Note:
+ *
+ * - KBase indicates the GPU will be powered up when it has a User Process that
+ *   has just started to submit Job Chains.
+ *
+ * - KBase indicates the GPU need not be powered when all the Job Chains from
+ *   User Processes have finished, and it is waiting for a User Process to
+ *   submit some more Job Chains.
+ */
+
+/**
+ * struct kbasep_pm_policy_always_on - Private struct for policy instance data
+ * @dummy: unused placeholder member; no real state is stored here
+ *
+ * This contains data that is private to the particular power policy that is
+ * active.
+ */
+struct kbasep_pm_policy_always_on {
+	int dummy;
+};
+
+extern const struct kbase_pm_policy kbase_pm_always_on_policy_ops;
+
+#endif /* MALI_KBASE_PM_ALWAYS_ON_H */
+
--- a/drivers/gpu/arm/midgard_for_linux/backend/gpu/mali_kbase_pm_backend.c
+++ b/drivers/gpu/arm/midgard_for_linux/backend/gpu/mali_kbase_pm_backend.c
@ -0,0 +1,466 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+/*
+ * GPU backend implementation of base kernel power management APIs
+ */
+
+#include <mali_kbase.h>
+#include <mali_midg_regmap.h>
+#include <mali_kbase_config_defaults.h>
+#ifdef CONFIG_MALI_PLATFORM_DEVICETREE
+#include <linux/pm_runtime.h>
+#endif /* CONFIG_MALI_PLATFORM_DEVICETREE */
+
+#include <mali_kbase_pm.h>
+#include <mali_kbase_hwaccess_jm.h>
+#include <backend/gpu/mali_kbase_js_internal.h>
+#include <backend/gpu/mali_kbase_pm_internal.h>
+
+static void kbase_pm_gpu_poweroff_wait_wq(struct work_struct *data);
+
+/*
+ * Invoke the platform power-on callback, when the platform supplies a
+ * callback table, and record the GPU as powered.
+ */
+void kbase_pm_register_access_enable(struct kbase_device *kbdev)
+{
+	struct kbase_pm_callback_conf *pm_cbs =
+		(struct kbase_pm_callback_conf *)POWER_MANAGEMENT_CALLBACKS;
+
+	if (pm_cbs != NULL)
+		pm_cbs->power_on_callback(kbdev);
+
+	kbdev->pm.backend.gpu_powered = true;
+}
+
+/*
+ * Invoke the platform power-off callback, when the platform supplies a
+ * callback table, and record the GPU as no longer powered.
+ */
+void kbase_pm_register_access_disable(struct kbase_device *kbdev)
+{
+	struct kbase_pm_callback_conf *pm_cbs =
+		(struct kbase_pm_callback_conf *)POWER_MANAGEMENT_CALLBACKS;
+
+	if (pm_cbs != NULL)
+		pm_cbs->power_off_callback(kbdev);
+
+	kbdev->pm.backend.gpu_powered = false;
+}
+
+/*
+ * kbase_hwaccess_pm_init - Initialise the backend power management state.
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ *
+ * Creates the power-off workqueue, resets the backend PM flags, copies the
+ * platform power callbacks (or NULL when the platform has none), then
+ * initialises the metrics, core availability and power policy subsystems.
+ *
+ * Every failure path unwinds what was set up before it, including the
+ * workqueue, so a failed init leaves nothing allocated by this function.
+ *
+ * Return: 0 on success, -ENOMEM if the workqueue cannot be allocated, the
+ *         error from kbasep_pm_metrics_init(), or -EINVAL if the core
+ *         availability or power policy initialisation fails.
+ */
+int kbase_hwaccess_pm_init(struct kbase_device *kbdev)
+{
+	int ret = 0;
+	struct kbase_pm_callback_conf *callbacks;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	mutex_init(&kbdev->pm.lock);
+
+	kbdev->pm.backend.gpu_poweroff_wait_wq = alloc_workqueue("kbase_pm_poweroff_wait",
+			WQ_HIGHPRI | WQ_UNBOUND, 1);
+	if (!kbdev->pm.backend.gpu_poweroff_wait_wq)
+		return -ENOMEM;
+
+	INIT_WORK(&kbdev->pm.backend.gpu_poweroff_wait_work,
+			kbase_pm_gpu_poweroff_wait_wq);
+
+	kbdev->pm.backend.gpu_powered = false;
+	kbdev->pm.suspending = false;
+#ifdef CONFIG_MALI_DEBUG
+	kbdev->pm.backend.driver_ready_for_irqs = false;
+#endif /* CONFIG_MALI_DEBUG */
+	kbdev->pm.backend.gpu_in_desired_state = true;
+	init_waitqueue_head(&kbdev->pm.backend.gpu_in_desired_state_wait);
+
+	callbacks = (struct kbase_pm_callback_conf *)POWER_MANAGEMENT_CALLBACKS;
+	if (callbacks) {
+		kbdev->pm.backend.callback_power_on =
+					callbacks->power_on_callback;
+		kbdev->pm.backend.callback_power_off =
+					callbacks->power_off_callback;
+		kbdev->pm.backend.callback_power_suspend =
+					callbacks->power_suspend_callback;
+		kbdev->pm.backend.callback_power_resume =
+					callbacks->power_resume_callback;
+		kbdev->pm.callback_power_runtime_init =
+					callbacks->power_runtime_init_callback;
+		kbdev->pm.callback_power_runtime_term =
+					callbacks->power_runtime_term_callback;
+		kbdev->pm.backend.callback_power_runtime_on =
+					callbacks->power_runtime_on_callback;
+		kbdev->pm.backend.callback_power_runtime_off =
+					callbacks->power_runtime_off_callback;
+		kbdev->pm.backend.callback_power_runtime_idle =
+					callbacks->power_runtime_idle_callback;
+	} else {
+		kbdev->pm.backend.callback_power_on = NULL;
+		kbdev->pm.backend.callback_power_off = NULL;
+		kbdev->pm.backend.callback_power_suspend = NULL;
+		kbdev->pm.backend.callback_power_resume = NULL;
+		kbdev->pm.callback_power_runtime_init = NULL;
+		kbdev->pm.callback_power_runtime_term = NULL;
+		kbdev->pm.backend.callback_power_runtime_on = NULL;
+		kbdev->pm.backend.callback_power_runtime_off = NULL;
+		kbdev->pm.backend.callback_power_runtime_idle = NULL;
+	}
+
+	/* Initialise the metrics subsystem */
+	ret = kbasep_pm_metrics_init(kbdev);
+	if (ret)
+		goto workq_fail;
+
+	init_waitqueue_head(&kbdev->pm.backend.l2_powered_wait);
+	kbdev->pm.backend.l2_powered = 0;
+
+	init_waitqueue_head(&kbdev->pm.backend.reset_done_wait);
+	kbdev->pm.backend.reset_done = false;
+
+	init_waitqueue_head(&kbdev->pm.zero_active_count_wait);
+	kbdev->pm.active_count = 0;
+
+	spin_lock_init(&kbdev->pm.backend.gpu_cycle_counter_requests_lock);
+	spin_lock_init(&kbdev->pm.backend.gpu_powered_lock);
+
+	init_waitqueue_head(&kbdev->pm.backend.poweroff_wait);
+
+	if (kbase_pm_ca_init(kbdev) != 0) {
+		ret = -EINVAL;
+		goto pm_ca_fail;
+	}
+
+	if (kbase_pm_policy_init(kbdev) != 0) {
+		ret = -EINVAL;
+		goto pm_policy_fail;
+	}
+
+	return 0;
+
+pm_policy_fail:
+	kbase_pm_ca_term(kbdev);
+pm_ca_fail:
+	kbasep_pm_metrics_term(kbdev);
+workq_fail:
+	/* kbase_hwaccess_pm_term() is the only other place that destroys the
+	 * workqueue, so release it here rather than leaking it when init
+	 * fails.
+	 * NOTE(review): assumes callers do not call kbase_hwaccess_pm_term()
+	 * after a failed init - confirm against the probe error path. */
+	destroy_workqueue(kbdev->pm.backend.gpu_poweroff_wait_wq);
+	return ret;
+}
+
+/*
+ * Turn the GPU clocks and interrupts on, then let the power policy update
+ * the core state.
+ *
+ * The caller must hold kbdev->pm.lock (checked with lockdep). @is_resume is
+ * passed straight through to kbase_pm_clock_on(). This function does not
+ * wait for the cores to reach the desired state.
+ */
+void kbase_pm_do_poweron(struct kbase_device *kbdev, bool is_resume)
+{
+	lockdep_assert_held(&kbdev->pm.lock);
+
+	/* Turn clocks and interrupts on - no-op if we haven't done a previous
+	 * kbase_pm_clock_off() */
+	kbase_pm_clock_on(kbdev, is_resume);
+
+	/* Update core status as required by the policy */
+	KBASE_TIMELINE_PM_CHECKTRANS(kbdev,
+				SW_FLOW_PM_CHECKTRANS_PM_DO_POWERON_START);
+	kbase_pm_update_cores_state(kbdev);
+	KBASE_TIMELINE_PM_CHECKTRANS(kbdev,
+				SW_FLOW_PM_CHECKTRANS_PM_DO_POWERON_END);
+
+	/* NOTE: We don't wait to reach the desired state, since running atoms
+	 * will wait for that state to be reached anyway */
+}
+
+/*
+ * Work item queued by kbase_pm_do_poweroff() on gpu_poweroff_wait_wq.
+ *
+ * Takes runpool_mutex and then pm.lock, turns the GPU clock off unless
+ * backend->poweron_required has been set, clears poweroff_wait_in_progress
+ * under hwaccess_lock, and finally wakes anything sleeping on
+ * backend.poweroff_wait.
+ */
+static void kbase_pm_gpu_poweroff_wait_wq(struct work_struct *data)
+{
+	struct kbase_device *kbdev = container_of(data, struct kbase_device,
+			pm.backend.gpu_poweroff_wait_work);
+	struct kbase_pm_device_data *pm = &kbdev->pm;
+	struct kbase_pm_backend_data *backend = &pm->backend;
+	struct kbasep_js_device_data *js_devdata = &kbdev->js_data;
+	unsigned long flags;
+
+/* rk_ext: adaption in DDK r14 for solution_1_for_glitch. */
+/* With this macro defined the #else branch below is compiled out, so the
+ * worker does not wait for core power transitions before powering off. */
+#define NOT_TO_WAIT_CORES_POWER_TRANSITIONS_BEFORE_POWER_OFF_GPU
+
+#ifdef NOT_TO_WAIT_CORES_POWER_TRANSITIONS_BEFORE_POWER_OFF_GPU
+#else
+	/* Wait for power transitions to complete. We do this with no locks held
+	 * so that we don't deadlock with any pending workqueues */
+	KBASE_TIMELINE_PM_CHECKTRANS(kbdev,
+				SW_FLOW_PM_CHECKTRANS_PM_DO_POWEROFF_START);
+	kbase_pm_check_transitions_sync(kbdev);
+	KBASE_TIMELINE_PM_CHECKTRANS(kbdev,
+				SW_FLOW_PM_CHECKTRANS_PM_DO_POWEROFF_END);
+#endif
+
+	/* Lock order used throughout this file: runpool_mutex, then pm.lock */
+	mutex_lock(&js_devdata->runpool_mutex);
+	mutex_lock(&kbdev->pm.lock);
+
+	if (!backend->poweron_required) {
+		/* kbase_pm_do_poweroff() zeroed these bitmaps before queueing
+		 * this work, so any set bit here is unexpected */
+		WARN_ON(kbdev->l2_available_bitmap ||
+				kbdev->shader_available_bitmap ||
+				kbdev->tiler_available_bitmap);
+
+		/* Consume any change-state events */
+		kbase_timeline_pm_check_handle_event(kbdev,
+					KBASE_TIMELINE_PM_EVENT_GPU_STATE_CHANGED);
+
+		/* Disable interrupts and turn the clock off */
+		if (!kbase_pm_clock_off(kbdev, backend->poweroff_is_suspend)) {
+			/*
+			 * Page/bus faults are pending, must drop locks to
+			 * process.  Interrupts are disabled so no more faults
+			 * should be generated at this point.
+			 */
+			mutex_unlock(&kbdev->pm.lock);
+			mutex_unlock(&js_devdata->runpool_mutex);
+			kbase_flush_mmu_wqs(kbdev);
+			mutex_lock(&js_devdata->runpool_mutex);
+			mutex_lock(&kbdev->pm.lock);
+
+			/* Turn off clock now that fault have been handled. We
+			 * dropped locks so poweron_required may have changed -
+			 * power back on if this is the case.*/
+			if (backend->poweron_required)
+				kbase_pm_clock_on(kbdev, false);
+			else
+				WARN_ON(!kbase_pm_clock_off(kbdev,
+						backend->poweroff_is_suspend));
+		}
+	}
+
+	/* Publish completion under hwaccess_lock, the same lock
+	 * kbase_pm_do_poweroff() and is_poweroff_in_progress() take when
+	 * accessing poweroff_wait_in_progress */
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	backend->poweroff_wait_in_progress = false;
+	if (backend->poweron_required) {
+		backend->poweron_required = false;
+		kbase_pm_update_cores_state_nolock(kbdev);
+	}
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+	mutex_unlock(&kbdev->pm.lock);
+	mutex_unlock(&js_devdata->runpool_mutex);
+
+	/* Release waiters in kbase_pm_wait_for_poweroff_complete() */
+	wake_up(&kbdev->pm.backend.poweroff_wait);
+}
+
+/*
+ * Request an asynchronous GPU power-off.
+ *
+ * The caller must hold kbdev->pm.lock (checked with lockdep). If no power-off
+ * is already pending, the desired shader/tiler state and the availability
+ * bitmaps are cleared, the request is flagged as in progress and the
+ * power-off work item is queued. The function itself does not wait for the
+ * work to run. @is_suspend is recorded in backend.poweroff_is_suspend for the
+ * work item to use.
+ */
+void kbase_pm_do_poweroff(struct kbase_device *kbdev, bool is_suspend)
+{
+	struct kbase_pm_backend_data *backend = &kbdev->pm.backend;
+	unsigned long irq_flags;
+
+	lockdep_assert_held(&kbdev->pm.lock);
+
+	spin_lock_irqsave(&kbdev->hwaccess_lock, irq_flags);
+
+	if (backend->poweroff_wait_in_progress) {
+		/* A power-off is already pending; nothing more to queue */
+		spin_unlock_irqrestore(&kbdev->hwaccess_lock, irq_flags);
+		return;
+	}
+
+	/* Force all cores off */
+	backend->desired_shader_state = 0;
+	backend->desired_tiler_state = 0;
+
+	/* Mark every core unavailable straight away: some cores may still be
+	 * mid-transition, in which case kbase_pm_check_transitions_nolock
+	 * cannot power them off immediately */
+	kbdev->shader_available_bitmap = 0;
+	kbdev->tiler_available_bitmap = 0;
+	kbdev->l2_available_bitmap = 0;
+
+	backend->poweroff_wait_in_progress = true;
+	backend->poweroff_is_suspend = is_suspend;
+
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, irq_flags);
+
+	/* Hand over to the workqueue; callers that need completion must wait */
+	queue_work(backend->gpu_poweroff_wait_wq,
+			&backend->gpu_poweroff_wait_work);
+}
+
+/*
+ * Wait condition used by kbase_pm_wait_for_poweroff_complete().
+ *
+ * Despite the name, this returns true when NO power-off is in progress, i.e.
+ * when poweroff_wait_in_progress is false. The flag is sampled under
+ * hwaccess_lock.
+ */
+static bool is_poweroff_in_progress(struct kbase_device *kbdev)
+{
+	unsigned long irq_flags;
+	bool poweroff_done;
+
+	spin_lock_irqsave(&kbdev->hwaccess_lock, irq_flags);
+	poweroff_done = !kbdev->pm.backend.poweroff_wait_in_progress;
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, irq_flags);
+
+	return poweroff_done;
+}
+
+/*
+ * Sleep on backend.poweroff_wait until is_poweroff_in_progress() returns
+ * true, which happens once poweroff_wait_in_progress is false.
+ *
+ * NOTE(review): the result of wait_event_killable() is discarded, so if the
+ * wait ends early because of a fatal signal the caller is not told - confirm
+ * whether callers such as the suspend path rely on the power-off having
+ * completed.
+ */
+void kbase_pm_wait_for_poweroff_complete(struct kbase_device *kbdev)
+{
+	wait_event_killable(kbdev->pm.backend.poweroff_wait,
+			is_poweroff_in_progress(kbdev));
+}
+
+/*
+ * kbase_hwaccess_pm_powerup - Bring the GPU up and enable its interrupts.
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ * @flags: Passed unchanged to kbase_pm_init_hw()
+ *
+ * Holds runpool_mutex and pm.lock while the hardware is initialised, the
+ * present cores are read, the debug core masks are reset to shader_present,
+ * interrupts are enabled and the GPU is powered on.
+ *
+ * Return: 0 on success, otherwise the error returned by kbase_pm_init_hw().
+ */
+int kbase_hwaccess_pm_powerup(struct kbase_device *kbdev,
+		unsigned int flags)
+{
+	struct kbasep_js_device_data *js_devdata = &kbdev->js_data;
+	unsigned long irq_flags;
+	int ret;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	mutex_lock(&js_devdata->runpool_mutex);
+	mutex_lock(&kbdev->pm.lock);
+
+	/* A suspend won't happen during startup/insmod */
+	KBASE_DEBUG_ASSERT(!kbase_pm_is_suspending(kbdev));
+
+	/* Power up the GPU, don't enable IRQs as we are not ready to receive
+	 * them. */
+	ret = kbase_pm_init_hw(kbdev, flags);
+	if (ret) {
+		mutex_unlock(&kbdev->pm.lock);
+		mutex_unlock(&js_devdata->runpool_mutex);
+		return ret;
+	}
+
+	kbasep_pm_read_present_cores(kbdev);
+
+	/* Start with every per-slot debug mask, and the combined mask, set to
+	 * all present shader cores */
+	kbdev->pm.debug_core_mask_all = kbdev->pm.debug_core_mask[0] =
+			kbdev->pm.debug_core_mask[1] =
+			kbdev->pm.debug_core_mask[2] =
+			kbdev->gpu_props.props.raw_props.shader_present;
+
+	/* Pretend the GPU is active to prevent a power policy turning the GPU
+	 * cores off */
+	kbdev->pm.active_count = 1;
+
+	spin_lock_irqsave(&kbdev->pm.backend.gpu_cycle_counter_requests_lock,
+								irq_flags);
+	/* Ensure cycle counter is off */
+	kbdev->pm.backend.gpu_cycle_counter_requests = 0;
+	spin_unlock_irqrestore(
+			&kbdev->pm.backend.gpu_cycle_counter_requests_lock,
+								irq_flags);
+
+	/* We are ready to receive IRQ's now as power policy is set up, so
+	 * enable them now. */
+#ifdef CONFIG_MALI_DEBUG
+	spin_lock_irqsave(&kbdev->pm.backend.gpu_powered_lock, irq_flags);
+	kbdev->pm.backend.driver_ready_for_irqs = true;
+	spin_unlock_irqrestore(&kbdev->pm.backend.gpu_powered_lock, irq_flags);
+#endif
+	kbase_pm_enable_interrupts(kbdev);
+
+	/* Turn on the GPU and any cores needed by the policy */
+	kbase_pm_do_poweron(kbdev, false);
+	mutex_unlock(&kbdev->pm.lock);
+	mutex_unlock(&js_devdata->runpool_mutex);
+
+	/* Idle the GPU and/or cores, if the policy wants it to */
+	/* NOTE(review): presumably this also balances the active_count = 1
+	 * set above - confirm in kbase_pm_context_idle() */
+	kbase_pm_context_idle(kbdev);
+
+	return 0;
+}
+
+/*
+ * Cancel any deferred power-off and request an immediate (non-suspend)
+ * power-off, all under pm.lock.
+ *
+ * The power-off itself is carried out by the queued work item; this function
+ * does not wait for it.
+ */
+void kbase_hwaccess_pm_halt(struct kbase_device *kbdev)
+{
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	mutex_lock(&kbdev->pm.lock);
+	kbase_pm_cancel_deferred_poweroff(kbdev);
+	kbase_pm_do_poweroff(kbdev, false);
+	mutex_unlock(&kbdev->pm.lock);
+}
+
+KBASE_EXPORT_TEST_API(kbase_hwaccess_pm_halt);
+
+/*
+ * Tear down what kbase_hwaccess_pm_init() set up: the power policy, the
+ * core availability policy, the metrics subsystem and the power-off
+ * workqueue.
+ *
+ * Debug builds assert that the PM active count and the cycle counter request
+ * count are both zero on entry.
+ */
+void kbase_hwaccess_pm_term(struct kbase_device *kbdev)
+{
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	KBASE_DEBUG_ASSERT(kbdev->pm.active_count == 0);
+	KBASE_DEBUG_ASSERT(kbdev->pm.backend.gpu_cycle_counter_requests == 0);
+
+	/* Free any resources the policy allocated */
+	kbase_pm_policy_term(kbdev);
+	kbase_pm_ca_term(kbdev);
+
+	/* Shut down the metrics subsystem */
+	kbasep_pm_metrics_term(kbdev);
+
+	destroy_workqueue(kbdev->pm.backend.gpu_poweroff_wait_wq);
+}
+
+/*
+ * Re-evaluate core power transitions under hwaccess_lock and, if
+ * kbase_pm_check_transitions_nolock() reports the cores as available, log
+ * the state change on the timeline and update the job slots.
+ */
+void kbase_pm_power_changed(struct kbase_device *kbdev)
+{
+	unsigned long irq_flags;
+	bool cores_ready;
+
+	KBASE_TIMELINE_PM_CHECKTRANS(kbdev,
+				SW_FLOW_PM_CHECKTRANS_GPU_INTERRUPT_START);
+	spin_lock_irqsave(&kbdev->hwaccess_lock, irq_flags);
+	cores_ready = kbase_pm_check_transitions_nolock(kbdev);
+	KBASE_TIMELINE_PM_CHECKTRANS(kbdev,
+				SW_FLOW_PM_CHECKTRANS_GPU_INTERRUPT_END);
+
+	if (cores_ready) {
+		/* Record on the timeline that a state change has finished */
+		kbase_timeline_pm_handle_event(kbdev,
+				KBASE_TIMELINE_PM_EVENT_GPU_STATE_CHANGED);
+
+		kbase_backend_slot_update(kbdev);
+	}
+
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, irq_flags);
+}
+
+/*
+ * Store a new debug core mask for each of the three job slots, recompute the
+ * combined mask as their bitwise OR, and re-run the core state update.
+ *
+ * NOTE(review): this calls the _nolock variant of the core state update
+ * without taking or asserting any lock here - presumably the caller holds
+ * hwaccess_lock; confirm against the callers.
+ */
+void kbase_pm_set_debug_core_mask(struct kbase_device *kbdev,
+		u64 new_core_mask_js0, u64 new_core_mask_js1,
+		u64 new_core_mask_js2)
+{
+	struct kbase_pm_device_data *pm = &kbdev->pm;
+
+	pm->debug_core_mask[0] = new_core_mask_js0;
+	pm->debug_core_mask[1] = new_core_mask_js1;
+	pm->debug_core_mask[2] = new_core_mask_js2;
+	pm->debug_core_mask_all = new_core_mask_js0 | new_core_mask_js1 |
+			new_core_mask_js2;
+
+	kbase_pm_update_cores_state_nolock(kbdev);
+}
+
+/*
+ * "GPU active" notification for the backend: simply forwards to
+ * kbase_pm_update_active().
+ */
+void kbase_hwaccess_pm_gpu_active(struct kbase_device *kbdev)
+{
+	kbase_pm_update_active(kbdev);
+}
+
+/*
+ * "GPU idle" notification for the backend: simply forwards to
+ * kbase_pm_update_active(), the same call the "active" notification makes.
+ */
+void kbase_hwaccess_pm_gpu_idle(struct kbase_device *kbdev)
+{
+	kbase_pm_update_active(kbdev);
+}
+
+/*
+ * Suspend path: request a power-off of the GPU and all cores regardless of
+ * the policy, suspend the backend timer, then wait for the power-off work to
+ * finish.
+ *
+ * This is meant to run only once the PM active count has reached zero;
+ * otherwise the GPU could be turned off prematurely.
+ */
+void kbase_hwaccess_pm_suspend(struct kbase_device *kbdev)
+{
+	mutex_lock(&kbdev->js_data.runpool_mutex);
+	mutex_lock(&kbdev->pm.lock);
+
+	kbase_pm_cancel_deferred_poweroff(kbdev);
+	kbase_pm_do_poweroff(kbdev, true);
+
+	kbase_backend_timer_suspend(kbdev);
+
+	mutex_unlock(&kbdev->pm.lock);
+	mutex_unlock(&kbdev->js_data.runpool_mutex);
+
+	/* Wait with both mutexes released: the power-off work item takes
+	 * them itself */
+	kbase_pm_wait_for_poweroff_complete(kbdev);
+}
+
+/*
+ * Resume path: clear the suspending flag, power the GPU back on with the
+ * resume indication set, and resume the backend timer, all while holding
+ * runpool_mutex and pm.lock.
+ */
+void kbase_hwaccess_pm_resume(struct kbase_device *kbdev)
+{
+	mutex_lock(&kbdev->js_data.runpool_mutex);
+	mutex_lock(&kbdev->pm.lock);
+
+	kbdev->pm.suspending = false;
+	kbase_pm_do_poweron(kbdev, true);
+
+	kbase_backend_timer_resume(kbdev);
+
+	mutex_unlock(&kbdev->pm.lock);
+	mutex_unlock(&kbdev->js_data.runpool_mutex);
+}
--- a/drivers/gpu/arm/midgard_for_linux/backend/gpu/mali_kbase_pm_ca.c
+++ b/drivers/gpu/arm/midgard_for_linux/backend/gpu/mali_kbase_pm_ca.c
@ -0,0 +1,179 @@
+/*
+ *
+ * (C) COPYRIGHT 2013-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/*
+ * Base kernel core availability APIs
+ */
+
+#include <mali_kbase.h>
+#include <mali_kbase_pm.h>
+#include <backend/gpu/mali_kbase_pm_internal.h>
+
+/*
+ * Core availability policies that can be selected. The first entry is the
+ * one kbase_pm_ca_init() installs; the random policy is only listed when
+ * MALI_CUSTOMER_RELEASE evaluates to zero.
+ */
+static const struct kbase_pm_ca_policy *const policy_list[] = {
+	&kbase_pm_ca_fixed_policy_ops,
+#if !MALI_CUSTOMER_RELEASE
+	&kbase_pm_ca_random_policy_ops
+#endif
+};
+
+/**
+ * POLICY_COUNT - The number of policies available in the system.
+ *
+ * Computed at compile time as the number of entries in policy_list.
+ */
+#define POLICY_COUNT (sizeof(policy_list) / sizeof(policy_list[0]))
+
+/*
+ * Install the first entry of policy_list as the current core availability
+ * policy and run its init callback. Always returns 0.
+ */
+int kbase_pm_ca_init(struct kbase_device *kbdev)
+{
+	const struct kbase_pm_ca_policy *default_policy = policy_list[0];
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	kbdev->pm.backend.ca_current_policy = default_policy;
+	default_policy->init(kbdev);
+
+	return 0;
+}
+
+/* Run the term callback of the currently installed core availability policy. */
+void kbase_pm_ca_term(struct kbase_device *kbdev)
+{
+	const struct kbase_pm_ca_policy *active_policy =
+			kbdev->pm.backend.ca_current_policy;
+
+	active_policy->term(kbdev);
+}
+
+/*
+ * Report the available core availability policies.
+ *
+ * If @list is non-NULL, *@list is set to point at the policy table. The
+ * return value is the number of entries in that table either way.
+ */
+int kbase_pm_ca_list_policies(const struct kbase_pm_ca_policy * const **list)
+{
+	if (list != NULL)
+		*list = policy_list;
+
+	return POLICY_COUNT;
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_ca_list_policies);
+
+/*
+ * Return the currently installed core availability policy.
+ *
+ * The pointer is read without taking any lock; kbase_pm_ca_set_policy() sets
+ * it to NULL while a policy change is under way, so NULL can be returned
+ * during that window.
+ */
+const struct kbase_pm_ca_policy
+*kbase_pm_ca_get_policy(struct kbase_device *kbdev)
+{
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	return kbdev->pm.backend.ca_current_policy;
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_ca_get_policy);
+
+/*
+ * Replace the current core availability policy with @new_policy.
+ *
+ * The GPU is held "active" for the duration of the change. The current policy
+ * pointer is set to NULL under hwaccess_lock while the old policy's term and
+ * the new policy's init callbacks run (each is optional), then the new
+ * policy is installed, the core state is re-evaluated and the new policy is
+ * told the current shader ready/transitioning bitmaps.
+ */
+void kbase_pm_ca_set_policy(struct kbase_device *kbdev,
+				const struct kbase_pm_ca_policy *new_policy)
+{
+	const struct kbase_pm_ca_policy *prev_policy;
+	struct kbase_pm_backend_data *backend;
+	unsigned long irq_flags;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	KBASE_DEBUG_ASSERT(new_policy != NULL);
+
+	KBASE_TRACE_ADD(kbdev, PM_CA_SET_POLICY, NULL, NULL, 0u,
+								new_policy->id);
+
+	backend = &kbdev->pm.backend;
+
+	/* Take a fake "active" reference while the policy is being swapped.
+	 * No suspend can occur here because this runs in a syscall made by a
+	 * userspace thread */
+	kbase_pm_context_active(kbdev);
+
+	mutex_lock(&kbdev->pm.lock);
+
+	/* Detach the old policy so IRQ handlers cannot operate on it */
+	spin_lock_irqsave(&kbdev->hwaccess_lock, irq_flags);
+	prev_policy = backend->ca_current_policy;
+	backend->ca_current_policy = NULL;
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, irq_flags);
+
+	if (prev_policy->term)
+		prev_policy->term(kbdev);
+
+	if (new_policy->init)
+		new_policy->init(kbdev);
+
+	spin_lock_irqsave(&kbdev->hwaccess_lock, irq_flags);
+	backend->ca_current_policy = new_policy;
+
+	/* Retry any core power state changes that were attempted while the
+	 * policy pointer was NULL and therefore could not be applied */
+	kbase_pm_update_cores_state_nolock(kbdev);
+
+	backend->ca_current_policy->update_core_status(kbdev,
+					kbdev->shader_ready_bitmap,
+					kbdev->shader_transitioning_bitmap);
+
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, irq_flags);
+
+	mutex_unlock(&kbdev->pm.lock);
+
+	/* The swap is complete: drop the fake "active" reference */
+	kbase_pm_context_idle(kbdev);
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_ca_set_policy);
+
+/*
+ * Return the mask of shader cores currently allowed for use.
+ *
+ * The caller must hold hwaccess_lock (checked with lockdep). When
+ * instrumentation is enabled, or while no policy is installed, the mask is
+ * every present shader core; otherwise it is whatever the current policy's
+ * get_core_mask callback reports. The result is always restricted by
+ * debug_core_mask_all.
+ */
+u64 kbase_pm_ca_get_core_mask(struct kbase_device *kbdev)
+{
+	u64 cores;
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	/* All cores must be enabled when instrumentation is in use; the same
+	 * mask is used while the policy pointer is NULL */
+	if (kbdev->pm.backend.instr_enabled ||
+			kbdev->pm.backend.ca_current_policy == NULL)
+		cores = kbdev->gpu_props.props.raw_props.shader_present;
+	else
+		cores = kbdev->pm.backend.ca_current_policy->get_core_mask(
+								kbdev);
+
+	return cores & kbdev->pm.debug_core_mask_all;
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_ca_get_core_mask);
+
+/*
+ * Forward the current core power status to the installed core availability
+ * policy, if there is one.
+ *
+ * The caller must hold hwaccess_lock (checked with lockdep). Nothing happens
+ * while the policy pointer is NULL.
+ */
+void kbase_pm_ca_update_core_status(struct kbase_device *kbdev, u64 cores_ready,
+							u64 cores_transitioning)
+{
+	const struct kbase_pm_ca_policy *policy;
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	policy = kbdev->pm.backend.ca_current_policy;
+	if (policy == NULL)
+		return;
+
+	policy->update_core_status(kbdev, cores_ready, cores_transitioning);
+}
+
+/*
+ * Set the instrumentation override and re-evaluate the core state.
+ *
+ * Takes hwaccess_lock itself, unlike kbase_pm_ca_instr_disable(), which
+ * expects the caller to hold it.
+ */
+void kbase_pm_ca_instr_enable(struct kbase_device *kbdev)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	kbdev->pm.backend.instr_enabled = true;
+
+	kbase_pm_update_cores_state_nolock(kbdev);
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+}
+
+/*
+ * Clear the instrumentation override and re-evaluate the core state.
+ *
+ * The caller must already hold hwaccess_lock (checked with lockdep).
+ */
+void kbase_pm_ca_instr_disable(struct kbase_device *kbdev)
+{
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+	kbdev->pm.backend.instr_enabled = false;
+
+	kbase_pm_update_cores_state_nolock(kbdev);
+}
--- a/drivers/gpu/arm/midgard_for_linux/backend/gpu/mali_kbase_pm_ca.h
+++ b/drivers/gpu/arm/midgard_for_linux/backend/gpu/mali_kbase_pm_ca.h
@ -0,0 +1,92 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/*
+ * Base kernel core availability APIs
+ */
+
+#ifndef _KBASE_PM_CA_H_
+#define _KBASE_PM_CA_H_
+
+/**
+ * kbase_pm_ca_init - Initialize core availability framework
+ *
+ * Must be called before calling any other core availability function
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ *
+ * Return: 0 if the core availability framework was successfully initialized,
+ *         -errno otherwise
+ */
+int kbase_pm_ca_init(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_ca_term - Terminate core availability framework
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_ca_term(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_ca_get_core_mask - Get currently available shaders core mask
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ *
+ * Returns a mask of the currently available shader cores.
+ * Calls into the core availability policy
+ *
+ * Return: The bit mask of available cores
+ */
+u64 kbase_pm_ca_get_core_mask(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_ca_update_core_status - Update core status
+ *
+ * @kbdev:               The kbase device structure for the device (must be
+ *                       a valid pointer)
+ * @cores_ready:         The bit mask of cores ready for job submission
+ * @cores_transitioning: The bit mask of cores that are transitioning power
+ *                       state
+ *
+ * Update core availability policy with current core power status
+ *
+ * Calls into the core availability policy
+ */
+void kbase_pm_ca_update_core_status(struct kbase_device *kbdev, u64 cores_ready,
+						u64 cores_transitioning);
+
+/**
+ * kbase_pm_ca_instr_enable - Enable override for instrumentation
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ *
+ * This overrides the output of the core availability policy, ensuring that all
+ * cores are available
+ */
+void kbase_pm_ca_instr_enable(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_ca_instr_disable - Disable override for instrumentation
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ *
+ * This disables any previously enabled override, and resumes normal policy
+ * functionality
+ */
+void kbase_pm_ca_instr_disable(struct kbase_device *kbdev);
+
+#endif /* _KBASE_PM_CA_H_ */
--- a/drivers/gpu/arm/midgard_for_linux/backend/gpu/mali_kbase_pm_ca_fixed.c
+++ b/drivers/gpu/arm/midgard_for_linux/backend/gpu/mali_kbase_pm_ca_fixed.c
@ -0,0 +1,65 @@
+/*
+ *
+ * (C) COPYRIGHT 2013-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/*
+ * A power policy implementing fixed core availability
+ */
+
+#include <mali_kbase.h>
+#include <mali_kbase_pm.h>
+
+/* Policy init callback: clear the backend's ca_in_transition flag. */
+static void fixed_init(struct kbase_device *kbdev)
+{
+	kbdev->pm.backend.ca_in_transition = false;
+}
+
+/* Policy term callback: the fixed policy has nothing to tear down. */
+static void fixed_term(struct kbase_device *kbdev)
+{
+	CSTD_UNUSED(kbdev);
+}
+
+/*
+ * Core mask offered by the fixed policy: every shader core listed in the
+ * GPU's raw shader_present property.
+ */
+static u64 fixed_get_core_mask(struct kbase_device *kbdev)
+{
+	const u64 present_cores =
+			kbdev->gpu_props.props.raw_props.shader_present;
+
+	return present_cores;
+}
+
+/*
+ * Core status callback: the fixed policy ignores the reported status, so all
+ * three arguments are unused.
+ */
+static void fixed_update_core_status(struct kbase_device *kbdev,
+					u64 cores_ready,
+					u64 cores_transitioning)
+{
+	CSTD_UNUSED(kbdev);
+	CSTD_UNUSED(cores_ready);
+	CSTD_UNUSED(cores_transitioning);
+}
+
+/*
+ * The struct kbase_pm_ca_policy descriptor for the fixed core availability
+ * policy.
+ *
+ * Static table giving the policy's name, its callbacks and its identifier.
+ */
+const struct kbase_pm_ca_policy kbase_pm_ca_fixed_policy_ops = {
+	.name = "fixed",
+	.init = fixed_init,
+	.term = fixed_term,
+	.get_core_mask = fixed_get_core_mask,
+	.update_core_status = fixed_update_core_status,
+	.flags = 0u,
+	.id = KBASE_PM_CA_POLICY_ID_FIXED,
+};
+
+KBASE_EXPORT_TEST_API(kbase_pm_ca_fixed_policy_ops);
--- a/drivers/gpu/arm/midgard_for_linux/backend/gpu/mali_kbase_pm_ca_fixed.h
+++ b/drivers/gpu/arm/midgard_for_linux/backend/gpu/mali_kbase_pm_ca_fixed.h
@ -0,0 +1,40 @@
+/*
+ *
+ * (C) COPYRIGHT 2013-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/*
+ * A power policy implementing fixed core availability
+ */
+
+#ifndef MALI_KBASE_PM_CA_FIXED_H
+#define MALI_KBASE_PM_CA_FIXED_H
+
+/**
+ * struct kbasep_pm_ca_policy_fixed - Private structure for policy instance data
+ *
+ * @dummy: Placeholder member - the fixed policy needs no state
+ *
+ * This contains data that is private to the particular power policy that is
+ * active.
+ */
+struct kbasep_pm_ca_policy_fixed {
+	int dummy;
+};
+
+extern const struct kbase_pm_ca_policy kbase_pm_ca_fixed_policy_ops;
+
+#endif /* MALI_KBASE_PM_CA_FIXED_H */
+
--- a/drivers/gpu/arm/midgard_for_linux/backend/gpu/mali_kbase_pm_coarse_demand.c
+++ b/drivers/gpu/arm/midgard_for_linux/backend/gpu/mali_kbase_pm_coarse_demand.c
@ -0,0 +1,70 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/*
+ * "Coarse Demand" power management policy
+ */
+
+#include <mali_kbase.h>
+#include <mali_kbase_pm.h>
+
+/*
+ * Core mask requested by the coarse demand policy: no cores while the PM
+ * active count is zero, otherwise every present shader core.
+ */
+static u64 coarse_demand_get_core_mask(struct kbase_device *kbdev)
+{
+	if (kbdev->pm.active_count != 0)
+		return kbdev->gpu_props.props.raw_props.shader_present;
+
+	return 0;
+}
+
+/*
+ * Decide whether the coarse demand policy wants the GPU active.
+ *
+ * Returns true if the PM active count is non-zero, or any shader core is
+ * needed or in use, or the tiler is needed or in use; false only when all of
+ * those are zero.
+ */
+static bool coarse_demand_get_core_active(struct kbase_device *kbdev)
+{
+	if (kbdev->pm.active_count != 0)
+		return true;
+
+	if (kbdev->shader_needed_bitmap | kbdev->shader_inuse_bitmap)
+		return true;
+
+	if (kbdev->tiler_needed_cnt)
+		return true;
+
+	if (kbdev->tiler_inuse_cnt)
+		return true;
+
+	return false;
+}
+
+/* Policy init callback: the coarse demand policy has nothing to set up. */
+static void coarse_demand_init(struct kbase_device *kbdev)
+{
+	CSTD_UNUSED(kbdev);
+}
+
+/* Policy term callback: the coarse demand policy has nothing to tear down. */
+static void coarse_demand_term(struct kbase_device *kbdev)
+{
+	CSTD_UNUSED(kbdev);
+}
+
+/* The struct kbase_pm_policy descriptor for the coarse demand power policy.
+ *
+ * Static table giving the policy's name, its callbacks and its identifier.
+ */
+const struct kbase_pm_policy kbase_pm_coarse_demand_policy_ops = {
+	.name = "coarse_demand",
+	.init = coarse_demand_init,
+	.term = coarse_demand_term,
+	.get_core_mask = coarse_demand_get_core_mask,
+	.get_core_active = coarse_demand_get_core_active,
+	.flags = 0u,
+	.id = KBASE_PM_POLICY_ID_COARSE_DEMAND,
+};
+
+KBASE_EXPORT_TEST_API(kbase_pm_coarse_demand_policy_ops);
--- a/drivers/gpu/arm/midgard_for_linux/backend/gpu/mali_kbase_pm_coarse_demand.h
+++ b/drivers/gpu/arm/midgard_for_linux/backend/gpu/mali_kbase_pm_coarse_demand.h
@ -0,0 +1,64 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/*
+ * "Coarse Demand" power management policy
+ */
+
+#ifndef MALI_KBASE_PM_COARSE_DEMAND_H
+#define MALI_KBASE_PM_COARSE_DEMAND_H
+
+/**
+ * DOC: Coarse demand power management policy
+ *
+ * The "Coarse" demand power management policy has the following
+ * characteristics:
+ * - When KBase indicates that the GPU will be powered up, but we don't yet
+ *   know which Job Chains are to be run:
+ *  - All Shader Cores are powered up, regardless of whether or not they will
+ *    be needed later.
+ * - When KBase indicates that a set of Shader Cores are needed to submit the
+ *   currently queued Job Chains:
+ *  - All Shader Cores are kept powered, regardless of whether or not they will
+ *    be needed
+ * - When KBase indicates that the GPU need not be powered:
+ *  - The Shader Cores are powered off, and the GPU itself is powered off too.
+ *
+ * @note:
+ * - KBase indicates the GPU will be powered up when it has a User Process that
+ *   has just started to submit Job Chains.
+ * - KBase indicates the GPU need not be powered when all the Job Chains from
+ *   User Processes have finished, and it is waiting for a User Process to
+ *   submit some more Job Chains.
+ */
+
+/**
+ * struct kbasep_pm_policy_coarse_demand - Private structure for coarse demand
+ *                                         policy
+ *
+ * @dummy: Dummy member - no state needed
+ *
+ * This contains data that is private to the coarse demand power policy.
+ */
+struct kbasep_pm_policy_coarse_demand {
+	/* Placeholder only: the policy keeps no state of its own */
+	int dummy;
+};
+
+extern const struct kbase_pm_policy kbase_pm_coarse_demand_policy_ops;
+
+#endif /* MALI_KBASE_PM_COARSE_DEMAND_H */
--- a/drivers/gpu/arm/midgard_for_linux/backend/gpu/mali_kbase_pm_defs.h
+++ b/drivers/gpu/arm/midgard_for_linux/backend/gpu/mali_kbase_pm_defs.h
@ -0,0 +1,504 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/*
+ * Backend-specific Power Manager definitions
+ */
+
+#ifndef _KBASE_PM_HWACCESS_DEFS_H_
+#define _KBASE_PM_HWACCESS_DEFS_H_
+
+#include "mali_kbase_pm_ca_fixed.h"
+#if !MALI_CUSTOMER_RELEASE
+#include "mali_kbase_pm_ca_random.h"
+#endif
+
+#include "mali_kbase_pm_always_on.h"
+#include "mali_kbase_pm_coarse_demand.h"
+#include "mali_kbase_pm_demand.h"
+#if !MALI_CUSTOMER_RELEASE
+#include "mali_kbase_pm_demand_always_powered.h"
+#include "mali_kbase_pm_fast_start.h"
+#endif
+
+/* Forward definition - see mali_kbase.h */
+struct kbase_device;
+struct kbase_jd_atom;
+
+/**
+ * enum kbase_pm_core_type - The types of core in a GPU.
+ *
+ * These enumerated values are used in calls to
+ * - kbase_pm_get_present_cores()
+ * - kbase_pm_get_active_cores()
+ * - kbase_pm_get_trans_cores()
+ * - kbase_pm_get_ready_cores().
+ *
+ * They specify which type of core should be acted on.  These values are set in
+ * a manner that allows core_type_to_reg() function to be simpler and more
+ * efficient.
+ *
+ * @KBASE_PM_CORE_L2: The L2 cache
+ * @KBASE_PM_CORE_SHADER: Shader cores
+ * @KBASE_PM_CORE_TILER: Tiler cores
+ */
+enum kbase_pm_core_type {
+	/*
+	 * Each enumerator takes its value from the matching *_PRESENT_LO
+	 * constant rather than being numbered sequentially.
+	 * NOTE(review): presumably these are register offsets consumed by
+	 * core_type_to_reg() - confirm before renumbering.
+	 */
+	KBASE_PM_CORE_L2 = L2_PRESENT_LO,
+	KBASE_PM_CORE_SHADER = SHADER_PRESENT_LO,
+	KBASE_PM_CORE_TILER = TILER_PRESENT_LO
+};
+
+/**
+ * struct kbasep_pm_metrics_data - Metrics data collected for use by the power
+ *                                 management framework.
+ *
+ *  @time_period_start: time at which busy/idle measurements started
+ *  @time_busy: number of ns the GPU was busy executing jobs since the
+ *          @time_period_start timestamp.
+ *  @time_idle: number of ns the GPU was not executing jobs since the
+ *          @time_period_start timestamp.
+ *  @prev_busy: busy time in ns of previous time period.
+ *           Updated when metrics are reset.
+ *  @prev_idle: idle time in ns of previous time period
+ *           Updated when metrics are reset.
+ *  @gpu_active: true when the GPU is executing jobs. false when
+ *           not. Updated when the job scheduler informs us a job is submitted
+ *           to or removed from a GPU slot.
+ *  @busy_cl: number of ns the GPU was busy executing CL jobs. Note that
+ *           if two CL jobs were active for 400ns, this value would be updated
+ *           with 800.
+ *  @busy_gl: number of ns the GPU was busy executing GL jobs. Note that
+ *           if two GL jobs were active for 400ns, this value would be updated
+ *           with 800.
+ *  @active_cl_ctx: number of CL jobs active on the GPU. Array is per-device.
+ *  @active_gl_ctx: number of GL jobs active on the GPU. Array is per-slot. As
+ *           GL jobs never run on slot 2 this slot is not recorded.
+ *  @lock: spinlock protecting the kbasep_pm_metrics_data structure
+ *  @timer: timer to regularly make DVFS decisions based on the power
+ *           management metrics. Only present when CONFIG_MALI_MIDGARD_DVFS
+ *           is defined.
+ *  @timer_active: boolean indicating @timer is running. Only present when
+ *           CONFIG_MALI_MIDGARD_DVFS is defined.
+ *  @platform_data: pointer to data controlled by platform specific code
+ *  @kbdev: pointer to kbase device for which metrics are collected
+ *
+ */
+struct kbasep_pm_metrics_data {
+	ktime_t time_period_start;
+	u32 time_busy;
+	u32 time_idle;
+	u32 prev_busy;
+	u32 prev_idle;
+	bool gpu_active;
+	u32 busy_cl[2];
+	u32 busy_gl;
+	u32 active_cl_ctx[2];
+	u32 active_gl_ctx[2]; /* GL jobs can only run on 2 of the 3 job slots */
+	spinlock_t lock;
+
+#ifdef CONFIG_MALI_MIDGARD_DVFS
+	struct hrtimer timer;
+	bool timer_active;
+#endif
+
+	void *platform_data;
+	struct kbase_device *kbdev;
+};
+
+/*
+ * Private data of the power policies, overlaid in a single union with one
+ * member per policy. The last two members only exist when
+ * MALI_CUSTOMER_RELEASE is zero.
+ */
+union kbase_pm_policy_data {
+	struct kbasep_pm_policy_always_on always_on;
+	struct kbasep_pm_policy_coarse_demand coarse_demand;
+	struct kbasep_pm_policy_demand demand;
+#if !MALI_CUSTOMER_RELEASE
+	struct kbasep_pm_policy_demand_always_powered demand_always_powered;
+	struct kbasep_pm_policy_fast_start fast_start;
+#endif
+};
+
+/*
+ * Private data of the core availability (CA) policies, overlaid in a single
+ * union with one member per policy. The "random" member only exists when
+ * MALI_CUSTOMER_RELEASE is zero.
+ */
+union kbase_pm_ca_policy_data {
+	struct kbasep_pm_ca_policy_fixed fixed;
+#if !MALI_CUSTOMER_RELEASE
+	struct kbasep_pm_ca_policy_random random;
+#endif
+};
+
+/**
+ * struct kbase_pm_backend_data - Data stored per device for power management.
+ *
+ * This structure contains data for the power management framework. There is one
+ * instance of this structure per device in the system.
+ *
+ * @ca_current_policy: The policy that is currently actively controlling core
+ *                     availability.
+ * @pm_current_policy: The policy that is currently actively controlling the
+ *                     power state.
+ * @ca_policy_data:    Private data for current CA policy
+ * @pm_policy_data:    Private data for current PM policy
+ * @ca_in_transition:  Flag indicating when core availability policy is
+ *                     transitioning cores. The core availability policy must
+ *                     set this when a change in core availability is occurring.
+ *                     power_change_lock must be held when accessing this.
+ * @reset_done:        Flag when a reset is complete
+ * @reset_done_wait:   Wait queue to wait for changes to @reset_done
+ * @l2_powered_wait:   Wait queue for whether the l2 cache has been powered as
+ *                     requested
+ * @l2_powered:        State indicating whether all the l2 caches are powered.
+ *                     Non-zero indicates they're *all* powered
+ *                     Zero indicates that some (or all) are not powered
+ * @gpu_cycle_counter_requests: The reference count of active gpu cycle counter
+ *                              users
+ * @gpu_cycle_counter_requests_lock: Lock to protect @gpu_cycle_counter_requests
+ * @desired_shader_state: A bit mask identifying the shader cores that the
+ *                        power policy would like to be on. The current state
+ *                        of the cores may be different, but there should be
+ *                        transitions in progress that will eventually achieve
+ *                        this state (assuming that the policy doesn't change
+ *                        its mind in the mean time).
+ * @powering_on_shader_state: A bit mask indicating which shader cores are
+ *                            currently in a power-on transition
+ * @desired_tiler_state: A bit mask identifying the tiler cores that the power
+ *                       policy would like to be on. See @desired_shader_state
+ * @powering_on_tiler_state: A bit mask indicating which tiler cores are
+ *                           currently in a power-on transition
+ * @powering_on_l2_state: A bit mask indicating which l2-caches are currently
+ *                        in a power-on transition
+ * @gpu_in_desired_state: This flag is set if the GPU is powered as requested
+ *                        by the desired_xxx_state variables
+ * @gpu_in_desired_state_wait: Wait queue set when @gpu_in_desired_state != 0
+ * @gpu_powered:       Set to true when the GPU is powered and register
+ *                     accesses are possible, false otherwise
+ * @instr_enabled:     Set to true when instrumentation is enabled,
+ *                     false otherwise
+ * @cg1_disabled:      Set if the policy wants to keep the second core group
+ *                     powered off
+ * @driver_ready_for_irqs: Debug state indicating whether sufficient
+ *                         initialization of the driver has occurred to handle
+ *                         IRQs
+ * @gpu_powered_lock:  Spinlock that must be held when writing @gpu_powered or
+ *                     accessing @driver_ready_for_irqs
+ * @metrics:           Structure to hold metrics for the GPU
+ * @gpu_poweroff_pending: number of poweroff timer ticks until the GPU is
+ *                        powered off
+ * @shader_poweroff_pending_time: number of poweroff timer ticks until shaders
+ *                        and/or tilers are powered off
+ * @gpu_poweroff_timer: Timer for powering off GPU
+ * @gpu_poweroff_wq:   Workqueue used to power off the GPU when the timer fires
+ * @gpu_poweroff_work: Workitem used on @gpu_poweroff_wq
+ * @shader_poweroff_pending: Bit mask of shaders to be powered off on next
+ *                           timer callback
+ * @tiler_poweroff_pending: Bit mask of tilers to be powered off on next timer
+ *                          callback
+ * @poweroff_timer_needed: true if the poweroff timer is currently required,
+ *                         false otherwise
+ * @poweroff_timer_running: true if the poweroff timer is currently running,
+ *                          false otherwise
+ *                          power_change_lock should be held when accessing,
+ *                          unless there is no way the timer can be running (eg
+ *                          hrtimer_cancel() was called immediately before)
+ * @poweroff_wait_in_progress: true if a wait for GPU power off is in progress.
+ *                             hwaccess_lock must be held when accessing
+ * @poweron_required: true if a GPU power on is required. Should only be set
+ *                    when poweroff_wait_in_progress is true, and therefore the
+ *                    GPU can not immediately be powered on. pm.lock must be
+ *                    held when accessing
+ * @poweroff_is_suspend: true if the GPU is being powered off due to a suspend
+ *                       request. pm.lock must be held when accessing
+ * @gpu_poweroff_wait_wq: workqueue for waiting for GPU to power off
+ * @gpu_poweroff_wait_work: work item for use with @gpu_poweroff_wait_wq
+ * @poweroff_wait: waitqueue for waiting for @gpu_poweroff_wait_work to complete
+ * @callback_power_on: Callback when the GPU needs to be turned on. See
+ *                     &struct kbase_pm_callback_conf
+ * @callback_power_off: Callback when the GPU may be turned off. See
+ *                     &struct kbase_pm_callback_conf
+ * @callback_power_suspend: Callback when a suspend occurs and the GPU needs to
+ *                          be turned off. See &struct kbase_pm_callback_conf
+ * @callback_power_resume: Callback when a resume occurs and the GPU needs to
+ *                          be turned on. See &struct kbase_pm_callback_conf
+ * @callback_power_runtime_on: Callback when the GPU needs to be turned on. See
+ *                             &struct kbase_pm_callback_conf
+ * @callback_power_runtime_off: Callback when the GPU may be turned off. See
+ *                              &struct kbase_pm_callback_conf
+ * @callback_power_runtime_idle: Optional callback when the GPU may be idle. See
+ *                              &struct kbase_pm_callback_conf
+ *
+ * Note:
+ * During an IRQ, @ca_current_policy or @pm_current_policy can be NULL when the
+ * policy is being changed with kbase_pm_ca_set_policy() or
+ * kbase_pm_set_policy(). The change is protected under
+ * kbase_device.pm.power_change_lock. Direct access to this
+ * from IRQ context must therefore check for NULL. If NULL, then
+ * kbase_pm_ca_set_policy() or kbase_pm_set_policy() will re-issue the policy
+ * functions that would have been done under IRQ.
+ */
+struct kbase_pm_backend_data {
+	/* Currently selected policies and their private data */
+	const struct kbase_pm_ca_policy *ca_current_policy;
+	const struct kbase_pm_policy *pm_current_policy;
+	union kbase_pm_ca_policy_data ca_policy_data;
+	union kbase_pm_policy_data pm_policy_data;
+	bool ca_in_transition;
+
+	/* Reset and L2 power-up completion tracking */
+	bool reset_done;
+	wait_queue_head_t reset_done_wait;
+	wait_queue_head_t l2_powered_wait;
+	int l2_powered;
+
+	/* Reference count of GPU cycle counter users, and its lock */
+	int gpu_cycle_counter_requests;
+	spinlock_t gpu_cycle_counter_requests_lock;
+
+	/* Desired core state and power-on transitions in progress */
+	u64 desired_shader_state;
+	u64 powering_on_shader_state;
+	u64 desired_tiler_state;
+	u64 powering_on_tiler_state;
+	u64 powering_on_l2_state;
+
+	bool gpu_in_desired_state;
+	wait_queue_head_t gpu_in_desired_state_wait;
+
+	bool gpu_powered;
+
+	bool instr_enabled;
+
+	bool cg1_disabled;
+
+	/* Debug-only state, present when CONFIG_MALI_DEBUG is defined */
+#ifdef CONFIG_MALI_DEBUG
+	bool driver_ready_for_irqs;
+#endif /* CONFIG_MALI_DEBUG */
+
+	spinlock_t gpu_powered_lock;
+
+
+	struct kbasep_pm_metrics_data metrics;
+
+	/* Deferred power-off: tick counters, timer, workqueue and work item */
+	int gpu_poweroff_pending;
+	int shader_poweroff_pending_time;
+
+	struct hrtimer gpu_poweroff_timer;
+	struct workqueue_struct *gpu_poweroff_wq;
+	struct work_struct gpu_poweroff_work;
+
+	u64 shader_poweroff_pending;
+	u64 tiler_poweroff_pending;
+
+	bool poweroff_timer_needed;
+	bool poweroff_timer_running;
+
+	/* State for waiting on a GPU power-off to complete */
+	bool poweroff_wait_in_progress;
+	bool poweron_required;
+	bool poweroff_is_suspend;
+
+	struct workqueue_struct *gpu_poweroff_wait_wq;
+	struct work_struct gpu_poweroff_wait_work;
+
+	wait_queue_head_t poweroff_wait;
+
+	/* Power callbacks; see struct kbase_pm_callback_conf */
+	int (*callback_power_on)(struct kbase_device *kbdev);
+	void (*callback_power_off)(struct kbase_device *kbdev);
+	void (*callback_power_suspend)(struct kbase_device *kbdev);
+	void (*callback_power_resume)(struct kbase_device *kbdev);
+	int (*callback_power_runtime_on)(struct kbase_device *kbdev);
+	void (*callback_power_runtime_off)(struct kbase_device *kbdev);
+	int (*callback_power_runtime_idle)(struct kbase_device *kbdev);
+};
+
+
+/*
+ * List of power policy IDs.
+ *
+ * Numbering starts at 1. The last two IDs are only defined when
+ * MALI_CUSTOMER_RELEASE is zero.
+ */
+enum kbase_pm_policy_id {
+	KBASE_PM_POLICY_ID_DEMAND = 1,
+	KBASE_PM_POLICY_ID_ALWAYS_ON,
+	KBASE_PM_POLICY_ID_COARSE_DEMAND,
+#if !MALI_CUSTOMER_RELEASE
+	KBASE_PM_POLICY_ID_DEMAND_ALWAYS_POWERED,
+	KBASE_PM_POLICY_ID_FAST_START
+#endif
+};
+
+typedef u32 kbase_pm_policy_flags;
+
+/**
+ * struct kbase_pm_policy - Power policy structure.
+ *
+ * Each power policy exposes a (static) instance of this structure which
+ * contains function pointers to the policy's methods.
+ *
+ * @name:               The name of this policy
+ * @init:               Function called when the policy is selected
+ * @term:               Function called when the policy is unselected
+ * @get_core_mask:      Function called to get the current shader core mask
+ * @get_core_active:    Function called to get the current overall GPU power
+ *                      state
+ * @flags:              Field indicating flags for this policy
+ * @id:                 Field indicating an ID for this policy. This is not
+ *                      necessarily the same as its index in the list returned
+ *                      by kbase_pm_list_policies().
+ *                      It is used purely for debugging.
+ */
+struct kbase_pm_policy {
+	/* The name of this policy */
+	char *name;
+
+	/**
+	 * Function called when the policy is selected
+	 *
+	 * This should initialize the kbdev->pm.pm_policy_data structure. It
+	 * should not attempt to make any changes to hardware state.
+	 *
+	 * It is undefined what state the cores are in when the function is
+	 * called.
+	 *
+	 * @kbdev: The kbase device structure for the device (must be a
+	 *         valid pointer)
+	 */
+	void (*init)(struct kbase_device *kbdev);
+
+	/**
+	 * Function called when the policy is unselected.
+	 *
+	 * @kbdev: The kbase device structure for the device (must be a
+	 *         valid pointer)
+	 */
+	void (*term)(struct kbase_device *kbdev);
+
+	/**
+	 * Function called to get the current shader core mask
+	 *
+	 * The returned mask should meet or exceed (kbdev->shader_needed_bitmap
+	 * | kbdev->shader_inuse_bitmap).
+	 *
+	 * @kbdev: The kbase device structure for the device (must be a
+	 *         valid pointer)
+	 *
+	 * Return: The mask of shader cores to be powered
+	 */
+	u64 (*get_core_mask)(struct kbase_device *kbdev);
+
+	/**
+	 * Function called to get the current overall GPU power state
+	 *
+	 * This function should consider the state of kbdev->pm.active_count. If
+	 * this count is greater than 0 then there is at least one active
+	 * context on the device and the GPU should be powered. If it is equal
+	 * to 0 then there are no active contexts and the GPU could be powered
+	 * off if desired.
+	 *
+	 * @kbdev: The kbase device structure for the device (must be a
+	 *         valid pointer)
+	 *
+	 * Return: true if the GPU should be powered, false otherwise
+	 */
+	bool (*get_core_active)(struct kbase_device *kbdev);
+
+	/* Flags for this policy */
+	kbase_pm_policy_flags flags;
+	/* ID of this policy; used purely for debugging */
+	enum kbase_pm_policy_id id;
+};
+
+
+/*
+ * List of core availability (CA) policy IDs.
+ *
+ * Numbering starts at 1. Both IDs are defined unconditionally, even though
+ * the "random" policy's header and private data are only included when
+ * MALI_CUSTOMER_RELEASE is zero.
+ */
+enum kbase_pm_ca_policy_id {
+	KBASE_PM_CA_POLICY_ID_FIXED = 1,
+	KBASE_PM_CA_POLICY_ID_RANDOM
+};
+
+typedef u32 kbase_pm_ca_policy_flags;
+
+/**
+ * struct kbase_pm_ca_policy - Core availability policy structure.
+ *
+ * Each core availability policy exposes a (static) instance of this structure
+ * which contains function pointers to the policy's methods.
+ *
+ * @name:               The name of this policy
+ * @init:               Function called when the policy is selected
+ * @term:               Function called when the policy is unselected
+ * @get_core_mask:      Function called to get the current shader core
+ *                      availability mask
+ * @update_core_status: Function called to update the current core status
+ * @flags:              Field indicating flags for this policy
+ * @id:                 Field indicating an ID for this policy. This is not
+ *                      necessarily the same as its index in the list returned
+ *                      by kbase_pm_list_policies().
+ *                      It is used purely for debugging.
+ */
+struct kbase_pm_ca_policy {
+	/* The name of this policy */
+	char *name;
+
+	/**
+	 * Function called when the policy is selected
+	 *
+	 * This should initialize the kbdev->pm.ca_policy_data structure. It
+	 * should not attempt to make any changes to hardware state.
+	 *
+	 * It is undefined what state the cores are in when the function is
+	 * called.
+	 *
+	 * @kbdev: The kbase device structure for the device (must be a
+	 *         valid pointer)
+	 */
+	void (*init)(struct kbase_device *kbdev);
+
+	/**
+	 * Function called when the policy is unselected.
+	 *
+	 * @kbdev: The kbase device structure for the device (must be a
+	 *         valid pointer)
+	 */
+	void (*term)(struct kbase_device *kbdev);
+
+	/**
+	 * Function called to get the current shader core availability mask
+	 *
+	 * When a change in core availability is occurring, the policy must set
+	 * kbdev->pm.ca_in_transition to true. This is to indicate that
+	 * reporting changes in power state cannot be optimized out, even if
+	 * kbdev->pm.desired_shader_state remains unchanged. This must be done
+	 * by any functions internal to the Core Availability Policy that change
+	 * the return value of kbase_pm_ca_policy::get_core_mask.
+	 *
+	 * @kbdev: The kbase device structure for the device (must be a
+	 *         valid pointer)
+	 *
+	 * Return: The current core availability mask
+	 */
+	u64 (*get_core_mask)(struct kbase_device *kbdev);
+
+	/**
+	 * Function called to update the current core status
+	 *
+	 * If none of the cores in core group 0 are ready or transitioning, then
+	 * the policy must ensure that the next call to get_core_mask does not
+	 * return 0 for all cores in core group 0. It is an error to disable
+	 * core group 0 through the core availability policy.
+	 *
+	 * When a change in core availability has finished, the policy must set
+	 * kbdev->pm.ca_in_transition to false. This is to indicate that
+	 * changes in power state can once again be optimized out when
+	 * kbdev->pm.desired_shader_state is unchanged.
+	 *
+	 * @kbdev:               The kbase device structure for the device
+	 *                       (must be a valid pointer)
+	 * @cores_ready:         The mask of cores currently powered and
+	 *                       ready to run jobs
+	 * @cores_transitioning: The mask of cores currently transitioning
+	 *                       power state
+	 */
+	void (*update_core_status)(struct kbase_device *kbdev, u64 cores_ready,
+						u64 cores_transitioning);
+
+	/* Flags for this policy */
+	kbase_pm_ca_policy_flags flags;
+
+	/**
+	 * Field indicating an ID for this policy. This is not necessarily the
+	 * same as its index in the list returned by kbase_pm_list_policies().
+	 * It is used purely for debugging.
+	 */
+	enum kbase_pm_ca_policy_id id;
+};
+
+#endif /* _KBASE_PM_HWACCESS_DEFS_H_ */
--- a/drivers/gpu/arm/midgard_for_linux/backend/gpu/mali_kbase_pm_demand.c
+++ b/drivers/gpu/arm/midgard_for_linux/backend/gpu/mali_kbase_pm_demand.c
@ -0,0 +1,73 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/*
+ * A simple demand based power management policy
+ */
+
+#include <mali_kbase.h>
+#include <mali_kbase_pm.h>
+
+/*
+ * get_core_mask callback for the demand policy.
+ *
+ * Returns an empty mask while the PM active count is zero; otherwise returns
+ * the union of the shader cores that are needed and those that are in use.
+ */
+static u64 demand_get_core_mask(struct kbase_device *kbdev)
+{
+	if (kbdev->pm.active_count == 0)
+		return 0;
+
+	return kbdev->shader_needed_bitmap | kbdev->shader_inuse_bitmap;
+}
+
+/*
+ * get_core_active callback for the demand policy.
+ *
+ * Returns false only when the PM active count is zero, no shader core is
+ * needed or in use, and the tiler is neither needed nor in use. Returns true
+ * in every other case.
+ */
+static bool demand_get_core_active(struct kbase_device *kbdev)
+{
+	if (kbdev->pm.active_count != 0)
+		return true;
+
+	/* Any bit set in either shader bitmap keeps the GPU powered */
+	if ((kbdev->shader_needed_bitmap | kbdev->shader_inuse_bitmap) != 0)
+		return true;
+
+	return kbdev->tiler_needed_cnt || kbdev->tiler_inuse_cnt;
+}
+
+/*
+ * init callback for the demand policy.
+ *
+ * Does nothing beyond marking @kbdev as unused.
+ */
+static void demand_init(struct kbase_device *kbdev)
+{
+	CSTD_UNUSED(kbdev);
+}
+
+/*
+ * term callback for the demand policy.
+ *
+ * Does nothing beyond marking @kbdev as unused.
+ */
+static void demand_term(struct kbase_device *kbdev)
+{
+	CSTD_UNUSED(kbdev);
+}
+
+/*
+ * The struct kbase_pm_policy instance for the demand power policy.
+ *
+ * This is the static structure that defines the demand power policy's
+ * callbacks, name, flags and ID.
+ */
+const struct kbase_pm_policy kbase_pm_demand_policy_ops = {
+	.name = "demand",
+	.init = demand_init,
+	.term = demand_term,
+	.get_core_mask = demand_get_core_mask,
+	.get_core_active = demand_get_core_active,
+	.flags = 0u,
+	.id = KBASE_PM_POLICY_ID_DEMAND,
+};
+
+KBASE_EXPORT_TEST_API(kbase_pm_demand_policy_ops);
--- a/drivers/gpu/arm/midgard_for_linux/backend/gpu/mali_kbase_pm_demand.h
+++ b/drivers/gpu/arm/midgard_for_linux/backend/gpu/mali_kbase_pm_demand.h
@ -0,0 +1,64 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/*
+ * A simple demand based power management policy
+ */
+
+#ifndef MALI_KBASE_PM_DEMAND_H
+#define MALI_KBASE_PM_DEMAND_H
+
+/**
+ * DOC: Demand power management policy
+ *
+ * The demand power management policy has the following characteristics:
+ * - When KBase indicates that the GPU will be powered up, but we don't yet
+ *   know which Job Chains are to be run:
+ *  - The Shader Cores are not powered up
+ *
+ * - When KBase indicates that a set of Shader Cores are needed to submit the
+ *   currently queued Job Chains:
+ *  - Only those Shader Cores are powered up
+ *
+ * - When KBase indicates that the GPU need not be powered:
+ *  - The Shader Cores are powered off, and the GPU itself is powered off too.
+ *
+ * Note:
+ * - KBase indicates the GPU will be powered up when it has a User Process that
+ *   has just started to submit Job Chains.
+ *
+ * - KBase indicates the GPU need not be powered when all the Job Chains from
+ *   User Processes have finished, and it is waiting for a User Process to
+ *   submit some more Job Chains.
+ */
+
+/**
+ * struct kbasep_pm_policy_demand - Private structure for policy instance data
+ *
+ * @dummy: No state is needed, a dummy variable
+ *
+ * This contains data that is private to the demand power policy.
+ */
+struct kbasep_pm_policy_demand {
+	/* Placeholder only: the policy keeps no state of its own */
+	int dummy;
+};
+
+extern const struct kbase_pm_policy kbase_pm_demand_policy_ops;
+
+#endif /* MALI_KBASE_PM_DEMAND_H */
--- a/drivers/gpu/arm/midgard_for_linux/backend/gpu/mali_kbase_pm_driver.c
+++ b/drivers/gpu/arm/midgard_for_linux/backend/gpu/mali_kbase_pm_driver.c
--- a/drivers/gpu/arm/midgard_for_linux/backend/gpu/mali_kbase_pm_internal.h
+++ b/drivers/gpu/arm/midgard_for_linux/backend/gpu/mali_kbase_pm_internal.h
@ -0,0 +1,550 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/*
+ * Power management API definitions used internally by GPU backend
+ */
+
+#ifndef _KBASE_BACKEND_PM_INTERNAL_H_
+#define _KBASE_BACKEND_PM_INTERNAL_H_
+
+#include <mali_kbase_hwaccess_pm.h>
+
+#include "mali_kbase_pm_ca.h"
+#include "mali_kbase_pm_policy.h"
+
+
+/**
+ * kbase_pm_dev_idle - The GPU is idle.
+ *
+ * The OS may choose to turn off idle devices
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_dev_idle(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_dev_activate - The GPU is active.
+ *
+ * The OS should avoid opportunistically turning off the GPU while it is active
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_dev_activate(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_get_present_cores - Get details of the cores that are present in
+ *                              the device.
+ *
+ * This function can be called by the active power policy to return a bitmask of
+ * the cores (of a specified type) present in the GPU device and also a count of
+ * the number of cores.
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid
+ *         pointer)
+ * @type:  The type of core (see the enum kbase_pm_core_type enumeration)
+ *
+ * Return: The bit mask of cores present
+ */
+u64 kbase_pm_get_present_cores(struct kbase_device *kbdev,
+						enum kbase_pm_core_type type);
+
+/**
+ * kbase_pm_get_active_cores - Get details of the cores that are currently
+ *                             active in the device.
+ *
+ * This function can be called by the active power policy to return a bitmask of
+ * the cores (of a specified type) that are actively processing work (i.e.
+ * turned on *and* busy).
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ * @type:  The type of core (see the enum kbase_pm_core_type enumeration)
+ *
+ * Return: The bit mask of active cores
+ */
+u64 kbase_pm_get_active_cores(struct kbase_device *kbdev,
+						enum kbase_pm_core_type type);
+
+/**
+ * kbase_pm_get_trans_cores - Get details of the cores that are currently
+ *                            transitioning between power states.
+ *
+ * This function can be called by the active power policy to return a bitmask of
+ * the cores (of a specified type) that are currently transitioning between
+ * power states.
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ * @type:  The type of core (see the enum kbase_pm_core_type enumeration)
+ *
+ * Return: The bit mask of transitioning cores
+ */
+u64 kbase_pm_get_trans_cores(struct kbase_device *kbdev,
+						enum kbase_pm_core_type type);
+
+/**
+ * kbase_pm_get_ready_cores - Get details of the cores that are currently
+ *                            powered and ready for jobs.
+ *
+ * This function can be called by the active power policy to return a bitmask of
+ * the cores (of a specified type) that are powered and ready for jobs (they may
+ * or may not be currently executing jobs).
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ * @type:  The type of core (see the enum kbase_pm_core_type enumeration)
+ *
+ * Return: The bit mask of ready cores
+ */
+u64 kbase_pm_get_ready_cores(struct kbase_device *kbdev,
+						enum kbase_pm_core_type type);
+
+/**
+ * kbase_pm_clock_on - Turn the clock for the device on, and enable device
+ *                     interrupts.
+ *
+ * This function can be used by a power policy to turn the clock for the GPU on.
+ * It should be modified during integration to perform the necessary actions to
+ * ensure that the GPU is fully powered and clocked.
+ *
+ * @kbdev:     The kbase device structure for the device (must be a valid
+ *             pointer)
+ * @is_resume: true if clock on due to resume after suspend, false otherwise
+ */
+void kbase_pm_clock_on(struct kbase_device *kbdev, bool is_resume);
+
+/**
+ * kbase_pm_clock_off - Disable device interrupts, and turn the clock for the
+ *                      device off.
+ *
+ * This function can be used by a power policy to turn the clock for the GPU
+ * off. It should be modified during integration to perform the necessary
+ * actions to turn the clock off (if this is possible in the integration).
+ *
+ * @kbdev:      The kbase device structure for the device (must be a valid
+ *              pointer)
+ * @is_suspend: true if clock off due to suspend, false otherwise
+ *
+ * Return: true  if clock was turned off, or
+ *         false if clock can not be turned off due to pending page/bus fault
+ *               workers. Caller must flush MMU workqueues and retry
+ */
+bool kbase_pm_clock_off(struct kbase_device *kbdev, bool is_suspend);
+
+/**
+ * kbase_pm_enable_interrupts - Enable interrupts on the device.
+ *
+ * Interrupts are also enabled after a call to kbase_pm_clock_on().
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_enable_interrupts(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_disable_interrupts - Disable interrupts on the device.
+ *
+ * This prevents delivery of Power Management interrupts to the CPU so that
+ * kbase_pm_check_transitions_nolock() will not be called from the IRQ handler
+ * until kbase_pm_enable_interrupts() or kbase_pm_clock_on() is called.
+ *
+ * Interrupts are also disabled after a call to kbase_pm_clock_off().
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_disable_interrupts(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_disable_interrupts_nolock - Version of kbase_pm_disable_interrupts()
+ *                                      that does not take the hwaccess_lock
+ *
+ * Caller must hold the hwaccess_lock.
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_disable_interrupts_nolock(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_init_hw - Initialize the hardware.
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ * @flags: Flags specifying the type of PM init
+ *
+ * This function checks the GPU ID register to ensure that the GPU is supported
+ * by the driver and performs a reset on the device so that it is in a known
+ * state before the device is used.
+ *
+ * Return: 0 if the device is supported and successfully reset.
+ */
+int kbase_pm_init_hw(struct kbase_device *kbdev, unsigned int flags);
+
+/**
+ * kbase_pm_reset_done - The GPU has been reset successfully.
+ *
+ * This function must be called by the GPU interrupt handler when the
+ * RESET_COMPLETED bit is set. It signals to the power management initialization
+ * code that the GPU has been successfully reset.
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_reset_done(struct kbase_device *kbdev);
+
+
+/**
+ * kbase_pm_check_transitions_nolock - Check if there are any power transitions
+ *                                     to make, and if so start them.
+ *
+ * This function will check the desired_xx_state members of
+ * struct kbase_pm_device_data and the actual status of the hardware to see if
+ * any power transitions can be made at this time to make the hardware state
+ * closer to the state desired by the power policy.
+ *
+ * The return value can be used to check whether all the desired cores are
+ * available, and so whether it's worth submitting a job (e.g. from a Power
+ * Management IRQ).
+ *
+ * Note that this still returns true when desired_xx_state has no
+ * cores. That is: of the no cores desired, none were *un*available. In
+ * this case, the caller may still need to try submitting jobs. This is because
+ * the Core Availability Policy might have taken us to an intermediate state
+ * where no cores are powered, before powering on more cores (e.g. for core
+ * rotation)
+ *
+ * The caller must hold kbase_device.pm.power_change_lock
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ *
+ * Return:      non-zero when all desired cores are available. That is,
+ *              it's worthwhile for the caller to submit a job.
+ *              false otherwise
+ */
+bool kbase_pm_check_transitions_nolock(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_check_transitions_sync - Synchronous and locking variant of
+ *                                   kbase_pm_check_transitions_nolock()
+ *
+ * On returning, the desired state at the time of the call will have been met.
+ *
+ * There is nothing to stop the core being switched off by calls to
+ * kbase_pm_release_cores() or kbase_pm_unrequest_cores(). Therefore, the
+ * caller must have already made a call to
+ * kbase_pm_request_cores()/kbase_pm_request_cores_sync() previously.
+ *
+ * The usual use-case for this is to ensure cores are 'READY' after performing
+ * a GPU Reset.
+ *
+ * Unlike kbase_pm_check_transitions_nolock(), the caller must not hold
+ * kbase_device.pm.power_change_lock, because this function will take that
+ * lock itself.
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_check_transitions_sync(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_update_cores_state_nolock - Variant of kbase_pm_update_cores_state()
+ *                                      where the caller must hold
+ *                                      kbase_device.pm.power_change_lock
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_update_cores_state_nolock(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_update_cores_state - Update the desired state of shader cores from
+ *                               the Power Policy, and begin any power
+ *                               transitions.
+ *
+ * This function will update the desired_xx_state members of
+ * struct kbase_pm_device_data by calling into the current Power Policy. It will
+ * then begin power transitions to make the hardware achieve the desired shader
+ * core state.
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_update_cores_state(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_cancel_deferred_poweroff - Cancel any pending requests to power off
+ *                                     the GPU and/or shader cores.
+ *
+ * This should be called by any functions which directly power off the GPU.
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_cancel_deferred_poweroff(struct kbase_device *kbdev);
+
+/**
+ * kbasep_pm_read_present_cores - Read the bitmasks of present cores.
+ *
+ * This information is cached to avoid having to perform register reads whenever
+ * the information is required.
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ */
+void kbasep_pm_read_present_cores(struct kbase_device *kbdev);
+
+/**
+ * kbasep_pm_metrics_init - Initialize the metrics gathering framework.
+ *
+ * This must be called before other metric gathering APIs are called.
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ *
+ * Return: 0 on success, error code on error
+ */
+int kbasep_pm_metrics_init(struct kbase_device *kbdev);
+
+/**
+ * kbasep_pm_metrics_term - Terminate the metrics gathering framework.
+ *
+ * This must be called when metric gathering is no longer required. It is an
+ * error to call any metrics gathering function (other than
+ * kbasep_pm_metrics_init()) after calling this function.
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ */
+void kbasep_pm_metrics_term(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_report_vsync - Function to be called by the frame buffer driver to
+ *                         update the vsync metric.
+ *
+ * This function should be called by the frame buffer driver to update whether
+ * the system is hitting the vsync target or not. buffer_updated should be true
+ * if the vsync corresponded with a new frame being displayed, otherwise it
+ * should be false. This function does not need to be called every vsync, but
+ * only when the value of @buffer_updated differs from a previous call.
+ *
+ * @kbdev:          The kbase device structure for the device (must be a
+ *                  valid pointer)
+ * @buffer_updated: True if the buffer has been updated on this VSync,
+ *                  false otherwise
+ */
+void kbase_pm_report_vsync(struct kbase_device *kbdev, int buffer_updated);
+
+/**
+ * kbase_pm_get_dvfs_action - Determine whether the DVFS system should change
+ *                            the clock speed of the GPU.
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ *
+ * This function should be called regularly by the DVFS system to check whether
+ * the clock speed of the GPU needs updating.
+ */
+void kbase_pm_get_dvfs_action(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_request_gpu_cycle_counter - Mark that the GPU cycle counter is
+ *                                      needed
+ *
+ * If the caller is the first caller then the GPU cycle counters will be enabled
+ * along with the l2 cache
+ *
+ * The GPU must be powered when calling this function (i.e.
+ * kbase_pm_context_active() must have been called).
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_request_gpu_cycle_counter(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_request_gpu_cycle_counter_l2_is_on - Mark GPU cycle counter is
+ *                                               needed (l2 cache already on)
+ *
+ * This is a version of the above function
+ * (kbase_pm_request_gpu_cycle_counter()) suitable for being called when the
+ * l2 cache is known to be on and assured to be on until the subsequent call of
+ * kbase_pm_release_gpu_cycle_counter() such as when a job is submitted. It does
+ * not sleep and can be called from atomic functions.
+ *
+ * The GPU must be powered when calling this function (i.e.
+ * kbase_pm_context_active() must have been called) and the l2 cache must be
+ * powered on.
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_request_gpu_cycle_counter_l2_is_on(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_release_gpu_cycle_counter - Mark that the GPU cycle counter is no
+ *                                      longer in use
+ *
+ * If the caller is the last caller then the GPU cycle counters will be
+ * disabled. A request must have been made before a call to this.
+ *
+ * Caller must not hold the hwaccess_lock, as it will be taken in this function.
+ * If the caller is already holding this lock then
+ * kbase_pm_release_gpu_cycle_counter_nolock() must be used instead.
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_release_gpu_cycle_counter(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_release_gpu_cycle_counter_nolock - Version of kbase_pm_release_gpu_cycle_counter()
+ *                                             that does not take hwaccess_lock
+ *
+ * Caller must hold the hwaccess_lock.
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_release_gpu_cycle_counter_nolock(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_wait_for_poweroff_complete - Wait for the poweroff workqueue to
+ *                                       complete
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_wait_for_poweroff_complete(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_register_access_enable - Enable access to GPU registers
+ *
+ * Enables access to the GPU registers before power management has powered up
+ * the GPU with kbase_pm_powerup().
+ *
+ * Access to registers should be done using kbase_os_reg_read()/write() at this
+ * stage, not kbase_reg_read()/write().
+ *
+ * This results in the power management callbacks provided in the driver
+ * configuration to get called to turn on power and/or clocks to the GPU. See
+ * kbase_pm_callback_conf.
+ *
+ * This should only be used before power management is powered up with
+ * kbase_pm_powerup()
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_register_access_enable(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_register_access_disable - Disable early register access
+ *
+ * Disables access to the GPU registers enabled earlier by a call to
+ * kbase_pm_register_access_enable().
+ *
+ * This results in the power management callbacks provided in the driver
+ * configuration to get called to turn off power and/or clocks to the GPU. See
+ * kbase_pm_callback_conf
+ *
+ * This should only be used before power management is powered up with
+ * kbase_pm_powerup()
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_register_access_disable(struct kbase_device *kbdev);
+
+/* NOTE: kbase_pm_is_suspending is in mali_kbase.h, because it is an inline
+ * function */
+
+/**
+ * kbase_pm_metrics_is_active - Check if the power management metrics
+ *                              collection is active.
+ *
+ * Note that this returns if the power management metrics collection was
+ * active at the time of calling, it is possible that after the call the metrics
+ * collection enable may have changed state.
+ *
+ * The caller must handle the consequence that the state may have changed.
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ * Return: true if metrics collection was active else false.
+ */
+bool kbase_pm_metrics_is_active(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_do_poweron - Power on the GPU, and any cores that are requested.
+ *
+ * @kbdev:     The kbase device structure for the device (must be a valid
+ *             pointer)
+ * @is_resume: true if power on due to resume after suspend,
+ *             false otherwise
+ */
+void kbase_pm_do_poweron(struct kbase_device *kbdev, bool is_resume);
+
+/**
+ * kbase_pm_do_poweroff - Power off the GPU, and any cores that have been
+ *                        requested.
+ *
+ * @kbdev:      The kbase device structure for the device (must be a valid
+ *              pointer)
+ * @is_suspend: true if power off due to suspend,
+ *              false otherwise
+ */
+void kbase_pm_do_poweroff(struct kbase_device *kbdev, bool is_suspend);
+
+#ifdef CONFIG_PM_DEVFREQ
+void kbase_pm_get_dvfs_utilisation(struct kbase_device *kbdev,
+		unsigned long *total, unsigned long *busy);
+void kbase_pm_reset_dvfs_utilisation(struct kbase_device *kbdev);
+#endif
+
+#ifdef CONFIG_MALI_MIDGARD_DVFS
+
+/**
+ * kbase_platform_dvfs_event - Report utilisation to DVFS code
+ *
+ * Function provided by platform specific code when DVFS is enabled to allow
+ * the power management metrics system to report utilisation.
+ *
+ * @kbdev:         The kbase device structure for the device (must be a
+ *                 valid pointer)
+ * @utilisation:   The current calculated utilisation by the metrics system.
+ * @util_gl_share: The current calculated gl share of utilisation.
+ * @util_cl_share: The current calculated cl share of utilisation per core
+ *                 group.
+ * Return:         Returns 0 on failure and non zero on success.
+ */
+
+int kbase_platform_dvfs_event(struct kbase_device *kbdev, u32 utilisation,
+	u32 util_gl_share, u32 util_cl_share[2]);
+#endif
+
+void kbase_pm_power_changed(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_metrics_update - Inform the metrics system that an atom is either
+ *                           about to be run or has just completed.
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ * @now:   Pointer to the timestamp of the change, or NULL to use current time
+ *
+ * Caller must hold hwaccess_lock
+ */
+void kbase_pm_metrics_update(struct kbase_device *kbdev,
+				ktime_t *now);
+
+/**
+ * kbase_pm_cache_snoop_enable - Allow CPU snoops on the GPU
+ * If the GPU does not have coherency this is a no-op
+ * @kbdev:	Device pointer
+ *
+ * This function should be called after L2 power up.
+ */
+
+void kbase_pm_cache_snoop_enable(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_cache_snoop_disable - Prevent CPU snoops on the GPU
+ * If the GPU does not have coherency this is a no-op
+ * @kbdev:	Device pointer
+ *
+ * This function should be called before L2 power off.
+ */
+void kbase_pm_cache_snoop_disable(struct kbase_device *kbdev);
+
+#endif /* _KBASE_BACKEND_PM_INTERNAL_H_ */
--- a/drivers/gpu/arm/midgard_for_linux/backend/gpu/mali_kbase_pm_metrics.c
+++ b/drivers/gpu/arm/midgard_for_linux/backend/gpu/mali_kbase_pm_metrics.c
@ -0,0 +1,401 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/*
+ * Metrics for power management
+ */
+
+#include <mali_kbase.h>
+#include <mali_kbase_pm.h>
+#include <backend/gpu/mali_kbase_pm_internal.h>
+#include <backend/gpu/mali_kbase_jm_rb.h>
+
+/* When VSync is being hit aim for utilisation between 70-90% */
+#define KBASE_PM_VSYNC_MIN_UTILISATION          70
+#define KBASE_PM_VSYNC_MAX_UTILISATION          90
+/* Otherwise aim for 10-40% */
+#define KBASE_PM_NO_VSYNC_MIN_UTILISATION       10
+#define KBASE_PM_NO_VSYNC_MAX_UTILISATION       40
+
+/* Shift used for kbasep_pm_metrics_data.time_busy/idle - units of (1 << 8) ns
+ * This gives a maximum period between samples of 2^(32+8)/100 ns = slightly
+ * under 11s. Exceeding this will cause overflow */
+#define KBASE_PM_TIME_SHIFT			8
+
+/* Maximum accumulated busy + idle time between samples of utilization data
+ * before the counters are reset. It is compared against time_busy/time_idle,
+ * so it is in units of (1 << KBASE_PM_TIME_SHIFT) ns, not plain ns:
+ * 100000 << 8 = 25,600,000 ns, i.e. about 25.6ms. */
+#define MALI_UTILIZATION_MAX_PERIOD 100000 /* units of 256 ns, ~25.6ms */
+
+#ifdef CONFIG_MALI_MIDGARD_DVFS
+/**
+ * dvfs_callback - hrtimer handler driving the periodic DVFS evaluation
+ * @timer: The timer embedded in struct kbasep_pm_metrics_data
+ *
+ * Runs one DVFS evaluation for the owning device and then, while the metrics
+ * timer is still marked active, re-arms the timer for another dvfs_period.
+ *
+ * Return: always HRTIMER_NORESTART; re-arming is done explicitly with
+ *         hrtimer_start() under the metrics lock.
+ */
+static enum hrtimer_restart dvfs_callback(struct hrtimer *timer)
+{
+	struct kbasep_pm_metrics_data *pm_metrics;
+	unsigned long irq_flags;
+
+	KBASE_DEBUG_ASSERT(timer != NULL);
+
+	pm_metrics = container_of(timer, struct kbasep_pm_metrics_data, timer);
+
+	/* Evaluate before taking the lock: the callee takes metrics->lock */
+	kbase_pm_get_dvfs_action(pm_metrics->kbdev);
+
+	spin_lock_irqsave(&pm_metrics->lock, irq_flags);
+	if (pm_metrics->timer_active) {
+		hrtimer_start(timer,
+			HR_TIMER_DELAY_MSEC(pm_metrics->kbdev->pm.dvfs_period),
+			HRTIMER_MODE_REL);
+	}
+	spin_unlock_irqrestore(&pm_metrics->lock, irq_flags);
+
+	return HRTIMER_NORESTART;
+}
+#endif /* CONFIG_MALI_MIDGARD_DVFS */
+
+/*
+ * Reset every utilisation counter, initialise the metrics lock and, when
+ * CONFIG_MALI_MIDGARD_DVFS is enabled, arm the periodic DVFS timer.
+ * Always returns 0.
+ */
+int kbasep_pm_metrics_init(struct kbase_device *kbdev)
+{
+	struct kbasep_pm_metrics_data *metrics;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	metrics = &kbdev->pm.backend.metrics;
+
+	/* Back-pointer used by dvfs_callback() to find the device */
+	metrics->kbdev = kbdev;
+
+	/* Start a fresh sampling period with the GPU considered idle */
+	metrics->time_period_start = ktime_get();
+	metrics->gpu_active = false;
+
+	metrics->time_busy = 0;
+	metrics->time_idle = 0;
+	metrics->prev_busy = 0;
+	metrics->prev_idle = 0;
+
+	metrics->active_cl_ctx[0] = 0;
+	metrics->active_cl_ctx[1] = 0;
+	metrics->active_gl_ctx[0] = 0;
+	metrics->active_gl_ctx[1] = 0;
+
+	metrics->busy_cl[0] = 0;
+	metrics->busy_cl[1] = 0;
+	metrics->busy_gl = 0;
+
+	spin_lock_init(&metrics->lock);
+
+#ifdef CONFIG_MALI_MIDGARD_DVFS
+	metrics->timer_active = true;
+	hrtimer_init(&metrics->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+	metrics->timer.function = dvfs_callback;
+
+	hrtimer_start(&metrics->timer,
+			HR_TIMER_DELAY_MSEC(kbdev->pm.dvfs_period),
+			HRTIMER_MODE_REL);
+#endif /* CONFIG_MALI_MIDGARD_DVFS */
+
+	return 0;
+}
+
+KBASE_EXPORT_TEST_API(kbasep_pm_metrics_init);
+
+/*
+ * Stop the periodic DVFS timer. Without CONFIG_MALI_MIDGARD_DVFS there is no
+ * timer and this function does nothing.
+ */
+void kbasep_pm_metrics_term(struct kbase_device *kbdev)
+{
+#ifdef CONFIG_MALI_MIDGARD_DVFS
+	struct kbasep_pm_metrics_data *metrics;
+	unsigned long irq_flags;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	metrics = &kbdev->pm.backend.metrics;
+
+	/* dvfs_callback() checks timer_active under this lock before it
+	 * re-arms the timer, so clear the flag first and cancel afterwards */
+	spin_lock_irqsave(&metrics->lock, irq_flags);
+	metrics->timer_active = false;
+	spin_unlock_irqrestore(&metrics->lock, irq_flags);
+
+	hrtimer_cancel(&metrics->timer);
+#endif /* CONFIG_MALI_MIDGARD_DVFS */
+}
+
+KBASE_EXPORT_TEST_API(kbasep_pm_metrics_term);
+
+/**
+ * kbase_pm_get_dvfs_utilisation_calc - Account the time elapsed since the
+ *                                      start of the current period
+ * @kbdev: Device pointer
+ * @now:   Timestamp that closes the interval being accounted
+ *
+ * The interval from time_period_start to @now is added, in units of
+ * (1 << KBASE_PM_TIME_SHIFT) ns, to the busy counters if the GPU was marked
+ * active or to the idle counter otherwise. A new period then starts at @now.
+ * Nothing is changed if @now lies before time_period_start.
+ *
+ * The caller must hold kbdev->pm.backend.metrics.lock
+ */
+static void kbase_pm_get_dvfs_utilisation_calc(struct kbase_device *kbdev,
+								ktime_t now)
+{
+	struct kbasep_pm_metrics_data *metrics = &kbdev->pm.backend.metrics;
+	s64 elapsed_ns;
+	u32 elapsed;
+
+	lockdep_assert_held(&metrics->lock);
+
+	elapsed_ns = ktime_to_ns(ktime_sub(now, metrics->time_period_start));
+	if (elapsed_ns < 0)
+		return;
+
+	elapsed = (u32) (elapsed_ns >> KBASE_PM_TIME_SHIFT);
+
+	if (!metrics->gpu_active) {
+		metrics->time_idle += elapsed;
+	} else {
+		metrics->time_busy += elapsed;
+
+		/* Each active CL context has its own counter ... */
+		if (metrics->active_cl_ctx[0])
+			metrics->busy_cl[0] += elapsed;
+		if (metrics->active_cl_ctx[1])
+			metrics->busy_cl[1] += elapsed;
+
+		/* ... while both GL contexts feed a single counter */
+		if (metrics->active_gl_ctx[0])
+			metrics->busy_gl += elapsed;
+		if (metrics->active_gl_ctx[1])
+			metrics->busy_gl += elapsed;
+	}
+
+	metrics->time_period_start = now;
+}
+
+#if defined(CONFIG_PM_DEVFREQ) || defined(CONFIG_MALI_MIDGARD_DVFS)
+/**
+ * kbase_pm_reset_dvfs_utilisation_unlocked - Begin a new utilisation period
+ * @kbdev: Device pointer
+ * @now:   Timestamp at which the new period starts
+ *
+ * The busy/idle totals of the finished period are kept in prev_busy/prev_idle
+ * and all current counters are cleared.
+ *
+ * The caller must hold kbdev->pm.backend.metrics.lock
+ */
+static void kbase_pm_reset_dvfs_utilisation_unlocked(struct kbase_device *kbdev,
+								ktime_t now)
+{
+	struct kbasep_pm_metrics_data *metrics = &kbdev->pm.backend.metrics;
+
+	/* Remember the totals of the period that is ending */
+	metrics->prev_idle = metrics->time_idle;
+	metrics->prev_busy = metrics->time_busy;
+
+	/* Clear the running counters and restart the clock */
+	metrics->time_idle = 0;
+	metrics->time_busy = 0;
+	metrics->busy_gl = 0;
+	metrics->busy_cl[0] = 0;
+	metrics->busy_cl[1] = 0;
+	metrics->time_period_start = now;
+}
+
+/*
+ * Locking wrapper around kbase_pm_reset_dvfs_utilisation_unlocked() that
+ * starts the new period at the current time.
+ */
+void kbase_pm_reset_dvfs_utilisation(struct kbase_device *kbdev)
+{
+	struct kbasep_pm_metrics_data *metrics = &kbdev->pm.backend.metrics;
+	unsigned long irq_flags;
+
+	spin_lock_irqsave(&metrics->lock, irq_flags);
+	kbase_pm_reset_dvfs_utilisation_unlocked(kbdev, ktime_get());
+	spin_unlock_irqrestore(&metrics->lock, irq_flags);
+}
+
+/**
+ * kbase_pm_get_dvfs_utilisation - Report busy and total GPU time
+ * @kbdev:     Device pointer
+ * @total_out: Receives busy + idle time
+ * @busy_out:  Receives busy time
+ *
+ * Both values are in units of (1 << KBASE_PM_TIME_SHIFT) ns. If the current
+ * period holds less than half of MALI_UTILIZATION_MAX_PERIOD, the totals of
+ * the previous period are added to the reported values.
+ */
+void kbase_pm_get_dvfs_utilisation(struct kbase_device *kbdev,
+		unsigned long *total_out, unsigned long *busy_out)
+{
+	struct kbasep_pm_metrics_data *metrics = &kbdev->pm.backend.metrics;
+	ktime_t now = ktime_get();
+	unsigned long irq_flags;
+	unsigned long busy_time;
+	unsigned long total_time;
+
+	spin_lock_irqsave(&metrics->lock, irq_flags);
+
+	/* Bring the counters up to date before reading them */
+	kbase_pm_get_dvfs_utilisation_calc(kbdev, now);
+
+	busy_time = metrics->time_busy;
+	total_time = busy_time + metrics->time_idle;
+
+	if (total_time >= MALI_UTILIZATION_MAX_PERIOD) {
+		/* A full period has been accumulated: start a new one. The
+		 * values read above are still what gets reported. */
+		kbase_pm_reset_dvfs_utilisation_unlocked(kbdev, now);
+	} else if (total_time < (MALI_UTILIZATION_MAX_PERIOD / 2)) {
+		/* Too little data so far: include the previous period */
+		total_time += metrics->prev_idle + metrics->prev_busy;
+		busy_time += metrics->prev_busy;
+	}
+
+	*total_out = total_time;
+	*busy_out = busy_time;
+
+	spin_unlock_irqrestore(&metrics->lock, irq_flags);
+}
+#endif
+
+#ifdef CONFIG_MALI_MIDGARD_DVFS
+
+/**
+ * kbase_pm_get_dvfs_utilisation_old - Compute utilisation percentages
+ * @kbdev:         Device pointer
+ * @util_gl_share: If not NULL, receives the GL share of busy time in percent
+ * @util_cl_share: If not NULL, receives the per-index CL shares in percent
+ * @now:           Timestamp up to which time is accounted
+ *
+ * The shares are set to -1 when no busy time has been recorded.
+ *
+ * The caller must hold kbdev->pm.backend.metrics.lock
+ *
+ * Return: busy time as a percentage of busy + idle time, or -1 when both
+ *         counters are zero.
+ */
+int kbase_pm_get_dvfs_utilisation_old(struct kbase_device *kbdev,
+					int *util_gl_share,
+					int util_cl_share[2],
+					ktime_t now)
+{
+	struct kbasep_pm_metrics_data *metrics = &kbdev->pm.backend.metrics;
+	int utilisation;
+	int busy;
+
+	kbase_pm_get_dvfs_utilisation_calc(kbdev, now);
+
+	/* Start from the "no data" answer; replaced below when data exists */
+	if (util_gl_share)
+		*util_gl_share = -1;
+	if (util_cl_share) {
+		util_cl_share[0] = -1;
+		util_cl_share[1] = -1;
+	}
+
+	if (metrics->time_idle + metrics->time_busy == 0)
+		return -1;
+
+	utilisation = (100 * metrics->time_busy) /
+			(metrics->time_idle + metrics->time_busy);
+
+	busy = metrics->busy_gl + metrics->busy_cl[0] + metrics->busy_cl[1];
+	if (busy == 0)
+		return utilisation;
+
+	if (util_gl_share)
+		*util_gl_share = (100 * metrics->busy_gl) / busy;
+	if (util_cl_share) {
+		util_cl_share[0] = (100 * metrics->busy_cl[0]) / busy;
+		util_cl_share[1] = (100 * metrics->busy_cl[1]) / busy;
+	}
+
+	return utilisation;
+}
+
+/*
+ * Sample the utilisation of the period that just ended, hand it to the
+ * platform DVFS code and start a new period. When any of the sampled values
+ * is negative (no data), zeros are reported for all of them.
+ *
+ * This function is only built when CONFIG_MALI_MIDGARD_DVFS is defined, so
+ * the call to kbase_platform_dvfs_event() needs no guard of its own.
+ */
+void kbase_pm_get_dvfs_action(struct kbase_device *kbdev)
+{
+	struct kbasep_pm_metrics_data *metrics;
+	unsigned long irq_flags;
+	int cl_share[2];
+	int gl_share;
+	int util;
+	ktime_t now;
+	bool have_data;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	metrics = &kbdev->pm.backend.metrics;
+
+	spin_lock_irqsave(&metrics->lock, irq_flags);
+
+	now = ktime_get();
+
+	util = kbase_pm_get_dvfs_utilisation_old(kbdev, &gl_share, cl_share,
+			now);
+
+	have_data = util >= 0 && gl_share >= 0 &&
+			cl_share[0] >= 0 && cl_share[1] >= 0;
+	if (!have_data) {
+		util = 0;
+		gl_share = 0;
+		cl_share[0] = 0;
+		cl_share[1] = 0;
+	}
+
+	kbase_platform_dvfs_event(kbdev, util, gl_share, cl_share);
+
+	kbase_pm_reset_dvfs_utilisation_unlocked(kbdev, now);
+
+	spin_unlock_irqrestore(&metrics->lock, irq_flags);
+}
+
+/*
+ * Return a snapshot of the timer_active flag, read under the metrics lock.
+ * The flag may change again as soon as the lock is dropped.
+ */
+bool kbase_pm_metrics_is_active(struct kbase_device *kbdev)
+{
+	struct kbasep_pm_metrics_data *metrics;
+	unsigned long irq_flags;
+	bool active;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	metrics = &kbdev->pm.backend.metrics;
+
+	spin_lock_irqsave(&metrics->lock, irq_flags);
+	active = metrics->timer_active;
+	spin_unlock_irqrestore(&metrics->lock, irq_flags);
+
+	return active;
+}
+KBASE_EXPORT_TEST_API(kbase_pm_metrics_is_active);
+
+#endif /* CONFIG_MALI_MIDGARD_DVFS */
+
+/**
+ * kbase_pm_metrics_active_calc - Recompute which kinds of work are running
+ * @kbdev: Device pointer
+ *
+ * Clears the active flags and then sets them again from the atom found in
+ * the SUBMITTED state on each job slot: compute-only atoms mark a CL
+ * context, all other atoms mark the GL context of their slot.
+ *
+ * The caller must hold kbdev->pm.backend.metrics.lock
+ */
+static void kbase_pm_metrics_active_calc(struct kbase_device *kbdev)
+{
+	struct kbasep_pm_metrics_data *metrics = &kbdev->pm.backend.metrics;
+	int slot;
+
+	lockdep_assert_held(&metrics->lock);
+
+	metrics->gpu_active = false;
+	metrics->active_cl_ctx[0] = 0;
+	metrics->active_cl_ctx[1] = 0;
+	metrics->active_gl_ctx[0] = 0;
+	metrics->active_gl_ctx[1] = 0;
+
+	for (slot = 0; slot < BASE_JM_MAX_NR_SLOTS; slot++) {
+		struct kbase_jd_atom *atom = kbase_gpu_inspect(kbdev, slot, 0);
+
+		/* The head atom may have just completed; if it is not
+		 * running, look at the atom queued behind it instead */
+		if (atom && atom->gpu_rb_state != KBASE_ATOM_GPU_RB_SUBMITTED)
+			atom = kbase_gpu_inspect(kbdev, slot, 1);
+
+		if (!atom || atom->gpu_rb_state != KBASE_ATOM_GPU_RB_SUBMITTED)
+			continue;
+
+		metrics->gpu_active = true;
+
+		if (atom->core_req & BASE_JD_REQ_ONLY_COMPUTE) {
+			int group = 0;
+
+			if (atom->core_req &
+					BASE_JD_REQ_SPECIFIC_COHERENT_GROUP)
+				group = atom->device_nr;
+
+			/* Only two CL counters exist */
+			if (!WARN_ON(group >= 2))
+				metrics->active_cl_ctx[group] = 1;
+		} else if (!WARN_ON(slot >= 2)) {
+			/* Slot 2 should not be running non-compute atoms */
+			metrics->active_gl_ctx[slot] = 1;
+		}
+	}
+}
+
+/*
+ * Called when a job is submitted to or removed from a GPU slot.
+ *
+ * Accounts the time up to *timestamp (or up to the current time when
+ * timestamp is NULL) against the previous activity state and then recomputes
+ * that state from the atoms now on the slots. Caller must hold hwaccess_lock.
+ */
+void kbase_pm_metrics_update(struct kbase_device *kbdev, ktime_t *timestamp)
+{
+	struct kbasep_pm_metrics_data *metrics = &kbdev->pm.backend.metrics;
+	unsigned long irq_flags;
+	ktime_t sample_time;
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	spin_lock_irqsave(&metrics->lock, irq_flags);
+
+	sample_time = timestamp ? *timestamp : ktime_get();
+
+	/* Track how long CL and/or GL jobs have been busy for */
+	kbase_pm_get_dvfs_utilisation_calc(kbdev, sample_time);
+
+	kbase_pm_metrics_active_calc(kbdev);
+
+	spin_unlock_irqrestore(&metrics->lock, irq_flags);
+}
--- a/drivers/gpu/arm/midgard_for_linux/backend/gpu/mali_kbase_pm_policy.c
+++ b/drivers/gpu/arm/midgard_for_linux/backend/gpu/mali_kbase_pm_policy.c
@ -0,0 +1,969 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/*
+ * Power policy API implementations
+ */
+
+#include <mali_kbase.h>
+#include <mali_midg_regmap.h>
+#include <mali_kbase_pm.h>
+#include <mali_kbase_config_defaults.h>
+#include <backend/gpu/mali_kbase_pm_internal.h>
+
+/* Power policies available in this build.
+ *
+ * kbase_pm_policy_init() installs policy_list[0] as the initial policy, so
+ * the first entry of each variant below is the default: always_on when
+ * CONFIG_MALI_NO_MALI is set, demand otherwise. */
+static const struct kbase_pm_policy *const policy_list[] = {
+#ifdef CONFIG_MALI_NO_MALI
+	&kbase_pm_always_on_policy_ops,
+	&kbase_pm_demand_policy_ops,
+	&kbase_pm_coarse_demand_policy_ops,
+#if !MALI_CUSTOMER_RELEASE
+	&kbase_pm_demand_always_powered_policy_ops,
+	&kbase_pm_fast_start_policy_ops,
+#endif
+#else				/* CONFIG_MALI_NO_MALI */
+	&kbase_pm_demand_policy_ops,
+	&kbase_pm_always_on_policy_ops,
+	&kbase_pm_coarse_demand_policy_ops,
+#if !MALI_CUSTOMER_RELEASE
+	&kbase_pm_demand_always_powered_policy_ops,
+	&kbase_pm_fast_start_policy_ops,
+#endif
+#endif /* CONFIG_MALI_NO_MALI */
+};
+
+/* The number of policies available in the system.
+ * This is derived from the number of entries in policy_list.
+ */
+#define POLICY_COUNT (sizeof(policy_list)/sizeof(*policy_list))
+
+
+/* Function IDs for looking up Timeline Trace codes in
+ * kbase_pm_change_state_trace_code.
+ *
+ * The values are used as the first index of that table, which is sized by
+ * KBASE_PM_FUNC_ID_COUNT. */
+enum kbase_pm_func_id {
+	KBASE_PM_FUNC_ID_REQUEST_CORES_START,
+	KBASE_PM_FUNC_ID_REQUEST_CORES_END,
+	KBASE_PM_FUNC_ID_RELEASE_CORES_START,
+	KBASE_PM_FUNC_ID_RELEASE_CORES_END,
+	/* Note: kbase_pm_unrequest_cores() is on the slow path, and we neither
+	 * expect to hit it nor tend to hit it very much anyway. We can detect
+	 * whether we need more instrumentation by a difference between
+	 * PM_CHECKTRANS events and PM_SEND/HANDLE_EVENT. */
+
+	/* Must be the last */
+	KBASE_PM_FUNC_ID_COUNT
+};
+
+
+/* State changes during request/unrequest/release-ing cores.
+ *
+ * SHADER and TILER are bit flags that may be OR-ed together; the combined
+ * value is used as the second index of kbase_pm_change_state_trace_code,
+ * which is sized by KBASE_PM_CHANGE_STATE_COUNT. */
+enum {
+	KBASE_PM_CHANGE_STATE_SHADER = (1u << 0),
+	KBASE_PM_CHANGE_STATE_TILER  = (1u << 1),
+
+	/* These two must be last */
+	KBASE_PM_CHANGE_STATE_MASK = (KBASE_PM_CHANGE_STATE_TILER |
+						KBASE_PM_CHANGE_STATE_SHADER),
+	KBASE_PM_CHANGE_STATE_COUNT = KBASE_PM_CHANGE_STATE_MASK + 1
+};
+/* Holds an OR of the KBASE_PM_CHANGE_STATE_* flags above */
+typedef u32 kbase_pm_change_state;
+
+
+#ifdef CONFIG_MALI_TRACE_TIMELINE
+/* Timeline Trace code lookups for each function.
+ *
+ * Indexed as [enum kbase_pm_func_id][kbase_pm_change_state]. The entries for
+ * state 0 are placeholders: kbase_timeline_pm_cores_func() asserts that the
+ * state it looks up is non-zero. */
+static u32 kbase_pm_change_state_trace_code[KBASE_PM_FUNC_ID_COUNT]
+					[KBASE_PM_CHANGE_STATE_COUNT] = {
+	/* kbase_pm_request_cores */
+	[KBASE_PM_FUNC_ID_REQUEST_CORES_START][0] = 0,
+	[KBASE_PM_FUNC_ID_REQUEST_CORES_START][KBASE_PM_CHANGE_STATE_SHADER] =
+		SW_FLOW_PM_CHECKTRANS_PM_REQUEST_CORES_SHADER_START,
+	[KBASE_PM_FUNC_ID_REQUEST_CORES_START][KBASE_PM_CHANGE_STATE_TILER] =
+		SW_FLOW_PM_CHECKTRANS_PM_REQUEST_CORES_TILER_START,
+	[KBASE_PM_FUNC_ID_REQUEST_CORES_START][KBASE_PM_CHANGE_STATE_SHADER |
+						KBASE_PM_CHANGE_STATE_TILER] =
+		SW_FLOW_PM_CHECKTRANS_PM_REQUEST_CORES_SHADER_TILER_START,
+
+	[KBASE_PM_FUNC_ID_REQUEST_CORES_END][0] = 0,
+	[KBASE_PM_FUNC_ID_REQUEST_CORES_END][KBASE_PM_CHANGE_STATE_SHADER] =
+		SW_FLOW_PM_CHECKTRANS_PM_REQUEST_CORES_SHADER_END,
+	[KBASE_PM_FUNC_ID_REQUEST_CORES_END][KBASE_PM_CHANGE_STATE_TILER] =
+		SW_FLOW_PM_CHECKTRANS_PM_REQUEST_CORES_TILER_END,
+	[KBASE_PM_FUNC_ID_REQUEST_CORES_END][KBASE_PM_CHANGE_STATE_SHADER |
+						KBASE_PM_CHANGE_STATE_TILER] =
+		SW_FLOW_PM_CHECKTRANS_PM_REQUEST_CORES_SHADER_TILER_END,
+
+	/* kbase_pm_release_cores */
+	[KBASE_PM_FUNC_ID_RELEASE_CORES_START][0] = 0,
+	[KBASE_PM_FUNC_ID_RELEASE_CORES_START][KBASE_PM_CHANGE_STATE_SHADER] =
+		SW_FLOW_PM_CHECKTRANS_PM_RELEASE_CORES_SHADER_START,
+	[KBASE_PM_FUNC_ID_RELEASE_CORES_START][KBASE_PM_CHANGE_STATE_TILER] =
+		SW_FLOW_PM_CHECKTRANS_PM_RELEASE_CORES_TILER_START,
+	[KBASE_PM_FUNC_ID_RELEASE_CORES_START][KBASE_PM_CHANGE_STATE_SHADER |
+						KBASE_PM_CHANGE_STATE_TILER] =
+		SW_FLOW_PM_CHECKTRANS_PM_RELEASE_CORES_SHADER_TILER_START,
+
+	[KBASE_PM_FUNC_ID_RELEASE_CORES_END][0] = 0,
+	[KBASE_PM_FUNC_ID_RELEASE_CORES_END][KBASE_PM_CHANGE_STATE_SHADER] =
+		SW_FLOW_PM_CHECKTRANS_PM_RELEASE_CORES_SHADER_END,
+	[KBASE_PM_FUNC_ID_RELEASE_CORES_END][KBASE_PM_CHANGE_STATE_TILER] =
+		SW_FLOW_PM_CHECKTRANS_PM_RELEASE_CORES_TILER_END,
+	[KBASE_PM_FUNC_ID_RELEASE_CORES_END][KBASE_PM_CHANGE_STATE_SHADER |
+						KBASE_PM_CHANGE_STATE_TILER] =
+		SW_FLOW_PM_CHECKTRANS_PM_RELEASE_CORES_SHADER_TILER_END
+};
+
+/* Emit the timeline trace code registered for @func_id and @state.
+ *
+ * @state must be a non-zero combination of KBASE_PM_CHANGE_STATE_SHADER and
+ * KBASE_PM_CHANGE_STATE_TILER; the code is looked up in
+ * kbase_pm_change_state_trace_code. */
+static inline void kbase_timeline_pm_cores_func(struct kbase_device *kbdev,
+		enum kbase_pm_func_id func_id,
+		kbase_pm_change_state state)
+{
+	int trace_code;
+
+	/* Both values are used as array indices below */
+	KBASE_DEBUG_ASSERT(func_id >= 0 && func_id < KBASE_PM_FUNC_ID_COUNT);
+	KBASE_DEBUG_ASSERT(state != 0 && (state & KBASE_PM_CHANGE_STATE_MASK) ==
+									state);
+
+	trace_code = kbase_pm_change_state_trace_code[func_id][state];
+	KBASE_TIMELINE_PM_CHECKTRANS(kbdev, trace_code);
+}
+
+#else /* CONFIG_MALI_TRACE_TIMELINE */
+/* No-op variant used when CONFIG_MALI_TRACE_TIMELINE is not defined */
+static inline void kbase_timeline_pm_cores_func(struct kbase_device *kbdev,
+		enum kbase_pm_func_id func_id, kbase_pm_change_state state)
+{
+}
+
+#endif /* CONFIG_MALI_TRACE_TIMELINE */
+
+/**
+ * kbasep_pm_do_poweroff_cores - Apply the pending shader/tiler poweroff
+ * @kbdev: Device pointer
+ *
+ * Removes the cores recorded in shader_poweroff_pending and
+ * tiler_poweroff_pending from the desired state, clears both pending masks,
+ * and runs a transition check if the desired state changed or a core
+ * availability transition is in progress.
+ *
+ * The caller must hold kbdev->hwaccess_lock.
+ */
+static void kbasep_pm_do_poweroff_cores(struct kbase_device *kbdev)
+{
+	struct kbase_pm_backend_data *backend = &kbdev->pm.backend;
+	const u64 old_shader_state = backend->desired_shader_state;
+	const u64 old_tiler_state = backend->desired_tiler_state;
+	bool cores_are_available;
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	backend->desired_shader_state &= ~backend->shader_poweroff_pending;
+	backend->desired_tiler_state &= ~backend->tiler_poweroff_pending;
+
+	backend->shader_poweroff_pending = 0;
+	backend->tiler_poweroff_pending = 0;
+
+	/* Nothing changed and no availability transition: nothing to do */
+	if (old_shader_state == backend->desired_shader_state &&
+			old_tiler_state == backend->desired_tiler_state &&
+			!backend->ca_in_transition)
+		return;
+
+	KBASE_TIMELINE_PM_CHECKTRANS(kbdev,
+		SW_FLOW_PM_CHECKTRANS_PM_RELEASE_CORES_DEFERRED_START);
+	cores_are_available = kbase_pm_check_transitions_nolock(kbdev);
+	KBASE_TIMELINE_PM_CHECKTRANS(kbdev,
+		SW_FLOW_PM_CHECKTRANS_PM_RELEASE_CORES_DEFERRED_END);
+
+	/* The result is not needed, because this function returns nothing */
+	CSTD_UNUSED(cores_are_available);
+}
+
+/*
+ * hrtimer callback for the GPU poweroff timer.
+ *
+ * On each tick, under hwaccess_lock, it queues the GPU poweroff work item if
+ * a GPU poweroff is pending and counts down shader_poweroff_pending_time,
+ * powering off the pending cores when the count reaches zero.
+ *
+ * Returns HRTIMER_RESTART while poweroff_timer_needed is set, otherwise
+ * clears poweroff_timer_running and returns HRTIMER_NORESTART.
+ */
+static enum hrtimer_restart
+kbasep_pm_do_gpu_poweroff_callback(struct hrtimer *timer)
+{
+	struct kbase_device *kbdev;
+	unsigned long flags;
+
+	kbdev = container_of(timer, struct kbase_device,
+						pm.backend.gpu_poweroff_timer);
+
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+
+	/* It is safe for this call to do nothing if the work item is already
+	 * queued. The worker function will read the most up-to-date state of
+	 * kbdev->pm.backend.gpu_poweroff_pending under lock.
+	 *
+	 * If a state change occurs while the worker function is processing,
+	 * this call will succeed as a work item can be requeued once it has
+	 * started processing.
+	 */
+	if (kbdev->pm.backend.gpu_poweroff_pending)
+		queue_work(kbdev->pm.backend.gpu_poweroff_wq,
+					&kbdev->pm.backend.gpu_poweroff_work);
+
+	/* Count down one tick of the deferred shader/tiler poweroff */
+	if (kbdev->pm.backend.shader_poweroff_pending ||
+			kbdev->pm.backend.tiler_poweroff_pending) {
+		kbdev->pm.backend.shader_poweroff_pending_time--;
+
+		KBASE_DEBUG_ASSERT(
+				kbdev->pm.backend.shader_poweroff_pending_time
+									>= 0);
+
+		if (!kbdev->pm.backend.shader_poweroff_pending_time)
+			kbasep_pm_do_poweroff_cores(kbdev);
+	}
+
+	if (kbdev->pm.backend.poweroff_timer_needed) {
+		spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+		/* Re-arm for one more poweroff tick */
+		hrtimer_add_expires(timer, kbdev->pm.gpu_poweroff_time);
+
+		return HRTIMER_RESTART;
+	}
+
+	kbdev->pm.backend.poweroff_timer_running = false;
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+	return HRTIMER_NORESTART;
+}
+
+/*
+ * Work item queued by the poweroff timer callback.
+ *
+ * Consumes one tick of gpu_poweroff_pending under pm.lock. When the count
+ * reaches zero and the current policy does not want the GPU active, the
+ * poweroff timer is stopped and the GPU is powered off.
+ */
+static void kbasep_pm_do_gpu_poweroff_wq(struct work_struct *data)
+{
+	struct kbase_device *kbdev = container_of(data, struct kbase_device,
+						pm.backend.gpu_poweroff_work);
+	struct kbase_pm_backend_data *backend = &kbdev->pm.backend;
+	unsigned long flags;
+	bool gpu_idle;
+
+	mutex_lock(&kbdev->pm.lock);
+
+	/* No poweroff request is pending any more */
+	if (backend->gpu_poweroff_pending == 0)
+		goto out_unlock;
+
+	/* Consume one tick; keep waiting while ticks remain */
+	backend->gpu_poweroff_pending--;
+	if (backend->gpu_poweroff_pending > 0)
+		goto out_unlock;
+
+	KBASE_DEBUG_ASSERT(backend->gpu_poweroff_pending == 0);
+
+	/* Only power off the GPU if the policy still reports it inactive */
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	gpu_idle = !backend->pm_current_policy->get_core_active(kbdev);
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+	if (gpu_idle) {
+		backend->poweroff_timer_needed = false;
+		hrtimer_cancel(&backend->gpu_poweroff_timer);
+		backend->poweroff_timer_running = false;
+
+		/* Power off the GPU */
+		kbase_pm_do_poweroff(kbdev, false);
+	}
+
+out_unlock:
+	mutex_unlock(&kbdev->pm.lock);
+}
+
+/*
+ * Set up the power policy framework for @kbdev.
+ *
+ * Allocates the GPU poweroff workqueue, initialises the poweroff work item
+ * and timer, installs policy_list[0] as the current policy and loads the
+ * default poweroff tick settings.
+ *
+ * Return: 0 on success, -ENOMEM if the workqueue could not be allocated.
+ */
+int kbase_pm_policy_init(struct kbase_device *kbdev)
+{
+	struct workqueue_struct *wq;
+
+	wq = alloc_workqueue("kbase_pm_do_poweroff",
+			WQ_HIGHPRI | WQ_UNBOUND, 1);
+	if (!wq)
+		return -ENOMEM;
+
+	kbdev->pm.backend.gpu_poweroff_wq = wq;
+	INIT_WORK(&kbdev->pm.backend.gpu_poweroff_work,
+			kbasep_pm_do_gpu_poweroff_wq);
+	hrtimer_init(&kbdev->pm.backend.gpu_poweroff_timer,
+			CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+	kbdev->pm.backend.gpu_poweroff_timer.function =
+			kbasep_pm_do_gpu_poweroff_callback;
+	kbdev->pm.backend.pm_current_policy = policy_list[0];
+	/* The init callback is optional, as in kbase_pm_set_policy() */
+	if (kbdev->pm.backend.pm_current_policy->init)
+		kbdev->pm.backend.pm_current_policy->init(kbdev);
+	kbdev->pm.gpu_poweroff_time =
+			HR_TIMER_DELAY_NSEC(DEFAULT_PM_GPU_POWEROFF_TICK_NS);
+	kbdev->pm.poweroff_shader_ticks = DEFAULT_PM_POWEROFF_TICK_SHADER;
+	kbdev->pm.poweroff_gpu_ticks = DEFAULT_PM_POWEROFF_TICK_GPU;
+
+	return 0;
+}
+
+/*
+ * Tear down the power policy framework: terminate the current policy and
+ * destroy the GPU poweroff workqueue.
+ */
+void kbase_pm_policy_term(struct kbase_device *kbdev)
+{
+	/* The term callback is optional, as in kbase_pm_set_policy() */
+	if (kbdev->pm.backend.pm_current_policy->term)
+		kbdev->pm.backend.pm_current_policy->term(kbdev);
+	destroy_workqueue(kbdev->pm.backend.gpu_poweroff_wq);
+}
+
+/*
+ * Cancel every deferred poweroff (GPU, shader and tiler) and stop the
+ * poweroff timer. The caller must hold kbdev->pm.lock.
+ */
+void kbase_pm_cancel_deferred_poweroff(struct kbase_device *kbdev)
+{
+	unsigned long flags;
+
+	lockdep_assert_held(&kbdev->pm.lock);
+
+	kbdev->pm.backend.poweroff_timer_needed = false;
+	/* NOTE(review): the timer callback takes hwaccess_lock itself, which
+	 * is presumably why the timer is cancelled before that lock is taken
+	 * here - confirm before reordering */
+	hrtimer_cancel(&kbdev->pm.backend.gpu_poweroff_timer);
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	kbdev->pm.backend.poweroff_timer_running = false;
+
+	/* If wq is already running but is held off by pm.lock, make sure it has
+	 * no effect */
+	kbdev->pm.backend.gpu_poweroff_pending = 0;
+
+	/* Drop any deferred shader/tiler poweroff as well */
+	kbdev->pm.backend.shader_poweroff_pending = 0;
+	kbdev->pm.backend.tiler_poweroff_pending = 0;
+	kbdev->pm.backend.shader_poweroff_pending_time = 0;
+
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+}
+
+/*
+ * Ask the current policy whether the GPU should be active and act on it.
+ *
+ * If active: cancels a pending GPU poweroff, starts the poweroff timer when
+ * required and powers the GPU on (or records that a power-on is required
+ * while a poweroff wait is in progress).
+ * If not active: drops any deferred shader/tiler poweroff and either
+ * schedules a deferred GPU poweroff or powers the GPU off immediately.
+ *
+ * The caller must hold pm->lock. hwaccess_lock is taken internally and is
+ * released on every path before returning.
+ */
+void kbase_pm_update_active(struct kbase_device *kbdev)
+{
+	struct kbase_pm_device_data *pm = &kbdev->pm;
+	struct kbase_pm_backend_data *backend = &pm->backend;
+	unsigned long flags;
+	bool active;
+
+	lockdep_assert_held(&pm->lock);
+
+	/* pm_current_policy will never be NULL while pm.lock is held */
+	KBASE_DEBUG_ASSERT(backend->pm_current_policy);
+
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+
+	active = backend->pm_current_policy->get_core_active(kbdev);
+
+	if (active) {
+		if (backend->gpu_poweroff_pending) {
+			/* Cancel any pending power off request */
+			backend->gpu_poweroff_pending = 0;
+
+			/* If a request was pending then the GPU was still
+			 * powered, so no need to continue */
+			if (!kbdev->poweroff_pending) {
+				spin_unlock_irqrestore(&kbdev->hwaccess_lock,
+						flags);
+				return;
+			}
+		}
+
+		/* Start the poweroff timer if it is not running, the GPU is
+		 * not powered and at least one poweroff tick count is set */
+		if (!backend->poweroff_timer_running && !backend->gpu_powered &&
+				(pm->poweroff_gpu_ticks ||
+				pm->poweroff_shader_ticks)) {
+			backend->poweroff_timer_needed = true;
+			backend->poweroff_timer_running = true;
+			hrtimer_start(&backend->gpu_poweroff_timer,
+					pm->gpu_poweroff_time,
+					HRTIMER_MODE_REL);
+		}
+
+		/* Power on the GPU and any cores requested by the policy */
+		if (pm->backend.poweroff_wait_in_progress) {
+			/* Only record the request while a poweroff wait is
+			 * still in progress */
+			pm->backend.poweron_required = true;
+			spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+		} else {
+			spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+			kbase_pm_do_poweron(kbdev, false);
+		}
+	} else {
+		/* It is an error for the power policy to power off the GPU
+		 * when there are contexts active */
+		KBASE_DEBUG_ASSERT(pm->active_count == 0);
+
+		/* Drop any deferred shader/tiler poweroff */
+		if (backend->shader_poweroff_pending ||
+				backend->tiler_poweroff_pending) {
+			backend->shader_poweroff_pending = 0;
+			backend->tiler_poweroff_pending = 0;
+			backend->shader_poweroff_pending_time = 0;
+		}
+
+		/* Request power off */
+		if (pm->backend.gpu_powered) {
+			if (pm->poweroff_gpu_ticks) {
+				/* Deferred: wait poweroff_gpu_ticks ticks */
+				backend->gpu_poweroff_pending =
+						pm->poweroff_gpu_ticks;
+				backend->poweroff_timer_needed = true;
+				if (!backend->poweroff_timer_running) {
+					/* Start timer if not running (eg if
+					 * power policy has been changed from
+					 * always_on to something else). This
+					 * will ensure the GPU is actually
+					 * powered off */
+					backend->poweroff_timer_running
+							= true;
+					hrtimer_start(
+						&backend->gpu_poweroff_timer,
+						pm->gpu_poweroff_time,
+						HRTIMER_MODE_REL);
+				}
+				spin_unlock_irqrestore(&kbdev->hwaccess_lock,
+						flags);
+			} else {
+				spin_unlock_irqrestore(&kbdev->hwaccess_lock,
+						flags);
+
+				/* Power off the GPU immediately */
+				kbase_pm_do_poweroff(kbdev, false);
+			}
+		} else {
+			spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+		}
+	}
+}
+
+/*
+ * Recompute the desired shader and tiler core state and apply it.
+ *
+ * The desired shader mask comes from the current policy, restricted by the
+ * core availability mask; the tiler is desired whenever it is needed or in
+ * use. Cores being powered on are transitioned immediately; cores being
+ * powered off are either deferred by poweroff_shader_ticks timer ticks or
+ * powered off straight away.
+ *
+ * Does nothing while no policy is set or a poweroff wait is in progress.
+ * The caller must hold kbdev->hwaccess_lock.
+ */
+void kbase_pm_update_cores_state_nolock(struct kbase_device *kbdev)
+{
+	u64 desired_bitmap;
+	u64 desired_tiler_bitmap;
+	/* Initialised because only one branch below assigns it, yet it is
+	 * always handed to CSTD_UNUSED() at the end of the function */
+	bool cores_are_available = false;
+	bool do_poweroff = false;
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	if (kbdev->pm.backend.pm_current_policy == NULL)
+		return;
+	if (kbdev->pm.backend.poweroff_wait_in_progress)
+		return;
+
+	if (kbdev->protected_mode_transition &&	!kbdev->shader_needed_bitmap &&
+			!kbdev->shader_inuse_bitmap && !kbdev->tiler_needed_cnt
+			&& !kbdev->tiler_inuse_cnt) {
+		/* We are trying to change in/out of protected mode - force all
+		 * cores off so that the L2 powers down */
+		desired_bitmap = 0;
+		desired_tiler_bitmap = 0;
+	} else {
+		desired_bitmap =
+		kbdev->pm.backend.pm_current_policy->get_core_mask(kbdev);
+		desired_bitmap &= kbase_pm_ca_get_core_mask(kbdev);
+
+		if (kbdev->tiler_needed_cnt > 0 || kbdev->tiler_inuse_cnt > 0)
+			desired_tiler_bitmap = 1;
+		else
+			desired_tiler_bitmap = 0;
+
+		if (!kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_XAFFINITY)) {
+			/* Unless XAFFINITY is supported, enable core 0 if tiler
+			 * required, regardless of core availability */
+			if (kbdev->tiler_needed_cnt > 0 ||
+					kbdev->tiler_inuse_cnt > 0)
+				desired_bitmap |= 1;
+		}
+	}
+
+	if (kbdev->pm.backend.desired_shader_state != desired_bitmap)
+		KBASE_TRACE_ADD(kbdev, PM_CORES_CHANGE_DESIRED, NULL, NULL, 0u,
+							(u32)desired_bitmap);
+	/* Are any cores being powered on? */
+	if (~kbdev->pm.backend.desired_shader_state & desired_bitmap ||
+	    ~kbdev->pm.backend.desired_tiler_state & desired_tiler_bitmap ||
+	    kbdev->pm.backend.ca_in_transition) {
+		/* Check if we are powering off any cores before updating shader
+		 * state */
+		if (kbdev->pm.backend.desired_shader_state & ~desired_bitmap ||
+				kbdev->pm.backend.desired_tiler_state &
+				~desired_tiler_bitmap) {
+			/* Start timer to power off cores */
+			kbdev->pm.backend.shader_poweroff_pending |=
+				(kbdev->pm.backend.desired_shader_state &
+							~desired_bitmap);
+			kbdev->pm.backend.tiler_poweroff_pending |=
+				(kbdev->pm.backend.desired_tiler_state &
+							~desired_tiler_bitmap);
+
+			if (kbdev->pm.poweroff_shader_ticks &&
+					!kbdev->protected_mode_transition)
+				kbdev->pm.backend.shader_poweroff_pending_time =
+						kbdev->pm.poweroff_shader_ticks;
+			else
+				do_poweroff = true;
+		}
+
+		kbdev->pm.backend.desired_shader_state = desired_bitmap;
+		kbdev->pm.backend.desired_tiler_state = desired_tiler_bitmap;
+
+		/* If any cores are being powered on, transition immediately */
+		cores_are_available = kbase_pm_check_transitions_nolock(kbdev);
+	} else if (kbdev->pm.backend.desired_shader_state & ~desired_bitmap ||
+				kbdev->pm.backend.desired_tiler_state &
+				~desired_tiler_bitmap) {
+		/* Start timer to power off cores */
+		kbdev->pm.backend.shader_poweroff_pending |=
+				(kbdev->pm.backend.desired_shader_state &
+							~desired_bitmap);
+		kbdev->pm.backend.tiler_poweroff_pending |=
+				(kbdev->pm.backend.desired_tiler_state &
+							~desired_tiler_bitmap);
+		if (kbdev->pm.poweroff_shader_ticks &&
+				!kbdev->protected_mode_transition)
+			kbdev->pm.backend.shader_poweroff_pending_time =
+					kbdev->pm.poweroff_shader_ticks;
+		else
+			kbasep_pm_do_poweroff_cores(kbdev);
+	} else if (kbdev->pm.active_count == 0 && desired_bitmap != 0 &&
+			desired_tiler_bitmap != 0 &&
+			kbdev->pm.backend.poweroff_timer_needed) {
+		/* If power policy is keeping cores on despite there being no
+		 * active contexts then disable poweroff timer as it isn't
+		 * required.
+		 * Only reset poweroff_timer_needed if we're not in the middle
+		 * of the power off callback */
+		kbdev->pm.backend.poweroff_timer_needed = false;
+	}
+
+	/* Ensure timer does not power off wanted cores and make sure to power
+	 * off unwanted cores */
+	if (kbdev->pm.backend.shader_poweroff_pending ||
+			kbdev->pm.backend.tiler_poweroff_pending) {
+		kbdev->pm.backend.shader_poweroff_pending &=
+				~(kbdev->pm.backend.desired_shader_state &
+								desired_bitmap);
+		kbdev->pm.backend.tiler_poweroff_pending &=
+				~(kbdev->pm.backend.desired_tiler_state &
+				desired_tiler_bitmap);
+
+		if (!kbdev->pm.backend.shader_poweroff_pending &&
+				!kbdev->pm.backend.tiler_poweroff_pending)
+			kbdev->pm.backend.shader_poweroff_pending_time = 0;
+	}
+
+	/* Shader poweroff is deferred to the end of the function, to eliminate
+	 * issues caused by the core availability policy recursing into this
+	 * function */
+	if (do_poweroff)
+		kbasep_pm_do_poweroff_cores(kbdev);
+
+	/* Don't need 'cores_are_available', because we don't return anything */
+	CSTD_UNUSED(cores_are_available);
+}
+
+/*
+ * Locked wrapper: takes kbdev->hwaccess_lock around
+ * kbase_pm_update_cores_state_nolock().
+ */
+void kbase_pm_update_cores_state(struct kbase_device *kbdev)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+
+	kbase_pm_update_cores_state_nolock(kbdev);
+
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+}
+
+/*
+ * Report the available power policies.
+ *
+ * @list may be NULL; when it is not, *list is pointed at the policy table.
+ *
+ * Return: the number of policies in the table.
+ */
+int kbase_pm_list_policies(const struct kbase_pm_policy * const **list)
+{
+	if (list)
+		*list = policy_list;
+
+	return POLICY_COUNT;
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_list_policies);
+
+/*
+ * Return the currently installed power policy. No lock is taken here;
+ * kbase_pm_set_policy() sets the pointer to NULL for the duration of a
+ * policy change.
+ */
+const struct kbase_pm_policy *kbase_pm_get_policy(struct kbase_device *kbdev)
+{
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	return kbdev->pm.backend.pm_current_policy;
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_get_policy);
+
+/*
+ * Replace the current power policy with @new_policy.
+ *
+ * The old policy is terminated and the new one initialised while
+ * pm_current_policy is NULL, with runpool_mutex and pm.lock held. A PM
+ * active reference is held for the whole change, and the active and core
+ * states are re-evaluated once the new policy is installed.
+ */
+void kbase_pm_set_policy(struct kbase_device *kbdev,
+				const struct kbase_pm_policy *new_policy)
+{
+	struct kbasep_js_device_data *js_devdata = &kbdev->js_data;
+	const struct kbase_pm_policy *old_policy;
+	unsigned long flags;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	KBASE_DEBUG_ASSERT(new_policy != NULL);
+
+	KBASE_TRACE_ADD(kbdev, PM_SET_POLICY, NULL, NULL, 0u, new_policy->id);
+
+	/* During a policy change we pretend the GPU is active */
+	/* A suspend won't happen here, because we're in a syscall from a
+	 * userspace thread */
+	kbase_pm_context_active(kbdev);
+
+	/* Lock order: runpool_mutex first, then pm.lock */
+	mutex_lock(&js_devdata->runpool_mutex);
+	mutex_lock(&kbdev->pm.lock);
+
+	/* Remove the policy to prevent IRQ handlers from working on it */
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	old_policy = kbdev->pm.backend.pm_current_policy;
+	kbdev->pm.backend.pm_current_policy = NULL;
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+	KBASE_TRACE_ADD(kbdev, PM_CURRENT_POLICY_TERM, NULL, NULL, 0u,
+								old_policy->id);
+	/* The term and init callbacks are both optional */
+	if (old_policy->term)
+		old_policy->term(kbdev);
+
+	KBASE_TRACE_ADD(kbdev, PM_CURRENT_POLICY_INIT, NULL, NULL, 0u,
+								new_policy->id);
+	if (new_policy->init)
+		new_policy->init(kbdev);
+
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	kbdev->pm.backend.pm_current_policy = new_policy;
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+	/* If any core power state changes were previously attempted, but
+	 * couldn't be made because the policy was changing (current_policy was
+	 * NULL), then re-try them here. */
+	kbase_pm_update_active(kbdev);
+	kbase_pm_update_cores_state(kbdev);
+
+	mutex_unlock(&kbdev->pm.lock);
+	mutex_unlock(&js_devdata->runpool_mutex);
+
+	/* Now the policy change is finished, we release our fake context active
+	 * reference */
+	kbase_pm_context_idle(kbdev);
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_set_policy);
+
+/* Check whether a state change has finished, and trace it as completed.
+ *
+ * The change counts as finished once every desired shader core and the
+ * desired tiler state are reported as available. */
+static void
+kbase_pm_trace_check_and_finish_state_change(struct kbase_device *kbdev)
+{
+	struct kbase_pm_backend_data *backend = &kbdev->pm.backend;
+
+	/* Some desired shader core is not available yet */
+	if ((kbdev->shader_available_bitmap & backend->desired_shader_state)
+			!= backend->desired_shader_state)
+		return;
+
+	/* The desired tiler state is not available yet */
+	if ((kbdev->tiler_available_bitmap & backend->desired_tiler_state)
+			!= backend->desired_tiler_state)
+		return;
+
+	kbase_timeline_pm_check_handle_event(kbdev,
+			KBASE_TIMELINE_PM_EVENT_GPU_STATE_CHANGED);
+}
+
+/*
+ * Mark the given shader cores (and optionally the tiler) as needed.
+ *
+ * Takes one "needed" reference per core in @shader_cores, and one on the
+ * tiler when @tiler_required is set. If any of them gained its first
+ * reference, the desired core state is re-evaluated.
+ *
+ * The caller must hold kbdev->hwaccess_lock.
+ */
+void kbase_pm_request_cores(struct kbase_device *kbdev,
+				bool tiler_required, u64 shader_cores)
+{
+	kbase_pm_change_state change_gpu_state = 0u;
+	u64 remaining = shader_cores;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	/* Walk the requested cores from the highest set bit downwards */
+	while (remaining) {
+		const int bitnum = fls64(remaining) - 1;
+		const u64 bit = 1ULL << bitnum;
+
+		remaining &= ~bit;
+
+		/* It should be almost impossible for this to overflow. It would
+		 * require 2^32 atoms to request a particular core, which would
+		 * require 2^24 contexts to submit. This would require an amount
+		 * of memory that is impossible on a 32-bit system and extremely
+		 * unlikely on a 64-bit system. */
+		if (++kbdev->shader_needed_cnt[bitnum] != 1)
+			continue;
+
+		/* First reference on this core */
+		kbdev->shader_needed_bitmap |= bit;
+		change_gpu_state |= KBASE_PM_CHANGE_STATE_SHADER;
+	}
+
+	if (tiler_required) {
+		if (++kbdev->tiler_needed_cnt == 1)
+			change_gpu_state |= KBASE_PM_CHANGE_STATE_TILER;
+
+		KBASE_DEBUG_ASSERT(kbdev->tiler_needed_cnt != 0);
+	}
+
+	/* Nothing gained its first reference: the desired state is unchanged */
+	if (!change_gpu_state)
+		return;
+
+	KBASE_TRACE_ADD(kbdev, PM_REQUEST_CHANGE_SHADER_NEEDED, NULL,
+			NULL, 0u, (u32) kbdev->shader_needed_bitmap);
+
+	kbase_timeline_pm_cores_func(kbdev,
+				KBASE_PM_FUNC_ID_REQUEST_CORES_START,
+						change_gpu_state);
+	kbase_pm_update_cores_state_nolock(kbdev);
+	kbase_timeline_pm_cores_func(kbdev,
+				KBASE_PM_FUNC_ID_REQUEST_CORES_END,
+						change_gpu_state);
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_request_cores);
+
+/*
+ * Drop "needed" references taken by kbase_pm_request_cores().
+ *
+ * Releases one reference per core in @shader_cores, and one on the tiler
+ * when @tiler_required is set. If any of them lost its last reference, the
+ * desired core state is re-evaluated.
+ *
+ * The caller must hold kbdev->hwaccess_lock.
+ */
+void kbase_pm_unrequest_cores(struct kbase_device *kbdev,
+				bool tiler_required, u64 shader_cores)
+{
+	kbase_pm_change_state change_gpu_state = 0u;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	/* Walk the cores from the highest set bit downwards */
+	while (shader_cores) {
+		const int bitnum = fls64(shader_cores) - 1;
+		const u64 bit = 1ULL << bitnum;
+
+		shader_cores &= ~bit;
+
+		KBASE_DEBUG_ASSERT(kbdev->shader_needed_cnt[bitnum] > 0);
+
+		if (--kbdev->shader_needed_cnt[bitnum] != 0)
+			continue;
+
+		/* Last reference on this core has gone */
+		kbdev->shader_needed_bitmap &= ~bit;
+		change_gpu_state |= KBASE_PM_CHANGE_STATE_SHADER;
+	}
+
+	if (tiler_required) {
+		KBASE_DEBUG_ASSERT(kbdev->tiler_needed_cnt > 0);
+
+		if (--kbdev->tiler_needed_cnt == 0)
+			change_gpu_state |= KBASE_PM_CHANGE_STATE_TILER;
+	}
+
+	/* Nothing lost its last reference: the desired state is unchanged */
+	if (!change_gpu_state)
+		return;
+
+	KBASE_TRACE_ADD(kbdev, PM_UNREQUEST_CHANGE_SHADER_NEEDED, NULL,
+			NULL, 0u, (u32) kbdev->shader_needed_bitmap);
+
+	kbase_pm_update_cores_state_nolock(kbdev);
+
+	/* Trace that any state change effectively completes immediately
+	 * - no-one will wait on the state change */
+	kbase_pm_trace_check_and_finish_state_change(kbdev);
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_unrequest_cores);
+
+/*
+ * Move previously requested cores from the "needed" to the "in use" state.
+ *
+ * Returns:
+ *  KBASE_NEW_AFFINITY    - desired_shader_state does not contain all of
+ *                          @shader_cores (and no poweroff wait is in progress
+ *                          and a policy is set); nothing is changed.
+ *  KBASE_CORES_NOT_READY - a poweroff wait is in progress, no policy is set,
+ *                          or the cores/tiler are not available yet; nothing
+ *                          is changed.
+ *  KBASE_CORES_READY     - the reference counts were moved from needed to
+ *                          in use.
+ *
+ * The caller must hold kbdev->hwaccess_lock.
+ */
+enum kbase_pm_cores_ready
+kbase_pm_register_inuse_cores(struct kbase_device *kbdev,
+				bool tiler_required, u64 shader_cores)
+{
+	u64 prev_shader_needed;	/* Just for tracing */
+	u64 prev_shader_inuse;	/* Just for tracing */
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	prev_shader_needed = kbdev->shader_needed_bitmap;
+	prev_shader_inuse = kbdev->shader_inuse_bitmap;
+
+	/* If desired_shader_state does not contain the requested cores, then
+	 * power management is not attempting to power those cores (most
+	 * likely due to core availability policy) and a new job affinity must
+	 * be chosen */
+	if ((kbdev->pm.backend.desired_shader_state & shader_cores) !=
+							shader_cores) {
+		return (kbdev->pm.backend.poweroff_wait_in_progress ||
+				kbdev->pm.backend.pm_current_policy == NULL) ?
+				KBASE_CORES_NOT_READY : KBASE_NEW_AFFINITY;
+	}
+
+	if ((kbdev->shader_available_bitmap & shader_cores) != shader_cores ||
+	    (tiler_required && !kbdev->tiler_available_bitmap)) {
+		/* Trace ongoing core transition */
+		kbase_timeline_pm_l2_transition_start(kbdev);
+		return KBASE_CORES_NOT_READY;
+	}
+
+	/* If we started to trace a state change, then trace it as being
+	 * finished by now, at the very latest */
+	kbase_pm_trace_check_and_finish_state_change(kbdev);
+	/* Trace core transition done */
+	kbase_timeline_pm_l2_transition_done(kbdev);
+
+	/* Walk the cores from the highest set bit downwards, moving one
+	 * reference per core from needed to in use */
+	while (shader_cores) {
+		int bitnum = fls64(shader_cores) - 1;
+		u64 bit = 1ULL << bitnum;
+		int cnt;
+
+		KBASE_DEBUG_ASSERT(kbdev->shader_needed_cnt[bitnum] > 0);
+
+		cnt = --kbdev->shader_needed_cnt[bitnum];
+
+		if (0 == cnt)
+			kbdev->shader_needed_bitmap &= ~bit;
+
+		/* shader_inuse_cnt should not overflow because there can only
+		 * be a very limited number of jobs on the h/w at one time */
+
+		kbdev->shader_inuse_cnt[bitnum]++;
+		kbdev->shader_inuse_bitmap |= bit;
+
+		shader_cores &= ~bit;
+	}
+
+	if (tiler_required) {
+		KBASE_DEBUG_ASSERT(kbdev->tiler_needed_cnt > 0);
+
+		--kbdev->tiler_needed_cnt;
+
+		kbdev->tiler_inuse_cnt++;
+
+		KBASE_DEBUG_ASSERT(kbdev->tiler_inuse_cnt != 0);
+	}
+
+	if (prev_shader_needed != kbdev->shader_needed_bitmap)
+		KBASE_TRACE_ADD(kbdev, PM_REGISTER_CHANGE_SHADER_NEEDED, NULL,
+				NULL, 0u, (u32) kbdev->shader_needed_bitmap);
+
+	if (prev_shader_inuse != kbdev->shader_inuse_bitmap)
+		KBASE_TRACE_ADD(kbdev, PM_REGISTER_CHANGE_SHADER_INUSE, NULL,
+				NULL, 0u, (u32) kbdev->shader_inuse_bitmap);
+
+	return KBASE_CORES_READY;
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_register_inuse_cores);
+
+/*
+ * Drop "in use" references on shader cores (and optionally the tiler).
+ *
+ * Releases one reference per core in @shader_cores, and one on the tiler
+ * when @tiler_required is set. If any of them lost its last reference, the
+ * desired core state is re-evaluated.
+ *
+ * The caller must hold kbdev->hwaccess_lock.
+ */
+void kbase_pm_release_cores(struct kbase_device *kbdev,
+				bool tiler_required, u64 shader_cores)
+{
+	kbase_pm_change_state change_gpu_state = 0u;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	/* Walk the cores from the highest set bit downwards */
+	while (shader_cores) {
+		const int bitnum = fls64(shader_cores) - 1;
+		const u64 bit = 1ULL << bitnum;
+
+		shader_cores &= ~bit;
+
+		KBASE_DEBUG_ASSERT(kbdev->shader_inuse_cnt[bitnum] > 0);
+
+		if (--kbdev->shader_inuse_cnt[bitnum] != 0)
+			continue;
+
+		/* Last user of this core has gone */
+		kbdev->shader_inuse_bitmap &= ~bit;
+		change_gpu_state |= KBASE_PM_CHANGE_STATE_SHADER;
+	}
+
+	if (tiler_required) {
+		KBASE_DEBUG_ASSERT(kbdev->tiler_inuse_cnt > 0);
+
+		if (--kbdev->tiler_inuse_cnt == 0)
+			change_gpu_state |= KBASE_PM_CHANGE_STATE_TILER;
+	}
+
+	/* Nothing lost its last reference: the desired state is unchanged */
+	if (!change_gpu_state)
+		return;
+
+	KBASE_TRACE_ADD(kbdev, PM_RELEASE_CHANGE_SHADER_INUSE, NULL,
+			NULL, 0u, (u32) kbdev->shader_inuse_bitmap);
+
+	kbase_timeline_pm_cores_func(kbdev,
+				KBASE_PM_FUNC_ID_RELEASE_CORES_START,
+						change_gpu_state);
+	kbase_pm_update_cores_state_nolock(kbdev);
+	kbase_timeline_pm_cores_func(kbdev,
+				KBASE_PM_FUNC_ID_RELEASE_CORES_END,
+						change_gpu_state);
+
+	/* Trace that any state change completed immediately */
+	kbase_pm_trace_check_and_finish_state_change(kbdev);
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_release_cores);
+
+/*
+ * Synchronous variant of kbase_pm_request_cores().
+ *
+ * Waits for any poweroff to complete, requests the cores under
+ * hwaccess_lock, then calls kbase_pm_check_transitions_sync() once the lock
+ * has been released.
+ */
+void kbase_pm_request_cores_sync(struct kbase_device *kbdev,
+					bool tiler_required,
+					u64 shader_cores)
+{
+	unsigned long flags;
+
+	kbase_pm_wait_for_poweroff_complete(kbdev);
+
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	kbase_pm_request_cores(kbdev, tiler_required, shader_cores);
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+	kbase_pm_check_transitions_sync(kbdev);
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_request_cores_sync);
+
+/*
+ * Take a reference on the L2 caches and wait for them to be powered.
+ *
+ * Increments l2_users_count under hwaccess_lock, runs a transition check if
+ * this is the first user or the L2 is not reported available, then waits
+ * (with the lock released) until pm.backend.l2_powered is 1.
+ */
+void kbase_pm_request_l2_caches(struct kbase_device *kbdev)
+{
+	unsigned long flags;
+	u32 prior_l2_users_count;
+
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+
+	prior_l2_users_count = kbdev->l2_users_count++;
+
+	KBASE_DEBUG_ASSERT(kbdev->l2_users_count != 0);
+
+	/* if the GPU is reset while the l2 is on, l2 will be off but
+	 * prior_l2_users_count will be > 0. l2_available_bitmap will have been
+	 * set to 0 though by kbase_pm_init_hw */
+	if (!prior_l2_users_count || !kbdev->l2_available_bitmap)
+		kbase_pm_check_transitions_nolock(kbdev);
+
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+	wait_event(kbdev->pm.backend.l2_powered_wait,
+					kbdev->pm.backend.l2_powered == 1);
+
+	/* Trace that any state change completed immediately */
+	/* NOTE(review): this call is made without hwaccess_lock held, unlike
+	 * the other callers in this file - confirm that is intended */
+	kbase_pm_trace_check_and_finish_state_change(kbdev);
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_request_l2_caches);
+
+/*
+ * Take a reference on the L2 caches without running a transition check or
+ * waiting. As the name indicates, this is for callers that already know the
+ * L2 is on. The caller must hold kbdev->hwaccess_lock.
+ */
+void kbase_pm_request_l2_caches_l2_is_on(struct kbase_device *kbdev)
+{
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	kbdev->l2_users_count++;
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_request_l2_caches_l2_is_on);
+
+/*
+ * Drop a reference on the L2 caches. When the last reference goes, a
+ * transition check is run. The caller must hold kbdev->hwaccess_lock.
+ */
+void kbase_pm_release_l2_caches(struct kbase_device *kbdev)
+{
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	KBASE_DEBUG_ASSERT(kbdev->l2_users_count > 0);
+
+	/* Other users remain: leave the L2 state alone */
+	if (--kbdev->l2_users_count)
+		return;
+
+	kbase_pm_check_transitions_nolock(kbdev);
+	/* Trace that any state change completed immediately */
+	kbase_pm_trace_check_and_finish_state_change(kbdev);
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_release_l2_caches);
--- a/drivers/gpu/arm/midgard_for_linux/backend/gpu/mali_kbase_pm_policy.h
+++ b/drivers/gpu/arm/midgard_for_linux/backend/gpu/mali_kbase_pm_policy.h
@ -0,0 +1,227 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/*
+ * Power policy API definitions
+ */
+
+#ifndef _KBASE_PM_POLICY_H_
+#define _KBASE_PM_POLICY_H_
+
+/**
+ * kbase_pm_policy_init - Initialize power policy framework
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ *
+ * Must be called before calling any other policy function
+ *
+ * Return: 0 if the power policy framework was successfully
+ *         initialized, -errno otherwise.
+ */
+int kbase_pm_policy_init(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_policy_term - Terminate power policy framework
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_policy_term(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_update_active - Update the active power state of the GPU
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ *
+ * Calls into the current power policy
+ */
+void kbase_pm_update_active(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_update_cores - Update the desired core state of the GPU
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ *
+ * Calls into the current power policy
+ */
+void kbase_pm_update_cores(struct kbase_device *kbdev);
+
+
+/**
+ * enum kbase_pm_cores_ready - Result of kbase_pm_register_inuse_cores()
+ *
+ * @KBASE_CORES_NOT_READY: The cores are not immediately ready
+ * @KBASE_NEW_AFFINITY:    The affinity requested is not allowed
+ * @KBASE_CORES_READY:     The cores requested are already available
+ */
+enum kbase_pm_cores_ready {
+	KBASE_CORES_NOT_READY = 0,
+	KBASE_NEW_AFFINITY = 1,
+	KBASE_CORES_READY = 2
+};
+
+
+/**
+ * kbase_pm_request_cores_sync - Synchronous variant of kbase_pm_request_cores()
+ *
+ * @kbdev:          The kbase device structure for the device
+ * @tiler_required: true if the tiler is required, false otherwise
+ * @shader_cores:   A bitmask of shader cores which are necessary for the job
+ *
+ * When this function returns, the @shader_cores will be in the READY state.
+ *
+ * This is a safe variant of kbase_pm_check_transitions_sync(): it handles the
+ * work of ensuring the requested cores will remain powered until a matching
+ * call to kbase_pm_unrequest_cores()/kbase_pm_release_cores() (as appropriate)
+ * is made.
+ */
+void kbase_pm_request_cores_sync(struct kbase_device *kbdev,
+				bool tiler_required, u64 shader_cores);
+
+/**
+ * kbase_pm_request_cores - Mark one or more cores as being required
+ *                          for jobs to be submitted
+ *
+ * @kbdev:          The kbase device structure for the device
+ * @tiler_required: true if the tiler is required, false otherwise
+ * @shader_cores:   A bitmask of shader cores which are necessary for the job
+ *
+ * This function is called by the job scheduler to mark one or more cores as
+ * being required to submit jobs that are ready to run.
+ *
+ * The cores requested are reference counted and a subsequent call to
+ * kbase_pm_register_inuse_cores() or kbase_pm_unrequest_cores() should be
+ * made to dereference the cores as being 'needed'.
+ *
+ * The active power policy will meet or exceed the requirements of the
+ * requested cores in the system. Any core transitions needed will be begun
+ * immediately, but they might not complete/the cores might not be available
+ * until a Power Management IRQ.
+ *
+ * This function does not return a value.
+ */
+void kbase_pm_request_cores(struct kbase_device *kbdev,
+				bool tiler_required, u64 shader_cores);
+
+/**
+ * kbase_pm_unrequest_cores - Unmark one or more cores as being required for
+ *                            jobs to be submitted.
+ *
+ * @kbdev:          The kbase device structure for the device
+ * @tiler_required: true if the tiler is required, false otherwise
+ * @shader_cores:   A bitmask of shader cores (as given to
+ *                  kbase_pm_request_cores() )
+ *
+ * This function undoes the effect of kbase_pm_request_cores(). It should be
+ * used when a job is not going to be submitted to the hardware (e.g. the job is
+ * cancelled before it is enqueued).
+ *
+ * The active power policy will meet or exceed the requirements of the
+ * requested cores in the system. Any core transitions needed will be begun
+ * immediately, but they might not complete until a Power Management IRQ.
+ *
+ * The policy may use this as an indication that it can power down cores.
+ */
+void kbase_pm_unrequest_cores(struct kbase_device *kbdev,
+				bool tiler_required, u64 shader_cores);
+
+/**
+ * kbase_pm_register_inuse_cores - Register a set of cores as in use by a job
+ *
+ * @kbdev:          The kbase device structure for the device
+ * @tiler_required: true if the tiler is required, false otherwise
+ * @shader_cores:   A bitmask of shader cores (as given to
+ *                  kbase_pm_request_cores() )
+ *
+ * This function should be called after kbase_pm_request_cores() when the job
+ * is about to be submitted to the hardware. It will check that the necessary
+ * cores are available and if so update the 'needed' and 'inuse' bitmasks to
+ * reflect that the job is now committed to being run.
+ *
+ * If the necessary cores are not currently available then the function will
+ * return %KBASE_CORES_NOT_READY and have no effect.
+ *
+ * Return: %KBASE_CORES_NOT_READY if the cores are not immediately ready,
+ *
+ *         %KBASE_NEW_AFFINITY if the affinity requested is not allowed,
+ *
+ *         %KBASE_CORES_READY if the cores requested are already available
+ */
+enum kbase_pm_cores_ready kbase_pm_register_inuse_cores(
+						struct kbase_device *kbdev,
+						bool tiler_required,
+						u64 shader_cores);
+
+/**
+ * kbase_pm_release_cores - Release cores after a job has run
+ *
+ * @kbdev:          The kbase device structure for the device
+ * @tiler_required: true if the tiler is required, false otherwise
+ * @shader_cores:   A bitmask of shader cores (as given to
+ *                  kbase_pm_register_inuse_cores() )
+ *
+ * This function should be called when a job has finished running on the
+ * hardware. A call to kbase_pm_register_inuse_cores() must have previously
+ * occurred. The reference counts of the specified cores will be decremented
+ * which may cause the bitmask of 'inuse' cores to be reduced. The power policy
+ * may then turn off any cores which are no longer 'inuse'.
+ */
+void kbase_pm_release_cores(struct kbase_device *kbdev,
+				bool tiler_required, u64 shader_cores);
+
+/**
+ * kbase_pm_request_l2_caches - Request l2 caches
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ *
+ * Request the use of l2 caches for all core groups, power up, wait and prevent
+ * the power manager from powering down the l2 caches.
+ *
+ * This tells the power management that the caches should be powered up, and
+ * they should remain powered, irrespective of the usage of shader cores. This
+ * does not return until the l2 caches are powered up.
+ *
+ * The caller must call kbase_pm_release_l2_caches() when they are finished
+ * to allow normal power management of the l2 caches to resume.
+ *
+ * This should only be used when power management is active.
+ */
+void kbase_pm_request_l2_caches(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_request_l2_caches_l2_is_on - Request l2 caches but don't power on
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ *
+ * Increment the count of l2 users but do not attempt to power on the l2
+ *
+ * It is the caller's responsibility to ensure that the l2 is already powered up
+ * and to eventually call kbase_pm_release_l2_caches()
+ */
+void kbase_pm_request_l2_caches_l2_is_on(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_release_l2_caches - Release l2 caches
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ *
+ * Release the use of l2 caches for all core groups and allow the power manager
+ * to power them down when necessary.
+ *
+ * This tells the power management that the caches can be powered down if
+ * necessary, with respect to the usage of shader cores.
+ *
+ * The caller must have called kbase_pm_request_l2_caches() prior to a call
+ * to this.
+ *
+ * This should only be used when power management is active.
+ */
+void kbase_pm_release_l2_caches(struct kbase_device *kbdev);
+
+#endif /* _KBASE_PM_POLICY_H_ */
--- a/drivers/gpu/arm/midgard_for_linux/backend/gpu/mali_kbase_power_model_simple.c
+++ b/drivers/gpu/arm/midgard_for_linux/backend/gpu/mali_kbase_power_model_simple.c
@ -0,0 +1,171 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#include <linux/devfreq_cooling.h>
+#include <linux/thermal.h>
+#include <linux/of.h>
+#include <mali_kbase.h>
+#include <mali_kbase_defs.h>
+#include <backend/gpu/mali_kbase_power_model_simple.h>
+
+/*
+ * This model is primarily designed for the Juno platform. It may not be
+ * suitable for other platforms.
+ */
+
+/* Temperature used when the thermal zone is missing or cannot be read.
+ * It is divided by 1000 before use, so it is presumably in millidegrees
+ * Celsius (i.e. 55 degrees) - TODO confirm against the thermal framework. */
+#define FALLBACK_STATIC_TEMPERATURE 55000
+
+/* Model state, filled in by kbase_power_model_simple_init() from the
+ * "power_model" device tree node */
+static u32 dynamic_coefficient;
+static u32 static_coefficient;
+/* Coefficients of the cubic temperature scaling polynomial, ts[0] being the
+ * constant term and ts[3] the cubic term */
+static s32 ts[4];
+/* Thermal zone the static power model reads its temperature from */
+static struct thermal_zone_device *gpu_tz;
+
+/*
+ * model_static_power - Estimate static power for the devfreq cooling code
+ * @devfreq: Unused
+ * @voltage: Supply voltage (presumably mV, matching the device tree
+ *           "voltage" property used to derive static_coefficient - TODO
+ *           confirm)
+ *
+ * Scales the device tree derived static coefficient by voltage^3 and by a
+ * cubic polynomial in temperature whose coefficients are ts[0..3]. The
+ * temperature is read from the GPU thermal zone when one is available and
+ * readable; otherwise FALLBACK_STATIC_TEMPERATURE is used.
+ *
+ * Return: the scaled static power estimate.
+ */
+static unsigned long model_static_power(struct devfreq *devfreq,
+					unsigned long voltage)
+{
+	/* The type passed to get_temp() depends on the kernel version
+	 * (presumably the callback prototype changed in 4.3) */
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 3, 0)
+	unsigned long temperature;
+#else
+	int temperature;
+#endif
+	unsigned long temp;
+	unsigned long temp_squared, temp_cubed, temp_scaling_factor;
+	/* >> 10 keeps the cube in range; init divides by the same scaled value
+	 * when deriving static_coefficient */
+	const unsigned long voltage_cubed = (voltage * voltage * voltage) >> 10;
+
+	if (!IS_ERR_OR_NULL(gpu_tz) && gpu_tz->ops->get_temp) {
+		int ret;
+
+		ret = gpu_tz->ops->get_temp(gpu_tz, &temperature);
+		if (ret) {
+			pr_warn_ratelimited("Error reading temperature for gpu thermal zone: %d\n",
+					ret);
+			temperature = FALLBACK_STATIC_TEMPERATURE;
+		}
+	} else {
+		temperature = FALLBACK_STATIC_TEMPERATURE;
+	}
+
+	/* Calculate the temperature scaling factor. To be applied to the
+	 * voltage scaled power.
+	 */
+	temp = temperature / 1000;
+	temp_squared = temp * temp;
+	temp_cubed = temp_squared * temp;
+	/* ts[] is signed but the arithmetic is carried out in unsigned long */
+	temp_scaling_factor =
+			(ts[3] * temp_cubed)
+			+ (ts[2] * temp_squared)
+			+ (ts[1] * temp)
+			+ ts[0];
+
+	/* >> 20 undoes the << 20 applied when static_coefficient was derived */
+	return (((static_coefficient * voltage_cubed) >> 20)
+			* temp_scaling_factor)
+				/ 1000000;
+}
+
+/*
+ * model_dynamic_power - Estimate dynamic power for the devfreq cooling code
+ * @devfreq: Unused
+ * @freq:    Frequency in Hz
+ * @voltage: Voltage in mV
+ *
+ * Pdyn (mW) = c (mW/(MHz*mV*mV)) * v (mV) * v (mV) * f (MHz), where c is
+ * dynamic_coefficient.
+ *
+ * Return: dynamic power in mW.
+ */
+static unsigned long model_dynamic_power(struct devfreq *devfreq,
+		unsigned long freq,
+		unsigned long voltage)
+{
+	unsigned long mhz = freq / 1000000;		/* Hz -> MHz */
+	unsigned long volt_sq = voltage * voltage / 1000;	/* m*(V*V) */
+	unsigned long power;
+
+	power = dynamic_coefficient * volt_sq;
+	power *= mhz;
+
+	return power / 1000000; /* mW */
+}
+
+/* Callback table exposing the simple power model. The struct type is chosen
+ * by kernel version (devfreq_cooling_ops before 4.4, devfreq_cooling_power
+ * from 4.4 on); the two callbacks are the same either way. */
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 4, 0)
+struct devfreq_cooling_ops power_model_simple_ops = {
+#else
+struct devfreq_cooling_power power_model_simple_ops = {
+#endif
+	.get_static_power = model_static_power,
+	.get_dynamic_power = model_dynamic_power,
+};
+
+/*
+ * kbase_power_model_simple_init - Load the simple power model from device tree
+ * @kbdev: Device pointer
+ *
+ * Reads the "power_model" child node of the GPU device node and derives the
+ * static and dynamic coefficients plus the temperature polynomial used by
+ * model_static_power() and model_dynamic_power().
+ *
+ * Return: 0 on success,
+ *         -ENODEV if the node is missing or not compatible with
+ *         "arm,mali-simple-power-model",
+ *         -EPROBE_DEFER if the named thermal zone is not available yet,
+ *         -EINVAL if a required property is missing or would lead to a
+ *         division by zero.
+ */
+int kbase_power_model_simple_init(struct kbase_device *kbdev)
+{
+	struct device_node *power_model_node;
+	const char *tz_name;
+	u32 static_power, dynamic_power;
+	u32 voltage, voltage_squared, voltage_cubed, frequency;
+	int err = -EINVAL;
+
+	power_model_node = of_get_child_by_name(kbdev->dev->of_node,
+			"power_model");
+	if (!power_model_node) {
+		dev_err(kbdev->dev, "could not find power_model node\n");
+		return -ENODEV;
+	}
+	if (!of_device_is_compatible(power_model_node,
+			"arm,mali-simple-power-model")) {
+		dev_err(kbdev->dev, "power_model incompatible with simple power model\n");
+		err = -ENODEV;
+		goto out_put_node;
+	}
+
+	if (of_property_read_string(power_model_node, "thermal-zone",
+			&tz_name)) {
+		dev_err(kbdev->dev, "thermal-zone in power_model not available\n");
+		goto out_put_node;
+	}
+
+	gpu_tz = thermal_zone_get_zone_by_name(tz_name);
+	if (IS_ERR(gpu_tz)) {
+		pr_warn_ratelimited("Error getting gpu thermal zone (%ld), not yet ready?\n",
+				PTR_ERR(gpu_tz));
+		gpu_tz = NULL;
+
+		err = -EPROBE_DEFER;
+		goto out_put_node;
+	}
+
+	if (of_property_read_u32(power_model_node, "static-power",
+			&static_power)) {
+		dev_err(kbdev->dev, "static-power in power_model not available\n");
+		goto out_put_node;
+	}
+	if (of_property_read_u32(power_model_node, "dynamic-power",
+			&dynamic_power)) {
+		dev_err(kbdev->dev, "dynamic-power in power_model not available\n");
+		goto out_put_node;
+	}
+	if (of_property_read_u32(power_model_node, "voltage",
+			&voltage)) {
+		dev_err(kbdev->dev, "voltage in power_model not available\n");
+		goto out_put_node;
+	}
+	if (of_property_read_u32(power_model_node, "frequency",
+			&frequency)) {
+		dev_err(kbdev->dev, "frequency in power_model not available\n");
+		goto out_put_node;
+	}
+	voltage_squared = (voltage * voltage) / 1000;
+	voltage_cubed = voltage * voltage * voltage;
+
+	/* All three values are used as divisors below; reject device tree
+	 * data that would make any of them zero */
+	if (!voltage_squared || !(voltage_cubed >> 10) || !frequency) {
+		dev_err(kbdev->dev, "voltage or frequency in power_model out of range\n");
+		goto out_put_node;
+	}
+
+	static_coefficient = (static_power << 20) / (voltage_cubed >> 10);
+	dynamic_coefficient = (((dynamic_power * 1000) / voltage_squared)
+			* 1000) / frequency;
+
+	if (of_property_read_u32_array(power_model_node, "ts", (u32 *)ts, 4)) {
+		dev_err(kbdev->dev, "ts in power_model not available\n");
+		goto out_put_node;
+	}
+
+	err = 0;
+
+out_put_node:
+	/* of_get_child_by_name() returned the node with a reference held */
+	of_node_put(power_model_node);
+	return err;
+}
+
--- a/drivers/gpu/arm/midgard_for_linux/backend/gpu/mali_kbase_power_model_simple.h
+++ b/drivers/gpu/arm/midgard_for_linux/backend/gpu/mali_kbase_power_model_simple.h
@ -0,0 +1,47 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#ifndef _BASE_POWER_MODEL_SIMPLE_H_
+#define _BASE_POWER_MODEL_SIMPLE_H_
+
+/**
+ * kbase_power_model_simple_init - Initialise the simple power model
+ * @kbdev: Device pointer
+ *
+ * The simple power model estimates power based on current voltage, temperature,
+ * and coefficients read from device tree. It does not take utilization into
+ * account.
+ *
+ * The power model requires coefficients from the power_model node in device
+ * tree. The absence of this node will prevent the model from functioning, but
+ * should not prevent the rest of the driver from running.
+ *
+ * Return: 0 on success
+ *         -ENODEV if the power_model node is not present in device tree
+ *         -EPROBE_DEFER if the thermal zone specified in device tree is not
+ *         currently available
+ *         Any other negative value on failure
+ */
+int kbase_power_model_simple_init(struct kbase_device *kbdev);
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 4, 0)
+extern struct devfreq_cooling_ops power_model_simple_ops;
+#else
+extern struct devfreq_cooling_power power_model_simple_ops;
+#endif
+
+#endif /* _BASE_POWER_MODEL_SIMPLE_H_ */
--- a/drivers/gpu/arm/midgard_for_linux/backend/gpu/mali_kbase_time.c
+++ b/drivers/gpu/arm/midgard_for_linux/backend/gpu/mali_kbase_time.c
@ -0,0 +1,103 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#include <mali_kbase.h>
+#include <mali_kbase_hwaccess_time.h>
+#include <backend/gpu/mali_kbase_device_internal.h>
+#include <backend/gpu/mali_kbase_pm_internal.h>
+
+/*
+ * Sample the GPU cycle counter, the GPU timestamp and the CPU raw monotonic
+ * clock, in that order. The cycle counter is requested before the registers
+ * are read and released again before returning.
+ *
+ * All three output pointers are written unconditionally.
+ */
+void kbase_backend_get_gpu_time(struct kbase_device *kbdev, u64 *cycle_counter,
+				u64 *system_time, struct timespec *ts)
+{
+	u32 hi1, hi2;
+
+	kbase_pm_request_gpu_cycle_counter(kbdev);
+
+	/* Read hi, lo, hi to ensure that overflow from lo to hi is handled
+	 * correctly: retry until both reads of the high word agree */
+	do {
+		hi1 = kbase_reg_read(kbdev, GPU_CONTROL_REG(CYCLE_COUNT_HI),
+									NULL);
+		*cycle_counter = kbase_reg_read(kbdev,
+					GPU_CONTROL_REG(CYCLE_COUNT_LO), NULL);
+		hi2 = kbase_reg_read(kbdev, GPU_CONTROL_REG(CYCLE_COUNT_HI),
+									NULL);
+		*cycle_counter |= (((u64) hi1) << 32);
+	} while (hi1 != hi2);
+
+	/* Same hi, lo, hi scheme for the 64-bit GPU timestamp */
+	do {
+		hi1 = kbase_reg_read(kbdev, GPU_CONTROL_REG(TIMESTAMP_HI),
+									NULL);
+		*system_time = kbase_reg_read(kbdev,
+					GPU_CONTROL_REG(TIMESTAMP_LO), NULL);
+		hi2 = kbase_reg_read(kbdev, GPU_CONTROL_REG(TIMESTAMP_HI),
+									NULL);
+		*system_time |= (((u64) hi1) << 32);
+	} while (hi1 != hi2);
+
+	/* Record the CPU's idea of current time */
+	getrawmonotonic(ts);
+
+	kbase_pm_release_gpu_cycle_counter(kbdev);
+}
+
+/**
+ * kbase_wait_write_flush -  Wait for GPU write flush
+ * @kctx: Context pointer
+ *
+ * Wait 1000 GPU clock cycles. This delay is known to give the GPU time to flush
+ * its write buffer.
+ *
+ * Only in use for BASE_HW_ISSUE_6367
+ *
+ * Note : If GPU resets occur then the counters are reset to zero, the delay may
+ * not be as expected.
+ */
+#ifndef CONFIG_MALI_NO_MALI
+void kbase_wait_write_flush(struct kbase_context *kctx)
+{
+	struct kbase_device *kbdev = kctx->kbdev;
+	u32 start_count = 0;
+
+	/*
+	 * Either the caller holds a reference on the kctx, or we were called
+	 * from userspace.
+	 */
+	kbase_pm_context_active(kbdev);
+	kbase_pm_request_gpu_cycle_counter(kbdev);
+
+	for (;;) {
+		u32 now = kbase_reg_read(kbdev,
+					GPU_CONTROL_REG(CYCLE_COUNT_LO), NULL);
+
+		/* Zero means no baseline has been latched yet: take one and
+		 * sample again. */
+		if (start_count == 0) {
+			start_count = now;
+			continue;
+		}
+
+		/* Unsigned subtraction gives the right distance even if the
+		 * counter wrapped. */
+		if ((now - start_count) > 1000)
+			break;
+	}
+
+	kbase_pm_release_gpu_cycle_counter(kbdev);
+	kbase_pm_context_idle(kbdev);
+}
+#endif				/* CONFIG_MALI_NO_MALI */
--- a/drivers/gpu/arm/midgard_for_linux/backend/gpu/mali_kbase_time.h
+++ b/drivers/gpu/arm/midgard_for_linux/backend/gpu/mali_kbase_time.h
@ -0,0 +1,52 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#ifndef _KBASE_BACKEND_TIME_H_
+#define _KBASE_BACKEND_TIME_H_
+
+/**
+ * kbase_backend_get_gpu_time() - Get current GPU time
+ * @kbdev:		Device pointer
+ * @cycle_counter:	Pointer to u64 to store cycle counter in
+ * @system_time:	Pointer to u64 to store system time in
+ * @ts:			Pointer to struct timespec to store current monotonic
+ *			time in
+ */
+void kbase_backend_get_gpu_time(struct kbase_device *kbdev, u64 *cycle_counter,
+				u64 *system_time, struct timespec *ts);
+
+/**
+ * kbase_wait_write_flush() -  Wait for GPU write flush
+ * @kctx:	Context pointer
+ *
+ * Wait 1000 GPU clock cycles. This delay is known to give the GPU time to flush
+ * its write buffer.
+ *
+ * If GPU resets occur then the counters are reset to zero, the delay may not be
+ * as expected.
+ *
+ * This function is only in use for BASE_HW_ISSUE_6367
+ */
+#ifdef CONFIG_MALI_NO_MALI
+/* With CONFIG_MALI_NO_MALI set the wait is an empty inline stub */
+static inline void kbase_wait_write_flush(struct kbase_context *kctx)
+{
+}
+#else
+/* Otherwise the function is provided out of line */
+void kbase_wait_write_flush(struct kbase_context *kctx);
+#endif
+
+#endif /* _KBASE_BACKEND_TIME_H_ */
--- a/drivers/gpu/arm/midgard_for_linux/docs/Doxyfile
+++ b/drivers/gpu/arm/midgard_for_linux/docs/Doxyfile
@ -0,0 +1,126 @@
+#
+# (C) COPYRIGHT 2011-2013, 2015 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+
+##############################################################################
+
+# This file contains per-module Doxygen configuration. Please do not add
+# extra settings to this file without consulting all stakeholders, as they
+# may override project-wide settings.
+#
+# Additionally, when defining aliases, macros, sections etc, use the module
+# name as a prefix e.g. gles_my_alias.
+
+##############################################################################
+
+@INCLUDE = ../../bldsys/Doxyfile_common
+
+# The INPUT tag can be used to specify the files and/or directories that contain
+# documented source files. You may enter file names like "myfile.cpp" or
+# directories like "/usr/src/myproject". Separate the files or directories
+# with spaces.
+
+INPUT                  += ../../kernel/drivers/gpu/arm/midgard/ 
+
+##############################################################################
+# Everything below here is optional, and in most cases not required
+##############################################################################
+
+# This tag can be used to specify a number of aliases that act
+# as commands in the documentation. An alias has the form "name=value".
+# For example adding "sideeffect=\par Side Effects:\n" will allow you to
+# put the command \sideeffect (or @sideeffect) in the documentation, which
+# will result in a user-defined paragraph with heading "Side Effects:".
+# You can put \n's in the value part of an alias to insert newlines.
+
+ALIASES                +=
+
+# The ENABLED_SECTIONS tag can be used to enable conditional
+# documentation sections, marked by \if sectionname ... \endif.
+
+ENABLED_SECTIONS       +=
+
+# If the value of the INPUT tag contains directories, you can use the
+# FILE_PATTERNS tag to specify one or more wildcard pattern (like *.cpp
+# and *.h) to filter out the source-files in the directories. If left
+# blank the following patterns are tested:
+# *.c *.cc *.cxx *.cpp *.c++ *.java *.ii *.ixx *.ipp *.i++ *.inl *.h *.hh *.hxx
+# *.hpp *.h++ *.idl *.odl *.cs *.php *.php3 *.inc *.m *.mm *.py *.f90
+
+FILE_PATTERNS          +=
+
+# The EXCLUDE tag can be used to specify files and/or directories that should be
+# excluded from the INPUT source files. This way you can easily exclude a
+# subdirectory from a directory tree whose root is specified with the INPUT tag.
+EXCLUDE                += ../../kernel/drivers/gpu/arm/midgard/platform ../../kernel/drivers/gpu/arm/midgard/platform_dummy ../../kernel/drivers/gpu/arm/midgard/scripts ../../kernel/drivers/gpu/arm/midgard/tests ../../kernel/drivers/gpu/arm/midgard/Makefile ../../kernel/drivers/gpu/arm/midgard/Makefile.kbase ../../kernel/drivers/gpu/arm/midgard/Kbuild ../../kernel/drivers/gpu/arm/midgard/Kconfig ../../kernel/drivers/gpu/arm/midgard/sconscript ../../kernel/drivers/gpu/arm/midgard/docs ../../kernel/drivers/gpu/arm/midgard/pm_test_script.sh ../../kernel/drivers/gpu/arm/midgard/mali_uk.h ../../kernel/drivers/gpu/arm/midgard/Makefile
+
+
+# If the value of the INPUT tag contains directories, you can use the
+# EXCLUDE_PATTERNS tag to specify one or more wildcard patterns to exclude
+# certain files from those directories. Note that the wildcards are matched
+# against the file with absolute path, so to exclude all test directories
+# for example use the pattern */test/*
+
+EXCLUDE_PATTERNS       +=
+
+# The EXCLUDE_SYMBOLS tag can be used to specify one or more symbol names
+# (namespaces, classes, functions, etc.) that should be excluded from the
+# output. The symbol name can be a fully qualified name, a word, or if the
+# wildcard * is used, a substring. Examples: ANamespace, AClass,
+# AClass::ANamespace, ANamespace::*Test
+
+EXCLUDE_SYMBOLS        +=
+
+# The EXAMPLE_PATH tag can be used to specify one or more files or
+# directories that contain example code fragments that are included (see
+# the \include command).
+
+EXAMPLE_PATH           +=
+
+# The IMAGE_PATH tag can be used to specify one or more files or
+# directories that contain images that are included in the documentation (see
+# the \image command).
+
+IMAGE_PATH             +=
+
+# The INCLUDE_PATH tag can be used to specify one or more directories that
+# contain include files that are not input files but should be processed by
+# the preprocessor.
+
+INCLUDE_PATH           +=
+
+# The PREDEFINED tag can be used to specify one or more macro names that
+# are defined before the preprocessor is started (similar to the -D option of
+# gcc). The argument of the tag is a list of macros of the form: name
+# or name=definition (no spaces). If the definition and the = are
+# omitted =1 is assumed. To prevent a macro definition from being
+# undefined via #undef or recursively expanded use the := operator
+# instead of the = operator.
+
+PREDEFINED             +=
+
+# If the MACRO_EXPANSION and EXPAND_ONLY_PREDEF tags are set to YES then
+# this tag can be used to specify a list of macro names that should be expanded.
+# The macro definition that is found in the sources will be used.
+# Use the PREDEFINED tag if you want to use a different macro definition.
+
+EXPAND_AS_DEFINED      +=
+
+# The DOTFILE_DIRS tag can be used to specify one or more directories that
+# contain dot files that are included in the documentation (see the
+# \dotfile command).
+
+DOTFILE_DIRS           += ../../kernel/drivers/gpu/arm/midgard/docs
+
--- a/drivers/gpu/arm/midgard_for_linux/docs/policy_operation_diagram.dot
+++ b/drivers/gpu/arm/midgard_for_linux/docs/policy_operation_diagram.dot
@ -0,0 +1,112 @@
+/*
+ *
+ * (C) COPYRIGHT 2010 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/*
+ * Detailed view of the scheduling policy: contexts move from the policy
+ * queues into the run pool, jobs move from the run pool onto the job slots,
+ * and the soft-stop/hard-stop timers and the IRQ feed back into that flow.
+ */
+digraph policy_objects_diagram {
+	rankdir=LR;
+	size="12,8";
+	compound=true;
+
+	node [ shape = box ];
+
+	/* The policy's context queues: a low priority (LowP) and an RT queue */
+	subgraph cluster_policy_queues {
+		low_queue [ shape=record label = "LowP | {<ql>ctx_lo | ... | <qm>ctx_i | ... | <qr>ctx_hi}" ];
+		/* Zero-size node used as the edge target for the whole cluster */
+		queues_middle_sep [ label="" shape=plaintext width=0 height=0 ];
+
+		rt_queue [ shape=record label = "RT | {<ql>ctx_lo | ... | <qm>ctx_j | ... | <qr>ctx_hi}" ];
+
+		label = "Policy's Queue(s)";
+	}
+
+	call_enqueue [ shape=plaintext label="enqueue_ctx()" ];
+
+	/* Calls that move a context into, or out of, the run pool */
+	{
+		rank=same;
+		ordering=out;
+		call_dequeue [ shape=plaintext label="dequeue_head_ctx()\n+ runpool_add_ctx()" ];
+		call_ctxfinish [ shape=plaintext label="runpool_remove_ctx()" ];
+
+		call_ctxdone [ shape=plaintext label="don't requeue;\n/* ctx has no more jobs */" ];
+	}
+
+	/* The run pool: one entry per address space (AS0..AS3) */
+	subgraph cluster_runpool {
+
+		as0 [ width=2 height = 0.25 label="AS0: Job_1, ..., Job_n" ];
+		as1 [ width=2 height = 0.25 label="AS1: Job_1, ..., Job_m" ];
+		as2 [ width=2 height = 0.25 label="AS2: Job_1, ..., Job_p" ];
+		as3 [ width=2 height = 0.25 label="AS3: Job_1, ..., Job_q" ];
+
+		label = "Policy's Run Pool";
+	}
+
+	{
+		rank=same;
+		call_jdequeue [ shape=plaintext label="dequeue_job()" ];
+		/* Zero-size helper node; presumably only there to shape the
+		 * layout of the edge towards the SS-Timer */
+		sstop_dotfixup [ shape=plaintext label="" width=0 height=0 ];
+	}
+
+	/* Job slots together with the events that act on them */
+	{
+		rank=same;
+		ordering=out;
+		sstop [ shape=ellipse label="SS-Timer expires" ]
+		jobslots [ shape=record label="Jobslots: | <0>js[0] | <1>js[1] | <2>js[2]" ];
+
+		irq [ label="IRQ" shape=ellipse ];
+
+		job_finish [ shape=plaintext label="don't requeue;\n/* job done */" ];
+	}
+
+	hstop [ shape=ellipse label="HS-Timer expires" ]
+
+	/*
+	 * Edges
+	 */
+
+	call_enqueue -> queues_middle_sep [ lhead=cluster_policy_queues ];
+
+	low_queue:qr -> call_dequeue:w;
+	rt_queue:qr -> call_dequeue:w;
+
+	call_dequeue -> as1 [lhead=cluster_runpool];
+
+	/* Job dequeue and the soft-stop / hard-stop timer chain */
+	as1->call_jdequeue         [ltail=cluster_runpool];
+	call_jdequeue->jobslots:0;
+	call_jdequeue->sstop_dotfixup [ arrowhead=none];
+	sstop_dotfixup->sstop      [label="Spawn SS-Timer"];
+	sstop->jobslots            [label="SoftStop"];
+	sstop->hstop               [label="Spawn HS-Timer"];
+	hstop->jobslots:ne            [label="HardStop"];
+
+
+	/* A context leaving the run pool is either requeued or dropped */
+	as3->call_ctxfinish:ne [ ltail=cluster_runpool ];
+	call_ctxfinish:sw->rt_queue:qm [ lhead=cluster_policy_queues label="enqueue_ctx()\n/* ctx still has jobs */" ];
+
+	call_ctxfinish->call_ctxdone [constraint=false];
+
+	call_ctxdone->call_enqueue [weight=0.1 labeldistance=20.0 labelangle=0.0 taillabel="Job submitted to the ctx" style=dotted constraint=false];
+
+
+	{
+	jobslots->irq   [constraint=false];
+
+	irq->job_finish [constraint=false];
+	}
+
+	irq->as2  [lhead=cluster_runpool label="requeue_job()\n/* timeslice expired */" ];
+
+}
--- a/drivers/gpu/arm/midgard_for_linux/docs/policy_overview.dot
+++ b/drivers/gpu/arm/midgard_for_linux/docs/policy_overview.dot
@ -0,0 +1,63 @@
+/*
+ *
+ * (C) COPYRIGHT 2010 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/*
+ * High-level view of the scheduling policy: contexts cycle between the
+ * policy queue and the run pool, and jobs cycle between the run pool and
+ * the job slots.
+ */
+digraph policy_objects_diagram {
+	rankdir=LR
+	size="6,6"
+	compound=true;
+
+	node [ shape = box ];
+
+	call_enqueue [ shape=plaintext label="enqueue ctx" ];
+
+
+	policy_queue [ label="Policy's Queue" ];
+
+	/* Run pool and the terminal state of a context, on the same rank */
+	{
+		rank=same;
+		runpool [ label="Policy's Run Pool" ];
+
+		ctx_finish [ label="ctx finished" ];
+	}
+
+	/* Job slots and the terminal state of a job, on the same rank */
+	{
+		rank=same;
+		jobslots [ shape=record label="Jobslots: | <0>js[0] | <1>js[1] | <2>js[2]" ];
+
+		job_finish [ label="Job finished" ];
+	}
+
+
+
+	/*
+	 * Edges
+	 */
+
+	call_enqueue -> policy_queue;
+
+	policy_queue->runpool [label="dequeue ctx" weight=0.1];
+	runpool->policy_queue [label="requeue ctx" weight=0.1];
+
+	/* Dotted edges lead to the terminal "finished" nodes */
+	runpool->ctx_finish [ style=dotted ];
+
+	runpool->jobslots  [label="dequeue job" weight=0.1];
+	jobslots->runpool  [label="requeue job" weight=0.1];
+
+	jobslots->job_finish [ style=dotted ];
+}
--- a/drivers/gpu/arm/midgard_for_linux/mali_base_hwconfig_features.h
+++ b/drivers/gpu/arm/midgard_for_linux/mali_base_hwconfig_features.h
@ -0,0 +1,223 @@
+/*
+ *
+ * (C) COPYRIGHT 2015-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/* AUTOMATICALLY GENERATED FILE. If you want to amend the issues/features,
+ * please update base/tools/hwconfig_generator/hwc_{issues,features}.py
+ * For more information see base/tools/hwconfig_generator/README
+ */
+
+#ifndef _BASE_HWCONFIG_FEATURES_H_
+#define _BASE_HWCONFIG_FEATURES_H_
+
+enum base_hw_feature {
+	BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION,
+	BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS,
+	BASE_HW_FEATURE_33BIT_VA,
+	BASE_HW_FEATURE_XAFFINITY,
+	BASE_HW_FEATURE_OUT_OF_ORDER_EXEC,
+	BASE_HW_FEATURE_MRT,
+	BASE_HW_FEATURE_BRNDOUT_CC,
+	BASE_HW_FEATURE_INTERPIPE_REG_ALIASING,
+	BASE_HW_FEATURE_LD_ST_TILEBUFFER,
+	BASE_HW_FEATURE_MSAA_16X,
+	BASE_HW_FEATURE_32_BIT_UNIFORM_ADDRESS,
+	BASE_HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL,
+	BASE_HW_FEATURE_OPTIMIZED_COVERAGE_MASK,
+	BASE_HW_FEATURE_T7XX_PAIRING_RULES,
+	BASE_HW_FEATURE_LD_ST_LEA_TEX,
+	BASE_HW_FEATURE_LINEAR_FILTER_FLOAT,
+	BASE_HW_FEATURE_WORKGROUP_ROUND_MULTIPLE_OF_4,
+	BASE_HW_FEATURE_IMAGES_IN_FRAGMENT_SHADERS,
+	BASE_HW_FEATURE_TEST4_DATUM_MODE,
+	BASE_HW_FEATURE_NEXT_INSTRUCTION_TYPE,
+	BASE_HW_FEATURE_BRNDOUT_KILL,
+	BASE_HW_FEATURE_WARPING,
+	BASE_HW_FEATURE_V4,
+	BASE_HW_FEATURE_FLUSH_REDUCTION,
+	BASE_HW_FEATURE_PROTECTED_MODE,
+	BASE_HW_FEATURE_COHERENCY_REG,
+	BASE_HW_FEATURE_PROTECTED_DEBUG_MODE,
+	BASE_HW_FEATURE_END
+};
+
+static const enum base_hw_feature base_hw_features_generic[] = {
+	BASE_HW_FEATURE_END
+};
+
+static const enum base_hw_feature base_hw_features_t60x[] = {
+	BASE_HW_FEATURE_LD_ST_LEA_TEX,
+	BASE_HW_FEATURE_LINEAR_FILTER_FLOAT,
+	BASE_HW_FEATURE_V4,
+	BASE_HW_FEATURE_END
+};
+
+static const enum base_hw_feature base_hw_features_t62x[] = {
+	BASE_HW_FEATURE_LD_ST_LEA_TEX,
+	BASE_HW_FEATURE_LINEAR_FILTER_FLOAT,
+	BASE_HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL,
+	BASE_HW_FEATURE_V4,
+	BASE_HW_FEATURE_END
+};
+
+static const enum base_hw_feature base_hw_features_t72x[] = {
+	BASE_HW_FEATURE_33BIT_VA,
+	BASE_HW_FEATURE_32_BIT_UNIFORM_ADDRESS,
+	BASE_HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL,
+	BASE_HW_FEATURE_INTERPIPE_REG_ALIASING,
+	BASE_HW_FEATURE_OPTIMIZED_COVERAGE_MASK,
+	BASE_HW_FEATURE_T7XX_PAIRING_RULES,
+	BASE_HW_FEATURE_WORKGROUP_ROUND_MULTIPLE_OF_4,
+	BASE_HW_FEATURE_WARPING,
+	BASE_HW_FEATURE_V4,
+	BASE_HW_FEATURE_END
+};
+
+static const enum base_hw_feature base_hw_features_t76x[] = {
+	BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION,
+	BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS,
+	BASE_HW_FEATURE_XAFFINITY,
+	BASE_HW_FEATURE_32_BIT_UNIFORM_ADDRESS,
+	BASE_HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL,
+	BASE_HW_FEATURE_BRNDOUT_CC,
+	BASE_HW_FEATURE_LD_ST_LEA_TEX,
+	BASE_HW_FEATURE_LD_ST_TILEBUFFER,
+	BASE_HW_FEATURE_LINEAR_FILTER_FLOAT,
+	BASE_HW_FEATURE_MRT,
+	BASE_HW_FEATURE_MSAA_16X,
+	BASE_HW_FEATURE_OUT_OF_ORDER_EXEC,
+	BASE_HW_FEATURE_T7XX_PAIRING_RULES,
+	BASE_HW_FEATURE_TEST4_DATUM_MODE,
+	BASE_HW_FEATURE_END
+};
+
+static const enum base_hw_feature base_hw_features_tFxx[] = {
+	BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION,
+	BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS,
+	BASE_HW_FEATURE_XAFFINITY,
+	BASE_HW_FEATURE_32_BIT_UNIFORM_ADDRESS,
+	BASE_HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL,
+	BASE_HW_FEATURE_BRNDOUT_CC,
+	BASE_HW_FEATURE_BRNDOUT_KILL,
+	BASE_HW_FEATURE_LD_ST_LEA_TEX,
+	BASE_HW_FEATURE_LD_ST_TILEBUFFER,
+	BASE_HW_FEATURE_LINEAR_FILTER_FLOAT,
+	BASE_HW_FEATURE_MRT,
+	BASE_HW_FEATURE_MSAA_16X,
+	BASE_HW_FEATURE_NEXT_INSTRUCTION_TYPE,
+	BASE_HW_FEATURE_OUT_OF_ORDER_EXEC,
+	BASE_HW_FEATURE_T7XX_PAIRING_RULES,
+	BASE_HW_FEATURE_TEST4_DATUM_MODE,
+	BASE_HW_FEATURE_END
+};
+
+static const enum base_hw_feature base_hw_features_t83x[] = {
+	BASE_HW_FEATURE_33BIT_VA,
+	BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION,
+	BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS,
+	BASE_HW_FEATURE_XAFFINITY,
+	BASE_HW_FEATURE_WARPING,
+	BASE_HW_FEATURE_INTERPIPE_REG_ALIASING,
+	BASE_HW_FEATURE_32_BIT_UNIFORM_ADDRESS,
+	BASE_HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL,
+	BASE_HW_FEATURE_BRNDOUT_CC,
+	BASE_HW_FEATURE_BRNDOUT_KILL,
+	BASE_HW_FEATURE_LD_ST_LEA_TEX,
+	BASE_HW_FEATURE_LD_ST_TILEBUFFER,
+	BASE_HW_FEATURE_LINEAR_FILTER_FLOAT,
+	BASE_HW_FEATURE_MRT,
+	BASE_HW_FEATURE_NEXT_INSTRUCTION_TYPE,
+	BASE_HW_FEATURE_OUT_OF_ORDER_EXEC,
+	BASE_HW_FEATURE_T7XX_PAIRING_RULES,
+	BASE_HW_FEATURE_TEST4_DATUM_MODE,
+	BASE_HW_FEATURE_END
+};
+
+static const enum base_hw_feature base_hw_features_t82x[] = {
+	BASE_HW_FEATURE_33BIT_VA,
+	BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION,
+	BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS,
+	BASE_HW_FEATURE_XAFFINITY,
+	BASE_HW_FEATURE_WARPING,
+	BASE_HW_FEATURE_INTERPIPE_REG_ALIASING,
+	BASE_HW_FEATURE_32_BIT_UNIFORM_ADDRESS,
+	BASE_HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL,
+	BASE_HW_FEATURE_BRNDOUT_CC,
+	BASE_HW_FEATURE_BRNDOUT_KILL,
+	BASE_HW_FEATURE_LD_ST_LEA_TEX,
+	BASE_HW_FEATURE_LD_ST_TILEBUFFER,
+	BASE_HW_FEATURE_LINEAR_FILTER_FLOAT,
+	BASE_HW_FEATURE_MRT,
+	BASE_HW_FEATURE_NEXT_INSTRUCTION_TYPE,
+	BASE_HW_FEATURE_OUT_OF_ORDER_EXEC,
+	BASE_HW_FEATURE_T7XX_PAIRING_RULES,
+	BASE_HW_FEATURE_TEST4_DATUM_MODE,
+	BASE_HW_FEATURE_END
+};
+
+static const enum base_hw_feature base_hw_features_tMIx[] = {
+	BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION,
+	BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS,
+	BASE_HW_FEATURE_XAFFINITY,
+	BASE_HW_FEATURE_WARPING,
+	BASE_HW_FEATURE_INTERPIPE_REG_ALIASING,
+	BASE_HW_FEATURE_32_BIT_UNIFORM_ADDRESS,
+	BASE_HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL,
+	BASE_HW_FEATURE_BRNDOUT_CC,
+	BASE_HW_FEATURE_BRNDOUT_KILL,
+	BASE_HW_FEATURE_LD_ST_LEA_TEX,
+	BASE_HW_FEATURE_LD_ST_TILEBUFFER,
+	BASE_HW_FEATURE_LINEAR_FILTER_FLOAT,
+	BASE_HW_FEATURE_MRT,
+	BASE_HW_FEATURE_MSAA_16X,
+	BASE_HW_FEATURE_NEXT_INSTRUCTION_TYPE,
+	BASE_HW_FEATURE_OUT_OF_ORDER_EXEC,
+	BASE_HW_FEATURE_T7XX_PAIRING_RULES,
+	BASE_HW_FEATURE_TEST4_DATUM_MODE,
+	BASE_HW_FEATURE_FLUSH_REDUCTION,
+	BASE_HW_FEATURE_PROTECTED_MODE,
+	BASE_HW_FEATURE_COHERENCY_REG,
+	BASE_HW_FEATURE_END
+};
+
+static const enum base_hw_feature base_hw_features_tHEx[] = {
+	BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION,
+	BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS,
+	BASE_HW_FEATURE_XAFFINITY,
+	BASE_HW_FEATURE_WARPING,
+	BASE_HW_FEATURE_INTERPIPE_REG_ALIASING,
+	BASE_HW_FEATURE_32_BIT_UNIFORM_ADDRESS,
+	BASE_HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL,
+	BASE_HW_FEATURE_BRNDOUT_CC,
+	BASE_HW_FEATURE_BRNDOUT_KILL,
+	BASE_HW_FEATURE_LD_ST_LEA_TEX,
+	BASE_HW_FEATURE_LD_ST_TILEBUFFER,
+	BASE_HW_FEATURE_LINEAR_FILTER_FLOAT,
+	BASE_HW_FEATURE_MRT,
+	BASE_HW_FEATURE_MSAA_16X,
+	BASE_HW_FEATURE_NEXT_INSTRUCTION_TYPE,
+	BASE_HW_FEATURE_OUT_OF_ORDER_EXEC,
+	BASE_HW_FEATURE_T7XX_PAIRING_RULES,
+	BASE_HW_FEATURE_TEST4_DATUM_MODE,
+	BASE_HW_FEATURE_FLUSH_REDUCTION,
+	BASE_HW_FEATURE_PROTECTED_MODE,
+	BASE_HW_FEATURE_PROTECTED_DEBUG_MODE,
+	BASE_HW_FEATURE_COHERENCY_REG,
+	BASE_HW_FEATURE_END
+};
+
+
+#endif /* _BASE_HWCONFIG_FEATURES_H_ */
--- a/drivers/gpu/arm/midgard_for_linux/mali_base_hwconfig_issues.h
+++ b/drivers/gpu/arm/midgard_for_linux/mali_base_hwconfig_issues.h
--- a/drivers/gpu/arm/midgard_for_linux/mali_base_kernel.h
+++ b/drivers/gpu/arm/midgard_for_linux/mali_base_kernel.h
--- a/drivers/gpu/arm/midgard_for_linux/mali_base_kernel_sync.h
+++ b/drivers/gpu/arm/midgard_for_linux/mali_base_kernel_sync.h
@ -0,0 +1,47 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2013 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file
+ * Base cross-process sync API.
+ */
+
+#ifndef _BASE_KERNEL_SYNC_H_
+#define _BASE_KERNEL_SYNC_H_
+
+#include <linux/ioctl.h>
+
+#define STREAM_IOC_MAGIC '~'
+
+/* Fence insert.
+ *
+ * Inserts a fence on the stream operated on.
+ * Fence can be waited via a base fence wait soft-job
+ * or triggered via a base fence trigger soft-job.
+ *
+ * Fences must be cleaned up with close when no longer needed.
+ *
+ * No input/output arguments.
+ * Returns
+ * >=0 fd
+ * <0  error code
+ */
+#define STREAM_IOC_FENCE_INSERT _IO(STREAM_IOC_MAGIC, 0)
+
+#endif				/* _BASE_KERNEL_SYNC_H_ */
--- a/drivers/gpu/arm/midgard_for_linux/mali_base_mem_priv.h
+++ b/drivers/gpu/arm/midgard_for_linux/mali_base_mem_priv.h
@ -0,0 +1,52 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2014 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#ifndef _BASE_MEM_PRIV_H_
+#define _BASE_MEM_PRIV_H_
+
+#define BASE_SYNCSET_OP_MSYNC	(1U << 0)
+#define BASE_SYNCSET_OP_CSYNC	(1U << 1)
+
+/*
+ * This structure describes a basic memory coherency operation.
+ * It can either be:
+ * @li a sync from CPU to Memory:
+ *	- type = ::BASE_SYNCSET_OP_MSYNC
+ *	- mem_handle = a handle to the memory object on which the operation
+ *	  is taking place
+ *	- user_addr = the address of the range to be synced
+ *	- size = the amount of data to be synced, in bytes
+ *	- offset is ignored.
+ * @li a sync from Memory to CPU:
+ *	- type = ::BASE_SYNCSET_OP_CSYNC
+ *	- mem_handle = a handle to the memory object on which the operation
+ *	  is taking place
+ *	- user_addr = the address of the range to be synced
+ *	- size = the amount of data to be synced, in bytes.
+ *	- offset is ignored.
+ */
+struct basep_syncset {
+	base_mem_handle mem_handle;
+	u64 user_addr;
+	u64 size;
+	u8 type;
+	u8 padding[7];
+};
+
+#endif
--- a/drivers/gpu/arm/midgard_for_linux/mali_base_vendor_specific_func.h
+++ b/drivers/gpu/arm/midgard_for_linux/mali_base_vendor_specific_func.h
@ -0,0 +1,24 @@
+/*
+ *
+ * (C) COPYRIGHT 2010, 2012-2013, 2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+#ifndef _BASE_VENDOR_SPEC_FUNC_H_
+#define _BASE_VENDOR_SPEC_FUNC_H_
+
+int kbase_get_vendor_specific_cpu_clock_speed(u32 * const);
+
+#endif	/*_BASE_VENDOR_SPEC_FUNC_H_*/
--- a/drivers/gpu/arm/midgard_for_linux/mali_kbase.h
+++ b/drivers/gpu/arm/midgard_for_linux/mali_kbase.h
@ -0,0 +1,607 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#ifndef _KBASE_H_
+#define _KBASE_H_
+
+#include <mali_malisw.h>
+
+#include <mali_kbase_debug.h>
+
+#include <asm/page.h>
+
+#include <linux/atomic.h>
+#include <linux/highmem.h>
+#include <linux/hrtimer.h>
+#include <linux/ktime.h>
+#include <linux/list.h>
+#include <linux/mm_types.h>
+#include <linux/mutex.h>
+#include <linux/rwsem.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+#include <linux/vmalloc.h>
+#include <linux/wait.h>
+#include <linux/workqueue.h>
+
+#include "mali_base_kernel.h"
+#include <mali_kbase_uku.h>
+#include <mali_kbase_linux.h>
+
+/*
+ * Include mali_kbase_defs.h first as this provides types needed by other local
+ * header files.
+ */
+#include "mali_kbase_defs.h"
+
+#include "mali_kbase_context.h"
+#include "mali_kbase_strings.h"
+#include "mali_kbase_mem_lowlevel.h"
+#include "mali_kbase_trace_timeline.h"
+#include "mali_kbase_js.h"
+#include "mali_kbase_mem.h"
+#include "mali_kbase_utility.h"
+#include "mali_kbase_gpu_memory_debugfs.h"
+#include "mali_kbase_mem_profile_debugfs.h"
+#include "mali_kbase_debug_job_fault.h"
+#include "mali_kbase_jd_debugfs.h"
+#include "mali_kbase_gpuprops.h"
+#include "mali_kbase_jm.h"
+#include "mali_kbase_vinstr.h"
+#include "mali_kbase_ipa.h"
+#ifdef CONFIG_GPU_TRACEPOINTS
+#include <trace/events/gpu.h>
+#endif
+/**
+ * @page page_base_kernel_main Kernel-side Base (KBase) APIs
+ *
+ * The Kernel-side Base (KBase) APIs are divided up as follows:
+ * - @subpage page_kbase_js_policy
+ */
+
+/**
+ * @defgroup base_kbase_api Kernel-side Base (KBase) APIs
+ */
+
+struct kbase_device *kbase_device_alloc(void);
+/*
+* note: configuration attributes member of kbdev needs to have
+* been setup before calling kbase_device_init
+*/
+
+/*
+* API to acquire device list semaphore and return pointer
+* to the device list head
+*/
+const struct list_head *kbase_dev_list_get(void);
+/* API to release the device list semaphore */
+void kbase_dev_list_put(const struct list_head *dev_list);
+
+int kbase_device_init(struct kbase_device * const kbdev);
+void kbase_device_term(struct kbase_device *kbdev);
+void kbase_device_free(struct kbase_device *kbdev);
+int kbase_device_has_feature(struct kbase_device *kbdev, u32 feature);
+
+/* Needed for gator integration and for reporting vsync information */
+struct kbase_device *kbase_find_device(int minor);
+void kbase_release_device(struct kbase_device *kbdev);
+
+void kbase_set_profiling_control(struct kbase_device *kbdev, u32 control, u32 value);
+
+u32 kbase_get_profiling_control(struct kbase_device *kbdev, u32 control);
+
+struct kbase_context *
+kbase_create_context(struct kbase_device *kbdev, bool is_compat);
+void kbase_destroy_context(struct kbase_context *kctx);
+
+int kbase_jd_init(struct kbase_context *kctx);
+void kbase_jd_exit(struct kbase_context *kctx);
+#ifdef BASE_LEGACY_UK6_SUPPORT
+int kbase_jd_submit(struct kbase_context *kctx,
+		const struct kbase_uk_job_submit *submit_data,
+		int uk6_atom);
+#else
+int kbase_jd_submit(struct kbase_context *kctx,
+		const struct kbase_uk_job_submit *submit_data);
+#endif
+
+/**
+ * kbase_jd_done_worker - Handle a job completion
+ * @data: a &struct work_struct
+ *
+ * This function requeues the job from the runpool (if it was soft-stopped or
+ * removed from NEXT registers).
+ *
+ * Removes it from the system if it finished/failed/was cancelled.
+ *
+ * Resolves dependencies to add dependent jobs to the context, potentially
+ * starting them if necessary (which may add more references to the context)
+ *
+ * Releases the reference to the context from the no-longer-running job.
+ *
+ * Handles retrying submission outside of IRQ context if it failed from within
+ * IRQ context.
+ */
+void kbase_jd_done_worker(struct work_struct *data);
+
+void kbase_jd_done(struct kbase_jd_atom *katom, int slot_nr, ktime_t *end_timestamp,
+		kbasep_js_atom_done_code done_code);
+void kbase_jd_cancel(struct kbase_device *kbdev, struct kbase_jd_atom *katom);
+void kbase_jd_zap_context(struct kbase_context *kctx);
+bool jd_done_nolock(struct kbase_jd_atom *katom,
+		struct list_head *completed_jobs_ctx);
+void kbase_jd_free_external_resources(struct kbase_jd_atom *katom);
+bool jd_submit_atom(struct kbase_context *kctx,
+			 const struct base_jd_atom_v2 *user_atom,
+			 struct kbase_jd_atom *katom);
+void kbase_jd_dep_clear_locked(struct kbase_jd_atom *katom);
+
+void kbase_job_done(struct kbase_device *kbdev, u32 done);
+
+void kbase_gpu_cacheclean(struct kbase_device *kbdev,
+					struct kbase_jd_atom *katom);
+/**
+ * kbase_job_slot_ctx_priority_check_locked(): - Check for lower priority atoms
+ *                                               and soft stop them
+ * @kctx: Pointer to context to check.
+ * @katom: Pointer to priority atom.
+ *
+ * Atoms from @kctx on the same job slot as @katom, which have lower priority
+ * than @katom will be soft stopped and put back in the queue, so that atoms
+ * with higher priority can run.
+ *
+ * The hwaccess_lock must be held when calling this function.
+ */
+void kbase_job_slot_ctx_priority_check_locked(struct kbase_context *kctx,
+				struct kbase_jd_atom *katom);
+
+void kbase_job_slot_softstop(struct kbase_device *kbdev, int js,
+		struct kbase_jd_atom *target_katom);
+void kbase_job_slot_softstop_swflags(struct kbase_device *kbdev, int js,
+		struct kbase_jd_atom *target_katom, u32 sw_flags);
+void kbase_job_slot_hardstop(struct kbase_context *kctx, int js,
+		struct kbase_jd_atom *target_katom);
+void kbase_job_check_enter_disjoint(struct kbase_device *kbdev, u32 action,
+		base_jd_core_req core_reqs, struct kbase_jd_atom *target_katom);
+void kbase_job_check_leave_disjoint(struct kbase_device *kbdev,
+		struct kbase_jd_atom *target_katom);
+
+void kbase_event_post(struct kbase_context *ctx, struct kbase_jd_atom *event);
+int kbase_event_dequeue(struct kbase_context *ctx, struct base_jd_event_v2 *uevent);
+int kbase_event_pending(struct kbase_context *ctx);
+int kbase_event_init(struct kbase_context *kctx);
+void kbase_event_close(struct kbase_context *kctx);
+void kbase_event_cleanup(struct kbase_context *kctx);
+void kbase_event_wakeup(struct kbase_context *kctx);
+
+int kbase_process_soft_job(struct kbase_jd_atom *katom);
+int kbase_prepare_soft_job(struct kbase_jd_atom *katom);
+void kbase_finish_soft_job(struct kbase_jd_atom *katom);
+void kbase_cancel_soft_job(struct kbase_jd_atom *katom);
+void kbase_resume_suspended_soft_jobs(struct kbase_device *kbdev);
+void kbasep_add_waiting_soft_job(struct kbase_jd_atom *katom);
+void kbasep_remove_waiting_soft_job(struct kbase_jd_atom *katom);
+int kbase_soft_event_update(struct kbase_context *kctx,
+			    u64 event,
+			    unsigned char new_status);
+
+bool kbase_replay_process(struct kbase_jd_atom *katom);
+
+void kbasep_soft_job_timeout_worker(unsigned long data);
+void kbasep_complete_triggered_soft_events(struct kbase_context *kctx, u64 evt);
+
+/* api used internally for register access. Contains validation and tracing */
+void kbase_device_trace_register_access(struct kbase_context *kctx, enum kbase_reg_access_type type, u16 reg_offset, u32 reg_value);
+int kbase_device_trace_buffer_install(
+		struct kbase_context *kctx, u32 *tb, size_t size);
+void kbase_device_trace_buffer_uninstall(struct kbase_context *kctx);
+
+/* api to be ported per OS, only need to do the raw register access */
+void kbase_os_reg_write(struct kbase_device *kbdev, u16 offset, u32 value);
+u32 kbase_os_reg_read(struct kbase_device *kbdev, u16 offset);
+
+void kbasep_as_do_poke(struct work_struct *work);
+
+/** Returns the name associated with a Mali exception code
+ *
+ * This function is called from the interrupt handler when a GPU fault occurs.
+ * It reports the details of the fault using KBASE_DEBUG_PRINT_WARN.
+ *
+ * @param[in] kbdev     The kbase device that the GPU fault occurred from.
+ * @param[in] exception_code  exception code
+ * @return name associated with the exception code
+ */
+const char *kbase_exception_name(struct kbase_device *kbdev,
+		u32 exception_code);
+
+/**
+ * Check whether a system suspend is in progress, or has already been suspended
+ *
+ * The caller should ensure that either kbdev->pm.active_count_lock is held, or
+ * a dmb was executed recently (to ensure the value is most
+ * up-to-date). However, without a lock the value could change afterwards.
+ *
+ * @return false if a suspend is not in progress
+ * @return !=false otherwise
+ */
+static inline bool kbase_pm_is_suspending(struct kbase_device *kbdev)
+{
+	return kbdev->pm.suspending;
+}
+
+/**
+ * kbase_jd_atom_id - Return the ID of an atom within its context
+ * @kctx:  Context that owns the atom
+ * @katom: Atom whose ID is wanted; its kctx member must equal @kctx
+ *
+ * The ID is the atom's index in @kctx->jctx.atoms, as was originally supplied
+ * by userspace in base_jd_atom_v2::atom_number.
+ *
+ * Return: Index of @katom in the context's atom array
+ */
+static inline int kbase_jd_atom_id(struct kbase_context *kctx, struct kbase_jd_atom *katom)
+{
+	int result;
+
+	KBASE_DEBUG_ASSERT(kctx);
+	KBASE_DEBUG_ASSERT(katom);
+	KBASE_DEBUG_ASSERT(katom->kctx == kctx);
+
+	result = katom - &kctx->jctx.atoms[0];
+	/* NOTE(review): assumes jctx.atoms holds BASE_JD_ATOM_COUNT elements,
+	 * so an index equal to the count is already out of range and the
+	 * upper bound must be exclusive.
+	 */
+	KBASE_DEBUG_ASSERT(result >= 0 && result < BASE_JD_ATOM_COUNT);
+	return result;
+}
+
+/**
+ * kbase_jd_atom_from_id - Return the atom structure for the given atom ID
+ * @kctx: Context pointer
+ * @id:   ID of atom to retrieve
+ *
+ * @id is used directly as an index into @kctx->jctx.atoms; no range check is
+ * performed here, so the caller must pass a valid ID.
+ *
+ * Return: Pointer to struct kbase_jd_atom associated with the supplied ID
+ */
+static inline struct kbase_jd_atom *kbase_jd_atom_from_id(
+		struct kbase_context *kctx, int id)
+{
+	return &kctx->jctx.atoms[id];
+}
+
+/**
+ * Initialize the disjoint state
+ *
+ * The disjoint event count and state are both set to zero.
+ *
+ * Disjoint functions usage:
+ *
+ * The disjoint event count should be incremented whenever a disjoint event occurs.
+ *
+ * There are several cases which are regarded as disjoint behavior. Rather than just increment
+ * the counter during disjoint events we also increment the counter when jobs may be affected
+ * by what the GPU is currently doing. To facilitate this we have the concept of disjoint state.
+ *
+ * Disjoint state is entered during GPU reset and for the entire time that an atom is replaying
+ * (as part of the replay workaround). Increasing the disjoint state also increases the count of
+ * disjoint events.
+ *
+ * The disjoint state is then used to increase the count of disjoint events during job submission
+ * and job completion. Any atom submitted or completed while the disjoint state is greater than
+ * zero is regarded as a disjoint event.
+ *
+ * The disjoint event counter is also incremented immediately whenever a job is soft stopped
+ * and during context creation.
+ *
+ * @param kbdev The kbase device
+ */
+void kbase_disjoint_init(struct kbase_device *kbdev);
+
+/**
+ * Increase the count of disjoint events
+ * called when a disjoint event has happened
+ *
+ * @param kbdev The kbase device
+ */
+void kbase_disjoint_event(struct kbase_device *kbdev);
+
+/**
+ * Increase the count of disjoint events only if the GPU is in a disjoint state
+ *
+ * This should be called when something happens which could be disjoint if the GPU
+ * is in a disjoint state. The state refcount keeps track of this.
+ *
+ * @param kbdev The kbase device
+ */
+void kbase_disjoint_event_potential(struct kbase_device *kbdev);
+
+/**
+ * Returns the count of disjoint events
+ *
+ * @param kbdev The kbase device
+ * @return the count of disjoint events
+ */
+u32 kbase_disjoint_event_get(struct kbase_device *kbdev);
+
+/**
+ * Increment the refcount state indicating that the GPU is in a disjoint state.
+ *
+ * Also increments the disjoint event count (calls @ref kbase_disjoint_event).
+ * Once the disjoint state has completed, @ref kbase_disjoint_state_down
+ * should be called.
+ *
+ * @param kbdev The kbase device
+ */
+void kbase_disjoint_state_up(struct kbase_device *kbdev);
+
+/**
+ * Decrement the refcount state
+ *
+ * Also increments the disjoint event count (calls @ref kbase_disjoint_event)
+ *
+ * Called after @ref kbase_disjoint_state_up once the disjoint state is over
+ *
+ * @param kbdev The kbase device
+ */
+void kbase_disjoint_state_down(struct kbase_device *kbdev);
+
+/**
+ * If a job is soft stopped and the number of contexts is >= this value
+ * it is reported as a disjoint event
+ */
+#define KBASE_DISJOINT_STATE_INTERLEAVED_CONTEXT_COUNT_THRESHOLD 2
+
+#if !defined(UINT64_MAX)
+	#define UINT64_MAX ((uint64_t)0xFFFFFFFFFFFFFFFFULL)
+#endif
+
+#if KBASE_TRACE_ENABLE
+void kbasep_trace_debugfs_init(struct kbase_device *kbdev);
+
+#ifndef CONFIG_MALI_SYSTEM_TRACE
+/** Add trace values about a job-slot
+ *
+ * @note Any functions called through this macro will still be evaluated in
+ * Release builds (CONFIG_MALI_DEBUG not defined). Therefore, when KBASE_TRACE_ENABLE == 0 any
+ * functions called to get the parameters supplied to this macro must:
+ * - be static or static inline
+ * - must just return 0 and have no other statements present in the body.
+ */
+#define KBASE_TRACE_ADD_SLOT(kbdev, code, ctx, katom, gpu_addr, jobslot) \
+	kbasep_trace_add(kbdev, KBASE_TRACE_CODE(code), ctx, katom, gpu_addr, \
+			KBASE_TRACE_FLAG_JOBSLOT, 0, jobslot, 0)
+
+/** Add trace values about a job-slot, with info
+ *
+ * @note Any functions called through this macro will still be evaluated in
+ * Release builds (CONFIG_MALI_DEBUG not defined). Therefore, when KBASE_TRACE_ENABLE == 0 any
+ * functions called to get the parameters supplied to this macro must:
+ * - be static or static inline
+ * - must just return 0 and have no other statements present in the body.
+ */
+#define KBASE_TRACE_ADD_SLOT_INFO(kbdev, code, ctx, katom, gpu_addr, jobslot, info_val) \
+	kbasep_trace_add(kbdev, KBASE_TRACE_CODE(code), ctx, katom, gpu_addr, \
+			KBASE_TRACE_FLAG_JOBSLOT, 0, jobslot, info_val)
+
+/** Add trace values about a ctx refcount
+ *
+ * @note Any functions called through this macro will still be evaluated in
+ * Release builds (CONFIG_MALI_DEBUG not defined). Therefore, when KBASE_TRACE_ENABLE == 0 any
+ * functions called to get the parameters supplied to this macro must:
+ * - be static or static inline
+ * - must just return 0 and have no other statements present in the body.
+ */
+#define KBASE_TRACE_ADD_REFCOUNT(kbdev, code, ctx, katom, gpu_addr, refcount) \
+	kbasep_trace_add(kbdev, KBASE_TRACE_CODE(code), ctx, katom, gpu_addr, \
+			KBASE_TRACE_FLAG_REFCOUNT, refcount, 0, 0)
+/** Add trace values about a ctx refcount, and info
+ *
+ * @note Any functions called through this macro will still be evaluated in
+ * Release builds (CONFIG_MALI_DEBUG not defined). Therefore, when KBASE_TRACE_ENABLE == 0 any
+ * functions called to get the parameters supplied to this macro must:
+ * - be static or static inline
+ * - must just return 0 and have no other statements present in the body.
+ */
+#define KBASE_TRACE_ADD_REFCOUNT_INFO(kbdev, code, ctx, katom, gpu_addr, refcount, info_val) \
+	kbasep_trace_add(kbdev, KBASE_TRACE_CODE(code), ctx, katom, gpu_addr, \
+			KBASE_TRACE_FLAG_REFCOUNT, refcount, 0, info_val)
+
+/** Add trace values (no slot or refcount)
+ *
+ * @note Any functions called through this macro will still be evaluated in
+ * Release builds (CONFIG_MALI_DEBUG not defined). Therefore, when KBASE_TRACE_ENABLE == 0 any
+ * functions called to get the parameters supplied to this macro must:
+ * - be static or static inline
+ * - must just return 0 and have no other statements present in the body.
+ */
+#define KBASE_TRACE_ADD(kbdev, code, ctx, katom, gpu_addr, info_val)     \
+	kbasep_trace_add(kbdev, KBASE_TRACE_CODE(code), ctx, katom, gpu_addr, \
+			0, 0, 0, info_val)
+
+/** Clear the trace */
+#define KBASE_TRACE_CLEAR(kbdev) \
+	kbasep_trace_clear(kbdev)
+
+/** Dump the slot trace */
+#define KBASE_TRACE_DUMP(kbdev) \
+	kbasep_trace_dump(kbdev)
+
+/** PRIVATE - do not use directly. Use KBASE_TRACE_ADD() instead */
+void kbasep_trace_add(struct kbase_device *kbdev, enum kbase_trace_code code, void *ctx, struct kbase_jd_atom *katom, u64 gpu_addr, u8 flags, int refcount, int jobslot, unsigned long info_val);
+/** PRIVATE - do not use directly. Use KBASE_TRACE_CLEAR() instead */
+void kbasep_trace_clear(struct kbase_device *kbdev);
+#else /* #ifndef CONFIG_MALI_SYSTEM_TRACE */
+/* Dispatch kbase trace events as system trace events */
+#include <mali_linux_kbase_trace.h>
+#define KBASE_TRACE_ADD_SLOT(kbdev, code, ctx, katom, gpu_addr, jobslot)\
+	trace_mali_##code(jobslot, 0)
+
+#define KBASE_TRACE_ADD_SLOT_INFO(kbdev, code, ctx, katom, gpu_addr, jobslot, info_val)\
+	trace_mali_##code(jobslot, info_val)
+
+#define KBASE_TRACE_ADD_REFCOUNT(kbdev, code, ctx, katom, gpu_addr, refcount)\
+	trace_mali_##code(refcount, 0)
+
+#define KBASE_TRACE_ADD_REFCOUNT_INFO(kbdev, code, ctx, katom, gpu_addr, refcount, info_val)\
+	trace_mali_##code(refcount, info_val)
+
+#define KBASE_TRACE_ADD(kbdev, code, ctx, katom, gpu_addr, info_val)\
+	trace_mali_##code(gpu_addr, info_val)
+
+#define KBASE_TRACE_CLEAR(kbdev)\
+	do {\
+		CSTD_UNUSED(kbdev);\
+		CSTD_NOP(0);\
+	} while (0)
+#define KBASE_TRACE_DUMP(kbdev)\
+	do {\
+		CSTD_UNUSED(kbdev);\
+		CSTD_NOP(0);\
+	} while (0)
+
+#endif /* #ifndef CONFIG_MALI_SYSTEM_TRACE */
+#else
+#define KBASE_TRACE_ADD_SLOT(kbdev, code, ctx, katom, gpu_addr, jobslot)\
+	do {\
+		CSTD_UNUSED(kbdev);\
+		CSTD_NOP(code);\
+		CSTD_UNUSED(ctx);\
+		CSTD_UNUSED(katom);\
+		CSTD_UNUSED(gpu_addr);\
+		CSTD_UNUSED(jobslot);\
+	} while (0)
+
+#define KBASE_TRACE_ADD_SLOT_INFO(kbdev, code, ctx, katom, gpu_addr, jobslot, info_val)\
+	do {\
+		CSTD_UNUSED(kbdev);\
+		CSTD_NOP(code);\
+		CSTD_UNUSED(ctx);\
+		CSTD_UNUSED(katom);\
+		CSTD_UNUSED(gpu_addr);\
+		CSTD_UNUSED(jobslot);\
+		CSTD_UNUSED(info_val);\
+		CSTD_NOP(0);\
+	} while (0)
+
+#define KBASE_TRACE_ADD_REFCOUNT(kbdev, code, ctx, katom, gpu_addr, refcount)\
+	do {\
+		CSTD_UNUSED(kbdev);\
+		CSTD_NOP(code);\
+		CSTD_UNUSED(ctx);\
+		CSTD_UNUSED(katom);\
+		CSTD_UNUSED(gpu_addr);\
+		CSTD_UNUSED(refcount);\
+		CSTD_NOP(0);\
+	} while (0)
+
+/* Stub used when KBASE_TRACE_ENABLE is 0: records nothing, but references
+ * every argument (including refcount, as KBASE_TRACE_ADD_REFCOUNT does) so
+ * that values computed only for tracing are not reported as unused.
+ */
+#define KBASE_TRACE_ADD_REFCOUNT_INFO(kbdev, code, ctx, katom, gpu_addr, refcount, info_val)\
+	do {\
+		CSTD_UNUSED(kbdev);\
+		CSTD_NOP(code);\
+		CSTD_UNUSED(ctx);\
+		CSTD_UNUSED(katom);\
+		CSTD_UNUSED(gpu_addr);\
+		CSTD_UNUSED(refcount);\
+		CSTD_UNUSED(info_val);\
+		CSTD_NOP(0);\
+	} while (0)
+
+/* Stub used when KBASE_TRACE_ENABLE is 0. Parameter names mirror the enabled
+ * KBASE_TRACE_ADD() definition so both variants read the same at call sites.
+ */
+#define KBASE_TRACE_ADD(kbdev, code, ctx, katom, gpu_addr, info_val)\
+	do {\
+		CSTD_UNUSED(kbdev);\
+		CSTD_NOP(code);\
+		CSTD_UNUSED(ctx);\
+		CSTD_UNUSED(katom);\
+		CSTD_UNUSED(gpu_addr);\
+		CSTD_UNUSED(info_val);\
+		CSTD_NOP(0);\
+	} while (0)
+
+#define KBASE_TRACE_CLEAR(kbdev)\
+	do {\
+		CSTD_UNUSED(kbdev);\
+		CSTD_NOP(0);\
+	} while (0)
+#define KBASE_TRACE_DUMP(kbdev)\
+	do {\
+		CSTD_UNUSED(kbdev);\
+		CSTD_NOP(0);\
+	} while (0)
+#endif /* KBASE_TRACE_ENABLE */
+/** PRIVATE - do not use directly. Use KBASE_TRACE_DUMP() instead */
+void kbasep_trace_dump(struct kbase_device *kbdev);
+
+#ifdef CONFIG_MALI_DEBUG
+/**
+ * kbase_set_driver_inactive - Force driver to go inactive
+ * @kbdev:    Device pointer
+ * @inactive: true if driver should go inactive, false otherwise
+ *
+ * Forcing the driver inactive will cause all future IOCTLs to wait until the
+ * driver is made active again. This is intended solely for the use of tests
+ * which require that no jobs are running while the test executes.
+ */
+void kbase_set_driver_inactive(struct kbase_device *kbdev, bool inactive);
+#endif /* CONFIG_MALI_DEBUG */
+
+
+#if defined(CONFIG_DEBUG_FS) && !defined(CONFIG_MALI_NO_MALI)
+
+/* kbase_io_history_init - initialize data struct for register access history
+ *
+ * @h The register history to initialize
+ * @n The number of register accesses that the buffer could hold
+ *
+ * @return 0 if successfully initialized, failure otherwise
+ */
+int kbase_io_history_init(struct kbase_io_history *h, u16 n);
+
+/* kbase_io_history_term - uninit all resources for the register access history
+ *
+ * @h The register history to terminate
+ */
+void kbase_io_history_term(struct kbase_io_history *h);
+
+/* kbase_io_history_dump - print the register history to the kernel ring buffer
+ *
+ * @kbdev Pointer to kbase_device containing the register history to dump
+ */
+void kbase_io_history_dump(struct kbase_device *kbdev);
+
+/**
+ * kbase_io_history_resize - resize the register access history buffer.
+ *
+ * @h: Pointer to a valid register history to resize
+ * @new_size: Number of accesses the buffer could hold
+ *
+ * A successful resize will clear all recent register accesses.
+ * If resizing fails for any reason (e.g., could not allocate memory, invalid
+ * buffer size) then the original buffer will be kept intact.
+ *
+ * @return 0 if the buffer was resized, failure otherwise
+ */
+int kbase_io_history_resize(struct kbase_io_history *h, u16 new_size);
+
+#else /* CONFIG_DEBUG_FS */
+
+#define kbase_io_history_init(...) ((int)0)
+
+#define kbase_io_history_term CSTD_NOP
+
+#define kbase_io_history_dump CSTD_NOP
+
+#define kbase_io_history_resize CSTD_NOP
+
+#endif /* CONFIG_DEBUG_FS */
+
+
+#endif
+
+
+
--- a/drivers/gpu/arm/midgard_for_linux/mali_kbase_10969_workaround.c
+++ b/drivers/gpu/arm/midgard_for_linux/mali_kbase_10969_workaround.c
@ -0,0 +1,209 @@
+/*
+ *
+ * (C) COPYRIGHT 2013-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+#include <linux/dma-mapping.h>
+#include <mali_kbase.h>
+#include <mali_kbase_10969_workaround.h>
+
+/* This function is used to work around a HW issue with single iterator GPUs.
+ * If a fragment job is soft-stopped on the edge of its bounding box, it can
+ * happen that the restart index is out of bounds and the rerun causes a tile
+ * range fault. If this happens we try to clamp the restart index to a correct
+ * value and rerun the job.
+ */
+/* Mask of X and Y coordinates for the coordinates words in the descriptors*/
+#define X_COORDINATE_MASK 0x00000FFF
+#define Y_COORDINATE_MASK 0x0FFF0000
+/* Max number of words needed from the fragment shader job descriptor */
+#define JOB_HEADER_SIZE_IN_WORDS 10
+#define JOB_HEADER_SIZE (JOB_HEADER_SIZE_IN_WORDS*sizeof(u32))
+
+/* Word 0: Status Word */
+#define JOB_DESC_STATUS_WORD 0
+/* Word 1: Restart Index */
+#define JOB_DESC_RESTART_INDEX_WORD 1
+/* Word 2: Fault address low word */
+#define JOB_DESC_FAULT_ADDR_LOW_WORD 2
+/* Word 8: Minimum Tile Coordinates */
+#define FRAG_JOB_DESC_MIN_TILE_COORD_WORD 8
+/* Word 9: Maximum Tile Coordinates */
+#define FRAG_JOB_DESC_MAX_TILE_COORD_WORD 9
+
+/**
+ * kbasep_10969_workaround_clamp_coordinates - clamp an out-of-range restart
+ * index in a fragment job descriptor
+ * @katom: atom whose job chain address (katom->jc) points at the descriptor
+ *
+ * Reads the first JOB_HEADER_SIZE bytes of the descriptor (which may straddle
+ * two pages), takes the tile coordinates found in the fault address low word
+ * as the restart position and clamps X and Y independently into the
+ * [min, max] tile bounding box of the job. If anything was clamped, the
+ * restart index is rewritten, the fault address low word is cleared, the
+ * status word is set to BASE_JD_EVENT_STOPPED and the descriptor is written
+ * back and synced for the device.
+ *
+ * Return: 1 if the descriptor was modified, 0 otherwise (including when the
+ * atom is not a fragment job or its region/pages cannot be looked up).
+ */
+int kbasep_10969_workaround_clamp_coordinates(struct kbase_jd_atom *katom)
+{
+	struct device *dev = katom->kctx->kbdev->dev;
+	u32   clamped = 0;
+	struct kbase_va_region *region;
+	phys_addr_t *page_array;
+	u64 page_index;
+	u32 offset = katom->jc & (~PAGE_MASK);
+	u32 *page_1 = NULL;
+	u32 *page_2 = NULL;
+	u32   job_header[JOB_HEADER_SIZE_IN_WORDS];
+	void *dst = job_header;
+	u32 minX, minY, maxX, maxY;
+	u32 restartX, restartY;
+	u32 clampedX, clampedY;
+	struct page *p;
+	u32 copy_size;
+
+	dev_warn(dev, "Called TILE_RANGE_FAULT workaround clamping function.\n");
+	if (!(katom->core_req & BASE_JD_REQ_FS))
+		return 0;
+
+	kbase_gpu_vm_lock(katom->kctx);
+	region = kbase_region_tracker_find_region_enclosing_address(katom->kctx,
+			katom->jc);
+	if (!region || (region->flags & KBASE_REG_FREE))
+		goto out_unlock;
+
+	page_array = kbase_get_cpu_phy_pages(region);
+	if (!page_array)
+		goto out_unlock;
+
+	page_index = (katom->jc >> PAGE_SHIFT) - region->start_pfn;
+
+	p = pfn_to_page(PFN_DOWN(page_array[page_index]));
+
+	/* We need the first 10 words of the fragment shader job descriptor.
+	 * copy_size is the number of those bytes that live in the first page:
+	 * it equals JOB_HEADER_SIZE when the whole header fits in this page
+	 * and is smaller when the header spills into the next page.
+	 */
+	copy_size = MIN(PAGE_SIZE - offset, JOB_HEADER_SIZE);
+
+	page_1 = kmap_atomic(p);
+
+	/* page_1 is a u32 pointer, offset is expressed in bytes */
+	page_1 += offset>>2;
+
+	kbase_sync_single_for_cpu(katom->kctx->kbdev,
+			kbase_dma_addr(p) + offset,
+			copy_size, DMA_BIDIRECTIONAL);
+
+	memcpy(dst, page_1, copy_size);
+
+	/* The data needed overflows the page dimension,
+	 * need to map the subsequent page.
+	 * NOTE(review): page_array[page_index + 1] is read without checking
+	 * that the region is backed by a further page - confirm the descriptor
+	 * cannot straddle the end of the backing.
+	 */
+	if (copy_size < JOB_HEADER_SIZE) {
+		p = pfn_to_page(PFN_DOWN(page_array[page_index + 1]));
+		page_2 = kmap_atomic(p);
+
+		kbase_sync_single_for_cpu(katom->kctx->kbdev,
+				kbase_dma_addr(p),
+				JOB_HEADER_SIZE - copy_size, DMA_BIDIRECTIONAL);
+
+		memcpy(dst + copy_size, page_2, JOB_HEADER_SIZE - copy_size);
+	}
+
+	/* We managed to correctly map one or two pages (in case of overflow) */
+	/* Get Bounding Box data and restart index from fault address low word.
+	 * The Y values are kept in place (not shifted down), so they can be
+	 * compared with each other and OR-ed with an X value directly.
+	 */
+	minX = job_header[FRAG_JOB_DESC_MIN_TILE_COORD_WORD] & X_COORDINATE_MASK;
+	minY = job_header[FRAG_JOB_DESC_MIN_TILE_COORD_WORD] & Y_COORDINATE_MASK;
+	maxX = job_header[FRAG_JOB_DESC_MAX_TILE_COORD_WORD] & X_COORDINATE_MASK;
+	maxY = job_header[FRAG_JOB_DESC_MAX_TILE_COORD_WORD] & Y_COORDINATE_MASK;
+	restartX = job_header[JOB_DESC_FAULT_ADDR_LOW_WORD] & X_COORDINATE_MASK;
+	restartY = job_header[JOB_DESC_FAULT_ADDR_LOW_WORD] & Y_COORDINATE_MASK;
+
+	dev_warn(dev, "Before Clamping:\n"
+			"Jobstatus: %08x\n"
+			"restartIdx: %08x\n"
+			"Fault_addr_low: %08x\n"
+			"minCoordsX: %08x minCoordsY: %08x\n"
+			"maxCoordsX: %08x maxCoordsY: %08x\n",
+			job_header[JOB_DESC_STATUS_WORD],
+			job_header[JOB_DESC_RESTART_INDEX_WORD],
+			job_header[JOB_DESC_FAULT_ADDR_LOW_WORD],
+			minX, minY,
+			maxX, maxY);
+
+	/* Set the restart index to the one which generated the fault*/
+	job_header[JOB_DESC_RESTART_INDEX_WORD] =
+			job_header[JOB_DESC_FAULT_ADDR_LOW_WORD];
+
+	/* Clamp each axis on its own and combine the results afterwards, so
+	 * that clamping one axis never reinstates the out-of-range value of
+	 * the other axis.
+	 */
+	clampedX = restartX;
+	clampedY = restartY;
+
+	if (restartX < minX) {
+		clampedX = minX;
+		dev_warn(dev,
+			"Clamping restart X index to minimum. %08x clamped to %08x\n",
+			restartX, minX);
+		clamped =  1;
+	}
+	if (restartY < minY) {
+		clampedY = minY;
+		dev_warn(dev,
+			"Clamping restart Y index to minimum. %08x clamped to %08x\n",
+			restartY, minY);
+		clamped =  1;
+	}
+	if (restartX > maxX) {
+		clampedX = maxX;
+		dev_warn(dev,
+			"Clamping restart X index to maximum. %08x clamped to %08x\n",
+			restartX, maxX);
+		clamped =  1;
+	}
+	if (restartY > maxY) {
+		clampedY = maxY;
+		dev_warn(dev,
+			"Clamping restart Y index to maximum. %08x clamped to %08x\n",
+			restartY, maxY);
+		clamped =  1;
+	}
+
+	if (clamped) {
+		job_header[JOB_DESC_RESTART_INDEX_WORD] = clampedX | clampedY;
+
+		/* Reset the fault address low word
+		 * and set the job status to STOPPED */
+		job_header[JOB_DESC_FAULT_ADDR_LOW_WORD] = 0x0;
+		job_header[JOB_DESC_STATUS_WORD] = BASE_JD_EVENT_STOPPED;
+		dev_warn(dev, "After Clamping:\n"
+				"Jobstatus: %08x\n"
+				"restartIdx: %08x\n"
+				"Fault_addr_low: %08x\n"
+				"minCoordsX: %08x minCoordsY: %08x\n"
+				"maxCoordsX: %08x maxCoordsY: %08x\n",
+				job_header[JOB_DESC_STATUS_WORD],
+				job_header[JOB_DESC_RESTART_INDEX_WORD],
+				job_header[JOB_DESC_FAULT_ADDR_LOW_WORD],
+				minX, minY,
+				maxX, maxY);
+
+		/* Write the patched header back through the CPU mapping and
+		 * sync it so that future GPU reads see the update */
+		memcpy(page_1, dst, copy_size);
+		p = pfn_to_page(PFN_DOWN(page_array[page_index]));
+
+		kbase_sync_single_for_device(katom->kctx->kbdev,
+				kbase_dma_addr(p) + offset,
+				copy_size, DMA_TO_DEVICE);
+
+		if (copy_size < JOB_HEADER_SIZE) {
+			memcpy(page_2, dst + copy_size,
+					JOB_HEADER_SIZE - copy_size);
+			p = pfn_to_page(PFN_DOWN(page_array[page_index + 1]));
+
+			kbase_sync_single_for_device(katom->kctx->kbdev,
+					kbase_dma_addr(p),
+					JOB_HEADER_SIZE - copy_size,
+					DMA_TO_DEVICE);
+		}
+	}
+
+	/* Atomic mappings are released in the reverse order of mapping */
+	if (copy_size < JOB_HEADER_SIZE)
+		kunmap_atomic(page_2);
+
+	kunmap_atomic(page_1);
+
+out_unlock:
+	kbase_gpu_vm_unlock(katom->kctx);
+	return clamped;
+}
--- a/drivers/gpu/arm/midgard_for_linux/mali_kbase_10969_workaround.h
+++ b/drivers/gpu/arm/midgard_for_linux/mali_kbase_10969_workaround.h
@ -0,0 +1,23 @@
+/*
+ *
+ * (C) COPYRIGHT 2013-2014 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#ifndef _KBASE_10969_WORKAROUND_
+#define _KBASE_10969_WORKAROUND_
+
+/**
+ * kbasep_10969_workaround_clamp_coordinates - clamp the restart index of a
+ * fragment job after a tile range fault
+ * @katom: atom whose job descriptor (located at katom->jc) is inspected
+ *
+ * Return: non-zero if the restart index was clamped and the job descriptor
+ * was rewritten, 0 otherwise.
+ */
+int kbasep_10969_workaround_clamp_coordinates(struct kbase_jd_atom *katom);
+
+#endif /* _KBASE_10969_WORKAROUND_ */
--- a/drivers/gpu/arm/midgard_for_linux/mali_kbase_as_fault_debugfs.c
+++ b/drivers/gpu/arm/midgard_for_linux/mali_kbase_as_fault_debugfs.c
@ -0,0 +1,102 @@
+/*
+ *
+ * (C) COPYRIGHT 2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#include <linux/debugfs.h>
+
+#include <mali_kbase.h>
+#include <mali_kbase_as_fault_debugfs.h>
+
+#ifdef CONFIG_DEBUG_FS
+#ifdef CONFIG_MALI_DEBUG
+
+/*
+ * seq_file show callback for one "as<N>" debugfs file. The address space
+ * number is carried in sfile->private. For every registered kbase device
+ * that has a pending (not yet read) fault on that address space, the pending
+ * bit is cleared and the last fault address is printed in decimal.
+ */
+static int kbase_as_fault_read(struct seq_file *sfile, void *data)
+{
+	uintptr_t as_no = (uintptr_t) sfile->private;
+	const struct list_head *kbdev_list;
+	struct list_head *pos;
+
+	kbdev_list = kbase_dev_list_get();
+
+	list_for_each(pos, kbdev_list) {
+		struct kbase_device *kbdev =
+			list_entry(pos, struct kbase_device, entry);
+
+		/* no new fault recorded for this address space */
+		if (!(kbdev->debugfs_as_read_bitmap & (1ULL << as_no)))
+			continue;
+
+		/* don't show this one again until another fault occurs */
+		kbdev->debugfs_as_read_bitmap &= ~(1ULL << as_no);
+
+		/* output the last page fault addr */
+		seq_printf(sfile, "%llu\n", (u64) kbdev->as[as_no].fault_addr);
+	}
+
+	kbase_dev_list_put(kbdev_list);
+
+	return 0;
+}
+
+/*
+ * open() handler: hands the inode's private data (the address space number
+ * stored at file creation) to kbase_as_fault_read() via single_open().
+ */
+static int kbase_as_fault_debugfs_open(struct inode *in, struct file *file)
+{
+	void *as_cookie = in->i_private;
+
+	return single_open(file, kbase_as_fault_read, as_cookie);
+}
+
+/*
+ * File operations for the per-address-space debugfs files. Opening goes
+ * through kbase_as_fault_debugfs_open(); reading, seeking and release use the
+ * generic seq_file / single_open helpers.
+ */
+static const struct file_operations as_fault_fops = {
+	.open = kbase_as_fault_debugfs_open,
+	.read = seq_read,
+	.llseek = seq_lseek,
+	.release = single_release,
+};
+
+#endif /* CONFIG_MALI_DEBUG */
+#endif /* CONFIG_DEBUG_FS */
+
+/*
+ * Initialize debugfs entry for each address space.
+ *
+ * Creates an "address_spaces" directory below kbdev->mali_debugfs_directory
+ * and one read-only "as<N>" file per hardware address space, with N passed
+ * to the file as private data. Also clears the bitmap of pending faults.
+ * Does nothing unless both CONFIG_DEBUG_FS and CONFIG_MALI_DEBUG are set.
+ */
+void kbase_as_fault_debugfs_init(struct kbase_device *kbdev)
+{
+#ifdef CONFIG_DEBUG_FS
+#ifdef CONFIG_MALI_DEBUG
+	uint i;
+	char as_name[64];
+	struct dentry *debugfs_directory;
+
+	kbdev->debugfs_as_read_bitmap = 0ULL;
+
+	KBASE_DEBUG_ASSERT(kbdev->nr_hw_address_spaces);
+	KBASE_DEBUG_ASSERT(sizeof(kbdev->as[0].fault_addr) == sizeof(u64));
+
+	debugfs_directory = debugfs_create_dir("address_spaces",
+		kbdev->mali_debugfs_directory);
+
+	if (!debugfs_directory) {
+		/* log lines must be newline terminated */
+		dev_warn(kbdev->dev, "unable to create address_spaces debugfs directory\n");
+		return;
+	}
+
+	for (i = 0; i < kbdev->nr_hw_address_spaces; i++) {
+		snprintf(as_name, ARRAY_SIZE(as_name), "as%u", i);
+		/* the address space index travels as the file's private data */
+		debugfs_create_file(as_name, S_IRUGO,
+			debugfs_directory, (void*) ((uintptr_t) i), &as_fault_fops);
+	}
+
+#endif /* CONFIG_MALI_DEBUG */
+#endif /* CONFIG_DEBUG_FS */
+}
--- a/drivers/gpu/arm/midgard_for_linux/mali_kbase_as_fault_debugfs.h
+++ b/drivers/gpu/arm/midgard_for_linux/mali_kbase_as_fault_debugfs.h
@ -0,0 +1,45 @@
+/*
+ *
+ * (C) COPYRIGHT 2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#ifndef _KBASE_AS_FAULT_DEBUG_FS_H
+#define _KBASE_AS_FAULT_DEBUG_FS_H
+
+/**
+ * kbase_as_fault_debugfs_init() - Add debugfs files for reporting page faults
+ *
+ * @kbdev: Pointer to kbase_device
+ *
+ * Creates an "address_spaces" debugfs directory with one "as<N>" file per
+ * hardware address space. The implementation is empty unless both
+ * CONFIG_DEBUG_FS and CONFIG_MALI_DEBUG are enabled.
+ */
+void kbase_as_fault_debugfs_init(struct kbase_device *kbdev);
+
+/**
+ * kbase_as_fault_debugfs_new() - flag a new fault as readable through debugfs
+ *
+ * @kbdev: Pointer to kbase_device
+ * @as_no: The address space the fault occurred on
+ *
+ * Sets the bit for @as_no in kbdev->debugfs_as_read_bitmap. Compiles to an
+ * empty function unless both CONFIG_DEBUG_FS and CONFIG_MALI_DEBUG are set.
+ */
+static inline void
+kbase_as_fault_debugfs_new(struct kbase_device *kbdev, int as_no)
+{
+#if defined(CONFIG_DEBUG_FS) && defined(CONFIG_MALI_DEBUG)
+	kbdev->debugfs_as_read_bitmap |= (1ULL << as_no);
+#endif /* CONFIG_DEBUG_FS && CONFIG_MALI_DEBUG */
+}
+
+#endif  /*_KBASE_AS_FAULT_DEBUG_FS_H*/
--- a/drivers/gpu/arm/midgard_for_linux/mali_kbase_cache_policy.c
+++ b/drivers/gpu/arm/midgard_for_linux/mali_kbase_cache_policy.c
@ -0,0 +1,64 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/*
+ * Cache Policy API.
+ */
+
+#include "mali_kbase_cache_policy.h"
+
+/*
+ * Translate region allocation flags into cache flags.
+ * The result is KBASE_REG_CPU_CACHED when BASE_MEM_CACHED_CPU is requested
+ * in flags, and 0 otherwise. nr_pages is currently not taken into account.
+ */
+u32 kbase_cache_enabled(u32 flags, u32 nr_pages)
+{
+	CSTD_UNUSED(nr_pages);
+
+	return (flags & BASE_MEM_CACHED_CPU) ? KBASE_REG_CPU_CACHED : 0;
+}
+
+
+/*
+ * Wrapper around dma_sync_single_for_device() on kbdev->dev. When built with
+ * CONFIG_MALI_COH_KERN the sync is skipped if the device's system coherency
+ * mode is COHERENCY_ACE.
+ */
+void kbase_sync_single_for_device(struct kbase_device *kbdev, dma_addr_t handle,
+		size_t size, enum dma_data_direction dir)
+{
+	int skip_sync = 0;
+
+#ifdef CONFIG_MALI_COH_KERN
+	/* Check if kernel is using coherency with GPU */
+	skip_sync = (kbdev->system_coherency == COHERENCY_ACE);
+#endif /* CONFIG_MALI_COH_KERN */
+
+	if (!skip_sync)
+		dma_sync_single_for_device(kbdev->dev, handle, size, dir);
+}
+
+
+/*
+ * Wrapper around dma_sync_single_for_cpu() on kbdev->dev. When built with
+ * CONFIG_MALI_COH_KERN the sync is skipped if the device's system coherency
+ * mode is COHERENCY_ACE.
+ */
+void kbase_sync_single_for_cpu(struct kbase_device *kbdev, dma_addr_t handle,
+		size_t size, enum dma_data_direction dir)
+{
+	int skip_sync = 0;
+
+#ifdef CONFIG_MALI_COH_KERN
+	/* Check if kernel is using coherency with GPU */
+	skip_sync = (kbdev->system_coherency == COHERENCY_ACE);
+#endif /* CONFIG_MALI_COH_KERN */
+
+	if (!skip_sync)
+		dma_sync_single_for_cpu(kbdev->dev, handle, size, dir);
+}
--- a/drivers/gpu/arm/midgard_for_linux/mali_kbase_cache_policy.h
+++ b/drivers/gpu/arm/midgard_for_linux/mali_kbase_cache_policy.h
@ -0,0 +1,45 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2013, 2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/*
+ * Cache Policy API.
+ */
+
+#ifndef _KBASE_CACHE_POLICY_H_
+#define _KBASE_CACHE_POLICY_H_
+
+#include "mali_kbase.h"
+#include "mali_base_kernel.h"
+
+/**
+ * kbase_cache_enabled - Choose the cache policy for a specific region
+ * @flags:    flags describing attributes of the region
+ * @nr_pages: total number of pages (backed or not) for the region
+ *
+ * Tells whether the CPU cache should be enabled or not for a specific
+ * region.
+ * This function can be modified to customize the cache policy depending on the
+ * flags and size of the region. The implementation in
+ * mali_kbase_cache_policy.c only looks at %BASE_MEM_CACHED_CPU in @flags and
+ * ignores @nr_pages.
+ *
+ * Return: %KBASE_REG_CPU_CACHED if CPU caching was requested in @flags,
+ *         0 otherwise
+ */
+u32 kbase_cache_enabled(u32 flags, u32 nr_pages);
+
+#endif				/* _KBASE_CACHE_POLICY_H_ */
--- a/drivers/gpu/arm/midgard_for_linux/mali_kbase_config.c
+++ b/drivers/gpu/arm/midgard_for_linux/mali_kbase_config.c
@ -0,0 +1,51 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#include <mali_kbase.h>
+#include <mali_kbase_defs.h>
+#include <mali_kbase_config_defaults.h>
+
+/*
+ * Run the platform's init hook, if PLATFORM_FUNCS provides one.
+ * Returns the hook's result, or 0 when there is no hook to call.
+ */
+int kbasep_platform_device_init(struct kbase_device *kbdev)
+{
+	struct kbase_platform_funcs_conf *funcs =
+		(struct kbase_platform_funcs_conf *)PLATFORM_FUNCS;
+
+	if (!funcs || !funcs->platform_init_func)
+		return 0;
+
+	return funcs->platform_init_func(kbdev);
+}
+
+/*
+ * Run the platform's termination hook, if PLATFORM_FUNCS provides one.
+ */
+void kbasep_platform_device_term(struct kbase_device *kbdev)
+{
+	struct kbase_platform_funcs_conf *funcs =
+		(struct kbase_platform_funcs_conf *)PLATFORM_FUNCS;
+
+	if (!funcs || !funcs->platform_term_func)
+		return;
+
+	funcs->platform_term_func(kbdev);
+}
+
+/*
+ * Default CPU clock speed query: stores a fixed value of 100 in *clock_speed
+ * and reports success.
+ */
+int kbase_cpuprops_get_default_clock_speed(u32 * const clock_speed)
+{
+	const u32 default_clock_speed = 100;
+
+	KBASE_DEBUG_ASSERT(NULL != clock_speed);
+
+	*clock_speed = default_clock_speed;
+
+	return 0;
+}
+
--- a/drivers/gpu/arm/midgard_for_linux/mali_kbase_config.h
+++ b/drivers/gpu/arm/midgard_for_linux/mali_kbase_config.h
@ -0,0 +1,345 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file mali_kbase_config.h
+ * Configuration API and Attributes for KBase
+ */
+
+#ifndef _KBASE_CONFIG_H_
+#define _KBASE_CONFIG_H_
+
+#include <asm/page.h>
+
+#include <mali_malisw.h>
+#include <mali_kbase_backend_config.h>
+
+/**
+ * @addtogroup base_api
+ * @{
+ */
+
+/**
+ * @addtogroup base_kbase_api
+ * @{
+ */
+
+/**
+ * @addtogroup kbase_config Configuration API and Attributes
+ * @{
+ */
+
+#include <linux/rbtree.h>
+
+/* Forward declaration of struct kbase_device */
+struct kbase_device;
+
+/**
+ * kbase_platform_funcs_conf - Specifies platform init/term function pointers
+ *
+ * Specifies the function pointers for platform specific initialization and
+ * termination. By default no functions are required. No additional platform
+ * specific control is necessary.
+ */
+struct kbase_platform_funcs_conf {
+	/**
+	 * platform_init_func - platform specific init function pointer
+	 * @kbdev - kbase_device pointer
+	 *
+	 * Returns 0 on success, negative error code otherwise.
+	 *
+	 * Function pointer for platform specific initialization or NULL if no
+	 * initialization function is required. At the point this is called the
+	 * GPU is not active and its power and clocks are in an unknown
+	 * (platform specific) state, as kbase doesn't yet have control of
+	 * power and clocks.
+	 *
+	 * The platform specific private pointer kbase_device::platform_context
+	 * can be accessed (and possibly initialized) in here.
+	 */
+	int (*platform_init_func)(struct kbase_device *kbdev);
+	/**
+	 * platform_term_func - platform specific termination function pointer
+	 * @kbdev - kbase_device pointer
+	 *
+	 * Function pointer for platform specific termination or NULL if no
+	 * termination function is required. At the point this is called the
+	 * GPU will be idle but still powered and clocked.
+	 *
+	 * The platform specific private pointer kbase_device::platform_context
+	 * can be accessed (and possibly terminated) in here.
+	 */
+	void (*platform_term_func)(struct kbase_device *kbdev);
+};
+
+/*
+ * @brief Specifies the callbacks for power management
+ *
+ * By default no callbacks will be made and the GPU must not be powered off.
+ */
+struct kbase_pm_callback_conf {
+	/** Callback for when the GPU is idle and the power to it can be switched off.
+	 *
+	 * The system integrator can decide whether to either do nothing, just switch off
+	 * the clocks to the GPU, or to completely power down the GPU.
+	 * The platform specific private pointer kbase_device::platform_context can be accessed and modified in here. It is the
+	 * platform \em callbacks responsibility to initialize and terminate this pointer if used (see @ref kbase_platform_funcs_conf).
+	 */
+	void (*power_off_callback)(struct kbase_device *kbdev);
+
+	/** Callback for when the GPU is about to become active and power must be supplied.
+	 *
+	 * This function must not return until the GPU is powered and clocked sufficiently for register access to
+	 * succeed.  The return value specifies whether the GPU was powered down since the call to power_off_callback.
+	 * If the GPU state has been lost then this function must return 1, otherwise it should return 0.
+	 * The platform specific private pointer kbase_device::platform_context can be accessed and modified in here. It is the
+	 * platform \em callbacks responsibility to initialize and terminate this pointer if used (see @ref kbase_platform_funcs_conf).
+	 *
+	 * The return value of the first call to this function is ignored.
+	 *
+	 * @return 1 if the GPU state may have been lost, 0 otherwise.
+	 */
+	int (*power_on_callback)(struct kbase_device *kbdev);
+
+	/** Callback for when the system is requesting a suspend and GPU power
+	 * must be switched off.
+	 *
+	 * Note that if this callback is present, then this may be called
+	 * without a preceding call to power_off_callback. Therefore this
+	 * callback must be able to take any action that might otherwise happen
+	 * in power_off_callback.
+	 *
+	 * The platform specific private pointer kbase_device::platform_context
+	 * can be accessed and modified in here. It is the platform \em
+	 * callbacks responsibility to initialize and terminate this pointer if
+	 * used (see @ref kbase_platform_funcs_conf).
+	 */
+	void (*power_suspend_callback)(struct kbase_device *kbdev);
+
+	/** Callback for when the system is resuming from a suspend and GPU
+	 * power must be switched on.
+	 *
+	 * Note that if this callback is present, then this may be called
+	 * without a following call to power_on_callback. Therefore this
+	 * callback must be able to take any action that might otherwise happen
+	 * in power_on_callback.
+	 *
+	 * The platform specific private pointer kbase_device::platform_context
+	 * can be accessed and modified in here. It is the platform \em
+	 * callbacks responsibility to initialize and terminate this pointer if
+	 * used (see @ref kbase_platform_funcs_conf).
+	 */
+	void (*power_resume_callback)(struct kbase_device *kbdev);
+
+	/** Callback for handling runtime power management initialization.
+	 *
+	 * The runtime power management callbacks @ref power_runtime_off_callback and @ref power_runtime_on_callback
+	 * will become active from calls made to the OS from within this function.
+	 * The runtime calls can be triggered by calls from @ref power_off_callback and @ref power_on_callback.
+	 * Note: for linux the kernel must have CONFIG_PM_RUNTIME enabled to use this feature.
+	 *
+	 * @return 0 on success, else int error code.
+	 */
+	 int (*power_runtime_init_callback)(struct kbase_device *kbdev);
+
+	/** Callback for handling runtime power management termination.
+	 *
+	 * The runtime power management callbacks @ref power_runtime_off_callback and @ref power_runtime_on_callback
+	 * should no longer be called by the OS on completion of this function.
+	 * Note: for linux the kernel must have CONFIG_PM_RUNTIME enabled to use this feature.
+	 */
+	void (*power_runtime_term_callback)(struct kbase_device *kbdev);
+
+	/** Callback for runtime power-off power management callback
+	 *
+	 * For linux this callback will be called by the kernel runtime_suspend callback.
+	 * Note: for linux the kernel must have CONFIG_PM_RUNTIME enabled to use this feature.
+	 *
+	 * NOTE(review): this callback returns void. The line
+	 * "@return 0 on success, else OS error code" that used to be attached
+	 * here presumably belongs to power_runtime_on_callback - confirm.
+	 */
+	void (*power_runtime_off_callback)(struct kbase_device *kbdev);
+
+	/** Callback for runtime power-on power management callback
+	 *
+	 * For linux this callback will be called by the kernel runtime_resume callback.
+	 * Note: for linux the kernel must have CONFIG_PM_RUNTIME enabled to use this feature.
+	 *
+	 * @return presumably 0 on success, else OS error code (TODO confirm
+	 * against the caller; the original documentation gave no return value
+	 * for this int-returning callback).
+	 */
+	int (*power_runtime_on_callback)(struct kbase_device *kbdev);
+
+	/*
+	 * Optional callback for checking if GPU can be suspended when idle
+	 *
+	 * This callback will be called by the runtime power management core
+	 * when the reference count goes to 0 to provide notification that the
+	 * GPU now seems idle.
+	 *
+	 * If this callback finds that the GPU can't be powered off, or handles
+	 * suspend by powering off directly or queueing up a power off, a
+	 * non-zero value must be returned to prevent the runtime PM core from
+	 * also triggering a suspend.
+	 *
+	 * Returning 0 will cause the runtime PM core to conduct a regular
+	 * autosuspend.
+	 *
+	 * This callback is optional and if not provided regular autosuspend
+	 * will be triggered.
+	 *
+	 * Note: The Linux kernel must have CONFIG_PM_RUNTIME enabled to use
+	 * this feature.
+	 *
+	 * Return 0 if GPU can be suspended, positive value if it can not be
+	 * suspended by runtime PM, else OS error code
+	 */
+	int (*power_runtime_idle_callback)(struct kbase_device *kbdev);
+};
+
+/**
+ * kbase_cpuprops_get_default_clock_speed - default for CPU_SPEED_FUNC
+ * @clock_speed - see  kbase_cpu_clk_speed_func for details on the parameters
+ *
+ * Returns 0 on success, negative error code otherwise.
+ *
+ * Default implementation of CPU_SPEED_FUNC. This function sets clock_speed
+ * to 100, so will be an underestimate for any real system.
+ */
+int kbase_cpuprops_get_default_clock_speed(u32 * const clock_speed);
+
+/**
+ * kbase_cpu_clk_speed_func - Type of the function pointer for CPU_SPEED_FUNC
+ * @param clock_speed - pointer to store the current CPU clock speed in MHz
+ *
+ * Returns 0 on success, otherwise negative error code.
+ *
+ * This is mainly used to implement OpenCL's clGetDeviceInfo().
+ */
+typedef int (*kbase_cpu_clk_speed_func) (u32 *clock_speed);
+
+/**
+ * kbase_gpu_clk_speed_func - Type of the function pointer for GPU_SPEED_FUNC
+ * @param clock_speed - pointer to store the current GPU clock speed in MHz
+ *
+ * Returns 0 on success, otherwise negative error code.
+ * When an error is returned the caller assumes maximum GPU speed stored in
+ * gpu_freq_khz_max.
+ *
+ * If the system timer is not available then this function is required
+ * for the OpenCL queue profiling to return correct timing information.
+ *
+ */
+typedef int (*kbase_gpu_clk_speed_func) (u32 *clock_speed);
+
+#ifdef CONFIG_OF
+/*
+ * With CONFIG_OF the platform config carries no data.
+ * NOTE(review): presumably the I/O resources are taken from the device tree
+ * in this configuration - confirm.
+ */
+struct kbase_platform_config {
+};
+#else
+
+/*
+ * @brief Specifies start and end of I/O memory region.
+ */
+struct kbase_io_memory_region {
+	u64 start;
+	u64 end;
+};
+
+/*
+ * @brief Specifies I/O related resources like IRQs and memory region for I/O operations.
+ *
+ * Holds one IRQ number each for the job, MMU and GPU interrupts, plus the
+ * I/O memory region.
+ */
+struct kbase_io_resources {
+	u32                      job_irq_number;
+	u32                      mmu_irq_number;
+	u32                      gpu_irq_number;
+	struct kbase_io_memory_region io_memory_region;
+};
+
+/*
+ * Platform config used without CONFIG_OF: points at the I/O resources
+ * (IRQs and register region) described by struct kbase_io_resources.
+ */
+struct kbase_platform_config {
+	const struct kbase_io_resources *io_resources;
+};
+
+#endif /* CONFIG_OF */
+
+/**
+ * @brief Gets the pointer to platform config.
+ *
+ * @return Pointer to the platform config
+ */
+struct kbase_platform_config *kbase_get_platform_config(void);
+
+/**
+ * kbasep_platform_device_init: - Platform specific call to initialize hardware
+ * @kbdev: kbase device pointer
+ *
+ * Function calls a platform defined routine if specified in the configuration
+ * attributes.  The routine can initialize any hardware and context state that
+ * is required for the GPU block to function.
+ *
+ * Return: 0 if no errors have been found in the config.
+ *         Negative error code otherwise.
+ */
+int kbasep_platform_device_init(struct kbase_device *kbdev);
+
+/**
+ * kbasep_platform_device_term - Platform specific call to terminate hardware
+ * @kbdev: Kbase device pointer
+ *
+ * Function calls a platform defined routine if specified in the configuration
+ * attributes. The routine can destroy any platform specific context state and
+ * shut down any hardware functionality that are outside of the Power Management
+ * callbacks.
+ *
+ */
+void kbasep_platform_device_term(struct kbase_device *kbdev);
+
+
+/**
+ * kbase_platform_early_init - Early initialisation of the platform code
+ *
+ * This function will be called when the module is loaded to perform any
+ * early initialisation required by the platform code. Such as reading
+ * platform specific device tree entries for the GPU.
+ *
+ * Return: 0 for success, any other fail causes module initialisation to fail
+ */
+int kbase_platform_early_init(void);
+
+#ifndef CONFIG_OF
+#ifdef CONFIG_MALI_PLATFORM_FAKE
+/**
+ * kbase_platform_fake_register - Register a platform device for the GPU
+ *
+ * This can be used to register a platform device on systems where device tree
+ * is not enabled and the platform initialisation code in the kernel doesn't
+ * create the GPU device. Where possible device tree should be used instead.
+ *
+ * Return: 0 for success, any other fail causes module initialisation to fail
+ */
+int kbase_platform_fake_register(void);
+
+/**
+ * kbase_platform_fake_unregister - Unregister a fake platform device
+ *
+ * Unregister the platform device created with kbase_platform_fake_register()
+ */
+void kbase_platform_fake_unregister(void);
+#endif
+#endif
+
+	  /** @} *//* end group kbase_config */
+	  /** @} *//* end group base_kbase_api */
+	  /** @} *//* end group base_api */
+
+#endif				/* _KBASE_CONFIG_H_ */
--- a/drivers/gpu/arm/midgard_for_linux/mali_kbase_config_defaults.h
+++ b/drivers/gpu/arm/midgard_for_linux/mali_kbase_config_defaults.h
@ -0,0 +1,261 @@
+/*
+ *
+ * (C) COPYRIGHT 2013-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/**
+ * @file mali_kbase_config_defaults.h
+ *
+ * Default values for configuration settings
+ *
+ */
+
+#ifndef _KBASE_CONFIG_DEFAULTS_H_
+#define _KBASE_CONFIG_DEFAULTS_H_
+
+/* Include mandatory definitions per platform */
+#include <mali_kbase_config_platform.h>
+
+/**
+ * Irq throttle. It is the minimum desired time in between two
+ * consecutive gpu interrupts (given in 'us'). The irq throttle
+ * gpu register will be configured after this, taking into
+ * account the configured max frequency.
+ *
+ * Attached value: number in micro seconds
+ */
+#define DEFAULT_IRQ_THROTTLE_TIME_US 20
+
+/**
+ *  Default Job Scheduler initial runtime of a context for the CFS Policy,
+ *  in time-slices.
+ *
+ * This value is relative to that of the least-run context, and defines
+ * where in the CFS queue a new context is added. A value of 1 means 'after
+ * the least-run context has used its timeslice'. Therefore, when all
+ * contexts consistently use the same amount of time, a value of 1 models a
+ * FIFO. A value of 0 would model a LIFO.
+ *
+ * The value is represented in "numbers of time slices". Multiply this
+ * value by that defined in @ref DEFAULT_JS_CTX_TIMESLICE_NS to get
+ * the time value for this in nanoseconds.
+ */
+#define DEFAULT_JS_CFS_CTX_RUNTIME_INIT_SLICES 1
+
+/**
+ * Default Job Scheduler minimum runtime value of a context for CFS, in
+ * time_slices relative to that of the least-run context.
+ *
+ * This is a measure of how much preferential treatment is given to a
+ * context that is not run very often.
+ *
+ * Specifically, this value defines how many timeslices such a context is
+ * (initially) allowed to use at once. Such contexts (e.g. 'interactive'
+ * processes) will appear near the front of the CFS queue, and can initially
+ * use more time than contexts that run continuously (e.g. 'batch'
+ * processes).
+ *
+ * This limit \b prevents a "stored-up timeslices" DoS attack, where a ctx
+ * not run for a long time attacks the system by using a very large initial
+ * number of timeslices when it finally does run.
+ *
+ * @note A value of zero allows not-run-often contexts to get scheduled in
+ * quickly, but to only use a single timeslice when they get scheduled in.
+ */
+#define DEFAULT_JS_CFS_CTX_RUNTIME_MIN_SLICES 2
+
+/**
+* Boolean indicating whether the driver is configured to be secure at
+* a potential loss of performance.
+*
+* This currently affects only r0p0-15dev0 HW and earlier.
+*
+* On r0p0-15dev0 HW and earlier, there are tradeoffs between security and
+* performance:
+*
+* - When this is set to true, the driver remains fully secure,
+* but potentially loses performance compared with setting this to
+* false.
+* - When set to false, the driver is open to certain security
+* attacks.
+*
+* From r0p0-00rel0 and onwards, there is no security loss by setting
+* this to false, and no performance loss by setting it to
+* true.
+*/
+#define DEFAULT_SECURE_BUT_LOSS_OF_PERFORMANCE false
+
+/*
+ * Address ID limit selectors. These are the values that DEFAULT_ARID_LIMIT
+ * and DEFAULT_AWID_LIMIT are chosen from.
+ *
+ * Note that the numeric encodings are not ordered by ID count: the
+ * unrestricted setting is 0x0, while the half/quarter/eighth settings count
+ * down from 0x3 to 0x1.
+ */
+enum {
+	/**
+	 * Use unrestricted Address ID width on the AXI bus.
+	 */
+	KBASE_AID_32 = 0x0,
+
+	/**
+	 * Restrict GPU to a half of maximum Address ID count.
+	 * This will reduce performance, but reduce bus load due to GPU.
+	 */
+	KBASE_AID_16 = 0x3,
+
+	/**
+	 * Restrict GPU to a quarter of maximum Address ID count.
+	 * This will reduce performance, but reduce bus load due to GPU.
+	 */
+	KBASE_AID_8  = 0x2,
+
+	/**
+	 * Restrict GPU to an eighth of maximum Address ID count.
+	 * This will reduce performance, but reduce bus load due to GPU.
+	 */
+	KBASE_AID_4  = 0x1
+};
+
+/**
+ * Default setting for read Address ID limiting on AXI bus.
+ *
+ * Attached value: u32 register value
+ *    KBASE_AID_32 - use the full 32 IDs (5 ID bits)
+ *    KBASE_AID_16 - use 16 IDs (4 ID bits)
+ *    KBASE_AID_8  - use 8 IDs (3 ID bits)
+ *    KBASE_AID_4  - use 4 IDs (2 ID bits)
+ * Default value: KBASE_AID_32 (no limit). Note hardware implementation
+ * may limit to a lower value.
+ */
+#define DEFAULT_ARID_LIMIT KBASE_AID_32
+
+/**
+ * Default setting for write Address ID limiting on AXI.
+ *
+ * Attached value: u32 register value
+ *    KBASE_AID_32 - use the full 32 IDs (5 ID bits)
+ *    KBASE_AID_16 - use 16 IDs (4 ID bits)
+ *    KBASE_AID_8  - use 8 IDs (3 ID bits)
+ *    KBASE_AID_4  - use 4 IDs (2 ID bits)
+ * Default value: KBASE_AID_32 (no limit). Note hardware implementation
+ * may limit to a lower value.
+ */
+#define DEFAULT_AWID_LIMIT KBASE_AID_32
+
+/**
+ * Default UMP device mapping. A UMP_DEVICE_<device>_SHIFT value which
+ * defines which UMP device this GPU should be mapped to.
+ */
+#define DEFAULT_UMP_GPU_DEVICE_SHIFT UMP_DEVICE_Z_SHIFT
+
+/*
+ * Default period for DVFS sampling
+ */
+// #define DEFAULT_PM_DVFS_PERIOD 100 /* 100ms */
+#define DEFAULT_PM_DVFS_PERIOD 20 /* 20 ms */
+
+/*
+ * Power Management poweroff tick granularity. This is in nanoseconds to
+ * allow HR timer support.
+ *
+ * On each scheduling tick, the power manager core may decide to:
+ * -# Power off one or more shader cores
+ * -# Power off the entire GPU
+ */
+#define DEFAULT_PM_GPU_POWEROFF_TICK_NS (400000) /* 400us */
+
+/*
+ * Power Manager number of ticks before shader cores are powered off
+ */
+#define DEFAULT_PM_POWEROFF_TICK_SHADER (2) /* 400-800us */
+
+/*
+ * Power Manager number of ticks before GPU is powered off
+ */
+#define DEFAULT_PM_POWEROFF_TICK_GPU (2) /* 400-800us */
+
+/*
+ * Default scheduling tick granularity
+ */
+#define DEFAULT_JS_SCHEDULING_PERIOD_NS    (100000000u) /* 100ms */
+
+/*
+ * Default minimum number of scheduling ticks before jobs are soft-stopped.
+ *
+ * This defines the time-slice for a job (which may be different from that of a
+ * context)
+ */
+#define DEFAULT_JS_SOFT_STOP_TICKS       (1) /* 100ms-200ms */
+
+/*
+ * Default minimum number of scheduling ticks before CL jobs are soft-stopped.
+ */
+#define DEFAULT_JS_SOFT_STOP_TICKS_CL    (1) /* 100ms-200ms */
+
+/*
+ * Default minimum number of scheduling ticks before jobs are hard-stopped
+ */
+#define DEFAULT_JS_HARD_STOP_TICKS_SS    (100) /* 10s */
+#define DEFAULT_JS_HARD_STOP_TICKS_SS_8408  (300) /* 30s */
+
+/*
+ * Default minimum number of scheduling ticks before CL jobs are hard-stopped.
+ */
+#define DEFAULT_JS_HARD_STOP_TICKS_CL    (100) /* 10s */
+
+/*
+ * Default minimum number of scheduling ticks before jobs are hard-stopped
+ * during dumping
+ */
+#define DEFAULT_JS_HARD_STOP_TICKS_DUMPING   (15000) /* 1500s */
+
+/*
+ * Default timeout for some software jobs, after which the software event wait
+ * jobs will be cancelled.
+ */
+#define DEFAULT_JS_SOFT_JOB_TIMEOUT ((u32)3000) /* 3s */
+
+/*
+ * Default minimum number of scheduling ticks before the GPU is reset to clear a
+ * "stuck" job
+ */
+#define DEFAULT_JS_RESET_TICKS_SS           (105) /* 10.5s */
+#define DEFAULT_JS_RESET_TICKS_SS_8408     (450) /* 45s */
+
+/*
+ * Default minimum number of scheduling ticks before the GPU is reset to clear a
+ * "stuck" CL job.
+ */
+#define DEFAULT_JS_RESET_TICKS_CL        (105) /* 10.5s */
+
+/*
+ * Default minimum number of scheduling ticks before the GPU is reset to clear a
+ * "stuck" job during dumping.
+ */
+#define DEFAULT_JS_RESET_TICKS_DUMPING   (15020) /* 1502s */
+
+/*
+ * Default number of milliseconds given for other jobs on the GPU to be
+ * soft-stopped when the GPU needs to be reset.
+ */
+#define DEFAULT_RESET_TIMEOUT_MS (3000) /* 3s */
+
+/*
+ * Default timeslice that a context is scheduled in for, in nanoseconds.
+ *
+ * When a context has used up this amount of time across its jobs, it is
+ * scheduled out to let another run.
+ *
+ * @note the resolution is nanoseconds (ns) here, because that's the format
+ * often used by the OS.
+ */
+#define DEFAULT_JS_CTX_TIMESLICE_NS (50000000) /* 50ms */
+
+#endif /* _KBASE_CONFIG_DEFAULTS_H_ */
+
--- a/drivers/gpu/arm/midgard_for_linux/mali_kbase_context.c
+++ b/drivers/gpu/arm/midgard_for_linux/mali_kbase_context.c
@ -0,0 +1,321 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/*
+ * Base kernel context APIs
+ */
+
+#include <mali_kbase.h>
+#include <mali_midg_regmap.h>
+#include <mali_kbase_mem_linux.h>
+
+/**
+ * kbase_create_context() - Create a kernel base context.
+ * @kbdev: Kbase device
+ * @is_compat: Force creation of a 32-bit context
+ *
+ * Allocate and init a kernel base context. The context records the tgid and
+ * pid of the calling task. If any initialisation step fails, the steps that
+ * had already succeeded are undone again before returning.
+ *
+ * Return: new kbase context, or NULL if the allocation or any initialisation
+ *         step failed
+ */
+struct kbase_context *
+kbase_create_context(struct kbase_device *kbdev, bool is_compat)
+{
+	struct kbase_context *kctx;
+	int err;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	/* zero-initialised, as a lot of code assumes it is zeroed on create */
+	kctx = vzalloc(sizeof(*kctx));
+
+	if (!kctx)
+		goto out;
+
+	/* creating a context is considered a disjoint event */
+	kbase_disjoint_event(kbdev);
+
+	kctx->kbdev = kbdev;
+	kctx->as_nr = KBASEP_AS_NR_INVALID;
+	if (is_compat)
+		kbase_ctx_flag_set(kctx, KCTX_COMPAT);
+#ifdef CONFIG_MALI_TRACE_TIMELINE
+	kctx->timeline.owner_tgid = task_tgid_nr(current);
+#endif
+	atomic_set(&kctx->setup_complete, 0);
+	atomic_set(&kctx->setup_in_progress, 0);
+	spin_lock_init(&kctx->mm_update_lock);
+	kctx->process_mm = NULL;
+	atomic_set(&kctx->nonmapped_pages, 0);
+	kctx->slots_pullable = 0;
+	kctx->tgid = current->tgid;
+	kctx->pid = current->pid;
+
+	/*
+	 * From here on every step that can fail jumps to the label that undoes
+	 * exactly the steps that have succeeded so far (see the unwind ladder
+	 * at the end of the function).
+	 */
+	err = kbase_mem_pool_init(&kctx->mem_pool,
+			kbdev->mem_pool_max_size_default,
+			kctx->kbdev, &kbdev->mem_pool);
+	if (err)
+		goto free_kctx;
+
+	err = kbase_mem_evictable_init(kctx);
+	if (err)
+		goto free_pool;
+
+	atomic_set(&kctx->used_pages, 0);
+
+	err = kbase_jd_init(kctx);
+	if (err)
+		goto deinit_evictable;
+
+	err = kbasep_js_kctx_init(kctx);
+	if (err)
+		goto free_jd;	/* safe to call kbasep_js_kctx_term  in this case */
+
+	err = kbase_event_init(kctx);
+	if (err)
+		goto free_jd;
+
+	atomic_set(&kctx->drain_pending, 0);
+
+	mutex_init(&kctx->reg_lock);
+
+	INIT_LIST_HEAD(&kctx->waiting_soft_jobs);
+	spin_lock_init(&kctx->waiting_soft_jobs_lock);
+#ifdef CONFIG_KDS
+	INIT_LIST_HEAD(&kctx->waiting_kds_resource);
+#endif
+	err = kbase_dma_fence_init(kctx);
+	if (err)
+		goto free_event;
+
+	err = kbase_mmu_init(kctx);
+	if (err)
+		goto term_dma_fence;
+
+	/*
+	 * Call kbase_mem_pool_grow() and retry kbase_mmu_alloc_pgd() until the
+	 * latter returns a page directory; a failure to grow the pool aborts
+	 * context creation.
+	 */
+	do {
+		err = kbase_mem_pool_grow(&kctx->mem_pool,
+				MIDGARD_MMU_BOTTOMLEVEL);
+		if (err)
+			goto pgd_no_mem;
+		kctx->pgd = kbase_mmu_alloc_pgd(kctx);
+	} while (!kctx->pgd);
+
+	/*
+	 * err is not updated on this failure path; that is harmless because
+	 * the function reports failure by returning NULL, not by error code.
+	 */
+	kctx->aliasing_sink_page = kbase_mem_alloc_page(kctx->kbdev);
+	if (!kctx->aliasing_sink_page)
+		goto no_sink_page;
+
+	init_waitqueue_head(&kctx->event_queue);
+
+	/*
+	 * All cookie bits start out set; kbase_destroy_context() treats a
+	 * cleared bit as a pending region that still has to be released.
+	 */
+	kctx->cookies = KBASE_COOKIE_MASK;
+
+	/* Make sure page 0 is not used... */
+	err = kbase_region_tracker_init(kctx);
+	if (err)
+		goto no_region_tracker;
+
+	err = kbase_sticky_resource_init(kctx);
+	if (err)
+		goto no_sticky;
+
+	err = kbase_jit_init(kctx);
+	if (err)
+		goto no_jit;
+#ifdef CONFIG_GPU_TRACEPOINTS
+	atomic_set(&kctx->jctx.work_id, 0);
+#endif
+#ifdef CONFIG_MALI_TRACE_TIMELINE
+	atomic_set(&kctx->timeline.jd_atoms_in_flight, 0);
+#endif
+
+	/*
+	 * The context id is the value of the device-wide ctx_num counter
+	 * before this increment.
+	 */
+	kctx->id = atomic_add_return(1, &(kbdev->ctx_num)) - 1;
+
+	mutex_init(&kctx->vinstr_cli_lock);
+
+	setup_timer(&kctx->soft_job_timeout,
+		    kbasep_soft_job_timeout_worker,
+		    (uintptr_t)kctx);
+
+	return kctx;
+
+	/*
+	 * Error unwinding. The labels appear in reverse order of the
+	 * initialisation above and fall through into each other, so jumping
+	 * to one of them undoes every step that succeeded before the failing
+	 * one.
+	 */
+no_jit:
+	kbase_gpu_vm_lock(kctx);
+	kbase_sticky_resource_term(kctx);
+	kbase_gpu_vm_unlock(kctx);
+no_sticky:
+	kbase_region_tracker_term(kctx);
+no_region_tracker:
+	kbase_mem_pool_free(&kctx->mem_pool, kctx->aliasing_sink_page, false);
+no_sink_page:
+	/* VM lock needed for the call to kbase_mmu_free_pgd */
+	kbase_gpu_vm_lock(kctx);
+	kbase_mmu_free_pgd(kctx);
+	kbase_gpu_vm_unlock(kctx);
+pgd_no_mem:
+	kbase_mmu_term(kctx);
+term_dma_fence:
+	kbase_dma_fence_term(kctx);
+free_event:
+	kbase_event_cleanup(kctx);
+free_jd:
+	/* Safe to call this one even when didn't initialize (assuming kctx was sufficiently zeroed) */
+	kbasep_js_kctx_term(kctx);
+	kbase_jd_exit(kctx);
+deinit_evictable:
+	kbase_mem_evictable_deinit(kctx);
+free_pool:
+	kbase_mem_pool_term(&kctx->mem_pool);
+free_kctx:
+	vfree(kctx);
+out:
+	return NULL;
+}
+KBASE_EXPORT_SYMBOL(kbase_create_context);
+
+/**
+ * kbase_reg_pending_dtor - Release a region that is still pending
+ * @reg: Region to release
+ *
+ * Calls kbase_mem_phy_alloc_put() on the region's cpu_alloc and gpu_alloc and
+ * then frees the region structure itself with kfree(). Used by
+ * kbase_destroy_context() for the entries left in kctx->pending_regions.
+ */
+static void kbase_reg_pending_dtor(struct kbase_va_region *reg)
+{
+	dev_dbg(reg->kctx->kbdev->dev, "Freeing pending unmapped region\n");
+	kbase_mem_phy_alloc_put(reg->cpu_alloc);
+	kbase_mem_phy_alloc_put(reg->gpu_alloc);
+	kfree(reg);
+}
+
+/**
+ * kbase_destroy_context - Destroy a kernel base context.
+ * @kctx: Context to destroy
+ *
+ * Tears down everything kbase_create_context() set up, releases any regions
+ * still pending on the context and finally frees @kctx itself with vfree().
+ * A PM active reference is held from the start of the teardown until after
+ * kbase_jd_exit(). @kctx must not be used once this function has returned.
+ */
+void kbase_destroy_context(struct kbase_context *kctx)
+{
+	struct kbase_device *kbdev;
+	int pages;
+	unsigned long pending_regions_to_clean;
+
+	KBASE_DEBUG_ASSERT(NULL != kctx);
+
+	kbdev = kctx->kbdev;
+	KBASE_DEBUG_ASSERT(NULL != kbdev);
+
+	KBASE_TRACE_ADD(kbdev, CORE_CTX_DESTROY, kctx, NULL, 0u, 0u);
+
+	/* Ensure the core is powered up for the destroy process */
+	/* A suspend won't happen here, because we're in a syscall from a userspace
+	 * thread. */
+	kbase_pm_context_active(kbdev);
+
+	kbase_jd_zap_context(kctx);
+	kbase_event_cleanup(kctx);
+
+	/*
+	 * JIT must be terminated before the code below as it must be called
+	 * without the region lock being held.
+	 * The code above ensures no new JIT allocations can be made
+	 * by the time we get to this point of context tear down.
+	 */
+	kbase_jit_term(kctx);
+
+	kbase_gpu_vm_lock(kctx);
+
+	kbase_sticky_resource_term(kctx);
+
+	/* MMU is disabled as part of scheduling out the context */
+	kbase_mmu_free_pgd(kctx);
+
+	/* drop the aliasing sink page now that it can't be mapped anymore */
+	kbase_mem_pool_free(&kctx->mem_pool, kctx->aliasing_sink_page, false);
+
+	/*
+	 * free pending region setups
+	 *
+	 * A cleared bit in kctx->cookies marks a cookie that is in use, i.e.
+	 * one that has an entry in kctx->pending_regions (enforced by the
+	 * BUG_ON below). Walk those bits from the lowest to the highest.
+	 */
+	pending_regions_to_clean = (~kctx->cookies) & KBASE_COOKIE_MASK;
+	while (pending_regions_to_clean) {
+		unsigned int cookie = __ffs(pending_regions_to_clean);
+
+		BUG_ON(!kctx->pending_regions[cookie]);
+
+		kbase_reg_pending_dtor(kctx->pending_regions[cookie]);
+
+		kctx->pending_regions[cookie] = NULL;
+		pending_regions_to_clean &= ~(1UL << cookie);
+	}
+
+	kbase_region_tracker_term(kctx);
+	kbase_gpu_vm_unlock(kctx);
+
+	/* Safe to call this one even when didn't initialize (assuming kctx was sufficiently zeroed) */
+	kbasep_js_kctx_term(kctx);
+
+	kbase_jd_exit(kctx);
+
+	/* Drop the PM reference taken at the top of this function */
+	kbase_pm_context_idle(kbdev);
+
+	kbase_dma_fence_term(kctx);
+
+	kbase_mmu_term(kctx);
+
+	/* Pages still accounted to the context at this point are only reported */
+	pages = atomic_read(&kctx->used_pages);
+	if (pages != 0)
+		dev_warn(kbdev->dev, "%s: %d pages in use!\n", __func__, pages);
+
+	kbase_mem_evictable_deinit(kctx);
+	kbase_mem_pool_term(&kctx->mem_pool);
+	WARN_ON(atomic_read(&kctx->nonmapped_pages) != 0);
+
+	vfree(kctx);
+}
+KBASE_EXPORT_SYMBOL(kbase_destroy_context);
+
+/**
+ * kbase_context_set_create_flags - Apply creation flags to a context
+ * @kctx: Kbase context
+ * @flags: Creation flags; only bits that are part of
+ *         BASE_CONTEXT_CREATE_KERNEL_FLAGS are accepted
+ *
+ * The flags are applied with the context's jsctx_mutex and the device's
+ * hwaccess_lock held.
+ *
+ * Return: 0 on success, -EINVAL if @flags has a bit set outside
+ *         BASE_CONTEXT_CREATE_KERNEL_FLAGS
+ */
+int kbase_context_set_create_flags(struct kbase_context *kctx, u32 flags)
+{
+	struct kbasep_js_kctx_info *sched_info;
+	struct kbase_device *kbdev;
+	unsigned long irq_state;
+
+	KBASE_DEBUG_ASSERT(NULL != kctx);
+
+	/* Refuse any bit that is not a known kernel-side creation flag */
+	if ((flags & BASE_CONTEXT_CREATE_KERNEL_FLAGS) != flags)
+		return -EINVAL;
+
+	sched_info = &kctx->jctx.sched_info;
+	kbdev = kctx->kbdev;
+
+	mutex_lock(&sched_info->ctx.jsctx_mutex);
+	spin_lock_irqsave(&kbdev->hwaccess_lock, irq_state);
+
+	/* Clear KCTX_SUBMIT_DISABLED unless the caller explicitly asked for it */
+	if (!(flags & BASE_CONTEXT_SYSTEM_MONITOR_SUBMIT_DISABLED))
+		kbase_ctx_flag_clear(kctx, KCTX_SUBMIT_DISABLED);
+
+	/* Latch the initial attributes into the Job Scheduler */
+	kbasep_js_ctx_attr_set_initial_attrs(kbdev, kctx);
+
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, irq_state);
+	mutex_unlock(&sched_info->ctx.jsctx_mutex);
+
+	return 0;
+}
+KBASE_EXPORT_SYMBOL(kbase_context_set_create_flags);
--- a/drivers/gpu/arm/midgard_for_linux/mali_kbase_context.h
+++ b/drivers/gpu/arm/midgard_for_linux/mali_kbase_context.h
@ -0,0 +1,90 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#ifndef _KBASE_CONTEXT_H_
+#define _KBASE_CONTEXT_H_
+
+#include <linux/atomic.h>
+
+
+int kbase_context_set_create_flags(struct kbase_context *kctx, u32 flags);
+
+/**
+ * kbase_ctx_flag - Test whether @flag is set on @kctx
+ * @kctx: Pointer to kbase context to check
+ * @flag: Flag to check
+ *
+ * Return: true if @flag is set on @kctx, false if not.
+ */
+static inline bool kbase_ctx_flag(struct kbase_context *kctx,
+				      enum kbase_context_flags flag)
+{
+	const int current_flags = atomic_read(&kctx->flags);
+
+	return (current_flags & flag) != 0;
+}
+
+/**
+ * kbase_ctx_flag_clear - Clear @flag on @kctx
+ * @kctx: Pointer to kbase context
+ * @flag: Flag to clear
+ *
+ * The flag is cleared atomically, so it is safe against other flags being
+ * set or cleared at the same time.
+ *
+ * Some flags have locking requirements, check the documentation for the
+ * respective flags.
+ */
+static inline void kbase_ctx_flag_clear(struct kbase_context *kctx,
+					enum kbase_context_flags flag)
+{
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 3, 0)
+	atomic_andnot(flag, &kctx->flags);
+#else
+	/*
+	 * Kernels before 4.3 have neither atomic_andnot() nor atomic_and(),
+	 * and atomic_clear_mask() was only available on some architectures
+	 * (it was removed from arm and arm64 in v3.13).
+	 *
+	 * Emulate the operation with a compare-exchange loop: retry until the
+	 * value read is still the current one at the time of the exchange.
+	 */
+	int seen, wanted;
+
+	for (;;) {
+		seen = atomic_read(&kctx->flags);
+		wanted = seen & ~flag;
+
+		if (atomic_cmpxchg(&kctx->flags, seen, wanted) == seen)
+			break;
+	}
+#endif
+}
+
+/**
+ * kbase_ctx_flag_set - Set @flag on @kctx
+ * @kctx: Pointer to kbase context
+ * @flag: Flag to set
+ *
+ * Set the @flag on @kctx. This is done atomically, so other flags being
+ * cleared or set at the same time will be safe.
+ *
+ * Some flags have locking requirements, check the documentation for the
+ * respective flags.
+ */
+static inline void kbase_ctx_flag_set(struct kbase_context *kctx,
+				      enum kbase_context_flags flag)
+{
+	atomic_or(flag, &kctx->flags);
+}
+#endif /* _KBASE_CONTEXT_H_ */
--- a/drivers/gpu/arm/midgard_for_linux/mali_kbase_core_linux.c
+++ b/drivers/gpu/arm/midgard_for_linux/mali_kbase_core_linux.c
--- a/drivers/gpu/arm/midgard_for_linux/mali_kbase_debug.c
+++ b/drivers/gpu/arm/midgard_for_linux/mali_kbase_debug.c
@ -0,0 +1,39 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2014 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#include <mali_kbase.h>
+
+/* Assert hook and its argument; both are NULL until a hook is registered */
+static struct kbasep_debug_assert_cb kbasep_debug_assert_registered_cb = {
+	.func = NULL,
+	.param = NULL,
+};
+
+/*
+ * Record @func and @param as the hook that kbasep_debug_assert_call_hook()
+ * invokes. Passing NULL for @func leaves no hook registered.
+ */
+void kbase_debug_assert_register_hook(kbase_debug_assert_hook *func, void *param)
+{
+	struct kbasep_debug_assert_cb *cb = &kbasep_debug_assert_registered_cb;
+
+	cb->func = func;
+	cb->param = param;
+}
+
+/* Invoke the registered assert hook with its parameter, if there is one */
+void kbasep_debug_assert_call_hook(void)
+{
+	const struct kbasep_debug_assert_cb *cb = &kbasep_debug_assert_registered_cb;
+
+	if (cb->func == NULL)
+		return;
+
+	cb->func(cb->param);
+}
+KBASE_EXPORT_SYMBOL(kbasep_debug_assert_call_hook);
+
--- a/drivers/gpu/arm/midgard_for_linux/mali_kbase_debug.h
+++ b/drivers/gpu/arm/midgard_for_linux/mali_kbase_debug.h
@ -0,0 +1,164 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#ifndef _KBASE_DEBUG_H
+#define _KBASE_DEBUG_H
+
+#include <linux/bug.h>
+
+/** @brief If equal to 0, a trace containing the file, line, and function will be displayed before each message. */
+#define KBASE_DEBUG_SKIP_TRACE 0
+
+/** @brief If different from 0, the trace will only contain the file and line. */
+#define KBASE_DEBUG_SKIP_FUNCTION_NAME 0
+
+/** @brief Disable the asserts tests if set to 1. Default is to disable the asserts in release. */
+#ifndef KBASE_DEBUG_DISABLE_ASSERTS
+#ifdef CONFIG_MALI_DEBUG
+#define KBASE_DEBUG_DISABLE_ASSERTS 0
+#else
+#define KBASE_DEBUG_DISABLE_ASSERTS 1
+#endif
+#endif				/* KBASE_DEBUG_DISABLE_ASSERTS */
+
+/** Function type that is called on an KBASE_DEBUG_ASSERT() or KBASE_DEBUG_ASSERT_MSG() */
+typedef void (kbase_debug_assert_hook) (void *);
+
+struct kbasep_debug_assert_cb {
+	kbase_debug_assert_hook *func;
+	void *param;
+};
+
+/**
+ * @def KBASEP_DEBUG_PRINT_TRACE
+ * @brief Private macro containing the format of the trace to display before every message
+ * @sa KBASE_DEBUG_SKIP_TRACE, KBASE_DEBUG_SKIP_FUNCTION_NAME
+ *
+ * @def KBASEP_DEBUG_PRINT_FUNCTION
+ * @brief Private macro naming the enclosing function, or "" when the function
+ *        name is not to be printed
+ *
+ * Both macros are defined in every configuration, because
+ * KBASE_DEBUG_ASSERT_MSG() passes both of them to KBASEP_DEBUG_ASSERT_OUT().
+ */
+#if !KBASE_DEBUG_SKIP_TRACE
+#define KBASEP_DEBUG_PRINT_TRACE \
+		"In file: " __FILE__ " line: " CSTD_STR2(__LINE__)
+#if !KBASE_DEBUG_SKIP_FUNCTION_NAME
+#define KBASEP_DEBUG_PRINT_FUNCTION __func__
+#else
+#define KBASEP_DEBUG_PRINT_FUNCTION ""
+#endif
+#else
+#define KBASEP_DEBUG_PRINT_TRACE ""
+/* The whole trace is skipped, so no function name is printed either */
+#define KBASEP_DEBUG_PRINT_FUNCTION ""
+#endif
+
+/**
+ * @def KBASEP_DEBUG_ASSERT_OUT(trace, function, ...)
+ * @brief (Private) system printing function associated to the @see KBASE_DEBUG_ASSERT_MSG event.
+ * @param trace location in the code from where the message is printed
+ * @param function function from where the message is printed
+ * @param ... Format string followed by format arguments.
+ * @note function parameter cannot be concatenated with other strings
+ */
+/* Select the correct system output function*/
+#ifdef CONFIG_MALI_DEBUG
+#define KBASEP_DEBUG_ASSERT_OUT(trace, function, ...)\
+		do { \
+			pr_err("Mali<ASSERT>: %s function:%s ", trace, function);\
+			pr_err(__VA_ARGS__);\
+			pr_err("\n");\
+		} while (false)
+#else
+#define KBASEP_DEBUG_ASSERT_OUT(trace, function, ...) CSTD_NOP()
+#endif
+
+#ifdef CONFIG_MALI_DEBUG
+#define KBASE_CALL_ASSERT_HOOK() kbasep_debug_assert_call_hook()
+#else
+#define KBASE_CALL_ASSERT_HOOK() CSTD_NOP()
+#endif
+
+/**
+ * @def KBASE_DEBUG_ASSERT(expr)
+ * @brief Calls @see KBASE_PRINT_ASSERT and prints the expression @a expr if @a expr is false
+ *
+ * @note This macro does nothing if the flag @see KBASE_DEBUG_DISABLE_ASSERTS is set to 1
+ *
+ * @param expr Boolean expression
+ */
+#define KBASE_DEBUG_ASSERT(expr) \
+	KBASE_DEBUG_ASSERT_MSG(expr, #expr)
+
+#if KBASE_DEBUG_DISABLE_ASSERTS
+#define KBASE_DEBUG_ASSERT_MSG(expr, ...) CSTD_NOP()
+#else
+	/**
+	 * @def KBASE_DEBUG_ASSERT_MSG(expr, ...)
+	 * @brief Calls @see KBASEP_DEBUG_ASSERT_OUT and prints the given message if @a expr is false
+	 *
+	 * @note This macro does nothing if the flag @see KBASE_DEBUG_DISABLE_ASSERTS is set to 1
+	 *
+	 * @param expr Boolean expression
+	 * @param ...  Message to display when @a expr is false, as a format string followed by format arguments.
+	 */
+#define KBASE_DEBUG_ASSERT_MSG(expr, ...) \
+		do { \
+			if (!(expr)) { \
+				KBASEP_DEBUG_ASSERT_OUT(KBASEP_DEBUG_PRINT_TRACE, KBASEP_DEBUG_PRINT_FUNCTION, __VA_ARGS__);\
+				KBASE_CALL_ASSERT_HOOK();\
+				BUG();\
+			} \
+		} while (false)
+#endif				/* KBASE_DEBUG_DISABLE_ASSERTS */
+
+/**
+ * @def KBASE_DEBUG_CODE( X )
+ * @brief Executes the code inside the macro only in debug mode
+ *
+ * @param X Code to compile only in debug mode.
+ */
+#ifdef CONFIG_MALI_DEBUG
+#define KBASE_DEBUG_CODE(X) X
+#else
+#define KBASE_DEBUG_CODE(X) CSTD_NOP()
+#endif				/* CONFIG_MALI_DEBUG */
+
+/** @} */
+
+/**
+ * @brief Register a function to call on ASSERT
+ *
+ * Such functions will \b only be called during Debug mode, and for debugging
+ * features \b only. Do not rely on them to be called in general use.
+ *
+ * To disable the hook, supply NULL to \a func.
+ *
+ * @note This function is not thread-safe, and should only be used to
+ * register/deregister once in the module's lifetime.
+ *
+ * @param[in] func the function to call when an assert is triggered.
+ * @param[in] param the parameter to pass to \a func when calling it
+ */
+void kbase_debug_assert_register_hook(kbase_debug_assert_hook *func, void *param);
+
+/**
+ * @brief Call a debug assert hook previously registered with kbase_debug_assert_register_hook()
+ *
+ * @note This function is not thread-safe with respect to multiple threads
+ * registering functions and parameters with
+ * kbase_debug_assert_register_hook(). Otherwise, thread safety is the
+ * responsibility of the registered hook.
+ */
+void kbasep_debug_assert_call_hook(void);
+
+#endif				/* _KBASE_DEBUG_H */
--- a/drivers/gpu/arm/midgard_for_linux/mali_kbase_debug_job_fault.c
+++ b/drivers/gpu/arm/midgard_for_linux/mali_kbase_debug_job_fault.c
@ -0,0 +1,502 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#include <mali_kbase.h>
+#include <linux/spinlock.h>
+
+#ifdef CONFIG_DEBUG_FS
+
+/* Report whether at least one job fault event is queued on the device. */
+static bool kbase_is_job_fault_event_pending(struct kbase_device *kbdev)
+{
+	unsigned long irq_flags;
+	bool pending;
+
+	/* Sample the list state under the event lock for a consistent view. */
+	spin_lock_irqsave(&kbdev->job_fault_event_lock, irq_flags);
+	pending = !list_empty(&kbdev->job_fault_event_list);
+	spin_unlock_irqrestore(&kbdev->job_fault_event_lock, irq_flags);
+
+	return pending;
+}
+
+/* Return true when no queued job fault event belongs to @kctx. */
+static bool kbase_ctx_has_no_event_pending(struct kbase_context *kctx)
+{
+	struct kbase_device *kbdev = kctx->kbdev;
+	struct base_job_fault_event *pending;
+	unsigned long irq_flags;
+	bool none_pending = true;
+
+	spin_lock_irqsave(&kbdev->job_fault_event_lock, irq_flags);
+	/* An empty list simply yields zero iterations. */
+	list_for_each_entry(pending, &kbdev->job_fault_event_list, head) {
+		if (pending->katom->kctx == kctx) {
+			none_pending = false;
+			break;
+		}
+	}
+	spin_unlock_irqrestore(&kbdev->job_fault_event_lock, irq_flags);
+
+	return none_pending;
+}
+
+/*
+ * Wait until a job fault event is queued and copy its contents.
+ *
+ * @kbdev: device whose job fault event list is watched
+ * @event: caller-owned event that receives the event code and atom pointer
+ *         of the first queued event
+ *
+ * The queued event is only copied, not removed from the list.
+ *
+ * Return: 0 on success, -ERESTARTSYS if the wait was interrupted.
+ */
+static int kbase_job_fault_event_wait(struct kbase_device *kbdev,
+		struct base_job_fault_event *event)
+{
+	struct list_head            *event_list = &kbdev->job_fault_event_list;
+	struct base_job_fault_event *event_in;
+	unsigned long               flags;
+
+	spin_lock_irqsave(&kbdev->job_fault_event_lock, flags);
+	/* The wait condition is evaluated without holding the lock across the
+	 * return, so the list may have been drained again by the time the
+	 * lock is re-taken. Re-check under the lock and wait again if needed,
+	 * otherwise list_entry() below would interpret the list head itself
+	 * as an event.
+	 */
+	while (list_empty(event_list)) {
+		spin_unlock_irqrestore(&kbdev->job_fault_event_lock, flags);
+		if (wait_event_interruptible(kbdev->job_fault_wq,
+				 kbase_is_job_fault_event_pending(kbdev)))
+			return -ERESTARTSYS;
+		spin_lock_irqsave(&kbdev->job_fault_event_lock, flags);
+	}
+
+	event_in = list_entry(event_list->next,
+			struct base_job_fault_event, head);
+	event->event_code = event_in->event_code;
+	event->katom = event_in->katom;
+
+	spin_unlock_irqrestore(&kbdev->job_fault_event_lock, flags);
+
+	return 0;
+
+}
+
+/*
+ * Unlink and return the first event of @event_list.
+ *
+ * The list must not be empty. No locking is done here.
+ * @kbdev is not used by this helper.
+ */
+static struct base_job_fault_event *kbase_job_fault_event_dequeue(
+		struct kbase_device *kbdev, struct list_head *event_list)
+{
+	struct list_head *first = event_list->next;
+
+	list_del(first);
+
+	return list_entry(first, struct base_job_fault_event, head);
+}
+
+/*
+ * Drain the context's list of atoms that were held back behind a failed
+ * atom, running the postponed job-done bottom half for each one so the
+ * context can be scheduled again.
+ */
+static void kbase_job_fault_resume_event_cleanup(struct kbase_context *kctx)
+{
+	struct list_head *resume_list = &kctx->job_fault_resume_event_list;
+	struct base_job_fault_event *deferred;
+
+	while (!list_empty(resume_list)) {
+		deferred = kbase_job_fault_event_dequeue(kctx->kbdev,
+				resume_list);
+		kbase_jd_done_worker(&deferred->katom->work);
+	}
+}
+
+/* Remove every queued job fault event from the device list, whichever
+ * context it belongs to, and wake the resume wait queue after each removal
+ * so that contexts suspended because of a failed job can continue.
+ */
+static void kbase_job_fault_event_cleanup(struct kbase_device *kbdev)
+{
+	struct list_head *event_list = &kbdev->job_fault_event_list;
+	unsigned long    flags;
+
+	spin_lock_irqsave(&kbdev->job_fault_event_lock, flags);
+	while (!list_empty(event_list)) {
+		/* The dequeued event itself is not needed here. */
+		kbase_job_fault_event_dequeue(kbdev, event_list);
+		/* The lock is dropped around the wake-up and re-taken before
+		 * the list is examined again.
+		 */
+		spin_unlock_irqrestore(&kbdev->job_fault_event_lock, flags);
+		wake_up(&kbdev->job_fault_resume_wq);
+		spin_lock_irqsave(&kbdev->job_fault_event_lock, flags);
+	}
+	spin_unlock_irqrestore(&kbdev->job_fault_event_lock, flags);
+}
+
+/*
+ * Work item queued for each posted job fault event.
+ *
+ * Blocks until no event of the atom's context remains on the device event
+ * list, then clears the context's fault count, runs the job-done bottom
+ * half for the failed atom and finally for any atoms of the same context
+ * that were held back in the meantime.
+ */
+static void kbase_job_fault_resume_worker(struct work_struct *data)
+{
+	struct base_job_fault_event *event = container_of(data,
+			struct base_job_fault_event, job_fault_work);
+	struct kbase_context *kctx;
+	struct kbase_jd_atom *katom;
+
+	katom = event->katom;
+	kctx = katom->kctx;
+
+	dev_info(kctx->kbdev->dev, "Job dumping wait\n");
+
+	/* When woken up, check whether the queue is empty or only holds
+	 * failed atoms of other contexts. Either case means the failed job
+	 * of this context has been dumped, so the wait can end. Note that
+	 * the job_fault_event_list should never hold two atoms that belong
+	 * to the same context.
+	 */
+	wait_event(kctx->kbdev->job_fault_resume_wq,
+			 kbase_ctx_has_no_event_pending(kctx));
+
+	atomic_set(&kctx->job_fault_count, 0);
+	kbase_jd_done_worker(&katom->work);
+
+	/* Atoms of this context that completed while the failed job was being
+	 * dumped had their job-done worker held back. Run it for them now
+	 * that the dump has finished.
+	 */
+	kbase_job_fault_resume_event_cleanup(kctx);
+
+	dev_info(kctx->kbdev->dev, "Job dumping finish, resume scheduler\n");
+}
+
+/*
+ * Fill in the fault event embedded in @atom and append it to @event_list.
+ *
+ * No locking is done here; callers that need it take the lock themselves.
+ *
+ * Return: pointer to the atom's embedded event.
+ */
+static struct base_job_fault_event *kbase_job_fault_event_queue(
+		struct list_head *event_list,
+		struct kbase_jd_atom *atom,
+		u32 completion_code)
+{
+	struct base_job_fault_event *queued = &atom->fault_event;
+
+	queued->katom = atom;
+	queued->event_code = completion_code;
+	list_add_tail(&queued->head, event_list);
+
+	return queued;
+}
+
+/*
+ * Publish a job fault for @katom: queue its event on the device list under
+ * the event lock, wake readers waiting on job_fault_wq, and queue the
+ * resume worker for the event on the device's resume workqueue.
+ */
+static void kbase_job_fault_event_post(struct kbase_device *kbdev,
+		struct kbase_jd_atom *katom, u32 completion_code)
+{
+	struct base_job_fault_event *event;
+	unsigned long flags;
+
+	spin_lock_irqsave(&kbdev->job_fault_event_lock, flags);
+	event = kbase_job_fault_event_queue(&kbdev->job_fault_event_list,
+				katom, completion_code);
+	spin_unlock_irqrestore(&kbdev->job_fault_event_lock, flags);
+
+	/* Wake any reader blocked in kbase_job_fault_event_wait(). */
+	wake_up_interruptible(&kbdev->job_fault_wq);
+
+	INIT_WORK(&event->job_fault_work, kbase_job_fault_resume_worker);
+	queue_work(kbdev->job_fault_resume_workq, &event->job_fault_work);
+
+	dev_info(katom->kctx->kbdev->dev, "Job fault happen, start dump: %d_%d",
+			katom->kctx->tgid, katom->kctx->id);
+
+}
+
+/*
+ * Process a completed atom for the job fault dump feature.
+ *
+ * If a dump is already in progress for the atom's context, the atom is held
+ * back on the context's resume list. Otherwise, when fault debugging is
+ * enabled and the atom did not complete with BASE_JD_EVENT_DONE, a register
+ * snapshot is taken and a fault event is posted.
+ *
+ * Return: true if the atom was queued or posted, false otherwise.
+ */
+
+bool kbase_debug_job_fault_process(struct kbase_jd_atom *katom,
+		u32 completion_code)
+{
+	struct kbase_context *kctx = katom->kctx;
+
+	/* Only one atom per context may be dumped at a time. Atoms of the
+	 * same context arriving during a dump are parked; atoms of other
+	 * contexts fall through and can be dumped.
+	 */
+	if (atomic_read(&kctx->job_fault_count) > 0) {
+		kbase_job_fault_event_queue(
+				&kctx->job_fault_resume_event_list,
+				katom, completion_code);
+		dev_info(kctx->kbdev->dev, "queue:%d\n",
+				kbase_jd_atom_id(kctx, katom));
+		return true;
+	}
+
+	/* Nothing to do unless the debug file is open and the job failed. */
+	if (kctx->kbdev->job_fault_debug != true)
+		return false;
+	if (completion_code == BASE_JD_EVENT_DONE)
+		return false;
+
+	if (!kbase_job_fault_get_reg_snapshot(kctx)) {
+		dev_warn(kctx->kbdev->dev, "get reg dump failed\n");
+		return false;
+	}
+
+	kbase_job_fault_event_post(kctx->kbdev, katom, completion_code);
+	atomic_inc(&kctx->job_fault_count);
+	dev_info(kctx->kbdev->dev, "post:%d\n",
+			kbase_jd_atom_id(kctx, katom));
+
+	return true;
+}
+
+/*
+ * seq_file show callback: print part of the context's register dump.
+ *
+ * @v is the event copy produced by debug_job_fault_start(). Each call prints
+ * up to 50 "address: value" pairs from kctx->reg_dump, starting at
+ * event->reg_offset and advancing it by two per pair, and stops early at
+ * REGISTER_DUMP_TERMINATION_FLAG. On the first call (reg_offset == 0) a
+ * "tgid_id" header line is printed first.
+ *
+ * Return: 0 after printing, -1 if there is no register dump or the
+ * termination flag has already been reached.
+ */
+static int debug_job_fault_show(struct seq_file *m, void *v)
+{
+	struct kbase_device *kbdev = m->private;
+	struct base_job_fault_event *event = (struct base_job_fault_event *)v;
+	struct kbase_context *kctx = event->katom->kctx;
+	int i;
+
+	dev_info(kbdev->dev, "debug job fault seq show:%d_%d, %d",
+			kctx->tgid, kctx->id, event->reg_offset);
+
+	if (kctx->reg_dump == NULL) {
+		dev_warn(kbdev->dev, "reg dump is NULL");
+		return -1;
+	}
+
+	if (kctx->reg_dump[event->reg_offset] ==
+			REGISTER_DUMP_TERMINATION_FLAG) {
+		/* Return an error here to stop the read, so that the
+		 * following next() is not called. The stop callback can
+		 * then get the real event resource and release it.
+		 */
+		return -1;
+	}
+
+	if (event->reg_offset == 0)
+		seq_printf(m, "%d_%d\n", kctx->tgid, kctx->id);
+
+	/* Entries are stored as consecutive (address, value) pairs. */
+	for (i = 0; i < 50; i++) {
+		if (kctx->reg_dump[event->reg_offset] ==
+				REGISTER_DUMP_TERMINATION_FLAG) {
+			break;
+		}
+		seq_printf(m, "%08x: %08x\n",
+				kctx->reg_dump[event->reg_offset],
+				kctx->reg_dump[1+event->reg_offset]);
+		event->reg_offset += 2;
+
+	}
+
+
+	return 0;
+}
+/*
+ * seq_file next callback: log progress and return the same event again.
+ *
+ * Neither *pos nor the event is modified here; progress through the dump is
+ * tracked by event->reg_offset, which debug_job_fault_show() advances.
+ */
+static void *debug_job_fault_next(struct seq_file *m, void *v, loff_t *pos)
+{
+	struct kbase_device *kbdev = m->private;
+	struct base_job_fault_event *event = (struct base_job_fault_event *)v;
+
+	dev_info(kbdev->dev, "debug job fault seq next:%d, %d",
+			event->reg_offset, (int)*pos);
+
+	return event;
+}
+
+/*
+ * seq_file start callback.
+ *
+ * Only position 0 starts a dump: a private event copy is allocated, the
+ * call blocks until a job fault is queued, and the copy is returned as the
+ * iterator. Any other position ends the sequence.
+ *
+ * Return: the allocated event copy, or NULL on a non-zero position,
+ * allocation failure or interrupted wait.
+ */
+static void *debug_job_fault_start(struct seq_file *m, loff_t *pos)
+{
+	struct kbase_device *kbdev = m->private;
+	struct base_job_fault_event *snapshot;
+	struct kbase_jd_atom *faulted;
+
+	dev_info(kbdev->dev, "fault job seq start:%d", (int)*pos);
+
+	/* The condition is subtle: a new dump may only begin when either no
+	 * fault has been picked up yet or the previous dump has finished,
+	 * which is the case exactly when the position is zero.
+	 */
+	if (*pos != 0)
+		return NULL;
+
+	snapshot = kmalloc(sizeof(*snapshot), GFP_KERNEL);
+	if (!snapshot)
+		return NULL;
+
+	snapshot->reg_offset = 0;
+	if (kbase_job_fault_event_wait(kbdev, snapshot)) {
+		kfree(snapshot);
+		return NULL;
+	}
+
+	/* The cache flush workaround normally runs in the job-done bottom
+	 * half, which has been delayed. Clean the cache now so that the GPU
+	 * memory dump is correct.
+	 */
+	faulted = snapshot->katom;
+	if (faulted->need_cache_flush_cores_retained) {
+		kbase_gpu_cacheclean(kbdev, faulted);
+		faulted->need_cache_flush_cores_retained = 0;
+	}
+
+	return snapshot;
+}
+
+/*
+ * seq_file stop callback.
+ *
+ * With a non-NULL @v (the event copy allocated by debug_job_fault_start())
+ * the copy is freed. With a NULL @v the first queued event, if any, is
+ * removed from the device list and the resume wait queue is woken.
+ */
+static void debug_job_fault_stop(struct seq_file *m, void *v)
+{
+	struct kbase_device *kbdev = m->private;
+
+	/* The kbase_jd_done_worker is woken up only after stop, because the
+	 * debug daemon needs to get the memory dump before the register
+	 * dump; otherwise the memory dump may be incorrect.
+	 */
+
+	if (v != NULL) {
+		kfree(v);
+		dev_info(kbdev->dev, "debug job fault seq stop stage 1");
+
+	} else {
+		unsigned long flags;
+
+		spin_lock_irqsave(&kbdev->job_fault_event_lock, flags);
+		if (!list_empty(&kbdev->job_fault_event_list)) {
+			kbase_job_fault_event_dequeue(kbdev,
+				&kbdev->job_fault_event_list);
+			wake_up(&kbdev->job_fault_resume_wq);
+		}
+		spin_unlock_irqrestore(&kbdev->job_fault_event_lock, flags);
+		dev_info(kbdev->dev, "debug job fault seq stop stage 2");
+	}
+
+}
+
+/* seq_file iterator callbacks backing the job fault debugfs file. */
+static const struct seq_operations ops = {
+	.start = debug_job_fault_start,
+	.next = debug_job_fault_next,
+	.stop = debug_job_fault_stop,
+	.show = debug_job_fault_show,
+};
+
+/*
+ * Open handler for the job fault debugfs file.
+ *
+ * Sets up the seq_file iterator, stores the device pointer as its private
+ * data and enables job fault debugging on the device.
+ *
+ * Return: 0 on success, or the error returned by seq_open().
+ */
+static int debug_job_fault_open(struct inode *in, struct file *file)
+{
+	struct kbase_device *kbdev = in->i_private;
+	int ret;
+
+	/* If seq_open() fails, file->private_data does not point at a
+	 * seq_file, so it must not be dereferenced below.
+	 */
+	ret = seq_open(file, &ops);
+	if (ret)
+		return ret;
+
+	((struct seq_file *)file->private_data)->private = kbdev;
+	dev_info(kbdev->dev, "debug job fault seq open");
+
+	kbdev->job_fault_debug = true;
+
+	return 0;
+
+}
+
+/*
+ * Release handler for the job fault debugfs file: tears down the seq_file,
+ * disables job fault debugging on the device and drops all queued events.
+ *
+ * Always returns 0.
+ */
+static int debug_job_fault_release(struct inode *in, struct file *file)
+{
+	struct kbase_device *kbdev = in->i_private;
+
+	seq_release(in, file);
+
+	kbdev->job_fault_debug = false;
+
+	/* Clean up the unprocessed job faults. After that, all the suspended
+	 * contexts can be rescheduled.
+	 */
+	kbase_job_fault_event_cleanup(kbdev);
+
+	dev_info(kbdev->dev, "debug job fault seq close");
+
+	return 0;
+}
+
+/* File operations of the "job_fault" debugfs file; reads go via seq_file. */
+static const struct file_operations kbasep_debug_job_fault_fops = {
+	.open = debug_job_fault_open,
+	.read = seq_read,
+	.llseek = seq_lseek,
+	.release = debug_job_fault_release,
+};
+
+/*
+ * Initialize the debugfs entry for the job fault dump.
+ *
+ * Creates a read-only "job_fault" file in the device's Mali debugfs
+ * directory, with @kbdev as its private data.
+ */
+void kbase_debug_job_fault_debugfs_init(struct kbase_device *kbdev)
+{
+	debugfs_create_file("job_fault", S_IRUGO,
+			kbdev->mali_debugfs_directory, kbdev,
+			&kbasep_debug_job_fault_fops);
+}
+
+
+/*
+ * Set up the per-device job fault state: event list and its lock, the two
+ * wait queues and the resume workqueue. Fault debugging starts disabled.
+ *
+ * Return: 0 on success, -ENOMEM if the workqueue could not be allocated.
+ */
+int kbase_debug_job_fault_dev_init(struct kbase_device *kbdev)
+{
+	spin_lock_init(&kbdev->job_fault_event_lock);
+	INIT_LIST_HEAD(&kbdev->job_fault_event_list);
+	init_waitqueue_head(&kbdev->job_fault_wq);
+	init_waitqueue_head(&kbdev->job_fault_resume_wq);
+
+	kbdev->job_fault_resume_workq = alloc_workqueue(
+			"kbase_job_fault_resume_work_queue", WQ_MEM_RECLAIM, 1);
+	if (kbdev->job_fault_resume_workq == NULL)
+		return -ENOMEM;
+
+	kbdev->job_fault_debug = false;
+
+	return 0;
+}
+
+/*
+ * Release the per-device job fault resources: destroys the resume
+ * workqueue created by kbase_debug_job_fault_dev_init().
+ */
+void kbase_debug_job_fault_dev_term(struct kbase_device *kbdev)
+{
+	destroy_workqueue(kbdev->job_fault_resume_workq);
+}
+
+
+/*
+ * Initialize the per-context job fault data structures.
+ *
+ * The resume list and fault counter are always initialized. The register
+ * dump buffer is best effort: if it cannot be allocated or its snapshot
+ * layout cannot be set up, kctx->reg_dump is left NULL.
+ */
+void kbase_debug_job_fault_context_init(struct kbase_context *kctx)
+{
+	/* Set up the bookkeeping first so that it is valid even when the
+	 * register dump buffer below cannot be provided.
+	 */
+	INIT_LIST_HEAD(&kctx->job_fault_resume_event_list);
+	atomic_set(&kctx->job_fault_count, 0);
+
+	/* Allocate twice the register range, because this memory keeps both
+	 * the register address and its value.
+	 */
+	kctx->reg_dump = vmalloc(0x4000 * 2);
+	if (kctx->reg_dump == NULL)
+		return;
+
+	if (kbase_debug_job_fault_reg_snapshot_init(kctx, 0x4000) == false) {
+		vfree(kctx->reg_dump);
+		kctx->reg_dump = NULL;
+	}
+
+}
+
+/*
+ * Release the per-context job fault resources: frees the register dump
+ * buffer (kctx->reg_dump).
+ */
+void kbase_debug_job_fault_context_term(struct kbase_context *kctx)
+{
+	vfree(kctx->reg_dump);
+}
+
+#else /* CONFIG_DEBUG_FS */
+
+/* Variant built without CONFIG_DEBUG_FS: only marks fault debugging as
+ * disabled. Always returns 0.
+ */
+int kbase_debug_job_fault_dev_init(struct kbase_device *kbdev)
+{
+	kbdev->job_fault_debug = false;
+
+	return 0;
+}
+
+/* Variant built without CONFIG_DEBUG_FS: nothing to release. */
+void kbase_debug_job_fault_dev_term(struct kbase_device *kbdev)
+{
+}
+
+#endif /* CONFIG_DEBUG_FS */
--- a/drivers/gpu/arm/midgard_for_linux/mali_kbase_debug_job_fault.h
+++ b/drivers/gpu/arm/midgard_for_linux/mali_kbase_debug_job_fault.h
@ -0,0 +1,96 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#ifndef _KBASE_DEBUG_JOB_FAULT_H
+#define _KBASE_DEBUG_JOB_FAULT_H
+
+#include <linux/debugfs.h>
+#include <linux/seq_file.h>
+
+#define REGISTER_DUMP_TERMINATION_FLAG 0xFFFFFFFF
+
+/**
+ * kbase_debug_job_fault_dev_init - Create the fault event wait queue
+ *		per device and initialize the required lists.
+ * @kbdev:	Device pointer
+ *
+ * Return: Zero on success or a negative error code.
+ */
+int kbase_debug_job_fault_dev_init(struct kbase_device *kbdev);
+
+/**
+ * kbase_debug_job_fault_debugfs_init - Initialize job fault debugfs entry
+ * @kbdev:	Device pointer
+ */
+void kbase_debug_job_fault_debugfs_init(struct kbase_device *kbdev);
+
+/**
+ * kbase_debug_job_fault_dev_term - Clean up resources created in
+ *		kbase_debug_job_fault_dev_init.
+ * @kbdev:	Device pointer
+ */
+void kbase_debug_job_fault_dev_term(struct kbase_device *kbdev);
+
+/**
+ * kbase_debug_job_fault_context_init - Initialize the relevant
+ *		data structure per context
+ * @kctx: KBase context pointer
+ */
+void kbase_debug_job_fault_context_init(struct kbase_context *kctx);
+
+/**
+ * kbase_debug_job_fault_context_term - Release the relevant
+ *		resource per context
+ * @kctx: KBase context pointer
+ */
+void kbase_debug_job_fault_context_term(struct kbase_context *kctx);
+
+/**
+ * kbase_debug_job_fault_process - Process the failed job.
+ *      It will send an event and wake up the job fault waiting queue
+ *      Then create a work queue to wait for job dump finish
+ *      This function should be called in the interrupt handler and before
+ *      jd_done that make sure the jd_done_worker will be delayed until the
+ *      job dump finish
+ * @katom: The failed atom pointer
+ * @completion_code: the job status
+ * @return true if dump is going on
+ */
+bool kbase_debug_job_fault_process(struct kbase_jd_atom *katom,
+		u32 completion_code);
+
+
+/**
+ * kbase_debug_job_fault_reg_snapshot_init - Set the interested registers
+ *      address during the job fault process, the relevant registers will
+ *      be saved when a job fault happen
+ * @kctx: KBase context pointer
+ * @reg_range: Maximum register address space
+ * @return true if initializing successfully
+ */
+bool kbase_debug_job_fault_reg_snapshot_init(struct kbase_context *kctx,
+		int reg_range);
+
+/**
+ * kbase_job_fault_get_reg_snapshot - Read the interested registers for
+ *      failed job dump
+ * @kctx: KBase context pointer
+ * @return true if getting registers successfully
+ */
+bool kbase_job_fault_get_reg_snapshot(struct kbase_context *kctx);
+
+#endif  /*_KBASE_DEBUG_JOB_FAULT_H*/
--- a/drivers/gpu/arm/midgard_for_linux/mali_kbase_debug_mem_view.c
+++ b/drivers/gpu/arm/midgard_for_linux/mali_kbase_debug_mem_view.c
@ -0,0 +1,279 @@
+/*
+ *
+ * (C) COPYRIGHT 2013-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/*
+ * Debugfs interface to dump the memory visible to the GPU
+ */
+
+#include "mali_kbase_debug_mem_view.h"
+#include "mali_kbase.h"
+
+#include <linux/list.h>
+#include <linux/file.h>
+
+#ifdef CONFIG_DEBUG_FS
+
+/* One GPU VA region as recorded by debug_mem_open(). */
+struct debug_mem_mapping {
+	struct list_head node;	/* link in debug_mem_data::mapping_list */
+
+	/* Physical allocation of the region; a reference is held on it */
+	struct kbase_mem_phy_alloc *alloc;
+	unsigned long flags;	/* copy of the region's flags */
+
+	u64 start_pfn;		/* copy of the region's start_pfn */
+	size_t nr_pages;	/* copy of the region's nr_pages */
+};
+
+/* Per-open state of the mem_view file, stored as the seq_file private data. */
+struct debug_mem_data {
+	struct list_head mapping_list;	/* list of struct debug_mem_mapping */
+	struct kbase_context *kctx;	/* context whose memory is shown */
+};
+
+/* seq_file iterator: the current mapping and the page index inside it. */
+struct debug_mem_seq_off {
+	struct list_head *lh;	/* node of the current debug_mem_mapping */
+	size_t offset;		/* page index within the current mapping */
+};
+
+/*
+ * seq_file start callback: translate the global page position *_pos into a
+ * (mapping, page offset) cursor.
+ *
+ * Return: a newly allocated cursor, or NULL if the position lies beyond the
+ * last mapping or the allocation fails.
+ */
+static void *debug_mem_start(struct seq_file *m, loff_t *_pos)
+{
+	struct debug_mem_data *mem_data = m->private;
+	struct debug_mem_mapping *map;
+	loff_t remaining = *_pos;
+
+	list_for_each_entry(map, &mem_data->mapping_list, node) {
+		struct debug_mem_seq_off *cursor;
+
+		/* Position is past this mapping: skip all of its pages. */
+		if (remaining >= map->nr_pages) {
+			remaining -= map->nr_pages;
+			continue;
+		}
+
+		cursor = kmalloc(sizeof(*cursor), GFP_KERNEL);
+		if (!cursor)
+			return NULL;
+
+		cursor->lh = &map->node;
+		cursor->offset = remaining;
+		return cursor;
+	}
+
+	/* Ran off the end of the mapping list. */
+	return NULL;
+}
+
+/* seq_file stop callback: free the cursor @v (kfree() accepts NULL). */
+static void debug_mem_stop(struct seq_file *m, void *v)
+{
+	kfree(v);
+}
+
+/*
+ * seq_file next callback: advance the cursor @v by one page.
+ *
+ * Moves to the next page of the current mapping or, at its last page, to
+ * the first page of the following mapping, incrementing *pos either way.
+ *
+ * Return: the updated cursor, or NULL after the last page of the last
+ * mapping. In that case the cursor has been freed.
+ */
+static void *debug_mem_next(struct seq_file *m, void *v, loff_t *pos)
+{
+	struct debug_mem_data *mem_data = m->private;
+	struct debug_mem_seq_off *data = v;
+	struct debug_mem_mapping *map;
+
+	map = list_entry(data->lh, struct debug_mem_mapping, node);
+
+	if (data->offset < map->nr_pages - 1) {
+		data->offset++;
+		++*pos;
+		return data;
+	}
+
+	if (list_is_last(data->lh, &mem_data->mapping_list)) {
+		/* Once NULL is returned the seq_file core calls stop() with
+		 * NULL, so the cursor would never reach debug_mem_stop().
+		 * Free it here to avoid leaking it.
+		 */
+		kfree(data);
+		return NULL;
+	}
+
+	data->lh = data->lh->next;
+	data->offset = 0;
+	++*pos;
+
+	return data;
+}
+
+/*
+ * seq_file show callback: dump the page the cursor @v points at.
+ *
+ * Runs with the context's GPU VM lock held. A page index at or beyond the
+ * allocation's nents is reported as "Unbacked page". Otherwise the page is
+ * temporarily mapped and printed as rows of four 32-bit words, each row
+ * prefixed with its address derived from start_pfn and the page offset.
+ * Nothing is printed if the temporary mapping fails.
+ *
+ * Always returns 0.
+ */
+static int debug_mem_show(struct seq_file *m, void *v)
+{
+	struct debug_mem_data *mem_data = m->private;
+	struct debug_mem_seq_off *data = v;
+	struct debug_mem_mapping *map;
+	int i, j;
+	struct page *page;
+	uint32_t *mapping;
+	pgprot_t prot = PAGE_KERNEL;
+
+	map = list_entry(data->lh, struct debug_mem_mapping, node);
+
+	kbase_gpu_vm_lock(mem_data->kctx);
+
+	if (data->offset >= map->alloc->nents) {
+		seq_printf(m, "%016llx: Unbacked page\n\n", (map->start_pfn +
+				data->offset) << PAGE_SHIFT);
+		goto out;
+	}
+
+	/* Regions not flagged CPU-cached are mapped write-combined. */
+	if (!(map->flags & KBASE_REG_CPU_CACHED))
+		prot = pgprot_writecombine(prot);
+
+	page = pfn_to_page(PFN_DOWN(map->alloc->pages[data->offset]));
+	mapping = vmap(&page, 1, VM_MAP, prot);
+	if (!mapping)
+		goto out;
+
+	/* i and j are byte offsets; each row covers four words. */
+	for (i = 0; i < PAGE_SIZE; i += 4*sizeof(*mapping)) {
+		seq_printf(m, "%016llx:", i + ((map->start_pfn +
+				data->offset) << PAGE_SHIFT));
+
+		for (j = 0; j < 4*sizeof(*mapping); j += sizeof(*mapping))
+			seq_printf(m, " %08x", mapping[(i+j)/sizeof(*mapping)]);
+		seq_putc(m, '\n');
+	}
+
+	vunmap(mapping);
+
+	seq_putc(m, '\n');
+
+out:
+	kbase_gpu_vm_unlock(mem_data->kctx);
+	return 0;
+}
+
+/* seq_file iterator callbacks backing the mem_view debugfs file. */
+static const struct seq_operations ops = {
+	.start = debug_mem_start,
+	.next = debug_mem_next,
+	.stop = debug_mem_stop,
+	.show = debug_mem_show,
+};
+
+/*
+ * Open handler for the mem_view debugfs file.
+ *
+ * Sets up the seq_file, takes a reference on the context's file and, under
+ * the GPU VM lock, records every region of kctx->reg_rbtree that has a GPU
+ * allocation as a debug_mem_mapping (holding a reference on the
+ * allocation). The resulting debug_mem_data becomes the seq_file private
+ * data.
+ *
+ * Return: 0 on success, the seq_open() error, or -ENOMEM. On failure all
+ * references and memory taken so far are released again.
+ */
+static int debug_mem_open(struct inode *i, struct file *file)
+{
+	struct file *kctx_file = i->i_private;
+	struct kbase_context *kctx = kctx_file->private_data;
+	struct rb_node *p;
+	struct debug_mem_data *mem_data;
+	int ret;
+
+	ret = seq_open(file, &ops);
+	if (ret)
+		return ret;
+
+	mem_data = kmalloc(sizeof(*mem_data), GFP_KERNEL);
+	if (!mem_data) {
+		ret = -ENOMEM;
+		goto out;
+	}
+
+	mem_data->kctx = kctx;
+
+	INIT_LIST_HEAD(&mem_data->mapping_list);
+
+	/* Dropped again in debug_mem_release() or on the error path below. */
+	get_file(kctx_file);
+
+	kbase_gpu_vm_lock(kctx);
+
+	for (p = rb_first(&kctx->reg_rbtree); p; p = rb_next(p)) {
+		struct kbase_va_region *reg;
+		struct debug_mem_mapping *mapping;
+
+		reg = rb_entry(p, struct kbase_va_region, rblink);
+
+		if (reg->gpu_alloc == NULL)
+			/* Empty region - ignore */
+			continue;
+
+		mapping = kmalloc(sizeof(*mapping), GFP_KERNEL);
+		if (!mapping) {
+			ret = -ENOMEM;
+			kbase_gpu_vm_unlock(kctx);
+			goto out;
+		}
+
+		mapping->alloc = kbase_mem_phy_alloc_get(reg->gpu_alloc);
+		mapping->start_pfn = reg->start_pfn;
+		mapping->nr_pages = reg->nr_pages;
+		mapping->flags = reg->flags;
+		list_add_tail(&mapping->node, &mem_data->mapping_list);
+	}
+
+	kbase_gpu_vm_unlock(kctx);
+
+	((struct seq_file *)file->private_data)->private = mem_data;
+
+	return 0;
+
+out:
+	/* mem_data is NULL only if its own allocation failed, in which case
+	 * no file reference or mapping has been taken yet.
+	 */
+	if (mem_data) {
+		while (!list_empty(&mem_data->mapping_list)) {
+			struct debug_mem_mapping *mapping;
+
+			mapping = list_first_entry(&mem_data->mapping_list,
+					struct debug_mem_mapping, node);
+			kbase_mem_phy_alloc_put(mapping->alloc);
+			list_del(&mapping->node);
+			kfree(mapping);
+		}
+		fput(kctx_file);
+		kfree(mem_data);
+	}
+	seq_release(i, file);
+	return ret;
+}
+
+/*
+ * Release handler for the mem_view debugfs file: tears down the seq_file,
+ * drops every recorded mapping together with its allocation reference,
+ * frees the per-open state and releases the reference on the context file.
+ *
+ * Always returns 0.
+ */
+static int debug_mem_release(struct inode *inode, struct file *file)
+{
+	struct file *kctx_file = inode->i_private;
+	struct seq_file *sfile = file->private_data;
+	struct debug_mem_data *mem_data = sfile->private;
+	struct debug_mem_mapping *mapping;
+	struct debug_mem_mapping *following;
+
+	/* mem_data was read above, before the seq_file is torn down. */
+	seq_release(inode, file);
+
+	list_for_each_entry_safe(mapping, following,
+			&mem_data->mapping_list, node) {
+		kbase_mem_phy_alloc_put(mapping->alloc);
+		list_del(&mapping->node);
+		kfree(mapping);
+	}
+
+	kfree(mem_data);
+
+	fput(kctx_file);
+
+	return 0;
+}
+
+/* File operations of the "mem_view" debugfs file; reads go via seq_file. */
+static const struct file_operations kbase_debug_mem_view_fops = {
+	.open = debug_mem_open,
+	.release = debug_mem_release,
+	.read = seq_read,
+	.llseek = seq_lseek
+};
+
+/**
+ * kbase_debug_mem_view_init - Initialise the mem_view debugfs file
+ * @kctx_file: The /dev/mali0 file instance for the context
+ *
+ * This function creates a read-only "mem_view" file in the context's debugfs
+ * directory, which can be used to get a view of the memory backing the
+ * context's GPU regions.
+ *
+ * The file is cleaned up by a call to debugfs_remove_recursive() deleting the
+ * parent directory.
+ */
+void kbase_debug_mem_view_init(struct file *kctx_file)
+{
+	struct kbase_context *kctx = kctx_file->private_data;
+
+	debugfs_create_file("mem_view", S_IRUGO, kctx->kctx_dentry, kctx_file,
+			&kbase_debug_mem_view_fops);
+}
+
+#endif
--- a/drivers/gpu/arm/midgard_for_linux/mali_kbase_debug_mem_view.h
+++ b/drivers/gpu/arm/midgard_for_linux/mali_kbase_debug_mem_view.h
@ -0,0 +1,25 @@
+/*
+ *
+ * (C) COPYRIGHT 2013-2014 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#ifndef _KBASE_DEBUG_MEM_VIEW_H
+#define _KBASE_DEBUG_MEM_VIEW_H
+
+#include <mali_kbase.h>
+
+void kbase_debug_mem_view_init(struct file *kctx_file);
+
+#endif
--- a/drivers/gpu/arm/midgard_for_linux/mali_kbase_defs.h
+++ b/drivers/gpu/arm/midgard_for_linux/mali_kbase_defs.h
--- a/drivers/gpu/arm/midgard_for_linux/mali_kbase_device.c
+++ b/drivers/gpu/arm/midgard_for_linux/mali_kbase_device.c
@ -0,0 +1,697 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/*
+ * Base kernel device APIs
+ */
+
+#include <linux/debugfs.h>
+#include <linux/dma-mapping.h>
+#include <linux/seq_file.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/of_platform.h>
+
+#include <mali_kbase.h>
+#include <mali_kbase_defs.h>
+#include <mali_kbase_hwaccess_instr.h>
+#include <mali_kbase_hw.h>
+#include <mali_kbase_config_defaults.h>
+
+#include <mali_kbase_profiling_gator_api.h>
+
+/* NOTE: Magic - 0x45435254 (TRCE in ASCII).
+ * Supports tracing feature provided in the base module.
+ * Please keep it in sync with the value of base module.
+ */
+#define TRACE_BUFFER_HEADER_SPECIAL 0x45435254
+
+#if KBASE_TRACE_ENABLE
+static const char *kbasep_trace_code_string[] = {
+	/* IMPORTANT: USE OF SPECIAL #INCLUDE OF NON-STANDARD HEADER FILE
+	 * THIS MUST BE USED AT THE START OF THE ARRAY */
+#define KBASE_TRACE_CODE_MAKE_CODE(X) # X
+#include "mali_kbase_trace_defs.h"
+#undef  KBASE_TRACE_CODE_MAKE_CODE
+};
+#endif
+
+#define DEBUG_MESSAGE_SIZE 256
+
+static int kbasep_trace_init(struct kbase_device *kbdev);
+static void kbasep_trace_term(struct kbase_device *kbdev);
+static void kbasep_trace_hook_wrapper(void *param);
+
+/* Allocate a zero-initialised kbase_device; returns NULL on failure. */
+struct kbase_device *kbase_device_alloc(void)
+{
+	return kzalloc(sizeof(struct kbase_device), GFP_KERNEL);
+}
+
+/*
+ * Initialise address space @i of the device.
+ *
+ * Creates the "mali_mmu<i>" workqueue and the page fault and bus fault work
+ * items. On hardware affected by BASE_HW_ISSUE_8316 it additionally creates
+ * the "mali_mmu<i>_poker" workqueue and sets up the poke work item and
+ * timer.
+ *
+ * Return: 0 on success, -EINVAL if a workqueue could not be allocated. On
+ * failure no workqueue of this address space is left allocated.
+ */
+static int kbase_device_as_init(struct kbase_device *kbdev, int i)
+{
+	const char format[] = "mali_mmu%d";
+	char name[sizeof(format)];
+	const char poke_format[] = "mali_mmu%d_poker";
+	char poke_name[sizeof(poke_format)];
+
+	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8316))
+		snprintf(poke_name, sizeof(poke_name), poke_format, i);
+
+	snprintf(name, sizeof(name), format, i);
+
+	kbdev->as[i].number = i;
+	kbdev->as[i].fault_addr = 0ULL;
+
+	kbdev->as[i].pf_wq = alloc_workqueue(name, 0, 1);
+	if (!kbdev->as[i].pf_wq)
+		return -EINVAL;
+
+	INIT_WORK(&kbdev->as[i].work_pagefault, page_fault_worker);
+	INIT_WORK(&kbdev->as[i].work_busfault, bus_fault_worker);
+
+	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8316)) {
+		struct hrtimer *poke_timer = &kbdev->as[i].poke_timer;
+		struct work_struct *poke_work = &kbdev->as[i].poke_work;
+
+		kbdev->as[i].poke_wq = alloc_workqueue(poke_name, 0, 1);
+		if (!kbdev->as[i].poke_wq) {
+			/* Undo the fault workqueue created above. */
+			destroy_workqueue(kbdev->as[i].pf_wq);
+			return -EINVAL;
+		}
+		KBASE_DEBUG_ASSERT(!object_is_on_stack(poke_work));
+		INIT_WORK(poke_work, kbasep_as_do_poke);
+
+		hrtimer_init(poke_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+
+		poke_timer->function = kbasep_as_poke_timer_callback;
+
+		kbdev->as[i].poke_refcount = 0;
+		kbdev->as[i].poke_state = 0u;
+	}
+
+	return 0;
+}
+
+/*
+ * Tear down address space @i: destroys its fault workqueue and, on hardware
+ * affected by BASE_HW_ISSUE_8316, its poke workqueue.
+ */
+static void kbase_device_as_term(struct kbase_device *kbdev, int i)
+{
+	destroy_workqueue(kbdev->as[i].pf_wq);
+	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8316))
+		destroy_workqueue(kbdev->as[i].poke_wq);
+}
+
+/*
+ * Initialise all hardware address spaces of the device.
+ *
+ * Return: 0 on success, or the error from kbase_device_as_init(). On
+ * failure every address space that had been initialised is torn down.
+ */
+static int kbase_device_all_as_init(struct kbase_device *kbdev)
+{
+	int i, err;
+
+	for (i = 0; i < kbdev->nr_hw_address_spaces; i++) {
+		err = kbase_device_as_init(kbdev, i);
+		if (err)
+			goto free_workqs;
+	}
+
+	return 0;
+
+free_workqs:
+	/* Address space i failed and has already cleaned up after itself,
+	 * so only indices i - 1 down to 0 must be terminated.
+	 */
+	while (i-- > 0)
+		kbase_device_as_term(kbdev, i);
+
+	return err;
+}
+
+/* Tear down every hardware address space of the device. */
+static void kbase_device_all_as_term(struct kbase_device *kbdev)
+{
+	int i;
+
+	for (i = 0; i < kbdev->nr_hw_address_spaces; i++)
+		kbase_device_as_term(kbdev, i);
+}
+
+/*
+ * Initialise the core state of a kbase device.
+ *
+ * Sets up MMU locks, hardware issue and feature masks, DMA masks, the
+ * hardware address spaces, tracing, profiling controls, the assert hook,
+ * the instrumentation backend and default timing parameters.
+ *
+ * Return: 0 on success or a negative error code. On failure everything set
+ * up here has been undone, including the assert hook registration.
+ */
+int kbase_device_init(struct kbase_device * const kbdev)
+{
+	int i, err;
+#ifdef CONFIG_ARM64
+	struct device_node *np = NULL;
+#endif /* CONFIG_ARM64 */
+
+	spin_lock_init(&kbdev->mmu_mask_change);
+	mutex_init(&kbdev->mmu_hw_mutex);
+#ifdef CONFIG_ARM64
+	kbdev->cci_snoop_enabled = false;
+	np = kbdev->dev->of_node;
+	if (np != NULL) {
+		if (of_property_read_u32(np, "snoop_enable_smc",
+					&kbdev->snoop_enable_smc))
+			kbdev->snoop_enable_smc = 0;
+		if (of_property_read_u32(np, "snoop_disable_smc",
+					&kbdev->snoop_disable_smc))
+			kbdev->snoop_disable_smc = 0;
+		/* Either both or none of the calls should be provided. */
+		if (!((kbdev->snoop_disable_smc == 0
+			&& kbdev->snoop_enable_smc == 0)
+			|| (kbdev->snoop_disable_smc != 0
+			&& kbdev->snoop_enable_smc != 0))) {
+			WARN_ON(1);
+			err = -EINVAL;
+			goto fail;
+		}
+	}
+#endif /* CONFIG_ARM64 */
+	/* Get the list of workarounds for issues on the current HW
+	 * (identified by the GPU_ID register)
+	 */
+	err = kbase_hw_set_issues_mask(kbdev);
+	if (err)
+		goto fail;
+
+	/* Set the list of features available on the current HW
+	 * (identified by the GPU_ID register)
+	 */
+	kbase_hw_set_features_mask(kbdev);
+
+	kbase_gpuprops_set_features(kbdev);
+
+	/* On Linux 4.0+, dma coherency is determined from device tree */
+#if defined(CONFIG_ARM64) && LINUX_VERSION_CODE < KERNEL_VERSION(4, 0, 0)
+	set_dma_ops(kbdev->dev, &noncoherent_swiotlb_dma_ops);
+#endif
+
+	/* Workaround a pre-3.13 Linux issue, where dma_mask is NULL when our
+	 * device structure was created by device-tree
+	 */
+	if (!kbdev->dev->dma_mask)
+		kbdev->dev->dma_mask = &kbdev->dev->coherent_dma_mask;
+
+	err = dma_set_mask(kbdev->dev,
+			DMA_BIT_MASK(kbdev->gpu_props.mmu.pa_bits));
+	if (err)
+		goto dma_set_mask_failed;
+
+	err = dma_set_coherent_mask(kbdev->dev,
+			DMA_BIT_MASK(kbdev->gpu_props.mmu.pa_bits));
+	if (err)
+		goto dma_set_mask_failed;
+
+	kbdev->nr_hw_address_spaces = kbdev->gpu_props.num_address_spaces;
+
+	err = kbase_device_all_as_init(kbdev);
+	if (err)
+		goto as_init_failed;
+
+	spin_lock_init(&kbdev->hwcnt.lock);
+
+	err = kbasep_trace_init(kbdev);
+	if (err)
+		goto term_as;
+
+	mutex_init(&kbdev->cacheclean_lock);
+
+#ifdef CONFIG_MALI_TRACE_TIMELINE
+	for (i = 0; i < BASE_JM_MAX_NR_SLOTS; ++i)
+		kbdev->timeline.slot_atoms_submitted[i] = 0;
+
+	for (i = 0; i <= KBASEP_TIMELINE_PM_EVENT_LAST; ++i)
+		atomic_set(&kbdev->timeline.pm_event_uid[i], 0);
+#endif /* CONFIG_MALI_TRACE_TIMELINE */
+
+	/* fbdump profiling controls set to 0 - fbdump not enabled until changed by gator */
+	for (i = 0; i < FBDUMP_CONTROL_MAX; i++)
+		kbdev->kbase_profiling_controls[i] = 0;
+
+	kbase_debug_assert_register_hook(&kbasep_trace_hook_wrapper, kbdev);
+
+	atomic_set(&kbdev->ctx_num, 0);
+
+	err = kbase_instr_backend_init(kbdev);
+	if (err)
+		goto term_trace;
+
+	kbdev->pm.dvfs_period = DEFAULT_PM_DVFS_PERIOD;
+
+	kbdev->reset_timeout_ms = DEFAULT_RESET_TIMEOUT_MS;
+
+#ifdef CONFIG_MALI_GPU_MMU_AARCH64
+	kbdev->mmu_mode = kbase_mmu_mode_get_aarch64();
+#else
+	kbdev->mmu_mode = kbase_mmu_mode_get_lpae();
+#endif /* CONFIG_MALI_GPU_MMU_AARCH64 */
+
+#ifdef CONFIG_MALI_DEBUG
+	init_waitqueue_head(&kbdev->driver_inactive_wait);
+#endif /* CONFIG_MALI_DEBUG */
+
+	return 0;
+term_trace:
+	/* The assert hook was registered with kbdev as its parameter. Remove
+	 * it so that a later assert cannot call into a device whose
+	 * initialisation failed.
+	 */
+	kbase_debug_assert_register_hook(NULL, NULL);
+	kbasep_trace_term(kbdev);
+term_as:
+	kbase_device_all_as_term(kbdev);
+as_init_failed:
+dma_set_mask_failed:
+fail:
+	return err;
+}
+
+/**
+ * kbase_device_term - Terminate the device-level components of @kbdev
+ * @kbdev: Device to terminate; must not be NULL (asserted)
+ *
+ * Unregisters the debug-assert hook when tracing is compiled in, then
+ * terminates the instrumentation backend, the trace buffer and the address
+ * spaces, in that order.
+ */
+void kbase_device_term(struct kbase_device *kbdev)
+{
+	KBASE_DEBUG_ASSERT(kbdev);
+
+#if KBASE_TRACE_ENABLE
+	/* Clear the hook before the trace buffer it dumps is freed below. */
+	kbase_debug_assert_register_hook(NULL, NULL);
+#endif
+
+	kbase_instr_backend_term(kbdev);
+
+	kbasep_trace_term(kbdev);
+
+	kbase_device_all_as_term(kbdev);
+}
+
+/**
+ * kbase_device_free - Release the memory of a device structure
+ * @kbdev: Device structure to release with kfree()
+ */
+void kbase_device_free(struct kbase_device *kbdev)
+{
+	kfree(kbdev);
+}
+
+/**
+ * kbase_device_trace_buffer_install - Attach a register trace buffer to a
+ *                                     context
+ * @kctx: Context that will own the buffer; must not be NULL (asserted)
+ * @tb:   Buffer of 32-bit words; must not be NULL (asserted)
+ * @size: Size of @tb in bytes
+ *
+ * Writes the two-word header into @tb and publishes the buffer and its wrap
+ * offset under jctx.tb_lock.
+ *
+ * Return: 0 on success, -EINVAL if @size cannot hold the header plus one
+ * entry, or is too large for the 16-bit write offset.
+ */
+int kbase_device_trace_buffer_install(
+		struct kbase_context *kctx, u32 *tb, size_t size)
+{
+	/* The header and every entry each occupy two 32-bit words. */
+	const size_t entry_size = 2 * sizeof(u32);
+	unsigned long flags;
+
+	KBASE_DEBUG_ASSERT(kctx);
+	KBASE_DEBUG_ASSERT(tb);
+
+	/* Slot 0 holds the header and the writer always stores to slot 1 or
+	 * above, so a buffer smaller than two slots would be written out of
+	 * bounds.
+	 */
+	if (size < 2 * entry_size)
+		return -EINVAL;
+
+	/* Interface uses 16-bit value to track last accessed entry. Each entry
+	 * is composed of two 32-bit words.
+	 * This limits the size that can be handled without an overflow. */
+	if (0xFFFF * entry_size < size)
+		return -EINVAL;
+
+	/* set up the header */
+	/* magic number in the first 4 bytes */
+	tb[0] = TRACE_BUFFER_HEADER_SPECIAL;
+	/* Store (write offset = 0, wrap counter = 0, transaction active = no)
+	 * write offset 0 means never written.
+	 * Offsets 1 to (wrap_offset - 1) used to store values when trace started
+	 */
+	tb[1] = 0;
+
+	/* install trace buffer */
+	spin_lock_irqsave(&kctx->jctx.tb_lock, flags);
+	kctx->jctx.tb_wrap_offset = size / entry_size;
+	kctx->jctx.tb = tb;
+	spin_unlock_irqrestore(&kctx->jctx.tb_lock, flags);
+
+	return 0;
+}
+
+/**
+ * kbase_device_trace_buffer_uninstall - Detach the register trace buffer
+ * @kctx: Context whose trace buffer is removed; must not be NULL (asserted)
+ *
+ * Clears the buffer pointer and the wrap offset under jctx.tb_lock. The
+ * buffer memory itself is not freed here.
+ */
+void kbase_device_trace_buffer_uninstall(struct kbase_context *kctx)
+{
+	unsigned long irq_state;
+
+	KBASE_DEBUG_ASSERT(kctx);
+
+	spin_lock_irqsave(&kctx->jctx.tb_lock, irq_state);
+	kctx->jctx.tb_wrap_offset = 0;
+	kctx->jctx.tb = NULL;
+	spin_unlock_irqrestore(&kctx->jctx.tb_lock, irq_state);
+}
+
+/**
+ * kbase_device_trace_register_access - Log one register access to the
+ *                                      context's trace buffer
+ * @kctx:       Context owning the trace buffer
+ * @type:       Access type; bit 0 of the entry is set only for REG_WRITE
+ * @reg_offset: Register offset; its two low bits are masked out of the entry
+ * @reg_value:  Register value stored in the second word of the entry
+ *
+ * Does nothing when no trace buffer is installed. Runs under jctx.tb_lock.
+ *
+ * Header word (tb[1]) layout: bit 0 = transaction in progress,
+ * bits 1..15 = wrap counter, bits 16..31 = offset of the last written entry.
+ */
+void kbase_device_trace_register_access(struct kbase_context *kctx, enum kbase_reg_access_type type, u16 reg_offset, u32 reg_value)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&kctx->jctx.tb_lock, flags);
+	if (kctx->jctx.tb) {
+		u16 wrap_count;
+		u16 write_offset;
+		u32 *tb = kctx->jctx.tb;
+		u32 header_word;
+
+		header_word = tb[1];
+		/* A set bit 0 would mean a previous transaction never finished. */
+		KBASE_DEBUG_ASSERT(0 == (header_word & 0x1));
+
+		wrap_count = (header_word >> 1) & 0x7FFF;
+		write_offset = (header_word >> 16) & 0xFFFF;
+
+		/* mark as transaction in progress */
+		tb[1] |= 0x1;
+		mb();
+
+		/* calculate new offset */
+		write_offset++;
+		if (write_offset == kctx->jctx.tb_wrap_offset) {
+			/* wrap: entry 0 is the header, so restart at 1 */
+			write_offset = 1;
+			wrap_count++;
+			wrap_count &= 0x7FFF;	/* 15bit wrap counter */
+		}
+
+		/* store the trace entry at the selected offset */
+		tb[write_offset * 2 + 0] = (reg_offset & ~0x3) | ((type == REG_WRITE) ? 0x1 : 0x0);
+		tb[write_offset * 2 + 1] = reg_value;
+		mb();
+
+		/* new header word; widen to u32 before shifting so offsets of
+		 * 0x8000 and above are not shifted into the sign bit of int
+		 */
+		header_word = ((u32)write_offset << 16) | ((u32)wrap_count << 1) | 0x0;	/* transaction complete */
+		tb[1] = header_word;
+	}
+	spin_unlock_irqrestore(&kctx->jctx.tb_lock, flags);
+}
+
+/*
+ * Device trace functions
+ */
+#if KBASE_TRACE_ENABLE
+
+/**
+ * kbasep_trace_init - Allocate the device trace ring buffer
+ * @kbdev: Device that will own the ring buffer
+ *
+ * Allocates KBASE_TRACE_SIZE entries and initialises the lock guarding them.
+ *
+ * Return: 0 on success, -ENOMEM if the ring buffer cannot be allocated.
+ */
+static int kbasep_trace_init(struct kbase_device *kbdev)
+{
+	struct kbase_trace *rbuf;
+
+	rbuf = kmalloc_array(KBASE_TRACE_SIZE, sizeof(*rbuf), GFP_KERNEL);
+
+	/* An allocation failure is out-of-memory, not an invalid argument. */
+	if (!rbuf)
+		return -ENOMEM;
+
+	kbdev->trace_rbuf = rbuf;
+	spin_lock_init(&kbdev->trace_lock);
+	return 0;
+}
+
+/* Free the trace ring buffer stored in kbdev->trace_rbuf. */
+static void kbasep_trace_term(struct kbase_device *kbdev)
+{
+	kfree(kbdev->trace_rbuf);
+}
+
+/**
+ * kbasep_trace_format_msg - Render one trace entry as a comma-separated line
+ * @trace_msg: Entry to format
+ * @buffer:    Destination buffer
+ * @len:       Size of @buffer in bytes; nothing is written when it is <= 0
+ *
+ * Output that does not fit is truncated. scnprintf() returns the number of
+ * characters actually stored, so @written never passes the end of @buffer
+ * and the remaining size never goes negative.
+ */
+static void kbasep_trace_format_msg(struct kbase_trace *trace_msg, char *buffer, int len)
+{
+	int written = 0;
+
+	if (len <= 0)
+		return;
+
+	/* Initial part of message */
+	written += scnprintf(buffer + written, len - written, "%d.%.6d,%d,%d,%s,%p,", (int)trace_msg->timestamp.tv_sec, (int)(trace_msg->timestamp.tv_nsec / 1000), trace_msg->thread_id, trace_msg->cpu, kbasep_trace_code_string[trace_msg->code], trace_msg->ctx);
+
+	if (trace_msg->katom)
+		written += scnprintf(buffer + written, len - written, "atom %d (ud: 0x%llx 0x%llx)", trace_msg->atom_number, trace_msg->atom_udata[0], trace_msg->atom_udata[1]);
+
+	written += scnprintf(buffer + written, len - written, ",%.8llx,", trace_msg->gpu_addr);
+
+	/* NOTE: Could add function callbacks to handle different message types */
+	/* Jobslot present */
+	if (trace_msg->flags & KBASE_TRACE_FLAG_JOBSLOT)
+		written += scnprintf(buffer + written, len - written, "%d", trace_msg->jobslot);
+
+	written += scnprintf(buffer + written, len - written, ",");
+
+	/* Refcount present */
+	if (trace_msg->flags & KBASE_TRACE_FLAG_REFCOUNT)
+		written += scnprintf(buffer + written, len - written, "%d", trace_msg->refcount);
+
+	written += scnprintf(buffer + written, len - written, ",");
+
+	/* Rest of message */
+	written += scnprintf(buffer + written, len - written, "0x%.8lx", trace_msg->info_val);
+}
+
+/* Format @trace_msg into a local buffer and emit it with dev_dbg(). */
+static void kbasep_trace_dump_msg(struct kbase_device *kbdev, struct kbase_trace *trace_msg)
+{
+	char line[DEBUG_MESSAGE_SIZE];
+
+	kbasep_trace_format_msg(trace_msg, line, sizeof(line));
+	dev_dbg(kbdev->dev, "%s", line);
+}
+
+/**
+ * kbasep_trace_add - Append an entry to the device trace ring buffer
+ * @kbdev:    Device owning the ring buffer
+ * @code:     Trace code of the event
+ * @ctx:      Context pointer recorded with the event
+ * @katom:    Atom associated with the event, or NULL
+ * @gpu_addr: GPU address recorded with the event
+ * @flags:    KBASE_TRACE_FLAG_* bits recorded with the event
+ * @refcount: Reference count to record; values above 0xFF are stored as 0xFF
+ * @jobslot:  Job slot recorded with the event
+ * @info_val: Extra value recorded with the event
+ *
+ * The entry is filled in under trace_lock. When the buffer is full the
+ * oldest entry is dropped by advancing trace_first_out.
+ */
+void kbasep_trace_add(struct kbase_device *kbdev, enum kbase_trace_code code, void *ctx, struct kbase_jd_atom *katom, u64 gpu_addr, u8 flags, int refcount, int jobslot, unsigned long info_val)
+{
+	unsigned long irq_state;
+	struct kbase_trace *slot;
+
+	spin_lock_irqsave(&kbdev->trace_lock, irq_state);
+
+	slot = &kbdev->trace_rbuf[kbdev->trace_next_in];
+
+	/* Who and when */
+	slot->thread_id = task_pid_nr(current);
+	slot->cpu = task_cpu(current);
+	getnstimeofday(&slot->timestamp);
+
+	/* Values handed in by the caller */
+	slot->code = code;
+	slot->ctx = ctx;
+	slot->flags = flags;
+	slot->gpu_addr = gpu_addr;
+	slot->info_val = info_val;
+	slot->jobslot = jobslot;
+	slot->refcount = MIN((unsigned int)refcount, 0xFF);
+
+	/* Atom details are only recorded when an atom was supplied */
+	slot->katom = katom ? true : false;
+	if (katom) {
+		slot->atom_number = kbase_jd_atom_id(katom->kctx, katom);
+		slot->atom_udata[0] = katom->udata.blob[0];
+		slot->atom_udata[1] = katom->udata.blob[1];
+	}
+
+	/* Advance the producer index; push the consumer index on overrun */
+	kbdev->trace_next_in = (kbdev->trace_next_in + 1) & KBASE_TRACE_MASK;
+	if (kbdev->trace_first_out == kbdev->trace_next_in)
+		kbdev->trace_first_out = (kbdev->trace_first_out + 1) & KBASE_TRACE_MASK;
+
+	spin_unlock_irqrestore(&kbdev->trace_lock, irq_state);
+}
+
+/**
+ * kbasep_trace_clear - Discard all entries in the device trace ring buffer
+ * @kbdev: Device owning the ring buffer
+ *
+ * Makes the buffer empty by moving trace_first_out up to trace_next_in under
+ * trace_lock.
+ */
+void kbasep_trace_clear(struct kbase_device *kbdev)
+{
+	unsigned long irq_state;
+
+	spin_lock_irqsave(&kbdev->trace_lock, irq_state);
+	kbdev->trace_first_out = kbdev->trace_next_in;
+	spin_unlock_irqrestore(&kbdev->trace_lock, irq_state);
+}
+
+/**
+ * kbasep_trace_dump - Print every buffered trace entry, then clear the buffer
+ * @kbdev: Device owning the ring buffer
+ *
+ * Entries are printed with dev_dbg() while trace_lock is held. The buffer is
+ * cleared after the lock has been dropped.
+ */
+void kbasep_trace_dump(struct kbase_device *kbdev)
+{
+	unsigned long irq_state;
+	u32 idx;
+	u32 stop;
+
+	dev_dbg(kbdev->dev, "Dumping trace:\nsecs,nthread,cpu,code,ctx,katom,gpu_addr,jobslot,refcount,info_val");
+	spin_lock_irqsave(&kbdev->trace_lock, irq_state);
+
+	stop = kbdev->trace_next_in;
+	for (idx = kbdev->trace_first_out; idx != stop;
+			idx = (idx + 1) & KBASE_TRACE_MASK)
+		kbasep_trace_dump_msg(kbdev, &kbdev->trace_rbuf[idx]);
+
+	dev_dbg(kbdev->dev, "TRACE_END");
+	spin_unlock_irqrestore(&kbdev->trace_lock, irq_state);
+
+	KBASE_TRACE_CLEAR(kbdev);
+}
+
+/* Debug-assert hook: @param is the struct kbase_device whose trace is dumped. */
+static void kbasep_trace_hook_wrapper(void *param)
+{
+	kbasep_trace_dump(param);
+}
+
+#ifdef CONFIG_DEBUG_FS
+/**
+ * struct trace_seq_state - Per-open snapshot of the trace ring buffer
+ * @trace_buf: Copy of the device's trace entries, taken when the file is opened
+ * @start:     Value of trace_first_out when the snapshot was taken
+ * @end:       Value of trace_next_in when the snapshot was taken
+ */
+struct trace_seq_state {
+	struct kbase_trace trace_buf[KBASE_TRACE_SIZE];
+	u32 start;
+	u32 end;
+};
+
+/**
+ * kbasep_trace_seq_start - seq_file start: locate the entry at position @pos
+ * @s:   seq_file whose private data is a struct trace_seq_state
+ * @pos: Zero-based position counted from the snapshot's start index
+ *
+ * Return: pointer to the snapshot entry at @pos, or NULL when @pos lies at or
+ * beyond the snapshot's end.
+ */
+static void *kbasep_trace_seq_start(struct seq_file *s, loff_t *pos)
+{
+	struct trace_seq_state *state = s->private;
+	int i;
+
+	if (*pos > KBASE_TRACE_SIZE)
+		return NULL;
+	/* Unwrapped index; it is only masked once the range checks pass. */
+	i = state->start + *pos;
+	/* First test covers end >= start (no wrap); the second covers a
+	 * wrapped snapshot, where end is treated as end + KBASE_TRACE_SIZE.
+	 */
+	if ((state->end >= state->start && i >= state->end) ||
+			i >= state->end + KBASE_TRACE_SIZE)
+		return NULL;
+
+	i &= KBASE_TRACE_MASK;
+
+	return &state->trace_buf[i];
+}
+
+/* seq_file stop: empty, as start/next only hand out pointers into the snapshot. */
+static void kbasep_trace_seq_stop(struct seq_file *s, void *data)
+{
+}
+
+/**
+ * kbasep_trace_seq_next - seq_file next: advance to the following entry
+ * @s:    seq_file whose private data is a struct trace_seq_state
+ * @data: Current entry (unused)
+ * @pos:  Position, incremented by one
+ *
+ * Return: pointer to the next snapshot entry, or NULL once the snapshot's
+ * end index is reached.
+ */
+static void *kbasep_trace_seq_next(struct seq_file *s, void *data, loff_t *pos)
+{
+	struct trace_seq_state *snap = s->private;
+	int idx;
+
+	++*pos;
+	idx = (snap->start + *pos) & KBASE_TRACE_MASK;
+
+	return (idx == snap->end) ? NULL : &snap->trace_buf[idx];
+}
+
+/* seq_file show: format the entry in @data and print it as one line. Returns 0. */
+static int kbasep_trace_seq_show(struct seq_file *s, void *data)
+{
+	char line[DEBUG_MESSAGE_SIZE];
+
+	kbasep_trace_format_msg(data, line, sizeof(line));
+	seq_printf(s, "%s\n", line);
+
+	return 0;
+}
+
+/* seq_file iterator over the snapshot held in struct trace_seq_state. */
+static const struct seq_operations kbasep_trace_seq_ops = {
+	.start = kbasep_trace_seq_start,
+	.next = kbasep_trace_seq_next,
+	.stop = kbasep_trace_seq_stop,
+	.show = kbasep_trace_seq_show,
+};
+
+/**
+ * kbasep_trace_debugfs_open - Open handler for the "mali_trace" debugfs file
+ * @inode: Inode whose i_private is the struct kbase_device
+ * @file:  File being opened
+ *
+ * Allocates the seq_file private state and, under trace_lock, copies the
+ * whole ring buffer and its two indices into it, so later reads iterate over
+ * a snapshot rather than the live buffer.
+ *
+ * Return: 0 on success, -ENOMEM if the private state cannot be allocated.
+ */
+static int kbasep_trace_debugfs_open(struct inode *inode, struct file *file)
+{
+	struct kbase_device *kbdev = inode->i_private;
+	struct trace_seq_state *snap;
+	unsigned long irq_state;
+
+	snap = __seq_open_private(file, &kbasep_trace_seq_ops, sizeof(*snap));
+	if (!snap)
+		return -ENOMEM;
+
+	spin_lock_irqsave(&kbdev->trace_lock, irq_state);
+	memcpy(snap->trace_buf, kbdev->trace_rbuf, sizeof(snap->trace_buf));
+	snap->start = kbdev->trace_first_out;
+	snap->end = kbdev->trace_next_in;
+	spin_unlock_irqrestore(&kbdev->trace_lock, irq_state);
+
+	return 0;
+}
+
+/* File operations of the trace debugfs file; seq_release_private() frees the
+ * state allocated by __seq_open_private() in the open handler.
+ */
+static const struct file_operations kbasep_trace_debugfs_fops = {
+	.open = kbasep_trace_debugfs_open,
+	.read = seq_read,
+	.llseek = seq_lseek,
+	.release = seq_release_private,
+};
+
+/**
+ * kbasep_trace_debugfs_init - Create the "mali_trace" debugfs file
+ * @kbdev: Device whose trace buffer the file exposes
+ *
+ * The file is created read-only (S_IRUGO) in kbdev->mali_debugfs_directory.
+ * The result of debugfs_create_file() is not checked.
+ */
+void kbasep_trace_debugfs_init(struct kbase_device *kbdev)
+{
+	debugfs_create_file("mali_trace", S_IRUGO,
+			kbdev->mali_debugfs_directory, kbdev,
+			&kbasep_trace_debugfs_fops);
+}
+
+#else
+/* No-op variant used when CONFIG_DEBUG_FS is not set. */
+void kbasep_trace_debugfs_init(struct kbase_device *kbdev)
+{
+}
+#endif				/* CONFIG_DEBUG_FS */
+
+#else				/* KBASE_TRACE_ENABLE  */
+/* Stub used when KBASE_TRACE_ENABLE is off: nothing to allocate, returns 0. */
+static int kbasep_trace_init(struct kbase_device *kbdev)
+{
+	CSTD_UNUSED(kbdev);
+	return 0;
+}
+
+/* Stub used when KBASE_TRACE_ENABLE is off: nothing to free. */
+static void kbasep_trace_term(struct kbase_device *kbdev)
+{
+	CSTD_UNUSED(kbdev);
+}
+
+/* Stub used when KBASE_TRACE_ENABLE is off: the assert hook does nothing. */
+static void kbasep_trace_hook_wrapper(void *param)
+{
+	CSTD_UNUSED(param);
+}
+
+/* Stub used when KBASE_TRACE_ENABLE is off: there is no trace to dump. */
+void kbasep_trace_dump(struct kbase_device *kbdev)
+{
+	CSTD_UNUSED(kbdev);
+}
+#endif				/* KBASE_TRACE_ENABLE  */
+
+/**
+ * kbase_set_profiling_control - Store the value of a profiling control
+ * @kbdev:   Device holding the profiling controls
+ * @control: Control to set; one of FBDUMP_CONTROL_ENABLE,
+ *           FBDUMP_CONTROL_RATE, SW_COUNTER_ENABLE or
+ *           FBDUMP_CONTROL_RESIZE_FACTOR
+ * @value:   Value to store
+ *
+ * Any other @control is reported with dev_err() and nothing is stored.
+ */
+void kbase_set_profiling_control(struct kbase_device *kbdev, u32 control, u32 value)
+{
+	bool known = (control == FBDUMP_CONTROL_ENABLE) ||
+		     (control == FBDUMP_CONTROL_RATE) ||
+		     (control == SW_COUNTER_ENABLE) ||
+		     (control == FBDUMP_CONTROL_RESIZE_FACTOR);
+
+	if (!known) {
+		dev_err(kbdev->dev, "Profiling control %d not found\n", control);
+		return;
+	}
+
+	kbdev->kbase_profiling_controls[control] = value;
+}
+
+/**
+ * kbase_get_profiling_control - Read the value of a profiling control
+ * @kbdev:   Device holding the profiling controls
+ * @control: Control to read; one of FBDUMP_CONTROL_ENABLE,
+ *           FBDUMP_CONTROL_RATE, SW_COUNTER_ENABLE or
+ *           FBDUMP_CONTROL_RESIZE_FACTOR
+ *
+ * Return: the stored value, or 0 after logging an error for any other
+ * @control.
+ */
+u32 kbase_get_profiling_control(struct kbase_device *kbdev, u32 control)
+{
+	bool known = (control == FBDUMP_CONTROL_ENABLE) ||
+		     (control == FBDUMP_CONTROL_RATE) ||
+		     (control == SW_COUNTER_ENABLE) ||
+		     (control == FBDUMP_CONTROL_RESIZE_FACTOR);
+
+	if (known)
+		return kbdev->kbase_profiling_controls[control];
+
+	dev_err(kbdev->dev, "Profiling control %d not found\n", control);
+	return 0;
+}
+
+/*
+ * Called by gator to control the production of
+ * profiling information at runtime.
+ *
+ * The control is applied to the first device, if one is found.
+ */
+void _mali_profiling_control(u32 action, u32 value)
+{
+	/* -1 asks for the first device */
+	struct kbase_device *kbdev = kbase_find_device(-1);
+
+	if (!kbdev)
+		return;
+
+	kbase_set_profiling_control(kbdev, action, value);
+}
+KBASE_EXPORT_SYMBOL(_mali_profiling_control);
+
--- a/drivers/gpu/arm/midgard_for_linux/mali_kbase_disjoint_events.c
+++ b/drivers/gpu/arm/midgard_for_linux/mali_kbase_disjoint_events.c
@ -0,0 +1,76 @@
+/*
+ *
+ * (C) COPYRIGHT 2014 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/*
+ * Base kernel disjoint events helper functions
+ */
+
+#include <mali_kbase.h>
+
+/* Reset the disjoint state and the disjoint event counter of @kbdev to 0. */
+void kbase_disjoint_init(struct kbase_device *kbdev)
+{
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	atomic_set(&kbdev->disjoint_event.state, 0);
+	atomic_set(&kbdev->disjoint_event.count, 0);
+}
+
+/* Increment the disjoint event count of @kbdev (must not be NULL, asserted). */
+void kbase_disjoint_event(struct kbase_device *kbdev)
+{
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	atomic_inc(&kbdev->disjoint_event.count);
+}
+
+/* Increment the disjoint state first, then record one disjoint event. */
+void kbase_disjoint_state_up(struct kbase_device *kbdev)
+{
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	atomic_inc(&kbdev->disjoint_event.state);
+
+	kbase_disjoint_event(kbdev);
+}
+
+/* Record one disjoint event, then decrement the disjoint state.
+ * The state must be greater than 0 on entry (asserted).
+ */
+void kbase_disjoint_state_down(struct kbase_device *kbdev)
+{
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	KBASE_DEBUG_ASSERT(atomic_read(&kbdev->disjoint_event.state) > 0);
+
+	kbase_disjoint_event(kbdev);
+
+	atomic_dec(&kbdev->disjoint_event.state);
+}
+
+/* Record a disjoint event only while the disjoint state is non-zero. */
+void kbase_disjoint_event_potential(struct kbase_device *kbdev)
+{
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	if (!atomic_read(&kbdev->disjoint_event.state))
+		return;
+
+	kbase_disjoint_event(kbdev);
+}
+
+/* Return the current disjoint event count of @kbdev (must not be NULL). */
+u32 kbase_disjoint_event_get(struct kbase_device *kbdev)
+{
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	return atomic_read(&kbdev->disjoint_event.count);
+}
+KBASE_EXPORT_TEST_API(kbase_disjoint_event_get);
--- a/drivers/gpu/arm/midgard_for_linux/mali_kbase_dma_fence.c
+++ b/drivers/gpu/arm/midgard_for_linux/mali_kbase_dma_fence.c
@ -0,0 +1,606 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+/* Include mali_kbase_dma_fence.h before checking for CONFIG_MALI_DMA_FENCE as
+ * it will be set there.
+ */
+#include "mali_kbase_dma_fence.h"
+
+#include <linux/atomic.h>
+#include <linux/fence.h>
+#include <linux/list.h>
+#include <linux/lockdep.h>
+#include <linux/mutex.h>
+#include <linux/reservation.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+#include <linux/workqueue.h>
+#include <linux/ww_mutex.h>
+
+#include <mali_kbase.h>
+
+
+/* Spin lock protecting all Mali fences as fence->lock. */
+static DEFINE_SPINLOCK(kbase_dma_fence_lock);
+
+static void
+kbase_dma_fence_work(struct work_struct *pwork);
+
+/* Append @katom to its context's list of atoms waiting on dma-fences. */
+static void
+kbase_dma_fence_waiters_add(struct kbase_jd_atom *katom)
+{
+	list_add_tail(&katom->queue,
+		      &katom->kctx->dma_fence.waiting_resource);
+}
+
+/* Unlink @katom from the list it is queued on through katom->queue. */
+void
+kbase_dma_fence_waiters_remove(struct kbase_jd_atom *katom)
+{
+	list_del(&katom->queue);
+}
+
+/* fence_ops.get_driver_name: every Mali fence reports kbase_drv_name. */
+static const char *
+kbase_dma_fence_get_driver_name(struct fence *fence)
+{
+	return kbase_drv_name;
+}
+
+/* fence_ops.get_timeline_name: every Mali fence reports kbase_timeline_name. */
+static const char *
+kbase_dma_fence_get_timeline_name(struct fence *fence)
+{
+	return kbase_timeline_name;
+}
+
+/* fence_ops.enable_signaling: nothing to set up, always returns true. */
+static bool
+kbase_dma_fence_enable_signaling(struct fence *fence)
+{
+	/* If in the future we need to add code here, remember to get a
+	 * reference to the fence and release it when signaling, as stated
+	 * in fence.h.
+	 */
+	return true;
+}
+
+/* fence_ops.fence_value_str: print the fence's seqno as unsigned decimal
+ * into @str, which holds @size bytes.
+ */
+static void
+kbase_dma_fence_fence_value_str(struct fence *fence, char *str, int size)
+{
+	snprintf(str, size, "%u", fence->seqno);
+}
+
+/* Operations shared by every fence created by kbase_dma_fence_new(). */
+static const struct fence_ops kbase_dma_fence_ops = {
+	.get_driver_name = kbase_dma_fence_get_driver_name,
+	.get_timeline_name = kbase_dma_fence_get_timeline_name,
+	.enable_signaling = kbase_dma_fence_enable_signaling,
+	/* Use the default wait */
+	.wait = fence_default_wait,
+	.fence_value_str = kbase_dma_fence_fence_value_str,
+};
+
+/**
+ * kbase_dma_fence_new() - Allocate and initialise a Mali fence
+ * @context: Fence context number passed to fence_init()
+ * @seqno:   Sequence number passed to fence_init()
+ *
+ * The fence uses kbase_dma_fence_ops and the global kbase_dma_fence_lock.
+ *
+ * Return: the new fence, or NULL if the allocation fails.
+ */
+static struct fence *
+kbase_dma_fence_new(unsigned int context, unsigned int seqno)
+{
+	struct fence *new_fence = kzalloc(sizeof(*new_fence), GFP_KERNEL);
+
+	if (new_fence)
+		fence_init(new_fence, &kbase_dma_fence_ops,
+			   &kbase_dma_fence_lock, context, seqno);
+
+	return new_fence;
+}
+
+/**
+ * kbase_dma_fence_lock_reservations() - Lock every reservation object in @info
+ * @info: Reservation objects to lock
+ * @ctx:  Wound/wait acquire context, initialised by this function
+ *
+ * On -EDEADLK from ww_mutex_lock() all locks taken so far are dropped, the
+ * contended lock is taken with ww_mutex_lock_slow() and the loop restarts.
+ *
+ * Return: 0 with all objects locked (release them with
+ * kbase_dma_fence_unlock_reservations()), or a negative error code with no
+ * lock held and @ctx already finished.
+ */
+static int
+kbase_dma_fence_lock_reservations(struct kbase_dma_fence_resv_info *info,
+				  struct ww_acquire_ctx *ctx)
+{
+	/* Object already locked through the slow path, if any. */
+	struct reservation_object *content_res = NULL;
+	unsigned int content_res_idx = 0;
+	unsigned int r;
+	int err = 0;
+
+	ww_acquire_init(ctx, &reservation_ww_class);
+
+retry:
+	for (r = 0; r < info->dma_fence_resv_count; r++) {
+		if (info->resv_objs[r] == content_res) {
+			/* Already held via ww_mutex_lock_slow(); from here on
+			 * it is covered by the normal unlock loop.
+			 */
+			content_res = NULL;
+			continue;
+		}
+
+		err = ww_mutex_lock(&info->resv_objs[r]->lock, ctx);
+		if (err)
+			goto error;
+	}
+
+	ww_acquire_done(ctx);
+	return err;
+
+error:
+	/* Remember which object failed before r is consumed below. */
+	content_res_idx = r;
+
+	/* Unlock the ones locked so far */
+	while (r--)
+		ww_mutex_unlock(&info->resv_objs[r]->lock);
+
+	/* Still set only if the slow-locked object lies at or after the
+	 * failing index, i.e. the loop above did not release it.
+	 */
+	if (content_res)
+		ww_mutex_unlock(&content_res->lock);
+
+	/* If we deadlock try with lock_slow and retry */
+	if (err == -EDEADLK) {
+		content_res = info->resv_objs[content_res_idx];
+		ww_mutex_lock_slow(&content_res->lock, ctx);
+		goto retry;
+	}
+
+	/* If we are here the function failed */
+	ww_acquire_fini(ctx);
+	return err;
+}
+
+/**
+ * kbase_dma_fence_unlock_reservations() - Unlock every reservation object
+ * @info: Reservation objects to unlock
+ * @ctx:  Acquire context the locks were taken with; finished here
+ */
+static void
+kbase_dma_fence_unlock_reservations(struct kbase_dma_fence_resv_info *info,
+				    struct ww_acquire_ctx *ctx)
+{
+	unsigned int idx = 0;
+
+	while (idx < info->dma_fence_resv_count) {
+		ww_mutex_unlock(&info->resv_objs[idx]->lock);
+		idx++;
+	}
+
+	ww_acquire_fini(ctx);
+}
+
+/**
+ * kbase_dma_fence_queue_work() - Queue work to handle @katom
+ * @katom: Pointer to atom for which to queue work
+ *
+ * Schedules kbase_dma_fence_work() for @katom on the context's dma-fence
+ * workqueue. A warning is raised if the work item was already queued.
+ */
+static void
+kbase_dma_fence_queue_work(struct kbase_jd_atom *katom)
+{
+	bool queued;
+
+	INIT_WORK(&katom->work, kbase_dma_fence_work);
+	queued = queue_work(katom->kctx->dma_fence.wq, &katom->work);
+
+	/* queue_work() returns false when the item is already pending;
+	 * that should not happen.
+	 */
+	WARN_ON(!queued);
+}
+
+/**
+ * kbase_dma_fence_free_callbacks - Free dma-fence callbacks on a katom
+ * @katom: Pointer to katom
+ * @queue_worker: Boolean indicating if fence worker is to be queued when
+ *                dep_count reaches 0.
+ *
+ * Walks the katom's callback list and frees every entry. A callback whose
+ * fence has not signaled yet is first removed from that fence, and the
+ * dependency it represented is dropped from dep_count.
+ *
+ * Locking: katom->dma_fence.callbacks list assumes jctx.lock is held.
+ */
+static void
+kbase_dma_fence_free_callbacks(struct kbase_jd_atom *katom, bool queue_worker)
+{
+	struct kbase_dma_fence_cb *entry, *next;
+
+	lockdep_assert_held(&katom->kctx->jctx.lock);
+
+	list_for_each_entry_safe(entry, next, &katom->dma_fence.callbacks, node) {
+		/* True only when the callback was still pending, i.e. the
+		 * fence had not signaled and the callback never ran.
+		 */
+		if (fence_remove_callback(entry->fence, &entry->fence_cb)) {
+			int remaining;
+
+			remaining = atomic_dec_return(&katom->dma_fence.dep_count);
+
+			/* Last dependency gone and the caller asked for it:
+			 * let the worker complete the katom.
+			 */
+			if (unlikely(queue_worker && remaining == 0))
+				kbase_dma_fence_queue_work(katom);
+		}
+
+		/* Drop the reference taken in kbase_dma_fence_add_callback(). */
+		fence_put(entry->fence);
+		list_del(&entry->node);
+		kfree(entry);
+	}
+}
+
+/**
+ * kbase_dma_fence_cancel_atom() - Cancels waiting on an atom
+ * @katom:	Katom to cancel
+ *
+ * Removes and frees all fence callbacks of @katom, marks its dep_count as
+ * handled (-1) and, if the atom is still queued, completes it with
+ * BASE_JD_EVENT_JOB_CANCELLED.
+ *
+ * Locking: katom->dma_fence.callbacks list assumes jctx.lock is held.
+ */
+static void
+kbase_dma_fence_cancel_atom(struct kbase_jd_atom *katom)
+{
+	lockdep_assert_held(&katom->kctx->jctx.lock);
+
+	/* Cancel callbacks and clean up. */
+	kbase_dma_fence_free_callbacks(katom, false);
+
+	KBASE_DEBUG_ASSERT(atomic_read(&katom->dma_fence.dep_count) == 0);
+
+	/* Mark the atom as handled in case all fences signaled just before
+	 * canceling the callbacks and the worker was queued.
+	 */
+	atomic_set(&katom->dma_fence.dep_count, -1);
+
+	/* Prevent job_done_nolock from being called twice on an atom when
+	 * there is a race between job completion and cancellation.
+	 */
+
+	if (katom->status == KBASE_JD_ATOM_STATE_QUEUED) {
+		/* Wait was cancelled - zap the atom */
+		katom->event_code = BASE_JD_EVENT_JOB_CANCELLED;
+		/* A true return from jd_done_nolock() triggers a scheduling
+		 * pass over all contexts.
+		 */
+		if (jd_done_nolock(katom, NULL))
+			kbase_js_sched_all(katom->kctx->kbdev);
+	}
+}
+
+/**
+ * kbase_dma_fence_work() - Worker thread called when a fence is signaled
+ * @pwork:	work_struct embedded in the katom to process
+ *
+ * Runs with jctx.lock held. If dep_count is exactly 0 the atom is marked as
+ * handled, taken off the waiting list, its callbacks are freed and it is
+ * either completed or has its dependencies cleared. Any other dep_count
+ * means there is nothing to do.
+ */
+static void
+kbase_dma_fence_work(struct work_struct *pwork)
+{
+	struct kbase_jd_atom *katom =
+		container_of(pwork, struct kbase_jd_atom, work);
+	struct kbase_jd_context *jctx = &katom->kctx->jctx;
+
+	mutex_lock(&jctx->lock);
+
+	if (atomic_read(&katom->dma_fence.dep_count) == 0) {
+		atomic_set(&katom->dma_fence.dep_count, -1);
+
+		/* Remove atom from list of dma-fence waiting atoms. */
+		kbase_dma_fence_waiters_remove(katom);
+		/* Cleanup callbacks. */
+		kbase_dma_fence_free_callbacks(katom, false);
+
+		/* Queue atom on GPU, unless it has already completed due to a
+		 * failing dependency, in which case finish it off instead.
+		 */
+		if (unlikely(katom->status == KBASE_JD_ATOM_STATE_COMPLETED))
+			jd_done_nolock(katom, NULL);
+		else
+			kbase_jd_dep_clear_locked(katom);
+	}
+
+	mutex_unlock(&jctx->lock);
+}
+
+/**
+ * kbase_dma_fence_add_callback() - Add callback on @fence to block @katom
+ * @katom: Pointer to katom that will be blocked by @fence
+ * @fence: Pointer to fence on which to set up the callback
+ * @callback: Pointer to function to be called when fence is signaled
+ *
+ * The caller must hold a reference to @fence for the duration of the call
+ * and remains responsible for releasing it. When the callback is installed,
+ * this function takes its own reference, which is kept until the callback is
+ * cleaned up in kbase_dma_fence_free_callbacks().
+ *
+ * Return: 0 on success: fence was either already signalled, or callback was
+ * set up. Negative error code is returned on error.
+ */
+static int
+kbase_dma_fence_add_callback(struct kbase_jd_atom *katom,
+			     struct fence *fence,
+			     fence_func_t callback)
+{
+	struct kbase_dma_fence_cb *entry;
+	int err;
+
+	entry = kmalloc(sizeof(*entry), GFP_KERNEL);
+	if (!entry)
+		return -ENOMEM;
+
+	entry->katom = katom;
+	entry->fence = fence;
+	INIT_LIST_HEAD(&entry->node);
+
+	err = fence_add_callback(fence, &entry->fence_cb, callback);
+	if (err) {
+		/* Callback was not installed, so the entry is not needed.
+		 * -ENOENT means the fence had already signaled, which is
+		 * not an error for the caller.
+		 */
+		kfree(entry);
+		return (err == -ENOENT) ? 0 : err;
+	}
+
+	/* Installed: hold the fence and count it as a pending dependency. */
+	fence_get(fence);
+	atomic_inc(&katom->dma_fence.dep_count);
+	list_add(&entry->node, &katom->dma_fence.callbacks);
+
+	return 0;
+}
+
+/**
+ * kbase_dma_fence_cb() - Fence callback run when a waited-on fence signals
+ * @fence: Fence that signaled (unused)
+ * @cb:    Callback node embedded in a struct kbase_dma_fence_cb
+ *
+ * Drops one dependency of the owning katom and queues the worker when that
+ * was the last one.
+ */
+static void
+kbase_dma_fence_cb(struct fence *fence, struct fence_cb *cb)
+{
+	struct kbase_jd_atom *katom =
+		container_of(cb, struct kbase_dma_fence_cb, fence_cb)->katom;
+
+	/* A zapped atom has its dep_count forced negative, so the decrement
+	 * can never land on zero and no work is scheduled for it (which
+	 * would otherwise reschedule the atom).
+	 */
+	if (!atomic_dec_and_test(&katom->dma_fence.dep_count))
+		return;
+
+	kbase_dma_fence_queue_work(katom);
+}
+
+/**
+ * kbase_dma_fence_add_reservation_callback() - Make @katom wait on the fences
+ *                                              of a reservation object
+ * @katom:     Atom to block
+ * @resv:      Reservation object whose fences are waited on
+ * @exclusive: When true, callbacks are also set up on the shared fences;
+ *             otherwise only on the exclusive fence
+ *
+ * All references returned by reservation_object_get_fences_rcu() are released
+ * before returning. On error every callback already set up on @katom is
+ * removed again.
+ *
+ * Return: 0 on success, negative error code otherwise.
+ */
+static int
+kbase_dma_fence_add_reservation_callback(struct kbase_jd_atom *katom,
+					 struct reservation_object *resv,
+					 bool exclusive)
+{
+	struct fence *excl_fence = NULL;
+	struct fence **shared_fences = NULL;
+	unsigned int shared_count = 0;
+	int err, i;
+
+	err = reservation_object_get_fences_rcu(resv,
+						&excl_fence,
+						&shared_count,
+						&shared_fences);
+	if (err)
+		return err;
+
+	if (excl_fence) {
+		err = kbase_dma_fence_add_callback(katom,
+						   excl_fence,
+						   kbase_dma_fence_cb);
+
+		/* Release our reference to the fence, taken by
+		 * reservation_object_get_fences_rcu(). We have set up our
+		 * callback (if that was possible), and the fence's owner is
+		 * responsible for signaling the fence before allowing it to
+		 * disappear.
+		 */
+		fence_put(excl_fence);
+
+		if (err)
+			goto out;
+	}
+
+	if (exclusive) {
+		for (i = 0; i < shared_count; i++) {
+			err = kbase_dma_fence_add_callback(katom,
+							   shared_fences[i],
+							   kbase_dma_fence_cb);
+			if (err)
+				goto out;
+		}
+	}
+
+	/* Release all our references to the shared fences, taken by
+	 * reservation_object_get_fences_rcu(). We have set up our callback (if
+	 * that was possible), and the fence's owner is responsible for
+	 * signaling the fence before allowing it to disappear.
+	 */
+out:
+	for (i = 0; i < shared_count; i++)
+		fence_put(shared_fences[i]);
+	kfree(shared_fences);
+
+	if (err) {
+		/*
+		 * On error, cancel and clean up all callbacks that were set up
+		 * before the error.
+		 */
+		kbase_dma_fence_free_callbacks(katom, false);
+	}
+
+	return err;
+}
+
+/**
+ * kbase_dma_fence_add_reservation() - Record a reservation object in @info
+ * @resv:      Reservation object to record
+ * @info:      Collection the object is appended to
+ * @exclusive: When true, the object's bit in dma_fence_excl_bitmap is set
+ *
+ * An object that is already present in @info is ignored. The capacity of
+ * info->resv_objs is not checked here.
+ */
+void kbase_dma_fence_add_reservation(struct reservation_object *resv,
+				     struct kbase_dma_fence_resv_info *info,
+				     bool exclusive)
+{
+	const unsigned int count = info->dma_fence_resv_count;
+	unsigned int idx;
+
+	/* Duplicate resource, ignore */
+	for (idx = 0; idx < count; idx++)
+		if (info->resv_objs[idx] == resv)
+			return;
+
+	info->resv_objs[count] = resv;
+	if (exclusive)
+		set_bit(count, info->dma_fence_excl_bitmap);
+
+	info->dma_fence_resv_count = count + 1;
+}
+
+/**
+ * kbase_dma_fence_wait() - Create the atom's fence and wait on existing ones
+ * @katom: Atom that needs the resources; jctx.lock must be held
+ * @info:  Reservation objects the atom uses
+ *
+ * Creates a new fence for @katom, locks all reservation objects in @info,
+ * sets up callbacks on the fences already attached to them and attaches the
+ * new fence (shared or exclusive, as recorded in the exclusive bitmap).
+ *
+ * dep_count starts at 1 while callbacks are being set up and that initial
+ * count is dropped at the end. If it then reaches zero nothing is pending;
+ * otherwise the atom is added to the context's waiting list.
+ *
+ * Return: 0 on success, negative error code otherwise. On error dep_count is
+ * left at -1 and katom->dma_fence.fence is NULL.
+ */
+int kbase_dma_fence_wait(struct kbase_jd_atom *katom,
+			 struct kbase_dma_fence_resv_info *info)
+{
+	int err, i;
+	struct fence *fence;
+	struct ww_acquire_ctx ww_ctx;
+
+	lockdep_assert_held(&katom->kctx->jctx.lock);
+
+	fence = kbase_dma_fence_new(katom->dma_fence.context,
+				    atomic_inc_return(&katom->dma_fence.seqno));
+	if (!fence) {
+		err = -ENOMEM;
+		dev_err(katom->kctx->kbdev->dev,
+			"Error %d creating fence.\n", err);
+		return err;
+	}
+
+	katom->dma_fence.fence = fence;
+	atomic_set(&katom->dma_fence.dep_count, 1);
+
+	err = kbase_dma_fence_lock_reservations(info, &ww_ctx);
+	if (err) {
+		dev_err(katom->kctx->kbdev->dev,
+			"Error %d locking reservations.\n", err);
+		atomic_set(&katom->dma_fence.dep_count, -1);
+		/* Drop the fence and forget the pointer, so that a later
+		 * kbase_dma_fence_signal() on this atom does not signal and
+		 * put a fence that has already been released.
+		 */
+		fence_put(fence);
+		katom->dma_fence.fence = NULL;
+		return err;
+	}
+
+	for (i = 0; i < info->dma_fence_resv_count; i++) {
+		struct reservation_object *obj = info->resv_objs[i];
+
+		if (!test_bit(i, info->dma_fence_excl_bitmap)) {
+			/* Shared access: a slot must be reserved before the
+			 * shared fence can be added.
+			 */
+			err = reservation_object_reserve_shared(obj);
+			if (err) {
+				dev_err(katom->kctx->kbdev->dev,
+					"Error %d reserving space for shared fence.\n", err);
+				goto end;
+			}
+
+			err = kbase_dma_fence_add_reservation_callback(katom, obj, false);
+			if (err) {
+				dev_err(katom->kctx->kbdev->dev,
+					"Error %d adding reservation to callback.\n", err);
+				goto end;
+			}
+
+			reservation_object_add_shared_fence(obj, katom->dma_fence.fence);
+		} else {
+			err = kbase_dma_fence_add_reservation_callback(katom, obj, true);
+			if (err) {
+				dev_err(katom->kctx->kbdev->dev,
+					"Error %d adding reservation to callback.\n", err);
+				goto end;
+			}
+
+			reservation_object_add_excl_fence(obj, katom->dma_fence.fence);
+		}
+	}
+
+end:
+	kbase_dma_fence_unlock_reservations(info, &ww_ctx);
+
+	if (likely(!err)) {
+		/* Test if the callbacks are already triggered */
+		if (atomic_dec_and_test(&katom->dma_fence.dep_count)) {
+			atomic_set(&katom->dma_fence.dep_count, -1);
+			kbase_dma_fence_free_callbacks(katom, false);
+		} else {
+			/* Add katom to the list of dma-buf fence waiting atoms
+			 * only if it is still waiting.
+			 */
+			kbase_dma_fence_waiters_add(katom);
+		}
+	} else {
+		/* There was an error, cancel callbacks, set dep_count to -1 to
+		 * indicate that the atom has been handled (the caller will
+		 * kill it for us), signal the fence, free callbacks and the
+		 * fence.
+		 */
+		kbase_dma_fence_free_callbacks(katom, false);
+		atomic_set(&katom->dma_fence.dep_count, -1);
+		kbase_dma_fence_signal(katom);
+	}
+
+	return err;
+}
+
+/**
+ * kbase_dma_fence_cancel_all_atoms() - Cancel every atom waiting on fences
+ * @kctx: Context whose waiting list is drained
+ *
+ * Repeatedly takes the first atom off the context's waiting list and cancels
+ * it, until the list is empty.
+ */
+void kbase_dma_fence_cancel_all_atoms(struct kbase_context *kctx)
+{
+	struct list_head *waiting = &kctx->dma_fence.waiting_resource;
+	struct kbase_jd_atom *first;
+
+	while (!list_empty(waiting)) {
+		first = list_first_entry(waiting, struct kbase_jd_atom, queue);
+
+		kbase_dma_fence_waiters_remove(first);
+		kbase_dma_fence_cancel_atom(first);
+	}
+}
+
+/**
+ * kbase_dma_fence_cancel_callbacks() - Cancel all dma-fence callbacks of an atom
+ * @katom: Atom whose callbacks are removed and freed
+ *
+ * Passes queue_worker = true, so the fence worker is queued if removing the
+ * pending callbacks brings dep_count down to zero.
+ */
+void kbase_dma_fence_cancel_callbacks(struct kbase_jd_atom *katom)
+{
+	/* Cancel callbacks and clean up. */
+	kbase_dma_fence_free_callbacks(katom, true);
+}
+
+/**
+ * kbase_dma_fence_signal() - Signal and release the atom's own fence
+ * @katom: Atom whose fence is signaled
+ *
+ * Does nothing when the atom has no fence. Otherwise the fence is signaled,
+ * the atom's reference to it is dropped, the pointer is cleared and any
+ * remaining callbacks on the atom are freed. dep_count must already be -1
+ * (asserted).
+ */
+void kbase_dma_fence_signal(struct kbase_jd_atom *katom)
+{
+	if (!katom->dma_fence.fence)
+		return;
+
+	KBASE_DEBUG_ASSERT(atomic_read(&katom->dma_fence.dep_count) == -1);
+
+	/* Signal the atom's fence. */
+	fence_signal(katom->dma_fence.fence);
+	fence_put(katom->dma_fence.fence);
+	/* Clearing the pointer makes a repeated call a no-op. */
+	katom->dma_fence.fence = NULL;
+
+	kbase_dma_fence_free_callbacks(katom, false);
+}
+
+/* Destroy the context's dma-fence workqueue and clear the pointer to it. */
+void kbase_dma_fence_term(struct kbase_context *kctx)
+{
+	destroy_workqueue(kctx->dma_fence.wq);
+	kctx->dma_fence.wq = NULL;
+}
+
+/**
+ * kbase_dma_fence_init() - Set up the dma-fence state of a context
+ * @kctx: Context to initialise
+ *
+ * Initialises the waiting list and creates the unbound workqueue
+ * "mali-fence-<pid>" with a max_active of 1.
+ *
+ * Return: 0 on success, -ENOMEM if the workqueue cannot be created.
+ */
+int kbase_dma_fence_init(struct kbase_context *kctx)
+{
+	INIT_LIST_HEAD(&kctx->dma_fence.waiting_resource);
+
+	kctx->dma_fence.wq = alloc_workqueue("mali-fence-%d",
+					     WQ_UNBOUND, 1, kctx->pid);
+
+	return kctx->dma_fence.wq ? 0 : -ENOMEM;
+}
--- a/drivers/gpu/arm/midgard_for_linux/mali_kbase_dma_fence.h
+++ b/drivers/gpu/arm/midgard_for_linux/mali_kbase_dma_fence.h
@ -0,0 +1,150 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#ifndef _KBASE_DMA_FENCE_H_
+#define _KBASE_DMA_FENCE_H_
+
+#ifdef CONFIG_MALI_DMA_FENCE
+
+#include <linux/fence.h>
+#include <linux/list.h>
+#include <linux/reservation.h>
+
+
+/* Forward declaration from mali_kbase_defs.h */
+struct kbase_jd_atom;
+struct kbase_context;
+
+/**
+ * struct kbase_dma_fence_cb - Mali dma-fence callback data struct
+ * @fence_cb: Embedded fence callback object (struct fence_cb)
+ * @katom:    Pointer to katom that is waiting on this callback
+ * @fence:    Pointer to the fence object on which this callback is waiting
+ * @node:     List head for linking this callback to the katom
+ */
+struct kbase_dma_fence_cb {
+	struct fence_cb fence_cb;
+	struct kbase_jd_atom *katom;
+	struct fence *fence;
+	struct list_head node;
+};
+
+/**
+ * struct kbase_dma_fence_resv_info - Structure with list of reservation objects
+ * @resv_objs:             Array of reservation objects to attach the
+ *                         new fence to.
+ * @dma_fence_resv_count:  Number of reservation objects in the array.
+ * @dma_fence_excl_bitmap: Specifies which resv_obj are exclusive.
+ *
+ * This is used by some functions to pass around a collection of data about
+ * reservation objects.
+ */
+struct kbase_dma_fence_resv_info {
+	struct reservation_object **resv_objs;
+	unsigned int dma_fence_resv_count;
+	/* NOTE(review): presumably one bit per resv_objs entry - confirm */
+	unsigned long *dma_fence_excl_bitmap;
+};
+
+/**
+ * kbase_dma_fence_add_reservation() - Adds a resv to the array of resv_objs
+ * @resv:      Reservation object to add to the array.
+ * @info:      Pointer to struct with current reservation info
+ * @exclusive: Boolean indicating if exclusive access is needed
+ *
+ * The function adds a new reservation_object to an existing array of
+ * reservation_objects. At the same time keeps track of which objects require
+ * exclusive access in dma_fence_excl_bitmap.
+ */
+void kbase_dma_fence_add_reservation(struct reservation_object *resv,
+				     struct kbase_dma_fence_resv_info *info,
+				     bool exclusive);
+
+/**
+ * kbase_dma_fence_wait() - Creates a new fence and attaches it to the resv_objs
+ * @katom: Katom with the external dependency.
+ * @info:  Pointer to struct with current reservation info
+ *
+ * Return: An error code or 0 if succeeds
+ */
+int kbase_dma_fence_wait(struct kbase_jd_atom *katom,
+			 struct kbase_dma_fence_resv_info *info);
+
+/**
+ * kbase_dma_fence_cancel_all_atoms() - Cancel all atoms blocked on dma-fences
+ * @kctx: Pointer to kbase context
+ *
+ * This function will cancel and clean up all katoms on @kctx that are waiting
+ * on dma-buf fences.
+ *
+ * Locking: jctx.lock needs to be held when calling this function.
+ */
+void kbase_dma_fence_cancel_all_atoms(struct kbase_context *kctx);
+
+/**
+ * kbase_dma_fence_cancel_callbacks() - Cancel only callbacks on katom
+ * @katom: Pointer to katom whose callbacks are to be canceled
+ *
+ * This function cancels all dma-buf fence callbacks on @katom, but does not
+ * cancel the katom itself.
+ *
+ * The caller is responsible for ensuring that jd_done_nolock is called on
+ * @katom.
+ *
+ * Locking: jctx.lock must be held when calling this function.
+ */
+void kbase_dma_fence_cancel_callbacks(struct kbase_jd_atom *katom);
+
+/**
+ * kbase_dma_fence_signal() - Signal katom's fence and clean up after wait
+ * @katom: Pointer to katom to signal and clean up
+ *
+ * This function will signal the @katom's fence, if it has one, and clean up
+ * the callback data from the katom's wait on earlier fences.
+ *
+ * Locking: jctx.lock must be held while calling this function.
+ */
+void kbase_dma_fence_signal(struct kbase_jd_atom *katom);
+
+/**
+ * kbase_dma_fence_term() - Terminate Mali dma-fence context
+ * @kctx: kbase context to terminate
+ */
+void kbase_dma_fence_term(struct kbase_context *kctx);
+
+/**
+ * kbase_dma_fence_init() - Initialize Mali dma-fence context
+ * @kctx: kbase context to initialize
+ *
+ * Return: 0 on success, or a negative error code on failure.
+ */
+int kbase_dma_fence_init(struct kbase_context *kctx);
+
+/**
+ * kbase_dma_fence_waiters_remove()- Remove katom from dma-fence wait list
+ * @katom: Pointer to katom to remove from list
+ */
+void kbase_dma_fence_waiters_remove(struct kbase_jd_atom *katom);
+
+#else /* CONFIG_MALI_DMA_FENCE */
+/* Dummy functions for when dma-buf fence isn't enabled. */
+
+/* No-op replacement for kbase_dma_fence_init(); always reports success. */
+static inline int kbase_dma_fence_init(struct kbase_context *kctx)
+{
+	return 0;
+}
+
+/* No-op replacement for kbase_dma_fence_term(). */
+static inline void kbase_dma_fence_term(struct kbase_context *kctx) {}
+#endif /* CONFIG_MALI_DMA_FENCE */
+#endif
--- a/drivers/gpu/arm/midgard_for_linux/mali_kbase_event.c
+++ b/drivers/gpu/arm/midgard_for_linux/mali_kbase_event.c
@ -0,0 +1,259 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#include <mali_kbase.h>
+#include <mali_kbase_debug.h>
+#include <mali_kbase_tlstream.h>
+
+/**
+ * kbase_event_process - Retire a completed atom and hand back its user data
+ * @kctx:  Context the atom belongs to
+ * @katom: Atom in state KBASE_JD_ATOM_STATE_COMPLETED
+ *
+ * Copies out the atom's udata, updates the timeline and tlstream
+ * bookkeeping, marks the atom KBASE_JD_ATOM_STATE_UNUSED and wakes up
+ * waiters on katom->completed.
+ *
+ * Locking: kctx->jctx.lock must be held (checked through lockdep).
+ *
+ * Return: Copy of the atom's udata, taken before the atom is marked unused.
+ */
+static struct base_jd_udata kbase_event_process(struct kbase_context *kctx, struct kbase_jd_atom *katom)
+{
+	struct base_jd_udata data;
+
+	/* Check the pointers before the lockdep assertion dereferences kctx */
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+	KBASE_DEBUG_ASSERT(katom != NULL);
+
+	lockdep_assert_held(&kctx->jctx.lock);
+
+	KBASE_DEBUG_ASSERT(katom->status == KBASE_JD_ATOM_STATE_COMPLETED);
+
+	/* Snapshot the user data before the atom slot is marked unused */
+	data = katom->udata;
+
+	KBASE_TIMELINE_ATOMS_IN_FLIGHT(kctx, atomic_sub_return(1, &kctx->timeline.jd_atoms_in_flight));
+
+	kbase_tlstream_tl_nret_atom_ctx(katom, kctx);
+	kbase_tlstream_tl_del_atom(katom);
+
+	katom->status = KBASE_JD_ATOM_STATE_UNUSED;
+
+	wake_up(&katom->completed);
+
+	return data;
+}
+
+/*
+ * Tell whether a reader has something to collect: non-zero when at least one
+ * event is counted as queued or when the event system has been closed.
+ */
+int kbase_event_pending(struct kbase_context *ctx)
+{
+	KBASE_DEBUG_ASSERT(ctx);
+
+	if (atomic_read(&ctx->event_count) != 0)
+		return 1;
+
+	return atomic_read(&ctx->event_closed) != 0;
+}
+
+KBASE_EXPORT_TEST_API(kbase_event_pending);
+
+/*
+ * Pop the oldest queued event of @ctx into @uevent.
+ *
+ * Returns -1 when the event list is empty and the event system is still
+ * open. Returns 0 otherwise: either a real atom event was dequeued, or (empty
+ * list on a closed context) a BASE_JD_EVENT_DRV_TERMINATED event was
+ * generated.
+ */
+int kbase_event_dequeue(struct kbase_context *ctx, struct base_jd_event_v2 *uevent)
+{
+	struct kbase_jd_atom *head;
+	int closed;
+
+	KBASE_DEBUG_ASSERT(ctx);
+
+	mutex_lock(&ctx->event_mutex);
+
+	if (list_empty(&ctx->event_list)) {
+		/* Sample the closed flag while the mutex is still held */
+		closed = atomic_read(&ctx->event_closed);
+		mutex_unlock(&ctx->event_mutex);
+
+		if (!closed)
+			return -1;
+
+		/* generate the BASE_JD_EVENT_DRV_TERMINATED message on the fly */
+		uevent->event_code = BASE_JD_EVENT_DRV_TERMINATED;
+		memset(&uevent->udata, 0, sizeof(uevent->udata));
+		dev_dbg(ctx->kbdev->dev,
+				"event system closed, returning BASE_JD_EVENT_DRV_TERMINATED(0x%X)\n",
+				BASE_JD_EVENT_DRV_TERMINATED);
+		return 0;
+	}
+
+	/* normal event processing: unlink the head atom under the mutex */
+	atomic_dec(&ctx->event_count);
+	head = list_first_entry(&ctx->event_list, struct kbase_jd_atom, dep_item[0]);
+	list_del(&head->dep_item[0]);
+
+	mutex_unlock(&ctx->event_mutex);
+
+	dev_dbg(ctx->kbdev->dev, "event dequeuing %p\n", (void *)head);
+	uevent->event_code = head->event_code;
+	/* The atom number is the atom's index in the context's atom array */
+	uevent->atom_number = (head - ctx->jctx.atoms);
+
+	if (head->core_req & BASE_JD_REQ_EXTERNAL_RESOURCES)
+		kbase_jd_free_external_resources(head);
+
+	mutex_lock(&ctx->jctx.lock);
+	uevent->udata = kbase_event_process(ctx, head);
+	mutex_unlock(&ctx->jctx.lock);
+
+	return 0;
+}
+
+KBASE_EXPORT_TEST_API(kbase_event_dequeue);
+
+/**
+ * kbase_event_process_noreport_worker - Deferred retirement of an atom that
+ *                                       reports no event but has external
+ *                                       resources
+ * @work:  Work item embedded in the atom
+ *
+ * External resources are freed first; the atom is then processed with the
+ * context's jctx.lock held.
+ */
+static void kbase_event_process_noreport_worker(struct work_struct *work)
+{
+	struct kbase_jd_atom *atom;
+	struct kbase_context *owner;
+
+	atom = container_of(work, struct kbase_jd_atom, work);
+	owner = atom->kctx;
+
+	if (atom->core_req & BASE_JD_REQ_EXTERNAL_RESOURCES)
+		kbase_jd_free_external_resources(atom);
+
+	mutex_lock(&owner->jctx.lock);
+	kbase_event_process(owner, atom);
+	mutex_unlock(&owner->jctx.lock);
+}
+
+/**
+ * kbase_event_process_noreport - Retire an atom without reporting an event
+ * @kctx:  Context pointer
+ * @katom: Atom to be processed
+ *
+ * An atom without external resources is processed on the spot. An atom with
+ * external resources is handed to the context's event workqueue instead, in
+ * order to avoid locking issues.
+ */
+static void kbase_event_process_noreport(struct kbase_context *kctx,
+		struct kbase_jd_atom *katom)
+{
+	if (!(katom->core_req & BASE_JD_REQ_EXTERNAL_RESOURCES)) {
+		kbase_event_process(kctx, katom);
+		return;
+	}
+
+	/* Defer to the worker, which frees the external resources itself */
+	INIT_WORK(&katom->work, kbase_event_process_noreport_worker);
+	queue_work(kctx->event_workq, &katom->work);
+}
+
+/**
+ * kbase_event_coalesce - Flush coalesced events onto the main event list
+ * @kctx:  Context pointer
+ *
+ * kctx->event_list and kctx->event_coalesce_count must be protected
+ * by a lock unless this is the last thread using them
+ * (and we're about to terminate the lock).
+ *
+ * Return: The number of pending events moved to the main event list
+ */
+static int kbase_event_coalesce(struct kbase_context *kctx)
+{
+	const int moved = kctx->event_coalesce_count;
+
+	/* Append the pending events to the main list; this also leaves the
+	 * pending list empty and reinitialised */
+	list_splice_tail_init(&kctx->event_coalesce_list, &kctx->event_list);
+
+	kctx->event_coalesce_count = 0;
+
+	return moved;
+}
+
+/*
+ * Post the completion of @atom on @ctx.
+ *
+ * Atoms that must not produce an event (EVENT_NEVER, or EVENT_ONLY_ON_FAILURE
+ * with a BASE_JD_EVENT_DONE result) are retired without reporting. Atoms
+ * flagged EVENT_COALESCE are parked on the coalesce list. Any other atom is
+ * queued on the event list together with all parked events, and waiters are
+ * woken up.
+ */
+void kbase_event_post(struct kbase_context *ctx, struct kbase_jd_atom *atom)
+{
+	const bool silent_success =
+		(atom->core_req & BASE_JD_REQ_EVENT_ONLY_ON_FAILURE) &&
+		atom->event_code == BASE_JD_EVENT_DONE;
+	int reported;
+
+	if (silent_success || (atom->core_req & BASEP_JD_REQ_EVENT_NEVER)) {
+		/* Don't report the event */
+		kbase_event_process_noreport(ctx, atom);
+		return;
+	}
+
+	kbase_tlstream_tl_attrib_atom_state(atom, TL_ATOM_STATE_POSTED);
+
+	if (atom->core_req & BASE_JD_REQ_EVENT_COALESCE) {
+		/* Don't report the event until other event(s) have completed */
+		mutex_lock(&ctx->event_mutex);
+		list_add_tail(&atom->dep_item[0], &ctx->event_coalesce_list);
+		ctx->event_coalesce_count++;
+		mutex_unlock(&ctx->event_mutex);
+		return;
+	}
+
+	/* Report the event and any pending events now */
+	mutex_lock(&ctx->event_mutex);
+	reported = kbase_event_coalesce(ctx) + 1;
+	list_add_tail(&atom->dep_item[0], &ctx->event_list);
+	atomic_add(reported, &ctx->event_count);
+	mutex_unlock(&ctx->event_mutex);
+
+	kbase_event_wakeup(ctx);
+}
+KBASE_EXPORT_TEST_API(kbase_event_post);
+
+/*
+ * Mark the context's event system as closed and wake up event waiters.
+ * Once the flag is set, kbase_event_pending() reports non-zero and
+ * kbase_event_dequeue() returns BASE_JD_EVENT_DRV_TERMINATED when the event
+ * list is empty.
+ */
+void kbase_event_close(struct kbase_context *kctx)
+{
+	/* The flag is set under event_mutex, which dequeue also takes */
+	mutex_lock(&kctx->event_mutex);
+	atomic_set(&kctx->event_closed, true);
+	mutex_unlock(&kctx->event_mutex);
+	kbase_event_wakeup(kctx);
+}
+
+/*
+ * Initialise the event state of a context: both event lists, the event
+ * mutex, the event counters and the closed flag, plus the "kbase_event"
+ * workqueue used to retire unreported atoms that have external resources.
+ *
+ * Returns 0 on success or -ENOMEM if the workqueue could not be allocated.
+ */
+int kbase_event_init(struct kbase_context *kctx)
+{
+	KBASE_DEBUG_ASSERT(kctx);
+
+	INIT_LIST_HEAD(&kctx->event_list);
+	INIT_LIST_HEAD(&kctx->event_coalesce_list);
+	mutex_init(&kctx->event_mutex);
+	atomic_set(&kctx->event_count, 0);
+	kctx->event_coalesce_count = 0;
+	atomic_set(&kctx->event_closed, false);
+	kctx->event_workq = alloc_workqueue("kbase_event", WQ_MEM_RECLAIM, 1);
+
+	/* A NULL workqueue means the allocation failed, not a bad argument */
+	if (NULL == kctx->event_workq)
+		return -ENOMEM;
+
+	return 0;
+}
+
+KBASE_EXPORT_TEST_API(kbase_event_init);
+
+/*
+ * Tear down the event state of a context: flush and destroy the event
+ * workqueue, then discard every event that is still queued or coalesced.
+ */
+void kbase_event_cleanup(struct kbase_context *kctx)
+{
+	struct base_jd_event_v2 discarded;
+	int moved;
+
+	KBASE_DEBUG_ASSERT(kctx);
+	KBASE_DEBUG_ASSERT(kctx->event_workq);
+
+	flush_workqueue(kctx->event_workq);
+	destroy_workqueue(kctx->event_workq);
+
+	/* We use kbase_event_dequeue to remove the remaining events as that
+	 * deals with all the cleanup needed for the atoms.
+	 *
+	 * Note: use of kctx->event_list without a lock is safe because this must be the last
+	 * thread using it (because we're about to terminate the lock)
+	 */
+	moved = kbase_event_coalesce(kctx);
+	atomic_add(moved, &kctx->event_count);
+
+	/* The dequeued event contents are thrown away */
+	while (!list_empty(&kctx->event_list))
+		kbase_event_dequeue(kctx, &discarded);
+}
+
+KBASE_EXPORT_TEST_API(kbase_event_cleanup);
--- a/drivers/gpu/arm/midgard_for_linux/mali_kbase_gator.h
+++ b/drivers/gpu/arm/midgard_for_linux/mali_kbase_gator.h
@ -0,0 +1,45 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/* NB taken from gator  */
+/*
+ * List of possible actions to be controlled by DS-5 Streamline.
+ * The following numbers are used by gator to control the frame buffer dumping
+ * and s/w counter reporting. We cannot use the enums in mali_uk_types.h because
+ * they are unknown inside gator.
+ */
+#ifndef _KBASE_GATOR_H_
+#define _KBASE_GATOR_H_
+
+#ifdef CONFIG_MALI_GATOR_SUPPORT
+/* Pack an event word: @type shifted left by 24 bits, @number by 16 bits */
+#define GATOR_MAKE_EVENT(type, number) (((type) << 24) | ((number) << 16))
+/* Job slot action codes.
+ * NOTE(review): presumably used as the @type of GATOR_MAKE_EVENT() - confirm
+ * at the call sites.
+ */
+#define GATOR_JOB_SLOT_START 1
+#define GATOR_JOB_SLOT_STOP  2
+#define GATOR_JOB_SLOT_SOFT_STOPPED  3
+
+void kbase_trace_mali_job_slots_event(u32 event, const struct kbase_context *kctx, u8 atom_id);
+void kbase_trace_mali_pm_status(u32 event, u64 value);
+void kbase_trace_mali_pm_power_off(u32 event, u64 value);
+void kbase_trace_mali_pm_power_on(u32 event, u64 value);
+void kbase_trace_mali_page_fault_insert_pages(int event, u32 value);
+void kbase_trace_mali_mmu_as_in_use(int event);
+void kbase_trace_mali_mmu_as_released(int event);
+void kbase_trace_mali_total_alloc_pages_change(long long int event);
+
+#endif /* CONFIG_MALI_GATOR_SUPPORT */
+
+#endif  /* _KBASE_GATOR_H_ */
--- a/drivers/gpu/arm/midgard_for_linux/mali_kbase_gator_api.c
+++ b/drivers/gpu/arm/midgard_for_linux/mali_kbase_gator_api.c
@ -0,0 +1,330 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#include "mali_kbase.h"
+#include "mali_kbase_hw.h"
+#include "mali_kbase_mem_linux.h"
+#include "mali_kbase_gator_api.h"
+#include "mali_kbase_gator_hwcnt_names.h"
+
+/* Layout constants used when describing the counter dump to gator */
+/* Shader core entries emitted per core group in the v4 layout */
+#define MALI_MAX_CORES_PER_GROUP		4
+/* Counter blocks per core group in the v4 layout */
+#define MALI_MAX_NUM_BLOCKS_PER_GROUP	8
+/* Counters in one block */
+#define MALI_COUNTERS_PER_BLOCK			64
+/* Size of one counter in the dump, in bytes */
+#define MALI_BYTES_PER_COUNTER			4
+
+/*
+ * Private state behind the opaque handle returned to gator by
+ * kbase_gator_hwcnt_init().
+ */
+struct kbase_gator_hwcnt_handles {
+	/* Device reference from kbase_find_device(); released on term */
+	struct kbase_device *kbdev;
+	/* Client returned by kbase_vinstr_hwcnt_kernel_setup() */
+	struct kbase_vinstr_client *vinstr_cli;
+	/* Dump buffer, also exposed to gator as kernel_dump_buffer */
+	void *vinstr_buffer;
+	/* Work item that runs dump_worker() */
+	struct work_struct dump_work;
+	/* Set to 1 by dump_worker() after a dump; cleared when polled */
+	int dump_complete;
+	/* Held by dump_worker() while it updates dump_complete */
+	spinlock_t dump_lock;
+};
+
+static void dump_worker(struct work_struct *work);
+
+/*
+ * Look up the table of hardware counter names for the first kbase device.
+ *
+ * @total_counters is an output: it receives the number of entries in the
+ * returned table, and is set to 0 whenever NULL is returned (a NULL
+ * @total_counters itself is rejected without being written).
+ *
+ * On success a reference on this module is taken; it is dropped again by
+ * kbase_gator_hwcnt_term_names().
+ *
+ * Returns the name table, or NULL if @total_counters is NULL, no device is
+ * found, the product ID is not recognised or the module reference cannot be
+ * taken.
+ */
+const char * const *kbase_gator_hwcnt_init_names(uint32_t *total_counters)
+{
+	const char * const *hardware_counters;
+	struct kbase_device *kbdev;
+	uint32_t product_id;
+	uint32_t count;
+
+	if (!total_counters)
+		return NULL;
+
+	/* Every NULL return below must report an empty table */
+	*total_counters = 0;
+
+	/* Get the first device - it doesn't matter in this case */
+	kbdev = kbase_find_device(-1);
+	if (!kbdev)
+		return NULL;
+
+	product_id = kbdev->gpu_props.props.core_props.product_id;
+
+	if (GPU_ID_IS_NEW_FORMAT(product_id)) {
+		switch (GPU_ID2_MODEL_MATCH_VALUE(product_id)) {
+		case GPU_ID2_PRODUCT_TMIX:
+			hardware_counters = hardware_counters_mali_tMIx;
+			count = ARRAY_SIZE(hardware_counters_mali_tMIx);
+			break;
+		case GPU_ID2_PRODUCT_THEX:
+			hardware_counters = hardware_counters_mali_tHEx;
+			count = ARRAY_SIZE(hardware_counters_mali_tHEx);
+			break;
+		default:
+			hardware_counters = NULL;
+			count = 0;
+			dev_err(kbdev->dev, "Unrecognized product ID: %u\n",
+				product_id);
+			break;
+		}
+	} else {
+		switch (product_id) {
+			/* If we are using a Mali-T60x device */
+		case GPU_ID_PI_T60X:
+			hardware_counters = hardware_counters_mali_t60x;
+			count = ARRAY_SIZE(hardware_counters_mali_t60x);
+			break;
+			/* If we are using a Mali-T62x device */
+		case GPU_ID_PI_T62X:
+			hardware_counters = hardware_counters_mali_t62x;
+			count = ARRAY_SIZE(hardware_counters_mali_t62x);
+			break;
+			/* If we are using a Mali-T72x device */
+		case GPU_ID_PI_T72X:
+			hardware_counters = hardware_counters_mali_t72x;
+			count = ARRAY_SIZE(hardware_counters_mali_t72x);
+			break;
+			/* If we are using a Mali-T76x device */
+		case GPU_ID_PI_T76X:
+			hardware_counters = hardware_counters_mali_t76x;
+			count = ARRAY_SIZE(hardware_counters_mali_t76x);
+			break;
+			/* If we are using a Mali-T82x device */
+		case GPU_ID_PI_T82X:
+			hardware_counters = hardware_counters_mali_t82x;
+			count = ARRAY_SIZE(hardware_counters_mali_t82x);
+			break;
+			/* If we are using a Mali-T83x device */
+		case GPU_ID_PI_T83X:
+			hardware_counters = hardware_counters_mali_t83x;
+			count = ARRAY_SIZE(hardware_counters_mali_t83x);
+			break;
+			/* If we are using a Mali-T86x device */
+		case GPU_ID_PI_T86X:
+			hardware_counters = hardware_counters_mali_t86x;
+			count = ARRAY_SIZE(hardware_counters_mali_t86x);
+			break;
+			/* If we are using a Mali-T88x device */
+		case GPU_ID_PI_TFRX:
+			hardware_counters = hardware_counters_mali_t88x;
+			count = ARRAY_SIZE(hardware_counters_mali_t88x);
+			break;
+		default:
+			hardware_counters = NULL;
+			count = 0;
+			dev_err(kbdev->dev, "Unrecognized product ID: %u\n",
+				product_id);
+			break;
+		}
+	}
+
+	/* Release the kbdev reference. */
+	kbase_release_device(kbdev);
+
+	/* If we return a string array take a reference on the module (or fail). */
+	if (hardware_counters && !try_module_get(THIS_MODULE))
+		return NULL;
+
+	/* count is 0 when the product ID was not recognised */
+	*total_counters = count;
+
+	return hardware_counters;
+}
+KBASE_EXPORT_SYMBOL(kbase_gator_hwcnt_init_names);
+
+/*
+ * Drop the module reference taken by a successful call to
+ * kbase_gator_hwcnt_init_names().
+ */
+void kbase_gator_hwcnt_term_names(void)
+{
+	/* Release the module reference. */
+	module_put(THIS_MODULE);
+}
+KBASE_EXPORT_SYMBOL(kbase_gator_hwcnt_term_names);
+
+/*
+ * Create a hardware counter dumping context for gator.
+ *
+ * On entry @in_out_info->bitmask[] holds the counter enables, in the order
+ * job manager, tiler, shader, MMU/L2. On success @in_out_info is filled in
+ * with the dump buffer, its reported size, the GPU properties and the block
+ * layout (hwc_layout / nr_hwc_blocks).
+ *
+ * On failure every resource acquired here is released and the pointers this
+ * function stored in @in_out_info (hwc_layout, kernel_dump_buffer) are reset
+ * to NULL, so a later kbase_gator_hwcnt_term() on the same structure cannot
+ * free them a second time.
+ *
+ * Returns an opaque handle on success, NULL on failure.
+ */
+struct kbase_gator_hwcnt_handles *kbase_gator_hwcnt_init(struct kbase_gator_hwcnt_info *in_out_info)
+{
+	struct kbase_gator_hwcnt_handles *hand;
+	struct kbase_uk_hwcnt_reader_setup setup;
+	uint32_t dump_size = 0, i = 0;
+
+	if (!in_out_info)
+		return NULL;
+
+	hand = kzalloc(sizeof(*hand), GFP_KERNEL);
+	if (!hand)
+		return NULL;
+
+	INIT_WORK(&hand->dump_work, dump_worker);
+	spin_lock_init(&hand->dump_lock);
+
+	/* Get the first device */
+	hand->kbdev = kbase_find_device(-1);
+	if (!hand->kbdev)
+		goto free_hand;
+
+	dump_size = kbase_vinstr_dump_size(hand->kbdev);
+	hand->vinstr_buffer = kzalloc(dump_size, GFP_KERNEL);
+	if (!hand->vinstr_buffer)
+		goto release_device;
+	in_out_info->kernel_dump_buffer = hand->vinstr_buffer;
+
+	in_out_info->nr_cores = hand->kbdev->gpu_props.num_cores;
+	in_out_info->nr_core_groups = hand->kbdev->gpu_props.num_core_groups;
+	in_out_info->gpu_id = hand->kbdev->gpu_props.props.core_props.product_id;
+
+	/* If we are using a v4 device (Mali-T6xx or Mali-T72x) */
+	if (kbase_hw_has_feature(hand->kbdev, BASE_HW_FEATURE_V4)) {
+		uint32_t cg, j;
+		uint64_t core_mask;
+
+		/* There are 8 hardware counters blocks per core group */
+		in_out_info->hwc_layout = kmalloc(sizeof(enum hwc_type) *
+			MALI_MAX_NUM_BLOCKS_PER_GROUP *
+			in_out_info->nr_core_groups, GFP_KERNEL);
+
+		if (!in_out_info->hwc_layout)
+			goto free_vinstr_buffer;
+
+		dump_size = in_out_info->nr_core_groups *
+			MALI_MAX_NUM_BLOCKS_PER_GROUP *
+			MALI_COUNTERS_PER_BLOCK *
+			MALI_BYTES_PER_COUNTER;
+
+		/* Per group: 4 shader slots, tiler, MMU/L2, reserved, JM slot */
+		for (cg = 0; cg < in_out_info->nr_core_groups; cg++) {
+			core_mask = hand->kbdev->gpu_props.props.coherency_info.group[cg].core_mask;
+
+			for (j = 0; j < MALI_MAX_CORES_PER_GROUP; j++) {
+				if (core_mask & (1u << j))
+					in_out_info->hwc_layout[i++] = SHADER_BLOCK;
+				else
+					in_out_info->hwc_layout[i++] = RESERVED_BLOCK;
+			}
+
+			in_out_info->hwc_layout[i++] = TILER_BLOCK;
+			in_out_info->hwc_layout[i++] = MMU_L2_BLOCK;
+
+			in_out_info->hwc_layout[i++] = RESERVED_BLOCK;
+
+			/* Only the first core group carries the JM block */
+			if (0 == cg)
+				in_out_info->hwc_layout[i++] = JM_BLOCK;
+			else
+				in_out_info->hwc_layout[i++] = RESERVED_BLOCK;
+		}
+	/* If we are using any other device */
+	} else {
+		uint32_t nr_l2, nr_sc_bits, j;
+		uint64_t core_mask;
+
+		nr_l2 = hand->kbdev->gpu_props.props.l2_props.num_l2_slices;
+
+		core_mask = hand->kbdev->gpu_props.props.coherency_info.group[0].core_mask;
+
+		/* One layout entry per bit up to the highest set core bit */
+		nr_sc_bits = fls64(core_mask);
+
+		/* The job manager and tiler sets of counters
+		 * are always present */
+		in_out_info->hwc_layout = kmalloc(sizeof(enum hwc_type) * (2 + nr_sc_bits + nr_l2), GFP_KERNEL);
+
+		if (!in_out_info->hwc_layout)
+			goto free_vinstr_buffer;
+
+		dump_size = (2 + nr_sc_bits + nr_l2) * MALI_COUNTERS_PER_BLOCK * MALI_BYTES_PER_COUNTER;
+
+		in_out_info->hwc_layout[i++] = JM_BLOCK;
+		in_out_info->hwc_layout[i++] = TILER_BLOCK;
+
+		for (j = 0; j < nr_l2; j++)
+			in_out_info->hwc_layout[i++] = MMU_L2_BLOCK;
+
+		while (core_mask != 0ull) {
+			if ((core_mask & 1ull) != 0ull)
+				in_out_info->hwc_layout[i++] = SHADER_BLOCK;
+			else
+				in_out_info->hwc_layout[i++] = RESERVED_BLOCK;
+			core_mask >>= 1;
+		}
+	}
+
+	in_out_info->nr_hwc_blocks = i;
+	in_out_info->size = dump_size;
+
+	setup.jm_bm = in_out_info->bitmask[0];
+	setup.tiler_bm = in_out_info->bitmask[1];
+	setup.shader_bm = in_out_info->bitmask[2];
+	setup.mmu_l2_bm = in_out_info->bitmask[3];
+	hand->vinstr_cli = kbase_vinstr_hwcnt_kernel_setup(hand->kbdev->vinstr_ctx,
+			&setup, hand->vinstr_buffer);
+	if (!hand->vinstr_cli) {
+		dev_err(hand->kbdev->dev, "Failed to register gator with vinstr core");
+		goto free_layout;
+	}
+
+	return hand;
+
+free_layout:
+	kfree(in_out_info->hwc_layout);
+
+free_vinstr_buffer:
+	/* Do not leave pointers to freed memory in the caller's structure */
+	in_out_info->hwc_layout = NULL;
+	in_out_info->kernel_dump_buffer = NULL;
+	kfree(hand->vinstr_buffer);
+
+release_device:
+	kbase_release_device(hand->kbdev);
+
+free_hand:
+	kfree(hand);
+	return NULL;
+}
+KBASE_EXPORT_SYMBOL(kbase_gator_hwcnt_init);
+
+/*
+ * Release everything set up by kbase_gator_hwcnt_init().
+ *
+ * Either argument may be NULL. The block layout stored in @in_out_info is
+ * freed; for @opaque_handles any queued dump work is cancelled, the vinstr
+ * client is detached, and the dump buffer, the device reference and the
+ * handle itself are released.
+ *
+ * Pointers in @in_out_info that referred to freed memory are reset to NULL,
+ * so calling this function again on the same structure does not free the
+ * layout a second time.
+ */
+void kbase_gator_hwcnt_term(struct kbase_gator_hwcnt_info *in_out_info, struct kbase_gator_hwcnt_handles *opaque_handles)
+{
+	if (in_out_info) {
+		kfree(in_out_info->hwc_layout);
+		in_out_info->hwc_layout = NULL;
+	}
+
+	if (opaque_handles) {
+		/* Stop the (self re-queueing) dump work before tearing down */
+		cancel_work_sync(&opaque_handles->dump_work);
+		kbase_vinstr_detach_client(opaque_handles->vinstr_cli);
+		kfree(opaque_handles->vinstr_buffer);
+		kbase_release_device(opaque_handles->kbdev);
+		kfree(opaque_handles);
+
+		/* kernel_dump_buffer was set to the buffer freed just above */
+		if (in_out_info)
+			in_out_info->kernel_dump_buffer = NULL;
+	}
+}
+KBASE_EXPORT_SYMBOL(kbase_gator_hwcnt_term);
+
+/*
+ * Work handler behind kbase_gator_instr_hwcnt_dump_irq(): request a manual
+ * counter dump. When kbase_vinstr_hwc_dump() returns non-zero the work item
+ * queues itself again; otherwise dump_complete is raised under dump_lock.
+ */
+static void dump_worker(struct work_struct *work)
+{
+	struct kbase_gator_hwcnt_handles *handles =
+		container_of(work, struct kbase_gator_hwcnt_handles, dump_work);
+
+	if (kbase_vinstr_hwc_dump(handles->vinstr_cli,
+			BASE_HWCNT_READER_EVENT_MANUAL)) {
+		/* Dump did not go through: try again later */
+		schedule_work(&handles->dump_work);
+		return;
+	}
+
+	spin_lock_bh(&handles->dump_lock);
+	handles->dump_complete = 1;
+	spin_unlock_bh(&handles->dump_lock);
+}
+
+/*
+ * Poll and consume the dump completion flag: the current value of
+ * dump_complete is written to *success and returned, and the flag is reset
+ * to 0. Returns 0 without touching anything if either pointer is NULL.
+ */
+uint32_t kbase_gator_instr_hwcnt_dump_complete(
+		struct kbase_gator_hwcnt_handles *opaque_handles,
+		uint32_t * const success)
+{
+	if (!opaque_handles || !success)
+		return 0;
+
+	*success = opaque_handles->dump_complete;
+	opaque_handles->dump_complete = 0;
+
+	return *success;
+}
+KBASE_EXPORT_SYMBOL(kbase_gator_instr_hwcnt_dump_complete);
+
+/*
+ * Request a counter dump by scheduling the handle's dump work item; a NULL
+ * handle is ignored.
+ *
+ * NOTE(review): this always returns 0, whereas the documentation of the
+ * prototype in mali_kbase_gator_api.h describes zero as "device busy" and
+ * non-zero as success - confirm which of the two callers rely on.
+ */
+uint32_t kbase_gator_instr_hwcnt_dump_irq(struct kbase_gator_hwcnt_handles *opaque_handles)
+{
+	if (opaque_handles)
+		schedule_work(&opaque_handles->dump_work);
+	return 0;
+}
+KBASE_EXPORT_SYMBOL(kbase_gator_instr_hwcnt_dump_irq);
--- a/drivers/gpu/arm/midgard_for_linux/mali_kbase_gator_api.h
+++ b/drivers/gpu/arm/midgard_for_linux/mali_kbase_gator_api.h
@ -0,0 +1,219 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#ifndef _KBASE_GATOR_API_H_
+#define _KBASE_GATOR_API_H_
+
+/**
+ * @brief This file describes the API used by Gator to fetch hardware counters.
+ */
+
+/* This define is used by the gator kernel module compile to select which DDK
+ * API calling convention to use. If not defined (legacy DDK) gator assumes
+ * version 1. The version to DDK release mapping is:
+ *     Version 1 API: DDK versions r1px, r2px
+ *     Version 2 API: DDK versions r3px, r4px
+ *     Version 3 API: DDK version r5p0 and newer
+ *
+ * API Usage
+ * =========
+ *
+ * 1] Call kbase_gator_hwcnt_init_names() to return the list of short counter
+ * names for the GPU present in this device.
+ *
+ * 2] Create a kbase_gator_hwcnt_info structure and set the counter enables for
+ * the counters you want enabled. The enables can all be set for simplicity in
+ * most use cases, but disabling some will let you minimize bandwidth impact.
+ *
+ * 3] Call kbase_gator_hwcnt_init() using the above structure, to create a
+ * counter context. On successful return the DDK will have populated the
+ * structure with a variety of useful information.
+ *
+ * 4] Call kbase_gator_instr_hwcnt_dump_irq() to queue a non-blocking request
+ * for a counter dump. If this returns a non-zero value the request has been
+ * queued, otherwise the driver has been unable to do so (typically because
+ * another user of the instrumentation exists concurrently).
+ *
+ * 5] Call kbase_gator_instr_hwcnt_dump_complete() to test whether the
+ * previously requested dump has been successful. If this returns non-zero the
+ * counter dump has resolved, but the value of *success must also be tested as
+ * the dump may not have been successful. If it returns zero the counter dump
+ * was abandoned due to the device being busy (typically because another
+ * user of the instrumentation exists concurrently).
+ *
+ * 6] Process the counters stored in the buffer pointed to by ...
+ *
+ *        kbase_gator_hwcnt_info->kernel_dump_buffer
+ *
+ *    In pseudo code you can find all of the counters via this approach:
+ *
+ *
+ *        hwcnt_info # pointer to kbase_gator_hwcnt_info structure
+ *        hwcnt_name # pointer to name list
+ *
+ *        u32 * hwcnt_data = (u32*)hwcnt_info->kernel_dump_buffer
+ *
+ *        # Iterate over each 64-counter block in this GPU configuration
+ *        for( i = 0; i < hwcnt_info->nr_hwc_blocks; i++) {
+ *            hwc_type type = hwcnt_info->hwc_layout[i];
+ *
+ *            # Skip reserved type blocks - they contain no counters at all
+ *            if( type == RESERVED_BLOCK ) {
+ *                continue;
+ *            }
+ *
+ *            size_t name_offset = type * 64;
+ *            size_t data_offset = i * 64;
+ *
+ *            # Iterate over the names of the counters in this block type
+ *            for( j = 0; j < 64; j++) {
+ *                const char * name = hwcnt_name[name_offset+j];
+ *
+ *                # Skip empty name strings - there is no counter here
+ *                if( name[0] == '\0' ) {
+ *                    continue;
+ *                }
+ *
+ *                u32 data = hwcnt_data[data_offset+j];
+ *
+ *                printk( "COUNTER: %s DATA: %u\n", name, data );
+ *            }
+ *        }
+ *
+ *
+ *     Note that in most implementations you typically want to either SUM or
+ *     AVERAGE multiple instances of the same counter if, for example, you have
+ *     multiple shader cores or multiple L2 caches. The most sensible view for
+ *     analysis is to AVERAGE shader core counters, but SUM L2 cache and MMU
+ *     counters.
+ *
+ * 7] Goto 4, repeating until you want to stop collecting counters.
+ *
+ * 8] Release the dump resources by calling kbase_gator_hwcnt_term().
+ *
+ * 9] Release the name table resources by calling
+ *    kbase_gator_hwcnt_term_names(). This function must only be called if
+ *    init_names() returned a non-NULL value.
+ **/
+
+#define MALI_DDK_GATOR_API_VERSION 3
+
+/*
+ * Type of a 64-counter block in the dump layout. The pseudo code in the
+ * usage notes above multiplies the value by 64 to locate the block's names
+ * in the name table, so the numeric values must not be changed.
+ */
+enum hwc_type {
+	JM_BLOCK = 0,
+	TILER_BLOCK,
+	SHADER_BLOCK,
+	MMU_L2_BLOCK,
+	/* Block that contains no counters at all; readers skip it */
+	RESERVED_BLOCK
+};
+
+/*
+ * Information exchanged between Gator and kbase when a counter context is
+ * created with kbase_gator_hwcnt_init().
+ */
+struct kbase_gator_hwcnt_info {
+	/* Passed from Gator to kbase */
+
+	/* the bitmask of enabled hardware counters for each counter block,
+	 * indexed by enum hwc_type (JM, tiler, shader, MMU/L2) */
+	uint16_t bitmask[4];
+
+	/* Passed from kbase to Gator */
+
+	/* ptr to counter dump memory */
+	void *kernel_dump_buffer;
+
+	/* size of counter dump memory */
+	uint32_t size;
+
+	/* the ID of the Mali device */
+	uint32_t gpu_id;
+
+	/* the number of shader cores in the GPU */
+	uint32_t nr_cores;
+
+	/* the number of core groups */
+	uint32_t nr_core_groups;
+
+	/* the memory layout of the performance counters */
+	enum hwc_type *hwc_layout;
+
+	/* the total number of hardware counter blocks */
+	uint32_t nr_hwc_blocks;
+};
+
+/**
+ * @brief Opaque block of Mali data which Gator needs to return to the API later.
+ */
+struct kbase_gator_hwcnt_handles;
+
+/**
+ * @brief Initialize the resources Gator needs for performance profiling.
+ *
+ * @param in_out_info   A pointer to a structure containing the enabled counters passed from Gator and all the Mali
+ *                      specific information that will be returned to Gator. On entry Gator must have populated the
+ *                      'bitmask' field with the counters it wishes to enable for each class of counter block.
+ *                      Each entry in the array corresponds to a single counter class based on the "hwc_type"
+ *                      enumeration, and each bit corresponds to an enable for 4 sequential counters (LSB enables
+ *                      the first 4 counters in the block, and so on). See the GPU counter array as returned by
+ *                      kbase_gator_hwcnt_init_names() for the index values of each counter for the current GPU.
+ *
+ * @return              Pointer to an opaque handle block on success, NULL on error.
+ */
+extern struct kbase_gator_hwcnt_handles *kbase_gator_hwcnt_init(struct kbase_gator_hwcnt_info *in_out_info);
+
+/**
+ * @brief Free all resources once Gator has finished using performance counters.
+ *
+ * @param in_out_info       A pointer to a structure containing the enabled counters passed from Gator and all the
+ *                          Mali specific information that will be returned to Gator.
+ * @param opaque_handles    A wrapper structure for kbase structures.
+ */
+extern void kbase_gator_hwcnt_term(struct kbase_gator_hwcnt_info *in_out_info, struct kbase_gator_hwcnt_handles *opaque_handles);
+
+/**
+ * @brief Poll whether a counter dump is successful.
+ *
+ * @param opaque_handles    A wrapper structure for kbase structures.
+ * @param[out] success      Non-zero on success, zero on failure.
+ *
+ * @return                  Zero if the dump is still pending, non-zero if the dump has completed. Note that a
+ *                          completed dump may not have dumped successfully, so the caller must test for both
+ *                          a completed and successful dump before processing counters.
+ */
+extern uint32_t kbase_gator_instr_hwcnt_dump_complete(struct kbase_gator_hwcnt_handles *opaque_handles, uint32_t * const success);
+
+/**
+ * @brief Request the generation of a new counter dump.
+ *
+ * @param opaque_handles    A wrapper structure for kbase structures.
+ *
+ * @return                  Zero if the hardware device is busy and cannot handle the request, non-zero otherwise.
+ */
+extern uint32_t kbase_gator_instr_hwcnt_dump_irq(struct kbase_gator_hwcnt_handles *opaque_handles);
+
+/**
+ * @brief This function is used to fetch the names table based on the Mali device in use.
+ *
+ * @param[out] total_counters The total number of counters short names in the Mali devices' list.
+ *
+ * @return                    Pointer to an array of strings of length *total_counters.
+ */
+extern const char * const *kbase_gator_hwcnt_init_names(uint32_t *total_counters);
+
+/**
+ * @brief This function is used to terminate the use of the names table.
+ *
+ * This function must only be called if the initial call to kbase_gator_hwcnt_init_names returned a non-NULL value.
+ */
+extern void kbase_gator_hwcnt_term_names(void);
+
+#endif
--- a/drivers/gpu/arm/midgard_for_linux/mali_kbase_gator_hwcnt_names.h
+++ b/drivers/gpu/arm/midgard_for_linux/mali_kbase_gator_hwcnt_names.h
--- a/drivers/gpu/arm/midgard_for_linux/mali_kbase_gator_hwcnt_names_thex.h
+++ b/drivers/gpu/arm/midgard_for_linux/mali_kbase_gator_hwcnt_names_thex.h
@ -0,0 +1,291 @@
+/*
+ *
+ * (C) COPYRIGHT 2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/*
+ * This header was autogenerated, it should not be edited.
+ */
+
+#ifndef _KBASE_GATOR_HWCNT_NAMES_THEX_H_
+#define _KBASE_GATOR_HWCNT_NAMES_THEX_H_
+
+static const char * const hardware_counters_mali_tHEx[] = {
+	/*
+	 * Counter name table for tHEx. The table is laid out as four
+	 * consecutive groups of 64 entries each (Job Manager, Tiler,
+	 * Shader Core, Memory System), 256 entries in total. Empty strings
+	 * fill the positions that have no counter name in this table.
+	 *
+	 * Performance counters for the Job Manager
+	 */
+	"",
+	"",
+	"",
+	"",
+	"THEx_MESSAGES_SENT",
+	"THEx_MESSAGES_RECEIVED",
+	"THEx_GPU_ACTIVE",
+	"THEx_IRQ_ACTIVE",
+	"THEx_JS0_JOBS",
+	"THEx_JS0_TASKS",
+	"THEx_JS0_ACTIVE",
+	"",
+	"THEx_JS0_WAIT_READ",
+	"THEx_JS0_WAIT_ISSUE",
+	"THEx_JS0_WAIT_DEPEND",
+	"THEx_JS0_WAIT_FINISH",
+	"THEx_JS1_JOBS",
+	"THEx_JS1_TASKS",
+	"THEx_JS1_ACTIVE",
+	"",
+	"THEx_JS1_WAIT_READ",
+	"THEx_JS1_WAIT_ISSUE",
+	"THEx_JS1_WAIT_DEPEND",
+	"THEx_JS1_WAIT_FINISH",
+	"THEx_JS2_JOBS",
+	"THEx_JS2_TASKS",
+	"THEx_JS2_ACTIVE",
+	"",
+	"THEx_JS2_WAIT_READ",
+	"THEx_JS2_WAIT_ISSUE",
+	"THEx_JS2_WAIT_DEPEND",
+	"THEx_JS2_WAIT_FINISH",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+
+	/* Performance counters for the Tiler */
+	"",
+	"",
+	"",
+	"",
+	"THEx_TILER_ACTIVE",
+	"THEx_JOBS_PROCESSED",
+	"THEx_TRIANGLES",
+	"THEx_LINES",
+	"THEx_POINTS",
+	"THEx_FRONT_FACING",
+	"THEx_BACK_FACING",
+	"THEx_PRIM_VISIBLE",
+	"THEx_PRIM_CULLED",
+	"THEx_PRIM_CLIPPED",
+	"THEx_PRIM_SAT_CULLED",
+	"",
+	"",
+	"THEx_BUS_READ",
+	"",
+	"THEx_BUS_WRITE",
+	"THEx_LOADING_DESC",
+	"THEx_IDVS_POS_SHAD_REQ",
+	"THEx_IDVS_POS_SHAD_WAIT",
+	"THEx_IDVS_POS_SHAD_STALL",
+	"THEx_IDVS_POS_FIFO_FULL",
+	"THEx_PREFETCH_STALL",
+	"THEx_VCACHE_HIT",
+	"THEx_VCACHE_MISS",
+	"THEx_VCACHE_LINE_WAIT",
+	"THEx_VFETCH_POS_READ_WAIT",
+	"THEx_VFETCH_VERTEX_WAIT",
+	"THEx_VFETCH_STALL",
+	"THEx_PRIMASSY_STALL",
+	"THEx_BBOX_GEN_STALL",
+	"THEx_IDVS_VBU_HIT",
+	"THEx_IDVS_VBU_MISS",
+	"THEx_IDVS_VBU_LINE_DEALLOCATE",
+	"THEx_IDVS_VAR_SHAD_REQ",
+	"THEx_IDVS_VAR_SHAD_STALL",
+	"THEx_BINNER_STALL",
+	"THEx_ITER_STALL",
+	"THEx_COMPRESS_MISS",
+	"THEx_COMPRESS_STALL",
+	"THEx_PCACHE_HIT",
+	"THEx_PCACHE_MISS",
+	"THEx_PCACHE_MISS_STALL",
+	"THEx_PCACHE_EVICT_STALL",
+	"THEx_PMGR_PTR_WR_STALL",
+	"THEx_PMGR_PTR_RD_STALL",
+	"THEx_PMGR_CMD_WR_STALL",
+	"THEx_WRBUF_ACTIVE",
+	"THEx_WRBUF_HIT",
+	"THEx_WRBUF_MISS",
+	"THEx_WRBUF_NO_FREE_LINE_STALL",
+	"THEx_WRBUF_NO_AXI_ID_STALL",
+	"THEx_WRBUF_AXI_STALL",
+	"",
+	"",
+	"",
+	"THEx_UTLB_TRANS",
+	"THEx_UTLB_TRANS_HIT",
+	"THEx_UTLB_TRANS_STALL",
+	"THEx_UTLB_TRANS_MISS_DELAY",
+	"THEx_UTLB_MMU_REQ",
+
+	/* Performance counters for the Shader Core */
+	"",
+	"",
+	"",
+	"",
+	"THEx_FRAG_ACTIVE",
+	"THEx_FRAG_PRIMITIVES",
+	"THEx_FRAG_PRIM_RAST",
+	"THEx_FRAG_FPK_ACTIVE",
+	"THEx_FRAG_STARVING",
+	"THEx_FRAG_WARPS",
+	"THEx_FRAG_PARTIAL_WARPS",
+	"THEx_FRAG_QUADS_RAST",
+	"THEx_FRAG_QUADS_EZS_TEST",
+	"THEx_FRAG_QUADS_EZS_UPDATE",
+	"THEx_FRAG_QUADS_EZS_KILL",
+	"THEx_FRAG_LZS_TEST",
+	"THEx_FRAG_LZS_KILL",
+	"",
+	"THEx_FRAG_PTILES",
+	"THEx_FRAG_TRANS_ELIM",
+	"THEx_QUAD_FPK_KILLER",
+	"",
+	"THEx_COMPUTE_ACTIVE",
+	"THEx_COMPUTE_TASKS",
+	"THEx_COMPUTE_WARPS",
+	"THEx_COMPUTE_STARVING",
+	"THEx_EXEC_CORE_ACTIVE",
+	"THEx_EXEC_ACTIVE",
+	"THEx_EXEC_INSTR_COUNT",
+	"THEx_EXEC_INSTR_DIVERGED",
+	"THEx_EXEC_INSTR_STARVING",
+	"THEx_ARITH_INSTR_SINGLE_FMA",
+	"THEx_ARITH_INSTR_DOUBLE",
+	"THEx_ARITH_INSTR_MSG",
+	"THEx_ARITH_INSTR_MSG_ONLY",
+	"THEx_TEX_INSTR",
+	"THEx_TEX_INSTR_MIPMAP",
+	"THEx_TEX_INSTR_COMPRESSED",
+	"THEx_TEX_INSTR_3D",
+	"THEx_TEX_INSTR_TRILINEAR",
+	"THEx_TEX_COORD_ISSUE",
+	"THEx_TEX_COORD_STALL",
+	"THEx_TEX_STARVE_CACHE",
+	"THEx_TEX_STARVE_FILTER",
+	"THEx_LS_MEM_READ_FULL",
+	"THEx_LS_MEM_READ_SHORT",
+	"THEx_LS_MEM_WRITE_FULL",
+	"THEx_LS_MEM_WRITE_SHORT",
+	"THEx_LS_MEM_ATOMIC",
+	"THEx_VARY_INSTR",
+	"THEx_VARY_SLOT_32",
+	"THEx_VARY_SLOT_16",
+	"THEx_ATTR_INSTR",
+	"THEx_ARITH_INSTR_FP_MUL",
+	"THEx_BEATS_RD_FTC",
+	"THEx_BEATS_RD_FTC_EXT",
+	"THEx_BEATS_RD_LSC",
+	"THEx_BEATS_RD_LSC_EXT",
+	"THEx_BEATS_RD_TEX",
+	"THEx_BEATS_RD_TEX_EXT",
+	"THEx_BEATS_RD_OTHER",
+	"THEx_BEATS_WR_LSC",
+	"THEx_BEATS_WR_TIB",
+	"",
+
+	/* Performance counters for the Memory System */
+	"",
+	"",
+	"",
+	"",
+	"THEx_MMU_REQUESTS",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"THEx_L2_RD_MSG_IN",
+	"THEx_L2_RD_MSG_IN_STALL",
+	"THEx_L2_WR_MSG_IN",
+	"THEx_L2_WR_MSG_IN_STALL",
+	"THEx_L2_SNP_MSG_IN",
+	"THEx_L2_SNP_MSG_IN_STALL",
+	"THEx_L2_RD_MSG_OUT",
+	"THEx_L2_RD_MSG_OUT_STALL",
+	"THEx_L2_WR_MSG_OUT",
+	"THEx_L2_ANY_LOOKUP",
+	"THEx_L2_READ_LOOKUP",
+	"THEx_L2_WRITE_LOOKUP",
+	"THEx_L2_EXT_SNOOP_LOOKUP",
+	"THEx_L2_EXT_READ",
+	"THEx_L2_EXT_READ_NOSNP",
+	"THEx_L2_EXT_READ_UNIQUE",
+	"THEx_L2_EXT_READ_BEATS",
+	"THEx_L2_EXT_AR_STALL",
+	"THEx_L2_EXT_AR_CNT_Q1",
+	"THEx_L2_EXT_AR_CNT_Q2",
+	"THEx_L2_EXT_AR_CNT_Q3",
+	"THEx_L2_EXT_RRESP_0_127",
+	"THEx_L2_EXT_RRESP_128_191",
+	"THEx_L2_EXT_RRESP_192_255",
+	"THEx_L2_EXT_RRESP_256_319",
+	"THEx_L2_EXT_RRESP_320_383",
+	"THEx_L2_EXT_WRITE",
+	"THEx_L2_EXT_WRITE_NOSNP_FULL",
+	"THEx_L2_EXT_WRITE_NOSNP_PTL",
+	"THEx_L2_EXT_WRITE_SNP_FULL",
+	"THEx_L2_EXT_WRITE_SNP_PTL",
+	"THEx_L2_EXT_WRITE_BEATS",
+	"THEx_L2_EXT_W_STALL",
+	"THEx_L2_EXT_AW_CNT_Q1",
+	"THEx_L2_EXT_AW_CNT_Q2",
+	"THEx_L2_EXT_AW_CNT_Q3",
+	"THEx_L2_EXT_SNOOP",
+	"THEx_L2_EXT_SNOOP_STALL",
+	"THEx_L2_EXT_SNOOP_RESP_CLEAN",
+	"THEx_L2_EXT_SNOOP_RESP_DATA",
+	"THEx_L2_EXT_SNOOP_INTERNAL",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+};
+
+#endif /* _KBASE_GATOR_HWCNT_NAMES_THEX_H_ */
--- a/drivers/gpu/arm/midgard_for_linux/mali_kbase_gator_hwcnt_names_tmix.h
+++ b/drivers/gpu/arm/midgard_for_linux/mali_kbase_gator_hwcnt_names_tmix.h
@ -0,0 +1,291 @@
+/*
+ *
+ * (C) COPYRIGHT 2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/*
+ * This header was autogenerated, it should not be edited.
+ */
+
+#ifndef _KBASE_GATOR_HWCNT_NAMES_TMIX_H_
+#define _KBASE_GATOR_HWCNT_NAMES_TMIX_H_
+
+static const char * const hardware_counters_mali_tMIx[] = {
+	/*
+	 * Counter name table for tMIx. The table is laid out as four
+	 * consecutive groups of 64 entries each (Job Manager, Tiler,
+	 * Shader Core, Memory System), 256 entries in total. Empty strings
+	 * fill the positions that have no counter name in this table.
+	 *
+	 * Performance counters for the Job Manager
+	 */
+	"",
+	"",
+	"",
+	"",
+	"TMIx_MESSAGES_SENT",
+	"TMIx_MESSAGES_RECEIVED",
+	"TMIx_GPU_ACTIVE",
+	"TMIx_IRQ_ACTIVE",
+	"TMIx_JS0_JOBS",
+	"TMIx_JS0_TASKS",
+	"TMIx_JS0_ACTIVE",
+	"",
+	"TMIx_JS0_WAIT_READ",
+	"TMIx_JS0_WAIT_ISSUE",
+	"TMIx_JS0_WAIT_DEPEND",
+	"TMIx_JS0_WAIT_FINISH",
+	"TMIx_JS1_JOBS",
+	"TMIx_JS1_TASKS",
+	"TMIx_JS1_ACTIVE",
+	"",
+	"TMIx_JS1_WAIT_READ",
+	"TMIx_JS1_WAIT_ISSUE",
+	"TMIx_JS1_WAIT_DEPEND",
+	"TMIx_JS1_WAIT_FINISH",
+	"TMIx_JS2_JOBS",
+	"TMIx_JS2_TASKS",
+	"TMIx_JS2_ACTIVE",
+	"",
+	"TMIx_JS2_WAIT_READ",
+	"TMIx_JS2_WAIT_ISSUE",
+	"TMIx_JS2_WAIT_DEPEND",
+	"TMIx_JS2_WAIT_FINISH",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+
+	/* Performance counters for the Tiler */
+	"",
+	"",
+	"",
+	"",
+	"TMIx_TILER_ACTIVE",
+	"TMIx_JOBS_PROCESSED",
+	"TMIx_TRIANGLES",
+	"TMIx_LINES",
+	"TMIx_POINTS",
+	"TMIx_FRONT_FACING",
+	"TMIx_BACK_FACING",
+	"TMIx_PRIM_VISIBLE",
+	"TMIx_PRIM_CULLED",
+	"TMIx_PRIM_CLIPPED",
+	"TMIx_PRIM_SAT_CULLED",
+	"",
+	"",
+	"TMIx_BUS_READ",
+	"",
+	"TMIx_BUS_WRITE",
+	"TMIx_LOADING_DESC",
+	"TMIx_IDVS_POS_SHAD_REQ",
+	"TMIx_IDVS_POS_SHAD_WAIT",
+	"TMIx_IDVS_POS_SHAD_STALL",
+	"TMIx_IDVS_POS_FIFO_FULL",
+	"TMIx_PREFETCH_STALL",
+	"TMIx_VCACHE_HIT",
+	"TMIx_VCACHE_MISS",
+	"TMIx_VCACHE_LINE_WAIT",
+	"TMIx_VFETCH_POS_READ_WAIT",
+	"TMIx_VFETCH_VERTEX_WAIT",
+	"TMIx_VFETCH_STALL",
+	"TMIx_PRIMASSY_STALL",
+	"TMIx_BBOX_GEN_STALL",
+	"TMIx_IDVS_VBU_HIT",
+	"TMIx_IDVS_VBU_MISS",
+	"TMIx_IDVS_VBU_LINE_DEALLOCATE",
+	"TMIx_IDVS_VAR_SHAD_REQ",
+	"TMIx_IDVS_VAR_SHAD_STALL",
+	"TMIx_BINNER_STALL",
+	"TMIx_ITER_STALL",
+	"TMIx_COMPRESS_MISS",
+	"TMIx_COMPRESS_STALL",
+	"TMIx_PCACHE_HIT",
+	"TMIx_PCACHE_MISS",
+	"TMIx_PCACHE_MISS_STALL",
+	"TMIx_PCACHE_EVICT_STALL",
+	"TMIx_PMGR_PTR_WR_STALL",
+	"TMIx_PMGR_PTR_RD_STALL",
+	"TMIx_PMGR_CMD_WR_STALL",
+	"TMIx_WRBUF_ACTIVE",
+	"TMIx_WRBUF_HIT",
+	"TMIx_WRBUF_MISS",
+	"TMIx_WRBUF_NO_FREE_LINE_STALL",
+	"TMIx_WRBUF_NO_AXI_ID_STALL",
+	"TMIx_WRBUF_AXI_STALL",
+	"",
+	"",
+	"",
+	"TMIx_UTLB_TRANS",
+	"TMIx_UTLB_TRANS_HIT",
+	"TMIx_UTLB_TRANS_STALL",
+	"TMIx_UTLB_TRANS_MISS_DELAY",
+	"TMIx_UTLB_MMU_REQ",
+
+	/* Performance counters for the Shader Core */
+	"",
+	"",
+	"",
+	"",
+	"TMIx_FRAG_ACTIVE",
+	"TMIx_FRAG_PRIMITIVES",
+	"TMIx_FRAG_PRIM_RAST",
+	"TMIx_FRAG_FPK_ACTIVE",
+	"TMIx_FRAG_STARVING",
+	"TMIx_FRAG_WARPS",
+	"TMIx_FRAG_PARTIAL_WARPS",
+	"TMIx_FRAG_QUADS_RAST",
+	"TMIx_FRAG_QUADS_EZS_TEST",
+	"TMIx_FRAG_QUADS_EZS_UPDATE",
+	"TMIx_FRAG_QUADS_EZS_KILL",
+	"TMIx_FRAG_LZS_TEST",
+	"TMIx_FRAG_LZS_KILL",
+	"",
+	"TMIx_FRAG_PTILES",
+	"TMIx_FRAG_TRANS_ELIM",
+	"TMIx_QUAD_FPK_KILLER",
+	"",
+	"TMIx_COMPUTE_ACTIVE",
+	"TMIx_COMPUTE_TASKS",
+	"TMIx_COMPUTE_WARPS",
+	"TMIx_COMPUTE_STARVING",
+	"TMIx_EXEC_CORE_ACTIVE",
+	"TMIx_EXEC_ACTIVE",
+	"TMIx_EXEC_INSTR_COUNT",
+	"TMIx_EXEC_INSTR_DIVERGED",
+	"TMIx_EXEC_INSTR_STARVING",
+	"TMIx_ARITH_INSTR_SINGLE_FMA",
+	"TMIx_ARITH_INSTR_DOUBLE",
+	"TMIx_ARITH_INSTR_MSG",
+	"TMIx_ARITH_INSTR_MSG_ONLY",
+	"TMIx_TEX_INSTR",
+	"TMIx_TEX_INSTR_MIPMAP",
+	"TMIx_TEX_INSTR_COMPRESSED",
+	"TMIx_TEX_INSTR_3D",
+	"TMIx_TEX_INSTR_TRILINEAR",
+	"TMIx_TEX_COORD_ISSUE",
+	"TMIx_TEX_COORD_STALL",
+	"TMIx_TEX_STARVE_CACHE",
+	"TMIx_TEX_STARVE_FILTER",
+	"TMIx_LS_MEM_READ_FULL",
+	"TMIx_LS_MEM_READ_SHORT",
+	"TMIx_LS_MEM_WRITE_FULL",
+	"TMIx_LS_MEM_WRITE_SHORT",
+	"TMIx_LS_MEM_ATOMIC",
+	"TMIx_VARY_INSTR",
+	"TMIx_VARY_SLOT_32",
+	"TMIx_VARY_SLOT_16",
+	"TMIx_ATTR_INSTR",
+	"TMIx_ARITH_INSTR_FP_MUL",
+	"TMIx_BEATS_RD_FTC",
+	"TMIx_BEATS_RD_FTC_EXT",
+	"TMIx_BEATS_RD_LSC",
+	"TMIx_BEATS_RD_LSC_EXT",
+	"TMIx_BEATS_RD_TEX",
+	"TMIx_BEATS_RD_TEX_EXT",
+	"TMIx_BEATS_RD_OTHER",
+	"TMIx_BEATS_WR_LSC",
+	"TMIx_BEATS_WR_TIB",
+	"",
+
+	/* Performance counters for the Memory System */
+	"",
+	"",
+	"",
+	"",
+	"TMIx_MMU_REQUESTS",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"TMIx_L2_RD_MSG_IN",
+	"TMIx_L2_RD_MSG_IN_STALL",
+	"TMIx_L2_WR_MSG_IN",
+	"TMIx_L2_WR_MSG_IN_STALL",
+	"TMIx_L2_SNP_MSG_IN",
+	"TMIx_L2_SNP_MSG_IN_STALL",
+	"TMIx_L2_RD_MSG_OUT",
+	"TMIx_L2_RD_MSG_OUT_STALL",
+	"TMIx_L2_WR_MSG_OUT",
+	"TMIx_L2_ANY_LOOKUP",
+	"TMIx_L2_READ_LOOKUP",
+	"TMIx_L2_WRITE_LOOKUP",
+	"TMIx_L2_EXT_SNOOP_LOOKUP",
+	"TMIx_L2_EXT_READ",
+	"TMIx_L2_EXT_READ_NOSNP",
+	"TMIx_L2_EXT_READ_UNIQUE",
+	"TMIx_L2_EXT_READ_BEATS",
+	"TMIx_L2_EXT_AR_STALL",
+	"TMIx_L2_EXT_AR_CNT_Q1",
+	"TMIx_L2_EXT_AR_CNT_Q2",
+	"TMIx_L2_EXT_AR_CNT_Q3",
+	"TMIx_L2_EXT_RRESP_0_127",
+	"TMIx_L2_EXT_RRESP_128_191",
+	"TMIx_L2_EXT_RRESP_192_255",
+	"TMIx_L2_EXT_RRESP_256_319",
+	"TMIx_L2_EXT_RRESP_320_383",
+	"TMIx_L2_EXT_WRITE",
+	"TMIx_L2_EXT_WRITE_NOSNP_FULL",
+	"TMIx_L2_EXT_WRITE_NOSNP_PTL",
+	"TMIx_L2_EXT_WRITE_SNP_FULL",
+	"TMIx_L2_EXT_WRITE_SNP_PTL",
+	"TMIx_L2_EXT_WRITE_BEATS",
+	"TMIx_L2_EXT_W_STALL",
+	"TMIx_L2_EXT_AW_CNT_Q1",
+	"TMIx_L2_EXT_AW_CNT_Q2",
+	"TMIx_L2_EXT_AW_CNT_Q3",
+	"TMIx_L2_EXT_SNOOP",
+	"TMIx_L2_EXT_SNOOP_STALL",
+	"TMIx_L2_EXT_SNOOP_RESP_CLEAN",
+	"TMIx_L2_EXT_SNOOP_RESP_DATA",
+	"TMIx_L2_EXT_SNOOP_INTERNAL",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+};
+
+#endif /* _KBASE_GATOR_HWCNT_NAMES_TMIX_H_ */
--- a/drivers/gpu/arm/midgard_for_linux/mali_kbase_gpu_id.h
+++ b/drivers/gpu/arm/midgard_for_linux/mali_kbase_gpu_id.h
@ -0,0 +1,113 @@
+/*
+ *
+ * (C) COPYRIGHT 2015-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+#ifndef _KBASE_GPU_ID_H_
+#define _KBASE_GPU_ID_H_
+
+/*
+ * GPU_ID register (original layout): bit offset and mask of each field.
+ *
+ * The mask literals are unsigned. With a plain int literal,
+ * 0xFFFF << 16 shifts a signed value past INT_MAX, which is undefined
+ * behaviour in C and in practice produces a negative mask that sign-extends
+ * when combined with a wider type.
+ */
+#define GPU_ID_VERSION_STATUS_SHIFT       0
+#define GPU_ID_VERSION_MINOR_SHIFT        4
+#define GPU_ID_VERSION_MAJOR_SHIFT        12
+#define GPU_ID_VERSION_PRODUCT_ID_SHIFT   16
+#define GPU_ID_VERSION_STATUS             (0xFu  << GPU_ID_VERSION_STATUS_SHIFT)
+#define GPU_ID_VERSION_MINOR              (0xFFu << GPU_ID_VERSION_MINOR_SHIFT)
+#define GPU_ID_VERSION_MAJOR              (0xFu  << GPU_ID_VERSION_MAJOR_SHIFT)
+#define GPU_ID_VERSION_PRODUCT_ID  (0xFFFFu << GPU_ID_VERSION_PRODUCT_ID_SHIFT)
+
+/* Values for GPU_ID_VERSION_PRODUCT_ID bitfield */
+#define GPU_ID_PI_T60X                    0x6956
+#define GPU_ID_PI_T62X                    0x0620
+#define GPU_ID_PI_T76X                    0x0750
+#define GPU_ID_PI_T72X                    0x0720
+#define GPU_ID_PI_TFRX                    0x0880
+#define GPU_ID_PI_T86X                    0x0860
+#define GPU_ID_PI_T82X                    0x0820
+#define GPU_ID_PI_T83X                    0x0830
+
+/*
+ * New GPU ID format when PRODUCT_ID is >= 0x1000 (and not 0x6956).
+ * 0x6956 is GPU_ID_PI_T60X: it is numerically above the threshold, so it is
+ * excluded explicitly. Note that product_id is evaluated twice by the macro.
+ */
+#define GPU_ID_PI_NEW_FORMAT_START        0x1000
+#define GPU_ID_IS_NEW_FORMAT(product_id)  ((product_id) != GPU_ID_PI_T60X && \
+						(product_id) >= \
+						GPU_ID_PI_NEW_FORMAT_START)
+
+/*
+ * GPU_ID register (new layout): bit offset and mask of each field.
+ *
+ * The mask literals are unsigned. With a plain int literal,
+ * 0xF << 28 shifts a signed value past INT_MAX, which is undefined
+ * behaviour in C and in practice produces a negative mask;
+ * GPU_ID2_PRODUCT_MODEL, which ORs that mask in, would inherit the sign.
+ */
+#define GPU_ID2_VERSION_STATUS_SHIFT      0
+#define GPU_ID2_VERSION_MINOR_SHIFT       4
+#define GPU_ID2_VERSION_MAJOR_SHIFT       12
+#define GPU_ID2_PRODUCT_MAJOR_SHIFT       16
+#define GPU_ID2_ARCH_REV_SHIFT            20
+#define GPU_ID2_ARCH_MINOR_SHIFT          24
+#define GPU_ID2_ARCH_MAJOR_SHIFT          28
+#define GPU_ID2_VERSION_STATUS            (0xFu << GPU_ID2_VERSION_STATUS_SHIFT)
+#define GPU_ID2_VERSION_MINOR             (0xFFu << GPU_ID2_VERSION_MINOR_SHIFT)
+#define GPU_ID2_VERSION_MAJOR             (0xFu << GPU_ID2_VERSION_MAJOR_SHIFT)
+#define GPU_ID2_PRODUCT_MAJOR             (0xFu << GPU_ID2_PRODUCT_MAJOR_SHIFT)
+#define GPU_ID2_ARCH_REV                  (0xFu << GPU_ID2_ARCH_REV_SHIFT)
+#define GPU_ID2_ARCH_MINOR                (0xFu << GPU_ID2_ARCH_MINOR_SHIFT)
+#define GPU_ID2_ARCH_MAJOR                (0xFu << GPU_ID2_ARCH_MAJOR_SHIFT)
+/* Bits that identify a GPU model: architecture major plus product major */
+#define GPU_ID2_PRODUCT_MODEL  (GPU_ID2_ARCH_MAJOR | GPU_ID2_PRODUCT_MAJOR)
+
+/* Helper macro to create a partial GPU_ID (new format) that defines
+   a product ignoring its version. The arguments are shifted into place
+   without being masked, so each one must already fit the width of its
+   field; the version bits of the result are zero. */
+#define GPU_ID2_PRODUCT_MAKE(arch_major, arch_minor, arch_rev, product_major) \
+		(((arch_major) << GPU_ID2_ARCH_MAJOR_SHIFT)  | \
+		 ((arch_minor) << GPU_ID2_ARCH_MINOR_SHIFT)  | \
+		 ((arch_rev) << GPU_ID2_ARCH_REV_SHIFT)      | \
+		 ((product_major) << GPU_ID2_PRODUCT_MAJOR_SHIFT))
+
+/* Helper macro to create a partial GPU_ID (new format) that specifies the
+   revision (major, minor, status) of a product. The arguments are shifted
+   into place without being masked, so each one must already fit the width
+   of its field; the product and architecture bits of the result are zero. */
+#define GPU_ID2_VERSION_MAKE(version_major, version_minor, version_status) \
+		(((version_major) << GPU_ID2_VERSION_MAJOR_SHIFT)  | \
+		 ((version_minor) << GPU_ID2_VERSION_MINOR_SHIFT)  | \
+		 ((version_status) << GPU_ID2_VERSION_STATUS_SHIFT))
+
+/* Helper macro to create a complete GPU_ID (new format): the bitwise OR of
+   GPU_ID2_PRODUCT_MAKE() and GPU_ID2_VERSION_MAKE() for the same arguments */
+#define GPU_ID2_MAKE(arch_major, arch_minor, arch_rev, product_major, \
+	version_major, version_minor, version_status) \
+		(GPU_ID2_PRODUCT_MAKE(arch_major, arch_minor, arch_rev, \
+			product_major) | \
+		 GPU_ID2_VERSION_MAKE(version_major, version_minor,     \
+			version_status))
+
+/* Helper macro to create a partial GPU_ID (new format) that identifies
+   a particular GPU model by its arch_major and product_major. All other
+   fields (arch minor, arch revision, version) are left as zero, so the
+   result only has bits inside GPU_ID2_PRODUCT_MODEL set when the arguments
+   fit their fields. */
+#define GPU_ID2_MODEL_MAKE(arch_major, product_major) \
+		(((arch_major) << GPU_ID2_ARCH_MAJOR_SHIFT)  | \
+		((product_major) << GPU_ID2_PRODUCT_MAJOR_SHIFT))
+
+/* Strip off the non-relevant bits from a product_id value and make it suitable
+   for comparison against the GPU_ID2_PRODUCT_xxx values which identify a GPU
+   model. The macro shifts product_id back up by
+   GPU_ID2_PRODUCT_MAJOR_SHIFT before masking with GPU_ID2_PRODUCT_MODEL, so
+   product_id is presumably the upper 16 bits of the GPU_ID register as an
+   unshifted value (NOTE(review): confirm against callers). */
+#define GPU_ID2_MODEL_MATCH_VALUE(product_id) \
+		(((product_id) << GPU_ID2_PRODUCT_MAJOR_SHIFT) & \
+		    GPU_ID2_PRODUCT_MODEL)
+
+#define GPU_ID2_PRODUCT_TMIX              GPU_ID2_MODEL_MAKE(6, 0)
+#define GPU_ID2_PRODUCT_THEX              GPU_ID2_MODEL_MAKE(6, 1)
+
+/* Values for GPU_ID_VERSION_STATUS field for PRODUCT_ID GPU_ID_PI_T60X */
+#define GPU_ID_S_15DEV0                   0x1
+#define GPU_ID_S_EAC                      0x2
+
+/* Helper macro to create a GPU_ID assuming valid values for id, major,
+   minor, status. "Valid" matters because the arguments are shifted into
+   place without being masked: a value wider than its field would spill
+   into the neighbouring field. */
+#define GPU_ID_MAKE(id, major, minor, status) \
+		(((id) << GPU_ID_VERSION_PRODUCT_ID_SHIFT) | \
+		((major) << GPU_ID_VERSION_MAJOR_SHIFT) |   \
+		((minor) << GPU_ID_VERSION_MINOR_SHIFT) |   \
+		((status) << GPU_ID_VERSION_STATUS_SHIFT))
+
+#endif /* _KBASE_GPU_ID_H_ */
--- a/drivers/gpu/arm/midgard_for_linux/mali_kbase_gpu_memory_debugfs.c
+++ b/drivers/gpu/arm/midgard_for_linux/mali_kbase_gpu_memory_debugfs.c
@ -0,0 +1,97 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#include <mali_kbase.h>
+
+#ifdef CONFIG_DEBUG_FS
+/**
+ * Show callback for the @c gpu_memory debugfs file.
+ *
+ * Produces a report of current GPU memory usage: for every kbase device one
+ * line with the device name and its used-page count, followed by one line
+ * per context opened on that device with the context pointer and that
+ * context's used-page count. Each device's context list is walked with its
+ * kctx_list_lock held.
+ *
+ * @param sfile The seq_file the report is written to
+ * @param data  Data associated with the entry (not used)
+ *
+ * @return always 0
+ */
+static int kbasep_gpu_memory_seq_show(struct seq_file *sfile, void *data)
+{
+	const struct list_head *dev_list = kbase_dev_list_get();
+	struct list_head *pos;
+
+	list_for_each(pos, dev_list) {
+		struct kbase_device *kbdev =
+			list_entry(pos, struct kbase_device, entry);
+		struct kbasep_kctx_list_element *elem;
+
+		/* Device line: name and number of pages in use */
+		seq_printf(sfile, "%-16s  %10u\n", kbdev->devname,
+				atomic_read(&kbdev->memdev.used_pages));
+
+		/* One line for each context opened on this device */
+		mutex_lock(&kbdev->kctx_list_lock);
+		list_for_each_entry(elem, &kbdev->kctx_list, link)
+			seq_printf(sfile, "  %s-0x%p %10u\n", "kctx",
+					elem->kctx,
+					atomic_read(&elem->kctx->used_pages));
+		mutex_unlock(&kbdev->kctx_list_lock);
+	}
+
+	kbase_dev_list_put(dev_list);
+	return 0;
+}
+
+/*
+ *  Open handler of the gpu_memory debugfs entry: attaches the show callback
+ *  through single_open(), with no private data.
+ */
+static int kbasep_gpu_memory_debugfs_open(struct inode *in, struct file *file)
+{
+	return single_open(file, kbasep_gpu_memory_seq_show, NULL);
+}
+
+/* seq_file based file operations of the gpu_memory debugfs entry */
+static const struct file_operations kbasep_gpu_memory_debugfs_fops = {
+	.open    = kbasep_gpu_memory_debugfs_open,
+	.release = single_release,
+	.read    = seq_read,
+	.llseek  = seq_lseek,
+};
+
+/*
+ *  Create the read-only "gpu_memory" file inside the device's Mali debugfs
+ *  directory. The result of the creation is not checked.
+ */
+void kbasep_gpu_memory_debugfs_init(struct kbase_device *kbdev)
+{
+	debugfs_create_file("gpu_memory",
+			S_IRUGO,
+			kbdev->mali_debugfs_directory,
+			NULL,
+			&kbasep_gpu_memory_debugfs_fops);
+}
+
+#else
+/*
+ * debugfs is disabled: there is no entry to create, so this is a no-op
+ */
+void kbasep_gpu_memory_debugfs_init(struct kbase_device *kbdev)
+{
+}
+#endif
--- a/drivers/gpu/arm/midgard_for_linux/mali_kbase_gpu_memory_debugfs.h
+++ b/drivers/gpu/arm/midgard_for_linux/mali_kbase_gpu_memory_debugfs.h
@ -0,0 +1,37 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2014, 2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file mali_kbase_gpu_memory_debugfs.h
+ * Header file for gpu_memory entry in debugfs
+ *
+ */
+
+#ifndef _KBASE_GPU_MEMORY_DEBUGFS_H
+#define _KBASE_GPU_MEMORY_DEBUGFS_H
+
+#include <linux/debugfs.h>
+#include <linux/seq_file.h>
+
+/*
+ * Forward declaration, so that the prototype below does not depend on the
+ * structure having been declared by an earlier include.
+ */
+struct kbase_device;
+
+/**
+ * @brief Initialize gpu_memory debugfs entry
+ *
+ * @param kbdev  The device for which the entry is created
+ */
+void kbasep_gpu_memory_debugfs_init(struct kbase_device *kbdev);
+
+#endif  /*_KBASE_GPU_MEMORY_DEBUGFS_H*/
--- a/drivers/gpu/arm/midgard_for_linux/mali_kbase_gpuprops.c
+++ b/drivers/gpu/arm/midgard_for_linux/mali_kbase_gpuprops.c
@ -0,0 +1,314 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/*
+ * Base kernel property query APIs
+ */
+
+#include <mali_kbase.h>
+#include <mali_midg_regmap.h>
+#include <mali_kbase_gpuprops.h>
+#include <mali_kbase_config_defaults.h>
+#include <mali_kbase_hwaccess_gpuprops.h>
+#include <linux/clk.h>
+
+/**
+ * KBASE_UBFX32 - Extracts bits from a 32-bit bitfield.
+ * @value:  The value from which to extract bits.
+ * @offset: The first bit to extract (0 being the LSB).
+ * @size:   The number of bits to extract.
+ *
+ * Context: @offset + @size <= 32.
+ *
+ * The mask is built from a 64-bit constant (1ULL) and only then truncated
+ * to u32, so @size == 32 yields an all-ones mask instead of an out-of-range
+ * shift. Each argument appears exactly once in the expansion.
+ *
+ * Return: Bits [@offset, @offset + @size) from @value.
+ */
+/* from mali_cdsb.h */
+#define KBASE_UBFX32(value, offset, size) \
+	(((u32)(value) >> (u32)(offset)) & (u32)((1ULL << (u32)(size)) - 1))
+
+/*
+ * Copy the device's GPU properties into @kbase_props for a userspace query,
+ * after refreshing the current GPU speed stored in the core properties.
+ *
+ * The speed comes from the integrator's GPU_SPEED_FUNC callback when one is
+ * configured, and is then overridden by the rate of the device clock when
+ * the device has one. If neither source provides a value, the maximum
+ * frequency from the core properties is reported.
+ *
+ * Always returns 0.
+ */
+int kbase_gpuprops_uk_get_props(struct kbase_context *kctx, struct kbase_uk_gpuprops * const kbase_props)
+{
+	kbase_gpu_clk_speed_func speed_fn;
+	struct kbase_device *kbdev;
+	u32 speed_mhz;
+	int err = 1;
+
+	KBASE_DEBUG_ASSERT(NULL != kctx);
+	KBASE_DEBUG_ASSERT(NULL != kbase_props);
+
+	kbdev = kctx->kbdev;
+
+	/* Ask the system integrator first; a NULL GPU_SPEED_FUNC means that
+	 * no callback was provided.
+	 */
+	speed_fn = (kbase_gpu_clk_speed_func) GPU_SPEED_FUNC;
+	if (speed_fn != NULL) {
+		err = speed_fn(&speed_mhz);
+#ifdef CONFIG_MALI_DEBUG
+		/* Warn when the reported speed is outside the min/max range */
+		if (err == 0) {
+			u32 speed_khz = speed_mhz * 1000;
+
+			if (speed_khz < kbdev->gpu_props.props.core_props.gpu_freq_khz_min ||
+					speed_khz > kbdev->gpu_props.props.core_props.gpu_freq_khz_max)
+				dev_warn(kbdev->dev, "GPU Speed is outside of min/max range (got %lu Khz, min %lu Khz, max %lu Khz)\n",
+						(unsigned long)speed_khz,
+						(unsigned long)kbdev->gpu_props.props.core_props.gpu_freq_khz_min,
+						(unsigned long)kbdev->gpu_props.props.core_props.gpu_freq_khz_max);
+		}
+#endif				/* CONFIG_MALI_DEBUG */
+	}
+
+	/* A device clock, when present, takes precedence over the callback */
+	if (kbdev->clock) {
+		speed_mhz = clk_get_rate(kbdev->clock) / 1000000;
+		err = 0;
+	}
+
+	/* Neither source gave a value: report the maximum frequency */
+	if (err != 0)
+		speed_mhz = kbdev->gpu_props.props.core_props.gpu_freq_khz_max / 1000;
+
+	kbdev->gpu_props.props.core_props.gpu_speed_mhz = speed_mhz;
+
+	memcpy(&kbase_props->props, &kbdev->gpu_props.props, sizeof(kbase_props->props));
+
+	/* Before API 8.2 they expect L3 cache info here, which was always 0 */
+	if (kctx->api_version < KBASE_API_VERSION(8, 2))
+		kbase_props->props.raw_props.suspend_size = 0;
+
+	return 0;
+}
+
+/*
+ * Build the coherent group table of @props from its raw register values.
+ *
+ * Each set bit of the "group present" mask (l2_present when the groups are
+ * L2 coherent, shader_present otherwise) marks the core position at which a
+ * new group starts. A group therefore spans the bits from its own start bit
+ * up to, but not including, the start bit of the next group; the last group
+ * spans everything from its start bit upwards. That span is then restricted
+ * to the cores that are actually present.
+ *
+ * For example, with start bits 0 and 4 (mask 0x11) the first span is 0x0f
+ * and the second one covers bit 4 and above.
+ *
+ * At most BASE_MAX_COHERENT_GROUPS groups are recorded; a warning is printed
+ * when more start bits remain.
+ */
+static void kbase_gpuprops_construct_coherent_groups(base_gpu_props * const props)
+{
+	struct mali_base_gpu_coherent_group *grp;
+	u64 remaining;
+	u64 start_bit, prev_start_bit;
+	u32 count;
+
+	KBASE_DEBUG_ASSERT(NULL != props);
+
+	props->coherency_info.coherency = props->raw_props.mem_features;
+	props->coherency_info.num_core_groups = hweight64(props->raw_props.l2_present);
+
+	/* L2 coherent groups are delimited by L2 caches, L1 coherent groups
+	 * by shader cores.
+	 */
+	if (props->coherency_info.coherency & GROUPS_L2_COHERENT)
+		remaining = props->raw_props.l2_present;
+	else
+		remaining = props->raw_props.shader_present;
+
+	grp = props->coherency_info.group;
+	/* Isolate the lowest set bit: start of the first group */
+	start_bit = remaining & ~(remaining - 1);
+
+	for (count = 0;
+	     remaining != 0 && count < BASE_MAX_COHERENT_GROUPS;
+	     count++, grp++) {
+		/* Drop the current group's start bit, then find the next one */
+		remaining -= start_bit;
+		prev_start_bit = start_bit;
+		start_bit = remaining & ~(remaining - 1);
+
+		/* Span [prev_start_bit, start_bit), limited to present cores */
+		grp->core_mask = ((start_bit - 1) & ~(prev_start_bit - 1)) &
+				props->raw_props.shader_present;
+		grp->num_cores = hweight64(grp->core_mask);
+	}
+
+	if (remaining != 0)
+		pr_warn("Too many coherent groups (keeping only %d groups).\n", BASE_MAX_COHERENT_GROUPS);
+
+	props->coherency_info.num_groups = count;
+}
+
+/**
+ * kbase_gpuprops_get_props - Get the GPU configuration
+ * @gpu_props: The &base_gpu_props structure
+ * @kbdev: The &struct kbase_device structure for the device
+ *
+ * Take a register dump through the backend and copy it into the raw
+ * properties of @gpu_props. The 64-bit "present" masks are assembled from
+ * their high and low register halves. Only the raw properties are filled in
+ * by this function.
+ */
+static void kbase_gpuprops_get_props(base_gpu_props * const gpu_props, struct kbase_device *kbdev)
+{
+	struct kbase_gpuprops_regdump regs;
+	struct gpu_raw_gpu_props *raw;
+	int idx;
+
+	KBASE_DEBUG_ASSERT(NULL != kbdev);
+	KBASE_DEBUG_ASSERT(NULL != gpu_props);
+
+	/* Dump relevant registers */
+	kbase_backend_gpuprops_get(kbdev, &regs);
+
+	raw = &gpu_props->raw_props;
+
+	raw->gpu_id = regs.gpu_id;
+	raw->tiler_features = regs.tiler_features;
+	raw->mem_features = regs.mem_features;
+	raw->mmu_features = regs.mmu_features;
+	raw->l2_features = regs.l2_features;
+	raw->suspend_size = regs.suspend_size;
+
+	raw->as_present = regs.as_present;
+	raw->js_present = regs.js_present;
+
+	/* 64-bit masks: high half in the upper word, low half added in */
+	raw->shader_present = ((u64) regs.shader_present_hi << 32) + regs.shader_present_lo;
+	raw->tiler_present = ((u64) regs.tiler_present_hi << 32) + regs.tiler_present_lo;
+	raw->l2_present = ((u64) regs.l2_present_hi << 32) + regs.l2_present_lo;
+
+	for (idx = 0; idx < GPU_MAX_JOB_SLOTS; idx++)
+		raw->js_features[idx] = regs.js_features[idx];
+
+	for (idx = 0; idx < BASE_GPU_NUM_TEXTURE_FEATURES_REGISTERS; idx++)
+		raw->texture_features[idx] = regs.texture_features[idx];
+
+	raw->thread_max_barrier_size = regs.thread_max_barrier_size;
+	raw->thread_max_threads = regs.thread_max_threads;
+	raw->thread_max_workgroup_size = regs.thread_max_workgroup_size;
+	raw->thread_features = regs.thread_features;
+}
+
+/**
+ * kbase_gpuprops_calculate_props - Calculate the derived properties
+ * @gpu_props: The &base_gpu_props structure
+ * @kbdev:     The &struct kbase_device structure for the device (not used by
+ *             this function)
+ *
+ * Fill the &base_gpu_props structure with values derived from the raw
+ * register values already stored in @gpu_props->raw_props, then build the
+ * coherent group table. Thread limits that read back as zero are replaced by
+ * the THREAD_*_DEFAULT values.
+ */
+static void kbase_gpuprops_calculate_props(base_gpu_props * const gpu_props, struct kbase_device *kbdev)
+{
+	int i;
+
+	/* Populate the base_gpu_props structure */
+	gpu_props->core_props.version_status = KBASE_UBFX32(gpu_props->raw_props.gpu_id, 0U, 4);
+	gpu_props->core_props.minor_revision = KBASE_UBFX32(gpu_props->raw_props.gpu_id, 4U, 8);
+	gpu_props->core_props.major_revision = KBASE_UBFX32(gpu_props->raw_props.gpu_id, 12U, 4);
+	gpu_props->core_props.product_id = KBASE_UBFX32(gpu_props->raw_props.gpu_id, 16U, 16);
+	gpu_props->core_props.log2_program_counter_size = KBASE_GPU_PC_SIZE_LOG2;
+	/* Widen before shifting: the page count is an unsigned long, so on a
+	 * 32-bit kernel the byte count would otherwise wrap once the system
+	 * has 4 GiB of RAM or more.
+	 */
+	gpu_props->core_props.gpu_available_memory_size = (u64)totalram_pages << PAGE_SHIFT;
+
+	for (i = 0; i < BASE_GPU_NUM_TEXTURE_FEATURES_REGISTERS; i++)
+		gpu_props->core_props.texture_features[i] = gpu_props->raw_props.texture_features[i];
+
+	gpu_props->l2_props.log2_line_size = KBASE_UBFX32(gpu_props->raw_props.l2_features, 0U, 8);
+	gpu_props->l2_props.log2_cache_size = KBASE_UBFX32(gpu_props->raw_props.l2_features, 16U, 8);
+
+	/* Field with number of l2 slices is added to MEM_FEATURES register
+	 * since t76x. Below code assumes that for older GPU reserved bits will
+	 * be read as zero. */
+	gpu_props->l2_props.num_l2_slices =
+		KBASE_UBFX32(gpu_props->raw_props.mem_features, 8U, 4) + 1;
+
+	gpu_props->tiler_props.bin_size_bytes = 1 << KBASE_UBFX32(gpu_props->raw_props.tiler_features, 0U, 6);
+	gpu_props->tiler_props.max_active_levels = KBASE_UBFX32(gpu_props->raw_props.tiler_features, 8U, 4);
+
+	/* A thread limit of zero in the raw value selects the default */
+	if (gpu_props->raw_props.thread_max_threads == 0)
+		gpu_props->thread_props.max_threads = THREAD_MT_DEFAULT;
+	else
+		gpu_props->thread_props.max_threads = gpu_props->raw_props.thread_max_threads;
+
+	if (gpu_props->raw_props.thread_max_workgroup_size == 0)
+		gpu_props->thread_props.max_workgroup_size = THREAD_MWS_DEFAULT;
+	else
+		gpu_props->thread_props.max_workgroup_size = gpu_props->raw_props.thread_max_workgroup_size;
+
+	if (gpu_props->raw_props.thread_max_barrier_size == 0)
+		gpu_props->thread_props.max_barrier_size = THREAD_MBS_DEFAULT;
+	else
+		gpu_props->thread_props.max_barrier_size = gpu_props->raw_props.thread_max_barrier_size;
+
+	gpu_props->thread_props.max_registers = KBASE_UBFX32(gpu_props->raw_props.thread_features, 0U, 16);
+	gpu_props->thread_props.max_task_queue = KBASE_UBFX32(gpu_props->raw_props.thread_features, 16U, 8);
+	gpu_props->thread_props.max_thread_group_split = KBASE_UBFX32(gpu_props->raw_props.thread_features, 24U, 6);
+	gpu_props->thread_props.impl_tech = KBASE_UBFX32(gpu_props->raw_props.thread_features, 30U, 2);
+
+	/* If values are not specified, then use defaults. A zero register
+	 * count is the trigger; the task queue and thread group split are
+	 * replaced along with it.
+	 */
+	if (gpu_props->thread_props.max_registers == 0) {
+		gpu_props->thread_props.max_registers = THREAD_MR_DEFAULT;
+		gpu_props->thread_props.max_task_queue = THREAD_MTQ_DEFAULT;
+		gpu_props->thread_props.max_thread_group_split = THREAD_MTGS_DEFAULT;
+	}
+	/* Initialize the coherent_group structure for each group */
+	kbase_gpuprops_construct_coherent_groups(gpu_props);
+}
+
+/*
+ * Set up the kbase GPU properties of @kbdev: read the raw register values,
+ * derive the base properties from them, then fill in the fields that only
+ * kbase itself uses (cache geometry, MMU address widths and the core, core
+ * group, address space and job slot counts).
+ */
+void kbase_gpuprops_set(struct kbase_device *kbdev)
+{
+	struct kbase_gpu_props *kprops;
+	struct gpu_raw_gpu_props *regs;
+
+	KBASE_DEBUG_ASSERT(NULL != kbdev);
+	kprops = &kbdev->gpu_props;
+	regs = &kprops->props.raw_props;
+
+	/* Raw values from the hardware first, derived values second */
+	kbase_gpuprops_get_props(&kprops->props, kbdev);
+	kbase_gpuprops_calculate_props(&kprops->props, kbdev);
+
+	/* kbase-only fields decoded from the feature registers */
+	kprops->l2_props.associativity = KBASE_UBFX32(regs->l2_features, 8U, 8);
+	kprops->l2_props.external_bus_width = KBASE_UBFX32(regs->l2_features, 24U, 8);
+	kprops->mem.core_group = KBASE_UBFX32(regs->mem_features, 0U, 1);
+	kprops->mmu.va_bits = KBASE_UBFX32(regs->mmu_features, 0U, 8);
+	kprops->mmu.pa_bits = KBASE_UBFX32(regs->mmu_features, 8U, 8);
+
+	/* Counts are the number of set bits in the "present" masks */
+	kprops->num_cores = hweight64(regs->shader_present);
+	kprops->num_core_groups = hweight64(regs->l2_present);
+	kprops->num_address_spaces = hweight32(regs->as_present);
+	kprops->num_job_slots = hweight32(regs->js_present);
+}
+
+/*
+ * Read the feature-dependent registers through the backend and store the
+ * raw coherency features value in the device's raw GPU properties.
+ */
+void kbase_gpuprops_set_features(struct kbase_device *kbdev)
+{
+	struct kbase_gpuprops_regdump regs;
+	base_gpu_props *props = &kbdev->gpu_props.props;
+
+	/* Dump relevant registers */
+	kbase_backend_gpuprops_get_features(kbdev, &regs);
+
+	/*
+	 * Keep the register value as read; it is turned into the selected
+	 * coherency mode later on.
+	 */
+	props->raw_props.coherency_mode = regs.coherency_features;
+}
--- a/drivers/gpu/arm/midgard_for_linux/mali_kbase_gpuprops.h
+++ b/drivers/gpu/arm/midgard_for_linux/mali_kbase_gpuprops.h
@ -0,0 +1,64 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file mali_kbase_gpuprops.h
+ * Base kernel property query APIs
+ */
+
+#ifndef _KBASE_GPUPROPS_H_
+#define _KBASE_GPUPROPS_H_
+
+#include "mali_kbase_gpuprops_types.h"
+
+/* Forward definition - see mali_kbase.h */
+struct kbase_device;
+
+/**
+ * @brief Set up Kbase GPU properties.
+ *
+ * Set up Kbase GPU properties with information from the GPU registers
+ *
+ * @param kbdev		The struct kbase_device structure for the device
+ */
+void kbase_gpuprops_set(struct kbase_device *kbdev);
+
+/**
+ * kbase_gpuprops_set_features - Set up Kbase GPU properties
+ * @kbdev:   Device pointer
+ *
+ * This function sets up GPU properties that are dependent on the hardware
+ * features bitmask. This function must be preceded by a call to
+ * kbase_hw_set_features_mask().
+ */
+void kbase_gpuprops_set_features(struct kbase_device *kbdev);
+
+/**
+ * @brief Provide GPU properties to userside through UKU call.
+ *
+ * Fill the struct kbase_uk_gpuprops with values from GPU configuration registers.
+ *
+ * @param kctx		The struct kbase_context structure
+ * @param kbase_props	A copy of the struct kbase_uk_gpuprops structure from userspace
+ *
+ * @return 0 on success. Any other value indicates failure.
+ */
+int kbase_gpuprops_uk_get_props(struct kbase_context *kctx, struct kbase_uk_gpuprops * const kbase_props);
+
+#endif				/* _KBASE_GPUPROPS_H_ */
--- a/Show More
+++ b/Show More