diff --git a/Documentation/devicetree/bindings/arm/mali-bifrost.txt b/Documentation/devicetree/bindings/arm/mali-bifrost.txt
new file mode 100644
index 000000000000..a488e3afbccb
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/mali-bifrost.txt
@@ -0,0 +1,150 @@
+#
+# (C) COPYRIGHT 2013-2018 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, you can access it online at
+# http://www.gnu.org/licenses/gpl-2.0.html.
+#
+# SPDX-License-Identifier: GPL-2.0
+#
+#
+
+* ARM Mali Midgard devices
+
+
+Required properties:
+
+- compatible : Should be mali<chip>, replacing digits with x from the back,
+until malit<Major>xx, ending with arm,mali-midgard, the latter not optional.
+- reg : Physical base address of the device and length of the register area.
+- interrupts : Contains the three IRQ lines required by T-6xx devices
+- interrupt-names : Contains the names of IRQ resources in the order they were
+provided in the interrupts property. Must contain: "JOB, "MMU", "GPU".
+
+Optional:
+
+- clocks : Phandle to clock for the Mali T-6xx device.
+- clock-names : Shall be "clk_mali".
+- mali-supply : Phandle to regulator for the Mali device. Refer to
+Documentation/devicetree/bindings/regulator/regulator.txt for details.
+- operating-points-v2 : Refer to Documentation/devicetree/bindings/power/opp.txt
+for details.
+- jm_config : For T860/T880. Sets job manager configuration. An array containing:
+	- 1 to override the TIMESTAMP value, 0 otherwise.
+	- 1 to override clock gate, forcing them to be always on, 0 otherwise.
+	- 1 to enable job throttle, limiting the number of cores that can be started
+	  simultaneously, 0 otherwise.
+	- Value between 0 and 63 (including). If job throttle is enabled, this is one
+	  less than the number of cores that can be started simultaneously.
+- power_model : Sets the power model parameters. Three power models are currently
+  defined which include "mali-simple-power-model", "mali-g71-power-model" and
+	  "mali-g72-power-model".
+	- mali-simple-power-model: this model derives the GPU power usage based
+	  on the GPU voltage scaled by the system temperature. Note: it was
+	  designed for the Juno platform, and may not be suitable for others.
+		- compatible: Should be "arm,mali-simple-power-model"
+		- dynamic-coefficient: Coefficient, in pW/(Hz V^2), which is
+		  multiplied by v^2*f to calculate the dynamic power consumption.
+		- static-coefficient: Coefficient, in uW/V^3, which is
+		  multiplied by v^3 to calculate the static power consumption.
+		- ts: An array containing coefficients for the temperature
+		  scaling factor. This is used to scale the static power by a
+		  factor of tsf/1000000,
+		  where tsf = ts[3]*T^3 + ts[2]*T^2 + ts[1]*T + ts[0],
+		  and T = temperature in degrees.
+		- thermal-zone: A string identifying the thermal zone used for
+		  the GPU
+		- temp-poll-interval-ms: the interval at which the system
+		  temperature is polled
+	- mali-g71-power-model / mali-g72-power-model: these models derive
+	  the GPU power usage based on performance counters, so they are more
+	  accurate.
+		- compatible: Should be "arm,mali-g71-power-model" /
+		  "arm,mali-g72-power-model"
+		- scale: the dynamic power calculated by the power model is
+		  scaled by a factor of "scale"/1000. This value should be
+		  chosen to match a particular implementation.
+	* Note: when IPA is used, two separate power models (simple and counter-based)
+	  are used at different points so care should be taken to configure
+	  both power models in the device tree (specifically dynamic-coefficient,
+	  static-coefficient and scale) to best match the platform.
+- system-coherency : Sets the coherency protocol to be used for coherent
+		     accesses made from the GPU.
+		     If not set then no coherency is used.
+	- 0  : ACE-Lite
+	- 1  : ACE
+	- 31 : No coherency
+- ipa-model : Sets the IPA model to be used for power management. GPU probe will fail if the
+	      model is not found in the registered models list. If no model is specified here,
+	      a gpu-id based model is picked if available, otherwise the default model is used.
+	- mali-simple-power-model: Default model used on mali
+- protected-mode-switcher : Phandle to device implemented protected mode switching functionality.
+Refer to Documentation/devicetree/bindings/arm/smc-protected-mode-switcher.txt for one implementation.
+
+Example for a Mali GPU:
+
+gpu@0xfc010000 {
+	compatible = "arm,malit602", "arm,malit60x", "arm,malit6xx", "arm,mali-midgard";
+	reg = <0xfc010000 0x4000>;
+	interrupts = <0 36 4>, <0 37 4>, <0 38 4>;
+	interrupt-names = "JOB", "MMU", "GPU";
+
+	clocks = <&pclk_mali>;
+	clock-names = "clk_mali";
+	mali-supply = <&vdd_mali>;
+	operating-points-v2 = <&gpu_opp_table>;
+	power_model@0 {
+		compatible = "arm,mali-simple-power-model";
+		static-coefficient = <2427750>;
+		dynamic-coefficient = <4687>;
+		ts = <20000 2000 (-20) 2>;
+		thermal-zone = "gpu";
+	};
+	power_model@1 {
+		compatible = "arm,mali-g71-power-model";
+		scale = <5>;
+	};
+};
+
+gpu_opp_table: opp_table0 {
+	compatible = "operating-points-v2";
+
+	opp@533000000 {
+		opp-hz = /bits/ 64 <533000000>;
+		opp-microvolt = <1250000>;
+	};
+	opp@450000000 {
+		opp-hz = /bits/ 64 <450000000>;
+		opp-microvolt = <1150000>;
+	};
+	opp@400000000 {
+		opp-hz = /bits/ 64 <400000000>;
+		opp-microvolt = <1125000>;
+	};
+	opp@350000000 {
+		opp-hz = /bits/ 64 <350000000>;
+		opp-microvolt = <1075000>;
+	};
+	opp@266000000 {
+		opp-hz = /bits/ 64 <266000000>;
+		opp-microvolt = <1025000>;
+	};
+	opp@160000000 {
+		opp-hz = /bits/ 64 <160000000>;
+		opp-microvolt = <925000>;
+	};
+	opp@100000000 {
+		opp-hz = /bits/ 64 <100000000>;
+		opp-microvolt = <912500>;
+	};
+};
diff --git a/drivers/gpu/arm/Kbuild b/drivers/gpu/arm/Kbuild
old mode 100755
new mode 100644
index f14f1c824f87..30246dc556f5
--- a/drivers/gpu/arm/Kbuild
+++ b/drivers/gpu/arm/Kbuild
@@ -6,14 +6,20 @@
 # Foundation, and any use by you of this program is subject to the terms
 # of such GNU licence.
 #
-# A copy of the licence is included with the program, and can also be obtained
-# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
-# Boston, MA  02110-1301, USA.
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, you can access it online at
+# http://www.gnu.org/licenses/gpl-2.0.html.
+#
+# SPDX-License-Identifier: GPL-2.0
 #
 #
 
 
-
 obj-$(CONFIG_MALI_MIDGARD_FOR_LINUX) += midgard_for_linux/
 
 obj-$(CONFIG_MALI_MIDGARD_FOR_ANDROID) += midgard/
diff --git a/drivers/gpu/arm/Kconfig b/drivers/gpu/arm/Kconfig
old mode 100755
new mode 100644
index 8a16646f89de..8403a1a48c00
--- a/drivers/gpu/arm/Kconfig
+++ b/drivers/gpu/arm/Kconfig
@@ -6,9 +6,16 @@
 # Foundation, and any use by you of this program is subject to the terms
 # of such GNU licence.
 #
-# A copy of the licence is included with the program, and can also be obtained
-# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
-# Boston, MA  02110-1301, USA.
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, you can access it online at
+# http://www.gnu.org/licenses/gpl-2.0.html.
+#
+# SPDX-License-Identifier: GPL-2.0
 #
 #
 #
diff --git a/drivers/gpu/arm/bifrost/Kbuild b/drivers/gpu/arm/bifrost/Kbuild
index 4b001c67ab69..deb6e8f976b0 100644
--- a/drivers/gpu/arm/bifrost/Kbuild
+++ b/drivers/gpu/arm/bifrost/Kbuild
@@ -20,7 +20,7 @@
 #
 
 # Driver version string which is returned to userspace via an ioctl
-MALI_RELEASE_NAME ?= "r10p0-01rel0"
+MALI_RELEASE_NAME ?= "r11p0-01rel0"
 
 # Paths required for build
 KBASE_PATH = $(src)
diff --git a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_devfreq.c b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_devfreq.c
index 7e3c456bafbb..027eeeb6575b 100644
--- a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_devfreq.c
+++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_devfreq.c
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2014-2017 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2014-2018 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -92,15 +92,21 @@ kbase_devfreq_target(struct device *dev, unsigned long *target_freq, u32 flags)
 
 	freq = *target_freq;
 
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 11, 0)
 	rcu_read_lock();
+#endif
 	opp = devfreq_recommended_opp(dev, &freq, flags);
-	if (IS_ERR(opp)) {
-		rcu_read_unlock();
+	voltage = dev_pm_opp_get_voltage(opp);
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 11, 0)
+	rcu_read_unlock();
+#endif
+	if (IS_ERR_OR_NULL(opp)) {
 		dev_err(dev, "Failed to get opp (%ld)\n", PTR_ERR(opp));
 		return PTR_ERR(opp);
 	}
-	voltage = dev_pm_opp_get_voltage(opp);
-	rcu_read_unlock();
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 11, 0)
+	dev_pm_opp_put(opp);
+#endif
 
 	nominal_freq = freq;
 	/*
@@ -201,20 +207,24 @@ static int kbase_devfreq_init_freq_table(struct kbase_device *kbdev,
 	unsigned long freq;
 	struct dev_pm_opp *opp;
 
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 11, 0)
 	rcu_read_lock();
+#endif
 	count = dev_pm_opp_get_opp_count(kbdev->dev);
-	if (count < 0) {
-		rcu_read_unlock();
-		return count;
-	}
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 11, 0)
 	rcu_read_unlock();
+#endif
+	if (count < 0)
+		return count;
 
 	dp->freq_table = kmalloc_array(count, sizeof(dp->freq_table[0]),
 				GFP_KERNEL);
 	if (!dp->freq_table)
 		return -ENOMEM;
 
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 11, 0)
 	rcu_read_lock();
+#endif
 	for (i = 0, freq = ULONG_MAX; i < count; i++, freq--) {
 		opp = dev_pm_opp_find_freq_floor(kbdev->dev, &freq);
 		if (IS_ERR(opp))
@@ -225,7 +235,9 @@ static int kbase_devfreq_init_freq_table(struct kbase_device *kbdev,
 
 		dp->freq_table[i] = freq;
 	}
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 11, 0)
 	rcu_read_unlock();
+#endif
 
 	if (count != i)
 		dev_warn(kbdev->dev, "Unable to enumerate all OPPs (%d!=%d\n",
@@ -368,7 +380,7 @@ int kbase_devfreq_init(struct kbase_device *kbdev)
 	kbdev->devfreq = devfreq_add_device(kbdev->dev, dp,
 				"simple_ondemand", NULL);
 	if (IS_ERR(kbdev->devfreq)) {
-		kbase_devfreq_term_freq_table(kbdev);
+		kfree(dp->freq_table);
 		return PTR_ERR(kbdev->devfreq);
 	}
 
diff --git a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_gpuprops_backend.c b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_gpuprops_backend.c
index 02dc1ea0061f..8809ab0bed5b 100644
--- a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_gpuprops_backend.c
+++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_gpuprops_backend.c
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2014-2018 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -41,8 +41,8 @@ void kbase_backend_gpuprops_get(struct kbase_device *kbdev,
 
 	regdump->l2_features = kbase_reg_read(kbdev,
 				GPU_CONTROL_REG(L2_FEATURES), NULL);
-	regdump->suspend_size = kbase_reg_read(kbdev,
-				GPU_CONTROL_REG(SUSPEND_SIZE), NULL);
+	regdump->core_features = kbase_reg_read(kbdev,
+				GPU_CONTROL_REG(CORE_FEATURES), NULL);
 	regdump->tiler_features = kbase_reg_read(kbdev,
 				GPU_CONTROL_REG(TILER_FEATURES), NULL);
 	regdump->mem_features = kbase_reg_read(kbdev,
@@ -71,6 +71,8 @@ void kbase_backend_gpuprops_get(struct kbase_device *kbdev,
 				GPU_CONTROL_REG(THREAD_MAX_BARRIER_SIZE), NULL);
 	regdump->thread_features = kbase_reg_read(kbdev,
 				GPU_CONTROL_REG(THREAD_FEATURES), NULL);
+	regdump->thread_tls_alloc = kbase_reg_read(kbdev,
+				GPU_CONTROL_REG(THREAD_TLS_ALLOC), NULL);
 
 	regdump->shader_present_lo = kbase_reg_read(kbdev,
 				GPU_CONTROL_REG(SHADER_PRESENT_LO), NULL);
diff --git a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_instr_backend.c b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_instr_backend.c
index 43a380e53c60..3a6545db9092 100644
--- a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_instr_backend.c
+++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_instr_backend.c
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2014-2017 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -68,7 +68,7 @@ static void kbasep_instr_hwcnt_cacheclean(struct kbase_device *kbdev)
 
 int kbase_instr_hwcnt_enable_internal(struct kbase_device *kbdev,
 					struct kbase_context *kctx,
-					struct kbase_uk_hwcnt_setup *setup)
+					struct kbase_ioctl_hwcnt_enable *enable)
 {
 	unsigned long flags, pm_flags;
 	int err = -EINVAL;
@@ -81,7 +81,7 @@ int kbase_instr_hwcnt_enable_internal(struct kbase_device *kbdev,
 							KBASE_PM_CORE_SHADER);
 
 	/* alignment failure */
-	if ((setup->dump_buffer == 0ULL) || (setup->dump_buffer & (2048 - 1)))
+	if ((enable->dump_buffer == 0ULL) || (enable->dump_buffer & (2048 - 1)))
 		goto out_err;
 
 	/* Override core availability policy to ensure all cores are available
@@ -110,7 +110,7 @@ int kbase_instr_hwcnt_enable_internal(struct kbase_device *kbdev,
 	/* In use, this context is the owner */
 	kbdev->hwcnt.kctx = kctx;
 	/* Remember the dump address so we can reprogram it later */
-	kbdev->hwcnt.addr = setup->dump_buffer;
+	kbdev->hwcnt.addr = enable->dump_buffer;
 
 	/* Request the clean */
 	kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_REQUEST_CLEAN;
@@ -150,15 +150,15 @@ int kbase_instr_hwcnt_enable_internal(struct kbase_device *kbdev,
 			prfcnt_config | PRFCNT_CONFIG_MODE_OFF, kctx);
 
 	kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_BASE_LO),
-					setup->dump_buffer & 0xFFFFFFFF, kctx);
+					enable->dump_buffer & 0xFFFFFFFF, kctx);
 	kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_BASE_HI),
-					setup->dump_buffer >> 32,        kctx);
+					enable->dump_buffer >> 32,        kctx);
 	kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_JM_EN),
-					setup->jm_bm,                    kctx);
+					enable->jm_bm,                    kctx);
 	kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_SHADER_EN),
-					setup->shader_bm,                kctx);
+					enable->shader_bm,                kctx);
 	kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_MMU_L2_EN),
-					setup->mmu_l2_bm,                kctx);
+					enable->mmu_l2_bm,                kctx);
 	/* Due to PRLAM-8186 we need to disable the Tiler before we enable the
 	 * HW counter dump. */
 	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8186))
@@ -166,7 +166,7 @@ int kbase_instr_hwcnt_enable_internal(struct kbase_device *kbdev,
 									kctx);
 	else
 		kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_TILER_EN),
-							setup->tiler_bm, kctx);
+							enable->tiler_bm, kctx);
 
 	kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_CONFIG),
 			prfcnt_config | PRFCNT_CONFIG_MODE_MANUAL, kctx);
@@ -175,7 +175,7 @@ int kbase_instr_hwcnt_enable_internal(struct kbase_device *kbdev,
 	 */
 	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8186))
 		kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_TILER_EN),
-							setup->tiler_bm, kctx);
+							enable->tiler_bm, kctx);
 
 	spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
 
diff --git a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_jm_defs.h b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_jm_defs.h
index 27a6ca0d871b..b4d2ae1cc4e8 100644
--- a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_jm_defs.h
+++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_jm_defs.h
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2014-2016, 2018 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -113,16 +113,4 @@ struct kbase_backend_data {
 	bool timeouts_updated;
 };
 
-/**
- * struct kbase_jd_atom_backend - GPU backend specific katom data
- */
-struct kbase_jd_atom_backend {
-};
-
-/**
- * struct kbase_context_backend - GPU backend specific context data
- */
-struct kbase_context_backend {
-};
-
 #endif /* _KBASE_HWACCESS_GPU_DEFS_H_ */
diff --git a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_jm_hw.c b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_jm_hw.c
index 63a0dcdb1fb3..c6a8b7f34a03 100644
--- a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_jm_hw.c
+++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_jm_hw.c
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2010-2017 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2010-2018 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -245,9 +245,6 @@ void kbase_job_done(struct kbase_device *kbdev, u32 done)
 
 	KBASE_TRACE_ADD(kbdev, JM_IRQ, NULL, NULL, 0, done);
 
-	memset(&kbdev->slot_submit_count_irq[0], 0,
-					sizeof(kbdev->slot_submit_count_irq));
-
 	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
 
 	while (done) {
@@ -312,7 +309,7 @@ void kbase_job_done(struct kbase_device *kbdev, u32 done)
 							completion_code));
 				}
 
-				kbase_gpu_irq_evict(kbdev, i);
+				kbase_gpu_irq_evict(kbdev, i, completion_code);
 			}
 
 			kbase_reg_write(kbdev, JOB_CONTROL_REG(JOB_IRQ_CLEAR),
@@ -745,7 +742,9 @@ void kbase_job_slot_ctx_priority_check_locked(struct kbase_context *kctx,
 		if (!katom)
 			continue;
 
-		if (katom->kctx != kctx)
+		if ((kbdev->js_ctx_scheduling_mode ==
+			KBASE_JS_PROCESS_LOCAL_PRIORITY_MODE) &&
+				(katom->kctx != kctx))
 			continue;
 
 		if (katom->sched_priority > priority) {
diff --git a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_jm_rb.c b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_jm_rb.c
index ee93d4eb9522..7f09fd229748 100644
--- a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_jm_rb.c
+++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_jm_rb.c
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2014-2017 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2014-2018 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -1247,7 +1247,8 @@ void kbase_backend_run_atom(struct kbase_device *kbdev,
 #define HAS_DEP(katom) (katom->pre_dep || katom->atom_flags & \
 	(KBASE_KATOM_FLAG_X_DEP_BLOCKED | KBASE_KATOM_FLAG_FAIL_BLOCKER))
 
-bool kbase_gpu_irq_evict(struct kbase_device *kbdev, int js)
+bool kbase_gpu_irq_evict(struct kbase_device *kbdev, int js,
+				u32 completion_code)
 {
 	struct kbase_jd_atom *katom;
 	struct kbase_jd_atom *next_katom;
@@ -1268,14 +1269,16 @@ bool kbase_gpu_irq_evict(struct kbase_device *kbdev, int js)
 				JS_COMMAND_NOP, NULL);
 		next_katom->gpu_rb_state = KBASE_ATOM_GPU_RB_READY;
 
-		KBASE_TLSTREAM_TL_NRET_ATOM_LPU(katom,
+		if (completion_code == BASE_JD_EVENT_STOPPED) {
+			KBASE_TLSTREAM_TL_NRET_ATOM_LPU(katom,
 				&kbdev->gpu_props.props.raw_props.js_features
 					[katom->slot_nr]);
-		KBASE_TLSTREAM_TL_NRET_ATOM_AS(katom, &kbdev->as
+			KBASE_TLSTREAM_TL_NRET_ATOM_AS(katom, &kbdev->as
 					[katom->kctx->as_nr]);
-		KBASE_TLSTREAM_TL_NRET_CTX_LPU(katom->kctx,
+			KBASE_TLSTREAM_TL_NRET_CTX_LPU(katom->kctx,
 				&kbdev->gpu_props.props.raw_props.js_features
 					[katom->slot_nr]);
+		}
 
 		return true;
 	}
@@ -1515,8 +1518,7 @@ void kbase_backend_reset(struct kbase_device *kbdev, ktime_t *end_timestamp)
 			if (!katom)
 				break;
 			if (katom->protected_state.exit ==
-					KBASE_ATOM_EXIT_PROTECTED_RESET_WAIT)
-			{
+			    KBASE_ATOM_EXIT_PROTECTED_RESET_WAIT) {
 				KBASE_TLSTREAM_AUX_PROTECTED_LEAVE_END(kbdev);
 
 				kbase_vinstr_resume(kbdev->vinstr_ctx);
@@ -1683,7 +1685,7 @@ bool kbase_backend_soft_hard_stop_slot(struct kbase_device *kbdev,
 			katom_idx0->kctx->blocked_js[js][prio_idx0] = true;
 		} else {
 			/* katom_idx0 is on GPU */
-			if (katom_idx1 && katom_idx1->gpu_rb_state ==
+			if (katom_idx1_valid && katom_idx1->gpu_rb_state ==
 						KBASE_ATOM_GPU_RB_SUBMITTED) {
 				/* katom_idx0 and katom_idx1 are on GPU */
 
diff --git a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_jm_rb.h b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_jm_rb.h
index 456700814ee9..c3b9f2d85536 100644
--- a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_jm_rb.h
+++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_jm_rb.h
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2014-2018 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -33,15 +33,17 @@
 /**
  * kbase_gpu_irq_evict - Evict an atom from a NEXT slot
  *
- * @kbdev:         Device pointer
- * @js:            Job slot to evict from
+ * @kbdev:           Device pointer
+ * @js:              Job slot to evict from
+ * @completion_code: Event code from job that was run.
  *
  * Evict the atom in the NEXT slot for the specified job slot. This function is
  * called from the job complete IRQ handler when the previous job has failed.
  *
  * Return: true if job evicted from NEXT registers, false otherwise
  */
-bool kbase_gpu_irq_evict(struct kbase_device *kbdev, int js);
+bool kbase_gpu_irq_evict(struct kbase_device *kbdev, int js,
+				u32 completion_code);
 
 /**
  * kbase_gpu_complete_hw - Complete an atom on job slot js
diff --git a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_js_backend.c b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_js_backend.c
index f09a26423951..729b971ee072 100644
--- a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_js_backend.c
+++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_js_backend.c
@@ -147,16 +147,17 @@ static enum hrtimer_restart timer_callback(struct hrtimer *timer)
 
 				/* Job is Soft-Stoppable */
 				if (ticks == soft_stop_ticks) {
-					int disjoint_threshold =
-		KBASE_DISJOINT_STATE_INTERLEAVED_CONTEXT_COUNT_THRESHOLD;
-					u32 softstop_flags = 0u;
 					/* Job has been scheduled for at least
 					 * js_devdata->soft_stop_ticks ticks.
 					 * Soft stop the slot so we can run
 					 * other jobs.
 					 */
-					dev_dbg(kbdev->dev, "Soft-stop");
 #if !KBASE_DISABLE_SCHEDULING_SOFT_STOPS
+					int disjoint_threshold =
+		KBASE_DISJOINT_STATE_INTERLEAVED_CONTEXT_COUNT_THRESHOLD;
+					u32 softstop_flags = 0u;
+
+					dev_dbg(kbdev->dev, "Soft-stop");
 					/* nr_user_contexts_running is updated
 					 * with the runpool_mutex, but we can't
 					 * take that here.
diff --git a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_mmu_hw_direct.c b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_mmu_hw_direct.c
index ad27202c8f08..9cd29828016a 100644
--- a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_mmu_hw_direct.c
+++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_mmu_hw_direct.c
@@ -185,8 +185,7 @@ void kbase_mmu_interrupt(struct kbase_device *kbdev, u32 irq_stat)
 		/* Mark the fault protected or not */
 		as->protected_mode = kbdev->protected_mode;
 
-		if (kbdev->protected_mode && as->fault_addr)
-		{
+		if (kbdev->protected_mode && as->fault_addr) {
 			/* check if address reporting is allowed */
 			validate_protected_page_fault(kbdev, kctx);
 		}
diff --git a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_backend.c b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_backend.c
index 8ff607d3c5fd..e57834350e8c 100644
--- a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_backend.c
+++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_backend.c
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2010-2017 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2010-2018 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -252,7 +252,10 @@ static void kbase_pm_gpu_poweroff_wait_wq(struct work_struct *data)
 
 			/* Turn off clock now that fault have been handled. We
 			 * dropped locks so poweron_required may have changed -
-			 * power back on if this is the case.*/
+			 * power back on if this is the case (effectively only
+			 * re-enabling of the interrupts would be done in this
+			 * case, as the clocks to GPU were not withdrawn yet).
+			 */
 			if (backend->poweron_required)
 				kbase_pm_clock_on(kbdev, false);
 			else
diff --git a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_policy.c b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_policy.c
index 18a06d833169..0bf133494f03 100644
--- a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_policy.c
+++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_policy.c
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2010-2018 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -383,6 +383,7 @@ void kbase_pm_update_active(struct kbase_device *kbdev)
 
 		/* Power on the GPU and any cores requested by the policy */
 		if (pm->backend.poweroff_wait_in_progress) {
+			KBASE_DEBUG_ASSERT(kbdev->pm.backend.gpu_powered);
 			pm->backend.poweron_required = true;
 			spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
 		} else {
@@ -927,19 +928,24 @@ KBASE_EXPORT_TEST_API(kbase_pm_request_cores_sync);
 void kbase_pm_request_l2_caches(struct kbase_device *kbdev)
 {
 	unsigned long flags;
-	u32 prior_l2_users_count;
 
 	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
 
-	prior_l2_users_count = kbdev->l2_users_count++;
+	kbdev->l2_users_count++;
 
 	KBASE_DEBUG_ASSERT(kbdev->l2_users_count != 0);
 
-	/* if the GPU is reset while the l2 is on, l2 will be off but
-	 * prior_l2_users_count will be > 0. l2_available_bitmap will have been
-	 * set to 0 though by kbase_pm_init_hw */
-	if (!prior_l2_users_count || !kbdev->l2_available_bitmap)
-		kbase_pm_check_transitions_nolock(kbdev);
+	/* Check for the required L2 transitions.
+	 * Caller would block here for the L2 caches of all core groups to be
+	 * powered on, so need to inform the Hw to power up all the L2 caches.
+	 * Can't rely on the l2_users_count value being non-zero previously to
+	 * avoid checking for the transition, as the count could be non-zero
+	 * even if not all the instances of L2 cache are powered up since
+	 * currently the power status of L2 is not tracked separately for each
+	 * core group. Also if the GPU is reset while the L2 is on, L2 will be
+	 * off but the count will be non-zero.
+	 */
+	kbase_pm_check_transitions_nolock(kbdev);
 
 	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
 	wait_event(kbdev->pm.backend.l2_powered_wait,
diff --git a/drivers/gpu/arm/bifrost/docs/Doxyfile b/drivers/gpu/arm/bifrost/docs/Doxyfile
index cea7bd9a1730..6498dcbc1840 100644
--- a/drivers/gpu/arm/bifrost/docs/Doxyfile
+++ b/drivers/gpu/arm/bifrost/docs/Doxyfile
@@ -38,7 +38,7 @@
 # directories like "/usr/src/myproject". Separate the files or directories
 # with spaces.
 
-INPUT                  += ../../kernel/drivers/gpu/arm/midgard/ 
+INPUT                  += ../../kernel/drivers/gpu/arm/midgard/
 
 ##############################################################################
 # Everything below here is optional, and in most cases not required
diff --git a/drivers/gpu/arm/bifrost/ipa/Kbuild b/drivers/gpu/arm/bifrost/ipa/Kbuild
index e06d2eac4aa1..32008511f366 100644
--- a/drivers/gpu/arm/bifrost/ipa/Kbuild
+++ b/drivers/gpu/arm/bifrost/ipa/Kbuild
@@ -1,5 +1,5 @@
 #
-# (C) COPYRIGHT 2016-2017 ARM Limited. All rights reserved.
+# (C) COPYRIGHT 2016-2018 ARM Limited. All rights reserved.
 #
 # This program is free software and is provided to you under the terms of the
 # GNU General Public License version 2 as published by the Free Software
@@ -25,9 +25,8 @@ bifrost_kbase-y += \
 
 bifrost_kbase-$(CONFIG_DEBUG_FS) += ipa/mali_kbase_ipa_debugfs.o
 
-ifneq ($(wildcard $(src)/ipa/mali_kbase_ipa_vinstr_g71.c),)
+ifneq ($(wildcard $(src)/ipa/mali_kbase_ipa_vinstr_common.c),)
   bifrost_kbase-y += \
-	ipa/mali_kbase_ipa_vinstr_g71.o \
+	ipa/mali_kbase_ipa_vinstr_g7x.o \
 	ipa/mali_kbase_ipa_vinstr_common.o
-
 endif
diff --git a/drivers/gpu/arm/bifrost/ipa/mali_kbase_ipa.c b/drivers/gpu/arm/bifrost/ipa/mali_kbase_ipa.c
index d95a8971d6e2..d14e1e6d01fa 100644
--- a/drivers/gpu/arm/bifrost/ipa/mali_kbase_ipa.c
+++ b/drivers/gpu/arm/bifrost/ipa/mali_kbase_ipa.c
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2016-2017 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2016-2018 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -38,10 +38,14 @@
 
 #define KBASE_IPA_FALLBACK_MODEL_NAME "mali-simple-power-model"
 #define KBASE_IPA_G71_MODEL_NAME      "mali-g71-power-model"
+#define KBASE_IPA_G72_MODEL_NAME      "mali-g72-power-model"
+#define KBASE_IPA_TNOX_MODEL_NAME     "mali-tnox-power-model"
 
 static struct kbase_ipa_model_ops *kbase_ipa_all_model_ops[] = {
 	&kbase_simple_ipa_model_ops,
-	&kbase_g71_ipa_model_ops
+	&kbase_g71_ipa_model_ops,
+	&kbase_g72_ipa_model_ops,
+	&kbase_tnox_ipa_model_ops
 };
 
 int kbase_ipa_model_recalculate(struct kbase_ipa_model *model)
@@ -98,6 +102,15 @@ const char *kbase_ipa_model_name_from_id(u32 gpu_id)
 		switch (GPU_ID2_MODEL_MATCH_VALUE(prod_id)) {
 		case GPU_ID2_PRODUCT_TMIX:
 			return KBASE_IPA_G71_MODEL_NAME;
+		case GPU_ID2_PRODUCT_THEX:
+			return KBASE_IPA_G72_MODEL_NAME;
+		case GPU_ID2_PRODUCT_TNOX:
+			return KBASE_IPA_TNOX_MODEL_NAME;
+		case GPU_ID2_PRODUCT_TGOX:
+			if ((gpu_id & GPU_ID2_VERSION_MAJOR) ==
+					(0 << GPU_ID2_VERSION_MAJOR_SHIFT))
+				/* TGOX r0 shares a power model with TNOX */
+				return KBASE_IPA_TNOX_MODEL_NAME;
 		default:
 			return KBASE_IPA_FALLBACK_MODEL_NAME;
 		}
diff --git a/drivers/gpu/arm/bifrost/ipa/mali_kbase_ipa.h b/drivers/gpu/arm/bifrost/ipa/mali_kbase_ipa.h
index e087d2348763..fc45f22d6a6a 100644
--- a/drivers/gpu/arm/bifrost/ipa/mali_kbase_ipa.h
+++ b/drivers/gpu/arm/bifrost/ipa/mali_kbase_ipa.h
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2016-2017 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2016-2018 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -27,8 +27,17 @@
 
 struct devfreq;
 
+/**
+ * struct kbase_ipa_model - Object describing a particular IPA model.
+ * @kbdev:                    pointer to kbase device
+ * @model_data:               opaque pointer to model specific data, accessed
+ *                            only by model specific methods.
+ * @ops:                      pointer to object containing model specific methods.
+ * @params:                   head of the list of debugfs params added for model
+ * @missing_dt_node_warning:  flag to limit the matching power model DT not found
+ *                            warning to once.
+ */
 struct kbase_ipa_model {
-	struct list_head link;
 	struct kbase_device *kbdev;
 	void *model_data;
 	struct kbase_ipa_model_ops *ops;
@@ -116,22 +125,72 @@ struct kbase_ipa_model_ops {
 	bool do_utilization_scaling_in_framework;
 };
 
-/* Models can be registered only in the platform's platform_init_func call */
-int kbase_ipa_model_ops_register(struct kbase_device *kbdev,
-			     struct kbase_ipa_model_ops *new_model_ops);
-struct kbase_ipa_model *kbase_ipa_get_model(struct kbase_device *kbdev,
-					    const char *name);
-
+/**
+ * kbase_ipa_init - Initialize the IPA feature
+ * @kbdev:      pointer to kbase device
+ *
+ * simple IPA power model is initialized as a fallback model and if that
+ * initialization fails then IPA is not used.
+ * The device tree is read for the name of ipa model to be used, by using the
+ * property string "ipa-model". If that ipa model is supported then it is
+ * initialized but if the initialization fails then simple power model is used.
+ *
+ * Return: 0 on success, negative -errno on error
+ */
 int kbase_ipa_init(struct kbase_device *kbdev);
+
+/**
+ * kbase_ipa_term - Terminate the IPA feature
+ * @kbdev:      pointer to kbase device
+ *
+ * Both simple IPA power model and model retrieved from device tree are
+ * terminated.
+ */
 void kbase_ipa_term(struct kbase_device *kbdev);
-void kbase_ipa_model_use_fallback_locked(struct kbase_device *kbdev);
-void kbase_ipa_model_use_configured_locked(struct kbase_device *kbdev);
+
+/**
+ * kbase_ipa_model_recalculate - Recalculate the model coefficients
+ * @model:      pointer to the IPA model object, already initialized
+ *
+ * It shall be called immediately after the model has been initialized
+ * or when the model parameter has changed, so that any coefficients
+ * derived from parameters can be recalculated.
+ * Its a wrapper for the module specific recalculate() method.
+ *
+ * Return: 0 on success, negative -errno on error
+ */
 int kbase_ipa_model_recalculate(struct kbase_ipa_model *model);
+
+/**
+ * kbase_ipa_init_model - Initilaize the particular IPA model
+ * @kbdev:      pointer to the IPA model object, already initialized
+ * @ops:        pointer to object containing model specific methods.
+ *
+ * Initialize the model corresponding to the @ops pointer passed.
+ * The init() method specified in @ops would be called.
+ *
+ * Return: pointer to kbase_ipa_model on success, NULL on error
+ */
 struct kbase_ipa_model *kbase_ipa_init_model(struct kbase_device *kbdev,
 					     struct kbase_ipa_model_ops *ops);
+/**
+ * kbase_ipa_term_model - Terminate the particular IPA model
+ * @model:      pointer to the IPA model object, already initialized
+ *
+ * Terminate the model, using the term() method.
+ * Module specific parameters would be freed.
+ */
 void kbase_ipa_term_model(struct kbase_ipa_model *model);
 
+/* Switch to the fallback model */
+void kbase_ipa_model_use_fallback_locked(struct kbase_device *kbdev);
+
+/* Switch to the model retrieved from device tree */
+void kbase_ipa_model_use_configured_locked(struct kbase_device *kbdev);
+
 extern struct kbase_ipa_model_ops kbase_g71_ipa_model_ops;
+extern struct kbase_ipa_model_ops kbase_g72_ipa_model_ops;
+extern struct kbase_ipa_model_ops kbase_tnox_ipa_model_ops;
 
 #if MALI_UNIT_TEST
 /**
diff --git a/drivers/gpu/arm/bifrost/ipa/mali_kbase_ipa_simple.c b/drivers/gpu/arm/bifrost/ipa/mali_kbase_ipa_simple.c
index 639ade266e14..7dd2ae252112 100644
--- a/drivers/gpu/arm/bifrost/ipa/mali_kbase_ipa_simple.c
+++ b/drivers/gpu/arm/bifrost/ipa/mali_kbase_ipa_simple.c
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2016-2017 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2016-2018 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -43,7 +43,7 @@ static int kbase_simple_power_model_get_dummy_temp(
 	struct thermal_zone_device *tz,
 	unsigned long *temp)
 {
-	*temp = ACCESS_ONCE(dummy_temp);
+	*temp = READ_ONCE(dummy_temp);
 	return 0;
 }
 
@@ -54,7 +54,7 @@ static int kbase_simple_power_model_get_dummy_temp(
 	struct thermal_zone_device *tz,
 	int *temp)
 {
-	*temp = ACCESS_ONCE(dummy_temp);
+	*temp = READ_ONCE(dummy_temp);
 	return 0;
 }
 #endif
@@ -68,7 +68,7 @@ static int kbase_simple_power_model_get_dummy_temp(
 
 void kbase_simple_power_model_set_dummy_temp(int temp)
 {
-	ACCESS_ONCE(dummy_temp) = temp;
+	WRITE_ONCE(dummy_temp, temp);
 }
 KBASE_EXPORT_TEST_API(kbase_simple_power_model_set_dummy_temp);
 
@@ -155,7 +155,7 @@ static int poll_temperature(void *data)
 #endif
 
 	while (!kthread_should_stop()) {
-		struct thermal_zone_device *tz = ACCESS_ONCE(model_data->gpu_tz);
+		struct thermal_zone_device *tz = READ_ONCE(model_data->gpu_tz);
 
 		if (tz) {
 			int ret;
@@ -170,9 +170,9 @@ static int poll_temperature(void *data)
 			temp = FALLBACK_STATIC_TEMPERATURE;
 		}
 
-		ACCESS_ONCE(model_data->current_temperature) = temp;
+		WRITE_ONCE(model_data->current_temperature, temp);
 
-		msleep_interruptible(ACCESS_ONCE(model_data->temperature_poll_interval_ms));
+		msleep_interruptible(READ_ONCE(model_data->temperature_poll_interval_ms));
 	}
 
 	return 0;
@@ -186,7 +186,7 @@ static int model_static_coeff(struct kbase_ipa_model *model, u32 *coeffp)
 	u64 coeff_big;
 	int temp;
 
-	temp = ACCESS_ONCE(model_data->current_temperature);
+	temp = READ_ONCE(model_data->current_temperature);
 
 	/* Range: 0 <= temp_scaling_factor < 2^24 */
 	temp_scaling_factor = calculate_temp_scaling_factor(model_data->ts,
diff --git a/drivers/gpu/arm/bifrost/ipa/mali_kbase_ipa_vinstr_common.c b/drivers/gpu/arm/bifrost/ipa/mali_kbase_ipa_vinstr_common.c
index b9a9e573607e..3917fb8d0ef7 100644
--- a/drivers/gpu/arm/bifrost/ipa/mali_kbase_ipa_vinstr_common.c
+++ b/drivers/gpu/arm/bifrost/ipa/mali_kbase_ipa_vinstr_common.c
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2017 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2017-2018 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -21,6 +21,7 @@
  */
 
 #include "mali_kbase_ipa_vinstr_common.h"
+#include "mali_kbase_ipa_debugfs.h"
 
 #if MALI_UNIT_TEST
 static ktime_t dummy_time;
@@ -31,11 +32,11 @@ static ktime_t dummy_time;
 #endif
 
 #if LINUX_VERSION_CODE < KERNEL_VERSION(4, 3, 0)
-#define ktime_get() (ACCESS_ONCE(dummy_time))
+#define ktime_get() (READ_ONCE(dummy_time))
 
 void kbase_ipa_set_dummy_time(ktime_t t)
 {
-	ACCESS_ONCE(dummy_time) = t;
+	WRITE_ONCE(dummy_time, t);
 }
 KBASE_EXPORT_TEST_API(kbase_ipa_set_dummy_time);
 #else
@@ -119,10 +120,46 @@ s64 kbase_ipa_single_counter(
 	return div_s64(multiplied, 1000000);
 }
 
+/**
+ * kbase_ipa_gpu_active - Inform IPA that GPU is now active
+ * @model_data: Pointer to model data
+ *
+ * This function may cause vinstr to become active.
+ */
+static void kbase_ipa_gpu_active(struct kbase_ipa_model_vinstr_data *model_data)
+{
+	struct kbase_device *kbdev = model_data->kbdev;
+
+	lockdep_assert_held(&kbdev->pm.lock);
+
+	if (!kbdev->ipa.vinstr_active) {
+		kbdev->ipa.vinstr_active = true;
+		kbase_vinstr_resume_client(model_data->vinstr_cli);
+	}
+}
+
+/**
+ * kbase_ipa_gpu_idle - Inform IPA that GPU is now idle
+ * @model_data: Pointer to model data
+ *
+ * This function may cause vinstr to become idle.
+ */
+static void kbase_ipa_gpu_idle(struct kbase_ipa_model_vinstr_data *model_data)
+{
+	struct kbase_device *kbdev = model_data->kbdev;
+
+	lockdep_assert_held(&kbdev->pm.lock);
+
+	if (kbdev->ipa.vinstr_active) {
+		kbase_vinstr_suspend_client(model_data->vinstr_cli);
+		kbdev->ipa.vinstr_active = false;
+	}
+}
+
 int kbase_ipa_attach_vinstr(struct kbase_ipa_model_vinstr_data *model_data)
 {
 	struct kbase_device *kbdev = model_data->kbdev;
-	struct kbase_uk_hwcnt_reader_setup setup;
+	struct kbase_ioctl_hwcnt_reader_setup setup;
 	size_t dump_size;
 
 	dump_size = kbase_vinstr_dump_size(kbdev);
@@ -148,13 +185,30 @@ int kbase_ipa_attach_vinstr(struct kbase_ipa_model_vinstr_data *model_data)
 	model_data->last_sample_read_time = ktime_get();
 	kbase_vinstr_hwc_clear(model_data->vinstr_cli);
 
+	kbdev->ipa.gpu_active_callback = kbase_ipa_gpu_active;
+	kbdev->ipa.gpu_idle_callback = kbase_ipa_gpu_idle;
+	kbdev->ipa.model_data = model_data;
+	kbdev->ipa.vinstr_active = false;
+	/* Suspend vinstr, to ensure that the GPU is powered off until there is
+	 * something to execute.
+	 */
+	kbase_vinstr_suspend_client(model_data->vinstr_cli);
+
 	return 0;
 }
 
 void kbase_ipa_detach_vinstr(struct kbase_ipa_model_vinstr_data *model_data)
 {
+	struct kbase_device *kbdev = model_data->kbdev;
+
+	kbdev->ipa.gpu_active_callback = NULL;
+	kbdev->ipa.gpu_idle_callback = NULL;
+	kbdev->ipa.model_data = NULL;
+	kbdev->ipa.vinstr_active = false;
+
 	if (model_data->vinstr_cli)
 		kbase_vinstr_detach_client(model_data->vinstr_cli);
+
 	model_data->vinstr_cli = NULL;
 	kfree(model_data->vinstr_buffer);
 	model_data->vinstr_buffer = NULL;
@@ -165,6 +219,7 @@ int kbase_ipa_vinstr_dynamic_coeff(struct kbase_ipa_model *model, u32 *coeffp,
 {
 	struct kbase_ipa_model_vinstr_data *model_data =
 			(struct kbase_ipa_model_vinstr_data *)model->model_data;
+	struct kbase_device *kbdev = model_data->kbdev;
 	s64 energy = 0;
 	size_t i;
 	ktime_t now = ktime_get();
@@ -176,6 +231,9 @@ int kbase_ipa_vinstr_dynamic_coeff(struct kbase_ipa_model *model, u32 *coeffp,
 	u64 num_cycles;
 	int err = 0;
 
+	if (!kbdev->ipa.vinstr_active)
+		goto err0; /* GPU powered off - no counters to collect */
+
 	err = kbase_vinstr_hwc_dump(model_data->vinstr_cli,
 				    BASE_HWCNT_READER_EVENT_MANUAL);
 	if (err)
@@ -232,3 +290,58 @@ int kbase_ipa_vinstr_dynamic_coeff(struct kbase_ipa_model *model, u32 *coeffp,
 	*coeffp = clamp(coeff, (u64) 0, (u64) 1 << 16);
 	return err;
 }
+
+int kbase_ipa_vinstr_common_model_init(struct kbase_ipa_model *model,
+				 const struct kbase_ipa_group *ipa_groups_def,
+							size_t ipa_group_size)
+{
+	int err = 0;
+	size_t i;
+	struct kbase_ipa_model_vinstr_data *model_data;
+
+	model_data = kzalloc(sizeof(*model_data), GFP_KERNEL);
+	if (!model_data)
+		return -ENOMEM;
+
+	model_data->kbdev = model->kbdev;
+	model_data->groups_def = ipa_groups_def;
+	model_data->groups_def_num = ipa_group_size;
+
+	model->model_data = (void *) model_data;
+
+	for (i = 0; i < model_data->groups_def_num; ++i) {
+		const struct kbase_ipa_group *group = &model_data->groups_def[i];
+
+		model_data->group_values[i] = group->default_value;
+		err = kbase_ipa_model_add_param_s32(model, group->name,
+					&model_data->group_values[i],
+					1, false);
+		if (err)
+			goto exit;
+	}
+
+	model_data->scaling_factor = 5;
+	err = kbase_ipa_model_add_param_s32(model, "scale",
+					    &model_data->scaling_factor,
+					    1, false);
+	if (err)
+		goto exit;
+
+	err = kbase_ipa_attach_vinstr(model_data);
+
+exit:
+	if (err) {
+		kbase_ipa_model_param_free_all(model);
+		kfree(model_data);
+	}
+	return err;
+}
+
+void kbase_ipa_vinstr_common_model_term(struct kbase_ipa_model *model)
+{
+	struct kbase_ipa_model_vinstr_data *model_data =
+			(struct kbase_ipa_model_vinstr_data *)model->model_data;
+
+	kbase_ipa_detach_vinstr(model_data);
+	kfree(model_data);
+}
diff --git a/drivers/gpu/arm/bifrost/ipa/mali_kbase_ipa_vinstr_common.h b/drivers/gpu/arm/bifrost/ipa/mali_kbase_ipa_vinstr_common.h
index 7233642add78..18c30fe701a0 100644
--- a/drivers/gpu/arm/bifrost/ipa/mali_kbase_ipa_vinstr_common.h
+++ b/drivers/gpu/arm/bifrost/ipa/mali_kbase_ipa_vinstr_common.h
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2017 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2017-2018 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -25,9 +25,6 @@
 
 #include "mali_kbase.h"
 
-/* Maximum length for the name of an IPA group. */
-#define KBASE_IPA_MAX_GROUP_NAME_LEN 15
-
 /* Maximum number of IPA groups for an IPA model. */
 #define KBASE_IPA_MAX_GROUP_DEF_NUM  16
 
@@ -41,8 +38,6 @@
 #define KBASE_IPA_NR_BYTES_PER_BLOCK \
 	(KBASE_IPA_NR_CNT_PER_BLOCK * KBASE_IPA_NR_BYTES_PER_CNT)
 
-
-
 /**
  * struct kbase_ipa_model_vinstr_data - IPA context per device
  * @kbdev:               pointer to kbase device
@@ -77,7 +72,7 @@ struct kbase_ipa_model_vinstr_data {
  * @counter_block_offset:  block offset in bytes of the counter used to calculate energy for IPA group
  */
 struct kbase_ipa_group {
-	char name[KBASE_IPA_MAX_GROUP_NAME_LEN + 1];
+	const char *name;
 	s32 default_value;
 	s64 (*op)(struct kbase_ipa_model_vinstr_data *, s32, u32);
 	u32 counter_block_offset;
@@ -152,6 +147,33 @@ void kbase_ipa_detach_vinstr(struct kbase_ipa_model_vinstr_data *model_data);
 int kbase_ipa_vinstr_dynamic_coeff(struct kbase_ipa_model *model, u32 *coeffp,
 	u32 current_freq);
 
+/**
+ * kbase_ipa_vinstr_common_model_init() - initialize ipa power model
+ * @model:		ipa power model to initialize
+ * @ipa_groups_def:	array of ipa groups which sets coefficients for
+ *			the corresponding counters used in the ipa model
+ * @ipa_group_size:     number of elements in the array @ipa_groups_def
+ *
+ * This initialization function performs initialization steps common
+ * for ipa models based on counter values. In each call, the model
+ * passes its specific coefficient values per ipa counter group via
+ * @ipa_groups_def array.
+ *
+ * Return: 0 on success, error code otherwise
+ */
+int kbase_ipa_vinstr_common_model_init(struct kbase_ipa_model *model,
+				 const struct kbase_ipa_group *ipa_groups_def,
+							size_t ipa_group_size);
+
+/**
+ * kbase_ipa_vinstr_common_model_term() - terminate ipa power model
+ * @model: ipa power model to terminate
+ *
+ * This function performs all necessary steps to terminate ipa power model
+ * including clean up of resources allocated to hold model data.
+ */
+void kbase_ipa_vinstr_common_model_term(struct kbase_ipa_model *model);
+
 #if MALI_UNIT_TEST
 /**
  * kbase_ipa_set_dummy_time() - set a dummy monotonic time value
diff --git a/drivers/gpu/arm/bifrost/ipa/mali_kbase_ipa_vinstr_g71.c b/drivers/gpu/arm/bifrost/ipa/mali_kbase_ipa_vinstr_g71.c
deleted file mode 100644
index d07fb36d901e..000000000000
--- a/drivers/gpu/arm/bifrost/ipa/mali_kbase_ipa_vinstr_g71.c
+++ /dev/null
@@ -1,256 +0,0 @@
-/*
- *
- * (C) COPYRIGHT 2016-2017 ARM Limited. All rights reserved.
- *
- * This program is free software and is provided to you under the terms of the
- * GNU General Public License version 2 as published by the Free Software
- * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, you can access it online at
- * http://www.gnu.org/licenses/gpl-2.0.html.
- *
- * SPDX-License-Identifier: GPL-2.0
- *
- */
-#include <linux/thermal.h>
-
-#include "mali_kbase_ipa_vinstr_common.h"
-#include "mali_kbase.h"
-#include "mali_kbase_ipa_debugfs.h"
-
-
-/* Performance counter blocks base offsets */
-#define JM_BASE             (0 * KBASE_IPA_NR_BYTES_PER_BLOCK)
-#define TILER_BASE          (1 * KBASE_IPA_NR_BYTES_PER_BLOCK)
-#define MEMSYS_BASE         (2 * KBASE_IPA_NR_BYTES_PER_BLOCK)
-#define SC0_BASE_ONE_MEMSYS (3 * KBASE_IPA_NR_BYTES_PER_BLOCK)
-#define SC0_BASE_TWO_MEMSYS (4 * KBASE_IPA_NR_BYTES_PER_BLOCK)
-
-/* JM counter block offsets */
-#define JM_GPU_ACTIVE (KBASE_IPA_NR_BYTES_PER_CNT *  6)
-
-/* Tiler counter block offsets */
-#define TILER_ACTIVE (KBASE_IPA_NR_BYTES_PER_CNT * 45)
-
-/* MEMSYS counter block offsets */
-#define MEMSYS_L2_ANY_LOOKUP (KBASE_IPA_NR_BYTES_PER_CNT * 25)
-
-/* SC counter block offsets */
-#define SC_FRAG_ACTIVE      (KBASE_IPA_NR_BYTES_PER_CNT *  4)
-#define SC_EXEC_CORE_ACTIVE (KBASE_IPA_NR_BYTES_PER_CNT * 26)
-#define SC_EXEC_INSTR_COUNT (KBASE_IPA_NR_BYTES_PER_CNT * 28)
-#define SC_TEX_COORD_ISSUE  (KBASE_IPA_NR_BYTES_PER_CNT * 40)
-#define SC_VARY_SLOT_32     (KBASE_IPA_NR_BYTES_PER_CNT * 50)
-#define SC_VARY_SLOT_16     (KBASE_IPA_NR_BYTES_PER_CNT * 51)
-#define SC_BEATS_RD_LSC     (KBASE_IPA_NR_BYTES_PER_CNT * 56)
-#define SC_BEATS_WR_LSC     (KBASE_IPA_NR_BYTES_PER_CNT * 61)
-#define SC_BEATS_WR_TIB     (KBASE_IPA_NR_BYTES_PER_CNT * 62)
-
-/** Maximum number of cores for which a single Memory System block of performance counters is present. */
-#define KBASE_G71_SINGLE_MEMSYS_MAX_NUM_CORES ((u8)4)
-
-
-/**
- * get_jm_counter() - get performance counter offset inside the Job Manager block
- * @model_data:            pointer to GPU model data.
- * @counter_block_offset:  offset in bytes of the performance counter inside the Job Manager block.
- *
- * Return: Block offset in bytes of the required performance counter.
- */
-static u32 kbase_g71_power_model_get_jm_counter(struct kbase_ipa_model_vinstr_data *model_data,
-                                                u32 counter_block_offset)
-{
-	return JM_BASE + counter_block_offset;
-}
-
-/**
- * get_memsys_counter() - get peformance counter offset inside the Memory System block
- * @model_data:            pointer to GPU model data.
- * @counter_block_offset:  offset in bytes of the performance counter inside the (first) Memory System block.
- *
- * Return: Block offset in bytes of the required performance counter.
- */
-static u32 kbase_g71_power_model_get_memsys_counter(struct kbase_ipa_model_vinstr_data *model_data,
-                                                    u32 counter_block_offset)
-{
-	/* The base address of Memory System performance counters is always the same, although their number
-	 * may vary based on the number of cores. For the moment it's ok to return a constant.
-	 */
-	return MEMSYS_BASE + counter_block_offset;
-}
-
-/**
- * get_sc_counter() - get performance counter offset inside the Shader Cores block
- * @model_data:            pointer to GPU model data.
- * @counter_block_offset:  offset in bytes of the performance counter inside the (first) Shader Cores block.
- *
- * Return: Block offset in bytes of the required performance counter.
- */
-static u32 kbase_g71_power_model_get_sc_counter(struct kbase_ipa_model_vinstr_data *model_data,
-                                                u32 counter_block_offset)
-{
-	const u32 sc_base = model_data->kbdev->gpu_props.num_cores <= KBASE_G71_SINGLE_MEMSYS_MAX_NUM_CORES ?
-	                    SC0_BASE_ONE_MEMSYS :
-	                    SC0_BASE_TWO_MEMSYS;
-
-	return sc_base + counter_block_offset;
-}
-
-/**
- * memsys_single_counter() - calculate energy for a single Memory System performance counter.
- * @model_data:            pointer to GPU model data.
- * @coeff:                 default value of coefficient for IPA group.
- * @counter_block_offset:  offset in bytes of the counter inside the block it belongs to.
- *
- * Return: Energy estimation for a single Memory System performance counter.
- */
-static s64 kbase_g71_memsys_single_counter(
-    struct kbase_ipa_model_vinstr_data *model_data,
-    s32 coeff,
-    u32 counter_block_offset)
-{
-	return kbase_ipa_single_counter(model_data, coeff,
-	                                kbase_g71_power_model_get_memsys_counter(model_data, counter_block_offset));
-}
-
-/**
- * sum_all_shader_cores() - calculate energy for a Shader Cores performance counter for all cores.
- * @model_data:            pointer to GPU model data.
- * @coeff:                 default value of coefficient for IPA group.
- * @counter_block_offset:  offset in bytes of the counter inside the block it belongs to.
- *
- * Return: Energy estimation for a Shader Cores performance counter for all cores.
- */
-static s64 kbase_g71_sum_all_shader_cores(
-    struct kbase_ipa_model_vinstr_data *model_data,
-    s32 coeff,
-    u32 counter_block_offset)
-{
-	return kbase_ipa_sum_all_shader_cores(model_data, coeff,
-	                                      kbase_g71_power_model_get_sc_counter(model_data, counter_block_offset));
-}
-
-/**
- * jm_single_counter() - calculate energy for a single Job Manager performance counter.
- * @model_data:            pointer to GPU model data.
- * @coeff:                 default value of coefficient for IPA group.
- * @counter_block_offset:  offset in bytes of the counter inside the block it belongs to.
- *
- * Return: Energy estimation for a single Job Manager performance counter.
- */
-static s64 kbase_g71_jm_single_counter(
-    struct kbase_ipa_model_vinstr_data *model_data,
-    s32 coeff,
-    u32 counter_block_offset)
-{
-	return kbase_ipa_single_counter(model_data, coeff,
-	                                kbase_g71_power_model_get_jm_counter(model_data, counter_block_offset));
-}
-
-/** Table of IPA group definitions.
- *
- * For each IPA group, this table defines a function to access the given performance block counter (or counters,
- * if the operation needs to be iterated on multiple blocks) and calculate energy estimation.
- */
-static const struct kbase_ipa_group ipa_groups_def[] = {
-	{
-		.name = "l2_access",
-		.default_value = 526300,
-		.op = kbase_g71_memsys_single_counter,
-		.counter_block_offset = MEMSYS_L2_ANY_LOOKUP,
-	},
-	{
-		.name = "exec_instr_count",
-		.default_value = 301100,
-		.op = kbase_g71_sum_all_shader_cores,
-		.counter_block_offset = SC_EXEC_INSTR_COUNT,
-	},
-	{
-		.name = "tex_issue",
-		.default_value = 197400,
-		.op = kbase_g71_sum_all_shader_cores,
-		.counter_block_offset = SC_TEX_COORD_ISSUE,
-	},
-	{
-		.name = "tile_wb",
-		.default_value = -156400,
-		.op = kbase_g71_sum_all_shader_cores,
-		.counter_block_offset = SC_BEATS_WR_TIB,
-	},
-	{
-		.name = "gpu_active",
-		.default_value = 115800,
-		.op = kbase_g71_jm_single_counter,
-		.counter_block_offset = JM_GPU_ACTIVE,
-	},
-};
-
-static int kbase_g71_power_model_init(struct kbase_ipa_model *model)
-{
-	int i, err = 0;
-	struct kbase_ipa_model_vinstr_data *model_data;
-
-	model_data = kzalloc(sizeof(*model_data), GFP_KERNEL);
-	if (!model_data)
-		return -ENOMEM;
-
-	model_data->kbdev = model->kbdev;
-	model_data->groups_def = ipa_groups_def;
-	BUILD_BUG_ON(ARRAY_SIZE(ipa_groups_def) > KBASE_IPA_MAX_GROUP_DEF_NUM);
-	model_data->groups_def_num = ARRAY_SIZE(ipa_groups_def);
-
-	model->model_data = (void *) model_data;
-
-	for (i = 0; i < ARRAY_SIZE(ipa_groups_def); ++i) {
-		const struct kbase_ipa_group *group = &ipa_groups_def[i];
-
-		model_data->group_values[i] = group->default_value;
-		err = kbase_ipa_model_add_param_s32(model, group->name,
-					&model_data->group_values[i],
-					1, false);
-		if (err)
-			goto exit;
-	}
-
-	model_data->scaling_factor = 5;
-	err = kbase_ipa_model_add_param_s32(model, "scale",
-					    &model_data->scaling_factor,
-					    1, false);
-	if (err)
-		goto exit;
-
-	err = kbase_ipa_attach_vinstr(model_data);
-
-exit:
-	if (err) {
-		kbase_ipa_model_param_free_all(model);
-		kfree(model_data);
-	}
-	return err;
-}
-
-static void kbase_g71_power_model_term(struct kbase_ipa_model *model)
-{
-	struct kbase_ipa_model_vinstr_data *model_data =
-			(struct kbase_ipa_model_vinstr_data *)model->model_data;
-
-	kbase_ipa_detach_vinstr(model_data);
-	kfree(model_data);
-}
-
-
-struct kbase_ipa_model_ops kbase_g71_ipa_model_ops = {
-		.name = "mali-g71-power-model",
-		.init = kbase_g71_power_model_init,
-		.term = kbase_g71_power_model_term,
-		.get_dynamic_coeff = kbase_ipa_vinstr_dynamic_coeff,
-		.do_utilization_scaling_in_framework = false,
-};
-KBASE_EXPORT_TEST_API(kbase_g71_ipa_model_ops);
diff --git a/drivers/gpu/arm/bifrost/ipa/mali_kbase_ipa_vinstr_g7x.c b/drivers/gpu/arm/bifrost/ipa/mali_kbase_ipa_vinstr_g7x.c
new file mode 100644
index 000000000000..81137ea80454
--- /dev/null
+++ b/drivers/gpu/arm/bifrost/ipa/mali_kbase_ipa_vinstr_g7x.c
@@ -0,0 +1,293 @@
+/*
+ *
+ * (C) COPYRIGHT 2016-2018 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+#include <linux/thermal.h>
+
+#include "mali_kbase_ipa_vinstr_common.h"
+#include "mali_kbase.h"
+#include "mali_kbase_ipa_debugfs.h"
+
+
+/* Performance counter blocks base offsets */
+#define JM_BASE             (0 * KBASE_IPA_NR_BYTES_PER_BLOCK)
+#define TILER_BASE          (1 * KBASE_IPA_NR_BYTES_PER_BLOCK)
+#define MEMSYS_BASE         (2 * KBASE_IPA_NR_BYTES_PER_BLOCK)
+#define SC0_BASE_ONE_MEMSYS (3 * KBASE_IPA_NR_BYTES_PER_BLOCK)
+#define SC0_BASE_TWO_MEMSYS (4 * KBASE_IPA_NR_BYTES_PER_BLOCK)
+
+/* JM counter block offsets */
+#define JM_GPU_ACTIVE (KBASE_IPA_NR_BYTES_PER_CNT *  6)
+
+/* Tiler counter block offsets */
+#define TILER_ACTIVE (KBASE_IPA_NR_BYTES_PER_CNT * 45)
+
+/* MEMSYS counter block offsets */
+#define MEMSYS_L2_ANY_LOOKUP (KBASE_IPA_NR_BYTES_PER_CNT * 25)
+
+/* SC counter block offsets */
+#define SC_FRAG_ACTIVE             (KBASE_IPA_NR_BYTES_PER_CNT *  4)
+#define SC_EXEC_CORE_ACTIVE        (KBASE_IPA_NR_BYTES_PER_CNT * 26)
+#define SC_EXEC_INSTR_COUNT        (KBASE_IPA_NR_BYTES_PER_CNT * 28)
+#define SC_TEX_COORD_ISSUE         (KBASE_IPA_NR_BYTES_PER_CNT * 40)
+#define SC_TEX_TFCH_NUM_OPERATIONS (KBASE_IPA_NR_BYTES_PER_CNT * 42)
+#define SC_VARY_INSTR              (KBASE_IPA_NR_BYTES_PER_CNT * 49)
+#define SC_VARY_SLOT_32            (KBASE_IPA_NR_BYTES_PER_CNT * 50)
+#define SC_VARY_SLOT_16            (KBASE_IPA_NR_BYTES_PER_CNT * 51)
+#define SC_BEATS_RD_LSC            (KBASE_IPA_NR_BYTES_PER_CNT * 56)
+#define SC_BEATS_WR_LSC            (KBASE_IPA_NR_BYTES_PER_CNT * 61)
+#define SC_BEATS_WR_TIB            (KBASE_IPA_NR_BYTES_PER_CNT * 62)
+
+/** Maximum number of cores for which a single Memory System block of performance counters is present. */
+#define KBASE_G7x_SINGLE_MEMSYS_MAX_NUM_CORES ((u8)4)
+
+
+/**
+ * get_jm_counter() - get performance counter offset inside the Job Manager block
+ * @model_data:            pointer to GPU model data.
+ * @counter_block_offset:  offset in bytes of the performance counter inside the Job Manager block.
+ *
+ * Return: Block offset in bytes of the required performance counter.
+ */
+static u32 kbase_g7x_power_model_get_jm_counter(struct kbase_ipa_model_vinstr_data *model_data,
+						u32 counter_block_offset)
+{
+	return JM_BASE + counter_block_offset;
+}
+
+/**
+ * get_memsys_counter() - get performance counter offset inside the Memory System block
+ * @model_data:            pointer to GPU model data.
+ * @counter_block_offset:  offset in bytes of the performance counter inside the (first) Memory System block.
+ *
+ * Return: Block offset in bytes of the required performance counter.
+ */
+static u32 kbase_g7x_power_model_get_memsys_counter(struct kbase_ipa_model_vinstr_data *model_data,
+						    u32 counter_block_offset)
+{
+	/* The base address of Memory System performance counters is always the same, although their number
+	 * may vary based on the number of cores. For the moment it's ok to return a constant.
+	 */
+	return MEMSYS_BASE + counter_block_offset;
+}
+
+/**
+ * get_sc_counter() - get performance counter offset inside the Shader Cores block
+ * @model_data:            pointer to GPU model data.
+ * @counter_block_offset:  offset in bytes of the performance counter inside the (first) Shader Cores block.
+ *
+ * Return: Block offset in bytes of the required performance counter.
+ */
+static u32 kbase_g7x_power_model_get_sc_counter(struct kbase_ipa_model_vinstr_data *model_data,
+						u32 counter_block_offset)
+{
+	const u32 sc_base = model_data->kbdev->gpu_props.num_cores <= KBASE_G7x_SINGLE_MEMSYS_MAX_NUM_CORES ?
+			    SC0_BASE_ONE_MEMSYS :
+			    SC0_BASE_TWO_MEMSYS;
+
+	return sc_base + counter_block_offset;
+}
+
+/**
+ * memsys_single_counter() - calculate energy for a single Memory System performance counter.
+ * @model_data:   pointer to GPU model data.
+ * @coeff:        default value of coefficient for IPA group.
+ * @offset:       offset in bytes of the counter inside the block it belongs to.
+ *
+ * Return: Energy estimation for a single Memory System performance counter.
+ */
+static s64 kbase_g7x_memsys_single_counter(
+		struct kbase_ipa_model_vinstr_data *model_data,
+		s32 coeff,
+		u32 offset)
+{
+	u32 counter;
+
+	counter = kbase_g7x_power_model_get_memsys_counter(model_data, offset);
+	return kbase_ipa_single_counter(model_data, coeff, counter);
+}
+
+/**
+ * sum_all_shader_cores() - calculate energy for a Shader Cores performance counter for all cores.
+ * @model_data:            pointer to GPU model data.
+ * @coeff:                 default value of coefficient for IPA group.
+ * @counter_block_offset:  offset in bytes of the counter inside the block it belongs to.
+ *
+ * Return: Energy estimation for a Shader Cores performance counter for all cores.
+ */
+static s64 kbase_g7x_sum_all_shader_cores(
+	struct kbase_ipa_model_vinstr_data *model_data,
+	s32 coeff,
+	u32 counter_block_offset)
+{
+	u32 counter;
+
+	counter = kbase_g7x_power_model_get_sc_counter(model_data,
+						       counter_block_offset);
+	return kbase_ipa_sum_all_shader_cores(model_data, coeff, counter);
+}
+
+/**
+ * jm_single_counter() - calculate energy for a single Job Manager performance counter.
+ * @model_data:            pointer to GPU model data.
+ * @coeff:                 default value of coefficient for IPA group.
+ * @counter_block_offset:  offset in bytes of the counter inside the block it belongs to.
+ *
+ * Return: Energy estimation for a single Job Manager performance counter.
+ */
+static s64 kbase_g7x_jm_single_counter(
+	struct kbase_ipa_model_vinstr_data *model_data,
+	s32 coeff,
+	u32 counter_block_offset)
+{
+	u32 counter;
+
+	counter = kbase_g7x_power_model_get_jm_counter(model_data,
+						     counter_block_offset);
+	return kbase_ipa_single_counter(model_data, coeff, counter);
+}
+
+/** Table of IPA group definitions.
+ *
+ * For each IPA group, this table defines a function to access the given performance block counter (or counters,
+ * if the operation needs to be iterated on multiple blocks) and calculate energy estimation.
+ */
+
+static const struct kbase_ipa_group ipa_groups_def_g71[] = {
+	{
+		.name = "l2_access",
+		.default_value = 526300,
+		.op = kbase_g7x_memsys_single_counter,
+		.counter_block_offset = MEMSYS_L2_ANY_LOOKUP,
+	},
+	{
+		.name = "exec_instr_count",
+		.default_value = 301100,
+		.op = kbase_g7x_sum_all_shader_cores,
+		.counter_block_offset = SC_EXEC_INSTR_COUNT,
+	},
+	{
+		.name = "tex_issue",
+		.default_value = 197400,
+		.op = kbase_g7x_sum_all_shader_cores,
+		.counter_block_offset = SC_TEX_COORD_ISSUE,
+	},
+	{
+		.name = "tile_wb",
+		.default_value = -156400,
+		.op = kbase_g7x_sum_all_shader_cores,
+		.counter_block_offset = SC_BEATS_WR_TIB,
+	},
+	{
+		.name = "gpu_active",
+		.default_value = 115800,
+		.op = kbase_g7x_jm_single_counter,
+		.counter_block_offset = JM_GPU_ACTIVE,
+	},
+};
+
+static const struct kbase_ipa_group ipa_groups_def_g72[] = {
+	{
+		.name = "l2_access",
+		.default_value = 393000,
+		.op = kbase_g7x_memsys_single_counter,
+		.counter_block_offset = MEMSYS_L2_ANY_LOOKUP,
+	},
+	{
+		.name = "exec_instr_count",
+		.default_value = 227000,
+		.op = kbase_g7x_sum_all_shader_cores,
+		.counter_block_offset = SC_EXEC_INSTR_COUNT,
+	},
+	{
+		.name = "tex_issue",
+		.default_value = 181900,
+		.op = kbase_g7x_sum_all_shader_cores,
+		.counter_block_offset = SC_TEX_COORD_ISSUE,
+	},
+	{
+		.name = "tile_wb",
+		.default_value = -120200,
+		.op = kbase_g7x_sum_all_shader_cores,
+		.counter_block_offset = SC_BEATS_WR_TIB,
+	},
+	{
+		.name = "gpu_active",
+		.default_value = 133100,
+		.op = kbase_g7x_jm_single_counter,
+		.counter_block_offset = JM_GPU_ACTIVE,
+	},
+};
+
+static const struct kbase_ipa_group ipa_groups_def_tnox[] = {
+	{
+		.name = "gpu_active",
+		.default_value = 122000,
+		.op = kbase_g7x_jm_single_counter,
+		.counter_block_offset = JM_GPU_ACTIVE,
+	},
+	{
+		.name = "exec_instr_count",
+		.default_value = 488900,
+		.op = kbase_g7x_sum_all_shader_cores,
+		.counter_block_offset = SC_EXEC_INSTR_COUNT,
+	},
+	{
+		.name = "vary_instr",
+		.default_value = 212100,
+		.op = kbase_g7x_sum_all_shader_cores,
+		.counter_block_offset = SC_VARY_INSTR,
+	},
+	{
+		.name = "tex_tfch_num_operations",
+		.default_value = 288000,
+		.op = kbase_g7x_sum_all_shader_cores,
+		.counter_block_offset = SC_TEX_TFCH_NUM_OPERATIONS,
+	},
+	{
+		.name = "l2_access",
+		.default_value = 378100,
+		.op = kbase_g7x_memsys_single_counter,
+		.counter_block_offset = MEMSYS_L2_ANY_LOOKUP,
+	},
+};
+
+#define STANDARD_POWER_MODEL(gpu) \
+	static int kbase_ ## gpu ## _power_model_init(\
+			struct kbase_ipa_model *model) \
+	{ \
+		BUILD_BUG_ON(ARRAY_SIZE(ipa_groups_def_ ## gpu) > \
+				KBASE_IPA_MAX_GROUP_DEF_NUM); \
+		return kbase_ipa_vinstr_common_model_init(model, \
+				ipa_groups_def_ ## gpu, \
+				ARRAY_SIZE(ipa_groups_def_ ## gpu)); \
+	} \
+	struct kbase_ipa_model_ops kbase_ ## gpu ## _ipa_model_ops = { \
+		.name = "mali-" #gpu "-power-model", \
+		.init = kbase_ ## gpu ## _power_model_init, \
+		.term = kbase_ipa_vinstr_common_model_term, \
+		.get_dynamic_coeff = kbase_ipa_vinstr_dynamic_coeff, \
+		.do_utilization_scaling_in_framework = false \
+	}; \
+	KBASE_EXPORT_TEST_API(kbase_ ## gpu ## _ipa_model_ops)
+
+STANDARD_POWER_MODEL(g71);
+STANDARD_POWER_MODEL(g72);
+STANDARD_POWER_MODEL(tnox);
diff --git a/drivers/gpu/arm/bifrost/mali_base_hwconfig_features.h b/drivers/gpu/arm/bifrost/mali_base_hwconfig_features.h
index e0eebd872eb9..10da0c58e9eb 100644
--- a/drivers/gpu/arm/bifrost/mali_base_hwconfig_features.h
+++ b/drivers/gpu/arm/bifrost/mali_base_hwconfig_features.h
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2015-2017 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2014-2018 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -31,7 +31,6 @@
 enum base_hw_feature {
 	BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION,
 	BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS,
-	BASE_HW_FEATURE_33BIT_VA,
 	BASE_HW_FEATURE_XAFFINITY,
 	BASE_HW_FEATURE_OUT_OF_ORDER_EXEC,
 	BASE_HW_FEATURE_MRT,
@@ -85,7 +84,6 @@ static const enum base_hw_feature base_hw_features_t62x[] = {
 };
 
 static const enum base_hw_feature base_hw_features_t72x[] = {
-	BASE_HW_FEATURE_33BIT_VA,
 	BASE_HW_FEATURE_32_BIT_UNIFORM_ADDRESS,
 	BASE_HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL,
 	BASE_HW_FEATURE_INTERPIPE_REG_ALIASING,
@@ -139,7 +137,6 @@ static const enum base_hw_feature base_hw_features_tFxx[] = {
 };
 
 static const enum base_hw_feature base_hw_features_t83x[] = {
-	BASE_HW_FEATURE_33BIT_VA,
 	BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION,
 	BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS,
 	BASE_HW_FEATURE_XAFFINITY,
@@ -162,7 +159,6 @@ static const enum base_hw_feature base_hw_features_t83x[] = {
 };
 
 static const enum base_hw_feature base_hw_features_t82x[] = {
-	BASE_HW_FEATURE_33BIT_VA,
 	BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION,
 	BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS,
 	BASE_HW_FEATURE_XAFFINITY,
@@ -238,7 +234,6 @@ static const enum base_hw_feature base_hw_features_tHEx[] = {
 };
 
 static const enum base_hw_feature base_hw_features_tSIx[] = {
-	BASE_HW_FEATURE_33BIT_VA,
 	BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION,
 	BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS,
 	BASE_HW_FEATURE_XAFFINITY,
@@ -266,7 +261,6 @@ static const enum base_hw_feature base_hw_features_tSIx[] = {
 };
 
 static const enum base_hw_feature base_hw_features_tDVx[] = {
-	BASE_HW_FEATURE_33BIT_VA,
 	BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION,
 	BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS,
 	BASE_HW_FEATURE_XAFFINITY,
diff --git a/drivers/gpu/arm/bifrost/mali_base_hwconfig_issues.h b/drivers/gpu/arm/bifrost/mali_base_hwconfig_issues.h
index 7b70e7a82b6f..d3cfdbee5921 100644
--- a/drivers/gpu/arm/bifrost/mali_base_hwconfig_issues.h
+++ b/drivers/gpu/arm/bifrost/mali_base_hwconfig_issues.h
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2015-2017 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2014-2018 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -1141,6 +1141,13 @@ static const enum base_hw_issue base_hw_issues_tGOx_r0p0[] = {
 	BASE_HW_ISSUE_END
 };
 
+static const enum base_hw_issue base_hw_issues_tGOx_r1p0[] = {
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_TMIX_8133,
+	BASE_HW_ISSUE_TSIX_1116,
+	BASE_HW_ISSUE_END
+};
+
 static const enum base_hw_issue base_hw_issues_model_tGOx[] = {
 	BASE_HW_ISSUE_5736,
 	BASE_HW_ISSUE_9435,
diff --git a/drivers/gpu/arm/bifrost/mali_base_kernel.h b/drivers/gpu/arm/bifrost/mali_base_kernel.h
index e6b568fba520..f0ccd789e81c 100644
--- a/drivers/gpu/arm/bifrost/mali_base_kernel.h
+++ b/drivers/gpu/arm/bifrost/mali_base_kernel.h
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2010-2017 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2010-2018 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -317,8 +317,10 @@ struct base_mem_import_user_buffer {
  *
  * This is the same as the maximum limit for a Buffer Descriptor's chunk size
  */
+#define BASE_MEM_TILER_ALIGN_TOP_EXTENT_MAX_PAGES_LOG2 \
+		(21u - (LOCAL_PAGE_SHIFT))
 #define BASE_MEM_TILER_ALIGN_TOP_EXTENT_MAX_PAGES \
-		((2ull * 1024ull * 1024ull) >> (LOCAL_PAGE_SHIFT))
+		(1ull << (BASE_MEM_TILER_ALIGN_TOP_EXTENT_MAX_PAGES_LOG2))
 
 /* Bit mask of cookies used for for memory allocation setup */
 #define KBASE_COOKIE_MASK  ~1UL /* bit 0 is reserved */
@@ -433,6 +435,13 @@ struct base_mem_aliasing_info {
 	u64 length;
 };
 
+/**
+ * Similar to BASE_MEM_TILER_ALIGN_TOP, memory starting from the end of the
+ * initial commit is aligned to 'extent' pages, where 'extent' must be a power
+ * of 2 and no more than BASE_MEM_TILER_ALIGN_TOP_EXTENT_MAX_PAGES
+ */
+#define BASE_JIT_ALLOC_MEM_TILER_ALIGN_TOP  (1 << 0)
+
 /**
  * struct base_jit_alloc_info - Structure which describes a JIT allocation
  *                              request.
@@ -446,6 +455,18 @@ struct base_mem_aliasing_info {
  * @id:                         Unique ID provided by the caller, this is used
  *                              to pair allocation and free requests.
  *                              Zero is not a valid value.
+ * @bin_id:                     The JIT allocation bin, used in conjunction with
+ *                              @max_allocations to limit the number of each
+ *                              type of JIT allocation.
+ * @max_allocations:            The maximum number of allocations allowed within
+ *                              the bin specified by @bin_id. Should be the same
+ *                              for all JIT allocations within the same bin.
+ * @flags:                      flags specifying the special requirements for
+ *                              the JIT allocation.
+ * @padding:                    Expansion space - should be initialised to zero
+ * @usage_id:                   A hint about which allocation should be reused.
+ *                              The kernel should attempt to use a previous
+ *                              allocation with the same usage_id
  */
 struct base_jit_alloc_info {
 	u64 gpu_alloc_addr;
@@ -453,6 +474,11 @@ struct base_jit_alloc_info {
 	u64 commit_pages;
 	u64 extent;
 	u8 id;
+	u8 bin_id;
+	u8 max_allocations;
+	u8 flags;
+	u8 padding[2];
+	u16 usage_id;
 };
 
 /**
@@ -1412,6 +1438,11 @@ struct mali_base_gpu_core_props {
 	 * client will not be expecting to allocate anywhere near this value.
 	 */
 	u64 gpu_available_memory_size;
+
+	/**
+	 * The number of execution engines.
+	 */
+	u8 num_exec_engines;
 };
 
 /**
@@ -1442,7 +1473,10 @@ struct mali_base_gpu_thread_props {
 	u8  max_task_queue;         /* Max. tasks [1..255] which may be sent to a core before it becomes blocked. */
 	u8  max_thread_group_split; /* Max. allowed value [1..15] of the Thread Group Split field. */
 	u8  impl_tech;              /* 0 = Not specified, 1 = Silicon, 2 = FPGA, 3 = SW Model/Emulation */
-	u8  padding[7];
+	u8  padding[3];
+	u32 tls_alloc;              /* Number of threads per core that TLS must
+				     * be allocated for
+				     */
 };
 
 /**
@@ -1524,7 +1558,7 @@ struct gpu_raw_gpu_props {
 	u64 stack_present;
 
 	u32 l2_features;
-	u32 suspend_size; /* API 8.2+ */
+	u32 core_features;
 	u32 mem_features;
 	u32 mmu_features;
 
@@ -1547,6 +1581,8 @@ struct gpu_raw_gpu_props {
 	 * available modes as exposed in the coherency_features register.
 	 */
 	u32 coherency_mode;
+
+	u32 thread_tls_alloc;
 };
 
 /**
@@ -1582,36 +1618,40 @@ typedef struct base_gpu_props {
  */
 
 /**
- * \enum base_context_create_flags
- *
  * Flags to pass to ::base_context_init.
  * Flags can be ORed together to enable multiple things.
  *
  * These share the same space as BASEP_CONTEXT_FLAG_*, and so must
  * not collide with them.
  */
-enum base_context_create_flags {
-	/** No flags set */
-	BASE_CONTEXT_CREATE_FLAG_NONE = 0,
+typedef u32 base_context_create_flags;
 
-	/** Base context is embedded in a cctx object (flag used for CINSTR software counter macros) */
-	BASE_CONTEXT_CCTX_EMBEDDED = (1u << 0),
+/** No flags set */
+#define BASE_CONTEXT_CREATE_FLAG_NONE ((base_context_create_flags)0)
 
-	/** Base context is a 'System Monitor' context for Hardware counters.
-	 *
-	 * One important side effect of this is that job submission is disabled. */
-	BASE_CONTEXT_SYSTEM_MONITOR_SUBMIT_DISABLED = (1u << 1)
-};
+/** Base context is embedded in a cctx object (flag used for CINSTR
+ * software counter macros)
+ */
+#define BASE_CONTEXT_CCTX_EMBEDDED ((base_context_create_flags)1 << 0)
+
+/** Base context is a 'System Monitor' context for Hardware counters.
+ *
+ * One important side effect of this is that job submission is disabled.
+ */
+#define BASE_CONTEXT_SYSTEM_MONITOR_SUBMIT_DISABLED \
+	((base_context_create_flags)1 << 1)
 
 /**
- * Bitpattern describing the ::base_context_create_flags that can be passed to base_context_init()
+ * Bitpattern describing the ::base_context_create_flags that can be
+ * passed to base_context_init()
  */
 #define BASE_CONTEXT_CREATE_ALLOWED_FLAGS \
 	(((u32)BASE_CONTEXT_CCTX_EMBEDDED) | \
 	  ((u32)BASE_CONTEXT_SYSTEM_MONITOR_SUBMIT_DISABLED))
 
 /**
- * Bitpattern describing the ::base_context_create_flags that can be passed to the kernel
+ * Bitpattern describing the ::base_context_create_flags that can be
+ * passed to the kernel
  */
 #define BASE_CONTEXT_CREATE_KERNEL_FLAGS \
 	((u32)BASE_CONTEXT_SYSTEM_MONITOR_SUBMIT_DISABLED)
diff --git a/drivers/gpu/arm/bifrost/mali_kbase.h b/drivers/gpu/arm/bifrost/mali_kbase.h
index f9cc148c9969..35ce1b9e06a0 100644
--- a/drivers/gpu/arm/bifrost/mali_kbase.h
+++ b/drivers/gpu/arm/bifrost/mali_kbase.h
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2010-2017 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2010-2018 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -61,8 +61,8 @@
 #include "mali_kbase_mem_lowlevel.h"
 #include "mali_kbase_trace_timeline.h"
 #include "mali_kbase_js.h"
-#include "mali_kbase_mem.h"
 #include "mali_kbase_utility.h"
+#include "mali_kbase_mem.h"
 #include "mali_kbase_gpu_memory_debugfs.h"
 #include "mali_kbase_mem_profile_debugfs.h"
 #include "mali_kbase_debug_job_fault.h"
@@ -111,10 +111,6 @@ void kbase_release_device(struct kbase_device *kbdev);
 
 void kbase_set_profiling_control(struct kbase_device *kbdev, u32 control, u32 value);
 
-struct kbase_context *
-kbase_create_context(struct kbase_device *kbdev, bool is_compat);
-void kbase_destroy_context(struct kbase_context *kctx);
-
 
 /**
  * kbase_get_unmapped_area() - get an address range which is currently
@@ -258,7 +254,7 @@ int kbase_soft_event_update(struct kbase_context *kctx,
 
 bool kbase_replay_process(struct kbase_jd_atom *katom);
 
-void kbasep_soft_job_timeout_worker(unsigned long data);
+void kbasep_soft_job_timeout_worker(struct timer_list *timer);
 void kbasep_complete_triggered_soft_events(struct kbase_context *kctx, u64 evt);
 
 /* api used internally for register access. Contains validation and tracing */
diff --git a/drivers/gpu/arm/bifrost/mali_kbase_10969_workaround.c b/drivers/gpu/arm/bifrost/mali_kbase_10969_workaround.c
index e0e40a9292e8..f3e71d1a40d0 100644
--- a/drivers/gpu/arm/bifrost/mali_kbase_10969_workaround.c
+++ b/drivers/gpu/arm/bifrost/mali_kbase_10969_workaround.c
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2013-2015,2017 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2013-2015,2017-2018 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -23,11 +23,6 @@
 #include <mali_kbase.h>
 #include <mali_kbase_10969_workaround.h>
 
-/* This function is used to solve an HW issue with single iterator GPUs.
- * If a fragment job is soft-stopped on the edge of its bounding box, can happen that the
- * restart index is out of bounds and the rerun causes a tile range fault. If this happens
- * we try to clamp the restart index to a correct value and rerun the job.
- */
 /* Mask of X and Y coordinates for the coordinates words in the descriptors*/
 #define X_COORDINATE_MASK 0x00000FFF
 #define Y_COORDINATE_MASK 0x0FFF0000
diff --git a/drivers/gpu/arm/bifrost/mali_kbase_10969_workaround.h b/drivers/gpu/arm/bifrost/mali_kbase_10969_workaround.h
index 624dc4a86b52..379a05a1a128 100644
--- a/drivers/gpu/arm/bifrost/mali_kbase_10969_workaround.h
+++ b/drivers/gpu/arm/bifrost/mali_kbase_10969_workaround.h
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2013-2014 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2013-2014, 2018 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -23,6 +23,15 @@
 #ifndef _KBASE_10969_WORKAROUND_
 #define _KBASE_10969_WORKAROUND_
 
+/**
+ * kbasep_10969_workaround_clamp_coordinates - Apply the WA to clamp the restart indices
+ * @katom: atom representing the fragment job for which the WA has to be applied
+ *
+ * This workaround is used to solve an HW issue with single iterator GPUs.
+ * If a fragment job is soft-stopped on the edge of its bounding box, it can happen
+ * that the restart index is out of bounds and the rerun causes a tile range
+ * fault. If this happens we try to clamp the restart index to a correct value.
+ */
 int kbasep_10969_workaround_clamp_coordinates(struct kbase_jd_atom *katom);
 
 #endif /* _KBASE_10969_WORKAROUND_ */
diff --git a/drivers/gpu/arm/bifrost/mali_kbase_as_fault_debugfs.c b/drivers/gpu/arm/bifrost/mali_kbase_as_fault_debugfs.c
index f57eb7c25492..44aa237a7497 100644
--- a/drivers/gpu/arm/bifrost/mali_kbase_as_fault_debugfs.c
+++ b/drivers/gpu/arm/bifrost/mali_kbase_as_fault_debugfs.c
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2016 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2016-2017 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -41,13 +41,14 @@ static int kbase_as_fault_read(struct seq_file *sfile, void *data)
 	list_for_each(entry, kbdev_list) {
 		kbdev = list_entry(entry, struct kbase_device, entry);
 
-		if(kbdev->debugfs_as_read_bitmap & (1ULL << as_no)) {
+		if (kbdev->debugfs_as_read_bitmap & (1ULL << as_no)) {
 
 			/* don't show this one again until another fault occors */
 			kbdev->debugfs_as_read_bitmap &= ~(1ULL << as_no);
 
 			/* output the last page fault addr */
-			seq_printf(sfile, "%llu\n", (u64) kbdev->as[as_no].fault_addr);
+			seq_printf(sfile, "%llu\n",
+				   (u64) kbdev->as[as_no].fault_addr);
 		}
 
 	}
@@ -59,7 +60,7 @@ static int kbase_as_fault_read(struct seq_file *sfile, void *data)
 
 static int kbase_as_fault_debugfs_open(struct inode *in, struct file *file)
 {
-	return single_open(file, kbase_as_fault_read , in->i_private);
+	return single_open(file, kbase_as_fault_read, in->i_private);
 }
 
 static const struct file_operations as_fault_fops = {
@@ -89,17 +90,20 @@ void kbase_as_fault_debugfs_init(struct kbase_device *kbdev)
 	KBASE_DEBUG_ASSERT(sizeof(kbdev->as[0].fault_addr) == sizeof(u64));
 
 	debugfs_directory = debugfs_create_dir("address_spaces",
-		kbdev->mali_debugfs_directory);
+					       kbdev->mali_debugfs_directory);
 
-	if(debugfs_directory) {
-		for(i = 0; i < kbdev->nr_hw_address_spaces; i++) {
+	if (debugfs_directory) {
+		for (i = 0; i < kbdev->nr_hw_address_spaces; i++) {
 			snprintf(as_name, ARRAY_SIZE(as_name), "as%u", i);
 			debugfs_create_file(as_name, S_IRUGO,
-				debugfs_directory, (void*) ((uintptr_t) i), &as_fault_fops);
+					    debugfs_directory,
+					    (void *)(uintptr_t)i,
+					    &as_fault_fops);
 		}
+	} else {
+		dev_warn(kbdev->dev,
+			 "unable to create address_spaces debugfs directory");
 	}
-	else
-		dev_warn(kbdev->dev, "unable to create address_spaces debugfs directory");
 
 #endif /* CONFIG_MALI_BIFROST_DEBUG */
 #endif /* CONFIG_DEBUG_FS */
diff --git a/drivers/gpu/arm/bifrost/mali_kbase_config_defaults.h b/drivers/gpu/arm/bifrost/mali_kbase_config_defaults.h
index dbb4f977bb33..a95736c54a29 100644
--- a/drivers/gpu/arm/bifrost/mali_kbase_config_defaults.h
+++ b/drivers/gpu/arm/bifrost/mali_kbase_config_defaults.h
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2013-2017 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2013-2018 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -156,12 +156,12 @@ enum {
  */
 #define DEFAULT_UMP_GPU_DEVICE_SHIFT UMP_DEVICE_Z_SHIFT
 
-/*
+/**
  * Default period for DVFS sampling
  */
 #define DEFAULT_PM_DVFS_PERIOD 100 /* 100ms */
 
-/*
+/**
  * Power Management poweroff tick granuality. This is in nanoseconds to
  * allow HR timer support.
  *
@@ -171,22 +171,22 @@ enum {
  */
 #define DEFAULT_PM_GPU_POWEROFF_TICK_NS (400000) /* 400us */
 
-/*
+/**
  * Power Manager number of ticks before shader cores are powered off
  */
 #define DEFAULT_PM_POWEROFF_TICK_SHADER (2) /* 400-800us */
 
-/*
+/**
  * Power Manager number of ticks before GPU is powered off
  */
 #define DEFAULT_PM_POWEROFF_TICK_GPU (2) /* 400-800us */
 
-/*
+/**
  * Default scheduling tick granuality
  */
 #define DEFAULT_JS_SCHEDULING_PERIOD_NS    (100000000u) /* 100ms */
 
-/*
+/**
  * Default minimum number of scheduling ticks before jobs are soft-stopped.
  *
  * This defines the time-slice for a job (which may be different from that of a
@@ -194,60 +194,60 @@ enum {
  */
 #define DEFAULT_JS_SOFT_STOP_TICKS       (1) /* 100ms-200ms */
 
-/*
+/**
  * Default minimum number of scheduling ticks before CL jobs are soft-stopped.
  */
 #define DEFAULT_JS_SOFT_STOP_TICKS_CL    (1) /* 100ms-200ms */
 
-/*
+/**
  * Default minimum number of scheduling ticks before jobs are hard-stopped
  */
 #define DEFAULT_JS_HARD_STOP_TICKS_SS    (50) /* 5s */
 #define DEFAULT_JS_HARD_STOP_TICKS_SS_8408  (300) /* 30s */
 
-/*
+/**
  * Default minimum number of scheduling ticks before CL jobs are hard-stopped.
  */
 #define DEFAULT_JS_HARD_STOP_TICKS_CL    (50) /* 5s */
 
-/*
+/**
  * Default minimum number of scheduling ticks before jobs are hard-stopped
  * during dumping
  */
 #define DEFAULT_JS_HARD_STOP_TICKS_DUMPING   (15000) /* 1500s */
 
-/*
+/**
  * Default timeout for some software jobs, after which the software event wait
  * jobs will be cancelled.
  */
 #define DEFAULT_JS_SOFT_JOB_TIMEOUT (3000) /* 3s */
 
-/*
+/**
  * Default minimum number of scheduling ticks before the GPU is reset to clear a
  * "stuck" job
  */
 #define DEFAULT_JS_RESET_TICKS_SS           (55) /* 5.5s */
 #define DEFAULT_JS_RESET_TICKS_SS_8408     (450) /* 45s */
 
-/*
+/**
  * Default minimum number of scheduling ticks before the GPU is reset to clear a
  * "stuck" CL job.
  */
 #define DEFAULT_JS_RESET_TICKS_CL        (55) /* 5.5s */
 
-/*
+/**
  * Default minimum number of scheduling ticks before the GPU is reset to clear a
  * "stuck" job during dumping.
  */
 #define DEFAULT_JS_RESET_TICKS_DUMPING   (15020) /* 1502s */
 
-/*
+/**
  * Default number of milliseconds given for other jobs on the GPU to be
  * soft-stopped when the GPU needs to be reset.
  */
 #define DEFAULT_RESET_TIMEOUT_MS (3000) /* 3s */
 
-/*
+/**
  * Default timeslice that a context is scheduled in for, in nanoseconds.
  *
  * When a context has used up this amount of time across its jobs, it is
@@ -258,7 +258,7 @@ enum {
  */
 #define DEFAULT_JS_CTX_TIMESLICE_NS (50000000) /* 50ms */
 
-/*
+/**
  * Perform GPU power down using only platform specific code, skipping DDK power
  * management.
  *
@@ -272,7 +272,7 @@ enum {
  */
 #define PLATFORM_POWER_DOWN_ONLY (0)
 
-/*
+/**
  * Maximum frequency (in kHz) that the GPU can be clocked. For some platforms
  * this isn't available, so we simply define a dummy value here. If devfreq
  * is enabled the value will be read from there, otherwise this should be
diff --git a/drivers/gpu/arm/bifrost/mali_kbase_context.c b/drivers/gpu/arm/bifrost/mali_kbase_context.c
index d873f9feea30..e9de7239b1e1 100644
--- a/drivers/gpu/arm/bifrost/mali_kbase_context.c
+++ b/drivers/gpu/arm/bifrost/mali_kbase_context.c
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2010-2017 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2010-2018 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -32,15 +32,6 @@
 #include <mali_kbase_dma_fence.h>
 #include <mali_kbase_ctx_sched.h>
 
-/**
- * kbase_create_context() - Create a kernel base context.
- * @kbdev: Kbase device
- * @is_compat: Force creation of a 32-bit context
- *
- * Allocate and init a kernel base context.
- *
- * Return: new kbase context
- */
 struct kbase_context *
 kbase_create_context(struct kbase_device *kbdev, bool is_compat)
 {
@@ -175,9 +166,8 @@ kbase_create_context(struct kbase_device *kbdev, bool is_compat)
 
 	mutex_init(&kctx->vinstr_cli_lock);
 
-	setup_timer(&kctx->soft_job_timeout,
-		    kbasep_soft_job_timeout_worker,
-		    (uintptr_t)kctx);
+	kbase_timer_setup(&kctx->soft_job_timeout,
+			  kbasep_soft_job_timeout_worker);
 
 	return kctx;
 
@@ -225,13 +215,6 @@ static void kbase_reg_pending_dtor(struct kbase_va_region *reg)
 	kfree(reg);
 }
 
-/**
- * kbase_destroy_context - Destroy a kernel base context.
- * @kctx: Context to destroy
- *
- * Calls kbase_destroy_os_context() to free OS specific structures.
- * Will release all outstanding regions.
- */
 void kbase_destroy_context(struct kbase_context *kctx)
 {
 	struct kbase_device *kbdev;
@@ -252,6 +235,8 @@ void kbase_destroy_context(struct kbase_context *kctx)
 	 * thread. */
 	kbase_pm_context_active(kbdev);
 
+	kbase_mem_pool_mark_dying(&kctx->mem_pool);
+
 	kbase_jd_zap_context(kctx);
 
 #ifdef CONFIG_DEBUG_FS
@@ -328,13 +313,6 @@ void kbase_destroy_context(struct kbase_context *kctx)
 }
 KBASE_EXPORT_SYMBOL(kbase_destroy_context);
 
-/**
- * kbase_context_set_create_flags - Set creation flags on a context
- * @kctx: Kbase context
- * @flags: Flags to set
- *
- * Return: 0 on success
- */
 int kbase_context_set_create_flags(struct kbase_context *kctx, u32 flags)
 {
 	int err = 0;
diff --git a/drivers/gpu/arm/bifrost/mali_kbase_context.h b/drivers/gpu/arm/bifrost/mali_kbase_context.h
index 431f9e5aa6de..30b0f649806b 100644
--- a/drivers/gpu/arm/bifrost/mali_kbase_context.h
+++ b/drivers/gpu/arm/bifrost/mali_kbase_context.h
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2011-2016 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2011-2016, 2018 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -25,7 +25,35 @@
 
 #include <linux/atomic.h>
 
+/**
+ * kbase_create_context() - Create a kernel base context.
+ * @kbdev: Kbase device
+ * @is_compat: Force creation of a 32-bit context
+ *
+ * Allocate and init a kernel base context.
+ *
+ * Return: new kbase context
+ */
+struct kbase_context *
+kbase_create_context(struct kbase_device *kbdev, bool is_compat);
 
+/**
+ * kbase_destroy_context - Destroy a kernel base context.
+ * @kctx: Context to destroy
+ *
+ * Calls kbase_destroy_os_context() to free OS specific structures.
+ * Will release all outstanding regions.
+ */
+void kbase_destroy_context(struct kbase_context *kctx);
+
+/**
+ * kbase_context_set_create_flags - Set creation flags on a context
+ * @kctx: Kbase context
+ * @flags: Flags to set, which shall be one of the flags of
+ *         BASE_CONTEXT_CREATE_KERNEL_FLAGS.
+ *
+ * Return: 0 on success, -EINVAL otherwise when an invalid flag is specified.
+ */
 int kbase_context_set_create_flags(struct kbase_context *kctx, u32 flags);
 
 /**
diff --git a/drivers/gpu/arm/bifrost/mali_kbase_core_linux.c b/drivers/gpu/arm/bifrost/mali_kbase_core_linux.c
index a8a4c98669dc..21fd399f58da 100644
--- a/drivers/gpu/arm/bifrost/mali_kbase_core_linux.c
+++ b/drivers/gpu/arm/bifrost/mali_kbase_core_linux.c
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2010-2017 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2010-2018 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -34,6 +34,7 @@
 #endif /* CONFIG_MALI_BIFROST_DEVFREQ */
 #ifdef CONFIG_MALI_BIFROST_NO_MALI
 #include "mali_kbase_model_linux.h"
+#include <backend/gpu/mali_kbase_model_dummy.h>
 #endif /* CONFIG_MALI_BIFROST_NO_MALI */
 #include "mali_kbase_mem_profile_debugfs_buf_size.h"
 #include "mali_kbase_debug_mem_view.h"
@@ -82,7 +83,7 @@
 #include <mali_kbase_config.h>
 
 
-#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 13, 0))
+#if (KERNEL_VERSION(3, 13, 0) <= LINUX_VERSION_CODE)
 #include <linux/pm_opp.h>
 #include <soc/rockchip/rockchip_opp_select.h>
 #else
@@ -106,19 +107,20 @@ static LIST_HEAD(kbase_dev_list);
 #define KERNEL_SIDE_DDK_VERSION_STRING "K:" MALI_RELEASE_NAME "(GPL)"
 
 static int kbase_api_handshake(struct kbase_context *kctx,
-		struct kbase_ioctl_version_check *version)
+			       struct kbase_ioctl_version_check *version)
 {
 	switch (version->major) {
 	case BASE_UK_VERSION_MAJOR:
 		/* set minor to be the lowest common */
 		version->minor = min_t(int, BASE_UK_VERSION_MINOR,
-				(int)version->minor);
+				       (int)version->minor);
 		break;
 	default:
 		/* We return our actual version regardless if it
 		 * matches the version returned by userspace -
 		 * userspace can bail if it can't handle this
-		 * version */
+		 * version
+		 */
 		version->major = BASE_UK_VERSION_MAJOR;
 		version->minor = BASE_UK_VERSION_MINOR;
 		break;
@@ -494,11 +496,11 @@ static int kbase_release(struct inode *inode, struct file *filp)
 	/* If this client was performing hwcnt dumping and did not explicitly
 	 * detach itself, remove it from the vinstr core now */
 	if (kctx->vinstr_cli) {
-		struct kbase_uk_hwcnt_setup setup;
+		struct kbase_ioctl_hwcnt_enable enable;
 
-		setup.dump_buffer = 0llu;
+		enable.dump_buffer = 0llu;
 		kbase_vinstr_legacy_hwc_setup(
-				kbdev->vinstr_ctx, &kctx->vinstr_cli, &setup);
+				kbdev->vinstr_ctx, &kctx->vinstr_cli, &enable);
 	}
 	mutex_unlock(&kctx->vinstr_cli_lock);
 
@@ -611,38 +613,22 @@ static int kbase_api_hwcnt_reader_setup(struct kbase_context *kctx,
 		struct kbase_ioctl_hwcnt_reader_setup *setup)
 {
 	int ret;
-	struct kbase_uk_hwcnt_reader_setup args = {
-		.buffer_count = setup->buffer_count,
-		.jm_bm = setup->jm_bm,
-		.shader_bm = setup->shader_bm,
-		.tiler_bm = setup->tiler_bm,
-		.mmu_l2_bm = setup->mmu_l2_bm
-	};
 
 	mutex_lock(&kctx->vinstr_cli_lock);
-	ret = kbase_vinstr_hwcnt_reader_setup(kctx->kbdev->vinstr_ctx, &args);
+	ret = kbase_vinstr_hwcnt_reader_setup(kctx->kbdev->vinstr_ctx, setup);
 	mutex_unlock(&kctx->vinstr_cli_lock);
 
-	if (ret)
-		return ret;
-	return args.fd;
+	return ret;
 }
 
 static int kbase_api_hwcnt_enable(struct kbase_context *kctx,
 		struct kbase_ioctl_hwcnt_enable *enable)
 {
 	int ret;
-	struct kbase_uk_hwcnt_setup args = {
-		.dump_buffer = enable->dump_buffer,
-		.jm_bm = enable->jm_bm,
-		.shader_bm = enable->shader_bm,
-		.tiler_bm = enable->tiler_bm,
-		.mmu_l2_bm = enable->mmu_l2_bm
-	};
 
 	mutex_lock(&kctx->vinstr_cli_lock);
 	ret = kbase_vinstr_legacy_hwc_setup(kctx->kbdev->vinstr_ctx,
-			&kctx->vinstr_cli, &args);
+			&kctx->vinstr_cli, enable);
 	mutex_unlock(&kctx->vinstr_cli_lock);
 
 	return ret;
@@ -671,6 +657,18 @@ static int kbase_api_hwcnt_clear(struct kbase_context *kctx)
 	return ret;
 }
 
+#ifdef CONFIG_MALI_BIFROST_NO_MALI
+static int kbase_api_hwcnt_set(struct kbase_context *kctx,
+		struct kbase_ioctl_hwcnt_values *values)
+{
+	gpu_model_set_dummy_prfcnt_sample(
+			(u32 __user *)(uintptr_t)values->data,
+			values->size);
+
+	return 0;
+}
+#endif
+
 static int kbase_api_disjoint_query(struct kbase_context *kctx,
 		struct kbase_ioctl_disjoint_query *query)
 {
@@ -701,10 +699,37 @@ static int kbase_api_get_ddk_version(struct kbase_context *kctx,
 	return len;
 }
 
+/* Defaults for legacy JIT init ioctl */
+#define DEFAULT_MAX_JIT_ALLOCATIONS 255
+#define JIT_LEGACY_TRIM_LEVEL (0) /* No trimming */
+
+static int kbase_api_mem_jit_init_old(struct kbase_context *kctx,
+		struct kbase_ioctl_mem_jit_init_old *jit_init)
+{
+	kctx->jit_version = 1;
+
+	return kbase_region_tracker_init_jit(kctx, jit_init->va_pages,
+			DEFAULT_MAX_JIT_ALLOCATIONS,
+			JIT_LEGACY_TRIM_LEVEL);
+}
+
 static int kbase_api_mem_jit_init(struct kbase_context *kctx,
 		struct kbase_ioctl_mem_jit_init *jit_init)
 {
-	return kbase_region_tracker_init_jit(kctx, jit_init->va_pages);
+	int i;
+
+	kctx->jit_version = 2;
+
+	for (i = 0; i < sizeof(jit_init->padding); i++) {
+		/* Ensure all padding bytes are 0 for potential future
+		 * extension
+		 */
+		if (jit_init->padding[i])
+			return -EINVAL;
+	}
+
+	return kbase_region_tracker_init_jit(kctx, jit_init->va_pages,
+			jit_init->max_allocations, jit_init->trim_level);
 }
 
 static int kbase_api_mem_sync(struct kbase_context *kctx,
@@ -1012,14 +1037,12 @@ static int kbase_api_tlstream_stats(struct kbase_context *kctx,
 #endif /* MALI_UNIT_TEST */
 
 #define KBASE_HANDLE_IOCTL(cmd, function)                          \
-	case cmd:                                                  \
 	do {                                                       \
 		BUILD_BUG_ON(_IOC_DIR(cmd) != _IOC_NONE);          \
 		return function(kctx);                             \
 	} while (0)
 
 #define KBASE_HANDLE_IOCTL_IN(cmd, function, type)                 \
-	case cmd:                                                  \
 	do {                                                       \
 		type param;                                        \
 		int err;                                           \
@@ -1032,7 +1055,6 @@ static int kbase_api_tlstream_stats(struct kbase_context *kctx,
 	} while (0)
 
 #define KBASE_HANDLE_IOCTL_OUT(cmd, function, type)                \
-	case cmd:                                                  \
 	do {                                                       \
 		type param;                                        \
 		int ret, err;                                      \
@@ -1046,7 +1068,6 @@ static int kbase_api_tlstream_stats(struct kbase_context *kctx,
 	} while (0)
 
 #define KBASE_HANDLE_IOCTL_INOUT(cmd, function, type)                  \
-	case cmd:                                                      \
 	do {                                                           \
 		type param;                                            \
 		int ret, err;                                          \
@@ -1070,12 +1091,17 @@ static long kbase_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
 
 	/* Only these ioctls are available until setup is complete */
 	switch (cmd) {
+	case KBASE_IOCTL_VERSION_CHECK:
 		KBASE_HANDLE_IOCTL_INOUT(KBASE_IOCTL_VERSION_CHECK,
 				kbase_api_handshake,
 				struct kbase_ioctl_version_check);
+		break;
+
+	case KBASE_IOCTL_SET_FLAGS:
 		KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_SET_FLAGS,
 				kbase_api_set_flags,
 				struct kbase_ioctl_set_flags);
+		break;
 	}
 
 	/* Block call until version handshake and setup is complete */
@@ -1084,109 +1110,192 @@ static long kbase_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
 
 	/* Normal ioctls */
 	switch (cmd) {
+	case KBASE_IOCTL_JOB_SUBMIT:
 		KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_JOB_SUBMIT,
 				kbase_api_job_submit,
 				struct kbase_ioctl_job_submit);
+		break;
+	case KBASE_IOCTL_GET_GPUPROPS:
 		KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_GET_GPUPROPS,
 				kbase_api_get_gpuprops,
 				struct kbase_ioctl_get_gpuprops);
+		break;
+	case KBASE_IOCTL_POST_TERM:
 		KBASE_HANDLE_IOCTL(KBASE_IOCTL_POST_TERM,
 				kbase_api_post_term);
+		break;
+	case KBASE_IOCTL_MEM_ALLOC:
 		KBASE_HANDLE_IOCTL_INOUT(KBASE_IOCTL_MEM_ALLOC,
 				kbase_api_mem_alloc,
 				union kbase_ioctl_mem_alloc);
+		break;
+	case KBASE_IOCTL_MEM_QUERY:
 		KBASE_HANDLE_IOCTL_INOUT(KBASE_IOCTL_MEM_QUERY,
 				kbase_api_mem_query,
 				union kbase_ioctl_mem_query);
+		break;
+	case KBASE_IOCTL_MEM_FREE:
 		KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_MEM_FREE,
 				kbase_api_mem_free,
 				struct kbase_ioctl_mem_free);
-		KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_HWCNT_READER_SETUP,
-				kbase_api_hwcnt_reader_setup,
-				struct kbase_ioctl_hwcnt_reader_setup);
-		KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_HWCNT_ENABLE,
-				kbase_api_hwcnt_enable,
-				struct kbase_ioctl_hwcnt_enable);
-		KBASE_HANDLE_IOCTL(KBASE_IOCTL_HWCNT_DUMP,
-				kbase_api_hwcnt_dump);
-		KBASE_HANDLE_IOCTL(KBASE_IOCTL_HWCNT_CLEAR,
-				kbase_api_hwcnt_clear);
+		break;
+	case KBASE_IOCTL_DISJOINT_QUERY:
 		KBASE_HANDLE_IOCTL_OUT(KBASE_IOCTL_DISJOINT_QUERY,
 				kbase_api_disjoint_query,
 				struct kbase_ioctl_disjoint_query);
+		break;
+	case KBASE_IOCTL_GET_DDK_VERSION:
 		KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_GET_DDK_VERSION,
 				kbase_api_get_ddk_version,
 				struct kbase_ioctl_get_ddk_version);
+		break;
+	case KBASE_IOCTL_MEM_JIT_INIT_OLD:
+		KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_MEM_JIT_INIT_OLD,
+				kbase_api_mem_jit_init_old,
+				struct kbase_ioctl_mem_jit_init_old);
+		break;
+	case KBASE_IOCTL_MEM_JIT_INIT:
 		KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_MEM_JIT_INIT,
 				kbase_api_mem_jit_init,
 				struct kbase_ioctl_mem_jit_init);
+		break;
+	case KBASE_IOCTL_MEM_SYNC:
 		KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_MEM_SYNC,
 				kbase_api_mem_sync,
 				struct kbase_ioctl_mem_sync);
+		break;
+	case KBASE_IOCTL_MEM_FIND_CPU_OFFSET:
 		KBASE_HANDLE_IOCTL_INOUT(KBASE_IOCTL_MEM_FIND_CPU_OFFSET,
 				kbase_api_mem_find_cpu_offset,
 				union kbase_ioctl_mem_find_cpu_offset);
+		break;
+	case KBASE_IOCTL_MEM_FIND_GPU_START_AND_OFFSET:
 		KBASE_HANDLE_IOCTL_INOUT(KBASE_IOCTL_MEM_FIND_GPU_START_AND_OFFSET,
 				kbase_api_mem_find_gpu_start_and_offset,
 				union kbase_ioctl_mem_find_gpu_start_and_offset);
+		break;
+	case KBASE_IOCTL_GET_CONTEXT_ID:
 		KBASE_HANDLE_IOCTL_OUT(KBASE_IOCTL_GET_CONTEXT_ID,
 				kbase_api_get_context_id,
 				struct kbase_ioctl_get_context_id);
+		break;
+	case KBASE_IOCTL_TLSTREAM_ACQUIRE:
 		KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_TLSTREAM_ACQUIRE,
 				kbase_api_tlstream_acquire,
 				struct kbase_ioctl_tlstream_acquire);
+		break;
+	case KBASE_IOCTL_TLSTREAM_FLUSH:
 		KBASE_HANDLE_IOCTL(KBASE_IOCTL_TLSTREAM_FLUSH,
 				kbase_api_tlstream_flush);
+		break;
+	case KBASE_IOCTL_MEM_COMMIT:
 		KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_MEM_COMMIT,
 				kbase_api_mem_commit,
 				struct kbase_ioctl_mem_commit);
+		break;
+	case KBASE_IOCTL_MEM_ALIAS:
 		KBASE_HANDLE_IOCTL_INOUT(KBASE_IOCTL_MEM_ALIAS,
 				kbase_api_mem_alias,
 				union kbase_ioctl_mem_alias);
+		break;
+	case KBASE_IOCTL_MEM_IMPORT:
 		KBASE_HANDLE_IOCTL_INOUT(KBASE_IOCTL_MEM_IMPORT,
 				kbase_api_mem_import,
 				union kbase_ioctl_mem_import);
+		break;
+	case KBASE_IOCTL_MEM_FLAGS_CHANGE:
 		KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_MEM_FLAGS_CHANGE,
 				kbase_api_mem_flags_change,
 				struct kbase_ioctl_mem_flags_change);
+		break;
+	case KBASE_IOCTL_STREAM_CREATE:
 		KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_STREAM_CREATE,
 				kbase_api_stream_create,
 				struct kbase_ioctl_stream_create);
+		break;
+	case KBASE_IOCTL_FENCE_VALIDATE:
 		KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_FENCE_VALIDATE,
 				kbase_api_fence_validate,
 				struct kbase_ioctl_fence_validate);
+		break;
+	case KBASE_IOCTL_GET_PROFILING_CONTROLS:
 		KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_GET_PROFILING_CONTROLS,
 				kbase_api_get_profiling_controls,
 				struct kbase_ioctl_get_profiling_controls);
+		break;
+	case KBASE_IOCTL_MEM_PROFILE_ADD:
 		KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_MEM_PROFILE_ADD,
 				kbase_api_mem_profile_add,
 				struct kbase_ioctl_mem_profile_add);
+		break;
+	case KBASE_IOCTL_SOFT_EVENT_UPDATE:
 		KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_SOFT_EVENT_UPDATE,
 				kbase_api_soft_event_update,
 				struct kbase_ioctl_soft_event_update);
-#ifdef CONFIG_MALI_JOB_DUMP
-		KBASE_HANDLE_IOCTL(KBASE_IOCTL_CINSTR_GWT_START,
-				kbase_gpu_gwt_start);
-		KBASE_HANDLE_IOCTL(KBASE_IOCTL_CINSTR_GWT_STOP,
-				kbase_gpu_gwt_stop);
-		KBASE_HANDLE_IOCTL_INOUT(KBASE_IOCTL_CINSTR_GWT_DUMP,
-				kbase_gpu_gwt_dump,
-				union kbase_ioctl_cinstr_gwt_dump);
-#endif
+		break;
+	case KBASE_IOCTL_STICKY_RESOURCE_MAP:
 		KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_STICKY_RESOURCE_MAP,
 				kbase_api_sticky_resource_map,
 				struct kbase_ioctl_sticky_resource_map);
+		break;
+	case KBASE_IOCTL_STICKY_RESOURCE_UNMAP:
 		KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_STICKY_RESOURCE_UNMAP,
 				kbase_api_sticky_resource_unmap,
 				struct kbase_ioctl_sticky_resource_unmap);
+		break;
 
+	/* Instrumentation. */
+	case KBASE_IOCTL_HWCNT_READER_SETUP:
+		KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_HWCNT_READER_SETUP,
+				kbase_api_hwcnt_reader_setup,
+				struct kbase_ioctl_hwcnt_reader_setup);
+		break;
+	case KBASE_IOCTL_HWCNT_ENABLE:
+		KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_HWCNT_ENABLE,
+				kbase_api_hwcnt_enable,
+				struct kbase_ioctl_hwcnt_enable);
+		break;
+	case KBASE_IOCTL_HWCNT_DUMP:
+		KBASE_HANDLE_IOCTL(KBASE_IOCTL_HWCNT_DUMP,
+				kbase_api_hwcnt_dump);
+		break;
+	case KBASE_IOCTL_HWCNT_CLEAR:
+		KBASE_HANDLE_IOCTL(KBASE_IOCTL_HWCNT_CLEAR,
+				kbase_api_hwcnt_clear);
+		break;
+#ifdef CONFIG_MALI_BIFROST_NO_MALI
+	case KBASE_IOCTL_HWCNT_SET:
+		KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_HWCNT_SET,
+				kbase_api_hwcnt_set,
+				struct kbase_ioctl_hwcnt_values);
+		break;
+#endif
+#ifdef CONFIG_MALI_JOB_DUMP
+	case KBASE_IOCTL_CINSTR_GWT_START:
+		KBASE_HANDLE_IOCTL(KBASE_IOCTL_CINSTR_GWT_START,
+				kbase_gpu_gwt_start);
+		break;
+	case KBASE_IOCTL_CINSTR_GWT_STOP:
+		KBASE_HANDLE_IOCTL(KBASE_IOCTL_CINSTR_GWT_STOP,
+				kbase_gpu_gwt_stop);
+		break;
+	case KBASE_IOCTL_CINSTR_GWT_DUMP:
+		KBASE_HANDLE_IOCTL_INOUT(KBASE_IOCTL_CINSTR_GWT_DUMP,
+				kbase_gpu_gwt_dump,
+				union kbase_ioctl_cinstr_gwt_dump);
+		break;
+#endif
 #if MALI_UNIT_TEST
+	case KBASE_IOCTL_TLSTREAM_TEST:
 		KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_TLSTREAM_TEST,
 				kbase_api_tlstream_test,
 				struct kbase_ioctl_tlstream_test);
+		break;
+	case KBASE_IOCTL_TLSTREAM_STATS:
 		KBASE_HANDLE_IOCTL_OUT(KBASE_IOCTL_TLSTREAM_STATS,
 				kbase_api_tlstream_stats,
 				struct kbase_ioctl_tlstream_stats);
+		break;
 #endif
 	}
 
@@ -2738,6 +2847,88 @@ static ssize_t set_lp_mem_pool_max_size(struct device *dev,
 static DEVICE_ATTR(lp_mem_pool_max_size, S_IRUGO | S_IWUSR, show_lp_mem_pool_max_size,
 		set_lp_mem_pool_max_size);
 
+/**
+ * show_js_ctx_scheduling_mode - Show callback for js_ctx_scheduling_mode sysfs
+ *                               entry.
+ * @dev:  The device this sysfs file is for.
+ * @attr: The attributes of the sysfs file.
+ * @buf:  The output buffer to receive the context scheduling mode information.
+ *
+ * This function is called to get the context scheduling mode being used by JS.
+ *
+ * Return: The number of bytes output to @buf.
+ */
+static ssize_t show_js_ctx_scheduling_mode(struct device *dev,
+		struct device_attribute *attr, char * const buf)
+{
+	struct kbase_device *kbdev;
+
+	kbdev = to_kbase_device(dev);
+	if (!kbdev)
+		return -ENODEV;
+
+	return scnprintf(buf, PAGE_SIZE, "%u\n", kbdev->js_ctx_scheduling_mode);
+}
+
+/**
+ * set_js_ctx_scheduling_mode - Set callback for js_ctx_scheduling_mode sysfs
+ *                              entry.
+ * @dev:   The device this sysfs file is for.
+ * @attr:  The attributes of the sysfs file.
+ * @buf:   The value written to the sysfs file.
+ * @count: The number of bytes written to the sysfs file.
+ *
+ * This function is called when the js_ctx_scheduling_mode sysfs file is written
+ * to. It checks the data written, and if valid updates the ctx scheduling mode
+ * being by JS.
+ *
+ * Return: @count if the function succeeded. An error code on failure.
+ */
+static ssize_t set_js_ctx_scheduling_mode(struct device *dev,
+		struct device_attribute *attr, const char *buf, size_t count)
+{
+	struct kbasep_kctx_list_element *element;
+	u32 new_js_ctx_scheduling_mode;
+	struct kbase_device *kbdev;
+	unsigned long flags;
+	int ret;
+
+	kbdev = to_kbase_device(dev);
+	if (!kbdev)
+		return -ENODEV;
+
+	ret = kstrtouint(buf, 0, &new_js_ctx_scheduling_mode);
+	if (ret || new_js_ctx_scheduling_mode >= KBASE_JS_PRIORITY_MODE_COUNT) {
+		dev_err(kbdev->dev, "Couldn't process js_ctx_scheduling_mode"
+				" write operation.\n"
+				"Use format <js_ctx_scheduling_mode>\n");
+		return -EINVAL;
+	}
+
+	if (new_js_ctx_scheduling_mode == kbdev->js_ctx_scheduling_mode)
+		return count;
+
+	mutex_lock(&kbdev->kctx_list_lock);
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+
+	/* Update the context priority mode */
+	kbdev->js_ctx_scheduling_mode = new_js_ctx_scheduling_mode;
+
+	/* Adjust priority of all the contexts as per the new mode */
+	list_for_each_entry(element, &kbdev->kctx_list, link)
+		kbase_js_update_ctx_priority(element->kctx);
+
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+	mutex_unlock(&kbdev->kctx_list_lock);
+
+	dev_dbg(kbdev->dev, "JS ctx scheduling mode: %u\n", new_js_ctx_scheduling_mode);
+
+	return count;
+}
+
+static DEVICE_ATTR(js_ctx_scheduling_mode, S_IRUGO | S_IWUSR,
+		show_js_ctx_scheduling_mode,
+		set_js_ctx_scheduling_mode);
 #ifdef CONFIG_DEBUG_FS
 
 /* Number of entries in serialize_jobs_settings[] */
@@ -3398,6 +3589,7 @@ static struct attribute *kbase_attrs[] = {
 	&dev_attr_mem_pool_max_size.attr,
 	&dev_attr_lp_mem_pool_size.attr,
 	&dev_attr_lp_mem_pool_max_size.attr,
+	&dev_attr_js_ctx_scheduling_mode.attr,
 	NULL
 };
 
diff --git a/drivers/gpu/arm/bifrost/mali_kbase_ctx_sched.h b/drivers/gpu/arm/bifrost/mali_kbase_ctx_sched.h
index 400ee623055d..ab57a0dc1ca8 100644
--- a/drivers/gpu/arm/bifrost/mali_kbase_ctx_sched.h
+++ b/drivers/gpu/arm/bifrost/mali_kbase_ctx_sched.h
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2017 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2017-2018 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -25,7 +25,8 @@
 
 #include <mali_kbase.h>
 
-/* The Context Scheduler manages address space assignment and reference
+/**
+ * The Context Scheduler manages address space assignment and reference
  * counting to kbase_context. The interface has been designed to minimise
  * interactions between the Job Scheduler and Power Management/MMU to support
  * the existing Job Scheduler interface.
@@ -39,35 +40,30 @@
  * code.
  */
 
-/* base_ctx_sched_init - Initialise the context scheduler
+/**
+ * kbase_ctx_sched_init - Initialise the context scheduler
+ * @kbdev: The device for which the context scheduler needs to be initialised
  *
- * @kbdev: The device for which the context scheduler needs to be
- *         initialised
+ * This must be called during device initialisation. The number of hardware
+ * address spaces must already be established before calling this function.
  *
  * Return: 0 for success, otherwise failure
- *
- * This must be called during device initilisation. The number of hardware
- * address spaces must already be established before calling this function.
  */
 int kbase_ctx_sched_init(struct kbase_device *kbdev);
 
-/* base_ctx_sched_term - Terminate the context scheduler
- *
- * @kbdev: The device for which the context scheduler needs to be
- *         terminated
+/**
+ * kbase_ctx_sched_term - Terminate the context scheduler
+ * @kbdev: The device for which the context scheduler needs to be terminated
  *
  * This must be called during device termination after all contexts have been
  * destroyed.
  */
 void kbase_ctx_sched_term(struct kbase_device *kbdev);
 
-/* kbase_ctx_sched_retain_ctx - Retain a reference to the @ref kbase_context
- *
+/**
+ * kbase_ctx_sched_retain_ctx - Retain a reference to the @ref kbase_context
  * @kctx: The context to which to retain a reference
  *
- * Return: The address space that the context has been assigned to or
- *         KBASEP_AS_NR_INVALID if no address space was available.
- *
  * This function should be called whenever an address space should be assigned
  * to a context and programmed onto the MMU. It should typically be called
  * when jobs are ready to be submitted to the GPU.
@@ -77,11 +73,14 @@ void kbase_ctx_sched_term(struct kbase_device *kbdev);
  *
  * The kbase_device::mmu_hw_mutex and kbase_device::hwaccess_lock locks must be
  * held whilst calling this function.
+ *
+ * Return: The address space that the context has been assigned to or
+ *         KBASEP_AS_NR_INVALID if no address space was available.
  */
 int kbase_ctx_sched_retain_ctx(struct kbase_context *kctx);
 
-/* kbase_ctx_sched_retain_ctx_refcount
- *
+/**
+ * kbase_ctx_sched_retain_ctx_refcount
  * @kctx: The context to which to retain a reference
  *
  * This function only retains a reference to the context. It must be called
@@ -95,8 +94,8 @@ int kbase_ctx_sched_retain_ctx(struct kbase_context *kctx);
  */
 void kbase_ctx_sched_retain_ctx_refcount(struct kbase_context *kctx);
 
-/* kbase_ctx_sched_release_ctx - Release a reference to the @ref kbase_context
- *
+/**
+ * kbase_ctx_sched_release_ctx - Release a reference to the @ref kbase_context
  * @kctx: The context from which to release a reference
  *
  * This function should be called whenever an address space could be unassigned
@@ -108,8 +107,8 @@ void kbase_ctx_sched_retain_ctx_refcount(struct kbase_context *kctx);
  */
 void kbase_ctx_sched_release_ctx(struct kbase_context *kctx);
 
-/* kbase_ctx_sched_remove_ctx - Unassign previously assigned address space
- *
+/**
+ * kbase_ctx_sched_remove_ctx - Unassign previously assigned address space
  * @kctx: The context to be removed
  *
  * This function should be called when a context is being destroyed. The
@@ -121,8 +120,8 @@ void kbase_ctx_sched_release_ctx(struct kbase_context *kctx);
  */
 void kbase_ctx_sched_remove_ctx(struct kbase_context *kctx);
 
-/* kbase_ctx_sched_restore_all_as - Reprogram all address spaces
- *
+/**
+ * kbase_ctx_sched_restore_all_as - Reprogram all address spaces
  * @kbdev: The device for which address spaces to be reprogrammed
  *
  * This function shall reprogram all address spaces previously assigned to
diff --git a/drivers/gpu/arm/bifrost/mali_kbase_debug_mem_view.c b/drivers/gpu/arm/bifrost/mali_kbase_debug_mem_view.c
index d2c57cab1177..857fe9712ef9 100644
--- a/drivers/gpu/arm/bifrost/mali_kbase_debug_mem_view.c
+++ b/drivers/gpu/arm/bifrost/mali_kbase_debug_mem_view.c
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2013-2017 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2013-2018 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -224,7 +224,7 @@ static int debug_mem_open(struct inode *i, struct file *file)
 	}
 
 	ret = debug_mem_zone_open(&kctx->reg_rbtree_exec, mem_data);
-	if (0 != ret) {
+	if (ret != 0) {
 		kbase_gpu_vm_unlock(kctx);
 		goto out;
 	}
diff --git a/drivers/gpu/arm/bifrost/mali_kbase_defs.h b/drivers/gpu/arm/bifrost/mali_kbase_defs.h
index c53d9eb002a4..db1b9abfb2bb 100644
--- a/drivers/gpu/arm/bifrost/mali_kbase_defs.h
+++ b/drivers/gpu/arm/bifrost/mali_kbase_defs.h
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2011-2017 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2011-2018 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -147,11 +147,7 @@
 
 #define MIDGARD_MMU_LEVEL(x) (x)
 
-#if MIDGARD_MMU_VA_BITS > 39
 #define MIDGARD_MMU_TOPLEVEL    MIDGARD_MMU_LEVEL(0)
-#else
-#define MIDGARD_MMU_TOPLEVEL    MIDGARD_MMU_LEVEL(1)
-#endif
 
 #define MIDGARD_MMU_BOTTOMLEVEL MIDGARD_MMU_LEVEL(3)
 
@@ -179,6 +175,9 @@
 #define KBASE_KATOM_FLAG_BEEN_SOFT_STOPPPED (1<<1)
 /** Atom has been previously retried to execute */
 #define KBASE_KATOM_FLAGS_RERUN (1<<2)
+/* Atom submitted with JOB_CHAIN_FLAG bit set in JS_CONFIG_NEXT register, helps to
+ * disambiguate short-running job chains during soft/hard stopping of jobs
+ */
 #define KBASE_KATOM_FLAGS_JOBCHAIN (1<<3)
 /** Atom has been previously hard-stopped. */
 #define KBASE_KATOM_FLAG_BEEN_HARD_STOPPED (1<<4)
@@ -229,8 +228,27 @@ struct kbase_context;
 struct kbase_device;
 struct kbase_as;
 struct kbase_mmu_setup;
+struct kbase_ipa_model_vinstr_data;
 
 #ifdef CONFIG_DEBUG_FS
+/**
+ * struct base_job_fault_event - keeps track of the atom which faulted or which
+ *                               completed after the faulty atom but before the
+ *                               debug data for faulty atom was dumped.
+ *
+ * @event_code:     event code for the atom, should != BASE_JD_EVENT_DONE for the
+ *                  atom which faulted.
+ * @katom:          pointer to the atom for which job fault occurred or which completed
+ *                  after the faulty atom.
+ * @job_fault_work: work item, queued only for the faulty atom, which waits for
+ *                  the dumping to get completed and then does the bottom half
+ *                  of job done for the atoms which followed the faulty atom.
+ * @head:           List head used to store the atom in the global list of faulty
+ *                  atoms or context specific list of atoms which got completed
+ *                  during the dump.
+ * @reg_offset:     offset of the register to be dumped next, only applicable for
+ *                  the faulty atom.
+ */
 struct base_job_fault_event {
 
 	u32 event_code;
@@ -242,6 +260,12 @@ struct base_job_fault_event {
 
 #endif
 
+/**
+ * struct kbase_jd_atom_dependency - Contains the dependency info for an atom.
+ * @atom:          pointer to the dependee atom.
+ * @dep_type:      type of dependency on the dependee @atom, i.e. order or data
+ *                 dependency. BASE_JD_DEP_TYPE_INVALID indicates no dependency.
+ */
 struct kbase_jd_atom_dependency {
 	struct kbase_jd_atom *atom;
 	u8 dep_type;
@@ -281,14 +305,14 @@ struct kbase_io_history {
 };
 
 /**
- * @brief The function retrieves a read-only reference to the atom field from
- * the  kbase_jd_atom_dependency structure
+ * kbase_jd_katom_dep_atom - Retrieves a read-only reference to the
+ *                           dependee atom.
+ * @dep:   pointer to the dependency info structure.
  *
- * @param[in] dep kbase jd atom dependency.
- *
- * @return readonly reference to dependent ATOM.
+ * Return: readonly reference to dependee atom.
  */
-static inline const struct kbase_jd_atom * kbase_jd_katom_dep_atom(const struct kbase_jd_atom_dependency *dep)
+static inline const struct kbase_jd_atom *
+kbase_jd_katom_dep_atom(const struct kbase_jd_atom_dependency *dep)
 {
 	LOCAL_ASSERT(dep != NULL);
 
@@ -296,12 +320,11 @@ static inline const struct kbase_jd_atom * kbase_jd_katom_dep_atom(const struct
 }
 
 /**
- * @brief The function retrieves a read-only reference to the dependency type field from
- * the  kbase_jd_atom_dependency structure
+ * kbase_jd_katom_dep_type -  Retrieves the dependency type info
  *
- * @param[in] dep kbase jd atom dependency.
+ * @dep:   pointer to the dependency info structure.
  *
- * @return A dependency type value.
+ * Return: the type of dependency there is on the dependee atom.
  */
 static inline u8 kbase_jd_katom_dep_type(const struct kbase_jd_atom_dependency *dep)
 {
@@ -311,12 +334,11 @@ static inline u8 kbase_jd_katom_dep_type(const struct kbase_jd_atom_dependency *
 }
 
 /**
- * @brief Setter macro for dep_atom array entry in kbase_jd_atom
- *
- * @param[in] dep    The kbase jd atom dependency.
- * @param[in] a      The ATOM to be set as a dependency.
- * @param     type   The ATOM dependency type to be set.
- *
+ * kbase_jd_katom_dep_set - sets up the dependency info structure
+ *                          as per the values passed.
+ * @const_dep:    pointer to the dependency info structure to be setup.
+ * @a:            pointer to the dependee atom.
+ * @type:         type of dependency there is on the dependee atom.
  */
 static inline void kbase_jd_katom_dep_set(const struct kbase_jd_atom_dependency *const_dep,
 		struct kbase_jd_atom *a, u8 type)
@@ -332,10 +354,9 @@ static inline void kbase_jd_katom_dep_set(const struct kbase_jd_atom_dependency
 }
 
 /**
- * @brief Setter macro for dep_atom array entry in kbase_jd_atom
- *
- * @param[in] dep    The kbase jd atom dependency to be cleared.
+ * kbase_jd_katom_dep_clear - resets the dependency info structure
  *
+ * @const_dep:    pointer to the dependency info structure to be setup.
  */
 static inline void kbase_jd_katom_dep_clear(const struct kbase_jd_atom_dependency *const_dep)
 {
@@ -349,74 +370,212 @@ static inline void kbase_jd_katom_dep_clear(const struct kbase_jd_atom_dependenc
 	dep->dep_type = BASE_JD_DEP_TYPE_INVALID;
 }
 
+/**
+ * enum kbase_atom_gpu_rb_state - The state of an atom, pertinent after it becomes
+ *                                runnable, with respect to job slot ringbuffer/fifo.
+ * @KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB: Atom not currently present in slot fifo, which
+ *                                implies that either atom has not become runnable
+ *                                due to dependency or has completed the execution
+ *                                on GPU.
+ * @KBASE_ATOM_GPU_RB_WAITING_BLOCKED: Atom has been added to slot fifo but is blocked
+ *                                due to cross slot dependency, can't be submitted to GPU.
+ * @KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_PREV: Atom has been added to slot fifo but
+ *                                is waiting for the completion of previously added atoms
+ *                                in current & other slots, as their protected mode
+ *                                requirements do not match with the current atom.
+ * @KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_TRANSITION: Atom is in slot fifo and is
+ *                                waiting for completion of protected mode transition,
+ *                                needed before the atom is submitted to GPU.
+ * @KBASE_ATOM_GPU_RB_WAITING_FOR_CORE_AVAILABLE: Atom is in slot fifo but is waiting
+ *                                for the cores, which are needed to execute the job
+ *                                chain represented by the atom, to become available
+ * @KBASE_ATOM_GPU_RB_WAITING_AFFINITY: Atom is in slot fifo but is blocked on
+ *                                affinity due to rmu workaround for Hw issue 8987.
+ * @KBASE_ATOM_GPU_RB_READY:      Atom is in slot fifo and can be submitted to GPU.
+ * @KBASE_ATOM_GPU_RB_SUBMITTED:  Atom is in slot fifo and has been submitted to GPU.
+ * @KBASE_ATOM_GPU_RB_RETURN_TO_JS: Atom must be returned to JS due to some failure,
+ *                                but only after the previously added atoms in fifo
+ *                                have completed or have also been returned to JS.
+ */
 enum kbase_atom_gpu_rb_state {
-	/* Atom is not currently present in slot ringbuffer */
 	KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB,
-	/* Atom is in slot ringbuffer but is blocked on a previous atom */
 	KBASE_ATOM_GPU_RB_WAITING_BLOCKED,
-	/* Atom is in slot ringbuffer but is waiting for a previous protected
-	 * mode transition to complete */
 	KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_PREV,
-	/* Atom is in slot ringbuffer but is waiting for proected mode
-	 * transition */
 	KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_TRANSITION,
-	/* Atom is in slot ringbuffer but is waiting for cores to become
-	 * available */
 	KBASE_ATOM_GPU_RB_WAITING_FOR_CORE_AVAILABLE,
-	/* Atom is in slot ringbuffer but is blocked on affinity */
 	KBASE_ATOM_GPU_RB_WAITING_AFFINITY,
-	/* Atom is in slot ringbuffer and ready to run */
 	KBASE_ATOM_GPU_RB_READY,
-	/* Atom is in slot ringbuffer and has been submitted to the GPU */
 	KBASE_ATOM_GPU_RB_SUBMITTED,
-	/* Atom must be returned to JS as soon as it reaches the head of the
-	 * ringbuffer due to a previous failure */
 	KBASE_ATOM_GPU_RB_RETURN_TO_JS = -1
 };
 
+/**
+ * enum kbase_atom_enter_protected_state - The state of an atom with respect to the
+ *                      preparation for GPU's entry into protected mode, becomes
+ *                      pertinent only after atom's state with respect to slot
+ *                      ringbuffer is KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_TRANSITION
+ * @KBASE_ATOM_ENTER_PROTECTED_CHECK:  Starting state. Check if there are any atoms
+ *                      currently submitted to GPU and protected mode transition is
+ *                      not already in progress.
+ * @KBASE_ATOM_ENTER_PROTECTED_VINSTR: Wait for vinstr to suspend before entry into
+ *                      protected mode.
+ * @KBASE_ATOM_ENTER_PROTECTED_IDLE_L2: Wait for the L2 to become idle in preparation
+ *                      for the coherency change. L2 shall be powered down and GPU shall
+ *                      come out of fully coherent mode before entering protected mode.
+ * @KBASE_ATOM_ENTER_PROTECTED_FINISHED: End state; Prepare coherency change and switch
+ *                      GPU to protected mode.
+ */
 enum kbase_atom_enter_protected_state {
-	/*
-	 * Starting state:
-	 * Check if a transition into protected mode is required.
-	 *
-	 * NOTE: The integer value of this must
-	 *       match KBASE_ATOM_EXIT_PROTECTED_CHECK.
+	/**
+	 * NOTE: The integer value of this must match KBASE_ATOM_EXIT_PROTECTED_CHECK.
 	 */
 	KBASE_ATOM_ENTER_PROTECTED_CHECK = 0,
-	/* Wait for vinstr to suspend. */
 	KBASE_ATOM_ENTER_PROTECTED_VINSTR,
-	/* Wait for the L2 to become idle in preparation for
-	 * the coherency change. */
 	KBASE_ATOM_ENTER_PROTECTED_IDLE_L2,
-	/* End state;
-	 * Prepare coherency change. */
 	KBASE_ATOM_ENTER_PROTECTED_FINISHED,
 };
 
+/**
+ * enum kbase_atom_exit_protected_state - The state of an atom with respect to the
+ *                      preparation for GPU's exit from protected mode, becomes
+ *                      pertinent only after atom's state with respect to slot
+ *                      ringbuffer is KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_TRANSITION
+ * @KBASE_ATOM_EXIT_PROTECTED_CHECK: Starting state. Check if there are any atoms
+ *                      currently submitted to GPU and protected mode transition is
+ *                      not already in progress.
+ * @KBASE_ATOM_EXIT_PROTECTED_IDLE_L2: Wait for the L2 to become idle in preparation
+ *                      for the reset, as exiting protected mode requires a reset.
+ * @KBASE_ATOM_EXIT_PROTECTED_RESET: Issue the reset to trigger exit from protected mode
+ * @KBASE_ATOM_EXIT_PROTECTED_RESET_WAIT: End state, Wait for the reset to complete
+ */
 enum kbase_atom_exit_protected_state {
-	/*
-	 * Starting state:
-	 * Check if a transition out of protected mode is required.
-	 *
-	 * NOTE: The integer value of this must
-	 *       match KBASE_ATOM_ENTER_PROTECTED_CHECK.
+	/**
+	 * NOTE: The integer value of this must match KBASE_ATOM_ENTER_PROTECTED_CHECK.
 	 */
 	KBASE_ATOM_EXIT_PROTECTED_CHECK = 0,
-	/* Wait for the L2 to become idle in preparation
-	 * for the reset. */
 	KBASE_ATOM_EXIT_PROTECTED_IDLE_L2,
-	/* Issue the protected reset. */
 	KBASE_ATOM_EXIT_PROTECTED_RESET,
-	/* End state;
-	 * Wait for the reset to complete. */
 	KBASE_ATOM_EXIT_PROTECTED_RESET_WAIT,
 };
 
+/**
+ * struct kbase_ext_res - Contains the info for external resources referred
+ *                        by an atom, which have been mapped on GPU side.
+ * @gpu_address:          Start address of the memory region allocated for
+ *                        the resource from GPU virtual address space.
+ * @alloc:                pointer to physical pages tracking object, set on
+ *                        mapping the external resource on GPU side.
+ */
 struct kbase_ext_res {
 	u64 gpu_address;
 	struct kbase_mem_phy_alloc *alloc;
 };
 
+/**
+ * struct kbase_jd_atom  - object representing the atom, containing the complete
+ *                         state and attributes of an atom.
+ * @work:                  work item for the bottom half processing of the atom,
+ *                         by JD or JS, after it got executed on GPU or the input
+ *                         fence got signaled
+ * @start_timestamp:       time at which the atom was submitted to the GPU, by
+ *                         updating the JS_HEAD_NEXTn register.
+ * @udata:                 copy of the user data sent for the atom in base_jd_submit.
+ * @kctx:                  Pointer to the base context with which the atom is associated.
+ * @dep_head:              Array of 2 list heads, pointing to the two list of atoms
+ *                         which are blocked due to dependency on this atom.
+ * @dep_item:              Array of 2 list heads, used to store the atom in the list of
+ *                         other atoms depending on the same dependee atom.
+ * @dep:                   Array containing the dependency info for the 2 atoms on which
+ *                         the atom depends upon.
+ * @jd_item:               List head used during job dispatch job_done processing - as
+ *                         dependencies may not be entirely resolved at this point,
+ *                         we need to use a separate list head.
+ * @in_jd_list:            flag set to true if atom's @jd_item is currently on a list,
+ *                         prevents atom being processed twice.
+ * @nr_extres:             number of external resources referenced by the atom.
+ * @extres:                pointer to the location containing info about @nr_extres
+ *                         external resources referenced by the atom.
+ * @device_nr:             indicates the coregroup with which the atom is associated,
+ *                         when BASE_JD_REQ_SPECIFIC_COHERENT_GROUP specified.
+ * @affinity:              bitmask of the shader cores on which the atom can execute.
+ * @jc:                    GPU address of the job-chain.
+ * @softjob_data:          Copy of data read from the user space buffer that @jc
+ *                         points to.
+ * @coreref_state:         state of the atom with respect to retention of shader
+ *                         cores for affinity & power management.
+ * @fence:                 Stores either an input or output sync fence, depending
+ *                         on soft-job type
+ * @sync_waiter:           Pointer to the sync fence waiter structure passed to the
+ *                         callback function on signaling of the input fence.
+ * @dma_fence:             object containing pointers to both input & output fences
+ *                         and other related members used for explicit sync through
+ *                         soft jobs and for the implicit synchronization required
+ *                         on access to external resources.
+ * @event_code:            Event code for the job chain represented by the atom, both
+ *                         HW and low-level SW events are represented by event codes.
+ * @core_req:              bitmask of BASE_JD_REQ_* flags specifying either Hw or Sw
+ *                         requirements for the job chain represented by the atom.
+ * @ticks:                 Number of scheduling ticks for which atom has been running
+ *                         on the GPU.
+ * @sched_priority:        Priority of the atom for Job scheduling, as per the
+ *                         KBASE_JS_ATOM_SCHED_PRIO_*.
+ * @poking:                Indicates whether poking of MMU is ongoing for the atom,
+ *                         as a WA for the issue HW_ISSUE_8316.
+ * @completed:             Wait queue to wait upon for the completion of atom.
+ * @status:                Indicates at high level at what stage the atom is in,
+ *                         as per KBASE_JD_ATOM_STATE_*, that whether it is not in
+ *                         use or its queued in JD or given to JS or submitted to Hw
+ *                         or it completed the execution on Hw.
+ * @work_id:               used for GPU tracepoints, its a snapshot of the 'work_id'
+ *                         counter in kbase_jd_context which is incremented on
+ *                         every call to base_jd_submit.
+ * @slot_nr:               Job slot chosen for the atom.
+ * @atom_flags:            bitmask of KBASE_KATOM_FLAG* flags capturing the exact
+ *                         low level state of the atom.
+ * @retry_count:           Number of times this atom has been retried. Used by replay
+ *                         soft job.
+ * @gpu_rb_state:          bitmnask of KBASE_ATOM_GPU_RB_* flags, precisely tracking
+ *                         atom's state after it has entered Job scheduler on becoming
+ *                         runnable. Atom could be blocked due to cross slot dependency
+ *                         or waiting for the shader cores to become available or
+ *                         waiting for protected mode transitions to complete.
+ * @need_cache_flush_cores_retained: flag indicating that manual flush of GPU
+ *                         cache is needed for the atom and the shader cores used
+ *                         for atom have been kept on.
+ * @blocked:               flag indicating that atom's resubmission to GPU is
+ *                         blocked till the work item is scheduled to return the
+ *                         atom to JS.
+ * @pre_dep:               Pointer to atom that this atom has same-slot dependency on
+ * @post_dep:              Pointer to atom that has same-slot dependency on this atom
+ * @x_pre_dep:             Pointer to atom that this atom has cross-slot dependency on
+ * @x_post_dep:            Pointer to atom that has cross-slot dependency on this atom
+ * @flush_id:              The GPU's flush count recorded at the time of submission,
+ *                         used for the cache flush optimisation
+ * @fault_event:           Info for dumping the debug data on Job fault.
+ * @queue:                 List head used for 4 different purposes :
+ *                         Adds atom to the list of dma-buf fence waiting atoms.
+ *                         Adds atom to the list of atoms blocked due to cross
+ *                         slot dependency.
+ *                         Adds atom to the list of softjob atoms for which JIT
+ *                         allocation has been deferred
+ *                         Adds atom to the list of softjob atoms waiting for the
+ *                         signaling of fence.
+ * @jit_node:              Used to keep track of all JIT free/alloc jobs in submission order
+ * @jit_blocked:           Flag indicating that JIT allocation requested through
+ *                         softjob atom will be reattempted after the impending
+ *                         free of other active JIT allocations.
+ * @will_fail_event_code:  If non-zero, this indicates that the atom will fail
+ *                         with the set event_code when the atom is processed.
+ *                         Used for special handling of atoms, which have a data
+ *                         dependency on the failed atoms.
+ * @protected_state:       State of the atom, as per KBASE_ATOM_(ENTER|EXIT)_PROTECTED_*,
+ *                         when transitioning into or out of protected mode. Atom will
+ *                         be either entering or exiting the protected mode.
+ * @runnable_tree_node:    The node added to context's job slot specific rb tree
+ *                         when the atom becomes runnable.
+ * @age:                   Age of atom relative to other atoms in the context, is
+ *                         snapshot of the age_count counter in kbase context.
+ */
 struct kbase_jd_atom {
 	struct work_struct work;
 	ktime_t start_timestamp;
@@ -427,12 +586,7 @@ struct kbase_jd_atom {
 	struct list_head dep_head[2];
 	struct list_head dep_item[2];
 	const struct kbase_jd_atom_dependency dep[2];
-	/* List head used during job dispatch job_done processing - as
-	 * dependencies may not be entirely resolved at this point, we need to
-	 * use a separate list head. */
 	struct list_head jd_item;
-	/* true if atom's jd_item is currently on a list. Prevents atom being
-	 * processed twice. */
 	bool in_jd_list;
 
 	u16 nr_extres;
@@ -441,11 +595,9 @@ struct kbase_jd_atom {
 	u32 device_nr;
 	u64 affinity;
 	u64 jc;
-	/* Copy of data read from the user space buffer that jc points to */
 	void *softjob_data;
 	enum kbase_atom_coreref_state coreref_state;
 #if defined(CONFIG_SYNC)
-	/* Stores either an input or output fence, depending on soft-job type */
 	struct sync_fence *fence;
 	struct sync_fence_waiter sync_waiter;
 #endif				/* CONFIG_SYNC */
@@ -519,26 +671,22 @@ struct kbase_jd_atom {
 
 	/* Note: refer to kbasep_js_atom_retained_state, which will take a copy of some of the following members */
 	enum base_jd_event_code event_code;
-	base_jd_core_req core_req;	    /**< core requirements */
+	base_jd_core_req core_req;
 
 	u32 ticks;
-	/* JS atom priority with respect to other atoms on its kctx. */
 	int sched_priority;
 
-	int poking;		/* BASE_HW_ISSUE_8316 */
+	int poking;
 
 	wait_queue_head_t completed;
 	enum kbase_jd_atom_state status;
 #ifdef CONFIG_GPU_TRACEPOINTS
 	int work_id;
 #endif
-	/* Assigned after atom is completed. Used to check whether PRLAM-10676 workaround should be applied */
 	int slot_nr;
 
 	u32 atom_flags;
 
-	/* Number of times this atom has been retried. Used by replay soft job.
-	 */
 	int retry_count;
 
 	enum kbase_atom_gpu_rb_state gpu_rb_state;
@@ -547,45 +695,25 @@ struct kbase_jd_atom {
 
 	atomic_t blocked;
 
-	/* Pointer to atom that this atom has same-slot dependency on */
 	struct kbase_jd_atom *pre_dep;
-	/* Pointer to atom that has same-slot dependency on this atom */
 	struct kbase_jd_atom *post_dep;
 
-	/* Pointer to atom that this atom has cross-slot dependency on */
 	struct kbase_jd_atom *x_pre_dep;
-	/* Pointer to atom that has cross-slot dependency on this atom */
 	struct kbase_jd_atom *x_post_dep;
 
-	/* The GPU's flush count recorded at the time of submission, used for
-	 * the cache flush optimisation */
 	u32 flush_id;
 
-	struct kbase_jd_atom_backend backend;
 #ifdef CONFIG_DEBUG_FS
 	struct base_job_fault_event fault_event;
 #endif
 
-	/* List head used for three different purposes:
-	 *  1. Overflow list for JS ring buffers. If an atom is ready to run,
-	 *     but there is no room in the JS ring buffer, then the atom is put
-	 *     on the ring buffer's overflow list using this list node.
-	 *  2. List of waiting soft jobs.
-	 */
 	struct list_head queue;
 
-	/* Used to keep track of all JIT free/alloc jobs in submission order
-	 */
 	struct list_head jit_node;
 	bool jit_blocked;
 
-	/* If non-zero, this indicates that the atom will fail with the set
-	 * event_code when the atom is processed. */
 	enum base_jd_event_code will_fail_event_code;
 
-	/* Atoms will only ever be transitioning into, or out of
-	 * protected mode so we do not need two separate fields.
-	 */
 	union {
 		enum kbase_atom_enter_protected_state enter;
 		enum kbase_atom_exit_protected_state exit;
@@ -593,7 +721,6 @@ struct kbase_jd_atom {
 
 	struct rb_node runnable_tree_node;
 
-	/* 'Age' of atom relative to other atoms in the context. */
 	u32 age;
 };
 
@@ -612,38 +739,57 @@ static inline bool kbase_jd_katom_is_protected(const struct kbase_jd_atom *katom
 
 #define KBASE_JD_DEP_QUEUE_SIZE 256
 
+/**
+ * struct kbase_jd_context  - per context object encapsulating all the Job dispatcher
+ *                            related state.
+ * @lock:                     lock to serialize the updates made to the Job dispatcher
+ *                            state and kbase_jd_atom objects.
+ * @sched_info:               Structure encapsulating all the Job scheduling info.
+ * @atoms:                    Array of the objects representing atoms, containing
+ *                            the complete state and attributes of an atom.
+ * @job_nr:                   Tracks the number of atoms being processed by the
+ *                            kbase. This includes atoms that are not tracked by
+ *                            scheduler: 'not ready to run' & 'dependency-only' jobs.
+ * @zero_jobs_wait:           Waitq that reflects whether there are no jobs
+ *                            (including SW-only dependency jobs). This is set
+ *                            when no jobs are present on the ctx, and clear when
+ *                            there are jobs.
+ *                            This must be updated atomically with @job_nr.
+ *                            note: Job Dispatcher knows about more jobs than the
+ *                            Job Scheduler as it is unaware of jobs that are
+ *                            blocked on dependencies and SW-only dependency jobs.
+ *                            This waitq can be waited upon to find out when the
+ *                            context jobs are all done/cancelled (including those
+ *                            that might've been blocked on dependencies) - and so,
+ *                            whether it can be terminated. However, it should only
+ *                            be terminated once it is not present in the run-pool.
+ *                            Since the waitq is only set under @lock, the waiter
+ *                            should also briefly obtain and drop @lock to guarantee
+ *                            that the setter has completed its work on the kbase_context
+ * @job_done_wq:              Workqueue to which the per atom work item is queued
+ *                            for bottom half processing when the atom completes
+ *                            execution on GPU or the input fence get signaled.
+ * @tb_lock:                  Lock to serialize the write access made to @tb to
+ *                            to store the register access trace messages.
+ * @tb:                       Pointer to the Userspace accessible buffer storing
+ *                            the trace messages for register read/write accesses
+ *                            made by the Kbase. The buffer is filled in circular
+ *                            fashion.
+ * @tb_wrap_offset:           Offset to the end location in the trace buffer, the
+ *                            write pointer is moved to the beginning on reaching
+ *                            this offset.
+ * @work_id:                  atomic variable used for GPU tracepoints, incremented
+ *                            on every call to base_jd_submit.
+ */
 struct kbase_jd_context {
 	struct mutex lock;
 	struct kbasep_js_kctx_info sched_info;
 	struct kbase_jd_atom atoms[BASE_JD_ATOM_COUNT];
 
-	/** Tracks all job-dispatch jobs.  This includes those not tracked by
-	 * the scheduler: 'not ready to run' and 'dependency-only' jobs. */
 	u32 job_nr;
 
-	/** Waitq that reflects whether there are no jobs (including SW-only
-	 * dependency jobs). This is set when no jobs are present on the ctx,
-	 * and clear when there are jobs.
-	 *
-	 * @note: Job Dispatcher knows about more jobs than the Job Scheduler:
-	 * the Job Scheduler is unaware of jobs that are blocked on dependencies,
-	 * and SW-only dependency jobs.
-	 *
-	 * This waitq can be waited upon to find out when the context jobs are all
-	 * done/cancelled (including those that might've been blocked on
-	 * dependencies) - and so, whether it can be terminated. However, it should
-	 * only be terminated once it is not present in the run-pool (see
-	 * kbasep_js_kctx_info::ctx::is_scheduled).
-	 *
-	 * Since the waitq is only set under kbase_jd_context::lock,
-	 * the waiter should also briefly obtain and drop kbase_jd_context::lock to
-	 * guarentee that the setter has completed its work on the kbase_context
-	 *
-	 * This must be updated atomically with:
-	 * - kbase_jd_context::job_nr */
 	wait_queue_head_t zero_jobs_wait;
 
-	/** Job Done workqueue. */
 	struct workqueue_struct *job_done_wq;
 
 	spinlock_t tb_lock;
@@ -675,15 +821,34 @@ struct kbase_mmu_setup {
 };
 
 /**
- * Important: Our code makes assumptions that a struct kbase_as structure is always at
- * kbase_device->as[number]. This is used to recover the containing
- * struct kbase_device from a struct kbase_as structure.
- *
- * Therefore, struct kbase_as structures must not be allocated anywhere else.
+ * struct kbase_as   - object representing an address space of GPU.
+ * @number:            Index at which this address space structure is present
+ *                     in an array of address space structures embedded inside the
+ *                     struct kbase_device.
+ * @pf_wq:             Workqueue for processing work items related to Bus fault
+ *                     and Page fault handling.
+ * @work_pagefault:    Work item for the Page fault handling.
+ * @work_busfault:     Work item for the Bus fault handling.
+ * @fault_type:        Type of fault which occured for this address space,
+ *                     regular/unexpected Bus or Page fault.
+ * @protected_mode:    Flag indicating whether the fault occurred in protected
+ *                     mode or not.
+ * @fault_status:      Records the fault status as reported by Hw.
+ * @fault_addr:        Records the faulting address.
+ * @fault_extra_addr:  Records the secondary fault address.
+ * @current_setup:     Stores the MMU configuration for this address space.
+ * @poke_wq:           Workqueue to process the work items queue for poking the
+ *                     MMU as a WA for BASE_HW_ISSUE_8316.
+ * @poke_work:         Work item to do the poking of MMU for this address space.
+ * @poke_refcount:     Refcount for the need of poking MMU. While the refcount is
+ *                     non zero the poking of MMU will continue.
+ *                     Protected by hwaccess_lock.
+ * @poke_state:        State indicating whether poking is in progress or it has
+ *                     been stopped. Protected by hwaccess_lock.
+ * @poke_timer:        Timer used to schedule the poking at regular intervals.
  */
 struct kbase_as {
 	int number;
-
 	struct workqueue_struct *pf_wq;
 	struct work_struct work_pagefault;
 	struct work_struct work_busfault;
@@ -692,15 +857,10 @@ struct kbase_as {
 	u32 fault_status;
 	u64 fault_addr;
 	u64 fault_extra_addr;
-
 	struct kbase_mmu_setup current_setup;
-
-	/* BASE_HW_ISSUE_8316  */
 	struct workqueue_struct *poke_wq;
 	struct work_struct poke_work;
-	/** Protected by hwaccess_lock */
 	int poke_refcount;
-	/** Protected by hwaccess_lock */
 	kbase_as_poke_state poke_state;
 	struct hrtimer poke_timer;
 };
@@ -738,6 +898,37 @@ enum kbase_trace_code {
 #define KBASE_TRACE_FLAG_REFCOUNT (((u8)1) << 0)
 #define KBASE_TRACE_FLAG_JOBSLOT  (((u8)1) << 1)
 
+/**
+ * struct kbase_trace - object representing a trace message added to trace buffer
+ *                      kbase_device::trace_rbuf
+ * @timestamp:          CPU timestamp at which the trace message was added.
+ * @thread_id:          id of the thread in the context of which trace message
+ *                      was added.
+ * @cpu:                indicates which CPU the @thread_id was scheduled on when
+ *                      the trace message was added.
+ * @ctx:                Pointer to the kbase context for which the trace message
+ *                      was added. Will be NULL for certain trace messages like
+ *                      for traces added corresponding to power management events.
+ *                      Will point to the appropriate context corresponding to
+ *                      job-slot & context's reference count related events.
+ * @katom:              indicates if the trace message has atom related info.
+ * @atom_number:        id of the atom for which trace message was added.
+ *                      Only valid if @katom is true.
+ * @atom_udata:         Copy of the user data sent for the atom in base_jd_submit.
+ *                      Only valid if @katom is true.
+ * @gpu_addr:           GPU address of the job-chain represented by atom. Could
+ *                      be valid even if @katom is false.
+ * @info_val:           value specific to the type of event being traced. For the
+ *                      case where @katom is true, will be set to atom's affinity,
+ *                      i.e. bitmask of shader cores chosen for atom's execution.
+ * @code:               Identifies the event, refer enum kbase_trace_code.
+ * @jobslot:            job-slot for which trace message was added, valid only for
+ *                      job-slot management events.
+ * @refcount:           reference count for the context, valid for certain events
+ *                      related to scheduler core and policy.
+ * @flags:              indicates if info related to @jobslot & @refcount is present
+ *                      in the trace message, used during dumping of the message.
+ */
 struct kbase_trace {
 	struct timespec timestamp;
 	u32 thread_id;
@@ -904,19 +1095,23 @@ struct kbase_pm_device_data {
 
 /**
  * struct kbase_mem_pool - Page based memory pool for kctx/kbdev
- * @kbdev:     Kbase device where memory is used
- * @cur_size:  Number of free pages currently in the pool (may exceed @max_size
- *             in some corner cases)
- * @max_size:  Maximum number of free pages in the pool
- * @order:     order = 0 refers to a pool of 4 KB pages
- *             order = 9 refers to a pool of 2 MB pages (2^9 * 4KB = 2 MB)
- * @pool_lock: Lock protecting the pool - must be held when modifying @cur_size
- *             and @page_list
- * @page_list: List of free pages in the pool
- * @reclaim:   Shrinker for kernel reclaim of free pages
- * @next_pool: Pointer to next pool where pages can be allocated when this pool
- *             is empty. Pages will spill over to the next pool when this pool
- *             is full. Can be NULL if there is no next pool.
+ * @kbdev:        Kbase device where memory is used
+ * @cur_size:     Number of free pages currently in the pool (may exceed
+ *                @max_size in some corner cases)
+ * @max_size:     Maximum number of free pages in the pool
+ * @order:        order = 0 refers to a pool of 4 KB pages
+ *                order = 9 refers to a pool of 2 MB pages (2^9 * 4KB = 2 MB)
+ * @pool_lock:    Lock protecting the pool - must be held when modifying
+ *                @cur_size and @page_list
+ * @page_list:    List of free pages in the pool
+ * @reclaim:      Shrinker for kernel reclaim of free pages
+ * @next_pool:    Pointer to next pool where pages can be allocated when this
+ *                pool is empty. Pages will spill over to the next pool when
+ *                this pool is full. Can be NULL if there is no next pool.
+ * @dying:        true if the pool is being terminated, and any ongoing
+ *                operations should be abandoned
+ * @dont_reclaim: true if the shrinker is forbidden from reclaiming memory from
+ *                this pool, eg during a grow operation
  */
 struct kbase_mem_pool {
 	struct kbase_device *kbdev;
@@ -928,6 +1123,9 @@ struct kbase_mem_pool {
 	struct shrinker     reclaim;
 
 	struct kbase_mem_pool *next_pool;
+
+	bool dying;
+	bool dont_reclaim;
 };
 
 /**
@@ -963,9 +1161,261 @@ struct kbase_mmu_mode const *kbase_mmu_mode_get_aarch64(void);
 
 #define DEVNAME_SIZE	16
 
+/**
+ * struct kbase_device   - Object representing an instance of GPU platform device,
+ *                         allocated from the probe method of mali driver.
+ * @hw_quirks_sc:          Configuration to be used for the shader cores as per
+ *                         the HW issues present in the GPU.
+ * @hw_quirks_tiler:       Configuration to be used for the Tiler as per the HW
+ *                         issues present in the GPU.
+ * @hw_quirks_mmu:         Configuration to be used for the MMU as per the HW
+ *                         issues present in the GPU.
+ * @hw_quirks_jm:          Configuration to be used for the Job Manager as per
+ *                         the HW issues present in the GPU.
+ * @entry:                 Links the device instance to the global list of GPU
+ *                         devices. The list would have as many entries as there
+ *                         are GPU device instances.
+ * @dev:                   Pointer to the kernel's generic/base representation
+ *                         of the GPU platform device.
+ * @mdev:                  Pointer to the miscellaneous device registered to
+ *                         provide Userspace access to kernel driver through the
+ *                         device file /dev/malixx.
+ * @reg_start:             Base address of the region in physical address space
+ *                         where GPU registers have been mapped.
+ * @reg_size:              Size of the region containing GPU registers
+ * @reg:                   Kernel virtual address of the region containing GPU
+ *                         registers, using which Driver will access the registers.
+ * @irqs:                  Array containing IRQ resource info for 3 types of
+ *                         interrupts : Job scheduling, MMU & GPU events (like
+ *                         power management, cache etc.)
+ * @clock:                 Pointer to the input clock resource (having an id of 0),
+ *                         referenced by the GPU device node.
+ * @regulator:             Pointer to the struct corresponding to the regulator
+ *                         for GPU device
+ * @devname:               string containing the name used for GPU device instance,
+ *                         miscellaneous device is registered using the same name.
+ * @model:                 Pointer, valid only when Driver is compiled to not access
+ *                         the real GPU Hw, to the dummy model which tries to mimic
+ *                         to some extent the state & behavior of GPU Hw in response
+ *                         to the register accesses made by the Driver.
+ * @irq_slab:              slab cache for allocating the work items queued when
+ *                         model mimics raising of IRQ to cause an interrupt on CPU.
+ * @irq_workq:             workqueue for processing the irq work items.
+ * @serving_job_irq:       function to execute work items queued when model mimics
+ *                         the raising of JS irq, mimics the interrupt handler
+ *                         processing JS interrupts.
+ * @serving_gpu_irq:       function to execute work items queued when model mimics
+ *                         the raising of GPU irq, mimics the interrupt handler
+ *                         processing GPU interrupts.
+ * @serving_mmu_irq:       function to execute work items queued when model mimics
+ *                         the raising of MMU irq, mimics the interrupt handler
+ *                         processing MMU interrupts.
+ * @reg_op_lock:           lock used by model to serialize the handling of register
+ *                         accesses made by the driver.
+ * @pm:                    Per device object for storing data for power management
+ *                         framework.
+ * @js_data:               Per device object encapsulating the current context of
+ *                         Job Scheduler, which is global to the device and is not
+ *                         tied to any particular struct kbase_context running on
+ *                         the device
+ * @mem_pool:              Object containing the state for global pool of 4KB size
+ *                         physical pages which can be used by all the contexts.
+ * @lp_mem_pool:           Object containing the state for global pool of 2MB size
+ *                         physical pages which can be used by all the contexts.
+ * @memdev:                keeps track of the in use physical pages allocated by
+ *                         the Driver.
+ * @mmu_mode:              Pointer to the object containing methods for programming
+ *                         the MMU, depending on the type of MMU supported by Hw.
+ * @as:                    Array of objects representing address spaces of GPU.
+ * @as_free:               Bitpattern of free/available address space lots
+ * @as_to_kctx:            Array of pointers to struct kbase_context, having
+ *                         GPU adrress spaces assigned to them.
+ * @mmu_mask_change:       Lock to serialize the access to MMU interrupt mask
+ *                         register used in the handling of Bus & Page faults.
+ * @gpu_props:             Object containing complete information about the
+ *                         configuration/properties of GPU HW device in use.
+ * @hw_issues_mask:        List of SW workarounds for HW issues
+ * @hw_features_mask:      List of available HW features.
+ * shader_inuse_bitmap:    Bitmaps of shader cores that are currently in use.
+ *                         These should be kept up to date by the job scheduler.
+ *                         The bit to be set in this bitmap should already be set
+ *                         in the @shader_needed_bitmap.
+ *                         @pm.power_change_lock should be held when accessing
+ *                         these members.
+ * @shader_inuse_cnt:      Usage count for each of the 64 shader cores
+ * @shader_needed_bitmap:  Bitmaps of cores the JS needs for jobs ready to run
+ *                         kbase_pm_check_transitions_nolock() should be called
+ *                         when the bitmap is modified to update the power
+ *                         management system and allow transitions to occur.
+ * @shader_needed_cnt:     Count for each of the 64 shader cores, incremented
+ *                         when the core is requested for use and decremented
+ *                         later when the core is known to be powered up for use.
+ * @tiler_inuse_cnt:       Usage count for the Tiler block. @tiler_needed_cnt
+ *                         should be non zero at the time of incrementing the
+ *                         usage count.
+ * @tiler_needed_cnt:      Count for the Tiler block shader cores, incremented
+ *                         when Tiler is requested for use and decremented
+ *                         later when Tiler is known to be powered up for use.
+ * @disjoint_event:        struct for keeping track of the disjoint information,
+ *                         that whether the GPU is in a disjoint state and the
+ *                         number of disjoint events that have occurred on GPU.
+ * @l2_users_count:        Refcount for tracking users of the l2 cache, e.g.
+ *                         when using hardware counter instrumentation.
+ * @shader_available_bitmap: Bitmap of shader cores that are currently available,
+ *                         powered up and the power policy is happy for jobs
+ *                         to be submitted to these cores. These are updated
+ *                         by the power management code. The job scheduler
+ *                         should avoid submitting new jobs to any cores
+ *                         that are not marked as available.
+ * @tiler_available_bitmap: Bitmap of tiler units that are currently available.
+ * @l2_available_bitmap:    Bitmap of the currently available Level 2 caches.
+ * @stack_available_bitmap: Bitmap of the currently available Core stacks.
+ * @shader_ready_bitmap:    Bitmap of shader cores that are ready (powered on)
+ * @shader_transitioning_bitmap: Bitmap of shader cores that are currently changing
+ *                         power state.
+ * @nr_hw_address_spaces:  Number of address spaces actually available in the
+ *                         GPU, remains constant after driver initialisation.
+ * @nr_user_address_spaces: Number of address spaces available to user contexts
+ * @hwcnt:                  Structure used for instrumentation and HW counters
+ *                         dumping
+ * @vinstr_ctx:            vinstr context created per device
+ * @trace_lock:            Lock to serialize the access to trace buffer.
+ * @trace_first_out:       Index/offset in the trace buffer at which the first
+ *                         unread message is present.
+ * @trace_next_in:         Index/offset in the trace buffer at which the new
+ *                         message will be written.
+ * @trace_rbuf:            Pointer to the buffer storing debug messages/prints
+ *                         tracing the various events in Driver.
+ *                         The buffer is filled in circular fashion.
+ * @reset_timeout_ms:      Number of milliseconds to wait for the soft stop to
+ *                         complete for the GPU jobs before proceeding with the
+ *                         GPU reset.
+ * @cacheclean_lock:       Lock to serialize the clean & invalidation of GPU caches,
+ *                         between Job Manager backend & Instrumentation code.
+ * @platform_context:      Platform specific private data to be accessed by
+ *                         platform specific config files only.
+ * @kctx_list:             List of kbase_contexts created for the device, including
+ *                         the kbase_context created for vinstr_ctx.
+ * @kctx_list_lock:        Lock protecting concurrent accesses to @kctx_list.
+ * @devfreq_profile:       Describes devfreq profile for the Mali GPU device, passed
+ *                         to devfreq_add_device() to add devfreq feature to Mali
+ *                         GPU device.
+ * @devfreq:               Pointer to devfreq structure for Mali GPU device,
+ *                         returned on the call to devfreq_add_device().
+ * @current_freq:          The real frequency, corresponding to @current_nominal_freq,
+ *                         at which the Mali GPU device is currently operating, as
+ *                         retrieved from @opp_table in the target callback of
+ *                         @devfreq_profile.
+ * @current_nominal_freq:  The nominal frequency currently used for the Mali GPU
+ *                         device as retrieved through devfreq_recommended_opp()
+ *                         using the freq value passed as an argument to target
+ *                         callback of @devfreq_profile
+ * @current_voltage:       The voltage corresponding to @current_nominal_freq, as
+ *                         retrieved through dev_pm_opp_get_voltage().
+ * @current_core_mask:     bitmask of shader cores that are currently desired &
+ *                         enabled, corresponding to @current_nominal_freq as
+ *                         retrieved from @opp_table in the target callback of
+ *                         @devfreq_profile.
+ * @opp_table:             Pointer to the lookup table for converting between nominal
+ *                         OPP (operating performance point) frequency, and real
+ *                         frequency and core mask. This table is constructed according
+ *                         to operating-points-v2-mali table in devicetree.
+ * @num_opps:              Number of operating performance points available for the Mali
+ *                         GPU device.
+ * @devfreq_cooling:       Pointer returned on registering devfreq cooling device
+ *                         corresponding to @devfreq.
+ * @ipa_use_configured_model: set to TRUE when configured model is used for IPA and
+ *                         FALSE when fallback model is used.
+ * @ipa:                   Top level structure for IPA, containing pointers to both
+ *                         configured & fallback models.
+ * @timeline:              Stores the global timeline tracking information.
+ * @job_fault_debug:       Flag to control the dumping of debug data for job faults,
+ *                         set when the 'job_fault' debugfs file is opened.
+ * @mali_debugfs_directory: Root directory for the debugfs files created by the driver
+ * @debugfs_ctx_directory: Directory inside the @mali_debugfs_directory containing
+ *                         a sub-directory for every context.
+ * @debugfs_as_read_bitmap: bitmap of address spaces for which the bus or page fault
+ *                         has occurred.
+ * @job_fault_wq:          Waitqueue to block the job fault dumping daemon till the
+ *                         occurrence of a job fault.
+ * @job_fault_resume_wq:   Waitqueue on which every context with a faulty job wait
+ *                         for the job fault dumping to complete before they can
+ *                         do bottom half of job done for the atoms which followed
+ *                         the faulty atom.
+ * @job_fault_resume_workq: workqueue to process the work items queued for the faulty
+ *                         atoms, whereby the work item function waits for the dumping
+ *                         to get completed.
+ * @job_fault_event_list:  List of atoms, each belonging to a different context, which
+ *                         generated a job fault.
+ * @job_fault_event_lock:  Lock to protect concurrent accesses to @job_fault_event_list
+ * @regs_dump_debugfs_data: Contains the offset of register to be read through debugfs
+ *                         file "read_register".
+ * @kbase_profiling_controls: Profiling controls set by gator to control frame buffer
+ *                         dumping and s/w counter reporting.
+ * @force_replay_limit:    Number of gpu jobs, having replay atoms associated with them,
+ *                         that are run before a job is forced to fail and replay.
+ *                         Set to 0 to disable forced failures.
+ * @force_replay_count:    Count of gpu jobs, having replay atoms associated with them,
+ *                         between forced failures. Incremented on each gpu job which
+ *                         has replay atoms dependent on it. A gpu job is forced to
+ *                         fail once this is greater than or equal to @force_replay_limit
+ * @force_replay_core_req: Core requirements, set through the sysfs file, for the replay
+ *                         job atoms to consider the associated gpu job for forceful
+ *                         failure and replay. May be zero
+ * @force_replay_random:   Set to 1 to randomize the @force_replay_limit, in the
+ *                         range of 1 - KBASEP_FORCE_REPLAY_RANDOM_LIMIT.
+ * @ctx_num:               Total number of contexts created for the device.
+ * @io_history:            Pointer to an object keeping a track of all recent
+ *                         register accesses. The history of register accesses
+ *                         can be read through "regs_history" debugfs file.
+ * @hwaccess:              Contains a pointer to active kbase context and GPU
+ *                         backend specific data for HW access layer.
+ * @faults_pending:        Count of page/bus faults waiting for bottom half processing
+ *                         via workqueues.
+ * @poweroff_pending:      Set when power off operation for GPU is started, reset when
+ *                         power on for GPU is started.
+ * @infinite_cache_active_default: Set to enable using infinite cache for all the
+ *                         allocations of a new context.
+ * @mem_pool_max_size_default: Initial/default value for the maximum size of both
+ *                         types of pool created for a new context.
+ * @current_gpu_coherency_mode: coherency mode in use, which can be different
+ *                         from @system_coherency, when using protected mode.
+ * @system_coherency:      coherency mode as retrieved from the device tree.
+ * @cci_snoop_enabled:     Flag to track when CCI snoops have been enabled.
+ * @snoop_enable_smc:      SMC function ID to call into Trusted firmware to
+ *                         enable cache snooping. Value of 0 indicates that it
+ *                         is not used.
+ * @snoop_disable_smc:     SMC function ID to call disable cache snooping.
+ * @protected_ops:         Pointer to the methods for switching in or out of the
+ *                         protected mode, as per the @protected_dev being used.
+ * @protected_dev:         Pointer to the protected mode switcher device attached
+ *                         to the GPU device retrieved through device tree if
+ *                         GPU do not support protected mode switching natively.
+ * @protected_mode:        set to TRUE when GPU is put into protected mode
+ * @protected_mode_transition: set to TRUE when GPU is transitioning into or
+ *                         out of protected mode.
+ * @protected_mode_support: set to true if protected mode is supported.
+ * @buslogger:              Pointer to the structure required for interfacing
+ *                          with the bus logger module to set the size of buffer
+ *                          used by the module for capturing bus logs.
+ * @irq_reset_flush:        Flag to indicate that GPU reset is in-flight and flush of
+ *                          IRQ + bottom half is being done, to prevent the writes
+ *                          to MMU_IRQ_CLEAR & MMU_IRQ_MASK registers.
+ * @inited_subsys:          Bitmap of inited sub systems at the time of device probe.
+ *                          Used during device remove or for handling error in probe.
+ * @hwaccess_lock:          Lock, which can be taken from IRQ context, to serialize
+ *                          the updates made to Job dispatcher + scheduler states.
+ * @mmu_hw_mutex:           Protects access to MMU operations and address space
+ *                          related state.
+ * @serialize_jobs:         Currently used mode for serialization of jobs, both
+ *                          intra & inter slots serialization is supported.
+ * @backup_serialize_jobs:  Copy of the original value of @serialize_jobs taken
+ *                          when GWT is enabled. Used to restore the original value
+ *                          on disabling of GWT.
+ * @js_ctx_scheduling_mode: Context scheduling mode currently being used by
+ *                          Job Scheduler
+ */
 struct kbase_device {
-	s8 slot_submit_count_irq[BASE_JM_MAX_NR_SLOTS];
-
 	u32 hw_quirks_sc;
 	u32 hw_quirks_tiler;
 	u32 hw_quirks_mmu;
@@ -1007,12 +1457,7 @@ struct kbase_device {
 	struct kbase_mmu_mode const *mmu_mode;
 
 	struct kbase_as as[BASE_MAX_NR_AS];
-	/* The below variables (as_free and as_to_kctx) are managed by the
-	 * Context Scheduler. The kbasep_js_device_data::runpool_irq::lock must
-	 * be held whilst accessing these.
-	 */
 	u16 as_free; /* Bitpattern of free Address Spaces */
-	/* Mapping from active Address Spaces to kbase_context */
 	struct kbase_context *as_to_kctx[BASE_MAX_NR_AS];
 
 
@@ -1020,53 +1465,28 @@ struct kbase_device {
 
 	struct kbase_gpu_props gpu_props;
 
-	/** List of SW workarounds for HW issues */
 	unsigned long hw_issues_mask[(BASE_HW_ISSUE_END + BITS_PER_LONG - 1) / BITS_PER_LONG];
-	/** List of features available */
 	unsigned long hw_features_mask[(BASE_HW_FEATURE_END + BITS_PER_LONG - 1) / BITS_PER_LONG];
 
-	/* Bitmaps of cores that are currently in use (running jobs).
-	 * These should be kept up to date by the job scheduler.
-	 *
-	 * pm.power_change_lock should be held when accessing these members.
-	 *
-	 * kbase_pm_check_transitions_nolock() should be called when bits are
-	 * cleared to update the power management system and allow transitions to
-	 * occur. */
 	u64 shader_inuse_bitmap;
 
-	/* Refcount for cores in use */
 	u32 shader_inuse_cnt[64];
 
-	/* Bitmaps of cores the JS needs for jobs ready to run */
 	u64 shader_needed_bitmap;
 
-	/* Refcount for cores needed */
 	u32 shader_needed_cnt[64];
 
 	u32 tiler_inuse_cnt;
 
 	u32 tiler_needed_cnt;
 
-	/* struct for keeping track of the disjoint information
-	 *
-	 * The state  is > 0 if the GPU is in a disjoint state. Otherwise 0
-	 * The count is the number of disjoint events that have occurred on the GPU
-	 */
 	struct {
 		atomic_t count;
 		atomic_t state;
 	} disjoint_event;
 
-	/* Refcount for tracking users of the l2 cache, e.g. when using hardware counter instrumentation. */
 	u32 l2_users_count;
 
-	/* Bitmaps of cores that are currently available (powered up and the power policy is happy for jobs to be
-	 * submitted to these cores. These are updated by the power management code. The job scheduler should avoid
-	 * submitting new jobs to any cores that are not marked as available.
-	 *
-	 * pm.power_change_lock should be held when accessing these members.
-	 */
 	u64 shader_available_bitmap;
 	u64 tiler_available_bitmap;
 	u64 l2_available_bitmap;
@@ -1075,10 +1495,9 @@ struct kbase_device {
 	u64 shader_ready_bitmap;
 	u64 shader_transitioning_bitmap;
 
-	s8 nr_hw_address_spaces;			  /**< Number of address spaces in the GPU (constant after driver initialisation) */
-	s8 nr_user_address_spaces;			  /**< Number of address spaces available to user contexts */
+	s8 nr_hw_address_spaces;
+	s8 nr_user_address_spaces;
 
-	/* Structure used for instrumentation and HW counters dumping */
 	struct kbase_hwcnt {
 		/* The lock should be used when accessing any of the following members */
 		spinlock_t lock;
@@ -1102,10 +1521,8 @@ struct kbase_device {
 
 	struct mutex cacheclean_lock;
 
-	/* Platform specific private data to be accessed by mali_kbase_config_xxx.c only */
 	void *platform_context;
 
-	/* List of kbase_contexts created */
 	struct list_head        kctx_list;
 	struct mutex            kctx_list_lock;
 
@@ -1124,13 +1541,32 @@ struct kbase_device {
 #else
 	struct thermal_cooling_device *devfreq_cooling;
 #endif
-	/* Current IPA model - true for configured model, false for fallback */
 	atomic_t ipa_use_configured_model;
 	struct {
 		/* Access to this struct must be with ipa.lock held */
 		struct mutex lock;
 		struct kbase_ipa_model *configured_model;
 		struct kbase_ipa_model *fallback_model;
+
+		/*
+		 * gpu_active_callback - Inform IPA that GPU is now active
+		 * @model_data: Pointer to model data
+		 */
+		void (*gpu_active_callback)(
+				struct kbase_ipa_model_vinstr_data *model_data);
+
+		/*
+		 * gpu_idle_callback - Inform IPA that GPU is now idle
+		 * @model_data: Pointer to model data
+		 */
+		void (*gpu_idle_callback)(
+				struct kbase_ipa_model_vinstr_data *model_data);
+
+		/* Model data to pass to ipa_gpu_active/idle() */
+		struct kbase_ipa_model_vinstr_data *model_data;
+
+		/* true if IPA is currently using vinstr */
+		bool vinstr_active;
 	} ipa;
 #endif /* CONFIG_DEVFREQ_THERMAL */
 #endif /* CONFIG_MALI_BIFROST_DEVFREQ */
@@ -1140,79 +1576,52 @@ struct kbase_device {
 	struct kbase_trace_kbdev_timeline timeline;
 #endif
 
-	/*
-	 * Control for enabling job dump on failure, set when control debugfs
-	 * is opened.
-	 */
 	bool job_fault_debug;
 
 #ifdef CONFIG_DEBUG_FS
-	/* directory for debugfs entries */
 	struct dentry *mali_debugfs_directory;
-	/* Root directory for per context entry */
 	struct dentry *debugfs_ctx_directory;
 
 #ifdef CONFIG_MALI_BIFROST_DEBUG
-	/* bit for each as, set if there is new data to report */
 	u64 debugfs_as_read_bitmap;
 #endif /* CONFIG_MALI_BIFROST_DEBUG */
 
-	/* failed job dump, used for separate debug process */
 	wait_queue_head_t job_fault_wq;
 	wait_queue_head_t job_fault_resume_wq;
 	struct workqueue_struct *job_fault_resume_workq;
 	struct list_head job_fault_event_list;
 	spinlock_t job_fault_event_lock;
-	struct kbase_context *kctx_fault;
 
 #if !MALI_CUSTOMER_RELEASE
-	/* Per-device data for register dumping interface */
 	struct {
-		u16 reg_offset; /* Offset of a GPU_CONTROL register to be
-				   dumped upon request */
+		u16 reg_offset;
 	} regs_dump_debugfs_data;
 #endif /* !MALI_CUSTOMER_RELEASE */
 #endif /* CONFIG_DEBUG_FS */
 
-	/* fbdump profiling controls set by gator */
 	u32 kbase_profiling_controls[FBDUMP_CONTROL_MAX];
 
 
 #if MALI_CUSTOMER_RELEASE == 0
-	/* Number of jobs that are run before a job is forced to fail and
-	 * replay. May be KBASEP_FORCE_REPLAY_DISABLED, to disable forced
-	 * failures. */
 	int force_replay_limit;
-	/* Count of jobs between forced failures. Incremented on each job. A
-	 * job is forced to fail once this is greater than or equal to
-	 * force_replay_limit. */
 	int force_replay_count;
-	/* Core requirement for jobs to be failed and replayed. May be zero. */
 	base_jd_core_req force_replay_core_req;
-	/* true if force_replay_limit should be randomized. The random
-	 * value will be in the range of 1 - KBASEP_FORCE_REPLAY_RANDOM_LIMIT.
-	 */
 	bool force_replay_random;
 #endif
 
-	/* Total number of created contexts */
 	atomic_t ctx_num;
 
 #ifdef CONFIG_DEBUG_FS
-	/* Holds the most recent register accesses */
 	struct kbase_io_history io_history;
 #endif /* CONFIG_DEBUG_FS */
 
 	struct kbase_hwaccess_data hwaccess;
 
-	/* Count of page/bus faults waiting for workqueues to process */
 	atomic_t faults_pending;
 
-	/* true if GPU is powered off or power off operation is in progress */
 	bool poweroff_pending;
 
 
-	/* defaults for new context created for this device */
 #if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 4, 0))
 	bool infinite_cache_active_default;
 #else
@@ -1220,73 +1629,45 @@ struct kbase_device {
 #endif
 	size_t mem_pool_max_size_default;
 
-	/* current gpu coherency mode */
 	u32 current_gpu_coherency_mode;
-	/* system coherency mode  */
 	u32 system_coherency;
-	/* Flag to track when cci snoops have been enabled on the interface */
+
 	bool cci_snoop_enabled;
 
-	/* SMC function IDs to call into Trusted firmware to enable/disable
-	 * cache snooping. Value of 0 indicates that they are not used
-	 */
 	u32 snoop_enable_smc;
 	u32 snoop_disable_smc;
 
-	/* Protected mode operations */
 	struct protected_mode_ops *protected_ops;
 
-	/* Protected device attached to this kbase device */
 	struct protected_mode_device *protected_dev;
 
-	/*
-	 * true when GPU is put into protected mode
-	 */
 	bool protected_mode;
 
-	/*
-	 * true when GPU is transitioning into or out of protected mode
-	 */
 	bool protected_mode_transition;
 
-	/*
-	 * true if protected mode is supported
-	 */
 	bool protected_mode_support;
 
-
-#ifdef CONFIG_MALI_BIFROST_DEBUG
-	wait_queue_head_t driver_inactive_wait;
-	bool driver_inactive;
-#endif /* CONFIG_MALI_BIFROST_DEBUG */
-
 #ifdef CONFIG_MALI_FPGA_BUS_LOGGER
-	/*
-	 * Bus logger integration.
-	 */
 	struct bus_logger_client *buslogger;
 #endif
-	/* Boolean indicating if an IRQ flush during reset is in progress. */
+
 	bool irq_reset_flush;
 
-	/* list of inited sub systems. Used during terminate/error recovery */
 	u32 inited_subsys;
 
 	spinlock_t hwaccess_lock;
 
-	/* Protects access to MMU operations */
 	struct mutex mmu_hw_mutex;
 
-	/* Current serialization mode. See KBASE_SERIALIZE_* for details */
+	/* See KBASE_SERIALIZE_* for details */
 	u8 serialize_jobs;
 
 #ifdef CONFIG_MALI_JOB_DUMP
-	/* Used to backup status of job serialization mode
-	 * when we use GWT and restore when GWT is disabled.
-	 * GWT uses full serialization mode.
-	 */
 	u8 backup_serialize_jobs;
 #endif
+
+	/* See KBASE_JS_*_PRIORITY_MODE for details. */
+	u32 js_ctx_scheduling_mode;
 };
 
 /**
@@ -1375,10 +1756,239 @@ struct kbase_sub_alloc {
 	DECLARE_BITMAP(sub_pages, SZ_2M / SZ_4K);
 };
 
+/**
+ * struct kbase_context - Object representing an entity, among which GPU is
+ *                        scheduled and gets its own GPU address space.
+ *                        Created when the device file /dev/malixx is opened.
+ * @filp:                 Pointer to the struct file corresponding to device file
+ *                        /dev/malixx instance, passed to the file's open method.
+ * @kbdev:                Pointer to the Kbase device for which the context is created.
+ * @id:                   Unique indentifier for the context, indicates the number of
+ *                        contexts which have been created for the device so far.
+ * @api_version:          contains the version number for User/kernel interface,
+ *                        used for compatibility check.
+ * @pgd:                  Physical address of the page allocated for the top level
+ *                        page table of the context, this will be used for MMU Hw
+ *                        programming as the address translation will start from
+ *                        the top level page table.
+ * @event_list:           list of posted events about completed atoms, to be sent to
+ *                        event handling thread of Userpsace.
+ * @event_coalesce_list:  list containing events corresponding to successive atoms
+ *                        which have requested deferred delivery of the completion
+ *                        events to Userspace.
+ * @event_mutex:          Lock to protect the concurrent access to @event_list &
+ *                        @event_mutex.
+ * @event_closed:         Flag set through POST_TERM ioctl, indicates that Driver
+ *                        should stop posting events and also inform event handling
+ *                        thread that context termination is in progress.
+ * @event_workq:          Workqueue for processing work items corresponding to atoms
+ *                        that do not return an event to Userspace or have to perform
+ *                        a replay job
+ * @event_count:          Count of the posted events to be consumed by Userspace.
+ * @event_coalesce_count: Count of the events present in @event_coalesce_list.
+ * @flags:                bitmap of enums from kbase_context_flags, indicating the
+ *                        state & attributes for the context.
+ * @setup_complete:       Indicates if the setup for context has completed, i.e.
+ *                        flags have been set for the context. Driver allows only
+ *                        2 ioctls until the setup is done. Valid only for
+ *                        @api_version value 0.
+ * @setup_in_progress:    Indicates if the context's setup is in progress and other
+ *                        setup calls during that shall be rejected.
+ * @mmu_teardown_pages:   Buffer of 4 Pages in size, used to cache the entries of
+ *                        top & intermediate level page tables to avoid repeated
+ *                        calls to kmap_atomic during the MMU teardown.
+ * @aliasing_sink_page:   Special page used for KBASE_MEM_TYPE_ALIAS allocations,
+ *                        which can alias number of memory regions. The page is
+ *                        represent a region where it is mapped with a write-alloc
+ *                        cache setup, typically used when the write result of the
+ *                        GPU isn't needed, but the GPU must write anyway.
+ * @mem_partials_lock:    Lock for protecting the operations done on the elements
+ *                        added to @mem_partials list.
+ * @mem_partials:         List head for the list of large pages, 2MB in size, which
+ *                        which have been split into 4 KB pages and are used
+ *                        partially for the allocations >= 2 MB in size.
+ * @mmu_lock:             Lock to serialize the accesses made to multi level GPU
+ *                        page tables, maintained for every context.
+ * @reg_lock:             Lock used for GPU virtual address space management operations,
+ *                        like adding/freeing a memory region in the address space.
+ *                        Can be converted to a rwlock ?.
+ * @reg_rbtree_same:      RB tree of the memory regions allocated from the SAME_VA
+ *                        zone of the GPU virtual address space. Used for allocations
+ *                        having the same value for GPU & CPU virtual address.
+ * @reg_rbtree_exec:      RB tree of the memory regions allocated from the EXEC
+ *                        zone of the GPU virtual address space. Used for
+ *                        allocations containing executable code for
+ *                        shader programs.
+ * @reg_rbtree_custom:    RB tree of the memory regions allocated from the CUSTOM_VA
+ *                        zone of the GPU virtual address space.
+ * @cookies:              Bitmask containing of BITS_PER_LONG bits, used mainly for
+ *                        SAME_VA allocations to defer the reservation of memory region
+ *                        (from the GPU virtual address space) from base_mem_alloc
+ *                        ioctl to mmap system call. This helps returning unique
+ *                        handles, disguised as GPU VA, to Userspace from base_mem_alloc
+ *                        and later retrieving the pointer to memory region structure
+ *                        in the mmap handler.
+ * @pending_regions:      Array containing pointers to memory region structures,
+ *                        used in conjunction with @cookies bitmask mainly for
+ *                        providing a mechansim to have the same value for CPU &
+ *                        GPU virtual address.
+ * @event_queue:          Wait queue used for blocking the thread, which consumes
+ *                        the base_jd_event corresponding to an atom, when there
+ *                        are no more posted events.
+ * @tgid:                 thread group id of the process, whose thread opened the
+ *                        device file /dev/malixx instance to create a context.
+ * @pid:                  id of the thread, corresponding to process @tgid, which
+ *                        actually which opened the device file.
+ * @jctx:                 object encapsulating all the Job dispatcher related state,
+ *                        including the array of atoms.
+ * @used_pages:           Keeps a track of the number of 4KB physical pages in use
+ *                        for the context.
+ * @nonmapped_pages:      Updated in the same way as @used_pages, except for the case
+ *                        when special tracking page is freed by userspace where it
+ *                        is reset to 0.
+ * @mem_pool:             Object containing the state for the context specific pool of
+ *                        4KB size physical pages.
+ * @lp_mem_pool:          Object containing the state for the context specific pool of
+ *                        2MB size physical pages.
+ * @reclaim:              Shrinker object registered with the kernel containing
+ *                        the pointer to callback function which is invoked under
+ *                        low memory conditions. In the callback function Driver
+ *                        frees up the memory for allocations marked as
+ *                        evictable/reclaimable.
+ * @evict_list:           List head for the list containing the allocations which
+ *                        can be evicted or freed up in the shrinker callback.
+ * @waiting_soft_jobs:    List head for the list containing softjob atoms, which
+ *                        are either waiting for the event set operation, or waiting
+ *                        for the signaling of input fence or waiting for the GPU
+ *                        device to powered on so as to dump the CPU/GPU timestamps.
+ * @waiting_soft_jobs_lock: Lock to protect @waiting_soft_jobs list from concurrent
+ *                        accesses.
+ * @dma_fence:            Object containing list head for the list of dma-buf fence
+ *                        waiting atoms and the waitqueue to process the work item
+ *                        queued for the atoms blocked on the signaling of dma-buf
+ *                        fences.
+ * @as_nr:                id of the address space being used for the scheduled in
+ *                        context. This is effectively part of the Run Pool, because
+ *                        it only has a valid setting (!=KBASEP_AS_NR_INVALID) whilst
+ *                        the context is scheduled in. The hwaccess_lock must be held
+ *                        whilst accessing this.
+ *                        If the context relating to this value of as_nr is required,
+ *                        then the context must be retained to ensure that it doesn't
+ *                        disappear whilst it is being used. Alternatively, hwaccess_lock
+ *                        can be held to ensure the context doesn't disappear (but this
+ *                        has restrictions on what other locks can be taken simutaneously).
+ * @refcount:             Keeps track of the number of users of this context. A user
+ *                        can be a job that is available for execution, instrumentation
+ *                        needing to 'pin' a context for counter collection, etc.
+ *                        If the refcount reaches 0 then this context is considered
+ *                        inactive and the previously programmed AS might be cleared
+ *                        at any point.
+ *                        Generally the reference count is incremented when the context
+ *                        is scheduled in and an atom is pulled from the context's per
+ *                        slot runnable tree.
+ * @mm_update_lock:       lock used for handling of special tracking page.
+ * @process_mm:           Pointer to the memory descriptor of the process which
+ *                        created the context. Used for accounting the physical
+ *                        pages used for GPU allocations, done for the context,
+ *                        to the memory consumed by the process.
+ * @same_va_end:          End address of the SAME_VA zone (in 4KB page units)
+ * @timeline:             Object tracking the number of atoms currently in flight for
+ *                        the context and thread group id of the process, i.e. @tgid.
+ * @mem_profile_data:     Buffer containing the profiling information provided by
+ *                        Userspace, can be read through the mem_profile debugfs file.
+ * @mem_profile_size:     Size of the @mem_profile_data.
+ * @mem_profile_lock:     Lock to serialize the operations related to mem_profile
+ *                        debugfs file.
+ * @kctx_dentry:          Pointer to the debugfs directory created for every context,
+ *                        inside kbase_device::debugfs_ctx_directory, containing
+ *                        context specific files.
+ * @reg_dump:             Buffer containing a register offset & value pair, used
+ *                        for dumping job fault debug info.
+ * @job_fault_count:      Indicates that a job fault occurred for the context and
+ *                        dumping of its debug info is in progress.
+ * @job_fault_resume_event_list: List containing atoms completed after the faulty
+ *                        atom but before the debug data for faulty atom was dumped.
+ * @jsctx_queue:          Per slot & priority arrays of object containing the root
+ *                        of RB-tree holding currently runnable atoms on the job slot
+ *                        and the head item of the linked list of atoms blocked on
+ *                        cross-slot dependencies.
+ * @atoms_pulled:         Total number of atoms currently pulled from the context.
+ * @atoms_pulled_slot:    Per slot count of the number of atoms currently pulled
+ *                        from the context.
+ * @atoms_pulled_slot_pri: Per slot & priority count of the number of atoms currently
+ *                        pulled from the context. hwaccess_lock shall be held when
+ *                        accessing it.
+ * @blocked_js:           Indicates if the context is blocked from submitting atoms
+ *                        on a slot at a given priority. This is set to true, when
+ *                        the atom corresponding to context is soft/hard stopped or
+ *                        removed from the HEAD_NEXT register in response to
+ *                        soft/hard stop.
+ * @slots_pullable:       Bitmask of slots, indicating the slots for which the
+ *                        context has pullable atoms in the runnable tree.
+ * @work:                 Work structure used for deferred ASID assignment.
+ * @vinstr_cli:           Pointer to the legacy userspace vinstr client, there can
+ *                        be only such client per kbase context.
+ * @vinstr_cli_lock:      Lock used for the vinstr ioctl calls made for @vinstr_cli.
+ * @completed_jobs:       List containing completed atoms for which base_jd_event is
+ *                        to be posted.
+ * @work_count:           Number of work items, corresponding to atoms, currently
+ *                        pending on job_done workqueue of @jctx.
+ * @soft_job_timeout:     Timer object used for failing/cancelling the waiting
+ *                        soft-jobs which have been blocked for more than the
+ *                        timeout value used for the soft-jobs
+ * @jit_alloc:            Array of 256 pointers to GPU memory regions, used for
+ *                        for JIT allocations.
+ * @jit_max_allocations:  Maximum number of JIT allocations allowed at once.
+ * @jit_current_allocations: Current number of in-flight JIT allocations.
+ * @jit_current_allocations_per_bin: Current number of in-flight JIT allocations per bin
+ * @jit_version:          version number indicating whether userspace is using
+ *                        old or new version of interface for JIT allocations
+ *	                  1 -> client used KBASE_IOCTL_MEM_JIT_INIT_OLD
+ *	                  2 -> client used KBASE_IOCTL_MEM_JIT_INIT
+ * @jit_active_head:      List containing the JIT allocations which are in use.
+ * @jit_pool_head:        List containing the JIT allocations which have been
+ *                        freed up by userpsace and so not being used by them.
+ *                        Driver caches them to quickly fulfill requests for new
+ *                        JIT allocations. They are released in case of memory
+ *                        pressure as they are put on the @evict_list when they
+ *                        are freed up by userspace.
+ * @jit_destroy_head:     List containing the JIT allocations which were moved to it
+ *                        from @jit_pool_head, in the shrinker callback, after freeing
+ *                        their backing physical pages.
+ * @jit_evict_lock:       Lock used for operations done on JIT allocations and also
+ *                        for accessing @evict_list.
+ * @jit_work:             Work item queued to defer the freeing of memory region when
+ *                        JIT allocation is moved to @jit_destroy_head.
+ * @jit_atoms_head:       A list of the JIT soft-jobs, both alloc & free, in submission
+ *                        order, protected by kbase_jd_context.lock.
+ * @jit_pending_alloc:    A list of JIT alloc soft-jobs for which allocation will be
+ *                        reattempted after the impending free of other active JIT
+ *                        allocations.
+ * @ext_res_meta_head:    A list of sticky external resources which were requested to
+ *                        be mapped on GPU side, through a softjob atom of type
+ *                        EXT_RES_MAP or STICKY_RESOURCE_MAP ioctl.
+ * @drain_pending:        Used to record that a flush/invalidate of the GPU caches was
+ *                        requested from atomic context, so that the next flush request
+ *                        can wait for the flush of GPU writes.
+ * @age_count:            Counter incremented on every call to jd_submit_atom,
+ *                        atom is assigned the snapshot of this counter, which
+ *                        is used to determine the atom's age when it is added to
+ *                        the runnable RB-tree.
+ * @trim_level:           Level of JIT allocation trimming to perform on free (0-100%)
+ * @gwt_enabled:          Indicates if tracking of GPU writes is enabled, protected by
+ *                        kbase_context.reg_lock.
+ * @gwt_was_enabled:      Simple sticky bit flag to know if GWT was ever enabled.
+ * @gwt_current_list:     A list of addresses for which GPU has generated write faults,
+ *                        after the last snapshot of it was sent to userspace.
+ * @gwt_snapshot_list:    Snapshot of the @gwt_current_list for sending to user space.
+ * @priority:             Indicates the context priority. Used along with @atoms_count
+ *                        for context scheduling, protected by hwaccess_lock.
+ * @atoms_count:          Number of gpu atoms currently in use, per priority
+ */
 struct kbase_context {
 	struct file *filp;
 	struct kbase_device *kbdev;
-	u32 id; /* System wide unique id */
+	u32 id;
 	unsigned long api_version;
 	phys_addr_t pgd;
 	struct list_head event_list;
@@ -1402,13 +2012,10 @@ struct kbase_context {
 	struct list_head        mem_partials;
 
 	struct mutex            mmu_lock;
-	struct mutex            reg_lock; /* To be converted to a rwlock? */
-	struct rb_root reg_rbtree_same; /* RB tree of GPU (live) regions,
-					 * SAME_VA zone */
-	struct rb_root reg_rbtree_exec; /* RB tree of GPU (live) regions,
-					 * EXEC zone */
-	struct rb_root reg_rbtree_custom; /* RB tree of GPU (live) regions,
-					 * CUSTOM_VA zone */
+	struct mutex            reg_lock;
+	struct rb_root reg_rbtree_same;
+	struct rb_root reg_rbtree_exec;
+	struct rb_root reg_rbtree_custom;
 
 	unsigned long    cookies;
 	struct kbase_va_region *pending_regions[BITS_PER_LONG];
@@ -1435,24 +2042,9 @@ struct kbase_context {
 		struct workqueue_struct *wq;
 	} dma_fence;
 #endif /* CONFIG_MALI_BIFROST_DMA_FENCE */
-	/** This is effectively part of the Run Pool, because it only has a valid
-	 * setting (!=KBASEP_AS_NR_INVALID) whilst the context is scheduled in
-	 *
-	 * The hwaccess_lock must be held whilst accessing this.
-	 *
-	 * If the context relating to this as_nr is required, you must use
-	 * kbasep_js_runpool_retain_ctx() to ensure that the context doesn't disappear
-	 * whilst you're using it. Alternatively, just hold the hwaccess_lock
-	 * to ensure the context doesn't disappear (but this has restrictions on what other locks
-	 * you can take whilst doing this) */
+
 	int as_nr;
 
-	/* Keeps track of the number of users of this context. A user can be a
-	 * job that is available for execution, instrumentation needing to 'pin'
-	 * a context for counter collection, etc. If the refcount reaches 0 then
-	 * this context is considered inactive and the previously programmed
-	 * AS might be cleared at any point.
-	 */
 	atomic_t refcount;
 
 	/* NOTE:
@@ -1463,28 +2055,19 @@ struct kbase_context {
 	 * All other flags must be added there */
 	spinlock_t         mm_update_lock;
 	struct mm_struct *process_mm;
-	/* End of the SAME_VA zone */
 	u64 same_va_end;
 
 #ifdef CONFIG_MALI_BIFROST_TRACE_TIMELINE
 	struct kbase_trace_kctx_timeline timeline;
 #endif
 #ifdef CONFIG_DEBUG_FS
-	/* Content of mem_profile file */
 	char *mem_profile_data;
-	/* Size of @c mem_profile_data */
 	size_t mem_profile_size;
-	/* Mutex guarding memory profile state */
 	struct mutex mem_profile_lock;
-	/* Memory profile directory under debugfs */
 	struct dentry *kctx_dentry;
 
-	/* for job fault debug */
 	unsigned int *reg_dump;
 	atomic_t job_fault_count;
-	/* This list will keep the following atoms during the dump
-	 * in the same context
-	 */
 	struct list_head job_fault_resume_event_list;
 
 #endif /* CONFIG_DEBUG_FS */
@@ -1492,86 +2075,59 @@ struct kbase_context {
 	struct jsctx_queue jsctx_queue
 		[KBASE_JS_ATOM_SCHED_PRIO_COUNT][BASE_JM_MAX_NR_SLOTS];
 
-	/* Number of atoms currently pulled from this context */
 	atomic_t atoms_pulled;
-	/* Number of atoms currently pulled from this context, per slot */
 	atomic_t atoms_pulled_slot[BASE_JM_MAX_NR_SLOTS];
-	/* Number of atoms currently pulled from this context, per slot and
-	 * priority. Hold hwaccess_lock when accessing */
 	int atoms_pulled_slot_pri[BASE_JM_MAX_NR_SLOTS][
 			KBASE_JS_ATOM_SCHED_PRIO_COUNT];
 
-	/* true if slot is blocked on the given priority. This will be set on a
-	 * soft-stop */
 	bool blocked_js[BASE_JM_MAX_NR_SLOTS][KBASE_JS_ATOM_SCHED_PRIO_COUNT];
 
-	/* Bitmask of slots that can be pulled from */
 	u32 slots_pullable;
 
-	/* Backend specific data */
-	struct kbase_context_backend backend;
-
-	/* Work structure used for deferred ASID assignment */
 	struct work_struct work;
 
-	/* Only one userspace vinstr client per kbase context */
 	struct kbase_vinstr_client *vinstr_cli;
 	struct mutex vinstr_cli_lock;
 
-	/* List of completed jobs waiting for events to be posted */
 	struct list_head completed_jobs;
-	/* Number of work items currently pending on job_done_wq */
 	atomic_t work_count;
 
-	/* Waiting soft-jobs will fail when this timer expires */
 	struct timer_list soft_job_timeout;
 
-	/* JIT allocation management */
 	struct kbase_va_region *jit_alloc[256];
+	u8 jit_max_allocations;
+	u8 jit_current_allocations;
+	u8 jit_current_allocations_per_bin[256];
+	u8 jit_version;
 	struct list_head jit_active_head;
 	struct list_head jit_pool_head;
 	struct list_head jit_destroy_head;
 	struct mutex jit_evict_lock;
 	struct work_struct jit_work;
 
-	/* A list of the JIT soft-jobs in submission order
-	 * (protected by kbase_jd_context.lock)
-	 */
 	struct list_head jit_atoms_head;
-	/* A list of pending JIT alloc soft-jobs (using the 'queue' list_head)
-	 * (protected by kbase_jd_context.lock)
-	 */
 	struct list_head jit_pending_alloc;
 
-	/* External sticky resource management */
 	struct list_head ext_res_meta_head;
 
-	/* Used to record that a drain was requested from atomic context */
 	atomic_t drain_pending;
 
-	/* Current age count, used to determine age for newly submitted atoms */
 	u32 age_count;
 
+	u8 trim_level;
+
 #ifdef CONFIG_MALI_JOB_DUMP
-	/* Used for tracking GPU writes.
-	 * (protected by kbase_context.reg_lock)
-	 */
 	bool gwt_enabled;
 
-	/* Simple sticky bit flag to know if GWT was ever enabled
-	 * (protected by kbase_context.reg_lock)
-	 */
 	bool gwt_was_enabled;
 
-	/* Current list of GPU writes.
-	 * (protected by kbase_context.reg_lock)
-	 */
 	struct list_head gwt_current_list;
 
-	 /* Snapshot of list of GPU writes for sending to user space. */
 	struct list_head gwt_snapshot_list;
-
 #endif
+
+	int priority;
+	s16 atoms_count[KBASE_JS_ATOM_SCHED_PRIO_COUNT];
 };
 
 #ifdef CONFIG_MALI_JOB_DUMP
@@ -1579,17 +2135,17 @@ struct kbase_context {
  * struct kbasep_gwt_list_element - Structure used to collect GPU
  *                                  write faults.
  * @link:                           List head for adding write faults.
- * @handle:                         The handle for the modified region.
- * @offset:                         The offset in pages of the modified
- *                                  part of the region.
+ * @region:                         Details of the region where we have the
+ *                                  faulting page address.
+ * @page_addr:                      Page address where GPU write fault occurred.
  * @num_pages:                      The number of pages modified.
  *
  * Using this structure all GPU write faults are stored in a list.
  */
 struct kbasep_gwt_list_element {
 	struct list_head link;
-	u64 handle;
-	u64 offset;
+	struct kbase_va_region *region;
+	u64 page_addr;
 	u64 num_pages;
 };
 
diff --git a/drivers/gpu/arm/bifrost/mali_kbase_device.c b/drivers/gpu/arm/bifrost/mali_kbase_device.c
index 8aaf4065dd6c..005ae088686b 100644
--- a/drivers/gpu/arm/bifrost/mali_kbase_device.c
+++ b/drivers/gpu/arm/bifrost/mali_kbase_device.c
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2010-2017 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2010-2018 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -254,10 +254,6 @@ int kbase_device_init(struct kbase_device * const kbdev)
 	else
 		kbdev->mmu_mode = kbase_mmu_mode_get_lpae();
 
-#ifdef CONFIG_MALI_BIFROST_DEBUG
-	init_waitqueue_head(&kbdev->driver_inactive_wait);
-#endif /* CONFIG_MALI_BIFROST_DEBUG */
-
 	return 0;
 term_trace:
 	kbasep_trace_term(kbdev);
diff --git a/drivers/gpu/arm/bifrost/mali_kbase_fence.h b/drivers/gpu/arm/bifrost/mali_kbase_fence.h
index 25d2a933e118..414f3f4d7436 100644
--- a/drivers/gpu/arm/bifrost/mali_kbase_fence.h
+++ b/drivers/gpu/arm/bifrost/mali_kbase_fence.h
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2010-2017 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2010-2018 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -139,8 +139,11 @@ static inline bool kbase_fence_out_is_ours(struct kbase_jd_atom *katom)
 static inline int kbase_fence_out_signal(struct kbase_jd_atom *katom,
 					 int status)
 {
-#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 11, 0))
-	katom->dma_fence.fence->error = status;
+#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE && \
+	  KERNEL_VERSION(4, 9, 68) <= LINUX_VERSION_CODE)
+	fence_set_error(katom->dma_fence.fence, status);
+#elif (KERNEL_VERSION(4, 11, 0) <= LINUX_VERSION_CODE)
+	dma_fence_set_error(katom->dma_fence.fence, status);
 #else
 	katom->dma_fence.fence->status = status;
 #endif
diff --git a/drivers/gpu/arm/bifrost/mali_kbase_fence_defs.h b/drivers/gpu/arm/bifrost/mali_kbase_fence_defs.h
index a19d7e0aff1c..475136430649 100644
--- a/drivers/gpu/arm/bifrost/mali_kbase_fence_defs.h
+++ b/drivers/gpu/arm/bifrost/mali_kbase_fence_defs.h
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2010-2017 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2010-2018 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -44,7 +44,12 @@
 #define dma_fence_is_signaled(a) fence_is_signaled(a)
 #define dma_fence_add_callback(a, b, c) fence_add_callback(a, b, c)
 #define dma_fence_remove_callback(a, b) fence_remove_callback(a, b)
+
+#if (KERNEL_VERSION(4, 9, 68) <= LINUX_VERSION_CODE)
+#define dma_fence_get_status(a) (fence_is_signaled(a) ? (a)->error ?: 1 : 0)
+#else
 #define dma_fence_get_status(a) (fence_is_signaled(a) ? (a)->status ?: 1 : 0)
+#endif
 
 #else
 
diff --git a/drivers/gpu/arm/bifrost/mali_kbase_gator_api.c b/drivers/gpu/arm/bifrost/mali_kbase_gator_api.c
index 2fa68067050a..040b2096bec6 100644
--- a/drivers/gpu/arm/bifrost/mali_kbase_gator_api.c
+++ b/drivers/gpu/arm/bifrost/mali_kbase_gator_api.c
@@ -158,7 +158,7 @@ KBASE_EXPORT_SYMBOL(kbase_gator_hwcnt_term_names);
 struct kbase_gator_hwcnt_handles *kbase_gator_hwcnt_init(struct kbase_gator_hwcnt_info *in_out_info)
 {
 	struct kbase_gator_hwcnt_handles *hand;
-	struct kbase_uk_hwcnt_reader_setup setup;
+	struct kbase_ioctl_hwcnt_reader_setup setup;
 	uint32_t dump_size = 0, i = 0;
 
 	if (!in_out_info)
diff --git a/drivers/gpu/arm/bifrost/mali_kbase_gator_hwcnt_names.h b/drivers/gpu/arm/bifrost/mali_kbase_gator_hwcnt_names.h
index b048db8bf834..5d38c7b73553 100644
--- a/drivers/gpu/arm/bifrost/mali_kbase_gator_hwcnt_names.h
+++ b/drivers/gpu/arm/bifrost/mali_kbase_gator_hwcnt_names.h
@@ -2169,6 +2169,8 @@ static const char * const hardware_counters_mali_t88x[] = {
 
 #include "mali_kbase_gator_hwcnt_names_tnox.h"
 
+#include "mali_kbase_gator_hwcnt_names_tgox.h"
+
 #include "mali_kbase_gator_hwcnt_names_tkax.h"
 
 #include "mali_kbase_gator_hwcnt_names_ttrx.h"
diff --git a/drivers/gpu/arm/bifrost/mali_kbase_gator_hwcnt_names_tgox.h b/drivers/gpu/arm/bifrost/mali_kbase_gator_hwcnt_names_tgox.h
new file mode 100644
index 000000000000..72b5266622a9
--- /dev/null
+++ b/drivers/gpu/arm/bifrost/mali_kbase_gator_hwcnt_names_tgox.h
@@ -0,0 +1,296 @@
+/*
+ *
+ * (C) COPYRIGHT 2016-2018 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+/*
+ * This header was autogenerated, it should not be edited.
+ */
+
+#ifndef _KBASE_GATOR_HWCNT_NAMES_TGOX_H_
+#define _KBASE_GATOR_HWCNT_NAMES_TGOX_H_
+
+static const char * const hardware_counters_mali_tGOx[] = {
+	/* Performance counters for the Job Manager */
+	"",
+	"",
+	"",
+	"",
+	"TGOx_MESSAGES_SENT",
+	"TGOx_MESSAGES_RECEIVED",
+	"TGOx_GPU_ACTIVE",
+	"TGOx_IRQ_ACTIVE",
+	"TGOx_JS0_JOBS",
+	"TGOx_JS0_TASKS",
+	"TGOx_JS0_ACTIVE",
+	"",
+	"TGOx_JS0_WAIT_READ",
+	"TGOx_JS0_WAIT_ISSUE",
+	"TGOx_JS0_WAIT_DEPEND",
+	"TGOx_JS0_WAIT_FINISH",
+	"TGOx_JS1_JOBS",
+	"TGOx_JS1_TASKS",
+	"TGOx_JS1_ACTIVE",
+	"",
+	"TGOx_JS1_WAIT_READ",
+	"TGOx_JS1_WAIT_ISSUE",
+	"TGOx_JS1_WAIT_DEPEND",
+	"TGOx_JS1_WAIT_FINISH",
+	"TGOx_JS2_JOBS",
+	"TGOx_JS2_TASKS",
+	"TGOx_JS2_ACTIVE",
+	"",
+	"TGOx_JS2_WAIT_READ",
+	"TGOx_JS2_WAIT_ISSUE",
+	"TGOx_JS2_WAIT_DEPEND",
+	"TGOx_JS2_WAIT_FINISH",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+
+	/* Performance counters for the Tiler */
+	"",
+	"",
+	"",
+	"",
+	"TGOx_TILER_ACTIVE",
+	"TGOx_JOBS_PROCESSED",
+	"TGOx_TRIANGLES",
+	"TGOx_LINES",
+	"TGOx_POINTS",
+	"TGOx_FRONT_FACING",
+	"TGOx_BACK_FACING",
+	"TGOx_PRIM_VISIBLE",
+	"TGOx_PRIM_CULLED",
+	"TGOx_PRIM_CLIPPED",
+	"TGOx_PRIM_SAT_CULLED",
+	"TGOx_BIN_ALLOC_INIT",
+	"TGOx_BIN_ALLOC_OVERFLOW",
+	"TGOx_BUS_READ",
+	"",
+	"TGOx_BUS_WRITE",
+	"TGOx_LOADING_DESC",
+	"TGOx_IDVS_POS_SHAD_REQ",
+	"TGOx_IDVS_POS_SHAD_WAIT",
+	"TGOx_IDVS_POS_SHAD_STALL",
+	"TGOx_IDVS_POS_FIFO_FULL",
+	"TGOx_PREFETCH_STALL",
+	"TGOx_VCACHE_HIT",
+	"TGOx_VCACHE_MISS",
+	"TGOx_VCACHE_LINE_WAIT",
+	"TGOx_VFETCH_POS_READ_WAIT",
+	"TGOx_VFETCH_VERTEX_WAIT",
+	"TGOx_VFETCH_STALL",
+	"TGOx_PRIMASSY_STALL",
+	"TGOx_BBOX_GEN_STALL",
+	"TGOx_IDVS_VBU_HIT",
+	"TGOx_IDVS_VBU_MISS",
+	"TGOx_IDVS_VBU_LINE_DEALLOCATE",
+	"TGOx_IDVS_VAR_SHAD_REQ",
+	"TGOx_IDVS_VAR_SHAD_STALL",
+	"TGOx_BINNER_STALL",
+	"TGOx_ITER_STALL",
+	"TGOx_COMPRESS_MISS",
+	"TGOx_COMPRESS_STALL",
+	"TGOx_PCACHE_HIT",
+	"TGOx_PCACHE_MISS",
+	"TGOx_PCACHE_MISS_STALL",
+	"TGOx_PCACHE_EVICT_STALL",
+	"TGOx_PMGR_PTR_WR_STALL",
+	"TGOx_PMGR_PTR_RD_STALL",
+	"TGOx_PMGR_CMD_WR_STALL",
+	"TGOx_WRBUF_ACTIVE",
+	"TGOx_WRBUF_HIT",
+	"TGOx_WRBUF_MISS",
+	"TGOx_WRBUF_NO_FREE_LINE_STALL",
+	"TGOx_WRBUF_NO_AXI_ID_STALL",
+	"TGOx_WRBUF_AXI_STALL",
+	"",
+	"",
+	"",
+	"TGOx_UTLB_TRANS",
+	"TGOx_UTLB_TRANS_HIT",
+	"TGOx_UTLB_TRANS_STALL",
+	"TGOx_UTLB_TRANS_MISS_DELAY",
+	"TGOx_UTLB_MMU_REQ",
+
+	/* Performance counters for the Shader Core */
+	"",
+	"",
+	"",
+	"",
+	"TGOx_FRAG_ACTIVE",
+	"TGOx_FRAG_PRIMITIVES",
+	"TGOx_FRAG_PRIM_RAST",
+	"TGOx_FRAG_FPK_ACTIVE",
+	"TGOx_FRAG_STARVING",
+	"TGOx_FRAG_WARPS",
+	"TGOx_FRAG_PARTIAL_WARPS",
+	"TGOx_FRAG_QUADS_RAST",
+	"TGOx_FRAG_QUADS_EZS_TEST",
+	"TGOx_FRAG_QUADS_EZS_UPDATE",
+	"TGOx_FRAG_QUADS_EZS_KILL",
+	"TGOx_FRAG_LZS_TEST",
+	"TGOx_FRAG_LZS_KILL",
+	"TGOx_WARP_REG_SIZE_64",
+	"TGOx_FRAG_PTILES",
+	"TGOx_FRAG_TRANS_ELIM",
+	"TGOx_QUAD_FPK_KILLER",
+	"TGOx_FULL_QUAD_WARPS",
+	"TGOx_COMPUTE_ACTIVE",
+	"TGOx_COMPUTE_TASKS",
+	"TGOx_COMPUTE_WARPS",
+	"TGOx_COMPUTE_STARVING",
+	"TGOx_EXEC_CORE_ACTIVE",
+	"TGOx_EXEC_ACTIVE",
+	"TGOx_EXEC_INSTR_COUNT",
+	"TGOx_EXEC_INSTR_DIVERGED",
+	"TGOx_EXEC_INSTR_STARVING",
+	"TGOx_ARITH_INSTR_SINGLE_FMA",
+	"TGOx_ARITH_INSTR_DOUBLE",
+	"TGOx_ARITH_INSTR_MSG",
+	"TGOx_ARITH_INSTR_MSG_ONLY",
+	"TGOx_TEX_MSGI_NUM_QUADS",
+	"TGOx_TEX_DFCH_NUM_PASSES",
+	"TGOx_TEX_DFCH_NUM_PASSES_MISS",
+	"TGOx_TEX_DFCH_NUM_PASSES_MIP_MAP",
+	"TGOx_TEX_TIDX_NUM_SPLIT_MIP_MAP",
+	"TGOx_TEX_TFCH_NUM_LINES_FETCHED",
+	"TGOx_TEX_TFCH_NUM_LINES_FETCHED_BLOCK",
+	"TGOx_TEX_TFCH_NUM_OPERATIONS",
+	"TGOx_TEX_FILT_NUM_OPERATIONS",
+	"TGOx_LS_MEM_READ_FULL",
+	"TGOx_LS_MEM_READ_SHORT",
+	"TGOx_LS_MEM_WRITE_FULL",
+	"TGOx_LS_MEM_WRITE_SHORT",
+	"TGOx_LS_MEM_ATOMIC",
+	"TGOx_VARY_INSTR",
+	"TGOx_VARY_SLOT_32",
+	"TGOx_VARY_SLOT_16",
+	"TGOx_ATTR_INSTR",
+	"TGOx_ARITH_INSTR_FP_MUL",
+	"TGOx_BEATS_RD_FTC",
+	"TGOx_BEATS_RD_FTC_EXT",
+	"TGOx_BEATS_RD_LSC",
+	"TGOx_BEATS_RD_LSC_EXT",
+	"TGOx_BEATS_RD_TEX",
+	"TGOx_BEATS_RD_TEX_EXT",
+	"TGOx_BEATS_RD_OTHER",
+	"TGOx_BEATS_WR_LSC_WB",
+	"TGOx_BEATS_WR_TIB",
+	"TGOx_BEATS_WR_LSC_OTHER",
+
+	/* Performance counters for the Memory System */
+	"",
+	"",
+	"",
+	"",
+	"TGOx_MMU_REQUESTS",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"TGOx_L2_RD_MSG_IN",
+	"TGOx_L2_RD_MSG_IN_STALL",
+	"TGOx_L2_WR_MSG_IN",
+	"TGOx_L2_WR_MSG_IN_STALL",
+	"TGOx_L2_SNP_MSG_IN",
+	"TGOx_L2_SNP_MSG_IN_STALL",
+	"TGOx_L2_RD_MSG_OUT",
+	"TGOx_L2_RD_MSG_OUT_STALL",
+	"TGOx_L2_WR_MSG_OUT",
+	"TGOx_L2_ANY_LOOKUP",
+	"TGOx_L2_READ_LOOKUP",
+	"TGOx_L2_WRITE_LOOKUP",
+	"TGOx_L2_EXT_SNOOP_LOOKUP",
+	"TGOx_L2_EXT_READ",
+	"TGOx_L2_EXT_READ_NOSNP",
+	"TGOx_L2_EXT_READ_UNIQUE",
+	"TGOx_L2_EXT_READ_BEATS",
+	"TGOx_L2_EXT_AR_STALL",
+	"TGOx_L2_EXT_AR_CNT_Q1",
+	"TGOx_L2_EXT_AR_CNT_Q2",
+	"TGOx_L2_EXT_AR_CNT_Q3",
+	"TGOx_L2_EXT_RRESP_0_127",
+	"TGOx_L2_EXT_RRESP_128_191",
+	"TGOx_L2_EXT_RRESP_192_255",
+	"TGOx_L2_EXT_RRESP_256_319",
+	"TGOx_L2_EXT_RRESP_320_383",
+	"TGOx_L2_EXT_WRITE",
+	"TGOx_L2_EXT_WRITE_NOSNP_FULL",
+	"TGOx_L2_EXT_WRITE_NOSNP_PTL",
+	"TGOx_L2_EXT_WRITE_SNP_FULL",
+	"TGOx_L2_EXT_WRITE_SNP_PTL",
+	"TGOx_L2_EXT_WRITE_BEATS",
+	"TGOx_L2_EXT_W_STALL",
+	"TGOx_L2_EXT_AW_CNT_Q1",
+	"TGOx_L2_EXT_AW_CNT_Q2",
+	"TGOx_L2_EXT_AW_CNT_Q3",
+	"TGOx_L2_EXT_SNOOP",
+	"TGOx_L2_EXT_SNOOP_STALL",
+	"TGOx_L2_EXT_SNOOP_RESP_CLEAN",
+	"TGOx_L2_EXT_SNOOP_RESP_DATA",
+	"TGOx_L2_EXT_SNOOP_INTERNAL",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+};
+
+#endif /* _KBASE_GATOR_HWCNT_NAMES_TGOX_H_ */
diff --git a/drivers/gpu/arm/bifrost/mali_kbase_gator_hwcnt_names_thex.h b/drivers/gpu/arm/bifrost/mali_kbase_gator_hwcnt_names_thex.h
index af00a6acb09b..e24e91ab1ca4 100644
--- a/drivers/gpu/arm/bifrost/mali_kbase_gator_hwcnt_names_thex.h
+++ b/drivers/gpu/arm/bifrost/mali_kbase_gator_hwcnt_names_thex.h
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2016-2017 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2016-2018 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
diff --git a/drivers/gpu/arm/bifrost/mali_kbase_gator_hwcnt_names_tkax.h b/drivers/gpu/arm/bifrost/mali_kbase_gator_hwcnt_names_tkax.h
index 1c1f6693bfb5..73db45c232f1 100644
--- a/drivers/gpu/arm/bifrost/mali_kbase_gator_hwcnt_names_tkax.h
+++ b/drivers/gpu/arm/bifrost/mali_kbase_gator_hwcnt_names_tkax.h
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2016-2017 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2016-2018 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -222,9 +222,9 @@ static const char * const hardware_counters_mali_tKAx[] = {
 	"TKAx_BEATS_RD_TEX",
 	"TKAx_BEATS_RD_TEX_EXT",
 	"TKAx_BEATS_RD_OTHER",
-	"TKAx_BEATS_WR_LSC_WB",
-	"TKAx_BEATS_WR_TIB",
 	"TKAx_BEATS_WR_LSC_OTHER",
+	"TKAx_BEATS_WR_TIB",
+	"TKAx_BEATS_WR_LSC_WB",
 
 	/* Performance counters for the Memory System */
 	"",
diff --git a/drivers/gpu/arm/bifrost/mali_kbase_gator_hwcnt_names_tmix.h b/drivers/gpu/arm/bifrost/mali_kbase_gator_hwcnt_names_tmix.h
index 233ffbec416e..63eac50e0cc7 100644
--- a/drivers/gpu/arm/bifrost/mali_kbase_gator_hwcnt_names_tmix.h
+++ b/drivers/gpu/arm/bifrost/mali_kbase_gator_hwcnt_names_tmix.h
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2016-2017 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2016-2018 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
diff --git a/drivers/gpu/arm/bifrost/mali_kbase_gator_hwcnt_names_tnox.h b/drivers/gpu/arm/bifrost/mali_kbase_gator_hwcnt_names_tnox.h
index fbb5080f6779..932663cfb6a9 100644
--- a/drivers/gpu/arm/bifrost/mali_kbase_gator_hwcnt_names_tnox.h
+++ b/drivers/gpu/arm/bifrost/mali_kbase_gator_hwcnt_names_tnox.h
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2016-2017 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2016-2018 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -222,9 +222,9 @@ static const char * const hardware_counters_mali_tNOx[] = {
 	"TNOx_BEATS_RD_TEX",
 	"TNOx_BEATS_RD_TEX_EXT",
 	"TNOx_BEATS_RD_OTHER",
-	"TNOx_BEATS_WR_LSC_WB",
-	"TNOx_BEATS_WR_TIB",
 	"TNOx_BEATS_WR_LSC_OTHER",
+	"TNOx_BEATS_WR_TIB",
+	"TNOx_BEATS_WR_LSC_WB",
 
 	/* Performance counters for the Memory System */
 	"",
diff --git a/drivers/gpu/arm/bifrost/mali_kbase_gator_hwcnt_names_tsix.h b/drivers/gpu/arm/bifrost/mali_kbase_gator_hwcnt_names_tsix.h
index 552db5732239..b8dde32bc529 100644
--- a/drivers/gpu/arm/bifrost/mali_kbase_gator_hwcnt_names_tsix.h
+++ b/drivers/gpu/arm/bifrost/mali_kbase_gator_hwcnt_names_tsix.h
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2016-2017 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2016-2018 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -222,9 +222,9 @@ static const char * const hardware_counters_mali_tSIx[] = {
 	"TSIx_BEATS_RD_TEX",
 	"TSIx_BEATS_RD_TEX_EXT",
 	"TSIx_BEATS_RD_OTHER",
-	"TSIx_BEATS_WR_LSC",
+	"TSIx_BEATS_WR_LSC_OTHER",
 	"TSIx_BEATS_WR_TIB",
-	"",
+	"TSIx_BEATS_WR_LSC_WB",
 
 	/* Performance counters for the Memory System */
 	"",
diff --git a/drivers/gpu/arm/bifrost/mali_kbase_gator_hwcnt_names_ttrx.h b/drivers/gpu/arm/bifrost/mali_kbase_gator_hwcnt_names_ttrx.h
index d1bb02a72fc3..e8148a018df5 100644
--- a/drivers/gpu/arm/bifrost/mali_kbase_gator_hwcnt_names_ttrx.h
+++ b/drivers/gpu/arm/bifrost/mali_kbase_gator_hwcnt_names_ttrx.h
@@ -178,24 +178,24 @@ static const char * const hardware_counters_mali_tTRx[] = {
 	"TTRx_FRAG_QUADS_EZS_KILL",
 	"TTRx_FRAG_LZS_TEST",
 	"TTRx_FRAG_LZS_KILL",
-	"",
+	"TTRx_WARP_REG_SIZE_64",
 	"TTRx_FRAG_PTILES",
 	"TTRx_FRAG_TRANS_ELIM",
 	"TTRx_QUAD_FPK_KILLER",
-	"",
+	"TTRx_FULL_QUAD_WARPS",
 	"TTRx_COMPUTE_ACTIVE",
 	"TTRx_COMPUTE_TASKS",
 	"TTRx_COMPUTE_WARPS",
 	"TTRx_COMPUTE_STARVING",
 	"TTRx_EXEC_CORE_ACTIVE",
-	"TTRx_EXEC_ACTIVE",
-	"TTRx_EXEC_INSTR_COUNT",
+	"TTRx_EXEC_INSTR_FMA",
+	"TTRx_EXEC_INSTR_CVT",
+	"TTRx_EXEC_INSTR_SFU",
+	"TTRx_EXEC_INSTR_MSG",
 	"TTRx_EXEC_INSTR_DIVERGED",
-	"TTRx_EXEC_INSTR_STARVING",
-	"TTRx_ARITH_INSTR_SINGLE_FMA",
-	"TTRx_ARITH_INSTR_DOUBLE",
-	"TTRx_ARITH_INSTR_MSG",
-	"TTRx_ARITH_INSTR_MSG_ONLY",
+	"TTRx_EXEC_ICACHE_MISS",
+	"TTRx_EXEC_STARVE_ARITH",
+	"TTRx_CALL_BLEND_SHADER",
 	"TTRx_TEX_INSTR",
 	"TTRx_TEX_INSTR_MIPMAP",
 	"TTRx_TEX_INSTR_COMPRESSED",
@@ -222,9 +222,9 @@ static const char * const hardware_counters_mali_tTRx[] = {
 	"TTRx_BEATS_RD_TEX",
 	"TTRx_BEATS_RD_TEX_EXT",
 	"TTRx_BEATS_RD_OTHER",
-	"TTRx_BEATS_WR_LSC",
-	"TTRx_BEATS_WR_TIB",
 	"",
+	"TTRx_BEATS_WR_TIB",
+	"TTRx_BEATS_WR_LSC",
 
 	/* Performance counters for the Memory System */
 	"",
diff --git a/drivers/gpu/arm/bifrost/mali_kbase_gpu_id.h b/drivers/gpu/arm/bifrost/mali_kbase_gpu_id.h
index 4052e2fd0768..c14317087ead 100644
--- a/drivers/gpu/arm/bifrost/mali_kbase_gpu_id.h
+++ b/drivers/gpu/arm/bifrost/mali_kbase_gpu_id.h
@@ -27,20 +27,20 @@
 #define GPU_ID_VERSION_MINOR_SHIFT        4
 #define GPU_ID_VERSION_MAJOR_SHIFT        12
 #define GPU_ID_VERSION_PRODUCT_ID_SHIFT   16
-#define GPU_ID_VERSION_STATUS             (0xF  << GPU_ID_VERSION_STATUS_SHIFT)
-#define GPU_ID_VERSION_MINOR              (0xFF << GPU_ID_VERSION_MINOR_SHIFT)
-#define GPU_ID_VERSION_MAJOR              (0xF  << GPU_ID_VERSION_MAJOR_SHIFT)
-#define GPU_ID_VERSION_PRODUCT_ID  (0xFFFF << GPU_ID_VERSION_PRODUCT_ID_SHIFT)
+#define GPU_ID_VERSION_STATUS             (0xFu  << GPU_ID_VERSION_STATUS_SHIFT)
+#define GPU_ID_VERSION_MINOR              (0xFFu << GPU_ID_VERSION_MINOR_SHIFT)
+#define GPU_ID_VERSION_MAJOR              (0xFu  << GPU_ID_VERSION_MAJOR_SHIFT)
+#define GPU_ID_VERSION_PRODUCT_ID  (0xFFFFu << GPU_ID_VERSION_PRODUCT_ID_SHIFT)
 
 /* Values for GPU_ID_VERSION_PRODUCT_ID bitfield */
-#define GPU_ID_PI_T60X                    0x6956
-#define GPU_ID_PI_T62X                    0x0620
-#define GPU_ID_PI_T76X                    0x0750
-#define GPU_ID_PI_T72X                    0x0720
-#define GPU_ID_PI_TFRX                    0x0880
-#define GPU_ID_PI_T86X                    0x0860
-#define GPU_ID_PI_T82X                    0x0820
-#define GPU_ID_PI_T83X                    0x0830
+#define GPU_ID_PI_T60X                    0x6956u
+#define GPU_ID_PI_T62X                    0x0620u
+#define GPU_ID_PI_T76X                    0x0750u
+#define GPU_ID_PI_T72X                    0x0720u
+#define GPU_ID_PI_TFRX                    0x0880u
+#define GPU_ID_PI_T86X                    0x0860u
+#define GPU_ID_PI_T82X                    0x0820u
+#define GPU_ID_PI_T83X                    0x0830u
 
 /* New GPU ID format when PRODUCT_ID is >= 0x1000 (and not 0x6956) */
 #define GPU_ID_PI_NEW_FORMAT_START        0x1000
@@ -55,13 +55,13 @@
 #define GPU_ID2_ARCH_REV_SHIFT            20
 #define GPU_ID2_ARCH_MINOR_SHIFT          24
 #define GPU_ID2_ARCH_MAJOR_SHIFT          28
-#define GPU_ID2_VERSION_STATUS            (0xF << GPU_ID2_VERSION_STATUS_SHIFT)
-#define GPU_ID2_VERSION_MINOR             (0xFF << GPU_ID2_VERSION_MINOR_SHIFT)
-#define GPU_ID2_VERSION_MAJOR             (0xF << GPU_ID2_VERSION_MAJOR_SHIFT)
-#define GPU_ID2_PRODUCT_MAJOR             (0xF << GPU_ID2_PRODUCT_MAJOR_SHIFT)
-#define GPU_ID2_ARCH_REV                  (0xF << GPU_ID2_ARCH_REV_SHIFT)
-#define GPU_ID2_ARCH_MINOR                (0xF << GPU_ID2_ARCH_MINOR_SHIFT)
-#define GPU_ID2_ARCH_MAJOR                (0xF << GPU_ID2_ARCH_MAJOR_SHIFT)
+#define GPU_ID2_VERSION_STATUS            (0xFu << GPU_ID2_VERSION_STATUS_SHIFT)
+#define GPU_ID2_VERSION_MINOR             (0xFFu << GPU_ID2_VERSION_MINOR_SHIFT)
+#define GPU_ID2_VERSION_MAJOR             (0xFu << GPU_ID2_VERSION_MAJOR_SHIFT)
+#define GPU_ID2_PRODUCT_MAJOR             (0xFu << GPU_ID2_PRODUCT_MAJOR_SHIFT)
+#define GPU_ID2_ARCH_REV                  (0xFu << GPU_ID2_ARCH_REV_SHIFT)
+#define GPU_ID2_ARCH_MINOR                (0xFu << GPU_ID2_ARCH_MINOR_SHIFT)
+#define GPU_ID2_ARCH_MAJOR                (0xFu << GPU_ID2_ARCH_MAJOR_SHIFT)
 #define GPU_ID2_PRODUCT_MODEL  (GPU_ID2_ARCH_MAJOR | GPU_ID2_PRODUCT_MAJOR)
 #define GPU_ID2_VERSION        (GPU_ID2_VERSION_MAJOR | \
 								GPU_ID2_VERSION_MINOR | \
@@ -70,17 +70,17 @@
 /* Helper macro to create a partial GPU_ID (new format) that defines
    a product ignoring its version. */
 #define GPU_ID2_PRODUCT_MAKE(arch_major, arch_minor, arch_rev, product_major) \
-		(((arch_major) << GPU_ID2_ARCH_MAJOR_SHIFT)  | \
-		 ((arch_minor) << GPU_ID2_ARCH_MINOR_SHIFT)  | \
-		 ((arch_rev) << GPU_ID2_ARCH_REV_SHIFT)      | \
-		 ((product_major) << GPU_ID2_PRODUCT_MAJOR_SHIFT))
+		((((u32)arch_major) << GPU_ID2_ARCH_MAJOR_SHIFT)  | \
+		 (((u32)arch_minor) << GPU_ID2_ARCH_MINOR_SHIFT)  | \
+		 (((u32)arch_rev) << GPU_ID2_ARCH_REV_SHIFT)      | \
+		 (((u32)product_major) << GPU_ID2_PRODUCT_MAJOR_SHIFT))
 
 /* Helper macro to create a partial GPU_ID (new format) that specifies the
    revision (major, minor, status) of a product */
 #define GPU_ID2_VERSION_MAKE(version_major, version_minor, version_status) \
-		(((version_major) << GPU_ID2_VERSION_MAJOR_SHIFT)  | \
-		 ((version_minor) << GPU_ID2_VERSION_MINOR_SHIFT)  | \
-		 ((version_status) << GPU_ID2_VERSION_STATUS_SHIFT))
+		((((u32)version_major) << GPU_ID2_VERSION_MAJOR_SHIFT)  | \
+		 (((u32)version_minor) << GPU_ID2_VERSION_MINOR_SHIFT)  | \
+		 (((u32)version_status) << GPU_ID2_VERSION_STATUS_SHIFT))
 
 /* Helper macro to create a complete GPU_ID (new format) */
 #define GPU_ID2_MAKE(arch_major, arch_minor, arch_rev, product_major, \
@@ -93,25 +93,25 @@
 /* Helper macro to create a partial GPU_ID (new format) that identifies
    a particular GPU model by its arch_major and product_major. */
 #define GPU_ID2_MODEL_MAKE(arch_major, product_major) \
-		(((arch_major) << GPU_ID2_ARCH_MAJOR_SHIFT)  | \
-		((product_major) << GPU_ID2_PRODUCT_MAJOR_SHIFT))
+		((((u32)arch_major) << GPU_ID2_ARCH_MAJOR_SHIFT)  | \
+		(((u32)product_major) << GPU_ID2_PRODUCT_MAJOR_SHIFT))
 
 /* Strip off the non-relevant bits from a product_id value and make it suitable
    for comparison against the GPU_ID2_PRODUCT_xxx values which identify a GPU
    model. */
 #define GPU_ID2_MODEL_MATCH_VALUE(product_id) \
-		(((product_id) << GPU_ID2_PRODUCT_MAJOR_SHIFT) & \
+		((((u32)product_id) << GPU_ID2_PRODUCT_MAJOR_SHIFT) & \
 		    GPU_ID2_PRODUCT_MODEL)
 
-#define GPU_ID2_PRODUCT_TMIX              GPU_ID2_MODEL_MAKE(6u, 0)
-#define GPU_ID2_PRODUCT_THEX              GPU_ID2_MODEL_MAKE(6u, 1)
-#define GPU_ID2_PRODUCT_TSIX              GPU_ID2_MODEL_MAKE(7u, 0)
-#define GPU_ID2_PRODUCT_TDVX              GPU_ID2_MODEL_MAKE(7u, 3)
-#define GPU_ID2_PRODUCT_TNOX              GPU_ID2_MODEL_MAKE(7u, 1)
-#define GPU_ID2_PRODUCT_TGOX              GPU_ID2_MODEL_MAKE(7u, 2)
-#define GPU_ID2_PRODUCT_TKAX              GPU_ID2_MODEL_MAKE(8u, 0)
-#define GPU_ID2_PRODUCT_TTRX              GPU_ID2_MODEL_MAKE(8u, 1)
-#define GPU_ID2_PRODUCT_TBOX              GPU_ID2_MODEL_MAKE(8u, 2)
+#define GPU_ID2_PRODUCT_TMIX              GPU_ID2_MODEL_MAKE(6, 0)
+#define GPU_ID2_PRODUCT_THEX              GPU_ID2_MODEL_MAKE(6, 1)
+#define GPU_ID2_PRODUCT_TSIX              GPU_ID2_MODEL_MAKE(7, 0)
+#define GPU_ID2_PRODUCT_TDVX              GPU_ID2_MODEL_MAKE(7, 3)
+#define GPU_ID2_PRODUCT_TNOX              GPU_ID2_MODEL_MAKE(7, 1)
+#define GPU_ID2_PRODUCT_TGOX              GPU_ID2_MODEL_MAKE(7, 2)
+#define GPU_ID2_PRODUCT_TKAX              GPU_ID2_MODEL_MAKE(8, 0)
+#define GPU_ID2_PRODUCT_TTRX              GPU_ID2_MODEL_MAKE(8, 1)
+#define GPU_ID2_PRODUCT_TBOX              GPU_ID2_MODEL_MAKE(8, 2)
 
 /* Values for GPU_ID_VERSION_STATUS field for PRODUCT_ID GPU_ID_PI_T60X */
 #define GPU_ID_S_15DEV0                   0x1
@@ -120,9 +120,9 @@
 /* Helper macro to create a GPU_ID assuming valid values for id, major,
    minor, status */
 #define GPU_ID_MAKE(id, major, minor, status) \
-		(((id) << GPU_ID_VERSION_PRODUCT_ID_SHIFT) | \
-		((major) << GPU_ID_VERSION_MAJOR_SHIFT) |   \
-		((minor) << GPU_ID_VERSION_MINOR_SHIFT) |   \
-		((status) << GPU_ID_VERSION_STATUS_SHIFT))
+		((((u32)id) << GPU_ID_VERSION_PRODUCT_ID_SHIFT) | \
+		(((u32)major) << GPU_ID_VERSION_MAJOR_SHIFT) |   \
+		(((u32)minor) << GPU_ID_VERSION_MINOR_SHIFT) |   \
+		(((u32)status) << GPU_ID_VERSION_STATUS_SHIFT))
 
 #endif /* _KBASE_GPU_ID_H_ */
diff --git a/drivers/gpu/arm/bifrost/mali_kbase_gpu_memory_debugfs.c b/drivers/gpu/arm/bifrost/mali_kbase_gpu_memory_debugfs.c
index 2fd033280359..514b065d4867 100644
--- a/drivers/gpu/arm/bifrost/mali_kbase_gpu_memory_debugfs.c
+++ b/drivers/gpu/arm/bifrost/mali_kbase_gpu_memory_debugfs.c
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2012-2016 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2012-2017 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -70,7 +70,7 @@ static int kbasep_gpu_memory_seq_show(struct seq_file *sfile, void *data)
  */
 static int kbasep_gpu_memory_debugfs_open(struct inode *in, struct file *file)
 {
-	return single_open(file, kbasep_gpu_memory_seq_show , NULL);
+	return single_open(file, kbasep_gpu_memory_seq_show, NULL);
 }
 
 static const struct file_operations kbasep_gpu_memory_debugfs_fops = {
diff --git a/drivers/gpu/arm/bifrost/mali_kbase_gpuprops.c b/drivers/gpu/arm/bifrost/mali_kbase_gpuprops.c
index 9a9ce2d9e661..62ba105ca417 100644
--- a/drivers/gpu/arm/bifrost/mali_kbase_gpuprops.c
+++ b/drivers/gpu/arm/bifrost/mali_kbase_gpuprops.c
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2011-2017 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2011-2018 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -138,7 +138,7 @@ static void kbase_gpuprops_get_props(base_gpu_props * const gpu_props, struct kb
 	gpu_props->raw_props.mem_features = regdump.mem_features;
 	gpu_props->raw_props.mmu_features = regdump.mmu_features;
 	gpu_props->raw_props.l2_features = regdump.l2_features;
-	gpu_props->raw_props.suspend_size = regdump.suspend_size;
+	gpu_props->raw_props.core_features = regdump.core_features;
 
 	gpu_props->raw_props.as_present = regdump.as_present;
 	gpu_props->raw_props.js_present = regdump.js_present;
@@ -165,6 +165,7 @@ static void kbase_gpuprops_get_props(base_gpu_props * const gpu_props, struct kb
 	gpu_props->raw_props.thread_max_threads = regdump.thread_max_threads;
 	gpu_props->raw_props.thread_max_workgroup_size = regdump.thread_max_workgroup_size;
 	gpu_props->raw_props.thread_features = regdump.thread_features;
+	gpu_props->raw_props.thread_tls_alloc = regdump.thread_tls_alloc;
 }
 
 void kbase_gpuprops_update_core_props_gpu_id(base_gpu_props * const gpu_props)
@@ -195,6 +196,8 @@ static void kbase_gpuprops_calculate_props(base_gpu_props * const gpu_props, str
 	kbase_gpuprops_update_core_props_gpu_id(gpu_props);
 	gpu_props->core_props.log2_program_counter_size = KBASE_GPU_PC_SIZE_LOG2;
 	gpu_props->core_props.gpu_available_memory_size = totalram_pages << PAGE_SHIFT;
+	gpu_props->core_props.num_exec_engines =
+		KBASE_UBFX32(gpu_props->raw_props.core_features, 0, 4);
 
 	for (i = 0; i < BASE_GPU_NUM_TEXTURE_FEATURES_REGISTERS; i++)
 		gpu_props->core_props.texture_features[i] = gpu_props->raw_props.texture_features[i];
@@ -226,6 +229,13 @@ static void kbase_gpuprops_calculate_props(base_gpu_props * const gpu_props, str
 	else
 		gpu_props->thread_props.max_barrier_size = gpu_props->raw_props.thread_max_barrier_size;
 
+	if (gpu_props->raw_props.thread_tls_alloc == 0)
+		gpu_props->thread_props.tls_alloc =
+				gpu_props->thread_props.max_threads;
+	else
+		gpu_props->thread_props.tls_alloc =
+				gpu_props->raw_props.thread_tls_alloc;
+
 	gpu_props->thread_props.max_registers = KBASE_UBFX32(gpu_props->raw_props.thread_features, 0U, 16);
 	gpu_props->thread_props.max_task_queue = KBASE_UBFX32(gpu_props->raw_props.thread_features, 16U, 8);
 	gpu_props->thread_props.max_thread_group_split = KBASE_UBFX32(gpu_props->raw_props.thread_features, 24U, 6);
@@ -312,6 +322,7 @@ static struct {
 	PROP(TEXTURE_FEATURES_2,          core_props.texture_features[2]),
 	PROP(TEXTURE_FEATURES_3,          core_props.texture_features[3]),
 	PROP(GPU_AVAILABLE_MEMORY_SIZE,   core_props.gpu_available_memory_size),
+	PROP(NUM_EXEC_ENGINES,            core_props.num_exec_engines),
 
 	PROP(L2_LOG2_LINE_SIZE,           l2_props.log2_line_size),
 	PROP(L2_LOG2_CACHE_SIZE,          l2_props.log2_cache_size),
@@ -327,13 +338,14 @@ static struct {
 	PROP(MAX_TASK_QUEUE,              thread_props.max_task_queue),
 	PROP(MAX_THREAD_GROUP_SPLIT,      thread_props.max_thread_group_split),
 	PROP(IMPL_TECH,                   thread_props.impl_tech),
+	PROP(TLS_ALLOC,                   thread_props.tls_alloc),
 
 	PROP(RAW_SHADER_PRESENT,          raw_props.shader_present),
 	PROP(RAW_TILER_PRESENT,           raw_props.tiler_present),
 	PROP(RAW_L2_PRESENT,              raw_props.l2_present),
 	PROP(RAW_STACK_PRESENT,           raw_props.stack_present),
 	PROP(RAW_L2_FEATURES,             raw_props.l2_features),
-	PROP(RAW_SUSPEND_SIZE,            raw_props.suspend_size),
+	PROP(RAW_CORE_FEATURES,           raw_props.core_features),
 	PROP(RAW_MEM_FEATURES,            raw_props.mem_features),
 	PROP(RAW_MMU_FEATURES,            raw_props.mmu_features),
 	PROP(RAW_AS_PRESENT,              raw_props.as_present),
@@ -365,6 +377,7 @@ static struct {
 			raw_props.thread_max_workgroup_size),
 	PROP(RAW_THREAD_MAX_BARRIER_SIZE, raw_props.thread_max_barrier_size),
 	PROP(RAW_THREAD_FEATURES,         raw_props.thread_features),
+	PROP(RAW_THREAD_TLS_ALLOC,        raw_props.thread_tls_alloc),
 	PROP(RAW_COHERENCY_MODE,          raw_props.coherency_mode),
 
 	PROP(COHERENCY_NUM_GROUPS,        coherency_info.num_groups),
diff --git a/drivers/gpu/arm/bifrost/mali_kbase_gpuprops_types.h b/drivers/gpu/arm/bifrost/mali_kbase_gpuprops_types.h
index a3ddec79bee7..d7877d1d4a57 100644
--- a/drivers/gpu/arm/bifrost/mali_kbase_gpuprops_types.h
+++ b/drivers/gpu/arm/bifrost/mali_kbase_gpuprops_types.h
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2011-2017 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2011-2018 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -38,7 +38,7 @@
 struct kbase_gpuprops_regdump {
 	u32 gpu_id;
 	u32 l2_features;
-	u32 suspend_size; /* API 8.2+ */
+	u32 core_features;
 	u32 tiler_features;
 	u32 mem_features;
 	u32 mmu_features;
@@ -48,6 +48,7 @@ struct kbase_gpuprops_regdump {
 	u32 thread_max_workgroup_size;
 	u32 thread_max_barrier_size;
 	u32 thread_features;
+	u32 thread_tls_alloc;
 	u32 texture_features[BASE_GPU_NUM_TEXTURE_FEATURES_REGISTERS];
 	u32 js_features[GPU_MAX_JOB_SLOTS];
 	u32 shader_present_lo;
diff --git a/drivers/gpu/arm/bifrost/mali_kbase_gwt.c b/drivers/gpu/arm/bifrost/mali_kbase_gwt.c
index 2caab877a447..b36254641327 100644
--- a/drivers/gpu/arm/bifrost/mali_kbase_gwt.c
+++ b/drivers/gpu/arm/bifrost/mali_kbase_gwt.c
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2010-2017 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2010-2018 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -124,7 +124,8 @@ int kbase_gpu_gwt_stop(struct kbase_context *kctx)
 }
 
 
-int list_cmp_function(void *priv, struct list_head *a, struct list_head *b)
+static int list_cmp_function(void *priv, struct list_head *a,
+				struct list_head *b)
 {
 	struct kbasep_gwt_list_element *elementA = container_of(a,
 				struct kbasep_gwt_list_element, link);
@@ -133,30 +134,27 @@ int list_cmp_function(void *priv, struct list_head *a, struct list_head *b)
 
 	CSTD_UNUSED(priv);
 
-	if (elementA->handle > elementB->handle)
+	if (elementA->page_addr > elementB->page_addr)
 		return 1;
-	else if ((elementA->handle == elementB->handle) &&
-			(elementA->offset > elementB->offset))
-		return 1;
-	else
-		return -1;
+	return -1;
 }
 
-void kbase_gpu_gwt_collate(struct kbase_context *kctx,
+static void kbase_gpu_gwt_collate(struct kbase_context *kctx,
 		struct list_head *snapshot_list)
 {
 	struct kbasep_gwt_list_element *pos, *n;
 	struct kbasep_gwt_list_element *collated = NULL;
 
-	/* sort the list */
+	/* Sort the list */
 	list_sort(NULL, snapshot_list, list_cmp_function);
 
-	/* Combine contiguous areas from same region */
+	/* Combine contiguous areas. */
 	list_for_each_entry_safe(pos, n, snapshot_list, link) {
-		if (NULL == collated ||
-				collated->handle != pos->handle ||
-				collated->offset + collated->num_pages !=
-						pos->offset) {
+		if (collated == NULL ||	collated->region !=
+					pos->region ||
+					(collated->page_addr +
+					(collated->num_pages * PAGE_SIZE)) !=
+					pos->page_addr) {
 			/* This is the first time through, a new region or
 			 * is not contiguous - start collating to this element
 			 */
@@ -176,10 +174,8 @@ int kbase_gpu_gwt_dump(struct kbase_context *kctx,
 {
 	const u32 ubuf_size = gwt_dump->in.len;
 	u32 ubuf_count = 0;
-	__user void *user_handles = (__user void *)
-			(uintptr_t)gwt_dump->in.handle_buffer;
-	__user void *user_offsets = (__user void *)
-			(uintptr_t)gwt_dump->in.offset_buffer;
+	__user void *user_addr = (__user void *)
+			(uintptr_t)gwt_dump->in.addr_buffer;
 	__user void *user_sizes = (__user void *)
 			(uintptr_t)gwt_dump->in.size_buffer;
 
@@ -191,8 +187,7 @@ int kbase_gpu_gwt_dump(struct kbase_context *kctx,
 		return -EPERM;
 	}
 
-	if (!gwt_dump->in.len || !gwt_dump->in.handle_buffer
-			|| !gwt_dump->in.offset_buffer
+	if (!gwt_dump->in.len || !gwt_dump->in.addr_buffer
 			|| !gwt_dump->in.size_buffer) {
 		kbase_gpu_vm_unlock(kctx);
 		/* We don't have any valid user space buffer to copy the
@@ -219,8 +214,7 @@ int kbase_gpu_gwt_dump(struct kbase_context *kctx,
 	}
 
 	while ((!list_empty(&kctx->gwt_snapshot_list))) {
-		u64 handle_buffer[32];
-		u64 offset_buffer[32];
+		u64 addr_buffer[32];
 		u64 num_page_buffer[32];
 		u32 count = 0;
 		int err;
@@ -228,30 +222,20 @@ int kbase_gpu_gwt_dump(struct kbase_context *kctx,
 
 		list_for_each_entry_safe(dump_info, n,
 				&kctx->gwt_snapshot_list, link) {
-			handle_buffer[count] = dump_info->handle;
-			offset_buffer[count] = dump_info->offset;
+			addr_buffer[count] = dump_info->page_addr;
 			num_page_buffer[count] = dump_info->num_pages;
 			count++;
 			list_del(&dump_info->link);
 			kfree(dump_info);
-			if (ARRAY_SIZE(handle_buffer) == count ||
+			if (ARRAY_SIZE(addr_buffer) == count ||
 					ubuf_size == (ubuf_count + count))
 				break;
 		}
 
 		if (count) {
-			err = copy_to_user((user_handles +
-						(ubuf_count * sizeof(u64))),
-					(void *)handle_buffer,
-					count * sizeof(u64));
-			if (err) {
-				dev_err(kctx->kbdev->dev, "Copy to user failure\n");
-				kbase_gpu_vm_unlock(kctx);
-				return err;
-			}
-			err = copy_to_user((user_offsets +
-						(ubuf_count * sizeof(u64))),
-					(void *)offset_buffer,
+			err = copy_to_user((user_addr +
+					(ubuf_count * sizeof(u64))),
+					(void *)addr_buffer,
 					count * sizeof(u64));
 			if (err) {
 				dev_err(kctx->kbdev->dev, "Copy to user failure\n");
@@ -259,7 +243,7 @@ int kbase_gpu_gwt_dump(struct kbase_context *kctx,
 				return err;
 			}
 			err = copy_to_user((user_sizes +
-						(ubuf_count * sizeof(u64))),
+					(ubuf_count * sizeof(u64))),
 					(void *)num_page_buffer,
 					count * sizeof(u64));
 			if (err) {
diff --git a/drivers/gpu/arm/bifrost/mali_kbase_hw.c b/drivers/gpu/arm/bifrost/mali_kbase_hw.c
index 286cc954ccbc..f34f53a919b8 100644
--- a/drivers/gpu/arm/bifrost/mali_kbase_hw.c
+++ b/drivers/gpu/arm/bifrost/mali_kbase_hw.c
@@ -176,6 +176,7 @@ static const enum base_hw_issue *kbase_hw_get_issues_for_new_id(
 
 		{GPU_ID2_PRODUCT_TGOX,
 		 {{GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tGOx_r0p0},
+		  {GPU_ID2_VERSION_MAKE(1, 0, 0), base_hw_issues_tGOx_r1p0},
 		  {U32_MAX, NULL} } },
 
 		{GPU_ID2_PRODUCT_TKAX,
diff --git a/drivers/gpu/arm/bifrost/mali_kbase_hwaccess_defs.h b/drivers/gpu/arm/bifrost/mali_kbase_hwaccess_defs.h
index dd25746d8434..64fa3028f4cc 100644
--- a/drivers/gpu/arm/bifrost/mali_kbase_hwaccess_defs.h
+++ b/drivers/gpu/arm/bifrost/mali_kbase_hwaccess_defs.h
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2014, 2016 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2014, 2016, 2018 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -31,7 +31,17 @@
 
 #include <mali_kbase_jm_defs.h>
 
-/* The hwaccess_lock (a spinlock) must be held when accessing this structure */
+/**
+ * struct kbase_hwaccess_data - object encapsulating the GPU backend specific
+ *                              data for the HW access layer.
+ *                              hwaccess_lock (a spinlock) must be held when
+ *                              accessing this structure.
+ * @active_kctx:     pointer to active kbase context which last submitted an
+ *                   atom to GPU and while the context is active it can
+ *                   submit new atoms to GPU from the irq context also, without
+ *                   going through the bottom half of job completion path.
+ * @backend:         GPU backend specific data for HW access layer
+ */
 struct kbase_hwaccess_data {
 	struct kbase_context *active_kctx;
 
diff --git a/drivers/gpu/arm/bifrost/mali_kbase_hwaccess_gpuprops.h b/drivers/gpu/arm/bifrost/mali_kbase_hwaccess_gpuprops.h
index b8ab0dc268f3..63844d97ce02 100644
--- a/drivers/gpu/arm/bifrost/mali_kbase_hwaccess_gpuprops.h
+++ b/drivers/gpu/arm/bifrost/mali_kbase_hwaccess_gpuprops.h
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2014-2015, 2018 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -33,6 +33,8 @@
  *				  GPU
  * @kbdev:	Device pointer
  * @regdump:	Pointer to struct kbase_gpuprops_regdump structure
+ *
+ * The caller should ensure that GPU remains powered-on during this function.
  */
 void kbase_backend_gpuprops_get(struct kbase_device *kbdev,
 					struct kbase_gpuprops_regdump *regdump);
@@ -43,7 +45,7 @@ void kbase_backend_gpuprops_get(struct kbase_device *kbdev,
  * @regdump: Pointer to struct kbase_gpuprops_regdump structure
  *
  * This function reads GPU properties that are dependent on the hardware
- * features bitmask
+ * features bitmask. It will power-on the GPU if required.
  */
 void kbase_backend_gpuprops_get_features(struct kbase_device *kbdev,
 					struct kbase_gpuprops_regdump *regdump);
diff --git a/drivers/gpu/arm/bifrost/mali_kbase_hwaccess_instr.h b/drivers/gpu/arm/bifrost/mali_kbase_hwaccess_instr.h
index d180e39253bd..0c5ceffb0e47 100644
--- a/drivers/gpu/arm/bifrost/mali_kbase_hwaccess_instr.h
+++ b/drivers/gpu/arm/bifrost/mali_kbase_hwaccess_instr.h
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2014-2015, 2017 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -35,15 +35,15 @@
  * kbase_instr_hwcnt_enable_internal - Enable HW counters collection
  * @kbdev:	Kbase device
  * @kctx:	Kbase context
- * @setup:	HW counter setup parameters
+ * @enable:	HW counter setup parameters
  *
  * Context: might sleep, waiting for reset to complete
  *
  * Return: 0 on success
  */
 int kbase_instr_hwcnt_enable_internal(struct kbase_device *kbdev,
-					struct kbase_context *kctx,
-					struct kbase_uk_hwcnt_setup *setup);
+				struct kbase_context *kctx,
+				struct kbase_ioctl_hwcnt_enable *enable);
 
 /**
  * kbase_instr_hwcnt_disable_internal - Disable HW counters collection
diff --git a/drivers/gpu/arm/bifrost/mali_kbase_hwaccess_jm.h b/drivers/gpu/arm/bifrost/mali_kbase_hwaccess_jm.h
index 8b3d7e20f609..706e60a7fe89 100644
--- a/drivers/gpu/arm/bifrost/mali_kbase_hwaccess_jm.h
+++ b/drivers/gpu/arm/bifrost/mali_kbase_hwaccess_jm.h
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2014-2017 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2014-2018 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -381,6 +381,9 @@ bool kbase_reset_gpu_active(struct kbase_device *kbdev);
 void kbase_job_slot_hardstop(struct kbase_context *kctx, int js,
 				struct kbase_jd_atom *target_katom);
 
+/* Object containing callbacks for enabling/disabling protected mode, used
+ * on GPU which supports protected mode switching natively.
+ */
 extern struct protected_mode_ops kbase_native_protected_ops;
 
 #endif /* _KBASE_HWACCESS_JM_H_ */
diff --git a/drivers/gpu/arm/bifrost/mali_kbase_ioctl.h b/drivers/gpu/arm/bifrost/mali_kbase_ioctl.h
index a8fe9cd9edde..58edda5a0427 100644
--- a/drivers/gpu/arm/bifrost/mali_kbase_ioctl.h
+++ b/drivers/gpu/arm/bifrost/mali_kbase_ioctl.h
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2017 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2017-2018 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -43,20 +43,14 @@ extern "C" {
  *   KBASE_IOCTL_STICKY_RESOURCE_UNMAP
  * 11.4:
  * - New ioctl KBASE_IOCTL_MEM_FIND_GPU_START_AND_OFFSET
+ * 11.5:
+ * - New ioctl: KBASE_IOCTL_MEM_JIT_INIT (old ioctl renamed to _OLD)
+ * 11.6:
+ * - Added flags field to base_jit_alloc_info structure, which can be used to
+ *   specify pseudo chunked tiler alignment for JIT allocations.
  */
 #define BASE_UK_VERSION_MAJOR 11
-#define BASE_UK_VERSION_MINOR 4
-
-#ifdef ANDROID
-/* Android's definition of ioctl is incorrect, specifying the type argument as
- * 'int'. This creates a warning when using _IOWR (as the top bit is set). Work
- * round this by redefining _IOC to include a case to 'int'.
- */
-#undef _IOC
-#define _IOC(dir, type, nr, size) \
-	((int)(((dir) << _IOC_DIRSHIFT) | ((type) << _IOC_TYPESHIFT) | \
-	((nr) << _IOC_NRSHIFT) | ((size) << _IOC_SIZESHIFT)))
-#endif
+#define BASE_UK_VERSION_MINOR 6
 
 /**
  * struct kbase_ioctl_version_check - Check version compatibility with kernel
@@ -191,9 +185,9 @@ union kbase_ioctl_mem_query {
 #define KBASE_IOCTL_MEM_QUERY \
 	_IOWR(KBASE_IOCTL_TYPE, 6, union kbase_ioctl_mem_query)
 
-#define KBASE_MEM_QUERY_COMMIT_SIZE	1
-#define KBASE_MEM_QUERY_VA_SIZE		2
-#define KBASE_MEM_QUERY_FLAGS		3
+#define KBASE_MEM_QUERY_COMMIT_SIZE	((u64)1)
+#define KBASE_MEM_QUERY_VA_SIZE		((u64)2)
+#define KBASE_MEM_QUERY_FLAGS		((u64)3)
 
 /**
  * struct kbase_ioctl_mem_free - Free a memory region
@@ -252,6 +246,21 @@ struct kbase_ioctl_hwcnt_enable {
 #define KBASE_IOCTL_HWCNT_CLEAR \
 	_IO(KBASE_IOCTL_TYPE, 11)
 
+/**
+ * struct kbase_ioctl_hwcnt_values - Values to set dummy the dummy counters to.
+ * @data:    Counter samples for the dummy model.
+ * @size:    Size of the counter sample data.
+ * @padding: Padding.
+ */
+struct kbase_ioctl_hwcnt_values {
+	__u64 data;
+	__u32 size;
+	__u32 padding;
+};
+
+#define KBASE_IOCTL_HWCNT_SET \
+	_IOW(KBASE_IOCTL_TYPE, 32, struct kbase_ioctl_hwcnt_values)
+
 /**
  * struct kbase_ioctl_disjoint_query - Query the disjoint counter
  * @counter:   A counter of disjoint events in the kernel
@@ -271,6 +280,10 @@ struct kbase_ioctl_disjoint_query {
  *
  * The ioctl will return the number of bytes written into version_buffer
  * (which includes a NULL byte) or a negative error code
+ *
+ * The ioctl request code has to be _IOW because the data in ioctl struct is
+ * being copied to the kernel, even though the kernel then writes out the
+ * version info to the buffer specified in the ioctl.
  */
 struct kbase_ioctl_get_ddk_version {
 	__u64 version_buffer;
@@ -281,16 +294,40 @@ struct kbase_ioctl_get_ddk_version {
 #define KBASE_IOCTL_GET_DDK_VERSION \
 	_IOW(KBASE_IOCTL_TYPE, 13, struct kbase_ioctl_get_ddk_version)
 
+/**
+ * struct kbase_ioctl_mem_jit_init_old - Initialise the JIT memory allocator
+ *
+ * @va_pages: Number of VA pages to reserve for JIT
+ *
+ * Note that depending on the VA size of the application and GPU, the value
+ * specified in @va_pages may be ignored.
+ *
+ * New code should use KBASE_IOCTL_MEM_JIT_INIT instead, this is kept for
+ * backwards compatibility.
+ */
+struct kbase_ioctl_mem_jit_init_old {
+	__u64 va_pages;
+};
+
+#define KBASE_IOCTL_MEM_JIT_INIT_OLD \
+	_IOW(KBASE_IOCTL_TYPE, 14, struct kbase_ioctl_mem_jit_init_old)
+
 /**
  * struct kbase_ioctl_mem_jit_init - Initialise the JIT memory allocator
  *
  * @va_pages: Number of VA pages to reserve for JIT
+ * @max_allocations: Maximum number of concurrent allocations
+ * @trim_level: Level of JIT allocation trimming to perform on free (0 - 100%)
+ * @padding: Currently unused, must be zero
  *
  * Note that depending on the VA size of the application and GPU, the value
  * specified in @va_pages may be ignored.
  */
 struct kbase_ioctl_mem_jit_init {
 	__u64 va_pages;
+	__u8 max_allocations;
+	__u8 trim_level;
+	__u8 padding[6];
 };
 
 #define KBASE_IOCTL_MEM_JIT_INIT \
@@ -595,7 +632,6 @@ union kbase_ioctl_mem_find_gpu_start_and_offset {
 #define KBASE_IOCTL_MEM_FIND_GPU_START_AND_OFFSET \
 	_IOWR(KBASE_IOCTL_TYPE, 31, union kbase_ioctl_mem_find_gpu_start_and_offset)
 
-/* IOCTL 32 is free for use */
 
 #define KBASE_IOCTL_CINSTR_GWT_START \
 	_IO(KBASE_IOCTL_TYPE, 33)
@@ -605,9 +641,7 @@ union kbase_ioctl_mem_find_gpu_start_and_offset {
 
 /**
  * union kbase_ioctl_gwt_dump - Used to collect all GPU write fault addresses.
- * @handle_buffer: Address of buffer to hold handles of modified areas.
- * @offset_buffer: Address of buffer to hold offset size of modified areas
- *                 (in pages)
+ * @addr_buffer: Address of buffer to hold addresses of gpu modified areas.
  * @size_buffer: Address of buffer to hold size of modified areas (in pages)
  * @len: Number of addresses the buffers can hold.
  * @more_data_available: Status indicating if more addresses are available.
@@ -620,8 +654,7 @@ union kbase_ioctl_mem_find_gpu_start_and_offset {
  */
 union kbase_ioctl_cinstr_gwt_dump {
 	struct {
-		__u64 handle_buffer;
-		__u64 offset_buffer;
+		__u64 addr_buffer;
 		__u64 size_buffer;
 		__u32 len;
 		__u32 padding;
@@ -639,6 +672,8 @@ union kbase_ioctl_cinstr_gwt_dump {
 
 /* IOCTLs 36-41 are reserved */
 
+/* IOCTL 42 is free for use */
+
 /***************
  * test ioctls *
  ***************/
@@ -723,7 +758,7 @@ struct kbase_ioctl_tlstream_stats {
 #define KBASE_GPUPROP_RAW_L2_PRESENT			27
 #define KBASE_GPUPROP_RAW_STACK_PRESENT			28
 #define KBASE_GPUPROP_RAW_L2_FEATURES			29
-#define KBASE_GPUPROP_RAW_SUSPEND_SIZE			30
+#define KBASE_GPUPROP_RAW_CORE_FEATURES			30
 #define KBASE_GPUPROP_RAW_MEM_FEATURES			31
 #define KBASE_GPUPROP_RAW_MMU_FEATURES			32
 #define KBASE_GPUPROP_RAW_AS_PRESENT			33
@@ -778,6 +813,11 @@ struct kbase_ioctl_tlstream_stats {
 #define KBASE_GPUPROP_TEXTURE_FEATURES_3		80
 #define KBASE_GPUPROP_RAW_TEXTURE_FEATURES_3		81
 
+#define KBASE_GPUPROP_NUM_EXEC_ENGINES                  82
+
+#define KBASE_GPUPROP_RAW_THREAD_TLS_ALLOC		83
+#define KBASE_GPUPROP_TLS_ALLOC				84
+
 #ifdef __cpluscplus
 }
 #endif
diff --git a/drivers/gpu/arm/bifrost/mali_kbase_js.c b/drivers/gpu/arm/bifrost/mali_kbase_js.c
index 649bb7ea3267..938eaa5ff608 100644
--- a/drivers/gpu/arm/bifrost/mali_kbase_js.c
+++ b/drivers/gpu/arm/bifrost/mali_kbase_js.c
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2011-2017 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2011-2018 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -422,7 +422,7 @@ static bool kbase_js_ctx_list_add_unpullable_nolock(struct kbase_device *kbdev,
 int kbasep_js_devdata_init(struct kbase_device * const kbdev)
 {
 	struct kbasep_js_device_data *jsdd;
-	int i;
+	int i, j;
 
 	KBASE_DEBUG_ASSERT(kbdev != NULL);
 
@@ -527,8 +527,10 @@ int kbasep_js_devdata_init(struct kbase_device * const kbdev)
 	sema_init(&jsdd->schedule_sem, 1);
 
 	for (i = 0; i < kbdev->gpu_props.num_job_slots; ++i) {
-		INIT_LIST_HEAD(&jsdd->ctx_list_pullable[i]);
-		INIT_LIST_HEAD(&jsdd->ctx_list_unpullable[i]);
+		for (j = 0; j < KBASE_JS_ATOM_SCHED_PRIO_COUNT; ++j) {
+			INIT_LIST_HEAD(&jsdd->ctx_list_pullable[i][j]);
+			INIT_LIST_HEAD(&jsdd->ctx_list_unpullable[i][j]);
+		}
 	}
 
 	return 0;
@@ -552,13 +554,13 @@ void kbasep_js_devdata_term(struct kbase_device *kbdev)
 	 */
 	KBASE_DEBUG_ASSERT(js_devdata->nr_all_contexts_running == 0);
 	KBASE_DEBUG_ASSERT(memcmp(
-	        js_devdata->runpool_irq.ctx_attr_ref_count,
-	        zero_ctx_attr_ref_count,
-	        sizeof(zero_ctx_attr_ref_count)) == 0);
+				  js_devdata->runpool_irq.ctx_attr_ref_count,
+				  zero_ctx_attr_ref_count,
+				  sizeof(zero_ctx_attr_ref_count)) == 0);
 	CSTD_UNUSED(zero_ctx_attr_ref_count);
 }
 
-int kbasep_js_kctx_init(struct kbase_context * const kctx)
+int kbasep_js_kctx_init(struct kbase_context *const kctx)
 {
 	struct kbase_device *kbdev;
 	struct kbasep_js_kctx_info *js_kctx_info;
@@ -666,7 +668,7 @@ static bool kbase_js_ctx_list_add_pullable_nolock(struct kbase_device *kbdev,
 		list_del_init(&kctx->jctx.sched_info.ctx.ctx_list_entry[js]);
 
 	list_add_tail(&kctx->jctx.sched_info.ctx.ctx_list_entry[js],
-					&kbdev->js_data.ctx_list_pullable[js]);
+			&kbdev->js_data.ctx_list_pullable[js][kctx->priority]);
 
 	if (!kctx->slots_pullable) {
 		kbdev->js_data.nr_contexts_pullable++;
@@ -706,7 +708,7 @@ static bool kbase_js_ctx_list_add_pullable_head_nolock(
 		list_del_init(&kctx->jctx.sched_info.ctx.ctx_list_entry[js]);
 
 	list_add(&kctx->jctx.sched_info.ctx.ctx_list_entry[js],
-					&kbdev->js_data.ctx_list_pullable[js]);
+			&kbdev->js_data.ctx_list_pullable[js][kctx->priority]);
 
 	if (!kctx->slots_pullable) {
 		kbdev->js_data.nr_contexts_pullable++;
@@ -777,7 +779,7 @@ static bool kbase_js_ctx_list_add_unpullable_nolock(struct kbase_device *kbdev,
 	lockdep_assert_held(&kbdev->hwaccess_lock);
 
 	list_move_tail(&kctx->jctx.sched_info.ctx.ctx_list_entry[js],
-				&kbdev->js_data.ctx_list_unpullable[js]);
+		&kbdev->js_data.ctx_list_unpullable[js][kctx->priority]);
 
 	if (kctx->slots_pullable == (1 << js)) {
 		kbdev->js_data.nr_contexts_pullable--;
@@ -852,19 +854,23 @@ static struct kbase_context *kbase_js_ctx_list_pop_head_nolock(
 						int js)
 {
 	struct kbase_context *kctx;
+	int i;
 
 	lockdep_assert_held(&kbdev->hwaccess_lock);
 
-	if (list_empty(&kbdev->js_data.ctx_list_pullable[js]))
-		return NULL;
+	for (i = 0; i < KBASE_JS_ATOM_SCHED_PRIO_COUNT; i++) {
+		if (list_empty(&kbdev->js_data.ctx_list_pullable[js][i]))
+			continue;
 
-	kctx = list_entry(kbdev->js_data.ctx_list_pullable[js].next,
-					struct kbase_context,
-					jctx.sched_info.ctx.ctx_list_entry[js]);
+		kctx = list_entry(kbdev->js_data.ctx_list_pullable[js][i].next,
+				struct kbase_context,
+				jctx.sched_info.ctx.ctx_list_entry[js]);
 
-	list_del_init(&kctx->jctx.sched_info.ctx.ctx_list_entry[js]);
+		list_del_init(&kctx->jctx.sched_info.ctx.ctx_list_entry[js]);
 
-	return kctx;
+		return kctx;
+	}
+	return NULL;
 }
 
 /**
@@ -1065,6 +1071,51 @@ static bool kbase_js_dep_validate(struct kbase_context *kctx,
 	return ret;
 }
 
+void kbase_js_set_ctx_priority(struct kbase_context *kctx, int new_priority)
+{
+	struct kbase_device *kbdev = kctx->kbdev;
+	int js;
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	/* Move kctx to the pullable/upullable list as per the new priority */
+	if (new_priority != kctx->priority) {
+		for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) {
+			if (kctx->slots_pullable & (1 << js))
+				list_move_tail(&kctx->jctx.sched_info.ctx.ctx_list_entry[js],
+					&kbdev->js_data.ctx_list_pullable[js][new_priority]);
+			else
+				list_move_tail(&kctx->jctx.sched_info.ctx.ctx_list_entry[js],
+					&kbdev->js_data.ctx_list_unpullable[js][new_priority]);
+		}
+
+		kctx->priority = new_priority;
+	}
+}
+
+void kbase_js_update_ctx_priority(struct kbase_context *kctx)
+{
+	struct kbase_device *kbdev = kctx->kbdev;
+	int new_priority = KBASE_JS_ATOM_SCHED_PRIO_LOW;
+	int prio;
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	if (kbdev->js_ctx_scheduling_mode == KBASE_JS_SYSTEM_PRIORITY_MODE) {
+		/* Determine the new priority for context, as per the priority
+		 * of currently in-use atoms.
+		 */
+		for (prio = 0; prio < KBASE_JS_ATOM_SCHED_PRIO_COUNT; prio++) {
+			if (kctx->atoms_count[prio]) {
+				new_priority = prio;
+				break;
+			}
+		}
+	}
+
+	kbase_js_set_ctx_priority(kctx, new_priority);
+}
+
 bool kbasep_js_add_job(struct kbase_context *kctx,
 		struct kbase_jd_atom *atom)
 {
@@ -1099,6 +1150,9 @@ bool kbasep_js_add_job(struct kbase_context *kctx,
 	/* Lock for state available during IRQ */
 	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
 
+	if (++kctx->atoms_count[atom->sched_priority] == 1)
+		kbase_js_update_ctx_priority(kctx);
+
 	if (!kbase_js_dep_validate(kctx, atom)) {
 		/* Dependencies could not be represented */
 		--(js_kctx_info->ctx.nr_jobs);
@@ -1107,6 +1161,19 @@ bool kbasep_js_add_job(struct kbase_context *kctx,
 		 * dependencies */
 		atom->status = KBASE_JD_ATOM_STATE_QUEUED;
 
+		/* Undo the count, as the atom will get added again later but
+		 * leave the context priority adjusted or boosted, in case if
+		 * this was the first higher priority atom received for this
+		 * context.
+		 * This will prevent the scenario of priority inversion, where
+		 * another context having medium priority atoms keeps getting
+		 * scheduled over this context, which is having both lower and
+		 * higher priority atoms, but higher priority atoms are blocked
+		 * due to dependency on lower priority atoms. With priority
+		 * boost the high priority atom will get to run at earliest.
+		 */
+		kctx->atoms_count[atom->sched_priority]--;
+
 		spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
 		mutex_unlock(&js_devdata->runpool_mutex);
 
@@ -1173,6 +1240,7 @@ void kbasep_js_remove_job(struct kbase_device *kbdev,
 		struct kbase_context *kctx, struct kbase_jd_atom *atom)
 {
 	struct kbasep_js_kctx_info *js_kctx_info;
+	unsigned long flags;
 
 	KBASE_DEBUG_ASSERT(kbdev != NULL);
 	KBASE_DEBUG_ASSERT(kctx != NULL);
@@ -1186,6 +1254,11 @@ void kbasep_js_remove_job(struct kbase_device *kbdev,
 	/* De-refcount ctx.nr_jobs */
 	KBASE_DEBUG_ASSERT(js_kctx_info->ctx.nr_jobs > 0);
 	--(js_kctx_info->ctx.nr_jobs);
+
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	if (--kctx->atoms_count[atom->sched_priority] == 0)
+		kbase_js_update_ctx_priority(kctx);
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
 }
 
 bool kbasep_js_remove_cancelled_job(struct kbase_device *kbdev,
@@ -1256,9 +1329,8 @@ struct kbase_context *kbasep_js_runpool_lookup_ctx(struct kbase_device *kbdev,
 }
 
 /**
- * kbasep_js_release_result - Try running more jobs after releasing a context
- *                            and/or atom
- *
+ * kbasep_js_run_jobs_after_ctx_and_atom_release - Try running more jobs after
+ *                           releasing a context and/or atom
  * @kbdev:                   The kbase_device to operate on
  * @kctx:                    The kbase_context to operate on
  * @katom_retained_state:    Retained state from the atom
@@ -1304,12 +1376,15 @@ static kbasep_js_release_result kbasep_js_run_jobs_after_ctx_and_atom_release(
 	return result;
 }
 
-/*
- * Internal function to release the reference on a ctx and an atom's "retained
- * state", only taking the runpool and as transaction mutexes
+/**
+ * kbasep_js_runpool_release_ctx_internal - Internal function to release the reference
+ *                                          on a ctx and an atom's "retained state", only
+ *                                          taking the runpool and as transaction mutexes
+ * @kbdev:                   The kbase_device to operate on
+ * @kctx:                    The kbase_context to operate on
+ * @katom_retained_state:    Retained state from the atom
  *
- * This also starts more jobs running in the case of an ctx-attribute state
- * change
+ * This also starts more jobs running in the case of an ctx-attribute state change
  *
  * This does none of the followup actions for scheduling:
  * - It does not schedule in a new context
@@ -1317,11 +1392,15 @@ static kbasep_js_release_result kbasep_js_run_jobs_after_ctx_and_atom_release(
  *
  * For those tasks, just call kbasep_js_runpool_release_ctx() instead
  *
- * Requires:
+ * Has following requirements
  * - Context is scheduled in, and kctx->as_nr matches kctx_as_nr
  * - Context has a non-zero refcount
  * - Caller holds js_kctx_info->ctx.jsctx_mutex
  * - Caller holds js_devdata->runpool_mutex
+ *
+ * Return: A bitpattern, containing KBASEP_JS_RELEASE_RESULT_* flags, indicating
+ *         the result of releasing a context that whether the caller should try
+ *         scheduling a new context or should try scheduling all contexts.
  */
 static kbasep_js_release_result kbasep_js_runpool_release_ctx_internal(
 		struct kbase_device *kbdev,
@@ -1880,7 +1959,7 @@ void kbasep_js_suspend(struct kbase_device *kbdev)
 void kbasep_js_resume(struct kbase_device *kbdev)
 {
 	struct kbasep_js_device_data *js_devdata;
-	int js;
+	int js, prio;
 
 	KBASE_DEBUG_ASSERT(kbdev);
 	js_devdata = &kbdev->js_data;
@@ -1888,31 +1967,33 @@ void kbasep_js_resume(struct kbase_device *kbdev)
 
 	mutex_lock(&js_devdata->queue_mutex);
 	for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) {
-		struct kbase_context *kctx, *n;
+		for (prio = 0; prio < KBASE_JS_ATOM_SCHED_PRIO_COUNT; prio++) {
+			struct kbase_context *kctx, *n;
 
-		list_for_each_entry_safe(kctx, n,
-				&kbdev->js_data.ctx_list_unpullable[js],
-				jctx.sched_info.ctx.ctx_list_entry[js]) {
-			struct kbasep_js_kctx_info *js_kctx_info;
-			unsigned long flags;
-			bool timer_sync = false;
+			list_for_each_entry_safe(kctx, n,
+				 &kbdev->js_data.ctx_list_unpullable[js][prio],
+				 jctx.sched_info.ctx.ctx_list_entry[js]) {
+				struct kbasep_js_kctx_info *js_kctx_info;
+				unsigned long flags;
+				bool timer_sync = false;
 
-			js_kctx_info = &kctx->jctx.sched_info;
+				js_kctx_info = &kctx->jctx.sched_info;
 
-			mutex_lock(&js_kctx_info->ctx.jsctx_mutex);
-			mutex_lock(&js_devdata->runpool_mutex);
-			spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+				mutex_lock(&js_kctx_info->ctx.jsctx_mutex);
+				mutex_lock(&js_devdata->runpool_mutex);
+				spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
 
-			if (!kbase_ctx_flag(kctx, KCTX_SCHEDULED) &&
-				kbase_js_ctx_pullable(kctx, js, false))
-				timer_sync =
-					kbase_js_ctx_list_add_pullable_nolock(
-							kbdev, kctx, js);
-			spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
-			if (timer_sync)
-				kbase_backend_ctx_count_changed(kbdev);
-			mutex_unlock(&js_devdata->runpool_mutex);
-			mutex_unlock(&js_kctx_info->ctx.jsctx_mutex);
+				if (!kbase_ctx_flag(kctx, KCTX_SCHEDULED) &&
+					kbase_js_ctx_pullable(kctx, js, false))
+					timer_sync =
+						kbase_js_ctx_list_add_pullable_nolock(
+								kbdev, kctx, js);
+				spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+				if (timer_sync)
+					kbase_backend_ctx_count_changed(kbdev);
+				mutex_unlock(&js_devdata->runpool_mutex);
+				mutex_unlock(&js_kctx_info->ctx.jsctx_mutex);
+			}
 		}
 	}
 	mutex_unlock(&js_devdata->queue_mutex);
diff --git a/drivers/gpu/arm/bifrost/mali_kbase_js.h b/drivers/gpu/arm/bifrost/mali_kbase_js.h
index aa930b9d83f6..963cef903209 100644
--- a/drivers/gpu/arm/bifrost/mali_kbase_js.h
+++ b/drivers/gpu/arm/bifrost/mali_kbase_js.h
@@ -596,6 +596,27 @@ bool kbase_js_is_atom_valid(struct kbase_device *kbdev,
  */
 void kbase_js_set_timeouts(struct kbase_device *kbdev);
 
+/**
+ * kbase_js_set_ctx_priority - set the context priority
+ * @kctx: Context pointer
+ * @new_priority: New priority value for the Context
+ *
+ * The context priority is set to a new value and it is moved to the
+ * pullable/unpullable list as per the new priority.
+ */
+void kbase_js_set_ctx_priority(struct kbase_context *kctx, int new_priority);
+
+
+/**
+ * kbase_js_update_ctx_priority - update the context priority
+ * @kctx: Context pointer
+ *
+ * The context priority gets updated as per the priority of atoms currently in
+ * use for that context, but only if system priority mode for context scheduling
+ * is being used.
+ */
+void kbase_js_update_ctx_priority(struct kbase_context *kctx);
+
 /*
  * Helpers follow
  */
diff --git a/drivers/gpu/arm/bifrost/mali_kbase_js_defs.h b/drivers/gpu/arm/bifrost/mali_kbase_js_defs.h
index 8c8aa68fbc98..b53f4adf00d3 100644
--- a/drivers/gpu/arm/bifrost/mali_kbase_js_defs.h
+++ b/drivers/gpu/arm/bifrost/mali_kbase_js_defs.h
@@ -146,6 +146,48 @@ enum {
 /** Combination of KBASE_JS_ATOM_DONE_<...> bits */
 typedef u32 kbasep_js_atom_done_code;
 
+/*
+ * Context scheduling mode defines for kbase_device::js_ctx_scheduling_mode
+ */
+enum {
+	/*
+	 * In this mode, the context containing higher priority atoms will be
+	 * scheduled first and also the new runnable higher priority atoms can
+	 * preempt lower priority atoms currently running on the GPU, even if
+	 * they belong to a different context.
+	 */
+	KBASE_JS_SYSTEM_PRIORITY_MODE = 0,
+
+	/*
+	 * In this mode, the contexts are scheduled in round-robin fashion and
+	 * the new runnable higher priority atoms can preempt the lower priority
+	 * atoms currently running on the GPU, only if they belong to the same
+	 * context.
+	 */
+	KBASE_JS_PROCESS_LOCAL_PRIORITY_MODE,
+
+	/* Must be the last in the enum */
+	KBASE_JS_PRIORITY_MODE_COUNT,
+};
+
+/*
+ * Internal atom priority defines for kbase_jd_atom::sched_prio
+ */
+enum {
+	KBASE_JS_ATOM_SCHED_PRIO_HIGH = 0,
+	KBASE_JS_ATOM_SCHED_PRIO_MED,
+	KBASE_JS_ATOM_SCHED_PRIO_LOW,
+	KBASE_JS_ATOM_SCHED_PRIO_COUNT,
+};
+
+/* Invalid priority for kbase_jd_atom::sched_prio */
+#define KBASE_JS_ATOM_SCHED_PRIO_INVALID -1
+
+/* Default priority in the case of contexts with no atoms, or being lenient
+ * about invalid priorities from userspace.
+ */
+#define KBASE_JS_ATOM_SCHED_PRIO_DEFAULT KBASE_JS_ATOM_SCHED_PRIO_MED
+
 /**
  * @brief KBase Device Data Job Scheduler sub-structure
  *
@@ -229,12 +271,12 @@ struct kbasep_js_device_data {
 	/**
 	 * List of contexts that can currently be pulled from
 	 */
-	struct list_head ctx_list_pullable[BASE_JM_MAX_NR_SLOTS];
+	struct list_head ctx_list_pullable[BASE_JM_MAX_NR_SLOTS][KBASE_JS_ATOM_SCHED_PRIO_COUNT];
 	/**
 	 * List of contexts that can not currently be pulled from, but have
 	 * jobs currently running.
 	 */
-	struct list_head ctx_list_unpullable[BASE_JM_MAX_NR_SLOTS];
+	struct list_head ctx_list_unpullable[BASE_JM_MAX_NR_SLOTS][KBASE_JS_ATOM_SCHED_PRIO_COUNT];
 
 	/** Number of currently scheduled user contexts (excluding ones that are not submitting jobs) */
 	s8 nr_user_contexts_running;
@@ -365,22 +407,6 @@ struct kbasep_js_atom_retained_state {
  */
 #define KBASEP_JS_TICK_RESOLUTION_US 1
 
-/*
- * Internal atom priority defines for kbase_jd_atom::sched_prio
- */
-enum {
-	KBASE_JS_ATOM_SCHED_PRIO_HIGH = 0,
-	KBASE_JS_ATOM_SCHED_PRIO_MED,
-	KBASE_JS_ATOM_SCHED_PRIO_LOW,
-	KBASE_JS_ATOM_SCHED_PRIO_COUNT,
-};
-
-/* Invalid priority for kbase_jd_atom::sched_prio */
-#define KBASE_JS_ATOM_SCHED_PRIO_INVALID -1
-
-/* Default priority in the case of contexts with no atoms, or being lenient
- * about invalid priorities from userspace */
-#define KBASE_JS_ATOM_SCHED_PRIO_DEFAULT KBASE_JS_ATOM_SCHED_PRIO_MED
 
 	  /** @} *//* end group kbase_js */
 	  /** @} *//* end group base_kbase_api */
diff --git a/drivers/gpu/arm/bifrost/mali_kbase_mem.c b/drivers/gpu/arm/bifrost/mali_kbase_mem.c
index b457c5215616..e5acab915bd2 100644
--- a/drivers/gpu/arm/bifrost/mali_kbase_mem.c
+++ b/drivers/gpu/arm/bifrost/mali_kbase_mem.c
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2010-2017 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2010-2018 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -535,7 +535,8 @@ static void kbase_region_tracker_ds_init(struct kbase_context *kctx,
 
 	/* Although exec and custom_va_reg don't always exist,
 	 * initialize unconditionally because of the mem_view debugfs
-	 * implementation which relies on these being empty */
+	 * implementation which relies on these being empty
+	 */
 	kctx->reg_rbtree_exec = RB_ROOT;
 	kctx->reg_rbtree_custom = RB_ROOT;
 
@@ -567,6 +568,32 @@ void kbase_region_tracker_term(struct kbase_context *kctx)
 	kbase_region_tracker_erase_rbtree(&kctx->reg_rbtree_custom);
 }
 
+static size_t kbase_get_same_va_bits(struct kbase_context *kctx)
+{
+#if defined(CONFIG_ARM64)
+	/* VA_BITS can be as high as 48 bits, but all bits are available for
+	 * both user and kernel.
+	 */
+	size_t cpu_va_bits = VA_BITS;
+#elif defined(CONFIG_X86_64)
+	/* x86_64 can access 48 bits of VA, but the 48th is used to denote
+	 * kernel (1) vs userspace (0), so the max here is 47.
+	 */
+	size_t cpu_va_bits = 47;
+#elif defined(CONFIG_ARM) || defined(CONFIG_X86_32)
+	size_t cpu_va_bits = sizeof(void *) * BITS_PER_BYTE;
+#else
+#error "Unknown CPU VA width for this architecture"
+#endif
+
+#ifdef CONFIG_64BIT
+	if (kbase_ctx_flag(kctx, KCTX_COMPAT))
+		cpu_va_bits = 32;
+#endif
+
+	return min(cpu_va_bits, (size_t) kctx->kbdev->gpu_props.mmu.va_bits);
+}
+
 /**
  * Initialize the region tracker data structure.
  */
@@ -575,7 +602,7 @@ int kbase_region_tracker_init(struct kbase_context *kctx)
 	struct kbase_va_region *same_va_reg;
 	struct kbase_va_region *exec_reg = NULL;
 	struct kbase_va_region *custom_va_reg = NULL;
-	size_t same_va_bits = sizeof(void *) * BITS_PER_BYTE;
+	size_t same_va_bits = kbase_get_same_va_bits(kctx);
 	u64 custom_va_size = KBASE_REG_ZONE_CUSTOM_VA_SIZE;
 	u64 gpu_va_limit = (1ULL << kctx->kbdev->gpu_props.mmu.va_bits) >> PAGE_SHIFT;
 	u64 same_va_pages;
@@ -584,26 +611,6 @@ int kbase_region_tracker_init(struct kbase_context *kctx)
 	/* Take the lock as kbase_free_alloced_region requires it */
 	kbase_gpu_vm_lock(kctx);
 
-#if defined(CONFIG_ARM64)
-	same_va_bits = VA_BITS;
-#elif defined(CONFIG_X86_64)
-	same_va_bits = 47;
-#elif defined(CONFIG_64BIT)
-#error Unsupported 64-bit architecture
-#endif
-
-#ifdef CONFIG_64BIT
-	if (kbase_ctx_flag(kctx, KCTX_COMPAT))
-		same_va_bits = 32;
-	else if (kbase_hw_has_feature(kctx->kbdev, BASE_HW_FEATURE_33BIT_VA))
-		same_va_bits = 33;
-#endif
-
-	if (kctx->kbdev->gpu_props.mmu.va_bits < same_va_bits) {
-		err = -EINVAL;
-		goto fail_unlock;
-	}
-
 	same_va_pages = (1ULL << (same_va_bits - PAGE_SHIFT)) - 1;
 	/* all have SAME_VA */
 	same_va_reg = kbase_alloc_free_region(kctx, 1,
@@ -652,7 +659,8 @@ int kbase_region_tracker_init(struct kbase_context *kctx)
 	}
 #endif
 
-	kbase_region_tracker_ds_init(kctx, same_va_reg, exec_reg, custom_va_reg);
+	kbase_region_tracker_ds_init(kctx, same_va_reg, exec_reg,
+					custom_va_reg);
 
 	kctx->same_va_end = same_va_pages + 1;
 
@@ -668,33 +676,16 @@ int kbase_region_tracker_init(struct kbase_context *kctx)
 	return err;
 }
 
-int kbase_region_tracker_init_jit(struct kbase_context *kctx, u64 jit_va_pages)
-{
 #ifdef CONFIG_64BIT
+static int kbase_region_tracker_init_jit_64(struct kbase_context *kctx,
+		u64 jit_va_pages)
+{
 	struct kbase_va_region *same_va;
 	struct kbase_va_region *custom_va_reg;
-	u64 same_va_bits;
+	u64 same_va_bits = kbase_get_same_va_bits(kctx);
 	u64 total_va_size;
 	int err;
 
-	/*
-	 * Nothing to do for 32-bit clients, JIT uses the existing
-	 * custom VA zone.
-	 */
-	if (kbase_ctx_flag(kctx, KCTX_COMPAT))
-		return 0;
-
-#if defined(CONFIG_ARM64)
-	same_va_bits = VA_BITS;
-#elif defined(CONFIG_X86_64)
-	same_va_bits = 47;
-#elif defined(CONFIG_64BIT)
-#error Unsupported 64-bit architecture
-#endif
-
-	if (kbase_hw_has_feature(kctx->kbdev, BASE_HW_FEATURE_33BIT_VA))
-		same_va_bits = 33;
-
 	total_va_size = (1ULL << (same_va_bits - PAGE_SHIFT)) - 1;
 
 	kbase_gpu_vm_lock(kctx);
@@ -754,9 +745,27 @@ int kbase_region_tracker_init_jit(struct kbase_context *kctx, u64 jit_va_pages)
 fail_unlock:
 	kbase_gpu_vm_unlock(kctx);
 	return err;
-#else
-	return 0;
+}
 #endif
+
+int kbase_region_tracker_init_jit(struct kbase_context *kctx, u64 jit_va_pages,
+		u8 max_allocations, u8 trim_level)
+{
+	if (trim_level > 100)
+		return -EINVAL;
+
+	kctx->jit_max_allocations = max_allocations;
+	kctx->trim_level = trim_level;
+
+#ifdef CONFIG_64BIT
+	if (!kbase_ctx_flag(kctx, KCTX_COMPAT))
+		return kbase_region_tracker_init_jit_64(kctx, jit_va_pages);
+#endif
+	/*
+	 * Nothing to do for 32-bit clients, JIT uses the existing
+	 * custom VA zone.
+	 */
+	return 0;
 }
 
 int kbase_mem_init(struct kbase_device *kbdev)
@@ -824,7 +833,8 @@ KBASE_EXPORT_TEST_API(kbase_mem_term);
  * The allocated object is not part of any list yet, and is flagged as
  * KBASE_REG_FREE. No mapping is allocated yet.
  *
- * zone is KBASE_REG_ZONE_CUSTOM_VA, KBASE_REG_ZONE_SAME_VA, or KBASE_REG_ZONE_EXEC
+ * zone is KBASE_REG_ZONE_CUSTOM_VA, KBASE_REG_ZONE_SAME_VA,
+ * or KBASE_REG_ZONE_EXEC
  *
  */
 struct kbase_va_region *kbase_alloc_free_region(struct kbase_context *kctx, u64 start_pfn, size_t nr_pages, int zone)
@@ -874,6 +884,8 @@ KBASE_EXPORT_TEST_API(kbase_alloc_free_region);
 void kbase_free_alloced_region(struct kbase_va_region *reg)
 {
 	if (!(reg->flags & KBASE_REG_FREE)) {
+		mutex_lock(&reg->kctx->jit_evict_lock);
+
 		/*
 		 * The physical allocation should have been removed from the
 		 * eviction list before this function is called. However, in the
@@ -882,6 +894,8 @@ void kbase_free_alloced_region(struct kbase_va_region *reg)
 		 * on the list at termination time of the region tracker.
 		 */
 		if (!list_empty(&reg->gpu_alloc->evict_node)) {
+			mutex_unlock(&reg->kctx->jit_evict_lock);
+
 			/*
 			 * Unlink the physical allocation before unmaking it
 			 * evictable so that the allocation isn't grown back to
@@ -904,6 +918,8 @@ void kbase_free_alloced_region(struct kbase_va_region *reg)
 						   KBASE_MEM_TYPE_NATIVE);
 				kbase_mem_evictable_unmake(reg->gpu_alloc);
 			}
+		} else {
+			mutex_unlock(&reg->kctx->jit_evict_lock);
 		}
 
 		/*
@@ -1497,9 +1513,8 @@ int kbase_update_region_flags(struct kbase_context *kctx,
 	return 0;
 }
 
-int kbase_alloc_phy_pages_helper(
-	struct kbase_mem_phy_alloc *alloc,
-	size_t nr_pages_requested)
+int kbase_alloc_phy_pages_helper(struct kbase_mem_phy_alloc *alloc,
+		size_t nr_pages_requested)
 {
 	int new_page_count __maybe_unused;
 	size_t nr_left = nr_pages_requested;
@@ -1649,12 +1664,18 @@ int kbase_alloc_phy_pages_helper(
 
 alloc_failed:
 	/* rollback needed if got one or more 2MB but failed later */
-	if (nr_left != nr_pages_requested)
-		kbase_mem_pool_free_pages(&kctx->lp_mem_pool,
-				  nr_pages_requested - nr_left,
-				  alloc->pages + alloc->nents,
-				  false,
-				  false);
+	if (nr_left != nr_pages_requested) {
+		size_t nr_pages_to_free = nr_pages_requested - nr_left;
+
+		alloc->nents += nr_pages_to_free;
+
+		kbase_process_page_usage_inc(kctx, nr_pages_to_free);
+		kbase_atomic_add_pages(nr_pages_to_free, &kctx->used_pages);
+		kbase_atomic_add_pages(nr_pages_to_free,
+			       &kctx->kbdev->memdev.used_pages);
+
+		kbase_free_phy_pages_helper(alloc, nr_pages_to_free);
+	}
 
 	kbase_process_page_usage_dec(kctx, nr_pages_requested);
 	kbase_atomic_sub_pages(nr_pages_requested, &kctx->used_pages);
@@ -1665,6 +1686,181 @@ int kbase_alloc_phy_pages_helper(
 	return -ENOMEM;
 }
 
+struct tagged_addr *kbase_alloc_phy_pages_helper_locked(
+		struct kbase_mem_phy_alloc *alloc, struct kbase_mem_pool *pool,
+		size_t nr_pages_requested)
+{
+	int new_page_count __maybe_unused;
+	size_t nr_left = nr_pages_requested;
+	int res;
+	struct kbase_context *kctx;
+	struct tagged_addr *tp;
+	struct tagged_addr *new_pages = NULL;
+
+	KBASE_DEBUG_ASSERT(alloc->type == KBASE_MEM_TYPE_NATIVE);
+	KBASE_DEBUG_ASSERT(alloc->imported.kctx);
+
+	lockdep_assert_held(&pool->pool_lock);
+
+#if !defined(CONFIG_MALI_2MB_ALLOC)
+	WARN_ON(pool->order);
+#endif
+
+	if (alloc->reg) {
+		if (nr_pages_requested > alloc->reg->nr_pages - alloc->nents)
+			goto invalid_request;
+	}
+
+	kctx = alloc->imported.kctx;
+
+	lockdep_assert_held(&kctx->mem_partials_lock);
+
+	if (nr_pages_requested == 0)
+		goto done; /*nothing to do*/
+
+	new_page_count = kbase_atomic_add_pages(
+			nr_pages_requested, &kctx->used_pages);
+	kbase_atomic_add_pages(nr_pages_requested,
+			       &kctx->kbdev->memdev.used_pages);
+
+	/* Increase mm counters before we allocate pages so that this
+	 * allocation is visible to the OOM killer
+	 */
+	kbase_process_page_usage_inc(kctx, nr_pages_requested);
+
+	tp = alloc->pages + alloc->nents;
+	new_pages = tp;
+
+#ifdef CONFIG_MALI_2MB_ALLOC
+	if (pool->order) {
+		int nr_lp = nr_left / (SZ_2M / SZ_4K);
+
+		res = kbase_mem_pool_alloc_pages_locked(pool,
+						 nr_lp * (SZ_2M / SZ_4K),
+						 tp);
+
+		if (res > 0) {
+			nr_left -= res;
+			tp += res;
+		}
+
+		if (nr_left) {
+			struct kbase_sub_alloc *sa, *temp_sa;
+
+			list_for_each_entry_safe(sa, temp_sa,
+						 &kctx->mem_partials, link) {
+				int pidx = 0;
+
+				while (nr_left) {
+					pidx = find_next_zero_bit(sa->sub_pages,
+								  SZ_2M / SZ_4K,
+								  pidx);
+					bitmap_set(sa->sub_pages, pidx, 1);
+					*tp++ = as_tagged_tag(page_to_phys(
+							sa->page + pidx),
+							FROM_PARTIAL);
+					nr_left--;
+
+					if (bitmap_full(sa->sub_pages,
+							SZ_2M / SZ_4K)) {
+						/* unlink from partial list when
+						 * full
+						 */
+						list_del_init(&sa->link);
+						break;
+					}
+				}
+			}
+		}
+
+		/* only if we actually have a chunk left <512. If more it
+		 * indicates that we couldn't allocate a 2MB above, so no point
+		 * to retry here.
+		 */
+		if (nr_left > 0 && nr_left < (SZ_2M / SZ_4K)) {
+			/* create a new partial and suballocate the rest from it
+			 */
+			struct page *np = NULL;
+
+			np = kbase_mem_pool_alloc_locked(pool);
+
+			if (np) {
+				int i;
+				struct kbase_sub_alloc *sa;
+				struct page *p;
+
+				sa = kmalloc(sizeof(*sa), GFP_KERNEL);
+				if (!sa) {
+					kbase_mem_pool_free_locked(pool, np,
+							false);
+					goto alloc_failed;
+				}
+
+				/* store pointers back to the control struct */
+				np->lru.next = (void *)sa;
+				for (p = np; p < np + SZ_2M / SZ_4K; p++)
+					p->lru.prev = (void *)np;
+				INIT_LIST_HEAD(&sa->link);
+				bitmap_zero(sa->sub_pages, SZ_2M / SZ_4K);
+				sa->page = np;
+
+				for (i = 0; i < nr_left; i++)
+					*tp++ = as_tagged_tag(
+							page_to_phys(np + i),
+							FROM_PARTIAL);
+
+				bitmap_set(sa->sub_pages, 0, nr_left);
+				nr_left = 0;
+
+				/* expose for later use */
+				list_add(&sa->link, &kctx->mem_partials);
+			}
+		}
+		if (nr_left)
+			goto alloc_failed;
+	} else {
+#endif
+		res = kbase_mem_pool_alloc_pages_locked(pool,
+						 nr_left,
+						 tp);
+		if (res <= 0)
+			goto alloc_failed;
+#ifdef CONFIG_MALI_2MB_ALLOC
+	}
+#endif
+
+	KBASE_TLSTREAM_AUX_PAGESALLOC(
+			kctx->id,
+			(u64)new_page_count);
+
+	alloc->nents += nr_pages_requested;
+done:
+	return new_pages;
+
+alloc_failed:
+	/* rollback needed if got one or more 2MB but failed later */
+	if (nr_left != nr_pages_requested) {
+		size_t nr_pages_to_free = nr_pages_requested - nr_left;
+
+		alloc->nents += nr_pages_to_free;
+
+		kbase_process_page_usage_inc(kctx, nr_pages_to_free);
+		kbase_atomic_add_pages(nr_pages_to_free, &kctx->used_pages);
+		kbase_atomic_add_pages(nr_pages_to_free,
+			       &kctx->kbdev->memdev.used_pages);
+
+		kbase_free_phy_pages_helper(alloc, nr_pages_to_free);
+	}
+
+	kbase_process_page_usage_dec(kctx, nr_pages_requested);
+	kbase_atomic_sub_pages(nr_pages_requested, &kctx->used_pages);
+	kbase_atomic_sub_pages(nr_pages_requested,
+			       &kctx->kbdev->memdev.used_pages);
+
+invalid_request:
+	return NULL;
+}
+
 static void free_partial(struct kbase_context *kctx, struct tagged_addr tp)
 {
 	struct page *p, *head_page;
@@ -1776,6 +1972,124 @@ int kbase_free_phy_pages_helper(
 	return 0;
 }
 
+static void free_partial_locked(struct kbase_context *kctx,
+		struct kbase_mem_pool *pool, struct tagged_addr tp)
+{
+	struct page *p, *head_page;
+	struct kbase_sub_alloc *sa;
+
+	lockdep_assert_held(&pool->pool_lock);
+	lockdep_assert_held(&kctx->mem_partials_lock);
+
+	p = phys_to_page(as_phys_addr_t(tp));
+	head_page = (struct page *)p->lru.prev;
+	sa = (struct kbase_sub_alloc *)head_page->lru.next;
+	clear_bit(p - head_page, sa->sub_pages);
+	if (bitmap_empty(sa->sub_pages, SZ_2M / SZ_4K)) {
+		list_del(&sa->link);
+		kbase_mem_pool_free(pool, head_page, true);
+		kfree(sa);
+	} else if (bitmap_weight(sa->sub_pages, SZ_2M / SZ_4K) ==
+		   SZ_2M / SZ_4K - 1) {
+		/* expose the partial again */
+		list_add(&sa->link, &kctx->mem_partials);
+	}
+}
+
+void kbase_free_phy_pages_helper_locked(struct kbase_mem_phy_alloc *alloc,
+		struct kbase_mem_pool *pool, struct tagged_addr *pages,
+		size_t nr_pages_to_free)
+{
+	struct kbase_context *kctx = alloc->imported.kctx;
+	bool syncback;
+	bool reclaimed = (alloc->evicted != 0);
+	struct tagged_addr *start_free;
+	size_t freed = 0;
+
+	KBASE_DEBUG_ASSERT(alloc->type == KBASE_MEM_TYPE_NATIVE);
+	KBASE_DEBUG_ASSERT(alloc->imported.kctx);
+	KBASE_DEBUG_ASSERT(alloc->nents >= nr_pages_to_free);
+
+	lockdep_assert_held(&pool->pool_lock);
+	lockdep_assert_held(&kctx->mem_partials_lock);
+
+	/* early out if nothing to do */
+	if (!nr_pages_to_free)
+		return;
+
+	start_free = pages;
+
+	syncback = alloc->properties & KBASE_MEM_PHY_ALLOC_ACCESSED_CACHED;
+
+	/* pad start_free to a valid start location */
+	while (nr_pages_to_free && is_huge(*start_free) &&
+	       !is_huge_head(*start_free)) {
+		nr_pages_to_free--;
+		start_free++;
+	}
+
+	while (nr_pages_to_free) {
+		if (is_huge_head(*start_free)) {
+			/* This is a 2MB entry, so free all the 512 pages that
+			 * it points to
+			 */
+			WARN_ON(!pool->order);
+			kbase_mem_pool_free_pages_locked(pool,
+					512,
+					start_free,
+					syncback,
+					reclaimed);
+			nr_pages_to_free -= 512;
+			start_free += 512;
+			freed += 512;
+		} else if (is_partial(*start_free)) {
+			WARN_ON(!pool->order);
+			free_partial_locked(kctx, pool, *start_free);
+			nr_pages_to_free--;
+			start_free++;
+			freed++;
+		} else {
+			struct tagged_addr *local_end_free;
+
+			WARN_ON(pool->order);
+			local_end_free = start_free;
+			while (nr_pages_to_free &&
+			       !is_huge(*local_end_free) &&
+			       !is_partial(*local_end_free)) {
+				local_end_free++;
+				nr_pages_to_free--;
+			}
+			kbase_mem_pool_free_pages_locked(pool,
+					local_end_free - start_free,
+					start_free,
+					syncback,
+					reclaimed);
+			freed += local_end_free - start_free;
+			start_free += local_end_free - start_free;
+		}
+	}
+
+	alloc->nents -= freed;
+
+	/*
+	 * If the allocation was not evicted (i.e. evicted == 0) then
+	 * the page accounting needs to be done.
+	 */
+	if (!reclaimed) {
+		int new_page_count;
+
+		kbase_process_page_usage_dec(kctx, freed);
+		new_page_count = kbase_atomic_sub_pages(freed,
+							&kctx->used_pages);
+		kbase_atomic_sub_pages(freed,
+				       &kctx->kbdev->memdev.used_pages);
+
+		KBASE_TLSTREAM_AUX_PAGESALLOC(
+				kctx->id,
+				(u64)new_page_count);
+	}
+}
+
 void kbase_mem_kref_free(struct kref *kref)
 {
 	struct kbase_mem_phy_alloc *alloc;
@@ -1784,12 +2098,15 @@ void kbase_mem_kref_free(struct kref *kref)
 
 	switch (alloc->type) {
 	case KBASE_MEM_TYPE_NATIVE: {
-		WARN_ON(!alloc->imported.kctx);
-		/*
-		 * The physical allocation must have been removed from the
-		 * eviction list before trying to free it.
-		 */
-		WARN_ON(!list_empty(&alloc->evict_node));
+		if (!WARN_ON(!alloc->imported.kctx)) {
+			/*
+			 * The physical allocation must have been removed from
+			 * the eviction list before trying to free it.
+			 */
+			mutex_lock(&alloc->imported.kctx->jit_evict_lock);
+			WARN_ON(!list_empty(&alloc->evict_node));
+			mutex_unlock(&alloc->imported.kctx->jit_evict_lock);
+		}
 		kbase_free_phy_pages_helper(alloc, alloc->nents);
 		break;
 	}
@@ -2284,6 +2601,7 @@ static void kbase_jit_destroy_worker(struct work_struct *work)
 
 int kbase_jit_init(struct kbase_context *kctx)
 {
+	mutex_lock(&kctx->jit_evict_lock);
 	INIT_LIST_HEAD(&kctx->jit_active_head);
 	INIT_LIST_HEAD(&kctx->jit_pool_head);
 	INIT_LIST_HEAD(&kctx->jit_destroy_head);
@@ -2291,49 +2609,255 @@ int kbase_jit_init(struct kbase_context *kctx)
 
 	INIT_LIST_HEAD(&kctx->jit_pending_alloc);
 	INIT_LIST_HEAD(&kctx->jit_atoms_head);
+	mutex_unlock(&kctx->jit_evict_lock);
+
+	kctx->jit_max_allocations = 0;
+	kctx->jit_current_allocations = 0;
+	kctx->trim_level = 0;
 
 	return 0;
 }
 
+/* Check if the allocation from JIT pool is of the same size as the new JIT
+ * allocation and also, if BASE_JIT_ALLOC_MEM_TILER_ALIGN_TOP is set, meets
+ * the alignment requirements.
+ */
+static bool meet_size_and_tiler_align_top_requirements(struct kbase_context *kctx,
+	struct kbase_va_region *walker, struct base_jit_alloc_info *info)
+{
+	bool meet_reqs = true;
+
+	if (walker->nr_pages != info->va_pages)
+		meet_reqs = false;
+	else if (info->flags & BASE_JIT_ALLOC_MEM_TILER_ALIGN_TOP) {
+		size_t align = info->extent;
+		size_t align_mask = align - 1;
+
+		if ((walker->start_pfn + info->commit_pages) & align_mask)
+			meet_reqs = false;
+	}
+
+	return meet_reqs;
+}
+
+static int kbase_jit_grow(struct kbase_context *kctx,
+		struct base_jit_alloc_info *info, struct kbase_va_region *reg)
+{
+	size_t delta;
+	size_t pages_required;
+	size_t old_size;
+	struct kbase_mem_pool *pool;
+	int ret = -ENOMEM;
+	struct tagged_addr *gpu_pages;
+
+	if (info->commit_pages > reg->nr_pages) {
+		/* Attempted to grow larger than maximum size */
+		return -EINVAL;
+	}
+
+	kbase_gpu_vm_lock(kctx);
+
+	/* Make the physical backing no longer reclaimable */
+	if (!kbase_mem_evictable_unmake(reg->gpu_alloc))
+		goto update_failed;
+
+	if (reg->gpu_alloc->nents >= info->commit_pages)
+		goto done;
+
+	/* Grow the backing */
+	old_size = reg->gpu_alloc->nents;
+
+	/* Allocate some more pages */
+	delta = info->commit_pages - reg->gpu_alloc->nents;
+	pages_required = delta;
+
+#ifdef CONFIG_MALI_2MB_ALLOC
+	if (pages_required >= (SZ_2M / SZ_4K)) {
+		pool = &kctx->lp_mem_pool;
+		/* Round up to number of 2 MB pages required */
+		pages_required += ((SZ_2M / SZ_4K) - 1);
+		pages_required /= (SZ_2M / SZ_4K);
+	} else {
+#endif
+		pool = &kctx->mem_pool;
+#ifdef CONFIG_MALI_2MB_ALLOC
+	}
+#endif
+
+	if (reg->cpu_alloc != reg->gpu_alloc)
+		pages_required *= 2;
+
+	mutex_lock(&kctx->mem_partials_lock);
+	kbase_mem_pool_lock(pool);
+
+	/* As we can not allocate memory from the kernel with the vm_lock held,
+	 * grow the pool to the required size with the lock dropped. We hold the
+	 * pool lock to prevent another thread from allocating from the pool
+	 * between the grow and allocation.
+	 */
+	while (kbase_mem_pool_size(pool) < pages_required) {
+		int pool_delta = pages_required - kbase_mem_pool_size(pool);
+
+		kbase_mem_pool_unlock(pool);
+		mutex_unlock(&kctx->mem_partials_lock);
+		kbase_gpu_vm_unlock(kctx);
+
+		if (kbase_mem_pool_grow(pool, pool_delta))
+			goto update_failed_unlocked;
+
+		kbase_gpu_vm_lock(kctx);
+		mutex_lock(&kctx->mem_partials_lock);
+		kbase_mem_pool_lock(pool);
+	}
+
+	gpu_pages = kbase_alloc_phy_pages_helper_locked(reg->gpu_alloc, pool,
+			delta);
+	if (!gpu_pages) {
+		kbase_mem_pool_unlock(pool);
+		mutex_unlock(&kctx->mem_partials_lock);
+		goto update_failed;
+	}
+
+	if (reg->cpu_alloc != reg->gpu_alloc) {
+		struct tagged_addr *cpu_pages;
+
+		cpu_pages = kbase_alloc_phy_pages_helper_locked(reg->cpu_alloc,
+				pool, delta);
+		if (!cpu_pages) {
+			kbase_free_phy_pages_helper_locked(reg->gpu_alloc,
+					pool, gpu_pages, delta);
+			kbase_mem_pool_unlock(pool);
+			mutex_unlock(&kctx->mem_partials_lock);
+			goto update_failed;
+		}
+	}
+	kbase_mem_pool_unlock(pool);
+	mutex_unlock(&kctx->mem_partials_lock);
+
+	ret = kbase_mem_grow_gpu_mapping(kctx, reg, info->commit_pages,
+			old_size);
+	/*
+	 * The grow failed so put the allocation back in the
+	 * pool and return failure.
+	 */
+	if (ret)
+		goto update_failed;
+
+done:
+	ret = 0;
+
+	/* Update attributes of JIT allocation taken from the pool */
+	reg->initial_commit = info->commit_pages;
+	reg->extent = info->extent;
+
+update_failed:
+	kbase_gpu_vm_unlock(kctx);
+update_failed_unlocked:
+	return ret;
+}
+
 struct kbase_va_region *kbase_jit_allocate(struct kbase_context *kctx,
 		struct base_jit_alloc_info *info)
 {
 	struct kbase_va_region *reg = NULL;
-	struct kbase_va_region *walker;
-	struct kbase_va_region *temp;
-	size_t current_diff = SIZE_MAX;
 
-	int ret;
+	if (kctx->jit_current_allocations >= kctx->jit_max_allocations) {
+		/* Too many current allocations */
+		return NULL;
+	}
+	if (info->max_allocations > 0 &&
+			kctx->jit_current_allocations_per_bin[info->bin_id] >=
+			info->max_allocations) {
+		/* Too many current allocations in this bin */
+		return NULL;
+	}
 
 	mutex_lock(&kctx->jit_evict_lock);
+
 	/*
 	 * Scan the pool for an existing allocation which meets our
 	 * requirements and remove it.
 	 */
-	list_for_each_entry_safe(walker, temp, &kctx->jit_pool_head, jit_node) {
+	if (info->usage_id != 0) {
+		/* First scan for an allocation with the same usage ID */
+		struct kbase_va_region *walker;
+		struct kbase_va_region *temp;
+		size_t current_diff = SIZE_MAX;
 
-		if (walker->nr_pages >= info->va_pages) {
-			size_t min_size, max_size, diff;
+		list_for_each_entry_safe(walker, temp, &kctx->jit_pool_head,
+				jit_node) {
 
-			/*
-			 * The JIT allocations VA requirements have been
-			 * meet, it's suitable but other allocations
-			 * might be a better fit.
-			 */
-			min_size = min_t(size_t, walker->gpu_alloc->nents,
-					info->commit_pages);
-			max_size = max_t(size_t, walker->gpu_alloc->nents,
-					info->commit_pages);
-			diff = max_size - min_size;
+			if (walker->jit_usage_id == info->usage_id &&
+					walker->jit_bin_id == info->bin_id &&
+					meet_size_and_tiler_align_top_requirements(
+							kctx, walker, info)) {
+				size_t min_size, max_size, diff;
 
-			if (current_diff > diff) {
-				current_diff = diff;
-				reg = walker;
+				/*
+				 * The JIT allocations VA requirements have been
+				 * met, it's suitable but other allocations
+				 * might be a better fit.
+				 */
+				min_size = min_t(size_t,
+						walker->gpu_alloc->nents,
+						info->commit_pages);
+				max_size = max_t(size_t,
+						walker->gpu_alloc->nents,
+						info->commit_pages);
+				diff = max_size - min_size;
+
+				if (current_diff > diff) {
+					current_diff = diff;
+					reg = walker;
+				}
+
+				/* The allocation is an exact match */
+				if (current_diff == 0)
+					break;
 			}
+		}
+	}
 
-			/* The allocation is an exact match, stop looking */
-			if (current_diff == 0)
-				break;
+	if (!reg) {
+		/* No allocation with the same usage ID, or usage IDs not in
+		 * use. Search for an allocation we can reuse.
+		 */
+		struct kbase_va_region *walker;
+		struct kbase_va_region *temp;
+		size_t current_diff = SIZE_MAX;
+
+		list_for_each_entry_safe(walker, temp, &kctx->jit_pool_head,
+				jit_node) {
+
+			if (walker->jit_bin_id == info->bin_id &&
+					meet_size_and_tiler_align_top_requirements(
+							kctx, walker, info)) {
+				size_t min_size, max_size, diff;
+
+				/*
+				 * The JIT allocations VA requirements have been
+				 * met, it's suitable but other allocations
+				 * might be a better fit.
+				 */
+				min_size = min_t(size_t,
+						walker->gpu_alloc->nents,
+						info->commit_pages);
+				max_size = max_t(size_t,
+						walker->gpu_alloc->nents,
+						info->commit_pages);
+				diff = max_size - min_size;
+
+				if (current_diff > diff) {
+					current_diff = diff;
+					reg = walker;
+				}
+
+				/* The allocation is an exact match, so stop
+				 * looking.
+				 */
+				if (current_diff == 0)
+					break;
+			}
 		}
 	}
 
@@ -2352,42 +2876,15 @@ struct kbase_va_region *kbase_jit_allocate(struct kbase_context *kctx,
 		list_del_init(&reg->gpu_alloc->evict_node);
 		mutex_unlock(&kctx->jit_evict_lock);
 
-		kbase_gpu_vm_lock(kctx);
-
-		/* Make the physical backing no longer reclaimable */
-		if (!kbase_mem_evictable_unmake(reg->gpu_alloc))
-			goto update_failed;
-
-		/* Grow the backing if required */
-		if (reg->gpu_alloc->nents < info->commit_pages) {
-			size_t delta;
-			size_t old_size = reg->gpu_alloc->nents;
-
-			/* Allocate some more pages */
-			delta = info->commit_pages - reg->gpu_alloc->nents;
-			if (kbase_alloc_phy_pages_helper(reg->gpu_alloc, delta)
-					!= 0)
-				goto update_failed;
-
-			if (reg->cpu_alloc != reg->gpu_alloc) {
-				if (kbase_alloc_phy_pages_helper(
-						reg->cpu_alloc, delta) != 0) {
-					kbase_free_phy_pages_helper(
-							reg->gpu_alloc, delta);
-					goto update_failed;
-				}
-			}
-
-			ret = kbase_mem_grow_gpu_mapping(kctx, reg,
-					info->commit_pages, old_size);
+		if (kbase_jit_grow(kctx, info, reg) < 0) {
 			/*
-			 * The grow failed so put the allocation back in the
-			 * pool and return failure.
+			 * An update to an allocation from the pool failed,
+			 * chances are slim a new allocation would fair any
+			 * better so return the allocation to the pool and
+			 * return the function with failure.
 			 */
-			if (ret)
-				goto update_failed;
+			goto update_failed_unlocked;
 		}
-		kbase_gpu_vm_unlock(kctx);
 	} else {
 		/* No suitable JIT allocation was found so create a new one */
 		u64 flags = BASE_MEM_PROT_CPU_RD | BASE_MEM_PROT_GPU_RD |
@@ -2397,6 +2894,9 @@ struct kbase_va_region *kbase_jit_allocate(struct kbase_context *kctx,
 
 		mutex_unlock(&kctx->jit_evict_lock);
 
+		if (info->flags & BASE_JIT_ALLOC_MEM_TILER_ALIGN_TOP)
+			flags |= BASE_MEM_TILER_ALIGN_TOP;
+
 		reg = kbase_mem_alloc(kctx, info->va_pages, info->commit_pages,
 				info->extent, &flags, &gpu_addr);
 		if (!reg)
@@ -2409,15 +2909,15 @@ struct kbase_va_region *kbase_jit_allocate(struct kbase_context *kctx,
 		mutex_unlock(&kctx->jit_evict_lock);
 	}
 
+	kctx->jit_current_allocations++;
+	kctx->jit_current_allocations_per_bin[info->bin_id]++;
+
+	reg->jit_usage_id = info->usage_id;
+	reg->jit_bin_id = info->bin_id;
+
 	return reg;
 
-update_failed:
-	/*
-	 * An update to an allocation from the pool failed, chances
-	 * are slim a new allocation would fair any better so return
-	 * the allocation to the pool and return the function with failure.
-	 */
-	kbase_gpu_vm_unlock(kctx);
+update_failed_unlocked:
 	mutex_lock(&kctx->jit_evict_lock);
 	list_move(&reg->jit_node, &kctx->jit_pool_head);
 	mutex_unlock(&kctx->jit_evict_lock);
@@ -2427,13 +2927,53 @@ struct kbase_va_region *kbase_jit_allocate(struct kbase_context *kctx,
 
 void kbase_jit_free(struct kbase_context *kctx, struct kbase_va_region *reg)
 {
-	/* The physical backing of memory in the pool is always reclaimable */
+	u64 old_pages;
+
+	/* Get current size of JIT region */
+	old_pages = kbase_reg_current_backed_size(reg);
+	if (reg->initial_commit < old_pages) {
+		/* Free trim_level % of region, but don't go below initial
+		 * commit size
+		 */
+		u64 new_size = MAX(reg->initial_commit,
+			div_u64(old_pages * (100 - kctx->trim_level), 100));
+		u64 delta = old_pages - new_size;
+
+		if (delta) {
+			kbase_mem_shrink_cpu_mapping(kctx, reg, old_pages-delta,
+					old_pages);
+			kbase_mem_shrink_gpu_mapping(kctx, reg, old_pages-delta,
+					old_pages);
+
+			kbase_free_phy_pages_helper(reg->cpu_alloc, delta);
+			if (reg->cpu_alloc != reg->gpu_alloc)
+				kbase_free_phy_pages_helper(reg->gpu_alloc,
+						delta);
+		}
+	}
+
+	kctx->jit_current_allocations--;
+	kctx->jit_current_allocations_per_bin[reg->jit_bin_id]--;
+
+	kbase_mem_evictable_mark_reclaim(reg->gpu_alloc);
+
 	kbase_gpu_vm_lock(kctx);
-	kbase_mem_evictable_make(reg->gpu_alloc);
+	reg->flags |= KBASE_REG_DONT_NEED;
+	kbase_mem_shrink_cpu_mapping(kctx, reg, 0, reg->gpu_alloc->nents);
 	kbase_gpu_vm_unlock(kctx);
 
+	/*
+	 * Add the allocation to the eviction list and the jit pool, after this
+	 * point the shrink can reclaim it, or it may be reused.
+	 */
 	mutex_lock(&kctx->jit_evict_lock);
+
+	/* This allocation can't already be on a list. */
+	WARN_ON(!list_empty(&reg->gpu_alloc->evict_node));
+	list_add(&reg->gpu_alloc->evict_node, &kctx->evict_list);
+
 	list_move(&reg->jit_node, &kctx->jit_pool_head);
+
 	mutex_unlock(&kctx->jit_evict_lock);
 }
 
@@ -2673,6 +3213,7 @@ static int kbase_jd_umm_map(struct kbase_context *kctx,
 	int err;
 	size_t count = 0;
 	struct kbase_mem_phy_alloc *alloc;
+	unsigned long gwt_mask = ~0;
 
 	alloc = reg->gpu_alloc;
 
@@ -2722,10 +3263,16 @@ static int kbase_jd_umm_map(struct kbase_context *kctx,
 	/* Update nents as we now have pages to map */
 	alloc->nents = reg->nr_pages;
 
+#ifdef CONFIG_MALI_JOB_DUMP
+	if (kctx->gwt_enabled)
+		gwt_mask = ~KBASE_REG_GPU_WR;
+#endif
+
 	err = kbase_mmu_insert_pages(kctx, reg->start_pfn,
 			kbase_get_gpu_phy_pages(reg),
 			count,
-			reg->flags | KBASE_REG_GPU_WR | KBASE_REG_GPU_RD);
+			(reg->flags | KBASE_REG_GPU_WR | KBASE_REG_GPU_RD) &
+			 gwt_mask);
 	if (err)
 		goto err_unmap_attachment;
 
diff --git a/drivers/gpu/arm/bifrost/mali_kbase_mem.h b/drivers/gpu/arm/bifrost/mali_kbase_mem.h
index f67255121f5e..b456752b417b 100644
--- a/drivers/gpu/arm/bifrost/mali_kbase_mem.h
+++ b/drivers/gpu/arm/bifrost/mali_kbase_mem.h
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2010-2017 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2010-2018 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -319,7 +319,9 @@ struct kbase_va_region {
 #define KBASE_REG_ZONE_EXEC_SIZE    ((16ULL * 1024 * 1024) >> PAGE_SHIFT)
 
 #define KBASE_REG_ZONE_CUSTOM_VA         KBASE_REG_ZONE(2)
-#define KBASE_REG_ZONE_CUSTOM_VA_BASE    (KBASE_REG_ZONE_EXEC_BASE + KBASE_REG_ZONE_EXEC_SIZE) /* Starting after KBASE_REG_ZONE_EXEC */
+/* Starting after KBASE_REG_ZONE_EXEC */
+#define KBASE_REG_ZONE_CUSTOM_VA_BASE    \
+	(KBASE_REG_ZONE_EXEC_BASE + KBASE_REG_ZONE_EXEC_SIZE)
 #define KBASE_REG_ZONE_CUSTOM_VA_SIZE    (((1ULL << 44) >> PAGE_SHIFT) - KBASE_REG_ZONE_CUSTOM_VA_BASE)
 /* end 32-bit clients only */
 
@@ -332,6 +334,10 @@ struct kbase_va_region {
 
 	/* List head used to store the region in the JIT allocation pool */
 	struct list_head jit_node;
+	/* The last JIT usage ID for this region */
+	u16 jit_usage_id;
+	/* The JIT bin this allocation came from */
+	u8 jit_bin_id;
 };
 
 /* Common functions */
@@ -435,23 +441,32 @@ static inline int kbase_reg_prepare_native(struct kbase_va_region *reg,
 		return PTR_ERR(reg->cpu_alloc);
 	else if (!reg->cpu_alloc)
 		return -ENOMEM;
+
 	reg->cpu_alloc->imported.kctx = kctx;
-	INIT_LIST_HEAD(&reg->cpu_alloc->evict_node);
 	if (kbase_ctx_flag(kctx, KCTX_INFINITE_CACHE)
 	    && (reg->flags & KBASE_REG_CPU_CACHED)) {
 		reg->gpu_alloc = kbase_alloc_create(reg->nr_pages,
 				KBASE_MEM_TYPE_NATIVE);
+		if (IS_ERR_OR_NULL(reg->gpu_alloc)) {
+			kbase_mem_phy_alloc_put(reg->cpu_alloc);
+			return -ENOMEM;
+		}
 		reg->gpu_alloc->imported.kctx = kctx;
-		INIT_LIST_HEAD(&reg->gpu_alloc->evict_node);
 	} else {
 		reg->gpu_alloc = kbase_mem_phy_alloc_get(reg->cpu_alloc);
 	}
 
+	mutex_lock(&kctx->jit_evict_lock);
+	INIT_LIST_HEAD(&reg->cpu_alloc->evict_node);
+	INIT_LIST_HEAD(&reg->gpu_alloc->evict_node);
+	mutex_unlock(&kctx->jit_evict_lock);
+
 	reg->flags &= ~KBASE_REG_FREE;
+
 	return 0;
 }
 
-static inline int kbase_atomic_add_pages(int num_pages, atomic_t *used_pages)
+static inline u32 kbase_atomic_add_pages(u32 num_pages, atomic_t *used_pages)
 {
 	int new_val = atomic_add_return(num_pages, used_pages);
 #if defined(CONFIG_MALI_BIFROST_GATOR_SUPPORT)
@@ -460,7 +475,7 @@ static inline int kbase_atomic_add_pages(int num_pages, atomic_t *used_pages)
 	return new_val;
 }
 
-static inline int kbase_atomic_sub_pages(int num_pages, atomic_t *used_pages)
+static inline u32 kbase_atomic_sub_pages(u32 num_pages, atomic_t *used_pages)
 {
 	int new_val = atomic_sub_return(num_pages, used_pages);
 #if defined(CONFIG_MALI_BIFROST_GATOR_SUPPORT)
@@ -539,9 +554,25 @@ void kbase_mem_pool_term(struct kbase_mem_pool *pool);
  * 3. Return NULL if no memory in the pool
  *
  * Return: Pointer to allocated page, or NULL if allocation failed.
+ *
+ * Note : This function should not be used if the pool lock is held. Use
+ * kbase_mem_pool_alloc_locked() instead.
  */
 struct page *kbase_mem_pool_alloc(struct kbase_mem_pool *pool);
 
+/**
+ * kbase_mem_pool_alloc_locked - Allocate a page from memory pool
+ * @pool:  Memory pool to allocate from
+ *
+ * If there are free pages in the pool, this function allocates a page from
+ * @pool. This function does not use @next_pool.
+ *
+ * Return: Pointer to allocated page, or NULL if allocation failed.
+ *
+ * Note : Caller must hold the pool lock.
+ */
+struct page *kbase_mem_pool_alloc_locked(struct kbase_mem_pool *pool);
+
 /**
  * kbase_mem_pool_free - Free a page to memory pool
  * @pool:  Memory pool where page should be freed
@@ -553,10 +584,27 @@ struct page *kbase_mem_pool_alloc(struct kbase_mem_pool *pool);
  * 2. Otherwise, if @next_pool is not NULL and not full, add @page to
  *    @next_pool.
  * 3. Finally, free @page to the kernel.
+ *
+ * Note : This function should not be used if the pool lock is held. Use
+ * kbase_mem_pool_free_locked() instead.
  */
 void kbase_mem_pool_free(struct kbase_mem_pool *pool, struct page *page,
 		bool dirty);
 
+/**
+ * kbase_mem_pool_free_locked - Free a page to memory pool
+ * @pool:  Memory pool where page should be freed
+ * @p:     Page to free to the pool
+ * @dirty: Whether some of the page may be dirty in the cache.
+ *
+ * If @pool is not full, this function adds @page to @pool. Otherwise, @page is
+ * freed to the kernel. This function does not use @next_pool.
+ *
+ * Note : Caller must hold the pool lock.
+ */
+void kbase_mem_pool_free_locked(struct kbase_mem_pool *pool, struct page *p,
+		bool dirty);
+
 /**
  * kbase_mem_pool_alloc_pages - Allocate pages from memory pool
  * @pool:     Memory pool to allocate from
@@ -571,10 +619,57 @@ void kbase_mem_pool_free(struct kbase_mem_pool *pool, struct page *page,
  * On success number of pages allocated (could be less than nr_pages if
  * partial_allowed).
  * On error an error code.
+ *
+ * Note : This function should not be used if the pool lock is held. Use
+ * kbase_mem_pool_alloc_pages_locked() instead.
+ *
+ * The caller must not hold vm_lock, as this could cause a deadlock if
+ * the kernel OoM killer runs. If the caller must allocate pages while holding
+ * this lock, it should use kbase_mem_pool_alloc_pages_locked() instead.
  */
 int kbase_mem_pool_alloc_pages(struct kbase_mem_pool *pool, size_t nr_pages,
 		struct tagged_addr *pages, bool partial_allowed);
 
+/**
+ * kbase_mem_pool_alloc_pages_locked - Allocate pages from memory pool
+ * @pool:        Memory pool to allocate from
+ * @nr_4k_pages: Number of pages to allocate
+ * @pages:       Pointer to array where the physical address of the allocated
+ *               pages will be stored.
+ *
+ * Like kbase_mem_pool_alloc() but optimized for allocating many pages. This
+ * version does not allocate new pages from the kernel, and therefore will never
+ * trigger the OoM killer. Therefore, it can be run while the vm_lock is held.
+ *
+ * As new pages can not be allocated, the caller must ensure there are
+ * sufficient pages in the pool. Usage of this function should look like :
+ *
+ *   kbase_gpu_vm_lock(kctx);
+ *   kbase_mem_pool_lock(pool)
+ *   while (kbase_mem_pool_size(pool) < pages_required) {
+ *     kbase_mem_pool_unlock(pool)
+ *     kbase_gpu_vm_unlock(kctx);
+ *     kbase_mem_pool_grow(pool)
+ *     kbase_gpu_vm_lock(kctx);
+ *     kbase_mem_pool_lock(pool)
+ *   }
+ *   kbase_mem_pool_alloc_pages_locked(pool)
+ *   kbase_mem_pool_unlock(pool)
+ *   Perform other processing that requires vm_lock...
+ *   kbase_gpu_vm_unlock(kctx);
+ *
+ * This ensures that the pool can be grown to the required size and that the
+ * allocation can complete without another thread using the newly grown pages.
+ *
+ * Return:
+ * On success number of pages allocated.
+ * On error an error code.
+ *
+ * Note : Caller must hold the pool lock.
+ */
+int kbase_mem_pool_alloc_pages_locked(struct kbase_mem_pool *pool,
+		size_t nr_4k_pages, struct tagged_addr *pages);
+
 /**
  * kbase_mem_pool_free_pages - Free pages to memory pool
  * @pool:     Memory pool where pages should be freed
@@ -590,6 +685,22 @@ int kbase_mem_pool_alloc_pages(struct kbase_mem_pool *pool, size_t nr_pages,
 void kbase_mem_pool_free_pages(struct kbase_mem_pool *pool, size_t nr_pages,
 		struct tagged_addr *pages, bool dirty, bool reclaimed);
 
+/**
+ * kbase_mem_pool_free_pages_locked - Free pages to memory pool
+ * @pool:     Memory pool where pages should be freed
+ * @nr_pages: Number of pages to free
+ * @pages:    Pointer to array holding the physical addresses of the pages to
+ *            free.
+ * @dirty:    Whether any pages may be dirty in the cache.
+ * @reclaimed: Whether the pages where reclaimable and thus should bypass
+ *             the pool and go straight to the kernel.
+ *
+ * Like kbase_mem_pool_free() but optimized for freeing many pages.
+ */
+void kbase_mem_pool_free_pages_locked(struct kbase_mem_pool *pool,
+		size_t nr_pages, struct tagged_addr *pages, bool dirty,
+		bool reclaimed);
+
 /**
  * kbase_mem_pool_size - Get number of free pages in memory pool
  * @pool:  Memory pool to inspect
@@ -600,7 +711,7 @@ void kbase_mem_pool_free_pages(struct kbase_mem_pool *pool, size_t nr_pages,
  */
 static inline size_t kbase_mem_pool_size(struct kbase_mem_pool *pool)
 {
-	return ACCESS_ONCE(pool->cur_size);
+	return READ_ONCE(pool->cur_size);
 }
 
 /**
@@ -648,6 +759,15 @@ int kbase_mem_pool_grow(struct kbase_mem_pool *pool, size_t nr_to_grow);
  */
 void kbase_mem_pool_trim(struct kbase_mem_pool *pool, size_t new_size);
 
+/**
+ * kbase_mem_pool_mark_dying - Mark that this pool is dying
+ * @pool:     Memory pool
+ *
+ * This will cause any ongoing allocation operations (eg growing on page fault)
+ * to be terminated.
+ */
+void kbase_mem_pool_mark_dying(struct kbase_mem_pool *pool);
+
 /**
  * kbase_mem_alloc_page - Allocate a new page for a device
  * @pool:  Memory pool to allocate a page from
@@ -660,7 +780,8 @@ void kbase_mem_pool_trim(struct kbase_mem_pool *pool, size_t new_size);
 struct page *kbase_mem_alloc_page(struct kbase_mem_pool *pool);
 
 int kbase_region_tracker_init(struct kbase_context *kctx);
-int kbase_region_tracker_init_jit(struct kbase_context *kctx, u64 jit_va_pages);
+int kbase_region_tracker_init_jit(struct kbase_context *kctx, u64 jit_va_pages,
+		u8 max_allocations, u8 trim_level);
 void kbase_region_tracker_term(struct kbase_context *kctx);
 
 struct kbase_va_region *kbase_region_tracker_find_region_enclosing_address(struct kbase_context *kctx, u64 gpu_addr);
@@ -922,16 +1043,62 @@ void kbase_as_poking_timer_retain_atom(struct kbase_device *kbdev, struct kbase_
 void kbase_as_poking_timer_release_atom(struct kbase_device *kbdev, struct kbase_context *kctx, struct kbase_jd_atom *katom);
 
 /**
-* @brief Allocates physical pages.
-*
-* Allocates \a nr_pages_requested and updates the alloc object.
-*
-* @param[in] alloc allocation object to add pages to
-* @param[in] nr_pages_requested number of physical pages to allocate
-*
-* @return 0 if all pages have been successfully allocated. Error code otherwise
-*/
-int kbase_alloc_phy_pages_helper(struct kbase_mem_phy_alloc *alloc, size_t nr_pages_requested);
+ * kbase_alloc_phy_pages_helper - Allocates physical pages.
+ * @alloc:              allocation object to add pages to
+ * @nr_pages_requested: number of physical pages to allocate
+ *
+ * Allocates \a nr_pages_requested and updates the alloc object.
+ *
+ * Return: 0 if all pages have been successfully allocated. Error code otherwise
+ *
+ * Note : The caller must not hold vm_lock, as this could cause a deadlock if
+ * the kernel OoM killer runs. If the caller must allocate pages while holding
+ * this lock, it should use kbase_mem_pool_alloc_pages_locked() instead.
+ */
+int kbase_alloc_phy_pages_helper(struct kbase_mem_phy_alloc *alloc,
+		size_t nr_pages_requested);
+
+/**
+ * kbase_alloc_phy_pages_helper_locked - Allocates physical pages.
+ * @alloc:              allocation object to add pages to
+ * @pool:               Memory pool to allocate from
+ * @nr_pages_requested: number of physical pages to allocate
+ *
+ * Allocates \a nr_pages_requested and updates the alloc object. This function
+ * does not allocate new pages from the kernel, and therefore will never trigger
+ * the OoM killer. Therefore, it can be run while the vm_lock is held.
+ *
+ * As new pages can not be allocated, the caller must ensure there are
+ * sufficient pages in the pool. Usage of this function should look like :
+ *
+ *   kbase_gpu_vm_lock(kctx);
+ *   kbase_mem_pool_lock(pool)
+ *   while (kbase_mem_pool_size(pool) < pages_required) {
+ *     kbase_mem_pool_unlock(pool)
+ *     kbase_gpu_vm_unlock(kctx);
+ *     kbase_mem_pool_grow(pool)
+ *     kbase_gpu_vm_lock(kctx);
+ *     kbase_mem_pool_lock(pool)
+ *   }
+ *   kbase_alloc_phy_pages_helper_locked(pool)
+ *   kbase_mem_pool_unlock(pool)
+ *   Perform other processing that requires vm_lock...
+ *   kbase_gpu_vm_unlock(kctx);
+ *
+ * This ensures that the pool can be grown to the required size and that the
+ * allocation can complete without another thread using the newly grown pages.
+ *
+ * If CONFIG_MALI_2MB_ALLOC is defined and the allocation is >= 2MB, then
+ * @pool must be alloc->imported.kctx->lp_mem_pool. Otherwise it must be
+ * alloc->imported.kctx->mem_pool.
+ *
+ * Return: Pointer to array of allocated pages. NULL on failure.
+ *
+ * Note : Caller must hold pool->pool_lock
+ */
+struct tagged_addr *kbase_alloc_phy_pages_helper_locked(
+		struct kbase_mem_phy_alloc *alloc, struct kbase_mem_pool *pool,
+		size_t nr_pages_requested);
 
 /**
 * @brief Free physical pages.
@@ -943,6 +1110,26 @@ int kbase_alloc_phy_pages_helper(struct kbase_mem_phy_alloc *alloc, size_t nr_pa
 */
 int kbase_free_phy_pages_helper(struct kbase_mem_phy_alloc *alloc, size_t nr_pages_to_free);
 
+/**
+ * kbase_free_phy_pages_helper_locked - Free pages allocated with
+ *                                      kbase_alloc_phy_pages_helper_locked()
+ * @alloc:            Allocation object to free pages from
+ * @pool:             Memory pool to return freed pages to
+ * @pages:            Pages allocated by kbase_alloc_phy_pages_helper_locked()
+ * @nr_pages_to_free: Number of physical pages to free
+ *
+ * This function atomically frees pages allocated with
+ * kbase_alloc_phy_pages_helper_locked(). @pages is the pointer to the page
+ * array that is returned by that function. @pool must be the pool that the
+ * pages were originally allocated from.
+ *
+ * If the mem_pool has been unlocked since the allocation then
+ * kbase_free_phy_pages_helper() should be used instead.
+ */
+void kbase_free_phy_pages_helper_locked(struct kbase_mem_phy_alloc *alloc,
+		struct kbase_mem_pool *pool, struct tagged_addr *pages,
+		size_t nr_pages_to_free);
+
 static inline void kbase_set_dma_addr(struct page *p, dma_addr_t dma_addr)
 {
 	SetPagePrivate(p);
@@ -1150,4 +1337,28 @@ bool kbase_sticky_resource_release(struct kbase_context *kctx,
  */
 void kbase_sticky_resource_term(struct kbase_context *kctx);
 
+/**
+ * kbase_mem_pool_lock - Lock a memory pool
+ * @pool: Memory pool to lock
+ */
+static inline void kbase_mem_pool_lock(struct kbase_mem_pool *pool)
+{
+	spin_lock(&pool->pool_lock);
+}
+
+/**
+ * kbase_mem_pool_lock - Release a memory pool
+ * @pool: Memory pool to lock
+ */
+static inline void kbase_mem_pool_unlock(struct kbase_mem_pool *pool)
+{
+	spin_unlock(&pool->pool_lock);
+}
+
+/**
+ * kbase_mem_evictable_mark_reclaim - Mark the pages as reclaimable.
+ * @alloc: The physical allocation
+ */
+void kbase_mem_evictable_mark_reclaim(struct kbase_mem_phy_alloc *alloc);
+
 #endif				/* _KBASE_MEM_H_ */
diff --git a/drivers/gpu/arm/bifrost/mali_kbase_mem_linux.c b/drivers/gpu/arm/bifrost/mali_kbase_mem_linux.c
index 4e6668e62477..33021142e6d5 100644
--- a/drivers/gpu/arm/bifrost/mali_kbase_mem_linux.c
+++ b/drivers/gpu/arm/bifrost/mali_kbase_mem_linux.c
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2010-2017 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2010-2018 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -53,39 +53,6 @@
 
 static int kbase_tracking_page_setup(struct kbase_context *kctx, struct vm_area_struct *vma);
 
-/**
- * kbase_mem_shrink_cpu_mapping - Shrink the CPU mapping(s) of an allocation
- * @kctx:      Context the region belongs to
- * @reg:       The GPU region
- * @new_pages: The number of pages after the shrink
- * @old_pages: The number of pages before the shrink
- *
- * Shrink (or completely remove) all CPU mappings which reference the shrunk
- * part of the allocation.
- *
- * Note: Caller must be holding the processes mmap_sem lock.
- */
-static void kbase_mem_shrink_cpu_mapping(struct kbase_context *kctx,
-		struct kbase_va_region *reg,
-		u64 new_pages, u64 old_pages);
-
-/**
- * kbase_mem_shrink_gpu_mapping - Shrink the GPU mapping of an allocation
- * @kctx:      Context the region belongs to
- * @reg:       The GPU region or NULL if there isn't one
- * @new_pages: The number of pages after the shrink
- * @old_pages: The number of pages before the shrink
- *
- * Return: 0 on success, negative -errno on error
- *
- * Unmap the shrunk pages from the GPU mapping. Note that the size of the region
- * itself is unmodified as we still need to reserve the VA, only the page tables
- * will be modified by this function.
- */
-static int kbase_mem_shrink_gpu_mapping(struct kbase_context *kctx,
-		struct kbase_va_region *reg,
-		u64 new_pages, u64 old_pages);
-
 struct kbase_va_region *kbase_mem_alloc(struct kbase_context *kctx,
 		u64 va_pages, u64 commit_pages, u64 extent, u64 *flags,
 		u64 *gpu_va)
@@ -243,7 +210,8 @@ struct kbase_va_region *kbase_mem_alloc(struct kbase_context *kctx,
 }
 KBASE_EXPORT_TEST_API(kbase_mem_alloc);
 
-int kbase_mem_query(struct kbase_context *kctx, u64 gpu_addr, int query, u64 * const out)
+int kbase_mem_query(struct kbase_context *kctx,
+		u64 gpu_addr, u64 query, u64 * const out)
 {
 	struct kbase_va_region *reg;
 	int ret = -EINVAL;
@@ -471,7 +439,7 @@ void kbase_mem_evictable_deinit(struct kbase_context *kctx)
  * kbase_mem_evictable_mark_reclaim - Mark the pages as reclaimable.
  * @alloc: The physical allocation
  */
-static void kbase_mem_evictable_mark_reclaim(struct kbase_mem_phy_alloc *alloc)
+void kbase_mem_evictable_mark_reclaim(struct kbase_mem_phy_alloc *alloc)
 {
 	struct kbase_context *kctx = alloc->imported.kctx;
 	int __maybe_unused new_page_count;
@@ -516,17 +484,17 @@ int kbase_mem_evictable_make(struct kbase_mem_phy_alloc *gpu_alloc)
 
 	lockdep_assert_held(&kctx->reg_lock);
 
-	/* This alloction can't already be on a list. */
-	WARN_ON(!list_empty(&gpu_alloc->evict_node));
-
 	kbase_mem_shrink_cpu_mapping(kctx, gpu_alloc->reg,
 			0, gpu_alloc->nents);
 
+	mutex_lock(&kctx->jit_evict_lock);
+	/* This allocation can't already be on a list. */
+	WARN_ON(!list_empty(&gpu_alloc->evict_node));
+
 	/*
 	 * Add the allocation to the eviction list, after this point the shrink
 	 * can reclaim it.
 	 */
-	mutex_lock(&kctx->jit_evict_lock);
 	list_add(&gpu_alloc->evict_node, &kctx->evict_list);
 	mutex_unlock(&kctx->jit_evict_lock);
 	kbase_mem_evictable_mark_reclaim(gpu_alloc);
@@ -542,11 +510,13 @@ bool kbase_mem_evictable_unmake(struct kbase_mem_phy_alloc *gpu_alloc)
 
 	lockdep_assert_held(&kctx->reg_lock);
 
+	mutex_lock(&kctx->jit_evict_lock);
 	/*
 	 * First remove the allocation from the eviction list as it's no
 	 * longer eligible for eviction.
 	 */
 	list_del_init(&gpu_alloc->evict_node);
+	mutex_unlock(&kctx->jit_evict_lock);
 
 	if (gpu_alloc->evicted == 0) {
 		/*
@@ -667,7 +637,7 @@ int kbase_mem_flags_change(struct kbase_context *kctx, u64 gpu_addr, unsigned in
 	case KBASE_MEM_TYPE_IMPORTED_UMP:
 		ret = kbase_mmu_update_pages(kctx, reg->start_pfn,
 					     kbase_get_gpu_phy_pages(reg),
-				             reg->gpu_alloc->nents, reg->flags);
+					     reg->gpu_alloc->nents, reg->flags);
 		break;
 #endif
 #ifdef CONFIG_DMA_SHARED_BUFFER
@@ -903,6 +873,7 @@ static struct kbase_va_region *kbase_mem_from_umm(struct kbase_context *kctx,
 
 invalid_flags:
 	kbase_mem_phy_alloc_put(reg->gpu_alloc);
+	kbase_mem_phy_alloc_put(reg->cpu_alloc);
 no_alloc_obj:
 	kfree(reg);
 no_region:
@@ -1010,6 +981,7 @@ static struct kbase_va_region *kbase_mem_from_user_buffer(
 	user_buf->address = address;
 	user_buf->nr_pages = *va_pages;
 	user_buf->mm = current->mm;
+	atomic_inc(&current->mm->mm_count);
 	user_buf->pages = kmalloc_array(*va_pages, sizeof(struct page *),
 			GFP_KERNEL);
 
@@ -1047,8 +1019,6 @@ static struct kbase_va_region *kbase_mem_from_user_buffer(
 	if (faulted_pages != *va_pages)
 		goto fault_mismatch;
 
-	atomic_inc(&current->mm->mm_count);
-
 	reg->gpu_alloc->nents = 0;
 	reg->extent = 0;
 
@@ -1095,7 +1065,6 @@ static struct kbase_va_region *kbase_mem_from_user_buffer(
 		for (i = 0; i < faulted_pages; i++)
 			put_page(pages[i]);
 	}
-	kfree(user_buf->pages);
 no_page_array:
 invalid_flags:
 	kbase_mem_phy_alloc_put(reg->cpu_alloc);
@@ -1462,7 +1431,7 @@ int kbase_mem_grow_gpu_mapping(struct kbase_context *kctx,
 	return ret;
 }
 
-static void kbase_mem_shrink_cpu_mapping(struct kbase_context *kctx,
+void kbase_mem_shrink_cpu_mapping(struct kbase_context *kctx,
 		struct kbase_va_region *reg,
 		u64 new_pages, u64 old_pages)
 {
@@ -1477,7 +1446,7 @@ static void kbase_mem_shrink_cpu_mapping(struct kbase_context *kctx,
 			(old_pages - new_pages)<<PAGE_SHIFT, 1);
 }
 
-static int kbase_mem_shrink_gpu_mapping(struct kbase_context *kctx,
+int kbase_mem_shrink_gpu_mapping(struct kbase_context *kctx,
 		struct kbase_va_region *reg,
 		u64 new_pages, u64 old_pages)
 {
diff --git a/drivers/gpu/arm/bifrost/mali_kbase_mem_linux.h b/drivers/gpu/arm/bifrost/mali_kbase_mem_linux.h
index 301fdc33ea1e..a14826ebc772 100644
--- a/drivers/gpu/arm/bifrost/mali_kbase_mem_linux.h
+++ b/drivers/gpu/arm/bifrost/mali_kbase_mem_linux.h
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2010, 2012-2017 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2010, 2012-2018 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -37,14 +37,84 @@ struct kbase_hwc_dma_mapping {
 	size_t      size;
 };
 
+/**
+ * kbase_mem_alloc - Create a new allocation for GPU
+ *
+ * @kctx:         The kernel context
+ * @va_pages:     The number of pages of virtual address space to reserve
+ * @commit_pages: The number of physical pages to allocate upfront
+ * @extent:       The number of extra pages to allocate on each GPU fault which
+ *                grows the region.
+ * @flags:        bitmask of BASE_MEM_* flags to convey special requirements &
+ *                properties for the new allocation.
+ * @gpu_va:       Start address of the memory region which was allocated from GPU
+ *                virtual address space.
+ *
+ * Return: 0 on success or error code
+ */
 struct kbase_va_region *kbase_mem_alloc(struct kbase_context *kctx,
 		u64 va_pages, u64 commit_pages, u64 extent, u64 *flags,
 		u64 *gpu_va);
-int kbase_mem_query(struct kbase_context *kctx, u64 gpu_addr, int query, u64 *const pages);
+
+/**
+ * kbase_mem_query - Query properties of a GPU memory region
+ *
+ * @kctx:     The kernel context
+ * @gpu_addr: A GPU address contained within the memory region
+ * @query:    The type of query, from KBASE_MEM_QUERY_* flags, which could be
+ *            regarding the amount of backing physical memory allocated so far
+ *            for the region or the size of the region or the flags associated
+ *            with the region.
+ * @out:      Pointer to the location to store the result of query.
+ *
+ * Return: 0 on success or error code
+ */
+int kbase_mem_query(struct kbase_context *kctx, u64 gpu_addr, u64 query,
+		u64 *const out);
+
+/**
+ * kbase_mem_import - Import the external memory for use by the GPU
+ *
+ * @kctx:     The kernel context
+ * @type:     Type of external memory
+ * @phandle:  Handle to the external memory interpreted as per the type.
+ * @padding:  Amount of extra VA pages to append to the imported buffer
+ * @gpu_va:   GPU address assigned to the imported external memory
+ * @va_pages: Size of the memory region reserved from the GPU address space
+ * @flags:    bitmask of BASE_MEM_* flags to convey special requirements &
+ *            properties for the new allocation representing the external
+ *            memory.
+ * Return: 0 on success or error code
+ */
 int kbase_mem_import(struct kbase_context *kctx, enum base_mem_import_type type,
 		void __user *phandle, u32 padding, u64 *gpu_va, u64 *va_pages,
 		u64 *flags);
+
+/**
+ * kbase_mem_alias - Create a new allocation for GPU, aliasing one or more
+ *                   memory regions
+ *
+ * @kctx:      The kernel context
+ * @flags:     bitmask of BASE_MEM_* flags.
+ * @stride:    Bytes between start of each memory region
+ * @nents:     The number of regions to pack together into the alias
+ * @ai:        Pointer to the struct containing the memory aliasing info
+ * @num_pages: Number of pages the alias will cover
+ *
+ * Return: 0 on failure or otherwise the GPU VA for the alias
+ */
 u64 kbase_mem_alias(struct kbase_context *kctx, u64 *flags, u64 stride, u64 nents, struct base_mem_aliasing_info *ai, u64 *num_pages);
+
+/**
+ * kbase_mem_flags_change - Change the flags for a memory region
+ *
+ * @kctx:     The kernel context
+ * @gpu_addr: A GPU address contained within the memory region to modify.
+ * @flags:    The new flags to set
+ * @mask:     Mask of the flags, from BASE_MEM_*, to modify.
+ *
+ * Return: 0 on success or error code
+ */
 int kbase_mem_flags_change(struct kbase_context *kctx, u64 gpu_addr, unsigned int flags, unsigned int mask);
 
 /**
@@ -58,10 +128,19 @@ int kbase_mem_flags_change(struct kbase_context *kctx, u64 gpu_addr, unsigned in
  */
 int kbase_mem_commit(struct kbase_context *kctx, u64 gpu_addr, u64 new_pages);
 
+/**
+ * kbase_mmap - Mmap method, gets invoked when mmap system call is issued on
+ *              device file /dev/malixx.
+ * @file: Pointer to the device file /dev/malixx instance.
+ * @vma:  Pointer to the struct containing the info where the GPU allocation
+ *        will be mapped in virtual address space of CPU.
+ *
+ * Return: 0 on success or error code
+ */
 int kbase_mmap(struct file *file, struct vm_area_struct *vma);
 
 /**
- * kbase_mem_evictable_init - Initialize the Ephemeral memory the eviction
+ * kbase_mem_evictable_init - Initialize the Ephemeral memory eviction
  * mechanism.
  * @kctx: The kbase context to initialize.
  *
@@ -242,4 +321,35 @@ void kbase_va_free(struct kbase_context *kctx, struct kbase_hwc_dma_mapping *han
 
 extern const struct vm_operations_struct kbase_vm_ops;
 
+/**
+ * kbase_mem_shrink_cpu_mapping - Shrink the CPU mapping(s) of an allocation
+ * @kctx:      Context the region belongs to
+ * @reg:       The GPU region
+ * @new_pages: The number of pages after the shrink
+ * @old_pages: The number of pages before the shrink
+ *
+ * Shrink (or completely remove) all CPU mappings which reference the shrunk
+ * part of the allocation.
+ */
+void kbase_mem_shrink_cpu_mapping(struct kbase_context *kctx,
+		struct kbase_va_region *reg,
+		u64 new_pages, u64 old_pages);
+
+/**
+ * kbase_mem_shrink_gpu_mapping - Shrink the GPU mapping of an allocation
+ * @kctx:      Context the region belongs to
+ * @reg:       The GPU region or NULL if there isn't one
+ * @new_pages: The number of pages after the shrink
+ * @old_pages: The number of pages before the shrink
+ *
+ * Return: 0 on success, negative -errno on error
+ *
+ * Unmap the shrunk pages from the GPU mapping. Note that the size of the region
+ * itself is unmodified as we still need to reserve the VA, only the page tables
+ * will be modified by this function.
+ */
+int kbase_mem_shrink_gpu_mapping(struct kbase_context *kctx,
+		struct kbase_va_region *reg,
+		u64 new_pages, u64 old_pages);
+
 #endif				/* _KBASE_MEM_LINUX_H_ */
diff --git a/drivers/gpu/arm/bifrost/mali_kbase_mem_lowlevel.h b/drivers/gpu/arm/bifrost/mali_kbase_mem_lowlevel.h
index 0c2b70bcfbf5..6581ecfc95a0 100644
--- a/drivers/gpu/arm/bifrost/mali_kbase_mem_lowlevel.h
+++ b/drivers/gpu/arm/bifrost/mali_kbase_mem_lowlevel.h
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2012-2014,2017 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2012-2014,2018 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -53,11 +53,39 @@ struct tagged_addr { phys_addr_t tagged_addr; };
 #define HUGE_HEAD    (1u << 1)
 #define FROM_PARTIAL (1u << 2)
 
+/*
+ * Note: if macro for converting physical address to page is not defined
+ * in the kernel itself, it is defined hereby. This is to avoid build errors
+ * which are reported during builds for some architectures.
+ */
+#ifndef phys_to_page
+#define phys_to_page(phys)	(pfn_to_page((phys) >> PAGE_SHIFT))
+#endif
+
+/**
+ * as_phys_addr_t - Retrieve the physical address from tagged address by
+ *                  masking the lower order 12 bits.
+ * @t: tagged address to be translated.
+ *
+ * Return: physical address corresponding to tagged address.
+ */
 static inline phys_addr_t as_phys_addr_t(struct tagged_addr t)
 {
 	return t.tagged_addr & PAGE_MASK;
 }
 
+/**
+ * as_tagged - Convert the physical address to tagged address type though
+ *             there is no tag info present, the lower order 12 bits will be 0
+ * @phys: physical address to be converted to tagged type
+ *
+ * This is used for 4KB physical pages allocated by the Driver or imported pages
+ * and is needed as physical pages tracking object stores the reference for
+ * physical pages using tagged address type in lieu of the type generally used
+ * for physical addresses.
+ *
+ * Return: address of tagged address type.
+ */
 static inline struct tagged_addr as_tagged(phys_addr_t phys)
 {
 	struct tagged_addr t;
@@ -66,6 +94,16 @@ static inline struct tagged_addr as_tagged(phys_addr_t phys)
 	return t;
 }
 
+/**
+ * as_tagged_tag - Form the tagged address by storing the tag or metadata in the
+ *                 lower order 12 bits of physial address
+ * @phys: physical address to be converted to tagged address
+ * @tag:  tag to be stored along with the physical address.
+ *
+ * The tag info is used while freeing up the pages
+ *
+ * Return: tagged address storing physical address & tag.
+ */
 static inline struct tagged_addr as_tagged_tag(phys_addr_t phys, int tag)
 {
 	struct tagged_addr t;
@@ -74,11 +112,26 @@ static inline struct tagged_addr as_tagged_tag(phys_addr_t phys, int tag)
 	return t;
 }
 
+/**
+ * is_huge - Check if the physical page is one of the 512 4KB pages of the
+ *           large page which was not split to be used partially
+ * @t: tagged address storing the tag in the lower order bits.
+ *
+ * Return: true if page belongs to large page, or false
+ */
 static inline bool is_huge(struct tagged_addr t)
 {
 	return t.tagged_addr & HUGE_PAGE;
 }
 
+/**
+ * is_huge_head - Check if the physical page is the first 4KB page of the
+ *                512 4KB pages within a large page which was not split
+ *                to be used partially
+ * @t: tagged address storing the tag in the lower order bits.
+ *
+ * Return: true if page is the first page of a large page, or false
+ */
 static inline bool is_huge_head(struct tagged_addr t)
 {
 	int mask = HUGE_HEAD | HUGE_PAGE;
@@ -86,6 +139,14 @@ static inline bool is_huge_head(struct tagged_addr t)
 	return mask == (t.tagged_addr & mask);
 }
 
+/**
+ * is_partial - Check if the physical page is one of the 512 pages of the
+ *              large page which was split in 4KB pages to be used
+ *              partially for allocations >= 2 MB in size.
+ * @t: tagged address storing the tag in the lower order bits.
+ *
+ * Return: true if page was taken from large page used partially, or false
+ */
 static inline bool is_partial(struct tagged_addr t)
 {
 	return t.tagged_addr & FROM_PARTIAL;
diff --git a/drivers/gpu/arm/bifrost/mali_kbase_mem_pool.c b/drivers/gpu/arm/bifrost/mali_kbase_mem_pool.c
index 574f1d51cccf..1255df0fc1ae 100644
--- a/drivers/gpu/arm/bifrost/mali_kbase_mem_pool.c
+++ b/drivers/gpu/arm/bifrost/mali_kbase_mem_pool.c
@@ -39,16 +39,6 @@
 #define NOT_DIRTY false
 #define NOT_RECLAIMED false
 
-static inline void kbase_mem_pool_lock(struct kbase_mem_pool *pool)
-{
-	spin_lock(&pool->pool_lock);
-}
-
-static inline void kbase_mem_pool_unlock(struct kbase_mem_pool *pool)
-{
-	spin_unlock(&pool->pool_lock);
-}
-
 static size_t kbase_mem_pool_capacity(struct kbase_mem_pool *pool)
 {
 	ssize_t max_size = kbase_mem_pool_max_size(pool);
@@ -177,12 +167,6 @@ struct page *kbase_mem_alloc_page(struct kbase_mem_pool *pool)
 	gfp = GFP_HIGHUSER | __GFP_ZERO;
 #endif
 
-	if (current->flags & PF_KTHREAD) {
-		/* Don't trigger OOM killer from kernel threads, e.g. when
-		 * growing memory on GPU page fault */
-		gfp |= __GFP_NORETRY;
-	}
-
 	/* don't warn on higer order failures */
 	if (pool->order)
 		gfp |= __GFP_NOWARN;
@@ -255,12 +239,33 @@ int kbase_mem_pool_grow(struct kbase_mem_pool *pool,
 	struct page *p;
 	size_t i;
 
+	kbase_mem_pool_lock(pool);
+
+	pool->dont_reclaim = true;
 	for (i = 0; i < nr_to_grow; i++) {
-		p = kbase_mem_alloc_page(pool);
-		if (!p)
+		if (pool->dying) {
+			pool->dont_reclaim = false;
+			kbase_mem_pool_shrink_locked(pool, nr_to_grow);
+			kbase_mem_pool_unlock(pool);
+
 			return -ENOMEM;
-		kbase_mem_pool_add(pool, p);
+		}
+		kbase_mem_pool_unlock(pool);
+
+		p = kbase_mem_alloc_page(pool);
+		if (!p) {
+			kbase_mem_pool_lock(pool);
+			pool->dont_reclaim = false;
+			kbase_mem_pool_unlock(pool);
+
+			return -ENOMEM;
+		}
+
+		kbase_mem_pool_lock(pool);
+		kbase_mem_pool_add_locked(pool, p);
 	}
+	pool->dont_reclaim = false;
+	kbase_mem_pool_unlock(pool);
 
 	return 0;
 }
@@ -312,10 +317,19 @@ static unsigned long kbase_mem_pool_reclaim_count_objects(struct shrinker *s,
 		struct shrink_control *sc)
 {
 	struct kbase_mem_pool *pool;
+	size_t pool_size;
 
 	pool = container_of(s, struct kbase_mem_pool, reclaim);
-	pool_dbg(pool, "reclaim count: %zu\n", kbase_mem_pool_size(pool));
-	return kbase_mem_pool_size(pool);
+
+	kbase_mem_pool_lock(pool);
+	if (pool->dont_reclaim && !pool->dying) {
+		kbase_mem_pool_unlock(pool);
+		return 0;
+	}
+	pool_size = kbase_mem_pool_size(pool);
+	kbase_mem_pool_unlock(pool);
+
+	return pool_size;
 }
 
 static unsigned long kbase_mem_pool_reclaim_scan_objects(struct shrinker *s,
@@ -326,9 +340,17 @@ static unsigned long kbase_mem_pool_reclaim_scan_objects(struct shrinker *s,
 
 	pool = container_of(s, struct kbase_mem_pool, reclaim);
 
+	kbase_mem_pool_lock(pool);
+	if (pool->dont_reclaim && !pool->dying) {
+		kbase_mem_pool_unlock(pool);
+		return 0;
+	}
+
 	pool_dbg(pool, "reclaim scan %ld:\n", sc->nr_to_scan);
 
-	freed = kbase_mem_pool_shrink(pool, sc->nr_to_scan);
+	freed = kbase_mem_pool_shrink_locked(pool, sc->nr_to_scan);
+
+	kbase_mem_pool_unlock(pool);
 
 	pool_dbg(pool, "reclaim freed %ld pages\n", freed);
 
@@ -357,6 +379,7 @@ int kbase_mem_pool_init(struct kbase_mem_pool *pool,
 	pool->order = order;
 	pool->kbdev = kbdev;
 	pool->next_pool = next_pool;
+	pool->dying = false;
 
 	spin_lock_init(&pool->pool_lock);
 	INIT_LIST_HEAD(&pool->page_list);
@@ -381,6 +404,13 @@ int kbase_mem_pool_init(struct kbase_mem_pool *pool,
 	return 0;
 }
 
+void kbase_mem_pool_mark_dying(struct kbase_mem_pool *pool)
+{
+	kbase_mem_pool_lock(pool);
+	pool->dying = true;
+	kbase_mem_pool_unlock(pool);
+}
+
 void kbase_mem_pool_term(struct kbase_mem_pool *pool)
 {
 	struct kbase_mem_pool *next_pool = pool->next_pool;
@@ -444,6 +474,21 @@ struct page *kbase_mem_pool_alloc(struct kbase_mem_pool *pool)
 	return NULL;
 }
 
+struct page *kbase_mem_pool_alloc_locked(struct kbase_mem_pool *pool)
+{
+	struct page *p;
+
+	lockdep_assert_held(&pool->pool_lock);
+
+	pool_dbg(pool, "alloc_locked()\n");
+	p = kbase_mem_pool_remove_locked(pool);
+
+	if (p)
+		return p;
+
+	return NULL;
+}
+
 void kbase_mem_pool_free(struct kbase_mem_pool *pool, struct page *p,
 		bool dirty)
 {
@@ -466,6 +511,25 @@ void kbase_mem_pool_free(struct kbase_mem_pool *pool, struct page *p,
 	}
 }
 
+void kbase_mem_pool_free_locked(struct kbase_mem_pool *pool, struct page *p,
+		bool dirty)
+{
+	pool_dbg(pool, "free_locked()\n");
+
+	lockdep_assert_held(&pool->pool_lock);
+
+	if (!kbase_mem_pool_is_full(pool)) {
+		/* Add to our own pool */
+		if (dirty)
+			kbase_mem_pool_sync_page(pool, p);
+
+		kbase_mem_pool_add_locked(pool, p);
+	} else {
+		/* Free page */
+		kbase_mem_pool_free_page(pool, p);
+	}
+}
+
 int kbase_mem_pool_alloc_pages(struct kbase_mem_pool *pool, size_t nr_4k_pages,
 		struct tagged_addr *pages, bool partial_allowed)
 {
@@ -543,7 +607,6 @@ int kbase_mem_pool_alloc_pages(struct kbase_mem_pool *pool, size_t nr_4k_pages,
 
 done:
 	pool_dbg(pool, "alloc_pages(%zu) done\n", i);
-
 	return i;
 
 err_rollback:
@@ -551,6 +614,49 @@ int kbase_mem_pool_alloc_pages(struct kbase_mem_pool *pool, size_t nr_4k_pages,
 	return err;
 }
 
+int kbase_mem_pool_alloc_pages_locked(struct kbase_mem_pool *pool,
+		size_t nr_4k_pages, struct tagged_addr *pages)
+{
+	struct page *p;
+	size_t i;
+	size_t nr_pages_internal;
+
+	lockdep_assert_held(&pool->pool_lock);
+
+	nr_pages_internal = nr_4k_pages / (1u << (pool->order));
+
+	if (nr_pages_internal * (1u << pool->order) != nr_4k_pages)
+		return -EINVAL;
+
+	pool_dbg(pool, "alloc_pages_locked(4k=%zu):\n", nr_4k_pages);
+	pool_dbg(pool, "alloc_pages_locked(internal=%zu):\n",
+			nr_pages_internal);
+
+	if (kbase_mem_pool_size(pool) < nr_pages_internal) {
+		pool_dbg(pool, "Failed alloc\n");
+		return -ENOMEM;
+	}
+
+	for (i = 0; i < nr_pages_internal; i++) {
+		int j;
+
+		p = kbase_mem_pool_remove_locked(pool);
+		if (pool->order) {
+			*pages++ = as_tagged_tag(page_to_phys(p),
+						   HUGE_HEAD | HUGE_PAGE);
+			for (j = 1; j < (1u << pool->order); j++) {
+				*pages++ = as_tagged_tag(page_to_phys(p) +
+							   PAGE_SIZE * j,
+							   HUGE_PAGE);
+			}
+		} else {
+			*pages++ = as_tagged(page_to_phys(p));
+		}
+	}
+
+	return nr_4k_pages;
+}
+
 static void kbase_mem_pool_add_array(struct kbase_mem_pool *pool,
 				     size_t nr_pages, struct tagged_addr *pages,
 				     bool zero, bool sync)
@@ -591,6 +697,48 @@ static void kbase_mem_pool_add_array(struct kbase_mem_pool *pool,
 			nr_pages, nr_to_pool);
 }
 
+static void kbase_mem_pool_add_array_locked(struct kbase_mem_pool *pool,
+		size_t nr_pages, struct tagged_addr *pages,
+		bool zero, bool sync)
+{
+	struct page *p;
+	size_t nr_to_pool = 0;
+	LIST_HEAD(new_page_list);
+	size_t i;
+
+	lockdep_assert_held(&pool->pool_lock);
+
+	if (!nr_pages)
+		return;
+
+	pool_dbg(pool, "add_array_locked(%zu, zero=%d, sync=%d):\n",
+			nr_pages, zero, sync);
+
+	/* Zero/sync pages first */
+	for (i = 0; i < nr_pages; i++) {
+		if (unlikely(!as_phys_addr_t(pages[i])))
+			continue;
+
+		if (is_huge_head(pages[i]) || !is_huge(pages[i])) {
+			p = phys_to_page(as_phys_addr_t(pages[i]));
+			if (zero)
+				kbase_mem_pool_zero_page(pool, p);
+			else if (sync)
+				kbase_mem_pool_sync_page(pool, p);
+
+			list_add(&p->lru, &new_page_list);
+			nr_to_pool++;
+		}
+		pages[i] = as_tagged(0);
+	}
+
+	/* Add new page list to pool */
+	kbase_mem_pool_add_list_locked(pool, &new_page_list, nr_to_pool);
+
+	pool_dbg(pool, "add_array_locked(%zu) added %zu pages\n",
+			nr_pages, nr_to_pool);
+}
+
 void kbase_mem_pool_free_pages(struct kbase_mem_pool *pool, size_t nr_pages,
 		struct tagged_addr *pages, bool dirty, bool reclaimed)
 {
@@ -640,3 +788,47 @@ void kbase_mem_pool_free_pages(struct kbase_mem_pool *pool, size_t nr_pages,
 
 	pool_dbg(pool, "free_pages(%zu) done\n", nr_pages);
 }
+
+
+void kbase_mem_pool_free_pages_locked(struct kbase_mem_pool *pool,
+		size_t nr_pages, struct tagged_addr *pages, bool dirty,
+		bool reclaimed)
+{
+	struct page *p;
+	size_t nr_to_pool;
+	LIST_HEAD(to_pool_list);
+	size_t i = 0;
+
+	lockdep_assert_held(&pool->pool_lock);
+
+	pool_dbg(pool, "free_pages_locked(%zu):\n", nr_pages);
+
+	if (!reclaimed) {
+		/* Add to this pool */
+		nr_to_pool = kbase_mem_pool_capacity(pool);
+		nr_to_pool = min(nr_pages, nr_to_pool);
+
+		kbase_mem_pool_add_array_locked(pool, nr_pages, pages, false,
+				dirty);
+
+		i += nr_to_pool;
+	}
+
+	/* Free any remaining pages to kernel */
+	for (; i < nr_pages; i++) {
+		if (unlikely(!as_phys_addr_t(pages[i])))
+			continue;
+
+		if (is_huge(pages[i]) && !is_huge_head(pages[i])) {
+			pages[i] = as_tagged(0);
+			continue;
+		}
+
+		p = phys_to_page(as_phys_addr_t(pages[i]));
+
+		kbase_mem_pool_free_page(pool, p);
+		pages[i] = as_tagged(0);
+	}
+
+	pool_dbg(pool, "free_pages_locked(%zu) done\n", nr_pages);
+}
diff --git a/drivers/gpu/arm/bifrost/mali_kbase_mem_profile_debugfs_buf_size.h b/drivers/gpu/arm/bifrost/mali_kbase_mem_profile_debugfs_buf_size.h
index cb968f65fc5c..7f44d81e34e2 100644
--- a/drivers/gpu/arm/bifrost/mali_kbase_mem_profile_debugfs_buf_size.h
+++ b/drivers/gpu/arm/bifrost/mali_kbase_mem_profile_debugfs_buf_size.h
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2014 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2014, 2018 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -32,7 +32,8 @@
  * The size of the buffer to accumulate the histogram report text in
  * @see @ref CCTXP_HIST_BUF_SIZE_MAX_LENGTH_REPORT
  */
-#define KBASE_MEM_PROFILE_MAX_BUF_SIZE ((size_t) (64 + ((80 + (56 * 64)) * 15) + 56))
+#define KBASE_MEM_PROFILE_MAX_BUF_SIZE \
+	((size_t) (64 + ((80 + (56 * 64)) * 31) + 56))
 
 #endif  /*_KBASE_MEM_PROFILE_DEBUGFS_BUF_SIZE_H_*/
 
diff --git a/drivers/gpu/arm/bifrost/mali_kbase_mmu.c b/drivers/gpu/arm/bifrost/mali_kbase_mmu.c
index f8979d9841da..4abab34cf155 100644
--- a/drivers/gpu/arm/bifrost/mali_kbase_mmu.c
+++ b/drivers/gpu/arm/bifrost/mali_kbase_mmu.c
@@ -224,23 +224,15 @@ static void kbase_gpu_mmu_handle_write_fault(struct kbase_context *kctx,
 		return;
 	}
 
-	/* Capture handle and offset of the faulting write location
+	/* Capture addresses of faulting write location
 	 * for job dumping if write tracking is enabled.
 	 */
 	if (kctx->gwt_enabled) {
 		u64 page_addr = faulting_as->fault_addr & PAGE_MASK;
-		u64 offset = (page_addr >> PAGE_SHIFT) - region->start_pfn;
-		u64 handle = region->start_pfn << PAGE_SHIFT;
 		bool found = false;
-
-		if (KBASE_MEM_TYPE_IMPORTED_UMM == region->cpu_alloc->type)
-			handle |= BIT(0);
-
 		/* Check if this write was already handled. */
 		list_for_each_entry(pos, &kctx->gwt_current_list, link) {
-			if (handle == pos->handle &&
-					offset >= pos->offset &&
-					offset < pos->offset + pos->num_pages) {
+			if (page_addr == pos->page_addr) {
 				found = true;
 				break;
 			}
@@ -249,8 +241,8 @@ static void kbase_gpu_mmu_handle_write_fault(struct kbase_context *kctx,
 		if (!found) {
 			pos = kmalloc(sizeof(*pos), GFP_KERNEL);
 			if (pos) {
-				pos->handle = handle;
-				pos->offset = offset;
+				pos->region = region;
+				pos->page_addr = page_addr;
 				pos->num_pages = 1;
 				list_add(&pos->link, &kctx->gwt_current_list);
 			} else {
@@ -318,6 +310,10 @@ void page_fault_worker(struct work_struct *data)
 	struct kbase_va_region *region;
 	int err;
 	bool grown = false;
+	size_t min_pool_size;
+	struct kbase_mem_pool *pool;
+	int pages_to_grow;
+	struct tagged_addr *gpu_pages, *cpu_pages;
 
 	faulting_as = container_of(data, struct kbase_as, work_pagefault);
 	fault_pfn = faulting_as->fault_addr >> PAGE_SHIFT;
@@ -336,8 +332,7 @@ void page_fault_worker(struct work_struct *data)
 
 	KBASE_DEBUG_ASSERT(kctx->kbdev == kbdev);
 
-	if (unlikely(faulting_as->protected_mode))
-	{
+	if (unlikely(faulting_as->protected_mode)) {
 		kbase_mmu_report_fault_and_kill(kctx, faulting_as,
 				"Protected mode fault");
 		kbase_mmu_hw_clear_fault(kbdev, faulting_as, kctx,
@@ -403,6 +398,7 @@ void page_fault_worker(struct work_struct *data)
 		goto fault_done;
 	}
 
+page_fault_retry:
 	/* so we have a translation fault, let's see if it is for growable
 	 * memory */
 	kbase_gpu_vm_lock(kctx);
@@ -496,20 +492,59 @@ void page_fault_worker(struct work_struct *data)
 		goto fault_done;
 	}
 
-	if (kbase_alloc_phy_pages_helper(region->gpu_alloc, new_pages) == 0) {
-		if (region->gpu_alloc != region->cpu_alloc) {
-			if (kbase_alloc_phy_pages_helper(
-					region->cpu_alloc, new_pages) == 0) {
-				grown = true;
-			} else {
-				kbase_free_phy_pages_helper(region->gpu_alloc,
-						new_pages);
-			}
-		} else {
-			grown = true;
-		}
+#ifdef CONFIG_MALI_2MB_ALLOC
+	if (new_pages >= (SZ_2M / SZ_4K)) {
+		pool = &kctx->lp_mem_pool;
+		/* Round up to number of 2 MB pages required */
+		min_pool_size = new_pages + ((SZ_2M / SZ_4K) - 1);
+		min_pool_size /= (SZ_2M / SZ_4K);
+	} else {
+#endif
+		pool = &kctx->mem_pool;
+		min_pool_size = new_pages;
+#ifdef CONFIG_MALI_2MB_ALLOC
 	}
+#endif
 
+	if (region->gpu_alloc != region->cpu_alloc)
+		min_pool_size *= 2;
+
+	pages_to_grow = 0;
+
+	mutex_lock(&kctx->mem_partials_lock);
+	kbase_mem_pool_lock(pool);
+	/* We can not allocate memory from the kernel with the vm_lock held, so
+	 * check that there is enough memory in the pool. If not then calculate
+	 * how much it has to grow by, grow the pool when the vm_lock is
+	 * dropped, and retry the allocation.
+	 */
+	if (kbase_mem_pool_size(pool) >= min_pool_size) {
+		gpu_pages = kbase_alloc_phy_pages_helper_locked(
+				region->gpu_alloc, pool, new_pages);
+
+		if (gpu_pages) {
+			if (region->gpu_alloc != region->cpu_alloc) {
+				cpu_pages = kbase_alloc_phy_pages_helper_locked(
+						region->cpu_alloc, pool,
+						new_pages);
+
+				if (cpu_pages) {
+					grown = true;
+				} else {
+					kbase_free_phy_pages_helper_locked(
+							region->gpu_alloc,
+							pool, gpu_pages,
+							new_pages);
+				}
+			} else {
+				grown = true;
+			}
+		}
+	} else {
+		pages_to_grow = min_pool_size - kbase_mem_pool_size(pool);
+	}
+	kbase_mem_pool_unlock(pool);
+	mutex_unlock(&kctx->mem_partials_lock);
 
 	if (grown) {
 		u64 pfn_offset;
@@ -587,12 +622,13 @@ void page_fault_worker(struct work_struct *data)
 
 			pos = kmalloc(sizeof(*pos), GFP_KERNEL);
 			if (pos) {
-				pos->handle = region->start_pfn << PAGE_SHIFT;
-				pos->offset = pfn_offset;
+				pos->region = region;
+				pos->page_addr = (region->start_pfn +
+							pfn_offset) <<
+							 PAGE_SHIFT;
 				pos->num_pages = new_pages;
 				list_add(&pos->link,
 					&kctx->gwt_current_list);
-
 			} else {
 				dev_warn(kbdev->dev, "kmalloc failure");
 			}
@@ -600,10 +636,23 @@ void page_fault_worker(struct work_struct *data)
 #endif
 		kbase_gpu_vm_unlock(kctx);
 	} else {
-		/* failed to extend, handle as a normal PF */
+		int ret = -ENOMEM;
+
 		kbase_gpu_vm_unlock(kctx);
-		kbase_mmu_report_fault_and_kill(kctx, faulting_as,
-				"Page allocation failure");
+
+		/* If the memory pool was insufficient then grow it and retry.
+		 * Otherwise fail the allocation.
+		 */
+		if (pages_to_grow > 0)
+			ret = kbase_mem_pool_grow(pool, pages_to_grow);
+
+		if (ret < 0) {
+			/* failed to extend, handle as a normal PF */
+			kbase_mmu_report_fault_and_kill(kctx, faulting_as,
+					"Page allocation failure");
+		} else {
+			goto page_fault_retry;
+		}
 	}
 
 fault_done:
@@ -1767,8 +1816,7 @@ void bus_fault_worker(struct work_struct *data)
 		return;
 	}
 
-	if (unlikely(faulting_as->protected_mode))
-	{
+	if (unlikely(faulting_as->protected_mode)) {
 		kbase_mmu_report_fault_and_kill(kctx, faulting_as,
 				"Permission failure");
 		kbase_mmu_hw_clear_fault(kbdev, faulting_as, kctx,
diff --git a/drivers/gpu/arm/bifrost/mali_kbase_mmu_mode_aarch64.c b/drivers/gpu/arm/bifrost/mali_kbase_mmu_mode_aarch64.c
index 4bb2628c9251..aa0c4038b563 100644
--- a/drivers/gpu/arm/bifrost/mali_kbase_mmu_mode_aarch64.c
+++ b/drivers/gpu/arm/bifrost/mali_kbase_mmu_mode_aarch64.c
@@ -21,7 +21,6 @@
  */
 
 
-
 #include "mali_kbase.h"
 #include "mali_midg_regmap.h"
 #include "mali_kbase_defs.h"
@@ -48,29 +47,25 @@
  */
 static inline void page_table_entry_set(u64 *pte, u64 phy)
 {
+#if KERNEL_VERSION(3, 18, 13) <= LINUX_VERSION_CODE
+	WRITE_ONCE(*pte, phy);
+#else
 #ifdef CONFIG_64BIT
+	barrier();
 	*pte = phy;
+	barrier();
 #elif defined(CONFIG_ARM)
-	/*
-	 * In order to prevent the compiler keeping cached copies of
-	 * memory, we have to explicitly say that we have updated memory.
-	 *
-	 * Note: We could manually move the data ourselves into R0 and
-	 * R1 by specifying register variables that are explicitly
-	 * given registers assignments, the down side of this is that
-	 * we have to assume cpu endianness.  To avoid this we can use
-	 * the ldrd to read the data from memory into R0 and R1 which
-	 * will respect the cpu endianness, we then use strd to make
-	 * the 64 bit assignment to the page table entry.
-	 */
-	asm volatile("ldrd r0, r1, [%[ptemp]]\n\t"
-			"strd r0, r1, [%[pte]]\n\t"
-			: "=m" (*pte)
-			: [ptemp] "r" (&phy), [pte] "r" (pte), "m" (phy)
-			: "r0", "r1");
+	barrier();
+	asm volatile("ldrd r0, [%1]\n\t"
+		     "strd r0, %0\n\t"
+		     : "=m" (*pte)
+		     : "r" (&phy)
+		     : "r0", "r1");
+	barrier();
 #else
 #error "64-bit atomic write must be implemented for your architecture"
 #endif
+#endif
 }
 
 static void mmu_get_as_setup(struct kbase_context *kctx,
diff --git a/drivers/gpu/arm/bifrost/mali_kbase_mmu_mode_lpae.c b/drivers/gpu/arm/bifrost/mali_kbase_mmu_mode_lpae.c
index bc8da6348772..7dc38fcb792b 100644
--- a/drivers/gpu/arm/bifrost/mali_kbase_mmu_mode_lpae.c
+++ b/drivers/gpu/arm/bifrost/mali_kbase_mmu_mode_lpae.c
@@ -21,7 +21,6 @@
  */
 
 
-
 #include "mali_kbase.h"
 #include "mali_midg_regmap.h"
 #include "mali_kbase_defs.h"
@@ -46,30 +45,25 @@
  */
 static inline void page_table_entry_set(u64 *pte, u64 phy)
 {
+#if KERNEL_VERSION(3, 18, 13) <= LINUX_VERSION_CODE
+	WRITE_ONCE(*pte, phy);
+#else
 #ifdef CONFIG_64BIT
+	barrier();
 	*pte = phy;
+	barrier();
 #elif defined(CONFIG_ARM)
-	/*
-	 * In order to prevent the compiler keeping cached copies of
-	 * memory, we have to explicitly say that we have updated
-	 * memory.
-	 *
-	 * Note: We could manually move the data ourselves into R0 and
-	 * R1 by specifying register variables that are explicitly
-	 * given registers assignments, the down side of this is that
-	 * we have to assume cpu endianness.  To avoid this we can use
-	 * the ldrd to read the data from memory into R0 and R1 which
-	 * will respect the cpu endianness, we then use strd to make
-	 * the 64 bit assignment to the page table entry.
-	 */
-	asm volatile("ldrd r0, r1, [%[ptemp]]\n\t"
-			"strd r0, r1, [%[pte]]\n\t"
-			: "=m" (*pte)
-			: [ptemp] "r" (&phy), [pte] "r" (pte), "m" (phy)
-			: "r0", "r1");
+	barrier();
+	asm volatile("ldrd r0, [%1]\n\t"
+		     "strd r0, %0\n\t"
+		     : "=m" (*pte)
+		     : "r" (&phy)
+		     : "r0", "r1");
+	barrier();
 #else
 #error "64-bit atomic write must be implemented for your architecture"
 #endif
+#endif
 }
 
 static void mmu_get_as_setup(struct kbase_context *kctx,
diff --git a/drivers/gpu/arm/bifrost/mali_kbase_pm.c b/drivers/gpu/arm/bifrost/mali_kbase_pm.c
index da56f0af2f86..c226350ff88c 100644
--- a/drivers/gpu/arm/bifrost/mali_kbase_pm.c
+++ b/drivers/gpu/arm/bifrost/mali_kbase_pm.c
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2010-2018 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -94,10 +94,15 @@ int kbase_pm_context_active_handle_suspend(struct kbase_device *kbdev, enum kbas
 	if (old_count == 0)
 		kbase_timeline_pm_handle_event(kbdev, KBASE_TIMELINE_PM_EVENT_GPU_ACTIVE);
 
-	if (c == 1)
+	if (c == 1) {
 		/* First context active: Power on the GPU and any cores requested by
 		 * the policy */
 		kbase_hwaccess_pm_gpu_active(kbdev);
+	}
+#if defined(CONFIG_DEVFREQ_THERMAL) && defined(CONFIG_MALI_BIFROST_DEVFREQ)
+	if (kbdev->ipa.gpu_active_callback)
+		kbdev->ipa.gpu_active_callback(kbdev->ipa.model_data);
+#endif
 
 	mutex_unlock(&kbdev->pm.lock);
 	mutex_unlock(&js_devdata->runpool_mutex);
@@ -146,6 +151,21 @@ void kbase_pm_context_idle(struct kbase_device *kbdev)
 		wake_up(&kbdev->pm.zero_active_count_wait);
 	}
 
+#if defined(CONFIG_DEVFREQ_THERMAL) && defined(CONFIG_MALI_BIFROST_DEVFREQ)
+	/* IPA may be using vinstr, in which case there may be one PM reference
+	 * still held when all other contexts have left the GPU. Inform IPA that
+	 * the GPU is now idle so that vinstr can drop it's reference.
+	 *
+	 * If the GPU was only briefly active then it might have gone idle
+	 * before vinstr has taken a PM reference, meaning that active_count is
+	 * zero. We still need to inform IPA in this case, so that vinstr can
+	 * drop the PM reference and avoid keeping the GPU powered
+	 * unnecessarily.
+	 */
+	if (c <= 1 && kbdev->ipa.gpu_idle_callback)
+		kbdev->ipa.gpu_idle_callback(kbdev->ipa.model_data);
+#endif
+
 	mutex_unlock(&kbdev->pm.lock);
 	mutex_unlock(&js_devdata->runpool_mutex);
 }
diff --git a/drivers/gpu/arm/bifrost/mali_kbase_replay.c b/drivers/gpu/arm/bifrost/mali_kbase_replay.c
index ddef76c30f82..6929af9c5c94 100644
--- a/drivers/gpu/arm/bifrost/mali_kbase_replay.c
+++ b/drivers/gpu/arm/bifrost/mali_kbase_replay.c
@@ -664,8 +664,8 @@ static void kbasep_replay_create_atom(struct kbase_context *kctx,
 	atom->prio = prio;
 	atom->atom_number = atom_nr;
 
-	base_jd_atom_dep_set(&atom->pre_dep[0], 0 , BASE_JD_DEP_TYPE_INVALID);
-	base_jd_atom_dep_set(&atom->pre_dep[1], 0 , BASE_JD_DEP_TYPE_INVALID);
+	base_jd_atom_dep_set(&atom->pre_dep[0], 0, BASE_JD_DEP_TYPE_INVALID);
+	base_jd_atom_dep_set(&atom->pre_dep[1], 0, BASE_JD_DEP_TYPE_INVALID);
 
 	atom->udata.blob[0] = 0;
 	atom->udata.blob[1] = 0;
@@ -713,7 +713,8 @@ static int kbasep_replay_create_atoms(struct kbase_context *kctx,
 	kbasep_replay_create_atom(kctx, t_atom, t_atom_nr, prio);
 	kbasep_replay_create_atom(kctx, f_atom, f_atom_nr, prio);
 
-	base_jd_atom_dep_set(&f_atom->pre_dep[0], t_atom_nr , BASE_JD_DEP_TYPE_DATA);
+	base_jd_atom_dep_set(&f_atom->pre_dep[0], t_atom_nr,
+			     BASE_JD_DEP_TYPE_DATA);
 
 	return 0;
 }
diff --git a/drivers/gpu/arm/bifrost/mali_kbase_softjobs.c b/drivers/gpu/arm/bifrost/mali_kbase_softjobs.c
index 0c20f66f0137..83b83fe7533d 100644
--- a/drivers/gpu/arm/bifrost/mali_kbase_softjobs.c
+++ b/drivers/gpu/arm/bifrost/mali_kbase_softjobs.c
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2011-2017 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2011-2018 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -375,12 +375,12 @@ static void kbase_fence_debug_timeout(struct kbase_jd_atom *katom)
 }
 #endif /* CONFIG_MALI_BIFROST_FENCE_DEBUG */
 
-void kbasep_soft_job_timeout_worker(unsigned long data)
+void kbasep_soft_job_timeout_worker(struct timer_list *timer)
 {
-	struct kbase_context *kctx = (struct kbase_context *)data;
+	struct kbase_context *kctx = container_of(timer, struct kbase_context,
+			soft_job_timeout);
 	u32 timeout_ms = (u32)atomic_read(
 			&kctx->kbdev->js_data.soft_job_timeout_ms);
-	struct timer_list *timer = &kctx->soft_job_timeout;
 	ktime_t cur_time = ktime_get();
 	bool restarting = false;
 	unsigned long lflags;
@@ -771,9 +771,11 @@ static int kbase_mem_copy_from_extres(struct kbase_context *kctx,
 	u64 offset = buf_data->offset;
 	size_t extres_size = buf_data->nr_extres_pages*PAGE_SIZE;
 	size_t to_copy = min(extres_size, buf_data->size);
-	size_t dma_to_copy;
 	struct kbase_mem_phy_alloc *gpu_alloc = buf_data->gpu_alloc;
 	int ret = 0;
+#ifdef CONFIG_DMA_SHARED_BUFFER
+	size_t dma_to_copy;
+#endif
 
 	KBASE_DEBUG_ASSERT(pages != NULL);
 
@@ -915,6 +917,33 @@ static int kbase_jit_allocate_prepare(struct kbase_jd_atom *katom)
 		goto free_info;
 	}
 
+	if (kctx->jit_version == 1) {
+		/* Old JIT didn't have usage_id, max_allocations, bin_id
+		 * or padding, so force them to zero
+		 */
+		info->usage_id = 0;
+		info->max_allocations = 0;
+		info->bin_id = 0;
+		info->flags = 0;
+		memset(info->padding, 0, sizeof(info->padding));
+	} else {
+		int i;
+
+		/* Check padding is all zeroed */
+		for (i = 0; i < sizeof(info->padding); i++) {
+			if (info->padding[i] != 0) {
+				ret = -EINVAL;
+				goto free_info;
+			}
+		}
+
+		/* No bit other than TILER_ALIGN_TOP shall be set */
+		if (info->flags & ~BASE_JIT_ALLOC_MEM_TILER_ALIGN_TOP) {
+			ret = -EINVAL;
+			goto free_info;
+		}
+	}
+
 	katom->softjob_data = info;
 	katom->jit_blocked = false;
 
@@ -1271,6 +1300,8 @@ static void kbase_ext_res_finish(struct kbase_jd_atom *katom)
 
 int kbase_process_soft_job(struct kbase_jd_atom *katom)
 {
+	KBASE_TLSTREAM_TL_EVENT_ATOM_SOFTJOB_START(katom);
+
 	switch (katom->core_req & BASE_JD_REQ_SOFT_JOB_TYPE) {
 	case BASE_JD_REQ_SOFT_DUMP_CPU_GPU_TIME:
 		return kbase_dump_cpu_gpu_time(katom);
@@ -1434,6 +1465,7 @@ int kbase_prepare_soft_job(struct kbase_jd_atom *katom)
 
 void kbase_finish_soft_job(struct kbase_jd_atom *katom)
 {
+	KBASE_TLSTREAM_TL_EVENT_ATOM_SOFTJOB_END(katom);
 	switch (katom->core_req & BASE_JD_REQ_SOFT_JOB_TYPE) {
 	case BASE_JD_REQ_SOFT_DUMP_CPU_GPU_TIME:
 		/* Nothing to do */
diff --git a/drivers/gpu/arm/bifrost/mali_kbase_sync_file.c b/drivers/gpu/arm/bifrost/mali_kbase_sync_file.c
index 0fcb5078d782..349a33ebafe2 100644
--- a/drivers/gpu/arm/bifrost/mali_kbase_sync_file.c
+++ b/drivers/gpu/arm/bifrost/mali_kbase_sync_file.c
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2012-2017 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2012-2018 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -166,7 +166,9 @@ static void kbase_fence_wait_callback(struct dma_fence *fence,
 	struct kbase_context *kctx = katom->kctx;
 
 	/* Cancel atom if fence is erroneous */
-#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 11, 0))
+#if (KERNEL_VERSION(4, 11, 0) <= LINUX_VERSION_CODE || \
+	 (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE && \
+	  KERNEL_VERSION(4, 9, 68) <= LINUX_VERSION_CODE))
 	if (dma_fence_is_signaled(kcb->fence) && kcb->fence->error)
 #else
 	if (dma_fence_is_signaled(kcb->fence) && kcb->fence->status < 0)
@@ -282,7 +284,9 @@ static void kbase_sync_fence_info_get(struct dma_fence *fence,
 	 * 1 : signaled
 	 */
 	if (dma_fence_is_signaled(fence)) {
-#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 11, 0))
+#if (KERNEL_VERSION(4, 11, 0) <= LINUX_VERSION_CODE || \
+	 (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE && \
+	  KERNEL_VERSION(4, 9, 68) <= LINUX_VERSION_CODE))
 		int status = fence->error;
 #else
 		int status = fence->status;
diff --git a/drivers/gpu/arm/bifrost/mali_kbase_tlstream.c b/drivers/gpu/arm/bifrost/mali_kbase_tlstream.c
index 926d6b631469..2ff45f50bf16 100644
--- a/drivers/gpu/arm/bifrost/mali_kbase_tlstream.c
+++ b/drivers/gpu/arm/bifrost/mali_kbase_tlstream.c
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2015-2017 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2015-2018 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -155,6 +155,8 @@ enum tl_msg_id_obj {
 	KBASE_TL_EVENT_LPU_SOFTSTOP,
 	KBASE_TL_EVENT_ATOM_SOFTSTOP_EX,
 	KBASE_TL_EVENT_ATOM_SOFTSTOP_ISSUE,
+	KBASE_TL_EVENT_ATOM_SOFTJOB_START,
+	KBASE_TL_EVENT_ATOM_SOFTJOB_END,
 
 	/* Job dump specific events. */
 	KBASE_JD_GPU_SOFT_RESET
@@ -499,6 +501,20 @@ static const struct tp_desc tp_desc_obj[] = {
 		"@p",
 		"atom"
 	},
+	{
+		KBASE_TL_EVENT_ATOM_SOFTJOB_START,
+		__stringify(KBASE_TL_EVENT_ATOM_SOFTJOB_START),
+		"atom soft job has started",
+		"@p",
+		"atom"
+	},
+	{
+		KBASE_TL_EVENT_ATOM_SOFTJOB_END,
+		__stringify(KBASE_TL_EVENT_ATOM_SOFTJOB_END),
+		"atom soft job has completed",
+		"@p",
+		"atom"
+	},
 	{
 		KBASE_JD_GPU_SOFT_RESET,
 		__stringify(KBASE_JD_GPU_SOFT_RESET),
@@ -1042,17 +1058,17 @@ static void kbasep_tlstream_flush_stream(enum tl_stream_type stype)
 
 /**
  * kbasep_tlstream_autoflush_timer_callback - autoflush timer callback
- * @data:  unused
+ * @timer: unused
  *
  * Timer is executed periodically to check if any of the stream contains
  * buffer ready to be submitted to user space.
  */
-static void kbasep_tlstream_autoflush_timer_callback(unsigned long data)
+static void kbasep_tlstream_autoflush_timer_callback(struct timer_list *timer)
 {
 	enum tl_stream_type stype;
 	int                 rcode;
 
-	CSTD_UNUSED(data);
+	CSTD_UNUSED(timer);
 
 	for (stype = 0; stype < TL_STREAM_TYPE_COUNT; stype++) {
 		struct tl_stream *stream = tl_stream[stype];
@@ -1376,9 +1392,8 @@ int kbase_tlstream_init(void)
 
 	/* Initialize autoflush timer. */
 	atomic_set(&autoflush_timer_active, 0);
-	setup_timer(&autoflush_timer,
-			kbasep_tlstream_autoflush_timer_callback,
-			0);
+	kbase_timer_setup(&autoflush_timer,
+			  kbasep_tlstream_autoflush_timer_callback);
 
 	return 0;
 }
@@ -2365,6 +2380,52 @@ void __kbase_tlstream_tl_event_atom_softstop_issue(void *atom)
 	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags);
 }
 
+void __kbase_tlstream_tl_event_atom_softjob_start(void *atom)
+{
+	const u32     msg_id = KBASE_TL_EVENT_ATOM_SOFTJOB_START;
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(atom);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_OBJ,
+			msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &atom, sizeof(atom));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags);
+}
+
+void __kbase_tlstream_tl_event_atom_softjob_end(void *atom)
+{
+	const u32     msg_id = KBASE_TL_EVENT_ATOM_SOFTJOB_END;
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(atom);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_OBJ,
+			msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &atom, sizeof(atom));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags);
+}
+
 void __kbase_tlstream_jd_gpu_soft_reset(void *gpu)
 {
 	const u32     msg_id = KBASE_JD_GPU_SOFT_RESET;
diff --git a/drivers/gpu/arm/bifrost/mali_kbase_tlstream.h b/drivers/gpu/arm/bifrost/mali_kbase_tlstream.h
index f4369014d219..bfa25d98264a 100644
--- a/drivers/gpu/arm/bifrost/mali_kbase_tlstream.h
+++ b/drivers/gpu/arm/bifrost/mali_kbase_tlstream.h
@@ -147,6 +147,8 @@ void __kbase_tlstream_tl_attrib_as_config(
 void __kbase_tlstream_tl_event_atom_softstop_ex(void *atom);
 void __kbase_tlstream_tl_event_lpu_softstop(void *lpu);
 void __kbase_tlstream_tl_event_atom_softstop_issue(void *atom);
+void __kbase_tlstream_tl_event_atom_softjob_start(void *atom);
+void __kbase_tlstream_tl_event_atom_softjob_end(void *atom);
 void __kbase_tlstream_jd_gpu_soft_reset(void *gpu);
 void __kbase_tlstream_aux_pm_state(u32 core_type, u64 state);
 void __kbase_tlstream_aux_pagefault(u32 ctx_nr, u64 page_count_change);
@@ -515,26 +517,40 @@ extern atomic_t kbase_tlstream_enabled;
 	__TRACE_IF_ENABLED(tl_attrib_as_config, as, transtab, memattr, transcfg)
 
 /**
- * KBASE_TLSTREAM_TL_EVENT_ATOM_SOFTSTOP_ex
+ * KBASE_TLSTREAM_TL_EVENT_ATOM_SOFTSTOP_EX
  * @atom:       atom identifier
  */
 #define KBASE_TLSTREAM_TL_EVENT_ATOM_SOFTSTOP_EX(atom) \
 	__TRACE_IF_ENABLED(tl_event_atom_softstop_ex, atom)
 
 /**
- * KBASE_TLSTREAM_TL_EVENT_LPU_softstop
+ * KBASE_TLSTREAM_TL_EVENT_LPU_SOFTSTOP
  * @lpu:        name of the LPU object
  */
 #define KBASE_TLSTREAM_TL_EVENT_LPU_SOFTSTOP(lpu) \
 	__TRACE_IF_ENABLED(tl_event_lpu_softstop, lpu)
 
 /**
- * KBASE_TLSTREAM_TL_EVENT_ATOM_SOFTSTOP_issue
+ * KBASE_TLSTREAM_TL_EVENT_ATOM_SOFTSTOP_ISSUE
  * @atom:       atom identifier
  */
 #define KBASE_TLSTREAM_TL_EVENT_ATOM_SOFTSTOP_ISSUE(atom) \
 	__TRACE_IF_ENABLED(tl_event_atom_softstop_issue, atom)
 
+/**
+ * KBASE_TLSTREAM_TL_EVENT_ATOM_SOFTJOB_START
+ * @atom:       atom identifier
+ */
+#define KBASE_TLSTREAM_TL_EVENT_ATOM_SOFTJOB_START(atom) \
+	__TRACE_IF_ENABLED(tl_event_atom_softjob_start, atom)
+
+/**
+ * KBASE_TLSTREAM_TL_EVENT_ATOM_SOFTJOB_END
+ * @atom:       atom identifier
+ */
+#define KBASE_TLSTREAM_TL_EVENT_ATOM_SOFTJOB_END(atom) \
+	__TRACE_IF_ENABLED(tl_event_atom_softjob_end, atom)
+
 /**
  * KBASE_TLSTREAM_JD_GPU_SOFT_RESET - The GPU is being soft reset
  * @gpu:        name of the GPU object
diff --git a/drivers/gpu/arm/bifrost/mali_kbase_utility.h b/drivers/gpu/arm/bifrost/mali_kbase_utility.h
index d36285e26a68..f2e5a3381e13 100644
--- a/drivers/gpu/arm/bifrost/mali_kbase_utility.h
+++ b/drivers/gpu/arm/bifrost/mali_kbase_utility.h
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2012-2013, 2015 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2012-2013, 2015, 2018 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -39,4 +39,28 @@
  */
 bool kbasep_list_member_of(const struct list_head *base, struct list_head *entry);
 
+
+static inline void kbase_timer_setup(struct timer_list *timer,
+				     void (*callback)(struct timer_list *timer))
+{
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 14, 0)
+	setup_timer(timer, (void (*)(unsigned long)) callback,
+			(unsigned long) timer);
+#else
+	timer_setup(timer, callback, 0);
+#endif
+}
+
+#ifndef WRITE_ONCE
+	#ifdef ASSIGN_ONCE
+		#define WRITE_ONCE(x, val) ASSIGN_ONCE(val, x)
+	#else
+		#define WRITE_ONCE(x, val) (ACCESS_ONCE(x) = (val))
+	#endif
+#endif
+
+#ifndef READ_ONCE
+	#define READ_ONCE(x) ACCESS_ONCE(x)
+#endif
+
 #endif				/* _KBASE_UTILITY_H */
diff --git a/drivers/gpu/arm/bifrost/mali_kbase_vinstr.c b/drivers/gpu/arm/bifrost/mali_kbase_vinstr.c
index e7d33da7bc77..e25338b3544a 100644
--- a/drivers/gpu/arm/bifrost/mali_kbase_vinstr.c
+++ b/drivers/gpu/arm/bifrost/mali_kbase_vinstr.c
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2011-2017 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2011-2018 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -82,7 +82,9 @@ enum vinstr_state {
 
 /**
  * struct kbase_vinstr_context - vinstr context per device
- * @lock:              protects the entire vinstr context
+ * @lock:              protects the entire vinstr context, but the list of
+ *                     vinstr clients can be updated outside the lock using
+ *                     @state_lock.
  * @kbdev:             pointer to kbase device
  * @kctx:              pointer to kbase context
  * @vmap:              vinstr vmap for mapping hwcnt dump buffer
@@ -94,12 +96,14 @@ enum vinstr_state {
  * @reprogram:         when true, reprogram hwcnt block with the new set of
  *                     counters
  * @state:             vinstr state
- * @state_lock:        protects information about vinstr state
+ * @state_lock:        protects information about vinstr state and list of
+ *                     clients.
  * @suspend_waitq:     notification queue to trigger state re-validation
  * @suspend_cnt:       reference counter of vinstr's suspend state
  * @suspend_work:      worker to execute on entering suspended state
  * @resume_work:       worker to execute on leaving suspended state
- * @nclients:          number of attached clients, pending or otherwise
+ * @nclients:          number of attached clients, pending or idle
+ * @nclients_suspended: number of attached but suspended clients
  * @waiting_clients:   head of list of clients being periodically sampled
  * @idle_clients:      head of list of clients being idle
  * @suspended_clients: head of list of clients being suspended
@@ -109,6 +113,10 @@ enum vinstr_state {
  * @clients_present:   when true, we have at least one client
  *                     Note: this variable is in sync. with nclients and is
  *                     present to preserve simplicity. Protected by state_lock.
+ * @need_suspend:      when true, a suspend has been requested while a resume is
+ *                     in progress. Resume worker should queue a suspend.
+ * @need_resume:       when true, a resume has been requested while a suspend is
+ *                     in progress. Suspend worker should queue a resume.
  */
 struct kbase_vinstr_context {
 	struct mutex             lock;
@@ -130,6 +138,7 @@ struct kbase_vinstr_context {
 	struct work_struct       resume_work;
 
 	u32                      nclients;
+	u32                      nclients_suspended;
 	struct list_head         waiting_clients;
 	struct list_head         idle_clients;
 	struct list_head         suspended_clients;
@@ -139,6 +148,9 @@ struct kbase_vinstr_context {
 	atomic_t                 request_pending;
 
 	bool                     clients_present;
+
+	bool                     need_suspend;
+	bool                     need_resume;
 };
 
 /**
@@ -161,6 +173,7 @@ struct kbase_vinstr_context {
  * @write_idx:     index of buffer being written by dumping service
  * @waitq:         client's notification queue
  * @pending:       when true, client has attached but hwcnt not yet updated
+ * @suspended:     when true, client is suspended
  */
 struct kbase_vinstr_client {
 	struct kbase_vinstr_context        *vinstr_ctx;
@@ -181,6 +194,7 @@ struct kbase_vinstr_client {
 	atomic_t                           write_idx;
 	wait_queue_head_t                  waitq;
 	bool                               pending;
+	bool                               suspended;
 };
 
 /**
@@ -195,6 +209,9 @@ struct kbasep_vinstr_wake_up_timer {
 
 /*****************************************************************************/
 
+static void kbase_vinstr_update_suspend(
+		struct kbase_vinstr_context *vinstr_ctx);
+
 static int kbasep_vinstr_service_task(void *data);
 
 static unsigned int kbasep_vinstr_hwcnt_reader_poll(
@@ -226,14 +243,14 @@ static int enable_hwcnt(struct kbase_vinstr_context *vinstr_ctx)
 {
 	struct kbase_context *kctx = vinstr_ctx->kctx;
 	struct kbase_device *kbdev = kctx->kbdev;
-	struct kbase_uk_hwcnt_setup setup;
+	struct kbase_ioctl_hwcnt_enable enable;
 	int err;
 
-	setup.dump_buffer = vinstr_ctx->gpu_va;
-	setup.jm_bm       = vinstr_ctx->bitmap[JM_HWCNT_BM];
-	setup.tiler_bm    = vinstr_ctx->bitmap[TILER_HWCNT_BM];
-	setup.shader_bm   = vinstr_ctx->bitmap[SHADER_HWCNT_BM];
-	setup.mmu_l2_bm   = vinstr_ctx->bitmap[MMU_L2_HWCNT_BM];
+	enable.dump_buffer = vinstr_ctx->gpu_va;
+	enable.jm_bm       = vinstr_ctx->bitmap[JM_HWCNT_BM];
+	enable.tiler_bm    = vinstr_ctx->bitmap[TILER_HWCNT_BM];
+	enable.shader_bm   = vinstr_ctx->bitmap[SHADER_HWCNT_BM];
+	enable.mmu_l2_bm   = vinstr_ctx->bitmap[MMU_L2_HWCNT_BM];
 
 	/* Mark the context as active so the GPU is kept turned on */
 	/* A suspend won't happen here, because we're in a syscall from a
@@ -242,7 +259,7 @@ static int enable_hwcnt(struct kbase_vinstr_context *vinstr_ctx)
 
 	/* Schedule the context in */
 	kbasep_js_schedule_privileged_ctx(kbdev, kctx);
-	err = kbase_instr_hwcnt_enable_internal(kbdev, kctx, &setup);
+	err = kbase_instr_hwcnt_enable_internal(kbdev, kctx, &enable);
 	if (err) {
 		/* Release the context. This had its own Power Manager Active
 		 * reference */
@@ -472,18 +489,24 @@ static void kbasep_vinstr_destroy_kctx(struct kbase_vinstr_context *vinstr_ctx)
 	struct kbasep_kctx_list_element *element;
 	struct kbasep_kctx_list_element *tmp;
 	bool                            found = false;
+	bool                            hwcnt_disabled = false;
 	unsigned long                   flags;
 
 	/* Release hw counters dumping resources. */
 	vinstr_ctx->thread = NULL;
-	disable_hwcnt(vinstr_ctx);
-	kbasep_vinstr_unmap_kernel_dump_buffer(vinstr_ctx);
 
 	/* Simplify state transitions by specifying that we have no clients. */
 	spin_lock_irqsave(&vinstr_ctx->state_lock, flags);
 	vinstr_ctx->clients_present = false;
+	if ((VINSTR_SUSPENDED == vinstr_ctx->state) || (VINSTR_RESUMING == vinstr_ctx->state))
+		hwcnt_disabled = true;
 	spin_unlock_irqrestore(&vinstr_ctx->state_lock, flags);
 
+	if (!hwcnt_disabled)
+		disable_hwcnt(vinstr_ctx);
+
+	kbasep_vinstr_unmap_kernel_dump_buffer(vinstr_ctx);
+
 	/* Remove kernel context from the device's contexts list. */
 	mutex_lock(&kbdev->kctx_list_lock);
 	list_for_each_entry_safe(element, tmp, &kbdev->kctx_list, link) {
@@ -523,6 +546,8 @@ static struct kbase_vinstr_client *kbasep_vinstr_attach_client(
 {
 	struct task_struct         *thread = NULL;
 	struct kbase_vinstr_client *cli;
+	unsigned long flags;
+	bool clients_present = false;
 
 	KBASE_DEBUG_ASSERT(vinstr_ctx);
 
@@ -548,10 +573,14 @@ static struct kbase_vinstr_client *kbasep_vinstr_attach_client(
 	hwcnt_bitmap_union(vinstr_ctx->bitmap, cli->bitmap);
 	vinstr_ctx->reprogram = true;
 
+	spin_lock_irqsave(&vinstr_ctx->state_lock, flags);
+	clients_present = (vinstr_ctx->nclients || vinstr_ctx->nclients_suspended);
+	spin_unlock_irqrestore(&vinstr_ctx->state_lock, flags);
+
 	/* If this is the first client, create the vinstr kbase
 	 * context. This context is permanently resident until the
 	 * last client exits. */
-	if (!vinstr_ctx->nclients) {
+	if (!clients_present) {
 		hwcnt_bitmap_set(vinstr_ctx->bitmap, cli->bitmap);
 		if (kbasep_vinstr_create_kctx(vinstr_ctx) < 0)
 			goto error;
@@ -606,8 +635,11 @@ static struct kbase_vinstr_client *kbasep_vinstr_attach_client(
 	atomic_set(&cli->write_idx, 0);
 	init_waitqueue_head(&cli->waitq);
 
+	spin_lock_irqsave(&vinstr_ctx->state_lock, flags);
 	vinstr_ctx->nclients++;
 	list_add(&cli->list, &vinstr_ctx->idle_clients);
+	kbase_vinstr_update_suspend(vinstr_ctx);
+	spin_unlock_irqrestore(&vinstr_ctx->state_lock, flags);
 
 	mutex_unlock(&vinstr_ctx->lock);
 
@@ -620,7 +652,7 @@ static struct kbase_vinstr_client *kbasep_vinstr_attach_client(
 				(unsigned long)cli->dump_buffers,
 				get_order(cli->dump_size * cli->buffer_count));
 	kfree(cli->accum_buffer);
-	if (!vinstr_ctx->nclients && vinstr_ctx->kctx) {
+	if (!clients_present && vinstr_ctx->kctx) {
 		thread = vinstr_ctx->thread;
 		kbasep_vinstr_destroy_kctx(vinstr_ctx);
 	}
@@ -642,18 +674,19 @@ void kbase_vinstr_detach_client(struct kbase_vinstr_client *cli)
 	struct task_struct          *thread = NULL;
 	u32 zerobitmap[4] = { 0 };
 	int cli_found = 0;
+	unsigned long flags;
+	bool clients_present;
 
 	KBASE_DEBUG_ASSERT(cli);
 	vinstr_ctx = cli->vinstr_ctx;
 	KBASE_DEBUG_ASSERT(vinstr_ctx);
 
 	mutex_lock(&vinstr_ctx->lock);
+	spin_lock_irqsave(&vinstr_ctx->state_lock, flags);
 
 	list_for_each_entry_safe(iter, tmp, &vinstr_ctx->idle_clients, list) {
 		if (iter == cli) {
-			vinstr_ctx->reprogram = true;
 			cli_found = 1;
-			list_del(&iter->list);
 			break;
 		}
 	}
@@ -661,15 +694,47 @@ void kbase_vinstr_detach_client(struct kbase_vinstr_client *cli)
 		list_for_each_entry_safe(
 				iter, tmp, &vinstr_ctx->waiting_clients, list) {
 			if (iter == cli) {
-				vinstr_ctx->reprogram = true;
 				cli_found = 1;
-				list_del(&iter->list);
+				break;
+			}
+		}
+	}
+	if (!cli_found) {
+		list_for_each_entry_safe(
+				iter, tmp, &vinstr_ctx->suspended_clients, list) {
+			if (iter == cli) {
+				cli_found = 1;
 				break;
 			}
 		}
 	}
 	KBASE_DEBUG_ASSERT(cli_found);
 
+	if (cli_found) {
+		vinstr_ctx->reprogram = true;
+		list_del(&iter->list);
+	}
+
+	if (!cli->suspended)
+		vinstr_ctx->nclients--;
+	else
+		vinstr_ctx->nclients_suspended--;
+
+	kbase_vinstr_update_suspend(vinstr_ctx);
+
+	clients_present = (vinstr_ctx->nclients || vinstr_ctx->nclients_suspended);
+
+	/* Rebuild context bitmap now that the client has detached */
+	hwcnt_bitmap_set(vinstr_ctx->bitmap, zerobitmap);
+	list_for_each_entry(iter, &vinstr_ctx->idle_clients, list)
+		hwcnt_bitmap_union(vinstr_ctx->bitmap, iter->bitmap);
+	list_for_each_entry(iter, &vinstr_ctx->waiting_clients, list)
+		hwcnt_bitmap_union(vinstr_ctx->bitmap, iter->bitmap);
+	list_for_each_entry(iter, &vinstr_ctx->suspended_clients, list)
+		hwcnt_bitmap_union(vinstr_ctx->bitmap, iter->bitmap);
+
+	spin_unlock_irqrestore(&vinstr_ctx->state_lock, flags);
+
 	kfree(cli->dump_buffers_meta);
 	free_pages(
 			(unsigned long)cli->dump_buffers,
@@ -677,19 +742,11 @@ void kbase_vinstr_detach_client(struct kbase_vinstr_client *cli)
 	kfree(cli->accum_buffer);
 	kfree(cli);
 
-	vinstr_ctx->nclients--;
-	if (!vinstr_ctx->nclients) {
+	if (!clients_present) {
 		thread = vinstr_ctx->thread;
 		kbasep_vinstr_destroy_kctx(vinstr_ctx);
 	}
 
-	/* Rebuild context bitmap now that the client has detached */
-	hwcnt_bitmap_set(vinstr_ctx->bitmap, zerobitmap);
-	list_for_each_entry(iter, &vinstr_ctx->idle_clients, list)
-		hwcnt_bitmap_union(vinstr_ctx->bitmap, iter->bitmap);
-	list_for_each_entry(iter, &vinstr_ctx->waiting_clients, list)
-		hwcnt_bitmap_union(vinstr_ctx->bitmap, iter->bitmap);
-
 	mutex_unlock(&vinstr_ctx->lock);
 
 	/* Thread must be stopped after lock is released. */
@@ -978,8 +1035,7 @@ static int kbasep_vinstr_collect_and_accumulate(
 	WARN_ON(rcode);
 
 	spin_lock_irqsave(&vinstr_ctx->state_lock, flags);
-	switch (vinstr_ctx->state)
-	{
+	switch (vinstr_ctx->state) {
 	case VINSTR_SUSPENDING:
 		schedule_work(&vinstr_ctx->suspend_work);
 		break;
@@ -990,12 +1046,13 @@ static int kbasep_vinstr_collect_and_accumulate(
 	default:
 		break;
 	}
-	spin_unlock_irqrestore(&vinstr_ctx->state_lock, flags);
 
 	/* Accumulate values of collected counters. */
 	if (!rcode)
 		accum_clients(vinstr_ctx);
 
+	spin_unlock_irqrestore(&vinstr_ctx->state_lock, flags);
+
 	return rcode;
 }
 
@@ -1100,6 +1157,7 @@ static void kbasep_vinstr_reprogram(
 
 		if (!reprogram_hwcnt(vinstr_ctx)) {
 			vinstr_ctx->reprogram = false;
+			spin_lock_irqsave(&vinstr_ctx->state_lock, flags);
 			list_for_each_entry(
 					iter,
 					&vinstr_ctx->idle_clients,
@@ -1110,6 +1168,7 @@ static void kbasep_vinstr_reprogram(
 					&vinstr_ctx->waiting_clients,
 					list)
 				iter->pending = false;
+			spin_unlock_irqrestore(&vinstr_ctx->state_lock, flags);
 		}
 	}
 }
@@ -1128,6 +1187,7 @@ static int kbasep_vinstr_update_client(
 		enum base_hwcnt_reader_event event_id)
 {
 	int rcode = 0;
+	unsigned long flags;
 
 	/* Copy collected counters to user readable buffer. */
 	if (cli->buffer_count)
@@ -1138,18 +1198,23 @@ static int kbasep_vinstr_update_client(
 	else
 		rcode = kbasep_vinstr_fill_dump_buffer_legacy(cli);
 
+	/* Prepare for next request. */
+	memset(cli->accum_buffer, 0, cli->dump_size);
+
+	spin_lock_irqsave(&cli->vinstr_ctx->state_lock, flags);
+	/* Check if client was put to suspend state while it was being updated */
+	if (cli->suspended)
+		rcode = -EINVAL;
+	spin_unlock_irqrestore(&cli->vinstr_ctx->state_lock, flags);
+
 	if (rcode)
 		goto exit;
 
-
 	/* Notify client. Make sure all changes to memory are visible. */
 	wmb();
 	atomic_inc(&cli->write_idx);
 	wake_up_interruptible(&cli->waitq);
 
-	/* Prepare for next request. */
-	memset(cli->accum_buffer, 0, cli->dump_size);
-
 exit:
 	return rcode;
 }
@@ -1208,6 +1273,7 @@ static int kbasep_vinstr_service_task(void *data)
 		struct kbase_vinstr_client *cli = NULL;
 		struct kbase_vinstr_client *tmp;
 		int                        rcode;
+		unsigned long              flags;
 
 		u64              timestamp = kbasep_vinstr_get_timestamp();
 		u64              dump_time = 0;
@@ -1220,6 +1286,7 @@ static int kbasep_vinstr_service_task(void *data)
 		if (current == vinstr_ctx->thread) {
 			atomic_set(&vinstr_ctx->request_pending, 0);
 
+			spin_lock_irqsave(&vinstr_ctx->state_lock, flags);
 			if (!list_empty(&vinstr_ctx->waiting_clients)) {
 				cli = list_first_entry(
 						&vinstr_ctx->waiting_clients,
@@ -1227,6 +1294,7 @@ static int kbasep_vinstr_service_task(void *data)
 						list);
 				dump_time = cli->dump_time;
 			}
+			spin_unlock_irqrestore(&vinstr_ctx->state_lock, flags);
 		}
 
 		if (!cli || ((s64)timestamp - (s64)dump_time < 0ll)) {
@@ -1255,6 +1323,7 @@ static int kbasep_vinstr_service_task(void *data)
 
 		INIT_LIST_HEAD(&expired_requests);
 
+		spin_lock_irqsave(&vinstr_ctx->state_lock, flags);
 		/* Find all expired requests. */
 		list_for_each_entry_safe(
 				cli,
@@ -1273,18 +1342,29 @@ static int kbasep_vinstr_service_task(void *data)
 		}
 
 		/* Fill data for each request found. */
-		list_for_each_entry_safe(cli, tmp, &expired_requests, list) {
+		while (!list_empty(&expired_requests)) {
+			cli = list_first_entry(&expired_requests,
+					struct kbase_vinstr_client, list);
+
 			/* Ensure that legacy buffer will not be used from
 			 * this kthread context. */
 			BUG_ON(0 == cli->buffer_count);
 			/* Expect only periodically sampled clients. */
 			BUG_ON(0 == cli->dump_interval);
 
+			/* Release the spinlock, as filling the data in client's
+			 * userspace buffer could result in page faults. */
+			spin_unlock_irqrestore(&vinstr_ctx->state_lock, flags);
 			if (!rcode)
 				kbasep_vinstr_update_client(
 						cli,
 						timestamp,
 						BASE_HWCNT_READER_EVENT_PERIODIC);
+			spin_lock_irqsave(&cli->vinstr_ctx->state_lock, flags);
+
+			/* This client got suspended, move to the next one. */
+			if (cli->suspended)
+				continue;
 
 			/* Set new dumping time. Drop missed probing times. */
 			do {
@@ -1296,6 +1376,7 @@ static int kbasep_vinstr_service_task(void *data)
 					cli,
 					&vinstr_ctx->waiting_clients);
 		}
+		spin_unlock_irqrestore(&vinstr_ctx->state_lock, flags);
 
 		/* Reprogram counters set if required. */
 		kbasep_vinstr_reprogram(vinstr_ctx);
@@ -1410,10 +1491,18 @@ static long kbasep_vinstr_hwcnt_reader_ioctl_set_interval(
 		struct kbase_vinstr_client *cli, u32 interval)
 {
 	struct kbase_vinstr_context *vinstr_ctx = cli->vinstr_ctx;
+	unsigned long flags;
 
 	KBASE_DEBUG_ASSERT(vinstr_ctx);
 
 	mutex_lock(&vinstr_ctx->lock);
+	spin_lock_irqsave(&vinstr_ctx->state_lock, flags);
+
+	if (cli->suspended) {
+		spin_unlock_irqrestore(&vinstr_ctx->state_lock, flags);
+		mutex_unlock(&vinstr_ctx->lock);
+		return -ENOMEM;
+	}
 
 	list_del(&cli->list);
 
@@ -1435,6 +1524,7 @@ static long kbasep_vinstr_hwcnt_reader_ioctl_set_interval(
 		list_add(&cli->list, &vinstr_ctx->idle_clients);
 	}
 
+	spin_unlock_irqrestore(&vinstr_ctx->state_lock, flags);
 	mutex_unlock(&vinstr_ctx->lock);
 
 	return 0;
@@ -1739,17 +1829,29 @@ static void kbasep_vinstr_suspend_worker(struct work_struct *data)
 	spin_lock_irqsave(&vinstr_ctx->state_lock, flags);
 	vinstr_ctx->state = VINSTR_SUSPENDED;
 	wake_up_all(&vinstr_ctx->suspend_waitq);
-	spin_unlock_irqrestore(&vinstr_ctx->state_lock, flags);
 
-	mutex_unlock(&vinstr_ctx->lock);
+	if (vinstr_ctx->need_resume) {
+		vinstr_ctx->need_resume = false;
+		vinstr_ctx->state = VINSTR_RESUMING;
+		schedule_work(&vinstr_ctx->resume_work);
 
-	/* Kick GPU scheduler to allow entering protected mode.
-	 * This must happen after vinstr was suspended. */
-	kbasep_vinstr_kick_scheduler(vinstr_ctx->kbdev);
+		spin_unlock_irqrestore(&vinstr_ctx->state_lock, flags);
+
+		mutex_unlock(&vinstr_ctx->lock);
+	} else {
+		spin_unlock_irqrestore(&vinstr_ctx->state_lock, flags);
+
+		mutex_unlock(&vinstr_ctx->lock);
+
+		/* Kick GPU scheduler to allow entering protected mode.
+		 * This must happen after vinstr was suspended.
+		 */
+		kbasep_vinstr_kick_scheduler(vinstr_ctx->kbdev);
+	}
 }
 
 /**
- * kbasep_vinstr_suspend_worker - worker resuming vinstr module
+ * kbasep_vinstr_resume_worker - worker resuming vinstr module
  * @data: pointer to work structure
  */
 static void kbasep_vinstr_resume_worker(struct work_struct *data)
@@ -1768,15 +1870,27 @@ static void kbasep_vinstr_resume_worker(struct work_struct *data)
 	spin_lock_irqsave(&vinstr_ctx->state_lock, flags);
 	vinstr_ctx->state = VINSTR_IDLE;
 	wake_up_all(&vinstr_ctx->suspend_waitq);
-	spin_unlock_irqrestore(&vinstr_ctx->state_lock, flags);
 
-	mutex_unlock(&vinstr_ctx->lock);
+	if (vinstr_ctx->need_suspend) {
+		vinstr_ctx->need_suspend = false;
+		vinstr_ctx->state = VINSTR_SUSPENDING;
+		schedule_work(&vinstr_ctx->suspend_work);
 
-	/* Kick GPU scheduler to allow entering protected mode.
-	 * Note that scheduler state machine might requested re-entry to
-	 * protected mode before vinstr was resumed.
-	 * This must happen after vinstr was release. */
-	kbasep_vinstr_kick_scheduler(vinstr_ctx->kbdev);
+		spin_unlock_irqrestore(&vinstr_ctx->state_lock, flags);
+
+		mutex_unlock(&vinstr_ctx->lock);
+	} else {
+		spin_unlock_irqrestore(&vinstr_ctx->state_lock, flags);
+
+		mutex_unlock(&vinstr_ctx->lock);
+
+		/* Kick GPU scheduler to allow entering protected mode.
+		 * Note that scheduler state machine might requested re-entry to
+		 * protected mode before vinstr was resumed.
+		 * This must happen after vinstr was release.
+		 */
+		kbasep_vinstr_kick_scheduler(vinstr_ctx->kbdev);
+	}
 }
 
 /*****************************************************************************/
@@ -1791,6 +1905,7 @@ struct kbase_vinstr_context *kbase_vinstr_init(struct kbase_device *kbdev)
 
 	INIT_LIST_HEAD(&vinstr_ctx->idle_clients);
 	INIT_LIST_HEAD(&vinstr_ctx->waiting_clients);
+	INIT_LIST_HEAD(&vinstr_ctx->suspended_clients);
 	mutex_init(&vinstr_ctx->lock);
 	spin_lock_init(&vinstr_ctx->state_lock);
 	vinstr_ctx->kbdev = kbdev;
@@ -1824,27 +1939,35 @@ void kbase_vinstr_term(struct kbase_vinstr_context *vinstr_ctx)
 
 		if (list_empty(list)) {
 			list = &vinstr_ctx->waiting_clients;
-			if (list_empty(list))
-				break;
+			if (list_empty(list)) {
+				list = &vinstr_ctx->suspended_clients;
+				if (list_empty(list))
+					break;
+			}
 		}
 
 		cli = list_first_entry(list, struct kbase_vinstr_client, list);
 		list_del(&cli->list);
+		if (!cli->suspended)
+			vinstr_ctx->nclients--;
+		else
+			vinstr_ctx->nclients_suspended--;
 		kfree(cli->accum_buffer);
 		kfree(cli);
-		vinstr_ctx->nclients--;
 	}
 	KBASE_DEBUG_ASSERT(!vinstr_ctx->nclients);
+	KBASE_DEBUG_ASSERT(!vinstr_ctx->nclients_suspended);
 	if (vinstr_ctx->kctx)
 		kbasep_vinstr_destroy_kctx(vinstr_ctx);
 	kfree(vinstr_ctx);
 }
 
 int kbase_vinstr_hwcnt_reader_setup(struct kbase_vinstr_context *vinstr_ctx,
-		struct kbase_uk_hwcnt_reader_setup *setup)
+		struct kbase_ioctl_hwcnt_reader_setup *setup)
 {
 	struct kbase_vinstr_client  *cli;
 	u32                         bitmap[4];
+	int                         fd;
 
 	KBASE_DEBUG_ASSERT(vinstr_ctx);
 	KBASE_DEBUG_ASSERT(setup);
@@ -1859,31 +1982,32 @@ int kbase_vinstr_hwcnt_reader_setup(struct kbase_vinstr_context *vinstr_ctx,
 			vinstr_ctx,
 			setup->buffer_count,
 			bitmap,
-			&setup->fd,
+			&fd,
 			NULL);
 
 	if (!cli)
 		return -ENOMEM;
 
-	return 0;
+	kbase_vinstr_wait_for_ready(vinstr_ctx);
+	return fd;
 }
 
 int kbase_vinstr_legacy_hwc_setup(
 		struct kbase_vinstr_context *vinstr_ctx,
 		struct kbase_vinstr_client  **cli,
-		struct kbase_uk_hwcnt_setup *setup)
+		struct kbase_ioctl_hwcnt_enable *enable)
 {
 	KBASE_DEBUG_ASSERT(vinstr_ctx);
-	KBASE_DEBUG_ASSERT(setup);
+	KBASE_DEBUG_ASSERT(enable);
 	KBASE_DEBUG_ASSERT(cli);
 
-	if (setup->dump_buffer) {
+	if (enable->dump_buffer) {
 		u32 bitmap[4];
 
-		bitmap[SHADER_HWCNT_BM] = setup->shader_bm;
-		bitmap[TILER_HWCNT_BM]  = setup->tiler_bm;
-		bitmap[MMU_L2_HWCNT_BM] = setup->mmu_l2_bm;
-		bitmap[JM_HWCNT_BM]     = setup->jm_bm;
+		bitmap[SHADER_HWCNT_BM] = enable->shader_bm;
+		bitmap[TILER_HWCNT_BM]  = enable->tiler_bm;
+		bitmap[MMU_L2_HWCNT_BM] = enable->mmu_l2_bm;
+		bitmap[JM_HWCNT_BM]     = enable->jm_bm;
 
 		if (*cli)
 			return -EBUSY;
@@ -1892,11 +2016,13 @@ int kbase_vinstr_legacy_hwc_setup(
 				vinstr_ctx,
 				0,
 				bitmap,
-				(void *)(long)setup->dump_buffer,
+				(void *)(uintptr_t)enable->dump_buffer,
 				NULL);
 
 		if (!(*cli))
 			return -ENOMEM;
+
+		kbase_vinstr_wait_for_ready(vinstr_ctx);
 	} else {
 		if (!*cli)
 			return -EINVAL;
@@ -1910,9 +2036,10 @@ int kbase_vinstr_legacy_hwc_setup(
 
 struct kbase_vinstr_client *kbase_vinstr_hwcnt_kernel_setup(
 		struct kbase_vinstr_context *vinstr_ctx,
-		struct kbase_uk_hwcnt_reader_setup *setup,
+		struct kbase_ioctl_hwcnt_reader_setup *setup,
 		void *kernel_buffer)
 {
+	struct kbase_vinstr_client *kernel_client;
 	u32 bitmap[4];
 
 	if (!vinstr_ctx || !setup || !kernel_buffer)
@@ -1923,12 +2050,17 @@ struct kbase_vinstr_client *kbase_vinstr_hwcnt_kernel_setup(
 	bitmap[MMU_L2_HWCNT_BM] = setup->mmu_l2_bm;
 	bitmap[JM_HWCNT_BM]     = setup->jm_bm;
 
-	return kbasep_vinstr_attach_client(
-			vinstr_ctx,
-			0,
-			bitmap,
-			NULL,
-			kernel_buffer);
+	kernel_client = kbasep_vinstr_attach_client(
+				vinstr_ctx,
+				0,
+				bitmap,
+				NULL,
+				kernel_buffer);
+
+	if (kernel_client)
+		kbase_vinstr_wait_for_ready(vinstr_ctx);
+
+	return kernel_client;
 }
 KBASE_EXPORT_TEST_API(kbase_vinstr_hwcnt_kernel_setup);
 
@@ -2036,13 +2168,42 @@ int kbase_vinstr_try_suspend(struct kbase_vinstr_context *vinstr_ctx)
 		vinstr_ctx->state = VINSTR_SUSPENDING;
 		break;
 
-	case VINSTR_SUSPENDING:
-		/* fall through */
 	case VINSTR_RESUMING:
+		vinstr_ctx->need_suspend = true;
+		break;
+
+	case VINSTR_SUSPENDING:
 		break;
 
 	default:
-		BUG();
+		KBASE_DEBUG_ASSERT(0);
+		break;
+	}
+	spin_unlock_irqrestore(&vinstr_ctx->state_lock, flags);
+
+	return ret;
+}
+
+static int kbase_vinstr_is_ready(struct kbase_vinstr_context *vinstr_ctx)
+{
+	unsigned long flags;
+	int ret = -EAGAIN;
+
+	KBASE_DEBUG_ASSERT(vinstr_ctx);
+
+	spin_lock_irqsave(&vinstr_ctx->state_lock, flags);
+	switch (vinstr_ctx->state) {
+	case VINSTR_SUSPENDED:
+	case VINSTR_RESUMING:
+	case VINSTR_SUSPENDING:
+		break;
+
+	case VINSTR_IDLE:
+	case VINSTR_DUMPING:
+		ret = 0;
+		break;
+	default:
+		KBASE_DEBUG_ASSERT(0);
 		break;
 	}
 	spin_unlock_irqrestore(&vinstr_ctx->state_lock, flags);
@@ -2056,6 +2217,58 @@ void kbase_vinstr_suspend(struct kbase_vinstr_context *vinstr_ctx)
 			(0 == kbase_vinstr_try_suspend(vinstr_ctx)));
 }
 
+void kbase_vinstr_wait_for_ready(struct kbase_vinstr_context *vinstr_ctx)
+{
+	wait_event(vinstr_ctx->suspend_waitq,
+			(0 == kbase_vinstr_is_ready(vinstr_ctx)));
+}
+KBASE_EXPORT_TEST_API(kbase_vinstr_wait_for_ready);
+
+/**
+ * kbase_vinstr_update_suspend - Update vinstr suspend/resume status depending
+ *                               on nclients
+ * @vinstr_ctx: vinstr context pointer
+ *
+ * This function should be called whenever vinstr_ctx->nclients changes. This
+ * may cause vinstr to be suspended or resumed, depending on the number of
+ * clients and whether IPA is suspended or not.
+ */
+static void kbase_vinstr_update_suspend(struct kbase_vinstr_context *vinstr_ctx)
+{
+	lockdep_assert_held(&vinstr_ctx->state_lock);
+
+	switch (vinstr_ctx->state) {
+	case VINSTR_SUSPENDED:
+		if ((vinstr_ctx->nclients) && (0 == vinstr_ctx->suspend_cnt)) {
+			vinstr_ctx->state = VINSTR_RESUMING;
+			schedule_work(&vinstr_ctx->resume_work);
+		}
+		break;
+
+	case VINSTR_SUSPENDING:
+		if (vinstr_ctx->nclients)
+			vinstr_ctx->need_resume = true;
+		break;
+
+	case VINSTR_IDLE:
+		if (!vinstr_ctx->nclients) {
+			vinstr_ctx->state = VINSTR_SUSPENDING;
+			schedule_work(&vinstr_ctx->suspend_work);
+		}
+		break;
+
+	case VINSTR_DUMPING:
+		if (!vinstr_ctx->nclients)
+			vinstr_ctx->state = VINSTR_SUSPENDING;
+		break;
+
+	case VINSTR_RESUMING:
+		if (!vinstr_ctx->nclients)
+			vinstr_ctx->need_suspend = true;
+		break;
+	}
+}
+
 void kbase_vinstr_resume(struct kbase_vinstr_context *vinstr_ctx)
 {
 	unsigned long flags;
@@ -2078,3 +2291,45 @@ void kbase_vinstr_resume(struct kbase_vinstr_context *vinstr_ctx)
 	}
 	spin_unlock_irqrestore(&vinstr_ctx->state_lock, flags);
 }
+
+void kbase_vinstr_suspend_client(struct kbase_vinstr_client *client)
+{
+	struct kbase_vinstr_context *vinstr_ctx = client->vinstr_ctx;
+	unsigned long flags;
+
+	spin_lock_irqsave(&vinstr_ctx->state_lock, flags);
+
+	if (!client->suspended) {
+		list_del(&client->list);
+		list_add(&client->list, &vinstr_ctx->suspended_clients);
+
+		vinstr_ctx->nclients--;
+		vinstr_ctx->nclients_suspended++;
+		kbase_vinstr_update_suspend(vinstr_ctx);
+
+		client->suspended = true;
+	}
+
+	spin_unlock_irqrestore(&vinstr_ctx->state_lock, flags);
+}
+
+void kbase_vinstr_resume_client(struct kbase_vinstr_client *client)
+{
+	struct kbase_vinstr_context *vinstr_ctx = client->vinstr_ctx;
+	unsigned long flags;
+
+	spin_lock_irqsave(&vinstr_ctx->state_lock, flags);
+
+	if (client->suspended) {
+		list_del(&client->list);
+		list_add(&client->list, &vinstr_ctx->idle_clients);
+
+		vinstr_ctx->nclients++;
+		vinstr_ctx->nclients_suspended--;
+		kbase_vinstr_update_suspend(vinstr_ctx);
+
+		client->suspended = false;
+	}
+
+	spin_unlock_irqrestore(&vinstr_ctx->state_lock, flags);
+}
diff --git a/drivers/gpu/arm/bifrost/mali_kbase_vinstr.h b/drivers/gpu/arm/bifrost/mali_kbase_vinstr.h
index af7c7b68aa26..d32799f74084 100644
--- a/drivers/gpu/arm/bifrost/mali_kbase_vinstr.h
+++ b/drivers/gpu/arm/bifrost/mali_kbase_vinstr.h
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2015-2017 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2015-2018 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -24,35 +24,13 @@
 #define _KBASE_VINSTR_H_
 
 #include <mali_kbase_hwcnt_reader.h>
+#include <mali_kbase_ioctl.h>
 
 /*****************************************************************************/
 
 struct kbase_vinstr_context;
 struct kbase_vinstr_client;
 
-struct kbase_uk_hwcnt_setup {
-	/* IN */
-	u64 dump_buffer;
-	u32 jm_bm;
-	u32 shader_bm;
-	u32 tiler_bm;
-	u32 unused_1; /* keep for backwards compatibility */
-	u32 mmu_l2_bm;
-	u32 padding;
-	/* OUT */
-};
-
-struct kbase_uk_hwcnt_reader_setup {
-	/* IN */
-	u32 buffer_count;
-	u32 jm_bm;
-	u32 shader_bm;
-	u32 tiler_bm;
-	u32 mmu_l2_bm;
-
-	/* OUT */
-	s32 fd;
-};
 /*****************************************************************************/
 
 /**
@@ -74,24 +52,24 @@ void kbase_vinstr_term(struct kbase_vinstr_context *vinstr_ctx);
  * @vinstr_ctx: vinstr context
  * @setup:      reader's configuration
  *
- * Return: zero on success
+ * Return: file descriptor on success and a (negative) error code otherwise
  */
 int kbase_vinstr_hwcnt_reader_setup(
 		struct kbase_vinstr_context        *vinstr_ctx,
-		struct kbase_uk_hwcnt_reader_setup *setup);
+		struct kbase_ioctl_hwcnt_reader_setup *setup);
 
 /**
  * kbase_vinstr_legacy_hwc_setup - configure hw counters for dumping
  * @vinstr_ctx: vinstr context
  * @cli:        pointer where to store pointer to new vinstr client structure
- * @setup:      hwc configuration
+ * @enable:      hwc configuration
  *
  * Return: zero on success
  */
 int kbase_vinstr_legacy_hwc_setup(
 		struct kbase_vinstr_context *vinstr_ctx,
 		struct kbase_vinstr_client  **cli,
-		struct kbase_uk_hwcnt_setup *setup);
+		struct kbase_ioctl_hwcnt_enable *enable);
 
 /**
  * kbase_vinstr_hwcnt_kernel_setup - configure hw counters for kernel side
@@ -100,13 +78,13 @@ int kbase_vinstr_legacy_hwc_setup(
  * @setup:         reader's configuration
  * @kernel_buffer: pointer to dump buffer
  *
- * setup->buffer_count and setup->fd are not used for kernel side clients.
+ * setup->buffer_count is not used for kernel side clients.
  *
  * Return: pointer to client structure, or NULL on failure
  */
 struct kbase_vinstr_client *kbase_vinstr_hwcnt_kernel_setup(
 		struct kbase_vinstr_context *vinstr_ctx,
-		struct kbase_uk_hwcnt_reader_setup *setup,
+		struct kbase_ioctl_hwcnt_reader_setup *setup,
 		void *kernel_buffer);
 
 /**
@@ -155,6 +133,16 @@ int kbase_vinstr_try_suspend(struct kbase_vinstr_context *vinstr_ctx);
  */
 void kbase_vinstr_suspend(struct kbase_vinstr_context *vinstr_ctx);
 
+/**
+ * kbase_vinstr_wait_for_ready - waits for the vinstr context to get ready
+ * @vinstr_ctx: vinstr context
+ *
+ * Function waits for the vinstr to become ready for dumping. It can be in the
+ * resuming state after the client was attached but the client currently expects
+ * that vinstr is ready for dumping immediately post attach.
+ */
+void kbase_vinstr_wait_for_ready(struct kbase_vinstr_context *vinstr_ctx);
+
 /**
  * kbase_vinstr_resume - resumes operation of a given vinstr context
  * @vinstr_ctx: vinstr context
@@ -178,5 +166,17 @@ size_t kbase_vinstr_dump_size(struct kbase_device *kbdev);
  */
 void kbase_vinstr_detach_client(struct kbase_vinstr_client *cli);
 
+/**
+ * kbase_vinstr_suspend_client - Suspend vinstr client
+ * @client: pointer to vinstr client
+ */
+void kbase_vinstr_suspend_client(struct kbase_vinstr_client *client);
+
+/**
+ * kbase_vinstr_resume_client - Resume vinstr client
+ * @client: pointer to vinstr client
+ */
+void kbase_vinstr_resume_client(struct kbase_vinstr_client *client);
+
 #endif /* _KBASE_VINSTR_H_ */
 
diff --git a/drivers/gpu/arm/bifrost/mali_midg_regmap.h b/drivers/gpu/arm/bifrost/mali_midg_regmap.h
index 5e83ee87242c..180850069f2e 100644
--- a/drivers/gpu/arm/bifrost/mali_midg_regmap.h
+++ b/drivers/gpu/arm/bifrost/mali_midg_regmap.h
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2010-2017 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2010-2018 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -34,8 +34,7 @@
 #define GPU_CONTROL_REG(r)      (GPU_CONTROL_BASE + (r))
 #define GPU_ID                  0x000	/* (RO) GPU and revision identifier */
 #define L2_FEATURES             0x004	/* (RO) Level 2 cache features */
-#define SUSPEND_SIZE            0x008   /* (RO) Fixed-function suspend buffer
-						size */
+#define CORE_FEATURES           0x008	/* (RO) Shader Core Features */
 #define TILER_FEATURES          0x00C	/* (RO) Tiler Features */
 #define MEM_FEATURES            0x010	/* (RO) Memory system features */
 #define MMU_FEATURES            0x014	/* (RO) MMU features */
@@ -93,6 +92,9 @@
 #define THREAD_MAX_WORKGROUP_SIZE 0x0A4	/* (RO) Maximum workgroup size */
 #define THREAD_MAX_BARRIER_SIZE 0x0A8	/* (RO) Maximum threads waiting at a barrier */
 #define THREAD_FEATURES         0x0AC	/* (RO) Thread features */
+#define THREAD_TLS_ALLOC        0x310   /* (RO) Number of threads per core that
+					 * TLS must be allocated for
+					 */
 
 #define TEXTURE_FEATURES_0      0x0B0	/* (RO) Support flags for indexed texture formats 0..31 */
 #define TEXTURE_FEATURES_1      0x0B4	/* (RO) Support flags for indexed texture formats 32..63 */
diff --git a/drivers/gpu/arm/bifrost/thirdparty/mali_kbase_mmap.c b/drivers/gpu/arm/bifrost/thirdparty/mali_kbase_mmap.c
index 1690da43a0c3..6857eb761ee2 100644
--- a/drivers/gpu/arm/bifrost/thirdparty/mali_kbase_mmap.c
+++ b/drivers/gpu/arm/bifrost/thirdparty/mali_kbase_mmap.c
@@ -245,12 +245,13 @@ unsigned long kbase_get_unmapped_area(struct file *filp,
 
 	if (!kbase_ctx_flag(kctx, KCTX_COMPAT)) {
 
-		if (kbase_hw_has_feature(kctx->kbdev,
-						BASE_HW_FEATURE_33BIT_VA)) {
-			high_limit = kctx->same_va_end << PAGE_SHIFT;
-		} else {
-			high_limit = min_t(unsigned long, mm->mmap_base,
-					(kctx->same_va_end << PAGE_SHIFT));
+		high_limit = min_t(unsigned long, mm->mmap_base,
+				(kctx->same_va_end << PAGE_SHIFT));
+
+		/* If there's enough (> 33 bits) of GPU VA space, align
+		 * to 2MB boundaries.
+		 */
+		if (kctx->kbdev->gpu_props.mmu.va_bits > 33) {
 			if (len >= SZ_2M) {
 				align_offset = SZ_2M;
 				align_mask = SZ_2M - 1;
diff --git a/drivers/gpu/arm/sconscript b/drivers/gpu/arm/sconscript
old mode 100755
new mode 100644
index c31eec7cc170..a06092bd5bf0
--- a/drivers/gpu/arm/sconscript
+++ b/drivers/gpu/arm/sconscript
@@ -1,18 +1,25 @@
 #
-# (C) COPYRIGHT 2010-2013 ARM Limited. All rights reserved.
+# (C) COPYRIGHT 2015-2016 ARM Limited. All rights reserved.
 #
 # This program is free software and is provided to you under the terms of the
 # GNU General Public License version 2 as published by the Free Software
 # Foundation, and any use by you of this program is subject to the terms
 # of such GNU licence.
 #
-# A copy of the licence is included with the program, and can also be obtained
-# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
-# Boston, MA  02110-1301, USA.
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, you can access it online at
+# http://www.gnu.org/licenses/gpl-2.0.html.
+#
+# SPDX-License-Identifier: GPL-2.0
 #
 #
 
-
+import glob
 
 
 SConscript('midgard/sconscript')