diff --git a/Documentation/devicetree/bindings/arm/mali-bifrost.txt b/Documentation/devicetree/bindings/arm/mali-bifrost.txt new file mode 100644 index 000000000000..a488e3afbccb --- /dev/null +++ b/Documentation/devicetree/bindings/arm/mali-bifrost.txt @@ -0,0 +1,150 @@ +# +# (C) COPYRIGHT 2013-2018 ARM Limited. All rights reserved. +# +# This program is free software and is provided to you under the terms of the +# GNU General Public License version 2 as published by the Free Software +# Foundation, and any use by you of this program is subject to the terms +# of such GNU licence. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, you can access it online at +# http://www.gnu.org/licenses/gpl-2.0.html. +# +# SPDX-License-Identifier: GPL-2.0 +# +# + +* ARM Mali Midgard devices + + +Required properties: + +- compatible : Should be mali, replacing digits with x from the back, +until malitxx, ending with arm,mali-midgard, the latter not optional. +- reg : Physical base address of the device and length of the register area. +- interrupts : Contains the three IRQ lines required by T-6xx devices +- interrupt-names : Contains the names of IRQ resources in the order they were +provided in the interrupts property. Must contain: "JOB", "MMU", "GPU". + +Optional: + +- clocks : Phandle to clock for the Mali T-6xx device. +- clock-names : Shall be "clk_mali". +- mali-supply : Phandle to regulator for the Mali device. Refer to +Documentation/devicetree/bindings/regulator/regulator.txt for details. +- operating-points-v2 : Refer to Documentation/devicetree/bindings/power/opp.txt +for details. +- jm_config : For T860/T880. Sets job manager configuration. 
An array containing: + - 1 to override the TIMESTAMP value, 0 otherwise. + - 1 to override clock gate, forcing them to be always on, 0 otherwise. + - 1 to enable job throttle, limiting the number of cores that can be started + simultaneously, 0 otherwise. + - Value between 0 and 63 (including). If job throttle is enabled, this is one + less than the number of cores that can be started simultaneously. +- power_model : Sets the power model parameters. Three power models are currently + defined which include "mali-simple-power-model", "mali-g71-power-model" and + "mali-g72-power-model". + - mali-simple-power-model: this model derives the GPU power usage based + on the GPU voltage scaled by the system temperature. Note: it was + designed for the Juno platform, and may not be suitable for others. + - compatible: Should be "arm,mali-simple-power-model" + - dynamic-coefficient: Coefficient, in pW/(Hz V^2), which is + multiplied by v^2*f to calculate the dynamic power consumption. + - static-coefficient: Coefficient, in uW/V^3, which is + multiplied by v^3 to calculate the static power consumption. + - ts: An array containing coefficients for the temperature + scaling factor. This is used to scale the static power by a + factor of tsf/1000000, + where tsf = ts[3]*T^3 + ts[2]*T^2 + ts[1]*T + ts[0], + and T = temperature in degrees. + - thermal-zone: A string identifying the thermal zone used for + the GPU + - temp-poll-interval-ms: the interval at which the system + temperature is polled + - mali-g71-power-model / mali-g72-power-model: these models derive + the GPU power usage based on performance counters, so they are more + accurate. + - compatible: Should be "arm,mali-g71-power-model" / + "arm,mali-g72-power-model" + - scale: the dynamic power calculated by the power model is + scaled by a factor of "scale"/1000. This value should be + chosen to match a particular implementation. 
+ * Note: when IPA is used, two separate power models (simple and counter-based) + are used at different points so care should be taken to configure + both power models in the device tree (specifically dynamic-coefficient, + static-coefficient and scale) to best match the platform. +- system-coherency : Sets the coherency protocol to be used for coherent + accesses made from the GPU. + If not set then no coherency is used. + - 0 : ACE-Lite + - 1 : ACE + - 31 : No coherency +- ipa-model : Sets the IPA model to be used for power management. GPU probe will fail if the + model is not found in the registered models list. If no model is specified here, + a gpu-id based model is picked if available, otherwise the default model is used. + - mali-simple-power-model: Default model used on mali +- protected-mode-switcher : Phandle to device implemented protected mode switching functionality. +Refer to Documentation/devicetree/bindings/arm/smc-protected-mode-switcher.txt for one implementation. + +Example for a Mali GPU: + +gpu@0xfc010000 { + compatible = "arm,malit602", "arm,malit60x", "arm,malit6xx", "arm,mali-midgard"; + reg = <0xfc010000 0x4000>; + interrupts = <0 36 4>, <0 37 4>, <0 38 4>; + interrupt-names = "JOB", "MMU", "GPU"; + + clocks = <&pclk_mali>; + clock-names = "clk_mali"; + mali-supply = <&vdd_mali>; + operating-points-v2 = <&gpu_opp_table>; + power_model@0 { + compatible = "arm,mali-simple-power-model"; + static-coefficient = <2427750>; + dynamic-coefficient = <4687>; + ts = <20000 2000 (-20) 2>; + thermal-zone = "gpu"; + }; + power_model@1 { + compatible = "arm,mali-g71-power-model"; + scale = <5>; + }; +}; + +gpu_opp_table: opp_table0 { + compatible = "operating-points-v2"; + + opp@533000000 { + opp-hz = /bits/ 64 <533000000>; + opp-microvolt = <1250000>; + }; + opp@450000000 { + opp-hz = /bits/ 64 <450000000>; + opp-microvolt = <1150000>; + }; + opp@400000000 { + opp-hz = /bits/ 64 <400000000>; + opp-microvolt = <1125000>; + }; + opp@350000000 { + opp-hz = 
/bits/ 64 <350000000>; + opp-microvolt = <1075000>; + }; + opp@266000000 { + opp-hz = /bits/ 64 <266000000>; + opp-microvolt = <1025000>; + }; + opp@160000000 { + opp-hz = /bits/ 64 <160000000>; + opp-microvolt = <925000>; + }; + opp@100000000 { + opp-hz = /bits/ 64 <100000000>; + opp-microvolt = <912500>; + }; +}; diff --git a/drivers/gpu/arm/Kbuild b/drivers/gpu/arm/Kbuild old mode 100755 new mode 100644 index f14f1c824f87..30246dc556f5 --- a/drivers/gpu/arm/Kbuild +++ b/drivers/gpu/arm/Kbuild @@ -6,14 +6,20 @@ # Foundation, and any use by you of this program is subject to the terms # of such GNU licence. # -# A copy of the licence is included with the program, and can also be obtained -# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, -# Boston, MA 02110-1301, USA. +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, you can access it online at +# http://www.gnu.org/licenses/gpl-2.0.html. +# +# SPDX-License-Identifier: GPL-2.0 # # - obj-$(CONFIG_MALI_MIDGARD_FOR_LINUX) += midgard_for_linux/ obj-$(CONFIG_MALI_MIDGARD_FOR_ANDROID) += midgard/ diff --git a/drivers/gpu/arm/Kconfig b/drivers/gpu/arm/Kconfig old mode 100755 new mode 100644 index 8a16646f89de..8403a1a48c00 --- a/drivers/gpu/arm/Kconfig +++ b/drivers/gpu/arm/Kconfig @@ -6,9 +6,16 @@ # Foundation, and any use by you of this program is subject to the terms # of such GNU licence. # -# A copy of the licence is included with the program, and can also be obtained -# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, -# Boston, MA 02110-1301, USA. 
+# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, you can access it online at +# http://www.gnu.org/licenses/gpl-2.0.html. +# +# SPDX-License-Identifier: GPL-2.0 # # # diff --git a/drivers/gpu/arm/bifrost/Kbuild b/drivers/gpu/arm/bifrost/Kbuild index 4b001c67ab69..deb6e8f976b0 100644 --- a/drivers/gpu/arm/bifrost/Kbuild +++ b/drivers/gpu/arm/bifrost/Kbuild @@ -20,7 +20,7 @@ # # Driver version string which is returned to userspace via an ioctl -MALI_RELEASE_NAME ?= "r10p0-01rel0" +MALI_RELEASE_NAME ?= "r11p0-01rel0" # Paths required for build KBASE_PATH = $(src) diff --git a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_devfreq.c b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_devfreq.c index 7e3c456bafbb..027eeeb6575b 100644 --- a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_devfreq.c +++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_devfreq.c @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2014-2017 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014-2018 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -92,15 +92,21 @@ kbase_devfreq_target(struct device *dev, unsigned long *target_freq, u32 flags) freq = *target_freq; +#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 11, 0) rcu_read_lock(); +#endif opp = devfreq_recommended_opp(dev, &freq, flags); - if (IS_ERR(opp)) { - rcu_read_unlock(); + voltage = dev_pm_opp_get_voltage(opp); +#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 11, 0) + rcu_read_unlock(); +#endif + if (IS_ERR_OR_NULL(opp)) { dev_err(dev, "Failed to get opp (%ld)\n", PTR_ERR(opp)); return PTR_ERR(opp); } - voltage = dev_pm_opp_get_voltage(opp); - rcu_read_unlock(); +#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 11, 0) + dev_pm_opp_put(opp); +#endif nominal_freq = freq; /* @@ -201,20 +207,24 @@ static int kbase_devfreq_init_freq_table(struct kbase_device *kbdev, unsigned long freq; struct dev_pm_opp *opp; +#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 11, 0) rcu_read_lock(); +#endif count = dev_pm_opp_get_opp_count(kbdev->dev); - if (count < 0) { - rcu_read_unlock(); - return count; - } +#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 11, 0) rcu_read_unlock(); +#endif + if (count < 0) + return count; dp->freq_table = kmalloc_array(count, sizeof(dp->freq_table[0]), GFP_KERNEL); if (!dp->freq_table) return -ENOMEM; +#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 11, 0) rcu_read_lock(); +#endif for (i = 0, freq = ULONG_MAX; i < count; i++, freq--) { opp = dev_pm_opp_find_freq_floor(kbdev->dev, &freq); if (IS_ERR(opp)) @@ -225,7 +235,9 @@ static int kbase_devfreq_init_freq_table(struct kbase_device *kbdev, dp->freq_table[i] = freq; } +#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 11, 0) rcu_read_unlock(); +#endif if (count != i) dev_warn(kbdev->dev, "Unable to enumerate all OPPs (%d!=%d\n", @@ -368,7 +380,7 @@ int kbase_devfreq_init(struct kbase_device *kbdev) kbdev->devfreq = devfreq_add_device(kbdev->dev, dp, 
"simple_ondemand", NULL); if (IS_ERR(kbdev->devfreq)) { - kbase_devfreq_term_freq_table(kbdev); + kfree(dp->freq_table); return PTR_ERR(kbdev->devfreq); } diff --git a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_gpuprops_backend.c b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_gpuprops_backend.c index 02dc1ea0061f..8809ab0bed5b 100644 --- a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_gpuprops_backend.c +++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_gpuprops_backend.c @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014-2018 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -41,8 +41,8 @@ void kbase_backend_gpuprops_get(struct kbase_device *kbdev, regdump->l2_features = kbase_reg_read(kbdev, GPU_CONTROL_REG(L2_FEATURES), NULL); - regdump->suspend_size = kbase_reg_read(kbdev, - GPU_CONTROL_REG(SUSPEND_SIZE), NULL); + regdump->core_features = kbase_reg_read(kbdev, + GPU_CONTROL_REG(CORE_FEATURES), NULL); regdump->tiler_features = kbase_reg_read(kbdev, GPU_CONTROL_REG(TILER_FEATURES), NULL); regdump->mem_features = kbase_reg_read(kbdev, @@ -71,6 +71,8 @@ void kbase_backend_gpuprops_get(struct kbase_device *kbdev, GPU_CONTROL_REG(THREAD_MAX_BARRIER_SIZE), NULL); regdump->thread_features = kbase_reg_read(kbdev, GPU_CONTROL_REG(THREAD_FEATURES), NULL); + regdump->thread_tls_alloc = kbase_reg_read(kbdev, + GPU_CONTROL_REG(THREAD_TLS_ALLOC), NULL); regdump->shader_present_lo = kbase_reg_read(kbdev, GPU_CONTROL_REG(SHADER_PRESENT_LO), NULL); diff --git a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_instr_backend.c b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_instr_backend.c index 43a380e53c60..3a6545db9092 100644 --- a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_instr_backend.c +++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_instr_backend.c @@ -1,6 +1,6 @@ 
/* * - * (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014-2017 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -68,7 +68,7 @@ static void kbasep_instr_hwcnt_cacheclean(struct kbase_device *kbdev) int kbase_instr_hwcnt_enable_internal(struct kbase_device *kbdev, struct kbase_context *kctx, - struct kbase_uk_hwcnt_setup *setup) + struct kbase_ioctl_hwcnt_enable *enable) { unsigned long flags, pm_flags; int err = -EINVAL; @@ -81,7 +81,7 @@ int kbase_instr_hwcnt_enable_internal(struct kbase_device *kbdev, KBASE_PM_CORE_SHADER); /* alignment failure */ - if ((setup->dump_buffer == 0ULL) || (setup->dump_buffer & (2048 - 1))) + if ((enable->dump_buffer == 0ULL) || (enable->dump_buffer & (2048 - 1))) goto out_err; /* Override core availability policy to ensure all cores are available @@ -110,7 +110,7 @@ int kbase_instr_hwcnt_enable_internal(struct kbase_device *kbdev, /* In use, this context is the owner */ kbdev->hwcnt.kctx = kctx; /* Remember the dump address so we can reprogram it later */ - kbdev->hwcnt.addr = setup->dump_buffer; + kbdev->hwcnt.addr = enable->dump_buffer; /* Request the clean */ kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_REQUEST_CLEAN; @@ -150,15 +150,15 @@ int kbase_instr_hwcnt_enable_internal(struct kbase_device *kbdev, prfcnt_config | PRFCNT_CONFIG_MODE_OFF, kctx); kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_BASE_LO), - setup->dump_buffer & 0xFFFFFFFF, kctx); + enable->dump_buffer & 0xFFFFFFFF, kctx); kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_BASE_HI), - setup->dump_buffer >> 32, kctx); + enable->dump_buffer >> 32, kctx); kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_JM_EN), - setup->jm_bm, kctx); + enable->jm_bm, kctx); kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_SHADER_EN), - setup->shader_bm, kctx); + enable->shader_bm, kctx); kbase_reg_write(kbdev, 
GPU_CONTROL_REG(PRFCNT_MMU_L2_EN), - setup->mmu_l2_bm, kctx); + enable->mmu_l2_bm, kctx); /* Due to PRLAM-8186 we need to disable the Tiler before we enable the * HW counter dump. */ if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8186)) @@ -166,7 +166,7 @@ int kbase_instr_hwcnt_enable_internal(struct kbase_device *kbdev, kctx); else kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_TILER_EN), - setup->tiler_bm, kctx); + enable->tiler_bm, kctx); kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_CONFIG), prfcnt_config | PRFCNT_CONFIG_MODE_MANUAL, kctx); @@ -175,7 +175,7 @@ int kbase_instr_hwcnt_enable_internal(struct kbase_device *kbdev, */ if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8186)) kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_TILER_EN), - setup->tiler_bm, kctx); + enable->tiler_bm, kctx); spin_lock_irqsave(&kbdev->hwcnt.lock, flags); diff --git a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_jm_defs.h b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_jm_defs.h index 27a6ca0d871b..b4d2ae1cc4e8 100644 --- a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_jm_defs.h +++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_jm_defs.h @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014-2016, 2018 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -113,16 +113,4 @@ struct kbase_backend_data { bool timeouts_updated; }; -/** - * struct kbase_jd_atom_backend - GPU backend specific katom data - */ -struct kbase_jd_atom_backend { -}; - -/** - * struct kbase_context_backend - GPU backend specific context data - */ -struct kbase_context_backend { -}; - #endif /* _KBASE_HWACCESS_GPU_DEFS_H_ */ diff --git a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_jm_hw.c b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_jm_hw.c index 63a0dcdb1fb3..c6a8b7f34a03 100644 --- a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_jm_hw.c +++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_jm_hw.c @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2010-2017 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2010-2018 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -245,9 +245,6 @@ void kbase_job_done(struct kbase_device *kbdev, u32 done) KBASE_TRACE_ADD(kbdev, JM_IRQ, NULL, NULL, 0, done); - memset(&kbdev->slot_submit_count_irq[0], 0, - sizeof(kbdev->slot_submit_count_irq)); - spin_lock_irqsave(&kbdev->hwaccess_lock, flags); while (done) { @@ -312,7 +309,7 @@ void kbase_job_done(struct kbase_device *kbdev, u32 done) completion_code)); } - kbase_gpu_irq_evict(kbdev, i); + kbase_gpu_irq_evict(kbdev, i, completion_code); } kbase_reg_write(kbdev, JOB_CONTROL_REG(JOB_IRQ_CLEAR), @@ -745,7 +742,9 @@ void kbase_job_slot_ctx_priority_check_locked(struct kbase_context *kctx, if (!katom) continue; - if (katom->kctx != kctx) + if ((kbdev->js_ctx_scheduling_mode == + KBASE_JS_PROCESS_LOCAL_PRIORITY_MODE) && + (katom->kctx != kctx)) continue; if (katom->sched_priority > priority) { diff --git a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_jm_rb.c 
b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_jm_rb.c index ee93d4eb9522..7f09fd229748 100644 --- a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_jm_rb.c +++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_jm_rb.c @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2014-2017 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014-2018 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -1247,7 +1247,8 @@ void kbase_backend_run_atom(struct kbase_device *kbdev, #define HAS_DEP(katom) (katom->pre_dep || katom->atom_flags & \ (KBASE_KATOM_FLAG_X_DEP_BLOCKED | KBASE_KATOM_FLAG_FAIL_BLOCKER)) -bool kbase_gpu_irq_evict(struct kbase_device *kbdev, int js) +bool kbase_gpu_irq_evict(struct kbase_device *kbdev, int js, + u32 completion_code) { struct kbase_jd_atom *katom; struct kbase_jd_atom *next_katom; @@ -1268,14 +1269,16 @@ bool kbase_gpu_irq_evict(struct kbase_device *kbdev, int js) JS_COMMAND_NOP, NULL); next_katom->gpu_rb_state = KBASE_ATOM_GPU_RB_READY; - KBASE_TLSTREAM_TL_NRET_ATOM_LPU(katom, + if (completion_code == BASE_JD_EVENT_STOPPED) { + KBASE_TLSTREAM_TL_NRET_ATOM_LPU(katom, &kbdev->gpu_props.props.raw_props.js_features [katom->slot_nr]); - KBASE_TLSTREAM_TL_NRET_ATOM_AS(katom, &kbdev->as + KBASE_TLSTREAM_TL_NRET_ATOM_AS(katom, &kbdev->as [katom->kctx->as_nr]); - KBASE_TLSTREAM_TL_NRET_CTX_LPU(katom->kctx, + KBASE_TLSTREAM_TL_NRET_CTX_LPU(katom->kctx, &kbdev->gpu_props.props.raw_props.js_features [katom->slot_nr]); + } return true; } @@ -1515,8 +1518,7 @@ void kbase_backend_reset(struct kbase_device *kbdev, ktime_t *end_timestamp) if (!katom) break; if (katom->protected_state.exit == - KBASE_ATOM_EXIT_PROTECTED_RESET_WAIT) - { + KBASE_ATOM_EXIT_PROTECTED_RESET_WAIT) { KBASE_TLSTREAM_AUX_PROTECTED_LEAVE_END(kbdev); kbase_vinstr_resume(kbdev->vinstr_ctx); @@ -1683,7 +1685,7 @@ bool kbase_backend_soft_hard_stop_slot(struct 
kbase_device *kbdev, katom_idx0->kctx->blocked_js[js][prio_idx0] = true; } else { /* katom_idx0 is on GPU */ - if (katom_idx1 && katom_idx1->gpu_rb_state == + if (katom_idx1_valid && katom_idx1->gpu_rb_state == KBASE_ATOM_GPU_RB_SUBMITTED) { /* katom_idx0 and katom_idx1 are on GPU */ diff --git a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_jm_rb.h b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_jm_rb.h index 456700814ee9..c3b9f2d85536 100644 --- a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_jm_rb.h +++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_jm_rb.h @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014-2018 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -33,15 +33,17 @@ /** * kbase_gpu_irq_evict - Evict an atom from a NEXT slot * - * @kbdev: Device pointer - * @js: Job slot to evict from + * @kbdev: Device pointer + * @js: Job slot to evict from + * @completion_code: Event code from job that was run. * * Evict the atom in the NEXT slot for the specified job slot. This function is * called from the job complete IRQ handler when the previous job has failed. 
* * Return: true if job evicted from NEXT registers, false otherwise */ -bool kbase_gpu_irq_evict(struct kbase_device *kbdev, int js); +bool kbase_gpu_irq_evict(struct kbase_device *kbdev, int js, + u32 completion_code); /** * kbase_gpu_complete_hw - Complete an atom on job slot js diff --git a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_js_backend.c b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_js_backend.c index f09a26423951..729b971ee072 100644 --- a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_js_backend.c +++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_js_backend.c @@ -147,16 +147,17 @@ static enum hrtimer_restart timer_callback(struct hrtimer *timer) /* Job is Soft-Stoppable */ if (ticks == soft_stop_ticks) { - int disjoint_threshold = - KBASE_DISJOINT_STATE_INTERLEAVED_CONTEXT_COUNT_THRESHOLD; - u32 softstop_flags = 0u; /* Job has been scheduled for at least * js_devdata->soft_stop_ticks ticks. * Soft stop the slot so we can run * other jobs. */ - dev_dbg(kbdev->dev, "Soft-stop"); #if !KBASE_DISABLE_SCHEDULING_SOFT_STOPS + int disjoint_threshold = + KBASE_DISJOINT_STATE_INTERLEAVED_CONTEXT_COUNT_THRESHOLD; + u32 softstop_flags = 0u; + + dev_dbg(kbdev->dev, "Soft-stop"); /* nr_user_contexts_running is updated * with the runpool_mutex, but we can't * take that here. 
diff --git a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_mmu_hw_direct.c b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_mmu_hw_direct.c index ad27202c8f08..9cd29828016a 100644 --- a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_mmu_hw_direct.c +++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_mmu_hw_direct.c @@ -185,8 +185,7 @@ void kbase_mmu_interrupt(struct kbase_device *kbdev, u32 irq_stat) /* Mark the fault protected or not */ as->protected_mode = kbdev->protected_mode; - if (kbdev->protected_mode && as->fault_addr) - { + if (kbdev->protected_mode && as->fault_addr) { /* check if address reporting is allowed */ validate_protected_page_fault(kbdev, kctx); } diff --git a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_backend.c b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_backend.c index 8ff607d3c5fd..e57834350e8c 100644 --- a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_backend.c +++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_backend.c @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2010-2017 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2010-2018 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -252,7 +252,10 @@ static void kbase_pm_gpu_poweroff_wait_wq(struct work_struct *data) /* Turn off clock now that fault have been handled. We * dropped locks so poweron_required may have changed - - * power back on if this is the case.*/ + * power back on if this is the case (effectively only + * re-enabling of the interrupts would be done in this + * case, as the clocks to GPU were not withdrawn yet). 
+ */ if (backend->poweron_required) kbase_pm_clock_on(kbdev, false); else diff --git a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_policy.c b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_policy.c index 18a06d833169..0bf133494f03 100644 --- a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_policy.c +++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_policy.c @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2010-2018 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -383,6 +383,7 @@ void kbase_pm_update_active(struct kbase_device *kbdev) /* Power on the GPU and any cores requested by the policy */ if (pm->backend.poweroff_wait_in_progress) { + KBASE_DEBUG_ASSERT(kbdev->pm.backend.gpu_powered); pm->backend.poweron_required = true; spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); } else { @@ -927,19 +928,24 @@ KBASE_EXPORT_TEST_API(kbase_pm_request_cores_sync); void kbase_pm_request_l2_caches(struct kbase_device *kbdev) { unsigned long flags; - u32 prior_l2_users_count; spin_lock_irqsave(&kbdev->hwaccess_lock, flags); - prior_l2_users_count = kbdev->l2_users_count++; + kbdev->l2_users_count++; KBASE_DEBUG_ASSERT(kbdev->l2_users_count != 0); - /* if the GPU is reset while the l2 is on, l2 will be off but - * prior_l2_users_count will be > 0. l2_available_bitmap will have been - * set to 0 though by kbase_pm_init_hw */ - if (!prior_l2_users_count || !kbdev->l2_available_bitmap) - kbase_pm_check_transitions_nolock(kbdev); + /* Check for the required L2 transitions. + * Caller would block here for the L2 caches of all core groups to be + * powered on, so need to inform the Hw to power up all the L2 caches. 
+ * Can't rely on the l2_users_count value being non-zero previously to + * avoid checking for the transition, as the count could be non-zero + * even if not all the instances of L2 cache are powered up since + * currently the power status of L2 is not tracked separately for each + * core group. Also if the GPU is reset while the L2 is on, L2 will be + * off but the count will be non-zero. + */ + kbase_pm_check_transitions_nolock(kbdev); spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); wait_event(kbdev->pm.backend.l2_powered_wait, diff --git a/drivers/gpu/arm/bifrost/docs/Doxyfile b/drivers/gpu/arm/bifrost/docs/Doxyfile index cea7bd9a1730..6498dcbc1840 100644 --- a/drivers/gpu/arm/bifrost/docs/Doxyfile +++ b/drivers/gpu/arm/bifrost/docs/Doxyfile @@ -38,7 +38,7 @@ # directories like "/usr/src/myproject". Separate the files or directories # with spaces. -INPUT += ../../kernel/drivers/gpu/arm/midgard/ +INPUT += ../../kernel/drivers/gpu/arm/midgard/ ############################################################################## # Everything below here is optional, and in most cases not required diff --git a/drivers/gpu/arm/bifrost/ipa/Kbuild b/drivers/gpu/arm/bifrost/ipa/Kbuild index e06d2eac4aa1..32008511f366 100644 --- a/drivers/gpu/arm/bifrost/ipa/Kbuild +++ b/drivers/gpu/arm/bifrost/ipa/Kbuild @@ -1,5 +1,5 @@ # -# (C) COPYRIGHT 2016-2017 ARM Limited. All rights reserved. +# (C) COPYRIGHT 2016-2018 ARM Limited. All rights reserved. 
# # This program is free software and is provided to you under the terms of the # GNU General Public License version 2 as published by the Free Software @@ -25,9 +25,8 @@ bifrost_kbase-y += \ bifrost_kbase-$(CONFIG_DEBUG_FS) += ipa/mali_kbase_ipa_debugfs.o -ifneq ($(wildcard $(src)/ipa/mali_kbase_ipa_vinstr_g71.c),) +ifneq ($(wildcard $(src)/ipa/mali_kbase_ipa_vinstr_common.c),) bifrost_kbase-y += \ - ipa/mali_kbase_ipa_vinstr_g71.o \ + ipa/mali_kbase_ipa_vinstr_g7x.o \ ipa/mali_kbase_ipa_vinstr_common.o - endif diff --git a/drivers/gpu/arm/bifrost/ipa/mali_kbase_ipa.c b/drivers/gpu/arm/bifrost/ipa/mali_kbase_ipa.c index d95a8971d6e2..d14e1e6d01fa 100644 --- a/drivers/gpu/arm/bifrost/ipa/mali_kbase_ipa.c +++ b/drivers/gpu/arm/bifrost/ipa/mali_kbase_ipa.c @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2016-2017 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2016-2018 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -38,10 +38,14 @@ #define KBASE_IPA_FALLBACK_MODEL_NAME "mali-simple-power-model" #define KBASE_IPA_G71_MODEL_NAME "mali-g71-power-model" +#define KBASE_IPA_G72_MODEL_NAME "mali-g72-power-model" +#define KBASE_IPA_TNOX_MODEL_NAME "mali-tnox-power-model" static struct kbase_ipa_model_ops *kbase_ipa_all_model_ops[] = { &kbase_simple_ipa_model_ops, - &kbase_g71_ipa_model_ops + &kbase_g71_ipa_model_ops, + &kbase_g72_ipa_model_ops, + &kbase_tnox_ipa_model_ops }; int kbase_ipa_model_recalculate(struct kbase_ipa_model *model) @@ -98,6 +102,15 @@ const char *kbase_ipa_model_name_from_id(u32 gpu_id) switch (GPU_ID2_MODEL_MATCH_VALUE(prod_id)) { case GPU_ID2_PRODUCT_TMIX: return KBASE_IPA_G71_MODEL_NAME; + case GPU_ID2_PRODUCT_THEX: + return KBASE_IPA_G72_MODEL_NAME; + case GPU_ID2_PRODUCT_TNOX: + return KBASE_IPA_TNOX_MODEL_NAME; + case GPU_ID2_PRODUCT_TGOX: + if ((gpu_id & GPU_ID2_VERSION_MAJOR) == + (0 << 
GPU_ID2_VERSION_MAJOR_SHIFT)) + /* TGOX r0 shares a power model with TNOX */ + return KBASE_IPA_TNOX_MODEL_NAME; default: return KBASE_IPA_FALLBACK_MODEL_NAME; } diff --git a/drivers/gpu/arm/bifrost/ipa/mali_kbase_ipa.h b/drivers/gpu/arm/bifrost/ipa/mali_kbase_ipa.h index e087d2348763..fc45f22d6a6a 100644 --- a/drivers/gpu/arm/bifrost/ipa/mali_kbase_ipa.h +++ b/drivers/gpu/arm/bifrost/ipa/mali_kbase_ipa.h @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2016-2017 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2016-2018 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -27,8 +27,17 @@ struct devfreq; +/** + * struct kbase_ipa_model - Object describing a particular IPA model. + * @kbdev: pointer to kbase device + * @model_data: opaque pointer to model specific data, accessed + * only by model specific methods. + * @ops: pointer to object containing model specific methods. + * @params: head of the list of debugfs params added for model + * @missing_dt_node_warning: flag to limit the matching power model DT not found + * warning to once. + */ struct kbase_ipa_model { - struct list_head link; struct kbase_device *kbdev; void *model_data; struct kbase_ipa_model_ops *ops; @@ -116,22 +125,72 @@ struct kbase_ipa_model_ops { bool do_utilization_scaling_in_framework; }; -/* Models can be registered only in the platform's platform_init_func call */ -int kbase_ipa_model_ops_register(struct kbase_device *kbdev, - struct kbase_ipa_model_ops *new_model_ops); -struct kbase_ipa_model *kbase_ipa_get_model(struct kbase_device *kbdev, - const char *name); - +/** + * kbase_ipa_init - Initialize the IPA feature + * @kbdev: pointer to kbase device + * + * simple IPA power model is initialized as a fallback model and if that + * initialization fails then IPA is not used. 
+ * The device tree is read for the name of ipa model to be used, by using the + * property string "ipa-model". If that ipa model is supported then it is + * initialized but if the initialization fails then simple power model is used. + * + * Return: 0 on success, negative -errno on error + */ int kbase_ipa_init(struct kbase_device *kbdev); + +/** + * kbase_ipa_term - Terminate the IPA feature + * @kbdev: pointer to kbase device + * + * Both simple IPA power model and model retrieved from device tree are + * terminated. + */ void kbase_ipa_term(struct kbase_device *kbdev); -void kbase_ipa_model_use_fallback_locked(struct kbase_device *kbdev); -void kbase_ipa_model_use_configured_locked(struct kbase_device *kbdev); + +/** + * kbase_ipa_model_recalculate - Recalculate the model coefficients + * @model: pointer to the IPA model object, already initialized + * + * It shall be called immediately after the model has been initialized + * or when the model parameter has changed, so that any coefficients + * derived from parameters can be recalculated. + * It's a wrapper for the module specific recalculate() method. + * + * Return: 0 on success, negative -errno on error + */ int kbase_ipa_model_recalculate(struct kbase_ipa_model *model); + +/** + * kbase_ipa_init_model - Initialize the particular IPA model + * @kbdev: pointer to kbase device + * @ops: pointer to object containing model specific methods. + * + * Initialize the model corresponding to the @ops pointer passed. + * The init() method specified in @ops would be called. + * + * Return: pointer to kbase_ipa_model on success, NULL on error + */ struct kbase_ipa_model *kbase_ipa_init_model(struct kbase_device *kbdev, struct kbase_ipa_model_ops *ops); +/** + * kbase_ipa_term_model - Terminate the particular IPA model + * @model: pointer to the IPA model object, already initialized + * + * Terminate the model, using the term() method. + * Module specific parameters would be freed. 
+ */ void kbase_ipa_term_model(struct kbase_ipa_model *model); +/* Switch to the fallback model */ +void kbase_ipa_model_use_fallback_locked(struct kbase_device *kbdev); + +/* Switch to the model retrieved from device tree */ +void kbase_ipa_model_use_configured_locked(struct kbase_device *kbdev); + extern struct kbase_ipa_model_ops kbase_g71_ipa_model_ops; +extern struct kbase_ipa_model_ops kbase_g72_ipa_model_ops; +extern struct kbase_ipa_model_ops kbase_tnox_ipa_model_ops; #if MALI_UNIT_TEST /** diff --git a/drivers/gpu/arm/bifrost/ipa/mali_kbase_ipa_simple.c b/drivers/gpu/arm/bifrost/ipa/mali_kbase_ipa_simple.c index 639ade266e14..7dd2ae252112 100644 --- a/drivers/gpu/arm/bifrost/ipa/mali_kbase_ipa_simple.c +++ b/drivers/gpu/arm/bifrost/ipa/mali_kbase_ipa_simple.c @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2016-2017 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2016-2018 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -43,7 +43,7 @@ static int kbase_simple_power_model_get_dummy_temp( struct thermal_zone_device *tz, unsigned long *temp) { - *temp = ACCESS_ONCE(dummy_temp); + *temp = READ_ONCE(dummy_temp); return 0; } @@ -54,7 +54,7 @@ static int kbase_simple_power_model_get_dummy_temp( struct thermal_zone_device *tz, int *temp) { - *temp = ACCESS_ONCE(dummy_temp); + *temp = READ_ONCE(dummy_temp); return 0; } #endif @@ -68,7 +68,7 @@ static int kbase_simple_power_model_get_dummy_temp( void kbase_simple_power_model_set_dummy_temp(int temp) { - ACCESS_ONCE(dummy_temp) = temp; + WRITE_ONCE(dummy_temp, temp); } KBASE_EXPORT_TEST_API(kbase_simple_power_model_set_dummy_temp); @@ -155,7 +155,7 @@ static int poll_temperature(void *data) #endif while (!kthread_should_stop()) { - struct thermal_zone_device *tz = ACCESS_ONCE(model_data->gpu_tz); + struct thermal_zone_device *tz = READ_ONCE(model_data->gpu_tz); if (tz) { int ret; @@ 
-170,9 +170,9 @@ static int poll_temperature(void *data) temp = FALLBACK_STATIC_TEMPERATURE; } - ACCESS_ONCE(model_data->current_temperature) = temp; + WRITE_ONCE(model_data->current_temperature, temp); - msleep_interruptible(ACCESS_ONCE(model_data->temperature_poll_interval_ms)); + msleep_interruptible(READ_ONCE(model_data->temperature_poll_interval_ms)); } return 0; @@ -186,7 +186,7 @@ static int model_static_coeff(struct kbase_ipa_model *model, u32 *coeffp) u64 coeff_big; int temp; - temp = ACCESS_ONCE(model_data->current_temperature); + temp = READ_ONCE(model_data->current_temperature); /* Range: 0 <= temp_scaling_factor < 2^24 */ temp_scaling_factor = calculate_temp_scaling_factor(model_data->ts, diff --git a/drivers/gpu/arm/bifrost/ipa/mali_kbase_ipa_vinstr_common.c b/drivers/gpu/arm/bifrost/ipa/mali_kbase_ipa_vinstr_common.c index b9a9e573607e..3917fb8d0ef7 100644 --- a/drivers/gpu/arm/bifrost/ipa/mali_kbase_ipa_vinstr_common.c +++ b/drivers/gpu/arm/bifrost/ipa/mali_kbase_ipa_vinstr_common.c @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2017 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2017-2018 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -21,6 +21,7 @@ */ #include "mali_kbase_ipa_vinstr_common.h" +#include "mali_kbase_ipa_debugfs.h" #if MALI_UNIT_TEST static ktime_t dummy_time; @@ -31,11 +32,11 @@ static ktime_t dummy_time; #endif #if LINUX_VERSION_CODE < KERNEL_VERSION(4, 3, 0) -#define ktime_get() (ACCESS_ONCE(dummy_time)) +#define ktime_get() (READ_ONCE(dummy_time)) void kbase_ipa_set_dummy_time(ktime_t t) { - ACCESS_ONCE(dummy_time) = t; + WRITE_ONCE(dummy_time, t); } KBASE_EXPORT_TEST_API(kbase_ipa_set_dummy_time); #else @@ -119,10 +120,46 @@ s64 kbase_ipa_single_counter( return div_s64(multiplied, 1000000); } +/** + * kbase_ipa_gpu_active - Inform IPA that GPU is now active + * @model_data: Pointer to model data + * + * This function may cause vinstr to become active. + */ +static void kbase_ipa_gpu_active(struct kbase_ipa_model_vinstr_data *model_data) +{ + struct kbase_device *kbdev = model_data->kbdev; + + lockdep_assert_held(&kbdev->pm.lock); + + if (!kbdev->ipa.vinstr_active) { + kbdev->ipa.vinstr_active = true; + kbase_vinstr_resume_client(model_data->vinstr_cli); + } +} + +/** + * kbase_ipa_gpu_idle - Inform IPA that GPU is now idle + * @model_data: Pointer to model data + * + * This function may cause vinstr to become idle. 
+ */ +static void kbase_ipa_gpu_idle(struct kbase_ipa_model_vinstr_data *model_data) +{ + struct kbase_device *kbdev = model_data->kbdev; + + lockdep_assert_held(&kbdev->pm.lock); + + if (kbdev->ipa.vinstr_active) { + kbase_vinstr_suspend_client(model_data->vinstr_cli); + kbdev->ipa.vinstr_active = false; + } +} + int kbase_ipa_attach_vinstr(struct kbase_ipa_model_vinstr_data *model_data) { struct kbase_device *kbdev = model_data->kbdev; - struct kbase_uk_hwcnt_reader_setup setup; + struct kbase_ioctl_hwcnt_reader_setup setup; size_t dump_size; dump_size = kbase_vinstr_dump_size(kbdev); @@ -148,13 +185,30 @@ int kbase_ipa_attach_vinstr(struct kbase_ipa_model_vinstr_data *model_data) model_data->last_sample_read_time = ktime_get(); kbase_vinstr_hwc_clear(model_data->vinstr_cli); + kbdev->ipa.gpu_active_callback = kbase_ipa_gpu_active; + kbdev->ipa.gpu_idle_callback = kbase_ipa_gpu_idle; + kbdev->ipa.model_data = model_data; + kbdev->ipa.vinstr_active = false; + /* Suspend vinstr, to ensure that the GPU is powered off until there is + * something to execute. 
+ */ + kbase_vinstr_suspend_client(model_data->vinstr_cli); + return 0; } void kbase_ipa_detach_vinstr(struct kbase_ipa_model_vinstr_data *model_data) { + struct kbase_device *kbdev = model_data->kbdev; + + kbdev->ipa.gpu_active_callback = NULL; + kbdev->ipa.gpu_idle_callback = NULL; + kbdev->ipa.model_data = NULL; + kbdev->ipa.vinstr_active = false; + if (model_data->vinstr_cli) kbase_vinstr_detach_client(model_data->vinstr_cli); + model_data->vinstr_cli = NULL; kfree(model_data->vinstr_buffer); model_data->vinstr_buffer = NULL; @@ -165,6 +219,7 @@ int kbase_ipa_vinstr_dynamic_coeff(struct kbase_ipa_model *model, u32 *coeffp, { struct kbase_ipa_model_vinstr_data *model_data = (struct kbase_ipa_model_vinstr_data *)model->model_data; + struct kbase_device *kbdev = model_data->kbdev; s64 energy = 0; size_t i; ktime_t now = ktime_get(); @@ -176,6 +231,9 @@ int kbase_ipa_vinstr_dynamic_coeff(struct kbase_ipa_model *model, u32 *coeffp, u64 num_cycles; int err = 0; + if (!kbdev->ipa.vinstr_active) + goto err0; /* GPU powered off - no counters to collect */ + err = kbase_vinstr_hwc_dump(model_data->vinstr_cli, BASE_HWCNT_READER_EVENT_MANUAL); if (err) @@ -232,3 +290,58 @@ int kbase_ipa_vinstr_dynamic_coeff(struct kbase_ipa_model *model, u32 *coeffp, *coeffp = clamp(coeff, (u64) 0, (u64) 1 << 16); return err; } + +int kbase_ipa_vinstr_common_model_init(struct kbase_ipa_model *model, + const struct kbase_ipa_group *ipa_groups_def, + size_t ipa_group_size) +{ + int err = 0; + size_t i; + struct kbase_ipa_model_vinstr_data *model_data; + + model_data = kzalloc(sizeof(*model_data), GFP_KERNEL); + if (!model_data) + return -ENOMEM; + + model_data->kbdev = model->kbdev; + model_data->groups_def = ipa_groups_def; + model_data->groups_def_num = ipa_group_size; + + model->model_data = (void *) model_data; + + for (i = 0; i < model_data->groups_def_num; ++i) { + const struct kbase_ipa_group *group = &model_data->groups_def[i]; + + model_data->group_values[i] = group->default_value; 
+ err = kbase_ipa_model_add_param_s32(model, group->name, + &model_data->group_values[i], + 1, false); + if (err) + goto exit; + } + + model_data->scaling_factor = 5; + err = kbase_ipa_model_add_param_s32(model, "scale", + &model_data->scaling_factor, + 1, false); + if (err) + goto exit; + + err = kbase_ipa_attach_vinstr(model_data); + +exit: + if (err) { + kbase_ipa_model_param_free_all(model); + kfree(model_data); + } + return err; +} + +void kbase_ipa_vinstr_common_model_term(struct kbase_ipa_model *model) +{ + struct kbase_ipa_model_vinstr_data *model_data = + (struct kbase_ipa_model_vinstr_data *)model->model_data; + + kbase_ipa_detach_vinstr(model_data); + kfree(model_data); +} diff --git a/drivers/gpu/arm/bifrost/ipa/mali_kbase_ipa_vinstr_common.h b/drivers/gpu/arm/bifrost/ipa/mali_kbase_ipa_vinstr_common.h index 7233642add78..18c30fe701a0 100644 --- a/drivers/gpu/arm/bifrost/ipa/mali_kbase_ipa_vinstr_common.h +++ b/drivers/gpu/arm/bifrost/ipa/mali_kbase_ipa_vinstr_common.h @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2017 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2017-2018 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -25,9 +25,6 @@ #include "mali_kbase.h" -/* Maximum length for the name of an IPA group. */ -#define KBASE_IPA_MAX_GROUP_NAME_LEN 15 - /* Maximum number of IPA groups for an IPA model. 
*/ #define KBASE_IPA_MAX_GROUP_DEF_NUM 16 @@ -41,8 +38,6 @@ #define KBASE_IPA_NR_BYTES_PER_BLOCK \ (KBASE_IPA_NR_CNT_PER_BLOCK * KBASE_IPA_NR_BYTES_PER_CNT) - - /** * struct kbase_ipa_model_vinstr_data - IPA context per device * @kbdev: pointer to kbase device @@ -77,7 +72,7 @@ struct kbase_ipa_model_vinstr_data { * @counter_block_offset: block offset in bytes of the counter used to calculate energy for IPA group */ struct kbase_ipa_group { - char name[KBASE_IPA_MAX_GROUP_NAME_LEN + 1]; + const char *name; s32 default_value; s64 (*op)(struct kbase_ipa_model_vinstr_data *, s32, u32); u32 counter_block_offset; @@ -152,6 +147,33 @@ void kbase_ipa_detach_vinstr(struct kbase_ipa_model_vinstr_data *model_data); int kbase_ipa_vinstr_dynamic_coeff(struct kbase_ipa_model *model, u32 *coeffp, u32 current_freq); +/** + * kbase_ipa_vinstr_common_model_init() - initialize ipa power model + * @model: ipa power model to initialize + * @ipa_groups_def: array of ipa groups which sets coefficients for + * the corresponding counters used in the ipa model + * @ipa_group_size: number of elements in the array @ipa_groups_def + * + * This initialization function performs initialization steps common + * for ipa models based on counter values. In each call, the model + * passes its specific coefficient values per ipa counter group via + * @ipa_groups_def array. + * + * Return: 0 on success, error code otherwise + */ +int kbase_ipa_vinstr_common_model_init(struct kbase_ipa_model *model, + const struct kbase_ipa_group *ipa_groups_def, + size_t ipa_group_size); + +/** + * kbase_ipa_vinstr_common_model_term() - terminate ipa power model + * @model: ipa power model to terminate + * + * This function performs all necessary steps to terminate ipa power model + * including clean up of resources allocated to hold model data. 
+ */ +void kbase_ipa_vinstr_common_model_term(struct kbase_ipa_model *model); + #if MALI_UNIT_TEST /** * kbase_ipa_set_dummy_time() - set a dummy monotonic time value diff --git a/drivers/gpu/arm/bifrost/ipa/mali_kbase_ipa_vinstr_g71.c b/drivers/gpu/arm/bifrost/ipa/mali_kbase_ipa_vinstr_g71.c deleted file mode 100644 index d07fb36d901e..000000000000 --- a/drivers/gpu/arm/bifrost/ipa/mali_kbase_ipa_vinstr_g71.c +++ /dev/null @@ -1,256 +0,0 @@ -/* - * - * (C) COPYRIGHT 2016-2017 ARM Limited. All rights reserved. - * - * This program is free software and is provided to you under the terms of the - * GNU General Public License version 2 as published by the Free Software - * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, you can access it online at - * http://www.gnu.org/licenses/gpl-2.0.html. 
- * - * SPDX-License-Identifier: GPL-2.0 - * - */ -#include - -#include "mali_kbase_ipa_vinstr_common.h" -#include "mali_kbase.h" -#include "mali_kbase_ipa_debugfs.h" - - -/* Performance counter blocks base offsets */ -#define JM_BASE (0 * KBASE_IPA_NR_BYTES_PER_BLOCK) -#define TILER_BASE (1 * KBASE_IPA_NR_BYTES_PER_BLOCK) -#define MEMSYS_BASE (2 * KBASE_IPA_NR_BYTES_PER_BLOCK) -#define SC0_BASE_ONE_MEMSYS (3 * KBASE_IPA_NR_BYTES_PER_BLOCK) -#define SC0_BASE_TWO_MEMSYS (4 * KBASE_IPA_NR_BYTES_PER_BLOCK) - -/* JM counter block offsets */ -#define JM_GPU_ACTIVE (KBASE_IPA_NR_BYTES_PER_CNT * 6) - -/* Tiler counter block offsets */ -#define TILER_ACTIVE (KBASE_IPA_NR_BYTES_PER_CNT * 45) - -/* MEMSYS counter block offsets */ -#define MEMSYS_L2_ANY_LOOKUP (KBASE_IPA_NR_BYTES_PER_CNT * 25) - -/* SC counter block offsets */ -#define SC_FRAG_ACTIVE (KBASE_IPA_NR_BYTES_PER_CNT * 4) -#define SC_EXEC_CORE_ACTIVE (KBASE_IPA_NR_BYTES_PER_CNT * 26) -#define SC_EXEC_INSTR_COUNT (KBASE_IPA_NR_BYTES_PER_CNT * 28) -#define SC_TEX_COORD_ISSUE (KBASE_IPA_NR_BYTES_PER_CNT * 40) -#define SC_VARY_SLOT_32 (KBASE_IPA_NR_BYTES_PER_CNT * 50) -#define SC_VARY_SLOT_16 (KBASE_IPA_NR_BYTES_PER_CNT * 51) -#define SC_BEATS_RD_LSC (KBASE_IPA_NR_BYTES_PER_CNT * 56) -#define SC_BEATS_WR_LSC (KBASE_IPA_NR_BYTES_PER_CNT * 61) -#define SC_BEATS_WR_TIB (KBASE_IPA_NR_BYTES_PER_CNT * 62) - -/** Maximum number of cores for which a single Memory System block of performance counters is present. */ -#define KBASE_G71_SINGLE_MEMSYS_MAX_NUM_CORES ((u8)4) - - -/** - * get_jm_counter() - get performance counter offset inside the Job Manager block - * @model_data: pointer to GPU model data. - * @counter_block_offset: offset in bytes of the performance counter inside the Job Manager block. - * - * Return: Block offset in bytes of the required performance counter. 
- */ -static u32 kbase_g71_power_model_get_jm_counter(struct kbase_ipa_model_vinstr_data *model_data, - u32 counter_block_offset) -{ - return JM_BASE + counter_block_offset; -} - -/** - * get_memsys_counter() - get peformance counter offset inside the Memory System block - * @model_data: pointer to GPU model data. - * @counter_block_offset: offset in bytes of the performance counter inside the (first) Memory System block. - * - * Return: Block offset in bytes of the required performance counter. - */ -static u32 kbase_g71_power_model_get_memsys_counter(struct kbase_ipa_model_vinstr_data *model_data, - u32 counter_block_offset) -{ - /* The base address of Memory System performance counters is always the same, although their number - * may vary based on the number of cores. For the moment it's ok to return a constant. - */ - return MEMSYS_BASE + counter_block_offset; -} - -/** - * get_sc_counter() - get performance counter offset inside the Shader Cores block - * @model_data: pointer to GPU model data. - * @counter_block_offset: offset in bytes of the performance counter inside the (first) Shader Cores block. - * - * Return: Block offset in bytes of the required performance counter. - */ -static u32 kbase_g71_power_model_get_sc_counter(struct kbase_ipa_model_vinstr_data *model_data, - u32 counter_block_offset) -{ - const u32 sc_base = model_data->kbdev->gpu_props.num_cores <= KBASE_G71_SINGLE_MEMSYS_MAX_NUM_CORES ? - SC0_BASE_ONE_MEMSYS : - SC0_BASE_TWO_MEMSYS; - - return sc_base + counter_block_offset; -} - -/** - * memsys_single_counter() - calculate energy for a single Memory System performance counter. - * @model_data: pointer to GPU model data. - * @coeff: default value of coefficient for IPA group. - * @counter_block_offset: offset in bytes of the counter inside the block it belongs to. - * - * Return: Energy estimation for a single Memory System performance counter. 
- */ -static s64 kbase_g71_memsys_single_counter( - struct kbase_ipa_model_vinstr_data *model_data, - s32 coeff, - u32 counter_block_offset) -{ - return kbase_ipa_single_counter(model_data, coeff, - kbase_g71_power_model_get_memsys_counter(model_data, counter_block_offset)); -} - -/** - * sum_all_shader_cores() - calculate energy for a Shader Cores performance counter for all cores. - * @model_data: pointer to GPU model data. - * @coeff: default value of coefficient for IPA group. - * @counter_block_offset: offset in bytes of the counter inside the block it belongs to. - * - * Return: Energy estimation for a Shader Cores performance counter for all cores. - */ -static s64 kbase_g71_sum_all_shader_cores( - struct kbase_ipa_model_vinstr_data *model_data, - s32 coeff, - u32 counter_block_offset) -{ - return kbase_ipa_sum_all_shader_cores(model_data, coeff, - kbase_g71_power_model_get_sc_counter(model_data, counter_block_offset)); -} - -/** - * jm_single_counter() - calculate energy for a single Job Manager performance counter. - * @model_data: pointer to GPU model data. - * @coeff: default value of coefficient for IPA group. - * @counter_block_offset: offset in bytes of the counter inside the block it belongs to. - * - * Return: Energy estimation for a single Job Manager performance counter. - */ -static s64 kbase_g71_jm_single_counter( - struct kbase_ipa_model_vinstr_data *model_data, - s32 coeff, - u32 counter_block_offset) -{ - return kbase_ipa_single_counter(model_data, coeff, - kbase_g71_power_model_get_jm_counter(model_data, counter_block_offset)); -} - -/** Table of IPA group definitions. - * - * For each IPA group, this table defines a function to access the given performance block counter (or counters, - * if the operation needs to be iterated on multiple blocks) and calculate energy estimation. 
- */ -static const struct kbase_ipa_group ipa_groups_def[] = { - { - .name = "l2_access", - .default_value = 526300, - .op = kbase_g71_memsys_single_counter, - .counter_block_offset = MEMSYS_L2_ANY_LOOKUP, - }, - { - .name = "exec_instr_count", - .default_value = 301100, - .op = kbase_g71_sum_all_shader_cores, - .counter_block_offset = SC_EXEC_INSTR_COUNT, - }, - { - .name = "tex_issue", - .default_value = 197400, - .op = kbase_g71_sum_all_shader_cores, - .counter_block_offset = SC_TEX_COORD_ISSUE, - }, - { - .name = "tile_wb", - .default_value = -156400, - .op = kbase_g71_sum_all_shader_cores, - .counter_block_offset = SC_BEATS_WR_TIB, - }, - { - .name = "gpu_active", - .default_value = 115800, - .op = kbase_g71_jm_single_counter, - .counter_block_offset = JM_GPU_ACTIVE, - }, -}; - -static int kbase_g71_power_model_init(struct kbase_ipa_model *model) -{ - int i, err = 0; - struct kbase_ipa_model_vinstr_data *model_data; - - model_data = kzalloc(sizeof(*model_data), GFP_KERNEL); - if (!model_data) - return -ENOMEM; - - model_data->kbdev = model->kbdev; - model_data->groups_def = ipa_groups_def; - BUILD_BUG_ON(ARRAY_SIZE(ipa_groups_def) > KBASE_IPA_MAX_GROUP_DEF_NUM); - model_data->groups_def_num = ARRAY_SIZE(ipa_groups_def); - - model->model_data = (void *) model_data; - - for (i = 0; i < ARRAY_SIZE(ipa_groups_def); ++i) { - const struct kbase_ipa_group *group = &ipa_groups_def[i]; - - model_data->group_values[i] = group->default_value; - err = kbase_ipa_model_add_param_s32(model, group->name, - &model_data->group_values[i], - 1, false); - if (err) - goto exit; - } - - model_data->scaling_factor = 5; - err = kbase_ipa_model_add_param_s32(model, "scale", - &model_data->scaling_factor, - 1, false); - if (err) - goto exit; - - err = kbase_ipa_attach_vinstr(model_data); - -exit: - if (err) { - kbase_ipa_model_param_free_all(model); - kfree(model_data); - } - return err; -} - -static void kbase_g71_power_model_term(struct kbase_ipa_model *model) -{ - struct 
kbase_ipa_model_vinstr_data *model_data = - (struct kbase_ipa_model_vinstr_data *)model->model_data; - - kbase_ipa_detach_vinstr(model_data); - kfree(model_data); -} - - -struct kbase_ipa_model_ops kbase_g71_ipa_model_ops = { - .name = "mali-g71-power-model", - .init = kbase_g71_power_model_init, - .term = kbase_g71_power_model_term, - .get_dynamic_coeff = kbase_ipa_vinstr_dynamic_coeff, - .do_utilization_scaling_in_framework = false, -}; -KBASE_EXPORT_TEST_API(kbase_g71_ipa_model_ops); diff --git a/drivers/gpu/arm/bifrost/ipa/mali_kbase_ipa_vinstr_g7x.c b/drivers/gpu/arm/bifrost/ipa/mali_kbase_ipa_vinstr_g7x.c new file mode 100644 index 000000000000..81137ea80454 --- /dev/null +++ b/drivers/gpu/arm/bifrost/ipa/mali_kbase_ipa_vinstr_g7x.c @@ -0,0 +1,293 @@ +/* + * + * (C) COPYRIGHT 2016-2018 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. 
+ * + * SPDX-License-Identifier: GPL-2.0 + * + */ +#include + +#include "mali_kbase_ipa_vinstr_common.h" +#include "mali_kbase.h" +#include "mali_kbase_ipa_debugfs.h" + + +/* Performance counter blocks base offsets */ +#define JM_BASE (0 * KBASE_IPA_NR_BYTES_PER_BLOCK) +#define TILER_BASE (1 * KBASE_IPA_NR_BYTES_PER_BLOCK) +#define MEMSYS_BASE (2 * KBASE_IPA_NR_BYTES_PER_BLOCK) +#define SC0_BASE_ONE_MEMSYS (3 * KBASE_IPA_NR_BYTES_PER_BLOCK) +#define SC0_BASE_TWO_MEMSYS (4 * KBASE_IPA_NR_BYTES_PER_BLOCK) + +/* JM counter block offsets */ +#define JM_GPU_ACTIVE (KBASE_IPA_NR_BYTES_PER_CNT * 6) + +/* Tiler counter block offsets */ +#define TILER_ACTIVE (KBASE_IPA_NR_BYTES_PER_CNT * 45) + +/* MEMSYS counter block offsets */ +#define MEMSYS_L2_ANY_LOOKUP (KBASE_IPA_NR_BYTES_PER_CNT * 25) + +/* SC counter block offsets */ +#define SC_FRAG_ACTIVE (KBASE_IPA_NR_BYTES_PER_CNT * 4) +#define SC_EXEC_CORE_ACTIVE (KBASE_IPA_NR_BYTES_PER_CNT * 26) +#define SC_EXEC_INSTR_COUNT (KBASE_IPA_NR_BYTES_PER_CNT * 28) +#define SC_TEX_COORD_ISSUE (KBASE_IPA_NR_BYTES_PER_CNT * 40) +#define SC_TEX_TFCH_NUM_OPERATIONS (KBASE_IPA_NR_BYTES_PER_CNT * 42) +#define SC_VARY_INSTR (KBASE_IPA_NR_BYTES_PER_CNT * 49) +#define SC_VARY_SLOT_32 (KBASE_IPA_NR_BYTES_PER_CNT * 50) +#define SC_VARY_SLOT_16 (KBASE_IPA_NR_BYTES_PER_CNT * 51) +#define SC_BEATS_RD_LSC (KBASE_IPA_NR_BYTES_PER_CNT * 56) +#define SC_BEATS_WR_LSC (KBASE_IPA_NR_BYTES_PER_CNT * 61) +#define SC_BEATS_WR_TIB (KBASE_IPA_NR_BYTES_PER_CNT * 62) + +/** Maximum number of cores for which a single Memory System block of performance counters is present. */ +#define KBASE_G7x_SINGLE_MEMSYS_MAX_NUM_CORES ((u8)4) + + +/** + * get_jm_counter() - get performance counter offset inside the Job Manager block + * @model_data: pointer to GPU model data. + * @counter_block_offset: offset in bytes of the performance counter inside the Job Manager block. + * + * Return: Block offset in bytes of the required performance counter. 
+ */ +static u32 kbase_g7x_power_model_get_jm_counter(struct kbase_ipa_model_vinstr_data *model_data, + u32 counter_block_offset) +{ + return JM_BASE + counter_block_offset; +} + +/** + * get_memsys_counter() - get performance counter offset inside the Memory System block + * @model_data: pointer to GPU model data. + * @counter_block_offset: offset in bytes of the performance counter inside the (first) Memory System block. + * + * Return: Block offset in bytes of the required performance counter. + */ +static u32 kbase_g7x_power_model_get_memsys_counter(struct kbase_ipa_model_vinstr_data *model_data, + u32 counter_block_offset) +{ + /* The base address of Memory System performance counters is always the same, although their number + * may vary based on the number of cores. For the moment it's ok to return a constant. + */ + return MEMSYS_BASE + counter_block_offset; +} + +/** + * get_sc_counter() - get performance counter offset inside the Shader Cores block + * @model_data: pointer to GPU model data. + * @counter_block_offset: offset in bytes of the performance counter inside the (first) Shader Cores block. + * + * Return: Block offset in bytes of the required performance counter. + */ +static u32 kbase_g7x_power_model_get_sc_counter(struct kbase_ipa_model_vinstr_data *model_data, + u32 counter_block_offset) +{ + const u32 sc_base = model_data->kbdev->gpu_props.num_cores <= KBASE_G7x_SINGLE_MEMSYS_MAX_NUM_CORES ? + SC0_BASE_ONE_MEMSYS : + SC0_BASE_TWO_MEMSYS; + + return sc_base + counter_block_offset; +} + +/** + * memsys_single_counter() - calculate energy for a single Memory System performance counter. + * @model_data: pointer to GPU model data. + * @coeff: default value of coefficient for IPA group. + * @offset: offset in bytes of the counter inside the block it belongs to. + * + * Return: Energy estimation for a single Memory System performance counter. 
+ */ +static s64 kbase_g7x_memsys_single_counter( + struct kbase_ipa_model_vinstr_data *model_data, + s32 coeff, + u32 offset) +{ + u32 counter; + + counter = kbase_g7x_power_model_get_memsys_counter(model_data, offset); + return kbase_ipa_single_counter(model_data, coeff, counter); +} + +/** + * sum_all_shader_cores() - calculate energy for a Shader Cores performance counter for all cores. + * @model_data: pointer to GPU model data. + * @coeff: default value of coefficient for IPA group. + * @counter_block_offset: offset in bytes of the counter inside the block it belongs to. + * + * Return: Energy estimation for a Shader Cores performance counter for all cores. + */ +static s64 kbase_g7x_sum_all_shader_cores( + struct kbase_ipa_model_vinstr_data *model_data, + s32 coeff, + u32 counter_block_offset) +{ + u32 counter; + + counter = kbase_g7x_power_model_get_sc_counter(model_data, + counter_block_offset); + return kbase_ipa_sum_all_shader_cores(model_data, coeff, counter); +} + +/** + * jm_single_counter() - calculate energy for a single Job Manager performance counter. + * @model_data: pointer to GPU model data. + * @coeff: default value of coefficient for IPA group. + * @counter_block_offset: offset in bytes of the counter inside the block it belongs to. + * + * Return: Energy estimation for a single Job Manager performance counter. + */ +static s64 kbase_g7x_jm_single_counter( + struct kbase_ipa_model_vinstr_data *model_data, + s32 coeff, + u32 counter_block_offset) +{ + u32 counter; + + counter = kbase_g7x_power_model_get_jm_counter(model_data, + counter_block_offset); + return kbase_ipa_single_counter(model_data, coeff, counter); +} + +/** Table of IPA group definitions. + * + * For each IPA group, this table defines a function to access the given performance block counter (or counters, + * if the operation needs to be iterated on multiple blocks) and calculate energy estimation. 
+ */ + +static const struct kbase_ipa_group ipa_groups_def_g71[] = { + { + .name = "l2_access", + .default_value = 526300, + .op = kbase_g7x_memsys_single_counter, + .counter_block_offset = MEMSYS_L2_ANY_LOOKUP, + }, + { + .name = "exec_instr_count", + .default_value = 301100, + .op = kbase_g7x_sum_all_shader_cores, + .counter_block_offset = SC_EXEC_INSTR_COUNT, + }, + { + .name = "tex_issue", + .default_value = 197400, + .op = kbase_g7x_sum_all_shader_cores, + .counter_block_offset = SC_TEX_COORD_ISSUE, + }, + { + .name = "tile_wb", + .default_value = -156400, + .op = kbase_g7x_sum_all_shader_cores, + .counter_block_offset = SC_BEATS_WR_TIB, + }, + { + .name = "gpu_active", + .default_value = 115800, + .op = kbase_g7x_jm_single_counter, + .counter_block_offset = JM_GPU_ACTIVE, + }, +}; + +static const struct kbase_ipa_group ipa_groups_def_g72[] = { + { + .name = "l2_access", + .default_value = 393000, + .op = kbase_g7x_memsys_single_counter, + .counter_block_offset = MEMSYS_L2_ANY_LOOKUP, + }, + { + .name = "exec_instr_count", + .default_value = 227000, + .op = kbase_g7x_sum_all_shader_cores, + .counter_block_offset = SC_EXEC_INSTR_COUNT, + }, + { + .name = "tex_issue", + .default_value = 181900, + .op = kbase_g7x_sum_all_shader_cores, + .counter_block_offset = SC_TEX_COORD_ISSUE, + }, + { + .name = "tile_wb", + .default_value = -120200, + .op = kbase_g7x_sum_all_shader_cores, + .counter_block_offset = SC_BEATS_WR_TIB, + }, + { + .name = "gpu_active", + .default_value = 133100, + .op = kbase_g7x_jm_single_counter, + .counter_block_offset = JM_GPU_ACTIVE, + }, +}; + +static const struct kbase_ipa_group ipa_groups_def_tnox[] = { + { + .name = "gpu_active", + .default_value = 122000, + .op = kbase_g7x_jm_single_counter, + .counter_block_offset = JM_GPU_ACTIVE, + }, + { + .name = "exec_instr_count", + .default_value = 488900, + .op = kbase_g7x_sum_all_shader_cores, + .counter_block_offset = SC_EXEC_INSTR_COUNT, + }, + { + .name = "vary_instr", + .default_value = 
212100, + .op = kbase_g7x_sum_all_shader_cores, + .counter_block_offset = SC_VARY_INSTR, + }, + { + .name = "tex_tfch_num_operations", + .default_value = 288000, + .op = kbase_g7x_sum_all_shader_cores, + .counter_block_offset = SC_TEX_TFCH_NUM_OPERATIONS, + }, + { + .name = "l2_access", + .default_value = 378100, + .op = kbase_g7x_memsys_single_counter, + .counter_block_offset = MEMSYS_L2_ANY_LOOKUP, + }, +}; + +#define STANDARD_POWER_MODEL(gpu) \ + static int kbase_ ## gpu ## _power_model_init(\ + struct kbase_ipa_model *model) \ + { \ + BUILD_BUG_ON(ARRAY_SIZE(ipa_groups_def_ ## gpu) > \ + KBASE_IPA_MAX_GROUP_DEF_NUM); \ + return kbase_ipa_vinstr_common_model_init(model, \ + ipa_groups_def_ ## gpu, \ + ARRAY_SIZE(ipa_groups_def_ ## gpu)); \ + } \ + struct kbase_ipa_model_ops kbase_ ## gpu ## _ipa_model_ops = { \ + .name = "mali-" #gpu "-power-model", \ + .init = kbase_ ## gpu ## _power_model_init, \ + .term = kbase_ipa_vinstr_common_model_term, \ + .get_dynamic_coeff = kbase_ipa_vinstr_dynamic_coeff, \ + .do_utilization_scaling_in_framework = false \ + }; \ + KBASE_EXPORT_TEST_API(kbase_ ## gpu ## _ipa_model_ops) + +STANDARD_POWER_MODEL(g71); +STANDARD_POWER_MODEL(g72); +STANDARD_POWER_MODEL(tnox); diff --git a/drivers/gpu/arm/bifrost/mali_base_hwconfig_features.h b/drivers/gpu/arm/bifrost/mali_base_hwconfig_features.h index e0eebd872eb9..10da0c58e9eb 100644 --- a/drivers/gpu/arm/bifrost/mali_base_hwconfig_features.h +++ b/drivers/gpu/arm/bifrost/mali_base_hwconfig_features.h @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2015-2017 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014-2018 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -31,7 +31,6 @@ enum base_hw_feature { BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION, BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS, - BASE_HW_FEATURE_33BIT_VA, BASE_HW_FEATURE_XAFFINITY, BASE_HW_FEATURE_OUT_OF_ORDER_EXEC, BASE_HW_FEATURE_MRT, @@ -85,7 +84,6 @@ static const enum base_hw_feature base_hw_features_t62x[] = { }; static const enum base_hw_feature base_hw_features_t72x[] = { - BASE_HW_FEATURE_33BIT_VA, BASE_HW_FEATURE_32_BIT_UNIFORM_ADDRESS, BASE_HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL, BASE_HW_FEATURE_INTERPIPE_REG_ALIASING, @@ -139,7 +137,6 @@ static const enum base_hw_feature base_hw_features_tFxx[] = { }; static const enum base_hw_feature base_hw_features_t83x[] = { - BASE_HW_FEATURE_33BIT_VA, BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION, BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS, BASE_HW_FEATURE_XAFFINITY, @@ -162,7 +159,6 @@ static const enum base_hw_feature base_hw_features_t83x[] = { }; static const enum base_hw_feature base_hw_features_t82x[] = { - BASE_HW_FEATURE_33BIT_VA, BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION, BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS, BASE_HW_FEATURE_XAFFINITY, @@ -238,7 +234,6 @@ static const enum base_hw_feature base_hw_features_tHEx[] = { }; static const enum base_hw_feature base_hw_features_tSIx[] = { - BASE_HW_FEATURE_33BIT_VA, BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION, BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS, BASE_HW_FEATURE_XAFFINITY, @@ -266,7 +261,6 @@ static const enum base_hw_feature base_hw_features_tSIx[] = { }; static const enum base_hw_feature base_hw_features_tDVx[] = { - BASE_HW_FEATURE_33BIT_VA, BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION, BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS, BASE_HW_FEATURE_XAFFINITY, diff --git a/drivers/gpu/arm/bifrost/mali_base_hwconfig_issues.h b/drivers/gpu/arm/bifrost/mali_base_hwconfig_issues.h index 7b70e7a82b6f..d3cfdbee5921 100644 --- 
a/drivers/gpu/arm/bifrost/mali_base_hwconfig_issues.h +++ b/drivers/gpu/arm/bifrost/mali_base_hwconfig_issues.h @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2015-2017 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014-2018 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -1141,6 +1141,13 @@ static const enum base_hw_issue base_hw_issues_tGOx_r0p0[] = { BASE_HW_ISSUE_END }; +static const enum base_hw_issue base_hw_issues_tGOx_r1p0[] = { + BASE_HW_ISSUE_9435, + BASE_HW_ISSUE_TMIX_8133, + BASE_HW_ISSUE_TSIX_1116, + BASE_HW_ISSUE_END +}; + static const enum base_hw_issue base_hw_issues_model_tGOx[] = { BASE_HW_ISSUE_5736, BASE_HW_ISSUE_9435, diff --git a/drivers/gpu/arm/bifrost/mali_base_kernel.h b/drivers/gpu/arm/bifrost/mali_base_kernel.h index e6b568fba520..f0ccd789e81c 100644 --- a/drivers/gpu/arm/bifrost/mali_base_kernel.h +++ b/drivers/gpu/arm/bifrost/mali_base_kernel.h @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2010-2017 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2010-2018 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -317,8 +317,10 @@ struct base_mem_import_user_buffer { * * This is the same as the maximum limit for a Buffer Descriptor's chunk size */ +#define BASE_MEM_TILER_ALIGN_TOP_EXTENT_MAX_PAGES_LOG2 \ + (21u - (LOCAL_PAGE_SHIFT)) #define BASE_MEM_TILER_ALIGN_TOP_EXTENT_MAX_PAGES \ - ((2ull * 1024ull * 1024ull) >> (LOCAL_PAGE_SHIFT)) + (1ull << (BASE_MEM_TILER_ALIGN_TOP_EXTENT_MAX_PAGES_LOG2)) /* Bit mask of cookies used for for memory allocation setup */ #define KBASE_COOKIE_MASK ~1UL /* bit 0 is reserved */ @@ -433,6 +435,13 @@ struct base_mem_aliasing_info { u64 length; }; +/** + * Similar to BASE_MEM_TILER_ALIGN_TOP, memory starting from the end of the + * initial commit is aligned to 'extent' pages, where 'extent' must be a power + * of 2 and no more than BASE_MEM_TILER_ALIGN_TOP_EXTENT_MAX_PAGES + */ +#define BASE_JIT_ALLOC_MEM_TILER_ALIGN_TOP (1 << 0) + /** * struct base_jit_alloc_info - Structure which describes a JIT allocation * request. @@ -446,6 +455,18 @@ struct base_mem_aliasing_info { * @id: Unique ID provided by the caller, this is used * to pair allocation and free requests. * Zero is not a valid value. + * @bin_id: The JIT allocation bin, used in conjunction with + * @max_allocations to limit the number of each + * type of JIT allocation. + * @max_allocations: The maximum number of allocations allowed within + * the bin specified by @bin_id. Should be the same + * for all JIT allocations within the same bin. + * @flags: flags specifying the special requirements for + * the JIT allocation. + * @padding: Expansion space - should be initialised to zero + * @usage_id: A hint about which allocation should be reused. 
+ * The kernel should attempt to use a previous + * allocation with the same usage_id */ struct base_jit_alloc_info { u64 gpu_alloc_addr; @@ -453,6 +474,11 @@ struct base_jit_alloc_info { u64 commit_pages; u64 extent; u8 id; + u8 bin_id; + u8 max_allocations; + u8 flags; + u8 padding[2]; + u16 usage_id; }; /** @@ -1412,6 +1438,11 @@ struct mali_base_gpu_core_props { * client will not be expecting to allocate anywhere near this value. */ u64 gpu_available_memory_size; + + /** + * The number of execution engines. + */ + u8 num_exec_engines; }; /** @@ -1442,7 +1473,10 @@ struct mali_base_gpu_thread_props { u8 max_task_queue; /* Max. tasks [1..255] which may be sent to a core before it becomes blocked. */ u8 max_thread_group_split; /* Max. allowed value [1..15] of the Thread Group Split field. */ u8 impl_tech; /* 0 = Not specified, 1 = Silicon, 2 = FPGA, 3 = SW Model/Emulation */ - u8 padding[7]; + u8 padding[3]; + u32 tls_alloc; /* Number of threads per core that TLS must + * be allocated for + */ }; /** @@ -1524,7 +1558,7 @@ struct gpu_raw_gpu_props { u64 stack_present; u32 l2_features; - u32 suspend_size; /* API 8.2+ */ + u32 core_features; u32 mem_features; u32 mmu_features; @@ -1547,6 +1581,8 @@ struct gpu_raw_gpu_props { * available modes as exposed in the coherency_features register. */ u32 coherency_mode; + + u32 thread_tls_alloc; }; /** @@ -1582,36 +1618,40 @@ typedef struct base_gpu_props { */ /** - * \enum base_context_create_flags - * * Flags to pass to ::base_context_init. * Flags can be ORed together to enable multiple things. * * These share the same space as BASEP_CONTEXT_FLAG_*, and so must * not collide with them. 
*/ -enum base_context_create_flags { - /** No flags set */ - BASE_CONTEXT_CREATE_FLAG_NONE = 0, +typedef u32 base_context_create_flags; - /** Base context is embedded in a cctx object (flag used for CINSTR software counter macros) */ - BASE_CONTEXT_CCTX_EMBEDDED = (1u << 0), +/** No flags set */ +#define BASE_CONTEXT_CREATE_FLAG_NONE ((base_context_create_flags)0) - /** Base context is a 'System Monitor' context for Hardware counters. - * - * One important side effect of this is that job submission is disabled. */ - BASE_CONTEXT_SYSTEM_MONITOR_SUBMIT_DISABLED = (1u << 1) -}; +/** Base context is embedded in a cctx object (flag used for CINSTR + * software counter macros) + */ +#define BASE_CONTEXT_CCTX_EMBEDDED ((base_context_create_flags)1 << 0) + +/** Base context is a 'System Monitor' context for Hardware counters. + * + * One important side effect of this is that job submission is disabled. + */ +#define BASE_CONTEXT_SYSTEM_MONITOR_SUBMIT_DISABLED \ + ((base_context_create_flags)1 << 1) /** - * Bitpattern describing the ::base_context_create_flags that can be passed to base_context_init() + * Bitpattern describing the ::base_context_create_flags that can be + * passed to base_context_init() */ #define BASE_CONTEXT_CREATE_ALLOWED_FLAGS \ (((u32)BASE_CONTEXT_CCTX_EMBEDDED) | \ ((u32)BASE_CONTEXT_SYSTEM_MONITOR_SUBMIT_DISABLED)) /** - * Bitpattern describing the ::base_context_create_flags that can be passed to the kernel + * Bitpattern describing the ::base_context_create_flags that can be + * passed to the kernel */ #define BASE_CONTEXT_CREATE_KERNEL_FLAGS \ ((u32)BASE_CONTEXT_SYSTEM_MONITOR_SUBMIT_DISABLED) diff --git a/drivers/gpu/arm/bifrost/mali_kbase.h b/drivers/gpu/arm/bifrost/mali_kbase.h index f9cc148c9969..35ce1b9e06a0 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase.h +++ b/drivers/gpu/arm/bifrost/mali_kbase.h @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2010-2017 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2010-2018 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -61,8 +61,8 @@ #include "mali_kbase_mem_lowlevel.h" #include "mali_kbase_trace_timeline.h" #include "mali_kbase_js.h" -#include "mali_kbase_mem.h" #include "mali_kbase_utility.h" +#include "mali_kbase_mem.h" #include "mali_kbase_gpu_memory_debugfs.h" #include "mali_kbase_mem_profile_debugfs.h" #include "mali_kbase_debug_job_fault.h" @@ -111,10 +111,6 @@ void kbase_release_device(struct kbase_device *kbdev); void kbase_set_profiling_control(struct kbase_device *kbdev, u32 control, u32 value); -struct kbase_context * -kbase_create_context(struct kbase_device *kbdev, bool is_compat); -void kbase_destroy_context(struct kbase_context *kctx); - /** * kbase_get_unmapped_area() - get an address range which is currently @@ -258,7 +254,7 @@ int kbase_soft_event_update(struct kbase_context *kctx, bool kbase_replay_process(struct kbase_jd_atom *katom); -void kbasep_soft_job_timeout_worker(unsigned long data); +void kbasep_soft_job_timeout_worker(struct timer_list *timer); void kbasep_complete_triggered_soft_events(struct kbase_context *kctx, u64 evt); /* api used internally for register access. Contains validation and tracing */ diff --git a/drivers/gpu/arm/bifrost/mali_kbase_10969_workaround.c b/drivers/gpu/arm/bifrost/mali_kbase_10969_workaround.c index e0e40a9292e8..f3e71d1a40d0 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_10969_workaround.c +++ b/drivers/gpu/arm/bifrost/mali_kbase_10969_workaround.c @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2013-2015,2017 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2013-2015,2017-2018 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -23,11 +23,6 @@ #include #include -/* This function is used to solve an HW issue with single iterator GPUs. 
- * If a fragment job is soft-stopped on the edge of its bounding box, can happen that the - * restart index is out of bounds and the rerun causes a tile range fault. If this happens - * we try to clamp the restart index to a correct value and rerun the job. - */ /* Mask of X and Y coordinates for the coordinates words in the descriptors*/ #define X_COORDINATE_MASK 0x00000FFF #define Y_COORDINATE_MASK 0x0FFF0000 diff --git a/drivers/gpu/arm/bifrost/mali_kbase_10969_workaround.h b/drivers/gpu/arm/bifrost/mali_kbase_10969_workaround.h index 624dc4a86b52..379a05a1a128 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_10969_workaround.h +++ b/drivers/gpu/arm/bifrost/mali_kbase_10969_workaround.h @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2013-2014 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2013-2014, 2018 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -23,6 +23,15 @@ #ifndef _KBASE_10969_WORKAROUND_ #define _KBASE_10969_WORKAROUND_ +/** + * kbasep_10969_workaround_clamp_coordinates - Apply the WA to clamp the restart indices + * @katom: atom representing the fragment job for which the WA has to be applied + * + * This workaround is used to solve an HW issue with single iterator GPUs. + * If a fragment job is soft-stopped on the edge of its bounding box, it can happen + * that the restart index is out of bounds and the rerun causes a tile range + * fault. If this happens we try to clamp the restart index to a correct value. 
+ */ int kbasep_10969_workaround_clamp_coordinates(struct kbase_jd_atom *katom); #endif /* _KBASE_10969_WORKAROUND_ */ diff --git a/drivers/gpu/arm/bifrost/mali_kbase_as_fault_debugfs.c b/drivers/gpu/arm/bifrost/mali_kbase_as_fault_debugfs.c index f57eb7c25492..44aa237a7497 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_as_fault_debugfs.c +++ b/drivers/gpu/arm/bifrost/mali_kbase_as_fault_debugfs.c @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2016 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2016-2017 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -41,13 +41,14 @@ static int kbase_as_fault_read(struct seq_file *sfile, void *data) list_for_each(entry, kbdev_list) { kbdev = list_entry(entry, struct kbase_device, entry); - if(kbdev->debugfs_as_read_bitmap & (1ULL << as_no)) { + if (kbdev->debugfs_as_read_bitmap & (1ULL << as_no)) { /* don't show this one again until another fault occors */ kbdev->debugfs_as_read_bitmap &= ~(1ULL << as_no); /* output the last page fault addr */ - seq_printf(sfile, "%llu\n", (u64) kbdev->as[as_no].fault_addr); + seq_printf(sfile, "%llu\n", + (u64) kbdev->as[as_no].fault_addr); } } @@ -59,7 +60,7 @@ static int kbase_as_fault_read(struct seq_file *sfile, void *data) static int kbase_as_fault_debugfs_open(struct inode *in, struct file *file) { - return single_open(file, kbase_as_fault_read , in->i_private); + return single_open(file, kbase_as_fault_read, in->i_private); } static const struct file_operations as_fault_fops = { @@ -89,17 +90,20 @@ void kbase_as_fault_debugfs_init(struct kbase_device *kbdev) KBASE_DEBUG_ASSERT(sizeof(kbdev->as[0].fault_addr) == sizeof(u64)); debugfs_directory = debugfs_create_dir("address_spaces", - kbdev->mali_debugfs_directory); + kbdev->mali_debugfs_directory); - if(debugfs_directory) { - for(i = 0; i < kbdev->nr_hw_address_spaces; i++) { + if (debugfs_directory) { 
+ for (i = 0; i < kbdev->nr_hw_address_spaces; i++) { snprintf(as_name, ARRAY_SIZE(as_name), "as%u", i); debugfs_create_file(as_name, S_IRUGO, - debugfs_directory, (void*) ((uintptr_t) i), &as_fault_fops); + debugfs_directory, + (void *)(uintptr_t)i, + &as_fault_fops); } + } else { + dev_warn(kbdev->dev, + "unable to create address_spaces debugfs directory"); } - else - dev_warn(kbdev->dev, "unable to create address_spaces debugfs directory"); #endif /* CONFIG_MALI_BIFROST_DEBUG */ #endif /* CONFIG_DEBUG_FS */ diff --git a/drivers/gpu/arm/bifrost/mali_kbase_config_defaults.h b/drivers/gpu/arm/bifrost/mali_kbase_config_defaults.h index dbb4f977bb33..a95736c54a29 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_config_defaults.h +++ b/drivers/gpu/arm/bifrost/mali_kbase_config_defaults.h @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2013-2017 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2013-2018 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -156,12 +156,12 @@ enum { */ #define DEFAULT_UMP_GPU_DEVICE_SHIFT UMP_DEVICE_Z_SHIFT -/* +/** * Default period for DVFS sampling */ #define DEFAULT_PM_DVFS_PERIOD 100 /* 100ms */ -/* +/** * Power Management poweroff tick granuality. This is in nanoseconds to * allow HR timer support. * @@ -171,22 +171,22 @@ enum { */ #define DEFAULT_PM_GPU_POWEROFF_TICK_NS (400000) /* 400us */ -/* +/** * Power Manager number of ticks before shader cores are powered off */ #define DEFAULT_PM_POWEROFF_TICK_SHADER (2) /* 400-800us */ -/* +/** * Power Manager number of ticks before GPU is powered off */ #define DEFAULT_PM_POWEROFF_TICK_GPU (2) /* 400-800us */ -/* +/** * Default scheduling tick granuality */ #define DEFAULT_JS_SCHEDULING_PERIOD_NS (100000000u) /* 100ms */ -/* +/** * Default minimum number of scheduling ticks before jobs are soft-stopped. 
* * This defines the time-slice for a job (which may be different from that of a @@ -194,60 +194,60 @@ enum { */ #define DEFAULT_JS_SOFT_STOP_TICKS (1) /* 100ms-200ms */ -/* +/** * Default minimum number of scheduling ticks before CL jobs are soft-stopped. */ #define DEFAULT_JS_SOFT_STOP_TICKS_CL (1) /* 100ms-200ms */ -/* +/** * Default minimum number of scheduling ticks before jobs are hard-stopped */ #define DEFAULT_JS_HARD_STOP_TICKS_SS (50) /* 5s */ #define DEFAULT_JS_HARD_STOP_TICKS_SS_8408 (300) /* 30s */ -/* +/** * Default minimum number of scheduling ticks before CL jobs are hard-stopped. */ #define DEFAULT_JS_HARD_STOP_TICKS_CL (50) /* 5s */ -/* +/** * Default minimum number of scheduling ticks before jobs are hard-stopped * during dumping */ #define DEFAULT_JS_HARD_STOP_TICKS_DUMPING (15000) /* 1500s */ -/* +/** * Default timeout for some software jobs, after which the software event wait * jobs will be cancelled. */ #define DEFAULT_JS_SOFT_JOB_TIMEOUT (3000) /* 3s */ -/* +/** * Default minimum number of scheduling ticks before the GPU is reset to clear a * "stuck" job */ #define DEFAULT_JS_RESET_TICKS_SS (55) /* 5.5s */ #define DEFAULT_JS_RESET_TICKS_SS_8408 (450) /* 45s */ -/* +/** * Default minimum number of scheduling ticks before the GPU is reset to clear a * "stuck" CL job. */ #define DEFAULT_JS_RESET_TICKS_CL (55) /* 5.5s */ -/* +/** * Default minimum number of scheduling ticks before the GPU is reset to clear a * "stuck" job during dumping. */ #define DEFAULT_JS_RESET_TICKS_DUMPING (15020) /* 1502s */ -/* +/** * Default number of milliseconds given for other jobs on the GPU to be * soft-stopped when the GPU needs to be reset. */ #define DEFAULT_RESET_TIMEOUT_MS (3000) /* 3s */ -/* +/** * Default timeslice that a context is scheduled in for, in nanoseconds. 
* * When a context has used up this amount of time across its jobs, it is @@ -258,7 +258,7 @@ enum { */ #define DEFAULT_JS_CTX_TIMESLICE_NS (50000000) /* 50ms */ -/* +/** * Perform GPU power down using only platform specific code, skipping DDK power * management. * @@ -272,7 +272,7 @@ enum { */ #define PLATFORM_POWER_DOWN_ONLY (0) -/* +/** * Maximum frequency (in kHz) that the GPU can be clocked. For some platforms * this isn't available, so we simply define a dummy value here. If devfreq * is enabled the value will be read from there, otherwise this should be diff --git a/drivers/gpu/arm/bifrost/mali_kbase_context.c b/drivers/gpu/arm/bifrost/mali_kbase_context.c index d873f9feea30..e9de7239b1e1 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_context.c +++ b/drivers/gpu/arm/bifrost/mali_kbase_context.c @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2010-2017 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2010-2018 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -32,15 +32,6 @@ #include #include -/** - * kbase_create_context() - Create a kernel base context. - * @kbdev: Kbase device - * @is_compat: Force creation of a 32-bit context - * - * Allocate and init a kernel base context. - * - * Return: new kbase context - */ struct kbase_context * kbase_create_context(struct kbase_device *kbdev, bool is_compat) { @@ -175,9 +166,8 @@ kbase_create_context(struct kbase_device *kbdev, bool is_compat) mutex_init(&kctx->vinstr_cli_lock); - setup_timer(&kctx->soft_job_timeout, - kbasep_soft_job_timeout_worker, - (uintptr_t)kctx); + kbase_timer_setup(&kctx->soft_job_timeout, + kbasep_soft_job_timeout_worker); return kctx; @@ -225,13 +215,6 @@ static void kbase_reg_pending_dtor(struct kbase_va_region *reg) kfree(reg); } -/** - * kbase_destroy_context - Destroy a kernel base context. 
- * @kctx: Context to destroy - * - * Calls kbase_destroy_os_context() to free OS specific structures. - * Will release all outstanding regions. - */ void kbase_destroy_context(struct kbase_context *kctx) { struct kbase_device *kbdev; @@ -252,6 +235,8 @@ void kbase_destroy_context(struct kbase_context *kctx) * thread. */ kbase_pm_context_active(kbdev); + kbase_mem_pool_mark_dying(&kctx->mem_pool); + kbase_jd_zap_context(kctx); #ifdef CONFIG_DEBUG_FS @@ -328,13 +313,6 @@ void kbase_destroy_context(struct kbase_context *kctx) } KBASE_EXPORT_SYMBOL(kbase_destroy_context); -/** - * kbase_context_set_create_flags - Set creation flags on a context - * @kctx: Kbase context - * @flags: Flags to set - * - * Return: 0 on success - */ int kbase_context_set_create_flags(struct kbase_context *kctx, u32 flags) { int err = 0; diff --git a/drivers/gpu/arm/bifrost/mali_kbase_context.h b/drivers/gpu/arm/bifrost/mali_kbase_context.h index 431f9e5aa6de..30b0f649806b 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_context.h +++ b/drivers/gpu/arm/bifrost/mali_kbase_context.h @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2011-2016 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2011-2016, 2018 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -25,7 +25,35 @@ #include +/** + * kbase_create_context() - Create a kernel base context. + * @kbdev: Kbase device + * @is_compat: Force creation of a 32-bit context + * + * Allocate and init a kernel base context. + * + * Return: new kbase context + */ +struct kbase_context * +kbase_create_context(struct kbase_device *kbdev, bool is_compat); +/** + * kbase_destroy_context - Destroy a kernel base context. + * @kctx: Context to destroy + * + * Calls kbase_destroy_os_context() to free OS specific structures. + * Will release all outstanding regions. 
+ */ +void kbase_destroy_context(struct kbase_context *kctx); + +/** + * kbase_context_set_create_flags - Set creation flags on a context + * @kctx: Kbase context + * @flags: Flags to set, which shall be one of the flags of + * BASE_CONTEXT_CREATE_KERNEL_FLAGS. + * + * Return: 0 on success, -EINVAL otherwise when an invalid flag is specified. + */ int kbase_context_set_create_flags(struct kbase_context *kctx, u32 flags); /** diff --git a/drivers/gpu/arm/bifrost/mali_kbase_core_linux.c b/drivers/gpu/arm/bifrost/mali_kbase_core_linux.c index a8a4c98669dc..21fd399f58da 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_core_linux.c +++ b/drivers/gpu/arm/bifrost/mali_kbase_core_linux.c @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2010-2017 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2010-2018 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -34,6 +34,7 @@ #endif /* CONFIG_MALI_BIFROST_DEVFREQ */ #ifdef CONFIG_MALI_BIFROST_NO_MALI #include "mali_kbase_model_linux.h" +#include #endif /* CONFIG_MALI_BIFROST_NO_MALI */ #include "mali_kbase_mem_profile_debugfs_buf_size.h" #include "mali_kbase_debug_mem_view.h" @@ -82,7 +83,7 @@ #include -#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 13, 0)) +#if (KERNEL_VERSION(3, 13, 0) <= LINUX_VERSION_CODE) #include #include #else @@ -106,19 +107,20 @@ static LIST_HEAD(kbase_dev_list); #define KERNEL_SIDE_DDK_VERSION_STRING "K:" MALI_RELEASE_NAME "(GPL)" static int kbase_api_handshake(struct kbase_context *kctx, - struct kbase_ioctl_version_check *version) + struct kbase_ioctl_version_check *version) { switch (version->major) { case BASE_UK_VERSION_MAJOR: /* set minor to be the lowest common */ version->minor = min_t(int, BASE_UK_VERSION_MINOR, - (int)version->minor); + (int)version->minor); break; default: /* We return our actual version regardless if it * matches the version returned by userspace - 
* userspace can bail if it can't handle this - * version */ + * version + */ version->major = BASE_UK_VERSION_MAJOR; version->minor = BASE_UK_VERSION_MINOR; break; @@ -494,11 +496,11 @@ static int kbase_release(struct inode *inode, struct file *filp) /* If this client was performing hwcnt dumping and did not explicitly * detach itself, remove it from the vinstr core now */ if (kctx->vinstr_cli) { - struct kbase_uk_hwcnt_setup setup; + struct kbase_ioctl_hwcnt_enable enable; - setup.dump_buffer = 0llu; + enable.dump_buffer = 0llu; kbase_vinstr_legacy_hwc_setup( - kbdev->vinstr_ctx, &kctx->vinstr_cli, &setup); + kbdev->vinstr_ctx, &kctx->vinstr_cli, &enable); } mutex_unlock(&kctx->vinstr_cli_lock); @@ -611,38 +613,22 @@ static int kbase_api_hwcnt_reader_setup(struct kbase_context *kctx, struct kbase_ioctl_hwcnt_reader_setup *setup) { int ret; - struct kbase_uk_hwcnt_reader_setup args = { - .buffer_count = setup->buffer_count, - .jm_bm = setup->jm_bm, - .shader_bm = setup->shader_bm, - .tiler_bm = setup->tiler_bm, - .mmu_l2_bm = setup->mmu_l2_bm - }; mutex_lock(&kctx->vinstr_cli_lock); - ret = kbase_vinstr_hwcnt_reader_setup(kctx->kbdev->vinstr_ctx, &args); + ret = kbase_vinstr_hwcnt_reader_setup(kctx->kbdev->vinstr_ctx, setup); mutex_unlock(&kctx->vinstr_cli_lock); - if (ret) - return ret; - return args.fd; + return ret; } static int kbase_api_hwcnt_enable(struct kbase_context *kctx, struct kbase_ioctl_hwcnt_enable *enable) { int ret; - struct kbase_uk_hwcnt_setup args = { - .dump_buffer = enable->dump_buffer, - .jm_bm = enable->jm_bm, - .shader_bm = enable->shader_bm, - .tiler_bm = enable->tiler_bm, - .mmu_l2_bm = enable->mmu_l2_bm - }; mutex_lock(&kctx->vinstr_cli_lock); ret = kbase_vinstr_legacy_hwc_setup(kctx->kbdev->vinstr_ctx, - &kctx->vinstr_cli, &args); + &kctx->vinstr_cli, enable); mutex_unlock(&kctx->vinstr_cli_lock); return ret; @@ -671,6 +657,18 @@ static int kbase_api_hwcnt_clear(struct kbase_context *kctx) return ret; } +#ifdef 
CONFIG_MALI_BIFROST_NO_MALI +static int kbase_api_hwcnt_set(struct kbase_context *kctx, + struct kbase_ioctl_hwcnt_values *values) +{ + gpu_model_set_dummy_prfcnt_sample( + (u32 __user *)(uintptr_t)values->data, + values->size); + + return 0; +} +#endif + static int kbase_api_disjoint_query(struct kbase_context *kctx, struct kbase_ioctl_disjoint_query *query) { @@ -701,10 +699,37 @@ static int kbase_api_get_ddk_version(struct kbase_context *kctx, return len; } +/* Defaults for legacy JIT init ioctl */ +#define DEFAULT_MAX_JIT_ALLOCATIONS 255 +#define JIT_LEGACY_TRIM_LEVEL (0) /* No trimming */ + +static int kbase_api_mem_jit_init_old(struct kbase_context *kctx, + struct kbase_ioctl_mem_jit_init_old *jit_init) +{ + kctx->jit_version = 1; + + return kbase_region_tracker_init_jit(kctx, jit_init->va_pages, + DEFAULT_MAX_JIT_ALLOCATIONS, + JIT_LEGACY_TRIM_LEVEL); +} + static int kbase_api_mem_jit_init(struct kbase_context *kctx, struct kbase_ioctl_mem_jit_init *jit_init) { - return kbase_region_tracker_init_jit(kctx, jit_init->va_pages); + int i; + + kctx->jit_version = 2; + + for (i = 0; i < sizeof(jit_init->padding); i++) { + /* Ensure all padding bytes are 0 for potential future + * extension + */ + if (jit_init->padding[i]) + return -EINVAL; + } + + return kbase_region_tracker_init_jit(kctx, jit_init->va_pages, + jit_init->max_allocations, jit_init->trim_level); } static int kbase_api_mem_sync(struct kbase_context *kctx, @@ -1012,14 +1037,12 @@ static int kbase_api_tlstream_stats(struct kbase_context *kctx, #endif /* MALI_UNIT_TEST */ #define KBASE_HANDLE_IOCTL(cmd, function) \ - case cmd: \ do { \ BUILD_BUG_ON(_IOC_DIR(cmd) != _IOC_NONE); \ return function(kctx); \ } while (0) #define KBASE_HANDLE_IOCTL_IN(cmd, function, type) \ - case cmd: \ do { \ type param; \ int err; \ @@ -1032,7 +1055,6 @@ static int kbase_api_tlstream_stats(struct kbase_context *kctx, } while (0) #define KBASE_HANDLE_IOCTL_OUT(cmd, function, type) \ - case cmd: \ do { \ type param; \ int 
ret, err; \ @@ -1046,7 +1068,6 @@ static int kbase_api_tlstream_stats(struct kbase_context *kctx, } while (0) #define KBASE_HANDLE_IOCTL_INOUT(cmd, function, type) \ - case cmd: \ do { \ type param; \ int ret, err; \ @@ -1070,12 +1091,17 @@ static long kbase_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) /* Only these ioctls are available until setup is complete */ switch (cmd) { + case KBASE_IOCTL_VERSION_CHECK: KBASE_HANDLE_IOCTL_INOUT(KBASE_IOCTL_VERSION_CHECK, kbase_api_handshake, struct kbase_ioctl_version_check); + break; + + case KBASE_IOCTL_SET_FLAGS: KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_SET_FLAGS, kbase_api_set_flags, struct kbase_ioctl_set_flags); + break; } /* Block call until version handshake and setup is complete */ @@ -1084,109 +1110,192 @@ static long kbase_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) /* Normal ioctls */ switch (cmd) { + case KBASE_IOCTL_JOB_SUBMIT: KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_JOB_SUBMIT, kbase_api_job_submit, struct kbase_ioctl_job_submit); + break; + case KBASE_IOCTL_GET_GPUPROPS: KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_GET_GPUPROPS, kbase_api_get_gpuprops, struct kbase_ioctl_get_gpuprops); + break; + case KBASE_IOCTL_POST_TERM: KBASE_HANDLE_IOCTL(KBASE_IOCTL_POST_TERM, kbase_api_post_term); + break; + case KBASE_IOCTL_MEM_ALLOC: KBASE_HANDLE_IOCTL_INOUT(KBASE_IOCTL_MEM_ALLOC, kbase_api_mem_alloc, union kbase_ioctl_mem_alloc); + break; + case KBASE_IOCTL_MEM_QUERY: KBASE_HANDLE_IOCTL_INOUT(KBASE_IOCTL_MEM_QUERY, kbase_api_mem_query, union kbase_ioctl_mem_query); + break; + case KBASE_IOCTL_MEM_FREE: KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_MEM_FREE, kbase_api_mem_free, struct kbase_ioctl_mem_free); - KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_HWCNT_READER_SETUP, - kbase_api_hwcnt_reader_setup, - struct kbase_ioctl_hwcnt_reader_setup); - KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_HWCNT_ENABLE, - kbase_api_hwcnt_enable, - struct kbase_ioctl_hwcnt_enable); - KBASE_HANDLE_IOCTL(KBASE_IOCTL_HWCNT_DUMP, - kbase_api_hwcnt_dump); - 
KBASE_HANDLE_IOCTL(KBASE_IOCTL_HWCNT_CLEAR, - kbase_api_hwcnt_clear); + break; + case KBASE_IOCTL_DISJOINT_QUERY: KBASE_HANDLE_IOCTL_OUT(KBASE_IOCTL_DISJOINT_QUERY, kbase_api_disjoint_query, struct kbase_ioctl_disjoint_query); + break; + case KBASE_IOCTL_GET_DDK_VERSION: KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_GET_DDK_VERSION, kbase_api_get_ddk_version, struct kbase_ioctl_get_ddk_version); + break; + case KBASE_IOCTL_MEM_JIT_INIT_OLD: + KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_MEM_JIT_INIT_OLD, + kbase_api_mem_jit_init_old, + struct kbase_ioctl_mem_jit_init_old); + break; + case KBASE_IOCTL_MEM_JIT_INIT: KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_MEM_JIT_INIT, kbase_api_mem_jit_init, struct kbase_ioctl_mem_jit_init); + break; + case KBASE_IOCTL_MEM_SYNC: KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_MEM_SYNC, kbase_api_mem_sync, struct kbase_ioctl_mem_sync); + break; + case KBASE_IOCTL_MEM_FIND_CPU_OFFSET: KBASE_HANDLE_IOCTL_INOUT(KBASE_IOCTL_MEM_FIND_CPU_OFFSET, kbase_api_mem_find_cpu_offset, union kbase_ioctl_mem_find_cpu_offset); + break; + case KBASE_IOCTL_MEM_FIND_GPU_START_AND_OFFSET: KBASE_HANDLE_IOCTL_INOUT(KBASE_IOCTL_MEM_FIND_GPU_START_AND_OFFSET, kbase_api_mem_find_gpu_start_and_offset, union kbase_ioctl_mem_find_gpu_start_and_offset); + break; + case KBASE_IOCTL_GET_CONTEXT_ID: KBASE_HANDLE_IOCTL_OUT(KBASE_IOCTL_GET_CONTEXT_ID, kbase_api_get_context_id, struct kbase_ioctl_get_context_id); + break; + case KBASE_IOCTL_TLSTREAM_ACQUIRE: KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_TLSTREAM_ACQUIRE, kbase_api_tlstream_acquire, struct kbase_ioctl_tlstream_acquire); + break; + case KBASE_IOCTL_TLSTREAM_FLUSH: KBASE_HANDLE_IOCTL(KBASE_IOCTL_TLSTREAM_FLUSH, kbase_api_tlstream_flush); + break; + case KBASE_IOCTL_MEM_COMMIT: KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_MEM_COMMIT, kbase_api_mem_commit, struct kbase_ioctl_mem_commit); + break; + case KBASE_IOCTL_MEM_ALIAS: KBASE_HANDLE_IOCTL_INOUT(KBASE_IOCTL_MEM_ALIAS, kbase_api_mem_alias, union kbase_ioctl_mem_alias); + break; + case KBASE_IOCTL_MEM_IMPORT: 
KBASE_HANDLE_IOCTL_INOUT(KBASE_IOCTL_MEM_IMPORT, kbase_api_mem_import, union kbase_ioctl_mem_import); + break; + case KBASE_IOCTL_MEM_FLAGS_CHANGE: KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_MEM_FLAGS_CHANGE, kbase_api_mem_flags_change, struct kbase_ioctl_mem_flags_change); + break; + case KBASE_IOCTL_STREAM_CREATE: KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_STREAM_CREATE, kbase_api_stream_create, struct kbase_ioctl_stream_create); + break; + case KBASE_IOCTL_FENCE_VALIDATE: KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_FENCE_VALIDATE, kbase_api_fence_validate, struct kbase_ioctl_fence_validate); + break; + case KBASE_IOCTL_GET_PROFILING_CONTROLS: KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_GET_PROFILING_CONTROLS, kbase_api_get_profiling_controls, struct kbase_ioctl_get_profiling_controls); + break; + case KBASE_IOCTL_MEM_PROFILE_ADD: KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_MEM_PROFILE_ADD, kbase_api_mem_profile_add, struct kbase_ioctl_mem_profile_add); + break; + case KBASE_IOCTL_SOFT_EVENT_UPDATE: KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_SOFT_EVENT_UPDATE, kbase_api_soft_event_update, struct kbase_ioctl_soft_event_update); -#ifdef CONFIG_MALI_JOB_DUMP - KBASE_HANDLE_IOCTL(KBASE_IOCTL_CINSTR_GWT_START, - kbase_gpu_gwt_start); - KBASE_HANDLE_IOCTL(KBASE_IOCTL_CINSTR_GWT_STOP, - kbase_gpu_gwt_stop); - KBASE_HANDLE_IOCTL_INOUT(KBASE_IOCTL_CINSTR_GWT_DUMP, - kbase_gpu_gwt_dump, - union kbase_ioctl_cinstr_gwt_dump); -#endif + break; + case KBASE_IOCTL_STICKY_RESOURCE_MAP: KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_STICKY_RESOURCE_MAP, kbase_api_sticky_resource_map, struct kbase_ioctl_sticky_resource_map); + break; + case KBASE_IOCTL_STICKY_RESOURCE_UNMAP: KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_STICKY_RESOURCE_UNMAP, kbase_api_sticky_resource_unmap, struct kbase_ioctl_sticky_resource_unmap); + break; + /* Instrumentation. 
*/ + case KBASE_IOCTL_HWCNT_READER_SETUP: + KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_HWCNT_READER_SETUP, + kbase_api_hwcnt_reader_setup, + struct kbase_ioctl_hwcnt_reader_setup); + break; + case KBASE_IOCTL_HWCNT_ENABLE: + KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_HWCNT_ENABLE, + kbase_api_hwcnt_enable, + struct kbase_ioctl_hwcnt_enable); + break; + case KBASE_IOCTL_HWCNT_DUMP: + KBASE_HANDLE_IOCTL(KBASE_IOCTL_HWCNT_DUMP, + kbase_api_hwcnt_dump); + break; + case KBASE_IOCTL_HWCNT_CLEAR: + KBASE_HANDLE_IOCTL(KBASE_IOCTL_HWCNT_CLEAR, + kbase_api_hwcnt_clear); + break; +#ifdef CONFIG_MALI_BIFROST_NO_MALI + case KBASE_IOCTL_HWCNT_SET: + KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_HWCNT_SET, + kbase_api_hwcnt_set, + struct kbase_ioctl_hwcnt_values); + break; +#endif +#ifdef CONFIG_MALI_JOB_DUMP + case KBASE_IOCTL_CINSTR_GWT_START: + KBASE_HANDLE_IOCTL(KBASE_IOCTL_CINSTR_GWT_START, + kbase_gpu_gwt_start); + break; + case KBASE_IOCTL_CINSTR_GWT_STOP: + KBASE_HANDLE_IOCTL(KBASE_IOCTL_CINSTR_GWT_STOP, + kbase_gpu_gwt_stop); + break; + case KBASE_IOCTL_CINSTR_GWT_DUMP: + KBASE_HANDLE_IOCTL_INOUT(KBASE_IOCTL_CINSTR_GWT_DUMP, + kbase_gpu_gwt_dump, + union kbase_ioctl_cinstr_gwt_dump); + break; +#endif #if MALI_UNIT_TEST + case KBASE_IOCTL_TLSTREAM_TEST: KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_TLSTREAM_TEST, kbase_api_tlstream_test, struct kbase_ioctl_tlstream_test); + break; + case KBASE_IOCTL_TLSTREAM_STATS: KBASE_HANDLE_IOCTL_OUT(KBASE_IOCTL_TLSTREAM_STATS, kbase_api_tlstream_stats, struct kbase_ioctl_tlstream_stats); + break; #endif } @@ -2738,6 +2847,88 @@ static ssize_t set_lp_mem_pool_max_size(struct device *dev, static DEVICE_ATTR(lp_mem_pool_max_size, S_IRUGO | S_IWUSR, show_lp_mem_pool_max_size, set_lp_mem_pool_max_size); +/** + * show_js_ctx_scheduling_mode - Show callback for js_ctx_scheduling_mode sysfs + * entry. + * @dev: The device this sysfs file is for. + * @attr: The attributes of the sysfs file. + * @buf: The output buffer to receive the context scheduling mode information. 
+ * + * This function is called to get the context scheduling mode being used by JS. + * + * Return: The number of bytes output to @buf. + */ +static ssize_t show_js_ctx_scheduling_mode(struct device *dev, + struct device_attribute *attr, char * const buf) +{ + struct kbase_device *kbdev; + + kbdev = to_kbase_device(dev); + if (!kbdev) + return -ENODEV; + + return scnprintf(buf, PAGE_SIZE, "%u\n", kbdev->js_ctx_scheduling_mode); +} + +/** + * set_js_ctx_scheduling_mode - Set callback for js_ctx_scheduling_mode sysfs + * entry. + * @dev: The device this sysfs file is for. + * @attr: The attributes of the sysfs file. + * @buf: The value written to the sysfs file. + * @count: The number of bytes written to the sysfs file. + * + * This function is called when the js_ctx_scheduling_mode sysfs file is written + * to. It checks the data written, and if valid updates the ctx scheduling mode + * being used by JS. + * + * Return: @count if the function succeeded. An error code on failure. + */ +static ssize_t set_js_ctx_scheduling_mode(struct device *dev, + struct device_attribute *attr, const char *buf, size_t count) +{ + struct kbasep_kctx_list_element *element; + u32 new_js_ctx_scheduling_mode; + struct kbase_device *kbdev; + unsigned long flags; + int ret; + + kbdev = to_kbase_device(dev); + if (!kbdev) + return -ENODEV; + + ret = kstrtouint(buf, 0, &new_js_ctx_scheduling_mode); + if (ret || new_js_ctx_scheduling_mode >= KBASE_JS_PRIORITY_MODE_COUNT) { + dev_err(kbdev->dev, "Couldn't process js_ctx_scheduling_mode" + " write operation.\n" + "Use format \n"); + return -EINVAL; + } + + if (new_js_ctx_scheduling_mode == kbdev->js_ctx_scheduling_mode) + return count; + + mutex_lock(&kbdev->kctx_list_lock); + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + + /* Update the context priority mode */ + kbdev->js_ctx_scheduling_mode = new_js_ctx_scheduling_mode; + + /* Adjust priority of all the contexts as per the new mode */ + list_for_each_entry(element, &kbdev->kctx_list, link)
+ kbase_js_update_ctx_priority(element->kctx); + + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + mutex_unlock(&kbdev->kctx_list_lock); + + dev_dbg(kbdev->dev, "JS ctx scheduling mode: %u\n", new_js_ctx_scheduling_mode); + + return count; +} + +static DEVICE_ATTR(js_ctx_scheduling_mode, S_IRUGO | S_IWUSR, + show_js_ctx_scheduling_mode, + set_js_ctx_scheduling_mode); #ifdef CONFIG_DEBUG_FS /* Number of entries in serialize_jobs_settings[] */ @@ -3398,6 +3589,7 @@ static struct attribute *kbase_attrs[] = { &dev_attr_mem_pool_max_size.attr, &dev_attr_lp_mem_pool_size.attr, &dev_attr_lp_mem_pool_max_size.attr, + &dev_attr_js_ctx_scheduling_mode.attr, NULL }; diff --git a/drivers/gpu/arm/bifrost/mali_kbase_ctx_sched.h b/drivers/gpu/arm/bifrost/mali_kbase_ctx_sched.h index 400ee623055d..ab57a0dc1ca8 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_ctx_sched.h +++ b/drivers/gpu/arm/bifrost/mali_kbase_ctx_sched.h @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2017 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2017-2018 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -25,7 +25,8 @@ #include -/* The Context Scheduler manages address space assignment and reference +/** + * The Context Scheduler manages address space assignment and reference * counting to kbase_context. The interface has been designed to minimise * interactions between the Job Scheduler and Power Management/MMU to support * the existing Job Scheduler interface. @@ -39,35 +40,30 @@ * code. */ -/* base_ctx_sched_init - Initialise the context scheduler +/** + * kbase_ctx_sched_init - Initialise the context scheduler + * @kbdev: The device for which the context scheduler needs to be initialised * - * @kbdev: The device for which the context scheduler needs to be - * initialised + * This must be called during device initialisation. 
The number of hardware + * address spaces must already be established before calling this function. * * Return: 0 for success, otherwise failure - * - * This must be called during device initilisation. The number of hardware - * address spaces must already be established before calling this function. */ int kbase_ctx_sched_init(struct kbase_device *kbdev); -/* base_ctx_sched_term - Terminate the context scheduler - * - * @kbdev: The device for which the context scheduler needs to be - * terminated +/** + * kbase_ctx_sched_term - Terminate the context scheduler + * @kbdev: The device for which the context scheduler needs to be terminated * * This must be called during device termination after all contexts have been * destroyed. */ void kbase_ctx_sched_term(struct kbase_device *kbdev); -/* kbase_ctx_sched_retain_ctx - Retain a reference to the @ref kbase_context - * +/** + * kbase_ctx_sched_retain_ctx - Retain a reference to the @ref kbase_context * @kctx: The context to which to retain a reference * - * Return: The address space that the context has been assigned to or - * KBASEP_AS_NR_INVALID if no address space was available. - * * This function should be called whenever an address space should be assigned * to a context and programmed onto the MMU. It should typically be called * when jobs are ready to be submitted to the GPU. @@ -77,11 +73,14 @@ void kbase_ctx_sched_term(struct kbase_device *kbdev); * * The kbase_device::mmu_hw_mutex and kbase_device::hwaccess_lock locks must be * held whilst calling this function. + * + * Return: The address space that the context has been assigned to or + * KBASEP_AS_NR_INVALID if no address space was available. */ int kbase_ctx_sched_retain_ctx(struct kbase_context *kctx); -/* kbase_ctx_sched_retain_ctx_refcount - * +/** + * kbase_ctx_sched_retain_ctx_refcount * @kctx: The context to which to retain a reference * * This function only retains a reference to the context. 
It must be called @@ -95,8 +94,8 @@ int kbase_ctx_sched_retain_ctx(struct kbase_context *kctx); */ void kbase_ctx_sched_retain_ctx_refcount(struct kbase_context *kctx); -/* kbase_ctx_sched_release_ctx - Release a reference to the @ref kbase_context - * +/** + * kbase_ctx_sched_release_ctx - Release a reference to the @ref kbase_context * @kctx: The context from which to release a reference * * This function should be called whenever an address space could be unassigned @@ -108,8 +107,8 @@ void kbase_ctx_sched_retain_ctx_refcount(struct kbase_context *kctx); */ void kbase_ctx_sched_release_ctx(struct kbase_context *kctx); -/* kbase_ctx_sched_remove_ctx - Unassign previously assigned address space - * +/** + * kbase_ctx_sched_remove_ctx - Unassign previously assigned address space * @kctx: The context to be removed * * This function should be called when a context is being destroyed. The @@ -121,8 +120,8 @@ void kbase_ctx_sched_release_ctx(struct kbase_context *kctx); */ void kbase_ctx_sched_remove_ctx(struct kbase_context *kctx); -/* kbase_ctx_sched_restore_all_as - Reprogram all address spaces - * +/** + * kbase_ctx_sched_restore_all_as - Reprogram all address spaces * @kbdev: The device for which address spaces to be reprogrammed * * This function shall reprogram all address spaces previously assigned to diff --git a/drivers/gpu/arm/bifrost/mali_kbase_debug_mem_view.c b/drivers/gpu/arm/bifrost/mali_kbase_debug_mem_view.c index d2c57cab1177..857fe9712ef9 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_debug_mem_view.c +++ b/drivers/gpu/arm/bifrost/mali_kbase_debug_mem_view.c @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2013-2017 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2013-2018 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -224,7 +224,7 @@ static int debug_mem_open(struct inode *i, struct file *file) } ret = debug_mem_zone_open(&kctx->reg_rbtree_exec, mem_data); - if (0 != ret) { + if (ret != 0) { kbase_gpu_vm_unlock(kctx); goto out; } diff --git a/drivers/gpu/arm/bifrost/mali_kbase_defs.h b/drivers/gpu/arm/bifrost/mali_kbase_defs.h index c53d9eb002a4..db1b9abfb2bb 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_defs.h +++ b/drivers/gpu/arm/bifrost/mali_kbase_defs.h @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2011-2017 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2011-2018 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -147,11 +147,7 @@ #define MIDGARD_MMU_LEVEL(x) (x) -#if MIDGARD_MMU_VA_BITS > 39 #define MIDGARD_MMU_TOPLEVEL MIDGARD_MMU_LEVEL(0) -#else -#define MIDGARD_MMU_TOPLEVEL MIDGARD_MMU_LEVEL(1) -#endif #define MIDGARD_MMU_BOTTOMLEVEL MIDGARD_MMU_LEVEL(3) @@ -179,6 +175,9 @@ #define KBASE_KATOM_FLAG_BEEN_SOFT_STOPPPED (1<<1) /** Atom has been previously retried to execute */ #define KBASE_KATOM_FLAGS_RERUN (1<<2) +/* Atom submitted with JOB_CHAIN_FLAG bit set in JS_CONFIG_NEXT register, helps to + * disambiguate short-running job chains during soft/hard stopping of jobs + */ #define KBASE_KATOM_FLAGS_JOBCHAIN (1<<3) /** Atom has been previously hard-stopped. */ #define KBASE_KATOM_FLAG_BEEN_HARD_STOPPED (1<<4) @@ -229,8 +228,27 @@ struct kbase_context; struct kbase_device; struct kbase_as; struct kbase_mmu_setup; +struct kbase_ipa_model_vinstr_data; #ifdef CONFIG_DEBUG_FS +/** + * struct base_job_fault_event - keeps track of the atom which faulted or which + * completed after the faulty atom but before the + * debug data for faulty atom was dumped. 
+ * + * @event_code: event code for the atom, should != BASE_JD_EVENT_DONE for the + * atom which faulted. + * @katom: pointer to the atom for which job fault occurred or which completed + * after the faulty atom. + * @job_fault_work: work item, queued only for the faulty atom, which waits for + * the dumping to get completed and then does the bottom half + * of job done for the atoms which followed the faulty atom. + * @head: List head used to store the atom in the global list of faulty + * atoms or context specific list of atoms which got completed + * during the dump. + * @reg_offset: offset of the register to be dumped next, only applicable for + * the faulty atom. + */ struct base_job_fault_event { u32 event_code; @@ -242,6 +260,12 @@ struct base_job_fault_event { #endif +/** + * struct kbase_jd_atom_dependency - Contains the dependency info for an atom. + * @atom: pointer to the dependee atom. + * @dep_type: type of dependency on the dependee @atom, i.e. order or data + * dependency. BASE_JD_DEP_TYPE_INVALID indicates no dependency. + */ struct kbase_jd_atom_dependency { struct kbase_jd_atom *atom; u8 dep_type; @@ -281,14 +305,14 @@ struct kbase_io_history { }; /** - * @brief The function retrieves a read-only reference to the atom field from - * the kbase_jd_atom_dependency structure + * kbase_jd_katom_dep_atom - Retrieves a read-only reference to the + * dependee atom. + * @dep: pointer to the dependency info structure. * - * @param[in] dep kbase jd atom dependency. - * - * @return readonly reference to dependent ATOM. + * Return: readonly reference to dependee atom. 
*/ -static inline const struct kbase_jd_atom * kbase_jd_katom_dep_atom(const struct kbase_jd_atom_dependency *dep) +static inline const struct kbase_jd_atom * +kbase_jd_katom_dep_atom(const struct kbase_jd_atom_dependency *dep) { LOCAL_ASSERT(dep != NULL); @@ -296,12 +320,11 @@ static inline const struct kbase_jd_atom * kbase_jd_katom_dep_atom(const struct } /** - * @brief The function retrieves a read-only reference to the dependency type field from - * the kbase_jd_atom_dependency structure + * kbase_jd_katom_dep_type - Retrieves the dependency type info * - * @param[in] dep kbase jd atom dependency. + * @dep: pointer to the dependency info structure. * - * @return A dependency type value. + * Return: the type of dependency there is on the dependee atom. */ static inline u8 kbase_jd_katom_dep_type(const struct kbase_jd_atom_dependency *dep) { @@ -311,12 +334,11 @@ static inline u8 kbase_jd_katom_dep_type(const struct kbase_jd_atom_dependency * } /** - * @brief Setter macro for dep_atom array entry in kbase_jd_atom - * - * @param[in] dep The kbase jd atom dependency. - * @param[in] a The ATOM to be set as a dependency. - * @param type The ATOM dependency type to be set. - * + * kbase_jd_katom_dep_set - sets up the dependency info structure + * as per the values passed. + * @const_dep: pointer to the dependency info structure to be set up. + * @a: pointer to the dependee atom. + * @type: type of dependency there is on the dependee atom. */ static inline void kbase_jd_katom_dep_set(const struct kbase_jd_atom_dependency *const_dep, struct kbase_jd_atom *a, u8 type) @@ -332,10 +354,9 @@ static inline void kbase_jd_katom_dep_set(const struct kbase_jd_atom_dependency } /** - * @brief Setter macro for dep_atom array entry in kbase_jd_atom - * - * @param[in] dep The kbase jd atom dependency to be cleared. + * kbase_jd_katom_dep_clear - resets the dependency info structure * + * @const_dep: pointer to the dependency info structure to be reset.
*/ static inline void kbase_jd_katom_dep_clear(const struct kbase_jd_atom_dependency *const_dep) { @@ -349,74 +370,212 @@ static inline void kbase_jd_katom_dep_clear(const struct kbase_jd_atom_dependenc dep->dep_type = BASE_JD_DEP_TYPE_INVALID; } +/** + * enum kbase_atom_gpu_rb_state - The state of an atom, pertinent after it becomes + * runnable, with respect to job slot ringbuffer/fifo. + * @KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB: Atom not currently present in slot fifo, which + * implies that either atom has not become runnable + * due to dependency or has completed the execution + * on GPU. + * @KBASE_ATOM_GPU_RB_WAITING_BLOCKED: Atom has been added to slot fifo but is blocked + * due to cross slot dependency, can't be submitted to GPU. + * @KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_PREV: Atom has been added to slot fifo but + * is waiting for the completion of previously added atoms + * in current & other slots, as their protected mode + * requirements do not match with the current atom. + * @KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_TRANSITION: Atom is in slot fifo and is + * waiting for completion of protected mode transition, + * needed before the atom is submitted to GPU. + * @KBASE_ATOM_GPU_RB_WAITING_FOR_CORE_AVAILABLE: Atom is in slot fifo but is waiting + * for the cores, which are needed to execute the job + * chain represented by the atom, to become available + * @KBASE_ATOM_GPU_RB_WAITING_AFFINITY: Atom is in slot fifo but is blocked on + * affinity due to rmu workaround for Hw issue 8987. + * @KBASE_ATOM_GPU_RB_READY: Atom is in slot fifo and can be submitted to GPU. + * @KBASE_ATOM_GPU_RB_SUBMITTED: Atom is in slot fifo and has been submitted to GPU. + * @KBASE_ATOM_GPU_RB_RETURN_TO_JS: Atom must be returned to JS due to some failure, + * but only after the previously added atoms in fifo + * have completed or have also been returned to JS. 
+ */ enum kbase_atom_gpu_rb_state { - /* Atom is not currently present in slot ringbuffer */ KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB, - /* Atom is in slot ringbuffer but is blocked on a previous atom */ KBASE_ATOM_GPU_RB_WAITING_BLOCKED, - /* Atom is in slot ringbuffer but is waiting for a previous protected - * mode transition to complete */ KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_PREV, - /* Atom is in slot ringbuffer but is waiting for proected mode - * transition */ KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_TRANSITION, - /* Atom is in slot ringbuffer but is waiting for cores to become - * available */ KBASE_ATOM_GPU_RB_WAITING_FOR_CORE_AVAILABLE, - /* Atom is in slot ringbuffer but is blocked on affinity */ KBASE_ATOM_GPU_RB_WAITING_AFFINITY, - /* Atom is in slot ringbuffer and ready to run */ KBASE_ATOM_GPU_RB_READY, - /* Atom is in slot ringbuffer and has been submitted to the GPU */ KBASE_ATOM_GPU_RB_SUBMITTED, - /* Atom must be returned to JS as soon as it reaches the head of the - * ringbuffer due to a previous failure */ KBASE_ATOM_GPU_RB_RETURN_TO_JS = -1 }; +/** + * enum kbase_atom_enter_protected_state - The state of an atom with respect to the + * preparation for GPU's entry into protected mode, becomes + * pertinent only after atom's state with respect to slot + * ringbuffer is KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_TRANSITION + * @KBASE_ATOM_ENTER_PROTECTED_CHECK: Starting state. Check if there are any atoms + * currently submitted to GPU and protected mode transition is + * not already in progress. + * @KBASE_ATOM_ENTER_PROTECTED_VINSTR: Wait for vinstr to suspend before entry into + * protected mode. + * @KBASE_ATOM_ENTER_PROTECTED_IDLE_L2: Wait for the L2 to become idle in preparation + * for the coherency change. L2 shall be powered down and GPU shall + * come out of fully coherent mode before entering protected mode. + * @KBASE_ATOM_ENTER_PROTECTED_FINISHED: End state; Prepare coherency change and switch + * GPU to protected mode. 
+ */ enum kbase_atom_enter_protected_state { - /* - * Starting state: - * Check if a transition into protected mode is required. - * - * NOTE: The integer value of this must - * match KBASE_ATOM_EXIT_PROTECTED_CHECK. + /** + * NOTE: The integer value of this must match KBASE_ATOM_EXIT_PROTECTED_CHECK. */ KBASE_ATOM_ENTER_PROTECTED_CHECK = 0, - /* Wait for vinstr to suspend. */ KBASE_ATOM_ENTER_PROTECTED_VINSTR, - /* Wait for the L2 to become idle in preparation for - * the coherency change. */ KBASE_ATOM_ENTER_PROTECTED_IDLE_L2, - /* End state; - * Prepare coherency change. */ KBASE_ATOM_ENTER_PROTECTED_FINISHED, }; +/** + * enum kbase_atom_exit_protected_state - The state of an atom with respect to the + * preparation for GPU's exit from protected mode, becomes + * pertinent only after atom's state with respect to slot + * ringbuffer is KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_TRANSITION + * @KBASE_ATOM_EXIT_PROTECTED_CHECK: Starting state. Check if there are any atoms + * currently submitted to GPU and protected mode transition is + * not already in progress. + * @KBASE_ATOM_EXIT_PROTECTED_IDLE_L2: Wait for the L2 to become idle in preparation + * for the reset, as exiting protected mode requires a reset. + * @KBASE_ATOM_EXIT_PROTECTED_RESET: Issue the reset to trigger exit from protected mode + * @KBASE_ATOM_EXIT_PROTECTED_RESET_WAIT: End state, Wait for the reset to complete + */ enum kbase_atom_exit_protected_state { - /* - * Starting state: - * Check if a transition out of protected mode is required. - * - * NOTE: The integer value of this must - * match KBASE_ATOM_ENTER_PROTECTED_CHECK. + /** + * NOTE: The integer value of this must match KBASE_ATOM_ENTER_PROTECTED_CHECK. */ KBASE_ATOM_EXIT_PROTECTED_CHECK = 0, - /* Wait for the L2 to become idle in preparation - * for the reset. */ KBASE_ATOM_EXIT_PROTECTED_IDLE_L2, - /* Issue the protected reset. */ KBASE_ATOM_EXIT_PROTECTED_RESET, - /* End state; - * Wait for the reset to complete. 
*/ KBASE_ATOM_EXIT_PROTECTED_RESET_WAIT, }; +/** + * struct kbase_ext_res - Contains the info for external resources referred + * by an atom, which have been mapped on GPU side. + * @gpu_address: Start address of the memory region allocated for + * the resource from GPU virtual address space. + * @alloc: pointer to physical pages tracking object, set on + * mapping the external resource on GPU side. + */ struct kbase_ext_res { u64 gpu_address; struct kbase_mem_phy_alloc *alloc; }; +/** + * struct kbase_jd_atom - object representing the atom, containing the complete + * state and attributes of an atom. + * @work: work item for the bottom half processing of the atom, + * by JD or JS, after it got executed on GPU or the input + * fence got signaled + * @start_timestamp: time at which the atom was submitted to the GPU, by + * updating the JS_HEAD_NEXTn register. + * @udata: copy of the user data sent for the atom in base_jd_submit. + * @kctx: Pointer to the base context with which the atom is associated. + * @dep_head: Array of 2 list heads, pointing to the two list of atoms + * which are blocked due to dependency on this atom. + * @dep_item: Array of 2 list heads, used to store the atom in the list of + * other atoms depending on the same dependee atom. + * @dep: Array containing the dependency info for the 2 atoms on which + * the atom depends upon. + * @jd_item: List head used during job dispatch job_done processing - as + * dependencies may not be entirely resolved at this point, + * we need to use a separate list head. + * @in_jd_list: flag set to true if atom's @jd_item is currently on a list, + * prevents atom being processed twice. + * @nr_extres: number of external resources referenced by the atom. + * @extres: pointer to the location containing info about @nr_extres + * external resources referenced by the atom. + * @device_nr: indicates the coregroup with which the atom is associated, + * when BASE_JD_REQ_SPECIFIC_COHERENT_GROUP specified. 
+ * @affinity: bitmask of the shader cores on which the atom can execute. + * @jc: GPU address of the job-chain. + * @softjob_data: Copy of data read from the user space buffer that @jc + * points to. + * @coreref_state: state of the atom with respect to retention of shader + * cores for affinity & power management. + * @fence: Stores either an input or output sync fence, depending + * on soft-job type + * @sync_waiter: Pointer to the sync fence waiter structure passed to the + * callback function on signaling of the input fence. + * @dma_fence: object containing pointers to both input & output fences + * and other related members used for explicit sync through + * soft jobs and for the implicit synchronization required + * on access to external resources. + * @event_code: Event code for the job chain represented by the atom, both + * HW and low-level SW events are represented by event codes. + * @core_req: bitmask of BASE_JD_REQ_* flags specifying either Hw or Sw + * requirements for the job chain represented by the atom. + * @ticks: Number of scheduling ticks for which atom has been running + * on the GPU. + * @sched_priority: Priority of the atom for Job scheduling, as per the + * KBASE_JS_ATOM_SCHED_PRIO_*. + * @poking: Indicates whether poking of MMU is ongoing for the atom, + * as a WA for the issue HW_ISSUE_8316. + * @completed: Wait queue to wait upon for the completion of atom. + * @status: Indicates at high level at what stage the atom is in, + * as per KBASE_JD_ATOM_STATE_*, i.e. whether it is not in + * use or it's queued in JD or given to JS or submitted to Hw + * or it completed the execution on Hw. + * @work_id: used for GPU tracepoints, it's a snapshot of the 'work_id' + * counter in kbase_jd_context which is incremented on + * every call to base_jd_submit. + * @slot_nr: Job slot chosen for the atom. + * @atom_flags: bitmask of KBASE_KATOM_FLAG* flags capturing the exact + * low level state of the atom.
+ * @retry_count: Number of times this atom has been retried. Used by replay + * soft job. + * @gpu_rb_state: one of the KBASE_ATOM_GPU_RB_* states, precisely tracking + * atom's state after it has entered Job scheduler on becoming + * runnable. Atom could be blocked due to cross slot dependency + * or waiting for the shader cores to become available or + * waiting for protected mode transitions to complete. + * @need_cache_flush_cores_retained: flag indicating that manual flush of GPU + * cache is needed for the atom and the shader cores used + * for atom have been kept on. + * @blocked: flag indicating that atom's resubmission to GPU is + * blocked till the work item is scheduled to return the + * atom to JS. + * @pre_dep: Pointer to atom that this atom has same-slot dependency on + * @post_dep: Pointer to atom that has same-slot dependency on this atom + * @x_pre_dep: Pointer to atom that this atom has cross-slot dependency on + * @x_post_dep: Pointer to atom that has cross-slot dependency on this atom + * @flush_id: The GPU's flush count recorded at the time of submission, + * used for the cache flush optimisation + * @fault_event: Info for dumping the debug data on Job fault. + * @queue: List head used for 4 different purposes : + * Adds atom to the list of dma-buf fence waiting atoms. + * Adds atom to the list of atoms blocked due to cross + * slot dependency. + * Adds atom to the list of softjob atoms for which JIT + * allocation has been deferred + * Adds atom to the list of softjob atoms waiting for the + * signaling of fence. + * @jit_node: Used to keep track of all JIT free/alloc jobs in submission order + * @jit_blocked: Flag indicating that JIT allocation requested through + * softjob atom will be reattempted after the impending + * free of other active JIT allocations. + * @will_fail_event_code: If non-zero, this indicates that the atom will fail + * with the set event_code when the atom is processed.
+ * Used for special handling of atoms, which have a data + * dependency on the failed atoms. + * @protected_state: State of the atom, as per KBASE_ATOM_(ENTER|EXIT)_PROTECTED_*, + * when transitioning into or out of protected mode. Atom will + * be either entering or exiting the protected mode. + * @runnable_tree_node: The node added to context's job slot specific rb tree + * when the atom becomes runnable. + * @age: Age of atom relative to other atoms in the context, is + * snapshot of the age_count counter in kbase context. + */ struct kbase_jd_atom { struct work_struct work; ktime_t start_timestamp; @@ -427,12 +586,7 @@ struct kbase_jd_atom { struct list_head dep_head[2]; struct list_head dep_item[2]; const struct kbase_jd_atom_dependency dep[2]; - /* List head used during job dispatch job_done processing - as - * dependencies may not be entirely resolved at this point, we need to - * use a separate list head. */ struct list_head jd_item; - /* true if atom's jd_item is currently on a list. Prevents atom being - * processed twice. */ bool in_jd_list; u16 nr_extres; @@ -441,11 +595,9 @@ struct kbase_jd_atom { u32 device_nr; u64 affinity; u64 jc; - /* Copy of data read from the user space buffer that jc points to */ void *softjob_data; enum kbase_atom_coreref_state coreref_state; #if defined(CONFIG_SYNC) - /* Stores either an input or output fence, depending on soft-job type */ struct sync_fence *fence; struct sync_fence_waiter sync_waiter; #endif /* CONFIG_SYNC */ @@ -519,26 +671,22 @@ struct kbase_jd_atom { /* Note: refer to kbasep_js_atom_retained_state, which will take a copy of some of the following members */ enum base_jd_event_code event_code; - base_jd_core_req core_req; /**< core requirements */ + base_jd_core_req core_req; u32 ticks; - /* JS atom priority with respect to other atoms on its kctx. 
*/ int sched_priority; - int poking; /* BASE_HW_ISSUE_8316 */ + int poking; wait_queue_head_t completed; enum kbase_jd_atom_state status; #ifdef CONFIG_GPU_TRACEPOINTS int work_id; #endif - /* Assigned after atom is completed. Used to check whether PRLAM-10676 workaround should be applied */ int slot_nr; u32 atom_flags; - /* Number of times this atom has been retried. Used by replay soft job. - */ int retry_count; enum kbase_atom_gpu_rb_state gpu_rb_state; @@ -547,45 +695,25 @@ struct kbase_jd_atom { atomic_t blocked; - /* Pointer to atom that this atom has same-slot dependency on */ struct kbase_jd_atom *pre_dep; - /* Pointer to atom that has same-slot dependency on this atom */ struct kbase_jd_atom *post_dep; - /* Pointer to atom that this atom has cross-slot dependency on */ struct kbase_jd_atom *x_pre_dep; - /* Pointer to atom that has cross-slot dependency on this atom */ struct kbase_jd_atom *x_post_dep; - /* The GPU's flush count recorded at the time of submission, used for - * the cache flush optimisation */ u32 flush_id; - struct kbase_jd_atom_backend backend; #ifdef CONFIG_DEBUG_FS struct base_job_fault_event fault_event; #endif - /* List head used for three different purposes: - * 1. Overflow list for JS ring buffers. If an atom is ready to run, - * but there is no room in the JS ring buffer, then the atom is put - * on the ring buffer's overflow list using this list node. - * 2. List of waiting soft jobs. - */ struct list_head queue; - /* Used to keep track of all JIT free/alloc jobs in submission order - */ struct list_head jit_node; bool jit_blocked; - /* If non-zero, this indicates that the atom will fail with the set - * event_code when the atom is processed. */ enum base_jd_event_code will_fail_event_code; - /* Atoms will only ever be transitioning into, or out of - * protected mode so we do not need two separate fields. 
- */ union { enum kbase_atom_enter_protected_state enter; enum kbase_atom_exit_protected_state exit; @@ -593,7 +721,6 @@ struct kbase_jd_atom { struct rb_node runnable_tree_node; - /* 'Age' of atom relative to other atoms in the context. */ u32 age; }; @@ -612,38 +739,57 @@ static inline bool kbase_jd_katom_is_protected(const struct kbase_jd_atom *katom #define KBASE_JD_DEP_QUEUE_SIZE 256 +/** + * struct kbase_jd_context - per context object encapsulating all the Job dispatcher + * related state. + * @lock: lock to serialize the updates made to the Job dispatcher + * state and kbase_jd_atom objects. + * @sched_info: Structure encapsulating all the Job scheduling info. + * @atoms: Array of the objects representing atoms, containing + * the complete state and attributes of an atom. + * @job_nr: Tracks the number of atoms being processed by the + * kbase. This includes atoms that are not tracked by + * scheduler: 'not ready to run' & 'dependency-only' jobs. + * @zero_jobs_wait: Waitq that reflects whether there are no jobs + * (including SW-only dependency jobs). This is set + * when no jobs are present on the ctx, and clear when + * there are jobs. + * This must be updated atomically with @job_nr. + * note: Job Dispatcher knows about more jobs than the + * Job Scheduler as it is unaware of jobs that are + * blocked on dependencies and SW-only dependency jobs. + * This waitq can be waited upon to find out when the + * context jobs are all done/cancelled (including those + * that might've been blocked on dependencies) - and so, + * whether it can be terminated. However, it should only + * be terminated once it is not present in the run-pool. 
+ * Since the waitq is only set under @lock, the waiter + * should also briefly obtain and drop @lock to guarantee + * that the setter has completed its work on the kbase_context. + * @job_done_wq: Workqueue to which the per atom work item is queued + * for bottom half processing when the atom completes + * execution on GPU or the input fence gets signaled. + * @tb_lock: Lock to serialize the write access made to @tb + * to store the register access trace messages. + * @tb: Pointer to the Userspace accessible buffer storing + * the trace messages for register read/write accesses + * made by the Kbase. The buffer is filled in circular + * fashion. + * @tb_wrap_offset: Offset to the end location in the trace buffer, the + * write pointer is moved to the beginning on reaching + * this offset. + * @work_id: atomic variable used for GPU tracepoints, incremented + * on every call to base_jd_submit. + */ struct kbase_jd_context { struct mutex lock; struct kbasep_js_kctx_info sched_info; struct kbase_jd_atom atoms[BASE_JD_ATOM_COUNT]; - /** Tracks all job-dispatch jobs. This includes those not tracked by - * the scheduler: 'not ready to run' and 'dependency-only' jobs. */ u32 job_nr; - /** Waitq that reflects whether there are no jobs (including SW-only - * dependency jobs). This is set when no jobs are present on the ctx, - * and clear when there are jobs. - * - * @note: Job Dispatcher knows about more jobs than the Job Scheduler: - * the Job Scheduler is unaware of jobs that are blocked on dependencies, - * and SW-only dependency jobs. - * - * This waitq can be waited upon to find out when the context jobs are all - * done/cancelled (including those that might've been blocked on - * dependencies) - and so, whether it can be terminated. However, it should - * only be terminated once it is not present in the run-pool (see - * kbasep_js_kctx_info::ctx::is_scheduled).
- * - * Since the waitq is only set under kbase_jd_context::lock, - * the waiter should also briefly obtain and drop kbase_jd_context::lock to - * guarentee that the setter has completed its work on the kbase_context - * - * This must be updated atomically with: - * - kbase_jd_context::job_nr */ wait_queue_head_t zero_jobs_wait; - /** Job Done workqueue. */ struct workqueue_struct *job_done_wq; spinlock_t tb_lock; @@ -675,15 +821,34 @@ struct kbase_mmu_setup { }; /** - * Important: Our code makes assumptions that a struct kbase_as structure is always at - * kbase_device->as[number]. This is used to recover the containing - * struct kbase_device from a struct kbase_as structure. - * - * Therefore, struct kbase_as structures must not be allocated anywhere else. + * struct kbase_as - object representing an address space of GPU. + * @number: Index at which this address space structure is present + * in an array of address space structures embedded inside the + * struct kbase_device. + * @pf_wq: Workqueue for processing work items related to Bus fault + * and Page fault handling. + * @work_pagefault: Work item for the Page fault handling. + * @work_busfault: Work item for the Bus fault handling. + * @fault_type: Type of fault which occurred for this address space, + * regular/unexpected Bus or Page fault. + * @protected_mode: Flag indicating whether the fault occurred in protected + * mode or not. + * @fault_status: Records the fault status as reported by Hw. + * @fault_addr: Records the faulting address. + * @fault_extra_addr: Records the secondary fault address. + * @current_setup: Stores the MMU configuration for this address space. + * @poke_wq: Workqueue to process the work items queue for poking the + * MMU as a WA for BASE_HW_ISSUE_8316. + * @poke_work: Work item to do the poking of MMU for this address space. + * @poke_refcount: Refcount for the need of poking MMU. While the refcount is + * non zero the poking of MMU will continue. + * Protected by hwaccess_lock.
+ * @poke_state: State indicating whether poking is in progress or it has + * been stopped. Protected by hwaccess_lock. + * @poke_timer: Timer used to schedule the poking at regular intervals. */ struct kbase_as { int number; - struct workqueue_struct *pf_wq; struct work_struct work_pagefault; struct work_struct work_busfault; @@ -692,15 +857,10 @@ struct kbase_as { u32 fault_status; u64 fault_addr; u64 fault_extra_addr; - struct kbase_mmu_setup current_setup; - - /* BASE_HW_ISSUE_8316 */ struct workqueue_struct *poke_wq; struct work_struct poke_work; - /** Protected by hwaccess_lock */ int poke_refcount; - /** Protected by hwaccess_lock */ kbase_as_poke_state poke_state; struct hrtimer poke_timer; }; @@ -738,6 +898,37 @@ enum kbase_trace_code { #define KBASE_TRACE_FLAG_REFCOUNT (((u8)1) << 0) #define KBASE_TRACE_FLAG_JOBSLOT (((u8)1) << 1) +/** + * struct kbase_trace - object representing a trace message added to trace buffer + * kbase_device::trace_rbuf + * @timestamp: CPU timestamp at which the trace message was added. + * @thread_id: id of the thread in the context of which trace message + * was added. + * @cpu: indicates which CPU the @thread_id was scheduled on when + * the trace message was added. + * @ctx: Pointer to the kbase context for which the trace message + * was added. Will be NULL for certain trace messages like + * for traces added corresponding to power management events. + * Will point to the appropriate context corresponding to + * job-slot & context's reference count related events. + * @katom: indicates if the trace message has atom related info. + * @atom_number: id of the atom for which trace message was added. + * Only valid if @katom is true. + * @atom_udata: Copy of the user data sent for the atom in base_jd_submit. + * Only valid if @katom is true. + * @gpu_addr: GPU address of the job-chain represented by atom. Could + * be valid even if @katom is false. + * @info_val: value specific to the type of event being traced. 
For the + * case where @katom is true, will be set to atom's affinity, + * i.e. bitmask of shader cores chosen for atom's execution. + * @code: Identifies the event, refer enum kbase_trace_code. + * @jobslot: job-slot for which trace message was added, valid only for + * job-slot management events. + * @refcount: reference count for the context, valid for certain events + * related to scheduler core and policy. + * @flags: indicates if info related to @jobslot & @refcount is present + * in the trace message, used during dumping of the message. + */ struct kbase_trace { struct timespec timestamp; u32 thread_id; @@ -904,19 +1095,23 @@ struct kbase_pm_device_data { /** * struct kbase_mem_pool - Page based memory pool for kctx/kbdev - * @kbdev: Kbase device where memory is used - * @cur_size: Number of free pages currently in the pool (may exceed @max_size - * in some corner cases) - * @max_size: Maximum number of free pages in the pool - * @order: order = 0 refers to a pool of 4 KB pages - * order = 9 refers to a pool of 2 MB pages (2^9 * 4KB = 2 MB) - * @pool_lock: Lock protecting the pool - must be held when modifying @cur_size - * and @page_list - * @page_list: List of free pages in the pool - * @reclaim: Shrinker for kernel reclaim of free pages - * @next_pool: Pointer to next pool where pages can be allocated when this pool - * is empty. Pages will spill over to the next pool when this pool - * is full. Can be NULL if there is no next pool. 
+ * @kbdev: Kbase device where memory is used + * @cur_size: Number of free pages currently in the pool (may exceed + * @max_size in some corner cases) + * @max_size: Maximum number of free pages in the pool + * @order: order = 0 refers to a pool of 4 KB pages + * order = 9 refers to a pool of 2 MB pages (2^9 * 4KB = 2 MB) + * @pool_lock: Lock protecting the pool - must be held when modifying + * @cur_size and @page_list + * @page_list: List of free pages in the pool + * @reclaim: Shrinker for kernel reclaim of free pages + * @next_pool: Pointer to next pool where pages can be allocated when this + * pool is empty. Pages will spill over to the next pool when + * this pool is full. Can be NULL if there is no next pool. + * @dying: true if the pool is being terminated, and any ongoing + * operations should be abandoned + * @dont_reclaim: true if the shrinker is forbidden from reclaiming memory from + * this pool, eg during a grow operation */ struct kbase_mem_pool { struct kbase_device *kbdev; @@ -928,6 +1123,9 @@ struct kbase_mem_pool { struct shrinker reclaim; struct kbase_mem_pool *next_pool; + + bool dying; + bool dont_reclaim; }; /** @@ -963,9 +1161,261 @@ struct kbase_mmu_mode const *kbase_mmu_mode_get_aarch64(void); #define DEVNAME_SIZE 16 +/** + * struct kbase_device - Object representing an instance of GPU platform device, + * allocated from the probe method of mali driver. + * @hw_quirks_sc: Configuration to be used for the shader cores as per + * the HW issues present in the GPU. + * @hw_quirks_tiler: Configuration to be used for the Tiler as per the HW + * issues present in the GPU. + * @hw_quirks_mmu: Configuration to be used for the MMU as per the HW + * issues present in the GPU. + * @hw_quirks_jm: Configuration to be used for the Job Manager as per + * the HW issues present in the GPU. + * @entry: Links the device instance to the global list of GPU + * devices. The list would have as many entries as there + * are GPU device instances. 
+ * @dev: Pointer to the kernel's generic/base representation + * of the GPU platform device. + * @mdev: Pointer to the miscellaneous device registered to + * provide Userspace access to kernel driver through the + * device file /dev/malixx. + * @reg_start: Base address of the region in physical address space + * where GPU registers have been mapped. + * @reg_size: Size of the region containing GPU registers + * @reg: Kernel virtual address of the region containing GPU + * registers, using which Driver will access the registers. + * @irqs: Array containing IRQ resource info for 3 types of + * interrupts : Job scheduling, MMU & GPU events (like + * power management, cache etc.) + * @clock: Pointer to the input clock resource (having an id of 0), + * referenced by the GPU device node. + * @regulator: Pointer to the struct corresponding to the regulator + * for GPU device + * @devname: string containing the name used for GPU device instance, + * miscellaneous device is registered using the same name. + * @model: Pointer, valid only when Driver is compiled to not access + * the real GPU Hw, to the dummy model which tries to mimic + * to some extent the state & behavior of GPU Hw in response + * to the register accesses made by the Driver. + * @irq_slab: slab cache for allocating the work items queued when + * model mimics raising of IRQ to cause an interrupt on CPU. + * @irq_workq: workqueue for processing the irq work items. + * @serving_job_irq: function to execute work items queued when model mimics + * the raising of JS irq, mimics the interrupt handler + * processing JS interrupts. + * @serving_gpu_irq: function to execute work items queued when model mimics + * the raising of GPU irq, mimics the interrupt handler + * processing GPU interrupts. + * @serving_mmu_irq: function to execute work items queued when model mimics + * the raising of MMU irq, mimics the interrupt handler + * processing MMU interrupts. 
+ * @reg_op_lock: lock used by model to serialize the handling of register + * accesses made by the driver. + * @pm: Per device object for storing data for power management + * framework. + * @js_data: Per device object encapsulating the current context of + * Job Scheduler, which is global to the device and is not + * tied to any particular struct kbase_context running on + * the device + * @mem_pool: Object containing the state for global pool of 4KB size + * physical pages which can be used by all the contexts. + * @lp_mem_pool: Object containing the state for global pool of 2MB size + * physical pages which can be used by all the contexts. + * @memdev: keeps track of the in use physical pages allocated by + * the Driver. + * @mmu_mode: Pointer to the object containing methods for programming + * the MMU, depending on the type of MMU supported by Hw. + * @as: Array of objects representing address spaces of GPU. + * @as_free: Bitpattern of free/available address space slots + * @as_to_kctx: Array of pointers to struct kbase_context, having + * GPU address spaces assigned to them. + * @mmu_mask_change: Lock to serialize the access to MMU interrupt mask + * register used in the handling of Bus & Page faults. + * @gpu_props: Object containing complete information about the + * configuration/properties of GPU HW device in use. + * @hw_issues_mask: List of SW workarounds for HW issues + * @hw_features_mask: List of available HW features. + * @shader_inuse_bitmap: Bitmaps of shader cores that are currently in use. + * These should be kept up to date by the job scheduler. + * The bit to be set in this bitmap should already be set + * in the @shader_needed_bitmap. + * @pm.power_change_lock should be held when accessing + * these members.
+ * @shader_inuse_cnt: Usage count for each of the 64 shader cores + * @shader_needed_bitmap: Bitmaps of cores the JS needs for jobs ready to run + * kbase_pm_check_transitions_nolock() should be called + * when the bitmap is modified to update the power + * management system and allow transitions to occur. + * @shader_needed_cnt: Count for each of the 64 shader cores, incremented + * when the core is requested for use and decremented + * later when the core is known to be powered up for use. + * @tiler_inuse_cnt: Usage count for the Tiler block. @tiler_needed_cnt + * should be non zero at the time of incrementing the + * usage count. + * @tiler_needed_cnt: Count for the Tiler block shader cores, incremented + * when Tiler is requested for use and decremented + * later when Tiler is known to be powered up for use. + * @disjoint_event: struct for keeping track of the disjoint information, + * that whether the GPU is in a disjoint state and the + * number of disjoint events that have occurred on GPU. + * @l2_users_count: Refcount for tracking users of the l2 cache, e.g. + * when using hardware counter instrumentation. + * @shader_available_bitmap: Bitmap of shader cores that are currently available, + * powered up and the power policy is happy for jobs + * to be submitted to these cores. These are updated + * by the power management code. The job scheduler + * should avoid submitting new jobs to any cores + * that are not marked as available. + * @tiler_available_bitmap: Bitmap of tiler units that are currently available. + * @l2_available_bitmap: Bitmap of the currently available Level 2 caches. + * @stack_available_bitmap: Bitmap of the currently available Core stacks. + * @shader_ready_bitmap: Bitmap of shader cores that are ready (powered on) + * @shader_transitioning_bitmap: Bitmap of shader cores that are currently changing + * power state. 
+ * @nr_hw_address_spaces: Number of address spaces actually available in the + * GPU, remains constant after driver initialisation. + * @nr_user_address_spaces: Number of address spaces available to user contexts + * @hwcnt: Structure used for instrumentation and HW counters + * dumping + * @vinstr_ctx: vinstr context created per device + * @trace_lock: Lock to serialize the access to trace buffer. + * @trace_first_out: Index/offset in the trace buffer at which the first + * unread message is present. + * @trace_next_in: Index/offset in the trace buffer at which the new + * message will be written. + * @trace_rbuf: Pointer to the buffer storing debug messages/prints + * tracing the various events in Driver. + * The buffer is filled in circular fashion. + * @reset_timeout_ms: Number of milliseconds to wait for the soft stop to + * complete for the GPU jobs before proceeding with the + * GPU reset. + * @cacheclean_lock: Lock to serialize the clean & invalidation of GPU caches, + * between Job Manager backend & Instrumentation code. + * @platform_context: Platform specific private data to be accessed by + * platform specific config files only. + * @kctx_list: List of kbase_contexts created for the device, including + * the kbase_context created for vinstr_ctx. + * @kctx_list_lock: Lock protecting concurrent accesses to @kctx_list. + * @devfreq_profile: Describes devfreq profile for the Mali GPU device, passed + * to devfreq_add_device() to add devfreq feature to Mali + * GPU device. + * @devfreq: Pointer to devfreq structure for Mali GPU device, + * returned on the call to devfreq_add_device(). + * @current_freq: The real frequency, corresponding to @current_nominal_freq, + * at which the Mali GPU device is currently operating, as + * retrieved from @opp_table in the target callback of + * @devfreq_profile. 
+ * @current_nominal_freq: The nominal frequency currently used for the Mali GPU + * device as retrieved through devfreq_recommended_opp() + * using the freq value passed as an argument to target + * callback of @devfreq_profile + * @current_voltage: The voltage corresponding to @current_nominal_freq, as + * retrieved through dev_pm_opp_get_voltage(). + * @current_core_mask: bitmask of shader cores that are currently desired & + * enabled, corresponding to @current_nominal_freq as + * retrieved from @opp_table in the target callback of + * @devfreq_profile. + * @opp_table: Pointer to the lookup table for converting between nominal + * OPP (operating performance point) frequency, and real + * frequency and core mask. This table is constructed according + * to operating-points-v2-mali table in devicetree. + * @num_opps: Number of operating performance points available for the Mali + * GPU device. + * @devfreq_cooling: Pointer returned on registering devfreq cooling device + * corresponding to @devfreq. + * @ipa_use_configured_model: set to TRUE when configured model is used for IPA and + * FALSE when fallback model is used. + * @ipa: Top level structure for IPA, containing pointers to both + * configured & fallback models. + * @timeline: Stores the global timeline tracking information. + * @job_fault_debug: Flag to control the dumping of debug data for job faults, + * set when the 'job_fault' debugfs file is opened. + * @mali_debugfs_directory: Root directory for the debugfs files created by the driver + * @debugfs_ctx_directory: Directory inside the @mali_debugfs_directory containing + * a sub-directory for every context. + * @debugfs_as_read_bitmap: bitmap of address spaces for which the bus or page fault + * has occurred. + * @job_fault_wq: Waitqueue to block the job fault dumping daemon till the + * occurrence of a job fault. 
+ * @job_fault_resume_wq: Waitqueue on which every context with a faulty job waits + * for the job fault dumping to complete before it can + * do bottom half of job done for the atoms which followed + * the faulty atom. + * @job_fault_resume_workq: workqueue to process the work items queued for the faulty + * atoms, whereby the work item function waits for the dumping + * to get completed. + * @job_fault_event_list: List of atoms, each belonging to a different context, which + * generated a job fault. + * @job_fault_event_lock: Lock to protect concurrent accesses to @job_fault_event_list + * @regs_dump_debugfs_data: Contains the offset of register to be read through debugfs + * file "read_register". + * @kbase_profiling_controls: Profiling controls set by gator to control frame buffer + * dumping and s/w counter reporting. + * @force_replay_limit: Number of gpu jobs, having replay atoms associated with them, + * that are run before a job is forced to fail and replay. + * Set to 0 to disable forced failures. + * @force_replay_count: Count of gpu jobs, having replay atoms associated with them, + * between forced failures. Incremented on each gpu job which + * has replay atoms dependent on it. A gpu job is forced to + * fail once this is greater than or equal to @force_replay_limit + * @force_replay_core_req: Core requirements, set through the sysfs file, for the replay + * job atoms to consider the associated gpu job for forceful + * failure and replay. May be zero + * @force_replay_random: Set to 1 to randomize the @force_replay_limit, in the + * range of 1 - KBASEP_FORCE_REPLAY_RANDOM_LIMIT. + * @ctx_num: Total number of contexts created for the device. + * @io_history: Object keeping a track of all recent + * register accesses. The history of register accesses + * can be read through "regs_history" debugfs file. + * @hwaccess: Contains a pointer to active kbase context and GPU + * backend specific data for HW access layer.
+ * @faults_pending: Count of page/bus faults waiting for bottom half processing + * via workqueues. + * @poweroff_pending: Set when power off operation for GPU is started, reset when + * power on for GPU is started. + * @infinite_cache_active_default: Set to enable using infinite cache for all the + * allocations of a new context. + * @mem_pool_max_size_default: Initial/default value for the maximum size of both + * types of pool created for a new context. + * @current_gpu_coherency_mode: coherency mode in use, which can be different + * from @system_coherency, when using protected mode. + * @system_coherency: coherency mode as retrieved from the device tree. + * @cci_snoop_enabled: Flag to track when CCI snoops have been enabled. + * @snoop_enable_smc: SMC function ID to call into Trusted firmware to + * enable cache snooping. Value of 0 indicates that it + * is not used. + * @snoop_disable_smc: SMC function ID to call to disable cache snooping. + * @protected_ops: Pointer to the methods for switching in or out of the + * protected mode, as per the @protected_dev being used. + * @protected_dev: Pointer to the protected mode switcher device attached + * to the GPU device retrieved through device tree if + * GPU does not support protected mode switching natively. + * @protected_mode: set to TRUE when GPU is put into protected mode + * @protected_mode_transition: set to TRUE when GPU is transitioning into or + * out of protected mode. + * @protected_mode_support: set to true if protected mode is supported. + * @buslogger: Pointer to the structure required for interfacing + * with the bus logger module to set the size of buffer + * used by the module for capturing bus logs. + * @irq_reset_flush: Flag to indicate that GPU reset is in-flight and flush of + * IRQ + bottom half is being done, to prevent the writes + * to MMU_IRQ_CLEAR & MMU_IRQ_MASK registers. + * @inited_subsys: Bitmap of inited sub systems at the time of device probe.
+ * Used during device remove or for handling error in probe. + * @hwaccess_lock: Lock, which can be taken from IRQ context, to serialize + * the updates made to Job dispatcher + scheduler states. + * @mmu_hw_mutex: Protects access to MMU operations and address space + * related state. + * @serialize_jobs: Currently used mode for serialization of jobs, both + * intra & inter slots serialization is supported. + * @backup_serialize_jobs: Copy of the original value of @serialize_jobs taken + * when GWT is enabled. Used to restore the original value + * on disabling of GWT. + * @js_ctx_scheduling_mode: Context scheduling mode currently being used by + * Job Scheduler + */ struct kbase_device { - s8 slot_submit_count_irq[BASE_JM_MAX_NR_SLOTS]; - u32 hw_quirks_sc; u32 hw_quirks_tiler; u32 hw_quirks_mmu; @@ -1007,12 +1457,7 @@ struct kbase_device { struct kbase_mmu_mode const *mmu_mode; struct kbase_as as[BASE_MAX_NR_AS]; - /* The below variables (as_free and as_to_kctx) are managed by the - * Context Scheduler. The kbasep_js_device_data::runpool_irq::lock must - * be held whilst accessing these. - */ u16 as_free; /* Bitpattern of free Address Spaces */ - /* Mapping from active Address Spaces to kbase_context */ struct kbase_context *as_to_kctx[BASE_MAX_NR_AS]; @@ -1020,53 +1465,28 @@ struct kbase_device { struct kbase_gpu_props gpu_props; - /** List of SW workarounds for HW issues */ unsigned long hw_issues_mask[(BASE_HW_ISSUE_END + BITS_PER_LONG - 1) / BITS_PER_LONG]; - /** List of features available */ unsigned long hw_features_mask[(BASE_HW_FEATURE_END + BITS_PER_LONG - 1) / BITS_PER_LONG]; - /* Bitmaps of cores that are currently in use (running jobs). - * These should be kept up to date by the job scheduler. - * - * pm.power_change_lock should be held when accessing these members. - * - * kbase_pm_check_transitions_nolock() should be called when bits are - * cleared to update the power management system and allow transitions to - * occur. 
*/ u64 shader_inuse_bitmap; - /* Refcount for cores in use */ u32 shader_inuse_cnt[64]; - /* Bitmaps of cores the JS needs for jobs ready to run */ u64 shader_needed_bitmap; - /* Refcount for cores needed */ u32 shader_needed_cnt[64]; u32 tiler_inuse_cnt; u32 tiler_needed_cnt; - /* struct for keeping track of the disjoint information - * - * The state is > 0 if the GPU is in a disjoint state. Otherwise 0 - * The count is the number of disjoint events that have occurred on the GPU - */ struct { atomic_t count; atomic_t state; } disjoint_event; - /* Refcount for tracking users of the l2 cache, e.g. when using hardware counter instrumentation. */ u32 l2_users_count; - /* Bitmaps of cores that are currently available (powered up and the power policy is happy for jobs to be - * submitted to these cores. These are updated by the power management code. The job scheduler should avoid - * submitting new jobs to any cores that are not marked as available. - * - * pm.power_change_lock should be held when accessing these members. 
- */ u64 shader_available_bitmap; u64 tiler_available_bitmap; u64 l2_available_bitmap; @@ -1075,10 +1495,9 @@ struct kbase_device { u64 shader_ready_bitmap; u64 shader_transitioning_bitmap; - s8 nr_hw_address_spaces; /**< Number of address spaces in the GPU (constant after driver initialisation) */ - s8 nr_user_address_spaces; /**< Number of address spaces available to user contexts */ + s8 nr_hw_address_spaces; + s8 nr_user_address_spaces; - /* Structure used for instrumentation and HW counters dumping */ struct kbase_hwcnt { /* The lock should be used when accessing any of the following members */ spinlock_t lock; @@ -1102,10 +1521,8 @@ struct kbase_device { struct mutex cacheclean_lock; - /* Platform specific private data to be accessed by mali_kbase_config_xxx.c only */ void *platform_context; - /* List of kbase_contexts created */ struct list_head kctx_list; struct mutex kctx_list_lock; @@ -1124,13 +1541,32 @@ struct kbase_device { #else struct thermal_cooling_device *devfreq_cooling; #endif - /* Current IPA model - true for configured model, false for fallback */ atomic_t ipa_use_configured_model; struct { /* Access to this struct must be with ipa.lock held */ struct mutex lock; struct kbase_ipa_model *configured_model; struct kbase_ipa_model *fallback_model; + + /* + * gpu_active_callback - Inform IPA that GPU is now active + * @model_data: Pointer to model data + */ + void (*gpu_active_callback)( + struct kbase_ipa_model_vinstr_data *model_data); + + /* + * gpu_idle_callback - Inform IPA that GPU is now idle + * @model_data: Pointer to model data + */ + void (*gpu_idle_callback)( + struct kbase_ipa_model_vinstr_data *model_data); + + /* Model data to pass to ipa_gpu_active/idle() */ + struct kbase_ipa_model_vinstr_data *model_data; + + /* true if IPA is currently using vinstr */ + bool vinstr_active; } ipa; #endif /* CONFIG_DEVFREQ_THERMAL */ #endif /* CONFIG_MALI_BIFROST_DEVFREQ */ @@ -1140,79 +1576,52 @@ struct kbase_device { struct 
kbase_trace_kbdev_timeline timeline; #endif - /* - * Control for enabling job dump on failure, set when control debugfs - * is opened. - */ bool job_fault_debug; #ifdef CONFIG_DEBUG_FS - /* directory for debugfs entries */ struct dentry *mali_debugfs_directory; - /* Root directory for per context entry */ struct dentry *debugfs_ctx_directory; #ifdef CONFIG_MALI_BIFROST_DEBUG - /* bit for each as, set if there is new data to report */ u64 debugfs_as_read_bitmap; #endif /* CONFIG_MALI_BIFROST_DEBUG */ - /* failed job dump, used for separate debug process */ wait_queue_head_t job_fault_wq; wait_queue_head_t job_fault_resume_wq; struct workqueue_struct *job_fault_resume_workq; struct list_head job_fault_event_list; spinlock_t job_fault_event_lock; - struct kbase_context *kctx_fault; #if !MALI_CUSTOMER_RELEASE - /* Per-device data for register dumping interface */ struct { - u16 reg_offset; /* Offset of a GPU_CONTROL register to be - dumped upon request */ + u16 reg_offset; } regs_dump_debugfs_data; #endif /* !MALI_CUSTOMER_RELEASE */ #endif /* CONFIG_DEBUG_FS */ - /* fbdump profiling controls set by gator */ u32 kbase_profiling_controls[FBDUMP_CONTROL_MAX]; #if MALI_CUSTOMER_RELEASE == 0 - /* Number of jobs that are run before a job is forced to fail and - * replay. May be KBASEP_FORCE_REPLAY_DISABLED, to disable forced - * failures. */ int force_replay_limit; - /* Count of jobs between forced failures. Incremented on each job. A - * job is forced to fail once this is greater than or equal to - * force_replay_limit. */ int force_replay_count; - /* Core requirement for jobs to be failed and replayed. May be zero. */ base_jd_core_req force_replay_core_req; - /* true if force_replay_limit should be randomized. The random - * value will be in the range of 1 - KBASEP_FORCE_REPLAY_RANDOM_LIMIT. 
- */ bool force_replay_random; #endif - /* Total number of created contexts */ atomic_t ctx_num; #ifdef CONFIG_DEBUG_FS - /* Holds the most recent register accesses */ struct kbase_io_history io_history; #endif /* CONFIG_DEBUG_FS */ struct kbase_hwaccess_data hwaccess; - /* Count of page/bus faults waiting for workqueues to process */ atomic_t faults_pending; - /* true if GPU is powered off or power off operation is in progress */ bool poweroff_pending; - /* defaults for new context created for this device */ #if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 4, 0)) bool infinite_cache_active_default; #else @@ -1220,73 +1629,45 @@ struct kbase_device { #endif size_t mem_pool_max_size_default; - /* current gpu coherency mode */ u32 current_gpu_coherency_mode; - /* system coherency mode */ u32 system_coherency; - /* Flag to track when cci snoops have been enabled on the interface */ + bool cci_snoop_enabled; - /* SMC function IDs to call into Trusted firmware to enable/disable - * cache snooping. Value of 0 indicates that they are not used - */ u32 snoop_enable_smc; u32 snoop_disable_smc; - /* Protected mode operations */ struct protected_mode_ops *protected_ops; - /* Protected device attached to this kbase device */ struct protected_mode_device *protected_dev; - /* - * true when GPU is put into protected mode - */ bool protected_mode; - /* - * true when GPU is transitioning into or out of protected mode - */ bool protected_mode_transition; - /* - * true if protected mode is supported - */ bool protected_mode_support; - -#ifdef CONFIG_MALI_BIFROST_DEBUG - wait_queue_head_t driver_inactive_wait; - bool driver_inactive; -#endif /* CONFIG_MALI_BIFROST_DEBUG */ - #ifdef CONFIG_MALI_FPGA_BUS_LOGGER - /* - * Bus logger integration. - */ struct bus_logger_client *buslogger; #endif - /* Boolean indicating if an IRQ flush during reset is in progress. */ + bool irq_reset_flush; - /* list of inited sub systems. 
Used during terminate/error recovery */ u32 inited_subsys; spinlock_t hwaccess_lock; - /* Protects access to MMU operations */ struct mutex mmu_hw_mutex; - /* Current serialization mode. See KBASE_SERIALIZE_* for details */ + /* See KBASE_SERIALIZE_* for details */ u8 serialize_jobs; #ifdef CONFIG_MALI_JOB_DUMP - /* Used to backup status of job serialization mode - * when we use GWT and restore when GWT is disabled. - * GWT uses full serialization mode. - */ u8 backup_serialize_jobs; #endif + + /* See KBASE_JS_*_PRIORITY_MODE for details. */ + u32 js_ctx_scheduling_mode; }; /** @@ -1375,10 +1756,239 @@ struct kbase_sub_alloc { DECLARE_BITMAP(sub_pages, SZ_2M / SZ_4K); }; +/** + * struct kbase_context - Object representing an entity, among which GPU is + * scheduled and gets its own GPU address space. + * Created when the device file /dev/malixx is opened. + * @filp: Pointer to the struct file corresponding to device file + * /dev/malixx instance, passed to the file's open method. + * @kbdev: Pointer to the Kbase device for which the context is created. + * @id: Unique identifier for the context, indicates the number of + * contexts which have been created for the device so far. + * @api_version: contains the version number for User/kernel interface, + * used for compatibility check. + * @pgd: Physical address of the page allocated for the top level + * page table of the context, this will be used for MMU Hw + * programming as the address translation will start from + * the top level page table. + * @event_list: list of posted events about completed atoms, to be sent to + * event handling thread of Userspace. + * @event_coalesce_list: list containing events corresponding to successive atoms + * which have requested deferred delivery of the completion + * events to Userspace. + * @event_mutex: Lock to protect the concurrent access to @event_list & + * @event_mutex.
+ * @event_closed: Flag set through POST_TERM ioctl, indicates that Driver + * should stop posting events and also inform event handling + * thread that context termination is in progress. + * @event_workq: Workqueue for processing work items corresponding to atoms + * that do not return an event to Userspace or have to perform + * a replay job + * @event_count: Count of the posted events to be consumed by Userspace. + * @event_coalesce_count: Count of the events present in @event_coalesce_list. + * @flags: bitmap of enums from kbase_context_flags, indicating the + * state & attributes for the context. + * @setup_complete: Indicates if the setup for context has completed, i.e. + * flags have been set for the context. Driver allows only + * 2 ioctls until the setup is done. Valid only for + * @api_version value 0. + * @setup_in_progress: Indicates if the context's setup is in progress and other + * setup calls during that shall be rejected. + * @mmu_teardown_pages: Buffer of 4 Pages in size, used to cache the entries of + * top & intermediate level page tables to avoid repeated + * calls to kmap_atomic during the MMU teardown. + * @aliasing_sink_page: Special page used for KBASE_MEM_TYPE_ALIAS allocations, + * which can alias a number of memory regions. The page + * represents a region where it is mapped with a write-alloc + * cache setup, typically used when the write result of the + * GPU isn't needed, but the GPU must write anyway. + * @mem_partials_lock: Lock for protecting the operations done on the elements + * added to @mem_partials list. + * @mem_partials: List head for the list of large pages, 2MB in size, which + * have been split into 4 KB pages and are used + * partially for the allocations >= 2 MB in size. + * @mmu_lock: Lock to serialize the accesses made to multi level GPU + * page tables, maintained for every context.
+ * @reg_lock: Lock used for GPU virtual address space management operations, + * like adding/freeing a memory region in the address space. + * Can be converted to a rwlock? + * @reg_rbtree_same: RB tree of the memory regions allocated from the SAME_VA + * zone of the GPU virtual address space. Used for allocations + * having the same value for GPU & CPU virtual address. + * @reg_rbtree_exec: RB tree of the memory regions allocated from the EXEC + * zone of the GPU virtual address space. Used for + * allocations containing executable code for + * shader programs. + * @reg_rbtree_custom: RB tree of the memory regions allocated from the CUSTOM_VA + * zone of the GPU virtual address space. + * @cookies: Bitmask consisting of BITS_PER_LONG bits, used mainly for + * SAME_VA allocations to defer the reservation of memory region + * (from the GPU virtual address space) from base_mem_alloc + * ioctl to mmap system call. This helps returning unique + * handles, disguised as GPU VA, to Userspace from base_mem_alloc + * and later retrieving the pointer to memory region structure + * in the mmap handler. + * @pending_regions: Array containing pointers to memory region structures, + * used in conjunction with @cookies bitmask mainly for + * providing a mechanism to have the same value for CPU & + * GPU virtual address. + * @event_queue: Wait queue used for blocking the thread, which consumes + * the base_jd_event corresponding to an atom, when there + * are no more posted events. + * @tgid: thread group id of the process, whose thread opened the + * device file /dev/malixx instance to create a context. + * @pid: id of the thread, corresponding to process @tgid, which + * actually opened the device file. + * @jctx: object encapsulating all the Job dispatcher related state, + * including the array of atoms. + * @used_pages: Keeps track of the number of 4KB physical pages in use + * for the context.
+ * @nonmapped_pages: Updated in the same way as @used_pages, except for the case + * when special tracking page is freed by userspace where it + * is reset to 0. + * @mem_pool: Object containing the state for the context specific pool of + * 4KB size physical pages. + * @lp_mem_pool: Object containing the state for the context specific pool of + * 2MB size physical pages. + * @reclaim: Shrinker object registered with the kernel containing + * the pointer to callback function which is invoked under + * low memory conditions. In the callback function Driver + * frees up the memory for allocations marked as + * evictable/reclaimable. + * @evict_list: List head for the list containing the allocations which + * can be evicted or freed up in the shrinker callback. + * @waiting_soft_jobs: List head for the list containing softjob atoms, which + * are either waiting for the event set operation, or waiting + * for the signaling of input fence or waiting for the GPU + * device to be powered on so as to dump the CPU/GPU timestamps. + * @waiting_soft_jobs_lock: Lock to protect @waiting_soft_jobs list from concurrent + * accesses. + * @dma_fence: Object containing list head for the list of dma-buf fence + * waiting atoms and the waitqueue to process the work item + * queued for the atoms blocked on the signaling of dma-buf + * fences. + * @as_nr: id of the address space being used for the scheduled in + * context. This is effectively part of the Run Pool, because + * it only has a valid setting (!=KBASEP_AS_NR_INVALID) whilst + * the context is scheduled in. The hwaccess_lock must be held + * whilst accessing this. + * If the context relating to this value of as_nr is required, + * then the context must be retained to ensure that it doesn't + * disappear whilst it is being used. Alternatively, hwaccess_lock + * can be held to ensure the context doesn't disappear (but this + * has restrictions on what other locks can be taken simultaneously).
+ * @refcount: Keeps track of the number of users of this context. A user + * can be a job that is available for execution, instrumentation + * needing to 'pin' a context for counter collection, etc. + * If the refcount reaches 0 then this context is considered + * inactive and the previously programmed AS might be cleared + * at any point. + * Generally the reference count is incremented when the context + * is scheduled in and an atom is pulled from the context's per + * slot runnable tree. + * @mm_update_lock: lock used for handling of special tracking page. + * @process_mm: Pointer to the memory descriptor of the process which + * created the context. Used for accounting the physical + * pages used for GPU allocations, done for the context, + * to the memory consumed by the process. + * @same_va_end: End address of the SAME_VA zone (in 4KB page units) + * @timeline: Object tracking the number of atoms currently in flight for + * the context and thread group id of the process, i.e. @tgid. + * @mem_profile_data: Buffer containing the profiling information provided by + * Userspace, can be read through the mem_profile debugfs file. + * @mem_profile_size: Size of the @mem_profile_data. + * @mem_profile_lock: Lock to serialize the operations related to mem_profile + * debugfs file. + * @kctx_dentry: Pointer to the debugfs directory created for every context, + * inside kbase_device::debugfs_ctx_directory, containing + * context specific files. + * @reg_dump: Buffer containing a register offset & value pair, used + * for dumping job fault debug info. + * @job_fault_count: Indicates that a job fault occurred for the context and + * dumping of its debug info is in progress. + * @job_fault_resume_event_list: List containing atoms completed after the faulty + * atom but before the debug data for faulty atom was dumped. 
+ * @jsctx_queue: Per slot & priority arrays of object containing the root + * of RB-tree holding currently runnable atoms on the job slot + * and the head item of the linked list of atoms blocked on + * cross-slot dependencies. + * @atoms_pulled: Total number of atoms currently pulled from the context. + * @atoms_pulled_slot: Per slot count of the number of atoms currently pulled + * from the context. + * @atoms_pulled_slot_pri: Per slot & priority count of the number of atoms currently + * pulled from the context. hwaccess_lock shall be held when + * accessing it. + * @blocked_js: Indicates if the context is blocked from submitting atoms + * on a slot at a given priority. This is set to true, when + * the atom corresponding to context is soft/hard stopped or + * removed from the HEAD_NEXT register in response to + * soft/hard stop. + * @slots_pullable: Bitmask of slots, indicating the slots for which the + * context has pullable atoms in the runnable tree. + * @work: Work structure used for deferred ASID assignment. + * @vinstr_cli: Pointer to the legacy userspace vinstr client, there can + * be only one such client per kbase context. + * @vinstr_cli_lock: Lock used for the vinstr ioctl calls made for @vinstr_cli. + * @completed_jobs: List containing completed atoms for which base_jd_event is + * to be posted. + * @work_count: Number of work items, corresponding to atoms, currently + * pending on job_done workqueue of @jctx. + * @soft_job_timeout: Timer object used for failing/cancelling the waiting + * soft-jobs which have been blocked for more than the + * timeout value used for the soft-jobs + * @jit_alloc: Array of 256 pointers to GPU memory regions, used + * for JIT allocations. + * @jit_max_allocations: Maximum number of JIT allocations allowed at once. + * @jit_current_allocations: Current number of in-flight JIT allocations.
+ * @jit_current_allocations_per_bin: Current number of in-flight JIT allocations per bin + * @jit_version: version number indicating whether userspace is using + * old or new version of interface for JIT allocations + * 1 -> client used KBASE_IOCTL_MEM_JIT_INIT_OLD + * 2 -> client used KBASE_IOCTL_MEM_JIT_INIT + * @jit_active_head: List containing the JIT allocations which are in use. + * @jit_pool_head: List containing the JIT allocations which have been + * freed up by userspace and so not being used by them. + * Driver caches them to quickly fulfill requests for new + * JIT allocations. They are released in case of memory + * pressure as they are put on the @evict_list when they + * are freed up by userspace. + * @jit_destroy_head: List containing the JIT allocations which were moved to it + * from @jit_pool_head, in the shrinker callback, after freeing + * their backing physical pages. + * @jit_evict_lock: Lock used for operations done on JIT allocations and also + * for accessing @evict_list. + * @jit_work: Work item queued to defer the freeing of memory region when + * JIT allocation is moved to @jit_destroy_head. + * @jit_atoms_head: A list of the JIT soft-jobs, both alloc & free, in submission + * order, protected by kbase_jd_context.lock. + * @jit_pending_alloc: A list of JIT alloc soft-jobs for which allocation will be + * reattempted after the impending free of other active JIT + * allocations. + * @ext_res_meta_head: A list of sticky external resources which were requested to + * be mapped on GPU side, through a softjob atom of type + * EXT_RES_MAP or STICKY_RESOURCE_MAP ioctl. + * @drain_pending: Used to record that a flush/invalidate of the GPU caches was + * requested from atomic context, so that the next flush request + * can wait for the flush of GPU writes.
+ * @age_count: Counter incremented on every call to jd_submit_atom, + * atom is assigned the snapshot of this counter, which + * is used to determine the atom's age when it is added to + * the runnable RB-tree. + * @trim_level: Level of JIT allocation trimming to perform on free (0-100%) + * @gwt_enabled: Indicates if tracking of GPU writes is enabled, protected by + * kbase_context.reg_lock. + * @gwt_was_enabled: Simple sticky bit flag to know if GWT was ever enabled. + * @gwt_current_list: A list of addresses for which GPU has generated write faults, + * after the last snapshot of it was sent to userspace. + * @gwt_snapshot_list: Snapshot of the @gwt_current_list for sending to user space. + * @priority: Indicates the context priority. Used along with @atoms_count + * for context scheduling, protected by hwaccess_lock. + * @atoms_count: Number of gpu atoms currently in use, per priority + */ struct kbase_context { struct file *filp; struct kbase_device *kbdev; - u32 id; /* System wide unique id */ + u32 id; unsigned long api_version; phys_addr_t pgd; struct list_head event_list; @@ -1402,13 +2012,10 @@ struct kbase_context { struct list_head mem_partials; struct mutex mmu_lock; - struct mutex reg_lock; /* To be converted to a rwlock? 
*/ - struct rb_root reg_rbtree_same; /* RB tree of GPU (live) regions, - * SAME_VA zone */ - struct rb_root reg_rbtree_exec; /* RB tree of GPU (live) regions, - * EXEC zone */ - struct rb_root reg_rbtree_custom; /* RB tree of GPU (live) regions, - * CUSTOM_VA zone */ + struct mutex reg_lock; + struct rb_root reg_rbtree_same; + struct rb_root reg_rbtree_exec; + struct rb_root reg_rbtree_custom; unsigned long cookies; struct kbase_va_region *pending_regions[BITS_PER_LONG]; @@ -1435,24 +2042,9 @@ struct kbase_context { struct workqueue_struct *wq; } dma_fence; #endif /* CONFIG_MALI_BIFROST_DMA_FENCE */ - /** This is effectively part of the Run Pool, because it only has a valid - * setting (!=KBASEP_AS_NR_INVALID) whilst the context is scheduled in - * - * The hwaccess_lock must be held whilst accessing this. - * - * If the context relating to this as_nr is required, you must use - * kbasep_js_runpool_retain_ctx() to ensure that the context doesn't disappear - * whilst you're using it. Alternatively, just hold the hwaccess_lock - * to ensure the context doesn't disappear (but this has restrictions on what other locks - * you can take whilst doing this) */ + int as_nr; - /* Keeps track of the number of users of this context. A user can be a - * job that is available for execution, instrumentation needing to 'pin' - * a context for counter collection, etc. If the refcount reaches 0 then - * this context is considered inactive and the previously programmed - * AS might be cleared at any point. 
- */ atomic_t refcount; /* NOTE: @@ -1463,28 +2055,19 @@ struct kbase_context { * All other flags must be added there */ spinlock_t mm_update_lock; struct mm_struct *process_mm; - /* End of the SAME_VA zone */ u64 same_va_end; #ifdef CONFIG_MALI_BIFROST_TRACE_TIMELINE struct kbase_trace_kctx_timeline timeline; #endif #ifdef CONFIG_DEBUG_FS - /* Content of mem_profile file */ char *mem_profile_data; - /* Size of @c mem_profile_data */ size_t mem_profile_size; - /* Mutex guarding memory profile state */ struct mutex mem_profile_lock; - /* Memory profile directory under debugfs */ struct dentry *kctx_dentry; - /* for job fault debug */ unsigned int *reg_dump; atomic_t job_fault_count; - /* This list will keep the following atoms during the dump - * in the same context - */ struct list_head job_fault_resume_event_list; #endif /* CONFIG_DEBUG_FS */ @@ -1492,86 +2075,59 @@ struct kbase_context { struct jsctx_queue jsctx_queue [KBASE_JS_ATOM_SCHED_PRIO_COUNT][BASE_JM_MAX_NR_SLOTS]; - /* Number of atoms currently pulled from this context */ atomic_t atoms_pulled; - /* Number of atoms currently pulled from this context, per slot */ atomic_t atoms_pulled_slot[BASE_JM_MAX_NR_SLOTS]; - /* Number of atoms currently pulled from this context, per slot and - * priority. Hold hwaccess_lock when accessing */ int atoms_pulled_slot_pri[BASE_JM_MAX_NR_SLOTS][ KBASE_JS_ATOM_SCHED_PRIO_COUNT]; - /* true if slot is blocked on the given priority. 
This will be set on a - * soft-stop */ bool blocked_js[BASE_JM_MAX_NR_SLOTS][KBASE_JS_ATOM_SCHED_PRIO_COUNT]; - /* Bitmask of slots that can be pulled from */ u32 slots_pullable; - /* Backend specific data */ - struct kbase_context_backend backend; - - /* Work structure used for deferred ASID assignment */ struct work_struct work; - /* Only one userspace vinstr client per kbase context */ struct kbase_vinstr_client *vinstr_cli; struct mutex vinstr_cli_lock; - /* List of completed jobs waiting for events to be posted */ struct list_head completed_jobs; - /* Number of work items currently pending on job_done_wq */ atomic_t work_count; - /* Waiting soft-jobs will fail when this timer expires */ struct timer_list soft_job_timeout; - /* JIT allocation management */ struct kbase_va_region *jit_alloc[256]; + u8 jit_max_allocations; + u8 jit_current_allocations; + u8 jit_current_allocations_per_bin[256]; + u8 jit_version; struct list_head jit_active_head; struct list_head jit_pool_head; struct list_head jit_destroy_head; struct mutex jit_evict_lock; struct work_struct jit_work; - /* A list of the JIT soft-jobs in submission order - * (protected by kbase_jd_context.lock) - */ struct list_head jit_atoms_head; - /* A list of pending JIT alloc soft-jobs (using the 'queue' list_head) - * (protected by kbase_jd_context.lock) - */ struct list_head jit_pending_alloc; - /* External sticky resource management */ struct list_head ext_res_meta_head; - /* Used to record that a drain was requested from atomic context */ atomic_t drain_pending; - /* Current age count, used to determine age for newly submitted atoms */ u32 age_count; + u8 trim_level; + #ifdef CONFIG_MALI_JOB_DUMP - /* Used for tracking GPU writes. - * (protected by kbase_context.reg_lock) - */ bool gwt_enabled; - /* Simple sticky bit flag to know if GWT was ever enabled - * (protected by kbase_context.reg_lock) - */ bool gwt_was_enabled; - /* Current list of GPU writes. 
- * (protected by kbase_context.reg_lock) - */ struct list_head gwt_current_list; - /* Snapshot of list of GPU writes for sending to user space. */ struct list_head gwt_snapshot_list; - #endif + + int priority; + s16 atoms_count[KBASE_JS_ATOM_SCHED_PRIO_COUNT]; }; #ifdef CONFIG_MALI_JOB_DUMP @@ -1579,17 +2135,17 @@ struct kbase_context { * struct kbasep_gwt_list_element - Structure used to collect GPU * write faults. * @link: List head for adding write faults. - * @handle: The handle for the modified region. - * @offset: The offset in pages of the modified - * part of the region. + * @region: Details of the region where we have the + * faulting page address. + * @page_addr: Page address where GPU write fault occurred. * @num_pages: The number of pages modified. * * Using this structure all GPU write faults are stored in a list. */ struct kbasep_gwt_list_element { struct list_head link; - u64 handle; - u64 offset; + struct kbase_va_region *region; + u64 page_addr; u64 num_pages; }; diff --git a/drivers/gpu/arm/bifrost/mali_kbase_device.c b/drivers/gpu/arm/bifrost/mali_kbase_device.c index 8aaf4065dd6c..005ae088686b 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_device.c +++ b/drivers/gpu/arm/bifrost/mali_kbase_device.c @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2010-2017 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2010-2018 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -254,10 +254,6 @@ int kbase_device_init(struct kbase_device * const kbdev) else kbdev->mmu_mode = kbase_mmu_mode_get_lpae(); -#ifdef CONFIG_MALI_BIFROST_DEBUG - init_waitqueue_head(&kbdev->driver_inactive_wait); -#endif /* CONFIG_MALI_BIFROST_DEBUG */ - return 0; term_trace: kbasep_trace_term(kbdev); diff --git a/drivers/gpu/arm/bifrost/mali_kbase_fence.h b/drivers/gpu/arm/bifrost/mali_kbase_fence.h index 25d2a933e118..414f3f4d7436 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_fence.h +++ b/drivers/gpu/arm/bifrost/mali_kbase_fence.h @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2010-2017 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2010-2018 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -139,8 +139,11 @@ static inline bool kbase_fence_out_is_ours(struct kbase_jd_atom *katom) static inline int kbase_fence_out_signal(struct kbase_jd_atom *katom, int status) { -#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 11, 0)) - katom->dma_fence.fence->error = status; +#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE && \ + KERNEL_VERSION(4, 9, 68) <= LINUX_VERSION_CODE) + fence_set_error(katom->dma_fence.fence, status); +#elif (KERNEL_VERSION(4, 11, 0) <= LINUX_VERSION_CODE) + dma_fence_set_error(katom->dma_fence.fence, status); #else katom->dma_fence.fence->status = status; #endif diff --git a/drivers/gpu/arm/bifrost/mali_kbase_fence_defs.h b/drivers/gpu/arm/bifrost/mali_kbase_fence_defs.h index a19d7e0aff1c..475136430649 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_fence_defs.h +++ b/drivers/gpu/arm/bifrost/mali_kbase_fence_defs.h @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2010-2017 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2010-2018 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -44,7 +44,12 @@ #define dma_fence_is_signaled(a) fence_is_signaled(a) #define dma_fence_add_callback(a, b, c) fence_add_callback(a, b, c) #define dma_fence_remove_callback(a, b) fence_remove_callback(a, b) + +#if (KERNEL_VERSION(4, 9, 68) <= LINUX_VERSION_CODE) +#define dma_fence_get_status(a) (fence_is_signaled(a) ? (a)->error ?: 1 : 0) +#else #define dma_fence_get_status(a) (fence_is_signaled(a) ? (a)->status ?: 1 : 0) +#endif #else diff --git a/drivers/gpu/arm/bifrost/mali_kbase_gator_api.c b/drivers/gpu/arm/bifrost/mali_kbase_gator_api.c index 2fa68067050a..040b2096bec6 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_gator_api.c +++ b/drivers/gpu/arm/bifrost/mali_kbase_gator_api.c @@ -158,7 +158,7 @@ KBASE_EXPORT_SYMBOL(kbase_gator_hwcnt_term_names); struct kbase_gator_hwcnt_handles *kbase_gator_hwcnt_init(struct kbase_gator_hwcnt_info *in_out_info) { struct kbase_gator_hwcnt_handles *hand; - struct kbase_uk_hwcnt_reader_setup setup; + struct kbase_ioctl_hwcnt_reader_setup setup; uint32_t dump_size = 0, i = 0; if (!in_out_info) diff --git a/drivers/gpu/arm/bifrost/mali_kbase_gator_hwcnt_names.h b/drivers/gpu/arm/bifrost/mali_kbase_gator_hwcnt_names.h index b048db8bf834..5d38c7b73553 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_gator_hwcnt_names.h +++ b/drivers/gpu/arm/bifrost/mali_kbase_gator_hwcnt_names.h @@ -2169,6 +2169,8 @@ static const char * const hardware_counters_mali_t88x[] = { #include "mali_kbase_gator_hwcnt_names_tnox.h" +#include "mali_kbase_gator_hwcnt_names_tgox.h" + #include "mali_kbase_gator_hwcnt_names_tkax.h" #include "mali_kbase_gator_hwcnt_names_ttrx.h" diff --git a/drivers/gpu/arm/bifrost/mali_kbase_gator_hwcnt_names_tgox.h b/drivers/gpu/arm/bifrost/mali_kbase_gator_hwcnt_names_tgox.h new file mode 100644 index 000000000000..72b5266622a9 --- /dev/null +++ 
b/drivers/gpu/arm/bifrost/mali_kbase_gator_hwcnt_names_tgox.h @@ -0,0 +1,296 @@ +/* + * + * (C) COPYRIGHT 2016-2018 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +/* + * This header was autogenerated, it should not be edited. + */ + +#ifndef _KBASE_GATOR_HWCNT_NAMES_TGOX_H_ +#define _KBASE_GATOR_HWCNT_NAMES_TGOX_H_ + +static const char * const hardware_counters_mali_tGOx[] = { + /* Performance counters for the Job Manager */ + "", + "", + "", + "", + "TGOx_MESSAGES_SENT", + "TGOx_MESSAGES_RECEIVED", + "TGOx_GPU_ACTIVE", + "TGOx_IRQ_ACTIVE", + "TGOx_JS0_JOBS", + "TGOx_JS0_TASKS", + "TGOx_JS0_ACTIVE", + "", + "TGOx_JS0_WAIT_READ", + "TGOx_JS0_WAIT_ISSUE", + "TGOx_JS0_WAIT_DEPEND", + "TGOx_JS0_WAIT_FINISH", + "TGOx_JS1_JOBS", + "TGOx_JS1_TASKS", + "TGOx_JS1_ACTIVE", + "", + "TGOx_JS1_WAIT_READ", + "TGOx_JS1_WAIT_ISSUE", + "TGOx_JS1_WAIT_DEPEND", + "TGOx_JS1_WAIT_FINISH", + "TGOx_JS2_JOBS", + "TGOx_JS2_TASKS", + "TGOx_JS2_ACTIVE", + "", + "TGOx_JS2_WAIT_READ", + "TGOx_JS2_WAIT_ISSUE", + "TGOx_JS2_WAIT_DEPEND", + "TGOx_JS2_WAIT_FINISH", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + + /* Performance 
counters for the Tiler */ + "", + "", + "", + "", + "TGOx_TILER_ACTIVE", + "TGOx_JOBS_PROCESSED", + "TGOx_TRIANGLES", + "TGOx_LINES", + "TGOx_POINTS", + "TGOx_FRONT_FACING", + "TGOx_BACK_FACING", + "TGOx_PRIM_VISIBLE", + "TGOx_PRIM_CULLED", + "TGOx_PRIM_CLIPPED", + "TGOx_PRIM_SAT_CULLED", + "TGOx_BIN_ALLOC_INIT", + "TGOx_BIN_ALLOC_OVERFLOW", + "TGOx_BUS_READ", + "", + "TGOx_BUS_WRITE", + "TGOx_LOADING_DESC", + "TGOx_IDVS_POS_SHAD_REQ", + "TGOx_IDVS_POS_SHAD_WAIT", + "TGOx_IDVS_POS_SHAD_STALL", + "TGOx_IDVS_POS_FIFO_FULL", + "TGOx_PREFETCH_STALL", + "TGOx_VCACHE_HIT", + "TGOx_VCACHE_MISS", + "TGOx_VCACHE_LINE_WAIT", + "TGOx_VFETCH_POS_READ_WAIT", + "TGOx_VFETCH_VERTEX_WAIT", + "TGOx_VFETCH_STALL", + "TGOx_PRIMASSY_STALL", + "TGOx_BBOX_GEN_STALL", + "TGOx_IDVS_VBU_HIT", + "TGOx_IDVS_VBU_MISS", + "TGOx_IDVS_VBU_LINE_DEALLOCATE", + "TGOx_IDVS_VAR_SHAD_REQ", + "TGOx_IDVS_VAR_SHAD_STALL", + "TGOx_BINNER_STALL", + "TGOx_ITER_STALL", + "TGOx_COMPRESS_MISS", + "TGOx_COMPRESS_STALL", + "TGOx_PCACHE_HIT", + "TGOx_PCACHE_MISS", + "TGOx_PCACHE_MISS_STALL", + "TGOx_PCACHE_EVICT_STALL", + "TGOx_PMGR_PTR_WR_STALL", + "TGOx_PMGR_PTR_RD_STALL", + "TGOx_PMGR_CMD_WR_STALL", + "TGOx_WRBUF_ACTIVE", + "TGOx_WRBUF_HIT", + "TGOx_WRBUF_MISS", + "TGOx_WRBUF_NO_FREE_LINE_STALL", + "TGOx_WRBUF_NO_AXI_ID_STALL", + "TGOx_WRBUF_AXI_STALL", + "", + "", + "", + "TGOx_UTLB_TRANS", + "TGOx_UTLB_TRANS_HIT", + "TGOx_UTLB_TRANS_STALL", + "TGOx_UTLB_TRANS_MISS_DELAY", + "TGOx_UTLB_MMU_REQ", + + /* Performance counters for the Shader Core */ + "", + "", + "", + "", + "TGOx_FRAG_ACTIVE", + "TGOx_FRAG_PRIMITIVES", + "TGOx_FRAG_PRIM_RAST", + "TGOx_FRAG_FPK_ACTIVE", + "TGOx_FRAG_STARVING", + "TGOx_FRAG_WARPS", + "TGOx_FRAG_PARTIAL_WARPS", + "TGOx_FRAG_QUADS_RAST", + "TGOx_FRAG_QUADS_EZS_TEST", + "TGOx_FRAG_QUADS_EZS_UPDATE", + "TGOx_FRAG_QUADS_EZS_KILL", + "TGOx_FRAG_LZS_TEST", + "TGOx_FRAG_LZS_KILL", + "TGOx_WARP_REG_SIZE_64", + "TGOx_FRAG_PTILES", + "TGOx_FRAG_TRANS_ELIM", + "TGOx_QUAD_FPK_KILLER", + 
"TGOx_FULL_QUAD_WARPS", + "TGOx_COMPUTE_ACTIVE", + "TGOx_COMPUTE_TASKS", + "TGOx_COMPUTE_WARPS", + "TGOx_COMPUTE_STARVING", + "TGOx_EXEC_CORE_ACTIVE", + "TGOx_EXEC_ACTIVE", + "TGOx_EXEC_INSTR_COUNT", + "TGOx_EXEC_INSTR_DIVERGED", + "TGOx_EXEC_INSTR_STARVING", + "TGOx_ARITH_INSTR_SINGLE_FMA", + "TGOx_ARITH_INSTR_DOUBLE", + "TGOx_ARITH_INSTR_MSG", + "TGOx_ARITH_INSTR_MSG_ONLY", + "TGOx_TEX_MSGI_NUM_QUADS", + "TGOx_TEX_DFCH_NUM_PASSES", + "TGOx_TEX_DFCH_NUM_PASSES_MISS", + "TGOx_TEX_DFCH_NUM_PASSES_MIP_MAP", + "TGOx_TEX_TIDX_NUM_SPLIT_MIP_MAP", + "TGOx_TEX_TFCH_NUM_LINES_FETCHED", + "TGOx_TEX_TFCH_NUM_LINES_FETCHED_BLOCK", + "TGOx_TEX_TFCH_NUM_OPERATIONS", + "TGOx_TEX_FILT_NUM_OPERATIONS", + "TGOx_LS_MEM_READ_FULL", + "TGOx_LS_MEM_READ_SHORT", + "TGOx_LS_MEM_WRITE_FULL", + "TGOx_LS_MEM_WRITE_SHORT", + "TGOx_LS_MEM_ATOMIC", + "TGOx_VARY_INSTR", + "TGOx_VARY_SLOT_32", + "TGOx_VARY_SLOT_16", + "TGOx_ATTR_INSTR", + "TGOx_ARITH_INSTR_FP_MUL", + "TGOx_BEATS_RD_FTC", + "TGOx_BEATS_RD_FTC_EXT", + "TGOx_BEATS_RD_LSC", + "TGOx_BEATS_RD_LSC_EXT", + "TGOx_BEATS_RD_TEX", + "TGOx_BEATS_RD_TEX_EXT", + "TGOx_BEATS_RD_OTHER", + "TGOx_BEATS_WR_LSC_WB", + "TGOx_BEATS_WR_TIB", + "TGOx_BEATS_WR_LSC_OTHER", + + /* Performance counters for the Memory System */ + "", + "", + "", + "", + "TGOx_MMU_REQUESTS", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "TGOx_L2_RD_MSG_IN", + "TGOx_L2_RD_MSG_IN_STALL", + "TGOx_L2_WR_MSG_IN", + "TGOx_L2_WR_MSG_IN_STALL", + "TGOx_L2_SNP_MSG_IN", + "TGOx_L2_SNP_MSG_IN_STALL", + "TGOx_L2_RD_MSG_OUT", + "TGOx_L2_RD_MSG_OUT_STALL", + "TGOx_L2_WR_MSG_OUT", + "TGOx_L2_ANY_LOOKUP", + "TGOx_L2_READ_LOOKUP", + "TGOx_L2_WRITE_LOOKUP", + "TGOx_L2_EXT_SNOOP_LOOKUP", + "TGOx_L2_EXT_READ", + "TGOx_L2_EXT_READ_NOSNP", + "TGOx_L2_EXT_READ_UNIQUE", + "TGOx_L2_EXT_READ_BEATS", + "TGOx_L2_EXT_AR_STALL", + "TGOx_L2_EXT_AR_CNT_Q1", + "TGOx_L2_EXT_AR_CNT_Q2", + "TGOx_L2_EXT_AR_CNT_Q3", + "TGOx_L2_EXT_RRESP_0_127", + "TGOx_L2_EXT_RRESP_128_191", + 
"TGOx_L2_EXT_RRESP_192_255", + "TGOx_L2_EXT_RRESP_256_319", + "TGOx_L2_EXT_RRESP_320_383", + "TGOx_L2_EXT_WRITE", + "TGOx_L2_EXT_WRITE_NOSNP_FULL", + "TGOx_L2_EXT_WRITE_NOSNP_PTL", + "TGOx_L2_EXT_WRITE_SNP_FULL", + "TGOx_L2_EXT_WRITE_SNP_PTL", + "TGOx_L2_EXT_WRITE_BEATS", + "TGOx_L2_EXT_W_STALL", + "TGOx_L2_EXT_AW_CNT_Q1", + "TGOx_L2_EXT_AW_CNT_Q2", + "TGOx_L2_EXT_AW_CNT_Q3", + "TGOx_L2_EXT_SNOOP", + "TGOx_L2_EXT_SNOOP_STALL", + "TGOx_L2_EXT_SNOOP_RESP_CLEAN", + "TGOx_L2_EXT_SNOOP_RESP_DATA", + "TGOx_L2_EXT_SNOOP_INTERNAL", + "", + "", + "", + "", + "", + "", + "", +}; + +#endif /* _KBASE_GATOR_HWCNT_NAMES_TGOX_H_ */ diff --git a/drivers/gpu/arm/bifrost/mali_kbase_gator_hwcnt_names_thex.h b/drivers/gpu/arm/bifrost/mali_kbase_gator_hwcnt_names_thex.h index af00a6acb09b..e24e91ab1ca4 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_gator_hwcnt_names_thex.h +++ b/drivers/gpu/arm/bifrost/mali_kbase_gator_hwcnt_names_thex.h @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2016-2017 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2016-2018 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software diff --git a/drivers/gpu/arm/bifrost/mali_kbase_gator_hwcnt_names_tkax.h b/drivers/gpu/arm/bifrost/mali_kbase_gator_hwcnt_names_tkax.h index 1c1f6693bfb5..73db45c232f1 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_gator_hwcnt_names_tkax.h +++ b/drivers/gpu/arm/bifrost/mali_kbase_gator_hwcnt_names_tkax.h @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2016-2017 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2016-2018 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -222,9 +222,9 @@ static const char * const hardware_counters_mali_tKAx[] = { "TKAx_BEATS_RD_TEX", "TKAx_BEATS_RD_TEX_EXT", "TKAx_BEATS_RD_OTHER", - "TKAx_BEATS_WR_LSC_WB", - "TKAx_BEATS_WR_TIB", "TKAx_BEATS_WR_LSC_OTHER", + "TKAx_BEATS_WR_TIB", + "TKAx_BEATS_WR_LSC_WB", /* Performance counters for the Memory System */ "", diff --git a/drivers/gpu/arm/bifrost/mali_kbase_gator_hwcnt_names_tmix.h b/drivers/gpu/arm/bifrost/mali_kbase_gator_hwcnt_names_tmix.h index 233ffbec416e..63eac50e0cc7 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_gator_hwcnt_names_tmix.h +++ b/drivers/gpu/arm/bifrost/mali_kbase_gator_hwcnt_names_tmix.h @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2016-2017 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2016-2018 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software diff --git a/drivers/gpu/arm/bifrost/mali_kbase_gator_hwcnt_names_tnox.h b/drivers/gpu/arm/bifrost/mali_kbase_gator_hwcnt_names_tnox.h index fbb5080f6779..932663cfb6a9 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_gator_hwcnt_names_tnox.h +++ b/drivers/gpu/arm/bifrost/mali_kbase_gator_hwcnt_names_tnox.h @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2016-2017 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2016-2018 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -222,9 +222,9 @@ static const char * const hardware_counters_mali_tNOx[] = { "TNOx_BEATS_RD_TEX", "TNOx_BEATS_RD_TEX_EXT", "TNOx_BEATS_RD_OTHER", - "TNOx_BEATS_WR_LSC_WB", - "TNOx_BEATS_WR_TIB", "TNOx_BEATS_WR_LSC_OTHER", + "TNOx_BEATS_WR_TIB", + "TNOx_BEATS_WR_LSC_WB", /* Performance counters for the Memory System */ "", diff --git a/drivers/gpu/arm/bifrost/mali_kbase_gator_hwcnt_names_tsix.h b/drivers/gpu/arm/bifrost/mali_kbase_gator_hwcnt_names_tsix.h index 552db5732239..b8dde32bc529 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_gator_hwcnt_names_tsix.h +++ b/drivers/gpu/arm/bifrost/mali_kbase_gator_hwcnt_names_tsix.h @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2016-2017 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2016-2018 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -222,9 +222,9 @@ static const char * const hardware_counters_mali_tSIx[] = { "TSIx_BEATS_RD_TEX", "TSIx_BEATS_RD_TEX_EXT", "TSIx_BEATS_RD_OTHER", - "TSIx_BEATS_WR_LSC", + "TSIx_BEATS_WR_LSC_OTHER", "TSIx_BEATS_WR_TIB", - "", + "TSIx_BEATS_WR_LSC_WB", /* Performance counters for the Memory System */ "", diff --git a/drivers/gpu/arm/bifrost/mali_kbase_gator_hwcnt_names_ttrx.h b/drivers/gpu/arm/bifrost/mali_kbase_gator_hwcnt_names_ttrx.h index d1bb02a72fc3..e8148a018df5 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_gator_hwcnt_names_ttrx.h +++ b/drivers/gpu/arm/bifrost/mali_kbase_gator_hwcnt_names_ttrx.h @@ -178,24 +178,24 @@ static const char * const hardware_counters_mali_tTRx[] = { "TTRx_FRAG_QUADS_EZS_KILL", "TTRx_FRAG_LZS_TEST", "TTRx_FRAG_LZS_KILL", - "", + "TTRx_WARP_REG_SIZE_64", "TTRx_FRAG_PTILES", "TTRx_FRAG_TRANS_ELIM", "TTRx_QUAD_FPK_KILLER", - "", + "TTRx_FULL_QUAD_WARPS", 
"TTRx_COMPUTE_ACTIVE", "TTRx_COMPUTE_TASKS", "TTRx_COMPUTE_WARPS", "TTRx_COMPUTE_STARVING", "TTRx_EXEC_CORE_ACTIVE", - "TTRx_EXEC_ACTIVE", - "TTRx_EXEC_INSTR_COUNT", + "TTRx_EXEC_INSTR_FMA", + "TTRx_EXEC_INSTR_CVT", + "TTRx_EXEC_INSTR_SFU", + "TTRx_EXEC_INSTR_MSG", "TTRx_EXEC_INSTR_DIVERGED", - "TTRx_EXEC_INSTR_STARVING", - "TTRx_ARITH_INSTR_SINGLE_FMA", - "TTRx_ARITH_INSTR_DOUBLE", - "TTRx_ARITH_INSTR_MSG", - "TTRx_ARITH_INSTR_MSG_ONLY", + "TTRx_EXEC_ICACHE_MISS", + "TTRx_EXEC_STARVE_ARITH", + "TTRx_CALL_BLEND_SHADER", "TTRx_TEX_INSTR", "TTRx_TEX_INSTR_MIPMAP", "TTRx_TEX_INSTR_COMPRESSED", @@ -222,9 +222,9 @@ static const char * const hardware_counters_mali_tTRx[] = { "TTRx_BEATS_RD_TEX", "TTRx_BEATS_RD_TEX_EXT", "TTRx_BEATS_RD_OTHER", - "TTRx_BEATS_WR_LSC", - "TTRx_BEATS_WR_TIB", "", + "TTRx_BEATS_WR_TIB", + "TTRx_BEATS_WR_LSC", /* Performance counters for the Memory System */ "", diff --git a/drivers/gpu/arm/bifrost/mali_kbase_gpu_id.h b/drivers/gpu/arm/bifrost/mali_kbase_gpu_id.h index 4052e2fd0768..c14317087ead 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_gpu_id.h +++ b/drivers/gpu/arm/bifrost/mali_kbase_gpu_id.h @@ -27,20 +27,20 @@ #define GPU_ID_VERSION_MINOR_SHIFT 4 #define GPU_ID_VERSION_MAJOR_SHIFT 12 #define GPU_ID_VERSION_PRODUCT_ID_SHIFT 16 -#define GPU_ID_VERSION_STATUS (0xF << GPU_ID_VERSION_STATUS_SHIFT) -#define GPU_ID_VERSION_MINOR (0xFF << GPU_ID_VERSION_MINOR_SHIFT) -#define GPU_ID_VERSION_MAJOR (0xF << GPU_ID_VERSION_MAJOR_SHIFT) -#define GPU_ID_VERSION_PRODUCT_ID (0xFFFF << GPU_ID_VERSION_PRODUCT_ID_SHIFT) +#define GPU_ID_VERSION_STATUS (0xFu << GPU_ID_VERSION_STATUS_SHIFT) +#define GPU_ID_VERSION_MINOR (0xFFu << GPU_ID_VERSION_MINOR_SHIFT) +#define GPU_ID_VERSION_MAJOR (0xFu << GPU_ID_VERSION_MAJOR_SHIFT) +#define GPU_ID_VERSION_PRODUCT_ID (0xFFFFu << GPU_ID_VERSION_PRODUCT_ID_SHIFT) /* Values for GPU_ID_VERSION_PRODUCT_ID bitfield */ -#define GPU_ID_PI_T60X 0x6956 -#define GPU_ID_PI_T62X 0x0620 -#define GPU_ID_PI_T76X 0x0750 -#define 
GPU_ID_PI_T72X 0x0720 -#define GPU_ID_PI_TFRX 0x0880 -#define GPU_ID_PI_T86X 0x0860 -#define GPU_ID_PI_T82X 0x0820 -#define GPU_ID_PI_T83X 0x0830 +#define GPU_ID_PI_T60X 0x6956u +#define GPU_ID_PI_T62X 0x0620u +#define GPU_ID_PI_T76X 0x0750u +#define GPU_ID_PI_T72X 0x0720u +#define GPU_ID_PI_TFRX 0x0880u +#define GPU_ID_PI_T86X 0x0860u +#define GPU_ID_PI_T82X 0x0820u +#define GPU_ID_PI_T83X 0x0830u /* New GPU ID format when PRODUCT_ID is >= 0x1000 (and not 0x6956) */ #define GPU_ID_PI_NEW_FORMAT_START 0x1000 @@ -55,13 +55,13 @@ #define GPU_ID2_ARCH_REV_SHIFT 20 #define GPU_ID2_ARCH_MINOR_SHIFT 24 #define GPU_ID2_ARCH_MAJOR_SHIFT 28 -#define GPU_ID2_VERSION_STATUS (0xF << GPU_ID2_VERSION_STATUS_SHIFT) -#define GPU_ID2_VERSION_MINOR (0xFF << GPU_ID2_VERSION_MINOR_SHIFT) -#define GPU_ID2_VERSION_MAJOR (0xF << GPU_ID2_VERSION_MAJOR_SHIFT) -#define GPU_ID2_PRODUCT_MAJOR (0xF << GPU_ID2_PRODUCT_MAJOR_SHIFT) -#define GPU_ID2_ARCH_REV (0xF << GPU_ID2_ARCH_REV_SHIFT) -#define GPU_ID2_ARCH_MINOR (0xF << GPU_ID2_ARCH_MINOR_SHIFT) -#define GPU_ID2_ARCH_MAJOR (0xF << GPU_ID2_ARCH_MAJOR_SHIFT) +#define GPU_ID2_VERSION_STATUS (0xFu << GPU_ID2_VERSION_STATUS_SHIFT) +#define GPU_ID2_VERSION_MINOR (0xFFu << GPU_ID2_VERSION_MINOR_SHIFT) +#define GPU_ID2_VERSION_MAJOR (0xFu << GPU_ID2_VERSION_MAJOR_SHIFT) +#define GPU_ID2_PRODUCT_MAJOR (0xFu << GPU_ID2_PRODUCT_MAJOR_SHIFT) +#define GPU_ID2_ARCH_REV (0xFu << GPU_ID2_ARCH_REV_SHIFT) +#define GPU_ID2_ARCH_MINOR (0xFu << GPU_ID2_ARCH_MINOR_SHIFT) +#define GPU_ID2_ARCH_MAJOR (0xFu << GPU_ID2_ARCH_MAJOR_SHIFT) #define GPU_ID2_PRODUCT_MODEL (GPU_ID2_ARCH_MAJOR | GPU_ID2_PRODUCT_MAJOR) #define GPU_ID2_VERSION (GPU_ID2_VERSION_MAJOR | \ GPU_ID2_VERSION_MINOR | \ @@ -70,17 +70,17 @@ /* Helper macro to create a partial GPU_ID (new format) that defines a product ignoring its version. 
*/ #define GPU_ID2_PRODUCT_MAKE(arch_major, arch_minor, arch_rev, product_major) \ - (((arch_major) << GPU_ID2_ARCH_MAJOR_SHIFT) | \ - ((arch_minor) << GPU_ID2_ARCH_MINOR_SHIFT) | \ - ((arch_rev) << GPU_ID2_ARCH_REV_SHIFT) | \ - ((product_major) << GPU_ID2_PRODUCT_MAJOR_SHIFT)) + ((((u32)arch_major) << GPU_ID2_ARCH_MAJOR_SHIFT) | \ + (((u32)arch_minor) << GPU_ID2_ARCH_MINOR_SHIFT) | \ + (((u32)arch_rev) << GPU_ID2_ARCH_REV_SHIFT) | \ + (((u32)product_major) << GPU_ID2_PRODUCT_MAJOR_SHIFT)) /* Helper macro to create a partial GPU_ID (new format) that specifies the revision (major, minor, status) of a product */ #define GPU_ID2_VERSION_MAKE(version_major, version_minor, version_status) \ - (((version_major) << GPU_ID2_VERSION_MAJOR_SHIFT) | \ - ((version_minor) << GPU_ID2_VERSION_MINOR_SHIFT) | \ - ((version_status) << GPU_ID2_VERSION_STATUS_SHIFT)) + ((((u32)version_major) << GPU_ID2_VERSION_MAJOR_SHIFT) | \ + (((u32)version_minor) << GPU_ID2_VERSION_MINOR_SHIFT) | \ + (((u32)version_status) << GPU_ID2_VERSION_STATUS_SHIFT)) /* Helper macro to create a complete GPU_ID (new format) */ #define GPU_ID2_MAKE(arch_major, arch_minor, arch_rev, product_major, \ @@ -93,25 +93,25 @@ /* Helper macro to create a partial GPU_ID (new format) that identifies a particular GPU model by its arch_major and product_major. */ #define GPU_ID2_MODEL_MAKE(arch_major, product_major) \ - (((arch_major) << GPU_ID2_ARCH_MAJOR_SHIFT) | \ - ((product_major) << GPU_ID2_PRODUCT_MAJOR_SHIFT)) + ((((u32)arch_major) << GPU_ID2_ARCH_MAJOR_SHIFT) | \ + (((u32)product_major) << GPU_ID2_PRODUCT_MAJOR_SHIFT)) /* Strip off the non-relevant bits from a product_id value and make it suitable for comparison against the GPU_ID2_PRODUCT_xxx values which identify a GPU model. 
*/ #define GPU_ID2_MODEL_MATCH_VALUE(product_id) \ - (((product_id) << GPU_ID2_PRODUCT_MAJOR_SHIFT) & \ + ((((u32)product_id) << GPU_ID2_PRODUCT_MAJOR_SHIFT) & \ GPU_ID2_PRODUCT_MODEL) -#define GPU_ID2_PRODUCT_TMIX GPU_ID2_MODEL_MAKE(6u, 0) -#define GPU_ID2_PRODUCT_THEX GPU_ID2_MODEL_MAKE(6u, 1) -#define GPU_ID2_PRODUCT_TSIX GPU_ID2_MODEL_MAKE(7u, 0) -#define GPU_ID2_PRODUCT_TDVX GPU_ID2_MODEL_MAKE(7u, 3) -#define GPU_ID2_PRODUCT_TNOX GPU_ID2_MODEL_MAKE(7u, 1) -#define GPU_ID2_PRODUCT_TGOX GPU_ID2_MODEL_MAKE(7u, 2) -#define GPU_ID2_PRODUCT_TKAX GPU_ID2_MODEL_MAKE(8u, 0) -#define GPU_ID2_PRODUCT_TTRX GPU_ID2_MODEL_MAKE(8u, 1) -#define GPU_ID2_PRODUCT_TBOX GPU_ID2_MODEL_MAKE(8u, 2) +#define GPU_ID2_PRODUCT_TMIX GPU_ID2_MODEL_MAKE(6, 0) +#define GPU_ID2_PRODUCT_THEX GPU_ID2_MODEL_MAKE(6, 1) +#define GPU_ID2_PRODUCT_TSIX GPU_ID2_MODEL_MAKE(7, 0) +#define GPU_ID2_PRODUCT_TDVX GPU_ID2_MODEL_MAKE(7, 3) +#define GPU_ID2_PRODUCT_TNOX GPU_ID2_MODEL_MAKE(7, 1) +#define GPU_ID2_PRODUCT_TGOX GPU_ID2_MODEL_MAKE(7, 2) +#define GPU_ID2_PRODUCT_TKAX GPU_ID2_MODEL_MAKE(8, 0) +#define GPU_ID2_PRODUCT_TTRX GPU_ID2_MODEL_MAKE(8, 1) +#define GPU_ID2_PRODUCT_TBOX GPU_ID2_MODEL_MAKE(8, 2) /* Values for GPU_ID_VERSION_STATUS field for PRODUCT_ID GPU_ID_PI_T60X */ #define GPU_ID_S_15DEV0 0x1 @@ -120,9 +120,9 @@ /* Helper macro to create a GPU_ID assuming valid values for id, major, minor, status */ #define GPU_ID_MAKE(id, major, minor, status) \ - (((id) << GPU_ID_VERSION_PRODUCT_ID_SHIFT) | \ - ((major) << GPU_ID_VERSION_MAJOR_SHIFT) | \ - ((minor) << GPU_ID_VERSION_MINOR_SHIFT) | \ - ((status) << GPU_ID_VERSION_STATUS_SHIFT)) + ((((u32)id) << GPU_ID_VERSION_PRODUCT_ID_SHIFT) | \ + (((u32)major) << GPU_ID_VERSION_MAJOR_SHIFT) | \ + (((u32)minor) << GPU_ID_VERSION_MINOR_SHIFT) | \ + (((u32)status) << GPU_ID_VERSION_STATUS_SHIFT)) #endif /* _KBASE_GPU_ID_H_ */ diff --git a/drivers/gpu/arm/bifrost/mali_kbase_gpu_memory_debugfs.c b/drivers/gpu/arm/bifrost/mali_kbase_gpu_memory_debugfs.c index 
2fd033280359..514b065d4867 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_gpu_memory_debugfs.c +++ b/drivers/gpu/arm/bifrost/mali_kbase_gpu_memory_debugfs.c @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2012-2016 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2012-2017 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -70,7 +70,7 @@ static int kbasep_gpu_memory_seq_show(struct seq_file *sfile, void *data) */ static int kbasep_gpu_memory_debugfs_open(struct inode *in, struct file *file) { - return single_open(file, kbasep_gpu_memory_seq_show , NULL); + return single_open(file, kbasep_gpu_memory_seq_show, NULL); } static const struct file_operations kbasep_gpu_memory_debugfs_fops = { diff --git a/drivers/gpu/arm/bifrost/mali_kbase_gpuprops.c b/drivers/gpu/arm/bifrost/mali_kbase_gpuprops.c index 9a9ce2d9e661..62ba105ca417 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_gpuprops.c +++ b/drivers/gpu/arm/bifrost/mali_kbase_gpuprops.c @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2011-2017 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2011-2018 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -138,7 +138,7 @@ static void kbase_gpuprops_get_props(base_gpu_props * const gpu_props, struct kb gpu_props->raw_props.mem_features = regdump.mem_features; gpu_props->raw_props.mmu_features = regdump.mmu_features; gpu_props->raw_props.l2_features = regdump.l2_features; - gpu_props->raw_props.suspend_size = regdump.suspend_size; + gpu_props->raw_props.core_features = regdump.core_features; gpu_props->raw_props.as_present = regdump.as_present; gpu_props->raw_props.js_present = regdump.js_present; @@ -165,6 +165,7 @@ static void kbase_gpuprops_get_props(base_gpu_props * const gpu_props, struct kb gpu_props->raw_props.thread_max_threads = regdump.thread_max_threads; gpu_props->raw_props.thread_max_workgroup_size = regdump.thread_max_workgroup_size; gpu_props->raw_props.thread_features = regdump.thread_features; + gpu_props->raw_props.thread_tls_alloc = regdump.thread_tls_alloc; } void kbase_gpuprops_update_core_props_gpu_id(base_gpu_props * const gpu_props) @@ -195,6 +196,8 @@ static void kbase_gpuprops_calculate_props(base_gpu_props * const gpu_props, str kbase_gpuprops_update_core_props_gpu_id(gpu_props); gpu_props->core_props.log2_program_counter_size = KBASE_GPU_PC_SIZE_LOG2; gpu_props->core_props.gpu_available_memory_size = totalram_pages << PAGE_SHIFT; + gpu_props->core_props.num_exec_engines = + KBASE_UBFX32(gpu_props->raw_props.core_features, 0, 4); for (i = 0; i < BASE_GPU_NUM_TEXTURE_FEATURES_REGISTERS; i++) gpu_props->core_props.texture_features[i] = gpu_props->raw_props.texture_features[i]; @@ -226,6 +229,13 @@ static void kbase_gpuprops_calculate_props(base_gpu_props * const gpu_props, str else gpu_props->thread_props.max_barrier_size = gpu_props->raw_props.thread_max_barrier_size; + if (gpu_props->raw_props.thread_tls_alloc == 0) + gpu_props->thread_props.tls_alloc = + 
gpu_props->thread_props.max_threads; + else + gpu_props->thread_props.tls_alloc = + gpu_props->raw_props.thread_tls_alloc; + gpu_props->thread_props.max_registers = KBASE_UBFX32(gpu_props->raw_props.thread_features, 0U, 16); gpu_props->thread_props.max_task_queue = KBASE_UBFX32(gpu_props->raw_props.thread_features, 16U, 8); gpu_props->thread_props.max_thread_group_split = KBASE_UBFX32(gpu_props->raw_props.thread_features, 24U, 6); @@ -312,6 +322,7 @@ static struct { PROP(TEXTURE_FEATURES_2, core_props.texture_features[2]), PROP(TEXTURE_FEATURES_3, core_props.texture_features[3]), PROP(GPU_AVAILABLE_MEMORY_SIZE, core_props.gpu_available_memory_size), + PROP(NUM_EXEC_ENGINES, core_props.num_exec_engines), PROP(L2_LOG2_LINE_SIZE, l2_props.log2_line_size), PROP(L2_LOG2_CACHE_SIZE, l2_props.log2_cache_size), @@ -327,13 +338,14 @@ static struct { PROP(MAX_TASK_QUEUE, thread_props.max_task_queue), PROP(MAX_THREAD_GROUP_SPLIT, thread_props.max_thread_group_split), PROP(IMPL_TECH, thread_props.impl_tech), + PROP(TLS_ALLOC, thread_props.tls_alloc), PROP(RAW_SHADER_PRESENT, raw_props.shader_present), PROP(RAW_TILER_PRESENT, raw_props.tiler_present), PROP(RAW_L2_PRESENT, raw_props.l2_present), PROP(RAW_STACK_PRESENT, raw_props.stack_present), PROP(RAW_L2_FEATURES, raw_props.l2_features), - PROP(RAW_SUSPEND_SIZE, raw_props.suspend_size), + PROP(RAW_CORE_FEATURES, raw_props.core_features), PROP(RAW_MEM_FEATURES, raw_props.mem_features), PROP(RAW_MMU_FEATURES, raw_props.mmu_features), PROP(RAW_AS_PRESENT, raw_props.as_present), @@ -365,6 +377,7 @@ static struct { raw_props.thread_max_workgroup_size), PROP(RAW_THREAD_MAX_BARRIER_SIZE, raw_props.thread_max_barrier_size), PROP(RAW_THREAD_FEATURES, raw_props.thread_features), + PROP(RAW_THREAD_TLS_ALLOC, raw_props.thread_tls_alloc), PROP(RAW_COHERENCY_MODE, raw_props.coherency_mode), PROP(COHERENCY_NUM_GROUPS, coherency_info.num_groups), diff --git a/drivers/gpu/arm/bifrost/mali_kbase_gpuprops_types.h 
b/drivers/gpu/arm/bifrost/mali_kbase_gpuprops_types.h index a3ddec79bee7..d7877d1d4a57 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_gpuprops_types.h +++ b/drivers/gpu/arm/bifrost/mali_kbase_gpuprops_types.h @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2011-2017 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2011-2018 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -38,7 +38,7 @@ struct kbase_gpuprops_regdump { u32 gpu_id; u32 l2_features; - u32 suspend_size; /* API 8.2+ */ + u32 core_features; u32 tiler_features; u32 mem_features; u32 mmu_features; @@ -48,6 +48,7 @@ struct kbase_gpuprops_regdump { u32 thread_max_workgroup_size; u32 thread_max_barrier_size; u32 thread_features; + u32 thread_tls_alloc; u32 texture_features[BASE_GPU_NUM_TEXTURE_FEATURES_REGISTERS]; u32 js_features[GPU_MAX_JOB_SLOTS]; u32 shader_present_lo; diff --git a/drivers/gpu/arm/bifrost/mali_kbase_gwt.c b/drivers/gpu/arm/bifrost/mali_kbase_gwt.c index 2caab877a447..b36254641327 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_gwt.c +++ b/drivers/gpu/arm/bifrost/mali_kbase_gwt.c @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2010-2017 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2010-2018 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -124,7 +124,8 @@ int kbase_gpu_gwt_stop(struct kbase_context *kctx) } -int list_cmp_function(void *priv, struct list_head *a, struct list_head *b) +static int list_cmp_function(void *priv, struct list_head *a, + struct list_head *b) { struct kbasep_gwt_list_element *elementA = container_of(a, struct kbasep_gwt_list_element, link); @@ -133,30 +134,27 @@ int list_cmp_function(void *priv, struct list_head *a, struct list_head *b) CSTD_UNUSED(priv); - if (elementA->handle > elementB->handle) + if (elementA->page_addr > elementB->page_addr) return 1; - else if ((elementA->handle == elementB->handle) && - (elementA->offset > elementB->offset)) - return 1; - else - return -1; + return -1; } -void kbase_gpu_gwt_collate(struct kbase_context *kctx, +static void kbase_gpu_gwt_collate(struct kbase_context *kctx, struct list_head *snapshot_list) { struct kbasep_gwt_list_element *pos, *n; struct kbasep_gwt_list_element *collated = NULL; - /* sort the list */ + /* Sort the list */ list_sort(NULL, snapshot_list, list_cmp_function); - /* Combine contiguous areas from same region */ + /* Combine contiguous areas. 
*/ list_for_each_entry_safe(pos, n, snapshot_list, link) { - if (NULL == collated || - collated->handle != pos->handle || - collated->offset + collated->num_pages != - pos->offset) { + if (collated == NULL || collated->region != + pos->region || + (collated->page_addr + + (collated->num_pages * PAGE_SIZE)) != + pos->page_addr) { /* This is the first time through, a new region or * is not contiguous - start collating to this element */ @@ -176,10 +174,8 @@ int kbase_gpu_gwt_dump(struct kbase_context *kctx, { const u32 ubuf_size = gwt_dump->in.len; u32 ubuf_count = 0; - __user void *user_handles = (__user void *) - (uintptr_t)gwt_dump->in.handle_buffer; - __user void *user_offsets = (__user void *) - (uintptr_t)gwt_dump->in.offset_buffer; + __user void *user_addr = (__user void *) + (uintptr_t)gwt_dump->in.addr_buffer; __user void *user_sizes = (__user void *) (uintptr_t)gwt_dump->in.size_buffer; @@ -191,8 +187,7 @@ int kbase_gpu_gwt_dump(struct kbase_context *kctx, return -EPERM; } - if (!gwt_dump->in.len || !gwt_dump->in.handle_buffer - || !gwt_dump->in.offset_buffer + if (!gwt_dump->in.len || !gwt_dump->in.addr_buffer || !gwt_dump->in.size_buffer) { kbase_gpu_vm_unlock(kctx); /* We don't have any valid user space buffer to copy the @@ -219,8 +214,7 @@ int kbase_gpu_gwt_dump(struct kbase_context *kctx, } while ((!list_empty(&kctx->gwt_snapshot_list))) { - u64 handle_buffer[32]; - u64 offset_buffer[32]; + u64 addr_buffer[32]; u64 num_page_buffer[32]; u32 count = 0; int err; @@ -228,30 +222,20 @@ int kbase_gpu_gwt_dump(struct kbase_context *kctx, list_for_each_entry_safe(dump_info, n, &kctx->gwt_snapshot_list, link) { - handle_buffer[count] = dump_info->handle; - offset_buffer[count] = dump_info->offset; + addr_buffer[count] = dump_info->page_addr; num_page_buffer[count] = dump_info->num_pages; count++; list_del(&dump_info->link); kfree(dump_info); - if (ARRAY_SIZE(handle_buffer) == count || + if (ARRAY_SIZE(addr_buffer) == count || ubuf_size == (ubuf_count + count)) 
break; } if (count) { - err = copy_to_user((user_handles + - (ubuf_count * sizeof(u64))), - (void *)handle_buffer, - count * sizeof(u64)); - if (err) { - dev_err(kctx->kbdev->dev, "Copy to user failure\n"); - kbase_gpu_vm_unlock(kctx); - return err; - } - err = copy_to_user((user_offsets + - (ubuf_count * sizeof(u64))), - (void *)offset_buffer, + err = copy_to_user((user_addr + + (ubuf_count * sizeof(u64))), + (void *)addr_buffer, count * sizeof(u64)); if (err) { dev_err(kctx->kbdev->dev, "Copy to user failure\n"); @@ -259,7 +243,7 @@ int kbase_gpu_gwt_dump(struct kbase_context *kctx, return err; } err = copy_to_user((user_sizes + - (ubuf_count * sizeof(u64))), + (ubuf_count * sizeof(u64))), (void *)num_page_buffer, count * sizeof(u64)); if (err) { diff --git a/drivers/gpu/arm/bifrost/mali_kbase_hw.c b/drivers/gpu/arm/bifrost/mali_kbase_hw.c index 286cc954ccbc..f34f53a919b8 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_hw.c +++ b/drivers/gpu/arm/bifrost/mali_kbase_hw.c @@ -176,6 +176,7 @@ static const enum base_hw_issue *kbase_hw_get_issues_for_new_id( {GPU_ID2_PRODUCT_TGOX, {{GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tGOx_r0p0}, + {GPU_ID2_VERSION_MAKE(1, 0, 0), base_hw_issues_tGOx_r1p0}, {U32_MAX, NULL} } }, {GPU_ID2_PRODUCT_TKAX, diff --git a/drivers/gpu/arm/bifrost/mali_kbase_hwaccess_defs.h b/drivers/gpu/arm/bifrost/mali_kbase_hwaccess_defs.h index dd25746d8434..64fa3028f4cc 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_hwaccess_defs.h +++ b/drivers/gpu/arm/bifrost/mali_kbase_hwaccess_defs.h @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2014, 2016 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014, 2016, 2018 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -31,7 +31,17 @@ #include -/* The hwaccess_lock (a spinlock) must be held when accessing this structure */ +/** + * struct kbase_hwaccess_data - object encapsulating the GPU backend specific + * data for the HW access layer. + * hwaccess_lock (a spinlock) must be held when + * accessing this structure. + * @active_kctx: pointer to active kbase context which last submitted an + * atom to GPU and while the context is active it can + * submit new atoms to GPU from the irq context also, without + * going through the bottom half of job completion path. + * @backend: GPU backend specific data for HW access layer + */ struct kbase_hwaccess_data { struct kbase_context *active_kctx; diff --git a/drivers/gpu/arm/bifrost/mali_kbase_hwaccess_gpuprops.h b/drivers/gpu/arm/bifrost/mali_kbase_hwaccess_gpuprops.h index b8ab0dc268f3..63844d97ce02 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_hwaccess_gpuprops.h +++ b/drivers/gpu/arm/bifrost/mali_kbase_hwaccess_gpuprops.h @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014-2015, 2018 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -33,6 +33,8 @@ * GPU * @kbdev: Device pointer * @regdump: Pointer to struct kbase_gpuprops_regdump structure + * + * The caller should ensure that GPU remains powered-on during this function. */ void kbase_backend_gpuprops_get(struct kbase_device *kbdev, struct kbase_gpuprops_regdump *regdump); @@ -43,7 +45,7 @@ void kbase_backend_gpuprops_get(struct kbase_device *kbdev, * @regdump: Pointer to struct kbase_gpuprops_regdump structure * * This function reads GPU properties that are dependent on the hardware - * features bitmask + * features bitmask. 
It will power-on the GPU if required. */ void kbase_backend_gpuprops_get_features(struct kbase_device *kbdev, struct kbase_gpuprops_regdump *regdump); diff --git a/drivers/gpu/arm/bifrost/mali_kbase_hwaccess_instr.h b/drivers/gpu/arm/bifrost/mali_kbase_hwaccess_instr.h index d180e39253bd..0c5ceffb0e47 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_hwaccess_instr.h +++ b/drivers/gpu/arm/bifrost/mali_kbase_hwaccess_instr.h @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014-2015, 2017 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -35,15 +35,15 @@ * kbase_instr_hwcnt_enable_internal - Enable HW counters collection * @kbdev: Kbase device * @kctx: Kbase context - * @setup: HW counter setup parameters + * @enable: HW counter setup parameters * * Context: might sleep, waiting for reset to complete * * Return: 0 on success */ int kbase_instr_hwcnt_enable_internal(struct kbase_device *kbdev, - struct kbase_context *kctx, - struct kbase_uk_hwcnt_setup *setup); + struct kbase_context *kctx, + struct kbase_ioctl_hwcnt_enable *enable); /** * kbase_instr_hwcnt_disable_internal - Disable HW counters collection diff --git a/drivers/gpu/arm/bifrost/mali_kbase_hwaccess_jm.h b/drivers/gpu/arm/bifrost/mali_kbase_hwaccess_jm.h index 8b3d7e20f609..706e60a7fe89 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_hwaccess_jm.h +++ b/drivers/gpu/arm/bifrost/mali_kbase_hwaccess_jm.h @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2014-2017 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014-2018 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -381,6 +381,9 @@ bool kbase_reset_gpu_active(struct kbase_device *kbdev); void kbase_job_slot_hardstop(struct kbase_context *kctx, int js, struct kbase_jd_atom *target_katom); +/* Object containing callbacks for enabling/disabling protected mode, used + * on GPU which supports protected mode switching natively. + */ extern struct protected_mode_ops kbase_native_protected_ops; #endif /* _KBASE_HWACCESS_JM_H_ */ diff --git a/drivers/gpu/arm/bifrost/mali_kbase_ioctl.h b/drivers/gpu/arm/bifrost/mali_kbase_ioctl.h index a8fe9cd9edde..58edda5a0427 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_ioctl.h +++ b/drivers/gpu/arm/bifrost/mali_kbase_ioctl.h @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2017 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2017-2018 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -43,20 +43,14 @@ extern "C" { * KBASE_IOCTL_STICKY_RESOURCE_UNMAP * 11.4: * - New ioctl KBASE_IOCTL_MEM_FIND_GPU_START_AND_OFFSET + * 11.5: + * - New ioctl: KBASE_IOCTL_MEM_JIT_INIT (old ioctl renamed to _OLD) + * 11.6: + * - Added flags field to base_jit_alloc_info structure, which can be used to + * specify pseudo chunked tiler alignment for JIT allocations. */ #define BASE_UK_VERSION_MAJOR 11 -#define BASE_UK_VERSION_MINOR 4 - -#ifdef ANDROID -/* Android's definition of ioctl is incorrect, specifying the type argument as - * 'int'. This creates a warning when using _IOWR (as the top bit is set). Work - * round this by redefining _IOC to include a case to 'int'. 
- */ -#undef _IOC -#define _IOC(dir, type, nr, size) \ - ((int)(((dir) << _IOC_DIRSHIFT) | ((type) << _IOC_TYPESHIFT) | \ - ((nr) << _IOC_NRSHIFT) | ((size) << _IOC_SIZESHIFT))) -#endif +#define BASE_UK_VERSION_MINOR 6 /** * struct kbase_ioctl_version_check - Check version compatibility with kernel @@ -191,9 +185,9 @@ union kbase_ioctl_mem_query { #define KBASE_IOCTL_MEM_QUERY \ _IOWR(KBASE_IOCTL_TYPE, 6, union kbase_ioctl_mem_query) -#define KBASE_MEM_QUERY_COMMIT_SIZE 1 -#define KBASE_MEM_QUERY_VA_SIZE 2 -#define KBASE_MEM_QUERY_FLAGS 3 +#define KBASE_MEM_QUERY_COMMIT_SIZE ((u64)1) +#define KBASE_MEM_QUERY_VA_SIZE ((u64)2) +#define KBASE_MEM_QUERY_FLAGS ((u64)3) /** * struct kbase_ioctl_mem_free - Free a memory region @@ -252,6 +246,21 @@ struct kbase_ioctl_hwcnt_enable { #define KBASE_IOCTL_HWCNT_CLEAR \ _IO(KBASE_IOCTL_TYPE, 11) +/** + * struct kbase_ioctl_hwcnt_values - Values to set the dummy counters to. + * @data: Counter samples for the dummy model. + * @size: Size of the counter sample data. + * @padding: Padding. + */ +struct kbase_ioctl_hwcnt_values { + __u64 data; + __u32 size; + __u32 padding; +}; + +#define KBASE_IOCTL_HWCNT_SET \ + _IOW(KBASE_IOCTL_TYPE, 32, struct kbase_ioctl_hwcnt_values) + /** * struct kbase_ioctl_disjoint_query - Query the disjoint counter * @counter: A counter of disjoint events in the kernel @@ -271,6 +280,10 @@ struct kbase_ioctl_disjoint_query { * * The ioctl will return the number of bytes written into version_buffer * (which includes a NULL byte) or a negative error code + * + * The ioctl request code has to be _IOW because the data in ioctl struct is + * being copied to the kernel, even though the kernel then writes out the + * version info to the buffer specified in the ioctl.
*/ struct kbase_ioctl_get_ddk_version { __u64 version_buffer; @@ -281,16 +294,40 @@ struct kbase_ioctl_get_ddk_version { #define KBASE_IOCTL_GET_DDK_VERSION \ _IOW(KBASE_IOCTL_TYPE, 13, struct kbase_ioctl_get_ddk_version) +/** + * struct kbase_ioctl_mem_jit_init_old - Initialise the JIT memory allocator + * + * @va_pages: Number of VA pages to reserve for JIT + * + * Note that depending on the VA size of the application and GPU, the value + * specified in @va_pages may be ignored. + * + * New code should use KBASE_IOCTL_MEM_JIT_INIT instead, this is kept for + * backwards compatibility. + */ +struct kbase_ioctl_mem_jit_init_old { + __u64 va_pages; +}; + +#define KBASE_IOCTL_MEM_JIT_INIT_OLD \ + _IOW(KBASE_IOCTL_TYPE, 14, struct kbase_ioctl_mem_jit_init_old) + /** * struct kbase_ioctl_mem_jit_init - Initialise the JIT memory allocator * * @va_pages: Number of VA pages to reserve for JIT + * @max_allocations: Maximum number of concurrent allocations + * @trim_level: Level of JIT allocation trimming to perform on free (0 - 100%) + * @padding: Currently unused, must be zero * * Note that depending on the VA size of the application and GPU, the value * specified in @va_pages may be ignored. */ struct kbase_ioctl_mem_jit_init { __u64 va_pages; + __u8 max_allocations; + __u8 trim_level; + __u8 padding[6]; }; #define KBASE_IOCTL_MEM_JIT_INIT \ @@ -595,7 +632,6 @@ union kbase_ioctl_mem_find_gpu_start_and_offset { #define KBASE_IOCTL_MEM_FIND_GPU_START_AND_OFFSET \ _IOWR(KBASE_IOCTL_TYPE, 31, union kbase_ioctl_mem_find_gpu_start_and_offset) -/* IOCTL 32 is free for use */ #define KBASE_IOCTL_CINSTR_GWT_START \ _IO(KBASE_IOCTL_TYPE, 33) @@ -605,9 +641,7 @@ union kbase_ioctl_mem_find_gpu_start_and_offset { /** * union kbase_ioctl_gwt_dump - Used to collect all GPU write fault addresses. - * @handle_buffer: Address of buffer to hold handles of modified areas. 
- * @offset_buffer: Address of buffer to hold offset size of modified areas - * (in pages) + * @addr_buffer: Address of buffer to hold addresses of gpu modified areas. * @size_buffer: Address of buffer to hold size of modified areas (in pages) * @len: Number of addresses the buffers can hold. * @more_data_available: Status indicating if more addresses are available. @@ -620,8 +654,7 @@ union kbase_ioctl_mem_find_gpu_start_and_offset { */ union kbase_ioctl_cinstr_gwt_dump { struct { - __u64 handle_buffer; - __u64 offset_buffer; + __u64 addr_buffer; __u64 size_buffer; __u32 len; __u32 padding; @@ -639,6 +672,8 @@ union kbase_ioctl_cinstr_gwt_dump { /* IOCTLs 36-41 are reserved */ +/* IOCTL 42 is free for use */ + /*************** * test ioctls * ***************/ @@ -723,7 +758,7 @@ struct kbase_ioctl_tlstream_stats { #define KBASE_GPUPROP_RAW_L2_PRESENT 27 #define KBASE_GPUPROP_RAW_STACK_PRESENT 28 #define KBASE_GPUPROP_RAW_L2_FEATURES 29 -#define KBASE_GPUPROP_RAW_SUSPEND_SIZE 30 +#define KBASE_GPUPROP_RAW_CORE_FEATURES 30 #define KBASE_GPUPROP_RAW_MEM_FEATURES 31 #define KBASE_GPUPROP_RAW_MMU_FEATURES 32 #define KBASE_GPUPROP_RAW_AS_PRESENT 33 @@ -778,6 +813,11 @@ struct kbase_ioctl_tlstream_stats { #define KBASE_GPUPROP_TEXTURE_FEATURES_3 80 #define KBASE_GPUPROP_RAW_TEXTURE_FEATURES_3 81 +#define KBASE_GPUPROP_NUM_EXEC_ENGINES 82 + +#define KBASE_GPUPROP_RAW_THREAD_TLS_ALLOC 83 +#define KBASE_GPUPROP_TLS_ALLOC 84 + #ifdef __cpluscplus } #endif diff --git a/drivers/gpu/arm/bifrost/mali_kbase_js.c b/drivers/gpu/arm/bifrost/mali_kbase_js.c index 649bb7ea3267..938eaa5ff608 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_js.c +++ b/drivers/gpu/arm/bifrost/mali_kbase_js.c @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2011-2017 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2011-2018 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -422,7 +422,7 @@ static bool kbase_js_ctx_list_add_unpullable_nolock(struct kbase_device *kbdev, int kbasep_js_devdata_init(struct kbase_device * const kbdev) { struct kbasep_js_device_data *jsdd; - int i; + int i, j; KBASE_DEBUG_ASSERT(kbdev != NULL); @@ -527,8 +527,10 @@ int kbasep_js_devdata_init(struct kbase_device * const kbdev) sema_init(&jsdd->schedule_sem, 1); for (i = 0; i < kbdev->gpu_props.num_job_slots; ++i) { - INIT_LIST_HEAD(&jsdd->ctx_list_pullable[i]); - INIT_LIST_HEAD(&jsdd->ctx_list_unpullable[i]); + for (j = 0; j < KBASE_JS_ATOM_SCHED_PRIO_COUNT; ++j) { + INIT_LIST_HEAD(&jsdd->ctx_list_pullable[i][j]); + INIT_LIST_HEAD(&jsdd->ctx_list_unpullable[i][j]); + } } return 0; @@ -552,13 +554,13 @@ void kbasep_js_devdata_term(struct kbase_device *kbdev) */ KBASE_DEBUG_ASSERT(js_devdata->nr_all_contexts_running == 0); KBASE_DEBUG_ASSERT(memcmp( - js_devdata->runpool_irq.ctx_attr_ref_count, - zero_ctx_attr_ref_count, - sizeof(zero_ctx_attr_ref_count)) == 0); + js_devdata->runpool_irq.ctx_attr_ref_count, + zero_ctx_attr_ref_count, + sizeof(zero_ctx_attr_ref_count)) == 0); CSTD_UNUSED(zero_ctx_attr_ref_count); } -int kbasep_js_kctx_init(struct kbase_context * const kctx) +int kbasep_js_kctx_init(struct kbase_context *const kctx) { struct kbase_device *kbdev; struct kbasep_js_kctx_info *js_kctx_info; @@ -666,7 +668,7 @@ static bool kbase_js_ctx_list_add_pullable_nolock(struct kbase_device *kbdev, list_del_init(&kctx->jctx.sched_info.ctx.ctx_list_entry[js]); list_add_tail(&kctx->jctx.sched_info.ctx.ctx_list_entry[js], - &kbdev->js_data.ctx_list_pullable[js]); + &kbdev->js_data.ctx_list_pullable[js][kctx->priority]); if (!kctx->slots_pullable) { kbdev->js_data.nr_contexts_pullable++; @@ -706,7 +708,7 @@ static bool kbase_js_ctx_list_add_pullable_head_nolock( 
list_del_init(&kctx->jctx.sched_info.ctx.ctx_list_entry[js]); list_add(&kctx->jctx.sched_info.ctx.ctx_list_entry[js], - &kbdev->js_data.ctx_list_pullable[js]); + &kbdev->js_data.ctx_list_pullable[js][kctx->priority]); if (!kctx->slots_pullable) { kbdev->js_data.nr_contexts_pullable++; @@ -777,7 +779,7 @@ static bool kbase_js_ctx_list_add_unpullable_nolock(struct kbase_device *kbdev, lockdep_assert_held(&kbdev->hwaccess_lock); list_move_tail(&kctx->jctx.sched_info.ctx.ctx_list_entry[js], - &kbdev->js_data.ctx_list_unpullable[js]); + &kbdev->js_data.ctx_list_unpullable[js][kctx->priority]); if (kctx->slots_pullable == (1 << js)) { kbdev->js_data.nr_contexts_pullable--; @@ -852,19 +854,23 @@ static struct kbase_context *kbase_js_ctx_list_pop_head_nolock( int js) { struct kbase_context *kctx; + int i; lockdep_assert_held(&kbdev->hwaccess_lock); - if (list_empty(&kbdev->js_data.ctx_list_pullable[js])) - return NULL; + for (i = 0; i < KBASE_JS_ATOM_SCHED_PRIO_COUNT; i++) { + if (list_empty(&kbdev->js_data.ctx_list_pullable[js][i])) + continue; - kctx = list_entry(kbdev->js_data.ctx_list_pullable[js].next, - struct kbase_context, - jctx.sched_info.ctx.ctx_list_entry[js]); + kctx = list_entry(kbdev->js_data.ctx_list_pullable[js][i].next, + struct kbase_context, + jctx.sched_info.ctx.ctx_list_entry[js]); - list_del_init(&kctx->jctx.sched_info.ctx.ctx_list_entry[js]); + list_del_init(&kctx->jctx.sched_info.ctx.ctx_list_entry[js]); - return kctx; + return kctx; + } + return NULL; } /** @@ -1065,6 +1071,51 @@ static bool kbase_js_dep_validate(struct kbase_context *kctx, return ret; } +void kbase_js_set_ctx_priority(struct kbase_context *kctx, int new_priority) +{ + struct kbase_device *kbdev = kctx->kbdev; + int js; + + lockdep_assert_held(&kbdev->hwaccess_lock); + + /* Move kctx to the pullable/unpullable list as per the new priority */ + if (new_priority != kctx->priority) { + for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) { + if (kctx->slots_pullable & (1 << js)) +
list_move_tail(&kctx->jctx.sched_info.ctx.ctx_list_entry[js], + &kbdev->js_data.ctx_list_pullable[js][new_priority]); + else + list_move_tail(&kctx->jctx.sched_info.ctx.ctx_list_entry[js], + &kbdev->js_data.ctx_list_unpullable[js][new_priority]); + } + + kctx->priority = new_priority; + } +} + +void kbase_js_update_ctx_priority(struct kbase_context *kctx) +{ + struct kbase_device *kbdev = kctx->kbdev; + int new_priority = KBASE_JS_ATOM_SCHED_PRIO_LOW; + int prio; + + lockdep_assert_held(&kbdev->hwaccess_lock); + + if (kbdev->js_ctx_scheduling_mode == KBASE_JS_SYSTEM_PRIORITY_MODE) { + /* Determine the new priority for context, as per the priority + * of currently in-use atoms. + */ + for (prio = 0; prio < KBASE_JS_ATOM_SCHED_PRIO_COUNT; prio++) { + if (kctx->atoms_count[prio]) { + new_priority = prio; + break; + } + } + } + + kbase_js_set_ctx_priority(kctx, new_priority); +} + bool kbasep_js_add_job(struct kbase_context *kctx, struct kbase_jd_atom *atom) { @@ -1099,6 +1150,9 @@ bool kbasep_js_add_job(struct kbase_context *kctx, /* Lock for state available during IRQ */ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + if (++kctx->atoms_count[atom->sched_priority] == 1) + kbase_js_update_ctx_priority(kctx); + if (!kbase_js_dep_validate(kctx, atom)) { /* Dependencies could not be represented */ --(js_kctx_info->ctx.nr_jobs); @@ -1107,6 +1161,19 @@ bool kbasep_js_add_job(struct kbase_context *kctx, * dependencies */ atom->status = KBASE_JD_ATOM_STATE_QUEUED; + /* Undo the count, as the atom will get added again later but + * leave the context priority adjusted or boosted, in case if + * this was the first higher priority atom received for this + * context. + * This will prevent the scenario of priority inversion, where + * another context having medium priority atoms keeps getting + * scheduled over this context, which is having both lower and + * higher priority atoms, but higher priority atoms are blocked + * due to dependency on lower priority atoms. 
With priority + * boost the high priority atom will get to run at earliest. + */ + kctx->atoms_count[atom->sched_priority]--; + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); mutex_unlock(&js_devdata->runpool_mutex); @@ -1173,6 +1240,7 @@ void kbasep_js_remove_job(struct kbase_device *kbdev, struct kbase_context *kctx, struct kbase_jd_atom *atom) { struct kbasep_js_kctx_info *js_kctx_info; + unsigned long flags; KBASE_DEBUG_ASSERT(kbdev != NULL); KBASE_DEBUG_ASSERT(kctx != NULL); @@ -1186,6 +1254,11 @@ void kbasep_js_remove_job(struct kbase_device *kbdev, /* De-refcount ctx.nr_jobs */ KBASE_DEBUG_ASSERT(js_kctx_info->ctx.nr_jobs > 0); --(js_kctx_info->ctx.nr_jobs); + + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + if (--kctx->atoms_count[atom->sched_priority] == 0) + kbase_js_update_ctx_priority(kctx); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); } bool kbasep_js_remove_cancelled_job(struct kbase_device *kbdev, @@ -1256,9 +1329,8 @@ struct kbase_context *kbasep_js_runpool_lookup_ctx(struct kbase_device *kbdev, } /** - * kbasep_js_release_result - Try running more jobs after releasing a context - * and/or atom - * + * kbasep_js_run_jobs_after_ctx_and_atom_release - Try running more jobs after + * releasing a context and/or atom * @kbdev: The kbase_device to operate on * @kctx: The kbase_context to operate on * @katom_retained_state: Retained state from the atom @@ -1304,12 +1376,15 @@ static kbasep_js_release_result kbasep_js_run_jobs_after_ctx_and_atom_release( return result; } -/* - * Internal function to release the reference on a ctx and an atom's "retained - * state", only taking the runpool and as transaction mutexes +/** + * kbasep_js_runpool_release_ctx_internal - Internal function to release the reference + * on a ctx and an atom's "retained state", only + * taking the runpool and as transaction mutexes + * @kbdev: The kbase_device to operate on + * @kctx: The kbase_context to operate on + * @katom_retained_state: Retained state from 
the atom * - * This also starts more jobs running in the case of an ctx-attribute state - * change + * This also starts more jobs running in the case of an ctx-attribute state change * * This does none of the followup actions for scheduling: * - It does not schedule in a new context @@ -1317,11 +1392,15 @@ static kbasep_js_release_result kbasep_js_run_jobs_after_ctx_and_atom_release( * * For those tasks, just call kbasep_js_runpool_release_ctx() instead * - * Requires: + * Has following requirements * - Context is scheduled in, and kctx->as_nr matches kctx_as_nr * - Context has a non-zero refcount * - Caller holds js_kctx_info->ctx.jsctx_mutex * - Caller holds js_devdata->runpool_mutex + * + * Return: A bitpattern, containing KBASEP_JS_RELEASE_RESULT_* flags, indicating + * the result of releasing a context that whether the caller should try + * scheduling a new context or should try scheduling all contexts. */ static kbasep_js_release_result kbasep_js_runpool_release_ctx_internal( struct kbase_device *kbdev, @@ -1880,7 +1959,7 @@ void kbasep_js_suspend(struct kbase_device *kbdev) void kbasep_js_resume(struct kbase_device *kbdev) { struct kbasep_js_device_data *js_devdata; - int js; + int js, prio; KBASE_DEBUG_ASSERT(kbdev); js_devdata = &kbdev->js_data; @@ -1888,31 +1967,33 @@ void kbasep_js_resume(struct kbase_device *kbdev) mutex_lock(&js_devdata->queue_mutex); for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) { - struct kbase_context *kctx, *n; + for (prio = 0; prio < KBASE_JS_ATOM_SCHED_PRIO_COUNT; prio++) { + struct kbase_context *kctx, *n; - list_for_each_entry_safe(kctx, n, - &kbdev->js_data.ctx_list_unpullable[js], - jctx.sched_info.ctx.ctx_list_entry[js]) { - struct kbasep_js_kctx_info *js_kctx_info; - unsigned long flags; - bool timer_sync = false; + list_for_each_entry_safe(kctx, n, + &kbdev->js_data.ctx_list_unpullable[js][prio], + jctx.sched_info.ctx.ctx_list_entry[js]) { + struct kbasep_js_kctx_info *js_kctx_info; + unsigned long flags; + bool 
timer_sync = false; - js_kctx_info = &kctx->jctx.sched_info; + js_kctx_info = &kctx->jctx.sched_info; - mutex_lock(&js_kctx_info->ctx.jsctx_mutex); - mutex_lock(&js_devdata->runpool_mutex); - spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + mutex_lock(&js_kctx_info->ctx.jsctx_mutex); + mutex_lock(&js_devdata->runpool_mutex); + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); - if (!kbase_ctx_flag(kctx, KCTX_SCHEDULED) && - kbase_js_ctx_pullable(kctx, js, false)) - timer_sync = - kbase_js_ctx_list_add_pullable_nolock( - kbdev, kctx, js); - spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); - if (timer_sync) - kbase_backend_ctx_count_changed(kbdev); - mutex_unlock(&js_devdata->runpool_mutex); - mutex_unlock(&js_kctx_info->ctx.jsctx_mutex); + if (!kbase_ctx_flag(kctx, KCTX_SCHEDULED) && + kbase_js_ctx_pullable(kctx, js, false)) + timer_sync = + kbase_js_ctx_list_add_pullable_nolock( + kbdev, kctx, js); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + if (timer_sync) + kbase_backend_ctx_count_changed(kbdev); + mutex_unlock(&js_devdata->runpool_mutex); + mutex_unlock(&js_kctx_info->ctx.jsctx_mutex); + } } } mutex_unlock(&js_devdata->queue_mutex); diff --git a/drivers/gpu/arm/bifrost/mali_kbase_js.h b/drivers/gpu/arm/bifrost/mali_kbase_js.h index aa930b9d83f6..963cef903209 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_js.h +++ b/drivers/gpu/arm/bifrost/mali_kbase_js.h @@ -596,6 +596,27 @@ bool kbase_js_is_atom_valid(struct kbase_device *kbdev, */ void kbase_js_set_timeouts(struct kbase_device *kbdev); +/** + * kbase_js_set_ctx_priority - set the context priority + * @kctx: Context pointer + * @new_priority: New priority value for the Context + * + * The context priority is set to a new value and it is moved to the + * pullable/unpullable list as per the new priority. 
+ */ +void kbase_js_set_ctx_priority(struct kbase_context *kctx, int new_priority); + + +/** + * kbase_js_update_ctx_priority - update the context priority + * @kctx: Context pointer + * + * The context priority gets updated as per the priority of atoms currently in + * use for that context, but only if system priority mode for context scheduling + * is being used. + */ +void kbase_js_update_ctx_priority(struct kbase_context *kctx); + /* * Helpers follow */ diff --git a/drivers/gpu/arm/bifrost/mali_kbase_js_defs.h b/drivers/gpu/arm/bifrost/mali_kbase_js_defs.h index 8c8aa68fbc98..b53f4adf00d3 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_js_defs.h +++ b/drivers/gpu/arm/bifrost/mali_kbase_js_defs.h @@ -146,6 +146,48 @@ enum { /** Combination of KBASE_JS_ATOM_DONE_<...> bits */ typedef u32 kbasep_js_atom_done_code; +/* + * Context scheduling mode defines for kbase_device::js_ctx_scheduling_mode + */ +enum { + /* + * In this mode, the context containing higher priority atoms will be + * scheduled first and also the new runnable higher priority atoms can + * preempt lower priority atoms currently running on the GPU, even if + * they belong to a different context. + */ + KBASE_JS_SYSTEM_PRIORITY_MODE = 0, + + /* + * In this mode, the contexts are scheduled in round-robin fashion and + * the new runnable higher priority atoms can preempt the lower priority + * atoms currently running on the GPU, only if they belong to the same + * context. 
+ */ + KBASE_JS_PROCESS_LOCAL_PRIORITY_MODE, + + /* Must be the last in the enum */ + KBASE_JS_PRIORITY_MODE_COUNT, +}; + +/* + * Internal atom priority defines for kbase_jd_atom::sched_prio + */ +enum { + KBASE_JS_ATOM_SCHED_PRIO_HIGH = 0, + KBASE_JS_ATOM_SCHED_PRIO_MED, + KBASE_JS_ATOM_SCHED_PRIO_LOW, + KBASE_JS_ATOM_SCHED_PRIO_COUNT, +}; + +/* Invalid priority for kbase_jd_atom::sched_prio */ +#define KBASE_JS_ATOM_SCHED_PRIO_INVALID -1 + +/* Default priority in the case of contexts with no atoms, or being lenient + * about invalid priorities from userspace. + */ +#define KBASE_JS_ATOM_SCHED_PRIO_DEFAULT KBASE_JS_ATOM_SCHED_PRIO_MED + /** * @brief KBase Device Data Job Scheduler sub-structure * @@ -229,12 +271,12 @@ struct kbasep_js_device_data { /** * List of contexts that can currently be pulled from */ - struct list_head ctx_list_pullable[BASE_JM_MAX_NR_SLOTS]; + struct list_head ctx_list_pullable[BASE_JM_MAX_NR_SLOTS][KBASE_JS_ATOM_SCHED_PRIO_COUNT]; /** * List of contexts that can not currently be pulled from, but have * jobs currently running. 
*/ - struct list_head ctx_list_unpullable[BASE_JM_MAX_NR_SLOTS]; + struct list_head ctx_list_unpullable[BASE_JM_MAX_NR_SLOTS][KBASE_JS_ATOM_SCHED_PRIO_COUNT]; /** Number of currently scheduled user contexts (excluding ones that are not submitting jobs) */ s8 nr_user_contexts_running; @@ -365,22 +407,6 @@ struct kbasep_js_atom_retained_state { */ #define KBASEP_JS_TICK_RESOLUTION_US 1 -/* - * Internal atom priority defines for kbase_jd_atom::sched_prio - */ -enum { - KBASE_JS_ATOM_SCHED_PRIO_HIGH = 0, - KBASE_JS_ATOM_SCHED_PRIO_MED, - KBASE_JS_ATOM_SCHED_PRIO_LOW, - KBASE_JS_ATOM_SCHED_PRIO_COUNT, -}; - -/* Invalid priority for kbase_jd_atom::sched_prio */ -#define KBASE_JS_ATOM_SCHED_PRIO_INVALID -1 - -/* Default priority in the case of contexts with no atoms, or being lenient - * about invalid priorities from userspace */ -#define KBASE_JS_ATOM_SCHED_PRIO_DEFAULT KBASE_JS_ATOM_SCHED_PRIO_MED /** @} *//* end group kbase_js */ /** @} *//* end group base_kbase_api */ diff --git a/drivers/gpu/arm/bifrost/mali_kbase_mem.c b/drivers/gpu/arm/bifrost/mali_kbase_mem.c index b457c5215616..e5acab915bd2 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_mem.c +++ b/drivers/gpu/arm/bifrost/mali_kbase_mem.c @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2010-2017 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2010-2018 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -535,7 +535,8 @@ static void kbase_region_tracker_ds_init(struct kbase_context *kctx, /* Although exec and custom_va_reg don't always exist, * initialize unconditionally because of the mem_view debugfs - * implementation which relies on these being empty */ + * implementation which relies on these being empty + */ kctx->reg_rbtree_exec = RB_ROOT; kctx->reg_rbtree_custom = RB_ROOT; @@ -567,6 +568,32 @@ void kbase_region_tracker_term(struct kbase_context *kctx) kbase_region_tracker_erase_rbtree(&kctx->reg_rbtree_custom); } +static size_t kbase_get_same_va_bits(struct kbase_context *kctx) +{ +#if defined(CONFIG_ARM64) + /* VA_BITS can be as high as 48 bits, but all bits are available for + * both user and kernel. + */ + size_t cpu_va_bits = VA_BITS; +#elif defined(CONFIG_X86_64) + /* x86_64 can access 48 bits of VA, but the 48th is used to denote + * kernel (1) vs userspace (0), so the max here is 47. + */ + size_t cpu_va_bits = 47; +#elif defined(CONFIG_ARM) || defined(CONFIG_X86_32) + size_t cpu_va_bits = sizeof(void *) * BITS_PER_BYTE; +#else +#error "Unknown CPU VA width for this architecture" +#endif + +#ifdef CONFIG_64BIT + if (kbase_ctx_flag(kctx, KCTX_COMPAT)) + cpu_va_bits = 32; +#endif + + return min(cpu_va_bits, (size_t) kctx->kbdev->gpu_props.mmu.va_bits); +} + /** * Initialize the region tracker data structure. 
*/ @@ -575,7 +602,7 @@ int kbase_region_tracker_init(struct kbase_context *kctx) struct kbase_va_region *same_va_reg; struct kbase_va_region *exec_reg = NULL; struct kbase_va_region *custom_va_reg = NULL; - size_t same_va_bits = sizeof(void *) * BITS_PER_BYTE; + size_t same_va_bits = kbase_get_same_va_bits(kctx); u64 custom_va_size = KBASE_REG_ZONE_CUSTOM_VA_SIZE; u64 gpu_va_limit = (1ULL << kctx->kbdev->gpu_props.mmu.va_bits) >> PAGE_SHIFT; u64 same_va_pages; @@ -584,26 +611,6 @@ int kbase_region_tracker_init(struct kbase_context *kctx) /* Take the lock as kbase_free_alloced_region requires it */ kbase_gpu_vm_lock(kctx); -#if defined(CONFIG_ARM64) - same_va_bits = VA_BITS; -#elif defined(CONFIG_X86_64) - same_va_bits = 47; -#elif defined(CONFIG_64BIT) -#error Unsupported 64-bit architecture -#endif - -#ifdef CONFIG_64BIT - if (kbase_ctx_flag(kctx, KCTX_COMPAT)) - same_va_bits = 32; - else if (kbase_hw_has_feature(kctx->kbdev, BASE_HW_FEATURE_33BIT_VA)) - same_va_bits = 33; -#endif - - if (kctx->kbdev->gpu_props.mmu.va_bits < same_va_bits) { - err = -EINVAL; - goto fail_unlock; - } - same_va_pages = (1ULL << (same_va_bits - PAGE_SHIFT)) - 1; /* all have SAME_VA */ same_va_reg = kbase_alloc_free_region(kctx, 1, @@ -652,7 +659,8 @@ int kbase_region_tracker_init(struct kbase_context *kctx) } #endif - kbase_region_tracker_ds_init(kctx, same_va_reg, exec_reg, custom_va_reg); + kbase_region_tracker_ds_init(kctx, same_va_reg, exec_reg, + custom_va_reg); kctx->same_va_end = same_va_pages + 1; @@ -668,33 +676,16 @@ int kbase_region_tracker_init(struct kbase_context *kctx) return err; } -int kbase_region_tracker_init_jit(struct kbase_context *kctx, u64 jit_va_pages) -{ #ifdef CONFIG_64BIT +static int kbase_region_tracker_init_jit_64(struct kbase_context *kctx, + u64 jit_va_pages) +{ struct kbase_va_region *same_va; struct kbase_va_region *custom_va_reg; - u64 same_va_bits; + u64 same_va_bits = kbase_get_same_va_bits(kctx); u64 total_va_size; int err; - /* - * Nothing to do 
for 32-bit clients, JIT uses the existing - * custom VA zone. - */ - if (kbase_ctx_flag(kctx, KCTX_COMPAT)) - return 0; - -#if defined(CONFIG_ARM64) - same_va_bits = VA_BITS; -#elif defined(CONFIG_X86_64) - same_va_bits = 47; -#elif defined(CONFIG_64BIT) -#error Unsupported 64-bit architecture -#endif - - if (kbase_hw_has_feature(kctx->kbdev, BASE_HW_FEATURE_33BIT_VA)) - same_va_bits = 33; - total_va_size = (1ULL << (same_va_bits - PAGE_SHIFT)) - 1; kbase_gpu_vm_lock(kctx); @@ -754,9 +745,27 @@ int kbase_region_tracker_init_jit(struct kbase_context *kctx, u64 jit_va_pages) fail_unlock: kbase_gpu_vm_unlock(kctx); return err; -#else - return 0; +} #endif + +int kbase_region_tracker_init_jit(struct kbase_context *kctx, u64 jit_va_pages, + u8 max_allocations, u8 trim_level) +{ + if (trim_level > 100) + return -EINVAL; + + kctx->jit_max_allocations = max_allocations; + kctx->trim_level = trim_level; + +#ifdef CONFIG_64BIT + if (!kbase_ctx_flag(kctx, KCTX_COMPAT)) + return kbase_region_tracker_init_jit_64(kctx, jit_va_pages); +#endif + /* + * Nothing to do for 32-bit clients, JIT uses the existing + * custom VA zone. + */ + return 0; } int kbase_mem_init(struct kbase_device *kbdev) @@ -824,7 +833,8 @@ KBASE_EXPORT_TEST_API(kbase_mem_term); * The allocated object is not part of any list yet, and is flagged as * KBASE_REG_FREE. No mapping is allocated yet. * - * zone is KBASE_REG_ZONE_CUSTOM_VA, KBASE_REG_ZONE_SAME_VA, or KBASE_REG_ZONE_EXEC + * zone is KBASE_REG_ZONE_CUSTOM_VA, KBASE_REG_ZONE_SAME_VA, + * or KBASE_REG_ZONE_EXEC * */ struct kbase_va_region *kbase_alloc_free_region(struct kbase_context *kctx, u64 start_pfn, size_t nr_pages, int zone) @@ -874,6 +884,8 @@ KBASE_EXPORT_TEST_API(kbase_alloc_free_region); void kbase_free_alloced_region(struct kbase_va_region *reg) { if (!(reg->flags & KBASE_REG_FREE)) { + mutex_lock(&reg->kctx->jit_evict_lock); + /* * The physical allocation should have been removed from the * eviction list before this function is called. 
However, in the @@ -882,6 +894,8 @@ void kbase_free_alloced_region(struct kbase_va_region *reg) * on the list at termination time of the region tracker. */ if (!list_empty(&reg->gpu_alloc->evict_node)) { + mutex_unlock(&reg->kctx->jit_evict_lock); + /* * Unlink the physical allocation before unmaking it * evictable so that the allocation isn't grown back to @@ -904,6 +918,8 @@ void kbase_free_alloced_region(struct kbase_va_region *reg) KBASE_MEM_TYPE_NATIVE); kbase_mem_evictable_unmake(reg->gpu_alloc); } + } else { + mutex_unlock(&reg->kctx->jit_evict_lock); } /* @@ -1497,9 +1513,8 @@ int kbase_update_region_flags(struct kbase_context *kctx, return 0; } -int kbase_alloc_phy_pages_helper( - struct kbase_mem_phy_alloc *alloc, - size_t nr_pages_requested) +int kbase_alloc_phy_pages_helper(struct kbase_mem_phy_alloc *alloc, + size_t nr_pages_requested) { int new_page_count __maybe_unused; size_t nr_left = nr_pages_requested; @@ -1649,12 +1664,18 @@ int kbase_alloc_phy_pages_helper( alloc_failed: /* rollback needed if got one or more 2MB but failed later */ - if (nr_left != nr_pages_requested) - kbase_mem_pool_free_pages(&kctx->lp_mem_pool, - nr_pages_requested - nr_left, - alloc->pages + alloc->nents, - false, - false); + if (nr_left != nr_pages_requested) { + size_t nr_pages_to_free = nr_pages_requested - nr_left; + + alloc->nents += nr_pages_to_free; + + kbase_process_page_usage_inc(kctx, nr_pages_to_free); + kbase_atomic_add_pages(nr_pages_to_free, &kctx->used_pages); + kbase_atomic_add_pages(nr_pages_to_free, + &kctx->kbdev->memdev.used_pages); + + kbase_free_phy_pages_helper(alloc, nr_pages_to_free); + } kbase_process_page_usage_dec(kctx, nr_pages_requested); kbase_atomic_sub_pages(nr_pages_requested, &kctx->used_pages); @@ -1665,6 +1686,181 @@ int kbase_alloc_phy_pages_helper( return -ENOMEM; } +struct tagged_addr *kbase_alloc_phy_pages_helper_locked( + struct kbase_mem_phy_alloc *alloc, struct kbase_mem_pool *pool, + size_t nr_pages_requested) +{ + int new_page_count 
__maybe_unused; + size_t nr_left = nr_pages_requested; + int res; + struct kbase_context *kctx; + struct tagged_addr *tp; + struct tagged_addr *new_pages = NULL; + + KBASE_DEBUG_ASSERT(alloc->type == KBASE_MEM_TYPE_NATIVE); + KBASE_DEBUG_ASSERT(alloc->imported.kctx); + + lockdep_assert_held(&pool->pool_lock); + +#if !defined(CONFIG_MALI_2MB_ALLOC) + WARN_ON(pool->order); +#endif + + if (alloc->reg) { + if (nr_pages_requested > alloc->reg->nr_pages - alloc->nents) + goto invalid_request; + } + + kctx = alloc->imported.kctx; + + lockdep_assert_held(&kctx->mem_partials_lock); + + if (nr_pages_requested == 0) + goto done; /*nothing to do*/ + + new_page_count = kbase_atomic_add_pages( + nr_pages_requested, &kctx->used_pages); + kbase_atomic_add_pages(nr_pages_requested, + &kctx->kbdev->memdev.used_pages); + + /* Increase mm counters before we allocate pages so that this + * allocation is visible to the OOM killer + */ + kbase_process_page_usage_inc(kctx, nr_pages_requested); + + tp = alloc->pages + alloc->nents; + new_pages = tp; + +#ifdef CONFIG_MALI_2MB_ALLOC + if (pool->order) { + int nr_lp = nr_left / (SZ_2M / SZ_4K); + + res = kbase_mem_pool_alloc_pages_locked(pool, + nr_lp * (SZ_2M / SZ_4K), + tp); + + if (res > 0) { + nr_left -= res; + tp += res; + } + + if (nr_left) { + struct kbase_sub_alloc *sa, *temp_sa; + + list_for_each_entry_safe(sa, temp_sa, + &kctx->mem_partials, link) { + int pidx = 0; + + while (nr_left) { + pidx = find_next_zero_bit(sa->sub_pages, + SZ_2M / SZ_4K, + pidx); + bitmap_set(sa->sub_pages, pidx, 1); + *tp++ = as_tagged_tag(page_to_phys( + sa->page + pidx), + FROM_PARTIAL); + nr_left--; + + if (bitmap_full(sa->sub_pages, + SZ_2M / SZ_4K)) { + /* unlink from partial list when + * full + */ + list_del_init(&sa->link); + break; + } + } + } + } + + /* only if we actually have a chunk left <512. If more it + * indicates that we couldn't allocate a 2MB above, so no point + * to retry here. 
+ */ + if (nr_left > 0 && nr_left < (SZ_2M / SZ_4K)) { + /* create a new partial and suballocate the rest from it + */ + struct page *np = NULL; + + np = kbase_mem_pool_alloc_locked(pool); + + if (np) { + int i; + struct kbase_sub_alloc *sa; + struct page *p; + + sa = kmalloc(sizeof(*sa), GFP_KERNEL); + if (!sa) { + kbase_mem_pool_free_locked(pool, np, + false); + goto alloc_failed; + } + + /* store pointers back to the control struct */ + np->lru.next = (void *)sa; + for (p = np; p < np + SZ_2M / SZ_4K; p++) + p->lru.prev = (void *)np; + INIT_LIST_HEAD(&sa->link); + bitmap_zero(sa->sub_pages, SZ_2M / SZ_4K); + sa->page = np; + + for (i = 0; i < nr_left; i++) + *tp++ = as_tagged_tag( + page_to_phys(np + i), + FROM_PARTIAL); + + bitmap_set(sa->sub_pages, 0, nr_left); + nr_left = 0; + + /* expose for later use */ + list_add(&sa->link, &kctx->mem_partials); + } + } + if (nr_left) + goto alloc_failed; + } else { +#endif + res = kbase_mem_pool_alloc_pages_locked(pool, + nr_left, + tp); + if (res <= 0) + goto alloc_failed; +#ifdef CONFIG_MALI_2MB_ALLOC + } +#endif + + KBASE_TLSTREAM_AUX_PAGESALLOC( + kctx->id, + (u64)new_page_count); + + alloc->nents += nr_pages_requested; +done: + return new_pages; + +alloc_failed: + /* rollback needed if got one or more 2MB but failed later */ + if (nr_left != nr_pages_requested) { + size_t nr_pages_to_free = nr_pages_requested - nr_left; + + alloc->nents += nr_pages_to_free; + + kbase_process_page_usage_inc(kctx, nr_pages_to_free); + kbase_atomic_add_pages(nr_pages_to_free, &kctx->used_pages); + kbase_atomic_add_pages(nr_pages_to_free, + &kctx->kbdev->memdev.used_pages); + + kbase_free_phy_pages_helper(alloc, nr_pages_to_free); + } + + kbase_process_page_usage_dec(kctx, nr_pages_requested); + kbase_atomic_sub_pages(nr_pages_requested, &kctx->used_pages); + kbase_atomic_sub_pages(nr_pages_requested, + &kctx->kbdev->memdev.used_pages); + +invalid_request: + return NULL; +} + static void free_partial(struct kbase_context *kctx, struct 
tagged_addr tp) { struct page *p, *head_page; @@ -1776,6 +1972,124 @@ int kbase_free_phy_pages_helper( return 0; } +static void free_partial_locked(struct kbase_context *kctx, + struct kbase_mem_pool *pool, struct tagged_addr tp) +{ + struct page *p, *head_page; + struct kbase_sub_alloc *sa; + + lockdep_assert_held(&pool->pool_lock); + lockdep_assert_held(&kctx->mem_partials_lock); + + p = phys_to_page(as_phys_addr_t(tp)); + head_page = (struct page *)p->lru.prev; + sa = (struct kbase_sub_alloc *)head_page->lru.next; + clear_bit(p - head_page, sa->sub_pages); + if (bitmap_empty(sa->sub_pages, SZ_2M / SZ_4K)) { + list_del(&sa->link); + kbase_mem_pool_free(pool, head_page, true); + kfree(sa); + } else if (bitmap_weight(sa->sub_pages, SZ_2M / SZ_4K) == + SZ_2M / SZ_4K - 1) { + /* expose the partial again */ + list_add(&sa->link, &kctx->mem_partials); + } +} + +void kbase_free_phy_pages_helper_locked(struct kbase_mem_phy_alloc *alloc, + struct kbase_mem_pool *pool, struct tagged_addr *pages, + size_t nr_pages_to_free) +{ + struct kbase_context *kctx = alloc->imported.kctx; + bool syncback; + bool reclaimed = (alloc->evicted != 0); + struct tagged_addr *start_free; + size_t freed = 0; + + KBASE_DEBUG_ASSERT(alloc->type == KBASE_MEM_TYPE_NATIVE); + KBASE_DEBUG_ASSERT(alloc->imported.kctx); + KBASE_DEBUG_ASSERT(alloc->nents >= nr_pages_to_free); + + lockdep_assert_held(&pool->pool_lock); + lockdep_assert_held(&kctx->mem_partials_lock); + + /* early out if nothing to do */ + if (!nr_pages_to_free) + return; + + start_free = pages; + + syncback = alloc->properties & KBASE_MEM_PHY_ALLOC_ACCESSED_CACHED; + + /* pad start_free to a valid start location */ + while (nr_pages_to_free && is_huge(*start_free) && + !is_huge_head(*start_free)) { + nr_pages_to_free--; + start_free++; + } + + while (nr_pages_to_free) { + if (is_huge_head(*start_free)) { + /* This is a 2MB entry, so free all the 512 pages that + * it points to + */ + WARN_ON(!pool->order); + 
kbase_mem_pool_free_pages_locked(pool, + 512, + start_free, + syncback, + reclaimed); + nr_pages_to_free -= 512; + start_free += 512; + freed += 512; + } else if (is_partial(*start_free)) { + WARN_ON(!pool->order); + free_partial_locked(kctx, pool, *start_free); + nr_pages_to_free--; + start_free++; + freed++; + } else { + struct tagged_addr *local_end_free; + + WARN_ON(pool->order); + local_end_free = start_free; + while (nr_pages_to_free && + !is_huge(*local_end_free) && + !is_partial(*local_end_free)) { + local_end_free++; + nr_pages_to_free--; + } + kbase_mem_pool_free_pages_locked(pool, + local_end_free - start_free, + start_free, + syncback, + reclaimed); + freed += local_end_free - start_free; + start_free += local_end_free - start_free; + } + } + + alloc->nents -= freed; + + /* + * If the allocation was not evicted (i.e. evicted == 0) then + * the page accounting needs to be done. + */ + if (!reclaimed) { + int new_page_count; + + kbase_process_page_usage_dec(kctx, freed); + new_page_count = kbase_atomic_sub_pages(freed, + &kctx->used_pages); + kbase_atomic_sub_pages(freed, + &kctx->kbdev->memdev.used_pages); + + KBASE_TLSTREAM_AUX_PAGESALLOC( + kctx->id, + (u64)new_page_count); + } +} + void kbase_mem_kref_free(struct kref *kref) { struct kbase_mem_phy_alloc *alloc; @@ -1784,12 +2098,15 @@ void kbase_mem_kref_free(struct kref *kref) switch (alloc->type) { case KBASE_MEM_TYPE_NATIVE: { - WARN_ON(!alloc->imported.kctx); - /* - * The physical allocation must have been removed from the - * eviction list before trying to free it. - */ - WARN_ON(!list_empty(&alloc->evict_node)); + if (!WARN_ON(!alloc->imported.kctx)) { + /* + * The physical allocation must have been removed from + * the eviction list before trying to free it. 
+ */ + mutex_lock(&alloc->imported.kctx->jit_evict_lock); + WARN_ON(!list_empty(&alloc->evict_node)); + mutex_unlock(&alloc->imported.kctx->jit_evict_lock); + } kbase_free_phy_pages_helper(alloc, alloc->nents); break; } @@ -2284,6 +2601,7 @@ static void kbase_jit_destroy_worker(struct work_struct *work) int kbase_jit_init(struct kbase_context *kctx) { + mutex_lock(&kctx->jit_evict_lock); INIT_LIST_HEAD(&kctx->jit_active_head); INIT_LIST_HEAD(&kctx->jit_pool_head); INIT_LIST_HEAD(&kctx->jit_destroy_head); @@ -2291,49 +2609,255 @@ int kbase_jit_init(struct kbase_context *kctx) INIT_LIST_HEAD(&kctx->jit_pending_alloc); INIT_LIST_HEAD(&kctx->jit_atoms_head); + mutex_unlock(&kctx->jit_evict_lock); + + kctx->jit_max_allocations = 0; + kctx->jit_current_allocations = 0; + kctx->trim_level = 0; return 0; } +/* Check if the allocation from JIT pool is of the same size as the new JIT + * allocation and also, if BASE_JIT_ALLOC_MEM_TILER_ALIGN_TOP is set, meets + * the alignment requirements. + */ +static bool meet_size_and_tiler_align_top_requirements(struct kbase_context *kctx, + struct kbase_va_region *walker, struct base_jit_alloc_info *info) +{ + bool meet_reqs = true; + + if (walker->nr_pages != info->va_pages) + meet_reqs = false; + else if (info->flags & BASE_JIT_ALLOC_MEM_TILER_ALIGN_TOP) { + size_t align = info->extent; + size_t align_mask = align - 1; + + if ((walker->start_pfn + info->commit_pages) & align_mask) + meet_reqs = false; + } + + return meet_reqs; +} + +static int kbase_jit_grow(struct kbase_context *kctx, + struct base_jit_alloc_info *info, struct kbase_va_region *reg) +{ + size_t delta; + size_t pages_required; + size_t old_size; + struct kbase_mem_pool *pool; + int ret = -ENOMEM; + struct tagged_addr *gpu_pages; + + if (info->commit_pages > reg->nr_pages) { + /* Attempted to grow larger than maximum size */ + return -EINVAL; + } + + kbase_gpu_vm_lock(kctx); + + /* Make the physical backing no longer reclaimable */ + if 
(!kbase_mem_evictable_unmake(reg->gpu_alloc)) + goto update_failed; + + if (reg->gpu_alloc->nents >= info->commit_pages) + goto done; + + /* Grow the backing */ + old_size = reg->gpu_alloc->nents; + + /* Allocate some more pages */ + delta = info->commit_pages - reg->gpu_alloc->nents; + pages_required = delta; + +#ifdef CONFIG_MALI_2MB_ALLOC + if (pages_required >= (SZ_2M / SZ_4K)) { + pool = &kctx->lp_mem_pool; + /* Round up to number of 2 MB pages required */ + pages_required += ((SZ_2M / SZ_4K) - 1); + pages_required /= (SZ_2M / SZ_4K); + } else { +#endif + pool = &kctx->mem_pool; +#ifdef CONFIG_MALI_2MB_ALLOC + } +#endif + + if (reg->cpu_alloc != reg->gpu_alloc) + pages_required *= 2; + + mutex_lock(&kctx->mem_partials_lock); + kbase_mem_pool_lock(pool); + + /* As we can not allocate memory from the kernel with the vm_lock held, + * grow the pool to the required size with the lock dropped. We hold the + * pool lock to prevent another thread from allocating from the pool + * between the grow and allocation. 
+ */ + while (kbase_mem_pool_size(pool) < pages_required) { + int pool_delta = pages_required - kbase_mem_pool_size(pool); + + kbase_mem_pool_unlock(pool); + mutex_unlock(&kctx->mem_partials_lock); + kbase_gpu_vm_unlock(kctx); + + if (kbase_mem_pool_grow(pool, pool_delta)) + goto update_failed_unlocked; + + kbase_gpu_vm_lock(kctx); + mutex_lock(&kctx->mem_partials_lock); + kbase_mem_pool_lock(pool); + } + + gpu_pages = kbase_alloc_phy_pages_helper_locked(reg->gpu_alloc, pool, + delta); + if (!gpu_pages) { + kbase_mem_pool_unlock(pool); + mutex_unlock(&kctx->mem_partials_lock); + goto update_failed; + } + + if (reg->cpu_alloc != reg->gpu_alloc) { + struct tagged_addr *cpu_pages; + + cpu_pages = kbase_alloc_phy_pages_helper_locked(reg->cpu_alloc, + pool, delta); + if (!cpu_pages) { + kbase_free_phy_pages_helper_locked(reg->gpu_alloc, + pool, gpu_pages, delta); + kbase_mem_pool_unlock(pool); + mutex_unlock(&kctx->mem_partials_lock); + goto update_failed; + } + } + kbase_mem_pool_unlock(pool); + mutex_unlock(&kctx->mem_partials_lock); + + ret = kbase_mem_grow_gpu_mapping(kctx, reg, info->commit_pages, + old_size); + /* + * The grow failed so put the allocation back in the + * pool and return failure. 
+ */ + if (ret) + goto update_failed; + +done: + ret = 0; + + /* Update attributes of JIT allocation taken from the pool */ + reg->initial_commit = info->commit_pages; + reg->extent = info->extent; + +update_failed: + kbase_gpu_vm_unlock(kctx); +update_failed_unlocked: + return ret; +} + struct kbase_va_region *kbase_jit_allocate(struct kbase_context *kctx, struct base_jit_alloc_info *info) { struct kbase_va_region *reg = NULL; - struct kbase_va_region *walker; - struct kbase_va_region *temp; - size_t current_diff = SIZE_MAX; - int ret; + if (kctx->jit_current_allocations >= kctx->jit_max_allocations) { + /* Too many current allocations */ + return NULL; + } + if (info->max_allocations > 0 && + kctx->jit_current_allocations_per_bin[info->bin_id] >= + info->max_allocations) { + /* Too many current allocations in this bin */ + return NULL; + } mutex_lock(&kctx->jit_evict_lock); + /* * Scan the pool for an existing allocation which meets our * requirements and remove it. */ - list_for_each_entry_safe(walker, temp, &kctx->jit_pool_head, jit_node) { + if (info->usage_id != 0) { + /* First scan for an allocation with the same usage ID */ + struct kbase_va_region *walker; + struct kbase_va_region *temp; + size_t current_diff = SIZE_MAX; - if (walker->nr_pages >= info->va_pages) { - size_t min_size, max_size, diff; + list_for_each_entry_safe(walker, temp, &kctx->jit_pool_head, + jit_node) { - /* - * The JIT allocations VA requirements have been - * meet, it's suitable but other allocations - * might be a better fit. 
- */ - min_size = min_t(size_t, walker->gpu_alloc->nents, - info->commit_pages); - max_size = max_t(size_t, walker->gpu_alloc->nents, - info->commit_pages); - diff = max_size - min_size; + if (walker->jit_usage_id == info->usage_id && + walker->jit_bin_id == info->bin_id && + meet_size_and_tiler_align_top_requirements( + kctx, walker, info)) { + size_t min_size, max_size, diff; - if (current_diff > diff) { - current_diff = diff; - reg = walker; + /* + * The JIT allocations VA requirements have been + * met, it's suitable but other allocations + * might be a better fit. + */ + min_size = min_t(size_t, + walker->gpu_alloc->nents, + info->commit_pages); + max_size = max_t(size_t, + walker->gpu_alloc->nents, + info->commit_pages); + diff = max_size - min_size; + + if (current_diff > diff) { + current_diff = diff; + reg = walker; + } + + /* The allocation is an exact match */ + if (current_diff == 0) + break; } + } + } - /* The allocation is an exact match, stop looking */ - if (current_diff == 0) - break; + if (!reg) { + /* No allocation with the same usage ID, or usage IDs not in + * use. Search for an allocation we can reuse. + */ + struct kbase_va_region *walker; + struct kbase_va_region *temp; + size_t current_diff = SIZE_MAX; + + list_for_each_entry_safe(walker, temp, &kctx->jit_pool_head, + jit_node) { + + if (walker->jit_bin_id == info->bin_id && + meet_size_and_tiler_align_top_requirements( + kctx, walker, info)) { + size_t min_size, max_size, diff; + + /* + * The JIT allocations VA requirements have been + * met, it's suitable but other allocations + * might be a better fit. + */ + min_size = min_t(size_t, + walker->gpu_alloc->nents, + info->commit_pages); + max_size = max_t(size_t, + walker->gpu_alloc->nents, + info->commit_pages); + diff = max_size - min_size; + + if (current_diff > diff) { + current_diff = diff; + reg = walker; + } + + /* The allocation is an exact match, so stop + * looking. 
+ */ + if (current_diff == 0) + break; + } } } @@ -2352,42 +2876,15 @@ struct kbase_va_region *kbase_jit_allocate(struct kbase_context *kctx, list_del_init(&reg->gpu_alloc->evict_node); mutex_unlock(&kctx->jit_evict_lock); - kbase_gpu_vm_lock(kctx); - - /* Make the physical backing no longer reclaimable */ - if (!kbase_mem_evictable_unmake(reg->gpu_alloc)) - goto update_failed; - - /* Grow the backing if required */ - if (reg->gpu_alloc->nents < info->commit_pages) { - size_t delta; - size_t old_size = reg->gpu_alloc->nents; - - /* Allocate some more pages */ - delta = info->commit_pages - reg->gpu_alloc->nents; - if (kbase_alloc_phy_pages_helper(reg->gpu_alloc, delta) - != 0) - goto update_failed; - - if (reg->cpu_alloc != reg->gpu_alloc) { - if (kbase_alloc_phy_pages_helper( - reg->cpu_alloc, delta) != 0) { - kbase_free_phy_pages_helper( - reg->gpu_alloc, delta); - goto update_failed; - } - } - - ret = kbase_mem_grow_gpu_mapping(kctx, reg, - info->commit_pages, old_size); + if (kbase_jit_grow(kctx, info, reg) < 0) { /* - * The grow failed so put the allocation back in the - * pool and return failure. + * An update to an allocation from the pool failed, + * chances are slim a new allocation would fare any + * better so return the allocation to the pool and + * return the function with failure. 
*/ - if (ret) - goto update_failed; + goto update_failed_unlocked; } - kbase_gpu_vm_unlock(kctx); } else { /* No suitable JIT allocation was found so create a new one */ u64 flags = BASE_MEM_PROT_CPU_RD | BASE_MEM_PROT_GPU_RD | @@ -2397,6 +2894,9 @@ struct kbase_va_region *kbase_jit_allocate(struct kbase_context *kctx, mutex_unlock(&kctx->jit_evict_lock); + if (info->flags & BASE_JIT_ALLOC_MEM_TILER_ALIGN_TOP) + flags |= BASE_MEM_TILER_ALIGN_TOP; + reg = kbase_mem_alloc(kctx, info->va_pages, info->commit_pages, info->extent, &flags, &gpu_addr); if (!reg) @@ -2409,15 +2909,15 @@ struct kbase_va_region *kbase_jit_allocate(struct kbase_context *kctx, mutex_unlock(&kctx->jit_evict_lock); } + kctx->jit_current_allocations++; + kctx->jit_current_allocations_per_bin[info->bin_id]++; + + reg->jit_usage_id = info->usage_id; + reg->jit_bin_id = info->bin_id; + return reg; -update_failed: - /* - * An update to an allocation from the pool failed, chances - * are slim a new allocation would fair any better so return - * the allocation to the pool and return the function with failure. 
- */ - kbase_gpu_vm_unlock(kctx); +update_failed_unlocked: mutex_lock(&kctx->jit_evict_lock); list_move(&reg->jit_node, &kctx->jit_pool_head); mutex_unlock(&kctx->jit_evict_lock); @@ -2427,13 +2927,53 @@ struct kbase_va_region *kbase_jit_allocate(struct kbase_context *kctx, void kbase_jit_free(struct kbase_context *kctx, struct kbase_va_region *reg) { - /* The physical backing of memory in the pool is always reclaimable */ + u64 old_pages; + + /* Get current size of JIT region */ + old_pages = kbase_reg_current_backed_size(reg); + if (reg->initial_commit < old_pages) { + /* Free trim_level % of region, but don't go below initial + * commit size + */ + u64 new_size = MAX(reg->initial_commit, + div_u64(old_pages * (100 - kctx->trim_level), 100)); + u64 delta = old_pages - new_size; + + if (delta) { + kbase_mem_shrink_cpu_mapping(kctx, reg, old_pages-delta, + old_pages); + kbase_mem_shrink_gpu_mapping(kctx, reg, old_pages-delta, + old_pages); + + kbase_free_phy_pages_helper(reg->cpu_alloc, delta); + if (reg->cpu_alloc != reg->gpu_alloc) + kbase_free_phy_pages_helper(reg->gpu_alloc, + delta); + } + } + + kctx->jit_current_allocations--; + kctx->jit_current_allocations_per_bin[reg->jit_bin_id]--; + + kbase_mem_evictable_mark_reclaim(reg->gpu_alloc); + kbase_gpu_vm_lock(kctx); - kbase_mem_evictable_make(reg->gpu_alloc); + reg->flags |= KBASE_REG_DONT_NEED; + kbase_mem_shrink_cpu_mapping(kctx, reg, 0, reg->gpu_alloc->nents); kbase_gpu_vm_unlock(kctx); + /* + * Add the allocation to the eviction list and the jit pool, after this + * point the shrink can reclaim it, or it may be reused. + */ mutex_lock(&kctx->jit_evict_lock); + + /* This allocation can't already be on a list. 
*/ + WARN_ON(!list_empty(&reg->gpu_alloc->evict_node)); + list_add(&reg->gpu_alloc->evict_node, &kctx->evict_list); + list_move(&reg->jit_node, &kctx->jit_pool_head); + mutex_unlock(&kctx->jit_evict_lock); } @@ -2673,6 +3213,7 @@ static int kbase_jd_umm_map(struct kbase_context *kctx, int err; size_t count = 0; struct kbase_mem_phy_alloc *alloc; + unsigned long gwt_mask = ~0; alloc = reg->gpu_alloc; @@ -2722,10 +3263,16 @@ static int kbase_jd_umm_map(struct kbase_context *kctx, /* Update nents as we now have pages to map */ alloc->nents = reg->nr_pages; +#ifdef CONFIG_MALI_JOB_DUMP + if (kctx->gwt_enabled) + gwt_mask = ~KBASE_REG_GPU_WR; +#endif + err = kbase_mmu_insert_pages(kctx, reg->start_pfn, kbase_get_gpu_phy_pages(reg), count, - reg->flags | KBASE_REG_GPU_WR | KBASE_REG_GPU_RD); + (reg->flags | KBASE_REG_GPU_WR | KBASE_REG_GPU_RD) & + gwt_mask); if (err) goto err_unmap_attachment; diff --git a/drivers/gpu/arm/bifrost/mali_kbase_mem.h b/drivers/gpu/arm/bifrost/mali_kbase_mem.h index f67255121f5e..b456752b417b 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_mem.h +++ b/drivers/gpu/arm/bifrost/mali_kbase_mem.h @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2010-2017 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2010-2018 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -319,7 +319,9 @@ struct kbase_va_region { #define KBASE_REG_ZONE_EXEC_SIZE ((16ULL * 1024 * 1024) >> PAGE_SHIFT) #define KBASE_REG_ZONE_CUSTOM_VA KBASE_REG_ZONE(2) -#define KBASE_REG_ZONE_CUSTOM_VA_BASE (KBASE_REG_ZONE_EXEC_BASE + KBASE_REG_ZONE_EXEC_SIZE) /* Starting after KBASE_REG_ZONE_EXEC */ +/* Starting after KBASE_REG_ZONE_EXEC */ +#define KBASE_REG_ZONE_CUSTOM_VA_BASE \ + (KBASE_REG_ZONE_EXEC_BASE + KBASE_REG_ZONE_EXEC_SIZE) #define KBASE_REG_ZONE_CUSTOM_VA_SIZE (((1ULL << 44) >> PAGE_SHIFT) - KBASE_REG_ZONE_CUSTOM_VA_BASE) /* end 32-bit clients only */ @@ -332,6 +334,10 @@ struct kbase_va_region { /* List head used to store the region in the JIT allocation pool */ struct list_head jit_node; + /* The last JIT usage ID for this region */ + u16 jit_usage_id; + /* The JIT bin this allocation came from */ + u8 jit_bin_id; }; /* Common functions */ @@ -435,23 +441,32 @@ static inline int kbase_reg_prepare_native(struct kbase_va_region *reg, return PTR_ERR(reg->cpu_alloc); else if (!reg->cpu_alloc) return -ENOMEM; + reg->cpu_alloc->imported.kctx = kctx; - INIT_LIST_HEAD(&reg->cpu_alloc->evict_node); if (kbase_ctx_flag(kctx, KCTX_INFINITE_CACHE) && (reg->flags & KBASE_REG_CPU_CACHED)) { reg->gpu_alloc = kbase_alloc_create(reg->nr_pages, KBASE_MEM_TYPE_NATIVE); + if (IS_ERR_OR_NULL(reg->gpu_alloc)) { + kbase_mem_phy_alloc_put(reg->cpu_alloc); + return -ENOMEM; + } reg->gpu_alloc->imported.kctx = kctx; - INIT_LIST_HEAD(&reg->gpu_alloc->evict_node); } else { reg->gpu_alloc = kbase_mem_phy_alloc_get(reg->cpu_alloc); } + mutex_lock(&kctx->jit_evict_lock); + INIT_LIST_HEAD(&reg->cpu_alloc->evict_node); + INIT_LIST_HEAD(&reg->gpu_alloc->evict_node); + mutex_unlock(&kctx->jit_evict_lock); + reg->flags &= ~KBASE_REG_FREE; + return 0; } -static inline int kbase_atomic_add_pages(int num_pages, atomic_t *used_pages) +static inline u32 
kbase_atomic_add_pages(u32 num_pages, atomic_t *used_pages) { int new_val = atomic_add_return(num_pages, used_pages); #if defined(CONFIG_MALI_BIFROST_GATOR_SUPPORT) @@ -460,7 +475,7 @@ static inline int kbase_atomic_add_pages(int num_pages, atomic_t *used_pages) return new_val; } -static inline int kbase_atomic_sub_pages(int num_pages, atomic_t *used_pages) +static inline u32 kbase_atomic_sub_pages(u32 num_pages, atomic_t *used_pages) { int new_val = atomic_sub_return(num_pages, used_pages); #if defined(CONFIG_MALI_BIFROST_GATOR_SUPPORT) @@ -539,9 +554,25 @@ void kbase_mem_pool_term(struct kbase_mem_pool *pool); * 3. Return NULL if no memory in the pool * * Return: Pointer to allocated page, or NULL if allocation failed. + * + * Note : This function should not be used if the pool lock is held. Use + * kbase_mem_pool_alloc_locked() instead. */ struct page *kbase_mem_pool_alloc(struct kbase_mem_pool *pool); +/** + * kbase_mem_pool_alloc_locked - Allocate a page from memory pool + * @pool: Memory pool to allocate from + * + * If there are free pages in the pool, this function allocates a page from + * @pool. This function does not use @next_pool. + * + * Return: Pointer to allocated page, or NULL if allocation failed. + * + * Note : Caller must hold the pool lock. + */ +struct page *kbase_mem_pool_alloc_locked(struct kbase_mem_pool *pool); + /** * kbase_mem_pool_free - Free a page to memory pool * @pool: Memory pool where page should be freed @@ -553,10 +584,27 @@ struct page *kbase_mem_pool_alloc(struct kbase_mem_pool *pool); * 2. Otherwise, if @next_pool is not NULL and not full, add @page to * @next_pool. * 3. Finally, free @page to the kernel. + * + * Note : This function should not be used if the pool lock is held. Use + * kbase_mem_pool_free_locked() instead. 
*/ void kbase_mem_pool_free(struct kbase_mem_pool *pool, struct page *page, bool dirty); +/** + * kbase_mem_pool_free_locked - Free a page to memory pool + * @pool: Memory pool where page should be freed + * @p: Page to free to the pool + * @dirty: Whether some of the page may be dirty in the cache. + * + * If @pool is not full, this function adds @page to @pool. Otherwise, @page is + * freed to the kernel. This function does not use @next_pool. + * + * Note : Caller must hold the pool lock. + */ +void kbase_mem_pool_free_locked(struct kbase_mem_pool *pool, struct page *p, + bool dirty); + /** * kbase_mem_pool_alloc_pages - Allocate pages from memory pool * @pool: Memory pool to allocate from @@ -571,10 +619,57 @@ void kbase_mem_pool_free(struct kbase_mem_pool *pool, struct page *page, * On success number of pages allocated (could be less than nr_pages if * partial_allowed). * On error an error code. + * + * Note : This function should not be used if the pool lock is held. Use + * kbase_mem_pool_alloc_pages_locked() instead. + * + * The caller must not hold vm_lock, as this could cause a deadlock if + * the kernel OoM killer runs. If the caller must allocate pages while holding + * this lock, it should use kbase_mem_pool_alloc_pages_locked() instead. */ int kbase_mem_pool_alloc_pages(struct kbase_mem_pool *pool, size_t nr_pages, struct tagged_addr *pages, bool partial_allowed); +/** + * kbase_mem_pool_alloc_pages_locked - Allocate pages from memory pool + * @pool: Memory pool to allocate from + * @nr_4k_pages: Number of pages to allocate + * @pages: Pointer to array where the physical address of the allocated + * pages will be stored. + * + * Like kbase_mem_pool_alloc() but optimized for allocating many pages. This + * version does not allocate new pages from the kernel, and therefore will never + * trigger the OoM killer. Therefore, it can be run while the vm_lock is held. 
+ * + * As new pages can not be allocated, the caller must ensure there are + * sufficient pages in the pool. Usage of this function should look like : + * + * kbase_gpu_vm_lock(kctx); + * kbase_mem_pool_lock(pool) + * while (kbase_mem_pool_size(pool) < pages_required) { + * kbase_mem_pool_unlock(pool) + * kbase_gpu_vm_unlock(kctx); + * kbase_mem_pool_grow(pool) + * kbase_gpu_vm_lock(kctx); + * kbase_mem_pool_lock(pool) + * } + * kbase_mem_pool_alloc_pages_locked(pool) + * kbase_mem_pool_unlock(pool) + * Perform other processing that requires vm_lock... + * kbase_gpu_vm_unlock(kctx); + * + * This ensures that the pool can be grown to the required size and that the + * allocation can complete without another thread using the newly grown pages. + * + * Return: + * On success number of pages allocated. + * On error an error code. + * + * Note : Caller must hold the pool lock. + */ +int kbase_mem_pool_alloc_pages_locked(struct kbase_mem_pool *pool, + size_t nr_4k_pages, struct tagged_addr *pages); + /** * kbase_mem_pool_free_pages - Free pages to memory pool * @pool: Memory pool where pages should be freed @@ -590,6 +685,22 @@ int kbase_mem_pool_alloc_pages(struct kbase_mem_pool *pool, size_t nr_pages, void kbase_mem_pool_free_pages(struct kbase_mem_pool *pool, size_t nr_pages, struct tagged_addr *pages, bool dirty, bool reclaimed); +/** + * kbase_mem_pool_free_pages_locked - Free pages to memory pool + * @pool: Memory pool where pages should be freed + * @nr_pages: Number of pages to free + * @pages: Pointer to array holding the physical addresses of the pages to + * free. + * @dirty: Whether any pages may be dirty in the cache. + * @reclaimed: Whether the pages were reclaimable and thus should bypass + * the pool and go straight to the kernel. + * + * Like kbase_mem_pool_free() but optimized for freeing many pages. 
+ */ +void kbase_mem_pool_free_pages_locked(struct kbase_mem_pool *pool, + size_t nr_pages, struct tagged_addr *pages, bool dirty, + bool reclaimed); + /** * kbase_mem_pool_size - Get number of free pages in memory pool * @pool: Memory pool to inspect @@ -600,7 +711,7 @@ void kbase_mem_pool_free_pages(struct kbase_mem_pool *pool, size_t nr_pages, */ static inline size_t kbase_mem_pool_size(struct kbase_mem_pool *pool) { - return ACCESS_ONCE(pool->cur_size); + return READ_ONCE(pool->cur_size); } /** @@ -648,6 +759,15 @@ int kbase_mem_pool_grow(struct kbase_mem_pool *pool, size_t nr_to_grow); */ void kbase_mem_pool_trim(struct kbase_mem_pool *pool, size_t new_size); +/** + * kbase_mem_pool_mark_dying - Mark that this pool is dying + * @pool: Memory pool + * + * This will cause any ongoing allocation operations (eg growing on page fault) + * to be terminated. + */ +void kbase_mem_pool_mark_dying(struct kbase_mem_pool *pool); + /** * kbase_mem_alloc_page - Allocate a new page for a device * @pool: Memory pool to allocate a page from @@ -660,7 +780,8 @@ void kbase_mem_pool_trim(struct kbase_mem_pool *pool, size_t new_size); struct page *kbase_mem_alloc_page(struct kbase_mem_pool *pool); int kbase_region_tracker_init(struct kbase_context *kctx); -int kbase_region_tracker_init_jit(struct kbase_context *kctx, u64 jit_va_pages); +int kbase_region_tracker_init_jit(struct kbase_context *kctx, u64 jit_va_pages, + u8 max_allocations, u8 trim_level); void kbase_region_tracker_term(struct kbase_context *kctx); struct kbase_va_region *kbase_region_tracker_find_region_enclosing_address(struct kbase_context *kctx, u64 gpu_addr); @@ -922,16 +1043,62 @@ void kbase_as_poking_timer_retain_atom(struct kbase_device *kbdev, struct kbase_ void kbase_as_poking_timer_release_atom(struct kbase_device *kbdev, struct kbase_context *kctx, struct kbase_jd_atom *katom); /** -* @brief Allocates physical pages. -* -* Allocates \a nr_pages_requested and updates the alloc object. 
-* -* @param[in] alloc allocation object to add pages to -* @param[in] nr_pages_requested number of physical pages to allocate -* -* @return 0 if all pages have been successfully allocated. Error code otherwise -*/ -int kbase_alloc_phy_pages_helper(struct kbase_mem_phy_alloc *alloc, size_t nr_pages_requested); + * kbase_alloc_phy_pages_helper - Allocates physical pages. + * @alloc: allocation object to add pages to + * @nr_pages_requested: number of physical pages to allocate + * + * Allocates \a nr_pages_requested and updates the alloc object. + * + * Return: 0 if all pages have been successfully allocated. Error code otherwise + * + * Note : The caller must not hold vm_lock, as this could cause a deadlock if + * the kernel OoM killer runs. If the caller must allocate pages while holding + * this lock, it should use kbase_mem_pool_alloc_pages_locked() instead. + */ +int kbase_alloc_phy_pages_helper(struct kbase_mem_phy_alloc *alloc, + size_t nr_pages_requested); + +/** + * kbase_alloc_phy_pages_helper_locked - Allocates physical pages. + * @alloc: allocation object to add pages to + * @pool: Memory pool to allocate from + * @nr_pages_requested: number of physical pages to allocate + * + * Allocates \a nr_pages_requested and updates the alloc object. This function + * does not allocate new pages from the kernel, and therefore will never trigger + * the OoM killer. Therefore, it can be run while the vm_lock is held. + * + * As new pages can not be allocated, the caller must ensure there are + * sufficient pages in the pool. 
Usage of this function should look like : + * + * kbase_gpu_vm_lock(kctx); + * kbase_mem_pool_lock(pool) + * while (kbase_mem_pool_size(pool) < pages_required) { + * kbase_mem_pool_unlock(pool) + * kbase_gpu_vm_unlock(kctx); + * kbase_mem_pool_grow(pool) + * kbase_gpu_vm_lock(kctx); + * kbase_mem_pool_lock(pool) + * } + * kbase_alloc_phy_pages_helper_locked(pool) + * kbase_mem_pool_unlock(pool) + * Perform other processing that requires vm_lock... + * kbase_gpu_vm_unlock(kctx); + * + * This ensures that the pool can be grown to the required size and that the + * allocation can complete without another thread using the newly grown pages. + * + * If CONFIG_MALI_2MB_ALLOC is defined and the allocation is >= 2MB, then + * @pool must be alloc->imported.kctx->lp_mem_pool. Otherwise it must be + * alloc->imported.kctx->mem_pool. + * + * Return: Pointer to array of allocated pages. NULL on failure. + * + * Note : Caller must hold pool->pool_lock + */ +struct tagged_addr *kbase_alloc_phy_pages_helper_locked( + struct kbase_mem_phy_alloc *alloc, struct kbase_mem_pool *pool, + size_t nr_pages_requested); /** * @brief Free physical pages. @@ -943,6 +1110,26 @@ int kbase_alloc_phy_pages_helper(struct kbase_mem_phy_alloc *alloc, size_t nr_pa */ int kbase_free_phy_pages_helper(struct kbase_mem_phy_alloc *alloc, size_t nr_pages_to_free); +/** + * kbase_free_phy_pages_helper_locked - Free pages allocated with + * kbase_alloc_phy_pages_helper_locked() + * @alloc: Allocation object to free pages from + * @pool: Memory pool to return freed pages to + * @pages: Pages allocated by kbase_alloc_phy_pages_helper_locked() + * @nr_pages_to_free: Number of physical pages to free + * + * This function atomically frees pages allocated with + * kbase_alloc_phy_pages_helper_locked(). @pages is the pointer to the page + * array that is returned by that function. @pool must be the pool that the + * pages were originally allocated from. 
+ * + * If the mem_pool has been unlocked since the allocation then + * kbase_free_phy_pages_helper() should be used instead. + */ +void kbase_free_phy_pages_helper_locked(struct kbase_mem_phy_alloc *alloc, + struct kbase_mem_pool *pool, struct tagged_addr *pages, + size_t nr_pages_to_free); + static inline void kbase_set_dma_addr(struct page *p, dma_addr_t dma_addr) { SetPagePrivate(p); @@ -1150,4 +1337,28 @@ bool kbase_sticky_resource_release(struct kbase_context *kctx, */ void kbase_sticky_resource_term(struct kbase_context *kctx); +/** + * kbase_mem_pool_lock - Lock a memory pool + * @pool: Memory pool to lock + */ +static inline void kbase_mem_pool_lock(struct kbase_mem_pool *pool) +{ + spin_lock(&pool->pool_lock); +} + +/** + * kbase_mem_pool_unlock - Unlock a memory pool + * @pool: Memory pool to unlock + */ +static inline void kbase_mem_pool_unlock(struct kbase_mem_pool *pool) +{ + spin_unlock(&pool->pool_lock); +} + +/** + * kbase_mem_evictable_mark_reclaim - Mark the pages as reclaimable. + * @alloc: The physical allocation + */ +void kbase_mem_evictable_mark_reclaim(struct kbase_mem_phy_alloc *alloc); + #endif /* _KBASE_MEM_H_ */ diff --git a/drivers/gpu/arm/bifrost/mali_kbase_mem_linux.c b/drivers/gpu/arm/bifrost/mali_kbase_mem_linux.c index 4e6668e62477..33021142e6d5 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_mem_linux.c +++ b/drivers/gpu/arm/bifrost/mali_kbase_mem_linux.c @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2010-2017 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2010-2018 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -53,39 +53,6 @@ static int kbase_tracking_page_setup(struct kbase_context *kctx, struct vm_area_struct *vma); -/** - * kbase_mem_shrink_cpu_mapping - Shrink the CPU mapping(s) of an allocation - * @kctx: Context the region belongs to - * @reg: The GPU region - * @new_pages: The number of pages after the shrink - * @old_pages: The number of pages before the shrink - * - * Shrink (or completely remove) all CPU mappings which reference the shrunk - * part of the allocation. - * - * Note: Caller must be holding the processes mmap_sem lock. - */ -static void kbase_mem_shrink_cpu_mapping(struct kbase_context *kctx, - struct kbase_va_region *reg, - u64 new_pages, u64 old_pages); - -/** - * kbase_mem_shrink_gpu_mapping - Shrink the GPU mapping of an allocation - * @kctx: Context the region belongs to - * @reg: The GPU region or NULL if there isn't one - * @new_pages: The number of pages after the shrink - * @old_pages: The number of pages before the shrink - * - * Return: 0 on success, negative -errno on error - * - * Unmap the shrunk pages from the GPU mapping. Note that the size of the region - * itself is unmodified as we still need to reserve the VA, only the page tables - * will be modified by this function. 
- */ -static int kbase_mem_shrink_gpu_mapping(struct kbase_context *kctx, - struct kbase_va_region *reg, - u64 new_pages, u64 old_pages); - struct kbase_va_region *kbase_mem_alloc(struct kbase_context *kctx, u64 va_pages, u64 commit_pages, u64 extent, u64 *flags, u64 *gpu_va) @@ -243,7 +210,8 @@ struct kbase_va_region *kbase_mem_alloc(struct kbase_context *kctx, } KBASE_EXPORT_TEST_API(kbase_mem_alloc); -int kbase_mem_query(struct kbase_context *kctx, u64 gpu_addr, int query, u64 * const out) +int kbase_mem_query(struct kbase_context *kctx, + u64 gpu_addr, u64 query, u64 * const out) { struct kbase_va_region *reg; int ret = -EINVAL; @@ -471,7 +439,7 @@ void kbase_mem_evictable_deinit(struct kbase_context *kctx) * kbase_mem_evictable_mark_reclaim - Mark the pages as reclaimable. * @alloc: The physical allocation */ -static void kbase_mem_evictable_mark_reclaim(struct kbase_mem_phy_alloc *alloc) +void kbase_mem_evictable_mark_reclaim(struct kbase_mem_phy_alloc *alloc) { struct kbase_context *kctx = alloc->imported.kctx; int __maybe_unused new_page_count; @@ -516,17 +484,17 @@ int kbase_mem_evictable_make(struct kbase_mem_phy_alloc *gpu_alloc) lockdep_assert_held(&kctx->reg_lock); - /* This alloction can't already be on a list. */ - WARN_ON(!list_empty(&gpu_alloc->evict_node)); - kbase_mem_shrink_cpu_mapping(kctx, gpu_alloc->reg, 0, gpu_alloc->nents); + mutex_lock(&kctx->jit_evict_lock); + /* This allocation can't already be on a list. */ + WARN_ON(!list_empty(&gpu_alloc->evict_node)); + /* * Add the allocation to the eviction list, after this point the shrink * can reclaim it. 
*/ - mutex_lock(&kctx->jit_evict_lock); list_add(&gpu_alloc->evict_node, &kctx->evict_list); mutex_unlock(&kctx->jit_evict_lock); kbase_mem_evictable_mark_reclaim(gpu_alloc); @@ -542,11 +510,13 @@ bool kbase_mem_evictable_unmake(struct kbase_mem_phy_alloc *gpu_alloc) lockdep_assert_held(&kctx->reg_lock); + mutex_lock(&kctx->jit_evict_lock); /* * First remove the allocation from the eviction list as it's no * longer eligible for eviction. */ list_del_init(&gpu_alloc->evict_node); + mutex_unlock(&kctx->jit_evict_lock); if (gpu_alloc->evicted == 0) { /* @@ -667,7 +637,7 @@ int kbase_mem_flags_change(struct kbase_context *kctx, u64 gpu_addr, unsigned in case KBASE_MEM_TYPE_IMPORTED_UMP: ret = kbase_mmu_update_pages(kctx, reg->start_pfn, kbase_get_gpu_phy_pages(reg), - reg->gpu_alloc->nents, reg->flags); + reg->gpu_alloc->nents, reg->flags); break; #endif #ifdef CONFIG_DMA_SHARED_BUFFER @@ -903,6 +873,7 @@ static struct kbase_va_region *kbase_mem_from_umm(struct kbase_context *kctx, invalid_flags: kbase_mem_phy_alloc_put(reg->gpu_alloc); + kbase_mem_phy_alloc_put(reg->cpu_alloc); no_alloc_obj: kfree(reg); no_region: @@ -1010,6 +981,7 @@ static struct kbase_va_region *kbase_mem_from_user_buffer( user_buf->address = address; user_buf->nr_pages = *va_pages; user_buf->mm = current->mm; + atomic_inc(&current->mm->mm_count); user_buf->pages = kmalloc_array(*va_pages, sizeof(struct page *), GFP_KERNEL); @@ -1047,8 +1019,6 @@ static struct kbase_va_region *kbase_mem_from_user_buffer( if (faulted_pages != *va_pages) goto fault_mismatch; - atomic_inc(&current->mm->mm_count); - reg->gpu_alloc->nents = 0; reg->extent = 0; @@ -1095,7 +1065,6 @@ static struct kbase_va_region *kbase_mem_from_user_buffer( for (i = 0; i < faulted_pages; i++) put_page(pages[i]); } - kfree(user_buf->pages); no_page_array: invalid_flags: kbase_mem_phy_alloc_put(reg->cpu_alloc); @@ -1462,7 +1431,7 @@ int kbase_mem_grow_gpu_mapping(struct kbase_context *kctx, return ret; } -static void 
kbase_mem_shrink_cpu_mapping(struct kbase_context *kctx, +void kbase_mem_shrink_cpu_mapping(struct kbase_context *kctx, struct kbase_va_region *reg, u64 new_pages, u64 old_pages) { @@ -1477,7 +1446,7 @@ static void kbase_mem_shrink_cpu_mapping(struct kbase_context *kctx, (old_pages - new_pages)<> PAGE_SHIFT)) +#endif + +/** + * as_phys_addr_t - Retrieve the physical address from tagged address by + * masking the lower order 12 bits. + * @t: tagged address to be translated. + * + * Return: physical address corresponding to tagged address. + */ static inline phys_addr_t as_phys_addr_t(struct tagged_addr t) { return t.tagged_addr & PAGE_MASK; } +/** + * as_tagged - Convert the physical address to tagged address type though + * there is no tag info present, the lower order 12 bits will be 0 + * @phys: physical address to be converted to tagged type + * + * This is used for 4KB physical pages allocated by the Driver or imported pages + * and is needed as physical pages tracking object stores the reference for + * physical pages using tagged address type in lieu of the type generally used + * for physical addresses. + * + * Return: address of tagged address type. + */ static inline struct tagged_addr as_tagged(phys_addr_t phys) { struct tagged_addr t; @@ -66,6 +94,16 @@ static inline struct tagged_addr as_tagged(phys_addr_t phys) return t; } +/** + * as_tagged_tag - Form the tagged address by storing the tag or metadata in the + * lower order 12 bits of physical address + * @phys: physical address to be converted to tagged address + * @tag: tag to be stored along with the physical address. + * + * The tag info is used while freeing up the pages + * + * Return: tagged address storing physical address & tag. 
+ */ static inline struct tagged_addr as_tagged_tag(phys_addr_t phys, int tag) { struct tagged_addr t; @@ -74,11 +112,26 @@ static inline struct tagged_addr as_tagged_tag(phys_addr_t phys, int tag) return t; } +/** + * is_huge - Check if the physical page is one of the 512 4KB pages of the + * large page which was not split to be used partially + * @t: tagged address storing the tag in the lower order bits. + * + * Return: true if page belongs to large page, or false + */ static inline bool is_huge(struct tagged_addr t) { return t.tagged_addr & HUGE_PAGE; } +/** + * is_huge_head - Check if the physical page is the first 4KB page of the + * 512 4KB pages within a large page which was not split + * to be used partially + * @t: tagged address storing the tag in the lower order bits. + * + * Return: true if page is the first page of a large page, or false + */ static inline bool is_huge_head(struct tagged_addr t) { int mask = HUGE_HEAD | HUGE_PAGE; @@ -86,6 +139,14 @@ static inline bool is_huge_head(struct tagged_addr t) return mask == (t.tagged_addr & mask); } +/** + * is_partial - Check if the physical page is one of the 512 pages of the + * large page which was split in 4KB pages to be used + * partially for allocations >= 2 MB in size. + * @t: tagged address storing the tag in the lower order bits. 
+ * + * Return: true if page was taken from large page used partially, or false + */ static inline bool is_partial(struct tagged_addr t) { return t.tagged_addr & FROM_PARTIAL; diff --git a/drivers/gpu/arm/bifrost/mali_kbase_mem_pool.c b/drivers/gpu/arm/bifrost/mali_kbase_mem_pool.c index 574f1d51cccf..1255df0fc1ae 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_mem_pool.c +++ b/drivers/gpu/arm/bifrost/mali_kbase_mem_pool.c @@ -39,16 +39,6 @@ #define NOT_DIRTY false #define NOT_RECLAIMED false -static inline void kbase_mem_pool_lock(struct kbase_mem_pool *pool) -{ - spin_lock(&pool->pool_lock); -} - -static inline void kbase_mem_pool_unlock(struct kbase_mem_pool *pool) -{ - spin_unlock(&pool->pool_lock); -} - static size_t kbase_mem_pool_capacity(struct kbase_mem_pool *pool) { ssize_t max_size = kbase_mem_pool_max_size(pool); @@ -177,12 +167,6 @@ struct page *kbase_mem_alloc_page(struct kbase_mem_pool *pool) gfp = GFP_HIGHUSER | __GFP_ZERO; #endif - if (current->flags & PF_KTHREAD) { - /* Don't trigger OOM killer from kernel threads, e.g. 
when - * growing memory on GPU page fault */ - gfp |= __GFP_NORETRY; - } - /* don't warn on higer order failures */ if (pool->order) gfp |= __GFP_NOWARN; @@ -255,12 +239,33 @@ int kbase_mem_pool_grow(struct kbase_mem_pool *pool, struct page *p; size_t i; + kbase_mem_pool_lock(pool); + + pool->dont_reclaim = true; for (i = 0; i < nr_to_grow; i++) { - p = kbase_mem_alloc_page(pool); - if (!p) + if (pool->dying) { + pool->dont_reclaim = false; + kbase_mem_pool_shrink_locked(pool, nr_to_grow); + kbase_mem_pool_unlock(pool); + return -ENOMEM; - kbase_mem_pool_add(pool, p); + } + kbase_mem_pool_unlock(pool); + + p = kbase_mem_alloc_page(pool); + if (!p) { + kbase_mem_pool_lock(pool); + pool->dont_reclaim = false; + kbase_mem_pool_unlock(pool); + + return -ENOMEM; + } + + kbase_mem_pool_lock(pool); + kbase_mem_pool_add_locked(pool, p); } + pool->dont_reclaim = false; + kbase_mem_pool_unlock(pool); return 0; } @@ -312,10 +317,19 @@ static unsigned long kbase_mem_pool_reclaim_count_objects(struct shrinker *s, struct shrink_control *sc) { struct kbase_mem_pool *pool; + size_t pool_size; pool = container_of(s, struct kbase_mem_pool, reclaim); - pool_dbg(pool, "reclaim count: %zu\n", kbase_mem_pool_size(pool)); - return kbase_mem_pool_size(pool); + + kbase_mem_pool_lock(pool); + if (pool->dont_reclaim && !pool->dying) { + kbase_mem_pool_unlock(pool); + return 0; + } + pool_size = kbase_mem_pool_size(pool); + kbase_mem_pool_unlock(pool); + + return pool_size; } static unsigned long kbase_mem_pool_reclaim_scan_objects(struct shrinker *s, @@ -326,9 +340,17 @@ static unsigned long kbase_mem_pool_reclaim_scan_objects(struct shrinker *s, pool = container_of(s, struct kbase_mem_pool, reclaim); + kbase_mem_pool_lock(pool); + if (pool->dont_reclaim && !pool->dying) { + kbase_mem_pool_unlock(pool); + return 0; + } + pool_dbg(pool, "reclaim scan %ld:\n", sc->nr_to_scan); - freed = kbase_mem_pool_shrink(pool, sc->nr_to_scan); + freed = kbase_mem_pool_shrink_locked(pool, sc->nr_to_scan); + 
+ kbase_mem_pool_unlock(pool); pool_dbg(pool, "reclaim freed %ld pages\n", freed); @@ -357,6 +379,7 @@ int kbase_mem_pool_init(struct kbase_mem_pool *pool, pool->order = order; pool->kbdev = kbdev; pool->next_pool = next_pool; + pool->dying = false; spin_lock_init(&pool->pool_lock); INIT_LIST_HEAD(&pool->page_list); @@ -381,6 +404,13 @@ int kbase_mem_pool_init(struct kbase_mem_pool *pool, return 0; } +void kbase_mem_pool_mark_dying(struct kbase_mem_pool *pool) +{ + kbase_mem_pool_lock(pool); + pool->dying = true; + kbase_mem_pool_unlock(pool); +} + void kbase_mem_pool_term(struct kbase_mem_pool *pool) { struct kbase_mem_pool *next_pool = pool->next_pool; @@ -444,6 +474,21 @@ struct page *kbase_mem_pool_alloc(struct kbase_mem_pool *pool) return NULL; } +struct page *kbase_mem_pool_alloc_locked(struct kbase_mem_pool *pool) +{ + struct page *p; + + lockdep_assert_held(&pool->pool_lock); + + pool_dbg(pool, "alloc_locked()\n"); + p = kbase_mem_pool_remove_locked(pool); + + if (p) + return p; + + return NULL; +} + void kbase_mem_pool_free(struct kbase_mem_pool *pool, struct page *p, bool dirty) { @@ -466,6 +511,25 @@ void kbase_mem_pool_free(struct kbase_mem_pool *pool, struct page *p, } } +void kbase_mem_pool_free_locked(struct kbase_mem_pool *pool, struct page *p, + bool dirty) +{ + pool_dbg(pool, "free_locked()\n"); + + lockdep_assert_held(&pool->pool_lock); + + if (!kbase_mem_pool_is_full(pool)) { + /* Add to our own pool */ + if (dirty) + kbase_mem_pool_sync_page(pool, p); + + kbase_mem_pool_add_locked(pool, p); + } else { + /* Free page */ + kbase_mem_pool_free_page(pool, p); + } +} + int kbase_mem_pool_alloc_pages(struct kbase_mem_pool *pool, size_t nr_4k_pages, struct tagged_addr *pages, bool partial_allowed) { @@ -543,7 +607,6 @@ int kbase_mem_pool_alloc_pages(struct kbase_mem_pool *pool, size_t nr_4k_pages, done: pool_dbg(pool, "alloc_pages(%zu) done\n", i); - return i; err_rollback: @@ -551,6 +614,49 @@ int kbase_mem_pool_alloc_pages(struct kbase_mem_pool 
*pool, size_t nr_4k_pages, return err; } +int kbase_mem_pool_alloc_pages_locked(struct kbase_mem_pool *pool, + size_t nr_4k_pages, struct tagged_addr *pages) +{ + struct page *p; + size_t i; + size_t nr_pages_internal; + + lockdep_assert_held(&pool->pool_lock); + + nr_pages_internal = nr_4k_pages / (1u << (pool->order)); + + if (nr_pages_internal * (1u << pool->order) != nr_4k_pages) + return -EINVAL; + + pool_dbg(pool, "alloc_pages_locked(4k=%zu):\n", nr_4k_pages); + pool_dbg(pool, "alloc_pages_locked(internal=%zu):\n", + nr_pages_internal); + + if (kbase_mem_pool_size(pool) < nr_pages_internal) { + pool_dbg(pool, "Failed alloc\n"); + return -ENOMEM; + } + + for (i = 0; i < nr_pages_internal; i++) { + int j; + + p = kbase_mem_pool_remove_locked(pool); + if (pool->order) { + *pages++ = as_tagged_tag(page_to_phys(p), + HUGE_HEAD | HUGE_PAGE); + for (j = 1; j < (1u << pool->order); j++) { + *pages++ = as_tagged_tag(page_to_phys(p) + + PAGE_SIZE * j, + HUGE_PAGE); + } + } else { + *pages++ = as_tagged(page_to_phys(p)); + } + } + + return nr_4k_pages; +} + static void kbase_mem_pool_add_array(struct kbase_mem_pool *pool, size_t nr_pages, struct tagged_addr *pages, bool zero, bool sync) @@ -591,6 +697,48 @@ static void kbase_mem_pool_add_array(struct kbase_mem_pool *pool, nr_pages, nr_to_pool); } +static void kbase_mem_pool_add_array_locked(struct kbase_mem_pool *pool, + size_t nr_pages, struct tagged_addr *pages, + bool zero, bool sync) +{ + struct page *p; + size_t nr_to_pool = 0; + LIST_HEAD(new_page_list); + size_t i; + + lockdep_assert_held(&pool->pool_lock); + + if (!nr_pages) + return; + + pool_dbg(pool, "add_array_locked(%zu, zero=%d, sync=%d):\n", + nr_pages, zero, sync); + + /* Zero/sync pages first */ + for (i = 0; i < nr_pages; i++) { + if (unlikely(!as_phys_addr_t(pages[i]))) + continue; + + if (is_huge_head(pages[i]) || !is_huge(pages[i])) { + p = phys_to_page(as_phys_addr_t(pages[i])); + if (zero) + kbase_mem_pool_zero_page(pool, p); + else if (sync) + 
kbase_mem_pool_sync_page(pool, p); + + list_add(&p->lru, &new_page_list); + nr_to_pool++; + } + pages[i] = as_tagged(0); + } + + /* Add new page list to pool */ + kbase_mem_pool_add_list_locked(pool, &new_page_list, nr_to_pool); + + pool_dbg(pool, "add_array_locked(%zu) added %zu pages\n", + nr_pages, nr_to_pool); +} + void kbase_mem_pool_free_pages(struct kbase_mem_pool *pool, size_t nr_pages, struct tagged_addr *pages, bool dirty, bool reclaimed) { @@ -640,3 +788,47 @@ void kbase_mem_pool_free_pages(struct kbase_mem_pool *pool, size_t nr_pages, pool_dbg(pool, "free_pages(%zu) done\n", nr_pages); } + + +void kbase_mem_pool_free_pages_locked(struct kbase_mem_pool *pool, + size_t nr_pages, struct tagged_addr *pages, bool dirty, + bool reclaimed) +{ + struct page *p; + size_t nr_to_pool; + LIST_HEAD(to_pool_list); + size_t i = 0; + + lockdep_assert_held(&pool->pool_lock); + + pool_dbg(pool, "free_pages_locked(%zu):\n", nr_pages); + + if (!reclaimed) { + /* Add to this pool */ + nr_to_pool = kbase_mem_pool_capacity(pool); + nr_to_pool = min(nr_pages, nr_to_pool); + + kbase_mem_pool_add_array_locked(pool, nr_pages, pages, false, + dirty); + + i += nr_to_pool; + } + + /* Free any remaining pages to kernel */ + for (; i < nr_pages; i++) { + if (unlikely(!as_phys_addr_t(pages[i]))) + continue; + + if (is_huge(pages[i]) && !is_huge_head(pages[i])) { + pages[i] = as_tagged(0); + continue; + } + + p = phys_to_page(as_phys_addr_t(pages[i])); + + kbase_mem_pool_free_page(pool, p); + pages[i] = as_tagged(0); + } + + pool_dbg(pool, "free_pages_locked(%zu) done\n", nr_pages); +} diff --git a/drivers/gpu/arm/bifrost/mali_kbase_mem_profile_debugfs_buf_size.h b/drivers/gpu/arm/bifrost/mali_kbase_mem_profile_debugfs_buf_size.h index cb968f65fc5c..7f44d81e34e2 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_mem_profile_debugfs_buf_size.h +++ b/drivers/gpu/arm/bifrost/mali_kbase_mem_profile_debugfs_buf_size.h @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2014 ARM Limited. All rights reserved. 
+ * (C) COPYRIGHT 2014, 2018 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -32,7 +32,8 @@ * The size of the buffer to accumulate the histogram report text in * @see @ref CCTXP_HIST_BUF_SIZE_MAX_LENGTH_REPORT */ -#define KBASE_MEM_PROFILE_MAX_BUF_SIZE ((size_t) (64 + ((80 + (56 * 64)) * 15) + 56)) +#define KBASE_MEM_PROFILE_MAX_BUF_SIZE \ + ((size_t) (64 + ((80 + (56 * 64)) * 31) + 56)) #endif /*_KBASE_MEM_PROFILE_DEBUGFS_BUF_SIZE_H_*/ diff --git a/drivers/gpu/arm/bifrost/mali_kbase_mmu.c b/drivers/gpu/arm/bifrost/mali_kbase_mmu.c index f8979d9841da..4abab34cf155 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_mmu.c +++ b/drivers/gpu/arm/bifrost/mali_kbase_mmu.c @@ -224,23 +224,15 @@ static void kbase_gpu_mmu_handle_write_fault(struct kbase_context *kctx, return; } - /* Capture handle and offset of the faulting write location + /* Capture addresses of faulting write location * for job dumping if write tracking is enabled. */ if (kctx->gwt_enabled) { u64 page_addr = faulting_as->fault_addr & PAGE_MASK; - u64 offset = (page_addr >> PAGE_SHIFT) - region->start_pfn; - u64 handle = region->start_pfn << PAGE_SHIFT; bool found = false; - - if (KBASE_MEM_TYPE_IMPORTED_UMM == region->cpu_alloc->type) - handle |= BIT(0); - /* Check if this write was already handled. 
*/ list_for_each_entry(pos, &kctx->gwt_current_list, link) { - if (handle == pos->handle && - offset >= pos->offset && - offset < pos->offset + pos->num_pages) { + if (page_addr == pos->page_addr) { found = true; break; } @@ -249,8 +241,8 @@ static void kbase_gpu_mmu_handle_write_fault(struct kbase_context *kctx, if (!found) { pos = kmalloc(sizeof(*pos), GFP_KERNEL); if (pos) { - pos->handle = handle; - pos->offset = offset; + pos->region = region; + pos->page_addr = page_addr; pos->num_pages = 1; list_add(&pos->link, &kctx->gwt_current_list); } else { @@ -318,6 +310,10 @@ void page_fault_worker(struct work_struct *data) struct kbase_va_region *region; int err; bool grown = false; + size_t min_pool_size; + struct kbase_mem_pool *pool; + int pages_to_grow; + struct tagged_addr *gpu_pages, *cpu_pages; faulting_as = container_of(data, struct kbase_as, work_pagefault); fault_pfn = faulting_as->fault_addr >> PAGE_SHIFT; @@ -336,8 +332,7 @@ void page_fault_worker(struct work_struct *data) KBASE_DEBUG_ASSERT(kctx->kbdev == kbdev); - if (unlikely(faulting_as->protected_mode)) - { + if (unlikely(faulting_as->protected_mode)) { kbase_mmu_report_fault_and_kill(kctx, faulting_as, "Protected mode fault"); kbase_mmu_hw_clear_fault(kbdev, faulting_as, kctx, @@ -403,6 +398,7 @@ void page_fault_worker(struct work_struct *data) goto fault_done; } +page_fault_retry: /* so we have a translation fault, let's see if it is for growable * memory */ kbase_gpu_vm_lock(kctx); @@ -496,20 +492,59 @@ void page_fault_worker(struct work_struct *data) goto fault_done; } - if (kbase_alloc_phy_pages_helper(region->gpu_alloc, new_pages) == 0) { - if (region->gpu_alloc != region->cpu_alloc) { - if (kbase_alloc_phy_pages_helper( - region->cpu_alloc, new_pages) == 0) { - grown = true; - } else { - kbase_free_phy_pages_helper(region->gpu_alloc, - new_pages); - } - } else { - grown = true; - } +#ifdef CONFIG_MALI_2MB_ALLOC + if (new_pages >= (SZ_2M / SZ_4K)) { + pool = &kctx->lp_mem_pool; + /* Round up to 
number of 2 MB pages required */ + min_pool_size = new_pages + ((SZ_2M / SZ_4K) - 1); + min_pool_size /= (SZ_2M / SZ_4K); + } else { +#endif + pool = &kctx->mem_pool; + min_pool_size = new_pages; +#ifdef CONFIG_MALI_2MB_ALLOC } +#endif + if (region->gpu_alloc != region->cpu_alloc) + min_pool_size *= 2; + + pages_to_grow = 0; + + mutex_lock(&kctx->mem_partials_lock); + kbase_mem_pool_lock(pool); + /* We can not allocate memory from the kernel with the vm_lock held, so + * check that there is enough memory in the pool. If not then calculate + * how much it has to grow by, grow the pool when the vm_lock is + * dropped, and retry the allocation. + */ + if (kbase_mem_pool_size(pool) >= min_pool_size) { + gpu_pages = kbase_alloc_phy_pages_helper_locked( + region->gpu_alloc, pool, new_pages); + + if (gpu_pages) { + if (region->gpu_alloc != region->cpu_alloc) { + cpu_pages = kbase_alloc_phy_pages_helper_locked( + region->cpu_alloc, pool, + new_pages); + + if (cpu_pages) { + grown = true; + } else { + kbase_free_phy_pages_helper_locked( + region->gpu_alloc, + pool, gpu_pages, + new_pages); + } + } else { + grown = true; + } + } + } else { + pages_to_grow = min_pool_size - kbase_mem_pool_size(pool); + } + kbase_mem_pool_unlock(pool); + mutex_unlock(&kctx->mem_partials_lock); if (grown) { u64 pfn_offset; @@ -587,12 +622,13 @@ void page_fault_worker(struct work_struct *data) pos = kmalloc(sizeof(*pos), GFP_KERNEL); if (pos) { - pos->handle = region->start_pfn << PAGE_SHIFT; - pos->offset = pfn_offset; + pos->region = region; + pos->page_addr = (region->start_pfn + + pfn_offset) << + PAGE_SHIFT; pos->num_pages = new_pages; list_add(&pos->link, &kctx->gwt_current_list); - } else { dev_warn(kbdev->dev, "kmalloc failure"); } @@ -600,10 +636,23 @@ void page_fault_worker(struct work_struct *data) #endif kbase_gpu_vm_unlock(kctx); } else { - /* failed to extend, handle as a normal PF */ + int ret = -ENOMEM; + kbase_gpu_vm_unlock(kctx); - kbase_mmu_report_fault_and_kill(kctx, 
faulting_as, - "Page allocation failure"); + + /* If the memory pool was insufficient then grow it and retry. + * Otherwise fail the allocation. + */ + if (pages_to_grow > 0) + ret = kbase_mem_pool_grow(pool, pages_to_grow); + + if (ret < 0) { + /* failed to extend, handle as a normal PF */ + kbase_mmu_report_fault_and_kill(kctx, faulting_as, + "Page allocation failure"); + } else { + goto page_fault_retry; + } } fault_done: @@ -1767,8 +1816,7 @@ void bus_fault_worker(struct work_struct *data) return; } - if (unlikely(faulting_as->protected_mode)) - { + if (unlikely(faulting_as->protected_mode)) { kbase_mmu_report_fault_and_kill(kctx, faulting_as, "Permission failure"); kbase_mmu_hw_clear_fault(kbdev, faulting_as, kctx, diff --git a/drivers/gpu/arm/bifrost/mali_kbase_mmu_mode_aarch64.c b/drivers/gpu/arm/bifrost/mali_kbase_mmu_mode_aarch64.c index 4bb2628c9251..aa0c4038b563 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_mmu_mode_aarch64.c +++ b/drivers/gpu/arm/bifrost/mali_kbase_mmu_mode_aarch64.c @@ -21,7 +21,6 @@ */ - #include "mali_kbase.h" #include "mali_midg_regmap.h" #include "mali_kbase_defs.h" @@ -48,29 +47,25 @@ */ static inline void page_table_entry_set(u64 *pte, u64 phy) { +#if KERNEL_VERSION(3, 18, 13) <= LINUX_VERSION_CODE + WRITE_ONCE(*pte, phy); +#else #ifdef CONFIG_64BIT + barrier(); *pte = phy; + barrier(); #elif defined(CONFIG_ARM) - /* - * In order to prevent the compiler keeping cached copies of - * memory, we have to explicitly say that we have updated memory. - * - * Note: We could manually move the data ourselves into R0 and - * R1 by specifying register variables that are explicitly - * given registers assignments, the down side of this is that - * we have to assume cpu endianness. To avoid this we can use - * the ldrd to read the data from memory into R0 and R1 which - * will respect the cpu endianness, we then use strd to make - * the 64 bit assignment to the page table entry. 
- */ - asm volatile("ldrd r0, r1, [%[ptemp]]\n\t" - "strd r0, r1, [%[pte]]\n\t" - : "=m" (*pte) - : [ptemp] "r" (&phy), [pte] "r" (pte), "m" (phy) - : "r0", "r1"); + barrier(); + asm volatile("ldrd r0, [%1]\n\t" + "strd r0, %0\n\t" + : "=m" (*pte) + : "r" (&phy) + : "r0", "r1"); + barrier(); #else #error "64-bit atomic write must be implemented for your architecture" #endif +#endif } static void mmu_get_as_setup(struct kbase_context *kctx, diff --git a/drivers/gpu/arm/bifrost/mali_kbase_mmu_mode_lpae.c b/drivers/gpu/arm/bifrost/mali_kbase_mmu_mode_lpae.c index bc8da6348772..7dc38fcb792b 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_mmu_mode_lpae.c +++ b/drivers/gpu/arm/bifrost/mali_kbase_mmu_mode_lpae.c @@ -21,7 +21,6 @@ */ - #include "mali_kbase.h" #include "mali_midg_regmap.h" #include "mali_kbase_defs.h" @@ -46,30 +45,25 @@ */ static inline void page_table_entry_set(u64 *pte, u64 phy) { +#if KERNEL_VERSION(3, 18, 13) <= LINUX_VERSION_CODE + WRITE_ONCE(*pte, phy); +#else #ifdef CONFIG_64BIT + barrier(); *pte = phy; + barrier(); #elif defined(CONFIG_ARM) - /* - * In order to prevent the compiler keeping cached copies of - * memory, we have to explicitly say that we have updated - * memory. - * - * Note: We could manually move the data ourselves into R0 and - * R1 by specifying register variables that are explicitly - * given registers assignments, the down side of this is that - * we have to assume cpu endianness. To avoid this we can use - * the ldrd to read the data from memory into R0 and R1 which - * will respect the cpu endianness, we then use strd to make - * the 64 bit assignment to the page table entry. 
- */ - asm volatile("ldrd r0, r1, [%[ptemp]]\n\t" - "strd r0, r1, [%[pte]]\n\t" - : "=m" (*pte) - : [ptemp] "r" (&phy), [pte] "r" (pte), "m" (phy) - : "r0", "r1"); + barrier(); + asm volatile("ldrd r0, [%1]\n\t" + "strd r0, %0\n\t" + : "=m" (*pte) + : "r" (&phy) + : "r0", "r1"); + barrier(); #else #error "64-bit atomic write must be implemented for your architecture" #endif +#endif } static void mmu_get_as_setup(struct kbase_context *kctx, diff --git a/drivers/gpu/arm/bifrost/mali_kbase_pm.c b/drivers/gpu/arm/bifrost/mali_kbase_pm.c index da56f0af2f86..c226350ff88c 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_pm.c +++ b/drivers/gpu/arm/bifrost/mali_kbase_pm.c @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2010-2018 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -94,10 +94,15 @@ int kbase_pm_context_active_handle_suspend(struct kbase_device *kbdev, enum kbas if (old_count == 0) kbase_timeline_pm_handle_event(kbdev, KBASE_TIMELINE_PM_EVENT_GPU_ACTIVE); - if (c == 1) + if (c == 1) { /* First context active: Power on the GPU and any cores requested by * the policy */ kbase_hwaccess_pm_gpu_active(kbdev); + } +#if defined(CONFIG_DEVFREQ_THERMAL) && defined(CONFIG_MALI_BIFROST_DEVFREQ) + if (kbdev->ipa.gpu_active_callback) + kbdev->ipa.gpu_active_callback(kbdev->ipa.model_data); +#endif mutex_unlock(&kbdev->pm.lock); mutex_unlock(&js_devdata->runpool_mutex); @@ -146,6 +151,21 @@ void kbase_pm_context_idle(struct kbase_device *kbdev) wake_up(&kbdev->pm.zero_active_count_wait); } +#if defined(CONFIG_DEVFREQ_THERMAL) && defined(CONFIG_MALI_BIFROST_DEVFREQ) + /* IPA may be using vinstr, in which case there may be one PM reference + * still held when all other contexts have left the GPU. Inform IPA that + * the GPU is now idle so that vinstr can drop it's reference. 
+ * + * If the GPU was only briefly active then it might have gone idle + * before vinstr has taken a PM reference, meaning that active_count is + * zero. We still need to inform IPA in this case, so that vinstr can + * drop the PM reference and avoid keeping the GPU powered + * unnecessarily. + */ + if (c <= 1 && kbdev->ipa.gpu_idle_callback) + kbdev->ipa.gpu_idle_callback(kbdev->ipa.model_data); +#endif + mutex_unlock(&kbdev->pm.lock); mutex_unlock(&js_devdata->runpool_mutex); } diff --git a/drivers/gpu/arm/bifrost/mali_kbase_replay.c b/drivers/gpu/arm/bifrost/mali_kbase_replay.c index ddef76c30f82..6929af9c5c94 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_replay.c +++ b/drivers/gpu/arm/bifrost/mali_kbase_replay.c @@ -664,8 +664,8 @@ static void kbasep_replay_create_atom(struct kbase_context *kctx, atom->prio = prio; atom->atom_number = atom_nr; - base_jd_atom_dep_set(&atom->pre_dep[0], 0 , BASE_JD_DEP_TYPE_INVALID); - base_jd_atom_dep_set(&atom->pre_dep[1], 0 , BASE_JD_DEP_TYPE_INVALID); + base_jd_atom_dep_set(&atom->pre_dep[0], 0, BASE_JD_DEP_TYPE_INVALID); + base_jd_atom_dep_set(&atom->pre_dep[1], 0, BASE_JD_DEP_TYPE_INVALID); atom->udata.blob[0] = 0; atom->udata.blob[1] = 0; @@ -713,7 +713,8 @@ static int kbasep_replay_create_atoms(struct kbase_context *kctx, kbasep_replay_create_atom(kctx, t_atom, t_atom_nr, prio); kbasep_replay_create_atom(kctx, f_atom, f_atom_nr, prio); - base_jd_atom_dep_set(&f_atom->pre_dep[0], t_atom_nr , BASE_JD_DEP_TYPE_DATA); + base_jd_atom_dep_set(&f_atom->pre_dep[0], t_atom_nr, + BASE_JD_DEP_TYPE_DATA); return 0; } diff --git a/drivers/gpu/arm/bifrost/mali_kbase_softjobs.c b/drivers/gpu/arm/bifrost/mali_kbase_softjobs.c index 0c20f66f0137..83b83fe7533d 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_softjobs.c +++ b/drivers/gpu/arm/bifrost/mali_kbase_softjobs.c @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2011-2017 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2011-2018 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -375,12 +375,12 @@ static void kbase_fence_debug_timeout(struct kbase_jd_atom *katom) } #endif /* CONFIG_MALI_BIFROST_FENCE_DEBUG */ -void kbasep_soft_job_timeout_worker(unsigned long data) +void kbasep_soft_job_timeout_worker(struct timer_list *timer) { - struct kbase_context *kctx = (struct kbase_context *)data; + struct kbase_context *kctx = container_of(timer, struct kbase_context, + soft_job_timeout); u32 timeout_ms = (u32)atomic_read( &kctx->kbdev->js_data.soft_job_timeout_ms); - struct timer_list *timer = &kctx->soft_job_timeout; ktime_t cur_time = ktime_get(); bool restarting = false; unsigned long lflags; @@ -771,9 +771,11 @@ static int kbase_mem_copy_from_extres(struct kbase_context *kctx, u64 offset = buf_data->offset; size_t extres_size = buf_data->nr_extres_pages*PAGE_SIZE; size_t to_copy = min(extres_size, buf_data->size); - size_t dma_to_copy; struct kbase_mem_phy_alloc *gpu_alloc = buf_data->gpu_alloc; int ret = 0; +#ifdef CONFIG_DMA_SHARED_BUFFER + size_t dma_to_copy; +#endif KBASE_DEBUG_ASSERT(pages != NULL); @@ -915,6 +917,33 @@ static int kbase_jit_allocate_prepare(struct kbase_jd_atom *katom) goto free_info; } + if (kctx->jit_version == 1) { + /* Old JIT didn't have usage_id, max_allocations, bin_id + * or padding, so force them to zero + */ + info->usage_id = 0; + info->max_allocations = 0; + info->bin_id = 0; + info->flags = 0; + memset(info->padding, 0, sizeof(info->padding)); + } else { + int i; + + /* Check padding is all zeroed */ + for (i = 0; i < sizeof(info->padding); i++) { + if (info->padding[i] != 0) { + ret = -EINVAL; + goto free_info; + } + } + + /* No bit other than TILER_ALIGN_TOP shall be set */ + if (info->flags & ~BASE_JIT_ALLOC_MEM_TILER_ALIGN_TOP) { + ret = -EINVAL; + goto free_info; + } + } + katom->softjob_data = info; katom->jit_blocked = false; @@ -1271,6 +1300,8 @@ 
static void kbase_ext_res_finish(struct kbase_jd_atom *katom) int kbase_process_soft_job(struct kbase_jd_atom *katom) { + KBASE_TLSTREAM_TL_EVENT_ATOM_SOFTJOB_START(katom); + switch (katom->core_req & BASE_JD_REQ_SOFT_JOB_TYPE) { case BASE_JD_REQ_SOFT_DUMP_CPU_GPU_TIME: return kbase_dump_cpu_gpu_time(katom); @@ -1434,6 +1465,7 @@ int kbase_prepare_soft_job(struct kbase_jd_atom *katom) void kbase_finish_soft_job(struct kbase_jd_atom *katom) { + KBASE_TLSTREAM_TL_EVENT_ATOM_SOFTJOB_END(katom); switch (katom->core_req & BASE_JD_REQ_SOFT_JOB_TYPE) { case BASE_JD_REQ_SOFT_DUMP_CPU_GPU_TIME: /* Nothing to do */ diff --git a/drivers/gpu/arm/bifrost/mali_kbase_sync_file.c b/drivers/gpu/arm/bifrost/mali_kbase_sync_file.c index 0fcb5078d782..349a33ebafe2 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_sync_file.c +++ b/drivers/gpu/arm/bifrost/mali_kbase_sync_file.c @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2012-2017 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2012-2018 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -166,7 +166,9 @@ static void kbase_fence_wait_callback(struct dma_fence *fence, struct kbase_context *kctx = katom->kctx; /* Cancel atom if fence is erroneous */ -#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 11, 0)) +#if (KERNEL_VERSION(4, 11, 0) <= LINUX_VERSION_CODE || \ + (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE && \ + KERNEL_VERSION(4, 9, 68) <= LINUX_VERSION_CODE)) if (dma_fence_is_signaled(kcb->fence) && kcb->fence->error) #else if (dma_fence_is_signaled(kcb->fence) && kcb->fence->status < 0) @@ -282,7 +284,9 @@ static void kbase_sync_fence_info_get(struct dma_fence *fence, * 1 : signaled */ if (dma_fence_is_signaled(fence)) { -#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 11, 0)) +#if (KERNEL_VERSION(4, 11, 0) <= LINUX_VERSION_CODE || \ + (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE && \ + KERNEL_VERSION(4, 9, 68) <= LINUX_VERSION_CODE)) int status = fence->error; #else int status = fence->status; diff --git a/drivers/gpu/arm/bifrost/mali_kbase_tlstream.c b/drivers/gpu/arm/bifrost/mali_kbase_tlstream.c index 926d6b631469..2ff45f50bf16 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_tlstream.c +++ b/drivers/gpu/arm/bifrost/mali_kbase_tlstream.c @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2015-2017 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2015-2018 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -155,6 +155,8 @@ enum tl_msg_id_obj { KBASE_TL_EVENT_LPU_SOFTSTOP, KBASE_TL_EVENT_ATOM_SOFTSTOP_EX, KBASE_TL_EVENT_ATOM_SOFTSTOP_ISSUE, + KBASE_TL_EVENT_ATOM_SOFTJOB_START, + KBASE_TL_EVENT_ATOM_SOFTJOB_END, /* Job dump specific events. 
*/ KBASE_JD_GPU_SOFT_RESET @@ -499,6 +501,20 @@ static const struct tp_desc tp_desc_obj[] = { "@p", "atom" }, + { + KBASE_TL_EVENT_ATOM_SOFTJOB_START, + __stringify(KBASE_TL_EVENT_ATOM_SOFTJOB_START), + "atom soft job has started", + "@p", + "atom" + }, + { + KBASE_TL_EVENT_ATOM_SOFTJOB_END, + __stringify(KBASE_TL_EVENT_ATOM_SOFTJOB_END), + "atom soft job has completed", + "@p", + "atom" + }, { KBASE_JD_GPU_SOFT_RESET, __stringify(KBASE_JD_GPU_SOFT_RESET), @@ -1042,17 +1058,17 @@ static void kbasep_tlstream_flush_stream(enum tl_stream_type stype) /** * kbasep_tlstream_autoflush_timer_callback - autoflush timer callback - * @data: unused + * @timer: unused * * Timer is executed periodically to check if any of the stream contains * buffer ready to be submitted to user space. */ -static void kbasep_tlstream_autoflush_timer_callback(unsigned long data) +static void kbasep_tlstream_autoflush_timer_callback(struct timer_list *timer) { enum tl_stream_type stype; int rcode; - CSTD_UNUSED(data); + CSTD_UNUSED(timer); for (stype = 0; stype < TL_STREAM_TYPE_COUNT; stype++) { struct tl_stream *stream = tl_stream[stype]; @@ -1376,9 +1392,8 @@ int kbase_tlstream_init(void) /* Initialize autoflush timer. 
*/ atomic_set(&autoflush_timer_active, 0); - setup_timer(&autoflush_timer, - kbasep_tlstream_autoflush_timer_callback, - 0); + kbase_timer_setup(&autoflush_timer, + kbasep_tlstream_autoflush_timer_callback); return 0; } @@ -2365,6 +2380,52 @@ void __kbase_tlstream_tl_event_atom_softstop_issue(void *atom) kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags); } +void __kbase_tlstream_tl_event_atom_softjob_start(void *atom) +{ + const u32 msg_id = KBASE_TL_EVENT_ATOM_SOFTJOB_START; + const size_t msg_size = + sizeof(msg_id) + sizeof(u64) + sizeof(atom); + unsigned long flags; + char *buffer; + size_t pos = 0; + + buffer = kbasep_tlstream_msgbuf_acquire( + TL_STREAM_TYPE_OBJ, + msg_size, &flags); + KBASE_DEBUG_ASSERT(buffer); + + pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id)); + pos = kbasep_tlstream_write_timestamp(buffer, pos); + pos = kbasep_tlstream_write_bytes( + buffer, pos, &atom, sizeof(atom)); + KBASE_DEBUG_ASSERT(msg_size == pos); + + kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags); +} + +void __kbase_tlstream_tl_event_atom_softjob_end(void *atom) +{ + const u32 msg_id = KBASE_TL_EVENT_ATOM_SOFTJOB_END; + const size_t msg_size = + sizeof(msg_id) + sizeof(u64) + sizeof(atom); + unsigned long flags; + char *buffer; + size_t pos = 0; + + buffer = kbasep_tlstream_msgbuf_acquire( + TL_STREAM_TYPE_OBJ, + msg_size, &flags); + KBASE_DEBUG_ASSERT(buffer); + + pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id)); + pos = kbasep_tlstream_write_timestamp(buffer, pos); + pos = kbasep_tlstream_write_bytes( + buffer, pos, &atom, sizeof(atom)); + KBASE_DEBUG_ASSERT(msg_size == pos); + + kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags); +} + void __kbase_tlstream_jd_gpu_soft_reset(void *gpu) { const u32 msg_id = KBASE_JD_GPU_SOFT_RESET; diff --git a/drivers/gpu/arm/bifrost/mali_kbase_tlstream.h b/drivers/gpu/arm/bifrost/mali_kbase_tlstream.h index f4369014d219..bfa25d98264a 100644 --- 
a/drivers/gpu/arm/bifrost/mali_kbase_tlstream.h +++ b/drivers/gpu/arm/bifrost/mali_kbase_tlstream.h @@ -147,6 +147,8 @@ void __kbase_tlstream_tl_attrib_as_config( void __kbase_tlstream_tl_event_atom_softstop_ex(void *atom); void __kbase_tlstream_tl_event_lpu_softstop(void *lpu); void __kbase_tlstream_tl_event_atom_softstop_issue(void *atom); +void __kbase_tlstream_tl_event_atom_softjob_start(void *atom); +void __kbase_tlstream_tl_event_atom_softjob_end(void *atom); void __kbase_tlstream_jd_gpu_soft_reset(void *gpu); void __kbase_tlstream_aux_pm_state(u32 core_type, u64 state); void __kbase_tlstream_aux_pagefault(u32 ctx_nr, u64 page_count_change); @@ -515,26 +517,40 @@ extern atomic_t kbase_tlstream_enabled; __TRACE_IF_ENABLED(tl_attrib_as_config, as, transtab, memattr, transcfg) /** - * KBASE_TLSTREAM_TL_EVENT_ATOM_SOFTSTOP_ex + * KBASE_TLSTREAM_TL_EVENT_ATOM_SOFTSTOP_EX * @atom: atom identifier */ #define KBASE_TLSTREAM_TL_EVENT_ATOM_SOFTSTOP_EX(atom) \ __TRACE_IF_ENABLED(tl_event_atom_softstop_ex, atom) /** - * KBASE_TLSTREAM_TL_EVENT_LPU_softstop + * KBASE_TLSTREAM_TL_EVENT_LPU_SOFTSTOP * @lpu: name of the LPU object */ #define KBASE_TLSTREAM_TL_EVENT_LPU_SOFTSTOP(lpu) \ __TRACE_IF_ENABLED(tl_event_lpu_softstop, lpu) /** - * KBASE_TLSTREAM_TL_EVENT_ATOM_SOFTSTOP_issue + * KBASE_TLSTREAM_TL_EVENT_ATOM_SOFTSTOP_ISSUE * @atom: atom identifier */ #define KBASE_TLSTREAM_TL_EVENT_ATOM_SOFTSTOP_ISSUE(atom) \ __TRACE_IF_ENABLED(tl_event_atom_softstop_issue, atom) +/** + * KBASE_TLSTREAM_TL_EVENT_ATOM_SOFTJOB_START + * @atom: atom identifier + */ +#define KBASE_TLSTREAM_TL_EVENT_ATOM_SOFTJOB_START(atom) \ + __TRACE_IF_ENABLED(tl_event_atom_softjob_start, atom) + +/** + * KBASE_TLSTREAM_TL_EVENT_ATOM_SOFTJOB_END + * @atom: atom identifier + */ +#define KBASE_TLSTREAM_TL_EVENT_ATOM_SOFTJOB_END(atom) \ + __TRACE_IF_ENABLED(tl_event_atom_softjob_end, atom) + /** * KBASE_TLSTREAM_JD_GPU_SOFT_RESET - The GPU is being soft reset * @gpu: name of the GPU object diff --git 
a/drivers/gpu/arm/bifrost/mali_kbase_utility.h b/drivers/gpu/arm/bifrost/mali_kbase_utility.h index d36285e26a68..f2e5a3381e13 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_utility.h +++ b/drivers/gpu/arm/bifrost/mali_kbase_utility.h @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2012-2013, 2015 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2012-2013, 2015, 2018 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -39,4 +39,28 @@ */ bool kbasep_list_member_of(const struct list_head *base, struct list_head *entry); + +static inline void kbase_timer_setup(struct timer_list *timer, + void (*callback)(struct timer_list *timer)) +{ +#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 14, 0) + setup_timer(timer, (void (*)(unsigned long)) callback, + (unsigned long) timer); +#else + timer_setup(timer, callback, 0); +#endif +} + +#ifndef WRITE_ONCE + #ifdef ASSIGN_ONCE + #define WRITE_ONCE(x, val) ASSIGN_ONCE(val, x) + #else + #define WRITE_ONCE(x, val) (ACCESS_ONCE(x) = (val)) + #endif +#endif + +#ifndef READ_ONCE + #define READ_ONCE(x) ACCESS_ONCE(x) +#endif + #endif /* _KBASE_UTILITY_H */ diff --git a/drivers/gpu/arm/bifrost/mali_kbase_vinstr.c b/drivers/gpu/arm/bifrost/mali_kbase_vinstr.c index e7d33da7bc77..e25338b3544a 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_vinstr.c +++ b/drivers/gpu/arm/bifrost/mali_kbase_vinstr.c @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2011-2017 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2011-2018 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -82,7 +82,9 @@ enum vinstr_state { /** * struct kbase_vinstr_context - vinstr context per device - * @lock: protects the entire vinstr context + * @lock: protects the entire vinstr context, but the list of + * vinstr clients can be updated outside the lock using + * @state_lock. * @kbdev: pointer to kbase device * @kctx: pointer to kbase context * @vmap: vinstr vmap for mapping hwcnt dump buffer @@ -94,12 +96,14 @@ enum vinstr_state { * @reprogram: when true, reprogram hwcnt block with the new set of * counters * @state: vinstr state - * @state_lock: protects information about vinstr state + * @state_lock: protects information about vinstr state and list of + * clients. * @suspend_waitq: notification queue to trigger state re-validation * @suspend_cnt: reference counter of vinstr's suspend state * @suspend_work: worker to execute on entering suspended state * @resume_work: worker to execute on leaving suspended state - * @nclients: number of attached clients, pending or otherwise + * @nclients: number of attached clients, pending or idle + * @nclients_suspended: number of attached but suspended clients * @waiting_clients: head of list of clients being periodically sampled * @idle_clients: head of list of clients being idle * @suspended_clients: head of list of clients being suspended @@ -109,6 +113,10 @@ enum vinstr_state { * @clients_present: when true, we have at least one client * Note: this variable is in sync. with nclients and is * present to preserve simplicity. Protected by state_lock. + * @need_suspend: when true, a suspend has been requested while a resume is + * in progress. Resume worker should queue a suspend. + * @need_resume: when true, a resume has been requested while a suspend is + * in progress. Suspend worker should queue a resume. 
*/ struct kbase_vinstr_context { struct mutex lock; @@ -130,6 +138,7 @@ struct kbase_vinstr_context { struct work_struct resume_work; u32 nclients; + u32 nclients_suspended; struct list_head waiting_clients; struct list_head idle_clients; struct list_head suspended_clients; @@ -139,6 +148,9 @@ struct kbase_vinstr_context { atomic_t request_pending; bool clients_present; + + bool need_suspend; + bool need_resume; }; /** @@ -161,6 +173,7 @@ struct kbase_vinstr_context { * @write_idx: index of buffer being written by dumping service * @waitq: client's notification queue * @pending: when true, client has attached but hwcnt not yet updated + * @suspended: when true, client is suspended */ struct kbase_vinstr_client { struct kbase_vinstr_context *vinstr_ctx; @@ -181,6 +194,7 @@ struct kbase_vinstr_client { atomic_t write_idx; wait_queue_head_t waitq; bool pending; + bool suspended; }; /** @@ -195,6 +209,9 @@ struct kbasep_vinstr_wake_up_timer { /*****************************************************************************/ +static void kbase_vinstr_update_suspend( + struct kbase_vinstr_context *vinstr_ctx); + static int kbasep_vinstr_service_task(void *data); static unsigned int kbasep_vinstr_hwcnt_reader_poll( @@ -226,14 +243,14 @@ static int enable_hwcnt(struct kbase_vinstr_context *vinstr_ctx) { struct kbase_context *kctx = vinstr_ctx->kctx; struct kbase_device *kbdev = kctx->kbdev; - struct kbase_uk_hwcnt_setup setup; + struct kbase_ioctl_hwcnt_enable enable; int err; - setup.dump_buffer = vinstr_ctx->gpu_va; - setup.jm_bm = vinstr_ctx->bitmap[JM_HWCNT_BM]; - setup.tiler_bm = vinstr_ctx->bitmap[TILER_HWCNT_BM]; - setup.shader_bm = vinstr_ctx->bitmap[SHADER_HWCNT_BM]; - setup.mmu_l2_bm = vinstr_ctx->bitmap[MMU_L2_HWCNT_BM]; + enable.dump_buffer = vinstr_ctx->gpu_va; + enable.jm_bm = vinstr_ctx->bitmap[JM_HWCNT_BM]; + enable.tiler_bm = vinstr_ctx->bitmap[TILER_HWCNT_BM]; + enable.shader_bm = vinstr_ctx->bitmap[SHADER_HWCNT_BM]; + enable.mmu_l2_bm = 
vinstr_ctx->bitmap[MMU_L2_HWCNT_BM]; /* Mark the context as active so the GPU is kept turned on */ /* A suspend won't happen here, because we're in a syscall from a @@ -242,7 +259,7 @@ static int enable_hwcnt(struct kbase_vinstr_context *vinstr_ctx) /* Schedule the context in */ kbasep_js_schedule_privileged_ctx(kbdev, kctx); - err = kbase_instr_hwcnt_enable_internal(kbdev, kctx, &setup); + err = kbase_instr_hwcnt_enable_internal(kbdev, kctx, &enable); if (err) { /* Release the context. This had its own Power Manager Active * reference */ @@ -472,18 +489,24 @@ static void kbasep_vinstr_destroy_kctx(struct kbase_vinstr_context *vinstr_ctx) struct kbasep_kctx_list_element *element; struct kbasep_kctx_list_element *tmp; bool found = false; + bool hwcnt_disabled = false; unsigned long flags; /* Release hw counters dumping resources. */ vinstr_ctx->thread = NULL; - disable_hwcnt(vinstr_ctx); - kbasep_vinstr_unmap_kernel_dump_buffer(vinstr_ctx); /* Simplify state transitions by specifying that we have no clients. */ spin_lock_irqsave(&vinstr_ctx->state_lock, flags); vinstr_ctx->clients_present = false; + if ((VINSTR_SUSPENDED == vinstr_ctx->state) || (VINSTR_RESUMING == vinstr_ctx->state)) + hwcnt_disabled = true; spin_unlock_irqrestore(&vinstr_ctx->state_lock, flags); + if (!hwcnt_disabled) + disable_hwcnt(vinstr_ctx); + + kbasep_vinstr_unmap_kernel_dump_buffer(vinstr_ctx); + /* Remove kernel context from the device's contexts list. 
*/ mutex_lock(&kbdev->kctx_list_lock); list_for_each_entry_safe(element, tmp, &kbdev->kctx_list, link) { @@ -523,6 +546,8 @@ static struct kbase_vinstr_client *kbasep_vinstr_attach_client( { struct task_struct *thread = NULL; struct kbase_vinstr_client *cli; + unsigned long flags; + bool clients_present = false; KBASE_DEBUG_ASSERT(vinstr_ctx); @@ -548,10 +573,14 @@ static struct kbase_vinstr_client *kbasep_vinstr_attach_client( hwcnt_bitmap_union(vinstr_ctx->bitmap, cli->bitmap); vinstr_ctx->reprogram = true; + spin_lock_irqsave(&vinstr_ctx->state_lock, flags); + clients_present = (vinstr_ctx->nclients || vinstr_ctx->nclients_suspended); + spin_unlock_irqrestore(&vinstr_ctx->state_lock, flags); + /* If this is the first client, create the vinstr kbase * context. This context is permanently resident until the * last client exits. */ - if (!vinstr_ctx->nclients) { + if (!clients_present) { hwcnt_bitmap_set(vinstr_ctx->bitmap, cli->bitmap); if (kbasep_vinstr_create_kctx(vinstr_ctx) < 0) goto error; @@ -606,8 +635,11 @@ static struct kbase_vinstr_client *kbasep_vinstr_attach_client( atomic_set(&cli->write_idx, 0); init_waitqueue_head(&cli->waitq); + spin_lock_irqsave(&vinstr_ctx->state_lock, flags); vinstr_ctx->nclients++; list_add(&cli->list, &vinstr_ctx->idle_clients); + kbase_vinstr_update_suspend(vinstr_ctx); + spin_unlock_irqrestore(&vinstr_ctx->state_lock, flags); mutex_unlock(&vinstr_ctx->lock); @@ -620,7 +652,7 @@ static struct kbase_vinstr_client *kbasep_vinstr_attach_client( (unsigned long)cli->dump_buffers, get_order(cli->dump_size * cli->buffer_count)); kfree(cli->accum_buffer); - if (!vinstr_ctx->nclients && vinstr_ctx->kctx) { + if (!clients_present && vinstr_ctx->kctx) { thread = vinstr_ctx->thread; kbasep_vinstr_destroy_kctx(vinstr_ctx); } @@ -642,18 +674,19 @@ void kbase_vinstr_detach_client(struct kbase_vinstr_client *cli) struct task_struct *thread = NULL; u32 zerobitmap[4] = { 0 }; int cli_found = 0; + unsigned long flags; + bool clients_present; 
KBASE_DEBUG_ASSERT(cli); vinstr_ctx = cli->vinstr_ctx; KBASE_DEBUG_ASSERT(vinstr_ctx); mutex_lock(&vinstr_ctx->lock); + spin_lock_irqsave(&vinstr_ctx->state_lock, flags); list_for_each_entry_safe(iter, tmp, &vinstr_ctx->idle_clients, list) { if (iter == cli) { - vinstr_ctx->reprogram = true; cli_found = 1; - list_del(&iter->list); break; } } @@ -661,15 +694,47 @@ void kbase_vinstr_detach_client(struct kbase_vinstr_client *cli) list_for_each_entry_safe( iter, tmp, &vinstr_ctx->waiting_clients, list) { if (iter == cli) { - vinstr_ctx->reprogram = true; cli_found = 1; - list_del(&iter->list); + break; + } + } + } + if (!cli_found) { + list_for_each_entry_safe( + iter, tmp, &vinstr_ctx->suspended_clients, list) { + if (iter == cli) { + cli_found = 1; break; } } } KBASE_DEBUG_ASSERT(cli_found); + if (cli_found) { + vinstr_ctx->reprogram = true; + list_del(&iter->list); + } + + if (!cli->suspended) + vinstr_ctx->nclients--; + else + vinstr_ctx->nclients_suspended--; + + kbase_vinstr_update_suspend(vinstr_ctx); + + clients_present = (vinstr_ctx->nclients || vinstr_ctx->nclients_suspended); + + /* Rebuild context bitmap now that the client has detached */ + hwcnt_bitmap_set(vinstr_ctx->bitmap, zerobitmap); + list_for_each_entry(iter, &vinstr_ctx->idle_clients, list) + hwcnt_bitmap_union(vinstr_ctx->bitmap, iter->bitmap); + list_for_each_entry(iter, &vinstr_ctx->waiting_clients, list) + hwcnt_bitmap_union(vinstr_ctx->bitmap, iter->bitmap); + list_for_each_entry(iter, &vinstr_ctx->suspended_clients, list) + hwcnt_bitmap_union(vinstr_ctx->bitmap, iter->bitmap); + + spin_unlock_irqrestore(&vinstr_ctx->state_lock, flags); + kfree(cli->dump_buffers_meta); free_pages( (unsigned long)cli->dump_buffers, @@ -677,19 +742,11 @@ void kbase_vinstr_detach_client(struct kbase_vinstr_client *cli) kfree(cli->accum_buffer); kfree(cli); - vinstr_ctx->nclients--; - if (!vinstr_ctx->nclients) { + if (!clients_present) { thread = vinstr_ctx->thread; kbasep_vinstr_destroy_kctx(vinstr_ctx); } - /* 
Rebuild context bitmap now that the client has detached */ - hwcnt_bitmap_set(vinstr_ctx->bitmap, zerobitmap); - list_for_each_entry(iter, &vinstr_ctx->idle_clients, list) - hwcnt_bitmap_union(vinstr_ctx->bitmap, iter->bitmap); - list_for_each_entry(iter, &vinstr_ctx->waiting_clients, list) - hwcnt_bitmap_union(vinstr_ctx->bitmap, iter->bitmap); - mutex_unlock(&vinstr_ctx->lock); /* Thread must be stopped after lock is released. */ @@ -978,8 +1035,7 @@ static int kbasep_vinstr_collect_and_accumulate( WARN_ON(rcode); spin_lock_irqsave(&vinstr_ctx->state_lock, flags); - switch (vinstr_ctx->state) - { + switch (vinstr_ctx->state) { case VINSTR_SUSPENDING: schedule_work(&vinstr_ctx->suspend_work); break; @@ -990,12 +1046,13 @@ static int kbasep_vinstr_collect_and_accumulate( default: break; } - spin_unlock_irqrestore(&vinstr_ctx->state_lock, flags); /* Accumulate values of collected counters. */ if (!rcode) accum_clients(vinstr_ctx); + spin_unlock_irqrestore(&vinstr_ctx->state_lock, flags); + return rcode; } @@ -1100,6 +1157,7 @@ static void kbasep_vinstr_reprogram( if (!reprogram_hwcnt(vinstr_ctx)) { vinstr_ctx->reprogram = false; + spin_lock_irqsave(&vinstr_ctx->state_lock, flags); list_for_each_entry( iter, &vinstr_ctx->idle_clients, @@ -1110,6 +1168,7 @@ static void kbasep_vinstr_reprogram( &vinstr_ctx->waiting_clients, list) iter->pending = false; + spin_unlock_irqrestore(&vinstr_ctx->state_lock, flags); } } } @@ -1128,6 +1187,7 @@ static int kbasep_vinstr_update_client( enum base_hwcnt_reader_event event_id) { int rcode = 0; + unsigned long flags; /* Copy collected counters to user readable buffer. */ if (cli->buffer_count) @@ -1138,18 +1198,23 @@ static int kbasep_vinstr_update_client( else rcode = kbasep_vinstr_fill_dump_buffer_legacy(cli); + /* Prepare for next request. 
*/ + memset(cli->accum_buffer, 0, cli->dump_size); + + spin_lock_irqsave(&cli->vinstr_ctx->state_lock, flags); + /* Check if client was put to suspend state while it was being updated */ + if (cli->suspended) + rcode = -EINVAL; + spin_unlock_irqrestore(&cli->vinstr_ctx->state_lock, flags); + if (rcode) goto exit; - /* Notify client. Make sure all changes to memory are visible. */ wmb(); atomic_inc(&cli->write_idx); wake_up_interruptible(&cli->waitq); - /* Prepare for next request. */ - memset(cli->accum_buffer, 0, cli->dump_size); - exit: return rcode; } @@ -1208,6 +1273,7 @@ static int kbasep_vinstr_service_task(void *data) struct kbase_vinstr_client *cli = NULL; struct kbase_vinstr_client *tmp; int rcode; + unsigned long flags; u64 timestamp = kbasep_vinstr_get_timestamp(); u64 dump_time = 0; @@ -1220,6 +1286,7 @@ static int kbasep_vinstr_service_task(void *data) if (current == vinstr_ctx->thread) { atomic_set(&vinstr_ctx->request_pending, 0); + spin_lock_irqsave(&vinstr_ctx->state_lock, flags); if (!list_empty(&vinstr_ctx->waiting_clients)) { cli = list_first_entry( &vinstr_ctx->waiting_clients, @@ -1227,6 +1294,7 @@ static int kbasep_vinstr_service_task(void *data) list); dump_time = cli->dump_time; } + spin_unlock_irqrestore(&vinstr_ctx->state_lock, flags); } if (!cli || ((s64)timestamp - (s64)dump_time < 0ll)) { @@ -1255,6 +1323,7 @@ static int kbasep_vinstr_service_task(void *data) INIT_LIST_HEAD(&expired_requests); + spin_lock_irqsave(&vinstr_ctx->state_lock, flags); /* Find all expired requests. */ list_for_each_entry_safe( cli, @@ -1273,18 +1342,29 @@ static int kbasep_vinstr_service_task(void *data) } /* Fill data for each request found. */ - list_for_each_entry_safe(cli, tmp, &expired_requests, list) { + while (!list_empty(&expired_requests)) { + cli = list_first_entry(&expired_requests, + struct kbase_vinstr_client, list); + /* Ensure that legacy buffer will not be used from * this kthread context. 
*/ BUG_ON(0 == cli->buffer_count); /* Expect only periodically sampled clients. */ BUG_ON(0 == cli->dump_interval); + /* Release the spinlock, as filling the data in client's + * userspace buffer could result in page faults. */ + spin_unlock_irqrestore(&vinstr_ctx->state_lock, flags); if (!rcode) kbasep_vinstr_update_client( cli, timestamp, BASE_HWCNT_READER_EVENT_PERIODIC); + spin_lock_irqsave(&cli->vinstr_ctx->state_lock, flags); + + /* This client got suspended, move to the next one. */ + if (cli->suspended) + continue; /* Set new dumping time. Drop missed probing times. */ do { @@ -1296,6 +1376,7 @@ static int kbasep_vinstr_service_task(void *data) cli, &vinstr_ctx->waiting_clients); } + spin_unlock_irqrestore(&vinstr_ctx->state_lock, flags); /* Reprogram counters set if required. */ kbasep_vinstr_reprogram(vinstr_ctx); @@ -1410,10 +1491,18 @@ static long kbasep_vinstr_hwcnt_reader_ioctl_set_interval( struct kbase_vinstr_client *cli, u32 interval) { struct kbase_vinstr_context *vinstr_ctx = cli->vinstr_ctx; + unsigned long flags; KBASE_DEBUG_ASSERT(vinstr_ctx); mutex_lock(&vinstr_ctx->lock); + spin_lock_irqsave(&vinstr_ctx->state_lock, flags); + + if (cli->suspended) { + spin_unlock_irqrestore(&vinstr_ctx->state_lock, flags); + mutex_unlock(&vinstr_ctx->lock); + return -ENOMEM; + } list_del(&cli->list); @@ -1435,6 +1524,7 @@ static long kbasep_vinstr_hwcnt_reader_ioctl_set_interval( list_add(&cli->list, &vinstr_ctx->idle_clients); } + spin_unlock_irqrestore(&vinstr_ctx->state_lock, flags); mutex_unlock(&vinstr_ctx->lock); return 0; @@ -1739,17 +1829,29 @@ static void kbasep_vinstr_suspend_worker(struct work_struct *data) spin_lock_irqsave(&vinstr_ctx->state_lock, flags); vinstr_ctx->state = VINSTR_SUSPENDED; wake_up_all(&vinstr_ctx->suspend_waitq); - spin_unlock_irqrestore(&vinstr_ctx->state_lock, flags); - mutex_unlock(&vinstr_ctx->lock); + if (vinstr_ctx->need_resume) { + vinstr_ctx->need_resume = false; + vinstr_ctx->state = VINSTR_RESUMING; + 
schedule_work(&vinstr_ctx->resume_work); - /* Kick GPU scheduler to allow entering protected mode. - * This must happen after vinstr was suspended. */ - kbasep_vinstr_kick_scheduler(vinstr_ctx->kbdev); + spin_unlock_irqrestore(&vinstr_ctx->state_lock, flags); + + mutex_unlock(&vinstr_ctx->lock); + } else { + spin_unlock_irqrestore(&vinstr_ctx->state_lock, flags); + + mutex_unlock(&vinstr_ctx->lock); + + /* Kick GPU scheduler to allow entering protected mode. + * This must happen after vinstr was suspended. + */ + kbasep_vinstr_kick_scheduler(vinstr_ctx->kbdev); + } } /** - * kbasep_vinstr_suspend_worker - worker resuming vinstr module + * kbasep_vinstr_resume_worker - worker resuming vinstr module * @data: pointer to work structure */ static void kbasep_vinstr_resume_worker(struct work_struct *data) @@ -1768,15 +1870,27 @@ static void kbasep_vinstr_resume_worker(struct work_struct *data) spin_lock_irqsave(&vinstr_ctx->state_lock, flags); vinstr_ctx->state = VINSTR_IDLE; wake_up_all(&vinstr_ctx->suspend_waitq); - spin_unlock_irqrestore(&vinstr_ctx->state_lock, flags); - mutex_unlock(&vinstr_ctx->lock); + if (vinstr_ctx->need_suspend) { + vinstr_ctx->need_suspend = false; + vinstr_ctx->state = VINSTR_SUSPENDING; + schedule_work(&vinstr_ctx->suspend_work); - /* Kick GPU scheduler to allow entering protected mode. - * Note that scheduler state machine might requested re-entry to - * protected mode before vinstr was resumed. - * This must happen after vinstr was release. */ - kbasep_vinstr_kick_scheduler(vinstr_ctx->kbdev); + spin_unlock_irqrestore(&vinstr_ctx->state_lock, flags); + + mutex_unlock(&vinstr_ctx->lock); + } else { + spin_unlock_irqrestore(&vinstr_ctx->state_lock, flags); + + mutex_unlock(&vinstr_ctx->lock); + + /* Kick GPU scheduler to allow entering protected mode. + * Note that scheduler state machine might requested re-entry to + * protected mode before vinstr was resumed. + * This must happen after vinstr was release. 
+ */ + kbasep_vinstr_kick_scheduler(vinstr_ctx->kbdev); + } } /*****************************************************************************/ @@ -1791,6 +1905,7 @@ struct kbase_vinstr_context *kbase_vinstr_init(struct kbase_device *kbdev) INIT_LIST_HEAD(&vinstr_ctx->idle_clients); INIT_LIST_HEAD(&vinstr_ctx->waiting_clients); + INIT_LIST_HEAD(&vinstr_ctx->suspended_clients); mutex_init(&vinstr_ctx->lock); spin_lock_init(&vinstr_ctx->state_lock); vinstr_ctx->kbdev = kbdev; @@ -1824,27 +1939,35 @@ void kbase_vinstr_term(struct kbase_vinstr_context *vinstr_ctx) if (list_empty(list)) { list = &vinstr_ctx->waiting_clients; - if (list_empty(list)) - break; + if (list_empty(list)) { + list = &vinstr_ctx->suspended_clients; + if (list_empty(list)) + break; + } } cli = list_first_entry(list, struct kbase_vinstr_client, list); list_del(&cli->list); + if (!cli->suspended) + vinstr_ctx->nclients--; + else + vinstr_ctx->nclients_suspended--; kfree(cli->accum_buffer); kfree(cli); - vinstr_ctx->nclients--; } KBASE_DEBUG_ASSERT(!vinstr_ctx->nclients); + KBASE_DEBUG_ASSERT(!vinstr_ctx->nclients_suspended); if (vinstr_ctx->kctx) kbasep_vinstr_destroy_kctx(vinstr_ctx); kfree(vinstr_ctx); } int kbase_vinstr_hwcnt_reader_setup(struct kbase_vinstr_context *vinstr_ctx, - struct kbase_uk_hwcnt_reader_setup *setup) + struct kbase_ioctl_hwcnt_reader_setup *setup) { struct kbase_vinstr_client *cli; u32 bitmap[4]; + int fd; KBASE_DEBUG_ASSERT(vinstr_ctx); KBASE_DEBUG_ASSERT(setup); @@ -1859,31 +1982,32 @@ int kbase_vinstr_hwcnt_reader_setup(struct kbase_vinstr_context *vinstr_ctx, vinstr_ctx, setup->buffer_count, bitmap, - &setup->fd, + &fd, NULL); if (!cli) return -ENOMEM; - return 0; + kbase_vinstr_wait_for_ready(vinstr_ctx); + return fd; } int kbase_vinstr_legacy_hwc_setup( struct kbase_vinstr_context *vinstr_ctx, struct kbase_vinstr_client **cli, - struct kbase_uk_hwcnt_setup *setup) + struct kbase_ioctl_hwcnt_enable *enable) { KBASE_DEBUG_ASSERT(vinstr_ctx); - KBASE_DEBUG_ASSERT(setup); 
+ KBASE_DEBUG_ASSERT(enable); KBASE_DEBUG_ASSERT(cli); - if (setup->dump_buffer) { + if (enable->dump_buffer) { u32 bitmap[4]; - bitmap[SHADER_HWCNT_BM] = setup->shader_bm; - bitmap[TILER_HWCNT_BM] = setup->tiler_bm; - bitmap[MMU_L2_HWCNT_BM] = setup->mmu_l2_bm; - bitmap[JM_HWCNT_BM] = setup->jm_bm; + bitmap[SHADER_HWCNT_BM] = enable->shader_bm; + bitmap[TILER_HWCNT_BM] = enable->tiler_bm; + bitmap[MMU_L2_HWCNT_BM] = enable->mmu_l2_bm; + bitmap[JM_HWCNT_BM] = enable->jm_bm; if (*cli) return -EBUSY; @@ -1892,11 +2016,13 @@ int kbase_vinstr_legacy_hwc_setup( vinstr_ctx, 0, bitmap, - (void *)(long)setup->dump_buffer, + (void *)(uintptr_t)enable->dump_buffer, NULL); if (!(*cli)) return -ENOMEM; + + kbase_vinstr_wait_for_ready(vinstr_ctx); } else { if (!*cli) return -EINVAL; @@ -1910,9 +2036,10 @@ int kbase_vinstr_legacy_hwc_setup( struct kbase_vinstr_client *kbase_vinstr_hwcnt_kernel_setup( struct kbase_vinstr_context *vinstr_ctx, - struct kbase_uk_hwcnt_reader_setup *setup, + struct kbase_ioctl_hwcnt_reader_setup *setup, void *kernel_buffer) { + struct kbase_vinstr_client *kernel_client; u32 bitmap[4]; if (!vinstr_ctx || !setup || !kernel_buffer) @@ -1923,12 +2050,17 @@ struct kbase_vinstr_client *kbase_vinstr_hwcnt_kernel_setup( bitmap[MMU_L2_HWCNT_BM] = setup->mmu_l2_bm; bitmap[JM_HWCNT_BM] = setup->jm_bm; - return kbasep_vinstr_attach_client( - vinstr_ctx, - 0, - bitmap, - NULL, - kernel_buffer); + kernel_client = kbasep_vinstr_attach_client( + vinstr_ctx, + 0, + bitmap, + NULL, + kernel_buffer); + + if (kernel_client) + kbase_vinstr_wait_for_ready(vinstr_ctx); + + return kernel_client; } KBASE_EXPORT_TEST_API(kbase_vinstr_hwcnt_kernel_setup); @@ -2036,13 +2168,42 @@ int kbase_vinstr_try_suspend(struct kbase_vinstr_context *vinstr_ctx) vinstr_ctx->state = VINSTR_SUSPENDING; break; - case VINSTR_SUSPENDING: - /* fall through */ case VINSTR_RESUMING: + vinstr_ctx->need_suspend = true; + break; + + case VINSTR_SUSPENDING: break; default: - BUG(); + 
KBASE_DEBUG_ASSERT(0); + break; + } + spin_unlock_irqrestore(&vinstr_ctx->state_lock, flags); + + return ret; +} + +static int kbase_vinstr_is_ready(struct kbase_vinstr_context *vinstr_ctx) +{ + unsigned long flags; + int ret = -EAGAIN; + + KBASE_DEBUG_ASSERT(vinstr_ctx); + + spin_lock_irqsave(&vinstr_ctx->state_lock, flags); + switch (vinstr_ctx->state) { + case VINSTR_SUSPENDED: + case VINSTR_RESUMING: + case VINSTR_SUSPENDING: + break; + + case VINSTR_IDLE: + case VINSTR_DUMPING: + ret = 0; + break; + default: + KBASE_DEBUG_ASSERT(0); break; } spin_unlock_irqrestore(&vinstr_ctx->state_lock, flags); @@ -2056,6 +2217,58 @@ void kbase_vinstr_suspend(struct kbase_vinstr_context *vinstr_ctx) (0 == kbase_vinstr_try_suspend(vinstr_ctx))); } +void kbase_vinstr_wait_for_ready(struct kbase_vinstr_context *vinstr_ctx) +{ + wait_event(vinstr_ctx->suspend_waitq, + (0 == kbase_vinstr_is_ready(vinstr_ctx))); +} +KBASE_EXPORT_TEST_API(kbase_vinstr_wait_for_ready); + +/** + * kbase_vinstr_update_suspend - Update vinstr suspend/resume status depending + * on nclients + * @vinstr_ctx: vinstr context pointer + * + * This function should be called whenever vinstr_ctx->nclients changes. This + * may cause vinstr to be suspended or resumed, depending on the number of + * clients and whether IPA is suspended or not. 
+ */ +static void kbase_vinstr_update_suspend(struct kbase_vinstr_context *vinstr_ctx) +{ + lockdep_assert_held(&vinstr_ctx->state_lock); + + switch (vinstr_ctx->state) { + case VINSTR_SUSPENDED: + if ((vinstr_ctx->nclients) && (0 == vinstr_ctx->suspend_cnt)) { + vinstr_ctx->state = VINSTR_RESUMING; + schedule_work(&vinstr_ctx->resume_work); + } + break; + + case VINSTR_SUSPENDING: + if (vinstr_ctx->nclients) + vinstr_ctx->need_resume = true; + break; + + case VINSTR_IDLE: + if (!vinstr_ctx->nclients) { + vinstr_ctx->state = VINSTR_SUSPENDING; + schedule_work(&vinstr_ctx->suspend_work); + } + break; + + case VINSTR_DUMPING: + if (!vinstr_ctx->nclients) + vinstr_ctx->state = VINSTR_SUSPENDING; + break; + + case VINSTR_RESUMING: + if (!vinstr_ctx->nclients) + vinstr_ctx->need_suspend = true; + break; + } +} + void kbase_vinstr_resume(struct kbase_vinstr_context *vinstr_ctx) { unsigned long flags; @@ -2078,3 +2291,45 @@ void kbase_vinstr_resume(struct kbase_vinstr_context *vinstr_ctx) } spin_unlock_irqrestore(&vinstr_ctx->state_lock, flags); } + +void kbase_vinstr_suspend_client(struct kbase_vinstr_client *client) +{ + struct kbase_vinstr_context *vinstr_ctx = client->vinstr_ctx; + unsigned long flags; + + spin_lock_irqsave(&vinstr_ctx->state_lock, flags); + + if (!client->suspended) { + list_del(&client->list); + list_add(&client->list, &vinstr_ctx->suspended_clients); + + vinstr_ctx->nclients--; + vinstr_ctx->nclients_suspended++; + kbase_vinstr_update_suspend(vinstr_ctx); + + client->suspended = true; + } + + spin_unlock_irqrestore(&vinstr_ctx->state_lock, flags); +} + +void kbase_vinstr_resume_client(struct kbase_vinstr_client *client) +{ + struct kbase_vinstr_context *vinstr_ctx = client->vinstr_ctx; + unsigned long flags; + + spin_lock_irqsave(&vinstr_ctx->state_lock, flags); + + if (client->suspended) { + list_del(&client->list); + list_add(&client->list, &vinstr_ctx->idle_clients); + + vinstr_ctx->nclients++; + vinstr_ctx->nclients_suspended--; + 
kbase_vinstr_update_suspend(vinstr_ctx); + + client->suspended = false; + } + + spin_unlock_irqrestore(&vinstr_ctx->state_lock, flags); +} diff --git a/drivers/gpu/arm/bifrost/mali_kbase_vinstr.h b/drivers/gpu/arm/bifrost/mali_kbase_vinstr.h index af7c7b68aa26..d32799f74084 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_vinstr.h +++ b/drivers/gpu/arm/bifrost/mali_kbase_vinstr.h @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2015-2017 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2015-2018 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -24,35 +24,13 @@ #define _KBASE_VINSTR_H_ #include +#include /*****************************************************************************/ struct kbase_vinstr_context; struct kbase_vinstr_client; -struct kbase_uk_hwcnt_setup { - /* IN */ - u64 dump_buffer; - u32 jm_bm; - u32 shader_bm; - u32 tiler_bm; - u32 unused_1; /* keep for backwards compatibility */ - u32 mmu_l2_bm; - u32 padding; - /* OUT */ -}; - -struct kbase_uk_hwcnt_reader_setup { - /* IN */ - u32 buffer_count; - u32 jm_bm; - u32 shader_bm; - u32 tiler_bm; - u32 mmu_l2_bm; - - /* OUT */ - s32 fd; -}; /*****************************************************************************/ /** @@ -74,24 +52,24 @@ void kbase_vinstr_term(struct kbase_vinstr_context *vinstr_ctx); * @vinstr_ctx: vinstr context * @setup: reader's configuration * - * Return: zero on success + * Return: file descriptor on success and a (negative) error code otherwise */ int kbase_vinstr_hwcnt_reader_setup( struct kbase_vinstr_context *vinstr_ctx, - struct kbase_uk_hwcnt_reader_setup *setup); + struct kbase_ioctl_hwcnt_reader_setup *setup); /** * kbase_vinstr_legacy_hwc_setup - configure hw counters for dumping * @vinstr_ctx: vinstr context * @cli: pointer where to store pointer to new vinstr client structure - * @setup: hwc configuration + * @enable: hwc 
configuration * * Return: zero on success */ int kbase_vinstr_legacy_hwc_setup( struct kbase_vinstr_context *vinstr_ctx, struct kbase_vinstr_client **cli, - struct kbase_uk_hwcnt_setup *setup); + struct kbase_ioctl_hwcnt_enable *enable); /** * kbase_vinstr_hwcnt_kernel_setup - configure hw counters for kernel side @@ -100,13 +78,13 @@ int kbase_vinstr_legacy_hwc_setup( * @setup: reader's configuration * @kernel_buffer: pointer to dump buffer * - * setup->buffer_count and setup->fd are not used for kernel side clients. + * setup->buffer_count is not used for kernel side clients. * * Return: pointer to client structure, or NULL on failure */ struct kbase_vinstr_client *kbase_vinstr_hwcnt_kernel_setup( struct kbase_vinstr_context *vinstr_ctx, - struct kbase_uk_hwcnt_reader_setup *setup, + struct kbase_ioctl_hwcnt_reader_setup *setup, void *kernel_buffer); /** @@ -155,6 +133,16 @@ int kbase_vinstr_try_suspend(struct kbase_vinstr_context *vinstr_ctx); */ void kbase_vinstr_suspend(struct kbase_vinstr_context *vinstr_ctx); +/** + * kbase_vinstr_wait_for_ready - waits for the vinstr context to get ready + * @vinstr_ctx: vinstr context + * + * Function waits for the vinstr to become ready for dumping. It can be in the + * resuming state after the client was attached but the client currently expects + * that vinstr is ready for dumping immediately post attach. 
+ */ +void kbase_vinstr_wait_for_ready(struct kbase_vinstr_context *vinstr_ctx); + /** * kbase_vinstr_resume - resumes operation of a given vinstr context * @vinstr_ctx: vinstr context @@ -178,5 +166,17 @@ size_t kbase_vinstr_dump_size(struct kbase_device *kbdev); */ void kbase_vinstr_detach_client(struct kbase_vinstr_client *cli); +/** + * kbase_vinstr_suspend_client - Suspend vinstr client + * @client: pointer to vinstr client + */ +void kbase_vinstr_suspend_client(struct kbase_vinstr_client *client); + +/** + * kbase_vinstr_resume_client - Resume vinstr client + * @client: pointer to vinstr client + */ +void kbase_vinstr_resume_client(struct kbase_vinstr_client *client); + #endif /* _KBASE_VINSTR_H_ */ diff --git a/drivers/gpu/arm/bifrost/mali_midg_regmap.h b/drivers/gpu/arm/bifrost/mali_midg_regmap.h index 5e83ee87242c..180850069f2e 100644 --- a/drivers/gpu/arm/bifrost/mali_midg_regmap.h +++ b/drivers/gpu/arm/bifrost/mali_midg_regmap.h @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2010-2017 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2010-2018 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -34,8 +34,7 @@ #define GPU_CONTROL_REG(r) (GPU_CONTROL_BASE + (r)) #define GPU_ID 0x000 /* (RO) GPU and revision identifier */ #define L2_FEATURES 0x004 /* (RO) Level 2 cache features */ -#define SUSPEND_SIZE 0x008 /* (RO) Fixed-function suspend buffer - size */ +#define CORE_FEATURES 0x008 /* (RO) Shader Core Features */ #define TILER_FEATURES 0x00C /* (RO) Tiler Features */ #define MEM_FEATURES 0x010 /* (RO) Memory system features */ #define MMU_FEATURES 0x014 /* (RO) MMU features */ @@ -93,6 +92,9 @@ #define THREAD_MAX_WORKGROUP_SIZE 0x0A4 /* (RO) Maximum workgroup size */ #define THREAD_MAX_BARRIER_SIZE 0x0A8 /* (RO) Maximum threads waiting at a barrier */ #define THREAD_FEATURES 0x0AC /* (RO) Thread features */ +#define THREAD_TLS_ALLOC 0x310 /* (RO) Number of threads per core that + * TLS must be allocated for + */ #define TEXTURE_FEATURES_0 0x0B0 /* (RO) Support flags for indexed texture formats 0..31 */ #define TEXTURE_FEATURES_1 0x0B4 /* (RO) Support flags for indexed texture formats 32..63 */ diff --git a/drivers/gpu/arm/bifrost/thirdparty/mali_kbase_mmap.c b/drivers/gpu/arm/bifrost/thirdparty/mali_kbase_mmap.c index 1690da43a0c3..6857eb761ee2 100644 --- a/drivers/gpu/arm/bifrost/thirdparty/mali_kbase_mmap.c +++ b/drivers/gpu/arm/bifrost/thirdparty/mali_kbase_mmap.c @@ -245,12 +245,13 @@ unsigned long kbase_get_unmapped_area(struct file *filp, if (!kbase_ctx_flag(kctx, KCTX_COMPAT)) { - if (kbase_hw_has_feature(kctx->kbdev, - BASE_HW_FEATURE_33BIT_VA)) { - high_limit = kctx->same_va_end << PAGE_SHIFT; - } else { - high_limit = min_t(unsigned long, mm->mmap_base, - (kctx->same_va_end << PAGE_SHIFT)); + high_limit = min_t(unsigned long, mm->mmap_base, + (kctx->same_va_end << PAGE_SHIFT)); + + /* If there's enough (> 33 bits) of GPU VA space, align + * to 2MB boundaries. 
+ */ + if (kctx->kbdev->gpu_props.mmu.va_bits > 33) { if (len >= SZ_2M) { align_offset = SZ_2M; align_mask = SZ_2M - 1; diff --git a/drivers/gpu/arm/sconscript b/drivers/gpu/arm/sconscript old mode 100755 new mode 100644 index c31eec7cc170..a06092bd5bf0 --- a/drivers/gpu/arm/sconscript +++ b/drivers/gpu/arm/sconscript @@ -1,18 +1,25 @@ # -# (C) COPYRIGHT 2010-2013 ARM Limited. All rights reserved. +# (C) COPYRIGHT 2015-2016 ARM Limited. All rights reserved. # # This program is free software and is provided to you under the terms of the # GNU General Public License version 2 as published by the Free Software # Foundation, and any use by you of this program is subject to the terms # of such GNU licence. # -# A copy of the licence is included with the program, and can also be obtained -# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, -# Boston, MA 02110-1301, USA. +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, you can access it online at +# http://www.gnu.org/licenses/gpl-2.0.html. +# +# SPDX-License-Identifier: GPL-2.0 # # - +import glob SConscript('midgard/sconscript')