mirror of
https://github.com/torvalds/linux.git
synced 2026-06-07 05:55:44 +02:00
MALI: rockchip: upgrade bifrost DDK to g15p0-01eac0, from g13p0-01eac0
Note, the corresponding mali_csffw.bin for DDK g15 MUST be used. Change-Id: Ic30634fa6247d62bf96f506c64d13b89e16b02e6 Signed-off-by: Zhen Chen <chenzhen@rock-chips.com>
This commit is contained in:
parent
96e93dba44
commit
034aad5dd8
|
|
@ -236,6 +236,7 @@ Description:
|
|||
device-driver that supports a CSF GPU. The duration value unit
|
||||
is in milliseconds and is used for configuring csf scheduling
|
||||
tick duration.
|
||||
|
||||
What: /sys/class/misc/mali%u/device/reset_timeout
|
||||
Description:
|
||||
This attribute is used to set the number of milliseconds to
|
||||
|
|
|
|||
|
|
@ -1,6 +1,6 @@
|
|||
# SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
|
||||
#
|
||||
# (C) COPYRIGHT 2013-2021 ARM Limited. All rights reserved.
|
||||
# (C) COPYRIGHT 2013-2022 ARM Limited. All rights reserved.
|
||||
#
|
||||
# This program is free software and is provided to you under the terms of the
|
||||
# GNU General Public License version 2 as published by the Free Software
|
||||
|
|
@ -129,7 +129,7 @@ for details.
|
|||
set and the setting corresponding to the SYSC_ALLOC register.
|
||||
|
||||
|
||||
Example for a Mali GPU with 1 clock and no regulators:
|
||||
Example for a Mali GPU with 1 clock and 1 regulator:
|
||||
|
||||
gpu@0xfc010000 {
|
||||
compatible = "arm,malit602", "arm,malit60x", "arm,malit6xx", "arm,mali-midgard";
|
||||
|
|
|
|||
|
|
@ -1,6 +1,6 @@
|
|||
# SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
|
||||
#
|
||||
# (C) COPYRIGHT 2021 ARM Limited. All rights reserved.
|
||||
# (C) COPYRIGHT 2021-2022 ARM Limited. All rights reserved.
|
||||
#
|
||||
# This program is free software and is provided to you under the terms of the
|
||||
# GNU General Public License version 2 as published by the Free Software
|
||||
|
|
@ -28,7 +28,6 @@ subdir-ccflags-y += $(ccflags-y)
|
|||
#
|
||||
# Kernel modules
|
||||
#
|
||||
obj-$(CONFIG_DMA_BUF_LOCK) += dma_buf_lock/src/
|
||||
obj-$(CONFIG_DMA_SHARED_BUFFER_TEST_EXPORTER) += dma_buf_test_exporter/
|
||||
obj-$(CONFIG_MALI_MEMORY_GROUP_MANAGER) += memory_group_manager/
|
||||
obj-$(CONFIG_MALI_PROTECTED_MEMORY_ALLOCATOR) += protected_memory_allocator/
|
||||
|
|
|
|||
|
|
@ -1,6 +1,6 @@
|
|||
# SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
|
||||
#
|
||||
# (C) COPYRIGHT 2021 ARM Limited. All rights reserved.
|
||||
# (C) COPYRIGHT 2021-2022 ARM Limited. All rights reserved.
|
||||
#
|
||||
# This program is free software and is provided to you under the terms of the
|
||||
# GNU General Public License version 2 as published by the Free Software
|
||||
|
|
@ -26,16 +26,6 @@ menuconfig MALI_BASE_MODULES
|
|||
Those modules provide extra features or debug interfaces, and
|
||||
are optional for the use of the Mali GPU modules.
|
||||
|
||||
config DMA_BUF_LOCK
|
||||
bool "Build dma-buf lock module"
|
||||
depends on MALI_BASE_MODULES && MALI_DMA_FENCE
|
||||
default y
|
||||
help
|
||||
This option will build the dma_buf_lock module.
|
||||
|
||||
Modules:
|
||||
- dma_buf_lock.ko
|
||||
|
||||
config DMA_SHARED_BUFFER_TEST_EXPORTER
|
||||
bool "Build dma-buf framework test exporter module"
|
||||
depends on MALI_BASE_MODULES && DMA_SHARED_BUFFER
|
||||
|
|
|
|||
|
|
@ -38,11 +38,9 @@ ifeq ($(CONFIG_MALI_BASE_MODULES),y)
|
|||
CONFIG_MALI_CSF_SUPPORT ?= n
|
||||
|
||||
ifneq ($(CONFIG_DMA_SHARED_BUFFER),n)
|
||||
CONFIG_DMA_BUF_LOCK ?= y
|
||||
CONFIG_DMA_SHARED_BUFFER_TEST_EXPORTER ?= y
|
||||
else
|
||||
# Prevent misuse when CONFIG_DMA_SHARED_BUFFER=n
|
||||
CONFIG_DMA_BUF_LOCK = n
|
||||
CONFIG_DMA_SHARED_BUFFER_TEST_EXPORTER = n
|
||||
endif
|
||||
|
||||
|
|
@ -54,7 +52,6 @@ ifeq ($(CONFIG_MALI_BASE_MODULES),y)
|
|||
|
||||
else
|
||||
# Prevent misuse when CONFIG_MALI_BASE_MODULES=n
|
||||
CONFIG_DMA_BUF_LOCK = n
|
||||
CONFIG_DMA_SHARED_BUFFER_TEST_EXPORTER = n
|
||||
CONFIG_MALI_MEMORY_GROUP_MANAGER = n
|
||||
CONFIG_MALI_PROTECTED_MEMORY_ALLOCATOR = n
|
||||
|
|
@ -64,10 +61,9 @@ endif
|
|||
CONFIGS := \
|
||||
CONFIG_MALI_BASE_MODULES \
|
||||
CONFIG_MALI_CSF_SUPPORT \
|
||||
CONFIG_DMA_BUF_LOCK \
|
||||
CONFIG_DMA_SHARED_BUFFER_TEST_EXPORTER \
|
||||
CONFIG_MALI_MEMORY_GROUP_MANAGER \
|
||||
CONFIG_MALI_PROTECTED_MEMORY_ALLOCATOR
|
||||
CONFIG_MALI_PROTECTED_MEMORY_ALLOCATOR \
|
||||
|
||||
|
||||
#
|
||||
|
|
@ -92,26 +88,47 @@ EXTRA_CFLAGS := $(foreach config,$(CONFIGS), \
|
|||
$(if $(filter y m,$(value $(value config))), \
|
||||
-D$(value config)=1))
|
||||
|
||||
# The following were added to align with W=1 in scripts/Makefile.extrawarn
|
||||
# from the Linux source tree
|
||||
KBUILD_CFLAGS += -Wall -Werror
|
||||
|
||||
# The following were added to align with W=1 in scripts/Makefile.extrawarn
|
||||
# from the Linux source tree (v5.18.14)
|
||||
KBUILD_CFLAGS += -Wextra -Wunused -Wno-unused-parameter
|
||||
KBUILD_CFLAGS += -Wmissing-declarations
|
||||
KBUILD_CFLAGS += -Wmissing-format-attribute
|
||||
KBUILD_CFLAGS += -Wmissing-prototypes
|
||||
KBUILD_CFLAGS += -Wold-style-definition
|
||||
KBUILD_CFLAGS += -Wmissing-include-dirs
|
||||
# The -Wmissing-include-dirs cannot be enabled as the path to some of the
|
||||
# included directories change depending on whether it is an in-tree or
|
||||
# out-of-tree build.
|
||||
KBUILD_CFLAGS += $(call cc-option, -Wunused-but-set-variable)
|
||||
KBUILD_CFLAGS += $(call cc-option, -Wunused-const-variable)
|
||||
KBUILD_CFLAGS += $(call cc-option, -Wpacked-not-aligned)
|
||||
KBUILD_CFLAGS += $(call cc-option, -Wstringop-truncation)
|
||||
# The following turn off the warnings enabled by -Wextra
|
||||
KBUILD_CFLAGS += -Wno-missing-field-initializers
|
||||
KBUILD_CFLAGS += -Wno-sign-compare
|
||||
KBUILD_CFLAGS += -Wno-type-limits
|
||||
KBUILD_CFLAGS += -Wno-shift-negative-value
|
||||
# This flag is needed to avoid build errors on older kernels
|
||||
KBUILD_CFLAGS += $(call cc-option, -Wno-cast-function-type)
|
||||
|
||||
KBUILD_CPPFLAGS += -DKBUILD_EXTRA_WARN1
|
||||
|
||||
# The following were added to align with W=2 in scripts/Makefile.extrawarn
|
||||
# from the Linux source tree (v5.18.14)
|
||||
KBUILD_CFLAGS += -Wdisabled-optimization
|
||||
# The -Wshadow flag cannot be enabled unless upstream kernels are
|
||||
# patched to fix redefinitions of certain built-in functions and
|
||||
# global variables.
|
||||
KBUILD_CFLAGS += $(call cc-option, -Wlogical-op)
|
||||
KBUILD_CFLAGS += -Wmissing-field-initializers
|
||||
KBUILD_CFLAGS += -Wtype-limits
|
||||
KBUILD_CFLAGS += $(call cc-option, -Wmaybe-uninitialized)
|
||||
KBUILD_CFLAGS += $(call cc-option, -Wunused-macros)
|
||||
|
||||
KBUILD_CPPFLAGS += -DKBUILD_EXTRA_WARN2
|
||||
|
||||
# This warning is disabled to avoid build failures in some kernel versions
|
||||
KBUILD_CFLAGS += -Wno-ignored-qualifiers
|
||||
|
||||
all:
|
||||
$(MAKE) -C $(KDIR) M=$(CURDIR) $(MAKE_ARGS) EXTRA_CFLAGS="$(EXTRA_CFLAGS)" KBUILD_EXTRA_SYMBOLS="$(EXTRA_SYMBOLS)" modules
|
||||
|
||||
|
|
|
|||
|
|
@ -1,6 +1,6 @@
|
|||
# SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
|
||||
#
|
||||
# (C) COPYRIGHT 2021 ARM Limited. All rights reserved.
|
||||
# (C) COPYRIGHT 2021-2022 ARM Limited. All rights reserved.
|
||||
#
|
||||
# This program is free software and is provided to you under the terms of the
|
||||
# GNU General Public License version 2 as published by the Free Software
|
||||
|
|
@ -26,16 +26,6 @@ menuconfig MALI_BASE_MODULES
|
|||
Those modules provide extra features or debug interfaces, and
|
||||
are optional for the use of the Mali GPU modules.
|
||||
|
||||
config DMA_BUF_LOCK
|
||||
bool "Build dma-buf lock module"
|
||||
depends on MALI_BASE_MODULES && MALI_DMA_FENCE
|
||||
default y
|
||||
help
|
||||
This option will build the dma_buf_lock module.
|
||||
|
||||
Modules:
|
||||
- dma_buf_lock.ko
|
||||
|
||||
config DMA_SHARED_BUFFER_TEST_EXPORTER
|
||||
bool "Build dma-buf framework test exporter module"
|
||||
depends on MALI_BASE_MODULES
|
||||
|
|
@ -45,7 +35,7 @@ config DMA_SHARED_BUFFER_TEST_EXPORTER
|
|||
Usable to help test importers.
|
||||
|
||||
Modules:
|
||||
- dma-buf-test-exporter.ko
|
||||
- dma-buf-test-exporter.ko
|
||||
|
||||
config MALI_MEMORY_GROUP_MANAGER
|
||||
bool "Build Mali Memory Group Manager module"
|
||||
|
|
@ -57,7 +47,7 @@ config MALI_MEMORY_GROUP_MANAGER
|
|||
for memory pools managed by Mali GPU device drivers.
|
||||
|
||||
Modules:
|
||||
- memory_group_manager.ko
|
||||
- memory_group_manager.ko
|
||||
|
||||
config MALI_PROTECTED_MEMORY_ALLOCATOR
|
||||
bool "Build Mali Protected Memory Allocator module"
|
||||
|
|
@ -70,5 +60,5 @@ config MALI_PROTECTED_MEMORY_ALLOCATOR
|
|||
of Mali GPU device drivers.
|
||||
|
||||
Modules:
|
||||
- protected_memory_allocator.ko
|
||||
- protected_memory_allocator.ko
|
||||
|
||||
|
|
|
|||
|
|
@ -1,908 +0,0 @@
|
|||
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
|
||||
/*
|
||||
*
|
||||
* (C) COPYRIGHT 2012-2014, 2017-2018, 2020-2022 ARM Limited. All rights reserved.
|
||||
*
|
||||
* This program is free software and is provided to you under the terms of the
|
||||
* GNU General Public License version 2 as published by the Free Software
|
||||
* Foundation, and any use by you of this program is subject to the terms
|
||||
* of such GNU license.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, you can access it online at
|
||||
* http://www.gnu.org/licenses/gpl-2.0.html.
|
||||
*
|
||||
*/
|
||||
|
||||
#include <linux/version.h>
|
||||
#include <linux/version_compat_defs.h>
|
||||
#include <linux/uaccess.h>
|
||||
#include <linux/init.h>
|
||||
#include <linux/module.h>
|
||||
#include <linux/fs.h>
|
||||
#include <linux/cdev.h>
|
||||
#include <linux/device.h>
|
||||
#include <linux/slab.h>
|
||||
#include <linux/atomic.h>
|
||||
#if (KERNEL_VERSION(5, 4, 0) > LINUX_VERSION_CODE)
|
||||
#include <linux/reservation.h>
|
||||
#else
|
||||
#include <linux/dma-resv.h>
|
||||
#endif
|
||||
#include <linux/dma-buf.h>
|
||||
#include <linux/wait.h>
|
||||
#include <linux/sched.h>
|
||||
#include <linux/poll.h>
|
||||
#include <linux/anon_inodes.h>
|
||||
#include <linux/file.h>
|
||||
|
||||
#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE)
|
||||
|
||||
#include <linux/fence.h>
|
||||
|
||||
#define dma_fence_context_alloc(a) fence_context_alloc(a)
|
||||
#define dma_fence_init(a, b, c, d, e) fence_init(a, b, c, d, e)
|
||||
#define dma_fence_get(a) fence_get(a)
|
||||
#define dma_fence_put(a) fence_put(a)
|
||||
#define dma_fence_signal(a) fence_signal(a)
|
||||
#define dma_fence_is_signaled(a) fence_is_signaled(a)
|
||||
#define dma_fence_add_callback(a, b, c) fence_add_callback(a, b, c)
|
||||
#define dma_fence_remove_callback(a, b) fence_remove_callback(a, b)
|
||||
|
||||
#if (KERNEL_VERSION(4, 9, 68) > LINUX_VERSION_CODE)
|
||||
#define dma_fence_get_status(a) (fence_is_signaled(a) ? (a)->status ?: 1 : 0)
|
||||
#else
|
||||
#define dma_fence_get_status(a) (fence_is_signaled(a) ? (a)->error ?: 1 : 0)
|
||||
#endif
|
||||
|
||||
#else
|
||||
|
||||
#include <linux/dma-fence.h>
|
||||
|
||||
#if (KERNEL_VERSION(4, 11, 0) > LINUX_VERSION_CODE)
|
||||
#define dma_fence_get_status(a) (dma_fence_is_signaled(a) ? \
|
||||
(a)->status ?: 1 \
|
||||
: 0)
|
||||
#endif
|
||||
|
||||
#endif /* < 4.10.0 */
|
||||
|
||||
#include "dma_buf_lock.h"
|
||||
|
||||
/* Maximum number of buffers that a single handle can address */
#define DMA_BUF_LOCK_BUF_MAX 32

/* Non-zero compiles in the pr_debug() tracing guarded by this macro */
#define DMA_BUF_LOCK_DEBUG 1

/*
 * Bias loaded into a resource's fence_dep_count before its fence callbacks
 * are registered. It is subtracted again once registration is complete, so
 * the counter cannot reach zero while callbacks are still being set up.
 */
#define DMA_BUF_LOCK_INIT_BIAS 0xFF
|
||||
|
||||
static dev_t dma_buf_lock_dev;
|
||||
static struct cdev dma_buf_lock_cdev;
|
||||
static struct class *dma_buf_lock_class;
|
||||
static const char dma_buf_lock_dev_name[] = "dma_buf_lock";
|
||||
|
||||
#if defined(HAVE_UNLOCKED_IOCTL) || defined(HAVE_COMPAT_IOCTL) || ((KERNEL_VERSION(5, 9, 0) <= LINUX_VERSION_CODE))
|
||||
static long dma_buf_lock_ioctl(struct file *filp, unsigned int cmd, unsigned long arg);
|
||||
#else
|
||||
static int dma_buf_lock_ioctl(struct inode *inode, struct file *filp, unsigned int cmd, unsigned long arg);
|
||||
#endif
|
||||
|
||||
static const struct file_operations dma_buf_lock_fops = {
|
||||
.owner = THIS_MODULE,
|
||||
#if defined(HAVE_UNLOCKED_IOCTL) || ((KERNEL_VERSION(5, 9, 0) <= LINUX_VERSION_CODE))
|
||||
.unlocked_ioctl = dma_buf_lock_ioctl,
|
||||
#endif
|
||||
#if defined(HAVE_COMPAT_IOCTL) || ((KERNEL_VERSION(5, 9, 0) <= LINUX_VERSION_CODE))
|
||||
.compat_ioctl = dma_buf_lock_ioctl,
|
||||
#endif
|
||||
};
|
||||
|
||||
struct dma_buf_lock_resource {
|
||||
#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE)
|
||||
struct fence fence;
|
||||
#else
|
||||
struct dma_fence fence;
|
||||
#endif
|
||||
int *list_of_dma_buf_fds; /* List of buffers copied from userspace */
|
||||
atomic_t locked; /* Status of lock */
|
||||
struct dma_buf **dma_bufs;
|
||||
unsigned long exclusive; /* Exclusive access bitmap */
|
||||
atomic_t fence_dep_count; /* Number of dma-fence dependencies */
|
||||
struct list_head dma_fence_callbacks; /* list of all callbacks set up to wait on other fences */
|
||||
wait_queue_head_t wait;
|
||||
struct kref refcount;
|
||||
struct list_head link;
|
||||
struct work_struct work;
|
||||
int count;
|
||||
};
|
||||
|
||||
/**
|
||||
* struct dma_buf_lock_fence_cb - Callback data struct for dma-fence
|
||||
* @fence_cb: Callback function
|
||||
* @fence: Pointer to the fence object on which this callback is waiting
|
||||
* @res: Pointer to dma_buf_lock_resource that is waiting on this callback
|
||||
* @node: List head for linking this callback to the lock resource
|
||||
*/
|
||||
struct dma_buf_lock_fence_cb {
|
||||
#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE)
|
||||
struct fence_cb fence_cb;
|
||||
struct fence *fence;
|
||||
#else
|
||||
struct dma_fence_cb fence_cb;
|
||||
struct dma_fence *fence;
|
||||
#endif
|
||||
struct dma_buf_lock_resource *res;
|
||||
struct list_head node;
|
||||
};
|
||||
|
||||
/* Global list of lock resources, linked through dma_buf_lock_resource::link. */
static LIST_HEAD(dma_buf_lock_resource_list);

/* Taken around updates of the list above and around kref_put() on a resource. */
static DEFINE_MUTEX(dma_buf_lock_mutex);

/* Defined further down; declared here because earlier code refers to them. */
static void dma_buf_lock_dounlock(struct kref *ref);
static inline int is_dma_buf_lock_file(struct file *);


/*** dma_buf_lock fence part ***/

/* Spin lock protecting all Mali fences as fence->lock. */
static DEFINE_SPINLOCK(dma_buf_lock_fence_lock);
|
||||
|
||||
/*
 * Fence op: report the driver name for fences created by this module.
 * The fence argument is not inspected; the name is a constant.
 */
#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0))
static const char *dma_buf_lock_fence_get_driver_name(struct fence *fence)
#else
static const char *dma_buf_lock_fence_get_driver_name(struct dma_fence *fence)
#endif
{
	const char *driver_name = "dma_buf_lock";

	return driver_name;
}
|
||||
|
||||
/*
 * Fence op: report the timeline name for fences created by this module.
 * The fence argument is not inspected; the name is a constant.
 */
#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0))
static const char *dma_buf_lock_fence_get_timeline_name(struct fence *fence)
#else
static const char *dma_buf_lock_fence_get_timeline_name(struct dma_fence *fence)
#endif
{
	const char *timeline_name = "dma_buf_lock.timeline";

	return timeline_name;
}
|
||||
|
||||
static bool
|
||||
#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE)
|
||||
dma_buf_lock_fence_enable_signaling(struct fence *fence)
|
||||
#else
|
||||
dma_buf_lock_fence_enable_signaling(struct dma_fence *fence)
|
||||
#endif
|
||||
{
|
||||
return true;
|
||||
}
|
||||
|
||||
#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE)
|
||||
const struct fence_ops dma_buf_lock_fence_ops = {
|
||||
.wait = fence_default_wait,
|
||||
#else
|
||||
const struct dma_fence_ops dma_buf_lock_fence_ops = {
|
||||
.wait = dma_fence_default_wait,
|
||||
#endif
|
||||
.get_driver_name = dma_buf_lock_fence_get_driver_name,
|
||||
.get_timeline_name = dma_buf_lock_fence_get_timeline_name,
|
||||
.enable_signaling = dma_buf_lock_fence_enable_signaling,
|
||||
};
|
||||
|
||||
static void
|
||||
dma_buf_lock_fence_init(struct dma_buf_lock_resource *resource)
|
||||
{
|
||||
dma_fence_init(&resource->fence,
|
||||
&dma_buf_lock_fence_ops,
|
||||
&dma_buf_lock_fence_lock,
|
||||
0,
|
||||
0);
|
||||
}
|
||||
|
||||
static void
|
||||
dma_buf_lock_fence_free_callbacks(struct dma_buf_lock_resource *resource)
|
||||
{
|
||||
struct dma_buf_lock_fence_cb *cb, *tmp;
|
||||
|
||||
/* Clean up and free callbacks. */
|
||||
list_for_each_entry_safe(cb, tmp, &resource->dma_fence_callbacks, node) {
|
||||
/* Cancel callbacks that hasn't been called yet and release the
|
||||
* reference taken in dma_buf_lock_fence_add_callback().
|
||||
*/
|
||||
dma_fence_remove_callback(cb->fence, &cb->fence_cb);
|
||||
dma_fence_put(cb->fence);
|
||||
list_del(&cb->node);
|
||||
kfree(cb);
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
dma_buf_lock_fence_work(struct work_struct *pwork)
|
||||
{
|
||||
struct dma_buf_lock_resource *resource =
|
||||
container_of(pwork, struct dma_buf_lock_resource, work);
|
||||
|
||||
WARN_ON(atomic_read(&resource->fence_dep_count));
|
||||
WARN_ON(!atomic_read(&resource->locked));
|
||||
WARN_ON(!resource->exclusive);
|
||||
|
||||
mutex_lock(&dma_buf_lock_mutex);
|
||||
kref_put(&resource->refcount, dma_buf_lock_dounlock);
|
||||
mutex_unlock(&dma_buf_lock_mutex);
|
||||
}
|
||||
|
||||
static void
|
||||
#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE)
|
||||
dma_buf_lock_fence_callback(struct fence *fence, struct fence_cb *cb)
|
||||
#else
|
||||
dma_buf_lock_fence_callback(struct dma_fence *fence, struct dma_fence_cb *cb)
|
||||
#endif
|
||||
{
|
||||
struct dma_buf_lock_fence_cb *dma_buf_lock_cb = container_of(cb,
|
||||
struct dma_buf_lock_fence_cb,
|
||||
fence_cb);
|
||||
struct dma_buf_lock_resource *resource = dma_buf_lock_cb->res;
|
||||
|
||||
#if DMA_BUF_LOCK_DEBUG
|
||||
pr_debug("%s\n", __func__);
|
||||
#endif
|
||||
|
||||
/* Callback function will be invoked in atomic context. */
|
||||
|
||||
if (atomic_dec_and_test(&resource->fence_dep_count)) {
|
||||
atomic_set(&resource->locked, 1);
|
||||
wake_up(&resource->wait);
|
||||
|
||||
if (resource->exclusive)
|
||||
/* Warn if the work was already queued */
|
||||
WARN_ON(!schedule_work(&resource->work));
|
||||
}
|
||||
}
|
||||
|
||||
#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE)
|
||||
static int
|
||||
dma_buf_lock_fence_add_callback(struct dma_buf_lock_resource *resource,
|
||||
struct fence *fence,
|
||||
fence_func_t callback)
|
||||
#else
|
||||
static int
|
||||
dma_buf_lock_fence_add_callback(struct dma_buf_lock_resource *resource,
|
||||
struct dma_fence *fence,
|
||||
dma_fence_func_t callback)
|
||||
#endif
|
||||
{
|
||||
int err = 0;
|
||||
struct dma_buf_lock_fence_cb *fence_cb;
|
||||
|
||||
if (!fence)
|
||||
return -EINVAL;
|
||||
|
||||
fence_cb = kmalloc(sizeof(*fence_cb), GFP_KERNEL);
|
||||
if (!fence_cb)
|
||||
return -ENOMEM;
|
||||
|
||||
fence_cb->fence = fence;
|
||||
fence_cb->res = resource;
|
||||
INIT_LIST_HEAD(&fence_cb->node);
|
||||
|
||||
err = dma_fence_add_callback(fence, &fence_cb->fence_cb,
|
||||
callback);
|
||||
|
||||
if (err == -ENOENT) {
|
||||
/* Fence signaled, get the completion result */
|
||||
err = dma_fence_get_status(fence);
|
||||
|
||||
/* remap success completion to err code */
|
||||
if (err == 1)
|
||||
err = 0;
|
||||
|
||||
kfree(fence_cb);
|
||||
} else if (err) {
|
||||
kfree(fence_cb);
|
||||
} else {
|
||||
/*
|
||||
* Get reference to fence that will be kept until callback gets
|
||||
* cleaned up in dma_buf_lock_fence_free_callbacks().
|
||||
*/
|
||||
dma_fence_get(fence);
|
||||
atomic_inc(&resource->fence_dep_count);
|
||||
/* Add callback to resource's list of callbacks */
|
||||
list_add(&fence_cb->node, &resource->dma_fence_callbacks);
|
||||
}
|
||||
|
||||
return err;
|
||||
}
|
||||
|
||||
#if (KERNEL_VERSION(5, 4, 0) > LINUX_VERSION_CODE)
|
||||
static int
|
||||
dma_buf_lock_add_fence_reservation_callback(struct dma_buf_lock_resource *resource,
|
||||
struct reservation_object *resv,
|
||||
bool exclusive)
|
||||
#else
|
||||
static int
|
||||
dma_buf_lock_add_fence_reservation_callback(struct dma_buf_lock_resource *resource,
|
||||
struct dma_resv *resv,
|
||||
bool exclusive)
|
||||
#endif
|
||||
{
|
||||
#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE)
|
||||
struct fence *excl_fence = NULL;
|
||||
struct fence **shared_fences = NULL;
|
||||
#else
|
||||
struct dma_fence *excl_fence = NULL;
|
||||
struct dma_fence **shared_fences = NULL;
|
||||
#endif
|
||||
unsigned int shared_count = 0;
|
||||
int err, i;
|
||||
|
||||
#if (KERNEL_VERSION(5, 4, 0) > LINUX_VERSION_CODE)
|
||||
err = reservation_object_get_fences_rcu(
|
||||
#elif (KERNEL_VERSION(5, 14, 0) > LINUX_VERSION_CODE)
|
||||
err = dma_resv_get_fences_rcu(
|
||||
#else
|
||||
err = dma_resv_get_fences(
|
||||
#endif
|
||||
resv,
|
||||
&excl_fence,
|
||||
&shared_count,
|
||||
&shared_fences);
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
if (excl_fence) {
|
||||
err = dma_buf_lock_fence_add_callback(resource,
|
||||
excl_fence,
|
||||
dma_buf_lock_fence_callback);
|
||||
|
||||
/* Release our reference, taken by reservation_object_get_fences_rcu(),
|
||||
* to the fence. We have set up our callback (if that was possible),
|
||||
* and it's the fence's owner is responsible for singling the fence
|
||||
* before allowing it to disappear.
|
||||
*/
|
||||
dma_fence_put(excl_fence);
|
||||
|
||||
if (err)
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (exclusive) {
|
||||
for (i = 0; i < shared_count; i++) {
|
||||
err = dma_buf_lock_fence_add_callback(resource,
|
||||
shared_fences[i],
|
||||
dma_buf_lock_fence_callback);
|
||||
if (err)
|
||||
goto out;
|
||||
}
|
||||
}
|
||||
|
||||
/* Release all our references to the shared fences, taken by
|
||||
* reservation_object_get_fences_rcu(). We have set up our callback (if
|
||||
* that was possible), and it's the fence's owner is responsible for
|
||||
* signaling the fence before allowing it to disappear.
|
||||
*/
|
||||
out:
|
||||
for (i = 0; i < shared_count; i++)
|
||||
dma_fence_put(shared_fences[i]);
|
||||
kfree(shared_fences);
|
||||
|
||||
return err;
|
||||
}
|
||||
|
||||
static void
|
||||
dma_buf_lock_release_fence_reservation(struct dma_buf_lock_resource *resource,
|
||||
struct ww_acquire_ctx *ctx)
|
||||
{
|
||||
unsigned int r;
|
||||
|
||||
for (r = 0; r < resource->count; r++)
|
||||
ww_mutex_unlock(&resource->dma_bufs[r]->resv->lock);
|
||||
ww_acquire_fini(ctx);
|
||||
}
|
||||
|
||||
static int
|
||||
dma_buf_lock_acquire_fence_reservation(struct dma_buf_lock_resource *resource,
|
||||
struct ww_acquire_ctx *ctx)
|
||||
{
|
||||
#if (KERNEL_VERSION(5, 4, 0) > LINUX_VERSION_CODE)
|
||||
struct reservation_object *content_resv = NULL;
|
||||
#else
|
||||
struct dma_resv *content_resv = NULL;
|
||||
#endif
|
||||
unsigned int content_resv_idx = 0;
|
||||
unsigned int r;
|
||||
int err = 0;
|
||||
|
||||
ww_acquire_init(ctx, &reservation_ww_class);
|
||||
|
||||
retry:
|
||||
for (r = 0; r < resource->count; r++) {
|
||||
if (resource->dma_bufs[r]->resv == content_resv) {
|
||||
content_resv = NULL;
|
||||
continue;
|
||||
}
|
||||
|
||||
err = ww_mutex_lock(&resource->dma_bufs[r]->resv->lock, ctx);
|
||||
if (err)
|
||||
goto error;
|
||||
}
|
||||
|
||||
ww_acquire_done(ctx);
|
||||
return err;
|
||||
|
||||
error:
|
||||
content_resv_idx = r;
|
||||
|
||||
/* Unlock the locked one ones */
|
||||
while (r--)
|
||||
ww_mutex_unlock(&resource->dma_bufs[r]->resv->lock);
|
||||
|
||||
if (content_resv)
|
||||
ww_mutex_unlock(&content_resv->lock);
|
||||
|
||||
/* If we deadlock try with lock_slow and retry */
|
||||
if (err == -EDEADLK) {
|
||||
#if DMA_BUF_LOCK_DEBUG
|
||||
pr_debug("deadlock at dma_buf fd %i\n",
|
||||
resource->list_of_dma_buf_fds[content_resv_idx]);
|
||||
#endif
|
||||
content_resv = resource->dma_bufs[content_resv_idx]->resv;
|
||||
ww_mutex_lock_slow(&content_resv->lock, ctx);
|
||||
goto retry;
|
||||
}
|
||||
|
||||
/* If we are here the function failed */
|
||||
ww_acquire_fini(ctx);
|
||||
return err;
|
||||
}
|
||||
|
||||
static int dma_buf_lock_handle_release(struct inode *inode, struct file *file)
|
||||
{
|
||||
struct dma_buf_lock_resource *resource;
|
||||
|
||||
if (!is_dma_buf_lock_file(file))
|
||||
return -EINVAL;
|
||||
|
||||
resource = file->private_data;
|
||||
#if DMA_BUF_LOCK_DEBUG
|
||||
pr_debug("%s\n", __func__);
|
||||
#endif
|
||||
mutex_lock(&dma_buf_lock_mutex);
|
||||
kref_put(&resource->refcount, dma_buf_lock_dounlock);
|
||||
mutex_unlock(&dma_buf_lock_mutex);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
 * poll() handler of a lock handle file.
 *
 * Once the resource is locked the file is reported readable, and also
 * writable when the request was exclusive. Until then the caller is
 * registered on the resource's wait queue, unless the poll table indicates
 * that it does not wait.
 *
 * Return: the ready-event mask, or the poll error flag if @file is not a
 * dma_buf_lock handle.
 */
static __poll_t dma_buf_lock_handle_poll(struct file *file, poll_table *wait)
{
	struct dma_buf_lock_resource *res;
	unsigned int events = 0;

	if (!is_dma_buf_lock_file(file)) {
#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 19, 0))
		return EPOLLERR;
#else
		return POLLERR;
#endif
	}

	res = file->private_data;
#if DMA_BUF_LOCK_DEBUG
	pr_debug("%s\n", __func__);
#endif
	if (atomic_read(&res->locked) != 1) {
		/* Not locked yet: wait for the fence callback to wake us. */
		if (!poll_does_not_wait(wait))
			poll_wait(file, &res->wait, wait);
	} else {
		/* Resources have been locked */
#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 19, 0))
		events = EPOLLIN | EPOLLRDNORM;
		if (res->exclusive)
			events |= EPOLLOUT | EPOLLWRNORM;
#else
		events = POLLIN | POLLRDNORM;
		if (res->exclusive)
			events |= POLLOUT | POLLWRNORM;
#endif
	}
#if DMA_BUF_LOCK_DEBUG
	pr_debug("%s : return %i\n", __func__, events);
#endif
	return events;
}
|
||||
|
||||
static const struct file_operations dma_buf_lock_handle_fops = {
|
||||
.owner = THIS_MODULE,
|
||||
.release = dma_buf_lock_handle_release,
|
||||
.poll = dma_buf_lock_handle_poll,
|
||||
};
|
||||
|
||||
/*
|
||||
* is_dma_buf_lock_file - Check if struct file* is associated with dma_buf_lock
|
||||
*/
|
||||
static inline int is_dma_buf_lock_file(struct file *file)
|
||||
{
|
||||
return file->f_op == &dma_buf_lock_handle_fops;
|
||||
}
|
||||
|
||||
/*
|
||||
* Start requested lock.
|
||||
*
|
||||
* Allocates required memory, copies dma_buf_fd list from userspace,
|
||||
* acquires related reservation objects, and starts the lock.
|
||||
*/
|
||||
static int dma_buf_lock_dolock(struct dma_buf_lock_k_request *request)
|
||||
{
|
||||
struct dma_buf_lock_resource *resource;
|
||||
struct ww_acquire_ctx ww_ctx;
|
||||
struct file *file;
|
||||
int size;
|
||||
int fd;
|
||||
int i;
|
||||
int ret;
|
||||
int error;
|
||||
|
||||
if (request->list_of_dma_buf_fds == NULL)
|
||||
return -EINVAL;
|
||||
if (request->count <= 0)
|
||||
return -EINVAL;
|
||||
if (request->count > DMA_BUF_LOCK_BUF_MAX)
|
||||
return -EINVAL;
|
||||
if (request->exclusive != DMA_BUF_LOCK_NONEXCLUSIVE &&
|
||||
request->exclusive != DMA_BUF_LOCK_EXCLUSIVE)
|
||||
return -EINVAL;
|
||||
|
||||
resource = kzalloc(sizeof(*resource), GFP_KERNEL);
|
||||
if (resource == NULL)
|
||||
return -ENOMEM;
|
||||
|
||||
atomic_set(&resource->locked, 0);
|
||||
kref_init(&resource->refcount);
|
||||
INIT_LIST_HEAD(&resource->link);
|
||||
INIT_WORK(&resource->work, dma_buf_lock_fence_work);
|
||||
resource->count = request->count;
|
||||
|
||||
/* Allocate space to store dma_buf_fds received from user space */
|
||||
size = request->count * sizeof(int);
|
||||
resource->list_of_dma_buf_fds = kmalloc(size, GFP_KERNEL);
|
||||
|
||||
if (resource->list_of_dma_buf_fds == NULL) {
|
||||
kfree(resource);
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
/* Allocate space to store dma_buf pointers associated with dma_buf_fds */
|
||||
size = sizeof(struct dma_buf *) * request->count;
|
||||
resource->dma_bufs = kmalloc(size, GFP_KERNEL);
|
||||
|
||||
if (resource->dma_bufs == NULL) {
|
||||
kfree(resource->list_of_dma_buf_fds);
|
||||
kfree(resource);
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
/* Copy requested list of dma_buf_fds from user space */
|
||||
size = request->count * sizeof(int);
|
||||
if (copy_from_user(resource->list_of_dma_buf_fds,
|
||||
(void __user *)request->list_of_dma_buf_fds,
|
||||
size) != 0) {
|
||||
kfree(resource->list_of_dma_buf_fds);
|
||||
kfree(resource->dma_bufs);
|
||||
kfree(resource);
|
||||
return -ENOMEM;
|
||||
}
|
||||
#if DMA_BUF_LOCK_DEBUG
|
||||
for (i = 0; i < request->count; i++)
|
||||
pr_debug("dma_buf %i = %X\n", i, resource->list_of_dma_buf_fds[i]);
|
||||
#endif
|
||||
|
||||
/* Initialize the fence associated with dma_buf_lock resource */
|
||||
dma_buf_lock_fence_init(resource);
|
||||
|
||||
INIT_LIST_HEAD(&resource->dma_fence_callbacks);
|
||||
|
||||
atomic_set(&resource->fence_dep_count, DMA_BUF_LOCK_INIT_BIAS);
|
||||
|
||||
/* Add resource to global list */
|
||||
mutex_lock(&dma_buf_lock_mutex);
|
||||
|
||||
list_add(&resource->link, &dma_buf_lock_resource_list);
|
||||
|
||||
mutex_unlock(&dma_buf_lock_mutex);
|
||||
|
||||
for (i = 0; i < request->count; i++) {
|
||||
/* Convert fd into dma_buf structure */
|
||||
resource->dma_bufs[i] = dma_buf_get(resource->list_of_dma_buf_fds[i]);
|
||||
|
||||
if (IS_ERR_VALUE(PTR_ERR(resource->dma_bufs[i]))) {
|
||||
mutex_lock(&dma_buf_lock_mutex);
|
||||
kref_put(&resource->refcount, dma_buf_lock_dounlock);
|
||||
mutex_unlock(&dma_buf_lock_mutex);
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
/*Check the reservation object associated with dma_buf */
|
||||
if (resource->dma_bufs[i]->resv == NULL) {
|
||||
mutex_lock(&dma_buf_lock_mutex);
|
||||
kref_put(&resource->refcount, dma_buf_lock_dounlock);
|
||||
mutex_unlock(&dma_buf_lock_mutex);
|
||||
return -EINVAL;
|
||||
}
|
||||
#if DMA_BUF_LOCK_DEBUG
|
||||
pr_debug("%s : dma_buf_fd %i dma_buf %p dma_fence reservation %p\n",
|
||||
__func__, resource->list_of_dma_buf_fds[i], resource->dma_bufs[i], resource->dma_bufs[i]->resv);
|
||||
#endif
|
||||
}
|
||||
|
||||
init_waitqueue_head(&resource->wait);
|
||||
|
||||
kref_get(&resource->refcount);
|
||||
|
||||
error = get_unused_fd_flags(0);
|
||||
if (error < 0)
|
||||
return error;
|
||||
|
||||
fd = error;
|
||||
|
||||
file = anon_inode_getfile("dma_buf_lock", &dma_buf_lock_handle_fops, (void *)resource, 0);
|
||||
|
||||
if (IS_ERR(file)) {
|
||||
put_unused_fd(fd);
|
||||
mutex_lock(&dma_buf_lock_mutex);
|
||||
kref_put(&resource->refcount, dma_buf_lock_dounlock);
|
||||
kref_put(&resource->refcount, dma_buf_lock_dounlock);
|
||||
mutex_unlock(&dma_buf_lock_mutex);
|
||||
return PTR_ERR(file);
|
||||
}
|
||||
|
||||
resource->exclusive = request->exclusive;
|
||||
|
||||
/* Start locking process */
|
||||
ret = dma_buf_lock_acquire_fence_reservation(resource, &ww_ctx);
|
||||
if (ret) {
|
||||
#if DMA_BUF_LOCK_DEBUG
|
||||
pr_debug("%s : Error %d locking reservations.\n", __func__, ret);
|
||||
#endif
|
||||
put_unused_fd(fd);
|
||||
mutex_lock(&dma_buf_lock_mutex);
|
||||
kref_put(&resource->refcount, dma_buf_lock_dounlock);
|
||||
kref_put(&resource->refcount, dma_buf_lock_dounlock);
|
||||
mutex_unlock(&dma_buf_lock_mutex);
|
||||
return ret;
|
||||
}
|
||||
|
||||
/* Take an extra reference for exclusive access, which will be dropped
|
||||
* once the pre-existing fences attached to dma-buf resources, for which
|
||||
* we have commited for exclusive access, are signaled.
|
||||
* At a given time there can be only one exclusive fence attached to a
|
||||
* reservation object, so the new exclusive fence replaces the original
|
||||
* fence and the future sync is done against the new fence which is
|
||||
* supposed to be signaled only after the original fence was signaled.
|
||||
* If the new exclusive fence is signaled prematurely then the resources
|
||||
* would become available for new access while they are already being
|
||||
* written to by the original owner.
|
||||
*/
|
||||
if (resource->exclusive)
|
||||
kref_get(&resource->refcount);
|
||||
|
||||
for (i = 0; i < request->count; i++) {
|
||||
#if (KERNEL_VERSION(5, 4, 0) > LINUX_VERSION_CODE)
|
||||
struct reservation_object *resv = resource->dma_bufs[i]->resv;
|
||||
#else
|
||||
struct dma_resv *resv = resource->dma_bufs[i]->resv;
|
||||
#endif
|
||||
if (!test_bit(i, &resource->exclusive)) {
|
||||
|
||||
#if (KERNEL_VERSION(5, 4, 0) > LINUX_VERSION_CODE)
|
||||
ret = reservation_object_reserve_shared(resv);
|
||||
#else
|
||||
ret = dma_resv_reserve_shared(resv, 0);
|
||||
#endif
|
||||
if (ret) {
|
||||
#if DMA_BUF_LOCK_DEBUG
|
||||
pr_debug("%s : Error %d reserving space for shared fence.\n", __func__, ret);
|
||||
#endif
|
||||
break;
|
||||
}
|
||||
|
||||
ret = dma_buf_lock_add_fence_reservation_callback(resource,
|
||||
resv,
|
||||
false);
|
||||
if (ret) {
|
||||
#if DMA_BUF_LOCK_DEBUG
|
||||
pr_debug("%s : Error %d adding reservation to callback.\n", __func__, ret);
|
||||
#endif
|
||||
break;
|
||||
}
|
||||
|
||||
#if (KERNEL_VERSION(5, 4, 0) > LINUX_VERSION_CODE)
|
||||
reservation_object_add_shared_fence(resv, &resource->fence);
|
||||
#else
|
||||
dma_resv_add_shared_fence(resv, &resource->fence);
|
||||
#endif
|
||||
} else {
|
||||
ret = dma_buf_lock_add_fence_reservation_callback(resource, resv, true);
|
||||
if (ret) {
|
||||
#if DMA_BUF_LOCK_DEBUG
|
||||
pr_debug("%s : Error %d adding reservation to callback.\n", __func__, ret);
|
||||
#endif
|
||||
break;
|
||||
}
|
||||
|
||||
#if (KERNEL_VERSION(5, 4, 0) > LINUX_VERSION_CODE)
|
||||
reservation_object_add_excl_fence(resv, &resource->fence);
|
||||
#else
|
||||
dma_resv_add_excl_fence(resv, &resource->fence);
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
dma_buf_lock_release_fence_reservation(resource, &ww_ctx);
|
||||
|
||||
/* Test if the callbacks were already triggered */
|
||||
if (!atomic_sub_return(DMA_BUF_LOCK_INIT_BIAS, &resource->fence_dep_count)) {
|
||||
atomic_set(&resource->locked, 1);
|
||||
|
||||
/* Drop the extra reference taken for exclusive access */
|
||||
if (resource->exclusive)
|
||||
dma_buf_lock_fence_work(&resource->work);
|
||||
}
|
||||
|
||||
if (IS_ERR_VALUE((unsigned long)ret)) {
|
||||
put_unused_fd(fd);
|
||||
|
||||
mutex_lock(&dma_buf_lock_mutex);
|
||||
kref_put(&resource->refcount, dma_buf_lock_dounlock);
|
||||
kref_put(&resource->refcount, dma_buf_lock_dounlock);
|
||||
mutex_unlock(&dma_buf_lock_mutex);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
#if DMA_BUF_LOCK_DEBUG
|
||||
pr_debug("%s : complete\n", __func__);
|
||||
#endif
|
||||
mutex_lock(&dma_buf_lock_mutex);
|
||||
kref_put(&resource->refcount, dma_buf_lock_dounlock);
|
||||
mutex_unlock(&dma_buf_lock_mutex);
|
||||
|
||||
/* Installing the fd is deferred to the very last operation before return
|
||||
* to avoid allowing userspace to close it during the setup.
|
||||
*/
|
||||
fd_install(fd, file);
|
||||
return fd;
|
||||
}
|
||||
|
||||
static void dma_buf_lock_dounlock(struct kref *ref)
|
||||
{
|
||||
int i;
|
||||
struct dma_buf_lock_resource *resource = container_of(ref, struct dma_buf_lock_resource, refcount);
|
||||
|
||||
atomic_set(&resource->locked, 0);
|
||||
|
||||
/* Signal the resource's fence. */
|
||||
dma_fence_signal(&resource->fence);
|
||||
|
||||
dma_buf_lock_fence_free_callbacks(resource);
|
||||
|
||||
list_del(&resource->link);
|
||||
|
||||
for (i = 0; i < resource->count; i++) {
|
||||
if (resource->dma_bufs[i])
|
||||
dma_buf_put(resource->dma_bufs[i]);
|
||||
}
|
||||
|
||||
kfree(resource->dma_bufs);
|
||||
kfree(resource->list_of_dma_buf_fds);
|
||||
dma_fence_put(&resource->fence);
|
||||
}
|
||||
|
||||
/*
 * Module entry point: allocate a char device region, register the cdev,
 * create the device class and finally the device node.
 *
 * Each step that fails undoes the steps that already succeeded, in reverse
 * order, before the error code is returned.
 *
 * Return: 0 on success, otherwise the error reported by the failing step.
 */
static int __init dma_buf_lock_init(void)
{
	struct device *mdev;
	int err;

#if DMA_BUF_LOCK_DEBUG
	pr_debug("%s\n", __func__);
#endif
	err = alloc_chrdev_region(&dma_buf_lock_dev, 0, 1, dma_buf_lock_dev_name);
	if (err)
		goto out_failed;

	cdev_init(&dma_buf_lock_cdev, &dma_buf_lock_fops);

	err = cdev_add(&dma_buf_lock_cdev, dma_buf_lock_dev, 1);
	if (err)
		goto out_unregister_region;

	dma_buf_lock_class = class_create(THIS_MODULE, dma_buf_lock_dev_name);
	if (IS_ERR(dma_buf_lock_class)) {
		err = PTR_ERR(dma_buf_lock_class);
		goto out_del_cdev;
	}

	mdev = device_create(dma_buf_lock_class, NULL, dma_buf_lock_dev,
			     NULL, "%s", dma_buf_lock_dev_name);
	if (!IS_ERR(mdev))
		return 0;

	/* Device creation failed: unwind everything set up so far. */
	err = PTR_ERR(mdev);
	class_destroy(dma_buf_lock_class);
out_del_cdev:
	cdev_del(&dma_buf_lock_cdev);
out_unregister_region:
	unregister_chrdev_region(dma_buf_lock_dev, 1);
out_failed:
#if DMA_BUF_LOCK_DEBUG
	pr_debug("%s failed\n", __func__);
#endif
	return err;
}
|
||||
|
||||
/*
 * Module exit point: drop a reference on the head of the resource list until
 * the list is empty, then tear down the device, class, cdev and char device
 * region.
 */
static void __exit dma_buf_lock_exit(void)
{
	struct dma_buf_lock_resource *res;
	bool pending;

#if DMA_BUF_LOCK_DEBUG
	pr_debug("%s\n", __func__);
#endif

	/*
	 * Unlock all outstanding references. The mutex is taken and released
	 * once per iteration, around a single kref_put() on the first entry.
	 */
	do {
		mutex_lock(&dma_buf_lock_mutex);

		pending = !list_empty(&dma_buf_lock_resource_list);
		if (pending) {
			res = list_entry(dma_buf_lock_resource_list.next,
					 struct dma_buf_lock_resource, link);
			kref_put(&res->refcount, dma_buf_lock_dounlock);
		}

		mutex_unlock(&dma_buf_lock_mutex);
	} while (pending);

	device_destroy(dma_buf_lock_class, dma_buf_lock_dev);
	class_destroy(dma_buf_lock_class);
	cdev_del(&dma_buf_lock_cdev);
	unregister_chrdev_region(dma_buf_lock_dev, 1);
}
|
||||
|
||||
#if defined(HAVE_UNLOCKED_IOCTL) || defined(HAVE_COMPAT_IOCTL) || ((KERNEL_VERSION(5, 9, 0) <= LINUX_VERSION_CODE))
|
||||
static long dma_buf_lock_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
|
||||
#else
|
||||
static int dma_buf_lock_ioctl(struct inode *inode, struct file *filp, unsigned int cmd, unsigned long arg)
|
||||
#endif
|
||||
{
|
||||
struct dma_buf_lock_k_request request;
|
||||
int size = _IOC_SIZE(cmd);
|
||||
|
||||
if (_IOC_TYPE(cmd) != DMA_BUF_LOCK_IOC_MAGIC)
|
||||
return -ENOTTY;
|
||||
if ((_IOC_NR(cmd) < DMA_BUF_LOCK_IOC_MINNR) || (_IOC_NR(cmd) > DMA_BUF_LOCK_IOC_MAXNR))
|
||||
return -ENOTTY;
|
||||
|
||||
switch (cmd) {
|
||||
case DMA_BUF_LOCK_FUNC_LOCK_ASYNC:
|
||||
if (size != sizeof(request))
|
||||
return -ENOTTY;
|
||||
if (copy_from_user(&request, (void __user *)arg, size))
|
||||
return -EFAULT;
|
||||
#if DMA_BUF_LOCK_DEBUG
|
||||
pr_debug("DMA_BUF_LOCK_FUNC_LOCK_ASYNC - %i\n", request.count);
|
||||
#endif
|
||||
return dma_buf_lock_dolock(&request);
|
||||
}
|
||||
|
||||
return -ENOTTY;
|
||||
}
|
||||
|
||||
module_init(dma_buf_lock_init);
|
||||
module_exit(dma_buf_lock_exit);
|
||||
|
||||
MODULE_LICENSE("GPL");
|
||||
MODULE_INFO(import_ns, "DMA_BUF");
|
||||
|
|
@ -1,7 +1,7 @@
|
|||
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
|
||||
/*
|
||||
*
|
||||
* (C) COPYRIGHT 2017, 2020-2021 ARM Limited. All rights reserved.
|
||||
* (C) COPYRIGHT 2017, 2020-2022 ARM Limited. All rights reserved.
|
||||
*
|
||||
* This program is free software and is provided to you under the terms of the
|
||||
* GNU General Public License version 2 as published by the Free Software
|
||||
|
|
@ -22,7 +22,7 @@
|
|||
bob_kernel_module {
|
||||
name: "dma-buf-test-exporter",
|
||||
defaults: [
|
||||
"kernel_defaults"
|
||||
"kernel_defaults",
|
||||
],
|
||||
srcs: [
|
||||
"Kbuild",
|
||||
|
|
|
|||
|
|
@ -19,7 +19,7 @@
|
|||
*
|
||||
*/
|
||||
|
||||
#include <linux/dma-buf-test-exporter.h>
|
||||
#include <uapi/base/arm/dma_buf_test_exporter/dma-buf-test-exporter.h>
|
||||
#include <linux/dma-buf.h>
|
||||
#include <linux/miscdevice.h>
|
||||
#include <linux/slab.h>
|
||||
|
|
@ -32,6 +32,9 @@
|
|||
#include <linux/highmem.h>
|
||||
#include <linux/dma-mapping.h>
|
||||
|
||||
#define DMA_BUF_TE_VER_MAJOR 1
|
||||
#define DMA_BUF_TE_VER_MINOR 0
|
||||
|
||||
/* Maximum size allowed in a single DMA_BUF_TE_ALLOC call */
|
||||
#define DMA_BUF_TE_ALLOC_MAX_SIZE ((8ull << 30) >> PAGE_SHIFT) /* 8 GB */
|
||||
|
||||
|
|
|
|||
|
|
@ -1,7 +1,7 @@
|
|||
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
|
||||
/*
|
||||
*
|
||||
* (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved.
|
||||
* (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved.
|
||||
*
|
||||
* This program is free software and is provided to you under the terms of the
|
||||
* GNU General Public License version 2 as published by the Free Software
|
||||
|
|
@ -22,7 +22,7 @@
|
|||
bob_kernel_module {
|
||||
name: "memory_group_manager",
|
||||
defaults: [
|
||||
"kernel_defaults"
|
||||
"kernel_defaults",
|
||||
],
|
||||
srcs: [
|
||||
"Kbuild",
|
||||
|
|
|
|||
|
|
@ -265,8 +265,8 @@ static struct page *example_mgm_alloc_page(
|
|||
struct mgm_groups *const data = mgm_dev->data;
|
||||
struct page *p;
|
||||
|
||||
dev_dbg(data->dev, "%s(mgm_dev=%p, group_id=%d gfp_mask=0x%x order=%u\n",
|
||||
__func__, (void *)mgm_dev, group_id, gfp_mask, order);
|
||||
dev_dbg(data->dev, "%s(mgm_dev=%pK, group_id=%d gfp_mask=0x%x order=%u\n", __func__,
|
||||
(void *)mgm_dev, group_id, gfp_mask, order);
|
||||
|
||||
if (WARN_ON(group_id < 0) ||
|
||||
WARN_ON(group_id >= MEMORY_GROUP_MANAGER_NR_GROUPS))
|
||||
|
|
@ -291,8 +291,8 @@ static void example_mgm_free_page(
|
|||
{
|
||||
struct mgm_groups *const data = mgm_dev->data;
|
||||
|
||||
dev_dbg(data->dev, "%s(mgm_dev=%p, group_id=%d page=%p order=%u\n",
|
||||
__func__, (void *)mgm_dev, group_id, (void *)page, order);
|
||||
dev_dbg(data->dev, "%s(mgm_dev=%pK, group_id=%d page=%pK order=%u\n", __func__,
|
||||
(void *)mgm_dev, group_id, (void *)page, order);
|
||||
|
||||
if (WARN_ON(group_id < 0) ||
|
||||
WARN_ON(group_id >= MEMORY_GROUP_MANAGER_NR_GROUPS))
|
||||
|
|
@ -309,9 +309,8 @@ static int example_mgm_get_import_memory_id(
|
|||
{
|
||||
struct mgm_groups *const data = mgm_dev->data;
|
||||
|
||||
dev_dbg(data->dev, "%s(mgm_dev=%p, import_data=%p (type=%d)\n",
|
||||
__func__, (void *)mgm_dev, (void *)import_data,
|
||||
(int)import_data->type);
|
||||
dev_dbg(data->dev, "%s(mgm_dev=%pK, import_data=%pK (type=%d)\n", __func__, (void *)mgm_dev,
|
||||
(void *)import_data, (int)import_data->type);
|
||||
|
||||
if (!WARN_ON(!import_data)) {
|
||||
WARN_ON(!import_data->u.dma_buf);
|
||||
|
|
@ -329,9 +328,8 @@ static u64 example_mgm_update_gpu_pte(
|
|||
{
|
||||
struct mgm_groups *const data = mgm_dev->data;
|
||||
|
||||
dev_dbg(data->dev,
|
||||
"%s(mgm_dev=%p, group_id=%d, mmu_level=%d, pte=0x%llx)\n",
|
||||
__func__, (void *)mgm_dev, group_id, mmu_level, pte);
|
||||
dev_dbg(data->dev, "%s(mgm_dev=%pK, group_id=%d, mmu_level=%d, pte=0x%llx)\n", __func__,
|
||||
(void *)mgm_dev, group_id, mmu_level, pte);
|
||||
|
||||
if (WARN_ON(group_id < 0) ||
|
||||
WARN_ON(group_id >= MEMORY_GROUP_MANAGER_NR_GROUPS))
|
||||
|
|
@ -367,9 +365,9 @@ static vm_fault_t example_mgm_vmf_insert_pfn_prot(
|
|||
vm_fault_t fault;
|
||||
|
||||
dev_dbg(data->dev,
|
||||
"%s(mgm_dev=%p, group_id=%d, vma=%p, addr=0x%lx, pfn=0x%lx, prot=0x%llx)\n",
|
||||
"%s(mgm_dev=%pK, group_id=%d, vma=%pK, addr=0x%lx, pfn=0x%lx, prot=0x%llx)\n",
|
||||
__func__, (void *)mgm_dev, group_id, (void *)vma, addr, pfn,
|
||||
(unsigned long long) pgprot_val(prot));
|
||||
(unsigned long long)pgprot_val(prot));
|
||||
|
||||
if (WARN_ON(group_id < 0) ||
|
||||
WARN_ON(group_id >= MEMORY_GROUP_MANAGER_NR_GROUPS))
|
||||
|
|
|
|||
|
|
@ -1,7 +1,7 @@
|
|||
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
|
||||
/*
|
||||
*
|
||||
* (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved.
|
||||
* (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved.
|
||||
*
|
||||
* This program is free software and is provided to you under the terms of the
|
||||
* GNU General Public License version 2 as published by the Free Software
|
||||
|
|
@ -22,7 +22,7 @@
|
|||
bob_kernel_module {
|
||||
name: "protected_memory_allocator",
|
||||
defaults: [
|
||||
"kernel_defaults"
|
||||
"kernel_defaults",
|
||||
],
|
||||
srcs: [
|
||||
"Kbuild",
|
||||
|
|
|
|||
|
|
@ -1,6 +1,6 @@
|
|||
# SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
|
||||
#
|
||||
# (C) COPYRIGHT 2012-2021 ARM Limited. All rights reserved.
|
||||
# (C) COPYRIGHT 2012-2022 ARM Limited. All rights reserved.
|
||||
#
|
||||
# This program is free software and is provided to you under the terms of the
|
||||
# GNU General Public License version 2 as published by the Free Software
|
||||
|
|
@ -59,10 +59,8 @@ ifeq ($(CONFIG_MALI_PRFCNT_SET_SELECT_VIA_DEBUG_FS), y)
|
|||
endif
|
||||
|
||||
ifeq ($(CONFIG_MALI_BIFROST_FENCE_DEBUG), y)
|
||||
ifneq ($(CONFIG_SYNC), y)
|
||||
ifneq ($(CONFIG_SYNC_FILE), y)
|
||||
$(error CONFIG_MALI_BIFROST_FENCE_DEBUG depends on CONFIG_SYNC || CONFIG_SYNC_FILE to be set in Kernel configuration)
|
||||
endif
|
||||
ifneq ($(CONFIG_SYNC_FILE), y)
|
||||
$(error CONFIG_MALI_BIFROST_FENCE_DEBUG depends on CONFIG_SYNC_FILE to be set in Kernel configuration)
|
||||
endif
|
||||
endif
|
||||
|
||||
|
|
@ -71,7 +69,7 @@ endif
|
|||
#
|
||||
|
||||
# Driver version string which is returned to userspace via an ioctl
|
||||
MALI_RELEASE_NAME ?= '"g13p0-01eac0"'
|
||||
MALI_RELEASE_NAME ?= '"g15p0-01eac0"'
|
||||
# Set up defaults if not defined by build system
|
||||
ifeq ($(CONFIG_MALI_BIFROST_DEBUG), y)
|
||||
MALI_UNIT_TEST = 1
|
||||
|
|
@ -151,6 +149,7 @@ bifrost_kbase-y := \
|
|||
mali_kbase_cache_policy.o \
|
||||
mali_kbase_ccswe.o \
|
||||
mali_kbase_mem.o \
|
||||
mali_kbase_mem_migrate.o \
|
||||
mali_kbase_mem_pool_group.o \
|
||||
mali_kbase_native_mgm.o \
|
||||
mali_kbase_ctx_sched.o \
|
||||
|
|
@ -159,12 +158,6 @@ bifrost_kbase-y := \
|
|||
mali_kbase_config.o \
|
||||
mali_kbase_kinstr_prfcnt.o \
|
||||
mali_kbase_vinstr.o \
|
||||
mali_kbase_hwcnt.o \
|
||||
mali_kbase_hwcnt_gpu.o \
|
||||
mali_kbase_hwcnt_gpu_narrow.o \
|
||||
mali_kbase_hwcnt_types.o \
|
||||
mali_kbase_hwcnt_virtualizer.o \
|
||||
mali_kbase_hwcnt_watchdog_if_timer.o \
|
||||
mali_kbase_softjobs.o \
|
||||
mali_kbase_hw.o \
|
||||
mali_kbase_debug.o \
|
||||
|
|
@ -175,6 +168,7 @@ bifrost_kbase-y := \
|
|||
mali_kbase_disjoint_events.o \
|
||||
mali_kbase_debug_mem_view.o \
|
||||
mali_kbase_debug_mem_zones.o \
|
||||
mali_kbase_debug_mem_allocs.o \
|
||||
mali_kbase_smc.o \
|
||||
mali_kbase_mem_pool.o \
|
||||
mali_kbase_mem_pool_debugfs.o \
|
||||
|
|
@ -191,24 +185,14 @@ bifrost_kbase-$(CONFIG_DEBUG_FS) += mali_kbase_pbha_debugfs.o
|
|||
|
||||
bifrost_kbase-$(CONFIG_MALI_CINSTR_GWT) += mali_kbase_gwt.o
|
||||
|
||||
bifrost_kbase-$(CONFIG_SYNC) += \
|
||||
mali_kbase_sync_android.o \
|
||||
mali_kbase_sync_common.o
|
||||
|
||||
bifrost_kbase-$(CONFIG_SYNC_FILE) += \
|
||||
mali_kbase_fence_ops.o \
|
||||
mali_kbase_sync_file.o \
|
||||
mali_kbase_sync_common.o
|
||||
|
||||
ifeq ($(CONFIG_MALI_CSF_SUPPORT),y)
|
||||
bifrost_kbase-y += \
|
||||
mali_kbase_hwcnt_backend_csf.o \
|
||||
mali_kbase_hwcnt_backend_csf_if_fw.o
|
||||
else
|
||||
ifneq ($(CONFIG_MALI_CSF_SUPPORT),y)
|
||||
bifrost_kbase-y += \
|
||||
mali_kbase_jm.o \
|
||||
mali_kbase_hwcnt_backend_jm.o \
|
||||
mali_kbase_hwcnt_backend_jm_watchdog.o \
|
||||
mali_kbase_dummy_job_wa.o \
|
||||
mali_kbase_debug_job_fault.o \
|
||||
mali_kbase_event.o \
|
||||
|
|
@ -218,11 +202,6 @@ else
|
|||
mali_kbase_js_ctx_attr.o \
|
||||
mali_kbase_kinstr_jm.o
|
||||
|
||||
bifrost_kbase-$(CONFIG_MALI_BIFROST_DMA_FENCE) += \
|
||||
mali_kbase_fence_ops.o \
|
||||
mali_kbase_dma_fence.o \
|
||||
mali_kbase_fence.o
|
||||
|
||||
bifrost_kbase-$(CONFIG_SYNC_FILE) += \
|
||||
mali_kbase_fence_ops.o \
|
||||
mali_kbase_fence.o
|
||||
|
|
@ -236,6 +215,7 @@ INCLUDE_SUBDIR = \
|
|||
$(src)/backend/gpu/Kbuild \
|
||||
$(src)/mmu/Kbuild \
|
||||
$(src)/tl/Kbuild \
|
||||
$(src)/hwcnt/Kbuild \
|
||||
$(src)/gpu/Kbuild \
|
||||
$(src)/thirdparty/Kbuild \
|
||||
$(src)/platform/$(MALI_PLATFORM_DIR)/Kbuild
|
||||
|
|
|
|||
|
|
@ -1,6 +1,6 @@
|
|||
# SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
|
||||
#
|
||||
# (C) COPYRIGHT 2012-2021 ARM Limited. All rights reserved.
|
||||
# (C) COPYRIGHT 2012-2022 ARM Limited. All rights reserved.
|
||||
#
|
||||
# This program is free software and is provided to you under the terms of the
|
||||
# GNU General Public License version 2 as published by the Free Software
|
||||
|
|
@ -91,16 +91,6 @@ config MALI_BIFROST_ENABLE_TRACE
|
|||
Enables tracing in kbase. Trace log available through
|
||||
the "mali_trace" debugfs file, when the CONFIG_DEBUG_FS is enabled
|
||||
|
||||
config MALI_BIFROST_DMA_FENCE
|
||||
bool "Enable DMA_BUF fence support for Mali"
|
||||
depends on MALI_BIFROST
|
||||
default n
|
||||
help
|
||||
Support DMA_BUF fences for Mali.
|
||||
|
||||
This option should only be enabled if the Linux Kernel has built in
|
||||
support for DMA_BUF fences.
|
||||
|
||||
config MALI_ARBITER_SUPPORT
|
||||
bool "Enable arbiter support for Mali"
|
||||
depends on MALI_BIFROST && !MALI_CSF_SUPPORT
|
||||
|
|
@ -117,7 +107,7 @@ config MALI_DMA_BUF_MAP_ON_DEMAND
|
|||
depends on MALI_BIFROST
|
||||
default n
|
||||
help
|
||||
This option caused kbase to set up the GPU mapping of imported
|
||||
This option will cause kbase to set up the GPU mapping of imported
|
||||
dma-buf when needed to run atoms. This is the legacy behavior.
|
||||
|
||||
This is intended for testing and the option will get removed in the
|
||||
|
|
@ -237,7 +227,7 @@ config MALI_BIFROST_DEBUG
|
|||
|
||||
config MALI_BIFROST_FENCE_DEBUG
|
||||
bool "Enable debug sync fence usage"
|
||||
depends on MALI_BIFROST && MALI_BIFROST_EXPERT && (SYNC || SYNC_FILE)
|
||||
depends on MALI_BIFROST && MALI_BIFROST_EXPERT && SYNC_FILE
|
||||
default y if MALI_BIFROST_DEBUG
|
||||
help
|
||||
Select this option to enable additional checking and reporting on the
|
||||
|
|
@ -385,9 +375,6 @@ config MALI_ARBITRATION
|
|||
virtualization setup for Mali
|
||||
If unsure, say N.
|
||||
|
||||
if MALI_ARBITRATION
|
||||
source "drivers/gpu/arm/bifrost/arbitration/Kconfig"
|
||||
endif
|
||||
|
||||
# source "drivers/gpu/arm/bifrost/tests/Kconfig"
|
||||
|
||||
|
|
|
|||
|
|
@ -65,7 +65,7 @@ ifeq ($(CONFIG_MALI_BIFROST),m)
|
|||
endif
|
||||
|
||||
ifeq ($(CONFIG_XEN),y)
|
||||
ifneq ($(CONFIG_MALI_ARBITRATION), n)
|
||||
ifneq ($(CONFIG_MALI_ARBITER_SUPPORT), n)
|
||||
CONFIG_MALI_XEN ?= m
|
||||
endif
|
||||
endif
|
||||
|
|
@ -91,14 +91,10 @@ ifeq ($(CONFIG_MALI_BIFROST),m)
|
|||
CONFIG_MALI_BIFROST_ENABLE_TRACE ?= y
|
||||
CONFIG_MALI_BIFROST_SYSTEM_TRACE ?= y
|
||||
|
||||
ifeq ($(CONFIG_SYNC), y)
|
||||
ifeq ($(CONFIG_SYNC_FILE), y)
|
||||
CONFIG_MALI_BIFROST_FENCE_DEBUG ?= y
|
||||
else
|
||||
ifeq ($(CONFIG_SYNC_FILE), y)
|
||||
CONFIG_MALI_BIFROST_FENCE_DEBUG ?= y
|
||||
else
|
||||
CONFIG_MALI_BIFROST_FENCE_DEBUG = n
|
||||
endif
|
||||
CONFIG_MALI_BIFROST_FENCE_DEBUG = n
|
||||
endif
|
||||
else
|
||||
# Prevent misuse when CONFIG_MALI_BIFROST_DEBUG=n
|
||||
|
|
@ -160,7 +156,6 @@ CONFIGS := \
|
|||
CONFIG_MALI_BIFROST \
|
||||
CONFIG_MALI_CSF_SUPPORT \
|
||||
CONFIG_MALI_BIFROST_GATOR_SUPPORT \
|
||||
CONFIG_MALI_BIFROST_DMA_FENCE \
|
||||
CONFIG_MALI_ARBITER_SUPPORT \
|
||||
CONFIG_MALI_ARBITRATION \
|
||||
CONFIG_MALI_ARBITER_MODULES \
|
||||
|
|
@ -227,26 +222,47 @@ EXTRA_CFLAGS += -DCONFIG_MALI_PLATFORM_NAME=$(CONFIG_MALI_PLATFORM_NAME)
|
|||
# KBUILD_EXTRA_SYMBOLS to prevent warnings about unknown functions
|
||||
#
|
||||
|
||||
# The following were added to align with W=1 in scripts/Makefile.extrawarn
|
||||
# from the Linux source tree
|
||||
KBUILD_CFLAGS += -Wall -Werror
|
||||
|
||||
# The following were added to align with W=1 in scripts/Makefile.extrawarn
|
||||
# from the Linux source tree (v5.18.14)
|
||||
KBUILD_CFLAGS += -Wextra -Wunused -Wno-unused-parameter
|
||||
KBUILD_CFLAGS += -Wmissing-declarations
|
||||
KBUILD_CFLAGS += -Wmissing-format-attribute
|
||||
KBUILD_CFLAGS += -Wmissing-prototypes
|
||||
KBUILD_CFLAGS += -Wold-style-definition
|
||||
KBUILD_CFLAGS += -Wmissing-include-dirs
|
||||
# The -Wmissing-include-dirs cannot be enabled as the path to some of the
|
||||
# included directories change depending on whether it is an in-tree or
|
||||
# out-of-tree build.
|
||||
KBUILD_CFLAGS += $(call cc-option, -Wunused-but-set-variable)
|
||||
KBUILD_CFLAGS += $(call cc-option, -Wunused-const-variable)
|
||||
KBUILD_CFLAGS += $(call cc-option, -Wpacked-not-aligned)
|
||||
KBUILD_CFLAGS += $(call cc-option, -Wstringop-truncation)
|
||||
# The following turn off the warnings enabled by -Wextra
|
||||
KBUILD_CFLAGS += -Wno-missing-field-initializers
|
||||
KBUILD_CFLAGS += -Wno-sign-compare
|
||||
KBUILD_CFLAGS += -Wno-type-limits
|
||||
KBUILD_CFLAGS += -Wno-shift-negative-value
|
||||
# This flag is needed to avoid build errors on older kernels
|
||||
KBUILD_CFLAGS += $(call cc-option, -Wno-cast-function-type)
|
||||
|
||||
KBUILD_CPPFLAGS += -DKBUILD_EXTRA_WARN1
|
||||
|
||||
# The following were added to align with W=2 in scripts/Makefile.extrawarn
|
||||
# from the Linux source tree (v5.18.14)
|
||||
KBUILD_CFLAGS += -Wdisabled-optimization
|
||||
# The -Wshadow flag cannot be enabled unless upstream kernels are
|
||||
# patched to fix redefinitions of certain built-in functions and
|
||||
# global variables.
|
||||
KBUILD_CFLAGS += $(call cc-option, -Wlogical-op)
|
||||
KBUILD_CFLAGS += -Wmissing-field-initializers
|
||||
KBUILD_CFLAGS += -Wtype-limits
|
||||
KBUILD_CFLAGS += $(call cc-option, -Wmaybe-uninitialized)
|
||||
KBUILD_CFLAGS += $(call cc-option, -Wunused-macros)
|
||||
|
||||
KBUILD_CPPFLAGS += -DKBUILD_EXTRA_WARN2
|
||||
|
||||
# This warning is disabled to avoid build failures in some kernel versions
|
||||
KBUILD_CFLAGS += -Wno-ignored-qualifiers
|
||||
|
||||
all:
|
||||
$(MAKE) -C $(KDIR) M=$(CURDIR) $(MAKE_ARGS) EXTRA_CFLAGS="$(EXTRA_CFLAGS)" KBUILD_EXTRA_SYMBOLS="$(EXTRA_SYMBOLS)" modules
|
||||
|
||||
|
|
|
|||
|
|
@ -97,16 +97,6 @@ config MALI_BIFROST_ENABLE_TRACE
|
|||
Enables tracing in kbase. Trace log available through
|
||||
the "mali_trace" debugfs file, when the CONFIG_DEBUG_FS is enabled
|
||||
|
||||
config MALI_BIFROST_DMA_FENCE
|
||||
bool "Enable DMA_BUF fence support for Mali"
|
||||
depends on MALI_BIFROST
|
||||
default n
|
||||
help
|
||||
Support DMA_BUF fences for Mali.
|
||||
|
||||
This option should only be enabled if the Linux Kernel has built in
|
||||
support for DMA_BUF fences.
|
||||
|
||||
config MALI_ARBITER_SUPPORT
|
||||
bool "Enable arbiter support for Mali"
|
||||
depends on MALI_BIFROST && !MALI_CSF_SUPPORT
|
||||
|
|
@ -129,7 +119,7 @@ config MALI_DMA_BUF_MAP_ON_DEMAND
|
|||
default n
|
||||
default y if !DMA_BUF_SYNC_IOCTL_SUPPORTED
|
||||
help
|
||||
This option caused kbase to set up the GPU mapping of imported
|
||||
This option will cause kbase to set up the GPU mapping of imported
|
||||
dma-buf when needed to run atoms. This is the legacy behavior.
|
||||
|
||||
This is intended for testing and the option will get removed in the
|
||||
|
|
@ -157,17 +147,6 @@ menuconfig MALI_BIFROST_EXPERT
|
|||
Enabling this option and modifying the default settings may produce
|
||||
a driver with performance or other limitations.
|
||||
|
||||
config MALI_2MB_ALLOC
|
||||
bool "Attempt to allocate 2MB pages"
|
||||
depends on MALI_BIFROST && MALI_BIFROST_EXPERT
|
||||
default n
|
||||
help
|
||||
Rather than allocating all GPU memory page-by-page, attempt to
|
||||
allocate 2MB pages from the kernel. This reduces TLB pressure and
|
||||
helps to prevent memory fragmentation.
|
||||
|
||||
If in doubt, say N
|
||||
|
||||
config MALI_MEMORY_FULLY_BACKED
|
||||
bool "Enable memory fully physically-backed"
|
||||
depends on MALI_BIFROST && MALI_BIFROST_EXPERT
|
||||
|
|
@ -200,10 +179,10 @@ config MALI_FW_CORE_DUMP
|
|||
|
||||
Example:
|
||||
* To explicitly request core dump:
|
||||
echo 1 >/sys/kernel/debug/mali0/fw_core_dump
|
||||
echo 1 >/sys/kernel/debug/mali0/fw_core_dump
|
||||
* To output current core dump (after explicitly requesting a core dump,
|
||||
or kernel driver reported an internal firmware error):
|
||||
cat /sys/kernel/debug/mali0/fw_core_dump
|
||||
or kernel driver reported an internal firmware error):
|
||||
cat /sys/kernel/debug/mali0/fw_core_dump
|
||||
|
||||
choice
|
||||
prompt "Error injection level"
|
||||
|
|
@ -343,5 +322,5 @@ config MALI_HW_ERRATA_1485982_USE_CLOCK_ALTERNATIVE
|
|||
slowest clock will be selected.
|
||||
|
||||
|
||||
source "kernel/drivers/gpu/arm/midgard/arbitration/Mconfig"
|
||||
source "kernel/drivers/gpu/arm/arbitration/Mconfig"
|
||||
source "kernel/drivers/gpu/arm/midgard/tests/Mconfig"
|
||||
|
|
|
|||
|
|
@ -1,7 +1,7 @@
|
|||
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
|
||||
/*
|
||||
*
|
||||
* (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved.
|
||||
* (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved.
|
||||
*
|
||||
* This program is free software and is provided to you under the terms of the
|
||||
* GNU General Public License version 2 as published by the Free Software
|
||||
|
|
@ -28,12 +28,12 @@
|
|||
#include <tl/mali_kbase_tracepoints.h>
|
||||
#include <linux/of.h>
|
||||
#include <linux/of_platform.h>
|
||||
#include "mali_kbase_arbiter_interface.h"
|
||||
#include "linux/mali_arbiter_interface.h"
|
||||
|
||||
/* Arbiter interface version against which this module was implemented */
|
||||
#define MALI_REQUIRED_KBASE_ARBITER_INTERFACE_VERSION 5
|
||||
#if MALI_REQUIRED_KBASE_ARBITER_INTERFACE_VERSION != \
|
||||
MALI_KBASE_ARBITER_INTERFACE_VERSION
|
||||
MALI_ARBITER_INTERFACE_VERSION
|
||||
#error "Unsupported Mali Arbiter interface version."
|
||||
#endif
|
||||
|
||||
|
|
@ -205,6 +205,7 @@ int kbase_arbif_init(struct kbase_device *kbdev)
|
|||
|
||||
if (!pdev->dev.driver || !try_module_get(pdev->dev.driver->owner)) {
|
||||
dev_err(kbdev->dev, "arbiter_if driver not available\n");
|
||||
put_device(&pdev->dev);
|
||||
return -EPROBE_DEFER;
|
||||
}
|
||||
kbdev->arb.arb_dev = &pdev->dev;
|
||||
|
|
@ -212,6 +213,7 @@ int kbase_arbif_init(struct kbase_device *kbdev)
|
|||
if (!arb_if) {
|
||||
dev_err(kbdev->dev, "arbiter_if driver not ready\n");
|
||||
module_put(pdev->dev.driver->owner);
|
||||
put_device(&pdev->dev);
|
||||
return -EPROBE_DEFER;
|
||||
}
|
||||
|
||||
|
|
@ -233,6 +235,7 @@ int kbase_arbif_init(struct kbase_device *kbdev)
|
|||
if (err) {
|
||||
dev_err(&pdev->dev, "Failed to register with arbiter\n");
|
||||
module_put(pdev->dev.driver->owner);
|
||||
put_device(&pdev->dev);
|
||||
if (err != -EPROBE_DEFER)
|
||||
err = -EFAULT;
|
||||
return err;
|
||||
|
|
@ -262,8 +265,10 @@ void kbase_arbif_destroy(struct kbase_device *kbdev)
|
|||
arb_if->vm_ops.vm_arb_unregister_dev(kbdev->arb.arb_if);
|
||||
}
|
||||
kbdev->arb.arb_if = NULL;
|
||||
if (kbdev->arb.arb_dev)
|
||||
if (kbdev->arb.arb_dev) {
|
||||
module_put(kbdev->arb.arb_dev->driver->owner);
|
||||
put_device(kbdev->arb.arb_dev);
|
||||
}
|
||||
kbdev->arb.arb_dev = NULL;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -1,49 +0,0 @@
|
|||
# SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note OR MIT
|
||||
#
|
||||
# (C) COPYRIGHT 2012-2021 ARM Limited. All rights reserved.
|
||||
#
|
||||
# This program is free software and is provided to you under the terms of the
|
||||
# GNU General Public License version 2 as published by the Free Software
|
||||
# Foundation, and any use by you of this program is subject to the terms
|
||||
# of such GNU license.
|
||||
#
|
||||
# This program is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU General Public License
|
||||
# along with this program; if not, you can access it online at
|
||||
# http://www.gnu.org/licenses/gpl-2.0.html.
|
||||
#
|
||||
#
|
||||
|
||||
config MALI_XEN
|
||||
tristate "Enable Xen Interface reference code"
|
||||
depends on MALI_ARBITRATION && XEN
|
||||
default n
|
||||
help
|
||||
Enables the build of xen interface modules used in the reference
|
||||
virtualization setup for Mali
|
||||
If unsure, say N.
|
||||
|
||||
config MALI_ARBITER_MODULES
|
||||
tristate "Enable mali arbiter modules"
|
||||
depends on MALI_ARBITRATION
|
||||
default y
|
||||
help
|
||||
Enables the build of the arbiter modules used in the reference
|
||||
virtualization setup for Mali
|
||||
If unsure, say N
|
||||
|
||||
config MALI_GPU_POWER_MODULES
|
||||
tristate "Enable gpu power modules"
|
||||
depends on MALI_ARBITRATION
|
||||
default y
|
||||
help
|
||||
Enables the build of the gpu power modules used in the reference
|
||||
virtualization setup for Mali
|
||||
If unsure, say N
|
||||
|
||||
|
||||
source "drivers/gpu/arm/bifrost/arbitration/ptm/Kconfig"
|
||||
|
|
@ -1,28 +0,0 @@
|
|||
# SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note OR MIT
|
||||
#
|
||||
# (C) COPYRIGHT 2021 ARM Limited. All rights reserved.
|
||||
#
|
||||
# This program is free software and is provided to you under the terms of the
|
||||
# GNU General Public License version 2 as published by the Free Software
|
||||
# Foundation, and any use by you of this program is subject to the terms
|
||||
# of such GNU license.
|
||||
#
|
||||
# This program is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU General Public License
|
||||
# along with this program; if not, you can access it online at
|
||||
# http://www.gnu.org/licenses/gpl-2.0.html.
|
||||
#
|
||||
#
|
||||
|
||||
config MALI_PARTITION_MANAGER
|
||||
tristate "Enable compilation of partition manager modules"
|
||||
depends on MALI_ARBITRATION
|
||||
default n
|
||||
help
|
||||
This option enables the compilation of the partition manager
|
||||
modules used to configure the Mali-G78AE GPU.
|
||||
|
||||
|
|
@ -1,6 +1,6 @@
|
|||
# SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
|
||||
#
|
||||
# (C) COPYRIGHT 2014-2021 ARM Limited. All rights reserved.
|
||||
# (C) COPYRIGHT 2014-2022 ARM Limited. All rights reserved.
|
||||
#
|
||||
# This program is free software and is provided to you under the terms of the
|
||||
# GNU General Public License version 2 as published by the Free Software
|
||||
|
|
|
|||
|
|
@ -1,7 +1,7 @@
|
|||
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
|
||||
/*
|
||||
*
|
||||
* (C) COPYRIGHT 2014-2016, 2018, 2020-2021 ARM Limited. All rights reserved.
|
||||
* (C) COPYRIGHT 2014-2016, 2018, 2020-2022 ARM Limited. All rights reserved.
|
||||
*
|
||||
* This program is free software and is provided to you under the terms of the
|
||||
* GNU General Public License version 2 as published by the Free Software
|
||||
|
|
@ -22,12 +22,32 @@
|
|||
#include "backend/gpu/mali_kbase_cache_policy_backend.h"
|
||||
#include <device/mali_kbase_device.h>
|
||||
|
||||
/**
|
||||
* kbasep_amba_register_present() - Check AMBA_<> register is present
|
||||
* in the GPU.
|
||||
* @kbdev: Device pointer
|
||||
*
|
||||
* Note: Only for arch version 12.x.1 onwards.
|
||||
*
|
||||
* Return: true if AMBA_FEATURES/ENABLE registers are present.
|
||||
*/
|
||||
static bool kbasep_amba_register_present(struct kbase_device *kbdev)
|
||||
{
|
||||
return (ARCH_MAJOR_REV_REG(kbdev->gpu_props.props.raw_props.gpu_id) >=
|
||||
GPU_ID2_ARCH_MAJOR_REV_MAKE(12, 1));
|
||||
}
|
||||
|
||||
void kbase_cache_set_coherency_mode(struct kbase_device *kbdev,
|
||||
u32 mode)
|
||||
{
|
||||
kbdev->current_gpu_coherency_mode = mode;
|
||||
|
||||
if (kbasep_amba_register_present(kbdev)) {
|
||||
u32 val = kbase_reg_read(kbdev, AMBA_ENABLE);
|
||||
|
||||
val = AMBA_ENABLE_COHERENCY_PROTOCOL_SET(val, mode);
|
||||
kbase_reg_write(kbdev, AMBA_ENABLE, val);
|
||||
} else
|
||||
kbase_reg_write(kbdev, COHERENCY_ENABLE, mode);
|
||||
}
|
||||
|
||||
|
|
@ -35,9 +55,38 @@ u32 kbase_cache_get_coherency_features(struct kbase_device *kbdev)
|
|||
{
|
||||
u32 coherency_features;
|
||||
|
||||
if (kbasep_amba_register_present(kbdev))
|
||||
coherency_features =
|
||||
kbase_reg_read(kbdev, GPU_CONTROL_REG(AMBA_FEATURES));
|
||||
else
|
||||
coherency_features = kbase_reg_read(
|
||||
kbdev, GPU_CONTROL_REG(COHERENCY_FEATURES));
|
||||
|
||||
return coherency_features;
|
||||
}
|
||||
|
||||
void kbase_amba_set_memory_cache_support(struct kbase_device *kbdev,
|
||||
bool enable)
|
||||
{
|
||||
if (kbasep_amba_register_present(kbdev)) {
|
||||
u32 val = kbase_reg_read(kbdev, AMBA_ENABLE);
|
||||
|
||||
val = AMBA_ENABLE_MEMORY_CACHE_SUPPORT_SET(val, enable);
|
||||
kbase_reg_write(kbdev, AMBA_ENABLE, val);
|
||||
|
||||
} else {
|
||||
WARN(1, "memory_cache_support not supported");
|
||||
}
|
||||
}
|
||||
|
||||
/*
 * Update the invalidate hint setting in AMBA_ENABLE.
 *
 * The register is read, passed through AMBA_ENABLE_INVALIDATE_HINT_SET()
 * together with @enable and written back. On GPUs without the AMBA registers
 * nothing is written and a warning is raised instead.
 *
 * Register offsets are translated with GPU_CONTROL_REG(), matching the way
 * kbase_cache_get_coherency_features() addresses AMBA_FEATURES.
 */
void kbase_amba_set_invalidate_hint(struct kbase_device *kbdev, bool enable)
{
	u32 val;

	if (!kbasep_amba_register_present(kbdev)) {
		WARN(1, "invalidate_hint not supported");
		return;
	}

	val = kbase_reg_read(kbdev, GPU_CONTROL_REG(AMBA_ENABLE));
	val = AMBA_ENABLE_INVALIDATE_HINT_SET(val, enable);
	kbase_reg_write(kbdev, GPU_CONTROL_REG(AMBA_ENABLE), val);
}
|
||||
|
|
|
|||
|
|
@ -1,7 +1,7 @@
|
|||
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
|
||||
/*
|
||||
*
|
||||
* (C) COPYRIGHT 2014-2016, 2020-2021 ARM Limited. All rights reserved.
|
||||
* (C) COPYRIGHT 2014-2016, 2020-2022 ARM Limited. All rights reserved.
|
||||
*
|
||||
* This program is free software and is provided to you under the terms of the
|
||||
* GNU General Public License version 2 as published by the Free Software
|
||||
|
|
@ -43,4 +43,23 @@ void kbase_cache_set_coherency_mode(struct kbase_device *kbdev,
|
|||
*/
|
||||
u32 kbase_cache_get_coherency_features(struct kbase_device *kbdev);
|
||||
|
||||
/**
|
||||
* kbase_amba_set_memory_cache_support() - Sets AMBA memory cache support
|
||||
* in the GPU.
|
||||
* @kbdev: Device pointer
|
||||
* @enable: true to enable memory cache support, false to disable it.
|
||||
*
|
||||
* Note: Only for arch version 12.x.1 onwards.
|
||||
*/
|
||||
void kbase_amba_set_memory_cache_support(struct kbase_device *kbdev,
|
||||
bool enable);
|
||||
/**
|
||||
* kbase_amba_set_invalidate_hint() - Sets AMBA invalidate hint
|
||||
* in the GPU.
|
||||
* @kbdev: Device pointer
|
||||
* @enable: true to enable the invalidate hint, false to disable it.
|
||||
*
|
||||
* Note: Only for arch version 12.x.1 onwards.
|
||||
*/
|
||||
void kbase_amba_set_invalidate_hint(struct kbase_device *kbdev, bool enable);
|
||||
#endif /* _KBASE_CACHE_POLICY_BACKEND_H_ */
|
||||
|
|
|
|||
|
|
@ -1,7 +1,7 @@
|
|||
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
|
||||
/*
|
||||
*
|
||||
* (C) COPYRIGHT 2014, 2016, 2018-2021 ARM Limited. All rights reserved.
|
||||
* (C) COPYRIGHT 2014, 2016, 2018-2022 ARM Limited. All rights reserved.
|
||||
*
|
||||
* This program is free software and is provided to you under the terms of the
|
||||
* GNU General Public License version 2 as published by the Free Software
|
||||
|
|
@ -26,7 +26,7 @@
|
|||
#ifndef _KBASE_INSTR_DEFS_H_
|
||||
#define _KBASE_INSTR_DEFS_H_
|
||||
|
||||
#include <mali_kbase_hwcnt_gpu.h>
|
||||
#include <hwcnt/mali_kbase_hwcnt_gpu.h>
|
||||
|
||||
/*
|
||||
* Instrumentation State Machine States
|
||||
|
|
|
|||
|
|
@ -1,7 +1,7 @@
|
|||
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
|
||||
/*
|
||||
*
|
||||
* (C) COPYRIGHT 2014-2016, 2018-2021 ARM Limited. All rights reserved.
|
||||
* (C) COPYRIGHT 2014-2016, 2018-2022 ARM Limited. All rights reserved.
|
||||
*
|
||||
* This program is free software and is provided to you under the terms of the
|
||||
* GNU General Public License version 2 as published by the Free Software
|
||||
|
|
@ -163,7 +163,6 @@ static irq_handler_t kbase_handler_table[] = {
|
|||
|
||||
#ifdef CONFIG_MALI_BIFROST_DEBUG
|
||||
#define JOB_IRQ_HANDLER JOB_IRQ_TAG
|
||||
#define MMU_IRQ_HANDLER MMU_IRQ_TAG
|
||||
#define GPU_IRQ_HANDLER GPU_IRQ_TAG
|
||||
|
||||
/**
|
||||
|
|
|
|||
|
|
@ -34,7 +34,7 @@
|
|||
#include <mali_kbase_ctx_sched.h>
|
||||
#include <mali_kbase_kinstr_jm.h>
|
||||
#include <mali_kbase_hwaccess_instr.h>
|
||||
#include <mali_kbase_hwcnt_context.h>
|
||||
#include <hwcnt/mali_kbase_hwcnt_context.h>
|
||||
#include <device/mali_kbase_device.h>
|
||||
#include <backend/gpu/mali_kbase_irq_internal.h>
|
||||
#include <backend/gpu/mali_kbase_jm_internal.h>
|
||||
|
|
@ -1440,6 +1440,11 @@ bool kbase_reset_gpu_is_active(struct kbase_device *kbdev)
|
|||
return true;
|
||||
}
|
||||
|
||||
bool kbase_reset_gpu_is_not_pending(struct kbase_device *kbdev)
|
||||
{
|
||||
return atomic_read(&kbdev->hwaccess.backend.reset_gpu) == KBASE_RESET_GPU_NOT_PENDING;
|
||||
}
|
||||
|
||||
int kbase_reset_gpu_wait(struct kbase_device *kbdev)
|
||||
{
|
||||
wait_event(kbdev->hwaccess.backend.reset_wait,
|
||||
|
|
|
|||
|
|
@ -29,7 +29,7 @@
|
|||
#include <mali_kbase_jm.h>
|
||||
#include <mali_kbase_js.h>
|
||||
#include <tl/mali_kbase_tracepoints.h>
|
||||
#include <mali_kbase_hwcnt_context.h>
|
||||
#include <hwcnt/mali_kbase_hwcnt_context.h>
|
||||
#include <mali_kbase_reset_gpu.h>
|
||||
#include <mali_kbase_kinstr_jm.h>
|
||||
#include <backend/gpu/mali_kbase_cache_policy_backend.h>
|
||||
|
|
|
|||
|
|
@ -80,31 +80,360 @@ static bool ipa_control_timer_enabled;
|
|||
#endif
|
||||
|
||||
#define LO_MASK(M) ((M) & 0xFFFFFFFF)
|
||||
#if !MALI_USE_CSF
|
||||
#define HI_MASK(M) ((M) & 0xFFFFFFFF00000000)
|
||||
#endif
|
||||
|
||||
static u32 get_implementation_register(u32 reg)
|
||||
{
|
||||
switch (reg) {
|
||||
case GPU_CONTROL_REG(SHADER_PRESENT_LO):
|
||||
return LO_MASK(DUMMY_IMPLEMENTATION_SHADER_PRESENT);
|
||||
case GPU_CONTROL_REG(TILER_PRESENT_LO):
|
||||
return LO_MASK(DUMMY_IMPLEMENTATION_TILER_PRESENT);
|
||||
case GPU_CONTROL_REG(L2_PRESENT_LO):
|
||||
return LO_MASK(DUMMY_IMPLEMENTATION_L2_PRESENT);
|
||||
case GPU_CONTROL_REG(STACK_PRESENT_LO):
|
||||
return LO_MASK(DUMMY_IMPLEMENTATION_STACK_PRESENT);
|
||||
/* Construct a value for the THREAD_FEATURES register, *except* the two most
|
||||
* significant bits, which are set to IMPLEMENTATION_MODEL in
|
||||
* midgard_model_read_reg().
|
||||
*/
|
||||
#if MALI_USE_CSF
|
||||
#define THREAD_FEATURES_PARTIAL(MAX_REGISTERS, MAX_TASK_QUEUE, MAX_TG_SPLIT) \
|
||||
((MAX_REGISTERS) | ((MAX_TASK_QUEUE) << 24))
|
||||
#else
|
||||
#define THREAD_FEATURES_PARTIAL(MAX_REGISTERS, MAX_TASK_QUEUE, MAX_TG_SPLIT) \
|
||||
((MAX_REGISTERS) | ((MAX_TASK_QUEUE) << 16) | ((MAX_TG_SPLIT) << 24))
|
||||
#endif
|
||||
|
||||
case GPU_CONTROL_REG(SHADER_PRESENT_HI):
|
||||
case GPU_CONTROL_REG(TILER_PRESENT_HI):
|
||||
case GPU_CONTROL_REG(L2_PRESENT_HI):
|
||||
case GPU_CONTROL_REG(STACK_PRESENT_HI):
|
||||
/* *** FALLTHROUGH *** */
|
||||
default:
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
struct error_status_t hw_error_status;
|
||||
|
||||
struct {
|
||||
/**
|
||||
* struct control_reg_values_t - control register values specific to the GPU being 'emulated'
|
||||
* @name: GPU name
|
||||
* @gpu_id: GPU ID to report
|
||||
* @as_present: Bitmap of address spaces present
|
||||
* @thread_max_threads: Maximum number of threads per core
|
||||
* @thread_max_workgroup_size: Maximum number of threads per workgroup
|
||||
* @thread_max_barrier_size: Maximum number of threads per barrier
|
||||
* @thread_features: Thread features, NOT INCLUDING the 2
|
||||
* most-significant bits, which are always set to
|
||||
* IMPLEMENTATION_MODEL.
|
||||
* @core_features: Core features
|
||||
* @tiler_features: Tiler features
|
||||
* @mmu_features: MMU features
|
||||
* @gpu_features_lo: GPU features (low)
|
||||
* @gpu_features_hi: GPU features (high)
|
||||
* @shader_present: Available shader bitmap
|
||||
* @stack_present: Core stack present bitmap
|
||||
*
|
||||
*/
|
||||
struct control_reg_values_t {
|
||||
const char *name;
|
||||
u32 gpu_id;
|
||||
u32 as_present;
|
||||
u32 thread_max_threads;
|
||||
u32 thread_max_workgroup_size;
|
||||
u32 thread_max_barrier_size;
|
||||
u32 thread_features;
|
||||
u32 core_features;
|
||||
u32 tiler_features;
|
||||
u32 mmu_features;
|
||||
u32 gpu_features_lo;
|
||||
u32 gpu_features_hi;
|
||||
u32 shader_present;
|
||||
u32 stack_present;
|
||||
};
|
||||
|
||||
struct job_slot {
|
||||
int job_active;
|
||||
int job_queued;
|
||||
int job_complete_irq_asserted;
|
||||
int job_irq_mask;
|
||||
int job_disabled;
|
||||
};
|
||||
|
||||
struct dummy_model_t {
|
||||
int reset_completed;
|
||||
int reset_completed_mask;
|
||||
#if !MALI_USE_CSF
|
||||
int prfcnt_sample_completed;
|
||||
#endif /* !MALI_USE_CSF */
|
||||
int power_changed_mask; /* 2bits: _ALL,_SINGLE */
|
||||
int power_changed; /* 1bit */
|
||||
bool clean_caches_completed;
|
||||
bool clean_caches_completed_irq_enabled;
|
||||
#if MALI_USE_CSF
|
||||
bool flush_pa_range_completed;
|
||||
bool flush_pa_range_completed_irq_enabled;
|
||||
#endif
|
||||
int power_on; /* 6bits: SHADER[4],TILER,L2 */
|
||||
u32 stack_power_on_lo;
|
||||
u32 coherency_enable;
|
||||
unsigned int job_irq_js_state;
|
||||
struct job_slot slots[NUM_SLOTS];
|
||||
const struct control_reg_values_t *control_reg_values;
|
||||
u32 l2_config;
|
||||
void *data;
|
||||
};
|
||||
|
||||
/* Array associating GPU names with control register values. The first
|
||||
* one is used in the case of no match.
|
||||
*/
|
||||
static const struct control_reg_values_t all_control_reg_values[] = {
|
||||
{
|
||||
.name = "tMIx",
|
||||
.gpu_id = GPU_ID2_MAKE(6, 0, 10, 0, 0, 1, 0),
|
||||
.as_present = 0xFF,
|
||||
.thread_max_threads = 0x180,
|
||||
.thread_max_workgroup_size = 0x180,
|
||||
.thread_max_barrier_size = 0x180,
|
||||
.thread_features = THREAD_FEATURES_PARTIAL(0x6000, 4, 10),
|
||||
.tiler_features = 0x809,
|
||||
.mmu_features = 0x2830,
|
||||
.gpu_features_lo = 0,
|
||||
.gpu_features_hi = 0,
|
||||
.shader_present = DUMMY_IMPLEMENTATION_SHADER_PRESENT,
|
||||
.stack_present = DUMMY_IMPLEMENTATION_STACK_PRESENT,
|
||||
},
|
||||
{
|
||||
.name = "tHEx",
|
||||
.gpu_id = GPU_ID2_MAKE(6, 2, 0, 1, 0, 3, 0),
|
||||
.as_present = 0xFF,
|
||||
.thread_max_threads = 0x180,
|
||||
.thread_max_workgroup_size = 0x180,
|
||||
.thread_max_barrier_size = 0x180,
|
||||
.thread_features = THREAD_FEATURES_PARTIAL(0x6000, 4, 10),
|
||||
.tiler_features = 0x809,
|
||||
.mmu_features = 0x2830,
|
||||
.gpu_features_lo = 0,
|
||||
.gpu_features_hi = 0,
|
||||
.shader_present = DUMMY_IMPLEMENTATION_SHADER_PRESENT,
|
||||
.stack_present = DUMMY_IMPLEMENTATION_STACK_PRESENT,
|
||||
},
|
||||
{
|
||||
.name = "tSIx",
|
||||
.gpu_id = GPU_ID2_MAKE(7, 0, 0, 0, 1, 1, 0),
|
||||
.as_present = 0xFF,
|
||||
.thread_max_threads = 0x300,
|
||||
.thread_max_workgroup_size = 0x180,
|
||||
.thread_max_barrier_size = 0x180,
|
||||
.thread_features = THREAD_FEATURES_PARTIAL(0x6000, 4, 10),
|
||||
.tiler_features = 0x209,
|
||||
.mmu_features = 0x2821,
|
||||
.gpu_features_lo = 0,
|
||||
.gpu_features_hi = 0,
|
||||
.shader_present = DUMMY_IMPLEMENTATION_SHADER_PRESENT,
|
||||
.stack_present = DUMMY_IMPLEMENTATION_STACK_PRESENT,
|
||||
},
|
||||
{
|
||||
.name = "tDVx",
|
||||
.gpu_id = GPU_ID2_MAKE(7, 0, 0, 3, 0, 0, 0),
|
||||
.as_present = 0xFF,
|
||||
.thread_max_threads = 0x300,
|
||||
.thread_max_workgroup_size = 0x180,
|
||||
.thread_max_barrier_size = 0x180,
|
||||
.thread_features = THREAD_FEATURES_PARTIAL(0x6000, 4, 10),
|
||||
.tiler_features = 0x209,
|
||||
.mmu_features = 0x2821,
|
||||
.gpu_features_lo = 0,
|
||||
.gpu_features_hi = 0,
|
||||
.shader_present = DUMMY_IMPLEMENTATION_SHADER_PRESENT,
|
||||
.stack_present = DUMMY_IMPLEMENTATION_STACK_PRESENT,
|
||||
},
|
||||
{
|
||||
.name = "tNOx",
|
||||
.gpu_id = GPU_ID2_MAKE(7, 2, 1, 1, 0, 0, 0),
|
||||
.as_present = 0xFF,
|
||||
.thread_max_threads = 0x180,
|
||||
.thread_max_workgroup_size = 0x180,
|
||||
.thread_max_barrier_size = 0x180,
|
||||
.thread_features = THREAD_FEATURES_PARTIAL(0x6000, 4, 10),
|
||||
.tiler_features = 0x809,
|
||||
.mmu_features = 0x2830,
|
||||
.gpu_features_lo = 0,
|
||||
.gpu_features_hi = 0,
|
||||
.shader_present = DUMMY_IMPLEMENTATION_SHADER_PRESENT,
|
||||
.stack_present = DUMMY_IMPLEMENTATION_STACK_PRESENT,
|
||||
},
|
||||
{
|
||||
.name = "tGOx_r0p0",
|
||||
.gpu_id = GPU_ID2_MAKE(7, 2, 2, 2, 0, 0, 0),
|
||||
.as_present = 0xFF,
|
||||
.thread_max_threads = 0x180,
|
||||
.thread_max_workgroup_size = 0x180,
|
||||
.thread_max_barrier_size = 0x180,
|
||||
.thread_features = THREAD_FEATURES_PARTIAL(0x6000, 4, 10),
|
||||
.tiler_features = 0x809,
|
||||
.mmu_features = 0x2830,
|
||||
.gpu_features_lo = 0,
|
||||
.gpu_features_hi = 0,
|
||||
.shader_present = DUMMY_IMPLEMENTATION_SHADER_PRESENT,
|
||||
.stack_present = DUMMY_IMPLEMENTATION_STACK_PRESENT,
|
||||
},
|
||||
{
|
||||
.name = "tGOx_r1p0",
|
||||
.gpu_id = GPU_ID2_MAKE(7, 4, 0, 2, 1, 0, 0),
|
||||
.as_present = 0xFF,
|
||||
.thread_max_threads = 0x180,
|
||||
.thread_max_workgroup_size = 0x180,
|
||||
.thread_max_barrier_size = 0x180,
|
||||
.thread_features = THREAD_FEATURES_PARTIAL(0x6000, 4, 10),
|
||||
.core_features = 0x2,
|
||||
.tiler_features = 0x209,
|
||||
.mmu_features = 0x2823,
|
||||
.gpu_features_lo = 0,
|
||||
.gpu_features_hi = 0,
|
||||
.shader_present = DUMMY_IMPLEMENTATION_SHADER_PRESENT,
|
||||
.stack_present = DUMMY_IMPLEMENTATION_STACK_PRESENT,
|
||||
},
|
||||
{
|
||||
.name = "tTRx",
|
||||
.gpu_id = GPU_ID2_MAKE(9, 0, 8, 0, 0, 0, 0),
|
||||
.as_present = 0xFF,
|
||||
.thread_max_threads = 0x180,
|
||||
.thread_max_workgroup_size = 0x180,
|
||||
.thread_max_barrier_size = 0x180,
|
||||
.thread_features = THREAD_FEATURES_PARTIAL(0x6000, 4, 0),
|
||||
.tiler_features = 0x809,
|
||||
.mmu_features = 0x2830,
|
||||
.gpu_features_lo = 0,
|
||||
.gpu_features_hi = 0,
|
||||
.shader_present = DUMMY_IMPLEMENTATION_SHADER_PRESENT,
|
||||
.stack_present = DUMMY_IMPLEMENTATION_STACK_PRESENT,
|
||||
},
|
||||
{
|
||||
.name = "tNAx",
|
||||
.gpu_id = GPU_ID2_MAKE(9, 0, 8, 1, 0, 0, 0),
|
||||
.as_present = 0xFF,
|
||||
.thread_max_threads = 0x180,
|
||||
.thread_max_workgroup_size = 0x180,
|
||||
.thread_max_barrier_size = 0x180,
|
||||
.thread_features = THREAD_FEATURES_PARTIAL(0x6000, 4, 0),
|
||||
.tiler_features = 0x809,
|
||||
.mmu_features = 0x2830,
|
||||
.gpu_features_lo = 0,
|
||||
.gpu_features_hi = 0,
|
||||
.shader_present = DUMMY_IMPLEMENTATION_SHADER_PRESENT,
|
||||
.stack_present = DUMMY_IMPLEMENTATION_STACK_PRESENT,
|
||||
},
|
||||
{
|
||||
.name = "tBEx",
|
||||
.gpu_id = GPU_ID2_MAKE(9, 2, 0, 2, 0, 0, 0),
|
||||
.as_present = 0xFF,
|
||||
.thread_max_threads = 0x180,
|
||||
.thread_max_workgroup_size = 0x180,
|
||||
.thread_max_barrier_size = 0x180,
|
||||
.thread_features = THREAD_FEATURES_PARTIAL(0x6000, 4, 0),
|
||||
.tiler_features = 0x809,
|
||||
.mmu_features = 0x2830,
|
||||
.gpu_features_lo = 0,
|
||||
.gpu_features_hi = 0,
|
||||
.shader_present = DUMMY_IMPLEMENTATION_SHADER_PRESENT,
|
||||
.stack_present = DUMMY_IMPLEMENTATION_STACK_PRESENT,
|
||||
},
|
||||
{
|
||||
.name = "tBAx",
|
||||
.gpu_id = GPU_ID2_MAKE(9, 14, 4, 5, 0, 0, 0),
|
||||
.as_present = 0xFF,
|
||||
.thread_max_threads = 0x180,
|
||||
.thread_max_workgroup_size = 0x180,
|
||||
.thread_max_barrier_size = 0x180,
|
||||
.thread_features = THREAD_FEATURES_PARTIAL(0x6000, 4, 0),
|
||||
.tiler_features = 0x809,
|
||||
.mmu_features = 0x2830,
|
||||
.gpu_features_lo = 0,
|
||||
.gpu_features_hi = 0,
|
||||
.shader_present = DUMMY_IMPLEMENTATION_SHADER_PRESENT,
|
||||
.stack_present = DUMMY_IMPLEMENTATION_STACK_PRESENT,
|
||||
},
|
||||
{
|
||||
.name = "tDUx",
|
||||
.gpu_id = GPU_ID2_MAKE(10, 2, 0, 1, 0, 0, 0),
|
||||
.as_present = 0xFF,
|
||||
.thread_max_threads = 0x180,
|
||||
.thread_max_workgroup_size = 0x180,
|
||||
.thread_max_barrier_size = 0x180,
|
||||
.thread_features = THREAD_FEATURES_PARTIAL(0x6000, 4, 0),
|
||||
.tiler_features = 0x809,
|
||||
.mmu_features = 0x2830,
|
||||
.gpu_features_lo = 0,
|
||||
.gpu_features_hi = 0,
|
||||
.shader_present = DUMMY_IMPLEMENTATION_SHADER_PRESENT,
|
||||
.stack_present = DUMMY_IMPLEMENTATION_STACK_PRESENT,
|
||||
},
|
||||
{
|
||||
.name = "tODx",
|
||||
.gpu_id = GPU_ID2_MAKE(10, 8, 0, 2, 0, 0, 0),
|
||||
.as_present = 0xFF,
|
||||
.thread_max_threads = 0x180,
|
||||
.thread_max_workgroup_size = 0x180,
|
||||
.thread_max_barrier_size = 0x180,
|
||||
.thread_features = THREAD_FEATURES_PARTIAL(0x6000, 4, 0),
|
||||
.tiler_features = 0x809,
|
||||
.mmu_features = 0x2830,
|
||||
.gpu_features_lo = 0,
|
||||
.gpu_features_hi = 0,
|
||||
.shader_present = DUMMY_IMPLEMENTATION_SHADER_PRESENT,
|
||||
.stack_present = DUMMY_IMPLEMENTATION_STACK_PRESENT,
|
||||
},
|
||||
{
|
||||
.name = "tGRx",
|
||||
.gpu_id = GPU_ID2_MAKE(10, 10, 0, 3, 0, 0, 0),
|
||||
.as_present = 0xFF,
|
||||
.thread_max_threads = 0x180,
|
||||
.thread_max_workgroup_size = 0x180,
|
||||
.thread_max_barrier_size = 0x180,
|
||||
.thread_features = THREAD_FEATURES_PARTIAL(0x6000, 4, 0),
|
||||
.core_features = 0x0, /* core_1e16fma2tex */
|
||||
.tiler_features = 0x809,
|
||||
.mmu_features = 0x2830,
|
||||
.gpu_features_lo = 0,
|
||||
.gpu_features_hi = 0,
|
||||
.shader_present = DUMMY_IMPLEMENTATION_SHADER_PRESENT,
|
||||
.stack_present = DUMMY_IMPLEMENTATION_STACK_PRESENT,
|
||||
},
|
||||
{
|
||||
.name = "tVAx",
|
||||
.gpu_id = GPU_ID2_MAKE(10, 12, 0, 4, 0, 0, 0),
|
||||
.as_present = 0xFF,
|
||||
.thread_max_threads = 0x180,
|
||||
.thread_max_workgroup_size = 0x180,
|
||||
.thread_max_barrier_size = 0x180,
|
||||
.thread_features = THREAD_FEATURES_PARTIAL(0x6000, 4, 0),
|
||||
.core_features = 0x0, /* core_1e16fma2tex */
|
||||
.tiler_features = 0x809,
|
||||
.mmu_features = 0x2830,
|
||||
.gpu_features_lo = 0,
|
||||
.gpu_features_hi = 0,
|
||||
.shader_present = DUMMY_IMPLEMENTATION_SHADER_PRESENT,
|
||||
.stack_present = DUMMY_IMPLEMENTATION_STACK_PRESENT,
|
||||
},
|
||||
{
|
||||
.name = "tTUx",
|
||||
.gpu_id = GPU_ID2_MAKE(11, 8, 5, 2, 0, 0, 0),
|
||||
.as_present = 0xFF,
|
||||
.thread_max_threads = 0x800,
|
||||
.thread_max_workgroup_size = 0x400,
|
||||
.thread_max_barrier_size = 0x400,
|
||||
.thread_features = THREAD_FEATURES_PARTIAL(0x10000, 4, 0),
|
||||
.core_features = 0x0, /* core_1e32fma2tex */
|
||||
.tiler_features = 0x809,
|
||||
.mmu_features = 0x2830,
|
||||
.gpu_features_lo = 0xf,
|
||||
.gpu_features_hi = 0,
|
||||
.shader_present = 0xFF,
|
||||
.stack_present = 0xF,
|
||||
},
|
||||
{
|
||||
.name = "tTIx",
|
||||
.gpu_id = GPU_ID2_MAKE(12, 8, 1, 0, 0, 0, 0),
|
||||
.as_present = 0xFF,
|
||||
.thread_max_threads = 0x800,
|
||||
.thread_max_workgroup_size = 0x400,
|
||||
.thread_max_barrier_size = 0x400,
|
||||
.thread_features = THREAD_FEATURES_PARTIAL(0x10000, 16, 0),
|
||||
.core_features = 0x1, /* core_1e64fma4tex */
|
||||
.tiler_features = 0x809,
|
||||
.mmu_features = 0x2830,
|
||||
.gpu_features_lo = 0xf,
|
||||
.gpu_features_hi = 0,
|
||||
.shader_present = 0xFF,
|
||||
.stack_present = 0xF,
|
||||
},
|
||||
};
|
||||
|
||||
static struct {
|
||||
spinlock_t access_lock;
|
||||
#if !MALI_USE_CSF
|
||||
unsigned long prfcnt_base;
|
||||
|
|
@ -125,74 +454,33 @@ struct {
|
|||
#endif /* !MALI_USE_CSF */
|
||||
u64 tiler_counters[KBASE_DUMMY_MODEL_COUNTER_PER_CORE];
|
||||
u64 l2_counters[KBASE_DUMMY_MODEL_MAX_MEMSYS_BLOCKS *
|
||||
KBASE_DUMMY_MODEL_COUNTER_PER_CORE];
|
||||
KBASE_DUMMY_MODEL_COUNTER_PER_CORE];
|
||||
u64 shader_counters[KBASE_DUMMY_MODEL_MAX_SHADER_CORES *
|
||||
KBASE_DUMMY_MODEL_COUNTER_PER_CORE];
|
||||
KBASE_DUMMY_MODEL_COUNTER_PER_CORE];
|
||||
} performance_counters;
|
||||
|
||||
} performance_counters = {
|
||||
.l2_present = DUMMY_IMPLEMENTATION_L2_PRESENT,
|
||||
.shader_present = DUMMY_IMPLEMENTATION_SHADER_PRESENT,
|
||||
};
|
||||
static u32 get_implementation_register(u32 reg,
|
||||
const struct control_reg_values_t *const control_reg_values)
|
||||
{
|
||||
switch (reg) {
|
||||
case GPU_CONTROL_REG(SHADER_PRESENT_LO):
|
||||
return LO_MASK(control_reg_values->shader_present);
|
||||
case GPU_CONTROL_REG(TILER_PRESENT_LO):
|
||||
return LO_MASK(DUMMY_IMPLEMENTATION_TILER_PRESENT);
|
||||
case GPU_CONTROL_REG(L2_PRESENT_LO):
|
||||
return LO_MASK(DUMMY_IMPLEMENTATION_L2_PRESENT);
|
||||
case GPU_CONTROL_REG(STACK_PRESENT_LO):
|
||||
return LO_MASK(control_reg_values->stack_present);
|
||||
|
||||
struct job_slot {
|
||||
int job_active;
|
||||
int job_queued;
|
||||
int job_complete_irq_asserted;
|
||||
int job_irq_mask;
|
||||
int job_disabled;
|
||||
};
|
||||
|
||||
/**
|
||||
* struct control_reg_values_t - control register values specific to the GPU being 'emulated'
|
||||
* @name: GPU name
|
||||
* @gpu_id: GPU ID to report
|
||||
* @as_present: Bitmap of address spaces present
|
||||
* @thread_max_threads: Maximum number of threads per core
|
||||
* @thread_max_workgroup_size: Maximum number of threads per workgroup
|
||||
* @thread_max_barrier_size: Maximum number of threads per barrier
|
||||
* @thread_features: Thread features, NOT INCLUDING the 2
|
||||
* most-significant bits, which are always set to
|
||||
* IMPLEMENTATION_MODEL.
|
||||
* @core_features: Core features
|
||||
* @tiler_features: Tiler features
|
||||
* @mmu_features: MMU features
|
||||
* @gpu_features_lo: GPU features (low)
|
||||
* @gpu_features_hi: GPU features (high)
|
||||
*/
|
||||
struct control_reg_values_t {
|
||||
const char *name;
|
||||
u32 gpu_id;
|
||||
u32 as_present;
|
||||
u32 thread_max_threads;
|
||||
u32 thread_max_workgroup_size;
|
||||
u32 thread_max_barrier_size;
|
||||
u32 thread_features;
|
||||
u32 core_features;
|
||||
u32 tiler_features;
|
||||
u32 mmu_features;
|
||||
u32 gpu_features_lo;
|
||||
u32 gpu_features_hi;
|
||||
};
|
||||
|
||||
struct dummy_model_t {
|
||||
int reset_completed;
|
||||
int reset_completed_mask;
|
||||
#if !MALI_USE_CSF
|
||||
int prfcnt_sample_completed;
|
||||
#endif /* !MALI_USE_CSF */
|
||||
int power_changed_mask; /* 2bits: _ALL,_SINGLE */
|
||||
int power_changed; /* 1bit */
|
||||
bool clean_caches_completed;
|
||||
bool clean_caches_completed_irq_enabled;
|
||||
int power_on; /* 6bits: SHADER[4],TILER,L2 */
|
||||
u32 stack_power_on_lo;
|
||||
u32 coherency_enable;
|
||||
unsigned int job_irq_js_state;
|
||||
struct job_slot slots[NUM_SLOTS];
|
||||
const struct control_reg_values_t *control_reg_values;
|
||||
u32 l2_config;
|
||||
void *data;
|
||||
};
|
||||
case GPU_CONTROL_REG(SHADER_PRESENT_HI):
|
||||
case GPU_CONTROL_REG(TILER_PRESENT_HI):
|
||||
case GPU_CONTROL_REG(L2_PRESENT_HI):
|
||||
case GPU_CONTROL_REG(STACK_PRESENT_HI):
|
||||
/* *** FALLTHROUGH *** */
|
||||
default:
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
void gpu_device_set_data(void *model, void *data)
|
||||
{
|
||||
|
|
@ -221,238 +509,6 @@ static char *no_mali_gpu = CONFIG_MALI_NO_MALI_DEFAULT_GPU;
|
|||
module_param(no_mali_gpu, charp, 0000);
|
||||
MODULE_PARM_DESC(no_mali_gpu, "GPU to identify as");
|
||||
|
||||
/* Construct a value for the THREAD_FEATURES register, *except* the two most
|
||||
* significant bits, which are set to IMPLEMENTATION_MODEL in
|
||||
* midgard_model_read_reg().
|
||||
*/
|
||||
#if MALI_USE_CSF
|
||||
#define THREAD_FEATURES_PARTIAL(MAX_REGISTERS, MAX_TASK_QUEUE, MAX_TG_SPLIT) \
|
||||
((MAX_REGISTERS) | ((MAX_TASK_QUEUE) << 24))
|
||||
#else
|
||||
#define THREAD_FEATURES_PARTIAL(MAX_REGISTERS, MAX_TASK_QUEUE, MAX_TG_SPLIT) \
|
||||
((MAX_REGISTERS) | ((MAX_TASK_QUEUE) << 16) | ((MAX_TG_SPLIT) << 24))
|
||||
#endif
|
||||
|
||||
/* Array associating GPU names with control register values. The first
|
||||
* one is used in the case of no match.
|
||||
*/
|
||||
static const struct control_reg_values_t all_control_reg_values[] = {
|
||||
{
|
||||
.name = "tMIx",
|
||||
.gpu_id = GPU_ID2_MAKE(6, 0, 10, 0, 0, 1, 0),
|
||||
.as_present = 0xFF,
|
||||
.thread_max_threads = 0x180,
|
||||
.thread_max_workgroup_size = 0x180,
|
||||
.thread_max_barrier_size = 0x180,
|
||||
.thread_features = THREAD_FEATURES_PARTIAL(0x6000, 4, 10),
|
||||
.tiler_features = 0x809,
|
||||
.mmu_features = 0x2830,
|
||||
.gpu_features_lo = 0,
|
||||
.gpu_features_hi = 0,
|
||||
},
|
||||
{
|
||||
.name = "tHEx",
|
||||
.gpu_id = GPU_ID2_MAKE(6, 2, 0, 1, 0, 3, 0),
|
||||
.as_present = 0xFF,
|
||||
.thread_max_threads = 0x180,
|
||||
.thread_max_workgroup_size = 0x180,
|
||||
.thread_max_barrier_size = 0x180,
|
||||
.thread_features = THREAD_FEATURES_PARTIAL(0x6000, 4, 10),
|
||||
.tiler_features = 0x809,
|
||||
.mmu_features = 0x2830,
|
||||
.gpu_features_lo = 0,
|
||||
.gpu_features_hi = 0,
|
||||
},
|
||||
{
|
||||
.name = "tSIx",
|
||||
.gpu_id = GPU_ID2_MAKE(7, 0, 0, 0, 1, 1, 0),
|
||||
.as_present = 0xFF,
|
||||
.thread_max_threads = 0x300,
|
||||
.thread_max_workgroup_size = 0x180,
|
||||
.thread_max_barrier_size = 0x180,
|
||||
.thread_features = THREAD_FEATURES_PARTIAL(0x6000, 4, 10),
|
||||
.tiler_features = 0x209,
|
||||
.mmu_features = 0x2821,
|
||||
.gpu_features_lo = 0,
|
||||
.gpu_features_hi = 0,
|
||||
},
|
||||
{
|
||||
.name = "tDVx",
|
||||
.gpu_id = GPU_ID2_MAKE(7, 0, 0, 3, 0, 0, 0),
|
||||
.as_present = 0xFF,
|
||||
.thread_max_threads = 0x300,
|
||||
.thread_max_workgroup_size = 0x180,
|
||||
.thread_max_barrier_size = 0x180,
|
||||
.thread_features = THREAD_FEATURES_PARTIAL(0x6000, 4, 10),
|
||||
.tiler_features = 0x209,
|
||||
.mmu_features = 0x2821,
|
||||
.gpu_features_lo = 0,
|
||||
.gpu_features_hi = 0,
|
||||
},
|
||||
{
|
||||
.name = "tNOx",
|
||||
.gpu_id = GPU_ID2_MAKE(7, 2, 1, 1, 0, 0, 0),
|
||||
.as_present = 0xFF,
|
||||
.thread_max_threads = 0x180,
|
||||
.thread_max_workgroup_size = 0x180,
|
||||
.thread_max_barrier_size = 0x180,
|
||||
.thread_features = THREAD_FEATURES_PARTIAL(0x6000, 4, 10),
|
||||
.tiler_features = 0x809,
|
||||
.mmu_features = 0x2830,
|
||||
.gpu_features_lo = 0,
|
||||
.gpu_features_hi = 0,
|
||||
},
|
||||
{
|
||||
.name = "tGOx_r0p0",
|
||||
.gpu_id = GPU_ID2_MAKE(7, 2, 2, 2, 0, 0, 0),
|
||||
.as_present = 0xFF,
|
||||
.thread_max_threads = 0x180,
|
||||
.thread_max_workgroup_size = 0x180,
|
||||
.thread_max_barrier_size = 0x180,
|
||||
.thread_features = THREAD_FEATURES_PARTIAL(0x6000, 4, 10),
|
||||
.tiler_features = 0x809,
|
||||
.mmu_features = 0x2830,
|
||||
.gpu_features_lo = 0,
|
||||
.gpu_features_hi = 0,
|
||||
},
|
||||
{
|
||||
.name = "tGOx_r1p0",
|
||||
.gpu_id = GPU_ID2_MAKE(7, 4, 0, 2, 1, 0, 0),
|
||||
.as_present = 0xFF,
|
||||
.thread_max_threads = 0x180,
|
||||
.thread_max_workgroup_size = 0x180,
|
||||
.thread_max_barrier_size = 0x180,
|
||||
.thread_features = THREAD_FEATURES_PARTIAL(0x6000, 4, 10),
|
||||
.core_features = 0x2,
|
||||
.tiler_features = 0x209,
|
||||
.mmu_features = 0x2823,
|
||||
.gpu_features_lo = 0,
|
||||
.gpu_features_hi = 0,
|
||||
},
|
||||
{
|
||||
.name = "tTRx",
|
||||
.gpu_id = GPU_ID2_MAKE(9, 0, 8, 0, 0, 0, 0),
|
||||
.as_present = 0xFF,
|
||||
.thread_max_threads = 0x180,
|
||||
.thread_max_workgroup_size = 0x180,
|
||||
.thread_max_barrier_size = 0x180,
|
||||
.thread_features = THREAD_FEATURES_PARTIAL(0x6000, 4, 0),
|
||||
.tiler_features = 0x809,
|
||||
.mmu_features = 0x2830,
|
||||
.gpu_features_lo = 0,
|
||||
.gpu_features_hi = 0,
|
||||
},
|
||||
{
|
||||
.name = "tNAx",
|
||||
.gpu_id = GPU_ID2_MAKE(9, 0, 8, 1, 0, 0, 0),
|
||||
.as_present = 0xFF,
|
||||
.thread_max_threads = 0x180,
|
||||
.thread_max_workgroup_size = 0x180,
|
||||
.thread_max_barrier_size = 0x180,
|
||||
.thread_features = THREAD_FEATURES_PARTIAL(0x6000, 4, 0),
|
||||
.tiler_features = 0x809,
|
||||
.mmu_features = 0x2830,
|
||||
.gpu_features_lo = 0,
|
||||
.gpu_features_hi = 0,
|
||||
},
|
||||
{
|
||||
.name = "tBEx",
|
||||
.gpu_id = GPU_ID2_MAKE(9, 2, 0, 2, 0, 0, 0),
|
||||
.as_present = 0xFF,
|
||||
.thread_max_threads = 0x180,
|
||||
.thread_max_workgroup_size = 0x180,
|
||||
.thread_max_barrier_size = 0x180,
|
||||
.thread_features = THREAD_FEATURES_PARTIAL(0x6000, 4, 0),
|
||||
.tiler_features = 0x809,
|
||||
.mmu_features = 0x2830,
|
||||
.gpu_features_lo = 0,
|
||||
.gpu_features_hi = 0,
|
||||
},
|
||||
{
|
||||
.name = "tBAx",
|
||||
.gpu_id = GPU_ID2_MAKE(9, 14, 4, 5, 0, 0, 0),
|
||||
.as_present = 0xFF,
|
||||
.thread_max_threads = 0x180,
|
||||
.thread_max_workgroup_size = 0x180,
|
||||
.thread_max_barrier_size = 0x180,
|
||||
.thread_features = THREAD_FEATURES_PARTIAL(0x6000, 4, 0),
|
||||
.tiler_features = 0x809,
|
||||
.mmu_features = 0x2830,
|
||||
.gpu_features_lo = 0,
|
||||
.gpu_features_hi = 0,
|
||||
},
|
||||
{
|
||||
.name = "tDUx",
|
||||
.gpu_id = GPU_ID2_MAKE(10, 2, 0, 1, 0, 0, 0),
|
||||
.as_present = 0xFF,
|
||||
.thread_max_threads = 0x180,
|
||||
.thread_max_workgroup_size = 0x180,
|
||||
.thread_max_barrier_size = 0x180,
|
||||
.thread_features = THREAD_FEATURES_PARTIAL(0x6000, 4, 0),
|
||||
.tiler_features = 0x809,
|
||||
.mmu_features = 0x2830,
|
||||
.gpu_features_lo = 0,
|
||||
.gpu_features_hi = 0,
|
||||
},
|
||||
{
|
||||
.name = "tODx",
|
||||
.gpu_id = GPU_ID2_MAKE(10, 8, 0, 2, 0, 0, 0),
|
||||
.as_present = 0xFF,
|
||||
.thread_max_threads = 0x180,
|
||||
.thread_max_workgroup_size = 0x180,
|
||||
.thread_max_barrier_size = 0x180,
|
||||
.thread_features = THREAD_FEATURES_PARTIAL(0x6000, 4, 0),
|
||||
.tiler_features = 0x809,
|
||||
.mmu_features = 0x2830,
|
||||
.gpu_features_lo = 0,
|
||||
.gpu_features_hi = 0,
|
||||
},
|
||||
{
|
||||
.name = "tGRx",
|
||||
.gpu_id = GPU_ID2_MAKE(10, 10, 0, 3, 0, 0, 0),
|
||||
.as_present = 0xFF,
|
||||
.thread_max_threads = 0x180,
|
||||
.thread_max_workgroup_size = 0x180,
|
||||
.thread_max_barrier_size = 0x180,
|
||||
.thread_features = THREAD_FEATURES_PARTIAL(0x6000, 4, 0),
|
||||
.core_features = 0x0, /* core_1e16fma2tex */
|
||||
.tiler_features = 0x809,
|
||||
.mmu_features = 0x2830,
|
||||
.gpu_features_lo = 0,
|
||||
.gpu_features_hi = 0,
|
||||
},
|
||||
{
|
||||
.name = "tVAx",
|
||||
.gpu_id = GPU_ID2_MAKE(10, 12, 0, 4, 0, 0, 0),
|
||||
.as_present = 0xFF,
|
||||
.thread_max_threads = 0x180,
|
||||
.thread_max_workgroup_size = 0x180,
|
||||
.thread_max_barrier_size = 0x180,
|
||||
.thread_features = THREAD_FEATURES_PARTIAL(0x6000, 4, 0),
|
||||
.core_features = 0x0, /* core_1e16fma2tex */
|
||||
.tiler_features = 0x809,
|
||||
.mmu_features = 0x2830,
|
||||
.gpu_features_lo = 0,
|
||||
.gpu_features_hi = 0,
|
||||
},
|
||||
{
|
||||
.name = "tTUx",
|
||||
.gpu_id = GPU_ID2_MAKE(11, 8, 5, 2, 0, 0, 0),
|
||||
.as_present = 0xFF,
|
||||
.thread_max_threads = 0x800,
|
||||
.thread_max_workgroup_size = 0x400,
|
||||
.thread_max_barrier_size = 0x400,
|
||||
.thread_features = THREAD_FEATURES_PARTIAL(0x10000, 4, 0),
|
||||
.core_features = 0x0, /* core_1e32fma2tex */
|
||||
.tiler_features = 0x809,
|
||||
.mmu_features = 0x2830,
|
||||
.gpu_features_lo = 0xf,
|
||||
.gpu_features_hi = 0,
|
||||
},
|
||||
};
|
||||
|
||||
struct error_status_t hw_error_status;
|
||||
|
||||
#if MALI_USE_CSF
|
||||
static u32 gpu_model_get_prfcnt_value(enum kbase_ipa_core_type core_type,
|
||||
u32 cnt_idx, bool is_low_word)
|
||||
|
|
@ -1011,6 +1067,21 @@ static const struct control_reg_values_t *find_control_reg_values(const char *gp
|
|||
size_t i;
|
||||
const struct control_reg_values_t *ret = NULL;
|
||||
|
||||
/* Edge case for tGOx, as it has 2 entries in the table for its R0 and R1
|
||||
* revisions respectively. As none of them are named "tGOx" the name comparison
|
||||
* needs to be fixed in these cases. CONFIG_GPU_HWVER should be one of "r0p0"
|
||||
* or "r1p0" and is derived from the DDK's build configuration. In cases
|
||||
* where it is unavailable, it defaults to tGOx r1p0.
|
||||
*/
|
||||
if (!strcmp(gpu, "tGOx")) {
|
||||
#ifdef CONFIG_GPU_HWVER
|
||||
if (!strcmp(CONFIG_GPU_HWVER, "r0p0"))
|
||||
gpu = "tGOx_r0p0";
|
||||
else if (!strcmp(CONFIG_GPU_HWVER, "r1p0"))
|
||||
#endif /* CONFIG_GPU_HWVER defined */
|
||||
gpu = "tGOx_r1p0";
|
||||
}
|
||||
|
||||
for (i = 0; i < ARRAY_SIZE(all_control_reg_values); ++i) {
|
||||
const struct control_reg_values_t * const fcrv = &all_control_reg_values[i];
|
||||
|
||||
|
|
@ -1043,6 +1114,10 @@ void *midgard_model_create(const void *config)
|
|||
dummy->job_irq_js_state = 0;
|
||||
init_register_statuses(dummy);
|
||||
dummy->control_reg_values = find_control_reg_values(no_mali_gpu);
|
||||
performance_counters.l2_present = get_implementation_register(
|
||||
GPU_CONTROL_REG(L2_PRESENT_LO), dummy->control_reg_values);
|
||||
performance_counters.shader_present = get_implementation_register(
|
||||
GPU_CONTROL_REG(SHADER_PRESENT_LO), dummy->control_reg_values);
|
||||
}
|
||||
return dummy;
|
||||
}
|
||||
|
|
@ -1066,6 +1141,8 @@ static void midgard_model_get_outputs(void *h)
|
|||
hw_error_status.gpu_error_irq ||
|
||||
#if !MALI_USE_CSF
|
||||
dummy->prfcnt_sample_completed ||
|
||||
#else
|
||||
(dummy->flush_pa_range_completed && dummy->flush_pa_range_completed_irq_enabled) ||
|
||||
#endif
|
||||
(dummy->clean_caches_completed && dummy->clean_caches_completed_irq_enabled))
|
||||
gpu_device_raise_irq(dummy, GPU_DUMMY_GPU_IRQ);
|
||||
|
|
@ -1235,6 +1312,9 @@ u8 midgard_model_write_reg(void *h, u32 addr, u32 value)
|
|||
dummy->reset_completed_mask = (value >> 8) & 0x01;
|
||||
dummy->power_changed_mask = (value >> 9) & 0x03;
|
||||
dummy->clean_caches_completed_irq_enabled = (value & (1u << 17)) != 0u;
|
||||
#if MALI_USE_CSF
|
||||
dummy->flush_pa_range_completed_irq_enabled = (value & (1u << 20)) != 0u;
|
||||
#endif
|
||||
} else if (addr == GPU_CONTROL_REG(COHERENCY_ENABLE)) {
|
||||
dummy->coherency_enable = value;
|
||||
} else if (addr == GPU_CONTROL_REG(GPU_IRQ_CLEAR)) {
|
||||
|
|
@ -1247,10 +1327,17 @@ u8 midgard_model_write_reg(void *h, u32 addr, u32 value)
|
|||
|
||||
if (value & (1 << 17))
|
||||
dummy->clean_caches_completed = false;
|
||||
#if !MALI_USE_CSF
|
||||
if (value & PRFCNT_SAMPLE_COMPLETED)
|
||||
|
||||
#if MALI_USE_CSF
|
||||
if (value & (1u << 20))
|
||||
dummy->flush_pa_range_completed = false;
|
||||
#endif /* MALI_USE_CSF */
|
||||
|
||||
#if !MALI_USE_CSF
|
||||
if (value & PRFCNT_SAMPLE_COMPLETED) /* (1 << 16) */
|
||||
dummy->prfcnt_sample_completed = 0;
|
||||
#endif /* !MALI_USE_CSF */
|
||||
|
||||
/*update error status */
|
||||
hw_error_status.gpu_error_irq &= ~(value);
|
||||
} else if (addr == GPU_CONTROL_REG(GPU_COMMAND)) {
|
||||
|
|
@ -1274,7 +1361,15 @@ u8 midgard_model_write_reg(void *h, u32 addr, u32 value)
|
|||
pr_debug("clean caches requested");
|
||||
dummy->clean_caches_completed = true;
|
||||
break;
|
||||
#if !MALI_USE_CSF
|
||||
#if MALI_USE_CSF
|
||||
case GPU_COMMAND_FLUSH_PA_RANGE_CLN_INV_L2:
|
||||
case GPU_COMMAND_FLUSH_PA_RANGE_CLN_INV_L2_LSC:
|
||||
case GPU_COMMAND_FLUSH_PA_RANGE_CLN_INV_FULL:
|
||||
pr_debug("pa range flush requested");
|
||||
dummy->flush_pa_range_completed = true;
|
||||
break;
|
||||
#endif /* MALI_USE_CSF */
|
||||
#if !MALI_USE_CSF
|
||||
case GPU_COMMAND_PRFCNT_SAMPLE:
|
||||
midgard_model_dump_prfcnt();
|
||||
dummy->prfcnt_sample_completed = 1;
|
||||
|
|
@ -1282,6 +1377,11 @@ u8 midgard_model_write_reg(void *h, u32 addr, u32 value)
|
|||
default:
|
||||
break;
|
||||
}
|
||||
#if MALI_USE_CSF
|
||||
} else if (addr >= GPU_CONTROL_REG(GPU_COMMAND_ARG0_LO) &&
|
||||
addr <= GPU_CONTROL_REG(GPU_COMMAND_ARG1_HI)) {
|
||||
/* Writes ignored */
|
||||
#endif
|
||||
} else if (addr == GPU_CONTROL_REG(L2_CONFIG)) {
|
||||
dummy->l2_config = value;
|
||||
}
|
||||
|
|
@ -1291,6 +1391,12 @@ u8 midgard_model_write_reg(void *h, u32 addr, u32 value)
|
|||
(CSF_NUM_DOORBELL * CSF_HW_DOORBELL_PAGE_SIZE))) {
|
||||
if (addr == GPU_CONTROL_REG(CSF_HW_DOORBELL_PAGE_OFFSET))
|
||||
hw_error_status.job_irq_status = JOB_IRQ_GLOBAL_IF;
|
||||
} else if ((addr >= GPU_CONTROL_REG(SYSC_ALLOC0)) &&
|
||||
(addr < GPU_CONTROL_REG(SYSC_ALLOC(SYSC_ALLOC_COUNT)))) {
|
||||
/* Do nothing */
|
||||
} else if ((addr >= GPU_CONTROL_REG(ASN_HASH_0)) &&
|
||||
(addr < GPU_CONTROL_REG(ASN_HASH(ASN_HASH_COUNT)))) {
|
||||
/* Do nothing */
|
||||
} else if (addr == IPA_CONTROL_REG(COMMAND)) {
|
||||
pr_debug("Received IPA_CONTROL command");
|
||||
} else if (addr == IPA_CONTROL_REG(TIMER)) {
|
||||
|
|
@ -1315,8 +1421,7 @@ u8 midgard_model_write_reg(void *h, u32 addr, u32 value)
|
|||
hw_error_status.mmu_irq_mask = value;
|
||||
} else if (addr == MMU_REG(MMU_IRQ_CLEAR)) {
|
||||
hw_error_status.mmu_irq_rawstat &= (~value);
|
||||
} else if ((addr >= MMU_AS_REG(0, AS_TRANSTAB_LO)) &&
|
||||
(addr <= MMU_AS_REG(15, AS_STATUS))) {
|
||||
} else if ((addr >= MMU_AS_REG(0, AS_TRANSTAB_LO)) && (addr <= MMU_AS_REG(15, AS_STATUS))) {
|
||||
int mem_addr_space = (addr - MMU_AS_REG(0, AS_TRANSTAB_LO))
|
||||
>> 6;
|
||||
|
||||
|
|
@ -1443,7 +1548,8 @@ u8 midgard_model_write_reg(void *h, u32 addr, u32 value)
|
|||
dummy->power_changed = 1;
|
||||
break;
|
||||
case SHADER_PWRON_LO:
|
||||
dummy->power_on |= (value & 0xF) << 2;
|
||||
dummy->power_on |=
|
||||
(value & dummy->control_reg_values->shader_present) << 2;
|
||||
dummy->power_changed = 1;
|
||||
break;
|
||||
case L2_PWRON_LO:
|
||||
|
|
@ -1459,7 +1565,8 @@ u8 midgard_model_write_reg(void *h, u32 addr, u32 value)
|
|||
dummy->power_changed = 1;
|
||||
break;
|
||||
case SHADER_PWROFF_LO:
|
||||
dummy->power_on &= ~((value & 0xF) << 2);
|
||||
dummy->power_on &=
|
||||
~((value & dummy->control_reg_values->shader_present) << 2);
|
||||
dummy->power_changed = 1;
|
||||
break;
|
||||
case L2_PWROFF_LO:
|
||||
|
|
@ -1546,6 +1653,9 @@ u8 midgard_model_read_reg(void *h, u32 addr, u32 * const value)
|
|||
else if (addr == GPU_CONTROL_REG(GPU_IRQ_MASK)) {
|
||||
*value = (dummy->reset_completed_mask << 8) |
|
||||
((dummy->clean_caches_completed_irq_enabled ? 1u : 0u) << 17) |
|
||||
#if MALI_USE_CSF
|
||||
((dummy->flush_pa_range_completed_irq_enabled ? 1u : 0u) << 20) |
|
||||
#endif
|
||||
(dummy->power_changed_mask << 9) | (1 << 7) | 1;
|
||||
pr_debug("GPU_IRQ_MASK read %x", *value);
|
||||
} else if (addr == GPU_CONTROL_REG(GPU_IRQ_RAWSTAT)) {
|
||||
|
|
@ -1555,6 +1665,9 @@ u8 midgard_model_read_reg(void *h, u32 addr, u32 * const value)
|
|||
(dummy->prfcnt_sample_completed ? PRFCNT_SAMPLE_COMPLETED : 0) |
|
||||
#endif /* !MALI_USE_CSF */
|
||||
((dummy->clean_caches_completed ? 1u : 0u) << 17) |
|
||||
#if MALI_USE_CSF
|
||||
((dummy->flush_pa_range_completed ? 1u : 0u) << 20) |
|
||||
#endif
|
||||
hw_error_status.gpu_error_irq;
|
||||
pr_debug("GPU_IRQ_RAWSTAT read %x", *value);
|
||||
} else if (addr == GPU_CONTROL_REG(GPU_IRQ_STATUS)) {
|
||||
|
|
@ -1569,6 +1682,13 @@ u8 midgard_model_read_reg(void *h, u32 addr, u32 * const value)
|
|||
1u :
|
||||
0u)
|
||||
<< 17) |
|
||||
#if MALI_USE_CSF
|
||||
(((dummy->flush_pa_range_completed &&
|
||||
dummy->flush_pa_range_completed_irq_enabled) ?
|
||||
1u :
|
||||
0u)
|
||||
<< 20) |
|
||||
#endif
|
||||
hw_error_status.gpu_error_irq;
|
||||
pr_debug("GPU_IRQ_STAT read %x", *value);
|
||||
} else if (addr == GPU_CONTROL_REG(GPU_STATUS)) {
|
||||
|
|
@ -1581,8 +1701,18 @@ u8 midgard_model_read_reg(void *h, u32 addr, u32 * const value)
|
|||
*value = hw_error_status.gpu_fault_status;
|
||||
} else if (addr == GPU_CONTROL_REG(L2_CONFIG)) {
|
||||
*value = dummy->l2_config;
|
||||
} else if ((addr >= GPU_CONTROL_REG(SHADER_PRESENT_LO)) &&
|
||||
(addr <= GPU_CONTROL_REG(L2_MMU_CONFIG))) {
|
||||
}
|
||||
#if MALI_USE_CSF
|
||||
else if ((addr >= GPU_CONTROL_REG(SYSC_ALLOC0)) &&
|
||||
(addr < GPU_CONTROL_REG(SYSC_ALLOC(SYSC_ALLOC_COUNT)))) {
|
||||
*value = 0;
|
||||
} else if ((addr >= GPU_CONTROL_REG(ASN_HASH_0)) &&
|
||||
(addr < GPU_CONTROL_REG(ASN_HASH(ASN_HASH_COUNT)))) {
|
||||
*value = 0;
|
||||
}
|
||||
#endif
|
||||
else if ((addr >= GPU_CONTROL_REG(SHADER_PRESENT_LO)) &&
|
||||
(addr <= GPU_CONTROL_REG(L2_MMU_CONFIG))) {
|
||||
switch (addr) {
|
||||
case GPU_CONTROL_REG(SHADER_PRESENT_LO):
|
||||
case GPU_CONTROL_REG(SHADER_PRESENT_HI):
|
||||
|
|
@ -1592,27 +1722,27 @@ u8 midgard_model_read_reg(void *h, u32 addr, u32 * const value)
|
|||
case GPU_CONTROL_REG(L2_PRESENT_HI):
|
||||
case GPU_CONTROL_REG(STACK_PRESENT_LO):
|
||||
case GPU_CONTROL_REG(STACK_PRESENT_HI):
|
||||
*value = get_implementation_register(addr);
|
||||
*value = get_implementation_register(addr, dummy->control_reg_values);
|
||||
break;
|
||||
case GPU_CONTROL_REG(SHADER_READY_LO):
|
||||
*value = (dummy->power_on >> 0x02) &
|
||||
get_implementation_register(
|
||||
GPU_CONTROL_REG(SHADER_PRESENT_LO));
|
||||
get_implementation_register(GPU_CONTROL_REG(SHADER_PRESENT_LO),
|
||||
dummy->control_reg_values);
|
||||
break;
|
||||
case GPU_CONTROL_REG(TILER_READY_LO):
|
||||
*value = (dummy->power_on >> 0x01) &
|
||||
get_implementation_register(
|
||||
GPU_CONTROL_REG(TILER_PRESENT_LO));
|
||||
get_implementation_register(GPU_CONTROL_REG(TILER_PRESENT_LO),
|
||||
dummy->control_reg_values);
|
||||
break;
|
||||
case GPU_CONTROL_REG(L2_READY_LO):
|
||||
*value = dummy->power_on &
|
||||
get_implementation_register(
|
||||
GPU_CONTROL_REG(L2_PRESENT_LO));
|
||||
get_implementation_register(GPU_CONTROL_REG(L2_PRESENT_LO),
|
||||
dummy->control_reg_values);
|
||||
break;
|
||||
case GPU_CONTROL_REG(STACK_READY_LO):
|
||||
*value = dummy->stack_power_on_lo &
|
||||
get_implementation_register(
|
||||
GPU_CONTROL_REG(STACK_PRESENT_LO));
|
||||
get_implementation_register(GPU_CONTROL_REG(STACK_PRESENT_LO),
|
||||
dummy->control_reg_values);
|
||||
break;
|
||||
|
||||
case GPU_CONTROL_REG(SHADER_READY_HI):
|
||||
|
|
@ -1904,6 +2034,8 @@ u8 midgard_model_read_reg(void *h, u32 addr, u32 * const value)
|
|||
|
||||
*value = gpu_model_get_prfcnt_value(KBASE_IPA_CORE_TYPE_SHADER,
|
||||
counter_index, is_low_word);
|
||||
} else if (addr == USER_REG(LATEST_FLUSH)) {
|
||||
*value = 0;
|
||||
}
|
||||
#endif
|
||||
else if (addr == GPU_CONTROL_REG(GPU_FEATURES_LO)) {
|
||||
|
|
|
|||
|
|
@ -23,13 +23,6 @@
|
|||
#include <linux/random.h>
|
||||
#include "backend/gpu/mali_kbase_model_dummy.h"
|
||||
|
||||
/* all the error conditions supported by the model */
|
||||
#define TOTAL_FAULTS 27
|
||||
/* maximum number of levels in the MMU translation table tree */
|
||||
#define MAX_MMU_TABLE_LEVEL 4
|
||||
/* worst case scenario is <1 MMU fault + 1 job fault + 2 GPU faults> */
|
||||
#define MAX_CONCURRENT_FAULTS 3
|
||||
|
||||
static struct kbase_error_atom *error_track_list;
|
||||
|
||||
unsigned int rand_seed;
|
||||
|
|
@ -40,6 +33,14 @@ unsigned int error_probability = 50; /* to be set between 0 and 100 */
|
|||
unsigned int multiple_error_probability = 50;
|
||||
|
||||
#ifdef CONFIG_MALI_ERROR_INJECT_RANDOM
|
||||
|
||||
/* all the error conditions supported by the model */
|
||||
#define TOTAL_FAULTS 27
|
||||
/* maximum number of levels in the MMU translation table tree */
|
||||
#define MAX_MMU_TABLE_LEVEL 4
|
||||
/* worst case scenario is <1 MMU fault + 1 job fault + 2 GPU faults> */
|
||||
#define MAX_CONCURRENT_FAULTS 3
|
||||
|
||||
/**
|
||||
* gpu_generate_error - Generate GPU error
|
||||
*/
|
||||
|
|
|
|||
|
|
@ -36,7 +36,7 @@
|
|||
#include <linux/pm_runtime.h>
|
||||
#include <mali_kbase_reset_gpu.h>
|
||||
#endif /* !MALI_USE_CSF */
|
||||
#include <mali_kbase_hwcnt_context.h>
|
||||
#include <hwcnt/mali_kbase_hwcnt_context.h>
|
||||
#include <backend/gpu/mali_kbase_pm_internal.h>
|
||||
#include <backend/gpu/mali_kbase_devfreq.h>
|
||||
#include <mali_kbase_dummy_job_wa.h>
|
||||
|
|
|
|||
|
|
@ -1,7 +1,7 @@
|
|||
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
|
||||
/*
|
||||
*
|
||||
* (C) COPYRIGHT 2013-2021 ARM Limited. All rights reserved.
|
||||
* (C) COPYRIGHT 2013-2022 ARM Limited. All rights reserved.
|
||||
*
|
||||
* This program is free software and is provided to you under the terms of the
|
||||
* GNU General Public License version 2 as published by the Free Software
|
||||
|
|
@ -92,29 +92,10 @@ void kbase_devfreq_set_core_mask(struct kbase_device *kbdev, u64 core_mask)
|
|||
* for those cores to get powered down
|
||||
*/
|
||||
if ((core_mask & old_core_mask) != old_core_mask) {
|
||||
bool can_wait;
|
||||
|
||||
spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
|
||||
can_wait = kbdev->pm.backend.gpu_ready && kbase_pm_is_mcu_desired(kbdev);
|
||||
spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
|
||||
|
||||
/* This check is ideally not required, the wait function can
|
||||
* deal with the GPU power down. But it has been added to
|
||||
* address the scenario where down-scaling request comes from
|
||||
* the platform specific code soon after the GPU power down
|
||||
* and at the same time an application thread tries to
|
||||
* power up the GPU (on the flush of GPU queue).
|
||||
* The platform specific @ref callback_power_on that gets
|
||||
* invoked on power up does not return until down-scaling
|
||||
* request is complete. The check mitigates the race caused by
|
||||
* the problem in platform specific code.
|
||||
*/
|
||||
if (likely(can_wait)) {
|
||||
if (kbase_pm_wait_for_desired_state(kbdev)) {
|
||||
dev_warn(kbdev->dev,
|
||||
"Wait for update of core_mask from %llx to %llx failed",
|
||||
old_core_mask, core_mask);
|
||||
}
|
||||
if (kbase_pm_wait_for_cores_down_scale(kbdev)) {
|
||||
dev_warn(kbdev->dev,
|
||||
"Wait for update of core_mask from %llx to %llx failed",
|
||||
old_core_mask, core_mask);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
|
|
|||
|
|
@ -39,7 +39,7 @@
|
|||
|
||||
#include <mali_kbase_reset_gpu.h>
|
||||
#include <mali_kbase_ctx_sched.h>
|
||||
#include <mali_kbase_hwcnt_context.h>
|
||||
#include <hwcnt/mali_kbase_hwcnt_context.h>
|
||||
#include <mali_kbase_pbha.h>
|
||||
#include <backend/gpu/mali_kbase_cache_policy_backend.h>
|
||||
#include <device/mali_kbase_device.h>
|
||||
|
|
@ -538,6 +538,14 @@ static void kbase_pm_l2_config_override(struct kbase_device *kbdev)
|
|||
if (!kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_L2_CONFIG))
|
||||
return;
|
||||
|
||||
#if MALI_USE_CSF
|
||||
if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_PBHA_HWU)) {
|
||||
val = kbase_reg_read(kbdev, GPU_CONTROL_REG(L2_CONFIG));
|
||||
kbase_reg_write(kbdev, GPU_CONTROL_REG(L2_CONFIG),
|
||||
L2_CONFIG_PBHA_HWU_SET(val, kbdev->pbha_propagate_bits));
|
||||
}
|
||||
#endif /* MALI_USE_CSF */
|
||||
|
||||
/*
|
||||
* Skip if size and hash are not given explicitly,
|
||||
* which means default values are used.
|
||||
|
|
@ -599,6 +607,21 @@ static const char *kbase_mcu_state_to_string(enum kbase_mcu_state state)
|
|||
return strings[state];
|
||||
}
|
||||
|
||||
static
|
||||
void kbase_ktrace_log_mcu_state(struct kbase_device *kbdev, enum kbase_mcu_state state)
|
||||
{
|
||||
#if KBASE_KTRACE_ENABLE
|
||||
switch (state) {
|
||||
#define KBASEP_MCU_STATE(n) \
|
||||
case KBASE_MCU_ ## n: \
|
||||
KBASE_KTRACE_ADD(kbdev, PM_MCU_ ## n, NULL, state); \
|
||||
break;
|
||||
#include "mali_kbase_pm_mcu_states.h"
|
||||
#undef KBASEP_MCU_STATE
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
static inline bool kbase_pm_handle_mcu_core_attr_update(struct kbase_device *kbdev)
|
||||
{
|
||||
struct kbase_pm_backend_data *backend = &kbdev->pm.backend;
|
||||
|
|
@ -689,7 +712,6 @@ static void wait_mcu_as_inactive(struct kbase_device *kbdev)
|
|||
}
|
||||
#endif
|
||||
|
||||
|
||||
/**
|
||||
* kbasep_pm_toggle_power_interrupt - Toggles the IRQ mask for power interrupts
|
||||
* from the firmware
|
||||
|
|
@ -697,10 +719,10 @@ static void wait_mcu_as_inactive(struct kbase_device *kbdev)
|
|||
* @kbdev: Pointer to the device
|
||||
* @enable: boolean indicating to enable interrupts or not
|
||||
*
|
||||
* The POWER_CHANGED_ALL and POWER_CHANGED_SINGLE interrupts can be disabled
|
||||
* after L2 has been turned on when FW is controlling the power for the shader
|
||||
* cores. Correspondingly, the interrupts can be re-enabled after the MCU has
|
||||
* been disabled before the power down of L2.
|
||||
* The POWER_CHANGED_ALL interrupt can be disabled after L2 has been turned on
|
||||
* when FW is controlling the power for the shader cores. Correspondingly, the
|
||||
* interrupts can be re-enabled after the MCU has been disabled before the
|
||||
* power down of L2.
|
||||
*/
|
||||
static void kbasep_pm_toggle_power_interrupt(struct kbase_device *kbdev, bool enable)
|
||||
{
|
||||
|
|
@ -710,10 +732,12 @@ static void kbasep_pm_toggle_power_interrupt(struct kbase_device *kbdev, bool en
|
|||
|
||||
irq_mask = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK));
|
||||
|
||||
if (enable)
|
||||
irq_mask |= POWER_CHANGED_ALL | POWER_CHANGED_SINGLE;
|
||||
else
|
||||
irq_mask &= ~(POWER_CHANGED_ALL | POWER_CHANGED_SINGLE);
|
||||
if (enable) {
|
||||
irq_mask |= POWER_CHANGED_ALL;
|
||||
kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_CLEAR), POWER_CHANGED_ALL);
|
||||
} else {
|
||||
irq_mask &= ~POWER_CHANGED_ALL;
|
||||
}
|
||||
|
||||
kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), irq_mask);
|
||||
}
|
||||
|
|
@ -1028,10 +1052,12 @@ static int kbase_pm_mcu_update_state(struct kbase_device *kbdev)
|
|||
backend->mcu_state);
|
||||
}
|
||||
|
||||
if (backend->mcu_state != prev_state)
|
||||
if (backend->mcu_state != prev_state) {
|
||||
dev_dbg(kbdev->dev, "MCU state transition: %s to %s\n",
|
||||
kbase_mcu_state_to_string(prev_state),
|
||||
kbase_mcu_state_to_string(backend->mcu_state));
|
||||
kbase_ktrace_log_mcu_state(kbdev, backend->mcu_state);
|
||||
}
|
||||
|
||||
} while (backend->mcu_state != prev_state);
|
||||
|
||||
|
|
@ -1079,6 +1105,21 @@ static const char *kbase_l2_core_state_to_string(enum kbase_l2_core_state state)
|
|||
return strings[state];
|
||||
}
|
||||
|
||||
static
|
||||
void kbase_ktrace_log_l2_core_state(struct kbase_device *kbdev, enum kbase_l2_core_state state)
|
||||
{
|
||||
#if KBASE_KTRACE_ENABLE
|
||||
switch (state) {
|
||||
#define KBASEP_L2_STATE(n) \
|
||||
case KBASE_L2_ ## n: \
|
||||
KBASE_KTRACE_ADD(kbdev, PM_L2_ ## n, NULL, state); \
|
||||
break;
|
||||
#include "mali_kbase_pm_l2_states.h"
|
||||
#undef KBASEP_L2_STATE
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
#if !MALI_USE_CSF
|
||||
/* On powering on the L2, the tracked kctx becomes stale and can be cleared.
|
||||
* This enables the backend to spare the START_FLUSH.INV_SHADER_OTHER
|
||||
|
|
@ -1136,18 +1177,13 @@ static int kbase_pm_l2_update_state(struct kbase_device *kbdev)
|
|||
KBASE_PM_CORE_L2);
|
||||
u64 l2_ready = kbase_pm_get_ready_cores(kbdev,
|
||||
KBASE_PM_CORE_L2);
|
||||
#ifdef CONFIG_MALI_ARBITER_SUPPORT
|
||||
u64 tiler_trans = kbase_pm_get_trans_cores(
|
||||
kbdev, KBASE_PM_CORE_TILER);
|
||||
u64 tiler_ready = kbase_pm_get_ready_cores(
|
||||
kbdev, KBASE_PM_CORE_TILER);
|
||||
|
||||
#ifdef CONFIG_MALI_ARBITER_SUPPORT
|
||||
/*
|
||||
* kbase_pm_get_ready_cores and kbase_pm_get_trans_cores
|
||||
* are vulnerable to corruption if gpu is lost
|
||||
*/
|
||||
if (kbase_is_gpu_removed(kbdev)
|
||||
|| kbase_pm_is_gpu_lost(kbdev)) {
|
||||
if (kbase_is_gpu_removed(kbdev) || kbase_pm_is_gpu_lost(kbdev)) {
|
||||
backend->shaders_state =
|
||||
KBASE_SHADERS_OFF_CORESTACK_OFF;
|
||||
backend->hwcnt_desired = false;
|
||||
|
|
@ -1161,16 +1197,19 @@ static int kbase_pm_l2_update_state(struct kbase_device *kbdev)
|
|||
*/
|
||||
backend->l2_state =
|
||||
KBASE_L2_ON_HWCNT_DISABLE;
|
||||
KBASE_KTRACE_ADD(kbdev, PM_L2_ON_HWCNT_DISABLE, NULL,
|
||||
backend->l2_state);
|
||||
kbase_pm_trigger_hwcnt_disable(kbdev);
|
||||
}
|
||||
|
||||
if (backend->hwcnt_disabled) {
|
||||
backend->l2_state = KBASE_L2_OFF;
|
||||
KBASE_KTRACE_ADD(kbdev, PM_L2_OFF, NULL, backend->l2_state);
|
||||
dev_dbg(kbdev->dev, "GPU lost has occurred - L2 off\n");
|
||||
}
|
||||
break;
|
||||
}
|
||||
#endif /* CONFIG_MALI_ARBITER_SUPPORT */
|
||||
#endif
|
||||
|
||||
/* mask off ready from trans in case transitions finished
|
||||
* between the register reads
|
||||
|
|
@ -1182,6 +1221,12 @@ static int kbase_pm_l2_update_state(struct kbase_device *kbdev)
|
|||
switch (backend->l2_state) {
|
||||
case KBASE_L2_OFF:
|
||||
if (kbase_pm_is_l2_desired(kbdev)) {
|
||||
#if MALI_USE_CSF && defined(KBASE_PM_RUNTIME)
|
||||
/* Enable HW timer of IPA control before
|
||||
* L2 cache is powered-up.
|
||||
*/
|
||||
kbase_ipa_control_handle_gpu_sleep_exit(kbdev);
|
||||
#endif
|
||||
/*
|
||||
* Set the desired config for L2 before
|
||||
* powering it on
|
||||
|
|
@ -1221,14 +1266,12 @@ static int kbase_pm_l2_update_state(struct kbase_device *kbdev)
|
|||
l2_power_up_done = false;
|
||||
if (!l2_trans && l2_ready == l2_present) {
|
||||
if (need_tiler_control(kbdev)) {
|
||||
#ifndef CONFIG_MALI_ARBITER_SUPPORT
|
||||
u64 tiler_trans = kbase_pm_get_trans_cores(
|
||||
kbdev, KBASE_PM_CORE_TILER);
|
||||
u64 tiler_ready = kbase_pm_get_ready_cores(
|
||||
kbdev, KBASE_PM_CORE_TILER);
|
||||
#endif
|
||||
|
||||
tiler_trans &= ~tiler_ready;
|
||||
|
||||
if (!tiler_trans && tiler_ready == tiler_present) {
|
||||
KBASE_KTRACE_ADD(kbdev,
|
||||
PM_CORES_CHANGE_AVAILABLE_TILER,
|
||||
|
|
@ -1437,12 +1480,26 @@ static int kbase_pm_l2_update_state(struct kbase_device *kbdev)
|
|||
/* We only need to check the L2 here - if the L2
|
||||
* is off then the tiler is definitely also off.
|
||||
*/
|
||||
if (!l2_trans && !l2_ready)
|
||||
if (!l2_trans && !l2_ready) {
|
||||
#if MALI_USE_CSF && defined(KBASE_PM_RUNTIME)
|
||||
/* Allow clock gating within the GPU and prevent it
|
||||
* from being seen as active during sleep.
|
||||
*/
|
||||
kbase_ipa_control_handle_gpu_sleep_enter(kbdev);
|
||||
#endif
|
||||
/* L2 is now powered off */
|
||||
backend->l2_state = KBASE_L2_OFF;
|
||||
}
|
||||
} else {
|
||||
if (!kbdev->cache_clean_in_progress)
|
||||
if (!kbdev->cache_clean_in_progress) {
|
||||
#if MALI_USE_CSF && defined(KBASE_PM_RUNTIME)
|
||||
/* Allow clock gating within the GPU and prevent it
|
||||
* from being seen as active during sleep.
|
||||
*/
|
||||
kbase_ipa_control_handle_gpu_sleep_enter(kbdev);
|
||||
#endif
|
||||
backend->l2_state = KBASE_L2_OFF;
|
||||
}
|
||||
}
|
||||
break;
|
||||
|
||||
|
|
@ -1457,11 +1514,13 @@ static int kbase_pm_l2_update_state(struct kbase_device *kbdev)
|
|||
backend->l2_state);
|
||||
}
|
||||
|
||||
if (backend->l2_state != prev_state)
|
||||
if (backend->l2_state != prev_state) {
|
||||
dev_dbg(kbdev->dev, "L2 state transition: %s to %s\n",
|
||||
kbase_l2_core_state_to_string(prev_state),
|
||||
kbase_l2_core_state_to_string(
|
||||
backend->l2_state));
|
||||
kbase_ktrace_log_l2_core_state(kbdev, backend->l2_state);
|
||||
}
|
||||
|
||||
} while (backend->l2_state != prev_state);
|
||||
|
||||
|
|
@ -1925,7 +1984,7 @@ static bool kbase_pm_is_in_desired_state_nolock(struct kbase_device *kbdev)
|
|||
kbdev->pm.backend.shaders_state != KBASE_SHADERS_OFF_CORESTACK_OFF)
|
||||
in_desired_state = false;
|
||||
#else
|
||||
in_desired_state = kbase_pm_mcu_is_in_desired_state(kbdev);
|
||||
in_desired_state &= kbase_pm_mcu_is_in_desired_state(kbdev);
|
||||
#endif
|
||||
|
||||
return in_desired_state;
|
||||
|
|
@ -2122,6 +2181,7 @@ void kbase_pm_reset_start_locked(struct kbase_device *kbdev)
|
|||
|
||||
backend->in_reset = true;
|
||||
backend->l2_state = KBASE_L2_RESET_WAIT;
|
||||
KBASE_KTRACE_ADD(kbdev, PM_L2_RESET_WAIT, NULL, backend->l2_state);
|
||||
#if !MALI_USE_CSF
|
||||
backend->shaders_state = KBASE_SHADERS_RESET_WAIT;
|
||||
#else
|
||||
|
|
@ -2130,6 +2190,7 @@ void kbase_pm_reset_start_locked(struct kbase_device *kbdev)
|
|||
*/
|
||||
if (likely(kbdev->csf.firmware_inited)) {
|
||||
backend->mcu_state = KBASE_MCU_RESET_WAIT;
|
||||
KBASE_KTRACE_ADD(kbdev, PM_MCU_RESET_WAIT, NULL, backend->mcu_state);
|
||||
#ifdef KBASE_PM_RUNTIME
|
||||
backend->exit_gpu_sleep_mode = true;
|
||||
#endif
|
||||
|
|
@ -2328,6 +2389,66 @@ int kbase_pm_wait_for_desired_state(struct kbase_device *kbdev)
|
|||
}
|
||||
KBASE_EXPORT_TEST_API(kbase_pm_wait_for_desired_state);
|
||||
|
||||
#if MALI_USE_CSF
|
||||
/**
|
||||
* core_mask_update_done - Check if downscaling of shader cores is done
|
||||
*
|
||||
* @kbdev: The kbase device structure for the device.
|
||||
*
|
||||
* This function checks if the downscaling of cores is effectively complete.
|
||||
*
|
||||
* Return: true if the downscale is done.
|
||||
*/
|
||||
static bool core_mask_update_done(struct kbase_device *kbdev)
|
||||
{
|
||||
bool update_done = false;
|
||||
unsigned long flags;
|
||||
|
||||
spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
|
||||
/* If MCU is in stable ON state then it implies that the downscale
|
||||
* request had completed.
|
||||
* If MCU is not active then it implies all cores are off, so can
|
||||
* consider the downscale request as complete.
|
||||
*/
|
||||
if ((kbdev->pm.backend.mcu_state == KBASE_MCU_ON) ||
|
||||
kbase_pm_is_mcu_inactive(kbdev, kbdev->pm.backend.mcu_state))
|
||||
update_done = true;
|
||||
spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
|
||||
|
||||
return update_done;
|
||||
}
|
||||
|
||||
int kbase_pm_wait_for_cores_down_scale(struct kbase_device *kbdev)
|
||||
{
|
||||
long timeout = kbase_csf_timeout_in_jiffies(kbase_get_timeout_ms(kbdev, CSF_PM_TIMEOUT));
|
||||
long remaining;
|
||||
int err = 0;
|
||||
|
||||
/* Wait for core mask update to complete */
|
||||
#if KERNEL_VERSION(4, 13, 1) <= LINUX_VERSION_CODE
|
||||
remaining = wait_event_killable_timeout(
|
||||
kbdev->pm.backend.gpu_in_desired_state_wait,
|
||||
core_mask_update_done(kbdev), timeout);
|
||||
#else
|
||||
remaining = wait_event_timeout(
|
||||
kbdev->pm.backend.gpu_in_desired_state_wait,
|
||||
core_mask_update_done(kbdev), timeout);
|
||||
#endif
|
||||
|
||||
if (!remaining) {
|
||||
kbase_pm_timed_out(kbdev);
|
||||
err = -ETIMEDOUT;
|
||||
} else if (remaining < 0) {
|
||||
dev_info(
|
||||
kbdev->dev,
|
||||
"Wait for cores down scaling got interrupted");
|
||||
err = (int)remaining;
|
||||
}
|
||||
|
||||
return err;
|
||||
}
|
||||
#endif
|
||||
|
||||
void kbase_pm_enable_interrupts(struct kbase_device *kbdev)
|
||||
{
|
||||
unsigned long flags;
|
||||
|
|
@ -2391,19 +2512,25 @@ static void update_user_reg_page_mapping(struct kbase_device *kbdev)
|
|||
lockdep_assert_held(&kbdev->pm.lock);
|
||||
|
||||
mutex_lock(&kbdev->csf.reg_lock);
|
||||
if (kbdev->csf.mali_file_inode) {
|
||||
/* This would zap the pte corresponding to the mapping of User
|
||||
* register page for all the Kbase contexts.
|
||||
*/
|
||||
unmap_mapping_range(kbdev->csf.mali_file_inode->i_mapping,
|
||||
BASEP_MEM_CSF_USER_REG_PAGE_HANDLE,
|
||||
PAGE_SIZE, 1);
|
||||
|
||||
/* Only if the mappings for USER page exist, update all PTEs associated to it */
|
||||
if (kbdev->csf.nr_user_page_mapped > 0) {
|
||||
if (likely(kbdev->csf.mali_file_inode)) {
|
||||
/* This would zap the pte corresponding to the mapping of User
|
||||
* register page for all the Kbase contexts.
|
||||
*/
|
||||
unmap_mapping_range(kbdev->csf.mali_file_inode->i_mapping,
|
||||
BASEP_MEM_CSF_USER_REG_PAGE_HANDLE, PAGE_SIZE, 1);
|
||||
} else {
|
||||
dev_err(kbdev->dev,
|
||||
"Device file inode not exist even if USER page previously mapped");
|
||||
}
|
||||
}
|
||||
|
||||
mutex_unlock(&kbdev->csf.reg_lock);
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
/*
|
||||
* pmu layout:
|
||||
* 0x0000: PMU TAG (RO) (0xCAFECAFE)
|
||||
|
|
@ -2541,7 +2668,6 @@ void kbase_pm_clock_on(struct kbase_device *kbdev, bool is_resume)
|
|||
backend->gpu_idled = false;
|
||||
}
|
||||
#endif
|
||||
|
||||
}
|
||||
|
||||
KBASE_EXPORT_TEST_API(kbase_pm_clock_on);
|
||||
|
|
|
|||
|
|
@ -269,6 +269,37 @@ int kbase_pm_wait_for_desired_state(struct kbase_device *kbdev);
|
|||
*/
|
||||
int kbase_pm_wait_for_l2_powered(struct kbase_device *kbdev);
|
||||
|
||||
#if MALI_USE_CSF
|
||||
/**
|
||||
* kbase_pm_wait_for_cores_down_scale - Wait for the downscaling of shader cores
|
||||
*
|
||||
* @kbdev: The kbase device structure for the device (must be a valid pointer)
|
||||
*
|
||||
* This function can be called to ensure that the downscaling of cores is
|
||||
* effectively complete and it would be safe to lower the voltage.
|
||||
* The function assumes that caller had exercised the MCU state machine for the
|
||||
* downscale request through the kbase_pm_update_state() function.
|
||||
*
|
||||
* This function needs to be used by the caller to safely wait for the completion
|
||||
* of downscale request, instead of kbase_pm_wait_for_desired_state().
|
||||
* The downscale request would trigger a state change in MCU state machine
|
||||
* and so when MCU reaches the stable ON state, it can be inferred that
|
||||
* downscaling is complete. But it has been observed that the wake up of the
|
||||
* waiting thread can get delayed by a few milliseconds and by the time the
|
||||
* thread wakes up the power down transition could have started (after the
|
||||
* completion of downscale request).
|
||||
* On the completion of power down transition another wake up signal would be
|
||||
* sent, but again by the time thread wakes up the power up transition can begin.
|
||||
* And the power up transition could then get blocked inside the platform specific
|
||||
* callback_power_on() function due to the thread that called into Kbase (from the
|
||||
* platform specific code) to perform the downscaling and then ended up waiting
|
||||
* for the completion of downscale request.
|
||||
*
|
||||
* Return: 0 on success, -ETIMEDOUT on timeout, or a negative error code if the wait was interrupted.
|
||||
*/
|
||||
int kbase_pm_wait_for_cores_down_scale(struct kbase_device *kbdev);
|
||||
#endif
|
||||
|
||||
/**
|
||||
* kbase_pm_update_dynamic_cores_onoff - Update the L2 and shader power state
|
||||
* machines after changing shader core
|
||||
|
|
|
|||
|
|
@ -38,11 +38,13 @@
|
|||
#include <backend/gpu/mali_kbase_pm_defs.h>
|
||||
#include <mali_linux_trace.h>
|
||||
|
||||
#if defined(CONFIG_MALI_BIFROST_DEVFREQ) || defined(CONFIG_MALI_BIFROST_DVFS) || !MALI_USE_CSF
|
||||
/* Shift used for kbasep_pm_metrics_data.time_busy/idle - units of (1 << 8) ns
|
||||
* This gives a maximum period between samples of 2^(32+8)/100 ns = slightly
|
||||
* under 11s. Exceeding this will cause overflow
|
||||
*/
|
||||
#define KBASE_PM_TIME_SHIFT 8
|
||||
#endif
|
||||
|
||||
#if MALI_USE_CSF
|
||||
/* To get the GPU_ACTIVE value in nano seconds unit */
|
||||
|
|
|
|||
|
|
@ -32,6 +32,7 @@ bob_defaults {
|
|||
kbuild_options: [
|
||||
"CONFIG_MALI_BIFROST_NO_MALI=y",
|
||||
"CONFIG_MALI_NO_MALI_DEFAULT_GPU={{.gpu}}",
|
||||
"CONFIG_GPU_HWVER={{.hwver}}",
|
||||
],
|
||||
},
|
||||
mali_platform_dt_pin_rst: {
|
||||
|
|
@ -52,9 +53,6 @@ bob_defaults {
|
|||
mali_midgard_enable_trace: {
|
||||
kbuild_options: ["CONFIG_MALI_BIFROST_ENABLE_TRACE=y"],
|
||||
},
|
||||
mali_dma_fence: {
|
||||
kbuild_options: ["CONFIG_MALI_BIFROST_DMA_FENCE=y"],
|
||||
},
|
||||
mali_arbiter_support: {
|
||||
kbuild_options: ["CONFIG_MALI_ARBITER_SUPPORT=y"],
|
||||
},
|
||||
|
|
@ -64,7 +62,7 @@ bob_defaults {
|
|||
mali_dma_buf_legacy_compat: {
|
||||
kbuild_options: ["CONFIG_MALI_DMA_BUF_LEGACY_COMPAT=y"],
|
||||
},
|
||||
mali_2mb_alloc: {
|
||||
large_page_alloc: {
|
||||
kbuild_options: ["CONFIG_MALI_2MB_ALLOC=y"],
|
||||
},
|
||||
mali_memory_fully_backed: {
|
||||
|
|
@ -89,7 +87,7 @@ bob_defaults {
|
|||
kbuild_options: ["CONFIG_MALI_BIFROST_ERROR_INJECT=y"],
|
||||
},
|
||||
mali_gem5_build: {
|
||||
kbuild_options: ["CONFIG_MALI_GEM5_BUILD=y"],
|
||||
kbuild_options: ["CONFIG_MALI_GEM5_BUILD=y"],
|
||||
},
|
||||
mali_debug: {
|
||||
kbuild_options: [
|
||||
|
|
@ -163,9 +161,7 @@ bob_defaults {
|
|||
// (catch-all for experimental CS code without separating it into
|
||||
// different features).
|
||||
"MALI_INCREMENTAL_RENDERING_JM={{.incremental_rendering_jm}}",
|
||||
"MALI_GPU_TIMESTAMP_CORRECTION={{.gpu_timestamp_correction}}",
|
||||
"MALI_BASE_CSF_PERFORMANCE_TESTS={{.base_csf_performance_tests}}",
|
||||
"MALI_GPU_TIMESTAMP_INTERPOLATION={{.gpu_timestamp_interpolation}}",
|
||||
],
|
||||
}
|
||||
|
||||
|
|
@ -184,6 +180,10 @@ bob_kernel_module {
|
|||
"context/*.c",
|
||||
"context/*.h",
|
||||
"context/Kbuild",
|
||||
"hwcnt/*.c",
|
||||
"hwcnt/*.h",
|
||||
"hwcnt/backend/*.h",
|
||||
"hwcnt/Kbuild",
|
||||
"ipa/*.c",
|
||||
"ipa/*.h",
|
||||
"ipa/Kbuild",
|
||||
|
|
@ -217,6 +217,10 @@ bob_kernel_module {
|
|||
"device/backend/*_jm.c",
|
||||
"gpu/backend/*_jm.c",
|
||||
"gpu/backend/*_jm.h",
|
||||
"hwcnt/backend/*_jm.c",
|
||||
"hwcnt/backend/*_jm.h",
|
||||
"hwcnt/backend/*_jm_*.c",
|
||||
"hwcnt/backend/*_jm_*.h",
|
||||
"jm/*.h",
|
||||
"tl/backend/*_jm.c",
|
||||
"mmu/backend/*_jm.c",
|
||||
|
|
@ -238,6 +242,10 @@ bob_kernel_module {
|
|||
"device/backend/*_csf.c",
|
||||
"gpu/backend/*_csf.c",
|
||||
"gpu/backend/*_csf.h",
|
||||
"hwcnt/backend/*_csf.c",
|
||||
"hwcnt/backend/*_csf.h",
|
||||
"hwcnt/backend/*_csf_*.c",
|
||||
"hwcnt/backend/*_csf_*.h",
|
||||
"tl/backend/*_csf.c",
|
||||
"mmu/backend/*_csf.c",
|
||||
"ipa/backend/*_csf.c",
|
||||
|
|
|
|||
|
|
@ -26,7 +26,6 @@
|
|||
#include <context/mali_kbase_context_internal.h>
|
||||
#include <gpu/mali_kbase_gpu_regmap.h>
|
||||
#include <mali_kbase.h>
|
||||
#include <mali_kbase_dma_fence.h>
|
||||
#include <mali_kbase_mem_linux.h>
|
||||
#include <mali_kbase_mem_pool_group.h>
|
||||
#include <mmu/mali_kbase_mmu.h>
|
||||
|
|
@ -39,12 +38,14 @@
|
|||
#include <csf/mali_kbase_csf_cpu_queue_debugfs.h>
|
||||
#include <mali_kbase_debug_mem_view.h>
|
||||
#include <mali_kbase_debug_mem_zones.h>
|
||||
#include <mali_kbase_debug_mem_allocs.h>
|
||||
#include <mali_kbase_mem_pool_debugfs.h>
|
||||
|
||||
void kbase_context_debugfs_init(struct kbase_context *const kctx)
|
||||
{
|
||||
kbase_debug_mem_view_init(kctx);
|
||||
kbase_debug_mem_zones_init(kctx);
|
||||
kbase_debug_mem_allocs_init(kctx);
|
||||
kbase_mem_pool_debugfs_init(kctx->kctx_dentry, kctx);
|
||||
kbase_jit_debugfs_init(kctx);
|
||||
kbase_csf_queue_group_debugfs_init(kctx);
|
||||
|
|
|
|||
|
|
@ -27,7 +27,6 @@
|
|||
#include <gpu/mali_kbase_gpu_regmap.h>
|
||||
#include <mali_kbase.h>
|
||||
#include <mali_kbase_ctx_sched.h>
|
||||
#include <mali_kbase_dma_fence.h>
|
||||
#include <mali_kbase_kinstr_jm.h>
|
||||
#include <mali_kbase_mem_linux.h>
|
||||
#include <mali_kbase_mem_pool_group.h>
|
||||
|
|
@ -37,12 +36,14 @@
|
|||
#if IS_ENABLED(CONFIG_DEBUG_FS)
|
||||
#include <mali_kbase_debug_mem_view.h>
|
||||
#include <mali_kbase_debug_mem_zones.h>
|
||||
#include <mali_kbase_debug_mem_allocs.h>
|
||||
#include <mali_kbase_mem_pool_debugfs.h>
|
||||
|
||||
void kbase_context_debugfs_init(struct kbase_context *const kctx)
|
||||
{
|
||||
kbase_debug_mem_view_init(kctx);
|
||||
kbase_debug_mem_zones_init(kctx);
|
||||
kbase_debug_mem_allocs_init(kctx);
|
||||
kbase_mem_pool_debugfs_init(kctx->kctx_dentry, kctx);
|
||||
kbase_jit_debugfs_init(kctx);
|
||||
kbasep_jd_debugfs_ctx_init(kctx);
|
||||
|
|
@ -128,8 +129,6 @@ static const struct kbase_context_init context_init[] = {
|
|||
{ NULL, kbase_context_free, NULL },
|
||||
{ kbase_context_common_init, kbase_context_common_term,
|
||||
"Common context initialization failed" },
|
||||
{ kbase_dma_fence_init, kbase_dma_fence_term,
|
||||
"DMA fence initialization failed" },
|
||||
{ kbase_context_mem_pool_group_init, kbase_context_mem_pool_group_term,
|
||||
"Memory pool group initialization failed" },
|
||||
{ kbase_mem_evictable_init, kbase_mem_evictable_deinit,
|
||||
|
|
|
|||
|
|
@ -165,7 +165,9 @@ int kbase_context_common_init(struct kbase_context *kctx)
|
|||
atomic64_set(&kctx->num_fixed_allocs, 0);
|
||||
#endif
|
||||
|
||||
kbase_gpu_vm_lock(kctx);
|
||||
bitmap_copy(kctx->cookies, &cookies_mask, BITS_PER_LONG);
|
||||
kbase_gpu_vm_unlock(kctx);
|
||||
|
||||
kctx->id = atomic_add_return(1, &(kctx->kbdev->ctx_num)) - 1;
|
||||
|
||||
|
|
@ -274,10 +276,8 @@ void kbase_context_common_term(struct kbase_context *kctx)
|
|||
|
||||
int kbase_context_mem_pool_group_init(struct kbase_context *kctx)
|
||||
{
|
||||
return kbase_mem_pool_group_init(&kctx->mem_pools,
|
||||
kctx->kbdev,
|
||||
&kctx->kbdev->mem_pool_defaults,
|
||||
&kctx->kbdev->mem_pools);
|
||||
return kbase_mem_pool_group_init(&kctx->mem_pools, kctx->kbdev,
|
||||
&kctx->kbdev->mem_pool_defaults, &kctx->kbdev->mem_pools);
|
||||
}
|
||||
|
||||
void kbase_context_mem_pool_group_term(struct kbase_context *kctx)
|
||||
|
|
|
|||
|
|
@ -1,6 +1,6 @@
|
|||
# SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
|
||||
#
|
||||
# (C) COPYRIGHT 2018-2021 ARM Limited. All rights reserved.
|
||||
# (C) COPYRIGHT 2018-2022 ARM Limited. All rights reserved.
|
||||
#
|
||||
# This program is free software and is provided to you under the terms of the
|
||||
# GNU General Public License version 2 as published by the Free Software
|
||||
|
|
@ -34,12 +34,16 @@ bifrost_kbase-y += \
|
|||
csf/mali_kbase_csf_protected_memory.o \
|
||||
csf/mali_kbase_csf_tiler_heap_debugfs.o \
|
||||
csf/mali_kbase_csf_cpu_queue_debugfs.o \
|
||||
csf/mali_kbase_csf_event.o
|
||||
csf/mali_kbase_csf_event.o \
|
||||
csf/mali_kbase_csf_firmware_log.o \
|
||||
csf/mali_kbase_csf_tiler_heap_reclaim.o
|
||||
|
||||
bifrost_kbase-$(CONFIG_MALI_REAL_HW) += csf/mali_kbase_csf_firmware.o
|
||||
|
||||
bifrost_kbase-$(CONFIG_MALI_BIFROST_NO_MALI) += csf/mali_kbase_csf_firmware_no_mali.o
|
||||
|
||||
bifrost_kbase-$(CONFIG_DEBUG_FS) += csf/mali_kbase_debug_csf_fault.o
|
||||
|
||||
|
||||
ifeq ($(KBUILD_EXTMOD),)
|
||||
# in-tree
|
||||
|
|
|
|||
|
|
@ -28,8 +28,6 @@
|
|||
* Status flags from the STATUS register of the IPA Control interface.
|
||||
*/
|
||||
#define STATUS_COMMAND_ACTIVE ((u32)1 << 0)
|
||||
#define STATUS_TIMER_ACTIVE ((u32)1 << 1)
|
||||
#define STATUS_AUTO_ACTIVE ((u32)1 << 2)
|
||||
#define STATUS_PROTECTED_MODE ((u32)1 << 8)
|
||||
#define STATUS_RESET ((u32)1 << 9)
|
||||
#define STATUS_TIMER_ENABLED ((u32)1 << 31)
|
||||
|
|
@ -37,9 +35,7 @@
|
|||
/*
|
||||
* Commands for the COMMAND register of the IPA Control interface.
|
||||
*/
|
||||
#define COMMAND_NOP ((u32)0)
|
||||
#define COMMAND_APPLY ((u32)1)
|
||||
#define COMMAND_CLEAR ((u32)2)
|
||||
#define COMMAND_SAMPLE ((u32)3)
|
||||
#define COMMAND_PROTECTED_ACK ((u32)4)
|
||||
#define COMMAND_RESET_ACK ((u32)5)
|
||||
|
|
@ -965,6 +961,43 @@ void kbase_ipa_control_handle_gpu_reset_post(struct kbase_device *kbdev)
|
|||
}
|
||||
KBASE_EXPORT_TEST_API(kbase_ipa_control_handle_gpu_reset_post);
|
||||
|
||||
#ifdef KBASE_PM_RUNTIME
|
||||
void kbase_ipa_control_handle_gpu_sleep_enter(struct kbase_device *kbdev)
|
||||
{
|
||||
lockdep_assert_held(&kbdev->hwaccess_lock);
|
||||
|
||||
if (kbdev->pm.backend.mcu_state == KBASE_MCU_IN_SLEEP) {
|
||||
/* GPU Sleep is treated as a power down */
|
||||
kbase_ipa_control_handle_gpu_power_off(kbdev);
|
||||
|
||||
/* SELECT_CSHW register needs to be cleared to prevent any
|
||||
* IPA control message to be sent to the top level GPU HWCNT.
|
||||
*/
|
||||
kbase_reg_write(kbdev, IPA_CONTROL_REG(SELECT_CSHW_LO), 0);
|
||||
kbase_reg_write(kbdev, IPA_CONTROL_REG(SELECT_CSHW_HI), 0);
|
||||
|
||||
/* No need to issue the APPLY command here */
|
||||
}
|
||||
}
|
||||
KBASE_EXPORT_TEST_API(kbase_ipa_control_handle_gpu_sleep_enter);
|
||||
|
||||
void kbase_ipa_control_handle_gpu_sleep_exit(struct kbase_device *kbdev)
|
||||
{
|
||||
lockdep_assert_held(&kbdev->hwaccess_lock);
|
||||
|
||||
if (kbdev->pm.backend.mcu_state == KBASE_MCU_IN_SLEEP) {
|
||||
/* To keep things simple, currently exit from
|
||||
* GPU Sleep is treated as a power on event where
|
||||
* all 4 SELECT registers are reconfigured.
|
||||
* On exit from sleep, reconfiguration is needed
|
||||
* only for the SELECT_CSHW register.
|
||||
*/
|
||||
kbase_ipa_control_handle_gpu_power_on(kbdev);
|
||||
}
|
||||
}
|
||||
KBASE_EXPORT_TEST_API(kbase_ipa_control_handle_gpu_sleep_exit);
|
||||
#endif
|
||||
|
||||
#if MALI_UNIT_TEST
|
||||
void kbase_ipa_control_rate_change_notify_test(struct kbase_device *kbdev,
|
||||
u32 clk_index, u32 clk_rate_hz)
|
||||
|
|
|
|||
|
|
@ -1,7 +1,7 @@
|
|||
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
|
||||
/*
|
||||
*
|
||||
* (C) COPYRIGHT 2020-2021 ARM Limited. All rights reserved.
|
||||
* (C) COPYRIGHT 2020-2022 ARM Limited. All rights reserved.
|
||||
*
|
||||
* This program is free software and is provided to you under the terms of the
|
||||
* GNU General Public License version 2 as published by the Free Software
|
||||
|
|
@ -198,6 +198,33 @@ void kbase_ipa_control_handle_gpu_reset_pre(struct kbase_device *kbdev);
|
|||
*/
|
||||
void kbase_ipa_control_handle_gpu_reset_post(struct kbase_device *kbdev);
|
||||
|
||||
#ifdef KBASE_PM_RUNTIME
|
||||
/**
|
||||
* kbase_ipa_control_handle_gpu_sleep_enter - Handle the pre GPU Sleep event
|
||||
*
|
||||
* @kbdev: Pointer to kbase device.
|
||||
*
|
||||
* This function is called after MCU has been put to sleep state & L2 cache has
|
||||
* been powered down. The top level part of GPU is still powered up when this
|
||||
* function is called.
|
||||
*/
|
||||
void kbase_ipa_control_handle_gpu_sleep_enter(struct kbase_device *kbdev);
|
||||
|
||||
/**
|
||||
* kbase_ipa_control_handle_gpu_sleep_exit - Handle the post GPU Sleep event
|
||||
*
|
||||
* @kbdev: Pointer to kbase device.
|
||||
*
|
||||
* This function is called when L2 needs to be powered up and MCU can exit the
|
||||
* sleep state. The top level part of GPU is powered up when this function is
|
||||
* called.
|
||||
*
|
||||
* This function must be called only if kbase_ipa_control_handle_gpu_sleep_enter()
|
||||
* was called previously.
|
||||
*/
|
||||
void kbase_ipa_control_handle_gpu_sleep_exit(struct kbase_device *kbdev);
|
||||
#endif
|
||||
|
||||
#if MALI_UNIT_TEST
|
||||
/**
|
||||
* kbase_ipa_control_rate_change_notify_test - Notify GPU rate change
|
||||
|
|
|
|||
|
|
@ -348,9 +348,8 @@ int kbase_csf_alloc_command_stream_user_pages(struct kbase_context *kctx,
|
|||
if (!reg)
|
||||
return -ENOMEM;
|
||||
|
||||
ret = kbase_mem_pool_alloc_pages(
|
||||
&kctx->mem_pools.small[KBASE_MEM_GROUP_CSF_IO],
|
||||
num_pages, queue->phys, false);
|
||||
ret = kbase_mem_pool_alloc_pages(&kctx->mem_pools.small[KBASE_MEM_GROUP_CSF_IO], num_pages,
|
||||
queue->phys, false);
|
||||
|
||||
if (ret != num_pages)
|
||||
goto phys_alloc_failed;
|
||||
|
|
@ -374,8 +373,11 @@ int kbase_csf_alloc_command_stream_user_pages(struct kbase_context *kctx,
|
|||
|
||||
queue->db_file_offset = kbdev->csf.db_file_offsets;
|
||||
kbdev->csf.db_file_offsets += BASEP_QUEUE_NR_MMAP_USER_PAGES;
|
||||
|
||||
#if (KERNEL_VERSION(4, 11, 0) > LINUX_VERSION_CODE)
|
||||
WARN(atomic_read(&queue->refcount) != 1, "Incorrect refcounting for queue object\n");
|
||||
#else
|
||||
WARN(refcount_read(&queue->refcount) != 1, "Incorrect refcounting for queue object\n");
|
||||
#endif
|
||||
/* This is the second reference taken on the queue object and
|
||||
* would be dropped only when the IO mapping is removed either
|
||||
* explicitly by userspace or implicitly by kernel on process exit.
|
||||
|
|
@ -444,25 +446,34 @@ static struct kbase_queue *find_queue(struct kbase_context *kctx, u64 base_addr)
|
|||
|
||||
static void get_queue(struct kbase_queue *queue)
|
||||
{
|
||||
#if (KERNEL_VERSION(4, 11, 0) > LINUX_VERSION_CODE)
|
||||
WARN_ON(!atomic_inc_not_zero(&queue->refcount));
|
||||
#else
|
||||
WARN_ON(!refcount_inc_not_zero(&queue->refcount));
|
||||
#endif
|
||||
}
|
||||
|
||||
static void release_queue(struct kbase_queue *queue)
|
||||
{
|
||||
lockdep_assert_held(&queue->kctx->csf.lock);
|
||||
|
||||
WARN_ON(atomic_read(&queue->refcount) <= 0);
|
||||
|
||||
#if (KERNEL_VERSION(4, 11, 0) > LINUX_VERSION_CODE)
|
||||
if (atomic_dec_and_test(&queue->refcount)) {
|
||||
#else
|
||||
if (refcount_dec_and_test(&queue->refcount)) {
|
||||
#endif
|
||||
/* The queue can't still be on the per context list. */
|
||||
WARN_ON(!list_empty(&queue->link));
|
||||
WARN_ON(queue->group);
|
||||
dev_dbg(queue->kctx->kbdev->dev,
|
||||
"Remove any pending command queue fatal from ctx %d_%d",
|
||||
queue->kctx->tgid, queue->kctx->id);
|
||||
kbase_csf_event_remove_error(queue->kctx, &queue->error);
|
||||
kfree(queue);
|
||||
}
|
||||
}
|
||||
|
||||
static void oom_event_worker(struct work_struct *data);
|
||||
static void fatal_event_worker(struct work_struct *data);
|
||||
static void cs_error_worker(struct work_struct *data);
|
||||
|
||||
/* Between reg and reg_ex, one and only one must be null */
|
||||
static int csf_queue_register_internal(struct kbase_context *kctx,
|
||||
|
|
@ -565,7 +576,11 @@ static int csf_queue_register_internal(struct kbase_context *kctx,
|
|||
queue->enabled = false;
|
||||
|
||||
queue->priority = reg->priority;
|
||||
#if (KERNEL_VERSION(4, 11, 0) > LINUX_VERSION_CODE)
|
||||
atomic_set(&queue->refcount, 1);
|
||||
#else
|
||||
refcount_set(&queue->refcount, 1);
|
||||
#endif
|
||||
|
||||
queue->group = NULL;
|
||||
queue->bind_state = KBASE_CSF_QUEUE_UNBOUND;
|
||||
|
|
@ -588,7 +603,7 @@ static int csf_queue_register_internal(struct kbase_context *kctx,
|
|||
INIT_LIST_HEAD(&queue->link);
|
||||
INIT_LIST_HEAD(&queue->error.link);
|
||||
INIT_WORK(&queue->oom_event_work, oom_event_worker);
|
||||
INIT_WORK(&queue->fatal_event_work, fatal_event_worker);
|
||||
INIT_WORK(&queue->cs_error_work, cs_error_worker);
|
||||
list_add(&queue->link, &kctx->csf.queue_list);
|
||||
|
||||
queue->extract_ofs = 0;
|
||||
|
|
@ -699,11 +714,6 @@ void kbase_csf_queue_terminate(struct kbase_context *kctx,
|
|||
}
|
||||
kbase_gpu_vm_unlock(kctx);
|
||||
|
||||
dev_dbg(kctx->kbdev->dev,
|
||||
"Remove any pending command queue fatal from context %pK\n",
|
||||
(void *)kctx);
|
||||
kbase_csf_event_remove_error(kctx, &queue->error);
|
||||
|
||||
release_queue(queue);
|
||||
}
|
||||
|
||||
|
|
@ -784,6 +794,11 @@ static struct kbase_queue_group *get_bound_queue_group(
|
|||
return group;
|
||||
}
|
||||
|
||||
/* Queue the context's pending_submission_work item on system_highpri_wq. */
static void enqueue_gpu_submission_work(struct kbase_context *const kctx)
|
||||
{
|
||||
queue_work(system_highpri_wq, &kctx->csf.pending_submission_work);
|
||||
}
|
||||
|
||||
/**
|
||||
* pending_submission_worker() - Work item to process pending kicked GPU command queues.
|
||||
*
|
||||
|
|
@ -813,11 +828,21 @@ static void pending_submission_worker(struct work_struct *work)
|
|||
list_for_each_entry(queue, &kctx->csf.queue_list, link) {
|
||||
if (atomic_cmpxchg(&queue->pending, 1, 0) == 1) {
|
||||
struct kbase_queue_group *group = get_bound_queue_group(queue);
|
||||
int ret;
|
||||
|
||||
if (!group || queue->bind_state != KBASE_CSF_QUEUE_BOUND)
|
||||
if (!group || queue->bind_state != KBASE_CSF_QUEUE_BOUND) {
|
||||
dev_dbg(kbdev->dev, "queue is not bound to a group");
|
||||
else
|
||||
WARN_ON(kbase_csf_scheduler_queue_start(queue));
|
||||
continue;
|
||||
}
|
||||
|
||||
ret = kbase_csf_scheduler_queue_start(queue);
|
||||
if (unlikely(ret)) {
|
||||
dev_dbg(kbdev->dev, "Failed to start queue");
|
||||
if (ret == -EBUSY) {
|
||||
atomic_cmpxchg(&queue->pending, 0, 1);
|
||||
enqueue_gpu_submission_work(kctx);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -831,6 +856,8 @@ void kbase_csf_ring_csg_doorbell(struct kbase_device *kbdev, int slot)
|
|||
if (WARN_ON(slot < 0))
|
||||
return;
|
||||
|
||||
kbase_csf_scheduler_spin_lock_assert_held(kbdev);
|
||||
|
||||
kbase_csf_ring_csg_slots_doorbell(kbdev, (u32) (1 << slot));
|
||||
}
|
||||
|
||||
|
|
@ -843,6 +870,8 @@ void kbase_csf_ring_csg_slots_doorbell(struct kbase_device *kbdev,
|
|||
(u32) ((1U << kbdev->csf.global_iface.group_num) - 1);
|
||||
u32 value;
|
||||
|
||||
kbase_csf_scheduler_spin_lock_assert_held(kbdev);
|
||||
|
||||
if (WARN_ON(slot_bitmap > allowed_bitmap))
|
||||
return;
|
||||
|
||||
|
|
@ -872,6 +901,8 @@ void kbase_csf_ring_cs_kernel_doorbell(struct kbase_device *kbdev,
|
|||
struct kbase_csf_cmd_stream_group_info *ginfo;
|
||||
u32 value;
|
||||
|
||||
kbase_csf_scheduler_spin_lock_assert_held(kbdev);
|
||||
|
||||
if (WARN_ON(csg_nr < 0) ||
|
||||
WARN_ON(csg_nr >= kbdev->csf.global_iface.group_num))
|
||||
return;
|
||||
|
|
@ -891,11 +922,6 @@ void kbase_csf_ring_cs_kernel_doorbell(struct kbase_device *kbdev,
|
|||
kbase_csf_ring_csg_doorbell(kbdev, csg_nr);
|
||||
}
|
||||
|
||||
/* Queue the context's pending_submission_work item on system_highpri_wq. */
static void enqueue_gpu_submission_work(struct kbase_context *const kctx)
|
||||
{
|
||||
queue_work(system_highpri_wq, &kctx->csf.pending_submission_work);
|
||||
}
|
||||
|
||||
int kbase_csf_queue_kick(struct kbase_context *kctx,
|
||||
struct kbase_ioctl_cs_queue_kick *kick)
|
||||
{
|
||||
|
|
@ -1129,9 +1155,8 @@ static int create_normal_suspend_buffer(struct kbase_context *const kctx,
|
|||
}
|
||||
|
||||
/* Get physical page for a normal suspend buffer */
|
||||
err = kbase_mem_pool_alloc_pages(
|
||||
&kctx->mem_pools.small[KBASE_MEM_GROUP_CSF_FW],
|
||||
nr_pages, &s_buf->phy[0], false);
|
||||
err = kbase_mem_pool_alloc_pages(&kctx->mem_pools.small[KBASE_MEM_GROUP_CSF_FW], nr_pages,
|
||||
&s_buf->phy[0], false);
|
||||
|
||||
if (err < 0)
|
||||
goto phy_pages_alloc_failed;
|
||||
|
|
@ -1362,6 +1387,11 @@ static int create_queue_group(struct kbase_context *const kctx,
|
|||
group->cs_unrecoverable = false;
|
||||
group->reevaluate_idle_status = false;
|
||||
|
||||
group->dvs_buf = create->in.dvs_buf;
|
||||
|
||||
#if IS_ENABLED(CONFIG_DEBUG_FS)
|
||||
group->deschedule_deferred_cnt = 0;
|
||||
#endif
|
||||
|
||||
group->group_uid = generate_group_uid();
|
||||
create->out.group_uid = group->group_uid;
|
||||
|
|
@ -1377,6 +1407,9 @@ static int create_queue_group(struct kbase_context *const kctx,
|
|||
MAX_SUPPORTED_STREAMS_PER_GROUP);
|
||||
|
||||
group->run_state = KBASE_CSF_GROUP_INACTIVE;
|
||||
KBASE_KTRACE_ADD_CSF_GRP(group->kctx->kbdev, CSF_GROUP_INACTIVE, group,
|
||||
group->run_state);
|
||||
|
||||
err = create_suspend_buffers(kctx, group);
|
||||
|
||||
if (err < 0) {
|
||||
|
|
@ -1396,6 +1429,17 @@ static int create_queue_group(struct kbase_context *const kctx,
|
|||
return group_handle;
|
||||
}
|
||||
|
||||
/*
 * dvs_supported - Check whether a CSF interface version supports DVS.
 *
 * @csf_version: Version word decoded with GLB_VERSION_MAJOR_GET() and
 *               GLB_VERSION_MINOR_GET().
 *
 * Return: true when the version is 3.2 or later, false otherwise.
 */
static bool dvs_supported(u32 csf_version)
{
	/* Any major version above 3 qualifies regardless of the minor. */
	if (GLB_VERSION_MAJOR_GET(csf_version) > 3)
		return true;

	if (GLB_VERSION_MAJOR_GET(csf_version) < 3)
		return false;

	/* Major version is exactly 3: the minor version decides. */
	return GLB_VERSION_MINOR_GET(csf_version) >= 2;
}
|
||||
|
||||
int kbase_csf_queue_group_create(struct kbase_context *const kctx,
|
||||
union kbase_ioctl_cs_queue_group_create *const create)
|
||||
|
|
@ -1434,8 +1478,17 @@ int kbase_csf_queue_group_create(struct kbase_context *const kctx,
|
|||
dev_warn(kctx->kbdev->dev, "Unknown exception handler flags set: %u",
|
||||
create->in.csi_handlers & ~BASE_CSF_EXCEPTION_HANDLER_FLAGS_MASK);
|
||||
err = -EINVAL;
|
||||
} else if (create->in.reserved) {
|
||||
dev_warn(kctx->kbdev->dev, "Reserved field was set to non-0");
|
||||
} else if (!dvs_supported(kctx->kbdev->csf.global_iface.version) &&
|
||||
create->in.dvs_buf) {
|
||||
dev_warn(
|
||||
kctx->kbdev->dev,
|
||||
"GPU does not support DVS but userspace is trying to use it");
|
||||
err = -EINVAL;
|
||||
} else if (dvs_supported(kctx->kbdev->csf.global_iface.version) &&
|
||||
!CSG_DVS_BUF_BUFFER_POINTER_GET(create->in.dvs_buf) &&
|
||||
CSG_DVS_BUF_BUFFER_SIZE_GET(create->in.dvs_buf)) {
|
||||
dev_warn(kctx->kbdev->dev,
|
||||
"DVS buffer pointer is null but size is not 0");
|
||||
err = -EINVAL;
|
||||
} else {
|
||||
/* For the CSG which satisfies the condition for having
|
||||
|
|
@ -1555,6 +1608,7 @@ void kbase_csf_term_descheduled_queue_group(struct kbase_queue_group *group)
|
|||
&group->protected_suspend_buf);
|
||||
|
||||
group->run_state = KBASE_CSF_GROUP_TERMINATED;
|
||||
KBASE_KTRACE_ADD_CSF_GRP(group->kctx->kbdev, CSF_GROUP_TERMINATED, group, group->run_state);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -1585,6 +1639,34 @@ static void term_queue_group(struct kbase_queue_group *group)
|
|||
kbase_csf_term_descheduled_queue_group(group);
|
||||
}
|
||||
|
||||
/**
 * wait_group_deferred_deschedule_completion - Wait for the refcount of the
 * group, taken when the group deschedule had to be deferred, to become 0.
 *
 * @group: Pointer to GPU command queue group that is being deleted.
 *
 * This function is called when Userspace deletes the group, after the group
 * has been descheduled. It synchronizes with the other threads that were also
 * trying to deschedule the group whilst the dumping was going on for a fault.
 * Please refer to the documentation of
 * wait_for_dump_complete_on_group_deschedule() for more details.
 *
 * The caller must hold the context's csf.lock. The lock is released for the
 * duration of the wait and re-acquired before returning. The function does
 * nothing when CONFIG_DEBUG_FS is not enabled.
 */
static void wait_group_deferred_deschedule_completion(struct kbase_queue_group *group)
{
#if IS_ENABLED(CONFIG_DEBUG_FS)
	struct kbase_context *ctx = group->kctx;

	lockdep_assert_held(&ctx->csf.lock);

	if (unlikely(group->deschedule_deferred_cnt)) {
		mutex_unlock(&ctx->csf.lock);
		wait_event(ctx->kbdev->csf.event_wait, !group->deschedule_deferred_cnt);
		mutex_lock(&ctx->csf.lock);
	}
#endif
}
|
||||
|
||||
static void cancel_queue_group_events(struct kbase_queue_group *group)
|
||||
{
|
||||
cancel_work_sync(&group->timer_event_work);
|
||||
|
|
@ -1626,24 +1708,39 @@ void kbase_csf_queue_group_terminate(struct kbase_context *kctx,
|
|||
group = find_queue_group(kctx, group_handle);
|
||||
|
||||
if (group) {
|
||||
remove_pending_group_fatal_error(group);
|
||||
term_queue_group(group);
|
||||
kctx->csf.queue_groups[group_handle] = NULL;
|
||||
/* Stop the running of the given group */
|
||||
term_queue_group(group);
|
||||
mutex_unlock(&kctx->csf.lock);
|
||||
|
||||
if (reset_prevented) {
|
||||
/* Allow GPU reset before cancelling the group specific
|
||||
* work item to avoid potential deadlock.
|
||||
* Reset prevention isn't needed after group termination.
|
||||
*/
|
||||
kbase_reset_gpu_allow(kbdev);
|
||||
reset_prevented = false;
|
||||
}
|
||||
|
||||
/* Cancel any pending event callbacks. If one is in progress
|
||||
* then this thread waits synchronously for it to complete (which
|
||||
* is why we must unlock the context first). We already ensured
|
||||
* that no more callbacks can be enqueued by terminating the group.
|
||||
*/
|
||||
cancel_queue_group_events(group);
|
||||
|
||||
mutex_lock(&kctx->csf.lock);
|
||||
|
||||
/* Clean up after the termination */
|
||||
remove_pending_group_fatal_error(group);
|
||||
|
||||
wait_group_deferred_deschedule_completion(group);
|
||||
}
|
||||
|
||||
mutex_unlock(&kctx->csf.lock);
|
||||
if (reset_prevented)
|
||||
kbase_reset_gpu_allow(kbdev);
|
||||
|
||||
if (!group)
|
||||
return;
|
||||
|
||||
/* Cancel any pending event callbacks. If one is in progress
|
||||
* then this thread waits synchronously for it to complete (which
|
||||
* is why we must unlock the context first). We already ensured
|
||||
* that no more callbacks can be enqueued by terminating the group.
|
||||
*/
|
||||
cancel_queue_group_events(group);
|
||||
kfree(group);
|
||||
}
|
||||
|
||||
|
|
@ -1738,7 +1835,6 @@ void kbase_csf_active_queue_groups_reset(struct kbase_device *kbdev,
|
|||
|
||||
int kbase_csf_ctx_init(struct kbase_context *kctx)
|
||||
{
|
||||
struct kbase_device *kbdev = kctx->kbdev;
|
||||
int err = -ENOMEM;
|
||||
|
||||
INIT_LIST_HEAD(&kctx->csf.queue_list);
|
||||
|
|
@ -1747,19 +1843,6 @@ int kbase_csf_ctx_init(struct kbase_context *kctx)
|
|||
kbase_csf_event_init(kctx);
|
||||
|
||||
kctx->csf.user_reg_vma = NULL;
|
||||
mutex_lock(&kbdev->pm.lock);
|
||||
/* The inode information for /dev/malixx file is not available at the
|
||||
* time of device probe as the inode is created when the device node
|
||||
* is created by udevd (through mknod).
|
||||
*/
|
||||
if (kctx->filp) {
|
||||
if (!kbdev->csf.mali_file_inode)
|
||||
kbdev->csf.mali_file_inode = kctx->filp->f_inode;
|
||||
|
||||
/* inode is unique for a file */
|
||||
WARN_ON(kbdev->csf.mali_file_inode != kctx->filp->f_inode);
|
||||
}
|
||||
mutex_unlock(&kbdev->pm.lock);
|
||||
|
||||
/* Mark all the cookies as 'free' */
|
||||
bitmap_fill(kctx->csf.cookies, KBASE_CSF_NUM_USER_IO_PAGES_HANDLE);
|
||||
|
|
@ -1874,8 +1957,6 @@ void kbase_csf_ctx_term(struct kbase_context *kctx)
|
|||
else
|
||||
reset_prevented = true;
|
||||
|
||||
cancel_work_sync(&kctx->csf.pending_submission_work);
|
||||
|
||||
mutex_lock(&kctx->csf.lock);
|
||||
|
||||
/* Iterate through the queue groups that were not terminated by
|
||||
|
|
@ -1894,6 +1975,8 @@ void kbase_csf_ctx_term(struct kbase_context *kctx)
|
|||
if (reset_prevented)
|
||||
kbase_reset_gpu_allow(kbdev);
|
||||
|
||||
cancel_work_sync(&kctx->csf.pending_submission_work);
|
||||
|
||||
/* Now that all queue groups have been terminated, there can be no
|
||||
* more OoM or timer event interrupts but there can be inflight work
|
||||
* items. Destroying the wq will implicitly flush those work items.
|
||||
|
|
@ -1938,7 +2021,11 @@ void kbase_csf_ctx_term(struct kbase_context *kctx)
|
|||
* only one reference left that was taken when queue was
|
||||
* registered.
|
||||
*/
|
||||
#if (KERNEL_VERSION(4, 11, 0) > LINUX_VERSION_CODE)
|
||||
if (atomic_read(&queue->refcount) != 1)
|
||||
#else
|
||||
if (refcount_read(&queue->refcount) != 1)
|
||||
#endif
|
||||
dev_warn(kctx->kbdev->dev,
|
||||
"Releasing queue with incorrect refcounting!\n");
|
||||
list_del_init(&queue->link);
|
||||
|
|
@ -2059,6 +2146,36 @@ static void report_tiler_oom_error(struct kbase_queue_group *group)
|
|||
kbase_event_wakeup(group->kctx);
|
||||
}
|
||||
|
||||
static void flush_gpu_cache_on_fatal_error(struct kbase_device *kbdev)
|
||||
{
|
||||
int err;
|
||||
const unsigned int cache_flush_wait_timeout_ms = 2000;
|
||||
|
||||
kbase_pm_lock(kbdev);
|
||||
/* With the advent of partial cache flush, dirty cache lines could
|
||||
* be left in the GPU L2 caches by terminating the queue group here
|
||||
* without waiting for proper cache maintenance. A full cache flush
|
||||
* here will prevent these dirty cache lines from being arbitrarily
|
||||
* evicted later and possible causing memory corruption.
|
||||
*/
|
||||
if (kbdev->pm.backend.gpu_powered) {
|
||||
kbase_gpu_start_cache_clean(kbdev, GPU_COMMAND_CACHE_CLN_INV_L2_LSC);
|
||||
err = kbase_gpu_wait_cache_clean_timeout(kbdev, cache_flush_wait_timeout_ms);
|
||||
|
||||
if (err) {
|
||||
dev_warn(
|
||||
kbdev->dev,
|
||||
"[%llu] Timeout waiting for cache clean to complete after fatal error",
|
||||
kbase_backend_get_cycle_cnt(kbdev));
|
||||
|
||||
if (kbase_prepare_to_reset_gpu(kbdev, RESET_FLAGS_HWC_UNRECOVERABLE_ERROR))
|
||||
kbase_reset_gpu(kbdev);
|
||||
}
|
||||
}
|
||||
|
||||
kbase_pm_unlock(kbdev);
|
||||
}
|
||||
|
||||
/**
|
||||
* kbase_queue_oom_event - Handle tiler out-of-memory for a GPU command queue.
|
||||
*
|
||||
|
|
@ -2071,8 +2188,8 @@ static void report_tiler_oom_error(struct kbase_queue_group *group)
|
|||
* notification to allow the firmware to report out-of-memory again in future.
|
||||
* If the out-of-memory condition was successfully handled then this function
|
||||
* rings the relevant doorbell to notify the firmware; otherwise, it terminates
|
||||
* the GPU command queue group to which the queue is bound. See
|
||||
* term_queue_group() for details.
|
||||
* the GPU command queue group to which the queue is bound and notifies a waiting
|
||||
* user space client of the failure.
|
||||
*/
|
||||
static void kbase_queue_oom_event(struct kbase_queue *const queue)
|
||||
{
|
||||
|
|
@ -2084,6 +2201,7 @@ static void kbase_queue_oom_event(struct kbase_queue *const queue)
|
|||
struct kbase_csf_cmd_stream_info const *stream;
|
||||
int csi_index = queue->csi_index;
|
||||
u32 cs_oom_ack, cs_oom_req;
|
||||
unsigned long flags;
|
||||
|
||||
lockdep_assert_held(&kctx->csf.lock);
|
||||
|
||||
|
|
@ -2129,20 +2247,23 @@ static void kbase_queue_oom_event(struct kbase_queue *const queue)
|
|||
|
||||
err = handle_oom_event(group, stream);
|
||||
|
||||
kbase_csf_scheduler_spin_lock(kbdev, &flags);
|
||||
kbase_csf_firmware_cs_input_mask(stream, CS_REQ, cs_oom_ack,
|
||||
CS_REQ_TILER_OOM_MASK);
|
||||
kbase_csf_ring_cs_kernel_doorbell(kbdev, csi_index, slot_num, true);
|
||||
kbase_csf_scheduler_spin_unlock(kbdev, flags);
|
||||
|
||||
if (err) {
|
||||
if (unlikely(err)) {
|
||||
dev_warn(
|
||||
kbdev->dev,
|
||||
"Queue group to be terminated, couldn't handle the OoM event\n");
|
||||
kbase_debug_csf_fault_notify(kbdev, kctx, DF_TILER_OOM);
|
||||
kbase_csf_scheduler_unlock(kbdev);
|
||||
term_queue_group(group);
|
||||
flush_gpu_cache_on_fatal_error(kbdev);
|
||||
report_tiler_oom_error(group);
|
||||
return;
|
||||
}
|
||||
|
||||
kbase_csf_ring_cs_kernel_doorbell(kbdev, csi_index, slot_num, true);
|
||||
unlock:
|
||||
kbase_csf_scheduler_unlock(kbdev);
|
||||
}
|
||||
|
|
@ -2164,6 +2285,7 @@ static void oom_event_worker(struct work_struct *data)
|
|||
struct kbase_device *const kbdev = kctx->kbdev;
|
||||
|
||||
int err = kbase_reset_gpu_try_prevent(kbdev);
|
||||
|
||||
/* Regardless of whether reset failed or is currently happening, exit
|
||||
* early
|
||||
*/
|
||||
|
|
@ -2216,12 +2338,13 @@ static void timer_event_worker(struct work_struct *data)
|
|||
struct kbase_queue_group *const group =
|
||||
container_of(data, struct kbase_queue_group, timer_event_work);
|
||||
struct kbase_context *const kctx = group->kctx;
|
||||
struct kbase_device *const kbdev = kctx->kbdev;
|
||||
bool reset_prevented = false;
|
||||
int err = kbase_reset_gpu_prevent_and_wait(kctx->kbdev);
|
||||
int err = kbase_reset_gpu_prevent_and_wait(kbdev);
|
||||
|
||||
if (err)
|
||||
dev_warn(
|
||||
kctx->kbdev->dev,
|
||||
kbdev->dev,
|
||||
"Unsuccessful GPU reset detected when terminating group %d on progress timeout, attempting to terminate regardless",
|
||||
group->handle);
|
||||
else
|
||||
|
|
@ -2230,11 +2353,12 @@ static void timer_event_worker(struct work_struct *data)
|
|||
mutex_lock(&kctx->csf.lock);
|
||||
|
||||
term_queue_group(group);
|
||||
flush_gpu_cache_on_fatal_error(kbdev);
|
||||
report_group_timeout_error(group);
|
||||
|
||||
mutex_unlock(&kctx->csf.lock);
|
||||
if (reset_prevented)
|
||||
kbase_reset_gpu_allow(kctx->kbdev);
|
||||
kbase_reset_gpu_allow(kbdev);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -2242,11 +2366,15 @@ static void timer_event_worker(struct work_struct *data)
|
|||
*
|
||||
* @group: Pointer to GPU queue group for which the timeout event is received.
|
||||
*
|
||||
* Notify a waiting user space client of the timeout.
|
||||
* Enqueue a work item to terminate the group and notify the event notification
|
||||
* thread of progress timeout fault for the GPU command queue group.
|
||||
*/
|
||||
static void handle_progress_timer_event(struct kbase_queue_group *const group)
|
||||
{
|
||||
kbase_debug_csf_fault_notify(group->kctx->kbdev, group->kctx,
|
||||
DF_PROGRESS_TIMER_TIMEOUT);
|
||||
|
||||
queue_work(group->kctx->csf.wq, &group->timer_event_work);
|
||||
}
|
||||
|
||||
|
|
@ -2274,16 +2402,20 @@ static void protm_event_worker(struct work_struct *data)
|
|||
* handle_fault_event - Handler for CS fault.
|
||||
*
|
||||
* @queue: Pointer to queue for which fault event was received.
|
||||
* @stream: Pointer to the structure containing info provided by the
|
||||
* firmware about the CSI.
|
||||
*
|
||||
* Prints meaningful CS fault information.
|
||||
* @cs_ack: Value of the CS_ACK register in the CS kernel input page used for
|
||||
* the queue.
|
||||
*
|
||||
* Print required information about the CS fault and notify the user space client
|
||||
* about the fault.
|
||||
*/
|
||||
static void
|
||||
handle_fault_event(struct kbase_queue *const queue,
|
||||
struct kbase_csf_cmd_stream_info const *const stream)
|
||||
handle_fault_event(struct kbase_queue *const queue, const u32 cs_ack)
|
||||
{
|
||||
struct kbase_device *const kbdev = queue->kctx->kbdev;
|
||||
struct kbase_csf_cmd_stream_group_info const *ginfo =
|
||||
&kbdev->csf.global_iface.groups[queue->group->csg_nr];
|
||||
struct kbase_csf_cmd_stream_info const *stream =
|
||||
&ginfo->streams[queue->csi_index];
|
||||
const u32 cs_fault = kbase_csf_firmware_cs_output(stream, CS_FAULT);
|
||||
const u64 cs_fault_info =
|
||||
kbase_csf_firmware_cs_output(stream, CS_FAULT_INFO_LO) |
|
||||
|
|
@ -2295,7 +2427,6 @@ handle_fault_event(struct kbase_queue *const queue,
|
|||
CS_FAULT_EXCEPTION_DATA_GET(cs_fault);
|
||||
const u64 cs_fault_info_exception_data =
|
||||
CS_FAULT_INFO_EXCEPTION_DATA_GET(cs_fault_info);
|
||||
struct kbase_device *const kbdev = queue->kctx->kbdev;
|
||||
|
||||
kbase_csf_scheduler_spin_lock_assert_held(kbdev);
|
||||
|
||||
|
|
@ -2310,6 +2441,36 @@ handle_fault_event(struct kbase_queue *const queue,
|
|||
kbase_gpu_exception_name(cs_fault_exception_type),
|
||||
cs_fault_exception_data, cs_fault_info_exception_data);
|
||||
|
||||
|
||||
#if IS_ENABLED(CONFIG_DEBUG_FS)
|
||||
/* CS_RESOURCE_TERMINATED type fault event can be ignored from the
|
||||
* standpoint of dump on error. It is used to report fault for the CSIs
|
||||
* that are associated with the same CSG as the CSI for which the actual
|
||||
* fault was reported by the Iterator.
|
||||
* Dumping would be triggered when the actual fault is reported.
|
||||
*
|
||||
* CS_INHERIT_FAULT can also be ignored. It could happen due to the error
|
||||
* in other types of queues (cpu/kcpu). If a fault had occurred in some
|
||||
* other GPU queue then the dump would have been performed anyways when
|
||||
* that fault was reported.
|
||||
*/
|
||||
if ((cs_fault_exception_type != CS_FAULT_EXCEPTION_TYPE_CS_INHERIT_FAULT) &&
|
||||
(cs_fault_exception_type != CS_FAULT_EXCEPTION_TYPE_CS_RESOURCE_TERMINATED)) {
|
||||
if (unlikely(kbase_debug_csf_fault_notify(kbdev, queue->kctx, DF_CS_FAULT))) {
|
||||
get_queue(queue);
|
||||
queue->cs_error = cs_fault;
|
||||
queue->cs_error_info = cs_fault_info;
|
||||
queue->cs_error_fatal = false;
|
||||
if (!queue_work(queue->kctx->csf.wq, &queue->cs_error_work))
|
||||
release_queue(queue);
|
||||
return;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
kbase_csf_firmware_cs_input_mask(stream, CS_REQ, cs_ack,
|
||||
CS_REQ_FAULT_MASK);
|
||||
kbase_csf_ring_cs_kernel_doorbell(kbdev, queue->csi_index, queue->group->csg_nr, true);
|
||||
}
|
||||
|
||||
static void report_queue_fatal_error(struct kbase_queue *const queue,
|
||||
|
|
@ -2341,16 +2502,16 @@ static void report_queue_fatal_error(struct kbase_queue *const queue,
|
|||
}
|
||||
|
||||
/**
|
||||
* fatal_event_worker - Handle the fatal error for the GPU queue
|
||||
* fatal_event_worker - Handle the CS_FATAL/CS_FAULT error for the GPU queue
|
||||
*
|
||||
* @data: Pointer to a work_struct embedded in GPU command queue.
|
||||
*
|
||||
* Terminate the CSG and report the error to userspace.
|
||||
*/
|
||||
static void fatal_event_worker(struct work_struct *const data)
|
||||
static void cs_error_worker(struct work_struct *const data)
|
||||
{
|
||||
struct kbase_queue *const queue =
|
||||
container_of(data, struct kbase_queue, fatal_event_work);
|
||||
container_of(data, struct kbase_queue, cs_error_work);
|
||||
struct kbase_context *const kctx = queue->kctx;
|
||||
struct kbase_device *const kbdev = kctx->kbdev;
|
||||
struct kbase_queue_group *group;
|
||||
|
|
@ -2365,6 +2526,7 @@ static void fatal_event_worker(struct work_struct *const data)
|
|||
else
|
||||
reset_prevented = true;
|
||||
|
||||
kbase_debug_csf_fault_wait_completion(kbdev);
|
||||
mutex_lock(&kctx->csf.lock);
|
||||
|
||||
group = get_bound_queue_group(queue);
|
||||
|
|
@ -2373,9 +2535,35 @@ static void fatal_event_worker(struct work_struct *const data)
|
|||
goto unlock;
|
||||
}
|
||||
|
||||
#if IS_ENABLED(CONFIG_DEBUG_FS)
|
||||
if (!queue->cs_error_fatal) {
|
||||
unsigned long flags;
|
||||
int slot_num;
|
||||
|
||||
kbase_csf_scheduler_spin_lock(kbdev, &flags);
|
||||
slot_num = kbase_csf_scheduler_group_get_slot_locked(group);
|
||||
if (slot_num >= 0) {
|
||||
struct kbase_csf_cmd_stream_group_info const *ginfo =
|
||||
&kbdev->csf.global_iface.groups[slot_num];
|
||||
struct kbase_csf_cmd_stream_info const *stream =
|
||||
&ginfo->streams[queue->csi_index];
|
||||
u32 const cs_ack =
|
||||
kbase_csf_firmware_cs_output(stream, CS_ACK);
|
||||
|
||||
kbase_csf_firmware_cs_input_mask(stream, CS_REQ, cs_ack,
|
||||
CS_REQ_FAULT_MASK);
|
||||
kbase_csf_ring_cs_kernel_doorbell(kbdev, queue->csi_index,
|
||||
slot_num, true);
|
||||
}
|
||||
kbase_csf_scheduler_spin_unlock(kbdev, flags);
|
||||
goto unlock;
|
||||
}
|
||||
#endif
|
||||
|
||||
group_handle = group->handle;
|
||||
term_queue_group(group);
|
||||
report_queue_fatal_error(queue, queue->cs_fatal, queue->cs_fatal_info,
|
||||
flush_gpu_cache_on_fatal_error(kbdev);
|
||||
report_queue_fatal_error(queue, queue->cs_error, queue->cs_error_info,
|
||||
group_handle);
|
||||
|
||||
unlock:
|
||||
|
|
@ -2391,14 +2579,18 @@ static void fatal_event_worker(struct work_struct *const data)
|
|||
* @queue: Pointer to queue for which fatal event was received.
|
||||
* @stream: Pointer to the structure containing info provided by the
|
||||
* firmware about the CSI.
|
||||
* @cs_ack: Value of the CS_ACK register in the CS kernel input page used for
|
||||
* the queue.
|
||||
*
|
||||
* Prints meaningful CS fatal information.
|
||||
* Notify a waiting user space client of the CS fatal and prints meaningful
|
||||
* information.
|
||||
* Enqueue a work item to terminate the group and report the fatal error
|
||||
* to user space.
|
||||
*/
|
||||
static void
|
||||
handle_fatal_event(struct kbase_queue *const queue,
|
||||
struct kbase_csf_cmd_stream_info const *const stream)
|
||||
struct kbase_csf_cmd_stream_info const *const stream,
|
||||
u32 cs_ack)
|
||||
{
|
||||
const u32 cs_fatal = kbase_csf_firmware_cs_output(stream, CS_FATAL);
|
||||
const u64 cs_fatal_info =
|
||||
|
|
@ -2428,57 +2620,26 @@ handle_fatal_event(struct kbase_queue *const queue,
|
|||
|
||||
if (cs_fatal_exception_type ==
|
||||
CS_FATAL_EXCEPTION_TYPE_FIRMWARE_INTERNAL_ERROR) {
|
||||
kbase_debug_csf_fault_notify(kbdev, queue->kctx, DF_FW_INTERNAL_ERROR);
|
||||
queue_work(system_wq, &kbdev->csf.fw_error_work);
|
||||
} else {
|
||||
kbase_debug_csf_fault_notify(kbdev, queue->kctx, DF_CS_FATAL);
|
||||
if (cs_fatal_exception_type == CS_FATAL_EXCEPTION_TYPE_CS_UNRECOVERABLE) {
|
||||
queue->group->cs_unrecoverable = true;
|
||||
if (kbase_prepare_to_reset_gpu(queue->kctx->kbdev, RESET_FLAGS_NONE))
|
||||
kbase_reset_gpu(queue->kctx->kbdev);
|
||||
}
|
||||
get_queue(queue);
|
||||
queue->cs_fatal = cs_fatal;
|
||||
queue->cs_fatal_info = cs_fatal_info;
|
||||
if (!queue_work(queue->kctx->csf.wq, &queue->fatal_event_work))
|
||||
queue->cs_error = cs_fatal;
|
||||
queue->cs_error_info = cs_fatal_info;
|
||||
queue->cs_error_fatal = true;
|
||||
if (!queue_work(queue->kctx->csf.wq, &queue->cs_error_work))
|
||||
release_queue(queue);
|
||||
}
|
||||
|
||||
}
|
||||
kbase_csf_firmware_cs_input_mask(stream, CS_REQ, cs_ack,
|
||||
CS_REQ_FATAL_MASK);
|
||||
|
||||
/**
|
||||
* handle_queue_exception_event - Handler for CS fatal/fault exception events.
|
||||
*
|
||||
* @queue: Pointer to queue for which fatal/fault event was received.
|
||||
* @cs_req: Value of the CS_REQ register from the CS's input page.
|
||||
* @cs_ack: Value of the CS_ACK register from the CS's output page.
|
||||
*/
|
||||
static void handle_queue_exception_event(struct kbase_queue *const queue,
|
||||
const u32 cs_req, const u32 cs_ack)
|
||||
{
|
||||
struct kbase_csf_cmd_stream_group_info const *ginfo;
|
||||
struct kbase_csf_cmd_stream_info const *stream;
|
||||
struct kbase_context *const kctx = queue->kctx;
|
||||
struct kbase_device *const kbdev = kctx->kbdev;
|
||||
struct kbase_queue_group *group = queue->group;
|
||||
int csi_index = queue->csi_index;
|
||||
int slot_num = group->csg_nr;
|
||||
|
||||
kbase_csf_scheduler_spin_lock_assert_held(kbdev);
|
||||
|
||||
ginfo = &kbdev->csf.global_iface.groups[slot_num];
|
||||
stream = &ginfo->streams[csi_index];
|
||||
|
||||
if ((cs_ack & CS_ACK_FATAL_MASK) != (cs_req & CS_REQ_FATAL_MASK)) {
|
||||
handle_fatal_event(queue, stream);
|
||||
kbase_csf_firmware_cs_input_mask(stream, CS_REQ, cs_ack,
|
||||
CS_REQ_FATAL_MASK);
|
||||
}
|
||||
|
||||
if ((cs_ack & CS_ACK_FAULT_MASK) != (cs_req & CS_REQ_FAULT_MASK)) {
|
||||
handle_fault_event(queue, stream);
|
||||
kbase_csf_firmware_cs_input_mask(stream, CS_REQ, cs_ack,
|
||||
CS_REQ_FAULT_MASK);
|
||||
kbase_csf_ring_cs_kernel_doorbell(kbdev, csi_index, slot_num, true);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -2531,11 +2692,16 @@ static void process_cs_interrupts(struct kbase_queue_group *const group,
|
|||
kbase_csf_firmware_cs_output(stream, CS_ACK);
|
||||
struct workqueue_struct *wq = group->kctx->csf.wq;
|
||||
|
||||
if ((cs_req & CS_REQ_EXCEPTION_MASK) ^
|
||||
(cs_ack & CS_ACK_EXCEPTION_MASK)) {
|
||||
if ((cs_ack & CS_ACK_FATAL_MASK) != (cs_req & CS_REQ_FATAL_MASK)) {
|
||||
KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, CSI_INTERRUPT_FAULT,
|
||||
group, queue, cs_req ^ cs_ack);
|
||||
handle_queue_exception_event(queue, cs_req, cs_ack);
|
||||
handle_fatal_event(queue, stream, cs_ack);
|
||||
}
|
||||
|
||||
if ((cs_ack & CS_ACK_FAULT_MASK) != (cs_req & CS_REQ_FAULT_MASK)) {
|
||||
KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, CSI_INTERRUPT_FAULT,
|
||||
group, queue, cs_req ^ cs_ack);
|
||||
handle_fault_event(queue, cs_ack);
|
||||
}
|
||||
|
||||
/* PROTM_PEND and TILER_OOM can be safely ignored
|
||||
|
|
@ -2597,6 +2763,8 @@ static void process_cs_interrupts(struct kbase_queue_group *const group,
|
|||
if (test_bit(group->csg_nr, scheduler->csg_slots_idle_mask)) {
|
||||
clear_bit(group->csg_nr,
|
||||
scheduler->csg_slots_idle_mask);
|
||||
KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_SLOT_IDLE_CLEAR, group,
|
||||
scheduler->csg_slots_idle_mask[0]);
|
||||
dev_dbg(kbdev->dev,
|
||||
"Group-%d on slot %d de-idled by protm request",
|
||||
group->handle, group->csg_nr);
|
||||
|
|
@ -2698,7 +2866,12 @@ static void process_csg_interrupts(struct kbase_device *const kbdev, int const c
|
|||
/* If there are non-idle CSGs waiting for a slot, fire
|
||||
* a tock for a replacement.
|
||||
*/
|
||||
mod_delayed_work(scheduler->wq, &scheduler->tock_work, 0);
|
||||
KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_INTERRUPT_NON_IDLE_GROUPS,
|
||||
group, req ^ ack);
|
||||
kbase_csf_scheduler_invoke_tock(kbdev);
|
||||
} else {
|
||||
KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_INTERRUPT_NO_NON_IDLE_GROUPS,
|
||||
group, req ^ ack);
|
||||
}
|
||||
|
||||
if (group->scan_seq_num < track->idle_seq) {
|
||||
|
|
@ -2709,14 +2882,15 @@ static void process_csg_interrupts(struct kbase_device *const kbdev, int const c
|
|||
|
||||
if ((req ^ ack) & CSG_REQ_PROGRESS_TIMER_EVENT_MASK) {
|
||||
kbase_csf_firmware_csg_input_mask(ginfo, CSG_REQ, ack,
|
||||
CSG_REQ_PROGRESS_TIMER_EVENT_MASK);
|
||||
CSG_REQ_PROGRESS_TIMER_EVENT_MASK);
|
||||
|
||||
KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_INTERRUPT_PROGRESS_TIMER_EVENT,
|
||||
group, req ^ ack);
|
||||
dev_info(kbdev->dev,
|
||||
KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_INTERRUPT_PROGRESS_TIMER_EVENT, group,
|
||||
req ^ ack);
|
||||
dev_info(
|
||||
kbdev->dev,
|
||||
"[%llu] Iterator PROGRESS_TIMER timeout notification received for group %u of ctx %d_%d on slot %d\n",
|
||||
kbase_backend_get_cycle_cnt(kbdev),
|
||||
group->handle, group->kctx->tgid, group->kctx->id, csg_nr);
|
||||
kbase_backend_get_cycle_cnt(kbdev), group->handle, group->kctx->tgid,
|
||||
group->kctx->id, csg_nr);
|
||||
|
||||
handle_progress_timer_event(group);
|
||||
}
|
||||
|
|
@ -2904,7 +3078,7 @@ static inline void process_tracked_info_for_protm(struct kbase_device *kbdev,
|
|||
* for the scheduler to re-examine the case.
|
||||
*/
|
||||
dev_dbg(kbdev->dev, "Attempt pending protm from idle slot %d\n", track->idle_slot);
|
||||
mod_delayed_work(scheduler->wq, &scheduler->tock_work, 0);
|
||||
kbase_csf_scheduler_invoke_tock(kbdev);
|
||||
} else if (group) {
|
||||
u32 i, num_groups = kbdev->csf.global_iface.group_num;
|
||||
struct kbase_queue_group *grp;
|
||||
|
|
@ -2927,7 +3101,7 @@ static inline void process_tracked_info_for_protm(struct kbase_device *kbdev,
|
|||
tock_triggered = true;
|
||||
dev_dbg(kbdev->dev,
|
||||
"Attempt new protm from tick/tock idle slot %d\n", i);
|
||||
mod_delayed_work(scheduler->wq, &scheduler->tock_work, 0);
|
||||
kbase_csf_scheduler_invoke_tock(kbdev);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
|
@ -2940,77 +3114,133 @@ static inline void process_tracked_info_for_protm(struct kbase_device *kbdev,
|
|||
}
|
||||
}
|
||||
|
||||
static void order_job_irq_clear_with_iface_mem_read(void)
|
||||
{
|
||||
/* Ensure that write to the JOB_IRQ_CLEAR is ordered with regards to the
|
||||
* read from interface memory. The ordering is needed considering the way
|
||||
* FW & Kbase writes to the JOB_IRQ_RAWSTAT and JOB_IRQ_CLEAR registers
|
||||
* without any synchronization. Without the barrier there is no guarantee
|
||||
* about the ordering, the write to IRQ_CLEAR can take effect after the read
|
||||
* from interface memory and that could cause a problem for the scenario where
|
||||
* FW sends back to back notifications for the same CSG for events like
|
||||
* SYNC_UPDATE and IDLE, but Kbase gets a single IRQ and observes only the
|
||||
* first event. Similar thing can happen with glb events like CFG_ALLOC_EN
|
||||
* acknowledgment and GPU idle notification.
|
||||
*
|
||||
* MCU CPU
|
||||
* --------------- ----------------
|
||||
* Update interface memory Write to IRQ_CLEAR to clear current IRQ
|
||||
* <barrier> <barrier>
|
||||
* Write to IRQ_RAWSTAT to raise new IRQ Read interface memory
|
||||
*/
|
||||
|
||||
/* CPU and GPU would be in the same Outer shareable domain */
|
||||
dmb(osh);
|
||||
}
|
||||
|
||||
void kbase_csf_interrupt(struct kbase_device *kbdev, u32 val)
|
||||
{
|
||||
unsigned long flags;
|
||||
u32 csg_interrupts = val & ~JOB_IRQ_GLOBAL_IF;
|
||||
struct irq_idle_and_protm_track track = { .protm_grp = NULL, .idle_seq = U32_MAX };
|
||||
bool deferred_handling_glb_idle_irq = false;
|
||||
|
||||
lockdep_assert_held(&kbdev->hwaccess_lock);
|
||||
|
||||
KBASE_KTRACE_ADD(kbdev, CSF_INTERRUPT_START, NULL, val);
|
||||
kbase_reg_write(kbdev, JOB_CONTROL_REG(JOB_IRQ_CLEAR), val);
|
||||
|
||||
if (csg_interrupts != 0) {
|
||||
kbase_csf_scheduler_spin_lock(kbdev, &flags);
|
||||
/* Looping through and track the highest idle and protm groups */
|
||||
while (csg_interrupts != 0) {
|
||||
int const csg_nr = ffs(csg_interrupts) - 1;
|
||||
do {
|
||||
unsigned long flags;
|
||||
u32 csg_interrupts = val & ~JOB_IRQ_GLOBAL_IF;
|
||||
struct irq_idle_and_protm_track track = { .protm_grp = NULL, .idle_seq = U32_MAX };
|
||||
bool glb_idle_irq_received = false;
|
||||
|
||||
process_csg_interrupts(kbdev, csg_nr, &track);
|
||||
csg_interrupts &= ~(1 << csg_nr);
|
||||
kbase_reg_write(kbdev, JOB_CONTROL_REG(JOB_IRQ_CLEAR), val);
|
||||
order_job_irq_clear_with_iface_mem_read();
|
||||
|
||||
if (csg_interrupts != 0) {
|
||||
kbase_csf_scheduler_spin_lock(kbdev, &flags);
|
||||
/* Looping through and track the highest idle and protm groups */
|
||||
while (csg_interrupts != 0) {
|
||||
int const csg_nr = ffs(csg_interrupts) - 1;
|
||||
|
||||
process_csg_interrupts(kbdev, csg_nr, &track);
|
||||
csg_interrupts &= ~(1 << csg_nr);
|
||||
}
|
||||
|
||||
/* Handle protm from the tracked information */
|
||||
process_tracked_info_for_protm(kbdev, &track);
|
||||
kbase_csf_scheduler_spin_unlock(kbdev, flags);
|
||||
}
|
||||
|
||||
/* Handle protm from the tracked information */
|
||||
process_tracked_info_for_protm(kbdev, &track);
|
||||
kbase_csf_scheduler_spin_unlock(kbdev, flags);
|
||||
}
|
||||
if (val & JOB_IRQ_GLOBAL_IF) {
|
||||
const struct kbase_csf_global_iface *const global_iface =
|
||||
&kbdev->csf.global_iface;
|
||||
|
||||
if (val & JOB_IRQ_GLOBAL_IF) {
|
||||
const struct kbase_csf_global_iface *const global_iface =
|
||||
&kbdev->csf.global_iface;
|
||||
kbdev->csf.interrupt_received = true;
|
||||
|
||||
kbdev->csf.interrupt_received = true;
|
||||
if (!kbdev->csf.firmware_reloaded)
|
||||
kbase_csf_firmware_reload_completed(kbdev);
|
||||
else if (global_iface->output) {
|
||||
u32 glb_req, glb_ack;
|
||||
|
||||
if (!kbdev->csf.firmware_reloaded)
|
||||
kbase_csf_firmware_reload_completed(kbdev);
|
||||
else if (global_iface->output) {
|
||||
u32 glb_req, glb_ack;
|
||||
kbase_csf_scheduler_spin_lock(kbdev, &flags);
|
||||
glb_req =
|
||||
kbase_csf_firmware_global_input_read(global_iface, GLB_REQ);
|
||||
glb_ack = kbase_csf_firmware_global_output(global_iface, GLB_ACK);
|
||||
KBASE_KTRACE_ADD(kbdev, CSF_INTERRUPT_GLB_REQ_ACK, NULL,
|
||||
glb_req ^ glb_ack);
|
||||
|
||||
kbase_csf_scheduler_spin_lock(kbdev, &flags);
|
||||
glb_req = kbase_csf_firmware_global_input_read(
|
||||
global_iface, GLB_REQ);
|
||||
glb_ack = kbase_csf_firmware_global_output(
|
||||
global_iface, GLB_ACK);
|
||||
KBASE_KTRACE_ADD(kbdev, CSF_INTERRUPT_GLB_REQ_ACK, NULL, glb_req ^ glb_ack);
|
||||
check_protm_enter_req_complete(kbdev, glb_req, glb_ack);
|
||||
|
||||
check_protm_enter_req_complete(kbdev, glb_req, glb_ack);
|
||||
if ((glb_req ^ glb_ack) & GLB_REQ_PROTM_EXIT_MASK)
|
||||
process_protm_exit(kbdev, glb_ack);
|
||||
|
||||
if ((glb_req ^ glb_ack) & GLB_REQ_PROTM_EXIT_MASK)
|
||||
process_protm_exit(kbdev, glb_ack);
|
||||
|
||||
/* Handle IDLE Hysteresis notification event */
|
||||
if ((glb_req ^ glb_ack) & GLB_REQ_IDLE_EVENT_MASK) {
|
||||
dev_dbg(kbdev->dev, "Idle-hysteresis event flagged");
|
||||
kbase_csf_firmware_global_input_mask(
|
||||
/* Handle IDLE Hysteresis notification event */
|
||||
if ((glb_req ^ glb_ack) & GLB_REQ_IDLE_EVENT_MASK) {
|
||||
dev_dbg(kbdev->dev, "Idle-hysteresis event flagged");
|
||||
kbase_csf_firmware_global_input_mask(
|
||||
global_iface, GLB_REQ, glb_ack,
|
||||
GLB_REQ_IDLE_EVENT_MASK);
|
||||
|
||||
kbase_csf_scheduler_process_gpu_idle_event(kbdev);
|
||||
glb_idle_irq_received = true;
|
||||
/* Defer handling this IRQ to account for a race condition
|
||||
* where the idle worker could be executed before we have
|
||||
* finished handling all pending IRQs (including CSG IDLE
|
||||
* IRQs).
|
||||
*/
|
||||
deferred_handling_glb_idle_irq = true;
|
||||
}
|
||||
|
||||
process_prfcnt_interrupts(kbdev, glb_req, glb_ack);
|
||||
|
||||
kbase_csf_scheduler_spin_unlock(kbdev, flags);
|
||||
|
||||
/* Invoke the MCU state machine as a state transition
|
||||
* might have completed.
|
||||
*/
|
||||
kbase_pm_update_state(kbdev);
|
||||
}
|
||||
|
||||
process_prfcnt_interrupts(kbdev, glb_req, glb_ack);
|
||||
|
||||
kbase_csf_scheduler_spin_unlock(kbdev, flags);
|
||||
|
||||
/* Invoke the MCU state machine as a state transition
|
||||
* might have completed.
|
||||
*/
|
||||
kbase_pm_update_state(kbdev);
|
||||
}
|
||||
|
||||
if (!glb_idle_irq_received)
|
||||
break;
|
||||
/* Attempt to serve potential IRQs that might have occurred
|
||||
* whilst handling the previous IRQ. In case we have observed
|
||||
* the GLB IDLE IRQ without all CSGs having been marked as
|
||||
* idle, the GPU would be treated as no longer idle and left
|
||||
* powered on.
|
||||
*/
|
||||
val = kbase_reg_read(kbdev, JOB_CONTROL_REG(JOB_IRQ_STATUS));
|
||||
} while (val);
|
||||
|
||||
if (deferred_handling_glb_idle_irq) {
|
||||
unsigned long flags;
|
||||
|
||||
kbase_csf_scheduler_spin_lock(kbdev, &flags);
|
||||
kbase_csf_scheduler_process_gpu_idle_event(kbdev);
|
||||
kbase_csf_scheduler_spin_unlock(kbdev, flags);
|
||||
}
|
||||
|
||||
wake_up_all(&kbdev->csf.event_wait);
|
||||
|
||||
KBASE_KTRACE_ADD(kbdev, CSF_INTERRUPT_END, NULL, val);
|
||||
}
|
||||
|
||||
|
|
@ -3037,9 +3267,8 @@ int kbase_csf_doorbell_mapping_init(struct kbase_device *kbdev)
|
|||
if (IS_ERR(filp))
|
||||
return PTR_ERR(filp);
|
||||
|
||||
ret = kbase_mem_pool_alloc_pages(
|
||||
&kbdev->mem_pools.small[KBASE_MEM_GROUP_CSF_FW],
|
||||
1, &phys, false);
|
||||
ret = kbase_mem_pool_alloc_pages(&kbdev->mem_pools.small[KBASE_MEM_GROUP_CSF_FW], 1, &phys,
|
||||
false);
|
||||
|
||||
if (ret <= 0) {
|
||||
fput(filp);
|
||||
|
|
@ -3073,9 +3302,8 @@ int kbase_csf_setup_dummy_user_reg_page(struct kbase_device *kbdev)
|
|||
|
||||
kbdev->csf.dummy_user_reg_page = as_tagged(0);
|
||||
|
||||
ret = kbase_mem_pool_alloc_pages(
|
||||
&kbdev->mem_pools.small[KBASE_MEM_GROUP_CSF_FW], 1, &phys,
|
||||
false);
|
||||
ret = kbase_mem_pool_alloc_pages(&kbdev->mem_pools.small[KBASE_MEM_GROUP_CSF_FW], 1, &phys,
|
||||
false);
|
||||
|
||||
if (ret <= 0)
|
||||
return ret;
|
||||
|
|
|
|||
|
|
@ -23,12 +23,135 @@
|
|||
#include <mali_kbase.h>
|
||||
#include <linux/seq_file.h>
|
||||
#include <linux/delay.h>
|
||||
#include <csf/mali_kbase_csf_trace_buffer.h>
|
||||
#include <backend/gpu/mali_kbase_pm_internal.h>
|
||||
|
||||
#if IS_ENABLED(CONFIG_DEBUG_FS)
|
||||
#include "mali_kbase_csf_tl_reader.h"
|
||||
|
||||
/* Wait time to be used cumulatively for all the CSG slots.
|
||||
* Since scheduler lock is held when STATUS_UPDATE request is sent, there won't be
|
||||
* any other Host request pending on the FW side and usually FW would be responsive
|
||||
* to the Doorbell IRQs as it won't do any polling for a long time and also it won't
|
||||
* have to wait for any HW state transition to complete for publishing the status.
|
||||
* So it is reasonable to expect that handling of STATUS_UPDATE request would be
|
||||
* relatively very quick.
|
||||
*/
|
||||
#define STATUS_UPDATE_WAIT_TIMEOUT 500
|
||||
|
||||
/* The bitmask of CSG slots for which the STATUS_UPDATE request completed.
|
||||
* The access to it is serialized with scheduler lock, so at a time it would
|
||||
* get used either for "active_groups" or per context "groups" debugfs file.
|
||||
*/
|
||||
static DECLARE_BITMAP(csg_slots_status_updated, MAX_SUPPORTED_CSGS);
|
||||
|
||||
static
|
||||
bool csg_slot_status_update_finish(struct kbase_device *kbdev, u32 csg_nr)
|
||||
{
|
||||
struct kbase_csf_cmd_stream_group_info const *const ginfo =
|
||||
&kbdev->csf.global_iface.groups[csg_nr];
|
||||
|
||||
return !((kbase_csf_firmware_csg_input_read(ginfo, CSG_REQ) ^
|
||||
kbase_csf_firmware_csg_output(ginfo, CSG_ACK)) &
|
||||
CSG_REQ_STATUS_UPDATE_MASK);
|
||||
}
|
||||
|
||||
static
|
||||
bool csg_slots_status_update_finish(struct kbase_device *kbdev,
|
||||
const unsigned long *slots_mask)
|
||||
{
|
||||
const u32 max_csg_slots = kbdev->csf.global_iface.group_num;
|
||||
bool changed = false;
|
||||
u32 csg_nr;
|
||||
|
||||
lockdep_assert_held(&kbdev->csf.scheduler.lock);
|
||||
|
||||
for_each_set_bit(csg_nr, slots_mask, max_csg_slots) {
|
||||
if (csg_slot_status_update_finish(kbdev, csg_nr)) {
|
||||
set_bit(csg_nr, csg_slots_status_updated);
|
||||
changed = true;
|
||||
}
|
||||
}
|
||||
|
||||
return changed;
|
||||
}
|
||||
|
||||
static void wait_csg_slots_status_update_finish(struct kbase_device *kbdev,
|
||||
unsigned long *slots_mask)
|
||||
{
|
||||
const u32 max_csg_slots = kbdev->csf.global_iface.group_num;
|
||||
long remaining = kbase_csf_timeout_in_jiffies(STATUS_UPDATE_WAIT_TIMEOUT);
|
||||
|
||||
lockdep_assert_held(&kbdev->csf.scheduler.lock);
|
||||
|
||||
bitmap_zero(csg_slots_status_updated, max_csg_slots);
|
||||
|
||||
while (!bitmap_empty(slots_mask, max_csg_slots) && remaining) {
|
||||
remaining = wait_event_timeout(kbdev->csf.event_wait,
|
||||
csg_slots_status_update_finish(kbdev, slots_mask),
|
||||
remaining);
|
||||
if (likely(remaining)) {
|
||||
bitmap_andnot(slots_mask, slots_mask,
|
||||
csg_slots_status_updated, max_csg_slots);
|
||||
} else {
|
||||
dev_warn(kbdev->dev,
|
||||
"STATUS_UPDATE request timed out for slots 0x%lx",
|
||||
slots_mask[0]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void update_active_groups_status(struct kbase_device *kbdev, struct seq_file *file)
|
||||
{
|
||||
u32 max_csg_slots = kbdev->csf.global_iface.group_num;
|
||||
DECLARE_BITMAP(used_csgs, MAX_SUPPORTED_CSGS) = { 0 };
|
||||
u32 csg_nr;
|
||||
unsigned long flags;
|
||||
|
||||
lockdep_assert_held(&kbdev->csf.scheduler.lock);
|
||||
|
||||
/* Global doorbell ring for CSG STATUS_UPDATE request or User doorbell
|
||||
* ring for Extract offset update, shall not be made when MCU has been
|
||||
* put to sleep otherwise it will undesirably make MCU exit the sleep
|
||||
* state. Also it isn't really needed as FW will implicitly update the
|
||||
* status of all on-slot groups when MCU sleep request is sent to it.
|
||||
*/
|
||||
if (kbdev->csf.scheduler.state == SCHED_SLEEPING) {
|
||||
bitmap_copy(csg_slots_status_updated,
|
||||
kbdev->csf.scheduler.csg_inuse_bitmap, max_csg_slots);
|
||||
return;
|
||||
}
|
||||
|
||||
for (csg_nr = 0; csg_nr < max_csg_slots; csg_nr++) {
|
||||
struct kbase_queue_group *const group =
|
||||
kbdev->csf.scheduler.csg_slots[csg_nr].resident_group;
|
||||
if (!group)
|
||||
continue;
|
||||
/* Ring the User doorbell for FW to update the Extract offset */
|
||||
kbase_csf_ring_doorbell(kbdev, group->doorbell_nr);
|
||||
set_bit(csg_nr, used_csgs);
|
||||
}
|
||||
|
||||
/* Return early if there are no on-slot groups */
|
||||
if (bitmap_empty(used_csgs, max_csg_slots))
|
||||
return;
|
||||
|
||||
kbase_csf_scheduler_spin_lock(kbdev, &flags);
|
||||
for_each_set_bit(csg_nr, used_csgs, max_csg_slots) {
|
||||
struct kbase_csf_cmd_stream_group_info const *const ginfo =
|
||||
&kbdev->csf.global_iface.groups[csg_nr];
|
||||
kbase_csf_firmware_csg_input_mask(ginfo, CSG_REQ,
|
||||
~kbase_csf_firmware_csg_output(ginfo, CSG_ACK),
|
||||
CSG_REQ_STATUS_UPDATE_MASK);
|
||||
}
|
||||
|
||||
BUILD_BUG_ON(MAX_SUPPORTED_CSGS > (sizeof(used_csgs[0]) * BITS_PER_BYTE));
|
||||
kbase_csf_ring_csg_slots_doorbell(kbdev, used_csgs[0]);
|
||||
kbase_csf_scheduler_spin_unlock(kbdev, flags);
|
||||
wait_csg_slots_status_update_finish(kbdev, used_csgs);
|
||||
/* Wait for the User doobell ring to take effect */
|
||||
msleep(100);
|
||||
}
|
||||
|
||||
#define MAX_SCHED_STATE_STRING_LEN (16)
|
||||
static const char *scheduler_state_to_string(struct kbase_device *kbdev,
|
||||
enum kbase_csf_scheduler_state sched_state)
|
||||
|
|
@ -77,16 +200,32 @@ static const char *blocked_reason_to_string(u32 reason_id)
|
|||
return cs_blocked_reason[reason_id];
|
||||
}
|
||||
|
||||
/* Tell whether the given global interface version reports SB_SOURCE.
 *
 * @glb_version: Version value of the global interface.
 *
 * Return: true for major version 3 with minor >= 5, major version 2 with
 *         minor >= 6, or major version 1 with minor >= 3; false otherwise.
 */
static bool sb_source_supported(u32 glb_version)
{
	const u32 major = GLB_VERSION_MAJOR_GET(glb_version);
	const u32 minor = GLB_VERSION_MINOR_GET(glb_version);

	switch (major) {
	case 3:
		return minor >= 5;
	case 2:
		return minor >= 6;
	case 1:
		return minor >= 3;
	default:
		return false;
	}
}
|
||||
|
||||
static void kbasep_csf_scheduler_dump_active_queue_cs_status_wait(
|
||||
struct seq_file *file, u32 wait_status, u32 wait_sync_value,
|
||||
u64 wait_sync_live_value, u64 wait_sync_pointer, u32 sb_status,
|
||||
u32 blocked_reason)
|
||||
struct seq_file *file, u32 glb_version, u32 wait_status, u32 wait_sync_value,
|
||||
u64 wait_sync_live_value, u64 wait_sync_pointer, u32 sb_status, u32 blocked_reason)
|
||||
{
|
||||
#define WAITING "Waiting"
|
||||
#define NOT_WAITING "Not waiting"
|
||||
|
||||
seq_printf(file, "SB_MASK: %d\n",
|
||||
CS_STATUS_WAIT_SB_MASK_GET(wait_status));
|
||||
if (sb_source_supported(glb_version))
|
||||
seq_printf(file, "SB_SOURCE: %d\n", CS_STATUS_WAIT_SB_SOURCE_GET(wait_status));
|
||||
seq_printf(file, "PROGRESS_WAIT: %s\n",
|
||||
CS_STATUS_WAIT_PROGRESS_WAIT_GET(wait_status) ?
|
||||
WAITING : NOT_WAITING);
|
||||
|
|
@ -156,10 +295,13 @@ static void kbasep_csf_scheduler_dump_active_queue(struct seq_file *file,
|
|||
struct kbase_vmap_struct *mapping;
|
||||
u64 *evt;
|
||||
u64 wait_sync_live_value;
|
||||
u32 glb_version;
|
||||
|
||||
if (!queue)
|
||||
return;
|
||||
|
||||
glb_version = queue->kctx->kbdev->csf.global_iface.version;
|
||||
|
||||
if (WARN_ON(queue->csi_index == KBASEP_IF_NR_INVALID ||
|
||||
!queue->group))
|
||||
return;
|
||||
|
|
@ -200,9 +342,8 @@ static void kbasep_csf_scheduler_dump_active_queue(struct seq_file *file,
|
|||
}
|
||||
|
||||
kbasep_csf_scheduler_dump_active_queue_cs_status_wait(
|
||||
file, wait_status, wait_sync_value,
|
||||
wait_sync_live_value, wait_sync_pointer,
|
||||
sb_status, blocked_reason);
|
||||
file, glb_version, wait_status, wait_sync_value,
|
||||
wait_sync_live_value, wait_sync_pointer, sb_status, blocked_reason);
|
||||
}
|
||||
} else {
|
||||
struct kbase_device const *const kbdev =
|
||||
|
|
@ -257,9 +398,8 @@ static void kbasep_csf_scheduler_dump_active_queue(struct seq_file *file,
|
|||
}
|
||||
|
||||
kbasep_csf_scheduler_dump_active_queue_cs_status_wait(
|
||||
file, wait_status, wait_sync_value,
|
||||
wait_sync_live_value, wait_sync_pointer, sb_status,
|
||||
blocked_reason);
|
||||
file, glb_version, wait_status, wait_sync_value, wait_sync_live_value,
|
||||
wait_sync_pointer, sb_status, blocked_reason);
|
||||
/* Dealing with cs_trace */
|
||||
if (kbase_csf_scheduler_queue_has_trace(queue))
|
||||
kbasep_csf_scheduler_dump_active_cs_trace(file, stream);
|
||||
|
|
@ -270,54 +410,6 @@ static void kbasep_csf_scheduler_dump_active_queue(struct seq_file *file,
|
|||
seq_puts(file, "\n");
|
||||
}
|
||||
|
||||
static void update_active_group_status(struct seq_file *file,
|
||||
struct kbase_queue_group *const group)
|
||||
{
|
||||
struct kbase_device *const kbdev = group->kctx->kbdev;
|
||||
struct kbase_csf_cmd_stream_group_info const *const ginfo =
|
||||
&kbdev->csf.global_iface.groups[group->csg_nr];
|
||||
long remaining = kbase_csf_timeout_in_jiffies(kbdev->csf.fw_timeout_ms);
|
||||
unsigned long flags;
|
||||
|
||||
/* Global doorbell ring for CSG STATUS_UPDATE request or User doorbell
|
||||
* ring for Extract offset update, shall not be made when MCU has been
|
||||
* put to sleep otherwise it will undesirably make MCU exit the sleep
|
||||
* state. Also it isn't really needed as FW will implicitly update the
|
||||
* status of all on-slot groups when MCU sleep request is sent to it.
|
||||
*/
|
||||
if (kbdev->csf.scheduler.state == SCHED_SLEEPING)
|
||||
return;
|
||||
|
||||
/* Ring the User doobell shared between the queues bound to this
|
||||
* group, to have FW update the CS_EXTRACT for all the queues
|
||||
* bound to the group. Ring early so that FW gets adequate time
|
||||
* for the handling.
|
||||
*/
|
||||
kbase_csf_ring_doorbell(kbdev, group->doorbell_nr);
|
||||
|
||||
kbase_csf_scheduler_spin_lock(kbdev, &flags);
|
||||
kbase_csf_firmware_csg_input_mask(ginfo, CSG_REQ,
|
||||
~kbase_csf_firmware_csg_output(ginfo, CSG_ACK),
|
||||
CSG_REQ_STATUS_UPDATE_MASK);
|
||||
kbase_csf_scheduler_spin_unlock(kbdev, flags);
|
||||
kbase_csf_ring_csg_doorbell(kbdev, group->csg_nr);
|
||||
|
||||
remaining = wait_event_timeout(kbdev->csf.event_wait,
|
||||
!((kbase_csf_firmware_csg_input_read(ginfo, CSG_REQ) ^
|
||||
kbase_csf_firmware_csg_output(ginfo, CSG_ACK)) &
|
||||
CSG_REQ_STATUS_UPDATE_MASK), remaining);
|
||||
|
||||
if (!remaining) {
|
||||
dev_err(kbdev->dev,
|
||||
"Timed out for STATUS_UPDATE on group %d on slot %d",
|
||||
group->handle, group->csg_nr);
|
||||
|
||||
seq_printf(file, "*** Warn: Timed out for STATUS_UPDATE on slot %d\n",
|
||||
group->csg_nr);
|
||||
seq_puts(file, "*** The following group-record is likely stale\n");
|
||||
}
|
||||
}
|
||||
|
||||
static void kbasep_csf_scheduler_dump_active_group(struct seq_file *file,
|
||||
struct kbase_queue_group *const group)
|
||||
{
|
||||
|
|
@ -331,8 +423,6 @@ static void kbasep_csf_scheduler_dump_active_group(struct seq_file *file,
|
|||
u8 slot_priority =
|
||||
kbdev->csf.scheduler.csg_slots[group->csg_nr].priority;
|
||||
|
||||
update_active_group_status(file, group);
|
||||
|
||||
ep_c = kbase_csf_firmware_csg_output(ginfo,
|
||||
CSG_STATUS_EP_CURRENT);
|
||||
ep_r = kbase_csf_firmware_csg_output(ginfo, CSG_STATUS_EP_REQ);
|
||||
|
|
@ -348,6 +438,12 @@ static void kbasep_csf_scheduler_dump_active_group(struct seq_file *file,
|
|||
CSG_STATUS_STATE_IDLE_MASK)
|
||||
idle = 'Y';
|
||||
|
||||
if (!test_bit(group->csg_nr, csg_slots_status_updated)) {
|
||||
seq_printf(file, "*** Warn: Timed out for STATUS_UPDATE on slot %d\n",
|
||||
group->csg_nr);
|
||||
seq_puts(file, "*** The following group-record is likely stale\n");
|
||||
}
|
||||
|
||||
seq_puts(file, "GroupID, CSG NR, CSG Prio, Run State, Priority, C_EP(Alloc/Req), F_EP(Alloc/Req), T_EP(Alloc/Req), Exclusive, Idle\n");
|
||||
seq_printf(file, "%7d, %6d, %8d, %9d, %8d, %11d/%3d, %11d/%3d, %11d/%3d, %9c, %4c\n",
|
||||
group->handle,
|
||||
|
|
@ -363,10 +459,6 @@ static void kbasep_csf_scheduler_dump_active_group(struct seq_file *file,
|
|||
CSG_STATUS_EP_REQ_TILER_EP_GET(ep_r),
|
||||
exclusive,
|
||||
idle);
|
||||
|
||||
/* Wait for the User doorbell ring to take effect */
|
||||
if (kbdev->csf.scheduler.state != SCHED_SLEEPING)
|
||||
msleep(100);
|
||||
} else {
|
||||
seq_puts(file, "GroupID, CSG NR, Run State, Priority\n");
|
||||
seq_printf(file, "%7d, %6d, %9d, %8d\n",
|
||||
|
|
@ -416,10 +508,11 @@ static int kbasep_csf_queue_group_debugfs_show(struct seq_file *file,
|
|||
kbase_csf_scheduler_lock(kbdev);
|
||||
if (kbdev->csf.scheduler.state == SCHED_SLEEPING) {
|
||||
/* Wait for the MCU sleep request to complete. Please refer to the
|
||||
* update_active_group_status() function for the explanation.
|
||||
* update_active_groups_status() function for the explanation.
|
||||
*/
|
||||
kbase_pm_wait_for_desired_state(kbdev);
|
||||
}
|
||||
update_active_groups_status(kbdev, file);
|
||||
for (gr = 0; gr < MAX_QUEUE_GROUP_NUM; gr++) {
|
||||
struct kbase_queue_group *const group =
|
||||
kctx->csf.queue_groups[gr];
|
||||
|
|
@ -455,10 +548,11 @@ static int kbasep_csf_scheduler_dump_active_groups(struct seq_file *file,
|
|||
kbase_csf_scheduler_lock(kbdev);
|
||||
if (kbdev->csf.scheduler.state == SCHED_SLEEPING) {
|
||||
/* Wait for the MCU sleep request to complete. Please refer to the
|
||||
* update_active_group_status() function for the explanation.
|
||||
* update_active_groups_status() function for the explanation.
|
||||
*/
|
||||
kbase_pm_wait_for_desired_state(kbdev);
|
||||
}
|
||||
update_active_groups_status(kbdev, file);
|
||||
for (csg_nr = 0; csg_nr < num_groups; csg_nr++) {
|
||||
struct kbase_queue_group *const group =
|
||||
kbdev->csf.scheduler.csg_slots[csg_nr].resident_group;
|
||||
|
|
@ -664,7 +758,6 @@ void kbase_csf_debugfs_init(struct kbase_device *kbdev)
|
|||
&kbasep_csf_debugfs_scheduler_state_fops);
|
||||
|
||||
kbase_csf_tl_reader_debugfs_init(kbdev);
|
||||
kbase_csf_firmware_trace_buffer_debugfs_init(kbdev);
|
||||
}
|
||||
|
||||
#else
|
||||
|
|
|
|||
|
|
@ -31,6 +31,7 @@
|
|||
|
||||
#include "mali_kbase_csf_firmware.h"
|
||||
#include "mali_kbase_csf_event.h"
|
||||
#include <uapi/gpu/arm/bifrost/csf/mali_kbase_csf_errors_dumpfault.h>
|
||||
|
||||
/* Maximum number of KCPU command queues to be created per GPU address space.
|
||||
*/
|
||||
|
|
@ -355,14 +356,19 @@ struct kbase_csf_notification {
|
|||
* @trace_buffer_size: CS trace buffer size for the queue.
|
||||
* @trace_cfg: CS trace configuration parameters.
|
||||
* @error: GPU command queue fatal information to pass to user space.
|
||||
* @fatal_event_work: Work item to handle the CS fatal event reported for this
|
||||
* queue.
|
||||
* @cs_fatal_info: Records additional information about the CS fatal event.
|
||||
* @cs_fatal: Records information about the CS fatal event.
|
||||
* @cs_error_work: Work item to handle the CS fatal event reported for this
|
||||
* queue or the CS fault event if dump on fault is enabled
|
||||
* and acknowledgment for CS fault event needs to be done
|
||||
* after dumping is complete.
|
||||
* @cs_error_info: Records additional information about the CS fatal event or
|
||||
* about CS fault event if dump on fault is enabled.
|
||||
* @cs_error: Records information about the CS fatal event or
|
||||
* about CS fault event if dump on fault is enabled.
|
||||
* @cs_error_fatal: Flag to track if the CS fault or CS fatal event occurred.
|
||||
* @pending: Indicating whether the queue has new submitted work.
|
||||
* @extract_ofs: The current EXTRACT offset, this is updated during certain
|
||||
* events such as GPU idle IRQ in order to help detect a
|
||||
* queue's true idle status.
|
||||
* @extract_ofs: The current EXTRACT offset, this is only updated when handling
|
||||
* the GLB IDLE IRQ if the idle timeout value is non-0 in order
|
||||
* to help detect a queue's true idle status.
|
||||
* @saved_cmd_ptr: The command pointer value for the GPU queue, saved when the
|
||||
* group to which queue is bound is suspended.
|
||||
* This can be useful in certain cases to know that till which
|
||||
|
|
@ -377,7 +383,11 @@ struct kbase_queue {
|
|||
int doorbell_nr;
|
||||
unsigned long db_file_offset;
|
||||
struct list_head link;
|
||||
#if (KERNEL_VERSION(4, 11, 0) > LINUX_VERSION_CODE)
|
||||
atomic_t refcount;
|
||||
#else
|
||||
refcount_t refcount;
|
||||
#endif
|
||||
struct kbase_queue_group *group;
|
||||
struct kbase_va_region *queue_reg;
|
||||
struct work_struct oom_event_work;
|
||||
|
|
@ -397,14 +407,15 @@ struct kbase_queue {
|
|||
u32 trace_buffer_size;
|
||||
u32 trace_cfg;
|
||||
struct kbase_csf_notification error;
|
||||
struct work_struct fatal_event_work;
|
||||
u64 cs_fatal_info;
|
||||
u32 cs_fatal;
|
||||
struct work_struct cs_error_work;
|
||||
u64 cs_error_info;
|
||||
u32 cs_error;
|
||||
bool cs_error_fatal;
|
||||
atomic_t pending;
|
||||
u64 extract_ofs;
|
||||
#if IS_ENABLED(CONFIG_DEBUG_FS)
|
||||
u64 saved_cmd_ptr;
|
||||
#endif
|
||||
#endif /* CONFIG_DEBUG_FS */
|
||||
};
|
||||
|
||||
/**
|
||||
|
|
@ -498,6 +509,9 @@ struct kbase_protected_suspend_buffer {
|
|||
* to be returned to userspace if such an error has occurred.
|
||||
* @timer_event_work: Work item to handle the progress timeout fatal event
|
||||
* for the group.
|
||||
* @deschedule_deferred_cnt: Counter keeping a track of the number of threads
|
||||
* that tried to deschedule the group and had to defer
|
||||
* the descheduling due to the dump on fault.
|
||||
*/
|
||||
struct kbase_queue_group {
|
||||
struct kbase_context *kctx;
|
||||
|
|
@ -539,6 +553,15 @@ struct kbase_queue_group {
|
|||
|
||||
struct work_struct timer_event_work;
|
||||
|
||||
/**
|
||||
* @dvs_buf: Address and size of scratch memory.
|
||||
*
|
||||
* Used to store intermediate DVS data by the GPU.
|
||||
*/
|
||||
u64 dvs_buf;
|
||||
#if IS_ENABLED(CONFIG_DEBUG_FS)
|
||||
u32 deschedule_deferred_cnt;
|
||||
#endif
|
||||
};
|
||||
|
||||
/**
|
||||
|
|
@ -548,10 +571,10 @@ struct kbase_queue_group {
|
|||
* @lock: Lock preventing concurrent access to @array and the @in_use bitmap.
|
||||
* @array: Array of pointers to kernel CPU command queues.
|
||||
* @in_use: Bitmap which indicates which kernel CPU command queues are in use.
|
||||
* @wq: Dedicated workqueue for processing kernel CPU command queues.
|
||||
* @num_cmds: The number of commands that have been enqueued across
|
||||
* all the KCPU command queues. This could be used as a
|
||||
* timestamp to determine the command's enqueueing time.
|
||||
* @cmd_seq_num: The sequence number assigned to an enqueued command,
|
||||
* in incrementing order (older commands shall have a
|
||||
* smaller number).
|
||||
* @jit_lock: Lock to serialise JIT operations.
|
||||
* @jit_cmds_head: A list of the just-in-time memory commands, both
|
||||
* allocate & free, in submission order, protected
|
||||
* by kbase_csf_kcpu_queue_context.lock.
|
||||
|
|
@ -564,9 +587,9 @@ struct kbase_csf_kcpu_queue_context {
|
|||
struct mutex lock;
|
||||
struct kbase_kcpu_command_queue *array[KBASEP_MAX_KCPU_QUEUES];
|
||||
DECLARE_BITMAP(in_use, KBASEP_MAX_KCPU_QUEUES);
|
||||
struct workqueue_struct *wq;
|
||||
u64 num_cmds;
|
||||
atomic64_t cmd_seq_num;
|
||||
|
||||
struct mutex jit_lock;
|
||||
struct list_head jit_cmds_head;
|
||||
struct list_head jit_blocked_queues;
|
||||
};
|
||||
|
|
@ -636,6 +659,28 @@ struct kbase_csf_tiler_heap_context {
|
|||
u64 nr_of_heaps;
|
||||
};
|
||||
|
||||
/**
|
||||
* struct kbase_csf_ctx_heap_reclaim_info - Object representing the data section of
|
||||
* a kctx for tiler heap reclaim manager
|
||||
* @mgr_link: Link for hooking up to the heap reclaim manager's kctx lists
|
||||
* @nr_freed_pages: Number of freed pages from the kctx, after its attachment
|
||||
* to the reclaim manager. This is used for tracking reclaim's
|
||||
* free operation progress.
|
||||
* @nr_est_unused_pages: Estimated number of pages that could be freed for the kctx
|
||||
* when all its CSGs are off-slot, on attaching to the reclaim
|
||||
* manager.
|
||||
* @on_slot_grps: Number of on-slot groups from this kctx. In principle, if a
|
||||
* kctx has groups on-slot, the scheduler will detach it from
|
||||
* the tiler heap reclaim manager, i.e. no tiler heap memory
|
||||
* reclaiming operations on the kctx.
|
||||
*/
|
||||
struct kbase_csf_ctx_heap_reclaim_info {
|
||||
struct list_head mgr_link;
|
||||
u32 nr_freed_pages;
|
||||
u32 nr_est_unused_pages;
|
||||
u8 on_slot_grps;
|
||||
};
|
||||
|
||||
/**
|
||||
* struct kbase_csf_scheduler_context - Object representing the scheduler's
|
||||
* context for a GPU address space.
|
||||
|
|
@ -657,6 +702,10 @@ struct kbase_csf_tiler_heap_context {
|
|||
* streams bound to groups of @idle_wait_groups list.
|
||||
* @ngrp_to_schedule: Number of groups added for the context to the
|
||||
* 'groups_to_schedule' list of scheduler instance.
|
||||
* @heap_info: Heap reclaim information data of the kctx. As the
|
||||
* reclaim action needs to be coordinated with the scheduler
|
||||
* operations, any manipulations on the data needs holding
|
||||
* the scheduler's mutex lock.
|
||||
*/
|
||||
struct kbase_csf_scheduler_context {
|
||||
struct list_head runnable_groups[KBASE_QUEUE_GROUP_PRIORITY_COUNT];
|
||||
|
|
@ -666,6 +715,7 @@ struct kbase_csf_scheduler_context {
|
|||
struct workqueue_struct *sync_update_wq;
|
||||
struct work_struct sync_update_work;
|
||||
u32 ngrp_to_schedule;
|
||||
struct kbase_csf_ctx_heap_reclaim_info heap_info;
|
||||
};
|
||||
|
||||
/**
|
||||
|
|
@ -808,6 +858,22 @@ struct kbase_csf_csg_slot {
|
|||
u8 priority;
|
||||
};
|
||||
|
||||
/**
|
||||
* struct kbase_csf_sched_heap_reclaim_mgr - Object for managing tiler heap reclaim
|
||||
* kctx lists inside the CSF device's scheduler.
|
||||
*
|
||||
* @heap_reclaim: Tiler heap reclaim shrinker object.
|
||||
* @ctx_lists: Array of kctx lists, size matching CSG defined priorities. The
|
||||
* lists track the kctxs attached to the reclaim manager.
|
||||
* @unused_pages: Estimated number of unused pages from the @ctx_lists array. The
|
||||
* number is indicative for use with reclaim shrinker's count method.
|
||||
*/
|
||||
struct kbase_csf_sched_heap_reclaim_mgr {
|
||||
struct shrinker heap_reclaim;
|
||||
struct list_head ctx_lists[KBASE_QUEUE_GROUP_PRIORITY_COUNT];
|
||||
atomic_t unused_pages;
|
||||
};
|
||||
|
||||
/**
|
||||
* struct kbase_csf_scheduler - Object representing the scheduler used for
|
||||
* CSF for an instance of GPU platform device.
|
||||
|
|
@ -880,6 +946,8 @@ struct kbase_csf_csg_slot {
|
|||
* operation to implement timeslice-based scheduling.
|
||||
* @tock_work: Work item that would perform the schedule on tock
|
||||
* operation to implement the asynchronous scheduling.
|
||||
* @pending_tock_work: Indicates that the tock work item should re-execute
|
||||
* once it's finished instead of going back to sleep.
|
||||
* @ping_work: Work item that would ping the firmware at regular
|
||||
* intervals, only if there is a single active CSG
|
||||
* slot, to check if firmware is alive and would
|
||||
|
|
@ -889,8 +957,6 @@ struct kbase_csf_csg_slot {
|
|||
* @top_grp.
|
||||
* @top_grp: Pointer to queue group inside @groups_to_schedule
|
||||
* list that was assigned the highest slot priority.
|
||||
* @tock_pending_request: A "tock" request is pending: a group that is not
|
||||
* currently on the GPU demands to be scheduled.
|
||||
* @active_protm_grp: Indicates if firmware has been permitted to let GPU
|
||||
* enter protected mode with the given group. On exit
|
||||
* from protected mode the pointer is reset to NULL.
|
||||
|
|
@ -903,6 +969,13 @@ struct kbase_csf_csg_slot {
|
|||
* handler.
|
||||
* @gpu_idle_work: Work item for facilitating the scheduler to bring
|
||||
* the GPU to a low-power mode on becoming idle.
|
||||
* @fast_gpu_idle_handling: Indicates whether to relax many of the checks
|
||||
* normally done in the GPU idle worker. This is
|
||||
* set to true when handling the GLB IDLE IRQ if the
|
||||
* idle hysteresis timeout is 0, since it makes it
|
||||
* possible to receive this IRQ before the extract
|
||||
* offset is published (which would cause more
|
||||
* extensive GPU idle checks to fail).
|
||||
* @gpu_no_longer_idle: Effective only when the GPU idle worker has been
|
||||
* queued for execution, this indicates whether the
|
||||
* GPU has become non-idle since the last time the
|
||||
|
|
@ -934,6 +1007,7 @@ struct kbase_csf_csg_slot {
|
|||
* groups. It is updated on every tick/tock.
|
||||
* @interrupt_lock is used to serialize the access.
|
||||
* @protm_enter_time: GPU protected mode enter time.
|
||||
* @reclaim_mgr: CSGs tiler heap manager object.
|
||||
*/
|
||||
struct kbase_csf_scheduler {
|
||||
struct mutex lock;
|
||||
|
|
@ -960,13 +1034,14 @@ struct kbase_csf_scheduler {
|
|||
struct hrtimer tick_timer;
|
||||
struct work_struct tick_work;
|
||||
struct delayed_work tock_work;
|
||||
atomic_t pending_tock_work;
|
||||
struct delayed_work ping_work;
|
||||
struct kbase_context *top_ctx;
|
||||
struct kbase_queue_group *top_grp;
|
||||
bool tock_pending_request;
|
||||
struct kbase_queue_group *active_protm_grp;
|
||||
struct workqueue_struct *idle_wq;
|
||||
struct work_struct gpu_idle_work;
|
||||
bool fast_gpu_idle_handling;
|
||||
atomic_t gpu_no_longer_idle;
|
||||
atomic_t non_idle_offslot_grps;
|
||||
u32 non_idle_scanout_grps;
|
||||
|
|
@ -975,6 +1050,7 @@ struct kbase_csf_scheduler {
|
|||
bool tick_timer_active;
|
||||
u32 tick_protm_pending_seq;
|
||||
ktime_t protm_enter_time;
|
||||
struct kbase_csf_sched_heap_reclaim_mgr reclaim_mgr;
|
||||
};
|
||||
|
||||
/*
|
||||
|
|
@ -1161,6 +1237,7 @@ struct kbase_ipa_control {
|
|||
* @flags: bitmask of CSF_FIRMWARE_ENTRY_* conveying the interface attributes
|
||||
* @data_start: Offset into firmware image at which the interface data starts
|
||||
* @data_end: Offset into firmware image at which the interface data ends
|
||||
* @virtual_exe_start: Starting GPU execution virtual address of this interface
|
||||
* @kernel_map: A kernel mapping of the memory or NULL if not required to be
|
||||
* mapped in the kernel
|
||||
* @pma: Array of pointers to protected memory allocations.
|
||||
|
|
@ -1177,6 +1254,7 @@ struct kbase_csf_firmware_interface {
|
|||
u32 flags;
|
||||
u32 data_start;
|
||||
u32 data_end;
|
||||
u32 virtual_exe_start;
|
||||
void *kernel_map;
|
||||
struct protected_memory_allocation **pma;
|
||||
};
|
||||
|
|
@ -1208,6 +1286,74 @@ struct kbase_csf_mcu_fw {
|
|||
u8 *data;
|
||||
};
|
||||
|
||||
/*
|
||||
* Firmware log polling period.
|
||||
*/
|
||||
#define KBASE_CSF_FIRMWARE_LOG_POLL_PERIOD_MS 25
|
||||
|
||||
/**
|
||||
* enum kbase_csf_firmware_log_mode - Firmware log operating mode
|
||||
*
|
||||
* @KBASE_CSF_FIRMWARE_LOG_MODE_MANUAL: Manual mode, firmware log can be read
|
||||
* manually by the userspace (and it will also be dumped automatically into
|
||||
* dmesg on GPU reset).
|
||||
*
|
||||
* @KBASE_CSF_FIRMWARE_LOG_MODE_AUTO_PRINT: Automatic printing mode, firmware log
|
||||
* will be periodically emptied into dmesg, manual reading through debugfs is
|
||||
* disabled.
|
||||
*/
|
||||
enum kbase_csf_firmware_log_mode {
|
||||
KBASE_CSF_FIRMWARE_LOG_MODE_MANUAL,
|
||||
KBASE_CSF_FIRMWARE_LOG_MODE_AUTO_PRINT
|
||||
};
|
||||
|
||||
/**
|
||||
* struct kbase_csf_firmware_log - Object containing members for handling firmware log.
|
||||
*
|
||||
* @mode: Firmware log operating mode.
|
||||
* @busy: Indicating whether a firmware log operation is in progress.
|
||||
* @poll_work: Work item that would poll firmware log buffer
|
||||
* at regular intervals to perform any periodic
|
||||
* activities required by current log mode.
|
||||
* @dump_buf: Buffer used for dumping the log.
|
||||
* @func_call_list_va_start: Virtual address of the start of the call list of FW log functions.
|
||||
* @func_call_list_va_end: Virtual address of the end of the call list of FW log functions.
|
||||
*/
|
||||
struct kbase_csf_firmware_log {
|
||||
enum kbase_csf_firmware_log_mode mode;
|
||||
atomic_t busy;
|
||||
struct delayed_work poll_work;
|
||||
u8 *dump_buf;
|
||||
u32 func_call_list_va_start;
|
||||
u32 func_call_list_va_end;
|
||||
};
|
||||
|
||||
#if IS_ENABLED(CONFIG_DEBUG_FS)
|
||||
/**
|
||||
* struct kbase_csf_dump_on_fault - Fault information to deliver to the daemon
|
||||
*
|
||||
* @error_code: Error code.
|
||||
* @kctx_tgid: tgid value of the Kbase context for which the fault happened.
|
||||
* @kctx_id: id of the Kbase context for which the fault happened.
|
||||
* @enabled: Flag to indicate that 'csf_fault' debugfs has been opened
|
||||
* so dump on fault is enabled.
|
||||
* @fault_wait_wq: Waitqueue on which user space client is blocked till kbase
|
||||
* reports a fault.
|
||||
* @dump_wait_wq: Waitqueue on which kbase threads are blocked till user space client
|
||||
* completes the dump on fault.
|
||||
* @lock: Lock to protect this struct members from concurrent access.
|
||||
*/
|
||||
struct kbase_csf_dump_on_fault {
|
||||
enum dumpfault_error_type error_code;
|
||||
u32 kctx_tgid;
|
||||
u32 kctx_id;
|
||||
atomic_t enabled;
|
||||
wait_queue_head_t fault_wait_wq;
|
||||
wait_queue_head_t dump_wait_wq;
|
||||
spinlock_t lock;
|
||||
};
|
||||
#endif /* CONFIG_DEBUG_FS*/
|
||||
|
||||
/**
|
||||
* struct kbase_csf_device - Object representing CSF for an instance of GPU
|
||||
* platform device.
|
||||
|
|
@ -1251,11 +1397,14 @@ struct kbase_csf_mcu_fw {
|
|||
* in the address space of every process, that created
|
||||
* a Base context, to enable the access to LATEST_FLUSH
|
||||
* register from userspace.
|
||||
* @nr_user_page_mapped: The number of clients using the mapping of USER page.
|
||||
* This is used to maintain backward compatibility.
|
||||
* It's protected by @reg_lock.
|
||||
* @mali_file_inode: Pointer to the inode corresponding to mali device
|
||||
* file. This is needed in order to switch to the
|
||||
* @dummy_user_reg_page on GPU power down.
|
||||
* All instances of the mali device file will point to
|
||||
* the same inode.
|
||||
* the same inode. It's protected by @reg_lock.
|
||||
* @reg_lock: Lock to serialize the MCU firmware related actions
|
||||
* that affect all contexts such as allocation of
|
||||
* regions from shared interface area, assignment of
|
||||
|
|
@ -1320,6 +1469,8 @@ struct kbase_csf_mcu_fw {
|
|||
* @hwcnt: Contain members required for handling the dump of
|
||||
* HW counters.
|
||||
* @fw: Copy of the loaded MCU firmware image.
|
||||
* @fw_log: Contain members required for handling firmware log.
|
||||
* @dof: Structure for dump on fault.
|
||||
*/
|
||||
struct kbase_csf_device {
|
||||
struct kbase_mmu_table mcu_mmu;
|
||||
|
|
@ -1334,6 +1485,7 @@ struct kbase_csf_device {
|
|||
u32 db_file_offsets;
|
||||
struct tagged_addr dummy_db_page;
|
||||
struct tagged_addr dummy_user_reg_page;
|
||||
u32 nr_user_page_mapped;
|
||||
struct inode *mali_file_inode;
|
||||
struct mutex reg_lock;
|
||||
wait_queue_head_t event_wait;
|
||||
|
|
@ -1360,6 +1512,10 @@ struct kbase_csf_device {
|
|||
unsigned int fw_timeout_ms;
|
||||
struct kbase_csf_hwcnt hwcnt;
|
||||
struct kbase_csf_mcu_fw fw;
|
||||
struct kbase_csf_firmware_log fw_log;
|
||||
#if IS_ENABLED(CONFIG_DEBUG_FS)
|
||||
struct kbase_csf_dump_on_fault dof;
|
||||
#endif /* CONFIG_DEBUG_FS */
|
||||
};
|
||||
|
||||
/**
|
||||
|
|
|
|||
|
|
@ -169,7 +169,8 @@ void kbase_csf_event_term(struct kbase_context *kctx)
|
|||
kfree(event_cb);
|
||||
}
|
||||
|
||||
WARN_ON(!list_empty(&kctx->csf.event.error_list));
|
||||
WARN(!list_empty(&kctx->csf.event.error_list),
|
||||
"Error list not empty for ctx %d_%d\n", kctx->tgid, kctx->id);
|
||||
|
||||
spin_unlock_irqrestore(&kctx->csf.event.lock, flags);
|
||||
}
|
||||
|
|
@ -244,6 +245,14 @@ bool kbase_csf_event_error_pending(struct kbase_context *kctx)
|
|||
bool error_pending = false;
|
||||
unsigned long flags;
|
||||
|
||||
/* Withhold the error event if the dump on fault is ongoing.
|
||||
* This would prevent the Userspace from taking error recovery actions
|
||||
* (which can potentially affect the state that is being dumped).
|
||||
* Event handling thread would eventually notice the error event.
|
||||
*/
|
||||
if (unlikely(!kbase_debug_csf_fault_dump_complete(kctx->kbdev)))
|
||||
return false;
|
||||
|
||||
spin_lock_irqsave(&kctx->csf.event.lock, flags);
|
||||
error_pending = !list_empty(&kctx->csf.event.error_list);
|
||||
|
||||
|
|
|
|||
|
|
@ -21,6 +21,7 @@
|
|||
|
||||
#include "mali_kbase.h"
|
||||
#include "mali_kbase_csf_firmware_cfg.h"
|
||||
#include "mali_kbase_csf_firmware_log.h"
|
||||
#include "mali_kbase_csf_trace_buffer.h"
|
||||
#include "mali_kbase_csf_timeout.h"
|
||||
#include "mali_kbase_mem.h"
|
||||
|
|
@ -77,9 +78,11 @@ MODULE_PARM_DESC(fw_debug,
|
|||
"Enables effective use of a debugger for debugging firmware code.");
|
||||
#endif
|
||||
|
||||
#define FIRMWARE_HEADER_MAGIC (0xC3F13A6Eul)
|
||||
#define FIRMWARE_HEADER_VERSION (0ul)
|
||||
#define FIRMWARE_HEADER_LENGTH (0x14ul)
|
||||
|
||||
#define FIRMWARE_HEADER_MAGIC (0xC3F13A6Eul)
|
||||
#define FIRMWARE_HEADER_VERSION_MAJOR (0ul)
|
||||
#define FIRMWARE_HEADER_VERSION_MINOR (2ul)
|
||||
#define FIRMWARE_HEADER_LENGTH (0x14ul)
|
||||
|
||||
#define CSF_FIRMWARE_ENTRY_SUPPORTED_FLAGS \
|
||||
(CSF_FIRMWARE_ENTRY_READ | \
|
||||
|
|
@ -92,10 +95,10 @@ MODULE_PARM_DESC(fw_debug,
|
|||
|
||||
#define CSF_FIRMWARE_ENTRY_TYPE_INTERFACE (0)
|
||||
#define CSF_FIRMWARE_ENTRY_TYPE_CONFIGURATION (1)
|
||||
#define CSF_FIRMWARE_ENTRY_TYPE_FUTF_TEST (2)
|
||||
#define CSF_FIRMWARE_ENTRY_TYPE_TRACE_BUFFER (3)
|
||||
#define CSF_FIRMWARE_ENTRY_TYPE_TIMELINE_METADATA (4)
|
||||
#define CSF_FIRMWARE_ENTRY_TYPE_BUILD_INFO_METADATA (6)
|
||||
#define CSF_FIRMWARE_ENTRY_TYPE_FUNC_CALL_LIST (7)
|
||||
|
||||
#define CSF_FIRMWARE_CACHE_MODE_NONE (0ul << 3)
|
||||
#define CSF_FIRMWARE_CACHE_MODE_CACHED (1ul << 3)
|
||||
|
|
@ -431,8 +434,8 @@ static void load_fw_image_section(struct kbase_device *kbdev, const u8 *data,
|
|||
memset(p + copy_len, 0, zi_len);
|
||||
}
|
||||
|
||||
kbase_sync_single_for_device(kbdev, kbase_dma_addr(page),
|
||||
PAGE_SIZE, DMA_TO_DEVICE);
|
||||
kbase_sync_single_for_device(kbdev, kbase_dma_addr_from_tagged(phys[page_num]),
|
||||
PAGE_SIZE, DMA_TO_DEVICE);
|
||||
kunmap_atomic(p);
|
||||
}
|
||||
}
|
||||
|
|
@ -525,6 +528,58 @@ static inline bool entry_find_large_page_to_reuse(
|
|||
*pma = NULL;
|
||||
|
||||
|
||||
/* If the section starts at 2MB aligned boundary,
|
||||
* then use 2MB page(s) for it.
|
||||
*/
|
||||
if (!(virtual_start & (SZ_2M - 1))) {
|
||||
*num_pages_aligned =
|
||||
round_up(*num_pages_aligned, NUM_4K_PAGES_IN_2MB_PAGE);
|
||||
*is_small_page = false;
|
||||
goto out;
|
||||
}
|
||||
|
||||
/* If the section doesn't lie within the same 2MB aligned boundary,
|
||||
* then use 4KB pages as it would be complicated to use a 2MB page
|
||||
* for such section.
|
||||
*/
|
||||
if ((virtual_start & ~(SZ_2M - 1)) != (virtual_end & ~(SZ_2M - 1)))
|
||||
goto out;
|
||||
|
||||
/* Find the nearest 2MB aligned section which comes before the current
|
||||
* section.
|
||||
*/
|
||||
list_for_each_entry(interface, &kbdev->csf.firmware_interfaces, node) {
|
||||
const u32 virtual_diff = virtual_start - interface->virtual;
|
||||
|
||||
if (interface->virtual > virtual_end)
|
||||
continue;
|
||||
|
||||
if (interface->virtual & (SZ_2M - 1))
|
||||
continue;
|
||||
|
||||
if (virtual_diff < virtual_diff_min) {
|
||||
target_interface = interface;
|
||||
virtual_diff_min = virtual_diff;
|
||||
}
|
||||
}
|
||||
|
||||
if (target_interface) {
|
||||
const u32 page_index = virtual_diff_min >> PAGE_SHIFT;
|
||||
|
||||
if (page_index >= target_interface->num_pages_aligned)
|
||||
goto out;
|
||||
|
||||
if (target_interface->phys)
|
||||
*phys = &target_interface->phys[page_index];
|
||||
|
||||
if (target_interface->pma)
|
||||
*pma = &target_interface->pma[page_index / NUM_4K_PAGES_IN_2MB_PAGE];
|
||||
|
||||
*is_small_page = false;
|
||||
reuse_large_page = true;
|
||||
}
|
||||
|
||||
out:
|
||||
return reuse_large_page;
|
||||
}
|
||||
|
||||
|
|
@ -555,6 +610,8 @@ static int parse_memory_setup_entry(struct kbase_device *kbdev,
|
|||
u32 num_pages;
|
||||
u32 num_pages_aligned;
|
||||
char *name;
|
||||
void *name_entry;
|
||||
unsigned int name_len;
|
||||
struct tagged_addr *phys = NULL;
|
||||
struct kbase_csf_firmware_interface *interface = NULL;
|
||||
bool allocated_pages = false, protected_mode = false;
|
||||
|
|
@ -625,8 +682,8 @@ static int parse_memory_setup_entry(struct kbase_device *kbdev,
|
|||
} else {
|
||||
if (!reuse_pages) {
|
||||
ret = kbase_mem_pool_alloc_pages(
|
||||
kbase_mem_pool_group_select(
|
||||
kbdev, KBASE_MEM_GROUP_CSF_FW, is_small_page),
|
||||
kbase_mem_pool_group_select(kbdev, KBASE_MEM_GROUP_CSF_FW,
|
||||
is_small_page),
|
||||
num_pages_aligned, phys, false);
|
||||
}
|
||||
}
|
||||
|
|
@ -643,21 +700,24 @@ static int parse_memory_setup_entry(struct kbase_device *kbdev,
|
|||
data_start, data_end);
|
||||
|
||||
/* Allocate enough memory for the struct kbase_csf_firmware_interface and
|
||||
* the name of the interface. An extra byte is allocated to place a
|
||||
* NUL-terminator in. This should already be included according to the
|
||||
* specification but here we add it anyway to be robust against a
|
||||
* corrupt firmware image.
|
||||
* the name of the interface.
|
||||
*/
|
||||
interface = kmalloc(sizeof(*interface) +
|
||||
size - INTERFACE_ENTRY_NAME_OFFSET + 1, GFP_KERNEL);
|
||||
name_entry = (void *)entry + INTERFACE_ENTRY_NAME_OFFSET;
|
||||
name_len = strnlen(name_entry, size - INTERFACE_ENTRY_NAME_OFFSET);
|
||||
if (size < (INTERFACE_ENTRY_NAME_OFFSET + name_len + 1 + sizeof(u32))) {
|
||||
dev_err(kbdev->dev, "Memory setup entry too short to contain virtual_exe_start");
|
||||
ret = -EINVAL;
|
||||
goto out;
|
||||
}
|
||||
|
||||
interface = kmalloc(sizeof(*interface) + name_len + 1, GFP_KERNEL);
|
||||
if (!interface) {
|
||||
ret = -ENOMEM;
|
||||
goto out;
|
||||
}
|
||||
name = (void *)(interface + 1);
|
||||
memcpy(name, entry + (INTERFACE_ENTRY_NAME_OFFSET / sizeof(*entry)),
|
||||
size - INTERFACE_ENTRY_NAME_OFFSET);
|
||||
name[size - INTERFACE_ENTRY_NAME_OFFSET] = 0;
|
||||
memcpy(name, name_entry, name_len);
|
||||
name[name_len] = 0;
|
||||
|
||||
interface->name = name;
|
||||
interface->phys = phys;
|
||||
|
|
@ -672,6 +732,11 @@ static int parse_memory_setup_entry(struct kbase_device *kbdev,
|
|||
interface->data_end = data_end;
|
||||
interface->pma = pma;
|
||||
|
||||
/* Discover the virtual execution address field after the end of the name
|
||||
* field taking into account the NUL-termination character.
|
||||
*/
|
||||
interface->virtual_exe_start = *((u32 *)(name_entry + name_len + 1));
|
||||
|
||||
mem_flags = convert_mem_flags(kbdev, flags, &cache_mode);
|
||||
|
||||
if (flags & CSF_FIRMWARE_ENTRY_SHARED) {
|
||||
|
|
@ -956,6 +1021,15 @@ static int load_firmware_entry(struct kbase_device *kbdev, const struct kbase_cs
|
|||
return -EINVAL;
|
||||
}
|
||||
return parse_build_info_metadata_entry(kbdev, fw, entry, size);
|
||||
case CSF_FIRMWARE_ENTRY_TYPE_FUNC_CALL_LIST:
|
||||
/* Function call list section */
|
||||
if (size < 2 * sizeof(*entry)) {
|
||||
dev_err(kbdev->dev, "Function call list entry too short (size=%u)\n",
|
||||
size);
|
||||
return -EINVAL;
|
||||
}
|
||||
kbase_csf_firmware_log_parse_logging_call_list_entry(kbdev, entry);
|
||||
break;
|
||||
}
|
||||
|
||||
if (!optional) {
|
||||
|
|
@ -1179,40 +1253,80 @@ static int parse_capabilities(struct kbase_device *kbdev)
|
|||
return 0;
|
||||
}
|
||||
|
||||
static inline void access_firmware_memory_common(struct kbase_device *kbdev,
|
||||
struct kbase_csf_firmware_interface *interface, u32 offset_bytes,
|
||||
u32 *value, const bool read)
|
||||
{
|
||||
u32 page_num = offset_bytes >> PAGE_SHIFT;
|
||||
u32 offset_in_page = offset_bytes & ~PAGE_MASK;
|
||||
struct page *target_page = as_page(interface->phys[page_num]);
|
||||
uintptr_t cpu_addr = (uintptr_t)kmap_atomic(target_page);
|
||||
u32 *addr = (u32 *)(cpu_addr + offset_in_page);
|
||||
|
||||
if (read) {
|
||||
kbase_sync_single_for_device(kbdev,
|
||||
kbase_dma_addr_from_tagged(interface->phys[page_num]) + offset_in_page,
|
||||
sizeof(u32), DMA_BIDIRECTIONAL);
|
||||
*value = *addr;
|
||||
} else {
|
||||
*addr = *value;
|
||||
kbase_sync_single_for_device(kbdev,
|
||||
kbase_dma_addr_from_tagged(interface->phys[page_num]) + offset_in_page,
|
||||
sizeof(u32), DMA_BIDIRECTIONAL);
|
||||
}
|
||||
|
||||
kunmap_atomic((u32 *)cpu_addr);
|
||||
}
|
||||
|
||||
static inline void access_firmware_memory(struct kbase_device *kbdev,
|
||||
u32 gpu_addr, u32 *value, const bool read)
|
||||
{
|
||||
struct kbase_csf_firmware_interface *interface;
|
||||
struct kbase_csf_firmware_interface *interface, *access_interface = NULL;
|
||||
u32 offset_bytes = 0;
|
||||
|
||||
list_for_each_entry(interface, &kbdev->csf.firmware_interfaces, node) {
|
||||
if ((gpu_addr >= interface->virtual) &&
|
||||
(gpu_addr < interface->virtual + (interface->num_pages << PAGE_SHIFT))) {
|
||||
u32 offset_bytes = gpu_addr - interface->virtual;
|
||||
u32 page_num = offset_bytes >> PAGE_SHIFT;
|
||||
u32 offset_in_page = offset_bytes & ~PAGE_MASK;
|
||||
struct page *target_page = as_page(
|
||||
interface->phys[page_num]);
|
||||
u32 *cpu_addr = kmap_atomic(target_page);
|
||||
|
||||
if (read) {
|
||||
kbase_sync_single_for_device(kbdev,
|
||||
kbase_dma_addr(target_page) + offset_in_page,
|
||||
sizeof(u32), DMA_BIDIRECTIONAL);
|
||||
|
||||
*value = cpu_addr[offset_in_page >> 2];
|
||||
} else {
|
||||
cpu_addr[offset_in_page >> 2] = *value;
|
||||
|
||||
kbase_sync_single_for_device(kbdev,
|
||||
kbase_dma_addr(target_page) + offset_in_page,
|
||||
sizeof(u32), DMA_BIDIRECTIONAL);
|
||||
}
|
||||
|
||||
kunmap_atomic(cpu_addr);
|
||||
return;
|
||||
offset_bytes = gpu_addr - interface->virtual;
|
||||
access_interface = interface;
|
||||
break;
|
||||
}
|
||||
}
|
||||
dev_warn(kbdev->dev, "Invalid GPU VA %x passed\n", gpu_addr);
|
||||
|
||||
if (access_interface)
|
||||
access_firmware_memory_common(kbdev, access_interface, offset_bytes, value, read);
|
||||
else
|
||||
dev_warn(kbdev->dev, "Invalid GPU VA %x passed", gpu_addr);
|
||||
}
|
||||
|
||||
/*
 * Read or write one 32-bit word of firmware memory, locating the owning
 * interface by its execution address range (virtual_exe_start) rather than
 * by its load address.
 *
 * @kbdev:    Device pointer
 * @gpu_addr: GPU address, expressed in terms of the execution location
 * @value:    Word to write, or destination of the word read
 * @read:     true to read into @value, false to write *@value
 *
 * A warning is logged if no interface covers @gpu_addr.
 */
static inline void access_firmware_memory_exe(struct kbase_device *kbdev,
	u32 gpu_addr, u32 *value, const bool read)
{
	struct kbase_csf_firmware_interface *iface, *match = NULL;
	u32 offset = 0;

	list_for_each_entry(iface, &kbdev->csf.firmware_interfaces, node) {
		/* Skip interfaces whose execution range does not cover the address */
		if (gpu_addr < iface->virtual_exe_start)
			continue;
		if (gpu_addr >= iface->virtual_exe_start + (iface->num_pages << PAGE_SHIFT))
			continue;

		match = iface;
		offset = gpu_addr - iface->virtual_exe_start;

		/* If there's an overlap in execution address range between a moved and a
		 * non-moved areas, always prefer the moved one. The idea is that FW may
		 * move sections around during init time, but after the layout is settled,
		 * any moved sections are going to override non-moved areas at the same
		 * location. A non-moved match is therefore only provisional and the
		 * search carries on.
		 */
		if (iface->virtual_exe_start != iface->virtual)
			break;
	}

	if (!match) {
		dev_warn(kbdev->dev, "Invalid GPU VA %x passed", gpu_addr);
		return;
	}

	access_firmware_memory_common(kbdev, match, offset, value, read);
}
|
||||
|
||||
void kbase_csf_read_firmware_memory(struct kbase_device *kbdev,
|
||||
|
|
@ -1227,6 +1341,18 @@ void kbase_csf_update_firmware_memory(struct kbase_device *kbdev,
|
|||
access_firmware_memory(kbdev, gpu_addr, &value, false);
|
||||
}
|
||||
|
||||
/* Read the 32-bit word at @gpu_addr (execution-location address) into *@value. */
void kbase_csf_read_firmware_memory_exe(struct kbase_device *kbdev, u32 gpu_addr, u32 *value)
{
	const bool is_read = true;

	access_firmware_memory_exe(kbdev, gpu_addr, value, is_read);
}
|
||||
|
||||
/* Write the 32-bit @value at @gpu_addr (execution-location address). */
void kbase_csf_update_firmware_memory_exe(struct kbase_device *kbdev, u32 gpu_addr, u32 value)
{
	const bool is_read = false;

	/* The common accessor takes a pointer for both directions */
	access_firmware_memory_exe(kbdev, gpu_addr, &value, is_read);
}
|
||||
|
||||
void kbase_csf_firmware_cs_input(
|
||||
const struct kbase_csf_cmd_stream_info *const info, const u32 offset,
|
||||
const u32 value)
|
||||
|
|
@ -1462,11 +1588,10 @@ static bool global_request_complete(struct kbase_device *const kbdev,
|
|||
return complete;
|
||||
}
|
||||
|
||||
static int wait_for_global_request(struct kbase_device *const kbdev,
|
||||
u32 const req_mask)
|
||||
static int wait_for_global_request_with_timeout(struct kbase_device *const kbdev,
|
||||
u32 const req_mask, unsigned int timeout_ms)
|
||||
{
|
||||
const long wait_timeout =
|
||||
kbase_csf_timeout_in_jiffies(kbdev->csf.fw_timeout_ms);
|
||||
const long wait_timeout = kbase_csf_timeout_in_jiffies(timeout_ms);
|
||||
long remaining;
|
||||
int err = 0;
|
||||
|
||||
|
|
@ -1475,10 +1600,9 @@ static int wait_for_global_request(struct kbase_device *const kbdev,
|
|||
wait_timeout);
|
||||
|
||||
if (!remaining) {
|
||||
dev_warn(kbdev->dev, "[%llu] Timeout (%d ms) waiting for global request %x to complete",
|
||||
kbase_backend_get_cycle_cnt(kbdev),
|
||||
kbdev->csf.fw_timeout_ms,
|
||||
req_mask);
|
||||
dev_warn(kbdev->dev,
|
||||
"[%llu] Timeout (%d ms) waiting for global request %x to complete",
|
||||
kbase_backend_get_cycle_cnt(kbdev), timeout_ms, req_mask);
|
||||
err = -ETIMEDOUT;
|
||||
|
||||
}
|
||||
|
|
@ -1486,6 +1610,11 @@ static int wait_for_global_request(struct kbase_device *const kbdev,
|
|||
return err;
|
||||
}
|
||||
|
||||
/*
 * Wait for the global requests in @req_mask to complete, using the device's
 * default firmware timeout (kbdev->csf.fw_timeout_ms).
 *
 * Return: value returned by wait_for_global_request_with_timeout().
 */
static int wait_for_global_request(struct kbase_device *const kbdev, u32 const req_mask)
{
	const unsigned int default_timeout_ms = kbdev->csf.fw_timeout_ms;

	return wait_for_global_request_with_timeout(kbdev, req_mask, default_timeout_ms);
}
|
||||
|
||||
static void set_global_request(
|
||||
const struct kbase_csf_global_iface *const global_iface,
|
||||
u32 const req_mask)
|
||||
|
|
@ -1559,6 +1688,25 @@ static void enable_gpu_idle_timer(struct kbase_device *const kbdev)
|
|||
}
|
||||
|
||||
|
||||
/**
|
||||
* kbasep_enable_rtu - Enable Ray Tracing Unit on powering up shader core
|
||||
*
|
||||
* @kbdev: The kbase device structure of the device
|
||||
*
|
||||
* This function needs to be called to enable the Ray Tracing Unit
|
||||
* by writing SHADER_PWRFEATURES only when host controls shader cores power.
|
||||
*/
|
||||
static void kbasep_enable_rtu(struct kbase_device *kbdev)
|
||||
{
|
||||
const u32 gpu_id = kbdev->gpu_props.props.raw_props.gpu_id;
|
||||
|
||||
if (gpu_id < GPU_ID2_PRODUCT_MAKE(12, 8, 3, 0))
|
||||
return;
|
||||
|
||||
if (kbdev->csf.firmware_hctl_core_pwr)
|
||||
kbase_reg_write(kbdev, GPU_CONTROL_REG(SHADER_PWRFEATURES), 1);
|
||||
}
|
||||
|
||||
static void global_init(struct kbase_device *const kbdev, u64 core_mask)
|
||||
{
|
||||
u32 const ack_irq_mask =
|
||||
|
|
@ -1574,6 +1722,8 @@ static void global_init(struct kbase_device *const kbdev, u64 core_mask)
|
|||
|
||||
kbase_csf_scheduler_spin_lock(kbdev, &flags);
|
||||
|
||||
kbasep_enable_rtu(kbdev);
|
||||
|
||||
/* Update shader core allocation enable mask */
|
||||
enable_endpoints_global(global_iface, core_mask);
|
||||
enable_shader_poweroff_timer(kbdev, global_iface);
|
||||
|
|
@ -1854,7 +2004,6 @@ u32 kbase_csf_firmware_set_gpu_idle_hysteresis_time(struct kbase_device *kbdev,
|
|||
|
||||
static u32 convert_dur_to_core_pwroff_count(struct kbase_device *kbdev, const u32 dur_us)
|
||||
{
|
||||
#define PWROFF_VAL_UNIT_SHIFT (10)
|
||||
/* Get the cntfreq_el0 value, which drives the SYSTEM_TIMESTAMP */
|
||||
u64 freq = arch_timer_get_cntfrq();
|
||||
u64 dur_val = dur_us;
|
||||
|
|
@ -1991,16 +2140,6 @@ int kbase_csf_firmware_early_init(struct kbase_device *kbdev)
|
|||
kbdev->csf.fw_timeout_ms =
|
||||
kbase_get_timeout_ms(kbdev, CSF_FIRMWARE_TIMEOUT);
|
||||
|
||||
kbdev->csf.gpu_idle_hysteresis_ms = FIRMWARE_IDLE_HYSTERESIS_TIME_MS;
|
||||
#ifdef KBASE_PM_RUNTIME
|
||||
if (kbase_pm_gpu_sleep_allowed(kbdev))
|
||||
kbdev->csf.gpu_idle_hysteresis_ms /=
|
||||
FIRMWARE_IDLE_HYSTERESIS_GPU_SLEEP_SCALER;
|
||||
#endif
|
||||
WARN_ON(!kbdev->csf.gpu_idle_hysteresis_ms);
|
||||
kbdev->csf.gpu_idle_dur_count = convert_dur_to_idle_count(
|
||||
kbdev, kbdev->csf.gpu_idle_hysteresis_ms);
|
||||
|
||||
kbdev->csf.mcu_core_pwroff_dur_us = DEFAULT_GLB_PWROFF_TIMEOUT_US;
|
||||
kbdev->csf.mcu_core_pwroff_dur_count = convert_dur_to_core_pwroff_count(
|
||||
kbdev, DEFAULT_GLB_PWROFF_TIMEOUT_US);
|
||||
|
|
@ -2020,7 +2159,26 @@ int kbase_csf_firmware_early_init(struct kbase_device *kbdev)
|
|||
return 0;
|
||||
}
|
||||
|
||||
int kbase_csf_firmware_init(struct kbase_device *kbdev)
|
||||
void kbase_csf_firmware_early_term(struct kbase_device *kbdev)
|
||||
{
|
||||
mutex_destroy(&kbdev->csf.reg_lock);
|
||||
}
|
||||
|
||||
int kbase_csf_firmware_late_init(struct kbase_device *kbdev)
|
||||
{
|
||||
kbdev->csf.gpu_idle_hysteresis_ms = FIRMWARE_IDLE_HYSTERESIS_TIME_MS;
|
||||
#ifdef KBASE_PM_RUNTIME
|
||||
if (kbase_pm_gpu_sleep_allowed(kbdev))
|
||||
kbdev->csf.gpu_idle_hysteresis_ms /= FIRMWARE_IDLE_HYSTERESIS_GPU_SLEEP_SCALER;
|
||||
#endif
|
||||
WARN_ON(!kbdev->csf.gpu_idle_hysteresis_ms);
|
||||
kbdev->csf.gpu_idle_dur_count =
|
||||
convert_dur_to_idle_count(kbdev, kbdev->csf.gpu_idle_hysteresis_ms);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int kbase_csf_firmware_load_init(struct kbase_device *kbdev)
|
||||
{
|
||||
const struct firmware *firmware = NULL;
|
||||
struct kbase_csf_mcu_fw *const mcu_fw = &kbdev->csf.fw;
|
||||
|
|
@ -2093,7 +2251,8 @@ int kbase_csf_firmware_init(struct kbase_device *kbdev)
|
|||
version_minor = mcu_fw->data[4];
|
||||
version_major = mcu_fw->data[5];
|
||||
|
||||
if (version_major != FIRMWARE_HEADER_VERSION) {
|
||||
if (version_major != FIRMWARE_HEADER_VERSION_MAJOR ||
|
||||
version_minor != FIRMWARE_HEADER_VERSION_MINOR) {
|
||||
dev_err(kbdev->dev,
|
||||
"Firmware header version %d.%d not understood\n",
|
||||
version_major, version_minor);
|
||||
|
|
@ -2188,6 +2347,12 @@ int kbase_csf_firmware_init(struct kbase_device *kbdev)
|
|||
if (ret != 0)
|
||||
goto err_out;
|
||||
|
||||
ret = kbase_csf_firmware_log_init(kbdev);
|
||||
if (ret != 0) {
|
||||
dev_err(kbdev->dev, "Failed to initialize FW trace (err %d)", ret);
|
||||
goto err_out;
|
||||
}
|
||||
|
||||
/* Firmware loaded successfully, ret = 0 */
|
||||
KBASE_KTRACE_ADD(kbdev, CSF_FIRMWARE_BOOT, NULL,
|
||||
(((u64)version_hash) << 32) |
|
||||
|
|
@ -2195,11 +2360,11 @@ int kbase_csf_firmware_init(struct kbase_device *kbdev)
|
|||
return 0;
|
||||
|
||||
err_out:
|
||||
kbase_csf_firmware_term(kbdev);
|
||||
kbase_csf_firmware_unload_term(kbdev);
|
||||
return ret;
|
||||
}
|
||||
|
||||
void kbase_csf_firmware_term(struct kbase_device *kbdev)
|
||||
void kbase_csf_firmware_unload_term(struct kbase_device *kbdev)
|
||||
{
|
||||
unsigned long flags;
|
||||
int ret = 0;
|
||||
|
|
@ -2210,6 +2375,8 @@ void kbase_csf_firmware_term(struct kbase_device *kbdev)
|
|||
|
||||
WARN(ret, "failed to wait for GPU reset");
|
||||
|
||||
kbase_csf_firmware_log_term(kbdev);
|
||||
|
||||
kbase_csf_firmware_cfg_term(kbdev);
|
||||
|
||||
kbase_csf_timeout_term(kbdev);
|
||||
|
|
@ -2297,8 +2464,6 @@ void kbase_csf_firmware_term(struct kbase_device *kbdev)
|
|||
*/
|
||||
kbase_mcu_shared_interface_region_tracker_term(kbdev);
|
||||
|
||||
mutex_destroy(&kbdev->csf.reg_lock);
|
||||
|
||||
kbase_mmu_term(kbdev, &kbdev->csf.mcu_mmu);
|
||||
|
||||
/* Release the address space */
|
||||
|
|
@ -2350,10 +2515,11 @@ void kbase_csf_firmware_ping(struct kbase_device *const kbdev)
|
|||
kbase_csf_scheduler_spin_unlock(kbdev, flags);
|
||||
}
|
||||
|
||||
int kbase_csf_firmware_ping_wait(struct kbase_device *const kbdev)
|
||||
int kbase_csf_firmware_ping_wait(struct kbase_device *const kbdev, unsigned int wait_timeout_ms)
|
||||
{
|
||||
kbase_csf_firmware_ping(kbdev);
|
||||
return wait_for_global_request(kbdev, GLB_REQ_PING_MASK);
|
||||
|
||||
return wait_for_global_request_with_timeout(kbdev, GLB_REQ_PING_MASK, wait_timeout_ms);
|
||||
}
|
||||
|
||||
int kbase_csf_firmware_set_timeout(struct kbase_device *const kbdev,
|
||||
|
|
@ -2392,7 +2558,7 @@ void kbase_csf_enter_protected_mode(struct kbase_device *kbdev)
|
|||
kbase_csf_ring_doorbell(kbdev, CSF_KERNEL_DOORBELL_NR);
|
||||
}
|
||||
|
||||
void kbase_csf_wait_protected_mode_enter(struct kbase_device *kbdev)
|
||||
int kbase_csf_wait_protected_mode_enter(struct kbase_device *kbdev)
|
||||
{
|
||||
int err;
|
||||
|
||||
|
|
@ -2432,12 +2598,14 @@ void kbase_csf_wait_protected_mode_enter(struct kbase_device *kbdev)
|
|||
}
|
||||
}
|
||||
|
||||
if (err) {
|
||||
if (unlikely(err)) {
|
||||
if (kbase_prepare_to_reset_gpu(kbdev, RESET_FLAGS_HWC_UNRECOVERABLE_ERROR))
|
||||
kbase_reset_gpu(kbdev);
|
||||
}
|
||||
|
||||
KBASE_TLSTREAM_AUX_PROTECTED_ENTER_END(kbdev, kbdev);
|
||||
|
||||
return err;
|
||||
}
|
||||
|
||||
void kbase_csf_firmware_trigger_mcu_halt(struct kbase_device *kbdev)
|
||||
|
|
@ -2651,9 +2819,8 @@ int kbase_csf_firmware_mcu_shared_mapping_init(
|
|||
if (!page_list)
|
||||
goto page_list_alloc_error;
|
||||
|
||||
ret = kbase_mem_pool_alloc_pages(
|
||||
&kbdev->mem_pools.small[KBASE_MEM_GROUP_CSF_FW],
|
||||
num_pages, phys, false);
|
||||
ret = kbase_mem_pool_alloc_pages(&kbdev->mem_pools.small[KBASE_MEM_GROUP_CSF_FW], num_pages,
|
||||
phys, false);
|
||||
if (ret <= 0)
|
||||
goto phys_mem_pool_alloc_error;
|
||||
|
||||
|
|
|
|||
|
|
@ -364,7 +364,45 @@ void kbase_csf_update_firmware_memory(struct kbase_device *kbdev,
|
|||
u32 gpu_addr, u32 value);
|
||||
|
||||
/**
|
||||
* kbase_csf_firmware_early_init() - Early initializatin for the firmware.
|
||||
* kbase_csf_read_firmware_memory_exe - Read a value in a GPU address in the
|
||||
* region of its final execution location.
|
||||
*
|
||||
* @kbdev: Device pointer
|
||||
* @gpu_addr: GPU address to read
|
||||
* @value: Output pointer to which the read value will be written
|
||||
*
|
||||
* This function reads a value in a GPU address that belongs to a private loaded
|
||||
* firmware memory region based on its final execution location. The function
|
||||
* assumes that the location is not permanently mapped on the CPU address space,
|
||||
* therefore it maps it and then unmaps it to access it independently. This function
|
||||
* needs to be used when accessing firmware memory regions which will be moved to
|
||||
* their final execution location during firmware boot using an address based on the
|
||||
* final execution location.
|
||||
*/
|
||||
void kbase_csf_read_firmware_memory_exe(struct kbase_device *kbdev,
|
||||
u32 gpu_addr, u32 *value);
|
||||
|
||||
/**
|
||||
* kbase_csf_update_firmware_memory_exe - Write a value in a GPU address in the
|
||||
* region of its final execution location.
|
||||
*
|
||||
* @kbdev: Device pointer
|
||||
* @gpu_addr: GPU address to write
|
||||
* @value: Value to write
|
||||
*
|
||||
* This function writes a value in a GPU address that belongs to a private loaded
|
||||
* firmware memory region based on its final execution location. The function
|
||||
* assumes that the location is not permanently mapped on the CPU address space,
|
||||
* therefore it maps it and then unmaps it to access it independently. This function
|
||||
* needs to be used when accessing firmware memory regions which will be moved to
|
||||
* their final execution location during firmware boot using an address based on the
|
||||
* final execution location.
|
||||
*/
|
||||
void kbase_csf_update_firmware_memory_exe(struct kbase_device *kbdev,
|
||||
u32 gpu_addr, u32 value);
|
||||
|
||||
/**
|
||||
* kbase_csf_firmware_early_init() - Early initialization for the firmware.
|
||||
* @kbdev: Kbase device
|
||||
*
|
||||
* Initialize resources related to the firmware. Must be called at kbase probe.
|
||||
|
|
@ -374,22 +412,43 @@ void kbase_csf_update_firmware_memory(struct kbase_device *kbdev,
|
|||
int kbase_csf_firmware_early_init(struct kbase_device *kbdev);
|
||||
|
||||
/**
|
||||
* kbase_csf_firmware_init() - Load the firmware for the CSF MCU
|
||||
* kbase_csf_firmware_early_term() - Terminate resources related to the firmware
|
||||
* after the firmware unload has been done.
|
||||
*
|
||||
* @kbdev: Device pointer
|
||||
*
|
||||
* This should be called only when kbase probe fails or gets rmmoded.
|
||||
*/
|
||||
void kbase_csf_firmware_early_term(struct kbase_device *kbdev);
|
||||
|
||||
/**
|
||||
* kbase_csf_firmware_late_init() - Late initialization for the firmware.
|
||||
* @kbdev: Kbase device
|
||||
*
|
||||
* Initialize resources related to the firmware. This must be called after
|
||||
* backend late init is done. Must be used at probe time only.
|
||||
*
|
||||
* Return: 0 if successful, negative error code on failure
|
||||
*/
|
||||
int kbase_csf_firmware_late_init(struct kbase_device *kbdev);
|
||||
|
||||
/**
|
||||
* kbase_csf_firmware_load_init() - Load the firmware for the CSF MCU
|
||||
* @kbdev: Kbase device
|
||||
*
|
||||
* Request the firmware from user space and load it into memory.
|
||||
*
|
||||
* Return: 0 if successful, negative error code on failure
|
||||
*/
|
||||
int kbase_csf_firmware_init(struct kbase_device *kbdev);
|
||||
int kbase_csf_firmware_load_init(struct kbase_device *kbdev);
|
||||
|
||||
/**
|
||||
* kbase_csf_firmware_term() - Unload the firmware
|
||||
* kbase_csf_firmware_unload_term() - Unload the firmware
|
||||
* @kbdev: Kbase device
|
||||
*
|
||||
* Frees the memory allocated by kbase_csf_firmware_init()
|
||||
* Frees the memory allocated by kbase_csf_firmware_load_init()
|
||||
*/
|
||||
void kbase_csf_firmware_term(struct kbase_device *kbdev);
|
||||
void kbase_csf_firmware_unload_term(struct kbase_device *kbdev);
|
||||
|
||||
/**
|
||||
* kbase_csf_firmware_ping - Send the ping request to firmware.
|
||||
|
|
@ -404,13 +463,14 @@ void kbase_csf_firmware_ping(struct kbase_device *kbdev);
|
|||
* kbase_csf_firmware_ping_wait - Send the ping request to firmware and waits.
|
||||
*
|
||||
* @kbdev: Instance of a GPU platform device that implements a CSF interface.
|
||||
* @wait_timeout_ms: Timeout to get the acknowledgment for PING request from FW.
|
||||
*
|
||||
* The function sends the ping request to firmware and waits to confirm it is
|
||||
* alive.
|
||||
*
|
||||
* Return: 0 on success, or negative on failure.
|
||||
*/
|
||||
int kbase_csf_firmware_ping_wait(struct kbase_device *kbdev);
|
||||
int kbase_csf_firmware_ping_wait(struct kbase_device *kbdev, unsigned int wait_timeout_ms);
|
||||
|
||||
/**
|
||||
* kbase_csf_firmware_set_timeout - Set a hardware endpoint progress timeout.
|
||||
|
|
@ -447,8 +507,10 @@ void kbase_csf_enter_protected_mode(struct kbase_device *kbdev);
|
|||
* This function needs to be called after kbase_csf_enter_protected_mode() to
|
||||
* wait for the GPU to actually enter protected mode. GPU reset is triggered if
|
||||
* the wait is unsuccessful.
|
||||
*
|
||||
* Return: 0 on success, or negative on failure.
|
||||
*/
|
||||
void kbase_csf_wait_protected_mode_enter(struct kbase_device *kbdev);
|
||||
int kbase_csf_wait_protected_mode_enter(struct kbase_device *kbdev);
|
||||
|
||||
static inline bool kbase_csf_firmware_mcu_halted(struct kbase_device *kbdev)
|
||||
{
|
||||
|
|
|
|||
|
|
@ -20,13 +20,17 @@
|
|||
*/
|
||||
|
||||
#include <mali_kbase.h>
|
||||
#include "mali_kbase_csf_firmware_cfg.h"
|
||||
#include <mali_kbase_reset_gpu.h>
|
||||
#include <linux/version.h>
|
||||
|
||||
#include "mali_kbase_csf_firmware_cfg.h"
|
||||
#include "mali_kbase_csf_firmware_log.h"
|
||||
|
||||
#if CONFIG_SYSFS
|
||||
#define CSF_FIRMWARE_CFG_SYSFS_DIR_NAME "firmware_config"
|
||||
|
||||
#define CSF_FIRMWARE_CFG_LOG_VERBOSITY_ENTRY_NAME "Log verbosity"
|
||||
|
||||
/**
|
||||
* struct firmware_config - Configuration item within the MCU firmware
|
||||
*
|
||||
|
|
@ -125,7 +129,7 @@ static ssize_t store_fw_cfg(struct kobject *kobj,
|
|||
|
||||
if (attr == &fw_cfg_attr_cur) {
|
||||
unsigned long flags;
|
||||
u32 val;
|
||||
u32 val, cur_val;
|
||||
int ret = kstrtouint(buf, 0, &val);
|
||||
|
||||
if (ret) {
|
||||
|
|
@ -140,7 +144,9 @@ static ssize_t store_fw_cfg(struct kobject *kobj,
|
|||
return -EINVAL;
|
||||
|
||||
spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
|
||||
if (config->cur_val == val) {
|
||||
|
||||
cur_val = config->cur_val;
|
||||
if (cur_val == val) {
|
||||
spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
|
||||
return count;
|
||||
}
|
||||
|
|
@ -177,6 +183,20 @@ static ssize_t store_fw_cfg(struct kobject *kobj,
|
|||
|
||||
spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
|
||||
|
||||
/* Enable FW logging only if Log verbosity is non-zero */
|
||||
if (!strcmp(config->name, CSF_FIRMWARE_CFG_LOG_VERBOSITY_ENTRY_NAME) &&
|
||||
(!cur_val || !val)) {
|
||||
ret = kbase_csf_firmware_log_toggle_logging_calls(kbdev, val);
|
||||
if (ret) {
|
||||
/* Undo FW configuration changes */
|
||||
spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
|
||||
config->cur_val = cur_val;
|
||||
kbase_csf_update_firmware_memory(kbdev, config->address, cur_val);
|
||||
spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
|
||||
return ret;
|
||||
}
|
||||
}
|
||||
|
||||
/* If we can update the config without firmware reset then
|
||||
* we need to just trigger FIRMWARE_CONFIG_UPDATE.
|
||||
*/
|
||||
|
|
|
|||
451
drivers/gpu/arm/bifrost/csf/mali_kbase_csf_firmware_log.c
Normal file
451
drivers/gpu/arm/bifrost/csf/mali_kbase_csf_firmware_log.c
Normal file
|
|
@ -0,0 +1,451 @@
|
|||
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
|
||||
/*
|
||||
*
|
||||
* (C) COPYRIGHT 2022 ARM Limited. All rights reserved.
|
||||
*
|
||||
* This program is free software and is provided to you under the terms of the
|
||||
* GNU General Public License version 2 as published by the Free Software
|
||||
* Foundation, and any use by you of this program is subject to the terms
|
||||
* of such GNU license.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, you can access it online at
|
||||
* http://www.gnu.org/licenses/gpl-2.0.html.
|
||||
*
|
||||
*/
|
||||
|
||||
#include <mali_kbase.h>
|
||||
#include "backend/gpu/mali_kbase_pm_internal.h"
|
||||
#include <csf/mali_kbase_csf_firmware_log.h>
|
||||
#include <csf/mali_kbase_csf_trace_buffer.h>
|
||||
#include <linux/debugfs.h>
|
||||
#include <linux/string.h>
|
||||
#include <linux/workqueue.h>
|
||||
|
||||
/*
|
||||
* ARMv7 instruction: Branch with Link calls a subroutine at a PC-relative address.
|
||||
*/
|
||||
#define ARMV7_T1_BL_IMM_INSTR 0xd800f000
|
||||
|
||||
/*
|
||||
* ARMv7 instruction: Branch with Link calls a subroutine at a PC-relative address, maximum
|
||||
* negative jump offset.
|
||||
*/
|
||||
#define ARMV7_T1_BL_IMM_RANGE_MIN -16777216
|
||||
|
||||
/*
|
||||
* ARMv7 instruction: Branch with Link calls a subroutine at a PC-relative address, maximum
|
||||
* positive jump offset.
|
||||
*/
|
||||
#define ARMV7_T1_BL_IMM_RANGE_MAX 16777214
|
||||
|
||||
/*
|
||||
* ARMv7 instruction: Double NOP instructions.
|
||||
*/
|
||||
#define ARMV7_DOUBLE_NOP_INSTR 0xbf00bf00
|
||||
|
||||
#if defined(CONFIG_DEBUG_FS)
|
||||
|
||||
/*
 * debugfs getter for the firmware log enable mask.
 *
 * @data: Opaque pointer to the kbase device
 * @val:  Output: the trace buffer's active mask, as a 64-bit value
 *
 * Return: 0 on success, -EIO if the firmware log trace buffer is not found.
 */
static int kbase_csf_firmware_log_enable_mask_read(void *data, u64 *val)
{
	struct kbase_device *kbdev = data;
	struct firmware_trace_buffer *tb;

	tb = kbase_csf_firmware_get_trace_buffer(kbdev, FIRMWARE_LOG_BUF_NAME);
	if (!tb) {
		dev_err(kbdev->dev, "Couldn't get the firmware trace buffer");
		return -EIO;
	}

	/* Only the first 64 enable bits are reported; this is considered
	 * sufficient in practice.
	 */
	*val = kbase_csf_firmware_trace_buffer_get_active_mask64(tb);

	return 0;
}
|
||||
|
||||
/*
 * debugfs setter for the firmware log enable mask.
 *
 * @data: Opaque pointer to the kbase device
 * @val:  Requested enable mask. Bits at or above the trace buffer's enable
 *        bits count (capped at 64) are ignored.
 *
 * Return: 0 if the mask is unchanged, otherwise the result of
 *         kbase_csf_firmware_trace_buffer_set_active_mask64(); -EIO if the
 *         firmware log trace buffer is not found.
 */
static int kbase_csf_firmware_log_enable_mask_write(void *data, u64 val)
{
	struct kbase_device *kbdev = (struct kbase_device *)data;
	struct firmware_trace_buffer *tb =
		kbase_csf_firmware_get_trace_buffer(kbdev, FIRMWARE_LOG_BUF_NAME);
	u64 new_mask;
	unsigned int enable_bits_count;

	if (tb == NULL) {
		dev_err(kbdev->dev, "Couldn't get the firmware trace buffer");
		return -EIO;
	}

	/* Ignore unsupported types */
	enable_bits_count = kbase_csf_firmware_trace_buffer_get_trace_enable_bits_count(tb);
	if (enable_bits_count > 64) {
		dev_dbg(kbdev->dev, "Limit enabled bits count from %u to 64", enable_bits_count);
		enable_bits_count = 64;
	}

	/* Build the mask in 64-bit arithmetic: shifting a plain int by 31 or
	 * more bits is undefined, and so is shifting a 64-bit value by 64, so
	 * the full-width case is handled without a shift.
	 */
	if (enable_bits_count < 64)
		new_mask = val & ((1ULL << enable_bits_count) - 1);
	else
		new_mask = val;

	if (new_mask != kbase_csf_firmware_trace_buffer_get_active_mask64(tb))
		return kbase_csf_firmware_trace_buffer_set_active_mask64(tb, new_mask);
	else
		return 0;
}
|
||||
|
||||
static int kbasep_csf_firmware_log_debugfs_open(struct inode *in, struct file *file)
|
||||
{
|
||||
struct kbase_device *kbdev = in->i_private;
|
||||
|
||||
file->private_data = kbdev;
|
||||
dev_dbg(kbdev->dev, "Opened firmware trace buffer dump debugfs file");
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
 * read() handler for the firmware trace dump debugfs file.
 *
 * Copies up to FIRMWARE_LOG_DUMP_BUF_SIZE bytes of trace data to user space
 * through the kernel dump buffer. The dump buffer is guarded by the 'busy'
 * flag, which is released on every exit path after it has been taken.
 *
 * Return: number of bytes read, -EIO if the trace buffer is not found,
 *         -EBUSY if the dump buffer is in use, -EINVAL if the log is not in
 *         manual mode, -EFAULT if the copy to user space fails.
 */
static ssize_t kbasep_csf_firmware_log_debugfs_read(struct file *file, char __user *buf,
						     size_t size, loff_t *ppos)
{
	struct kbase_device *kbdev = file->private_data;
	struct kbase_csf_firmware_log *fw_log = &kbdev->csf.fw_log;
	struct firmware_trace_buffer *tb =
		kbase_csf_firmware_get_trace_buffer(kbdev, FIRMWARE_LOG_BUF_NAME);
	/* Limit reads to the kernel dump buffer size */
	size_t mem = MIN(size, FIRMWARE_LOG_DUMP_BUF_SIZE);
	unsigned int n_read;
	int ret;

	if (!tb) {
		dev_err(kbdev->dev, "Couldn't get the firmware trace buffer");
		return -EIO;
	}

	if (atomic_cmpxchg(&fw_log->busy, 0, 1) != 0)
		return -EBUSY;

	if (fw_log->mode != KBASE_CSF_FIRMWARE_LOG_MODE_MANUAL) {
		/* Reading from userspace is only allowed in manual mode */
		ret = -EINVAL;
	} else {
		n_read = kbase_csf_firmware_trace_buffer_read_data(tb, fw_log->dump_buf, mem);

		/* Do the copy, if we have obtained some trace data */
		if (n_read && copy_to_user(buf, fw_log->dump_buf, n_read)) {
			dev_err(kbdev->dev, "Couldn't copy trace buffer data to user space buffer");
			ret = -EFAULT;
		} else {
			*ppos += n_read;
			ret = n_read;
		}
	}

	atomic_set(&fw_log->busy, 0);
	return ret;
}
|
||||
|
||||
/*
 * debugfs getter for the firmware log mode.
 *
 * @data: Opaque pointer to the kbase device
 * @val:  Output: current value of the firmware log mode
 *
 * Return: always 0.
 */
static int kbase_csf_firmware_log_mode_read(void *data, u64 *val)
{
	struct kbase_device *kbdev = data;

	*val = kbdev->csf.fw_log.mode;

	return 0;
}
|
||||
|
||||
/*
 * debugfs setter for the firmware log mode.
 *
 * Switching to manual mode cancels the polling work; switching to auto-print
 * mode schedules it. Writing the current mode is a no-op.
 *
 * @data: Opaque pointer to the kbase device
 * @val:  Requested mode
 *
 * Return: 0 on success, -EBUSY if the log is in use, -EINVAL for an unknown
 *         mode.
 */
static int kbase_csf_firmware_log_mode_write(void *data, u64 val)
{
	struct kbase_device *kbdev = data;
	struct kbase_csf_firmware_log *fw_log = &kbdev->csf.fw_log;
	int ret = 0;

	if (atomic_cmpxchg(&fw_log->busy, 0, 1) != 0)
		return -EBUSY;

	if (val != fw_log->mode) {
		if (val == KBASE_CSF_FIRMWARE_LOG_MODE_MANUAL) {
			cancel_delayed_work_sync(&fw_log->poll_work);
			fw_log->mode = val;
		} else if (val == KBASE_CSF_FIRMWARE_LOG_MODE_AUTO_PRINT) {
			schedule_delayed_work(
				&fw_log->poll_work,
				msecs_to_jiffies(KBASE_CSF_FIRMWARE_LOG_POLL_PERIOD_MS));
			fw_log->mode = val;
		} else {
			/* Unknown mode: leave the current one untouched */
			ret = -EINVAL;
		}
	}

	atomic_set(&fw_log->busy, 0);
	return ret;
}
|
||||
|
||||
DEFINE_DEBUGFS_ATTRIBUTE(kbase_csf_firmware_log_enable_mask_fops,
|
||||
kbase_csf_firmware_log_enable_mask_read,
|
||||
kbase_csf_firmware_log_enable_mask_write, "%llx\n");
|
||||
|
||||
static const struct file_operations kbasep_csf_firmware_log_debugfs_fops = {
|
||||
.owner = THIS_MODULE,
|
||||
.open = kbasep_csf_firmware_log_debugfs_open,
|
||||
.read = kbasep_csf_firmware_log_debugfs_read,
|
||||
.llseek = no_llseek,
|
||||
};
|
||||
|
||||
DEFINE_DEBUGFS_ATTRIBUTE(kbase_csf_firmware_log_mode_fops, kbase_csf_firmware_log_mode_read,
|
||||
kbase_csf_firmware_log_mode_write, "%llu\n");
|
||||
|
||||
#endif /* CONFIG_DEBUG_FS */
|
||||
|
||||
static void kbase_csf_firmware_log_poll(struct work_struct *work)
|
||||
{
|
||||
struct kbase_device *kbdev =
|
||||
container_of(work, struct kbase_device, csf.fw_log.poll_work.work);
|
||||
struct kbase_csf_firmware_log *fw_log = &kbdev->csf.fw_log;
|
||||
|
||||
schedule_delayed_work(&fw_log->poll_work,
|
||||
msecs_to_jiffies(KBASE_CSF_FIRMWARE_LOG_POLL_PERIOD_MS));
|
||||
|
||||
kbase_csf_firmware_log_dump_buffer(kbdev);
|
||||
}
|
||||
|
||||
int kbase_csf_firmware_log_init(struct kbase_device *kbdev)
|
||||
{
|
||||
struct kbase_csf_firmware_log *fw_log = &kbdev->csf.fw_log;
|
||||
|
||||
/* Add one byte for null-termination */
|
||||
fw_log->dump_buf = kmalloc(FIRMWARE_LOG_DUMP_BUF_SIZE + 1, GFP_KERNEL);
|
||||
if (fw_log->dump_buf == NULL)
|
||||
return -ENOMEM;
|
||||
|
||||
/* Ensure null-termination for all strings */
|
||||
fw_log->dump_buf[FIRMWARE_LOG_DUMP_BUF_SIZE] = 0;
|
||||
|
||||
fw_log->mode = KBASE_CSF_FIRMWARE_LOG_MODE_MANUAL;
|
||||
|
||||
atomic_set(&fw_log->busy, 0);
|
||||
INIT_DEFERRABLE_WORK(&fw_log->poll_work, kbase_csf_firmware_log_poll);
|
||||
|
||||
#if defined(CONFIG_DEBUG_FS)
|
||||
debugfs_create_file("fw_trace_enable_mask", 0644, kbdev->mali_debugfs_directory, kbdev,
|
||||
&kbase_csf_firmware_log_enable_mask_fops);
|
||||
debugfs_create_file("fw_traces", 0444, kbdev->mali_debugfs_directory, kbdev,
|
||||
&kbasep_csf_firmware_log_debugfs_fops);
|
||||
debugfs_create_file("fw_trace_mode", 0644, kbdev->mali_debugfs_directory, kbdev,
|
||||
&kbase_csf_firmware_log_mode_fops);
|
||||
#endif /* CONFIG_DEBUG_FS */
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
void kbase_csf_firmware_log_term(struct kbase_device *kbdev)
|
||||
{
|
||||
struct kbase_csf_firmware_log *fw_log = &kbdev->csf.fw_log;
|
||||
|
||||
if (fw_log->dump_buf) {
|
||||
cancel_delayed_work_sync(&fw_log->poll_work);
|
||||
kfree(fw_log->dump_buf);
|
||||
fw_log->dump_buf = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
void kbase_csf_firmware_log_dump_buffer(struct kbase_device *kbdev)
|
||||
{
|
||||
struct kbase_csf_firmware_log *fw_log = &kbdev->csf.fw_log;
|
||||
u8 *buf = fw_log->dump_buf, *p, *pnewline, *pend, *pendbuf;
|
||||
unsigned int read_size, remaining_size;
|
||||
struct firmware_trace_buffer *tb =
|
||||
kbase_csf_firmware_get_trace_buffer(kbdev, FIRMWARE_LOG_BUF_NAME);
|
||||
|
||||
if (tb == NULL) {
|
||||
dev_dbg(kbdev->dev, "Can't get the trace buffer, firmware trace dump skipped");
|
||||
return;
|
||||
}
|
||||
|
||||
if (atomic_cmpxchg(&fw_log->busy, 0, 1) != 0)
|
||||
return;
|
||||
|
||||
/* FW should only print complete messages, so there's no need to handle
|
||||
* partial messages over multiple invocations of this function
|
||||
*/
|
||||
|
||||
p = buf;
|
||||
pendbuf = &buf[FIRMWARE_LOG_DUMP_BUF_SIZE];
|
||||
|
||||
while ((read_size = kbase_csf_firmware_trace_buffer_read_data(tb, p, pendbuf - p))) {
|
||||
pend = p + read_size;
|
||||
p = buf;
|
||||
|
||||
while (p < pend && (pnewline = memchr(p, '\n', pend - p))) {
|
||||
/* Null-terminate the string */
|
||||
*pnewline = 0;
|
||||
|
||||
dev_err(kbdev->dev, "FW> %s", p);
|
||||
|
||||
p = pnewline + 1;
|
||||
}
|
||||
|
||||
remaining_size = pend - p;
|
||||
|
||||
if (!remaining_size) {
|
||||
p = buf;
|
||||
} else if (remaining_size < FIRMWARE_LOG_DUMP_BUF_SIZE) {
|
||||
/* Copy unfinished string to the start of the buffer */
|
||||
memmove(buf, p, remaining_size);
|
||||
p = &buf[remaining_size];
|
||||
} else {
|
||||
/* Print abnormally long string without newlines */
|
||||
dev_err(kbdev->dev, "FW> %s", buf);
|
||||
p = buf;
|
||||
}
|
||||
}
|
||||
|
||||
if (p != buf) {
|
||||
/* Null-terminate and print last unfinished string */
|
||||
*p = 0;
|
||||
dev_err(kbdev->dev, "FW> %s", buf);
|
||||
}
|
||||
|
||||
atomic_set(&fw_log->busy, 0);
|
||||
}
|
||||
|
||||
void kbase_csf_firmware_log_parse_logging_call_list_entry(struct kbase_device *kbdev,
|
||||
const uint32_t *entry)
|
||||
{
|
||||
kbdev->csf.fw_log.func_call_list_va_start = entry[0];
|
||||
kbdev->csf.fw_log.func_call_list_va_end = entry[1];
|
||||
}
|
||||
|
||||
/**
|
||||
* toggle_logging_calls_in_loaded_image - Toggles FW log func calls in loaded FW image.
|
||||
*
|
||||
* @kbdev: Instance of a GPU platform device that implements a CSF interface.
|
||||
* @enable: Whether to enable or disable the function calls.
|
||||
*/
|
||||
static void toggle_logging_calls_in_loaded_image(struct kbase_device *kbdev, bool enable)
|
||||
{
|
||||
uint32_t bl_instruction, diff;
|
||||
uint32_t imm11, imm10, i1, i2, j1, j2, sign;
|
||||
uint32_t calling_address = 0, callee_address = 0;
|
||||
uint32_t list_entry = kbdev->csf.fw_log.func_call_list_va_start;
|
||||
const uint32_t list_va_end = kbdev->csf.fw_log.func_call_list_va_end;
|
||||
|
||||
if (list_entry == 0 || list_va_end == 0)
|
||||
return;
|
||||
|
||||
if (enable) {
|
||||
for (; list_entry < list_va_end; list_entry += 2 * sizeof(uint32_t)) {
|
||||
/* Read calling address */
|
||||
kbase_csf_read_firmware_memory(kbdev, list_entry, &calling_address);
|
||||
/* Read callee address */
|
||||
kbase_csf_read_firmware_memory(kbdev, list_entry + sizeof(uint32_t),
|
||||
&callee_address);
|
||||
|
||||
diff = callee_address - calling_address - 4;
|
||||
sign = !!(diff & 0x80000000);
|
||||
if (ARMV7_T1_BL_IMM_RANGE_MIN > (int32_t)diff &&
|
||||
ARMV7_T1_BL_IMM_RANGE_MAX < (int32_t)diff) {
|
||||
dev_warn(kbdev->dev, "FW log patch 0x%x out of range, skipping",
|
||||
calling_address);
|
||||
continue;
|
||||
}
|
||||
|
||||
i1 = (diff & 0x00800000) >> 23;
|
||||
j1 = !i1 ^ sign;
|
||||
i2 = (diff & 0x00400000) >> 22;
|
||||
j2 = !i2 ^ sign;
|
||||
imm11 = (diff & 0xffe) >> 1;
|
||||
imm10 = (diff & 0x3ff000) >> 12;
|
||||
|
||||
/* Compose BL instruction */
|
||||
bl_instruction = ARMV7_T1_BL_IMM_INSTR;
|
||||
bl_instruction |= j1 << 29;
|
||||
bl_instruction |= j2 << 27;
|
||||
bl_instruction |= imm11 << 16;
|
||||
bl_instruction |= sign << 10;
|
||||
bl_instruction |= imm10;
|
||||
|
||||
/* Patch logging func calls in their load location */
|
||||
dev_dbg(kbdev->dev, "FW log patch 0x%x: 0x%x\n", calling_address,
|
||||
bl_instruction);
|
||||
kbase_csf_update_firmware_memory_exe(kbdev, calling_address,
|
||||
bl_instruction);
|
||||
}
|
||||
} else {
|
||||
for (; list_entry < list_va_end; list_entry += 2 * sizeof(uint32_t)) {
|
||||
/* Read calling address */
|
||||
kbase_csf_read_firmware_memory(kbdev, list_entry, &calling_address);
|
||||
|
||||
/* Overwrite logging func calls with 2 NOP instructions */
|
||||
kbase_csf_update_firmware_memory_exe(kbdev, calling_address,
|
||||
ARMV7_DOUBLE_NOP_INSTR);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/*
 * Enable (non-zero val) or disable (zero val) the FW logging function calls
 * in the loaded firmware image.
 *
 * The fw_log busy flag is claimed first; -EBUSY is returned if it is already
 * set. The scheduler is then suspended and the PM state machine waited on, and
 * the image is only patched once the MCU is reported inactive. -EAGAIN is
 * returned if the PM wait fails or the MCU is still active. On every path past
 * the busy check the scheduler lock is dropped, the scheduler is resumed if
 * this call suspended it, and the busy flag is cleared.
 */
int kbase_csf_firmware_log_toggle_logging_calls(struct kbase_device *kbdev, u32 val)
{
	struct kbase_csf_firmware_log *const fw_log = &kbdev->csf.fw_log;
	struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler;
	unsigned long irq_flags;
	bool mcu_is_inactive;
	bool must_resume = false;
	int err = 0;

	if (atomic_cmpxchg(&fw_log->busy, 0, 1))
		return -EBUSY;

	/* Suspend all the active CS groups */
	dev_dbg(kbdev->dev, "Suspend all the active CS groups");

	kbase_csf_scheduler_lock(kbdev);
	while (scheduler->state != SCHED_SUSPENDED) {
		/* The suspend request is issued with the scheduler lock released */
		kbase_csf_scheduler_unlock(kbdev);
		kbase_csf_scheduler_pm_suspend(kbdev);
		kbase_csf_scheduler_lock(kbdev);
		must_resume = true;
	}

	/* Wait for the MCU to get disabled */
	dev_info(kbdev->dev, "Wait for the MCU to get disabled");
	if (kbase_pm_wait_for_desired_state(kbdev)) {
		dev_err(kbdev->dev,
			"wait for PM state failed when toggling FW logging calls");
		err = -EAGAIN;
		goto out;
	}

	/* Sample the MCU state under the hwaccess lock */
	spin_lock_irqsave(&kbdev->hwaccess_lock, irq_flags);
	mcu_is_inactive = kbase_pm_is_mcu_inactive(kbdev, kbdev->pm.backend.mcu_state);
	spin_unlock_irqrestore(&kbdev->hwaccess_lock, irq_flags);

	if (mcu_is_inactive) {
		/* Toggle FW logging call in the loaded FW image */
		toggle_logging_calls_in_loaded_image(kbdev, val);
		dev_dbg(kbdev->dev, "FW logging: %s", val ? "enabled" : "disabled");
	} else {
		dev_err(kbdev->dev,
			"MCU not inactive after PM state wait when toggling FW logging calls");
		err = -EAGAIN;
	}

out:
	kbase_csf_scheduler_unlock(kbdev);
	if (must_resume) {
		/* Resume queue groups and start mcu */
		kbase_csf_scheduler_pm_resume(kbdev);
	}
	atomic_set(&fw_log->busy, 0);

	return err;
}
|
||||
74
drivers/gpu/arm/bifrost/csf/mali_kbase_csf_firmware_log.h
Normal file
74
drivers/gpu/arm/bifrost/csf/mali_kbase_csf_firmware_log.h
Normal file
|
|
@ -0,0 +1,74 @@
|
|||
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
|
||||
/*
|
||||
*
|
||||
* (C) COPYRIGHT 2022 ARM Limited. All rights reserved.
|
||||
*
|
||||
* This program is free software and is provided to you under the terms of the
|
||||
* GNU General Public License version 2 as published by the Free Software
|
||||
* Foundation, and any use by you of this program is subject to the terms
|
||||
* of such GNU license.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, you can access it online at
|
||||
* http://www.gnu.org/licenses/gpl-2.0.html.
|
||||
*
|
||||
*/
|
||||
|
||||
#ifndef _KBASE_CSF_FIRMWARE_LOG_H_
|
||||
#define _KBASE_CSF_FIRMWARE_LOG_H_
|
||||
|
||||
#include <mali_kbase.h>
|
||||
|
||||
/*
|
||||
* Firmware log dumping buffer size.
|
||||
*/
|
||||
#define FIRMWARE_LOG_DUMP_BUF_SIZE PAGE_SIZE
|
||||
|
||||
/**
|
||||
* kbase_csf_firmware_log_init - Initialize firmware log handling.
|
||||
*
|
||||
* @kbdev: Pointer to the Kbase device
|
||||
*
|
||||
* Return: The initialization error code.
|
||||
*/
|
||||
int kbase_csf_firmware_log_init(struct kbase_device *kbdev);
|
||||
|
||||
/**
|
||||
* kbase_csf_firmware_log_term - Terminate firmware log handling.
|
||||
*
|
||||
* @kbdev: Pointer to the Kbase device
|
||||
*/
|
||||
void kbase_csf_firmware_log_term(struct kbase_device *kbdev);
|
||||
|
||||
/**
|
||||
* kbase_csf_firmware_log_dump_buffer - Read remaining data in the firmware log
|
||||
* buffer and print it to dmesg.
|
||||
*
|
||||
* @kbdev: Pointer to the Kbase device
|
||||
*/
|
||||
void kbase_csf_firmware_log_dump_buffer(struct kbase_device *kbdev);
|
||||
|
||||
/**
|
||||
* kbase_csf_firmware_log_parse_logging_call_list_entry - Parse FW logging function call list entry.
|
||||
*
|
||||
* @kbdev: Instance of a GPU platform device that implements a CSF interface.
|
||||
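* @entry: Pointer to the list entry; entry[0] is the start address and
*         entry[1] the end address of the FW logging function call list.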
|
||||
*/
|
||||
void kbase_csf_firmware_log_parse_logging_call_list_entry(struct kbase_device *kbdev,
|
||||
const uint32_t *entry);
|
||||
/**
|
||||
* kbase_csf_firmware_log_toggle_logging_calls - Enables/Disables FW logging function calls.
|
||||
*
|
||||
* @kbdev: Instance of a GPU platform device that implements a CSF interface.
|
||||
* @val: Non-zero to enable the FW logging function calls, zero to disable them.
|
||||
*
|
||||
* Return: 0 if successful, negative error code on failure
|
||||
*/
|
||||
int kbase_csf_firmware_log_toggle_logging_calls(struct kbase_device *kbdev, u32 val);
|
||||
|
||||
#endif /* _KBASE_CSF_FIRMWARE_LOG_H_ */
|
||||
|
|
@ -273,6 +273,18 @@ void kbase_csf_update_firmware_memory(struct kbase_device *kbdev,
|
|||
/* NO_MALI: Nothing to do here */
|
||||
}
|
||||
|
||||
void kbase_csf_read_firmware_memory_exe(struct kbase_device *kbdev,
|
||||
u32 gpu_addr, u32 *value)
|
||||
{
|
||||
/* NO_MALI: Nothing to do here */
|
||||
}
|
||||
|
||||
void kbase_csf_update_firmware_memory_exe(struct kbase_device *kbdev,
|
||||
u32 gpu_addr, u32 value)
|
||||
{
|
||||
/* NO_MALI: Nothing to do here */
|
||||
}
|
||||
|
||||
void kbase_csf_firmware_cs_input(
|
||||
const struct kbase_csf_cmd_stream_info *const info, const u32 offset,
|
||||
const u32 value)
|
||||
|
|
@ -971,7 +983,6 @@ u32 kbase_csf_firmware_set_gpu_idle_hysteresis_time(struct kbase_device *kbdev,
|
|||
|
||||
static u32 convert_dur_to_core_pwroff_count(struct kbase_device *kbdev, const u32 dur_us)
|
||||
{
|
||||
#define PWROFF_VAL_UNIT_SHIFT (10)
|
||||
/* Get the cntfreq_el0 value, which drives the SYSTEM_TIMESTAMP */
|
||||
u64 freq = arch_timer_get_cntfrq();
|
||||
u64 dur_val = dur_us;
|
||||
|
|
@ -1046,16 +1057,6 @@ int kbase_csf_firmware_early_init(struct kbase_device *kbdev)
|
|||
kbdev->csf.fw_timeout_ms =
|
||||
kbase_get_timeout_ms(kbdev, CSF_FIRMWARE_TIMEOUT);
|
||||
|
||||
kbdev->csf.gpu_idle_hysteresis_ms = FIRMWARE_IDLE_HYSTERESIS_TIME_MS;
|
||||
#ifdef KBASE_PM_RUNTIME
|
||||
if (kbase_pm_gpu_sleep_allowed(kbdev))
|
||||
kbdev->csf.gpu_idle_hysteresis_ms /=
|
||||
FIRMWARE_IDLE_HYSTERESIS_GPU_SLEEP_SCALER;
|
||||
#endif
|
||||
WARN_ON(!kbdev->csf.gpu_idle_hysteresis_ms);
|
||||
kbdev->csf.gpu_idle_dur_count = convert_dur_to_idle_count(
|
||||
kbdev, kbdev->csf.gpu_idle_hysteresis_ms);
|
||||
|
||||
INIT_LIST_HEAD(&kbdev->csf.firmware_interfaces);
|
||||
INIT_LIST_HEAD(&kbdev->csf.firmware_config);
|
||||
INIT_LIST_HEAD(&kbdev->csf.firmware_trace_buffers.list);
|
||||
|
|
@ -1068,7 +1069,26 @@ int kbase_csf_firmware_early_init(struct kbase_device *kbdev)
|
|||
return 0;
|
||||
}
|
||||
|
||||
int kbase_csf_firmware_init(struct kbase_device *kbdev)
|
||||
void kbase_csf_firmware_early_term(struct kbase_device *kbdev)
|
||||
{
|
||||
mutex_destroy(&kbdev->csf.reg_lock);
|
||||
}
|
||||
|
||||
int kbase_csf_firmware_late_init(struct kbase_device *kbdev)
|
||||
{
|
||||
kbdev->csf.gpu_idle_hysteresis_ms = FIRMWARE_IDLE_HYSTERESIS_TIME_MS;
|
||||
#ifdef KBASE_PM_RUNTIME
|
||||
if (kbase_pm_gpu_sleep_allowed(kbdev))
|
||||
kbdev->csf.gpu_idle_hysteresis_ms /= FIRMWARE_IDLE_HYSTERESIS_GPU_SLEEP_SCALER;
|
||||
#endif
|
||||
WARN_ON(!kbdev->csf.gpu_idle_hysteresis_ms);
|
||||
kbdev->csf.gpu_idle_dur_count =
|
||||
convert_dur_to_idle_count(kbdev, kbdev->csf.gpu_idle_hysteresis_ms);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int kbase_csf_firmware_load_init(struct kbase_device *kbdev)
|
||||
{
|
||||
int ret;
|
||||
|
||||
|
|
@ -1134,11 +1154,11 @@ int kbase_csf_firmware_init(struct kbase_device *kbdev)
|
|||
return 0;
|
||||
|
||||
error:
|
||||
kbase_csf_firmware_term(kbdev);
|
||||
kbase_csf_firmware_unload_term(kbdev);
|
||||
return ret;
|
||||
}
|
||||
|
||||
void kbase_csf_firmware_term(struct kbase_device *kbdev)
|
||||
void kbase_csf_firmware_unload_term(struct kbase_device *kbdev)
|
||||
{
|
||||
cancel_work_sync(&kbdev->csf.fw_error_work);
|
||||
|
||||
|
|
@ -1173,8 +1193,6 @@ void kbase_csf_firmware_term(struct kbase_device *kbdev)
|
|||
|
||||
/* NO_MALI: No trace buffers to terminate */
|
||||
|
||||
mutex_destroy(&kbdev->csf.reg_lock);
|
||||
|
||||
/* This will also free up the region allocated for the shared interface
|
||||
* entry parsed from the firmware image.
|
||||
*/
|
||||
|
|
@ -1227,8 +1245,9 @@ void kbase_csf_firmware_ping(struct kbase_device *const kbdev)
|
|||
kbase_csf_scheduler_spin_unlock(kbdev, flags);
|
||||
}
|
||||
|
||||
int kbase_csf_firmware_ping_wait(struct kbase_device *const kbdev)
|
||||
int kbase_csf_firmware_ping_wait(struct kbase_device *const kbdev, unsigned int wait_timeout_ms)
|
||||
{
|
||||
CSTD_UNUSED(wait_timeout_ms);
|
||||
kbase_csf_firmware_ping(kbdev);
|
||||
return wait_for_global_request(kbdev, GLB_REQ_PING_MASK);
|
||||
}
|
||||
|
|
@ -1267,7 +1286,7 @@ void kbase_csf_enter_protected_mode(struct kbase_device *kbdev)
|
|||
kbase_csf_ring_doorbell(kbdev, CSF_KERNEL_DOORBELL_NR);
|
||||
}
|
||||
|
||||
void kbase_csf_wait_protected_mode_enter(struct kbase_device *kbdev)
|
||||
int kbase_csf_wait_protected_mode_enter(struct kbase_device *kbdev)
|
||||
{
|
||||
int err = wait_for_global_request(kbdev, GLB_REQ_PROTM_ENTER_MASK);
|
||||
|
||||
|
|
@ -1275,6 +1294,8 @@ void kbase_csf_wait_protected_mode_enter(struct kbase_device *kbdev)
|
|||
if (kbase_prepare_to_reset_gpu(kbdev, RESET_FLAGS_NONE))
|
||||
kbase_reset_gpu(kbdev);
|
||||
}
|
||||
|
||||
return err;
|
||||
}
|
||||
|
||||
void kbase_csf_firmware_trigger_mcu_halt(struct kbase_device *kbdev)
|
||||
|
|
@ -1483,9 +1504,8 @@ int kbase_csf_firmware_mcu_shared_mapping_init(
|
|||
if (!page_list)
|
||||
goto page_list_alloc_error;
|
||||
|
||||
ret = kbase_mem_pool_alloc_pages(
|
||||
&kbdev->mem_pools.small[KBASE_MEM_GROUP_CSF_FW],
|
||||
num_pages, phys, false);
|
||||
ret = kbase_mem_pool_alloc_pages(&kbdev->mem_pools.small[KBASE_MEM_GROUP_CSF_FW], num_pages,
|
||||
phys, false);
|
||||
if (ret <= 0)
|
||||
goto phys_mem_pool_alloc_error;
|
||||
|
||||
|
|
|
|||
|
|
@ -1,7 +1,7 @@
|
|||
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
|
||||
/*
|
||||
*
|
||||
* (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved.
|
||||
* (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved.
|
||||
*
|
||||
* This program is free software and is provided to you under the terms of the
|
||||
* GNU General Public License version 2 as published by the Free Software
|
||||
|
|
@ -154,8 +154,8 @@ u64 kbase_csf_heap_context_allocator_alloc(
|
|||
struct kbase_csf_heap_context_allocator *const ctx_alloc)
|
||||
{
|
||||
struct kbase_context *const kctx = ctx_alloc->kctx;
|
||||
u64 flags = BASE_MEM_PROT_GPU_RD | BASE_MEM_PROT_GPU_WR |
|
||||
BASE_MEM_PROT_CPU_WR | BASEP_MEM_NO_USER_FREE;
|
||||
u64 flags = BASE_MEM_PROT_GPU_RD | BASE_MEM_PROT_GPU_WR | BASE_MEM_PROT_CPU_WR |
|
||||
BASEP_MEM_NO_USER_FREE | BASE_MEM_PROT_CPU_RD;
|
||||
u64 nr_pages = PFN_UP(HEAP_CTX_REGION_SIZE);
|
||||
u64 heap_gpu_va = 0;
|
||||
|
||||
|
|
@ -164,10 +164,6 @@ u64 kbase_csf_heap_context_allocator_alloc(
|
|||
*/
|
||||
const enum kbase_caller_mmu_sync_info mmu_sync_info = CALLER_MMU_ASYNC;
|
||||
|
||||
#ifdef CONFIG_MALI_VECTOR_DUMP
|
||||
flags |= BASE_MEM_PROT_CPU_RD;
|
||||
#endif
|
||||
|
||||
mutex_lock(&ctx_alloc->lock);
|
||||
|
||||
/* If the pool of heap contexts wasn't already allocated then
|
||||
|
|
|
|||
|
|
@ -55,7 +55,7 @@ static int kbase_kcpu_map_import_prepare(
|
|||
long i;
|
||||
int ret = 0;
|
||||
|
||||
lockdep_assert_held(&kctx->csf.kcpu_queues.lock);
|
||||
lockdep_assert_held(&kcpu_queue->lock);
|
||||
|
||||
/* Take the processes mmap lock */
|
||||
down_read(kbase_mem_get_process_mmap_lock());
|
||||
|
|
@ -114,7 +114,7 @@ static int kbase_kcpu_unmap_import_prepare_internal(
|
|||
struct kbase_va_region *reg;
|
||||
int ret = 0;
|
||||
|
||||
lockdep_assert_held(&kctx->csf.kcpu_queues.lock);
|
||||
lockdep_assert_held(&kcpu_queue->lock);
|
||||
|
||||
kbase_gpu_vm_lock(kctx);
|
||||
|
||||
|
|
@ -182,7 +182,8 @@ static void kbase_jit_add_to_pending_alloc_list(
|
|||
&kctx->csf.kcpu_queues.jit_blocked_queues;
|
||||
struct kbase_kcpu_command_queue *blocked_queue;
|
||||
|
||||
lockdep_assert_held(&kctx->csf.kcpu_queues.lock);
|
||||
lockdep_assert_held(&queue->lock);
|
||||
lockdep_assert_held(&kctx->csf.kcpu_queues.jit_lock);
|
||||
|
||||
list_for_each_entry(blocked_queue,
|
||||
&kctx->csf.kcpu_queues.jit_blocked_queues,
|
||||
|
|
@ -227,25 +228,28 @@ static int kbase_kcpu_jit_allocate_process(
|
|||
u32 i;
|
||||
int ret;
|
||||
|
||||
lockdep_assert_held(&kctx->csf.kcpu_queues.lock);
|
||||
|
||||
if (alloc_info->blocked) {
|
||||
list_del(&queue->jit_blocked);
|
||||
alloc_info->blocked = false;
|
||||
}
|
||||
lockdep_assert_held(&queue->lock);
|
||||
|
||||
if (WARN_ON(!info))
|
||||
return -EINVAL;
|
||||
|
||||
mutex_lock(&kctx->csf.kcpu_queues.jit_lock);
|
||||
|
||||
/* Check if all JIT IDs are not in use */
|
||||
for (i = 0; i < count; i++, info++) {
|
||||
/* The JIT ID is still in use so fail the allocation */
|
||||
if (kctx->jit_alloc[info->id]) {
|
||||
dev_dbg(kctx->kbdev->dev, "JIT ID still in use");
|
||||
return -EINVAL;
|
||||
ret = -EINVAL;
|
||||
goto fail;
|
||||
}
|
||||
}
|
||||
|
||||
if (alloc_info->blocked) {
|
||||
list_del(&queue->jit_blocked);
|
||||
alloc_info->blocked = false;
|
||||
}
|
||||
|
||||
/* Now start the allocation loop */
|
||||
for (i = 0, info = alloc_info->info; i < count; i++, info++) {
|
||||
/* Create a JIT allocation */
|
||||
|
|
@ -280,7 +284,7 @@ static int kbase_kcpu_jit_allocate_process(
|
|||
*/
|
||||
dev_warn_ratelimited(kctx->kbdev->dev, "JIT alloc command failed: %pK\n", cmd);
|
||||
ret = -ENOMEM;
|
||||
goto fail;
|
||||
goto fail_rollback;
|
||||
}
|
||||
|
||||
/* There are pending frees for an active allocation
|
||||
|
|
@ -298,7 +302,8 @@ static int kbase_kcpu_jit_allocate_process(
|
|||
kctx->jit_alloc[info->id] = NULL;
|
||||
}
|
||||
|
||||
return -EAGAIN;
|
||||
ret = -EAGAIN;
|
||||
goto fail;
|
||||
}
|
||||
|
||||
/* Bind it to the user provided ID. */
|
||||
|
|
@ -314,7 +319,7 @@ static int kbase_kcpu_jit_allocate_process(
|
|||
KBASE_REG_CPU_WR, &mapping);
|
||||
if (!ptr) {
|
||||
ret = -ENOMEM;
|
||||
goto fail;
|
||||
goto fail_rollback;
|
||||
}
|
||||
|
||||
reg = kctx->jit_alloc[info->id];
|
||||
|
|
@ -323,9 +328,11 @@ static int kbase_kcpu_jit_allocate_process(
|
|||
kbase_vunmap(kctx, &mapping);
|
||||
}
|
||||
|
||||
mutex_unlock(&kctx->csf.kcpu_queues.jit_lock);
|
||||
|
||||
return 0;
|
||||
|
||||
fail:
|
||||
fail_rollback:
|
||||
/* Roll back completely */
|
||||
for (i = 0, info = alloc_info->info; i < count; i++, info++) {
|
||||
/* Free the allocations that were successful.
|
||||
|
|
@ -338,6 +345,8 @@ static int kbase_kcpu_jit_allocate_process(
|
|||
|
||||
kctx->jit_alloc[info->id] = KBASE_RESERVED_REG_JIT_ALLOC;
|
||||
}
|
||||
fail:
|
||||
mutex_unlock(&kctx->csf.kcpu_queues.jit_lock);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
|
@ -354,7 +363,7 @@ static int kbase_kcpu_jit_allocate_prepare(
|
|||
int ret = 0;
|
||||
u32 i;
|
||||
|
||||
lockdep_assert_held(&kctx->csf.kcpu_queues.lock);
|
||||
lockdep_assert_held(&kcpu_queue->lock);
|
||||
|
||||
if (!data || count > kcpu_queue->kctx->jit_max_allocations ||
|
||||
count > ARRAY_SIZE(kctx->jit_alloc)) {
|
||||
|
|
@ -392,11 +401,13 @@ static int kbase_kcpu_jit_allocate_prepare(
|
|||
}
|
||||
|
||||
current_command->type = BASE_KCPU_COMMAND_TYPE_JIT_ALLOC;
|
||||
list_add_tail(&current_command->info.jit_alloc.node,
|
||||
&kctx->csf.kcpu_queues.jit_cmds_head);
|
||||
current_command->info.jit_alloc.info = info;
|
||||
current_command->info.jit_alloc.count = count;
|
||||
current_command->info.jit_alloc.blocked = false;
|
||||
mutex_lock(&kctx->csf.kcpu_queues.jit_lock);
|
||||
list_add_tail(&current_command->info.jit_alloc.node,
|
||||
&kctx->csf.kcpu_queues.jit_cmds_head);
|
||||
mutex_unlock(&kctx->csf.kcpu_queues.jit_lock);
|
||||
|
||||
return 0;
|
||||
out_free:
|
||||
|
|
@ -415,7 +426,9 @@ static void kbase_kcpu_jit_allocate_finish(
|
|||
struct kbase_kcpu_command_queue *queue,
|
||||
struct kbase_kcpu_command *cmd)
|
||||
{
|
||||
lockdep_assert_held(&queue->kctx->csf.kcpu_queues.lock);
|
||||
lockdep_assert_held(&queue->lock);
|
||||
|
||||
mutex_lock(&queue->kctx->csf.kcpu_queues.jit_lock);
|
||||
|
||||
/* Remove this command from the jit_cmds_head list */
|
||||
list_del(&cmd->info.jit_alloc.node);
|
||||
|
|
@ -429,6 +442,8 @@ static void kbase_kcpu_jit_allocate_finish(
|
|||
cmd->info.jit_alloc.blocked = false;
|
||||
}
|
||||
|
||||
mutex_unlock(&queue->kctx->csf.kcpu_queues.jit_lock);
|
||||
|
||||
kfree(cmd->info.jit_alloc.info);
|
||||
}
|
||||
|
||||
|
|
@ -441,18 +456,17 @@ static void kbase_kcpu_jit_retry_pending_allocs(struct kbase_context *kctx)
|
|||
{
|
||||
struct kbase_kcpu_command_queue *blocked_queue;
|
||||
|
||||
lockdep_assert_held(&kctx->csf.kcpu_queues.lock);
|
||||
lockdep_assert_held(&kctx->csf.kcpu_queues.jit_lock);
|
||||
|
||||
/*
|
||||
* Reschedule all queues blocked by JIT_ALLOC commands.
|
||||
* NOTE: This code traverses the list of blocked queues directly. It
|
||||
* only works as long as the queued works are not executed at the same
|
||||
* time. This precondition is true since we're holding the
|
||||
* kbase_csf_kcpu_queue_context.lock .
|
||||
* kbase_csf_kcpu_queue_context.jit_lock .
|
||||
*/
|
||||
list_for_each_entry(blocked_queue,
|
||||
&kctx->csf.kcpu_queues.jit_blocked_queues, jit_blocked)
|
||||
queue_work(kctx->csf.kcpu_queues.wq, &blocked_queue->work);
|
||||
list_for_each_entry(blocked_queue, &kctx->csf.kcpu_queues.jit_blocked_queues, jit_blocked)
|
||||
queue_work(blocked_queue->wq, &blocked_queue->work);
|
||||
}
|
||||
|
||||
static int kbase_kcpu_jit_free_process(struct kbase_kcpu_command_queue *queue,
|
||||
|
|
@ -469,7 +483,8 @@ static int kbase_kcpu_jit_free_process(struct kbase_kcpu_command_queue *queue,
|
|||
if (WARN_ON(!ids))
|
||||
return -EINVAL;
|
||||
|
||||
lockdep_assert_held(&kctx->csf.kcpu_queues.lock);
|
||||
lockdep_assert_held(&queue->lock);
|
||||
mutex_lock(&kctx->csf.kcpu_queues.jit_lock);
|
||||
|
||||
KBASE_TLSTREAM_TL_KBASE_ARRAY_BEGIN_KCPUQUEUE_EXECUTE_JIT_FREE_END(queue->kctx->kbdev,
|
||||
queue);
|
||||
|
|
@ -501,9 +516,6 @@ static int kbase_kcpu_jit_free_process(struct kbase_kcpu_command_queue *queue,
|
|||
queue->kctx->kbdev, queue, item_err, pages_used);
|
||||
}
|
||||
|
||||
/* Free the list of ids */
|
||||
kfree(ids);
|
||||
|
||||
/*
|
||||
* Remove this command from the jit_cmds_head list and retry pending
|
||||
* allocations.
|
||||
|
|
@ -511,6 +523,11 @@ static int kbase_kcpu_jit_free_process(struct kbase_kcpu_command_queue *queue,
|
|||
list_del(&cmd->info.jit_free.node);
|
||||
kbase_kcpu_jit_retry_pending_allocs(kctx);
|
||||
|
||||
mutex_unlock(&kctx->csf.kcpu_queues.jit_lock);
|
||||
|
||||
/* Free the list of ids */
|
||||
kfree(ids);
|
||||
|
||||
return rc;
|
||||
}
|
||||
|
||||
|
|
@ -526,7 +543,7 @@ static int kbase_kcpu_jit_free_prepare(
|
|||
int ret;
|
||||
u32 i;
|
||||
|
||||
lockdep_assert_held(&kctx->csf.kcpu_queues.lock);
|
||||
lockdep_assert_held(&kcpu_queue->lock);
|
||||
|
||||
/* Sanity checks */
|
||||
if (!count || count > ARRAY_SIZE(kctx->jit_alloc)) {
|
||||
|
|
@ -572,10 +589,12 @@ static int kbase_kcpu_jit_free_prepare(
|
|||
}
|
||||
|
||||
current_command->type = BASE_KCPU_COMMAND_TYPE_JIT_FREE;
|
||||
list_add_tail(&current_command->info.jit_free.node,
|
||||
&kctx->csf.kcpu_queues.jit_cmds_head);
|
||||
current_command->info.jit_free.ids = ids;
|
||||
current_command->info.jit_free.count = count;
|
||||
mutex_lock(&kctx->csf.kcpu_queues.jit_lock);
|
||||
list_add_tail(&current_command->info.jit_free.node,
|
||||
&kctx->csf.kcpu_queues.jit_cmds_head);
|
||||
mutex_unlock(&kctx->csf.kcpu_queues.jit_lock);
|
||||
|
||||
return 0;
|
||||
out_free:
|
||||
|
|
@ -601,7 +620,7 @@ static int kbase_csf_queue_group_suspend_prepare(
|
|||
int pinned_pages = 0, ret = 0;
|
||||
struct kbase_va_region *reg;
|
||||
|
||||
lockdep_assert_held(&kctx->csf.kcpu_queues.lock);
|
||||
lockdep_assert_held(&kcpu_queue->lock);
|
||||
|
||||
if (suspend_buf->size < csg_suspend_buf_size)
|
||||
return -EINVAL;
|
||||
|
|
@ -652,9 +671,12 @@ static int kbase_csf_queue_group_suspend_prepare(
|
|||
u64 start, end, i;
|
||||
|
||||
if (((reg->flags & KBASE_REG_ZONE_MASK) != KBASE_REG_ZONE_SAME_VA) ||
|
||||
reg->nr_pages < nr_pages ||
|
||||
kbase_reg_current_backed_size(reg) !=
|
||||
reg->nr_pages) {
|
||||
(kbase_reg_current_backed_size(reg) < nr_pages) ||
|
||||
!(reg->flags & KBASE_REG_CPU_WR) ||
|
||||
(reg->gpu_alloc->type != KBASE_MEM_TYPE_NATIVE) ||
|
||||
(reg->flags & KBASE_REG_DONT_NEED) ||
|
||||
(reg->flags & KBASE_REG_ACTIVE_JIT_ALLOC) ||
|
||||
(reg->flags & KBASE_REG_NO_USER_FREE)) {
|
||||
ret = -EINVAL;
|
||||
goto out_clean_pages;
|
||||
}
|
||||
|
|
@ -703,9 +725,8 @@ static enum kbase_csf_event_callback_action event_cqs_callback(void *param)
|
|||
{
|
||||
struct kbase_kcpu_command_queue *kcpu_queue =
|
||||
(struct kbase_kcpu_command_queue *)param;
|
||||
struct kbase_context *const kctx = kcpu_queue->kctx;
|
||||
|
||||
queue_work(kctx->csf.kcpu_queues.wq, &kcpu_queue->work);
|
||||
queue_work(kcpu_queue->wq, &kcpu_queue->work);
|
||||
|
||||
return KBASE_CSF_EVENT_CALLBACK_KEEP;
|
||||
}
|
||||
|
|
@ -735,7 +756,7 @@ static int kbase_kcpu_cqs_wait_process(struct kbase_device *kbdev,
|
|||
{
|
||||
u32 i;
|
||||
|
||||
lockdep_assert_held(&queue->kctx->csf.kcpu_queues.lock);
|
||||
lockdep_assert_held(&queue->lock);
|
||||
|
||||
if (WARN_ON(!cqs_wait->objs))
|
||||
return -EINVAL;
|
||||
|
|
@ -803,7 +824,7 @@ static int kbase_kcpu_cqs_wait_prepare(struct kbase_kcpu_command_queue *queue,
|
|||
struct base_cqs_wait_info *objs;
|
||||
unsigned int nr_objs = cqs_wait_info->nr_objs;
|
||||
|
||||
lockdep_assert_held(&queue->kctx->csf.kcpu_queues.lock);
|
||||
lockdep_assert_held(&queue->lock);
|
||||
|
||||
if (nr_objs > BASEP_KCPU_CQS_MAX_NUM_OBJS)
|
||||
return -EINVAL;
|
||||
|
|
@ -857,7 +878,7 @@ static void kbase_kcpu_cqs_set_process(struct kbase_device *kbdev,
|
|||
{
|
||||
unsigned int i;
|
||||
|
||||
lockdep_assert_held(&queue->kctx->csf.kcpu_queues.lock);
|
||||
lockdep_assert_held(&queue->lock);
|
||||
|
||||
if (WARN_ON(!cqs_set->objs))
|
||||
return;
|
||||
|
|
@ -898,11 +919,10 @@ static int kbase_kcpu_cqs_set_prepare(
|
|||
struct base_kcpu_command_cqs_set_info *cqs_set_info,
|
||||
struct kbase_kcpu_command *current_command)
|
||||
{
|
||||
struct kbase_context *const kctx = kcpu_queue->kctx;
|
||||
struct base_cqs_set *objs;
|
||||
unsigned int nr_objs = cqs_set_info->nr_objs;
|
||||
|
||||
lockdep_assert_held(&kctx->csf.kcpu_queues.lock);
|
||||
lockdep_assert_held(&kcpu_queue->lock);
|
||||
|
||||
if (nr_objs > BASEP_KCPU_CQS_MAX_NUM_OBJS)
|
||||
return -EINVAL;
|
||||
|
|
@ -952,7 +972,7 @@ static int kbase_kcpu_cqs_wait_operation_process(struct kbase_device *kbdev,
|
|||
{
|
||||
u32 i;
|
||||
|
||||
lockdep_assert_held(&queue->kctx->csf.kcpu_queues.lock);
|
||||
lockdep_assert_held(&queue->lock);
|
||||
|
||||
if (WARN_ON(!cqs_wait_operation->objs))
|
||||
return -EINVAL;
|
||||
|
|
@ -1039,7 +1059,7 @@ static int kbase_kcpu_cqs_wait_operation_prepare(struct kbase_kcpu_command_queue
|
|||
struct base_cqs_wait_operation_info *objs;
|
||||
unsigned int nr_objs = cqs_wait_operation_info->nr_objs;
|
||||
|
||||
lockdep_assert_held(&queue->kctx->csf.kcpu_queues.lock);
|
||||
lockdep_assert_held(&queue->lock);
|
||||
|
||||
if (nr_objs > BASEP_KCPU_CQS_MAX_NUM_OBJS)
|
||||
return -EINVAL;
|
||||
|
|
@ -1094,7 +1114,7 @@ static void kbase_kcpu_cqs_set_operation_process(
|
|||
{
|
||||
unsigned int i;
|
||||
|
||||
lockdep_assert_held(&queue->kctx->csf.kcpu_queues.lock);
|
||||
lockdep_assert_held(&queue->lock);
|
||||
|
||||
if (WARN_ON(!cqs_set_operation->objs))
|
||||
return;
|
||||
|
|
@ -1161,11 +1181,10 @@ static int kbase_kcpu_cqs_set_operation_prepare(
|
|||
struct base_kcpu_command_cqs_set_operation_info *cqs_set_operation_info,
|
||||
struct kbase_kcpu_command *current_command)
|
||||
{
|
||||
struct kbase_context *const kctx = kcpu_queue->kctx;
|
||||
struct base_cqs_set_operation_info *objs;
|
||||
unsigned int nr_objs = cqs_set_operation_info->nr_objs;
|
||||
|
||||
lockdep_assert_held(&kctx->csf.kcpu_queues.lock);
|
||||
lockdep_assert_held(&kcpu_queue->lock);
|
||||
|
||||
if (nr_objs > BASEP_KCPU_CQS_MAX_NUM_OBJS)
|
||||
return -EINVAL;
|
||||
|
|
@ -1212,7 +1231,7 @@ static void kbase_csf_fence_wait_callback(struct dma_fence *fence,
|
|||
fence->context, fence->seqno);
|
||||
|
||||
/* Resume kcpu command queue processing. */
|
||||
queue_work(kctx->csf.kcpu_queues.wq, &kcpu_queue->work);
|
||||
queue_work(kcpu_queue->wq, &kcpu_queue->work);
|
||||
}
|
||||
|
||||
static void kbase_kcpu_fence_wait_cancel(
|
||||
|
|
@ -1221,7 +1240,7 @@ static void kbase_kcpu_fence_wait_cancel(
|
|||
{
|
||||
struct kbase_context *const kctx = kcpu_queue->kctx;
|
||||
|
||||
lockdep_assert_held(&kctx->csf.kcpu_queues.lock);
|
||||
lockdep_assert_held(&kcpu_queue->lock);
|
||||
|
||||
if (WARN_ON(!fence_info->fence))
|
||||
return;
|
||||
|
|
@ -1293,7 +1312,7 @@ static void fence_timeout_callback(struct timer_list *timer)
|
|||
kbase_sync_fence_info_get(fence, &info);
|
||||
|
||||
if (info.status == 1) {
|
||||
queue_work(kctx->csf.kcpu_queues.wq, &kcpu_queue->work);
|
||||
queue_work(kcpu_queue->wq, &kcpu_queue->work);
|
||||
} else if (info.status == 0) {
|
||||
dev_warn(kctx->kbdev->dev, "fence has not yet signalled in %ums",
|
||||
FENCE_WAIT_TIMEOUT_MS);
|
||||
|
|
@ -1345,7 +1364,7 @@ static int kbase_kcpu_fence_wait_process(
|
|||
#endif
|
||||
struct kbase_context *const kctx = kcpu_queue->kctx;
|
||||
|
||||
lockdep_assert_held(&kctx->csf.kcpu_queues.lock);
|
||||
lockdep_assert_held(&kcpu_queue->lock);
|
||||
|
||||
if (WARN_ON(!fence_info->fence))
|
||||
return -EINVAL;
|
||||
|
|
@ -1401,7 +1420,6 @@ static int kbase_kcpu_fence_wait_prepare(
|
|||
struct base_kcpu_command_fence_info *fence_info,
|
||||
struct kbase_kcpu_command *current_command)
|
||||
{
|
||||
struct kbase_context *const kctx = kcpu_queue->kctx;
|
||||
#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE)
|
||||
struct fence *fence_in;
|
||||
#else
|
||||
|
|
@ -1409,7 +1427,7 @@ static int kbase_kcpu_fence_wait_prepare(
|
|||
#endif
|
||||
struct base_fence fence;
|
||||
|
||||
lockdep_assert_held(&kctx->csf.kcpu_queues.lock);
|
||||
lockdep_assert_held(&kcpu_queue->lock);
|
||||
|
||||
if (copy_from_user(&fence, u64_to_user_ptr(fence_info->fence),
|
||||
sizeof(fence)))
|
||||
|
|
@ -1460,7 +1478,6 @@ static int kbase_kcpu_fence_signal_prepare(
|
|||
struct base_kcpu_command_fence_info *fence_info,
|
||||
struct kbase_kcpu_command *current_command)
|
||||
{
|
||||
struct kbase_context *const kctx = kcpu_queue->kctx;
|
||||
#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE)
|
||||
struct fence *fence_out;
|
||||
#else
|
||||
|
|
@ -1471,7 +1488,7 @@ static int kbase_kcpu_fence_signal_prepare(
|
|||
int ret = 0;
|
||||
int fd;
|
||||
|
||||
lockdep_assert_held(&kctx->csf.kcpu_queues.lock);
|
||||
lockdep_assert_held(&kcpu_queue->lock);
|
||||
|
||||
if (copy_from_user(&fence, u64_to_user_ptr(fence_info->fence),
|
||||
sizeof(fence)))
|
||||
|
|
@ -1549,11 +1566,9 @@ static void kcpu_queue_process_worker(struct work_struct *data)
|
|||
struct kbase_kcpu_command_queue *queue = container_of(data,
|
||||
struct kbase_kcpu_command_queue, work);
|
||||
|
||||
mutex_lock(&queue->kctx->csf.kcpu_queues.lock);
|
||||
|
||||
mutex_lock(&queue->lock);
|
||||
kcpu_queue_process(queue, false);
|
||||
|
||||
mutex_unlock(&queue->kctx->csf.kcpu_queues.lock);
|
||||
mutex_unlock(&queue->lock);
|
||||
}
|
||||
|
||||
static int delete_queue(struct kbase_context *kctx, u32 id)
|
||||
|
|
@ -1569,6 +1584,17 @@ static int delete_queue(struct kbase_context *kctx, u32 id)
|
|||
KBASE_KTRACE_ADD_CSF_KCPU(kctx->kbdev, KCPU_QUEUE_DELETE,
|
||||
queue, queue->num_pending_cmds, queue->cqs_wait_count);
|
||||
|
||||
/* Disassociate the queue from the system to prevent further
|
||||
* submissions. Draining pending commands would be acceptable
|
||||
* even if a new queue is created using the same ID.
|
||||
*/
|
||||
kctx->csf.kcpu_queues.array[id] = NULL;
|
||||
bitmap_clear(kctx->csf.kcpu_queues.in_use, id, 1);
|
||||
|
||||
mutex_unlock(&kctx->csf.kcpu_queues.lock);
|
||||
|
||||
mutex_lock(&queue->lock);
|
||||
|
||||
/* Drain the remaining work for this queue first and go past
|
||||
* all the waits.
|
||||
*/
|
||||
|
|
@ -1580,17 +1606,17 @@ static int delete_queue(struct kbase_context *kctx, u32 id)
|
|||
/* All CQS wait commands should have been cleaned up */
|
||||
WARN_ON(queue->cqs_wait_count);
|
||||
|
||||
kctx->csf.kcpu_queues.array[id] = NULL;
|
||||
bitmap_clear(kctx->csf.kcpu_queues.in_use, id, 1);
|
||||
|
||||
/* Fire the tracepoint with the mutex held to enforce correct
|
||||
* ordering with the summary stream.
|
||||
*/
|
||||
KBASE_TLSTREAM_TL_KBASE_DEL_KCPUQUEUE(kctx->kbdev, queue);
|
||||
|
||||
mutex_unlock(&kctx->csf.kcpu_queues.lock);
|
||||
mutex_unlock(&queue->lock);
|
||||
|
||||
cancel_work_sync(&queue->work);
|
||||
destroy_workqueue(queue->wq);
|
||||
|
||||
mutex_destroy(&queue->lock);
|
||||
|
||||
kfree(queue);
|
||||
} else {
|
||||
|
|
@ -1657,7 +1683,7 @@ static void kcpu_queue_process(struct kbase_kcpu_command_queue *queue,
|
|||
bool process_next = true;
|
||||
size_t i;
|
||||
|
||||
lockdep_assert_held(&queue->kctx->csf.kcpu_queues.lock);
|
||||
lockdep_assert_held(&queue->lock);
|
||||
|
||||
for (i = 0; i != queue->num_pending_cmds; ++i) {
|
||||
struct kbase_kcpu_command *cmd =
|
||||
|
|
@ -2058,9 +2084,11 @@ int kbase_csf_kcpu_queue_enqueue(struct kbase_context *kctx,
|
|||
|
||||
/* The offset to the first command that is being processed or yet to
|
||||
* be processed is of u8 type, so the number of commands inside the
|
||||
* queue cannot be more than 256.
|
||||
* queue cannot be more than 256. The current implementation expects
|
||||
* exactly 256, any other size will require the addition of wrapping
|
||||
* logic.
|
||||
*/
|
||||
BUILD_BUG_ON(KBASEP_KCPU_QUEUE_SIZE > 256);
|
||||
BUILD_BUG_ON(KBASEP_KCPU_QUEUE_SIZE != 256);
|
||||
|
||||
/* Whilst the backend interface allows enqueueing multiple commands in
|
||||
* a single operation, the Base interface does not expose any mechanism
|
||||
|
|
@ -2076,13 +2104,13 @@ int kbase_csf_kcpu_queue_enqueue(struct kbase_context *kctx,
|
|||
}
|
||||
|
||||
mutex_lock(&kctx->csf.kcpu_queues.lock);
|
||||
|
||||
if (!kctx->csf.kcpu_queues.array[enq->id]) {
|
||||
ret = -EINVAL;
|
||||
goto out;
|
||||
}
|
||||
|
||||
queue = kctx->csf.kcpu_queues.array[enq->id];
|
||||
mutex_unlock(&kctx->csf.kcpu_queues.lock);
|
||||
|
||||
if (queue == NULL)
|
||||
return -EINVAL;
|
||||
|
||||
mutex_lock(&queue->lock);
|
||||
|
||||
if (kcpu_queue_get_space(queue) < enq->nr_commands) {
|
||||
ret = -EBUSY;
|
||||
|
|
@ -2097,7 +2125,7 @@ int kbase_csf_kcpu_queue_enqueue(struct kbase_context *kctx,
|
|||
* for the possibility to roll back.
|
||||
*/
|
||||
|
||||
for (i = 0; (i != enq->nr_commands) && !ret; ++i, ++kctx->csf.kcpu_queues.num_cmds) {
|
||||
for (i = 0; (i != enq->nr_commands) && !ret; ++i) {
|
||||
struct kbase_kcpu_command *kcpu_cmd =
|
||||
&queue->commands[(u8)(queue->start_offset + queue->num_pending_cmds + i)];
|
||||
struct base_kcpu_command command;
|
||||
|
|
@ -2120,7 +2148,7 @@ int kbase_csf_kcpu_queue_enqueue(struct kbase_context *kctx,
|
|||
}
|
||||
}
|
||||
|
||||
kcpu_cmd->enqueue_ts = kctx->csf.kcpu_queues.num_cmds;
|
||||
kcpu_cmd->enqueue_ts = atomic64_inc_return(&kctx->csf.kcpu_queues.cmd_seq_num);
|
||||
switch (command.type) {
|
||||
case BASE_KCPU_COMMAND_TYPE_FENCE_WAIT:
|
||||
#if IS_ENABLED(CONFIG_SYNC_FILE)
|
||||
|
|
@ -2208,13 +2236,10 @@ int kbase_csf_kcpu_queue_enqueue(struct kbase_context *kctx,
|
|||
|
||||
queue->num_pending_cmds += enq->nr_commands;
|
||||
kcpu_queue_process(queue, false);
|
||||
} else {
|
||||
/* Roll back the number of enqueued commands */
|
||||
kctx->csf.kcpu_queues.num_cmds -= i;
|
||||
}
|
||||
|
||||
out:
|
||||
mutex_unlock(&kctx->csf.kcpu_queues.lock);
|
||||
mutex_unlock(&queue->lock);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
|
@ -2228,14 +2253,9 @@ int kbase_csf_kcpu_queue_context_init(struct kbase_context *kctx)
|
|||
for (idx = 0; idx < KBASEP_MAX_KCPU_QUEUES; ++idx)
|
||||
kctx->csf.kcpu_queues.array[idx] = NULL;
|
||||
|
||||
kctx->csf.kcpu_queues.wq = alloc_workqueue("mali_kbase_csf_kcpu",
|
||||
WQ_UNBOUND | WQ_HIGHPRI, 0);
|
||||
if (!kctx->csf.kcpu_queues.wq)
|
||||
return -ENOMEM;
|
||||
|
||||
mutex_init(&kctx->csf.kcpu_queues.lock);
|
||||
|
||||
kctx->csf.kcpu_queues.num_cmds = 0;
|
||||
atomic64_set(&kctx->csf.kcpu_queues.cmd_seq_num, 0);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
|
@ -2253,7 +2273,6 @@ void kbase_csf_kcpu_queue_context_term(struct kbase_context *kctx)
|
|||
(void)delete_queue(kctx, id);
|
||||
}
|
||||
|
||||
destroy_workqueue(kctx->csf.kcpu_queues.wq);
|
||||
mutex_destroy(&kctx->csf.kcpu_queues.lock);
|
||||
}
|
||||
|
||||
|
|
@ -2297,8 +2316,17 @@ int kbase_csf_kcpu_queue_new(struct kbase_context *kctx,
|
|||
goto out;
|
||||
}
|
||||
|
||||
queue->wq = alloc_workqueue("mali_kbase_csf_kcpu_wq_%i", WQ_UNBOUND | WQ_HIGHPRI, 0, idx);
|
||||
if (queue->wq == NULL) {
|
||||
kfree(queue);
|
||||
ret = -ENOMEM;
|
||||
|
||||
goto out;
|
||||
}
|
||||
|
||||
bitmap_set(kctx->csf.kcpu_queues.in_use, idx, 1);
|
||||
kctx->csf.kcpu_queues.array[idx] = queue;
|
||||
mutex_init(&queue->lock);
|
||||
queue->kctx = kctx;
|
||||
queue->start_offset = 0;
|
||||
queue->num_pending_cmds = 0;
|
||||
|
|
|
|||
|
|
@ -236,9 +236,11 @@ struct kbase_kcpu_command {
|
|||
/**
|
||||
* struct kbase_kcpu_command_queue - a command queue executed by the kernel
|
||||
*
|
||||
* @lock: Lock to protect accesses to this queue.
|
||||
* @kctx: The context to which this command queue belongs.
|
||||
* @commands: Array of commands which have been successfully
|
||||
* enqueued to this command queue.
|
||||
* @wq: Dedicated workqueue for processing commands.
|
||||
* @work: struct work_struct which contains a pointer to
|
||||
* the function which handles processing of kcpu
|
||||
* commands enqueued into a kcpu command queue;
|
||||
|
|
@ -274,8 +276,10 @@ struct kbase_kcpu_command {
|
|||
* @fence_timeout: Timer used to detect the fence wait timeout.
|
||||
*/
|
||||
struct kbase_kcpu_command_queue {
|
||||
struct mutex lock;
|
||||
struct kbase_context *kctx;
|
||||
struct kbase_kcpu_command commands[KBASEP_KCPU_QUEUE_SIZE];
|
||||
struct workqueue_struct *wq;
|
||||
struct work_struct work;
|
||||
u8 start_offset;
|
||||
u8 id;
|
||||
|
|
|
|||
|
|
@ -163,6 +163,8 @@
|
|||
#define CSG_PROTM_SUSPEND_BUF_HI 0x004C /* () Protected mode suspend buffer, high word */
|
||||
#define CSG_CONFIG 0x0050 /* () CSG configuration options */
|
||||
#define CSG_ITER_TRACE_CONFIG 0x0054 /* () CSG trace configuration */
|
||||
#define CSG_DVS_BUF_LO 0x0060 /* () Normal mode deferred vertex shading work buffer, low word */
|
||||
#define CSG_DVS_BUF_HI 0x0064 /* () Normal mode deferred vertex shading work buffer, high word */
|
||||
|
||||
/* CSG_OUTPUT_BLOCK register offsets */
|
||||
#define CSG_ACK 0x0000 /* () CSG acknowledge flags */
|
||||
|
|
@ -547,6 +549,13 @@
|
|||
#define CS_STATUS_WAIT_SB_MASK_SET(reg_val, value) \
|
||||
(((reg_val) & ~CS_STATUS_WAIT_SB_MASK_MASK) | \
|
||||
(((value) << CS_STATUS_WAIT_SB_MASK_SHIFT) & CS_STATUS_WAIT_SB_MASK_MASK))
|
||||
#define CS_STATUS_WAIT_SB_SOURCE_SHIFT 16
|
||||
#define CS_STATUS_WAIT_SB_SOURCE_MASK (0xF << CS_STATUS_WAIT_SB_SOURCE_SHIFT)
|
||||
#define CS_STATUS_WAIT_SB_SOURCE_GET(reg_val) \
|
||||
(((reg_val)&CS_STATUS_WAIT_SB_SOURCE_MASK) >> CS_STATUS_WAIT_SB_SOURCE_SHIFT)
|
||||
#define CS_STATUS_WAIT_SB_SOURCE_SET(reg_val, value) \
|
||||
(((reg_val) & ~CS_STATUS_WAIT_SB_SOURCE_MASK) | \
|
||||
(((value) << CS_STATUS_WAIT_SB_SOURCE_SHIFT) & CS_STATUS_WAIT_SB_SOURCE_MASK))
|
||||
#define CS_STATUS_WAIT_SYNC_WAIT_CONDITION_SHIFT 24
|
||||
#define CS_STATUS_WAIT_SYNC_WAIT_CONDITION_MASK (0xF << CS_STATUS_WAIT_SYNC_WAIT_CONDITION_SHIFT)
|
||||
#define CS_STATUS_WAIT_SYNC_WAIT_CONDITION_GET(reg_val) \
|
||||
|
|
@ -557,6 +566,7 @@
|
|||
/* CS_STATUS_WAIT_SYNC_WAIT_CONDITION values */
|
||||
#define CS_STATUS_WAIT_SYNC_WAIT_CONDITION_LE 0x0
|
||||
#define CS_STATUS_WAIT_SYNC_WAIT_CONDITION_GT 0x1
|
||||
#define CS_STATUS_WAIT_SYNC_WAIT_CONDITION_GE 0x5
|
||||
/* End of CS_STATUS_WAIT_SYNC_WAIT_CONDITION values */
|
||||
#define CS_STATUS_WAIT_PROGRESS_WAIT_SHIFT 28
|
||||
#define CS_STATUS_WAIT_PROGRESS_WAIT_MASK (0x1 << CS_STATUS_WAIT_PROGRESS_WAIT_SHIFT)
|
||||
|
|
@ -835,11 +845,6 @@
|
|||
#define CSG_REQ_IDLE_GET(reg_val) (((reg_val)&CSG_REQ_IDLE_MASK) >> CSG_REQ_IDLE_SHIFT)
|
||||
#define CSG_REQ_IDLE_SET(reg_val, value) \
|
||||
(((reg_val) & ~CSG_REQ_IDLE_MASK) | (((value) << CSG_REQ_IDLE_SHIFT) & CSG_REQ_IDLE_MASK))
|
||||
#define CSG_REQ_DOORBELL_SHIFT 30
|
||||
#define CSG_REQ_DOORBELL_MASK (0x1 << CSG_REQ_DOORBELL_SHIFT)
|
||||
#define CSG_REQ_DOORBELL_GET(reg_val) (((reg_val)&CSG_REQ_DOORBELL_MASK) >> CSG_REQ_DOORBELL_SHIFT)
|
||||
#define CSG_REQ_DOORBELL_SET(reg_val, value) \
|
||||
(((reg_val) & ~CSG_REQ_DOORBELL_MASK) | (((value) << CSG_REQ_DOORBELL_SHIFT) & CSG_REQ_DOORBELL_MASK))
|
||||
#define CSG_REQ_PROGRESS_TIMER_EVENT_SHIFT 31
|
||||
#define CSG_REQ_PROGRESS_TIMER_EVENT_MASK (0x1 << CSG_REQ_PROGRESS_TIMER_EVENT_SHIFT)
|
||||
#define CSG_REQ_PROGRESS_TIMER_EVENT_GET(reg_val) \
|
||||
|
|
@ -956,6 +961,21 @@
|
|||
(((reg_val) & ~CSG_PROTM_SUSPEND_BUF_POINTER_MASK) | \
|
||||
(((value) << CSG_PROTM_SUSPEND_BUF_POINTER_SHIFT) & CSG_PROTM_SUSPEND_BUF_POINTER_MASK))
|
||||
|
||||
/* CSG_DVS_BUF_BUFFER register */
|
||||
#define CSG_DVS_BUF_BUFFER_SIZE_SHIFT GPU_U(0)
|
||||
#define CSG_DVS_BUF_BUFFER_SIZE_MASK (GPU_U(0xFFF) << CSG_DVS_BUF_BUFFER_SIZE_SHIFT)
|
||||
#define CSG_DVS_BUF_BUFFER_SIZE_GET(reg_val) (((reg_val)&CSG_DVS_BUF_BUFFER_SIZE_MASK) >> CSG_DVS_BUF_BUFFER_SIZE_SHIFT)
|
||||
#define CSG_DVS_BUF_BUFFER_SIZE_SET(reg_val, value) \
|
||||
(((reg_val) & ~CSG_DVS_BUF_BUFFER_SIZE_MASK) | \
|
||||
(((value) << CSG_DVS_BUF_BUFFER_SIZE_SHIFT) & CSG_DVS_BUF_BUFFER_SIZE_MASK))
|
||||
#define CSG_DVS_BUF_BUFFER_POINTER_SHIFT GPU_U(12)
|
||||
#define CSG_DVS_BUF_BUFFER_POINTER_MASK \
|
||||
(GPU_ULL(0xFFFFFFFFFFFFF) << CSG_DVS_BUF_BUFFER_POINTER_SHIFT)
|
||||
#define CSG_DVS_BUF_BUFFER_POINTER_GET(reg_val) \
|
||||
(((reg_val)&CSG_DVS_BUF_BUFFER_POINTER_MASK) >> CSG_DVS_BUF_BUFFER_POINTER_SHIFT)
|
||||
#define CSG_DVS_BUF_BUFFER_POINTER_SET(reg_val, value) \
|
||||
(((reg_val) & ~CSG_DVS_BUF_BUFFER_POINTER_MASK) | \
|
||||
(((value) << CSG_DVS_BUF_BUFFER_POINTER_SHIFT) & CSG_DVS_BUF_BUFFER_POINTER_MASK))
|
||||
|
||||
/* End of CSG_INPUT_BLOCK register set definitions */
|
||||
|
||||
|
|
|
|||
|
|
@ -1,7 +1,7 @@
|
|||
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
|
||||
/*
|
||||
*
|
||||
* (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved.
|
||||
* (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved.
|
||||
*
|
||||
* This program is free software and is provided to you under the terms of the
|
||||
* GNU General Public License version 2 as published by the Free Software
|
||||
|
|
@ -21,7 +21,7 @@
|
|||
|
||||
#include <mali_kbase.h>
|
||||
#include <mali_kbase_ctx_sched.h>
|
||||
#include <mali_kbase_hwcnt_context.h>
|
||||
#include <hwcnt/mali_kbase_hwcnt_context.h>
|
||||
#include <device/mali_kbase_device.h>
|
||||
#include <backend/gpu/mali_kbase_irq_internal.h>
|
||||
#include <backend/gpu/mali_kbase_pm_internal.h>
|
||||
|
|
@ -29,7 +29,7 @@
|
|||
#include <csf/mali_kbase_csf_trace_buffer.h>
|
||||
#include <csf/ipa_control/mali_kbase_csf_ipa_control.h>
|
||||
#include <mali_kbase_reset_gpu.h>
|
||||
#include <linux/string.h>
|
||||
#include <csf/mali_kbase_csf_firmware_log.h>
|
||||
|
||||
enum kbasep_soft_reset_status {
|
||||
RESET_SUCCESS = 0,
|
||||
|
|
@ -257,68 +257,6 @@ static void kbase_csf_debug_dump_registers(struct kbase_device *kbdev)
|
|||
kbase_reg_read(kbdev, GPU_CONTROL_REG(TILER_CONFIG)));
|
||||
}
|
||||
|
||||
static void kbase_csf_dump_firmware_trace_buffer(struct kbase_device *kbdev)
|
||||
{
|
||||
u8 *buf, *p, *pnewline, *pend, *pendbuf;
|
||||
unsigned int read_size, remaining_size;
|
||||
struct firmware_trace_buffer *tb =
|
||||
kbase_csf_firmware_get_trace_buffer(kbdev, FW_TRACE_BUF_NAME);
|
||||
|
||||
if (tb == NULL) {
|
||||
dev_dbg(kbdev->dev, "Can't get the trace buffer, firmware trace dump skipped");
|
||||
return;
|
||||
}
|
||||
|
||||
buf = kmalloc(PAGE_SIZE + 1, GFP_KERNEL);
|
||||
if (buf == NULL) {
|
||||
dev_err(kbdev->dev, "Short of memory, firmware trace dump skipped");
|
||||
return;
|
||||
}
|
||||
|
||||
buf[PAGE_SIZE] = 0;
|
||||
|
||||
p = buf;
|
||||
pendbuf = &buf[PAGE_SIZE];
|
||||
|
||||
dev_err(kbdev->dev, "Firmware trace buffer dump:");
|
||||
while ((read_size = kbase_csf_firmware_trace_buffer_read_data(tb, p,
|
||||
pendbuf - p))) {
|
||||
pend = p + read_size;
|
||||
p = buf;
|
||||
|
||||
while (p < pend && (pnewline = memchr(p, '\n', pend - p))) {
|
||||
/* Null-terminate the string */
|
||||
*pnewline = 0;
|
||||
|
||||
dev_err(kbdev->dev, "FW> %s", p);
|
||||
|
||||
p = pnewline + 1;
|
||||
}
|
||||
|
||||
remaining_size = pend - p;
|
||||
|
||||
if (!remaining_size) {
|
||||
p = buf;
|
||||
} else if (remaining_size < PAGE_SIZE) {
|
||||
/* Copy unfinished string to the start of the buffer */
|
||||
memmove(buf, p, remaining_size);
|
||||
p = &buf[remaining_size];
|
||||
} else {
|
||||
/* Print abnormal page-long string without newlines */
|
||||
dev_err(kbdev->dev, "FW> %s", buf);
|
||||
p = buf;
|
||||
}
|
||||
}
|
||||
|
||||
if (p != buf) {
|
||||
/* Null-terminate and print last unfinished string */
|
||||
*p = 0;
|
||||
dev_err(kbdev->dev, "FW> %s", buf);
|
||||
}
|
||||
|
||||
kfree(buf);
|
||||
}
|
||||
|
||||
/**
|
||||
* kbase_csf_hwcnt_on_reset_error() - Sets HWCNT to appropriate state in the
|
||||
* event of an error during GPU reset.
|
||||
|
|
@ -378,7 +316,6 @@ static enum kbasep_soft_reset_status kbase_csf_reset_gpu_once(struct kbase_devic
|
|||
"The flush has completed so reset the active indicator\n");
|
||||
kbdev->irq_reset_flush = false;
|
||||
|
||||
mutex_lock(&kbdev->pm.lock);
|
||||
if (!silent)
|
||||
dev_err(kbdev->dev, "Resetting GPU (allowing up to %d ms)",
|
||||
RESET_TIMEOUT);
|
||||
|
|
@ -389,7 +326,7 @@ static enum kbasep_soft_reset_status kbase_csf_reset_gpu_once(struct kbase_devic
|
|||
if (!silent) {
|
||||
kbase_csf_debug_dump_registers(kbdev);
|
||||
if (likely(firmware_inited))
|
||||
kbase_csf_dump_firmware_trace_buffer(kbdev);
|
||||
kbase_csf_firmware_log_dump_buffer(kbdev);
|
||||
}
|
||||
|
||||
spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
|
||||
|
|
@ -403,6 +340,7 @@ static enum kbasep_soft_reset_status kbase_csf_reset_gpu_once(struct kbase_devic
|
|||
*/
|
||||
kbase_hwcnt_backend_csf_on_before_reset(&kbdev->hwcnt_gpu_iface);
|
||||
|
||||
mutex_lock(&kbdev->pm.lock);
|
||||
/* Reset the GPU */
|
||||
err = kbase_pm_init_hw(kbdev, 0);
|
||||
|
||||
|
|
@ -633,6 +571,11 @@ bool kbase_reset_gpu_is_active(struct kbase_device *kbdev)
|
|||
return kbase_csf_reset_state_is_active(reset_state);
|
||||
}
|
||||
|
||||
bool kbase_reset_gpu_is_not_pending(struct kbase_device *kbdev)
|
||||
{
|
||||
return atomic_read(&kbdev->csf.reset.state) == KBASE_CSF_RESET_GPU_NOT_PENDING;
|
||||
}
|
||||
|
||||
int kbase_reset_gpu_wait(struct kbase_device *kbdev)
|
||||
{
|
||||
const long wait_timeout =
|
||||
|
|
|
|||
File diff suppressed because it is too large
Load Diff
|
|
@ -36,7 +36,9 @@
|
|||
* If the CSG is already scheduled and resident, the CSI will be started
|
||||
* right away, otherwise once the group is made resident.
|
||||
*
|
||||
* Return: 0 on success, or negative on failure.
|
||||
* Return: 0 on success, or negative on failure. -EBUSY is returned to
|
||||
* indicate to the caller that queue could not be enabled due to Scheduler
|
||||
* state and the caller can try to enable the queue after some time.
|
||||
*/
|
||||
int kbase_csf_scheduler_queue_start(struct kbase_queue *queue);
|
||||
|
||||
|
|
@ -530,12 +532,30 @@ static inline void kbase_csf_scheduler_invoke_tick(struct kbase_device *kbdev)
|
|||
struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler;
|
||||
unsigned long flags;
|
||||
|
||||
KBASE_KTRACE_ADD(kbdev, SCHEDULER_TICK_INVOKE, NULL, 0u);
|
||||
spin_lock_irqsave(&scheduler->interrupt_lock, flags);
|
||||
if (!scheduler->tick_timer_active)
|
||||
queue_work(scheduler->wq, &scheduler->tick_work);
|
||||
spin_unlock_irqrestore(&scheduler->interrupt_lock, flags);
|
||||
}
|
||||
|
||||
/**
|
||||
* kbase_csf_scheduler_invoke_tock() - Invoke the scheduling tock
|
||||
*
|
||||
* @kbdev: Pointer to the device
|
||||
*
|
||||
* This function will queue the scheduling tock work item for immediate
|
||||
* execution.
|
||||
*/
|
||||
static inline void kbase_csf_scheduler_invoke_tock(struct kbase_device *kbdev)
|
||||
{
|
||||
struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler;
|
||||
|
||||
KBASE_KTRACE_ADD(kbdev, SCHEDULER_TOCK_INVOKE, NULL, 0u);
|
||||
if (atomic_cmpxchg(&scheduler->pending_tock_work, false, true) == false)
|
||||
mod_delayed_work(scheduler->wq, &scheduler->tock_work, 0);
|
||||
}
|
||||
|
||||
/**
|
||||
* kbase_csf_scheduler_queue_has_trace() - report whether the queue has been
|
||||
* configured to operate with the
|
||||
|
|
|
|||
File diff suppressed because it is too large
Load Diff
|
|
@ -23,7 +23,6 @@
|
|||
#define _KBASE_CSF_TILER_HEAP_H_
|
||||
|
||||
#include <mali_kbase.h>
|
||||
|
||||
/**
|
||||
* kbase_csf_tiler_heap_context_init - Initialize the tiler heaps context for a
|
||||
* GPU address space
|
||||
|
|
@ -58,6 +57,12 @@ void kbase_csf_tiler_heap_context_term(struct kbase_context *kctx);
|
|||
* @target_in_flight: Number of render-passes that the driver should attempt to
|
||||
* keep in flight for which allocation of new chunks is
|
||||
* allowed. Must not be zero.
|
||||
* @buf_desc_va: Buffer descriptor GPU virtual address. This is a hint for
|
||||
* indicating that the caller is intending to perform tiler heap
|
||||
* chunks reclaim for those that are hoarded with hardware while
|
||||
* the associated shader activities are suspended and the CSGs are
|
||||
* off slots. If the referred reclaiming is not desired, can
|
||||
* set it to 0.
|
||||
* @gpu_heap_va: Where to store the GPU virtual address of the context that was
|
||||
* set up for the tiler heap.
|
||||
* @first_chunk_va: Where to store the GPU virtual address of the first chunk
|
||||
|
|
@ -66,10 +71,9 @@ void kbase_csf_tiler_heap_context_term(struct kbase_context *kctx);
|
|||
*
|
||||
* Return: 0 if successful or a negative error code on failure.
|
||||
*/
|
||||
int kbase_csf_tiler_heap_init(struct kbase_context *kctx,
|
||||
u32 chunk_size, u32 initial_chunks, u32 max_chunks,
|
||||
u16 target_in_flight, u64 *gpu_heap_va,
|
||||
u64 *first_chunk_va);
|
||||
int kbase_csf_tiler_heap_init(struct kbase_context *kctx, u32 chunk_size, u32 initial_chunks,
|
||||
u32 max_chunks, u16 target_in_flight, u64 const buf_desc_va,
|
||||
u64 *gpu_heap_va, u64 *first_chunk_va);
|
||||
|
||||
/**
|
||||
* kbase_csf_tiler_heap_term - Terminate a chunked tiler memory heap.
|
||||
|
|
@ -112,4 +116,27 @@ int kbase_csf_tiler_heap_term(struct kbase_context *kctx, u64 gpu_heap_va);
|
|||
*/
|
||||
int kbase_csf_tiler_heap_alloc_new_chunk(struct kbase_context *kctx,
|
||||
u64 gpu_heap_va, u32 nr_in_flight, u32 pending_frag_count, u64 *new_chunk_ptr);
|
||||
|
||||
/**
|
||||
* kbase_csf_tiler_heap_scan_kctx_unused_pages - Performs the tiler heap shrinker reclaim's scan
|
||||
* functionality.
|
||||
*
|
||||
* @kctx: Pointer to the kbase context for which the tiler heap reclaim is to be
|
||||
* operated with.
|
||||
* @to_free: Number of pages suggested for the reclaim scan (free) method to reach.
|
||||
*
|
||||
* Return: the actual number of pages the scan method has freed from the call.
|
||||
*/
|
||||
u32 kbase_csf_tiler_heap_scan_kctx_unused_pages(struct kbase_context *kctx, u32 to_free);
|
||||
|
||||
/**
|
||||
* kbase_csf_tiler_heap_count_kctx_unused_pages - Performs the tiler heap shrinker reclaim's count
|
||||
* functionality.
|
||||
*
|
||||
* @kctx: Pointer to the kbase context for which the tiler heap reclaim is to be
|
||||
* operated with.
|
||||
*
|
||||
* Return: a number of pages that could likely be freed on the subsequent scan method call.
|
||||
*/
|
||||
u32 kbase_csf_tiler_heap_count_kctx_unused_pages(struct kbase_context *kctx);
|
||||
#endif
|
||||
|
|
|
|||
|
|
@ -56,12 +56,20 @@
|
|||
((CHUNK_HDR_NEXT_ADDR_MASK >> CHUNK_HDR_NEXT_ADDR_POS) << \
|
||||
CHUNK_HDR_NEXT_ADDR_ENCODE_SHIFT)
|
||||
|
||||
/* The size of the area needed to be vmapped prior to handing the tiler heap
|
||||
* over to the tiler, so that the shrinker could be invoked.
|
||||
*/
|
||||
#define NEXT_CHUNK_ADDR_SIZE (sizeof(u64))
|
||||
|
||||
/**
|
||||
* struct kbase_csf_tiler_heap_chunk - A tiler heap chunk managed by the kernel
|
||||
*
|
||||
* @link: Link to this chunk in a list of chunks belonging to a
|
||||
* @kbase_csf_tiler_heap.
|
||||
* @region: Pointer to the GPU memory region allocated for the chunk.
|
||||
* @map: Kernel VA mapping so that we would not need to use vmap in the
|
||||
* shrinker callback, which can allocate. This maps only the header
|
||||
* of the chunk, so it could be traversed.
|
||||
* @gpu_va: GPU virtual address of the start of the memory region.
|
||||
* This points to the header of the chunk and not to the low address
|
||||
* of free memory within it.
|
||||
|
|
@ -75,9 +83,12 @@
|
|||
struct kbase_csf_tiler_heap_chunk {
|
||||
struct list_head link;
|
||||
struct kbase_va_region *region;
|
||||
struct kbase_vmap_struct map;
|
||||
u64 gpu_va;
|
||||
};
|
||||
|
||||
#define HEAP_BUF_DESCRIPTOR_CHECKED (1 << 0)
|
||||
|
||||
/**
|
||||
* struct kbase_csf_tiler_heap - A tiler heap managed by the kernel
|
||||
*
|
||||
|
|
@ -85,6 +96,20 @@ struct kbase_csf_tiler_heap_chunk {
|
|||
* associated.
|
||||
* @link: Link to this heap in a list of tiler heaps belonging to
|
||||
* the @kbase_csf_tiler_heap_context.
|
||||
* @chunks_list: Linked list of allocated chunks.
|
||||
* @gpu_va: The GPU virtual address of the heap context structure that
|
||||
* was allocated for the firmware. This is also used to
|
||||
* uniquely identify the heap.
|
||||
* @heap_id: Unique id representing the heap, assigned during heap
|
||||
* initialization.
|
||||
* @buf_desc_va: Buffer descriptor GPU VA. Can be 0 for backward compatible
|
||||
* to earlier version base interfaces.
|
||||
* @buf_desc_reg: Pointer to the VA region that covers the provided buffer
|
||||
* descriptor memory object pointed to by buf_desc_va.
|
||||
* @gpu_va_map: Kernel VA mapping of the GPU VA region.
|
||||
* @buf_desc_map: Kernel VA mapping of the buffer descriptor, read from
|
||||
* during the tiler heap shrinker. Sync operations may need
|
||||
* to be done before each read.
|
||||
* @chunk_size: Size of each chunk, in bytes. Must be page-aligned.
|
||||
* @chunk_count: The number of chunks currently allocated. Must not be
|
||||
* zero or greater than @max_chunks.
|
||||
|
|
@ -93,22 +118,23 @@ struct kbase_csf_tiler_heap_chunk {
|
|||
* @target_in_flight: Number of render-passes that the driver should attempt
|
||||
* to keep in flight for which allocation of new chunks is
|
||||
* allowed. Must not be zero.
|
||||
* @gpu_va: The GPU virtual address of the heap context structure that
|
||||
* was allocated for the firmware. This is also used to
|
||||
* uniquely identify the heap.
|
||||
* @heap_id: Unique id representing the heap, assigned during heap
|
||||
* initialization.
|
||||
* @chunks_list: Linked list of allocated chunks.
|
||||
* @buf_desc_checked: Indicates if runtime check on buffer descriptor has been done.
|
||||
*/
|
||||
struct kbase_csf_tiler_heap {
|
||||
struct kbase_context *kctx;
|
||||
struct list_head link;
|
||||
struct list_head chunks_list;
|
||||
u64 gpu_va;
|
||||
u64 heap_id;
|
||||
u64 buf_desc_va;
|
||||
struct kbase_va_region *buf_desc_reg;
|
||||
struct kbase_vmap_struct buf_desc_map;
|
||||
struct kbase_vmap_struct gpu_va_map;
|
||||
u32 chunk_size;
|
||||
u32 chunk_count;
|
||||
u32 max_chunks;
|
||||
u16 target_in_flight;
|
||||
u64 gpu_va;
|
||||
u64 heap_id;
|
||||
struct list_head chunks_list;
|
||||
bool buf_desc_checked;
|
||||
};
|
||||
|
||||
#endif /* !_KBASE_CSF_TILER_HEAP_DEF_H_ */
|
||||
|
|
|
|||
367
drivers/gpu/arm/bifrost/csf/mali_kbase_csf_tiler_heap_reclaim.c
Normal file
367
drivers/gpu/arm/bifrost/csf/mali_kbase_csf_tiler_heap_reclaim.c
Normal file
|
|
@ -0,0 +1,367 @@
|
|||
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
|
||||
/*
|
||||
*
|
||||
* (C) COPYRIGHT 2022 ARM Limited. All rights reserved.
|
||||
*
|
||||
* This program is free software and is provided to you under the terms of the
|
||||
* GNU General Public License version 2 as published by the Free Software
|
||||
* Foundation, and any use by you of this program is subject to the terms
|
||||
* of such GNU license.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, you can access it online at
|
||||
* http://www.gnu.org/licenses/gpl-2.0.html.
|
||||
*
|
||||
*/
|
||||
|
||||
#include <mali_kbase.h>
|
||||
#include "mali_kbase_csf.h"
|
||||
#include "mali_kbase_csf_tiler_heap.h"
|
||||
#include "mali_kbase_csf_tiler_heap_reclaim.h"
|
||||
|
||||
/* Tiler heap shrinker seek value, needs to be higher than jit and memory pools */
|
||||
#define HEAP_SHRINKER_SEEKS (DEFAULT_SEEKS + 2)
|
||||
|
||||
/* Tiler heap shrinker batch value */
|
||||
#define HEAP_SHRINKER_BATCH (512)
|
||||
|
||||
/* Tiler heap reclaim scan (free) method size for limiting a scan run length */
|
||||
#define HEAP_RECLAIM_SCAN_BATCH_SIZE (HEAP_SHRINKER_BATCH << 7)
|
||||
|
||||
static u8 get_kctx_highest_csg_priority(struct kbase_context *kctx)
|
||||
{
|
||||
u8 prio;
|
||||
|
||||
for (prio = KBASE_QUEUE_GROUP_PRIORITY_REALTIME; prio < KBASE_QUEUE_GROUP_PRIORITY_LOW;
|
||||
prio++)
|
||||
if (!list_empty(&kctx->csf.sched.runnable_groups[prio]))
|
||||
break;
|
||||
|
||||
if (prio != KBASE_QUEUE_GROUP_PRIORITY_REALTIME && kctx->csf.sched.num_idle_wait_grps) {
|
||||
struct kbase_queue_group *group;
|
||||
|
||||
list_for_each_entry(group, &kctx->csf.sched.idle_wait_groups, link) {
|
||||
if (group->priority < prio)
|
||||
prio = group->priority;
|
||||
}
|
||||
}
|
||||
|
||||
return prio;
|
||||
}
|
||||
|
||||
static void detach_ctx_from_heap_reclaim_mgr(struct kbase_context *kctx)
|
||||
{
|
||||
struct kbase_csf_scheduler *const scheduler = &kctx->kbdev->csf.scheduler;
|
||||
struct kbase_csf_ctx_heap_reclaim_info *info = &kctx->csf.sched.heap_info;
|
||||
|
||||
lockdep_assert_held(&scheduler->lock);
|
||||
|
||||
if (!list_empty(&info->mgr_link)) {
|
||||
u32 remaining = (info->nr_est_unused_pages > info->nr_freed_pages) ?
|
||||
info->nr_est_unused_pages - info->nr_freed_pages :
|
||||
0;
|
||||
|
||||
list_del_init(&info->mgr_link);
|
||||
if (remaining)
|
||||
WARN_ON(atomic_sub_return(remaining, &scheduler->reclaim_mgr.unused_pages) <
|
||||
0);
|
||||
|
||||
dev_dbg(kctx->kbdev->dev,
|
||||
"Reclaim_mgr_detach: ctx_%d_%d, est_pages=0%u, freed_pages=%u", kctx->tgid,
|
||||
kctx->id, info->nr_est_unused_pages, info->nr_freed_pages);
|
||||
}
|
||||
}
|
||||
|
||||
static void attach_ctx_to_heap_reclaim_mgr(struct kbase_context *kctx)
|
||||
{
|
||||
struct kbase_csf_ctx_heap_reclaim_info *const info = &kctx->csf.sched.heap_info;
|
||||
struct kbase_csf_scheduler *const scheduler = &kctx->kbdev->csf.scheduler;
|
||||
u8 const prio = get_kctx_highest_csg_priority(kctx);
|
||||
|
||||
lockdep_assert_held(&scheduler->lock);
|
||||
|
||||
if (WARN_ON(!list_empty(&info->mgr_link)))
|
||||
list_del_init(&info->mgr_link);
|
||||
|
||||
/* Count the pages that could be freed */
|
||||
info->nr_est_unused_pages = kbase_csf_tiler_heap_count_kctx_unused_pages(kctx);
|
||||
/* Initialize the scan operation tracking pages */
|
||||
info->nr_freed_pages = 0;
|
||||
|
||||
list_add_tail(&info->mgr_link, &scheduler->reclaim_mgr.ctx_lists[prio]);
|
||||
/* Accumulate the estimated pages to the manager total field */
|
||||
atomic_add(info->nr_est_unused_pages, &scheduler->reclaim_mgr.unused_pages);
|
||||
|
||||
dev_dbg(kctx->kbdev->dev, "Reclaim_mgr_attach: ctx_%d_%d, est_count_pages=%u", kctx->tgid,
|
||||
kctx->id, info->nr_est_unused_pages);
|
||||
}
|
||||
|
||||
void kbase_csf_tiler_heap_reclaim_sched_notify_grp_active(struct kbase_queue_group *group)
|
||||
{
|
||||
struct kbase_context *kctx = group->kctx;
|
||||
struct kbase_csf_ctx_heap_reclaim_info *info = &kctx->csf.sched.heap_info;
|
||||
|
||||
lockdep_assert_held(&kctx->kbdev->csf.scheduler.lock);
|
||||
|
||||
info->on_slot_grps++;
|
||||
/* If the kctx has an on-slot change from 0 => 1, detach it from reclaim_mgr */
|
||||
if (info->on_slot_grps == 1) {
|
||||
dev_dbg(kctx->kbdev->dev, "CSG_%d_%d_%d on-slot, remove kctx from reclaim manager",
|
||||
group->kctx->tgid, group->kctx->id, group->handle);
|
||||
|
||||
detach_ctx_from_heap_reclaim_mgr(kctx);
|
||||
}
|
||||
}
|
||||
|
||||
void kbase_csf_tiler_heap_reclaim_sched_notify_grp_evict(struct kbase_queue_group *group)
|
||||
{
|
||||
struct kbase_context *kctx = group->kctx;
|
||||
struct kbase_csf_ctx_heap_reclaim_info *const info = &kctx->csf.sched.heap_info;
|
||||
struct kbase_csf_scheduler *const scheduler = &kctx->kbdev->csf.scheduler;
|
||||
const u32 num_groups = kctx->kbdev->csf.global_iface.group_num;
|
||||
u32 on_slot_grps = 0;
|
||||
u32 i;
|
||||
|
||||
lockdep_assert_held(&scheduler->lock);
|
||||
|
||||
/* Group eviction from the scheduler is a bit more complex, but fairly less
|
||||
* frequent in operations. Taking the opportunity to actually count the
|
||||
* on-slot CSGs from the given kctx, for robustness and clearer code logic.
|
||||
*/
|
||||
for_each_set_bit(i, scheduler->csg_inuse_bitmap, num_groups) {
|
||||
struct kbase_csf_csg_slot *csg_slot = &scheduler->csg_slots[i];
|
||||
struct kbase_queue_group *grp = csg_slot->resident_group;
|
||||
|
||||
if (unlikely(!grp))
|
||||
continue;
|
||||
|
||||
if (grp->kctx == kctx)
|
||||
on_slot_grps++;
|
||||
}
|
||||
|
||||
info->on_slot_grps = on_slot_grps;
|
||||
|
||||
/* If the kctx has no other CSGs on-slot, handle the heap reclaim related actions */
|
||||
if (!info->on_slot_grps) {
|
||||
if (kctx->csf.sched.num_runnable_grps || kctx->csf.sched.num_idle_wait_grps) {
|
||||
/* The kctx has other operational CSGs, attach it if not yet done */
|
||||
if (list_empty(&info->mgr_link)) {
|
||||
dev_dbg(kctx->kbdev->dev,
|
||||
"CSG_%d_%d_%d evict, add kctx to reclaim manager",
|
||||
group->kctx->tgid, group->kctx->id, group->handle);
|
||||
|
||||
attach_ctx_to_heap_reclaim_mgr(kctx);
|
||||
}
|
||||
} else {
|
||||
/* The kctx is a zombie after the group eviction, drop it out */
|
||||
dev_dbg(kctx->kbdev->dev,
|
||||
"CSG_%d_%d_%d evict leading to zombie kctx, dettach from reclaim manager",
|
||||
group->kctx->tgid, group->kctx->id, group->handle);
|
||||
|
||||
detach_ctx_from_heap_reclaim_mgr(kctx);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void kbase_csf_tiler_heap_reclaim_sched_notify_grp_suspend(struct kbase_queue_group *group)
|
||||
{
|
||||
struct kbase_context *kctx = group->kctx;
|
||||
struct kbase_csf_ctx_heap_reclaim_info *info = &kctx->csf.sched.heap_info;
|
||||
|
||||
lockdep_assert_held(&kctx->kbdev->csf.scheduler.lock);
|
||||
|
||||
if (!WARN_ON(info->on_slot_grps == 0))
|
||||
info->on_slot_grps--;
|
||||
/* If the kctx has no CSGs on-slot, attach it to scheduler's reclaim manager */
|
||||
if (info->on_slot_grps == 0) {
|
||||
dev_dbg(kctx->kbdev->dev, "CSG_%d_%d_%d off-slot, add kctx to reclaim manager",
|
||||
group->kctx->tgid, group->kctx->id, group->handle);
|
||||
|
||||
attach_ctx_to_heap_reclaim_mgr(kctx);
|
||||
}
|
||||
}
|
||||
|
||||
static unsigned long reclaim_unused_heap_pages(struct kbase_device *kbdev)
|
||||
{
|
||||
struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler;
|
||||
struct kbase_csf_sched_heap_reclaim_mgr *const mgr = &scheduler->reclaim_mgr;
|
||||
unsigned long total_freed_pages = 0;
|
||||
int prio;
|
||||
|
||||
lockdep_assert_held(&kbdev->csf.scheduler.lock);
|
||||
|
||||
for (prio = KBASE_QUEUE_GROUP_PRIORITY_LOW;
|
||||
total_freed_pages < HEAP_RECLAIM_SCAN_BATCH_SIZE &&
|
||||
prio >= KBASE_QUEUE_GROUP_PRIORITY_REALTIME;
|
||||
prio--) {
|
||||
struct kbase_csf_ctx_heap_reclaim_info *info, *tmp;
|
||||
u32 cnt_ctxs = 0;
|
||||
|
||||
list_for_each_entry_safe(info, tmp, &scheduler->reclaim_mgr.ctx_lists[prio],
|
||||
mgr_link) {
|
||||
struct kbase_context *kctx =
|
||||
container_of(info, struct kbase_context, csf.sched.heap_info);
|
||||
u32 freed_pages = kbase_csf_tiler_heap_scan_kctx_unused_pages(
|
||||
kctx, info->nr_est_unused_pages);
|
||||
|
||||
if (freed_pages) {
|
||||
/* Remove the freed pages from the manager retained estimate. The
|
||||
* accumulated removals from the kctx should not exceed the kctx
|
||||
* initially notified contribution amount:
|
||||
* info->nr_est_unused_pages.
|
||||
*/
|
||||
u32 rm_cnt = MIN(info->nr_est_unused_pages - info->nr_freed_pages,
|
||||
freed_pages);
|
||||
|
||||
WARN_ON(atomic_sub_return(rm_cnt, &mgr->unused_pages) < 0);
|
||||
|
||||
/* tracking the freed pages, before a potential detach call */
|
||||
info->nr_freed_pages += freed_pages;
|
||||
total_freed_pages += freed_pages;
|
||||
|
||||
schedule_work(&kctx->jit_work);
|
||||
}
|
||||
|
||||
/* If the kctx can't offer anymore, drop it from the reclaim manger,
|
||||
* otherwise leave it remaining in. If the kctx changes its state (i.e.
|
||||
* some CSGs becoming on-slot), the scheduler will pull it out.
|
||||
*/
|
||||
if (info->nr_freed_pages >= info->nr_est_unused_pages || freed_pages == 0)
|
||||
detach_ctx_from_heap_reclaim_mgr(kctx);
|
||||
|
||||
cnt_ctxs++;
|
||||
|
||||
/* Enough has been freed, break to avoid holding the lock too long */
|
||||
if (total_freed_pages >= HEAP_RECLAIM_SCAN_BATCH_SIZE)
|
||||
break;
|
||||
}
|
||||
|
||||
dev_dbg(kbdev->dev, "Reclaim free heap pages: %lu (cnt_ctxs: %u, prio: %d)",
|
||||
total_freed_pages, cnt_ctxs, prio);
|
||||
}
|
||||
|
||||
dev_dbg(kbdev->dev, "Reclaim free total heap pages: %lu (across all CSG priority)",
|
||||
total_freed_pages);
|
||||
|
||||
return total_freed_pages;
|
||||
}
|
||||
|
||||
static unsigned long kbase_csf_tiler_heap_reclaim_count_free_pages(struct kbase_device *kbdev,
|
||||
struct shrink_control *sc)
|
||||
{
|
||||
struct kbase_csf_sched_heap_reclaim_mgr *mgr = &kbdev->csf.scheduler.reclaim_mgr;
|
||||
unsigned long page_cnt = atomic_read(&mgr->unused_pages);
|
||||
|
||||
dev_dbg(kbdev->dev, "Reclaim count unused pages (estimate): %lu", page_cnt);
|
||||
|
||||
return page_cnt;
|
||||
}
|
||||
|
||||
static unsigned long kbase_csf_tiler_heap_reclaim_scan_free_pages(struct kbase_device *kbdev,
|
||||
struct shrink_control *sc)
|
||||
{
|
||||
struct kbase_csf_sched_heap_reclaim_mgr *mgr = &kbdev->csf.scheduler.reclaim_mgr;
|
||||
unsigned long freed = 0;
|
||||
unsigned long avail = 0;
|
||||
|
||||
/* If Scheduler is busy in action, return 0 */
|
||||
if (!mutex_trylock(&kbdev->csf.scheduler.lock)) {
|
||||
struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler;
|
||||
|
||||
/* Wait for roughly 2-ms */
|
||||
wait_event_timeout(kbdev->csf.event_wait, (scheduler->state != SCHED_BUSY),
|
||||
msecs_to_jiffies(2));
|
||||
if (!mutex_trylock(&kbdev->csf.scheduler.lock)) {
|
||||
dev_dbg(kbdev->dev, "Tiler heap reclaim scan see device busy (freed: 0)");
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
avail = atomic_read(&mgr->unused_pages);
|
||||
if (avail)
|
||||
freed = reclaim_unused_heap_pages(kbdev);
|
||||
|
||||
mutex_unlock(&kbdev->csf.scheduler.lock);
|
||||
|
||||
#if (KERNEL_VERSION(4, 14, 0) <= LINUX_VERSION_CODE)
|
||||
if (freed > sc->nr_to_scan)
|
||||
sc->nr_scanned = freed;
|
||||
#endif /* (KERNEL_VERSION(4, 14, 0) <= LINUX_VERSION_CODE) */
|
||||
|
||||
dev_info(kbdev->dev, "Tiler heap reclaim scan freed pages: %lu (unused: %lu)", freed,
|
||||
avail);
|
||||
|
||||
/* On estimate suggesting available, yet actual free failed, return STOP */
|
||||
if (avail && !freed)
|
||||
return SHRINK_STOP;
|
||||
else
|
||||
return freed;
|
||||
}
|
||||
|
||||
static unsigned long kbase_csf_tiler_heap_reclaim_count_objects(struct shrinker *s,
|
||||
struct shrink_control *sc)
|
||||
{
|
||||
struct kbase_device *kbdev =
|
||||
container_of(s, struct kbase_device, csf.scheduler.reclaim_mgr.heap_reclaim);
|
||||
|
||||
return kbase_csf_tiler_heap_reclaim_count_free_pages(kbdev, sc);
|
||||
}
|
||||
|
||||
static unsigned long kbase_csf_tiler_heap_reclaim_scan_objects(struct shrinker *s,
|
||||
struct shrink_control *sc)
|
||||
{
|
||||
struct kbase_device *kbdev =
|
||||
container_of(s, struct kbase_device, csf.scheduler.reclaim_mgr.heap_reclaim);
|
||||
|
||||
return kbase_csf_tiler_heap_reclaim_scan_free_pages(kbdev, sc);
|
||||
}
|
||||
|
||||
void kbase_csf_tiler_heap_reclaim_ctx_init(struct kbase_context *kctx)
|
||||
{
|
||||
/* Per-kctx heap_info object initialization */
|
||||
memset(&kctx->csf.sched.heap_info, 0, sizeof(struct kbase_csf_ctx_heap_reclaim_info));
|
||||
INIT_LIST_HEAD(&kctx->csf.sched.heap_info.mgr_link);
|
||||
}
|
||||
|
||||
void kbase_csf_tiler_heap_reclaim_mgr_init(struct kbase_device *kbdev)
|
||||
{
|
||||
struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;
|
||||
struct shrinker *reclaim = &scheduler->reclaim_mgr.heap_reclaim;
|
||||
u8 prio;
|
||||
|
||||
for (prio = KBASE_QUEUE_GROUP_PRIORITY_REALTIME; prio < KBASE_QUEUE_GROUP_PRIORITY_COUNT;
|
||||
prio++)
|
||||
INIT_LIST_HEAD(&scheduler->reclaim_mgr.ctx_lists[prio]);
|
||||
|
||||
atomic_set(&scheduler->reclaim_mgr.unused_pages, 0);
|
||||
|
||||
reclaim->count_objects = kbase_csf_tiler_heap_reclaim_count_objects;
|
||||
reclaim->scan_objects = kbase_csf_tiler_heap_reclaim_scan_objects;
|
||||
reclaim->seeks = HEAP_SHRINKER_SEEKS;
|
||||
reclaim->batch = HEAP_SHRINKER_BATCH;
|
||||
|
||||
#if !defined(CONFIG_MALI_VECTOR_DUMP)
|
||||
register_shrinker(reclaim);
|
||||
#endif
|
||||
}
|
||||
|
||||
void kbase_csf_tiler_heap_reclaim_mgr_term(struct kbase_device *kbdev)
|
||||
{
|
||||
struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;
|
||||
u8 prio;
|
||||
|
||||
#if !defined(CONFIG_MALI_VECTOR_DUMP)
|
||||
unregister_shrinker(&scheduler->reclaim_mgr.heap_reclaim);
|
||||
#endif
|
||||
|
||||
for (prio = KBASE_QUEUE_GROUP_PRIORITY_REALTIME; prio < KBASE_QUEUE_GROUP_PRIORITY_COUNT;
|
||||
prio++)
|
||||
WARN_ON(!list_empty(&scheduler->reclaim_mgr.ctx_lists[prio]));
|
||||
|
||||
WARN_ON(atomic_read(&scheduler->reclaim_mgr.unused_pages));
|
||||
}
|
||||
|
|
@ -0,0 +1,80 @@
|
|||
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
|
||||
/*
|
||||
*
|
||||
* (C) COPYRIGHT 2022 ARM Limited. All rights reserved.
|
||||
*
|
||||
* This program is free software and is provided to you under the terms of the
|
||||
* GNU General Public License version 2 as published by the Free Software
|
||||
* Foundation, and any use by you of this program is subject to the terms
|
||||
* of such GNU license.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, you can access it online at
|
||||
* http://www.gnu.org/licenses/gpl-2.0.html.
|
||||
*
|
||||
*/
|
||||
|
||||
#ifndef _KBASE_CSF_TILER_HEAP_RECLAIM_H_
|
||||
#define _KBASE_CSF_TILER_HEAP_RECLAIM_H_
|
||||
|
||||
#include <mali_kbase.h>
|
||||
|
||||
/**
|
||||
* kbase_csf_tiler_heap_reclaim_sched_notify_grp_active - Notifier function for the scheduler
|
||||
* to use when a group is put on-slot.
|
||||
*
|
||||
* @group: Pointer to the group object that has been placed on-slot for running.
|
||||
*
|
||||
*/
|
||||
void kbase_csf_tiler_heap_reclaim_sched_notify_grp_active(struct kbase_queue_group *group);
|
||||
|
||||
/**
|
||||
* kbase_csf_tiler_heap_reclaim_sched_notify_grp_evict - Notifier function for the scheduler
|
||||
* to use when a group is evicted out of the scheduler's scope, i.e. no run of
|
||||
* the group is possible afterwards.
|
||||
*
|
||||
* @group: Pointer to the group object that has been evicted.
|
||||
*
|
||||
*/
|
||||
void kbase_csf_tiler_heap_reclaim_sched_notify_grp_evict(struct kbase_queue_group *group);
|
||||
|
||||
/**
|
||||
* kbase_csf_tiler_heap_reclaim_sched_notify_grp_suspend - Notifier function for the scheduler
|
||||
* to use when a group is suspended from running, but could resume in future.
|
||||
*
|
||||
* @group: Pointer to the group object that is in suspended state.
|
||||
*
|
||||
*/
|
||||
void kbase_csf_tiler_heap_reclaim_sched_notify_grp_suspend(struct kbase_queue_group *group);
|
||||
|
||||
/**
|
||||
* kbase_csf_tiler_heap_reclaim_ctx_init - Initializer on per context data fields for use
|
||||
* with the tiler heap reclaim manager.
|
||||
*
|
||||
* @kctx: Pointer to the kbase_context.
|
||||
*
|
||||
*/
|
||||
void kbase_csf_tiler_heap_reclaim_ctx_init(struct kbase_context *kctx);
|
||||
|
||||
/**
|
||||
* kbase_csf_tiler_heap_reclaim_mgr_init - Initializer for the tiler heap reclaim manager.
|
||||
*
|
||||
* @kbdev: Pointer to the device.
|
||||
*
|
||||
*/
|
||||
void kbase_csf_tiler_heap_reclaim_mgr_init(struct kbase_device *kbdev);
|
||||
|
||||
/**
|
||||
* kbase_csf_tiler_heap_reclaim_mgr_term - Termination call for the tiler heap reclaim manager.
|
||||
*
|
||||
* @kbdev: Pointer to the device.
|
||||
*
|
||||
*/
|
||||
void kbase_csf_tiler_heap_reclaim_mgr_term(struct kbase_device *kbdev);
|
||||
|
||||
#endif
|
||||
|
|
@ -88,13 +88,11 @@ DEFINE_DEBUGFS_ATTRIBUTE(kbase_csf_tl_poll_interval_fops,
|
|||
kbase_csf_tl_debugfs_poll_interval_read,
|
||||
kbase_csf_tl_debugfs_poll_interval_write, "%llu\n");
|
||||
|
||||
|
||||
void kbase_csf_tl_reader_debugfs_init(struct kbase_device *kbdev)
|
||||
{
|
||||
debugfs_create_file("csf_tl_poll_interval_in_ms", 0644,
|
||||
kbdev->debugfs_instr_directory, kbdev,
|
||||
&kbase_csf_tl_poll_interval_fops);
|
||||
|
||||
}
|
||||
#endif
|
||||
|
||||
|
|
@ -166,11 +164,10 @@ static int kbase_ts_converter_init(
|
|||
*
|
||||
* Return: The CPU timestamp.
|
||||
*/
|
||||
static void __maybe_unused
|
||||
kbase_ts_converter_convert(const struct kbase_ts_converter *self, u64 *gpu_ts)
|
||||
static u64 __maybe_unused
|
||||
kbase_ts_converter_convert(const struct kbase_ts_converter *self, u64 gpu_ts)
|
||||
{
|
||||
u64 old_gpu_ts = *gpu_ts;
|
||||
*gpu_ts = div64_u64(old_gpu_ts * self->multiplier, self->divisor) +
|
||||
return div64_u64(gpu_ts * self->multiplier, self->divisor) +
|
||||
self->offset;
|
||||
}
|
||||
|
||||
|
|
@ -250,7 +247,6 @@ static void tl_reader_reset(struct kbase_csf_tl_reader *self)
|
|||
self->tl_header.btc = 0;
|
||||
}
|
||||
|
||||
|
||||
int kbase_csf_tl_reader_flush_buffer(struct kbase_csf_tl_reader *self)
|
||||
{
|
||||
int ret = 0;
|
||||
|
|
@ -275,7 +271,6 @@ int kbase_csf_tl_reader_flush_buffer(struct kbase_csf_tl_reader *self)
|
|||
return -EBUSY;
|
||||
}
|
||||
|
||||
|
||||
/* Copying the whole buffer in a single shot. We assume
|
||||
* that the buffer will not contain partially written messages.
|
||||
*/
|
||||
|
|
@ -326,8 +321,8 @@ int kbase_csf_tl_reader_flush_buffer(struct kbase_csf_tl_reader *self)
|
|||
{
|
||||
struct kbase_csffw_tl_message *msg =
|
||||
(struct kbase_csffw_tl_message *) csffw_data_it;
|
||||
kbase_ts_converter_convert(&self->ts_converter,
|
||||
&msg->timestamp);
|
||||
msg->timestamp = kbase_ts_converter_convert(&self->ts_converter,
|
||||
msg->timestamp);
|
||||
}
|
||||
|
||||
/* Copy the message out to the tl_stream. */
|
||||
|
|
|
|||
|
|
@ -119,7 +119,7 @@ static const struct firmware_trace_buffer_data trace_buffer_data[] = {
|
|||
#if MALI_UNIT_TEST
|
||||
{ "fwutf", { 0 }, 1 },
|
||||
#endif
|
||||
{ FW_TRACE_BUF_NAME, { 0 }, 4 },
|
||||
{ FIRMWARE_LOG_BUF_NAME, { 0 }, 4 },
|
||||
{ "benchmark", { 0 }, 2 },
|
||||
{ "timeline", { 0 }, KBASE_CSF_TL_BUFFER_NR_PAGES },
|
||||
};
|
||||
|
|
@ -506,10 +506,16 @@ unsigned int kbase_csf_firmware_trace_buffer_read_data(
|
|||
}
|
||||
EXPORT_SYMBOL(kbase_csf_firmware_trace_buffer_read_data);
|
||||
|
||||
#if IS_ENABLED(CONFIG_DEBUG_FS)
|
||||
/*
 * Apply a 64-bit enable mask to a trace buffer, one trace-enable entry at a
 * time: entry i is given bit i of @mask.
 *
 * Shifting a u64 by 64 or more is undefined behaviour in C, so entries with an
 * index of 64 or above (should the buffer expose that many) are given 0, i.e.
 * @mask is treated as zero-extended.
 */
static void update_trace_buffer_active_mask64(struct firmware_trace_buffer *tb, u64 mask)
{
	unsigned int i;

	for (i = 0; i < tb->trace_enable_entry_count; i++)
		kbasep_csf_firmware_trace_buffer_update_trace_enable_bit(
			tb, i, (i < 64) ? ((mask >> i) & 1) : 0);
}
|
||||
|
||||
#define U32_BITS 32
|
||||
static u64 get_trace_buffer_active_mask64(struct firmware_trace_buffer *tb)
|
||||
u64 kbase_csf_firmware_trace_buffer_get_active_mask64(struct firmware_trace_buffer *tb)
|
||||
{
|
||||
u64 active_mask = tb->trace_enable_init_mask[0];
|
||||
|
||||
|
|
@ -519,18 +525,7 @@ static u64 get_trace_buffer_active_mask64(struct firmware_trace_buffer *tb)
|
|||
return active_mask;
|
||||
}
|
||||
|
||||
static void update_trace_buffer_active_mask64(struct firmware_trace_buffer *tb,
|
||||
u64 mask)
|
||||
{
|
||||
unsigned int i;
|
||||
|
||||
for (i = 0; i < tb->trace_enable_entry_count; i++)
|
||||
kbasep_csf_firmware_trace_buffer_update_trace_enable_bit(
|
||||
tb, i, (mask >> i) & 1);
|
||||
}
|
||||
|
||||
static int set_trace_buffer_active_mask64(struct firmware_trace_buffer *tb,
|
||||
u64 mask)
|
||||
int kbase_csf_firmware_trace_buffer_set_active_mask64(struct firmware_trace_buffer *tb, u64 mask)
|
||||
{
|
||||
struct kbase_device *kbdev = tb->kbdev;
|
||||
unsigned long flags;
|
||||
|
|
@ -558,123 +553,3 @@ static int set_trace_buffer_active_mask64(struct firmware_trace_buffer *tb,
|
|||
|
||||
return err;
|
||||
}
|
||||
|
||||
/*
 * debugfs attribute getter: look up the FW_TRACE_BUF_NAME trace buffer and
 * store its 64-bit active mask in @val.
 *
 * Return: 0 on success, -EIO if the trace buffer cannot be found.
 */
static int kbase_csf_firmware_trace_enable_mask_read(void *data, u64 *val)
{
	struct kbase_device *const kbdev = data;
	struct firmware_trace_buffer *const tb =
		kbase_csf_firmware_get_trace_buffer(kbdev, FW_TRACE_BUF_NAME);

	if (!tb) {
		dev_err(kbdev->dev, "Couldn't get the firmware trace buffer");
		return -EIO;
	}

	/* The enabled traces limited to u64 here, regarded practical */
	*val = get_trace_buffer_active_mask64(tb);

	return 0;
}
|
||||
|
||||
/*
 * debugfs attribute setter: restrict @val to the enable bits the
 * FW_TRACE_BUF_NAME trace buffer supports (at most 64) and, if the result
 * differs from the current active mask, apply it.
 *
 * Return: 0 if nothing needed changing, -EIO if the trace buffer cannot be
 * found, otherwise the result of set_trace_buffer_active_mask64().
 */
static int kbase_csf_firmware_trace_enable_mask_write(void *data, u64 val)
{
	struct kbase_device *kbdev = (struct kbase_device *)data;
	struct firmware_trace_buffer *tb =
		kbase_csf_firmware_get_trace_buffer(kbdev, FW_TRACE_BUF_NAME);
	u64 new_mask;
	unsigned int enable_bits_count;

	if (tb == NULL) {
		dev_err(kbdev->dev, "Couldn't get the firmware trace buffer");
		return -EIO;
	}

	/* Ignore unsupported types */
	enable_bits_count =
		kbase_csf_firmware_trace_buffer_get_trace_enable_bits_count(tb);
	if (enable_bits_count > 64) {
		dev_dbg(kbdev->dev, "Limit enabled bits count from %u to 64",
			enable_bits_count);
		enable_bits_count = 64;
	}

	/* Build the mask in 64-bit arithmetic. A plain "1 << n" is an int
	 * shift, which is undefined for n >= 32 and gives the wrong mask for
	 * n == 31. A shift by exactly 64 is undefined even for a u64, so the
	 * full-width case keeps every bit of val.
	 */
	if (enable_bits_count < 64)
		new_mask = val & (((u64)1 << enable_bits_count) - 1);
	else
		new_mask = val;

	if (new_mask != get_trace_buffer_active_mask64(tb))
		return set_trace_buffer_active_mask64(tb, new_mask);
	else
		return 0;
}
|
||||
|
||||
static int kbasep_csf_firmware_trace_debugfs_open(struct inode *in,
|
||||
struct file *file)
|
||||
{
|
||||
struct kbase_device *kbdev = in->i_private;
|
||||
|
||||
file->private_data = kbdev;
|
||||
dev_dbg(kbdev->dev, "Opened firmware trace buffer dump debugfs file");
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static ssize_t kbasep_csf_firmware_trace_debugfs_read(struct file *file,
|
||||
char __user *buf, size_t size, loff_t *ppos)
|
||||
{
|
||||
struct kbase_device *kbdev = file->private_data;
|
||||
u8 *pbyte;
|
||||
unsigned int n_read;
|
||||
unsigned long not_copied;
|
||||
/* Limit the kernel buffer to no more than two pages */
|
||||
size_t mem = MIN(size, 2 * PAGE_SIZE);
|
||||
unsigned long flags;
|
||||
|
||||
struct firmware_trace_buffer *tb =
|
||||
kbase_csf_firmware_get_trace_buffer(kbdev, FW_TRACE_BUF_NAME);
|
||||
|
||||
if (tb == NULL) {
|
||||
dev_err(kbdev->dev, "Couldn't get the firmware trace buffer");
|
||||
return -EIO;
|
||||
}
|
||||
|
||||
pbyte = kmalloc(mem, GFP_KERNEL);
|
||||
if (pbyte == NULL) {
|
||||
dev_err(kbdev->dev, "Couldn't allocate memory for trace buffer dump");
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
|
||||
n_read = kbase_csf_firmware_trace_buffer_read_data(tb, pbyte, mem);
|
||||
spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
|
||||
|
||||
/* Do the copy, if we have obtained some trace data */
|
||||
not_copied = (n_read) ? copy_to_user(buf, pbyte, n_read) : 0;
|
||||
kfree(pbyte);
|
||||
|
||||
if (!not_copied) {
|
||||
*ppos += n_read;
|
||||
return n_read;
|
||||
}
|
||||
|
||||
dev_err(kbdev->dev, "Couldn't copy trace buffer data to user space buffer");
|
||||
return -EFAULT;
|
||||
}
|
||||
|
||||
DEFINE_DEBUGFS_ATTRIBUTE(kbase_csf_firmware_trace_enable_mask_fops,
|
||||
kbase_csf_firmware_trace_enable_mask_read,
|
||||
kbase_csf_firmware_trace_enable_mask_write, "%llx\n");
|
||||
|
||||
static const struct file_operations kbasep_csf_firmware_trace_debugfs_fops = {
|
||||
.owner = THIS_MODULE,
|
||||
.open = kbasep_csf_firmware_trace_debugfs_open,
|
||||
.read = kbasep_csf_firmware_trace_debugfs_read,
|
||||
.llseek = no_llseek,
|
||||
};
|
||||
|
||||
void kbase_csf_firmware_trace_buffer_debugfs_init(struct kbase_device *kbdev)
|
||||
{
|
||||
debugfs_create_file("fw_trace_enable_mask", 0644,
|
||||
kbdev->mali_debugfs_directory, kbdev,
|
||||
&kbase_csf_firmware_trace_enable_mask_fops);
|
||||
|
||||
debugfs_create_file("fw_traces", 0444,
|
||||
kbdev->mali_debugfs_directory, kbdev,
|
||||
&kbasep_csf_firmware_trace_debugfs_fops);
|
||||
}
|
||||
#endif /* CONFIG_DEBUG_FS */
|
||||
|
|
|
|||
|
|
@ -25,7 +25,7 @@
|
|||
#include <linux/types.h>
|
||||
|
||||
#define CSF_FIRMWARE_TRACE_ENABLE_INIT_MASK_MAX (4)
|
||||
#define FW_TRACE_BUF_NAME "fwlog"
|
||||
#define FIRMWARE_LOG_BUF_NAME "fwlog"
|
||||
|
||||
/* Forward declarations */
|
||||
struct firmware_trace_buffer;
|
||||
|
|
@ -165,14 +165,23 @@ bool kbase_csf_firmware_trace_buffer_is_empty(
|
|||
unsigned int kbase_csf_firmware_trace_buffer_read_data(
|
||||
struct firmware_trace_buffer *trace_buffer, u8 *data, unsigned int num_bytes);
|
||||
|
||||
#if IS_ENABLED(CONFIG_DEBUG_FS)
|
||||
/**
|
||||
* kbase_csf_firmware_trace_buffer_debugfs_init() - Add debugfs entries for
|
||||
* setting enable mask and dumping the binary firmware trace buffer
|
||||
* kbase_csf_firmware_trace_buffer_get_active_mask64 - Get trace buffer active mask
|
||||
*
|
||||
* @kbdev: Pointer to the device
|
||||
* @tb: Trace buffer handle
|
||||
*
|
||||
* Return: Trace buffer active mask.
|
||||
*/
|
||||
void kbase_csf_firmware_trace_buffer_debugfs_init(struct kbase_device *kbdev);
|
||||
#endif /* CONFIG_DEBUG_FS */
|
||||
u64 kbase_csf_firmware_trace_buffer_get_active_mask64(struct firmware_trace_buffer *tb);
|
||||
|
||||
/**
|
||||
* kbase_csf_firmware_trace_buffer_set_active_mask64 - Set trace buffer active mask
|
||||
*
|
||||
* @tb: Trace buffer handle
|
||||
* @mask: New active mask
|
||||
*
|
||||
* Return: 0 if successful, negative error code on failure.
|
||||
*/
|
||||
int kbase_csf_firmware_trace_buffer_set_active_mask64(struct firmware_trace_buffer *tb, u64 mask);
|
||||
|
||||
#endif /* _KBASE_CSF_TRACE_BUFFER_H_ */
|
||||
|
|
|
|||
271
drivers/gpu/arm/bifrost/csf/mali_kbase_debug_csf_fault.c
Normal file
271
drivers/gpu/arm/bifrost/csf/mali_kbase_debug_csf_fault.c
Normal file
|
|
@ -0,0 +1,271 @@
|
|||
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
|
||||
/*
|
||||
*
|
||||
* (C) COPYRIGHT 2022 ARM Limited. All rights reserved.
|
||||
*
|
||||
* This program is free software and is provided to you under the terms of the
|
||||
* GNU General Public License version 2 as published by the Free Software
|
||||
* Foundation, and any use by you of this program is subject to the terms
|
||||
* of such GNU license.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, you can access it online at
|
||||
* http://www.gnu.org/licenses/gpl-2.0.html.
|
||||
*
|
||||
*/
|
||||
|
||||
#include <mali_kbase.h>
|
||||
|
||||
#if IS_ENABLED(CONFIG_DEBUG_FS)
|
||||
|
||||
/**
|
||||
* kbasep_fault_occurred - Check if fault occurred.
|
||||
*
|
||||
* @kbdev: Device pointer
|
||||
*
|
||||
* Return: true if a fault occurred.
|
||||
*/
|
||||
static bool kbasep_fault_occurred(struct kbase_device *kbdev)
|
||||
{
|
||||
unsigned long flags;
|
||||
bool ret;
|
||||
|
||||
spin_lock_irqsave(&kbdev->csf.dof.lock, flags);
|
||||
ret = (kbdev->csf.dof.error_code != DF_NO_ERROR);
|
||||
spin_unlock_irqrestore(&kbdev->csf.dof.lock, flags);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
void kbase_debug_csf_fault_wait_completion(struct kbase_device *kbdev)
|
||||
{
|
||||
if (likely(!kbase_debug_csf_fault_dump_enabled(kbdev))) {
|
||||
dev_dbg(kbdev->dev, "No userspace client for dumping exists");
|
||||
return;
|
||||
}
|
||||
|
||||
wait_event(kbdev->csf.dof.dump_wait_wq, kbase_debug_csf_fault_dump_complete(kbdev));
|
||||
}
|
||||
KBASE_EXPORT_TEST_API(kbase_debug_csf_fault_wait_completion);
|
||||
|
||||
/**
|
||||
* kbase_debug_csf_fault_wakeup - Wake up a waiting user space client.
|
||||
*
|
||||
* @kbdev: Kbase device
|
||||
*/
|
||||
static void kbase_debug_csf_fault_wakeup(struct kbase_device *kbdev)
|
||||
{
|
||||
wake_up_interruptible(&kbdev->csf.dof.fault_wait_wq);
|
||||
}
|
||||
|
||||
bool kbase_debug_csf_fault_notify(struct kbase_device *kbdev,
|
||||
struct kbase_context *kctx, enum dumpfault_error_type error)
|
||||
{
|
||||
unsigned long flags;
|
||||
|
||||
if (likely(!kbase_debug_csf_fault_dump_enabled(kbdev)))
|
||||
return false;
|
||||
|
||||
if (WARN_ON(error == DF_NO_ERROR))
|
||||
return false;
|
||||
|
||||
if (kctx && kbase_ctx_flag(kctx, KCTX_DYING)) {
|
||||
dev_info(kbdev->dev, "kctx %d_%d is dying when error %d is reported",
|
||||
kctx->tgid, kctx->id, error);
|
||||
kctx = NULL;
|
||||
}
|
||||
|
||||
spin_lock_irqsave(&kbdev->csf.dof.lock, flags);
|
||||
|
||||
/* Only one fault at a time can be processed */
|
||||
if (kbdev->csf.dof.error_code) {
|
||||
dev_info(kbdev->dev, "skip this fault as there's a pending fault");
|
||||
goto unlock;
|
||||
}
|
||||
|
||||
kbdev->csf.dof.kctx_tgid = kctx ? kctx->tgid : 0;
|
||||
kbdev->csf.dof.kctx_id = kctx ? kctx->id : 0;
|
||||
kbdev->csf.dof.error_code = error;
|
||||
kbase_debug_csf_fault_wakeup(kbdev);
|
||||
|
||||
unlock:
|
||||
spin_unlock_irqrestore(&kbdev->csf.dof.lock, flags);
|
||||
return true;
|
||||
}
|
||||
|
||||
/*
 * read() handler of the csf_fault debugfs file. Sleeps (interruptibly) until a
 * fault is recorded, then formats "<tgid>_<ctx id>_<error code>\n" from a
 * snapshot taken under the dump-on-fault lock and copies it to user space.
 *
 * Return: the result of simple_read_from_buffer() on success; -EINVAL for a
 * NULL file or buffer, a negative file position or a user buffer smaller than
 * BUF_SIZE; -ERESTARTSYS if the wait is interrupted.
 */
static ssize_t debug_csf_fault_read(struct file *file, char __user *buffer, size_t size,
				    loff_t *f_pos)
{
#define BUF_SIZE 64
	char buf[BUF_SIZE];
	struct kbase_device *kbdev;
	enum dumpfault_error_type error_code;
	u32 tgid, ctx_id;
	unsigned long irq_flags;
	int len;

	if (unlikely(!file)) {
		pr_warn("%s: file is NULL", __func__);
		return -EINVAL;
	}
	kbdev = file->private_data;

	if (unlikely(!buffer)) {
		dev_warn(kbdev->dev, "%s: buffer is NULL", __func__);
		return -EINVAL;
	}

	if (unlikely(*f_pos < 0)) {
		dev_warn(kbdev->dev, "%s: f_pos is negative", __func__);
		return -EINVAL;
	}

	if (size < sizeof(buf)) {
		dev_warn(kbdev->dev, "%s: buffer is too small", __func__);
		return -EINVAL;
	}

	if (wait_event_interruptible(kbdev->csf.dof.fault_wait_wq, kbasep_fault_occurred(kbdev)))
		return -ERESTARTSYS;

	/* Snapshot and format the fault record atomically w.r.t. updates */
	spin_lock_irqsave(&kbdev->csf.dof.lock, irq_flags);
	tgid = kbdev->csf.dof.kctx_tgid;
	ctx_id = kbdev->csf.dof.kctx_id;
	error_code = kbdev->csf.dof.error_code;
	BUILD_BUG_ON(sizeof(buf) < (sizeof(tgid) + sizeof(ctx_id) + sizeof(error_code)));
	len = scnprintf(buf, sizeof(buf), "%u_%u_%u\n", tgid, ctx_id, error_code);
	spin_unlock_irqrestore(&kbdev->csf.dof.lock, irq_flags);

	dev_info(kbdev->dev, "debug csf fault info read");

	return simple_read_from_buffer(buffer, size, f_pos, buf, len);
}
|
||||
|
||||
static int debug_csf_fault_open(struct inode *in, struct file *file)
|
||||
{
|
||||
struct kbase_device *kbdev;
|
||||
|
||||
if (unlikely(!in)) {
|
||||
pr_warn("%s: inode is NULL", __func__);
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
kbdev = in->i_private;
|
||||
if (unlikely(!file)) {
|
||||
dev_warn(kbdev->dev, "%s: file is NULL", __func__);
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
if (atomic_cmpxchg(&kbdev->csf.dof.enabled, 0, 1) == 1) {
|
||||
dev_warn(kbdev->dev, "Only one client is allowed for dump on fault");
|
||||
return -EBUSY;
|
||||
}
|
||||
|
||||
dev_info(kbdev->dev, "debug csf fault file open");
|
||||
|
||||
return simple_open(in, file);
|
||||
}
|
||||
|
||||
/*
 * write() handler of the csf_fault debugfs file. Any write, whatever its
 * content, clears the recorded fault and wakes the kernel threads waiting on
 * dump_wait_wq.
 *
 * Return: @count on success, -EINVAL for a NULL file.
 */
static ssize_t debug_csf_fault_write(struct file *file, const char __user *ubuf, size_t count,
				     loff_t *ppos)
{
	struct kbase_device *kbdev;
	unsigned long flags;

	if (unlikely(!file)) {
		pr_warn("%s: file is NULL", __func__);
		return -EINVAL;
	}

	kbdev = file->private_data;
	spin_lock_irqsave(&kbdev->csf.dof.lock, flags);
	kbdev->csf.dof.error_code = DF_NO_ERROR;
	kbdev->csf.dof.kctx_tgid = 0;
	kbdev->csf.dof.kctx_id = 0;
	spin_unlock_irqrestore(&kbdev->csf.dof.lock, flags);

	/* Log only after dropping the lock, so the message is not emitted
	 * with interrupts disabled inside the critical section.
	 */
	dev_info(kbdev->dev, "debug csf fault dump complete");

	/* User space finished the dump.
	 * Wake up blocked kernel threads to proceed.
	 */
	wake_up(&kbdev->csf.dof.dump_wait_wq);

	return count;
}
|
||||
|
||||
static int debug_csf_fault_release(struct inode *in, struct file *file)
|
||||
{
|
||||
struct kbase_device *kbdev;
|
||||
unsigned long flags;
|
||||
|
||||
if (unlikely(!in)) {
|
||||
pr_warn("%s: inode is NULL", __func__);
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
kbdev = in->i_private;
|
||||
spin_lock_irqsave(&kbdev->csf.dof.lock, flags);
|
||||
kbdev->csf.dof.kctx_tgid = 0;
|
||||
kbdev->csf.dof.kctx_id = 0;
|
||||
kbdev->csf.dof.error_code = DF_NO_ERROR;
|
||||
spin_unlock_irqrestore(&kbdev->csf.dof.lock, flags);
|
||||
|
||||
atomic_set(&kbdev->csf.dof.enabled, 0);
|
||||
dev_info(kbdev->dev, "debug csf fault file close");
|
||||
|
||||
/* User space closed the debugfs file.
|
||||
* Wake up blocked kernel threads to resume.
|
||||
*/
|
||||
wake_up(&kbdev->csf.dof.dump_wait_wq);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static const struct file_operations kbasep_debug_csf_fault_fops = {
|
||||
.owner = THIS_MODULE,
|
||||
.open = debug_csf_fault_open,
|
||||
.read = debug_csf_fault_read,
|
||||
.write = debug_csf_fault_write,
|
||||
.llseek = default_llseek,
|
||||
.release = debug_csf_fault_release,
|
||||
};
|
||||
|
||||
void kbase_debug_csf_fault_debugfs_init(struct kbase_device *kbdev)
|
||||
{
|
||||
const char *fname = "csf_fault";
|
||||
|
||||
if (unlikely(!kbdev)) {
|
||||
pr_warn("%s: kbdev is NULL", __func__);
|
||||
return;
|
||||
}
|
||||
|
||||
debugfs_create_file(fname, 0600, kbdev->mali_debugfs_directory, kbdev,
|
||||
&kbasep_debug_csf_fault_fops);
|
||||
}
|
||||
|
||||
int kbase_debug_csf_fault_init(struct kbase_device *kbdev)
|
||||
{
|
||||
if (unlikely(!kbdev)) {
|
||||
pr_warn("%s: kbdev is NULL", __func__);
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
init_waitqueue_head(&(kbdev->csf.dof.fault_wait_wq));
|
||||
init_waitqueue_head(&(kbdev->csf.dof.dump_wait_wq));
|
||||
spin_lock_init(&kbdev->csf.dof.lock);
|
||||
kbdev->csf.dof.kctx_tgid = 0;
|
||||
kbdev->csf.dof.kctx_id = 0;
|
||||
kbdev->csf.dof.error_code = DF_NO_ERROR;
|
||||
atomic_set(&kbdev->csf.dof.enabled, 0);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Termination counterpart of kbase_debug_csf_fault_init(); currently a no-op. */
void kbase_debug_csf_fault_term(struct kbase_device *kbdev)
{
}
|
||||
#endif /* CONFIG_DEBUG_FS */
|
||||
137
drivers/gpu/arm/bifrost/csf/mali_kbase_debug_csf_fault.h
Normal file
137
drivers/gpu/arm/bifrost/csf/mali_kbase_debug_csf_fault.h
Normal file
|
|
@ -0,0 +1,137 @@
|
|||
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
|
||||
/*
|
||||
*
|
||||
* (C) COPYRIGHT 2022 ARM Limited. All rights reserved.
|
||||
*
|
||||
* This program is free software and is provided to you under the terms of the
|
||||
* GNU General Public License version 2 as published by the Free Software
|
||||
* Foundation, and any use by you of this program is subject to the terms
|
||||
* of such GNU license.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, you can access it online at
|
||||
* http://www.gnu.org/licenses/gpl-2.0.html.
|
||||
*
|
||||
*/
|
||||
|
||||
#ifndef _KBASE_DEBUG_CSF_FAULT_H
|
||||
#define _KBASE_DEBUG_CSF_FAULT_H
|
||||
|
||||
#if IS_ENABLED(CONFIG_DEBUG_FS)
|
||||
/**
|
||||
* kbase_debug_csf_fault_debugfs_init - Initialize CSF fault debugfs
|
||||
* @kbdev: Device pointer
|
||||
*/
|
||||
void kbase_debug_csf_fault_debugfs_init(struct kbase_device *kbdev);
|
||||
|
||||
/**
|
||||
* kbase_debug_csf_fault_init - Create the fault event wait queue per device
|
||||
* and initialize the required resources.
|
||||
* @kbdev: Device pointer
|
||||
*
|
||||
* Return: Zero on success or a negative error code.
|
||||
*/
|
||||
int kbase_debug_csf_fault_init(struct kbase_device *kbdev);
|
||||
|
||||
/**
|
||||
* kbase_debug_csf_fault_term - Clean up resources created by
|
||||
* @kbase_debug_csf_fault_init.
|
||||
* @kbdev: Device pointer
|
||||
*/
|
||||
void kbase_debug_csf_fault_term(struct kbase_device *kbdev);
|
||||
|
||||
/**
|
||||
* kbase_debug_csf_fault_wait_completion - Wait for the client to complete.
|
||||
*
|
||||
* @kbdev: Device Pointer
|
||||
*
|
||||
* Wait for the user space client to finish reading the fault information.
|
||||
* This function must be called in thread context.
|
||||
*/
|
||||
void kbase_debug_csf_fault_wait_completion(struct kbase_device *kbdev);
|
||||
|
||||
/**
|
||||
* kbase_debug_csf_fault_notify - Notify client of a fault.
|
||||
*
|
||||
* @kbdev: Device pointer
|
||||
* @kctx: Faulty context (can be NULL)
|
||||
* @error: Error code.
|
||||
*
|
||||
* Store fault information and wake up the user space client.
|
||||
*
|
||||
* Return: true if a dump on fault was initiated or is in progress and
|
||||
* so caller can opt to wait for the dumping to complete.
|
||||
*/
|
||||
bool kbase_debug_csf_fault_notify(struct kbase_device *kbdev,
|
||||
struct kbase_context *kctx, enum dumpfault_error_type error);
|
||||
|
||||
/**
|
||||
* kbase_debug_csf_fault_dump_enabled - Check if dump on fault is enabled.
|
||||
*
|
||||
* @kbdev: Device pointer
|
||||
*
|
||||
* Return: true if debugfs file is opened so dump on fault is enabled.
|
||||
*/
|
||||
static inline bool kbase_debug_csf_fault_dump_enabled(struct kbase_device *kbdev)
|
||||
{
|
||||
return atomic_read(&kbdev->csf.dof.enabled);
|
||||
}
|
||||
|
||||
/**
|
||||
* kbase_debug_csf_fault_dump_complete - Check if dump on fault is completed.
|
||||
*
|
||||
* @kbdev: Device pointer
|
||||
*
|
||||
* Return: true if dump on fault completes or file is closed.
|
||||
*/
|
||||
static inline bool kbase_debug_csf_fault_dump_complete(struct kbase_device *kbdev)
|
||||
{
|
||||
unsigned long flags;
|
||||
bool ret;
|
||||
|
||||
/* Fast path: when dump on fault is not enabled there is nothing to wait for. */
if (likely(!kbase_debug_csf_fault_dump_enabled(kbdev)))
|
||||
return true;
|
||||
|
||||
/*
 * error_code is sampled under dof.lock; a value of DF_NO_ERROR is reported
 * as "complete". NOTE(review): where error_code is reset back to
 * DF_NO_ERROR is not visible here - confirm against the .c file.
 */
spin_lock_irqsave(&kbdev->csf.dof.lock, flags);
|
||||
ret = (kbdev->csf.dof.error_code == DF_NO_ERROR);
|
||||
spin_unlock_irqrestore(&kbdev->csf.dof.lock, flags);
|
||||
|
||||
return ret;
|
||||
}
|
||||
#else /* CONFIG_DEBUG_FS */
|
||||
/* Stub for builds without CONFIG_DEBUG_FS: nothing to set up, always returns 0. */
static inline int kbase_debug_csf_fault_init(struct kbase_device *kbdev)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Stub for builds without CONFIG_DEBUG_FS: intentionally empty. */
static inline void kbase_debug_csf_fault_term(struct kbase_device *kbdev)
|
||||
{
|
||||
}
|
||||
|
||||
/* Stub for builds without CONFIG_DEBUG_FS: returns immediately without waiting. */
static inline void kbase_debug_csf_fault_wait_completion(struct kbase_device *kbdev)
|
||||
{
|
||||
}
|
||||
|
||||
/*
 * Stub for builds without CONFIG_DEBUG_FS: all arguments are ignored and
 * false is always returned, i.e. no dump on fault is ever reported as started.
 */
static inline bool kbase_debug_csf_fault_notify(struct kbase_device *kbdev,
|
||||
struct kbase_context *kctx, enum dumpfault_error_type error)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
/* Stub for builds without CONFIG_DEBUG_FS: dump on fault is never enabled. */
static inline bool kbase_debug_csf_fault_dump_enabled(struct kbase_device *kbdev)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
/* Stub for builds without CONFIG_DEBUG_FS: always reports the dump as complete. */
static inline bool kbase_debug_csf_fault_dump_complete(struct kbase_device *kbdev)
|
||||
{
|
||||
return true;
|
||||
}
|
||||
#endif /* CONFIG_DEBUG_FS */
|
||||
|
||||
#endif /*_KBASE_DEBUG_CSF_FAULT_H*/
|
||||
|
|
@ -42,19 +42,25 @@ int dummy_array[] = {
|
|||
/*
|
||||
* Generic CSF events
|
||||
*/
|
||||
/* info_val = 0 */
|
||||
KBASE_KTRACE_CODE_MAKE_CODE(SCHEDULER_EVICT_CTX_SLOTS_START),
|
||||
/* info_val == number of CSGs supported */
|
||||
KBASE_KTRACE_CODE_MAKE_CODE(SCHEDULER_EVICT_CTX_SLOTS_END),
|
||||
/* info_val[0:7] == fw version_minor
|
||||
* info_val[15:8] == fw version_major
|
||||
* info_val[63:32] == fw version_hash
|
||||
*/
|
||||
KBASE_KTRACE_CODE_MAKE_CODE(CSF_FIRMWARE_BOOT),
|
||||
KBASE_KTRACE_CODE_MAKE_CODE(CSF_FIRMWARE_REBOOT),
|
||||
KBASE_KTRACE_CODE_MAKE_CODE(SCHEDULER_TOCK_INVOKE),
|
||||
KBASE_KTRACE_CODE_MAKE_CODE(SCHEDULER_TICK_INVOKE),
|
||||
KBASE_KTRACE_CODE_MAKE_CODE(SCHEDULER_TOCK_START),
|
||||
KBASE_KTRACE_CODE_MAKE_CODE(SCHEDULER_TOCK_END),
|
||||
/* info_val == total number of runnable groups across all kctxs */
|
||||
KBASE_KTRACE_CODE_MAKE_CODE(SCHEDULER_TICK_START),
|
||||
KBASE_KTRACE_CODE_MAKE_CODE(SCHEDULER_TICK_END),
|
||||
KBASE_KTRACE_CODE_MAKE_CODE(SCHEDULER_RESET_START),
|
||||
KBASE_KTRACE_CODE_MAKE_CODE(SCHEDULER_RESET_END),
|
||||
/* info_val = timeout in ms */
|
||||
KBASE_KTRACE_CODE_MAKE_CODE(SCHEDULER_PROTM_WAIT_QUIT_START),
|
||||
/* info_val = remaining timeout in ms, or 0 if timed out */
|
||||
|
|
@ -101,6 +107,8 @@ int dummy_array[] = {
|
|||
* purpose.
|
||||
*/
|
||||
KBASE_KTRACE_CODE_MAKE_CODE(SCHEDULER_GPU_IDLE_WORKER_HANDLING_START),
|
||||
KBASE_KTRACE_CODE_MAKE_CODE(SCHEDULER_GPU_IDLE_WORKER_HANDLING_END),
|
||||
|
||||
KBASE_KTRACE_CODE_MAKE_CODE(CSF_FIRMWARE_MCU_HALTED),
|
||||
KBASE_KTRACE_CODE_MAKE_CODE(CSF_FIRMWARE_MCU_SLEEP),
|
||||
|
||||
|
|
@ -126,6 +134,8 @@ int dummy_array[] = {
|
|||
* group->csg_nr indicates which bit was set
|
||||
*/
|
||||
KBASE_KTRACE_CODE_MAKE_CODE(CSG_SLOT_IDLE_SET),
|
||||
KBASE_KTRACE_CODE_MAKE_CODE(CSG_INTERRUPT_NO_NON_IDLE_GROUPS),
|
||||
KBASE_KTRACE_CODE_MAKE_CODE(CSG_INTERRUPT_NON_IDLE_GROUPS),
|
||||
/* info_val = scheduler's new csg_slots_idle_mask[0]
|
||||
* group->csg_nr indicates which bit was cleared
|
||||
*
|
||||
|
|
@ -190,10 +200,37 @@ int dummy_array[] = {
|
|||
KBASE_KTRACE_CODE_MAKE_CODE(SCHEDULER_NONIDLE_OFFSLOT_GRP_INC),
|
||||
/* info_val == new count of off-slot non-idle groups */
|
||||
KBASE_KTRACE_CODE_MAKE_CODE(SCHEDULER_NONIDLE_OFFSLOT_GRP_DEC),
|
||||
/* info_val = scheduler's new csg_slots_idle_mask[0]
|
||||
* group->csg_nr indicates which bit was set
|
||||
*/
|
||||
KBASE_KTRACE_CODE_MAKE_CODE(SCHEDULER_HANDLE_IDLE_SLOTS),
|
||||
|
||||
KBASE_KTRACE_CODE_MAKE_CODE(PROTM_EVENT_WORKER_START),
|
||||
KBASE_KTRACE_CODE_MAKE_CODE(PROTM_EVENT_WORKER_END),
|
||||
|
||||
/* info_val = scheduler state */
|
||||
KBASE_KTRACE_CODE_MAKE_CODE(SCHED_BUSY),
|
||||
KBASE_KTRACE_CODE_MAKE_CODE(SCHED_INACTIVE),
|
||||
KBASE_KTRACE_CODE_MAKE_CODE(SCHED_SUSPENDED),
|
||||
KBASE_KTRACE_CODE_MAKE_CODE(SCHED_SLEEPING),
|
||||
|
||||
/* info_val = mcu state */
|
||||
#define KBASEP_MCU_STATE(n) KBASE_KTRACE_CODE_MAKE_CODE(PM_MCU_ ## n),
|
||||
#include "backend/gpu/mali_kbase_pm_mcu_states.h"
|
||||
#undef KBASEP_MCU_STATE
|
||||
|
||||
/* info_val = number of runnable groups */
|
||||
KBASE_KTRACE_CODE_MAKE_CODE(CSF_GROUP_INACTIVE),
|
||||
KBASE_KTRACE_CODE_MAKE_CODE(CSF_GROUP_RUNNABLE),
|
||||
KBASE_KTRACE_CODE_MAKE_CODE(CSF_GROUP_IDLE),
|
||||
KBASE_KTRACE_CODE_MAKE_CODE(CSF_GROUP_SUSPENDED),
|
||||
KBASE_KTRACE_CODE_MAKE_CODE(CSF_GROUP_SUSPENDED_ON_IDLE),
|
||||
KBASE_KTRACE_CODE_MAKE_CODE(CSF_GROUP_SUSPENDED_ON_WAIT_SYNC),
|
||||
/* info_val = new run state of the evicted group */
|
||||
KBASE_KTRACE_CODE_MAKE_CODE(CSF_GROUP_FAULT_EVICTED),
|
||||
/* info_val = number of active CSGs */
|
||||
KBASE_KTRACE_CODE_MAKE_CODE(CSF_GROUP_TERMINATED),
|
||||
|
||||
/*
|
||||
* Group + Queue events
|
||||
*/
|
||||
|
|
|
|||
|
|
@ -31,13 +31,17 @@
|
|||
* Generic CSF events - using the common DEFINE_MALI_ADD_EVENT
|
||||
*/
|
||||
DEFINE_MALI_ADD_EVENT(SCHEDULER_EVICT_CTX_SLOTS_START);
|
||||
DEFINE_MALI_ADD_EVENT(SCHEDULER_EVICT_CTX_SLOTS_END);
|
||||
DEFINE_MALI_ADD_EVENT(CSF_FIRMWARE_BOOT);
|
||||
DEFINE_MALI_ADD_EVENT(CSF_FIRMWARE_REBOOT);
|
||||
DEFINE_MALI_ADD_EVENT(SCHEDULER_TOCK_INVOKE);
|
||||
DEFINE_MALI_ADD_EVENT(SCHEDULER_TICK_INVOKE);
|
||||
DEFINE_MALI_ADD_EVENT(SCHEDULER_TOCK_START);
|
||||
DEFINE_MALI_ADD_EVENT(SCHEDULER_TOCK_END);
|
||||
DEFINE_MALI_ADD_EVENT(SCHEDULER_TICK_START);
|
||||
DEFINE_MALI_ADD_EVENT(SCHEDULER_TICK_END);
|
||||
DEFINE_MALI_ADD_EVENT(SCHEDULER_RESET_START);
|
||||
DEFINE_MALI_ADD_EVENT(SCHEDULER_RESET_END);
|
||||
DEFINE_MALI_ADD_EVENT(SCHEDULER_PROTM_WAIT_QUIT_START);
|
||||
DEFINE_MALI_ADD_EVENT(SCHEDULER_PROTM_WAIT_QUIT_END);
|
||||
DEFINE_MALI_ADD_EVENT(SCHEDULER_GROUP_SYNC_UPDATE_EVENT);
|
||||
|
|
@ -58,8 +62,16 @@ DEFINE_MALI_ADD_EVENT(SCHEDULER_GROUP_SYNC_UPDATE_WORKER_START);
|
|||
DEFINE_MALI_ADD_EVENT(SCHEDULER_GROUP_SYNC_UPDATE_WORKER_END);
|
||||
DEFINE_MALI_ADD_EVENT(SCHEDULER_UPDATE_IDLE_SLOTS_ACK);
|
||||
DEFINE_MALI_ADD_EVENT(SCHEDULER_GPU_IDLE_WORKER_HANDLING_START);
|
||||
DEFINE_MALI_ADD_EVENT(SCHEDULER_GPU_IDLE_WORKER_HANDLING_END);
|
||||
DEFINE_MALI_ADD_EVENT(CSF_FIRMWARE_MCU_HALTED);
|
||||
DEFINE_MALI_ADD_EVENT(CSF_FIRMWARE_MCU_SLEEP);
|
||||
DEFINE_MALI_ADD_EVENT(SCHED_BUSY);
|
||||
DEFINE_MALI_ADD_EVENT(SCHED_INACTIVE);
|
||||
DEFINE_MALI_ADD_EVENT(SCHED_SUSPENDED);
|
||||
DEFINE_MALI_ADD_EVENT(SCHED_SLEEPING);
|
||||
#define KBASEP_MCU_STATE(n) DEFINE_MALI_ADD_EVENT(PM_MCU_ ## n);
|
||||
#include "backend/gpu/mali_kbase_pm_mcu_states.h"
|
||||
#undef KBASEP_MCU_STATE
|
||||
|
||||
DECLARE_EVENT_CLASS(mali_csf_grp_q_template,
|
||||
TP_PROTO(struct kbase_device *kbdev, struct kbase_queue_group *group,
|
||||
|
|
@ -136,6 +148,8 @@ DEFINE_MALI_CSF_GRP_EVENT(CSG_SLOT_STOPPED);
|
|||
DEFINE_MALI_CSF_GRP_EVENT(CSG_SLOT_CLEANED);
|
||||
DEFINE_MALI_CSF_GRP_EVENT(CSG_UPDATE_IDLE_SLOT_REQ);
|
||||
DEFINE_MALI_CSF_GRP_EVENT(CSG_SLOT_IDLE_SET);
|
||||
DEFINE_MALI_CSF_GRP_EVENT(CSG_INTERRUPT_NO_NON_IDLE_GROUPS);
|
||||
DEFINE_MALI_CSF_GRP_EVENT(CSG_INTERRUPT_NON_IDLE_GROUPS);
|
||||
DEFINE_MALI_CSF_GRP_EVENT(CSG_SLOT_IDLE_CLEAR);
|
||||
DEFINE_MALI_CSF_GRP_EVENT(CSG_SLOT_PRIO_UPDATE);
|
||||
DEFINE_MALI_CSF_GRP_EVENT(CSG_INTERRUPT_SYNC_UPDATE);
|
||||
|
|
@ -160,8 +174,17 @@ DEFINE_MALI_CSF_GRP_EVENT(SCHEDULER_PROTM_EXIT);
|
|||
DEFINE_MALI_CSF_GRP_EVENT(SCHEDULER_TOP_GRP);
|
||||
DEFINE_MALI_CSF_GRP_EVENT(SCHEDULER_NONIDLE_OFFSLOT_GRP_INC);
|
||||
DEFINE_MALI_CSF_GRP_EVENT(SCHEDULER_NONIDLE_OFFSLOT_GRP_DEC);
|
||||
DEFINE_MALI_CSF_GRP_EVENT(SCHEDULER_HANDLE_IDLE_SLOTS);
|
||||
DEFINE_MALI_CSF_GRP_EVENT(PROTM_EVENT_WORKER_START);
|
||||
DEFINE_MALI_CSF_GRP_EVENT(PROTM_EVENT_WORKER_END);
|
||||
DEFINE_MALI_CSF_GRP_EVENT(CSF_GROUP_INACTIVE);
|
||||
DEFINE_MALI_CSF_GRP_EVENT(CSF_GROUP_RUNNABLE);
|
||||
DEFINE_MALI_CSF_GRP_EVENT(CSF_GROUP_IDLE);
|
||||
DEFINE_MALI_CSF_GRP_EVENT(CSF_GROUP_SUSPENDED);
|
||||
DEFINE_MALI_CSF_GRP_EVENT(CSF_GROUP_SUSPENDED_ON_IDLE);
|
||||
DEFINE_MALI_CSF_GRP_EVENT(CSF_GROUP_SUSPENDED_ON_WAIT_SYNC);
|
||||
DEFINE_MALI_CSF_GRP_EVENT(CSF_GROUP_FAULT_EVICTED);
|
||||
DEFINE_MALI_CSF_GRP_EVENT(CSF_GROUP_TERMINATED);
|
||||
|
||||
#undef DEFINE_MALI_CSF_GRP_EVENT
|
||||
|
||||
|
|
|
|||
|
|
@ -1,7 +1,7 @@
|
|||
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
|
||||
/*
|
||||
*
|
||||
* (C) COPYRIGHT 2011-2015, 2018-2021 ARM Limited. All rights reserved.
|
||||
* (C) COPYRIGHT 2011-2015, 2018-2022 ARM Limited. All rights reserved.
|
||||
*
|
||||
* This program is free software and is provided to you under the terms of the
|
||||
* GNU General Public License version 2 as published by the Free Software
|
||||
|
|
@ -142,6 +142,11 @@ int dummy_array[] = {
|
|||
KBASE_KTRACE_CODE_MAKE_CODE(PM_RUNTIME_SUSPEND_CALLBACK),
|
||||
KBASE_KTRACE_CODE_MAKE_CODE(PM_RUNTIME_RESUME_CALLBACK),
|
||||
|
||||
/* info_val = l2 state */
|
||||
#define KBASEP_L2_STATE(n) KBASE_KTRACE_CODE_MAKE_CODE(PM_L2_ ## n),
|
||||
#include "backend/gpu/mali_kbase_pm_l2_states.h"
|
||||
#undef KBASEP_L2_STATE
|
||||
|
||||
/*
|
||||
* Context Scheduler events
|
||||
*/
|
||||
|
|
|
|||
|
|
@ -1,7 +1,7 @@
|
|||
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
|
||||
/*
|
||||
*
|
||||
* (C) COPYRIGHT 2014, 2018, 2020-2021 ARM Limited. All rights reserved.
|
||||
* (C) COPYRIGHT 2014, 2018, 2020-2022 ARM Limited. All rights reserved.
|
||||
*
|
||||
* This program is free software and is provided to you under the terms of the
|
||||
* GNU General Public License version 2 as published by the Free Software
|
||||
|
|
@ -98,6 +98,9 @@ DEFINE_MALI_ADD_EVENT(PM_WAKE_WAITERS);
|
|||
DEFINE_MALI_ADD_EVENT(PM_POWEROFF_WAIT_WQ);
|
||||
DEFINE_MALI_ADD_EVENT(PM_RUNTIME_SUSPEND_CALLBACK);
|
||||
DEFINE_MALI_ADD_EVENT(PM_RUNTIME_RESUME_CALLBACK);
|
||||
#define KBASEP_L2_STATE(n) DEFINE_MALI_ADD_EVENT(PM_L2_ ## n);
|
||||
#include "backend/gpu/mali_kbase_pm_l2_states.h"
|
||||
#undef KBASEP_L2_STATE
|
||||
DEFINE_MALI_ADD_EVENT(SCHED_RETAIN_CTX_NOLOCK);
|
||||
DEFINE_MALI_ADD_EVENT(SCHED_RELEASE_CTX);
|
||||
#ifdef CONFIG_MALI_ARBITER_SUPPORT
|
||||
|
|
|
|||
|
|
@ -23,8 +23,8 @@
|
|||
#include <device/mali_kbase_device.h>
|
||||
|
||||
#include <mali_kbase_hwaccess_backend.h>
|
||||
#include <mali_kbase_hwcnt_backend_csf_if_fw.h>
|
||||
#include <mali_kbase_hwcnt_watchdog_if_timer.h>
|
||||
#include <hwcnt/backend/mali_kbase_hwcnt_backend_csf_if_fw.h>
|
||||
#include <hwcnt/mali_kbase_hwcnt_watchdog_if_timer.h>
|
||||
#include <mali_kbase_ctx_sched.h>
|
||||
#include <mali_kbase_reset_gpu.h>
|
||||
#include <csf/mali_kbase_csf.h>
|
||||
|
|
@ -40,9 +40,10 @@
|
|||
#include <backend/gpu/mali_kbase_js_internal.h>
|
||||
#include <backend/gpu/mali_kbase_clk_rate_trace_mgr.h>
|
||||
#include <csf/mali_kbase_csf_csg_debugfs.h>
|
||||
#include <mali_kbase_hwcnt_virtualizer.h>
|
||||
#include <hwcnt/mali_kbase_hwcnt_virtualizer.h>
|
||||
#include <mali_kbase_kinstr_prfcnt.h>
|
||||
#include <mali_kbase_vinstr.h>
|
||||
#include <tl/mali_kbase_timeline.h>
|
||||
|
||||
/**
|
||||
* kbase_device_firmware_hwcnt_term - Terminate CSF firmware and HWC
|
||||
|
|
@ -60,7 +61,7 @@ static void kbase_device_firmware_hwcnt_term(struct kbase_device *kbdev)
|
|||
kbase_vinstr_term(kbdev->vinstr_ctx);
|
||||
kbase_hwcnt_virtualizer_term(kbdev->hwcnt_gpu_virt);
|
||||
kbase_hwcnt_backend_csf_metadata_term(&kbdev->hwcnt_gpu_iface);
|
||||
kbase_csf_firmware_term(kbdev);
|
||||
kbase_csf_firmware_unload_term(kbdev);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -197,6 +198,20 @@ static int kbase_csf_early_init(struct kbase_device *kbdev)
|
|||
static void kbase_csf_early_term(struct kbase_device *kbdev)
|
||||
{
|
||||
kbase_csf_scheduler_early_term(kbdev);
|
||||
kbase_csf_firmware_early_term(kbdev);
|
||||
}
|
||||
|
||||
/**
|
||||
* kbase_csf_late_init - late initialization for firmware.
|
||||
* @kbdev: Device pointer
|
||||
*
|
||||
* Return: 0 on success, error code otherwise.
|
||||
*/
|
||||
static int kbase_csf_late_init(struct kbase_device *kbdev)
|
||||
{
|
||||
/* Delegates entirely to the firmware layer; its result is returned unchanged. */
int err = kbase_csf_firmware_late_init(kbdev);
|
||||
|
||||
return err;
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -269,59 +284,48 @@ static void kbase_device_hwcnt_backend_csf_term(struct kbase_device *kbdev)
|
|||
|
||||
static const struct kbase_device_init dev_init[] = {
|
||||
#if IS_ENABLED(CONFIG_MALI_BIFROST_NO_MALI)
|
||||
{ kbase_gpu_device_create, kbase_gpu_device_destroy,
|
||||
"Dummy model initialization failed" },
|
||||
{ kbase_gpu_device_create, kbase_gpu_device_destroy, "Dummy model initialization failed" },
|
||||
#else
|
||||
{ assign_irqs, NULL, "IRQ search failed" },
|
||||
{ registers_map, registers_unmap, "Register map failed" },
|
||||
#endif
|
||||
{ power_control_init, power_control_term,
|
||||
"Power control initialization failed" },
|
||||
{ power_control_init, power_control_term, "Power control initialization failed" },
|
||||
{ kbase_device_io_history_init, kbase_device_io_history_term,
|
||||
"Register access history initialization failed" },
|
||||
{ kbase_device_early_init, kbase_device_early_term,
|
||||
"Early device initialization failed" },
|
||||
{ kbase_device_populate_max_freq, NULL,
|
||||
"Populating max frequency failed" },
|
||||
{ kbase_pm_lowest_gpu_freq_init, NULL,
|
||||
"Lowest freq initialization failed" },
|
||||
{ kbase_device_early_init, kbase_device_early_term, "Early device initialization failed" },
|
||||
{ kbase_device_populate_max_freq, NULL, "Populating max frequency failed" },
|
||||
{ kbase_pm_lowest_gpu_freq_init, NULL, "Lowest freq initialization failed" },
|
||||
{ kbase_device_misc_init, kbase_device_misc_term,
|
||||
"Miscellaneous device initialization failed" },
|
||||
{ kbase_device_pcm_dev_init, kbase_device_pcm_dev_term,
|
||||
"Priority control manager initialization failed" },
|
||||
{ kbase_ctx_sched_init, kbase_ctx_sched_term,
|
||||
"Context scheduler initialization failed" },
|
||||
{ kbase_mem_init, kbase_mem_term,
|
||||
"Memory subsystem initialization failed" },
|
||||
{ kbase_ctx_sched_init, kbase_ctx_sched_term, "Context scheduler initialization failed" },
|
||||
{ kbase_mem_init, kbase_mem_term, "Memory subsystem initialization failed" },
|
||||
{ kbase_csf_protected_memory_init, kbase_csf_protected_memory_term,
|
||||
"Protected memory allocator initialization failed" },
|
||||
{ kbase_device_coherency_init, NULL, "Device coherency init failed" },
|
||||
{ kbase_protected_mode_init, kbase_protected_mode_term,
|
||||
"Protected mode subsystem initialization failed" },
|
||||
{ kbase_device_list_init, kbase_device_list_term,
|
||||
"Device list setup failed" },
|
||||
{ kbase_device_list_init, kbase_device_list_term, "Device list setup failed" },
|
||||
{ kbase_device_timeline_init, kbase_device_timeline_term,
|
||||
"Timeline stream initialization failed" },
|
||||
{ kbase_clk_rate_trace_manager_init, kbase_clk_rate_trace_manager_term,
|
||||
"Clock rate trace manager initialization failed" },
|
||||
{ kbase_device_hwcnt_watchdog_if_init,
|
||||
kbase_device_hwcnt_watchdog_if_term,
|
||||
{ kbase_device_hwcnt_watchdog_if_init, kbase_device_hwcnt_watchdog_if_term,
|
||||
"GPU hwcnt backend watchdog interface creation failed" },
|
||||
{ kbase_device_hwcnt_backend_csf_if_init,
|
||||
kbase_device_hwcnt_backend_csf_if_term,
|
||||
{ kbase_device_hwcnt_backend_csf_if_init, kbase_device_hwcnt_backend_csf_if_term,
|
||||
"GPU hwcnt backend CSF interface creation failed" },
|
||||
{ kbase_device_hwcnt_backend_csf_init,
|
||||
kbase_device_hwcnt_backend_csf_term,
|
||||
{ kbase_device_hwcnt_backend_csf_init, kbase_device_hwcnt_backend_csf_term,
|
||||
"GPU hwcnt backend creation failed" },
|
||||
{ kbase_device_hwcnt_context_init, kbase_device_hwcnt_context_term,
|
||||
"GPU hwcnt context initialization failed" },
|
||||
{ kbase_csf_early_init, kbase_csf_early_term,
|
||||
"Early CSF initialization failed" },
|
||||
{ kbase_backend_late_init, kbase_backend_late_term,
|
||||
"Late backend initialization failed" },
|
||||
{ kbase_csf_early_init, kbase_csf_early_term, "Early CSF initialization failed" },
|
||||
{ kbase_backend_late_init, kbase_backend_late_term, "Late backend initialization failed" },
|
||||
{ kbase_csf_late_init, NULL, "Late CSF initialization failed" },
|
||||
{ NULL, kbase_device_firmware_hwcnt_term, NULL },
|
||||
{ kbase_device_debugfs_init, kbase_device_debugfs_term,
|
||||
"DebugFS initialization failed" },
|
||||
{ kbase_debug_csf_fault_init, kbase_debug_csf_fault_term,
|
||||
"CSF fault debug initialization failed" },
|
||||
{ kbase_device_debugfs_init, kbase_device_debugfs_term, "DebugFS initialization failed" },
|
||||
/* Sysfs init needs to happen before registering the device with
|
||||
* misc_register(), otherwise it causes a race condition between
|
||||
* registering the device and a uevent event being generated for
|
||||
|
|
@ -339,8 +343,7 @@ static const struct kbase_device_init dev_init[] = {
|
|||
"Misc device registration failed" },
|
||||
{ kbase_gpuprops_populate_user_buffer, kbase_gpuprops_free_user_buffer,
|
||||
"GPU property population failed" },
|
||||
{ kbase_device_late_init, kbase_device_late_term,
|
||||
"Late device initialization failed" },
|
||||
{ kbase_device_late_init, kbase_device_late_term, "Late device initialization failed" },
|
||||
};
|
||||
|
||||
static void kbase_device_term_partial(struct kbase_device *kbdev,
|
||||
|
|
@ -468,7 +471,7 @@ static int kbase_csf_firmware_deferred_init(struct kbase_device *kbdev)
|
|||
|
||||
lockdep_assert_held(&kbdev->fw_load_lock);
|
||||
|
||||
err = kbase_csf_firmware_init(kbdev);
|
||||
err = kbase_csf_firmware_load_init(kbdev);
|
||||
if (!err) {
|
||||
unsigned long flags;
|
||||
|
||||
|
|
@ -498,11 +501,12 @@ int kbase_device_firmware_init_once(struct kbase_device *kbdev)
|
|||
|
||||
ret = kbase_device_hwcnt_csf_deferred_init(kbdev);
|
||||
if (ret) {
|
||||
kbase_csf_firmware_term(kbdev);
|
||||
kbase_csf_firmware_unload_term(kbdev);
|
||||
goto out;
|
||||
}
|
||||
|
||||
kbase_csf_debugfs_init(kbdev);
|
||||
kbase_timeline_io_debugfs_init(kbdev);
|
||||
out:
|
||||
kbase_pm_context_idle(kbdev);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -115,6 +115,9 @@ void kbase_gpu_interrupt(struct kbase_device *kbdev, u32 val)
|
|||
GPU_EXCEPTION_TYPE_SW_FAULT_0,
|
||||
} } };
|
||||
|
||||
kbase_debug_csf_fault_notify(kbdev, scheduler->active_protm_grp->kctx,
|
||||
DF_GPU_PROTECTED_FAULT);
|
||||
|
||||
scheduler->active_protm_grp->faulted = true;
|
||||
kbase_csf_add_group_fatal_error(
|
||||
scheduler->active_protm_grp, &err_payload);
|
||||
|
|
@ -201,8 +204,11 @@ static bool kbase_is_register_accessible(u32 offset)
|
|||
|
||||
void kbase_reg_write(struct kbase_device *kbdev, u32 offset, u32 value)
|
||||
{
|
||||
KBASE_DEBUG_ASSERT(kbdev->pm.backend.gpu_powered);
|
||||
KBASE_DEBUG_ASSERT(kbdev->dev != NULL);
|
||||
if (WARN_ON(!kbdev->pm.backend.gpu_powered))
|
||||
return;
|
||||
|
||||
if (WARN_ON(kbdev->dev == NULL))
|
||||
return;
|
||||
|
||||
if (!kbase_is_register_accessible(offset))
|
||||
return;
|
||||
|
|
@ -222,8 +228,11 @@ u32 kbase_reg_read(struct kbase_device *kbdev, u32 offset)
|
|||
{
|
||||
u32 val;
|
||||
|
||||
KBASE_DEBUG_ASSERT(kbdev->pm.backend.gpu_powered);
|
||||
KBASE_DEBUG_ASSERT(kbdev->dev != NULL);
|
||||
if (WARN_ON(!kbdev->pm.backend.gpu_powered))
|
||||
return 0;
|
||||
|
||||
if (WARN_ON(kbdev->dev == NULL))
|
||||
return 0;
|
||||
|
||||
if (!kbase_is_register_accessible(offset))
|
||||
return 0;
|
||||
|
|
|
|||
|
|
@ -27,9 +27,9 @@
|
|||
#include <mali_kbase_hwaccess_backend.h>
|
||||
#include <mali_kbase_ctx_sched.h>
|
||||
#include <mali_kbase_reset_gpu.h>
|
||||
#include <mali_kbase_hwcnt_watchdog_if_timer.h>
|
||||
#include <mali_kbase_hwcnt_backend_jm.h>
|
||||
#include <mali_kbase_hwcnt_backend_jm_watchdog.h>
|
||||
#include <hwcnt/mali_kbase_hwcnt_watchdog_if_timer.h>
|
||||
#include <hwcnt/backend/mali_kbase_hwcnt_backend_jm.h>
|
||||
#include <hwcnt/backend/mali_kbase_hwcnt_backend_jm_watchdog.h>
|
||||
|
||||
#if IS_ENABLED(CONFIG_MALI_BIFROST_NO_MALI)
|
||||
#include <backend/gpu/mali_kbase_model_linux.h>
|
||||
|
|
|
|||
|
|
@ -42,8 +42,8 @@
|
|||
#include <tl/mali_kbase_timeline.h>
|
||||
#include "mali_kbase_kinstr_prfcnt.h"
|
||||
#include "mali_kbase_vinstr.h"
|
||||
#include "mali_kbase_hwcnt_context.h"
|
||||
#include "mali_kbase_hwcnt_virtualizer.h"
|
||||
#include "hwcnt/mali_kbase_hwcnt_context.h"
|
||||
#include "hwcnt/mali_kbase_hwcnt_virtualizer.h"
|
||||
|
||||
#include "mali_kbase_device.h"
|
||||
#include "mali_kbase_device_internal.h"
|
||||
|
|
@ -56,17 +56,15 @@
|
|||
#include "arbiter/mali_kbase_arbiter_pm.h"
|
||||
#endif /* CONFIG_MALI_ARBITER_SUPPORT */
|
||||
|
||||
/* NOTE: Magic - 0x45435254 (TRCE in ASCII).
|
||||
* Supports tracing feature provided in the base module.
|
||||
* Please keep it in sync with the value of base module.
|
||||
*/
|
||||
#define TRACE_BUFFER_HEADER_SPECIAL 0x45435254
|
||||
#if defined(CONFIG_DEBUG_FS) && !IS_ENABLED(CONFIG_MALI_BIFROST_NO_MALI)
|
||||
|
||||
/* Number of register accesses for the buffer that we allocate during
|
||||
* initialization time. The buffer size can be changed later via debugfs.
|
||||
*/
|
||||
#define KBASEP_DEFAULT_REGISTER_HISTORY_SIZE ((u16)512)
|
||||
|
||||
#endif /* defined(CONFIG_DEBUG_FS) && !IS_ENABLED(CONFIG_MALI_BIFROST_NO_MALI) */
|
||||
|
||||
static DEFINE_MUTEX(kbase_dev_list_lock);
|
||||
static LIST_HEAD(kbase_dev_list);
|
||||
static int kbase_dev_nr;
|
||||
|
|
|
|||
|
|
@ -130,7 +130,11 @@ bool kbase_is_gpu_removed(struct kbase_device *kbdev);
|
|||
*
|
||||
* Return: 0 if successful or a negative error code on failure.
|
||||
*/
|
||||
#define kbase_gpu_cache_flush_pa_range_and_busy_wait(kbdev, phys, nr_bytes, flush_op) (0)
|
||||
#if MALI_USE_CSF
|
||||
int kbase_gpu_cache_flush_pa_range_and_busy_wait(struct kbase_device *kbdev, phys_addr_t phys,
|
||||
size_t nr_bytes, u32 flush_op);
|
||||
#endif /* MALI_USE_CSF */
|
||||
|
||||
/**
|
||||
* kbase_gpu_cache_flush_and_busy_wait - Start a cache flush and busy wait
|
||||
* @kbdev: Kbase device
|
||||
|
|
|
|||
|
|
@ -27,9 +27,6 @@
|
|||
#include <mali_kbase_reset_gpu.h>
|
||||
#include <mmu/mali_kbase_mmu.h>
|
||||
|
||||
#define U64_LO_MASK ((1ULL << 32) - 1)
|
||||
#define U64_HI_MASK (~U64_LO_MASK)
|
||||
|
||||
#if !IS_ENABLED(CONFIG_MALI_BIFROST_NO_MALI)
|
||||
bool kbase_is_gpu_removed(struct kbase_device *kbdev)
|
||||
{
|
||||
|
|
@ -86,7 +83,38 @@ static int busy_wait_on_irq(struct kbase_device *kbdev, u32 irq_bit)
|
|||
return 0;
|
||||
}
|
||||
|
||||
#define kbase_gpu_cache_flush_pa_range_and_busy_wait(kbdev, phys, nr_bytes, flush_op) (0)
|
||||
#if MALI_USE_CSF
|
||||
#define U64_LO_MASK ((1ULL << 32) - 1)
|
||||
#define U64_HI_MASK (~U64_LO_MASK)
|
||||
|
||||
/*
 * Issue the GPU command given by flush_op for the physical address range
 * [phys, phys + nr_bytes - 1], then wait via busy_wait_on_irq() on the
 * FLUSH_PA_RANGE_COMPLETED interrupt bit.
 *
 * The caller must hold kbdev->hwaccess_lock (checked with lockdep).
 *
 * Returns whatever busy_wait_on_irq() returns.
 */
int kbase_gpu_cache_flush_pa_range_and_busy_wait(struct kbase_device *kbdev, phys_addr_t phys,
|
||||
size_t nr_bytes, u32 flush_op)
|
||||
{
|
||||
u64 start_pa, end_pa;
|
||||
int ret = 0;
|
||||
|
||||
lockdep_assert_held(&kbdev->hwaccess_lock);
|
||||
|
||||
/* 1. Clear the interrupt FLUSH_PA_RANGE_COMPLETED bit. */
|
||||
kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_CLEAR), FLUSH_PA_RANGE_COMPLETED);
|
||||
|
||||
/* 2. Issue GPU_CONTROL.COMMAND.FLUSH_PA_RANGE operation. */
|
||||
start_pa = phys;
|
||||
/*
 * ARG1 receives the address of the last byte, so the range end is inclusive.
 * NOTE(review): nr_bytes == 0 would make end_pa = start_pa - 1; callers are
 * presumably expected to pass a non-zero size - confirm.
 */
end_pa = start_pa + nr_bytes - 1;
|
||||
|
||||
/* Each 64-bit address is written as two 32-bit halves (LO, then HI). */
kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND_ARG0_LO), start_pa & U64_LO_MASK);
|
||||
kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND_ARG0_HI),
|
||||
(start_pa & U64_HI_MASK) >> 32);
|
||||
kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND_ARG1_LO), end_pa & U64_LO_MASK);
|
||||
kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND_ARG1_HI), (end_pa & U64_HI_MASK) >> 32);
|
||||
kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND), flush_op);
|
||||
|
||||
/* 3. Busy-wait on the FLUSH_PA_RANGE_COMPLETED irq status bit. */
|
||||
ret = busy_wait_on_irq(kbdev, (u32)FLUSH_PA_RANGE_COMPLETED);
|
||||
|
||||
return ret;
|
||||
}
|
||||
#endif /* MALI_USE_CSF */
|
||||
|
||||
int kbase_gpu_cache_flush_and_busy_wait(struct kbase_device *kbdev,
|
||||
u32 flush_op)
|
||||
|
|
|
|||
|
|
@ -1,7 +1,7 @@
|
|||
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
|
||||
/*
|
||||
*
|
||||
* (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved.
|
||||
* (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved.
|
||||
*
|
||||
* This program is free software and is provided to you under the terms of the
|
||||
* GNU General Public License version 2 as published by the Free Software
|
||||
|
|
@ -170,7 +170,7 @@ const char *kbase_gpu_exception_name(u32 const exception_code)
|
|||
default:
|
||||
e = "UNKNOWN";
|
||||
break;
|
||||
};
|
||||
}
|
||||
|
||||
return e;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -35,10 +35,7 @@
|
|||
#define MCU_SUBSYSTEM_BASE 0x20000
|
||||
|
||||
/* IPA control registers */
|
||||
#define IPA_CONTROL_BASE 0x40000
|
||||
#define IPA_CONTROL_REG(r) (IPA_CONTROL_BASE+(r))
|
||||
#define COMMAND 0x000 /* (WO) Command register */
|
||||
#define STATUS 0x004 /* (RO) Status register */
|
||||
#define TIMER 0x008 /* (RW) Timer control register */
|
||||
|
||||
#define SELECT_CSHW_LO 0x010 /* (RW) Counter select for CS hardware, low word */
|
||||
|
|
@ -127,8 +124,16 @@
|
|||
|
||||
#define MCU_STATUS_HALTED (1 << 1)
|
||||
|
||||
/*
 * PBHA_HWU field accessors: a 4-bit field (mask 0xF) located at bit 12.
 * _GET extracts the field from a register value; _SET returns reg_val with
 * the field replaced by value (bits of value outside the field are dropped).
 * NOTE(review): the field presumably lives in the L2_CONFIG register, going
 * by the macro prefix - confirm.
 */
#define L2_CONFIG_PBHA_HWU_SHIFT GPU_U(12)
|
||||
#define L2_CONFIG_PBHA_HWU_MASK (GPU_U(0xF) << L2_CONFIG_PBHA_HWU_SHIFT)
|
||||
#define L2_CONFIG_PBHA_HWU_GET(reg_val) \
|
||||
(((reg_val)&L2_CONFIG_PBHA_HWU_MASK) >> L2_CONFIG_PBHA_HWU_SHIFT)
|
||||
#define L2_CONFIG_PBHA_HWU_SET(reg_val, value) \
|
||||
(((reg_val) & ~L2_CONFIG_PBHA_HWU_MASK) | \
|
||||
(((value) << L2_CONFIG_PBHA_HWU_SHIFT) & L2_CONFIG_PBHA_HWU_MASK))
|
||||
|
||||
/* JOB IRQ flags */
|
||||
#define JOB_IRQ_GLOBAL_IF (1 << 31) /* Global interface interrupt received */
|
||||
#define JOB_IRQ_GLOBAL_IF (1u << 31) /* Global interface interrupt received */
|
||||
|
||||
/* GPU_COMMAND codes */
|
||||
#define GPU_COMMAND_CODE_NOP 0x00 /* No operation, nothing happens */
|
||||
|
|
|
|||
|
|
@ -127,29 +127,12 @@
|
|||
|
||||
#define JOB_SLOT_REG(n, r) (JOB_CONTROL_REG(JOB_SLOT0 + ((n) << 7)) + (r))
|
||||
|
||||
#define JS_HEAD_LO 0x00 /* (RO) Job queue head pointer for job slot n, low word */
|
||||
#define JS_HEAD_HI 0x04 /* (RO) Job queue head pointer for job slot n, high word */
|
||||
#define JS_TAIL_LO 0x08 /* (RO) Job queue tail pointer for job slot n, low word */
|
||||
#define JS_TAIL_HI 0x0C /* (RO) Job queue tail pointer for job slot n, high word */
|
||||
#define JS_AFFINITY_LO 0x10 /* (RO) Core affinity mask for job slot n, low word */
|
||||
#define JS_AFFINITY_HI 0x14 /* (RO) Core affinity mask for job slot n, high word */
|
||||
#define JS_CONFIG 0x18 /* (RO) Configuration settings for job slot n */
|
||||
/* (RO) Extended affinity mask for job slot n*/
|
||||
#define JS_XAFFINITY 0x1C
|
||||
#define JS_XAFFINITY 0x1C /* (RO) Extended affinity mask for job slot n*/
|
||||
|
||||
#define JS_COMMAND 0x20 /* (WO) Command register for job slot n */
|
||||
#define JS_STATUS 0x24 /* (RO) Status register for job slot n */
|
||||
|
||||
#define JS_HEAD_NEXT_LO 0x40 /* (RW) Next job queue head pointer for job slot n, low word */
|
||||
#define JS_HEAD_NEXT_HI 0x44 /* (RW) Next job queue head pointer for job slot n, high word */
|
||||
|
||||
#define JS_AFFINITY_NEXT_LO 0x50 /* (RW) Next core affinity mask for job slot n, low word */
|
||||
#define JS_AFFINITY_NEXT_HI 0x54 /* (RW) Next core affinity mask for job slot n, high word */
|
||||
#define JS_CONFIG_NEXT 0x58 /* (RW) Next configuration settings for job slot n */
|
||||
/* (RW) Next extended affinity mask for job slot n */
|
||||
#define JS_XAFFINITY_NEXT 0x5C
|
||||
|
||||
#define JS_COMMAND_NEXT 0x60 /* (RW) Next command register for job slot n */
|
||||
#define JS_XAFFINITY_NEXT 0x5C /* (RW) Next extended affinity mask for job slot n */
|
||||
|
||||
#define JS_FLUSH_ID_NEXT 0x70 /* (RW) Next job slot n cache flush ID */
|
||||
|
||||
|
|
|
|||
|
|
@ -45,9 +45,6 @@
|
|||
/* Begin Register Offsets */
|
||||
/* GPU control registers */
|
||||
|
||||
#define GPU_CONTROL_BASE 0x0000
|
||||
#define GPU_CONTROL_REG(r) (GPU_CONTROL_BASE + (r))
|
||||
#define GPU_ID 0x000 /* (RO) GPU and revision identifier */
|
||||
#define L2_FEATURES 0x004 /* (RO) Level 2 cache features */
|
||||
#define TILER_FEATURES 0x00C /* (RO) Tiler Features */
|
||||
#define MEM_FEATURES 0x010 /* (RO) Memory system features */
|
||||
|
|
@ -100,6 +97,10 @@
|
|||
|
||||
#define TEXTURE_FEATURES_REG(n) GPU_CONTROL_REG(TEXTURE_FEATURES_0 + ((n) << 2))
|
||||
|
||||
#define GPU_COMMAND_ARG0_LO 0x0D0 /* (RW) Additional parameter 0 for GPU commands, low word */
|
||||
#define GPU_COMMAND_ARG0_HI 0x0D4 /* (RW) Additional parameter 0 for GPU commands, high word */
|
||||
#define GPU_COMMAND_ARG1_LO 0x0D8 /* (RW) Additional parameter 1 for GPU commands, low word */
|
||||
#define GPU_COMMAND_ARG1_HI 0x0DC /* (RW) Additional parameter 1 for GPU commands, high word */
|
||||
|
||||
#define SHADER_PRESENT_LO 0x100 /* (RO) Shader core present bitmap, low word */
|
||||
#define SHADER_PRESENT_HI 0x104 /* (RO) Shader core present bitmap, high word */
|
||||
|
|
@ -113,26 +114,10 @@
|
|||
#define STACK_PRESENT_LO 0xE00 /* (RO) Core stack present bitmap, low word */
|
||||
#define STACK_PRESENT_HI 0xE04 /* (RO) Core stack present bitmap, high word */
|
||||
|
||||
#define SHADER_READY_LO 0x140 /* (RO) Shader core ready bitmap, low word */
|
||||
#define SHADER_READY_HI 0x144 /* (RO) Shader core ready bitmap, high word */
|
||||
|
||||
#define TILER_READY_LO 0x150 /* (RO) Tiler core ready bitmap, low word */
|
||||
#define TILER_READY_HI 0x154 /* (RO) Tiler core ready bitmap, high word */
|
||||
|
||||
#define L2_READY_LO 0x160 /* (RO) Level 2 cache ready bitmap, low word */
|
||||
#define L2_READY_HI 0x164 /* (RO) Level 2 cache ready bitmap, high word */
|
||||
|
||||
#define STACK_READY_LO 0xE10 /* (RO) Core stack ready bitmap, low word */
|
||||
#define STACK_READY_HI 0xE14 /* (RO) Core stack ready bitmap, high word */
|
||||
|
||||
#define SHADER_PWRON_LO 0x180 /* (WO) Shader core power on bitmap, low word */
|
||||
#define SHADER_PWRON_HI 0x184 /* (WO) Shader core power on bitmap, high word */
|
||||
|
||||
#define TILER_PWRON_LO 0x190 /* (WO) Tiler core power on bitmap, low word */
|
||||
#define TILER_PWRON_HI 0x194 /* (WO) Tiler core power on bitmap, high word */
|
||||
|
||||
#define L2_PWRON_LO 0x1A0 /* (WO) Level 2 cache power on bitmap, low word */
|
||||
#define L2_PWRON_HI 0x1A4 /* (WO) Level 2 cache power on bitmap, high word */
|
||||
#define SHADER_PWRFEATURES 0x188 /* (RW) Shader core power features */
|
||||
|
||||
#define STACK_PWRON_LO 0xE20 /* (RO) Core stack power on bitmap, low word */
|
||||
#define STACK_PWRON_HI 0xE24 /* (RO) Core stack power on bitmap, high word */
|
||||
|
|
@ -181,6 +166,8 @@
|
|||
#define COHERENCY_FEATURES 0x300 /* (RO) Coherency features present */
|
||||
#define COHERENCY_ENABLE 0x304 /* (RW) Coherency enable */
|
||||
|
||||
#define AMBA_FEATURES 0x300 /* (RO) AMBA bus supported features */
|
||||
#define AMBA_ENABLE 0x304 /* (RW) AMBA features enable */
|
||||
|
||||
#define SHADER_CONFIG 0xF04 /* (RW) Shader core configuration (implementation-specific) */
|
||||
#define TILER_CONFIG 0xF08 /* (RW) Tiler core configuration (implementation-specific) */
|
||||
|
|
@ -188,13 +175,7 @@
|
|||
|
||||
/* Job control registers */
|
||||
|
||||
#define JOB_CONTROL_BASE 0x1000
|
||||
|
||||
#define JOB_CONTROL_REG(r) (JOB_CONTROL_BASE + (r))
|
||||
|
||||
#define JOB_IRQ_RAWSTAT 0x000 /* Raw interrupt status register */
|
||||
#define JOB_IRQ_CLEAR 0x004 /* Interrupt clear register */
|
||||
#define JOB_IRQ_MASK 0x008 /* Interrupt mask register */
|
||||
#define JOB_IRQ_STATUS 0x00C /* Interrupt status register */
|
||||
|
||||
/* MMU control registers */
|
||||
|
|
@ -203,7 +184,6 @@
|
|||
#define MMU_IRQ_MASK 0x008 /* (RW) Interrupt mask register */
|
||||
#define MMU_IRQ_STATUS 0x00C /* (RO) Interrupt status register */
|
||||
|
||||
#define MMU_AS0 0x400 /* Configuration registers for address space 0 */
|
||||
#define MMU_AS1 0x440 /* Configuration registers for address space 1 */
|
||||
#define MMU_AS2 0x480 /* Configuration registers for address space 2 */
|
||||
#define MMU_AS3 0x4C0 /* Configuration registers for address space 3 */
|
||||
|
|
@ -221,25 +201,13 @@
|
|||
#define MMU_AS15 0x7C0 /* Configuration registers for address space 15 */
|
||||
|
||||
/* MMU address space control registers */
|
||||
|
||||
#define MMU_AS_REG(n, r) (MMU_REG(MMU_AS0 + ((n) << 6)) + (r))
|
||||
|
||||
#define AS_TRANSTAB_LO 0x00 /* (RW) Translation Table Base Address for address space n, low word */
|
||||
#define AS_TRANSTAB_HI 0x04 /* (RW) Translation Table Base Address for address space n, high word */
|
||||
#define AS_MEMATTR_LO 0x08 /* (RW) Memory attributes for address space n, low word. */
|
||||
#define AS_MEMATTR_HI 0x0C /* (RW) Memory attributes for address space n, high word. */
|
||||
#define AS_LOCKADDR_LO 0x10 /* (RW) Lock region address for address space n, low word */
|
||||
#define AS_LOCKADDR_HI 0x14 /* (RW) Lock region address for address space n, high word */
|
||||
#define AS_COMMAND 0x18 /* (WO) MMU command register for address space n */
|
||||
#define AS_FAULTSTATUS 0x1C /* (RO) MMU fault status register for address space n */
|
||||
#define AS_FAULTADDRESS_LO 0x20 /* (RO) Fault Address for address space n, low word */
|
||||
#define AS_FAULTADDRESS_HI 0x24 /* (RO) Fault Address for address space n, high word */
|
||||
#define AS_STATUS 0x28 /* (RO) Status flags for address space n */
|
||||
|
||||
/* (RW) Translation table configuration for address space n, low word */
|
||||
#define AS_TRANSCFG_LO 0x30
|
||||
/* (RW) Translation table configuration for address space n, high word */
|
||||
#define AS_TRANSCFG_HI 0x34
|
||||
/* (RO) Secondary fault address for address space n, low word */
|
||||
#define AS_FAULTEXTRA_LO 0x38
|
||||
/* (RO) Secondary fault address for address space n, high word */
|
||||
|
|
@ -464,6 +432,80 @@
|
|||
#define L2_CONFIG_ASN_HASH_ENABLE_MASK (1ul << L2_CONFIG_ASN_HASH_ENABLE_SHIFT)
|
||||
/* End L2_CONFIG register */
|
||||
|
||||
/* AMBA_FEATURES register */
|
||||
#define AMBA_FEATURES_ACE_LITE_SHIFT GPU_U(0)
|
||||
#define AMBA_FEATURES_ACE_LITE_MASK (GPU_U(0x1) << AMBA_FEATURES_ACE_LITE_SHIFT)
|
||||
#define AMBA_FEATURES_ACE_LITE_GET(reg_val) \
|
||||
(((reg_val)&AMBA_FEATURES_ACE_LITE_MASK) >> \
|
||||
AMBA_FEATURES_ACE_LITE_SHIFT)
|
||||
#define AMBA_FEATURES_ACE_LITE_SET(reg_val, value) \
|
||||
(((reg_val) & ~AMBA_FEATURES_ACE_LITE_MASK) | \
|
||||
(((value) << AMBA_FEATURES_ACE_LITE_SHIFT) & \
|
||||
AMBA_FEATURES_ACE_LITE_MASK))
|
||||
#define AMBA_FEATURES_ACE_SHIFT GPU_U(1)
|
||||
#define AMBA_FEATURES_ACE_MASK (GPU_U(0x1) << AMBA_FEATURES_ACE_SHIFT)
|
||||
#define AMBA_FEATURES_ACE_GET(reg_val) \
|
||||
(((reg_val)&AMBA_FEATURES_ACE_MASK) >> AMBA_FEATURES_ACE_SHIFT)
|
||||
#define AMBA_FEATURES_ACE_SET(reg_val, value) \
|
||||
(((reg_val) & ~AMBA_FEATURES_ACE_MASK) | \
|
||||
(((value) << AMBA_FEATURES_ACE_SHIFT) & AMBA_FEATURES_ACE_MASK))
|
||||
#define AMBA_FEATURES_MEMORY_CACHE_SUPPORT_SHIFT GPU_U(5)
|
||||
#define AMBA_FEATURES_MEMORY_CACHE_SUPPORT_MASK \
|
||||
(GPU_U(0x1) << AMBA_FEATURES_MEMORY_CACHE_SUPPORT_SHIFT)
|
||||
#define AMBA_FEATURES_MEMORY_CACHE_SUPPORT_GET(reg_val) \
|
||||
(((reg_val)&AMBA_FEATURES_MEMORY_CACHE_SUPPORT_MASK) >> \
|
||||
AMBA_FEATURES_MEMORY_CACHE_SUPPORT_SHIFT)
|
||||
#define AMBA_FEATURES_MEMORY_CACHE_SUPPORT_SET(reg_val, value) \
|
||||
(((reg_val) & ~AMBA_FEATURES_MEMORY_CACHE_SUPPORT_MASK) | \
|
||||
(((value) << AMBA_FEATURES_MEMORY_CACHE_SUPPORT_SHIFT) & \
|
||||
AMBA_FEATURES_MEMORY_CACHE_SUPPORT_MASK))
|
||||
#define AMBA_FEATURES_INVALIDATE_HINT_SHIFT GPU_U(6)
|
||||
#define AMBA_FEATURES_INVALIDATE_HINT_MASK \
|
||||
(GPU_U(0x1) << AMBA_FEATURES_INVALIDATE_HINT_SHIFT)
|
||||
#define AMBA_FEATURES_INVALIDATE_HINT_GET(reg_val) \
|
||||
(((reg_val)&AMBA_FEATURES_INVALIDATE_HINT_MASK) >> \
|
||||
AMBA_FEATURES_INVALIDATE_HINT_SHIFT)
|
||||
#define AMBA_FEATURES_INVALIDATE_HINT_SET(reg_val, value) \
|
||||
(((reg_val) & ~AMBA_FEATURES_INVALIDATE_HINT_MASK) | \
|
||||
(((value) << AMBA_FEATURES_INVALIDATE_HINT_SHIFT) & \
|
||||
AMBA_FEATURES_INVALIDATE_HINT_MASK))
|
||||
|
||||
/* AMBA_ENABLE register */
|
||||
#define AMBA_ENABLE_COHERENCY_PROTOCOL_SHIFT GPU_U(0)
|
||||
#define AMBA_ENABLE_COHERENCY_PROTOCOL_MASK \
|
||||
(GPU_U(0x1F) << AMBA_ENABLE_COHERENCY_PROTOCOL_SHIFT)
|
||||
#define AMBA_ENABLE_COHERENCY_PROTOCOL_GET(reg_val) \
|
||||
(((reg_val)&AMBA_ENABLE_COHERENCY_PROTOCOL_MASK) >> \
|
||||
AMBA_ENABLE_COHERENCY_PROTOCOL_SHIFT)
|
||||
#define AMBA_ENABLE_COHERENCY_PROTOCOL_SET(reg_val, value) \
|
||||
(((reg_val) & ~AMBA_ENABLE_COHERENCY_PROTOCOL_MASK) | \
|
||||
(((value) << AMBA_ENABLE_COHERENCY_PROTOCOL_SHIFT) & \
|
||||
AMBA_ENABLE_COHERENCY_PROTOCOL_MASK))
|
||||
/* AMBA_ENABLE_coherency_protocol values */
|
||||
#define AMBA_ENABLE_COHERENCY_PROTOCOL_ACE_LITE 0x0
|
||||
#define AMBA_ENABLE_COHERENCY_PROTOCOL_ACE 0x1
|
||||
#define AMBA_ENABLE_COHERENCY_PROTOCOL_NO_COHERENCY 0x1F
|
||||
/* End of AMBA_ENABLE_coherency_protocol values */
|
||||
#define AMBA_ENABLE_MEMORY_CACHE_SUPPORT_SHIFT GPU_U(5)
|
||||
#define AMBA_ENABLE_MEMORY_CACHE_SUPPORT_MASK \
|
||||
(GPU_U(0x1) << AMBA_ENABLE_MEMORY_CACHE_SUPPORT_SHIFT)
|
||||
#define AMBA_ENABLE_MEMORY_CACHE_SUPPORT_GET(reg_val) \
|
||||
(((reg_val)&AMBA_ENABLE_MEMORY_CACHE_SUPPORT_MASK) >> \
|
||||
AMBA_ENABLE_MEMORY_CACHE_SUPPORT_SHIFT)
|
||||
#define AMBA_ENABLE_MEMORY_CACHE_SUPPORT_SET(reg_val, value) \
|
||||
(((reg_val) & ~AMBA_ENABLE_MEMORY_CACHE_SUPPORT_MASK) | \
|
||||
(((value) << AMBA_ENABLE_MEMORY_CACHE_SUPPORT_SHIFT) & \
|
||||
AMBA_ENABLE_MEMORY_CACHE_SUPPORT_MASK))
|
||||
#define AMBA_ENABLE_INVALIDATE_HINT_SHIFT GPU_U(6)
|
||||
#define AMBA_ENABLE_INVALIDATE_HINT_MASK \
|
||||
(GPU_U(0x1) << AMBA_ENABLE_INVALIDATE_HINT_SHIFT)
|
||||
#define AMBA_ENABLE_INVALIDATE_HINT_GET(reg_val) \
|
||||
(((reg_val)&AMBA_ENABLE_INVALIDATE_HINT_MASK) >> \
|
||||
AMBA_ENABLE_INVALIDATE_HINT_SHIFT)
|
||||
#define AMBA_ENABLE_INVALIDATE_HINT_SET(reg_val, value) \
|
||||
(((reg_val) & ~AMBA_ENABLE_INVALIDATE_HINT_MASK) | \
|
||||
(((value) << AMBA_ENABLE_INVALIDATE_HINT_SHIFT) & \
|
||||
AMBA_ENABLE_INVALIDATE_HINT_MASK))
|
||||
|
||||
/* IDVS_GROUP register */
|
||||
#define IDVS_GROUP_SIZE_SHIFT (16)
|
||||
|
|
|
|||
|
|
@ -1,6 +1,6 @@
|
|||
# SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
|
||||
#
|
||||
# (C) COPYRIGHT 2012, 2020-2021 ARM Limited. All rights reserved.
|
||||
# (C) COPYRIGHT 2022 ARM Limited. All rights reserved.
|
||||
#
|
||||
# This program is free software and is provided to you under the terms of the
|
||||
# GNU General Public License version 2 as published by the Free Software
|
||||
|
|
@ -18,6 +18,20 @@
|
|||
#
|
||||
#
|
||||
|
||||
ifeq ($(CONFIG_DMA_BUF_LOCK), y)
|
||||
obj-m := dma_buf_lock.o
|
||||
bifrost_kbase-y += \
|
||||
hwcnt/mali_kbase_hwcnt.o \
|
||||
hwcnt/mali_kbase_hwcnt_gpu.o \
|
||||
hwcnt/mali_kbase_hwcnt_gpu_narrow.o \
|
||||
hwcnt/mali_kbase_hwcnt_types.o \
|
||||
hwcnt/mali_kbase_hwcnt_virtualizer.o \
|
||||
hwcnt/mali_kbase_hwcnt_watchdog_if_timer.o
|
||||
|
||||
ifeq ($(CONFIG_MALI_CSF_SUPPORT),y)
|
||||
bifrost_kbase-y += \
|
||||
hwcnt/backend/mali_kbase_hwcnt_backend_csf.o \
|
||||
hwcnt/backend/mali_kbase_hwcnt_backend_csf_if_fw.o
|
||||
else
|
||||
bifrost_kbase-y += \
|
||||
hwcnt/backend/mali_kbase_hwcnt_backend_jm.o \
|
||||
hwcnt/backend/mali_kbase_hwcnt_backend_jm_watchdog.o
|
||||
endif
|
||||
|
|
@ -1,7 +1,7 @@
|
|||
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
|
||||
/*
|
||||
*
|
||||
* (C) COPYRIGHT 2018, 2020-2021 ARM Limited. All rights reserved.
|
||||
* (C) COPYRIGHT 2018, 2020-2022 ARM Limited. All rights reserved.
|
||||
*
|
||||
* This program is free software and is provided to you under the terms of the
|
||||
* GNU General Public License version 2 as published by the Free Software
|
||||
|
|
@ -56,8 +56,8 @@ struct kbase_hwcnt_backend;
|
|||
*
|
||||
* Return: Non-NULL pointer to immutable hardware counter metadata.
|
||||
*/
|
||||
typedef const struct kbase_hwcnt_metadata *kbase_hwcnt_backend_metadata_fn(
|
||||
const struct kbase_hwcnt_backend_info *info);
|
||||
typedef const struct kbase_hwcnt_metadata *
|
||||
kbase_hwcnt_backend_metadata_fn(const struct kbase_hwcnt_backend_info *info);
|
||||
|
||||
/**
|
||||
* typedef kbase_hwcnt_backend_init_fn - Initialise a counter backend.
|
||||
|
|
@ -69,9 +69,8 @@ typedef const struct kbase_hwcnt_metadata *kbase_hwcnt_backend_metadata_fn(
|
|||
*
|
||||
* Return: 0 on success, else error code.
|
||||
*/
|
||||
typedef int kbase_hwcnt_backend_init_fn(
|
||||
const struct kbase_hwcnt_backend_info *info,
|
||||
struct kbase_hwcnt_backend **out_backend);
|
||||
typedef int kbase_hwcnt_backend_init_fn(const struct kbase_hwcnt_backend_info *info,
|
||||
struct kbase_hwcnt_backend **out_backend);
|
||||
|
||||
/**
|
||||
* typedef kbase_hwcnt_backend_term_fn - Terminate a counter backend.
|
||||
|
|
@ -86,8 +85,7 @@ typedef void kbase_hwcnt_backend_term_fn(struct kbase_hwcnt_backend *backend);
|
|||
*
|
||||
* Return: Backend timestamp in nanoseconds.
|
||||
*/
|
||||
typedef u64 kbase_hwcnt_backend_timestamp_ns_fn(
|
||||
struct kbase_hwcnt_backend *backend);
|
||||
typedef u64 kbase_hwcnt_backend_timestamp_ns_fn(struct kbase_hwcnt_backend *backend);
|
||||
|
||||
/**
|
||||
* typedef kbase_hwcnt_backend_dump_enable_fn - Start counter dumping with the
|
||||
|
|
@ -102,9 +100,8 @@ typedef u64 kbase_hwcnt_backend_timestamp_ns_fn(
|
|||
*
|
||||
* Return: 0 on success, else error code.
|
||||
*/
|
||||
typedef int kbase_hwcnt_backend_dump_enable_fn(
|
||||
struct kbase_hwcnt_backend *backend,
|
||||
const struct kbase_hwcnt_enable_map *enable_map);
|
||||
typedef int kbase_hwcnt_backend_dump_enable_fn(struct kbase_hwcnt_backend *backend,
|
||||
const struct kbase_hwcnt_enable_map *enable_map);
|
||||
|
||||
/**
|
||||
* typedef kbase_hwcnt_backend_dump_enable_nolock_fn - Start counter dumping
|
||||
|
|
@ -118,9 +115,9 @@ typedef int kbase_hwcnt_backend_dump_enable_fn(
|
|||
*
|
||||
* Return: 0 on success, else error code.
|
||||
*/
|
||||
typedef int kbase_hwcnt_backend_dump_enable_nolock_fn(
|
||||
struct kbase_hwcnt_backend *backend,
|
||||
const struct kbase_hwcnt_enable_map *enable_map);
|
||||
typedef int
|
||||
kbase_hwcnt_backend_dump_enable_nolock_fn(struct kbase_hwcnt_backend *backend,
|
||||
const struct kbase_hwcnt_enable_map *enable_map);
|
||||
|
||||
/**
|
||||
* typedef kbase_hwcnt_backend_dump_disable_fn - Disable counter dumping with
|
||||
|
|
@ -130,8 +127,7 @@ typedef int kbase_hwcnt_backend_dump_enable_nolock_fn(
|
|||
* If the backend is already disabled, does nothing.
|
||||
* Any undumped counter values since the last dump get will be lost.
|
||||
*/
|
||||
typedef void kbase_hwcnt_backend_dump_disable_fn(
|
||||
struct kbase_hwcnt_backend *backend);
|
||||
typedef void kbase_hwcnt_backend_dump_disable_fn(struct kbase_hwcnt_backend *backend);
|
||||
|
||||
/**
|
||||
* typedef kbase_hwcnt_backend_dump_clear_fn - Reset all the current undumped
|
||||
|
|
@ -142,8 +138,7 @@ typedef void kbase_hwcnt_backend_dump_disable_fn(
|
|||
*
|
||||
* Return: 0 on success, else error code.
|
||||
*/
|
||||
typedef int kbase_hwcnt_backend_dump_clear_fn(
|
||||
struct kbase_hwcnt_backend *backend);
|
||||
typedef int kbase_hwcnt_backend_dump_clear_fn(struct kbase_hwcnt_backend *backend);
|
||||
|
||||
/**
|
||||
* typedef kbase_hwcnt_backend_dump_request_fn - Request an asynchronous counter
|
||||
|
|
@ -157,9 +152,8 @@ typedef int kbase_hwcnt_backend_dump_clear_fn(
|
|||
*
|
||||
* Return: 0 on success, else error code.
|
||||
*/
|
||||
typedef int kbase_hwcnt_backend_dump_request_fn(
|
||||
struct kbase_hwcnt_backend *backend,
|
||||
u64 *dump_time_ns);
|
||||
typedef int kbase_hwcnt_backend_dump_request_fn(struct kbase_hwcnt_backend *backend,
|
||||
u64 *dump_time_ns);
|
||||
|
||||
/**
|
||||
* typedef kbase_hwcnt_backend_dump_wait_fn - Wait until the last requested
|
||||
|
|
@ -170,8 +164,7 @@ typedef int kbase_hwcnt_backend_dump_request_fn(
|
|||
*
|
||||
* Return: 0 on success, else error code.
|
||||
*/
|
||||
typedef int kbase_hwcnt_backend_dump_wait_fn(
|
||||
struct kbase_hwcnt_backend *backend);
|
||||
typedef int kbase_hwcnt_backend_dump_wait_fn(struct kbase_hwcnt_backend *backend);
|
||||
|
||||
/**
|
||||
* typedef kbase_hwcnt_backend_dump_get_fn - Copy or accumulate enable the
|
||||
|
|
@ -189,11 +182,10 @@ typedef int kbase_hwcnt_backend_dump_wait_fn(
|
|||
*
|
||||
* Return: 0 on success, else error code.
|
||||
*/
|
||||
typedef int kbase_hwcnt_backend_dump_get_fn(
|
||||
struct kbase_hwcnt_backend *backend,
|
||||
struct kbase_hwcnt_dump_buffer *dump_buffer,
|
||||
const struct kbase_hwcnt_enable_map *enable_map,
|
||||
bool accumulate);
|
||||
typedef int kbase_hwcnt_backend_dump_get_fn(struct kbase_hwcnt_backend *backend,
|
||||
struct kbase_hwcnt_dump_buffer *dump_buffer,
|
||||
const struct kbase_hwcnt_enable_map *enable_map,
|
||||
bool accumulate);
|
||||
|
||||
/**
|
||||
* struct kbase_hwcnt_backend_interface - Hardware counter backend virtual
|
||||
File diff suppressed because it is too large
Load Diff
|
|
@ -1,7 +1,7 @@
|
|||
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
|
||||
/*
|
||||
*
|
||||
* (C) COPYRIGHT 2021 ARM Limited. All rights reserved.
|
||||
* (C) COPYRIGHT 2021-2022 ARM Limited. All rights reserved.
|
||||
*
|
||||
* This program is free software and is provided to you under the terms of the
|
||||
* GNU General Public License version 2 as published by the Free Software
|
||||
|
|
@ -27,9 +27,9 @@
|
|||
#ifndef _KBASE_HWCNT_BACKEND_CSF_H_
|
||||
#define _KBASE_HWCNT_BACKEND_CSF_H_
|
||||
|
||||
#include "mali_kbase_hwcnt_backend.h"
|
||||
#include "mali_kbase_hwcnt_backend_csf_if.h"
|
||||
#include "mali_kbase_hwcnt_watchdog_if.h"
|
||||
#include "hwcnt/backend/mali_kbase_hwcnt_backend.h"
|
||||
#include "hwcnt/backend/mali_kbase_hwcnt_backend_csf_if.h"
|
||||
#include "hwcnt/mali_kbase_hwcnt_watchdog_if.h"
|
||||
|
||||
/**
|
||||
* kbase_hwcnt_backend_csf_create() - Create a CSF hardware counter backend
|
||||
|
|
@ -47,10 +47,9 @@
|
|||
*
|
||||
* Return: 0 on success, else error code.
|
||||
*/
|
||||
int kbase_hwcnt_backend_csf_create(
|
||||
struct kbase_hwcnt_backend_csf_if *csf_if, u32 ring_buf_cnt,
|
||||
struct kbase_hwcnt_watchdog_interface *watchdog_if,
|
||||
struct kbase_hwcnt_backend_interface *iface);
|
||||
int kbase_hwcnt_backend_csf_create(struct kbase_hwcnt_backend_csf_if *csf_if, u32 ring_buf_cnt,
|
||||
struct kbase_hwcnt_watchdog_interface *watchdog_if,
|
||||
struct kbase_hwcnt_backend_interface *iface);
|
||||
|
||||
/**
|
||||
* kbase_hwcnt_backend_csf_metadata_init() - Initialize the metadata for a CSF
|
||||
|
|
@ -58,16 +57,14 @@ int kbase_hwcnt_backend_csf_create(
|
|||
* @iface: Non-NULL pointer to backend interface structure
|
||||
* Return: 0 on success, else error code.
|
||||
*/
|
||||
int kbase_hwcnt_backend_csf_metadata_init(
|
||||
struct kbase_hwcnt_backend_interface *iface);
|
||||
int kbase_hwcnt_backend_csf_metadata_init(struct kbase_hwcnt_backend_interface *iface);
|
||||
|
||||
/**
|
||||
* kbase_hwcnt_backend_csf_metadata_term() - Terminate the metadata for a CSF
|
||||
* hardware counter backend.
|
||||
* @iface: Non-NULL pointer to backend interface structure.
|
||||
*/
|
||||
void kbase_hwcnt_backend_csf_metadata_term(
|
||||
struct kbase_hwcnt_backend_interface *iface);
|
||||
void kbase_hwcnt_backend_csf_metadata_term(struct kbase_hwcnt_backend_interface *iface);
|
||||
|
||||
/**
|
||||
* kbase_hwcnt_backend_csf_destroy() - Destroy a CSF hardware counter backend
|
||||
|
|
@ -77,8 +74,7 @@ void kbase_hwcnt_backend_csf_metadata_term(
|
|||
* Can be safely called on an all-zeroed interface, or on an already destroyed
|
||||
* interface.
|
||||
*/
|
||||
void kbase_hwcnt_backend_csf_destroy(
|
||||
struct kbase_hwcnt_backend_interface *iface);
|
||||
void kbase_hwcnt_backend_csf_destroy(struct kbase_hwcnt_backend_interface *iface);
|
||||
|
||||
/**
|
||||
* kbase_hwcnt_backend_csf_protm_entered() - CSF HWC backend function to receive
|
||||
|
|
@ -86,8 +82,7 @@ void kbase_hwcnt_backend_csf_destroy(
|
|||
* has been entered.
|
||||
* @iface: Non-NULL pointer to HWC backend interface.
|
||||
*/
|
||||
void kbase_hwcnt_backend_csf_protm_entered(
|
||||
struct kbase_hwcnt_backend_interface *iface);
|
||||
void kbase_hwcnt_backend_csf_protm_entered(struct kbase_hwcnt_backend_interface *iface);
|
||||
|
||||
/**
|
||||
* kbase_hwcnt_backend_csf_protm_exited() - CSF HWC backend function to receive
|
||||
|
|
@ -95,8 +90,7 @@ void kbase_hwcnt_backend_csf_protm_entered(
|
|||
* been exited.
|
||||
* @iface: Non-NULL pointer to HWC backend interface.
|
||||
*/
|
||||
void kbase_hwcnt_backend_csf_protm_exited(
|
||||
struct kbase_hwcnt_backend_interface *iface);
|
||||
void kbase_hwcnt_backend_csf_protm_exited(struct kbase_hwcnt_backend_interface *iface);
|
||||
|
||||
/**
|
||||
* kbase_hwcnt_backend_csf_on_unrecoverable_error() - CSF HWC backend function
|
||||
|
|
@ -108,8 +102,7 @@ void kbase_hwcnt_backend_csf_protm_exited(
|
|||
* with reset, or that may put HWC logic in a state that could result in a hang. For
|
||||
* example, on bus error, or when FW becomes unresponsive.
|
||||
*/
|
||||
void kbase_hwcnt_backend_csf_on_unrecoverable_error(
|
||||
struct kbase_hwcnt_backend_interface *iface);
|
||||
void kbase_hwcnt_backend_csf_on_unrecoverable_error(struct kbase_hwcnt_backend_interface *iface);
|
||||
|
||||
/**
|
||||
* kbase_hwcnt_backend_csf_on_before_reset() - CSF HWC backend function to be
|
||||
|
|
@ -119,16 +112,14 @@ void kbase_hwcnt_backend_csf_on_unrecoverable_error(
|
|||
* were in it.
|
||||
* @iface: Non-NULL pointer to HWC backend interface.
|
||||
*/
|
||||
void kbase_hwcnt_backend_csf_on_before_reset(
|
||||
struct kbase_hwcnt_backend_interface *iface);
|
||||
void kbase_hwcnt_backend_csf_on_before_reset(struct kbase_hwcnt_backend_interface *iface);
|
||||
|
||||
/**
|
||||
* kbase_hwcnt_backend_csf_on_prfcnt_sample() - CSF performance counter sample
|
||||
* complete interrupt handler.
|
||||
* @iface: Non-NULL pointer to HWC backend interface.
|
||||
*/
|
||||
void kbase_hwcnt_backend_csf_on_prfcnt_sample(
|
||||
struct kbase_hwcnt_backend_interface *iface);
|
||||
void kbase_hwcnt_backend_csf_on_prfcnt_sample(struct kbase_hwcnt_backend_interface *iface);
|
||||
|
||||
/**
|
||||
* kbase_hwcnt_backend_csf_on_prfcnt_threshold() - CSF performance counter
|
||||
|
|
@ -136,31 +127,27 @@ void kbase_hwcnt_backend_csf_on_prfcnt_sample(
|
|||
* interrupt handler.
|
||||
* @iface: Non-NULL pointer to HWC backend interface.
|
||||
*/
|
||||
void kbase_hwcnt_backend_csf_on_prfcnt_threshold(
|
||||
struct kbase_hwcnt_backend_interface *iface);
|
||||
void kbase_hwcnt_backend_csf_on_prfcnt_threshold(struct kbase_hwcnt_backend_interface *iface);
|
||||
|
||||
/**
|
||||
* kbase_hwcnt_backend_csf_on_prfcnt_overflow() - CSF performance counter buffer
|
||||
* overflow interrupt handler.
|
||||
* @iface: Non-NULL pointer to HWC backend interface.
|
||||
*/
|
||||
void kbase_hwcnt_backend_csf_on_prfcnt_overflow(
|
||||
struct kbase_hwcnt_backend_interface *iface);
|
||||
void kbase_hwcnt_backend_csf_on_prfcnt_overflow(struct kbase_hwcnt_backend_interface *iface);
|
||||
|
||||
/**
|
||||
* kbase_hwcnt_backend_csf_on_prfcnt_enable() - CSF performance counter enabled
|
||||
* interrupt handler.
|
||||
* @iface: Non-NULL pointer to HWC backend interface.
|
||||
*/
|
||||
void kbase_hwcnt_backend_csf_on_prfcnt_enable(
|
||||
struct kbase_hwcnt_backend_interface *iface);
|
||||
void kbase_hwcnt_backend_csf_on_prfcnt_enable(struct kbase_hwcnt_backend_interface *iface);
|
||||
|
||||
/**
|
||||
* kbase_hwcnt_backend_csf_on_prfcnt_disable() - CSF performance counter
|
||||
* disabled interrupt handler.
|
||||
* @iface: Non-NULL pointer to HWC backend interface.
|
||||
*/
|
||||
void kbase_hwcnt_backend_csf_on_prfcnt_disable(
|
||||
struct kbase_hwcnt_backend_interface *iface);
|
||||
void kbase_hwcnt_backend_csf_on_prfcnt_disable(struct kbase_hwcnt_backend_interface *iface);
|
||||
|
||||
#endif /* _KBASE_HWCNT_BACKEND_CSF_H_ */
|
||||
|
|
@ -85,8 +85,8 @@ struct kbase_hwcnt_backend_csf_if_prfcnt_info {
|
|||
* held.
|
||||
* @ctx: Non-NULL pointer to a CSF context.
|
||||
*/
|
||||
typedef void kbase_hwcnt_backend_csf_if_assert_lock_held_fn(
|
||||
struct kbase_hwcnt_backend_csf_if_ctx *ctx);
|
||||
typedef void
|
||||
kbase_hwcnt_backend_csf_if_assert_lock_held_fn(struct kbase_hwcnt_backend_csf_if_ctx *ctx);
|
||||
|
||||
/**
|
||||
* typedef kbase_hwcnt_backend_csf_if_lock_fn - Acquire backend spinlock.
|
||||
|
|
@ -95,9 +95,8 @@ typedef void kbase_hwcnt_backend_csf_if_assert_lock_held_fn(
|
|||
* @flags: Pointer to the memory location that would store the previous
|
||||
* interrupt state.
|
||||
*/
|
||||
typedef void kbase_hwcnt_backend_csf_if_lock_fn(
|
||||
struct kbase_hwcnt_backend_csf_if_ctx *ctx,
|
||||
unsigned long *flags);
|
||||
typedef void kbase_hwcnt_backend_csf_if_lock_fn(struct kbase_hwcnt_backend_csf_if_ctx *ctx,
|
||||
unsigned long *flags);
|
||||
|
||||
/**
|
||||
* typedef kbase_hwcnt_backend_csf_if_unlock_fn - Release backend spinlock.
|
||||
|
|
@ -106,9 +105,8 @@ typedef void kbase_hwcnt_backend_csf_if_lock_fn(
|
|||
* @flags: Previously stored interrupt state when Scheduler interrupt
|
||||
* spinlock was acquired.
|
||||
*/
|
||||
typedef void kbase_hwcnt_backend_csf_if_unlock_fn(
|
||||
struct kbase_hwcnt_backend_csf_if_ctx *ctx,
|
||||
unsigned long flags);
|
||||
typedef void kbase_hwcnt_backend_csf_if_unlock_fn(struct kbase_hwcnt_backend_csf_if_ctx *ctx,
|
||||
unsigned long flags);
|
||||
|
||||
/**
|
||||
* typedef kbase_hwcnt_backend_csf_if_get_prfcnt_info_fn - Get performance
|
||||
|
|
@ -137,10 +135,10 @@ typedef void kbase_hwcnt_backend_csf_if_get_prfcnt_info_fn(
|
|||
*
|
||||
* Return: 0 on success, else error code.
|
||||
*/
|
||||
typedef int kbase_hwcnt_backend_csf_if_ring_buf_alloc_fn(
|
||||
struct kbase_hwcnt_backend_csf_if_ctx *ctx, u32 buf_count,
|
||||
void **cpu_dump_base,
|
||||
struct kbase_hwcnt_backend_csf_if_ring_buf **ring_buf);
|
||||
typedef int
|
||||
kbase_hwcnt_backend_csf_if_ring_buf_alloc_fn(struct kbase_hwcnt_backend_csf_if_ctx *ctx,
|
||||
u32 buf_count, void **cpu_dump_base,
|
||||
struct kbase_hwcnt_backend_csf_if_ring_buf **ring_buf);
|
||||
|
||||
/**
|
||||
* typedef kbase_hwcnt_backend_csf_if_ring_buf_sync_fn - Sync HWC dump buffers
|
||||
|
|
@ -159,10 +157,10 @@ typedef int kbase_hwcnt_backend_csf_if_ring_buf_alloc_fn(
|
|||
* Flush cached HWC dump buffer data to ensure that all writes from GPU and CPU
|
||||
* are correctly observed.
|
||||
*/
|
||||
typedef void kbase_hwcnt_backend_csf_if_ring_buf_sync_fn(
|
||||
struct kbase_hwcnt_backend_csf_if_ctx *ctx,
|
||||
struct kbase_hwcnt_backend_csf_if_ring_buf *ring_buf,
|
||||
u32 buf_index_first, u32 buf_index_last, bool for_cpu);
|
||||
typedef void
|
||||
kbase_hwcnt_backend_csf_if_ring_buf_sync_fn(struct kbase_hwcnt_backend_csf_if_ctx *ctx,
|
||||
struct kbase_hwcnt_backend_csf_if_ring_buf *ring_buf,
|
||||
u32 buf_index_first, u32 buf_index_last, bool for_cpu);
|
||||
|
||||
/**
|
||||
* typedef kbase_hwcnt_backend_csf_if_ring_buf_free_fn - Free a ring buffer for
|
||||
|
|
@ -171,9 +169,9 @@ typedef void kbase_hwcnt_backend_csf_if_ring_buf_sync_fn(
|
|||
* @ctx: Non-NULL pointer to a CSF interface context.
|
||||
* @ring_buf: Non-NULL pointer to the ring buffer which is to be freed.
|
||||
*/
|
||||
typedef void kbase_hwcnt_backend_csf_if_ring_buf_free_fn(
|
||||
struct kbase_hwcnt_backend_csf_if_ctx *ctx,
|
||||
struct kbase_hwcnt_backend_csf_if_ring_buf *ring_buf);
|
||||
typedef void
|
||||
kbase_hwcnt_backend_csf_if_ring_buf_free_fn(struct kbase_hwcnt_backend_csf_if_ctx *ctx,
|
||||
struct kbase_hwcnt_backend_csf_if_ring_buf *ring_buf);
|
||||
|
||||
/**
|
||||
* typedef kbase_hwcnt_backend_csf_if_timestamp_ns_fn - Get the current
|
||||
|
|
@ -183,8 +181,7 @@ typedef void kbase_hwcnt_backend_csf_if_ring_buf_free_fn(
|
|||
*
|
||||
* Return: CSF interface timestamp in nanoseconds.
|
||||
*/
|
||||
typedef u64 kbase_hwcnt_backend_csf_if_timestamp_ns_fn(
|
||||
struct kbase_hwcnt_backend_csf_if_ctx *ctx);
|
||||
typedef u64 kbase_hwcnt_backend_csf_if_timestamp_ns_fn(struct kbase_hwcnt_backend_csf_if_ctx *ctx);
|
||||
|
||||
/**
|
||||
* typedef kbase_hwcnt_backend_csf_if_dump_enable_fn - Setup and enable hardware
|
||||
|
|
@ -195,10 +192,10 @@ typedef u64 kbase_hwcnt_backend_csf_if_timestamp_ns_fn(
|
|||
*
|
||||
* Requires lock to be taken before calling.
|
||||
*/
|
||||
typedef void kbase_hwcnt_backend_csf_if_dump_enable_fn(
|
||||
struct kbase_hwcnt_backend_csf_if_ctx *ctx,
|
||||
struct kbase_hwcnt_backend_csf_if_ring_buf *ring_buf,
|
||||
struct kbase_hwcnt_backend_csf_if_enable *enable);
|
||||
typedef void
|
||||
kbase_hwcnt_backend_csf_if_dump_enable_fn(struct kbase_hwcnt_backend_csf_if_ctx *ctx,
|
||||
struct kbase_hwcnt_backend_csf_if_ring_buf *ring_buf,
|
||||
struct kbase_hwcnt_backend_csf_if_enable *enable);
|
||||
|
||||
/**
|
||||
* typedef kbase_hwcnt_backend_csf_if_dump_disable_fn - Disable hardware counter
|
||||
|
|
@ -207,8 +204,7 @@ typedef void kbase_hwcnt_backend_csf_if_dump_enable_fn(
|
|||
*
|
||||
* Requires lock to be taken before calling.
|
||||
*/
|
||||
typedef void kbase_hwcnt_backend_csf_if_dump_disable_fn(
|
||||
struct kbase_hwcnt_backend_csf_if_ctx *ctx);
|
||||
typedef void kbase_hwcnt_backend_csf_if_dump_disable_fn(struct kbase_hwcnt_backend_csf_if_ctx *ctx);
|
||||
|
||||
/**
|
||||
* typedef kbase_hwcnt_backend_csf_if_dump_request_fn - Request a HWC dump.
|
||||
|
|
@ -217,8 +213,7 @@ typedef void kbase_hwcnt_backend_csf_if_dump_disable_fn(
|
|||
*
|
||||
* Requires lock to be taken before calling.
|
||||
*/
|
||||
typedef void kbase_hwcnt_backend_csf_if_dump_request_fn(
|
||||
struct kbase_hwcnt_backend_csf_if_ctx *ctx);
|
||||
typedef void kbase_hwcnt_backend_csf_if_dump_request_fn(struct kbase_hwcnt_backend_csf_if_ctx *ctx);
|
||||
|
||||
/**
|
||||
* typedef kbase_hwcnt_backend_csf_if_get_indexes_fn - Get current extract and
|
||||
|
|
@ -231,9 +226,8 @@ typedef void kbase_hwcnt_backend_csf_if_dump_request_fn(
|
|||
*
|
||||
* Requires lock to be taken before calling.
|
||||
*/
|
||||
typedef void kbase_hwcnt_backend_csf_if_get_indexes_fn(
|
||||
struct kbase_hwcnt_backend_csf_if_ctx *ctx, u32 *extract_index,
|
||||
u32 *insert_index);
|
||||
typedef void kbase_hwcnt_backend_csf_if_get_indexes_fn(struct kbase_hwcnt_backend_csf_if_ctx *ctx,
|
||||
u32 *extract_index, u32 *insert_index);
|
||||
|
||||
/**
|
||||
* typedef kbase_hwcnt_backend_csf_if_set_extract_index_fn - Update the extract
|
||||
|
|
@ -245,8 +239,9 @@ typedef void kbase_hwcnt_backend_csf_if_get_indexes_fn(
|
|||
*
|
||||
* Requires lock to be taken before calling.
|
||||
*/
|
||||
typedef void kbase_hwcnt_backend_csf_if_set_extract_index_fn(
|
||||
struct kbase_hwcnt_backend_csf_if_ctx *ctx, u32 extract_index);
|
||||
typedef void
|
||||
kbase_hwcnt_backend_csf_if_set_extract_index_fn(struct kbase_hwcnt_backend_csf_if_ctx *ctx,
|
||||
u32 extract_index);
|
||||
|
||||
/**
|
||||
* typedef kbase_hwcnt_backend_csf_if_get_gpu_cycle_count_fn - Get the current
|
||||
|
|
@ -260,9 +255,9 @@ typedef void kbase_hwcnt_backend_csf_if_set_extract_index_fn(
|
|||
*
|
||||
* Requires lock to be taken before calling.
|
||||
*/
|
||||
typedef void kbase_hwcnt_backend_csf_if_get_gpu_cycle_count_fn(
|
||||
struct kbase_hwcnt_backend_csf_if_ctx *ctx, u64 *cycle_counts,
|
||||
u64 clk_enable_map);
|
||||
typedef void
|
||||
kbase_hwcnt_backend_csf_if_get_gpu_cycle_count_fn(struct kbase_hwcnt_backend_csf_if_ctx *ctx,
|
||||
u64 *cycle_counts, u64 clk_enable_map);
|
||||
|
||||
/**
|
||||
* struct kbase_hwcnt_backend_csf_if - Hardware counter backend CSF virtual
|
||||
|
|
@ -26,12 +26,12 @@
|
|||
#include <mali_kbase.h>
|
||||
#include <gpu/mali_kbase_gpu_regmap.h>
|
||||
#include <device/mali_kbase_device.h>
|
||||
#include "mali_kbase_hwcnt_gpu.h"
|
||||
#include "mali_kbase_hwcnt_types.h"
|
||||
#include "hwcnt/mali_kbase_hwcnt_gpu.h"
|
||||
#include "hwcnt/mali_kbase_hwcnt_types.h"
|
||||
#include <csf/mali_kbase_csf_registers.h>
|
||||
|
||||
#include "csf/mali_kbase_csf_firmware.h"
|
||||
#include "mali_kbase_hwcnt_backend_csf_if_fw.h"
|
||||
#include "hwcnt/backend/mali_kbase_hwcnt_backend_csf_if_fw.h"
|
||||
#include "mali_kbase_hwaccess_time.h"
|
||||
#include "backend/gpu/mali_kbase_clk_rate_trace_mgr.h"
|
||||
|
||||
|
|
@ -42,9 +42,6 @@
|
|||
#include <backend/gpu/mali_kbase_model_dummy.h>
|
||||
#endif /* CONFIG_MALI_BIFROST_NO_MALI */
|
||||
|
||||
/** The number of nanoseconds in a second. */
|
||||
#define NSECS_IN_SEC 1000000000ull /* ns */
|
||||
|
||||
/* Ring buffer virtual addresses start at 4GB */
|
||||
#define KBASE_HWC_CSF_RING_BUFFER_VA_START (1ull << 32)
|
||||
|
||||
|
|
@ -90,8 +87,8 @@ struct kbase_hwcnt_backend_csf_if_fw_ctx {
|
|||
struct kbase_ccswe ccswe_shader_cores;
|
||||
};
|
||||
|
||||
static void kbasep_hwcnt_backend_csf_if_fw_assert_lock_held(
|
||||
struct kbase_hwcnt_backend_csf_if_ctx *ctx)
|
||||
static void
|
||||
kbasep_hwcnt_backend_csf_if_fw_assert_lock_held(struct kbase_hwcnt_backend_csf_if_ctx *ctx)
|
||||
{
|
||||
struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx;
|
||||
struct kbase_device *kbdev;
|
||||
|
|
@ -104,9 +101,8 @@ static void kbasep_hwcnt_backend_csf_if_fw_assert_lock_held(
|
|||
kbase_csf_scheduler_spin_lock_assert_held(kbdev);
|
||||
}
|
||||
|
||||
static void
|
||||
kbasep_hwcnt_backend_csf_if_fw_lock(struct kbase_hwcnt_backend_csf_if_ctx *ctx,
|
||||
unsigned long *flags)
|
||||
static void kbasep_hwcnt_backend_csf_if_fw_lock(struct kbase_hwcnt_backend_csf_if_ctx *ctx,
|
||||
unsigned long *flags)
|
||||
{
|
||||
struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx;
|
||||
struct kbase_device *kbdev;
|
||||
|
|
@ -119,8 +115,8 @@ kbasep_hwcnt_backend_csf_if_fw_lock(struct kbase_hwcnt_backend_csf_if_ctx *ctx,
|
|||
kbase_csf_scheduler_spin_lock(kbdev, flags);
|
||||
}
|
||||
|
||||
static void kbasep_hwcnt_backend_csf_if_fw_unlock(
|
||||
struct kbase_hwcnt_backend_csf_if_ctx *ctx, unsigned long flags)
|
||||
static void kbasep_hwcnt_backend_csf_if_fw_unlock(struct kbase_hwcnt_backend_csf_if_ctx *ctx,
|
||||
unsigned long flags)
|
||||
{
|
||||
struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx;
|
||||
struct kbase_device *kbdev;
|
||||
|
|
@ -141,22 +137,19 @@ static void kbasep_hwcnt_backend_csf_if_fw_unlock(
|
|||
* @clk_index: Clock index
|
||||
* @clk_rate_hz: Clock frequency(hz)
|
||||
*/
|
||||
static void kbasep_hwcnt_backend_csf_if_fw_on_freq_change(
|
||||
struct kbase_clk_rate_listener *rate_listener, u32 clk_index,
|
||||
u32 clk_rate_hz)
|
||||
static void
|
||||
kbasep_hwcnt_backend_csf_if_fw_on_freq_change(struct kbase_clk_rate_listener *rate_listener,
|
||||
u32 clk_index, u32 clk_rate_hz)
|
||||
{
|
||||
struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx =
|
||||
container_of(rate_listener,
|
||||
struct kbase_hwcnt_backend_csf_if_fw_ctx,
|
||||
rate_listener);
|
||||
struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx = container_of(
|
||||
rate_listener, struct kbase_hwcnt_backend_csf_if_fw_ctx, rate_listener);
|
||||
u64 timestamp_ns;
|
||||
|
||||
if (clk_index != KBASE_CLOCK_DOMAIN_SHADER_CORES)
|
||||
return;
|
||||
|
||||
timestamp_ns = ktime_get_raw_ns();
|
||||
kbase_ccswe_freq_change(&fw_ctx->ccswe_shader_cores, timestamp_ns,
|
||||
clk_rate_hz);
|
||||
kbase_ccswe_freq_change(&fw_ctx->ccswe_shader_cores, timestamp_ns, clk_rate_hz);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -165,17 +158,16 @@ static void kbasep_hwcnt_backend_csf_if_fw_on_freq_change(
|
|||
* @fw_ctx: Non-NULL pointer to CSF firmware interface context.
|
||||
* @clk_enable_map: Non-NULL pointer to enable map specifying enabled counters.
|
||||
*/
|
||||
static void kbasep_hwcnt_backend_csf_if_fw_cc_enable(
|
||||
struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx, u64 clk_enable_map)
|
||||
static void
|
||||
kbasep_hwcnt_backend_csf_if_fw_cc_enable(struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx,
|
||||
u64 clk_enable_map)
|
||||
{
|
||||
struct kbase_device *kbdev = fw_ctx->kbdev;
|
||||
|
||||
if (kbase_hwcnt_clk_enable_map_enabled(
|
||||
clk_enable_map, KBASE_CLOCK_DOMAIN_SHADER_CORES)) {
|
||||
if (kbase_hwcnt_clk_enable_map_enabled(clk_enable_map, KBASE_CLOCK_DOMAIN_SHADER_CORES)) {
|
||||
/* software estimation for non-top clock domains */
|
||||
struct kbase_clk_rate_trace_manager *rtm = &kbdev->pm.clk_rtm;
|
||||
const struct kbase_clk_data *clk_data =
|
||||
rtm->clks[KBASE_CLOCK_DOMAIN_SHADER_CORES];
|
||||
const struct kbase_clk_data *clk_data = rtm->clks[KBASE_CLOCK_DOMAIN_SHADER_CORES];
|
||||
u32 cur_freq;
|
||||
unsigned long flags;
|
||||
u64 timestamp_ns;
|
||||
|
|
@ -186,11 +178,9 @@ static void kbasep_hwcnt_backend_csf_if_fw_cc_enable(
|
|||
|
||||
cur_freq = (u32)clk_data->clock_val;
|
||||
kbase_ccswe_reset(&fw_ctx->ccswe_shader_cores);
|
||||
kbase_ccswe_freq_change(&fw_ctx->ccswe_shader_cores,
|
||||
timestamp_ns, cur_freq);
|
||||
kbase_ccswe_freq_change(&fw_ctx->ccswe_shader_cores, timestamp_ns, cur_freq);
|
||||
|
||||
kbase_clk_rate_trace_manager_subscribe_no_lock(
|
||||
rtm, &fw_ctx->rate_listener);
|
||||
kbase_clk_rate_trace_manager_subscribe_no_lock(rtm, &fw_ctx->rate_listener);
|
||||
|
||||
spin_unlock_irqrestore(&rtm->lock, flags);
|
||||
}
|
||||
|
|
@ -203,17 +193,15 @@ static void kbasep_hwcnt_backend_csf_if_fw_cc_enable(
|
|||
*
|
||||
* @fw_ctx: Non-NULL pointer to CSF firmware interface context.
|
||||
*/
|
||||
static void kbasep_hwcnt_backend_csf_if_fw_cc_disable(
|
||||
struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx)
|
||||
static void
|
||||
kbasep_hwcnt_backend_csf_if_fw_cc_disable(struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx)
|
||||
{
|
||||
struct kbase_device *kbdev = fw_ctx->kbdev;
|
||||
struct kbase_clk_rate_trace_manager *rtm = &kbdev->pm.clk_rtm;
|
||||
u64 clk_enable_map = fw_ctx->clk_enable_map;
|
||||
|
||||
if (kbase_hwcnt_clk_enable_map_enabled(clk_enable_map,
|
||||
KBASE_CLOCK_DOMAIN_SHADER_CORES))
|
||||
kbase_clk_rate_trace_manager_unsubscribe(
|
||||
rtm, &fw_ctx->rate_listener);
|
||||
if (kbase_hwcnt_clk_enable_map_enabled(clk_enable_map, KBASE_CLOCK_DOMAIN_SHADER_CORES))
|
||||
kbase_clk_rate_trace_manager_unsubscribe(rtm, &fw_ctx->rate_listener);
|
||||
}
|
||||
|
||||
static void kbasep_hwcnt_backend_csf_if_fw_get_prfcnt_info(
|
||||
|
|
@ -244,8 +232,8 @@ static void kbasep_hwcnt_backend_csf_if_fw_get_prfcnt_info(
|
|||
u32 prfcnt_size;
|
||||
u32 prfcnt_hw_size;
|
||||
u32 prfcnt_fw_size;
|
||||
u32 prfcnt_block_size = KBASE_HWCNT_V5_DEFAULT_VALUES_PER_BLOCK *
|
||||
KBASE_HWCNT_VALUE_HW_BYTES;
|
||||
u32 prfcnt_block_size =
|
||||
KBASE_HWCNT_V5_DEFAULT_VALUES_PER_BLOCK * KBASE_HWCNT_VALUE_HW_BYTES;
|
||||
|
||||
WARN_ON(!ctx);
|
||||
WARN_ON(!prfcnt_info);
|
||||
|
|
@ -262,10 +250,9 @@ static void kbasep_hwcnt_backend_csf_if_fw_get_prfcnt_info(
|
|||
*/
|
||||
if ((kbdev->gpu_props.props.raw_props.gpu_id & GPU_ID2_PRODUCT_MODEL) >=
|
||||
GPU_ID2_PRODUCT_TTUX) {
|
||||
prfcnt_block_size =
|
||||
PRFCNT_FEATURES_COUNTER_BLOCK_SIZE_GET(kbase_reg_read(
|
||||
kbdev, GPU_CONTROL_REG(PRFCNT_FEATURES)))
|
||||
<< 8;
|
||||
prfcnt_block_size = PRFCNT_FEATURES_COUNTER_BLOCK_SIZE_GET(
|
||||
kbase_reg_read(kbdev, GPU_CONTROL_REG(PRFCNT_FEATURES)))
|
||||
<< 8;
|
||||
}
|
||||
|
||||
*prfcnt_info = (struct kbase_hwcnt_backend_csf_if_prfcnt_info){
|
||||
|
|
@ -280,17 +267,14 @@ static void kbasep_hwcnt_backend_csf_if_fw_get_prfcnt_info(
|
|||
};
|
||||
|
||||
/* Block size must be multiple of counter size. */
|
||||
WARN_ON((prfcnt_info->prfcnt_block_size % KBASE_HWCNT_VALUE_HW_BYTES) !=
|
||||
0);
|
||||
WARN_ON((prfcnt_info->prfcnt_block_size % KBASE_HWCNT_VALUE_HW_BYTES) != 0);
|
||||
/* Total size must be multiple of block size. */
|
||||
WARN_ON((prfcnt_info->dump_bytes % prfcnt_info->prfcnt_block_size) !=
|
||||
0);
|
||||
WARN_ON((prfcnt_info->dump_bytes % prfcnt_info->prfcnt_block_size) != 0);
|
||||
#endif
|
||||
}
|
||||
|
||||
static int kbasep_hwcnt_backend_csf_if_fw_ring_buf_alloc(
|
||||
struct kbase_hwcnt_backend_csf_if_ctx *ctx, u32 buf_count,
|
||||
void **cpu_dump_base,
|
||||
struct kbase_hwcnt_backend_csf_if_ctx *ctx, u32 buf_count, void **cpu_dump_base,
|
||||
struct kbase_hwcnt_backend_csf_if_ring_buf **out_ring_buf)
|
||||
{
|
||||
struct kbase_device *kbdev;
|
||||
|
|
@ -342,9 +326,8 @@ static int kbasep_hwcnt_backend_csf_if_fw_ring_buf_alloc(
|
|||
goto page_list_alloc_error;
|
||||
|
||||
/* Get physical page for the buffer */
|
||||
ret = kbase_mem_pool_alloc_pages(
|
||||
&kbdev->mem_pools.small[KBASE_MEM_GROUP_CSF_FW], num_pages,
|
||||
phys, false);
|
||||
ret = kbase_mem_pool_alloc_pages(&kbdev->mem_pools.small[KBASE_MEM_GROUP_CSF_FW], num_pages,
|
||||
phys, false);
|
||||
if (ret != num_pages)
|
||||
goto phys_mem_pool_alloc_error;
|
||||
|
||||
|
|
@ -360,9 +343,8 @@ static int kbasep_hwcnt_backend_csf_if_fw_ring_buf_alloc(
|
|||
KBASE_REG_MEMATTR_INDEX(AS_MEMATTR_INDEX_NON_CACHEABLE);
|
||||
|
||||
/* Update MMU table */
|
||||
ret = kbase_mmu_insert_pages(kbdev, &kbdev->csf.mcu_mmu,
|
||||
gpu_va_base >> PAGE_SHIFT, phys, num_pages,
|
||||
flags, MCU_AS_NR, KBASE_MEM_GROUP_CSF_FW,
|
||||
ret = kbase_mmu_insert_pages(kbdev, &kbdev->csf.mcu_mmu, gpu_va_base >> PAGE_SHIFT, phys,
|
||||
num_pages, flags, MCU_AS_NR, KBASE_MEM_GROUP_CSF_FW,
|
||||
mmu_sync_info);
|
||||
if (ret)
|
||||
goto mmu_insert_failed;
|
||||
|
|
@ -381,17 +363,15 @@ static int kbasep_hwcnt_backend_csf_if_fw_ring_buf_alloc(
|
|||
fw_ring_buf->as_nr = MCU_AS_NR;
|
||||
|
||||
*cpu_dump_base = fw_ring_buf->cpu_dump_base;
|
||||
*out_ring_buf =
|
||||
(struct kbase_hwcnt_backend_csf_if_ring_buf *)fw_ring_buf;
|
||||
*out_ring_buf = (struct kbase_hwcnt_backend_csf_if_ring_buf *)fw_ring_buf;
|
||||
|
||||
return 0;
|
||||
|
||||
mmu_insert_failed:
|
||||
vunmap(cpu_addr);
|
||||
vmap_error:
|
||||
kbase_mem_pool_free_pages(
|
||||
&kbdev->mem_pools.small[KBASE_MEM_GROUP_CSF_FW], num_pages,
|
||||
phys, false, false);
|
||||
kbase_mem_pool_free_pages(&kbdev->mem_pools.small[KBASE_MEM_GROUP_CSF_FW], num_pages, phys,
|
||||
false, false);
|
||||
phys_mem_pool_alloc_error:
|
||||
kfree(page_list);
|
||||
page_list_alloc_error:
|
||||
|
|
@ -401,10 +381,10 @@ static int kbasep_hwcnt_backend_csf_if_fw_ring_buf_alloc(
|
|||
return -ENOMEM;
|
||||
}
|
||||
|
||||
static void kbasep_hwcnt_backend_csf_if_fw_ring_buf_sync(
|
||||
struct kbase_hwcnt_backend_csf_if_ctx *ctx,
|
||||
struct kbase_hwcnt_backend_csf_if_ring_buf *ring_buf,
|
||||
u32 buf_index_first, u32 buf_index_last, bool for_cpu)
|
||||
static void
|
||||
kbasep_hwcnt_backend_csf_if_fw_ring_buf_sync(struct kbase_hwcnt_backend_csf_if_ctx *ctx,
|
||||
struct kbase_hwcnt_backend_csf_if_ring_buf *ring_buf,
|
||||
u32 buf_index_first, u32 buf_index_last, bool for_cpu)
|
||||
{
|
||||
struct kbase_hwcnt_backend_csf_if_fw_ring_buf *fw_ring_buf =
|
||||
(struct kbase_hwcnt_backend_csf_if_fw_ring_buf *)ring_buf;
|
||||
|
|
@ -435,8 +415,7 @@ static void kbasep_hwcnt_backend_csf_if_fw_ring_buf_sync(
|
|||
* inclusive at both ends so full flushes are not 0 -> 0.
|
||||
*/
|
||||
ring_buf_index_first = buf_index_first & (fw_ring_buf->buf_count - 1);
|
||||
ring_buf_index_last =
|
||||
(buf_index_last - 1) & (fw_ring_buf->buf_count - 1);
|
||||
ring_buf_index_last = (buf_index_last - 1) & (fw_ring_buf->buf_count - 1);
|
||||
|
||||
/* The start address is the offset of the first buffer. */
|
||||
start_address = fw_ctx->buf_bytes * ring_buf_index_first;
|
||||
|
|
@ -453,15 +432,11 @@ static void kbasep_hwcnt_backend_csf_if_fw_ring_buf_sync(
|
|||
struct page *pg = as_page(fw_ring_buf->phys[i]);
|
||||
|
||||
if (for_cpu) {
|
||||
kbase_sync_single_for_cpu(fw_ctx->kbdev,
|
||||
kbase_dma_addr(pg),
|
||||
PAGE_SIZE,
|
||||
DMA_BIDIRECTIONAL);
|
||||
kbase_sync_single_for_cpu(fw_ctx->kbdev, kbase_dma_addr(pg),
|
||||
PAGE_SIZE, DMA_BIDIRECTIONAL);
|
||||
} else {
|
||||
kbase_sync_single_for_device(fw_ctx->kbdev,
|
||||
kbase_dma_addr(pg),
|
||||
PAGE_SIZE,
|
||||
DMA_BIDIRECTIONAL);
|
||||
kbase_sync_single_for_device(fw_ctx->kbdev, kbase_dma_addr(pg),
|
||||
PAGE_SIZE, DMA_BIDIRECTIONAL);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -473,28 +448,24 @@ static void kbasep_hwcnt_backend_csf_if_fw_ring_buf_sync(
|
|||
struct page *pg = as_page(fw_ring_buf->phys[i]);
|
||||
|
||||
if (for_cpu) {
|
||||
kbase_sync_single_for_cpu(fw_ctx->kbdev,
|
||||
kbase_dma_addr(pg), PAGE_SIZE,
|
||||
kbase_sync_single_for_cpu(fw_ctx->kbdev, kbase_dma_addr(pg), PAGE_SIZE,
|
||||
DMA_BIDIRECTIONAL);
|
||||
} else {
|
||||
kbase_sync_single_for_device(fw_ctx->kbdev,
|
||||
kbase_dma_addr(pg),
|
||||
PAGE_SIZE,
|
||||
kbase_sync_single_for_device(fw_ctx->kbdev, kbase_dma_addr(pg), PAGE_SIZE,
|
||||
DMA_BIDIRECTIONAL);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static u64 kbasep_hwcnt_backend_csf_if_fw_timestamp_ns(
|
||||
struct kbase_hwcnt_backend_csf_if_ctx *ctx)
|
||||
static u64 kbasep_hwcnt_backend_csf_if_fw_timestamp_ns(struct kbase_hwcnt_backend_csf_if_ctx *ctx)
|
||||
{
|
||||
CSTD_UNUSED(ctx);
|
||||
return ktime_get_raw_ns();
|
||||
}
|
||||
|
||||
static void kbasep_hwcnt_backend_csf_if_fw_ring_buf_free(
|
||||
struct kbase_hwcnt_backend_csf_if_ctx *ctx,
|
||||
struct kbase_hwcnt_backend_csf_if_ring_buf *ring_buf)
|
||||
static void
|
||||
kbasep_hwcnt_backend_csf_if_fw_ring_buf_free(struct kbase_hwcnt_backend_csf_if_ctx *ctx,
|
||||
struct kbase_hwcnt_backend_csf_if_ring_buf *ring_buf)
|
||||
{
|
||||
struct kbase_hwcnt_backend_csf_if_fw_ring_buf *fw_ring_buf =
|
||||
(struct kbase_hwcnt_backend_csf_if_fw_ring_buf *)ring_buf;
|
||||
|
|
@ -513,10 +484,8 @@ static void kbasep_hwcnt_backend_csf_if_fw_ring_buf_free(
|
|||
|
||||
vunmap(fw_ring_buf->cpu_dump_base);
|
||||
|
||||
kbase_mem_pool_free_pages(
|
||||
&fw_ctx->kbdev->mem_pools.small[KBASE_MEM_GROUP_CSF_FW],
|
||||
fw_ring_buf->num_pages, fw_ring_buf->phys, false,
|
||||
false);
|
||||
kbase_mem_pool_free_pages(&fw_ctx->kbdev->mem_pools.small[KBASE_MEM_GROUP_CSF_FW],
|
||||
fw_ring_buf->num_pages, fw_ring_buf->phys, false, false);
|
||||
|
||||
kfree(fw_ring_buf->phys);
|
||||
|
||||
|
|
@ -524,10 +493,10 @@ static void kbasep_hwcnt_backend_csf_if_fw_ring_buf_free(
|
|||
}
|
||||
}
|
||||
|
||||
static void kbasep_hwcnt_backend_csf_if_fw_dump_enable(
|
||||
struct kbase_hwcnt_backend_csf_if_ctx *ctx,
|
||||
struct kbase_hwcnt_backend_csf_if_ring_buf *ring_buf,
|
||||
struct kbase_hwcnt_backend_csf_if_enable *enable)
|
||||
static void
|
||||
kbasep_hwcnt_backend_csf_if_fw_dump_enable(struct kbase_hwcnt_backend_csf_if_ctx *ctx,
|
||||
struct kbase_hwcnt_backend_csf_if_ring_buf *ring_buf,
|
||||
struct kbase_hwcnt_backend_csf_if_enable *enable)
|
||||
{
|
||||
u32 prfcnt_config;
|
||||
struct kbase_device *kbdev;
|
||||
|
|
@ -550,8 +519,7 @@ static void kbasep_hwcnt_backend_csf_if_fw_dump_enable(
|
|||
prfcnt_config = GLB_PRFCNT_CONFIG_SET_SELECT_SET(prfcnt_config, enable->counter_set);
|
||||
|
||||
/* Configure the ring buffer base address */
|
||||
kbase_csf_firmware_global_input(global_iface, GLB_PRFCNT_JASID,
|
||||
fw_ring_buf->as_nr);
|
||||
kbase_csf_firmware_global_input(global_iface, GLB_PRFCNT_JASID, fw_ring_buf->as_nr);
|
||||
kbase_csf_firmware_global_input(global_iface, GLB_PRFCNT_BASE_LO,
|
||||
fw_ring_buf->gpu_dump_base & U32_MAX);
|
||||
kbase_csf_firmware_global_input(global_iface, GLB_PRFCNT_BASE_HI,
|
||||
|
|
@ -561,38 +529,29 @@ static void kbasep_hwcnt_backend_csf_if_fw_dump_enable(
|
|||
kbase_csf_firmware_global_input(global_iface, GLB_PRFCNT_EXTRACT, 0);
|
||||
|
||||
/* Configure the enable bitmap */
|
||||
kbase_csf_firmware_global_input(global_iface, GLB_PRFCNT_CSF_EN,
|
||||
enable->fe_bm);
|
||||
kbase_csf_firmware_global_input(global_iface, GLB_PRFCNT_SHADER_EN,
|
||||
enable->shader_bm);
|
||||
kbase_csf_firmware_global_input(global_iface, GLB_PRFCNT_MMU_L2_EN,
|
||||
enable->mmu_l2_bm);
|
||||
kbase_csf_firmware_global_input(global_iface, GLB_PRFCNT_TILER_EN,
|
||||
enable->tiler_bm);
|
||||
kbase_csf_firmware_global_input(global_iface, GLB_PRFCNT_CSF_EN, enable->fe_bm);
|
||||
kbase_csf_firmware_global_input(global_iface, GLB_PRFCNT_SHADER_EN, enable->shader_bm);
|
||||
kbase_csf_firmware_global_input(global_iface, GLB_PRFCNT_MMU_L2_EN, enable->mmu_l2_bm);
|
||||
kbase_csf_firmware_global_input(global_iface, GLB_PRFCNT_TILER_EN, enable->tiler_bm);
|
||||
|
||||
/* Configure the HWC set and buffer size */
|
||||
kbase_csf_firmware_global_input(global_iface, GLB_PRFCNT_CONFIG,
|
||||
prfcnt_config);
|
||||
kbase_csf_firmware_global_input(global_iface, GLB_PRFCNT_CONFIG, prfcnt_config);
|
||||
|
||||
kbdev->csf.hwcnt.enable_pending = true;
|
||||
|
||||
/* Unmask the interrupts */
|
||||
kbase_csf_firmware_global_input_mask(
|
||||
global_iface, GLB_ACK_IRQ_MASK,
|
||||
GLB_ACK_IRQ_MASK_PRFCNT_SAMPLE_MASK,
|
||||
GLB_ACK_IRQ_MASK_PRFCNT_SAMPLE_MASK);
|
||||
kbase_csf_firmware_global_input_mask(
|
||||
global_iface, GLB_ACK_IRQ_MASK,
|
||||
GLB_ACK_IRQ_MASK_PRFCNT_THRESHOLD_MASK,
|
||||
GLB_ACK_IRQ_MASK_PRFCNT_THRESHOLD_MASK);
|
||||
kbase_csf_firmware_global_input_mask(
|
||||
global_iface, GLB_ACK_IRQ_MASK,
|
||||
GLB_ACK_IRQ_MASK_PRFCNT_OVERFLOW_MASK,
|
||||
GLB_ACK_IRQ_MASK_PRFCNT_OVERFLOW_MASK);
|
||||
kbase_csf_firmware_global_input_mask(
|
||||
global_iface, GLB_ACK_IRQ_MASK,
|
||||
GLB_ACK_IRQ_MASK_PRFCNT_ENABLE_MASK,
|
||||
GLB_ACK_IRQ_MASK_PRFCNT_ENABLE_MASK);
|
||||
kbase_csf_firmware_global_input_mask(global_iface, GLB_ACK_IRQ_MASK,
|
||||
GLB_ACK_IRQ_MASK_PRFCNT_SAMPLE_MASK,
|
||||
GLB_ACK_IRQ_MASK_PRFCNT_SAMPLE_MASK);
|
||||
kbase_csf_firmware_global_input_mask(global_iface, GLB_ACK_IRQ_MASK,
|
||||
GLB_ACK_IRQ_MASK_PRFCNT_THRESHOLD_MASK,
|
||||
GLB_ACK_IRQ_MASK_PRFCNT_THRESHOLD_MASK);
|
||||
kbase_csf_firmware_global_input_mask(global_iface, GLB_ACK_IRQ_MASK,
|
||||
GLB_ACK_IRQ_MASK_PRFCNT_OVERFLOW_MASK,
|
||||
GLB_ACK_IRQ_MASK_PRFCNT_OVERFLOW_MASK);
|
||||
kbase_csf_firmware_global_input_mask(global_iface, GLB_ACK_IRQ_MASK,
|
||||
GLB_ACK_IRQ_MASK_PRFCNT_ENABLE_MASK,
|
||||
GLB_ACK_IRQ_MASK_PRFCNT_ENABLE_MASK);
|
||||
|
||||
/* Enable the HWC */
|
||||
kbase_csf_firmware_global_input_mask(global_iface, GLB_REQ,
|
||||
|
|
@ -600,15 +559,12 @@ static void kbasep_hwcnt_backend_csf_if_fw_dump_enable(
|
|||
GLB_REQ_PRFCNT_ENABLE_MASK);
|
||||
kbase_csf_ring_doorbell(kbdev, CSF_KERNEL_DOORBELL_NR);
|
||||
|
||||
prfcnt_config = kbase_csf_firmware_global_input_read(global_iface,
|
||||
GLB_PRFCNT_CONFIG);
|
||||
prfcnt_config = kbase_csf_firmware_global_input_read(global_iface, GLB_PRFCNT_CONFIG);
|
||||
|
||||
kbasep_hwcnt_backend_csf_if_fw_cc_enable(fw_ctx,
|
||||
enable->clk_enable_map);
|
||||
kbasep_hwcnt_backend_csf_if_fw_cc_enable(fw_ctx, enable->clk_enable_map);
|
||||
}
|
||||
|
||||
static void kbasep_hwcnt_backend_csf_if_fw_dump_disable(
|
||||
struct kbase_hwcnt_backend_csf_if_ctx *ctx)
|
||||
static void kbasep_hwcnt_backend_csf_if_fw_dump_disable(struct kbase_hwcnt_backend_csf_if_ctx *ctx)
|
||||
{
|
||||
struct kbase_device *kbdev;
|
||||
struct kbase_csf_global_iface *global_iface;
|
||||
|
|
@ -623,20 +579,16 @@ static void kbasep_hwcnt_backend_csf_if_fw_dump_disable(
|
|||
|
||||
/* Disable the HWC */
|
||||
kbdev->csf.hwcnt.enable_pending = true;
|
||||
kbase_csf_firmware_global_input_mask(global_iface, GLB_REQ, 0,
|
||||
GLB_REQ_PRFCNT_ENABLE_MASK);
|
||||
kbase_csf_firmware_global_input_mask(global_iface, GLB_REQ, 0, GLB_REQ_PRFCNT_ENABLE_MASK);
|
||||
kbase_csf_ring_doorbell(kbdev, CSF_KERNEL_DOORBELL_NR);
|
||||
|
||||
/* mask the interrupts */
|
||||
kbase_csf_firmware_global_input_mask(
|
||||
global_iface, GLB_ACK_IRQ_MASK, 0,
|
||||
GLB_ACK_IRQ_MASK_PRFCNT_SAMPLE_MASK);
|
||||
kbase_csf_firmware_global_input_mask(
|
||||
global_iface, GLB_ACK_IRQ_MASK, 0,
|
||||
GLB_ACK_IRQ_MASK_PRFCNT_THRESHOLD_MASK);
|
||||
kbase_csf_firmware_global_input_mask(
|
||||
global_iface, GLB_ACK_IRQ_MASK, 0,
|
||||
GLB_ACK_IRQ_MASK_PRFCNT_OVERFLOW_MASK);
|
||||
kbase_csf_firmware_global_input_mask(global_iface, GLB_ACK_IRQ_MASK, 0,
|
||||
GLB_ACK_IRQ_MASK_PRFCNT_SAMPLE_MASK);
|
||||
kbase_csf_firmware_global_input_mask(global_iface, GLB_ACK_IRQ_MASK, 0,
|
||||
GLB_ACK_IRQ_MASK_PRFCNT_THRESHOLD_MASK);
|
||||
kbase_csf_firmware_global_input_mask(global_iface, GLB_ACK_IRQ_MASK, 0,
|
||||
GLB_ACK_IRQ_MASK_PRFCNT_OVERFLOW_MASK);
|
||||
|
||||
/* In case we have a previous request in flight when the disable
|
||||
* happens.
|
||||
|
|
@ -646,8 +598,7 @@ static void kbasep_hwcnt_backend_csf_if_fw_dump_disable(
|
|||
kbasep_hwcnt_backend_csf_if_fw_cc_disable(fw_ctx);
|
||||
}
|
||||
|
||||
static void kbasep_hwcnt_backend_csf_if_fw_dump_request(
|
||||
struct kbase_hwcnt_backend_csf_if_ctx *ctx)
|
||||
static void kbasep_hwcnt_backend_csf_if_fw_dump_request(struct kbase_hwcnt_backend_csf_if_ctx *ctx)
|
||||
{
|
||||
u32 glb_req;
|
||||
struct kbase_device *kbdev;
|
||||
|
|
@ -670,9 +621,8 @@ static void kbasep_hwcnt_backend_csf_if_fw_dump_request(
|
|||
kbase_csf_ring_doorbell(kbdev, CSF_KERNEL_DOORBELL_NR);
|
||||
}
|
||||
|
||||
static void kbasep_hwcnt_backend_csf_if_fw_get_indexes(
|
||||
struct kbase_hwcnt_backend_csf_if_ctx *ctx, u32 *extract_index,
|
||||
u32 *insert_index)
|
||||
static void kbasep_hwcnt_backend_csf_if_fw_get_indexes(struct kbase_hwcnt_backend_csf_if_ctx *ctx,
|
||||
u32 *extract_index, u32 *insert_index)
|
||||
{
|
||||
struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx =
|
||||
(struct kbase_hwcnt_backend_csf_if_fw_ctx *)ctx;
|
||||
|
|
@ -682,14 +632,15 @@ static void kbasep_hwcnt_backend_csf_if_fw_get_indexes(
|
|||
WARN_ON(!insert_index);
|
||||
kbasep_hwcnt_backend_csf_if_fw_assert_lock_held(ctx);
|
||||
|
||||
*extract_index = kbase_csf_firmware_global_input_read(
|
||||
&fw_ctx->kbdev->csf.global_iface, GLB_PRFCNT_EXTRACT);
|
||||
*insert_index = kbase_csf_firmware_global_output(
|
||||
&fw_ctx->kbdev->csf.global_iface, GLB_PRFCNT_INSERT);
|
||||
*extract_index = kbase_csf_firmware_global_input_read(&fw_ctx->kbdev->csf.global_iface,
|
||||
GLB_PRFCNT_EXTRACT);
|
||||
*insert_index = kbase_csf_firmware_global_output(&fw_ctx->kbdev->csf.global_iface,
|
||||
GLB_PRFCNT_INSERT);
|
||||
}
|
||||
|
||||
static void kbasep_hwcnt_backend_csf_if_fw_set_extract_index(
|
||||
struct kbase_hwcnt_backend_csf_if_ctx *ctx, u32 extract_idx)
|
||||
static void
|
||||
kbasep_hwcnt_backend_csf_if_fw_set_extract_index(struct kbase_hwcnt_backend_csf_if_ctx *ctx,
|
||||
u32 extract_idx)
|
||||
{
|
||||
struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx =
|
||||
(struct kbase_hwcnt_backend_csf_if_fw_ctx *)ctx;
|
||||
|
|
@ -700,13 +651,13 @@ static void kbasep_hwcnt_backend_csf_if_fw_set_extract_index(
|
|||
/* Set the raw extract index to release the buffer back to the ring
|
||||
* buffer.
|
||||
*/
|
||||
kbase_csf_firmware_global_input(&fw_ctx->kbdev->csf.global_iface,
|
||||
GLB_PRFCNT_EXTRACT, extract_idx);
|
||||
kbase_csf_firmware_global_input(&fw_ctx->kbdev->csf.global_iface, GLB_PRFCNT_EXTRACT,
|
||||
extract_idx);
|
||||
}
|
||||
|
||||
static void kbasep_hwcnt_backend_csf_if_fw_get_gpu_cycle_count(
|
||||
struct kbase_hwcnt_backend_csf_if_ctx *ctx, u64 *cycle_counts,
|
||||
u64 clk_enable_map)
|
||||
static void
|
||||
kbasep_hwcnt_backend_csf_if_fw_get_gpu_cycle_count(struct kbase_hwcnt_backend_csf_if_ctx *ctx,
|
||||
u64 *cycle_counts, u64 clk_enable_map)
|
||||
{
|
||||
struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx =
|
||||
(struct kbase_hwcnt_backend_csf_if_fw_ctx *)ctx;
|
||||
|
|
@ -723,12 +674,12 @@ static void kbasep_hwcnt_backend_csf_if_fw_get_gpu_cycle_count(
|
|||
|
||||
if (clk == KBASE_CLOCK_DOMAIN_TOP) {
|
||||
/* Read cycle count for top clock domain. */
|
||||
kbase_backend_get_gpu_time_norequest(
|
||||
fw_ctx->kbdev, &cycle_counts[clk], NULL, NULL);
|
||||
kbase_backend_get_gpu_time_norequest(fw_ctx->kbdev, &cycle_counts[clk],
|
||||
NULL, NULL);
|
||||
} else {
|
||||
/* Estimate cycle count for non-top clock domain. */
|
||||
cycle_counts[clk] = kbase_ccswe_cycle_at(
|
||||
&fw_ctx->ccswe_shader_cores, timestamp_ns);
|
||||
cycle_counts[clk] =
|
||||
kbase_ccswe_cycle_at(&fw_ctx->ccswe_shader_cores, timestamp_ns);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -738,8 +689,8 @@ static void kbasep_hwcnt_backend_csf_if_fw_get_gpu_cycle_count(
|
|||
*
|
||||
* @fw_ctx: Pointer to context to destroy.
|
||||
*/
|
||||
static void kbasep_hwcnt_backend_csf_if_fw_ctx_destroy(
|
||||
struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx)
|
||||
static void
|
||||
kbasep_hwcnt_backend_csf_if_fw_ctx_destroy(struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx)
|
||||
{
|
||||
if (!fw_ctx)
|
||||
return;
|
||||
|
|
@ -754,9 +705,9 @@ static void kbasep_hwcnt_backend_csf_if_fw_ctx_destroy(
|
|||
* @out_ctx: Non-NULL pointer to where info is stored on success.
|
||||
* Return: 0 on success, else error code.
|
||||
*/
|
||||
static int kbasep_hwcnt_backend_csf_if_fw_ctx_create(
|
||||
struct kbase_device *kbdev,
|
||||
struct kbase_hwcnt_backend_csf_if_fw_ctx **out_ctx)
|
||||
static int
|
||||
kbasep_hwcnt_backend_csf_if_fw_ctx_create(struct kbase_device *kbdev,
|
||||
struct kbase_hwcnt_backend_csf_if_fw_ctx **out_ctx)
|
||||
{
|
||||
u8 clk;
|
||||
int errcode = -ENOMEM;
|
||||
|
|
@ -780,8 +731,7 @@ static int kbasep_hwcnt_backend_csf_if_fw_ctx_create(
|
|||
|
||||
ctx->clk_enable_map = 0;
|
||||
kbase_ccswe_init(&ctx->ccswe_shader_cores);
|
||||
ctx->rate_listener.notify =
|
||||
kbasep_hwcnt_backend_csf_if_fw_on_freq_change;
|
||||
ctx->rate_listener.notify = kbasep_hwcnt_backend_csf_if_fw_on_freq_change;
|
||||
|
||||
*out_ctx = ctx;
|
||||
|
||||
|
|
@ -791,8 +741,7 @@ static int kbasep_hwcnt_backend_csf_if_fw_ctx_create(
|
|||
return errcode;
|
||||
}
|
||||
|
||||
void kbase_hwcnt_backend_csf_if_fw_destroy(
|
||||
struct kbase_hwcnt_backend_csf_if *if_fw)
|
||||
void kbase_hwcnt_backend_csf_if_fw_destroy(struct kbase_hwcnt_backend_csf_if *if_fw)
|
||||
{
|
||||
if (!if_fw)
|
||||
return;
|
||||
|
|
@ -802,8 +751,8 @@ void kbase_hwcnt_backend_csf_if_fw_destroy(
|
|||
memset(if_fw, 0, sizeof(*if_fw));
|
||||
}
|
||||
|
||||
int kbase_hwcnt_backend_csf_if_fw_create(
|
||||
struct kbase_device *kbdev, struct kbase_hwcnt_backend_csf_if *if_fw)
|
||||
int kbase_hwcnt_backend_csf_if_fw_create(struct kbase_device *kbdev,
|
||||
struct kbase_hwcnt_backend_csf_if *if_fw)
|
||||
{
|
||||
int errcode;
|
||||
struct kbase_hwcnt_backend_csf_if_fw_ctx *ctx = NULL;
|
||||
|
|
@ -816,8 +765,7 @@ int kbase_hwcnt_backend_csf_if_fw_create(
|
|||
return errcode;
|
||||
|
||||
if_fw->ctx = (struct kbase_hwcnt_backend_csf_if_ctx *)ctx;
|
||||
if_fw->assert_lock_held =
|
||||
kbasep_hwcnt_backend_csf_if_fw_assert_lock_held;
|
||||
if_fw->assert_lock_held = kbasep_hwcnt_backend_csf_if_fw_assert_lock_held;
|
||||
if_fw->lock = kbasep_hwcnt_backend_csf_if_fw_lock;
|
||||
if_fw->unlock = kbasep_hwcnt_backend_csf_if_fw_unlock;
|
||||
if_fw->get_prfcnt_info = kbasep_hwcnt_backend_csf_if_fw_get_prfcnt_info;
|
||||
|
|
@ -828,11 +776,9 @@ int kbase_hwcnt_backend_csf_if_fw_create(
|
|||
if_fw->dump_enable = kbasep_hwcnt_backend_csf_if_fw_dump_enable;
|
||||
if_fw->dump_disable = kbasep_hwcnt_backend_csf_if_fw_dump_disable;
|
||||
if_fw->dump_request = kbasep_hwcnt_backend_csf_if_fw_dump_request;
|
||||
if_fw->get_gpu_cycle_count =
|
||||
kbasep_hwcnt_backend_csf_if_fw_get_gpu_cycle_count;
|
||||
if_fw->get_gpu_cycle_count = kbasep_hwcnt_backend_csf_if_fw_get_gpu_cycle_count;
|
||||
if_fw->get_indexes = kbasep_hwcnt_backend_csf_if_fw_get_indexes;
|
||||
if_fw->set_extract_index =
|
||||
kbasep_hwcnt_backend_csf_if_fw_set_extract_index;
|
||||
if_fw->set_extract_index = kbasep_hwcnt_backend_csf_if_fw_set_extract_index;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
|
@ -1,7 +1,7 @@
|
|||
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
|
||||
/*
|
||||
*
|
||||
* (C) COPYRIGHT 2021 ARM Limited. All rights reserved.
|
||||
* (C) COPYRIGHT 2021-2022 ARM Limited. All rights reserved.
|
||||
*
|
||||
* This program is free software and is provided to you under the terms of the
|
||||
* GNU General Public License version 2 as published by the Free Software
|
||||
|
|
@ -26,7 +26,7 @@
|
|||
#ifndef _KBASE_HWCNT_BACKEND_CSF_IF_FW_H_
|
||||
#define _KBASE_HWCNT_BACKEND_CSF_IF_FW_H_
|
||||
|
||||
#include "mali_kbase_hwcnt_backend_csf_if.h"
|
||||
#include "hwcnt/backend/mali_kbase_hwcnt_backend_csf_if.h"
|
||||
|
||||
/**
|
||||
* kbase_hwcnt_backend_csf_if_fw_create() - Create a firmware CSF interface
|
||||
|
|
@ -36,15 +36,14 @@
|
|||
* creation success.
|
||||
* Return: 0 on success, else error code.
|
||||
*/
|
||||
int kbase_hwcnt_backend_csf_if_fw_create(
|
||||
struct kbase_device *kbdev, struct kbase_hwcnt_backend_csf_if *if_fw);
|
||||
int kbase_hwcnt_backend_csf_if_fw_create(struct kbase_device *kbdev,
|
||||
struct kbase_hwcnt_backend_csf_if *if_fw);
|
||||
|
||||
/**
|
||||
* kbase_hwcnt_backend_csf_if_fw_destroy() - Destroy a firmware CSF interface of
|
||||
* hardware counter backend.
|
||||
* @if_fw: Pointer to a CSF interface to destroy.
|
||||
*/
|
||||
void kbase_hwcnt_backend_csf_if_fw_destroy(
|
||||
struct kbase_hwcnt_backend_csf_if *if_fw);
|
||||
void kbase_hwcnt_backend_csf_if_fw_destroy(struct kbase_hwcnt_backend_csf_if *if_fw);
|
||||
|
||||
#endif /* _KBASE_HWCNT_BACKEND_CSF_IF_FW_H_ */
|
||||
|
|
@ -19,9 +19,9 @@
|
|||
*
|
||||
*/
|
||||
|
||||
#include "mali_kbase_hwcnt_backend_jm.h"
|
||||
#include "mali_kbase_hwcnt_gpu.h"
|
||||
#include "mali_kbase_hwcnt_types.h"
|
||||
#include "hwcnt/backend/mali_kbase_hwcnt_backend_jm.h"
|
||||
#include "hwcnt/mali_kbase_hwcnt_gpu.h"
|
||||
#include "hwcnt/mali_kbase_hwcnt_types.h"
|
||||
#include "mali_kbase.h"
|
||||
#include "backend/gpu/mali_kbase_pm_ca.h"
|
||||
#include "mali_kbase_hwaccess_instr.h"
|
||||
|
|
@ -136,9 +136,8 @@ struct kbase_hwcnt_backend_jm {
|
|||
*
|
||||
* Return: 0 on success, else error code.
|
||||
*/
|
||||
static int
|
||||
kbasep_hwcnt_backend_jm_gpu_info_init(struct kbase_device *kbdev,
|
||||
struct kbase_hwcnt_gpu_info *info)
|
||||
static int kbasep_hwcnt_backend_jm_gpu_info_init(struct kbase_device *kbdev,
|
||||
struct kbase_hwcnt_gpu_info *info)
|
||||
{
|
||||
size_t clk;
|
||||
|
||||
|
|
@ -153,13 +152,11 @@ kbasep_hwcnt_backend_jm_gpu_info_init(struct kbase_device *kbdev,
|
|||
{
|
||||
const struct base_gpu_props *props = &kbdev->gpu_props.props;
|
||||
const size_t l2_count = props->l2_props.num_l2_slices;
|
||||
const size_t core_mask =
|
||||
props->coherency_info.group[0].core_mask;
|
||||
const size_t core_mask = props->coherency_info.group[0].core_mask;
|
||||
|
||||
info->l2_count = l2_count;
|
||||
info->core_mask = core_mask;
|
||||
info->prfcnt_values_per_block =
|
||||
KBASE_HWCNT_V5_DEFAULT_VALUES_PER_BLOCK;
|
||||
info->prfcnt_values_per_block = KBASE_HWCNT_V5_DEFAULT_VALUES_PER_BLOCK;
|
||||
}
|
||||
#endif /* CONFIG_MALI_BIFROST_NO_MALI */
|
||||
|
||||
|
|
@ -173,9 +170,8 @@ kbasep_hwcnt_backend_jm_gpu_info_init(struct kbase_device *kbdev,
|
|||
return 0;
|
||||
}
|
||||
|
||||
static void kbasep_hwcnt_backend_jm_init_layout(
|
||||
const struct kbase_hwcnt_gpu_info *gpu_info,
|
||||
struct kbase_hwcnt_jm_physical_layout *phys_layout)
|
||||
static void kbasep_hwcnt_backend_jm_init_layout(const struct kbase_hwcnt_gpu_info *gpu_info,
|
||||
struct kbase_hwcnt_jm_physical_layout *phys_layout)
|
||||
{
|
||||
u8 shader_core_cnt;
|
||||
|
||||
|
|
@ -189,32 +185,29 @@ static void kbasep_hwcnt_backend_jm_init_layout(
|
|||
.tiler_cnt = KBASE_HWCNT_V5_TILER_BLOCK_COUNT,
|
||||
.mmu_l2_cnt = gpu_info->l2_count,
|
||||
.shader_cnt = shader_core_cnt,
|
||||
.block_cnt = KBASE_HWCNT_V5_FE_BLOCK_COUNT +
|
||||
KBASE_HWCNT_V5_TILER_BLOCK_COUNT +
|
||||
.block_cnt = KBASE_HWCNT_V5_FE_BLOCK_COUNT + KBASE_HWCNT_V5_TILER_BLOCK_COUNT +
|
||||
gpu_info->l2_count + shader_core_cnt,
|
||||
.shader_avail_mask = gpu_info->core_mask,
|
||||
.headers_per_block = KBASE_HWCNT_V5_HEADERS_PER_BLOCK,
|
||||
.values_per_block = gpu_info->prfcnt_values_per_block,
|
||||
.counters_per_block = gpu_info->prfcnt_values_per_block -
|
||||
KBASE_HWCNT_V5_HEADERS_PER_BLOCK,
|
||||
.counters_per_block =
|
||||
gpu_info->prfcnt_values_per_block - KBASE_HWCNT_V5_HEADERS_PER_BLOCK,
|
||||
.enable_mask_offset = KBASE_HWCNT_V5_PRFCNT_EN_HEADER,
|
||||
};
|
||||
}
|
||||
|
||||
static void kbasep_hwcnt_backend_jm_dump_sample(
|
||||
const struct kbase_hwcnt_backend_jm *const backend_jm)
|
||||
static void
|
||||
kbasep_hwcnt_backend_jm_dump_sample(const struct kbase_hwcnt_backend_jm *const backend_jm)
|
||||
{
|
||||
size_t block_idx;
|
||||
const u32 *new_sample_buf = backend_jm->cpu_dump_va;
|
||||
const u32 *new_block = new_sample_buf;
|
||||
u64 *dst_buf = backend_jm->to_user_buf;
|
||||
u64 *dst_block = dst_buf;
|
||||
const size_t values_per_block =
|
||||
backend_jm->phys_layout.values_per_block;
|
||||
const size_t values_per_block = backend_jm->phys_layout.values_per_block;
|
||||
const size_t dump_bytes = backend_jm->info->dump_bytes;
|
||||
|
||||
for (block_idx = 0; block_idx < backend_jm->phys_layout.block_cnt;
|
||||
block_idx++) {
|
||||
for (block_idx = 0; block_idx < backend_jm->phys_layout.block_cnt; block_idx++) {
|
||||
size_t ctr_idx;
|
||||
|
||||
for (ctr_idx = 0; ctr_idx < values_per_block; ctr_idx++)
|
||||
|
|
@ -224,10 +217,8 @@ static void kbasep_hwcnt_backend_jm_dump_sample(
|
|||
dst_block += values_per_block;
|
||||
}
|
||||
|
||||
WARN_ON(new_block !=
|
||||
new_sample_buf + (dump_bytes / KBASE_HWCNT_VALUE_HW_BYTES));
|
||||
WARN_ON(dst_block !=
|
||||
dst_buf + (dump_bytes / KBASE_HWCNT_VALUE_HW_BYTES));
|
||||
WARN_ON(new_block != new_sample_buf + (dump_bytes / KBASE_HWCNT_VALUE_HW_BYTES));
|
||||
WARN_ON(dst_block != dst_buf + (dump_bytes / KBASE_HWCNT_VALUE_HW_BYTES));
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -237,21 +228,18 @@ static void kbasep_hwcnt_backend_jm_dump_sample(
|
|||
* @clk_index: Clock index
|
||||
* @clk_rate_hz: Clock frequency(hz)
|
||||
*/
|
||||
static void kbasep_hwcnt_backend_jm_on_freq_change(
|
||||
struct kbase_clk_rate_listener *rate_listener,
|
||||
u32 clk_index,
|
||||
u32 clk_rate_hz)
|
||||
static void kbasep_hwcnt_backend_jm_on_freq_change(struct kbase_clk_rate_listener *rate_listener,
|
||||
u32 clk_index, u32 clk_rate_hz)
|
||||
{
|
||||
struct kbase_hwcnt_backend_jm *backend_jm = container_of(
|
||||
rate_listener, struct kbase_hwcnt_backend_jm, rate_listener);
|
||||
struct kbase_hwcnt_backend_jm *backend_jm =
|
||||
container_of(rate_listener, struct kbase_hwcnt_backend_jm, rate_listener);
|
||||
u64 timestamp_ns;
|
||||
|
||||
if (clk_index != KBASE_CLOCK_DOMAIN_SHADER_CORES)
|
||||
return;
|
||||
|
||||
timestamp_ns = ktime_get_raw_ns();
|
||||
kbase_ccswe_freq_change(
|
||||
&backend_jm->ccswe_shader_cores, timestamp_ns, clk_rate_hz);
|
||||
kbase_ccswe_freq_change(&backend_jm->ccswe_shader_cores, timestamp_ns, clk_rate_hz);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -261,53 +249,42 @@ static void kbasep_hwcnt_backend_jm_on_freq_change(
|
|||
* @enable_map: Non-NULL pointer to enable map specifying enabled counters.
|
||||
* @timestamp_ns: Timestamp(ns) when HWCNT were enabled.
|
||||
*/
|
||||
static void kbasep_hwcnt_backend_jm_cc_enable(
|
||||
struct kbase_hwcnt_backend_jm *backend_jm,
|
||||
const struct kbase_hwcnt_enable_map *enable_map,
|
||||
u64 timestamp_ns)
|
||||
static void kbasep_hwcnt_backend_jm_cc_enable(struct kbase_hwcnt_backend_jm *backend_jm,
|
||||
const struct kbase_hwcnt_enable_map *enable_map,
|
||||
u64 timestamp_ns)
|
||||
{
|
||||
struct kbase_device *kbdev = backend_jm->kctx->kbdev;
|
||||
u64 clk_enable_map = enable_map->clk_enable_map;
|
||||
u64 cycle_count;
|
||||
|
||||
if (kbase_hwcnt_clk_enable_map_enabled(
|
||||
clk_enable_map, KBASE_CLOCK_DOMAIN_TOP)) {
|
||||
if (kbase_hwcnt_clk_enable_map_enabled(clk_enable_map, KBASE_CLOCK_DOMAIN_TOP)) {
|
||||
/* turn on the cycle counter */
|
||||
kbase_pm_request_gpu_cycle_counter_l2_is_on(kbdev);
|
||||
/* Read cycle count for top clock domain. */
|
||||
kbase_backend_get_gpu_time_norequest(
|
||||
kbdev, &cycle_count, NULL, NULL);
|
||||
kbase_backend_get_gpu_time_norequest(kbdev, &cycle_count, NULL, NULL);
|
||||
|
||||
backend_jm->prev_cycle_count[KBASE_CLOCK_DOMAIN_TOP] =
|
||||
cycle_count;
|
||||
backend_jm->prev_cycle_count[KBASE_CLOCK_DOMAIN_TOP] = cycle_count;
|
||||
}
|
||||
|
||||
if (kbase_hwcnt_clk_enable_map_enabled(
|
||||
clk_enable_map, KBASE_CLOCK_DOMAIN_SHADER_CORES)) {
|
||||
if (kbase_hwcnt_clk_enable_map_enabled(clk_enable_map, KBASE_CLOCK_DOMAIN_SHADER_CORES)) {
|
||||
/* software estimation for non-top clock domains */
|
||||
struct kbase_clk_rate_trace_manager *rtm = &kbdev->pm.clk_rtm;
|
||||
const struct kbase_clk_data *clk_data =
|
||||
rtm->clks[KBASE_CLOCK_DOMAIN_SHADER_CORES];
|
||||
const struct kbase_clk_data *clk_data = rtm->clks[KBASE_CLOCK_DOMAIN_SHADER_CORES];
|
||||
u32 cur_freq;
|
||||
unsigned long flags;
|
||||
|
||||
spin_lock_irqsave(&rtm->lock, flags);
|
||||
|
||||
cur_freq = (u32) clk_data->clock_val;
|
||||
cur_freq = (u32)clk_data->clock_val;
|
||||
kbase_ccswe_reset(&backend_jm->ccswe_shader_cores);
|
||||
kbase_ccswe_freq_change(
|
||||
&backend_jm->ccswe_shader_cores,
|
||||
timestamp_ns,
|
||||
cur_freq);
|
||||
kbase_ccswe_freq_change(&backend_jm->ccswe_shader_cores, timestamp_ns, cur_freq);
|
||||
|
||||
kbase_clk_rate_trace_manager_subscribe_no_lock(
|
||||
rtm, &backend_jm->rate_listener);
|
||||
kbase_clk_rate_trace_manager_subscribe_no_lock(rtm, &backend_jm->rate_listener);
|
||||
|
||||
spin_unlock_irqrestore(&rtm->lock, flags);
|
||||
|
||||
/* ccswe was reset. The estimated cycle is zero. */
|
||||
backend_jm->prev_cycle_count[
|
||||
KBASE_CLOCK_DOMAIN_SHADER_CORES] = 0;
|
||||
backend_jm->prev_cycle_count[KBASE_CLOCK_DOMAIN_SHADER_CORES] = 0;
|
||||
}
|
||||
|
||||
/* Keep clk_enable_map for dump_request. */
|
||||
|
|
@ -319,28 +296,22 @@ static void kbasep_hwcnt_backend_jm_cc_enable(
|
|||
*
|
||||
* @backend_jm: Non-NULL pointer to backend.
|
||||
*/
|
||||
static void kbasep_hwcnt_backend_jm_cc_disable(
|
||||
struct kbase_hwcnt_backend_jm *backend_jm)
|
||||
static void kbasep_hwcnt_backend_jm_cc_disable(struct kbase_hwcnt_backend_jm *backend_jm)
|
||||
{
|
||||
struct kbase_device *kbdev = backend_jm->kctx->kbdev;
|
||||
struct kbase_clk_rate_trace_manager *rtm = &kbdev->pm.clk_rtm;
|
||||
u64 clk_enable_map = backend_jm->clk_enable_map;
|
||||
|
||||
if (kbase_hwcnt_clk_enable_map_enabled(
|
||||
clk_enable_map, KBASE_CLOCK_DOMAIN_TOP)) {
|
||||
if (kbase_hwcnt_clk_enable_map_enabled(clk_enable_map, KBASE_CLOCK_DOMAIN_TOP)) {
|
||||
/* turn off the cycle counter */
|
||||
kbase_pm_release_gpu_cycle_counter(kbdev);
|
||||
}
|
||||
|
||||
if (kbase_hwcnt_clk_enable_map_enabled(
|
||||
clk_enable_map, KBASE_CLOCK_DOMAIN_SHADER_CORES)) {
|
||||
|
||||
kbase_clk_rate_trace_manager_unsubscribe(
|
||||
rtm, &backend_jm->rate_listener);
|
||||
if (kbase_hwcnt_clk_enable_map_enabled(clk_enable_map, KBASE_CLOCK_DOMAIN_SHADER_CORES)) {
|
||||
kbase_clk_rate_trace_manager_unsubscribe(rtm, &backend_jm->rate_listener);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* kbasep_hwcnt_gpu_update_curr_config() - Update the destination buffer with
|
||||
* current config information.
|
||||
|
|
@ -356,38 +327,33 @@ static void kbasep_hwcnt_backend_jm_cc_disable(
|
|||
*
|
||||
* Return: 0 on success, else error code.
|
||||
*/
|
||||
static int kbasep_hwcnt_gpu_update_curr_config(
|
||||
struct kbase_device *kbdev,
|
||||
struct kbase_hwcnt_curr_config *curr_config)
|
||||
static int kbasep_hwcnt_gpu_update_curr_config(struct kbase_device *kbdev,
|
||||
struct kbase_hwcnt_curr_config *curr_config)
|
||||
{
|
||||
if (WARN_ON(!kbdev) || WARN_ON(!curr_config))
|
||||
return -EINVAL;
|
||||
|
||||
lockdep_assert_held(&kbdev->hwaccess_lock);
|
||||
|
||||
curr_config->num_l2_slices =
|
||||
kbdev->gpu_props.curr_config.l2_slices;
|
||||
curr_config->shader_present =
|
||||
kbdev->gpu_props.curr_config.shader_present;
|
||||
curr_config->num_l2_slices = kbdev->gpu_props.curr_config.l2_slices;
|
||||
curr_config->shader_present = kbdev->gpu_props.curr_config.shader_present;
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* JM backend implementation of kbase_hwcnt_backend_timestamp_ns_fn */
|
||||
static u64 kbasep_hwcnt_backend_jm_timestamp_ns(
|
||||
struct kbase_hwcnt_backend *backend)
|
||||
static u64 kbasep_hwcnt_backend_jm_timestamp_ns(struct kbase_hwcnt_backend *backend)
|
||||
{
|
||||
(void)backend;
|
||||
return ktime_get_raw_ns();
|
||||
}
|
||||
|
||||
/* JM backend implementation of kbase_hwcnt_backend_dump_enable_nolock_fn */
|
||||
static int kbasep_hwcnt_backend_jm_dump_enable_nolock(
|
||||
struct kbase_hwcnt_backend *backend,
|
||||
const struct kbase_hwcnt_enable_map *enable_map)
|
||||
static int
|
||||
kbasep_hwcnt_backend_jm_dump_enable_nolock(struct kbase_hwcnt_backend *backend,
|
||||
const struct kbase_hwcnt_enable_map *enable_map)
|
||||
{
|
||||
int errcode;
|
||||
struct kbase_hwcnt_backend_jm *backend_jm =
|
||||
(struct kbase_hwcnt_backend_jm *)backend;
|
||||
struct kbase_hwcnt_backend_jm *backend_jm = (struct kbase_hwcnt_backend_jm *)backend;
|
||||
struct kbase_context *kctx;
|
||||
struct kbase_device *kbdev;
|
||||
struct kbase_hwcnt_physical_enable_map phys_enable_map;
|
||||
|
|
@ -406,8 +372,7 @@ static int kbasep_hwcnt_backend_jm_dump_enable_nolock(
|
|||
|
||||
kbase_hwcnt_gpu_enable_map_to_physical(&phys_enable_map, enable_map);
|
||||
|
||||
kbase_hwcnt_gpu_set_to_physical(&phys_counter_set,
|
||||
backend_jm->info->counter_set);
|
||||
kbase_hwcnt_gpu_set_to_physical(&phys_counter_set, backend_jm->info->counter_set);
|
||||
|
||||
enable.fe_bm = phys_enable_map.fe_bm;
|
||||
enable.shader_bm = phys_enable_map.shader_bm;
|
||||
|
|
@ -425,8 +390,7 @@ static int kbasep_hwcnt_backend_jm_dump_enable_nolock(
|
|||
timestamp_ns = kbasep_hwcnt_backend_jm_timestamp_ns(backend);
|
||||
|
||||
/* Update the current configuration information. */
|
||||
errcode = kbasep_hwcnt_gpu_update_curr_config(kbdev,
|
||||
&backend_jm->curr_config);
|
||||
errcode = kbasep_hwcnt_gpu_update_curr_config(kbdev, &backend_jm->curr_config);
|
||||
if (errcode)
|
||||
goto error;
|
||||
|
||||
|
|
@ -446,14 +410,12 @@ static int kbasep_hwcnt_backend_jm_dump_enable_nolock(
|
|||
}
|
||||
|
||||
/* JM backend implementation of kbase_hwcnt_backend_dump_enable_fn */
|
||||
static int kbasep_hwcnt_backend_jm_dump_enable(
|
||||
struct kbase_hwcnt_backend *backend,
|
||||
const struct kbase_hwcnt_enable_map *enable_map)
|
||||
static int kbasep_hwcnt_backend_jm_dump_enable(struct kbase_hwcnt_backend *backend,
|
||||
const struct kbase_hwcnt_enable_map *enable_map)
|
||||
{
|
||||
unsigned long flags;
|
||||
int errcode;
|
||||
struct kbase_hwcnt_backend_jm *backend_jm =
|
||||
(struct kbase_hwcnt_backend_jm *)backend;
|
||||
struct kbase_hwcnt_backend_jm *backend_jm = (struct kbase_hwcnt_backend_jm *)backend;
|
||||
struct kbase_device *kbdev;
|
||||
|
||||
if (!backend_jm)
|
||||
|
|
@ -463,8 +425,7 @@ static int kbasep_hwcnt_backend_jm_dump_enable(
|
|||
|
||||
spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
|
||||
|
||||
errcode = kbasep_hwcnt_backend_jm_dump_enable_nolock(
|
||||
backend, enable_map);
|
||||
errcode = kbasep_hwcnt_backend_jm_dump_enable_nolock(backend, enable_map);
|
||||
|
||||
spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
|
||||
|
||||
|
|
@ -472,12 +433,10 @@ static int kbasep_hwcnt_backend_jm_dump_enable(
|
|||
}
|
||||
|
||||
/* JM backend implementation of kbase_hwcnt_backend_dump_disable_fn */
|
||||
static void kbasep_hwcnt_backend_jm_dump_disable(
|
||||
struct kbase_hwcnt_backend *backend)
|
||||
static void kbasep_hwcnt_backend_jm_dump_disable(struct kbase_hwcnt_backend *backend)
|
||||
{
|
||||
int errcode;
|
||||
struct kbase_hwcnt_backend_jm *backend_jm =
|
||||
(struct kbase_hwcnt_backend_jm *)backend;
|
||||
struct kbase_hwcnt_backend_jm *backend_jm = (struct kbase_hwcnt_backend_jm *)backend;
|
||||
|
||||
if (WARN_ON(!backend_jm) || !backend_jm->enabled)
|
||||
return;
|
||||
|
|
@ -491,11 +450,9 @@ static void kbasep_hwcnt_backend_jm_dump_disable(
|
|||
}
|
||||
|
||||
/* JM backend implementation of kbase_hwcnt_backend_dump_clear_fn */
|
||||
static int kbasep_hwcnt_backend_jm_dump_clear(
|
||||
struct kbase_hwcnt_backend *backend)
|
||||
static int kbasep_hwcnt_backend_jm_dump_clear(struct kbase_hwcnt_backend *backend)
|
||||
{
|
||||
struct kbase_hwcnt_backend_jm *backend_jm =
|
||||
(struct kbase_hwcnt_backend_jm *)backend;
|
||||
struct kbase_hwcnt_backend_jm *backend_jm = (struct kbase_hwcnt_backend_jm *)backend;
|
||||
|
||||
if (!backend_jm || !backend_jm->enabled)
|
||||
return -EINVAL;
|
||||
|
|
@ -504,12 +461,10 @@ static int kbasep_hwcnt_backend_jm_dump_clear(
|
|||
}
|
||||
|
||||
/* JM backend implementation of kbase_hwcnt_backend_dump_request_fn */
|
||||
static int kbasep_hwcnt_backend_jm_dump_request(
|
||||
struct kbase_hwcnt_backend *backend,
|
||||
u64 *dump_time_ns)
|
||||
static int kbasep_hwcnt_backend_jm_dump_request(struct kbase_hwcnt_backend *backend,
|
||||
u64 *dump_time_ns)
|
||||
{
|
||||
struct kbase_hwcnt_backend_jm *backend_jm =
|
||||
(struct kbase_hwcnt_backend_jm *)backend;
|
||||
struct kbase_hwcnt_backend_jm *backend_jm = (struct kbase_hwcnt_backend_jm *)backend;
|
||||
struct kbase_device *kbdev;
|
||||
const struct kbase_hwcnt_metadata *metadata;
|
||||
u64 current_cycle_count;
|
||||
|
|
@ -528,28 +483,25 @@ static int kbasep_hwcnt_backend_jm_dump_request(
|
|||
*dump_time_ns = kbasep_hwcnt_backend_jm_timestamp_ns(backend);
|
||||
ret = kbase_instr_hwcnt_request_dump(backend_jm->kctx);
|
||||
|
||||
kbase_hwcnt_metadata_for_each_clock(metadata, clk) {
|
||||
if (!kbase_hwcnt_clk_enable_map_enabled(
|
||||
backend_jm->clk_enable_map, clk))
|
||||
kbase_hwcnt_metadata_for_each_clock(metadata, clk)
|
||||
{
|
||||
if (!kbase_hwcnt_clk_enable_map_enabled(backend_jm->clk_enable_map, clk))
|
||||
continue;
|
||||
|
||||
if (clk == KBASE_CLOCK_DOMAIN_TOP) {
|
||||
/* Read cycle count for top clock domain. */
|
||||
kbase_backend_get_gpu_time_norequest(
|
||||
kbdev, &current_cycle_count,
|
||||
NULL, NULL);
|
||||
kbase_backend_get_gpu_time_norequest(kbdev, &current_cycle_count,
|
||||
NULL, NULL);
|
||||
} else {
|
||||
/*
|
||||
* Estimate cycle count for non-top clock
|
||||
* domain.
|
||||
*/
|
||||
current_cycle_count = kbase_ccswe_cycle_at(
|
||||
&backend_jm->ccswe_shader_cores,
|
||||
*dump_time_ns);
|
||||
&backend_jm->ccswe_shader_cores, *dump_time_ns);
|
||||
}
|
||||
backend_jm->cycle_count_elapsed[clk] =
|
||||
current_cycle_count -
|
||||
backend_jm->prev_cycle_count[clk];
|
||||
current_cycle_count - backend_jm->prev_cycle_count[clk];
|
||||
|
||||
/*
|
||||
* Keep the current cycle count for later calculation.
|
||||
|
|
@ -563,11 +515,9 @@ static int kbasep_hwcnt_backend_jm_dump_request(
|
|||
}
|
||||
|
||||
/* JM backend implementation of kbase_hwcnt_backend_dump_wait_fn */
|
||||
static int kbasep_hwcnt_backend_jm_dump_wait(
|
||||
struct kbase_hwcnt_backend *backend)
|
||||
static int kbasep_hwcnt_backend_jm_dump_wait(struct kbase_hwcnt_backend *backend)
|
||||
{
|
||||
struct kbase_hwcnt_backend_jm *backend_jm =
|
||||
(struct kbase_hwcnt_backend_jm *)backend;
|
||||
struct kbase_hwcnt_backend_jm *backend_jm = (struct kbase_hwcnt_backend_jm *)backend;
|
||||
|
||||
if (!backend_jm || !backend_jm->enabled)
|
||||
return -EINVAL;
|
||||
|
|
@ -576,14 +526,12 @@ static int kbasep_hwcnt_backend_jm_dump_wait(
|
|||
}
|
||||
|
||||
/* JM backend implementation of kbase_hwcnt_backend_dump_get_fn */
|
||||
static int kbasep_hwcnt_backend_jm_dump_get(
|
||||
struct kbase_hwcnt_backend *backend,
|
||||
struct kbase_hwcnt_dump_buffer *dst,
|
||||
const struct kbase_hwcnt_enable_map *dst_enable_map,
|
||||
bool accumulate)
|
||||
static int kbasep_hwcnt_backend_jm_dump_get(struct kbase_hwcnt_backend *backend,
|
||||
struct kbase_hwcnt_dump_buffer *dst,
|
||||
const struct kbase_hwcnt_enable_map *dst_enable_map,
|
||||
bool accumulate)
|
||||
{
|
||||
struct kbase_hwcnt_backend_jm *backend_jm =
|
||||
(struct kbase_hwcnt_backend_jm *)backend;
|
||||
struct kbase_hwcnt_backend_jm *backend_jm = (struct kbase_hwcnt_backend_jm *)backend;
|
||||
size_t clk;
|
||||
#if IS_ENABLED(CONFIG_MALI_BIFROST_NO_MALI)
|
||||
struct kbase_device *kbdev;
|
||||
|
|
@ -597,16 +545,15 @@ static int kbasep_hwcnt_backend_jm_dump_get(
|
|||
return -EINVAL;
|
||||
|
||||
/* Invalidate the kernel buffer before reading from it. */
|
||||
kbase_sync_mem_regions(
|
||||
backend_jm->kctx, backend_jm->vmap, KBASE_SYNC_TO_CPU);
|
||||
kbase_sync_mem_regions(backend_jm->kctx, backend_jm->vmap, KBASE_SYNC_TO_CPU);
|
||||
|
||||
/* Dump sample to the internal 64-bit user buffer. */
|
||||
kbasep_hwcnt_backend_jm_dump_sample(backend_jm);
|
||||
|
||||
/* Extract elapsed cycle count for each clock domain if enabled. */
|
||||
kbase_hwcnt_metadata_for_each_clock(dst_enable_map->metadata, clk) {
|
||||
if (!kbase_hwcnt_clk_enable_map_enabled(
|
||||
dst_enable_map->clk_enable_map, clk))
|
||||
kbase_hwcnt_metadata_for_each_clock(dst_enable_map->metadata, clk)
|
||||
{
|
||||
if (!kbase_hwcnt_clk_enable_map_enabled(dst_enable_map->clk_enable_map, clk))
|
||||
continue;
|
||||
|
||||
/* Reset the counter to zero if accumulation is off. */
|
||||
|
|
@ -621,17 +568,16 @@ static int kbasep_hwcnt_backend_jm_dump_get(
|
|||
spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
|
||||
|
||||
/* Update the current configuration information. */
|
||||
errcode = kbasep_hwcnt_gpu_update_curr_config(kbdev,
|
||||
&backend_jm->curr_config);
|
||||
errcode = kbasep_hwcnt_gpu_update_curr_config(kbdev, &backend_jm->curr_config);
|
||||
|
||||
spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
|
||||
|
||||
if (errcode)
|
||||
return errcode;
|
||||
#endif /* CONFIG_MALI_BIFROST_NO_MALI */
|
||||
return kbase_hwcnt_jm_dump_get(dst, backend_jm->to_user_buf,
|
||||
dst_enable_map, backend_jm->pm_core_mask,
|
||||
&backend_jm->curr_config, accumulate);
|
||||
return kbase_hwcnt_jm_dump_get(dst, backend_jm->to_user_buf, dst_enable_map,
|
||||
backend_jm->pm_core_mask, &backend_jm->curr_config,
|
||||
accumulate);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -643,10 +589,8 @@ static int kbasep_hwcnt_backend_jm_dump_get(
|
|||
*
|
||||
* Return: 0 on success, else error code.
|
||||
*/
|
||||
static int kbasep_hwcnt_backend_jm_dump_alloc(
|
||||
const struct kbase_hwcnt_backend_jm_info *info,
|
||||
struct kbase_context *kctx,
|
||||
u64 *gpu_dump_va)
|
||||
static int kbasep_hwcnt_backend_jm_dump_alloc(const struct kbase_hwcnt_backend_jm_info *info,
|
||||
struct kbase_context *kctx, u64 *gpu_dump_va)
|
||||
{
|
||||
struct kbase_va_region *reg;
|
||||
u64 flags;
|
||||
|
|
@ -661,16 +605,12 @@ static int kbasep_hwcnt_backend_jm_dump_alloc(
|
|||
WARN_ON(!kctx);
|
||||
WARN_ON(!gpu_dump_va);
|
||||
|
||||
flags = BASE_MEM_PROT_CPU_RD |
|
||||
BASE_MEM_PROT_GPU_WR |
|
||||
BASEP_MEM_PERMANENT_KERNEL_MAPPING |
|
||||
BASE_MEM_CACHED_CPU |
|
||||
BASE_MEM_UNCACHED_GPU;
|
||||
flags = BASE_MEM_PROT_CPU_RD | BASE_MEM_PROT_GPU_WR | BASEP_MEM_PERMANENT_KERNEL_MAPPING |
|
||||
BASE_MEM_CACHED_CPU | BASE_MEM_UNCACHED_GPU;
|
||||
|
||||
nr_pages = PFN_UP(info->dump_bytes);
|
||||
|
||||
reg = kbase_mem_alloc(kctx, nr_pages, nr_pages, 0, &flags, gpu_dump_va,
|
||||
mmu_sync_info);
|
||||
reg = kbase_mem_alloc(kctx, nr_pages, nr_pages, 0, &flags, gpu_dump_va, mmu_sync_info);
|
||||
|
||||
if (!reg)
|
||||
return -ENOMEM;
|
||||
|
|
@ -683,9 +623,7 @@ static int kbasep_hwcnt_backend_jm_dump_alloc(
|
|||
* @kctx: Non-NULL pointer to kbase context.
|
||||
* @gpu_dump_va: GPU dump buffer virtual address.
|
||||
*/
|
||||
static void kbasep_hwcnt_backend_jm_dump_free(
|
||||
struct kbase_context *kctx,
|
||||
u64 gpu_dump_va)
|
||||
static void kbasep_hwcnt_backend_jm_dump_free(struct kbase_context *kctx, u64 gpu_dump_va)
|
||||
{
|
||||
WARN_ON(!kctx);
|
||||
if (gpu_dump_va)
|
||||
|
|
@ -698,8 +636,7 @@ static void kbasep_hwcnt_backend_jm_dump_free(
|
|||
*
|
||||
* Can be safely called on a backend in any state of partial construction.
|
||||
*/
|
||||
static void kbasep_hwcnt_backend_jm_destroy(
|
||||
struct kbase_hwcnt_backend_jm *backend)
|
||||
static void kbasep_hwcnt_backend_jm_destroy(struct kbase_hwcnt_backend_jm *backend)
|
||||
{
|
||||
if (!backend)
|
||||
return;
|
||||
|
|
@ -712,8 +649,7 @@ static void kbasep_hwcnt_backend_jm_destroy(
|
|||
kbase_phy_alloc_mapping_put(kctx, backend->vmap);
|
||||
|
||||
if (backend->gpu_dump_va)
|
||||
kbasep_hwcnt_backend_jm_dump_free(
|
||||
kctx, backend->gpu_dump_va);
|
||||
kbasep_hwcnt_backend_jm_dump_free(kctx, backend->gpu_dump_va);
|
||||
|
||||
kbasep_js_release_privileged_ctx(kbdev, kctx);
|
||||
kbase_destroy_context(kctx);
|
||||
|
|
@ -731,9 +667,8 @@ static void kbasep_hwcnt_backend_jm_destroy(
|
|||
*
|
||||
* Return: 0 on success, else error code.
|
||||
*/
|
||||
static int kbasep_hwcnt_backend_jm_create(
|
||||
const struct kbase_hwcnt_backend_jm_info *info,
|
||||
struct kbase_hwcnt_backend_jm **out_backend)
|
||||
static int kbasep_hwcnt_backend_jm_create(const struct kbase_hwcnt_backend_jm_info *info,
|
||||
struct kbase_hwcnt_backend_jm **out_backend)
|
||||
{
|
||||
int errcode;
|
||||
struct kbase_device *kbdev;
|
||||
|
|
@ -749,28 +684,25 @@ static int kbasep_hwcnt_backend_jm_create(
|
|||
goto alloc_error;
|
||||
|
||||
backend->info = info;
|
||||
kbasep_hwcnt_backend_jm_init_layout(&info->hwcnt_gpu_info,
|
||||
&backend->phys_layout);
|
||||
kbasep_hwcnt_backend_jm_init_layout(&info->hwcnt_gpu_info, &backend->phys_layout);
|
||||
|
||||
backend->kctx = kbase_create_context(kbdev, true,
|
||||
BASE_CONTEXT_SYSTEM_MONITOR_SUBMIT_DISABLED, 0, NULL);
|
||||
BASE_CONTEXT_SYSTEM_MONITOR_SUBMIT_DISABLED, 0, NULL);
|
||||
if (!backend->kctx)
|
||||
goto alloc_error;
|
||||
|
||||
kbasep_js_schedule_privileged_ctx(kbdev, backend->kctx);
|
||||
|
||||
errcode = kbasep_hwcnt_backend_jm_dump_alloc(
|
||||
info, backend->kctx, &backend->gpu_dump_va);
|
||||
errcode = kbasep_hwcnt_backend_jm_dump_alloc(info, backend->kctx, &backend->gpu_dump_va);
|
||||
if (errcode)
|
||||
goto error;
|
||||
|
||||
backend->cpu_dump_va = kbase_phy_alloc_mapping_get(backend->kctx,
|
||||
backend->gpu_dump_va, &backend->vmap);
|
||||
backend->cpu_dump_va =
|
||||
kbase_phy_alloc_mapping_get(backend->kctx, backend->gpu_dump_va, &backend->vmap);
|
||||
if (!backend->cpu_dump_va || !backend->vmap)
|
||||
goto alloc_error;
|
||||
|
||||
backend->to_user_buf =
|
||||
kzalloc(info->metadata->dump_buf_bytes, GFP_KERNEL);
|
||||
backend->to_user_buf = kzalloc(info->metadata->dump_buf_bytes, GFP_KERNEL);
|
||||
if (!backend->to_user_buf)
|
||||
goto alloc_error;
|
||||
|
||||
|
|
@ -798,9 +730,8 @@ kbasep_hwcnt_backend_jm_metadata(const struct kbase_hwcnt_backend_info *info)
|
|||
}
|
||||
|
||||
/* JM backend implementation of kbase_hwcnt_backend_init_fn */
|
||||
static int kbasep_hwcnt_backend_jm_init(
|
||||
const struct kbase_hwcnt_backend_info *info,
|
||||
struct kbase_hwcnt_backend **out_backend)
|
||||
static int kbasep_hwcnt_backend_jm_init(const struct kbase_hwcnt_backend_info *info,
|
||||
struct kbase_hwcnt_backend **out_backend)
|
||||
{
|
||||
int errcode;
|
||||
struct kbase_hwcnt_backend_jm *backend = NULL;
|
||||
|
|
@ -808,8 +739,8 @@ static int kbasep_hwcnt_backend_jm_init(
|
|||
if (!info || !out_backend)
|
||||
return -EINVAL;
|
||||
|
||||
errcode = kbasep_hwcnt_backend_jm_create(
|
||||
(const struct kbase_hwcnt_backend_jm_info *) info, &backend);
|
||||
errcode = kbasep_hwcnt_backend_jm_create((const struct kbase_hwcnt_backend_jm_info *)info,
|
||||
&backend);
|
||||
if (errcode)
|
||||
return errcode;
|
||||
|
||||
|
|
@ -825,8 +756,7 @@ static void kbasep_hwcnt_backend_jm_term(struct kbase_hwcnt_backend *backend)
|
|||
return;
|
||||
|
||||
kbasep_hwcnt_backend_jm_dump_disable(backend);
|
||||
kbasep_hwcnt_backend_jm_destroy(
|
||||
(struct kbase_hwcnt_backend_jm *)backend);
|
||||
kbasep_hwcnt_backend_jm_destroy((struct kbase_hwcnt_backend_jm *)backend);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -835,8 +765,7 @@ static void kbasep_hwcnt_backend_jm_term(struct kbase_hwcnt_backend *backend)
|
|||
*
|
||||
* Can be safely called on a backend info in any state of partial construction.
|
||||
*/
|
||||
static void kbasep_hwcnt_backend_jm_info_destroy(
|
||||
const struct kbase_hwcnt_backend_jm_info *info)
|
||||
static void kbasep_hwcnt_backend_jm_info_destroy(const struct kbase_hwcnt_backend_jm_info *info)
|
||||
{
|
||||
if (!info)
|
||||
return;
|
||||
|
|
@ -852,9 +781,8 @@ static void kbasep_hwcnt_backend_jm_info_destroy(
|
|||
*
|
||||
* Return: 0 on success, else error code.
|
||||
*/
|
||||
static int kbasep_hwcnt_backend_jm_info_create(
|
||||
struct kbase_device *kbdev,
|
||||
const struct kbase_hwcnt_backend_jm_info **out_info)
|
||||
static int kbasep_hwcnt_backend_jm_info_create(struct kbase_device *kbdev,
|
||||
const struct kbase_hwcnt_backend_jm_info **out_info)
|
||||
{
|
||||
int errcode = -ENOMEM;
|
||||
struct kbase_hwcnt_backend_jm_info *info = NULL;
|
||||
|
|
@ -877,15 +805,12 @@ static int kbasep_hwcnt_backend_jm_info_create(
|
|||
info->counter_set = KBASE_HWCNT_SET_PRIMARY;
|
||||
#endif
|
||||
|
||||
errcode = kbasep_hwcnt_backend_jm_gpu_info_init(kbdev,
|
||||
&info->hwcnt_gpu_info);
|
||||
errcode = kbasep_hwcnt_backend_jm_gpu_info_init(kbdev, &info->hwcnt_gpu_info);
|
||||
if (errcode)
|
||||
goto error;
|
||||
|
||||
errcode = kbase_hwcnt_jm_metadata_create(&info->hwcnt_gpu_info,
|
||||
info->counter_set,
|
||||
&info->metadata,
|
||||
&info->dump_bytes);
|
||||
errcode = kbase_hwcnt_jm_metadata_create(&info->hwcnt_gpu_info, info->counter_set,
|
||||
&info->metadata, &info->dump_bytes);
|
||||
if (errcode)
|
||||
goto error;
|
||||
|
||||
|
|
@ -897,9 +822,8 @@ static int kbasep_hwcnt_backend_jm_info_create(
|
|||
return errcode;
|
||||
}
|
||||
|
||||
int kbase_hwcnt_backend_jm_create(
|
||||
struct kbase_device *kbdev,
|
||||
struct kbase_hwcnt_backend_interface *iface)
|
||||
int kbase_hwcnt_backend_jm_create(struct kbase_device *kbdev,
|
||||
struct kbase_hwcnt_backend_interface *iface)
|
||||
{
|
||||
int errcode;
|
||||
const struct kbase_hwcnt_backend_jm_info *info = NULL;
|
||||
|
|
@ -928,8 +852,7 @@ int kbase_hwcnt_backend_jm_create(
|
|||
return 0;
|
||||
}
|
||||
|
||||
void kbase_hwcnt_backend_jm_destroy(
|
||||
struct kbase_hwcnt_backend_interface *iface)
|
||||
void kbase_hwcnt_backend_jm_destroy(struct kbase_hwcnt_backend_interface *iface)
|
||||
{
|
||||
if (!iface)
|
||||
return;
|
||||
|
|
@ -1,7 +1,7 @@
|
|||
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
|
||||
/*
|
||||
*
|
||||
* (C) COPYRIGHT 2018, 2020-2021 ARM Limited. All rights reserved.
|
||||
* (C) COPYRIGHT 2018, 2020-2022 ARM Limited. All rights reserved.
|
||||
*
|
||||
* This program is free software and is provided to you under the terms of the
|
||||
* GNU General Public License version 2 as published by the Free Software
|
||||
|
|
@ -27,7 +27,7 @@
|
|||
#ifndef _KBASE_HWCNT_BACKEND_JM_H_
|
||||
#define _KBASE_HWCNT_BACKEND_JM_H_
|
||||
|
||||
#include "mali_kbase_hwcnt_backend.h"
|
||||
#include "hwcnt/backend/mali_kbase_hwcnt_backend.h"
|
||||
|
||||
struct kbase_device;
|
||||
|
||||
|
|
@ -42,9 +42,8 @@ struct kbase_device;
|
|||
*
|
||||
* Return: 0 on success, else error code.
|
||||
*/
|
||||
int kbase_hwcnt_backend_jm_create(
|
||||
struct kbase_device *kbdev,
|
||||
struct kbase_hwcnt_backend_interface *iface);
|
||||
int kbase_hwcnt_backend_jm_create(struct kbase_device *kbdev,
|
||||
struct kbase_hwcnt_backend_interface *iface);
|
||||
|
||||
/**
|
||||
* kbase_hwcnt_backend_jm_destroy() - Destroy a JM hardware counter backend
|
||||
|
|
@ -54,7 +53,6 @@ int kbase_hwcnt_backend_jm_create(
|
|||
* Can be safely called on an all-zeroed interface, or on an already destroyed
|
||||
* interface.
|
||||
*/
|
||||
void kbase_hwcnt_backend_jm_destroy(
|
||||
struct kbase_hwcnt_backend_interface *iface);
|
||||
void kbase_hwcnt_backend_jm_destroy(struct kbase_hwcnt_backend_interface *iface);
|
||||
|
||||
#endif /* _KBASE_HWCNT_BACKEND_JM_H_ */
|
||||
|
|
@ -21,11 +21,12 @@
|
|||
|
||||
#include <mali_kbase.h>
|
||||
|
||||
#include <mali_kbase_hwcnt_gpu.h>
|
||||
#include <mali_kbase_hwcnt_types.h>
|
||||
#include <hwcnt/mali_kbase_hwcnt_gpu.h>
|
||||
#include <hwcnt/mali_kbase_hwcnt_types.h>
|
||||
|
||||
#include <mali_kbase_hwcnt_backend.h>
|
||||
#include <mali_kbase_hwcnt_watchdog_if.h>
|
||||
#include <hwcnt/backend/mali_kbase_hwcnt_backend.h>
|
||||
#include <hwcnt/backend/mali_kbase_hwcnt_backend_jm_watchdog.h>
|
||||
#include <hwcnt/mali_kbase_hwcnt_watchdog_if.h>
|
||||
|
||||
#if IS_ENABLED(CONFIG_MALI_IS_FPGA) && !IS_ENABLED(CONFIG_MALI_BIFROST_NO_MALI)
|
||||
/* Backend watch dog timer interval in milliseconds: 18 seconds. */
|
||||
|
|
@ -118,8 +119,7 @@ enum backend_watchdog_state {
|
|||
*/
|
||||
enum wd_init_state {
|
||||
HWCNT_JM_WD_INIT_START,
|
||||
HWCNT_JM_WD_INIT_ALLOC = HWCNT_JM_WD_INIT_START,
|
||||
HWCNT_JM_WD_INIT_BACKEND,
|
||||
HWCNT_JM_WD_INIT_BACKEND = HWCNT_JM_WD_INIT_START,
|
||||
HWCNT_JM_WD_INIT_ENABLE_MAP,
|
||||
HWCNT_JM_WD_INIT_DUMP_BUFFER,
|
||||
HWCNT_JM_WD_INIT_END
|
||||
|
|
@ -296,16 +296,10 @@ kbasep_hwcnt_backend_jm_watchdog_term_partial(struct kbase_hwcnt_backend_jm_watc
|
|||
if (!wd_backend)
|
||||
return;
|
||||
|
||||
/* disable timer thread to avoid concurrent access to shared resources */
|
||||
wd_backend->info->dump_watchdog_iface->disable(
|
||||
wd_backend->info->dump_watchdog_iface->timer);
|
||||
WARN_ON(state > HWCNT_JM_WD_INIT_END);
|
||||
|
||||
/*will exit the loop when state reaches HWCNT_JM_WD_INIT_START*/
|
||||
while (state-- > HWCNT_JM_WD_INIT_START) {
|
||||
switch (state) {
|
||||
case HWCNT_JM_WD_INIT_ALLOC:
|
||||
kfree(wd_backend);
|
||||
break;
|
||||
case HWCNT_JM_WD_INIT_BACKEND:
|
||||
wd_backend->info->jm_backend_iface->term(wd_backend->jm_backend);
|
||||
break;
|
||||
|
|
@ -319,6 +313,8 @@ kbasep_hwcnt_backend_jm_watchdog_term_partial(struct kbase_hwcnt_backend_jm_watc
|
|||
break;
|
||||
}
|
||||
}
|
||||
|
||||
kfree(wd_backend);
|
||||
}
|
||||
|
||||
/* Job manager watchdog backend, implementation of kbase_hwcnt_backend_term_fn
|
||||
|
|
@ -326,11 +322,17 @@ kbasep_hwcnt_backend_jm_watchdog_term_partial(struct kbase_hwcnt_backend_jm_watc
|
|||
*/
|
||||
static void kbasep_hwcnt_backend_jm_watchdog_term(struct kbase_hwcnt_backend *backend)
|
||||
{
|
||||
struct kbase_hwcnt_backend_jm_watchdog *wd_backend =
|
||||
(struct kbase_hwcnt_backend_jm_watchdog *)backend;
|
||||
|
||||
if (!backend)
|
||||
return;
|
||||
|
||||
kbasep_hwcnt_backend_jm_watchdog_term_partial(
|
||||
(struct kbase_hwcnt_backend_jm_watchdog *)backend, HWCNT_JM_WD_INIT_END);
|
||||
/* disable timer thread to avoid concurrent access to shared resources */
|
||||
wd_backend->info->dump_watchdog_iface->disable(
|
||||
wd_backend->info->dump_watchdog_iface->timer);
|
||||
|
||||
kbasep_hwcnt_backend_jm_watchdog_term_partial(wd_backend, HWCNT_JM_WD_INIT_END);
|
||||
}
|
||||
|
||||
/* Job manager watchdog backend, implementation of kbase_hwcnt_backend_init_fn */
|
||||
|
|
@ -350,20 +352,20 @@ static int kbasep_hwcnt_backend_jm_watchdog_init(const struct kbase_hwcnt_backen
|
|||
jm_info = wd_info->jm_backend_iface->info;
|
||||
metadata = wd_info->jm_backend_iface->metadata(wd_info->jm_backend_iface->info);
|
||||
|
||||
wd_backend = kmalloc(sizeof(*wd_backend), GFP_KERNEL);
|
||||
if (!wd_backend) {
|
||||
*out_backend = NULL;
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
*wd_backend = (struct kbase_hwcnt_backend_jm_watchdog){
|
||||
.info = wd_info,
|
||||
.timeout_ms = hwcnt_backend_watchdog_timer_interval_ms,
|
||||
.locked = { .state = HWCNT_JM_WD_IDLE_BUFFER_EMPTY, .is_enabled = false }
|
||||
};
|
||||
|
||||
while (state < HWCNT_JM_WD_INIT_END && !errcode) {
|
||||
switch (state) {
|
||||
case HWCNT_JM_WD_INIT_ALLOC:
|
||||
wd_backend = kmalloc(sizeof(*wd_backend), GFP_KERNEL);
|
||||
if (wd_backend) {
|
||||
*wd_backend = (struct kbase_hwcnt_backend_jm_watchdog){
|
||||
.info = wd_info,
|
||||
.timeout_ms = hwcnt_backend_watchdog_timer_interval_ms,
|
||||
.locked = { .state = HWCNT_JM_WD_IDLE_BUFFER_EMPTY,
|
||||
.is_enabled = false }
|
||||
};
|
||||
} else
|
||||
errcode = -ENOMEM;
|
||||
break;
|
||||
case HWCNT_JM_WD_INIT_BACKEND:
|
||||
errcode = wd_info->jm_backend_iface->init(jm_info, &wd_backend->jm_backend);
|
||||
break;
|
||||
|
|
@ -823,5 +825,5 @@ void kbase_hwcnt_backend_jm_watchdog_destroy(struct kbase_hwcnt_backend_interfac
|
|||
kfree((struct kbase_hwcnt_backend_jm_watchdog_info *)iface->info);
|
||||
|
||||
/*blanking the watchdog backend interface*/
|
||||
*iface = (struct kbase_hwcnt_backend_interface){ NULL };
|
||||
memset(iface, 0, sizeof(*iface));
|
||||
}
|
||||
|
|
@ -1,7 +1,7 @@
|
|||
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
|
||||
/*
|
||||
*
|
||||
* (C) COPYRIGHT 2021 ARM Limited. All rights reserved.
|
||||
* (C) COPYRIGHT 2021-2022 ARM Limited. All rights reserved.
|
||||
*
|
||||
* This program is free software and is provided to you under the terms of the
|
||||
* GNU General Public License version 2 as published by the Free Software
|
||||
|
|
@ -32,8 +32,8 @@
|
|||
#ifndef _KBASE_HWCNT_BACKEND_JM_WATCHDOG_H_
|
||||
#define _KBASE_HWCNT_BACKEND_JM_WATCHDOG_H_
|
||||
|
||||
#include <mali_kbase_hwcnt_backend.h>
|
||||
#include <mali_kbase_hwcnt_watchdog_if.h>
|
||||
#include <hwcnt/backend/mali_kbase_hwcnt_backend.h>
|
||||
#include <hwcnt/mali_kbase_hwcnt_watchdog_if.h>
|
||||
|
||||
/**
|
||||
* kbase_hwcnt_backend_jm_watchdog_create() - Create a job manager hardware counter watchdog
|
||||
|
|
@ -23,10 +23,10 @@
|
|||
* Implementation of hardware counter context and accumulator APIs.
|
||||
*/
|
||||
|
||||
#include "mali_kbase_hwcnt_context.h"
|
||||
#include "mali_kbase_hwcnt_accumulator.h"
|
||||
#include "mali_kbase_hwcnt_backend.h"
|
||||
#include "mali_kbase_hwcnt_types.h"
|
||||
#include "hwcnt/mali_kbase_hwcnt_context.h"
|
||||
#include "hwcnt/mali_kbase_hwcnt_accumulator.h"
|
||||
#include "hwcnt/backend/mali_kbase_hwcnt_backend.h"
|
||||
#include "hwcnt/mali_kbase_hwcnt_types.h"
|
||||
|
||||
#include <linux/mutex.h>
|
||||
#include <linux/spinlock.h>
|
||||
|
|
@ -39,11 +39,7 @@
|
|||
* @ACCUM_STATE_ENABLED: Enabled state, where dumping is enabled if there are
|
||||
* any enabled counters.
|
||||
*/
|
||||
enum kbase_hwcnt_accum_state {
|
||||
ACCUM_STATE_ERROR,
|
||||
ACCUM_STATE_DISABLED,
|
||||
ACCUM_STATE_ENABLED
|
||||
};
|
||||
enum kbase_hwcnt_accum_state { ACCUM_STATE_ERROR, ACCUM_STATE_DISABLED, ACCUM_STATE_ENABLED };
|
||||
|
||||
/**
|
||||
* struct kbase_hwcnt_accumulator - Hardware counter accumulator structure.
|
||||
|
|
@ -130,9 +126,8 @@ struct kbase_hwcnt_context {
|
|||
struct workqueue_struct *wq;
|
||||
};
|
||||
|
||||
int kbase_hwcnt_context_init(
|
||||
const struct kbase_hwcnt_backend_interface *iface,
|
||||
struct kbase_hwcnt_context **out_hctx)
|
||||
int kbase_hwcnt_context_init(const struct kbase_hwcnt_backend_interface *iface,
|
||||
struct kbase_hwcnt_context **out_hctx)
|
||||
{
|
||||
struct kbase_hwcnt_context *hctx = NULL;
|
||||
|
||||
|
|
@ -149,8 +144,7 @@ int kbase_hwcnt_context_init(
|
|||
mutex_init(&hctx->accum_lock);
|
||||
hctx->accum_inited = false;
|
||||
|
||||
hctx->wq =
|
||||
alloc_workqueue("mali_kbase_hwcnt", WQ_HIGHPRI | WQ_UNBOUND, 0);
|
||||
hctx->wq = alloc_workqueue("mali_kbase_hwcnt", WQ_HIGHPRI | WQ_UNBOUND, 0);
|
||||
if (!hctx->wq)
|
||||
goto err_alloc_workqueue;
|
||||
|
||||
|
|
@ -208,35 +202,30 @@ static int kbasep_hwcnt_accumulator_init(struct kbase_hwcnt_context *hctx)
|
|||
WARN_ON(!hctx);
|
||||
WARN_ON(!hctx->accum_inited);
|
||||
|
||||
errcode = hctx->iface->init(
|
||||
hctx->iface->info, &hctx->accum.backend);
|
||||
errcode = hctx->iface->init(hctx->iface->info, &hctx->accum.backend);
|
||||
if (errcode)
|
||||
goto error;
|
||||
|
||||
hctx->accum.metadata = hctx->iface->metadata(hctx->iface->info);
|
||||
hctx->accum.state = ACCUM_STATE_ERROR;
|
||||
|
||||
errcode = kbase_hwcnt_enable_map_alloc(hctx->accum.metadata,
|
||||
&hctx->accum.enable_map);
|
||||
errcode = kbase_hwcnt_enable_map_alloc(hctx->accum.metadata, &hctx->accum.enable_map);
|
||||
if (errcode)
|
||||
goto error;
|
||||
|
||||
hctx->accum.enable_map_any_enabled = false;
|
||||
|
||||
errcode = kbase_hwcnt_dump_buffer_alloc(hctx->accum.metadata,
|
||||
&hctx->accum.accum_buf);
|
||||
errcode = kbase_hwcnt_dump_buffer_alloc(hctx->accum.metadata, &hctx->accum.accum_buf);
|
||||
if (errcode)
|
||||
goto error;
|
||||
|
||||
errcode = kbase_hwcnt_enable_map_alloc(hctx->accum.metadata,
|
||||
&hctx->accum.scratch_map);
|
||||
errcode = kbase_hwcnt_enable_map_alloc(hctx->accum.metadata, &hctx->accum.scratch_map);
|
||||
if (errcode)
|
||||
goto error;
|
||||
|
||||
hctx->accum.accumulated = false;
|
||||
|
||||
hctx->accum.ts_last_dump_ns =
|
||||
hctx->iface->timestamp_ns(hctx->accum.backend);
|
||||
hctx->accum.ts_last_dump_ns = hctx->iface->timestamp_ns(hctx->accum.backend);
|
||||
|
||||
return 0;
|
||||
|
||||
|
|
@ -252,8 +241,7 @@ static int kbasep_hwcnt_accumulator_init(struct kbase_hwcnt_context *hctx)
|
|||
* @hctx: Non-NULL pointer to hardware counter context.
|
||||
* @accumulate: True if we should accumulate before disabling, else false.
|
||||
*/
|
||||
static void kbasep_hwcnt_accumulator_disable(
|
||||
struct kbase_hwcnt_context *hctx, bool accumulate)
|
||||
static void kbasep_hwcnt_accumulator_disable(struct kbase_hwcnt_context *hctx, bool accumulate)
|
||||
{
|
||||
int errcode = 0;
|
||||
bool backend_enabled = false;
|
||||
|
|
@ -272,8 +260,7 @@ static void kbasep_hwcnt_accumulator_disable(
|
|||
WARN_ON(hctx->disable_count != 0);
|
||||
WARN_ON(hctx->accum.state == ACCUM_STATE_DISABLED);
|
||||
|
||||
if ((hctx->accum.state == ACCUM_STATE_ENABLED) &&
|
||||
(accum->enable_map_any_enabled))
|
||||
if ((hctx->accum.state == ACCUM_STATE_ENABLED) && (accum->enable_map_any_enabled))
|
||||
backend_enabled = true;
|
||||
|
||||
if (!backend_enabled)
|
||||
|
|
@ -297,8 +284,8 @@ static void kbasep_hwcnt_accumulator_disable(
|
|||
if (errcode)
|
||||
goto disable;
|
||||
|
||||
errcode = hctx->iface->dump_get(accum->backend,
|
||||
&accum->accum_buf, &accum->enable_map, accum->accumulated);
|
||||
errcode = hctx->iface->dump_get(accum->backend, &accum->accum_buf, &accum->enable_map,
|
||||
accum->accumulated);
|
||||
if (errcode)
|
||||
goto disable;
|
||||
|
||||
|
|
@ -336,8 +323,7 @@ static void kbasep_hwcnt_accumulator_enable(struct kbase_hwcnt_context *hctx)
|
|||
|
||||
/* The backend only needs enabling if any counters are enabled */
|
||||
if (accum->enable_map_any_enabled)
|
||||
errcode = hctx->iface->dump_enable_nolock(
|
||||
accum->backend, &accum->enable_map);
|
||||
errcode = hctx->iface->dump_enable_nolock(accum->backend, &accum->enable_map);
|
||||
|
||||
if (!errcode)
|
||||
accum->state = ACCUM_STATE_ENABLED;
|
||||
|
|
@ -364,12 +350,9 @@ static void kbasep_hwcnt_accumulator_enable(struct kbase_hwcnt_context *hctx)
|
|||
*
|
||||
* Return: 0 on success, else error code.
|
||||
*/
|
||||
static int kbasep_hwcnt_accumulator_dump(
|
||||
struct kbase_hwcnt_context *hctx,
|
||||
u64 *ts_start_ns,
|
||||
u64 *ts_end_ns,
|
||||
struct kbase_hwcnt_dump_buffer *dump_buf,
|
||||
const struct kbase_hwcnt_enable_map *new_map)
|
||||
static int kbasep_hwcnt_accumulator_dump(struct kbase_hwcnt_context *hctx, u64 *ts_start_ns,
|
||||
u64 *ts_end_ns, struct kbase_hwcnt_dump_buffer *dump_buf,
|
||||
const struct kbase_hwcnt_enable_map *new_map)
|
||||
{
|
||||
int errcode = 0;
|
||||
unsigned long flags;
|
||||
|
|
@ -398,8 +381,7 @@ static int kbasep_hwcnt_accumulator_dump(
|
|||
kbase_hwcnt_enable_map_copy(cur_map, &accum->enable_map);
|
||||
|
||||
if (new_map)
|
||||
new_map_any_enabled =
|
||||
kbase_hwcnt_enable_map_any_enabled(new_map);
|
||||
new_map_any_enabled = kbase_hwcnt_enable_map_any_enabled(new_map);
|
||||
|
||||
/*
|
||||
* We're holding accum_lock, so the accumulator state might transition
|
||||
|
|
@ -426,8 +408,7 @@ static int kbasep_hwcnt_accumulator_dump(
|
|||
* then we'll do it ourselves after the dump.
|
||||
*/
|
||||
if (new_map) {
|
||||
kbase_hwcnt_enable_map_copy(
|
||||
&accum->enable_map, new_map);
|
||||
kbase_hwcnt_enable_map_copy(&accum->enable_map, new_map);
|
||||
accum->enable_map_any_enabled = new_map_any_enabled;
|
||||
}
|
||||
|
||||
|
|
@ -440,12 +421,10 @@ static int kbasep_hwcnt_accumulator_dump(
|
|||
/* Initiate the dump if the backend is enabled. */
|
||||
if ((state == ACCUM_STATE_ENABLED) && cur_map_any_enabled) {
|
||||
if (dump_buf) {
|
||||
errcode = hctx->iface->dump_request(
|
||||
accum->backend, &dump_time_ns);
|
||||
errcode = hctx->iface->dump_request(accum->backend, &dump_time_ns);
|
||||
dump_requested = true;
|
||||
} else {
|
||||
dump_time_ns = hctx->iface->timestamp_ns(
|
||||
accum->backend);
|
||||
dump_time_ns = hctx->iface->timestamp_ns(accum->backend);
|
||||
errcode = hctx->iface->dump_clear(accum->backend);
|
||||
}
|
||||
|
||||
|
|
@ -457,8 +436,7 @@ static int kbasep_hwcnt_accumulator_dump(
|
|||
|
||||
/* Copy any accumulation into the dest buffer */
|
||||
if (accum->accumulated && dump_buf) {
|
||||
kbase_hwcnt_dump_buffer_copy(
|
||||
dump_buf, &accum->accum_buf, cur_map);
|
||||
kbase_hwcnt_dump_buffer_copy(dump_buf, &accum->accum_buf, cur_map);
|
||||
dump_written = true;
|
||||
}
|
||||
|
||||
|
|
@ -483,8 +461,7 @@ static int kbasep_hwcnt_accumulator_dump(
|
|||
* we're already enabled and holding accum_lock is impossible.
|
||||
*/
|
||||
if (new_map_any_enabled) {
|
||||
errcode = hctx->iface->dump_enable(
|
||||
accum->backend, new_map);
|
||||
errcode = hctx->iface->dump_enable(accum->backend, new_map);
|
||||
if (errcode)
|
||||
goto error;
|
||||
}
|
||||
|
|
@ -495,11 +472,8 @@ static int kbasep_hwcnt_accumulator_dump(
|
|||
/* If we dumped, copy or accumulate it into the destination */
|
||||
if (dump_requested) {
|
||||
WARN_ON(state != ACCUM_STATE_ENABLED);
|
||||
errcode = hctx->iface->dump_get(
|
||||
accum->backend,
|
||||
dump_buf,
|
||||
cur_map,
|
||||
dump_written);
|
||||
errcode = hctx->iface->dump_get(accum->backend, dump_buf, cur_map,
|
||||
dump_written);
|
||||
if (errcode)
|
||||
goto error;
|
||||
dump_written = true;
|
||||
|
|
@ -540,8 +514,7 @@ static int kbasep_hwcnt_accumulator_dump(
|
|||
* @hctx: Non-NULL pointer to hardware counter context.
|
||||
* @accumulate: True if we should accumulate before disabling, else false.
|
||||
*/
|
||||
static void kbasep_hwcnt_context_disable(
|
||||
struct kbase_hwcnt_context *hctx, bool accumulate)
|
||||
static void kbasep_hwcnt_context_disable(struct kbase_hwcnt_context *hctx, bool accumulate)
|
||||
{
|
||||
unsigned long flags;
|
||||
|
||||
|
|
@ -563,9 +536,8 @@ static void kbasep_hwcnt_context_disable(
|
|||
}
|
||||
}
|
||||
|
||||
int kbase_hwcnt_accumulator_acquire(
|
||||
struct kbase_hwcnt_context *hctx,
|
||||
struct kbase_hwcnt_accumulator **accum)
|
||||
int kbase_hwcnt_accumulator_acquire(struct kbase_hwcnt_context *hctx,
|
||||
struct kbase_hwcnt_accumulator **accum)
|
||||
{
|
||||
int errcode = 0;
|
||||
unsigned long flags;
|
||||
|
|
@ -618,9 +590,7 @@ int kbase_hwcnt_accumulator_acquire(
|
|||
* Regardless of initial state, counters don't need to be enabled via
|
||||
* the backend, as the initial enable map has no enabled counters.
|
||||
*/
|
||||
hctx->accum.state = (hctx->disable_count == 0) ?
|
||||
ACCUM_STATE_ENABLED :
|
||||
ACCUM_STATE_DISABLED;
|
||||
hctx->accum.state = (hctx->disable_count == 0) ? ACCUM_STATE_ENABLED : ACCUM_STATE_DISABLED;
|
||||
|
||||
spin_unlock_irqrestore(&hctx->state_lock, flags);
|
||||
|
||||
|
|
@ -728,8 +698,7 @@ void kbase_hwcnt_context_enable(struct kbase_hwcnt_context *hctx)
|
|||
spin_unlock_irqrestore(&hctx->state_lock, flags);
|
||||
}
|
||||
|
||||
const struct kbase_hwcnt_metadata *kbase_hwcnt_context_metadata(
|
||||
struct kbase_hwcnt_context *hctx)
|
||||
const struct kbase_hwcnt_metadata *kbase_hwcnt_context_metadata(struct kbase_hwcnt_context *hctx)
|
||||
{
|
||||
if (!hctx)
|
||||
return NULL;
|
||||
|
|
@ -737,8 +706,7 @@ const struct kbase_hwcnt_metadata *kbase_hwcnt_context_metadata(
|
|||
return hctx->iface->metadata(hctx->iface->info);
|
||||
}
|
||||
|
||||
bool kbase_hwcnt_context_queue_work(struct kbase_hwcnt_context *hctx,
|
||||
struct work_struct *work)
|
||||
bool kbase_hwcnt_context_queue_work(struct kbase_hwcnt_context *hctx, struct work_struct *work)
|
||||
{
|
||||
if (WARN_ON(!hctx) || WARN_ON(!work))
|
||||
return false;
|
||||
|
|
@ -746,12 +714,10 @@ bool kbase_hwcnt_context_queue_work(struct kbase_hwcnt_context *hctx,
|
|||
return queue_work(hctx->wq, work);
|
||||
}
|
||||
|
||||
int kbase_hwcnt_accumulator_set_counters(
|
||||
struct kbase_hwcnt_accumulator *accum,
|
||||
const struct kbase_hwcnt_enable_map *new_map,
|
||||
u64 *ts_start_ns,
|
||||
u64 *ts_end_ns,
|
||||
struct kbase_hwcnt_dump_buffer *dump_buf)
|
||||
int kbase_hwcnt_accumulator_set_counters(struct kbase_hwcnt_accumulator *accum,
|
||||
const struct kbase_hwcnt_enable_map *new_map,
|
||||
u64 *ts_start_ns, u64 *ts_end_ns,
|
||||
struct kbase_hwcnt_dump_buffer *dump_buf)
|
||||
{
|
||||
int errcode;
|
||||
struct kbase_hwcnt_context *hctx;
|
||||
|
|
@ -767,19 +733,15 @@ int kbase_hwcnt_accumulator_set_counters(
|
|||
|
||||
mutex_lock(&hctx->accum_lock);
|
||||
|
||||
errcode = kbasep_hwcnt_accumulator_dump(
|
||||
hctx, ts_start_ns, ts_end_ns, dump_buf, new_map);
|
||||
errcode = kbasep_hwcnt_accumulator_dump(hctx, ts_start_ns, ts_end_ns, dump_buf, new_map);
|
||||
|
||||
mutex_unlock(&hctx->accum_lock);
|
||||
|
||||
return errcode;
|
||||
}
|
||||
|
||||
int kbase_hwcnt_accumulator_dump(
|
||||
struct kbase_hwcnt_accumulator *accum,
|
||||
u64 *ts_start_ns,
|
||||
u64 *ts_end_ns,
|
||||
struct kbase_hwcnt_dump_buffer *dump_buf)
|
||||
int kbase_hwcnt_accumulator_dump(struct kbase_hwcnt_accumulator *accum, u64 *ts_start_ns,
|
||||
u64 *ts_end_ns, struct kbase_hwcnt_dump_buffer *dump_buf)
|
||||
{
|
||||
int errcode;
|
||||
struct kbase_hwcnt_context *hctx;
|
||||
|
|
@ -794,8 +756,7 @@ int kbase_hwcnt_accumulator_dump(
|
|||
|
||||
mutex_lock(&hctx->accum_lock);
|
||||
|
||||
errcode = kbasep_hwcnt_accumulator_dump(
|
||||
hctx, ts_start_ns, ts_end_ns, dump_buf, NULL);
|
||||
errcode = kbasep_hwcnt_accumulator_dump(hctx, ts_start_ns, ts_end_ns, dump_buf, NULL);
|
||||
|
||||
mutex_unlock(&hctx->accum_lock);
|
||||
|
||||
|
|
@ -1,7 +1,7 @@
|
|||
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
|
||||
/*
|
||||
*
|
||||
* (C) COPYRIGHT 2018, 2020-2021 ARM Limited. All rights reserved.
|
||||
* (C) COPYRIGHT 2018, 2020-2022 ARM Limited. All rights reserved.
|
||||
*
|
||||
* This program is free software and is provided to you under the terms of the
|
||||
* GNU General Public License version 2 as published by the Free Software
|
||||
|
|
@ -67,9 +67,8 @@ struct kbase_hwcnt_dump_buffer;
|
|||
*
|
||||
* Return: 0 on success or error code.
|
||||
*/
|
||||
int kbase_hwcnt_accumulator_acquire(
|
||||
struct kbase_hwcnt_context *hctx,
|
||||
struct kbase_hwcnt_accumulator **accum);
|
||||
int kbase_hwcnt_accumulator_acquire(struct kbase_hwcnt_context *hctx,
|
||||
struct kbase_hwcnt_accumulator **accum);
|
||||
|
||||
/**
|
||||
* kbase_hwcnt_accumulator_release() - Release a hardware counter accumulator.
|
||||
|
|
@ -102,12 +101,10 @@ void kbase_hwcnt_accumulator_release(struct kbase_hwcnt_accumulator *accum);
|
|||
*
|
||||
* Return: 0 on success or error code.
|
||||
*/
|
||||
int kbase_hwcnt_accumulator_set_counters(
|
||||
struct kbase_hwcnt_accumulator *accum,
|
||||
const struct kbase_hwcnt_enable_map *new_map,
|
||||
u64 *ts_start_ns,
|
||||
u64 *ts_end_ns,
|
||||
struct kbase_hwcnt_dump_buffer *dump_buf);
|
||||
int kbase_hwcnt_accumulator_set_counters(struct kbase_hwcnt_accumulator *accum,
|
||||
const struct kbase_hwcnt_enable_map *new_map,
|
||||
u64 *ts_start_ns, u64 *ts_end_ns,
|
||||
struct kbase_hwcnt_dump_buffer *dump_buf);
|
||||
|
||||
/**
|
||||
* kbase_hwcnt_accumulator_dump() - Perform a dump of the currently enabled
|
||||
|
|
@ -127,11 +124,8 @@ int kbase_hwcnt_accumulator_set_counters(
|
|||
*
|
||||
* Return: 0 on success or error code.
|
||||
*/
|
||||
int kbase_hwcnt_accumulator_dump(
|
||||
struct kbase_hwcnt_accumulator *accum,
|
||||
u64 *ts_start_ns,
|
||||
u64 *ts_end_ns,
|
||||
struct kbase_hwcnt_dump_buffer *dump_buf);
|
||||
int kbase_hwcnt_accumulator_dump(struct kbase_hwcnt_accumulator *accum, u64 *ts_start_ns,
|
||||
u64 *ts_end_ns, struct kbase_hwcnt_dump_buffer *dump_buf);
|
||||
|
||||
/**
|
||||
* kbase_hwcnt_accumulator_timestamp_ns() - Get the current accumulator backend
|
||||
|
|
@ -1,7 +1,7 @@
|
|||
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
|
||||
/*
|
||||
*
|
||||
* (C) COPYRIGHT 2018, 2020-2021 ARM Limited. All rights reserved.
|
||||
* (C) COPYRIGHT 2018, 2020-2022 ARM Limited. All rights reserved.
|
||||
*
|
||||
* This program is free software and is provided to you under the terms of the
|
||||
* GNU General Public License version 2 as published by the Free Software
|
||||
|
|
@ -43,9 +43,8 @@ struct kbase_hwcnt_context;
|
|||
*
|
||||
* Return: 0 on success, else error code.
|
||||
*/
|
||||
int kbase_hwcnt_context_init(
|
||||
const struct kbase_hwcnt_backend_interface *iface,
|
||||
struct kbase_hwcnt_context **out_hctx);
|
||||
int kbase_hwcnt_context_init(const struct kbase_hwcnt_backend_interface *iface,
|
||||
struct kbase_hwcnt_context **out_hctx);
|
||||
|
||||
/**
|
||||
* kbase_hwcnt_context_term() - Terminate a hardware counter context.
|
||||
|
|
@ -61,8 +60,7 @@ void kbase_hwcnt_context_term(struct kbase_hwcnt_context *hctx);
|
|||
*
|
||||
* Return: Non-NULL pointer to metadata, or NULL on error.
|
||||
*/
|
||||
const struct kbase_hwcnt_metadata *kbase_hwcnt_context_metadata(
|
||||
struct kbase_hwcnt_context *hctx);
|
||||
const struct kbase_hwcnt_metadata *kbase_hwcnt_context_metadata(struct kbase_hwcnt_context *hctx);
|
||||
|
||||
/**
|
||||
* kbase_hwcnt_context_disable() - Increment the disable count of the context.
|
||||
|
|
@ -145,7 +143,6 @@ void kbase_hwcnt_context_enable(struct kbase_hwcnt_context *hctx);
|
|||
* this meant progress through the power management states could be stalled
|
||||
* for however long that higher priority thread took.
|
||||
*/
|
||||
bool kbase_hwcnt_context_queue_work(struct kbase_hwcnt_context *hctx,
|
||||
struct work_struct *work);
|
||||
bool kbase_hwcnt_context_queue_work(struct kbase_hwcnt_context *hctx, struct work_struct *work);
|
||||
|
||||
#endif /* _KBASE_HWCNT_CONTEXT_H_ */
|
||||
|
|
@ -19,8 +19,8 @@
|
|||
*
|
||||
*/
|
||||
|
||||
#include "mali_kbase_hwcnt_gpu.h"
|
||||
#include "mali_kbase_hwcnt_types.h"
|
||||
#include "hwcnt/mali_kbase_hwcnt_gpu.h"
|
||||
#include "hwcnt/mali_kbase_hwcnt_types.h"
|
||||
|
||||
#include <linux/err.h>
|
||||
|
||||
|
|
@ -32,8 +32,7 @@ enum enable_map_idx {
|
|||
EM_COUNT,
|
||||
};
|
||||
|
||||
static void kbasep_get_fe_block_type(u64 *dst, enum kbase_hwcnt_set counter_set,
|
||||
bool is_csf)
|
||||
static void kbasep_get_fe_block_type(u64 *dst, enum kbase_hwcnt_set counter_set, bool is_csf)
|
||||
{
|
||||
switch (counter_set) {
|
||||
case KBASE_HWCNT_SET_PRIMARY:
|
||||
|
|
@ -56,8 +55,7 @@ static void kbasep_get_fe_block_type(u64 *dst, enum kbase_hwcnt_set counter_set,
|
|||
}
|
||||
}
|
||||
|
||||
static void kbasep_get_tiler_block_type(u64 *dst,
|
||||
enum kbase_hwcnt_set counter_set)
|
||||
static void kbasep_get_tiler_block_type(u64 *dst, enum kbase_hwcnt_set counter_set)
|
||||
{
|
||||
switch (counter_set) {
|
||||
case KBASE_HWCNT_SET_PRIMARY:
|
||||
|
|
@ -72,8 +70,7 @@ static void kbasep_get_tiler_block_type(u64 *dst,
|
|||
}
|
||||
}
|
||||
|
||||
static void kbasep_get_sc_block_type(u64 *dst, enum kbase_hwcnt_set counter_set,
|
||||
bool is_csf)
|
||||
static void kbasep_get_sc_block_type(u64 *dst, enum kbase_hwcnt_set counter_set, bool is_csf)
|
||||
{
|
||||
switch (counter_set) {
|
||||
case KBASE_HWCNT_SET_PRIMARY:
|
||||
|
|
@ -93,8 +90,7 @@ static void kbasep_get_sc_block_type(u64 *dst, enum kbase_hwcnt_set counter_set,
|
|||
}
|
||||
}
|
||||
|
||||
static void kbasep_get_memsys_block_type(u64 *dst,
|
||||
enum kbase_hwcnt_set counter_set)
|
||||
static void kbasep_get_memsys_block_type(u64 *dst, enum kbase_hwcnt_set counter_set)
|
||||
{
|
||||
switch (counter_set) {
|
||||
case KBASE_HWCNT_SET_PRIMARY:
|
||||
|
|
@ -122,15 +118,14 @@ static void kbasep_get_memsys_block_type(u64 *dst,
|
|||
*
|
||||
* Return: 0 on success, else error code.
|
||||
*/
|
||||
static int kbasep_hwcnt_backend_gpu_metadata_create(
|
||||
const struct kbase_hwcnt_gpu_info *gpu_info, const bool is_csf,
|
||||
enum kbase_hwcnt_set counter_set,
|
||||
const struct kbase_hwcnt_metadata **metadata)
|
||||
static int kbasep_hwcnt_backend_gpu_metadata_create(const struct kbase_hwcnt_gpu_info *gpu_info,
|
||||
const bool is_csf,
|
||||
enum kbase_hwcnt_set counter_set,
|
||||
const struct kbase_hwcnt_metadata **metadata)
|
||||
{
|
||||
struct kbase_hwcnt_description desc;
|
||||
struct kbase_hwcnt_group_description group;
|
||||
struct kbase_hwcnt_block_description
|
||||
blks[KBASE_HWCNT_V5_BLOCK_TYPE_COUNT];
|
||||
struct kbase_hwcnt_block_description blks[KBASE_HWCNT_V5_BLOCK_TYPE_COUNT];
|
||||
size_t non_sc_block_count;
|
||||
size_t sc_block_count;
|
||||
|
||||
|
|
@ -156,22 +151,19 @@ static int kbasep_hwcnt_backend_gpu_metadata_create(
|
|||
kbasep_get_fe_block_type(&blks[0].type, counter_set, is_csf);
|
||||
blks[0].inst_cnt = 1;
|
||||
blks[0].hdr_cnt = KBASE_HWCNT_V5_HEADERS_PER_BLOCK;
|
||||
blks[0].ctr_cnt = gpu_info->prfcnt_values_per_block -
|
||||
KBASE_HWCNT_V5_HEADERS_PER_BLOCK;
|
||||
blks[0].ctr_cnt = gpu_info->prfcnt_values_per_block - KBASE_HWCNT_V5_HEADERS_PER_BLOCK;
|
||||
|
||||
/* One Tiler block */
|
||||
kbasep_get_tiler_block_type(&blks[1].type, counter_set);
|
||||
blks[1].inst_cnt = 1;
|
||||
blks[1].hdr_cnt = KBASE_HWCNT_V5_HEADERS_PER_BLOCK;
|
||||
blks[1].ctr_cnt = gpu_info->prfcnt_values_per_block -
|
||||
KBASE_HWCNT_V5_HEADERS_PER_BLOCK;
|
||||
blks[1].ctr_cnt = gpu_info->prfcnt_values_per_block - KBASE_HWCNT_V5_HEADERS_PER_BLOCK;
|
||||
|
||||
/* l2_count memsys blks */
|
||||
kbasep_get_memsys_block_type(&blks[2].type, counter_set);
|
||||
blks[2].inst_cnt = gpu_info->l2_count;
|
||||
blks[2].hdr_cnt = KBASE_HWCNT_V5_HEADERS_PER_BLOCK;
|
||||
blks[2].ctr_cnt = gpu_info->prfcnt_values_per_block -
|
||||
KBASE_HWCNT_V5_HEADERS_PER_BLOCK;
|
||||
blks[2].ctr_cnt = gpu_info->prfcnt_values_per_block - KBASE_HWCNT_V5_HEADERS_PER_BLOCK;
|
||||
|
||||
/*
|
||||
* There are as many shader cores in the system as there are bits set in
|
||||
|
|
@ -192,8 +184,7 @@ static int kbasep_hwcnt_backend_gpu_metadata_create(
|
|||
kbasep_get_sc_block_type(&blks[3].type, counter_set, is_csf);
|
||||
blks[3].inst_cnt = sc_block_count;
|
||||
blks[3].hdr_cnt = KBASE_HWCNT_V5_HEADERS_PER_BLOCK;
|
||||
blks[3].ctr_cnt = gpu_info->prfcnt_values_per_block -
|
||||
KBASE_HWCNT_V5_HEADERS_PER_BLOCK;
|
||||
blks[3].ctr_cnt = gpu_info->prfcnt_values_per_block - KBASE_HWCNT_V5_HEADERS_PER_BLOCK;
|
||||
|
||||
WARN_ON(KBASE_HWCNT_V5_BLOCK_TYPE_COUNT != 4);
|
||||
|
||||
|
|
@ -220,8 +211,7 @@ static int kbasep_hwcnt_backend_gpu_metadata_create(
|
|||
*
|
||||
* Return: Size of buffer the GPU needs to perform a counter dump.
|
||||
*/
|
||||
static size_t
|
||||
kbasep_hwcnt_backend_jm_dump_bytes(const struct kbase_hwcnt_gpu_info *gpu_info)
|
||||
static size_t kbasep_hwcnt_backend_jm_dump_bytes(const struct kbase_hwcnt_gpu_info *gpu_info)
|
||||
{
|
||||
WARN_ON(!gpu_info);
|
||||
|
||||
|
|
@ -229,11 +219,10 @@ kbasep_hwcnt_backend_jm_dump_bytes(const struct kbase_hwcnt_gpu_info *gpu_info)
|
|||
gpu_info->prfcnt_values_per_block * KBASE_HWCNT_VALUE_HW_BYTES;
|
||||
}
|
||||
|
||||
int kbase_hwcnt_jm_metadata_create(
|
||||
const struct kbase_hwcnt_gpu_info *gpu_info,
|
||||
enum kbase_hwcnt_set counter_set,
|
||||
const struct kbase_hwcnt_metadata **out_metadata,
|
||||
size_t *out_dump_bytes)
|
||||
int kbase_hwcnt_jm_metadata_create(const struct kbase_hwcnt_gpu_info *gpu_info,
|
||||
enum kbase_hwcnt_set counter_set,
|
||||
const struct kbase_hwcnt_metadata **out_metadata,
|
||||
size_t *out_dump_bytes)
|
||||
{
|
||||
int errcode;
|
||||
const struct kbase_hwcnt_metadata *metadata;
|
||||
|
|
@ -250,8 +239,7 @@ int kbase_hwcnt_jm_metadata_create(
|
|||
* all the available L2 cache and Shader cores are allocated.
|
||||
*/
|
||||
dump_bytes = kbasep_hwcnt_backend_jm_dump_bytes(gpu_info);
|
||||
errcode = kbasep_hwcnt_backend_gpu_metadata_create(
|
||||
gpu_info, false, counter_set, &metadata);
|
||||
errcode = kbasep_hwcnt_backend_gpu_metadata_create(gpu_info, false, counter_set, &metadata);
|
||||
if (errcode)
|
||||
return errcode;
|
||||
|
||||
|
|
@ -276,10 +264,9 @@ void kbase_hwcnt_jm_metadata_destroy(const struct kbase_hwcnt_metadata *metadata
|
|||
kbase_hwcnt_metadata_destroy(metadata);
|
||||
}
|
||||
|
||||
int kbase_hwcnt_csf_metadata_create(
|
||||
const struct kbase_hwcnt_gpu_info *gpu_info,
|
||||
enum kbase_hwcnt_set counter_set,
|
||||
const struct kbase_hwcnt_metadata **out_metadata)
|
||||
int kbase_hwcnt_csf_metadata_create(const struct kbase_hwcnt_gpu_info *gpu_info,
|
||||
enum kbase_hwcnt_set counter_set,
|
||||
const struct kbase_hwcnt_metadata **out_metadata)
|
||||
{
|
||||
int errcode;
|
||||
const struct kbase_hwcnt_metadata *metadata;
|
||||
|
|
@ -287,8 +274,7 @@ int kbase_hwcnt_csf_metadata_create(
|
|||
if (!gpu_info || !out_metadata)
|
||||
return -EINVAL;
|
||||
|
||||
errcode = kbasep_hwcnt_backend_gpu_metadata_create(
|
||||
gpu_info, true, counter_set, &metadata);
|
||||
errcode = kbasep_hwcnt_backend_gpu_metadata_create(gpu_info, true, counter_set, &metadata);
|
||||
if (errcode)
|
||||
return errcode;
|
||||
|
||||
|
|
@ -297,8 +283,7 @@ int kbase_hwcnt_csf_metadata_create(
|
|||
return 0;
|
||||
}
|
||||
|
||||
void kbase_hwcnt_csf_metadata_destroy(
|
||||
const struct kbase_hwcnt_metadata *metadata)
|
||||
void kbase_hwcnt_csf_metadata_destroy(const struct kbase_hwcnt_metadata *metadata)
|
||||
{
|
||||
if (!metadata)
|
||||
return;
|
||||
|
|
@ -306,10 +291,7 @@ void kbase_hwcnt_csf_metadata_destroy(
|
|||
kbase_hwcnt_metadata_destroy(metadata);
|
||||
}
|
||||
|
||||
static bool is_block_type_shader(
|
||||
const u64 grp_type,
|
||||
const u64 blk_type,
|
||||
const size_t blk)
|
||||
static bool is_block_type_shader(const u64 grp_type, const u64 blk_type, const size_t blk)
|
||||
{
|
||||
bool is_shader = false;
|
||||
|
||||
|
|
@ -326,9 +308,7 @@ static bool is_block_type_shader(
|
|||
return is_shader;
|
||||
}
|
||||
|
||||
static bool is_block_type_l2_cache(
|
||||
const u64 grp_type,
|
||||
const u64 blk_type)
|
||||
static bool is_block_type_l2_cache(const u64 grp_type, const u64 blk_type)
|
||||
{
|
||||
bool is_l2_cache = false;
|
||||
|
||||
|
|
@ -348,10 +328,8 @@ static bool is_block_type_l2_cache(
|
|||
}
|
||||
|
||||
int kbase_hwcnt_jm_dump_get(struct kbase_hwcnt_dump_buffer *dst, u64 *src,
|
||||
const struct kbase_hwcnt_enable_map *dst_enable_map,
|
||||
u64 pm_core_mask,
|
||||
const struct kbase_hwcnt_curr_config *curr_config,
|
||||
bool accumulate)
|
||||
const struct kbase_hwcnt_enable_map *dst_enable_map, u64 pm_core_mask,
|
||||
const struct kbase_hwcnt_curr_config *curr_config, bool accumulate)
|
||||
{
|
||||
const struct kbase_hwcnt_metadata *metadata;
|
||||
size_t grp, blk, blk_inst;
|
||||
|
|
@ -362,28 +340,21 @@ int kbase_hwcnt_jm_dump_get(struct kbase_hwcnt_dump_buffer *dst, u64 *src,
|
|||
/* Variables to deal with the current configuration */
|
||||
int l2_count = 0;
|
||||
|
||||
if (!dst || !src || !dst_enable_map ||
|
||||
(dst_enable_map->metadata != dst->metadata))
|
||||
if (!dst || !src || !dst_enable_map || (dst_enable_map->metadata != dst->metadata))
|
||||
return -EINVAL;
|
||||
|
||||
metadata = dst->metadata;
|
||||
|
||||
kbase_hwcnt_metadata_for_each_block(
|
||||
metadata, grp, blk, blk_inst) {
|
||||
const size_t hdr_cnt =
|
||||
kbase_hwcnt_metadata_block_headers_count(
|
||||
metadata, grp, blk);
|
||||
kbase_hwcnt_metadata_for_each_block(metadata, grp, blk, blk_inst)
|
||||
{
|
||||
const size_t hdr_cnt = kbase_hwcnt_metadata_block_headers_count(metadata, grp, blk);
|
||||
const size_t ctr_cnt =
|
||||
kbase_hwcnt_metadata_block_counters_count(
|
||||
metadata, grp, blk);
|
||||
const u64 blk_type = kbase_hwcnt_metadata_block_type(
|
||||
metadata, grp, blk);
|
||||
kbase_hwcnt_metadata_block_counters_count(metadata, grp, blk);
|
||||
const u64 blk_type = kbase_hwcnt_metadata_block_type(metadata, grp, blk);
|
||||
const bool is_shader_core = is_block_type_shader(
|
||||
kbase_hwcnt_metadata_group_type(metadata, grp),
|
||||
blk_type, blk);
|
||||
kbase_hwcnt_metadata_group_type(metadata, grp), blk_type, blk);
|
||||
const bool is_l2_cache = is_block_type_l2_cache(
|
||||
kbase_hwcnt_metadata_group_type(metadata, grp),
|
||||
blk_type);
|
||||
kbase_hwcnt_metadata_group_type(metadata, grp), blk_type);
|
||||
const bool is_undefined = kbase_hwcnt_is_block_type_undefined(
|
||||
kbase_hwcnt_metadata_group_type(metadata, grp), blk_type);
|
||||
bool hw_res_available = true;
|
||||
|
|
@ -412,10 +383,9 @@ int kbase_hwcnt_jm_dump_get(struct kbase_hwcnt_dump_buffer *dst, u64 *src,
|
|||
/*
|
||||
* Skip block if no values in the destination block are enabled.
|
||||
*/
|
||||
if (kbase_hwcnt_enable_map_block_enabled(
|
||||
dst_enable_map, grp, blk, blk_inst)) {
|
||||
u64 *dst_blk = kbase_hwcnt_dump_buffer_block_instance(
|
||||
dst, grp, blk, blk_inst);
|
||||
if (kbase_hwcnt_enable_map_block_enabled(dst_enable_map, grp, blk, blk_inst)) {
|
||||
u64 *dst_blk =
|
||||
kbase_hwcnt_dump_buffer_block_instance(dst, grp, blk, blk_inst);
|
||||
const u64 *src_blk = dump_src + src_offset;
|
||||
bool blk_powered;
|
||||
|
||||
|
|
@ -435,13 +405,11 @@ int kbase_hwcnt_jm_dump_get(struct kbase_hwcnt_dump_buffer *dst, u64 *src,
|
|||
if (blk_powered && !is_undefined && hw_res_available) {
|
||||
/* Only powered and defined blocks have valid data. */
|
||||
if (accumulate) {
|
||||
kbase_hwcnt_dump_buffer_block_accumulate(
|
||||
dst_blk, src_blk, hdr_cnt,
|
||||
ctr_cnt);
|
||||
kbase_hwcnt_dump_buffer_block_accumulate(dst_blk, src_blk,
|
||||
hdr_cnt, ctr_cnt);
|
||||
} else {
|
||||
kbase_hwcnt_dump_buffer_block_copy(
|
||||
dst_blk, src_blk,
|
||||
(hdr_cnt + ctr_cnt));
|
||||
kbase_hwcnt_dump_buffer_block_copy(dst_blk, src_blk,
|
||||
(hdr_cnt + ctr_cnt));
|
||||
}
|
||||
} else {
|
||||
/* Even though the block might be undefined, the
|
||||
|
|
@ -469,26 +437,23 @@ int kbase_hwcnt_jm_dump_get(struct kbase_hwcnt_dump_buffer *dst, u64 *src,
|
|||
}
|
||||
|
||||
int kbase_hwcnt_csf_dump_get(struct kbase_hwcnt_dump_buffer *dst, u64 *src,
|
||||
const struct kbase_hwcnt_enable_map *dst_enable_map,
|
||||
bool accumulate)
|
||||
const struct kbase_hwcnt_enable_map *dst_enable_map, bool accumulate)
|
||||
{
|
||||
const struct kbase_hwcnt_metadata *metadata;
|
||||
const u64 *dump_src = src;
|
||||
size_t src_offset = 0;
|
||||
size_t grp, blk, blk_inst;
|
||||
|
||||
if (!dst || !src || !dst_enable_map ||
|
||||
(dst_enable_map->metadata != dst->metadata))
|
||||
if (!dst || !src || !dst_enable_map || (dst_enable_map->metadata != dst->metadata))
|
||||
return -EINVAL;
|
||||
|
||||
metadata = dst->metadata;
|
||||
|
||||
kbase_hwcnt_metadata_for_each_block(metadata, grp, blk, blk_inst) {
|
||||
const size_t hdr_cnt = kbase_hwcnt_metadata_block_headers_count(
|
||||
metadata, grp, blk);
|
||||
kbase_hwcnt_metadata_for_each_block(metadata, grp, blk, blk_inst)
|
||||
{
|
||||
const size_t hdr_cnt = kbase_hwcnt_metadata_block_headers_count(metadata, grp, blk);
|
||||
const size_t ctr_cnt =
|
||||
kbase_hwcnt_metadata_block_counters_count(metadata, grp,
|
||||
blk);
|
||||
kbase_hwcnt_metadata_block_counters_count(metadata, grp, blk);
|
||||
const uint64_t blk_type = kbase_hwcnt_metadata_block_type(metadata, grp, blk);
|
||||
const bool is_undefined = kbase_hwcnt_is_block_type_undefined(
|
||||
kbase_hwcnt_metadata_group_type(metadata, grp), blk_type);
|
||||
|
|
@ -496,10 +461,9 @@ int kbase_hwcnt_csf_dump_get(struct kbase_hwcnt_dump_buffer *dst, u64 *src,
|
|||
/*
|
||||
* Skip block if no values in the destination block are enabled.
|
||||
*/
|
||||
if (kbase_hwcnt_enable_map_block_enabled(dst_enable_map, grp,
|
||||
blk, blk_inst)) {
|
||||
u64 *dst_blk = kbase_hwcnt_dump_buffer_block_instance(
|
||||
dst, grp, blk, blk_inst);
|
||||
if (kbase_hwcnt_enable_map_block_enabled(dst_enable_map, grp, blk, blk_inst)) {
|
||||
u64 *dst_blk =
|
||||
kbase_hwcnt_dump_buffer_block_instance(dst, grp, blk, blk_inst);
|
||||
const u64 *src_blk = dump_src + src_offset;
|
||||
|
||||
if (!is_undefined) {
|
||||
|
|
@ -542,12 +506,9 @@ int kbase_hwcnt_csf_dump_get(struct kbase_hwcnt_dump_buffer *dst, u64 *src,
|
|||
* @hi: Non-NULL pointer to where high 64 bits of block enable map abstraction
|
||||
* will be stored.
|
||||
*/
|
||||
static inline void kbasep_hwcnt_backend_gpu_block_map_from_physical(
|
||||
u32 phys,
|
||||
u64 *lo,
|
||||
u64 *hi)
|
||||
static inline void kbasep_hwcnt_backend_gpu_block_map_from_physical(u32 phys, u64 *lo, u64 *hi)
|
||||
{
|
||||
u64 dwords[2] = {0, 0};
|
||||
u64 dwords[2] = { 0, 0 };
|
||||
|
||||
size_t dword_idx;
|
||||
|
||||
|
|
@ -572,9 +533,8 @@ static inline void kbasep_hwcnt_backend_gpu_block_map_from_physical(
|
|||
*hi = dwords[1];
|
||||
}
|
||||
|
||||
void kbase_hwcnt_gpu_enable_map_to_physical(
|
||||
struct kbase_hwcnt_physical_enable_map *dst,
|
||||
const struct kbase_hwcnt_enable_map *src)
|
||||
void kbase_hwcnt_gpu_enable_map_to_physical(struct kbase_hwcnt_physical_enable_map *dst,
|
||||
const struct kbase_hwcnt_enable_map *src)
|
||||
{
|
||||
const struct kbase_hwcnt_metadata *metadata;
|
||||
u64 fe_bm[EM_COUNT] = { 0 };
|
||||
|
|
@ -588,17 +548,13 @@ void kbase_hwcnt_gpu_enable_map_to_physical(
|
|||
|
||||
metadata = src->metadata;
|
||||
|
||||
kbase_hwcnt_metadata_for_each_block(
|
||||
metadata, grp, blk, blk_inst) {
|
||||
const u64 grp_type = kbase_hwcnt_metadata_group_type(
|
||||
metadata, grp);
|
||||
const u64 blk_type = kbase_hwcnt_metadata_block_type(
|
||||
metadata, grp, blk);
|
||||
const u64 *blk_map = kbase_hwcnt_enable_map_block_instance(
|
||||
src, grp, blk, blk_inst);
|
||||
kbase_hwcnt_metadata_for_each_block(metadata, grp, blk, blk_inst)
|
||||
{
|
||||
const u64 grp_type = kbase_hwcnt_metadata_group_type(metadata, grp);
|
||||
const u64 blk_type = kbase_hwcnt_metadata_block_type(metadata, grp, blk);
|
||||
const u64 *blk_map = kbase_hwcnt_enable_map_block_instance(src, grp, blk, blk_inst);
|
||||
|
||||
if ((enum kbase_hwcnt_gpu_group_type)grp_type ==
|
||||
KBASE_HWCNT_GPU_GROUP_TYPE_V5) {
|
||||
if ((enum kbase_hwcnt_gpu_group_type)grp_type == KBASE_HWCNT_GPU_GROUP_TYPE_V5) {
|
||||
const size_t map_stride =
|
||||
kbase_hwcnt_metadata_block_enable_map_stride(metadata, grp, blk);
|
||||
size_t map_idx;
|
||||
|
|
@ -649,8 +605,7 @@ void kbase_hwcnt_gpu_enable_map_to_physical(
|
|||
kbase_hwcnt_backend_gpu_block_map_to_physical(mmu_l2_bm[EM_LO], mmu_l2_bm[EM_HI]);
|
||||
}
|
||||
|
||||
void kbase_hwcnt_gpu_set_to_physical(enum kbase_hwcnt_physical_set *dst,
|
||||
enum kbase_hwcnt_set src)
|
||||
void kbase_hwcnt_gpu_set_to_physical(enum kbase_hwcnt_physical_set *dst, enum kbase_hwcnt_set src)
|
||||
{
|
||||
switch (src) {
|
||||
case KBASE_HWCNT_SET_PRIMARY:
|
||||
|
|
@ -667,9 +622,8 @@ void kbase_hwcnt_gpu_set_to_physical(enum kbase_hwcnt_physical_set *dst,
|
|||
}
|
||||
}
|
||||
|
||||
void kbase_hwcnt_gpu_enable_map_from_physical(
|
||||
struct kbase_hwcnt_enable_map *dst,
|
||||
const struct kbase_hwcnt_physical_enable_map *src)
|
||||
void kbase_hwcnt_gpu_enable_map_from_physical(struct kbase_hwcnt_enable_map *dst,
|
||||
const struct kbase_hwcnt_physical_enable_map *src)
|
||||
{
|
||||
const struct kbase_hwcnt_metadata *metadata;
|
||||
|
||||
|
|
@ -692,16 +646,13 @@ void kbase_hwcnt_gpu_enable_map_from_physical(
|
|||
kbasep_hwcnt_backend_gpu_block_map_from_physical(src->mmu_l2_bm, &mmu_l2_bm[EM_LO],
|
||||
&mmu_l2_bm[EM_HI]);
|
||||
|
||||
kbase_hwcnt_metadata_for_each_block(metadata, grp, blk, blk_inst) {
|
||||
const u64 grp_type = kbase_hwcnt_metadata_group_type(
|
||||
metadata, grp);
|
||||
const u64 blk_type = kbase_hwcnt_metadata_block_type(
|
||||
metadata, grp, blk);
|
||||
u64 *blk_map = kbase_hwcnt_enable_map_block_instance(
|
||||
dst, grp, blk, blk_inst);
|
||||
kbase_hwcnt_metadata_for_each_block(metadata, grp, blk, blk_inst)
|
||||
{
|
||||
const u64 grp_type = kbase_hwcnt_metadata_group_type(metadata, grp);
|
||||
const u64 blk_type = kbase_hwcnt_metadata_block_type(metadata, grp, blk);
|
||||
u64 *blk_map = kbase_hwcnt_enable_map_block_instance(dst, grp, blk, blk_inst);
|
||||
|
||||
if ((enum kbase_hwcnt_gpu_group_type)grp_type ==
|
||||
KBASE_HWCNT_GPU_GROUP_TYPE_V5) {
|
||||
if ((enum kbase_hwcnt_gpu_group_type)grp_type == KBASE_HWCNT_GPU_GROUP_TYPE_V5) {
|
||||
const size_t map_stride =
|
||||
kbase_hwcnt_metadata_block_enable_map_stride(metadata, grp, blk);
|
||||
size_t map_idx;
|
||||
|
|
@ -744,29 +695,25 @@ void kbase_hwcnt_gpu_enable_map_from_physical(
|
|||
}
|
||||
}
|
||||
|
||||
void kbase_hwcnt_gpu_patch_dump_headers(
|
||||
struct kbase_hwcnt_dump_buffer *buf,
|
||||
const struct kbase_hwcnt_enable_map *enable_map)
|
||||
void kbase_hwcnt_gpu_patch_dump_headers(struct kbase_hwcnt_dump_buffer *buf,
|
||||
const struct kbase_hwcnt_enable_map *enable_map)
|
||||
{
|
||||
const struct kbase_hwcnt_metadata *metadata;
|
||||
size_t grp, blk, blk_inst;
|
||||
|
||||
if (WARN_ON(!buf) || WARN_ON(!enable_map) ||
|
||||
WARN_ON(buf->metadata != enable_map->metadata))
|
||||
if (WARN_ON(!buf) || WARN_ON(!enable_map) || WARN_ON(buf->metadata != enable_map->metadata))
|
||||
return;
|
||||
|
||||
metadata = buf->metadata;
|
||||
|
||||
kbase_hwcnt_metadata_for_each_block(metadata, grp, blk, blk_inst) {
|
||||
const u64 grp_type =
|
||||
kbase_hwcnt_metadata_group_type(metadata, grp);
|
||||
u64 *buf_blk = kbase_hwcnt_dump_buffer_block_instance(
|
||||
buf, grp, blk, blk_inst);
|
||||
const u64 *blk_map = kbase_hwcnt_enable_map_block_instance(
|
||||
enable_map, grp, blk, blk_inst);
|
||||
kbase_hwcnt_metadata_for_each_block(metadata, grp, blk, blk_inst)
|
||||
{
|
||||
const u64 grp_type = kbase_hwcnt_metadata_group_type(metadata, grp);
|
||||
u64 *buf_blk = kbase_hwcnt_dump_buffer_block_instance(buf, grp, blk, blk_inst);
|
||||
const u64 *blk_map =
|
||||
kbase_hwcnt_enable_map_block_instance(enable_map, grp, blk, blk_inst);
|
||||
|
||||
if ((enum kbase_hwcnt_gpu_group_type)grp_type ==
|
||||
KBASE_HWCNT_GPU_GROUP_TYPE_V5) {
|
||||
if ((enum kbase_hwcnt_gpu_group_type)grp_type == KBASE_HWCNT_GPU_GROUP_TYPE_V5) {
|
||||
const size_t map_stride =
|
||||
kbase_hwcnt_metadata_block_enable_map_stride(metadata, grp, blk);
|
||||
u64 prfcnt_bm[EM_COUNT] = { 0 };
|
||||
|
|
@ -34,9 +34,8 @@ struct kbase_hwcnt_dump_buffer;
|
|||
#define KBASE_HWCNT_V5_BLOCK_TYPE_COUNT 4
|
||||
#define KBASE_HWCNT_V5_HEADERS_PER_BLOCK 4
|
||||
#define KBASE_HWCNT_V5_DEFAULT_COUNTERS_PER_BLOCK 60
|
||||
#define KBASE_HWCNT_V5_DEFAULT_VALUES_PER_BLOCK \
|
||||
(KBASE_HWCNT_V5_HEADERS_PER_BLOCK + \
|
||||
KBASE_HWCNT_V5_DEFAULT_COUNTERS_PER_BLOCK)
|
||||
#define KBASE_HWCNT_V5_DEFAULT_VALUES_PER_BLOCK \
|
||||
(KBASE_HWCNT_V5_HEADERS_PER_BLOCK + KBASE_HWCNT_V5_DEFAULT_COUNTERS_PER_BLOCK)
|
||||
|
||||
/* FrontEnd block count in V5 GPU hardware counter. */
|
||||
#define KBASE_HWCNT_V5_FE_BLOCK_COUNT 1
|
||||
|
|
@ -228,19 +227,17 @@ static inline bool kbase_hwcnt_is_block_type_undefined(const uint64_t grp_type,
|
|||
*
|
||||
* Return: 0 on success, else error code.
|
||||
*/
|
||||
int kbase_hwcnt_jm_metadata_create(
|
||||
const struct kbase_hwcnt_gpu_info *info,
|
||||
enum kbase_hwcnt_set counter_set,
|
||||
const struct kbase_hwcnt_metadata **out_metadata,
|
||||
size_t *out_dump_bytes);
|
||||
int kbase_hwcnt_jm_metadata_create(const struct kbase_hwcnt_gpu_info *info,
|
||||
enum kbase_hwcnt_set counter_set,
|
||||
const struct kbase_hwcnt_metadata **out_metadata,
|
||||
size_t *out_dump_bytes);
|
||||
|
||||
/**
|
||||
* kbase_hwcnt_jm_metadata_destroy() - Destroy JM GPU hardware counter metadata.
|
||||
*
|
||||
* @metadata: Pointer to metadata to destroy.
|
||||
*/
|
||||
void kbase_hwcnt_jm_metadata_destroy(
|
||||
const struct kbase_hwcnt_metadata *metadata);
|
||||
void kbase_hwcnt_jm_metadata_destroy(const struct kbase_hwcnt_metadata *metadata);
|
||||
|
||||
/**
|
||||
* kbase_hwcnt_csf_metadata_create() - Create hardware counter metadata for the
|
||||
|
|
@ -252,18 +249,16 @@ void kbase_hwcnt_jm_metadata_destroy(
|
|||
*
|
||||
* Return: 0 on success, else error code.
|
||||
*/
|
||||
int kbase_hwcnt_csf_metadata_create(
|
||||
const struct kbase_hwcnt_gpu_info *info,
|
||||
enum kbase_hwcnt_set counter_set,
|
||||
const struct kbase_hwcnt_metadata **out_metadata);
|
||||
int kbase_hwcnt_csf_metadata_create(const struct kbase_hwcnt_gpu_info *info,
|
||||
enum kbase_hwcnt_set counter_set,
|
||||
const struct kbase_hwcnt_metadata **out_metadata);
|
||||
|
||||
/**
|
||||
* kbase_hwcnt_csf_metadata_destroy() - Destroy CSF GPU hardware counter
|
||||
* metadata.
|
||||
* @metadata: Pointer to metadata to destroy.
|
||||
*/
|
||||
void kbase_hwcnt_csf_metadata_destroy(
|
||||
const struct kbase_hwcnt_metadata *metadata);
|
||||
void kbase_hwcnt_csf_metadata_destroy(const struct kbase_hwcnt_metadata *metadata);
|
||||
|
||||
/**
|
||||
* kbase_hwcnt_jm_dump_get() - Copy or accumulate enabled counters from the raw
|
||||
|
|
@ -289,8 +284,7 @@ void kbase_hwcnt_csf_metadata_destroy(
|
|||
int kbase_hwcnt_jm_dump_get(struct kbase_hwcnt_dump_buffer *dst, u64 *src,
|
||||
const struct kbase_hwcnt_enable_map *dst_enable_map,
|
||||
const u64 pm_core_mask,
|
||||
const struct kbase_hwcnt_curr_config *curr_config,
|
||||
bool accumulate);
|
||||
const struct kbase_hwcnt_curr_config *curr_config, bool accumulate);
|
||||
|
||||
/**
|
||||
* kbase_hwcnt_csf_dump_get() - Copy or accumulate enabled counters from the raw
|
||||
|
|
@ -310,8 +304,7 @@ int kbase_hwcnt_jm_dump_get(struct kbase_hwcnt_dump_buffer *dst, u64 *src,
|
|||
* Return: 0 on success, else error code.
|
||||
*/
|
||||
int kbase_hwcnt_csf_dump_get(struct kbase_hwcnt_dump_buffer *dst, u64 *src,
|
||||
const struct kbase_hwcnt_enable_map *dst_enable_map,
|
||||
bool accumulate);
|
||||
const struct kbase_hwcnt_enable_map *dst_enable_map, bool accumulate);
|
||||
|
||||
/**
|
||||
* kbase_hwcnt_backend_gpu_block_map_to_physical() - Convert from a block
|
||||
|
|
@ -365,9 +358,8 @@ static inline u32 kbase_hwcnt_backend_gpu_block_map_to_physical(u64 lo, u64 hi)
|
|||
* individual counter block value, but the physical enable map uses 1 bit for
|
||||
* every 4 counters, shared over all instances of a block.
|
||||
*/
|
||||
void kbase_hwcnt_gpu_enable_map_to_physical(
|
||||
struct kbase_hwcnt_physical_enable_map *dst,
|
||||
const struct kbase_hwcnt_enable_map *src);
|
||||
void kbase_hwcnt_gpu_enable_map_to_physical(struct kbase_hwcnt_physical_enable_map *dst,
|
||||
const struct kbase_hwcnt_enable_map *src);
|
||||
|
||||
/**
|
||||
* kbase_hwcnt_gpu_set_to_physical() - Map counter set selection to physical
|
||||
|
|
@ -376,8 +368,7 @@ void kbase_hwcnt_gpu_enable_map_to_physical(
|
|||
* @dst: Non-NULL pointer to destination physical SET_SELECT value.
|
||||
* @src: Source counter set selection.
|
||||
*/
|
||||
void kbase_hwcnt_gpu_set_to_physical(enum kbase_hwcnt_physical_set *dst,
|
||||
enum kbase_hwcnt_set src);
|
||||
void kbase_hwcnt_gpu_set_to_physical(enum kbase_hwcnt_physical_set *dst, enum kbase_hwcnt_set src);
|
||||
|
||||
/**
|
||||
* kbase_hwcnt_gpu_enable_map_from_physical() - Convert a physical enable map to
|
||||
|
|
@ -393,9 +384,8 @@ void kbase_hwcnt_gpu_set_to_physical(enum kbase_hwcnt_physical_set *dst,
|
|||
* more than 64, so the enable map abstraction has nowhere to store the enable
|
||||
* information for the 64 non-existent counters.
|
||||
*/
|
||||
void kbase_hwcnt_gpu_enable_map_from_physical(
|
||||
struct kbase_hwcnt_enable_map *dst,
|
||||
const struct kbase_hwcnt_physical_enable_map *src);
|
||||
void kbase_hwcnt_gpu_enable_map_from_physical(struct kbase_hwcnt_enable_map *dst,
|
||||
const struct kbase_hwcnt_physical_enable_map *src);
|
||||
|
||||
/**
|
||||
* kbase_hwcnt_gpu_patch_dump_headers() - Patch all the performance counter
|
||||
|
|
@ -411,8 +401,7 @@ void kbase_hwcnt_gpu_enable_map_from_physical(
|
|||
* kernel-user boundary, to ensure the header is accurate for the enable map
|
||||
* used by the user.
|
||||
*/
|
||||
void kbase_hwcnt_gpu_patch_dump_headers(
|
||||
struct kbase_hwcnt_dump_buffer *buf,
|
||||
const struct kbase_hwcnt_enable_map *enable_map);
|
||||
void kbase_hwcnt_gpu_patch_dump_headers(struct kbase_hwcnt_dump_buffer *buf,
|
||||
const struct kbase_hwcnt_enable_map *enable_map);
|
||||
|
||||
#endif /* _KBASE_HWCNT_GPU_H_ */
|
||||
|
|
@ -19,21 +19,19 @@
|
|||
*
|
||||
*/
|
||||
|
||||
#include "mali_kbase_hwcnt_gpu.h"
|
||||
#include "mali_kbase_hwcnt_gpu_narrow.h"
|
||||
#include "hwcnt/mali_kbase_hwcnt_gpu.h"
|
||||
#include "hwcnt/mali_kbase_hwcnt_gpu_narrow.h"
|
||||
|
||||
#include <linux/bug.h>
|
||||
#include <linux/err.h>
|
||||
#include <linux/slab.h>
|
||||
|
||||
int kbase_hwcnt_gpu_metadata_narrow_create(
|
||||
const struct kbase_hwcnt_metadata_narrow **dst_md_narrow,
|
||||
const struct kbase_hwcnt_metadata *src_md)
|
||||
int kbase_hwcnt_gpu_metadata_narrow_create(const struct kbase_hwcnt_metadata_narrow **dst_md_narrow,
|
||||
const struct kbase_hwcnt_metadata *src_md)
|
||||
{
|
||||
struct kbase_hwcnt_description desc;
|
||||
struct kbase_hwcnt_group_description group;
|
||||
struct kbase_hwcnt_block_description
|
||||
blks[KBASE_HWCNT_V5_BLOCK_TYPE_COUNT];
|
||||
struct kbase_hwcnt_block_description blks[KBASE_HWCNT_V5_BLOCK_TYPE_COUNT];
|
||||
size_t prfcnt_values_per_block;
|
||||
size_t blk;
|
||||
int err;
|
||||
|
|
@ -47,18 +45,15 @@ int kbase_hwcnt_gpu_metadata_narrow_create(
|
|||
* count in the metadata.
|
||||
*/
|
||||
if ((kbase_hwcnt_metadata_group_count(src_md) != 1) ||
|
||||
(kbase_hwcnt_metadata_block_count(src_md, 0) !=
|
||||
KBASE_HWCNT_V5_BLOCK_TYPE_COUNT))
|
||||
(kbase_hwcnt_metadata_block_count(src_md, 0) != KBASE_HWCNT_V5_BLOCK_TYPE_COUNT))
|
||||
return -EINVAL;
|
||||
|
||||
/* Get the values count in the first block. */
|
||||
prfcnt_values_per_block =
|
||||
kbase_hwcnt_metadata_block_values_count(src_md, 0, 0);
|
||||
prfcnt_values_per_block = kbase_hwcnt_metadata_block_values_count(src_md, 0, 0);
|
||||
|
||||
/* Check that all blocks have the same values count. */
|
||||
for (blk = 1; blk < KBASE_HWCNT_V5_BLOCK_TYPE_COUNT; blk++) {
|
||||
size_t val_cnt =
|
||||
kbase_hwcnt_metadata_block_values_count(src_md, 0, blk);
|
||||
size_t val_cnt = kbase_hwcnt_metadata_block_values_count(src_md, 0, blk);
|
||||
if (val_cnt != prfcnt_values_per_block)
|
||||
return -EINVAL;
|
||||
}
|
||||
|
|
@ -75,12 +70,10 @@ int kbase_hwcnt_gpu_metadata_narrow_create(
|
|||
prfcnt_values_per_block = 64;
|
||||
|
||||
for (blk = 0; blk < KBASE_HWCNT_V5_BLOCK_TYPE_COUNT; blk++) {
|
||||
size_t blk_hdr_cnt = kbase_hwcnt_metadata_block_headers_count(
|
||||
src_md, 0, blk);
|
||||
size_t blk_hdr_cnt = kbase_hwcnt_metadata_block_headers_count(src_md, 0, blk);
|
||||
blks[blk] = (struct kbase_hwcnt_block_description){
|
||||
.type = kbase_hwcnt_metadata_block_type(src_md, 0, blk),
|
||||
.inst_cnt = kbase_hwcnt_metadata_block_instance_count(
|
||||
src_md, 0, blk),
|
||||
.inst_cnt = kbase_hwcnt_metadata_block_instance_count(src_md, 0, blk),
|
||||
.hdr_cnt = blk_hdr_cnt,
|
||||
.ctr_cnt = prfcnt_values_per_block - blk_hdr_cnt,
|
||||
};
|
||||
|
|
@ -105,8 +98,7 @@ int kbase_hwcnt_gpu_metadata_narrow_create(
|
|||
* only supports 32-bit but the created metadata uses 64-bit for
|
||||
* block entry.
|
||||
*/
|
||||
metadata_narrow->dump_buf_bytes =
|
||||
metadata_narrow->metadata->dump_buf_bytes >> 1;
|
||||
metadata_narrow->dump_buf_bytes = metadata_narrow->metadata->dump_buf_bytes >> 1;
|
||||
*dst_md_narrow = metadata_narrow;
|
||||
} else {
|
||||
kfree(metadata_narrow);
|
||||
|
|
@ -115,8 +107,7 @@ int kbase_hwcnt_gpu_metadata_narrow_create(
|
|||
return err;
|
||||
}
|
||||
|
||||
void kbase_hwcnt_gpu_metadata_narrow_destroy(
|
||||
const struct kbase_hwcnt_metadata_narrow *md_narrow)
|
||||
void kbase_hwcnt_gpu_metadata_narrow_destroy(const struct kbase_hwcnt_metadata_narrow *md_narrow)
|
||||
{
|
||||
if (!md_narrow)
|
||||
return;
|
||||
|
|
@ -125,9 +116,8 @@ void kbase_hwcnt_gpu_metadata_narrow_destroy(
|
|||
kfree(md_narrow);
|
||||
}
|
||||
|
||||
int kbase_hwcnt_dump_buffer_narrow_alloc(
|
||||
const struct kbase_hwcnt_metadata_narrow *md_narrow,
|
||||
struct kbase_hwcnt_dump_buffer_narrow *dump_buf)
|
||||
int kbase_hwcnt_dump_buffer_narrow_alloc(const struct kbase_hwcnt_metadata_narrow *md_narrow,
|
||||
struct kbase_hwcnt_dump_buffer_narrow *dump_buf)
|
||||
{
|
||||
size_t dump_buf_bytes;
|
||||
size_t clk_cnt_buf_bytes;
|
||||
|
|
@ -137,8 +127,7 @@ int kbase_hwcnt_dump_buffer_narrow_alloc(
|
|||
return -EINVAL;
|
||||
|
||||
dump_buf_bytes = md_narrow->dump_buf_bytes;
|
||||
clk_cnt_buf_bytes =
|
||||
sizeof(*dump_buf->clk_cnt_buf) * md_narrow->metadata->clk_cnt;
|
||||
clk_cnt_buf_bytes = sizeof(*dump_buf->clk_cnt_buf) * md_narrow->metadata->clk_cnt;
|
||||
|
||||
/* Make a single allocation for both dump_buf and clk_cnt_buf. */
|
||||
buf = kmalloc(dump_buf_bytes + clk_cnt_buf_bytes, GFP_KERNEL);
|
||||
|
|
@ -154,14 +143,15 @@ int kbase_hwcnt_dump_buffer_narrow_alloc(
|
|||
return 0;
|
||||
}
|
||||
|
||||
void kbase_hwcnt_dump_buffer_narrow_free(
|
||||
struct kbase_hwcnt_dump_buffer_narrow *dump_buf_narrow)
|
||||
void kbase_hwcnt_dump_buffer_narrow_free(struct kbase_hwcnt_dump_buffer_narrow *dump_buf_narrow)
|
||||
{
|
||||
if (!dump_buf_narrow)
|
||||
return;
|
||||
|
||||
kfree(dump_buf_narrow->dump_buf);
|
||||
*dump_buf_narrow = (struct kbase_hwcnt_dump_buffer_narrow){ NULL };
|
||||
*dump_buf_narrow = (struct kbase_hwcnt_dump_buffer_narrow){ .md_narrow = NULL,
|
||||
.dump_buf = NULL,
|
||||
.clk_cnt_buf = NULL };
|
||||
}
|
||||
|
||||
int kbase_hwcnt_dump_buffer_narrow_array_alloc(
|
||||
|
|
@ -180,8 +170,7 @@ int kbase_hwcnt_dump_buffer_narrow_array_alloc(
|
|||
return -EINVAL;
|
||||
|
||||
dump_buf_bytes = md_narrow->dump_buf_bytes;
|
||||
clk_cnt_buf_bytes = sizeof(*dump_bufs->bufs->clk_cnt_buf) *
|
||||
md_narrow->metadata->clk_cnt;
|
||||
clk_cnt_buf_bytes = sizeof(*dump_bufs->bufs->clk_cnt_buf) * md_narrow->metadata->clk_cnt;
|
||||
|
||||
/* Allocate memory for the dump buffer struct array */
|
||||
buffers = kmalloc_array(n, sizeof(*buffers), GFP_KERNEL);
|
||||
|
|
@ -234,27 +223,22 @@ void kbase_hwcnt_dump_buffer_narrow_array_free(
|
|||
memset(dump_bufs, 0, sizeof(*dump_bufs));
|
||||
}
|
||||
|
||||
void kbase_hwcnt_dump_buffer_block_copy_strict_narrow(u32 *dst_blk,
|
||||
const u64 *src_blk,
|
||||
const u64 *blk_em,
|
||||
size_t val_cnt)
|
||||
void kbase_hwcnt_dump_buffer_block_copy_strict_narrow(u32 *dst_blk, const u64 *src_blk,
|
||||
const u64 *blk_em, size_t val_cnt)
|
||||
{
|
||||
size_t val;
|
||||
|
||||
for (val = 0; val < val_cnt; val++) {
|
||||
bool val_enabled =
|
||||
kbase_hwcnt_enable_map_block_value_enabled(blk_em, val);
|
||||
u32 src_val =
|
||||
(src_blk[val] > U32_MAX) ? U32_MAX : (u32)src_blk[val];
|
||||
bool val_enabled = kbase_hwcnt_enable_map_block_value_enabled(blk_em, val);
|
||||
u32 src_val = (src_blk[val] > U32_MAX) ? U32_MAX : (u32)src_blk[val];
|
||||
|
||||
dst_blk[val] = val_enabled ? src_val : 0;
|
||||
}
|
||||
}
|
||||
|
||||
void kbase_hwcnt_dump_buffer_copy_strict_narrow(
|
||||
struct kbase_hwcnt_dump_buffer_narrow *dst_narrow,
|
||||
const struct kbase_hwcnt_dump_buffer *src,
|
||||
const struct kbase_hwcnt_enable_map *dst_enable_map)
|
||||
void kbase_hwcnt_dump_buffer_copy_strict_narrow(struct kbase_hwcnt_dump_buffer_narrow *dst_narrow,
|
||||
const struct kbase_hwcnt_dump_buffer *src,
|
||||
const struct kbase_hwcnt_enable_map *dst_enable_map)
|
||||
{
|
||||
const struct kbase_hwcnt_metadata_narrow *metadata_narrow;
|
||||
size_t grp;
|
||||
|
|
@ -262,68 +246,53 @@ void kbase_hwcnt_dump_buffer_copy_strict_narrow(
|
|||
|
||||
if (WARN_ON(!dst_narrow) || WARN_ON(!src) || WARN_ON(!dst_enable_map) ||
|
||||
WARN_ON(dst_narrow->md_narrow->metadata == src->metadata) ||
|
||||
WARN_ON(dst_narrow->md_narrow->metadata->grp_cnt !=
|
||||
src->metadata->grp_cnt) ||
|
||||
WARN_ON(dst_narrow->md_narrow->metadata->grp_cnt != src->metadata->grp_cnt) ||
|
||||
WARN_ON(src->metadata->grp_cnt != 1) ||
|
||||
WARN_ON(dst_narrow->md_narrow->metadata->grp_metadata[0].blk_cnt !=
|
||||
src->metadata->grp_metadata[0].blk_cnt) ||
|
||||
WARN_ON(dst_narrow->md_narrow->metadata->grp_metadata[0].blk_cnt !=
|
||||
KBASE_HWCNT_V5_BLOCK_TYPE_COUNT) ||
|
||||
WARN_ON(dst_narrow->md_narrow->metadata->grp_metadata[0]
|
||||
.blk_metadata[0]
|
||||
.ctr_cnt >
|
||||
WARN_ON(dst_narrow->md_narrow->metadata->grp_metadata[0].blk_metadata[0].ctr_cnt >
|
||||
src->metadata->grp_metadata[0].blk_metadata[0].ctr_cnt))
|
||||
return;
|
||||
|
||||
/* Don't use src metadata since src buffer is bigger than dst buffer. */
|
||||
metadata_narrow = dst_narrow->md_narrow;
|
||||
|
||||
for (grp = 0;
|
||||
grp < kbase_hwcnt_metadata_narrow_group_count(metadata_narrow);
|
||||
grp++) {
|
||||
for (grp = 0; grp < kbase_hwcnt_metadata_narrow_group_count(metadata_narrow); grp++) {
|
||||
size_t blk;
|
||||
size_t blk_cnt = kbase_hwcnt_metadata_narrow_block_count(
|
||||
metadata_narrow, grp);
|
||||
size_t blk_cnt = kbase_hwcnt_metadata_narrow_block_count(metadata_narrow, grp);
|
||||
|
||||
for (blk = 0; blk < blk_cnt; blk++) {
|
||||
size_t blk_inst;
|
||||
size_t blk_inst_cnt =
|
||||
kbase_hwcnt_metadata_narrow_block_instance_count(
|
||||
metadata_narrow, grp, blk);
|
||||
size_t blk_inst_cnt = kbase_hwcnt_metadata_narrow_block_instance_count(
|
||||
metadata_narrow, grp, blk);
|
||||
|
||||
for (blk_inst = 0; blk_inst < blk_inst_cnt;
|
||||
blk_inst++) {
|
||||
for (blk_inst = 0; blk_inst < blk_inst_cnt; blk_inst++) {
|
||||
/* The narrowed down buffer is only 32-bit. */
|
||||
u32 *dst_blk =
|
||||
kbase_hwcnt_dump_buffer_narrow_block_instance(
|
||||
dst_narrow, grp, blk, blk_inst);
|
||||
const u64 *src_blk =
|
||||
kbase_hwcnt_dump_buffer_block_instance(
|
||||
src, grp, blk, blk_inst);
|
||||
const u64 *blk_em =
|
||||
kbase_hwcnt_enable_map_block_instance(
|
||||
dst_enable_map, grp, blk,
|
||||
blk_inst);
|
||||
size_t val_cnt =
|
||||
kbase_hwcnt_metadata_narrow_block_values_count(
|
||||
metadata_narrow, grp, blk);
|
||||
u32 *dst_blk = kbase_hwcnt_dump_buffer_narrow_block_instance(
|
||||
dst_narrow, grp, blk, blk_inst);
|
||||
const u64 *src_blk = kbase_hwcnt_dump_buffer_block_instance(
|
||||
src, grp, blk, blk_inst);
|
||||
const u64 *blk_em = kbase_hwcnt_enable_map_block_instance(
|
||||
dst_enable_map, grp, blk, blk_inst);
|
||||
size_t val_cnt = kbase_hwcnt_metadata_narrow_block_values_count(
|
||||
metadata_narrow, grp, blk);
|
||||
/* Align upwards to include padding bytes */
|
||||
val_cnt = KBASE_HWCNT_ALIGN_UPWARDS(
|
||||
val_cnt,
|
||||
(KBASE_HWCNT_BLOCK_BYTE_ALIGNMENT /
|
||||
KBASE_HWCNT_VALUE_BYTES));
|
||||
val_cnt, (KBASE_HWCNT_BLOCK_BYTE_ALIGNMENT /
|
||||
KBASE_HWCNT_VALUE_BYTES));
|
||||
|
||||
kbase_hwcnt_dump_buffer_block_copy_strict_narrow(
|
||||
dst_blk, src_blk, blk_em, val_cnt);
|
||||
kbase_hwcnt_dump_buffer_block_copy_strict_narrow(dst_blk, src_blk,
|
||||
blk_em, val_cnt);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for (clk = 0; clk < metadata_narrow->metadata->clk_cnt; clk++) {
|
||||
bool clk_enabled = kbase_hwcnt_clk_enable_map_enabled(
|
||||
dst_enable_map->clk_enable_map, clk);
|
||||
bool clk_enabled =
|
||||
kbase_hwcnt_clk_enable_map_enabled(dst_enable_map->clk_enable_map, clk);
|
||||
|
||||
dst_narrow->clk_cnt_buf[clk] =
|
||||
clk_enabled ? src->clk_cnt_buf[clk] : 0;
|
||||
dst_narrow->clk_cnt_buf[clk] = clk_enabled ? src->clk_cnt_buf[clk] : 0;
|
||||
}
|
||||
}
|
||||
|
|
@ -1,7 +1,7 @@
|
|||
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
|
||||
/*
|
||||
*
|
||||
* (C) COPYRIGHT 2021 ARM Limited. All rights reserved.
|
||||
* (C) COPYRIGHT 2021-2022 ARM Limited. All rights reserved.
|
||||
*
|
||||
* This program is free software and is provided to you under the terms of the
|
||||
* GNU General Public License version 2 as published by the Free Software
|
||||
|
|
@ -22,7 +22,7 @@
|
|||
#ifndef _KBASE_HWCNT_GPU_NARROW_H_
|
||||
#define _KBASE_HWCNT_GPU_NARROW_H_
|
||||
|
||||
#include "mali_kbase_hwcnt_types.h"
|
||||
#include "hwcnt/mali_kbase_hwcnt_types.h"
|
||||
#include <linux/types.h>
|
||||
|
||||
struct kbase_device;
|
||||
|
|
@ -86,8 +86,8 @@ struct kbase_hwcnt_dump_buffer_narrow_array {
|
|||
*
|
||||
* Return: Number of hardware counter groups described by narrow metadata.
|
||||
*/
|
||||
static inline size_t kbase_hwcnt_metadata_narrow_group_count(
|
||||
const struct kbase_hwcnt_metadata_narrow *md_narrow)
|
||||
static inline size_t
|
||||
kbase_hwcnt_metadata_narrow_group_count(const struct kbase_hwcnt_metadata_narrow *md_narrow)
|
||||
{
|
||||
return kbase_hwcnt_metadata_group_count(md_narrow->metadata);
|
||||
}
|
||||
|
|
@ -100,8 +100,9 @@ static inline size_t kbase_hwcnt_metadata_narrow_group_count(
|
|||
*
|
||||
* Return: Type of the group grp.
|
||||
*/
|
||||
static inline u64 kbase_hwcnt_metadata_narrow_group_type(
|
||||
const struct kbase_hwcnt_metadata_narrow *md_narrow, size_t grp)
|
||||
static inline u64
|
||||
kbase_hwcnt_metadata_narrow_group_type(const struct kbase_hwcnt_metadata_narrow *md_narrow,
|
||||
size_t grp)
|
||||
{
|
||||
return kbase_hwcnt_metadata_group_type(md_narrow->metadata, grp);
|
||||
}
|
||||
|
|
@ -114,8 +115,9 @@ static inline u64 kbase_hwcnt_metadata_narrow_group_type(
|
|||
*
|
||||
* Return: Number of blocks in group grp.
|
||||
*/
|
||||
static inline size_t kbase_hwcnt_metadata_narrow_block_count(
|
||||
const struct kbase_hwcnt_metadata_narrow *md_narrow, size_t grp)
|
||||
static inline size_t
|
||||
kbase_hwcnt_metadata_narrow_block_count(const struct kbase_hwcnt_metadata_narrow *md_narrow,
|
||||
size_t grp)
|
||||
{
|
||||
return kbase_hwcnt_metadata_block_count(md_narrow->metadata, grp);
|
||||
}
|
||||
|
|
@ -131,11 +133,9 @@ static inline size_t kbase_hwcnt_metadata_narrow_block_count(
|
|||
* Return: Number of instances of block blk in group grp.
|
||||
*/
|
||||
static inline size_t kbase_hwcnt_metadata_narrow_block_instance_count(
|
||||
const struct kbase_hwcnt_metadata_narrow *md_narrow, size_t grp,
|
||||
size_t blk)
|
||||
const struct kbase_hwcnt_metadata_narrow *md_narrow, size_t grp, size_t blk)
|
||||
{
|
||||
return kbase_hwcnt_metadata_block_instance_count(md_narrow->metadata,
|
||||
grp, blk);
|
||||
return kbase_hwcnt_metadata_block_instance_count(md_narrow->metadata, grp, blk);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -148,12 +148,11 @@ static inline size_t kbase_hwcnt_metadata_narrow_block_instance_count(
|
|||
*
|
||||
* Return: Number of counter headers in each instance of block blk in group grp.
|
||||
*/
|
||||
static inline size_t kbase_hwcnt_metadata_narrow_block_headers_count(
|
||||
const struct kbase_hwcnt_metadata_narrow *md_narrow, size_t grp,
|
||||
size_t blk)
|
||||
static inline size_t
|
||||
kbase_hwcnt_metadata_narrow_block_headers_count(const struct kbase_hwcnt_metadata_narrow *md_narrow,
|
||||
size_t grp, size_t blk)
|
||||
{
|
||||
return kbase_hwcnt_metadata_block_headers_count(md_narrow->metadata,
|
||||
grp, blk);
|
||||
return kbase_hwcnt_metadata_block_headers_count(md_narrow->metadata, grp, blk);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -167,11 +166,9 @@ static inline size_t kbase_hwcnt_metadata_narrow_block_headers_count(
|
|||
* Return: Number of counters in each instance of block blk in group grp.
|
||||
*/
|
||||
static inline size_t kbase_hwcnt_metadata_narrow_block_counters_count(
|
||||
const struct kbase_hwcnt_metadata_narrow *md_narrow, size_t grp,
|
||||
size_t blk)
|
||||
const struct kbase_hwcnt_metadata_narrow *md_narrow, size_t grp, size_t blk)
|
||||
{
|
||||
return kbase_hwcnt_metadata_block_counters_count(md_narrow->metadata,
|
||||
grp, blk);
|
||||
return kbase_hwcnt_metadata_block_counters_count(md_narrow->metadata, grp, blk);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -184,14 +181,12 @@ static inline size_t kbase_hwcnt_metadata_narrow_block_counters_count(
|
|||
* Return: Number of headers plus counters in each instance of block blk
|
||||
* in group grp.
|
||||
*/
|
||||
static inline size_t kbase_hwcnt_metadata_narrow_block_values_count(
|
||||
const struct kbase_hwcnt_metadata_narrow *md_narrow, size_t grp,
|
||||
size_t blk)
|
||||
static inline size_t
|
||||
kbase_hwcnt_metadata_narrow_block_values_count(const struct kbase_hwcnt_metadata_narrow *md_narrow,
|
||||
size_t grp, size_t blk)
|
||||
{
|
||||
return kbase_hwcnt_metadata_narrow_block_counters_count(md_narrow, grp,
|
||||
blk) +
|
||||
kbase_hwcnt_metadata_narrow_block_headers_count(md_narrow, grp,
|
||||
blk);
|
||||
return kbase_hwcnt_metadata_narrow_block_counters_count(md_narrow, grp, blk) +
|
||||
kbase_hwcnt_metadata_narrow_block_headers_count(md_narrow, grp, blk);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -205,18 +200,13 @@ static inline size_t kbase_hwcnt_metadata_narrow_block_values_count(
|
|||
*
|
||||
* Return: u32* to the dump buffer for the block instance.
|
||||
*/
|
||||
static inline u32 *kbase_hwcnt_dump_buffer_narrow_block_instance(
|
||||
const struct kbase_hwcnt_dump_buffer_narrow *buf, size_t grp,
|
||||
size_t blk, size_t blk_inst)
|
||||
static inline u32 *
|
||||
kbase_hwcnt_dump_buffer_narrow_block_instance(const struct kbase_hwcnt_dump_buffer_narrow *buf,
|
||||
size_t grp, size_t blk, size_t blk_inst)
|
||||
{
|
||||
return buf->dump_buf +
|
||||
buf->md_narrow->metadata->grp_metadata[grp].dump_buf_index +
|
||||
buf->md_narrow->metadata->grp_metadata[grp]
|
||||
.blk_metadata[blk]
|
||||
.dump_buf_index +
|
||||
(buf->md_narrow->metadata->grp_metadata[grp]
|
||||
.blk_metadata[blk]
|
||||
.dump_buf_stride *
|
||||
return buf->dump_buf + buf->md_narrow->metadata->grp_metadata[grp].dump_buf_index +
|
||||
buf->md_narrow->metadata->grp_metadata[grp].blk_metadata[blk].dump_buf_index +
|
||||
(buf->md_narrow->metadata->grp_metadata[grp].blk_metadata[blk].dump_buf_stride *
|
||||
blk_inst);
|
||||
}
|
||||
|
||||
|
|
@ -239,17 +229,15 @@ static inline u32 *kbase_hwcnt_dump_buffer_narrow_block_instance(
|
|||
*
|
||||
* Return: 0 on success, else error code.
|
||||
*/
|
||||
int kbase_hwcnt_gpu_metadata_narrow_create(
|
||||
const struct kbase_hwcnt_metadata_narrow **dst_md_narrow,
|
||||
const struct kbase_hwcnt_metadata *src_md);
|
||||
int kbase_hwcnt_gpu_metadata_narrow_create(const struct kbase_hwcnt_metadata_narrow **dst_md_narrow,
|
||||
const struct kbase_hwcnt_metadata *src_md);
|
||||
|
||||
/**
|
||||
* kbase_hwcnt_gpu_metadata_narrow_destroy() - Destroy a hardware counter narrow
|
||||
* metadata object.
|
||||
* @md_narrow: Pointer to hardware counter narrow metadata.
|
||||
*/
|
||||
void kbase_hwcnt_gpu_metadata_narrow_destroy(
|
||||
const struct kbase_hwcnt_metadata_narrow *md_narrow);
|
||||
void kbase_hwcnt_gpu_metadata_narrow_destroy(const struct kbase_hwcnt_metadata_narrow *md_narrow);
|
||||
|
||||
/**
|
||||
* kbase_hwcnt_dump_buffer_narrow_alloc() - Allocate a narrow dump buffer.
|
||||
|
|
@ -260,9 +248,8 @@ void kbase_hwcnt_gpu_metadata_narrow_destroy(
|
|||
*
|
||||
* Return: 0 on success, else error code.
|
||||
*/
|
||||
int kbase_hwcnt_dump_buffer_narrow_alloc(
|
||||
const struct kbase_hwcnt_metadata_narrow *md_narrow,
|
||||
struct kbase_hwcnt_dump_buffer_narrow *dump_buf);
|
||||
int kbase_hwcnt_dump_buffer_narrow_alloc(const struct kbase_hwcnt_metadata_narrow *md_narrow,
|
||||
struct kbase_hwcnt_dump_buffer_narrow *dump_buf);
|
||||
|
||||
/**
|
||||
* kbase_hwcnt_dump_buffer_narrow_free() - Free a narrow dump buffer.
|
||||
|
|
@ -271,8 +258,7 @@ int kbase_hwcnt_dump_buffer_narrow_alloc(
|
|||
* Can be safely called on an all-zeroed narrow dump buffer structure, or on an
|
||||
* already freed narrow dump buffer.
|
||||
*/
|
||||
void kbase_hwcnt_dump_buffer_narrow_free(
|
||||
struct kbase_hwcnt_dump_buffer_narrow *dump_buf);
|
||||
void kbase_hwcnt_dump_buffer_narrow_free(struct kbase_hwcnt_dump_buffer_narrow *dump_buf);
|
||||
|
||||
/**
|
||||
* kbase_hwcnt_dump_buffer_narrow_array_alloc() - Allocate an array of narrow
|
||||
|
|
@ -320,10 +306,8 @@ void kbase_hwcnt_dump_buffer_narrow_array_free(
|
|||
* source value is bigger than U32_MAX, or copy the value from source if the
|
||||
* corresponding source value is less than or equal to U32_MAX.
|
||||
*/
|
||||
void kbase_hwcnt_dump_buffer_block_copy_strict_narrow(u32 *dst_blk,
|
||||
const u64 *src_blk,
|
||||
const u64 *blk_em,
|
||||
size_t val_cnt);
|
||||
void kbase_hwcnt_dump_buffer_block_copy_strict_narrow(u32 *dst_blk, const u64 *src_blk,
|
||||
const u64 *blk_em, size_t val_cnt);
|
||||
|
||||
/**
|
||||
* kbase_hwcnt_dump_buffer_copy_strict_narrow() - Copy all enabled values to a
|
||||
|
|
@ -339,9 +323,8 @@ void kbase_hwcnt_dump_buffer_block_copy_strict_narrow(u32 *dst_blk,
|
|||
* corresponding source value is bigger than U32_MAX, or copy the value from
|
||||
* source if the corresponding source value is less than or equal to U32_MAX.
|
||||
*/
|
||||
void kbase_hwcnt_dump_buffer_copy_strict_narrow(
|
||||
struct kbase_hwcnt_dump_buffer_narrow *dst_narrow,
|
||||
const struct kbase_hwcnt_dump_buffer *src,
|
||||
const struct kbase_hwcnt_enable_map *dst_enable_map);
|
||||
void kbase_hwcnt_dump_buffer_copy_strict_narrow(struct kbase_hwcnt_dump_buffer_narrow *dst_narrow,
|
||||
const struct kbase_hwcnt_dump_buffer *src,
|
||||
const struct kbase_hwcnt_enable_map *dst_enable_map);
|
||||
|
||||
#endif /* _KBASE_HWCNT_GPU_NARROW_H_ */
|
||||
|
|
@ -1,7 +1,7 @@
|
|||
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
|
||||
/*
|
||||
*
|
||||
* (C) COPYRIGHT 2018, 2020-2021 ARM Limited. All rights reserved.
|
||||
* (C) COPYRIGHT 2018, 2020-2022 ARM Limited. All rights reserved.
|
||||
*
|
||||
* This program is free software and is provided to you under the terms of the
|
||||
* GNU General Public License version 2 as published by the Free Software
|
||||
|
|
@ -19,13 +19,12 @@
|
|||
*
|
||||
*/
|
||||
|
||||
#include "mali_kbase_hwcnt_types.h"
|
||||
#include "hwcnt/mali_kbase_hwcnt_types.h"
|
||||
|
||||
#include <linux/slab.h>
|
||||
|
||||
int kbase_hwcnt_metadata_create(
|
||||
const struct kbase_hwcnt_description *desc,
|
||||
const struct kbase_hwcnt_metadata **out_metadata)
|
||||
int kbase_hwcnt_metadata_create(const struct kbase_hwcnt_description *desc,
|
||||
const struct kbase_hwcnt_metadata **out_metadata)
|
||||
{
|
||||
char *buf;
|
||||
struct kbase_hwcnt_metadata *metadata;
|
||||
|
|
@ -56,8 +55,7 @@ int kbase_hwcnt_metadata_create(
|
|||
|
||||
/* Block metadata */
|
||||
for (grp = 0; grp < desc->grp_cnt; grp++) {
|
||||
size += sizeof(struct kbase_hwcnt_block_metadata) *
|
||||
desc->grps[grp].blk_cnt;
|
||||
size += sizeof(struct kbase_hwcnt_block_metadata) * desc->grps[grp].blk_cnt;
|
||||
}
|
||||
|
||||
/* Single allocation for the entire metadata */
|
||||
|
|
@ -83,8 +81,7 @@ int kbase_hwcnt_metadata_create(
|
|||
for (grp = 0; grp < desc->grp_cnt; grp++) {
|
||||
size_t blk;
|
||||
|
||||
const struct kbase_hwcnt_group_description *grp_desc =
|
||||
desc->grps + grp;
|
||||
const struct kbase_hwcnt_group_description *grp_desc = desc->grps + grp;
|
||||
struct kbase_hwcnt_group_metadata *grp_md = grp_mds + grp;
|
||||
|
||||
size_t group_enable_map_count = 0;
|
||||
|
|
@ -94,37 +91,28 @@ int kbase_hwcnt_metadata_create(
|
|||
/* Bump allocate this group's block metadata */
|
||||
struct kbase_hwcnt_block_metadata *blk_mds =
|
||||
(struct kbase_hwcnt_block_metadata *)(buf + offset);
|
||||
offset += sizeof(struct kbase_hwcnt_block_metadata) *
|
||||
grp_desc->blk_cnt;
|
||||
offset += sizeof(struct kbase_hwcnt_block_metadata) * grp_desc->blk_cnt;
|
||||
|
||||
/* Fill in each block in the group's information */
|
||||
for (blk = 0; blk < grp_desc->blk_cnt; blk++) {
|
||||
const struct kbase_hwcnt_block_description *blk_desc =
|
||||
grp_desc->blks + blk;
|
||||
struct kbase_hwcnt_block_metadata *blk_md =
|
||||
blk_mds + blk;
|
||||
const size_t n_values =
|
||||
blk_desc->hdr_cnt + blk_desc->ctr_cnt;
|
||||
const struct kbase_hwcnt_block_description *blk_desc = grp_desc->blks + blk;
|
||||
struct kbase_hwcnt_block_metadata *blk_md = blk_mds + blk;
|
||||
const size_t n_values = blk_desc->hdr_cnt + blk_desc->ctr_cnt;
|
||||
|
||||
blk_md->type = blk_desc->type;
|
||||
blk_md->inst_cnt = blk_desc->inst_cnt;
|
||||
blk_md->hdr_cnt = blk_desc->hdr_cnt;
|
||||
blk_md->ctr_cnt = blk_desc->ctr_cnt;
|
||||
blk_md->enable_map_index = group_enable_map_count;
|
||||
blk_md->enable_map_stride =
|
||||
kbase_hwcnt_bitfield_count(n_values);
|
||||
blk_md->enable_map_stride = kbase_hwcnt_bitfield_count(n_values);
|
||||
blk_md->dump_buf_index = group_dump_buffer_count;
|
||||
blk_md->dump_buf_stride =
|
||||
KBASE_HWCNT_ALIGN_UPWARDS(
|
||||
n_values,
|
||||
(KBASE_HWCNT_BLOCK_BYTE_ALIGNMENT /
|
||||
KBASE_HWCNT_VALUE_BYTES));
|
||||
blk_md->dump_buf_stride = KBASE_HWCNT_ALIGN_UPWARDS(
|
||||
n_values,
|
||||
(KBASE_HWCNT_BLOCK_BYTE_ALIGNMENT / KBASE_HWCNT_VALUE_BYTES));
|
||||
blk_md->avail_mask_index = group_avail_mask_bits;
|
||||
|
||||
group_enable_map_count +=
|
||||
blk_md->enable_map_stride * blk_md->inst_cnt;
|
||||
group_dump_buffer_count +=
|
||||
blk_md->dump_buf_stride * blk_md->inst_cnt;
|
||||
group_enable_map_count += blk_md->enable_map_stride * blk_md->inst_cnt;
|
||||
group_dump_buffer_count += blk_md->dump_buf_stride * blk_md->inst_cnt;
|
||||
group_avail_mask_bits += blk_md->inst_cnt;
|
||||
}
|
||||
|
||||
|
|
@ -144,8 +132,7 @@ int kbase_hwcnt_metadata_create(
|
|||
/* Fill in the top level metadata's information */
|
||||
metadata->grp_cnt = desc->grp_cnt;
|
||||
metadata->grp_metadata = grp_mds;
|
||||
metadata->enable_map_bytes =
|
||||
enable_map_count * KBASE_HWCNT_BITFIELD_BYTES;
|
||||
metadata->enable_map_bytes = enable_map_count * KBASE_HWCNT_BITFIELD_BYTES;
|
||||
metadata->dump_buf_bytes = dump_buf_count * KBASE_HWCNT_VALUE_BYTES;
|
||||
metadata->avail_mask = desc->avail_mask;
|
||||
metadata->clk_cnt = desc->clk_cnt;
|
||||
|
|
@ -155,8 +142,7 @@ int kbase_hwcnt_metadata_create(
|
|||
* bit per 4 bytes in the dump buffer.
|
||||
*/
|
||||
WARN_ON(metadata->dump_buf_bytes !=
|
||||
(metadata->enable_map_bytes *
|
||||
BITS_PER_BYTE * KBASE_HWCNT_VALUE_BYTES));
|
||||
(metadata->enable_map_bytes * BITS_PER_BYTE * KBASE_HWCNT_VALUE_BYTES));
|
||||
|
||||
*out_metadata = metadata;
|
||||
return 0;
|
||||
|
|
@ -167,9 +153,8 @@ void kbase_hwcnt_metadata_destroy(const struct kbase_hwcnt_metadata *metadata)
|
|||
kfree(metadata);
|
||||
}
|
||||
|
||||
int kbase_hwcnt_enable_map_alloc(
|
||||
const struct kbase_hwcnt_metadata *metadata,
|
||||
struct kbase_hwcnt_enable_map *enable_map)
|
||||
int kbase_hwcnt_enable_map_alloc(const struct kbase_hwcnt_metadata *metadata,
|
||||
struct kbase_hwcnt_enable_map *enable_map)
|
||||
{
|
||||
u64 *enable_map_buf;
|
||||
|
||||
|
|
@ -177,8 +162,7 @@ int kbase_hwcnt_enable_map_alloc(
|
|||
return -EINVAL;
|
||||
|
||||
if (metadata->enable_map_bytes > 0) {
|
||||
enable_map_buf =
|
||||
kzalloc(metadata->enable_map_bytes, GFP_KERNEL);
|
||||
enable_map_buf = kzalloc(metadata->enable_map_bytes, GFP_KERNEL);
|
||||
if (!enable_map_buf)
|
||||
return -ENOMEM;
|
||||
} else {
|
||||
|
|
@ -200,9 +184,8 @@ void kbase_hwcnt_enable_map_free(struct kbase_hwcnt_enable_map *enable_map)
|
|||
enable_map->metadata = NULL;
|
||||
}
|
||||
|
||||
int kbase_hwcnt_dump_buffer_alloc(
|
||||
const struct kbase_hwcnt_metadata *metadata,
|
||||
struct kbase_hwcnt_dump_buffer *dump_buf)
|
||||
int kbase_hwcnt_dump_buffer_alloc(const struct kbase_hwcnt_metadata *metadata,
|
||||
struct kbase_hwcnt_dump_buffer *dump_buf)
|
||||
{
|
||||
size_t dump_buf_bytes;
|
||||
size_t clk_cnt_buf_bytes;
|
||||
|
|
@ -235,10 +218,8 @@ void kbase_hwcnt_dump_buffer_free(struct kbase_hwcnt_dump_buffer *dump_buf)
|
|||
memset(dump_buf, 0, sizeof(*dump_buf));
|
||||
}
|
||||
|
||||
int kbase_hwcnt_dump_buffer_array_alloc(
|
||||
const struct kbase_hwcnt_metadata *metadata,
|
||||
size_t n,
|
||||
struct kbase_hwcnt_dump_buffer_array *dump_bufs)
|
||||
int kbase_hwcnt_dump_buffer_array_alloc(const struct kbase_hwcnt_metadata *metadata, size_t n,
|
||||
struct kbase_hwcnt_dump_buffer_array *dump_bufs)
|
||||
{
|
||||
struct kbase_hwcnt_dump_buffer *buffers;
|
||||
size_t buf_idx;
|
||||
|
|
@ -251,8 +232,7 @@ int kbase_hwcnt_dump_buffer_array_alloc(
|
|||
return -EINVAL;
|
||||
|
||||
dump_buf_bytes = metadata->dump_buf_bytes;
|
||||
clk_cnt_buf_bytes =
|
||||
sizeof(*dump_bufs->bufs->clk_cnt_buf) * metadata->clk_cnt;
|
||||
clk_cnt_buf_bytes = sizeof(*dump_bufs->bufs->clk_cnt_buf) * metadata->clk_cnt;
|
||||
|
||||
/* Allocate memory for the dump buffer struct array */
|
||||
buffers = kmalloc_array(n, sizeof(*buffers), GFP_KERNEL);
|
||||
|
|
@ -283,15 +263,13 @@ int kbase_hwcnt_dump_buffer_array_alloc(
|
|||
|
||||
buffers[buf_idx].metadata = metadata;
|
||||
buffers[buf_idx].dump_buf = (u64 *)(addr + dump_buf_offset);
|
||||
buffers[buf_idx].clk_cnt_buf =
|
||||
(u64 *)(addr + clk_cnt_buf_offset);
|
||||
buffers[buf_idx].clk_cnt_buf = (u64 *)(addr + clk_cnt_buf_offset);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
void kbase_hwcnt_dump_buffer_array_free(
|
||||
struct kbase_hwcnt_dump_buffer_array *dump_bufs)
|
||||
void kbase_hwcnt_dump_buffer_array_free(struct kbase_hwcnt_dump_buffer_array *dump_bufs)
|
||||
{
|
||||
if (!dump_bufs)
|
||||
return;
|
||||
|
|
@ -301,84 +279,71 @@ void kbase_hwcnt_dump_buffer_array_free(
|
|||
memset(dump_bufs, 0, sizeof(*dump_bufs));
|
||||
}
|
||||
|
||||
void kbase_hwcnt_dump_buffer_zero(
|
||||
struct kbase_hwcnt_dump_buffer *dst,
|
||||
const struct kbase_hwcnt_enable_map *dst_enable_map)
|
||||
void kbase_hwcnt_dump_buffer_zero(struct kbase_hwcnt_dump_buffer *dst,
|
||||
const struct kbase_hwcnt_enable_map *dst_enable_map)
|
||||
{
|
||||
const struct kbase_hwcnt_metadata *metadata;
|
||||
size_t grp, blk, blk_inst;
|
||||
|
||||
if (WARN_ON(!dst) ||
|
||||
WARN_ON(!dst_enable_map) ||
|
||||
if (WARN_ON(!dst) || WARN_ON(!dst_enable_map) ||
|
||||
WARN_ON(dst->metadata != dst_enable_map->metadata))
|
||||
return;
|
||||
|
||||
metadata = dst->metadata;
|
||||
|
||||
kbase_hwcnt_metadata_for_each_block(metadata, grp, blk, blk_inst) {
|
||||
kbase_hwcnt_metadata_for_each_block(metadata, grp, blk, blk_inst)
|
||||
{
|
||||
u64 *dst_blk;
|
||||
size_t val_cnt;
|
||||
|
||||
if (!kbase_hwcnt_enable_map_block_enabled(
|
||||
dst_enable_map, grp, blk, blk_inst))
|
||||
if (!kbase_hwcnt_enable_map_block_enabled(dst_enable_map, grp, blk, blk_inst))
|
||||
continue;
|
||||
|
||||
dst_blk = kbase_hwcnt_dump_buffer_block_instance(
|
||||
dst, grp, blk, blk_inst);
|
||||
val_cnt = kbase_hwcnt_metadata_block_values_count(
|
||||
metadata, grp, blk);
|
||||
dst_blk = kbase_hwcnt_dump_buffer_block_instance(dst, grp, blk, blk_inst);
|
||||
val_cnt = kbase_hwcnt_metadata_block_values_count(metadata, grp, blk);
|
||||
|
||||
kbase_hwcnt_dump_buffer_block_zero(dst_blk, val_cnt);
|
||||
}
|
||||
|
||||
memset(dst->clk_cnt_buf, 0,
|
||||
sizeof(*dst->clk_cnt_buf) * metadata->clk_cnt);
|
||||
memset(dst->clk_cnt_buf, 0, sizeof(*dst->clk_cnt_buf) * metadata->clk_cnt);
|
||||
}
|
||||
|
||||
void kbase_hwcnt_dump_buffer_zero_strict(
|
||||
struct kbase_hwcnt_dump_buffer *dst)
|
||||
void kbase_hwcnt_dump_buffer_zero_strict(struct kbase_hwcnt_dump_buffer *dst)
|
||||
{
|
||||
if (WARN_ON(!dst))
|
||||
return;
|
||||
|
||||
memset(dst->dump_buf, 0, dst->metadata->dump_buf_bytes);
|
||||
|
||||
memset(dst->clk_cnt_buf, 0,
|
||||
sizeof(*dst->clk_cnt_buf) * dst->metadata->clk_cnt);
|
||||
memset(dst->clk_cnt_buf, 0, sizeof(*dst->clk_cnt_buf) * dst->metadata->clk_cnt);
|
||||
}
|
||||
|
||||
void kbase_hwcnt_dump_buffer_zero_non_enabled(
|
||||
struct kbase_hwcnt_dump_buffer *dst,
|
||||
const struct kbase_hwcnt_enable_map *dst_enable_map)
|
||||
void kbase_hwcnt_dump_buffer_zero_non_enabled(struct kbase_hwcnt_dump_buffer *dst,
|
||||
const struct kbase_hwcnt_enable_map *dst_enable_map)
|
||||
{
|
||||
const struct kbase_hwcnt_metadata *metadata;
|
||||
size_t grp, blk, blk_inst;
|
||||
|
||||
if (WARN_ON(!dst) ||
|
||||
WARN_ON(!dst_enable_map) ||
|
||||
if (WARN_ON(!dst) || WARN_ON(!dst_enable_map) ||
|
||||
WARN_ON(dst->metadata != dst_enable_map->metadata))
|
||||
return;
|
||||
|
||||
metadata = dst->metadata;
|
||||
|
||||
kbase_hwcnt_metadata_for_each_block(metadata, grp, blk, blk_inst) {
|
||||
u64 *dst_blk = kbase_hwcnt_dump_buffer_block_instance(
|
||||
dst, grp, blk, blk_inst);
|
||||
const u64 *blk_em = kbase_hwcnt_enable_map_block_instance(
|
||||
dst_enable_map, grp, blk, blk_inst);
|
||||
size_t val_cnt = kbase_hwcnt_metadata_block_values_count(
|
||||
metadata, grp, blk);
|
||||
kbase_hwcnt_metadata_for_each_block(metadata, grp, blk, blk_inst)
|
||||
{
|
||||
u64 *dst_blk = kbase_hwcnt_dump_buffer_block_instance(dst, grp, blk, blk_inst);
|
||||
const u64 *blk_em =
|
||||
kbase_hwcnt_enable_map_block_instance(dst_enable_map, grp, blk, blk_inst);
|
||||
size_t val_cnt = kbase_hwcnt_metadata_block_values_count(metadata, grp, blk);
|
||||
|
||||
/* Align upwards to include padding bytes */
|
||||
val_cnt = KBASE_HWCNT_ALIGN_UPWARDS(val_cnt,
|
||||
(KBASE_HWCNT_BLOCK_BYTE_ALIGNMENT /
|
||||
KBASE_HWCNT_VALUE_BYTES));
|
||||
val_cnt = KBASE_HWCNT_ALIGN_UPWARDS(
|
||||
val_cnt, (KBASE_HWCNT_BLOCK_BYTE_ALIGNMENT / KBASE_HWCNT_VALUE_BYTES));
|
||||
|
||||
if (kbase_hwcnt_metadata_block_instance_avail(
|
||||
metadata, grp, blk, blk_inst)) {
|
||||
if (kbase_hwcnt_metadata_block_instance_avail(metadata, grp, blk, blk_inst)) {
|
||||
/* Block available, so only zero non-enabled values */
|
||||
kbase_hwcnt_dump_buffer_block_zero_non_enabled(
|
||||
dst_blk, blk_em, val_cnt);
|
||||
kbase_hwcnt_dump_buffer_block_zero_non_enabled(dst_blk, blk_em, val_cnt);
|
||||
} else {
|
||||
/* Block not available, so zero the entire thing */
|
||||
kbase_hwcnt_dump_buffer_block_zero(dst_blk, val_cnt);
|
||||
|
|
@ -386,188 +351,159 @@ void kbase_hwcnt_dump_buffer_zero_non_enabled(
|
|||
}
|
||||
}
|
||||
|
||||
void kbase_hwcnt_dump_buffer_copy(
|
||||
struct kbase_hwcnt_dump_buffer *dst,
|
||||
const struct kbase_hwcnt_dump_buffer *src,
|
||||
const struct kbase_hwcnt_enable_map *dst_enable_map)
|
||||
void kbase_hwcnt_dump_buffer_copy(struct kbase_hwcnt_dump_buffer *dst,
|
||||
const struct kbase_hwcnt_dump_buffer *src,
|
||||
const struct kbase_hwcnt_enable_map *dst_enable_map)
|
||||
{
|
||||
const struct kbase_hwcnt_metadata *metadata;
|
||||
size_t grp, blk, blk_inst;
|
||||
size_t clk;
|
||||
|
||||
if (WARN_ON(!dst) ||
|
||||
WARN_ON(!src) ||
|
||||
WARN_ON(!dst_enable_map) ||
|
||||
WARN_ON(dst == src) ||
|
||||
if (WARN_ON(!dst) || WARN_ON(!src) || WARN_ON(!dst_enable_map) || WARN_ON(dst == src) ||
|
||||
WARN_ON(dst->metadata != src->metadata) ||
|
||||
WARN_ON(dst->metadata != dst_enable_map->metadata))
|
||||
return;
|
||||
|
||||
metadata = dst->metadata;
|
||||
|
||||
kbase_hwcnt_metadata_for_each_block(metadata, grp, blk, blk_inst) {
|
||||
kbase_hwcnt_metadata_for_each_block(metadata, grp, blk, blk_inst)
|
||||
{
|
||||
u64 *dst_blk;
|
||||
const u64 *src_blk;
|
||||
size_t val_cnt;
|
||||
|
||||
if (!kbase_hwcnt_enable_map_block_enabled(
|
||||
dst_enable_map, grp, blk, blk_inst))
|
||||
if (!kbase_hwcnt_enable_map_block_enabled(dst_enable_map, grp, blk, blk_inst))
|
||||
continue;
|
||||
|
||||
dst_blk = kbase_hwcnt_dump_buffer_block_instance(
|
||||
dst, grp, blk, blk_inst);
|
||||
src_blk = kbase_hwcnt_dump_buffer_block_instance(
|
||||
src, grp, blk, blk_inst);
|
||||
val_cnt = kbase_hwcnt_metadata_block_values_count(
|
||||
metadata, grp, blk);
|
||||
dst_blk = kbase_hwcnt_dump_buffer_block_instance(dst, grp, blk, blk_inst);
|
||||
src_blk = kbase_hwcnt_dump_buffer_block_instance(src, grp, blk, blk_inst);
|
||||
val_cnt = kbase_hwcnt_metadata_block_values_count(metadata, grp, blk);
|
||||
|
||||
kbase_hwcnt_dump_buffer_block_copy(dst_blk, src_blk, val_cnt);
|
||||
}
|
||||
|
||||
kbase_hwcnt_metadata_for_each_clock(metadata, clk) {
|
||||
if (kbase_hwcnt_clk_enable_map_enabled(
|
||||
dst_enable_map->clk_enable_map, clk))
|
||||
kbase_hwcnt_metadata_for_each_clock(metadata, clk)
|
||||
{
|
||||
if (kbase_hwcnt_clk_enable_map_enabled(dst_enable_map->clk_enable_map, clk))
|
||||
dst->clk_cnt_buf[clk] = src->clk_cnt_buf[clk];
|
||||
}
|
||||
}
|
||||
|
||||
void kbase_hwcnt_dump_buffer_copy_strict(
|
||||
struct kbase_hwcnt_dump_buffer *dst,
|
||||
const struct kbase_hwcnt_dump_buffer *src,
|
||||
const struct kbase_hwcnt_enable_map *dst_enable_map)
|
||||
void kbase_hwcnt_dump_buffer_copy_strict(struct kbase_hwcnt_dump_buffer *dst,
|
||||
const struct kbase_hwcnt_dump_buffer *src,
|
||||
const struct kbase_hwcnt_enable_map *dst_enable_map)
|
||||
{
|
||||
const struct kbase_hwcnt_metadata *metadata;
|
||||
size_t grp, blk, blk_inst;
|
||||
size_t clk;
|
||||
|
||||
if (WARN_ON(!dst) ||
|
||||
WARN_ON(!src) ||
|
||||
WARN_ON(!dst_enable_map) ||
|
||||
WARN_ON(dst == src) ||
|
||||
if (WARN_ON(!dst) || WARN_ON(!src) || WARN_ON(!dst_enable_map) || WARN_ON(dst == src) ||
|
||||
WARN_ON(dst->metadata != src->metadata) ||
|
||||
WARN_ON(dst->metadata != dst_enable_map->metadata))
|
||||
return;
|
||||
|
||||
metadata = dst->metadata;
|
||||
|
||||
kbase_hwcnt_metadata_for_each_block(metadata, grp, blk, blk_inst) {
|
||||
u64 *dst_blk = kbase_hwcnt_dump_buffer_block_instance(
|
||||
dst, grp, blk, blk_inst);
|
||||
const u64 *src_blk = kbase_hwcnt_dump_buffer_block_instance(
|
||||
src, grp, blk, blk_inst);
|
||||
const u64 *blk_em = kbase_hwcnt_enable_map_block_instance(
|
||||
dst_enable_map, grp, blk, blk_inst);
|
||||
size_t val_cnt = kbase_hwcnt_metadata_block_values_count(
|
||||
metadata, grp, blk);
|
||||
kbase_hwcnt_metadata_for_each_block(metadata, grp, blk, blk_inst)
|
||||
{
|
||||
u64 *dst_blk = kbase_hwcnt_dump_buffer_block_instance(dst, grp, blk, blk_inst);
|
||||
const u64 *src_blk =
|
||||
kbase_hwcnt_dump_buffer_block_instance(src, grp, blk, blk_inst);
|
||||
const u64 *blk_em =
|
||||
kbase_hwcnt_enable_map_block_instance(dst_enable_map, grp, blk, blk_inst);
|
||||
size_t val_cnt = kbase_hwcnt_metadata_block_values_count(metadata, grp, blk);
|
||||
/* Align upwards to include padding bytes */
|
||||
val_cnt = KBASE_HWCNT_ALIGN_UPWARDS(val_cnt,
|
||||
(KBASE_HWCNT_BLOCK_BYTE_ALIGNMENT /
|
||||
KBASE_HWCNT_VALUE_BYTES));
|
||||
val_cnt = KBASE_HWCNT_ALIGN_UPWARDS(
|
||||
val_cnt, (KBASE_HWCNT_BLOCK_BYTE_ALIGNMENT / KBASE_HWCNT_VALUE_BYTES));
|
||||
|
||||
kbase_hwcnt_dump_buffer_block_copy_strict(
|
||||
dst_blk, src_blk, blk_em, val_cnt);
|
||||
kbase_hwcnt_dump_buffer_block_copy_strict(dst_blk, src_blk, blk_em, val_cnt);
|
||||
}
|
||||
|
||||
kbase_hwcnt_metadata_for_each_clock(metadata, clk) {
|
||||
kbase_hwcnt_metadata_for_each_clock(metadata, clk)
|
||||
{
|
||||
bool clk_enabled =
|
||||
kbase_hwcnt_clk_enable_map_enabled(
|
||||
dst_enable_map->clk_enable_map, clk);
|
||||
kbase_hwcnt_clk_enable_map_enabled(dst_enable_map->clk_enable_map, clk);
|
||||
|
||||
dst->clk_cnt_buf[clk] = clk_enabled ? src->clk_cnt_buf[clk] : 0;
|
||||
}
|
||||
}
|
||||
|
||||
void kbase_hwcnt_dump_buffer_accumulate(
|
||||
struct kbase_hwcnt_dump_buffer *dst,
|
||||
const struct kbase_hwcnt_dump_buffer *src,
|
||||
const struct kbase_hwcnt_enable_map *dst_enable_map)
|
||||
void kbase_hwcnt_dump_buffer_accumulate(struct kbase_hwcnt_dump_buffer *dst,
|
||||
const struct kbase_hwcnt_dump_buffer *src,
|
||||
const struct kbase_hwcnt_enable_map *dst_enable_map)
|
||||
{
|
||||
const struct kbase_hwcnt_metadata *metadata;
|
||||
size_t grp, blk, blk_inst;
|
||||
size_t clk;
|
||||
|
||||
if (WARN_ON(!dst) ||
|
||||
WARN_ON(!src) ||
|
||||
WARN_ON(!dst_enable_map) ||
|
||||
WARN_ON(dst == src) ||
|
||||
if (WARN_ON(!dst) || WARN_ON(!src) || WARN_ON(!dst_enable_map) || WARN_ON(dst == src) ||
|
||||
WARN_ON(dst->metadata != src->metadata) ||
|
||||
WARN_ON(dst->metadata != dst_enable_map->metadata))
|
||||
return;
|
||||
|
||||
metadata = dst->metadata;
|
||||
|
||||
kbase_hwcnt_metadata_for_each_block(metadata, grp, blk, blk_inst) {
|
||||
kbase_hwcnt_metadata_for_each_block(metadata, grp, blk, blk_inst)
|
||||
{
|
||||
u64 *dst_blk;
|
||||
const u64 *src_blk;
|
||||
size_t hdr_cnt;
|
||||
size_t ctr_cnt;
|
||||
|
||||
if (!kbase_hwcnt_enable_map_block_enabled(
|
||||
dst_enable_map, grp, blk, blk_inst))
|
||||
if (!kbase_hwcnt_enable_map_block_enabled(dst_enable_map, grp, blk, blk_inst))
|
||||
continue;
|
||||
|
||||
dst_blk = kbase_hwcnt_dump_buffer_block_instance(
|
||||
dst, grp, blk, blk_inst);
|
||||
src_blk = kbase_hwcnt_dump_buffer_block_instance(
|
||||
src, grp, blk, blk_inst);
|
||||
hdr_cnt = kbase_hwcnt_metadata_block_headers_count(
|
||||
metadata, grp, blk);
|
||||
ctr_cnt = kbase_hwcnt_metadata_block_counters_count(
|
||||
metadata, grp, blk);
|
||||
dst_blk = kbase_hwcnt_dump_buffer_block_instance(dst, grp, blk, blk_inst);
|
||||
src_blk = kbase_hwcnt_dump_buffer_block_instance(src, grp, blk, blk_inst);
|
||||
hdr_cnt = kbase_hwcnt_metadata_block_headers_count(metadata, grp, blk);
|
||||
ctr_cnt = kbase_hwcnt_metadata_block_counters_count(metadata, grp, blk);
|
||||
|
||||
kbase_hwcnt_dump_buffer_block_accumulate(
|
||||
dst_blk, src_blk, hdr_cnt, ctr_cnt);
|
||||
kbase_hwcnt_dump_buffer_block_accumulate(dst_blk, src_blk, hdr_cnt, ctr_cnt);
|
||||
}
|
||||
|
||||
kbase_hwcnt_metadata_for_each_clock(metadata, clk) {
|
||||
if (kbase_hwcnt_clk_enable_map_enabled(
|
||||
dst_enable_map->clk_enable_map, clk))
|
||||
kbase_hwcnt_metadata_for_each_clock(metadata, clk)
|
||||
{
|
||||
if (kbase_hwcnt_clk_enable_map_enabled(dst_enable_map->clk_enable_map, clk))
|
||||
dst->clk_cnt_buf[clk] += src->clk_cnt_buf[clk];
|
||||
}
|
||||
}
|
||||
|
||||
void kbase_hwcnt_dump_buffer_accumulate_strict(
|
||||
struct kbase_hwcnt_dump_buffer *dst,
|
||||
const struct kbase_hwcnt_dump_buffer *src,
|
||||
const struct kbase_hwcnt_enable_map *dst_enable_map)
|
||||
void kbase_hwcnt_dump_buffer_accumulate_strict(struct kbase_hwcnt_dump_buffer *dst,
|
||||
const struct kbase_hwcnt_dump_buffer *src,
|
||||
const struct kbase_hwcnt_enable_map *dst_enable_map)
|
||||
{
|
||||
const struct kbase_hwcnt_metadata *metadata;
|
||||
size_t grp, blk, blk_inst;
|
||||
size_t clk;
|
||||
|
||||
if (WARN_ON(!dst) ||
|
||||
WARN_ON(!src) ||
|
||||
WARN_ON(!dst_enable_map) ||
|
||||
WARN_ON(dst == src) ||
|
||||
if (WARN_ON(!dst) || WARN_ON(!src) || WARN_ON(!dst_enable_map) || WARN_ON(dst == src) ||
|
||||
WARN_ON(dst->metadata != src->metadata) ||
|
||||
WARN_ON(dst->metadata != dst_enable_map->metadata))
|
||||
return;
|
||||
|
||||
metadata = dst->metadata;
|
||||
|
||||
kbase_hwcnt_metadata_for_each_block(metadata, grp, blk, blk_inst) {
|
||||
u64 *dst_blk = kbase_hwcnt_dump_buffer_block_instance(
|
||||
dst, grp, blk, blk_inst);
|
||||
const u64 *src_blk = kbase_hwcnt_dump_buffer_block_instance(
|
||||
src, grp, blk, blk_inst);
|
||||
const u64 *blk_em = kbase_hwcnt_enable_map_block_instance(
|
||||
dst_enable_map, grp, blk, blk_inst);
|
||||
size_t hdr_cnt = kbase_hwcnt_metadata_block_headers_count(
|
||||
metadata, grp, blk);
|
||||
size_t ctr_cnt = kbase_hwcnt_metadata_block_counters_count(
|
||||
metadata, grp, blk);
|
||||
kbase_hwcnt_metadata_for_each_block(metadata, grp, blk, blk_inst)
|
||||
{
|
||||
u64 *dst_blk = kbase_hwcnt_dump_buffer_block_instance(dst, grp, blk, blk_inst);
|
||||
const u64 *src_blk =
|
||||
kbase_hwcnt_dump_buffer_block_instance(src, grp, blk, blk_inst);
|
||||
const u64 *blk_em =
|
||||
kbase_hwcnt_enable_map_block_instance(dst_enable_map, grp, blk, blk_inst);
|
||||
size_t hdr_cnt = kbase_hwcnt_metadata_block_headers_count(metadata, grp, blk);
|
||||
size_t ctr_cnt = kbase_hwcnt_metadata_block_counters_count(metadata, grp, blk);
|
||||
/* Align upwards to include padding bytes */
|
||||
ctr_cnt = KBASE_HWCNT_ALIGN_UPWARDS(hdr_cnt + ctr_cnt,
|
||||
(KBASE_HWCNT_BLOCK_BYTE_ALIGNMENT /
|
||||
KBASE_HWCNT_VALUE_BYTES) - hdr_cnt);
|
||||
ctr_cnt = KBASE_HWCNT_ALIGN_UPWARDS(
|
||||
hdr_cnt + ctr_cnt,
|
||||
(KBASE_HWCNT_BLOCK_BYTE_ALIGNMENT / KBASE_HWCNT_VALUE_BYTES) - hdr_cnt);
|
||||
|
||||
kbase_hwcnt_dump_buffer_block_accumulate_strict(
|
||||
dst_blk, src_blk, blk_em, hdr_cnt, ctr_cnt);
|
||||
kbase_hwcnt_dump_buffer_block_accumulate_strict(dst_blk, src_blk, blk_em, hdr_cnt,
|
||||
ctr_cnt);
|
||||
}
|
||||
|
||||
kbase_hwcnt_metadata_for_each_clock(metadata, clk) {
|
||||
if (kbase_hwcnt_clk_enable_map_enabled(
|
||||
dst_enable_map->clk_enable_map, clk))
|
||||
kbase_hwcnt_metadata_for_each_clock(metadata, clk)
|
||||
{
|
||||
if (kbase_hwcnt_clk_enable_map_enabled(dst_enable_map->clk_enable_map, clk))
|
||||
dst->clk_cnt_buf[clk] += src->clk_cnt_buf[clk];
|
||||
else
|
||||
dst->clk_cnt_buf[clk] = 0;
|
||||
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue
Block a user