MALI: rockchip: upgrade bifrost DDK to g15p0-01eac0, from g13p0-01eac0

Note, the corresponding mali_csffw.bin for DDK g15 MUST be used.

Change-Id: Ic30634fa6247d62bf96f506c64d13b89e16b02e6
Signed-off-by: Zhen Chen <chenzhen@rock-chips.com>
This commit is contained in:
Zhen Chen 2022-10-13 08:42:04 +08:00 committed by Tao Huang
parent 96e93dba44
commit 034aad5dd8
192 changed files with 10415 additions and 8170 deletions

View File

@ -236,6 +236,7 @@ Description:
device-driver that supports a CSF GPU. The duration value unit
is in milliseconds and is used for configuring csf scheduling
tick duration.
What: /sys/class/misc/mali%u/device/reset_timeout
Description:
This attribute is used to set the number of milliseconds to

View File

@ -1,6 +1,6 @@
# SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
#
# (C) COPYRIGHT 2013-2021 ARM Limited. All rights reserved.
# (C) COPYRIGHT 2013-2022 ARM Limited. All rights reserved.
#
# This program is free software and is provided to you under the terms of the
# GNU General Public License version 2 as published by the Free Software
@ -129,7 +129,7 @@ for details.
set and the setting corresponding to the SYSC_ALLOC register.
Example for a Mali GPU with 1 clock and no regulators:
Example for a Mali GPU with 1 clock and 1 regulator:
gpu@0xfc010000 {
compatible = "arm,malit602", "arm,malit60x", "arm,malit6xx", "arm,mali-midgard";

View File

@ -1,6 +1,6 @@
# SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
#
# (C) COPYRIGHT 2021 ARM Limited. All rights reserved.
# (C) COPYRIGHT 2021-2022 ARM Limited. All rights reserved.
#
# This program is free software and is provided to you under the terms of the
# GNU General Public License version 2 as published by the Free Software
@ -28,7 +28,6 @@ subdir-ccflags-y += $(ccflags-y)
#
# Kernel modules
#
obj-$(CONFIG_DMA_BUF_LOCK) += dma_buf_lock/src/
obj-$(CONFIG_DMA_SHARED_BUFFER_TEST_EXPORTER) += dma_buf_test_exporter/
obj-$(CONFIG_MALI_MEMORY_GROUP_MANAGER) += memory_group_manager/
obj-$(CONFIG_MALI_PROTECTED_MEMORY_ALLOCATOR) += protected_memory_allocator/

View File

@ -1,6 +1,6 @@
# SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
#
# (C) COPYRIGHT 2021 ARM Limited. All rights reserved.
# (C) COPYRIGHT 2021-2022 ARM Limited. All rights reserved.
#
# This program is free software and is provided to you under the terms of the
# GNU General Public License version 2 as published by the Free Software
@ -26,16 +26,6 @@ menuconfig MALI_BASE_MODULES
These modules provide extra features or debug interfaces, and
are optional for the use of the Mali GPU modules.
config DMA_BUF_LOCK
bool "Build dma-buf lock module"
depends on MALI_BASE_MODULES && MALI_DMA_FENCE
default y
help
This option will build the dma_buf_lock module.
Modules:
- dma_buf_lock.ko
config DMA_SHARED_BUFFER_TEST_EXPORTER
bool "Build dma-buf framework test exporter module"
depends on MALI_BASE_MODULES && DMA_SHARED_BUFFER

View File

@ -38,11 +38,9 @@ ifeq ($(CONFIG_MALI_BASE_MODULES),y)
CONFIG_MALI_CSF_SUPPORT ?= n
ifneq ($(CONFIG_DMA_SHARED_BUFFER),n)
CONFIG_DMA_BUF_LOCK ?= y
CONFIG_DMA_SHARED_BUFFER_TEST_EXPORTER ?= y
else
# Prevent misuse when CONFIG_DMA_SHARED_BUFFER=n
CONFIG_DMA_BUF_LOCK = n
CONFIG_DMA_SHARED_BUFFER_TEST_EXPORTER = n
endif
@ -54,7 +52,6 @@ ifeq ($(CONFIG_MALI_BASE_MODULES),y)
else
# Prevent misuse when CONFIG_MALI_BASE_MODULES=n
CONFIG_DMA_BUF_LOCK = n
CONFIG_DMA_SHARED_BUFFER_TEST_EXPORTER = n
CONFIG_MALI_MEMORY_GROUP_MANAGER = n
CONFIG_MALI_PROTECTED_MEMORY_ALLOCATOR = n
@ -64,10 +61,9 @@ endif
CONFIGS := \
CONFIG_MALI_BASE_MODULES \
CONFIG_MALI_CSF_SUPPORT \
CONFIG_DMA_BUF_LOCK \
CONFIG_DMA_SHARED_BUFFER_TEST_EXPORTER \
CONFIG_MALI_MEMORY_GROUP_MANAGER \
CONFIG_MALI_PROTECTED_MEMORY_ALLOCATOR
CONFIG_MALI_PROTECTED_MEMORY_ALLOCATOR \
#
@ -92,26 +88,47 @@ EXTRA_CFLAGS := $(foreach config,$(CONFIGS), \
$(if $(filter y m,$(value $(value config))), \
-D$(value config)=1))
# The following were added to align with W=1 in scripts/Makefile.extrawarn
# from the Linux source tree
KBUILD_CFLAGS += -Wall -Werror
# The following were added to align with W=1 in scripts/Makefile.extrawarn
# from the Linux source tree (v5.18.14)
KBUILD_CFLAGS += -Wextra -Wunused -Wno-unused-parameter
KBUILD_CFLAGS += -Wmissing-declarations
KBUILD_CFLAGS += -Wmissing-format-attribute
KBUILD_CFLAGS += -Wmissing-prototypes
KBUILD_CFLAGS += -Wold-style-definition
KBUILD_CFLAGS += -Wmissing-include-dirs
# The -Wmissing-include-dirs cannot be enabled as the path to some of the
# included directories change depending on whether it is an in-tree or
# out-of-tree build.
KBUILD_CFLAGS += $(call cc-option, -Wunused-but-set-variable)
KBUILD_CFLAGS += $(call cc-option, -Wunused-const-variable)
KBUILD_CFLAGS += $(call cc-option, -Wpacked-not-aligned)
KBUILD_CFLAGS += $(call cc-option, -Wstringop-truncation)
# The following turn off the warnings enabled by -Wextra
KBUILD_CFLAGS += -Wno-missing-field-initializers
KBUILD_CFLAGS += -Wno-sign-compare
KBUILD_CFLAGS += -Wno-type-limits
KBUILD_CFLAGS += -Wno-shift-negative-value
# This flag is needed to avoid build errors on older kernels
KBUILD_CFLAGS += $(call cc-option, -Wno-cast-function-type)
KBUILD_CPPFLAGS += -DKBUILD_EXTRA_WARN1
# The following were added to align with W=2 in scripts/Makefile.extrawarn
# from the Linux source tree (v5.18.14)
KBUILD_CFLAGS += -Wdisabled-optimization
# The -Wshadow flag cannot be enabled unless upstream kernels are
# patched to fix redefinitions of certain built-in functions and
# global variables.
KBUILD_CFLAGS += $(call cc-option, -Wlogical-op)
KBUILD_CFLAGS += -Wmissing-field-initializers
KBUILD_CFLAGS += -Wtype-limits
KBUILD_CFLAGS += $(call cc-option, -Wmaybe-uninitialized)
KBUILD_CFLAGS += $(call cc-option, -Wunused-macros)
KBUILD_CPPFLAGS += -DKBUILD_EXTRA_WARN2
# This warning is disabled to avoid build failures in some kernel versions
KBUILD_CFLAGS += -Wno-ignored-qualifiers
all:
$(MAKE) -C $(KDIR) M=$(CURDIR) $(MAKE_ARGS) EXTRA_CFLAGS="$(EXTRA_CFLAGS)" KBUILD_EXTRA_SYMBOLS="$(EXTRA_SYMBOLS)" modules

View File

@ -1,6 +1,6 @@
# SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
#
# (C) COPYRIGHT 2021 ARM Limited. All rights reserved.
# (C) COPYRIGHT 2021-2022 ARM Limited. All rights reserved.
#
# This program is free software and is provided to you under the terms of the
# GNU General Public License version 2 as published by the Free Software
@ -26,16 +26,6 @@ menuconfig MALI_BASE_MODULES
These modules provide extra features or debug interfaces, and
are optional for the use of the Mali GPU modules.
config DMA_BUF_LOCK
bool "Build dma-buf lock module"
depends on MALI_BASE_MODULES && MALI_DMA_FENCE
default y
help
This option will build the dma_buf_lock module.
Modules:
- dma_buf_lock.ko
config DMA_SHARED_BUFFER_TEST_EXPORTER
bool "Build dma-buf framework test exporter module"
depends on MALI_BASE_MODULES
@ -45,7 +35,7 @@ config DMA_SHARED_BUFFER_TEST_EXPORTER
Usable to help test importers.
Modules:
- dma-buf-test-exporter.ko
- dma-buf-test-exporter.ko
config MALI_MEMORY_GROUP_MANAGER
bool "Build Mali Memory Group Manager module"
@ -57,7 +47,7 @@ config MALI_MEMORY_GROUP_MANAGER
for memory pools managed by Mali GPU device drivers.
Modules:
- memory_group_manager.ko
- memory_group_manager.ko
config MALI_PROTECTED_MEMORY_ALLOCATOR
bool "Build Mali Protected Memory Allocator module"
@ -70,5 +60,5 @@ config MALI_PROTECTED_MEMORY_ALLOCATOR
of Mali GPU device drivers.
Modules:
- protected_memory_allocator.ko
- protected_memory_allocator.ko

View File

@ -1,908 +0,0 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
* (C) COPYRIGHT 2012-2014, 2017-2018, 2020-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
* Foundation, and any use by you of this program is subject to the terms
* of such GNU license.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, you can access it online at
* http://www.gnu.org/licenses/gpl-2.0.html.
*
*/
#include <linux/version.h>
#include <linux/version_compat_defs.h>
#include <linux/uaccess.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/fs.h>
#include <linux/cdev.h>
#include <linux/device.h>
#include <linux/slab.h>
#include <linux/atomic.h>
#if (KERNEL_VERSION(5, 4, 0) > LINUX_VERSION_CODE)
#include <linux/reservation.h>
#else
#include <linux/dma-resv.h>
#endif
#include <linux/dma-buf.h>
#include <linux/wait.h>
#include <linux/sched.h>
#include <linux/poll.h>
#include <linux/anon_inodes.h>
#include <linux/file.h>
#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE)
#include <linux/fence.h>
#define dma_fence_context_alloc(a) fence_context_alloc(a)
#define dma_fence_init(a, b, c, d, e) fence_init(a, b, c, d, e)
#define dma_fence_get(a) fence_get(a)
#define dma_fence_put(a) fence_put(a)
#define dma_fence_signal(a) fence_signal(a)
#define dma_fence_is_signaled(a) fence_is_signaled(a)
#define dma_fence_add_callback(a, b, c) fence_add_callback(a, b, c)
#define dma_fence_remove_callback(a, b) fence_remove_callback(a, b)
#if (KERNEL_VERSION(4, 9, 68) > LINUX_VERSION_CODE)
#define dma_fence_get_status(a) (fence_is_signaled(a) ? (a)->status ?: 1 : 0)
#else
#define dma_fence_get_status(a) (fence_is_signaled(a) ? (a)->error ?: 1 : 0)
#endif
#else
#include <linux/dma-fence.h>
#if (KERNEL_VERSION(4, 11, 0) > LINUX_VERSION_CODE)
#define dma_fence_get_status(a) (dma_fence_is_signaled(a) ? \
(a)->status ?: 1 \
: 0)
#endif
#endif /* < 4.10.0 */
#include "dma_buf_lock.h"
/* Maximum number of buffers that a single handle can address */
#define DMA_BUF_LOCK_BUF_MAX 32
#define DMA_BUF_LOCK_DEBUG 1
#define DMA_BUF_LOCK_INIT_BIAS 0xFF
static dev_t dma_buf_lock_dev;
static struct cdev dma_buf_lock_cdev;
static struct class *dma_buf_lock_class;
static const char dma_buf_lock_dev_name[] = "dma_buf_lock";
#if defined(HAVE_UNLOCKED_IOCTL) || defined(HAVE_COMPAT_IOCTL) || ((KERNEL_VERSION(5, 9, 0) <= LINUX_VERSION_CODE))
static long dma_buf_lock_ioctl(struct file *filp, unsigned int cmd, unsigned long arg);
#else
static int dma_buf_lock_ioctl(struct inode *inode, struct file *filp, unsigned int cmd, unsigned long arg);
#endif
/*
 * File operations of the dma_buf_lock character device (registered through
 * cdev_init() in dma_buf_lock_init()). Only ioctl entry points are provided;
 * both the native and the compat one are routed to dma_buf_lock_ioctl().
 * Which of them is populated depends on the kernel version / HAVE_* macros.
 */
static const struct file_operations dma_buf_lock_fops = {
.owner = THIS_MODULE,
#if defined(HAVE_UNLOCKED_IOCTL) || ((KERNEL_VERSION(5, 9, 0) <= LINUX_VERSION_CODE))
.unlocked_ioctl = dma_buf_lock_ioctl,
#endif
#if defined(HAVE_COMPAT_IOCTL) || ((KERNEL_VERSION(5, 9, 0) <= LINUX_VERSION_CODE))
.compat_ioctl = dma_buf_lock_ioctl,
#endif
};
/*
 * State of one lock request covering up to DMA_BUF_LOCK_BUF_MAX dma-bufs.
 * Created by dma_buf_lock_dolock() and torn down by dma_buf_lock_dounlock()
 * when the last reference in @refcount is dropped.
 */
struct dma_buf_lock_resource {
/* Fence attached to the reservation object of every locked buffer and
 * signaled in dma_buf_lock_dounlock().
 * NOTE(review): being the first member looks deliberate (the final
 * dma_fence_put() appears to be what frees this structure) - confirm
 * before reordering fields.
 */
#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE)
struct fence fence;
#else
struct dma_fence fence;
#endif
int *list_of_dma_buf_fds; /* List of buffers copied from userspace */
atomic_t locked; /* Status of lock */
struct dma_buf **dma_bufs; /* dma_buf pointers resolved from list_of_dma_buf_fds */
unsigned long exclusive; /* Exclusive access bitmap */
atomic_t fence_dep_count; /* Number of dma-fence dependencies */
struct list_head dma_fence_callbacks; /* list of all callbacks set up to wait on other fences */
wait_queue_head_t wait; /* Woken by the fence callback; polled via dma_buf_lock_handle_poll() */
struct kref refcount; /* Released through dma_buf_lock_dounlock() */
struct list_head link; /* Entry in dma_buf_lock_resource_list */
struct work_struct work; /* Runs dma_buf_lock_fence_work() */
int count; /* Number of entries in list_of_dma_buf_fds and dma_bufs */
};
/**
 * struct dma_buf_lock_fence_cb - Callback data struct for dma-fence
 * @fence_cb: Callback object registered on @fence with dma_fence_add_callback()
 * @fence: Pointer to the fence object on which this callback is waiting; a
 *         reference is held on it while the entry is on the resource's list
 * @res: Pointer to dma_buf_lock_resource that is waiting on this callback
 * @node: List node linking this callback into @res->dma_fence_callbacks
 *
 * Entries are allocated in dma_buf_lock_fence_add_callback() and freed in
 * dma_buf_lock_fence_free_callbacks().
 */
struct dma_buf_lock_fence_cb {
#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE)
struct fence_cb fence_cb;
struct fence *fence;
#else
struct dma_fence_cb fence_cb;
struct dma_fence *fence;
#endif
struct dma_buf_lock_resource *res;
struct list_head node;
};
static LIST_HEAD(dma_buf_lock_resource_list);
static DEFINE_MUTEX(dma_buf_lock_mutex);
static inline int is_dma_buf_lock_file(struct file *);
static void dma_buf_lock_dounlock(struct kref *ref);
/*** dma_buf_lock fence part ***/
/* Spin lock protecting all Mali fences as fence->lock. */
static DEFINE_SPINLOCK(dma_buf_lock_fence_lock);
/* Fence op: constant driver name reported for fences owned by this module. */
static const char *
#if (KERNEL_VERSION(4, 10, 0) <= LINUX_VERSION_CODE)
dma_buf_lock_fence_get_driver_name(struct dma_fence *fence)
#else
dma_buf_lock_fence_get_driver_name(struct fence *fence)
#endif
{
	return "dma_buf_lock";
}
/* Fence op: constant timeline name reported for fences owned by this module. */
static const char *
#if (KERNEL_VERSION(4, 10, 0) <= LINUX_VERSION_CODE)
dma_buf_lock_fence_get_timeline_name(struct dma_fence *fence)
#else
dma_buf_lock_fence_get_timeline_name(struct fence *fence)
#endif
{
	return "dma_buf_lock.timeline";
}
/* Fence op: nothing has to be armed for signaling, so always report success. */
static bool
#if (KERNEL_VERSION(4, 10, 0) <= LINUX_VERSION_CODE)
dma_buf_lock_fence_enable_signaling(struct dma_fence *fence)
#else
dma_buf_lock_fence_enable_signaling(struct fence *fence)
#endif
{
	return true;
}
/*
 * Operations table for the fence embedded in struct dma_buf_lock_resource.
 * The struct type and the default wait helper are selected per kernel
 * version; the remaining callbacks are the trivial helpers defined above.
 * NOTE(review): the table has external linkage although it is only
 * referenced from this file in the visible code - consider making it static.
 */
#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE)
const struct fence_ops dma_buf_lock_fence_ops = {
.wait = fence_default_wait,
#else
const struct dma_fence_ops dma_buf_lock_fence_ops = {
.wait = dma_fence_default_wait,
#endif
.get_driver_name = dma_buf_lock_fence_get_driver_name,
.get_timeline_name = dma_buf_lock_fence_get_timeline_name,
.enable_signaling = dma_buf_lock_fence_enable_signaling,
};
/*
 * Initialise the fence embedded in @resource with this module's fence ops and
 * the shared dma_buf_lock_fence_lock spinlock. The last two arguments of
 * dma_fence_init() are both passed as 0.
 */
static void dma_buf_lock_fence_init(struct dma_buf_lock_resource *resource)
{
	dma_fence_init(&resource->fence, &dma_buf_lock_fence_ops,
		       &dma_buf_lock_fence_lock, 0, 0);
}
/*
 * Tear down every callback entry still linked to @resource.
 *
 * For each entry the callback is cancelled (a no-op if it already ran), the
 * fence reference taken in dma_buf_lock_fence_add_callback() is released, and
 * the entry is unlinked and freed.
 */
static void dma_buf_lock_fence_free_callbacks(struct dma_buf_lock_resource *resource)
{
	struct dma_buf_lock_fence_cb *entry;
	struct dma_buf_lock_fence_cb *next;

	list_for_each_entry_safe(entry, next, &resource->dma_fence_callbacks, node) {
		dma_fence_remove_callback(entry->fence, &entry->fence_cb);
		dma_fence_put(entry->fence);
		list_del(&entry->node);
		kfree(entry);
	}
}
/*
 * Work item that drops one reference on the owning resource.
 *
 * It is queued from the fence callback, or called directly by
 * dma_buf_lock_dolock(), once all fence dependencies of an exclusive lock
 * have been met. It warns if that precondition does not hold.
 */
static void dma_buf_lock_fence_work(struct work_struct *pwork)
{
	struct dma_buf_lock_resource *res;

	res = container_of(pwork, struct dma_buf_lock_resource, work);

	/* Sanity checks: no pending dependencies, lock held, exclusive mode. */
	WARN_ON(atomic_read(&res->fence_dep_count));
	WARN_ON(!atomic_read(&res->locked));
	WARN_ON(!res->exclusive);

	mutex_lock(&dma_buf_lock_mutex);
	kref_put(&res->refcount, dma_buf_lock_dounlock);
	mutex_unlock(&dma_buf_lock_mutex);
}
/*
 * Fence callback invoked (in atomic context) when one of the fences a
 * resource depends on is signaled. When the last dependency goes away the
 * resource is marked locked, waiters are woken and, for exclusive locks, the
 * deferred work is scheduled.
 */
static void
#if (KERNEL_VERSION(4, 10, 0) <= LINUX_VERSION_CODE)
dma_buf_lock_fence_callback(struct dma_fence *fence, struct dma_fence_cb *cb)
#else
dma_buf_lock_fence_callback(struct fence *fence, struct fence_cb *cb)
#endif
{
	struct dma_buf_lock_fence_cb *entry =
		container_of(cb, struct dma_buf_lock_fence_cb, fence_cb);
	struct dma_buf_lock_resource *res = entry->res;

#if DMA_BUF_LOCK_DEBUG
	pr_debug("%s\n", __func__);
#endif

	/* Other dependencies are still outstanding: nothing more to do. */
	if (!atomic_dec_and_test(&res->fence_dep_count))
		return;

	atomic_set(&res->locked, 1);
	wake_up(&res->wait);

	/* Warn if the work was already queued */
	if (res->exclusive)
		WARN_ON(!schedule_work(&res->work));
}
#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE)
static int
dma_buf_lock_fence_add_callback(struct dma_buf_lock_resource *resource,
struct fence *fence,
fence_func_t callback)
#else
static int
dma_buf_lock_fence_add_callback(struct dma_buf_lock_resource *resource,
struct dma_fence *fence,
dma_fence_func_t callback)
#endif
{
int err = 0;
struct dma_buf_lock_fence_cb *fence_cb;
if (!fence)
return -EINVAL;
fence_cb = kmalloc(sizeof(*fence_cb), GFP_KERNEL);
if (!fence_cb)
return -ENOMEM;
fence_cb->fence = fence;
fence_cb->res = resource;
INIT_LIST_HEAD(&fence_cb->node);
err = dma_fence_add_callback(fence, &fence_cb->fence_cb,
callback);
if (err == -ENOENT) {
/* Fence signaled, get the completion result */
err = dma_fence_get_status(fence);
/* remap success completion to err code */
if (err == 1)
err = 0;
kfree(fence_cb);
} else if (err) {
kfree(fence_cb);
} else {
/*
* Get reference to fence that will be kept until callback gets
* cleaned up in dma_buf_lock_fence_free_callbacks().
*/
dma_fence_get(fence);
atomic_inc(&resource->fence_dep_count);
/* Add callback to resource's list of callbacks */
list_add(&fence_cb->node, &resource->dma_fence_callbacks);
}
return err;
}
/*
 * Make @resource wait on the fences currently attached to @resv.
 *
 * The exclusive fence (if any) is always waited on; the shared fences are
 * waited on only when @exclusive is true. Returns 0 on success or the first
 * error reported by the fence query or by dma_buf_lock_fence_add_callback().
 * All fence references obtained from the query are released before return.
 */
#if (KERNEL_VERSION(5, 4, 0) > LINUX_VERSION_CODE)
static int
dma_buf_lock_add_fence_reservation_callback(struct dma_buf_lock_resource *resource,
struct reservation_object *resv,
bool exclusive)
#else
static int
dma_buf_lock_add_fence_reservation_callback(struct dma_buf_lock_resource *resource,
struct dma_resv *resv,
bool exclusive)
#endif
{
#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE)
struct fence *excl_fence = NULL;
struct fence **shared_fences = NULL;
#else
struct dma_fence *excl_fence = NULL;
struct dma_fence **shared_fences = NULL;
#endif
unsigned int shared_count = 0;
int err, i;
/* The name of the fence query helper depends on the kernel version. */
#if (KERNEL_VERSION(5, 4, 0) > LINUX_VERSION_CODE)
err = reservation_object_get_fences_rcu(
#elif (KERNEL_VERSION(5, 14, 0) > LINUX_VERSION_CODE)
err = dma_resv_get_fences_rcu(
#else
err = dma_resv_get_fences(
#endif
resv,
&excl_fence,
&shared_count,
&shared_fences);
if (err)
return err;
if (excl_fence) {
err = dma_buf_lock_fence_add_callback(resource,
excl_fence,
dma_buf_lock_fence_callback);
/* Release our reference to the fence, taken by the fence query above.
 * We have set up our callback (if that was possible), and the fence's
 * owner is responsible for signaling the fence before allowing it to
 * disappear.
 */
dma_fence_put(excl_fence);
if (err)
goto out;
}
if (exclusive) {
for (i = 0; i < shared_count; i++) {
err = dma_buf_lock_fence_add_callback(resource,
shared_fences[i],
dma_buf_lock_fence_callback);
if (err)
goto out;
}
}
/* Release all our references to the shared fences, taken by the fence
 * query above. We have set up our callbacks (if that was possible), and
 * each fence's owner is responsible for signaling the fence before
 * allowing it to disappear.
 */
out:
for (i = 0; i < shared_count; i++)
dma_fence_put(shared_fences[i]);
kfree(shared_fences);
return err;
}
/*
 * Unlock the reservation object of every buffer in @resource and finish the
 * ww acquire context @ctx. Counterpart of
 * dma_buf_lock_acquire_fence_reservation().
 */
static void
dma_buf_lock_release_fence_reservation(struct dma_buf_lock_resource *resource,
				       struct ww_acquire_ctx *ctx)
{
	unsigned int idx = 0;

	while (idx < resource->count) {
		ww_mutex_unlock(&resource->dma_bufs[idx]->resv->lock);
		idx++;
	}

	ww_acquire_fini(ctx);
}
/*
 * Lock the reservation object of every buffer in @resource under the ww
 * acquire context @ctx.
 *
 * Returns 0 with all reservation locks held, or a negative error with no
 * lock held and @ctx finished. On -EDEADLK the contended lock is taken with
 * ww_mutex_lock_slow() and the whole sequence is retried.
 */
static int
dma_buf_lock_acquire_fence_reservation(struct dma_buf_lock_resource *resource,
struct ww_acquire_ctx *ctx)
{
/* Reservation object already locked through the slow path, if any. */
#if (KERNEL_VERSION(5, 4, 0) > LINUX_VERSION_CODE)
struct reservation_object *content_resv = NULL;
#else
struct dma_resv *content_resv = NULL;
#endif
unsigned int content_resv_idx = 0;
unsigned int r;
int err = 0;
ww_acquire_init(ctx, &reservation_ww_class);
retry:
for (r = 0; r < resource->count; r++) {
/* Skip the lock that was taken via the slow path before the retry. */
if (resource->dma_bufs[r]->resv == content_resv) {
content_resv = NULL;
continue;
}
err = ww_mutex_lock(&resource->dma_bufs[r]->resv->lock, ctx);
if (err)
goto error;
}
ww_acquire_done(ctx);
return err;
error:
content_resv_idx = r;
/* Unlock the ones locked so far */
while (r--)
ww_mutex_unlock(&resource->dma_bufs[r]->resv->lock);
/* Still set only if the loop failed before reaching the slow-path lock. */
if (content_resv)
ww_mutex_unlock(&content_resv->lock);
/* If we deadlock try with lock_slow and retry */
if (err == -EDEADLK) {
#if DMA_BUF_LOCK_DEBUG
pr_debug("deadlock at dma_buf fd %i\n",
resource->list_of_dma_buf_fds[content_resv_idx]);
#endif
content_resv = resource->dma_bufs[content_resv_idx]->resv;
ww_mutex_lock_slow(&content_resv->lock, ctx);
goto retry;
}
/* If we are here the function failed */
ww_acquire_fini(ctx);
return err;
}
/*
 * release() handler of a lock handle file: drops the reference that the file
 * holds on its dma_buf_lock_resource. Returns -EINVAL if @file is not a lock
 * handle, 0 otherwise.
 */
static int dma_buf_lock_handle_release(struct inode *inode, struct file *file)
{
	struct dma_buf_lock_resource *res;

	if (!is_dma_buf_lock_file(file))
		return -EINVAL;

	res = file->private_data;

#if DMA_BUF_LOCK_DEBUG
	pr_debug("%s\n", __func__);
#endif

	mutex_lock(&dma_buf_lock_mutex);
	kref_put(&res->refcount, dma_buf_lock_dounlock);
	mutex_unlock(&dma_buf_lock_mutex);

	return 0;
}
/*
 * poll() handler of a lock handle file.
 *
 * Returns an error mask if @file is not a lock handle. Once the resource is
 * locked it reports the handle as readable, and additionally as writable
 * when the lock is exclusive. While the lock is still pending it registers
 * on the resource's wait queue (unless the caller does not wait) and
 * returns 0.
 */
static __poll_t dma_buf_lock_handle_poll(struct file *file, poll_table *wait)
{
struct dma_buf_lock_resource *resource;
unsigned int ret = 0;
if (!is_dma_buf_lock_file(file)) {
/* The POLL* / EPOLL* spelling of the masks depends on the kernel version. */
#if (KERNEL_VERSION(4, 19, 0) > LINUX_VERSION_CODE)
return POLLERR;
#else
return EPOLLERR;
#endif
}
resource = file->private_data;
#if DMA_BUF_LOCK_DEBUG
pr_debug("%s\n", __func__);
#endif
if (atomic_read(&resource->locked) == 1) {
/* Resources have been locked */
#if (KERNEL_VERSION(4, 19, 0) > LINUX_VERSION_CODE)
ret = POLLIN | POLLRDNORM;
if (resource->exclusive)
ret |= POLLOUT | POLLWRNORM;
#else
ret = EPOLLIN | EPOLLRDNORM;
if (resource->exclusive)
ret |= EPOLLOUT | EPOLLWRNORM;
#endif
} else {
/* Not locked yet: wait to be woken by dma_buf_lock_fence_callback(). */
if (!poll_does_not_wait(wait))
poll_wait(file, &resource->wait, wait);
}
#if DMA_BUF_LOCK_DEBUG
pr_debug("%s : return %i\n", __func__, ret);
#endif
return ret;
}
/*
 * File operations of the anonymous-inode file returned to userspace as the
 * lock handle by dma_buf_lock_dolock(). The address of this table is also
 * what is_dma_buf_lock_file() uses to recognise such files.
 */
static const struct file_operations dma_buf_lock_handle_fops = {
.owner = THIS_MODULE,
.release = dma_buf_lock_handle_release,
.poll = dma_buf_lock_handle_poll,
};
/*
 * is_dma_buf_lock_file - Tell whether a file is a dma_buf_lock handle
 *
 * Returns non-zero when @file uses dma_buf_lock_handle_fops as its file
 * operations, i.e. when it was created by this module, and zero otherwise.
 */
static inline int is_dma_buf_lock_file(struct file *file)
{
	return &dma_buf_lock_handle_fops == file->f_op;
}
/*
* Start requested lock.
*
* Allocates required memory, copies dma_buf_fd list from userspace,
* acquires related reservation objects, and starts the lock.
*/
static int dma_buf_lock_dolock(struct dma_buf_lock_k_request *request)
{
struct dma_buf_lock_resource *resource;
struct ww_acquire_ctx ww_ctx;
struct file *file;
int size;
int fd;
int i;
int ret;
int error;
if (request->list_of_dma_buf_fds == NULL)
return -EINVAL;
if (request->count <= 0)
return -EINVAL;
if (request->count > DMA_BUF_LOCK_BUF_MAX)
return -EINVAL;
if (request->exclusive != DMA_BUF_LOCK_NONEXCLUSIVE &&
request->exclusive != DMA_BUF_LOCK_EXCLUSIVE)
return -EINVAL;
resource = kzalloc(sizeof(*resource), GFP_KERNEL);
if (resource == NULL)
return -ENOMEM;
atomic_set(&resource->locked, 0);
kref_init(&resource->refcount);
INIT_LIST_HEAD(&resource->link);
INIT_WORK(&resource->work, dma_buf_lock_fence_work);
resource->count = request->count;
/* Allocate space to store dma_buf_fds received from user space */
size = request->count * sizeof(int);
resource->list_of_dma_buf_fds = kmalloc(size, GFP_KERNEL);
if (resource->list_of_dma_buf_fds == NULL) {
kfree(resource);
return -ENOMEM;
}
/* Allocate space to store dma_buf pointers associated with dma_buf_fds */
size = sizeof(struct dma_buf *) * request->count;
resource->dma_bufs = kmalloc(size, GFP_KERNEL);
if (resource->dma_bufs == NULL) {
kfree(resource->list_of_dma_buf_fds);
kfree(resource);
return -ENOMEM;
}
/* Copy requested list of dma_buf_fds from user space */
size = request->count * sizeof(int);
if (copy_from_user(resource->list_of_dma_buf_fds,
(void __user *)request->list_of_dma_buf_fds,
size) != 0) {
kfree(resource->list_of_dma_buf_fds);
kfree(resource->dma_bufs);
kfree(resource);
return -ENOMEM;
}
#if DMA_BUF_LOCK_DEBUG
for (i = 0; i < request->count; i++)
pr_debug("dma_buf %i = %X\n", i, resource->list_of_dma_buf_fds[i]);
#endif
/* Initialize the fence associated with dma_buf_lock resource */
dma_buf_lock_fence_init(resource);
INIT_LIST_HEAD(&resource->dma_fence_callbacks);
atomic_set(&resource->fence_dep_count, DMA_BUF_LOCK_INIT_BIAS);
/* Add resource to global list */
mutex_lock(&dma_buf_lock_mutex);
list_add(&resource->link, &dma_buf_lock_resource_list);
mutex_unlock(&dma_buf_lock_mutex);
for (i = 0; i < request->count; i++) {
/* Convert fd into dma_buf structure */
resource->dma_bufs[i] = dma_buf_get(resource->list_of_dma_buf_fds[i]);
if (IS_ERR_VALUE(PTR_ERR(resource->dma_bufs[i]))) {
mutex_lock(&dma_buf_lock_mutex);
kref_put(&resource->refcount, dma_buf_lock_dounlock);
mutex_unlock(&dma_buf_lock_mutex);
return -EINVAL;
}
/*Check the reservation object associated with dma_buf */
if (resource->dma_bufs[i]->resv == NULL) {
mutex_lock(&dma_buf_lock_mutex);
kref_put(&resource->refcount, dma_buf_lock_dounlock);
mutex_unlock(&dma_buf_lock_mutex);
return -EINVAL;
}
#if DMA_BUF_LOCK_DEBUG
pr_debug("%s : dma_buf_fd %i dma_buf %p dma_fence reservation %p\n",
__func__, resource->list_of_dma_buf_fds[i], resource->dma_bufs[i], resource->dma_bufs[i]->resv);
#endif
}
init_waitqueue_head(&resource->wait);
kref_get(&resource->refcount);
error = get_unused_fd_flags(0);
if (error < 0)
return error;
fd = error;
file = anon_inode_getfile("dma_buf_lock", &dma_buf_lock_handle_fops, (void *)resource, 0);
if (IS_ERR(file)) {
put_unused_fd(fd);
mutex_lock(&dma_buf_lock_mutex);
kref_put(&resource->refcount, dma_buf_lock_dounlock);
kref_put(&resource->refcount, dma_buf_lock_dounlock);
mutex_unlock(&dma_buf_lock_mutex);
return PTR_ERR(file);
}
resource->exclusive = request->exclusive;
/* Start locking process */
ret = dma_buf_lock_acquire_fence_reservation(resource, &ww_ctx);
if (ret) {
#if DMA_BUF_LOCK_DEBUG
pr_debug("%s : Error %d locking reservations.\n", __func__, ret);
#endif
put_unused_fd(fd);
mutex_lock(&dma_buf_lock_mutex);
kref_put(&resource->refcount, dma_buf_lock_dounlock);
kref_put(&resource->refcount, dma_buf_lock_dounlock);
mutex_unlock(&dma_buf_lock_mutex);
return ret;
}
/* Take an extra reference for exclusive access, which will be dropped
* once the pre-existing fences attached to dma-buf resources, for which
* we have commited for exclusive access, are signaled.
* At a given time there can be only one exclusive fence attached to a
* reservation object, so the new exclusive fence replaces the original
* fence and the future sync is done against the new fence which is
* supposed to be signaled only after the original fence was signaled.
* If the new exclusive fence is signaled prematurely then the resources
* would become available for new access while they are already being
* written to by the original owner.
*/
if (resource->exclusive)
kref_get(&resource->refcount);
for (i = 0; i < request->count; i++) {
#if (KERNEL_VERSION(5, 4, 0) > LINUX_VERSION_CODE)
struct reservation_object *resv = resource->dma_bufs[i]->resv;
#else
struct dma_resv *resv = resource->dma_bufs[i]->resv;
#endif
if (!test_bit(i, &resource->exclusive)) {
#if (KERNEL_VERSION(5, 4, 0) > LINUX_VERSION_CODE)
ret = reservation_object_reserve_shared(resv);
#else
ret = dma_resv_reserve_shared(resv, 0);
#endif
if (ret) {
#if DMA_BUF_LOCK_DEBUG
pr_debug("%s : Error %d reserving space for shared fence.\n", __func__, ret);
#endif
break;
}
ret = dma_buf_lock_add_fence_reservation_callback(resource,
resv,
false);
if (ret) {
#if DMA_BUF_LOCK_DEBUG
pr_debug("%s : Error %d adding reservation to callback.\n", __func__, ret);
#endif
break;
}
#if (KERNEL_VERSION(5, 4, 0) > LINUX_VERSION_CODE)
reservation_object_add_shared_fence(resv, &resource->fence);
#else
dma_resv_add_shared_fence(resv, &resource->fence);
#endif
} else {
ret = dma_buf_lock_add_fence_reservation_callback(resource, resv, true);
if (ret) {
#if DMA_BUF_LOCK_DEBUG
pr_debug("%s : Error %d adding reservation to callback.\n", __func__, ret);
#endif
break;
}
#if (KERNEL_VERSION(5, 4, 0) > LINUX_VERSION_CODE)
reservation_object_add_excl_fence(resv, &resource->fence);
#else
dma_resv_add_excl_fence(resv, &resource->fence);
#endif
}
}
dma_buf_lock_release_fence_reservation(resource, &ww_ctx);
/* Test if the callbacks were already triggered */
if (!atomic_sub_return(DMA_BUF_LOCK_INIT_BIAS, &resource->fence_dep_count)) {
atomic_set(&resource->locked, 1);
/* Drop the extra reference taken for exclusive access */
if (resource->exclusive)
dma_buf_lock_fence_work(&resource->work);
}
if (IS_ERR_VALUE((unsigned long)ret)) {
put_unused_fd(fd);
mutex_lock(&dma_buf_lock_mutex);
kref_put(&resource->refcount, dma_buf_lock_dounlock);
kref_put(&resource->refcount, dma_buf_lock_dounlock);
mutex_unlock(&dma_buf_lock_mutex);
return ret;
}
#if DMA_BUF_LOCK_DEBUG
pr_debug("%s : complete\n", __func__);
#endif
mutex_lock(&dma_buf_lock_mutex);
kref_put(&resource->refcount, dma_buf_lock_dounlock);
mutex_unlock(&dma_buf_lock_mutex);
/* Installing the fd is deferred to the very last operation before return
* to avoid allowing userspace to close it during the setup.
*/
fd_install(fd, file);
return fd;
}
/*
 * kref release function of a dma_buf_lock_resource.
 *
 * Marks the resource unlocked, signals its fence, cancels and frees any
 * pending fence callbacks, unlinks the resource from the global list and
 * releases the dma-buf references and the two arrays. All callers in this
 * file invoke it (through kref_put()) with dma_buf_lock_mutex held.
 */
static void dma_buf_lock_dounlock(struct kref *ref)
{
int i;
struct dma_buf_lock_resource *resource = container_of(ref, struct dma_buf_lock_resource, refcount);
atomic_set(&resource->locked, 0);
/* Signal the resource's fence. */
dma_fence_signal(&resource->fence);
dma_buf_lock_fence_free_callbacks(resource);
list_del(&resource->link);
for (i = 0; i < resource->count; i++) {
if (resource->dma_bufs[i])
dma_buf_put(resource->dma_bufs[i]);
}
kfree(resource->dma_bufs);
kfree(resource->list_of_dma_buf_fds);
/* NOTE(review): the resource itself is not kfree()d here; presumably the
 * final put of the embedded fence (first member of the structure) frees
 * it - confirm against the fence release path.
 */
dma_fence_put(&resource->fence);
}
/*
 * Module init: allocate a char device region, register the cdev, then create
 * the device class and the device node. Each failure unwinds the steps
 * already done, in reverse order, and returns the error code.
 */
static int __init dma_buf_lock_init(void)
{
	struct device *mdev;
	int err;

#if DMA_BUF_LOCK_DEBUG
	pr_debug("%s\n", __func__);
#endif

	err = alloc_chrdev_region(&dma_buf_lock_dev, 0, 1, dma_buf_lock_dev_name);
	if (err)
		goto fail;

	cdev_init(&dma_buf_lock_cdev, &dma_buf_lock_fops);
	err = cdev_add(&dma_buf_lock_cdev, dma_buf_lock_dev, 1);
	if (err)
		goto fail_region;

	dma_buf_lock_class = class_create(THIS_MODULE, dma_buf_lock_dev_name);
	if (IS_ERR(dma_buf_lock_class)) {
		err = PTR_ERR(dma_buf_lock_class);
		goto fail_cdev;
	}

	mdev = device_create(dma_buf_lock_class, NULL, dma_buf_lock_dev,
			     NULL, "%s", dma_buf_lock_dev_name);
	if (IS_ERR(mdev)) {
		err = PTR_ERR(mdev);
		goto fail_class;
	}

	return 0;

fail_class:
	class_destroy(dma_buf_lock_class);
fail_cdev:
	cdev_del(&dma_buf_lock_cdev);
fail_region:
	unregister_chrdev_region(dma_buf_lock_dev, 1);
fail:
#if DMA_BUF_LOCK_DEBUG
	pr_debug("%s failed\n", __func__);
#endif
	return err;
}
/*
 * Module exit: keep dropping references on the first resource of the global
 * list until the list is empty, then remove the device node, class, cdev and
 * char device region. The mutex is released and re-taken between iterations.
 */
static void __exit dma_buf_lock_exit(void)
{
	struct dma_buf_lock_resource *res;

#if DMA_BUF_LOCK_DEBUG
	pr_debug("%s\n", __func__);
#endif

	/* Unlock all outstanding references */
	mutex_lock(&dma_buf_lock_mutex);
	while (!list_empty(&dma_buf_lock_resource_list)) {
		res = list_first_entry(&dma_buf_lock_resource_list,
				       struct dma_buf_lock_resource, link);
		kref_put(&res->refcount, dma_buf_lock_dounlock);

		mutex_unlock(&dma_buf_lock_mutex);
		mutex_lock(&dma_buf_lock_mutex);
	}
	mutex_unlock(&dma_buf_lock_mutex);

	device_destroy(dma_buf_lock_class, dma_buf_lock_dev);
	class_destroy(dma_buf_lock_class);
	cdev_del(&dma_buf_lock_cdev);
	unregister_chrdev_region(dma_buf_lock_dev, 1);
}
/*
 * ioctl entry point of the dma_buf_lock device.
 *
 * Only DMA_BUF_LOCK_FUNC_LOCK_ASYNC is handled: the request structure is
 * copied from userspace and passed to dma_buf_lock_dolock(), whose result
 * (a lock handle fd or a negative errno) is returned. Unknown commands or a
 * mismatching argument size yield -ENOTTY; an unreadable argument yields
 * -EFAULT.
 */
#if defined(HAVE_UNLOCKED_IOCTL) || defined(HAVE_COMPAT_IOCTL) || ((KERNEL_VERSION(5, 9, 0) <= LINUX_VERSION_CODE))
static long dma_buf_lock_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
#else
static int dma_buf_lock_ioctl(struct inode *inode, struct file *filp, unsigned int cmd, unsigned long arg)
#endif
{
	struct dma_buf_lock_k_request request;
	int size = _IOC_SIZE(cmd);

	/* Reject commands that do not belong to this driver. */
	if (_IOC_TYPE(cmd) != DMA_BUF_LOCK_IOC_MAGIC)
		return -ENOTTY;
	if ((_IOC_NR(cmd) < DMA_BUF_LOCK_IOC_MINNR) || (_IOC_NR(cmd) > DMA_BUF_LOCK_IOC_MAXNR))
		return -ENOTTY;

	if (cmd != DMA_BUF_LOCK_FUNC_LOCK_ASYNC)
		return -ENOTTY;

	if (size != sizeof(request))
		return -ENOTTY;
	if (copy_from_user(&request, (void __user *)arg, size))
		return -EFAULT;

#if DMA_BUF_LOCK_DEBUG
	pr_debug("DMA_BUF_LOCK_FUNC_LOCK_ASYNC - %i\n", request.count);
#endif

	return dma_buf_lock_dolock(&request);
}
module_init(dma_buf_lock_init);
module_exit(dma_buf_lock_exit);
MODULE_LICENSE("GPL");
MODULE_INFO(import_ns, "DMA_BUF");

View File

@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
* (C) COPYRIGHT 2017, 2020-2021 ARM Limited. All rights reserved.
* (C) COPYRIGHT 2017, 2020-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@ -22,7 +22,7 @@
bob_kernel_module {
name: "dma-buf-test-exporter",
defaults: [
"kernel_defaults"
"kernel_defaults",
],
srcs: [
"Kbuild",

View File

@ -19,7 +19,7 @@
*
*/
#include <linux/dma-buf-test-exporter.h>
#include <uapi/base/arm/dma_buf_test_exporter/dma-buf-test-exporter.h>
#include <linux/dma-buf.h>
#include <linux/miscdevice.h>
#include <linux/slab.h>
@ -32,6 +32,9 @@
#include <linux/highmem.h>
#include <linux/dma-mapping.h>
#define DMA_BUF_TE_VER_MAJOR 1
#define DMA_BUF_TE_VER_MINOR 0
/* Maximum size allowed in a single DMA_BUF_TE_ALLOC call */
#define DMA_BUF_TE_ALLOC_MAX_SIZE ((8ull << 30) >> PAGE_SHIFT) /* 8 GB */

View File

@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
* (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved.
* (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@ -22,7 +22,7 @@
bob_kernel_module {
name: "memory_group_manager",
defaults: [
"kernel_defaults"
"kernel_defaults",
],
srcs: [
"Kbuild",

View File

@ -265,8 +265,8 @@ static struct page *example_mgm_alloc_page(
struct mgm_groups *const data = mgm_dev->data;
struct page *p;
dev_dbg(data->dev, "%s(mgm_dev=%p, group_id=%d gfp_mask=0x%x order=%u\n",
__func__, (void *)mgm_dev, group_id, gfp_mask, order);
dev_dbg(data->dev, "%s(mgm_dev=%pK, group_id=%d gfp_mask=0x%x order=%u\n", __func__,
(void *)mgm_dev, group_id, gfp_mask, order);
if (WARN_ON(group_id < 0) ||
WARN_ON(group_id >= MEMORY_GROUP_MANAGER_NR_GROUPS))
@ -291,8 +291,8 @@ static void example_mgm_free_page(
{
struct mgm_groups *const data = mgm_dev->data;
dev_dbg(data->dev, "%s(mgm_dev=%p, group_id=%d page=%p order=%u\n",
__func__, (void *)mgm_dev, group_id, (void *)page, order);
dev_dbg(data->dev, "%s(mgm_dev=%pK, group_id=%d page=%pK order=%u\n", __func__,
(void *)mgm_dev, group_id, (void *)page, order);
if (WARN_ON(group_id < 0) ||
WARN_ON(group_id >= MEMORY_GROUP_MANAGER_NR_GROUPS))
@ -309,9 +309,8 @@ static int example_mgm_get_import_memory_id(
{
struct mgm_groups *const data = mgm_dev->data;
dev_dbg(data->dev, "%s(mgm_dev=%p, import_data=%p (type=%d)\n",
__func__, (void *)mgm_dev, (void *)import_data,
(int)import_data->type);
dev_dbg(data->dev, "%s(mgm_dev=%pK, import_data=%pK (type=%d)\n", __func__, (void *)mgm_dev,
(void *)import_data, (int)import_data->type);
if (!WARN_ON(!import_data)) {
WARN_ON(!import_data->u.dma_buf);
@ -329,9 +328,8 @@ static u64 example_mgm_update_gpu_pte(
{
struct mgm_groups *const data = mgm_dev->data;
dev_dbg(data->dev,
"%s(mgm_dev=%p, group_id=%d, mmu_level=%d, pte=0x%llx)\n",
__func__, (void *)mgm_dev, group_id, mmu_level, pte);
dev_dbg(data->dev, "%s(mgm_dev=%pK, group_id=%d, mmu_level=%d, pte=0x%llx)\n", __func__,
(void *)mgm_dev, group_id, mmu_level, pte);
if (WARN_ON(group_id < 0) ||
WARN_ON(group_id >= MEMORY_GROUP_MANAGER_NR_GROUPS))
@ -367,9 +365,9 @@ static vm_fault_t example_mgm_vmf_insert_pfn_prot(
vm_fault_t fault;
dev_dbg(data->dev,
"%s(mgm_dev=%p, group_id=%d, vma=%p, addr=0x%lx, pfn=0x%lx, prot=0x%llx)\n",
"%s(mgm_dev=%pK, group_id=%d, vma=%pK, addr=0x%lx, pfn=0x%lx, prot=0x%llx)\n",
__func__, (void *)mgm_dev, group_id, (void *)vma, addr, pfn,
(unsigned long long) pgprot_val(prot));
(unsigned long long)pgprot_val(prot));
if (WARN_ON(group_id < 0) ||
WARN_ON(group_id >= MEMORY_GROUP_MANAGER_NR_GROUPS))

View File

@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
* (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved.
* (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@ -22,7 +22,7 @@
bob_kernel_module {
name: "protected_memory_allocator",
defaults: [
"kernel_defaults"
"kernel_defaults",
],
srcs: [
"Kbuild",

View File

@ -1,6 +1,6 @@
# SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
#
# (C) COPYRIGHT 2012-2021 ARM Limited. All rights reserved.
# (C) COPYRIGHT 2012-2022 ARM Limited. All rights reserved.
#
# This program is free software and is provided to you under the terms of the
# GNU General Public License version 2 as published by the Free Software
@ -59,10 +59,8 @@ ifeq ($(CONFIG_MALI_PRFCNT_SET_SELECT_VIA_DEBUG_FS), y)
endif
ifeq ($(CONFIG_MALI_BIFROST_FENCE_DEBUG), y)
ifneq ($(CONFIG_SYNC), y)
ifneq ($(CONFIG_SYNC_FILE), y)
$(error CONFIG_MALI_BIFROST_FENCE_DEBUG depends on CONFIG_SYNC || CONFIG_SYNC_FILE to be set in Kernel configuration)
endif
ifneq ($(CONFIG_SYNC_FILE), y)
$(error CONFIG_MALI_BIFROST_FENCE_DEBUG depends on CONFIG_SYNC_FILE to be set in Kernel configuration)
endif
endif
@ -71,7 +69,7 @@ endif
#
# Driver version string which is returned to userspace via an ioctl
MALI_RELEASE_NAME ?= '"g13p0-01eac0"'
MALI_RELEASE_NAME ?= '"g15p0-01eac0"'
# Set up defaults if not defined by build system
ifeq ($(CONFIG_MALI_BIFROST_DEBUG), y)
MALI_UNIT_TEST = 1
@ -151,6 +149,7 @@ bifrost_kbase-y := \
mali_kbase_cache_policy.o \
mali_kbase_ccswe.o \
mali_kbase_mem.o \
mali_kbase_mem_migrate.o \
mali_kbase_mem_pool_group.o \
mali_kbase_native_mgm.o \
mali_kbase_ctx_sched.o \
@ -159,12 +158,6 @@ bifrost_kbase-y := \
mali_kbase_config.o \
mali_kbase_kinstr_prfcnt.o \
mali_kbase_vinstr.o \
mali_kbase_hwcnt.o \
mali_kbase_hwcnt_gpu.o \
mali_kbase_hwcnt_gpu_narrow.o \
mali_kbase_hwcnt_types.o \
mali_kbase_hwcnt_virtualizer.o \
mali_kbase_hwcnt_watchdog_if_timer.o \
mali_kbase_softjobs.o \
mali_kbase_hw.o \
mali_kbase_debug.o \
@ -175,6 +168,7 @@ bifrost_kbase-y := \
mali_kbase_disjoint_events.o \
mali_kbase_debug_mem_view.o \
mali_kbase_debug_mem_zones.o \
mali_kbase_debug_mem_allocs.o \
mali_kbase_smc.o \
mali_kbase_mem_pool.o \
mali_kbase_mem_pool_debugfs.o \
@ -191,24 +185,14 @@ bifrost_kbase-$(CONFIG_DEBUG_FS) += mali_kbase_pbha_debugfs.o
bifrost_kbase-$(CONFIG_MALI_CINSTR_GWT) += mali_kbase_gwt.o
bifrost_kbase-$(CONFIG_SYNC) += \
mali_kbase_sync_android.o \
mali_kbase_sync_common.o
bifrost_kbase-$(CONFIG_SYNC_FILE) += \
mali_kbase_fence_ops.o \
mali_kbase_sync_file.o \
mali_kbase_sync_common.o
ifeq ($(CONFIG_MALI_CSF_SUPPORT),y)
bifrost_kbase-y += \
mali_kbase_hwcnt_backend_csf.o \
mali_kbase_hwcnt_backend_csf_if_fw.o
else
ifneq ($(CONFIG_MALI_CSF_SUPPORT),y)
bifrost_kbase-y += \
mali_kbase_jm.o \
mali_kbase_hwcnt_backend_jm.o \
mali_kbase_hwcnt_backend_jm_watchdog.o \
mali_kbase_dummy_job_wa.o \
mali_kbase_debug_job_fault.o \
mali_kbase_event.o \
@ -218,11 +202,6 @@ else
mali_kbase_js_ctx_attr.o \
mali_kbase_kinstr_jm.o
bifrost_kbase-$(CONFIG_MALI_BIFROST_DMA_FENCE) += \
mali_kbase_fence_ops.o \
mali_kbase_dma_fence.o \
mali_kbase_fence.o
bifrost_kbase-$(CONFIG_SYNC_FILE) += \
mali_kbase_fence_ops.o \
mali_kbase_fence.o
@ -236,6 +215,7 @@ INCLUDE_SUBDIR = \
$(src)/backend/gpu/Kbuild \
$(src)/mmu/Kbuild \
$(src)/tl/Kbuild \
$(src)/hwcnt/Kbuild \
$(src)/gpu/Kbuild \
$(src)/thirdparty/Kbuild \
$(src)/platform/$(MALI_PLATFORM_DIR)/Kbuild

View File

@ -1,6 +1,6 @@
# SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
#
# (C) COPYRIGHT 2012-2021 ARM Limited. All rights reserved.
# (C) COPYRIGHT 2012-2022 ARM Limited. All rights reserved.
#
# This program is free software and is provided to you under the terms of the
# GNU General Public License version 2 as published by the Free Software
@ -91,16 +91,6 @@ config MALI_BIFROST_ENABLE_TRACE
Enables tracing in kbase. Trace log available through
the "mali_trace" debugfs file, when the CONFIG_DEBUG_FS is enabled
config MALI_BIFROST_DMA_FENCE
bool "Enable DMA_BUF fence support for Mali"
depends on MALI_BIFROST
default n
help
Support DMA_BUF fences for Mali.
This option should only be enabled if the Linux Kernel has built in
support for DMA_BUF fences.
config MALI_ARBITER_SUPPORT
bool "Enable arbiter support for Mali"
depends on MALI_BIFROST && !MALI_CSF_SUPPORT
@ -117,7 +107,7 @@ config MALI_DMA_BUF_MAP_ON_DEMAND
depends on MALI_BIFROST
default n
help
This option caused kbase to set up the GPU mapping of imported
This option will cause kbase to set up the GPU mapping of imported
dma-buf when needed to run atoms. This is the legacy behavior.
This is intended for testing and the option will get removed in the
@ -237,7 +227,7 @@ config MALI_BIFROST_DEBUG
config MALI_BIFROST_FENCE_DEBUG
bool "Enable debug sync fence usage"
depends on MALI_BIFROST && MALI_BIFROST_EXPERT && (SYNC || SYNC_FILE)
depends on MALI_BIFROST && MALI_BIFROST_EXPERT && SYNC_FILE
default y if MALI_BIFROST_DEBUG
help
Select this option to enable additional checking and reporting on the
@ -385,9 +375,6 @@ config MALI_ARBITRATION
virtualization setup for Mali
If unsure, say N.
if MALI_ARBITRATION
source "drivers/gpu/arm/bifrost/arbitration/Kconfig"
endif
# source "drivers/gpu/arm/bifrost/tests/Kconfig"

View File

@ -65,7 +65,7 @@ ifeq ($(CONFIG_MALI_BIFROST),m)
endif
ifeq ($(CONFIG_XEN),y)
ifneq ($(CONFIG_MALI_ARBITRATION), n)
ifneq ($(CONFIG_MALI_ARBITER_SUPPORT), n)
CONFIG_MALI_XEN ?= m
endif
endif
@ -91,14 +91,10 @@ ifeq ($(CONFIG_MALI_BIFROST),m)
CONFIG_MALI_BIFROST_ENABLE_TRACE ?= y
CONFIG_MALI_BIFROST_SYSTEM_TRACE ?= y
ifeq ($(CONFIG_SYNC), y)
ifeq ($(CONFIG_SYNC_FILE), y)
CONFIG_MALI_BIFROST_FENCE_DEBUG ?= y
else
ifeq ($(CONFIG_SYNC_FILE), y)
CONFIG_MALI_BIFROST_FENCE_DEBUG ?= y
else
CONFIG_MALI_BIFROST_FENCE_DEBUG = n
endif
CONFIG_MALI_BIFROST_FENCE_DEBUG = n
endif
else
# Prevent misuse when CONFIG_MALI_BIFROST_DEBUG=n
@ -160,7 +156,6 @@ CONFIGS := \
CONFIG_MALI_BIFROST \
CONFIG_MALI_CSF_SUPPORT \
CONFIG_MALI_BIFROST_GATOR_SUPPORT \
CONFIG_MALI_BIFROST_DMA_FENCE \
CONFIG_MALI_ARBITER_SUPPORT \
CONFIG_MALI_ARBITRATION \
CONFIG_MALI_ARBITER_MODULES \
@ -227,26 +222,47 @@ EXTRA_CFLAGS += -DCONFIG_MALI_PLATFORM_NAME=$(CONFIG_MALI_PLATFORM_NAME)
# KBUILD_EXTRA_SYMBOLS to prevent warnings about unknown functions
#
# The following were added to align with W=1 in scripts/Makefile.extrawarn
# from the Linux source tree
KBUILD_CFLAGS += -Wall -Werror
# The following were added to align with W=1 in scripts/Makefile.extrawarn
# from the Linux source tree (v5.18.14)
KBUILD_CFLAGS += -Wextra -Wunused -Wno-unused-parameter
KBUILD_CFLAGS += -Wmissing-declarations
KBUILD_CFLAGS += -Wmissing-format-attribute
KBUILD_CFLAGS += -Wmissing-prototypes
KBUILD_CFLAGS += -Wold-style-definition
KBUILD_CFLAGS += -Wmissing-include-dirs
# The -Wmissing-include-dirs cannot be enabled as the paths to some of the
# included directories change depending on whether it is an in-tree or
# out-of-tree build.
KBUILD_CFLAGS += $(call cc-option, -Wunused-but-set-variable)
KBUILD_CFLAGS += $(call cc-option, -Wunused-const-variable)
KBUILD_CFLAGS += $(call cc-option, -Wpacked-not-aligned)
KBUILD_CFLAGS += $(call cc-option, -Wstringop-truncation)
# The following turn off the warnings enabled by -Wextra
KBUILD_CFLAGS += -Wno-missing-field-initializers
KBUILD_CFLAGS += -Wno-sign-compare
KBUILD_CFLAGS += -Wno-type-limits
KBUILD_CFLAGS += -Wno-shift-negative-value
# This flag is needed to avoid build errors on older kernels
KBUILD_CFLAGS += $(call cc-option, -Wno-cast-function-type)
KBUILD_CPPFLAGS += -DKBUILD_EXTRA_WARN1
# The following were added to align with W=2 in scripts/Makefile.extrawarn
# from the Linux source tree (v5.18.14)
KBUILD_CFLAGS += -Wdisabled-optimization
# The -Wshadow flag cannot be enabled unless upstream kernels are
# patched to fix redefinitions of certain built-in functions and
# global variables.
KBUILD_CFLAGS += $(call cc-option, -Wlogical-op)
KBUILD_CFLAGS += -Wmissing-field-initializers
KBUILD_CFLAGS += -Wtype-limits
KBUILD_CFLAGS += $(call cc-option, -Wmaybe-uninitialized)
KBUILD_CFLAGS += $(call cc-option, -Wunused-macros)
KBUILD_CPPFLAGS += -DKBUILD_EXTRA_WARN2
# This warning is disabled to avoid build failures in some kernel versions
KBUILD_CFLAGS += -Wno-ignored-qualifiers
all:
$(MAKE) -C $(KDIR) M=$(CURDIR) $(MAKE_ARGS) EXTRA_CFLAGS="$(EXTRA_CFLAGS)" KBUILD_EXTRA_SYMBOLS="$(EXTRA_SYMBOLS)" modules

View File

@ -97,16 +97,6 @@ config MALI_BIFROST_ENABLE_TRACE
Enables tracing in kbase. Trace log available through
the "mali_trace" debugfs file, when the CONFIG_DEBUG_FS is enabled
config MALI_BIFROST_DMA_FENCE
bool "Enable DMA_BUF fence support for Mali"
depends on MALI_BIFROST
default n
help
Support DMA_BUF fences for Mali.
This option should only be enabled if the Linux Kernel has built in
support for DMA_BUF fences.
config MALI_ARBITER_SUPPORT
bool "Enable arbiter support for Mali"
depends on MALI_BIFROST && !MALI_CSF_SUPPORT
@ -129,7 +119,7 @@ config MALI_DMA_BUF_MAP_ON_DEMAND
default n
default y if !DMA_BUF_SYNC_IOCTL_SUPPORTED
help
This option caused kbase to set up the GPU mapping of imported
This option will cause kbase to set up the GPU mapping of imported
dma-buf when needed to run atoms. This is the legacy behavior.
This is intended for testing and the option will get removed in the
@ -157,17 +147,6 @@ menuconfig MALI_BIFROST_EXPERT
Enabling this option and modifying the default settings may produce
a driver with performance or other limitations.
config MALI_2MB_ALLOC
bool "Attempt to allocate 2MB pages"
depends on MALI_BIFROST && MALI_BIFROST_EXPERT
default n
help
Rather than allocating all GPU memory page-by-page, attempt to
allocate 2MB pages from the kernel. This reduces TLB pressure and
helps to prevent memory fragmentation.
If in doubt, say N
config MALI_MEMORY_FULLY_BACKED
bool "Enable memory fully physically-backed"
depends on MALI_BIFROST && MALI_BIFROST_EXPERT
@ -200,10 +179,10 @@ config MALI_FW_CORE_DUMP
Example:
* To explicitly request core dump:
echo 1 >/sys/kernel/debug/mali0/fw_core_dump
echo 1 >/sys/kernel/debug/mali0/fw_core_dump
* To output current core dump (after explicitly requesting a core dump,
or kernel driver reported an internal firmware error):
cat /sys/kernel/debug/mali0/fw_core_dump
or kernel driver reported an internal firmware error):
cat /sys/kernel/debug/mali0/fw_core_dump
choice
prompt "Error injection level"
@ -343,5 +322,5 @@ config MALI_HW_ERRATA_1485982_USE_CLOCK_ALTERNATIVE
slowest clock will be selected.
source "kernel/drivers/gpu/arm/midgard/arbitration/Mconfig"
source "kernel/drivers/gpu/arm/arbitration/Mconfig"
source "kernel/drivers/gpu/arm/midgard/tests/Mconfig"

View File

@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
* (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved.
* (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@ -28,12 +28,12 @@
#include <tl/mali_kbase_tracepoints.h>
#include <linux/of.h>
#include <linux/of_platform.h>
#include "mali_kbase_arbiter_interface.h"
#include "linux/mali_arbiter_interface.h"
/* Arbiter interface version against which this module was implemented */
#define MALI_REQUIRED_KBASE_ARBITER_INTERFACE_VERSION 5
#if MALI_REQUIRED_KBASE_ARBITER_INTERFACE_VERSION != \
MALI_KBASE_ARBITER_INTERFACE_VERSION
MALI_ARBITER_INTERFACE_VERSION
#error "Unsupported Mali Arbiter interface version."
#endif
@ -205,6 +205,7 @@ int kbase_arbif_init(struct kbase_device *kbdev)
if (!pdev->dev.driver || !try_module_get(pdev->dev.driver->owner)) {
dev_err(kbdev->dev, "arbiter_if driver not available\n");
put_device(&pdev->dev);
return -EPROBE_DEFER;
}
kbdev->arb.arb_dev = &pdev->dev;
@ -212,6 +213,7 @@ int kbase_arbif_init(struct kbase_device *kbdev)
if (!arb_if) {
dev_err(kbdev->dev, "arbiter_if driver not ready\n");
module_put(pdev->dev.driver->owner);
put_device(&pdev->dev);
return -EPROBE_DEFER;
}
@ -233,6 +235,7 @@ int kbase_arbif_init(struct kbase_device *kbdev)
if (err) {
dev_err(&pdev->dev, "Failed to register with arbiter\n");
module_put(pdev->dev.driver->owner);
put_device(&pdev->dev);
if (err != -EPROBE_DEFER)
err = -EFAULT;
return err;
@ -262,8 +265,10 @@ void kbase_arbif_destroy(struct kbase_device *kbdev)
arb_if->vm_ops.vm_arb_unregister_dev(kbdev->arb.arb_if);
}
kbdev->arb.arb_if = NULL;
if (kbdev->arb.arb_dev)
if (kbdev->arb.arb_dev) {
module_put(kbdev->arb.arb_dev->driver->owner);
put_device(kbdev->arb.arb_dev);
}
kbdev->arb.arb_dev = NULL;
}

View File

@ -1,49 +0,0 @@
# SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note OR MIT
#
# (C) COPYRIGHT 2012-2021 ARM Limited. All rights reserved.
#
# This program is free software and is provided to you under the terms of the
# GNU General Public License version 2 as published by the Free Software
# Foundation, and any use by you of this program is subject to the terms
# of such GNU license.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, you can access it online at
# http://www.gnu.org/licenses/gpl-2.0.html.
#
#
config MALI_XEN
tristate "Enable Xen Interface reference code"
depends on MALI_ARBITRATION && XEN
default n
help
Enables the build of xen interface modules used in the reference
virtualization setup for Mali
If unsure, say N.
config MALI_ARBITER_MODULES
tristate "Enable mali arbiter modules"
depends on MALI_ARBITRATION
default y
help
Enables the build of the arbiter modules used in the reference
virtualization setup for Mali
If unsure, say N
config MALI_GPU_POWER_MODULES
tristate "Enable gpu power modules"
depends on MALI_ARBITRATION
default y
help
Enables the build of the gpu power modules used in the reference
virtualization setup for Mali
If unsure, say N
source "drivers/gpu/arm/bifrost/arbitration/ptm/Kconfig"

View File

@ -1,28 +0,0 @@
# SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note OR MIT
#
# (C) COPYRIGHT 2021 ARM Limited. All rights reserved.
#
# This program is free software and is provided to you under the terms of the
# GNU General Public License version 2 as published by the Free Software
# Foundation, and any use by you of this program is subject to the terms
# of such GNU license.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, you can access it online at
# http://www.gnu.org/licenses/gpl-2.0.html.
#
#
config MALI_PARTITION_MANAGER
tristate "Enable compilation of partition manager modules"
depends on MALI_ARBITRATION
default n
help
This option enables the compilation of the partition manager
modules used to configure the Mali-G78AE GPU.

View File

@ -1,6 +1,6 @@
# SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
#
# (C) COPYRIGHT 2014-2021 ARM Limited. All rights reserved.
# (C) COPYRIGHT 2014-2022 ARM Limited. All rights reserved.
#
# This program is free software and is provided to you under the terms of the
# GNU General Public License version 2 as published by the Free Software

View File

@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
* (C) COPYRIGHT 2014-2016, 2018, 2020-2021 ARM Limited. All rights reserved.
* (C) COPYRIGHT 2014-2016, 2018, 2020-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@ -22,12 +22,32 @@
#include "backend/gpu/mali_kbase_cache_policy_backend.h"
#include <device/mali_kbase_device.h>
/**
* kbasep_amba_register_present() - Check AMBA_<> register is present
* in the GPU.
* @kbdev: Device pointer
*
* Note: Only for arch version 12.x.1 onwards.
*
* Return: true if AMBA_FEATURES/ENABLE registers are present.
*/
static bool kbasep_amba_register_present(struct kbase_device *kbdev)
{
return (ARCH_MAJOR_REV_REG(kbdev->gpu_props.props.raw_props.gpu_id) >=
GPU_ID2_ARCH_MAJOR_REV_MAKE(12, 1));
}
/*
 * Record the requested coherency mode in @kbdev and program it into the GPU.
 * When the AMBA_* registers are present the mode is merged into AMBA_ENABLE
 * with a read-modify-write; otherwise it is written to COHERENCY_ENABLE.
 */
void kbase_cache_set_coherency_mode(struct kbase_device *kbdev,
		u32 mode)
{
	u32 amba_enable;

	kbdev->current_gpu_coherency_mode = mode;

	/* No AMBA_* registers: the mode goes straight into COHERENCY_ENABLE. */
	if (!kbasep_amba_register_present(kbdev)) {
		kbase_reg_write(kbdev, COHERENCY_ENABLE, mode);
		return;
	}

	/* Only the coherency-protocol field of AMBA_ENABLE is replaced. */
	amba_enable = kbase_reg_read(kbdev, AMBA_ENABLE);
	amba_enable = AMBA_ENABLE_COHERENCY_PROTOCOL_SET(amba_enable, mode);
	kbase_reg_write(kbdev, AMBA_ENABLE, amba_enable);
}
@ -35,9 +55,38 @@ u32 kbase_cache_get_coherency_features(struct kbase_device *kbdev)
{
u32 coherency_features;
if (kbasep_amba_register_present(kbdev))
coherency_features =
kbase_reg_read(kbdev, GPU_CONTROL_REG(AMBA_FEATURES));
else
coherency_features = kbase_reg_read(
kbdev, GPU_CONTROL_REG(COHERENCY_FEATURES));
return coherency_features;
}
/*
 * Update the memory-cache-support field of AMBA_ENABLE to @enable using a
 * read-modify-write. On GPUs without the AMBA_* registers nothing is written
 * and a warning is raised instead.
 */
void kbase_amba_set_memory_cache_support(struct kbase_device *kbdev,
					 bool enable)
{
	u32 amba_enable;

	if (!kbasep_amba_register_present(kbdev)) {
		WARN(1, "memory_cache_support not supported");
		return;
	}

	amba_enable = kbase_reg_read(kbdev, AMBA_ENABLE);
	amba_enable = AMBA_ENABLE_MEMORY_CACHE_SUPPORT_SET(amba_enable, enable);
	kbase_reg_write(kbdev, AMBA_ENABLE, amba_enable);
}
/*
 * Update the invalidate-hint field of AMBA_ENABLE to @enable using a
 * read-modify-write. On GPUs without the AMBA_* registers nothing is written
 * and a warning is raised instead.
 */
void kbase_amba_set_invalidate_hint(struct kbase_device *kbdev, bool enable)
{
	u32 amba_enable;

	if (!kbasep_amba_register_present(kbdev)) {
		WARN(1, "invalidate_hint not supported");
		return;
	}

	amba_enable = kbase_reg_read(kbdev, AMBA_ENABLE);
	amba_enable = AMBA_ENABLE_INVALIDATE_HINT_SET(amba_enable, enable);
	kbase_reg_write(kbdev, AMBA_ENABLE, amba_enable);
}

View File

@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
* (C) COPYRIGHT 2014-2016, 2020-2021 ARM Limited. All rights reserved.
* (C) COPYRIGHT 2014-2016, 2020-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@ -43,4 +43,23 @@ void kbase_cache_set_coherency_mode(struct kbase_device *kbdev,
*/
u32 kbase_cache_get_coherency_features(struct kbase_device *kbdev);
/**
* kbase_amba_set_memory_cache_support() - Sets AMBA memory cache support
* in the GPU.
* @kbdev: Device pointer
* @enable: true for enable.
*
* Note: Only for arch version 12.x.1 onwards.
*/
void kbase_amba_set_memory_cache_support(struct kbase_device *kbdev,
bool enable);
/**
* kbase_amba_set_invalidate_hint() - Sets AMBA invalidate hint
* in the GPU.
* @kbdev: Device pointer
* @enable: true for enable.
*
* Note: Only for arch version 12.x.1 onwards.
*/
void kbase_amba_set_invalidate_hint(struct kbase_device *kbdev, bool enable);
#endif /* _KBASE_CACHE_POLICY_BACKEND_H_ */

View File

@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
* (C) COPYRIGHT 2014, 2016, 2018-2021 ARM Limited. All rights reserved.
* (C) COPYRIGHT 2014, 2016, 2018-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@ -26,7 +26,7 @@
#ifndef _KBASE_INSTR_DEFS_H_
#define _KBASE_INSTR_DEFS_H_
#include <mali_kbase_hwcnt_gpu.h>
#include <hwcnt/mali_kbase_hwcnt_gpu.h>
/*
* Instrumentation State Machine States

View File

@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
* (C) COPYRIGHT 2014-2016, 2018-2021 ARM Limited. All rights reserved.
* (C) COPYRIGHT 2014-2016, 2018-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@ -163,7 +163,6 @@ static irq_handler_t kbase_handler_table[] = {
#ifdef CONFIG_MALI_BIFROST_DEBUG
#define JOB_IRQ_HANDLER JOB_IRQ_TAG
#define MMU_IRQ_HANDLER MMU_IRQ_TAG
#define GPU_IRQ_HANDLER GPU_IRQ_TAG
/**

View File

@ -34,7 +34,7 @@
#include <mali_kbase_ctx_sched.h>
#include <mali_kbase_kinstr_jm.h>
#include <mali_kbase_hwaccess_instr.h>
#include <mali_kbase_hwcnt_context.h>
#include <hwcnt/mali_kbase_hwcnt_context.h>
#include <device/mali_kbase_device.h>
#include <backend/gpu/mali_kbase_irq_internal.h>
#include <backend/gpu/mali_kbase_jm_internal.h>
@ -1440,6 +1440,11 @@ bool kbase_reset_gpu_is_active(struct kbase_device *kbdev)
return true;
}
/*
 * Report whether the backend's reset state is currently
 * KBASE_RESET_GPU_NOT_PENDING. The state is sampled once with atomic_read().
 */
bool kbase_reset_gpu_is_not_pending(struct kbase_device *kbdev)
{
	const int reset_state = atomic_read(&kbdev->hwaccess.backend.reset_gpu);

	return reset_state == KBASE_RESET_GPU_NOT_PENDING;
}
int kbase_reset_gpu_wait(struct kbase_device *kbdev)
{
wait_event(kbdev->hwaccess.backend.reset_wait,

View File

@ -29,7 +29,7 @@
#include <mali_kbase_jm.h>
#include <mali_kbase_js.h>
#include <tl/mali_kbase_tracepoints.h>
#include <mali_kbase_hwcnt_context.h>
#include <hwcnt/mali_kbase_hwcnt_context.h>
#include <mali_kbase_reset_gpu.h>
#include <mali_kbase_kinstr_jm.h>
#include <backend/gpu/mali_kbase_cache_policy_backend.h>

View File

@ -80,31 +80,360 @@ static bool ipa_control_timer_enabled;
#endif
#define LO_MASK(M) ((M) & 0xFFFFFFFF)
#if !MALI_USE_CSF
#define HI_MASK(M) ((M) & 0xFFFFFFFF00000000)
#endif
static u32 get_implementation_register(u32 reg)
{
switch (reg) {
case GPU_CONTROL_REG(SHADER_PRESENT_LO):
return LO_MASK(DUMMY_IMPLEMENTATION_SHADER_PRESENT);
case GPU_CONTROL_REG(TILER_PRESENT_LO):
return LO_MASK(DUMMY_IMPLEMENTATION_TILER_PRESENT);
case GPU_CONTROL_REG(L2_PRESENT_LO):
return LO_MASK(DUMMY_IMPLEMENTATION_L2_PRESENT);
case GPU_CONTROL_REG(STACK_PRESENT_LO):
return LO_MASK(DUMMY_IMPLEMENTATION_STACK_PRESENT);
/* Construct a value for the THREAD_FEATURES register, *except* the two most
* significant bits, which are set to IMPLEMENTATION_MODEL in
* midgard_model_read_reg().
*/
#if MALI_USE_CSF
#define THREAD_FEATURES_PARTIAL(MAX_REGISTERS, MAX_TASK_QUEUE, MAX_TG_SPLIT) \
((MAX_REGISTERS) | ((MAX_TASK_QUEUE) << 24))
#else
#define THREAD_FEATURES_PARTIAL(MAX_REGISTERS, MAX_TASK_QUEUE, MAX_TG_SPLIT) \
((MAX_REGISTERS) | ((MAX_TASK_QUEUE) << 16) | ((MAX_TG_SPLIT) << 24))
#endif
case GPU_CONTROL_REG(SHADER_PRESENT_HI):
case GPU_CONTROL_REG(TILER_PRESENT_HI):
case GPU_CONTROL_REG(L2_PRESENT_HI):
case GPU_CONTROL_REG(STACK_PRESENT_HI):
/* *** FALLTHROUGH *** */
default:
return 0;
}
}
struct error_status_t hw_error_status;
struct {
/**
* struct control_reg_values_t - control register values specific to the GPU being 'emulated'
* @name: GPU name
* @gpu_id: GPU ID to report
* @as_present: Bitmap of address spaces present
* @thread_max_threads: Maximum number of threads per core
* @thread_max_workgroup_size: Maximum number of threads per workgroup
* @thread_max_barrier_size: Maximum number of threads per barrier
* @thread_features: Thread features, NOT INCLUDING the 2
* most-significant bits, which are always set to
* IMPLEMENTATION_MODEL.
* @core_features: Core features
* @tiler_features: Tiler features
* @mmu_features: MMU features
* @gpu_features_lo: GPU features (low)
* @gpu_features_hi: GPU features (high)
* @shader_present: Available shader bitmap
* @stack_present: Core stack present bitmap
*
*/
struct control_reg_values_t {
const char *name;
u32 gpu_id;
u32 as_present;
u32 thread_max_threads;
u32 thread_max_workgroup_size;
u32 thread_max_barrier_size;
u32 thread_features;
u32 core_features;
u32 tiler_features;
u32 mmu_features;
u32 gpu_features_lo;
u32 gpu_features_hi;
u32 shader_present;
u32 stack_present;
};
/*
 * Per-job-slot bookkeeping for the dummy model; struct dummy_model_t embeds
 * NUM_SLOTS of these in its slots[] array.
 *
 * NOTE(review): the per-field notes below are read off the field names only;
 * confirm them against the code that updates the slots.
 */
struct job_slot {
/* presumably non-zero while a job is running on the slot - TODO confirm */
int job_active;
/* presumably non-zero while a further job is queued behind it - TODO confirm */
int job_queued;
/* presumably the slot's pending job-complete interrupt state - TODO confirm */
int job_complete_irq_asserted;
/* presumably the interrupt mask applied to this slot - TODO confirm */
int job_irq_mask;
/* presumably non-zero when the slot is disabled - TODO confirm */
int job_disabled;
};
/*
 * Top-level state of the dummy GPU model: reset, power and cache-maintenance
 * status flags, the per-slot job state and a pointer to the control register
 * values of the GPU being emulated.
 *
 * NOTE(review): apart from the bit-width notes that were already present,
 * the field descriptions below are inferred from names and types; verify
 * against the register read/write handlers.
 */
struct dummy_model_t {
/* presumably reset-completed status and its interrupt mask - TODO confirm */
int reset_completed;
int reset_completed_mask;
#if !MALI_USE_CSF
/* only built when MALI_USE_CSF is 0 */
int prfcnt_sample_completed;
#endif /* !MALI_USE_CSF */
int power_changed_mask; /* 2bits: _ALL,_SINGLE */
int power_changed; /* 1bit */
/* presumably cache-clean completion status and whether its IRQ is enabled */
bool clean_caches_completed;
bool clean_caches_completed_irq_enabled;
#if MALI_USE_CSF
/* only built when MALI_USE_CSF is non-zero */
bool flush_pa_range_completed;
bool flush_pa_range_completed_irq_enabled;
#endif
int power_on; /* 6bits: SHADER[4],TILER,L2 */
u32 stack_power_on_lo;
u32 coherency_enable;
unsigned int job_irq_js_state;
/* one entry per job slot */
struct job_slot slots[NUM_SLOTS];
/* control register values reported for the emulated GPU */
const struct control_reg_values_t *control_reg_values;
u32 l2_config;
/* opaque pointer; its owner is not visible here - TODO confirm usage */
void *data;
};
/* Array associating GPU names with control register values. The first
* one is used in the case of no match.
*/
static const struct control_reg_values_t all_control_reg_values[] = {
{
.name = "tMIx",
.gpu_id = GPU_ID2_MAKE(6, 0, 10, 0, 0, 1, 0),
.as_present = 0xFF,
.thread_max_threads = 0x180,
.thread_max_workgroup_size = 0x180,
.thread_max_barrier_size = 0x180,
.thread_features = THREAD_FEATURES_PARTIAL(0x6000, 4, 10),
.tiler_features = 0x809,
.mmu_features = 0x2830,
.gpu_features_lo = 0,
.gpu_features_hi = 0,
.shader_present = DUMMY_IMPLEMENTATION_SHADER_PRESENT,
.stack_present = DUMMY_IMPLEMENTATION_STACK_PRESENT,
},
{
.name = "tHEx",
.gpu_id = GPU_ID2_MAKE(6, 2, 0, 1, 0, 3, 0),
.as_present = 0xFF,
.thread_max_threads = 0x180,
.thread_max_workgroup_size = 0x180,
.thread_max_barrier_size = 0x180,
.thread_features = THREAD_FEATURES_PARTIAL(0x6000, 4, 10),
.tiler_features = 0x809,
.mmu_features = 0x2830,
.gpu_features_lo = 0,
.gpu_features_hi = 0,
.shader_present = DUMMY_IMPLEMENTATION_SHADER_PRESENT,
.stack_present = DUMMY_IMPLEMENTATION_STACK_PRESENT,
},
{
.name = "tSIx",
.gpu_id = GPU_ID2_MAKE(7, 0, 0, 0, 1, 1, 0),
.as_present = 0xFF,
.thread_max_threads = 0x300,
.thread_max_workgroup_size = 0x180,
.thread_max_barrier_size = 0x180,
.thread_features = THREAD_FEATURES_PARTIAL(0x6000, 4, 10),
.tiler_features = 0x209,
.mmu_features = 0x2821,
.gpu_features_lo = 0,
.gpu_features_hi = 0,
.shader_present = DUMMY_IMPLEMENTATION_SHADER_PRESENT,
.stack_present = DUMMY_IMPLEMENTATION_STACK_PRESENT,
},
{
.name = "tDVx",
.gpu_id = GPU_ID2_MAKE(7, 0, 0, 3, 0, 0, 0),
.as_present = 0xFF,
.thread_max_threads = 0x300,
.thread_max_workgroup_size = 0x180,
.thread_max_barrier_size = 0x180,
.thread_features = THREAD_FEATURES_PARTIAL(0x6000, 4, 10),
.tiler_features = 0x209,
.mmu_features = 0x2821,
.gpu_features_lo = 0,
.gpu_features_hi = 0,
.shader_present = DUMMY_IMPLEMENTATION_SHADER_PRESENT,
.stack_present = DUMMY_IMPLEMENTATION_STACK_PRESENT,
},
{
.name = "tNOx",
.gpu_id = GPU_ID2_MAKE(7, 2, 1, 1, 0, 0, 0),
.as_present = 0xFF,
.thread_max_threads = 0x180,
.thread_max_workgroup_size = 0x180,
.thread_max_barrier_size = 0x180,
.thread_features = THREAD_FEATURES_PARTIAL(0x6000, 4, 10),
.tiler_features = 0x809,
.mmu_features = 0x2830,
.gpu_features_lo = 0,
.gpu_features_hi = 0,
.shader_present = DUMMY_IMPLEMENTATION_SHADER_PRESENT,
.stack_present = DUMMY_IMPLEMENTATION_STACK_PRESENT,
},
{
.name = "tGOx_r0p0",
.gpu_id = GPU_ID2_MAKE(7, 2, 2, 2, 0, 0, 0),
.as_present = 0xFF,
.thread_max_threads = 0x180,
.thread_max_workgroup_size = 0x180,
.thread_max_barrier_size = 0x180,
.thread_features = THREAD_FEATURES_PARTIAL(0x6000, 4, 10),
.tiler_features = 0x809,
.mmu_features = 0x2830,
.gpu_features_lo = 0,
.gpu_features_hi = 0,
.shader_present = DUMMY_IMPLEMENTATION_SHADER_PRESENT,
.stack_present = DUMMY_IMPLEMENTATION_STACK_PRESENT,
},
{
.name = "tGOx_r1p0",
.gpu_id = GPU_ID2_MAKE(7, 4, 0, 2, 1, 0, 0),
.as_present = 0xFF,
.thread_max_threads = 0x180,
.thread_max_workgroup_size = 0x180,
.thread_max_barrier_size = 0x180,
.thread_features = THREAD_FEATURES_PARTIAL(0x6000, 4, 10),
.core_features = 0x2,
.tiler_features = 0x209,
.mmu_features = 0x2823,
.gpu_features_lo = 0,
.gpu_features_hi = 0,
.shader_present = DUMMY_IMPLEMENTATION_SHADER_PRESENT,
.stack_present = DUMMY_IMPLEMENTATION_STACK_PRESENT,
},
{
.name = "tTRx",
.gpu_id = GPU_ID2_MAKE(9, 0, 8, 0, 0, 0, 0),
.as_present = 0xFF,
.thread_max_threads = 0x180,
.thread_max_workgroup_size = 0x180,
.thread_max_barrier_size = 0x180,
.thread_features = THREAD_FEATURES_PARTIAL(0x6000, 4, 0),
.tiler_features = 0x809,
.mmu_features = 0x2830,
.gpu_features_lo = 0,
.gpu_features_hi = 0,
.shader_present = DUMMY_IMPLEMENTATION_SHADER_PRESENT,
.stack_present = DUMMY_IMPLEMENTATION_STACK_PRESENT,
},
{
.name = "tNAx",
.gpu_id = GPU_ID2_MAKE(9, 0, 8, 1, 0, 0, 0),
.as_present = 0xFF,
.thread_max_threads = 0x180,
.thread_max_workgroup_size = 0x180,
.thread_max_barrier_size = 0x180,
.thread_features = THREAD_FEATURES_PARTIAL(0x6000, 4, 0),
.tiler_features = 0x809,
.mmu_features = 0x2830,
.gpu_features_lo = 0,
.gpu_features_hi = 0,
.shader_present = DUMMY_IMPLEMENTATION_SHADER_PRESENT,
.stack_present = DUMMY_IMPLEMENTATION_STACK_PRESENT,
},
{
.name = "tBEx",
.gpu_id = GPU_ID2_MAKE(9, 2, 0, 2, 0, 0, 0),
.as_present = 0xFF,
.thread_max_threads = 0x180,
.thread_max_workgroup_size = 0x180,
.thread_max_barrier_size = 0x180,
.thread_features = THREAD_FEATURES_PARTIAL(0x6000, 4, 0),
.tiler_features = 0x809,
.mmu_features = 0x2830,
.gpu_features_lo = 0,
.gpu_features_hi = 0,
.shader_present = DUMMY_IMPLEMENTATION_SHADER_PRESENT,
.stack_present = DUMMY_IMPLEMENTATION_STACK_PRESENT,
},
{
.name = "tBAx",
.gpu_id = GPU_ID2_MAKE(9, 14, 4, 5, 0, 0, 0),
.as_present = 0xFF,
.thread_max_threads = 0x180,
.thread_max_workgroup_size = 0x180,
.thread_max_barrier_size = 0x180,
.thread_features = THREAD_FEATURES_PARTIAL(0x6000, 4, 0),
.tiler_features = 0x809,
.mmu_features = 0x2830,
.gpu_features_lo = 0,
.gpu_features_hi = 0,
.shader_present = DUMMY_IMPLEMENTATION_SHADER_PRESENT,
.stack_present = DUMMY_IMPLEMENTATION_STACK_PRESENT,
},
{
.name = "tDUx",
.gpu_id = GPU_ID2_MAKE(10, 2, 0, 1, 0, 0, 0),
.as_present = 0xFF,
.thread_max_threads = 0x180,
.thread_max_workgroup_size = 0x180,
.thread_max_barrier_size = 0x180,
.thread_features = THREAD_FEATURES_PARTIAL(0x6000, 4, 0),
.tiler_features = 0x809,
.mmu_features = 0x2830,
.gpu_features_lo = 0,
.gpu_features_hi = 0,
.shader_present = DUMMY_IMPLEMENTATION_SHADER_PRESENT,
.stack_present = DUMMY_IMPLEMENTATION_STACK_PRESENT,
},
{
.name = "tODx",
.gpu_id = GPU_ID2_MAKE(10, 8, 0, 2, 0, 0, 0),
.as_present = 0xFF,
.thread_max_threads = 0x180,
.thread_max_workgroup_size = 0x180,
.thread_max_barrier_size = 0x180,
.thread_features = THREAD_FEATURES_PARTIAL(0x6000, 4, 0),
.tiler_features = 0x809,
.mmu_features = 0x2830,
.gpu_features_lo = 0,
.gpu_features_hi = 0,
.shader_present = DUMMY_IMPLEMENTATION_SHADER_PRESENT,
.stack_present = DUMMY_IMPLEMENTATION_STACK_PRESENT,
},
{
.name = "tGRx",
.gpu_id = GPU_ID2_MAKE(10, 10, 0, 3, 0, 0, 0),
.as_present = 0xFF,
.thread_max_threads = 0x180,
.thread_max_workgroup_size = 0x180,
.thread_max_barrier_size = 0x180,
.thread_features = THREAD_FEATURES_PARTIAL(0x6000, 4, 0),
.core_features = 0x0, /* core_1e16fma2tex */
.tiler_features = 0x809,
.mmu_features = 0x2830,
.gpu_features_lo = 0,
.gpu_features_hi = 0,
.shader_present = DUMMY_IMPLEMENTATION_SHADER_PRESENT,
.stack_present = DUMMY_IMPLEMENTATION_STACK_PRESENT,
},
{
.name = "tVAx",
.gpu_id = GPU_ID2_MAKE(10, 12, 0, 4, 0, 0, 0),
.as_present = 0xFF,
.thread_max_threads = 0x180,
.thread_max_workgroup_size = 0x180,
.thread_max_barrier_size = 0x180,
.thread_features = THREAD_FEATURES_PARTIAL(0x6000, 4, 0),
.core_features = 0x0, /* core_1e16fma2tex */
.tiler_features = 0x809,
.mmu_features = 0x2830,
.gpu_features_lo = 0,
.gpu_features_hi = 0,
.shader_present = DUMMY_IMPLEMENTATION_SHADER_PRESENT,
.stack_present = DUMMY_IMPLEMENTATION_STACK_PRESENT,
},
{
.name = "tTUx",
.gpu_id = GPU_ID2_MAKE(11, 8, 5, 2, 0, 0, 0),
.as_present = 0xFF,
.thread_max_threads = 0x800,
.thread_max_workgroup_size = 0x400,
.thread_max_barrier_size = 0x400,
.thread_features = THREAD_FEATURES_PARTIAL(0x10000, 4, 0),
.core_features = 0x0, /* core_1e32fma2tex */
.tiler_features = 0x809,
.mmu_features = 0x2830,
.gpu_features_lo = 0xf,
.gpu_features_hi = 0,
.shader_present = 0xFF,
.stack_present = 0xF,
},
{
.name = "tTIx",
.gpu_id = GPU_ID2_MAKE(12, 8, 1, 0, 0, 0, 0),
.as_present = 0xFF,
.thread_max_threads = 0x800,
.thread_max_workgroup_size = 0x400,
.thread_max_barrier_size = 0x400,
.thread_features = THREAD_FEATURES_PARTIAL(0x10000, 16, 0),
.core_features = 0x1, /* core_1e64fma4tex */
.tiler_features = 0x809,
.mmu_features = 0x2830,
.gpu_features_lo = 0xf,
.gpu_features_hi = 0,
.shader_present = 0xFF,
.stack_present = 0xF,
},
};
static struct {
spinlock_t access_lock;
#if !MALI_USE_CSF
unsigned long prfcnt_base;
@ -125,74 +454,33 @@ struct {
#endif /* !MALI_USE_CSF */
u64 tiler_counters[KBASE_DUMMY_MODEL_COUNTER_PER_CORE];
u64 l2_counters[KBASE_DUMMY_MODEL_MAX_MEMSYS_BLOCKS *
KBASE_DUMMY_MODEL_COUNTER_PER_CORE];
KBASE_DUMMY_MODEL_COUNTER_PER_CORE];
u64 shader_counters[KBASE_DUMMY_MODEL_MAX_SHADER_CORES *
KBASE_DUMMY_MODEL_COUNTER_PER_CORE];
KBASE_DUMMY_MODEL_COUNTER_PER_CORE];
} performance_counters;
} performance_counters = {
.l2_present = DUMMY_IMPLEMENTATION_L2_PRESENT,
.shader_present = DUMMY_IMPLEMENTATION_SHADER_PRESENT,
};
static u32 get_implementation_register(u32 reg,
const struct control_reg_values_t *const control_reg_values)
{
switch (reg) {
case GPU_CONTROL_REG(SHADER_PRESENT_LO):
return LO_MASK(control_reg_values->shader_present);
case GPU_CONTROL_REG(TILER_PRESENT_LO):
return LO_MASK(DUMMY_IMPLEMENTATION_TILER_PRESENT);
case GPU_CONTROL_REG(L2_PRESENT_LO):
return LO_MASK(DUMMY_IMPLEMENTATION_L2_PRESENT);
case GPU_CONTROL_REG(STACK_PRESENT_LO):
return LO_MASK(control_reg_values->stack_present);
struct job_slot {
int job_active;
int job_queued;
int job_complete_irq_asserted;
int job_irq_mask;
int job_disabled;
};
/**
* struct control_reg_values_t - control register values specific to the GPU being 'emulated'
* @name: GPU name
* @gpu_id: GPU ID to report
* @as_present: Bitmap of address spaces present
* @thread_max_threads: Maximum number of threads per core
* @thread_max_workgroup_size: Maximum number of threads per workgroup
* @thread_max_barrier_size: Maximum number of threads per barrier
* @thread_features: Thread features, NOT INCLUDING the 2
* most-significant bits, which are always set to
* IMPLEMENTATION_MODEL.
* @core_features: Core features
* @tiler_features: Tiler features
* @mmu_features: MMU features
* @gpu_features_lo: GPU features (low)
* @gpu_features_hi: GPU features (high)
*/
struct control_reg_values_t {
const char *name;
u32 gpu_id;
u32 as_present;
u32 thread_max_threads;
u32 thread_max_workgroup_size;
u32 thread_max_barrier_size;
u32 thread_features;
u32 core_features;
u32 tiler_features;
u32 mmu_features;
u32 gpu_features_lo;
u32 gpu_features_hi;
};
struct dummy_model_t {
int reset_completed;
int reset_completed_mask;
#if !MALI_USE_CSF
int prfcnt_sample_completed;
#endif /* !MALI_USE_CSF */
int power_changed_mask; /* 2bits: _ALL,_SINGLE */
int power_changed; /* 1bit */
bool clean_caches_completed;
bool clean_caches_completed_irq_enabled;
int power_on; /* 6bits: SHADER[4],TILER,L2 */
u32 stack_power_on_lo;
u32 coherency_enable;
unsigned int job_irq_js_state;
struct job_slot slots[NUM_SLOTS];
const struct control_reg_values_t *control_reg_values;
u32 l2_config;
void *data;
};
case GPU_CONTROL_REG(SHADER_PRESENT_HI):
case GPU_CONTROL_REG(TILER_PRESENT_HI):
case GPU_CONTROL_REG(L2_PRESENT_HI):
case GPU_CONTROL_REG(STACK_PRESENT_HI):
/* *** FALLTHROUGH *** */
default:
return 0;
}
}
void gpu_device_set_data(void *model, void *data)
{
@ -221,238 +509,6 @@ static char *no_mali_gpu = CONFIG_MALI_NO_MALI_DEFAULT_GPU;
module_param(no_mali_gpu, charp, 0000);
MODULE_PARM_DESC(no_mali_gpu, "GPU to identify as");
/* Construct a value for the THREAD_FEATURES register, *except* the two most
 * significant bits, which are set to IMPLEMENTATION_MODEL in
 * midgard_model_read_reg().
 *
 * The field layout depends on the build:
 *  - MALI_USE_CSF:  MAX_REGISTERS is OR-ed in unshifted and MAX_TASK_QUEUE is
 *    shifted left by 24. MAX_TG_SPLIT is accepted so both variants share one
 *    signature, but it does not contribute to the result.
 *  - !MALI_USE_CSF: MAX_REGISTERS is OR-ed in unshifted, MAX_TASK_QUEUE is
 *    shifted left by 16 and MAX_TG_SPLIT is shifted left by 24.
 */
#if MALI_USE_CSF
#define THREAD_FEATURES_PARTIAL(MAX_REGISTERS, MAX_TASK_QUEUE, MAX_TG_SPLIT) \
((MAX_REGISTERS) | ((MAX_TASK_QUEUE) << 24))
#else
#define THREAD_FEATURES_PARTIAL(MAX_REGISTERS, MAX_TASK_QUEUE, MAX_TG_SPLIT) \
((MAX_REGISTERS) | ((MAX_TASK_QUEUE) << 16) | ((MAX_TG_SPLIT) << 24))
#endif
/* Array associating GPU names with control register values. The first
* one is used in the case of no match.
*/
static const struct control_reg_values_t all_control_reg_values[] = {
{
.name = "tMIx",
.gpu_id = GPU_ID2_MAKE(6, 0, 10, 0, 0, 1, 0),
.as_present = 0xFF,
.thread_max_threads = 0x180,
.thread_max_workgroup_size = 0x180,
.thread_max_barrier_size = 0x180,
.thread_features = THREAD_FEATURES_PARTIAL(0x6000, 4, 10),
.tiler_features = 0x809,
.mmu_features = 0x2830,
.gpu_features_lo = 0,
.gpu_features_hi = 0,
},
{
.name = "tHEx",
.gpu_id = GPU_ID2_MAKE(6, 2, 0, 1, 0, 3, 0),
.as_present = 0xFF,
.thread_max_threads = 0x180,
.thread_max_workgroup_size = 0x180,
.thread_max_barrier_size = 0x180,
.thread_features = THREAD_FEATURES_PARTIAL(0x6000, 4, 10),
.tiler_features = 0x809,
.mmu_features = 0x2830,
.gpu_features_lo = 0,
.gpu_features_hi = 0,
},
{
.name = "tSIx",
.gpu_id = GPU_ID2_MAKE(7, 0, 0, 0, 1, 1, 0),
.as_present = 0xFF,
.thread_max_threads = 0x300,
.thread_max_workgroup_size = 0x180,
.thread_max_barrier_size = 0x180,
.thread_features = THREAD_FEATURES_PARTIAL(0x6000, 4, 10),
.tiler_features = 0x209,
.mmu_features = 0x2821,
.gpu_features_lo = 0,
.gpu_features_hi = 0,
},
{
.name = "tDVx",
.gpu_id = GPU_ID2_MAKE(7, 0, 0, 3, 0, 0, 0),
.as_present = 0xFF,
.thread_max_threads = 0x300,
.thread_max_workgroup_size = 0x180,
.thread_max_barrier_size = 0x180,
.thread_features = THREAD_FEATURES_PARTIAL(0x6000, 4, 10),
.tiler_features = 0x209,
.mmu_features = 0x2821,
.gpu_features_lo = 0,
.gpu_features_hi = 0,
},
{
.name = "tNOx",
.gpu_id = GPU_ID2_MAKE(7, 2, 1, 1, 0, 0, 0),
.as_present = 0xFF,
.thread_max_threads = 0x180,
.thread_max_workgroup_size = 0x180,
.thread_max_barrier_size = 0x180,
.thread_features = THREAD_FEATURES_PARTIAL(0x6000, 4, 10),
.tiler_features = 0x809,
.mmu_features = 0x2830,
.gpu_features_lo = 0,
.gpu_features_hi = 0,
},
{
.name = "tGOx_r0p0",
.gpu_id = GPU_ID2_MAKE(7, 2, 2, 2, 0, 0, 0),
.as_present = 0xFF,
.thread_max_threads = 0x180,
.thread_max_workgroup_size = 0x180,
.thread_max_barrier_size = 0x180,
.thread_features = THREAD_FEATURES_PARTIAL(0x6000, 4, 10),
.tiler_features = 0x809,
.mmu_features = 0x2830,
.gpu_features_lo = 0,
.gpu_features_hi = 0,
},
{
.name = "tGOx_r1p0",
.gpu_id = GPU_ID2_MAKE(7, 4, 0, 2, 1, 0, 0),
.as_present = 0xFF,
.thread_max_threads = 0x180,
.thread_max_workgroup_size = 0x180,
.thread_max_barrier_size = 0x180,
.thread_features = THREAD_FEATURES_PARTIAL(0x6000, 4, 10),
.core_features = 0x2,
.tiler_features = 0x209,
.mmu_features = 0x2823,
.gpu_features_lo = 0,
.gpu_features_hi = 0,
},
{
.name = "tTRx",
.gpu_id = GPU_ID2_MAKE(9, 0, 8, 0, 0, 0, 0),
.as_present = 0xFF,
.thread_max_threads = 0x180,
.thread_max_workgroup_size = 0x180,
.thread_max_barrier_size = 0x180,
.thread_features = THREAD_FEATURES_PARTIAL(0x6000, 4, 0),
.tiler_features = 0x809,
.mmu_features = 0x2830,
.gpu_features_lo = 0,
.gpu_features_hi = 0,
},
{
.name = "tNAx",
.gpu_id = GPU_ID2_MAKE(9, 0, 8, 1, 0, 0, 0),
.as_present = 0xFF,
.thread_max_threads = 0x180,
.thread_max_workgroup_size = 0x180,
.thread_max_barrier_size = 0x180,
.thread_features = THREAD_FEATURES_PARTIAL(0x6000, 4, 0),
.tiler_features = 0x809,
.mmu_features = 0x2830,
.gpu_features_lo = 0,
.gpu_features_hi = 0,
},
{
.name = "tBEx",
.gpu_id = GPU_ID2_MAKE(9, 2, 0, 2, 0, 0, 0),
.as_present = 0xFF,
.thread_max_threads = 0x180,
.thread_max_workgroup_size = 0x180,
.thread_max_barrier_size = 0x180,
.thread_features = THREAD_FEATURES_PARTIAL(0x6000, 4, 0),
.tiler_features = 0x809,
.mmu_features = 0x2830,
.gpu_features_lo = 0,
.gpu_features_hi = 0,
},
{
.name = "tBAx",
.gpu_id = GPU_ID2_MAKE(9, 14, 4, 5, 0, 0, 0),
.as_present = 0xFF,
.thread_max_threads = 0x180,
.thread_max_workgroup_size = 0x180,
.thread_max_barrier_size = 0x180,
.thread_features = THREAD_FEATURES_PARTIAL(0x6000, 4, 0),
.tiler_features = 0x809,
.mmu_features = 0x2830,
.gpu_features_lo = 0,
.gpu_features_hi = 0,
},
{
.name = "tDUx",
.gpu_id = GPU_ID2_MAKE(10, 2, 0, 1, 0, 0, 0),
.as_present = 0xFF,
.thread_max_threads = 0x180,
.thread_max_workgroup_size = 0x180,
.thread_max_barrier_size = 0x180,
.thread_features = THREAD_FEATURES_PARTIAL(0x6000, 4, 0),
.tiler_features = 0x809,
.mmu_features = 0x2830,
.gpu_features_lo = 0,
.gpu_features_hi = 0,
},
{
.name = "tODx",
.gpu_id = GPU_ID2_MAKE(10, 8, 0, 2, 0, 0, 0),
.as_present = 0xFF,
.thread_max_threads = 0x180,
.thread_max_workgroup_size = 0x180,
.thread_max_barrier_size = 0x180,
.thread_features = THREAD_FEATURES_PARTIAL(0x6000, 4, 0),
.tiler_features = 0x809,
.mmu_features = 0x2830,
.gpu_features_lo = 0,
.gpu_features_hi = 0,
},
{
.name = "tGRx",
.gpu_id = GPU_ID2_MAKE(10, 10, 0, 3, 0, 0, 0),
.as_present = 0xFF,
.thread_max_threads = 0x180,
.thread_max_workgroup_size = 0x180,
.thread_max_barrier_size = 0x180,
.thread_features = THREAD_FEATURES_PARTIAL(0x6000, 4, 0),
.core_features = 0x0, /* core_1e16fma2tex */
.tiler_features = 0x809,
.mmu_features = 0x2830,
.gpu_features_lo = 0,
.gpu_features_hi = 0,
},
{
.name = "tVAx",
.gpu_id = GPU_ID2_MAKE(10, 12, 0, 4, 0, 0, 0),
.as_present = 0xFF,
.thread_max_threads = 0x180,
.thread_max_workgroup_size = 0x180,
.thread_max_barrier_size = 0x180,
.thread_features = THREAD_FEATURES_PARTIAL(0x6000, 4, 0),
.core_features = 0x0, /* core_1e16fma2tex */
.tiler_features = 0x809,
.mmu_features = 0x2830,
.gpu_features_lo = 0,
.gpu_features_hi = 0,
},
{
.name = "tTUx",
.gpu_id = GPU_ID2_MAKE(11, 8, 5, 2, 0, 0, 0),
.as_present = 0xFF,
.thread_max_threads = 0x800,
.thread_max_workgroup_size = 0x400,
.thread_max_barrier_size = 0x400,
.thread_features = THREAD_FEATURES_PARTIAL(0x10000, 4, 0),
.core_features = 0x0, /* core_1e32fma2tex */
.tiler_features = 0x809,
.mmu_features = 0x2830,
.gpu_features_lo = 0xf,
.gpu_features_hi = 0,
},
};
struct error_status_t hw_error_status;
#if MALI_USE_CSF
static u32 gpu_model_get_prfcnt_value(enum kbase_ipa_core_type core_type,
u32 cnt_idx, bool is_low_word)
@ -1011,6 +1067,21 @@ static const struct control_reg_values_t *find_control_reg_values(const char *gp
size_t i;
const struct control_reg_values_t *ret = NULL;
/* Edge case for tGOx, as it has 2 entries in the table for its R0 and R1
* revisions respectively. As none of them are named "tGOx" the name comparison
* needs to be fixed in these cases. CONFIG_GPU_HWVER should be one of "r0p0"
* or "r1p0" and is derived from the DDK's build configuration. In cases
* where it is unavailable, it defaults to tGOx r1p0.
*/
if (!strcmp(gpu, "tGOx")) {
#ifdef CONFIG_GPU_HWVER
if (!strcmp(CONFIG_GPU_HWVER, "r0p0"))
gpu = "tGOx_r0p0";
else if (!strcmp(CONFIG_GPU_HWVER, "r1p0"))
#endif /* CONFIG_GPU_HWVER defined */
gpu = "tGOx_r1p0";
}
for (i = 0; i < ARRAY_SIZE(all_control_reg_values); ++i) {
const struct control_reg_values_t * const fcrv = &all_control_reg_values[i];
@ -1043,6 +1114,10 @@ void *midgard_model_create(const void *config)
dummy->job_irq_js_state = 0;
init_register_statuses(dummy);
dummy->control_reg_values = find_control_reg_values(no_mali_gpu);
performance_counters.l2_present = get_implementation_register(
GPU_CONTROL_REG(L2_PRESENT_LO), dummy->control_reg_values);
performance_counters.shader_present = get_implementation_register(
GPU_CONTROL_REG(SHADER_PRESENT_LO), dummy->control_reg_values);
}
return dummy;
}
@ -1066,6 +1141,8 @@ static void midgard_model_get_outputs(void *h)
hw_error_status.gpu_error_irq ||
#if !MALI_USE_CSF
dummy->prfcnt_sample_completed ||
#else
(dummy->flush_pa_range_completed && dummy->flush_pa_range_completed_irq_enabled) ||
#endif
(dummy->clean_caches_completed && dummy->clean_caches_completed_irq_enabled))
gpu_device_raise_irq(dummy, GPU_DUMMY_GPU_IRQ);
@ -1235,6 +1312,9 @@ u8 midgard_model_write_reg(void *h, u32 addr, u32 value)
dummy->reset_completed_mask = (value >> 8) & 0x01;
dummy->power_changed_mask = (value >> 9) & 0x03;
dummy->clean_caches_completed_irq_enabled = (value & (1u << 17)) != 0u;
#if MALI_USE_CSF
dummy->flush_pa_range_completed_irq_enabled = (value & (1u << 20)) != 0u;
#endif
} else if (addr == GPU_CONTROL_REG(COHERENCY_ENABLE)) {
dummy->coherency_enable = value;
} else if (addr == GPU_CONTROL_REG(GPU_IRQ_CLEAR)) {
@ -1247,10 +1327,17 @@ u8 midgard_model_write_reg(void *h, u32 addr, u32 value)
if (value & (1 << 17))
dummy->clean_caches_completed = false;
#if !MALI_USE_CSF
if (value & PRFCNT_SAMPLE_COMPLETED)
#if MALI_USE_CSF
if (value & (1u << 20))
dummy->flush_pa_range_completed = false;
#endif /* MALI_USE_CSF */
#if !MALI_USE_CSF
if (value & PRFCNT_SAMPLE_COMPLETED) /* (1 << 16) */
dummy->prfcnt_sample_completed = 0;
#endif /* !MALI_USE_CSF */
/*update error status */
hw_error_status.gpu_error_irq &= ~(value);
} else if (addr == GPU_CONTROL_REG(GPU_COMMAND)) {
@ -1274,7 +1361,15 @@ u8 midgard_model_write_reg(void *h, u32 addr, u32 value)
pr_debug("clean caches requested");
dummy->clean_caches_completed = true;
break;
#if !MALI_USE_CSF
#if MALI_USE_CSF
case GPU_COMMAND_FLUSH_PA_RANGE_CLN_INV_L2:
case GPU_COMMAND_FLUSH_PA_RANGE_CLN_INV_L2_LSC:
case GPU_COMMAND_FLUSH_PA_RANGE_CLN_INV_FULL:
pr_debug("pa range flush requested");
dummy->flush_pa_range_completed = true;
break;
#endif /* MALI_USE_CSF */
#if !MALI_USE_CSF
case GPU_COMMAND_PRFCNT_SAMPLE:
midgard_model_dump_prfcnt();
dummy->prfcnt_sample_completed = 1;
@ -1282,6 +1377,11 @@ u8 midgard_model_write_reg(void *h, u32 addr, u32 value)
default:
break;
}
#if MALI_USE_CSF
} else if (addr >= GPU_CONTROL_REG(GPU_COMMAND_ARG0_LO) &&
addr <= GPU_CONTROL_REG(GPU_COMMAND_ARG1_HI)) {
/* Writes ignored */
#endif
} else if (addr == GPU_CONTROL_REG(L2_CONFIG)) {
dummy->l2_config = value;
}
@ -1291,6 +1391,12 @@ u8 midgard_model_write_reg(void *h, u32 addr, u32 value)
(CSF_NUM_DOORBELL * CSF_HW_DOORBELL_PAGE_SIZE))) {
if (addr == GPU_CONTROL_REG(CSF_HW_DOORBELL_PAGE_OFFSET))
hw_error_status.job_irq_status = JOB_IRQ_GLOBAL_IF;
} else if ((addr >= GPU_CONTROL_REG(SYSC_ALLOC0)) &&
(addr < GPU_CONTROL_REG(SYSC_ALLOC(SYSC_ALLOC_COUNT)))) {
/* Do nothing */
} else if ((addr >= GPU_CONTROL_REG(ASN_HASH_0)) &&
(addr < GPU_CONTROL_REG(ASN_HASH(ASN_HASH_COUNT)))) {
/* Do nothing */
} else if (addr == IPA_CONTROL_REG(COMMAND)) {
pr_debug("Received IPA_CONTROL command");
} else if (addr == IPA_CONTROL_REG(TIMER)) {
@ -1315,8 +1421,7 @@ u8 midgard_model_write_reg(void *h, u32 addr, u32 value)
hw_error_status.mmu_irq_mask = value;
} else if (addr == MMU_REG(MMU_IRQ_CLEAR)) {
hw_error_status.mmu_irq_rawstat &= (~value);
} else if ((addr >= MMU_AS_REG(0, AS_TRANSTAB_LO)) &&
(addr <= MMU_AS_REG(15, AS_STATUS))) {
} else if ((addr >= MMU_AS_REG(0, AS_TRANSTAB_LO)) && (addr <= MMU_AS_REG(15, AS_STATUS))) {
int mem_addr_space = (addr - MMU_AS_REG(0, AS_TRANSTAB_LO))
>> 6;
@ -1443,7 +1548,8 @@ u8 midgard_model_write_reg(void *h, u32 addr, u32 value)
dummy->power_changed = 1;
break;
case SHADER_PWRON_LO:
dummy->power_on |= (value & 0xF) << 2;
dummy->power_on |=
(value & dummy->control_reg_values->shader_present) << 2;
dummy->power_changed = 1;
break;
case L2_PWRON_LO:
@ -1459,7 +1565,8 @@ u8 midgard_model_write_reg(void *h, u32 addr, u32 value)
dummy->power_changed = 1;
break;
case SHADER_PWROFF_LO:
dummy->power_on &= ~((value & 0xF) << 2);
dummy->power_on &=
~((value & dummy->control_reg_values->shader_present) << 2);
dummy->power_changed = 1;
break;
case L2_PWROFF_LO:
@ -1546,6 +1653,9 @@ u8 midgard_model_read_reg(void *h, u32 addr, u32 * const value)
else if (addr == GPU_CONTROL_REG(GPU_IRQ_MASK)) {
*value = (dummy->reset_completed_mask << 8) |
((dummy->clean_caches_completed_irq_enabled ? 1u : 0u) << 17) |
#if MALI_USE_CSF
((dummy->flush_pa_range_completed_irq_enabled ? 1u : 0u) << 20) |
#endif
(dummy->power_changed_mask << 9) | (1 << 7) | 1;
pr_debug("GPU_IRQ_MASK read %x", *value);
} else if (addr == GPU_CONTROL_REG(GPU_IRQ_RAWSTAT)) {
@ -1555,6 +1665,9 @@ u8 midgard_model_read_reg(void *h, u32 addr, u32 * const value)
(dummy->prfcnt_sample_completed ? PRFCNT_SAMPLE_COMPLETED : 0) |
#endif /* !MALI_USE_CSF */
((dummy->clean_caches_completed ? 1u : 0u) << 17) |
#if MALI_USE_CSF
((dummy->flush_pa_range_completed ? 1u : 0u) << 20) |
#endif
hw_error_status.gpu_error_irq;
pr_debug("GPU_IRQ_RAWSTAT read %x", *value);
} else if (addr == GPU_CONTROL_REG(GPU_IRQ_STATUS)) {
@ -1569,6 +1682,13 @@ u8 midgard_model_read_reg(void *h, u32 addr, u32 * const value)
1u :
0u)
<< 17) |
#if MALI_USE_CSF
(((dummy->flush_pa_range_completed &&
dummy->flush_pa_range_completed_irq_enabled) ?
1u :
0u)
<< 20) |
#endif
hw_error_status.gpu_error_irq;
pr_debug("GPU_IRQ_STAT read %x", *value);
} else if (addr == GPU_CONTROL_REG(GPU_STATUS)) {
@ -1581,8 +1701,18 @@ u8 midgard_model_read_reg(void *h, u32 addr, u32 * const value)
*value = hw_error_status.gpu_fault_status;
} else if (addr == GPU_CONTROL_REG(L2_CONFIG)) {
*value = dummy->l2_config;
} else if ((addr >= GPU_CONTROL_REG(SHADER_PRESENT_LO)) &&
(addr <= GPU_CONTROL_REG(L2_MMU_CONFIG))) {
}
#if MALI_USE_CSF
else if ((addr >= GPU_CONTROL_REG(SYSC_ALLOC0)) &&
(addr < GPU_CONTROL_REG(SYSC_ALLOC(SYSC_ALLOC_COUNT)))) {
*value = 0;
} else if ((addr >= GPU_CONTROL_REG(ASN_HASH_0)) &&
(addr < GPU_CONTROL_REG(ASN_HASH(ASN_HASH_COUNT)))) {
*value = 0;
}
#endif
else if ((addr >= GPU_CONTROL_REG(SHADER_PRESENT_LO)) &&
(addr <= GPU_CONTROL_REG(L2_MMU_CONFIG))) {
switch (addr) {
case GPU_CONTROL_REG(SHADER_PRESENT_LO):
case GPU_CONTROL_REG(SHADER_PRESENT_HI):
@ -1592,27 +1722,27 @@ u8 midgard_model_read_reg(void *h, u32 addr, u32 * const value)
case GPU_CONTROL_REG(L2_PRESENT_HI):
case GPU_CONTROL_REG(STACK_PRESENT_LO):
case GPU_CONTROL_REG(STACK_PRESENT_HI):
*value = get_implementation_register(addr);
*value = get_implementation_register(addr, dummy->control_reg_values);
break;
case GPU_CONTROL_REG(SHADER_READY_LO):
*value = (dummy->power_on >> 0x02) &
get_implementation_register(
GPU_CONTROL_REG(SHADER_PRESENT_LO));
get_implementation_register(GPU_CONTROL_REG(SHADER_PRESENT_LO),
dummy->control_reg_values);
break;
case GPU_CONTROL_REG(TILER_READY_LO):
*value = (dummy->power_on >> 0x01) &
get_implementation_register(
GPU_CONTROL_REG(TILER_PRESENT_LO));
get_implementation_register(GPU_CONTROL_REG(TILER_PRESENT_LO),
dummy->control_reg_values);
break;
case GPU_CONTROL_REG(L2_READY_LO):
*value = dummy->power_on &
get_implementation_register(
GPU_CONTROL_REG(L2_PRESENT_LO));
get_implementation_register(GPU_CONTROL_REG(L2_PRESENT_LO),
dummy->control_reg_values);
break;
case GPU_CONTROL_REG(STACK_READY_LO):
*value = dummy->stack_power_on_lo &
get_implementation_register(
GPU_CONTROL_REG(STACK_PRESENT_LO));
get_implementation_register(GPU_CONTROL_REG(STACK_PRESENT_LO),
dummy->control_reg_values);
break;
case GPU_CONTROL_REG(SHADER_READY_HI):
@ -1904,6 +2034,8 @@ u8 midgard_model_read_reg(void *h, u32 addr, u32 * const value)
*value = gpu_model_get_prfcnt_value(KBASE_IPA_CORE_TYPE_SHADER,
counter_index, is_low_word);
} else if (addr == USER_REG(LATEST_FLUSH)) {
*value = 0;
}
#endif
else if (addr == GPU_CONTROL_REG(GPU_FEATURES_LO)) {

View File

@ -23,13 +23,6 @@
#include <linux/random.h>
#include "backend/gpu/mali_kbase_model_dummy.h"
/* all the error conditions supported by the model */
#define TOTAL_FAULTS 27
/* maximum number of levels in the MMU translation table tree */
#define MAX_MMU_TABLE_LEVEL 4
/* worst case scenario is <1 MMU fault + 1 job fault + 2 GPU faults> */
#define MAX_CONCURRENT_FAULTS 3
static struct kbase_error_atom *error_track_list;
unsigned int rand_seed;
@ -40,6 +33,14 @@ unsigned int error_probability = 50; /* to be set between 0 and 100 */
unsigned int multiple_error_probability = 50;
#ifdef CONFIG_MALI_ERROR_INJECT_RANDOM
/* all the error conditions supported by the model */
#define TOTAL_FAULTS 27
/* maximum number of levels in the MMU translation table tree */
#define MAX_MMU_TABLE_LEVEL 4
/* worst case scenario is <1 MMU fault + 1 job fault + 2 GPU faults> */
#define MAX_CONCURRENT_FAULTS 3
/**
* gpu_generate_error - Generate GPU error
*/

View File

@ -36,7 +36,7 @@
#include <linux/pm_runtime.h>
#include <mali_kbase_reset_gpu.h>
#endif /* !MALI_USE_CSF */
#include <mali_kbase_hwcnt_context.h>
#include <hwcnt/mali_kbase_hwcnt_context.h>
#include <backend/gpu/mali_kbase_pm_internal.h>
#include <backend/gpu/mali_kbase_devfreq.h>
#include <mali_kbase_dummy_job_wa.h>

View File

@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
* (C) COPYRIGHT 2013-2021 ARM Limited. All rights reserved.
* (C) COPYRIGHT 2013-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@ -92,29 +92,10 @@ void kbase_devfreq_set_core_mask(struct kbase_device *kbdev, u64 core_mask)
* for those cores to get powered down
*/
if ((core_mask & old_core_mask) != old_core_mask) {
bool can_wait;
spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
can_wait = kbdev->pm.backend.gpu_ready && kbase_pm_is_mcu_desired(kbdev);
spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
/* This check is ideally not required, the wait function can
* deal with the GPU power down. But it has been added to
* address the scenario where down-scaling request comes from
* the platform specific code soon after the GPU power down
* and at the same time application thread tries to
* power up the GPU (on the flush of GPU queue).
* The platform specific @ref callback_power_on that gets
* invoked on power up does not return until down-scaling
* request is complete. The check mitigates the race caused by
* the problem in platform specific code.
*/
if (likely(can_wait)) {
if (kbase_pm_wait_for_desired_state(kbdev)) {
dev_warn(kbdev->dev,
"Wait for update of core_mask from %llx to %llx failed",
old_core_mask, core_mask);
}
if (kbase_pm_wait_for_cores_down_scale(kbdev)) {
dev_warn(kbdev->dev,
"Wait for update of core_mask from %llx to %llx failed",
old_core_mask, core_mask);
}
}
#endif

View File

@ -39,7 +39,7 @@
#include <mali_kbase_reset_gpu.h>
#include <mali_kbase_ctx_sched.h>
#include <mali_kbase_hwcnt_context.h>
#include <hwcnt/mali_kbase_hwcnt_context.h>
#include <mali_kbase_pbha.h>
#include <backend/gpu/mali_kbase_cache_policy_backend.h>
#include <device/mali_kbase_device.h>
@ -538,6 +538,14 @@ static void kbase_pm_l2_config_override(struct kbase_device *kbdev)
if (!kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_L2_CONFIG))
return;
#if MALI_USE_CSF
if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_PBHA_HWU)) {
val = kbase_reg_read(kbdev, GPU_CONTROL_REG(L2_CONFIG));
kbase_reg_write(kbdev, GPU_CONTROL_REG(L2_CONFIG),
L2_CONFIG_PBHA_HWU_SET(val, kbdev->pbha_propagate_bits));
}
#endif /* MALI_USE_CSF */
/*
* Skip if size and hash are not given explicitly,
* which means default values are used.
@ -599,6 +607,21 @@ static const char *kbase_mcu_state_to_string(enum kbase_mcu_state state)
return strings[state];
}
/**
 * kbase_ktrace_log_mcu_state - Add a ktrace entry for an MCU state
 *
 * @kbdev: The kbase device structure, passed through to KBASE_KTRACE_ADD
 * @state: The MCU state to record
 *
 * When KBASE_KTRACE_ENABLE is non-zero, emits KBASE_KTRACE_ADD() with the
 * PM_MCU_<name> trace code that matches @state, using @state as the info
 * value. When KBASE_KTRACE_ENABLE is zero the function body is empty.
 */
static
void kbase_ktrace_log_mcu_state(struct kbase_device *kbdev, enum kbase_mcu_state state)
{
#if KBASE_KTRACE_ENABLE
switch (state) {
/* X-macro: KBASEP_MCU_STATE(n) is defined here as one switch case and then
 * the states header is included, so a case is generated for every
 * KBASEP_MCU_STATE() use in that header.
 * NOTE(review): presumably the header lists every enum kbase_mcu_state
 * value - confirm against mali_kbase_pm_mcu_states.h.
 */
#define KBASEP_MCU_STATE(n) \
case KBASE_MCU_ ## n: \
KBASE_KTRACE_ADD(kbdev, PM_MCU_ ## n, NULL, state); \
break;
#include "mali_kbase_pm_mcu_states.h"
#undef KBASEP_MCU_STATE
}
#endif
}
static inline bool kbase_pm_handle_mcu_core_attr_update(struct kbase_device *kbdev)
{
struct kbase_pm_backend_data *backend = &kbdev->pm.backend;
@ -689,7 +712,6 @@ static void wait_mcu_as_inactive(struct kbase_device *kbdev)
}
#endif
/**
* kbasep_pm_toggle_power_interrupt - Toggles the IRQ mask for power interrupts
* from the firmware
@ -697,10 +719,10 @@ static void wait_mcu_as_inactive(struct kbase_device *kbdev)
* @kbdev: Pointer to the device
* @enable: boolean indicating to enable interrupts or not
*
* The POWER_CHANGED_ALL and POWER_CHANGED_SINGLE interrupts can be disabled
* after L2 has been turned on when FW is controlling the power for the shader
* cores. Correspondingly, the interrupts can be re-enabled after the MCU has
* been disabled before the power down of L2.
* The POWER_CHANGED_ALL interrupt can be disabled after L2 has been turned on
* when FW is controlling the power for the shader cores. Correspondingly, the
* interrupts can be re-enabled after the MCU has been disabled before the
* power down of L2.
*/
static void kbasep_pm_toggle_power_interrupt(struct kbase_device *kbdev, bool enable)
{
@ -710,10 +732,12 @@ static void kbasep_pm_toggle_power_interrupt(struct kbase_device *kbdev, bool en
irq_mask = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK));
if (enable)
irq_mask |= POWER_CHANGED_ALL | POWER_CHANGED_SINGLE;
else
irq_mask &= ~(POWER_CHANGED_ALL | POWER_CHANGED_SINGLE);
if (enable) {
irq_mask |= POWER_CHANGED_ALL;
kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_CLEAR), POWER_CHANGED_ALL);
} else {
irq_mask &= ~POWER_CHANGED_ALL;
}
kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), irq_mask);
}
@ -1028,10 +1052,12 @@ static int kbase_pm_mcu_update_state(struct kbase_device *kbdev)
backend->mcu_state);
}
if (backend->mcu_state != prev_state)
if (backend->mcu_state != prev_state) {
dev_dbg(kbdev->dev, "MCU state transition: %s to %s\n",
kbase_mcu_state_to_string(prev_state),
kbase_mcu_state_to_string(backend->mcu_state));
kbase_ktrace_log_mcu_state(kbdev, backend->mcu_state);
}
} while (backend->mcu_state != prev_state);
@ -1079,6 +1105,21 @@ static const char *kbase_l2_core_state_to_string(enum kbase_l2_core_state state)
return strings[state];
}
/**
 * kbase_ktrace_log_l2_core_state - Add a ktrace entry for an L2 state
 *
 * @kbdev: The kbase device structure, passed through to KBASE_KTRACE_ADD
 * @state: The L2 core state to record
 *
 * When KBASE_KTRACE_ENABLE is non-zero, emits KBASE_KTRACE_ADD() with the
 * PM_L2_<name> trace code that matches @state, using @state as the info
 * value. When KBASE_KTRACE_ENABLE is zero the function body is empty.
 */
static
void kbase_ktrace_log_l2_core_state(struct kbase_device *kbdev, enum kbase_l2_core_state state)
{
#if KBASE_KTRACE_ENABLE
switch (state) {
/* X-macro: KBASEP_L2_STATE(n) is defined here as one switch case and then
 * the states header is included, so a case is generated for every
 * KBASEP_L2_STATE() use in that header.
 * NOTE(review): presumably the header lists every enum kbase_l2_core_state
 * value - confirm against mali_kbase_pm_l2_states.h.
 */
#define KBASEP_L2_STATE(n) \
case KBASE_L2_ ## n: \
KBASE_KTRACE_ADD(kbdev, PM_L2_ ## n, NULL, state); \
break;
#include "mali_kbase_pm_l2_states.h"
#undef KBASEP_L2_STATE
}
#endif
}
#if !MALI_USE_CSF
/* On powering on the L2, the tracked kctx becomes stale and can be cleared.
* This enables the backend to spare the START_FLUSH.INV_SHADER_OTHER
@ -1136,18 +1177,13 @@ static int kbase_pm_l2_update_state(struct kbase_device *kbdev)
KBASE_PM_CORE_L2);
u64 l2_ready = kbase_pm_get_ready_cores(kbdev,
KBASE_PM_CORE_L2);
#ifdef CONFIG_MALI_ARBITER_SUPPORT
u64 tiler_trans = kbase_pm_get_trans_cores(
kbdev, KBASE_PM_CORE_TILER);
u64 tiler_ready = kbase_pm_get_ready_cores(
kbdev, KBASE_PM_CORE_TILER);
#ifdef CONFIG_MALI_ARBITER_SUPPORT
/*
* kbase_pm_get_ready_cores and kbase_pm_get_trans_cores
* are vulnerable to corruption if gpu is lost
*/
if (kbase_is_gpu_removed(kbdev)
|| kbase_pm_is_gpu_lost(kbdev)) {
if (kbase_is_gpu_removed(kbdev) || kbase_pm_is_gpu_lost(kbdev)) {
backend->shaders_state =
KBASE_SHADERS_OFF_CORESTACK_OFF;
backend->hwcnt_desired = false;
@ -1161,16 +1197,19 @@ static int kbase_pm_l2_update_state(struct kbase_device *kbdev)
*/
backend->l2_state =
KBASE_L2_ON_HWCNT_DISABLE;
KBASE_KTRACE_ADD(kbdev, PM_L2_ON_HWCNT_DISABLE, NULL,
backend->l2_state);
kbase_pm_trigger_hwcnt_disable(kbdev);
}
if (backend->hwcnt_disabled) {
backend->l2_state = KBASE_L2_OFF;
KBASE_KTRACE_ADD(kbdev, PM_L2_OFF, NULL, backend->l2_state);
dev_dbg(kbdev->dev, "GPU lost has occurred - L2 off\n");
}
break;
}
#endif /* CONFIG_MALI_ARBITER_SUPPORT */
#endif
/* mask off ready from trans in case transitions finished
* between the register reads
@ -1182,6 +1221,12 @@ static int kbase_pm_l2_update_state(struct kbase_device *kbdev)
switch (backend->l2_state) {
case KBASE_L2_OFF:
if (kbase_pm_is_l2_desired(kbdev)) {
#if MALI_USE_CSF && defined(KBASE_PM_RUNTIME)
/* Enable HW timer of IPA control before
* L2 cache is powered-up.
*/
kbase_ipa_control_handle_gpu_sleep_exit(kbdev);
#endif
/*
* Set the desired config for L2 before
* powering it on
@ -1221,14 +1266,12 @@ static int kbase_pm_l2_update_state(struct kbase_device *kbdev)
l2_power_up_done = false;
if (!l2_trans && l2_ready == l2_present) {
if (need_tiler_control(kbdev)) {
#ifndef CONFIG_MALI_ARBITER_SUPPORT
u64 tiler_trans = kbase_pm_get_trans_cores(
kbdev, KBASE_PM_CORE_TILER);
u64 tiler_ready = kbase_pm_get_ready_cores(
kbdev, KBASE_PM_CORE_TILER);
#endif
tiler_trans &= ~tiler_ready;
if (!tiler_trans && tiler_ready == tiler_present) {
KBASE_KTRACE_ADD(kbdev,
PM_CORES_CHANGE_AVAILABLE_TILER,
@ -1437,12 +1480,26 @@ static int kbase_pm_l2_update_state(struct kbase_device *kbdev)
/* We only need to check the L2 here - if the L2
* is off then the tiler is definitely also off.
*/
if (!l2_trans && !l2_ready)
if (!l2_trans && !l2_ready) {
#if MALI_USE_CSF && defined(KBASE_PM_RUNTIME)
/* Allow clock gating within the GPU and prevent it
* from being seen as active during sleep.
*/
kbase_ipa_control_handle_gpu_sleep_enter(kbdev);
#endif
/* L2 is now powered off */
backend->l2_state = KBASE_L2_OFF;
}
} else {
if (!kbdev->cache_clean_in_progress)
if (!kbdev->cache_clean_in_progress) {
#if MALI_USE_CSF && defined(KBASE_PM_RUNTIME)
/* Allow clock gating within the GPU and prevent it
* from being seen as active during sleep.
*/
kbase_ipa_control_handle_gpu_sleep_enter(kbdev);
#endif
backend->l2_state = KBASE_L2_OFF;
}
}
break;
@ -1457,11 +1514,13 @@ static int kbase_pm_l2_update_state(struct kbase_device *kbdev)
backend->l2_state);
}
if (backend->l2_state != prev_state)
if (backend->l2_state != prev_state) {
dev_dbg(kbdev->dev, "L2 state transition: %s to %s\n",
kbase_l2_core_state_to_string(prev_state),
kbase_l2_core_state_to_string(
backend->l2_state));
kbase_ktrace_log_l2_core_state(kbdev, backend->l2_state);
}
} while (backend->l2_state != prev_state);
@ -1925,7 +1984,7 @@ static bool kbase_pm_is_in_desired_state_nolock(struct kbase_device *kbdev)
kbdev->pm.backend.shaders_state != KBASE_SHADERS_OFF_CORESTACK_OFF)
in_desired_state = false;
#else
in_desired_state = kbase_pm_mcu_is_in_desired_state(kbdev);
in_desired_state &= kbase_pm_mcu_is_in_desired_state(kbdev);
#endif
return in_desired_state;
@ -2122,6 +2181,7 @@ void kbase_pm_reset_start_locked(struct kbase_device *kbdev)
backend->in_reset = true;
backend->l2_state = KBASE_L2_RESET_WAIT;
KBASE_KTRACE_ADD(kbdev, PM_L2_RESET_WAIT, NULL, backend->l2_state);
#if !MALI_USE_CSF
backend->shaders_state = KBASE_SHADERS_RESET_WAIT;
#else
@ -2130,6 +2190,7 @@ void kbase_pm_reset_start_locked(struct kbase_device *kbdev)
*/
if (likely(kbdev->csf.firmware_inited)) {
backend->mcu_state = KBASE_MCU_RESET_WAIT;
KBASE_KTRACE_ADD(kbdev, PM_MCU_RESET_WAIT, NULL, backend->mcu_state);
#ifdef KBASE_PM_RUNTIME
backend->exit_gpu_sleep_mode = true;
#endif
@ -2328,6 +2389,66 @@ int kbase_pm_wait_for_desired_state(struct kbase_device *kbdev)
}
KBASE_EXPORT_TEST_API(kbase_pm_wait_for_desired_state);
#if MALI_USE_CSF
/**
 * core_mask_update_done - Report whether a shader core downscale has finished
 *
 * @kbdev: The kbase device structure for the device.
 *
 * Samples the MCU state while holding hwaccess_lock and decides from it
 * whether the downscaling of cores is effectively complete.
 *
 * Return: true if the downscale is done.
 */
static bool core_mask_update_done(struct kbase_device *kbdev)
{
	unsigned long irq_flags;
	bool done;

	spin_lock_irqsave(&kbdev->hwaccess_lock, irq_flags);

	/* Two situations count as "done":
	 *  - the MCU is in the stable ON state, which implies the downscale
	 *    request had completed;
	 *  - the MCU is not active, which implies all cores are off, so the
	 *    downscale request can be considered complete.
	 */
	done = (kbdev->pm.backend.mcu_state == KBASE_MCU_ON) ||
	       kbase_pm_is_mcu_inactive(kbdev, kbdev->pm.backend.mcu_state);

	spin_unlock_irqrestore(&kbdev->hwaccess_lock, irq_flags);

	return done;
}
/*
 * Block until core_mask_update_done() reports true, or until the
 * CSF_PM_TIMEOUT period elapses.
 *
 * Returns 0 on success, -ETIMEDOUT if the wait expired (after calling
 * kbase_pm_timed_out()), or the negative value returned by the wait
 * primitive if the wait was interrupted.
 */
int kbase_pm_wait_for_cores_down_scale(struct kbase_device *kbdev)
{
	long timeout_jiffies =
		kbase_csf_timeout_in_jiffies(kbase_get_timeout_ms(kbdev, CSF_PM_TIMEOUT));
	long wait_ret;

	/* Wait for core mask update to complete. The killable variant of the
	 * wait is used on kernels where this version check passes.
	 */
#if KERNEL_VERSION(4, 13, 1) <= LINUX_VERSION_CODE
	wait_ret = wait_event_killable_timeout(kbdev->pm.backend.gpu_in_desired_state_wait,
					       core_mask_update_done(kbdev), timeout_jiffies);
#else
	wait_ret = wait_event_timeout(kbdev->pm.backend.gpu_in_desired_state_wait,
				      core_mask_update_done(kbdev), timeout_jiffies);
#endif

	/* Positive: the condition became true before the timeout expired. */
	if (wait_ret > 0)
		return 0;

	/* Zero: the timeout elapsed with the condition still false. */
	if (wait_ret == 0) {
		kbase_pm_timed_out(kbdev);
		return -ETIMEDOUT;
	}

	/* Negative: the wait itself was interrupted; propagate its error code. */
	dev_info(kbdev->dev, "Wait for cores down scaling got interrupted");
	return (int)wait_ret;
}
#endif
void kbase_pm_enable_interrupts(struct kbase_device *kbdev)
{
unsigned long flags;
@ -2391,19 +2512,25 @@ static void update_user_reg_page_mapping(struct kbase_device *kbdev)
lockdep_assert_held(&kbdev->pm.lock);
mutex_lock(&kbdev->csf.reg_lock);
if (kbdev->csf.mali_file_inode) {
/* This would zap the pte corresponding to the mapping of User
* register page for all the Kbase contexts.
*/
unmap_mapping_range(kbdev->csf.mali_file_inode->i_mapping,
BASEP_MEM_CSF_USER_REG_PAGE_HANDLE,
PAGE_SIZE, 1);
/* Only if the mappings for USER page exist, update all PTEs associated to it */
if (kbdev->csf.nr_user_page_mapped > 0) {
if (likely(kbdev->csf.mali_file_inode)) {
/* This would zap the pte corresponding to the mapping of User
* register page for all the Kbase contexts.
*/
unmap_mapping_range(kbdev->csf.mali_file_inode->i_mapping,
BASEP_MEM_CSF_USER_REG_PAGE_HANDLE, PAGE_SIZE, 1);
} else {
dev_err(kbdev->dev,
"Device file inode not exist even if USER page previously mapped");
}
}
mutex_unlock(&kbdev->csf.reg_lock);
}
#endif
/*
* pmu layout:
* 0x0000: PMU TAG (RO) (0xCAFECAFE)
@ -2541,7 +2668,6 @@ void kbase_pm_clock_on(struct kbase_device *kbdev, bool is_resume)
backend->gpu_idled = false;
}
#endif
}
KBASE_EXPORT_TEST_API(kbase_pm_clock_on);

View File

@ -269,6 +269,37 @@ int kbase_pm_wait_for_desired_state(struct kbase_device *kbdev);
*/
int kbase_pm_wait_for_l2_powered(struct kbase_device *kbdev);
#if MALI_USE_CSF
/**
* kbase_pm_wait_for_cores_down_scale - Wait for the downscaling of shader cores
*
* @kbdev: The kbase device structure for the device (must be a valid pointer)
*
* This function can be called to ensure that the downscaling of cores is
* effectively complete and it would be safe to lower the voltage.
* The function assumes that caller had exercised the MCU state machine for the
* downscale request through the kbase_pm_update_state() function.
*
* This function needs to be used by the caller to safely wait for the completion
* of downscale request, instead of kbase_pm_wait_for_desired_state().
* The downscale request would trigger a state change in MCU state machine
* and so when MCU reaches the stable ON state, it can be inferred that
* downscaling is complete. But it has been observed that the wake up of the
* waiting thread can get delayed by a few milliseconds and by the time the
* thread wakes up the power down transition could have started (after the
* completion of downscale request).
* On the completion of power down transition another wake up signal would be
* sent, but again by the time thread wakes up the power up transition can begin.
* And the power up transition could then get blocked inside the platform specific
* callback_power_on() function due to the thread that called into Kbase (from the
* platform specific code) to perform the downscaling and then ended up waiting
* for the completion of downscale request.
*
* Return: 0 on success, -ETIMEDOUT on timeout, or a negative error code if the
* wait was interrupted.
*/
int kbase_pm_wait_for_cores_down_scale(struct kbase_device *kbdev);
#endif
/**
* kbase_pm_update_dynamic_cores_onoff - Update the L2 and shader power state
* machines after changing shader core

View File

@ -38,11 +38,13 @@
#include <backend/gpu/mali_kbase_pm_defs.h>
#include <mali_linux_trace.h>
#if defined(CONFIG_MALI_BIFROST_DEVFREQ) || defined(CONFIG_MALI_BIFROST_DVFS) || !MALI_USE_CSF
/* Shift used for kbasep_pm_metrics_data.time_busy/idle - units of (1 << 8) ns
* This gives a maximum period between samples of 2^(32+8)/100 ns = slightly
* under 11s. Exceeding this will cause overflow
*/
#define KBASE_PM_TIME_SHIFT 8
#endif
#if MALI_USE_CSF
/* To get the GPU_ACTIVE value in nano seconds unit */

View File

@ -32,6 +32,7 @@ bob_defaults {
kbuild_options: [
"CONFIG_MALI_BIFROST_NO_MALI=y",
"CONFIG_MALI_NO_MALI_DEFAULT_GPU={{.gpu}}",
"CONFIG_GPU_HWVER={{.hwver}}",
],
},
mali_platform_dt_pin_rst: {
@ -52,9 +53,6 @@ bob_defaults {
mali_midgard_enable_trace: {
kbuild_options: ["CONFIG_MALI_BIFROST_ENABLE_TRACE=y"],
},
mali_dma_fence: {
kbuild_options: ["CONFIG_MALI_BIFROST_DMA_FENCE=y"],
},
mali_arbiter_support: {
kbuild_options: ["CONFIG_MALI_ARBITER_SUPPORT=y"],
},
@ -64,7 +62,7 @@ bob_defaults {
mali_dma_buf_legacy_compat: {
kbuild_options: ["CONFIG_MALI_DMA_BUF_LEGACY_COMPAT=y"],
},
mali_2mb_alloc: {
large_page_alloc: {
kbuild_options: ["CONFIG_MALI_2MB_ALLOC=y"],
},
mali_memory_fully_backed: {
@ -89,7 +87,7 @@ bob_defaults {
kbuild_options: ["CONFIG_MALI_BIFROST_ERROR_INJECT=y"],
},
mali_gem5_build: {
kbuild_options: ["CONFIG_MALI_GEM5_BUILD=y"],
kbuild_options: ["CONFIG_MALI_GEM5_BUILD=y"],
},
mali_debug: {
kbuild_options: [
@ -163,9 +161,7 @@ bob_defaults {
// (catch-all for experimental CS code without separating it into
// different features).
"MALI_INCREMENTAL_RENDERING_JM={{.incremental_rendering_jm}}",
"MALI_GPU_TIMESTAMP_CORRECTION={{.gpu_timestamp_correction}}",
"MALI_BASE_CSF_PERFORMANCE_TESTS={{.base_csf_performance_tests}}",
"MALI_GPU_TIMESTAMP_INTERPOLATION={{.gpu_timestamp_interpolation}}",
],
}
@ -184,6 +180,10 @@ bob_kernel_module {
"context/*.c",
"context/*.h",
"context/Kbuild",
"hwcnt/*.c",
"hwcnt/*.h",
"hwcnt/backend/*.h",
"hwcnt/Kbuild",
"ipa/*.c",
"ipa/*.h",
"ipa/Kbuild",
@ -217,6 +217,10 @@ bob_kernel_module {
"device/backend/*_jm.c",
"gpu/backend/*_jm.c",
"gpu/backend/*_jm.h",
"hwcnt/backend/*_jm.c",
"hwcnt/backend/*_jm.h",
"hwcnt/backend/*_jm_*.c",
"hwcnt/backend/*_jm_*.h",
"jm/*.h",
"tl/backend/*_jm.c",
"mmu/backend/*_jm.c",
@ -238,6 +242,10 @@ bob_kernel_module {
"device/backend/*_csf.c",
"gpu/backend/*_csf.c",
"gpu/backend/*_csf.h",
"hwcnt/backend/*_csf.c",
"hwcnt/backend/*_csf.h",
"hwcnt/backend/*_csf_*.c",
"hwcnt/backend/*_csf_*.h",
"tl/backend/*_csf.c",
"mmu/backend/*_csf.c",
"ipa/backend/*_csf.c",

View File

@ -26,7 +26,6 @@
#include <context/mali_kbase_context_internal.h>
#include <gpu/mali_kbase_gpu_regmap.h>
#include <mali_kbase.h>
#include <mali_kbase_dma_fence.h>
#include <mali_kbase_mem_linux.h>
#include <mali_kbase_mem_pool_group.h>
#include <mmu/mali_kbase_mmu.h>
@ -39,12 +38,14 @@
#include <csf/mali_kbase_csf_cpu_queue_debugfs.h>
#include <mali_kbase_debug_mem_view.h>
#include <mali_kbase_debug_mem_zones.h>
#include <mali_kbase_debug_mem_allocs.h>
#include <mali_kbase_mem_pool_debugfs.h>
void kbase_context_debugfs_init(struct kbase_context *const kctx)
{
kbase_debug_mem_view_init(kctx);
kbase_debug_mem_zones_init(kctx);
kbase_debug_mem_allocs_init(kctx);
kbase_mem_pool_debugfs_init(kctx->kctx_dentry, kctx);
kbase_jit_debugfs_init(kctx);
kbase_csf_queue_group_debugfs_init(kctx);

View File

@ -27,7 +27,6 @@
#include <gpu/mali_kbase_gpu_regmap.h>
#include <mali_kbase.h>
#include <mali_kbase_ctx_sched.h>
#include <mali_kbase_dma_fence.h>
#include <mali_kbase_kinstr_jm.h>
#include <mali_kbase_mem_linux.h>
#include <mali_kbase_mem_pool_group.h>
@ -37,12 +36,14 @@
#if IS_ENABLED(CONFIG_DEBUG_FS)
#include <mali_kbase_debug_mem_view.h>
#include <mali_kbase_debug_mem_zones.h>
#include <mali_kbase_debug_mem_allocs.h>
#include <mali_kbase_mem_pool_debugfs.h>
void kbase_context_debugfs_init(struct kbase_context *const kctx)
{
kbase_debug_mem_view_init(kctx);
kbase_debug_mem_zones_init(kctx);
kbase_debug_mem_allocs_init(kctx);
kbase_mem_pool_debugfs_init(kctx->kctx_dentry, kctx);
kbase_jit_debugfs_init(kctx);
kbasep_jd_debugfs_ctx_init(kctx);
@ -128,8 +129,6 @@ static const struct kbase_context_init context_init[] = {
{ NULL, kbase_context_free, NULL },
{ kbase_context_common_init, kbase_context_common_term,
"Common context initialization failed" },
{ kbase_dma_fence_init, kbase_dma_fence_term,
"DMA fence initialization failed" },
{ kbase_context_mem_pool_group_init, kbase_context_mem_pool_group_term,
"Memory pool group initialization failed" },
{ kbase_mem_evictable_init, kbase_mem_evictable_deinit,

View File

@ -165,7 +165,9 @@ int kbase_context_common_init(struct kbase_context *kctx)
atomic64_set(&kctx->num_fixed_allocs, 0);
#endif
kbase_gpu_vm_lock(kctx);
bitmap_copy(kctx->cookies, &cookies_mask, BITS_PER_LONG);
kbase_gpu_vm_unlock(kctx);
kctx->id = atomic_add_return(1, &(kctx->kbdev->ctx_num)) - 1;
@ -274,10 +276,8 @@ void kbase_context_common_term(struct kbase_context *kctx)
int kbase_context_mem_pool_group_init(struct kbase_context *kctx)
{
return kbase_mem_pool_group_init(&kctx->mem_pools,
kctx->kbdev,
&kctx->kbdev->mem_pool_defaults,
&kctx->kbdev->mem_pools);
return kbase_mem_pool_group_init(&kctx->mem_pools, kctx->kbdev,
&kctx->kbdev->mem_pool_defaults, &kctx->kbdev->mem_pools);
}
void kbase_context_mem_pool_group_term(struct kbase_context *kctx)

View File

@ -1,6 +1,6 @@
# SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
#
# (C) COPYRIGHT 2018-2021 ARM Limited. All rights reserved.
# (C) COPYRIGHT 2018-2022 ARM Limited. All rights reserved.
#
# This program is free software and is provided to you under the terms of the
# GNU General Public License version 2 as published by the Free Software
@ -34,12 +34,16 @@ bifrost_kbase-y += \
csf/mali_kbase_csf_protected_memory.o \
csf/mali_kbase_csf_tiler_heap_debugfs.o \
csf/mali_kbase_csf_cpu_queue_debugfs.o \
csf/mali_kbase_csf_event.o
csf/mali_kbase_csf_event.o \
csf/mali_kbase_csf_firmware_log.o \
csf/mali_kbase_csf_tiler_heap_reclaim.o
bifrost_kbase-$(CONFIG_MALI_REAL_HW) += csf/mali_kbase_csf_firmware.o
bifrost_kbase-$(CONFIG_MALI_BIFROST_NO_MALI) += csf/mali_kbase_csf_firmware_no_mali.o
bifrost_kbase-$(CONFIG_DEBUG_FS) += csf/mali_kbase_debug_csf_fault.o
ifeq ($(KBUILD_EXTMOD),)
# in-tree

View File

@ -28,8 +28,6 @@
* Status flags from the STATUS register of the IPA Control interface.
*/
#define STATUS_COMMAND_ACTIVE ((u32)1 << 0)
#define STATUS_TIMER_ACTIVE ((u32)1 << 1)
#define STATUS_AUTO_ACTIVE ((u32)1 << 2)
#define STATUS_PROTECTED_MODE ((u32)1 << 8)
#define STATUS_RESET ((u32)1 << 9)
#define STATUS_TIMER_ENABLED ((u32)1 << 31)
@ -37,9 +35,7 @@
/*
* Commands for the COMMAND register of the IPA Control interface.
*/
#define COMMAND_NOP ((u32)0)
#define COMMAND_APPLY ((u32)1)
#define COMMAND_CLEAR ((u32)2)
#define COMMAND_SAMPLE ((u32)3)
#define COMMAND_PROTECTED_ACK ((u32)4)
#define COMMAND_RESET_ACK ((u32)5)
@ -965,6 +961,43 @@ void kbase_ipa_control_handle_gpu_reset_post(struct kbase_device *kbdev)
}
KBASE_EXPORT_TEST_API(kbase_ipa_control_handle_gpu_reset_post);
#ifdef KBASE_PM_RUNTIME
void kbase_ipa_control_handle_gpu_sleep_enter(struct kbase_device *kbdev)
{
	lockdep_assert_held(&kbdev->hwaccess_lock);

	/* Only act when the MCU is in the sleep state */
	if (kbdev->pm.backend.mcu_state != KBASE_MCU_IN_SLEEP)
		return;

	/* Entering GPU Sleep is handled in the same way as a power down */
	kbase_ipa_control_handle_gpu_power_off(kbdev);

	/* Clear the SELECT_CSHW register so that no IPA control message
	 * gets sent to the top level GPU HWCNT. Issuing the APPLY command
	 * is not needed here.
	 */
	kbase_reg_write(kbdev, IPA_CONTROL_REG(SELECT_CSHW_LO), 0);
	kbase_reg_write(kbdev, IPA_CONTROL_REG(SELECT_CSHW_HI), 0);
}
KBASE_EXPORT_TEST_API(kbase_ipa_control_handle_gpu_sleep_enter);
void kbase_ipa_control_handle_gpu_sleep_exit(struct kbase_device *kbdev)
{
	lockdep_assert_held(&kbdev->hwaccess_lock);

	/* Only act when the MCU is in the sleep state */
	if (kbdev->pm.backend.mcu_state != KBASE_MCU_IN_SLEEP)
		return;

	/* Strictly, only the SELECT_CSHW register needs to be reconfigured
	 * on exit from sleep. To keep things simple the exit is currently
	 * handled as a power on event, which reconfigures all 4 SELECT
	 * registers.
	 */
	kbase_ipa_control_handle_gpu_power_on(kbdev);
}
KBASE_EXPORT_TEST_API(kbase_ipa_control_handle_gpu_sleep_exit);
#endif
#if MALI_UNIT_TEST
void kbase_ipa_control_rate_change_notify_test(struct kbase_device *kbdev,
u32 clk_index, u32 clk_rate_hz)

View File

@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
* (C) COPYRIGHT 2020-2021 ARM Limited. All rights reserved.
* (C) COPYRIGHT 2020-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@ -198,6 +198,33 @@ void kbase_ipa_control_handle_gpu_reset_pre(struct kbase_device *kbdev);
*/
void kbase_ipa_control_handle_gpu_reset_post(struct kbase_device *kbdev);
#ifdef KBASE_PM_RUNTIME
/**
* kbase_ipa_control_handle_gpu_sleep_enter - Handle the pre GPU Sleep event
*
* @kbdev: Pointer to kbase device.
*
* This function is called after MCU has been put to sleep state & L2 cache has
* been powered down. The top level part of GPU is still powered up when this
* function is called.
*/
void kbase_ipa_control_handle_gpu_sleep_enter(struct kbase_device *kbdev);
/**
* kbase_ipa_control_handle_gpu_sleep_exit - Handle the post GPU Sleep event
*
* @kbdev: Pointer to kbase device.
*
* This function is called when L2 needs to be powered up and MCU can exit the
* sleep state. The top level part of GPU is powered up when this function is
* called.
*
* This function must be called only if kbase_ipa_control_handle_gpu_sleep_enter()
* was called previously.
*/
void kbase_ipa_control_handle_gpu_sleep_exit(struct kbase_device *kbdev);
#endif
#if MALI_UNIT_TEST
/**
* kbase_ipa_control_rate_change_notify_test - Notify GPU rate change

View File

@ -348,9 +348,8 @@ int kbase_csf_alloc_command_stream_user_pages(struct kbase_context *kctx,
if (!reg)
return -ENOMEM;
ret = kbase_mem_pool_alloc_pages(
&kctx->mem_pools.small[KBASE_MEM_GROUP_CSF_IO],
num_pages, queue->phys, false);
ret = kbase_mem_pool_alloc_pages(&kctx->mem_pools.small[KBASE_MEM_GROUP_CSF_IO], num_pages,
queue->phys, false);
if (ret != num_pages)
goto phys_alloc_failed;
@ -374,8 +373,11 @@ int kbase_csf_alloc_command_stream_user_pages(struct kbase_context *kctx,
queue->db_file_offset = kbdev->csf.db_file_offsets;
kbdev->csf.db_file_offsets += BASEP_QUEUE_NR_MMAP_USER_PAGES;
#if (KERNEL_VERSION(4, 11, 0) > LINUX_VERSION_CODE)
WARN(atomic_read(&queue->refcount) != 1, "Incorrect refcounting for queue object\n");
#else
WARN(refcount_read(&queue->refcount) != 1, "Incorrect refcounting for queue object\n");
#endif
/* This is the second reference taken on the queue object and
* would be dropped only when the IO mapping is removed either
* explicitly by userspace or implicitly by kernel on process exit.
@ -444,25 +446,34 @@ static struct kbase_queue *find_queue(struct kbase_context *kctx, u64 base_addr)
/* Take an additional reference on @queue.
 *
 * The increment is only performed if the reference count is not already
 * zero; a warning is raised otherwise. Kernels older than 4.11 go through
 * the atomic_t API, all other kernels through the refcount_t API.
 */
static void get_queue(struct kbase_queue *queue)
{
#if (KERNEL_VERSION(4, 11, 0) > LINUX_VERSION_CODE)
WARN_ON(!atomic_inc_not_zero(&queue->refcount));
#else
WARN_ON(!refcount_inc_not_zero(&queue->refcount));
#endif
}
static void release_queue(struct kbase_queue *queue)
{
lockdep_assert_held(&queue->kctx->csf.lock);
WARN_ON(atomic_read(&queue->refcount) <= 0);
#if (KERNEL_VERSION(4, 11, 0) > LINUX_VERSION_CODE)
if (atomic_dec_and_test(&queue->refcount)) {
#else
if (refcount_dec_and_test(&queue->refcount)) {
#endif
/* The queue can't still be on the per context list. */
WARN_ON(!list_empty(&queue->link));
WARN_ON(queue->group);
dev_dbg(queue->kctx->kbdev->dev,
"Remove any pending command queue fatal from ctx %d_%d",
queue->kctx->tgid, queue->kctx->id);
kbase_csf_event_remove_error(queue->kctx, &queue->error);
kfree(queue);
}
}
static void oom_event_worker(struct work_struct *data);
static void fatal_event_worker(struct work_struct *data);
static void cs_error_worker(struct work_struct *data);
/* Between reg and reg_ex, one and only one must be null */
static int csf_queue_register_internal(struct kbase_context *kctx,
@ -565,7 +576,11 @@ static int csf_queue_register_internal(struct kbase_context *kctx,
queue->enabled = false;
queue->priority = reg->priority;
#if (KERNEL_VERSION(4, 11, 0) > LINUX_VERSION_CODE)
atomic_set(&queue->refcount, 1);
#else
refcount_set(&queue->refcount, 1);
#endif
queue->group = NULL;
queue->bind_state = KBASE_CSF_QUEUE_UNBOUND;
@ -588,7 +603,7 @@ static int csf_queue_register_internal(struct kbase_context *kctx,
INIT_LIST_HEAD(&queue->link);
INIT_LIST_HEAD(&queue->error.link);
INIT_WORK(&queue->oom_event_work, oom_event_worker);
INIT_WORK(&queue->fatal_event_work, fatal_event_worker);
INIT_WORK(&queue->cs_error_work, cs_error_worker);
list_add(&queue->link, &kctx->csf.queue_list);
queue->extract_ofs = 0;
@ -699,11 +714,6 @@ void kbase_csf_queue_terminate(struct kbase_context *kctx,
}
kbase_gpu_vm_unlock(kctx);
dev_dbg(kctx->kbdev->dev,
"Remove any pending command queue fatal from context %pK\n",
(void *)kctx);
kbase_csf_event_remove_error(kctx, &queue->error);
release_queue(queue);
}
@ -784,6 +794,11 @@ static struct kbase_queue_group *get_bound_queue_group(
return group;
}
/* Queue the pending_submission_work item of @kctx on the system
 * high-priority workqueue.
 */
static void enqueue_gpu_submission_work(struct kbase_context *const kctx)
{
queue_work(system_highpri_wq, &kctx->csf.pending_submission_work);
}
/**
* pending_submission_worker() - Work item to process pending kicked GPU command queues.
*
@ -813,11 +828,21 @@ static void pending_submission_worker(struct work_struct *work)
list_for_each_entry(queue, &kctx->csf.queue_list, link) {
if (atomic_cmpxchg(&queue->pending, 1, 0) == 1) {
struct kbase_queue_group *group = get_bound_queue_group(queue);
int ret;
if (!group || queue->bind_state != KBASE_CSF_QUEUE_BOUND)
if (!group || queue->bind_state != KBASE_CSF_QUEUE_BOUND) {
dev_dbg(kbdev->dev, "queue is not bound to a group");
else
WARN_ON(kbase_csf_scheduler_queue_start(queue));
continue;
}
ret = kbase_csf_scheduler_queue_start(queue);
if (unlikely(ret)) {
dev_dbg(kbdev->dev, "Failed to start queue");
if (ret == -EBUSY) {
atomic_cmpxchg(&queue->pending, 0, 1);
enqueue_gpu_submission_work(kctx);
}
}
}
}
@ -831,6 +856,8 @@ void kbase_csf_ring_csg_doorbell(struct kbase_device *kbdev, int slot)
if (WARN_ON(slot < 0))
return;
kbase_csf_scheduler_spin_lock_assert_held(kbdev);
kbase_csf_ring_csg_slots_doorbell(kbdev, (u32) (1 << slot));
}
@ -843,6 +870,8 @@ void kbase_csf_ring_csg_slots_doorbell(struct kbase_device *kbdev,
(u32) ((1U << kbdev->csf.global_iface.group_num) - 1);
u32 value;
kbase_csf_scheduler_spin_lock_assert_held(kbdev);
if (WARN_ON(slot_bitmap > allowed_bitmap))
return;
@ -872,6 +901,8 @@ void kbase_csf_ring_cs_kernel_doorbell(struct kbase_device *kbdev,
struct kbase_csf_cmd_stream_group_info *ginfo;
u32 value;
kbase_csf_scheduler_spin_lock_assert_held(kbdev);
if (WARN_ON(csg_nr < 0) ||
WARN_ON(csg_nr >= kbdev->csf.global_iface.group_num))
return;
@ -891,11 +922,6 @@ void kbase_csf_ring_cs_kernel_doorbell(struct kbase_device *kbdev,
kbase_csf_ring_csg_doorbell(kbdev, csg_nr);
}
static void enqueue_gpu_submission_work(struct kbase_context *const kctx)
{
queue_work(system_highpri_wq, &kctx->csf.pending_submission_work);
}
int kbase_csf_queue_kick(struct kbase_context *kctx,
struct kbase_ioctl_cs_queue_kick *kick)
{
@ -1129,9 +1155,8 @@ static int create_normal_suspend_buffer(struct kbase_context *const kctx,
}
/* Get physical page for a normal suspend buffer */
err = kbase_mem_pool_alloc_pages(
&kctx->mem_pools.small[KBASE_MEM_GROUP_CSF_FW],
nr_pages, &s_buf->phy[0], false);
err = kbase_mem_pool_alloc_pages(&kctx->mem_pools.small[KBASE_MEM_GROUP_CSF_FW], nr_pages,
&s_buf->phy[0], false);
if (err < 0)
goto phy_pages_alloc_failed;
@ -1362,6 +1387,11 @@ static int create_queue_group(struct kbase_context *const kctx,
group->cs_unrecoverable = false;
group->reevaluate_idle_status = false;
group->dvs_buf = create->in.dvs_buf;
#if IS_ENABLED(CONFIG_DEBUG_FS)
group->deschedule_deferred_cnt = 0;
#endif
group->group_uid = generate_group_uid();
create->out.group_uid = group->group_uid;
@ -1377,6 +1407,9 @@ static int create_queue_group(struct kbase_context *const kctx,
MAX_SUPPORTED_STREAMS_PER_GROUP);
group->run_state = KBASE_CSF_GROUP_INACTIVE;
KBASE_KTRACE_ADD_CSF_GRP(group->kctx->kbdev, CSF_GROUP_INACTIVE, group,
group->run_state);
err = create_suspend_buffers(kctx, group);
if (err < 0) {
@ -1396,6 +1429,17 @@ static int create_queue_group(struct kbase_context *const kctx,
return group_handle;
}
/* Check a CSF interface version word for DVS support.
 *
 * Any major version above 3 is accepted; for major version 3 the minor
 * version must be at least 2. Everything older is rejected.
 */
static bool dvs_supported(u32 csf_version)
{
	/* The major version alone decides unless it is exactly 3 */
	if (GLB_VERSION_MAJOR_GET(csf_version) != 3)
		return GLB_VERSION_MAJOR_GET(csf_version) > 3;

	return GLB_VERSION_MINOR_GET(csf_version) >= 2;
}
int kbase_csf_queue_group_create(struct kbase_context *const kctx,
union kbase_ioctl_cs_queue_group_create *const create)
@ -1434,8 +1478,17 @@ int kbase_csf_queue_group_create(struct kbase_context *const kctx,
dev_warn(kctx->kbdev->dev, "Unknown exception handler flags set: %u",
create->in.csi_handlers & ~BASE_CSF_EXCEPTION_HANDLER_FLAGS_MASK);
err = -EINVAL;
} else if (create->in.reserved) {
dev_warn(kctx->kbdev->dev, "Reserved field was set to non-0");
} else if (!dvs_supported(kctx->kbdev->csf.global_iface.version) &&
create->in.dvs_buf) {
dev_warn(
kctx->kbdev->dev,
"GPU does not support DVS but userspace is trying to use it");
err = -EINVAL;
} else if (dvs_supported(kctx->kbdev->csf.global_iface.version) &&
!CSG_DVS_BUF_BUFFER_POINTER_GET(create->in.dvs_buf) &&
CSG_DVS_BUF_BUFFER_SIZE_GET(create->in.dvs_buf)) {
dev_warn(kctx->kbdev->dev,
"DVS buffer pointer is null but size is not 0");
err = -EINVAL;
} else {
/* For the CSG which satisfies the condition for having
@ -1555,6 +1608,7 @@ void kbase_csf_term_descheduled_queue_group(struct kbase_queue_group *group)
&group->protected_suspend_buf);
group->run_state = KBASE_CSF_GROUP_TERMINATED;
KBASE_KTRACE_ADD_CSF_GRP(group->kctx->kbdev, CSF_GROUP_TERMINATED, group, group->run_state);
}
/**
@ -1585,6 +1639,34 @@ static void term_queue_group(struct kbase_queue_group *group)
kbase_csf_term_descheduled_queue_group(group);
}
/**
* wait_group_deferred_deschedule_completion - Wait for refcount of the group to
* become 0 that was taken when the group deschedule had to be deferred.
*
* @group: Pointer to GPU command queue group that is being deleted.
*
* This function is called when Userspace deletes the group and after the group
* has been descheduled. The function synchronizes with the other threads that were
* also trying to deschedule the group whilst the dumping was going on for a fault.
* Please refer to the documentation of wait_for_dump_complete_on_group_deschedule()
* for more details.
*
* The caller must hold the csf.lock of the group's context. The lock is dropped
* for the duration of the wait and is held again when the function returns.
* The function does nothing when CONFIG_DEBUG_FS is not enabled.
*/
static void wait_group_deferred_deschedule_completion(struct kbase_queue_group *group)
{
#if IS_ENABLED(CONFIG_DEBUG_FS)
struct kbase_context *kctx = group->kctx;
lockdep_assert_held(&kctx->csf.lock);
/* Common case: no deferred deschedule is outstanding, nothing to wait for */
if (likely(!group->deschedule_deferred_cnt))
return;
/* Release the context lock before sleeping on the device's event wait
* queue, and take it again once the deferred count has dropped to zero.
*/
mutex_unlock(&kctx->csf.lock);
wait_event(kctx->kbdev->csf.event_wait, !group->deschedule_deferred_cnt);
mutex_lock(&kctx->csf.lock);
#endif
}
static void cancel_queue_group_events(struct kbase_queue_group *group)
{
cancel_work_sync(&group->timer_event_work);
@ -1626,24 +1708,39 @@ void kbase_csf_queue_group_terminate(struct kbase_context *kctx,
group = find_queue_group(kctx, group_handle);
if (group) {
remove_pending_group_fatal_error(group);
term_queue_group(group);
kctx->csf.queue_groups[group_handle] = NULL;
/* Stop the running of the given group */
term_queue_group(group);
mutex_unlock(&kctx->csf.lock);
if (reset_prevented) {
/* Allow GPU reset before cancelling the group specific
* work item to avoid potential deadlock.
* Reset prevention isn't needed after group termination.
*/
kbase_reset_gpu_allow(kbdev);
reset_prevented = false;
}
/* Cancel any pending event callbacks. If one is in progress
* then this thread waits synchronously for it to complete (which
* is why we must unlock the context first). We already ensured
* that no more callbacks can be enqueued by terminating the group.
*/
cancel_queue_group_events(group);
mutex_lock(&kctx->csf.lock);
/* Clean up after the termination */
remove_pending_group_fatal_error(group);
wait_group_deferred_deschedule_completion(group);
}
mutex_unlock(&kctx->csf.lock);
if (reset_prevented)
kbase_reset_gpu_allow(kbdev);
if (!group)
return;
/* Cancel any pending event callbacks. If one is in progress
* then this thread waits synchronously for it to complete (which
* is why we must unlock the context first). We already ensured
* that no more callbacks can be enqueued by terminating the group.
*/
cancel_queue_group_events(group);
kfree(group);
}
@ -1738,7 +1835,6 @@ void kbase_csf_active_queue_groups_reset(struct kbase_device *kbdev,
int kbase_csf_ctx_init(struct kbase_context *kctx)
{
struct kbase_device *kbdev = kctx->kbdev;
int err = -ENOMEM;
INIT_LIST_HEAD(&kctx->csf.queue_list);
@ -1747,19 +1843,6 @@ int kbase_csf_ctx_init(struct kbase_context *kctx)
kbase_csf_event_init(kctx);
kctx->csf.user_reg_vma = NULL;
mutex_lock(&kbdev->pm.lock);
/* The inode information for /dev/malixx file is not available at the
* time of device probe as the inode is created when the device node
* is created by udevd (through mknod).
*/
if (kctx->filp) {
if (!kbdev->csf.mali_file_inode)
kbdev->csf.mali_file_inode = kctx->filp->f_inode;
/* inode is unique for a file */
WARN_ON(kbdev->csf.mali_file_inode != kctx->filp->f_inode);
}
mutex_unlock(&kbdev->pm.lock);
/* Mark all the cookies as 'free' */
bitmap_fill(kctx->csf.cookies, KBASE_CSF_NUM_USER_IO_PAGES_HANDLE);
@ -1874,8 +1957,6 @@ void kbase_csf_ctx_term(struct kbase_context *kctx)
else
reset_prevented = true;
cancel_work_sync(&kctx->csf.pending_submission_work);
mutex_lock(&kctx->csf.lock);
/* Iterate through the queue groups that were not terminated by
@ -1894,6 +1975,8 @@ void kbase_csf_ctx_term(struct kbase_context *kctx)
if (reset_prevented)
kbase_reset_gpu_allow(kbdev);
cancel_work_sync(&kctx->csf.pending_submission_work);
/* Now that all queue groups have been terminated, there can be no
* more OoM or timer event interrupts but there can be inflight work
* items. Destroying the wq will implicitly flush those work items.
@ -1938,7 +2021,11 @@ void kbase_csf_ctx_term(struct kbase_context *kctx)
* only one reference left that was taken when queue was
* registered.
*/
#if (KERNEL_VERSION(4, 11, 0) > LINUX_VERSION_CODE)
if (atomic_read(&queue->refcount) != 1)
#else
if (refcount_read(&queue->refcount) != 1)
#endif
dev_warn(kctx->kbdev->dev,
"Releasing queue with incorrect refcounting!\n");
list_del_init(&queue->link);
@ -2059,6 +2146,36 @@ static void report_tiler_oom_error(struct kbase_queue_group *group)
kbase_event_wakeup(group->kctx);
}
/* Clean and invalidate the GPU L2 and LSC caches after a fatal error.
 *
 * Runs with the PM lock held and does nothing if the GPU is not powered.
 * If the cache clean does not complete within the wait timeout, a warning
 * is logged and a GPU reset is attempted.
 */
static void flush_gpu_cache_on_fatal_error(struct kbase_device *kbdev)
{
	const unsigned int flush_timeout_ms = 2000;

	kbase_pm_lock(kbdev);

	if (!kbdev->pm.backend.gpu_powered)
		goto out_unlock;

	/* With the advent of partial cache flush, terminating the queue
	 * group without waiting for proper cache maintenance could leave
	 * dirty cache lines in the GPU L2 caches. Doing a full cache flush
	 * here prevents those lines from being arbitrarily evicted later
	 * and possibly causing memory corruption.
	 */
	kbase_gpu_start_cache_clean(kbdev, GPU_COMMAND_CACHE_CLN_INV_L2_LSC);

	if (kbase_gpu_wait_cache_clean_timeout(kbdev, flush_timeout_ms)) {
		dev_warn(kbdev->dev,
			 "[%llu] Timeout waiting for cache clean to complete after fatal error",
			 kbase_backend_get_cycle_cnt(kbdev));

		if (kbase_prepare_to_reset_gpu(kbdev, RESET_FLAGS_HWC_UNRECOVERABLE_ERROR))
			kbase_reset_gpu(kbdev);
	}

out_unlock:
	kbase_pm_unlock(kbdev);
}
/**
* kbase_queue_oom_event - Handle tiler out-of-memory for a GPU command queue.
*
@ -2071,8 +2188,8 @@ static void report_tiler_oom_error(struct kbase_queue_group *group)
* notification to allow the firmware to report out-of-memory again in future.
* If the out-of-memory condition was successfully handled then this function
* rings the relevant doorbell to notify the firmware; otherwise, it terminates
* the GPU command queue group to which the queue is bound. See
* term_queue_group() for details.
* the GPU command queue group to which the queue is bound and notify a waiting
* user space client of the failure.
*/
static void kbase_queue_oom_event(struct kbase_queue *const queue)
{
@ -2084,6 +2201,7 @@ static void kbase_queue_oom_event(struct kbase_queue *const queue)
struct kbase_csf_cmd_stream_info const *stream;
int csi_index = queue->csi_index;
u32 cs_oom_ack, cs_oom_req;
unsigned long flags;
lockdep_assert_held(&kctx->csf.lock);
@ -2129,20 +2247,23 @@ static void kbase_queue_oom_event(struct kbase_queue *const queue)
err = handle_oom_event(group, stream);
kbase_csf_scheduler_spin_lock(kbdev, &flags);
kbase_csf_firmware_cs_input_mask(stream, CS_REQ, cs_oom_ack,
CS_REQ_TILER_OOM_MASK);
kbase_csf_ring_cs_kernel_doorbell(kbdev, csi_index, slot_num, true);
kbase_csf_scheduler_spin_unlock(kbdev, flags);
if (err) {
if (unlikely(err)) {
dev_warn(
kbdev->dev,
"Queue group to be terminated, couldn't handle the OoM event\n");
kbase_debug_csf_fault_notify(kbdev, kctx, DF_TILER_OOM);
kbase_csf_scheduler_unlock(kbdev);
term_queue_group(group);
flush_gpu_cache_on_fatal_error(kbdev);
report_tiler_oom_error(group);
return;
}
kbase_csf_ring_cs_kernel_doorbell(kbdev, csi_index, slot_num, true);
unlock:
kbase_csf_scheduler_unlock(kbdev);
}
@ -2164,6 +2285,7 @@ static void oom_event_worker(struct work_struct *data)
struct kbase_device *const kbdev = kctx->kbdev;
int err = kbase_reset_gpu_try_prevent(kbdev);
/* Regardless of whether reset failed or is currently happening, exit
* early
*/
@ -2216,12 +2338,13 @@ static void timer_event_worker(struct work_struct *data)
struct kbase_queue_group *const group =
container_of(data, struct kbase_queue_group, timer_event_work);
struct kbase_context *const kctx = group->kctx;
struct kbase_device *const kbdev = kctx->kbdev;
bool reset_prevented = false;
int err = kbase_reset_gpu_prevent_and_wait(kctx->kbdev);
int err = kbase_reset_gpu_prevent_and_wait(kbdev);
if (err)
dev_warn(
kctx->kbdev->dev,
kbdev->dev,
"Unsuccessful GPU reset detected when terminating group %d on progress timeout, attempting to terminate regardless",
group->handle);
else
@ -2230,11 +2353,12 @@ static void timer_event_worker(struct work_struct *data)
mutex_lock(&kctx->csf.lock);
term_queue_group(group);
flush_gpu_cache_on_fatal_error(kbdev);
report_group_timeout_error(group);
mutex_unlock(&kctx->csf.lock);
if (reset_prevented)
kbase_reset_gpu_allow(kctx->kbdev);
kbase_reset_gpu_allow(kbdev);
}
/**
@ -2242,11 +2366,15 @@ static void timer_event_worker(struct work_struct *data)
*
* @group: Pointer to GPU queue group for which the timeout event is received.
*
* Notify a waiting user space client of the timeout.
* Enqueue a work item to terminate the group and notify the event notification
* thread of progress timeout fault for the GPU command queue group.
*/
static void handle_progress_timer_event(struct kbase_queue_group *const group)
{
/* Signal the progress timer timeout through kbase_debug_csf_fault_notify();
* its return value is not used here.
*/
kbase_debug_csf_fault_notify(group->kctx->kbdev, group->kctx,
DF_PROGRESS_TIMER_TIMEOUT);
/* Hand the rest of the handling over to the group's timer_event_work
* item, queued on the workqueue of the owning context.
*/
queue_work(group->kctx->csf.wq, &group->timer_event_work);
}
@ -2274,16 +2402,20 @@ static void protm_event_worker(struct work_struct *data)
* handle_fault_event - Handler for CS fault.
*
* @queue: Pointer to queue for which fault event was received.
* @stream: Pointer to the structure containing info provided by the
* firmware about the CSI.
*
* Prints meaningful CS fault information.
* @cs_ack: Value of the CS_ACK register in the CS kernel input page used for
* the queue.
*
* Print required information about the CS fault and notify the user space client
* about the fault.
*/
static void
handle_fault_event(struct kbase_queue *const queue,
struct kbase_csf_cmd_stream_info const *const stream)
handle_fault_event(struct kbase_queue *const queue, const u32 cs_ack)
{
struct kbase_device *const kbdev = queue->kctx->kbdev;
struct kbase_csf_cmd_stream_group_info const *ginfo =
&kbdev->csf.global_iface.groups[queue->group->csg_nr];
struct kbase_csf_cmd_stream_info const *stream =
&ginfo->streams[queue->csi_index];
const u32 cs_fault = kbase_csf_firmware_cs_output(stream, CS_FAULT);
const u64 cs_fault_info =
kbase_csf_firmware_cs_output(stream, CS_FAULT_INFO_LO) |
@ -2295,7 +2427,6 @@ handle_fault_event(struct kbase_queue *const queue,
CS_FAULT_EXCEPTION_DATA_GET(cs_fault);
const u64 cs_fault_info_exception_data =
CS_FAULT_INFO_EXCEPTION_DATA_GET(cs_fault_info);
struct kbase_device *const kbdev = queue->kctx->kbdev;
kbase_csf_scheduler_spin_lock_assert_held(kbdev);
@ -2310,6 +2441,36 @@ handle_fault_event(struct kbase_queue *const queue,
kbase_gpu_exception_name(cs_fault_exception_type),
cs_fault_exception_data, cs_fault_info_exception_data);
#if IS_ENABLED(CONFIG_DEBUG_FS)
/* CS_RESOURCE_TERMINATED type fault event can be ignored from the
* standpoint of dump on error. It is used to report fault for the CSIs
* that are associated with the same CSG as the CSI for which the actual
* fault was reported by the Iterator.
* Dumping would be triggered when the actual fault is reported.
*
* CS_INHERIT_FAULT can also be ignored. It could happen due to the error
* in other types of queues (cpu/kcpu). If a fault had occurred in some
* other GPU queue then the dump would have been performed anyways when
* that fault was reported.
*/
if ((cs_fault_exception_type != CS_FAULT_EXCEPTION_TYPE_CS_INHERIT_FAULT) &&
(cs_fault_exception_type != CS_FAULT_EXCEPTION_TYPE_CS_RESOURCE_TERMINATED)) {
if (unlikely(kbase_debug_csf_fault_notify(kbdev, queue->kctx, DF_CS_FAULT))) {
get_queue(queue);
queue->cs_error = cs_fault;
queue->cs_error_info = cs_fault_info;
queue->cs_error_fatal = false;
if (!queue_work(queue->kctx->csf.wq, &queue->cs_error_work))
release_queue(queue);
return;
}
}
#endif
kbase_csf_firmware_cs_input_mask(stream, CS_REQ, cs_ack,
CS_REQ_FAULT_MASK);
kbase_csf_ring_cs_kernel_doorbell(kbdev, queue->csi_index, queue->group->csg_nr, true);
}
static void report_queue_fatal_error(struct kbase_queue *const queue,
@ -2341,16 +2502,16 @@ static void report_queue_fatal_error(struct kbase_queue *const queue,
}
/**
* fatal_event_worker - Handle the fatal error for the GPU queue
* fatal_event_worker - Handle the CS_FATAL/CS_FAULT error for the GPU queue
*
* @data: Pointer to a work_struct embedded in GPU command queue.
*
* Terminate the CSG and report the error to userspace.
*/
static void fatal_event_worker(struct work_struct *const data)
static void cs_error_worker(struct work_struct *const data)
{
struct kbase_queue *const queue =
container_of(data, struct kbase_queue, fatal_event_work);
container_of(data, struct kbase_queue, cs_error_work);
struct kbase_context *const kctx = queue->kctx;
struct kbase_device *const kbdev = kctx->kbdev;
struct kbase_queue_group *group;
@ -2365,6 +2526,7 @@ static void fatal_event_worker(struct work_struct *const data)
else
reset_prevented = true;
kbase_debug_csf_fault_wait_completion(kbdev);
mutex_lock(&kctx->csf.lock);
group = get_bound_queue_group(queue);
@ -2373,9 +2535,35 @@ static void fatal_event_worker(struct work_struct *const data)
goto unlock;
}
#if IS_ENABLED(CONFIG_DEBUG_FS)
if (!queue->cs_error_fatal) {
unsigned long flags;
int slot_num;
kbase_csf_scheduler_spin_lock(kbdev, &flags);
slot_num = kbase_csf_scheduler_group_get_slot_locked(group);
if (slot_num >= 0) {
struct kbase_csf_cmd_stream_group_info const *ginfo =
&kbdev->csf.global_iface.groups[slot_num];
struct kbase_csf_cmd_stream_info const *stream =
&ginfo->streams[queue->csi_index];
u32 const cs_ack =
kbase_csf_firmware_cs_output(stream, CS_ACK);
kbase_csf_firmware_cs_input_mask(stream, CS_REQ, cs_ack,
CS_REQ_FAULT_MASK);
kbase_csf_ring_cs_kernel_doorbell(kbdev, queue->csi_index,
slot_num, true);
}
kbase_csf_scheduler_spin_unlock(kbdev, flags);
goto unlock;
}
#endif
group_handle = group->handle;
term_queue_group(group);
report_queue_fatal_error(queue, queue->cs_fatal, queue->cs_fatal_info,
flush_gpu_cache_on_fatal_error(kbdev);
report_queue_fatal_error(queue, queue->cs_error, queue->cs_error_info,
group_handle);
unlock:
@ -2391,14 +2579,18 @@ static void fatal_event_worker(struct work_struct *const data)
* @queue: Pointer to queue for which fatal event was received.
* @stream: Pointer to the structure containing info provided by the
* firmware about the CSI.
* @cs_ack: Value of the CS_ACK register in the CS kernel input page used for
* the queue.
*
* Prints meaningful CS fatal information.
* Notify a waiting user space client of the CS fatal and prints meaningful
* information.
* Enqueue a work item to terminate the group and report the fatal error
* to user space.
*/
static void
handle_fatal_event(struct kbase_queue *const queue,
struct kbase_csf_cmd_stream_info const *const stream)
struct kbase_csf_cmd_stream_info const *const stream,
u32 cs_ack)
{
const u32 cs_fatal = kbase_csf_firmware_cs_output(stream, CS_FATAL);
const u64 cs_fatal_info =
@ -2428,57 +2620,26 @@ handle_fatal_event(struct kbase_queue *const queue,
if (cs_fatal_exception_type ==
CS_FATAL_EXCEPTION_TYPE_FIRMWARE_INTERNAL_ERROR) {
kbase_debug_csf_fault_notify(kbdev, queue->kctx, DF_FW_INTERNAL_ERROR);
queue_work(system_wq, &kbdev->csf.fw_error_work);
} else {
kbase_debug_csf_fault_notify(kbdev, queue->kctx, DF_CS_FATAL);
if (cs_fatal_exception_type == CS_FATAL_EXCEPTION_TYPE_CS_UNRECOVERABLE) {
queue->group->cs_unrecoverable = true;
if (kbase_prepare_to_reset_gpu(queue->kctx->kbdev, RESET_FLAGS_NONE))
kbase_reset_gpu(queue->kctx->kbdev);
}
get_queue(queue);
queue->cs_fatal = cs_fatal;
queue->cs_fatal_info = cs_fatal_info;
if (!queue_work(queue->kctx->csf.wq, &queue->fatal_event_work))
queue->cs_error = cs_fatal;
queue->cs_error_info = cs_fatal_info;
queue->cs_error_fatal = true;
if (!queue_work(queue->kctx->csf.wq, &queue->cs_error_work))
release_queue(queue);
}
}
kbase_csf_firmware_cs_input_mask(stream, CS_REQ, cs_ack,
CS_REQ_FATAL_MASK);
/**
* handle_queue_exception_event - Handler for CS fatal/fault exception events.
*
* @queue: Pointer to queue for which fatal/fault event was received.
* @cs_req: Value of the CS_REQ register from the CS's input page.
* @cs_ack: Value of the CS_ACK register from the CS's output page.
*/
static void handle_queue_exception_event(struct kbase_queue *const queue,
const u32 cs_req, const u32 cs_ack)
{
struct kbase_csf_cmd_stream_group_info const *ginfo;
struct kbase_csf_cmd_stream_info const *stream;
struct kbase_context *const kctx = queue->kctx;
struct kbase_device *const kbdev = kctx->kbdev;
struct kbase_queue_group *group = queue->group;
int csi_index = queue->csi_index;
int slot_num = group->csg_nr;
kbase_csf_scheduler_spin_lock_assert_held(kbdev);
ginfo = &kbdev->csf.global_iface.groups[slot_num];
stream = &ginfo->streams[csi_index];
if ((cs_ack & CS_ACK_FATAL_MASK) != (cs_req & CS_REQ_FATAL_MASK)) {
handle_fatal_event(queue, stream);
kbase_csf_firmware_cs_input_mask(stream, CS_REQ, cs_ack,
CS_REQ_FATAL_MASK);
}
if ((cs_ack & CS_ACK_FAULT_MASK) != (cs_req & CS_REQ_FAULT_MASK)) {
handle_fault_event(queue, stream);
kbase_csf_firmware_cs_input_mask(stream, CS_REQ, cs_ack,
CS_REQ_FAULT_MASK);
kbase_csf_ring_cs_kernel_doorbell(kbdev, csi_index, slot_num, true);
}
}
/**
@ -2531,11 +2692,16 @@ static void process_cs_interrupts(struct kbase_queue_group *const group,
kbase_csf_firmware_cs_output(stream, CS_ACK);
struct workqueue_struct *wq = group->kctx->csf.wq;
if ((cs_req & CS_REQ_EXCEPTION_MASK) ^
(cs_ack & CS_ACK_EXCEPTION_MASK)) {
if ((cs_ack & CS_ACK_FATAL_MASK) != (cs_req & CS_REQ_FATAL_MASK)) {
KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, CSI_INTERRUPT_FAULT,
group, queue, cs_req ^ cs_ack);
handle_queue_exception_event(queue, cs_req, cs_ack);
handle_fatal_event(queue, stream, cs_ack);
}
if ((cs_ack & CS_ACK_FAULT_MASK) != (cs_req & CS_REQ_FAULT_MASK)) {
KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, CSI_INTERRUPT_FAULT,
group, queue, cs_req ^ cs_ack);
handle_fault_event(queue, cs_ack);
}
/* PROTM_PEND and TILER_OOM can be safely ignored
@ -2597,6 +2763,8 @@ static void process_cs_interrupts(struct kbase_queue_group *const group,
if (test_bit(group->csg_nr, scheduler->csg_slots_idle_mask)) {
clear_bit(group->csg_nr,
scheduler->csg_slots_idle_mask);
KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_SLOT_IDLE_CLEAR, group,
scheduler->csg_slots_idle_mask[0]);
dev_dbg(kbdev->dev,
"Group-%d on slot %d de-idled by protm request",
group->handle, group->csg_nr);
@ -2698,7 +2866,12 @@ static void process_csg_interrupts(struct kbase_device *const kbdev, int const c
/* If there are non-idle CSGs waiting for a slot, fire
* a tock for a replacement.
*/
mod_delayed_work(scheduler->wq, &scheduler->tock_work, 0);
KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_INTERRUPT_NON_IDLE_GROUPS,
group, req ^ ack);
kbase_csf_scheduler_invoke_tock(kbdev);
} else {
KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_INTERRUPT_NO_NON_IDLE_GROUPS,
group, req ^ ack);
}
if (group->scan_seq_num < track->idle_seq) {
@ -2709,14 +2882,15 @@ static void process_csg_interrupts(struct kbase_device *const kbdev, int const c
if ((req ^ ack) & CSG_REQ_PROGRESS_TIMER_EVENT_MASK) {
kbase_csf_firmware_csg_input_mask(ginfo, CSG_REQ, ack,
CSG_REQ_PROGRESS_TIMER_EVENT_MASK);
CSG_REQ_PROGRESS_TIMER_EVENT_MASK);
KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_INTERRUPT_PROGRESS_TIMER_EVENT,
group, req ^ ack);
dev_info(kbdev->dev,
KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_INTERRUPT_PROGRESS_TIMER_EVENT, group,
req ^ ack);
dev_info(
kbdev->dev,
"[%llu] Iterator PROGRESS_TIMER timeout notification received for group %u of ctx %d_%d on slot %d\n",
kbase_backend_get_cycle_cnt(kbdev),
group->handle, group->kctx->tgid, group->kctx->id, csg_nr);
kbase_backend_get_cycle_cnt(kbdev), group->handle, group->kctx->tgid,
group->kctx->id, csg_nr);
handle_progress_timer_event(group);
}
@ -2904,7 +3078,7 @@ static inline void process_tracked_info_for_protm(struct kbase_device *kbdev,
* for the scheduler to re-examine the case.
*/
dev_dbg(kbdev->dev, "Attempt pending protm from idle slot %d\n", track->idle_slot);
mod_delayed_work(scheduler->wq, &scheduler->tock_work, 0);
kbase_csf_scheduler_invoke_tock(kbdev);
} else if (group) {
u32 i, num_groups = kbdev->csf.global_iface.group_num;
struct kbase_queue_group *grp;
@ -2927,7 +3101,7 @@ static inline void process_tracked_info_for_protm(struct kbase_device *kbdev,
tock_triggered = true;
dev_dbg(kbdev->dev,
"Attempt new protm from tick/tock idle slot %d\n", i);
mod_delayed_work(scheduler->wq, &scheduler->tock_work, 0);
kbase_csf_scheduler_invoke_tock(kbdev);
break;
}
}
@ -2940,77 +3114,133 @@ static inline void process_tracked_info_for_protm(struct kbase_device *kbdev,
}
}
/*
 * order_job_irq_clear_with_iface_mem_read - Order the JOB_IRQ_CLEAR write
 *                                           against the following reads of
 *                                           the firmware interface memory.
 *
 * FW and Kbase write to the JOB_IRQ_RAWSTAT and JOB_IRQ_CLEAR registers
 * without any synchronization between them. Without a barrier there is no
 * ordering guarantee: the write to IRQ_CLEAR could take effect after the read
 * from interface memory. That is a problem when FW sends back to back
 * notifications for the same CSG (for events like SYNC_UPDATE and IDLE) but
 * Kbase gets a single IRQ and observes only the first event. A similar thing
 * can happen with glb events like CFG_ALLOC_EN acknowledgment and GPU idle
 * notification.
 *
 *  MCU                                    CPU
 *  ---------------                        ----------------
 *  Update interface memory                Write to IRQ_CLEAR to clear current IRQ
 *  <barrier>                              <barrier>
 *  Write to IRQ_RAWSTAT to raise new IRQ  Read interface memory
 */
static void order_job_irq_clear_with_iface_mem_read(void)
{
	/* CPU and GPU would be in the same Outer shareable domain */
	dmb(osh);
}
void kbase_csf_interrupt(struct kbase_device *kbdev, u32 val)
{
unsigned long flags;
u32 csg_interrupts = val & ~JOB_IRQ_GLOBAL_IF;
struct irq_idle_and_protm_track track = { .protm_grp = NULL, .idle_seq = U32_MAX };
bool deferred_handling_glb_idle_irq = false;
lockdep_assert_held(&kbdev->hwaccess_lock);
KBASE_KTRACE_ADD(kbdev, CSF_INTERRUPT_START, NULL, val);
kbase_reg_write(kbdev, JOB_CONTROL_REG(JOB_IRQ_CLEAR), val);
if (csg_interrupts != 0) {
kbase_csf_scheduler_spin_lock(kbdev, &flags);
/* Looping through and track the highest idle and protm groups */
while (csg_interrupts != 0) {
int const csg_nr = ffs(csg_interrupts) - 1;
do {
unsigned long flags;
u32 csg_interrupts = val & ~JOB_IRQ_GLOBAL_IF;
struct irq_idle_and_protm_track track = { .protm_grp = NULL, .idle_seq = U32_MAX };
bool glb_idle_irq_received = false;
process_csg_interrupts(kbdev, csg_nr, &track);
csg_interrupts &= ~(1 << csg_nr);
kbase_reg_write(kbdev, JOB_CONTROL_REG(JOB_IRQ_CLEAR), val);
order_job_irq_clear_with_iface_mem_read();
if (csg_interrupts != 0) {
kbase_csf_scheduler_spin_lock(kbdev, &flags);
/* Looping through and track the highest idle and protm groups */
while (csg_interrupts != 0) {
int const csg_nr = ffs(csg_interrupts) - 1;
process_csg_interrupts(kbdev, csg_nr, &track);
csg_interrupts &= ~(1 << csg_nr);
}
/* Handle protm from the tracked information */
process_tracked_info_for_protm(kbdev, &track);
kbase_csf_scheduler_spin_unlock(kbdev, flags);
}
/* Handle protm from the tracked information */
process_tracked_info_for_protm(kbdev, &track);
kbase_csf_scheduler_spin_unlock(kbdev, flags);
}
if (val & JOB_IRQ_GLOBAL_IF) {
const struct kbase_csf_global_iface *const global_iface =
&kbdev->csf.global_iface;
if (val & JOB_IRQ_GLOBAL_IF) {
const struct kbase_csf_global_iface *const global_iface =
&kbdev->csf.global_iface;
kbdev->csf.interrupt_received = true;
kbdev->csf.interrupt_received = true;
if (!kbdev->csf.firmware_reloaded)
kbase_csf_firmware_reload_completed(kbdev);
else if (global_iface->output) {
u32 glb_req, glb_ack;
if (!kbdev->csf.firmware_reloaded)
kbase_csf_firmware_reload_completed(kbdev);
else if (global_iface->output) {
u32 glb_req, glb_ack;
kbase_csf_scheduler_spin_lock(kbdev, &flags);
glb_req =
kbase_csf_firmware_global_input_read(global_iface, GLB_REQ);
glb_ack = kbase_csf_firmware_global_output(global_iface, GLB_ACK);
KBASE_KTRACE_ADD(kbdev, CSF_INTERRUPT_GLB_REQ_ACK, NULL,
glb_req ^ glb_ack);
kbase_csf_scheduler_spin_lock(kbdev, &flags);
glb_req = kbase_csf_firmware_global_input_read(
global_iface, GLB_REQ);
glb_ack = kbase_csf_firmware_global_output(
global_iface, GLB_ACK);
KBASE_KTRACE_ADD(kbdev, CSF_INTERRUPT_GLB_REQ_ACK, NULL, glb_req ^ glb_ack);
check_protm_enter_req_complete(kbdev, glb_req, glb_ack);
check_protm_enter_req_complete(kbdev, glb_req, glb_ack);
if ((glb_req ^ glb_ack) & GLB_REQ_PROTM_EXIT_MASK)
process_protm_exit(kbdev, glb_ack);
if ((glb_req ^ glb_ack) & GLB_REQ_PROTM_EXIT_MASK)
process_protm_exit(kbdev, glb_ack);
/* Handle IDLE Hysteresis notification event */
if ((glb_req ^ glb_ack) & GLB_REQ_IDLE_EVENT_MASK) {
dev_dbg(kbdev->dev, "Idle-hysteresis event flagged");
kbase_csf_firmware_global_input_mask(
/* Handle IDLE Hysteresis notification event */
if ((glb_req ^ glb_ack) & GLB_REQ_IDLE_EVENT_MASK) {
dev_dbg(kbdev->dev, "Idle-hysteresis event flagged");
kbase_csf_firmware_global_input_mask(
global_iface, GLB_REQ, glb_ack,
GLB_REQ_IDLE_EVENT_MASK);
kbase_csf_scheduler_process_gpu_idle_event(kbdev);
glb_idle_irq_received = true;
/* Defer handling this IRQ to account for a race condition
* where the idle worker could be executed before we have
* finished handling all pending IRQs (including CSG IDLE
* IRQs).
*/
deferred_handling_glb_idle_irq = true;
}
process_prfcnt_interrupts(kbdev, glb_req, glb_ack);
kbase_csf_scheduler_spin_unlock(kbdev, flags);
/* Invoke the MCU state machine as a state transition
* might have completed.
*/
kbase_pm_update_state(kbdev);
}
process_prfcnt_interrupts(kbdev, glb_req, glb_ack);
kbase_csf_scheduler_spin_unlock(kbdev, flags);
/* Invoke the MCU state machine as a state transition
* might have completed.
*/
kbase_pm_update_state(kbdev);
}
if (!glb_idle_irq_received)
break;
/* Attempt to serve potential IRQs that might have occurred
* whilst handling the previous IRQ. In case we have observed
* the GLB IDLE IRQ without all CSGs having been marked as
* idle, the GPU would be treated as no longer idle and left
* powered on.
*/
val = kbase_reg_read(kbdev, JOB_CONTROL_REG(JOB_IRQ_STATUS));
} while (val);
if (deferred_handling_glb_idle_irq) {
unsigned long flags;
kbase_csf_scheduler_spin_lock(kbdev, &flags);
kbase_csf_scheduler_process_gpu_idle_event(kbdev);
kbase_csf_scheduler_spin_unlock(kbdev, flags);
}
wake_up_all(&kbdev->csf.event_wait);
KBASE_KTRACE_ADD(kbdev, CSF_INTERRUPT_END, NULL, val);
}
@ -3037,9 +3267,8 @@ int kbase_csf_doorbell_mapping_init(struct kbase_device *kbdev)
if (IS_ERR(filp))
return PTR_ERR(filp);
ret = kbase_mem_pool_alloc_pages(
&kbdev->mem_pools.small[KBASE_MEM_GROUP_CSF_FW],
1, &phys, false);
ret = kbase_mem_pool_alloc_pages(&kbdev->mem_pools.small[KBASE_MEM_GROUP_CSF_FW], 1, &phys,
false);
if (ret <= 0) {
fput(filp);
@ -3073,9 +3302,8 @@ int kbase_csf_setup_dummy_user_reg_page(struct kbase_device *kbdev)
kbdev->csf.dummy_user_reg_page = as_tagged(0);
ret = kbase_mem_pool_alloc_pages(
&kbdev->mem_pools.small[KBASE_MEM_GROUP_CSF_FW], 1, &phys,
false);
ret = kbase_mem_pool_alloc_pages(&kbdev->mem_pools.small[KBASE_MEM_GROUP_CSF_FW], 1, &phys,
false);
if (ret <= 0)
return ret;

View File

@ -23,12 +23,135 @@
#include <mali_kbase.h>
#include <linux/seq_file.h>
#include <linux/delay.h>
#include <csf/mali_kbase_csf_trace_buffer.h>
#include <backend/gpu/mali_kbase_pm_internal.h>
#if IS_ENABLED(CONFIG_DEBUG_FS)
#include "mali_kbase_csf_tl_reader.h"
/* Wait time to be used cumulatively for all the CSG slots.
* Since scheduler lock is held when STATUS_UPDATE request is sent, there won't be
* any other Host request pending on the FW side and usually FW would be responsive
* to the Doorbell IRQs as it won't do any polling for a long time and also it won't
* have to wait for any HW state transition to complete for publishing the status.
* So it is reasonable to expect that handling of STATUS_UPDATE request would be
* relatively very quick.
*/
#define STATUS_UPDATE_WAIT_TIMEOUT 500
/* The bitmask of CSG slots for which the STATUS_UPDATE request completed.
* The access to it is serialized with scheduler lock, so at a time it would
* get used either for "active_groups" or per context "groups" debugfs file.
*/
static DECLARE_BITMAP(csg_slots_status_updated, MAX_SUPPORTED_CSGS);
/*
 * csg_slot_status_update_finish - Check whether the STATUS_UPDATE request of
 *                                 a single CSG slot has completed.
 *
 * @kbdev:  Kbase device whose global interface holds the CSG slot.
 * @csg_nr: Index of the CSG slot to inspect.
 *
 * Return: true when the STATUS_UPDATE bit of CSG_REQ matches the one in
 *         CSG_ACK for the slot, false otherwise.
 */
static
bool csg_slot_status_update_finish(struct kbase_device *kbdev, u32 csg_nr)
{
	struct kbase_csf_cmd_stream_group_info const *const ginfo =
		&kbdev->csf.global_iface.groups[csg_nr];
	const u32 csg_req = kbase_csf_firmware_csg_input_read(ginfo, CSG_REQ);
	const u32 csg_ack = kbase_csf_firmware_csg_output(ginfo, CSG_ACK);

	/* The request is outstanding for as long as the REQ and ACK bits differ. */
	return ((csg_req ^ csg_ack) & CSG_REQ_STATUS_UPDATE_MASK) == 0;
}
/*
 * csg_slots_status_update_finish - Record which of the given CSG slots have
 *                                  completed their STATUS_UPDATE request.
 *
 * @kbdev:      Kbase device.
 * @slots_mask: Bitmap of the CSG slots still being waited on.
 *
 * Every slot in @slots_mask whose request has completed gets its bit set in
 * csg_slots_status_updated. The caller must hold the scheduler lock.
 *
 * Return: true if at least one slot of @slots_mask was found completed.
 */
static
bool csg_slots_status_update_finish(struct kbase_device *kbdev,
		const unsigned long *slots_mask)
{
	const u32 nr_slots = kbdev->csf.global_iface.group_num;
	bool any_done = false;
	u32 slot;

	lockdep_assert_held(&kbdev->csf.scheduler.lock);

	for_each_set_bit(slot, slots_mask, nr_slots) {
		if (!csg_slot_status_update_finish(kbdev, slot))
			continue;

		set_bit(slot, csg_slots_status_updated);
		any_done = true;
	}

	return any_done;
}
/*
 * wait_csg_slots_status_update_finish - Wait for the STATUS_UPDATE requests
 *                                       of a set of CSG slots to complete.
 *
 * @kbdev:      Kbase device.
 * @slots_mask: Bitmap of the CSG slots to wait for. Bits are cleared as the
 *              corresponding slots complete; bits still set on return belong
 *              to slots that did not complete before the timeout.
 *
 * The STATUS_UPDATE_WAIT_TIMEOUT budget is shared by all the slots rather than
 * being applied per slot. Completed slots are reported through
 * csg_slots_status_updated, which is cleared on entry. The caller must hold
 * the scheduler lock.
 */
static void wait_csg_slots_status_update_finish(struct kbase_device *kbdev,
		unsigned long *slots_mask)
{
	const u32 nr_slots = kbdev->csf.global_iface.group_num;
	long time_left = kbase_csf_timeout_in_jiffies(STATUS_UPDATE_WAIT_TIMEOUT);

	lockdep_assert_held(&kbdev->csf.scheduler.lock);

	bitmap_zero(csg_slots_status_updated, nr_slots);

	while (time_left && !bitmap_empty(slots_mask, nr_slots)) {
		/* Wakes up as soon as at least one more slot has completed. */
		time_left = wait_event_timeout(kbdev->csf.event_wait,
				csg_slots_status_update_finish(kbdev, slots_mask),
				time_left);

		if (unlikely(!time_left)) {
			dev_warn(kbdev->dev,
				 "STATUS_UPDATE request timed out for slots 0x%lx",
				 slots_mask[0]);
			break;
		}

		/* Stop waiting on the slots that have just completed. */
		bitmap_andnot(slots_mask, slots_mask,
			      csg_slots_status_updated, nr_slots);
	}
}
/*
 * update_active_groups_status - Ask FW to refresh the status of all on-slot
 *                               groups and wait for it to do so.
 *
 * @kbdev: Kbase device.
 * @file:  seq_file of the debugfs dump (not referenced in this function).
 *
 * For every CSG slot with a resident group the User doorbell is rung and a
 * STATUS_UPDATE request is raised; the function then waits for the requests
 * to complete. The outcome per slot is left in csg_slots_status_updated.
 * If the scheduler is in the SCHED_SLEEPING state no request is sent and all
 * in-use slots are marked as updated.
 * The caller must hold the scheduler lock.
 */
static void update_active_groups_status(struct kbase_device *kbdev, struct seq_file *file)
{
u32 max_csg_slots = kbdev->csf.global_iface.group_num;
DECLARE_BITMAP(used_csgs, MAX_SUPPORTED_CSGS) = { 0 };
u32 csg_nr;
unsigned long flags;
lockdep_assert_held(&kbdev->csf.scheduler.lock);
/* Global doorbell ring for CSG STATUS_UPDATE request or User doorbell
* ring for Extract offset update, shall not be made when MCU has been
* put to sleep otherwise it will undesirably make MCU exit the sleep
* state. Also it isn't really needed as FW will implicitly update the
* status of all on-slot groups when MCU sleep request is sent to it.
*/
if (kbdev->csf.scheduler.state == SCHED_SLEEPING) {
bitmap_copy(csg_slots_status_updated,
kbdev->csf.scheduler.csg_inuse_bitmap, max_csg_slots);
return;
}
for (csg_nr = 0; csg_nr < max_csg_slots; csg_nr++) {
struct kbase_queue_group *const group =
kbdev->csf.scheduler.csg_slots[csg_nr].resident_group;
if (!group)
continue;
/* Ring the User doorbell for FW to update the Extract offset */
kbase_csf_ring_doorbell(kbdev, group->doorbell_nr);
set_bit(csg_nr, used_csgs);
}
/* Return early if there are no on-slot groups */
if (bitmap_empty(used_csgs, max_csg_slots))
return;
kbase_csf_scheduler_spin_lock(kbdev, &flags);
for_each_set_bit(csg_nr, used_csgs, max_csg_slots) {
struct kbase_csf_cmd_stream_group_info const *const ginfo =
&kbdev->csf.global_iface.groups[csg_nr];
/* Raise the request by making the STATUS_UPDATE bit of CSG_REQ
* differ from the one currently in CSG_ACK.
*/
kbase_csf_firmware_csg_input_mask(ginfo, CSG_REQ,
~kbase_csf_firmware_csg_output(ginfo, CSG_ACK),
CSG_REQ_STATUS_UPDATE_MASK);
}
/* Only the first word of the bitmap is passed to the doorbell call below,
* so all the supported CSG slots must fit in it.
*/
BUILD_BUG_ON(MAX_SUPPORTED_CSGS > (sizeof(used_csgs[0]) * BITS_PER_BYTE));
kbase_csf_ring_csg_slots_doorbell(kbdev, used_csgs[0]);
kbase_csf_scheduler_spin_unlock(kbdev, flags);
wait_csg_slots_status_update_finish(kbdev, used_csgs);
/* Wait for the User doorbell ring to take effect */
msleep(100);
}
#define MAX_SCHED_STATE_STRING_LEN (16)
static const char *scheduler_state_to_string(struct kbase_device *kbdev,
enum kbase_csf_scheduler_state sched_state)
@ -77,16 +200,32 @@ static const char *blocked_reason_to_string(u32 reason_id)
return cs_blocked_reason[reason_id];
}
/*
 * sb_source_supported - Tell whether the SB_SOURCE field of CS_STATUS_WAIT
 *                       should be reported for a given global interface
 *                       version.
 *
 * @glb_version: Version value of the global interface.
 *
 * Return: true for versions 1.3 and later within major version 1, 2.6 and
 *         later within major version 2, and 3.5 and later within major
 *         version 3; false for everything else.
 */
static bool sb_source_supported(u32 glb_version)
{
	const u32 major = GLB_VERSION_MAJOR_GET(glb_version);
	const u32 minor = GLB_VERSION_MINOR_GET(glb_version);

	switch (major) {
	case 3:
		return minor >= 5;
	case 2:
		return minor >= 6;
	case 1:
		return minor >= 3;
	default:
		return false;
	}
}
static void kbasep_csf_scheduler_dump_active_queue_cs_status_wait(
struct seq_file *file, u32 wait_status, u32 wait_sync_value,
u64 wait_sync_live_value, u64 wait_sync_pointer, u32 sb_status,
u32 blocked_reason)
struct seq_file *file, u32 glb_version, u32 wait_status, u32 wait_sync_value,
u64 wait_sync_live_value, u64 wait_sync_pointer, u32 sb_status, u32 blocked_reason)
{
#define WAITING "Waiting"
#define NOT_WAITING "Not waiting"
seq_printf(file, "SB_MASK: %d\n",
CS_STATUS_WAIT_SB_MASK_GET(wait_status));
if (sb_source_supported(glb_version))
seq_printf(file, "SB_SOURCE: %d\n", CS_STATUS_WAIT_SB_SOURCE_GET(wait_status));
seq_printf(file, "PROGRESS_WAIT: %s\n",
CS_STATUS_WAIT_PROGRESS_WAIT_GET(wait_status) ?
WAITING : NOT_WAITING);
@ -156,10 +295,13 @@ static void kbasep_csf_scheduler_dump_active_queue(struct seq_file *file,
struct kbase_vmap_struct *mapping;
u64 *evt;
u64 wait_sync_live_value;
u32 glb_version;
if (!queue)
return;
glb_version = queue->kctx->kbdev->csf.global_iface.version;
if (WARN_ON(queue->csi_index == KBASEP_IF_NR_INVALID ||
!queue->group))
return;
@ -200,9 +342,8 @@ static void kbasep_csf_scheduler_dump_active_queue(struct seq_file *file,
}
kbasep_csf_scheduler_dump_active_queue_cs_status_wait(
file, wait_status, wait_sync_value,
wait_sync_live_value, wait_sync_pointer,
sb_status, blocked_reason);
file, glb_version, wait_status, wait_sync_value,
wait_sync_live_value, wait_sync_pointer, sb_status, blocked_reason);
}
} else {
struct kbase_device const *const kbdev =
@ -257,9 +398,8 @@ static void kbasep_csf_scheduler_dump_active_queue(struct seq_file *file,
}
kbasep_csf_scheduler_dump_active_queue_cs_status_wait(
file, wait_status, wait_sync_value,
wait_sync_live_value, wait_sync_pointer, sb_status,
blocked_reason);
file, glb_version, wait_status, wait_sync_value, wait_sync_live_value,
wait_sync_pointer, sb_status, blocked_reason);
/* Dealing with cs_trace */
if (kbase_csf_scheduler_queue_has_trace(queue))
kbasep_csf_scheduler_dump_active_cs_trace(file, stream);
@ -270,54 +410,6 @@ static void kbasep_csf_scheduler_dump_active_queue(struct seq_file *file,
seq_puts(file, "\n");
}
/*
 * update_active_group_status - Ask FW to refresh the status of one on-slot
 *                              group and wait for it to do so.
 *
 * @file:  seq_file of the debugfs dump; receives a warning if the request
 *         times out.
 * @group: On-slot queue group whose CSG status is to be refreshed.
 *
 * Rings the User doorbell of the group, raises a STATUS_UPDATE request for
 * its CSG slot and waits up to the firmware timeout for the acknowledgment.
 * Nothing is done when the scheduler is in the SCHED_SLEEPING state.
 */
static void update_active_group_status(struct seq_file *file,
struct kbase_queue_group *const group)
{
struct kbase_device *const kbdev = group->kctx->kbdev;
struct kbase_csf_cmd_stream_group_info const *const ginfo =
&kbdev->csf.global_iface.groups[group->csg_nr];
long remaining = kbase_csf_timeout_in_jiffies(kbdev->csf.fw_timeout_ms);
unsigned long flags;
/* Global doorbell ring for CSG STATUS_UPDATE request or User doorbell
* ring for Extract offset update, shall not be made when MCU has been
* put to sleep otherwise it will undesirably make MCU exit the sleep
* state. Also it isn't really needed as FW will implicitly update the
* status of all on-slot groups when MCU sleep request is sent to it.
*/
if (kbdev->csf.scheduler.state == SCHED_SLEEPING)
return;
/* Ring the User doorbell shared between the queues bound to this
* group, to have FW update the CS_EXTRACT for all the queues
* bound to the group. Ring early so that FW gets adequate time
* for the handling.
*/
kbase_csf_ring_doorbell(kbdev, group->doorbell_nr);
kbase_csf_scheduler_spin_lock(kbdev, &flags);
/* Raise the request by making the STATUS_UPDATE bit of CSG_REQ differ
* from the one currently in CSG_ACK.
*/
kbase_csf_firmware_csg_input_mask(ginfo, CSG_REQ,
~kbase_csf_firmware_csg_output(ginfo, CSG_ACK),
CSG_REQ_STATUS_UPDATE_MASK);
kbase_csf_scheduler_spin_unlock(kbdev, flags);
kbase_csf_ring_csg_doorbell(kbdev, group->csg_nr);
/* The request is complete once the REQ and ACK bits match again. */
remaining = wait_event_timeout(kbdev->csf.event_wait,
!((kbase_csf_firmware_csg_input_read(ginfo, CSG_REQ) ^
kbase_csf_firmware_csg_output(ginfo, CSG_ACK)) &
CSG_REQ_STATUS_UPDATE_MASK), remaining);
if (!remaining) {
dev_err(kbdev->dev,
"Timed out for STATUS_UPDATE on group %d on slot %d",
group->handle, group->csg_nr);
seq_printf(file, "*** Warn: Timed out for STATUS_UPDATE on slot %d\n",
group->csg_nr);
seq_puts(file, "*** The following group-record is likely stale\n");
}
}
static void kbasep_csf_scheduler_dump_active_group(struct seq_file *file,
struct kbase_queue_group *const group)
{
@ -331,8 +423,6 @@ static void kbasep_csf_scheduler_dump_active_group(struct seq_file *file,
u8 slot_priority =
kbdev->csf.scheduler.csg_slots[group->csg_nr].priority;
update_active_group_status(file, group);
ep_c = kbase_csf_firmware_csg_output(ginfo,
CSG_STATUS_EP_CURRENT);
ep_r = kbase_csf_firmware_csg_output(ginfo, CSG_STATUS_EP_REQ);
@ -348,6 +438,12 @@ static void kbasep_csf_scheduler_dump_active_group(struct seq_file *file,
CSG_STATUS_STATE_IDLE_MASK)
idle = 'Y';
if (!test_bit(group->csg_nr, csg_slots_status_updated)) {
seq_printf(file, "*** Warn: Timed out for STATUS_UPDATE on slot %d\n",
group->csg_nr);
seq_puts(file, "*** The following group-record is likely stale\n");
}
seq_puts(file, "GroupID, CSG NR, CSG Prio, Run State, Priority, C_EP(Alloc/Req), F_EP(Alloc/Req), T_EP(Alloc/Req), Exclusive, Idle\n");
seq_printf(file, "%7d, %6d, %8d, %9d, %8d, %11d/%3d, %11d/%3d, %11d/%3d, %9c, %4c\n",
group->handle,
@ -363,10 +459,6 @@ static void kbasep_csf_scheduler_dump_active_group(struct seq_file *file,
CSG_STATUS_EP_REQ_TILER_EP_GET(ep_r),
exclusive,
idle);
/* Wait for the User doobell ring to take effect */
if (kbdev->csf.scheduler.state != SCHED_SLEEPING)
msleep(100);
} else {
seq_puts(file, "GroupID, CSG NR, Run State, Priority\n");
seq_printf(file, "%7d, %6d, %9d, %8d\n",
@ -416,10 +508,11 @@ static int kbasep_csf_queue_group_debugfs_show(struct seq_file *file,
kbase_csf_scheduler_lock(kbdev);
if (kbdev->csf.scheduler.state == SCHED_SLEEPING) {
/* Wait for the MCU sleep request to complete. Please refer to the
* update_active_group_status() function for the explanation.
* update_active_groups_status() function for the explanation.
*/
kbase_pm_wait_for_desired_state(kbdev);
}
update_active_groups_status(kbdev, file);
for (gr = 0; gr < MAX_QUEUE_GROUP_NUM; gr++) {
struct kbase_queue_group *const group =
kctx->csf.queue_groups[gr];
@ -455,10 +548,11 @@ static int kbasep_csf_scheduler_dump_active_groups(struct seq_file *file,
kbase_csf_scheduler_lock(kbdev);
if (kbdev->csf.scheduler.state == SCHED_SLEEPING) {
/* Wait for the MCU sleep request to complete. Please refer to the
* update_active_group_status() function for the explanation.
* update_active_groups_status() function for the explanation.
*/
kbase_pm_wait_for_desired_state(kbdev);
}
update_active_groups_status(kbdev, file);
for (csg_nr = 0; csg_nr < num_groups; csg_nr++) {
struct kbase_queue_group *const group =
kbdev->csf.scheduler.csg_slots[csg_nr].resident_group;
@ -664,7 +758,6 @@ void kbase_csf_debugfs_init(struct kbase_device *kbdev)
&kbasep_csf_debugfs_scheduler_state_fops);
kbase_csf_tl_reader_debugfs_init(kbdev);
kbase_csf_firmware_trace_buffer_debugfs_init(kbdev);
}
#else

View File

@ -31,6 +31,7 @@
#include "mali_kbase_csf_firmware.h"
#include "mali_kbase_csf_event.h"
#include <uapi/gpu/arm/bifrost/csf/mali_kbase_csf_errors_dumpfault.h>
/* Maximum number of KCPU command queues to be created per GPU address space.
*/
@ -355,14 +356,19 @@ struct kbase_csf_notification {
* @trace_buffer_size: CS trace buffer size for the queue.
* @trace_cfg: CS trace configuration parameters.
* @error: GPU command queue fatal information to pass to user space.
* @fatal_event_work: Work item to handle the CS fatal event reported for this
* queue.
* @cs_fatal_info: Records additional information about the CS fatal event.
* @cs_fatal: Records information about the CS fatal event.
* @cs_error_work: Work item to handle the CS fatal event reported for this
* queue or the CS fault event if dump on fault is enabled
* and acknowledgment for CS fault event needs to be done
* after dumping is complete.
* @cs_error_info: Records additional information about the CS fatal event or
* about CS fault event if dump on fault is enabled.
* @cs_error: Records information about the CS fatal event or
* about CS fault event if dump on fault is enabled.
* @cs_error_fatal: Flag to track if the CS fault or CS fatal event occurred.
* @pending: Indicating whether the queue has new submitted work.
* @extract_ofs: The current EXTRACT offset, this is updated during certain
* events such as GPU idle IRQ in order to help detect a
* queue's true idle status.
* @extract_ofs: The current EXTRACT offset, this is only updated when handling
* the GLB IDLE IRQ if the idle timeout value is non-0 in order
* to help detect a queue's true idle status.
* @saved_cmd_ptr: The command pointer value for the GPU queue, saved when the
* group to which queue is bound is suspended.
* This can be useful in certain cases to know that till which
@ -377,7 +383,11 @@ struct kbase_queue {
int doorbell_nr;
unsigned long db_file_offset;
struct list_head link;
#if (KERNEL_VERSION(4, 11, 0) > LINUX_VERSION_CODE)
atomic_t refcount;
#else
refcount_t refcount;
#endif
struct kbase_queue_group *group;
struct kbase_va_region *queue_reg;
struct work_struct oom_event_work;
@ -397,14 +407,15 @@ struct kbase_queue {
u32 trace_buffer_size;
u32 trace_cfg;
struct kbase_csf_notification error;
struct work_struct fatal_event_work;
u64 cs_fatal_info;
u32 cs_fatal;
struct work_struct cs_error_work;
u64 cs_error_info;
u32 cs_error;
bool cs_error_fatal;
atomic_t pending;
u64 extract_ofs;
#if IS_ENABLED(CONFIG_DEBUG_FS)
u64 saved_cmd_ptr;
#endif
#endif /* CONFIG_DEBUG_FS */
};
/**
@ -498,6 +509,9 @@ struct kbase_protected_suspend_buffer {
* to be returned to userspace if such an error has occurred.
* @timer_event_work: Work item to handle the progress timeout fatal event
* for the group.
* @deschedule_deferred_cnt: Counter keeping a track of the number of threads
* that tried to deschedule the group and had to defer
* the descheduling due to the dump on fault.
*/
struct kbase_queue_group {
struct kbase_context *kctx;
@ -539,6 +553,15 @@ struct kbase_queue_group {
struct work_struct timer_event_work;
/**
* @dvs_buf: Address and size of scratch memory.
*
* Used to store intermediate DVS data by the GPU.
*/
u64 dvs_buf;
#if IS_ENABLED(CONFIG_DEBUG_FS)
u32 deschedule_deferred_cnt;
#endif
};
/**
@ -548,10 +571,10 @@ struct kbase_queue_group {
* @lock: Lock preventing concurrent access to @array and the @in_use bitmap.
* @array: Array of pointers to kernel CPU command queues.
* @in_use: Bitmap which indicates which kernel CPU command queues are in use.
* @wq: Dedicated workqueue for processing kernel CPU command queues.
* @num_cmds: The number of commands that have been enqueued across
* all the KCPU command queues. This could be used as a
* timestamp to determine the command's enqueueing time.
* @cmd_seq_num: The sequence number assigned to an enqueued command,
* in incrementing order (older commands shall have a
* smaller number).
* @jit_lock: Lock to serialise JIT operations.
* @jit_cmds_head: A list of the just-in-time memory commands, both
* allocate & free, in submission order, protected
* by kbase_csf_kcpu_queue_context.lock.
@ -564,9 +587,9 @@ struct kbase_csf_kcpu_queue_context {
struct mutex lock;
struct kbase_kcpu_command_queue *array[KBASEP_MAX_KCPU_QUEUES];
DECLARE_BITMAP(in_use, KBASEP_MAX_KCPU_QUEUES);
struct workqueue_struct *wq;
u64 num_cmds;
atomic64_t cmd_seq_num;
struct mutex jit_lock;
struct list_head jit_cmds_head;
struct list_head jit_blocked_queues;
};
@ -636,6 +659,28 @@ struct kbase_csf_tiler_heap_context {
u64 nr_of_heaps;
};
/**
* struct kbase_csf_ctx_heap_reclaim_info - Object representing the data section of
* a kctx for the tiler heap reclaim manager
* @mgr_link: Link for hooking up to the heap reclaim manager's kctx lists
* @nr_freed_pages: Number of pages freed from the kctx after its attachment
* to the reclaim manager. This is used for tracking the
* progress of the reclaim's free operation.
* @nr_est_unused_pages: Estimated number of pages that could be freed for the kctx
* when all its CSGs are off-slot, on attaching to the reclaim
* manager.
* @on_slot_grps: Number of on-slot groups from this kctx. In principle, if a
* kctx has groups on-slot, the scheduler will detach it from
* the tiler heap reclaim manager, i.e. no tiler heap memory
* reclaiming operations on the kctx.
*/
struct kbase_csf_ctx_heap_reclaim_info {
struct list_head mgr_link;
u32 nr_freed_pages;
u32 nr_est_unused_pages;
u8 on_slot_grps;
};
/**
* struct kbase_csf_scheduler_context - Object representing the scheduler's
* context for a GPU address space.
@ -657,6 +702,10 @@ struct kbase_csf_tiler_heap_context {
* streams bound to groups of @idle_wait_groups list.
* @ngrp_to_schedule: Number of groups added for the context to the
* 'groups_to_schedule' list of scheduler instance.
* @heap_info: Heap reclaim information data of the kctx. As the
* reclaim action needs to be coordinated with the scheduler
* operations, any manipulation of the data requires holding
* the scheduler's mutex lock.
*/
struct kbase_csf_scheduler_context {
struct list_head runnable_groups[KBASE_QUEUE_GROUP_PRIORITY_COUNT];
@ -666,6 +715,7 @@ struct kbase_csf_scheduler_context {
struct workqueue_struct *sync_update_wq;
struct work_struct sync_update_work;
u32 ngrp_to_schedule;
struct kbase_csf_ctx_heap_reclaim_info heap_info;
};
/**
@ -808,6 +858,22 @@ struct kbase_csf_csg_slot {
u8 priority;
};
/**
* struct kbase_csf_sched_heap_reclaim_mgr - Object for managing tiler heap reclaim
* kctx lists inside the CSF device's scheduler.
*
* @heap_reclaim: Tiler heap reclaim shrinker object.
* @ctx_lists: Array of kctx lists, size matching CSG defined priorities. The
* lists track the kctxs attached to the reclaim manager.
* @unused_pages: Estimated number of unused pages from the @ctx_lists array. The
* number is indicative for use with reclaim shrinker's count method.
*/
struct kbase_csf_sched_heap_reclaim_mgr {
struct shrinker heap_reclaim;
struct list_head ctx_lists[KBASE_QUEUE_GROUP_PRIORITY_COUNT];
atomic_t unused_pages;
};
/**
* struct kbase_csf_scheduler - Object representing the scheduler used for
* CSF for an instance of GPU platform device.
@ -880,6 +946,8 @@ struct kbase_csf_csg_slot {
* operation to implement timeslice-based scheduling.
* @tock_work: Work item that would perform the schedule on tock
* operation to implement the asynchronous scheduling.
* @pending_tock_work: Indicates that the tock work item should re-execute
* once it's finished instead of going back to sleep.
* @ping_work: Work item that would ping the firmware at regular
* intervals, only if there is a single active CSG
* slot, to check if firmware is alive and would
@ -889,8 +957,6 @@ struct kbase_csf_csg_slot {
* @top_grp.
* @top_grp: Pointer to queue group inside @groups_to_schedule
* list that was assigned the highest slot priority.
* @tock_pending_request: A "tock" request is pending: a group that is not
* currently on the GPU demands to be scheduled.
* @active_protm_grp: Indicates if firmware has been permitted to let GPU
* enter protected mode with the given group. On exit
* from protected mode the pointer is reset to NULL.
@ -903,6 +969,13 @@ struct kbase_csf_csg_slot {
* handler.
* @gpu_idle_work: Work item for facilitating the scheduler to bring
* the GPU to a low-power mode on becoming idle.
* @fast_gpu_idle_handling: Indicates whether to relax many of the checks
* normally done in the GPU idle worker. This is
* set to true when handling the GLB IDLE IRQ if the
* idle hysteresis timeout is 0, since it makes it
* possible to receive this IRQ before the extract
* offset is published (which would cause more
* extensive GPU idle checks to fail).
* @gpu_no_longer_idle: Effective only when the GPU idle worker has been
* queued for execution, this indicates whether the
* GPU has become non-idle since the last time the
@ -934,6 +1007,7 @@ struct kbase_csf_csg_slot {
* groups. It is updated on every tick/tock.
* @interrupt_lock is used to serialize the access.
* @protm_enter_time: GPU protected mode enter time.
* @reclaim_mgr: CSGs tiler heap manager object.
*/
struct kbase_csf_scheduler {
struct mutex lock;
@ -960,13 +1034,14 @@ struct kbase_csf_scheduler {
struct hrtimer tick_timer;
struct work_struct tick_work;
struct delayed_work tock_work;
atomic_t pending_tock_work;
struct delayed_work ping_work;
struct kbase_context *top_ctx;
struct kbase_queue_group *top_grp;
bool tock_pending_request;
struct kbase_queue_group *active_protm_grp;
struct workqueue_struct *idle_wq;
struct work_struct gpu_idle_work;
bool fast_gpu_idle_handling;
atomic_t gpu_no_longer_idle;
atomic_t non_idle_offslot_grps;
u32 non_idle_scanout_grps;
@ -975,6 +1050,7 @@ struct kbase_csf_scheduler {
bool tick_timer_active;
u32 tick_protm_pending_seq;
ktime_t protm_enter_time;
struct kbase_csf_sched_heap_reclaim_mgr reclaim_mgr;
};
/*
@ -1161,6 +1237,7 @@ struct kbase_ipa_control {
* @flags: bitmask of CSF_FIRMWARE_ENTRY_* conveying the interface attributes
* @data_start: Offset into firmware image at which the interface data starts
* @data_end: Offset into firmware image at which the interface data ends
* @virtual_exe_start: Starting GPU execution virtual address of this interface
* @kernel_map: A kernel mapping of the memory or NULL if not required to be
* mapped in the kernel
* @pma: Array of pointers to protected memory allocations.
@ -1177,6 +1254,7 @@ struct kbase_csf_firmware_interface {
u32 flags;
u32 data_start;
u32 data_end;
u32 virtual_exe_start;
void *kernel_map;
struct protected_memory_allocation **pma;
};
@ -1208,6 +1286,74 @@ struct kbase_csf_mcu_fw {
u8 *data;
};
/*
* Firmware log polling period.
*
* NOTE(review): the _MS suffix suggests the unit is milliseconds - confirm
* against the code that schedules the firmware log poll work.
*/
#define KBASE_CSF_FIRMWARE_LOG_POLL_PERIOD_MS 25
/**
* enum kbase_csf_firmware_log_mode - Firmware log operating mode
*
* @KBASE_CSF_FIRMWARE_LOG_MODE_MANUAL: Manual mode. The firmware log can be read
* manually by userspace (and it will also be dumped automatically into
* dmesg on GPU reset).
*
* @KBASE_CSF_FIRMWARE_LOG_MODE_AUTO_PRINT: Automatic printing mode. The firmware
* log will be periodically emptied into dmesg; manual reading through debugfs
* is disabled.
*/
enum kbase_csf_firmware_log_mode {
KBASE_CSF_FIRMWARE_LOG_MODE_MANUAL,
KBASE_CSF_FIRMWARE_LOG_MODE_AUTO_PRINT
};
/**
* struct kbase_csf_firmware_log - Object containing members for handling firmware log.
*
* @mode: Firmware log operating mode.
* @busy: Indicates whether a firmware log operation is in progress.
* @poll_work: Work item that polls the firmware log buffer
* at regular intervals to perform any periodic
* activities required by the current log mode.
* @dump_buf: Buffer used for dumping the log.
* @func_call_list_va_start: Virtual address of the start of the call list of FW log functions.
* @func_call_list_va_end: Virtual address of the end of the call list of FW log functions.
*/
struct kbase_csf_firmware_log {
enum kbase_csf_firmware_log_mode mode;
atomic_t busy;
struct delayed_work poll_work;
u8 *dump_buf;
u32 func_call_list_va_start;
u32 func_call_list_va_end;
};
#if IS_ENABLED(CONFIG_DEBUG_FS)
/**
* struct kbase_csf_dump_on_fault - Fault information to deliver to the daemon
*
* @error_code: Error code.
* @kctx_tgid: tgid value of the Kbase context for which the fault happened.
* @kctx_id: id of the Kbase context for which the fault happened.
* @enabled: Flag to indicate that 'csf_fault' debugfs has been opened
* so dump on fault is enabled.
* @fault_wait_wq: Waitqueue on which the user space client is blocked until kbase
* reports a fault.
* @dump_wait_wq: Waitqueue on which kbase threads are blocked until the user space
* client completes the dump on fault.
* @lock: Lock to protect the members of this struct from concurrent access.
*/
struct kbase_csf_dump_on_fault {
enum dumpfault_error_type error_code;
u32 kctx_tgid;
u32 kctx_id;
atomic_t enabled;
wait_queue_head_t fault_wait_wq;
wait_queue_head_t dump_wait_wq;
spinlock_t lock;
};
#endif /* CONFIG_DEBUG_FS */
/**
* struct kbase_csf_device - Object representing CSF for an instance of GPU
* platform device.
@ -1251,11 +1397,14 @@ struct kbase_csf_mcu_fw {
* in the address space of every process, that created
* a Base context, to enable the access to LATEST_FLUSH
* register from userspace.
* @nr_user_page_mapped: The number of clients using the mapping of USER page.
* This is used to maintain backward compatibility.
* It's protected by @reg_lock.
* @mali_file_inode: Pointer to the inode corresponding to mali device
* file. This is needed in order to switch to the
* @dummy_user_reg_page on GPU power down.
* All instances of the mali device file will point to
* the same inode.
* the same inode. It's protected by @reg_lock.
* @reg_lock: Lock to serialize the MCU firmware related actions
* that affect all contexts such as allocation of
* regions from shared interface area, assignment of
@ -1320,6 +1469,8 @@ struct kbase_csf_mcu_fw {
* @hwcnt: Contain members required for handling the dump of
* HW counters.
* @fw: Copy of the loaded MCU firmware image.
* @fw_log: Contain members required for handling firmware log.
* @dof: Structure for dump on fault.
*/
struct kbase_csf_device {
struct kbase_mmu_table mcu_mmu;
@ -1334,6 +1485,7 @@ struct kbase_csf_device {
u32 db_file_offsets;
struct tagged_addr dummy_db_page;
struct tagged_addr dummy_user_reg_page;
u32 nr_user_page_mapped;
struct inode *mali_file_inode;
struct mutex reg_lock;
wait_queue_head_t event_wait;
@ -1360,6 +1512,10 @@ struct kbase_csf_device {
unsigned int fw_timeout_ms;
struct kbase_csf_hwcnt hwcnt;
struct kbase_csf_mcu_fw fw;
struct kbase_csf_firmware_log fw_log;
#if IS_ENABLED(CONFIG_DEBUG_FS)
struct kbase_csf_dump_on_fault dof;
#endif /* CONFIG_DEBUG_FS */
};
/**

View File

@ -169,7 +169,8 @@ void kbase_csf_event_term(struct kbase_context *kctx)
kfree(event_cb);
}
WARN_ON(!list_empty(&kctx->csf.event.error_list));
WARN(!list_empty(&kctx->csf.event.error_list),
"Error list not empty for ctx %d_%d\n", kctx->tgid, kctx->id);
spin_unlock_irqrestore(&kctx->csf.event.lock, flags);
}
@ -244,6 +245,14 @@ bool kbase_csf_event_error_pending(struct kbase_context *kctx)
bool error_pending = false;
unsigned long flags;
/* Withhold the error event if the dump on fault is ongoing.
* This would prevent the Userspace from taking error recovery actions
* (which can potentially affect the state that is being dumped).
* Event handling thread would eventually notice the error event.
*/
if (unlikely(!kbase_debug_csf_fault_dump_complete(kctx->kbdev)))
return false;
spin_lock_irqsave(&kctx->csf.event.lock, flags);
error_pending = !list_empty(&kctx->csf.event.error_list);

View File

@ -21,6 +21,7 @@
#include "mali_kbase.h"
#include "mali_kbase_csf_firmware_cfg.h"
#include "mali_kbase_csf_firmware_log.h"
#include "mali_kbase_csf_trace_buffer.h"
#include "mali_kbase_csf_timeout.h"
#include "mali_kbase_mem.h"
@ -77,9 +78,11 @@ MODULE_PARM_DESC(fw_debug,
"Enables effective use of a debugger for debugging firmware code.");
#endif
#define FIRMWARE_HEADER_MAGIC (0xC3F13A6Eul)
#define FIRMWARE_HEADER_VERSION (0ul)
#define FIRMWARE_HEADER_LENGTH (0x14ul)
#define FIRMWARE_HEADER_MAGIC (0xC3F13A6Eul)
#define FIRMWARE_HEADER_VERSION_MAJOR (0ul)
#define FIRMWARE_HEADER_VERSION_MINOR (2ul)
#define FIRMWARE_HEADER_LENGTH (0x14ul)
#define CSF_FIRMWARE_ENTRY_SUPPORTED_FLAGS \
(CSF_FIRMWARE_ENTRY_READ | \
@ -92,10 +95,10 @@ MODULE_PARM_DESC(fw_debug,
#define CSF_FIRMWARE_ENTRY_TYPE_INTERFACE (0)
#define CSF_FIRMWARE_ENTRY_TYPE_CONFIGURATION (1)
#define CSF_FIRMWARE_ENTRY_TYPE_FUTF_TEST (2)
#define CSF_FIRMWARE_ENTRY_TYPE_TRACE_BUFFER (3)
#define CSF_FIRMWARE_ENTRY_TYPE_TIMELINE_METADATA (4)
#define CSF_FIRMWARE_ENTRY_TYPE_BUILD_INFO_METADATA (6)
#define CSF_FIRMWARE_ENTRY_TYPE_FUNC_CALL_LIST (7)
#define CSF_FIRMWARE_CACHE_MODE_NONE (0ul << 3)
#define CSF_FIRMWARE_CACHE_MODE_CACHED (1ul << 3)
@ -431,8 +434,8 @@ static void load_fw_image_section(struct kbase_device *kbdev, const u8 *data,
memset(p + copy_len, 0, zi_len);
}
kbase_sync_single_for_device(kbdev, kbase_dma_addr(page),
PAGE_SIZE, DMA_TO_DEVICE);
kbase_sync_single_for_device(kbdev, kbase_dma_addr_from_tagged(phys[page_num]),
PAGE_SIZE, DMA_TO_DEVICE);
kunmap_atomic(p);
}
}
@ -525,6 +528,58 @@ static inline bool entry_find_large_page_to_reuse(
*pma = NULL;
/* If the section starts at 2MB aligned boundary,
* then use 2MB page(s) for it.
*/
if (!(virtual_start & (SZ_2M - 1))) {
*num_pages_aligned =
round_up(*num_pages_aligned, NUM_4K_PAGES_IN_2MB_PAGE);
*is_small_page = false;
goto out;
}
/* If the section doesn't lie within the same 2MB aligned boundary,
* then use 4KB pages as it would be complicated to use a 2MB page
* for such section.
*/
if ((virtual_start & ~(SZ_2M - 1)) != (virtual_end & ~(SZ_2M - 1)))
goto out;
/* Find the nearest 2MB aligned section which comes before the current
* section.
*/
list_for_each_entry(interface, &kbdev->csf.firmware_interfaces, node) {
const u32 virtual_diff = virtual_start - interface->virtual;
if (interface->virtual > virtual_end)
continue;
if (interface->virtual & (SZ_2M - 1))
continue;
if (virtual_diff < virtual_diff_min) {
target_interface = interface;
virtual_diff_min = virtual_diff;
}
}
if (target_interface) {
const u32 page_index = virtual_diff_min >> PAGE_SHIFT;
if (page_index >= target_interface->num_pages_aligned)
goto out;
if (target_interface->phys)
*phys = &target_interface->phys[page_index];
if (target_interface->pma)
*pma = &target_interface->pma[page_index / NUM_4K_PAGES_IN_2MB_PAGE];
*is_small_page = false;
reuse_large_page = true;
}
out:
return reuse_large_page;
}
@ -555,6 +610,8 @@ static int parse_memory_setup_entry(struct kbase_device *kbdev,
u32 num_pages;
u32 num_pages_aligned;
char *name;
void *name_entry;
unsigned int name_len;
struct tagged_addr *phys = NULL;
struct kbase_csf_firmware_interface *interface = NULL;
bool allocated_pages = false, protected_mode = false;
@ -625,8 +682,8 @@ static int parse_memory_setup_entry(struct kbase_device *kbdev,
} else {
if (!reuse_pages) {
ret = kbase_mem_pool_alloc_pages(
kbase_mem_pool_group_select(
kbdev, KBASE_MEM_GROUP_CSF_FW, is_small_page),
kbase_mem_pool_group_select(kbdev, KBASE_MEM_GROUP_CSF_FW,
is_small_page),
num_pages_aligned, phys, false);
}
}
@ -643,21 +700,24 @@ static int parse_memory_setup_entry(struct kbase_device *kbdev,
data_start, data_end);
/* Allocate enough memory for the struct kbase_csf_firmware_interface and
* the name of the interface. An extra byte is allocated to place a
* NUL-terminator in. This should already be included according to the
* specification but here we add it anyway to be robust against a
* corrupt firmware image.
* the name of the interface.
*/
interface = kmalloc(sizeof(*interface) +
size - INTERFACE_ENTRY_NAME_OFFSET + 1, GFP_KERNEL);
name_entry = (void *)entry + INTERFACE_ENTRY_NAME_OFFSET;
name_len = strnlen(name_entry, size - INTERFACE_ENTRY_NAME_OFFSET);
if (size < (INTERFACE_ENTRY_NAME_OFFSET + name_len + 1 + sizeof(u32))) {
dev_err(kbdev->dev, "Memory setup entry too short to contain virtual_exe_start");
ret = -EINVAL;
goto out;
}
interface = kmalloc(sizeof(*interface) + name_len + 1, GFP_KERNEL);
if (!interface) {
ret = -ENOMEM;
goto out;
}
name = (void *)(interface + 1);
memcpy(name, entry + (INTERFACE_ENTRY_NAME_OFFSET / sizeof(*entry)),
size - INTERFACE_ENTRY_NAME_OFFSET);
name[size - INTERFACE_ENTRY_NAME_OFFSET] = 0;
memcpy(name, name_entry, name_len);
name[name_len] = 0;
interface->name = name;
interface->phys = phys;
@ -672,6 +732,11 @@ static int parse_memory_setup_entry(struct kbase_device *kbdev,
interface->data_end = data_end;
interface->pma = pma;
/* Discover the virtual execution address field after the end of the name
* field taking into account the NULL-termination character.
*/
interface->virtual_exe_start = *((u32 *)(name_entry + name_len + 1));
mem_flags = convert_mem_flags(kbdev, flags, &cache_mode);
if (flags & CSF_FIRMWARE_ENTRY_SHARED) {
@ -956,6 +1021,15 @@ static int load_firmware_entry(struct kbase_device *kbdev, const struct kbase_cs
return -EINVAL;
}
return parse_build_info_metadata_entry(kbdev, fw, entry, size);
case CSF_FIRMWARE_ENTRY_TYPE_FUNC_CALL_LIST:
/* Function call list section */
if (size < 2 * sizeof(*entry)) {
dev_err(kbdev->dev, "Function call list entry too short (size=%u)\n",
size);
return -EINVAL;
}
kbase_csf_firmware_log_parse_logging_call_list_entry(kbdev, entry);
break;
}
if (!optional) {
@ -1179,40 +1253,80 @@ static int parse_capabilities(struct kbase_device *kbdev)
return 0;
}
/*
 * Read or write one 32-bit word of an interface's backing pages.
 *
 * @kbdev:        Device pointer, passed to the cache sync helper.
 * @interface:    Firmware interface whose phys[] pages back the access.
 * @offset_bytes: Byte offset of the word from the start of the interface.
 * @value:        Destination for a read, source for a write.
 * @read:         true to read into *@value, false to write *@value.
 *
 * The page holding the word is temporarily mapped with kmap_atomic(). A write
 * is stored before the sync call, and a read is performed after it.
 *
 * No bounds check is done on @offset_bytes here; callers are expected to have
 * resolved it against the interface's size.
 *
 * NOTE(review): the read path syncs "for device" rather than "for CPU". This
 * is preserved as-is; confirm it is intentional for this memory.
 */
static inline void access_firmware_memory_common(struct kbase_device *kbdev,
		struct kbase_csf_firmware_interface *interface, u32 offset_bytes,
		u32 *value, const bool read)
{
	const u32 pg_idx = offset_bytes >> PAGE_SHIFT;
	const u32 pg_off = offset_bytes & ~PAGE_MASK;
	const struct tagged_addr pg_phys = interface->phys[pg_idx];
	u8 *const mapping = kmap_atomic(as_page(pg_phys));
	u32 *const word = (u32 *)(mapping + pg_off);

	/* A write must land in memory before the sync below. */
	if (!read)
		*word = *value;

	kbase_sync_single_for_device(kbdev, kbase_dma_addr_from_tagged(pg_phys) + pg_off,
				     sizeof(u32), DMA_BIDIRECTIONAL);

	/* A read is only performed once the sync has been issued. */
	if (read)
		*value = *word;

	kunmap_atomic(mapping);
}
static inline void access_firmware_memory(struct kbase_device *kbdev,
u32 gpu_addr, u32 *value, const bool read)
{
struct kbase_csf_firmware_interface *interface;
struct kbase_csf_firmware_interface *interface, *access_interface = NULL;
u32 offset_bytes = 0;
list_for_each_entry(interface, &kbdev->csf.firmware_interfaces, node) {
if ((gpu_addr >= interface->virtual) &&
(gpu_addr < interface->virtual + (interface->num_pages << PAGE_SHIFT))) {
u32 offset_bytes = gpu_addr - interface->virtual;
u32 page_num = offset_bytes >> PAGE_SHIFT;
u32 offset_in_page = offset_bytes & ~PAGE_MASK;
struct page *target_page = as_page(
interface->phys[page_num]);
u32 *cpu_addr = kmap_atomic(target_page);
if (read) {
kbase_sync_single_for_device(kbdev,
kbase_dma_addr(target_page) + offset_in_page,
sizeof(u32), DMA_BIDIRECTIONAL);
*value = cpu_addr[offset_in_page >> 2];
} else {
cpu_addr[offset_in_page >> 2] = *value;
kbase_sync_single_for_device(kbdev,
kbase_dma_addr(target_page) + offset_in_page,
sizeof(u32), DMA_BIDIRECTIONAL);
}
kunmap_atomic(cpu_addr);
return;
offset_bytes = gpu_addr - interface->virtual;
access_interface = interface;
break;
}
}
dev_warn(kbdev->dev, "Invalid GPU VA %x passed\n", gpu_addr);
if (access_interface)
access_firmware_memory_common(kbdev, access_interface, offset_bytes, value, read);
else
dev_warn(kbdev->dev, "Invalid GPU VA %x passed", gpu_addr);
}
/*
 * Read or write one 32-bit word of firmware memory, addressed by its
 * execution-time GPU virtual address.
 *
 * @kbdev:    Device pointer.
 * @gpu_addr: GPU VA of the word, relative to an interface's virtual_exe_start.
 * @value:    Destination for a read, source for a write.
 * @read:     true to read, false to write.
 *
 * Walks the firmware interface list for an interface whose execution range
 * [virtual_exe_start, virtual_exe_start + num_pages pages) contains @gpu_addr.
 * A warning is logged and nothing is accessed if no interface matches.
 */
static inline void access_firmware_memory_exe(struct kbase_device *kbdev,
		u32 gpu_addr, u32 *value, const bool read)
{
	struct kbase_csf_firmware_interface *iter;
	struct kbase_csf_firmware_interface *match = NULL;
	u32 match_offset = 0;

	list_for_each_entry(iter, &kbdev->csf.firmware_interfaces, node) {
		/* Skip interfaces whose execution range does not cover gpu_addr. */
		if (gpu_addr < iter->virtual_exe_start)
			continue;
		if (gpu_addr >= iter->virtual_exe_start + (iter->num_pages << PAGE_SHIFT))
			continue;

		match = iter;
		match_offset = gpu_addr - iter->virtual_exe_start;

		/* If there's an overlap in execution address range between a moved and a
		 * non-moved areas, always prefer the moved one. The idea is that FW may
		 * move sections around during init time, but after the layout is settled,
		 * any moved sections are going to override non-moved areas at the same
		 * location.
		 *
		 * A non-moved match therefore does not end the search: a later moved
		 * interface (or a later non-moved one) can still replace it.
		 */
		if (iter->virtual_exe_start != iter->virtual)
			break;
	}

	if (!match) {
		dev_warn(kbdev->dev, "Invalid GPU VA %x passed", gpu_addr);
		return;
	}

	access_firmware_memory_common(kbdev, match, match_offset, value, read);
}
void kbase_csf_read_firmware_memory(struct kbase_device *kbdev,
@ -1227,6 +1341,18 @@ void kbase_csf_update_firmware_memory(struct kbase_device *kbdev,
access_firmware_memory(kbdev, gpu_addr, &value, false);
}
/*
 * Read the 32-bit word at @gpu_addr into *@value, resolving the address
 * against each interface's execution-time range. Thin wrapper around
 * access_firmware_memory_exe() with read == true.
 */
void kbase_csf_read_firmware_memory_exe(struct kbase_device *kbdev,
u32 gpu_addr, u32 *value)
{
access_firmware_memory_exe(kbdev, gpu_addr, value, true);
}
/*
 * Write the 32-bit @value at @gpu_addr, resolving the address against each
 * interface's execution-time range. Thin wrapper around
 * access_firmware_memory_exe() with read == false; @value is passed by
 * address, so the helper only ever sees this function's local copy.
 */
void kbase_csf_update_firmware_memory_exe(struct kbase_device *kbdev,
u32 gpu_addr, u32 value)
{
access_firmware_memory_exe(kbdev, gpu_addr, &value, false);
}
void kbase_csf_firmware_cs_input(
const struct kbase_csf_cmd_stream_info *const info, const u32 offset,
const u32 value)
@ -1462,11 +1588,10 @@ static bool global_request_complete(struct kbase_device *const kbdev,
return complete;
}
static int wait_for_global_request(struct kbase_device *const kbdev,
u32 const req_mask)
static int wait_for_global_request_with_timeout(struct kbase_device *const kbdev,
u32 const req_mask, unsigned int timeout_ms)
{
const long wait_timeout =
kbase_csf_timeout_in_jiffies(kbdev->csf.fw_timeout_ms);
const long wait_timeout = kbase_csf_timeout_in_jiffies(timeout_ms);
long remaining;
int err = 0;
@ -1475,10 +1600,9 @@ static int wait_for_global_request(struct kbase_device *const kbdev,
wait_timeout);
if (!remaining) {
dev_warn(kbdev->dev, "[%llu] Timeout (%d ms) waiting for global request %x to complete",
kbase_backend_get_cycle_cnt(kbdev),
kbdev->csf.fw_timeout_ms,
req_mask);
dev_warn(kbdev->dev,
"[%llu] Timeout (%d ms) waiting for global request %x to complete",
kbase_backend_get_cycle_cnt(kbdev), timeout_ms, req_mask);
err = -ETIMEDOUT;
}
@ -1486,6 +1610,11 @@ static int wait_for_global_request(struct kbase_device *const kbdev,
return err;
}
/*
 * Wait for the global request(s) in @req_mask using the device's default
 * firmware timeout (kbdev->csf.fw_timeout_ms).
 *
 * Return: whatever wait_for_global_request_with_timeout() returns.
 */
static int wait_for_global_request(struct kbase_device *const kbdev, u32 const req_mask)
{
return wait_for_global_request_with_timeout(kbdev, req_mask, kbdev->csf.fw_timeout_ms);
}
static void set_global_request(
const struct kbase_csf_global_iface *const global_iface,
u32 const req_mask)
@ -1559,6 +1688,25 @@ static void enable_gpu_idle_timer(struct kbase_device *const kbdev)
}
/**
* kbasep_enable_rtu - Enable Ray Tracing Unit on powering up shader core
*
* @kbdev: The kbase device structure of the device
*
* This function needs to be called to enable the Ray Tracing Unit
* by writing SHADER_PWRFEATURES only when host controls shader cores power.
*/
static void kbasep_enable_rtu(struct kbase_device *kbdev)
{
const u32 gpu_id = kbdev->gpu_props.props.raw_props.gpu_id;
if (gpu_id < GPU_ID2_PRODUCT_MAKE(12, 8, 3, 0))
return;
if (kbdev->csf.firmware_hctl_core_pwr)
kbase_reg_write(kbdev, GPU_CONTROL_REG(SHADER_PWRFEATURES), 1);
}
static void global_init(struct kbase_device *const kbdev, u64 core_mask)
{
u32 const ack_irq_mask =
@ -1574,6 +1722,8 @@ static void global_init(struct kbase_device *const kbdev, u64 core_mask)
kbase_csf_scheduler_spin_lock(kbdev, &flags);
kbasep_enable_rtu(kbdev);
/* Update shader core allocation enable mask */
enable_endpoints_global(global_iface, core_mask);
enable_shader_poweroff_timer(kbdev, global_iface);
@ -1854,7 +2004,6 @@ u32 kbase_csf_firmware_set_gpu_idle_hysteresis_time(struct kbase_device *kbdev,
static u32 convert_dur_to_core_pwroff_count(struct kbase_device *kbdev, const u32 dur_us)
{
#define PWROFF_VAL_UNIT_SHIFT (10)
/* Get the cntfreq_el0 value, which drives the SYSTEM_TIMESTAMP */
u64 freq = arch_timer_get_cntfrq();
u64 dur_val = dur_us;
@ -1991,16 +2140,6 @@ int kbase_csf_firmware_early_init(struct kbase_device *kbdev)
kbdev->csf.fw_timeout_ms =
kbase_get_timeout_ms(kbdev, CSF_FIRMWARE_TIMEOUT);
kbdev->csf.gpu_idle_hysteresis_ms = FIRMWARE_IDLE_HYSTERESIS_TIME_MS;
#ifdef KBASE_PM_RUNTIME
if (kbase_pm_gpu_sleep_allowed(kbdev))
kbdev->csf.gpu_idle_hysteresis_ms /=
FIRMWARE_IDLE_HYSTERESIS_GPU_SLEEP_SCALER;
#endif
WARN_ON(!kbdev->csf.gpu_idle_hysteresis_ms);
kbdev->csf.gpu_idle_dur_count = convert_dur_to_idle_count(
kbdev, kbdev->csf.gpu_idle_hysteresis_ms);
kbdev->csf.mcu_core_pwroff_dur_us = DEFAULT_GLB_PWROFF_TIMEOUT_US;
kbdev->csf.mcu_core_pwroff_dur_count = convert_dur_to_core_pwroff_count(
kbdev, DEFAULT_GLB_PWROFF_TIMEOUT_US);
@ -2020,7 +2159,26 @@ int kbase_csf_firmware_early_init(struct kbase_device *kbdev)
return 0;
}
int kbase_csf_firmware_init(struct kbase_device *kbdev)
/*
 * Early-term counterpart of the firmware early init: the only resource
 * released here is the kbdev->csf.reg_lock mutex.
 */
void kbase_csf_firmware_early_term(struct kbase_device *kbdev)
{
mutex_destroy(&kbdev->csf.reg_lock);
}
/*
 * Set up the GPU idle hysteresis time and its derived count.
 *
 * Return: always 0.
 */
int kbase_csf_firmware_late_init(struct kbase_device *kbdev)
{
/* Start from the default hysteresis time. */
kbdev->csf.gpu_idle_hysteresis_ms = FIRMWARE_IDLE_HYSTERESIS_TIME_MS;
#ifdef KBASE_PM_RUNTIME
/* Scale the hysteresis time down when GPU sleep is allowed. */
if (kbase_pm_gpu_sleep_allowed(kbdev))
kbdev->csf.gpu_idle_hysteresis_ms /= FIRMWARE_IDLE_HYSTERESIS_GPU_SLEEP_SCALER;
#endif
/* The (integer) division above must not have reduced the time to zero. */
WARN_ON(!kbdev->csf.gpu_idle_hysteresis_ms);
/* Derive the idle duration count from the final hysteresis time. */
kbdev->csf.gpu_idle_dur_count =
convert_dur_to_idle_count(kbdev, kbdev->csf.gpu_idle_hysteresis_ms);
return 0;
}
int kbase_csf_firmware_load_init(struct kbase_device *kbdev)
{
const struct firmware *firmware = NULL;
struct kbase_csf_mcu_fw *const mcu_fw = &kbdev->csf.fw;
@ -2093,7 +2251,8 @@ int kbase_csf_firmware_init(struct kbase_device *kbdev)
version_minor = mcu_fw->data[4];
version_major = mcu_fw->data[5];
if (version_major != FIRMWARE_HEADER_VERSION) {
if (version_major != FIRMWARE_HEADER_VERSION_MAJOR ||
version_minor != FIRMWARE_HEADER_VERSION_MINOR) {
dev_err(kbdev->dev,
"Firmware header version %d.%d not understood\n",
version_major, version_minor);
@ -2188,6 +2347,12 @@ int kbase_csf_firmware_init(struct kbase_device *kbdev)
if (ret != 0)
goto err_out;
ret = kbase_csf_firmware_log_init(kbdev);
if (ret != 0) {
dev_err(kbdev->dev, "Failed to initialize FW trace (err %d)", ret);
goto err_out;
}
/* Firmware loaded successfully, ret = 0 */
KBASE_KTRACE_ADD(kbdev, CSF_FIRMWARE_BOOT, NULL,
(((u64)version_hash) << 32) |
@ -2195,11 +2360,11 @@ int kbase_csf_firmware_init(struct kbase_device *kbdev)
return 0;
err_out:
kbase_csf_firmware_term(kbdev);
kbase_csf_firmware_unload_term(kbdev);
return ret;
}
void kbase_csf_firmware_term(struct kbase_device *kbdev)
void kbase_csf_firmware_unload_term(struct kbase_device *kbdev)
{
unsigned long flags;
int ret = 0;
@ -2210,6 +2375,8 @@ void kbase_csf_firmware_term(struct kbase_device *kbdev)
WARN(ret, "failed to wait for GPU reset");
kbase_csf_firmware_log_term(kbdev);
kbase_csf_firmware_cfg_term(kbdev);
kbase_csf_timeout_term(kbdev);
@ -2297,8 +2464,6 @@ void kbase_csf_firmware_term(struct kbase_device *kbdev)
*/
kbase_mcu_shared_interface_region_tracker_term(kbdev);
mutex_destroy(&kbdev->csf.reg_lock);
kbase_mmu_term(kbdev, &kbdev->csf.mcu_mmu);
/* Release the address space */
@ -2350,10 +2515,11 @@ void kbase_csf_firmware_ping(struct kbase_device *const kbdev)
kbase_csf_scheduler_spin_unlock(kbdev, flags);
}
int kbase_csf_firmware_ping_wait(struct kbase_device *const kbdev)
int kbase_csf_firmware_ping_wait(struct kbase_device *const kbdev, unsigned int wait_timeout_ms)
{
kbase_csf_firmware_ping(kbdev);
return wait_for_global_request(kbdev, GLB_REQ_PING_MASK);
return wait_for_global_request_with_timeout(kbdev, GLB_REQ_PING_MASK, wait_timeout_ms);
}
int kbase_csf_firmware_set_timeout(struct kbase_device *const kbdev,
@ -2392,7 +2558,7 @@ void kbase_csf_enter_protected_mode(struct kbase_device *kbdev)
kbase_csf_ring_doorbell(kbdev, CSF_KERNEL_DOORBELL_NR);
}
void kbase_csf_wait_protected_mode_enter(struct kbase_device *kbdev)
int kbase_csf_wait_protected_mode_enter(struct kbase_device *kbdev)
{
int err;
@ -2432,12 +2598,14 @@ void kbase_csf_wait_protected_mode_enter(struct kbase_device *kbdev)
}
}
if (err) {
if (unlikely(err)) {
if (kbase_prepare_to_reset_gpu(kbdev, RESET_FLAGS_HWC_UNRECOVERABLE_ERROR))
kbase_reset_gpu(kbdev);
}
KBASE_TLSTREAM_AUX_PROTECTED_ENTER_END(kbdev, kbdev);
return err;
}
void kbase_csf_firmware_trigger_mcu_halt(struct kbase_device *kbdev)
@ -2651,9 +2819,8 @@ int kbase_csf_firmware_mcu_shared_mapping_init(
if (!page_list)
goto page_list_alloc_error;
ret = kbase_mem_pool_alloc_pages(
&kbdev->mem_pools.small[KBASE_MEM_GROUP_CSF_FW],
num_pages, phys, false);
ret = kbase_mem_pool_alloc_pages(&kbdev->mem_pools.small[KBASE_MEM_GROUP_CSF_FW], num_pages,
phys, false);
if (ret <= 0)
goto phys_mem_pool_alloc_error;

View File

@ -364,7 +364,45 @@ void kbase_csf_update_firmware_memory(struct kbase_device *kbdev,
u32 gpu_addr, u32 value);
/**
* kbase_csf_firmware_early_init() - Early initializatin for the firmware.
* kbase_csf_read_firmware_memory_exe - Read a value in a GPU address in the
* region of its final execution location.
*
* @kbdev: Device pointer
* @gpu_addr: GPU address to read
* @value: Output pointer to which the read value will be written
*
* This function reads a value in a GPU address that belongs to a private loaded
* firmware memory region based on its final execution location. The function
* assumes that the location is not permanently mapped on the CPU address space,
* therefore it maps it and then unmaps it to access it independently. This function
* needs to be used when accessing firmware memory regions which will be moved to
* their final execution location during firmware boot using an address based on the
* final execution location.
*/
void kbase_csf_read_firmware_memory_exe(struct kbase_device *kbdev,
u32 gpu_addr, u32 *value);
/**
* kbase_csf_update_firmware_memory_exe - Write a value in a GPU address in the
* region of its final execution location.
*
* @kbdev: Device pointer
* @gpu_addr: GPU address to write
* @value: Value to write
*
* This function writes a value in a GPU address that belongs to a private loaded
* firmware memory region based on its final execution location. The function
* assumes that the location is not permanently mapped on the CPU address space,
* therefore it maps it and then unmaps it to access it independently. This function
* needs to be used when accessing firmware memory regions which will be moved to
* their final execution location during firmware boot using an address based on the
* final execution location.
*/
void kbase_csf_update_firmware_memory_exe(struct kbase_device *kbdev,
u32 gpu_addr, u32 value);
/**
* kbase_csf_firmware_early_init() - Early initialization for the firmware.
* @kbdev: Kbase device
*
* Initialize resources related to the firmware. Must be called at kbase probe.
@ -374,22 +412,43 @@ void kbase_csf_update_firmware_memory(struct kbase_device *kbdev,
int kbase_csf_firmware_early_init(struct kbase_device *kbdev);
/**
* kbase_csf_firmware_init() - Load the firmware for the CSF MCU
* kbase_csf_firmware_early_term() - Terminate resources related to the firmware
* after the firmware unload has been done.
*
* @kbdev: Device pointer
*
* This should be called only when kbase probe fails or gets rmmoded.
*/
void kbase_csf_firmware_early_term(struct kbase_device *kbdev);
/**
* kbase_csf_firmware_late_init() - Late initialization for the firmware.
* @kbdev: Kbase device
*
* Initialize resources related to the firmware. This must be called after
* backend late init is done, and must be used at probe time only.
*
* Return: 0 if successful, negative error code on failure
*/
int kbase_csf_firmware_late_init(struct kbase_device *kbdev);
/**
* kbase_csf_firmware_load_init() - Load the firmware for the CSF MCU
* @kbdev: Kbase device
*
* Request the firmware from user space and load it into memory.
*
* Return: 0 if successful, negative error code on failure
*/
int kbase_csf_firmware_init(struct kbase_device *kbdev);
int kbase_csf_firmware_load_init(struct kbase_device *kbdev);
/**
* kbase_csf_firmware_term() - Unload the firmware
* kbase_csf_firmware_unload_term() - Unload the firmware
* @kbdev: Kbase device
*
* Frees the memory allocated by kbase_csf_firmware_init()
* Frees the memory allocated by kbase_csf_firmware_load_init()
*/
void kbase_csf_firmware_term(struct kbase_device *kbdev);
void kbase_csf_firmware_unload_term(struct kbase_device *kbdev);
/**
* kbase_csf_firmware_ping - Send the ping request to firmware.
@ -404,13 +463,14 @@ void kbase_csf_firmware_ping(struct kbase_device *kbdev);
* kbase_csf_firmware_ping_wait - Send the ping request to firmware and waits.
*
* @kbdev: Instance of a GPU platform device that implements a CSF interface.
* @wait_timeout_ms: Timeout to get the acknowledgment for PING request from FW.
*
* The function sends the ping request to firmware and waits to confirm it is
* alive.
*
* Return: 0 on success, or negative on failure.
*/
int kbase_csf_firmware_ping_wait(struct kbase_device *kbdev);
int kbase_csf_firmware_ping_wait(struct kbase_device *kbdev, unsigned int wait_timeout_ms);
/**
* kbase_csf_firmware_set_timeout - Set a hardware endpoint progress timeout.
@ -447,8 +507,10 @@ void kbase_csf_enter_protected_mode(struct kbase_device *kbdev);
* This function needs to be called after kbase_csf_enter_protected_mode() to
* wait for the GPU to actually enter protected mode. GPU reset is triggered if
* the wait is unsuccessful.
*
* Return: 0 on success, or negative on failure.
*/
void kbase_csf_wait_protected_mode_enter(struct kbase_device *kbdev);
int kbase_csf_wait_protected_mode_enter(struct kbase_device *kbdev);
static inline bool kbase_csf_firmware_mcu_halted(struct kbase_device *kbdev)
{

View File

@ -20,13 +20,17 @@
*/
#include <mali_kbase.h>
#include "mali_kbase_csf_firmware_cfg.h"
#include <mali_kbase_reset_gpu.h>
#include <linux/version.h>
#include "mali_kbase_csf_firmware_cfg.h"
#include "mali_kbase_csf_firmware_log.h"
#if CONFIG_SYSFS
#define CSF_FIRMWARE_CFG_SYSFS_DIR_NAME "firmware_config"
#define CSF_FIRMWARE_CFG_LOG_VERBOSITY_ENTRY_NAME "Log verbosity"
/**
* struct firmware_config - Configuration item within the MCU firmware
*
@ -125,7 +129,7 @@ static ssize_t store_fw_cfg(struct kobject *kobj,
if (attr == &fw_cfg_attr_cur) {
unsigned long flags;
u32 val;
u32 val, cur_val;
int ret = kstrtouint(buf, 0, &val);
if (ret) {
@ -140,7 +144,9 @@ static ssize_t store_fw_cfg(struct kobject *kobj,
return -EINVAL;
spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
if (config->cur_val == val) {
cur_val = config->cur_val;
if (cur_val == val) {
spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
return count;
}
@ -177,6 +183,20 @@ static ssize_t store_fw_cfg(struct kobject *kobj,
spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
/* Enable FW logging only if Log verbosity is non-zero */
if (!strcmp(config->name, CSF_FIRMWARE_CFG_LOG_VERBOSITY_ENTRY_NAME) &&
(!cur_val || !val)) {
ret = kbase_csf_firmware_log_toggle_logging_calls(kbdev, val);
if (ret) {
/* Undo FW configuration changes */
spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
config->cur_val = cur_val;
kbase_csf_update_firmware_memory(kbdev, config->address, cur_val);
spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
return ret;
}
}
/* If we can update the config without firmware reset then
* we need to just trigger FIRMWARE_CONFIG_UPDATE.
*/

View File

@ -0,0 +1,451 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
* (C) COPYRIGHT 2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
* Foundation, and any use by you of this program is subject to the terms
* of such GNU license.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, you can access it online at
* http://www.gnu.org/licenses/gpl-2.0.html.
*
*/
#include <mali_kbase.h>
#include "backend/gpu/mali_kbase_pm_internal.h"
#include <csf/mali_kbase_csf_firmware_log.h>
#include <csf/mali_kbase_csf_trace_buffer.h>
#include <linux/debugfs.h>
#include <linux/string.h>
#include <linux/workqueue.h>
/*
* ARMv7 instruction: Branch with Link calls a subroutine at a PC-relative address.
*/
#define ARMV7_T1_BL_IMM_INSTR 0xd800f000
/*
* ARMv7 instruction: Branch with Link calls a subroutine at a PC-relative address, maximum
* negative jump offset.
*/
#define ARMV7_T1_BL_IMM_RANGE_MIN -16777216
/*
* ARMv7 instruction: Branch with Link calls a subroutine at a PC-relative address, maximum
* positive jump offset.
*/
#define ARMV7_T1_BL_IMM_RANGE_MAX 16777214
/*
* ARMv7 instruction: Double NOP instructions.
*/
#define ARMV7_DOUBLE_NOP_INSTR 0xbf00bf00
#if defined(CONFIG_DEBUG_FS)
/**
 * kbase_csf_firmware_log_enable_mask_read - debugfs getter for the FW trace enable mask.
 *
 * @data: Opaque pointer registered with the debugfs file; the Kbase device.
 * @val:  Output location for the currently active trace enable mask.
 *
 * Return: 0 on success, -EIO if the firmware log trace buffer cannot be found.
 */
static int kbase_csf_firmware_log_enable_mask_read(void *data, u64 *val)
{
	struct kbase_device *const kbdev = data;
	struct firmware_trace_buffer *const trace_buf =
		kbase_csf_firmware_get_trace_buffer(kbdev, FIRMWARE_LOG_BUF_NAME);

	if (!trace_buf) {
		dev_err(kbdev->dev, "Couldn't get the firmware trace buffer");
		return -EIO;
	}

	/* The enabled traces are reported as a u64 here, which is regarded as
	 * sufficient in practice.
	 */
	*val = kbase_csf_firmware_trace_buffer_get_active_mask64(trace_buf);

	return 0;
}
/**
 * kbase_csf_firmware_log_enable_mask_write - debugfs setter for the FW trace enable mask.
 *
 * @data: Opaque pointer registered with the debugfs file; the Kbase device.
 * @val:  Requested trace enable mask. Bits above the number of enable bits
 *        supported by the trace buffer (capped at 64) are ignored.
 *
 * The trace buffer is only updated when the masked value differs from the
 * currently active mask.
 *
 * Return: 0 if the mask is unchanged, -EIO if the firmware log trace buffer
 *         cannot be found, otherwise the result of
 *         kbase_csf_firmware_trace_buffer_set_active_mask64().
 */
static int kbase_csf_firmware_log_enable_mask_write(void *data, u64 val)
{
	struct kbase_device *kbdev = (struct kbase_device *)data;
	struct firmware_trace_buffer *tb =
		kbase_csf_firmware_get_trace_buffer(kbdev, FIRMWARE_LOG_BUF_NAME);
	u64 supported_mask;
	u64 new_mask;
	unsigned int enable_bits_count;

	if (tb == NULL) {
		dev_err(kbdev->dev, "Couldn't get the firmware trace buffer");
		return -EIO;
	}

	/* Ignore unsupported types */
	enable_bits_count = kbase_csf_firmware_trace_buffer_get_trace_enable_bits_count(tb);
	if (enable_bits_count > 64) {
		dev_dbg(kbdev->dev, "Limit enabled bits count from %u to 64", enable_bits_count);
		enable_bits_count = 64;
	}

	/* Build the mask in 64-bit arithmetic: shifting a plain int by 31 or
	 * more bits is undefined, and even a u64 must not be shifted by its
	 * full width, so the 64-bit case is handled explicitly.
	 */
	if (enable_bits_count == 64)
		supported_mask = ~(u64)0;
	else
		supported_mask = ((u64)1 << enable_bits_count) - 1;

	new_mask = val & supported_mask;

	if (new_mask == kbase_csf_firmware_trace_buffer_get_active_mask64(tb))
		return 0;

	return kbase_csf_firmware_trace_buffer_set_active_mask64(tb, new_mask);
}
/**
 * kbasep_csf_firmware_log_debugfs_open - Open handler for the FW trace dump debugfs file.
 *
 * @in:   Inode of the debugfs file; its i_private holds the Kbase device.
 * @file: File being opened; receives the Kbase device as private data.
 *
 * Return: Always 0.
 */
static int kbasep_csf_firmware_log_debugfs_open(struct inode *in, struct file *file)
{
	struct kbase_device *const kbdev = in->i_private;

	dev_dbg(kbdev->dev, "Opened firmware trace buffer dump debugfs file");

	/* Make the device available to the read handler */
	file->private_data = kbdev;

	return 0;
}
/**
 * kbasep_csf_firmware_log_debugfs_read - Read handler for the FW trace dump debugfs file.
 *
 * @file: File whose private data is the Kbase device.
 * @buf:  User space buffer to copy the trace data into.
 * @size: Size of the user space buffer.
 * @ppos: File position, advanced by the number of bytes read.
 *
 * Return: Number of bytes copied to user space, -EIO if the trace buffer cannot
 *         be found, -EBUSY if the firmware log is in use by someone else,
 *         -EINVAL if the log is not in manual mode, -EFAULT if the copy to
 *         user space failed.
 */
static ssize_t kbasep_csf_firmware_log_debugfs_read(struct file *file, char __user *buf,
						    size_t size, loff_t *ppos)
{
	struct kbase_device *const kbdev = file->private_data;
	struct kbase_csf_firmware_log *const log = &kbdev->csf.fw_log;
	struct firmware_trace_buffer *const trace_buf =
		kbase_csf_firmware_get_trace_buffer(kbdev, FIRMWARE_LOG_BUF_NAME);
	/* Limit reads to the kernel dump buffer size */
	const size_t max_bytes = MIN(size, FIRMWARE_LOG_DUMP_BUF_SIZE);
	unsigned int bytes_read;
	int ret;

	if (!trace_buf) {
		dev_err(kbdev->dev, "Couldn't get the firmware trace buffer");
		return -EIO;
	}

	if (atomic_cmpxchg(&log->busy, 0, 1) != 0)
		return -EBUSY;

	if (log->mode != KBASE_CSF_FIRMWARE_LOG_MODE_MANUAL) {
		/* Reading from userspace is only allowed in manual mode */
		ret = -EINVAL;
	} else {
		bytes_read = kbase_csf_firmware_trace_buffer_read_data(trace_buf, log->dump_buf,
								       max_bytes);

		/* Only attempt the copy if some trace data was obtained */
		if (bytes_read && copy_to_user(buf, log->dump_buf, bytes_read)) {
			dev_err(kbdev->dev, "Couldn't copy trace buffer data to user space buffer");
			ret = -EFAULT;
		} else {
			*ppos += bytes_read;
			ret = bytes_read;
		}
	}

	atomic_set(&log->busy, 0);
	return ret;
}
/**
 * kbase_csf_firmware_log_mode_read - debugfs getter for the firmware log mode.
 *
 * @data: Opaque pointer registered with the debugfs file; the Kbase device.
 * @val:  Output location for the current firmware log mode.
 *
 * Return: Always 0.
 */
static int kbase_csf_firmware_log_mode_read(void *data, u64 *val)
{
	struct kbase_device *const kbdev = data;

	*val = kbdev->csf.fw_log.mode;

	return 0;
}
/**
 * kbase_csf_firmware_log_mode_write - debugfs setter for the firmware log mode.
 *
 * @data: Opaque pointer registered with the debugfs file; the Kbase device.
 * @val:  Requested firmware log mode.
 *
 * Switching to manual mode cancels the polling work; switching to auto-print
 * mode schedules it. Writing the current mode is a no-op.
 *
 * Return: 0 on success, -EBUSY if the firmware log is in use by someone else,
 *         -EINVAL if @val is not a known mode.
 */
static int kbase_csf_firmware_log_mode_write(void *data, u64 val)
{
	struct kbase_device *const kbdev = data;
	struct kbase_csf_firmware_log *const log = &kbdev->csf.fw_log;
	int err = 0;

	if (atomic_cmpxchg(&log->busy, 0, 1) != 0)
		return -EBUSY;

	if (val != log->mode) {
		if (val == KBASE_CSF_FIRMWARE_LOG_MODE_MANUAL) {
			cancel_delayed_work_sync(&log->poll_work);
			log->mode = val;
		} else if (val == KBASE_CSF_FIRMWARE_LOG_MODE_AUTO_PRINT) {
			schedule_delayed_work(
				&log->poll_work,
				msecs_to_jiffies(KBASE_CSF_FIRMWARE_LOG_POLL_PERIOD_MS));
			log->mode = val;
		} else {
			/* Unknown mode: leave the current mode untouched */
			err = -EINVAL;
		}
	}

	atomic_set(&log->busy, 0);
	return err;
}
/* File operations for the "fw_trace_enable_mask" debugfs file: the trace
 * enable mask is read and written as a hexadecimal value.
 */
DEFINE_DEBUGFS_ATTRIBUTE(kbase_csf_firmware_log_enable_mask_fops,
kbase_csf_firmware_log_enable_mask_read,
kbase_csf_firmware_log_enable_mask_write, "%llx\n");
/* File operations for the "fw_traces" debugfs file, which exposes the raw
 * firmware trace data to user space. Seeking is not supported.
 */
static const struct file_operations kbasep_csf_firmware_log_debugfs_fops = {
.owner = THIS_MODULE,
.open = kbasep_csf_firmware_log_debugfs_open,
.read = kbasep_csf_firmware_log_debugfs_read,
.llseek = no_llseek,
};
/* File operations for the "fw_trace_mode" debugfs file: the firmware log mode
 * is read and written as an unsigned decimal value.
 */
DEFINE_DEBUGFS_ATTRIBUTE(kbase_csf_firmware_log_mode_fops, kbase_csf_firmware_log_mode_read,
kbase_csf_firmware_log_mode_write, "%llu\n");
#endif /* CONFIG_DEBUG_FS */
/**
 * kbase_csf_firmware_log_poll - Delayed work handler for periodic FW log dumping.
 *
 * @work: Work item embedded in the Kbase device's firmware log poll_work.
 *
 * Re-arms itself for the next poll period and then dumps the firmware log
 * buffer.
 */
static void kbase_csf_firmware_log_poll(struct work_struct *work)
{
	struct kbase_device *const kbdev =
		container_of(work, struct kbase_device, csf.fw_log.poll_work.work);
	const unsigned long poll_delay = msecs_to_jiffies(KBASE_CSF_FIRMWARE_LOG_POLL_PERIOD_MS);

	/* Queue the next poll before dumping the buffer */
	schedule_delayed_work(&kbdev->csf.fw_log.poll_work, poll_delay);

	kbase_csf_firmware_log_dump_buffer(kbdev);
}
/*
 * Allocates the dump buffer, puts the firmware log into manual mode, prepares
 * the polling work and, when debugfs is enabled, creates the firmware trace
 * debugfs files. Returns 0 on success or -ENOMEM if the dump buffer could not
 * be allocated.
 */
int kbase_csf_firmware_log_init(struct kbase_device *kbdev)
{
	struct kbase_csf_firmware_log *const log = &kbdev->csf.fw_log;

	/* The extra byte past the dump area holds a terminator, so that every
	 * string printed from the buffer is null-terminated.
	 */
	log->dump_buf = kmalloc(FIRMWARE_LOG_DUMP_BUF_SIZE + 1, GFP_KERNEL);
	if (!log->dump_buf)
		return -ENOMEM;
	log->dump_buf[FIRMWARE_LOG_DUMP_BUF_SIZE] = 0;

	log->mode = KBASE_CSF_FIRMWARE_LOG_MODE_MANUAL;
	atomic_set(&log->busy, 0);
	INIT_DEFERRABLE_WORK(&log->poll_work, kbase_csf_firmware_log_poll);

#ifdef CONFIG_DEBUG_FS
	debugfs_create_file("fw_trace_enable_mask", 0644, kbdev->mali_debugfs_directory, kbdev,
			    &kbase_csf_firmware_log_enable_mask_fops);
	debugfs_create_file("fw_traces", 0444, kbdev->mali_debugfs_directory, kbdev,
			    &kbasep_csf_firmware_log_debugfs_fops);
	debugfs_create_file("fw_trace_mode", 0644, kbdev->mali_debugfs_directory, kbdev,
			    &kbase_csf_firmware_log_mode_fops);
#endif /* CONFIG_DEBUG_FS */

	return 0;
}
/*
 * Stops the polling work and releases the dump buffer. Does nothing when the
 * dump buffer is not allocated.
 */
void kbase_csf_firmware_log_term(struct kbase_device *kbdev)
{
	struct kbase_csf_firmware_log *const log = &kbdev->csf.fw_log;

	if (!log->dump_buf)
		return;

	/* The polling work uses the dump buffer, so stop it before freeing */
	cancel_delayed_work_sync(&log->poll_work);
	kfree(log->dump_buf);
	log->dump_buf = NULL;
}
/*
 * Drains the firmware log trace buffer and prints it line by line through
 * dev_err() with a "FW> " prefix. The dump is silently skipped when the trace
 * buffer cannot be found or when the firmware log busy flag is already taken.
 */
void kbase_csf_firmware_log_dump_buffer(struct kbase_device *kbdev)
{
struct kbase_csf_firmware_log *fw_log = &kbdev->csf.fw_log;
u8 *buf = fw_log->dump_buf, *p, *pnewline, *pend, *pendbuf;
unsigned int read_size, remaining_size;
struct firmware_trace_buffer *tb =
kbase_csf_firmware_get_trace_buffer(kbdev, FIRMWARE_LOG_BUF_NAME);
if (tb == NULL) {
dev_dbg(kbdev->dev, "Can't get the trace buffer, firmware trace dump skipped");
return;
}
/* Take the busy flag; give up rather than wait if it is already held */
if (atomic_cmpxchg(&fw_log->busy, 0, 1) != 0)
return;
/* FW should only print complete messages, so there's no need to handle
* partial messages over multiple invocations of this function
*/
/* p is where the next read lands; pendbuf is the end of the usable dump area */
p = buf;
pendbuf = &buf[FIRMWARE_LOG_DUMP_BUF_SIZE];
while ((read_size = kbase_csf_firmware_trace_buffer_read_data(tb, p, pendbuf - p))) {
/* pend marks the end of valid data; scanning restarts from the buffer
 * start so that any tail carried over from the previous pass is included
 */
pend = p + read_size;
p = buf;
while (p < pend && (pnewline = memchr(p, '\n', pend - p))) {
/* Null-terminate the string */
*pnewline = 0;
dev_err(kbdev->dev, "FW> %s", p);
p = pnewline + 1;
}
/* Bytes after the last newline that have not been printed yet */
remaining_size = pend - p;
if (!remaining_size) {
p = buf;
} else if (remaining_size < FIRMWARE_LOG_DUMP_BUF_SIZE) {
/* Copy unfinished string to the start of the buffer */
memmove(buf, p, remaining_size);
/* The next read appends right after the carried-over tail */
p = &buf[remaining_size];
} else {
/* Print abnormally long string without newlines */
/* The whole dump area is in use here; termination relies on the extra
 * byte at buf[FIRMWARE_LOG_DUMP_BUF_SIZE] zeroed in
 * kbase_csf_firmware_log_init()
 */
dev_err(kbdev->dev, "FW> %s", buf);
p = buf;
}
}
if (p != buf) {
/* Null-terminate and print last unfinished string */
*p = 0;
dev_err(kbdev->dev, "FW> %s", buf);
}
atomic_set(&fw_log->busy, 0);
}
void kbase_csf_firmware_log_parse_logging_call_list_entry(struct kbase_device *kbdev,
const uint32_t *entry)
{
kbdev->csf.fw_log.func_call_list_va_start = entry[0];
kbdev->csf.fw_log.func_call_list_va_end = entry[1];
}
/**
 * toggle_logging_calls_in_loaded_image - Toggles FW log func calls in loaded FW image.
 *
 * @kbdev:  Instance of a GPU platform device that implements a CSF interface.
 * @enable: Whether to enable or disable the function calls.
 *
 * Walks the logging call list delimited by func_call_list_va_start and
 * func_call_list_va_end. Each list entry is a pair of 32-bit words: the
 * address of the call site followed by the address of the callee. When
 * enabling, every call site is patched with a BL instruction to its callee;
 * when disabling, every call site is overwritten with two NOP instructions.
 * Nothing is done if either list bound is zero.
 *
 * Call sites whose callee is outside the range reachable by a BL instruction
 * are skipped with a warning instead of being patched with a wrong branch.
 */
static void toggle_logging_calls_in_loaded_image(struct kbase_device *kbdev, bool enable)
{
	uint32_t bl_instruction, diff;
	uint32_t imm11, imm10, i1, i2, j1, j2, sign;
	uint32_t calling_address = 0, callee_address = 0;
	uint32_t list_entry = kbdev->csf.fw_log.func_call_list_va_start;
	const uint32_t list_va_end = kbdev->csf.fw_log.func_call_list_va_end;

	if (list_entry == 0 || list_va_end == 0)
		return;

	if (enable) {
		for (; list_entry < list_va_end; list_entry += 2 * sizeof(uint32_t)) {
			/* Read calling address */
			kbase_csf_read_firmware_memory(kbdev, list_entry, &calling_address);
			/* Read callee address */
			kbase_csf_read_firmware_memory(kbdev, list_entry + sizeof(uint32_t),
						       &callee_address);

			/* Branch offset relative to the call site, less 4 */
			diff = callee_address - calling_address - 4;
			sign = !!(diff & 0x80000000);

			/* The offset is unusable if it falls below the minimum
			 * OR above the maximum; both conditions can never hold
			 * at once, so they must not be combined with AND.
			 */
			if (ARMV7_T1_BL_IMM_RANGE_MIN > (int32_t)diff ||
			    ARMV7_T1_BL_IMM_RANGE_MAX < (int32_t)diff) {
				dev_warn(kbdev->dev, "FW log patch 0x%x out of range, skipping",
					 calling_address);
				continue;
			}

			/* Split the offset into the BL immediate fields */
			i1 = (diff & 0x00800000) >> 23;
			j1 = !i1 ^ sign;
			i2 = (diff & 0x00400000) >> 22;
			j2 = !i2 ^ sign;
			imm11 = (diff & 0xffe) >> 1;
			imm10 = (diff & 0x3ff000) >> 12;

			/* Compose BL instruction */
			bl_instruction = ARMV7_T1_BL_IMM_INSTR;
			bl_instruction |= j1 << 29;
			bl_instruction |= j2 << 27;
			bl_instruction |= imm11 << 16;
			bl_instruction |= sign << 10;
			bl_instruction |= imm10;

			/* Patch logging func calls in their load location */
			dev_dbg(kbdev->dev, "FW log patch 0x%x: 0x%x\n", calling_address,
				bl_instruction);
			kbase_csf_update_firmware_memory_exe(kbdev, calling_address,
							     bl_instruction);
		}
	} else {
		for (; list_entry < list_va_end; list_entry += 2 * sizeof(uint32_t)) {
			/* Read calling address */
			kbase_csf_read_firmware_memory(kbdev, list_entry, &calling_address);

			/* Overwrite logging func calls with 2 NOP instructions */
			kbase_csf_update_firmware_memory_exe(kbdev, calling_address,
							     ARMV7_DOUBLE_NOP_INSTR);
		}
	}
}
/*
 * Enables (non-zero val) or disables (zero val) the FW logging function calls
 * by patching the loaded firmware image. The scheduler is suspended first and
 * the patch is only applied once the MCU is reported inactive.
 *
 * Returns 0 on success, -EBUSY if the firmware log busy flag is already taken,
 * or -EAGAIN if the PM state wait fails or the MCU is still active afterwards.
 */
int kbase_csf_firmware_log_toggle_logging_calls(struct kbase_device *kbdev, u32 val)
{
unsigned long flags;
struct kbase_csf_firmware_log *fw_log = &kbdev->csf.fw_log;
bool mcu_inactive;
/* Set only if this function had to suspend the scheduler itself */
bool resume_needed = false;
int ret = 0;
struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;
if (atomic_cmpxchg(&fw_log->busy, 0, 1) != 0)
return -EBUSY;
/* Suspend all the active CS groups */
dev_dbg(kbdev->dev, "Suspend all the active CS groups");
kbase_csf_scheduler_lock(kbdev);
/* The scheduler lock is dropped around the suspend call and the state is
 * re-checked under the lock; the loop exits with the lock held.
 */
while (scheduler->state != SCHED_SUSPENDED) {
kbase_csf_scheduler_unlock(kbdev);
kbase_csf_scheduler_pm_suspend(kbdev);
kbase_csf_scheduler_lock(kbdev);
resume_needed = true;
}
/* Wait for the MCU to get disabled */
dev_info(kbdev->dev, "Wait for the MCU to get disabled");
ret = kbase_pm_wait_for_desired_state(kbdev);
if (ret) {
dev_err(kbdev->dev,
"wait for PM state failed when toggling FW logging calls");
/* The wait's own error code is replaced with -EAGAIN */
ret = -EAGAIN;
goto out;
}
/* Sample the MCU state under hwaccess_lock */
spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
mcu_inactive =
kbase_pm_is_mcu_inactive(kbdev, kbdev->pm.backend.mcu_state);
spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
if (!mcu_inactive) {
dev_err(kbdev->dev,
"MCU not inactive after PM state wait when toggling FW logging calls");
ret = -EAGAIN;
goto out;
}
/* Toggle FW logging call in the loaded FW image */
toggle_logging_calls_in_loaded_image(kbdev, val);
dev_dbg(kbdev->dev, "FW logging: %s", val ? "enabled" : "disabled");
out:
/* Common exit: the scheduler lock is still held on every path reaching here */
kbase_csf_scheduler_unlock(kbdev);
if (resume_needed)
/* Resume queue groups and start mcu */
kbase_csf_scheduler_pm_resume(kbdev);
atomic_set(&fw_log->busy, 0);
return ret;
}

View File

@ -0,0 +1,74 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
* (C) COPYRIGHT 2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
* Foundation, and any use by you of this program is subject to the terms
* of such GNU license.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, you can access it online at
* http://www.gnu.org/licenses/gpl-2.0.html.
*
*/
#ifndef _KBASE_CSF_FIRMWARE_LOG_H_
#define _KBASE_CSF_FIRMWARE_LOG_H_
#include <mali_kbase.h>
/*
* Firmware log dumping buffer size.
*/
#define FIRMWARE_LOG_DUMP_BUF_SIZE PAGE_SIZE
/**
* kbase_csf_firmware_log_init - Initialize firmware log handling.
*
* @kbdev: Pointer to the Kbase device
*
* Return: 0 if successful, negative error code on failure
*/
int kbase_csf_firmware_log_init(struct kbase_device *kbdev);
/**
* kbase_csf_firmware_log_term - Terminate firmware log handling.
*
* @kbdev: Pointer to the Kbase device
*/
void kbase_csf_firmware_log_term(struct kbase_device *kbdev);
/**
* kbase_csf_firmware_log_dump_buffer - Read remaining data in the firmware log
* buffer and print it to dmesg.
*
* @kbdev: Pointer to the Kbase device
*/
void kbase_csf_firmware_log_dump_buffer(struct kbase_device *kbdev);
/**
* kbase_csf_firmware_log_parse_logging_call_list_entry - Parse FW logging function call list entry.
*
* @kbdev: Instance of a GPU platform device that implements a CSF interface.
* @entry: Pointer to section.
*/
void kbase_csf_firmware_log_parse_logging_call_list_entry(struct kbase_device *kbdev,
const uint32_t *entry);
/**
* kbase_csf_firmware_log_toggle_logging_calls - Enables/Disables FW logging function calls.
*
* @kbdev: Instance of a GPU platform device that implements a CSF interface.
* @val: Configuration option value.
*
* Return: 0 if successful, negative error code on failure
*/
int kbase_csf_firmware_log_toggle_logging_calls(struct kbase_device *kbdev, u32 val);
#endif /* _KBASE_CSF_FIRMWARE_LOG_H_ */

View File

@ -273,6 +273,18 @@ void kbase_csf_update_firmware_memory(struct kbase_device *kbdev,
/* NO_MALI: Nothing to do here */
}
void kbase_csf_read_firmware_memory_exe(struct kbase_device *kbdev,
u32 gpu_addr, u32 *value)
{
/* NO_MALI: Nothing to do here */
}
void kbase_csf_update_firmware_memory_exe(struct kbase_device *kbdev,
u32 gpu_addr, u32 value)
{
/* NO_MALI: Nothing to do here */
}
void kbase_csf_firmware_cs_input(
const struct kbase_csf_cmd_stream_info *const info, const u32 offset,
const u32 value)
@ -971,7 +983,6 @@ u32 kbase_csf_firmware_set_gpu_idle_hysteresis_time(struct kbase_device *kbdev,
static u32 convert_dur_to_core_pwroff_count(struct kbase_device *kbdev, const u32 dur_us)
{
#define PWROFF_VAL_UNIT_SHIFT (10)
/* Get the cntfreq_el0 value, which drives the SYSTEM_TIMESTAMP */
u64 freq = arch_timer_get_cntfrq();
u64 dur_val = dur_us;
@ -1046,16 +1057,6 @@ int kbase_csf_firmware_early_init(struct kbase_device *kbdev)
kbdev->csf.fw_timeout_ms =
kbase_get_timeout_ms(kbdev, CSF_FIRMWARE_TIMEOUT);
kbdev->csf.gpu_idle_hysteresis_ms = FIRMWARE_IDLE_HYSTERESIS_TIME_MS;
#ifdef KBASE_PM_RUNTIME
if (kbase_pm_gpu_sleep_allowed(kbdev))
kbdev->csf.gpu_idle_hysteresis_ms /=
FIRMWARE_IDLE_HYSTERESIS_GPU_SLEEP_SCALER;
#endif
WARN_ON(!kbdev->csf.gpu_idle_hysteresis_ms);
kbdev->csf.gpu_idle_dur_count = convert_dur_to_idle_count(
kbdev, kbdev->csf.gpu_idle_hysteresis_ms);
INIT_LIST_HEAD(&kbdev->csf.firmware_interfaces);
INIT_LIST_HEAD(&kbdev->csf.firmware_config);
INIT_LIST_HEAD(&kbdev->csf.firmware_trace_buffers.list);
@ -1068,7 +1069,26 @@ int kbase_csf_firmware_early_init(struct kbase_device *kbdev)
return 0;
}
int kbase_csf_firmware_init(struct kbase_device *kbdev)
void kbase_csf_firmware_early_term(struct kbase_device *kbdev)
{
mutex_destroy(&kbdev->csf.reg_lock);
}
int kbase_csf_firmware_late_init(struct kbase_device *kbdev)
{
kbdev->csf.gpu_idle_hysteresis_ms = FIRMWARE_IDLE_HYSTERESIS_TIME_MS;
#ifdef KBASE_PM_RUNTIME
if (kbase_pm_gpu_sleep_allowed(kbdev))
kbdev->csf.gpu_idle_hysteresis_ms /= FIRMWARE_IDLE_HYSTERESIS_GPU_SLEEP_SCALER;
#endif
WARN_ON(!kbdev->csf.gpu_idle_hysteresis_ms);
kbdev->csf.gpu_idle_dur_count =
convert_dur_to_idle_count(kbdev, kbdev->csf.gpu_idle_hysteresis_ms);
return 0;
}
int kbase_csf_firmware_load_init(struct kbase_device *kbdev)
{
int ret;
@ -1134,11 +1154,11 @@ int kbase_csf_firmware_init(struct kbase_device *kbdev)
return 0;
error:
kbase_csf_firmware_term(kbdev);
kbase_csf_firmware_unload_term(kbdev);
return ret;
}
void kbase_csf_firmware_term(struct kbase_device *kbdev)
void kbase_csf_firmware_unload_term(struct kbase_device *kbdev)
{
cancel_work_sync(&kbdev->csf.fw_error_work);
@ -1173,8 +1193,6 @@ void kbase_csf_firmware_term(struct kbase_device *kbdev)
/* NO_MALI: No trace buffers to terminate */
mutex_destroy(&kbdev->csf.reg_lock);
/* This will also free up the region allocated for the shared interface
* entry parsed from the firmware image.
*/
@ -1227,8 +1245,9 @@ void kbase_csf_firmware_ping(struct kbase_device *const kbdev)
kbase_csf_scheduler_spin_unlock(kbdev, flags);
}
int kbase_csf_firmware_ping_wait(struct kbase_device *const kbdev)
int kbase_csf_firmware_ping_wait(struct kbase_device *const kbdev, unsigned int wait_timeout_ms)
{
CSTD_UNUSED(wait_timeout_ms);
kbase_csf_firmware_ping(kbdev);
return wait_for_global_request(kbdev, GLB_REQ_PING_MASK);
}
@ -1267,7 +1286,7 @@ void kbase_csf_enter_protected_mode(struct kbase_device *kbdev)
kbase_csf_ring_doorbell(kbdev, CSF_KERNEL_DOORBELL_NR);
}
void kbase_csf_wait_protected_mode_enter(struct kbase_device *kbdev)
int kbase_csf_wait_protected_mode_enter(struct kbase_device *kbdev)
{
int err = wait_for_global_request(kbdev, GLB_REQ_PROTM_ENTER_MASK);
@ -1275,6 +1294,8 @@ void kbase_csf_wait_protected_mode_enter(struct kbase_device *kbdev)
if (kbase_prepare_to_reset_gpu(kbdev, RESET_FLAGS_NONE))
kbase_reset_gpu(kbdev);
}
return err;
}
void kbase_csf_firmware_trigger_mcu_halt(struct kbase_device *kbdev)
@ -1483,9 +1504,8 @@ int kbase_csf_firmware_mcu_shared_mapping_init(
if (!page_list)
goto page_list_alloc_error;
ret = kbase_mem_pool_alloc_pages(
&kbdev->mem_pools.small[KBASE_MEM_GROUP_CSF_FW],
num_pages, phys, false);
ret = kbase_mem_pool_alloc_pages(&kbdev->mem_pools.small[KBASE_MEM_GROUP_CSF_FW], num_pages,
phys, false);
if (ret <= 0)
goto phys_mem_pool_alloc_error;

View File

@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
* (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved.
* (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@ -154,8 +154,8 @@ u64 kbase_csf_heap_context_allocator_alloc(
struct kbase_csf_heap_context_allocator *const ctx_alloc)
{
struct kbase_context *const kctx = ctx_alloc->kctx;
u64 flags = BASE_MEM_PROT_GPU_RD | BASE_MEM_PROT_GPU_WR |
BASE_MEM_PROT_CPU_WR | BASEP_MEM_NO_USER_FREE;
u64 flags = BASE_MEM_PROT_GPU_RD | BASE_MEM_PROT_GPU_WR | BASE_MEM_PROT_CPU_WR |
BASEP_MEM_NO_USER_FREE | BASE_MEM_PROT_CPU_RD;
u64 nr_pages = PFN_UP(HEAP_CTX_REGION_SIZE);
u64 heap_gpu_va = 0;
@ -164,10 +164,6 @@ u64 kbase_csf_heap_context_allocator_alloc(
*/
const enum kbase_caller_mmu_sync_info mmu_sync_info = CALLER_MMU_ASYNC;
#ifdef CONFIG_MALI_VECTOR_DUMP
flags |= BASE_MEM_PROT_CPU_RD;
#endif
mutex_lock(&ctx_alloc->lock);
/* If the pool of heap contexts wasn't already allocated then

View File

@ -55,7 +55,7 @@ static int kbase_kcpu_map_import_prepare(
long i;
int ret = 0;
lockdep_assert_held(&kctx->csf.kcpu_queues.lock);
lockdep_assert_held(&kcpu_queue->lock);
/* Take the processes mmap lock */
down_read(kbase_mem_get_process_mmap_lock());
@ -114,7 +114,7 @@ static int kbase_kcpu_unmap_import_prepare_internal(
struct kbase_va_region *reg;
int ret = 0;
lockdep_assert_held(&kctx->csf.kcpu_queues.lock);
lockdep_assert_held(&kcpu_queue->lock);
kbase_gpu_vm_lock(kctx);
@ -182,7 +182,8 @@ static void kbase_jit_add_to_pending_alloc_list(
&kctx->csf.kcpu_queues.jit_blocked_queues;
struct kbase_kcpu_command_queue *blocked_queue;
lockdep_assert_held(&kctx->csf.kcpu_queues.lock);
lockdep_assert_held(&queue->lock);
lockdep_assert_held(&kctx->csf.kcpu_queues.jit_lock);
list_for_each_entry(blocked_queue,
&kctx->csf.kcpu_queues.jit_blocked_queues,
@ -227,25 +228,28 @@ static int kbase_kcpu_jit_allocate_process(
u32 i;
int ret;
lockdep_assert_held(&kctx->csf.kcpu_queues.lock);
if (alloc_info->blocked) {
list_del(&queue->jit_blocked);
alloc_info->blocked = false;
}
lockdep_assert_held(&queue->lock);
if (WARN_ON(!info))
return -EINVAL;
mutex_lock(&kctx->csf.kcpu_queues.jit_lock);
/* Check if all JIT IDs are not in use */
for (i = 0; i < count; i++, info++) {
/* The JIT ID is still in use so fail the allocation */
if (kctx->jit_alloc[info->id]) {
dev_dbg(kctx->kbdev->dev, "JIT ID still in use");
return -EINVAL;
ret = -EINVAL;
goto fail;
}
}
if (alloc_info->blocked) {
list_del(&queue->jit_blocked);
alloc_info->blocked = false;
}
/* Now start the allocation loop */
for (i = 0, info = alloc_info->info; i < count; i++, info++) {
/* Create a JIT allocation */
@ -280,7 +284,7 @@ static int kbase_kcpu_jit_allocate_process(
*/
dev_warn_ratelimited(kctx->kbdev->dev, "JIT alloc command failed: %pK\n", cmd);
ret = -ENOMEM;
goto fail;
goto fail_rollback;
}
/* There are pending frees for an active allocation
@ -298,7 +302,8 @@ static int kbase_kcpu_jit_allocate_process(
kctx->jit_alloc[info->id] = NULL;
}
return -EAGAIN;
ret = -EAGAIN;
goto fail;
}
/* Bind it to the user provided ID. */
@ -314,7 +319,7 @@ static int kbase_kcpu_jit_allocate_process(
KBASE_REG_CPU_WR, &mapping);
if (!ptr) {
ret = -ENOMEM;
goto fail;
goto fail_rollback;
}
reg = kctx->jit_alloc[info->id];
@ -323,9 +328,11 @@ static int kbase_kcpu_jit_allocate_process(
kbase_vunmap(kctx, &mapping);
}
mutex_unlock(&kctx->csf.kcpu_queues.jit_lock);
return 0;
fail:
fail_rollback:
/* Roll back completely */
for (i = 0, info = alloc_info->info; i < count; i++, info++) {
/* Free the allocations that were successful.
@ -338,6 +345,8 @@ static int kbase_kcpu_jit_allocate_process(
kctx->jit_alloc[info->id] = KBASE_RESERVED_REG_JIT_ALLOC;
}
fail:
mutex_unlock(&kctx->csf.kcpu_queues.jit_lock);
return ret;
}
@ -354,7 +363,7 @@ static int kbase_kcpu_jit_allocate_prepare(
int ret = 0;
u32 i;
lockdep_assert_held(&kctx->csf.kcpu_queues.lock);
lockdep_assert_held(&kcpu_queue->lock);
if (!data || count > kcpu_queue->kctx->jit_max_allocations ||
count > ARRAY_SIZE(kctx->jit_alloc)) {
@ -392,11 +401,13 @@ static int kbase_kcpu_jit_allocate_prepare(
}
current_command->type = BASE_KCPU_COMMAND_TYPE_JIT_ALLOC;
list_add_tail(&current_command->info.jit_alloc.node,
&kctx->csf.kcpu_queues.jit_cmds_head);
current_command->info.jit_alloc.info = info;
current_command->info.jit_alloc.count = count;
current_command->info.jit_alloc.blocked = false;
mutex_lock(&kctx->csf.kcpu_queues.jit_lock);
list_add_tail(&current_command->info.jit_alloc.node,
&kctx->csf.kcpu_queues.jit_cmds_head);
mutex_unlock(&kctx->csf.kcpu_queues.jit_lock);
return 0;
out_free:
@ -415,7 +426,9 @@ static void kbase_kcpu_jit_allocate_finish(
struct kbase_kcpu_command_queue *queue,
struct kbase_kcpu_command *cmd)
{
lockdep_assert_held(&queue->kctx->csf.kcpu_queues.lock);
lockdep_assert_held(&queue->lock);
mutex_lock(&queue->kctx->csf.kcpu_queues.jit_lock);
/* Remove this command from the jit_cmds_head list */
list_del(&cmd->info.jit_alloc.node);
@ -429,6 +442,8 @@ static void kbase_kcpu_jit_allocate_finish(
cmd->info.jit_alloc.blocked = false;
}
mutex_unlock(&queue->kctx->csf.kcpu_queues.jit_lock);
kfree(cmd->info.jit_alloc.info);
}
@ -441,18 +456,17 @@ static void kbase_kcpu_jit_retry_pending_allocs(struct kbase_context *kctx)
{
struct kbase_kcpu_command_queue *blocked_queue;
lockdep_assert_held(&kctx->csf.kcpu_queues.lock);
lockdep_assert_held(&kctx->csf.kcpu_queues.jit_lock);
/*
* Reschedule all queues blocked by JIT_ALLOC commands.
* NOTE: This code traverses the list of blocked queues directly. It
* only works as long as the queued works are not executed at the same
* time. This precondition is true since we're holding the
* kbase_csf_kcpu_queue_context.lock .
* kbase_csf_kcpu_queue_context.jit_lock .
*/
list_for_each_entry(blocked_queue,
&kctx->csf.kcpu_queues.jit_blocked_queues, jit_blocked)
queue_work(kctx->csf.kcpu_queues.wq, &blocked_queue->work);
list_for_each_entry(blocked_queue, &kctx->csf.kcpu_queues.jit_blocked_queues, jit_blocked)
queue_work(blocked_queue->wq, &blocked_queue->work);
}
static int kbase_kcpu_jit_free_process(struct kbase_kcpu_command_queue *queue,
@ -469,7 +483,8 @@ static int kbase_kcpu_jit_free_process(struct kbase_kcpu_command_queue *queue,
if (WARN_ON(!ids))
return -EINVAL;
lockdep_assert_held(&kctx->csf.kcpu_queues.lock);
lockdep_assert_held(&queue->lock);
mutex_lock(&kctx->csf.kcpu_queues.jit_lock);
KBASE_TLSTREAM_TL_KBASE_ARRAY_BEGIN_KCPUQUEUE_EXECUTE_JIT_FREE_END(queue->kctx->kbdev,
queue);
@ -501,9 +516,6 @@ static int kbase_kcpu_jit_free_process(struct kbase_kcpu_command_queue *queue,
queue->kctx->kbdev, queue, item_err, pages_used);
}
/* Free the list of ids */
kfree(ids);
/*
* Remove this command from the jit_cmds_head list and retry pending
* allocations.
@ -511,6 +523,11 @@ static int kbase_kcpu_jit_free_process(struct kbase_kcpu_command_queue *queue,
list_del(&cmd->info.jit_free.node);
kbase_kcpu_jit_retry_pending_allocs(kctx);
mutex_unlock(&kctx->csf.kcpu_queues.jit_lock);
/* Free the list of ids */
kfree(ids);
return rc;
}
@ -526,7 +543,7 @@ static int kbase_kcpu_jit_free_prepare(
int ret;
u32 i;
lockdep_assert_held(&kctx->csf.kcpu_queues.lock);
lockdep_assert_held(&kcpu_queue->lock);
/* Sanity checks */
if (!count || count > ARRAY_SIZE(kctx->jit_alloc)) {
@ -572,10 +589,12 @@ static int kbase_kcpu_jit_free_prepare(
}
current_command->type = BASE_KCPU_COMMAND_TYPE_JIT_FREE;
list_add_tail(&current_command->info.jit_free.node,
&kctx->csf.kcpu_queues.jit_cmds_head);
current_command->info.jit_free.ids = ids;
current_command->info.jit_free.count = count;
mutex_lock(&kctx->csf.kcpu_queues.jit_lock);
list_add_tail(&current_command->info.jit_free.node,
&kctx->csf.kcpu_queues.jit_cmds_head);
mutex_unlock(&kctx->csf.kcpu_queues.jit_lock);
return 0;
out_free:
@ -601,7 +620,7 @@ static int kbase_csf_queue_group_suspend_prepare(
int pinned_pages = 0, ret = 0;
struct kbase_va_region *reg;
lockdep_assert_held(&kctx->csf.kcpu_queues.lock);
lockdep_assert_held(&kcpu_queue->lock);
if (suspend_buf->size < csg_suspend_buf_size)
return -EINVAL;
@ -652,9 +671,12 @@ static int kbase_csf_queue_group_suspend_prepare(
u64 start, end, i;
if (((reg->flags & KBASE_REG_ZONE_MASK) != KBASE_REG_ZONE_SAME_VA) ||
reg->nr_pages < nr_pages ||
kbase_reg_current_backed_size(reg) !=
reg->nr_pages) {
(kbase_reg_current_backed_size(reg) < nr_pages) ||
!(reg->flags & KBASE_REG_CPU_WR) ||
(reg->gpu_alloc->type != KBASE_MEM_TYPE_NATIVE) ||
(reg->flags & KBASE_REG_DONT_NEED) ||
(reg->flags & KBASE_REG_ACTIVE_JIT_ALLOC) ||
(reg->flags & KBASE_REG_NO_USER_FREE)) {
ret = -EINVAL;
goto out_clean_pages;
}
@ -703,9 +725,8 @@ static enum kbase_csf_event_callback_action event_cqs_callback(void *param)
{
struct kbase_kcpu_command_queue *kcpu_queue =
(struct kbase_kcpu_command_queue *)param;
struct kbase_context *const kctx = kcpu_queue->kctx;
queue_work(kctx->csf.kcpu_queues.wq, &kcpu_queue->work);
queue_work(kcpu_queue->wq, &kcpu_queue->work);
return KBASE_CSF_EVENT_CALLBACK_KEEP;
}
@ -735,7 +756,7 @@ static int kbase_kcpu_cqs_wait_process(struct kbase_device *kbdev,
{
u32 i;
lockdep_assert_held(&queue->kctx->csf.kcpu_queues.lock);
lockdep_assert_held(&queue->lock);
if (WARN_ON(!cqs_wait->objs))
return -EINVAL;
@ -803,7 +824,7 @@ static int kbase_kcpu_cqs_wait_prepare(struct kbase_kcpu_command_queue *queue,
struct base_cqs_wait_info *objs;
unsigned int nr_objs = cqs_wait_info->nr_objs;
lockdep_assert_held(&queue->kctx->csf.kcpu_queues.lock);
lockdep_assert_held(&queue->lock);
if (nr_objs > BASEP_KCPU_CQS_MAX_NUM_OBJS)
return -EINVAL;
@ -857,7 +878,7 @@ static void kbase_kcpu_cqs_set_process(struct kbase_device *kbdev,
{
unsigned int i;
lockdep_assert_held(&queue->kctx->csf.kcpu_queues.lock);
lockdep_assert_held(&queue->lock);
if (WARN_ON(!cqs_set->objs))
return;
@ -898,11 +919,10 @@ static int kbase_kcpu_cqs_set_prepare(
struct base_kcpu_command_cqs_set_info *cqs_set_info,
struct kbase_kcpu_command *current_command)
{
struct kbase_context *const kctx = kcpu_queue->kctx;
struct base_cqs_set *objs;
unsigned int nr_objs = cqs_set_info->nr_objs;
lockdep_assert_held(&kctx->csf.kcpu_queues.lock);
lockdep_assert_held(&kcpu_queue->lock);
if (nr_objs > BASEP_KCPU_CQS_MAX_NUM_OBJS)
return -EINVAL;
@ -952,7 +972,7 @@ static int kbase_kcpu_cqs_wait_operation_process(struct kbase_device *kbdev,
{
u32 i;
lockdep_assert_held(&queue->kctx->csf.kcpu_queues.lock);
lockdep_assert_held(&queue->lock);
if (WARN_ON(!cqs_wait_operation->objs))
return -EINVAL;
@ -1039,7 +1059,7 @@ static int kbase_kcpu_cqs_wait_operation_prepare(struct kbase_kcpu_command_queue
struct base_cqs_wait_operation_info *objs;
unsigned int nr_objs = cqs_wait_operation_info->nr_objs;
lockdep_assert_held(&queue->kctx->csf.kcpu_queues.lock);
lockdep_assert_held(&queue->lock);
if (nr_objs > BASEP_KCPU_CQS_MAX_NUM_OBJS)
return -EINVAL;
@ -1094,7 +1114,7 @@ static void kbase_kcpu_cqs_set_operation_process(
{
unsigned int i;
lockdep_assert_held(&queue->kctx->csf.kcpu_queues.lock);
lockdep_assert_held(&queue->lock);
if (WARN_ON(!cqs_set_operation->objs))
return;
@ -1161,11 +1181,10 @@ static int kbase_kcpu_cqs_set_operation_prepare(
struct base_kcpu_command_cqs_set_operation_info *cqs_set_operation_info,
struct kbase_kcpu_command *current_command)
{
struct kbase_context *const kctx = kcpu_queue->kctx;
struct base_cqs_set_operation_info *objs;
unsigned int nr_objs = cqs_set_operation_info->nr_objs;
lockdep_assert_held(&kctx->csf.kcpu_queues.lock);
lockdep_assert_held(&kcpu_queue->lock);
if (nr_objs > BASEP_KCPU_CQS_MAX_NUM_OBJS)
return -EINVAL;
@ -1212,7 +1231,7 @@ static void kbase_csf_fence_wait_callback(struct dma_fence *fence,
fence->context, fence->seqno);
/* Resume kcpu command queue processing. */
queue_work(kctx->csf.kcpu_queues.wq, &kcpu_queue->work);
queue_work(kcpu_queue->wq, &kcpu_queue->work);
}
static void kbase_kcpu_fence_wait_cancel(
@ -1221,7 +1240,7 @@ static void kbase_kcpu_fence_wait_cancel(
{
struct kbase_context *const kctx = kcpu_queue->kctx;
lockdep_assert_held(&kctx->csf.kcpu_queues.lock);
lockdep_assert_held(&kcpu_queue->lock);
if (WARN_ON(!fence_info->fence))
return;
@ -1293,7 +1312,7 @@ static void fence_timeout_callback(struct timer_list *timer)
kbase_sync_fence_info_get(fence, &info);
if (info.status == 1) {
queue_work(kctx->csf.kcpu_queues.wq, &kcpu_queue->work);
queue_work(kcpu_queue->wq, &kcpu_queue->work);
} else if (info.status == 0) {
dev_warn(kctx->kbdev->dev, "fence has not yet signalled in %ums",
FENCE_WAIT_TIMEOUT_MS);
@ -1345,7 +1364,7 @@ static int kbase_kcpu_fence_wait_process(
#endif
struct kbase_context *const kctx = kcpu_queue->kctx;
lockdep_assert_held(&kctx->csf.kcpu_queues.lock);
lockdep_assert_held(&kcpu_queue->lock);
if (WARN_ON(!fence_info->fence))
return -EINVAL;
@ -1401,7 +1420,6 @@ static int kbase_kcpu_fence_wait_prepare(
struct base_kcpu_command_fence_info *fence_info,
struct kbase_kcpu_command *current_command)
{
struct kbase_context *const kctx = kcpu_queue->kctx;
#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE)
struct fence *fence_in;
#else
@ -1409,7 +1427,7 @@ static int kbase_kcpu_fence_wait_prepare(
#endif
struct base_fence fence;
lockdep_assert_held(&kctx->csf.kcpu_queues.lock);
lockdep_assert_held(&kcpu_queue->lock);
if (copy_from_user(&fence, u64_to_user_ptr(fence_info->fence),
sizeof(fence)))
@ -1460,7 +1478,6 @@ static int kbase_kcpu_fence_signal_prepare(
struct base_kcpu_command_fence_info *fence_info,
struct kbase_kcpu_command *current_command)
{
struct kbase_context *const kctx = kcpu_queue->kctx;
#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE)
struct fence *fence_out;
#else
@ -1471,7 +1488,7 @@ static int kbase_kcpu_fence_signal_prepare(
int ret = 0;
int fd;
lockdep_assert_held(&kctx->csf.kcpu_queues.lock);
lockdep_assert_held(&kcpu_queue->lock);
if (copy_from_user(&fence, u64_to_user_ptr(fence_info->fence),
sizeof(fence)))
@ -1549,11 +1566,9 @@ static void kcpu_queue_process_worker(struct work_struct *data)
struct kbase_kcpu_command_queue *queue = container_of(data,
struct kbase_kcpu_command_queue, work);
mutex_lock(&queue->kctx->csf.kcpu_queues.lock);
mutex_lock(&queue->lock);
kcpu_queue_process(queue, false);
mutex_unlock(&queue->kctx->csf.kcpu_queues.lock);
mutex_unlock(&queue->lock);
}
static int delete_queue(struct kbase_context *kctx, u32 id)
@ -1569,6 +1584,17 @@ static int delete_queue(struct kbase_context *kctx, u32 id)
KBASE_KTRACE_ADD_CSF_KCPU(kctx->kbdev, KCPU_QUEUE_DELETE,
queue, queue->num_pending_cmds, queue->cqs_wait_count);
/* Disassociate the queue from the system to prevent further
* submissions. Draining pending commands would be acceptable
* even if a new queue is created using the same ID.
*/
kctx->csf.kcpu_queues.array[id] = NULL;
bitmap_clear(kctx->csf.kcpu_queues.in_use, id, 1);
mutex_unlock(&kctx->csf.kcpu_queues.lock);
mutex_lock(&queue->lock);
/* Drain the remaining work for this queue first and go past
* all the waits.
*/
@ -1580,17 +1606,17 @@ static int delete_queue(struct kbase_context *kctx, u32 id)
/* All CQS wait commands should have been cleaned up */
WARN_ON(queue->cqs_wait_count);
kctx->csf.kcpu_queues.array[id] = NULL;
bitmap_clear(kctx->csf.kcpu_queues.in_use, id, 1);
/* Fire the tracepoint with the mutex held to enforce correct
* ordering with the summary stream.
*/
KBASE_TLSTREAM_TL_KBASE_DEL_KCPUQUEUE(kctx->kbdev, queue);
mutex_unlock(&kctx->csf.kcpu_queues.lock);
mutex_unlock(&queue->lock);
cancel_work_sync(&queue->work);
destroy_workqueue(queue->wq);
mutex_destroy(&queue->lock);
kfree(queue);
} else {
@ -1657,7 +1683,7 @@ static void kcpu_queue_process(struct kbase_kcpu_command_queue *queue,
bool process_next = true;
size_t i;
lockdep_assert_held(&queue->kctx->csf.kcpu_queues.lock);
lockdep_assert_held(&queue->lock);
for (i = 0; i != queue->num_pending_cmds; ++i) {
struct kbase_kcpu_command *cmd =
@ -2058,9 +2084,11 @@ int kbase_csf_kcpu_queue_enqueue(struct kbase_context *kctx,
/* The offset to the first command that is being processed or yet to
* be processed is of u8 type, so the number of commands inside the
* queue cannot be more than 256.
* queue cannot be more than 256. The current implementation expects
* exactly 256, any other size will require the addition of wrapping
* logic.
*/
BUILD_BUG_ON(KBASEP_KCPU_QUEUE_SIZE > 256);
BUILD_BUG_ON(KBASEP_KCPU_QUEUE_SIZE != 256);
/* Whilst the backend interface allows enqueueing multiple commands in
* a single operation, the Base interface does not expose any mechanism
@ -2076,13 +2104,13 @@ int kbase_csf_kcpu_queue_enqueue(struct kbase_context *kctx,
}
mutex_lock(&kctx->csf.kcpu_queues.lock);
if (!kctx->csf.kcpu_queues.array[enq->id]) {
ret = -EINVAL;
goto out;
}
queue = kctx->csf.kcpu_queues.array[enq->id];
mutex_unlock(&kctx->csf.kcpu_queues.lock);
if (queue == NULL)
return -EINVAL;
mutex_lock(&queue->lock);
if (kcpu_queue_get_space(queue) < enq->nr_commands) {
ret = -EBUSY;
@ -2097,7 +2125,7 @@ int kbase_csf_kcpu_queue_enqueue(struct kbase_context *kctx,
* for the possibility to roll back.
*/
for (i = 0; (i != enq->nr_commands) && !ret; ++i, ++kctx->csf.kcpu_queues.num_cmds) {
for (i = 0; (i != enq->nr_commands) && !ret; ++i) {
struct kbase_kcpu_command *kcpu_cmd =
&queue->commands[(u8)(queue->start_offset + queue->num_pending_cmds + i)];
struct base_kcpu_command command;
@ -2120,7 +2148,7 @@ int kbase_csf_kcpu_queue_enqueue(struct kbase_context *kctx,
}
}
kcpu_cmd->enqueue_ts = kctx->csf.kcpu_queues.num_cmds;
kcpu_cmd->enqueue_ts = atomic64_inc_return(&kctx->csf.kcpu_queues.cmd_seq_num);
switch (command.type) {
case BASE_KCPU_COMMAND_TYPE_FENCE_WAIT:
#if IS_ENABLED(CONFIG_SYNC_FILE)
@ -2208,13 +2236,10 @@ int kbase_csf_kcpu_queue_enqueue(struct kbase_context *kctx,
queue->num_pending_cmds += enq->nr_commands;
kcpu_queue_process(queue, false);
} else {
/* Roll back the number of enqueued commands */
kctx->csf.kcpu_queues.num_cmds -= i;
}
out:
mutex_unlock(&kctx->csf.kcpu_queues.lock);
mutex_unlock(&queue->lock);
return ret;
}
@ -2228,14 +2253,9 @@ int kbase_csf_kcpu_queue_context_init(struct kbase_context *kctx)
for (idx = 0; idx < KBASEP_MAX_KCPU_QUEUES; ++idx)
kctx->csf.kcpu_queues.array[idx] = NULL;
kctx->csf.kcpu_queues.wq = alloc_workqueue("mali_kbase_csf_kcpu",
WQ_UNBOUND | WQ_HIGHPRI, 0);
if (!kctx->csf.kcpu_queues.wq)
return -ENOMEM;
mutex_init(&kctx->csf.kcpu_queues.lock);
kctx->csf.kcpu_queues.num_cmds = 0;
atomic64_set(&kctx->csf.kcpu_queues.cmd_seq_num, 0);
return 0;
}
@ -2253,7 +2273,6 @@ void kbase_csf_kcpu_queue_context_term(struct kbase_context *kctx)
(void)delete_queue(kctx, id);
}
destroy_workqueue(kctx->csf.kcpu_queues.wq);
mutex_destroy(&kctx->csf.kcpu_queues.lock);
}
@ -2297,8 +2316,17 @@ int kbase_csf_kcpu_queue_new(struct kbase_context *kctx,
goto out;
}
queue->wq = alloc_workqueue("mali_kbase_csf_kcpu_wq_%i", WQ_UNBOUND | WQ_HIGHPRI, 0, idx);
if (queue->wq == NULL) {
kfree(queue);
ret = -ENOMEM;
goto out;
}
bitmap_set(kctx->csf.kcpu_queues.in_use, idx, 1);
kctx->csf.kcpu_queues.array[idx] = queue;
mutex_init(&queue->lock);
queue->kctx = kctx;
queue->start_offset = 0;
queue->num_pending_cmds = 0;

View File

@ -236,9 +236,11 @@ struct kbase_kcpu_command {
/**
* struct kbase_kcpu_command_queue - a command queue executed by the kernel
*
* @lock: Lock to protect accesses to this queue.
* @kctx: The context to which this command queue belongs.
* @commands: Array of commands which have been successfully
* enqueued to this command queue.
* @wq: Dedicated workqueue for processing commands.
* @work: struct work_struct which contains a pointer to
* the function which handles processing of kcpu
* commands enqueued into a kcpu command queue;
@ -274,8 +276,10 @@ struct kbase_kcpu_command {
* @fence_timeout: Timer used to detect the fence wait timeout.
*/
struct kbase_kcpu_command_queue {
struct mutex lock;
struct kbase_context *kctx;
struct kbase_kcpu_command commands[KBASEP_KCPU_QUEUE_SIZE];
struct workqueue_struct *wq;
struct work_struct work;
u8 start_offset;
u8 id;

View File

@ -163,6 +163,8 @@
#define CSG_PROTM_SUSPEND_BUF_HI 0x004C /* () Protected mode suspend buffer, high word */
#define CSG_CONFIG 0x0050 /* () CSG configuration options */
#define CSG_ITER_TRACE_CONFIG 0x0054 /* () CSG trace configuration */
#define CSG_DVS_BUF_LO 0x0060 /* () Normal mode deferred vertex shading work buffer, low word */
#define CSG_DVS_BUF_HI 0x0064 /* () Normal mode deferred vertex shading work buffer, high word */
/* CSG_OUTPUT_BLOCK register offsets */
#define CSG_ACK 0x0000 /* () CSG acknowledge flags */
@ -547,6 +549,13 @@
#define CS_STATUS_WAIT_SB_MASK_SET(reg_val, value) \
(((reg_val) & ~CS_STATUS_WAIT_SB_MASK_MASK) | \
(((value) << CS_STATUS_WAIT_SB_MASK_SHIFT) & CS_STATUS_WAIT_SB_MASK_MASK))
#define CS_STATUS_WAIT_SB_SOURCE_SHIFT 16
#define CS_STATUS_WAIT_SB_SOURCE_MASK (0xF << CS_STATUS_WAIT_SB_SOURCE_SHIFT)
#define CS_STATUS_WAIT_SB_SOURCE_GET(reg_val) \
(((reg_val)&CS_STATUS_WAIT_SB_SOURCE_MASK) >> CS_STATUS_WAIT_SB_SOURCE_SHIFT)
#define CS_STATUS_WAIT_SB_SOURCE_SET(reg_val, value) \
(((reg_val) & ~CS_STATUS_WAIT_SB_SOURCE_MASK) | \
(((value) << CS_STATUS_WAIT_SB_SOURCE_SHIFT) & CS_STATUS_WAIT_SB_SOURCE_MASK))
#define CS_STATUS_WAIT_SYNC_WAIT_CONDITION_SHIFT 24
#define CS_STATUS_WAIT_SYNC_WAIT_CONDITION_MASK (0xF << CS_STATUS_WAIT_SYNC_WAIT_CONDITION_SHIFT)
#define CS_STATUS_WAIT_SYNC_WAIT_CONDITION_GET(reg_val) \
@ -557,6 +566,7 @@
/* CS_STATUS_WAIT_SYNC_WAIT_CONDITION values */
#define CS_STATUS_WAIT_SYNC_WAIT_CONDITION_LE 0x0
#define CS_STATUS_WAIT_SYNC_WAIT_CONDITION_GT 0x1
#define CS_STATUS_WAIT_SYNC_WAIT_CONDITION_GE 0x5
/* End of CS_STATUS_WAIT_SYNC_WAIT_CONDITION values */
#define CS_STATUS_WAIT_PROGRESS_WAIT_SHIFT 28
#define CS_STATUS_WAIT_PROGRESS_WAIT_MASK (0x1 << CS_STATUS_WAIT_PROGRESS_WAIT_SHIFT)
@ -835,11 +845,6 @@
#define CSG_REQ_IDLE_GET(reg_val) (((reg_val)&CSG_REQ_IDLE_MASK) >> CSG_REQ_IDLE_SHIFT)
#define CSG_REQ_IDLE_SET(reg_val, value) \
(((reg_val) & ~CSG_REQ_IDLE_MASK) | (((value) << CSG_REQ_IDLE_SHIFT) & CSG_REQ_IDLE_MASK))
#define CSG_REQ_DOORBELL_SHIFT 30
#define CSG_REQ_DOORBELL_MASK (0x1 << CSG_REQ_DOORBELL_SHIFT)
#define CSG_REQ_DOORBELL_GET(reg_val) (((reg_val)&CSG_REQ_DOORBELL_MASK) >> CSG_REQ_DOORBELL_SHIFT)
#define CSG_REQ_DOORBELL_SET(reg_val, value) \
(((reg_val) & ~CSG_REQ_DOORBELL_MASK) | (((value) << CSG_REQ_DOORBELL_SHIFT) & CSG_REQ_DOORBELL_MASK))
#define CSG_REQ_PROGRESS_TIMER_EVENT_SHIFT 31
#define CSG_REQ_PROGRESS_TIMER_EVENT_MASK (0x1 << CSG_REQ_PROGRESS_TIMER_EVENT_SHIFT)
#define CSG_REQ_PROGRESS_TIMER_EVENT_GET(reg_val) \
@ -956,6 +961,21 @@
(((reg_val) & ~CSG_PROTM_SUSPEND_BUF_POINTER_MASK) | \
(((value) << CSG_PROTM_SUSPEND_BUF_POINTER_SHIFT) & CSG_PROTM_SUSPEND_BUF_POINTER_MASK))
/* CSG_DVS_BUF_BUFFER register */
#define CSG_DVS_BUF_BUFFER_SIZE_SHIFT GPU_U(0)
#define CSG_DVS_BUF_BUFFER_SIZE_MASK (GPU_U(0xFFF) << CSG_DVS_BUF_BUFFER_SIZE_SHIFT)
#define CSG_DVS_BUF_BUFFER_SIZE_GET(reg_val) (((reg_val)&CSG_DVS_BUF_BUFFER_SIZE_MASK) >> CSG_DVS_BUF_BUFFER_SIZE_SHIFT)
#define CSG_DVS_BUF_BUFFER_SIZE_SET(reg_val, value) \
(((reg_val) & ~CSG_DVS_BUF_BUFFER_SIZE_MASK) | \
(((value) << CSG_DVS_BUF_BUFFER_SIZE_SHIFT) & CSG_DVS_BUF_BUFFER_SIZE_MASK))
#define CSG_DVS_BUF_BUFFER_POINTER_SHIFT GPU_U(12)
#define CSG_DVS_BUF_BUFFER_POINTER_MASK \
(GPU_ULL(0xFFFFFFFFFFFFF) << CSG_DVS_BUF_BUFFER_POINTER_SHIFT)
#define CSG_DVS_BUF_BUFFER_POINTER_GET(reg_val) \
(((reg_val)&CSG_DVS_BUF_BUFFER_POINTER_MASK) >> CSG_DVS_BUF_BUFFER_POINTER_SHIFT)
#define CSG_DVS_BUF_BUFFER_POINTER_SET(reg_val, value) \
(((reg_val) & ~CSG_DVS_BUF_BUFFER_POINTER_MASK) | \
(((value) << CSG_DVS_BUF_BUFFER_POINTER_SHIFT) & CSG_DVS_BUF_BUFFER_POINTER_MASK))
/* End of CSG_INPUT_BLOCK register set definitions */

View File

@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
* (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved.
* (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@ -21,7 +21,7 @@
#include <mali_kbase.h>
#include <mali_kbase_ctx_sched.h>
#include <mali_kbase_hwcnt_context.h>
#include <hwcnt/mali_kbase_hwcnt_context.h>
#include <device/mali_kbase_device.h>
#include <backend/gpu/mali_kbase_irq_internal.h>
#include <backend/gpu/mali_kbase_pm_internal.h>
@ -29,7 +29,7 @@
#include <csf/mali_kbase_csf_trace_buffer.h>
#include <csf/ipa_control/mali_kbase_csf_ipa_control.h>
#include <mali_kbase_reset_gpu.h>
#include <linux/string.h>
#include <csf/mali_kbase_csf_firmware_log.h>
enum kbasep_soft_reset_status {
RESET_SUCCESS = 0,
@ -257,68 +257,6 @@ static void kbase_csf_debug_dump_registers(struct kbase_device *kbdev)
kbase_reg_read(kbdev, GPU_CONTROL_REG(TILER_CONFIG)));
}
/**
 * kbase_csf_dump_firmware_trace_buffer() - Print the firmware trace buffer
 *                                          contents to the kernel log.
 * @kbdev: Device whose firmware trace buffer is to be dumped.
 *
 * The trace data is read in pieces into a temporary buffer and emitted with
 * dev_err(), one "FW> " prefixed message per newline-terminated string.
 * A trailing string that is not newline-terminated is carried over to the
 * next read, and printed at the end if no more data arrives.
 *
 * The dump is skipped if the trace buffer cannot be obtained or if the
 * temporary buffer cannot be allocated.
 */
static void kbase_csf_dump_firmware_trace_buffer(struct kbase_device *kbdev)
{
u8 *buf, *p, *pnewline, *pend, *pendbuf;
unsigned int read_size, remaining_size;
struct firmware_trace_buffer *tb =
kbase_csf_firmware_get_trace_buffer(kbdev, FW_TRACE_BUF_NAME);
if (tb == NULL) {
dev_dbg(kbdev->dev, "Can't get the trace buffer, firmware trace dump skipped");
return;
}
/* One byte more than a page, so that a full page of data can still be
 * NUL-terminated and printed as a string.
 */
buf = kmalloc(PAGE_SIZE + 1, GFP_KERNEL);
if (buf == NULL) {
dev_err(kbdev->dev, "Short of memory, firmware trace dump skipped");
return;
}
buf[PAGE_SIZE] = 0;
/* p is the write position for the next read; pendbuf marks the end of the
 * page-sized data area (the terminator byte lies just beyond it).
 */
p = buf;
pendbuf = &buf[PAGE_SIZE];
dev_err(kbdev->dev, "Firmware trace buffer dump:");
/* Each read fills the space left after any carried-over partial string */
while ((read_size = kbase_csf_firmware_trace_buffer_read_data(tb, p,
pendbuf - p))) {
/* pend is the end of valid data; scanning restarts from the buffer
 * start so that a carried-over partial string is included.
 */
pend = p + read_size;
p = buf;
while (p < pend && (pnewline = memchr(p, '\n', pend - p))) {
/* Null-terminate the string */
*pnewline = 0;
dev_err(kbdev->dev, "FW> %s", p);
p = pnewline + 1;
}
/* Whatever follows the last newline is an unfinished string */
remaining_size = pend - p;
if (!remaining_size) {
p = buf;
} else if (remaining_size < PAGE_SIZE) {
/* Copy unfinished string to the start of the buffer */
memmove(buf, p, remaining_size);
p = &buf[remaining_size];
} else {
/* Print abnormal page-long string without newlines */
dev_err(kbdev->dev, "FW> %s", buf);
p = buf;
}
}
/* p != buf here means a partial string is still held in the buffer */
if (p != buf) {
/* Null-terminate and print last unfinished string */
*p = 0;
dev_err(kbdev->dev, "FW> %s", buf);
}
kfree(buf);
}
/**
* kbase_csf_hwcnt_on_reset_error() - Sets HWCNT to appropriate state in the
* event of an error during GPU reset.
@ -378,7 +316,6 @@ static enum kbasep_soft_reset_status kbase_csf_reset_gpu_once(struct kbase_devic
"The flush has completed so reset the active indicator\n");
kbdev->irq_reset_flush = false;
mutex_lock(&kbdev->pm.lock);
if (!silent)
dev_err(kbdev->dev, "Resetting GPU (allowing up to %d ms)",
RESET_TIMEOUT);
@ -389,7 +326,7 @@ static enum kbasep_soft_reset_status kbase_csf_reset_gpu_once(struct kbase_devic
if (!silent) {
kbase_csf_debug_dump_registers(kbdev);
if (likely(firmware_inited))
kbase_csf_dump_firmware_trace_buffer(kbdev);
kbase_csf_firmware_log_dump_buffer(kbdev);
}
spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
@ -403,6 +340,7 @@ static enum kbasep_soft_reset_status kbase_csf_reset_gpu_once(struct kbase_devic
*/
kbase_hwcnt_backend_csf_on_before_reset(&kbdev->hwcnt_gpu_iface);
mutex_lock(&kbdev->pm.lock);
/* Reset the GPU */
err = kbase_pm_init_hw(kbdev, 0);
@ -633,6 +571,11 @@ bool kbase_reset_gpu_is_active(struct kbase_device *kbdev)
return kbase_csf_reset_state_is_active(reset_state);
}
bool kbase_reset_gpu_is_not_pending(struct kbase_device *kbdev)
{
return atomic_read(&kbdev->csf.reset.state) == KBASE_CSF_RESET_GPU_NOT_PENDING;
}
int kbase_reset_gpu_wait(struct kbase_device *kbdev)
{
const long wait_timeout =

File diff suppressed because it is too large Load Diff

View File

@ -36,7 +36,9 @@
* If the CSG is already scheduled and resident, the CSI will be started
* right away, otherwise once the group is made resident.
*
* Return: 0 on success, or negative on failure.
* Return: 0 on success, or negative on failure. -EBUSY is returned to
* indicate to the caller that the queue could not be enabled due to the
* Scheduler state, and that the caller can retry enabling the queue later.
*/
int kbase_csf_scheduler_queue_start(struct kbase_queue *queue);
@ -530,12 +532,30 @@ static inline void kbase_csf_scheduler_invoke_tick(struct kbase_device *kbdev)
struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler;
unsigned long flags;
KBASE_KTRACE_ADD(kbdev, SCHEDULER_TICK_INVOKE, NULL, 0u);
spin_lock_irqsave(&scheduler->interrupt_lock, flags);
if (!scheduler->tick_timer_active)
queue_work(scheduler->wq, &scheduler->tick_work);
spin_unlock_irqrestore(&scheduler->interrupt_lock, flags);
}
/**
 * kbase_csf_scheduler_invoke_tock() - Request an immediate scheduling tock
 *
 * @kbdev: Pointer to the device
 *
 * Emits the SCHEDULER_TOCK_INVOKE trace event and then queues the scheduler's
 * tock work item with zero delay. The work item is only queued by the caller
 * that changes the scheduler's pending_tock_work flag from false to true;
 * callers that find the flag already set return without queueing.
 */
static inline void kbase_csf_scheduler_invoke_tock(struct kbase_device *kbdev)
{
	struct kbase_csf_scheduler *const sched = &kbdev->csf.scheduler;

	KBASE_KTRACE_ADD(kbdev, SCHEDULER_TOCK_INVOKE, NULL, 0u);

	/* The flag was already set: nothing more to do for this caller */
	if (atomic_cmpxchg(&sched->pending_tock_work, false, true) != false)
		return;

	mod_delayed_work(sched->wq, &sched->tock_work, 0);
}
/**
* kbase_csf_scheduler_queue_has_trace() - report whether the queue has been
* configured to operate with the

File diff suppressed because it is too large Load Diff

View File

@ -23,7 +23,6 @@
#define _KBASE_CSF_TILER_HEAP_H_
#include <mali_kbase.h>
/**
* kbase_csf_tiler_heap_context_init - Initialize the tiler heaps context for a
* GPU address space
@ -58,6 +57,12 @@ void kbase_csf_tiler_heap_context_term(struct kbase_context *kctx);
* @target_in_flight: Number of render-passes that the driver should attempt to
* keep in flight for which allocation of new chunks is
* allowed. Must not be zero.
* @buf_desc_va: Buffer descriptor GPU virtual address. This is a hint
* indicating that the caller intends to have tiler heap chunks
* reclaimed when they are hoarded by the hardware while the
* associated shader activities are suspended and the CSGs are
* off slots. If such reclaiming is not desired, this can be
* set to 0.
* @gpu_heap_va: Where to store the GPU virtual address of the context that was
* set up for the tiler heap.
* @first_chunk_va: Where to store the GPU virtual address of the first chunk
@ -66,10 +71,9 @@ void kbase_csf_tiler_heap_context_term(struct kbase_context *kctx);
*
* Return: 0 if successful or a negative error code on failure.
*/
int kbase_csf_tiler_heap_init(struct kbase_context *kctx,
u32 chunk_size, u32 initial_chunks, u32 max_chunks,
u16 target_in_flight, u64 *gpu_heap_va,
u64 *first_chunk_va);
int kbase_csf_tiler_heap_init(struct kbase_context *kctx, u32 chunk_size, u32 initial_chunks,
u32 max_chunks, u16 target_in_flight, u64 const buf_desc_va,
u64 *gpu_heap_va, u64 *first_chunk_va);
/**
* kbase_csf_tiler_heap_term - Terminate a chunked tiler memory heap.
@ -112,4 +116,27 @@ int kbase_csf_tiler_heap_term(struct kbase_context *kctx, u64 gpu_heap_va);
*/
int kbase_csf_tiler_heap_alloc_new_chunk(struct kbase_context *kctx,
u64 gpu_heap_va, u32 nr_in_flight, u32 pending_frag_count, u64 *new_chunk_ptr);
/**
* kbase_csf_tiler_heap_scan_kctx_unused_pages - Performs the tiler heap shrinker reclaim's scan
* functionality.
*
* @kctx: Pointer to the kbase context on which the tiler heap reclaim is to be
* performed.
* @to_free: Number of pages suggested for the reclaim scan (free) method to reach.
*
* Return: the actual number of pages the scan method has freed from the call.
*/
u32 kbase_csf_tiler_heap_scan_kctx_unused_pages(struct kbase_context *kctx, u32 to_free);
/**
* kbase_csf_tiler_heap_count_kctx_unused_pages - Performs the tiler heap shrinker reclaim's count
* functionality.
*
* @kctx: Pointer to the kbase context on which the tiler heap reclaim is to be
* performed.
*
* Return: a number of pages that could likely be freed on the subsequent scan method call.
*/
u32 kbase_csf_tiler_heap_count_kctx_unused_pages(struct kbase_context *kctx);
#endif

View File

@ -56,12 +56,20 @@
((CHUNK_HDR_NEXT_ADDR_MASK >> CHUNK_HDR_NEXT_ADDR_POS) << \
CHUNK_HDR_NEXT_ADDR_ENCODE_SHIFT)
/* The size of the area needed to be vmapped prior to handing the tiler heap
* over to the tiler, so that the shrinker could be invoked.
*/
#define NEXT_CHUNK_ADDR_SIZE (sizeof(u64))
/**
* struct kbase_csf_tiler_heap_chunk - A tiler heap chunk managed by the kernel
*
* @link: Link to this chunk in a list of chunks belonging to a
* @kbase_csf_tiler_heap.
* @region: Pointer to the GPU memory region allocated for the chunk.
* @map: Kernel VA mapping so that we would not need to use vmap in the
* shrinker callback, which can allocate. This maps only the header
* of the chunk, so it could be traversed.
* @gpu_va: GPU virtual address of the start of the memory region.
* This points to the header of the chunk and not to the low address
* of free memory within it.
@ -75,9 +83,12 @@
struct kbase_csf_tiler_heap_chunk {
/* List node linking this chunk into its heap's list of chunks */
struct list_head link;
/* GPU memory region allocated for the chunk */
struct kbase_va_region *region;
/* Kernel VA mapping of the chunk header only, kept so that the shrinker
 * callback can traverse chunks without calling vmap (which can allocate).
 */
struct kbase_vmap_struct map;
/* GPU VA of the start of the region, i.e. of the chunk header */
u64 gpu_va;
};
#define HEAP_BUF_DESCRIPTOR_CHECKED (1 << 0)
/**
* struct kbase_csf_tiler_heap - A tiler heap managed by the kernel
*
@ -85,6 +96,20 @@ struct kbase_csf_tiler_heap_chunk {
* associated.
* @link: Link to this heap in a list of tiler heaps belonging to
* the @kbase_csf_tiler_heap_context.
* @chunks_list: Linked list of allocated chunks.
* @gpu_va: The GPU virtual address of the heap context structure that
* was allocated for the firmware. This is also used to
* uniquely identify the heap.
* @heap_id: Unique id representing the heap, assigned during heap
* initialization.
* @buf_desc_va: Buffer descriptor GPU VA. Can be 0 for backward compatibility
* with earlier versions of the base interface.
* @buf_desc_reg: Pointer to the VA region that covers the provided buffer
* descriptor memory object pointed to by buf_desc_va.
* @gpu_va_map: Kernel VA mapping of the GPU VA region.
* @buf_desc_map: Kernel VA mapping of the buffer descriptor, read from
* during the tiler heap shrinker. Sync operations may need
* to be done before each read.
* @chunk_size: Size of each chunk, in bytes. Must be page-aligned.
* @chunk_count: The number of chunks currently allocated. Must not be
* zero or greater than @max_chunks.
@ -93,22 +118,23 @@ struct kbase_csf_tiler_heap_chunk {
* @target_in_flight: Number of render-passes that the driver should attempt
* to keep in flight for which allocation of new chunks is
* allowed. Must not be zero.
* @gpu_va: The GPU virtual address of the heap context structure that
* was allocated for the firmware. This is also used to
* uniquely identify the heap.
* @heap_id: Unique id representing the heap, assigned during heap
* initialization.
* @chunks_list: Linked list of allocated chunks.
* @buf_desc_checked: Indicates if runtime check on buffer descriptor has been done.
*/
struct kbase_csf_tiler_heap {
struct kbase_context *kctx;
struct list_head link;
struct list_head chunks_list;
u64 gpu_va;
u64 heap_id;
u64 buf_desc_va;
struct kbase_va_region *buf_desc_reg;
struct kbase_vmap_struct buf_desc_map;
struct kbase_vmap_struct gpu_va_map;
u32 chunk_size;
u32 chunk_count;
u32 max_chunks;
u16 target_in_flight;
u64 gpu_va;
u64 heap_id;
struct list_head chunks_list;
bool buf_desc_checked;
};
#endif /* !_KBASE_CSF_TILER_HEAP_DEF_H_ */

View File

@ -0,0 +1,367 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
* (C) COPYRIGHT 2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
* Foundation, and any use by you of this program is subject to the terms
* of such GNU license.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, you can access it online at
* http://www.gnu.org/licenses/gpl-2.0.html.
*
*/
#include <mali_kbase.h>
#include "mali_kbase_csf.h"
#include "mali_kbase_csf_tiler_heap.h"
#include "mali_kbase_csf_tiler_heap_reclaim.h"
/* Tiler heap shrinker seek value, needs to be higher than jit and memory pools */
#define HEAP_SHRINKER_SEEKS (DEFAULT_SEEKS + 2)
/* Tiler heap shrinker batch value */
#define HEAP_SHRINKER_BATCH (512)
/* Tiler heap reclaim scan (free) method size for limiting a scan run length */
#define HEAP_RECLAIM_SCAN_BATCH_SIZE (HEAP_SHRINKER_BATCH << 7)
/*
 * Work out the highest queue group priority currently present in @kctx.
 *
 * The runnable group lists are checked first, starting from
 * KBASE_QUEUE_GROUP_PRIORITY_REALTIME; if all the levels checked are empty
 * the result defaults to KBASE_QUEUE_GROUP_PRIORITY_LOW. Unless the result
 * is already REALTIME, the groups on the idle-wait list are then considered
 * as well, and the numerically smallest priority value seen wins.
 *
 * NOTE(review): this relies on smaller numeric values denoting higher
 * priority, which is what the function name and the scan order suggest.
 *
 * Return: the selected priority value.
 */
static u8 get_kctx_highest_csg_priority(struct kbase_context *kctx)
{
	struct kbase_queue_group *grp;
	u8 highest = KBASE_QUEUE_GROUP_PRIORITY_REALTIME;

	/* Stop at the first level that has a runnable group, or at LOW */
	while (highest < KBASE_QUEUE_GROUP_PRIORITY_LOW &&
	       list_empty(&kctx->csf.sched.runnable_groups[highest]))
		highest++;

	/* REALTIME cannot be improved on; no idle-wait groups means no more input */
	if (highest == KBASE_QUEUE_GROUP_PRIORITY_REALTIME ||
	    !kctx->csf.sched.num_idle_wait_grps)
		return highest;

	list_for_each_entry(grp, &kctx->csf.sched.idle_wait_groups, link) {
		if (grp->priority < highest)
			highest = grp->priority;
	}

	return highest;
}
/**
 * detach_ctx_from_heap_reclaim_mgr() - Remove a context from the scheduler's
 *                                      tiler heap reclaim manager.
 * @kctx: Context to detach.
 *
 * Does nothing if the context is not currently linked into one of the
 * reclaim manager lists. Otherwise the context is unlinked, and the part of
 * its estimated unused pages that has not been recorded as freed is
 * subtracted from the manager's running total of unused pages.
 *
 * The caller must hold the scheduler lock.
 */
static void detach_ctx_from_heap_reclaim_mgr(struct kbase_context *kctx)
{
	struct kbase_csf_scheduler *const scheduler = &kctx->kbdev->csf.scheduler;
	struct kbase_csf_ctx_heap_reclaim_info *info = &kctx->csf.sched.heap_info;
	u32 remaining;

	lockdep_assert_held(&scheduler->lock);

	if (list_empty(&info->mgr_link))
		return;

	/* Clamp at zero: more pages may have been freed than were estimated */
	remaining = (info->nr_est_unused_pages > info->nr_freed_pages) ?
			    info->nr_est_unused_pages - info->nr_freed_pages :
			    0;

	list_del_init(&info->mgr_link);

	/* A negative total would mean the manager's accounting has gone wrong */
	if (remaining)
		WARN_ON(atomic_sub_return(remaining, &scheduler->reclaim_mgr.unused_pages) <
			0);

	/* Plain %u for est_pages: the previous "0%u" printed a spurious leading 0 */
	dev_dbg(kctx->kbdev->dev,
		"Reclaim_mgr_detach: ctx_%d_%d, est_pages=%u, freed_pages=%u", kctx->tgid,
		kctx->id, info->nr_est_unused_pages, info->nr_freed_pages);
}
static void attach_ctx_to_heap_reclaim_mgr(struct kbase_context *kctx)
{
struct kbase_csf_ctx_heap_reclaim_info *const info = &kctx->csf.sched.heap_info;
struct kbase_csf_scheduler *const scheduler = &kctx->kbdev->csf.scheduler;
u8 const prio = get_kctx_highest_csg_priority(kctx);
lockdep_assert_held(&scheduler->lock);
if (WARN_ON(!list_empty(&info->mgr_link)))
list_del_init(&info->mgr_link);
/* Count the pages that could be freed */
info->nr_est_unused_pages = kbase_csf_tiler_heap_count_kctx_unused_pages(kctx);
/* Initialize the scan operation tracking pages */
info->nr_freed_pages = 0;
list_add_tail(&info->mgr_link, &scheduler->reclaim_mgr.ctx_lists[prio]);
/* Accumulate the estimated pages to the manager total field */
atomic_add(info->nr_est_unused_pages, &scheduler->reclaim_mgr.unused_pages);
dev_dbg(kctx->kbdev->dev, "Reclaim_mgr_attach: ctx_%d_%d, est_count_pages=%u", kctx->tgid,
kctx->id, info->nr_est_unused_pages);
}
/*
 * Notify the tiler heap reclaim logic that @group has become on-slot.
 *
 * Increments the on-slot group count of the group's context. When that count
 * goes from 0 to 1, the context is detached from the reclaim manager.
 *
 * The caller must hold the scheduler lock.
 */
void kbase_csf_tiler_heap_reclaim_sched_notify_grp_active(struct kbase_queue_group *group)
{
	struct kbase_context *const owner = group->kctx;
	struct kbase_csf_ctx_heap_reclaim_info *const reclaim_info = &owner->csf.sched.heap_info;

	lockdep_assert_held(&owner->kbdev->csf.scheduler.lock);

	/* Only the 0 => 1 transition requires any action */
	if (++reclaim_info->on_slot_grps != 1)
		return;

	dev_dbg(owner->kbdev->dev, "CSG_%d_%d_%d on-slot, remove kctx from reclaim manager",
		owner->tgid, owner->id, group->handle);
	detach_ctx_from_heap_reclaim_mgr(owner);
}
/*
 * Notify the tiler heap reclaim logic that @group is being evicted from the
 * scheduler.
 *
 * The on-slot group count of the group's context is recomputed from the
 * scheduler's in-use CSG slots. If the context is left with no on-slot
 * groups, it is either attached to the reclaim manager (when it still has
 * runnable or idle-wait groups and is not attached yet) or detached from it
 * (when it has no such groups left).
 *
 * The caller must hold the scheduler lock.
 */
void kbase_csf_tiler_heap_reclaim_sched_notify_grp_evict(struct kbase_queue_group *group)
{
	struct kbase_context *kctx = group->kctx;
	struct kbase_csf_ctx_heap_reclaim_info *const reclaim_info = &kctx->csf.sched.heap_info;
	struct kbase_csf_scheduler *const sched = &kctx->kbdev->csf.scheduler;
	const u32 nr_slots = kctx->kbdev->csf.global_iface.group_num;
	u32 resident_cnt = 0;
	u32 slot;

	lockdep_assert_held(&sched->lock);

	/* Evictions are comparatively rare, so rather than adjusting the counter
	 * incrementally, recount the context's resident groups from the slots.
	 */
	for_each_set_bit(slot, sched->csg_inuse_bitmap, nr_slots) {
		struct kbase_queue_group *resident = sched->csg_slots[slot].resident_group;

		if (likely(resident) && resident->kctx == kctx)
			resident_cnt++;
	}

	reclaim_info->on_slot_grps = resident_cnt;

	/* Reclaim state only changes once nothing of this context is on-slot */
	if (resident_cnt)
		return;

	if (!kctx->csf.sched.num_runnable_grps && !kctx->csf.sched.num_idle_wait_grps) {
		/* No operational groups remain: the context must not stay attached */
		dev_dbg(kctx->kbdev->dev,
			"CSG_%d_%d_%d evict leading to zombie kctx, dettach from reclaim manager",
			kctx->tgid, kctx->id, group->handle);
		detach_ctx_from_heap_reclaim_mgr(kctx);
		return;
	}

	/* Other operational groups exist: make sure the context is attached */
	if (list_empty(&reclaim_info->mgr_link)) {
		dev_dbg(kctx->kbdev->dev, "CSG_%d_%d_%d evict, add kctx to reclaim manager",
			kctx->tgid, kctx->id, group->handle);
		attach_ctx_to_heap_reclaim_mgr(kctx);
	}
}
/*
 * Notify the tiler heap reclaim logic that @group has gone off-slot.
 *
 * Decrements the on-slot group count of the group's context; a count that is
 * already zero triggers a warning and is left unchanged. If the count is
 * zero afterwards, the context is attached to the reclaim manager.
 *
 * The caller must hold the scheduler lock.
 */
void kbase_csf_tiler_heap_reclaim_sched_notify_grp_suspend(struct kbase_queue_group *group)
{
	struct kbase_context *const owner = group->kctx;
	struct kbase_csf_ctx_heap_reclaim_info *const reclaim_info = &owner->csf.sched.heap_info;

	lockdep_assert_held(&owner->kbdev->csf.scheduler.lock);

	/* Guard against underflow of the on-slot counter */
	if (!WARN_ON(reclaim_info->on_slot_grps == 0))
		reclaim_info->on_slot_grps -= 1;

	/* Groups of this context are still on-slot: nothing to reclaim from yet */
	if (reclaim_info->on_slot_grps != 0)
		return;

	dev_dbg(owner->kbdev->dev, "CSG_%d_%d_%d off-slot, add kctx to reclaim manager",
		owner->tgid, owner->id, group->handle);
	attach_ctx_to_heap_reclaim_mgr(owner);
}
/*
 * reclaim_unused_heap_pages - Free unused tiler heap pages of the contexts
 * currently attached to the scheduler's reclaim manager.
 *
 * @kbdev: Pointer to the device.
 *
 * Walks the reclaim manager's per-priority context lists, starting at
 * KBASE_QUEUE_GROUP_PRIORITY_LOW and decrementing down to
 * KBASE_QUEUE_GROUP_PRIORITY_REALTIME, and asks each listed context to free
 * its unused heap pages. The walk stops as soon as
 * HEAP_RECLAIM_SCAN_BATCH_SIZE pages have been freed in total.
 *
 * The caller must hold the scheduler lock.
 *
 * Return: total number of pages freed across all visited contexts.
 */
static unsigned long reclaim_unused_heap_pages(struct kbase_device *kbdev)
{
struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler;
struct kbase_csf_sched_heap_reclaim_mgr *const mgr = &scheduler->reclaim_mgr;
unsigned long total_freed_pages = 0;
int prio;
lockdep_assert_held(&kbdev->csf.scheduler.lock);
for (prio = KBASE_QUEUE_GROUP_PRIORITY_LOW;
total_freed_pages < HEAP_RECLAIM_SCAN_BATCH_SIZE &&
prio >= KBASE_QUEUE_GROUP_PRIORITY_REALTIME;
prio--) {
struct kbase_csf_ctx_heap_reclaim_info *info, *tmp;
u32 cnt_ctxs = 0;
/* The _safe iterator is required: the current entry may be detached below */
list_for_each_entry_safe(info, tmp, &scheduler->reclaim_mgr.ctx_lists[prio],
mgr_link) {
struct kbase_context *kctx =
container_of(info, struct kbase_context, csf.sched.heap_info);
u32 freed_pages = kbase_csf_tiler_heap_scan_kctx_unused_pages(
kctx, info->nr_est_unused_pages);
if (freed_pages) {
/* Remove the freed pages from the manager retained estimate. The
 * accumulated removals from the kctx should not exceed the kctx
 * initially notified contribution amount:
 * info->nr_est_unused_pages.
 */
u32 rm_cnt = MIN(info->nr_est_unused_pages - info->nr_freed_pages,
freed_pages);
WARN_ON(atomic_sub_return(rm_cnt, &mgr->unused_pages) < 0);
/* tracking the freed pages, before a potential detach call */
info->nr_freed_pages += freed_pages;
total_freed_pages += freed_pages;
schedule_work(&kctx->jit_work);
}
/* If the kctx can't offer any more, drop it from the reclaim manager,
 * otherwise leave it remaining in. If the kctx changes its state (i.e.
 * some CSGs becoming on-slot), the scheduler will pull it out.
 */
if (info->nr_freed_pages >= info->nr_est_unused_pages || freed_pages == 0)
detach_ctx_from_heap_reclaim_mgr(kctx);
cnt_ctxs++;
/* Enough has been freed, break to avoid holding the lock too long */
if (total_freed_pages >= HEAP_RECLAIM_SCAN_BATCH_SIZE)
break;
}
/* Note: the page count logged here is the running total, not a per-priority figure */
dev_dbg(kbdev->dev, "Reclaim free heap pages: %lu (cnt_ctxs: %u, prio: %d)",
total_freed_pages, cnt_ctxs, prio);
}
dev_dbg(kbdev->dev, "Reclaim free total heap pages: %lu (across all CSG priority)",
total_freed_pages);
return total_freed_pages;
}
/*
 * Report the reclaim manager's current estimate of unused tiler heap pages.
 *
 * @kbdev: Pointer to the device.
 * @sc:    Shrink control; not used by this function.
 *
 * Return: the value of the manager's unused_pages counter.
 */
static unsigned long kbase_csf_tiler_heap_reclaim_count_free_pages(struct kbase_device *kbdev,
								   struct shrink_control *sc)
{
	unsigned long nr_unused = atomic_read(&kbdev->csf.scheduler.reclaim_mgr.unused_pages);

	dev_dbg(kbdev->dev, "Reclaim count unused pages (estimate): %lu", nr_unused);

	return nr_unused;
}
/*
 * Try to free unused tiler heap pages on behalf of the shrinker.
 *
 * @kbdev: Pointer to the device.
 * @sc:    Shrink control of the current scan request.
 *
 * The scheduler lock is only ever try-locked. If the first attempt fails, the
 * function waits (up to roughly 2 ms) for the scheduler to leave SCHED_BUSY
 * and tries once more before giving up.
 *
 * Return: 0 when the scheduler lock could not be taken, SHRINK_STOP when the
 * estimate reported unused pages but none could be freed, otherwise the
 * number of pages freed.
 */
static unsigned long kbase_csf_tiler_heap_reclaim_scan_free_pages(struct kbase_device *kbdev,
								  struct shrink_control *sc)
{
	struct kbase_csf_scheduler *const sched = &kbdev->csf.scheduler;
	unsigned long nr_freed = 0;
	unsigned long nr_unused;

	if (!mutex_trylock(&sched->lock)) {
		/* Scheduler is busy: give it roughly 2 ms, then retry once */
		wait_event_timeout(kbdev->csf.event_wait, (sched->state != SCHED_BUSY),
				   msecs_to_jiffies(2));

		if (!mutex_trylock(&sched->lock)) {
			dev_dbg(kbdev->dev, "Tiler heap reclaim scan see device busy (freed: 0)");
			return 0;
		}
	}

	nr_unused = atomic_read(&sched->reclaim_mgr.unused_pages);
	if (nr_unused != 0)
		nr_freed = reclaim_unused_heap_pages(kbdev);

	mutex_unlock(&sched->lock);

#if (KERNEL_VERSION(4, 14, 0) <= LINUX_VERSION_CODE)
	/* Report the real amount when more than requested was processed */
	if (freed_exceeds_request(nr_freed, sc))
		sc->nr_scanned = nr_freed;
#endif /* (KERNEL_VERSION(4, 14, 0) <= LINUX_VERSION_CODE) */

	dev_info(kbdev->dev, "Tiler heap reclaim scan freed pages: %lu (unused: %lu)", nr_freed,
		 nr_unused);

	/* The estimate promised pages but nothing was freed: ask the caller to stop */
	return (nr_unused && !nr_freed) ? SHRINK_STOP : nr_freed;
}
/*
 * Shrinker count_objects callback: resolves the owning device from the
 * embedded shrinker and forwards to the free-page counting helper.
 */
static unsigned long kbase_csf_tiler_heap_reclaim_count_objects(struct shrinker *s,
								struct shrink_control *sc)
{
	struct kbase_device *kbdev;

	kbdev = container_of(s, struct kbase_device, csf.scheduler.reclaim_mgr.heap_reclaim);

	return kbase_csf_tiler_heap_reclaim_count_free_pages(kbdev, sc);
}
/*
 * Shrinker scan_objects callback: resolves the owning device from the
 * embedded shrinker and forwards to the free-page scanning helper.
 */
static unsigned long kbase_csf_tiler_heap_reclaim_scan_objects(struct shrinker *s,
							       struct shrink_control *sc)
{
	struct kbase_device *kbdev;

	kbdev = container_of(s, struct kbase_device, csf.scheduler.reclaim_mgr.heap_reclaim);

	return kbase_csf_tiler_heap_reclaim_scan_free_pages(kbdev, sc);
}
void kbase_csf_tiler_heap_reclaim_ctx_init(struct kbase_context *kctx)
{
/* Per-kctx heap_info object initialization */
memset(&kctx->csf.sched.heap_info, 0, sizeof(struct kbase_csf_ctx_heap_reclaim_info));
INIT_LIST_HEAD(&kctx->csf.sched.heap_info.mgr_link);
}
/*
 * Initialise the scheduler's tiler heap reclaim manager.
 *
 * @kbdev: Pointer to the device.
 *
 * Empties the per-priority context lists, zeroes the unused page estimate and
 * fills in the shrinker callbacks. Unless CONFIG_MALI_VECTOR_DUMP is defined,
 * the shrinker is then registered with the kernel.
 *
 * This function returns void, so a registration failure cannot be propagated
 * to the caller; it is reported with a warning instead of being silently
 * dropped, since in that case heap pages will never be reclaimed on memory
 * pressure.
 */
void kbase_csf_tiler_heap_reclaim_mgr_init(struct kbase_device *kbdev)
{
	struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;
	struct shrinker *reclaim = &scheduler->reclaim_mgr.heap_reclaim;
	u8 prio;

	for (prio = KBASE_QUEUE_GROUP_PRIORITY_REALTIME; prio < KBASE_QUEUE_GROUP_PRIORITY_COUNT;
	     prio++)
		INIT_LIST_HEAD(&scheduler->reclaim_mgr.ctx_lists[prio]);

	atomic_set(&scheduler->reclaim_mgr.unused_pages, 0);

	reclaim->count_objects = kbase_csf_tiler_heap_reclaim_count_objects;
	reclaim->scan_objects = kbase_csf_tiler_heap_reclaim_scan_objects;
	reclaim->seeks = HEAP_SHRINKER_SEEKS;
	reclaim->batch = HEAP_SHRINKER_BATCH;

#if !defined(CONFIG_MALI_VECTOR_DUMP)
	/* register_shrinker() returns a negative errno on failure */
	if (register_shrinker(reclaim))
		dev_warn(kbdev->dev, "Failed to register the tiler heap reclaim shrinker");
#endif
}
void kbase_csf_tiler_heap_reclaim_mgr_term(struct kbase_device *kbdev)
{
struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;
u8 prio;
#if !defined(CONFIG_MALI_VECTOR_DUMP)
unregister_shrinker(&scheduler->reclaim_mgr.heap_reclaim);
#endif
for (prio = KBASE_QUEUE_GROUP_PRIORITY_REALTIME; prio < KBASE_QUEUE_GROUP_PRIORITY_COUNT;
prio++)
WARN_ON(!list_empty(&scheduler->reclaim_mgr.ctx_lists[prio]));
WARN_ON(atomic_read(&scheduler->reclaim_mgr.unused_pages));
}

View File

@ -0,0 +1,80 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
* (C) COPYRIGHT 2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
* Foundation, and any use by you of this program is subject to the terms
* of such GNU license.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, you can access it online at
* http://www.gnu.org/licenses/gpl-2.0.html.
*
*/
#ifndef _KBASE_CSF_TILER_HEAP_RECLAIM_H_
#define _KBASE_CSF_TILER_HEAP_RECLAIM_H_
#include <mali_kbase.h>
/**
* kbase_csf_tiler_heap_reclaim_sched_notify_grp_active - Notifier function for the scheduler
* to use when a group is put on-slot.
*
* @group: Pointer to the group object that has been placed on-slot for running.
*
*/
void kbase_csf_tiler_heap_reclaim_sched_notify_grp_active(struct kbase_queue_group *group);
/**
* kbase_csf_tiler_heap_reclaim_sched_notify_grp_evict - Notifier function for the scheduler
* to use when a group is evicted out of the scheduler's scope, i.e. no run of
* the group is possible afterwards.
*
* @group: Pointer to the group object that has been evicted.
*
*/
void kbase_csf_tiler_heap_reclaim_sched_notify_grp_evict(struct kbase_queue_group *group);
/**
* kbase_csf_tiler_heap_reclaim_sched_notify_grp_suspend - Notifier function for the scheduler
* to use when a group is suspended from running, but could resume in future.
*
* @group: Pointer to the group object that is in suspended state.
*
*/
void kbase_csf_tiler_heap_reclaim_sched_notify_grp_suspend(struct kbase_queue_group *group);
/**
* kbase_csf_tiler_heap_reclaim_ctx_init - Initializer on per context data fields for use
* with the tiler heap reclaim manager.
*
* @kctx: Pointer to the kbase_context.
*
*/
void kbase_csf_tiler_heap_reclaim_ctx_init(struct kbase_context *kctx);
/**
* kbase_csf_tiler_heap_reclaim_mgr_init - Initializer for the tiler heap reclaim manager.
*
* @kbdev: Pointer to the device.
*
*/
void kbase_csf_tiler_heap_reclaim_mgr_init(struct kbase_device *kbdev);
/**
* kbase_csf_tiler_heap_reclaim_mgr_term - Termination call for the tiler heap reclaim manager.
*
* @kbdev: Pointer to the device.
*
*/
void kbase_csf_tiler_heap_reclaim_mgr_term(struct kbase_device *kbdev);
#endif

View File

@ -88,13 +88,11 @@ DEFINE_DEBUGFS_ATTRIBUTE(kbase_csf_tl_poll_interval_fops,
kbase_csf_tl_debugfs_poll_interval_read,
kbase_csf_tl_debugfs_poll_interval_write, "%llu\n");
/*
 * Create the "csf_tl_poll_interval_in_ms" debugfs file (mode 0644) in the
 * device's instrumentation debugfs directory, backed by
 * kbase_csf_tl_poll_interval_fops with the device as private data.
 */
void kbase_csf_tl_reader_debugfs_init(struct kbase_device *kbdev)
{
	struct dentry *const parent = kbdev->debugfs_instr_directory;

	debugfs_create_file("csf_tl_poll_interval_in_ms", 0644, parent, kbdev,
			    &kbase_csf_tl_poll_interval_fops);
}
#endif
@ -166,11 +164,10 @@ static int kbase_ts_converter_init(
*
* Return: The CPU timestamp.
*/
static void __maybe_unused
kbase_ts_converter_convert(const struct kbase_ts_converter *self, u64 *gpu_ts)
static u64 __maybe_unused
kbase_ts_converter_convert(const struct kbase_ts_converter *self, u64 gpu_ts)
{
u64 old_gpu_ts = *gpu_ts;
*gpu_ts = div64_u64(old_gpu_ts * self->multiplier, self->divisor) +
return div64_u64(gpu_ts * self->multiplier, self->divisor) +
self->offset;
}
@ -250,7 +247,6 @@ static void tl_reader_reset(struct kbase_csf_tl_reader *self)
self->tl_header.btc = 0;
}
int kbase_csf_tl_reader_flush_buffer(struct kbase_csf_tl_reader *self)
{
int ret = 0;
@ -275,7 +271,6 @@ int kbase_csf_tl_reader_flush_buffer(struct kbase_csf_tl_reader *self)
return -EBUSY;
}
/* Copying the whole buffer in a single shot. We assume
* that the buffer will not contain partially written messages.
*/
@ -326,8 +321,8 @@ int kbase_csf_tl_reader_flush_buffer(struct kbase_csf_tl_reader *self)
{
struct kbase_csffw_tl_message *msg =
(struct kbase_csffw_tl_message *) csffw_data_it;
kbase_ts_converter_convert(&self->ts_converter,
&msg->timestamp);
msg->timestamp = kbase_ts_converter_convert(&self->ts_converter,
msg->timestamp);
}
/* Copy the message out to the tl_stream. */

View File

@ -119,7 +119,7 @@ static const struct firmware_trace_buffer_data trace_buffer_data[] = {
#if MALI_UNIT_TEST
{ "fwutf", { 0 }, 1 },
#endif
{ FW_TRACE_BUF_NAME, { 0 }, 4 },
{ FIRMWARE_LOG_BUF_NAME, { 0 }, 4 },
{ "benchmark", { 0 }, 2 },
{ "timeline", { 0 }, KBASE_CSF_TL_BUFFER_NR_PAGES },
};
@ -506,10 +506,16 @@ unsigned int kbase_csf_firmware_trace_buffer_read_data(
}
EXPORT_SYMBOL(kbase_csf_firmware_trace_buffer_read_data);
#if IS_ENABLED(CONFIG_DEBUG_FS)
/*
 * update_trace_buffer_active_mask64 - Apply a 64-bit enable mask to a trace buffer
 *
 * @tb:   Trace buffer handle
 * @mask: Bit i is the new enable state of trace enable entry i
 *
 * A trace buffer may have more enable entries than @mask has bits. Shifting a
 * u64 by 64 or more is undefined behaviour in C, so entries with an index of
 * 64 or above are explicitly treated as disabled rather than being derived
 * from an out-of-range shift.
 */
static void update_trace_buffer_active_mask64(struct firmware_trace_buffer *tb, u64 mask)
{
	unsigned int i;

	for (i = 0; i < tb->trace_enable_entry_count; i++) {
		/* 64 == number of bits representable in the u64 mask */
		const bool enable = (i < 64) && ((mask >> i) & 1);

		kbasep_csf_firmware_trace_buffer_update_trace_enable_bit(tb, i, enable);
	}
}
#define U32_BITS 32
static u64 get_trace_buffer_active_mask64(struct firmware_trace_buffer *tb)
u64 kbase_csf_firmware_trace_buffer_get_active_mask64(struct firmware_trace_buffer *tb)
{
u64 active_mask = tb->trace_enable_init_mask[0];
@ -519,18 +525,7 @@ static u64 get_trace_buffer_active_mask64(struct firmware_trace_buffer *tb)
return active_mask;
}
/*
 * update_trace_buffer_active_mask64 - Apply a 64-bit enable mask to a trace buffer
 *
 * @tb:   Trace buffer handle
 * @mask: Bit i is the new enable state of trace enable entry i
 *
 * A trace buffer may have more enable entries than @mask has bits. Shifting a
 * u64 by 64 or more is undefined behaviour in C, so entries with an index of
 * 64 or above are explicitly treated as disabled rather than being derived
 * from an out-of-range shift.
 */
static void update_trace_buffer_active_mask64(struct firmware_trace_buffer *tb,
					      u64 mask)
{
	unsigned int i;

	for (i = 0; i < tb->trace_enable_entry_count; i++) {
		/* 64 == number of bits representable in the u64 mask */
		const bool enable = (i < 64) && ((mask >> i) & 1);

		kbasep_csf_firmware_trace_buffer_update_trace_enable_bit(tb, i, enable);
	}
}
static int set_trace_buffer_active_mask64(struct firmware_trace_buffer *tb,
u64 mask)
int kbase_csf_firmware_trace_buffer_set_active_mask64(struct firmware_trace_buffer *tb, u64 mask)
{
struct kbase_device *kbdev = tb->kbdev;
unsigned long flags;
@ -558,123 +553,3 @@ static int set_trace_buffer_active_mask64(struct firmware_trace_buffer *tb,
return err;
}
/*
 * debugfs attribute getter: report the active enable mask of the firmware
 * trace buffer named FW_TRACE_BUF_NAME.
 *
 * @data: The kbase device, as registered with the debugfs file.
 * @val:  Output for the mask; only the first 64 enable bits are reported.
 *
 * Return: 0 on success, -EIO if the trace buffer cannot be found.
 */
static int kbase_csf_firmware_trace_enable_mask_read(void *data, u64 *val)
{
	struct kbase_device *kbdev = data;
	struct firmware_trace_buffer *fw_log =
		kbase_csf_firmware_get_trace_buffer(kbdev, FW_TRACE_BUF_NAME);

	if (!fw_log) {
		dev_err(kbdev->dev, "Couldn't get the firmware trace buffer");
		return -EIO;
	}

	*val = get_trace_buffer_active_mask64(fw_log);

	return 0;
}
/*
 * debugfs attribute setter: change the active enable mask of the firmware
 * trace buffer named FW_TRACE_BUF_NAME.
 *
 * @data: The kbase device, as registered with the debugfs file.
 * @val:  Requested enable mask.
 *
 * Bits of @val beyond the number of enable bits supported by the trace buffer
 * are ignored. The buffer is only reconfigured when the resulting mask differs
 * from the currently active one.
 *
 * Return: 0 on success or when nothing changed, -EIO if the trace buffer
 * cannot be found, otherwise the result of applying the new mask.
 */
static int kbase_csf_firmware_trace_enable_mask_write(void *data, u64 val)
{
	struct kbase_device *kbdev = (struct kbase_device *)data;
	struct firmware_trace_buffer *tb =
		kbase_csf_firmware_get_trace_buffer(kbdev, FW_TRACE_BUF_NAME);
	u64 new_mask;
	unsigned int enable_bits_count;

	if (tb == NULL) {
		dev_err(kbdev->dev, "Couldn't get the firmware trace buffer");
		return -EIO;
	}

	/* Ignore unsupported types */
	enable_bits_count =
		kbase_csf_firmware_trace_buffer_get_trace_enable_bits_count(tb);
	if (enable_bits_count > 64) {
		dev_dbg(kbdev->dev, "Limit enabled bits count from %u to 64",
			enable_bits_count);
		enable_bits_count = 64;
	}

	/* The mask must be built in 64-bit arithmetic: a plain "1 << n" is a
	 * 32-bit int shift, which is undefined for n >= 31 and cannot express
	 * more than 31 enable bits. A shift by the full width (64) is undefined
	 * even for a u64, so the "all bits supported" case is handled apart.
	 */
	if (enable_bits_count < 64)
		new_mask = val & ((1ULL << enable_bits_count) - 1);
	else
		new_mask = val;

	if (new_mask != get_trace_buffer_active_mask64(tb))
		return set_trace_buffer_active_mask64(tb, new_mask);
	else
		return 0;
}
/*
 * open() handler of the firmware trace dump debugfs file: stashes the device
 * taken from the inode's private data into the file's private data.
 *
 * Return: always 0.
 */
static int kbasep_csf_firmware_trace_debugfs_open(struct inode *in,
						  struct file *file)
{
	struct kbase_device *kbdev;

	kbdev = in->i_private;
	file->private_data = kbdev;

	dev_dbg(kbdev->dev, "Opened firmware trace buffer dump debugfs file");

	return 0;
}
/*
 * read() handler of the firmware trace dump debugfs file.
 *
 * Reads up to min(@size, two pages) bytes from the firmware trace buffer named
 * FW_TRACE_BUF_NAME into a temporary kernel buffer (under hwaccess_lock) and
 * copies them to user space.
 *
 * Return: number of bytes read (may be 0) with *@ppos advanced accordingly,
 * -EIO if the trace buffer cannot be found, -ENOMEM if the temporary buffer
 * cannot be allocated, -EFAULT if the copy to user space fails.
 */
static ssize_t kbasep_csf_firmware_trace_debugfs_read(struct file *file,
		char __user *buf, size_t size, loff_t *ppos)
{
	struct kbase_device *kbdev = file->private_data;
	struct firmware_trace_buffer *tb;
	/* Limit the kernel buffer to no more than two pages */
	size_t chunk = MIN(size, 2 * PAGE_SIZE);
	unsigned long not_copied = 0;
	unsigned long irq_flags;
	unsigned int n_read;
	u8 *bounce;

	tb = kbase_csf_firmware_get_trace_buffer(kbdev, FW_TRACE_BUF_NAME);
	if (!tb) {
		dev_err(kbdev->dev, "Couldn't get the firmware trace buffer");
		return -EIO;
	}

	bounce = kmalloc(chunk, GFP_KERNEL);
	if (!bounce) {
		dev_err(kbdev->dev, "Couldn't allocate memory for trace buffer dump");
		return -ENOMEM;
	}

	spin_lock_irqsave(&kbdev->hwaccess_lock, irq_flags);
	n_read = kbase_csf_firmware_trace_buffer_read_data(tb, bounce, chunk);
	spin_unlock_irqrestore(&kbdev->hwaccess_lock, irq_flags);

	/* Only touch user memory when some trace data was obtained */
	if (n_read)
		not_copied = copy_to_user(buf, bounce, n_read);

	kfree(bounce);

	if (not_copied) {
		dev_err(kbdev->dev, "Couldn't copy trace buffer data to user space buffer");
		return -EFAULT;
	}

	*ppos += n_read;
	return n_read;
}
/* File operations of the enable-mask attribute: the value is formatted as
 * hexadecimal ("%llx") and routed to the mask read/write handlers.
 */
DEFINE_DEBUGFS_ATTRIBUTE(kbase_csf_firmware_trace_enable_mask_fops,
kbase_csf_firmware_trace_enable_mask_read,
kbase_csf_firmware_trace_enable_mask_write, "%llx\n");
/* File operations of the binary trace dump file: open and read only, and the
 * file is not seekable (no_llseek).
 */
static const struct file_operations kbasep_csf_firmware_trace_debugfs_fops = {
.owner = THIS_MODULE,
.open = kbasep_csf_firmware_trace_debugfs_open,
.read = kbasep_csf_firmware_trace_debugfs_read,
.llseek = no_llseek,
};
/*
 * Create the firmware trace debugfs entries in the device's Mali debugfs
 * directory: "fw_trace_enable_mask" (0644) to get/set the enable mask, and
 * "fw_traces" (0444) to dump the binary trace buffer.
 */
void kbase_csf_firmware_trace_buffer_debugfs_init(struct kbase_device *kbdev)
{
	struct dentry *const dir = kbdev->mali_debugfs_directory;

	debugfs_create_file("fw_trace_enable_mask", 0644, dir, kbdev,
			    &kbase_csf_firmware_trace_enable_mask_fops);

	debugfs_create_file("fw_traces", 0444, dir, kbdev,
			    &kbasep_csf_firmware_trace_debugfs_fops);
}
#endif /* CONFIG_DEBUG_FS */

View File

@ -25,7 +25,7 @@
#include <linux/types.h>
#define CSF_FIRMWARE_TRACE_ENABLE_INIT_MASK_MAX (4)
#define FW_TRACE_BUF_NAME "fwlog"
#define FIRMWARE_LOG_BUF_NAME "fwlog"
/* Forward declarations */
struct firmware_trace_buffer;
@ -165,14 +165,23 @@ bool kbase_csf_firmware_trace_buffer_is_empty(
unsigned int kbase_csf_firmware_trace_buffer_read_data(
struct firmware_trace_buffer *trace_buffer, u8 *data, unsigned int num_bytes);
#if IS_ENABLED(CONFIG_DEBUG_FS)
/**
* kbase_csf_firmware_trace_buffer_debugfs_init() - Add debugfs entries for
* setting enable mask and dumping the binary firmware trace buffer
* kbase_csf_firmware_trace_buffer_get_active_mask64 - Get trace buffer active mask
*
* @kbdev: Pointer to the device
* @tb: Trace buffer handle
*
* Return: Trace buffer active mask.
*/
void kbase_csf_firmware_trace_buffer_debugfs_init(struct kbase_device *kbdev);
#endif /* CONFIG_DEBUG_FS */
u64 kbase_csf_firmware_trace_buffer_get_active_mask64(struct firmware_trace_buffer *tb);
/**
* kbase_csf_firmware_trace_buffer_set_active_mask64 - Set trace buffer active mask
*
* @tb: Trace buffer handle
* @mask: New active mask
*
* Return: 0 if successful, negative error code on failure.
*/
int kbase_csf_firmware_trace_buffer_set_active_mask64(struct firmware_trace_buffer *tb, u64 mask);
#endif /* _KBASE_CSF_TRACE_BUFFER_H_ */

View File

@ -0,0 +1,271 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
* (C) COPYRIGHT 2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
* Foundation, and any use by you of this program is subject to the terms
* of such GNU license.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, you can access it online at
* http://www.gnu.org/licenses/gpl-2.0.html.
*
*/
#include <mali_kbase.h>
#if IS_ENABLED(CONFIG_DEBUG_FS)
/**
 * kbasep_fault_occurred - Tell whether a fault is currently recorded.
 *
 * @kbdev: Device pointer
 *
 * The recorded error code is sampled under the dump-on-fault spinlock.
 *
 * Return: true if the recorded error code differs from DF_NO_ERROR.
 */
static bool kbasep_fault_occurred(struct kbase_device *kbdev)
{
	unsigned long irq_flags;
	bool fault_pending;

	spin_lock_irqsave(&kbdev->csf.dof.lock, irq_flags);
	fault_pending = (kbdev->csf.dof.error_code != DF_NO_ERROR);
	spin_unlock_irqrestore(&kbdev->csf.dof.lock, irq_flags);

	return fault_pending;
}
void kbase_debug_csf_fault_wait_completion(struct kbase_device *kbdev)
{
if (likely(!kbase_debug_csf_fault_dump_enabled(kbdev))) {
dev_dbg(kbdev->dev, "No userspace client for dumping exists");
return;
}
wait_event(kbdev->csf.dof.dump_wait_wq, kbase_debug_csf_fault_dump_complete(kbdev));
}
KBASE_EXPORT_TEST_API(kbase_debug_csf_fault_wait_completion);
/**
* kbase_debug_csf_fault_wakeup - Wake up a waiting user space client.
*
* @kbdev: Kbase device
*/
static void kbase_debug_csf_fault_wakeup(struct kbase_device *kbdev)
{
wake_up_interruptible(&kbdev->csf.dof.fault_wait_wq);
}
/*
 * Record a fault for the dump-on-fault client and wake it up.
 *
 * @kbdev: Device pointer
 * @kctx:  Faulty context, may be NULL. A context flagged KCTX_DYING is
 *         treated as NULL.
 * @error: Error code; must not be DF_NO_ERROR.
 *
 * Only one fault is tracked at a time: if one is already recorded the new one
 * is not stored, but the function still reports true.
 *
 * Return: false if no client has the debugfs file open or @error is
 * DF_NO_ERROR, true otherwise.
 */
bool kbase_debug_csf_fault_notify(struct kbase_device *kbdev,
		struct kbase_context *kctx, enum dumpfault_error_type error)
{
	unsigned long irq_flags;

	if (likely(!kbase_debug_csf_fault_dump_enabled(kbdev)))
		return false;

	if (WARN_ON(error == DF_NO_ERROR))
		return false;

	if (kctx && kbase_ctx_flag(kctx, KCTX_DYING)) {
		dev_info(kbdev->dev, "kctx %d_%d is dying when error %d is reported",
			 kctx->tgid, kctx->id, error);
		kctx = NULL;
	}

	spin_lock_irqsave(&kbdev->csf.dof.lock, irq_flags);

	if (kbdev->csf.dof.error_code) {
		/* Only one fault at a time can be processed */
		dev_info(kbdev->dev, "skip this fault as there's a pending fault");
	} else {
		kbdev->csf.dof.kctx_tgid = kctx ? kctx->tgid : 0;
		kbdev->csf.dof.kctx_id = kctx ? kctx->id : 0;
		kbdev->csf.dof.error_code = error;
		kbase_debug_csf_fault_wakeup(kbdev);
	}

	spin_unlock_irqrestore(&kbdev->csf.dof.lock, irq_flags);

	return true;
}
/*
 * read() handler of the csf_fault debugfs file.
 *
 * Sleeps (interruptibly) until a fault is recorded, then returns it to user
 * space formatted as "<tgid>_<ctx_id>_<error_code>\n".
 *
 * Return: result of simple_read_from_buffer() on success, -EINVAL on a NULL
 * file/buffer, a negative *@f_pos or a user buffer smaller than BUF_SIZE,
 * -ERESTARTSYS if the wait was interrupted.
 */
static ssize_t debug_csf_fault_read(struct file *file, char __user *buffer, size_t size,
				    loff_t *f_pos)
{
#define BUF_SIZE 64
	char buf[BUF_SIZE];
	struct kbase_device *kbdev;
	enum dumpfault_error_type error_code;
	u32 tgid, ctx_id;
	unsigned long irq_flags;
	int count;

	/* Compile-time sanity check of the formatting buffer size */
	BUILD_BUG_ON(sizeof(buf) < (sizeof(tgid) + sizeof(ctx_id) + sizeof(error_code)));

	if (unlikely(!file)) {
		pr_warn("%s: file is NULL", __func__);
		return -EINVAL;
	}

	kbdev = file->private_data;

	if (unlikely(!buffer)) {
		dev_warn(kbdev->dev, "%s: buffer is NULL", __func__);
		return -EINVAL;
	}

	if (unlikely(*f_pos < 0)) {
		dev_warn(kbdev->dev, "%s: f_pos is negative", __func__);
		return -EINVAL;
	}

	if (size < sizeof(buf)) {
		dev_warn(kbdev->dev, "%s: buffer is too small", __func__);
		return -EINVAL;
	}

	if (wait_event_interruptible(kbdev->csf.dof.fault_wait_wq, kbasep_fault_occurred(kbdev)))
		return -ERESTARTSYS;

	/* Take a consistent snapshot of the fault record */
	spin_lock_irqsave(&kbdev->csf.dof.lock, irq_flags);
	tgid = kbdev->csf.dof.kctx_tgid;
	ctx_id = kbdev->csf.dof.kctx_id;
	error_code = kbdev->csf.dof.error_code;
	spin_unlock_irqrestore(&kbdev->csf.dof.lock, irq_flags);

	count = scnprintf(buf, sizeof(buf), "%u_%u_%u\n", tgid, ctx_id, error_code);

	dev_info(kbdev->dev, "debug csf fault info read");

	return simple_read_from_buffer(buffer, size, f_pos, buf, count);
}
/*
 * open() handler of the csf_fault debugfs file.
 *
 * Atomically flips the dump-on-fault "enabled" flag from 0 to 1 so that only
 * a single client can hold the file open.
 *
 * Return: result of simple_open() on success, -EINVAL on a NULL inode/file,
 * -EBUSY if the flag was already 1.
 */
static int debug_csf_fault_open(struct inode *in, struct file *file)
{
	struct kbase_device *kbdev;
	int was_enabled;

	if (unlikely(!in)) {
		pr_warn("%s: inode is NULL", __func__);
		return -EINVAL;
	}

	kbdev = in->i_private;

	if (unlikely(!file)) {
		dev_warn(kbdev->dev, "%s: file is NULL", __func__);
		return -EINVAL;
	}

	was_enabled = atomic_cmpxchg(&kbdev->csf.dof.enabled, 0, 1);
	if (was_enabled == 1) {
		dev_warn(kbdev->dev, "Only one client is allowed for dump on fault");
		return -EBUSY;
	}

	dev_info(kbdev->dev, "debug csf fault file open");

	return simple_open(in, file);
}
/*
 * write() handler of the csf_fault debugfs file.
 *
 * Any write is taken as "dump finished": the written data is not inspected.
 * The fault record is cleared and the waiters on the dump wait queue are
 * woken up.
 *
 * Return: @count, or -EINVAL on a NULL file.
 */
static ssize_t debug_csf_fault_write(struct file *file, const char __user *ubuf, size_t count,
				     loff_t *ppos)
{
	struct kbase_device *kbdev;
	unsigned long irq_flags;

	if (unlikely(!file)) {
		pr_warn("%s: file is NULL", __func__);
		return -EINVAL;
	}

	kbdev = file->private_data;

	spin_lock_irqsave(&kbdev->csf.dof.lock, irq_flags);
	kbdev->csf.dof.kctx_tgid = 0;
	kbdev->csf.dof.kctx_id = 0;
	kbdev->csf.dof.error_code = DF_NO_ERROR;
	dev_info(kbdev->dev, "debug csf fault dump complete");
	spin_unlock_irqrestore(&kbdev->csf.dof.lock, irq_flags);

	/* The client is done with the dump, let the blocked kernel threads proceed */
	wake_up(&kbdev->csf.dof.dump_wait_wq);

	return count;
}
/*
 * release() handler of the csf_fault debugfs file.
 *
 * Clears the fault record, resets the "enabled" flag to 0 so that the file can
 * be opened again, and wakes up the waiters on the dump wait queue.
 *
 * Return: 0, or -EINVAL on a NULL inode.
 */
static int debug_csf_fault_release(struct inode *in, struct file *file)
{
	struct kbase_device *kbdev;
	unsigned long irq_flags;

	if (unlikely(!in)) {
		pr_warn("%s: inode is NULL", __func__);
		return -EINVAL;
	}

	kbdev = in->i_private;

	spin_lock_irqsave(&kbdev->csf.dof.lock, irq_flags);
	kbdev->csf.dof.error_code = DF_NO_ERROR;
	kbdev->csf.dof.kctx_id = 0;
	kbdev->csf.dof.kctx_tgid = 0;
	spin_unlock_irqrestore(&kbdev->csf.dof.lock, irq_flags);

	atomic_set(&kbdev->csf.dof.enabled, 0);
	dev_info(kbdev->dev, "debug csf fault file close");

	/* The client went away, let the blocked kernel threads resume */
	wake_up(&kbdev->csf.dof.dump_wait_wq);

	return 0;
}
/* File operations of the "csf_fault" debugfs file: open claims the single
 * client slot, read blocks until a fault is recorded, write signals that the
 * dump is complete, and release frees the client slot again.
 */
static const struct file_operations kbasep_debug_csf_fault_fops = {
.owner = THIS_MODULE,
.open = debug_csf_fault_open,
.read = debug_csf_fault_read,
.write = debug_csf_fault_write,
.llseek = default_llseek,
.release = debug_csf_fault_release,
};
/*
 * Create the "csf_fault" debugfs file (mode 0600) in the device's Mali
 * debugfs directory. Does nothing but warn when @kbdev is NULL.
 */
void kbase_debug_csf_fault_debugfs_init(struct kbase_device *kbdev)
{
	if (unlikely(!kbdev)) {
		pr_warn("%s: kbdev is NULL", __func__);
		return;
	}

	debugfs_create_file("csf_fault", 0600, kbdev->mali_debugfs_directory, kbdev,
			    &kbasep_debug_csf_fault_fops);
}
/*
 * Initialise the per-device dump-on-fault state: spinlock, both wait queues,
 * an empty fault record and a cleared "enabled" flag.
 *
 * Return: 0 on success, -EINVAL when @kbdev is NULL.
 */
int kbase_debug_csf_fault_init(struct kbase_device *kbdev)
{
	if (unlikely(!kbdev)) {
		pr_warn("%s: kbdev is NULL", __func__);
		return -EINVAL;
	}

	spin_lock_init(&kbdev->csf.dof.lock);
	init_waitqueue_head(&kbdev->csf.dof.fault_wait_wq);
	init_waitqueue_head(&kbdev->csf.dof.dump_wait_wq);

	/* Start with no fault recorded and no client attached */
	kbdev->csf.dof.error_code = DF_NO_ERROR;
	kbdev->csf.dof.kctx_id = 0;
	kbdev->csf.dof.kctx_tgid = 0;
	atomic_set(&kbdev->csf.dof.enabled, 0);

	return 0;
}
/* Counterpart of kbase_debug_csf_fault_init(). Intentionally empty: the init
 * function only initialises fields embedded in the device and acquires
 * nothing that would need releasing here.
 */
void kbase_debug_csf_fault_term(struct kbase_device *kbdev)
{
}
#endif /* CONFIG_DEBUG_FS */

View File

@ -0,0 +1,137 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
* (C) COPYRIGHT 2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
* Foundation, and any use by you of this program is subject to the terms
* of such GNU license.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, you can access it online at
* http://www.gnu.org/licenses/gpl-2.0.html.
*
*/
#ifndef _KBASE_DEBUG_CSF_FAULT_H
#define _KBASE_DEBUG_CSF_FAULT_H
#if IS_ENABLED(CONFIG_DEBUG_FS)
/**
* kbase_debug_csf_fault_debugfs_init - Initialize CSF fault debugfs
* @kbdev: Device pointer
*/
void kbase_debug_csf_fault_debugfs_init(struct kbase_device *kbdev);
/**
* kbase_debug_csf_fault_init - Create the fault event wait queue per device
* and initialize the required resources.
* @kbdev: Device pointer
*
* Return: Zero on success or a negative error code.
*/
int kbase_debug_csf_fault_init(struct kbase_device *kbdev);
/**
* kbase_debug_csf_fault_term - Clean up resources created by
* @kbase_debug_csf_fault_init.
* @kbdev: Device pointer
*/
void kbase_debug_csf_fault_term(struct kbase_device *kbdev);
/**
* kbase_debug_csf_fault_wait_completion - Wait for the client to complete.
*
* @kbdev: Device Pointer
*
* Wait for the user space client to finish reading the fault information.
* This function must be called in thread context.
*/
void kbase_debug_csf_fault_wait_completion(struct kbase_device *kbdev);
/**
* kbase_debug_csf_fault_notify - Notify client of a fault.
*
* @kbdev: Device pointer
* @kctx: Faulty context (can be NULL)
* @error: Error code.
*
* Store fault information and wake up the user space client.
*
* Return: true if a dump on fault was initiated or is already in progress, so
* the caller can opt to wait for the dumping to complete.
*/
bool kbase_debug_csf_fault_notify(struct kbase_device *kbdev,
struct kbase_context *kctx, enum dumpfault_error_type error);
/**
 * kbase_debug_csf_fault_dump_enabled - Check whether dump on fault is enabled.
 *
 * @kbdev: Device pointer
 *
 * Return: true if the dump-on-fault "enabled" flag is non-zero, i.e. the
 * debugfs file is currently held open by a client.
 */
static inline bool kbase_debug_csf_fault_dump_enabled(struct kbase_device *kbdev)
{
	return atomic_read(&kbdev->csf.dof.enabled) != 0;
}
/**
 * kbase_debug_csf_fault_dump_complete - Check whether dump on fault is done.
 *
 * @kbdev: Device pointer
 *
 * Return: true if dump on fault is not enabled (no client has the file open)
 * or if no fault is currently recorded; false while a fault is pending.
 */
static inline bool kbase_debug_csf_fault_dump_complete(struct kbase_device *kbdev)
{
	unsigned long irq_flags;
	bool no_pending_fault = true;

	if (unlikely(kbase_debug_csf_fault_dump_enabled(kbdev))) {
		spin_lock_irqsave(&kbdev->csf.dof.lock, irq_flags);
		no_pending_fault = (kbdev->csf.dof.error_code == DF_NO_ERROR);
		spin_unlock_irqrestore(&kbdev->csf.dof.lock, irq_flags);
	}

	return no_pending_fault;
}
#else /* CONFIG_DEBUG_FS */
/* Stubs used when CONFIG_DEBUG_FS is not enabled: the dump-on-fault feature
 * is unavailable, so every call is a no-op reporting "nothing to do".
 */
/* Nothing to initialise; always succeeds. */
static inline int kbase_debug_csf_fault_init(struct kbase_device *kbdev)
{
return 0;
}
/* Nothing to clean up. */
static inline void kbase_debug_csf_fault_term(struct kbase_device *kbdev)
{
}
/* No client can exist, so there is nothing to wait for. */
static inline void kbase_debug_csf_fault_wait_completion(struct kbase_device *kbdev)
{
}
/* No dump is ever initiated, hence false. */
static inline bool kbase_debug_csf_fault_notify(struct kbase_device *kbdev,
struct kbase_context *kctx, enum dumpfault_error_type error)
{
return false;
}
/* Dump on fault is never enabled in this configuration. */
static inline bool kbase_debug_csf_fault_dump_enabled(struct kbase_device *kbdev)
{
return false;
}
/* With no dump ever pending, the dump is always considered complete. */
static inline bool kbase_debug_csf_fault_dump_complete(struct kbase_device *kbdev)
{
return true;
}
#endif /* CONFIG_DEBUG_FS */
#endif /*_KBASE_DEBUG_CSF_FAULT_H*/

View File

@ -42,19 +42,25 @@ int dummy_array[] = {
/*
* Generic CSF events
*/
/* info_val = 0 */
KBASE_KTRACE_CODE_MAKE_CODE(SCHEDULER_EVICT_CTX_SLOTS_START),
/* info_val == number of CSGs supported */
KBASE_KTRACE_CODE_MAKE_CODE(SCHEDULER_EVICT_CTX_SLOTS_END),
/* info_val[0:7] == fw version_minor
* info_val[15:8] == fw version_major
* info_val[63:32] == fw version_hash
*/
KBASE_KTRACE_CODE_MAKE_CODE(CSF_FIRMWARE_BOOT),
KBASE_KTRACE_CODE_MAKE_CODE(CSF_FIRMWARE_REBOOT),
KBASE_KTRACE_CODE_MAKE_CODE(SCHEDULER_TOCK_INVOKE),
KBASE_KTRACE_CODE_MAKE_CODE(SCHEDULER_TICK_INVOKE),
KBASE_KTRACE_CODE_MAKE_CODE(SCHEDULER_TOCK_START),
KBASE_KTRACE_CODE_MAKE_CODE(SCHEDULER_TOCK_END),
/* info_val == total number of runnable groups across all kctxs */
KBASE_KTRACE_CODE_MAKE_CODE(SCHEDULER_TICK_START),
KBASE_KTRACE_CODE_MAKE_CODE(SCHEDULER_TICK_END),
KBASE_KTRACE_CODE_MAKE_CODE(SCHEDULER_RESET_START),
KBASE_KTRACE_CODE_MAKE_CODE(SCHEDULER_RESET_END),
/* info_val = timeout in ms */
KBASE_KTRACE_CODE_MAKE_CODE(SCHEDULER_PROTM_WAIT_QUIT_START),
/* info_val = remaining ms timeout, or 0 if timedout */
@ -101,6 +107,8 @@ int dummy_array[] = {
* purpose.
*/
KBASE_KTRACE_CODE_MAKE_CODE(SCHEDULER_GPU_IDLE_WORKER_HANDLING_START),
KBASE_KTRACE_CODE_MAKE_CODE(SCHEDULER_GPU_IDLE_WORKER_HANDLING_END),
KBASE_KTRACE_CODE_MAKE_CODE(CSF_FIRMWARE_MCU_HALTED),
KBASE_KTRACE_CODE_MAKE_CODE(CSF_FIRMWARE_MCU_SLEEP),
@ -126,6 +134,8 @@ int dummy_array[] = {
* group->csg_nr indicates which bit was set
*/
KBASE_KTRACE_CODE_MAKE_CODE(CSG_SLOT_IDLE_SET),
KBASE_KTRACE_CODE_MAKE_CODE(CSG_INTERRUPT_NO_NON_IDLE_GROUPS),
KBASE_KTRACE_CODE_MAKE_CODE(CSG_INTERRUPT_NON_IDLE_GROUPS),
/* info_val = scheduler's new csg_slots_idle_mask[0]
* group->csg_nr indicates which bit was cleared
*
@ -190,10 +200,37 @@ int dummy_array[] = {
KBASE_KTRACE_CODE_MAKE_CODE(SCHEDULER_NONIDLE_OFFSLOT_GRP_INC),
/* info_val == new count of off-slot non-idle groups */
KBASE_KTRACE_CODE_MAKE_CODE(SCHEDULER_NONIDLE_OFFSLOT_GRP_DEC),
/* info_val = scheduler's new csg_slots_idle_mask[0]
* group->csg_nr indicates which bit was set
*/
KBASE_KTRACE_CODE_MAKE_CODE(SCHEDULER_HANDLE_IDLE_SLOTS),
KBASE_KTRACE_CODE_MAKE_CODE(PROTM_EVENT_WORKER_START),
KBASE_KTRACE_CODE_MAKE_CODE(PROTM_EVENT_WORKER_END),
/* info_val = scheduler state */
KBASE_KTRACE_CODE_MAKE_CODE(SCHED_BUSY),
KBASE_KTRACE_CODE_MAKE_CODE(SCHED_INACTIVE),
KBASE_KTRACE_CODE_MAKE_CODE(SCHED_SUSPENDED),
KBASE_KTRACE_CODE_MAKE_CODE(SCHED_SLEEPING),
/* info_val = mcu state */
#define KBASEP_MCU_STATE(n) KBASE_KTRACE_CODE_MAKE_CODE(PM_MCU_ ## n),
#include "backend/gpu/mali_kbase_pm_mcu_states.h"
#undef KBASEP_MCU_STATE
/* info_val = number of runnable groups */
KBASE_KTRACE_CODE_MAKE_CODE(CSF_GROUP_INACTIVE),
KBASE_KTRACE_CODE_MAKE_CODE(CSF_GROUP_RUNNABLE),
KBASE_KTRACE_CODE_MAKE_CODE(CSF_GROUP_IDLE),
KBASE_KTRACE_CODE_MAKE_CODE(CSF_GROUP_SUSPENDED),
KBASE_KTRACE_CODE_MAKE_CODE(CSF_GROUP_SUSPENDED_ON_IDLE),
KBASE_KTRACE_CODE_MAKE_CODE(CSF_GROUP_SUSPENDED_ON_WAIT_SYNC),
/* info_val = new run state of the evicted group */
KBASE_KTRACE_CODE_MAKE_CODE(CSF_GROUP_FAULT_EVICTED),
/* info_val = get the number of active CSGs */
KBASE_KTRACE_CODE_MAKE_CODE(CSF_GROUP_TERMINATED),
/*
* Group + Queue events
*/

View File

@ -31,13 +31,17 @@
* Generic CSF events - using the common DEFINE_MALI_ADD_EVENT
*/
DEFINE_MALI_ADD_EVENT(SCHEDULER_EVICT_CTX_SLOTS_START);
DEFINE_MALI_ADD_EVENT(SCHEDULER_EVICT_CTX_SLOTS_END);
DEFINE_MALI_ADD_EVENT(CSF_FIRMWARE_BOOT);
DEFINE_MALI_ADD_EVENT(CSF_FIRMWARE_REBOOT);
DEFINE_MALI_ADD_EVENT(SCHEDULER_TOCK_INVOKE);
DEFINE_MALI_ADD_EVENT(SCHEDULER_TICK_INVOKE);
DEFINE_MALI_ADD_EVENT(SCHEDULER_TOCK_START);
DEFINE_MALI_ADD_EVENT(SCHEDULER_TOCK_END);
DEFINE_MALI_ADD_EVENT(SCHEDULER_TICK_START);
DEFINE_MALI_ADD_EVENT(SCHEDULER_TICK_END);
DEFINE_MALI_ADD_EVENT(SCHEDULER_RESET_START);
DEFINE_MALI_ADD_EVENT(SCHEDULER_RESET_END);
DEFINE_MALI_ADD_EVENT(SCHEDULER_PROTM_WAIT_QUIT_START);
DEFINE_MALI_ADD_EVENT(SCHEDULER_PROTM_WAIT_QUIT_END);
DEFINE_MALI_ADD_EVENT(SCHEDULER_GROUP_SYNC_UPDATE_EVENT);
@ -58,8 +62,16 @@ DEFINE_MALI_ADD_EVENT(SCHEDULER_GROUP_SYNC_UPDATE_WORKER_START);
DEFINE_MALI_ADD_EVENT(SCHEDULER_GROUP_SYNC_UPDATE_WORKER_END);
DEFINE_MALI_ADD_EVENT(SCHEDULER_UPDATE_IDLE_SLOTS_ACK);
DEFINE_MALI_ADD_EVENT(SCHEDULER_GPU_IDLE_WORKER_HANDLING_START);
DEFINE_MALI_ADD_EVENT(SCHEDULER_GPU_IDLE_WORKER_HANDLING_END);
DEFINE_MALI_ADD_EVENT(CSF_FIRMWARE_MCU_HALTED);
DEFINE_MALI_ADD_EVENT(CSF_FIRMWARE_MCU_SLEEP);
DEFINE_MALI_ADD_EVENT(SCHED_BUSY);
DEFINE_MALI_ADD_EVENT(SCHED_INACTIVE);
DEFINE_MALI_ADD_EVENT(SCHED_SUSPENDED);
DEFINE_MALI_ADD_EVENT(SCHED_SLEEPING);
#define KBASEP_MCU_STATE(n) DEFINE_MALI_ADD_EVENT(PM_MCU_ ## n);
#include "backend/gpu/mali_kbase_pm_mcu_states.h"
#undef KBASEP_MCU_STATE
DECLARE_EVENT_CLASS(mali_csf_grp_q_template,
TP_PROTO(struct kbase_device *kbdev, struct kbase_queue_group *group,
@ -136,6 +148,8 @@ DEFINE_MALI_CSF_GRP_EVENT(CSG_SLOT_STOPPED);
DEFINE_MALI_CSF_GRP_EVENT(CSG_SLOT_CLEANED);
DEFINE_MALI_CSF_GRP_EVENT(CSG_UPDATE_IDLE_SLOT_REQ);
DEFINE_MALI_CSF_GRP_EVENT(CSG_SLOT_IDLE_SET);
DEFINE_MALI_CSF_GRP_EVENT(CSG_INTERRUPT_NO_NON_IDLE_GROUPS);
DEFINE_MALI_CSF_GRP_EVENT(CSG_INTERRUPT_NON_IDLE_GROUPS);
DEFINE_MALI_CSF_GRP_EVENT(CSG_SLOT_IDLE_CLEAR);
DEFINE_MALI_CSF_GRP_EVENT(CSG_SLOT_PRIO_UPDATE);
DEFINE_MALI_CSF_GRP_EVENT(CSG_INTERRUPT_SYNC_UPDATE);
@ -160,8 +174,17 @@ DEFINE_MALI_CSF_GRP_EVENT(SCHEDULER_PROTM_EXIT);
DEFINE_MALI_CSF_GRP_EVENT(SCHEDULER_TOP_GRP);
DEFINE_MALI_CSF_GRP_EVENT(SCHEDULER_NONIDLE_OFFSLOT_GRP_INC);
DEFINE_MALI_CSF_GRP_EVENT(SCHEDULER_NONIDLE_OFFSLOT_GRP_DEC);
DEFINE_MALI_CSF_GRP_EVENT(SCHEDULER_HANDLE_IDLE_SLOTS);
DEFINE_MALI_CSF_GRP_EVENT(PROTM_EVENT_WORKER_START);
DEFINE_MALI_CSF_GRP_EVENT(PROTM_EVENT_WORKER_END);
DEFINE_MALI_CSF_GRP_EVENT(CSF_GROUP_INACTIVE);
DEFINE_MALI_CSF_GRP_EVENT(CSF_GROUP_RUNNABLE);
DEFINE_MALI_CSF_GRP_EVENT(CSF_GROUP_IDLE);
DEFINE_MALI_CSF_GRP_EVENT(CSF_GROUP_SUSPENDED);
DEFINE_MALI_CSF_GRP_EVENT(CSF_GROUP_SUSPENDED_ON_IDLE);
DEFINE_MALI_CSF_GRP_EVENT(CSF_GROUP_SUSPENDED_ON_WAIT_SYNC);
DEFINE_MALI_CSF_GRP_EVENT(CSF_GROUP_FAULT_EVICTED);
DEFINE_MALI_CSF_GRP_EVENT(CSF_GROUP_TERMINATED);
#undef DEFINE_MALI_CSF_GRP_EVENT

View File

@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
* (C) COPYRIGHT 2011-2015, 2018-2021 ARM Limited. All rights reserved.
* (C) COPYRIGHT 2011-2015, 2018-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@ -142,6 +142,11 @@ int dummy_array[] = {
KBASE_KTRACE_CODE_MAKE_CODE(PM_RUNTIME_SUSPEND_CALLBACK),
KBASE_KTRACE_CODE_MAKE_CODE(PM_RUNTIME_RESUME_CALLBACK),
/* info_val = l2 state */
#define KBASEP_L2_STATE(n) KBASE_KTRACE_CODE_MAKE_CODE(PM_L2_ ## n),
#include "backend/gpu/mali_kbase_pm_l2_states.h"
#undef KBASEP_L2_STATE
/*
* Context Scheduler events
*/

View File

@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
* (C) COPYRIGHT 2014, 2018, 2020-2021 ARM Limited. All rights reserved.
* (C) COPYRIGHT 2014, 2018, 2020-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@ -98,6 +98,9 @@ DEFINE_MALI_ADD_EVENT(PM_WAKE_WAITERS);
DEFINE_MALI_ADD_EVENT(PM_POWEROFF_WAIT_WQ);
DEFINE_MALI_ADD_EVENT(PM_RUNTIME_SUSPEND_CALLBACK);
DEFINE_MALI_ADD_EVENT(PM_RUNTIME_RESUME_CALLBACK);
#define KBASEP_L2_STATE(n) DEFINE_MALI_ADD_EVENT(PM_L2_ ## n);
#include "backend/gpu/mali_kbase_pm_l2_states.h"
#undef KBASEP_L2_STATE
DEFINE_MALI_ADD_EVENT(SCHED_RETAIN_CTX_NOLOCK);
DEFINE_MALI_ADD_EVENT(SCHED_RELEASE_CTX);
#ifdef CONFIG_MALI_ARBITER_SUPPORT

View File

@ -23,8 +23,8 @@
#include <device/mali_kbase_device.h>
#include <mali_kbase_hwaccess_backend.h>
#include <mali_kbase_hwcnt_backend_csf_if_fw.h>
#include <mali_kbase_hwcnt_watchdog_if_timer.h>
#include <hwcnt/backend/mali_kbase_hwcnt_backend_csf_if_fw.h>
#include <hwcnt/mali_kbase_hwcnt_watchdog_if_timer.h>
#include <mali_kbase_ctx_sched.h>
#include <mali_kbase_reset_gpu.h>
#include <csf/mali_kbase_csf.h>
@ -40,9 +40,10 @@
#include <backend/gpu/mali_kbase_js_internal.h>
#include <backend/gpu/mali_kbase_clk_rate_trace_mgr.h>
#include <csf/mali_kbase_csf_csg_debugfs.h>
#include <mali_kbase_hwcnt_virtualizer.h>
#include <hwcnt/mali_kbase_hwcnt_virtualizer.h>
#include <mali_kbase_kinstr_prfcnt.h>
#include <mali_kbase_vinstr.h>
#include <tl/mali_kbase_timeline.h>
/**
* kbase_device_firmware_hwcnt_term - Terminate CSF firmware and HWC
@ -60,7 +61,7 @@ static void kbase_device_firmware_hwcnt_term(struct kbase_device *kbdev)
kbase_vinstr_term(kbdev->vinstr_ctx);
kbase_hwcnt_virtualizer_term(kbdev->hwcnt_gpu_virt);
kbase_hwcnt_backend_csf_metadata_term(&kbdev->hwcnt_gpu_iface);
kbase_csf_firmware_term(kbdev);
kbase_csf_firmware_unload_term(kbdev);
}
}
@ -197,6 +198,20 @@ static int kbase_csf_early_init(struct kbase_device *kbdev)
static void kbase_csf_early_term(struct kbase_device *kbdev)
{
/* Registered as the termination hook paired with kbase_csf_early_init()
 * in the dev_init[] table. Invokes the scheduler early-term routine
 * first, then the firmware early-term routine.
 */
kbase_csf_scheduler_early_term(kbdev);
kbase_csf_firmware_early_term(kbdev);
}
/**
 * kbase_csf_late_init - Run the late stage of CSF firmware initialization.
 * @kbdev: Device pointer
 *
 * Thin wrapper that forwards to kbase_csf_firmware_late_init() and hands its
 * result straight back to the caller.
 *
 * Return: 0 on success, error code otherwise.
 */
static int kbase_csf_late_init(struct kbase_device *kbdev)
{
	return kbase_csf_firmware_late_init(kbdev);
}
/**
@ -269,59 +284,48 @@ static void kbase_device_hwcnt_backend_csf_term(struct kbase_device *kbdev)
static const struct kbase_device_init dev_init[] = {
#if IS_ENABLED(CONFIG_MALI_BIFROST_NO_MALI)
{ kbase_gpu_device_create, kbase_gpu_device_destroy,
"Dummy model initialization failed" },
{ kbase_gpu_device_create, kbase_gpu_device_destroy, "Dummy model initialization failed" },
#else
{ assign_irqs, NULL, "IRQ search failed" },
{ registers_map, registers_unmap, "Register map failed" },
#endif
{ power_control_init, power_control_term,
"Power control initialization failed" },
{ power_control_init, power_control_term, "Power control initialization failed" },
{ kbase_device_io_history_init, kbase_device_io_history_term,
"Register access history initialization failed" },
{ kbase_device_early_init, kbase_device_early_term,
"Early device initialization failed" },
{ kbase_device_populate_max_freq, NULL,
"Populating max frequency failed" },
{ kbase_pm_lowest_gpu_freq_init, NULL,
"Lowest freq initialization failed" },
{ kbase_device_early_init, kbase_device_early_term, "Early device initialization failed" },
{ kbase_device_populate_max_freq, NULL, "Populating max frequency failed" },
{ kbase_pm_lowest_gpu_freq_init, NULL, "Lowest freq initialization failed" },
{ kbase_device_misc_init, kbase_device_misc_term,
"Miscellaneous device initialization failed" },
{ kbase_device_pcm_dev_init, kbase_device_pcm_dev_term,
"Priority control manager initialization failed" },
{ kbase_ctx_sched_init, kbase_ctx_sched_term,
"Context scheduler initialization failed" },
{ kbase_mem_init, kbase_mem_term,
"Memory subsystem initialization failed" },
{ kbase_ctx_sched_init, kbase_ctx_sched_term, "Context scheduler initialization failed" },
{ kbase_mem_init, kbase_mem_term, "Memory subsystem initialization failed" },
{ kbase_csf_protected_memory_init, kbase_csf_protected_memory_term,
"Protected memory allocator initialization failed" },
{ kbase_device_coherency_init, NULL, "Device coherency init failed" },
{ kbase_protected_mode_init, kbase_protected_mode_term,
"Protected mode subsystem initialization failed" },
{ kbase_device_list_init, kbase_device_list_term,
"Device list setup failed" },
{ kbase_device_list_init, kbase_device_list_term, "Device list setup failed" },
{ kbase_device_timeline_init, kbase_device_timeline_term,
"Timeline stream initialization failed" },
{ kbase_clk_rate_trace_manager_init, kbase_clk_rate_trace_manager_term,
"Clock rate trace manager initialization failed" },
{ kbase_device_hwcnt_watchdog_if_init,
kbase_device_hwcnt_watchdog_if_term,
{ kbase_device_hwcnt_watchdog_if_init, kbase_device_hwcnt_watchdog_if_term,
"GPU hwcnt backend watchdog interface creation failed" },
{ kbase_device_hwcnt_backend_csf_if_init,
kbase_device_hwcnt_backend_csf_if_term,
{ kbase_device_hwcnt_backend_csf_if_init, kbase_device_hwcnt_backend_csf_if_term,
"GPU hwcnt backend CSF interface creation failed" },
{ kbase_device_hwcnt_backend_csf_init,
kbase_device_hwcnt_backend_csf_term,
{ kbase_device_hwcnt_backend_csf_init, kbase_device_hwcnt_backend_csf_term,
"GPU hwcnt backend creation failed" },
{ kbase_device_hwcnt_context_init, kbase_device_hwcnt_context_term,
"GPU hwcnt context initialization failed" },
{ kbase_csf_early_init, kbase_csf_early_term,
"Early CSF initialization failed" },
{ kbase_backend_late_init, kbase_backend_late_term,
"Late backend initialization failed" },
{ kbase_csf_early_init, kbase_csf_early_term, "Early CSF initialization failed" },
{ kbase_backend_late_init, kbase_backend_late_term, "Late backend initialization failed" },
{ kbase_csf_late_init, NULL, "Late CSF initialization failed" },
{ NULL, kbase_device_firmware_hwcnt_term, NULL },
{ kbase_device_debugfs_init, kbase_device_debugfs_term,
"DebugFS initialization failed" },
{ kbase_debug_csf_fault_init, kbase_debug_csf_fault_term,
"CSF fault debug initialization failed" },
{ kbase_device_debugfs_init, kbase_device_debugfs_term, "DebugFS initialization failed" },
/* Sysfs init needs to happen before registering the device with
* misc_register(), otherwise it causes a race condition between
* registering the device and a uevent event being generated for
@ -339,8 +343,7 @@ static const struct kbase_device_init dev_init[] = {
"Misc device registration failed" },
{ kbase_gpuprops_populate_user_buffer, kbase_gpuprops_free_user_buffer,
"GPU property population failed" },
{ kbase_device_late_init, kbase_device_late_term,
"Late device initialization failed" },
{ kbase_device_late_init, kbase_device_late_term, "Late device initialization failed" },
};
static void kbase_device_term_partial(struct kbase_device *kbdev,
@ -468,7 +471,7 @@ static int kbase_csf_firmware_deferred_init(struct kbase_device *kbdev)
lockdep_assert_held(&kbdev->fw_load_lock);
err = kbase_csf_firmware_init(kbdev);
err = kbase_csf_firmware_load_init(kbdev);
if (!err) {
unsigned long flags;
@ -498,11 +501,12 @@ int kbase_device_firmware_init_once(struct kbase_device *kbdev)
ret = kbase_device_hwcnt_csf_deferred_init(kbdev);
if (ret) {
kbase_csf_firmware_term(kbdev);
kbase_csf_firmware_unload_term(kbdev);
goto out;
}
kbase_csf_debugfs_init(kbdev);
kbase_timeline_io_debugfs_init(kbdev);
out:
kbase_pm_context_idle(kbdev);
}

View File

@ -115,6 +115,9 @@ void kbase_gpu_interrupt(struct kbase_device *kbdev, u32 val)
GPU_EXCEPTION_TYPE_SW_FAULT_0,
} } };
kbase_debug_csf_fault_notify(kbdev, scheduler->active_protm_grp->kctx,
DF_GPU_PROTECTED_FAULT);
scheduler->active_protm_grp->faulted = true;
kbase_csf_add_group_fatal_error(
scheduler->active_protm_grp, &err_payload);
@ -201,8 +204,11 @@ static bool kbase_is_register_accessible(u32 offset)
void kbase_reg_write(struct kbase_device *kbdev, u32 offset, u32 value)
{
KBASE_DEBUG_ASSERT(kbdev->pm.backend.gpu_powered);
KBASE_DEBUG_ASSERT(kbdev->dev != NULL);
if (WARN_ON(!kbdev->pm.backend.gpu_powered))
return;
if (WARN_ON(kbdev->dev == NULL))
return;
if (!kbase_is_register_accessible(offset))
return;
@ -222,8 +228,11 @@ u32 kbase_reg_read(struct kbase_device *kbdev, u32 offset)
{
u32 val;
KBASE_DEBUG_ASSERT(kbdev->pm.backend.gpu_powered);
KBASE_DEBUG_ASSERT(kbdev->dev != NULL);
if (WARN_ON(!kbdev->pm.backend.gpu_powered))
return 0;
if (WARN_ON(kbdev->dev == NULL))
return 0;
if (!kbase_is_register_accessible(offset))
return 0;

View File

@ -27,9 +27,9 @@
#include <mali_kbase_hwaccess_backend.h>
#include <mali_kbase_ctx_sched.h>
#include <mali_kbase_reset_gpu.h>
#include <mali_kbase_hwcnt_watchdog_if_timer.h>
#include <mali_kbase_hwcnt_backend_jm.h>
#include <mali_kbase_hwcnt_backend_jm_watchdog.h>
#include <hwcnt/mali_kbase_hwcnt_watchdog_if_timer.h>
#include <hwcnt/backend/mali_kbase_hwcnt_backend_jm.h>
#include <hwcnt/backend/mali_kbase_hwcnt_backend_jm_watchdog.h>
#if IS_ENABLED(CONFIG_MALI_BIFROST_NO_MALI)
#include <backend/gpu/mali_kbase_model_linux.h>

View File

@ -42,8 +42,8 @@
#include <tl/mali_kbase_timeline.h>
#include "mali_kbase_kinstr_prfcnt.h"
#include "mali_kbase_vinstr.h"
#include "mali_kbase_hwcnt_context.h"
#include "mali_kbase_hwcnt_virtualizer.h"
#include "hwcnt/mali_kbase_hwcnt_context.h"
#include "hwcnt/mali_kbase_hwcnt_virtualizer.h"
#include "mali_kbase_device.h"
#include "mali_kbase_device_internal.h"
@ -56,17 +56,15 @@
#include "arbiter/mali_kbase_arbiter_pm.h"
#endif /* CONFIG_MALI_ARBITER_SUPPORT */
/* NOTE: Magic - 0x45435254 (TRCE in ASCII).
* Supports tracing feature provided in the base module.
* Please keep it in sync with the value of base module.
*/
#define TRACE_BUFFER_HEADER_SPECIAL 0x45435254
#if defined(CONFIG_DEBUG_FS) && !IS_ENABLED(CONFIG_MALI_BIFROST_NO_MALI)
/* Number of register accesses for the buffer that we allocate during
* initialization time. The buffer size can be changed later via debugfs.
*/
#define KBASEP_DEFAULT_REGISTER_HISTORY_SIZE ((u16)512)
#endif /* defined(CONFIG_DEBUG_FS) && !IS_ENABLED(CONFIG_MALI_BIFROST_NO_MALI) */
static DEFINE_MUTEX(kbase_dev_list_lock);
static LIST_HEAD(kbase_dev_list);
static int kbase_dev_nr;

View File

@ -130,7 +130,11 @@ bool kbase_is_gpu_removed(struct kbase_device *kbdev);
*
* Return: 0 if successful or a negative error code on failure.
*/
#define kbase_gpu_cache_flush_pa_range_and_busy_wait(kbdev, phys, nr_bytes, flush_op) (0)
#if MALI_USE_CSF
int kbase_gpu_cache_flush_pa_range_and_busy_wait(struct kbase_device *kbdev, phys_addr_t phys,
size_t nr_bytes, u32 flush_op);
#endif /* MALI_USE_CSF */
/**
* kbase_gpu_cache_flush_and_busy_wait - Start a cache flush and busy wait
* @kbdev: Kbase device

View File

@ -27,9 +27,6 @@
#include <mali_kbase_reset_gpu.h>
#include <mmu/mali_kbase_mmu.h>
#define U64_LO_MASK ((1ULL << 32) - 1)
#define U64_HI_MASK (~U64_LO_MASK)
#if !IS_ENABLED(CONFIG_MALI_BIFROST_NO_MALI)
bool kbase_is_gpu_removed(struct kbase_device *kbdev)
{
@ -86,7 +83,38 @@ static int busy_wait_on_irq(struct kbase_device *kbdev, u32 irq_bit)
return 0;
}
#define kbase_gpu_cache_flush_pa_range_and_busy_wait(kbdev, phys, nr_bytes, flush_op) (0)
#if MALI_USE_CSF
#define U64_LO_MASK ((1ULL << 32) - 1)
#define U64_HI_MASK (~U64_LO_MASK)
/*
 * Flush a physical address range from the GPU cache and busy-wait for the
 * FLUSH_PA_RANGE_COMPLETED interrupt bit.
 *
 * @kbdev:    Kbase device; kbdev->hwaccess_lock must be held by the caller.
 * @phys:     First physical address of the range.
 * @nr_bytes: Length of the range in bytes.
 * @flush_op: Value written to the GPU_COMMAND register to start the flush.
 *
 * Return: the value returned by busy_wait_on_irq().
 */
int kbase_gpu_cache_flush_pa_range_and_busy_wait(struct kbase_device *kbdev, phys_addr_t phys,
						 size_t nr_bytes, u32 flush_op)
{
	/* The range is passed to the GPU as an inclusive [first_pa, last_pa] pair.
	 * NOTE(review): nr_bytes == 0 would make last_pa wrap to first_pa - 1;
	 * callers presumably always pass a non-zero length - confirm.
	 */
	const u64 first_pa = phys;
	const u64 last_pa = first_pa + nr_bytes - 1;

	lockdep_assert_held(&kbdev->hwaccess_lock);

	/* Step 1: clear the FLUSH_PA_RANGE_COMPLETED interrupt bit. */
	kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_CLEAR), FLUSH_PA_RANGE_COMPLETED);

	/* Step 2: load ARG0 with the start address and ARG1 with the end address
	 * (low word, then high word), then issue the flush command.
	 */
	kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND_ARG0_LO), first_pa & U64_LO_MASK);
	kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND_ARG0_HI),
			(first_pa & U64_HI_MASK) >> 32);
	kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND_ARG1_LO), last_pa & U64_LO_MASK);
	kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND_ARG1_HI),
			(last_pa & U64_HI_MASK) >> 32);
	kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND), flush_op);

	/* Step 3: busy-wait on the FLUSH_PA_RANGE_COMPLETED IRQ bit and
	 * propagate the outcome.
	 */
	return busy_wait_on_irq(kbdev, (u32)FLUSH_PA_RANGE_COMPLETED);
}
#endif /* MALI_USE_CSF */
int kbase_gpu_cache_flush_and_busy_wait(struct kbase_device *kbdev,
u32 flush_op)

View File

@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
* (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved.
* (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@ -170,7 +170,7 @@ const char *kbase_gpu_exception_name(u32 const exception_code)
default:
e = "UNKNOWN";
break;
};
}
return e;
}

View File

@ -35,10 +35,7 @@
#define MCU_SUBSYSTEM_BASE 0x20000
/* IPA control registers */
#define IPA_CONTROL_BASE 0x40000
#define IPA_CONTROL_REG(r) (IPA_CONTROL_BASE+(r))
#define COMMAND 0x000 /* (WO) Command register */
#define STATUS 0x004 /* (RO) Status register */
#define TIMER 0x008 /* (RW) Timer control register */
#define SELECT_CSHW_LO 0x010 /* (RW) Counter select for CS hardware, low word */
@ -127,8 +124,16 @@
#define MCU_STATUS_HALTED (1 << 1)
#define L2_CONFIG_PBHA_HWU_SHIFT GPU_U(12)
#define L2_CONFIG_PBHA_HWU_MASK (GPU_U(0xF) << L2_CONFIG_PBHA_HWU_SHIFT)
#define L2_CONFIG_PBHA_HWU_GET(reg_val) \
(((reg_val)&L2_CONFIG_PBHA_HWU_MASK) >> L2_CONFIG_PBHA_HWU_SHIFT)
#define L2_CONFIG_PBHA_HWU_SET(reg_val, value) \
(((reg_val) & ~L2_CONFIG_PBHA_HWU_MASK) | \
(((value) << L2_CONFIG_PBHA_HWU_SHIFT) & L2_CONFIG_PBHA_HWU_MASK))
/* JOB IRQ flags */
#define JOB_IRQ_GLOBAL_IF (1 << 31) /* Global interface interrupt received */
#define JOB_IRQ_GLOBAL_IF (1u << 31) /* Global interface interrupt received */
/* GPU_COMMAND codes */
#define GPU_COMMAND_CODE_NOP 0x00 /* No operation, nothing happens */

View File

@ -127,29 +127,12 @@
#define JOB_SLOT_REG(n, r) (JOB_CONTROL_REG(JOB_SLOT0 + ((n) << 7)) + (r))
#define JS_HEAD_LO 0x00 /* (RO) Job queue head pointer for job slot n, low word */
#define JS_HEAD_HI 0x04 /* (RO) Job queue head pointer for job slot n, high word */
#define JS_TAIL_LO 0x08 /* (RO) Job queue tail pointer for job slot n, low word */
#define JS_TAIL_HI 0x0C /* (RO) Job queue tail pointer for job slot n, high word */
#define JS_AFFINITY_LO 0x10 /* (RO) Core affinity mask for job slot n, low word */
#define JS_AFFINITY_HI 0x14 /* (RO) Core affinity mask for job slot n, high word */
#define JS_CONFIG 0x18 /* (RO) Configuration settings for job slot n */
/* (RO) Extended affinity mask for job slot n*/
#define JS_XAFFINITY 0x1C
#define JS_XAFFINITY 0x1C /* (RO) Extended affinity mask for job slot n*/
#define JS_COMMAND 0x20 /* (WO) Command register for job slot n */
#define JS_STATUS 0x24 /* (RO) Status register for job slot n */
#define JS_HEAD_NEXT_LO 0x40 /* (RW) Next job queue head pointer for job slot n, low word */
#define JS_HEAD_NEXT_HI 0x44 /* (RW) Next job queue head pointer for job slot n, high word */
#define JS_AFFINITY_NEXT_LO 0x50 /* (RW) Next core affinity mask for job slot n, low word */
#define JS_AFFINITY_NEXT_HI 0x54 /* (RW) Next core affinity mask for job slot n, high word */
#define JS_CONFIG_NEXT 0x58 /* (RW) Next configuration settings for job slot n */
/* (RW) Next extended affinity mask for job slot n */
#define JS_XAFFINITY_NEXT 0x5C
#define JS_COMMAND_NEXT 0x60 /* (RW) Next command register for job slot n */
#define JS_XAFFINITY_NEXT 0x5C /* (RW) Next extended affinity mask for job slot n */
#define JS_FLUSH_ID_NEXT 0x70 /* (RW) Next job slot n cache flush ID */

View File

@ -45,9 +45,6 @@
/* Begin Register Offsets */
/* GPU control registers */
#define GPU_CONTROL_BASE 0x0000
#define GPU_CONTROL_REG(r) (GPU_CONTROL_BASE + (r))
#define GPU_ID 0x000 /* (RO) GPU and revision identifier */
#define L2_FEATURES 0x004 /* (RO) Level 2 cache features */
#define TILER_FEATURES 0x00C /* (RO) Tiler Features */
#define MEM_FEATURES 0x010 /* (RO) Memory system features */
@ -100,6 +97,10 @@
#define TEXTURE_FEATURES_REG(n) GPU_CONTROL_REG(TEXTURE_FEATURES_0 + ((n) << 2))
#define GPU_COMMAND_ARG0_LO 0x0D0 /* (RW) Additional parameter 0 for GPU commands, low word */
#define GPU_COMMAND_ARG0_HI 0x0D4 /* (RW) Additional parameter 0 for GPU commands, high word */
#define GPU_COMMAND_ARG1_LO 0x0D8 /* (RW) Additional parameter 1 for GPU commands, low word */
#define GPU_COMMAND_ARG1_HI 0x0DC /* (RW) Additional parameter 1 for GPU commands, high word */
#define SHADER_PRESENT_LO 0x100 /* (RO) Shader core present bitmap, low word */
#define SHADER_PRESENT_HI 0x104 /* (RO) Shader core present bitmap, high word */
@ -113,26 +114,10 @@
#define STACK_PRESENT_LO 0xE00 /* (RO) Core stack present bitmap, low word */
#define STACK_PRESENT_HI 0xE04 /* (RO) Core stack present bitmap, high word */
#define SHADER_READY_LO 0x140 /* (RO) Shader core ready bitmap, low word */
#define SHADER_READY_HI 0x144 /* (RO) Shader core ready bitmap, high word */
#define TILER_READY_LO 0x150 /* (RO) Tiler core ready bitmap, low word */
#define TILER_READY_HI 0x154 /* (RO) Tiler core ready bitmap, high word */
#define L2_READY_LO 0x160 /* (RO) Level 2 cache ready bitmap, low word */
#define L2_READY_HI 0x164 /* (RO) Level 2 cache ready bitmap, high word */
#define STACK_READY_LO 0xE10 /* (RO) Core stack ready bitmap, low word */
#define STACK_READY_HI 0xE14 /* (RO) Core stack ready bitmap, high word */
#define SHADER_PWRON_LO 0x180 /* (WO) Shader core power on bitmap, low word */
#define SHADER_PWRON_HI 0x184 /* (WO) Shader core power on bitmap, high word */
#define TILER_PWRON_LO 0x190 /* (WO) Tiler core power on bitmap, low word */
#define TILER_PWRON_HI 0x194 /* (WO) Tiler core power on bitmap, high word */
#define L2_PWRON_LO 0x1A0 /* (WO) Level 2 cache power on bitmap, low word */
#define L2_PWRON_HI 0x1A4 /* (WO) Level 2 cache power on bitmap, high word */
#define SHADER_PWRFEATURES 0x188 /* (RW) Shader core power features */
#define STACK_PWRON_LO 0xE20 /* (RO) Core stack power on bitmap, low word */
#define STACK_PWRON_HI 0xE24 /* (RO) Core stack power on bitmap, high word */
@ -181,6 +166,8 @@
#define COHERENCY_FEATURES 0x300 /* (RO) Coherency features present */
#define COHERENCY_ENABLE 0x304 /* (RW) Coherency enable */
#define AMBA_FEATURES 0x300 /* (RO) AMBA bus supported features */
#define AMBA_ENABLE 0x304 /* (RW) AMBA features enable */
#define SHADER_CONFIG 0xF04 /* (RW) Shader core configuration (implementation-specific) */
#define TILER_CONFIG 0xF08 /* (RW) Tiler core configuration (implementation-specific) */
@ -188,13 +175,7 @@
/* Job control registers */
#define JOB_CONTROL_BASE 0x1000
#define JOB_CONTROL_REG(r) (JOB_CONTROL_BASE + (r))
#define JOB_IRQ_RAWSTAT 0x000 /* Raw interrupt status register */
#define JOB_IRQ_CLEAR 0x004 /* Interrupt clear register */
#define JOB_IRQ_MASK 0x008 /* Interrupt mask register */
#define JOB_IRQ_STATUS 0x00C /* Interrupt status register */
/* MMU control registers */
@ -203,7 +184,6 @@
#define MMU_IRQ_MASK 0x008 /* (RW) Interrupt mask register */
#define MMU_IRQ_STATUS 0x00C /* (RO) Interrupt status register */
#define MMU_AS0 0x400 /* Configuration registers for address space 0 */
#define MMU_AS1 0x440 /* Configuration registers for address space 1 */
#define MMU_AS2 0x480 /* Configuration registers for address space 2 */
#define MMU_AS3 0x4C0 /* Configuration registers for address space 3 */
@ -221,25 +201,13 @@
#define MMU_AS15 0x7C0 /* Configuration registers for address space 15 */
/* MMU address space control registers */
#define MMU_AS_REG(n, r) (MMU_REG(MMU_AS0 + ((n) << 6)) + (r))
#define AS_TRANSTAB_LO 0x00 /* (RW) Translation Table Base Address for address space n, low word */
#define AS_TRANSTAB_HI 0x04 /* (RW) Translation Table Base Address for address space n, high word */
#define AS_MEMATTR_LO 0x08 /* (RW) Memory attributes for address space n, low word. */
#define AS_MEMATTR_HI 0x0C /* (RW) Memory attributes for address space n, high word. */
#define AS_LOCKADDR_LO 0x10 /* (RW) Lock region address for address space n, low word */
#define AS_LOCKADDR_HI 0x14 /* (RW) Lock region address for address space n, high word */
#define AS_COMMAND 0x18 /* (WO) MMU command register for address space n */
#define AS_FAULTSTATUS 0x1C /* (RO) MMU fault status register for address space n */
#define AS_FAULTADDRESS_LO 0x20 /* (RO) Fault Address for address space n, low word */
#define AS_FAULTADDRESS_HI 0x24 /* (RO) Fault Address for address space n, high word */
#define AS_STATUS 0x28 /* (RO) Status flags for address space n */
/* (RW) Translation table configuration for address space n, low word */
#define AS_TRANSCFG_LO 0x30
/* (RW) Translation table configuration for address space n, high word */
#define AS_TRANSCFG_HI 0x34
/* (RO) Secondary fault address for address space n, low word */
#define AS_FAULTEXTRA_LO 0x38
/* (RO) Secondary fault address for address space n, high word */
@ -464,6 +432,80 @@
#define L2_CONFIG_ASN_HASH_ENABLE_MASK (1ul << L2_CONFIG_ASN_HASH_ENABLE_SHIFT)
/* End L2_CONFIG register */
/* AMBA_FEATURES register
 *
 * Every field below is a single bit. For each field:
 *   _SHIFT            bit position of the field
 *   _MASK             mask of the field in its register position
 *   _GET(reg_val)     extracts the field, shifted down to bit 0
 *   _SET(reg_val, v)  yields reg_val with the field replaced by v
 *                     (v is masked to the field width)
 */
/* ACE_LITE: bit 0 */
#define AMBA_FEATURES_ACE_LITE_SHIFT GPU_U(0)
#define AMBA_FEATURES_ACE_LITE_MASK (GPU_U(0x1) << AMBA_FEATURES_ACE_LITE_SHIFT)
#define AMBA_FEATURES_ACE_LITE_GET(reg_val) \
(((reg_val)&AMBA_FEATURES_ACE_LITE_MASK) >> \
AMBA_FEATURES_ACE_LITE_SHIFT)
#define AMBA_FEATURES_ACE_LITE_SET(reg_val, value) \
(((reg_val) & ~AMBA_FEATURES_ACE_LITE_MASK) | \
(((value) << AMBA_FEATURES_ACE_LITE_SHIFT) & \
AMBA_FEATURES_ACE_LITE_MASK))
/* ACE: bit 1 */
#define AMBA_FEATURES_ACE_SHIFT GPU_U(1)
#define AMBA_FEATURES_ACE_MASK (GPU_U(0x1) << AMBA_FEATURES_ACE_SHIFT)
#define AMBA_FEATURES_ACE_GET(reg_val) \
(((reg_val)&AMBA_FEATURES_ACE_MASK) >> AMBA_FEATURES_ACE_SHIFT)
#define AMBA_FEATURES_ACE_SET(reg_val, value) \
(((reg_val) & ~AMBA_FEATURES_ACE_MASK) | \
(((value) << AMBA_FEATURES_ACE_SHIFT) & AMBA_FEATURES_ACE_MASK))
/* MEMORY_CACHE_SUPPORT: bit 5 */
#define AMBA_FEATURES_MEMORY_CACHE_SUPPORT_SHIFT GPU_U(5)
#define AMBA_FEATURES_MEMORY_CACHE_SUPPORT_MASK \
(GPU_U(0x1) << AMBA_FEATURES_MEMORY_CACHE_SUPPORT_SHIFT)
#define AMBA_FEATURES_MEMORY_CACHE_SUPPORT_GET(reg_val) \
(((reg_val)&AMBA_FEATURES_MEMORY_CACHE_SUPPORT_MASK) >> \
AMBA_FEATURES_MEMORY_CACHE_SUPPORT_SHIFT)
#define AMBA_FEATURES_MEMORY_CACHE_SUPPORT_SET(reg_val, value) \
(((reg_val) & ~AMBA_FEATURES_MEMORY_CACHE_SUPPORT_MASK) | \
(((value) << AMBA_FEATURES_MEMORY_CACHE_SUPPORT_SHIFT) & \
AMBA_FEATURES_MEMORY_CACHE_SUPPORT_MASK))
/* INVALIDATE_HINT: bit 6 */
#define AMBA_FEATURES_INVALIDATE_HINT_SHIFT GPU_U(6)
#define AMBA_FEATURES_INVALIDATE_HINT_MASK \
(GPU_U(0x1) << AMBA_FEATURES_INVALIDATE_HINT_SHIFT)
#define AMBA_FEATURES_INVALIDATE_HINT_GET(reg_val) \
(((reg_val)&AMBA_FEATURES_INVALIDATE_HINT_MASK) >> \
AMBA_FEATURES_INVALIDATE_HINT_SHIFT)
#define AMBA_FEATURES_INVALIDATE_HINT_SET(reg_val, value) \
(((reg_val) & ~AMBA_FEATURES_INVALIDATE_HINT_MASK) | \
(((value) << AMBA_FEATURES_INVALIDATE_HINT_SHIFT) & \
AMBA_FEATURES_INVALIDATE_HINT_MASK))
/* AMBA_ENABLE register
 *
 * For each field: _SHIFT is the bit position, _MASK the in-place mask,
 * _GET(reg_val) extracts the field shifted down to bit 0, and
 * _SET(reg_val, v) yields reg_val with the field replaced by v
 * (v is masked to the field width).
 */
/* COHERENCY_PROTOCOL: 5-bit field, bits 4:0 */
#define AMBA_ENABLE_COHERENCY_PROTOCOL_SHIFT GPU_U(0)
#define AMBA_ENABLE_COHERENCY_PROTOCOL_MASK \
(GPU_U(0x1F) << AMBA_ENABLE_COHERENCY_PROTOCOL_SHIFT)
#define AMBA_ENABLE_COHERENCY_PROTOCOL_GET(reg_val) \
(((reg_val)&AMBA_ENABLE_COHERENCY_PROTOCOL_MASK) >> \
AMBA_ENABLE_COHERENCY_PROTOCOL_SHIFT)
#define AMBA_ENABLE_COHERENCY_PROTOCOL_SET(reg_val, value) \
(((reg_val) & ~AMBA_ENABLE_COHERENCY_PROTOCOL_MASK) | \
(((value) << AMBA_ENABLE_COHERENCY_PROTOCOL_SHIFT) & \
AMBA_ENABLE_COHERENCY_PROTOCOL_MASK))
/* AMBA_ENABLE_coherency_protocol values (unshifted field values) */
#define AMBA_ENABLE_COHERENCY_PROTOCOL_ACE_LITE 0x0
#define AMBA_ENABLE_COHERENCY_PROTOCOL_ACE 0x1
#define AMBA_ENABLE_COHERENCY_PROTOCOL_NO_COHERENCY 0x1F
/* End of AMBA_ENABLE_coherency_protocol values */
/* MEMORY_CACHE_SUPPORT: single bit, bit 5 */
#define AMBA_ENABLE_MEMORY_CACHE_SUPPORT_SHIFT GPU_U(5)
#define AMBA_ENABLE_MEMORY_CACHE_SUPPORT_MASK \
(GPU_U(0x1) << AMBA_ENABLE_MEMORY_CACHE_SUPPORT_SHIFT)
#define AMBA_ENABLE_MEMORY_CACHE_SUPPORT_GET(reg_val) \
(((reg_val)&AMBA_ENABLE_MEMORY_CACHE_SUPPORT_MASK) >> \
AMBA_ENABLE_MEMORY_CACHE_SUPPORT_SHIFT)
#define AMBA_ENABLE_MEMORY_CACHE_SUPPORT_SET(reg_val, value) \
(((reg_val) & ~AMBA_ENABLE_MEMORY_CACHE_SUPPORT_MASK) | \
(((value) << AMBA_ENABLE_MEMORY_CACHE_SUPPORT_SHIFT) & \
AMBA_ENABLE_MEMORY_CACHE_SUPPORT_MASK))
/* INVALIDATE_HINT: single bit, bit 6 */
#define AMBA_ENABLE_INVALIDATE_HINT_SHIFT GPU_U(6)
#define AMBA_ENABLE_INVALIDATE_HINT_MASK \
(GPU_U(0x1) << AMBA_ENABLE_INVALIDATE_HINT_SHIFT)
#define AMBA_ENABLE_INVALIDATE_HINT_GET(reg_val) \
(((reg_val)&AMBA_ENABLE_INVALIDATE_HINT_MASK) >> \
AMBA_ENABLE_INVALIDATE_HINT_SHIFT)
#define AMBA_ENABLE_INVALIDATE_HINT_SET(reg_val, value) \
(((reg_val) & ~AMBA_ENABLE_INVALIDATE_HINT_MASK) | \
(((value) << AMBA_ENABLE_INVALIDATE_HINT_SHIFT) & \
AMBA_ENABLE_INVALIDATE_HINT_MASK))
/* IDVS_GROUP register */
#define IDVS_GROUP_SIZE_SHIFT (16)

View File

@ -1,6 +1,6 @@
# SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
#
# (C) COPYRIGHT 2012, 2020-2021 ARM Limited. All rights reserved.
# (C) COPYRIGHT 2022 ARM Limited. All rights reserved.
#
# This program is free software and is provided to you under the terms of the
# GNU General Public License version 2 as published by the Free Software
@ -18,6 +18,20 @@
#
#
ifeq ($(CONFIG_DMA_BUF_LOCK), y)
obj-m := dma_buf_lock.o
bifrost_kbase-y += \
hwcnt/mali_kbase_hwcnt.o \
hwcnt/mali_kbase_hwcnt_gpu.o \
hwcnt/mali_kbase_hwcnt_gpu_narrow.o \
hwcnt/mali_kbase_hwcnt_types.o \
hwcnt/mali_kbase_hwcnt_virtualizer.o \
hwcnt/mali_kbase_hwcnt_watchdog_if_timer.o
ifeq ($(CONFIG_MALI_CSF_SUPPORT),y)
bifrost_kbase-y += \
hwcnt/backend/mali_kbase_hwcnt_backend_csf.o \
hwcnt/backend/mali_kbase_hwcnt_backend_csf_if_fw.o
else
bifrost_kbase-y += \
hwcnt/backend/mali_kbase_hwcnt_backend_jm.o \
hwcnt/backend/mali_kbase_hwcnt_backend_jm_watchdog.o
endif

View File

@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
* (C) COPYRIGHT 2018, 2020-2021 ARM Limited. All rights reserved.
* (C) COPYRIGHT 2018, 2020-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@ -56,8 +56,8 @@ struct kbase_hwcnt_backend;
*
* Return: Non-NULL pointer to immutable hardware counter metadata.
*/
typedef const struct kbase_hwcnt_metadata *kbase_hwcnt_backend_metadata_fn(
const struct kbase_hwcnt_backend_info *info);
typedef const struct kbase_hwcnt_metadata *
kbase_hwcnt_backend_metadata_fn(const struct kbase_hwcnt_backend_info *info);
/**
* typedef kbase_hwcnt_backend_init_fn - Initialise a counter backend.
@ -69,9 +69,8 @@ typedef const struct kbase_hwcnt_metadata *kbase_hwcnt_backend_metadata_fn(
*
* Return: 0 on success, else error code.
*/
typedef int kbase_hwcnt_backend_init_fn(
const struct kbase_hwcnt_backend_info *info,
struct kbase_hwcnt_backend **out_backend);
typedef int kbase_hwcnt_backend_init_fn(const struct kbase_hwcnt_backend_info *info,
struct kbase_hwcnt_backend **out_backend);
/**
* typedef kbase_hwcnt_backend_term_fn - Terminate a counter backend.
@ -86,8 +85,7 @@ typedef void kbase_hwcnt_backend_term_fn(struct kbase_hwcnt_backend *backend);
*
* Return: Backend timestamp in nanoseconds.
*/
typedef u64 kbase_hwcnt_backend_timestamp_ns_fn(
struct kbase_hwcnt_backend *backend);
typedef u64 kbase_hwcnt_backend_timestamp_ns_fn(struct kbase_hwcnt_backend *backend);
/**
* typedef kbase_hwcnt_backend_dump_enable_fn - Start counter dumping with the
@ -102,9 +100,8 @@ typedef u64 kbase_hwcnt_backend_timestamp_ns_fn(
*
* Return: 0 on success, else error code.
*/
typedef int kbase_hwcnt_backend_dump_enable_fn(
struct kbase_hwcnt_backend *backend,
const struct kbase_hwcnt_enable_map *enable_map);
typedef int kbase_hwcnt_backend_dump_enable_fn(struct kbase_hwcnt_backend *backend,
const struct kbase_hwcnt_enable_map *enable_map);
/**
* typedef kbase_hwcnt_backend_dump_enable_nolock_fn - Start counter dumping
@ -118,9 +115,9 @@ typedef int kbase_hwcnt_backend_dump_enable_fn(
*
* Return: 0 on success, else error code.
*/
typedef int kbase_hwcnt_backend_dump_enable_nolock_fn(
struct kbase_hwcnt_backend *backend,
const struct kbase_hwcnt_enable_map *enable_map);
typedef int
kbase_hwcnt_backend_dump_enable_nolock_fn(struct kbase_hwcnt_backend *backend,
const struct kbase_hwcnt_enable_map *enable_map);
/**
* typedef kbase_hwcnt_backend_dump_disable_fn - Disable counter dumping with
@ -130,8 +127,7 @@ typedef int kbase_hwcnt_backend_dump_enable_nolock_fn(
* If the backend is already disabled, does nothing.
* Any undumped counter values since the last dump get will be lost.
*/
typedef void kbase_hwcnt_backend_dump_disable_fn(
struct kbase_hwcnt_backend *backend);
typedef void kbase_hwcnt_backend_dump_disable_fn(struct kbase_hwcnt_backend *backend);
/**
* typedef kbase_hwcnt_backend_dump_clear_fn - Reset all the current undumped
@ -142,8 +138,7 @@ typedef void kbase_hwcnt_backend_dump_disable_fn(
*
* Return: 0 on success, else error code.
*/
typedef int kbase_hwcnt_backend_dump_clear_fn(
struct kbase_hwcnt_backend *backend);
typedef int kbase_hwcnt_backend_dump_clear_fn(struct kbase_hwcnt_backend *backend);
/**
* typedef kbase_hwcnt_backend_dump_request_fn - Request an asynchronous counter
@ -157,9 +152,8 @@ typedef int kbase_hwcnt_backend_dump_clear_fn(
*
* Return: 0 on success, else error code.
*/
typedef int kbase_hwcnt_backend_dump_request_fn(
struct kbase_hwcnt_backend *backend,
u64 *dump_time_ns);
typedef int kbase_hwcnt_backend_dump_request_fn(struct kbase_hwcnt_backend *backend,
u64 *dump_time_ns);
/**
* typedef kbase_hwcnt_backend_dump_wait_fn - Wait until the last requested
@ -170,8 +164,7 @@ typedef int kbase_hwcnt_backend_dump_request_fn(
*
* Return: 0 on success, else error code.
*/
typedef int kbase_hwcnt_backend_dump_wait_fn(
struct kbase_hwcnt_backend *backend);
typedef int kbase_hwcnt_backend_dump_wait_fn(struct kbase_hwcnt_backend *backend);
/**
* typedef kbase_hwcnt_backend_dump_get_fn - Copy or accumulate enable the
@ -189,11 +182,10 @@ typedef int kbase_hwcnt_backend_dump_wait_fn(
*
* Return: 0 on success, else error code.
*/
typedef int kbase_hwcnt_backend_dump_get_fn(
struct kbase_hwcnt_backend *backend,
struct kbase_hwcnt_dump_buffer *dump_buffer,
const struct kbase_hwcnt_enable_map *enable_map,
bool accumulate);
typedef int kbase_hwcnt_backend_dump_get_fn(struct kbase_hwcnt_backend *backend,
struct kbase_hwcnt_dump_buffer *dump_buffer,
const struct kbase_hwcnt_enable_map *enable_map,
bool accumulate);
/**
* struct kbase_hwcnt_backend_interface - Hardware counter backend virtual

View File

@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
* (C) COPYRIGHT 2021 ARM Limited. All rights reserved.
* (C) COPYRIGHT 2021-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@ -27,9 +27,9 @@
#ifndef _KBASE_HWCNT_BACKEND_CSF_H_
#define _KBASE_HWCNT_BACKEND_CSF_H_
#include "mali_kbase_hwcnt_backend.h"
#include "mali_kbase_hwcnt_backend_csf_if.h"
#include "mali_kbase_hwcnt_watchdog_if.h"
#include "hwcnt/backend/mali_kbase_hwcnt_backend.h"
#include "hwcnt/backend/mali_kbase_hwcnt_backend_csf_if.h"
#include "hwcnt/mali_kbase_hwcnt_watchdog_if.h"
/**
* kbase_hwcnt_backend_csf_create() - Create a CSF hardware counter backend
@ -47,10 +47,9 @@
*
* Return: 0 on success, else error code.
*/
int kbase_hwcnt_backend_csf_create(
struct kbase_hwcnt_backend_csf_if *csf_if, u32 ring_buf_cnt,
struct kbase_hwcnt_watchdog_interface *watchdog_if,
struct kbase_hwcnt_backend_interface *iface);
int kbase_hwcnt_backend_csf_create(struct kbase_hwcnt_backend_csf_if *csf_if, u32 ring_buf_cnt,
struct kbase_hwcnt_watchdog_interface *watchdog_if,
struct kbase_hwcnt_backend_interface *iface);
/**
* kbase_hwcnt_backend_csf_metadata_init() - Initialize the metadata for a CSF
@ -58,16 +57,14 @@ int kbase_hwcnt_backend_csf_create(
* @iface: Non-NULL pointer to backend interface structure
* Return: 0 on success, else error code.
*/
int kbase_hwcnt_backend_csf_metadata_init(
struct kbase_hwcnt_backend_interface *iface);
int kbase_hwcnt_backend_csf_metadata_init(struct kbase_hwcnt_backend_interface *iface);
/**
* kbase_hwcnt_backend_csf_metadata_term() - Terminate the metadata for a CSF
* hardware counter backend.
* @iface: Non-NULL pointer to backend interface structure.
*/
void kbase_hwcnt_backend_csf_metadata_term(
struct kbase_hwcnt_backend_interface *iface);
void kbase_hwcnt_backend_csf_metadata_term(struct kbase_hwcnt_backend_interface *iface);
/**
* kbase_hwcnt_backend_csf_destroy() - Destroy a CSF hardware counter backend
@ -77,8 +74,7 @@ void kbase_hwcnt_backend_csf_metadata_term(
* Can be safely called on an all-zeroed interface, or on an already destroyed
* interface.
*/
void kbase_hwcnt_backend_csf_destroy(
struct kbase_hwcnt_backend_interface *iface);
void kbase_hwcnt_backend_csf_destroy(struct kbase_hwcnt_backend_interface *iface);
/**
* kbase_hwcnt_backend_csf_protm_entered() - CSF HWC backend function to receive
@ -86,8 +82,7 @@ void kbase_hwcnt_backend_csf_destroy(
* has been entered.
* @iface: Non-NULL pointer to HWC backend interface.
*/
void kbase_hwcnt_backend_csf_protm_entered(
struct kbase_hwcnt_backend_interface *iface);
void kbase_hwcnt_backend_csf_protm_entered(struct kbase_hwcnt_backend_interface *iface);
/**
* kbase_hwcnt_backend_csf_protm_exited() - CSF HWC backend function to receive
@ -95,8 +90,7 @@ void kbase_hwcnt_backend_csf_protm_entered(
* been exited.
* @iface: Non-NULL pointer to HWC backend interface.
*/
void kbase_hwcnt_backend_csf_protm_exited(
struct kbase_hwcnt_backend_interface *iface);
void kbase_hwcnt_backend_csf_protm_exited(struct kbase_hwcnt_backend_interface *iface);
/**
* kbase_hwcnt_backend_csf_on_unrecoverable_error() - CSF HWC backend function
@ -108,8 +102,7 @@ void kbase_hwcnt_backend_csf_protm_exited(
* with reset, or that may put HWC logic in a state that could result in a hang. For
* example, on bus error, or when FW becomes unresponsive.
*/
void kbase_hwcnt_backend_csf_on_unrecoverable_error(
struct kbase_hwcnt_backend_interface *iface);
void kbase_hwcnt_backend_csf_on_unrecoverable_error(struct kbase_hwcnt_backend_interface *iface);
/**
* kbase_hwcnt_backend_csf_on_before_reset() - CSF HWC backend function to be
@ -119,16 +112,14 @@ void kbase_hwcnt_backend_csf_on_unrecoverable_error(
* were in it.
* @iface: Non-NULL pointer to HWC backend interface.
*/
void kbase_hwcnt_backend_csf_on_before_reset(
struct kbase_hwcnt_backend_interface *iface);
void kbase_hwcnt_backend_csf_on_before_reset(struct kbase_hwcnt_backend_interface *iface);
/**
* kbase_hwcnt_backend_csf_on_prfcnt_sample() - CSF performance counter sample
* complete interrupt handler.
* @iface: Non-NULL pointer to HWC backend interface.
*/
void kbase_hwcnt_backend_csf_on_prfcnt_sample(
struct kbase_hwcnt_backend_interface *iface);
void kbase_hwcnt_backend_csf_on_prfcnt_sample(struct kbase_hwcnt_backend_interface *iface);
/**
* kbase_hwcnt_backend_csf_on_prfcnt_threshold() - CSF performance counter
@ -136,31 +127,27 @@ void kbase_hwcnt_backend_csf_on_prfcnt_sample(
* interrupt handler.
* @iface: Non-NULL pointer to HWC backend interface.
*/
void kbase_hwcnt_backend_csf_on_prfcnt_threshold(
struct kbase_hwcnt_backend_interface *iface);
void kbase_hwcnt_backend_csf_on_prfcnt_threshold(struct kbase_hwcnt_backend_interface *iface);
/**
* kbase_hwcnt_backend_csf_on_prfcnt_overflow() - CSF performance counter buffer
* overflow interrupt handler.
* @iface: Non-NULL pointer to HWC backend interface.
*/
void kbase_hwcnt_backend_csf_on_prfcnt_overflow(
struct kbase_hwcnt_backend_interface *iface);
void kbase_hwcnt_backend_csf_on_prfcnt_overflow(struct kbase_hwcnt_backend_interface *iface);
/**
* kbase_hwcnt_backend_csf_on_prfcnt_enable() - CSF performance counter enabled
* interrupt handler.
* @iface: Non-NULL pointer to HWC backend interface.
*/
void kbase_hwcnt_backend_csf_on_prfcnt_enable(
struct kbase_hwcnt_backend_interface *iface);
void kbase_hwcnt_backend_csf_on_prfcnt_enable(struct kbase_hwcnt_backend_interface *iface);
/**
* kbase_hwcnt_backend_csf_on_prfcnt_disable() - CSF performance counter
* disabled interrupt handler.
* @iface: Non-NULL pointer to HWC backend interface.
*/
void kbase_hwcnt_backend_csf_on_prfcnt_disable(
struct kbase_hwcnt_backend_interface *iface);
void kbase_hwcnt_backend_csf_on_prfcnt_disable(struct kbase_hwcnt_backend_interface *iface);
#endif /* _KBASE_HWCNT_BACKEND_CSF_H_ */

View File

@ -85,8 +85,8 @@ struct kbase_hwcnt_backend_csf_if_prfcnt_info {
* held.
* @ctx: Non-NULL pointer to a CSF context.
*/
typedef void kbase_hwcnt_backend_csf_if_assert_lock_held_fn(
struct kbase_hwcnt_backend_csf_if_ctx *ctx);
typedef void
kbase_hwcnt_backend_csf_if_assert_lock_held_fn(struct kbase_hwcnt_backend_csf_if_ctx *ctx);
/**
* typedef kbase_hwcnt_backend_csf_if_lock_fn - Acquire backend spinlock.
@ -95,9 +95,8 @@ typedef void kbase_hwcnt_backend_csf_if_assert_lock_held_fn(
* @flags: Pointer to the memory location that would store the previous
* interrupt state.
*/
typedef void kbase_hwcnt_backend_csf_if_lock_fn(
struct kbase_hwcnt_backend_csf_if_ctx *ctx,
unsigned long *flags);
typedef void kbase_hwcnt_backend_csf_if_lock_fn(struct kbase_hwcnt_backend_csf_if_ctx *ctx,
unsigned long *flags);
/**
* typedef kbase_hwcnt_backend_csf_if_unlock_fn - Release backend spinlock.
@ -106,9 +105,8 @@ typedef void kbase_hwcnt_backend_csf_if_lock_fn(
* @flags: Previously stored interrupt state when Scheduler interrupt
* spinlock was acquired.
*/
typedef void kbase_hwcnt_backend_csf_if_unlock_fn(
struct kbase_hwcnt_backend_csf_if_ctx *ctx,
unsigned long flags);
typedef void kbase_hwcnt_backend_csf_if_unlock_fn(struct kbase_hwcnt_backend_csf_if_ctx *ctx,
unsigned long flags);
/**
* typedef kbase_hwcnt_backend_csf_if_get_prfcnt_info_fn - Get performance
@ -137,10 +135,10 @@ typedef void kbase_hwcnt_backend_csf_if_get_prfcnt_info_fn(
*
* Return: 0 on success, else error code.
*/
typedef int kbase_hwcnt_backend_csf_if_ring_buf_alloc_fn(
struct kbase_hwcnt_backend_csf_if_ctx *ctx, u32 buf_count,
void **cpu_dump_base,
struct kbase_hwcnt_backend_csf_if_ring_buf **ring_buf);
typedef int
kbase_hwcnt_backend_csf_if_ring_buf_alloc_fn(struct kbase_hwcnt_backend_csf_if_ctx *ctx,
u32 buf_count, void **cpu_dump_base,
struct kbase_hwcnt_backend_csf_if_ring_buf **ring_buf);
/**
* typedef kbase_hwcnt_backend_csf_if_ring_buf_sync_fn - Sync HWC dump buffers
@ -159,10 +157,10 @@ typedef int kbase_hwcnt_backend_csf_if_ring_buf_alloc_fn(
* Flush cached HWC dump buffer data to ensure that all writes from GPU and CPU
* are correctly observed.
*/
typedef void kbase_hwcnt_backend_csf_if_ring_buf_sync_fn(
struct kbase_hwcnt_backend_csf_if_ctx *ctx,
struct kbase_hwcnt_backend_csf_if_ring_buf *ring_buf,
u32 buf_index_first, u32 buf_index_last, bool for_cpu);
typedef void
kbase_hwcnt_backend_csf_if_ring_buf_sync_fn(struct kbase_hwcnt_backend_csf_if_ctx *ctx,
struct kbase_hwcnt_backend_csf_if_ring_buf *ring_buf,
u32 buf_index_first, u32 buf_index_last, bool for_cpu);
/**
* typedef kbase_hwcnt_backend_csf_if_ring_buf_free_fn - Free a ring buffer for
@ -171,9 +169,9 @@ typedef void kbase_hwcnt_backend_csf_if_ring_buf_sync_fn(
* @ctx: Non-NULL pointer to a CSF interface context.
* @ring_buf: Non-NULL pointer to the ring buffer which is to be freed.
*/
typedef void kbase_hwcnt_backend_csf_if_ring_buf_free_fn(
struct kbase_hwcnt_backend_csf_if_ctx *ctx,
struct kbase_hwcnt_backend_csf_if_ring_buf *ring_buf);
typedef void
kbase_hwcnt_backend_csf_if_ring_buf_free_fn(struct kbase_hwcnt_backend_csf_if_ctx *ctx,
struct kbase_hwcnt_backend_csf_if_ring_buf *ring_buf);
/**
* typedef kbase_hwcnt_backend_csf_if_timestamp_ns_fn - Get the current
@ -183,8 +181,7 @@ typedef void kbase_hwcnt_backend_csf_if_ring_buf_free_fn(
*
* Return: CSF interface timestamp in nanoseconds.
*/
typedef u64 kbase_hwcnt_backend_csf_if_timestamp_ns_fn(
struct kbase_hwcnt_backend_csf_if_ctx *ctx);
typedef u64 kbase_hwcnt_backend_csf_if_timestamp_ns_fn(struct kbase_hwcnt_backend_csf_if_ctx *ctx);
/**
* typedef kbase_hwcnt_backend_csf_if_dump_enable_fn - Setup and enable hardware
@ -195,10 +192,10 @@ typedef u64 kbase_hwcnt_backend_csf_if_timestamp_ns_fn(
*
* Requires lock to be taken before calling.
*/
typedef void kbase_hwcnt_backend_csf_if_dump_enable_fn(
struct kbase_hwcnt_backend_csf_if_ctx *ctx,
struct kbase_hwcnt_backend_csf_if_ring_buf *ring_buf,
struct kbase_hwcnt_backend_csf_if_enable *enable);
typedef void
kbase_hwcnt_backend_csf_if_dump_enable_fn(struct kbase_hwcnt_backend_csf_if_ctx *ctx,
struct kbase_hwcnt_backend_csf_if_ring_buf *ring_buf,
struct kbase_hwcnt_backend_csf_if_enable *enable);
/**
* typedef kbase_hwcnt_backend_csf_if_dump_disable_fn - Disable hardware counter
@ -207,8 +204,7 @@ typedef void kbase_hwcnt_backend_csf_if_dump_enable_fn(
*
* Requires lock to be taken before calling.
*/
typedef void kbase_hwcnt_backend_csf_if_dump_disable_fn(
struct kbase_hwcnt_backend_csf_if_ctx *ctx);
typedef void kbase_hwcnt_backend_csf_if_dump_disable_fn(struct kbase_hwcnt_backend_csf_if_ctx *ctx);
/**
* typedef kbase_hwcnt_backend_csf_if_dump_request_fn - Request a HWC dump.
@ -217,8 +213,7 @@ typedef void kbase_hwcnt_backend_csf_if_dump_disable_fn(
*
* Requires lock to be taken before calling.
*/
typedef void kbase_hwcnt_backend_csf_if_dump_request_fn(
struct kbase_hwcnt_backend_csf_if_ctx *ctx);
typedef void kbase_hwcnt_backend_csf_if_dump_request_fn(struct kbase_hwcnt_backend_csf_if_ctx *ctx);
/**
* typedef kbase_hwcnt_backend_csf_if_get_indexes_fn - Get current extract and
@ -231,9 +226,8 @@ typedef void kbase_hwcnt_backend_csf_if_dump_request_fn(
*
* Requires lock to be taken before calling.
*/
typedef void kbase_hwcnt_backend_csf_if_get_indexes_fn(
struct kbase_hwcnt_backend_csf_if_ctx *ctx, u32 *extract_index,
u32 *insert_index);
typedef void kbase_hwcnt_backend_csf_if_get_indexes_fn(struct kbase_hwcnt_backend_csf_if_ctx *ctx,
u32 *extract_index, u32 *insert_index);
/**
* typedef kbase_hwcnt_backend_csf_if_set_extract_index_fn - Update the extract
@ -245,8 +239,9 @@ typedef void kbase_hwcnt_backend_csf_if_get_indexes_fn(
*
* Requires lock to be taken before calling.
*/
typedef void kbase_hwcnt_backend_csf_if_set_extract_index_fn(
struct kbase_hwcnt_backend_csf_if_ctx *ctx, u32 extract_index);
typedef void
kbase_hwcnt_backend_csf_if_set_extract_index_fn(struct kbase_hwcnt_backend_csf_if_ctx *ctx,
u32 extract_index);
/**
* typedef kbase_hwcnt_backend_csf_if_get_gpu_cycle_count_fn - Get the current
@ -260,9 +255,9 @@ typedef void kbase_hwcnt_backend_csf_if_set_extract_index_fn(
*
* Requires lock to be taken before calling.
*/
typedef void kbase_hwcnt_backend_csf_if_get_gpu_cycle_count_fn(
struct kbase_hwcnt_backend_csf_if_ctx *ctx, u64 *cycle_counts,
u64 clk_enable_map);
typedef void
kbase_hwcnt_backend_csf_if_get_gpu_cycle_count_fn(struct kbase_hwcnt_backend_csf_if_ctx *ctx,
u64 *cycle_counts, u64 clk_enable_map);
/**
* struct kbase_hwcnt_backend_csf_if - Hardware counter backend CSF virtual

View File

@ -26,12 +26,12 @@
#include <mali_kbase.h>
#include <gpu/mali_kbase_gpu_regmap.h>
#include <device/mali_kbase_device.h>
#include "mali_kbase_hwcnt_gpu.h"
#include "mali_kbase_hwcnt_types.h"
#include "hwcnt/mali_kbase_hwcnt_gpu.h"
#include "hwcnt/mali_kbase_hwcnt_types.h"
#include <csf/mali_kbase_csf_registers.h>
#include "csf/mali_kbase_csf_firmware.h"
#include "mali_kbase_hwcnt_backend_csf_if_fw.h"
#include "hwcnt/backend/mali_kbase_hwcnt_backend_csf_if_fw.h"
#include "mali_kbase_hwaccess_time.h"
#include "backend/gpu/mali_kbase_clk_rate_trace_mgr.h"
@ -42,9 +42,6 @@
#include <backend/gpu/mali_kbase_model_dummy.h>
#endif /* CONFIG_MALI_BIFROST_NO_MALI */
/** The number of nanoseconds in a second. */
#define NSECS_IN_SEC 1000000000ull /* ns */
/* Ring buffer virtual address start at 4GB */
#define KBASE_HWC_CSF_RING_BUFFER_VA_START (1ull << 32)
@ -90,8 +87,8 @@ struct kbase_hwcnt_backend_csf_if_fw_ctx {
struct kbase_ccswe ccswe_shader_cores;
};
static void kbasep_hwcnt_backend_csf_if_fw_assert_lock_held(
struct kbase_hwcnt_backend_csf_if_ctx *ctx)
static void
kbasep_hwcnt_backend_csf_if_fw_assert_lock_held(struct kbase_hwcnt_backend_csf_if_ctx *ctx)
{
struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx;
struct kbase_device *kbdev;
@ -104,9 +101,8 @@ static void kbasep_hwcnt_backend_csf_if_fw_assert_lock_held(
kbase_csf_scheduler_spin_lock_assert_held(kbdev);
}
static void
kbasep_hwcnt_backend_csf_if_fw_lock(struct kbase_hwcnt_backend_csf_if_ctx *ctx,
unsigned long *flags)
static void kbasep_hwcnt_backend_csf_if_fw_lock(struct kbase_hwcnt_backend_csf_if_ctx *ctx,
unsigned long *flags)
{
struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx;
struct kbase_device *kbdev;
@ -119,8 +115,8 @@ kbasep_hwcnt_backend_csf_if_fw_lock(struct kbase_hwcnt_backend_csf_if_ctx *ctx,
kbase_csf_scheduler_spin_lock(kbdev, flags);
}
static void kbasep_hwcnt_backend_csf_if_fw_unlock(
struct kbase_hwcnt_backend_csf_if_ctx *ctx, unsigned long flags)
static void kbasep_hwcnt_backend_csf_if_fw_unlock(struct kbase_hwcnt_backend_csf_if_ctx *ctx,
unsigned long flags)
{
struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx;
struct kbase_device *kbdev;
@ -141,22 +137,19 @@ static void kbasep_hwcnt_backend_csf_if_fw_unlock(
* @clk_index: Clock index
* @clk_rate_hz: Clock frequency (Hz)
*/
static void kbasep_hwcnt_backend_csf_if_fw_on_freq_change(
struct kbase_clk_rate_listener *rate_listener, u32 clk_index,
u32 clk_rate_hz)
static void
kbasep_hwcnt_backend_csf_if_fw_on_freq_change(struct kbase_clk_rate_listener *rate_listener,
u32 clk_index, u32 clk_rate_hz)
{
struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx =
container_of(rate_listener,
struct kbase_hwcnt_backend_csf_if_fw_ctx,
rate_listener);
struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx = container_of(
rate_listener, struct kbase_hwcnt_backend_csf_if_fw_ctx, rate_listener);
u64 timestamp_ns;
if (clk_index != KBASE_CLOCK_DOMAIN_SHADER_CORES)
return;
timestamp_ns = ktime_get_raw_ns();
kbase_ccswe_freq_change(&fw_ctx->ccswe_shader_cores, timestamp_ns,
clk_rate_hz);
kbase_ccswe_freq_change(&fw_ctx->ccswe_shader_cores, timestamp_ns, clk_rate_hz);
}
/**
@ -165,17 +158,16 @@ static void kbasep_hwcnt_backend_csf_if_fw_on_freq_change(
* @fw_ctx: Non-NULL pointer to CSF firmware interface context.
* @clk_enable_map: Enable map (a u64 value, not a pointer) specifying enabled clock domains.
*/
static void kbasep_hwcnt_backend_csf_if_fw_cc_enable(
struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx, u64 clk_enable_map)
static void
kbasep_hwcnt_backend_csf_if_fw_cc_enable(struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx,
u64 clk_enable_map)
{
struct kbase_device *kbdev = fw_ctx->kbdev;
if (kbase_hwcnt_clk_enable_map_enabled(
clk_enable_map, KBASE_CLOCK_DOMAIN_SHADER_CORES)) {
if (kbase_hwcnt_clk_enable_map_enabled(clk_enable_map, KBASE_CLOCK_DOMAIN_SHADER_CORES)) {
/* software estimation for non-top clock domains */
struct kbase_clk_rate_trace_manager *rtm = &kbdev->pm.clk_rtm;
const struct kbase_clk_data *clk_data =
rtm->clks[KBASE_CLOCK_DOMAIN_SHADER_CORES];
const struct kbase_clk_data *clk_data = rtm->clks[KBASE_CLOCK_DOMAIN_SHADER_CORES];
u32 cur_freq;
unsigned long flags;
u64 timestamp_ns;
@ -186,11 +178,9 @@ static void kbasep_hwcnt_backend_csf_if_fw_cc_enable(
cur_freq = (u32)clk_data->clock_val;
kbase_ccswe_reset(&fw_ctx->ccswe_shader_cores);
kbase_ccswe_freq_change(&fw_ctx->ccswe_shader_cores,
timestamp_ns, cur_freq);
kbase_ccswe_freq_change(&fw_ctx->ccswe_shader_cores, timestamp_ns, cur_freq);
kbase_clk_rate_trace_manager_subscribe_no_lock(
rtm, &fw_ctx->rate_listener);
kbase_clk_rate_trace_manager_subscribe_no_lock(rtm, &fw_ctx->rate_listener);
spin_unlock_irqrestore(&rtm->lock, flags);
}
@ -203,17 +193,15 @@ static void kbasep_hwcnt_backend_csf_if_fw_cc_enable(
*
* @fw_ctx: Non-NULL pointer to CSF firmware interface context.
*/
static void kbasep_hwcnt_backend_csf_if_fw_cc_disable(
struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx)
static void
kbasep_hwcnt_backend_csf_if_fw_cc_disable(struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx)
{
struct kbase_device *kbdev = fw_ctx->kbdev;
struct kbase_clk_rate_trace_manager *rtm = &kbdev->pm.clk_rtm;
u64 clk_enable_map = fw_ctx->clk_enable_map;
if (kbase_hwcnt_clk_enable_map_enabled(clk_enable_map,
KBASE_CLOCK_DOMAIN_SHADER_CORES))
kbase_clk_rate_trace_manager_unsubscribe(
rtm, &fw_ctx->rate_listener);
if (kbase_hwcnt_clk_enable_map_enabled(clk_enable_map, KBASE_CLOCK_DOMAIN_SHADER_CORES))
kbase_clk_rate_trace_manager_unsubscribe(rtm, &fw_ctx->rate_listener);
}
static void kbasep_hwcnt_backend_csf_if_fw_get_prfcnt_info(
@ -244,8 +232,8 @@ static void kbasep_hwcnt_backend_csf_if_fw_get_prfcnt_info(
u32 prfcnt_size;
u32 prfcnt_hw_size;
u32 prfcnt_fw_size;
u32 prfcnt_block_size = KBASE_HWCNT_V5_DEFAULT_VALUES_PER_BLOCK *
KBASE_HWCNT_VALUE_HW_BYTES;
u32 prfcnt_block_size =
KBASE_HWCNT_V5_DEFAULT_VALUES_PER_BLOCK * KBASE_HWCNT_VALUE_HW_BYTES;
WARN_ON(!ctx);
WARN_ON(!prfcnt_info);
@ -262,10 +250,9 @@ static void kbasep_hwcnt_backend_csf_if_fw_get_prfcnt_info(
*/
if ((kbdev->gpu_props.props.raw_props.gpu_id & GPU_ID2_PRODUCT_MODEL) >=
GPU_ID2_PRODUCT_TTUX) {
prfcnt_block_size =
PRFCNT_FEATURES_COUNTER_BLOCK_SIZE_GET(kbase_reg_read(
kbdev, GPU_CONTROL_REG(PRFCNT_FEATURES)))
<< 8;
prfcnt_block_size = PRFCNT_FEATURES_COUNTER_BLOCK_SIZE_GET(
kbase_reg_read(kbdev, GPU_CONTROL_REG(PRFCNT_FEATURES)))
<< 8;
}
*prfcnt_info = (struct kbase_hwcnt_backend_csf_if_prfcnt_info){
@ -280,17 +267,14 @@ static void kbasep_hwcnt_backend_csf_if_fw_get_prfcnt_info(
};
/* Block size must be multiple of counter size. */
WARN_ON((prfcnt_info->prfcnt_block_size % KBASE_HWCNT_VALUE_HW_BYTES) !=
0);
WARN_ON((prfcnt_info->prfcnt_block_size % KBASE_HWCNT_VALUE_HW_BYTES) != 0);
/* Total size must be multiple of block size. */
WARN_ON((prfcnt_info->dump_bytes % prfcnt_info->prfcnt_block_size) !=
0);
WARN_ON((prfcnt_info->dump_bytes % prfcnt_info->prfcnt_block_size) != 0);
#endif
}
static int kbasep_hwcnt_backend_csf_if_fw_ring_buf_alloc(
struct kbase_hwcnt_backend_csf_if_ctx *ctx, u32 buf_count,
void **cpu_dump_base,
struct kbase_hwcnt_backend_csf_if_ctx *ctx, u32 buf_count, void **cpu_dump_base,
struct kbase_hwcnt_backend_csf_if_ring_buf **out_ring_buf)
{
struct kbase_device *kbdev;
@ -342,9 +326,8 @@ static int kbasep_hwcnt_backend_csf_if_fw_ring_buf_alloc(
goto page_list_alloc_error;
/* Get physical page for the buffer */
ret = kbase_mem_pool_alloc_pages(
&kbdev->mem_pools.small[KBASE_MEM_GROUP_CSF_FW], num_pages,
phys, false);
ret = kbase_mem_pool_alloc_pages(&kbdev->mem_pools.small[KBASE_MEM_GROUP_CSF_FW], num_pages,
phys, false);
if (ret != num_pages)
goto phys_mem_pool_alloc_error;
@ -360,9 +343,8 @@ static int kbasep_hwcnt_backend_csf_if_fw_ring_buf_alloc(
KBASE_REG_MEMATTR_INDEX(AS_MEMATTR_INDEX_NON_CACHEABLE);
/* Update MMU table */
ret = kbase_mmu_insert_pages(kbdev, &kbdev->csf.mcu_mmu,
gpu_va_base >> PAGE_SHIFT, phys, num_pages,
flags, MCU_AS_NR, KBASE_MEM_GROUP_CSF_FW,
ret = kbase_mmu_insert_pages(kbdev, &kbdev->csf.mcu_mmu, gpu_va_base >> PAGE_SHIFT, phys,
num_pages, flags, MCU_AS_NR, KBASE_MEM_GROUP_CSF_FW,
mmu_sync_info);
if (ret)
goto mmu_insert_failed;
@ -381,17 +363,15 @@ static int kbasep_hwcnt_backend_csf_if_fw_ring_buf_alloc(
fw_ring_buf->as_nr = MCU_AS_NR;
*cpu_dump_base = fw_ring_buf->cpu_dump_base;
*out_ring_buf =
(struct kbase_hwcnt_backend_csf_if_ring_buf *)fw_ring_buf;
*out_ring_buf = (struct kbase_hwcnt_backend_csf_if_ring_buf *)fw_ring_buf;
return 0;
mmu_insert_failed:
vunmap(cpu_addr);
vmap_error:
kbase_mem_pool_free_pages(
&kbdev->mem_pools.small[KBASE_MEM_GROUP_CSF_FW], num_pages,
phys, false, false);
kbase_mem_pool_free_pages(&kbdev->mem_pools.small[KBASE_MEM_GROUP_CSF_FW], num_pages, phys,
false, false);
phys_mem_pool_alloc_error:
kfree(page_list);
page_list_alloc_error:
@ -401,10 +381,10 @@ static int kbasep_hwcnt_backend_csf_if_fw_ring_buf_alloc(
return -ENOMEM;
}
static void kbasep_hwcnt_backend_csf_if_fw_ring_buf_sync(
struct kbase_hwcnt_backend_csf_if_ctx *ctx,
struct kbase_hwcnt_backend_csf_if_ring_buf *ring_buf,
u32 buf_index_first, u32 buf_index_last, bool for_cpu)
static void
kbasep_hwcnt_backend_csf_if_fw_ring_buf_sync(struct kbase_hwcnt_backend_csf_if_ctx *ctx,
struct kbase_hwcnt_backend_csf_if_ring_buf *ring_buf,
u32 buf_index_first, u32 buf_index_last, bool for_cpu)
{
struct kbase_hwcnt_backend_csf_if_fw_ring_buf *fw_ring_buf =
(struct kbase_hwcnt_backend_csf_if_fw_ring_buf *)ring_buf;
@ -435,8 +415,7 @@ static void kbasep_hwcnt_backend_csf_if_fw_ring_buf_sync(
* inclusive at both ends so full flushes are not 0 -> 0.
*/
ring_buf_index_first = buf_index_first & (fw_ring_buf->buf_count - 1);
ring_buf_index_last =
(buf_index_last - 1) & (fw_ring_buf->buf_count - 1);
ring_buf_index_last = (buf_index_last - 1) & (fw_ring_buf->buf_count - 1);
/* The start address is the offset of the first buffer. */
start_address = fw_ctx->buf_bytes * ring_buf_index_first;
@ -453,15 +432,11 @@ static void kbasep_hwcnt_backend_csf_if_fw_ring_buf_sync(
struct page *pg = as_page(fw_ring_buf->phys[i]);
if (for_cpu) {
kbase_sync_single_for_cpu(fw_ctx->kbdev,
kbase_dma_addr(pg),
PAGE_SIZE,
DMA_BIDIRECTIONAL);
kbase_sync_single_for_cpu(fw_ctx->kbdev, kbase_dma_addr(pg),
PAGE_SIZE, DMA_BIDIRECTIONAL);
} else {
kbase_sync_single_for_device(fw_ctx->kbdev,
kbase_dma_addr(pg),
PAGE_SIZE,
DMA_BIDIRECTIONAL);
kbase_sync_single_for_device(fw_ctx->kbdev, kbase_dma_addr(pg),
PAGE_SIZE, DMA_BIDIRECTIONAL);
}
}
@ -473,28 +448,24 @@ static void kbasep_hwcnt_backend_csf_if_fw_ring_buf_sync(
struct page *pg = as_page(fw_ring_buf->phys[i]);
if (for_cpu) {
kbase_sync_single_for_cpu(fw_ctx->kbdev,
kbase_dma_addr(pg), PAGE_SIZE,
kbase_sync_single_for_cpu(fw_ctx->kbdev, kbase_dma_addr(pg), PAGE_SIZE,
DMA_BIDIRECTIONAL);
} else {
kbase_sync_single_for_device(fw_ctx->kbdev,
kbase_dma_addr(pg),
PAGE_SIZE,
kbase_sync_single_for_device(fw_ctx->kbdev, kbase_dma_addr(pg), PAGE_SIZE,
DMA_BIDIRECTIONAL);
}
}
}
static u64 kbasep_hwcnt_backend_csf_if_fw_timestamp_ns(
struct kbase_hwcnt_backend_csf_if_ctx *ctx)
static u64 kbasep_hwcnt_backend_csf_if_fw_timestamp_ns(struct kbase_hwcnt_backend_csf_if_ctx *ctx)
{
CSTD_UNUSED(ctx);
return ktime_get_raw_ns();
}
static void kbasep_hwcnt_backend_csf_if_fw_ring_buf_free(
struct kbase_hwcnt_backend_csf_if_ctx *ctx,
struct kbase_hwcnt_backend_csf_if_ring_buf *ring_buf)
static void
kbasep_hwcnt_backend_csf_if_fw_ring_buf_free(struct kbase_hwcnt_backend_csf_if_ctx *ctx,
struct kbase_hwcnt_backend_csf_if_ring_buf *ring_buf)
{
struct kbase_hwcnt_backend_csf_if_fw_ring_buf *fw_ring_buf =
(struct kbase_hwcnt_backend_csf_if_fw_ring_buf *)ring_buf;
@ -513,10 +484,8 @@ static void kbasep_hwcnt_backend_csf_if_fw_ring_buf_free(
vunmap(fw_ring_buf->cpu_dump_base);
kbase_mem_pool_free_pages(
&fw_ctx->kbdev->mem_pools.small[KBASE_MEM_GROUP_CSF_FW],
fw_ring_buf->num_pages, fw_ring_buf->phys, false,
false);
kbase_mem_pool_free_pages(&fw_ctx->kbdev->mem_pools.small[KBASE_MEM_GROUP_CSF_FW],
fw_ring_buf->num_pages, fw_ring_buf->phys, false, false);
kfree(fw_ring_buf->phys);
@ -524,10 +493,10 @@ static void kbasep_hwcnt_backend_csf_if_fw_ring_buf_free(
}
}
static void kbasep_hwcnt_backend_csf_if_fw_dump_enable(
struct kbase_hwcnt_backend_csf_if_ctx *ctx,
struct kbase_hwcnt_backend_csf_if_ring_buf *ring_buf,
struct kbase_hwcnt_backend_csf_if_enable *enable)
static void
kbasep_hwcnt_backend_csf_if_fw_dump_enable(struct kbase_hwcnt_backend_csf_if_ctx *ctx,
struct kbase_hwcnt_backend_csf_if_ring_buf *ring_buf,
struct kbase_hwcnt_backend_csf_if_enable *enable)
{
u32 prfcnt_config;
struct kbase_device *kbdev;
@ -550,8 +519,7 @@ static void kbasep_hwcnt_backend_csf_if_fw_dump_enable(
prfcnt_config = GLB_PRFCNT_CONFIG_SET_SELECT_SET(prfcnt_config, enable->counter_set);
/* Configure the ring buffer base address */
kbase_csf_firmware_global_input(global_iface, GLB_PRFCNT_JASID,
fw_ring_buf->as_nr);
kbase_csf_firmware_global_input(global_iface, GLB_PRFCNT_JASID, fw_ring_buf->as_nr);
kbase_csf_firmware_global_input(global_iface, GLB_PRFCNT_BASE_LO,
fw_ring_buf->gpu_dump_base & U32_MAX);
kbase_csf_firmware_global_input(global_iface, GLB_PRFCNT_BASE_HI,
@ -561,38 +529,29 @@ static void kbasep_hwcnt_backend_csf_if_fw_dump_enable(
kbase_csf_firmware_global_input(global_iface, GLB_PRFCNT_EXTRACT, 0);
/* Configure the enable bitmap */
kbase_csf_firmware_global_input(global_iface, GLB_PRFCNT_CSF_EN,
enable->fe_bm);
kbase_csf_firmware_global_input(global_iface, GLB_PRFCNT_SHADER_EN,
enable->shader_bm);
kbase_csf_firmware_global_input(global_iface, GLB_PRFCNT_MMU_L2_EN,
enable->mmu_l2_bm);
kbase_csf_firmware_global_input(global_iface, GLB_PRFCNT_TILER_EN,
enable->tiler_bm);
kbase_csf_firmware_global_input(global_iface, GLB_PRFCNT_CSF_EN, enable->fe_bm);
kbase_csf_firmware_global_input(global_iface, GLB_PRFCNT_SHADER_EN, enable->shader_bm);
kbase_csf_firmware_global_input(global_iface, GLB_PRFCNT_MMU_L2_EN, enable->mmu_l2_bm);
kbase_csf_firmware_global_input(global_iface, GLB_PRFCNT_TILER_EN, enable->tiler_bm);
/* Configure the HWC set and buffer size */
kbase_csf_firmware_global_input(global_iface, GLB_PRFCNT_CONFIG,
prfcnt_config);
kbase_csf_firmware_global_input(global_iface, GLB_PRFCNT_CONFIG, prfcnt_config);
kbdev->csf.hwcnt.enable_pending = true;
/* Unmask the interrupts */
kbase_csf_firmware_global_input_mask(
global_iface, GLB_ACK_IRQ_MASK,
GLB_ACK_IRQ_MASK_PRFCNT_SAMPLE_MASK,
GLB_ACK_IRQ_MASK_PRFCNT_SAMPLE_MASK);
kbase_csf_firmware_global_input_mask(
global_iface, GLB_ACK_IRQ_MASK,
GLB_ACK_IRQ_MASK_PRFCNT_THRESHOLD_MASK,
GLB_ACK_IRQ_MASK_PRFCNT_THRESHOLD_MASK);
kbase_csf_firmware_global_input_mask(
global_iface, GLB_ACK_IRQ_MASK,
GLB_ACK_IRQ_MASK_PRFCNT_OVERFLOW_MASK,
GLB_ACK_IRQ_MASK_PRFCNT_OVERFLOW_MASK);
kbase_csf_firmware_global_input_mask(
global_iface, GLB_ACK_IRQ_MASK,
GLB_ACK_IRQ_MASK_PRFCNT_ENABLE_MASK,
GLB_ACK_IRQ_MASK_PRFCNT_ENABLE_MASK);
kbase_csf_firmware_global_input_mask(global_iface, GLB_ACK_IRQ_MASK,
GLB_ACK_IRQ_MASK_PRFCNT_SAMPLE_MASK,
GLB_ACK_IRQ_MASK_PRFCNT_SAMPLE_MASK);
kbase_csf_firmware_global_input_mask(global_iface, GLB_ACK_IRQ_MASK,
GLB_ACK_IRQ_MASK_PRFCNT_THRESHOLD_MASK,
GLB_ACK_IRQ_MASK_PRFCNT_THRESHOLD_MASK);
kbase_csf_firmware_global_input_mask(global_iface, GLB_ACK_IRQ_MASK,
GLB_ACK_IRQ_MASK_PRFCNT_OVERFLOW_MASK,
GLB_ACK_IRQ_MASK_PRFCNT_OVERFLOW_MASK);
kbase_csf_firmware_global_input_mask(global_iface, GLB_ACK_IRQ_MASK,
GLB_ACK_IRQ_MASK_PRFCNT_ENABLE_MASK,
GLB_ACK_IRQ_MASK_PRFCNT_ENABLE_MASK);
/* Enable the HWC */
kbase_csf_firmware_global_input_mask(global_iface, GLB_REQ,
@ -600,15 +559,12 @@ static void kbasep_hwcnt_backend_csf_if_fw_dump_enable(
GLB_REQ_PRFCNT_ENABLE_MASK);
kbase_csf_ring_doorbell(kbdev, CSF_KERNEL_DOORBELL_NR);
prfcnt_config = kbase_csf_firmware_global_input_read(global_iface,
GLB_PRFCNT_CONFIG);
prfcnt_config = kbase_csf_firmware_global_input_read(global_iface, GLB_PRFCNT_CONFIG);
kbasep_hwcnt_backend_csf_if_fw_cc_enable(fw_ctx,
enable->clk_enable_map);
kbasep_hwcnt_backend_csf_if_fw_cc_enable(fw_ctx, enable->clk_enable_map);
}
static void kbasep_hwcnt_backend_csf_if_fw_dump_disable(
struct kbase_hwcnt_backend_csf_if_ctx *ctx)
static void kbasep_hwcnt_backend_csf_if_fw_dump_disable(struct kbase_hwcnt_backend_csf_if_ctx *ctx)
{
struct kbase_device *kbdev;
struct kbase_csf_global_iface *global_iface;
@ -623,20 +579,16 @@ static void kbasep_hwcnt_backend_csf_if_fw_dump_disable(
/* Disable the HWC */
kbdev->csf.hwcnt.enable_pending = true;
kbase_csf_firmware_global_input_mask(global_iface, GLB_REQ, 0,
GLB_REQ_PRFCNT_ENABLE_MASK);
kbase_csf_firmware_global_input_mask(global_iface, GLB_REQ, 0, GLB_REQ_PRFCNT_ENABLE_MASK);
kbase_csf_ring_doorbell(kbdev, CSF_KERNEL_DOORBELL_NR);
/* Mask the interrupts */
kbase_csf_firmware_global_input_mask(
global_iface, GLB_ACK_IRQ_MASK, 0,
GLB_ACK_IRQ_MASK_PRFCNT_SAMPLE_MASK);
kbase_csf_firmware_global_input_mask(
global_iface, GLB_ACK_IRQ_MASK, 0,
GLB_ACK_IRQ_MASK_PRFCNT_THRESHOLD_MASK);
kbase_csf_firmware_global_input_mask(
global_iface, GLB_ACK_IRQ_MASK, 0,
GLB_ACK_IRQ_MASK_PRFCNT_OVERFLOW_MASK);
kbase_csf_firmware_global_input_mask(global_iface, GLB_ACK_IRQ_MASK, 0,
GLB_ACK_IRQ_MASK_PRFCNT_SAMPLE_MASK);
kbase_csf_firmware_global_input_mask(global_iface, GLB_ACK_IRQ_MASK, 0,
GLB_ACK_IRQ_MASK_PRFCNT_THRESHOLD_MASK);
kbase_csf_firmware_global_input_mask(global_iface, GLB_ACK_IRQ_MASK, 0,
GLB_ACK_IRQ_MASK_PRFCNT_OVERFLOW_MASK);
/* In case we have a previous request in flight when the disable
* happens.
@ -646,8 +598,7 @@ static void kbasep_hwcnt_backend_csf_if_fw_dump_disable(
kbasep_hwcnt_backend_csf_if_fw_cc_disable(fw_ctx);
}
static void kbasep_hwcnt_backend_csf_if_fw_dump_request(
struct kbase_hwcnt_backend_csf_if_ctx *ctx)
static void kbasep_hwcnt_backend_csf_if_fw_dump_request(struct kbase_hwcnt_backend_csf_if_ctx *ctx)
{
u32 glb_req;
struct kbase_device *kbdev;
@ -670,9 +621,8 @@ static void kbasep_hwcnt_backend_csf_if_fw_dump_request(
kbase_csf_ring_doorbell(kbdev, CSF_KERNEL_DOORBELL_NR);
}
static void kbasep_hwcnt_backend_csf_if_fw_get_indexes(
struct kbase_hwcnt_backend_csf_if_ctx *ctx, u32 *extract_index,
u32 *insert_index)
static void kbasep_hwcnt_backend_csf_if_fw_get_indexes(struct kbase_hwcnt_backend_csf_if_ctx *ctx,
u32 *extract_index, u32 *insert_index)
{
struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx =
(struct kbase_hwcnt_backend_csf_if_fw_ctx *)ctx;
@ -682,14 +632,15 @@ static void kbasep_hwcnt_backend_csf_if_fw_get_indexes(
WARN_ON(!insert_index);
kbasep_hwcnt_backend_csf_if_fw_assert_lock_held(ctx);
*extract_index = kbase_csf_firmware_global_input_read(
&fw_ctx->kbdev->csf.global_iface, GLB_PRFCNT_EXTRACT);
*insert_index = kbase_csf_firmware_global_output(
&fw_ctx->kbdev->csf.global_iface, GLB_PRFCNT_INSERT);
*extract_index = kbase_csf_firmware_global_input_read(&fw_ctx->kbdev->csf.global_iface,
GLB_PRFCNT_EXTRACT);
*insert_index = kbase_csf_firmware_global_output(&fw_ctx->kbdev->csf.global_iface,
GLB_PRFCNT_INSERT);
}
static void kbasep_hwcnt_backend_csf_if_fw_set_extract_index(
struct kbase_hwcnt_backend_csf_if_ctx *ctx, u32 extract_idx)
static void
kbasep_hwcnt_backend_csf_if_fw_set_extract_index(struct kbase_hwcnt_backend_csf_if_ctx *ctx,
u32 extract_idx)
{
struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx =
(struct kbase_hwcnt_backend_csf_if_fw_ctx *)ctx;
@ -700,13 +651,13 @@ static void kbasep_hwcnt_backend_csf_if_fw_set_extract_index(
/* Set the raw extract index to release the buffer back to the ring
* buffer.
*/
kbase_csf_firmware_global_input(&fw_ctx->kbdev->csf.global_iface,
GLB_PRFCNT_EXTRACT, extract_idx);
kbase_csf_firmware_global_input(&fw_ctx->kbdev->csf.global_iface, GLB_PRFCNT_EXTRACT,
extract_idx);
}
static void kbasep_hwcnt_backend_csf_if_fw_get_gpu_cycle_count(
struct kbase_hwcnt_backend_csf_if_ctx *ctx, u64 *cycle_counts,
u64 clk_enable_map)
static void
kbasep_hwcnt_backend_csf_if_fw_get_gpu_cycle_count(struct kbase_hwcnt_backend_csf_if_ctx *ctx,
u64 *cycle_counts, u64 clk_enable_map)
{
struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx =
(struct kbase_hwcnt_backend_csf_if_fw_ctx *)ctx;
@ -723,12 +674,12 @@ static void kbasep_hwcnt_backend_csf_if_fw_get_gpu_cycle_count(
if (clk == KBASE_CLOCK_DOMAIN_TOP) {
/* Read cycle count for top clock domain. */
kbase_backend_get_gpu_time_norequest(
fw_ctx->kbdev, &cycle_counts[clk], NULL, NULL);
kbase_backend_get_gpu_time_norequest(fw_ctx->kbdev, &cycle_counts[clk],
NULL, NULL);
} else {
/* Estimate cycle count for non-top clock domain. */
cycle_counts[clk] = kbase_ccswe_cycle_at(
&fw_ctx->ccswe_shader_cores, timestamp_ns);
cycle_counts[clk] =
kbase_ccswe_cycle_at(&fw_ctx->ccswe_shader_cores, timestamp_ns);
}
}
}
@ -738,8 +689,8 @@ static void kbasep_hwcnt_backend_csf_if_fw_get_gpu_cycle_count(
*
* @fw_ctx: Pointer to context to destroy.
*/
static void kbasep_hwcnt_backend_csf_if_fw_ctx_destroy(
struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx)
static void
kbasep_hwcnt_backend_csf_if_fw_ctx_destroy(struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx)
{
if (!fw_ctx)
return;
@ -754,9 +705,9 @@ static void kbasep_hwcnt_backend_csf_if_fw_ctx_destroy(
* @out_ctx: Non-NULL pointer to where info is stored on success.
* Return: 0 on success, else error code.
*/
static int kbasep_hwcnt_backend_csf_if_fw_ctx_create(
struct kbase_device *kbdev,
struct kbase_hwcnt_backend_csf_if_fw_ctx **out_ctx)
static int
kbasep_hwcnt_backend_csf_if_fw_ctx_create(struct kbase_device *kbdev,
struct kbase_hwcnt_backend_csf_if_fw_ctx **out_ctx)
{
u8 clk;
int errcode = -ENOMEM;
@ -780,8 +731,7 @@ static int kbasep_hwcnt_backend_csf_if_fw_ctx_create(
ctx->clk_enable_map = 0;
kbase_ccswe_init(&ctx->ccswe_shader_cores);
ctx->rate_listener.notify =
kbasep_hwcnt_backend_csf_if_fw_on_freq_change;
ctx->rate_listener.notify = kbasep_hwcnt_backend_csf_if_fw_on_freq_change;
*out_ctx = ctx;
@ -791,8 +741,7 @@ static int kbasep_hwcnt_backend_csf_if_fw_ctx_create(
return errcode;
}
void kbase_hwcnt_backend_csf_if_fw_destroy(
struct kbase_hwcnt_backend_csf_if *if_fw)
void kbase_hwcnt_backend_csf_if_fw_destroy(struct kbase_hwcnt_backend_csf_if *if_fw)
{
if (!if_fw)
return;
@ -802,8 +751,8 @@ void kbase_hwcnt_backend_csf_if_fw_destroy(
memset(if_fw, 0, sizeof(*if_fw));
}
int kbase_hwcnt_backend_csf_if_fw_create(
struct kbase_device *kbdev, struct kbase_hwcnt_backend_csf_if *if_fw)
int kbase_hwcnt_backend_csf_if_fw_create(struct kbase_device *kbdev,
struct kbase_hwcnt_backend_csf_if *if_fw)
{
int errcode;
struct kbase_hwcnt_backend_csf_if_fw_ctx *ctx = NULL;
@ -816,8 +765,7 @@ int kbase_hwcnt_backend_csf_if_fw_create(
return errcode;
if_fw->ctx = (struct kbase_hwcnt_backend_csf_if_ctx *)ctx;
if_fw->assert_lock_held =
kbasep_hwcnt_backend_csf_if_fw_assert_lock_held;
if_fw->assert_lock_held = kbasep_hwcnt_backend_csf_if_fw_assert_lock_held;
if_fw->lock = kbasep_hwcnt_backend_csf_if_fw_lock;
if_fw->unlock = kbasep_hwcnt_backend_csf_if_fw_unlock;
if_fw->get_prfcnt_info = kbasep_hwcnt_backend_csf_if_fw_get_prfcnt_info;
@ -828,11 +776,9 @@ int kbase_hwcnt_backend_csf_if_fw_create(
if_fw->dump_enable = kbasep_hwcnt_backend_csf_if_fw_dump_enable;
if_fw->dump_disable = kbasep_hwcnt_backend_csf_if_fw_dump_disable;
if_fw->dump_request = kbasep_hwcnt_backend_csf_if_fw_dump_request;
if_fw->get_gpu_cycle_count =
kbasep_hwcnt_backend_csf_if_fw_get_gpu_cycle_count;
if_fw->get_gpu_cycle_count = kbasep_hwcnt_backend_csf_if_fw_get_gpu_cycle_count;
if_fw->get_indexes = kbasep_hwcnt_backend_csf_if_fw_get_indexes;
if_fw->set_extract_index =
kbasep_hwcnt_backend_csf_if_fw_set_extract_index;
if_fw->set_extract_index = kbasep_hwcnt_backend_csf_if_fw_set_extract_index;
return 0;
}

View File

@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
* (C) COPYRIGHT 2021 ARM Limited. All rights reserved.
* (C) COPYRIGHT 2021-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@ -26,7 +26,7 @@
#ifndef _KBASE_HWCNT_BACKEND_CSF_IF_FW_H_
#define _KBASE_HWCNT_BACKEND_CSF_IF_FW_H_
#include "mali_kbase_hwcnt_backend_csf_if.h"
#include "hwcnt/backend/mali_kbase_hwcnt_backend_csf_if.h"
/**
* kbase_hwcnt_backend_csf_if_fw_create() - Create a firmware CSF interface
@ -36,15 +36,14 @@
* creation success.
* Return: 0 on success, else error code.
*/
int kbase_hwcnt_backend_csf_if_fw_create(
struct kbase_device *kbdev, struct kbase_hwcnt_backend_csf_if *if_fw);
int kbase_hwcnt_backend_csf_if_fw_create(struct kbase_device *kbdev,
struct kbase_hwcnt_backend_csf_if *if_fw);
/**
* kbase_hwcnt_backend_csf_if_fw_destroy() - Destroy a firmware CSF interface of
* hardware counter backend.
* @if_fw: Pointer to a CSF interface to destroy.
*/
void kbase_hwcnt_backend_csf_if_fw_destroy(
struct kbase_hwcnt_backend_csf_if *if_fw);
void kbase_hwcnt_backend_csf_if_fw_destroy(struct kbase_hwcnt_backend_csf_if *if_fw);
#endif /* _KBASE_HWCNT_BACKEND_CSF_IF_FW_H_ */

View File

@ -19,9 +19,9 @@
*
*/
#include "mali_kbase_hwcnt_backend_jm.h"
#include "mali_kbase_hwcnt_gpu.h"
#include "mali_kbase_hwcnt_types.h"
#include "hwcnt/backend/mali_kbase_hwcnt_backend_jm.h"
#include "hwcnt/mali_kbase_hwcnt_gpu.h"
#include "hwcnt/mali_kbase_hwcnt_types.h"
#include "mali_kbase.h"
#include "backend/gpu/mali_kbase_pm_ca.h"
#include "mali_kbase_hwaccess_instr.h"
@ -136,9 +136,8 @@ struct kbase_hwcnt_backend_jm {
*
* Return: 0 on success, else error code.
*/
static int
kbasep_hwcnt_backend_jm_gpu_info_init(struct kbase_device *kbdev,
struct kbase_hwcnt_gpu_info *info)
static int kbasep_hwcnt_backend_jm_gpu_info_init(struct kbase_device *kbdev,
struct kbase_hwcnt_gpu_info *info)
{
size_t clk;
@ -153,13 +152,11 @@ kbasep_hwcnt_backend_jm_gpu_info_init(struct kbase_device *kbdev,
{
const struct base_gpu_props *props = &kbdev->gpu_props.props;
const size_t l2_count = props->l2_props.num_l2_slices;
const size_t core_mask =
props->coherency_info.group[0].core_mask;
const size_t core_mask = props->coherency_info.group[0].core_mask;
info->l2_count = l2_count;
info->core_mask = core_mask;
info->prfcnt_values_per_block =
KBASE_HWCNT_V5_DEFAULT_VALUES_PER_BLOCK;
info->prfcnt_values_per_block = KBASE_HWCNT_V5_DEFAULT_VALUES_PER_BLOCK;
}
#endif /* CONFIG_MALI_BIFROST_NO_MALI */
@ -173,9 +170,8 @@ kbasep_hwcnt_backend_jm_gpu_info_init(struct kbase_device *kbdev,
return 0;
}
static void kbasep_hwcnt_backend_jm_init_layout(
const struct kbase_hwcnt_gpu_info *gpu_info,
struct kbase_hwcnt_jm_physical_layout *phys_layout)
static void kbasep_hwcnt_backend_jm_init_layout(const struct kbase_hwcnt_gpu_info *gpu_info,
struct kbase_hwcnt_jm_physical_layout *phys_layout)
{
u8 shader_core_cnt;
@ -189,32 +185,29 @@ static void kbasep_hwcnt_backend_jm_init_layout(
.tiler_cnt = KBASE_HWCNT_V5_TILER_BLOCK_COUNT,
.mmu_l2_cnt = gpu_info->l2_count,
.shader_cnt = shader_core_cnt,
.block_cnt = KBASE_HWCNT_V5_FE_BLOCK_COUNT +
KBASE_HWCNT_V5_TILER_BLOCK_COUNT +
.block_cnt = KBASE_HWCNT_V5_FE_BLOCK_COUNT + KBASE_HWCNT_V5_TILER_BLOCK_COUNT +
gpu_info->l2_count + shader_core_cnt,
.shader_avail_mask = gpu_info->core_mask,
.headers_per_block = KBASE_HWCNT_V5_HEADERS_PER_BLOCK,
.values_per_block = gpu_info->prfcnt_values_per_block,
.counters_per_block = gpu_info->prfcnt_values_per_block -
KBASE_HWCNT_V5_HEADERS_PER_BLOCK,
.counters_per_block =
gpu_info->prfcnt_values_per_block - KBASE_HWCNT_V5_HEADERS_PER_BLOCK,
.enable_mask_offset = KBASE_HWCNT_V5_PRFCNT_EN_HEADER,
};
}
static void kbasep_hwcnt_backend_jm_dump_sample(
const struct kbase_hwcnt_backend_jm *const backend_jm)
static void
kbasep_hwcnt_backend_jm_dump_sample(const struct kbase_hwcnt_backend_jm *const backend_jm)
{
size_t block_idx;
const u32 *new_sample_buf = backend_jm->cpu_dump_va;
const u32 *new_block = new_sample_buf;
u64 *dst_buf = backend_jm->to_user_buf;
u64 *dst_block = dst_buf;
const size_t values_per_block =
backend_jm->phys_layout.values_per_block;
const size_t values_per_block = backend_jm->phys_layout.values_per_block;
const size_t dump_bytes = backend_jm->info->dump_bytes;
for (block_idx = 0; block_idx < backend_jm->phys_layout.block_cnt;
block_idx++) {
for (block_idx = 0; block_idx < backend_jm->phys_layout.block_cnt; block_idx++) {
size_t ctr_idx;
for (ctr_idx = 0; ctr_idx < values_per_block; ctr_idx++)
@ -224,10 +217,8 @@ static void kbasep_hwcnt_backend_jm_dump_sample(
dst_block += values_per_block;
}
WARN_ON(new_block !=
new_sample_buf + (dump_bytes / KBASE_HWCNT_VALUE_HW_BYTES));
WARN_ON(dst_block !=
dst_buf + (dump_bytes / KBASE_HWCNT_VALUE_HW_BYTES));
WARN_ON(new_block != new_sample_buf + (dump_bytes / KBASE_HWCNT_VALUE_HW_BYTES));
WARN_ON(dst_block != dst_buf + (dump_bytes / KBASE_HWCNT_VALUE_HW_BYTES));
}
/**
@ -237,21 +228,18 @@ static void kbasep_hwcnt_backend_jm_dump_sample(
* @clk_index: Clock index
* @clk_rate_hz: Clock frequency (Hz)
*/
static void kbasep_hwcnt_backend_jm_on_freq_change(
struct kbase_clk_rate_listener *rate_listener,
u32 clk_index,
u32 clk_rate_hz)
static void kbasep_hwcnt_backend_jm_on_freq_change(struct kbase_clk_rate_listener *rate_listener,
u32 clk_index, u32 clk_rate_hz)
{
/* The listener is embedded in the JM backend; recover the owning backend
 * from the listener pointer.
 */
struct kbase_hwcnt_backend_jm *backend_jm = container_of(
rate_listener, struct kbase_hwcnt_backend_jm, rate_listener);
struct kbase_hwcnt_backend_jm *backend_jm =
container_of(rate_listener, struct kbase_hwcnt_backend_jm, rate_listener);
u64 timestamp_ns;
/* Only the shader cores clock domain is tracked here; rate changes on
 * any other domain are ignored.
 */
if (clk_index != KBASE_CLOCK_DOMAIN_SHADER_CORES)
return;
/* Feed the new rate, stamped with the time of the notification, into the
 * shader cores cycle count estimator.
 */
timestamp_ns = ktime_get_raw_ns();
kbase_ccswe_freq_change(
&backend_jm->ccswe_shader_cores, timestamp_ns, clk_rate_hz);
kbase_ccswe_freq_change(&backend_jm->ccswe_shader_cores, timestamp_ns, clk_rate_hz);
}
/**
@ -261,53 +249,42 @@ static void kbasep_hwcnt_backend_jm_on_freq_change(
* @enable_map: Non-NULL pointer to enable map specifying enabled counters.
* @timestamp_ns: Timestamp(ns) when HWCNT were enabled.
*/
static void kbasep_hwcnt_backend_jm_cc_enable(
struct kbase_hwcnt_backend_jm *backend_jm,
const struct kbase_hwcnt_enable_map *enable_map,
u64 timestamp_ns)
static void kbasep_hwcnt_backend_jm_cc_enable(struct kbase_hwcnt_backend_jm *backend_jm,
const struct kbase_hwcnt_enable_map *enable_map,
u64 timestamp_ns)
{
struct kbase_device *kbdev = backend_jm->kctx->kbdev;
u64 clk_enable_map = enable_map->clk_enable_map;
u64 cycle_count;
if (kbase_hwcnt_clk_enable_map_enabled(
clk_enable_map, KBASE_CLOCK_DOMAIN_TOP)) {
if (kbase_hwcnt_clk_enable_map_enabled(clk_enable_map, KBASE_CLOCK_DOMAIN_TOP)) {
/* turn on the cycle counter */
kbase_pm_request_gpu_cycle_counter_l2_is_on(kbdev);
/* Read cycle count for top clock domain. */
kbase_backend_get_gpu_time_norequest(
kbdev, &cycle_count, NULL, NULL);
kbase_backend_get_gpu_time_norequest(kbdev, &cycle_count, NULL, NULL);
backend_jm->prev_cycle_count[KBASE_CLOCK_DOMAIN_TOP] =
cycle_count;
backend_jm->prev_cycle_count[KBASE_CLOCK_DOMAIN_TOP] = cycle_count;
}
if (kbase_hwcnt_clk_enable_map_enabled(
clk_enable_map, KBASE_CLOCK_DOMAIN_SHADER_CORES)) {
if (kbase_hwcnt_clk_enable_map_enabled(clk_enable_map, KBASE_CLOCK_DOMAIN_SHADER_CORES)) {
/* software estimation for non-top clock domains */
struct kbase_clk_rate_trace_manager *rtm = &kbdev->pm.clk_rtm;
const struct kbase_clk_data *clk_data =
rtm->clks[KBASE_CLOCK_DOMAIN_SHADER_CORES];
const struct kbase_clk_data *clk_data = rtm->clks[KBASE_CLOCK_DOMAIN_SHADER_CORES];
u32 cur_freq;
unsigned long flags;
spin_lock_irqsave(&rtm->lock, flags);
cur_freq = (u32) clk_data->clock_val;
cur_freq = (u32)clk_data->clock_val;
kbase_ccswe_reset(&backend_jm->ccswe_shader_cores);
kbase_ccswe_freq_change(
&backend_jm->ccswe_shader_cores,
timestamp_ns,
cur_freq);
kbase_ccswe_freq_change(&backend_jm->ccswe_shader_cores, timestamp_ns, cur_freq);
kbase_clk_rate_trace_manager_subscribe_no_lock(
rtm, &backend_jm->rate_listener);
kbase_clk_rate_trace_manager_subscribe_no_lock(rtm, &backend_jm->rate_listener);
spin_unlock_irqrestore(&rtm->lock, flags);
/* ccswe was reset. The estimated cycle is zero. */
backend_jm->prev_cycle_count[
KBASE_CLOCK_DOMAIN_SHADER_CORES] = 0;
backend_jm->prev_cycle_count[KBASE_CLOCK_DOMAIN_SHADER_CORES] = 0;
}
/* Keep clk_enable_map for dump_request. */
@ -319,28 +296,22 @@ static void kbasep_hwcnt_backend_jm_cc_enable(
*
* @backend_jm: Non-NULL pointer to backend.
*/
static void kbasep_hwcnt_backend_jm_cc_disable(
struct kbase_hwcnt_backend_jm *backend_jm)
static void kbasep_hwcnt_backend_jm_cc_disable(struct kbase_hwcnt_backend_jm *backend_jm)
{
struct kbase_device *kbdev = backend_jm->kctx->kbdev;
struct kbase_clk_rate_trace_manager *rtm = &kbdev->pm.clk_rtm;
/* Use the clock enable map stored on the backend to decide which clock
 * domains need tearing down (presumably the map saved at enable time -
 * TODO confirm against kbasep_hwcnt_backend_jm_cc_enable()).
 */
u64 clk_enable_map = backend_jm->clk_enable_map;
if (kbase_hwcnt_clk_enable_map_enabled(
clk_enable_map, KBASE_CLOCK_DOMAIN_TOP)) {
if (kbase_hwcnt_clk_enable_map_enabled(clk_enable_map, KBASE_CLOCK_DOMAIN_TOP)) {
/* Top clock domain: turn off the cycle counter by releasing the GPU
 * cycle counter request.
 */
kbase_pm_release_gpu_cycle_counter(kbdev);
}
/* Shader cores clock domain: stop receiving clock rate change
 * notifications on this backend's listener.
 */
if (kbase_hwcnt_clk_enable_map_enabled(
clk_enable_map, KBASE_CLOCK_DOMAIN_SHADER_CORES)) {
kbase_clk_rate_trace_manager_unsubscribe(
rtm, &backend_jm->rate_listener);
if (kbase_hwcnt_clk_enable_map_enabled(clk_enable_map, KBASE_CLOCK_DOMAIN_SHADER_CORES)) {
kbase_clk_rate_trace_manager_unsubscribe(rtm, &backend_jm->rate_listener);
}
}
/**
* kbasep_hwcnt_gpu_update_curr_config() - Update the destination buffer with
* current config information.
@ -356,38 +327,33 @@ static void kbasep_hwcnt_backend_jm_cc_disable(
*
* Return: 0 on success, else error code.
*/
static int kbasep_hwcnt_gpu_update_curr_config(
struct kbase_device *kbdev,
struct kbase_hwcnt_curr_config *curr_config)
static int kbasep_hwcnt_gpu_update_curr_config(struct kbase_device *kbdev,
struct kbase_hwcnt_curr_config *curr_config)
{
/* Warn and fail with -EINVAL on a missing argument instead of
 * dereferencing a NULL pointer below.
 */
if (WARN_ON(!kbdev) || WARN_ON(!curr_config))
return -EINVAL;
/* The caller must already hold hwaccess_lock. */
lockdep_assert_held(&kbdev->hwaccess_lock);
/* Copy the current L2 slice count and shader-present mask from the
 * device GPU properties into the destination.
 */
curr_config->num_l2_slices =
kbdev->gpu_props.curr_config.l2_slices;
curr_config->shader_present =
kbdev->gpu_props.curr_config.shader_present;
curr_config->num_l2_slices = kbdev->gpu_props.curr_config.l2_slices;
curr_config->shader_present = kbdev->gpu_props.curr_config.shader_present;
return 0;
}
/* JM backend implementation of kbase_hwcnt_backend_timestamp_ns_fn.
 *
 * Returns the value of ktime_get_raw_ns(); the backend argument is not used.
 */
static u64 kbasep_hwcnt_backend_jm_timestamp_ns(
struct kbase_hwcnt_backend *backend)
static u64 kbasep_hwcnt_backend_jm_timestamp_ns(struct kbase_hwcnt_backend *backend)
{
/* Explicitly mark the parameter as unused. */
(void)backend;
return ktime_get_raw_ns();
}
/* JM backend implementation of kbase_hwcnt_backend_dump_enable_nolock_fn */
static int kbasep_hwcnt_backend_jm_dump_enable_nolock(
struct kbase_hwcnt_backend *backend,
const struct kbase_hwcnt_enable_map *enable_map)
static int
kbasep_hwcnt_backend_jm_dump_enable_nolock(struct kbase_hwcnt_backend *backend,
const struct kbase_hwcnt_enable_map *enable_map)
{
int errcode;
struct kbase_hwcnt_backend_jm *backend_jm =
(struct kbase_hwcnt_backend_jm *)backend;
struct kbase_hwcnt_backend_jm *backend_jm = (struct kbase_hwcnt_backend_jm *)backend;
struct kbase_context *kctx;
struct kbase_device *kbdev;
struct kbase_hwcnt_physical_enable_map phys_enable_map;
@ -406,8 +372,7 @@ static int kbasep_hwcnt_backend_jm_dump_enable_nolock(
kbase_hwcnt_gpu_enable_map_to_physical(&phys_enable_map, enable_map);
kbase_hwcnt_gpu_set_to_physical(&phys_counter_set,
backend_jm->info->counter_set);
kbase_hwcnt_gpu_set_to_physical(&phys_counter_set, backend_jm->info->counter_set);
enable.fe_bm = phys_enable_map.fe_bm;
enable.shader_bm = phys_enable_map.shader_bm;
@ -425,8 +390,7 @@ static int kbasep_hwcnt_backend_jm_dump_enable_nolock(
timestamp_ns = kbasep_hwcnt_backend_jm_timestamp_ns(backend);
/* Update the current configuration information. */
errcode = kbasep_hwcnt_gpu_update_curr_config(kbdev,
&backend_jm->curr_config);
errcode = kbasep_hwcnt_gpu_update_curr_config(kbdev, &backend_jm->curr_config);
if (errcode)
goto error;
@ -446,14 +410,12 @@ static int kbasep_hwcnt_backend_jm_dump_enable_nolock(
}
/* JM backend implementation of kbase_hwcnt_backend_dump_enable_fn */
static int kbasep_hwcnt_backend_jm_dump_enable(
struct kbase_hwcnt_backend *backend,
const struct kbase_hwcnt_enable_map *enable_map)
static int kbasep_hwcnt_backend_jm_dump_enable(struct kbase_hwcnt_backend *backend,
const struct kbase_hwcnt_enable_map *enable_map)
{
unsigned long flags;
int errcode;
struct kbase_hwcnt_backend_jm *backend_jm =
(struct kbase_hwcnt_backend_jm *)backend;
struct kbase_hwcnt_backend_jm *backend_jm = (struct kbase_hwcnt_backend_jm *)backend;
struct kbase_device *kbdev;
if (!backend_jm)
@ -463,8 +425,7 @@ static int kbasep_hwcnt_backend_jm_dump_enable(
spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
errcode = kbasep_hwcnt_backend_jm_dump_enable_nolock(
backend, enable_map);
errcode = kbasep_hwcnt_backend_jm_dump_enable_nolock(backend, enable_map);
spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
@ -472,12 +433,10 @@ static int kbasep_hwcnt_backend_jm_dump_enable(
}
/* JM backend implementation of kbase_hwcnt_backend_dump_disable_fn */
static void kbasep_hwcnt_backend_jm_dump_disable(
struct kbase_hwcnt_backend *backend)
static void kbasep_hwcnt_backend_jm_dump_disable(struct kbase_hwcnt_backend *backend)
{
int errcode;
struct kbase_hwcnt_backend_jm *backend_jm =
(struct kbase_hwcnt_backend_jm *)backend;
struct kbase_hwcnt_backend_jm *backend_jm = (struct kbase_hwcnt_backend_jm *)backend;
if (WARN_ON(!backend_jm) || !backend_jm->enabled)
return;
@ -491,11 +450,9 @@ static void kbasep_hwcnt_backend_jm_dump_disable(
}
/* JM backend implementation of kbase_hwcnt_backend_dump_clear_fn */
static int kbasep_hwcnt_backend_jm_dump_clear(
struct kbase_hwcnt_backend *backend)
static int kbasep_hwcnt_backend_jm_dump_clear(struct kbase_hwcnt_backend *backend)
{
struct kbase_hwcnt_backend_jm *backend_jm =
(struct kbase_hwcnt_backend_jm *)backend;
struct kbase_hwcnt_backend_jm *backend_jm = (struct kbase_hwcnt_backend_jm *)backend;
if (!backend_jm || !backend_jm->enabled)
return -EINVAL;
@ -504,12 +461,10 @@ static int kbasep_hwcnt_backend_jm_dump_clear(
}
/* JM backend implementation of kbase_hwcnt_backend_dump_request_fn */
static int kbasep_hwcnt_backend_jm_dump_request(
struct kbase_hwcnt_backend *backend,
u64 *dump_time_ns)
static int kbasep_hwcnt_backend_jm_dump_request(struct kbase_hwcnt_backend *backend,
u64 *dump_time_ns)
{
struct kbase_hwcnt_backend_jm *backend_jm =
(struct kbase_hwcnt_backend_jm *)backend;
struct kbase_hwcnt_backend_jm *backend_jm = (struct kbase_hwcnt_backend_jm *)backend;
struct kbase_device *kbdev;
const struct kbase_hwcnt_metadata *metadata;
u64 current_cycle_count;
@ -528,28 +483,25 @@ static int kbasep_hwcnt_backend_jm_dump_request(
*dump_time_ns = kbasep_hwcnt_backend_jm_timestamp_ns(backend);
ret = kbase_instr_hwcnt_request_dump(backend_jm->kctx);
kbase_hwcnt_metadata_for_each_clock(metadata, clk) {
if (!kbase_hwcnt_clk_enable_map_enabled(
backend_jm->clk_enable_map, clk))
kbase_hwcnt_metadata_for_each_clock(metadata, clk)
{
if (!kbase_hwcnt_clk_enable_map_enabled(backend_jm->clk_enable_map, clk))
continue;
if (clk == KBASE_CLOCK_DOMAIN_TOP) {
/* Read cycle count for top clock domain. */
kbase_backend_get_gpu_time_norequest(
kbdev, &current_cycle_count,
NULL, NULL);
kbase_backend_get_gpu_time_norequest(kbdev, &current_cycle_count,
NULL, NULL);
} else {
/*
* Estimate cycle count for non-top clock
* domain.
*/
current_cycle_count = kbase_ccswe_cycle_at(
&backend_jm->ccswe_shader_cores,
*dump_time_ns);
&backend_jm->ccswe_shader_cores, *dump_time_ns);
}
backend_jm->cycle_count_elapsed[clk] =
current_cycle_count -
backend_jm->prev_cycle_count[clk];
current_cycle_count - backend_jm->prev_cycle_count[clk];
/*
* Keep the current cycle count for later calculation.
@ -563,11 +515,9 @@ static int kbasep_hwcnt_backend_jm_dump_request(
}
/* JM backend implementation of kbase_hwcnt_backend_dump_wait_fn */
static int kbasep_hwcnt_backend_jm_dump_wait(
struct kbase_hwcnt_backend *backend)
static int kbasep_hwcnt_backend_jm_dump_wait(struct kbase_hwcnt_backend *backend)
{
struct kbase_hwcnt_backend_jm *backend_jm =
(struct kbase_hwcnt_backend_jm *)backend;
struct kbase_hwcnt_backend_jm *backend_jm = (struct kbase_hwcnt_backend_jm *)backend;
if (!backend_jm || !backend_jm->enabled)
return -EINVAL;
@ -576,14 +526,12 @@ static int kbasep_hwcnt_backend_jm_dump_wait(
}
/* JM backend implementation of kbase_hwcnt_backend_dump_get_fn */
static int kbasep_hwcnt_backend_jm_dump_get(
struct kbase_hwcnt_backend *backend,
struct kbase_hwcnt_dump_buffer *dst,
const struct kbase_hwcnt_enable_map *dst_enable_map,
bool accumulate)
static int kbasep_hwcnt_backend_jm_dump_get(struct kbase_hwcnt_backend *backend,
struct kbase_hwcnt_dump_buffer *dst,
const struct kbase_hwcnt_enable_map *dst_enable_map,
bool accumulate)
{
struct kbase_hwcnt_backend_jm *backend_jm =
(struct kbase_hwcnt_backend_jm *)backend;
struct kbase_hwcnt_backend_jm *backend_jm = (struct kbase_hwcnt_backend_jm *)backend;
size_t clk;
#if IS_ENABLED(CONFIG_MALI_BIFROST_NO_MALI)
struct kbase_device *kbdev;
@ -597,16 +545,15 @@ static int kbasep_hwcnt_backend_jm_dump_get(
return -EINVAL;
/* Invalidate the kernel buffer before reading from it. */
kbase_sync_mem_regions(
backend_jm->kctx, backend_jm->vmap, KBASE_SYNC_TO_CPU);
kbase_sync_mem_regions(backend_jm->kctx, backend_jm->vmap, KBASE_SYNC_TO_CPU);
/* Dump sample to the internal 64-bit user buffer. */
kbasep_hwcnt_backend_jm_dump_sample(backend_jm);
/* Extract elapsed cycle count for each clock domain if enabled. */
kbase_hwcnt_metadata_for_each_clock(dst_enable_map->metadata, clk) {
if (!kbase_hwcnt_clk_enable_map_enabled(
dst_enable_map->clk_enable_map, clk))
kbase_hwcnt_metadata_for_each_clock(dst_enable_map->metadata, clk)
{
if (!kbase_hwcnt_clk_enable_map_enabled(dst_enable_map->clk_enable_map, clk))
continue;
/* Reset the counter to zero if accumulation is off. */
@ -621,17 +568,16 @@ static int kbasep_hwcnt_backend_jm_dump_get(
spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
/* Update the current configuration information. */
errcode = kbasep_hwcnt_gpu_update_curr_config(kbdev,
&backend_jm->curr_config);
errcode = kbasep_hwcnt_gpu_update_curr_config(kbdev, &backend_jm->curr_config);
spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
if (errcode)
return errcode;
#endif /* CONFIG_MALI_BIFROST_NO_MALI */
return kbase_hwcnt_jm_dump_get(dst, backend_jm->to_user_buf,
dst_enable_map, backend_jm->pm_core_mask,
&backend_jm->curr_config, accumulate);
return kbase_hwcnt_jm_dump_get(dst, backend_jm->to_user_buf, dst_enable_map,
backend_jm->pm_core_mask, &backend_jm->curr_config,
accumulate);
}
/**
@ -643,10 +589,8 @@ static int kbasep_hwcnt_backend_jm_dump_get(
*
* Return: 0 on success, else error code.
*/
static int kbasep_hwcnt_backend_jm_dump_alloc(
const struct kbase_hwcnt_backend_jm_info *info,
struct kbase_context *kctx,
u64 *gpu_dump_va)
static int kbasep_hwcnt_backend_jm_dump_alloc(const struct kbase_hwcnt_backend_jm_info *info,
struct kbase_context *kctx, u64 *gpu_dump_va)
{
struct kbase_va_region *reg;
u64 flags;
@ -661,16 +605,12 @@ static int kbasep_hwcnt_backend_jm_dump_alloc(
WARN_ON(!kctx);
WARN_ON(!gpu_dump_va);
flags = BASE_MEM_PROT_CPU_RD |
BASE_MEM_PROT_GPU_WR |
BASEP_MEM_PERMANENT_KERNEL_MAPPING |
BASE_MEM_CACHED_CPU |
BASE_MEM_UNCACHED_GPU;
flags = BASE_MEM_PROT_CPU_RD | BASE_MEM_PROT_GPU_WR | BASEP_MEM_PERMANENT_KERNEL_MAPPING |
BASE_MEM_CACHED_CPU | BASE_MEM_UNCACHED_GPU;
nr_pages = PFN_UP(info->dump_bytes);
reg = kbase_mem_alloc(kctx, nr_pages, nr_pages, 0, &flags, gpu_dump_va,
mmu_sync_info);
reg = kbase_mem_alloc(kctx, nr_pages, nr_pages, 0, &flags, gpu_dump_va, mmu_sync_info);
if (!reg)
return -ENOMEM;
@ -683,9 +623,7 @@ static int kbasep_hwcnt_backend_jm_dump_alloc(
* @kctx: Non-NULL pointer to kbase context.
* @gpu_dump_va: GPU dump buffer virtual address.
*/
static void kbasep_hwcnt_backend_jm_dump_free(
struct kbase_context *kctx,
u64 gpu_dump_va)
static void kbasep_hwcnt_backend_jm_dump_free(struct kbase_context *kctx, u64 gpu_dump_va)
{
WARN_ON(!kctx);
if (gpu_dump_va)
@ -698,8 +636,7 @@ static void kbasep_hwcnt_backend_jm_dump_free(
*
* Can be safely called on a backend in any state of partial construction.
*/
static void kbasep_hwcnt_backend_jm_destroy(
struct kbase_hwcnt_backend_jm *backend)
static void kbasep_hwcnt_backend_jm_destroy(struct kbase_hwcnt_backend_jm *backend)
{
if (!backend)
return;
@ -712,8 +649,7 @@ static void kbasep_hwcnt_backend_jm_destroy(
kbase_phy_alloc_mapping_put(kctx, backend->vmap);
if (backend->gpu_dump_va)
kbasep_hwcnt_backend_jm_dump_free(
kctx, backend->gpu_dump_va);
kbasep_hwcnt_backend_jm_dump_free(kctx, backend->gpu_dump_va);
kbasep_js_release_privileged_ctx(kbdev, kctx);
kbase_destroy_context(kctx);
@ -731,9 +667,8 @@ static void kbasep_hwcnt_backend_jm_destroy(
*
* Return: 0 on success, else error code.
*/
static int kbasep_hwcnt_backend_jm_create(
const struct kbase_hwcnt_backend_jm_info *info,
struct kbase_hwcnt_backend_jm **out_backend)
static int kbasep_hwcnt_backend_jm_create(const struct kbase_hwcnt_backend_jm_info *info,
struct kbase_hwcnt_backend_jm **out_backend)
{
int errcode;
struct kbase_device *kbdev;
@ -749,28 +684,25 @@ static int kbasep_hwcnt_backend_jm_create(
goto alloc_error;
backend->info = info;
kbasep_hwcnt_backend_jm_init_layout(&info->hwcnt_gpu_info,
&backend->phys_layout);
kbasep_hwcnt_backend_jm_init_layout(&info->hwcnt_gpu_info, &backend->phys_layout);
backend->kctx = kbase_create_context(kbdev, true,
BASE_CONTEXT_SYSTEM_MONITOR_SUBMIT_DISABLED, 0, NULL);
BASE_CONTEXT_SYSTEM_MONITOR_SUBMIT_DISABLED, 0, NULL);
if (!backend->kctx)
goto alloc_error;
kbasep_js_schedule_privileged_ctx(kbdev, backend->kctx);
errcode = kbasep_hwcnt_backend_jm_dump_alloc(
info, backend->kctx, &backend->gpu_dump_va);
errcode = kbasep_hwcnt_backend_jm_dump_alloc(info, backend->kctx, &backend->gpu_dump_va);
if (errcode)
goto error;
backend->cpu_dump_va = kbase_phy_alloc_mapping_get(backend->kctx,
backend->gpu_dump_va, &backend->vmap);
backend->cpu_dump_va =
kbase_phy_alloc_mapping_get(backend->kctx, backend->gpu_dump_va, &backend->vmap);
if (!backend->cpu_dump_va || !backend->vmap)
goto alloc_error;
backend->to_user_buf =
kzalloc(info->metadata->dump_buf_bytes, GFP_KERNEL);
backend->to_user_buf = kzalloc(info->metadata->dump_buf_bytes, GFP_KERNEL);
if (!backend->to_user_buf)
goto alloc_error;
@ -798,9 +730,8 @@ kbasep_hwcnt_backend_jm_metadata(const struct kbase_hwcnt_backend_info *info)
}
/* JM backend implementation of kbase_hwcnt_backend_init_fn */
static int kbasep_hwcnt_backend_jm_init(
const struct kbase_hwcnt_backend_info *info,
struct kbase_hwcnt_backend **out_backend)
static int kbasep_hwcnt_backend_jm_init(const struct kbase_hwcnt_backend_info *info,
struct kbase_hwcnt_backend **out_backend)
{
int errcode;
struct kbase_hwcnt_backend_jm *backend = NULL;
@ -808,8 +739,8 @@ static int kbasep_hwcnt_backend_jm_init(
if (!info || !out_backend)
return -EINVAL;
errcode = kbasep_hwcnt_backend_jm_create(
(const struct kbase_hwcnt_backend_jm_info *) info, &backend);
errcode = kbasep_hwcnt_backend_jm_create((const struct kbase_hwcnt_backend_jm_info *)info,
&backend);
if (errcode)
return errcode;
@ -825,8 +756,7 @@ static void kbasep_hwcnt_backend_jm_term(struct kbase_hwcnt_backend *backend)
return;
kbasep_hwcnt_backend_jm_dump_disable(backend);
kbasep_hwcnt_backend_jm_destroy(
(struct kbase_hwcnt_backend_jm *)backend);
kbasep_hwcnt_backend_jm_destroy((struct kbase_hwcnt_backend_jm *)backend);
}
/**
@ -835,8 +765,7 @@ static void kbasep_hwcnt_backend_jm_term(struct kbase_hwcnt_backend *backend)
*
* Can be safely called on a backend info in any state of partial construction.
*/
static void kbasep_hwcnt_backend_jm_info_destroy(
const struct kbase_hwcnt_backend_jm_info *info)
static void kbasep_hwcnt_backend_jm_info_destroy(const struct kbase_hwcnt_backend_jm_info *info)
{
if (!info)
return;
@ -852,9 +781,8 @@ static void kbasep_hwcnt_backend_jm_info_destroy(
*
* Return: 0 on success, else error code.
*/
static int kbasep_hwcnt_backend_jm_info_create(
struct kbase_device *kbdev,
const struct kbase_hwcnt_backend_jm_info **out_info)
static int kbasep_hwcnt_backend_jm_info_create(struct kbase_device *kbdev,
const struct kbase_hwcnt_backend_jm_info **out_info)
{
int errcode = -ENOMEM;
struct kbase_hwcnt_backend_jm_info *info = NULL;
@ -877,15 +805,12 @@ static int kbasep_hwcnt_backend_jm_info_create(
info->counter_set = KBASE_HWCNT_SET_PRIMARY;
#endif
errcode = kbasep_hwcnt_backend_jm_gpu_info_init(kbdev,
&info->hwcnt_gpu_info);
errcode = kbasep_hwcnt_backend_jm_gpu_info_init(kbdev, &info->hwcnt_gpu_info);
if (errcode)
goto error;
errcode = kbase_hwcnt_jm_metadata_create(&info->hwcnt_gpu_info,
info->counter_set,
&info->metadata,
&info->dump_bytes);
errcode = kbase_hwcnt_jm_metadata_create(&info->hwcnt_gpu_info, info->counter_set,
&info->metadata, &info->dump_bytes);
if (errcode)
goto error;
@ -897,9 +822,8 @@ static int kbasep_hwcnt_backend_jm_info_create(
return errcode;
}
int kbase_hwcnt_backend_jm_create(
struct kbase_device *kbdev,
struct kbase_hwcnt_backend_interface *iface)
int kbase_hwcnt_backend_jm_create(struct kbase_device *kbdev,
struct kbase_hwcnt_backend_interface *iface)
{
int errcode;
const struct kbase_hwcnt_backend_jm_info *info = NULL;
@ -928,8 +852,7 @@ int kbase_hwcnt_backend_jm_create(
return 0;
}
void kbase_hwcnt_backend_jm_destroy(
struct kbase_hwcnt_backend_interface *iface)
void kbase_hwcnt_backend_jm_destroy(struct kbase_hwcnt_backend_interface *iface)
{
if (!iface)
return;

View File

@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
* (C) COPYRIGHT 2018, 2020-2021 ARM Limited. All rights reserved.
* (C) COPYRIGHT 2018, 2020-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@ -27,7 +27,7 @@
#ifndef _KBASE_HWCNT_BACKEND_JM_H_
#define _KBASE_HWCNT_BACKEND_JM_H_
#include "mali_kbase_hwcnt_backend.h"
#include "hwcnt/backend/mali_kbase_hwcnt_backend.h"
struct kbase_device;
@ -42,9 +42,8 @@ struct kbase_device;
*
* Return: 0 on success, else error code.
*/
int kbase_hwcnt_backend_jm_create(
struct kbase_device *kbdev,
struct kbase_hwcnt_backend_interface *iface);
int kbase_hwcnt_backend_jm_create(struct kbase_device *kbdev,
struct kbase_hwcnt_backend_interface *iface);
/**
* kbase_hwcnt_backend_jm_destroy() - Destroy a JM hardware counter backend
@ -54,7 +53,6 @@ int kbase_hwcnt_backend_jm_create(
* Can be safely called on an all-zeroed interface, or on an already destroyed
* interface.
*/
void kbase_hwcnt_backend_jm_destroy(
struct kbase_hwcnt_backend_interface *iface);
void kbase_hwcnt_backend_jm_destroy(struct kbase_hwcnt_backend_interface *iface);
#endif /* _KBASE_HWCNT_BACKEND_JM_H_ */

View File

@ -21,11 +21,12 @@
#include <mali_kbase.h>
#include <mali_kbase_hwcnt_gpu.h>
#include <mali_kbase_hwcnt_types.h>
#include <hwcnt/mali_kbase_hwcnt_gpu.h>
#include <hwcnt/mali_kbase_hwcnt_types.h>
#include <mali_kbase_hwcnt_backend.h>
#include <mali_kbase_hwcnt_watchdog_if.h>
#include <hwcnt/backend/mali_kbase_hwcnt_backend.h>
#include <hwcnt/backend/mali_kbase_hwcnt_backend_jm_watchdog.h>
#include <hwcnt/mali_kbase_hwcnt_watchdog_if.h>
#if IS_ENABLED(CONFIG_MALI_IS_FPGA) && !IS_ENABLED(CONFIG_MALI_BIFROST_NO_MALI)
/* Backend watchdog timer interval in milliseconds: 18 seconds. */
@ -118,8 +119,7 @@ enum backend_watchdog_state {
*/
enum wd_init_state {
HWCNT_JM_WD_INIT_START,
HWCNT_JM_WD_INIT_ALLOC = HWCNT_JM_WD_INIT_START,
HWCNT_JM_WD_INIT_BACKEND,
HWCNT_JM_WD_INIT_BACKEND = HWCNT_JM_WD_INIT_START,
HWCNT_JM_WD_INIT_ENABLE_MAP,
HWCNT_JM_WD_INIT_DUMP_BUFFER,
HWCNT_JM_WD_INIT_END
@ -296,16 +296,10 @@ kbasep_hwcnt_backend_jm_watchdog_term_partial(struct kbase_hwcnt_backend_jm_watc
if (!wd_backend)
return;
/* disable timer thread to avoid concurrent access to shared resources */
wd_backend->info->dump_watchdog_iface->disable(
wd_backend->info->dump_watchdog_iface->timer);
WARN_ON(state > HWCNT_JM_WD_INIT_END);
/* The loop exits when state reaches HWCNT_JM_WD_INIT_START. */
while (state-- > HWCNT_JM_WD_INIT_START) {
switch (state) {
case HWCNT_JM_WD_INIT_ALLOC:
kfree(wd_backend);
break;
case HWCNT_JM_WD_INIT_BACKEND:
wd_backend->info->jm_backend_iface->term(wd_backend->jm_backend);
break;
@ -319,6 +313,8 @@ kbasep_hwcnt_backend_jm_watchdog_term_partial(struct kbase_hwcnt_backend_jm_watc
break;
}
}
kfree(wd_backend);
}
/* Job manager watchdog backend, implementation of kbase_hwcnt_backend_term_fn
@ -326,11 +322,17 @@ kbasep_hwcnt_backend_jm_watchdog_term_partial(struct kbase_hwcnt_backend_jm_watc
*/
static void kbasep_hwcnt_backend_jm_watchdog_term(struct kbase_hwcnt_backend *backend)
{
	/*
	 * Terminate a fully initialised watchdog backend. A NULL backend is a
	 * no-op. The opaque backend handle is the watchdog backend itself.
	 */
	struct kbase_hwcnt_backend_jm_watchdog *wd_backend =
		(struct kbase_hwcnt_backend_jm_watchdog *)backend;

	if (!backend)
		return;

	/* disable timer thread to avoid concurrent access to shared resources */
	wd_backend->info->dump_watchdog_iface->disable(
		wd_backend->info->dump_watchdog_iface->timer);

	/*
	 * Tear down exactly once, and only after the timer has been disabled:
	 * the partial-term helper frees wd_backend, so it must be the last
	 * access to the object and must not be invoked a second time.
	 */
	kbasep_hwcnt_backend_jm_watchdog_term_partial(wd_backend, HWCNT_JM_WD_INIT_END);
}
/* Job manager watchdog backend, implementation of kbase_hwcnt_backend_init_fn */
@ -350,20 +352,20 @@ static int kbasep_hwcnt_backend_jm_watchdog_init(const struct kbase_hwcnt_backen
jm_info = wd_info->jm_backend_iface->info;
metadata = wd_info->jm_backend_iface->metadata(wd_info->jm_backend_iface->info);
wd_backend = kmalloc(sizeof(*wd_backend), GFP_KERNEL);
if (!wd_backend) {
*out_backend = NULL;
return -ENOMEM;
}
*wd_backend = (struct kbase_hwcnt_backend_jm_watchdog){
.info = wd_info,
.timeout_ms = hwcnt_backend_watchdog_timer_interval_ms,
.locked = { .state = HWCNT_JM_WD_IDLE_BUFFER_EMPTY, .is_enabled = false }
};
while (state < HWCNT_JM_WD_INIT_END && !errcode) {
switch (state) {
case HWCNT_JM_WD_INIT_ALLOC:
wd_backend = kmalloc(sizeof(*wd_backend), GFP_KERNEL);
if (wd_backend) {
*wd_backend = (struct kbase_hwcnt_backend_jm_watchdog){
.info = wd_info,
.timeout_ms = hwcnt_backend_watchdog_timer_interval_ms,
.locked = { .state = HWCNT_JM_WD_IDLE_BUFFER_EMPTY,
.is_enabled = false }
};
} else
errcode = -ENOMEM;
break;
case HWCNT_JM_WD_INIT_BACKEND:
errcode = wd_info->jm_backend_iface->init(jm_info, &wd_backend->jm_backend);
break;
@ -823,5 +825,5 @@ void kbase_hwcnt_backend_jm_watchdog_destroy(struct kbase_hwcnt_backend_interfac
kfree((struct kbase_hwcnt_backend_jm_watchdog_info *)iface->info);
/* Blank the watchdog backend interface. */
*iface = (struct kbase_hwcnt_backend_interface){ NULL };
memset(iface, 0, sizeof(*iface));
}

View File

@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
* (C) COPYRIGHT 2021 ARM Limited. All rights reserved.
* (C) COPYRIGHT 2021-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@ -32,8 +32,8 @@
#ifndef _KBASE_HWCNT_BACKEND_JM_WATCHDOG_H_
#define _KBASE_HWCNT_BACKEND_JM_WATCHDOG_H_
#include <mali_kbase_hwcnt_backend.h>
#include <mali_kbase_hwcnt_watchdog_if.h>
#include <hwcnt/backend/mali_kbase_hwcnt_backend.h>
#include <hwcnt/mali_kbase_hwcnt_watchdog_if.h>
/**
* kbase_hwcnt_backend_jm_watchdog_create() - Create a job manager hardware counter watchdog

View File

@ -23,10 +23,10 @@
* Implementation of hardware counter context and accumulator APIs.
*/
#include "mali_kbase_hwcnt_context.h"
#include "mali_kbase_hwcnt_accumulator.h"
#include "mali_kbase_hwcnt_backend.h"
#include "mali_kbase_hwcnt_types.h"
#include "hwcnt/mali_kbase_hwcnt_context.h"
#include "hwcnt/mali_kbase_hwcnt_accumulator.h"
#include "hwcnt/backend/mali_kbase_hwcnt_backend.h"
#include "hwcnt/mali_kbase_hwcnt_types.h"
#include <linux/mutex.h>
#include <linux/spinlock.h>
@ -39,11 +39,7 @@
* @ACCUM_STATE_ENABLED: Enabled state, where dumping is enabled if there are
* any enabled counters.
*/
/*
 * Single definition of the accumulator state enum. The enumerators keep their
 * implicit values (0, 1, 2) in this order.
 */
enum kbase_hwcnt_accum_state {
	ACCUM_STATE_ERROR,
	ACCUM_STATE_DISABLED,
	/* Enabled state: dumping is enabled if there are any enabled counters. */
	ACCUM_STATE_ENABLED
};
/**
* struct kbase_hwcnt_accumulator - Hardware counter accumulator structure.
@ -130,9 +126,8 @@ struct kbase_hwcnt_context {
struct workqueue_struct *wq;
};
int kbase_hwcnt_context_init(
const struct kbase_hwcnt_backend_interface *iface,
struct kbase_hwcnt_context **out_hctx)
int kbase_hwcnt_context_init(const struct kbase_hwcnt_backend_interface *iface,
struct kbase_hwcnt_context **out_hctx)
{
struct kbase_hwcnt_context *hctx = NULL;
@ -149,8 +144,7 @@ int kbase_hwcnt_context_init(
mutex_init(&hctx->accum_lock);
hctx->accum_inited = false;
hctx->wq =
alloc_workqueue("mali_kbase_hwcnt", WQ_HIGHPRI | WQ_UNBOUND, 0);
hctx->wq = alloc_workqueue("mali_kbase_hwcnt", WQ_HIGHPRI | WQ_UNBOUND, 0);
if (!hctx->wq)
goto err_alloc_workqueue;
@ -208,35 +202,30 @@ static int kbasep_hwcnt_accumulator_init(struct kbase_hwcnt_context *hctx)
WARN_ON(!hctx);
WARN_ON(!hctx->accum_inited);
errcode = hctx->iface->init(
hctx->iface->info, &hctx->accum.backend);
errcode = hctx->iface->init(hctx->iface->info, &hctx->accum.backend);
if (errcode)
goto error;
hctx->accum.metadata = hctx->iface->metadata(hctx->iface->info);
hctx->accum.state = ACCUM_STATE_ERROR;
errcode = kbase_hwcnt_enable_map_alloc(hctx->accum.metadata,
&hctx->accum.enable_map);
errcode = kbase_hwcnt_enable_map_alloc(hctx->accum.metadata, &hctx->accum.enable_map);
if (errcode)
goto error;
hctx->accum.enable_map_any_enabled = false;
errcode = kbase_hwcnt_dump_buffer_alloc(hctx->accum.metadata,
&hctx->accum.accum_buf);
errcode = kbase_hwcnt_dump_buffer_alloc(hctx->accum.metadata, &hctx->accum.accum_buf);
if (errcode)
goto error;
errcode = kbase_hwcnt_enable_map_alloc(hctx->accum.metadata,
&hctx->accum.scratch_map);
errcode = kbase_hwcnt_enable_map_alloc(hctx->accum.metadata, &hctx->accum.scratch_map);
if (errcode)
goto error;
hctx->accum.accumulated = false;
hctx->accum.ts_last_dump_ns =
hctx->iface->timestamp_ns(hctx->accum.backend);
hctx->accum.ts_last_dump_ns = hctx->iface->timestamp_ns(hctx->accum.backend);
return 0;
@ -252,8 +241,7 @@ static int kbasep_hwcnt_accumulator_init(struct kbase_hwcnt_context *hctx)
* @hctx: Non-NULL pointer to hardware counter context.
* @accumulate: True if we should accumulate before disabling, else false.
*/
static void kbasep_hwcnt_accumulator_disable(
struct kbase_hwcnt_context *hctx, bool accumulate)
static void kbasep_hwcnt_accumulator_disable(struct kbase_hwcnt_context *hctx, bool accumulate)
{
int errcode = 0;
bool backend_enabled = false;
@ -272,8 +260,7 @@ static void kbasep_hwcnt_accumulator_disable(
WARN_ON(hctx->disable_count != 0);
WARN_ON(hctx->accum.state == ACCUM_STATE_DISABLED);
if ((hctx->accum.state == ACCUM_STATE_ENABLED) &&
(accum->enable_map_any_enabled))
if ((hctx->accum.state == ACCUM_STATE_ENABLED) && (accum->enable_map_any_enabled))
backend_enabled = true;
if (!backend_enabled)
@ -297,8 +284,8 @@ static void kbasep_hwcnt_accumulator_disable(
if (errcode)
goto disable;
errcode = hctx->iface->dump_get(accum->backend,
&accum->accum_buf, &accum->enable_map, accum->accumulated);
errcode = hctx->iface->dump_get(accum->backend, &accum->accum_buf, &accum->enable_map,
accum->accumulated);
if (errcode)
goto disable;
@ -336,8 +323,7 @@ static void kbasep_hwcnt_accumulator_enable(struct kbase_hwcnt_context *hctx)
/* The backend only needs enabling if any counters are enabled */
if (accum->enable_map_any_enabled)
errcode = hctx->iface->dump_enable_nolock(
accum->backend, &accum->enable_map);
errcode = hctx->iface->dump_enable_nolock(accum->backend, &accum->enable_map);
if (!errcode)
accum->state = ACCUM_STATE_ENABLED;
@ -364,12 +350,9 @@ static void kbasep_hwcnt_accumulator_enable(struct kbase_hwcnt_context *hctx)
*
* Return: 0 on success, else error code.
*/
static int kbasep_hwcnt_accumulator_dump(
struct kbase_hwcnt_context *hctx,
u64 *ts_start_ns,
u64 *ts_end_ns,
struct kbase_hwcnt_dump_buffer *dump_buf,
const struct kbase_hwcnt_enable_map *new_map)
static int kbasep_hwcnt_accumulator_dump(struct kbase_hwcnt_context *hctx, u64 *ts_start_ns,
u64 *ts_end_ns, struct kbase_hwcnt_dump_buffer *dump_buf,
const struct kbase_hwcnt_enable_map *new_map)
{
int errcode = 0;
unsigned long flags;
@ -398,8 +381,7 @@ static int kbasep_hwcnt_accumulator_dump(
kbase_hwcnt_enable_map_copy(cur_map, &accum->enable_map);
if (new_map)
new_map_any_enabled =
kbase_hwcnt_enable_map_any_enabled(new_map);
new_map_any_enabled = kbase_hwcnt_enable_map_any_enabled(new_map);
/*
* We're holding accum_lock, so the accumulator state might transition
@ -426,8 +408,7 @@ static int kbasep_hwcnt_accumulator_dump(
* then we'll do it ourselves after the dump.
*/
if (new_map) {
kbase_hwcnt_enable_map_copy(
&accum->enable_map, new_map);
kbase_hwcnt_enable_map_copy(&accum->enable_map, new_map);
accum->enable_map_any_enabled = new_map_any_enabled;
}
@ -440,12 +421,10 @@ static int kbasep_hwcnt_accumulator_dump(
/* Initiate the dump if the backend is enabled. */
if ((state == ACCUM_STATE_ENABLED) && cur_map_any_enabled) {
if (dump_buf) {
errcode = hctx->iface->dump_request(
accum->backend, &dump_time_ns);
errcode = hctx->iface->dump_request(accum->backend, &dump_time_ns);
dump_requested = true;
} else {
dump_time_ns = hctx->iface->timestamp_ns(
accum->backend);
dump_time_ns = hctx->iface->timestamp_ns(accum->backend);
errcode = hctx->iface->dump_clear(accum->backend);
}
@ -457,8 +436,7 @@ static int kbasep_hwcnt_accumulator_dump(
/* Copy any accumulation into the dest buffer */
if (accum->accumulated && dump_buf) {
kbase_hwcnt_dump_buffer_copy(
dump_buf, &accum->accum_buf, cur_map);
kbase_hwcnt_dump_buffer_copy(dump_buf, &accum->accum_buf, cur_map);
dump_written = true;
}
@ -483,8 +461,7 @@ static int kbasep_hwcnt_accumulator_dump(
* we're already enabled and holding accum_lock is impossible.
*/
if (new_map_any_enabled) {
errcode = hctx->iface->dump_enable(
accum->backend, new_map);
errcode = hctx->iface->dump_enable(accum->backend, new_map);
if (errcode)
goto error;
}
@ -495,11 +472,8 @@ static int kbasep_hwcnt_accumulator_dump(
/* If we dumped, copy or accumulate it into the destination */
if (dump_requested) {
WARN_ON(state != ACCUM_STATE_ENABLED);
errcode = hctx->iface->dump_get(
accum->backend,
dump_buf,
cur_map,
dump_written);
errcode = hctx->iface->dump_get(accum->backend, dump_buf, cur_map,
dump_written);
if (errcode)
goto error;
dump_written = true;
@ -540,8 +514,7 @@ static int kbasep_hwcnt_accumulator_dump(
* @hctx: Non-NULL pointer to hardware counter context.
* @accumulate: True if we should accumulate before disabling, else false.
*/
static void kbasep_hwcnt_context_disable(
struct kbase_hwcnt_context *hctx, bool accumulate)
static void kbasep_hwcnt_context_disable(struct kbase_hwcnt_context *hctx, bool accumulate)
{
unsigned long flags;
@ -563,9 +536,8 @@ static void kbasep_hwcnt_context_disable(
}
}
int kbase_hwcnt_accumulator_acquire(
struct kbase_hwcnt_context *hctx,
struct kbase_hwcnt_accumulator **accum)
int kbase_hwcnt_accumulator_acquire(struct kbase_hwcnt_context *hctx,
struct kbase_hwcnt_accumulator **accum)
{
int errcode = 0;
unsigned long flags;
@ -618,9 +590,7 @@ int kbase_hwcnt_accumulator_acquire(
* Regardless of initial state, counters don't need to be enabled via
* the backend, as the initial enable map has no enabled counters.
*/
hctx->accum.state = (hctx->disable_count == 0) ?
ACCUM_STATE_ENABLED :
ACCUM_STATE_DISABLED;
hctx->accum.state = (hctx->disable_count == 0) ? ACCUM_STATE_ENABLED : ACCUM_STATE_DISABLED;
spin_unlock_irqrestore(&hctx->state_lock, flags);
@ -728,8 +698,7 @@ void kbase_hwcnt_context_enable(struct kbase_hwcnt_context *hctx)
spin_unlock_irqrestore(&hctx->state_lock, flags);
}
const struct kbase_hwcnt_metadata *kbase_hwcnt_context_metadata(
struct kbase_hwcnt_context *hctx)
const struct kbase_hwcnt_metadata *kbase_hwcnt_context_metadata(struct kbase_hwcnt_context *hctx)
{
if (!hctx)
return NULL;
@ -737,8 +706,7 @@ const struct kbase_hwcnt_metadata *kbase_hwcnt_context_metadata(
return hctx->iface->metadata(hctx->iface->info);
}
bool kbase_hwcnt_context_queue_work(struct kbase_hwcnt_context *hctx,
struct work_struct *work)
bool kbase_hwcnt_context_queue_work(struct kbase_hwcnt_context *hctx, struct work_struct *work)
{
if (WARN_ON(!hctx) || WARN_ON(!work))
return false;
@ -746,12 +714,10 @@ bool kbase_hwcnt_context_queue_work(struct kbase_hwcnt_context *hctx,
return queue_work(hctx->wq, work);
}
int kbase_hwcnt_accumulator_set_counters(
struct kbase_hwcnt_accumulator *accum,
const struct kbase_hwcnt_enable_map *new_map,
u64 *ts_start_ns,
u64 *ts_end_ns,
struct kbase_hwcnt_dump_buffer *dump_buf)
int kbase_hwcnt_accumulator_set_counters(struct kbase_hwcnt_accumulator *accum,
const struct kbase_hwcnt_enable_map *new_map,
u64 *ts_start_ns, u64 *ts_end_ns,
struct kbase_hwcnt_dump_buffer *dump_buf)
{
int errcode;
struct kbase_hwcnt_context *hctx;
@ -767,19 +733,15 @@ int kbase_hwcnt_accumulator_set_counters(
mutex_lock(&hctx->accum_lock);
errcode = kbasep_hwcnt_accumulator_dump(
hctx, ts_start_ns, ts_end_ns, dump_buf, new_map);
errcode = kbasep_hwcnt_accumulator_dump(hctx, ts_start_ns, ts_end_ns, dump_buf, new_map);
mutex_unlock(&hctx->accum_lock);
return errcode;
}
int kbase_hwcnt_accumulator_dump(
struct kbase_hwcnt_accumulator *accum,
u64 *ts_start_ns,
u64 *ts_end_ns,
struct kbase_hwcnt_dump_buffer *dump_buf)
int kbase_hwcnt_accumulator_dump(struct kbase_hwcnt_accumulator *accum, u64 *ts_start_ns,
u64 *ts_end_ns, struct kbase_hwcnt_dump_buffer *dump_buf)
{
int errcode;
struct kbase_hwcnt_context *hctx;
@ -794,8 +756,7 @@ int kbase_hwcnt_accumulator_dump(
mutex_lock(&hctx->accum_lock);
errcode = kbasep_hwcnt_accumulator_dump(
hctx, ts_start_ns, ts_end_ns, dump_buf, NULL);
errcode = kbasep_hwcnt_accumulator_dump(hctx, ts_start_ns, ts_end_ns, dump_buf, NULL);
mutex_unlock(&hctx->accum_lock);

View File

@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
* (C) COPYRIGHT 2018, 2020-2021 ARM Limited. All rights reserved.
* (C) COPYRIGHT 2018, 2020-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@ -67,9 +67,8 @@ struct kbase_hwcnt_dump_buffer;
*
* Return: 0 on success or error code.
*/
int kbase_hwcnt_accumulator_acquire(
struct kbase_hwcnt_context *hctx,
struct kbase_hwcnt_accumulator **accum);
int kbase_hwcnt_accumulator_acquire(struct kbase_hwcnt_context *hctx,
struct kbase_hwcnt_accumulator **accum);
/**
* kbase_hwcnt_accumulator_release() - Release a hardware counter accumulator.
@ -102,12 +101,10 @@ void kbase_hwcnt_accumulator_release(struct kbase_hwcnt_accumulator *accum);
*
* Return: 0 on success or error code.
*/
int kbase_hwcnt_accumulator_set_counters(
struct kbase_hwcnt_accumulator *accum,
const struct kbase_hwcnt_enable_map *new_map,
u64 *ts_start_ns,
u64 *ts_end_ns,
struct kbase_hwcnt_dump_buffer *dump_buf);
int kbase_hwcnt_accumulator_set_counters(struct kbase_hwcnt_accumulator *accum,
const struct kbase_hwcnt_enable_map *new_map,
u64 *ts_start_ns, u64 *ts_end_ns,
struct kbase_hwcnt_dump_buffer *dump_buf);
/**
* kbase_hwcnt_accumulator_dump() - Perform a dump of the currently enabled
@ -127,11 +124,8 @@ int kbase_hwcnt_accumulator_set_counters(
*
* Return: 0 on success or error code.
*/
int kbase_hwcnt_accumulator_dump(
struct kbase_hwcnt_accumulator *accum,
u64 *ts_start_ns,
u64 *ts_end_ns,
struct kbase_hwcnt_dump_buffer *dump_buf);
int kbase_hwcnt_accumulator_dump(struct kbase_hwcnt_accumulator *accum, u64 *ts_start_ns,
u64 *ts_end_ns, struct kbase_hwcnt_dump_buffer *dump_buf);
/**
* kbase_hwcnt_accumulator_timestamp_ns() - Get the current accumulator backend

View File

@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
* (C) COPYRIGHT 2018, 2020-2021 ARM Limited. All rights reserved.
* (C) COPYRIGHT 2018, 2020-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@ -43,9 +43,8 @@ struct kbase_hwcnt_context;
*
* Return: 0 on success, else error code.
*/
int kbase_hwcnt_context_init(
const struct kbase_hwcnt_backend_interface *iface,
struct kbase_hwcnt_context **out_hctx);
int kbase_hwcnt_context_init(const struct kbase_hwcnt_backend_interface *iface,
struct kbase_hwcnt_context **out_hctx);
/**
* kbase_hwcnt_context_term() - Terminate a hardware counter context.
@ -61,8 +60,7 @@ void kbase_hwcnt_context_term(struct kbase_hwcnt_context *hctx);
*
* Return: Non-NULL pointer to metadata, or NULL on error.
*/
const struct kbase_hwcnt_metadata *kbase_hwcnt_context_metadata(
struct kbase_hwcnt_context *hctx);
const struct kbase_hwcnt_metadata *kbase_hwcnt_context_metadata(struct kbase_hwcnt_context *hctx);
/**
* kbase_hwcnt_context_disable() - Increment the disable count of the context.
@ -145,7 +143,6 @@ void kbase_hwcnt_context_enable(struct kbase_hwcnt_context *hctx);
* this meant progress through the power management states could be stalled
* for however long that higher priority thread took.
*/
bool kbase_hwcnt_context_queue_work(struct kbase_hwcnt_context *hctx,
struct work_struct *work);
bool kbase_hwcnt_context_queue_work(struct kbase_hwcnt_context *hctx, struct work_struct *work);
#endif /* _KBASE_HWCNT_CONTEXT_H_ */

View File

@ -19,8 +19,8 @@
*
*/
#include "mali_kbase_hwcnt_gpu.h"
#include "mali_kbase_hwcnt_types.h"
#include "hwcnt/mali_kbase_hwcnt_gpu.h"
#include "hwcnt/mali_kbase_hwcnt_types.h"
#include <linux/err.h>
@ -32,8 +32,7 @@ enum enable_map_idx {
EM_COUNT,
};
static void kbasep_get_fe_block_type(u64 *dst, enum kbase_hwcnt_set counter_set,
bool is_csf)
static void kbasep_get_fe_block_type(u64 *dst, enum kbase_hwcnt_set counter_set, bool is_csf)
{
switch (counter_set) {
case KBASE_HWCNT_SET_PRIMARY:
@ -56,8 +55,7 @@ static void kbasep_get_fe_block_type(u64 *dst, enum kbase_hwcnt_set counter_set,
}
}
static void kbasep_get_tiler_block_type(u64 *dst,
enum kbase_hwcnt_set counter_set)
static void kbasep_get_tiler_block_type(u64 *dst, enum kbase_hwcnt_set counter_set)
{
switch (counter_set) {
case KBASE_HWCNT_SET_PRIMARY:
@ -72,8 +70,7 @@ static void kbasep_get_tiler_block_type(u64 *dst,
}
}
static void kbasep_get_sc_block_type(u64 *dst, enum kbase_hwcnt_set counter_set,
bool is_csf)
static void kbasep_get_sc_block_type(u64 *dst, enum kbase_hwcnt_set counter_set, bool is_csf)
{
switch (counter_set) {
case KBASE_HWCNT_SET_PRIMARY:
@ -93,8 +90,7 @@ static void kbasep_get_sc_block_type(u64 *dst, enum kbase_hwcnt_set counter_set,
}
}
static void kbasep_get_memsys_block_type(u64 *dst,
enum kbase_hwcnt_set counter_set)
static void kbasep_get_memsys_block_type(u64 *dst, enum kbase_hwcnt_set counter_set)
{
switch (counter_set) {
case KBASE_HWCNT_SET_PRIMARY:
@ -122,15 +118,14 @@ static void kbasep_get_memsys_block_type(u64 *dst,
*
* Return: 0 on success, else error code.
*/
static int kbasep_hwcnt_backend_gpu_metadata_create(
const struct kbase_hwcnt_gpu_info *gpu_info, const bool is_csf,
enum kbase_hwcnt_set counter_set,
const struct kbase_hwcnt_metadata **metadata)
static int kbasep_hwcnt_backend_gpu_metadata_create(const struct kbase_hwcnt_gpu_info *gpu_info,
const bool is_csf,
enum kbase_hwcnt_set counter_set,
const struct kbase_hwcnt_metadata **metadata)
{
struct kbase_hwcnt_description desc;
struct kbase_hwcnt_group_description group;
struct kbase_hwcnt_block_description
blks[KBASE_HWCNT_V5_BLOCK_TYPE_COUNT];
struct kbase_hwcnt_block_description blks[KBASE_HWCNT_V5_BLOCK_TYPE_COUNT];
size_t non_sc_block_count;
size_t sc_block_count;
@ -156,22 +151,19 @@ static int kbasep_hwcnt_backend_gpu_metadata_create(
kbasep_get_fe_block_type(&blks[0].type, counter_set, is_csf);
blks[0].inst_cnt = 1;
blks[0].hdr_cnt = KBASE_HWCNT_V5_HEADERS_PER_BLOCK;
blks[0].ctr_cnt = gpu_info->prfcnt_values_per_block -
KBASE_HWCNT_V5_HEADERS_PER_BLOCK;
blks[0].ctr_cnt = gpu_info->prfcnt_values_per_block - KBASE_HWCNT_V5_HEADERS_PER_BLOCK;
/* One Tiler block */
kbasep_get_tiler_block_type(&blks[1].type, counter_set);
blks[1].inst_cnt = 1;
blks[1].hdr_cnt = KBASE_HWCNT_V5_HEADERS_PER_BLOCK;
blks[1].ctr_cnt = gpu_info->prfcnt_values_per_block -
KBASE_HWCNT_V5_HEADERS_PER_BLOCK;
blks[1].ctr_cnt = gpu_info->prfcnt_values_per_block - KBASE_HWCNT_V5_HEADERS_PER_BLOCK;
/* l2_count memsys blks */
kbasep_get_memsys_block_type(&blks[2].type, counter_set);
blks[2].inst_cnt = gpu_info->l2_count;
blks[2].hdr_cnt = KBASE_HWCNT_V5_HEADERS_PER_BLOCK;
blks[2].ctr_cnt = gpu_info->prfcnt_values_per_block -
KBASE_HWCNT_V5_HEADERS_PER_BLOCK;
blks[2].ctr_cnt = gpu_info->prfcnt_values_per_block - KBASE_HWCNT_V5_HEADERS_PER_BLOCK;
/*
* There are as many shader cores in the system as there are bits set in
@ -192,8 +184,7 @@ static int kbasep_hwcnt_backend_gpu_metadata_create(
kbasep_get_sc_block_type(&blks[3].type, counter_set, is_csf);
blks[3].inst_cnt = sc_block_count;
blks[3].hdr_cnt = KBASE_HWCNT_V5_HEADERS_PER_BLOCK;
blks[3].ctr_cnt = gpu_info->prfcnt_values_per_block -
KBASE_HWCNT_V5_HEADERS_PER_BLOCK;
blks[3].ctr_cnt = gpu_info->prfcnt_values_per_block - KBASE_HWCNT_V5_HEADERS_PER_BLOCK;
WARN_ON(KBASE_HWCNT_V5_BLOCK_TYPE_COUNT != 4);
@ -220,8 +211,7 @@ static int kbasep_hwcnt_backend_gpu_metadata_create(
*
* Return: Size of buffer the GPU needs to perform a counter dump.
*/
static size_t
kbasep_hwcnt_backend_jm_dump_bytes(const struct kbase_hwcnt_gpu_info *gpu_info)
static size_t kbasep_hwcnt_backend_jm_dump_bytes(const struct kbase_hwcnt_gpu_info *gpu_info)
{
WARN_ON(!gpu_info);
@ -229,11 +219,10 @@ kbasep_hwcnt_backend_jm_dump_bytes(const struct kbase_hwcnt_gpu_info *gpu_info)
gpu_info->prfcnt_values_per_block * KBASE_HWCNT_VALUE_HW_BYTES;
}
int kbase_hwcnt_jm_metadata_create(
const struct kbase_hwcnt_gpu_info *gpu_info,
enum kbase_hwcnt_set counter_set,
const struct kbase_hwcnt_metadata **out_metadata,
size_t *out_dump_bytes)
int kbase_hwcnt_jm_metadata_create(const struct kbase_hwcnt_gpu_info *gpu_info,
enum kbase_hwcnt_set counter_set,
const struct kbase_hwcnt_metadata **out_metadata,
size_t *out_dump_bytes)
{
int errcode;
const struct kbase_hwcnt_metadata *metadata;
@ -250,8 +239,7 @@ int kbase_hwcnt_jm_metadata_create(
* all the available L2 cache and Shader cores are allocated.
*/
dump_bytes = kbasep_hwcnt_backend_jm_dump_bytes(gpu_info);
errcode = kbasep_hwcnt_backend_gpu_metadata_create(
gpu_info, false, counter_set, &metadata);
errcode = kbasep_hwcnt_backend_gpu_metadata_create(gpu_info, false, counter_set, &metadata);
if (errcode)
return errcode;
@ -276,10 +264,9 @@ void kbase_hwcnt_jm_metadata_destroy(const struct kbase_hwcnt_metadata *metadata
kbase_hwcnt_metadata_destroy(metadata);
}
int kbase_hwcnt_csf_metadata_create(
const struct kbase_hwcnt_gpu_info *gpu_info,
enum kbase_hwcnt_set counter_set,
const struct kbase_hwcnt_metadata **out_metadata)
int kbase_hwcnt_csf_metadata_create(const struct kbase_hwcnt_gpu_info *gpu_info,
enum kbase_hwcnt_set counter_set,
const struct kbase_hwcnt_metadata **out_metadata)
{
int errcode;
const struct kbase_hwcnt_metadata *metadata;
@ -287,8 +274,7 @@ int kbase_hwcnt_csf_metadata_create(
if (!gpu_info || !out_metadata)
return -EINVAL;
errcode = kbasep_hwcnt_backend_gpu_metadata_create(
gpu_info, true, counter_set, &metadata);
errcode = kbasep_hwcnt_backend_gpu_metadata_create(gpu_info, true, counter_set, &metadata);
if (errcode)
return errcode;
@ -297,8 +283,7 @@ int kbase_hwcnt_csf_metadata_create(
return 0;
}
void kbase_hwcnt_csf_metadata_destroy(
const struct kbase_hwcnt_metadata *metadata)
void kbase_hwcnt_csf_metadata_destroy(const struct kbase_hwcnt_metadata *metadata)
{
if (!metadata)
return;
@ -306,10 +291,7 @@ void kbase_hwcnt_csf_metadata_destroy(
kbase_hwcnt_metadata_destroy(metadata);
}
static bool is_block_type_shader(
const u64 grp_type,
const u64 blk_type,
const size_t blk)
static bool is_block_type_shader(const u64 grp_type, const u64 blk_type, const size_t blk)
{
bool is_shader = false;
@ -326,9 +308,7 @@ static bool is_block_type_shader(
return is_shader;
}
static bool is_block_type_l2_cache(
const u64 grp_type,
const u64 blk_type)
static bool is_block_type_l2_cache(const u64 grp_type, const u64 blk_type)
{
bool is_l2_cache = false;
@ -348,10 +328,8 @@ static bool is_block_type_l2_cache(
}
int kbase_hwcnt_jm_dump_get(struct kbase_hwcnt_dump_buffer *dst, u64 *src,
const struct kbase_hwcnt_enable_map *dst_enable_map,
u64 pm_core_mask,
const struct kbase_hwcnt_curr_config *curr_config,
bool accumulate)
const struct kbase_hwcnt_enable_map *dst_enable_map, u64 pm_core_mask,
const struct kbase_hwcnt_curr_config *curr_config, bool accumulate)
{
const struct kbase_hwcnt_metadata *metadata;
size_t grp, blk, blk_inst;
@ -362,28 +340,21 @@ int kbase_hwcnt_jm_dump_get(struct kbase_hwcnt_dump_buffer *dst, u64 *src,
/* Variables to deal with the current configuration */
int l2_count = 0;
if (!dst || !src || !dst_enable_map ||
(dst_enable_map->metadata != dst->metadata))
if (!dst || !src || !dst_enable_map || (dst_enable_map->metadata != dst->metadata))
return -EINVAL;
metadata = dst->metadata;
kbase_hwcnt_metadata_for_each_block(
metadata, grp, blk, blk_inst) {
const size_t hdr_cnt =
kbase_hwcnt_metadata_block_headers_count(
metadata, grp, blk);
kbase_hwcnt_metadata_for_each_block(metadata, grp, blk, blk_inst)
{
const size_t hdr_cnt = kbase_hwcnt_metadata_block_headers_count(metadata, grp, blk);
const size_t ctr_cnt =
kbase_hwcnt_metadata_block_counters_count(
metadata, grp, blk);
const u64 blk_type = kbase_hwcnt_metadata_block_type(
metadata, grp, blk);
kbase_hwcnt_metadata_block_counters_count(metadata, grp, blk);
const u64 blk_type = kbase_hwcnt_metadata_block_type(metadata, grp, blk);
const bool is_shader_core = is_block_type_shader(
kbase_hwcnt_metadata_group_type(metadata, grp),
blk_type, blk);
kbase_hwcnt_metadata_group_type(metadata, grp), blk_type, blk);
const bool is_l2_cache = is_block_type_l2_cache(
kbase_hwcnt_metadata_group_type(metadata, grp),
blk_type);
kbase_hwcnt_metadata_group_type(metadata, grp), blk_type);
const bool is_undefined = kbase_hwcnt_is_block_type_undefined(
kbase_hwcnt_metadata_group_type(metadata, grp), blk_type);
bool hw_res_available = true;
@ -412,10 +383,9 @@ int kbase_hwcnt_jm_dump_get(struct kbase_hwcnt_dump_buffer *dst, u64 *src,
/*
* Skip block if no values in the destination block are enabled.
*/
if (kbase_hwcnt_enable_map_block_enabled(
dst_enable_map, grp, blk, blk_inst)) {
u64 *dst_blk = kbase_hwcnt_dump_buffer_block_instance(
dst, grp, blk, blk_inst);
if (kbase_hwcnt_enable_map_block_enabled(dst_enable_map, grp, blk, blk_inst)) {
u64 *dst_blk =
kbase_hwcnt_dump_buffer_block_instance(dst, grp, blk, blk_inst);
const u64 *src_blk = dump_src + src_offset;
bool blk_powered;
@ -435,13 +405,11 @@ int kbase_hwcnt_jm_dump_get(struct kbase_hwcnt_dump_buffer *dst, u64 *src,
if (blk_powered && !is_undefined && hw_res_available) {
/* Only powered and defined blocks have valid data. */
if (accumulate) {
kbase_hwcnt_dump_buffer_block_accumulate(
dst_blk, src_blk, hdr_cnt,
ctr_cnt);
kbase_hwcnt_dump_buffer_block_accumulate(dst_blk, src_blk,
hdr_cnt, ctr_cnt);
} else {
kbase_hwcnt_dump_buffer_block_copy(
dst_blk, src_blk,
(hdr_cnt + ctr_cnt));
kbase_hwcnt_dump_buffer_block_copy(dst_blk, src_blk,
(hdr_cnt + ctr_cnt));
}
} else {
/* Even though the block might be undefined, the
@ -469,26 +437,23 @@ int kbase_hwcnt_jm_dump_get(struct kbase_hwcnt_dump_buffer *dst, u64 *src,
}
int kbase_hwcnt_csf_dump_get(struct kbase_hwcnt_dump_buffer *dst, u64 *src,
const struct kbase_hwcnt_enable_map *dst_enable_map,
bool accumulate)
const struct kbase_hwcnt_enable_map *dst_enable_map, bool accumulate)
{
const struct kbase_hwcnt_metadata *metadata;
const u64 *dump_src = src;
size_t src_offset = 0;
size_t grp, blk, blk_inst;
if (!dst || !src || !dst_enable_map ||
(dst_enable_map->metadata != dst->metadata))
if (!dst || !src || !dst_enable_map || (dst_enable_map->metadata != dst->metadata))
return -EINVAL;
metadata = dst->metadata;
kbase_hwcnt_metadata_for_each_block(metadata, grp, blk, blk_inst) {
const size_t hdr_cnt = kbase_hwcnt_metadata_block_headers_count(
metadata, grp, blk);
kbase_hwcnt_metadata_for_each_block(metadata, grp, blk, blk_inst)
{
const size_t hdr_cnt = kbase_hwcnt_metadata_block_headers_count(metadata, grp, blk);
const size_t ctr_cnt =
kbase_hwcnt_metadata_block_counters_count(metadata, grp,
blk);
kbase_hwcnt_metadata_block_counters_count(metadata, grp, blk);
const uint64_t blk_type = kbase_hwcnt_metadata_block_type(metadata, grp, blk);
const bool is_undefined = kbase_hwcnt_is_block_type_undefined(
kbase_hwcnt_metadata_group_type(metadata, grp), blk_type);
@ -496,10 +461,9 @@ int kbase_hwcnt_csf_dump_get(struct kbase_hwcnt_dump_buffer *dst, u64 *src,
/*
* Skip block if no values in the destination block are enabled.
*/
if (kbase_hwcnt_enable_map_block_enabled(dst_enable_map, grp,
blk, blk_inst)) {
u64 *dst_blk = kbase_hwcnt_dump_buffer_block_instance(
dst, grp, blk, blk_inst);
if (kbase_hwcnt_enable_map_block_enabled(dst_enable_map, grp, blk, blk_inst)) {
u64 *dst_blk =
kbase_hwcnt_dump_buffer_block_instance(dst, grp, blk, blk_inst);
const u64 *src_blk = dump_src + src_offset;
if (!is_undefined) {
@ -542,12 +506,9 @@ int kbase_hwcnt_csf_dump_get(struct kbase_hwcnt_dump_buffer *dst, u64 *src,
* @hi: Non-NULL pointer to where high 64 bits of block enable map abstraction
* will be stored.
*/
static inline void kbasep_hwcnt_backend_gpu_block_map_from_physical(
u32 phys,
u64 *lo,
u64 *hi)
static inline void kbasep_hwcnt_backend_gpu_block_map_from_physical(u32 phys, u64 *lo, u64 *hi)
{
u64 dwords[2] = {0, 0};
u64 dwords[2] = { 0, 0 };
size_t dword_idx;
@ -572,9 +533,8 @@ static inline void kbasep_hwcnt_backend_gpu_block_map_from_physical(
*hi = dwords[1];
}
void kbase_hwcnt_gpu_enable_map_to_physical(
struct kbase_hwcnt_physical_enable_map *dst,
const struct kbase_hwcnt_enable_map *src)
void kbase_hwcnt_gpu_enable_map_to_physical(struct kbase_hwcnt_physical_enable_map *dst,
const struct kbase_hwcnt_enable_map *src)
{
const struct kbase_hwcnt_metadata *metadata;
u64 fe_bm[EM_COUNT] = { 0 };
@ -588,17 +548,13 @@ void kbase_hwcnt_gpu_enable_map_to_physical(
metadata = src->metadata;
kbase_hwcnt_metadata_for_each_block(
metadata, grp, blk, blk_inst) {
const u64 grp_type = kbase_hwcnt_metadata_group_type(
metadata, grp);
const u64 blk_type = kbase_hwcnt_metadata_block_type(
metadata, grp, blk);
const u64 *blk_map = kbase_hwcnt_enable_map_block_instance(
src, grp, blk, blk_inst);
kbase_hwcnt_metadata_for_each_block(metadata, grp, blk, blk_inst)
{
const u64 grp_type = kbase_hwcnt_metadata_group_type(metadata, grp);
const u64 blk_type = kbase_hwcnt_metadata_block_type(metadata, grp, blk);
const u64 *blk_map = kbase_hwcnt_enable_map_block_instance(src, grp, blk, blk_inst);
if ((enum kbase_hwcnt_gpu_group_type)grp_type ==
KBASE_HWCNT_GPU_GROUP_TYPE_V5) {
if ((enum kbase_hwcnt_gpu_group_type)grp_type == KBASE_HWCNT_GPU_GROUP_TYPE_V5) {
const size_t map_stride =
kbase_hwcnt_metadata_block_enable_map_stride(metadata, grp, blk);
size_t map_idx;
@ -649,8 +605,7 @@ void kbase_hwcnt_gpu_enable_map_to_physical(
kbase_hwcnt_backend_gpu_block_map_to_physical(mmu_l2_bm[EM_LO], mmu_l2_bm[EM_HI]);
}
void kbase_hwcnt_gpu_set_to_physical(enum kbase_hwcnt_physical_set *dst,
enum kbase_hwcnt_set src)
void kbase_hwcnt_gpu_set_to_physical(enum kbase_hwcnt_physical_set *dst, enum kbase_hwcnt_set src)
{
switch (src) {
case KBASE_HWCNT_SET_PRIMARY:
@ -667,9 +622,8 @@ void kbase_hwcnt_gpu_set_to_physical(enum kbase_hwcnt_physical_set *dst,
}
}
void kbase_hwcnt_gpu_enable_map_from_physical(
struct kbase_hwcnt_enable_map *dst,
const struct kbase_hwcnt_physical_enable_map *src)
void kbase_hwcnt_gpu_enable_map_from_physical(struct kbase_hwcnt_enable_map *dst,
const struct kbase_hwcnt_physical_enable_map *src)
{
const struct kbase_hwcnt_metadata *metadata;
@ -692,16 +646,13 @@ void kbase_hwcnt_gpu_enable_map_from_physical(
kbasep_hwcnt_backend_gpu_block_map_from_physical(src->mmu_l2_bm, &mmu_l2_bm[EM_LO],
&mmu_l2_bm[EM_HI]);
kbase_hwcnt_metadata_for_each_block(metadata, grp, blk, blk_inst) {
const u64 grp_type = kbase_hwcnt_metadata_group_type(
metadata, grp);
const u64 blk_type = kbase_hwcnt_metadata_block_type(
metadata, grp, blk);
u64 *blk_map = kbase_hwcnt_enable_map_block_instance(
dst, grp, blk, blk_inst);
kbase_hwcnt_metadata_for_each_block(metadata, grp, blk, blk_inst)
{
const u64 grp_type = kbase_hwcnt_metadata_group_type(metadata, grp);
const u64 blk_type = kbase_hwcnt_metadata_block_type(metadata, grp, blk);
u64 *blk_map = kbase_hwcnt_enable_map_block_instance(dst, grp, blk, blk_inst);
if ((enum kbase_hwcnt_gpu_group_type)grp_type ==
KBASE_HWCNT_GPU_GROUP_TYPE_V5) {
if ((enum kbase_hwcnt_gpu_group_type)grp_type == KBASE_HWCNT_GPU_GROUP_TYPE_V5) {
const size_t map_stride =
kbase_hwcnt_metadata_block_enable_map_stride(metadata, grp, blk);
size_t map_idx;
@ -744,29 +695,25 @@ void kbase_hwcnt_gpu_enable_map_from_physical(
}
}
void kbase_hwcnt_gpu_patch_dump_headers(
struct kbase_hwcnt_dump_buffer *buf,
const struct kbase_hwcnt_enable_map *enable_map)
void kbase_hwcnt_gpu_patch_dump_headers(struct kbase_hwcnt_dump_buffer *buf,
const struct kbase_hwcnt_enable_map *enable_map)
{
const struct kbase_hwcnt_metadata *metadata;
size_t grp, blk, blk_inst;
if (WARN_ON(!buf) || WARN_ON(!enable_map) ||
WARN_ON(buf->metadata != enable_map->metadata))
if (WARN_ON(!buf) || WARN_ON(!enable_map) || WARN_ON(buf->metadata != enable_map->metadata))
return;
metadata = buf->metadata;
kbase_hwcnt_metadata_for_each_block(metadata, grp, blk, blk_inst) {
const u64 grp_type =
kbase_hwcnt_metadata_group_type(metadata, grp);
u64 *buf_blk = kbase_hwcnt_dump_buffer_block_instance(
buf, grp, blk, blk_inst);
const u64 *blk_map = kbase_hwcnt_enable_map_block_instance(
enable_map, grp, blk, blk_inst);
kbase_hwcnt_metadata_for_each_block(metadata, grp, blk, blk_inst)
{
const u64 grp_type = kbase_hwcnt_metadata_group_type(metadata, grp);
u64 *buf_blk = kbase_hwcnt_dump_buffer_block_instance(buf, grp, blk, blk_inst);
const u64 *blk_map =
kbase_hwcnt_enable_map_block_instance(enable_map, grp, blk, blk_inst);
if ((enum kbase_hwcnt_gpu_group_type)grp_type ==
KBASE_HWCNT_GPU_GROUP_TYPE_V5) {
if ((enum kbase_hwcnt_gpu_group_type)grp_type == KBASE_HWCNT_GPU_GROUP_TYPE_V5) {
const size_t map_stride =
kbase_hwcnt_metadata_block_enable_map_stride(metadata, grp, blk);
u64 prfcnt_bm[EM_COUNT] = { 0 };

View File

@ -34,9 +34,8 @@ struct kbase_hwcnt_dump_buffer;
#define KBASE_HWCNT_V5_BLOCK_TYPE_COUNT 4
#define KBASE_HWCNT_V5_HEADERS_PER_BLOCK 4
#define KBASE_HWCNT_V5_DEFAULT_COUNTERS_PER_BLOCK 60
#define KBASE_HWCNT_V5_DEFAULT_VALUES_PER_BLOCK \
(KBASE_HWCNT_V5_HEADERS_PER_BLOCK + \
KBASE_HWCNT_V5_DEFAULT_COUNTERS_PER_BLOCK)
#define KBASE_HWCNT_V5_DEFAULT_VALUES_PER_BLOCK \
(KBASE_HWCNT_V5_HEADERS_PER_BLOCK + KBASE_HWCNT_V5_DEFAULT_COUNTERS_PER_BLOCK)
/* FrontEnd block count in V5 GPU hardware counter. */
#define KBASE_HWCNT_V5_FE_BLOCK_COUNT 1
@ -228,19 +227,17 @@ static inline bool kbase_hwcnt_is_block_type_undefined(const uint64_t grp_type,
*
* Return: 0 on success, else error code.
*/
int kbase_hwcnt_jm_metadata_create(
const struct kbase_hwcnt_gpu_info *info,
enum kbase_hwcnt_set counter_set,
const struct kbase_hwcnt_metadata **out_metadata,
size_t *out_dump_bytes);
int kbase_hwcnt_jm_metadata_create(const struct kbase_hwcnt_gpu_info *info,
enum kbase_hwcnt_set counter_set,
const struct kbase_hwcnt_metadata **out_metadata,
size_t *out_dump_bytes);
/**
* kbase_hwcnt_jm_metadata_destroy() - Destroy JM GPU hardware counter metadata.
*
* @metadata: Pointer to metadata to destroy.
*/
void kbase_hwcnt_jm_metadata_destroy(
const struct kbase_hwcnt_metadata *metadata);
void kbase_hwcnt_jm_metadata_destroy(const struct kbase_hwcnt_metadata *metadata);
/**
* kbase_hwcnt_csf_metadata_create() - Create hardware counter metadata for the
@ -252,18 +249,16 @@ void kbase_hwcnt_jm_metadata_destroy(
*
* Return: 0 on success, else error code.
*/
int kbase_hwcnt_csf_metadata_create(
const struct kbase_hwcnt_gpu_info *info,
enum kbase_hwcnt_set counter_set,
const struct kbase_hwcnt_metadata **out_metadata);
int kbase_hwcnt_csf_metadata_create(const struct kbase_hwcnt_gpu_info *info,
enum kbase_hwcnt_set counter_set,
const struct kbase_hwcnt_metadata **out_metadata);
/**
* kbase_hwcnt_csf_metadata_destroy() - Destroy CSF GPU hardware counter
* metadata.
* @metadata: Pointer to metadata to destroy.
*/
void kbase_hwcnt_csf_metadata_destroy(
const struct kbase_hwcnt_metadata *metadata);
void kbase_hwcnt_csf_metadata_destroy(const struct kbase_hwcnt_metadata *metadata);
/**
* kbase_hwcnt_jm_dump_get() - Copy or accumulate enabled counters from the raw
@ -289,8 +284,7 @@ void kbase_hwcnt_csf_metadata_destroy(
int kbase_hwcnt_jm_dump_get(struct kbase_hwcnt_dump_buffer *dst, u64 *src,
const struct kbase_hwcnt_enable_map *dst_enable_map,
const u64 pm_core_mask,
const struct kbase_hwcnt_curr_config *curr_config,
bool accumulate);
const struct kbase_hwcnt_curr_config *curr_config, bool accumulate);
/**
* kbase_hwcnt_csf_dump_get() - Copy or accumulate enabled counters from the raw
@ -310,8 +304,7 @@ int kbase_hwcnt_jm_dump_get(struct kbase_hwcnt_dump_buffer *dst, u64 *src,
* Return: 0 on success, else error code.
*/
int kbase_hwcnt_csf_dump_get(struct kbase_hwcnt_dump_buffer *dst, u64 *src,
const struct kbase_hwcnt_enable_map *dst_enable_map,
bool accumulate);
const struct kbase_hwcnt_enable_map *dst_enable_map, bool accumulate);
/**
* kbase_hwcnt_backend_gpu_block_map_to_physical() - Convert from a block
@ -365,9 +358,8 @@ static inline u32 kbase_hwcnt_backend_gpu_block_map_to_physical(u64 lo, u64 hi)
* individual counter block value, but the physical enable map uses 1 bit for
* every 4 counters, shared over all instances of a block.
*/
void kbase_hwcnt_gpu_enable_map_to_physical(
struct kbase_hwcnt_physical_enable_map *dst,
const struct kbase_hwcnt_enable_map *src);
void kbase_hwcnt_gpu_enable_map_to_physical(struct kbase_hwcnt_physical_enable_map *dst,
const struct kbase_hwcnt_enable_map *src);
/**
* kbase_hwcnt_gpu_set_to_physical() - Map counter set selection to physical
@ -376,8 +368,7 @@ void kbase_hwcnt_gpu_enable_map_to_physical(
* @dst: Non-NULL pointer to destination physical SET_SELECT value.
* @src: Source counter set selection.
*/
void kbase_hwcnt_gpu_set_to_physical(enum kbase_hwcnt_physical_set *dst,
enum kbase_hwcnt_set src);
void kbase_hwcnt_gpu_set_to_physical(enum kbase_hwcnt_physical_set *dst, enum kbase_hwcnt_set src);
/**
* kbase_hwcnt_gpu_enable_map_from_physical() - Convert a physical enable map to
@ -393,9 +384,8 @@ void kbase_hwcnt_gpu_set_to_physical(enum kbase_hwcnt_physical_set *dst,
* more than 64, so the enable map abstraction has nowhere to store the enable
* information for the 64 non-existent counters.
*/
void kbase_hwcnt_gpu_enable_map_from_physical(
struct kbase_hwcnt_enable_map *dst,
const struct kbase_hwcnt_physical_enable_map *src);
void kbase_hwcnt_gpu_enable_map_from_physical(struct kbase_hwcnt_enable_map *dst,
const struct kbase_hwcnt_physical_enable_map *src);
/**
* kbase_hwcnt_gpu_patch_dump_headers() - Patch all the performance counter
@ -411,8 +401,7 @@ void kbase_hwcnt_gpu_enable_map_from_physical(
* kernel-user boundary, to ensure the header is accurate for the enable map
* used by the user.
*/
void kbase_hwcnt_gpu_patch_dump_headers(
struct kbase_hwcnt_dump_buffer *buf,
const struct kbase_hwcnt_enable_map *enable_map);
void kbase_hwcnt_gpu_patch_dump_headers(struct kbase_hwcnt_dump_buffer *buf,
const struct kbase_hwcnt_enable_map *enable_map);
#endif /* _KBASE_HWCNT_GPU_H_ */

View File

@ -19,21 +19,19 @@
*
*/
#include "mali_kbase_hwcnt_gpu.h"
#include "mali_kbase_hwcnt_gpu_narrow.h"
#include "hwcnt/mali_kbase_hwcnt_gpu.h"
#include "hwcnt/mali_kbase_hwcnt_gpu_narrow.h"
#include <linux/bug.h>
#include <linux/err.h>
#include <linux/slab.h>
int kbase_hwcnt_gpu_metadata_narrow_create(
const struct kbase_hwcnt_metadata_narrow **dst_md_narrow,
const struct kbase_hwcnt_metadata *src_md)
int kbase_hwcnt_gpu_metadata_narrow_create(const struct kbase_hwcnt_metadata_narrow **dst_md_narrow,
const struct kbase_hwcnt_metadata *src_md)
{
struct kbase_hwcnt_description desc;
struct kbase_hwcnt_group_description group;
struct kbase_hwcnt_block_description
blks[KBASE_HWCNT_V5_BLOCK_TYPE_COUNT];
struct kbase_hwcnt_block_description blks[KBASE_HWCNT_V5_BLOCK_TYPE_COUNT];
size_t prfcnt_values_per_block;
size_t blk;
int err;
@ -47,18 +45,15 @@ int kbase_hwcnt_gpu_metadata_narrow_create(
* count in the metadata.
*/
if ((kbase_hwcnt_metadata_group_count(src_md) != 1) ||
(kbase_hwcnt_metadata_block_count(src_md, 0) !=
KBASE_HWCNT_V5_BLOCK_TYPE_COUNT))
(kbase_hwcnt_metadata_block_count(src_md, 0) != KBASE_HWCNT_V5_BLOCK_TYPE_COUNT))
return -EINVAL;
/* Get the values count in the first block. */
prfcnt_values_per_block =
kbase_hwcnt_metadata_block_values_count(src_md, 0, 0);
prfcnt_values_per_block = kbase_hwcnt_metadata_block_values_count(src_md, 0, 0);
/* Check that all blocks have the same values count. */
for (blk = 1; blk < KBASE_HWCNT_V5_BLOCK_TYPE_COUNT; blk++) {
size_t val_cnt =
kbase_hwcnt_metadata_block_values_count(src_md, 0, blk);
size_t val_cnt = kbase_hwcnt_metadata_block_values_count(src_md, 0, blk);
if (val_cnt != prfcnt_values_per_block)
return -EINVAL;
}
@ -75,12 +70,10 @@ int kbase_hwcnt_gpu_metadata_narrow_create(
prfcnt_values_per_block = 64;
for (blk = 0; blk < KBASE_HWCNT_V5_BLOCK_TYPE_COUNT; blk++) {
size_t blk_hdr_cnt = kbase_hwcnt_metadata_block_headers_count(
src_md, 0, blk);
size_t blk_hdr_cnt = kbase_hwcnt_metadata_block_headers_count(src_md, 0, blk);
blks[blk] = (struct kbase_hwcnt_block_description){
.type = kbase_hwcnt_metadata_block_type(src_md, 0, blk),
.inst_cnt = kbase_hwcnt_metadata_block_instance_count(
src_md, 0, blk),
.inst_cnt = kbase_hwcnt_metadata_block_instance_count(src_md, 0, blk),
.hdr_cnt = blk_hdr_cnt,
.ctr_cnt = prfcnt_values_per_block - blk_hdr_cnt,
};
@ -105,8 +98,7 @@ int kbase_hwcnt_gpu_metadata_narrow_create(
* only supports 32-bit but the created metadata uses 64-bit for
* block entry.
*/
metadata_narrow->dump_buf_bytes =
metadata_narrow->metadata->dump_buf_bytes >> 1;
metadata_narrow->dump_buf_bytes = metadata_narrow->metadata->dump_buf_bytes >> 1;
*dst_md_narrow = metadata_narrow;
} else {
kfree(metadata_narrow);
@ -115,8 +107,7 @@ int kbase_hwcnt_gpu_metadata_narrow_create(
return err;
}
void kbase_hwcnt_gpu_metadata_narrow_destroy(
const struct kbase_hwcnt_metadata_narrow *md_narrow)
void kbase_hwcnt_gpu_metadata_narrow_destroy(const struct kbase_hwcnt_metadata_narrow *md_narrow)
{
if (!md_narrow)
return;
@ -125,9 +116,8 @@ void kbase_hwcnt_gpu_metadata_narrow_destroy(
kfree(md_narrow);
}
int kbase_hwcnt_dump_buffer_narrow_alloc(
const struct kbase_hwcnt_metadata_narrow *md_narrow,
struct kbase_hwcnt_dump_buffer_narrow *dump_buf)
int kbase_hwcnt_dump_buffer_narrow_alloc(const struct kbase_hwcnt_metadata_narrow *md_narrow,
struct kbase_hwcnt_dump_buffer_narrow *dump_buf)
{
size_t dump_buf_bytes;
size_t clk_cnt_buf_bytes;
@ -137,8 +127,7 @@ int kbase_hwcnt_dump_buffer_narrow_alloc(
return -EINVAL;
dump_buf_bytes = md_narrow->dump_buf_bytes;
clk_cnt_buf_bytes =
sizeof(*dump_buf->clk_cnt_buf) * md_narrow->metadata->clk_cnt;
clk_cnt_buf_bytes = sizeof(*dump_buf->clk_cnt_buf) * md_narrow->metadata->clk_cnt;
/* Make a single allocation for both dump_buf and clk_cnt_buf. */
buf = kmalloc(dump_buf_bytes + clk_cnt_buf_bytes, GFP_KERNEL);
@ -154,14 +143,15 @@ int kbase_hwcnt_dump_buffer_narrow_alloc(
return 0;
}
/*
 * Free the memory held by a narrow dump buffer descriptor and clear the
 * descriptor. A NULL descriptor is ignored.
 */
void kbase_hwcnt_dump_buffer_narrow_free(
struct kbase_hwcnt_dump_buffer_narrow *dump_buf_narrow)
void kbase_hwcnt_dump_buffer_narrow_free(struct kbase_hwcnt_dump_buffer_narrow *dump_buf_narrow)
{
if (!dump_buf_narrow)
return;
/*
 * Only dump_buf is handed to kfree(); clk_cnt_buf is not freed separately.
 * NOTE(review): presumably both live in the single allocation made by
 * kbase_hwcnt_dump_buffer_narrow_alloc() - confirm against that function.
 */
kfree(dump_buf_narrow->dump_buf);
/* Reset every pointer member so the descriptor no longer refers to freed memory. */
*dump_buf_narrow = (struct kbase_hwcnt_dump_buffer_narrow){ NULL };
*dump_buf_narrow = (struct kbase_hwcnt_dump_buffer_narrow){ .md_narrow = NULL,
.dump_buf = NULL,
.clk_cnt_buf = NULL };
}
int kbase_hwcnt_dump_buffer_narrow_array_alloc(
@ -180,8 +170,7 @@ int kbase_hwcnt_dump_buffer_narrow_array_alloc(
return -EINVAL;
dump_buf_bytes = md_narrow->dump_buf_bytes;
clk_cnt_buf_bytes = sizeof(*dump_bufs->bufs->clk_cnt_buf) *
md_narrow->metadata->clk_cnt;
clk_cnt_buf_bytes = sizeof(*dump_bufs->bufs->clk_cnt_buf) * md_narrow->metadata->clk_cnt;
/* Allocate memory for the dump buffer struct array */
buffers = kmalloc_array(n, sizeof(*buffers), GFP_KERNEL);
@ -234,27 +223,22 @@ void kbase_hwcnt_dump_buffer_narrow_array_free(
memset(dump_bufs, 0, sizeof(*dump_bufs));
}
/*
 * Copy val_cnt values from a 64-bit source block into a 32-bit destination
 * block. Every destination entry in [0, val_cnt) is written: a value whose
 * enable-map bit is clear is stored as 0, and an enabled value larger than
 * U32_MAX is stored as U32_MAX.
 */
void kbase_hwcnt_dump_buffer_block_copy_strict_narrow(u32 *dst_blk,
const u64 *src_blk,
const u64 *blk_em,
size_t val_cnt)
void kbase_hwcnt_dump_buffer_block_copy_strict_narrow(u32 *dst_blk, const u64 *src_blk,
const u64 *blk_em, size_t val_cnt)
{
size_t val;
for (val = 0; val < val_cnt; val++) {
/* Saturate to U32_MAX rather than truncating the upper 32 bits. */
bool val_enabled =
kbase_hwcnt_enable_map_block_value_enabled(blk_em, val);
u32 src_val =
(src_blk[val] > U32_MAX) ? U32_MAX : (u32)src_blk[val];
bool val_enabled = kbase_hwcnt_enable_map_block_value_enabled(blk_em, val);
u32 src_val = (src_blk[val] > U32_MAX) ? U32_MAX : (u32)src_blk[val];
/* Disabled values are zeroed, not left untouched. */
dst_blk[val] = val_enabled ? src_val : 0;
}
}
void kbase_hwcnt_dump_buffer_copy_strict_narrow(
struct kbase_hwcnt_dump_buffer_narrow *dst_narrow,
const struct kbase_hwcnt_dump_buffer *src,
const struct kbase_hwcnt_enable_map *dst_enable_map)
void kbase_hwcnt_dump_buffer_copy_strict_narrow(struct kbase_hwcnt_dump_buffer_narrow *dst_narrow,
const struct kbase_hwcnt_dump_buffer *src,
const struct kbase_hwcnt_enable_map *dst_enable_map)
{
const struct kbase_hwcnt_metadata_narrow *metadata_narrow;
size_t grp;
@ -262,68 +246,53 @@ void kbase_hwcnt_dump_buffer_copy_strict_narrow(
if (WARN_ON(!dst_narrow) || WARN_ON(!src) || WARN_ON(!dst_enable_map) ||
WARN_ON(dst_narrow->md_narrow->metadata == src->metadata) ||
WARN_ON(dst_narrow->md_narrow->metadata->grp_cnt !=
src->metadata->grp_cnt) ||
WARN_ON(dst_narrow->md_narrow->metadata->grp_cnt != src->metadata->grp_cnt) ||
WARN_ON(src->metadata->grp_cnt != 1) ||
WARN_ON(dst_narrow->md_narrow->metadata->grp_metadata[0].blk_cnt !=
src->metadata->grp_metadata[0].blk_cnt) ||
WARN_ON(dst_narrow->md_narrow->metadata->grp_metadata[0].blk_cnt !=
KBASE_HWCNT_V5_BLOCK_TYPE_COUNT) ||
WARN_ON(dst_narrow->md_narrow->metadata->grp_metadata[0]
.blk_metadata[0]
.ctr_cnt >
WARN_ON(dst_narrow->md_narrow->metadata->grp_metadata[0].blk_metadata[0].ctr_cnt >
src->metadata->grp_metadata[0].blk_metadata[0].ctr_cnt))
return;
/* Don't use src metadata since src buffer is bigger than dst buffer. */
metadata_narrow = dst_narrow->md_narrow;
for (grp = 0;
grp < kbase_hwcnt_metadata_narrow_group_count(metadata_narrow);
grp++) {
for (grp = 0; grp < kbase_hwcnt_metadata_narrow_group_count(metadata_narrow); grp++) {
size_t blk;
size_t blk_cnt = kbase_hwcnt_metadata_narrow_block_count(
metadata_narrow, grp);
size_t blk_cnt = kbase_hwcnt_metadata_narrow_block_count(metadata_narrow, grp);
for (blk = 0; blk < blk_cnt; blk++) {
size_t blk_inst;
size_t blk_inst_cnt =
kbase_hwcnt_metadata_narrow_block_instance_count(
metadata_narrow, grp, blk);
size_t blk_inst_cnt = kbase_hwcnt_metadata_narrow_block_instance_count(
metadata_narrow, grp, blk);
for (blk_inst = 0; blk_inst < blk_inst_cnt;
blk_inst++) {
for (blk_inst = 0; blk_inst < blk_inst_cnt; blk_inst++) {
/* The narrowed down buffer is only 32-bit. */
u32 *dst_blk =
kbase_hwcnt_dump_buffer_narrow_block_instance(
dst_narrow, grp, blk, blk_inst);
const u64 *src_blk =
kbase_hwcnt_dump_buffer_block_instance(
src, grp, blk, blk_inst);
const u64 *blk_em =
kbase_hwcnt_enable_map_block_instance(
dst_enable_map, grp, blk,
blk_inst);
size_t val_cnt =
kbase_hwcnt_metadata_narrow_block_values_count(
metadata_narrow, grp, blk);
u32 *dst_blk = kbase_hwcnt_dump_buffer_narrow_block_instance(
dst_narrow, grp, blk, blk_inst);
const u64 *src_blk = kbase_hwcnt_dump_buffer_block_instance(
src, grp, blk, blk_inst);
const u64 *blk_em = kbase_hwcnt_enable_map_block_instance(
dst_enable_map, grp, blk, blk_inst);
size_t val_cnt = kbase_hwcnt_metadata_narrow_block_values_count(
metadata_narrow, grp, blk);
/* Align upwards to include padding bytes */
val_cnt = KBASE_HWCNT_ALIGN_UPWARDS(
val_cnt,
(KBASE_HWCNT_BLOCK_BYTE_ALIGNMENT /
KBASE_HWCNT_VALUE_BYTES));
val_cnt, (KBASE_HWCNT_BLOCK_BYTE_ALIGNMENT /
KBASE_HWCNT_VALUE_BYTES));
kbase_hwcnt_dump_buffer_block_copy_strict_narrow(
dst_blk, src_blk, blk_em, val_cnt);
kbase_hwcnt_dump_buffer_block_copy_strict_narrow(dst_blk, src_blk,
blk_em, val_cnt);
}
}
}
for (clk = 0; clk < metadata_narrow->metadata->clk_cnt; clk++) {
bool clk_enabled = kbase_hwcnt_clk_enable_map_enabled(
dst_enable_map->clk_enable_map, clk);
bool clk_enabled =
kbase_hwcnt_clk_enable_map_enabled(dst_enable_map->clk_enable_map, clk);
dst_narrow->clk_cnt_buf[clk] =
clk_enabled ? src->clk_cnt_buf[clk] : 0;
dst_narrow->clk_cnt_buf[clk] = clk_enabled ? src->clk_cnt_buf[clk] : 0;
}
}

View File

@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
* (C) COPYRIGHT 2021 ARM Limited. All rights reserved.
* (C) COPYRIGHT 2021-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@ -22,7 +22,7 @@
#ifndef _KBASE_HWCNT_GPU_NARROW_H_
#define _KBASE_HWCNT_GPU_NARROW_H_
#include "mali_kbase_hwcnt_types.h"
#include "hwcnt/mali_kbase_hwcnt_types.h"
#include <linux/types.h>
struct kbase_device;
@ -86,8 +86,8 @@ struct kbase_hwcnt_dump_buffer_narrow_array {
*
* Return: Number of hardware counter groups described by narrow metadata.
*/
static inline size_t kbase_hwcnt_metadata_narrow_group_count(
const struct kbase_hwcnt_metadata_narrow *md_narrow)
static inline size_t
kbase_hwcnt_metadata_narrow_group_count(const struct kbase_hwcnt_metadata_narrow *md_narrow)
{
return kbase_hwcnt_metadata_group_count(md_narrow->metadata);
}
@ -100,8 +100,9 @@ static inline size_t kbase_hwcnt_metadata_narrow_group_count(
*
* Return: Type of the group grp.
*/
static inline u64 kbase_hwcnt_metadata_narrow_group_type(
const struct kbase_hwcnt_metadata_narrow *md_narrow, size_t grp)
static inline u64
kbase_hwcnt_metadata_narrow_group_type(const struct kbase_hwcnt_metadata_narrow *md_narrow,
size_t grp)
{
return kbase_hwcnt_metadata_group_type(md_narrow->metadata, grp);
}
@ -114,8 +115,9 @@ static inline u64 kbase_hwcnt_metadata_narrow_group_type(
*
* Return: Number of blocks in group grp.
*/
static inline size_t kbase_hwcnt_metadata_narrow_block_count(
const struct kbase_hwcnt_metadata_narrow *md_narrow, size_t grp)
static inline size_t
kbase_hwcnt_metadata_narrow_block_count(const struct kbase_hwcnt_metadata_narrow *md_narrow,
size_t grp)
{
return kbase_hwcnt_metadata_block_count(md_narrow->metadata, grp);
}
@ -131,11 +133,9 @@ static inline size_t kbase_hwcnt_metadata_narrow_block_count(
* Return: Number of instances of block blk in group grp.
*/
static inline size_t kbase_hwcnt_metadata_narrow_block_instance_count(
const struct kbase_hwcnt_metadata_narrow *md_narrow, size_t grp,
size_t blk)
const struct kbase_hwcnt_metadata_narrow *md_narrow, size_t grp, size_t blk)
{
return kbase_hwcnt_metadata_block_instance_count(md_narrow->metadata,
grp, blk);
return kbase_hwcnt_metadata_block_instance_count(md_narrow->metadata, grp, blk);
}
/**
@ -148,12 +148,11 @@ static inline size_t kbase_hwcnt_metadata_narrow_block_instance_count(
*
* Return: Number of counter headers in each instance of block blk in group grp.
*/
static inline size_t kbase_hwcnt_metadata_narrow_block_headers_count(
const struct kbase_hwcnt_metadata_narrow *md_narrow, size_t grp,
size_t blk)
static inline size_t
kbase_hwcnt_metadata_narrow_block_headers_count(const struct kbase_hwcnt_metadata_narrow *md_narrow,
size_t grp, size_t blk)
{
return kbase_hwcnt_metadata_block_headers_count(md_narrow->metadata,
grp, blk);
return kbase_hwcnt_metadata_block_headers_count(md_narrow->metadata, grp, blk);
}
/**
@ -167,11 +166,9 @@ static inline size_t kbase_hwcnt_metadata_narrow_block_headers_count(
* Return: Number of counters in each instance of block blk in group grp.
*/
static inline size_t kbase_hwcnt_metadata_narrow_block_counters_count(
const struct kbase_hwcnt_metadata_narrow *md_narrow, size_t grp,
size_t blk)
const struct kbase_hwcnt_metadata_narrow *md_narrow, size_t grp, size_t blk)
{
return kbase_hwcnt_metadata_block_counters_count(md_narrow->metadata,
grp, blk);
return kbase_hwcnt_metadata_block_counters_count(md_narrow->metadata, grp, blk);
}
/**
@ -184,14 +181,12 @@ static inline size_t kbase_hwcnt_metadata_narrow_block_counters_count(
* Return: Number of headers plus counters in each instance of block blk
* in group grp.
*/
static inline size_t kbase_hwcnt_metadata_narrow_block_values_count(
const struct kbase_hwcnt_metadata_narrow *md_narrow, size_t grp,
size_t blk)
static inline size_t
kbase_hwcnt_metadata_narrow_block_values_count(const struct kbase_hwcnt_metadata_narrow *md_narrow,
size_t grp, size_t blk)
{
return kbase_hwcnt_metadata_narrow_block_counters_count(md_narrow, grp,
blk) +
kbase_hwcnt_metadata_narrow_block_headers_count(md_narrow, grp,
blk);
return kbase_hwcnt_metadata_narrow_block_counters_count(md_narrow, grp, blk) +
kbase_hwcnt_metadata_narrow_block_headers_count(md_narrow, grp, blk);
}
/**
@ -205,18 +200,13 @@ static inline size_t kbase_hwcnt_metadata_narrow_block_values_count(
*
* Return: u32* to the dump buffer for the block instance.
*/
static inline u32 *kbase_hwcnt_dump_buffer_narrow_block_instance(
const struct kbase_hwcnt_dump_buffer_narrow *buf, size_t grp,
size_t blk, size_t blk_inst)
static inline u32 *
kbase_hwcnt_dump_buffer_narrow_block_instance(const struct kbase_hwcnt_dump_buffer_narrow *buf,
size_t grp, size_t blk, size_t blk_inst)
{
return buf->dump_buf +
buf->md_narrow->metadata->grp_metadata[grp].dump_buf_index +
buf->md_narrow->metadata->grp_metadata[grp]
.blk_metadata[blk]
.dump_buf_index +
(buf->md_narrow->metadata->grp_metadata[grp]
.blk_metadata[blk]
.dump_buf_stride *
return buf->dump_buf + buf->md_narrow->metadata->grp_metadata[grp].dump_buf_index +
buf->md_narrow->metadata->grp_metadata[grp].blk_metadata[blk].dump_buf_index +
(buf->md_narrow->metadata->grp_metadata[grp].blk_metadata[blk].dump_buf_stride *
blk_inst);
}
@ -239,17 +229,15 @@ static inline u32 *kbase_hwcnt_dump_buffer_narrow_block_instance(
*
* Return: 0 on success, else error code.
*/
int kbase_hwcnt_gpu_metadata_narrow_create(
const struct kbase_hwcnt_metadata_narrow **dst_md_narrow,
const struct kbase_hwcnt_metadata *src_md);
int kbase_hwcnt_gpu_metadata_narrow_create(const struct kbase_hwcnt_metadata_narrow **dst_md_narrow,
const struct kbase_hwcnt_metadata *src_md);
/**
* kbase_hwcnt_gpu_metadata_narrow_destroy() - Destroy a hardware counter narrow
* metadata object.
* @md_narrow: Pointer to hardware counter narrow metadata.
*/
void kbase_hwcnt_gpu_metadata_narrow_destroy(
const struct kbase_hwcnt_metadata_narrow *md_narrow);
void kbase_hwcnt_gpu_metadata_narrow_destroy(const struct kbase_hwcnt_metadata_narrow *md_narrow);
/**
* kbase_hwcnt_dump_buffer_narrow_alloc() - Allocate a narrow dump buffer.
@ -260,9 +248,8 @@ void kbase_hwcnt_gpu_metadata_narrow_destroy(
*
* Return: 0 on success, else error code.
*/
int kbase_hwcnt_dump_buffer_narrow_alloc(
const struct kbase_hwcnt_metadata_narrow *md_narrow,
struct kbase_hwcnt_dump_buffer_narrow *dump_buf);
int kbase_hwcnt_dump_buffer_narrow_alloc(const struct kbase_hwcnt_metadata_narrow *md_narrow,
struct kbase_hwcnt_dump_buffer_narrow *dump_buf);
/**
* kbase_hwcnt_dump_buffer_narrow_free() - Free a narrow dump buffer.
@ -271,8 +258,7 @@ int kbase_hwcnt_dump_buffer_narrow_alloc(
* Can be safely called on an all-zeroed narrow dump buffer structure, or on an
* already freed narrow dump buffer.
*/
void kbase_hwcnt_dump_buffer_narrow_free(
struct kbase_hwcnt_dump_buffer_narrow *dump_buf);
void kbase_hwcnt_dump_buffer_narrow_free(struct kbase_hwcnt_dump_buffer_narrow *dump_buf);
/**
* kbase_hwcnt_dump_buffer_narrow_array_alloc() - Allocate an array of narrow
@ -320,10 +306,8 @@ void kbase_hwcnt_dump_buffer_narrow_array_free(
* source value is bigger than U32_MAX, or copy the value from source if the
* corresponding source value is less than or equal to U32_MAX.
*/
void kbase_hwcnt_dump_buffer_block_copy_strict_narrow(u32 *dst_blk,
const u64 *src_blk,
const u64 *blk_em,
size_t val_cnt);
void kbase_hwcnt_dump_buffer_block_copy_strict_narrow(u32 *dst_blk, const u64 *src_blk,
const u64 *blk_em, size_t val_cnt);
/**
* kbase_hwcnt_dump_buffer_copy_strict_narrow() - Copy all enabled values to a
@ -339,9 +323,8 @@ void kbase_hwcnt_dump_buffer_block_copy_strict_narrow(u32 *dst_blk,
* corresponding source value is bigger than U32_MAX, or copy the value from
* source if the corresponding source value is less than or equal to U32_MAX.
*/
void kbase_hwcnt_dump_buffer_copy_strict_narrow(
struct kbase_hwcnt_dump_buffer_narrow *dst_narrow,
const struct kbase_hwcnt_dump_buffer *src,
const struct kbase_hwcnt_enable_map *dst_enable_map);
void kbase_hwcnt_dump_buffer_copy_strict_narrow(struct kbase_hwcnt_dump_buffer_narrow *dst_narrow,
const struct kbase_hwcnt_dump_buffer *src,
const struct kbase_hwcnt_enable_map *dst_enable_map);
#endif /* _KBASE_HWCNT_GPU_NARROW_H_ */

View File

@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
* (C) COPYRIGHT 2018, 2020-2021 ARM Limited. All rights reserved.
* (C) COPYRIGHT 2018, 2020-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@ -19,13 +19,12 @@
*
*/
#include "mali_kbase_hwcnt_types.h"
#include "hwcnt/mali_kbase_hwcnt_types.h"
#include <linux/slab.h>
int kbase_hwcnt_metadata_create(
const struct kbase_hwcnt_description *desc,
const struct kbase_hwcnt_metadata **out_metadata)
int kbase_hwcnt_metadata_create(const struct kbase_hwcnt_description *desc,
const struct kbase_hwcnt_metadata **out_metadata)
{
char *buf;
struct kbase_hwcnt_metadata *metadata;
@ -56,8 +55,7 @@ int kbase_hwcnt_metadata_create(
/* Block metadata */
for (grp = 0; grp < desc->grp_cnt; grp++) {
size += sizeof(struct kbase_hwcnt_block_metadata) *
desc->grps[grp].blk_cnt;
size += sizeof(struct kbase_hwcnt_block_metadata) * desc->grps[grp].blk_cnt;
}
/* Single allocation for the entire metadata */
@ -83,8 +81,7 @@ int kbase_hwcnt_metadata_create(
for (grp = 0; grp < desc->grp_cnt; grp++) {
size_t blk;
const struct kbase_hwcnt_group_description *grp_desc =
desc->grps + grp;
const struct kbase_hwcnt_group_description *grp_desc = desc->grps + grp;
struct kbase_hwcnt_group_metadata *grp_md = grp_mds + grp;
size_t group_enable_map_count = 0;
@ -94,37 +91,28 @@ int kbase_hwcnt_metadata_create(
/* Bump allocate this group's block metadata */
struct kbase_hwcnt_block_metadata *blk_mds =
(struct kbase_hwcnt_block_metadata *)(buf + offset);
offset += sizeof(struct kbase_hwcnt_block_metadata) *
grp_desc->blk_cnt;
offset += sizeof(struct kbase_hwcnt_block_metadata) * grp_desc->blk_cnt;
/* Fill in the information for each block in the group */
for (blk = 0; blk < grp_desc->blk_cnt; blk++) {
const struct kbase_hwcnt_block_description *blk_desc =
grp_desc->blks + blk;
struct kbase_hwcnt_block_metadata *blk_md =
blk_mds + blk;
const size_t n_values =
blk_desc->hdr_cnt + blk_desc->ctr_cnt;
const struct kbase_hwcnt_block_description *blk_desc = grp_desc->blks + blk;
struct kbase_hwcnt_block_metadata *blk_md = blk_mds + blk;
const size_t n_values = blk_desc->hdr_cnt + blk_desc->ctr_cnt;
blk_md->type = blk_desc->type;
blk_md->inst_cnt = blk_desc->inst_cnt;
blk_md->hdr_cnt = blk_desc->hdr_cnt;
blk_md->ctr_cnt = blk_desc->ctr_cnt;
blk_md->enable_map_index = group_enable_map_count;
blk_md->enable_map_stride =
kbase_hwcnt_bitfield_count(n_values);
blk_md->enable_map_stride = kbase_hwcnt_bitfield_count(n_values);
blk_md->dump_buf_index = group_dump_buffer_count;
blk_md->dump_buf_stride =
KBASE_HWCNT_ALIGN_UPWARDS(
n_values,
(KBASE_HWCNT_BLOCK_BYTE_ALIGNMENT /
KBASE_HWCNT_VALUE_BYTES));
blk_md->dump_buf_stride = KBASE_HWCNT_ALIGN_UPWARDS(
n_values,
(KBASE_HWCNT_BLOCK_BYTE_ALIGNMENT / KBASE_HWCNT_VALUE_BYTES));
blk_md->avail_mask_index = group_avail_mask_bits;
group_enable_map_count +=
blk_md->enable_map_stride * blk_md->inst_cnt;
group_dump_buffer_count +=
blk_md->dump_buf_stride * blk_md->inst_cnt;
group_enable_map_count += blk_md->enable_map_stride * blk_md->inst_cnt;
group_dump_buffer_count += blk_md->dump_buf_stride * blk_md->inst_cnt;
group_avail_mask_bits += blk_md->inst_cnt;
}
@ -144,8 +132,7 @@ int kbase_hwcnt_metadata_create(
/* Fill in the top level metadata's information */
metadata->grp_cnt = desc->grp_cnt;
metadata->grp_metadata = grp_mds;
metadata->enable_map_bytes =
enable_map_count * KBASE_HWCNT_BITFIELD_BYTES;
metadata->enable_map_bytes = enable_map_count * KBASE_HWCNT_BITFIELD_BYTES;
metadata->dump_buf_bytes = dump_buf_count * KBASE_HWCNT_VALUE_BYTES;
metadata->avail_mask = desc->avail_mask;
metadata->clk_cnt = desc->clk_cnt;
@ -155,8 +142,7 @@ int kbase_hwcnt_metadata_create(
* bit per 4 bytes in the dump buffer.
*/
WARN_ON(metadata->dump_buf_bytes !=
(metadata->enable_map_bytes *
BITS_PER_BYTE * KBASE_HWCNT_VALUE_BYTES));
(metadata->enable_map_bytes * BITS_PER_BYTE * KBASE_HWCNT_VALUE_BYTES));
*out_metadata = metadata;
return 0;
@ -167,9 +153,8 @@ void kbase_hwcnt_metadata_destroy(const struct kbase_hwcnt_metadata *metadata)
kfree(metadata);
}
int kbase_hwcnt_enable_map_alloc(
const struct kbase_hwcnt_metadata *metadata,
struct kbase_hwcnt_enable_map *enable_map)
int kbase_hwcnt_enable_map_alloc(const struct kbase_hwcnt_metadata *metadata,
struct kbase_hwcnt_enable_map *enable_map)
{
u64 *enable_map_buf;
@ -177,8 +162,7 @@ int kbase_hwcnt_enable_map_alloc(
return -EINVAL;
if (metadata->enable_map_bytes > 0) {
enable_map_buf =
kzalloc(metadata->enable_map_bytes, GFP_KERNEL);
enable_map_buf = kzalloc(metadata->enable_map_bytes, GFP_KERNEL);
if (!enable_map_buf)
return -ENOMEM;
} else {
@ -200,9 +184,8 @@ void kbase_hwcnt_enable_map_free(struct kbase_hwcnt_enable_map *enable_map)
enable_map->metadata = NULL;
}
int kbase_hwcnt_dump_buffer_alloc(
const struct kbase_hwcnt_metadata *metadata,
struct kbase_hwcnt_dump_buffer *dump_buf)
int kbase_hwcnt_dump_buffer_alloc(const struct kbase_hwcnt_metadata *metadata,
struct kbase_hwcnt_dump_buffer *dump_buf)
{
size_t dump_buf_bytes;
size_t clk_cnt_buf_bytes;
@ -235,10 +218,8 @@ void kbase_hwcnt_dump_buffer_free(struct kbase_hwcnt_dump_buffer *dump_buf)
memset(dump_buf, 0, sizeof(*dump_buf));
}
int kbase_hwcnt_dump_buffer_array_alloc(
const struct kbase_hwcnt_metadata *metadata,
size_t n,
struct kbase_hwcnt_dump_buffer_array *dump_bufs)
int kbase_hwcnt_dump_buffer_array_alloc(const struct kbase_hwcnt_metadata *metadata, size_t n,
struct kbase_hwcnt_dump_buffer_array *dump_bufs)
{
struct kbase_hwcnt_dump_buffer *buffers;
size_t buf_idx;
@ -251,8 +232,7 @@ int kbase_hwcnt_dump_buffer_array_alloc(
return -EINVAL;
dump_buf_bytes = metadata->dump_buf_bytes;
clk_cnt_buf_bytes =
sizeof(*dump_bufs->bufs->clk_cnt_buf) * metadata->clk_cnt;
clk_cnt_buf_bytes = sizeof(*dump_bufs->bufs->clk_cnt_buf) * metadata->clk_cnt;
/* Allocate memory for the dump buffer struct array */
buffers = kmalloc_array(n, sizeof(*buffers), GFP_KERNEL);
@ -283,15 +263,13 @@ int kbase_hwcnt_dump_buffer_array_alloc(
buffers[buf_idx].metadata = metadata;
buffers[buf_idx].dump_buf = (u64 *)(addr + dump_buf_offset);
buffers[buf_idx].clk_cnt_buf =
(u64 *)(addr + clk_cnt_buf_offset);
buffers[buf_idx].clk_cnt_buf = (u64 *)(addr + clk_cnt_buf_offset);
}
return 0;
}
void kbase_hwcnt_dump_buffer_array_free(
struct kbase_hwcnt_dump_buffer_array *dump_bufs)
void kbase_hwcnt_dump_buffer_array_free(struct kbase_hwcnt_dump_buffer_array *dump_bufs)
{
if (!dump_bufs)
return;
@ -301,84 +279,71 @@ void kbase_hwcnt_dump_buffer_array_free(
memset(dump_bufs, 0, sizeof(*dump_bufs));
}
void kbase_hwcnt_dump_buffer_zero(
struct kbase_hwcnt_dump_buffer *dst,
const struct kbase_hwcnt_enable_map *dst_enable_map)
void kbase_hwcnt_dump_buffer_zero(struct kbase_hwcnt_dump_buffer *dst,
const struct kbase_hwcnt_enable_map *dst_enable_map)
{
const struct kbase_hwcnt_metadata *metadata;
size_t grp, blk, blk_inst;
if (WARN_ON(!dst) ||
WARN_ON(!dst_enable_map) ||
if (WARN_ON(!dst) || WARN_ON(!dst_enable_map) ||
WARN_ON(dst->metadata != dst_enable_map->metadata))
return;
metadata = dst->metadata;
kbase_hwcnt_metadata_for_each_block(metadata, grp, blk, blk_inst) {
kbase_hwcnt_metadata_for_each_block(metadata, grp, blk, blk_inst)
{
u64 *dst_blk;
size_t val_cnt;
if (!kbase_hwcnt_enable_map_block_enabled(
dst_enable_map, grp, blk, blk_inst))
if (!kbase_hwcnt_enable_map_block_enabled(dst_enable_map, grp, blk, blk_inst))
continue;
dst_blk = kbase_hwcnt_dump_buffer_block_instance(
dst, grp, blk, blk_inst);
val_cnt = kbase_hwcnt_metadata_block_values_count(
metadata, grp, blk);
dst_blk = kbase_hwcnt_dump_buffer_block_instance(dst, grp, blk, blk_inst);
val_cnt = kbase_hwcnt_metadata_block_values_count(metadata, grp, blk);
kbase_hwcnt_dump_buffer_block_zero(dst_blk, val_cnt);
}
memset(dst->clk_cnt_buf, 0,
sizeof(*dst->clk_cnt_buf) * metadata->clk_cnt);
memset(dst->clk_cnt_buf, 0, sizeof(*dst->clk_cnt_buf) * metadata->clk_cnt);
}
void kbase_hwcnt_dump_buffer_zero_strict(
struct kbase_hwcnt_dump_buffer *dst)
void kbase_hwcnt_dump_buffer_zero_strict(struct kbase_hwcnt_dump_buffer *dst)
{
if (WARN_ON(!dst))
return;
memset(dst->dump_buf, 0, dst->metadata->dump_buf_bytes);
memset(dst->clk_cnt_buf, 0,
sizeof(*dst->clk_cnt_buf) * dst->metadata->clk_cnt);
memset(dst->clk_cnt_buf, 0, sizeof(*dst->clk_cnt_buf) * dst->metadata->clk_cnt);
}
void kbase_hwcnt_dump_buffer_zero_non_enabled(
struct kbase_hwcnt_dump_buffer *dst,
const struct kbase_hwcnt_enable_map *dst_enable_map)
void kbase_hwcnt_dump_buffer_zero_non_enabled(struct kbase_hwcnt_dump_buffer *dst,
const struct kbase_hwcnt_enable_map *dst_enable_map)
{
const struct kbase_hwcnt_metadata *metadata;
size_t grp, blk, blk_inst;
if (WARN_ON(!dst) ||
WARN_ON(!dst_enable_map) ||
if (WARN_ON(!dst) || WARN_ON(!dst_enable_map) ||
WARN_ON(dst->metadata != dst_enable_map->metadata))
return;
metadata = dst->metadata;
kbase_hwcnt_metadata_for_each_block(metadata, grp, blk, blk_inst) {
u64 *dst_blk = kbase_hwcnt_dump_buffer_block_instance(
dst, grp, blk, blk_inst);
const u64 *blk_em = kbase_hwcnt_enable_map_block_instance(
dst_enable_map, grp, blk, blk_inst);
size_t val_cnt = kbase_hwcnt_metadata_block_values_count(
metadata, grp, blk);
kbase_hwcnt_metadata_for_each_block(metadata, grp, blk, blk_inst)
{
u64 *dst_blk = kbase_hwcnt_dump_buffer_block_instance(dst, grp, blk, blk_inst);
const u64 *blk_em =
kbase_hwcnt_enable_map_block_instance(dst_enable_map, grp, blk, blk_inst);
size_t val_cnt = kbase_hwcnt_metadata_block_values_count(metadata, grp, blk);
/* Align upwards to include padding bytes */
val_cnt = KBASE_HWCNT_ALIGN_UPWARDS(val_cnt,
(KBASE_HWCNT_BLOCK_BYTE_ALIGNMENT /
KBASE_HWCNT_VALUE_BYTES));
val_cnt = KBASE_HWCNT_ALIGN_UPWARDS(
val_cnt, (KBASE_HWCNT_BLOCK_BYTE_ALIGNMENT / KBASE_HWCNT_VALUE_BYTES));
if (kbase_hwcnt_metadata_block_instance_avail(
metadata, grp, blk, blk_inst)) {
if (kbase_hwcnt_metadata_block_instance_avail(metadata, grp, blk, blk_inst)) {
/* Block available, so only zero non-enabled values */
kbase_hwcnt_dump_buffer_block_zero_non_enabled(
dst_blk, blk_em, val_cnt);
kbase_hwcnt_dump_buffer_block_zero_non_enabled(dst_blk, blk_em, val_cnt);
} else {
/* Block not available, so zero the entire thing */
kbase_hwcnt_dump_buffer_block_zero(dst_blk, val_cnt);
@ -386,188 +351,159 @@ void kbase_hwcnt_dump_buffer_zero_non_enabled(
}
}
void kbase_hwcnt_dump_buffer_copy(
struct kbase_hwcnt_dump_buffer *dst,
const struct kbase_hwcnt_dump_buffer *src,
const struct kbase_hwcnt_enable_map *dst_enable_map)
void kbase_hwcnt_dump_buffer_copy(struct kbase_hwcnt_dump_buffer *dst,
const struct kbase_hwcnt_dump_buffer *src,
const struct kbase_hwcnt_enable_map *dst_enable_map)
{
const struct kbase_hwcnt_metadata *metadata;
size_t grp, blk, blk_inst;
size_t clk;
if (WARN_ON(!dst) ||
WARN_ON(!src) ||
WARN_ON(!dst_enable_map) ||
WARN_ON(dst == src) ||
if (WARN_ON(!dst) || WARN_ON(!src) || WARN_ON(!dst_enable_map) || WARN_ON(dst == src) ||
WARN_ON(dst->metadata != src->metadata) ||
WARN_ON(dst->metadata != dst_enable_map->metadata))
return;
metadata = dst->metadata;
kbase_hwcnt_metadata_for_each_block(metadata, grp, blk, blk_inst) {
kbase_hwcnt_metadata_for_each_block(metadata, grp, blk, blk_inst)
{
u64 *dst_blk;
const u64 *src_blk;
size_t val_cnt;
if (!kbase_hwcnt_enable_map_block_enabled(
dst_enable_map, grp, blk, blk_inst))
if (!kbase_hwcnt_enable_map_block_enabled(dst_enable_map, grp, blk, blk_inst))
continue;
dst_blk = kbase_hwcnt_dump_buffer_block_instance(
dst, grp, blk, blk_inst);
src_blk = kbase_hwcnt_dump_buffer_block_instance(
src, grp, blk, blk_inst);
val_cnt = kbase_hwcnt_metadata_block_values_count(
metadata, grp, blk);
dst_blk = kbase_hwcnt_dump_buffer_block_instance(dst, grp, blk, blk_inst);
src_blk = kbase_hwcnt_dump_buffer_block_instance(src, grp, blk, blk_inst);
val_cnt = kbase_hwcnt_metadata_block_values_count(metadata, grp, blk);
kbase_hwcnt_dump_buffer_block_copy(dst_blk, src_blk, val_cnt);
}
kbase_hwcnt_metadata_for_each_clock(metadata, clk) {
if (kbase_hwcnt_clk_enable_map_enabled(
dst_enable_map->clk_enable_map, clk))
kbase_hwcnt_metadata_for_each_clock(metadata, clk)
{
if (kbase_hwcnt_clk_enable_map_enabled(dst_enable_map->clk_enable_map, clk))
dst->clk_cnt_buf[clk] = src->clk_cnt_buf[clk];
}
}
void kbase_hwcnt_dump_buffer_copy_strict(
struct kbase_hwcnt_dump_buffer *dst,
const struct kbase_hwcnt_dump_buffer *src,
const struct kbase_hwcnt_enable_map *dst_enable_map)
void kbase_hwcnt_dump_buffer_copy_strict(struct kbase_hwcnt_dump_buffer *dst,
const struct kbase_hwcnt_dump_buffer *src,
const struct kbase_hwcnt_enable_map *dst_enable_map)
{
const struct kbase_hwcnt_metadata *metadata;
size_t grp, blk, blk_inst;
size_t clk;
if (WARN_ON(!dst) ||
WARN_ON(!src) ||
WARN_ON(!dst_enable_map) ||
WARN_ON(dst == src) ||
if (WARN_ON(!dst) || WARN_ON(!src) || WARN_ON(!dst_enable_map) || WARN_ON(dst == src) ||
WARN_ON(dst->metadata != src->metadata) ||
WARN_ON(dst->metadata != dst_enable_map->metadata))
return;
metadata = dst->metadata;
kbase_hwcnt_metadata_for_each_block(metadata, grp, blk, blk_inst) {
u64 *dst_blk = kbase_hwcnt_dump_buffer_block_instance(
dst, grp, blk, blk_inst);
const u64 *src_blk = kbase_hwcnt_dump_buffer_block_instance(
src, grp, blk, blk_inst);
const u64 *blk_em = kbase_hwcnt_enable_map_block_instance(
dst_enable_map, grp, blk, blk_inst);
size_t val_cnt = kbase_hwcnt_metadata_block_values_count(
metadata, grp, blk);
kbase_hwcnt_metadata_for_each_block(metadata, grp, blk, blk_inst)
{
u64 *dst_blk = kbase_hwcnt_dump_buffer_block_instance(dst, grp, blk, blk_inst);
const u64 *src_blk =
kbase_hwcnt_dump_buffer_block_instance(src, grp, blk, blk_inst);
const u64 *blk_em =
kbase_hwcnt_enable_map_block_instance(dst_enable_map, grp, blk, blk_inst);
size_t val_cnt = kbase_hwcnt_metadata_block_values_count(metadata, grp, blk);
/* Align upwards to include padding bytes */
val_cnt = KBASE_HWCNT_ALIGN_UPWARDS(val_cnt,
(KBASE_HWCNT_BLOCK_BYTE_ALIGNMENT /
KBASE_HWCNT_VALUE_BYTES));
val_cnt = KBASE_HWCNT_ALIGN_UPWARDS(
val_cnt, (KBASE_HWCNT_BLOCK_BYTE_ALIGNMENT / KBASE_HWCNT_VALUE_BYTES));
kbase_hwcnt_dump_buffer_block_copy_strict(
dst_blk, src_blk, blk_em, val_cnt);
kbase_hwcnt_dump_buffer_block_copy_strict(dst_blk, src_blk, blk_em, val_cnt);
}
kbase_hwcnt_metadata_for_each_clock(metadata, clk) {
kbase_hwcnt_metadata_for_each_clock(metadata, clk)
{
bool clk_enabled =
kbase_hwcnt_clk_enable_map_enabled(
dst_enable_map->clk_enable_map, clk);
kbase_hwcnt_clk_enable_map_enabled(dst_enable_map->clk_enable_map, clk);
dst->clk_cnt_buf[clk] = clk_enabled ? src->clk_cnt_buf[clk] : 0;
}
}
void kbase_hwcnt_dump_buffer_accumulate(
struct kbase_hwcnt_dump_buffer *dst,
const struct kbase_hwcnt_dump_buffer *src,
const struct kbase_hwcnt_enable_map *dst_enable_map)
void kbase_hwcnt_dump_buffer_accumulate(struct kbase_hwcnt_dump_buffer *dst,
const struct kbase_hwcnt_dump_buffer *src,
const struct kbase_hwcnt_enable_map *dst_enable_map)
{
const struct kbase_hwcnt_metadata *metadata;
size_t grp, blk, blk_inst;
size_t clk;
if (WARN_ON(!dst) ||
WARN_ON(!src) ||
WARN_ON(!dst_enable_map) ||
WARN_ON(dst == src) ||
if (WARN_ON(!dst) || WARN_ON(!src) || WARN_ON(!dst_enable_map) || WARN_ON(dst == src) ||
WARN_ON(dst->metadata != src->metadata) ||
WARN_ON(dst->metadata != dst_enable_map->metadata))
return;
metadata = dst->metadata;
kbase_hwcnt_metadata_for_each_block(metadata, grp, blk, blk_inst) {
kbase_hwcnt_metadata_for_each_block(metadata, grp, blk, blk_inst)
{
u64 *dst_blk;
const u64 *src_blk;
size_t hdr_cnt;
size_t ctr_cnt;
if (!kbase_hwcnt_enable_map_block_enabled(
dst_enable_map, grp, blk, blk_inst))
if (!kbase_hwcnt_enable_map_block_enabled(dst_enable_map, grp, blk, blk_inst))
continue;
dst_blk = kbase_hwcnt_dump_buffer_block_instance(
dst, grp, blk, blk_inst);
src_blk = kbase_hwcnt_dump_buffer_block_instance(
src, grp, blk, blk_inst);
hdr_cnt = kbase_hwcnt_metadata_block_headers_count(
metadata, grp, blk);
ctr_cnt = kbase_hwcnt_metadata_block_counters_count(
metadata, grp, blk);
dst_blk = kbase_hwcnt_dump_buffer_block_instance(dst, grp, blk, blk_inst);
src_blk = kbase_hwcnt_dump_buffer_block_instance(src, grp, blk, blk_inst);
hdr_cnt = kbase_hwcnt_metadata_block_headers_count(metadata, grp, blk);
ctr_cnt = kbase_hwcnt_metadata_block_counters_count(metadata, grp, blk);
kbase_hwcnt_dump_buffer_block_accumulate(
dst_blk, src_blk, hdr_cnt, ctr_cnt);
kbase_hwcnt_dump_buffer_block_accumulate(dst_blk, src_blk, hdr_cnt, ctr_cnt);
}
kbase_hwcnt_metadata_for_each_clock(metadata, clk) {
if (kbase_hwcnt_clk_enable_map_enabled(
dst_enable_map->clk_enable_map, clk))
kbase_hwcnt_metadata_for_each_clock(metadata, clk)
{
if (kbase_hwcnt_clk_enable_map_enabled(dst_enable_map->clk_enable_map, clk))
dst->clk_cnt_buf[clk] += src->clk_cnt_buf[clk];
}
}
void kbase_hwcnt_dump_buffer_accumulate_strict(
struct kbase_hwcnt_dump_buffer *dst,
const struct kbase_hwcnt_dump_buffer *src,
const struct kbase_hwcnt_enable_map *dst_enable_map)
void kbase_hwcnt_dump_buffer_accumulate_strict(struct kbase_hwcnt_dump_buffer *dst,
const struct kbase_hwcnt_dump_buffer *src,
const struct kbase_hwcnt_enable_map *dst_enable_map)
{
const struct kbase_hwcnt_metadata *metadata;
size_t grp, blk, blk_inst;
size_t clk;
if (WARN_ON(!dst) ||
WARN_ON(!src) ||
WARN_ON(!dst_enable_map) ||
WARN_ON(dst == src) ||
if (WARN_ON(!dst) || WARN_ON(!src) || WARN_ON(!dst_enable_map) || WARN_ON(dst == src) ||
WARN_ON(dst->metadata != src->metadata) ||
WARN_ON(dst->metadata != dst_enable_map->metadata))
return;
metadata = dst->metadata;
kbase_hwcnt_metadata_for_each_block(metadata, grp, blk, blk_inst) {
u64 *dst_blk = kbase_hwcnt_dump_buffer_block_instance(
dst, grp, blk, blk_inst);
const u64 *src_blk = kbase_hwcnt_dump_buffer_block_instance(
src, grp, blk, blk_inst);
const u64 *blk_em = kbase_hwcnt_enable_map_block_instance(
dst_enable_map, grp, blk, blk_inst);
size_t hdr_cnt = kbase_hwcnt_metadata_block_headers_count(
metadata, grp, blk);
size_t ctr_cnt = kbase_hwcnt_metadata_block_counters_count(
metadata, grp, blk);
kbase_hwcnt_metadata_for_each_block(metadata, grp, blk, blk_inst)
{
u64 *dst_blk = kbase_hwcnt_dump_buffer_block_instance(dst, grp, blk, blk_inst);
const u64 *src_blk =
kbase_hwcnt_dump_buffer_block_instance(src, grp, blk, blk_inst);
const u64 *blk_em =
kbase_hwcnt_enable_map_block_instance(dst_enable_map, grp, blk, blk_inst);
size_t hdr_cnt = kbase_hwcnt_metadata_block_headers_count(metadata, grp, blk);
size_t ctr_cnt = kbase_hwcnt_metadata_block_counters_count(metadata, grp, blk);
/* Align upwards to include padding bytes */
ctr_cnt = KBASE_HWCNT_ALIGN_UPWARDS(hdr_cnt + ctr_cnt,
(KBASE_HWCNT_BLOCK_BYTE_ALIGNMENT /
KBASE_HWCNT_VALUE_BYTES) - hdr_cnt);
ctr_cnt = KBASE_HWCNT_ALIGN_UPWARDS(
hdr_cnt + ctr_cnt,
(KBASE_HWCNT_BLOCK_BYTE_ALIGNMENT / KBASE_HWCNT_VALUE_BYTES) - hdr_cnt);
kbase_hwcnt_dump_buffer_block_accumulate_strict(
dst_blk, src_blk, blk_em, hdr_cnt, ctr_cnt);
kbase_hwcnt_dump_buffer_block_accumulate_strict(dst_blk, src_blk, blk_em, hdr_cnt,
ctr_cnt);
}
kbase_hwcnt_metadata_for_each_clock(metadata, clk) {
if (kbase_hwcnt_clk_enable_map_enabled(
dst_enable_map->clk_enable_map, clk))
kbase_hwcnt_metadata_for_each_clock(metadata, clk)
{
if (kbase_hwcnt_clk_enable_map_enabled(dst_enable_map->clk_enable_map, clk))
dst->clk_cnt_buf[clk] += src->clk_cnt_buf[clk];
else
dst->clk_cnt_buf[clk] = 0;

Some files were not shown because too many files have changed in this diff Show More