From b32714ef3631e39d507c69b7fff75722905ea925 Mon Sep 17 00:00:00 2001 From: Felix Zeng Date: Wed, 12 Apr 2023 10:37:14 +0800 Subject: [PATCH] driver: rknpu: Update rknpu driver, version: 0.8.8 * Add nbuf memory support * Add rknpu session for each instance * Add multi core irq status timeout log * Fix job run count error Signed-off-by: Felix Zeng Change-Id: I408e535fa1b3f8c853682a8ebaa245e3d7c188fb --- drivers/rknpu/Makefile | 1 + drivers/rknpu/include/rknpu_drv.h | 15 +- drivers/rknpu/include/rknpu_gem.h | 6 + drivers/rknpu/include/rknpu_ioctl.h | 4 +- drivers/rknpu/include/rknpu_iommu.h | 36 ++++ drivers/rknpu/include/rknpu_job.h | 4 +- drivers/rknpu/include/rknpu_mem.h | 7 +- drivers/rknpu/include/rknpu_mm.h | 18 -- drivers/rknpu/rknpu_drv.c | 105 ++++++++- drivers/rknpu/rknpu_gem.c | 319 ++++++++++++++++++++-------- drivers/rknpu/rknpu_iommu.c | 58 +++++ drivers/rknpu/rknpu_job.c | 42 ++-- drivers/rknpu/rknpu_mem.c | 76 +++++-- drivers/rknpu/rknpu_mm.c | 51 ----- 14 files changed, 524 insertions(+), 218 deletions(-) create mode 100644 drivers/rknpu/include/rknpu_iommu.h create mode 100644 drivers/rknpu/rknpu_iommu.c diff --git a/drivers/rknpu/Makefile b/drivers/rknpu/Makefile index 41dacc93157c..a8d265d7338a 100644 --- a/drivers/rknpu/Makefile +++ b/drivers/rknpu/Makefile @@ -9,6 +9,7 @@ rknpu-y += rknpu_drv.o rknpu-y += rknpu_reset.o rknpu-y += rknpu_job.o rknpu-y += rknpu_debugger.o +rknpu-y += rknpu_iommu.o rknpu-$(CONFIG_ROCKCHIP_RKNPU_SRAM) += rknpu_mm.o rknpu-$(CONFIG_ROCKCHIP_RKNPU_FENCE) += rknpu_fence.o rknpu-$(CONFIG_ROCKCHIP_RKNPU_DRM_GEM) += rknpu_gem.o diff --git a/drivers/rknpu/include/rknpu_drv.h b/drivers/rknpu/include/rknpu_drv.h index 67da03cb63bb..13280c18a2fd 100644 --- a/drivers/rknpu/include/rknpu_drv.h +++ b/drivers/rknpu/include/rknpu_drv.h @@ -30,10 +30,10 @@ #define DRIVER_NAME "rknpu" #define DRIVER_DESC "RKNPU driver" -#define DRIVER_DATE "20230202" +#define DRIVER_DATE "20230428" #define DRIVER_MAJOR 0 #define DRIVER_MINOR 8 
-#define DRIVER_PATCHLEVEL 5 +#define DRIVER_PATCHLEVEL 8 #define LOG_TAG "RKNPU" @@ -73,6 +73,8 @@ struct rknpu_config { const struct rknpu_reset_data *resets; int num_irqs; int num_resets; + __u64 nbuf_phyaddr; + __u64 nbuf_size; }; struct rknpu_timer { @@ -149,12 +151,21 @@ struct rknpu_device { ktime_t kt; phys_addr_t sram_start; phys_addr_t sram_end; + phys_addr_t nbuf_start; + phys_addr_t nbuf_end; uint32_t sram_size; + uint32_t nbuf_size; void __iomem *sram_base_io; + void __iomem *nbuf_base_io; struct rknpu_mm *sram_mm; unsigned long power_put_delay; }; +struct rknpu_session { + struct rknpu_device *rknpu_dev; + struct list_head list; +}; + int rknpu_power_get(struct rknpu_device *rknpu_dev); int rknpu_power_put(struct rknpu_device *rknpu_dev); diff --git a/drivers/rknpu/include/rknpu_gem.h b/drivers/rknpu/include/rknpu_gem.h index 954586607b16..0afc87bba322 100644 --- a/drivers/rknpu/include/rknpu_gem.h +++ b/drivers/rknpu/include/rknpu_gem.h @@ -48,6 +48,7 @@ struct rknpu_gem_object { unsigned int flags; unsigned long size; unsigned long sram_size; + unsigned long nbuf_size; struct rknpu_mm_obj *sram_obj; dma_addr_t iova_start; unsigned long iova_size; @@ -61,6 +62,11 @@ struct rknpu_gem_object { struct drm_mm_node mm_node; }; +enum rknpu_cache_type { + RKNPU_CACHE_SRAM = 1 << 0, + RKNPU_CACHE_NBUF = 1 << 1, +}; + /* create a new buffer with gem object */ struct rknpu_gem_object *rknpu_gem_object_create(struct drm_device *dev, unsigned int flags, diff --git a/drivers/rknpu/include/rknpu_ioctl.h b/drivers/rknpu/include/rknpu_ioctl.h index fc7225fb7b47..6294ac59274d 100644 --- a/drivers/rknpu/include/rknpu_ioctl.h +++ b/drivers/rknpu/include/rknpu_ioctl.h @@ -77,11 +77,13 @@ enum e_rknpu_mem_type { RKNPU_MEM_NON_DMA32 = 1 << 7, /* request SRAM */ RKNPU_MEM_TRY_ALLOC_SRAM = 1 << 8, + /* request NBUF */ + RKNPU_MEM_TRY_ALLOC_NBUF = 1 << 9, RKNPU_MEM_MASK = RKNPU_MEM_NON_CONTIGUOUS | RKNPU_MEM_CACHEABLE | RKNPU_MEM_WRITE_COMBINE | RKNPU_MEM_KERNEL_MAPPING | 
RKNPU_MEM_IOMMU | RKNPU_MEM_ZEROING | RKNPU_MEM_SECURE | RKNPU_MEM_NON_DMA32 | - RKNPU_MEM_TRY_ALLOC_SRAM + RKNPU_MEM_TRY_ALLOC_SRAM | RKNPU_MEM_TRY_ALLOC_NBUF }; /* sync mode definitions. */ diff --git a/drivers/rknpu/include/rknpu_iommu.h b/drivers/rknpu/include/rknpu_iommu.h new file mode 100644 index 000000000000..3951764072a7 --- /dev/null +++ b/drivers/rknpu/include/rknpu_iommu.h @@ -0,0 +1,36 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) Rockchip Electronics Co.Ltd + * Author: Felix Zeng + */ + +#ifndef __LINUX_RKNPU_IOMMU_H +#define __LINUX_RKNPU_IOMMU_H + +#include <linux/mutex.h> +#include <linux/seq_file.h> +#include <linux/iommu.h> +#include <linux/iova.h> +#include <linux/version.h> + +#include "rknpu_drv.h" + +enum iommu_dma_cookie_type { + IOMMU_DMA_IOVA_COOKIE, + IOMMU_DMA_MSI_COOKIE, +}; + +struct rknpu_iommu_dma_cookie { + enum iommu_dma_cookie_type type; + + /* Full allocator for IOMMU_DMA_IOVA_COOKIE */ + struct iova_domain iovad; +}; + +dma_addr_t rknpu_iommu_dma_alloc_iova(struct iommu_domain *domain, size_t size, + u64 dma_limit, struct device *dev); + +void rknpu_iommu_dma_free_iova(struct rknpu_iommu_dma_cookie *cookie, + dma_addr_t iova, size_t size); + +#endif diff --git a/drivers/rknpu/include/rknpu_job.h b/drivers/rknpu/include/rknpu_job.h index 6ef52d439277..73f27190a8c3 100644 --- a/drivers/rknpu/include/rknpu_job.h +++ b/drivers/rknpu/include/rknpu_job.h @@ -43,8 +43,8 @@ struct rknpu_job { struct dma_fence *fence; ktime_t timestamp; uint32_t use_core_num; - uint32_t run_count; - uint32_t interrupt_count; + atomic_t run_count; + atomic_t interrupt_count; ktime_t hw_recoder_time; }; diff --git a/drivers/rknpu/include/rknpu_mem.h b/drivers/rknpu/include/rknpu_mem.h index 925535c85f06..69975408f434 100644 --- a/drivers/rknpu/include/rknpu_mem.h +++ b/drivers/rknpu/include/rknpu_mem.h @@ -33,11 +33,14 @@ struct rknpu_mem_object { struct page **pages; struct sg_table *sgt; struct dma_buf *dmabuf; + struct list_head head; unsigned int owner; }; -int rknpu_mem_create_ioctl(struct rknpu_device *rknpu_dev,
unsigned long data); -int rknpu_mem_destroy_ioctl(struct rknpu_device *rknpu_dev, unsigned long data); +int rknpu_mem_create_ioctl(struct rknpu_device *rknpu_dev, unsigned long data, + struct file *file); +int rknpu_mem_destroy_ioctl(struct rknpu_device *rknpu_dev, unsigned long data, + struct file *file); int rknpu_mem_sync_ioctl(struct rknpu_device *rknpu_dev, unsigned long data); #endif diff --git a/drivers/rknpu/include/rknpu_mm.h b/drivers/rknpu/include/rknpu_mm.h index b764892d1dc4..52fa044e18e2 100644 --- a/drivers/rknpu/include/rknpu_mm.h +++ b/drivers/rknpu/include/rknpu_mm.h @@ -40,22 +40,4 @@ int rknpu_mm_free(struct rknpu_mm *mm, struct rknpu_mm_obj *mm_obj); int rknpu_mm_dump(struct seq_file *m, void *data); -enum iommu_dma_cookie_type { - IOMMU_DMA_IOVA_COOKIE, - IOMMU_DMA_MSI_COOKIE, -}; - -struct rknpu_iommu_dma_cookie { - enum iommu_dma_cookie_type type; - - /* Full allocator for IOMMU_DMA_IOVA_COOKIE */ - struct iova_domain iovad; -}; - -dma_addr_t rknpu_iommu_dma_alloc_iova(struct iommu_domain *domain, size_t size, - u64 dma_limit, struct device *dev); - -void rknpu_iommu_dma_free_iova(struct rknpu_iommu_dma_cookie *cookie, - dma_addr_t iova, size_t size); - #endif diff --git a/drivers/rknpu/rknpu_drv.c b/drivers/rknpu/rknpu_drv.c index bbcf19af8794..3b008142d31d 100644 --- a/drivers/rknpu/rknpu_drv.c +++ b/drivers/rknpu/rknpu_drv.c @@ -116,7 +116,9 @@ static const struct rknpu_config rk356x_rknpu_config = { .irqs = rknpu_irqs, .resets = rknpu_resets, .num_irqs = ARRAY_SIZE(rknpu_irqs), - .num_resets = ARRAY_SIZE(rknpu_resets) + .num_resets = ARRAY_SIZE(rknpu_resets), + .nbuf_phyaddr = 0, + .nbuf_size = 0 }; static const struct rknpu_config rk3588_rknpu_config = { @@ -132,7 +134,9 @@ static const struct rknpu_config rk3588_rknpu_config = { .irqs = rk3588_npu_irqs, .resets = rk3588_npu_resets, .num_irqs = ARRAY_SIZE(rk3588_npu_irqs), - .num_resets = ARRAY_SIZE(rk3588_npu_resets) + .num_resets = ARRAY_SIZE(rk3588_npu_resets), + .nbuf_phyaddr = 0, + 
.nbuf_size = 0 }; static const struct rknpu_config rv1106_rknpu_config = { @@ -148,7 +152,9 @@ static const struct rknpu_config rv1106_rknpu_config = { .irqs = rknpu_irqs, .resets = rknpu_resets, .num_irqs = ARRAY_SIZE(rknpu_irqs), - .num_resets = ARRAY_SIZE(rknpu_resets) + .num_resets = ARRAY_SIZE(rknpu_resets), + .nbuf_phyaddr = 0, + .nbuf_size = 0 }; static const struct rknpu_config rk3562_rknpu_config = { @@ -164,7 +170,9 @@ static const struct rknpu_config rk3562_rknpu_config = { .irqs = rknpu_irqs, .resets = rknpu_resets, .num_irqs = ARRAY_SIZE(rknpu_irqs), - .num_resets = ARRAY_SIZE(rknpu_resets) + .num_resets = ARRAY_SIZE(rknpu_resets), + .nbuf_phyaddr = 0xfe400000, + .nbuf_size = 256 * 1024 }; /* driver probe and init */ @@ -353,11 +361,56 @@ static int rknpu_action(struct rknpu_device *rknpu_dev, #ifdef CONFIG_ROCKCHIP_RKNPU_DMA_HEAP static int rknpu_open(struct inode *inode, struct file *file) { + struct rknpu_device *rknpu_dev = + container_of(file->private_data, struct rknpu_device, miscdev); + struct rknpu_session *session = NULL; + + session = kzalloc(sizeof(*session), GFP_KERNEL); + if (!session) { + LOG_ERROR("rknpu session alloc failed\n"); + return -ENOMEM; + } + + session->rknpu_dev = rknpu_dev; + INIT_LIST_HEAD(&session->list); + + file->private_data = (void *)session; + return nonseekable_open(inode, file); } static int rknpu_release(struct inode *inode, struct file *file) { + struct rknpu_mem_object *entry; + struct rknpu_session *session = file->private_data; + struct rknpu_device *rknpu_dev = session->rknpu_dev; + LIST_HEAD(local_list); + + spin_lock(&rknpu_dev->lock); + list_replace_init(&session->list, &local_list); + file->private_data = NULL; + spin_unlock(&rknpu_dev->lock); + + while (!list_empty(&local_list)) { + entry = list_first_entry(&local_list, struct rknpu_mem_object, + head); + + LOG_DEBUG( + "Fd close free rknpu_obj: %#llx, rknpu_obj->dma_addr: %#llx\n", + (__u64)(uintptr_t)entry, (__u64)entry->dma_addr); + + 
vunmap(entry->kv_addr); + entry->kv_addr = NULL; + + if (!entry->owner) + dma_buf_put(entry->dmabuf); + + list_del(&entry->head); + kfree(entry); + } + + kfree(session); + return 0; } @@ -389,8 +442,12 @@ static int rknpu_action_ioctl(struct rknpu_device *rknpu_dev, static long rknpu_ioctl(struct file *file, uint32_t cmd, unsigned long arg) { long ret = -EINVAL; - struct rknpu_device *rknpu_dev = - container_of(file->private_data, struct rknpu_device, miscdev); + struct rknpu_device *rknpu_dev = NULL; + + if (!file->private_data) + return -EINVAL; + + rknpu_dev = ((struct rknpu_session *)file->private_data)->rknpu_dev; rknpu_power_get(rknpu_dev); @@ -402,12 +459,12 @@ static long rknpu_ioctl(struct file *file, uint32_t cmd, unsigned long arg) ret = rknpu_submit_ioctl(rknpu_dev, arg); break; case IOCTL_RKNPU_MEM_CREATE: - ret = rknpu_mem_create_ioctl(rknpu_dev, arg); + ret = rknpu_mem_create_ioctl(rknpu_dev, arg, file); break; case RKNPU_MEM_MAP: break; case IOCTL_RKNPU_MEM_DESTROY: - ret = rknpu_mem_destroy_ioctl(rknpu_dev, arg); + ret = rknpu_mem_destroy_ioctl(rknpu_dev, arg, file); break; case IOCTL_RKNPU_MEM_SYNC: ret = rknpu_mem_sync_ioctl(rknpu_dev, arg); @@ -1603,6 +1660,31 @@ static int rknpu_find_sram_resource(struct rknpu_device *rknpu_dev) return 0; } +static int rknpu_find_nbuf_resource(struct rknpu_device *rknpu_dev) +{ + struct device *dev = rknpu_dev->dev; + + if (rknpu_dev->config->nbuf_size == 0) + return -EINVAL; + + rknpu_dev->nbuf_start = rknpu_dev->config->nbuf_phyaddr; + rknpu_dev->nbuf_size = rknpu_dev->config->nbuf_size; + rknpu_dev->nbuf_base_io = + devm_ioremap(dev, rknpu_dev->nbuf_start, rknpu_dev->nbuf_size); + if (!rknpu_dev->nbuf_base_io) { /* devm_ioremap() returns NULL on failure, not an ERR_PTR */ + LOG_DEV_ERROR(dev, "failed to remap nbuf base io!\n"); + rknpu_dev->nbuf_base_io = NULL; + } + + rknpu_dev->nbuf_end = rknpu_dev->nbuf_start + rknpu_dev->nbuf_size; + + LOG_DEV_INFO(dev, "nbuf region: [%pa, %pa), nbuf size: %#x\n", + &rknpu_dev->nbuf_start, &rknpu_dev->nbuf_end, +
rknpu_dev->nbuf_size); + + return 0; +} + static int rknpu_probe(struct platform_device *pdev) { struct resource *res = NULL; @@ -1819,7 +1901,8 @@ static int rknpu_probe(struct platform_device *pdev) INIT_DEFERRABLE_WORK(&rknpu_dev->power_off_work, rknpu_power_off_delay_work); - if (IS_ENABLED(CONFIG_ROCKCHIP_RKNPU_SRAM) && rknpu_dev->iommu_en) { + if (IS_ENABLED(CONFIG_NO_GKI) && + IS_ENABLED(CONFIG_ROCKCHIP_RKNPU_SRAM) && rknpu_dev->iommu_en) { if (!rknpu_find_sram_resource(rknpu_dev)) { ret = rknpu_mm_create(rknpu_dev->sram_size, PAGE_SIZE, &rknpu_dev->sram_mm); @@ -1830,6 +1913,10 @@ static int rknpu_probe(struct platform_device *pdev) } } + if (IS_ENABLED(CONFIG_NO_GKI) && rknpu_dev->iommu_en && + rknpu_dev->config->nbuf_size > 0) + rknpu_find_nbuf_resource(rknpu_dev); + rknpu_power_off(rknpu_dev); atomic_set(&rknpu_dev->power_refcount, 0); atomic_set(&rknpu_dev->cmdline_power_refcount, 0); diff --git a/drivers/rknpu/rknpu_gem.c b/drivers/rknpu/rknpu_gem.c index 38d3197fad76..f97be2b58e9b 100644 --- a/drivers/rknpu/rknpu_gem.c +++ b/drivers/rknpu/rknpu_gem.c @@ -25,6 +25,7 @@ #include "rknpu_drv.h" #include "rknpu_ioctl.h" #include "rknpu_gem.h" +#include "rknpu_iommu.h" #define RKNPU_GEM_ALLOC_FROM_PAGES 1 @@ -379,7 +380,8 @@ static void rknpu_gem_release(struct rknpu_gem_object *rknpu_obj) kfree(rknpu_obj); } -static int rknpu_gem_alloc_buf_with_sram(struct rknpu_gem_object *rknpu_obj) +static int rknpu_gem_alloc_buf_with_cache(struct rknpu_gem_object *rknpu_obj, + enum rknpu_cache_type cache_type) { struct drm_device *drm = rknpu_obj->base.dev; struct rknpu_device *rknpu_dev = drm->dev_private; @@ -392,8 +394,28 @@ static int rknpu_gem_alloc_buf_with_sram(struct rknpu_gem_object *rknpu_obj) unsigned long offset = 0; int i = 0; int ret = -EINVAL; + phys_addr_t cache_start = 0; + unsigned long cache_offset = 0; + unsigned long cache_size = 0; - /* iova map to sram */ + switch (cache_type) { + case RKNPU_CACHE_SRAM: + cache_start = rknpu_dev->sram_start; + 
cache_offset = rknpu_obj->sram_obj->range_start * + rknpu_dev->sram_mm->chunk_size; + cache_size = rknpu_obj->sram_size; + break; + case RKNPU_CACHE_NBUF: + cache_start = rknpu_dev->nbuf_start; + cache_offset = 0; + cache_size = rknpu_obj->nbuf_size; + break; + default: + LOG_ERROR("Unknown rknpu_cache_type: %d", cache_type); + return -EINVAL; + } + + /* iova map to cache */ domain = iommu_get_domain_for_dev(rknpu_dev->dev); if (!domain) { LOG_ERROR("failed to get iommu domain!"); @@ -402,8 +424,7 @@ static int rknpu_gem_alloc_buf_with_sram(struct rknpu_gem_object *rknpu_obj) cookie = domain->iova_cookie; iovad = &cookie->iovad; - rknpu_obj->iova_size = - iova_align(iovad, rknpu_obj->sram_size + rknpu_obj->size); + rknpu_obj->iova_size = iova_align(iovad, cache_size + rknpu_obj->size); rknpu_obj->iova_start = rknpu_iommu_dma_alloc_iova( domain, rknpu_obj->iova_size, dma_get_mask(drm->dev), drm->dev); if (!rknpu_obj->iova_start) { @@ -415,20 +436,20 @@ static int rknpu_gem_alloc_buf_with_sram(struct rknpu_gem_object *rknpu_obj) &rknpu_obj->iova_start, rknpu_obj->iova_size); /* - * Overview SRAM + DDR map to IOVA + * Overview cache + DDR map to IOVA * -------- - * sram_size: rknpu_obj->sram_size - * - allocate from SRAM, this size value has been page-aligned + * cache_size: + * - allocate from CACHE, this size value has been page-aligned * size: rknpu_obj->size * - allocate from DDR pages, this size value has been page-aligned * iova_size: rknpu_obj->iova_size - * - from iova_align(sram_size + size) - * - it may be larger than the (sram_size + size), and the larger part is not mapped + * - from iova_align(cache_size + size) + * - it may be larger than the (cache_size + size), and the larger part is not mapped * -------- * - * |<- sram_size ->| |<- - - - size - - - ->| + * |<- cache_size ->| |<- - - - size - - - ->| * +---------------+ +----------------------+ - * | SRAM | | DDR | + * | CACHE | | DDR | * +---------------+ +----------------------+ * | | * | V | V | @@ 
-438,20 +459,18 @@ static int rknpu_gem_alloc_buf_with_sram(struct rknpu_gem_object *rknpu_obj) * |<- - - - - - - iova_size - - - - - - ->| * */ - offset = rknpu_obj->sram_obj->range_start * - rknpu_dev->sram_mm->chunk_size; ret = iommu_map(domain, rknpu_obj->iova_start, - rknpu_dev->sram_start + offset, rknpu_obj->sram_size, + cache_start + cache_offset, cache_size, IOMMU_READ | IOMMU_WRITE); if (ret) { - LOG_ERROR("sram iommu_map error: %d\n", ret); + LOG_ERROR("cache iommu_map error: %d\n", ret); goto free_iova; } rknpu_obj->dma_addr = rknpu_obj->iova_start; if (rknpu_obj->size == 0) { - LOG_INFO("allocate sram size: %lu\n", rknpu_obj->sram_size); + LOG_INFO("allocate cache size: %lu\n", cache_size); return 0; } @@ -459,7 +478,7 @@ static int rknpu_gem_alloc_buf_with_sram(struct rknpu_gem_object *rknpu_obj) if (IS_ERR(rknpu_obj->pages)) { ret = PTR_ERR(rknpu_obj->pages); LOG_ERROR("failed to get pages: %d\n", ret); - goto sram_unmap; + goto cache_unmap; } rknpu_obj->num_pages = rknpu_obj->size >> PAGE_SHIFT; @@ -478,7 +497,7 @@ static int rknpu_gem_alloc_buf_with_sram(struct rknpu_gem_object *rknpu_obj) } length = rknpu_obj->size; - offset = rknpu_obj->iova_start + rknpu_obj->sram_size; + offset = rknpu_obj->iova_start + cache_size; for_each_sg(rknpu_obj->sgt->sgl, s, rknpu_obj->sgt->nents, i) { size = (length < s->length) ? 
length : s->length; @@ -497,13 +516,13 @@ static int rknpu_gem_alloc_buf_with_sram(struct rknpu_gem_object *rknpu_obj) break; } - LOG_INFO("allocate size: %lu with sram size: %lu\n", rknpu_obj->size, - rknpu_obj->sram_size); + LOG_INFO("allocate size: %lu with cache size: %lu\n", rknpu_obj->size, + cache_size); return 0; sgl_unmap: - iommu_unmap(domain, rknpu_obj->iova_start + rknpu_obj->sram_size, + iommu_unmap(domain, rknpu_obj->iova_start + cache_size, rknpu_obj->size - length); sg_free_table(rknpu_obj->sgt); kfree(rknpu_obj->sgt); @@ -511,8 +530,8 @@ static int rknpu_gem_alloc_buf_with_sram(struct rknpu_gem_object *rknpu_obj) put_pages: drm_gem_put_pages(&rknpu_obj->base, rknpu_obj->pages, false, false); -sram_unmap: - iommu_unmap(domain, rknpu_obj->iova_start, rknpu_obj->sram_size); +cache_unmap: + iommu_unmap(domain, rknpu_obj->iova_start, cache_size); free_iova: rknpu_iommu_dma_free_iova(domain->iova_cookie, rknpu_obj->iova_start, @@ -521,20 +540,31 @@ static int rknpu_gem_alloc_buf_with_sram(struct rknpu_gem_object *rknpu_obj) return ret; } -static void rknpu_gem_free_buf_with_sram(struct rknpu_gem_object *rknpu_obj) +static void rknpu_gem_free_buf_with_cache(struct rknpu_gem_object *rknpu_obj, + enum rknpu_cache_type cache_type) { struct drm_device *drm = rknpu_obj->base.dev; struct rknpu_device *rknpu_dev = drm->dev_private; struct iommu_domain *domain = NULL; + unsigned long cache_size = 0; + + switch (cache_type) { + case RKNPU_CACHE_SRAM: + cache_size = rknpu_obj->sram_size; + break; + case RKNPU_CACHE_NBUF: + cache_size = rknpu_obj->nbuf_size; + break; + default: + LOG_ERROR("Unknown rknpu_cache_type: %d", cache_type); + return; + } domain = iommu_get_domain_for_dev(rknpu_dev->dev); if (domain) { - iommu_unmap(domain, rknpu_obj->iova_start, - rknpu_obj->sram_size); + iommu_unmap(domain, rknpu_obj->iova_start, cache_size); if (rknpu_obj->size > 0) - iommu_unmap(domain, - rknpu_obj->iova_start + - rknpu_obj->sram_size, + iommu_unmap(domain, 
rknpu_obj->iova_start + cache_size, rknpu_obj->size); rknpu_iommu_dma_free_iova(domain->iova_cookie, rknpu_obj->iova_start, @@ -617,11 +647,37 @@ struct rknpu_gem_object *rknpu_gem_object_create(struct drm_device *drm, if (real_sram_size > 0) { rknpu_obj->sram_size = real_sram_size; - ret = rknpu_gem_alloc_buf_with_sram(rknpu_obj); + ret = rknpu_gem_alloc_buf_with_cache(rknpu_obj, + RKNPU_CACHE_SRAM); if (ret < 0) goto mm_free; remain_ddr_size = 0; } + } else if (IS_ENABLED(CONFIG_NO_GKI) && + (flags & RKNPU_MEM_TRY_ALLOC_NBUF) && + rknpu_dev->nbuf_size > 0) { + size_t nbuf_size = 0; + + rknpu_obj = rknpu_gem_init(drm, remain_ddr_size); + if (IS_ERR(rknpu_obj)) + return rknpu_obj; + + nbuf_size = remain_ddr_size <= rknpu_dev->nbuf_size ? + remain_ddr_size : + rknpu_dev->nbuf_size; + + /* set memory type and cache attribute from user side. */ + rknpu_obj->flags = flags; + + if (nbuf_size > 0) { + rknpu_obj->nbuf_size = nbuf_size; + + ret = rknpu_gem_alloc_buf_with_cache(rknpu_obj, + RKNPU_CACHE_NBUF); + if (ret < 0) + goto gem_release; + remain_ddr_size = 0; + } } if (remain_ddr_size > 0) { @@ -639,10 +695,11 @@ struct rknpu_gem_object *rknpu_gem_object_create(struct drm_device *drm, if (rknpu_obj) LOG_DEBUG( - "created dma addr: %pad, cookie: %p, ddr size: %lu, sram size: %lu, attrs: %#lx, flags: %#x\n", + "created dma addr: %pad, cookie: %p, ddr size: %lu, sram size: %lu, nbuf size: %lu, attrs: %#lx, flags: %#x\n", &rknpu_obj->dma_addr, rknpu_obj->cookie, rknpu_obj->size, rknpu_obj->sram_size, - rknpu_obj->dma_attrs, rknpu_obj->flags); + rknpu_obj->nbuf_size, rknpu_obj->dma_attrs, + rknpu_obj->flags); return rknpu_obj; @@ -683,7 +740,12 @@ void rknpu_gem_object_destroy(struct rknpu_gem_object *rknpu_obj) if (rknpu_obj->sram_obj != NULL) rknpu_mm_free(rknpu_dev->sram_mm, rknpu_obj->sram_obj); - rknpu_gem_free_buf_with_sram(rknpu_obj); + rknpu_gem_free_buf_with_cache(rknpu_obj, + RKNPU_CACHE_SRAM); + } else if (IS_ENABLED(CONFIG_NO_GKI) && + rknpu_obj->nbuf_size > 
0) { + rknpu_gem_free_buf_with_cache(rknpu_obj, + RKNPU_CACHE_NBUF); } else { rknpu_gem_free_buf(rknpu_obj); } @@ -808,6 +870,75 @@ static int rknpu_gem_mmap_pages(struct rknpu_gem_object *rknpu_obj, } #endif +static int rknpu_gem_mmap_cache(struct rknpu_gem_object *rknpu_obj, + struct vm_area_struct *vma, + enum rknpu_cache_type cache_type) +{ + struct drm_device *drm = rknpu_obj->base.dev; +#if RKNPU_GEM_ALLOC_FROM_PAGES + struct rknpu_device *rknpu_dev = drm->dev_private; +#endif + unsigned long vm_size = 0; + int ret = -EINVAL; + unsigned long offset = 0; + unsigned long num_pages = 0; + int i = 0; + phys_addr_t cache_start = 0; + unsigned long cache_offset = 0; + unsigned long cache_size = 0; + + switch (cache_type) { + case RKNPU_CACHE_SRAM: + cache_start = rknpu_dev->sram_start; + cache_offset = rknpu_obj->sram_obj->range_start * + rknpu_dev->sram_mm->chunk_size; + cache_size = rknpu_obj->sram_size; + break; + case RKNPU_CACHE_NBUF: + cache_start = rknpu_dev->nbuf_start; + cache_offset = 0; + cache_size = rknpu_obj->nbuf_size; + break; + default: + LOG_ERROR("Unknown rknpu_cache_type: %d", cache_type); + return -EINVAL; + } + + vma->vm_flags |= VM_MIXEDMAP; + + vm_size = vma->vm_end - vma->vm_start; + + /* + * Convert a physical address in a cache area to a page frame number (PFN), + * and store the resulting PFN in the vm_pgoff field of the given VMA. + * + * NOTE: This conversion carries a risk because the resulting PFN is not a true + * page frame number and may not be valid or usable in all contexts. 
+ */ + vma->vm_pgoff = __phys_to_pfn(cache_start + cache_offset); + + ret = remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff, cache_size, + vma->vm_page_prot); + if (ret) + return -EAGAIN; + + if (rknpu_obj->size == 0) + return 0; + + offset = cache_size; + + num_pages = (vm_size - cache_size) / PAGE_SIZE; + for (i = 0; i < num_pages; ++i) { + ret = vm_insert_page(vma, vma->vm_start + offset, + rknpu_obj->pages[i]); + if (ret < 0) + return ret; + offset += PAGE_SIZE; + } + + return 0; +} + static int rknpu_gem_mmap_buffer(struct rknpu_gem_object *rknpu_obj, struct vm_area_struct *vma) { @@ -832,38 +963,10 @@ static int rknpu_gem_mmap_buffer(struct rknpu_gem_object *rknpu_obj, if (vm_size > rknpu_obj->size) return -EINVAL; - if (rknpu_obj->sram_size > 0) { - unsigned long offset = 0; - unsigned long num_pages = 0; - int i = 0; - - vma->vm_flags |= VM_MIXEDMAP; - - offset = rknpu_obj->sram_obj->range_start * - rknpu_dev->sram_mm->chunk_size; - vma->vm_pgoff = __phys_to_pfn(rknpu_dev->sram_start + offset); - - ret = remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff, - rknpu_obj->sram_size, vma->vm_page_prot); - if (ret) - return -EAGAIN; - - if (rknpu_obj->size == 0) - return 0; - - offset = rknpu_obj->sram_size; - - num_pages = (vm_size - rknpu_obj->sram_size) / PAGE_SIZE; - for (i = 0; i < num_pages; ++i) { - ret = vm_insert_page(vma, vma->vm_start + offset, - rknpu_obj->pages[i]); - if (ret < 0) - return ret; - offset += PAGE_SIZE; - } - - return 0; - } + if (rknpu_obj->sram_size > 0) + return rknpu_gem_mmap_cache(rknpu_obj, vma, RKNPU_CACHE_SRAM); + else if (rknpu_obj->nbuf_size > 0) + return rknpu_gem_mmap_cache(rknpu_obj, vma, RKNPU_CACHE_NBUF); #if RKNPU_GEM_ALLOC_FROM_PAGES if ((rknpu_obj->flags & RKNPU_MEM_NON_CONTIGUOUS) && @@ -1199,6 +1302,55 @@ int rknpu_gem_prime_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma) return rknpu_gem_mmap_obj(obj, vma); } +static int rknpu_cache_sync(struct rknpu_gem_object *rknpu_obj, + unsigned long *length, 
unsigned long *offset, + enum rknpu_cache_type cache_type) +{ + struct drm_gem_object *obj = &rknpu_obj->base; + struct rknpu_device *rknpu_dev = obj->dev->dev_private; + void __iomem *cache_base_io = NULL; + unsigned long cache_offset = 0; + unsigned long cache_size = 0; + + switch (cache_type) { + case RKNPU_CACHE_SRAM: + cache_base_io = rknpu_dev->sram_base_io; + cache_offset = rknpu_obj->sram_obj->range_start * + rknpu_dev->sram_mm->chunk_size; + cache_size = rknpu_obj->sram_size; + break; + case RKNPU_CACHE_NBUF: + cache_base_io = rknpu_dev->nbuf_base_io; + cache_offset = 0; + cache_size = rknpu_obj->nbuf_size; + break; + default: + LOG_ERROR("Unknown rknpu_cache_type: %d", cache_type); + return -EINVAL; + } + + if ((*offset + *length) <= cache_size) { + __dma_map_area(cache_base_io + *offset + cache_offset, *length, + DMA_TO_DEVICE); + __dma_unmap_area(cache_base_io + *offset + cache_offset, + *length, DMA_FROM_DEVICE); + *length = 0; + *offset = 0; + } else if (*offset >= cache_size) { + *offset -= cache_size; + } else { + unsigned long cache_length = cache_size - *offset; + + __dma_map_area(cache_base_io + *offset + cache_offset, + cache_length, DMA_TO_DEVICE); + __dma_unmap_area(cache_base_io + *offset + cache_offset, + cache_length, DMA_FROM_DEVICE); + *length -= cache_length; + *offset = 0; + } + return 0; +} + int rknpu_gem_sync_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv) { @@ -1233,36 +1385,15 @@ int rknpu_gem_sync_ioctl(struct drm_device *dev, void *data, length = args->size; offset = args->offset; - if (IS_ENABLED(CONFIG_ROCKCHIP_RKNPU_SRAM) && + if (IS_ENABLED(CONFIG_NO_GKI) && + IS_ENABLED(CONFIG_ROCKCHIP_RKNPU_SRAM) && rknpu_obj->sram_size > 0) { - struct drm_gem_object *obj = &rknpu_obj->base; - struct rknpu_device *rknpu_dev = obj->dev->dev_private; - unsigned long sram_offset = - rknpu_obj->sram_obj->range_start * - rknpu_dev->sram_mm->chunk_size; - if ((offset + length) <= rknpu_obj->sram_size) { - 
__dma_map_area(rknpu_dev->sram_base_io + - offset + sram_offset, - length, DMA_TO_DEVICE); - __dma_unmap_area(rknpu_dev->sram_base_io + - offset + sram_offset, - length, DMA_FROM_DEVICE); - length = 0; - offset = 0; - } else if (offset >= rknpu_obj->sram_size) { - offset -= rknpu_obj->sram_size; - } else { - unsigned long sram_length = - rknpu_obj->sram_size - offset; - __dma_map_area(rknpu_dev->sram_base_io + - offset + sram_offset, - sram_length, DMA_TO_DEVICE); - __dma_unmap_area(rknpu_dev->sram_base_io + - offset + sram_offset, - sram_length, DMA_FROM_DEVICE); - length -= sram_length; - offset = 0; - } + rknpu_cache_sync(rknpu_obj, &length, &offset, + RKNPU_CACHE_SRAM); + } else if (IS_ENABLED(CONFIG_NO_GKI) && + rknpu_obj->nbuf_size > 0) { + rknpu_cache_sync(rknpu_obj, &length, &offset, + RKNPU_CACHE_NBUF); } for_each_sg(rknpu_obj->sgt->sgl, sg, rknpu_obj->sgt->nents, diff --git a/drivers/rknpu/rknpu_iommu.c b/drivers/rknpu/rknpu_iommu.c new file mode 100644 index 000000000000..39cc8f8be7c2 --- /dev/null +++ b/drivers/rknpu/rknpu_iommu.c @@ -0,0 +1,58 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) Rockchip Electronics Co.Ltd + * Author: Felix Zeng + */ + +#include "rknpu_iommu.h" + +dma_addr_t rknpu_iommu_dma_alloc_iova(struct iommu_domain *domain, size_t size, + u64 dma_limit, struct device *dev) +{ + struct rknpu_iommu_dma_cookie *cookie = (void *)domain->iova_cookie; + struct iova_domain *iovad = &cookie->iovad; + unsigned long shift, iova_len, iova = 0; +#if (KERNEL_VERSION(5, 4, 0) > LINUX_VERSION_CODE) + dma_addr_t limit; +#endif + + shift = iova_shift(iovad); + iova_len = size >> shift; + /* + * Freeing non-power-of-two-sized allocations back into the IOVA caches + * will come back to bite us badly, so we have to waste a bit of space + * rounding up anything cacheable to make sure that can't happen. The + * order of the unadjusted size will still match upon freeing. 
+ */ + if (iova_len < (1 << (IOVA_RANGE_CACHE_MAX_SIZE - 1))) + iova_len = roundup_pow_of_two(iova_len); + +#if (KERNEL_VERSION(5, 10, 0) <= LINUX_VERSION_CODE) + dma_limit = min_not_zero(dma_limit, dev->bus_dma_limit); +#else + if (dev->bus_dma_mask) + dma_limit &= dev->bus_dma_mask; +#endif + + if (domain->geometry.force_aperture) + dma_limit = + min_t(u64, dma_limit, domain->geometry.aperture_end); + +#if (KERNEL_VERSION(5, 4, 0) <= LINUX_VERSION_CODE) + iova = alloc_iova_fast(iovad, iova_len, dma_limit >> shift, true); +#else + limit = min_t(dma_addr_t, dma_limit >> shift, iovad->end_pfn); + + iova = alloc_iova_fast(iovad, iova_len, limit, true); +#endif + + return (dma_addr_t)iova << shift; +} + +void rknpu_iommu_dma_free_iova(struct rknpu_iommu_dma_cookie *cookie, + dma_addr_t iova, size_t size) +{ + struct iova_domain *iovad = &cookie->iovad; + + free_iova_fast(iovad, iova_pfn(iovad, iova), size >> iova_shift(iovad)); +} diff --git a/drivers/rknpu/rknpu_job.c b/drivers/rknpu/rknpu_job.c index 6be7824a90ce..6a167c4a72c7 100644 --- a/drivers/rknpu/rknpu_job.c +++ b/drivers/rknpu/rknpu_job.c @@ -124,8 +124,8 @@ static inline struct rknpu_job *rknpu_job_alloc(struct rknpu_device *rknpu_dev, job->use_core_num = (args->core_mask & RKNPU_CORE0_MASK) + ((args->core_mask & RKNPU_CORE1_MASK) >> 1) + ((args->core_mask & RKNPU_CORE2_MASK) >> 2); - job->run_count = job->use_core_num; - job->interrupt_count = job->use_core_num; + atomic_set(&job->run_count, job->use_core_num); + atomic_set(&job->interrupt_count, job->use_core_num); #ifdef CONFIG_ROCKCHIP_RKNPU_DRM_GEM task_obj = (struct rknpu_gem_object *)(uintptr_t)args->task_obj_addr; if (task_obj) @@ -358,11 +358,10 @@ static void rknpu_job_next(struct rknpu_device *rknpu_dev, int core_index) list_del_init(&job->head[core_index]); job->in_queue[core_index] = false; subcore_data->job = job; - job->run_count--; job->hw_recoder_time = ktime_get(); spin_unlock_irqrestore(&rknpu_dev->irq_lock, flags); - if (job->run_count 
== 0) { + if (atomic_dec_and_test(&job->run_count)) { /* decrement and test in one atomic step, now done after irq_lock is released; the commits below run only when the counter reaches zero */ if (job->args->core_mask & RKNPU_CORE0_MASK) job->ret = rknpu_job_commit(job, 0); if (job->args->core_mask & RKNPU_CORE1_MASK) @@ -384,12 +383,11 @@ static void rknpu_job_done(struct rknpu_job *job, int ret, int core_index) spin_lock_irqsave(&rknpu_dev->irq_lock, flags); subcore_data->job = NULL; subcore_data->task_num -= rknn_get_task_number(job, core_index); - job->interrupt_count--; subcore_data->timer.busy_time += ktime_us_delta(now, job->hw_recoder_time); spin_unlock_irqrestore(&rknpu_dev->irq_lock, flags); - if (job->interrupt_count == 0) { + if (atomic_dec_and_test(&job->interrupt_count)) { /* only the call that brings interrupt_count to zero goes on to set RKNPU_JOB_DONE */ int use_core_num = job->use_core_num; job->flags |= RKNPU_JOB_DONE; @@ -449,8 +447,8 @@ static void rknpu_job_schedule(struct rknpu_job *job) job->args->core_mask = rknpu_core_mask(core_index); job->use_core_num = 1; - job->interrupt_count = 1; - job->run_count = 1; + atomic_set(&job->run_count, job->use_core_num); /* use_core_num was set to 1 just above */ + atomic_set(&job->interrupt_count, job->use_core_num); } spin_lock_irqsave(&rknpu_dev->irq_lock, flags); @@ -474,8 +472,6 @@ static void rknpu_job_abort(struct rknpu_job *job) { struct rknpu_device *rknpu_dev = job->rknpu_dev; struct rknpu_subcore_data *subcore_data = NULL; - int core_index = rknpu_core_index(job->args->core_mask); - void __iomem *rknpu_core_base = rknpu_dev->base[core_index]; unsigned long flags; int i = 0; @@ -495,14 +491,24 @@ static void rknpu_job_abort(struct rknpu_job *job) } if (job->ret == -ETIMEDOUT) { - LOG_ERROR( - "job timeout, flags: %#x, irq status: %#x, raw status: %#x, require mask: %#x, task counter: %#x, elapsed time: %lldus\n", - job->flags, REG_READ(RKNPU_OFFSET_INT_STATUS), - REG_READ(RKNPU_OFFSET_INT_RAW_STATUS), - job->int_mask[core_index], - (REG_READ(rknpu_dev->config->pc_task_status_offset) & - rknpu_dev->config->pc_task_number_mask), - ktime_to_us(ktime_sub(ktime_get(), job->timestamp))); + LOG_ERROR("job timeout, flags: %#x:\n", job->flags); /* header line; one register report per selected core follows */ + for (i = 0; i < 
rknpu_dev->config->num_irqs; i++) { + if (job->args->core_mask & rknpu_core_mask(i)) { /* skip cores that are not part of this job's core_mask */ + void __iomem *rknpu_core_base = /* NOTE(review): not referenced by name below; presumably consumed implicitly by REG_READ() - confirm against the macro definition */ + rknpu_dev->base[i]; + LOG_ERROR( + "\tcore %d irq status: %#x, raw status: %#x, require mask: %#x, task counter: %#x, elapsed time: %lldus\n", + i, REG_READ(RKNPU_OFFSET_INT_STATUS), + REG_READ(RKNPU_OFFSET_INT_RAW_STATUS), + job->int_mask[i], + (REG_READ( + rknpu_dev->config + ->pc_task_status_offset) & + rknpu_dev->config->pc_task_number_mask), + ktime_to_us(ktime_sub(ktime_get(), + job->timestamp))); + } + } rknpu_soft_reset(rknpu_dev); } else { LOG_ERROR( diff --git a/drivers/rknpu/rknpu_mem.c b/drivers/rknpu/rknpu_mem.c index 4fd686687992..ff7e92d0174e 100644 --- a/drivers/rknpu/rknpu_mem.c +++ b/drivers/rknpu/rknpu_mem.c @@ -15,7 +15,8 @@ #include "rknpu_ioctl.h" #include "rknpu_mem.h" -int rknpu_mem_create_ioctl(struct rknpu_device *rknpu_dev, unsigned long data) +int rknpu_mem_create_ioctl(struct rknpu_device *rknpu_dev, unsigned long data, + struct file *file) { struct rknpu_mem_create args; int ret = -EINVAL; @@ -27,6 +28,7 @@ int rknpu_mem_create_ioctl(struct rknpu_device *rknpu_dev, unsigned long data) struct page **pages; struct page *page; struct rknpu_mem_object *rknpu_obj = NULL; + struct rknpu_session *session = NULL; int i, fd; unsigned int length, page_count; @@ -65,6 +67,8 @@ int rknpu_mem_create_ioctl(struct rknpu_device *rknpu_dev, unsigned long data) O_CLOEXEC | O_RDWR, 0x0, dev_name(rknpu_dev->dev)); if (IS_ERR(dmabuf)) { + LOG_ERROR("dmabuf alloc failed, args.size = %llu\n", + args.size); ret = PTR_ERR(dmabuf); goto err_free_obj; } @@ -74,6 +78,7 @@ int rknpu_mem_create_ioctl(struct rknpu_device *rknpu_dev, unsigned long data) fd = dma_buf_fd(dmabuf, O_CLOEXEC | O_RDWR); if (fd < 0) { + LOG_ERROR("dmabuf fd get failed\n"); ret = -EFAULT; goto err_free_dma_buf; } @@ -81,12 +86,14 @@ int rknpu_mem_create_ioctl(struct rknpu_device *rknpu_dev, unsigned long data) attachment = dma_buf_attach(dmabuf, rknpu_dev->dev); if 
(IS_ERR(attachment)) { + LOG_ERROR("dma_buf_attach failed\n"); ret = PTR_ERR(attachment); goto err_free_dma_buf; } table = dma_buf_map_attachment(attachment, DMA_BIDIRECTIONAL); if (IS_ERR(table)) { + LOG_ERROR("dma_buf_map_attachment failed\n"); /* name the call that actually failed so this is distinguishable from the attach failure above */ dma_buf_detach(dmabuf, attachment); ret = PTR_ERR(table); goto err_free_dma_buf; @@ -103,6 +110,7 @@ int rknpu_mem_create_ioctl(struct rknpu_device *rknpu_dev, unsigned long data) page_count = length >> PAGE_SHIFT; pages = kmalloc_array(page_count, sizeof(struct page), GFP_KERNEL); if (!pages) { + LOG_ERROR("alloc pages failed\n"); ret = -ENOMEM; goto err_detach_dma_buf; } @@ -112,6 +120,7 @@ int rknpu_mem_create_ioctl(struct rknpu_device *rknpu_dev, unsigned long data) rknpu_obj->kv_addr = vmap(pages, page_count, VM_MAP, PAGE_KERNEL); if (!rknpu_obj->kv_addr) { + LOG_ERROR("vmap pages addr failed\n"); ret = -ENOMEM; goto err_free_pages; } @@ -141,6 +150,18 @@ int rknpu_mem_create_ioctl(struct rknpu_device *rknpu_dev, unsigned long data) dma_buf_unmap_attachment(attachment, table, DMA_BIDIRECTIONAL); dma_buf_detach(dmabuf, attachment); + spin_lock(&rknpu_dev->lock); /* the per-file session list is only touched with rknpu_dev->lock held */ + + session = file->private_data; + if (!session) { /* NOTE(review): this exit runs after dma_buf_unmap_attachment()/dma_buf_detach() above - confirm the err_unmap_kv_addr unwind does not unmap/detach a second time */ + spin_unlock(&rknpu_dev->lock); + ret = -EFAULT; + goto err_unmap_kv_addr; + } + list_add_tail(&rknpu_obj->head, &session->list); /* record the new object on this file's session so destroy can verify it later */ + + spin_unlock(&rknpu_dev->lock); + return 0; err_unmap_kv_addr: @@ -166,11 +187,12 @@ int rknpu_mem_create_ioctl(struct rknpu_device *rknpu_dev, unsigned long data) return ret; } -int rknpu_mem_destroy_ioctl(struct rknpu_device *rknpu_dev, unsigned long data) +int rknpu_mem_destroy_ioctl(struct rknpu_device *rknpu_dev, unsigned long data, + struct file *file) { - struct rknpu_mem_object *rknpu_obj = NULL; + struct rknpu_mem_object *rknpu_obj, *entry, *q; + struct rknpu_session *session = NULL; struct rknpu_mem_destroy args; - struct dma_buf *dmabuf; int ret = -EFAULT; if (unlikely(copy_from_user(&args, (struct rknpu_mem_destroy *)data, @@ -188,19 +210,35 @@ int 
rknpu_mem_destroy_ioctl(struct rknpu_device *rknpu_dev, unsigned long data) } rknpu_obj = (struct rknpu_mem_object *)(uintptr_t)args.obj_addr; - dmabuf = rknpu_obj->dmabuf; LOG_DEBUG( "free args.handle: %d, rknpu_obj: %#llx, rknpu_obj->dma_addr: %#llx\n", args.handle, (__u64)(uintptr_t)rknpu_obj, (__u64)rknpu_obj->dma_addr); - vunmap(rknpu_obj->kv_addr); - rknpu_obj->kv_addr = NULL; + spin_lock(&rknpu_dev->lock); + session = file->private_data; + if (!session) { + spin_unlock(&rknpu_dev->lock); + ret = -EFAULT; + return ret; + } + list_for_each_entry_safe(entry, q, &session->list, head) { /* rknpu_obj comes from user space: only release it if it is on this session's list */ + if (entry == rknpu_obj) { + list_del(&entry->head); + break; + } + } + spin_unlock(&rknpu_dev->lock); - if (!rknpu_obj->owner) - dma_buf_put(dmabuf); + if (&entry->head != &session->list) { /* the walk stopped on a real node, so rknpu_obj was found and unlinked; after a full pass the cursor is computed from the list head itself, is not a valid object, and must not be matched against the user-supplied pointer */ + vunmap(rknpu_obj->kv_addr); + rknpu_obj->kv_addr = NULL; - kfree(rknpu_obj); + if (!rknpu_obj->owner) + dma_buf_put(rknpu_obj->dmabuf); + + kfree(rknpu_obj); + } return 0; } @@ -209,11 +247,9 @@ int rknpu_mem_destroy_ioctl(struct rknpu_device *rknpu_dev, unsigned long data) * begin cpu access => for_cpu = true * end cpu access => for_cpu = false */ -static void __maybe_unused rknpu_dma_buf_sync(struct rknpu_device *rknpu_dev, - struct rknpu_mem_object *rknpu_obj, - u32 offset, u32 length, - enum dma_data_direction dir, - bool for_cpu) +static void __maybe_unused rknpu_dma_buf_sync( + struct rknpu_device *rknpu_dev, struct rknpu_mem_object *rknpu_obj, + u32 offset, u32 length, enum dma_data_direction dir, bool for_cpu) { struct device *dev = rknpu_dev->dev; struct sg_table *sgt = rknpu_obj->sgt; @@ -278,14 +314,12 @@ int rknpu_mem_sync_ioctl(struct rknpu_device *rknpu_dev, unsigned long data) #ifndef CONFIG_DMABUF_PARTIAL if (args.flags & RKNPU_MEM_SYNC_TO_DEVICE) { - rknpu_dma_buf_sync(rknpu_dev, rknpu_obj, - args.offset, args.size, DMA_TO_DEVICE, - false); + rknpu_dma_buf_sync(rknpu_dev, rknpu_obj, args.offset, args.size, + DMA_TO_DEVICE, false); } if (args.flags & RKNPU_MEM_SYNC_FROM_DEVICE) { - 
rknpu_dma_buf_sync(rknpu_dev, rknpu_obj, - args.offset, args.size, DMA_FROM_DEVICE, - true); + rknpu_dma_buf_sync(rknpu_dev, rknpu_obj, args.offset, args.size, + DMA_FROM_DEVICE, true); } #else if (args.flags & RKNPU_MEM_SYNC_TO_DEVICE) { diff --git a/drivers/rknpu/rknpu_mm.c b/drivers/rknpu/rknpu_mm.c index 9a13c3e256a4..a21bb6ded182 100644 --- a/drivers/rknpu/rknpu_mm.c +++ b/drivers/rknpu/rknpu_mm.c @@ -236,54 +236,3 @@ int rknpu_mm_dump(struct seq_file *m, void *data) return 0; } - -dma_addr_t rknpu_iommu_dma_alloc_iova(struct iommu_domain *domain, size_t size, - u64 dma_limit, struct device *dev) -{ - struct rknpu_iommu_dma_cookie *cookie = domain->iova_cookie; - struct iova_domain *iovad = &cookie->iovad; - unsigned long shift, iova_len, iova = 0; -#if (KERNEL_VERSION(5, 4, 0) > LINUX_VERSION_CODE) - dma_addr_t limit; -#endif - - shift = iova_shift(iovad); - iova_len = size >> shift; - /* - * Freeing non-power-of-two-sized allocations back into the IOVA caches - * will come back to bite us badly, so we have to waste a bit of space - * rounding up anything cacheable to make sure that can't happen. The - * order of the unadjusted size will still match upon freeing. 
- */ - if (iova_len < (1 << (IOVA_RANGE_CACHE_MAX_SIZE - 1))) - iova_len = roundup_pow_of_two(iova_len); - -#if (KERNEL_VERSION(5, 10, 0) <= LINUX_VERSION_CODE) - dma_limit = min_not_zero(dma_limit, dev->bus_dma_limit); -#else - if (dev->bus_dma_mask) - dma_limit &= dev->bus_dma_mask; -#endif - - if (domain->geometry.force_aperture) - dma_limit = - min_t(u64, dma_limit, domain->geometry.aperture_end); - -#if (KERNEL_VERSION(5, 4, 0) <= LINUX_VERSION_CODE) - iova = alloc_iova_fast(iovad, iova_len, dma_limit >> shift, true); -#else - limit = min_t(dma_addr_t, dma_limit >> shift, iovad->end_pfn); - - iova = alloc_iova_fast(iovad, iova_len, limit, true); -#endif - - return (dma_addr_t)iova << shift; -} - -void rknpu_iommu_dma_free_iova(struct rknpu_iommu_dma_cookie *cookie, - dma_addr_t iova, size_t size) -{ - struct iova_domain *iovad = &cookie->iovad; - - free_iova_fast(iovad, iova_pfn(iovad, iova), size >> iova_shift(iovad)); -}