video: rockchip: vpu: add iova remap when page fault

fix bugs:
1. 3328 play H.264 and H.265 will crash

Change-Id: I6f7b85b957a3838c5e14e12e48bcf8668a28bd22
Signed-off-by: Jung Zhao <jung.zhao@rock-chips.com>
This commit is contained in:
Jung Zhao 2017-12-22 09:41:12 +08:00 committed by Tao Huang
parent e2c56b719b
commit d52672c955
5 changed files with 316 additions and 93 deletions

View File

@ -40,6 +40,8 @@ struct vcodec_drm_buffer {
dma_addr_t iova;
unsigned long phys;
};
/* this is used for page fault */
unsigned long iova_out;
void *cpu_addr;
unsigned long size;
int index;
@ -134,6 +136,71 @@ static void vcodec_drm_sgt_unmap_kernel(struct vcodec_drm_buffer *drm_buffer)
kfree(drm_buffer->pages);
}
static int vcodec_finalise_sg(struct scatterlist *sg,
int nents,
dma_addr_t dma_addr)
{
struct scatterlist *s, *cur = sg;
unsigned long seg_mask = DMA_BIT_MASK(32);
unsigned int cur_len = 0, max_len = DMA_BIT_MASK(32);
int i, count = 0;
for_each_sg(sg, s, nents, i) {
/* Restore this segment's original unaligned fields first */
unsigned int s_iova_off = sg_dma_address(s);
unsigned int s_length = sg_dma_len(s);
unsigned int s_iova_len = s->length;
s->offset += s_iova_off;
s->length = s_length;
sg_dma_address(s) = DMA_ERROR_CODE;
sg_dma_len(s) = 0;
/*
* Now fill in the real DMA data. If...
* - there is a valid output segment to append to
* - and this segment starts on an IOVA page boundary
* - but doesn't fall at a segment boundary
* - and wouldn't make the resulting output segment too long
*/
if (cur_len && !s_iova_off && (dma_addr & seg_mask) &&
(cur_len + s_length <= max_len)) {
/* ...then concatenate it with the previous one */
cur_len += s_length;
} else {
/* Otherwise start the next output segment */
if (i > 0)
cur = sg_next(cur);
cur_len = s_length;
count++;
sg_dma_address(cur) = dma_addr + s_iova_off;
}
sg_dma_len(cur) = cur_len;
dma_addr += s_iova_len;
if (s_length + s_iova_off < s_iova_len)
cur_len = 0;
}
return count;
}
static void vcodec_invalidate_sg(struct scatterlist *sg, int nents)
{
struct scatterlist *s;
int i;
for_each_sg(sg, s, nents, i) {
if (sg_dma_address(s) != DMA_ERROR_CODE)
s->offset += sg_dma_address(s);
if (sg_dma_len(s))
s->length = sg_dma_len(s);
sg_dma_address(s) = DMA_ERROR_CODE;
sg_dma_len(s) = 0;
}
}
static void vcodec_dma_unmap_sg(struct iommu_domain *domain,
dma_addr_t dma_addr)
{
@ -196,6 +263,63 @@ static void vcodec_drm_clear_map(struct kref *ref)
mutex_unlock(&iommu_info->iommu_mutex);
}
static int
vcodec_drm_unmap_iommu_with_iova(struct vcodec_iommu_session_info *session_info,
int idx)
{
struct device *dev = session_info->dev;
struct vcodec_drm_buffer *drm_buffer;
struct vcodec_iommu_info *iommu_info = session_info->iommu_info;
struct vcodec_iommu_drm_info *drm_info = iommu_info->private;
struct iommu_domain *domain = drm_info->domain;
mutex_lock(&session_info->list_mutex);
drm_buffer = vcodec_drm_get_buffer_no_lock(session_info, idx);
mutex_unlock(&session_info->list_mutex);
if (!drm_buffer) {
dev_err(dev, "can not find %d buffer in list\n", idx);
return -EINVAL;
}
iommu_unmap(domain, drm_buffer->iova_out, PAGE_SIZE);
return 0;
}
static int
vcodec_drm_map_iommu_with_iova(struct vcodec_iommu_session_info *session_info,
int idx,
unsigned long iova,
unsigned long size)
{
struct device *dev = session_info->dev;
struct vcodec_drm_buffer *drm_buffer;
struct vcodec_iommu_info *iommu_info = session_info->iommu_info;
struct vcodec_iommu_drm_info *drm_info = iommu_info->private;
mutex_lock(&session_info->list_mutex);
drm_buffer = vcodec_drm_get_buffer_no_lock(session_info, idx);
mutex_unlock(&session_info->list_mutex);
if (!drm_buffer) {
dev_err(dev, "can not find %d buffer in list\n", idx);
return -EINVAL;
}
drm_buffer->iova_out = iova;
iommu_map_sg(drm_info->domain,
iova,
drm_buffer->copy_sgt->sgl,
drm_buffer->copy_sgt->nents,
IOMMU_READ | IOMMU_WRITE);
return vcodec_finalise_sg(drm_buffer->copy_sgt->sgl,
drm_buffer->copy_sgt->nents,
iova);
}
static void*
vcodec_drm_map_kernel(struct vcodec_iommu_session_info *session_info, int idx)
{
@ -339,71 +463,6 @@ static int vcodec_drm_attach(struct vcodec_iommu_info *iommu_info)
return ret;
}
static int vcodec_finalise_sg(struct scatterlist *sg,
int nents,
dma_addr_t dma_addr)
{
struct scatterlist *s, *cur = sg;
unsigned long seg_mask = DMA_BIT_MASK(32);
unsigned int cur_len = 0, max_len = DMA_BIT_MASK(32);
int i, count = 0;
for_each_sg(sg, s, nents, i) {
/* Restore this segment's original unaligned fields first */
unsigned int s_iova_off = sg_dma_address(s);
unsigned int s_length = sg_dma_len(s);
unsigned int s_iova_len = s->length;
s->offset += s_iova_off;
s->length = s_length;
sg_dma_address(s) = DMA_ERROR_CODE;
sg_dma_len(s) = 0;
/*
* Now fill in the real DMA data. If...
* - there is a valid output segment to append to
* - and this segment starts on an IOVA page boundary
* - but doesn't fall at a segment boundary
* - and wouldn't make the resulting output segment too long
*/
if (cur_len && !s_iova_off && (dma_addr & seg_mask) &&
(cur_len + s_length <= max_len)) {
/* ...then concatenate it with the previous one */
cur_len += s_length;
} else {
/* Otherwise start the next output segment */
if (i > 0)
cur = sg_next(cur);
cur_len = s_length;
count++;
sg_dma_address(cur) = dma_addr + s_iova_off;
}
sg_dma_len(cur) = cur_len;
dma_addr += s_iova_len;
if (s_length + s_iova_off < s_iova_len)
cur_len = 0;
}
return count;
}
static void vcodec_invalidate_sg(struct scatterlist *sg, int nents)
{
struct scatterlist *s;
int i;
for_each_sg(sg, s, nents, i) {
if (sg_dma_address(s) != DMA_ERROR_CODE)
s->offset += sg_dma_address(s);
if (sg_dma_len(s))
s->length = sg_dma_len(s);
sg_dma_address(s) = DMA_ERROR_CODE;
sg_dma_len(s) = 0;
}
}
static dma_addr_t vcodec_dma_map_sg(struct iommu_domain *domain,
struct scatterlist *sg,
int nents, int prot)
@ -911,6 +970,8 @@ static struct vcodec_iommu_ops drm_ops = {
.unmap_kernel = vcodec_drm_unmap_kernel,
.map_iommu = vcodec_drm_map_iommu,
.unmap_iommu = vcodec_drm_unmap_iommu,
.map_iommu_with_iova = vcodec_drm_map_iommu_with_iova,
.unmap_iommu_with_iova = vcodec_drm_unmap_iommu_with_iova,
.destroy = vcodec_drm_destroy,
.dump = vcdoec_drm_dump_info,
.attach = vcodec_drm_attach,

View File

@ -310,6 +310,8 @@ static struct vcodec_iommu_ops ion_ops = {
.unmap_kernel = vcodec_ion_unmap_kernel,
.map_iommu = vcodec_ion_map_iommu,
.unmap_iommu = vcodec_ion_unmap_iommu,
.map_iommu_with_iova = NULL,
.unmap_iommu_with_iova = NULL,
.dump = NULL,
.attach = vcodec_ion_attach,
.detach = vcodec_ion_detach,

View File

@ -135,6 +135,35 @@ int vcodec_iommu_free_fd(struct vcodec_iommu_info *iommu_info,
return iommu_info->ops->free_fd(session_info, fd);
}
int vcodec_iommu_map_iommu_with_iova(struct vcodec_iommu_info *iommu_info,
void *session,
int idx, unsigned long iova,
unsigned long size)
{
struct vcodec_iommu_session_info *session_info = NULL;
session_info = vcodec_iommu_get_session_info(iommu_info, session);
if (!iommu_info || !iommu_info->ops->map_iommu || !session_info)
return -EINVAL;
return iommu_info->ops->map_iommu_with_iova(session_info,
idx, iova, size);
}
int vcodec_iommu_unmap_iommu_with_iova(struct vcodec_iommu_info *iommu_info,
void *session, int idx)
{
struct vcodec_iommu_session_info *session_info = NULL;
session_info = vcodec_iommu_get_session_info(iommu_info, session);
if (!iommu_info || !iommu_info->ops->unmap_iommu || !session_info)
return -EINVAL;
return iommu_info->ops->unmap_iommu_with_iova(session_info, idx);
}
int vcodec_iommu_map_iommu(struct vcodec_iommu_info *iommu_info,
struct vpu_session *session,
int idx, dma_addr_t *iova,

View File

@ -61,6 +61,13 @@ struct vcodec_iommu_ops {
dma_addr_t *iova, unsigned long *size);
int (*unmap_iommu)(struct vcodec_iommu_session_info *session_info,
int idx);
int (*map_iommu_with_iova)(struct vcodec_iommu_session_info
*session_info,
int idx,
unsigned long iova, unsigned long size);
int (*unmap_iommu_with_iova)(struct vcodec_iommu_session_info
*session_info,
int idx);
int (*destroy)(struct vcodec_iommu_info *iommu_info);
void (*dump)(struct vcodec_iommu_session_info *session_info);
int (*attach)(struct vcodec_iommu_info *iommu_info);
@ -123,6 +130,14 @@ int vcodec_iommu_map_iommu(struct vcodec_iommu_info *iommu_info,
int vcodec_iommu_unmap_iommu(struct vcodec_iommu_info *iommu_info,
struct vpu_session *session,
int idx);
int vcodec_iommu_map_iommu_with_iova(struct vcodec_iommu_info *iommu_info,
void *session,
int idx,
unsigned long iova,
unsigned long size);
int vcodec_iommu_unmap_iommu_with_iova(struct vcodec_iommu_info *iommu_info,
void *session,
int idx);
void vcodec_iommu_dump(struct vcodec_iommu_info *iommu_info,
struct vpu_session *session);
void vcodec_iommu_clear(struct vcodec_iommu_info *iommu_info,

View File

@ -44,6 +44,7 @@
#include <linux/rockchip/pmu.h>
#include <linux/rockchip/grf.h>
#include <linux/dma-iommu.h>
#include <linux/dma-buf.h>
#include <linux/rockchip-iovmm.h>
#include <video/rk_vpu_service.h>
@ -90,6 +91,7 @@
#define SIZE_REG(reg) ((reg) * 4)
#define VCODEC_CLOCK_ENABLE 1
#define IOMMU_PAGE_SIZE 4096
#define EXTRA_INFO_MAGIC 0x4C4A46
static int debug;
@ -137,6 +139,11 @@ struct extra_info_for_iommu {
struct extra_info_elem elem[20];
};
struct vcodec_iommu_drm_info {
struct iommu_domain *domain;
bool attached;
};
static const struct vcodec_info vcodec_info_set[] = {
{
.hw_id = VPU_ID_8270,
@ -254,7 +261,8 @@ struct vpu_reg {
};
enum vpu_ctx_state {
MMU_ACTIVATED = BIT(0)
MMU_ACTIVATED = BIT(0),
MMU_PAGEFAULT = BIT(1)
};
struct vpu_subdev_data {
@ -286,6 +294,8 @@ struct vpu_subdev_data {
struct device *mmu_dev;
struct vcodec_iommu_info *iommu_info;
int pa_hdl;
unsigned long pa_iova;
struct work_struct set_work;
};
@ -2044,8 +2054,8 @@ static void vcodec_power_on_rk312x(struct vpu_service_info *pservice)
if (of_machine_is_compatible("rockchip,rk3126") ||
of_machine_is_compatible("rockchip,rk3128"))
regmap_write(pservice->grf, RK312X_GRF_SOC_CON1,
BIT_VCODEC_CLK_SEL |
(BIT_VCODEC_CLK_SEL << 16));
BIT_VCODEC_CLK_SEL |
(BIT_VCODEC_CLK_SEL << 16));
vcodec_power_on_default(pservice);
}
@ -2241,55 +2251,61 @@ static inline void platform_set_sysmmu(struct device *iommu,
}
#endif
int vcodec_sysmmu_fault_hdl(struct device *dev,
enum rk_iommu_inttype itype,
unsigned long pgtable_base,
unsigned long fault_addr, unsigned int status)
#define IOMMU_GET_BUS_ID(x) (((x) >> 6) & 0x1f)
static int
_vcodec_sys_fault_hdl(struct device *dev, unsigned long iova, int status)
{
struct platform_device *pdev;
struct vpu_service_info *pservice;
struct vpu_subdev_data *data;
struct vpu_session *session = NULL;
vpu_debug_enter();
if (dev == NULL) {
if (!dev) {
pr_err("invalid NULL dev\n");
return 0;
}
pdev = container_of(dev, struct platform_device, dev);
if (pdev == NULL) {
if (!pdev) {
pr_err("invalid NULL platform_device\n");
return 0;
}
data = platform_get_drvdata(pdev);
if (data == NULL) {
if (!data) {
pr_err("invalid NULL vpu_subdev_data\n");
return 0;
}
pservice = data->pservice;
if (pservice == NULL) {
if (!pservice) {
pr_err("invalid NULL vpu_service_info\n");
return 0;
}
session = list_first_entry(&pservice->session,
struct vpu_session, list_session);
if (pservice->reg_codec) {
struct vpu_reg *reg = pservice->reg_codec;
struct vcodec_mem_region *mem, *n;
struct vcodec_mem_region *mem = NULL, *n = NULL;
int i = 0;
unsigned long tmp_iova = mem->iova;
pr_err("vcodec, fault addr 0x%08lx\n", fault_addr);
if (!list_empty(&reg->mem_region_list))
dev_err(dev, "vcodec, fault addr 0x%08lx\n", iova);
if (!list_empty(&reg->mem_region_list)) {
list_for_each_entry_safe(mem, n, &reg->mem_region_list,
reg_lnk) {
dev_err(dev, "reg[%02u] mem region [%02d] %pad %lx\n",
mem->reg_idx, i, &mem->iova, mem->len);
dev_err(dev, "vcodec, reg[%02u] mem region [%02d] 0x%lx %lx\n",
mem->reg_idx, i, tmp_iova, mem->len);
i++;
}
else
} else {
dev_err(dev, "no memory region mapped\n");
}
if (reg->data) {
struct vpu_subdev_data *data = reg->data;
@ -2299,22 +2315,60 @@ int vcodec_sysmmu_fault_hdl(struct device *dev,
dev_err(dev, "current errror register set:\n");
for (i = 0; i < len; i++)
pr_err("reg[%02d] %08x\n",
i, readl_relaxed(base + i));
dev_err(dev, "reg[%02d] %08x\n",
i, readl_relaxed(base + i));
}
dev_alert(dev, "page fault occur, reset hw\n");
set_bit(MMU_PAGEFAULT, &data->state);
/* reg->reg[101] = 1; */
_vpu_reset(data);
/*
* defeat workaround, invalidate address generated when rk322x
* vdec pre-fetch colmv data.
*/
if (IOMMU_GET_BUS_ID(status) == 2 && data->pa_hdl) {
/* avoid another page fault occur after page fault */
if (data->pa_iova)
vcodec_iommu_unmap_iommu_with_iova(data->iommu_info,
session,
data->pa_hdl);
/* get the page align iova */
data->pa_iova = round_down(iova, IOMMU_PAGE_SIZE);
vcodec_iommu_map_iommu_with_iova(data->iommu_info,
session,
data->pa_hdl,
data->pa_iova,
IOMMU_PAGE_SIZE);
} else {
_vpu_reset(data);
}
}
return 0;
}
int vcodec_iommu_fault_hdl(struct iommu_domain *iommu,
struct device *iommu_dev,
unsigned long iova, int status, void *arg)
{
struct device *dev = (struct device *)arg;
return _vcodec_sys_fault_hdl(dev, iova, status);
}
int vcodec_iovmm_fault_hdl(struct device *dev,
enum rk_iommu_inttype itype,
unsigned long pgtable_base,
unsigned long fault_addr, unsigned int status)
{
return _vcodec_sys_fault_hdl(dev, fault_addr, status);
}
static int vcodec_spec_init_rk3328(struct vpu_service_info *pservice)
{
/* todo */
return 0;
}
@ -2428,9 +2482,9 @@ static void vcodec_set_hw_ops(struct vpu_service_info *pservice)
static int vcodec_subdev_probe(struct platform_device *pdev,
struct vpu_service_info *pservice)
{
uint8_t *regs = NULL;
int32_t ret = 0;
uint32_t ioaddr = 0;
u8 *regs = NULL;
s32 ret = 0;
u32 ioaddr = 0;
struct resource *res = NULL;
struct vpu_hw_info *hw_info = NULL;
struct device *dev = &pdev->dev;
@ -2438,6 +2492,9 @@ static int vcodec_subdev_probe(struct platform_device *pdev,
struct vpu_subdev_data *data = NULL;
struct platform_device *sub_dev = NULL;
struct device_node *sub_np = NULL;
struct vpu_session *session = list_first_entry(&pservice->session,
struct vpu_session,
list_session);
const char *name = np->name;
char mmu_dev_dts_name[40];
@ -2511,7 +2568,7 @@ static int vcodec_subdev_probe(struct platform_device *pdev,
platform_set_sysmmu(data->mmu_dev, dev);
rockchip_iovmm_set_fault_handler
(dev, vcodec_sysmmu_fault_hdl);
(dev, vcodec_iovmm_fault_hdl);
}
dev_info(dev, "vpu mmu dec %p\n", data->mmu_dev);
@ -2520,8 +2577,8 @@ static int vcodec_subdev_probe(struct platform_device *pdev,
if (of_machine_is_compatible("rockchip,rk3126") ||
of_machine_is_compatible("rockchip,rk3128"))
regmap_write(pservice->grf, RK312X_GRF_SOC_CON1,
BIT_VCODEC_CLK_SEL |
(BIT_VCODEC_CLK_SEL << 16));
BIT_VCODEC_CLK_SEL |
(BIT_VCODEC_CLK_SEL << 16));
clear_bit(MMU_ACTIVATED, &data->state);
vpu_service_power_on(data, pservice);
@ -2531,12 +2588,29 @@ static int vcodec_subdev_probe(struct platform_device *pdev,
pservice->alloc_type);
dev_info(dev, "allocator is %s\n", pservice->alloc_type == 1 ? "drm" :
(pservice->alloc_type == 2 ? "ion" : "null"));
if (pservice->alloc_type == 1) {
struct vcodec_iommu_drm_info *drm_info =
data->iommu_info->private;
iommu_set_fault_handler(drm_info->domain,
vcodec_iommu_fault_hdl, dev);
}
vcodec_enter_mode(data);
ret = vpu_service_check_hw(data);
if (ret < 0) {
dev_err(dev, "error: hw info check failed\n");
goto err;
}
data->pa_hdl = vcodec_iommu_alloc(data->iommu_info,
session, 4096, 4096);
if (data->pa_hdl < 0) {
dev_err(dev, "allocate page fault hdl failed\n");
return -1;
}
vcodec_iommu_map_iommu(data->iommu_info, session,
data->pa_hdl, NULL, NULL);
data->pa_iova = 0;
vcodec_exit_mode(data);
hw_info = data->hw_info;
@ -2642,9 +2716,14 @@ static int vcodec_subdev_probe(struct platform_device *pdev,
static void vcodec_subdev_remove(struct vpu_subdev_data *data)
{
struct vpu_service_info *pservice = data->pservice;
struct vpu_session *session = list_first_entry(&pservice->session,
struct vpu_session,
list_session);
vcodec_iommu_clear(data->iommu_info, session);
vcodec_iommu_info_destroy(data->iommu_info);
data->iommu_info = NULL;
kfree(session);
mutex_lock(&pservice->lock);
cancel_delayed_work_sync(&pservice->power_off_work);
@ -2759,6 +2838,7 @@ static const struct of_device_id vcodec_service_dt_ids[] = {
static void vcodec_init_drvdata(struct vpu_service_info *pservice)
{
const struct of_device_id *match = NULL;
pservice->dev_id = VCODEC_DEVICE_ID_VPU;
pservice->curr_mode = -1;
@ -2804,6 +2884,7 @@ static int vcodec_probe(struct platform_device *pdev)
struct device_node *np = pdev->dev.of_node;
struct vpu_service_info *pservice = NULL;
struct vcodec_device_info *driver_data;
struct vpu_session *session = NULL;
pservice = devm_kzalloc(dev, sizeof(struct vpu_service_info),
GFP_KERNEL);
@ -2865,6 +2946,29 @@ static int vcodec_probe(struct platform_device *pdev)
pm_runtime_enable(dev);
/*
* this session is global session, each dev
* only has one global session, and will be
* release when dev remove
*/
session = devm_kzalloc(dev, sizeof(*session), GFP_KERNEL);
if (!session) {
vpu_err("error: unable to allocate memory for vpu_session.");
return -ENOMEM;
}
session->type = VPU_TYPE_BUTT;
session->pid = current->pid;
INIT_LIST_HEAD(&session->waiting);
INIT_LIST_HEAD(&session->running);
INIT_LIST_HEAD(&session->done);
INIT_LIST_HEAD(&session->list_session);
init_waitqueue_head(&session->wait);
atomic_set(&session->task_running, 0);
mutex_lock(&pservice->lock);
list_add_tail(&session->list_session, &pservice->session);
mutex_unlock(&pservice->lock);
if (of_property_read_bool(np, "subcnt")) {
struct vpu_subdev_data *data = NULL;
@ -3137,6 +3241,10 @@ static irqreturn_t vdpu_isr(int irq, void *dev_id)
struct vpu_subdev_data *data = (struct vpu_subdev_data *)dev_id;
struct vpu_service_info *pservice = data->pservice;
struct vpu_device *dev = &data->dec_dev;
struct vpu_session *session = list_first_entry(&pservice->session,
struct vpu_session,
list_session);
if (atomic_read(&pservice->secure_mode))
return IRQ_HANDLED;
@ -3147,6 +3255,14 @@ static irqreturn_t vdpu_isr(int irq, void *dev_id)
if (pservice->reg_codec == NULL) {
vpu_err("error: dec isr with no task waiting\n");
} else {
if (test_bit(MMU_PAGEFAULT, &data->state) &&
data->pa_iova) {
vcodec_iommu_unmap_iommu_with_iova(data->iommu_info,
session,
data->pa_hdl);
data->pa_iova = 0;
clear_bit(MMU_PAGEFAULT, &data->state);
}
reg_from_run_to_done(data, pservice->reg_codec);
/* avoid vpu timeout and can't recover problem */
if (data->mode == VCODEC_RUNNING_MODE_VPU)