diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 8bc591deb546..fd50da4c7b18 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -1190,7 +1190,6 @@ struct amdgpu_device { bool apu_prefer_gtt; bool userq_halt_for_enforce_isolation; - struct work_struct userq_reset_work; struct amdgpu_uid *uid_info; struct amdgpu_uma_carveout_info uma_info; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c index d9e283f3b57d..9783a3cefb04 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c @@ -36,6 +36,9 @@ #include "amdgpu_ras.h" #include "amdgpu_umc.h" #include "amdgpu_reset.h" +#if IS_ENABLED(CONFIG_HSA_AMD) +#include "kfd_priv.h" +#endif /* Total memory size in system memory and all GPU VRAM. Used to * estimate worst case amount of memory to reserve for page tables @@ -320,6 +323,28 @@ void amdgpu_amdkfd_gpu_reset(struct amdgpu_device *adev) (void)amdgpu_reset_domain_schedule(adev->reset_domain, &adev->kfd.reset_work); } +void amdgpu_amdkfd_clear_kfd_mapping(struct amdgpu_device *adev) +{ +#if IS_ENABLED(CONFIG_HSA_AMD) + struct kfd_dev *kfd = adev->kfd.dev; + unsigned int i; + + if (!kfd) + return; + + for (i = 0; i < kfd->num_nodes; i++) { + struct kfd_node *node = kfd->nodes[i]; + + kfd_dev_unmap_mapping_range(KFD_MMAP_TYPE_DOORBELL | + KFD_MMAP_GPU_ID(node->id), + kfd_doorbell_process_slice(kfd)); + kfd_dev_unmap_mapping_range(KFD_MMAP_TYPE_MMIO | + KFD_MMAP_GPU_ID(node->id), + PAGE_SIZE); + } +#endif +} + int amdgpu_amdkfd_alloc_kernel_mem(struct amdgpu_device *adev, size_t size, u32 domain, void **mem_obj, uint64_t *gpu_addr, void **cpu_ptr, bool cp_mqd_gfx9) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h index cdbab7f8cee8..2b4108f83f48 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h +++ 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h @@ -358,6 +358,7 @@ int amdgpu_amdkfd_reserve_mem_limit(struct amdgpu_device *adev, uint64_t size, u32 alloc_flag, int8_t xcp_id); void amdgpu_amdkfd_unreserve_mem_limit(struct amdgpu_device *adev, uint64_t size, u32 alloc_flag, int8_t xcp_id); +void amdgpu_amdkfd_clear_kfd_mapping(struct amdgpu_device *adev); u64 amdgpu_amdkfd_xcp_memory_size(struct amdgpu_device *adev, int xcp_id); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 66ca043658ff..feab90e3efd1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -3787,7 +3787,6 @@ int amdgpu_device_init(struct amdgpu_device *adev, } INIT_WORK(&adev->xgmi_reset_work, amdgpu_device_xgmi_reset_func); - INIT_WORK(&adev->userq_reset_work, amdgpu_userq_reset_work); amdgpu_coredump_init(adev); @@ -5478,7 +5477,7 @@ static inline void amdgpu_device_stop_pending_resets(struct amdgpu_device *adev) if (!amdgpu_sriov_vf(adev)) cancel_work(&adev->reset_work); #endif - cancel_work(&adev->userq_reset_work); + amdgpu_userq_mgr_cancel_reset_work(adev); if (adev->kfd.dev) cancel_work(&adev->kfd.reset_work); @@ -5836,6 +5835,12 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev, /* We need to lock reset domain only once both for XGMI and single device */ amdgpu_device_recovery_get_reset_lock(adev, &device_list); + /* unmap all the mappings of doorbell and framebuffer to prevent user space from + * accessing them + */ + unmap_mapping_range(adev->ddev.anon_inode->i_mapping, 0, 0, 1); + amdgpu_amdkfd_clear_kfd_mapping(adev); + amdgpu_device_halt_activities(adev, job, reset_context, &device_list, hive, need_emergency_restart); if (need_emergency_restart) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c index 8d99bfaa498f..80efeca0ab73 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c +++ 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c @@ -304,7 +304,7 @@ static int amdgpu_discovery_get_tmr_info(struct amdgpu_device *adev, adev->virt.crit_regn_tbl[AMD_SRIOV_MSG_IPD_TABLE_ID].offset; adev->discovery.size = adev->virt.crit_regn_tbl[AMD_SRIOV_MSG_IPD_TABLE_ID].size_kb << 10; - if (!adev->discovery.offset || !adev->discovery.size) + if (!adev->discovery.size) return -EINVAL; } else { goto out; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c index 5376035d32fe..123d4a09114d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c @@ -31,6 +31,7 @@ #include #include #include +#include #include #include @@ -508,6 +509,9 @@ int amdgpu_gem_userptr_ioctl(struct drm_device *dev, void *data, if (offset_in_page(args->addr | args->size)) return -EINVAL; + if (!access_ok((void __user *)(uintptr_t)args->addr, args->size)) + return -EFAULT; + /* reject unknown flag values */ if (args->flags & ~(AMDGPU_GEM_USERPTR_READONLY | AMDGPU_GEM_USERPTR_ANONONLY | AMDGPU_GEM_USERPTR_VALIDATE | @@ -821,7 +825,7 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data, struct drm_syncobj *timeline_syncobj = NULL; struct dma_fence_chain *timeline_chain = NULL; struct drm_exec exec; - uint64_t vm_size; + uint64_t vm_size, tmp; int r = 0; /* Validate virtual address range against reserved regions. 
*/ @@ -845,7 +849,7 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data, vm_size = adev->vm_manager.max_pfn * AMDGPU_GPU_PAGE_SIZE; vm_size -= AMDGPU_VA_RESERVED_TOP; - if (args->va_address + args->map_size > vm_size) { + if (check_add_overflow(args->va_address, args->map_size, &tmp) || tmp > vm_size) { dev_dbg(dev->dev, "va_address 0x%llx is in top reserved area 0x%llx\n", args->va_address + args->map_size, vm_size); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c index 620fddde4c4d..a5d26b943f6d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c @@ -199,11 +199,18 @@ int amdgpu_gtt_mgr_alloc_entries(struct amdgpu_gtt_mgr *mgr, enum drm_mm_insert_mode mode) { struct amdgpu_device *adev = container_of(mgr, typeof(*adev), mman.gtt_mgr); + u32 alignment = 0; int r; + /* Align to TLB L2 cache entry size to work around "V bit HW bug" */ + if (adev->asic_type == CHIP_TAHITI) { + alignment = 32 * 1024 / AMDGPU_GPU_PAGE_SIZE; + num_pages = ALIGN(num_pages, alignment); + } + spin_lock(&mgr->lock); r = drm_mm_insert_node_in_range(&mgr->mm, mm_node, num_pages, - 0, GART_ENTRY_WITHOUT_BO_COLOR, 0, + alignment, GART_ENTRY_WITHOUT_BO_COLOR, 0, adev->gmc.gart_size >> PAGE_SHIFT, mode); spin_unlock(&mgr->lock); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h index 912c9afaf9e1..4d68732d6223 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h @@ -96,7 +96,8 @@ struct amdgpu_bo_va { * if non-zero, cannot unmap from GPU because user queues may still access it */ unsigned int queue_refcount; - atomic_t userq_va_mapped; + /* Indicates if this buffer is mapped for any user queue. Once set, never reset. 
*/ + bool userq_va_mapped; }; struct amdgpu_bo { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_seq64.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_seq64.c index a0b479d5fff1..f4be19223588 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_seq64.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_seq64.c @@ -175,11 +175,14 @@ int amdgpu_seq64_alloc(struct amdgpu_device *adev, u64 *va, { unsigned long bit_pos; - bit_pos = find_first_zero_bit(adev->seq64.used, adev->seq64.num_sem); - if (bit_pos >= adev->seq64.num_sem) - return -ENOSPC; + for (;;) { + bit_pos = find_first_zero_bit(adev->seq64.used, adev->seq64.num_sem); + if (bit_pos >= adev->seq64.num_sem) + return -ENOSPC; - __set_bit(bit_pos, adev->seq64.used); + if (!test_and_set_bit(bit_pos, adev->seq64.used)) + break; + } *va = bit_pos * sizeof(u64) + amdgpu_seq64_get_va_base(adev); @@ -205,7 +208,7 @@ void amdgpu_seq64_free(struct amdgpu_device *adev, u64 va) bit_pos = (va - amdgpu_seq64_get_va_base(adev)) / sizeof(u64); if (bit_pos < adev->seq64.num_sem) - __clear_bit(bit_pos, adev->seq64.used); + clear_bit(bit_pos, adev->seq64.used); } /** diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c index 0238c2798de4..b8ed931f8a40 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c @@ -130,6 +130,7 @@ void amdgpu_umc_handle_bad_pages(struct amdgpu_device *adev, if (adev->umc.ras && adev->umc.ras->ras_block.hw_ops && adev->umc.ras->ras_block.hw_ops->query_ras_error_address && adev->umc.max_ras_err_cnt_per_query) { + kfree(err_data->err_addr); err_data->err_addr = kzalloc_objs(struct eeprom_table_record, adev->umc.max_ras_err_cnt_per_query); @@ -160,6 +161,7 @@ void amdgpu_umc_handle_bad_pages(struct amdgpu_device *adev, if (adev->umc.ras && adev->umc.ras->ecc_info_query_ras_error_address && adev->umc.max_ras_err_cnt_per_query) { + kfree(err_data->err_addr); err_data->err_addr = kzalloc_objs(struct eeprom_table_record, 
adev->umc.max_ras_err_cnt_per_query); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c index 70d74f04d2dd..f070ea37d918 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c @@ -82,19 +82,11 @@ static bool amdgpu_userq_is_reset_type_supported(struct amdgpu_device *adev, return false; } -static void amdgpu_userq_gpu_reset(struct amdgpu_device *adev) -{ - if (amdgpu_device_should_recover_gpu(adev)) { - amdgpu_reset_domain_schedule(adev->reset_domain, - &adev->userq_reset_work); - /* Wait for the reset job to complete */ - flush_work(&adev->userq_reset_work); - } -} - -static int -amdgpu_userq_detect_and_reset_queues(struct amdgpu_userq_mgr *uq_mgr) +static void amdgpu_userq_mgr_reset_work(struct work_struct *work) { + struct amdgpu_userq_mgr *uq_mgr = + container_of(work, struct amdgpu_userq_mgr, + reset_work); struct amdgpu_device *adev = uq_mgr->adev; const int queue_types[] = { AMDGPU_RING_TYPE_COMPUTE, @@ -103,12 +95,11 @@ amdgpu_userq_detect_and_reset_queues(struct amdgpu_userq_mgr *uq_mgr) }; const int num_queue_types = ARRAY_SIZE(queue_types); bool gpu_reset = false; - int r = 0; - int i; + int i, r; if (unlikely(adev->debug_disable_gpu_ring_reset)) { dev_err(adev->dev, "userq reset disabled by debug mask\n"); - return 0; + return; } /* @@ -116,7 +107,7 @@ amdgpu_userq_detect_and_reset_queues(struct amdgpu_userq_mgr *uq_mgr) * skip all reset detection logic */ if (!amdgpu_gpu_recovery) - return 0; + return; /* * Iterate through all queue types to detect and reset problematic queues @@ -141,10 +132,19 @@ amdgpu_userq_detect_and_reset_queues(struct amdgpu_userq_mgr *uq_mgr) } } - if (gpu_reset) - amdgpu_userq_gpu_reset(adev); + if (gpu_reset) { + struct amdgpu_reset_context reset_context; - return r; + memset(&reset_context, 0, sizeof(reset_context)); + + reset_context.method = AMD_RESET_METHOD_NONE; + reset_context.reset_req_dev = adev; + reset_context.src = 
AMDGPU_RESET_SRC_USERQ; + set_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags); + /*set_bit(AMDGPU_SKIP_COREDUMP, &reset_context.flags);*/ + + amdgpu_device_gpu_recover(adev, NULL, &reset_context); + } } static void amdgpu_userq_hang_detect_work(struct work_struct *work) @@ -153,7 +153,11 @@ static void amdgpu_userq_hang_detect_work(struct work_struct *work) container_of(work, struct amdgpu_usermode_queue, hang_detect_work.work); - amdgpu_userq_detect_and_reset_queues(queue->userq_mgr); + /* + * Don't schedule the work here! Scheduling or queue work from one reset + * handler to another is illegal if you don't take extra precautions! + */ + amdgpu_userq_mgr_reset_work(&queue->userq_mgr->reset_work); } /* @@ -182,8 +186,8 @@ void amdgpu_userq_start_hang_detect_work(struct amdgpu_usermode_queue *queue) break; } - schedule_delayed_work(&queue->hang_detect_work, - msecs_to_jiffies(timeout_ms)); + queue_delayed_work(adev->reset_domain->wq, &queue->hang_detect_work, + msecs_to_jiffies(timeout_ms)); } void amdgpu_userq_process_fence_irq(struct amdgpu_device *adev, u32 doorbell) @@ -223,7 +227,7 @@ static int amdgpu_userq_buffer_va_list_add(struct amdgpu_usermode_queue *queue, INIT_LIST_HEAD(&va_cursor->list); va_cursor->gpu_addr = addr; - atomic_set(&va_map->bo_va->userq_va_mapped, 1); + va_map->bo_va->userq_va_mapped = true; list_add(&va_cursor->list, &queue->userq_va_list); return 0; @@ -270,7 +274,7 @@ static bool amdgpu_userq_buffer_va_mapped(struct amdgpu_vm *vm, u64 addr) dma_resv_assert_held(vm->root.bo->tbo.base.resv); mapping = amdgpu_vm_bo_lookup_mapping(vm, addr); - if (!IS_ERR_OR_NULL(mapping) && atomic_read(&mapping->bo_va->userq_va_mapped)) + if (!IS_ERR_OR_NULL(mapping) && mapping->bo_va->userq_va_mapped) r = true; else r = false; @@ -296,16 +300,8 @@ static bool amdgpu_userq_buffer_vas_mapped(struct amdgpu_usermode_queue *queue) return false; } -static void amdgpu_userq_buffer_va_list_del(struct amdgpu_bo_va_mapping *mapping, - struct 
amdgpu_userq_va_cursor *va_cursor) -{ - atomic_set(&mapping->bo_va->userq_va_mapped, 0); - list_del(&va_cursor->list); - kfree(va_cursor); -} - -static int amdgpu_userq_buffer_vas_list_cleanup(struct amdgpu_device *adev, - struct amdgpu_usermode_queue *queue) +static void amdgpu_userq_buffer_vas_list_cleanup(struct amdgpu_device *adev, + struct amdgpu_usermode_queue *queue) { struct amdgpu_userq_va_cursor *va_cursor, *tmp; struct amdgpu_bo_va_mapping *mapping; @@ -315,15 +311,12 @@ static int amdgpu_userq_buffer_vas_list_cleanup(struct amdgpu_device *adev, list_for_each_entry_safe(va_cursor, tmp, &queue->userq_va_list, list) { mapping = amdgpu_vm_bo_lookup_mapping(queue->vm, va_cursor->gpu_addr); - if (!mapping) { - return -EINVAL; - } - dev_dbg(adev->dev, "delete the userq:%p va:%llx\n", - queue, va_cursor->gpu_addr); - amdgpu_userq_buffer_va_list_del(mapping, va_cursor); + if (mapping) + dev_dbg(adev->dev, "delete the userq:%p va:%llx\n", + queue, va_cursor->gpu_addr); + list_del(&va_cursor->list); + kfree(va_cursor); } - - return 0; } static int amdgpu_userq_preempt_helper(struct amdgpu_usermode_queue *queue) @@ -504,16 +497,20 @@ int amdgpu_userq_create_object(struct amdgpu_userq_mgr *uq_mgr, goto free_obj; } + r = amdgpu_bo_pin(userq_obj->obj, AMDGPU_GEM_DOMAIN_GTT); + if (r) + goto unresv; + r = amdgpu_ttm_alloc_gart(&(userq_obj->obj)->tbo); if (r) { drm_file_err(uq_mgr->file, "Failed to alloc GART for userqueue object (%d)", r); - goto unresv; + goto unpin_bo; } r = amdgpu_bo_kmap(userq_obj->obj, &userq_obj->cpu_ptr); if (r) { drm_file_err(uq_mgr->file, "Failed to map BO for userqueue (%d)", r); - goto unresv; + goto unpin_bo; } userq_obj->gpu_addr = amdgpu_bo_gpu_offset(userq_obj->obj); @@ -521,11 +518,13 @@ int amdgpu_userq_create_object(struct amdgpu_userq_mgr *uq_mgr, memset(userq_obj->cpu_ptr, 0, size); return 0; +unpin_bo: + amdgpu_bo_unpin(userq_obj->obj); unresv: amdgpu_bo_unreserve(userq_obj->obj); - free_obj: amdgpu_bo_unref(&userq_obj->obj); + 
return r; } @@ -533,6 +532,7 @@ void amdgpu_userq_destroy_object(struct amdgpu_userq_mgr *uq_mgr, struct amdgpu_userq_obj *userq_obj) { amdgpu_bo_kunmap(userq_obj->obj); + amdgpu_bo_unpin(userq_obj->obj); amdgpu_bo_unref(&userq_obj->obj); } @@ -708,14 +708,14 @@ amdgpu_userq_create(struct drm_file *filp, union drm_amdgpu_userq *args) const struct amdgpu_userq_funcs *uq_funcs; struct amdgpu_usermode_queue *queue; struct amdgpu_db_info db_info; - bool skip_map_queue; - u32 qid; uint64_t index; - int r = 0; - int priority = - (args->in.flags & AMDGPU_USERQ_CREATE_FLAGS_QUEUE_PRIORITY_MASK) >> - AMDGPU_USERQ_CREATE_FLAGS_QUEUE_PRIORITY_SHIFT; + int priority; + u32 qid; + int r; + priority = + (args->in.flags & AMDGPU_USERQ_CREATE_FLAGS_QUEUE_PRIORITY_MASK) + >> AMDGPU_USERQ_CREATE_FLAGS_QUEUE_PRIORITY_SHIFT; r = amdgpu_userq_priority_permit(filp, priority); if (r) return r; @@ -728,40 +728,43 @@ amdgpu_userq_create(struct drm_file *filp, union drm_amdgpu_userq *args) uq_funcs = adev->userq_funcs[args->in.ip_type]; if (!uq_funcs) { - drm_file_err(uq_mgr->file, "Usermode queue is not supported for this IP (%u)\n", - args->in.ip_type); r = -EINVAL; goto err_pm_runtime; } queue = kzalloc_obj(struct amdgpu_usermode_queue); if (!queue) { - drm_file_err(uq_mgr->file, "Failed to allocate memory for queue\n"); r = -ENOMEM; goto err_pm_runtime; } + kref_init(&queue->refcount); INIT_LIST_HEAD(&queue->userq_va_list); queue->doorbell_handle = args->in.doorbell_handle; queue->queue_type = args->in.ip_type; queue->vm = &fpriv->vm; queue->priority = priority; - - db_info.queue_type = queue->queue_type; - db_info.doorbell_handle = queue->doorbell_handle; - db_info.db_obj = &queue->db_obj; - db_info.doorbell_offset = args->in.doorbell_offset; - queue->userq_mgr = uq_mgr; + INIT_DELAYED_WORK(&queue->hang_detect_work, + amdgpu_userq_hang_detect_work); - /* Validate the userq virtual address.*/ - r = amdgpu_bo_reserve(fpriv->vm.root.bo, false); + mutex_init(&queue->fence_drv_lock); + 
xa_init_flags(&queue->fence_drv_xa, XA_FLAGS_ALLOC); + r = amdgpu_userq_fence_driver_alloc(adev, &queue->fence_drv); if (r) goto free_queue; - if (amdgpu_userq_input_va_validate(adev, queue, args->in.queue_va, args->in.queue_size) || - amdgpu_userq_input_va_validate(adev, queue, args->in.rptr_va, AMDGPU_GPU_PAGE_SIZE) || - amdgpu_userq_input_va_validate(adev, queue, args->in.wptr_va, AMDGPU_GPU_PAGE_SIZE)) { + /* Make sure the queue can actually run with those virtual addresses. */ + r = amdgpu_bo_reserve(fpriv->vm.root.bo, false); + if (r) + goto free_fence_drv; + + if (amdgpu_userq_input_va_validate(adev, queue, args->in.queue_va, + args->in.queue_size) || + amdgpu_userq_input_va_validate(adev, queue, args->in.rptr_va, + AMDGPU_GPU_PAGE_SIZE) || + amdgpu_userq_input_va_validate(adev, queue, args->in.wptr_va, + AMDGPU_GPU_PAGE_SIZE)) { r = -EINVAL; amdgpu_bo_unreserve(fpriv->vm.root.bo); goto clean_mapping; @@ -769,6 +772,10 @@ amdgpu_userq_create(struct drm_file *filp, union drm_amdgpu_userq *args) amdgpu_bo_unreserve(fpriv->vm.root.bo); /* Convert relative doorbell offset into absolute doorbell index */ + db_info.queue_type = queue->queue_type; + db_info.doorbell_handle = queue->doorbell_handle; + db_info.db_obj = &queue->db_obj; + db_info.doorbell_offset = args->in.doorbell_offset; index = amdgpu_userq_get_doorbell_index(uq_mgr, &db_info, filp); if (index == (uint64_t)-EINVAL) { drm_file_err(uq_mgr->file, "Failed to get doorbell for queue\n"); @@ -777,82 +784,64 @@ amdgpu_userq_create(struct drm_file *filp, union drm_amdgpu_userq *args) } queue->doorbell_index = index; - mutex_init(&queue->fence_drv_lock); - xa_init_flags(&queue->fence_drv_xa, XA_FLAGS_ALLOC); - r = amdgpu_userq_fence_driver_alloc(adev, &queue->fence_drv); - if (r) { - drm_file_err(uq_mgr->file, "Failed to alloc fence driver\n"); - goto clean_mapping; - } - r = uq_funcs->mqd_create(queue, &args->in); if (r) { drm_file_err(uq_mgr->file, "Failed to create Queue\n"); - goto clean_fence_driver; + 
goto clean_mapping; } + /* Update VM owner at userq submit-time for page-fault attribution. */ + amdgpu_vm_set_task_info(&fpriv->vm); + + r = xa_err(xa_store_irq(&adev->userq_doorbell_xa, index, queue, + GFP_KERNEL)); + if (r) + goto clean_mqd; + amdgpu_userq_ensure_ev_fence(&fpriv->userq_mgr, &fpriv->evf_mgr); /* don't map the queue if scheduling is halted */ - if (adev->userq_halt_for_enforce_isolation && - ((queue->queue_type == AMDGPU_HW_IP_GFX) || - (queue->queue_type == AMDGPU_HW_IP_COMPUTE))) - skip_map_queue = true; - else - skip_map_queue = false; - if (!skip_map_queue) { + if (!adev->userq_halt_for_enforce_isolation || + ((queue->queue_type != AMDGPU_HW_IP_GFX) && + (queue->queue_type != AMDGPU_HW_IP_COMPUTE))) { r = amdgpu_userq_map_helper(queue); if (r) { drm_file_err(uq_mgr->file, "Failed to map Queue\n"); - goto clean_mqd; + mutex_unlock(&uq_mgr->userq_mutex); + goto clean_doorbell; } } - /* drop this refcount during queue destroy */ - kref_init(&queue->refcount); - - /* Wait for mode-1 reset to complete */ - down_read(&adev->reset_domain->sem); - - r = xa_alloc(&uq_mgr->userq_xa, &qid, queue, - XA_LIMIT(1, AMDGPU_MAX_USERQ_COUNT), GFP_KERNEL); - if (r) { - if (!skip_map_queue) - amdgpu_userq_unmap_helper(queue); - r = -ENOMEM; - goto clean_reset_domain; - } - - r = xa_err(xa_store_irq(&adev->userq_doorbell_xa, index, queue, GFP_KERNEL)); - if (r) { - xa_erase(&uq_mgr->userq_xa, qid); - if (!skip_map_queue) - amdgpu_userq_unmap_helper(queue); - goto clean_reset_domain; - } - up_read(&adev->reset_domain->sem); - - amdgpu_debugfs_userq_init(filp, queue, qid); - INIT_DELAYED_WORK(&queue->hang_detect_work, - amdgpu_userq_hang_detect_work); - - args->out.queue_id = qid; atomic_inc(&uq_mgr->userq_count[queue->queue_type]); mutex_unlock(&uq_mgr->userq_mutex); + + r = xa_alloc(&uq_mgr->userq_xa, &qid, queue, + XA_LIMIT(1, AMDGPU_MAX_USERQ_COUNT), + GFP_KERNEL); + if (r) { + /* + * This drops the last reference which should take care of + * all cleanup. 
+ */ + amdgpu_userq_put(queue); + return r; + } + + amdgpu_debugfs_userq_init(filp, queue, qid); + args->out.queue_id = qid; return 0; -clean_reset_domain: - up_read(&adev->reset_domain->sem); +clean_doorbell: + xa_erase_irq(&adev->userq_doorbell_xa, index); clean_mqd: - mutex_unlock(&uq_mgr->userq_mutex); uq_funcs->mqd_destroy(queue); -clean_fence_driver: - amdgpu_userq_fence_driver_free(queue); clean_mapping: amdgpu_bo_reserve(fpriv->vm.root.bo, true); amdgpu_userq_buffer_vas_list_cleanup(adev, queue); amdgpu_bo_unreserve(fpriv->vm.root.bo); mutex_destroy(&queue->fence_drv_lock); +free_fence_drv: + amdgpu_userq_fence_driver_free(queue); free_queue: kfree(queue); err_pm_runtime: @@ -1252,28 +1241,13 @@ amdgpu_userq_evict_all(struct amdgpu_userq_mgr *uq_mgr) if (ret) { drm_file_err(uq_mgr->file, "Couldn't unmap all the queues, eviction failed ret=%d\n", ret); - amdgpu_userq_detect_and_reset_queues(uq_mgr); + amdgpu_reset_domain_schedule(uq_mgr->adev->reset_domain, + &uq_mgr->reset_work); + flush_work(&uq_mgr->reset_work); } return ret; } -void amdgpu_userq_reset_work(struct work_struct *work) -{ - struct amdgpu_device *adev = container_of(work, struct amdgpu_device, - userq_reset_work); - struct amdgpu_reset_context reset_context; - - memset(&reset_context, 0, sizeof(reset_context)); - - reset_context.method = AMD_RESET_METHOD_NONE; - reset_context.reset_req_dev = adev; - reset_context.src = AMDGPU_RESET_SRC_USERQ; - set_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags); - /*set_bit(AMDGPU_SKIP_COREDUMP, &reset_context.flags);*/ - - amdgpu_device_gpu_recover(adev, NULL, &reset_context); -} - static void amdgpu_userq_wait_for_signal(struct amdgpu_userq_mgr *uq_mgr) { @@ -1307,9 +1281,24 @@ int amdgpu_userq_mgr_init(struct amdgpu_userq_mgr *userq_mgr, struct drm_file *f userq_mgr->file = file_priv; INIT_DELAYED_WORK(&userq_mgr->resume_work, amdgpu_userq_restore_worker); + INIT_WORK(&userq_mgr->reset_work, amdgpu_userq_mgr_reset_work); return 0; } +void 
amdgpu_userq_mgr_cancel_reset_work(struct amdgpu_device *adev) +{ + struct xarray *xa = &adev->userq_doorbell_xa; + struct amdgpu_usermode_queue *queue; + unsigned long flags, queue_id; + + xa_lock_irqsave(xa, flags); + xa_for_each(xa, queue_id, queue) { + cancel_delayed_work(&queue->hang_detect_work); + cancel_work(&queue->userq_mgr->reset_work); + } + xa_unlock_irqrestore(xa, flags); +} + void amdgpu_userq_mgr_cancel_resume(struct amdgpu_userq_mgr *userq_mgr) { cancel_delayed_work_sync(&userq_mgr->resume_work); @@ -1335,6 +1324,14 @@ void amdgpu_userq_mgr_fini(struct amdgpu_userq_mgr *userq_mgr) } xa_destroy(&userq_mgr->userq_xa); + + /* + * Drain any in-flight reset_work. By this point all queues are freed + * and userq_count is 0, so if reset_work starts now it exits early. + * We still need to wait in case it was already executing gpu_recover. + */ + cancel_work_sync(&userq_mgr->reset_work); + mutex_destroy(&userq_mgr->userq_mutex); } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h index 85f460e7c31b..49b33e2d6932 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h @@ -84,7 +84,13 @@ struct amdgpu_usermode_queue { u32 xcp_id; int priority; struct dentry *debugfs_queue; - struct delayed_work hang_detect_work; + + /** + * @hang_detect_work: + * + * Delayed work which runs when userq_fences time out. + */ + struct delayed_work hang_detect_work; struct kref refcount; struct list_head userq_va_list; @@ -116,6 +122,13 @@ struct amdgpu_userq_mgr { struct amdgpu_device *adev; struct delayed_work resume_work; struct drm_file *file; + + /** + * @reset_work: + * + * Reset work which is used when eviction fails. 
+ */ + struct work_struct reset_work; atomic_t userq_count[AMDGPU_RING_TYPE_MAX]; }; @@ -134,6 +147,7 @@ int amdgpu_userq_ioctl(struct drm_device *dev, void *data, struct drm_file *filp int amdgpu_userq_mgr_init(struct amdgpu_userq_mgr *userq_mgr, struct drm_file *file_priv, struct amdgpu_device *adev); +void amdgpu_userq_mgr_cancel_reset_work(struct amdgpu_device *adev); void amdgpu_userq_mgr_cancel_resume(struct amdgpu_userq_mgr *userq_mgr); void amdgpu_userq_mgr_fini(struct amdgpu_userq_mgr *userq_mgr); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c index 53a8944bab05..a41fb72dba94 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c @@ -370,51 +370,48 @@ static int amdgpu_userq_fence_read_wptr(struct amdgpu_device *adev, { struct amdgpu_bo_va_mapping *mapping; struct amdgpu_bo *bo; + struct drm_exec exec; u64 addr, *ptr; - int r; - - r = amdgpu_bo_reserve(queue->vm->root.bo, false); - if (r) - return r; + int ret; addr = queue->userq_prop->wptr_gpu_addr; addr &= AMDGPU_GMC_HOLE_MASK; - mapping = amdgpu_vm_bo_lookup_mapping(queue->vm, addr >> PAGE_SHIFT); - if (!mapping) { - amdgpu_bo_unreserve(queue->vm->root.bo); - DRM_ERROR("Failed to lookup amdgpu_bo_va_mapping\n"); - return -EINVAL; + drm_exec_init(&exec, DRM_EXEC_IGNORE_DUPLICATES, 2); + drm_exec_until_all_locked(&exec) { + ret = amdgpu_vm_lock_pd(queue->vm, &exec, 1); + drm_exec_retry_on_contention(&exec); + if (unlikely(ret)) + goto lock_error; + + mapping = amdgpu_vm_bo_lookup_mapping(queue->vm, addr >> PAGE_SHIFT); + if (!mapping) { + ret = -EINVAL; + goto lock_error; + } + + ret = drm_exec_lock_obj(&exec, &mapping->bo_va->base.bo->tbo.base); + drm_exec_retry_on_contention(&exec); + if (unlikely(ret)) + goto lock_error; } - bo = amdgpu_bo_ref(mapping->bo_va->base.bo); - amdgpu_bo_unreserve(queue->vm->root.bo); - r = amdgpu_bo_reserve(bo, true); - if (r) { - amdgpu_bo_unref(&bo); 
- DRM_ERROR("Failed to reserve userqueue wptr bo"); - return r; - } - - r = amdgpu_bo_kmap(bo, (void **)&ptr); - if (r) { + bo = mapping->bo_va->base.bo; + ret = amdgpu_bo_kmap(bo, (void **)&ptr); + if (ret) { DRM_ERROR("Failed mapping the userqueue wptr bo"); - goto map_error; + goto lock_error; } *wptr = le64_to_cpu(*ptr); amdgpu_bo_kunmap(bo); - amdgpu_bo_unreserve(bo); - amdgpu_bo_unref(&bo); - + drm_exec_fini(&exec); return 0; -map_error: - amdgpu_bo_unreserve(bo); - amdgpu_bo_unref(&bo); - - return r; +lock_error: + drm_exec_fini(&exec); + return ret; } static void diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index 9ba9de16a27a..fccd758b6699 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -2002,7 +2002,7 @@ int amdgpu_vm_bo_unmap(struct amdgpu_device *adev, * during user requests GEM unmap IOCTL except for forcing the unmap * from user space. */ - if (unlikely(atomic_read(&bo_va->userq_va_mapped) > 0)) + if (unlikely(bo_va->userq_va_mapped)) amdgpu_userq_gem_va_unmap_validate(adev, mapping, saddr); list_del(&mapping->list); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.c index fd881388d612..f27f917e3cdb 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.c @@ -562,6 +562,11 @@ static void vpe_ring_emit_fence(struct amdgpu_ring *ring, uint64_t addr, amdgpu_ring_write(ring, 0); } + /* WA: Force sync after TRAP to avoid VPE1 fail to power off */ + if (ring->adev->vpe.collaborate_mode) { + amdgpu_ring_write(ring, VPE_CMD_HEADER(VPE_CMD_OPCODE_COLLAB_SYNC, 0)); + amdgpu_ring_write(ring, 0xabcd); + } } static void vpe_ring_emit_pipeline_sync(struct amdgpu_ring *ring) @@ -968,7 +973,7 @@ static const struct amdgpu_ring_funcs vpe_ring_funcs = { .emit_frame_size = 5 + /* vpe_ring_init_cond_exec */ 6 + /* vpe_ring_emit_pipeline_sync */ - 10 + 10 + 10 + /* vpe_ring_emit_fence */ + 
12 + 12 + 12 + /* vpe_ring_emit_fence */ /* vpe_ring_emit_vm_flush */ SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 + SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 6, diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v1_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v1_0.c index 5b7b46d242c6..93253db5e2de 100644 --- a/drivers/gpu/drm/amd/amdgpu/vce_v1_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vce_v1_0.c @@ -42,9 +42,10 @@ #include "oss/oss_1_0_d.h" #include "oss/oss_1_0_sh_mask.h" +#define VCE_V1_0_ALIGNMENT (32 * 1024) #define VCE_V1_0_FW_SIZE (256 * 1024) #define VCE_V1_0_STACK_SIZE (64 * 1024) -#define VCE_V1_0_DATA_SIZE (7808 * (AMDGPU_MAX_VCE_HANDLES + 1)) +#define VCE_V1_0_DATA_SIZE (ALIGN(7808 * (AMDGPU_MAX_VCE_HANDLES + 1), VCE_V1_0_ALIGNMENT)) #define VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK 0x02 static void vce_v1_0_set_ring_funcs(struct amdgpu_device *adev); @@ -177,7 +178,7 @@ static void vce_v1_0_init_cg(struct amdgpu_device *adev) } /** - * vce_v1_0_load_fw_signature - load firmware signature into VCPU BO + * vce_v1_0_load_fw() - load firmware signature into VCPU BO * * @adev: amdgpu_device pointer * @@ -185,21 +186,26 @@ static void vce_v1_0_init_cg(struct amdgpu_device *adev) * This function finds the signature appropriate for the current * ASIC and writes that into the VCPU BO. 
*/ -static int vce_v1_0_load_fw_signature(struct amdgpu_device *adev) +static int vce_v1_0_load_fw(struct amdgpu_device *adev) { const struct common_firmware_header *hdr; struct vce_v1_0_fw_signature *sign; - unsigned int ucode_offset; + u32 ucode_offset; + u32 ucode_size; uint32_t chip_id; u32 *cpu_addr; int i; hdr = (const struct common_firmware_header *)adev->vce.fw->data; ucode_offset = le32_to_cpu(hdr->ucode_array_offset_bytes); + ucode_size = le32_to_cpu(hdr->ucode_size_bytes) - sizeof(*sign); cpu_addr = adev->vce.cpu_addr; sign = (void *)adev->vce.fw->data + ucode_offset; + if (ucode_size > VCE_V1_0_FW_SIZE - AMDGPU_VCE_FIRMWARE_OFFSET) + return -EINVAL; + switch (adev->asic_type) { case CHIP_TAHITI: chip_id = 0x01000014; @@ -226,12 +232,14 @@ static int vce_v1_0_load_fw_signature(struct amdgpu_device *adev) return -EINVAL; } + memset_io(&cpu_addr[0], 0, amdgpu_bo_size(adev->vce.vcpu_bo)); + cpu_addr += (256 - 64) / 4; memcpy_toio(&cpu_addr[0], &sign->val[i].nonce[0], 16); cpu_addr[4] = cpu_to_le32(le32_to_cpu(sign->length) + 64); memset_io(&cpu_addr[5], 0, 44); - memcpy_toio(&cpu_addr[16], &sign[1], hdr->ucode_size_bytes - sizeof(*sign)); + memcpy_toio(&cpu_addr[16], &sign[1], ucode_size); cpu_addr += (le32_to_cpu(sign->length) + 64) / 4; memcpy_toio(&cpu_addr[0], &sign->val[i].sigval[0], 16); @@ -312,18 +320,23 @@ static int vce_v1_0_mc_resume(struct amdgpu_device *adev) WREG32(mmVCE_VCPU_SCRATCH7, AMDGPU_MAX_VCE_HANDLES); offset = adev->vce.gpu_addr + AMDGPU_VCE_FIRMWARE_OFFSET; - size = VCE_V1_0_FW_SIZE; - WREG32(mmVCE_VCPU_CACHE_OFFSET0, offset & 0x7fffffff); + size = VCE_V1_0_FW_SIZE - AMDGPU_VCE_FIRMWARE_OFFSET; + WREG32(mmVCE_VCPU_CACHE_OFFSET0, offset); WREG32(mmVCE_VCPU_CACHE_SIZE0, size); offset += size; size = VCE_V1_0_STACK_SIZE; - WREG32(mmVCE_VCPU_CACHE_OFFSET1, offset & 0x7fffffff); + WARN_ON(!IS_ALIGNED(offset, VCE_V1_0_ALIGNMENT)); + WARN_ON(!IS_ALIGNED(size, VCE_V1_0_ALIGNMENT)); + WREG32(mmVCE_VCPU_CACHE_OFFSET1, offset); 
WREG32(mmVCE_VCPU_CACHE_SIZE1, size); offset += size; size = VCE_V1_0_DATA_SIZE; - WREG32(mmVCE_VCPU_CACHE_OFFSET2, offset & 0x7fffffff); + WARN_ON(!IS_ALIGNED(offset, VCE_V1_0_ALIGNMENT)); + WARN_ON(!IS_ALIGNED(size, VCE_V1_0_ALIGNMENT)); + WARN_ON((offset + size - adev->vce.gpu_addr) > amdgpu_bo_size(adev->vce.vcpu_bo)); + WREG32(mmVCE_VCPU_CACHE_OFFSET2, offset); WREG32(mmVCE_VCPU_CACHE_SIZE2, size); WREG32_P(mmVCE_LMI_CTRL2, 0x0, ~0x100); @@ -527,22 +540,31 @@ static int vce_v1_0_early_init(struct amdgpu_ip_block *ip_block) * To accomodate that, we put GART to the LOW address range * and reserve some GART pages where we map the VCPU BO, * so that it gets a 32-bit address. + * + * The BAR address is zero and we can't change it + * due to the firmware validation mechanism. + * It seems that it fails to initialize if the address is >= 128 MiB. */ static int vce_v1_0_ensure_vcpu_bo_32bit_addr(struct amdgpu_device *adev) { u64 bo_size = amdgpu_bo_size(adev->vce.vcpu_bo); - u64 max_vcpu_bo_addr = 0xffffffff - bo_size; + u64 max_vcpu_bo_addr = 0x07ffffff - bo_size; u64 num_pages = ALIGN(bo_size, AMDGPU_GPU_PAGE_SIZE) / AMDGPU_GPU_PAGE_SIZE; u64 pa = amdgpu_gmc_vram_pa(adev, adev->vce.vcpu_bo); u64 flags = AMDGPU_PTE_READABLE | AMDGPU_PTE_WRITEABLE | AMDGPU_PTE_VALID; u64 vce_gart_start_offs; int r; - r = amdgpu_gtt_mgr_alloc_entries(&adev->mman.gtt_mgr, - &adev->vce.gart_node, num_pages, - DRM_MM_INSERT_LOW); - if (r) - return r; + if (adev->gmc.vram_start < adev->gmc.gart_start) + return amdgpu_bo_gpu_offset(adev->vce.vcpu_bo) <= max_vcpu_bo_addr ? 
0 : -EINVAL; + + if (!drm_mm_node_allocated(&adev->vce.gart_node)) { + r = amdgpu_gtt_mgr_alloc_entries(&adev->mman.gtt_mgr, + &adev->vce.gart_node, num_pages, + DRM_MM_INSERT_LOW); + if (r) + return r; + } vce_gart_start_offs = amdgpu_gtt_node_to_byte_offset(&adev->vce.gart_node); @@ -553,8 +575,6 @@ static int vce_v1_0_ensure_vcpu_bo_32bit_addr(struct amdgpu_device *adev) amdgpu_gart_map_vram_range(adev, pa, adev->vce.gart_node.start, num_pages, flags, adev->gart.ptr); adev->vce.gpu_addr = adev->gmc.gart_start + vce_gart_start_offs; - if (adev->vce.gpu_addr > max_vcpu_bo_addr) - return -EINVAL; return 0; } @@ -574,10 +594,7 @@ static int vce_v1_0_sw_init(struct amdgpu_ip_block *ip_block) if (r) return r; - r = amdgpu_vce_resume(adev); - if (r) - return r; - r = vce_v1_0_load_fw_signature(adev); + r = vce_v1_0_load_fw(adev); if (r) return r; r = vce_v1_0_ensure_vcpu_bo_32bit_addr(adev); @@ -696,10 +713,7 @@ static int vce_v1_0_resume(struct amdgpu_ip_block *ip_block) struct amdgpu_device *adev = ip_block->adev; int r; - r = amdgpu_vce_resume(adev); - if (r) - return r; - r = vce_v1_0_load_fw_signature(adev); + r = vce_v1_0_load_fw(adev); if (r) return r; r = vce_v1_0_ensure_vcpu_bo_32bit_addr(adev); diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c index db149eda6204..3a6fc8604108 100644 --- a/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c @@ -37,9 +37,14 @@ #include "oss/oss_2_0_d.h" #include "oss/oss_2_0_sh_mask.h" + +/* Use 24K to be safe. The FW supposedly only requires 23744 bytes. 
*/ +#define VCE_V2_0_DATA_ENTRY_SIZE (24 * 1024) + #define VCE_V2_0_FW_SIZE (256 * 1024) #define VCE_V2_0_STACK_SIZE (64 * 1024) -#define VCE_V2_0_DATA_SIZE (23552 * AMDGPU_MAX_VCE_HANDLES) +#define VCE_V2_0_DATA_SIZE (VCE_V2_0_DATA_ENTRY_SIZE * (AMDGPU_MAX_VCE_HANDLES + 1)) + #define VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK 0x02 static void vce_v2_0_set_ring_funcs(struct amdgpu_device *adev); @@ -183,7 +188,7 @@ static void vce_v2_0_mc_resume(struct amdgpu_device *adev) WREG32(mmVCE_LMI_VCPU_CACHE_40BIT_BAR, (adev->vce.gpu_addr >> 8)); offset = AMDGPU_VCE_FIRMWARE_OFFSET; - size = VCE_V2_0_FW_SIZE; + size = VCE_V2_0_FW_SIZE - AMDGPU_VCE_FIRMWARE_OFFSET; WREG32(mmVCE_VCPU_CACHE_OFFSET0, offset & 0x7fffffff); WREG32(mmVCE_VCPU_CACHE_SIZE0, size); diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c index 03d79e464f04..c69f7d82060f 100644 --- a/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c @@ -574,7 +574,7 @@ static void vce_v3_0_mc_resume(struct amdgpu_device *adev, int idx) } else WREG32(mmVCE_LMI_VCPU_CACHE_40BIT_BAR, (adev->vce.gpu_addr >> 8)); offset = AMDGPU_VCE_FIRMWARE_OFFSET; - size = VCE_V3_0_FW_SIZE; + size = VCE_V3_0_FW_SIZE - AMDGPU_VCE_FIRMWARE_OFFSET; WREG32(mmVCE_VCPU_CACHE_OFFSET0, offset & 0x7fffffff); WREG32(mmVCE_VCPU_CACHE_SIZE0, size); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c index f95bf6d95534..03b266b26738 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c @@ -67,6 +67,21 @@ static const struct class kfd_class = { .name = kfd_dev_name, }; +/* + * Cache the address space of the chardev on first open so that the reset + * path can drop all userspace mappings of doorbell and MMIO ranges via + * unmap_mapping_range(). 
+ */ +static struct address_space *kfd_dev_mapping; + +void kfd_dev_unmap_mapping_range(loff_t const holebegin, loff_t const holelen) +{ + struct address_space *mapping = READ_ONCE(kfd_dev_mapping); + + if (mapping) + unmap_mapping_range(mapping, holebegin, holelen, 1); +} + static inline struct kfd_process_device *kfd_lock_pdd_by_id(struct kfd_process *p, __u32 gpu_id) { struct kfd_process_device *pdd; @@ -133,6 +148,13 @@ static int kfd_open(struct inode *inode, struct file *filep) if (iminor(inode) != 0) return -ENODEV; + /* + * /dev/kfd is a single chardev so all opens share one inode. Cache + * its address_space on the first open for use by the reset path. + */ + if (!READ_ONCE(kfd_dev_mapping)) + cmpxchg(&kfd_dev_mapping, NULL, inode->i_mapping); + is_32bit_user_mode = in_compat_syscall(); if (is_32bit_user_mode) { diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c index 9185ebe4c079..e0a31e11f0ff 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c @@ -475,6 +475,9 @@ static int allocate_doorbell(struct qcm_process_device *qpd, } else { /* For CP queues on SOC15 */ if (restore_id) { + if (*restore_id >= KFD_MAX_NUM_OF_QUEUES_PER_PROCESS) + return -EINVAL; + /* make sure that ID is free */ if (__test_and_set_bit(*restore_id, qpd->doorbell_bitmap)) return -EINVAL; @@ -1587,6 +1590,9 @@ static int allocate_sdma_queue(struct device_queue_manager *dqm, } if (restore_sdma_id) { + if (*restore_sdma_id >= get_num_sdma_queues(dqm)) + return -EINVAL; + /* Re-use existing sdma_id */ if (!test_bit(*restore_sdma_id, dqm->sdma_bitmap)) { dev_err(dev, "SDMA queue already in use\n"); @@ -1613,6 +1619,9 @@ static int allocate_sdma_queue(struct device_queue_manager *dqm, return -ENOMEM; } if (restore_sdma_id) { + if (*restore_sdma_id >= get_num_xgmi_sdma_queues(dqm)) + return -EINVAL; + /* Re-use existing sdma_id */ if 
(!test_bit(*restore_sdma_id, dqm->xgmi_sdma_bitmap)) { dev_err(dev, "SDMA queue already in use\n"); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c index e8f97de9d6e4..f6d9d81003dc 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c @@ -364,11 +364,15 @@ static int get_wave_state(struct mqd_manager *mm, void *mqd, { struct v9_mqd *m; struct kfd_context_save_area_header header; + u32 cntl_stack_size; + u32 cntl_stack_offset; /* Control stack is located one page after MQD. */ void *mqd_ctl_stack = (void *)((uintptr_t)mqd + AMDGPU_GPU_PAGE_SIZE); m = get_mqd(mqd); + cntl_stack_size = min_t(u32, m->cp_hqd_cntl_stack_size, q->ctl_stack_size); + cntl_stack_offset = min_t(u32, m->cp_hqd_cntl_stack_offset, cntl_stack_size); *ctl_stack_used_size = m->cp_hqd_cntl_stack_size - m->cp_hqd_cntl_stack_offset; @@ -384,9 +388,10 @@ static int get_wave_state(struct mqd_manager *mm, void *mqd, if (copy_to_user(ctl_stack, &header, sizeof(header.wave_state))) return -EFAULT; - if (copy_to_user(ctl_stack + m->cp_hqd_cntl_stack_offset, - mqd_ctl_stack + m->cp_hqd_cntl_stack_offset, - *ctl_stack_used_size)) + *ctl_stack_used_size = cntl_stack_size - cntl_stack_offset; + + if (copy_to_user(ctl_stack + cntl_stack_offset, mqd_ctl_stack + cntl_stack_offset, + *ctl_stack_used_size)) return -EFAULT; return 0; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h index 7b5b12206919..d5b07789eda4 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h @@ -395,6 +395,7 @@ enum kfd_mempool { /* Character device interface */ int kfd_chardev_init(void); void kfd_chardev_exit(void); +void kfd_dev_unmap_mapping_range(loff_t const holebegin, loff_t const holelen); /** * enum kfd_unmap_queues_filter - Enum for queue filters. 
diff --git a/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c b/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c index a1c08e1cc411..c51c4b2c6fae 100644 --- a/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c +++ b/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c @@ -493,6 +493,10 @@ static enum bp_result get_gpio_i2c_info( - sizeof(struct atom_common_table_header)) / sizeof(struct atom_gpio_pin_assignment); + if (!bios_get_image(&bp->base, DATA_TABLES(gpio_pin_lut), + le16_to_cpu(header->table_header.structuresize))) + return BP_RESULT_BADBIOSTABLE; + pin = (struct atom_gpio_pin_assignment *) header->gpio_pin; for (table_index = 0; table_index < count; table_index++) { @@ -681,6 +685,11 @@ static enum bp_result bios_parser_get_gpio_pin_info( count = (le16_to_cpu(header->table_header.structuresize) - sizeof(struct atom_common_table_header)) / sizeof(struct atom_gpio_pin_assignment); + + if (!bios_get_image(&bp->base, DATA_TABLES(gpio_pin_lut), + le16_to_cpu(header->table_header.structuresize))) + return BP_RESULT_BADBIOSTABLE; + for (i = 0; i < count; ++i) { if (header->gpio_pin[i].gpio_id != gpio_id) continue; diff --git a/drivers/gpu/drm/amd/display/dc/bios/bios_parser_helper.c b/drivers/gpu/drm/amd/display/dc/bios/bios_parser_helper.c index 8d2cf95ae739..e00dc05c2d9d 100644 --- a/drivers/gpu/drm/amd/display/dc/bios/bios_parser_helper.c +++ b/drivers/gpu/drm/amd/display/dc/bios/bios_parser_helper.c @@ -37,10 +37,13 @@ uint8_t *bios_get_image(struct dc_bios *bp, uint32_t offset, uint32_t size) { - if (bp->bios && offset + size < bp->bios_size) - return bp->bios + offset; - else + if (!bp->bios) return NULL; + + if (offset > bp->bios_size || size > bp->bios_size - offset) + return NULL; + + return bp->bios + offset; } #include "reg_helper.h" diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c index 419f894c87b0..b3530fbf32f7 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc.c +++ 
b/drivers/gpu/drm/amd/display/dc/core/dc.c @@ -6071,7 +6071,11 @@ bool dc_process_dmub_aux_transfer_async(struct dc *dc, uint8_t action; union dmub_rb_cmd cmd = {0}; - ASSERT(payload->length <= 16); + if (link_index >= dc->link_count || !dc->links[link_index]) + return false; + + if (payload->length > sizeof(cmd.dp_aux_access.aux_control.dpaux.data)) + return false; cmd.dp_aux_access.header.type = DMUB_CMD__DP_AUX_ACCESS; cmd.dp_aux_access.header.payload_bytes = 0; diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu15/smu_v15_0.c b/drivers/gpu/drm/amd/pm/swsmu/smu15/smu_v15_0.c index c3cb36813806..940b43105817 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu15/smu_v15_0.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu15/smu_v15_0.c @@ -435,10 +435,12 @@ int smu_v15_0_fini_smc_tables(struct smu_context *smu) smu_table->watermarks_table = NULL; smu_table->metrics_time = 0; + kfree(smu_dpm->dpm_policies); kfree(smu_dpm->dpm_context); kfree(smu_dpm->golden_dpm_context); kfree(smu_dpm->dpm_current_power_state); kfree(smu_dpm->dpm_request_power_state); + smu_dpm->dpm_policies = NULL; smu_dpm->dpm_context = NULL; smu_dpm->golden_dpm_context = NULL; smu_dpm->dpm_context_size = 0; diff --git a/drivers/gpu/drm/radeon/evergreen_cs.c b/drivers/gpu/drm/radeon/evergreen_cs.c index 3142ef4da7f4..9196f85db9ce 100644 --- a/drivers/gpu/drm/radeon/evergreen_cs.c +++ b/drivers/gpu/drm/radeon/evergreen_cs.c @@ -312,8 +312,10 @@ static int evergreen_surface_check(struct radeon_cs_parser *p, case ARRAY_2D_TILED_THIN1: return evergreen_surface_check_2d(p, surf, prefix); default: - dev_warn(p->dev, "%s:%d %s invalid array mode %d\n", - __func__, __LINE__, prefix, surf->mode); + if (prefix) { + dev_warn(p->dev, "%s:%d %s invalid array mode %d\n", + __func__, __LINE__, prefix, surf->mode); + } return -EINVAL; } return -EINVAL;