diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c index 66e8a2f7afcf..d6bee5c30073 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c @@ -552,8 +552,9 @@ static ssize_t amdgpu_debugfs_ring_read(struct file *f, char __user *buf, size_t size, loff_t *pos) { struct amdgpu_ring *ring = file_inode(f)->i_private; - uint32_t value, result, early[3]; + u32 value, result, early[3] = { 0 }; uint64_t p; + u32 avail_dw, start_dw, read_dw; loff_t i; int r; @@ -565,10 +566,10 @@ static ssize_t amdgpu_debugfs_ring_read(struct file *f, char __user *buf, result = 0; - if (*pos < 12) { - if (ring->funcs->type == AMDGPU_RING_TYPE_CPER) - mutex_lock(&ring->adev->cper.ring_lock); + if (ring->funcs->type == AMDGPU_RING_TYPE_CPER) + mutex_lock(&ring->adev->cper.ring_lock); + if (*pos < 12) { early[0] = amdgpu_ring_get_rptr(ring) & ring->buf_mask; early[1] = amdgpu_ring_get_wptr(ring) & ring->buf_mask; early[2] = ring->wptr & ring->buf_mask; @@ -600,13 +601,24 @@ static ssize_t amdgpu_debugfs_ring_read(struct file *f, char __user *buf, *pos += 4; } } else { + early[0] = amdgpu_ring_get_rptr(ring) & ring->buf_mask; + early[1] = amdgpu_ring_get_wptr(ring) & ring->buf_mask; + p = early[0]; if (early[0] <= early[1]) - size = (early[1] - early[0]); + avail_dw = early[1] - early[0]; else - size = ring->ring_size - (early[0] - early[1]); + avail_dw = ring->buf_mask + 1 - (early[0] - early[1]); - while (size) { + start_dw = (*pos > 12) ? 
((*pos - 12) >> 2) : 0; + if (start_dw >= avail_dw) + goto out; + + p = (p + start_dw) & ring->ptr_mask; + avail_dw -= start_dw; + read_dw = min_t(u32, avail_dw, size >> 2); + + while (read_dw) { if (p == early[1]) goto out; @@ -619,9 +631,10 @@ static ssize_t amdgpu_debugfs_ring_read(struct file *f, char __user *buf, buf += 4; result += 4; - size--; + read_dw--; p++; p &= ring->ptr_mask; + *pos += 4; } } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c index de140a8ed135..70d74f04d2dd 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c @@ -106,9 +106,6 @@ amdgpu_userq_detect_and_reset_queues(struct amdgpu_userq_mgr *uq_mgr) int r = 0; int i; - /* Warning if current process mutex is not held */ - WARN_ON(!mutex_is_locked(&uq_mgr->userq_mutex)); - if (unlikely(adev->debug_disable_gpu_ring_reset)) { dev_err(adev->dev, "userq reset disabled by debug mask\n"); return 0; @@ -127,9 +124,11 @@ amdgpu_userq_detect_and_reset_queues(struct amdgpu_userq_mgr *uq_mgr) */ for (i = 0; i < num_queue_types; i++) { int ring_type = queue_types[i]; - const struct amdgpu_userq_funcs *funcs = adev->userq_funcs[ring_type]; + const struct amdgpu_userq_funcs *funcs = + adev->userq_funcs[ring_type]; - if (!amdgpu_userq_is_reset_type_supported(adev, ring_type, AMDGPU_RESET_TYPE_PER_QUEUE)) + if (!amdgpu_userq_is_reset_type_supported(adev, ring_type, + AMDGPU_RESET_TYPE_PER_QUEUE)) continue; if (atomic_read(&uq_mgr->userq_count[ring_type]) > 0 && @@ -150,38 +149,22 @@ amdgpu_userq_detect_and_reset_queues(struct amdgpu_userq_mgr *uq_mgr) static void amdgpu_userq_hang_detect_work(struct work_struct *work) { - struct amdgpu_usermode_queue *queue = container_of(work, - struct amdgpu_usermode_queue, - hang_detect_work.work); - struct dma_fence *fence; - struct amdgpu_userq_mgr *uq_mgr; + struct amdgpu_usermode_queue *queue = + container_of(work, struct amdgpu_usermode_queue, + 
hang_detect_work.work); - if (!queue->userq_mgr) - return; - - uq_mgr = queue->userq_mgr; - fence = READ_ONCE(queue->hang_detect_fence); - /* Fence already signaled – no action needed */ - if (!fence || dma_fence_is_signaled(fence)) - return; - - mutex_lock(&uq_mgr->userq_mutex); - amdgpu_userq_detect_and_reset_queues(uq_mgr); - mutex_unlock(&uq_mgr->userq_mutex); + amdgpu_userq_detect_and_reset_queues(queue->userq_mgr); } /* * Start hang detection for a user queue fence. A delayed work will be scheduled - * to check if the fence is still pending after the timeout period. -*/ + * to reset the queues when the fence doesn't signal in time. + */ void amdgpu_userq_start_hang_detect_work(struct amdgpu_usermode_queue *queue) { struct amdgpu_device *adev; unsigned long timeout_ms; - if (!queue || !queue->userq_mgr || !queue->userq_mgr->adev) - return; - adev = queue->userq_mgr->adev; /* Determine timeout based on queue type */ switch (queue->queue_type) { @@ -199,8 +182,6 @@ void amdgpu_userq_start_hang_detect_work(struct amdgpu_usermode_queue *queue) break; } - /* Store the fence to monitor and schedule hang detection */ - WRITE_ONCE(queue->hang_detect_fence, queue->last_fence); schedule_delayed_work(&queue->hang_detect_work, msecs_to_jiffies(timeout_ms)); } @@ -210,18 +191,24 @@ void amdgpu_userq_process_fence_irq(struct amdgpu_device *adev, u32 doorbell) struct xarray *xa = &adev->userq_doorbell_xa; struct amdgpu_usermode_queue *queue; unsigned long flags; + int r; xa_lock_irqsave(xa, flags); queue = xa_load(xa, doorbell); - if (queue) - amdgpu_userq_fence_driver_process(queue->fence_drv); - xa_unlock_irqrestore(xa, flags); -} + if (queue) { + r = amdgpu_userq_fence_driver_process(queue->fence_drv); + /* + * We are in interrupt context here, this *can't* wait for + * reset work to finish. 
+ */ + if (r >= 0) + cancel_delayed_work(&queue->hang_detect_work); -static void amdgpu_userq_init_hang_detect_work(struct amdgpu_usermode_queue *queue) -{ - INIT_DELAYED_WORK(&queue->hang_detect_work, amdgpu_userq_hang_detect_work); - queue->hang_detect_fence = NULL; + /* Restart the timer when there are still fences pending */ + if (r == 1) + amdgpu_userq_start_hang_detect_work(queue); + } + xa_unlock_irqrestore(xa, flags); } static int amdgpu_userq_buffer_va_list_add(struct amdgpu_usermode_queue *queue, @@ -345,23 +332,18 @@ static int amdgpu_userq_preempt_helper(struct amdgpu_usermode_queue *queue) struct amdgpu_device *adev = uq_mgr->adev; const struct amdgpu_userq_funcs *userq_funcs = adev->userq_funcs[queue->queue_type]; - bool found_hung_queue = false; - int r = 0; + int r; if (queue->state == AMDGPU_USERQ_STATE_MAPPED) { r = userq_funcs->preempt(queue); if (r) { queue->state = AMDGPU_USERQ_STATE_HUNG; - found_hung_queue = true; + return r; } else { queue->state = AMDGPU_USERQ_STATE_PREEMPTED; } } - - if (found_hung_queue) - amdgpu_userq_detect_and_reset_queues(uq_mgr); - - return r; + return 0; } static int amdgpu_userq_restore_helper(struct amdgpu_usermode_queue *queue) @@ -390,24 +372,21 @@ static int amdgpu_userq_unmap_helper(struct amdgpu_usermode_queue *queue) struct amdgpu_device *adev = uq_mgr->adev; const struct amdgpu_userq_funcs *userq_funcs = adev->userq_funcs[queue->queue_type]; - bool found_hung_queue = false; - int r = 0; + int r; if ((queue->state == AMDGPU_USERQ_STATE_MAPPED) || - (queue->state == AMDGPU_USERQ_STATE_PREEMPTED)) { + (queue->state == AMDGPU_USERQ_STATE_PREEMPTED)) { + r = userq_funcs->unmap(queue); if (r) { queue->state = AMDGPU_USERQ_STATE_HUNG; - found_hung_queue = true; + return r; } else { queue->state = AMDGPU_USERQ_STATE_UNMAPPED; } } - if (found_hung_queue) - amdgpu_userq_detect_and_reset_queues(uq_mgr); - - return r; + return 0; } static int amdgpu_userq_map_helper(struct amdgpu_usermode_queue *queue) @@ -416,19 
+395,19 @@ static int amdgpu_userq_map_helper(struct amdgpu_usermode_queue *queue) struct amdgpu_device *adev = uq_mgr->adev; const struct amdgpu_userq_funcs *userq_funcs = adev->userq_funcs[queue->queue_type]; - int r = 0; + int r; if (queue->state == AMDGPU_USERQ_STATE_UNMAPPED) { r = userq_funcs->map(queue); if (r) { queue->state = AMDGPU_USERQ_STATE_HUNG; - amdgpu_userq_detect_and_reset_queues(uq_mgr); + return r; } else { queue->state = AMDGPU_USERQ_STATE_MAPPED; } } - return r; + return 0; } static void amdgpu_userq_wait_for_last_fence(struct amdgpu_usermode_queue *queue) @@ -648,13 +627,11 @@ amdgpu_userq_destroy(struct amdgpu_userq_mgr *uq_mgr, struct amdgpu_usermode_que amdgpu_bo_unreserve(vm->root.bo); mutex_lock(&uq_mgr->userq_mutex); - queue->hang_detect_fence = NULL; amdgpu_userq_wait_for_last_fence(queue); #if defined(CONFIG_DEBUG_FS) debugfs_remove_recursive(queue->debugfs_queue); #endif - amdgpu_userq_detect_and_reset_queues(uq_mgr); r = amdgpu_userq_unmap_helper(queue); atomic_dec(&uq_mgr->userq_count[queue->queue_type]); amdgpu_userq_cleanup(queue); @@ -800,6 +777,7 @@ amdgpu_userq_create(struct drm_file *filp, union drm_amdgpu_userq *args) } queue->doorbell_index = index; + mutex_init(&queue->fence_drv_lock); xa_init_flags(&queue->fence_drv_xa, XA_FLAGS_ALLOC); r = amdgpu_userq_fence_driver_alloc(adev, &queue->fence_drv); if (r) { @@ -855,7 +833,8 @@ amdgpu_userq_create(struct drm_file *filp, union drm_amdgpu_userq *args) up_read(&adev->reset_domain->sem); amdgpu_debugfs_userq_init(filp, queue, qid); - amdgpu_userq_init_hang_detect_work(queue); + INIT_DELAYED_WORK(&queue->hang_detect_work, + amdgpu_userq_hang_detect_work); args->out.queue_id = qid; atomic_inc(&uq_mgr->userq_count[queue->queue_type]); @@ -873,6 +852,7 @@ amdgpu_userq_create(struct drm_file *filp, union drm_amdgpu_userq *args) amdgpu_bo_reserve(fpriv->vm.root.bo, true); amdgpu_userq_buffer_vas_list_cleanup(adev, queue); amdgpu_bo_unreserve(fpriv->vm.root.bo); + 
mutex_destroy(&queue->fence_drv_lock); free_queue: kfree(queue); err_pm_runtime: @@ -1262,7 +1242,6 @@ amdgpu_userq_evict_all(struct amdgpu_userq_mgr *uq_mgr) unsigned long queue_id; int ret = 0, r; - amdgpu_userq_detect_and_reset_queues(uq_mgr); /* Try to unmap all the queues in this process ctx */ xa_for_each(&uq_mgr->userq_xa, queue_id, queue) { r = amdgpu_userq_preempt_helper(queue); @@ -1270,9 +1249,11 @@ amdgpu_userq_evict_all(struct amdgpu_userq_mgr *uq_mgr) ret = r; } - if (ret) + if (ret) { drm_file_err(uq_mgr->file, "Couldn't unmap all the queues, eviction failed ret=%d\n", ret); + amdgpu_userq_detect_and_reset_queues(uq_mgr); + } return ret; } @@ -1372,7 +1353,6 @@ int amdgpu_userq_suspend(struct amdgpu_device *adev) uqm = queue->userq_mgr; cancel_delayed_work_sync(&uqm->resume_work); guard(mutex)(&uqm->userq_mutex); - amdgpu_userq_detect_and_reset_queues(uqm); if (adev->in_s0ix) r = amdgpu_userq_preempt_helper(queue); else @@ -1431,7 +1411,6 @@ int amdgpu_userq_stop_sched_for_enforce_isolation(struct amdgpu_device *adev, if (((queue->queue_type == AMDGPU_HW_IP_GFX) || (queue->queue_type == AMDGPU_HW_IP_COMPUTE)) && (queue->xcp_id == idx)) { - amdgpu_userq_detect_and_reset_queues(uqm); r = amdgpu_userq_preempt_helper(queue); if (r) ret = r; @@ -1504,23 +1483,21 @@ void amdgpu_userq_pre_reset(struct amdgpu_device *adev) { const struct amdgpu_userq_funcs *userq_funcs; struct amdgpu_usermode_queue *queue; - struct amdgpu_userq_mgr *uqm; unsigned long queue_id; + /* TODO: We probably need a new lock for the queue state */ xa_for_each(&adev->userq_doorbell_xa, queue_id, queue) { - uqm = queue->userq_mgr; - cancel_delayed_work_sync(&uqm->resume_work); - if (queue->state == AMDGPU_USERQ_STATE_MAPPED) { - amdgpu_userq_wait_for_last_fence(queue); - userq_funcs = adev->userq_funcs[queue->queue_type]; - userq_funcs->unmap(queue); - /* just mark all queues as hung at this point. 
- * if unmap succeeds, we could map again - * in amdgpu_userq_post_reset() if vram is not lost - */ - queue->state = AMDGPU_USERQ_STATE_HUNG; - amdgpu_userq_fence_driver_force_completion(queue); - } + if (queue->state != AMDGPU_USERQ_STATE_MAPPED) + continue; + + userq_funcs = adev->userq_funcs[queue->queue_type]; + userq_funcs->unmap(queue); + /* just mark all queues as hung at this point. + * if unmap succeeds, we could map again + * in amdgpu_userq_post_reset() if vram is not lost + */ + queue->state = AMDGPU_USERQ_STATE_HUNG; + amdgpu_userq_fence_driver_force_completion(queue); } } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h index 8b8f345b60b6..85f460e7c31b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h @@ -66,6 +66,18 @@ struct amdgpu_usermode_queue { struct amdgpu_userq_obj db_obj; struct amdgpu_userq_obj fw_obj; struct amdgpu_userq_obj wptr_obj; + + /** + * @fence_drv_lock: Protecting @fence_drv_xa. + */ + struct mutex fence_drv_lock; + + /** + * @fence_drv_xa: + * + * References to the external fence drivers returned by wait_ioctl. + * Dropped on the next signaled dma_fence or queue destruction. 
+ */ struct xarray fence_drv_xa; struct amdgpu_userq_fence_driver *fence_drv; struct dma_fence *last_fence; @@ -73,7 +85,6 @@ struct amdgpu_usermode_queue { int priority; struct dentry *debugfs_queue; struct delayed_work hang_detect_work; - struct dma_fence *hang_detect_fence; struct kref refcount; struct list_head userq_va_list; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c index e2d5f04296e1..53a8944bab05 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c @@ -121,6 +121,7 @@ amdgpu_userq_fence_driver_free(struct amdgpu_usermode_queue *userq) userq->last_fence = NULL; amdgpu_userq_walk_and_drop_fence_drv(&userq->fence_drv_xa); xa_destroy(&userq->fence_drv_xa); + mutex_destroy(&userq->fence_drv_lock); /* Drop the queue's ownership reference to fence_drv explicitly */ amdgpu_userq_fence_driver_put(userq->fence_drv); } @@ -134,7 +135,14 @@ amdgpu_userq_fence_put_fence_drv_array(struct amdgpu_userq_fence *userq_fence) userq_fence->fence_drv_array_count = 0; } -void amdgpu_userq_fence_driver_process(struct amdgpu_userq_fence_driver *fence_drv) +/* + * Returns: + * -ENOENT when no fences were processed + * 1 when more fences are pending + * 0 when no fences are pending any more + */ +int +amdgpu_userq_fence_driver_process(struct amdgpu_userq_fence_driver *fence_drv) { struct amdgpu_userq_fence *userq_fence, *tmp; LIST_HEAD(to_be_signaled); @@ -142,9 +150,6 @@ void amdgpu_userq_fence_driver_process(struct amdgpu_userq_fence_driver *fence_d unsigned long flags; u64 rptr; - if (!fence_drv) - return; - spin_lock_irqsave(&fence_drv->fence_list_lock, flags); rptr = amdgpu_userq_fence_read(fence_drv); @@ -157,6 +162,9 @@ void amdgpu_userq_fence_driver_process(struct amdgpu_userq_fence_driver *fence_d &userq_fence->link); spin_unlock_irqrestore(&fence_drv->fence_list_lock, flags); + if (list_empty(&to_be_signaled)) + return -ENOENT; + 
list_for_each_entry_safe(userq_fence, tmp, &to_be_signaled, link) { fence = &userq_fence->base; list_del_init(&userq_fence->link); @@ -168,6 +176,8 @@ void amdgpu_userq_fence_driver_process(struct amdgpu_userq_fence_driver *fence_d dma_fence_put(fence); } + /* That doesn't need to be accurate so no locking */ + return list_empty(&fence_drv->fences) ? 0 : 1; } void amdgpu_userq_fence_driver_destroy(struct kref *ref) @@ -209,80 +219,84 @@ void amdgpu_userq_fence_driver_put(struct amdgpu_userq_fence_driver *fence_drv) kref_put(&fence_drv->refcount, amdgpu_userq_fence_driver_destroy); } -static int amdgpu_userq_fence_alloc(struct amdgpu_userq_fence **userq_fence) +static int amdgpu_userq_fence_alloc(struct amdgpu_usermode_queue *userq, + struct amdgpu_userq_fence **pfence) { - *userq_fence = kmalloc(sizeof(**userq_fence), GFP_KERNEL); - return *userq_fence ? 0 : -ENOMEM; + struct amdgpu_userq_fence_driver *fence_drv = userq->fence_drv; + struct amdgpu_userq_fence *userq_fence; + void *entry; + + userq_fence = kmalloc(sizeof(*userq_fence), GFP_KERNEL); + if (!userq_fence) + return -ENOMEM; + + /* + * Get the next unused entry, since we fill from the start this can be + * used as size to allocate the array. 
+ */ + mutex_lock(&userq->fence_drv_lock); + XA_STATE(xas, &userq->fence_drv_xa, 0); + + rcu_read_lock(); + do { + entry = xas_find_marked(&xas, ULONG_MAX, XA_FREE_MARK); + } while (xas_retry(&xas, entry)); + rcu_read_unlock(); + + userq_fence->fence_drv_array = kvmalloc_array(xas.xa_index, + sizeof(fence_drv), + GFP_KERNEL); + if (!userq_fence->fence_drv_array) { + mutex_unlock(&userq->fence_drv_lock); + kfree(userq_fence); + return -ENOMEM; + } + + userq_fence->fence_drv_array_count = xas.xa_index; + xa_extract(&userq->fence_drv_xa, (void **)userq_fence->fence_drv_array, + 0, ULONG_MAX, xas.xa_index, XA_PRESENT); + xa_destroy(&userq->fence_drv_xa); + + mutex_unlock(&userq->fence_drv_lock); + + amdgpu_userq_fence_driver_get(fence_drv); + userq_fence->fence_drv = fence_drv; + + *pfence = userq_fence; + return 0; } -static int amdgpu_userq_fence_create(struct amdgpu_usermode_queue *userq, - struct amdgpu_userq_fence *userq_fence, - u64 seq, struct dma_fence **f) +static void amdgpu_userq_fence_init(struct amdgpu_usermode_queue *userq, + struct amdgpu_userq_fence *fence, + u64 seq) { - struct amdgpu_userq_fence_driver *fence_drv; - struct dma_fence *fence; + struct amdgpu_userq_fence_driver *fence_drv = userq->fence_drv; unsigned long flags; bool signaled = false; - fence_drv = userq->fence_drv; - if (!fence_drv) - return -EINVAL; - - spin_lock_init(&userq_fence->lock); - INIT_LIST_HEAD(&userq_fence->link); - fence = &userq_fence->base; - userq_fence->fence_drv = fence_drv; - - dma_fence_init64(fence, &amdgpu_userq_fence_ops, &userq_fence->lock, + spin_lock_init(&fence->lock); + dma_fence_init64(&fence->base, &amdgpu_userq_fence_ops, &fence->lock, fence_drv->context, seq); - amdgpu_userq_fence_driver_get(fence_drv); - dma_fence_get(fence); + /* Make sure the fence is visible to the hang detect worker */ + dma_fence_put(userq->last_fence); + userq->last_fence = dma_fence_get(&fence->base); - if (!xa_empty(&userq->fence_drv_xa)) { - struct amdgpu_userq_fence_driver 
*stored_fence_drv; - unsigned long index, count = 0; - int i = 0; - - xa_lock(&userq->fence_drv_xa); - xa_for_each(&userq->fence_drv_xa, index, stored_fence_drv) - count++; - - userq_fence->fence_drv_array = - kvmalloc_objs(struct amdgpu_userq_fence_driver *, count, - GFP_ATOMIC); - - if (userq_fence->fence_drv_array) { - xa_for_each(&userq->fence_drv_xa, index, stored_fence_drv) { - userq_fence->fence_drv_array[i] = stored_fence_drv; - __xa_erase(&userq->fence_drv_xa, index); - i++; - } - } - - userq_fence->fence_drv_array_count = i; - xa_unlock(&userq->fence_drv_xa); - } else { - userq_fence->fence_drv_array = NULL; - userq_fence->fence_drv_array_count = 0; - } - - /* Check if hardware has already processed the job */ + /* Check if hardware has already processed the fence */ spin_lock_irqsave(&fence_drv->fence_list_lock, flags); - if (!dma_fence_is_signaled(fence)) { - list_add_tail(&userq_fence->link, &fence_drv->fences); + if (!dma_fence_is_signaled(&fence->base)) { + dma_fence_get(&fence->base); + list_add_tail(&fence->link, &fence_drv->fences); } else { + INIT_LIST_HEAD(&fence->link); signaled = true; - dma_fence_put(fence); } spin_unlock_irqrestore(&fence_drv->fence_list_lock, flags); if (signaled) - amdgpu_userq_fence_put_fence_drv_array(userq_fence); - - *f = fence; - - return 0; + amdgpu_userq_fence_put_fence_drv_array(fence); + else + amdgpu_userq_start_hang_detect_work(userq); } static const char *amdgpu_userq_fence_get_driver_name(struct dma_fence *f) @@ -403,11 +417,6 @@ static int amdgpu_userq_fence_read_wptr(struct amdgpu_device *adev, return r; } -static void amdgpu_userq_fence_cleanup(struct dma_fence *fence) -{ - dma_fence_put(fence); -} - static void amdgpu_userq_fence_driver_set_error(struct amdgpu_userq_fence *fence, int error) @@ -451,13 +460,14 @@ int amdgpu_userq_signal_ioctl(struct drm_device *dev, void *data, const unsigned int num_read_bo_handles = args->num_bo_read_handles; struct amdgpu_fpriv *fpriv = filp->driver_priv; struct 
amdgpu_userq_mgr *userq_mgr = &fpriv->userq_mgr; + struct drm_gem_object **gobj_write, **gobj_read; u32 *syncobj_handles, num_syncobj_handles; - struct amdgpu_userq_fence *userq_fence; - struct amdgpu_usermode_queue *queue = NULL; - struct drm_syncobj **syncobj = NULL; - struct dma_fence *fence; + struct amdgpu_usermode_queue *queue; + struct amdgpu_userq_fence *fence; + struct drm_syncobj **syncobj; struct drm_exec exec; + void __user *ptr; int r, i, entry; u64 wptr; @@ -469,13 +479,14 @@ int amdgpu_userq_signal_ioctl(struct drm_device *dev, void *data, return -EINVAL; num_syncobj_handles = args->num_syncobj_handles; - syncobj_handles = memdup_array_user(u64_to_user_ptr(args->syncobj_handles), - num_syncobj_handles, sizeof(u32)); + ptr = u64_to_user_ptr(args->syncobj_handles); + syncobj_handles = memdup_array_user(ptr, num_syncobj_handles, + sizeof(u32)); if (IS_ERR(syncobj_handles)) return PTR_ERR(syncobj_handles); - /* Array of pointers to the looked up syncobjs */ - syncobj = kmalloc_array(num_syncobj_handles, sizeof(*syncobj), GFP_KERNEL); + syncobj = kmalloc_array(num_syncobj_handles, sizeof(*syncobj), + GFP_KERNEL); if (!syncobj) { r = -ENOMEM; goto free_syncobj_handles; @@ -489,21 +500,17 @@ int amdgpu_userq_signal_ioctl(struct drm_device *dev, void *data, } } - r = drm_gem_objects_lookup(filp, - u64_to_user_ptr(args->bo_read_handles), - num_read_bo_handles, - &gobj_read); + ptr = u64_to_user_ptr(args->bo_read_handles); + r = drm_gem_objects_lookup(filp, ptr, num_read_bo_handles, &gobj_read); if (r) goto free_syncobj; - r = drm_gem_objects_lookup(filp, - u64_to_user_ptr(args->bo_write_handles), - num_write_bo_handles, + ptr = u64_to_user_ptr(args->bo_write_handles); + r = drm_gem_objects_lookup(filp, ptr, num_write_bo_handles, &gobj_write); if (r) goto put_gobj_read; - /* Retrieve the user queue */ queue = amdgpu_userq_get(userq_mgr, args->queue_id); if (!queue) { r = -ENOENT; @@ -512,73 +519,61 @@ int amdgpu_userq_signal_ioctl(struct drm_device *dev, void 
*data, r = amdgpu_userq_fence_read_wptr(adev, queue, &wptr); if (r) - goto put_gobj_write; + goto put_queue; - r = amdgpu_userq_fence_alloc(&userq_fence); + r = amdgpu_userq_fence_alloc(queue, &fence); if (r) - goto put_gobj_write; + goto put_queue; /* We are here means UQ is active, make sure the eviction fence is valid */ amdgpu_userq_ensure_ev_fence(&fpriv->userq_mgr, &fpriv->evf_mgr); - /* Create a new fence */ - r = amdgpu_userq_fence_create(queue, userq_fence, wptr, &fence); - if (r) { - mutex_unlock(&userq_mgr->userq_mutex); - kfree(userq_fence); - goto put_gobj_write; - } + /* Create the new fence */ + amdgpu_userq_fence_init(queue, fence, wptr); - dma_fence_put(queue->last_fence); - queue->last_fence = dma_fence_get(fence); - amdgpu_userq_start_hang_detect_work(queue); mutex_unlock(&userq_mgr->userq_mutex); + /* + * This needs to come after the fence is created since + * amdgpu_userq_ensure_ev_fence() can't be called while holding the resv + * locks. + */ drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT, (num_read_bo_handles + num_write_bo_handles)); - /* Lock all BOs with retry handling */ drm_exec_until_all_locked(&exec) { - r = drm_exec_prepare_array(&exec, gobj_read, num_read_bo_handles, 1); + r = drm_exec_prepare_array(&exec, gobj_read, + num_read_bo_handles, 1); drm_exec_retry_on_contention(&exec); - if (r) { - amdgpu_userq_fence_cleanup(fence); + if (r) goto exec_fini; - } - r = drm_exec_prepare_array(&exec, gobj_write, num_write_bo_handles, 1); + r = drm_exec_prepare_array(&exec, gobj_write, + num_write_bo_handles, 1); drm_exec_retry_on_contention(&exec); - if (r) { - amdgpu_userq_fence_cleanup(fence); + if (r) goto exec_fini; - } } - for (i = 0; i < num_read_bo_handles; i++) { - if (!gobj_read || !gobj_read[i]->resv) - continue; - - dma_resv_add_fence(gobj_read[i]->resv, fence, + /* And publish the new fence in the BOs and syncobj */ + for (i = 0; i < num_read_bo_handles; i++) + dma_resv_add_fence(gobj_read[i]->resv, &fence->base, 
DMA_RESV_USAGE_READ); - } - for (i = 0; i < num_write_bo_handles; i++) { - if (!gobj_write || !gobj_write[i]->resv) - continue; - - dma_resv_add_fence(gobj_write[i]->resv, fence, + for (i = 0; i < num_write_bo_handles; i++) + dma_resv_add_fence(gobj_write[i]->resv, &fence->base, DMA_RESV_USAGE_WRITE); - } - /* Add the created fence to syncobj/BO's */ for (i = 0; i < num_syncobj_handles; i++) - drm_syncobj_replace_fence(syncobj[i], fence); - - /* drop the reference acquired in fence creation function */ - dma_fence_put(fence); + drm_syncobj_replace_fence(syncobj[i], &fence->base); exec_fini: + /* drop the reference acquired in fence creation function */ + dma_fence_put(&fence->base); + drm_exec_fini(&exec); +put_queue: + amdgpu_userq_put(queue); put_gobj_write: for (i = 0; i < num_write_bo_handles; i++) drm_gem_object_put(gobj_write[i]); @@ -589,15 +584,11 @@ int amdgpu_userq_signal_ioctl(struct drm_device *dev, void *data, kvfree(gobj_read); free_syncobj: while (entry-- > 0) - if (syncobj[entry]) - drm_syncobj_put(syncobj[entry]); + drm_syncobj_put(syncobj[entry]); kfree(syncobj); free_syncobj_handles: kfree(syncobj_handles); - if (queue) - amdgpu_userq_put(queue); - return r; } @@ -872,8 +863,10 @@ amdgpu_userq_wait_return_fence_info(struct drm_file *filp, * Otherwise, we would gather those references until we don't * have any more space left and crash. 
*/ + mutex_lock(&waitq->fence_drv_lock); r = xa_alloc(&waitq->fence_drv_xa, &index, fence_drv, xa_limit_32b, GFP_KERNEL); + mutex_unlock(&waitq->fence_drv_lock); if (r) goto put_waitq; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.h index d355a0eecc07..0bd51616cef1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.h @@ -63,7 +63,7 @@ void amdgpu_userq_fence_driver_put(struct amdgpu_userq_fence_driver *fence_drv); int amdgpu_userq_fence_driver_alloc(struct amdgpu_device *adev, struct amdgpu_userq_fence_driver **fence_drv_req); void amdgpu_userq_fence_driver_free(struct amdgpu_usermode_queue *userq); -void amdgpu_userq_fence_driver_process(struct amdgpu_userq_fence_driver *fence_drv); +int amdgpu_userq_fence_driver_process(struct amdgpu_userq_fence_driver *fence_drv); void amdgpu_userq_fence_driver_force_completion(struct amdgpu_usermode_queue *userq); void amdgpu_userq_fence_driver_destroy(struct kref *ref); int amdgpu_userq_signal_ioctl(struct drm_device *dev, void *data, diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c index 0e0b1e5b88fc..c35372e21261 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c @@ -602,6 +602,13 @@ static int gfx_v12_0_init_microcode(struct amdgpu_device *adev) "amdgpu/%s_pfp.bin", ucode_prefix); if (err) goto out; + + adev->gfx.rs64_enable = amdgpu_ucode_hdr_version( + (union amdgpu_firmware_header *) + adev->gfx.pfp_fw->data, 2, 0); + if (adev->gfx.rs64_enable) + dev_dbg(adev->dev, "CP RS64 enable\n"); + amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_PFP); amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_PFP_P0_STACK); diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.c index 82f81b586986..3751f7a94a05 100644 --- 
a/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.c @@ -92,9 +92,14 @@ #include "dml/dcn32/dcn32_fpu.h" #include "dc_state_priv.h" +#include "dc_fpu.h" #include "dml2_0/dml2_wrapper.h" +#if !defined(DC_RUN_WITH_PREEMPTION_ENABLED) +#define DC_RUN_WITH_PREEMPTION_ENABLED(code) code +#endif + #define DC_LOGGER_INIT(logger) enum dcn32_clk_src_array_id { @@ -1684,7 +1689,8 @@ static void dcn32_enable_phantom_plane(struct dc *dc, if (curr_pipe->top_pipe && curr_pipe->top_pipe->plane_state == curr_pipe->plane_state) phantom_plane = prev_phantom_plane; else - phantom_plane = dc_state_create_phantom_plane(dc, context, curr_pipe->plane_state); + DC_RUN_WITH_PREEMPTION_ENABLED(phantom_plane = + dc_state_create_phantom_plane(dc, context, curr_pipe->plane_state)); if (!phantom_plane) continue;