mirror of
https://github.com/torvalds/linux.git
synced 2026-05-24 23:22:31 +02:00
drm/amdgpu: remove almost all calls to amdgpu_userq_detect_and_reset_queues
Well, the reset handling seems broken on multiple levels. As a first step of fixing this, remove most calls to the hang detection. That function should only be called after we run into a timeout! And *NOT* as a random check spread over the code in multiple places. Signed-off-by: Christian König <christian.koenig@amd.com> Reviewed-by: Sunil Khatri <sunil.khatri@amd.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com> (cherry picked from commit 71bea36b54ccfb14cbc90f94267af6369af4e702)
This commit is contained in:
parent
44e5bc73bd
commit
d0053441ad
|
|
@ -345,23 +345,18 @@ static int amdgpu_userq_preempt_helper(struct amdgpu_usermode_queue *queue)
|
|||
struct amdgpu_device *adev = uq_mgr->adev;
|
||||
const struct amdgpu_userq_funcs *userq_funcs =
|
||||
adev->userq_funcs[queue->queue_type];
|
||||
bool found_hung_queue = false;
|
||||
int r = 0;
|
||||
int r;
|
||||
|
||||
if (queue->state == AMDGPU_USERQ_STATE_MAPPED) {
|
||||
r = userq_funcs->preempt(queue);
|
||||
if (r) {
|
||||
queue->state = AMDGPU_USERQ_STATE_HUNG;
|
||||
found_hung_queue = true;
|
||||
return r;
|
||||
} else {
|
||||
queue->state = AMDGPU_USERQ_STATE_PREEMPTED;
|
||||
}
|
||||
}
|
||||
|
||||
if (found_hung_queue)
|
||||
amdgpu_userq_detect_and_reset_queues(uq_mgr);
|
||||
|
||||
return r;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int amdgpu_userq_restore_helper(struct amdgpu_usermode_queue *queue)
|
||||
|
|
@ -390,24 +385,21 @@ static int amdgpu_userq_unmap_helper(struct amdgpu_usermode_queue *queue)
|
|||
struct amdgpu_device *adev = uq_mgr->adev;
|
||||
const struct amdgpu_userq_funcs *userq_funcs =
|
||||
adev->userq_funcs[queue->queue_type];
|
||||
bool found_hung_queue = false;
|
||||
int r = 0;
|
||||
int r;
|
||||
|
||||
if ((queue->state == AMDGPU_USERQ_STATE_MAPPED) ||
|
||||
(queue->state == AMDGPU_USERQ_STATE_PREEMPTED)) {
|
||||
(queue->state == AMDGPU_USERQ_STATE_PREEMPTED)) {
|
||||
|
||||
r = userq_funcs->unmap(queue);
|
||||
if (r) {
|
||||
queue->state = AMDGPU_USERQ_STATE_HUNG;
|
||||
found_hung_queue = true;
|
||||
return r;
|
||||
} else {
|
||||
queue->state = AMDGPU_USERQ_STATE_UNMAPPED;
|
||||
}
|
||||
}
|
||||
|
||||
if (found_hung_queue)
|
||||
amdgpu_userq_detect_and_reset_queues(uq_mgr);
|
||||
|
||||
return r;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int amdgpu_userq_map_helper(struct amdgpu_usermode_queue *queue)
|
||||
|
|
@ -416,19 +408,19 @@ static int amdgpu_userq_map_helper(struct amdgpu_usermode_queue *queue)
|
|||
struct amdgpu_device *adev = uq_mgr->adev;
|
||||
const struct amdgpu_userq_funcs *userq_funcs =
|
||||
adev->userq_funcs[queue->queue_type];
|
||||
int r = 0;
|
||||
int r;
|
||||
|
||||
if (queue->state == AMDGPU_USERQ_STATE_UNMAPPED) {
|
||||
r = userq_funcs->map(queue);
|
||||
if (r) {
|
||||
queue->state = AMDGPU_USERQ_STATE_HUNG;
|
||||
amdgpu_userq_detect_and_reset_queues(uq_mgr);
|
||||
return r;
|
||||
} else {
|
||||
queue->state = AMDGPU_USERQ_STATE_MAPPED;
|
||||
}
|
||||
}
|
||||
|
||||
return r;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void amdgpu_userq_wait_for_last_fence(struct amdgpu_usermode_queue *queue)
|
||||
|
|
@ -654,7 +646,6 @@ amdgpu_userq_destroy(struct amdgpu_userq_mgr *uq_mgr, struct amdgpu_usermode_que
|
|||
#if defined(CONFIG_DEBUG_FS)
|
||||
debugfs_remove_recursive(queue->debugfs_queue);
|
||||
#endif
|
||||
amdgpu_userq_detect_and_reset_queues(uq_mgr);
|
||||
r = amdgpu_userq_unmap_helper(queue);
|
||||
atomic_dec(&uq_mgr->userq_count[queue->queue_type]);
|
||||
amdgpu_userq_cleanup(queue);
|
||||
|
|
@ -1264,7 +1255,6 @@ amdgpu_userq_evict_all(struct amdgpu_userq_mgr *uq_mgr)
|
|||
unsigned long queue_id;
|
||||
int ret = 0, r;
|
||||
|
||||
amdgpu_userq_detect_and_reset_queues(uq_mgr);
|
||||
/* Try to unmap all the queues in this process ctx */
|
||||
xa_for_each(&uq_mgr->userq_xa, queue_id, queue) {
|
||||
r = amdgpu_userq_preempt_helper(queue);
|
||||
|
|
@ -1272,9 +1262,11 @@ amdgpu_userq_evict_all(struct amdgpu_userq_mgr *uq_mgr)
|
|||
ret = r;
|
||||
}
|
||||
|
||||
if (ret)
|
||||
if (ret) {
|
||||
drm_file_err(uq_mgr->file,
|
||||
"Couldn't unmap all the queues, eviction failed ret=%d\n", ret);
|
||||
amdgpu_userq_detect_and_reset_queues(uq_mgr);
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
|
@ -1374,7 +1366,6 @@ int amdgpu_userq_suspend(struct amdgpu_device *adev)
|
|||
uqm = queue->userq_mgr;
|
||||
cancel_delayed_work_sync(&uqm->resume_work);
|
||||
guard(mutex)(&uqm->userq_mutex);
|
||||
amdgpu_userq_detect_and_reset_queues(uqm);
|
||||
if (adev->in_s0ix)
|
||||
r = amdgpu_userq_preempt_helper(queue);
|
||||
else
|
||||
|
|
@ -1433,7 +1424,6 @@ int amdgpu_userq_stop_sched_for_enforce_isolation(struct amdgpu_device *adev,
|
|||
if (((queue->queue_type == AMDGPU_HW_IP_GFX) ||
|
||||
(queue->queue_type == AMDGPU_HW_IP_COMPUTE)) &&
|
||||
(queue->xcp_id == idx)) {
|
||||
amdgpu_userq_detect_and_reset_queues(uqm);
|
||||
r = amdgpu_userq_preempt_helper(queue);
|
||||
if (r)
|
||||
ret = r;
|
||||
|
|
|
|||
Loading…
Reference in New Issue
Block a user