drm/amdgpu: don't call drm_sched_stop/start() in asic reset

We only want to stop the work queues, not mess with the
fences, etc.

v2: add the job back to the pending list.
v3: return the proper job status so scheduler adds the
    job back to the pending list

Acked-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
This commit is contained in:
Alex Deucher 2026-01-03 13:29:19 -05:00
parent d5e53ff42f
commit 01fb6e8568
2 changed files with 4 additions and 6 deletions

View File

@ -6304,7 +6304,7 @@ static void amdgpu_device_halt_activities(struct amdgpu_device *adev,
if (!amdgpu_ring_sched_ready(ring))
continue;
drm_sched_stop(&ring->sched, job ? &job->base : NULL);
drm_sched_wqueue_stop(&ring->sched);
if (need_emergency_restart)
amdgpu_job_stop_all_jobs_on_sched(&ring->sched);
@ -6388,7 +6388,7 @@ static int amdgpu_device_sched_resume(struct list_head *device_list,
if (!amdgpu_ring_sched_ready(ring))
continue;
drm_sched_start(&ring->sched, 0);
drm_sched_wqueue_start(&ring->sched);
}
if (!drm_drv_uses_atomic_modeset(adev_to_drm(tmp_adev)) && !job_signaled)

View File

@ -92,7 +92,6 @@ static enum drm_gpu_sched_stat amdgpu_job_timedout(struct drm_sched_job *s_job)
struct drm_wedge_task_info *info = NULL;
struct amdgpu_task_info *ti = NULL;
struct amdgpu_device *adev = ring->adev;
enum drm_gpu_sched_stat status = DRM_GPU_SCHED_STAT_RESET;
int idx, r;
if (!drm_dev_enter(adev_to_drm(adev), &idx)) {
@ -147,8 +146,6 @@ static enum drm_gpu_sched_stat amdgpu_job_timedout(struct drm_sched_job *s_job)
ring->sched.name);
drm_dev_wedged_event(adev_to_drm(adev),
DRM_WEDGE_RECOVERY_NONE, info);
/* This is needed to add the job back to the pending list */
status = DRM_GPU_SCHED_STAT_NO_HANG;
goto exit;
}
dev_err(adev->dev, "Ring %s reset failed\n", ring->sched.name);
@ -184,7 +181,8 @@ static enum drm_gpu_sched_stat amdgpu_job_timedout(struct drm_sched_job *s_job)
exit:
amdgpu_vm_put_task_info(ti);
drm_dev_exit(idx);
return status;
/* This is needed to add the job back to the pending list */
return DRM_GPU_SCHED_STAT_NO_HANG;
}
int amdgpu_job_alloc(struct amdgpu_device *adev, struct amdgpu_vm *vm,