mirror of
https://github.com/torvalds/linux.git
synced 2026-05-24 15:12:13 +02:00
drm/amdkfd: Handle GPU reset and drain retry fault race
Only check and drain the IH1 ring if CAM is not enabled. If the GPU is under reset, don't access the IH rings to drain retry faults.

Signed-off-by: Philip Yang <Philip.Yang@amd.com>
Reviewed-by: Harish Kasiviswanathan <Harish.Kasiviswanathan@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
This commit is contained in:
parent
e1c94109c7
commit
5b57c3c3f2
|
|
@ -33,6 +33,7 @@
|
|||
#include "amdgpu_hmm.h"
|
||||
#include "amdgpu.h"
|
||||
#include "amdgpu_xgmi.h"
|
||||
#include "amdgpu_reset.h"
|
||||
#include "kfd_priv.h"
|
||||
#include "kfd_svm.h"
|
||||
#include "kfd_migrate.h"
|
||||
|
|
@ -2369,6 +2370,9 @@ static void svm_range_drain_retry_fault(struct svm_range_list *svms)
|
|||
|
||||
pr_debug("drain retry fault gpu %d svms %p\n", i, svms);
|
||||
|
||||
if (!down_read_trylock(&pdd->dev->adev->reset_domain->sem))
|
||||
continue;
|
||||
|
||||
amdgpu_ih_wait_on_checkpoint_process_ts(pdd->dev->adev,
|
||||
pdd->dev->adev->irq.retry_cam_enabled ?
|
||||
&pdd->dev->adev->irq.ih :
|
||||
|
|
@ -2378,6 +2382,7 @@ static void svm_range_drain_retry_fault(struct svm_range_list *svms)
|
|||
amdgpu_ih_wait_on_checkpoint_process_ts(pdd->dev->adev,
|
||||
&pdd->dev->adev->irq.ih_soft);
|
||||
|
||||
up_read(&pdd->dev->adev->reset_domain->sem);
|
||||
|
||||
pr_debug("drain retry fault gpu %d svms 0x%p done\n", i, svms);
|
||||
}
|
||||
|
|
@ -2561,7 +2566,7 @@ svm_range_unmap_from_cpu(struct mm_struct *mm, struct svm_range *prange,
|
|||
adev = pdd->dev->adev;
|
||||
|
||||
/* Check and drain ih1 ring if cam not available */
|
||||
if (adev->irq.ih1.ring_size) {
|
||||
if (!adev->irq.retry_cam_enabled && adev->irq.ih1.ring_size) {
|
||||
ih = &adev->irq.ih1;
|
||||
checkpoint_wptr = amdgpu_ih_get_wptr(adev, ih);
|
||||
if (ih->rptr != checkpoint_wptr) {
|
||||
|
|
|
|||
Loading…
Reference in New Issue
Block a user