mirror of
https://github.com/torvalds/linux.git
synced 2026-05-24 07:03:03 +02:00
drm/amdkfd: Remove arbitrary timeout for hmm_range_fault
On systems with khugepaged enabled and use cases with THP buffers, hmm_range_fault may take > 15 seconds to return -EBUSY. The arbitrary timeout value is not accurate and causes memory allocation failures. Remove the arbitrary timeout value and return EAGAIN to the application if hmm_range_fault returns EBUSY, so that userspace libdrm and Thunk will call the ioctl again. Change the EAGAIN message to a debug message, as this is not an error. Signed-off-by: Philip Yang <Philip.Yang@amd.com> Reviewed-by: Felix Kuehling <felix.kuehling@amd.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
This commit is contained in:
parent
10f624ef23
commit
9095e55440
|
|
@ -1088,7 +1088,10 @@ static int init_user_pages(struct kgd_mem *mem, uint64_t user_addr,
|
|||
|
||||
ret = amdgpu_ttm_tt_get_user_pages(bo, bo->tbo.ttm->pages, &range);
|
||||
if (ret) {
|
||||
pr_err("%s: Failed to get user pages: %d\n", __func__, ret);
|
||||
if (ret == -EAGAIN)
|
||||
pr_debug("Failed to get user pages, try again\n");
|
||||
else
|
||||
pr_err("%s: Failed to get user pages: %d\n", __func__, ret);
|
||||
goto unregister_out;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -202,20 +202,12 @@ int amdgpu_hmm_range_get_pages(struct mmu_interval_notifier *notifier,
|
|||
pr_debug("hmm range: start = 0x%lx, end = 0x%lx",
|
||||
hmm_range->start, hmm_range->end);
|
||||
|
||||
/* Assuming 64MB takes maximum 1 second to fault page address */
|
||||
timeout = max((hmm_range->end - hmm_range->start) >> 26, 1UL);
|
||||
timeout *= HMM_RANGE_DEFAULT_TIMEOUT;
|
||||
timeout = jiffies + msecs_to_jiffies(timeout);
|
||||
timeout = jiffies + msecs_to_jiffies(HMM_RANGE_DEFAULT_TIMEOUT);
|
||||
|
||||
retry:
|
||||
hmm_range->notifier_seq = mmu_interval_read_begin(notifier);
|
||||
r = hmm_range_fault(hmm_range);
|
||||
if (unlikely(r)) {
|
||||
schedule();
|
||||
/*
|
||||
* FIXME: This timeout should encompass the retry from
|
||||
* mmu_interval_read_retry() as well.
|
||||
*/
|
||||
if (r == -EBUSY && !time_after(jiffies, timeout))
|
||||
goto retry;
|
||||
goto out_free_pfns;
|
||||
|
|
@ -247,6 +239,8 @@ int amdgpu_hmm_range_get_pages(struct mmu_interval_notifier *notifier,
|
|||
out_free_range:
|
||||
kfree(hmm_range);
|
||||
|
||||
if (r == -EBUSY)
|
||||
r = -EAGAIN;
|
||||
return r;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -1690,11 +1690,8 @@ static int svm_range_validate_and_map(struct mm_struct *mm,
|
|||
readonly, owner, NULL,
|
||||
&hmm_range);
|
||||
WRITE_ONCE(p->svms.faulting_task, NULL);
|
||||
if (r) {
|
||||
if (r)
|
||||
pr_debug("failed %d to get svm range pages\n", r);
|
||||
if (r == -EBUSY)
|
||||
r = -EAGAIN;
|
||||
}
|
||||
} else {
|
||||
r = -EFAULT;
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in New Issue
Block a user