diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
index 5179fa008626..a0940db1cd36 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
@@ -742,7 +742,7 @@ void amdgpu_gmc_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
 	 * translation. Avoid this by doing the invalidation from the SDMA
 	 * itself at least for GART.
 	 */
-	mutex_lock(&adev->mman.gtt_window_lock);
+	mutex_lock(&adev->mman.default_entity.lock);
 	r = amdgpu_job_alloc_with_ib(ring->adev, &adev->mman.default_entity.base,
 				     AMDGPU_FENCE_OWNER_UNDEFINED,
 				     16 * 4, AMDGPU_IB_POOL_IMMEDIATE,
@@ -755,7 +755,7 @@ void amdgpu_gmc_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
 	job->ibs->ptr[job->ibs->length_dw++] = ring->funcs->nop;
 	amdgpu_ring_pad_ib(ring, &job->ibs[0]);
 	fence = amdgpu_job_submit(job);
-	mutex_unlock(&adev->mman.gtt_window_lock);
+	mutex_unlock(&adev->mman.default_entity.lock);
 
 	dma_fence_wait(fence, false);
 	dma_fence_put(fence);
@@ -763,7 +763,7 @@ void amdgpu_gmc_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
 	return;
 
 error_alloc:
-	mutex_unlock(&adev->mman.gtt_window_lock);
+	mutex_unlock(&adev->mman.default_entity.lock);
 	dev_err(adev->dev, "Error flushing GPU TLB using the SDMA (%d)!\n", r);
 }
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index eeaa56c8d129..3b2c0ae67ce4 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -228,9 +228,7 @@ static int amdgpu_ttm_map_buffer(struct amdgpu_ttm_buffer_entity *entity,
 
 	*size = min(*size, (uint64_t)num_pages * PAGE_SIZE - offset);
 
-	*addr = adev->gmc.gart_start;
-	*addr += (u64)window * AMDGPU_GTT_MAX_TRANSFER_SIZE *
-		AMDGPU_GPU_PAGE_SIZE;
+	*addr = amdgpu_compute_gart_address(&adev->gmc, entity, window);
 	*addr += offset;
 
 	num_dw = ALIGN(adev->mman.buffer_funcs->copy_num_dw, 8);
@@ -248,7 +246,7 @@ static int amdgpu_ttm_map_buffer(struct amdgpu_ttm_buffer_entity *entity,
 	src_addr += job->ibs[0].gpu_addr;
 
 	dst_addr = amdgpu_bo_gpu_offset(adev->gart.bo);
-	dst_addr += window * AMDGPU_GTT_MAX_TRANSFER_SIZE * 8;
+	dst_addr += (entity->gart_window_offs[window] >> AMDGPU_GPU_PAGE_SHIFT) * 8;
 	amdgpu_emit_copy_buffer(adev, &job->ibs[0], src_addr,
 				dst_addr, num_bytes, 0);
 
@@ -313,7 +311,7 @@ static int amdgpu_ttm_copy_mem_to_mem(struct amdgpu_device *adev,
 	amdgpu_res_first(src->mem, src->offset, size, &src_mm);
 	amdgpu_res_first(dst->mem, dst->offset, size, &dst_mm);
 
-	mutex_lock(&adev->mman.gtt_window_lock);
+	mutex_lock(&entity->lock);
 	while (src_mm.remaining) {
 		uint64_t from, to, cur_size, tiling_flags;
 		uint32_t num_type, data_format, max_com, write_compress_disable;
@@ -368,7 +366,7 @@ static int amdgpu_ttm_copy_mem_to_mem(struct amdgpu_device *adev,
 		amdgpu_res_next(&dst_mm, cur_size);
 	}
 error:
-	mutex_unlock(&adev->mman.gtt_window_lock);
+	mutex_unlock(&entity->lock);
 	*f = fence;
 	return r;
 }
@@ -1580,7 +1578,7 @@ static int amdgpu_ttm_access_memory_sdma(struct ttm_buffer_object *bo,
 	if (r)
 		goto out;
 
-	mutex_lock(&adev->mman.gtt_window_lock);
+	mutex_lock(&adev->mman.default_entity.lock);
 	amdgpu_res_first(abo->tbo.resource, offset, len, &src_mm);
 	src_addr = amdgpu_ttm_domain_start(adev, bo->resource->mem_type) +
 		src_mm.start;
@@ -1592,7 +1590,7 @@ static int amdgpu_ttm_access_memory_sdma(struct ttm_buffer_object *bo,
 				PAGE_SIZE, 0);
 
 	fence = amdgpu_ttm_job_submit(adev, job, num_dw);
-	mutex_unlock(&adev->mman.gtt_window_lock);
+	mutex_unlock(&adev->mman.default_entity.lock);
 
 	if (!dma_fence_wait_timeout(fence, false, adev->sdma_timeout))
 		r = -ETIMEDOUT;
@@ -2013,6 +2011,27 @@ static void amdgpu_ttm_free_mmio_remap_bo(struct amdgpu_device *adev)
 	adev->rmmio_remap.bo = NULL;
 }
 
+static int amdgpu_ttm_buffer_entity_init(struct amdgpu_ttm_buffer_entity *entity,
+					 int starting_gart_window,
+					 u32 num_gart_windows)
+{
+	int i;
+
+	mutex_init(&entity->lock);
+
+	if (ARRAY_SIZE(entity->gart_window_offs) < num_gart_windows)
+		return starting_gart_window;
+
+	for (i = 0; i < num_gart_windows; i++) {
+		entity->gart_window_offs[i] =
+			(u64)starting_gart_window * AMDGPU_GTT_MAX_TRANSFER_SIZE *
+				AMDGPU_GPU_PAGE_SIZE;
+		starting_gart_window++;
+	}
+
+	return starting_gart_window;
+}
+
 /*
  * amdgpu_ttm_init - Init the memory management (ttm) as well as various
  * gtt/vram related fields.
@@ -2027,8 +2046,6 @@ int amdgpu_ttm_init(struct amdgpu_device *adev)
 	uint64_t gtt_size;
 	int r;
 
-	mutex_init(&adev->mman.gtt_window_lock);
-
 	dma_set_max_seg_size(adev->dev, UINT_MAX);
 	/* No others user of address space so set it to 0 */
 	r = ttm_device_init(&adev->mman.bdev, &amdgpu_bo_driver, adev->dev,
@@ -2302,6 +2319,7 @@ void amdgpu_ttm_fini(struct amdgpu_device *adev)
 void amdgpu_ttm_set_buffer_funcs_status(struct amdgpu_device *adev, bool enable)
 {
 	struct ttm_resource_manager *man = ttm_manager_type(&adev->mman.bdev, TTM_PL_VRAM);
+	u32 used_windows;
 	uint64_t size;
 	int r;
 
@@ -2345,6 +2363,13 @@ void amdgpu_ttm_set_buffer_funcs_status(struct amdgpu_device *adev, bool enable)
 			drm_sched_entity_destroy(&adev->mman.clear_entity.base);
 			goto error_free_entity;
 		}
+
+		/* Statically assign GART windows to each entity. */
+		used_windows = amdgpu_ttm_buffer_entity_init(&adev->mman.default_entity, 0, 0);
+		used_windows = amdgpu_ttm_buffer_entity_init(&adev->mman.move_entity,
+							     used_windows, 2);
+		used_windows = amdgpu_ttm_buffer_entity_init(&adev->mman.clear_entity,
+							     used_windows, 1);
 	} else {
 		drm_sched_entity_destroy(&adev->mman.default_entity.base);
 		drm_sched_entity_destroy(&adev->mman.clear_entity.base);
@@ -2503,6 +2528,7 @@ int amdgpu_ttm_clear_buffer(struct amdgpu_bo *bo,
 			    struct dma_fence **fence)
 {
 	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
+	struct amdgpu_ttm_buffer_entity *entity;
 	struct amdgpu_res_cursor cursor;
 	u64 addr;
 	int r = 0;
@@ -2513,11 +2539,12 @@ int amdgpu_ttm_clear_buffer(struct amdgpu_bo *bo,
 	if (!fence)
 		return -EINVAL;
 
+	entity = &adev->mman.clear_entity;
 	*fence = dma_fence_get_stub();
 
 	amdgpu_res_first(bo->tbo.resource, 0, amdgpu_bo_size(bo), &cursor);
 
-	mutex_lock(&adev->mman.gtt_window_lock);
+	mutex_lock(&entity->lock);
 	while (cursor.remaining) {
 		struct dma_fence *next = NULL;
 		u64 size;
@@ -2530,13 +2557,12 @@ int amdgpu_ttm_clear_buffer(struct amdgpu_bo *bo,
 		/* Never clear more than 256MiB at once to avoid timeouts */
 		size = min(cursor.size, 256ULL << 20);
 
-		r = amdgpu_ttm_map_buffer(&adev->mman.clear_entity,
-					  &bo->tbo, bo->tbo.resource, &cursor,
-					  1, false, &size, &addr);
+		r = amdgpu_ttm_map_buffer(entity, &bo->tbo, bo->tbo.resource, &cursor,
+					  0, false, &size, &addr);
 		if (r)
 			goto err;
 
-		r = amdgpu_ttm_fill_mem(adev, &adev->mman.clear_entity, 0, addr, size, resv,
+		r = amdgpu_ttm_fill_mem(adev, entity, 0, addr, size, resv,
 					&next, true,
 					AMDGPU_KERNEL_JOB_ID_TTM_CLEAR_BUFFER);
 		if (r)
@@ -2548,7 +2574,7 @@ int amdgpu_ttm_clear_buffer(struct amdgpu_bo *bo,
 		amdgpu_res_next(&cursor, size);
 	}
 err:
-	mutex_unlock(&adev->mman.gtt_window_lock);
+	mutex_unlock(&entity->lock);
 
 	return r;
 }
@@ -2573,7 +2599,7 @@ int amdgpu_fill_buffer(struct amdgpu_ttm_buffer_entity *entity,
 
 	amdgpu_res_first(bo->tbo.resource, 0, amdgpu_bo_size(bo), &dst);
 
-	mutex_lock(&adev->mman.gtt_window_lock);
+	mutex_lock(&entity->lock);
 	while (dst.remaining) {
 		struct dma_fence *next;
 		uint64_t cur_size, to;
@@ -2582,7 +2608,7 @@ int amdgpu_fill_buffer(struct amdgpu_ttm_buffer_entity *entity,
 		cur_size = min(dst.size, 256ULL << 20);
 
 		r = amdgpu_ttm_map_buffer(entity, &bo->tbo, bo->tbo.resource, &dst,
-					  1, false, &cur_size, &to);
+					  0, false, &cur_size, &to);
 		if (r)
 			goto error;
 
@@ -2598,7 +2624,7 @@ int amdgpu_fill_buffer(struct amdgpu_ttm_buffer_entity *entity,
 		amdgpu_res_next(&dst, cur_size);
 	}
 error:
-	mutex_unlock(&adev->mman.gtt_window_lock);
+	mutex_unlock(&entity->lock);
 	if (f)
 		*f = dma_fence_get(fence);
 	dma_fence_put(fence);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
index 143201ecea3f..871388b86503 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
@@ -29,6 +29,7 @@
 #include <drm/ttm/ttm_placement.h>
 #include "amdgpu_vram_mgr.h"
 #include "amdgpu_hmm.h"
+#include "amdgpu_gmc.h"
 
 #define AMDGPU_PL_GDS		(TTM_PL_PRIV + 0)
 #define AMDGPU_PL_GWS		(TTM_PL_PRIV + 1)
@@ -39,7 +40,7 @@
 #define __AMDGPU_PL_NUM	(TTM_PL_PRIV + 6)
 
 #define AMDGPU_GTT_MAX_TRANSFER_SIZE	512
-#define AMDGPU_GTT_NUM_TRANSFER_WINDOWS	2
+#define AMDGPU_GTT_NUM_TRANSFER_WINDOWS	3
 
 extern const struct attribute_group amdgpu_vram_mgr_attr_group;
 extern const struct attribute_group amdgpu_gtt_mgr_attr_group;
@@ -54,6 +55,8 @@ struct amdgpu_gtt_mgr {
 
 struct amdgpu_ttm_buffer_entity {
 	struct drm_sched_entity base;
+	struct mutex lock;
+	u64 gart_window_offs[2];
 };
 
 struct amdgpu_mman {
@@ -67,8 +70,7 @@ struct amdgpu_mman {
 	struct amdgpu_ring			*buffer_funcs_ring;
 	bool					buffer_funcs_enabled;
 
-	struct mutex				gtt_window_lock;
-
+	/* @default_entity: for workarounds, has no gart windows */
 	struct amdgpu_ttm_buffer_entity default_entity;
 	struct amdgpu_ttm_buffer_entity clear_entity;
 	struct amdgpu_ttm_buffer_entity move_entity;
@@ -205,6 +207,19 @@ static inline int amdgpu_ttm_tt_get_user_pages(struct amdgpu_bo *bo,
 }
 #endif
 
+/**
+ * amdgpu_compute_gart_address() - Returns GART address of an entity's window
+ * @gmc: The &struct amdgpu_gmc instance to use
+ * @entity: The &struct amdgpu_ttm_buffer_entity owning the GART window
+ * @index: The window to use (must be 0 or 1)
+ */
+static inline u64 amdgpu_compute_gart_address(struct amdgpu_gmc *gmc,
+					      struct amdgpu_ttm_buffer_entity *entity,
+					      int index)
+{
+	return gmc->gart_start + entity->gart_window_offs[index];
+}
+
 void amdgpu_ttm_tt_set_user_pages(struct ttm_tt *ttm, struct amdgpu_hmm_range *range);
 int amdgpu_ttm_tt_get_userptr(const struct ttm_buffer_object *tbo,
 			      uint64_t *user_addr);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
index b021f1e56114..10bc81ce37cb 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
@@ -59,8 +59,7 @@ svm_migrate_gart_map(struct amdgpu_ring *ring,
 	void *cpu_addr;
 	int r;
 
-	/* use gart window 0 */
-	*gart_addr = adev->gmc.gart_start;
+	*gart_addr = amdgpu_compute_gart_address(&adev->gmc, entity, 0);
 
 	num_dw = ALIGN(adev->mman.buffer_funcs->copy_num_dw, 8);
 	num_bytes = npages * 8 * AMDGPU_GPU_PAGES_IN_CPU_PAGE;
@@ -78,6 +77,7 @@ svm_migrate_gart_map(struct amdgpu_ring *ring,
 	src_addr += job->ibs[0].gpu_addr;
 
 	dst_addr = amdgpu_bo_gpu_offset(adev->gart.bo);
+	dst_addr += (entity->gart_window_offs[0] >> AMDGPU_GPU_PAGE_SHIFT) * 8;
 	amdgpu_emit_copy_buffer(adev, &job->ibs[0], src_addr,
 				dst_addr, num_bytes, 0);
 
@@ -116,7 +116,7 @@ svm_migrate_gart_map(struct amdgpu_ring *ring,
  * multiple GTT_MAX_PAGES transfer, all sdma operations are serialized, wait for
  * the last sdma finish fence which is returned to check copy memory is done.
  *
- * Context: Process context, takes and releases gtt_window_lock
+ * Context: Process context
  *
  * Return:
  * 0 - OK, otherwise error code
@@ -136,9 +136,9 @@ svm_migrate_copy_memory_gart(struct amdgpu_device *adev, dma_addr_t *sys,
 	u64 size;
 	int r;
 
-	entity = &adev->mman.default_entity;
+	entity = &adev->mman.move_entity;
 
-	mutex_lock(&adev->mman.gtt_window_lock);
+	mutex_lock(&entity->lock);
 
 	while (npages) {
 		size = min(GTT_MAX_PAGES, npages);
@@ -175,7 +175,7 @@ svm_migrate_copy_memory_gart(struct amdgpu_device *adev, dma_addr_t *sys,
 	}
 
 out_unlock:
-	mutex_unlock(&adev->mman.gtt_window_lock);
+	mutex_unlock(&entity->lock);
 
 	return r;
 }