diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c index 216fc0ec2bb7..4ca48dd1cfd8 100644 --- a/drivers/gpu/drm/xe/xe_migrate.c +++ b/drivers/gpu/drm/xe/xe_migrate.c @@ -57,6 +57,13 @@ struct xe_migrate { u64 usm_batch_base_ofs; /** @cleared_mem_ofs: VM offset of @cleared_bo. */ u64 cleared_mem_ofs; + /** @large_page_copy_ofs: VM offset of 2M pages used for large copies */ + u64 large_page_copy_ofs; + /** + * @large_page_copy_pdes: BO offset to writeout 2M pages (PDEs) used for + * large copies + */ + u64 large_page_copy_pdes; /** * @fence: dma-fence representing the last migration job batch. * Protected by @job_mutex. @@ -288,6 +295,12 @@ static int xe_migrate_prepare_vm(struct xe_tile *tile, struct xe_migrate *m, (i + 1) * 8, u64, entry); } + /* Reserve 2M PDEs */ + level = 1; + m->large_page_copy_ofs = NUM_PT_SLOTS << xe_pt_shift(level); + m->large_page_copy_pdes = map_ofs + XE_PAGE_SIZE * level + + NUM_PT_SLOTS * 8; + /* Set up a 1GiB NULL mapping at 255GiB offset. */ level = 2; xe_map_wr(xe, &bo->vmap, map_ofs + XE_PAGE_SIZE * level + 255 * 8, u64, @@ -1778,10 +1791,10 @@ static u32 pte_update_cmd_size(u64 size) static void build_pt_update_batch_sram(struct xe_migrate *m, struct xe_bb *bb, u32 pt_offset, struct drm_pagemap_addr *sram_addr, - u32 size) + u32 size, int level) { u16 pat_index = tile_to_xe(m->tile)->pat.idx[XE_CACHE_WB]; - u64 gpu_page_size = 0x1ull << xe_pt_shift(0); + u64 gpu_page_size = 0x1ull << xe_pt_shift(level); u32 ptes; int i = 0; @@ -1808,7 +1821,7 @@ static void build_pt_update_batch_sram(struct xe_migrate *m, again: pte = m->q->vm->pt_ops->pte_encode_addr(m->tile->xe, addr, pat_index, - 0, false, 0); + level, false, 0); bb->cs[bb->len++] = lower_32_bits(pte); bb->cs[bb->len++] = upper_32_bits(pte); @@ -1826,6 +1839,19 @@ static void build_pt_update_batch_sram(struct xe_migrate *m, } } +static bool xe_migrate_vram_use_pde(struct drm_pagemap_addr *sram_addr, + unsigned long size) +{ + u32 large_size = (0x1 << xe_pt_shift(1)); + unsigned long i, incr = large_size / PAGE_SIZE; + + for (i = 0; i < DIV_ROUND_UP(size, PAGE_SIZE); i += incr) + if (PAGE_SIZE << sram_addr[i].order != large_size) + return false; + + return true; +} + enum xe_migrate_copy_dir { XE_MIGRATE_COPY_TO_VRAM, XE_MIGRATE_COPY_TO_SRAM, @@ -1855,6 +1881,7 @@ static struct dma_fence *xe_migrate_vram(struct xe_migrate *m, PAGE_SIZE : 4; int err; unsigned long i, j; + bool use_pde = xe_migrate_vram_use_pde(sram_addr, len + sram_offset); if (drm_WARN_ON(&xe->drm, (len & XE_CACHELINE_MASK) || (sram_offset | vram_addr) & XE_CACHELINE_MASK)) @@ -1879,7 +1906,7 @@ static struct dma_fence *xe_migrate_vram(struct xe_migrate *m, * struct drm_pagemap_addr. Ensure this is the case even with higher * orders. */ - for (i = 0; i < npages;) { + for (i = 0; !use_pde && i < npages;) { unsigned int order = sram_addr[i].order; for (j = 1; j < NR_PAGES(order) && i + j < npages; j++) @@ -1889,16 +1916,26 @@ static struct dma_fence *xe_migrate_vram(struct xe_migrate *m, i += NR_PAGES(order); } - build_pt_update_batch_sram(m, bb, pt_slot * XE_PAGE_SIZE, - sram_addr, len + sram_offset); + if (use_pde) + build_pt_update_batch_sram(m, bb, m->large_page_copy_pdes, + sram_addr, len + sram_offset, 1); + else + build_pt_update_batch_sram(m, bb, pt_slot * XE_PAGE_SIZE, + sram_addr, len + sram_offset, 0); if (dir == XE_MIGRATE_COPY_TO_VRAM) { - src_L0_ofs = xe_migrate_vm_addr(pt_slot, 0) + sram_offset; + if (use_pde) + src_L0_ofs = m->large_page_copy_ofs + sram_offset; + else + src_L0_ofs = xe_migrate_vm_addr(pt_slot, 0) + sram_offset; dst_L0_ofs = xe_migrate_vram_ofs(xe, vram_addr, false); } else { src_L0_ofs = xe_migrate_vram_ofs(xe, vram_addr, false); - dst_L0_ofs = xe_migrate_vm_addr(pt_slot, 0) + sram_offset; + if (use_pde) + dst_L0_ofs = m->large_page_copy_ofs + sram_offset; + else + dst_L0_ofs = xe_migrate_vm_addr(pt_slot, 0) + sram_offset; } bb->cs[bb->len++] = MI_BATCH_BUFFER_END;