drm/xe/migrate: make MI_TLB_INVALIDATE conditional

When clearing VRAM we should be able to skip invalidating the TLBs if we
are only using the identity map to access VRAM (which is the common
case), since no modifications are made to PTEs on the fly. Also, since
we use huge 1G entries within the identity map, there is a decent chance
that the next packet(s) (if also clears) can avoid a tree walk if we
don't shoot down the TLBs, for example when we have to process a long
stream of clears.

For normal moves/copies, we almost always end up with the src or dst
being system memory, meaning we can't rely on the identity map alone and
will also need to emit PTEs, so a TLB flush is always required.

v2:
  - Update commit message to explain the situation for normal copies (Matt B)
  - Rebase on latest changes

Signed-off-by: Matthew Auld <matthew.auld@intel.com>
Cc: Himal Prasad Ghimiray <himal.prasad.ghimiray@intel.com>
Cc: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Cc: Matthew Brost <matthew.brost@intel.com>
Reviewed-by: Matthew Brost <matthew.brost@intel.com>
Link: https://lore.kernel.org/r/20250808110452.467513-2-matthew.auld@intel.com
This commit is contained in:
Matthew Auld 2025-08-08 12:04:53 +01:00
parent db16f9d90c
commit 81a45cb7ea
2 changed files with 16 additions and 12 deletions

View File

@ -904,7 +904,7 @@ struct dma_fence *xe_migrate_copy(struct xe_migrate *m,
goto err;
}
xe_sched_job_add_migrate_flush(job, flush_flags);
xe_sched_job_add_migrate_flush(job, flush_flags | MI_INVALIDATE_TLB);
if (!fence) {
err = xe_sched_job_add_deps(job, src_bo->ttm.base.resv,
DMA_RESV_USAGE_BOOKKEEP);
@ -1288,11 +1288,13 @@ struct dma_fence *xe_migrate_clear(struct xe_migrate *m,
size -= clear_L0;
/* Preemption is enabled again by the ring ops. */
if (clear_vram && xe_migrate_allow_identity(clear_L0, &src_it))
if (clear_vram && xe_migrate_allow_identity(clear_L0, &src_it)) {
xe_res_next(&src_it, clear_L0);
else
emit_pte(m, bb, clear_L0_pt, clear_vram, clear_only_system_ccs,
&src_it, clear_L0, dst);
} else {
emit_pte(m, bb, clear_L0_pt, clear_vram,
clear_only_system_ccs, &src_it, clear_L0, dst);
flush_flags |= MI_INVALIDATE_TLB;
}
bb->cs[bb->len++] = MI_BATCH_BUFFER_END;
update_idx = bb->len;
@ -1303,7 +1305,7 @@ struct dma_fence *xe_migrate_clear(struct xe_migrate *m,
if (xe_migrate_needs_ccs_emit(xe)) {
emit_copy_ccs(gt, bb, clear_L0_ofs, true,
m->cleared_mem_ofs, false, clear_L0);
flush_flags = MI_FLUSH_DW_CCS;
flush_flags |= MI_FLUSH_DW_CCS;
}
job = xe_bb_create_migration_job(m->q, bb,
@ -1638,6 +1640,8 @@ __xe_migrate_update_pgtables(struct xe_migrate *m,
goto err_sa;
}
xe_sched_job_add_migrate_flush(job, MI_INVALIDATE_TLB);
if (ops->pre_commit) {
pt_update->job = job;
err = ops->pre_commit(pt_update);
@ -1863,7 +1867,7 @@ static struct dma_fence *xe_migrate_vram(struct xe_migrate *m,
goto err;
}
xe_sched_job_add_migrate_flush(job, 0);
xe_sched_job_add_migrate_flush(job, MI_INVALIDATE_TLB);
mutex_lock(&m->job_mutex);
xe_sched_job_arm(job);

View File

@ -110,10 +110,10 @@ static int emit_bb_start(u64 batch_addr, u32 ppgtt_flag, u32 *dw, int i)
return i;
}
static int emit_flush_invalidate(u32 addr, u32 val, u32 *dw, int i)
static int emit_flush_invalidate(u32 addr, u32 val, u32 flush_flags, u32 *dw, int i)
{
dw[i++] = MI_FLUSH_DW | MI_INVALIDATE_TLB | MI_FLUSH_DW_OP_STOREDW |
MI_FLUSH_IMM_DW;
dw[i++] = MI_FLUSH_DW | MI_FLUSH_DW_OP_STOREDW |
MI_FLUSH_IMM_DW | (flush_flags & MI_INVALIDATE_TLB) ?: 0;
dw[i++] = addr | MI_FLUSH_DW_USE_GTT;
dw[i++] = 0;
@ -410,13 +410,13 @@ static void emit_migration_job_gen12(struct xe_sched_job *job,
i = emit_bb_start(job->ptrs[0].batch_addr, BIT(8), dw, i);
dw[i++] = preparser_disable(true);
i = emit_flush_invalidate(saddr, seqno, dw, i);
i = emit_flush_invalidate(saddr, seqno, job->migrate_flush_flags, dw, i);
dw[i++] = preparser_disable(false);
i = emit_bb_start(job->ptrs[1].batch_addr, BIT(8), dw, i);
i = emit_flush_imm_ggtt(xe_lrc_seqno_ggtt_addr(lrc), seqno,
MI_INVALIDATE_TLB | job->migrate_flush_flags,
job->migrate_flush_flags,
dw, i);
i = emit_user_interrupt(dw, i);