From 4e65d104129e8be001d561410f6402135f6f8a45 Mon Sep 17 00:00:00 2001 From: John Harrison Date: Fri, 13 Jun 2025 20:02:22 -0700 Subject: [PATCH 01/10] drm/i915/guc: Enable CT_DEAD output in regular debug builds There is a sporadic failure to enable CTs ocurring in CI on one specific machine that can't be reproduced locally. The driver already supports dumping out a whole bunch of GuC related debug info on such a failure but only when the verbose GuC debug config option is enabled. It would be preferable to not enable all the verbose debug output. So just bump the CT_DEAD code to regular I915 debug level rather than GUC debug level, at least temporarily for CI. To prevent excessive spam in other parts of CI, also add a check against doing a CT_DEAD dump during an error injection test. No point in dumping large amounts of 'why did this fail' info when the fail is deliberately induced. v2: Revert accidentally enabling some other verbose debug output. Signed-off-by: John Harrison Reviewed-by: Daniele Ceraolo Spurio Signed-off-by: Daniele Ceraolo Spurio Link: https://lore.kernel.org/r/20250614030222.105601-1-John.C.Harrison@Intel.com --- drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c | 11 +++++++---- drivers/gpu/drm/i915/gt/uc/intel_guc_ct.h | 2 +- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c index 0d5197c0824a..380a11c92d63 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c @@ -13,7 +13,7 @@ #include "intel_guc_ct.h" #include "intel_guc_print.h" -#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GUC) +#if IS_ENABLED(CONFIG_DRM_I915_DEBUG) enum { CT_DEAD_ALIVE = 0, CT_DEAD_SETUP, @@ -144,7 +144,7 @@ void intel_guc_ct_init_early(struct intel_guc_ct *ct) spin_lock_init(&ct->requests.lock); INIT_LIST_HEAD(&ct->requests.pending); INIT_LIST_HEAD(&ct->requests.incoming); -#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GUC) +#if IS_ENABLED(CONFIG_DRM_I915_DEBUG) INIT_WORK(&ct->dead_ct_worker, ct_dead_ct_worker_func); #endif INIT_WORK(&ct->requests.worker, ct_incoming_request_worker_func); @@ -373,7 +373,7 @@ int intel_guc_ct_enable(struct intel_guc_ct *ct) ct->enabled = true; ct->stall_time = KTIME_MAX; -#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GUC) +#if IS_ENABLED(CONFIG_DRM_I915_DEBUG) ct->dead_ct_reported = false; ct->dead_ct_reason = CT_DEAD_ALIVE; #endif @@ -1377,7 +1377,7 @@ void intel_guc_ct_print_info(struct intel_guc_ct *ct, ct->ctbs.recv.desc->tail); } -#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GUC) +#if IS_ENABLED(CONFIG_DRM_I915_DEBUG) static void ct_dead_ct_worker_func(struct work_struct *w) { struct intel_guc_ct *ct = container_of(w, struct intel_guc_ct, dead_ct_worker); @@ -1386,6 +1386,9 @@ static void ct_dead_ct_worker_func(struct work_struct *w) if (ct->dead_ct_reported) return; + if (i915_error_injected()) + return; + ct->dead_ct_reported = true; guc_info(guc, "CTB is dead - reason=0x%X\n", ct->dead_ct_reason); diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.h index 2c4bb9a941be..e9a6ec4e6d38 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.h @@ -97,7 +97,7 @@ struct intel_guc_ct { /** @stall_time: time of first time a CTB submission is stalled */ ktime_t stall_time; -#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GUC) +#if IS_ENABLED(CONFIG_DRM_I915_DEBUG) int dead_ct_reason; bool dead_ct_reported; struct work_struct dead_ct_worker; From bf91bac3ef027852b3e7698af6a00e4df124072c Mon Sep 17 00:00:00 2001 From: Sebastian Brzezinka Date: Fri, 18 Jul 2025 10:28:04 +0000 Subject: [PATCH 02/10] drm/i915: Add braces around the else block in clflush_write32() According to the kernel coding style, if only one branch of a conditional statement is a single statement, braces should still be used in both branches. Signed-off-by: Sebastian Brzezinka Reviewed-by: Krzysztof Karas Reviewed-by: Andi Shyti Signed-off-by: Andi Shyti Link: https://lore.kernel.org/r/20250718102752.684975-2-sebastian.brzezinka@intel.com --- drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c index ca7e9216934a..0801d4a140e3 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c @@ -1382,8 +1382,9 @@ static void clflush_write32(u32 *addr, u32 value, unsigned int flushes) */ if (flushes & CLFLUSH_AFTER) drm_clflush_virt_range(addr, sizeof(*addr)); - } else + } else { *addr = value; + } } static u64 From c8bdf3165fccadac12d670e94ea4a9a893a5a813 Mon Sep 17 00:00:00 2001 From: Sebastian Brzezinka Date: Fri, 18 Jul 2025 10:28:19 +0000 Subject: [PATCH 03/10] drm/i915: Replace empty conditional with continue in eb_relocate_vma() Simplifies the control flow by replacing an empty `if (likely(offset == 0))` block with a `continue` statement. This improves readability and avoids unnecessary nesting. Signed-off-by: Sebastian Brzezinka Reviewed-by: Andi Shyti Signed-off-by: Andi Shyti Link: https://lore.kernel.org/r/20250718102752.684975-4-sebastian.brzezinka@intel.com --- .../gpu/drm/i915/gem/i915_gem_execbuffer.c | 54 +++++++++---------- 1 file changed, 27 insertions(+), 27 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c index 0801d4a140e3..f243f8a5215d 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c @@ -1568,36 +1568,36 @@ static int eb_relocate_vma(struct i915_execbuffer *eb, struct eb_vma *ev) do { u64 offset = eb_relocate_entry(eb, ev, r); - if (likely(offset == 0)) { - } else if ((s64)offset < 0) { + if (likely(offset == 0)) + continue; + + if ((s64)offset < 0) { remain = (int)offset; goto out; - } else { - /* - * Note that reporting an error now - * leaves everything in an inconsistent - * state as we have *already* changed - * the relocation value inside the - * object. As we have not changed the - * reloc.presumed_offset or will not - * change the execobject.offset, on the - * call we may not rewrite the value - * inside the object, leaving it - * dangling and causing a GPU hang. Unless - * userspace dynamically rebuilds the - * relocations on each execbuf rather than - * presume a static tree. - * - * We did previously check if the relocations - * were writable (access_ok), an error now - * would be a strange race with mprotect, - * having already demonstrated that we - * can read from this userspace address. - */ - offset = gen8_canonical_addr(offset & ~UPDATE); - __put_user(offset, - &urelocs[r - stack].presumed_offset); } + /* + * Note that reporting an error now + * leaves everything in an inconsistent + * state as we have *already* changed + * the relocation value inside the + * object. As we have not changed the + * reloc.presumed_offset or will not + * change the execobject.offset, on the + * call we may not rewrite the value + * inside the object, leaving it + * dangling and causing a GPU hang. Unless + * userspace dynamically rebuilds the + * relocations on each execbuf rather than + * presume a static tree. + * + * We did previously check if the relocations + * were writable (access_ok), an error now + * would be a strange race with mprotect, + * having already demonstrated that we + * can read from this userspace address. + */ + offset = gen8_canonical_addr(offset & ~UPDATE); + __put_user(offset, &urelocs[r - stack].presumed_offset); } while (r++, --count); urelocs += ARRAY_SIZE(stack); } while (remain); From c4d0f59ded1efbf0004dfdfffb9050caba234993 Mon Sep 17 00:00:00 2001 From: Krzysztof Karas Date: Wed, 30 Jul 2025 07:39:23 +0000 Subject: [PATCH 04/10] drm/i915/selftests: Do not overwrite error code after intel_context_migrate_clear() call Currently this function's error code is stored in err variable, which, if a i915_request is present, will be immediately overwritten by return from dma_resv_reserve_fences(). Call DMA functions only if intel_context_migrate_clear() succeeded. Suggested-by: Matthew Auld Signed-off-by: Krzysztof Karas Reviewed-by: Sebastian Brzezinka Reviewed-by: Andi Shyti Signed-off-by: Andi Shyti Link: https://lore.kernel.org/r/c0348e052bcb2870acdc15d60f5389fbdf4ef886.1753859971.git.krzysztof.karas@intel.com --- drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c index 9c3f17e51885..c94b71a963b2 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c @@ -1228,7 +1228,7 @@ static int __igt_mmap_migrate(struct intel_memory_region **placements, i915_gem_object_is_lmem(obj), expand32(POISON_INUSE), &rq); i915_gem_object_unpin_pages(obj); - if (rq) { + if (rq && !err) { err = dma_resv_reserve_fences(obj->base.resv, 1); if (!err) dma_resv_add_fence(obj->base.resv, &rq->fence, From f768ebbba9110846c9f986a96109d70154d60b5d Mon Sep 17 00:00:00 2001 From: Krzysztof Karas Date: Wed, 30 Jul 2025 07:40:18 +0000 Subject: [PATCH 05/10] drm/i915/selftests: Do not leak vm_area_struct on early return This structure may be leaked on early failure paths, so include vm_munmap() call in them to avoid that. Suggested-by: Chris Wilson Signed-off-by: Krzysztof Karas Reviewed-by: Andi Shyti Signed-off-by: Andi Shyti Link: https://lore.kernel.org/r/d8277e3af09c388fa5dfbf5c9bb3985ae0b191a2.1753859971.git.krzysztof.karas@intel.com --- .../drm/i915/gem/selftests/i915_gem_mman.c | 68 +++++++++---------- 1 file changed, 31 insertions(+), 37 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c index c94b71a963b2..78734c404a6d 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c @@ -1096,32 +1096,20 @@ static int ___igt_mmap_migrate(struct drm_i915_private *i915, unsigned long addr, bool unfaultable) { - struct vm_area_struct *area; - int err = 0, i; + int i; pr_info("igt_mmap(%s, %d) @ %lx\n", obj->mm.region->name, I915_MMAP_TYPE_FIXED, addr); - mmap_read_lock(current->mm); - area = vma_lookup(current->mm, addr); - mmap_read_unlock(current->mm); - if (!area) { - pr_err("%s: Did not create a vm_area_struct for the mmap\n", - obj->mm.region->name); - err = -EINVAL; - goto out_unmap; - } - for (i = 0; i < obj->base.size / sizeof(u32); i++) { u32 __user *ux = u64_to_user_ptr((u64)(addr + i * sizeof(*ux))); u32 x; if (get_user(x, ux)) { - err = -EFAULT; if (!unfaultable) { pr_err("%s: Unable to read from mmap, offset:%zd\n", obj->mm.region->name, i * sizeof(x)); - goto out_unmap; + return -EFAULT; } continue; @@ -1130,37 +1118,29 @@ static int ___igt_mmap_migrate(struct drm_i915_private *i915, if (unfaultable) { pr_err("%s: Faulted unmappable memory\n", obj->mm.region->name); - err = -EINVAL; - goto out_unmap; + return -EINVAL; } if (x != expand32(POISON_INUSE)) { pr_err("%s: Read incorrect value from mmap, offset:%zd, found:%x, expected:%x\n", obj->mm.region->name, i * sizeof(x), x, expand32(POISON_INUSE)); - err = -EINVAL; - goto out_unmap; + return -EINVAL; } x = expand32(POISON_FREE); if (put_user(x, ux)) { pr_err("%s: Unable to write to mmap, offset:%zd\n", obj->mm.region->name, i * sizeof(x)); - err = -EFAULT; - goto out_unmap; + return -EFAULT; } } - if (unfaultable) { - if (err == -EFAULT) - err = 0; - } else { - obj->flags &= ~I915_BO_ALLOC_GPU_ONLY; - err = wc_check(obj); - } -out_unmap: - vm_munmap(addr, obj->base.size); - return err; + if (unfaultable) + return 0; + + obj->flags &= ~I915_BO_ALLOC_GPU_ONLY; + return wc_check(obj); } #define IGT_MMAP_MIGRATE_TOPDOWN (1 << 0) @@ -1176,6 +1156,7 @@ static int __igt_mmap_migrate(struct intel_memory_region **placements, struct drm_i915_private *i915 = placements[0]->i915; struct drm_i915_gem_object *obj; struct i915_request *rq = NULL; + struct vm_area_struct *area; unsigned long addr; LIST_HEAD(objects); u64 offset; @@ -1207,20 +1188,30 @@ static int __igt_mmap_migrate(struct intel_memory_region **placements, goto out_put; } + mmap_read_lock(current->mm); + area = vma_lookup(current->mm, addr); + mmap_read_unlock(current->mm); + if (!area) { + pr_err("%s: Did not create a vm_area_struct for the mmap\n", + obj->mm.region->name); + err = -EINVAL; + goto out_addr; + } + if (flags & IGT_MMAP_MIGRATE_FILL) { err = igt_fill_mappable(placements[0], &objects); if (err) - goto out_put; + goto out_addr; } err = i915_gem_object_lock(obj, NULL); if (err) - goto out_put; + goto out_addr; err = i915_gem_object_pin_pages(obj); if (err) { i915_gem_object_unlock(obj); - goto out_put; + goto out_addr; } err = intel_context_migrate_clear(to_gt(i915)->migrate.context, NULL, @@ -1237,7 +1228,7 @@ static int __igt_mmap_migrate(struct intel_memory_region **placements, } i915_gem_object_unlock(obj); if (err) - goto out_put; + goto out_addr; if (flags & IGT_MMAP_MIGRATE_EVICTABLE) igt_make_evictable(&objects); @@ -1245,16 +1236,16 @@ static int __igt_mmap_migrate(struct intel_memory_region **placements, if (flags & IGT_MMAP_MIGRATE_FAIL_GPU) { err = i915_gem_object_lock(obj, NULL); if (err) - goto out_put; + goto out_addr; /* - * Ensure we only simulate the gpu failuire when faulting the + * Ensure we only simulate the gpu failure when faulting the * pages. */ err = i915_gem_object_wait_moving_fence(obj, true); i915_gem_object_unlock(obj); if (err) - goto out_put; + goto out_addr; i915_ttm_migrate_set_failure_modes(true, false); } @@ -1298,6 +1289,9 @@ static int __igt_mmap_migrate(struct intel_memory_region **placements, } } +out_addr: + vm_munmap(addr, obj->base.size); + out_put: i915_gem_object_put(obj); igt_close_objects(i915, &objects); From 317be9c64215f264740c6dffb0a44216960875b2 Mon Sep 17 00:00:00 2001 From: Krzysztof Karas Date: Tue, 5 Aug 2025 11:41:42 +0000 Subject: [PATCH 06/10] drm/i915/gt: Protect against overflow in active_engine() It is unlikely, but possible for the first call to intel_context_create() to fail with -ENOMEM, which would result in entering the following code block and decrementing "count", when it is set to 0 (initial condition in the for loop). Protect from overflowing the variable by checking for 0 value of "count" before entering the loop. Signed-off-by: Krzysztof Karas Reviewed-by: Sebastian Brzezinka Reviewed-by: Andi Shyti Signed-off-by: Andi Shyti Link: https://lore.kernel.org/r/pogr74jktdqehrfap4tjky23ees4x7erh5dwgg5jb2n522cfkw@kpnxe4qzx4pj --- drivers/gpu/drm/i915/gt/selftest_hangcheck.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/i915/gt/selftest_hangcheck.c b/drivers/gpu/drm/i915/gt/selftest_hangcheck.c index f057c16410e7..619c70c54ef9 100644 --- a/drivers/gpu/drm/i915/gt/selftest_hangcheck.c +++ b/drivers/gpu/drm/i915/gt/selftest_hangcheck.c @@ -904,6 +904,8 @@ static void active_engine(struct kthread_work *work) arg->result = PTR_ERR(ce[count]); pr_err("[%s] Create context #%ld failed: %d!\n", engine->name, count, arg->result); + if (!count) + return; while (--count) intel_context_put(ce[count]); return; From 60b006b7cf0dd5e8ee99bd77fb4573e1a3956035 Mon Sep 17 00:00:00 2001 From: Uros Bizjak Date: Thu, 14 Aug 2025 07:43:26 +0100 Subject: [PATCH 07/10] drm/i915/active: Use try_cmpxchg64() in __active_lookup() Replace this pattern in __active_lookup(): cmpxchg64(*ptr, old, new) == old ... with the simpler and faster: try_cmpxchg64(*ptr, &old, new) The x86 CMPXCHG instruction returns success in the ZF flag, so this change saves a compare after the CMPXCHG. The patch also improves the explanation of what the code really does. cmpxchg64() will *succeed* for the winner of the race and try_cmpxchg64() nicely documents this fact. No functional change intended. Signed-off-by: Uros Bizjak Cc: Jani Nikula Cc: Joonas Lahtinen Cc: Rodrigo Vivi Cc: Tvrtko Ursulin Cc: David Airlie Cc: Simona Vetter Reviewed-by: Tvrtko Ursulin Signed-off-by: Tvrtko Ursulin Signed-off-by: Tvrtko Ursulin Link: https://lore.kernel.org/r/20250814064326.95519-1-tvrtko.ursulin@igalia.com --- drivers/gpu/drm/i915/i915_active.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_active.c b/drivers/gpu/drm/i915/i915_active.c index 0dbc4e289300..6b0c1162505a 100644 --- a/drivers/gpu/drm/i915/i915_active.c +++ b/drivers/gpu/drm/i915/i915_active.c @@ -257,10 +257,9 @@ static struct active_node *__active_lookup(struct i915_active *ref, u64 idx) * claimed the cache and we know that is does not match our * idx. If, and only if, the timeline is currently zero is it * worth competing to claim it atomically for ourselves (for - * only the winner of that race will cmpxchg return the old - * value of 0). + * only the winner of that race will cmpxchg succeed). */ - if (!cached && !cmpxchg64(&it->timeline, 0, idx)) + if (!cached && try_cmpxchg64(&it->timeline, &cached, idx)) return it; } From c9932f0d604e4c8f2c6018e598a322acb43c68a2 Mon Sep 17 00:00:00 2001 From: Sebastian Brzezinka Date: Mon, 11 Aug 2025 09:12:31 +0000 Subject: [PATCH 08/10] drm/i915/gt: Relocate compression repacking WA for JSL/EHL CACHE_MODE_0 registers should be saved and restored as part of the context, not during engine reset. Move the related workaround (Disable Repacking for Compression) from rcs_engine_wa_init() to icl_ctx_workarounds_init() for Jasper Lake and Elkhart Lake platforms. This ensures the WA is applied during context initialisation. BSPEC: 11322 Fixes: 0ddae025ab6c ("drm/i915: Disable compression tricks on JSL") Closes: Fixes: 0ddae025ab6c ("drm/i915: Disable compression tricks on JSL") Signed-off-by: Sebastian Brzezinka Cc: stable@vger.kernel.org # v6.13+ Reviewed-by: Andi Shyti Reviewed-by: Krzysztof Karas Signed-off-by: Andi Shyti Link: https://lore.kernel.org/r/4feaa24094e019e000ceb6011d8cd419b0361b3f.1754902406.git.sebastian.brzezinka@intel.com --- drivers/gpu/drm/i915/gt/intel_workarounds.c | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c index b37e400f74e5..5a95f06900b5 100644 --- a/drivers/gpu/drm/i915/gt/intel_workarounds.c +++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c @@ -634,6 +634,8 @@ static void cfl_ctx_workarounds_init(struct intel_engine_cs *engine, static void icl_ctx_workarounds_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) { + struct drm_i915_private *i915 = engine->i915; + /* Wa_1406697149 (WaDisableBankHangMode:icl) */ wa_write(wal, GEN8_L3CNTLREG, GEN8_ERRDETBCTRL); @@ -669,6 +671,15 @@ static void icl_ctx_workarounds_init(struct intel_engine_cs *engine, /* Wa_1406306137:icl,ehl */ wa_mcr_masked_en(wal, GEN9_ROW_CHICKEN4, GEN11_DIS_PICK_2ND_EU); + + if (IS_JASPERLAKE(i915) || IS_ELKHARTLAKE(i915)) { + /* + * Disable Repacking for Compression (masked R/W access) + * before rendering compressed surfaces for display. + */ + wa_masked_en(wal, CACHE_MODE_0_GEN7, + DISABLE_REPACKING_FOR_COMPRESSION); + } } /* @@ -2306,15 +2317,6 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) GEN8_RC_SEMA_IDLE_MSG_DISABLE); } - if (IS_JASPERLAKE(i915) || IS_ELKHARTLAKE(i915)) { - /* - * "Disable Repacking for Compression (masked R/W access) - * before rendering compressed surfaces for display." - */ - wa_masked_en(wal, CACHE_MODE_0_GEN7, - DISABLE_REPACKING_FOR_COMPRESSION); - } - if (GRAPHICS_VER(i915) == 11) { /* This is not an Wa. Enable for better image quality */ wa_masked_en(wal, From 77a16455fae43e304e6adaf83da5b2ba6f3ad1ad Mon Sep 17 00:00:00 2001 From: Sebastian Brzezinka Date: Mon, 11 Aug 2025 09:12:39 +0000 Subject: [PATCH 09/10] drm/i915/gt: Relocate Gen7 context-specific workarounds CACHE_MODE_1 and CACHE_MODE_0 register should be saved and restored as part of the context, not during engine reset. Move the related workarounds (RC_OP_FLUSH_ENABLE, PIXEL_SUBSPAN_COLLECT_OPT_DISABLE) from rcs_engine_wa_init() to gen7_ctx_workarounds_init() for Gen7 platforms. This ensures the WA is applied during context initialisation. BSPEC: 11322, 11323 Signed-off-by: Sebastian Brzezinka Reviewed-by: Andi Shyti Reviewed-by: Krzysztof Karas Signed-off-by: Andi Shyti Link: https://lore.kernel.org/r/06cf152803ab0050e09c521ac2fc3637549860b3.1754902406.git.sebastian.brzezinka@intel.com --- drivers/gpu/drm/i915/gt/intel_workarounds.c | 23 ++++++++++----------- 1 file changed, 11 insertions(+), 12 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c index 5a95f06900b5..18f1ee529de9 100644 --- a/drivers/gpu/drm/i915/gt/intel_workarounds.c +++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c @@ -343,6 +343,17 @@ static void gen7_ctx_workarounds_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) { wa_masked_en(wal, INSTPM, INSTPM_FORCE_ORDERING); + /* WaDisable_RenderCache_OperationalFlush:ivb,vlv,hsw */ + wa_masked_dis(wal, CACHE_MODE_0_GEN7, RC_OP_FLUSH_ENABLE); + + /* + * BSpec says this must be set, even though + * WaDisable4x2SubspanOptimization:ivb,hsw + * WaDisable4x2SubspanOptimization isn't listed for VLV. + */ + wa_masked_en(wal, + CACHE_MODE_1, + PIXEL_SUBSPAN_COLLECT_OPT_DISABLE); } static void gen8_ctx_workarounds_init(struct intel_engine_cs *engine, @@ -2567,18 +2578,6 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) RING_MODE_GEN7(RENDER_RING_BASE), GFX_TLB_INVALIDATE_EXPLICIT | GFX_REPLAY_MODE); - /* WaDisable_RenderCache_OperationalFlush:ivb,vlv,hsw */ - wa_masked_dis(wal, CACHE_MODE_0_GEN7, RC_OP_FLUSH_ENABLE); - - /* - * BSpec says this must be set, even though - * WaDisable4x2SubspanOptimization:ivb,hsw - * WaDisable4x2SubspanOptimization isn't listed for VLV. - */ - wa_masked_en(wal, - CACHE_MODE_1, - PIXEL_SUBSPAN_COLLECT_OPT_DISABLE); - /* * BSpec recommends 8x4 when MSAA is used, * however in practice 16x4 seems fastest. From b7a855f488c6825e3dc06e78c49326491bef6f98 Mon Sep 17 00:00:00 2001 From: Sebastian Brzezinka Date: Mon, 11 Aug 2025 09:12:45 +0000 Subject: [PATCH 10/10] drm/i915/gt: Relocate Gen6 context-specific workaround CACHE_MODE_0 register should be saved and restored as part of the context, not during engine reset. Move the related workaround (RC_OP_FLUSH_ENABLE) from rcs_engine_wa_init() to gen6_ctx_workarounds_init() for Gen6 platforms. This ensures the WA is applied during context initialisation. CM0_STC_EVICT_DISABLE_LRA_SNB is also Gen6-specific, but it does not stick when applied in context, so it remains in engine init. BSPEC: 11322 Signed-off-by: Sebastian Brzezinka Reviewed-by: Andi Shyti Reviewed-by: Krzysztof Karas Signed-off-by: Andi Shyti Link: https://lore.kernel.org/r/f493bab389e51b2faf7c9a439724e9ea9ca04053.1754902406.git.sebastian.brzezinka@intel.com --- drivers/gpu/drm/i915/gt/intel_workarounds.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c index 18f1ee529de9..7d486dfa2fc1 100644 --- a/drivers/gpu/drm/i915/gt/intel_workarounds.c +++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c @@ -337,6 +337,9 @@ static void gen6_ctx_workarounds_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) { wa_masked_en(wal, INSTPM, INSTPM_FORCE_ORDERING); + + /* WaDisable_RenderCache_OperationalFlush:snb */ + wa_masked_dis(wal, CACHE_MODE_0, RC_OP_FLUSH_ENABLE); } static void gen7_ctx_workarounds_init(struct intel_engine_cs *engine, @@ -2644,9 +2647,6 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) GEN6_WIZ_HASHING_MASK, GEN6_WIZ_HASHING_16x4); - /* WaDisable_RenderCache_OperationalFlush:snb */ - wa_masked_dis(wal, CACHE_MODE_0, RC_OP_FLUSH_ENABLE); - /* * From the Sandybridge PRM, volume 1 part 3, page 24: * "If this bit is set, STCunit will have LRA as replacement