drm/i915/gt: Relocate Gen7 context-specific workarounds

The CACHE_MODE_0 and CACHE_MODE_1 registers should be saved and
restored as part of the context, not during engine reset. Move the
related workarounds (RC_OP_FLUSH_ENABLE, PIXEL_SUBSPAN_COLLECT_OPT_DISABLE)
from rcs_engine_wa_init() to gen7_ctx_workarounds_init() for
Gen7 platforms. This ensures the workarounds are applied during
context initialisation.

BSPEC: 11322, 11323

Signed-off-by: Sebastian Brzezinka <sebastian.brzezinka@intel.com>
Reviewed-by: Andi Shyti <andi.shyti@linux.intel.com>
Reviewed-by: Krzysztof Karas <krzysztof.karas@intel.com>
Signed-off-by: Andi Shyti <andi.shyti@linux.intel.com>
Link: https://lore.kernel.org/r/06cf152803ab0050e09c521ac2fc3637549860b3.1754902406.git.sebastian.brzezinka@intel.com
This commit is contained in:
Sebastian Brzezinka 2025-08-11 09:12:39 +00:00 committed by Andi Shyti
parent c9932f0d60
commit 77a16455fa

View File

@ -343,6 +343,17 @@ static void gen7_ctx_workarounds_init(struct intel_engine_cs *engine,
struct i915_wa_list *wal)
{
wa_masked_en(wal, INSTPM, INSTPM_FORCE_ORDERING);
/*
 * CACHE_MODE_0 and CACHE_MODE_1 are saved and restored as part of
 * the context, so their workarounds are added here rather than in
 * rcs_engine_wa_init().
 */
/* WaDisable_RenderCache_OperationalFlush:ivb,vlv,hsw */
wa_masked_dis(wal, CACHE_MODE_0_GEN7, RC_OP_FLUSH_ENABLE);
/*
 * WaDisable4x2SubspanOptimization:ivb,hsw
 *
 * BSpec says this must be set, even though
 * WaDisable4x2SubspanOptimization isn't listed for VLV.
 */
wa_masked_en(wal,
CACHE_MODE_1,
PIXEL_SUBSPAN_COLLECT_OPT_DISABLE);
}
static void gen8_ctx_workarounds_init(struct intel_engine_cs *engine,
@ -2567,18 +2578,6 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
RING_MODE_GEN7(RENDER_RING_BASE),
GFX_TLB_INVALIDATE_EXPLICIT | GFX_REPLAY_MODE);
/* WaDisable_RenderCache_OperationalFlush:ivb,vlv,hsw */
wa_masked_dis(wal, CACHE_MODE_0_GEN7, RC_OP_FLUSH_ENABLE);
/*
* BSpec says this must be set, even though
* WaDisable4x2SubspanOptimization:ivb,hsw
* WaDisable4x2SubspanOptimization isn't listed for VLV.
*/
wa_masked_en(wal,
CACHE_MODE_1,
PIXEL_SUBSPAN_COLLECT_OPT_DISABLE);
/*
* BSpec recommends 8x4 when MSAA is used,
* however in practice 16x4 seems fastest.