Merge tag 'drm-intel-gt-next-2025-09-01' of https://gitlab.freedesktop.org/drm/i915/kernel into drm-next

Driver Changes:

- Apply multiple JSL/EHL/Gen7/Gen6 workarounds properly at context level (Sebastian)
- Protect against overflow in active_engine() (Krzysztof)
- Use try_cmpxchg64() in __active_lookup() (Uros)

- Enable GuC CT_DEAD output in regular debug builds (John)
- Static checker and style fixes (Sebastian)
- Selftest improvements (Krzysztof)

Signed-off-by: Dave Airlie <airlied@redhat.com>
From: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Link: https://lore.kernel.org/r/aLWZoEZVlBj2d8J9@jlahtine-mobl
This commit is contained in:
Dave Airlie 2025-09-02 11:23:39 +10:00
commit 4bf83dd6e3
7 changed files with 98 additions and 98 deletions

View File

@ -1382,8 +1382,9 @@ static void clflush_write32(u32 *addr, u32 value, unsigned int flushes)
*/
if (flushes & CLFLUSH_AFTER)
drm_clflush_virt_range(addr, sizeof(*addr));
} else
} else {
*addr = value;
}
}
static u64
@ -1567,36 +1568,36 @@ static int eb_relocate_vma(struct i915_execbuffer *eb, struct eb_vma *ev)
do {
u64 offset = eb_relocate_entry(eb, ev, r);
if (likely(offset == 0)) {
} else if ((s64)offset < 0) {
if (likely(offset == 0))
continue;
if ((s64)offset < 0) {
remain = (int)offset;
goto out;
} else {
/*
* Note that reporting an error now
* leaves everything in an inconsistent
* state as we have *already* changed
* the relocation value inside the
* object. As we have not changed the
* reloc.presumed_offset or will not
* change the execobject.offset, on the
* call we may not rewrite the value
* inside the object, leaving it
* dangling and causing a GPU hang. Unless
* userspace dynamically rebuilds the
* relocations on each execbuf rather than
* presume a static tree.
*
* We did previously check if the relocations
* were writable (access_ok), an error now
* would be a strange race with mprotect,
* having already demonstrated that we
* can read from this userspace address.
*/
offset = gen8_canonical_addr(offset & ~UPDATE);
__put_user(offset,
&urelocs[r - stack].presumed_offset);
}
/*
* Note that reporting an error now
* leaves everything in an inconsistent
* state as we have *already* changed
* the relocation value inside the
* object. As we have not changed the
* reloc.presumed_offset or will not
* change the execobject.offset, on the
* call we may not rewrite the value
* inside the object, leaving it
* dangling and causing a GPU hang. Unless
* userspace dynamically rebuilds the
* relocations on each execbuf rather than
* presume a static tree.
*
* We did previously check if the relocations
* were writable (access_ok), an error now
* would be a strange race with mprotect,
* having already demonstrated that we
* can read from this userspace address.
*/
offset = gen8_canonical_addr(offset & ~UPDATE);
__put_user(offset, &urelocs[r - stack].presumed_offset);
} while (r++, --count);
urelocs += ARRAY_SIZE(stack);
} while (remain);

View File

@ -1096,32 +1096,20 @@ static int ___igt_mmap_migrate(struct drm_i915_private *i915,
unsigned long addr,
bool unfaultable)
{
struct vm_area_struct *area;
int err = 0, i;
int i;
pr_info("igt_mmap(%s, %d) @ %lx\n",
obj->mm.region->name, I915_MMAP_TYPE_FIXED, addr);
mmap_read_lock(current->mm);
area = vma_lookup(current->mm, addr);
mmap_read_unlock(current->mm);
if (!area) {
pr_err("%s: Did not create a vm_area_struct for the mmap\n",
obj->mm.region->name);
err = -EINVAL;
goto out_unmap;
}
for (i = 0; i < obj->base.size / sizeof(u32); i++) {
u32 __user *ux = u64_to_user_ptr((u64)(addr + i * sizeof(*ux)));
u32 x;
if (get_user(x, ux)) {
err = -EFAULT;
if (!unfaultable) {
pr_err("%s: Unable to read from mmap, offset:%zd\n",
obj->mm.region->name, i * sizeof(x));
goto out_unmap;
return -EFAULT;
}
continue;
@ -1130,37 +1118,29 @@ static int ___igt_mmap_migrate(struct drm_i915_private *i915,
if (unfaultable) {
pr_err("%s: Faulted unmappable memory\n",
obj->mm.region->name);
err = -EINVAL;
goto out_unmap;
return -EINVAL;
}
if (x != expand32(POISON_INUSE)) {
pr_err("%s: Read incorrect value from mmap, offset:%zd, found:%x, expected:%x\n",
obj->mm.region->name,
i * sizeof(x), x, expand32(POISON_INUSE));
err = -EINVAL;
goto out_unmap;
return -EINVAL;
}
x = expand32(POISON_FREE);
if (put_user(x, ux)) {
pr_err("%s: Unable to write to mmap, offset:%zd\n",
obj->mm.region->name, i * sizeof(x));
err = -EFAULT;
goto out_unmap;
return -EFAULT;
}
}
if (unfaultable) {
if (err == -EFAULT)
err = 0;
} else {
obj->flags &= ~I915_BO_ALLOC_GPU_ONLY;
err = wc_check(obj);
}
out_unmap:
vm_munmap(addr, obj->base.size);
return err;
if (unfaultable)
return 0;
obj->flags &= ~I915_BO_ALLOC_GPU_ONLY;
return wc_check(obj);
}
#define IGT_MMAP_MIGRATE_TOPDOWN (1 << 0)
@ -1176,6 +1156,7 @@ static int __igt_mmap_migrate(struct intel_memory_region **placements,
struct drm_i915_private *i915 = placements[0]->i915;
struct drm_i915_gem_object *obj;
struct i915_request *rq = NULL;
struct vm_area_struct *area;
unsigned long addr;
LIST_HEAD(objects);
u64 offset;
@ -1207,20 +1188,30 @@ static int __igt_mmap_migrate(struct intel_memory_region **placements,
goto out_put;
}
mmap_read_lock(current->mm);
area = vma_lookup(current->mm, addr);
mmap_read_unlock(current->mm);
if (!area) {
pr_err("%s: Did not create a vm_area_struct for the mmap\n",
obj->mm.region->name);
err = -EINVAL;
goto out_addr;
}
if (flags & IGT_MMAP_MIGRATE_FILL) {
err = igt_fill_mappable(placements[0], &objects);
if (err)
goto out_put;
goto out_addr;
}
err = i915_gem_object_lock(obj, NULL);
if (err)
goto out_put;
goto out_addr;
err = i915_gem_object_pin_pages(obj);
if (err) {
i915_gem_object_unlock(obj);
goto out_put;
goto out_addr;
}
err = intel_context_migrate_clear(to_gt(i915)->migrate.context, NULL,
@ -1228,7 +1219,7 @@ static int __igt_mmap_migrate(struct intel_memory_region **placements,
i915_gem_object_is_lmem(obj),
expand32(POISON_INUSE), &rq);
i915_gem_object_unpin_pages(obj);
if (rq) {
if (rq && !err) {
err = dma_resv_reserve_fences(obj->base.resv, 1);
if (!err)
dma_resv_add_fence(obj->base.resv, &rq->fence,
@ -1237,7 +1228,7 @@ static int __igt_mmap_migrate(struct intel_memory_region **placements,
}
i915_gem_object_unlock(obj);
if (err)
goto out_put;
goto out_addr;
if (flags & IGT_MMAP_MIGRATE_EVICTABLE)
igt_make_evictable(&objects);
@ -1245,16 +1236,16 @@ static int __igt_mmap_migrate(struct intel_memory_region **placements,
if (flags & IGT_MMAP_MIGRATE_FAIL_GPU) {
err = i915_gem_object_lock(obj, NULL);
if (err)
goto out_put;
goto out_addr;
/*
* Ensure we only simulate the gpu failuire when faulting the
* Ensure we only simulate the gpu failure when faulting the
* pages.
*/
err = i915_gem_object_wait_moving_fence(obj, true);
i915_gem_object_unlock(obj);
if (err)
goto out_put;
goto out_addr;
i915_ttm_migrate_set_failure_modes(true, false);
}
@ -1298,6 +1289,9 @@ static int __igt_mmap_migrate(struct intel_memory_region **placements,
}
}
out_addr:
vm_munmap(addr, obj->base.size);
out_put:
i915_gem_object_put(obj);
igt_close_objects(i915, &objects);

View File

@ -337,12 +337,26 @@ static void gen6_ctx_workarounds_init(struct intel_engine_cs *engine,
struct i915_wa_list *wal)
{
wa_masked_en(wal, INSTPM, INSTPM_FORCE_ORDERING);
/* WaDisable_RenderCache_OperationalFlush:snb */
wa_masked_dis(wal, CACHE_MODE_0, RC_OP_FLUSH_ENABLE);
}
static void gen7_ctx_workarounds_init(struct intel_engine_cs *engine,
struct i915_wa_list *wal)
{
wa_masked_en(wal, INSTPM, INSTPM_FORCE_ORDERING);
/* WaDisable_RenderCache_OperationalFlush:ivb,vlv,hsw */
wa_masked_dis(wal, CACHE_MODE_0_GEN7, RC_OP_FLUSH_ENABLE);
/*
* BSpec says this must be set, even though
* WaDisable4x2SubspanOptimization:ivb,hsw
* WaDisable4x2SubspanOptimization isn't listed for VLV.
*/
wa_masked_en(wal,
CACHE_MODE_1,
PIXEL_SUBSPAN_COLLECT_OPT_DISABLE);
}
static void gen8_ctx_workarounds_init(struct intel_engine_cs *engine,
@ -634,6 +648,8 @@ static void cfl_ctx_workarounds_init(struct intel_engine_cs *engine,
static void icl_ctx_workarounds_init(struct intel_engine_cs *engine,
struct i915_wa_list *wal)
{
struct drm_i915_private *i915 = engine->i915;
/* Wa_1406697149 (WaDisableBankHangMode:icl) */
wa_write(wal, GEN8_L3CNTLREG, GEN8_ERRDETBCTRL);
@ -669,6 +685,15 @@ static void icl_ctx_workarounds_init(struct intel_engine_cs *engine,
/* Wa_1406306137:icl,ehl */
wa_mcr_masked_en(wal, GEN9_ROW_CHICKEN4, GEN11_DIS_PICK_2ND_EU);
if (IS_JASPERLAKE(i915) || IS_ELKHARTLAKE(i915)) {
/*
* Disable Repacking for Compression (masked R/W access)
* before rendering compressed surfaces for display.
*/
wa_masked_en(wal, CACHE_MODE_0_GEN7,
DISABLE_REPACKING_FOR_COMPRESSION);
}
}
/*
@ -2306,15 +2331,6 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
GEN8_RC_SEMA_IDLE_MSG_DISABLE);
}
if (IS_JASPERLAKE(i915) || IS_ELKHARTLAKE(i915)) {
/*
* "Disable Repacking for Compression (masked R/W access)
* before rendering compressed surfaces for display."
*/
wa_masked_en(wal, CACHE_MODE_0_GEN7,
DISABLE_REPACKING_FOR_COMPRESSION);
}
if (GRAPHICS_VER(i915) == 11) {
/* This is not an Wa. Enable for better image quality */
wa_masked_en(wal,
@ -2565,18 +2581,6 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
RING_MODE_GEN7(RENDER_RING_BASE),
GFX_TLB_INVALIDATE_EXPLICIT | GFX_REPLAY_MODE);
/* WaDisable_RenderCache_OperationalFlush:ivb,vlv,hsw */
wa_masked_dis(wal, CACHE_MODE_0_GEN7, RC_OP_FLUSH_ENABLE);
/*
* BSpec says this must be set, even though
* WaDisable4x2SubspanOptimization:ivb,hsw
* WaDisable4x2SubspanOptimization isn't listed for VLV.
*/
wa_masked_en(wal,
CACHE_MODE_1,
PIXEL_SUBSPAN_COLLECT_OPT_DISABLE);
/*
* BSpec recommends 8x4 when MSAA is used,
* however in practice 16x4 seems fastest.
@ -2643,9 +2647,6 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
GEN6_WIZ_HASHING_MASK,
GEN6_WIZ_HASHING_16x4);
/* WaDisable_RenderCache_OperationalFlush:snb */
wa_masked_dis(wal, CACHE_MODE_0, RC_OP_FLUSH_ENABLE);
/*
* From the Sandybridge PRM, volume 1 part 3, page 24:
* "If this bit is set, STCunit will have LRA as replacement

View File

@ -904,6 +904,8 @@ static void active_engine(struct kthread_work *work)
arg->result = PTR_ERR(ce[count]);
pr_err("[%s] Create context #%ld failed: %d!\n",
engine->name, count, arg->result);
if (!count)
return;
while (--count)
intel_context_put(ce[count]);
return;

View File

@ -13,7 +13,7 @@
#include "intel_guc_ct.h"
#include "intel_guc_print.h"
#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GUC)
#if IS_ENABLED(CONFIG_DRM_I915_DEBUG)
enum {
CT_DEAD_ALIVE = 0,
CT_DEAD_SETUP,
@ -144,7 +144,7 @@ void intel_guc_ct_init_early(struct intel_guc_ct *ct)
spin_lock_init(&ct->requests.lock);
INIT_LIST_HEAD(&ct->requests.pending);
INIT_LIST_HEAD(&ct->requests.incoming);
#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GUC)
#if IS_ENABLED(CONFIG_DRM_I915_DEBUG)
INIT_WORK(&ct->dead_ct_worker, ct_dead_ct_worker_func);
#endif
INIT_WORK(&ct->requests.worker, ct_incoming_request_worker_func);
@ -373,7 +373,7 @@ int intel_guc_ct_enable(struct intel_guc_ct *ct)
ct->enabled = true;
ct->stall_time = KTIME_MAX;
#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GUC)
#if IS_ENABLED(CONFIG_DRM_I915_DEBUG)
ct->dead_ct_reported = false;
ct->dead_ct_reason = CT_DEAD_ALIVE;
#endif
@ -1377,7 +1377,7 @@ void intel_guc_ct_print_info(struct intel_guc_ct *ct,
ct->ctbs.recv.desc->tail);
}
#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GUC)
#if IS_ENABLED(CONFIG_DRM_I915_DEBUG)
static void ct_dead_ct_worker_func(struct work_struct *w)
{
struct intel_guc_ct *ct = container_of(w, struct intel_guc_ct, dead_ct_worker);
@ -1386,6 +1386,9 @@ static void ct_dead_ct_worker_func(struct work_struct *w)
if (ct->dead_ct_reported)
return;
if (i915_error_injected())
return;
ct->dead_ct_reported = true;
guc_info(guc, "CTB is dead - reason=0x%X\n", ct->dead_ct_reason);

View File

@ -97,7 +97,7 @@ struct intel_guc_ct {
/** @stall_time: time of first time a CTB submission is stalled */
ktime_t stall_time;
#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GUC)
#if IS_ENABLED(CONFIG_DRM_I915_DEBUG)
int dead_ct_reason;
bool dead_ct_reported;
struct work_struct dead_ct_worker;

View File

@ -257,10 +257,9 @@ static struct active_node *__active_lookup(struct i915_active *ref, u64 idx)
* claimed the cache and we know that it does not match our
* idx. If, and only if, the timeline is currently zero is it
* worth competing to claim it atomically for ourselves (for
* only the winner of that race will cmpxchg return the old
* value of 0).
* only the winner of that race will cmpxchg succeed).
*/
if (!cached && !cmpxchg64(&it->timeline, 0, idx))
if (!cached && try_cmpxchg64(&it->timeline, &cached, idx))
return it;
}