From ff868667a4f9c6e349f4a4f0dcc3ff60d6ea8a73 Mon Sep 17 00:00:00 2001 From: Julia Filipchuk Date: Fri, 2 May 2025 15:39:24 -0700 Subject: [PATCH 01/15] drm/i915/guc: Enable DUAL_QUEUE_WA for newer platforms For newer platforms (post DG2) hardware intentionally stalls on submisstion of concurrent submissions on RCS and CCS of different address spaces. With this workaround GuC will never schedule such conlicting contexts; preventing detection of a stall as a hang. GuC specs recommend to enable this for all platforms starting from MTL supporting CCS. v2: Use existing macros for version check. (Jani) v3: Reword explanation for clarity. Remove unneeded parens. Remove accidental comment change. (Daniele) Signed-off-by: Julia Filipchuk Cc: Daniele Ceraolo Spurio Reviewed-by: Daniele Ceraolo Spurio Signed-off-by: Daniele Ceraolo Spurio Link: https://lore.kernel.org/r/20250502223924.94628-1-julia.filipchuk@intel.com --- drivers/gpu/drm/i915/gt/uc/intel_guc.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.c b/drivers/gpu/drm/i915/gt/uc/intel_guc.c index 5949ff0b0161..553daf32c0b5 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.c @@ -313,8 +313,13 @@ static u32 guc_ctl_wa_flags(struct intel_guc *guc) * * The same WA bit is used for both and 22011391025 is applicable to * all DG2. + * + * Platforms post DG2 prevent this issue in hardware by stalling + * submissions. With this flag GuC will schedule as to avoid such + * stalls. */ - if (IS_DG2(gt->i915)) + if (IS_DG2(gt->i915) || + (CCS_MASK(gt) && GRAPHICS_VER_FULL(gt->i915) >= IP_VER(12, 70))) flags |= GUC_WA_DUAL_QUEUE; /* Wa_22011802037: graphics version 11/12 */ From 5dff17241483d934d8a72633fb2c2b7eaf2d91a3 Mon Sep 17 00:00:00 2001 From: Mikolaj Wasiak Date: Fri, 9 May 2025 09:57:18 +0200 Subject: [PATCH 02/15] drm/i915/selftest: allow larger memory allocation Due to changes in allocator, the size of the allocation for contiguous region is not rounded up to a power-of-two and instead allocated as is. Thus, change the part of test that expected the allocation to fail. Signed-off-by: Mikolaj Wasiak Reviewed-by: Krzysztof Karas Reviewed-by: Andi Shyti Reviewed-by: Krzysztof Niemiec Signed-off-by: Andi Shyti Link: https://lore.kernel.org/r/fptevdfbclvg2lbfuys5ibffbl2baouywkutnr7vdsy5tzcqfk@mpflwlh6jxfd --- .../gpu/drm/i915/selftests/intel_memory_region.c | 14 +++----------- 1 file changed, 3 insertions(+), 11 deletions(-) diff --git a/drivers/gpu/drm/i915/selftests/intel_memory_region.c b/drivers/gpu/drm/i915/selftests/intel_memory_region.c index f08f6674911e..7b856b5090f9 100644 --- a/drivers/gpu/drm/i915/selftests/intel_memory_region.c +++ b/drivers/gpu/drm/i915/selftests/intel_memory_region.c @@ -413,15 +413,8 @@ static int igt_mock_splintered_region(void *arg) close_objects(mem, &objects); - /* - * While we should be able allocate everything without any flag - * restrictions, if we consider I915_BO_ALLOC_CONTIGUOUS then we are - * actually limited to the largest power-of-two for the region size i.e - * max_order, due to the inner workings of the buddy allocator. So make - * sure that does indeed hold true. - */ - - obj = igt_object_create(mem, &objects, size, I915_BO_ALLOC_CONTIGUOUS); + obj = igt_object_create(mem, &objects, roundup_pow_of_two(size), + I915_BO_ALLOC_CONTIGUOUS); if (!IS_ERR(obj)) { pr_err("%s too large contiguous allocation was not rejected\n", __func__); @@ -429,8 +422,7 @@ static int igt_mock_splintered_region(void *arg) goto out_close; } - obj = igt_object_create(mem, &objects, rounddown_pow_of_two(size), - I915_BO_ALLOC_CONTIGUOUS); + obj = igt_object_create(mem, &objects, size, I915_BO_ALLOC_CONTIGUOUS); if (IS_ERR(obj)) { pr_err("%s largest possible contiguous allocation failed\n", __func__); From 16fa6b89990a76302446f5c4e866991ef08bcf1e Mon Sep 17 00:00:00 2001 From: Tvrtko Ursulin Date: Mon, 12 May 2025 08:20:47 +0100 Subject: [PATCH 03/15] drm/i915: Use provided dma_fence_is_chain Replace open-coded helper with the subsystem one. Signed-off-by: Tvrtko Ursulin Reviewed-by: Matthew Brost Signed-off-by: Tvrtko Ursulin Link: https://lore.kernel.org/r/20250512072047.56851-1-tvrtko.ursulin@igalia.com --- drivers/gpu/drm/i915/gem/i915_gem_wait.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_wait.c b/drivers/gpu/drm/i915/gem/i915_gem_wait.c index 7127e90c1a8f..991666fd9f85 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_wait.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_wait.c @@ -106,11 +106,6 @@ static void fence_set_priority(struct dma_fence *fence, rcu_read_unlock(); } -static inline bool __dma_fence_is_chain(const struct dma_fence *fence) -{ - return fence->ops == &dma_fence_chain_ops; -} - void i915_gem_fence_wait_priority(struct dma_fence *fence, const struct i915_sched_attr *attr) { @@ -126,7 +121,7 @@ void i915_gem_fence_wait_priority(struct dma_fence *fence, for (i = 0; i < array->num_fences; i++) fence_set_priority(array->fences[i], attr); - } else if (__dma_fence_is_chain(fence)) { + } else if (dma_fence_is_chain(fence)) { struct dma_fence *iter; /* The chain is ordered; if we boost the last, we boost all */ From d6e020819612a4a06207af858e0978be4d3e3140 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Mon, 12 May 2025 21:22:15 +0200 Subject: [PATCH 04/15] drm/i915/gem: Allow EXEC_CAPTURE on recoverable contexts on DG1 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The intel-media-driver is currently broken on DG1 because it uses EXEC_CAPTURE with recovarable contexts. Relax the check to allow that. I've also submitted a fix for the intel-media-driver: https://github.com/intel/media-driver/pull/1920 Cc: stable@vger.kernel.org # v6.0+ Cc: Matthew Auld Cc: Thomas Hellström Testcase: igt/gem_exec_capture/capture-invisible Fixes: 71b1669ea9bd ("drm/i915/uapi: tweak error capture on recoverable contexts") Reviewed-by: Andi Shyti Signed-off-by: Ville Syrjälä Signed-off-by: Andi Shyti Link: https://lore.kernel.org/r/20250411144313.11660-2-ville.syrjala@linux.intel.com --- drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c index 02c59808cbe4..7d44aadcd5a5 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c @@ -2013,7 +2013,7 @@ static int eb_capture_stage(struct i915_execbuffer *eb) continue; if (i915_gem_context_is_recoverable(eb->gem_context) && - (IS_DGFX(eb->i915) || GRAPHICS_VER_FULL(eb->i915) > IP_VER(12, 0))) + GRAPHICS_VER_FULL(eb->i915) > IP_VER(12, 10)) return -EINVAL; for_each_batch_create_order(eb, j) { From 9b961744a83a3027b8d4b97fe1fc587334883ce8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Mon, 12 May 2025 21:22:16 +0200 Subject: [PATCH 05/15] drm/i915/pci: Remove force_probe requirement for DG1 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Dunno why we still have .require_force_probe=1 on DG1 after all this time. I'm not aware of any real problems with DG1, so get rid of the force_probe requirement. Generally the difficulty with DG1 is that it requires a 4GiB BAR for the local memory, and that's not something that works on every system. Reviewed-by: Andi Shyti Signed-off-by: Ville Syrjälä Signed-off-by: Andi Shyti Link: https://lore.kernel.org/r/20250411144313.11660-3-ville.syrjala@linux.intel.com --- drivers/gpu/drm/i915/i915_pci.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c index 21006c7f615c..b2e311f4791a 100644 --- a/drivers/gpu/drm/i915/i915_pci.c +++ b/drivers/gpu/drm/i915/i915_pci.c @@ -663,7 +663,6 @@ static const struct intel_device_info dg1_info = { DGFX_FEATURES, .__runtime.graphics.ip.rel = 10, PLATFORM(INTEL_DG1), - .require_force_probe = 1, .platform_engine_mask = BIT(RCS0) | BIT(BCS0) | BIT(VECS0) | BIT(VCS0) | BIT(VCS2), From d2dc30e0aa252830f908c8e793d3139d51321370 Mon Sep 17 00:00:00 2001 From: Joonas Lahtinen Date: Thu, 22 May 2025 09:41:27 +0300 Subject: [PATCH 06/15] Revert "drm/i915/gem: Allow EXEC_CAPTURE on recoverable contexts on DG1" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This reverts commit d6e020819612a4a06207af858e0978be4d3e3140. The IS_DGFX check was put in place because error capture of buffer objects is expected to be broken on devices with VRAM. Userspace fix[1] to the impacted media driver has been submitted, merged and a new driver release is out as 25.2.3 where the capture flag is dropped on DG1 thus unblocking the usage of media driver on DG1. [1] https://github.com/intel/media-driver/commit/93c07d9b4b96a78bab21f6acd4eb863f4313ea4a Cc: stable@vger.kernel.org # v6.0+ Cc: Ville Syrjälä Cc: Andi Shyti Cc: Matthew Auld Cc: Thomas Hellström Cc: Tvrtko Ursulin Acked-by: Tvrtko Ursulin Reviewed-by: Andi Shyti Link: https://lore.kernel.org/r/20250522064127.24293-1-joonas.lahtinen@linux.intel.com [Joonas: Update message to point out the merged userspace fix] Signed-off-by: Joonas Lahtinen --- drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c index 7d44aadcd5a5..02c59808cbe4 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c @@ -2013,7 +2013,7 @@ static int eb_capture_stage(struct i915_execbuffer *eb) continue; if (i915_gem_context_is_recoverable(eb->gem_context) && - GRAPHICS_VER_FULL(eb->i915) > IP_VER(12, 10)) + (IS_DGFX(eb->i915) || GRAPHICS_VER_FULL(eb->i915) > IP_VER(12, 0))) return -EINVAL; for_each_batch_create_order(eb, j) { From a6a26786f22a4ab0227bcf610510c4c9c2df0808 Mon Sep 17 00:00:00 2001 From: Jesus Narvaez Date: Wed, 14 May 2025 15:52:24 -0700 Subject: [PATCH 07/15] drm/i915/guc: Check if expecting reply before decrementing outstanding_submission_g2h When sending a H2G message where a reply is expected in guc_submission_send_busy_loop(), outstanding_submission_g2h is incremented before the send. However, if there is an error sending the message, outstanding_submission_g2h is decremented without checking if a reply is expected. Therefore, check if reply is expected when there is a failure before decrementing outstanding_submission_g2h. Fixes: 2f2cc53b5fe7 ("drm/i915/guc: Close deregister-context race against CT-loss") Signed-off-by: Jesus Narvaez Cc: Daniele Ceraolo Spurio Cc: Alan Previn Cc: Anshuman Gupta Cc: Mousumi Jana Cc: Rodrigo Vivi Cc: Matt Roper Reviewed-by: Daniele Ceraolo Spurio Signed-off-by: John Harrison Link: https://lore.kernel.org/r/20250514225224.4142684-1-jesus.narvaez@intel.com --- drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c index f8cb7c630d5b..108331a69995 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c @@ -633,7 +633,7 @@ static int guc_submission_send_busy_loop(struct intel_guc *guc, atomic_inc(&guc->outstanding_submission_g2h); ret = intel_guc_send_busy_loop(guc, action, len, g2h_len_dw, loop); - if (ret) + if (ret && g2h_len_dw) atomic_dec(&guc->outstanding_submission_g2h); return ret; From f36a75aba1c3176d177964bca76f86a075d2943a Mon Sep 17 00:00:00 2001 From: Jesus Narvaez Date: Wed, 28 May 2025 16:05:51 -0700 Subject: [PATCH 08/15] drm/i915/guc: Handle race condition where wakeref count drops below 0 There is a rare race condition when preparing for a reset where guc_lrc_desc_unpin() could be in the process of deregistering a context while a different thread is scrubbing outstanding contexts and it alters the context state and does a wakeref put. Then, if there is a failure with deregister_context(), a second wakeref put could occur. As a result the wakeref count could drop below 0 and fail an INTEL_WAKEREF_BUG_ON() check. Therefore if there is a failure with deregister_context(), undo the context state changes and do a wakeref put only if the context was set to be destroyed earlier. v2: Expand comment to better explain change. (Daniele) v3: Removed addition to the original comment. (Daniele) Fixes: 2f2cc53b5fe7 ("drm/i915/guc: Close deregister-context race against CT-loss") Signed-off-by: Jesus Narvaez Cc: Daniele Ceraolo Spurio Cc: Alan Previn Cc: Anshuman Gupta Cc: Mousumi Jana Cc: Rodrigo Vivi Cc: Matt Roper Reviewed-by: Daniele Ceraolo Spurio Signed-off-by: John Harrison Link: https://lore.kernel.org/r/20250528230551.1855177-1-jesus.narvaez@intel.com --- .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c index 108331a69995..127316d2c8aa 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c @@ -3443,18 +3443,29 @@ static inline int guc_lrc_desc_unpin(struct intel_context *ce) * GuC is active, lets destroy this context, but at this point we can still be racing * with suspend, so we undo everything if the H2G fails in deregister_context so * that GuC reset will find this context during clean up. + * + * There is a race condition where the reset code could have altered + * this context's state and done a wakeref put before we try to + * deregister it here. So check if the context is still set to be + * destroyed before undoing earlier changes, to avoid two wakeref puts + * on the same context. */ ret = deregister_context(ce, ce->guc_id.id); if (ret) { + bool pending_destroyed; spin_lock_irqsave(&ce->guc_state.lock, flags); - set_context_registered(ce); - clr_context_destroyed(ce); + pending_destroyed = context_destroyed(ce); + if (pending_destroyed) { + set_context_registered(ce); + clr_context_destroyed(ce); + } spin_unlock_irqrestore(&ce->guc_state.lock, flags); /* * As gt-pm is awake at function entry, intel_wakeref_put_async merely decrements * the wakeref immediately but per function spec usage call this after unlock. */ - intel_wakeref_put_async(>->wakeref); + if (pending_destroyed) + intel_wakeref_put_async(>->wakeref); } return ret; From 686d773186bf72b739bab7e12eb8665d914676ee Mon Sep 17 00:00:00 2001 From: Tzung-Bi Shih Date: Thu, 12 Jun 2025 08:30:23 +0000 Subject: [PATCH 09/15] drm/i915/pmu: Fix build error with GCOV and AutoFDO enabled i915_pmu.c may fail to build with GCOV and AutoFDO enabled. ../drivers/gpu/drm/i915/i915_pmu.c:116:3: error: call to '__compiletime_assert_487' declared with 'error' attribute: BUILD_BUG_ON failed: bit > BITS_PER_TYPE(typeof_member(struct i915_pmu, enable)) - 1 116 | BUILD_BUG_ON(bit > | ^ Here is a way to reproduce the issue: $ git checkout v6.15 $ mkdir build $ ./scripts/kconfig/merge_config.sh -O build -n -m <(cat < Signed-off-by: Tzung-Bi Shih Signed-off-by: Tvrtko Ursulin Link: https://lore.kernel.org/r/20250612083023.562585-1-tzungbi@kernel.org --- drivers/gpu/drm/i915/i915_pmu.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c index e5a188ce3185..990bfaba3ce4 100644 --- a/drivers/gpu/drm/i915/i915_pmu.c +++ b/drivers/gpu/drm/i915/i915_pmu.c @@ -112,7 +112,7 @@ static u32 config_mask(const u64 config) { unsigned int bit = config_bit(config); - if (__builtin_constant_p(config)) + if (__builtin_constant_p(bit)) BUILD_BUG_ON(bit > BITS_PER_TYPE(typeof_member(struct i915_pmu, enable)) - 1); @@ -121,7 +121,7 @@ static u32 config_mask(const u64 config) BITS_PER_TYPE(typeof_member(struct i915_pmu, enable)) - 1); - return BIT(config_bit(config)); + return BIT(bit); } static bool is_engine_event(struct perf_event *event) From ef69f9dd1cd7301cdf04ba326ed28152a3affcf6 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 20 Jun 2025 13:18:18 +0200 Subject: [PATCH 10/15] drm/i915: fix build error some more An earlier patch fixed a build failure with clang, but I still see the same problem with some configurations using gcc: drivers/gpu/drm/i915/i915_pmu.c: In function 'config_mask': include/linux/compiler_types.h:568:38: error: call to '__compiletime_assert_462' declared with attribute error: BUILD_BUG_ON failed: bit > BITS_PER_TYPE(typeof_member(struct i915_pmu, enable)) - 1 drivers/gpu/drm/i915/i915_pmu.c:116:3: note: in expansion of macro 'BUILD_BUG_ON' 116 | BUILD_BUG_ON(bit > As I understand it, the problem is that the function is not always fully inlined, but the __builtin_constant_p() can still evaluate the argument as being constant. Marking it as __always_inline so far works for me in all configurations. Fixes: 686d773186bf ("drm/i915/pmu: Fix build error with GCOV and AutoFDO enabled") Fixes: a644fde77ff7 ("drm/i915/pmu: Change bitmask of enabled events to u32") Reviewed-by: Rodrigo Vivi Signed-off-by: Arnd Bergmann Link: https://lore.kernel.org/r/20250620111824.3395007-1-arnd@kernel.org Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/i915/i915_pmu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c index 990bfaba3ce4..5bc696bfbb0f 100644 --- a/drivers/gpu/drm/i915/i915_pmu.c +++ b/drivers/gpu/drm/i915/i915_pmu.c @@ -108,7 +108,7 @@ static unsigned int config_bit(const u64 config) return other_bit(config); } -static u32 config_mask(const u64 config) +static __always_inline u32 config_mask(const u64 config) { unsigned int bit = config_bit(config); From c371161086153623796dff9de713e98c47f4c586 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 20 Jun 2025 13:36:38 +0200 Subject: [PATCH 11/15] drm/i915: reduce stack usage in igt_vma_pin1() The igt_vma_pin1() function has a rather high stack usage, which gets in the way of reducing the default warning limit: In file included from drivers/gpu/drm/i915/i915_vma.c:2285: drivers/gpu/drm/i915/selftests/i915_vma.c:257:12: error: stack frame size (1288) exceeds limit (1280) in 'igt_vma_pin1' [-Werror,-Wframe-larger-than] There are two things going on here: - The on-stack modes[] array is really large itself and gets constructed for every call, using around 1000 bytes itself depending on the configuration. - The call to i915_vma_pin() gets inlined and adds another 200 bytes for the i915_gem_ww_ctx structure since commit 7d1c2618eac5 ("drm/i915: Take reservation lock around i915_vma_pin.") The second one is easy enough to change, by moving the function into the appropriate .c file. Since it is already large enough to not always be inlined, this seems like a good idea regardless, reducing both the code size and the internal stack usage of each of its 67 callers. Reviewed-by: Rodrigo Vivi Signed-off-by: Arnd Bergmann Link: https://lore.kernel.org/r/20250620113644.3844552-1-arnd@kernel.org Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/i915/i915_vma.c | 20 ++++++++++++++++++++ drivers/gpu/drm/i915/i915_vma.h | 22 ++-------------------- 2 files changed, 22 insertions(+), 20 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c index 632e316f8b05..25e97031d76e 100644 --- a/drivers/gpu/drm/i915/i915_vma.c +++ b/drivers/gpu/drm/i915/i915_vma.c @@ -1607,6 +1607,26 @@ int i915_vma_pin_ww(struct i915_vma *vma, struct i915_gem_ww_ctx *ww, return err; } +int i915_vma_pin(struct i915_vma *vma, u64 size, u64 alignment, u64 flags) +{ + struct i915_gem_ww_ctx ww; + int err; + + i915_gem_ww_ctx_init(&ww, true); +retry: + err = i915_gem_object_lock(vma->obj, &ww); + if (!err) + err = i915_vma_pin_ww(vma, &ww, size, alignment, flags); + if (err == -EDEADLK) { + err = i915_gem_ww_ctx_backoff(&ww); + if (!err) + goto retry; + } + i915_gem_ww_ctx_fini(&ww); + + return err; +} + static void flush_idle_contexts(struct intel_gt *gt) { struct intel_engine_cs *engine; diff --git a/drivers/gpu/drm/i915/i915_vma.h b/drivers/gpu/drm/i915/i915_vma.h index 6a6be8048aa8..14ccbd0636bb 100644 --- a/drivers/gpu/drm/i915/i915_vma.h +++ b/drivers/gpu/drm/i915/i915_vma.h @@ -289,26 +289,8 @@ int __must_check i915_vma_pin_ww(struct i915_vma *vma, struct i915_gem_ww_ctx *ww, u64 size, u64 alignment, u64 flags); -static inline int __must_check -i915_vma_pin(struct i915_vma *vma, u64 size, u64 alignment, u64 flags) -{ - struct i915_gem_ww_ctx ww; - int err; - - i915_gem_ww_ctx_init(&ww, true); -retry: - err = i915_gem_object_lock(vma->obj, &ww); - if (!err) - err = i915_vma_pin_ww(vma, &ww, size, alignment, flags); - if (err == -EDEADLK) { - err = i915_gem_ww_ctx_backoff(&ww); - if (!err) - goto retry; - } - i915_gem_ww_ctx_fini(&ww); - - return err; -} +int __must_check +i915_vma_pin(struct i915_vma *vma, u64 size, u64 alignment, u64 flags); int i915_ggtt_pin(struct i915_vma *vma, struct i915_gem_ww_ctx *ww, u32 align, unsigned int flags); From dcf558293fd8c6408a3acb2a534715af48ee63b3 Mon Sep 17 00:00:00 2001 From: Krzysztof Karas Date: Wed, 18 Jun 2025 13:51:30 +0000 Subject: [PATCH 12/15] drm/i915: Move out engine related macros from i915_drv.h Move macros related to engines out of i915_drv.h header and place them in intel_engine.h. Signed-off-by: Krzysztof Karas Reviewed-by: Andi Shyti Signed-off-by: Andi Shyti Link: https://lore.kernel.org/r/9b9ed5bbdb37470fa679c5baf961424c9cfbad11.1750251040.git.krzysztof.karas@intel.com --- drivers/gpu/drm/i915/gt/intel_engine.h | 31 ++++++++++++++++++++++++++ drivers/gpu/drm/i915/i915_drv.h | 31 -------------------------- 2 files changed, 31 insertions(+), 31 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_engine.h b/drivers/gpu/drm/i915/gt/intel_engine.h index 325da0414d94..f6a98cf1e5a5 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine.h +++ b/drivers/gpu/drm/i915/gt/intel_engine.h @@ -79,6 +79,29 @@ struct lock_class_key; #define ENGINE_WRITE(...) __ENGINE_WRITE_OP(write, __VA_ARGS__) #define ENGINE_WRITE_FW(...) __ENGINE_WRITE_OP(write_fw, __VA_ARGS__) +#define __HAS_ENGINE(engine_mask, id) ((engine_mask) & BIT(id)) +#define HAS_ENGINE(gt, id) __HAS_ENGINE((gt)->info.engine_mask, id) + +#define __ENGINE_INSTANCES_MASK(mask, first, count) ({ \ + unsigned int first__ = (first); \ + unsigned int count__ = (count); \ + ((mask) & GENMASK(first__ + count__ - 1, first__)) >> first__; \ +}) + +#define ENGINE_INSTANCES_MASK(gt, first, count) \ + __ENGINE_INSTANCES_MASK((gt)->info.engine_mask, first, count) + +#define RCS_MASK(gt) \ + ENGINE_INSTANCES_MASK(gt, RCS0, I915_MAX_RCS) +#define BCS_MASK(gt) \ + ENGINE_INSTANCES_MASK(gt, BCS0, I915_MAX_BCS) +#define VDBOX_MASK(gt) \ + ENGINE_INSTANCES_MASK(gt, VCS0, I915_MAX_VCS) +#define VEBOX_MASK(gt) \ + ENGINE_INSTANCES_MASK(gt, VECS0, I915_MAX_VECS) +#define CCS_MASK(gt) \ + ENGINE_INSTANCES_MASK(gt, CCS0, I915_MAX_CCS) + #define GEN6_RING_FAULT_REG_READ(engine__) \ intel_uncore_read((engine__)->uncore, RING_FAULT_REG(engine__)) @@ -355,4 +378,12 @@ u64 intel_clamp_preempt_timeout_ms(struct intel_engine_cs *engine, u64 value); u64 intel_clamp_stop_timeout_ms(struct intel_engine_cs *engine, u64 value); u64 intel_clamp_timeslice_duration_ms(struct intel_engine_cs *engine, u64 value); +#define rb_to_uabi_engine(rb) \ + rb_entry_safe(rb, struct intel_engine_cs, uabi_node) + +#define for_each_uabi_engine(engine__, i915__) \ + for ((engine__) = rb_to_uabi_engine(rb_first(&(i915__)->uabi_engines));\ + (engine__); \ + (engine__) = rb_to_uabi_engine(rb_next(&(engine__)->uabi_node))) + #endif /* _INTEL_RINGBUFFER_H_ */ diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index d0e1980dcba2..626743462989 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -374,14 +374,6 @@ static inline struct intel_gt *to_gt(const struct drm_i915_private *i915) return i915->gt[0]; } -#define rb_to_uabi_engine(rb) \ - rb_entry_safe(rb, struct intel_engine_cs, uabi_node) - -#define for_each_uabi_engine(engine__, i915__) \ - for ((engine__) = rb_to_uabi_engine(rb_first(&(i915__)->uabi_engines));\ - (engine__); \ - (engine__) = rb_to_uabi_engine(rb_next(&(engine__)->uabi_node))) - #define INTEL_INFO(i915) ((i915)->__info) #define RUNTIME_INFO(i915) (&(i915)->__runtime) #define DRIVER_CAPS(i915) (&(i915)->caps) @@ -590,29 +582,6 @@ IS_SUBPLATFORM(const struct drm_i915_private *i915, #define IS_GEN9_LP(i915) (IS_BROXTON(i915) || IS_GEMINILAKE(i915)) #define IS_GEN9_BC(i915) (GRAPHICS_VER(i915) == 9 && !IS_GEN9_LP(i915)) -#define __HAS_ENGINE(engine_mask, id) ((engine_mask) & BIT(id)) -#define HAS_ENGINE(gt, id) __HAS_ENGINE((gt)->info.engine_mask, id) - -#define __ENGINE_INSTANCES_MASK(mask, first, count) ({ \ - unsigned int first__ = (first); \ - unsigned int count__ = (count); \ - ((mask) & GENMASK(first__ + count__ - 1, first__)) >> first__; \ -}) - -#define ENGINE_INSTANCES_MASK(gt, first, count) \ - __ENGINE_INSTANCES_MASK((gt)->info.engine_mask, first, count) - -#define RCS_MASK(gt) \ - ENGINE_INSTANCES_MASK(gt, RCS0, I915_MAX_RCS) -#define BCS_MASK(gt) \ - ENGINE_INSTANCES_MASK(gt, BCS0, I915_MAX_BCS) -#define VDBOX_MASK(gt) \ - ENGINE_INSTANCES_MASK(gt, VCS0, I915_MAX_VCS) -#define VEBOX_MASK(gt) \ - ENGINE_INSTANCES_MASK(gt, VECS0, I915_MAX_VECS) -#define CCS_MASK(gt) \ - ENGINE_INSTANCES_MASK(gt, CCS0, I915_MAX_CCS) - #define HAS_MEDIA_RATIO_MODE(i915) (INTEL_INFO(i915)->has_media_ratio_mode) /* From 4afcdbbd38e27ec04a349f8ba23f64b92b47fa9c Mon Sep 17 00:00:00 2001 From: Krzysztof Karas Date: Wed, 18 Jun 2025 13:52:22 +0000 Subject: [PATCH 13/15] drm/i915: move GEM_QUIRK_PIN_SWIZZLED_PAGES to i915_gem.h Move this macro where other GEM_* definitions live. Reviewed-by: Andi Shyti Signed-off-by: Krzysztof Karas Reviewed-by: Andi Shyti Signed-off-by: Andi Shyti Link: https://lore.kernel.org/all/ca83a9d8aa86bb92de84c31fd075e92a61f78895.1750251040.git.krzysztof.karas@intel.com --- drivers/gpu/drm/i915/i915_drv.h | 2 -- drivers/gpu/drm/i915/i915_gem.h | 2 ++ 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 626743462989..37a1486cba15 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -66,8 +66,6 @@ struct drm_i915_clock_gating_funcs; struct vlv_s0ix_state; struct intel_pxp; -#define GEM_QUIRK_PIN_SWIZZLED_PAGES BIT(0) - /* Data Stolen Memory (DSM) aka "i915 stolen memory" */ struct i915_dsm { /* diff --git a/drivers/gpu/drm/i915/i915_gem.h b/drivers/gpu/drm/i915/i915_gem.h index 82e9d289398c..20b3cb29cfff 100644 --- a/drivers/gpu/drm/i915/i915_gem.h +++ b/drivers/gpu/drm/i915/i915_gem.h @@ -134,4 +134,6 @@ int i915_gem_open(struct drm_i915_private *i915, struct drm_file *file); #define I915_GEM_IDLE_TIMEOUT (HZ / 5) +#define GEM_QUIRK_PIN_SWIZZLED_PAGES BIT(0) + #endif /* __I915_GEM_H__ */ From cc43422b3cc79eacff4c5a8ba0d224688ca9dd4f Mon Sep 17 00:00:00 2001 From: Janusz Krzysztofik Date: Wed, 11 Jun 2025 12:42:13 +0200 Subject: [PATCH 14/15] drm/i915/gt: Fix timeline left held on VMA alloc error The following error has been reported sporadically by CI when a test unbinds the i915 driver on a ring submission platform: <4> [239.330153] ------------[ cut here ]------------ <4> [239.330166] i915 0000:00:02.0: [drm] drm_WARN_ON(dev_priv->mm.shrink_count) <4> [239.330196] WARNING: CPU: 1 PID: 18570 at drivers/gpu/drm/i915/i915_gem.c:1309 i915_gem_cleanup_early+0x13e/0x150 [i915] ... <4> [239.330640] RIP: 0010:i915_gem_cleanup_early+0x13e/0x150 [i915] ... <4> [239.330942] Call Trace: <4> [239.330944] <4> [239.330949] i915_driver_late_release+0x2b/0xa0 [i915] <4> [239.331202] i915_driver_release+0x86/0xa0 [i915] <4> [239.331482] devm_drm_dev_init_release+0x61/0x90 <4> [239.331494] devm_action_release+0x15/0x30 <4> [239.331504] release_nodes+0x3d/0x120 <4> [239.331517] devres_release_all+0x96/0xd0 <4> [239.331533] device_unbind_cleanup+0x12/0x80 <4> [239.331543] device_release_driver_internal+0x23a/0x280 <4> [239.331550] ? bus_find_device+0xa5/0xe0 <4> [239.331563] device_driver_detach+0x14/0x20 ... <4> [357.719679] ---[ end trace 0000000000000000 ]--- If the test also unloads the i915 module then that's followed with: <3> [357.787478] ============================================================================= <3> [357.788006] BUG i915_vma (Tainted: G U W N ): Objects remaining on __kmem_cache_shutdown() <3> [357.788031] ----------------------------------------------------------------------------- <3> [357.788204] Object 0xffff888109e7f480 @offset=29824 <3> [357.788670] Allocated in i915_vma_instance+0xee/0xc10 [i915] age=292729 cpu=4 pid=2244 <4> [357.788994] i915_vma_instance+0xee/0xc10 [i915] <4> [357.789290] init_status_page+0x7b/0x420 [i915] <4> [357.789532] intel_engines_init+0x1d8/0x980 [i915] <4> [357.789772] intel_gt_init+0x175/0x450 [i915] <4> [357.790014] i915_gem_init+0x113/0x340 [i915] <4> [357.790281] i915_driver_probe+0x847/0xed0 [i915] <4> [357.790504] i915_pci_probe+0xe6/0x220 [i915] ... Closer analysis of CI results history has revealed a dependency of the error on a few IGT tests, namely: - igt@api_intel_allocator@fork-simple-stress-signal, - igt@api_intel_allocator@two-level-inception-interruptible, - igt@gem_linear_blits@interruptible, - igt@prime_mmap_coherency@ioctl-errors, which invisibly trigger the issue, then exhibited with first driver unbind attempt. All of the above tests perform actions which are actively interrupted with signals. Further debugging has allowed to narrow that scope down to DRM_IOCTL_I915_GEM_EXECBUFFER2, and ring_context_alloc(), specific to ring submission, in particular. If successful then that function, or its execlists or GuC submission equivalent, is supposed to be called only once per GEM context engine, followed by raise of a flag that prevents the function from being called again. The function is expected to unwind its internal errors itself, so it may be safely called once more after it returns an error. In case of ring submission, the function first gets a reference to the engine's legacy timeline and then allocates a VMA. If the VMA allocation fails, e.g. when i915_vma_instance() called from inside is interrupted with a signal, then ring_context_alloc() fails, leaving the timeline held referenced. On next I915_GEM_EXECBUFFER2 IOCTL, another reference to the timeline is got, and only that last one is put on successful completion. As a consequence, the legacy timeline, with its underlying engine status page's VMA object, is still held and not released on driver unbind. Get the legacy timeline only after successful allocation of the context engine's VMA. v2: Add a note on other submission methods (Krzysztof Karas): Both execlists and GuC submission use lrc_alloc() which seems free from a similar issue. Fixes: 75d0a7f31eec ("drm/i915: Lift timeline into intel_context") Closes: https://gitlab.freedesktop.org/drm/i915/kernel/-/issues/12061 Cc: Chris Wilson Cc: Matthew Auld Cc: Krzysztof Karas Reviewed-by: Sebastian Brzezinka Reviewed-by: Krzysztof Niemiec Signed-off-by: Janusz Krzysztofik Reviewed-by: Nitin Gote Reviewed-by: Andi Shyti Signed-off-by: Andi Shyti Link: https://lore.kernel.org/r/20250611104352.1014011-2-janusz.krzysztofik@linux.intel.com --- drivers/gpu/drm/i915/gt/intel_ring_submission.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gt/intel_ring_submission.c b/drivers/gpu/drm/i915/gt/intel_ring_submission.c index a876a34455f1..2a6d79abf25b 100644 --- a/drivers/gpu/drm/i915/gt/intel_ring_submission.c +++ b/drivers/gpu/drm/i915/gt/intel_ring_submission.c @@ -610,7 +610,6 @@ static int ring_context_alloc(struct intel_context *ce) /* One ringbuffer to rule them all */ GEM_BUG_ON(!engine->legacy.ring); ce->ring = engine->legacy.ring; - ce->timeline = intel_timeline_get(engine->legacy.timeline); GEM_BUG_ON(ce->state); if (engine->context_size) { @@ -623,6 +622,8 @@ static int ring_context_alloc(struct intel_context *ce) ce->state = vma; } + ce->timeline = intel_timeline_get(engine->legacy.timeline); + return 0; } From dccf655f69002d496a527ba441b4f008aa5bebbf Mon Sep 17 00:00:00 2001 From: Junxiao Chang Date: Fri, 25 Apr 2025 23:11:07 +0800 Subject: [PATCH 15/15] drm/i915/gsc: mei interrupt top half should be in irq disabled context MEI GSC interrupt comes from i915. It has top half and bottom half. Top half is called from i915 interrupt handler. It should be in irq disabled context. With RT kernel, by default i915 IRQ handler is in threaded IRQ. MEI GSC top half might be in threaded IRQ context. generic_handle_irq_safe API could be called from either IRQ or process context, it disables local IRQ then calls MEI GSC interrupt top half. This change fixes A380/A770 GPU boot hang issue with RT kernel. Fixes: 1e3dc1d8622b ("drm/i915/gsc: add gsc as a mei auxiliary device") Tested-by: Furong Zhou Suggested-by: Sebastian Andrzej Siewior Acked-by: Sebastian Andrzej Siewior Signed-off-by: Junxiao Chang Link: https://lore.kernel.org/r/20250425151108.643649-1-junxiao.chang@intel.com Reviewed-by: Rodrigo Vivi Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/i915/gt/intel_gsc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gt/intel_gsc.c b/drivers/gpu/drm/i915/gt/intel_gsc.c index 1e925c75fb08..c43febc862dc 100644 --- a/drivers/gpu/drm/i915/gt/intel_gsc.c +++ b/drivers/gpu/drm/i915/gt/intel_gsc.c @@ -284,7 +284,7 @@ static void gsc_irq_handler(struct intel_gt *gt, unsigned int intf_id) if (gt->gsc.intf[intf_id].irq < 0) return; - ret = generic_handle_irq(gt->gsc.intf[intf_id].irq); + ret = generic_handle_irq_safe(gt->gsc.intf[intf_id].irq); if (ret) gt_err_ratelimited(gt, "error handling GSC irq: %d\n", ret); }