diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_capture.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_capture.c index 729a8fcf20dd..1def0b6467c7 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_capture.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_capture.c @@ -1540,6 +1540,36 @@ void intel_guc_capture_free_node(struct intel_engine_coredump *ee) ee->guc_capture_node = NULL; } +bool intel_guc_capture_is_matching_engine(struct intel_gt *gt, + struct intel_context *ce, + struct intel_engine_cs *engine) +{ + struct __guc_capture_parsed_output *n; + struct intel_guc *guc; + + if (!gt || !ce || !engine) + return false; + + guc = >->uc.guc; + if (!guc->capture) + return false; + + /* + * Look for a matching GuC reported error capture node from + * the internal output link-list based on lrca, guc-id and engine + * identification. + */ + list_for_each_entry(n, &guc->capture->outlist, link) { + if (n->eng_inst == GUC_ID_TO_ENGINE_INSTANCE(engine->guc_id) && + n->eng_class == GUC_ID_TO_ENGINE_CLASS(engine->guc_id) && + n->guc_id == ce->guc_id.id && + (n->lrca & CTX_GTT_ADDRESS_MASK) == (ce->lrc.lrca & CTX_GTT_ADDRESS_MASK)) + return true; + } + + return false; +} + void intel_guc_capture_get_matching_node(struct intel_gt *gt, struct intel_engine_coredump *ee, struct intel_context *ce) @@ -1555,6 +1585,7 @@ void intel_guc_capture_get_matching_node(struct intel_gt *gt, return; GEM_BUG_ON(ee->guc_capture_node); + /* * Look for a matching GuC reported error capture node from * the internal output link-list based on lrca, guc-id and engine diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_capture.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_capture.h index fbd3713c7832..302256d45431 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_capture.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_capture.h @@ -11,6 +11,7 @@ struct drm_i915_error_state_buf; struct guc_gt_system_info; struct intel_engine_coredump; +struct intel_engine_cs; struct intel_context; struct intel_gt; struct intel_guc; @@ -20,6 +21,8 @@ int intel_guc_capture_print_engine_node(struct drm_i915_error_state_buf *m, const struct intel_engine_coredump *ee); void intel_guc_capture_get_matching_node(struct intel_gt *gt, struct intel_engine_coredump *ee, struct intel_context *ce); +bool intel_guc_capture_is_matching_engine(struct intel_gt *gt, struct intel_context *ce, + struct intel_engine_cs *engine); void intel_guc_capture_process(struct intel_guc *guc); int intel_guc_capture_getlist(struct intel_guc *guc, u32 owner, u32 type, u32 classid, void **outptr); diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c index c869ddc73e69..a0e3ef1c65d2 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c @@ -4728,13 +4728,37 @@ static void capture_error_state(struct intel_guc *guc, { struct intel_gt *gt = guc_to_gt(guc); struct drm_i915_private *i915 = gt->i915; - struct intel_engine_cs *engine = __context_to_physical_engine(ce); intel_wakeref_t wakeref; + intel_engine_mask_t engine_mask; + + if (intel_engine_is_virtual(ce->engine)) { + struct intel_engine_cs *e; + intel_engine_mask_t tmp, virtual_mask = ce->engine->mask; + + engine_mask = 0; + for_each_engine_masked(e, ce->engine->gt, virtual_mask, tmp) { + bool match = intel_guc_capture_is_matching_engine(gt, ce, e); + + if (match) { + intel_engine_set_hung_context(e, ce); + engine_mask |= e->mask; + atomic_inc(&i915->gpu_error.reset_engine_count[e->uabi_class]); + } + } + + if (!engine_mask) { + guc_warn(guc, "No matching physical engine capture for virtual engine context 0x%04X / %s", + ce->guc_id.id, ce->engine->name); + engine_mask = ~0U; + } + } else { + intel_engine_set_hung_context(ce->engine, ce); + engine_mask = ce->engine->mask; + atomic_inc(&i915->gpu_error.reset_engine_count[ce->engine->uabi_class]); + } - intel_engine_set_hung_context(engine, ce); with_intel_runtime_pm(&i915->runtime_pm, wakeref) - i915_capture_error_state(gt, engine->mask, CORE_DUMP_FLAG_IS_GUC_CAPTURE); - atomic_inc(&i915->gpu_error.reset_engine_count[engine->uabi_class]); + i915_capture_error_state(gt, engine_mask, CORE_DUMP_FLAG_IS_GUC_CAPTURE); } static void guc_context_replay(struct intel_context *ce) diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c index f020c0086fbc..7360046b9945 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.c +++ b/drivers/gpu/drm/i915/i915_gpu_error.c @@ -808,10 +808,15 @@ static void err_print_gt_engines(struct drm_i915_error_state_buf *m, for (ee = gt->engine; ee; ee = ee->next) { const struct i915_vma_coredump *vma; - if (ee->guc_capture_node) - intel_guc_capture_print_engine_node(m, ee); - else + if (gt->uc && gt->uc->guc.is_guc_capture) { + if (ee->guc_capture_node) + intel_guc_capture_print_engine_node(m, ee); + else + err_printf(m, " Missing GuC capture node for %s\n", + ee->engine->name); + } else { error_print_engine(m, ee); + } err_printf(m, " hung: %u\n", ee->hung); err_printf(m, " engine reset count: %u\n", ee->reset_count);