diff --git a/drivers/gpu/drm/xe/xe_sriov_vf.c b/drivers/gpu/drm/xe/xe_sriov_vf.c index 6526fe450e55..26e243c28994 100644 --- a/drivers/gpu/drm/xe/xe_sriov_vf.c +++ b/drivers/gpu/drm/xe/xe_sriov_vf.c @@ -147,127 +147,113 @@ void xe_sriov_vf_init_early(struct xe_device *xe) xe_sriov_info(xe, "migration not supported by this module version\n"); } -/** - * vf_post_migration_requery_guc - Re-query GuC for current VF provisioning. +static bool gt_vf_post_migration_needed(struct xe_gt *gt) +{ + return test_bit(gt->info.id, >_to_xe(gt)->sriov.vf.migration.gt_flags); +} + +/* + * Notify GuCs marked in flags about resource fixups apply finished. * @xe: the &xe_device struct instance + * @gt_flags: flags marking to which GTs the notification shall be sent + */ +static int vf_post_migration_notify_resfix_done(struct xe_device *xe, unsigned long gt_flags) +{ + struct xe_gt *gt; + unsigned int id; + int err = 0; + + for_each_gt(gt, xe, id) { + if (!test_bit(id, >_flags)) + continue; + /* skip asking GuC for RESFIX exit if new recovery request arrived */ + if (gt_vf_post_migration_needed(gt)) + continue; + err = xe_gt_sriov_vf_notify_resfix_done(gt); + if (err) + break; + clear_bit(id, >_flags); + } + + if (gt_flags && !err) + drm_dbg(&xe->drm, "another recovery imminent, skipped some notifications\n"); + return err; +} + +static int vf_get_next_migrated_gt_id(struct xe_device *xe) +{ + struct xe_gt *gt; + unsigned int id; + + for_each_gt(gt, xe, id) { + if (test_and_clear_bit(id, &xe->sriov.vf.migration.gt_flags)) + return id; + } + return -1; +} + +/** + * Perform post-migration fixups on a single GT. * - * After migration, we need to re-query all VF configuration to make sure - * they match previous provisioning. Note that most of VF provisioning - * shall be the same, except GGTT range, since GGTT is not virtualized per-VF. + * After migration, GuC needs to be re-queried for VF configuration to check + * if it matches previous provisioning. Most of VF provisioning shall be the + * same, except GGTT range, since GGTT is not virtualized per-VF. If GGTT + * range has changed, we have to perform fixups - shift all GGTT references + * used anywhere within the driver. After the fixups in this function succeed, + * it is allowed to ask the GuC bound to this GT to continue normal operation. * * Returns: 0 if the operation completed successfully, or a negative error * code otherwise. */ -static int vf_post_migration_requery_guc(struct xe_device *xe) +static int gt_vf_post_migration_fixups(struct xe_gt *gt) { - struct xe_gt *gt; - unsigned int id; - int err, ret = 0; + s64 shift; + int err; - for_each_gt(gt, xe, id) { - err = xe_gt_sriov_vf_query_config(gt); - ret = ret ?: err; - } - - return ret; -} - -static void vf_post_migration_fixup_ctb(struct xe_device *xe) -{ - struct xe_gt *gt; - unsigned int id; - - xe_assert(xe, IS_SRIOV_VF(xe)); - - for_each_gt(gt, xe, id) { - s32 shift = xe_gt_sriov_vf_ggtt_shift(gt); + err = xe_gt_sriov_vf_query_config(gt); + if (err) + return err; + shift = xe_gt_sriov_vf_ggtt_shift(gt); + if (shift) { + xe_tile_sriov_vf_fixup_ggtt_nodes(gt_to_tile(gt), shift); + /* FIXME: add the recovery steps */ xe_guc_ct_fixup_messages_with_ggtt(>->uc.guc.ct, shift); } -} - -/* - * vf_post_migration_imminent - Check if post-restore recovery is coming. - * @xe: the &xe_device struct instance - * - * Return: True if migration recovery worker will soon be running. Any worker currently - * executing does not affect the result. - */ -static bool vf_post_migration_imminent(struct xe_device *xe) -{ - return xe->sriov.vf.migration.gt_flags != 0 || - work_pending(&xe->sriov.vf.migration.worker); -} - -static bool vf_post_migration_fixup_ggtt_nodes(struct xe_device *xe) -{ - bool need_fixups = false; - struct xe_tile *tile; - unsigned int id; - - for_each_tile(tile, xe, id) { - struct xe_gt *gt = tile->primary_gt; - s64 shift; - - shift = xe_gt_sriov_vf_ggtt_shift(gt); - if (shift) { - need_fixups = true; - xe_tile_sriov_vf_fixup_ggtt_nodes(tile, shift); - } - } - return need_fixups; -} - -/* - * Notify all GuCs about resource fixups apply finished. - */ -static void vf_post_migration_notify_resfix_done(struct xe_device *xe) -{ - struct xe_gt *gt; - unsigned int id; - - for_each_gt(gt, xe, id) { - if (vf_post_migration_imminent(xe)) - goto skip; - xe_gt_sriov_vf_notify_resfix_done(gt); - } - return; - -skip: - drm_dbg(&xe->drm, "another recovery imminent, skipping notifications\n"); + return 0; } static void vf_post_migration_recovery(struct xe_device *xe) { - bool need_fixups; - int err; + unsigned long fixed_gts = 0; + int id, err; drm_dbg(&xe->drm, "migration recovery in progress\n"); xe_pm_runtime_get(xe); - err = vf_post_migration_requery_guc(xe); - if (vf_post_migration_imminent(xe)) - goto defer; - if (unlikely(err)) - goto fail; + if (!vf_migration_supported(xe)) { xe_sriov_err(xe, "migration not supported by this module version\n"); err = -ENOTRECOVERABLE; goto fail; } - need_fixups = vf_post_migration_fixup_ggtt_nodes(xe); - /* FIXME: add the recovery steps */ - if (need_fixups) - vf_post_migration_fixup_ctb(xe); + while (id = vf_get_next_migrated_gt_id(xe), id >= 0) { + struct xe_gt *gt = xe_device_get_gt(xe, id); + + err = gt_vf_post_migration_fixups(gt); + if (err) + goto fail; + + set_bit(id, &fixed_gts); + } + + err = vf_post_migration_notify_resfix_done(xe, fixed_gts); + if (err) + goto fail; - vf_post_migration_notify_resfix_done(xe); xe_pm_runtime_put(xe); drm_notice(&xe->drm, "migration recovery ended\n"); return; -defer: - xe_pm_runtime_put(xe); - drm_dbg(&xe->drm, "migration recovery deferred\n"); - return; fail: xe_pm_runtime_put(xe); drm_err(&xe->drm, "migration recovery failed (%pe)\n", ERR_PTR(err)); @@ -282,18 +268,23 @@ static void migration_worker_func(struct work_struct *w) vf_post_migration_recovery(xe); } -static bool vf_ready_to_recovery_on_all_gts(struct xe_device *xe) +/* + * Check if post-restore recovery is coming on any of GTs. + * @xe: the &xe_device struct instance + * + * Return: True if migration recovery worker will soon be running. Any worker currently + * executing does not affect the result. + */ +static bool vf_ready_to_recovery_on_any_gts(struct xe_device *xe) { struct xe_gt *gt; unsigned int id; for_each_gt(gt, xe, id) { - if (!test_bit(id, &xe->sriov.vf.migration.gt_flags)) { - xe_gt_sriov_dbg_verbose(gt, "still not ready to recover\n"); - return false; - } + if (test_bit(id, &xe->sriov.vf.migration.gt_flags)) + return true; } - return true; + return false; } /** @@ -308,13 +299,9 @@ void xe_sriov_vf_start_migration_recovery(struct xe_device *xe) xe_assert(xe, IS_SRIOV_VF(xe)); - if (!vf_ready_to_recovery_on_all_gts(xe)) + if (!vf_ready_to_recovery_on_any_gts(xe)) return; - WRITE_ONCE(xe->sriov.vf.migration.gt_flags, 0); - /* Ensure other threads see that no flags are set now. */ - smp_mb(); - started = queue_work(xe->sriov.wq, &xe->sriov.vf.migration.worker); drm_info(&xe->drm, "VF migration recovery %s\n", started ? "scheduled" : "already in progress");