From 95fb5f188cba275a87955a7b9534bd80d38f78b9 Mon Sep 17 00:00:00 2001 From: John Harrison Date: Wed, 27 Apr 2022 09:55:50 -0700 Subject: [PATCH 01/16] drm/i915/dg2: Define GuC firmware version for DG2 First release of GuC for DG2. Signed-off-by: John Harrison CC: Tomasz Mistat CC: Ramalingam C CC: Daniele Ceraolo Spurio Reviewed-by: Daniele Ceraolo Spurio Signed-off-by: Lucas De Marchi Link: https://patchwork.freedesktop.org/patch/msgid/20220427165550.3636686-3-John.C.Harrison@Intel.com --- drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c index a876d39e6bcf..d078f884b5e3 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c @@ -53,6 +53,7 @@ void intel_uc_fw_change_status(struct intel_uc_fw *uc_fw, * firmware as TGL. */ #define INTEL_GUC_FIRMWARE_DEFS(fw_def, guc_def) \ + fw_def(DG2, 0, guc_def(dg2, 70, 1, 2)) \ fw_def(ALDERLAKE_P, 0, guc_def(adlp, 70, 1, 1)) \ fw_def(ALDERLAKE_S, 0, guc_def(tgl, 70, 1, 1)) \ fw_def(DG1, 0, guc_def(dg1, 70, 1, 1)) \ From ad6ade8e34df62e8b876ca40509bfa2ca8ad3262 Mon Sep 17 00:00:00 2001 From: Umesh Nerlige Ramappa Date: Tue, 26 Apr 2022 17:35:15 -0700 Subject: [PATCH 02/16] drm/i915/pmu: Use existing uncore helper to read gpm_timestamp Use intel_uncore_read64_2x32 to read upper and lower fields of the GPM timestamp. 
v2: Fix compile error Signed-off-by: Umesh Nerlige Ramappa Reviewed-by: Tvrtko Ursulin Signed-off-by: John Harrison Link: https://patchwork.freedesktop.org/patch/msgid/20220427003515.3944267-1-umesh.nerlige.ramappa@intel.com --- .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 17 ++--------------- 1 file changed, 2 insertions(+), 15 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c index 61a6f2424e24..33e695adfd6a 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c @@ -1200,20 +1200,6 @@ static u32 gpm_timestamp_shift(struct intel_gt *gt) return 3 - shift; } -static u64 gpm_timestamp(struct intel_gt *gt) -{ - u32 lo, hi, old_hi, loop = 0; - - hi = intel_uncore_read(gt->uncore, MISC_STATUS1); - do { - lo = intel_uncore_read(gt->uncore, MISC_STATUS0); - old_hi = hi; - hi = intel_uncore_read(gt->uncore, MISC_STATUS1); - } while (old_hi != hi && loop++ < 2); - - return ((u64)hi << 32) | lo; -} - static void guc_update_pm_timestamp(struct intel_guc *guc, ktime_t *now) { struct intel_gt *gt = guc_to_gt(guc); @@ -1223,7 +1209,8 @@ static void guc_update_pm_timestamp(struct intel_guc *guc, ktime_t *now) lockdep_assert_held(&guc->timestamp.lock); gt_stamp_hi = upper_32_bits(guc->timestamp.gt_stamp); - gpm_ts = gpm_timestamp(gt) >> guc->timestamp.shift; + gpm_ts = intel_uncore_read64_2x32(gt->uncore, MISC_STATUS0, + MISC_STATUS1) >> guc->timestamp.shift; gt_stamp_lo = lower_32_bits(gpm_ts); *now = ktime_get(); From 991b4de3275728fd746ce15e00087fdbd51c29a1 Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Wed, 27 Apr 2022 21:19:23 -0700 Subject: [PATCH 03/16] drm/i915/uapi: Add kerneldoc for engine class enum We'll be adding a new type of engine soon. Let's document the existing engine classes first to help make it clear what each type of engine is used for. 
Cc: Andi Shyti Signed-off-by: Matt Roper Reviewed-by: Andi Shyti Reviewed-by: Jordan Justen Link: https://patchwork.freedesktop.org/patch/msgid/20220428041926.1483683-2-matthew.d.roper@intel.com --- include/uapi/drm/i915_drm.h | 53 ++++++++++++++++++++++++++++++++----- 1 file changed, 47 insertions(+), 6 deletions(-) diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h index 35ca528803fd..ec000fc6c879 100644 --- a/include/uapi/drm/i915_drm.h +++ b/include/uapi/drm/i915_drm.h @@ -154,21 +154,62 @@ enum i915_mocs_table_index { I915_MOCS_CACHED, }; -/* +/** + * enum drm_i915_gem_engine_class - uapi engine type enumeration + * * Different engines serve different roles, and there may be more than one - * engine serving each role. enum drm_i915_gem_engine_class provides a - * classification of the role of the engine, which may be used when requesting - * operations to be performed on a certain subset of engines, or for providing - * information about that group. + * engine serving each role. This enum provides a classification of the role + * of the engine, which may be used when requesting operations to be performed + * on a certain subset of engines, or for providing information about that + * group. */ enum drm_i915_gem_engine_class { + /** + * @I915_ENGINE_CLASS_RENDER: + * + * Render engines support instructions used for 3D, Compute (GPGPU), + * and programmable media workloads. These instructions fetch data and + * dispatch individual work items to threads that operate in parallel. + * The threads run small programs (called "kernels" or "shaders") on + * the GPU's execution units (EUs). + */ I915_ENGINE_CLASS_RENDER = 0, + + /** + * @I915_ENGINE_CLASS_COPY: + * + * Copy engines (also referred to as "blitters") support instructions + * that move blocks of data from one location in memory to another, + * or that fill a specified location of memory with fixed data. 
+ * Copy engines can perform pre-defined logical or bitwise operations + * on the source, destination, or pattern data. + */ I915_ENGINE_CLASS_COPY = 1, + + /** + * @I915_ENGINE_CLASS_VIDEO: + * + * Video engines (also referred to as "bit stream decode" (BSD) or + * "vdbox") support instructions that perform fixed-function media + * decode and encode. + */ I915_ENGINE_CLASS_VIDEO = 2, + + /** + * @I915_ENGINE_CLASS_VIDEO_ENHANCE: + * + * Video enhancement engines (also referred to as "vebox") support + * instructions related to image enhancement. + */ I915_ENGINE_CLASS_VIDEO_ENHANCE = 3, - /* should be kept compact */ + /* Values in this enum should be kept compact. */ + /** + * @I915_ENGINE_CLASS_INVALID: + * + * Placeholder value to represent an invalid engine class assignment. + */ I915_ENGINE_CLASS_INVALID = -1 }; From 97e17a09063074a2534e8c2f4873588a9aaa48b3 Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Wed, 27 Apr 2022 21:19:24 -0700 Subject: [PATCH 04/16] drm/i915/xehp: Add register for compute engine's MMIO-based TLB invalidation Compute engines have a separate register that the driver should use to perform MMIO-based TLB invalidation. Note that the term "context" in this register's bspec description is used to refer to the engine instance (in the same way "context" is used on bspec 46167). 
Bspec: 43930 Cc: Prathap Kumar Valsan Cc: Tvrtko Ursulin Signed-off-by: Matt Roper Acked-by: Tvrtko Ursulin Reviewed-by: Prathap Kumar Valsan Link: https://patchwork.freedesktop.org/patch/msgid/20220428041926.1483683-3-matthew.d.roper@intel.com --- drivers/gpu/drm/i915/gt/intel_gt.c | 1 + drivers/gpu/drm/i915/gt/intel_gt_regs.h | 1 + 2 files changed, 2 insertions(+) diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c b/drivers/gpu/drm/i915/gt/intel_gt.c index 92394f13b42f..53307ca0eed0 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt.c +++ b/drivers/gpu/drm/i915/gt/intel_gt.c @@ -1175,6 +1175,7 @@ void intel_gt_invalidate_tlbs(struct intel_gt *gt) [VIDEO_DECODE_CLASS] = GEN12_VD_TLB_INV_CR, [VIDEO_ENHANCEMENT_CLASS] = GEN12_VE_TLB_INV_CR, [COPY_ENGINE_CLASS] = GEN12_BLT_TLB_INV_CR, + [COMPUTE_CLASS] = GEN12_COMPCTX_TLB_INV_CR, }; struct drm_i915_private *i915 = gt->i915; struct intel_uncore *uncore = gt->uncore; diff --git a/drivers/gpu/drm/i915/gt/intel_gt_regs.h b/drivers/gpu/drm/i915/gt/intel_gt_regs.h index a39718a40cc3..a0a49c16babd 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_regs.h +++ b/drivers/gpu/drm/i915/gt/intel_gt_regs.h @@ -1007,6 +1007,7 @@ #define GEN12_VD_TLB_INV_CR _MMIO(0xcedc) #define GEN12_VE_TLB_INV_CR _MMIO(0xcee0) #define GEN12_BLT_TLB_INV_CR _MMIO(0xcee4) +#define GEN12_COMPCTX_TLB_INV_CR _MMIO(0xcf04) #define GEN12_MERT_MOD_CTRL _MMIO(0xcf28) #define RENDER_MOD_CTRL _MMIO(0xcf2c) From ecf8eca51f33dbfddcc23902bdee65c17587247c Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Wed, 27 Apr 2022 21:19:25 -0700 Subject: [PATCH 05/16] drm/i915/xehp: Add compute engine ABI We're now ready to start exposing compute engines to userspace. v2: - Move kerneldoc for other engine classes to a separate patch. 
(Andi) Cc: Daniele Ceraolo Spurio Cc: Tvrtko Ursulin Cc: Vinay Belgaumkar Cc: Jordan Justen Cc: Szymon Morek UMD (mesa): https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/14395 Signed-off-by: Matt Roper Acked-by: Tvrtko Ursulin Reviewed-by: Andi Shyti Reviewed-by: Jordan Justen Tested-by: Jordan Justen # mesa anvil & iris Link: https://patchwork.freedesktop.org/patch/msgid/20220428041926.1483683-4-matthew.d.roper@intel.com --- drivers/gpu/drm/i915/gt/intel_engine_user.c | 2 +- drivers/gpu/drm/i915/i915_drm_client.c | 1 + drivers/gpu/drm/i915/i915_drm_client.h | 2 +- include/uapi/drm/i915_drm.h | 9 +++++++++ 4 files changed, 12 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_engine_user.c b/drivers/gpu/drm/i915/gt/intel_engine_user.c index 0f6cd96b459f..46a174f8aa00 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_user.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_user.c @@ -47,7 +47,7 @@ static const u8 uabi_classes[] = { [COPY_ENGINE_CLASS] = I915_ENGINE_CLASS_COPY, [VIDEO_DECODE_CLASS] = I915_ENGINE_CLASS_VIDEO, [VIDEO_ENHANCEMENT_CLASS] = I915_ENGINE_CLASS_VIDEO_ENHANCE, - /* TODO: Add COMPUTE_CLASS mapping once ABI is available */ + [COMPUTE_CLASS] = I915_ENGINE_CLASS_COMPUTE, }; static int engine_cmp(void *priv, const struct list_head *A, diff --git a/drivers/gpu/drm/i915/i915_drm_client.c b/drivers/gpu/drm/i915/i915_drm_client.c index 475a6f824cad..18d38cb59923 100644 --- a/drivers/gpu/drm/i915/i915_drm_client.c +++ b/drivers/gpu/drm/i915/i915_drm_client.c @@ -81,6 +81,7 @@ static const char * const uabi_class_names[] = { [I915_ENGINE_CLASS_COPY] = "copy", [I915_ENGINE_CLASS_VIDEO] = "video", [I915_ENGINE_CLASS_VIDEO_ENHANCE] = "video-enhance", + [I915_ENGINE_CLASS_COMPUTE] = "compute", }; static u64 busy_add(struct i915_gem_context *ctx, unsigned int class) diff --git a/drivers/gpu/drm/i915/i915_drm_client.h b/drivers/gpu/drm/i915/i915_drm_client.h index 5f5b02b01ba0..f796c5e8e060 100644 --- 
a/drivers/gpu/drm/i915/i915_drm_client.h +++ b/drivers/gpu/drm/i915/i915_drm_client.h @@ -13,7 +13,7 @@ #include "gt/intel_engine_types.h" -#define I915_LAST_UABI_ENGINE_CLASS I915_ENGINE_CLASS_VIDEO_ENHANCE +#define I915_LAST_UABI_ENGINE_CLASS I915_ENGINE_CLASS_COMPUTE struct drm_i915_private; diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h index ec000fc6c879..a2def7b27009 100644 --- a/include/uapi/drm/i915_drm.h +++ b/include/uapi/drm/i915_drm.h @@ -203,6 +203,15 @@ enum drm_i915_gem_engine_class { */ I915_ENGINE_CLASS_VIDEO_ENHANCE = 3, + /** + * @I915_ENGINE_CLASS_COMPUTE: + * + * Compute engines support a subset of the instructions available + * on render engines: compute engines support Compute (GPGPU) and + * programmable media workloads, but do not support the 3D pipeline. + */ + I915_ENGINE_CLASS_COMPUTE = 4, + /* Values in this enum should be kept compact. */ /** From 59a4752895b2e43351c7c1dd2b264d17d74e8466 Mon Sep 17 00:00:00 2001 From: Daniele Ceraolo Spurio Date: Wed, 27 Apr 2022 21:19:26 -0700 Subject: [PATCH 06/16] drm/i915: Xe_HP SDV and DG2 have up to 4 CCS engines Cc: Vinay Belgaumkar Signed-off-by: Daniele Ceraolo Spurio Signed-off-by: Matt Roper Reviewed-by: Matt Roper Reviewed-by: Andi Shyti Reviewed-by: Jordan Justen Tested-by: Jordan Justen # mesa anvil & iris Link: https://patchwork.freedesktop.org/patch/msgid/20220428041926.1483683-5-matthew.d.roper@intel.com --- drivers/gpu/drm/i915/i915_pci.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c index 38f7de778914..2efd2201359e 100644 --- a/drivers/gpu/drm/i915/i915_pci.c +++ b/drivers/gpu/drm/i915/i915_pci.c @@ -1037,7 +1037,8 @@ static const struct intel_device_info xehpsdv_info = { BIT(RCS0) | BIT(BCS0) | BIT(VECS0) | BIT(VECS1) | BIT(VECS2) | BIT(VECS3) | BIT(VCS0) | BIT(VCS1) | BIT(VCS2) | BIT(VCS3) | - BIT(VCS4) | BIT(VCS5) | BIT(VCS6) | BIT(VCS7), + BIT(VCS4) | BIT(VCS5) | 
BIT(VCS6) | BIT(VCS7) | + BIT(CCS0) | BIT(CCS1) | BIT(CCS2) | BIT(CCS3), .require_force_probe = 1, }; @@ -1056,7 +1057,8 @@ static const struct intel_device_info xehpsdv_info = { .platform_engine_mask = \ BIT(RCS0) | BIT(BCS0) | \ BIT(VECS0) | BIT(VECS1) | \ - BIT(VCS0) | BIT(VCS2) + BIT(VCS0) | BIT(VCS2) | \ + BIT(CCS0) | BIT(CCS1) | BIT(CCS2) | BIT(CCS3) __maybe_unused static const struct intel_device_info dg2_info = { From 7c161b85e88552a037566678128c169fba3b1efe Mon Sep 17 00:00:00 2001 From: Akeem G Abodunrin Date: Mon, 25 Apr 2022 20:53:15 +0530 Subject: [PATCH 07/16] drm/i915/xehpsdv/dg1/tgl: Fix issue with LRI relative addressing When bit 19 of MI_LOAD_REGISTER_IMM instruction opcode is set on tgl+ devices, HW does not care about certain register address offsets, but instead check the following for valid address ranges on specific engines: RCS && CCS: BITS(0 - 10) BCS: BITS(0 - 11) VECS && VCS: BITS(0 - 13) Also, tgl+ now support relative addressing for BCS engine - So, this patch fixes issue with live_gt_lrc selftest that is failing where there is mismatch between LRC register layout generated during init and HW default register offsets. 
Signed-off-by: Akeem G Abodunrin cc: Prathap Kumar Valsan Signed-off-by: Ramalingam C Reviewed-by: Matthew Auld Link: https://patchwork.freedesktop.org/patch/msgid/20220425152317.4275-2-ramalingam.c@intel.com --- drivers/gpu/drm/i915/gt/selftest_lrc.c | 36 +++++++++++++++++++++++++- 1 file changed, 35 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gt/selftest_lrc.c b/drivers/gpu/drm/i915/gt/selftest_lrc.c index 6ba52ef1acb8..8dc7b88cdca0 100644 --- a/drivers/gpu/drm/i915/gt/selftest_lrc.c +++ b/drivers/gpu/drm/i915/gt/selftest_lrc.c @@ -128,6 +128,27 @@ static int context_flush(struct intel_context *ce, long timeout) return err; } +static int get_lri_mask(struct intel_engine_cs *engine, u32 lri) +{ + if ((lri & MI_LRI_LRM_CS_MMIO) == 0) + return ~0u; + + if (GRAPHICS_VER(engine->i915) < 12) + return 0xfff; + + switch (engine->class) { + default: + case RENDER_CLASS: + case COMPUTE_CLASS: + return 0x07ff; + case COPY_ENGINE_CLASS: + return 0x0fff; + case VIDEO_DECODE_CLASS: + case VIDEO_ENHANCEMENT_CLASS: + return 0x3fff; + } +} + static int live_lrc_layout(void *arg) { struct intel_gt *gt = arg; @@ -167,6 +188,7 @@ static int live_lrc_layout(void *arg) dw = 0; do { u32 lri = READ_ONCE(hw[dw]); + u32 lri_mask; if (lri == 0) { dw++; @@ -194,6 +216,18 @@ static int live_lrc_layout(void *arg) break; } + /* + * When bit 19 of MI_LOAD_REGISTER_IMM instruction + * opcode is set on Gen12+ devices, HW does not + * care about certain register address offsets, and + * instead check the following for valid address + * ranges on specific engines: + * RCS && CCS: BITS(0 - 10) + * BCS: BITS(0 - 11) + * VECS && VCS: BITS(0 - 13) + */ + lri_mask = get_lri_mask(engine, lri); + lri &= 0x7f; lri++; dw++; @@ -201,7 +235,7 @@ static int live_lrc_layout(void *arg) while (lri) { u32 offset = READ_ONCE(hw[dw]); - if (offset != lrc[dw]) { + if ((offset ^ lrc[dw]) & lri_mask) { pr_err("%s: Different registers found at dword %d, expected %x, found %x\n", engine->name, dw, 
offset, lrc[dw]); err = -EINVAL; From 17be812e76eee0fc46747ce3ef87eab39f9d565c Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 25 Apr 2022 20:53:16 +0530 Subject: [PATCH 08/16] drm/i915/selftests: Skip poisoning SET_PREDICATE_RESULT on dg2 When predication is enabled all commands barring a few (such as MI_BB_END) are nop'ed. If we accidentally enable predication while poisoning the context, not only is the rest of the poisoning skipped (thus disabling the test), but the closing instructions of the poison request are nop'ed. Not only do we then not signal the waiting context, but we even prevent re-enabling arbitration and the GPU will not perform a context switch at the end of the request. Cc: Joonas Lahtinen Suggested-by: CQ Tang Signed-off-by: Chris Wilson Signed-off-by: Ramalingam C Reviewed-by: Matthew Auld Link: https://patchwork.freedesktop.org/patch/msgid/20220425152317.4275-3-ramalingam.c@intel.com --- drivers/gpu/drm/i915/gt/intel_engine_regs.h | 1 + drivers/gpu/drm/i915/gt/selftest_lrc.c | 17 ++++++++++++++++- 2 files changed, 17 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gt/intel_engine_regs.h b/drivers/gpu/drm/i915/gt/intel_engine_regs.h index 594a629cb28f..1dab554bf640 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_regs.h +++ b/drivers/gpu/drm/i915/gt/intel_engine_regs.h @@ -193,6 +193,7 @@ #define RING_TIMESTAMP_UDW(base) _MMIO((base) + 0x358 + 4) #define RING_CONTEXT_STATUS_PTR(base) _MMIO((base) + 0x3a0) #define RING_CTX_TIMESTAMP(base) _MMIO((base) + 0x3a8) /* gen8+ */ +#define RING_PREDICATE_RESULT(base) _MMIO((base) + 0x3b8) #define RING_FORCE_TO_NONPRIV(base, i) _MMIO(((base) + 0x4D0) + (i) * 4) #define RING_FORCE_TO_NONPRIV_ADDRESS_MASK REG_GENMASK(25, 2) #define RING_FORCE_TO_NONPRIV_ACCESS_RW (0 << 28) /* CFL+ & Gen11+ */ diff --git a/drivers/gpu/drm/i915/gt/selftest_lrc.c b/drivers/gpu/drm/i915/gt/selftest_lrc.c index 8dc7b88cdca0..8b2c11dbe354 100644 --- a/drivers/gpu/drm/i915/gt/selftest_lrc.c +++ 
b/drivers/gpu/drm/i915/gt/selftest_lrc.c @@ -945,6 +945,19 @@ create_user_vma(struct i915_address_space *vm, unsigned long size) return vma; } +static u32 safe_poison(u32 offset, u32 poison) +{ + /* + * Do not enable predication as it will nop all subsequent commands, + * not only disabling the tests (by preventing all the other SRM) but + * also preventing the arbitration events at the end of the request. + */ + if (offset == i915_mmio_reg_offset(RING_PREDICATE_RESULT(0))) + poison &= ~REG_BIT(0); + + return poison; +} + static struct i915_vma * store_context(struct intel_context *ce, struct i915_vma *scratch) { @@ -1154,7 +1167,9 @@ static struct i915_vma *load_context(struct intel_context *ce, u32 poison) *cs++ = MI_LOAD_REGISTER_IMM(len); while (len--) { *cs++ = hw[dw]; - *cs++ = poison; + *cs++ = safe_poison(hw[dw] & get_lri_mask(ce->engine, + MI_LRI_LRM_CS_MMIO), + poison); dw += 2; } } while (dw < PAGE_SIZE / sizeof(u32) && From 166c44e6949a05b2e61024538007a8f983afd704 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 25 Apr 2022 20:53:17 +0530 Subject: [PATCH 09/16] drm/i915/gt: Clear SET_PREDICATE_RESULT prior to executing the ring Userspace may leave predication enabled upon return from the batch buffer, which has the consequence of preventing all operations from the ring from being executed, including all the synchronisation, coherency control, arbitration and user signaling. This is more than just a local gpu hang in one client, as the user has the ability to prevent the kernel from applying critical workarounds and can cause a full GT reset. We could simply execute MI_SET_PREDICATE upon return from the user batch, but this has the repercussion of modifying the user's context state. Instead, we opt to execute a fixup batch which by mixing predicated operations can determine the state of the SET_PREDICATE_RESULT register and restore it prior to the next userspace batch. This allows us to protect the kernel's ring without changing the uABI. 
Suggested-by: Zbigniew Kempczynski Signed-off-by: Chris Wilson Cc: Zbigniew Kempczynski Cc: Thomas Hellstrom Signed-off-by: Ramalingam C Reviewed-by: Matthew Auld Link: https://patchwork.freedesktop.org/patch/msgid/20220425152317.4275-4-ramalingam.c@intel.com --- drivers/gpu/drm/i915/gt/gen8_engine_cs.c | 54 +++++++++++++ drivers/gpu/drm/i915/gt/gen8_engine_cs.h | 7 ++ drivers/gpu/drm/i915/gt/intel_engine_regs.h | 1 + .../drm/i915/gt/intel_execlists_submission.c | 15 +++- drivers/gpu/drm/i915/gt/intel_gpu_commands.h | 2 + drivers/gpu/drm/i915/gt/intel_lrc.c | 75 ++++++++++++++----- drivers/gpu/drm/i915/gt/intel_lrc.h | 5 ++ .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 2 + 8 files changed, 137 insertions(+), 24 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/gen8_engine_cs.c b/drivers/gpu/drm/i915/gt/gen8_engine_cs.c index 9529c5455bc3..3e13960615bd 100644 --- a/drivers/gpu/drm/i915/gt/gen8_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/gen8_engine_cs.c @@ -5,6 +5,7 @@ #include "gen8_engine_cs.h" #include "i915_drv.h" +#include "intel_engine_regs.h" #include "intel_gpu_commands.h" #include "intel_lrc.h" #include "intel_ring.h" @@ -385,6 +386,59 @@ int gen8_emit_init_breadcrumb(struct i915_request *rq) return 0; } +static int __gen125_emit_bb_start(struct i915_request *rq, + u64 offset, u32 len, + const unsigned int flags, + u32 arb) +{ + struct intel_context *ce = rq->context; + u32 wa_offset = lrc_indirect_bb(ce); + u32 *cs; + + cs = intel_ring_begin(rq, 12); + if (IS_ERR(cs)) + return PTR_ERR(cs); + + *cs++ = MI_ARB_ON_OFF | arb; + + *cs++ = MI_LOAD_REGISTER_MEM_GEN8 | + MI_SRM_LRM_GLOBAL_GTT | + MI_LRI_LRM_CS_MMIO; + *cs++ = i915_mmio_reg_offset(RING_PREDICATE_RESULT(0)); + *cs++ = wa_offset + DG2_PREDICATE_RESULT_WA; + *cs++ = 0; + + *cs++ = MI_BATCH_BUFFER_START_GEN8 | + (flags & I915_DISPATCH_SECURE ? 
0 : BIT(8)); + *cs++ = lower_32_bits(offset); + *cs++ = upper_32_bits(offset); + + /* Fixup stray MI_SET_PREDICATE as it prevents us executing the ring */ + *cs++ = MI_BATCH_BUFFER_START_GEN8; + *cs++ = wa_offset + DG2_PREDICATE_RESULT_BB; + *cs++ = 0; + + *cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE; + + intel_ring_advance(rq, cs); + + return 0; +} + +int gen125_emit_bb_start_noarb(struct i915_request *rq, + u64 offset, u32 len, + const unsigned int flags) +{ + return __gen125_emit_bb_start(rq, offset, len, flags, MI_ARB_DISABLE); +} + +int gen125_emit_bb_start(struct i915_request *rq, + u64 offset, u32 len, + const unsigned int flags) +{ + return __gen125_emit_bb_start(rq, offset, len, flags, MI_ARB_ENABLE); +} + int gen8_emit_bb_start_noarb(struct i915_request *rq, u64 offset, u32 len, const unsigned int flags) diff --git a/drivers/gpu/drm/i915/gt/gen8_engine_cs.h b/drivers/gpu/drm/i915/gt/gen8_engine_cs.h index 107ab42539ab..32e3d2b831bb 100644 --- a/drivers/gpu/drm/i915/gt/gen8_engine_cs.h +++ b/drivers/gpu/drm/i915/gt/gen8_engine_cs.h @@ -31,6 +31,13 @@ int gen8_emit_bb_start(struct i915_request *rq, u64 offset, u32 len, const unsigned int flags); +int gen125_emit_bb_start_noarb(struct i915_request *rq, + u64 offset, u32 len, + const unsigned int flags); +int gen125_emit_bb_start(struct i915_request *rq, + u64 offset, u32 len, + const unsigned int flags); + u32 *gen8_emit_fini_breadcrumb_xcs(struct i915_request *rq, u32 *cs); u32 *gen12_emit_fini_breadcrumb_xcs(struct i915_request *rq, u32 *cs); diff --git a/drivers/gpu/drm/i915/gt/intel_engine_regs.h b/drivers/gpu/drm/i915/gt/intel_engine_regs.h index 1dab554bf640..75a0c55c5aa5 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_regs.h +++ b/drivers/gpu/drm/i915/gt/intel_engine_regs.h @@ -148,6 +148,7 @@ (REG_FIELD_PREP(CMD_CCTL_WRITE_OVERRIDE_MASK, (write) << 1) | \ REG_FIELD_PREP(CMD_CCTL_READ_OVERRIDE_MASK, (read) << 1)) +#define RING_PREDICATE_RESULT(base) _MMIO((base) + 0x3b8) /* gen12+ */ #define 
MI_PREDICATE_RESULT_2(base) _MMIO((base) + 0x3bc) #define LOWER_SLICE_ENABLED (1 << 0) #define LOWER_SLICE_DISABLED (0 << 0) diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c index f8749c433b7c..86f7a9ac1c39 100644 --- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c +++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c @@ -3433,10 +3433,17 @@ logical_ring_default_vfuncs(struct intel_engine_cs *engine) } } - if (intel_engine_has_preemption(engine)) - engine->emit_bb_start = gen8_emit_bb_start; - else - engine->emit_bb_start = gen8_emit_bb_start_noarb; + if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 50)) { + if (intel_engine_has_preemption(engine)) + engine->emit_bb_start = gen125_emit_bb_start; + else + engine->emit_bb_start = gen125_emit_bb_start_noarb; + } else { + if (intel_engine_has_preemption(engine)) + engine->emit_bb_start = gen8_emit_bb_start; + else + engine->emit_bb_start = gen8_emit_bb_start_noarb; + } engine->busyness = execlists_engine_busyness; } diff --git a/drivers/gpu/drm/i915/gt/intel_gpu_commands.h b/drivers/gpu/drm/i915/gt/intel_gpu_commands.h index e52718a87f14..556bca3be804 100644 --- a/drivers/gpu/drm/i915/gt/intel_gpu_commands.h +++ b/drivers/gpu/drm/i915/gt/intel_gpu_commands.h @@ -39,6 +39,8 @@ #define MI_GLOBAL_GTT (1<<22) #define MI_NOOP MI_INSTR(0, 0) +#define MI_SET_PREDICATE MI_INSTR(0x01, 0) +#define MI_SET_PREDICATE_DISABLE (0 << 0) #define MI_USER_INTERRUPT MI_INSTR(0x02, 0) #define MI_WAIT_FOR_EVENT MI_INSTR(0x03, 0) #define MI_WAIT_FOR_OVERLAY_FLIP (1<<16) diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c index 3f83a9038e13..eec73c66406c 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc.c +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c @@ -904,6 +904,24 @@ check_redzone(const void *vaddr, const struct intel_engine_cs *engine) engine->name); } +static u32 context_wa_bb_offset(const struct intel_context *ce) +{ + 
return PAGE_SIZE * ce->wa_bb_page; +} + +static u32 *context_indirect_bb(const struct intel_context *ce) +{ + void *ptr; + + GEM_BUG_ON(!ce->wa_bb_page); + + ptr = ce->lrc_reg_state; + ptr -= LRC_STATE_OFFSET; /* back to start of context image */ + ptr += context_wa_bb_offset(ce); + + return ptr; +} + void lrc_init_state(struct intel_context *ce, struct intel_engine_cs *engine, void *state) @@ -922,6 +940,10 @@ void lrc_init_state(struct intel_context *ce, /* Clear the ppHWSP (inc. per-context counters) */ memset(state, 0, PAGE_SIZE); + /* Clear the indirect wa and storage */ + if (ce->wa_bb_page) + memset(state + context_wa_bb_offset(ce), 0, PAGE_SIZE); + /* * The second page of the context object contains some registers which * must be set up prior to the first execution. @@ -929,6 +951,35 @@ void lrc_init_state(struct intel_context *ce, __lrc_init_regs(state + LRC_STATE_OFFSET, ce, engine, inhibit); } +u32 lrc_indirect_bb(const struct intel_context *ce) +{ + return i915_ggtt_offset(ce->state) + context_wa_bb_offset(ce); +} + +static u32 *setup_predicate_disable_wa(const struct intel_context *ce, u32 *cs) +{ + /* If predication is active, this will be noop'ed */ + *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT | (4 - 2); + *cs++ = lrc_indirect_bb(ce) + DG2_PREDICATE_RESULT_WA; + *cs++ = 0; + *cs++ = 0; /* No predication */ + + /* predicated end, only terminates if SET_PREDICATE_RESULT:0 is clear */ + *cs++ = MI_BATCH_BUFFER_END | BIT(15); + *cs++ = MI_SET_PREDICATE | MI_SET_PREDICATE_DISABLE; + + /* Instructions are no longer predicated (disabled), we can proceed */ + *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT | (4 - 2); + *cs++ = lrc_indirect_bb(ce) + DG2_PREDICATE_RESULT_WA; + *cs++ = 0; + *cs++ = 1; /* enable predication before the next BB */ + + *cs++ = MI_BATCH_BUFFER_END; + GEM_BUG_ON(offset_in_page(cs) > DG2_PREDICATE_RESULT_WA); + + return cs; +} + static struct i915_vma * __lrc_alloc_state(struct intel_context *ce, struct intel_engine_cs *engine) { @@ 
-1240,24 +1291,6 @@ gen12_emit_indirect_ctx_xcs(const struct intel_context *ce, u32 *cs) return cs; } -static u32 context_wa_bb_offset(const struct intel_context *ce) -{ - return PAGE_SIZE * ce->wa_bb_page; -} - -static u32 *context_indirect_bb(const struct intel_context *ce) -{ - void *ptr; - - GEM_BUG_ON(!ce->wa_bb_page); - - ptr = ce->lrc_reg_state; - ptr -= LRC_STATE_OFFSET; /* back to start of context image */ - ptr += context_wa_bb_offset(ce); - - return ptr; -} - static void setup_indirect_ctx_bb(const struct intel_context *ce, const struct intel_engine_cs *engine, @@ -1271,9 +1304,11 @@ setup_indirect_ctx_bb(const struct intel_context *ce, while ((unsigned long)cs % CACHELINE_BYTES) *cs++ = MI_NOOP; + GEM_BUG_ON(cs - start > DG2_PREDICATE_RESULT_BB / sizeof(*start)); + setup_predicate_disable_wa(ce, start + DG2_PREDICATE_RESULT_BB / sizeof(*start)); + lrc_setup_indirect_ctx(ce->lrc_reg_state, engine, - i915_ggtt_offset(ce->state) + - context_wa_bb_offset(ce), + lrc_indirect_bb(ce), (cs - start) * sizeof(*cs)); } diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.h b/drivers/gpu/drm/i915/gt/intel_lrc.h index 7371bb5c8129..31be734010db 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc.h +++ b/drivers/gpu/drm/i915/gt/intel_lrc.h @@ -145,4 +145,9 @@ static inline void lrc_runtime_stop(struct intel_context *ce) WRITE_ONCE(stats->active, 0); } +#define DG2_PREDICATE_RESULT_WA (PAGE_SIZE - sizeof(u64)) +#define DG2_PREDICATE_RESULT_BB (2048) + +u32 lrc_indirect_bb(const struct intel_context *ce); + #endif /* __INTEL_LRC_H__ */ diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c index 33e695adfd6a..75291e9846c5 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c @@ -3897,6 +3897,8 @@ static void guc_default_vfuncs(struct intel_engine_cs *engine) */ engine->emit_bb_start = gen8_emit_bb_start; + if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 
50)) + engine->emit_bb_start = gen125_emit_bb_start; } static void rcs_submission_override(struct intel_engine_cs *engine) From b8c9d486af7b462d117f92ecc5afb4d406b74d15 Mon Sep 17 00:00:00 2001 From: Ramalingam C Date: Mon, 2 May 2022 19:56:17 +0530 Subject: [PATCH 10/16] drm/i915/gt: optimize the ccs_sz calculation per chunk Calculate the ccs_sz that needs to be emitted based on the src and dst pages emitted per chunk. And handle the return value of emit_pte for the ccs pages. v2: ccs_sz moved to the reduced scope [Matt] Signed-off-by: Ramalingam C Reviewed-by: Matthew Auld Link: https://patchwork.freedesktop.org/patch/msgid/20220502142618.2704-3-ramalingam.c@intel.com --- drivers/gpu/drm/i915/gt/intel_migrate.c | 36 +++++++++---------------- 1 file changed, 13 insertions(+), 23 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_migrate.c b/drivers/gpu/drm/i915/gt/intel_migrate.c index 9d552f30b627..f80cea35b109 100644 --- a/drivers/gpu/drm/i915/gt/intel_migrate.c +++ b/drivers/gpu/drm/i915/gt/intel_migrate.c @@ -647,17 +647,9 @@ static int scatter_list_length(struct scatterlist *sg) static void calculate_chunk_sz(struct drm_i915_private *i915, bool src_is_lmem, - int *src_sz, int *ccs_sz, u32 bytes_to_cpy, - u32 ccs_bytes_to_cpy) + int *src_sz, u32 bytes_to_cpy, u32 ccs_bytes_to_cpy) { if (ccs_bytes_to_cpy) { - /* - * We can only copy the ccs data corresponding to - * the CHUNK_SZ of lmem which is - * GET_CCS_BYTES(i915, CHUNK_SZ)) - */ - *ccs_sz = min_t(int, ccs_bytes_to_cpy, GET_CCS_BYTES(i915, CHUNK_SZ)); - if (!src_is_lmem) /* * When CHUNK_SZ is passed all the pages upto CHUNK_SZ @@ -707,10 +699,10 @@ intel_context_migrate_copy(struct intel_context *ce, struct drm_i915_private *i915 = ce->engine->i915; u32 ccs_bytes_to_cpy = 0, bytes_to_cpy; enum i915_cache_level ccs_cache_level; - int src_sz, dst_sz, ccs_sz; u32 src_offset, dst_offset; u8 src_access, dst_access; struct i915_request *rq; + int src_sz, dst_sz; bool ccs_is_src; int err; @@ -791,7 +783,7 @@ 
intel_context_migrate_copy(struct intel_context *ce, if (err) goto out_rq; - calculate_chunk_sz(i915, src_is_lmem, &src_sz, &ccs_sz, + calculate_chunk_sz(i915, src_is_lmem, &src_sz, bytes_to_cpy, ccs_bytes_to_cpy); len = emit_pte(rq, &it_src, src_cache_level, src_is_lmem, @@ -825,37 +817,35 @@ intel_context_migrate_copy(struct intel_context *ce, bytes_to_cpy -= len; if (ccs_bytes_to_cpy) { + int ccs_sz; + err = rq->engine->emit_flush(rq, EMIT_INVALIDATE); if (err) goto out_rq; + ccs_sz = GET_CCS_BYTES(i915, len); err = emit_pte(rq, &it_ccs, ccs_cache_level, false, ccs_is_src ? src_offset : dst_offset, ccs_sz); + if (err < 0) + goto out_rq; + if (err < ccs_sz) { + err = -EINVAL; + goto out_rq; + } err = rq->engine->emit_flush(rq, EMIT_INVALIDATE); if (err) goto out_rq; - /* - * Using max of src_sz and dst_sz, as we need to - * pass the lmem size corresponding to the ccs - * blocks we need to handle. - */ - ccs_sz = max_t(int, ccs_is_src ? ccs_sz : src_sz, - ccs_is_src ? dst_sz : ccs_sz); - err = emit_copy_ccs(rq, dst_offset, dst_access, - src_offset, src_access, ccs_sz); + src_offset, src_access, len); if (err) goto out_rq; err = rq->engine->emit_flush(rq, EMIT_INVALIDATE); if (err) goto out_rq; - - /* Converting back to ccs bytes */ - ccs_sz = GET_CCS_BYTES(rq->engine->i915, ccs_sz); ccs_bytes_to_cpy -= ccs_sz; } From 6e29832f61a055638c8d9f3777ceb2d85f4b8875 Mon Sep 17 00:00:00 2001 From: Ramalingam C Date: Mon, 2 May 2022 19:56:18 +0530 Subject: [PATCH 11/16] drm/i915/gt: Document the eviction of the Flat-CCS objects Capture the eviction details for Flat-CCS capable, lmem objects. 
v2: Fix the Flat-ccs capability of lmem obj with smem residency possibility [Thomas] v3: Fixed the suggestions [Matt] Signed-off-by: Ramalingam C cc: Thomas Hellstrom cc: Matthew Auld Reviewed-by: Matthew Auld Link: https://patchwork.freedesktop.org/patch/msgid/20220502142618.2704-4-ramalingam.c@intel.com --- drivers/gpu/drm/i915/gt/intel_migrate.c | 23 ++++++++++++++--------- 1 file changed, 14 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_migrate.c b/drivers/gpu/drm/i915/gt/intel_migrate.c index f80cea35b109..2c35324b5f68 100644 --- a/drivers/gpu/drm/i915/gt/intel_migrate.c +++ b/drivers/gpu/drm/i915/gt/intel_migrate.c @@ -485,16 +485,21 @@ static bool wa_1209644611_applies(int ver, u32 size) * And CCS data can be copied in and out of CCS region through * XY_CTRL_SURF_COPY_BLT. CPU can't access the CCS data directly. * - * When we exhaust the lmem, if the object's placements support smem, then we can - * directly decompress the compressed lmem object into smem and start using it - * from smem itself. + * I915 supports Flat-CCS on lmem only objects. When an object has smem in + * its preference list, on memory pressure, i915 needs to migrate the lmem + * content into smem. If the lmem object is Flat-CCS compressed by userspace, + * then i915 needs to decompress it. But I915 lacks the required information + * for such decompression. Hence I915 supports Flat-CCS only on lmem only objects. * - * But when we need to swapout the compressed lmem object into a smem region - * though objects' placement doesn't support smem, then we copy the lmem content - * as it is into smem region along with ccs data (using XY_CTRL_SURF_COPY_BLT). - * When the object is referred, lmem content will be swaped in along with - * restoration of the CCS data (using XY_CTRL_SURF_COPY_BLT) at corresponding - * location.
+ * When we exhaust the lmem, Flat-CCS capable objects' lmem backing memory can + * be temporarily evicted to smem, along with the auxiliary CCS state, where + * it can be potentially swapped-out at a later point, if required. + * If userspace later touches the evicted pages, then we always move + * the backing memory back to lmem, which includes restoring the saved CCS state, + * and potentially performing any required swap-in. + * + * For the migration of the lmem objects with smem in placement list, such as + * {lmem, smem}, objects are treated as non Flat-CCS capable objects. */ static inline u32 *i915_flush_dw(u32 *cmd, u32 flags) From 10dcf783f7e986a36ec2d6d07d79360ddd4a2e4f Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Mon, 25 Apr 2022 18:47:54 +0300 Subject: [PATCH 12/16] drm/i915: remove superfluous string helper include Remove the duplicate and incorrect (uses "" instead of <>) linux/string_helpers.h include. Fixes: cc1338f259a2 ("drm/i915/xehp: Update topology dumps for Xe_HP") Cc: Matt Roper Cc: Lucas De Marchi Signed-off-by: Jani Nikula Reviewed-by: Matt Roper Link: https://patchwork.freedesktop.org/patch/msgid/20220425154754.990815-1-jani.nikula@intel.com --- drivers/gpu/drm/i915/gt/intel_sseu.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_sseu.c b/drivers/gpu/drm/i915/gt/intel_sseu.c index 9881a6790574..fdd25691beda 100644 --- a/drivers/gpu/drm/i915/gt/intel_sseu.c +++ b/drivers/gpu/drm/i915/gt/intel_sseu.c @@ -10,8 +10,6 @@ #include "intel_gt_regs.h" #include "intel_sseu.h" -#include "linux/string_helpers.h" - void intel_sseu_set_info(struct sseu_dev_info *sseu, u8 max_slices, u8 max_subslices, u8 max_eus_per_subslice) { From 0de2cc0e2b5faff50d36552ba174d525ca1e8fb3 Mon Sep 17 00:00:00 2001 From: Tvrtko Ursulin Date: Fri, 29 Apr 2022 15:07:57 +0100 Subject: [PATCH 13/16] drm/i915: Fix assert in i915_ggtt_pin MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Use 
lockdep_assert_not_held to simplify and correct the code. Otherwise false positives are hit if the lock state is unknown, such as after a previous taint. Signed-off-by: Tvrtko Ursulin Reported-by: Ville Syrjälä Reviewed-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20220429140757.651406-1-tvrtko.ursulin@linux.intel.com --- drivers/gpu/drm/i915/i915_vma.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c index 033ef6916e26..cb3c9435a608 100644 --- a/drivers/gpu/drm/i915/i915_vma.c +++ b/drivers/gpu/drm/i915/i915_vma.c @@ -1562,9 +1562,7 @@ int i915_ggtt_pin(struct i915_vma *vma, struct i915_gem_ww_ctx *ww, if (ww) return __i915_ggtt_pin(vma, ww, align, flags); -#ifdef CONFIG_LOCKDEP - WARN_ON(dma_resv_held(vma->obj->base.resv)); -#endif + lockdep_assert_not_held(&vma->obj->base.resv->lock.base); for_i915_gem_ww(&_ww, err, true) { err = i915_gem_object_lock(vma->obj, &_ww); From 448a54ace4bb20216f5bfcecf272871d387d03dd Mon Sep 17 00:00:00 2001 From: Stuart Summers Date: Mon, 2 May 2022 09:34:07 -0700 Subject: [PATCH 14/16] drm/i915/pvc: add initial Ponte Vecchio definitions Additional blitter and media engines will be enabled later.
Bspec: 44481, 44482 Signed-off-by: Stuart Summers Signed-off-by: Matt Roper Reviewed-by: Lucas De Marchi Link: https://patchwork.freedesktop.org/patch/msgid/20220502163417.2635462-2-matthew.d.roper@intel.com --- drivers/gpu/drm/i915/i915_drv.h | 2 ++ drivers/gpu/drm/i915/i915_pci.c | 21 +++++++++++++++++++++ drivers/gpu/drm/i915/intel_device_info.c | 1 + drivers/gpu/drm/i915/intel_device_info.h | 1 + 4 files changed, 25 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index a6cf9716d6aa..3ed9021c615d 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1059,6 +1059,8 @@ IS_SUBPLATFORM(const struct drm_i915_private *i915, #define IS_ALDERLAKE_P(dev_priv) IS_PLATFORM(dev_priv, INTEL_ALDERLAKE_P) #define IS_XEHPSDV(dev_priv) IS_PLATFORM(dev_priv, INTEL_XEHPSDV) #define IS_DG2(dev_priv) IS_PLATFORM(dev_priv, INTEL_DG2) +#define IS_PONTEVECCHIO(dev_priv) IS_PLATFORM(dev_priv, INTEL_PONTEVECCHIO) + #define IS_DG2_G10(dev_priv) \ IS_SUBPLATFORM(dev_priv, INTEL_DG2, INTEL_SUBPLATFORM_G10) #define IS_DG2_G11(dev_priv) \ diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c index 2efd2201359e..987bdeb090a5 100644 --- a/drivers/gpu/drm/i915/i915_pci.c +++ b/drivers/gpu/drm/i915/i915_pci.c @@ -1076,6 +1076,27 @@ static const struct intel_device_info ats_m_info = { .require_force_probe = 1, }; +#define XE_HPC_FEATURES \ + XE_HP_FEATURES, \ + .dma_mask_size = 52 + +__maybe_unused +static const struct intel_device_info pvc_info = { + XE_HPC_FEATURES, + XE_HPM_FEATURES, + DGFX_FEATURES, + .graphics.rel = 60, + .media.rel = 60, + PLATFORM(INTEL_PONTEVECCHIO), + .display = { 0 }, + .has_flat_ccs = 0, + .platform_engine_mask = + BIT(BCS0) | + BIT(VCS0) | + BIT(CCS0) | BIT(CCS1) | BIT(CCS2) | BIT(CCS3), + .require_force_probe = 1, +}; + #undef PLATFORM /* diff --git a/drivers/gpu/drm/i915/intel_device_info.c b/drivers/gpu/drm/i915/intel_device_info.c index 41a5b98d1342..b0e62a411534 
100644 --- a/drivers/gpu/drm/i915/intel_device_info.c +++ b/drivers/gpu/drm/i915/intel_device_info.c @@ -72,6 +72,7 @@ static const char * const platform_names[] = { PLATFORM_NAME(ALDERLAKE_P), PLATFORM_NAME(XEHPSDV), PLATFORM_NAME(DG2), + PLATFORM_NAME(PONTEVECCHIO), }; #undef PLATFORM_NAME diff --git a/drivers/gpu/drm/i915/intel_device_info.h b/drivers/gpu/drm/i915/intel_device_info.h index 576d15a04c9e..ec0b8095e7fa 100644 --- a/drivers/gpu/drm/i915/intel_device_info.h +++ b/drivers/gpu/drm/i915/intel_device_info.h @@ -88,6 +88,7 @@ enum intel_platform { INTEL_ALDERLAKE_P, INTEL_XEHPSDV, INTEL_DG2, + INTEL_PONTEVECCHIO, INTEL_MAX_PLATFORMS }; From ea3ce08cb42b8a4d482282842dd93ae4a00d2506 Mon Sep 17 00:00:00 2001 From: Kefeng Wang Date: Tue, 3 May 2022 15:49:37 +0100 Subject: [PATCH 15/16] drm/i915: use IOMEM_ERR_PTR() directly Use IOMEM_ERR_PTR() instead of self defined IO_ERR_PTR(). Signed-off-by: Kefeng Wang Reviewed-by: Jani Nikula Signed-off-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20220503144937.679424-1-tvrtko.ursulin@linux.intel.com --- drivers/gpu/drm/i915/i915_vma.c | 4 ++-- drivers/gpu/drm/i915/i915_vma.h | 1 - 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c index cb3c9435a608..3b6482f43d6a 100644 --- a/drivers/gpu/drm/i915/i915_vma.c +++ b/drivers/gpu/drm/i915/i915_vma.c @@ -548,7 +548,7 @@ void __iomem *i915_vma_pin_iomap(struct i915_vma *vma) int err; if (WARN_ON_ONCE(vma->obj->flags & I915_BO_ALLOC_GPU_ONLY)) - return IO_ERR_PTR(-EINVAL); + return IOMEM_ERR_PTR(-EINVAL); if (!i915_gem_object_is_lmem(vma->obj)) { if (GEM_WARN_ON(!i915_vma_is_map_and_fenceable(vma))) { @@ -601,7 +601,7 @@ void __iomem *i915_vma_pin_iomap(struct i915_vma *vma) err_unpin: __i915_vma_unpin(vma); err: - return IO_ERR_PTR(err); + return IOMEM_ERR_PTR(err); } void i915_vma_flush_writes(struct i915_vma *vma) diff --git a/drivers/gpu/drm/i915/i915_vma.h 
b/drivers/gpu/drm/i915/i915_vma.h index 6034991d89fe..88ca0bd9c900 100644 --- a/drivers/gpu/drm/i915/i915_vma.h +++ b/drivers/gpu/drm/i915/i915_vma.h @@ -317,7 +317,6 @@ static inline bool i915_node_color_differs(const struct drm_mm_node *node, * Returns a valid iomapped pointer or ERR_PTR. */ void __iomem *i915_vma_pin_iomap(struct i915_vma *vma); -#define IO_ERR_PTR(x) ((void __iomem *)ERR_PTR(x)) /** * i915_vma_unpin_iomap - unpins the mapping returned from i915_vma_iomap From 1df1c79cbb7ac9bf148930be3418973c76ba8dde Mon Sep 17 00:00:00 2001 From: Karol Herbst Date: Wed, 20 Apr 2022 11:57:20 +0200 Subject: [PATCH 16/16] drm/i915: Fix race in __i915_vma_remove_closed i915_vma_reopen checked if the vma is closed before without taking the lock. So multiple threads could attempt removing the vma. Instead the lock needs to be taken before actually checking. v2: move struct declaration Cc: Chris Wilson Cc: intel-gfx@lists.freedesktop.org Cc: dri-devel@lists.freedesktop.org Cc: # v5.3+ Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/5732 Signed-off-by: Karol Herbst Fixes: 155ab8836caa ("drm/i915: Move object close under its own lock") Reviewed-by: Tvrtko Ursulin Signed-off-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20220420095720.3331609-1-kherbst@redhat.com --- drivers/gpu/drm/i915/i915_vma.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c index 3b6482f43d6a..79c286f85413 100644 --- a/drivers/gpu/drm/i915/i915_vma.c +++ b/drivers/gpu/drm/i915/i915_vma.c @@ -1610,17 +1610,17 @@ void i915_vma_close(struct i915_vma *vma) static void __i915_vma_remove_closed(struct i915_vma *vma) { - struct intel_gt *gt = vma->vm->gt; - - spin_lock_irq(&gt->closed_lock); list_del_init(&vma->closed_link); - spin_unlock_irq(&gt->closed_lock); } void i915_vma_reopen(struct i915_vma *vma) { + struct intel_gt *gt = vma->vm->gt; + + spin_lock_irq(&gt->closed_lock); if
(i915_vma_is_closed(vma)) __i915_vma_remove_closed(vma); + spin_unlock_irq(&gt->closed_lock); } static void force_unbind(struct i915_vma *vma) @@ -1636,6 +1636,7 @@ static void force_unbind(struct i915_vma *vma) static void release_references(struct i915_vma *vma, bool vm_ddestroy) { struct drm_i915_gem_object *obj = vma->obj; + struct intel_gt *gt = vma->vm->gt; GEM_BUG_ON(i915_vma_is_active(vma)); @@ -1646,7 +1647,9 @@ static void release_references(struct i915_vma *vma, bool vm_ddestroy) spin_unlock(&obj->vma.lock); + spin_lock_irq(&gt->closed_lock); __i915_vma_remove_closed(vma); + spin_unlock_irq(&gt->closed_lock); if (vm_ddestroy) i915_vm_resv_put(vma->vm);