From ddb24fc525ddaf35130d96478f3f18682a9d5926 Mon Sep 17 00:00:00 2001 From: Nirmoy Das Date: Tue, 4 Apr 2023 16:30:56 +0200 Subject: [PATCH 001/108] drm/i915/ttm: Add I915_BO_PREALLOC MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add a mechanism to preserve existing data when creating a TTM object with the I915_BO_ALLOC_USER flag. This will be used in the subsequent patch where the I915_BO_ALLOC_USER flag will be applied to the framebuffer object. For a pre-allocated framebuffer without the I915_BO_PREALLOC flag. TTM would clear the content, which is not desirable. Cc: Matthew Auld Cc: Andi Shyti Cc: Andrzej Hajda Cc: Ville Syrjälä Cc: Jani Nikula Cc: Imre Deak Signed-off-by: Nirmoy Das Reviewed-by: Andrzej Hajda Reviewed-by: Andi Shyti Link: https://patchwork.freedesktop.org/patch/msgid/20230404143100.10452-1-nirmoy.das@intel.com --- drivers/gpu/drm/i915/gem/i915_gem_object_types.h | 15 +++++++++++---- drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c | 5 +++-- 2 files changed, 14 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h index 19c9bdd8f905..8540edff164b 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h @@ -328,6 +328,12 @@ struct drm_i915_gem_object { */ #define I915_BO_ALLOC_GPU_ONLY BIT(6) #define I915_BO_ALLOC_CCS_AUX BIT(7) +/* + * Object is allowed to retain its initial data and will not be cleared on first + * access if used along with I915_BO_ALLOC_USER. This is mainly to keep + * preallocated framebuffer data intact while transitioning it to i915drmfb. + */ +#define I915_BO_PREALLOC BIT(8) #define I915_BO_ALLOC_FLAGS (I915_BO_ALLOC_CONTIGUOUS | \ I915_BO_ALLOC_VOLATILE | \ I915_BO_ALLOC_CPU_CLEAR | \ @@ -335,10 +341,11 @@ struct drm_i915_gem_object { I915_BO_ALLOC_PM_VOLATILE | \ I915_BO_ALLOC_PM_EARLY | \ I915_BO_ALLOC_GPU_ONLY | \ - I915_BO_ALLOC_CCS_AUX) -#define I915_BO_READONLY BIT(8) -#define I915_TILING_QUIRK_BIT 9 /* unknown swizzling; do not release! */ -#define I915_BO_PROTECTED BIT(10) + I915_BO_ALLOC_CCS_AUX | \ + I915_BO_PREALLOC) +#define I915_BO_READONLY BIT(9) +#define I915_TILING_QUIRK_BIT 10 /* unknown swizzling; do not release! */ +#define I915_BO_PROTECTED BIT(11) /** * @mem_flags - Mutable placement-related flags * diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c b/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c index 6e3de0f33add..46b398ccd250 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c @@ -560,7 +560,7 @@ int i915_ttm_move(struct ttm_buffer_object *bo, bool evict, struct dma_fence *migration_fence = NULL; struct ttm_tt *ttm = bo->ttm; struct i915_refct_sgt *dst_rsgt; - bool clear; + bool clear, prealloc_bo; int ret; if (GEM_WARN_ON(i915_ttm_is_ghost_object(bo))) { @@ -590,7 +590,8 @@ int i915_ttm_move(struct ttm_buffer_object *bo, bool evict, return PTR_ERR(dst_rsgt); clear = !i915_ttm_cpu_maps_iomem(bo->resource) && (!ttm || !ttm_tt_is_populated(ttm)); - if (!(clear && ttm && !(ttm->page_flags & TTM_TT_FLAG_ZERO_ALLOC))) { + prealloc_bo = obj->flags & I915_BO_PREALLOC; + if (!(clear && ttm && !((ttm->page_flags & TTM_TT_FLAG_ZERO_ALLOC) && !prealloc_bo))) { struct i915_deps deps; i915_deps_init(&deps, GFP_KERNEL | __GFP_NORETRY | __GFP_NOWARN); From ddb78a51fac65e8db2316ded59e27ab621aea856 Mon Sep 17 00:00:00 2001 From: Nirmoy Das Date: Tue, 4 Apr 2023 16:30:57 +0200 Subject: [PATCH 002/108] drm/i915/display: Set I915_BO_ALLOC_USER for fb MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Framebuffer is exposed to userspace so make sure we set proper flags for it. Set I915_BO_PREALLOC for prealloced fb so that ttm won't clear existing data. Cc: Matthew Auld Cc: Andi Shyti Cc: Andrzej Hajda Cc: Ville Syrjälä Cc: Jani Nikula Cc: Imre Deak Signed-off-by: Nirmoy Das Reviewed-by: Andi Shyti Reviewed-by: Andrzej Hajda Link: https://patchwork.freedesktop.org/patch/msgid/20230404143100.10452-2-nirmoy.das@intel.com --- drivers/gpu/drm/i915/display/intel_fbdev.c | 3 ++- drivers/gpu/drm/i915/display/intel_plane_initial.c | 4 +++- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_fbdev.c b/drivers/gpu/drm/i915/display/intel_fbdev.c index 03ed4607a46d..9ea49aaa33ca 100644 --- a/drivers/gpu/drm/i915/display/intel_fbdev.c +++ b/drivers/gpu/drm/i915/display/intel_fbdev.c @@ -163,7 +163,8 @@ static int intelfb_alloc(struct drm_fb_helper *helper, obj = ERR_PTR(-ENODEV); if (HAS_LMEM(dev_priv)) { obj = i915_gem_object_create_lmem(dev_priv, size, - I915_BO_ALLOC_CONTIGUOUS); + I915_BO_ALLOC_CONTIGUOUS | + I915_BO_ALLOC_USER); } else { /* * If the FB is too big, just don't use it since fbdev is not very diff --git a/drivers/gpu/drm/i915/display/intel_plane_initial.c b/drivers/gpu/drm/i915/display/intel_plane_initial.c index 76be796df255..3a95d24eab74 100644 --- a/drivers/gpu/drm/i915/display/intel_plane_initial.c +++ b/drivers/gpu/drm/i915/display/intel_plane_initial.c @@ -110,7 +110,9 @@ initial_plane_vma(struct drm_i915_private *i915, size * 2 > i915->stolen_usable_size) return NULL; - obj = i915_gem_object_create_region_at(mem, phys_base, size, 0); + obj = i915_gem_object_create_region_at(mem, phys_base, size, + I915_BO_ALLOC_USER | + I915_BO_PREALLOC); if (IS_ERR(obj)) return NULL; From eaee1c08586395182e0004b3512a2f83570ea461 Mon Sep 17 00:00:00 2001 From: Nirmoy Das Date: Tue, 4 Apr 2023 16:30:58 +0200 Subject: [PATCH 003/108] drm/i915: Add a function to mmap framebuffer obj MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Implement i915_gem_fb_mmap() to enable fb_ops.fb_mmap() callback for i915's framebuffer objects. v2: add a comment why i915_gem_object_get() needed(Andi). v3: mmap also ttm objects. Cc: Matthew Auld Cc: Andi Shyti Cc: Ville Syrjälä Cc: Jani Nikula Cc: Imre Deak Signed-off-by: Nirmoy Das Reviewed-by: Andi Shyti Link: https://patchwork.freedesktop.org/patch/msgid/20230404143100.10452-3-nirmoy.das@intel.com --- drivers/gpu/drm/i915/gem/i915_gem_mman.c | 137 +++++++++++++++-------- drivers/gpu/drm/i915/gem/i915_gem_mman.h | 2 +- 2 files changed, 93 insertions(+), 46 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/i915_gem_mman.c index 4f69bff63068..d036ef2dcdba 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_mman.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_mman.c @@ -927,53 +927,15 @@ static struct file *mmap_singleton(struct drm_i915_private *i915) return file; } -/* - * This overcomes the limitation in drm_gem_mmap's assignment of a - * drm_gem_object as the vma->vm_private_data. Since we need to - * be able to resolve multiple mmap offsets which could be tied - * to a single gem object. - */ -int i915_gem_mmap(struct file *filp, struct vm_area_struct *vma) +static int +i915_gem_object_mmap(struct drm_i915_gem_object *obj, + struct i915_mmap_offset *mmo, + struct vm_area_struct *vma) { - struct drm_vma_offset_node *node; - struct drm_file *priv = filp->private_data; - struct drm_device *dev = priv->minor->dev; - struct drm_i915_gem_object *obj = NULL; - struct i915_mmap_offset *mmo = NULL; + struct drm_i915_private *i915 = to_i915(obj->base.dev); + struct drm_device *dev = &i915->drm; struct file *anon; - if (drm_dev_is_unplugged(dev)) - return -ENODEV; - - rcu_read_lock(); - drm_vma_offset_lock_lookup(dev->vma_offset_manager); - node = drm_vma_offset_exact_lookup_locked(dev->vma_offset_manager, - vma->vm_pgoff, - vma_pages(vma)); - if (node && drm_vma_node_is_allowed(node, priv)) { - /* - * Skip 0-refcnted objects as it is in the process of being - * destroyed and will be invalid when the vma manager lock - * is released. - */ - if (!node->driver_private) { - mmo = container_of(node, struct i915_mmap_offset, vma_node); - obj = i915_gem_object_get_rcu(mmo->obj); - - GEM_BUG_ON(obj && obj->ops->mmap_ops); - } else { - obj = i915_gem_object_get_rcu - (container_of(node, struct drm_i915_gem_object, - base.vma_node)); - - GEM_BUG_ON(obj && !obj->ops->mmap_ops); - } - } - drm_vma_offset_unlock_lookup(dev->vma_offset_manager); - rcu_read_unlock(); - if (!obj) - return node ? -EACCES : -EINVAL; - if (i915_gem_object_is_readonly(obj)) { if (vma->vm_flags & VM_WRITE) { i915_gem_object_put(obj); @@ -1005,7 +967,7 @@ int i915_gem_mmap(struct file *filp, struct vm_area_struct *vma) if (obj->ops->mmap_ops) { vma->vm_page_prot = pgprot_decrypted(vm_get_page_prot(vma->vm_flags)); vma->vm_ops = obj->ops->mmap_ops; - vma->vm_private_data = node->driver_private; + vma->vm_private_data = obj->base.vma_node.driver_private; return 0; } @@ -1043,6 +1005,91 @@ int i915_gem_mmap(struct file *filp, struct vm_area_struct *vma) return 0; } +/* + * This overcomes the limitation in drm_gem_mmap's assignment of a + * drm_gem_object as the vma->vm_private_data. Since we need to + * be able to resolve multiple mmap offsets which could be tied + * to a single gem object. + */ +int i915_gem_mmap(struct file *filp, struct vm_area_struct *vma) +{ + struct drm_vma_offset_node *node; + struct drm_file *priv = filp->private_data; + struct drm_device *dev = priv->minor->dev; + struct drm_i915_gem_object *obj = NULL; + struct i915_mmap_offset *mmo = NULL; + + if (drm_dev_is_unplugged(dev)) + return -ENODEV; + + rcu_read_lock(); + drm_vma_offset_lock_lookup(dev->vma_offset_manager); + node = drm_vma_offset_exact_lookup_locked(dev->vma_offset_manager, + vma->vm_pgoff, + vma_pages(vma)); + if (node && drm_vma_node_is_allowed(node, priv)) { + /* + * Skip 0-refcnted objects as it is in the process of being + * destroyed and will be invalid when the vma manager lock + * is released. + */ + if (!node->driver_private) { + mmo = container_of(node, struct i915_mmap_offset, vma_node); + obj = i915_gem_object_get_rcu(mmo->obj); + + GEM_BUG_ON(obj && obj->ops->mmap_ops); + } else { + obj = i915_gem_object_get_rcu + (container_of(node, struct drm_i915_gem_object, + base.vma_node)); + + GEM_BUG_ON(obj && !obj->ops->mmap_ops); + } + } + drm_vma_offset_unlock_lookup(dev->vma_offset_manager); + rcu_read_unlock(); + if (!obj) + return node ? -EACCES : -EINVAL; + + return i915_gem_object_mmap(obj, mmo, vma); +} + +int i915_gem_fb_mmap(struct drm_i915_gem_object *obj, struct vm_area_struct *vma) +{ + struct drm_i915_private *i915 = to_i915(obj->base.dev); + struct drm_device *dev = &i915->drm; + struct i915_mmap_offset *mmo = NULL; + enum i915_mmap_type mmap_type; + struct i915_ggtt *ggtt = to_gt(i915)->ggtt; + + if (drm_dev_is_unplugged(dev)) + return -ENODEV; + + /* handle ttm object */ + if (obj->ops->mmap_ops) { + /* + * ttm fault handler, ttm_bo_vm_fault_reserved() uses fake offset + * to calculate page offset so set that up. + */ + vma->vm_pgoff += drm_vma_node_start(&obj->base.vma_node); + } else { + /* handle stolen and smem objects */ + mmap_type = i915_ggtt_has_aperture(ggtt) ? I915_MMAP_TYPE_GTT : I915_MMAP_TYPE_WC; + mmo = mmap_offset_attach(obj, mmap_type, NULL); + if (!mmo) + return -ENODEV; + } + + /* + * When we install vm_ops for mmap we are too late for + * the vm_ops->open() which increases the ref_count of + * this obj and then it gets decreased by the vm_ops->close(). + * To balance this increase the obj ref_count here. + */ + obj = i915_gem_object_get(obj); + return i915_gem_object_mmap(obj, mmo, vma); +} + #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) #include "selftests/i915_gem_mman.c" #endif diff --git a/drivers/gpu/drm/i915/gem/i915_gem_mman.h b/drivers/gpu/drm/i915/gem/i915_gem_mman.h index 1fa91b3033b3..196417fd0f5c 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_mman.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_mman.h @@ -29,5 +29,5 @@ void i915_gem_object_release_mmap_gtt(struct drm_i915_gem_object *obj); void i915_gem_object_runtime_pm_release_mmap_offset(struct drm_i915_gem_object *obj); void i915_gem_object_release_mmap_offset(struct drm_i915_gem_object *obj); - +int i915_gem_fb_mmap(struct drm_i915_gem_object *obj, struct vm_area_struct *vma); #endif From 63b685efaa4d6b9db388857a2e6f5f5f11454f8d Mon Sep 17 00:00:00 2001 From: Nirmoy Das Date: Tue, 4 Apr 2023 16:30:59 +0200 Subject: [PATCH 004/108] drm/i915/display: Add helper func to get intel_fbdev from drm_fb_helper MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add a helper function to retrieve struct intel_fbdev from struct drm_fb_helper. Cc: Matthew Auld Cc: Andi Shyti Cc: Ville Syrjälä Cc: Jani Nikula Cc: Imre Deak Signed-off-by: Nirmoy Das Reviewed-by: Jani Nikula Reviewed-by: Andi Shyti Reviewed-by: Andrzej Hajda Link: https://patchwork.freedesktop.org/patch/msgid/20230404143100.10452-4-nirmoy.das@intel.com --- drivers/gpu/drm/i915/display/intel_fbdev.c | 23 ++++++++++------------ 1 file changed, 10 insertions(+), 13 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_fbdev.c b/drivers/gpu/drm/i915/display/intel_fbdev.c index 9ea49aaa33ca..64971a8d5531 100644 --- a/drivers/gpu/drm/i915/display/intel_fbdev.c +++ b/drivers/gpu/drm/i915/display/intel_fbdev.c @@ -67,6 +67,11 @@ struct intel_fbdev { struct mutex hpd_lock; }; +static struct intel_fbdev *to_intel_fbdev(struct drm_fb_helper *fb_helper) +{ + return container_of(fb_helper, struct intel_fbdev, helper); +} + static struct intel_frontbuffer *to_frontbuffer(struct intel_fbdev *ifbdev) { return ifbdev->fb->frontbuffer; @@ -79,9 +84,7 @@ static void intel_fbdev_invalidate(struct intel_fbdev *ifbdev) static int intel_fbdev_set_par(struct fb_info *info) { - struct drm_fb_helper *fb_helper = info->par; - struct intel_fbdev *ifbdev = - container_of(fb_helper, struct intel_fbdev, helper); + struct intel_fbdev *ifbdev = to_intel_fbdev(info->par); int ret; ret = drm_fb_helper_set_par(info); @@ -93,9 +96,7 @@ static int intel_fbdev_set_par(struct fb_info *info) static int intel_fbdev_blank(int blank, struct fb_info *info) { - struct drm_fb_helper *fb_helper = info->par; - struct intel_fbdev *ifbdev = - container_of(fb_helper, struct intel_fbdev, helper); + struct intel_fbdev *ifbdev = to_intel_fbdev(info->par); int ret; ret = drm_fb_helper_blank(blank, info); @@ -108,9 +109,7 @@ static int intel_fbdev_blank(int blank, struct fb_info *info) static int intel_fbdev_pan_display(struct fb_var_screeninfo *var, struct fb_info *info) { - struct drm_fb_helper *fb_helper = info->par; - struct intel_fbdev *ifbdev = - container_of(fb_helper, struct intel_fbdev, helper); + struct intel_fbdev *ifbdev = to_intel_fbdev(info->par); int ret; ret = drm_fb_helper_pan_display(var, info); @@ -136,8 +135,7 @@ static const struct fb_ops intelfb_ops = { static int intelfb_alloc(struct drm_fb_helper *helper, struct drm_fb_helper_surface_size *sizes) { - struct intel_fbdev *ifbdev = - container_of(helper, struct intel_fbdev, helper); + struct intel_fbdev *ifbdev = to_intel_fbdev(helper); struct drm_framebuffer *fb; struct drm_device *dev = helper->dev; struct drm_i915_private *dev_priv = to_i915(dev); @@ -194,8 +192,7 @@ static int intelfb_alloc(struct drm_fb_helper *helper, static int intelfb_create(struct drm_fb_helper *helper, struct drm_fb_helper_surface_size *sizes) { - struct intel_fbdev *ifbdev = - container_of(helper, struct intel_fbdev, helper); + struct intel_fbdev *ifbdev = to_intel_fbdev(helper); struct intel_framebuffer *intel_fb = ifbdev->fb; struct drm_device *dev = helper->dev; struct drm_i915_private *dev_priv = to_i915(dev); From e24e6d695377ca70008ffc39695c3975b3e177b6 Mon Sep 17 00:00:00 2001 From: Nirmoy Das Date: Tue, 4 Apr 2023 16:31:00 +0200 Subject: [PATCH 005/108] drm/i915/display: Implement fb_mmap callback function MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If stolen memory allocation fails for fbdev, the driver will fallback to system memory. Calculation of smem_start is wrong for such framebuffer objs if the platform comes with no gmadr or no aperture. Solve this by adding fb_mmap callback which will use GTT if aperture is available otherwise will use cpu to access the framebuffer. v2: Use to_intel_fbdev() function(Jani) Cc: Matthew Auld Cc: Andi Shyti Cc: Ville Syrjälä Cc: Jani Nikula Cc: Imre Deak Signed-off-by: Nirmoy Das Reviewed-by: Andi Shyti Reviewed-by: Andrzej Hajda Link: https://patchwork.freedesktop.org/patch/msgid/20230404143100.10452-5-nirmoy.das@intel.com --- drivers/gpu/drm/i915/display/intel_fbdev.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/drivers/gpu/drm/i915/display/intel_fbdev.c b/drivers/gpu/drm/i915/display/intel_fbdev.c index 64971a8d5531..a44a5535db1b 100644 --- a/drivers/gpu/drm/i915/display/intel_fbdev.c +++ b/drivers/gpu/drm/i915/display/intel_fbdev.c @@ -40,8 +40,10 @@ #include #include #include +#include #include "gem/i915_gem_lmem.h" +#include "gem/i915_gem_mman.h" #include "i915_drv.h" #include "intel_display_types.h" @@ -119,6 +121,15 @@ static int intel_fbdev_pan_display(struct fb_var_screeninfo *var, return ret; } +static int intel_fbdev_mmap(struct fb_info *info, struct vm_area_struct *vma) +{ + struct intel_fbdev *fbdev = to_intel_fbdev(info->par); + struct drm_gem_object *bo = drm_gem_fb_get_obj(&fbdev->fb->base, 0); + struct drm_i915_gem_object *obj = to_intel_bo(bo); + + return i915_gem_fb_mmap(obj, vma); +} + static const struct fb_ops intelfb_ops = { .owner = THIS_MODULE, DRM_FB_HELPER_DEFAULT_OPS, @@ -130,6 +141,7 @@ static const struct fb_ops intelfb_ops = { .fb_imageblit = drm_fb_helper_cfb_imageblit, .fb_pan_display = intel_fbdev_pan_display, .fb_blank = intel_fbdev_blank, + .fb_mmap = intel_fbdev_mmap, }; static int intelfb_alloc(struct drm_fb_helper *helper, From 16fc9c08f0ec7b1c95f1ea4a16097acdb3fc943d Mon Sep 17 00:00:00 2001 From: Lionel Landwerlin Date: Fri, 7 Apr 2023 12:32:37 +0300 Subject: [PATCH 006/108] drm/i915: disable sampler indirect state in bindless heap By default the indirect state sampler data (border colors) are stored in the same heap as the SAMPLER_STATE structure. For userspace drivers that can be 2 different heaps (dynamic state heap & bindless sampler state heap). This means that border colors have to copied in 2 different places so that the same SAMPLER_STATE structure find the right data. This change is forcing the indirect state sampler data to only be in the dynamic state pool (more convenient for userspace drivers, they only have to have one copy of the border colors). This is reproducing the behavior of the Windows drivers. BSpec: 46052 Signed-off-by: Lionel Landwerlin Cc: stable@vger.kernel.org Reviewed-by: Haridhar Kalvala Signed-off-by: Matt Roper Link: https://patchwork.freedesktop.org/patch/msgid/20230407093237.3296286-1-lionel.g.landwerlin@intel.com --- drivers/gpu/drm/i915/gt/intel_gt_regs.h | 1 + drivers/gpu/drm/i915/gt/intel_workarounds.c | 19 +++++++++++++++++++ 2 files changed, 20 insertions(+) diff --git a/drivers/gpu/drm/i915/gt/intel_gt_regs.h b/drivers/gpu/drm/i915/gt/intel_gt_regs.h index 6115cc1805de..6903c66a4ed7 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_regs.h +++ b/drivers/gpu/drm/i915/gt/intel_gt_regs.h @@ -1147,6 +1147,7 @@ #define SC_DISABLE_POWER_OPTIMIZATION_EBB REG_BIT(9) #define GEN11_SAMPLER_ENABLE_HEADLESS_MSG REG_BIT(5) #define MTL_DISABLE_SAMPLER_SC_OOO REG_BIT(3) +#define GEN11_INDIRECT_STATE_BASE_ADDR_OVERRIDE REG_BIT(0) #define GEN9_HALF_SLICE_CHICKEN7 MCR_REG(0xe194) #define DG2_DISABLE_ROUND_ENABLE_ALLOW_FOR_SSLA REG_BIT(15) diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c index c0de5276b3d3..4e3cbeca9eec 100644 --- a/drivers/gpu/drm/i915/gt/intel_workarounds.c +++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c @@ -3051,6 +3051,25 @@ general_render_compute_wa_init(struct intel_engine_cs *engine, struct i915_wa_li add_render_compute_tuning_settings(i915, wal); + if (GRAPHICS_VER(i915) >= 11) { + /* This is not a Wa (although referred to as + * WaSetInidrectStateOverride in places), this allows + * applications that reference sampler states through + * the BindlessSamplerStateBaseAddress to have their + * border color relative to DynamicStateBaseAddress + * rather than BindlessSamplerStateBaseAddress. + * + * Otherwise SAMPLER_STATE border colors have to be + * copied in multiple heaps (DynamicStateBaseAddress & + * BindlessSamplerStateBaseAddress) + * + * BSpec: 46052 + */ + wa_mcr_masked_en(wal, + GEN10_SAMPLER_MODE, + GEN11_INDIRECT_STATE_BASE_ADDR_OVERRIDE); + } + if (IS_MTL_GRAPHICS_STEP(i915, M, STEP_B0, STEP_FOREVER) || IS_MTL_GRAPHICS_STEP(i915, P, STEP_B0, STEP_FOREVER)) /* Wa_14017856879 */ From b90b044c64f669cb20919fb5e5673933de59c653 Mon Sep 17 00:00:00 2001 From: Nirmoy Das Date: Tue, 4 Apr 2023 20:13:42 +0200 Subject: [PATCH 007/108] drm/i915/mtl: Disable stolen memory backed FB for A0 Stolen memory is not usable for MTL A0 stepping beyond certain access size and we have no control over userspace access size of /dev/fb which can be backed by stolen memory. So disable stolen memory backed fb by setting i915->dsm.usable_size to zero. v2: remove hsdes reference and fix commit message(Andi) v3: use revid as we want to target SOC stepping(Radhakrishna) Cc: Matthew Auld Cc: Andi Shyti Cc: Daniele Ceraolo Spurio Cc: Lucas De Marchi Cc: Radhakrishna Sripada Signed-off-by: Nirmoy Das Reviewed-by: Andi Shyti Reviewed-by: Radhakrishna Sripada Link: https://patchwork.freedesktop.org/patch/msgid/20230404181342.23362-1-nirmoy.das@intel.com --- drivers/gpu/drm/i915/gem/i915_gem_stolen.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_stolen.c b/drivers/gpu/drm/i915/gem/i915_gem_stolen.c index 8ac376c24aa2..ee492d823f1b 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_stolen.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_stolen.c @@ -535,6 +535,14 @@ static int i915_gem_init_stolen(struct intel_memory_region *mem) /* Basic memrange allocator for stolen space. */ drm_mm_init(&i915->mm.stolen, 0, i915->dsm.usable_size); + /* + * Access to stolen lmem beyond certain size for MTL A0 stepping + * would crash the machine. Disable stolen lmem for userspace access + * by setting usable_size to zero. + */ + if (IS_METEORLAKE(i915) && INTEL_REVID(i915) == 0x0) + i915->dsm.usable_size = 0; + return 0; } From adfbae9ffe339eed08d54a4eb87c93f4b35f214b Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Thu, 13 Apr 2023 13:03:49 -0700 Subject: [PATCH 008/108] drm/i915/gt: Avoid out-of-bounds access when loading HuC When HuC is loaded by GSC, there is no header definition for the kernel to look at and firmware is just handed to GSC. However when reading the version, it should still check the size of the blob to guarantee it's not incurring into out-of-bounds array access. If firmware is smaller than expected, the following message is now printed: # echo boom > /lib/firmware/i915/dg2_huc_gsc.bin # dmesg | grep -i huc [drm] GT0: HuC firmware i915/dg2_huc_gsc.bin: invalid size: 5 < 184 [drm] *ERROR* GT0: HuC firmware i915/dg2_huc_gsc.bin: fetch failed -ENODATA ... Even without this change the size, header and signature are still checked by GSC when loading, so this only avoids the out-of-bounds array access. Fixes: a7b516bd981f ("drm/i915/huc: Add fetch support for gsc-loaded HuC binary") Cc: Daniele Ceraolo Spurio Cc: Alan Previn Signed-off-by: Lucas De Marchi Reviewed-by: Daniele Ceraolo Spurio Link: https://patchwork.freedesktop.org/patch/msgid/20230413200349.3492571-1-lucas.demarchi@intel.com --- drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c | 21 +++++++++++++++++---- 1 file changed, 17 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c index 264c952f777b..24765c30a0e1 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c @@ -488,12 +488,25 @@ static void __force_fw_fetch_failures(struct intel_uc_fw *uc_fw, int e) } } -static int check_gsc_manifest(const struct firmware *fw, +static int check_gsc_manifest(struct intel_gt *gt, + const struct firmware *fw, struct intel_uc_fw *uc_fw) { u32 *dw = (u32 *)fw->data; - u32 version_hi = dw[HUC_GSC_VERSION_HI_DW]; - u32 version_lo = dw[HUC_GSC_VERSION_LO_DW]; + u32 version_hi, version_lo; + size_t min_size; + + /* Check the size of the blob before examining buffer contents */ + min_size = sizeof(u32) * (HUC_GSC_VERSION_LO_DW + 1); + if (unlikely(fw->size < min_size)) { + gt_warn(gt, "%s firmware %s: invalid size: %zu < %zu\n", + intel_uc_fw_type_repr(uc_fw->type), uc_fw->file_selected.path, + fw->size, min_size); + return -ENODATA; + } + + version_hi = dw[HUC_GSC_VERSION_HI_DW]; + version_lo = dw[HUC_GSC_VERSION_LO_DW]; uc_fw->file_selected.ver.major = FIELD_GET(HUC_GSC_MAJOR_VER_HI_MASK, version_hi); uc_fw->file_selected.ver.minor = FIELD_GET(HUC_GSC_MINOR_VER_HI_MASK, version_hi); @@ -664,7 +677,7 @@ static int check_fw_header(struct intel_gt *gt, return 0; if (uc_fw->loaded_via_gsc) - err = check_gsc_manifest(fw, uc_fw); + err = check_gsc_manifest(gt, fw, uc_fw); else err = check_ccs_header(gt, fw, uc_fw); if (err) From 8bfbdadce85c4c51689da10f39c805a7106d4567 Mon Sep 17 00:00:00 2001 From: Cong Liu Date: Sat, 15 Apr 2023 00:41:09 +0200 Subject: [PATCH 009/108] drm/i915: Fix memory leaks in i915 selftests This patch fixes memory leaks on error escapes in function fake_get_pages Fixes: c3bfba9a2225 ("drm/i915: Check for integer truncation on scatterlist creation") Signed-off-by: Cong Liu Reviewed-by: Andrzej Hajda Reviewed-by: Andi Shyti Signed-off-by: Andi Shyti Link: https://patchwork.freedesktop.org/patch/msgid/20230414224109.1051922-1-andi.shyti@linux.intel.com --- drivers/gpu/drm/i915/selftests/i915_gem_gtt.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c index 5361ce70d3f2..154801f1c468 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c @@ -69,8 +69,10 @@ static int fake_get_pages(struct drm_i915_gem_object *obj) rem = round_up(obj->base.size, BIT(31)) >> 31; /* restricted by sg_alloc_table */ - if (overflows_type(rem, unsigned int)) + if (overflows_type(rem, unsigned int)) { + kfree(pages); return -E2BIG; + } if (sg_alloc_table(pages, rem, GFP)) { kfree(pages); From d1f3b5e92cbab10b4710ba33e20b264c852d19aa Mon Sep 17 00:00:00 2001 From: Andi Shyti Date: Tue, 18 Apr 2023 01:53:56 +0200 Subject: [PATCH 010/108] drm/i915: Make IRQ reset and postinstall multi-gt aware In multi-gt systems IRQs need to be reset and enabled per GT. This might add some redundancy when handling interrupts for engines that might not exist in every tile, but helps to keep the code cleaner and more understandable. Signed-off-by: Andi Shyti Cc: Tvrtko Ursulin Reviewed-by: Matt Roper Reviewed-by: Andrzej Hajda Link: https://patchwork.freedesktop.org/patch/msgid/20230417235356.1291060-1-andi.shyti@linux.intel.com --- drivers/gpu/drm/i915/i915_irq.c | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index d24bdea65a3d..53c83e257055 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -2762,12 +2762,15 @@ static void gen11_irq_reset(struct drm_i915_private *dev_priv) static void dg1_irq_reset(struct drm_i915_private *dev_priv) { - struct intel_gt *gt = to_gt(dev_priv); - struct intel_uncore *uncore = gt->uncore; + struct intel_uncore *uncore = &dev_priv->uncore; + struct intel_gt *gt; + unsigned int i; dg1_master_intr_disable(dev_priv->uncore.regs); - gen11_gt_irq_reset(gt); + for_each_gt(gt, dev_priv, i) + gen11_gt_irq_reset(gt); + gen11_display_irq_reset(dev_priv); GEN3_IRQ_RESET(uncore, GEN11_GU_MISC_); @@ -3425,11 +3428,13 @@ static void gen11_irq_postinstall(struct drm_i915_private *dev_priv) static void dg1_irq_postinstall(struct drm_i915_private *dev_priv) { - struct intel_gt *gt = to_gt(dev_priv); - struct intel_uncore *uncore = gt->uncore; + struct intel_uncore *uncore = &dev_priv->uncore; u32 gu_misc_masked = GEN11_GU_MISC_GSE; + struct intel_gt *gt; + unsigned int i; - gen11_gt_irq_postinstall(gt); + for_each_gt(gt, dev_priv, i) + gen11_gt_irq_postinstall(gt); GEN3_IRQ_INIT(uncore, GEN11_GU_MISC_, ~gu_misc_masked, gu_misc_masked); From a6704f4a5452950e7ff22cab0cff23477dbdf0e0 Mon Sep 17 00:00:00 2001 From: Tejas Upadhyay Date: Wed, 19 Apr 2023 11:30:34 +0530 Subject: [PATCH 011/108] drm/i915/gt: Consider multi-gt instead of to_gt() In order to enable complete multi-GT, use the GT reference obtained directly from the engine, rather than relying on the to_gt(), which only provides a reference to the primary GT. Problem appear when it runs on platform like MTL where different set of engines are possible on different GTs. Cc: Andi Shyti Signed-off-by: Tejas Upadhyay Reviewed-by: Andi Shyti Signed-off-by: Andi Shyti Link: https://patchwork.freedesktop.org/patch/msgid/20230419060036.3422635-2-tejas.upadhyay@intel.com --- drivers/gpu/drm/i915/gt/intel_engine_user.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gt/intel_engine_user.c b/drivers/gpu/drm/i915/gt/intel_engine_user.c index cd4f1b126f75..dcedff41a825 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_user.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_user.c @@ -117,7 +117,7 @@ static void set_scheduler_caps(struct drm_i915_private *i915) disabled |= (I915_SCHEDULER_CAP_ENABLED | I915_SCHEDULER_CAP_PRIORITY); - if (intel_uc_uses_guc_submission(&to_gt(i915)->uc)) + if (intel_uc_uses_guc_submission(&engine->gt->uc)) enabled |= I915_SCHEDULER_CAP_STATIC_PRIORITY_MAP; for (i = 0; i < ARRAY_SIZE(map); i++) { From a347279dec1eb68e995f864ae1fd41ab57cbcd21 Mon Sep 17 00:00:00 2001 From: Tejas Upadhyay Date: Wed, 19 Apr 2023 11:30:35 +0530 Subject: [PATCH 012/108] drm/i915/gem: Consider multi-gt instead of to_gt() In order to enable complete multi-GT, use the GT reference obtained directly from the engine, rather than relying on the to_gt(), which only provides a reference to the primary GT. Problem appear when it runs on platform like MTL where different set of engines are possible on different GTs. Signed-off-by: Tejas Upadhyay Reviewed-by: Andi Shyti Signed-off-by: Andi Shyti Link: https://patchwork.freedesktop.org/patch/msgid/20230419060036.3422635-3-tejas.upadhyay@intel.com --- drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c index a81fa6a20f5a..2697fbaa2ceb 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c @@ -93,7 +93,7 @@ static int live_nop_switch(void *arg) } if (i915_request_wait(rq, 0, 10 * HZ) < 0) { pr_err("Failed to populated %d contexts\n", nctx); - intel_gt_set_wedged(to_gt(i915)); + intel_gt_set_wedged(engine->gt); i915_request_put(rq); err = -EIO; goto out_file; @@ -149,7 +149,7 @@ static int live_nop_switch(void *arg) if (i915_request_wait(rq, 0, HZ / 5) < 0) { pr_err("Switching between %ld contexts timed out\n", prime); - intel_gt_set_wedged(to_gt(i915)); + intel_gt_set_wedged(engine->gt); i915_request_put(rq); break; } From 0c29efa23f5c2f51b744856200af0cff3e287e9e Mon Sep 17 00:00:00 2001 From: Tejas Upadhyay Date: Wed, 19 Apr 2023 11:30:36 +0530 Subject: [PATCH 013/108] drm/i915/selftests: Consider multi-gt instead of to_gt() In order to enable complete multi-GT, loop through all the GTs, rather than relying on the to_gt(), which only provides a reference to the primary GT. Problem appear when it runs on platform like MTL where different set of engines are possible on different GTs. Signed-off-by: Tejas Upadhyay Reviewed-by: Andi Shyti Signed-off-by: Andi Shyti Link: https://patchwork.freedesktop.org/patch/msgid/20230419060036.3422635-4-tejas.upadhyay@intel.com --- .../gpu/drm/i915/selftests/igt_live_test.c | 46 +++++++++++-------- 1 file changed, 27 insertions(+), 19 deletions(-) diff --git a/drivers/gpu/drm/i915/selftests/igt_live_test.c b/drivers/gpu/drm/i915/selftests/igt_live_test.c index 72b58b66692a..714b7afc490b 100644 --- a/drivers/gpu/drm/i915/selftests/igt_live_test.c +++ b/drivers/gpu/drm/i915/selftests/igt_live_test.c @@ -16,28 +16,32 @@ int igt_live_test_begin(struct igt_live_test *t, const char *func, const char *name) { - struct intel_gt *gt = to_gt(i915); struct intel_engine_cs *engine; enum intel_engine_id id; + struct intel_gt *gt; + unsigned int i; int err; t->i915 = i915; t->func = func; t->name = name; - err = intel_gt_wait_for_idle(gt, MAX_SCHEDULE_TIMEOUT); - if (err) { - pr_err("%s(%s): failed to idle before, with err=%d!", - func, name, err); - return err; + for_each_gt(gt, i915, i) { + + err = intel_gt_wait_for_idle(gt, MAX_SCHEDULE_TIMEOUT); + if (err) { + pr_err("%s(%s): failed to idle before, with err=%d!", + func, name, err); + return err; + } + + for_each_engine(engine, gt, id) + t->reset_engine[id] = + i915_reset_engine_count(&i915->gpu_error, engine); } t->reset_global = i915_reset_count(&i915->gpu_error); - for_each_engine(engine, gt, id) - t->reset_engine[id] = - i915_reset_engine_count(&i915->gpu_error, engine); - return 0; } @@ -46,6 +50,8 @@ int igt_live_test_end(struct igt_live_test *t) struct drm_i915_private *i915 = t->i915; struct intel_engine_cs *engine; enum intel_engine_id id; + struct intel_gt *gt; + unsigned int i; if (igt_flush_test(i915)) return -EIO; @@ -57,16 +63,18 @@ int igt_live_test_end(struct igt_live_test *t) return -EIO; } - for_each_engine(engine, to_gt(i915), id) { - if (t->reset_engine[id] == - i915_reset_engine_count(&i915->gpu_error, engine)) - continue; + for_each_gt(gt, i915, i) { + for_each_engine(engine, gt, id) { + if (t->reset_engine[id] == + i915_reset_engine_count(&i915->gpu_error, engine)) + continue; - pr_err("%s(%s): engine '%s' was reset %d times!\n", - t->func, t->name, engine->name, - i915_reset_engine_count(&i915->gpu_error, engine) - - t->reset_engine[id]); - return -EIO; + pr_err("%s(%s): engine '%s' was reset %d times!\n", + t->func, t->name, engine->name, + i915_reset_engine_count(&i915->gpu_error, engine) - + t->reset_engine[id]); + return -EIO; + } } return 0; From 514b8a79aa85d800458cfb7909ed0e1a1c1bffa2 Mon Sep 17 00:00:00 2001 From: Madhumitha Tolakanahalli Pradeep Date: Tue, 18 Apr 2023 15:04:45 -0700 Subject: [PATCH 014/108] drm/i915/mtl: Extend Wa_22011802037 to MTL A-step Wa_22011802037 was being applied to all graphics_ver 11 & 12. This patch updates the if statement to apply the W/A to right platforms and extends it to MTL-M:A step. v1.1: Fix checkpatch warning. v2: Change the check to reflect the wa at other places(Lucas) Bspec: 66622 Cc: Lucas De Marchi Cc: Umesh Nerlige Ramappa Signed-off-by: Madhumitha Tolakanahalli Pradeep Signed-off-by: Radhakrishna Sripada Reviewed-by: Matt Atwood Link: https://patchwork.freedesktop.org/patch/msgid/20230418220446.2205509-4-radhakrishna.sripada@intel.com --- drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c index 88e881b100cf..ee3e8352637f 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c @@ -1629,16 +1629,16 @@ static void guc_reset_state(struct intel_context *ce, u32 head, bool scrub) static void guc_engine_reset_prepare(struct intel_engine_cs *engine) { - if (!IS_GRAPHICS_VER(engine->i915, 11, 12)) - return; - - intel_engine_stop_cs(engine); - /* * Wa_22011802037: In addition to stopping the cs, we need * to wait for any pending mi force wakeups */ - intel_engine_wait_for_pending_mi_fw(engine); + if (IS_MTL_GRAPHICS_STEP(engine->i915, M, STEP_A0, STEP_B0) || + (GRAPHICS_VER(engine->i915) >= 11 && + GRAPHICS_VER_FULL(engine->i915) < IP_VER(12, 70))) { + intel_engine_stop_cs(engine); + intel_engine_wait_for_pending_mi_fw(engine); + } } static void guc_reset_nop(struct intel_engine_cs *engine) From 3bece767dab2ffbd6f77be1a0ceb3e214f0144f8 Mon Sep 17 00:00:00 2001 From: Haridhar Kalvala Date: Tue, 18 Apr 2023 15:04:46 -0700 Subject: [PATCH 015/108] drm/i915/mtl: WA to clear RDOP clock gating Workaround implementation to clear RDOP clock gating. Bspec: 66622 Signed-off-by: Haridhar Kalvala Signed-off-by: Radhakrishna Sripada Reviewed-by: Matt Atwood Link: https://patchwork.freedesktop.org/patch/msgid/20230418220446.2205509-5-radhakrishna.sripada@intel.com --- drivers/gpu/drm/i915/gt/intel_workarounds.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c index b925ef47304b..312eb8b5f949 100644 --- a/drivers/gpu/drm/i915/gt/intel_workarounds.c +++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c @@ -1703,6 +1703,9 @@ xelpg_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal) /* Wa_18018781329 */ wa_mcr_write_or(wal, RENDER_MOD_CTRL, FORCE_MISS_FTLB); wa_mcr_write_or(wal, COMP_MOD_CTRL, FORCE_MISS_FTLB); + + /* Wa_14015795083 */ + wa_write_clr(wal, GEN7_MISCCPCTL, GEN12_DOP_CLOCK_GATE_RENDER_ENABLE); } /* From 7787af256504b13f0ab6c311ed7870a895b762b8 Mon Sep 17 00:00:00 2001 From: Fei Yang Date: Thu, 20 Apr 2023 12:23:49 +0200 Subject: [PATCH 016/108] drm/i915/mtl: Set has_llc=0 On MTL, LLC is not shared between GT and CPU, set has_llc=0. Signed-off-by: Fei Yang Reviewed-by: Andi Shyti Reviewed-by: Andrzej Hajda Reviewed-by: Nirmoy Das Signed-off-by: Matt Roper Link: https://patchwork.freedesktop.org/patch/msgid/20230420102349.15302-1-nirmoy.das@intel.com --- drivers/gpu/drm/i915/i915_pci.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c index cddb6e197972..025d32c0b161 100644 --- a/drivers/gpu/drm/i915/i915_pci.c +++ b/drivers/gpu/drm/i915/i915_pci.c @@ -1146,6 +1146,7 @@ static const struct intel_device_info mtl_info = { .has_flat_ccs = 0, .has_gmd_id = 1, .has_guc_deprivilege = 1, + .has_llc = 0, .has_mslice_steering = 0, .has_snoop = 1, .__runtime.memory_regions = REGION_SMEM | REGION_STOLEN_LMEM, From b76c0deef6273609c02ed5053209f6397cd1b0fb Mon Sep 17 00:00:00 2001 From: Madhumitha Tolakanahalli Pradeep Date: Fri, 21 Apr 2023 20:25:34 +0200 Subject: [PATCH 017/108] drm/i915/mtl: Define MOCS and PAT tables for MTL On MTL, GT can no longer allocate on LLC - only the CPU can. This, along with programming new register bits that MTL requires calls for a MOCS/PAT table update. Also the PAT index registers are multicasted for primary GT, and there is an address jump from index 7 to 8. This patch makes sure that these registers are programmed in the proper way. BSpec: 44509, 45101, 44235 Cc: Matt Roper Cc: Lucas De Marchi Signed-off-by: Madhumitha Tolakanahalli Pradeep Signed-off-by: Aravind Iddamsetty Signed-off-by: Nirmoy Das Signed-off-by: Fei Yang Reviewed-by: Andrzej Hajda Reviewed-by: Nirmoy Das Reviewed-by: Andi Shyti Signed-off-by: Andi Shyti Link: https://patchwork.freedesktop.org/patch/msgid/20230421182535.292670-2-andi.shyti@linux.intel.com --- drivers/gpu/drm/i915/gt/intel_gt_regs.h | 6 ++- drivers/gpu/drm/i915/gt/intel_gtt.c | 47 ++++++++++++++++- drivers/gpu/drm/i915/gt/intel_gtt.h | 8 +++ drivers/gpu/drm/i915/gt/intel_mocs.c | 70 ++++++++++++++++++++++++- 4 files changed, 128 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_gt_regs.h b/drivers/gpu/drm/i915/gt/intel_gt_regs.h index fd1f9cd35e9d..e8c3b762a92a 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_regs.h +++ b/drivers/gpu/drm/i915/gt/intel_gt_regs.h @@ -356,7 +356,11 @@ #define GEN7_TLB_RD_ADDR _MMIO(0x4700) #define GEN12_PAT_INDEX(index) _MMIO(0x4800 + (index) * 4) -#define XEHP_PAT_INDEX(index) MCR_REG(0x4800 + (index) * 4) +#define _PAT_INDEX(index) _PICK_EVEN_2RANGES(index, 8, \ + 0x4800, 0x4804, \ + 0x4848, 0x484c) +#define XEHP_PAT_INDEX(index) MCR_REG(_PAT_INDEX(index)) +#define XELPMP_PAT_INDEX(index) _MMIO(_PAT_INDEX(index)) #define XEHP_TILE0_ADDR_RANGE MCR_REG(0x4900) #define XEHP_TILE_LMEM_RANGE_SHIFT 8 diff --git a/drivers/gpu/drm/i915/gt/intel_gtt.c b/drivers/gpu/drm/i915/gt/intel_gtt.c index 4f436ba7a3c8..2f6a9be0ffe6 100644 --- a/drivers/gpu/drm/i915/gt/intel_gtt.c +++ b/drivers/gpu/drm/i915/gt/intel_gtt.c @@ -468,6 +468,44 @@ void gtt_write_workarounds(struct intel_gt *gt) } } +static void xelpmp_setup_private_ppat(struct intel_uncore *uncore) +{ + intel_uncore_write(uncore, XELPMP_PAT_INDEX(0), + MTL_PPAT_L4_0_WB); + intel_uncore_write(uncore, XELPMP_PAT_INDEX(1), + MTL_PPAT_L4_1_WT); + intel_uncore_write(uncore, XELPMP_PAT_INDEX(2), + MTL_PPAT_L4_3_UC); + intel_uncore_write(uncore, XELPMP_PAT_INDEX(3), + MTL_PPAT_L4_0_WB | MTL_2_COH_1W); + intel_uncore_write(uncore, XELPMP_PAT_INDEX(4), + MTL_PPAT_L4_0_WB | MTL_3_COH_2W); + + /* + * Remaining PAT entries are left at the hardware-default + * fully-cached setting + */ +} + +static void xelpg_setup_private_ppat(struct intel_gt *gt) +{ + intel_gt_mcr_multicast_write(gt, XEHP_PAT_INDEX(0), + MTL_PPAT_L4_0_WB); + intel_gt_mcr_multicast_write(gt, XEHP_PAT_INDEX(1), + MTL_PPAT_L4_1_WT); + intel_gt_mcr_multicast_write(gt, XEHP_PAT_INDEX(2), + MTL_PPAT_L4_3_UC); + intel_gt_mcr_multicast_write(gt, XEHP_PAT_INDEX(3), + MTL_PPAT_L4_0_WB | MTL_2_COH_1W); + intel_gt_mcr_multicast_write(gt, XEHP_PAT_INDEX(4), + MTL_PPAT_L4_0_WB | MTL_3_COH_2W); + + /* + * Remaining PAT entries are left at the hardware-default + * fully-cached setting + */ +} + static void tgl_setup_private_ppat(struct intel_uncore *uncore) { /* TGL doesn't support LLC or AGE settings */ @@ -603,7 +641,14 @@ void setup_private_pat(struct intel_gt *gt) GEM_BUG_ON(GRAPHICS_VER(i915) < 8); - if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 50)) + if (gt->type == GT_MEDIA) { + xelpmp_setup_private_ppat(gt->uncore); + return; + } + + if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 70)) + xelpg_setup_private_ppat(gt); + else if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 50)) xehp_setup_private_ppat(gt); else if (GRAPHICS_VER(i915) >= 12) tgl_setup_private_ppat(uncore); diff --git a/drivers/gpu/drm/i915/gt/intel_gtt.h b/drivers/gpu/drm/i915/gt/intel_gtt.h index 69ce55f517f5..ea17849e7a5c 100644 --- a/drivers/gpu/drm/i915/gt/intel_gtt.h +++ b/drivers/gpu/drm/i915/gt/intel_gtt.h @@ -147,6 +147,14 @@ typedef u64 gen8_pte_t; #define GEN8_PDE_IPS_64K BIT(11) #define GEN8_PDE_PS_2M BIT(7) +#define MTL_PPAT_L4_CACHE_POLICY_MASK REG_GENMASK(3, 2) +#define MTL_PAT_INDEX_COH_MODE_MASK REG_GENMASK(1, 0) +#define MTL_PPAT_L4_3_UC REG_FIELD_PREP(MTL_PPAT_L4_CACHE_POLICY_MASK, 3) +#define MTL_PPAT_L4_1_WT REG_FIELD_PREP(MTL_PPAT_L4_CACHE_POLICY_MASK, 1) +#define MTL_PPAT_L4_0_WB REG_FIELD_PREP(MTL_PPAT_L4_CACHE_POLICY_MASK, 0) +#define MTL_3_COH_2W REG_FIELD_PREP(MTL_PAT_INDEX_COH_MODE_MASK, 3) +#define MTL_2_COH_1W REG_FIELD_PREP(MTL_PAT_INDEX_COH_MODE_MASK, 2) + enum i915_cache_level; struct drm_i915_gem_object; diff --git a/drivers/gpu/drm/i915/gt/intel_mocs.c b/drivers/gpu/drm/i915/gt/intel_mocs.c index 69b489e8dfed..2c014407225c 100644 --- a/drivers/gpu/drm/i915/gt/intel_mocs.c +++ b/drivers/gpu/drm/i915/gt/intel_mocs.c @@ -40,6 +40,10 @@ struct drm_i915_mocs_table { #define LE_COS(value) ((value) << 15) #define LE_SSE(value) ((value) << 17) +/* Defines for the tables (GLOB_MOCS_0 - GLOB_MOCS_16) */ +#define _L4_CACHEABILITY(value) ((value) << 2) +#define IG_PAT(value) ((value) << 8) + /* Defines for the tables (LNCFMOCS0 - LNCFMOCS31) - two entries per word */ #define L3_ESC(value) ((value) << 0) #define L3_SCC(value) ((value) << 1) @@ -50,6 +54,7 @@ struct drm_i915_mocs_table { /* Helper defines */ #define GEN9_NUM_MOCS_ENTRIES 64 /* 63-64 are reserved, but configured. */ #define PVC_NUM_MOCS_ENTRIES 3 +#define MTL_NUM_MOCS_ENTRIES 16 /* (e)LLC caching options */ /* @@ -73,6 +78,12 @@ struct drm_i915_mocs_table { #define L3_2_RESERVED _L3_CACHEABILITY(2) #define L3_3_WB _L3_CACHEABILITY(3) +/* L4 caching options */ +#define L4_0_WB _L4_CACHEABILITY(0) +#define L4_1_WT _L4_CACHEABILITY(1) +#define L4_2_RESERVED _L4_CACHEABILITY(2) +#define L4_3_UC _L4_CACHEABILITY(3) + #define MOCS_ENTRY(__idx, __control_value, __l3cc_value) \ [__idx] = { \ .control_value = __control_value, \ @@ -416,6 +427,57 @@ static const struct drm_i915_mocs_entry pvc_mocs_table[] = { MOCS_ENTRY(2, 0, L3_3_WB), }; +static const struct drm_i915_mocs_entry mtl_mocs_table[] = { + /* Error - Reserved for Non-Use */ + MOCS_ENTRY(0, + IG_PAT(0), + L3_LKUP(1) | L3_3_WB), + /* Cached - L3 + L4 */ + MOCS_ENTRY(1, + IG_PAT(1), + L3_LKUP(1) | L3_3_WB), + /* L4 - GO:L3 */ + MOCS_ENTRY(2, + IG_PAT(1), + L3_LKUP(1) | L3_1_UC), + /* Uncached - GO:L3 */ + MOCS_ENTRY(3, + IG_PAT(1) | L4_3_UC, + L3_LKUP(1) | L3_1_UC), + /* L4 - GO:Mem */ + MOCS_ENTRY(4, + IG_PAT(1), + L3_LKUP(1) | L3_GLBGO(1) | L3_1_UC), + /* Uncached - GO:Mem */ + MOCS_ENTRY(5, + IG_PAT(1) | L4_3_UC, + L3_LKUP(1) | L3_GLBGO(1) | L3_1_UC), + /* L4 - L3:NoLKUP; GO:L3 */ + MOCS_ENTRY(6, + IG_PAT(1), + L3_1_UC), + /* Uncached - L3:NoLKUP; GO:L3 */ + MOCS_ENTRY(7, + IG_PAT(1) | L4_3_UC, + L3_1_UC), + /* L4 - L3:NoLKUP; GO:Mem */ + MOCS_ENTRY(8, + IG_PAT(1), + L3_GLBGO(1) | L3_1_UC), + /* Uncached - L3:NoLKUP; GO:Mem */ + MOCS_ENTRY(9, + IG_PAT(1) | L4_3_UC, + L3_GLBGO(1) | L3_1_UC), + /* Display - L3; L4:WT */ + MOCS_ENTRY(14, + IG_PAT(1) | L4_1_WT, + L3_LKUP(1) | L3_3_WB), + /* CCS - Non-Displayable */ + MOCS_ENTRY(15, + IG_PAT(1), + L3_GLBGO(1) | L3_1_UC), +}; + enum { HAS_GLOBAL_MOCS = BIT(0), HAS_ENGINE_MOCS = BIT(1), @@ -445,7 +507,13 @@ static unsigned int get_mocs_settings(const struct drm_i915_private *i915, memset(table, 0, sizeof(struct drm_i915_mocs_table)); table->unused_entries_index = I915_MOCS_PTE; - if (IS_PONTEVECCHIO(i915)) { + if (IS_METEORLAKE(i915)) { + table->size = ARRAY_SIZE(mtl_mocs_table); + table->table = mtl_mocs_table; + table->n_entries = MTL_NUM_MOCS_ENTRIES; + table->uc_index = 9; + table->unused_entries_index = 1; + } else if (IS_PONTEVECCHIO(i915)) { table->size = ARRAY_SIZE(pvc_mocs_table); table->table = pvc_mocs_table; table->n_entries = PVC_NUM_MOCS_ENTRIES; From faca6aaa4838c3c234caa619d3c7d1f09da0d303 Mon Sep 17 00:00:00 2001 From: Fei Yang Date: Fri, 21 Apr 2023 20:25:35 +0200 Subject: [PATCH 018/108] drm/i915/mtl: fix mocs selftest Media GT has a different base for MOCS register, need to apply gsi_offset to the mmio address if not using the intel_uncore_r/w functions for register access. Cc: Matt Roper Signed-off-by: Fei Yang Reviewed-by: Matt Roper Signed-off-by: Andi Shyti Link: https://patchwork.freedesktop.org/patch/msgid/20230421182535.292670-3-andi.shyti@linux.intel.com --- drivers/gpu/drm/i915/gt/selftest_mocs.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gt/selftest_mocs.c b/drivers/gpu/drm/i915/gt/selftest_mocs.c index ca009a6a13bd..a8446ab82501 100644 --- a/drivers/gpu/drm/i915/gt/selftest_mocs.c +++ b/drivers/gpu/drm/i915/gt/selftest_mocs.c @@ -131,13 +131,14 @@ static int read_mocs_table(struct i915_request *rq, const struct drm_i915_mocs_table *table, u32 *offset) { + struct intel_gt *gt = rq->engine->gt; u32 addr; if (!table) return 0; if (HAS_GLOBAL_MOCS_REGISTERS(rq->engine->i915)) - addr = global_mocs_offset(); + addr = global_mocs_offset() + gt->uncore->gsi_offset; else addr = mocs_offset(rq->engine); From 64e22551b64c694de428a9d3e210587df2831ef3 Mon Sep 17 00:00:00 2001 From: Andi Shyti Date: Fri, 21 Apr 2023 21:00:25 +0200 Subject: [PATCH 019/108] drm/i915/i915_drv: Use proper parameter naming in for_each_engine() for_each_engine() loops through engines in the GT, not in dev_priv. Because it's misleading, call it "gt__" instead of "dev_priv__". Signed-off-by: Andi Shyti Reviewed-by: Rodrigo Vivi Reviewed-by: Andrzej Hajda Link: https://patchwork.freedesktop.org/patch/msgid/20230421190026.294208-2-andi.shyti@linux.intel.com --- drivers/gpu/drm/i915/i915_drv.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index e771fdc3099c..d95d354c4cb8 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -381,11 +381,11 @@ static inline struct intel_gt *to_gt(struct drm_i915_private *i915) } /* Simple iterator over all initialised engines */ -#define for_each_engine(engine__, dev_priv__, id__) \ +#define for_each_engine(engine__, gt__, id__) \ for ((id__) = 0; \ (id__) < I915_NUM_ENGINES; \ (id__)++) \ - for_each_if ((engine__) = (dev_priv__)->engine[(id__)]) + for_each_if ((engine__) = (gt__)->engine[(id__)]) /* Iterator over subset of engines selected by mask */ #define for_each_engine_masked(engine__, gt__, mask__, tmp__) \ From 66ca1d8f222bdb1c9470e44131e12d753622ab08 Mon Sep 17 00:00:00 2001 From: Andi Shyti Date: Fri, 21 Apr 2023 21:00:26 +0200 Subject: [PATCH 020/108] drm/i915/i915_drv: Use i915 instead of dev_priv insied the file_priv structure In the process of renaming all instances of 'dev_priv' to 'i915', start using 'i915' within the i915_drv.h file. Signed-off-by: Andi Shyti Reviewed-by: Andrzej Hajda Link: https://patchwork.freedesktop.org/patch/msgid/20230421190026.294208-3-andi.shyti@linux.intel.com --- drivers/gpu/drm/i915/i915_drv.h | 448 ++++++++++++++++---------------- 1 file changed, 224 insertions(+), 224 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index d95d354c4cb8..f23b030aaf09 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -407,11 +407,11 @@ static inline struct intel_gt *to_gt(struct drm_i915_private *i915) (engine__) && (engine__)->uabi_class == (class__); \ (engine__) = rb_to_uabi_engine(rb_next(&(engine__)->uabi_node))) -#define INTEL_INFO(dev_priv) (&(dev_priv)->__info) -#define RUNTIME_INFO(dev_priv) (&(dev_priv)->__runtime) -#define DRIVER_CAPS(dev_priv) (&(dev_priv)->caps) +#define INTEL_INFO(i915) (&(i915)->__info) +#define RUNTIME_INFO(i915) (&(i915)->__runtime) +#define DRIVER_CAPS(i915) (&(i915)->caps) -#define INTEL_DEVID(dev_priv) (RUNTIME_INFO(dev_priv)->device_id) +#define INTEL_DEVID(i915) (RUNTIME_INFO(i915)->device_id) #define IP_VER(ver, rel) ((ver) << 8 | (rel)) @@ -431,7 +431,7 @@ static inline struct intel_gt *to_gt(struct drm_i915_private *i915) #define IS_DISPLAY_VER(i915, from, until) \ (DISPLAY_VER(i915) >= (from) && DISPLAY_VER(i915) <= (until)) -#define INTEL_REVID(dev_priv) (to_pci_dev((dev_priv)->drm.dev)->revision) +#define INTEL_REVID(i915) (to_pci_dev((i915)->drm.dev)->revision) #define INTEL_DISPLAY_STEP(__i915) (RUNTIME_INFO(__i915)->step.display_step) #define INTEL_GRAPHICS_STEP(__i915) (RUNTIME_INFO(__i915)->step.graphics_step) @@ -516,135 +516,135 @@ IS_SUBPLATFORM(const struct drm_i915_private *i915, return ((mask << (msb - pb)) & (mask << (msb - s))) & BIT(msb); } -#define IS_MOBILE(dev_priv) (INTEL_INFO(dev_priv)->is_mobile) -#define IS_DGFX(dev_priv) (INTEL_INFO(dev_priv)->is_dgfx) +#define IS_MOBILE(i915) (INTEL_INFO(i915)->is_mobile) +#define IS_DGFX(i915) (INTEL_INFO(i915)->is_dgfx) -#define IS_I830(dev_priv) IS_PLATFORM(dev_priv, INTEL_I830) -#define IS_I845G(dev_priv) IS_PLATFORM(dev_priv, INTEL_I845G) -#define IS_I85X(dev_priv) IS_PLATFORM(dev_priv, INTEL_I85X) -#define IS_I865G(dev_priv) IS_PLATFORM(dev_priv, INTEL_I865G) -#define IS_I915G(dev_priv) IS_PLATFORM(dev_priv, INTEL_I915G) -#define IS_I915GM(dev_priv) IS_PLATFORM(dev_priv, INTEL_I915GM) -#define IS_I945G(dev_priv) IS_PLATFORM(dev_priv, INTEL_I945G) -#define IS_I945GM(dev_priv) IS_PLATFORM(dev_priv, INTEL_I945GM) -#define IS_I965G(dev_priv) IS_PLATFORM(dev_priv, INTEL_I965G) -#define IS_I965GM(dev_priv) IS_PLATFORM(dev_priv, INTEL_I965GM) -#define IS_G45(dev_priv) IS_PLATFORM(dev_priv, INTEL_G45) -#define IS_GM45(dev_priv) IS_PLATFORM(dev_priv, INTEL_GM45) -#define IS_G4X(dev_priv) (IS_G45(dev_priv) || IS_GM45(dev_priv)) -#define IS_PINEVIEW(dev_priv) IS_PLATFORM(dev_priv, INTEL_PINEVIEW) -#define IS_G33(dev_priv) IS_PLATFORM(dev_priv, INTEL_G33) -#define IS_IRONLAKE(dev_priv) IS_PLATFORM(dev_priv, INTEL_IRONLAKE) -#define IS_IRONLAKE_M(dev_priv) \ - (IS_PLATFORM(dev_priv, INTEL_IRONLAKE) && IS_MOBILE(dev_priv)) -#define IS_SANDYBRIDGE(dev_priv) IS_PLATFORM(dev_priv, INTEL_SANDYBRIDGE) -#define IS_IVYBRIDGE(dev_priv) IS_PLATFORM(dev_priv, INTEL_IVYBRIDGE) -#define IS_IVB_GT1(dev_priv) (IS_IVYBRIDGE(dev_priv) && \ - INTEL_INFO(dev_priv)->gt == 1) -#define IS_VALLEYVIEW(dev_priv) IS_PLATFORM(dev_priv, INTEL_VALLEYVIEW) -#define IS_CHERRYVIEW(dev_priv) IS_PLATFORM(dev_priv, INTEL_CHERRYVIEW) -#define IS_HASWELL(dev_priv) IS_PLATFORM(dev_priv, INTEL_HASWELL) -#define IS_BROADWELL(dev_priv) IS_PLATFORM(dev_priv, INTEL_BROADWELL) -#define IS_SKYLAKE(dev_priv) IS_PLATFORM(dev_priv, INTEL_SKYLAKE) -#define IS_BROXTON(dev_priv) IS_PLATFORM(dev_priv, INTEL_BROXTON) -#define IS_KABYLAKE(dev_priv) IS_PLATFORM(dev_priv, INTEL_KABYLAKE) -#define IS_GEMINILAKE(dev_priv) IS_PLATFORM(dev_priv, INTEL_GEMINILAKE) -#define IS_COFFEELAKE(dev_priv) IS_PLATFORM(dev_priv, INTEL_COFFEELAKE) -#define IS_COMETLAKE(dev_priv) IS_PLATFORM(dev_priv, INTEL_COMETLAKE) -#define IS_ICELAKE(dev_priv) IS_PLATFORM(dev_priv, INTEL_ICELAKE) -#define IS_JSL_EHL(dev_priv) (IS_PLATFORM(dev_priv, INTEL_JASPERLAKE) || \ - IS_PLATFORM(dev_priv, INTEL_ELKHARTLAKE)) -#define IS_TIGERLAKE(dev_priv) IS_PLATFORM(dev_priv, INTEL_TIGERLAKE) -#define IS_ROCKETLAKE(dev_priv) IS_PLATFORM(dev_priv, INTEL_ROCKETLAKE) -#define IS_DG1(dev_priv) IS_PLATFORM(dev_priv, INTEL_DG1) -#define IS_ALDERLAKE_S(dev_priv) IS_PLATFORM(dev_priv, INTEL_ALDERLAKE_S) -#define IS_ALDERLAKE_P(dev_priv) IS_PLATFORM(dev_priv, INTEL_ALDERLAKE_P) -#define IS_XEHPSDV(dev_priv) IS_PLATFORM(dev_priv, INTEL_XEHPSDV) -#define IS_DG2(dev_priv) IS_PLATFORM(dev_priv, INTEL_DG2) -#define IS_PONTEVECCHIO(dev_priv) IS_PLATFORM(dev_priv, INTEL_PONTEVECCHIO) -#define IS_METEORLAKE(dev_priv) IS_PLATFORM(dev_priv, INTEL_METEORLAKE) +#define IS_I830(i915) IS_PLATFORM(i915, INTEL_I830) +#define IS_I845G(i915) IS_PLATFORM(i915, INTEL_I845G) +#define IS_I85X(i915) IS_PLATFORM(i915, INTEL_I85X) +#define IS_I865G(i915) IS_PLATFORM(i915, INTEL_I865G) +#define IS_I915G(i915) IS_PLATFORM(i915, INTEL_I915G) +#define IS_I915GM(i915) IS_PLATFORM(i915, INTEL_I915GM) +#define IS_I945G(i915) IS_PLATFORM(i915, INTEL_I945G) +#define IS_I945GM(i915) IS_PLATFORM(i915, INTEL_I945GM) +#define IS_I965G(i915) IS_PLATFORM(i915, INTEL_I965G) +#define IS_I965GM(i915) IS_PLATFORM(i915, INTEL_I965GM) +#define IS_G45(i915) IS_PLATFORM(i915, INTEL_G45) +#define IS_GM45(i915) IS_PLATFORM(i915, INTEL_GM45) +#define IS_G4X(i915) (IS_G45(i915) || IS_GM45(i915)) +#define IS_PINEVIEW(i915) IS_PLATFORM(i915, INTEL_PINEVIEW) +#define IS_G33(i915) IS_PLATFORM(i915, INTEL_G33) +#define IS_IRONLAKE(i915) IS_PLATFORM(i915, INTEL_IRONLAKE) +#define IS_IRONLAKE_M(i915) \ + (IS_PLATFORM(i915, INTEL_IRONLAKE) && IS_MOBILE(i915)) +#define IS_SANDYBRIDGE(i915) IS_PLATFORM(i915, INTEL_SANDYBRIDGE) +#define IS_IVYBRIDGE(i915) IS_PLATFORM(i915, INTEL_IVYBRIDGE) +#define IS_IVB_GT1(i915) (IS_IVYBRIDGE(i915) && \ + INTEL_INFO(i915)->gt == 1) +#define IS_VALLEYVIEW(i915) IS_PLATFORM(i915, INTEL_VALLEYVIEW) +#define IS_CHERRYVIEW(i915) IS_PLATFORM(i915, INTEL_CHERRYVIEW) +#define IS_HASWELL(i915) IS_PLATFORM(i915, INTEL_HASWELL) +#define IS_BROADWELL(i915) IS_PLATFORM(i915, INTEL_BROADWELL) +#define IS_SKYLAKE(i915) IS_PLATFORM(i915, INTEL_SKYLAKE) +#define IS_BROXTON(i915) IS_PLATFORM(i915, INTEL_BROXTON) +#define IS_KABYLAKE(i915) IS_PLATFORM(i915, INTEL_KABYLAKE) +#define IS_GEMINILAKE(i915) IS_PLATFORM(i915, INTEL_GEMINILAKE) +#define IS_COFFEELAKE(i915) IS_PLATFORM(i915, INTEL_COFFEELAKE) +#define IS_COMETLAKE(i915) IS_PLATFORM(i915, INTEL_COMETLAKE) +#define IS_ICELAKE(i915) IS_PLATFORM(i915, INTEL_ICELAKE) +#define IS_JSL_EHL(i915) (IS_PLATFORM(i915, INTEL_JASPERLAKE) || \ + IS_PLATFORM(i915, INTEL_ELKHARTLAKE)) +#define IS_TIGERLAKE(i915) IS_PLATFORM(i915, INTEL_TIGERLAKE) +#define IS_ROCKETLAKE(i915) IS_PLATFORM(i915, INTEL_ROCKETLAKE) +#define IS_DG1(i915) IS_PLATFORM(i915, INTEL_DG1) +#define IS_ALDERLAKE_S(i915) IS_PLATFORM(i915, INTEL_ALDERLAKE_S) +#define IS_ALDERLAKE_P(i915) IS_PLATFORM(i915, INTEL_ALDERLAKE_P) +#define IS_XEHPSDV(i915) IS_PLATFORM(i915, INTEL_XEHPSDV) +#define IS_DG2(i915) IS_PLATFORM(i915, INTEL_DG2) +#define IS_PONTEVECCHIO(i915) IS_PLATFORM(i915, INTEL_PONTEVECCHIO) +#define IS_METEORLAKE(i915) IS_PLATFORM(i915, INTEL_METEORLAKE) -#define IS_METEORLAKE_M(dev_priv) \ - IS_SUBPLATFORM(dev_priv, INTEL_METEORLAKE, INTEL_SUBPLATFORM_M) -#define IS_METEORLAKE_P(dev_priv) \ - IS_SUBPLATFORM(dev_priv, INTEL_METEORLAKE, INTEL_SUBPLATFORM_P) -#define IS_DG2_G10(dev_priv) \ - IS_SUBPLATFORM(dev_priv, INTEL_DG2, INTEL_SUBPLATFORM_G10) -#define IS_DG2_G11(dev_priv) \ - IS_SUBPLATFORM(dev_priv, INTEL_DG2, INTEL_SUBPLATFORM_G11) -#define IS_DG2_G12(dev_priv) \ - IS_SUBPLATFORM(dev_priv, INTEL_DG2, INTEL_SUBPLATFORM_G12) -#define IS_ADLS_RPLS(dev_priv) \ - IS_SUBPLATFORM(dev_priv, INTEL_ALDERLAKE_S, INTEL_SUBPLATFORM_RPL) -#define IS_ADLP_N(dev_priv) \ - IS_SUBPLATFORM(dev_priv, INTEL_ALDERLAKE_P, INTEL_SUBPLATFORM_N) -#define IS_ADLP_RPLP(dev_priv) \ - IS_SUBPLATFORM(dev_priv, INTEL_ALDERLAKE_P, INTEL_SUBPLATFORM_RPL) -#define IS_ADLP_RPLU(dev_priv) \ - IS_SUBPLATFORM(dev_priv, INTEL_ALDERLAKE_P, INTEL_SUBPLATFORM_RPLU) -#define IS_HSW_EARLY_SDV(dev_priv) (IS_HASWELL(dev_priv) && \ - (INTEL_DEVID(dev_priv) & 0xFF00) == 0x0C00) -#define IS_BDW_ULT(dev_priv) \ - IS_SUBPLATFORM(dev_priv, INTEL_BROADWELL, INTEL_SUBPLATFORM_ULT) -#define IS_BDW_ULX(dev_priv) \ - IS_SUBPLATFORM(dev_priv, INTEL_BROADWELL, INTEL_SUBPLATFORM_ULX) -#define IS_BDW_GT3(dev_priv) (IS_BROADWELL(dev_priv) && \ - INTEL_INFO(dev_priv)->gt == 3) -#define IS_HSW_ULT(dev_priv) \ - IS_SUBPLATFORM(dev_priv, INTEL_HASWELL, INTEL_SUBPLATFORM_ULT) -#define IS_HSW_GT3(dev_priv) (IS_HASWELL(dev_priv) && \ - INTEL_INFO(dev_priv)->gt == 3) -#define IS_HSW_GT1(dev_priv) (IS_HASWELL(dev_priv) && \ - INTEL_INFO(dev_priv)->gt == 1) +#define IS_METEORLAKE_M(i915) \ + IS_SUBPLATFORM(i915, INTEL_METEORLAKE, INTEL_SUBPLATFORM_M) +#define IS_METEORLAKE_P(i915) \ + IS_SUBPLATFORM(i915, INTEL_METEORLAKE, INTEL_SUBPLATFORM_P) +#define IS_DG2_G10(i915) \ + IS_SUBPLATFORM(i915, INTEL_DG2, INTEL_SUBPLATFORM_G10) +#define IS_DG2_G11(i915) \ + IS_SUBPLATFORM(i915, INTEL_DG2, INTEL_SUBPLATFORM_G11) +#define IS_DG2_G12(i915) \ + IS_SUBPLATFORM(i915, INTEL_DG2, INTEL_SUBPLATFORM_G12) +#define IS_ADLS_RPLS(i915) \ + IS_SUBPLATFORM(i915, INTEL_ALDERLAKE_S, INTEL_SUBPLATFORM_RPL) +#define IS_ADLP_N(i915) \ + IS_SUBPLATFORM(i915, INTEL_ALDERLAKE_P, INTEL_SUBPLATFORM_N) +#define IS_ADLP_RPLP(i915) \ + IS_SUBPLATFORM(i915, INTEL_ALDERLAKE_P, INTEL_SUBPLATFORM_RPL) +#define IS_ADLP_RPLU(i915) \ + IS_SUBPLATFORM(i915, INTEL_ALDERLAKE_P, INTEL_SUBPLATFORM_RPLU) +#define IS_HSW_EARLY_SDV(i915) (IS_HASWELL(i915) && \ + (INTEL_DEVID(i915) & 0xFF00) == 0x0C00) +#define IS_BDW_ULT(i915) \ + IS_SUBPLATFORM(i915, INTEL_BROADWELL, INTEL_SUBPLATFORM_ULT) +#define IS_BDW_ULX(i915) \ + IS_SUBPLATFORM(i915, INTEL_BROADWELL, INTEL_SUBPLATFORM_ULX) +#define IS_BDW_GT3(i915) (IS_BROADWELL(i915) && \ + INTEL_INFO(i915)->gt == 3) +#define IS_HSW_ULT(i915) \ + IS_SUBPLATFORM(i915, INTEL_HASWELL, INTEL_SUBPLATFORM_ULT) +#define IS_HSW_GT3(i915) (IS_HASWELL(i915) && \ + INTEL_INFO(i915)->gt == 3) +#define IS_HSW_GT1(i915) (IS_HASWELL(i915) && \ + INTEL_INFO(i915)->gt == 1) /* ULX machines are also considered ULT. */ -#define IS_HSW_ULX(dev_priv) \ - IS_SUBPLATFORM(dev_priv, INTEL_HASWELL, INTEL_SUBPLATFORM_ULX) -#define IS_SKL_ULT(dev_priv) \ - IS_SUBPLATFORM(dev_priv, INTEL_SKYLAKE, INTEL_SUBPLATFORM_ULT) -#define IS_SKL_ULX(dev_priv) \ - IS_SUBPLATFORM(dev_priv, INTEL_SKYLAKE, INTEL_SUBPLATFORM_ULX) -#define IS_KBL_ULT(dev_priv) \ - IS_SUBPLATFORM(dev_priv, INTEL_KABYLAKE, INTEL_SUBPLATFORM_ULT) -#define IS_KBL_ULX(dev_priv) \ - IS_SUBPLATFORM(dev_priv, INTEL_KABYLAKE, INTEL_SUBPLATFORM_ULX) -#define IS_SKL_GT2(dev_priv) (IS_SKYLAKE(dev_priv) && \ - INTEL_INFO(dev_priv)->gt == 2) -#define IS_SKL_GT3(dev_priv) (IS_SKYLAKE(dev_priv) && \ - INTEL_INFO(dev_priv)->gt == 3) -#define IS_SKL_GT4(dev_priv) (IS_SKYLAKE(dev_priv) && \ - INTEL_INFO(dev_priv)->gt == 4) -#define IS_KBL_GT2(dev_priv) (IS_KABYLAKE(dev_priv) && \ - INTEL_INFO(dev_priv)->gt == 2) -#define IS_KBL_GT3(dev_priv) (IS_KABYLAKE(dev_priv) && \ - INTEL_INFO(dev_priv)->gt == 3) -#define IS_CFL_ULT(dev_priv) \ - IS_SUBPLATFORM(dev_priv, INTEL_COFFEELAKE, INTEL_SUBPLATFORM_ULT) -#define IS_CFL_ULX(dev_priv) \ - IS_SUBPLATFORM(dev_priv, INTEL_COFFEELAKE, INTEL_SUBPLATFORM_ULX) -#define IS_CFL_GT2(dev_priv) (IS_COFFEELAKE(dev_priv) && \ - INTEL_INFO(dev_priv)->gt == 2) -#define IS_CFL_GT3(dev_priv) (IS_COFFEELAKE(dev_priv) && \ - INTEL_INFO(dev_priv)->gt == 3) +#define IS_HSW_ULX(i915) \ + IS_SUBPLATFORM(i915, INTEL_HASWELL, INTEL_SUBPLATFORM_ULX) +#define IS_SKL_ULT(i915) \ + IS_SUBPLATFORM(i915, INTEL_SKYLAKE, INTEL_SUBPLATFORM_ULT) +#define IS_SKL_ULX(i915) \ + IS_SUBPLATFORM(i915, INTEL_SKYLAKE, INTEL_SUBPLATFORM_ULX) +#define IS_KBL_ULT(i915) \ + IS_SUBPLATFORM(i915, INTEL_KABYLAKE, INTEL_SUBPLATFORM_ULT) +#define IS_KBL_ULX(i915) \ + IS_SUBPLATFORM(i915, INTEL_KABYLAKE, INTEL_SUBPLATFORM_ULX) +#define IS_SKL_GT2(i915) (IS_SKYLAKE(i915) && \ + INTEL_INFO(i915)->gt == 2) +#define IS_SKL_GT3(i915) (IS_SKYLAKE(i915) && \ + INTEL_INFO(i915)->gt == 3) +#define IS_SKL_GT4(i915) (IS_SKYLAKE(i915) && \ + INTEL_INFO(i915)->gt == 4) +#define IS_KBL_GT2(i915) (IS_KABYLAKE(i915) && \ + INTEL_INFO(i915)->gt == 2) +#define IS_KBL_GT3(i915) (IS_KABYLAKE(i915) && \ + INTEL_INFO(i915)->gt == 3) +#define IS_CFL_ULT(i915) \ + IS_SUBPLATFORM(i915, INTEL_COFFEELAKE, INTEL_SUBPLATFORM_ULT) +#define IS_CFL_ULX(i915) \ + IS_SUBPLATFORM(i915, INTEL_COFFEELAKE, INTEL_SUBPLATFORM_ULX) +#define IS_CFL_GT2(i915) (IS_COFFEELAKE(i915) && \ + INTEL_INFO(i915)->gt == 2) +#define IS_CFL_GT3(i915) (IS_COFFEELAKE(i915) && \ + INTEL_INFO(i915)->gt == 3) -#define IS_CML_ULT(dev_priv) \ - IS_SUBPLATFORM(dev_priv, INTEL_COMETLAKE, INTEL_SUBPLATFORM_ULT) -#define IS_CML_ULX(dev_priv) \ - IS_SUBPLATFORM(dev_priv, INTEL_COMETLAKE, INTEL_SUBPLATFORM_ULX) -#define IS_CML_GT2(dev_priv) (IS_COMETLAKE(dev_priv) && \ - INTEL_INFO(dev_priv)->gt == 2) +#define IS_CML_ULT(i915) \ + IS_SUBPLATFORM(i915, INTEL_COMETLAKE, INTEL_SUBPLATFORM_ULT) +#define IS_CML_ULX(i915) \ + IS_SUBPLATFORM(i915, INTEL_COMETLAKE, INTEL_SUBPLATFORM_ULX) +#define IS_CML_GT2(i915) (IS_COMETLAKE(i915) && \ + INTEL_INFO(i915)->gt == 2) -#define IS_ICL_WITH_PORT_F(dev_priv) \ - IS_SUBPLATFORM(dev_priv, INTEL_ICELAKE, INTEL_SUBPLATFORM_PORTF) +#define IS_ICL_WITH_PORT_F(i915) \ + IS_SUBPLATFORM(i915, INTEL_ICELAKE, INTEL_SUBPLATFORM_PORTF) -#define IS_TGL_UY(dev_priv) \ - IS_SUBPLATFORM(dev_priv, INTEL_TIGERLAKE, INTEL_SUBPLATFORM_UY) +#define IS_TGL_UY(i915) \ + IS_SUBPLATFORM(i915, INTEL_TIGERLAKE, INTEL_SUBPLATFORM_UY) #define IS_SKL_GRAPHICS_STEP(p, since, until) (IS_SKYLAKE(p) && IS_GRAPHICS_STEP(p, since, until)) -#define IS_KBL_GRAPHICS_STEP(dev_priv, since, until) \ - (IS_KABYLAKE(dev_priv) && IS_GRAPHICS_STEP(dev_priv, since, until)) -#define IS_KBL_DISPLAY_STEP(dev_priv, since, until) \ - (IS_KABYLAKE(dev_priv) && IS_DISPLAY_STEP(dev_priv, since, until)) +#define IS_KBL_GRAPHICS_STEP(i915, since, until) \ + (IS_KABYLAKE(i915) && IS_GRAPHICS_STEP(i915, since, until)) +#define IS_KBL_DISPLAY_STEP(i915, since, until) \ + (IS_KABYLAKE(i915) && IS_DISPLAY_STEP(i915, since, until)) #define IS_JSL_EHL_GRAPHICS_STEP(p, since, until) \ (IS_JSL_EHL(p) && IS_GRAPHICS_STEP(p, since, until)) @@ -720,9 +720,9 @@ IS_SUBPLATFORM(const struct drm_i915_private *i915, (IS_PONTEVECCHIO(__i915) && \ IS_GRAPHICS_STEP(__i915, since, until)) -#define IS_LP(dev_priv) (INTEL_INFO(dev_priv)->is_lp) -#define IS_GEN9_LP(dev_priv) (GRAPHICS_VER(dev_priv) == 9 && IS_LP(dev_priv)) -#define IS_GEN9_BC(dev_priv) (GRAPHICS_VER(dev_priv) == 9 && !IS_LP(dev_priv)) +#define IS_LP(i915) (INTEL_INFO(i915)->is_lp) +#define IS_GEN9_LP(i915) (GRAPHICS_VER(i915) == 9 && IS_LP(i915)) +#define IS_GEN9_BC(i915) (GRAPHICS_VER(i915) == 9 && !IS_LP(i915)) #define __HAS_ENGINE(engine_mask, id) ((engine_mask) & BIT(id)) #define HAS_ENGINE(gt, id) __HAS_ENGINE((gt)->info.engine_mask, id) @@ -747,180 +747,180 @@ IS_SUBPLATFORM(const struct drm_i915_private *i915, #define CCS_MASK(gt) \ ENGINE_INSTANCES_MASK(gt, CCS0, I915_MAX_CCS) -#define HAS_MEDIA_RATIO_MODE(dev_priv) (INTEL_INFO(dev_priv)->has_media_ratio_mode) +#define HAS_MEDIA_RATIO_MODE(i915) (INTEL_INFO(i915)->has_media_ratio_mode) /* * The Gen7 cmdparser copies the scanned buffer to the ggtt for execution * All later gens can run the final buffer from the ppgtt */ -#define CMDPARSER_USES_GGTT(dev_priv) (GRAPHICS_VER(dev_priv) == 7) +#define CMDPARSER_USES_GGTT(i915) (GRAPHICS_VER(i915) == 7) -#define HAS_LLC(dev_priv) (INTEL_INFO(dev_priv)->has_llc) -#define HAS_4TILE(dev_priv) (INTEL_INFO(dev_priv)->has_4tile) -#define HAS_SNOOP(dev_priv) (INTEL_INFO(dev_priv)->has_snoop) -#define HAS_EDRAM(dev_priv) ((dev_priv)->edram_size_mb) -#define HAS_SECURE_BATCHES(dev_priv) (GRAPHICS_VER(dev_priv) < 6) -#define HAS_WT(dev_priv) HAS_EDRAM(dev_priv) +#define HAS_LLC(i915) (INTEL_INFO(i915)->has_llc) +#define HAS_4TILE(i915) (INTEL_INFO(i915)->has_4tile) +#define HAS_SNOOP(i915) (INTEL_INFO(i915)->has_snoop) +#define HAS_EDRAM(i915) ((i915)->edram_size_mb) +#define HAS_SECURE_BATCHES(i915) (GRAPHICS_VER(i915) < 6) +#define HAS_WT(i915) HAS_EDRAM(i915) -#define HWS_NEEDS_PHYSICAL(dev_priv) (INTEL_INFO(dev_priv)->hws_needs_physical) +#define HWS_NEEDS_PHYSICAL(i915) (INTEL_INFO(i915)->hws_needs_physical) -#define HAS_LOGICAL_RING_CONTEXTS(dev_priv) \ - (INTEL_INFO(dev_priv)->has_logical_ring_contexts) -#define HAS_LOGICAL_RING_ELSQ(dev_priv) \ - (INTEL_INFO(dev_priv)->has_logical_ring_elsq) +#define HAS_LOGICAL_RING_CONTEXTS(i915) \ + (INTEL_INFO(i915)->has_logical_ring_contexts) +#define HAS_LOGICAL_RING_ELSQ(i915) \ + (INTEL_INFO(i915)->has_logical_ring_elsq) -#define HAS_EXECLISTS(dev_priv) HAS_LOGICAL_RING_CONTEXTS(dev_priv) +#define HAS_EXECLISTS(i915) HAS_LOGICAL_RING_CONTEXTS(i915) -#define INTEL_PPGTT(dev_priv) (RUNTIME_INFO(dev_priv)->ppgtt_type) -#define HAS_PPGTT(dev_priv) \ - (INTEL_PPGTT(dev_priv) != INTEL_PPGTT_NONE) -#define HAS_FULL_PPGTT(dev_priv) \ - (INTEL_PPGTT(dev_priv) >= INTEL_PPGTT_FULL) +#define INTEL_PPGTT(i915) (RUNTIME_INFO(i915)->ppgtt_type) +#define HAS_PPGTT(i915) \ + (INTEL_PPGTT(i915) != INTEL_PPGTT_NONE) +#define HAS_FULL_PPGTT(i915) \ + (INTEL_PPGTT(i915) >= INTEL_PPGTT_FULL) -#define HAS_PAGE_SIZES(dev_priv, sizes) ({ \ +#define HAS_PAGE_SIZES(i915, sizes) ({ \ GEM_BUG_ON((sizes) == 0); \ - ((sizes) & ~RUNTIME_INFO(dev_priv)->page_sizes) == 0; \ + ((sizes) & ~RUNTIME_INFO(i915)->page_sizes) == 0; \ }) -#define HAS_OVERLAY(dev_priv) (INTEL_INFO(dev_priv)->display.has_overlay) -#define OVERLAY_NEEDS_PHYSICAL(dev_priv) \ - (INTEL_INFO(dev_priv)->display.overlay_needs_physical) +#define HAS_OVERLAY(i915) (INTEL_INFO(i915)->display.has_overlay) +#define OVERLAY_NEEDS_PHYSICAL(i915) \ + (INTEL_INFO(i915)->display.overlay_needs_physical) /* Early gen2 have a totally busted CS tlb and require pinned batches. */ -#define HAS_BROKEN_CS_TLB(dev_priv) (IS_I830(dev_priv) || IS_I845G(dev_priv)) +#define HAS_BROKEN_CS_TLB(i915) (IS_I830(i915) || IS_I845G(i915)) -#define NEEDS_RC6_CTX_CORRUPTION_WA(dev_priv) \ - (IS_BROADWELL(dev_priv) || GRAPHICS_VER(dev_priv) == 9) +#define NEEDS_RC6_CTX_CORRUPTION_WA(i915) \ + (IS_BROADWELL(i915) || GRAPHICS_VER(i915) == 9) /* WaRsDisableCoarsePowerGating:skl,cnl */ -#define NEEDS_WaRsDisableCoarsePowerGating(dev_priv) \ - (IS_SKL_GT3(dev_priv) || IS_SKL_GT4(dev_priv)) +#define NEEDS_WaRsDisableCoarsePowerGating(i915) \ + (IS_SKL_GT3(i915) || IS_SKL_GT4(i915)) -#define HAS_GMBUS_IRQ(dev_priv) (DISPLAY_VER(dev_priv) >= 4) -#define HAS_GMBUS_BURST_READ(dev_priv) (DISPLAY_VER(dev_priv) >= 11 || \ - IS_GEMINILAKE(dev_priv) || \ - IS_KABYLAKE(dev_priv)) +#define HAS_GMBUS_IRQ(i915) (DISPLAY_VER(i915) >= 4) +#define HAS_GMBUS_BURST_READ(i915) (DISPLAY_VER(i915) >= 11 || \ + IS_GEMINILAKE(i915) || \ + IS_KABYLAKE(i915)) /* With the 945 and later, Y tiling got adjusted so that it was 32 128-byte * rows, which changed the alignment requirements and fence programming. */ -#define HAS_128_BYTE_Y_TILING(dev_priv) (GRAPHICS_VER(dev_priv) != 2 && \ - !(IS_I915G(dev_priv) || IS_I915GM(dev_priv))) -#define SUPPORTS_TV(dev_priv) (INTEL_INFO(dev_priv)->display.supports_tv) -#define I915_HAS_HOTPLUG(dev_priv) (INTEL_INFO(dev_priv)->display.has_hotplug) +#define HAS_128_BYTE_Y_TILING(i915) (GRAPHICS_VER(i915) != 2 && \ + !(IS_I915G(i915) || IS_I915GM(i915))) +#define SUPPORTS_TV(i915) (INTEL_INFO(i915)->display.supports_tv) +#define I915_HAS_HOTPLUG(i915) (INTEL_INFO(i915)->display.has_hotplug) -#define HAS_FW_BLC(dev_priv) (DISPLAY_VER(dev_priv) > 2) -#define HAS_FBC(dev_priv) (RUNTIME_INFO(dev_priv)->fbc_mask != 0) -#define HAS_CUR_FBC(dev_priv) (!HAS_GMCH(dev_priv) && DISPLAY_VER(dev_priv) >= 7) +#define HAS_FW_BLC(i915) (DISPLAY_VER(i915) > 2) +#define HAS_FBC(i915) (RUNTIME_INFO(i915)->fbc_mask != 0) +#define HAS_CUR_FBC(i915) (!HAS_GMCH(i915) && DISPLAY_VER(i915) >= 7) -#define HAS_DPT(dev_priv) (DISPLAY_VER(dev_priv) >= 13) +#define HAS_DPT(i915) (DISPLAY_VER(i915) >= 13) -#define HAS_IPS(dev_priv) (IS_HSW_ULT(dev_priv) || IS_BROADWELL(dev_priv)) +#define HAS_IPS(i915) (IS_HSW_ULT(i915) || IS_BROADWELL(i915)) -#define HAS_DP_MST(dev_priv) (INTEL_INFO(dev_priv)->display.has_dp_mst) -#define HAS_DP20(dev_priv) (IS_DG2(dev_priv) || DISPLAY_VER(dev_priv) >= 14) +#define HAS_DP_MST(i915) (INTEL_INFO(i915)->display.has_dp_mst) +#define HAS_DP20(i915) (IS_DG2(i915) || DISPLAY_VER(i915) >= 14) -#define HAS_DOUBLE_BUFFERED_M_N(dev_priv) (DISPLAY_VER(dev_priv) >= 9 || IS_BROADWELL(dev_priv)) +#define HAS_DOUBLE_BUFFERED_M_N(i915) (DISPLAY_VER(i915) >= 9 || IS_BROADWELL(i915)) -#define HAS_CDCLK_CRAWL(dev_priv) (INTEL_INFO(dev_priv)->display.has_cdclk_crawl) -#define HAS_CDCLK_SQUASH(dev_priv) (INTEL_INFO(dev_priv)->display.has_cdclk_squash) -#define HAS_DDI(dev_priv) (INTEL_INFO(dev_priv)->display.has_ddi) -#define HAS_FPGA_DBG_UNCLAIMED(dev_priv) (INTEL_INFO(dev_priv)->display.has_fpga_dbg) -#define HAS_PSR(dev_priv) (INTEL_INFO(dev_priv)->display.has_psr) -#define HAS_PSR_HW_TRACKING(dev_priv) \ - (INTEL_INFO(dev_priv)->display.has_psr_hw_tracking) -#define HAS_PSR2_SEL_FETCH(dev_priv) (DISPLAY_VER(dev_priv) >= 12) -#define HAS_TRANSCODER(dev_priv, trans) ((RUNTIME_INFO(dev_priv)->cpu_transcoder_mask & BIT(trans)) != 0) +#define HAS_CDCLK_CRAWL(i915) (INTEL_INFO(i915)->display.has_cdclk_crawl) +#define HAS_CDCLK_SQUASH(i915) (INTEL_INFO(i915)->display.has_cdclk_squash) +#define HAS_DDI(i915) (INTEL_INFO(i915)->display.has_ddi) +#define HAS_FPGA_DBG_UNCLAIMED(i915) (INTEL_INFO(i915)->display.has_fpga_dbg) +#define HAS_PSR(i915) (INTEL_INFO(i915)->display.has_psr) +#define HAS_PSR_HW_TRACKING(i915) \ + (INTEL_INFO(i915)->display.has_psr_hw_tracking) +#define HAS_PSR2_SEL_FETCH(i915) (DISPLAY_VER(i915) >= 12) +#define HAS_TRANSCODER(i915, trans) ((RUNTIME_INFO(i915)->cpu_transcoder_mask & BIT(trans)) != 0) -#define HAS_RC6(dev_priv) (INTEL_INFO(dev_priv)->has_rc6) -#define HAS_RC6p(dev_priv) (INTEL_INFO(dev_priv)->has_rc6p) -#define HAS_RC6pp(dev_priv) (false) /* HW was never validated */ +#define HAS_RC6(i915) (INTEL_INFO(i915)->has_rc6) +#define HAS_RC6p(i915) (INTEL_INFO(i915)->has_rc6p) +#define HAS_RC6pp(i915) (false) /* HW was never validated */ -#define HAS_RPS(dev_priv) (INTEL_INFO(dev_priv)->has_rps) +#define HAS_RPS(i915) (INTEL_INFO(i915)->has_rps) -#define HAS_DMC(dev_priv) (RUNTIME_INFO(dev_priv)->has_dmc) -#define HAS_DSB(dev_priv) (INTEL_INFO(dev_priv)->display.has_dsb) +#define HAS_DMC(i915) (RUNTIME_INFO(i915)->has_dmc) +#define HAS_DSB(i915) (INTEL_INFO(i915)->display.has_dsb) #define HAS_DSC(__i915) (RUNTIME_INFO(__i915)->has_dsc) #define HAS_HW_SAGV_WM(i915) (DISPLAY_VER(i915) >= 13 && !IS_DGFX(i915)) -#define HAS_HECI_PXP(dev_priv) \ - (INTEL_INFO(dev_priv)->has_heci_pxp) +#define HAS_HECI_PXP(i915) \ + (INTEL_INFO(i915)->has_heci_pxp) -#define HAS_HECI_GSCFI(dev_priv) \ - (INTEL_INFO(dev_priv)->has_heci_gscfi) +#define HAS_HECI_GSCFI(i915) \ + (INTEL_INFO(i915)->has_heci_gscfi) -#define HAS_HECI_GSC(dev_priv) (HAS_HECI_PXP(dev_priv) || HAS_HECI_GSCFI(dev_priv)) +#define HAS_HECI_GSC(i915) (HAS_HECI_PXP(i915) || HAS_HECI_GSCFI(i915)) #define HAS_MSO(i915) (DISPLAY_VER(i915) >= 12) -#define HAS_RUNTIME_PM(dev_priv) (INTEL_INFO(dev_priv)->has_runtime_pm) -#define HAS_64BIT_RELOC(dev_priv) (INTEL_INFO(dev_priv)->has_64bit_reloc) +#define HAS_RUNTIME_PM(i915) (INTEL_INFO(i915)->has_runtime_pm) +#define HAS_64BIT_RELOC(i915) (INTEL_INFO(i915)->has_64bit_reloc) -#define HAS_OA_BPC_REPORTING(dev_priv) \ - (INTEL_INFO(dev_priv)->has_oa_bpc_reporting) -#define HAS_OA_SLICE_CONTRIB_LIMITS(dev_priv) \ - (INTEL_INFO(dev_priv)->has_oa_slice_contrib_limits) -#define HAS_OAM(dev_priv) \ - (INTEL_INFO(dev_priv)->has_oam) +#define HAS_OA_BPC_REPORTING(i915) \ + (INTEL_INFO(i915)->has_oa_bpc_reporting) +#define HAS_OA_SLICE_CONTRIB_LIMITS(i915) \ + (INTEL_INFO(i915)->has_oa_slice_contrib_limits) +#define HAS_OAM(i915) \ + (INTEL_INFO(i915)->has_oam) /* * Set this flag, when platform requires 64K GTT page sizes or larger for * device local memory access. */ -#define HAS_64K_PAGES(dev_priv) (INTEL_INFO(dev_priv)->has_64k_pages) +#define HAS_64K_PAGES(i915) (INTEL_INFO(i915)->has_64k_pages) -#define HAS_IPC(dev_priv) (INTEL_INFO(dev_priv)->display.has_ipc) -#define HAS_SAGV(dev_priv) (DISPLAY_VER(dev_priv) >= 9 && !IS_LP(dev_priv)) +#define HAS_IPC(i915) (INTEL_INFO(i915)->display.has_ipc) +#define HAS_SAGV(i915) (DISPLAY_VER(i915) >= 9 && !IS_LP(i915)) #define HAS_REGION(i915, i) (RUNTIME_INFO(i915)->memory_regions & (i)) #define HAS_LMEM(i915) HAS_REGION(i915, REGION_LMEM) -#define HAS_EXTRA_GT_LIST(dev_priv) (INTEL_INFO(dev_priv)->extra_gt_list) +#define HAS_EXTRA_GT_LIST(i915) (INTEL_INFO(i915)->extra_gt_list) /* * Platform has the dedicated compression control state for each lmem surfaces * stored in lmem to support the 3D and media compression formats. */ -#define HAS_FLAT_CCS(dev_priv) (INTEL_INFO(dev_priv)->has_flat_ccs) +#define HAS_FLAT_CCS(i915) (INTEL_INFO(i915)->has_flat_ccs) -#define HAS_GT_UC(dev_priv) (INTEL_INFO(dev_priv)->has_gt_uc) +#define HAS_GT_UC(i915) (INTEL_INFO(i915)->has_gt_uc) -#define HAS_POOLED_EU(dev_priv) (RUNTIME_INFO(dev_priv)->has_pooled_eu) +#define HAS_POOLED_EU(i915) (RUNTIME_INFO(i915)->has_pooled_eu) -#define HAS_GLOBAL_MOCS_REGISTERS(dev_priv) (INTEL_INFO(dev_priv)->has_global_mocs) +#define HAS_GLOBAL_MOCS_REGISTERS(i915) (INTEL_INFO(i915)->has_global_mocs) -#define HAS_GMCH(dev_priv) (INTEL_INFO(dev_priv)->display.has_gmch) +#define HAS_GMCH(i915) (INTEL_INFO(i915)->display.has_gmch) #define HAS_GMD_ID(i915) (INTEL_INFO(i915)->has_gmd_id) -#define HAS_LSPCON(dev_priv) (IS_DISPLAY_VER(dev_priv, 9, 10)) +#define HAS_LSPCON(i915) (IS_DISPLAY_VER(i915, 9, 10)) #define HAS_L3_CCS_READ(i915) (INTEL_INFO(i915)->has_l3_ccs_read) /* DPF == dynamic parity feature */ -#define HAS_L3_DPF(dev_priv) (INTEL_INFO(dev_priv)->has_l3_dpf) -#define NUM_L3_SLICES(dev_priv) (IS_HSW_GT3(dev_priv) ? \ - 2 : HAS_L3_DPF(dev_priv)) +#define HAS_L3_DPF(i915) (INTEL_INFO(i915)->has_l3_dpf) +#define NUM_L3_SLICES(i915) (IS_HSW_GT3(i915) ? \ + 2 : HAS_L3_DPF(i915)) -#define INTEL_NUM_PIPES(dev_priv) (hweight8(RUNTIME_INFO(dev_priv)->pipe_mask)) +#define INTEL_NUM_PIPES(i915) (hweight8(RUNTIME_INFO(i915)->pipe_mask)) -#define HAS_DISPLAY(dev_priv) (RUNTIME_INFO(dev_priv)->pipe_mask != 0) +#define HAS_DISPLAY(i915) (RUNTIME_INFO(i915)->pipe_mask != 0) #define HAS_VRR(i915) (DISPLAY_VER(i915) >= 11) #define HAS_ASYNC_FLIPS(i915) (DISPLAY_VER(i915) >= 5) /* Only valid when HAS_DISPLAY() is true */ -#define INTEL_DISPLAY_ENABLED(dev_priv) \ - (drm_WARN_ON(&(dev_priv)->drm, !HAS_DISPLAY(dev_priv)), \ - !(dev_priv)->params.disable_display && \ - !intel_opregion_headless_sku(dev_priv)) +#define INTEL_DISPLAY_ENABLED(i915) \ + (drm_WARN_ON(&(i915)->drm, !HAS_DISPLAY(i915)), \ + !(i915)->params.disable_display && \ + !intel_opregion_headless_sku(i915)) -#define HAS_GUC_DEPRIVILEGE(dev_priv) \ - (INTEL_INFO(dev_priv)->has_guc_deprivilege) +#define HAS_GUC_DEPRIVILEGE(i915) \ + (INTEL_INFO(i915)->has_guc_deprivilege) -#define HAS_D12_PLANE_MINIMIZATION(dev_priv) (IS_ROCKETLAKE(dev_priv) || \ - IS_ALDERLAKE_S(dev_priv)) +#define HAS_D12_PLANE_MINIMIZATION(i915) (IS_ROCKETLAKE(i915) || \ + IS_ALDERLAKE_S(i915)) #define HAS_MBUS_JOINING(i915) (IS_ALDERLAKE_P(i915) || DISPLAY_VER(i915) >= 14) From 341ad0e8e254267704e0b87e35ad23aba5c02359 Mon Sep 17 00:00:00 2001 From: Fei Yang Date: Mon, 24 Apr 2023 11:29:01 -0700 Subject: [PATCH 021/108] drm/i915/mtl: Add PTE encode function PTE encode functions are platform dependent. This patch implements PTE functions for MTL, and ensures the correct PTE encode function is used by calling pte_encode function pointer instead of the hardcoded gen8 version of PTE encode. Fixes: b76c0deef627 ("drm/i915/mtl: Define MOCS and PAT tables for MTL") Signed-off-by: Fei Yang Reviewed-by: Andrzej Hajda Reviewed-by: Andi Shyti Acked-by: Nirmoy Das Signed-off-by: Nirmoy Das Link: https://patchwork.freedesktop.org/patch/msgid/20230424182902.3663500-2-fei.yang@intel.com --- drivers/gpu/drm/i915/display/intel_dpt.c | 2 +- drivers/gpu/drm/i915/gt/gen8_ppgtt.c | 45 ++++++++++++++++++++---- drivers/gpu/drm/i915/gt/intel_ggtt.c | 36 +++++++++++++++++-- drivers/gpu/drm/i915/gt/intel_gtt.h | 10 +++++- 4 files changed, 81 insertions(+), 12 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_dpt.c b/drivers/gpu/drm/i915/display/intel_dpt.c index b8027392144d..c5eacfdba1a5 100644 --- a/drivers/gpu/drm/i915/display/intel_dpt.c +++ b/drivers/gpu/drm/i915/display/intel_dpt.c @@ -300,7 +300,7 @@ intel_dpt_create(struct intel_framebuffer *fb) vm->vma_ops.bind_vma = dpt_bind_vma; vm->vma_ops.unbind_vma = dpt_unbind_vma; - vm->pte_encode = gen8_ggtt_pte_encode; + vm->pte_encode = vm->gt->ggtt->vm.pte_encode; dpt->obj = dpt_obj; dpt->obj->is_dpt = true; diff --git a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c index 4daaa6f55668..4c9a2f2db908 100644 --- a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c +++ b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c @@ -55,6 +55,34 @@ static u64 gen8_pte_encode(dma_addr_t addr, return pte; } +static u64 mtl_pte_encode(dma_addr_t addr, + enum i915_cache_level level, + u32 flags) +{ + gen8_pte_t pte = addr | GEN8_PAGE_PRESENT | GEN8_PAGE_RW; + + if (unlikely(flags & PTE_READ_ONLY)) + pte &= ~GEN8_PAGE_RW; + + if (flags & PTE_LM) + pte |= GEN12_PPGTT_PTE_LM; + + switch (level) { + case I915_CACHE_NONE: + pte |= GEN12_PPGTT_PTE_PAT1; + break; + case I915_CACHE_LLC: + case I915_CACHE_L3_LLC: + pte |= GEN12_PPGTT_PTE_PAT0 | GEN12_PPGTT_PTE_PAT1; + break; + case I915_CACHE_WT: + pte |= GEN12_PPGTT_PTE_PAT0; + break; + } + + return pte; +} + static void gen8_ppgtt_notify_vgt(struct i915_ppgtt *ppgtt, bool create) { struct drm_i915_private *i915 = ppgtt->vm.i915; @@ -427,7 +455,7 @@ gen8_ppgtt_insert_pte(struct i915_ppgtt *ppgtt, u32 flags) { struct i915_page_directory *pd; - const gen8_pte_t pte_encode = gen8_pte_encode(0, cache_level, flags); + const gen8_pte_t pte_encode = ppgtt->vm.pte_encode(0, cache_level, flags); gen8_pte_t *vaddr; pd = i915_pd_entry(pdp, gen8_pd_index(idx, 2)); @@ -580,7 +608,7 @@ static void gen8_ppgtt_insert_huge(struct i915_address_space *vm, enum i915_cache_level cache_level, u32 flags) { - const gen8_pte_t pte_encode = gen8_pte_encode(0, cache_level, flags); + const gen8_pte_t pte_encode = vm->pte_encode(0, cache_level, flags); unsigned int rem = sg_dma_len(iter->sg); u64 start = vma_res->start; @@ -743,7 +771,7 @@ static void gen8_ppgtt_insert_entry(struct i915_address_space *vm, GEM_BUG_ON(pt->is_compact); vaddr = px_vaddr(pt); - vaddr[gen8_pd_index(idx, 0)] = gen8_pte_encode(addr, level, flags); + vaddr[gen8_pd_index(idx, 0)] = vm->pte_encode(addr, level, flags); drm_clflush_virt_range(&vaddr[gen8_pd_index(idx, 0)], sizeof(*vaddr)); } @@ -773,7 +801,7 @@ static void __xehpsdv_ppgtt_insert_entry_lm(struct i915_address_space *vm, } vaddr = px_vaddr(pt); - vaddr[gen8_pd_index(idx, 0) / 16] = gen8_pte_encode(addr, level, flags); + vaddr[gen8_pd_index(idx, 0) / 16] = vm->pte_encode(addr, level, flags); } static void xehpsdv_ppgtt_insert_entry(struct i915_address_space *vm, @@ -820,8 +848,8 @@ static int gen8_init_scratch(struct i915_address_space *vm) pte_flags |= PTE_LM; vm->scratch[0]->encode = - gen8_pte_encode(px_dma(vm->scratch[0]), - I915_CACHE_NONE, pte_flags); + vm->pte_encode(px_dma(vm->scratch[0]), + I915_CACHE_NONE, pte_flags); for (i = 1; i <= vm->top; i++) { struct drm_i915_gem_object *obj; @@ -963,7 +991,10 @@ struct i915_ppgtt *gen8_ppgtt_create(struct intel_gt *gt, */ ppgtt->vm.alloc_scratch_dma = alloc_pt_dma; - ppgtt->vm.pte_encode = gen8_pte_encode; + if (GRAPHICS_VER_FULL(gt->i915) >= IP_VER(12, 70)) + ppgtt->vm.pte_encode = mtl_pte_encode; + else + ppgtt->vm.pte_encode = gen8_pte_encode; ppgtt->vm.bind_async_flags = I915_VMA_LOCAL_BIND; ppgtt->vm.insert_entries = gen8_ppgtt_insert; diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt.c b/drivers/gpu/drm/i915/gt/intel_ggtt.c index 3c7f1ed92f5b..20915edc8bd9 100644 --- a/drivers/gpu/drm/i915/gt/intel_ggtt.c +++ b/drivers/gpu/drm/i915/gt/intel_ggtt.c @@ -220,6 +220,33 @@ static void guc_ggtt_invalidate(struct i915_ggtt *ggtt) } } +static u64 mtl_ggtt_pte_encode(dma_addr_t addr, + enum i915_cache_level level, + u32 flags) +{ + gen8_pte_t pte = addr | GEN8_PAGE_PRESENT; + + WARN_ON_ONCE(addr & ~GEN12_GGTT_PTE_ADDR_MASK); + + if (flags & PTE_LM) + pte |= GEN12_GGTT_PTE_LM; + + switch (level) { + case I915_CACHE_NONE: + pte |= MTL_GGTT_PTE_PAT1; + break; + case I915_CACHE_LLC: + case I915_CACHE_L3_LLC: + pte |= MTL_GGTT_PTE_PAT0 | MTL_GGTT_PTE_PAT1; + break; + case I915_CACHE_WT: + pte |= MTL_GGTT_PTE_PAT0; + break; + } + + return pte; +} + u64 gen8_ggtt_pte_encode(dma_addr_t addr, enum i915_cache_level level, u32 flags) @@ -247,7 +274,7 @@ static void gen8_ggtt_insert_page(struct i915_address_space *vm, gen8_pte_t __iomem *pte = (gen8_pte_t __iomem *)ggtt->gsm + offset / I915_GTT_PAGE_SIZE; - gen8_set_pte(pte, gen8_ggtt_pte_encode(addr, level, flags)); + gen8_set_pte(pte, ggtt->vm.pte_encode(addr, level, flags)); ggtt->invalidate(ggtt); } @@ -257,8 +284,8 @@ static void gen8_ggtt_insert_entries(struct i915_address_space *vm, enum i915_cache_level level, u32 flags) { - const gen8_pte_t pte_encode = gen8_ggtt_pte_encode(0, level, flags); struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm); + const gen8_pte_t pte_encode = ggtt->vm.pte_encode(0, level, flags); gen8_pte_t __iomem *gte; gen8_pte_t __iomem *end; struct sgt_iter iter; @@ -981,7 +1008,10 @@ static int gen8_gmch_probe(struct i915_ggtt *ggtt) ggtt->vm.vma_ops.bind_vma = intel_ggtt_bind_vma; ggtt->vm.vma_ops.unbind_vma = intel_ggtt_unbind_vma; - ggtt->vm.pte_encode = gen8_ggtt_pte_encode; + if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 70)) + ggtt->vm.pte_encode = mtl_ggtt_pte_encode; + else + ggtt->vm.pte_encode = gen8_ggtt_pte_encode; return ggtt_probe_common(ggtt, size); } diff --git a/drivers/gpu/drm/i915/gt/intel_gtt.h b/drivers/gpu/drm/i915/gt/intel_gtt.h index ea17849e7a5c..1910683f03b4 100644 --- a/drivers/gpu/drm/i915/gt/intel_gtt.h +++ b/drivers/gpu/drm/i915/gt/intel_gtt.h @@ -88,9 +88,17 @@ typedef u64 gen8_pte_t; #define BYT_PTE_SNOOPED_BY_CPU_CACHES REG_BIT(2) #define BYT_PTE_WRITEABLE REG_BIT(1) +#define MTL_PPGTT_PTE_PAT3 BIT_ULL(62) #define GEN12_PPGTT_PTE_LM BIT_ULL(11) +#define GEN12_PPGTT_PTE_PAT2 BIT_ULL(7) +#define GEN12_PPGTT_PTE_PAT1 BIT_ULL(4) +#define GEN12_PPGTT_PTE_PAT0 BIT_ULL(3) -#define GEN12_GGTT_PTE_LM BIT_ULL(1) +#define GEN12_GGTT_PTE_LM BIT_ULL(1) +#define MTL_GGTT_PTE_PAT0 BIT_ULL(52) +#define MTL_GGTT_PTE_PAT1 BIT_ULL(53) +#define GEN12_GGTT_PTE_ADDR_MASK GENMASK_ULL(45, 12) +#define MTL_GGTT_PTE_PAT_MASK GENMASK_ULL(53, 52) #define GEN12_PDE_64K BIT(6) #define GEN12_PTE_PS64 BIT(8) From a161b6dba6e0e8132921a6b948d311cdf67f3476 Mon Sep 17 00:00:00 2001 From: Fei Yang Date: Mon, 24 Apr 2023 11:29:02 -0700 Subject: [PATCH 022/108] drm/i915/mtl: workaround coherency issue for Media This patch implements Wa_22016122933. In MTL, memory writes initiated by the Media tile update the whole cache line, even for partial writes. This creates a coherency problem for cacheable memory if both CPU and GPU are writing data to different locations within a single cache line. This patch circumvents the issue by making CPU/GPU shared memory uncacheable (WC on CPU side, and PAT index 2 for GPU). Additionally, it ensures that CPU writes are visible to the GPU with an intel_guc_write_barrier(). While fixing the CTB issue, we noticed some random GSC firmware loading failure because the share buffers are cacheable (WB) on CPU side but uncached on GPU side. To fix these issues we need to map such shared buffers as WC on CPU side. Since such allocations are not all done through GuC allocator, to avoid too many code changes, the i915_coherent_map_type() is now hard coded to return WC for MTL. v2: Simplify the commit message(Matt). BSpec: 45101 Signed-off-by: Fei Yang Reviewed-by: Andi Shyti Acked-by: Nirmoy Das Reviewed-by: Andrzej Hajda Reviewed-by: Matt Roper Signed-off-by: Nirmoy Das Link: https://patchwork.freedesktop.org/patch/msgid/20230424182902.3663500-3-fei.yang@intel.com --- drivers/gpu/drm/i915/gem/i915_gem_pages.c | 5 ++++- drivers/gpu/drm/i915/gt/uc/intel_gsc_fw.c | 13 +++++++++++++ drivers/gpu/drm/i915/gt/uc/intel_guc.c | 7 +++++++ drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c | 6 ++++++ 4 files changed, 30 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_pages.c b/drivers/gpu/drm/i915/gem/i915_gem_pages.c index ecd86130b74f..89fc8ea6bcfc 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_pages.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_pages.c @@ -469,7 +469,10 @@ enum i915_map_type i915_coherent_map_type(struct drm_i915_private *i915, struct drm_i915_gem_object *obj, bool always_coherent) { - if (i915_gem_object_is_lmem(obj)) + /* + * Wa_22016122933: always return I915_MAP_WC for MTL + */ + if (i915_gem_object_is_lmem(obj) || IS_METEORLAKE(i915)) return I915_MAP_WC; if (HAS_LLC(i915) || always_coherent) return I915_MAP_WB; diff --git a/drivers/gpu/drm/i915/gt/uc/intel_gsc_fw.c b/drivers/gpu/drm/i915/gt/uc/intel_gsc_fw.c index 1d9fdfb11268..236673c02f9a 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_gsc_fw.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_gsc_fw.c @@ -110,6 +110,13 @@ static int gsc_fw_load_prepare(struct intel_gsc_uc *gsc) if (obj->base.size < gsc->fw.size) return -ENOSPC; + /* + * Wa_22016122933: For MTL the shared memory needs to be mapped + * as WC on CPU side and UC (PAT index 2) on GPU side + */ + if (IS_METEORLAKE(i915)) + i915_gem_object_set_cache_coherency(obj, I915_CACHE_NONE); + dst = i915_gem_object_pin_map_unlocked(obj, i915_coherent_map_type(i915, obj, true)); if (IS_ERR(dst)) @@ -125,6 +132,12 @@ static int gsc_fw_load_prepare(struct intel_gsc_uc *gsc) memset(dst, 0, obj->base.size); memcpy(dst, src, gsc->fw.size); + /* + * Wa_22016122933: Making sure the data in dst is + * visible to GSC right away + */ + intel_guc_write_barrier(>->uc.guc); + i915_gem_object_unpin_map(gsc->fw.obj); i915_gem_object_unpin_map(obj); diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.c b/drivers/gpu/drm/i915/gt/uc/intel_guc.c index d76508fa3af7..f9bddaa876d9 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.c @@ -743,6 +743,13 @@ struct i915_vma *intel_guc_allocate_vma(struct intel_guc *guc, u32 size) if (IS_ERR(obj)) return ERR_CAST(obj); + /* + * Wa_22016122933: For MTL the shared memory needs to be mapped + * as WC on CPU side and UC (PAT index 2) on GPU side + */ + if (IS_METEORLAKE(gt->i915)) + i915_gem_object_set_cache_coherency(obj, I915_CACHE_NONE); + vma = i915_vma_instance(obj, >->ggtt->vm, NULL); if (IS_ERR(vma)) goto err; diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c index 1803a633ed64..99a0a89091e7 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c @@ -902,6 +902,12 @@ static int ct_read(struct intel_guc_ct *ct, struct ct_incoming_msg **msg) /* now update descriptor */ WRITE_ONCE(desc->head, head); + /* + * Wa_22016122933: Making sure the head update is + * visible to GuC right away + */ + intel_guc_write_barrier(ct_to_guc(ct)); + return available - len; corrupted: From e991b5244d89096943af7dcd506faecb57b9377d Mon Sep 17 00:00:00 2001 From: Tejas Upadhyay Date: Mon, 24 Apr 2023 19:06:07 +0530 Subject: [PATCH 023/108] drm/i915/selftest: Record GT error for gt failure igt_live_test has pr_err dumped in case of some GT failures. It will be more informative regarding GT if we use gt_err instead. Cc: Andi Shyti Signed-off-by: Tejas Upadhyay Reviewed-by: Andi Shyti Signed-off-by: Andi Shyti Link: https://patchwork.freedesktop.org/patch/msgid/20230424133607.3736825-1-tejas.upadhyay@intel.com --- drivers/gpu/drm/i915/selftests/igt_live_test.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/selftests/igt_live_test.c b/drivers/gpu/drm/i915/selftests/igt_live_test.c index 714b7afc490b..4ddc6d902752 100644 --- a/drivers/gpu/drm/i915/selftests/igt_live_test.c +++ b/drivers/gpu/drm/i915/selftests/igt_live_test.c @@ -6,6 +6,7 @@ #include "i915_drv.h" #include "gt/intel_gt.h" +#include "gt/intel_gt_print.h" #include "../i915_selftest.h" #include "igt_flush_test.h" @@ -30,7 +31,7 @@ int igt_live_test_begin(struct igt_live_test *t, err = intel_gt_wait_for_idle(gt, MAX_SCHEDULE_TIMEOUT); if (err) { - pr_err("%s(%s): failed to idle before, with err=%d!", + gt_err(gt, "%s(%s): GT failed to idle before, with err=%d!", func, name, err); return err; } @@ -69,7 +70,7 @@ int igt_live_test_end(struct igt_live_test *t) i915_reset_engine_count(&i915->gpu_error, engine)) continue; - pr_err("%s(%s): engine '%s' was reset %d times!\n", + gt_err(gt, "%s(%s): engine '%s' was reset %d times!\n", t->func, t->name, engine->name, i915_reset_engine_count(&i915->gpu_error, engine) - t->reset_engine[id]); From 47d8b3029684ce257e23022286b0861f72ac3f03 Mon Sep 17 00:00:00 2001 From: Tejas Upadhyay Date: Mon, 24 Apr 2023 15:47:49 +0530 Subject: [PATCH 024/108] drm/i915/mtl: Add workaround 14018778641 WA 18018781329 is applicable now across all MTL steppings. V2: - Remove IS_MTL check, code already running for MTL - Matt Cc: Matt Roper Signed-off-by: Tejas Upadhyay Reviewed-by: Andrzej Hajda Reviewed-by: Andi Shyti Signed-off-by: Andi Shyti Link: https://patchwork.freedesktop.org/patch/msgid/20230424101749.3719600-1-tejas.upadhyay@intel.com --- drivers/gpu/drm/i915/gt/intel_workarounds.c | 30 ++++++++++----------- 1 file changed, 14 insertions(+), 16 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c index 312eb8b5f949..de4f8e2e8e8c 100644 --- a/drivers/gpu/drm/i915/gt/intel_workarounds.c +++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c @@ -1695,19 +1695,18 @@ pvc_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal) static void xelpg_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal) { + /* Wa_14018778641 / Wa_18018781329 */ + wa_mcr_write_or(wal, RENDER_MOD_CTRL, FORCE_MISS_FTLB); + wa_mcr_write_or(wal, COMP_MOD_CTRL, FORCE_MISS_FTLB); + if (IS_MTL_GRAPHICS_STEP(gt->i915, M, STEP_A0, STEP_B0) || IS_MTL_GRAPHICS_STEP(gt->i915, P, STEP_A0, STEP_B0)) { /* Wa_14014830051 */ wa_mcr_write_clr(wal, SARB_CHICKEN1, COMP_CKN_IN); - /* Wa_18018781329 */ - wa_mcr_write_or(wal, RENDER_MOD_CTRL, FORCE_MISS_FTLB); - wa_mcr_write_or(wal, COMP_MOD_CTRL, FORCE_MISS_FTLB); - /* Wa_14015795083 */ wa_write_clr(wal, GEN7_MISCCPCTL, GEN12_DOP_CLOCK_GATE_RENDER_ENABLE); } - /* * Unlike older platforms, we no longer setup implicit steering here; * all MCR accesses are explicitly steered. @@ -1718,17 +1717,16 @@ xelpg_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal) static void xelpmp_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal) { - if (IS_MTL_MEDIA_STEP(gt->i915, STEP_A0, STEP_B0)) { - /* - * Wa_18018781329 - * - * Note that although these registers are MCR on the primary - * GT, the media GT's versions are regular singleton registers. - */ - wa_write_or(wal, XELPMP_GSC_MOD_CTRL, FORCE_MISS_FTLB); - wa_write_or(wal, XELPMP_VDBX_MOD_CTRL, FORCE_MISS_FTLB); - wa_write_or(wal, XELPMP_VEBX_MOD_CTRL, FORCE_MISS_FTLB); - } + /* + * Wa_14018778641 + * Wa_18018781329 + * + * Note that although these registers are MCR on the primary + * GT, the media GT's versions are regular singleton registers. + */ + wa_write_or(wal, XELPMP_GSC_MOD_CTRL, FORCE_MISS_FTLB); + wa_write_or(wal, XELPMP_VDBX_MOD_CTRL, FORCE_MISS_FTLB); + wa_write_or(wal, XELPMP_VEBX_MOD_CTRL, FORCE_MISS_FTLB); debug_dump_steering(gt); } From 80ab31799002166ac7c660bacfbff4f85bc29107 Mon Sep 17 00:00:00 2001 From: John Harrison Date: Fri, 21 Apr 2023 15:47:42 -0700 Subject: [PATCH 025/108] drm/i915/guc: Actually return an error if GuC version range check fails Dan Carpenter pointed out that 'err' was not being set in the case where the GuC firmware version range check fails. Fix that. Note that while this is a bug fix for a previous patch (see Fixes tag below). It is an exceedingly low risk bug. The range check is asserting that the GuC firmware version is within spec. So it should not be possible to ever have a firmware file that fails this check. If larger version numbers are required in the future, that would be a backwards breaking spec change and thus require a major version bump, in which case an old i915 driver would not load that new version anyway. Fixes: 9bbba0667f37 ("drm/i915/guc: Use GuC submission API version number") Reported-by: Dan Carpenter Signed-off-by: John Harrison Cc: John Harrison Cc: Daniele Ceraolo Spurio Cc: Alan Previn Cc: Umesh Nerlige Ramappa Cc: Rodrigo Vivi Cc: Matthew Brost Cc: Andi Shyti Cc: Matthew Auld Cc: Tvrtko Ursulin Cc: Lucas De Marchi Cc: Jani Nikula Reviewed-by: Andi Shyti Link: https://patchwork.freedesktop.org/patch/msgid/20230421224742.2357198-1-John.C.Harrison@Intel.com --- drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c index 24765c30a0e1..c36e68e23a14 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c @@ -635,9 +635,10 @@ static bool is_ver_8bit(struct intel_uc_fw_ver *ver) return ver->major < 0xFF && ver->minor < 0xFF && ver->patch < 0xFF; } -static bool guc_check_version_range(struct intel_uc_fw *uc_fw) +static int guc_check_version_range(struct intel_uc_fw *uc_fw) { struct intel_guc *guc = container_of(uc_fw, struct intel_guc, fw); + struct intel_gt *gt = __uc_fw_to_gt(uc_fw); /* * GuC version number components are defined as being 8-bits. @@ -646,24 +647,24 @@ static bool guc_check_version_range(struct intel_uc_fw *uc_fw) */ if (!is_ver_8bit(&uc_fw->file_selected.ver)) { - gt_warn(__uc_fw_to_gt(uc_fw), "%s firmware: invalid file version: 0x%02X:%02X:%02X\n", + gt_warn(gt, "%s firmware: invalid file version: 0x%02X:%02X:%02X\n", intel_uc_fw_type_repr(uc_fw->type), uc_fw->file_selected.ver.major, uc_fw->file_selected.ver.minor, uc_fw->file_selected.ver.patch); - return false; + return -EINVAL; } if (!is_ver_8bit(&guc->submission_version)) { - gt_warn(__uc_fw_to_gt(uc_fw), "%s firmware: invalid submit version: 0x%02X:%02X:%02X\n", + gt_warn(gt, "%s firmware: invalid submit version: 0x%02X:%02X:%02X\n", intel_uc_fw_type_repr(uc_fw->type), guc->submission_version.major, guc->submission_version.minor, guc->submission_version.patch); - return false; + return -EINVAL; } - return true; + return i915_inject_probe_error(gt->i915, -EINVAL); } static int check_fw_header(struct intel_gt *gt, @@ -772,8 +773,11 @@ int intel_uc_fw_fetch(struct intel_uc_fw *uc_fw) if (err) goto fail; - if (uc_fw->type == INTEL_UC_FW_TYPE_GUC && !guc_check_version_range(uc_fw)) - goto fail; + if (uc_fw->type == INTEL_UC_FW_TYPE_GUC) { + err = guc_check_version_range(uc_fw); + if (err) + goto fail; + } if (uc_fw->file_wanted.ver.major && uc_fw->file_selected.ver.major) { /* Check the file's major version was as it claimed */ From d81268ee1c1073471cf0e8adb0fc9d026b602c3b Mon Sep 17 00:00:00 2001 From: Ashutosh Dixit Date: Thu, 20 Apr 2023 09:40:39 -0700 Subject: [PATCH 026/108] drm/i915/hwmon: Get mutex and rpm ref just once in hwm_power_max_write In preparation for follow-on patches, refactor hwm_power_max_write to take hwmon_lock and runtime pm wakeref at start of the function and release them at the end, therefore acquiring these just once each. Signed-off-by: Ashutosh Dixit Reviewed-by: Rodrigo Vivi Signed-off-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/20230420164041.1428455-2-ashutosh.dixit@intel.com --- drivers/gpu/drm/i915/i915_hwmon.c | 28 +++++++++++++++------------- 1 file changed, 15 insertions(+), 13 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_hwmon.c b/drivers/gpu/drm/i915/i915_hwmon.c index 8e7dccc8d3a0..7f44e809ca15 100644 --- a/drivers/gpu/drm/i915/i915_hwmon.c +++ b/drivers/gpu/drm/i915/i915_hwmon.c @@ -396,31 +396,33 @@ hwm_power_max_write(struct hwm_drvdata *ddat, long val) { struct i915_hwmon *hwmon = ddat->hwmon; intel_wakeref_t wakeref; + int ret = 0; u32 nval; + mutex_lock(&hwmon->hwmon_lock); + wakeref = intel_runtime_pm_get(ddat->uncore->rpm); + /* Disable PL1 limit and verify, because the limit cannot be disabled on all platforms */ if (val == PL1_DISABLE) { - mutex_lock(&hwmon->hwmon_lock); - with_intel_runtime_pm(ddat->uncore->rpm, wakeref) { - intel_uncore_rmw(ddat->uncore, hwmon->rg.pkg_rapl_limit, - PKG_PWR_LIM_1_EN, 0); - nval = intel_uncore_read(ddat->uncore, hwmon->rg.pkg_rapl_limit); - } - mutex_unlock(&hwmon->hwmon_lock); + intel_uncore_rmw(ddat->uncore, hwmon->rg.pkg_rapl_limit, + PKG_PWR_LIM_1_EN, 0); + nval = intel_uncore_read(ddat->uncore, hwmon->rg.pkg_rapl_limit); if (nval & PKG_PWR_LIM_1_EN) - return -ENODEV; - return 0; + ret = -ENODEV; + goto exit; } /* Computation in 64-bits to avoid overflow. Round to nearest. */ nval = DIV_ROUND_CLOSEST_ULL((u64)val << hwmon->scl_shift_power, SF_POWER); nval = PKG_PWR_LIM_1_EN | REG_FIELD_PREP(PKG_PWR_LIM_1, nval); - hwm_locked_with_pm_intel_uncore_rmw(ddat, hwmon->rg.pkg_rapl_limit, - PKG_PWR_LIM_1_EN | PKG_PWR_LIM_1, - nval); - return 0; + intel_uncore_rmw(ddat->uncore, hwmon->rg.pkg_rapl_limit, + PKG_PWR_LIM_1_EN | PKG_PWR_LIM_1, nval); +exit: + intel_runtime_pm_put(ddat->uncore->rpm, wakeref); + mutex_unlock(&hwmon->hwmon_lock); + return ret; } static int From 1b44019a93e2bc6088d777b3d3072df5f40f9324 Mon Sep 17 00:00:00 2001 From: Ashutosh Dixit Date: Thu, 20 Apr 2023 09:40:40 -0700 Subject: [PATCH 027/108] drm/i915/guc: Disable PL1 power limit when loading GuC firmware On dGfx, the PL1 power limit being enabled and set to a low value results in a low GPU operating freq. It also negates the freq raise operation which is done before GuC firmware load. As a result GuC firmware load can time out. Such timeouts were seen in the GL #8062 bug below (where the PL1 power limit was enabled and set to a low value). Therefore disable the PL1 power limit when allowed by HW when loading GuC firmware. v2: - Take mutex (to disallow writes to power1_max) across GuC reset/fw load - Add hwm_power_max_restore to error return code path v3 (Jani N): - Add/remove explanatory comments - Function renames - Type corrections - Locking annotation v4: - Don't hold the lock across GuC reset (Rodrigo) - New locking scheme (suggested by Rodrigo) - Eliminate rpm_get in power_max_disable/restore, not needed (Tvrtko) v5: - Fix uninitialized pl1en variable compile warning reported by kernel build robot by creating new err_rps label Link: https://gitlab.freedesktop.org/drm/intel/-/issues/8062 Signed-off-by: Ashutosh Dixit Reviewed-by: Rodrigo Vivi Signed-off-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/20230420164041.1428455-3-ashutosh.dixit@intel.com --- drivers/gpu/drm/i915/gt/uc/intel_uc.c | 13 +++++++-- drivers/gpu/drm/i915/i915_hwmon.c | 40 +++++++++++++++++++++++++++ drivers/gpu/drm/i915/i915_hwmon.h | 7 +++++ 3 files changed, 58 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc.c b/drivers/gpu/drm/i915/gt/uc/intel_uc.c index 4ccb4be4c9cb..996168312340 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_uc.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_uc.c @@ -18,6 +18,7 @@ #include "intel_uc.h" #include "i915_drv.h" +#include "i915_hwmon.h" static const struct intel_uc_ops uc_ops_off; static const struct intel_uc_ops uc_ops_on; @@ -461,6 +462,7 @@ static int __uc_init_hw(struct intel_uc *uc) struct intel_guc *guc = &uc->guc; struct intel_huc *huc = &uc->huc; int ret, attempts; + bool pl1en; GEM_BUG_ON(!intel_uc_supports_guc(uc)); GEM_BUG_ON(!intel_uc_wants_guc(uc)); @@ -491,6 +493,9 @@ static int __uc_init_hw(struct intel_uc *uc) else attempts = 1; + /* Disable a potentially low PL1 power limit to allow freq to be raised */ + i915_hwmon_power_max_disable(gt->i915, &pl1en); + intel_rps_raise_unslice(&uc_to_gt(uc)->rps); while (attempts--) { @@ -500,7 +505,7 @@ static int __uc_init_hw(struct intel_uc *uc) */ ret = __uc_sanitize(uc); if (ret) - goto err_out; + goto err_rps; intel_huc_fw_upload(huc); intel_guc_ads_reset(guc); @@ -547,6 +552,8 @@ static int __uc_init_hw(struct intel_uc *uc) intel_rps_lower_unslice(&uc_to_gt(uc)->rps); } + i915_hwmon_power_max_restore(gt->i915, pl1en); + guc_info(guc, "submission %s\n", str_enabled_disabled(intel_uc_uses_guc_submission(uc))); guc_info(guc, "SLPC %s\n", str_enabled_disabled(intel_uc_uses_guc_slpc(uc))); @@ -559,10 +566,12 @@ static int __uc_init_hw(struct intel_uc *uc) intel_guc_submission_disable(guc); err_log_capture: __uc_capture_load_err_log(uc); -err_out: +err_rps: /* Return GT back to RPn */ intel_rps_lower_unslice(&uc_to_gt(uc)->rps); + i915_hwmon_power_max_restore(gt->i915, pl1en); +err_out: __uc_sanitize(uc); if (!ret) { diff --git a/drivers/gpu/drm/i915/i915_hwmon.c b/drivers/gpu/drm/i915/i915_hwmon.c index 7f44e809ca15..9ab8971679fe 100644 --- a/drivers/gpu/drm/i915/i915_hwmon.c +++ b/drivers/gpu/drm/i915/i915_hwmon.c @@ -50,6 +50,7 @@ struct hwm_drvdata { struct hwm_energy_info ei; /* Energy info for energy1_input */ char name[12]; int gt_n; + bool reset_in_progress; }; struct i915_hwmon { @@ -400,6 +401,10 @@ hwm_power_max_write(struct hwm_drvdata *ddat, long val) u32 nval; mutex_lock(&hwmon->hwmon_lock); + if (hwmon->ddat.reset_in_progress) { + ret = -EAGAIN; + goto unlock; + } wakeref = intel_runtime_pm_get(ddat->uncore->rpm); /* Disable PL1 limit and verify, because the limit cannot be disabled on all platforms */ @@ -421,6 +426,7 @@ hwm_power_max_write(struct hwm_drvdata *ddat, long val) PKG_PWR_LIM_1_EN | PKG_PWR_LIM_1, nval); exit: intel_runtime_pm_put(ddat->uncore->rpm, wakeref); +unlock: mutex_unlock(&hwmon->hwmon_lock); return ret; } @@ -472,6 +478,40 @@ hwm_power_write(struct hwm_drvdata *ddat, u32 attr, int chan, long val) } } +void i915_hwmon_power_max_disable(struct drm_i915_private *i915, bool *old) +{ + struct i915_hwmon *hwmon = i915->hwmon; + u32 r; + + if (!hwmon || !i915_mmio_reg_valid(hwmon->rg.pkg_rapl_limit)) + return; + + mutex_lock(&hwmon->hwmon_lock); + + hwmon->ddat.reset_in_progress = true; + r = intel_uncore_rmw(hwmon->ddat.uncore, hwmon->rg.pkg_rapl_limit, + PKG_PWR_LIM_1_EN, 0); + *old = !!(r & PKG_PWR_LIM_1_EN); + + mutex_unlock(&hwmon->hwmon_lock); +} + +void i915_hwmon_power_max_restore(struct drm_i915_private *i915, bool old) +{ + struct i915_hwmon *hwmon = i915->hwmon; + + if (!hwmon || !i915_mmio_reg_valid(hwmon->rg.pkg_rapl_limit)) + return; + + mutex_lock(&hwmon->hwmon_lock); + + intel_uncore_rmw(hwmon->ddat.uncore, hwmon->rg.pkg_rapl_limit, + PKG_PWR_LIM_1_EN, old ? PKG_PWR_LIM_1_EN : 0); + hwmon->ddat.reset_in_progress = false; + + mutex_unlock(&hwmon->hwmon_lock); +} + static umode_t hwm_energy_is_visible(const struct hwm_drvdata *ddat, u32 attr) { diff --git a/drivers/gpu/drm/i915/i915_hwmon.h b/drivers/gpu/drm/i915/i915_hwmon.h index 7ca9cf2c34c9..0fcb7de84406 100644 --- a/drivers/gpu/drm/i915/i915_hwmon.h +++ b/drivers/gpu/drm/i915/i915_hwmon.h @@ -7,14 +7,21 @@ #ifndef __I915_HWMON_H__ #define __I915_HWMON_H__ +#include + struct drm_i915_private; +struct intel_gt; #if IS_REACHABLE(CONFIG_HWMON) void i915_hwmon_register(struct drm_i915_private *i915); void i915_hwmon_unregister(struct drm_i915_private *i915); +void i915_hwmon_power_max_disable(struct drm_i915_private *i915, bool *old); +void i915_hwmon_power_max_restore(struct drm_i915_private *i915, bool old); #else static inline void i915_hwmon_register(struct drm_i915_private *i915) { }; static inline void i915_hwmon_unregister(struct drm_i915_private *i915) { }; +static inline void i915_hwmon_power_max_disable(struct drm_i915_private *i915, bool *old) { }; +static inline void i915_hwmon_power_max_restore(struct drm_i915_private *i915, bool old) { }; #endif #endif /* __I915_HWMON_H__ */ From 655bd3b954cf18ae4e1ff69ebbf4a20b562f580b Mon Sep 17 00:00:00 2001 From: Ashutosh Dixit Date: Thu, 20 Apr 2023 09:40:41 -0700 Subject: [PATCH 028/108] drm/i915/hwmon: Block waiting for GuC reset to complete Instead of erroring out when GuC reset is in progress, block waiting for GuC reset to complete which is a more reasonable uapi behavior. v2: Avoid race between wake_up_all and waiting for wakeup (Rodrigo) v3: Remove timeout when blocked (Tvrtko) Signed-off-by: Ashutosh Dixit Reviewed-by: Rodrigo Vivi Signed-off-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/20230420164041.1428455-4-ashutosh.dixit@intel.com --- drivers/gpu/drm/i915/i915_hwmon.c | 29 +++++++++++++++++++++++++---- 1 file changed, 25 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_hwmon.c b/drivers/gpu/drm/i915/i915_hwmon.c index 9ab8971679fe..a3bdd9f68a45 100644 --- a/drivers/gpu/drm/i915/i915_hwmon.c +++ b/drivers/gpu/drm/i915/i915_hwmon.c @@ -51,6 +51,7 @@ struct hwm_drvdata { char name[12]; int gt_n; bool reset_in_progress; + wait_queue_head_t waitq; }; struct i915_hwmon { @@ -397,14 +398,32 @@ hwm_power_max_write(struct hwm_drvdata *ddat, long val) { struct i915_hwmon *hwmon = ddat->hwmon; intel_wakeref_t wakeref; + DEFINE_WAIT(wait); int ret = 0; u32 nval; - mutex_lock(&hwmon->hwmon_lock); - if (hwmon->ddat.reset_in_progress) { - ret = -EAGAIN; - goto unlock; + /* Block waiting for GuC reset to complete when needed */ + for (;;) { + mutex_lock(&hwmon->hwmon_lock); + + prepare_to_wait(&ddat->waitq, &wait, TASK_INTERRUPTIBLE); + + if (!hwmon->ddat.reset_in_progress) + break; + + if (signal_pending(current)) { + ret = -EINTR; + break; + } + + mutex_unlock(&hwmon->hwmon_lock); + + schedule(); } + finish_wait(&ddat->waitq, &wait); + if (ret) + goto unlock; + wakeref = intel_runtime_pm_get(ddat->uncore->rpm); /* Disable PL1 limit and verify, because the limit cannot be disabled on all platforms */ @@ -508,6 +527,7 @@ void i915_hwmon_power_max_restore(struct drm_i915_private *i915, bool old) intel_uncore_rmw(hwmon->ddat.uncore, hwmon->rg.pkg_rapl_limit, PKG_PWR_LIM_1_EN, old ? PKG_PWR_LIM_1_EN : 0); hwmon->ddat.reset_in_progress = false; + wake_up_all(&hwmon->ddat.waitq); mutex_unlock(&hwmon->hwmon_lock); } @@ -784,6 +804,7 @@ void i915_hwmon_register(struct drm_i915_private *i915) ddat->uncore = &i915->uncore; snprintf(ddat->name, sizeof(ddat->name), "i915"); ddat->gt_n = -1; + init_waitqueue_head(&ddat->waitq); for_each_gt(gt, i915, i) { ddat_gt = hwmon->ddat_gt + i; From a7fa1537b791cfb2ea3895ac448ab1455a4405f8 Mon Sep 17 00:00:00 2001 From: Radhakrishna Sripada Date: Tue, 25 Apr 2023 11:30:11 -0700 Subject: [PATCH 029/108] drm/i915/mtl: Implement Wa_14019141245 Enable strict RAR to prevent spurious GPU hangs. v1.1: Rebase Bspec: 51762 Cc: Rodrigo Vivi Signed-off-by: Radhakrishna Sripada Reviewed-by:Haridhar Kalvala Reviewed-by: Umesh Nerlige Ramappa Link: https://patchwork.freedesktop.org/patch/msgid/20230425183011.865085-1-radhakrishna.sripada@intel.com --- drivers/gpu/drm/i915/gt/intel_gt_regs.h | 5 +++++ drivers/gpu/drm/i915/gt/intel_workarounds.c | 4 ++++ drivers/gpu/drm/i915/i915_perf_oa_regs.h | 4 ---- 3 files changed, 9 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_gt_regs.h b/drivers/gpu/drm/i915/gt/intel_gt_regs.h index e8c3b762a92a..af80d2fe739b 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_regs.h +++ b/drivers/gpu/drm/i915/gt/intel_gt_regs.h @@ -529,6 +529,11 @@ #define GEN8_RC6_CTX_INFO _MMIO(0x8504) +#define GEN12_SQCNT1 _MMIO(0x8718) +#define GEN12_SQCNT1_PMON_ENABLE REG_BIT(30) +#define GEN12_SQCNT1_OABPC REG_BIT(29) +#define GEN12_STRICT_RAR_ENABLE REG_BIT(23) + #define XEHP_SQCM MCR_REG(0x8724) #define EN_32B_ACCESS REG_BIT(30) diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c index de4f8e2e8e8c..ad9e7f49a6fa 100644 --- a/drivers/gpu/drm/i915/gt/intel_workarounds.c +++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c @@ -1699,6 +1699,9 @@ xelpg_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal) wa_mcr_write_or(wal, RENDER_MOD_CTRL, FORCE_MISS_FTLB); wa_mcr_write_or(wal, COMP_MOD_CTRL, FORCE_MISS_FTLB); + /* Wa_14019141245 */ + wa_write_or(wal, GEN12_SQCNT1, GEN12_STRICT_RAR_ENABLE); + if (IS_MTL_GRAPHICS_STEP(gt->i915, M, STEP_A0, STEP_B0) || IS_MTL_GRAPHICS_STEP(gt->i915, P, STEP_A0, STEP_B0)) { /* Wa_14014830051 */ @@ -1707,6 +1710,7 @@ xelpg_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal) /* Wa_14015795083 */ wa_write_clr(wal, GEN7_MISCCPCTL, GEN12_DOP_CLOCK_GATE_RENDER_ENABLE); } + /* * Unlike older platforms, we no longer setup implicit steering here; * all MCR accesses are explicitly steered. diff --git a/drivers/gpu/drm/i915/i915_perf_oa_regs.h b/drivers/gpu/drm/i915/i915_perf_oa_regs.h index ba103875e19f..e5ac7a8b5cb6 100644 --- a/drivers/gpu/drm/i915/i915_perf_oa_regs.h +++ b/drivers/gpu/drm/i915/i915_perf_oa_regs.h @@ -134,10 +134,6 @@ #define GDT_CHICKEN_BITS _MMIO(0x9840) #define GT_NOA_ENABLE 0x00000080 -#define GEN12_SQCNT1 _MMIO(0x8718) -#define GEN12_SQCNT1_PMON_ENABLE REG_BIT(30) -#define GEN12_SQCNT1_OABPC REG_BIT(29) - /* Gen12 OAM unit */ #define GEN12_OAM_HEAD_POINTER_OFFSET (0x1a0) #define GEN12_OAM_HEAD_POINTER_MASK 0xffffffc0 From d59ce1ff8e6830fab60b688f33a89ef48c94b5d8 Mon Sep 17 00:00:00 2001 From: Jonathan Cavitt Date: Wed, 26 Apr 2023 23:28:48 +0200 Subject: [PATCH 030/108] drm/i915: Migrate platform-dependent mock hugepage selftests to live Convert the igt_mock_ppgtt_huge_fill and igt_mock_ppgtt_64K mock selftests into live selftests as their requirements have recently become platform-dependent. Additionally, apply necessary platform dependency checks to these tests. v8: - handle properly 64K and 2M pages v9: - do not expect 64K pages if 2M are present - fix hex printing - obey commit message line limit Signed-off-by: Jonathan Cavitt Co-developed-by: Andrzej Hajda Signed-off-by: Andrzej Hajda Reviewed-by: Andi Shyti Link: https://patchwork.freedesktop.org/patch/msgid/20230425-hugepage-migrate-v8-1-7868d54eaa27@intel.com --- .../gpu/drm/i915/gem/selftests/huge_pages.c | 106 +++++++++++++----- 1 file changed, 81 insertions(+), 25 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/selftests/huge_pages.c b/drivers/gpu/drm/i915/gem/selftests/huge_pages.c index defece0bcb81..cb5863f37f9d 100644 --- a/drivers/gpu/drm/i915/gem/selftests/huge_pages.c +++ b/drivers/gpu/drm/i915/gem/selftests/huge_pages.c @@ -695,8 +695,7 @@ static int igt_mock_ppgtt_misaligned_dma(void *arg) return err; } -static void close_object_list(struct list_head *objects, - struct i915_ppgtt *ppgtt) +static void close_object_list(struct list_head *objects) { struct drm_i915_gem_object *obj, *on; @@ -710,17 +709,36 @@ static void close_object_list(struct list_head *objects, } } -static int igt_mock_ppgtt_huge_fill(void *arg) +static int igt_ppgtt_huge_fill(void *arg) { - struct i915_ppgtt *ppgtt = arg; - struct drm_i915_private *i915 = ppgtt->vm.i915; - unsigned long max_pages = ppgtt->vm.total >> PAGE_SHIFT; + struct drm_i915_private *i915 = arg; + unsigned int supported = RUNTIME_INFO(i915)->page_sizes; + bool has_pte64 = GRAPHICS_VER_FULL(i915) >= IP_VER(12, 50); + struct i915_address_space *vm; + struct i915_gem_context *ctx; + unsigned long max_pages; unsigned long page_num; + struct file *file; bool single = false; LIST_HEAD(objects); IGT_TIMEOUT(end_time); int err = -ENODEV; + if (supported == I915_GTT_PAGE_SIZE_4K) + return 0; + + file = mock_file(i915); + if (IS_ERR(file)) + return PTR_ERR(file); + + ctx = hugepage_ctx(i915, file); + if (IS_ERR(ctx)) { + err = PTR_ERR(ctx); + goto out; + } + vm = i915_gem_context_get_eb_vm(ctx); + max_pages = vm->total >> PAGE_SHIFT; + for_each_prime_number_from(page_num, 1, max_pages) { struct drm_i915_gem_object *obj; u64 size = page_num << PAGE_SHIFT; @@ -750,13 +768,14 @@ static int igt_mock_ppgtt_huge_fill(void *arg) list_add(&obj->st_link, &objects); - vma = i915_vma_instance(obj, &ppgtt->vm, NULL); + vma = i915_vma_instance(obj, vm, NULL); if (IS_ERR(vma)) { err = PTR_ERR(vma); break; } - err = i915_vma_pin(vma, 0, 0, PIN_USER); + /* vma start must be aligned to BIT(21) to allow 2M PTEs */ + err = i915_vma_pin(vma, 0, BIT(21), PIN_USER); if (err) break; @@ -784,12 +803,13 @@ static int igt_mock_ppgtt_huge_fill(void *arg) GEM_BUG_ON(!expected_gtt); GEM_BUG_ON(size); - if (expected_gtt & I915_GTT_PAGE_SIZE_4K) + if (!has_pte64 && (obj->base.size < I915_GTT_PAGE_SIZE_2M || + expected_gtt & I915_GTT_PAGE_SIZE_2M)) expected_gtt &= ~I915_GTT_PAGE_SIZE_64K; i915_vma_unpin(vma); - if (vma->page_sizes.sg & I915_GTT_PAGE_SIZE_64K) { + if (!has_pte64 && vma->page_sizes.sg & I915_GTT_PAGE_SIZE_64K) { if (!IS_ALIGNED(vma->node.start, I915_GTT_PAGE_SIZE_2M)) { pr_err("node.start(%llx) not aligned to 2M\n", @@ -808,7 +828,7 @@ static int igt_mock_ppgtt_huge_fill(void *arg) } if (vma->resource->page_sizes_gtt != expected_gtt) { - pr_err("gtt=%u, expected=%u, size=%zd, single=%s\n", + pr_err("gtt=%#x, expected=%#x, size=0x%zx, single=%s\n", vma->resource->page_sizes_gtt, expected_gtt, obj->base.size, str_yes_no(!!single)); err = -EINVAL; @@ -823,19 +843,25 @@ static int igt_mock_ppgtt_huge_fill(void *arg) single = !single; } - close_object_list(&objects, ppgtt); + close_object_list(&objects); if (err == -ENOMEM || err == -ENOSPC) err = 0; + i915_vm_put(vm); +out: + fput(file); return err; } -static int igt_mock_ppgtt_64K(void *arg) +static int igt_ppgtt_64K(void *arg) { - struct i915_ppgtt *ppgtt = arg; - struct drm_i915_private *i915 = ppgtt->vm.i915; + struct drm_i915_private *i915 = arg; + bool has_pte64 = GRAPHICS_VER_FULL(i915) >= IP_VER(12, 50); struct drm_i915_gem_object *obj; + struct i915_address_space *vm; + struct i915_gem_context *ctx; + struct file *file; const struct object_info { unsigned int size; unsigned int gtt; @@ -907,16 +933,41 @@ static int igt_mock_ppgtt_64K(void *arg) if (!HAS_PAGE_SIZES(i915, I915_GTT_PAGE_SIZE_64K)) return 0; + file = mock_file(i915); + if (IS_ERR(file)) + return PTR_ERR(file); + + ctx = hugepage_ctx(i915, file); + if (IS_ERR(ctx)) { + err = PTR_ERR(ctx); + goto out; + } + vm = i915_gem_context_get_eb_vm(ctx); + for (i = 0; i < ARRAY_SIZE(objects); ++i) { unsigned int size = objects[i].size; unsigned int expected_gtt = objects[i].gtt; unsigned int offset = objects[i].offset; unsigned int flags = PIN_USER; + /* + * For modern GTT models, the requirements for marking a page-table + * as 64K have been relaxed. Account for this. + */ + if (has_pte64) { + expected_gtt = 0; + if (size >= SZ_64K) + expected_gtt |= I915_GTT_PAGE_SIZE_64K; + if (size & (SZ_64K - 1)) + expected_gtt |= I915_GTT_PAGE_SIZE_4K; + } + for (single = 0; single <= 1; single++) { obj = fake_huge_pages_object(i915, size, !!single); - if (IS_ERR(obj)) - return PTR_ERR(obj); + if (IS_ERR(obj)) { + err = PTR_ERR(obj); + goto out_vm; + } err = i915_gem_object_pin_pages_unlocked(obj); if (err) @@ -928,7 +979,7 @@ static int igt_mock_ppgtt_64K(void *arg) */ obj->mm.page_sizes.sg &= ~I915_GTT_PAGE_SIZE_2M; - vma = i915_vma_instance(obj, &ppgtt->vm, NULL); + vma = i915_vma_instance(obj, vm, NULL); if (IS_ERR(vma)) { err = PTR_ERR(vma); goto out_object_unpin; @@ -945,7 +996,8 @@ static int igt_mock_ppgtt_64K(void *arg) if (err) goto out_vma_unpin; - if (!offset && vma->page_sizes.sg & I915_GTT_PAGE_SIZE_64K) { + if (!has_pte64 && !offset && + vma->page_sizes.sg & I915_GTT_PAGE_SIZE_64K) { if (!IS_ALIGNED(vma->node.start, I915_GTT_PAGE_SIZE_2M)) { pr_err("node.start(%llx) not aligned to 2M\n", @@ -964,9 +1016,10 @@ static int igt_mock_ppgtt_64K(void *arg) } if (vma->resource->page_sizes_gtt != expected_gtt) { - pr_err("gtt=%u, expected=%u, i=%d, single=%s\n", + pr_err("gtt=%#x, expected=%#x, i=%d, single=%s offset=%#x size=%#x\n", vma->resource->page_sizes_gtt, - expected_gtt, i, str_yes_no(!!single)); + expected_gtt, i, str_yes_no(!!single), + offset, size); err = -EINVAL; goto out_vma_unpin; } @@ -982,7 +1035,7 @@ static int igt_mock_ppgtt_64K(void *arg) } } - return 0; + goto out_vm; out_vma_unpin: i915_vma_unpin(vma); @@ -992,7 +1045,10 @@ static int igt_mock_ppgtt_64K(void *arg) i915_gem_object_unlock(obj); out_object_put: i915_gem_object_put(obj); - +out_vm: + i915_vm_put(vm); +out: + fput(file); return err; } @@ -1910,8 +1966,6 @@ int i915_gem_huge_page_mock_selftests(void) SUBTEST(igt_mock_exhaust_device_supported_pages), SUBTEST(igt_mock_memory_region_huge_pages), SUBTEST(igt_mock_ppgtt_misaligned_dma), - SUBTEST(igt_mock_ppgtt_huge_fill), - SUBTEST(igt_mock_ppgtt_64K), }; struct drm_i915_private *dev_priv; struct i915_ppgtt *ppgtt; @@ -1962,6 +2016,8 @@ int i915_gem_huge_page_live_selftests(struct drm_i915_private *i915) SUBTEST(igt_ppgtt_sanity_check), SUBTEST(igt_ppgtt_compact), SUBTEST(igt_ppgtt_mixed), + SUBTEST(igt_ppgtt_huge_fill), + SUBTEST(igt_ppgtt_64K), }; if (!HAS_PPGTT(i915)) { From eee44c2903e5222632536f86ad5cf0ff9bbfc61b Mon Sep 17 00:00:00 2001 From: Jonathan Cavitt Date: Wed, 26 Apr 2023 23:28:49 +0200 Subject: [PATCH 031/108] drm/i915: Use correct huge page manager for MTL MTL currently uses gen8_ppgtt_insert_huge when managing huge pages. This is because MTL reports as not supporting 64K pages, or more accurately, the system that reports whether a platform has 64K pages reports false for MTL. This is only half correct, as the 64K page support reporting system only cares about 64K page support for LMEM, which MTL doesn't have. MTL should be using xehpsdv_ppgtt_insert_huge. However, simply changing over to using that manager doesn't resolve the issue because MTL is expecting the virtual address space for the page table to be flushed after initialization, so we must also add a flush statement there. Signed-off-by: Jonathan Cavitt Reviewed-by: Matthew Auld Signed-off-by: Andrzej Hajda Reviewed-by: Andi Shyti Link: https://patchwork.freedesktop.org/patch/msgid/20230425-hugepage-migrate-v8-2-7868d54eaa27@intel.com --- drivers/gpu/drm/i915/gt/gen8_ppgtt.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c index 4c9a2f2db908..22ec1566d2a7 100644 --- a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c +++ b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c @@ -598,6 +598,7 @@ xehpsdv_ppgtt_insert_huge(struct i915_address_space *vm, } } while (rem >= page_size && index < max); + drm_clflush_virt_range(vaddr, PAGE_SIZE); vma_res->page_sizes_gtt |= page_size; } while (iter->sg && sg_dma_len(iter->sg)); } @@ -735,7 +736,7 @@ static void gen8_ppgtt_insert(struct i915_address_space *vm, struct sgt_dma iter = sgt_dma(vma_res); if (vma_res->bi.page_sizes.sg > I915_GTT_PAGE_SIZE) { - if (HAS_64K_PAGES(vm->i915)) + if (GRAPHICS_VER_FULL(vm->i915) >= IP_VER(12, 50)) xehpsdv_ppgtt_insert_huge(vm, vma_res, &iter, cache_level, flags); else gen8_ppgtt_insert_huge(vm, vma_res, &iter, cache_level, flags); From 55f9720dbf23ed640a51ea5564c22305efa8a467 Mon Sep 17 00:00:00 2001 From: Vinay Belgaumkar Date: Tue, 25 Apr 2023 17:39:41 -0700 Subject: [PATCH 032/108] drm/i915/guc/slpc: Provide sysfs for efficient freq SLPC enables use of efficient freq at init by default. It is possible for GuC to request frequencies that are higher than the 'software' max if user has set it lower than the efficient level. Scenarios/tests that require strict fixing of freq below the efficient level will need to disable it through this interface. v2: Keep just one interface to toggle sysfs. With this, user will be completely responsible for toggling efficient frequency if need be. There will be no implicit disabling when user sets min < RP1 (Ashutosh) v3: Remove unused label, review comments (Ashutosh) v4: Toggle efficient freq usage in SLPC selftest and checkpatch fixes v5: Review comments (Andi) and add a separate patch for selftest updates Fixes: 95ccf312a1e4 ("drm/i915/guc/slpc: Allow SLPC to use efficient frequency") Signed-off-by: Vinay Belgaumkar Reviewed-by: Rodrigo Vivi Reviewed-by: Ashutosh Dixit Reviewed-by: Andi Shyti Signed-off-by: John Harrison Link: https://patchwork.freedesktop.org/patch/msgid/20230426003942.1924347-1-vinay.belgaumkar@intel.com --- drivers/gpu/drm/i915/gt/intel_gt_sysfs_pm.c | 35 +++++++++++++++++ drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c | 38 +++++++++++++------ drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.h | 1 + .../gpu/drm/i915/gt/uc/intel_guc_slpc_types.h | 1 + 4 files changed, 64 insertions(+), 11 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_gt_sysfs_pm.c b/drivers/gpu/drm/i915/gt/intel_gt_sysfs_pm.c index 28f27091cd3b..ee2b44f896a2 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_sysfs_pm.c +++ b/drivers/gpu/drm/i915/gt/intel_gt_sysfs_pm.c @@ -451,6 +451,33 @@ static ssize_t punit_req_freq_mhz_show(struct kobject *kobj, return sysfs_emit(buff, "%u\n", preq); } +static ssize_t slpc_ignore_eff_freq_show(struct kobject *kobj, + struct kobj_attribute *attr, + char *buff) +{ + struct intel_gt *gt = intel_gt_sysfs_get_drvdata(kobj, attr->attr.name); + struct intel_guc_slpc *slpc = >->uc.guc.slpc; + + return sysfs_emit(buff, "%u\n", slpc->ignore_eff_freq); +} + +static ssize_t slpc_ignore_eff_freq_store(struct kobject *kobj, + struct kobj_attribute *attr, + const char *buff, size_t count) +{ + struct intel_gt *gt = intel_gt_sysfs_get_drvdata(kobj, attr->attr.name); + struct intel_guc_slpc *slpc = >->uc.guc.slpc; + int err; + u32 val; + + err = kstrtou32(buff, 0, &val); + if (err) + return err; + + err = intel_guc_slpc_set_ignore_eff_freq(slpc, val); + return err ?: count; +} + struct intel_gt_bool_throttle_attr { struct attribute attr; ssize_t (*show)(struct kobject *kobj, struct kobj_attribute *attr, @@ -663,6 +690,8 @@ static struct kobj_attribute attr_media_freq_factor_scale = INTEL_GT_ATTR_RO(media_RP0_freq_mhz); INTEL_GT_ATTR_RO(media_RPn_freq_mhz); +INTEL_GT_ATTR_RW(slpc_ignore_eff_freq); + static const struct attribute *media_perf_power_attrs[] = { &attr_media_freq_factor.attr, &attr_media_freq_factor_scale.attr, @@ -744,6 +773,12 @@ void intel_gt_sysfs_pm_init(struct intel_gt *gt, struct kobject *kobj) if (ret) gt_warn(gt, "failed to create punit_req_freq_mhz sysfs (%pe)", ERR_PTR(ret)); + if (intel_uc_uses_guc_slpc(>->uc)) { + ret = sysfs_create_file(kobj, &attr_slpc_ignore_eff_freq.attr); + if (ret) + gt_warn(gt, "failed to create ignore_eff_freq sysfs (%pe)", ERR_PTR(ret)); + } + if (i915_mmio_reg_valid(intel_gt_perf_limit_reasons_reg(gt))) { ret = sysfs_create_files(kobj, throttle_reason_attrs); if (ret) diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c index 026d73855f36..56dbba1ef668 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c @@ -277,6 +277,7 @@ int intel_guc_slpc_init(struct intel_guc_slpc *slpc) slpc->max_freq_softlimit = 0; slpc->min_freq_softlimit = 0; + slpc->ignore_eff_freq = false; slpc->min_is_rpmax = false; slpc->boost_freq = 0; @@ -457,6 +458,29 @@ int intel_guc_slpc_get_max_freq(struct intel_guc_slpc *slpc, u32 *val) return ret; } +int intel_guc_slpc_set_ignore_eff_freq(struct intel_guc_slpc *slpc, bool val) +{ + struct drm_i915_private *i915 = slpc_to_i915(slpc); + intel_wakeref_t wakeref; + int ret; + + mutex_lock(&slpc->lock); + wakeref = intel_runtime_pm_get(&i915->runtime_pm); + + ret = slpc_set_param(slpc, + SLPC_PARAM_IGNORE_EFFICIENT_FREQUENCY, + val); + if (ret) + guc_probe_error(slpc_to_guc(slpc), "Failed to set efficient freq(%d): %pe\n", + val, ERR_PTR(ret)); + else + slpc->ignore_eff_freq = val; + + intel_runtime_pm_put(&i915->runtime_pm, wakeref); + mutex_unlock(&slpc->lock); + return ret; +} + /** * intel_guc_slpc_set_min_freq() - Set min frequency limit for SLPC. * @slpc: pointer to intel_guc_slpc. @@ -482,16 +506,6 @@ int intel_guc_slpc_set_min_freq(struct intel_guc_slpc *slpc, u32 val) mutex_lock(&slpc->lock); wakeref = intel_runtime_pm_get(&i915->runtime_pm); - /* Ignore efficient freq if lower min freq is requested */ - ret = slpc_set_param(slpc, - SLPC_PARAM_IGNORE_EFFICIENT_FREQUENCY, - val < slpc->rp1_freq); - if (ret) { - guc_probe_error(slpc_to_guc(slpc), "Failed to toggle efficient freq: %pe\n", - ERR_PTR(ret)); - goto out; - } - ret = slpc_set_param(slpc, SLPC_PARAM_GLOBAL_MIN_GT_UNSLICE_FREQ_MHZ, val); @@ -499,7 +513,6 @@ int intel_guc_slpc_set_min_freq(struct intel_guc_slpc *slpc, u32 val) if (!ret) slpc->min_freq_softlimit = val; -out: intel_runtime_pm_put(&i915->runtime_pm, wakeref); mutex_unlock(&slpc->lock); @@ -752,6 +765,9 @@ int intel_guc_slpc_enable(struct intel_guc_slpc *slpc) /* Set cached media freq ratio mode */ intel_guc_slpc_set_media_ratio_mode(slpc, slpc->media_ratio_mode); + /* Set cached value of ignore efficient freq */ + intel_guc_slpc_set_ignore_eff_freq(slpc, slpc->ignore_eff_freq); + return 0; } diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.h index 17ed515f6a85..597eb5413ddf 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.h @@ -46,5 +46,6 @@ void intel_guc_slpc_boost(struct intel_guc_slpc *slpc); void intel_guc_slpc_dec_waiters(struct intel_guc_slpc *slpc); int intel_guc_slpc_unset_gucrc_mode(struct intel_guc_slpc *slpc); int intel_guc_slpc_override_gucrc_mode(struct intel_guc_slpc *slpc, u32 mode); +int intel_guc_slpc_set_ignore_eff_freq(struct intel_guc_slpc *slpc, bool val); #endif diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc_types.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc_types.h index a6ef53b04e04..a88651331497 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc_types.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc_types.h @@ -31,6 +31,7 @@ struct intel_guc_slpc { /* frequency softlimits */ u32 min_freq_softlimit; u32 max_freq_softlimit; + bool ignore_eff_freq; /* cached media ratio mode */ u32 media_ratio_mode; From c73bd1706c0cdb5627d312efdc656baf48e0247f Mon Sep 17 00:00:00 2001 From: Vinay Belgaumkar Date: Tue, 25 Apr 2023 17:39:42 -0700 Subject: [PATCH 033/108] drm/i915/selftest: Update the SLPC selftest Use the new efficient frequency toggling interface. Also create a helper function to restore the frequencies after the test is done. v2: Restore max freq first and then min. Signed-off-by: Vinay Belgaumkar Reviewed-by: Andi Shyti Signed-off-by: John Harrison Link: https://patchwork.freedesktop.org/patch/msgid/20230426003942.1924347-2-vinay.belgaumkar@intel.com --- drivers/gpu/drm/i915/gt/selftest_slpc.c | 42 ++++++++++++++++++++++--- 1 file changed, 37 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/selftest_slpc.c b/drivers/gpu/drm/i915/gt/selftest_slpc.c index bd44ce73a504..952c8d52d68a 100644 --- a/drivers/gpu/drm/i915/gt/selftest_slpc.c +++ b/drivers/gpu/drm/i915/gt/selftest_slpc.c @@ -70,6 +70,31 @@ static int slpc_set_freq(struct intel_gt *gt, u32 freq) return err; } +static int slpc_restore_freq(struct intel_guc_slpc *slpc, u32 min, u32 max) +{ + int err; + + err = slpc_set_max_freq(slpc, max); + if (err) { + pr_err("Unable to restore max freq"); + return err; + } + + err = slpc_set_min_freq(slpc, min); + if (err) { + pr_err("Unable to restore min freq"); + return err; + } + + err = intel_guc_slpc_set_ignore_eff_freq(slpc, false); + if (err) { + pr_err("Unable to restore efficient freq"); + return err; + } + + return 0; +} + static u64 measure_power_at_freq(struct intel_gt *gt, int *freq, u64 *power) { int err = 0; @@ -268,8 +293,7 @@ static int run_test(struct intel_gt *gt, int test_type) /* * Set min frequency to RPn so that we can test the whole - * range of RPn-RP0. This also turns off efficient freq - * usage and makes results more predictable. + * range of RPn-RP0. */ err = slpc_set_min_freq(slpc, slpc->min_freq); if (err) { @@ -277,6 +301,15 @@ static int run_test(struct intel_gt *gt, int test_type) return err; } + /* + * Turn off efficient frequency so RPn/RP0 ranges are obeyed. + */ + err = intel_guc_slpc_set_ignore_eff_freq(slpc, true); + if (err) { + pr_err("Unable to turn off efficient freq!"); + return err; + } + intel_gt_pm_wait_for_idle(gt); intel_gt_pm_get(gt); for_each_engine(engine, gt, id) { @@ -358,9 +391,8 @@ static int run_test(struct intel_gt *gt, int test_type) break; } - /* Restore min/max frequencies */ - slpc_set_max_freq(slpc, slpc_max_freq); - slpc_set_min_freq(slpc, slpc_min_freq); + /* Restore min/max/efficient frequencies */ + err = slpc_restore_freq(slpc, slpc_min_freq, slpc_max_freq); if (igt_flush_test(gt->i915)) err = -EIO; From da3a99afd2575e744fe52cf661ce3f12feedd675 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Fri, 21 Apr 2023 16:59:48 +0300 Subject: [PATCH 034/108] drm/i915/rc6: throw out set() wrapper Remove useless indirection that's just misdirection for the readers. Signed-off-by: Jani Nikula Reviewed-by: Matt Roper Reviewed-by: Andrzej Hajda Link: https://patchwork.freedesktop.org/patch/msgid/20230421135948.2029121-1-jani.nikula@intel.com --- drivers/gpu/drm/i915/gt/intel_rc6.c | 157 ++++++++++++++-------------- 1 file changed, 76 insertions(+), 81 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_rc6.c b/drivers/gpu/drm/i915/gt/intel_rc6.c index 8f3cd68d14f8..908a3d0f2343 100644 --- a/drivers/gpu/drm/i915/gt/intel_rc6.c +++ b/drivers/gpu/drm/i915/gt/intel_rc6.c @@ -53,11 +53,6 @@ static struct drm_i915_private *rc6_to_i915(struct intel_rc6 *rc) return rc6_to_gt(rc)->i915; } -static void set(struct intel_uncore *uncore, i915_reg_t reg, u32 val) -{ - intel_uncore_write_fw(uncore, reg, val); -} - static void gen11_rc6_enable(struct intel_rc6 *rc6) { struct intel_gt *gt = rc6_to_gt(rc6); @@ -72,19 +67,19 @@ static void gen11_rc6_enable(struct intel_rc6 *rc6) */ if (!intel_uc_uses_guc_rc(>->uc)) { /* 2b: Program RC6 thresholds.*/ - set(uncore, GEN6_RC6_WAKE_RATE_LIMIT, 54 << 16 | 85); - set(uncore, GEN10_MEDIA_WAKE_RATE_LIMIT, 150); + intel_uncore_write_fw(uncore, GEN6_RC6_WAKE_RATE_LIMIT, 54 << 16 | 85); + intel_uncore_write_fw(uncore, GEN10_MEDIA_WAKE_RATE_LIMIT, 150); - set(uncore, GEN6_RC_EVALUATION_INTERVAL, 125000); /* 12500 * 1280ns */ - set(uncore, GEN6_RC_IDLE_HYSTERSIS, 25); /* 25 * 1280ns */ + intel_uncore_write_fw(uncore, GEN6_RC_EVALUATION_INTERVAL, 125000); /* 12500 * 1280ns */ + intel_uncore_write_fw(uncore, GEN6_RC_IDLE_HYSTERSIS, 25); /* 25 * 1280ns */ for_each_engine(engine, rc6_to_gt(rc6), id) - set(uncore, RING_MAX_IDLE(engine->mmio_base), 10); + intel_uncore_write_fw(uncore, RING_MAX_IDLE(engine->mmio_base), 10); - set(uncore, GUC_MAX_IDLE_COUNT, 0xA); + intel_uncore_write_fw(uncore, GUC_MAX_IDLE_COUNT, 0xA); - set(uncore, GEN6_RC_SLEEP, 0); + intel_uncore_write_fw(uncore, GEN6_RC_SLEEP, 0); - set(uncore, GEN6_RC6_THRESHOLD, 50000); /* 50/125ms per EI */ + intel_uncore_write_fw(uncore, GEN6_RC6_THRESHOLD, 50000); /* 50/125ms per EI */ } /* @@ -105,8 +100,8 @@ static void gen11_rc6_enable(struct intel_rc6 *rc6) * Broadwell+, To be conservative, we want to factor in a context * switch on top (due to ksoftirqd). */ - set(uncore, GEN9_MEDIA_PG_IDLE_HYSTERESIS, 60); - set(uncore, GEN9_RENDER_PG_IDLE_HYSTERESIS, 60); + intel_uncore_write_fw(uncore, GEN9_MEDIA_PG_IDLE_HYSTERESIS, 60); + intel_uncore_write_fw(uncore, GEN9_RENDER_PG_IDLE_HYSTERESIS, 60); /* 3a: Enable RC6 * @@ -141,7 +136,7 @@ static void gen11_rc6_enable(struct intel_rc6 *rc6) VDN_MFX_POWERGATE_ENABLE(i)); } - set(uncore, GEN9_PG_ENABLE, pg_enable); + intel_uncore_write_fw(uncore, GEN9_PG_ENABLE, pg_enable); } static void gen9_rc6_enable(struct intel_rc6 *rc6) @@ -152,26 +147,26 @@ static void gen9_rc6_enable(struct intel_rc6 *rc6) /* 2b: Program RC6 thresholds.*/ if (GRAPHICS_VER(rc6_to_i915(rc6)) >= 11) { - set(uncore, GEN6_RC6_WAKE_RATE_LIMIT, 54 << 16 | 85); - set(uncore, GEN10_MEDIA_WAKE_RATE_LIMIT, 150); + intel_uncore_write_fw(uncore, GEN6_RC6_WAKE_RATE_LIMIT, 54 << 16 | 85); + intel_uncore_write_fw(uncore, GEN10_MEDIA_WAKE_RATE_LIMIT, 150); } else if (IS_SKYLAKE(rc6_to_i915(rc6))) { /* * WaRsDoubleRc6WrlWithCoarsePowerGating:skl Doubling WRL only * when CPG is enabled */ - set(uncore, GEN6_RC6_WAKE_RATE_LIMIT, 108 << 16); + intel_uncore_write_fw(uncore, GEN6_RC6_WAKE_RATE_LIMIT, 108 << 16); } else { - set(uncore, GEN6_RC6_WAKE_RATE_LIMIT, 54 << 16); + intel_uncore_write_fw(uncore, GEN6_RC6_WAKE_RATE_LIMIT, 54 << 16); } - set(uncore, GEN6_RC_EVALUATION_INTERVAL, 125000); /* 12500 * 1280ns */ - set(uncore, GEN6_RC_IDLE_HYSTERSIS, 25); /* 25 * 1280ns */ + intel_uncore_write_fw(uncore, GEN6_RC_EVALUATION_INTERVAL, 125000); /* 12500 * 1280ns */ + intel_uncore_write_fw(uncore, GEN6_RC_IDLE_HYSTERSIS, 25); /* 25 * 1280ns */ for_each_engine(engine, rc6_to_gt(rc6), id) - set(uncore, RING_MAX_IDLE(engine->mmio_base), 10); + intel_uncore_write_fw(uncore, RING_MAX_IDLE(engine->mmio_base), 10); - set(uncore, GUC_MAX_IDLE_COUNT, 0xA); + intel_uncore_write_fw(uncore, GUC_MAX_IDLE_COUNT, 0xA); - set(uncore, GEN6_RC_SLEEP, 0); + intel_uncore_write_fw(uncore, GEN6_RC_SLEEP, 0); /* * 2c: Program Coarse Power Gating Policies. @@ -194,11 +189,11 @@ static void gen9_rc6_enable(struct intel_rc6 *rc6) * conservative, we have to factor in a context switch on top (due * to ksoftirqd). */ - set(uncore, GEN9_MEDIA_PG_IDLE_HYSTERESIS, 250); - set(uncore, GEN9_RENDER_PG_IDLE_HYSTERESIS, 250); + intel_uncore_write_fw(uncore, GEN9_MEDIA_PG_IDLE_HYSTERESIS, 250); + intel_uncore_write_fw(uncore, GEN9_RENDER_PG_IDLE_HYSTERESIS, 250); /* 3a: Enable RC6 */ - set(uncore, GEN6_RC6_THRESHOLD, 37500); /* 37.5/125ms per EI */ + intel_uncore_write_fw(uncore, GEN6_RC6_THRESHOLD, 37500); /* 37.5/125ms per EI */ rc6->ctl_enable = GEN6_RC_CTL_HW_ENABLE | @@ -210,8 +205,8 @@ static void gen9_rc6_enable(struct intel_rc6 *rc6) * - Render/Media PG need to be disabled with RC6. */ if (!NEEDS_WaRsDisableCoarsePowerGating(rc6_to_i915(rc6))) - set(uncore, GEN9_PG_ENABLE, - GEN9_RENDER_PG_ENABLE | GEN9_MEDIA_PG_ENABLE); + intel_uncore_write_fw(uncore, GEN9_PG_ENABLE, + GEN9_RENDER_PG_ENABLE | GEN9_MEDIA_PG_ENABLE); } static void gen8_rc6_enable(struct intel_rc6 *rc6) @@ -221,13 +216,13 @@ static void gen8_rc6_enable(struct intel_rc6 *rc6) enum intel_engine_id id; /* 2b: Program RC6 thresholds.*/ - set(uncore, GEN6_RC6_WAKE_RATE_LIMIT, 40 << 16); - set(uncore, GEN6_RC_EVALUATION_INTERVAL, 125000); /* 12500 * 1280ns */ - set(uncore, GEN6_RC_IDLE_HYSTERSIS, 25); /* 25 * 1280ns */ + intel_uncore_write_fw(uncore, GEN6_RC6_WAKE_RATE_LIMIT, 40 << 16); + intel_uncore_write_fw(uncore, GEN6_RC_EVALUATION_INTERVAL, 125000); /* 12500 * 1280ns */ + intel_uncore_write_fw(uncore, GEN6_RC_IDLE_HYSTERSIS, 25); /* 25 * 1280ns */ for_each_engine(engine, rc6_to_gt(rc6), id) - set(uncore, RING_MAX_IDLE(engine->mmio_base), 10); - set(uncore, GEN6_RC_SLEEP, 0); - set(uncore, GEN6_RC6_THRESHOLD, 625); /* 800us/1.28 for TO */ + intel_uncore_write_fw(uncore, RING_MAX_IDLE(engine->mmio_base), 10); + intel_uncore_write_fw(uncore, GEN6_RC_SLEEP, 0); + intel_uncore_write_fw(uncore, GEN6_RC6_THRESHOLD, 625); /* 800us/1.28 for TO */ /* 3: Enable RC6 */ rc6->ctl_enable = @@ -245,20 +240,20 @@ static void gen6_rc6_enable(struct intel_rc6 *rc6) u32 rc6vids, rc6_mask; int ret; - set(uncore, GEN6_RC1_WAKE_RATE_LIMIT, 1000 << 16); - set(uncore, GEN6_RC6_WAKE_RATE_LIMIT, 40 << 16 | 30); - set(uncore, GEN6_RC6pp_WAKE_RATE_LIMIT, 30); - set(uncore, GEN6_RC_EVALUATION_INTERVAL, 125000); - set(uncore, GEN6_RC_IDLE_HYSTERSIS, 25); + intel_uncore_write_fw(uncore, GEN6_RC1_WAKE_RATE_LIMIT, 1000 << 16); + intel_uncore_write_fw(uncore, GEN6_RC6_WAKE_RATE_LIMIT, 40 << 16 | 30); + intel_uncore_write_fw(uncore, GEN6_RC6pp_WAKE_RATE_LIMIT, 30); + intel_uncore_write_fw(uncore, GEN6_RC_EVALUATION_INTERVAL, 125000); + intel_uncore_write_fw(uncore, GEN6_RC_IDLE_HYSTERSIS, 25); for_each_engine(engine, rc6_to_gt(rc6), id) - set(uncore, RING_MAX_IDLE(engine->mmio_base), 10); + intel_uncore_write_fw(uncore, RING_MAX_IDLE(engine->mmio_base), 10); - set(uncore, GEN6_RC_SLEEP, 0); - set(uncore, GEN6_RC1e_THRESHOLD, 1000); - set(uncore, GEN6_RC6_THRESHOLD, 50000); - set(uncore, GEN6_RC6p_THRESHOLD, 150000); - set(uncore, GEN6_RC6pp_THRESHOLD, 64000); /* unused */ + intel_uncore_write_fw(uncore, GEN6_RC_SLEEP, 0); + intel_uncore_write_fw(uncore, GEN6_RC1e_THRESHOLD, 1000); + intel_uncore_write_fw(uncore, GEN6_RC6_THRESHOLD, 50000); + intel_uncore_write_fw(uncore, GEN6_RC6p_THRESHOLD, 150000); + intel_uncore_write_fw(uncore, GEN6_RC6pp_THRESHOLD, 64000); /* unused */ /* We don't use those on Haswell */ rc6_mask = GEN6_RC_CTL_RC6_ENABLE; @@ -372,22 +367,22 @@ static void chv_rc6_enable(struct intel_rc6 *rc6) enum intel_engine_id id; /* 2a: Program RC6 thresholds.*/ - set(uncore, GEN6_RC6_WAKE_RATE_LIMIT, 40 << 16); - set(uncore, GEN6_RC_EVALUATION_INTERVAL, 125000); /* 12500 * 1280ns */ - set(uncore, GEN6_RC_IDLE_HYSTERSIS, 25); /* 25 * 1280ns */ + intel_uncore_write_fw(uncore, GEN6_RC6_WAKE_RATE_LIMIT, 40 << 16); + intel_uncore_write_fw(uncore, GEN6_RC_EVALUATION_INTERVAL, 125000); /* 12500 * 1280ns */ + intel_uncore_write_fw(uncore, GEN6_RC_IDLE_HYSTERSIS, 25); /* 25 * 1280ns */ for_each_engine(engine, rc6_to_gt(rc6), id) - set(uncore, RING_MAX_IDLE(engine->mmio_base), 10); - set(uncore, GEN6_RC_SLEEP, 0); + intel_uncore_write_fw(uncore, RING_MAX_IDLE(engine->mmio_base), 10); + intel_uncore_write_fw(uncore, GEN6_RC_SLEEP, 0); /* TO threshold set to 500 us (0x186 * 1.28 us) */ - set(uncore, GEN6_RC6_THRESHOLD, 0x186); + intel_uncore_write_fw(uncore, GEN6_RC6_THRESHOLD, 0x186); /* Allows RC6 residency counter to work */ - set(uncore, VLV_COUNTER_CONTROL, - _MASKED_BIT_ENABLE(VLV_COUNT_RANGE_HIGH | - VLV_MEDIA_RC6_COUNT_EN | - VLV_RENDER_RC6_COUNT_EN)); + intel_uncore_write_fw(uncore, VLV_COUNTER_CONTROL, + _MASKED_BIT_ENABLE(VLV_COUNT_RANGE_HIGH | + VLV_MEDIA_RC6_COUNT_EN | + VLV_RENDER_RC6_COUNT_EN)); /* 3: Enable RC6 */ rc6->ctl_enable = GEN7_RC_CTL_TO_MODE; @@ -399,22 +394,22 @@ static void vlv_rc6_enable(struct intel_rc6 *rc6) struct intel_engine_cs *engine; enum intel_engine_id id; - set(uncore, GEN6_RC6_WAKE_RATE_LIMIT, 0x00280000); - set(uncore, GEN6_RC_EVALUATION_INTERVAL, 125000); - set(uncore, GEN6_RC_IDLE_HYSTERSIS, 25); + intel_uncore_write_fw(uncore, GEN6_RC6_WAKE_RATE_LIMIT, 0x00280000); + intel_uncore_write_fw(uncore, GEN6_RC_EVALUATION_INTERVAL, 125000); + intel_uncore_write_fw(uncore, GEN6_RC_IDLE_HYSTERSIS, 25); for_each_engine(engine, rc6_to_gt(rc6), id) - set(uncore, RING_MAX_IDLE(engine->mmio_base), 10); + intel_uncore_write_fw(uncore, RING_MAX_IDLE(engine->mmio_base), 10); - set(uncore, GEN6_RC6_THRESHOLD, 0x557); + intel_uncore_write_fw(uncore, GEN6_RC6_THRESHOLD, 0x557); /* Allows RC6 residency counter to work */ - set(uncore, VLV_COUNTER_CONTROL, - _MASKED_BIT_ENABLE(VLV_COUNT_RANGE_HIGH | - VLV_MEDIA_RC0_COUNT_EN | - VLV_RENDER_RC0_COUNT_EN | - VLV_MEDIA_RC6_COUNT_EN | - VLV_RENDER_RC6_COUNT_EN)); + intel_uncore_write_fw(uncore, VLV_COUNTER_CONTROL, + _MASKED_BIT_ENABLE(VLV_COUNT_RANGE_HIGH | + VLV_MEDIA_RC0_COUNT_EN | + VLV_RENDER_RC0_COUNT_EN | + VLV_MEDIA_RC6_COUNT_EN | + VLV_RENDER_RC6_COUNT_EN)); rc6->ctl_enable = GEN7_RC_CTL_TO_MODE | VLV_RC_CTL_CTX_RST_PARALLEL; @@ -575,9 +570,9 @@ static void __intel_rc6_disable(struct intel_rc6 *rc6) intel_uncore_forcewake_get(uncore, FORCEWAKE_ALL); if (GRAPHICS_VER(i915) >= 9) - set(uncore, GEN9_PG_ENABLE, 0); - set(uncore, GEN6_RC_CONTROL, 0); - set(uncore, GEN6_RC_STATE, 0); + intel_uncore_write_fw(uncore, GEN9_PG_ENABLE, 0); + intel_uncore_write_fw(uncore, GEN6_RC_CONTROL, 0); + intel_uncore_write_fw(uncore, GEN6_RC_STATE, 0); intel_uncore_forcewake_put(uncore, FORCEWAKE_ALL); } @@ -684,7 +679,7 @@ void intel_rc6_unpark(struct intel_rc6 *rc6) return; /* Restore HW timers for automatic RC6 entry while busy */ - set(uncore, GEN6_RC_CONTROL, rc6->ctl_enable); + intel_uncore_write_fw(uncore, GEN6_RC_CONTROL, rc6->ctl_enable); } void intel_rc6_park(struct intel_rc6 *rc6) @@ -704,7 +699,7 @@ void intel_rc6_park(struct intel_rc6 *rc6) return; /* Turn off the HW timers and go directly to rc6 */ - set(uncore, GEN6_RC_CONTROL, GEN6_RC_CTL_RC6_ENABLE); + intel_uncore_write_fw(uncore, GEN6_RC_CONTROL, GEN6_RC_CTL_RC6_ENABLE); if (HAS_RC6pp(rc6_to_i915(rc6))) target = 0x6; /* deepest rc6 */ @@ -712,7 +707,7 @@ void intel_rc6_park(struct intel_rc6 *rc6) target = 0x5; /* deep rc6 */ else target = 0x4; /* normal rc6 */ - set(uncore, GEN6_RC_STATE, target << RC_SW_TARGET_STATE_SHIFT); + intel_uncore_write_fw(uncore, GEN6_RC_STATE, target << RC_SW_TARGET_STATE_SHIFT); } void intel_rc6_disable(struct intel_rc6 *rc6) @@ -735,7 +730,7 @@ void intel_rc6_fini(struct intel_rc6 *rc6) /* We want the BIOS C6 state preserved across loads for MTL */ if (IS_METEORLAKE(rc6_to_i915(rc6)) && rc6->bios_state_captured) - set(uncore, GEN6_RC_STATE, rc6->bios_rc_state); + intel_uncore_write_fw(uncore, GEN6_RC_STATE, rc6->bios_rc_state); pctx = fetch_and_zero(&rc6->pctx); if (pctx) @@ -766,18 +761,18 @@ static u64 vlv_residency_raw(struct intel_uncore *uncore, const i915_reg_t reg) * before we have set the default VLV_COUNTER_CONTROL value. So always * set the high bit to be safe. */ - set(uncore, VLV_COUNTER_CONTROL, - _MASKED_BIT_ENABLE(VLV_COUNT_RANGE_HIGH)); + intel_uncore_write_fw(uncore, VLV_COUNTER_CONTROL, + _MASKED_BIT_ENABLE(VLV_COUNT_RANGE_HIGH)); upper = intel_uncore_read_fw(uncore, reg); do { tmp = upper; - set(uncore, VLV_COUNTER_CONTROL, - _MASKED_BIT_DISABLE(VLV_COUNT_RANGE_HIGH)); + intel_uncore_write_fw(uncore, VLV_COUNTER_CONTROL, + _MASKED_BIT_DISABLE(VLV_COUNT_RANGE_HIGH)); lower = intel_uncore_read_fw(uncore, reg); - set(uncore, VLV_COUNTER_CONTROL, - _MASKED_BIT_ENABLE(VLV_COUNT_RANGE_HIGH)); + intel_uncore_write_fw(uncore, VLV_COUNTER_CONTROL, + _MASKED_BIT_ENABLE(VLV_COUNT_RANGE_HIGH)); upper = intel_uncore_read_fw(uncore, reg); } while (upper != tmp && --loop); From 5aa857db54ad2e963d65e234f7e4c183cd89c993 Mon Sep 17 00:00:00 2001 From: Umesh Nerlige Ramappa Date: Thu, 27 Apr 2023 15:47:04 -0700 Subject: [PATCH 035/108] i915/pmu: Add support for total context runtime for GuC back-end GPU accumulates the context runtime in a 32 bit counter - CTX_TIMESTAMP in the context image. This value is saved/restored on context switches. KMD accumulates these values into a 64 bit counter taking care of any overflows as needed. This count provides the basis for client specific busyness in the fdinfo interface. KMD accumulation happens just before the context is unpinned and when context switches out. This works for execlist back-end since execlist scheduling has visibility into context switches. With GuC mode, KMD does not have visibility into context switches and this counter is accumulated only when context is unpinned. Context is unpinned once the context scheduling is successfully disabled. Disabling context scheduling is an asynchronous operation. Also if a context is servicing frequent requests, scheduling may never be disabled on it. For GuC mode, since updates to the context runtime may be delayed, add hooks to update the context runtime in a worker thread as well as when a user queries for it. Limitation: - If a context is never switched out or runs for a long period of time, the runtime value of CTX_TIMESTAMP may never be updated, so the counter value may be unreliable. This patch does not support such cases. Such support must be available from the GuC FW and it is WIP. This patch is an extract from previous work authored by John/Umesh here - https://patchwork.freedesktop.org/patch/496441/?series=105085&rev=4 v2: (Ashutosh) - Drop COPS_RUNTIME_ACTIVE_TOTAL - s/guc_context_update_clks/__guc_context_update_stats - Pin context before accessing in guc_timestamp_ping - In guc_context_unpin, use spinlock to serialize access to runtime stats Signed-off-by: Umesh Nerlige Ramappa Co-developed-by: John Harrison Signed-off-by: John Harrison Reviewed-by: Ashutosh Dixit Link: https://patchwork.freedesktop.org/patch/msgid/20230427224705.2785566-2-umesh.nerlige.ramappa@intel.com --- drivers/gpu/drm/i915/gt/intel_context.c | 5 ++- drivers/gpu/drm/i915/gt/intel_context.h | 2 +- drivers/gpu/drm/i915/gt/intel_context_types.h | 2 ++ .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 31 +++++++++++++++++++ 4 files changed, 38 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_context.c b/drivers/gpu/drm/i915/gt/intel_context.c index 2aa63ec521b8..a53b26178f0a 100644 --- a/drivers/gpu/drm/i915/gt/intel_context.c +++ b/drivers/gpu/drm/i915/gt/intel_context.c @@ -578,10 +578,13 @@ void intel_context_bind_parent_child(struct intel_context *parent, child->parallel.parent = parent; } -u64 intel_context_get_total_runtime_ns(const struct intel_context *ce) +u64 intel_context_get_total_runtime_ns(struct intel_context *ce) { u64 total, active; + if (ce->ops->update_stats) + ce->ops->update_stats(ce); + total = ce->stats.runtime.total; if (ce->ops->flags & COPS_RUNTIME_CYCLES) total *= ce->engine->gt->clock_period_ns; diff --git a/drivers/gpu/drm/i915/gt/intel_context.h b/drivers/gpu/drm/i915/gt/intel_context.h index 48f888c3da08..43ed92f8dc83 100644 --- a/drivers/gpu/drm/i915/gt/intel_context.h +++ b/drivers/gpu/drm/i915/gt/intel_context.h @@ -375,7 +375,7 @@ intel_context_clear_nopreempt(struct intel_context *ce) clear_bit(CONTEXT_NOPREEMPT, &ce->flags); } -u64 intel_context_get_total_runtime_ns(const struct intel_context *ce); +u64 intel_context_get_total_runtime_ns(struct intel_context *ce); u64 intel_context_get_avg_runtime_ns(struct intel_context *ce); static inline u64 intel_context_clock(void) diff --git a/drivers/gpu/drm/i915/gt/intel_context_types.h b/drivers/gpu/drm/i915/gt/intel_context_types.h index e36670f2e626..aceaac28a33e 100644 --- a/drivers/gpu/drm/i915/gt/intel_context_types.h +++ b/drivers/gpu/drm/i915/gt/intel_context_types.h @@ -58,6 +58,8 @@ struct intel_context_ops { void (*sched_disable)(struct intel_context *ce); + void (*update_stats)(struct intel_context *ce); + void (*reset)(struct intel_context *ce); void (*destroy)(struct kref *kref); diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c index ee3e8352637f..c869ddc73e69 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c @@ -1402,13 +1402,34 @@ static void __update_guc_busyness_stats(struct intel_guc *guc) spin_unlock_irqrestore(&guc->timestamp.lock, flags); } +static void __guc_context_update_stats(struct intel_context *ce) +{ + struct intel_guc *guc = ce_to_guc(ce); + unsigned long flags; + + spin_lock_irqsave(&guc->timestamp.lock, flags); + lrc_update_runtime(ce); + spin_unlock_irqrestore(&guc->timestamp.lock, flags); +} + +static void guc_context_update_stats(struct intel_context *ce) +{ + if (!intel_context_pin_if_active(ce)) + return; + + __guc_context_update_stats(ce); + intel_context_unpin(ce); +} + static void guc_timestamp_ping(struct work_struct *wrk) { struct intel_guc *guc = container_of(wrk, typeof(*guc), timestamp.work.work); struct intel_uc *uc = container_of(guc, typeof(*uc), guc); struct intel_gt *gt = guc_to_gt(guc); + struct intel_context *ce; intel_wakeref_t wakeref; + unsigned long index; int srcu, ret; /* @@ -1424,6 +1445,10 @@ static void guc_timestamp_ping(struct work_struct *wrk) with_intel_runtime_pm(>->i915->runtime_pm, wakeref) __update_guc_busyness_stats(guc); + /* adjust context stats for overflow */ + xa_for_each(&guc->context_lookup, index, ce) + guc_context_update_stats(ce); + intel_gt_reset_unlock(gt, srcu); guc_enable_busyness_worker(guc); @@ -2774,6 +2799,7 @@ static void guc_context_unpin(struct intel_context *ce) { struct intel_guc *guc = ce_to_guc(ce); + __guc_context_update_stats(ce); unpin_guc_id(guc, ce); lrc_unpin(ce); @@ -3455,6 +3481,7 @@ static void remove_from_context(struct i915_request *rq) } static const struct intel_context_ops guc_context_ops = { + .flags = COPS_RUNTIME_CYCLES, .alloc = guc_context_alloc, .close = guc_context_close, @@ -3473,6 +3500,8 @@ static const struct intel_context_ops guc_context_ops = { .sched_disable = guc_context_sched_disable, + .update_stats = guc_context_update_stats, + .reset = lrc_reset, .destroy = guc_context_destroy, @@ -3728,6 +3757,7 @@ static int guc_virtual_context_alloc(struct intel_context *ce) } static const struct intel_context_ops virtual_guc_context_ops = { + .flags = COPS_RUNTIME_CYCLES, .alloc = guc_virtual_context_alloc, .close = guc_context_close, @@ -3745,6 +3775,7 @@ static const struct intel_context_ops virtual_guc_context_ops = { .exit = guc_virtual_context_exit, .sched_disable = guc_context_sched_disable, + .update_stats = guc_context_update_stats, .destroy = guc_context_destroy, From 1324680a80ebf6eaba112bd54efa19e23081797d Mon Sep 17 00:00:00 2001 From: Umesh Nerlige Ramappa Date: Thu, 27 Apr 2023 15:47:05 -0700 Subject: [PATCH 036/108] drm/i915/fdinfo: Enable fdinfo for GuC backends Enable fdinfo for GuC based platforms with the exception that long running contexts will not provide reliable busyness data unless they switch out at some reasonable point in time. Link: https://gitlab.freedesktop.org/drm/intel/issues/8303 Signed-off-by: Umesh Nerlige Ramappa Reviewed-by: Ashutosh Dixit Link: https://patchwork.freedesktop.org/patch/msgid/20230427224705.2785566-3-umesh.nerlige.ramappa@intel.com --- drivers/gpu/drm/i915/i915_drm_client.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drm_client.c b/drivers/gpu/drm/i915/i915_drm_client.c index e8fa172ebe5e..d18d0a3ed905 100644 --- a/drivers/gpu/drm/i915/i915_drm_client.c +++ b/drivers/gpu/drm/i915/i915_drm_client.c @@ -147,11 +147,7 @@ void i915_drm_client_fdinfo(struct seq_file *m, struct file *f) PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn)); seq_printf(m, "drm-client-id:\t%u\n", client->id); - /* - * Temporarily skip showing client engine information with GuC submission till - * fetching engine busyness is implemented in the GuC submission backend - */ - if (GRAPHICS_VER(i915) < 8 || intel_uc_uses_guc_submission(&i915->gt0.uc)) + if (GRAPHICS_VER(i915) < 8) return; for (i = 0; i < ARRAY_SIZE(uabi_class_names); i++) From 860cf3bd715b3455e1c708386e773a669a2350ee Mon Sep 17 00:00:00 2001 From: Tejas Upadhyay Date: Fri, 28 Apr 2023 18:29:51 +0530 Subject: [PATCH 037/108] drm/i915/gt: Use gt_err for GT info It will be more informative regarding GT if we use gt_err instead. Cc: Andi Shyti Signed-off-by: Tejas Upadhyay Reviewed-by: Andi Shyti Signed-off-by: Andi Shyti Link: https://patchwork.freedesktop.org/patch/msgid/20230428125952.4037964-2-tejas.upadhyay@intel.com --- drivers/gpu/drm/i915/gt/selftest_engine_pm.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gt/selftest_engine_pm.c b/drivers/gpu/drm/i915/gt/selftest_engine_pm.c index 87c94314cf67..10e556a7eac4 100644 --- a/drivers/gpu/drm/i915/gt/selftest_engine_pm.c +++ b/drivers/gpu/drm/i915/gt/selftest_engine_pm.c @@ -5,6 +5,7 @@ #include +#include "gt/intel_gt_print.h" #include "i915_selftest.h" #include "intel_engine_regs.h" #include "intel_gpu_commands.h" @@ -402,7 +403,7 @@ static int live_engine_pm(void *arg) /* gt wakeref is async (deferred to workqueue) */ if (intel_gt_pm_wait_for_idle(gt)) { - pr_err("GT failed to idle\n"); + gt_err(gt, "GT failed to idle\n"); return -EINVAL; } } From 725859b98a8eeb50c9a711b323e0cffc367873ea Mon Sep 17 00:00:00 2001 From: Tejas Upadhyay Date: Fri, 28 Apr 2023 18:29:52 +0530 Subject: [PATCH 038/108] drm/i915/selftests: Use gt_err for GT info It will be more informative regarding GT if we use gt_err instead. Cc: Andi Shyti Signed-off-by: Tejas Upadhyay Reviewed-by: Andi Shyti Signed-off-by: Andi Shyti Link: https://patchwork.freedesktop.org/patch/msgid/20230428125952.4037964-3-tejas.upadhyay@intel.com --- drivers/gpu/drm/i915/selftests/i915_gem_evict.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_evict.c b/drivers/gpu/drm/i915/selftests/i915_gem_evict.c index 37068542aafe..f68ef4074088 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_evict.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem_evict.c @@ -27,6 +27,7 @@ #include "gem/selftests/igt_gem_utils.h" #include "gem/selftests/mock_context.h" #include "gt/intel_gt.h" +#include "gt/intel_gt_print.h" #include "i915_selftest.h" @@ -507,7 +508,8 @@ static int igt_evict_contexts(void *arg) } err = intel_gt_wait_for_idle(engine->gt, HZ * 3); if (err) { - pr_err("Failed to idle GT (on %s)", engine->name); + gt_err(engine->gt, "Failed to idle GT (on %s)", + engine->name); break; } } From b049132d61336f643d8faf2f6574b063667088cf Mon Sep 17 00:00:00 2001 From: John Harrison Date: Fri, 28 Apr 2023 11:56:33 -0700 Subject: [PATCH 039/108] drm/i915/guc: Don't capture Gen8 regs on Xe devices A pair of pre-Xe registers were being included in the Xe capture list. GuC was rejecting those as being invalid and logging errors about them. So, stop doing it. Signed-off-by: John Harrison Reviewed-by: Alan Previn Fixes: dce2bd542337 ("drm/i915/guc: Add Gen9 registers for GuC error state capture.") Cc: Alan Previn Cc: Umesh Nerlige Ramappa Cc: Lucas De Marchi Cc: John Harrison Cc: Jani Nikula Cc: Matt Roper Cc: Balasubramani Vivekanandan Cc: Daniele Ceraolo Spurio Link: https://patchwork.freedesktop.org/patch/msgid/20230428185636.457407-2-John.C.Harrison@Intel.com --- drivers/gpu/drm/i915/gt/uc/intel_guc_capture.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_capture.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_capture.c index cf49188db6a6..e0e793167d61 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_capture.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_capture.c @@ -31,12 +31,14 @@ { FORCEWAKE_MT, 0, 0, "FORCEWAKE" } #define COMMON_GEN9BASE_GLOBAL \ - { GEN8_FAULT_TLB_DATA0, 0, 0, "GEN8_FAULT_TLB_DATA0" }, \ - { GEN8_FAULT_TLB_DATA1, 0, 0, "GEN8_FAULT_TLB_DATA1" }, \ { ERROR_GEN6, 0, 0, "ERROR_GEN6" }, \ { DONE_REG, 0, 0, "DONE_REG" }, \ { HSW_GTT_CACHE_EN, 0, 0, "HSW_GTT_CACHE_EN" } +#define GEN9_GLOBAL \ + { GEN8_FAULT_TLB_DATA0, 0, 0, "GEN8_FAULT_TLB_DATA0" }, \ + { GEN8_FAULT_TLB_DATA1, 0, 0, "GEN8_FAULT_TLB_DATA1" } + #define COMMON_GEN12BASE_GLOBAL \ { GEN12_FAULT_TLB_DATA0, 0, 0, "GEN12_FAULT_TLB_DATA0" }, \ { GEN12_FAULT_TLB_DATA1, 0, 0, "GEN12_FAULT_TLB_DATA1" }, \ @@ -142,6 +144,7 @@ static const struct __guc_mmio_reg_descr xe_lpd_gsc_inst_regs[] = { static const struct __guc_mmio_reg_descr default_global_regs[] = { COMMON_BASE_GLOBAL, COMMON_GEN9BASE_GLOBAL, + GEN9_GLOBAL, }; static const struct __guc_mmio_reg_descr default_rc_class_regs[] = { From 684ee005d670a1b6360ba53db24b808f7584c89b Mon Sep 17 00:00:00 2001 From: John Harrison Date: Fri, 28 Apr 2023 11:56:34 -0700 Subject: [PATCH 040/108] drm/i915/guc: Consolidate duplicated capture list code Remove 99% duplicated steered register list code. Also, include the pre-Xe steered registers in the pre-Xe list generation. Signed-off-by: John Harrison Reviewed-by: Alan Previn Link: https://patchwork.freedesktop.org/patch/msgid/20230428185636.457407-3-John.C.Harrison@Intel.com --- .../gpu/drm/i915/gt/uc/intel_guc_capture.c | 112 +++++------------- 1 file changed, 29 insertions(+), 83 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_capture.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_capture.c index e0e793167d61..9184d2595e4c 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_capture.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_capture.c @@ -260,11 +260,15 @@ struct __ext_steer_reg { i915_mcr_reg_t reg; }; -static const struct __ext_steer_reg xe_extregs[] = { +static const struct __ext_steer_reg gen8_extregs[] = { {"GEN8_SAMPLER_INSTDONE", GEN8_SAMPLER_INSTDONE}, {"GEN8_ROW_INSTDONE", GEN8_ROW_INSTDONE} }; +static const struct __ext_steer_reg xehpg_extregs[] = { + {"XEHPG_INSTDONE_GEOM_SVG", XEHPG_INSTDONE_GEOM_SVG} +}; + static void __fill_ext_reg(struct __guc_mmio_reg_descr *ext, const struct __ext_steer_reg *extlist, int slice_id, int subslice_id) @@ -295,8 +299,8 @@ __alloc_ext_regs(struct __guc_mmio_reg_descr_group *newlist, } static void -guc_capture_alloc_steered_lists_xe_lpd(struct intel_guc *guc, - const struct __guc_mmio_reg_descr_group *lists) +guc_capture_alloc_steered_lists(struct intel_guc *guc, + const struct __guc_mmio_reg_descr_group *lists) { struct intel_gt *gt = guc_to_gt(guc); int slice, subslice, iter, i, num_steer_regs, num_tot_regs = 0; @@ -304,74 +308,19 @@ guc_capture_alloc_steered_lists_xe_lpd(struct intel_guc *guc, struct __guc_mmio_reg_descr_group *extlists; struct __guc_mmio_reg_descr *extarray; struct sseu_dev_info *sseu; + bool has_xehpg_extregs; - /* In XE_LPD we only have steered registers for the render-class */ + /* steered registers currently only exist for the render-class */ list = guc_capture_get_one_list(lists, GUC_CAPTURE_LIST_INDEX_PF, GUC_CAPTURE_LIST_TYPE_ENGINE_CLASS, GUC_RENDER_CLASS); /* skip if extlists was previously allocated */ if (!list || guc->capture->extlists) return; - num_steer_regs = ARRAY_SIZE(xe_extregs); + has_xehpg_extregs = GRAPHICS_VER_FULL(gt->i915) >= IP_VER(12, 55); - sseu = >->info.sseu; - for_each_ss_steering(iter, gt, slice, subslice) - num_tot_regs += num_steer_regs; - - if (!num_tot_regs) - return; - - /* allocate an extra for an end marker */ - extlists = kcalloc(2, sizeof(struct __guc_mmio_reg_descr_group), GFP_KERNEL); - if (!extlists) - return; - - if (__alloc_ext_regs(&extlists[0], list, num_tot_regs)) { - kfree(extlists); - return; - } - - extarray = extlists[0].extlist; - for_each_ss_steering(iter, gt, slice, subslice) { - for (i = 0; i < num_steer_regs; ++i) { - __fill_ext_reg(extarray, &xe_extregs[i], slice, subslice); - ++extarray; - } - } - - guc->capture->extlists = extlists; -} - -static const struct __ext_steer_reg xehpg_extregs[] = { - {"XEHPG_INSTDONE_GEOM_SVG", XEHPG_INSTDONE_GEOM_SVG} -}; - -static bool __has_xehpg_extregs(u32 ipver) -{ - return (ipver >= IP_VER(12, 55)); -} - -static void -guc_capture_alloc_steered_lists_xe_hpg(struct intel_guc *guc, - const struct __guc_mmio_reg_descr_group *lists, - u32 ipver) -{ - struct intel_gt *gt = guc_to_gt(guc); - struct sseu_dev_info *sseu; - int slice, subslice, i, iter, num_steer_regs, num_tot_regs = 0; - const struct __guc_mmio_reg_descr_group *list; - struct __guc_mmio_reg_descr_group *extlists; - struct __guc_mmio_reg_descr *extarray; - - /* In XE_LP / HPG we only have render-class steering registers during error-capture */ - list = guc_capture_get_one_list(lists, GUC_CAPTURE_LIST_INDEX_PF, - GUC_CAPTURE_LIST_TYPE_ENGINE_CLASS, GUC_RENDER_CLASS); - /* skip if extlists was previously allocated */ - if (!list || guc->capture->extlists) - return; - - num_steer_regs = ARRAY_SIZE(xe_extregs); - if (__has_xehpg_extregs(ipver)) + num_steer_regs = ARRAY_SIZE(gen8_extregs); + if (has_xehpg_extregs) num_steer_regs += ARRAY_SIZE(xehpg_extregs); sseu = >->info.sseu; @@ -393,11 +342,12 @@ guc_capture_alloc_steered_lists_xe_hpg(struct intel_guc *guc, extarray = extlists[0].extlist; for_each_ss_steering(iter, gt, slice, subslice) { - for (i = 0; i < ARRAY_SIZE(xe_extregs); ++i) { - __fill_ext_reg(extarray, &xe_extregs[i], slice, subslice); + for (i = 0; i < ARRAY_SIZE(gen8_extregs); ++i) { + __fill_ext_reg(extarray, &gen8_extregs[i], slice, subslice); ++extarray; } - if (__has_xehpg_extregs(ipver)) { + + if (has_xehpg_extregs) { for (i = 0; i < ARRAY_SIZE(xehpg_extregs); ++i) { __fill_ext_reg(extarray, &xehpg_extregs[i], slice, subslice); ++extarray; @@ -413,26 +363,22 @@ static const struct __guc_mmio_reg_descr_group * guc_capture_get_device_reglist(struct intel_guc *guc) { struct drm_i915_private *i915 = guc_to_gt(guc)->i915; + const struct __guc_mmio_reg_descr_group *lists; - if (GRAPHICS_VER(i915) > 11) { - /* - * For certain engine classes, there are slice and subslice - * level registers requiring steering. We allocate and populate - * these at init time based on hw config add it as an extension - * list at the end of the pre-populated render list. - */ - if (IS_DG2(i915)) - guc_capture_alloc_steered_lists_xe_hpg(guc, xe_lpd_lists, IP_VER(12, 55)); - else if (IS_XEHPSDV(i915)) - guc_capture_alloc_steered_lists_xe_hpg(guc, xe_lpd_lists, IP_VER(12, 50)); - else - guc_capture_alloc_steered_lists_xe_lpd(guc, xe_lpd_lists); + if (GRAPHICS_VER(i915) >= 12) + lists = xe_lpd_lists; + else + lists = default_lists; - return xe_lpd_lists; - } + /* + * For certain engine classes, there are slice and subslice + * level registers requiring steering. We allocate and populate + * these at init time based on hw config add it as an extension + * list at the end of the pre-populated render list. + */ + guc_capture_alloc_steered_lists(guc, lists); - /* if GuC submission is enabled on a non-POR platform, just use a common baseline */ - return default_lists; + return lists; } static const char * From 44e36855bdb72cc2ade0153dce4a08a5b830a3a6 Mon Sep 17 00:00:00 2001 From: John Harrison Date: Fri, 28 Apr 2023 11:56:35 -0700 Subject: [PATCH 041/108] drm/i915/guc: Capture list naming clean up Don't use 'xe_lp*' prefixes for register lists that are common with Gen8. Don't add Xe only GSC registers to pre-Xe devices that don't even have a GSC engine. Fix Xe_LP name. Don't use GEN9 as a prefix for register lists that contain all GEN8 registers. Rename the 'default_' register list prefix to 'gen8_' as that is the more accurate name. Signed-off-by: John Harrison Reviewed-by: Alan Previn Link: https://patchwork.freedesktop.org/patch/msgid/20230428185636.457407-4-John.C.Harrison@Intel.com --- .../gpu/drm/i915/gt/uc/intel_guc_capture.c | 100 +++++++++--------- 1 file changed, 49 insertions(+), 51 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_capture.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_capture.c index 9184d2595e4c..729a8fcf20dd 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_capture.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_capture.c @@ -30,12 +30,12 @@ #define COMMON_BASE_GLOBAL \ { FORCEWAKE_MT, 0, 0, "FORCEWAKE" } -#define COMMON_GEN9BASE_GLOBAL \ +#define COMMON_GEN8BASE_GLOBAL \ { ERROR_GEN6, 0, 0, "ERROR_GEN6" }, \ { DONE_REG, 0, 0, "DONE_REG" }, \ { HSW_GTT_CACHE_EN, 0, 0, "HSW_GTT_CACHE_EN" } -#define GEN9_GLOBAL \ +#define GEN8_GLOBAL \ { GEN8_FAULT_TLB_DATA0, 0, 0, "GEN8_FAULT_TLB_DATA0" }, \ { GEN8_FAULT_TLB_DATA1, 0, 0, "GEN8_FAULT_TLB_DATA1" } @@ -96,67 +96,65 @@ { GEN12_SFC_DONE(2), 0, 0, "SFC_DONE[2]" }, \ { GEN12_SFC_DONE(3), 0, 0, "SFC_DONE[3]" } -/* XE_LPD - Global */ -static const struct __guc_mmio_reg_descr xe_lpd_global_regs[] = { +/* XE_LP Global */ +static const struct __guc_mmio_reg_descr xe_lp_global_regs[] = { COMMON_BASE_GLOBAL, - COMMON_GEN9BASE_GLOBAL, + COMMON_GEN8BASE_GLOBAL, COMMON_GEN12BASE_GLOBAL, }; -/* XE_LPD - Render / Compute Per-Class */ -static const struct __guc_mmio_reg_descr xe_lpd_rc_class_regs[] = { +/* XE_LP Render / Compute Per-Class */ +static const struct __guc_mmio_reg_descr xe_lp_rc_class_regs[] = { COMMON_BASE_HAS_EU, COMMON_BASE_RENDER, COMMON_GEN12BASE_RENDER, }; -/* GEN9/XE_LPD - Render / Compute Per-Engine-Instance */ -static const struct __guc_mmio_reg_descr xe_lpd_rc_inst_regs[] = { +/* GEN8+ Render / Compute Per-Engine-Instance */ +static const struct __guc_mmio_reg_descr gen8_rc_inst_regs[] = { COMMON_BASE_ENGINE_INSTANCE, }; -/* GEN9/XE_LPD - Media Decode/Encode Per-Engine-Instance */ -static const struct __guc_mmio_reg_descr xe_lpd_vd_inst_regs[] = { +/* GEN8+ Media Decode/Encode Per-Engine-Instance */ +static const struct __guc_mmio_reg_descr gen8_vd_inst_regs[] = { COMMON_BASE_ENGINE_INSTANCE, }; -/* XE_LPD - Video Enhancement Per-Class */ -static const struct __guc_mmio_reg_descr xe_lpd_vec_class_regs[] = { +/* XE_LP Video Enhancement Per-Class */ +static const struct __guc_mmio_reg_descr xe_lp_vec_class_regs[] = { COMMON_GEN12BASE_VEC, }; -/* GEN9/XE_LPD - Video Enhancement Per-Engine-Instance */ -static const struct __guc_mmio_reg_descr xe_lpd_vec_inst_regs[] = { +/* GEN8+ Video Enhancement Per-Engine-Instance */ +static const struct __guc_mmio_reg_descr gen8_vec_inst_regs[] = { COMMON_BASE_ENGINE_INSTANCE, }; -/* GEN9/XE_LPD - Blitter Per-Engine-Instance */ -static const struct __guc_mmio_reg_descr xe_lpd_blt_inst_regs[] = { +/* GEN8+ Blitter Per-Engine-Instance */ +static const struct __guc_mmio_reg_descr gen8_blt_inst_regs[] = { COMMON_BASE_ENGINE_INSTANCE, }; -/* XE_LPD - GSC Per-Engine-Instance */ -static const struct __guc_mmio_reg_descr xe_lpd_gsc_inst_regs[] = { +/* XE_LP - GSC Per-Engine-Instance */ +static const struct __guc_mmio_reg_descr xe_lp_gsc_inst_regs[] = { COMMON_BASE_ENGINE_INSTANCE, }; -/* GEN9 - Global */ -static const struct __guc_mmio_reg_descr default_global_regs[] = { +/* GEN8 - Global */ +static const struct __guc_mmio_reg_descr gen8_global_regs[] = { COMMON_BASE_GLOBAL, - COMMON_GEN9BASE_GLOBAL, - GEN9_GLOBAL, + COMMON_GEN8BASE_GLOBAL, + GEN8_GLOBAL, }; -static const struct __guc_mmio_reg_descr default_rc_class_regs[] = { +static const struct __guc_mmio_reg_descr gen8_rc_class_regs[] = { COMMON_BASE_HAS_EU, COMMON_BASE_RENDER, }; /* - * Empty lists: - * GEN9/XE_LPD - Blitter Per-Class - * GEN9/XE_LPD - Media Decode/Encode Per-Class - * GEN9 - VEC Class + * Empty list to prevent warnings about unknown class/instance types + * as not all class/instanace types have entries on all platforms. */ static const struct __guc_mmio_reg_descr empty_regs_list[] = { }; @@ -174,37 +172,37 @@ static const struct __guc_mmio_reg_descr empty_regs_list[] = { } /* List of lists */ -static const struct __guc_mmio_reg_descr_group default_lists[] = { - MAKE_REGLIST(default_global_regs, PF, GLOBAL, 0), - MAKE_REGLIST(default_rc_class_regs, PF, ENGINE_CLASS, GUC_RENDER_CLASS), - MAKE_REGLIST(xe_lpd_rc_inst_regs, PF, ENGINE_INSTANCE, GUC_RENDER_CLASS), - MAKE_REGLIST(default_rc_class_regs, PF, ENGINE_CLASS, GUC_COMPUTE_CLASS), - MAKE_REGLIST(xe_lpd_rc_inst_regs, PF, ENGINE_INSTANCE, GUC_COMPUTE_CLASS), +static const struct __guc_mmio_reg_descr_group gen8_lists[] = { + MAKE_REGLIST(gen8_global_regs, PF, GLOBAL, 0), + MAKE_REGLIST(gen8_rc_class_regs, PF, ENGINE_CLASS, GUC_RENDER_CLASS), + MAKE_REGLIST(gen8_rc_inst_regs, PF, ENGINE_INSTANCE, GUC_RENDER_CLASS), + MAKE_REGLIST(gen8_rc_class_regs, PF, ENGINE_CLASS, GUC_COMPUTE_CLASS), + MAKE_REGLIST(gen8_rc_inst_regs, PF, ENGINE_INSTANCE, GUC_COMPUTE_CLASS), MAKE_REGLIST(empty_regs_list, PF, ENGINE_CLASS, GUC_VIDEO_CLASS), - MAKE_REGLIST(xe_lpd_vd_inst_regs, PF, ENGINE_INSTANCE, GUC_VIDEO_CLASS), + MAKE_REGLIST(gen8_vd_inst_regs, PF, ENGINE_INSTANCE, GUC_VIDEO_CLASS), MAKE_REGLIST(empty_regs_list, PF, ENGINE_CLASS, GUC_VIDEOENHANCE_CLASS), - MAKE_REGLIST(xe_lpd_vec_inst_regs, PF, ENGINE_INSTANCE, GUC_VIDEOENHANCE_CLASS), + MAKE_REGLIST(gen8_vec_inst_regs, PF, ENGINE_INSTANCE, GUC_VIDEOENHANCE_CLASS), MAKE_REGLIST(empty_regs_list, PF, ENGINE_CLASS, GUC_BLITTER_CLASS), - MAKE_REGLIST(xe_lpd_blt_inst_regs, PF, ENGINE_INSTANCE, GUC_BLITTER_CLASS), + MAKE_REGLIST(gen8_blt_inst_regs, PF, ENGINE_INSTANCE, GUC_BLITTER_CLASS), MAKE_REGLIST(empty_regs_list, PF, ENGINE_CLASS, GUC_GSC_OTHER_CLASS), - MAKE_REGLIST(xe_lpd_gsc_inst_regs, PF, ENGINE_INSTANCE, GUC_GSC_OTHER_CLASS), + MAKE_REGLIST(empty_regs_list, PF, ENGINE_INSTANCE, GUC_GSC_OTHER_CLASS), {} }; -static const struct __guc_mmio_reg_descr_group xe_lpd_lists[] = { - MAKE_REGLIST(xe_lpd_global_regs, PF, GLOBAL, 0), - MAKE_REGLIST(xe_lpd_rc_class_regs, PF, ENGINE_CLASS, GUC_RENDER_CLASS), - MAKE_REGLIST(xe_lpd_rc_inst_regs, PF, ENGINE_INSTANCE, GUC_RENDER_CLASS), - MAKE_REGLIST(xe_lpd_rc_class_regs, PF, ENGINE_CLASS, GUC_COMPUTE_CLASS), - MAKE_REGLIST(xe_lpd_rc_inst_regs, PF, ENGINE_INSTANCE, GUC_COMPUTE_CLASS), +static const struct __guc_mmio_reg_descr_group xe_lp_lists[] = { + MAKE_REGLIST(xe_lp_global_regs, PF, GLOBAL, 0), + MAKE_REGLIST(xe_lp_rc_class_regs, PF, ENGINE_CLASS, GUC_RENDER_CLASS), + MAKE_REGLIST(gen8_rc_inst_regs, PF, ENGINE_INSTANCE, GUC_RENDER_CLASS), + MAKE_REGLIST(xe_lp_rc_class_regs, PF, ENGINE_CLASS, GUC_COMPUTE_CLASS), + MAKE_REGLIST(gen8_rc_inst_regs, PF, ENGINE_INSTANCE, GUC_COMPUTE_CLASS), MAKE_REGLIST(empty_regs_list, PF, ENGINE_CLASS, GUC_VIDEO_CLASS), - MAKE_REGLIST(xe_lpd_vd_inst_regs, PF, ENGINE_INSTANCE, GUC_VIDEO_CLASS), - MAKE_REGLIST(xe_lpd_vec_class_regs, PF, ENGINE_CLASS, GUC_VIDEOENHANCE_CLASS), - MAKE_REGLIST(xe_lpd_vec_inst_regs, PF, ENGINE_INSTANCE, GUC_VIDEOENHANCE_CLASS), + MAKE_REGLIST(gen8_vd_inst_regs, PF, ENGINE_INSTANCE, GUC_VIDEO_CLASS), + MAKE_REGLIST(xe_lp_vec_class_regs, PF, ENGINE_CLASS, GUC_VIDEOENHANCE_CLASS), + MAKE_REGLIST(gen8_vec_inst_regs, PF, ENGINE_INSTANCE, GUC_VIDEOENHANCE_CLASS), MAKE_REGLIST(empty_regs_list, PF, ENGINE_CLASS, GUC_BLITTER_CLASS), - MAKE_REGLIST(xe_lpd_blt_inst_regs, PF, ENGINE_INSTANCE, GUC_BLITTER_CLASS), + MAKE_REGLIST(gen8_blt_inst_regs, PF, ENGINE_INSTANCE, GUC_BLITTER_CLASS), MAKE_REGLIST(empty_regs_list, PF, ENGINE_CLASS, GUC_GSC_OTHER_CLASS), - MAKE_REGLIST(xe_lpd_gsc_inst_regs, PF, ENGINE_INSTANCE, GUC_GSC_OTHER_CLASS), + MAKE_REGLIST(xe_lp_gsc_inst_regs, PF, ENGINE_INSTANCE, GUC_GSC_OTHER_CLASS), {} }; @@ -366,9 +364,9 @@ guc_capture_get_device_reglist(struct intel_guc *guc) const struct __guc_mmio_reg_descr_group *lists; if (GRAPHICS_VER(i915) >= 12) - lists = xe_lpd_lists; + lists = xe_lp_lists; else - lists = default_lists; + lists = gen8_lists; /* * For certain engine classes, there are slice and subslice From e4730ae44240ae1a2e247bfc7f916813567acb92 Mon Sep 17 00:00:00 2001 From: John Harrison Date: Fri, 28 Apr 2023 11:56:36 -0700 Subject: [PATCH 042/108] drm/i915/guc: Fix error capture for virtual engines GuC based register dumps in error capture logs were basically broken for virtual engines. This can be seen in igt@gem_exec_balancer@hang: [IGT] gem_exec_balancer: starting subtest hang [drm] GPU HANG: ecode 12:4:e1524110, in gem_exec_balanc [6388] [drm] GT0: GUC: No register capture node found for 0x1005 / 0xFEDC311D [drm] GPU HANG: ecode 12:4:00000000, in gem_exec_balanc [6388] [IGT] gem_exec_balancer: exiting, ret=0 The test causes a hang on both engines of a virtual engine context. The engine instance zero hang gets a valid error capture but the non-instance-zero hang does not. Fix that by scanning through the list of pending register captures when a hang notification for a virtual engine is received. That way, the hang can be assigned to the correct physical engine prior to starting the error capture process. So later on, when the error capture handler tries to find the engine register list, it looks for one on the correct engine. Also, sneak in a missing blank line before a comment in the node search code. v2: Fix null pointer deref on non-GuC platforms. Signed-off-by: John Harrison Reviewed-by: Alan Previn Link: https://patchwork.freedesktop.org/patch/msgid/20230428185636.457407-5-John.C.Harrison@Intel.com --- .../gpu/drm/i915/gt/uc/intel_guc_capture.c | 31 ++++++++++++++++++ .../gpu/drm/i915/gt/uc/intel_guc_capture.h | 3 ++ .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 32 ++++++++++++++++--- drivers/gpu/drm/i915/i915_gpu_error.c | 11 +++++-- 4 files changed, 70 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_capture.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_capture.c index 729a8fcf20dd..1def0b6467c7 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_capture.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_capture.c @@ -1540,6 +1540,36 @@ void intel_guc_capture_free_node(struct intel_engine_coredump *ee) ee->guc_capture_node = NULL; } +bool intel_guc_capture_is_matching_engine(struct intel_gt *gt, + struct intel_context *ce, + struct intel_engine_cs *engine) +{ + struct __guc_capture_parsed_output *n; + struct intel_guc *guc; + + if (!gt || !ce || !engine) + return false; + + guc = >->uc.guc; + if (!guc->capture) + return false; + + /* + * Look for a matching GuC reported error capture node from + * the internal output link-list based on lrca, guc-id and engine + * identification. + */ + list_for_each_entry(n, &guc->capture->outlist, link) { + if (n->eng_inst == GUC_ID_TO_ENGINE_INSTANCE(engine->guc_id) && + n->eng_class == GUC_ID_TO_ENGINE_CLASS(engine->guc_id) && + n->guc_id == ce->guc_id.id && + (n->lrca & CTX_GTT_ADDRESS_MASK) == (ce->lrc.lrca & CTX_GTT_ADDRESS_MASK)) + return true; + } + + return false; +} + void intel_guc_capture_get_matching_node(struct intel_gt *gt, struct intel_engine_coredump *ee, struct intel_context *ce) @@ -1555,6 +1585,7 @@ void intel_guc_capture_get_matching_node(struct intel_gt *gt, return; GEM_BUG_ON(ee->guc_capture_node); + /* * Look for a matching GuC reported error capture node from * the internal output link-list based on lrca, guc-id and engine diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_capture.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_capture.h index fbd3713c7832..302256d45431 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_capture.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_capture.h @@ -11,6 +11,7 @@ struct drm_i915_error_state_buf; struct guc_gt_system_info; struct intel_engine_coredump; +struct intel_engine_cs; struct intel_context; struct intel_gt; struct intel_guc; @@ -20,6 +21,8 @@ int intel_guc_capture_print_engine_node(struct drm_i915_error_state_buf *m, const struct intel_engine_coredump *ee); void intel_guc_capture_get_matching_node(struct intel_gt *gt, struct intel_engine_coredump *ee, struct intel_context *ce); +bool intel_guc_capture_is_matching_engine(struct intel_gt *gt, struct intel_context *ce, + struct intel_engine_cs *engine); void intel_guc_capture_process(struct intel_guc *guc); int intel_guc_capture_getlist(struct intel_guc *guc, u32 owner, u32 type, u32 classid, void **outptr); diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c index c869ddc73e69..a0e3ef1c65d2 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c @@ -4728,13 +4728,37 @@ static void capture_error_state(struct intel_guc *guc, { struct intel_gt *gt = guc_to_gt(guc); struct drm_i915_private *i915 = gt->i915; - struct intel_engine_cs *engine = __context_to_physical_engine(ce); intel_wakeref_t wakeref; + intel_engine_mask_t engine_mask; + + if (intel_engine_is_virtual(ce->engine)) { + struct intel_engine_cs *e; + intel_engine_mask_t tmp, virtual_mask = ce->engine->mask; + + engine_mask = 0; + for_each_engine_masked(e, ce->engine->gt, virtual_mask, tmp) { + bool match = intel_guc_capture_is_matching_engine(gt, ce, e); + + if (match) { + intel_engine_set_hung_context(e, ce); + engine_mask |= e->mask; + atomic_inc(&i915->gpu_error.reset_engine_count[e->uabi_class]); + } + } + + if (!engine_mask) { + guc_warn(guc, "No matching physical engine capture for virtual engine context 0x%04X / %s", + ce->guc_id.id, ce->engine->name); + engine_mask = ~0U; + } + } else { + intel_engine_set_hung_context(ce->engine, ce); + engine_mask = ce->engine->mask; + atomic_inc(&i915->gpu_error.reset_engine_count[ce->engine->uabi_class]); + } - intel_engine_set_hung_context(engine, ce); with_intel_runtime_pm(&i915->runtime_pm, wakeref) - i915_capture_error_state(gt, engine->mask, CORE_DUMP_FLAG_IS_GUC_CAPTURE); - atomic_inc(&i915->gpu_error.reset_engine_count[engine->uabi_class]); + i915_capture_error_state(gt, engine_mask, CORE_DUMP_FLAG_IS_GUC_CAPTURE); } static void guc_context_replay(struct intel_context *ce) diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c index f020c0086fbc..7360046b9945 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.c +++ b/drivers/gpu/drm/i915/i915_gpu_error.c @@ -808,10 +808,15 @@ static void err_print_gt_engines(struct drm_i915_error_state_buf *m, for (ee = gt->engine; ee; ee = ee->next) { const struct i915_vma_coredump *vma; - if (ee->guc_capture_node) - intel_guc_capture_print_engine_node(m, ee); - else + if (gt->uc && gt->uc->guc.is_guc_capture) { + if (ee->guc_capture_node) + intel_guc_capture_print_engine_node(m, ee); + else + err_printf(m, " Missing GuC capture node for %s\n", + ee->engine->name); + } else { error_print_engine(m, ee); + } err_printf(m, " hung: %u\n", ee->hung); err_printf(m, " engine reset count: %u\n", ee->reset_count); From 67f2dd9f38abf4d994a5bc5131ce684e594d66a8 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Tue, 2 May 2023 18:37:18 +0300 Subject: [PATCH 043/108] drm/i915/gvt: fix intel_vgpu_alloc_resource() kernel-doc parameter drivers/gpu/drm/i915/gvt/aperture_gm.c:344: warning: Function parameter or member 'conf' not described in 'intel_vgpu_alloc_resource' drivers/gpu/drm/i915/gvt/aperture_gm.c:344: warning: Excess function parameter 'param' description in 'intel_vgpu_alloc_resource' Signed-off-by: Jani Nikula Reviewed-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/9213214c9caa296ebd349a5d5b44c2bbb45cdf99.1683041799.git.jani.nikula@intel.com --- drivers/gpu/drm/i915/gvt/aperture_gm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gvt/aperture_gm.c b/drivers/gpu/drm/i915/gvt/aperture_gm.c index 076c779f776a..eedd1865bb98 100644 --- a/drivers/gpu/drm/i915/gvt/aperture_gm.c +++ b/drivers/gpu/drm/i915/gvt/aperture_gm.c @@ -330,7 +330,7 @@ void intel_vgpu_reset_resource(struct intel_vgpu *vgpu) /** * intel_vgpu_alloc_resource() - allocate HW resource for a vGPU * @vgpu: vGPU - * @param: vGPU creation params + * @conf: vGPU creation params * * This function is used to allocate HW resource for a vGPU. User specifies * the resource configuration through the creation params. From e1172b617a93c2133750c4ff586f20f46e1e42c0 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Tue, 2 May 2023 18:37:19 +0300 Subject: [PATCH 044/108] drm/i915/vma: fix kernel-doc function name for i915_vma_size() drivers/gpu/drm/i915/i915_vma.h:145: warning: expecting prototype for i915_vma_offset(). Prototype was for i915_vma_size() instead Signed-off-by: Jani Nikula Reviewed-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/9fd58398ba6f86c55cc7a7c62efeab70c3311d59.1683041799.git.jani.nikula@intel.com --- drivers/gpu/drm/i915/i915_vma.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_vma.h b/drivers/gpu/drm/i915/i915_vma.h index ed5c9d682a1b..38c8c66ed724 100644 --- a/drivers/gpu/drm/i915/i915_vma.h +++ b/drivers/gpu/drm/i915/i915_vma.h @@ -132,7 +132,7 @@ static inline u64 __i915_vma_size(const struct i915_vma *vma) } /** - * i915_vma_offset - Obtain the va range size of the vma + * i915_vma_size - Obtain the va range size of the vma * @vma: The vma * * GPU virtual address space may be allocated with padding. This From 144c3f7b190981a659b8fe9179ddd1f46baadee7 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Tue, 2 May 2023 18:37:20 +0300 Subject: [PATCH 045/108] drm/i915/utils: drop kernel-doc from __wait_for() The parameters aren't documented, and the file isn't included in Sphinx build anyway, so demote the kernel-doc to a regular comment. drivers/gpu/drm/i915/i915_utils.h:284: warning: Function parameter or member 'OP' not described in '__wait_for' drivers/gpu/drm/i915/i915_utils.h:284: warning: Function parameter or member 'COND' not described in '__wait_for' drivers/gpu/drm/i915/i915_utils.h:284: warning: Function parameter or member 'US' not described in '__wait_for' drivers/gpu/drm/i915/i915_utils.h:284: warning: Function parameter or member 'Wmin' not described in '__wait_for' drivers/gpu/drm/i915/i915_utils.h:284: warning: Function parameter or member 'Wmax' not described in '__wait_for' Signed-off-by: Jani Nikula Reviewed-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/e6e33a7f03c4a78739fa96e6ae74eb272ae147e7.1683041799.git.jani.nikula@intel.com --- drivers/gpu/drm/i915/i915_utils.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_utils.h b/drivers/gpu/drm/i915/i915_utils.h index 2c430c0c3bad..c61066498bf2 100644 --- a/drivers/gpu/drm/i915/i915_utils.h +++ b/drivers/gpu/drm/i915/i915_utils.h @@ -250,7 +250,7 @@ wait_remaining_ms_from_jiffies(unsigned long timestamp_jiffies, int to_wait_ms) } } -/** +/* * __wait_for - magic wait macro * * Macro to help avoid open coding check/wait/timeout patterns. Note that it's From 28487ecb96b0bd1ff27a3e8ae8bc027af6e448e1 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Tue, 2 May 2023 18:37:21 +0300 Subject: [PATCH 046/108] drm/i915/vma: document struct i915_vma_resource wakeref member drivers/gpu/drm/i915/i915_vma_resource.h:129: warning: Function parameter or member 'wakeref' not described in 'i915_vma_resource' Signed-off-by: Jani Nikula Reviewed-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/50389365e095dc564ab5f1f1e3647934163ffefa.1683041799.git.jani.nikula@intel.com --- drivers/gpu/drm/i915/i915_vma_resource.h | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/i915/i915_vma_resource.h b/drivers/gpu/drm/i915/i915_vma_resource.h index c1864e3d0b43..2053c037dbfb 100644 --- a/drivers/gpu/drm/i915/i915_vma_resource.h +++ b/drivers/gpu/drm/i915/i915_vma_resource.h @@ -47,6 +47,7 @@ struct i915_page_sizes { * @chain: Pointer to struct i915_sw_fence used to await dependencies. * @rb: Rb node for the vm's pending unbind interval tree. * @__subtree_last: Interval tree private member. + * @wakeref: wakeref. * @vm: non-refcounted pointer to the vm. This is for internal use only and * this member is cleared after vm_resource unbind. * @mr: The memory region of the object pointed to by the vma. From 9570b039075192923d3bd26a7892a708eef1a483 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Tue, 2 May 2023 18:37:23 +0300 Subject: [PATCH 047/108] drm/i915/perf: fix i915_perf_ioctl_version() kernel-doc drivers/gpu/drm/i915/i915_perf.c:5307: warning: Function parameter or member 'i915' not described in 'i915_perf_ioctl_version' Signed-off-by: Jani Nikula Reviewed-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/b93ddb95a15d1376936349b32c7facb35c76be82.1683041799.git.jani.nikula@intel.com --- drivers/gpu/drm/i915/i915_perf.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c index 050b8ae7b8e7..19d5652300ee 100644 --- a/drivers/gpu/drm/i915/i915_perf.c +++ b/drivers/gpu/drm/i915/i915_perf.c @@ -5300,6 +5300,7 @@ void i915_perf_fini(struct drm_i915_private *i915) /** * i915_perf_ioctl_version - Version of the i915-perf subsystem + * @i915: The i915 device * * This version number is used by userspace to detect available features. */ From 88629feedcc4678ac21991a1154477d1c1dca19f Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Tue, 2 May 2023 18:37:24 +0300 Subject: [PATCH 048/108] drm/i915/error: fix i915_capture_error_state() kernel-doc drivers/gpu/drm/i915/i915_gpu_error.c:2174: warning: Function parameter or member 'dump_flags' not described in 'i915_capture_error_state' Signed-off-by: Jani Nikula Reviewed-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/20524292b002800975d82d23b5bd47da878f1733.1683041799.git.jani.nikula@intel.com --- drivers/gpu/drm/i915/i915_gpu_error.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c index 7360046b9945..8ffdd7f47e93 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.c +++ b/drivers/gpu/drm/i915/i915_gpu_error.c @@ -2167,7 +2167,7 @@ void i915_error_state_store(struct i915_gpu_coredump *error) * i915_capture_error_state - capture an error record for later analysis * @gt: intel_gt which originated the hang * @engine_mask: hung engines - * + * @dump_flags: dump flags * * Should be called when an error is detected (either a hang or an error * interrupt) to capture error state from the time of the error. Fills From d7b7332c0e9901ec7e6e73ca75b6c4cd398d5aaf Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Tue, 2 May 2023 18:37:25 +0300 Subject: [PATCH 049/108] drm/i915/request: drop kernel-doc The documentation is closer to not being kernel-doc. Just drop the kernel-doc /** indicators. drivers/gpu/drm/i915/i915_request.h:176: warning: This comment starts with '/**', but isn't a kernel-doc comment. Refer Documentation/doc-guide/kernel-doc.rst * Request queue structure. drivers/gpu/drm/i915/i915_request.h:477: warning: This comment starts with '/**', but isn't a kernel-doc comment. Refer Documentation/doc-guide/kernel-doc.rst * Returns true if seq1 is later than seq2. Signed-off-by: Jani Nikula Reviewed-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/62a7a0f6e5b33e59ef4339304d01e7ef7ee04430.1683041799.git.jani.nikula@intel.com --- drivers/gpu/drm/i915/i915_request.h | 52 ++++++++++++++--------------- 1 file changed, 26 insertions(+), 26 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h index f5e1bb5e857a..0ac55b2e4223 100644 --- a/drivers/gpu/drm/i915/i915_request.h +++ b/drivers/gpu/drm/i915/i915_request.h @@ -172,7 +172,7 @@ enum { I915_FENCE_FLAG_COMPOSITE, }; -/** +/* * Request queue structure. * * The request queue allows us to note sequence numbers that have been emitted @@ -198,7 +198,7 @@ struct i915_request { struct drm_i915_private *i915; - /** + /* * Context and ring buffer related to this request * Contexts are refcounted, so when this request is associated with a * context, we must increment the context's refcount, to guarantee that @@ -251,9 +251,9 @@ struct i915_request { }; struct llist_head execute_cb; struct i915_sw_fence semaphore; - /** - * @submit_work: complete submit fence from an IRQ if needed for - * locking hierarchy reasons. + /* + * complete submit fence from an IRQ if needed for locking hierarchy + * reasons. */ struct irq_work submit_work; @@ -277,35 +277,35 @@ struct i915_request { */ const u32 *hwsp_seqno; - /** Position in the ring of the start of the request */ + /* Position in the ring of the start of the request */ u32 head; - /** Position in the ring of the start of the user packets */ + /* Position in the ring of the start of the user packets */ u32 infix; - /** + /* * Position in the ring of the start of the postfix. * This is required to calculate the maximum available ring space * without overwriting the postfix. */ u32 postfix; - /** Position in the ring of the end of the whole request */ + /* Position in the ring of the end of the whole request */ u32 tail; - /** Position in the ring of the end of any workarounds after the tail */ + /* Position in the ring of the end of any workarounds after the tail */ u32 wa_tail; - /** Preallocate space in the ring for the emitting the request */ + /* Preallocate space in the ring for the emitting the request */ u32 reserved_space; - /** Batch buffer pointer for selftest internal use. */ + /* Batch buffer pointer for selftest internal use. */ I915_SELFTEST_DECLARE(struct i915_vma *batch); struct i915_vma_resource *batch_res; #if IS_ENABLED(CONFIG_DRM_I915_CAPTURE_ERROR) - /** + /* * Additional buffers requested by userspace to be captured upon * a GPU hang. The vma/obj on this list are protected by their * active reference - all objects on this list must also be @@ -314,29 +314,29 @@ struct i915_request { struct i915_capture_list *capture_list; #endif - /** Time at which this request was emitted, in jiffies. */ + /* Time at which this request was emitted, in jiffies. */ unsigned long emitted_jiffies; - /** timeline->request entry for this request */ + /* timeline->request entry for this request */ struct list_head link; - /** Watchdog support fields. */ + /* Watchdog support fields. */ struct i915_request_watchdog { struct llist_node link; struct hrtimer timer; } watchdog; - /** - * @guc_fence_link: Requests may need to be stalled when using GuC - * submission waiting for certain GuC operations to complete. If that is - * the case, stalled requests are added to a per context list of stalled - * requests. The below list_head is the link in that list. Protected by + /* + * Requests may need to be stalled when using GuC submission waiting for + * certain GuC operations to complete. If that is the case, stalled + * requests are added to a per context list of stalled requests. The + * below list_head is the link in that list. Protected by * ce->guc_state.lock. */ struct list_head guc_fence_link; - /** - * @guc_prio: Priority level while the request is in flight. Differs + /* + * Priority level while the request is in flight. Differs * from i915 scheduler priority. See comment above * I915_SCHEDULER_CAP_STATIC_PRIORITY_MAP for details. Protected by * ce->guc_active.lock. Two special values (GUC_PRIO_INIT and @@ -348,8 +348,8 @@ struct i915_request { #define GUC_PRIO_FINI 0xfe u8 guc_prio; - /** - * @hucq: wait queue entry used to wait on the HuC load to complete + /* + * wait queue entry used to wait on the HuC load to complete */ wait_queue_entry_t hucq; @@ -473,7 +473,7 @@ i915_request_has_initial_breadcrumb(const struct i915_request *rq) return test_bit(I915_FENCE_FLAG_INITIAL_BREADCRUMB, &rq->fence.flags); } -/** +/* * Returns true if seq1 is later than seq2. */ static inline bool i915_seqno_passed(u32 seq1, u32 seq2) From faa19ce89b0b2eb91c37e2c26b6be93e2e2cb47a Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Tue, 2 May 2023 18:37:26 +0300 Subject: [PATCH 050/108] drm/i915/gem: fix i915_gem_object_lookup_rcu() kernel-doc parameter name drivers/gpu/drm/i915/gem/i915_gem_object.h:94: warning: Function parameter or member 'file' not described in 'i915_gem_object_lookup_rcu' drivers/gpu/drm/i915/gem/i915_gem_object.h:94: warning: Excess function parameter 'filp' description in 'i915_gem_object_lookup_rcu' Signed-off-by: Jani Nikula Reviewed-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/80c96863b7a0755aaa07efb49bcccd9ba620c6d7.1683041799.git.jani.nikula@intel.com --- drivers/gpu/drm/i915/gem/i915_gem_object.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.h b/drivers/gpu/drm/i915/gem/i915_gem_object.h index 885ccde9dc3c..bc1291887d4f 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_object.h @@ -80,7 +80,7 @@ __i915_gem_object_unset_pages(struct drm_i915_gem_object *obj); /** * i915_gem_object_lookup_rcu - look up a temporary GEM object from its handle - * @filp: DRM file private date + * @file: DRM file private date * @handle: userspace handle * * Returns: From 9c55105be0a78942328882224c23965483e578a9 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Tue, 2 May 2023 18:37:29 +0300 Subject: [PATCH 051/108] drm/i915/engine: fix kernel-doc function name for intel_engine_cleanup_common() drivers/gpu/drm/i915/gt/intel_engine_cs.c:1525: warning: expecting prototype for intel_engines_cleanup_common(). Prototype was for intel_engine_cleanup_common() instead Signed-off-by: Jani Nikula Reviewed-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/33f8dfdf38be3e16675971e6983e3e300d4301a6.1683041799.git.jani.nikula@intel.com --- drivers/gpu/drm/i915/gt/intel_engine_cs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c index 5c6c9a6d469c..0aff5bb13c53 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c @@ -1515,7 +1515,7 @@ int intel_engines_init(struct intel_gt *gt) } /** - * intel_engines_cleanup_common - cleans up the engine state created by + * intel_engine_cleanup_common - cleans up the engine state created by * the common initiailizers. * @engine: Engine to cleanup. * From 71ca9b87a72bdc1dbe434d25c97eb14be58b4e00 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Tue, 2 May 2023 18:37:30 +0300 Subject: [PATCH 052/108] drm/i915/context: fix kernel-doc parameter descriptions drivers/gpu/drm/i915/gt/intel_context.h:108: warning: Function parameter or member 'ce' not described in 'intel_context_lock_pinned' drivers/gpu/drm/i915/gt/intel_context.h:123: warning: Function parameter or member 'ce' not described in 'intel_context_is_pinned' drivers/gpu/drm/i915/gt/intel_context.h:142: warning: Function parameter or member 'ce' not described in 'intel_context_unlock_pinned' Signed-off-by: Jani Nikula Reviewed-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/8359a1cef6b5ab268a9dcc1a382281b6e39cfa64.1683041799.git.jani.nikula@intel.com --- drivers/gpu/drm/i915/gt/intel_context.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_context.h b/drivers/gpu/drm/i915/gt/intel_context.h index 43ed92f8dc83..a80e3b7c24ff 100644 --- a/drivers/gpu/drm/i915/gt/intel_context.h +++ b/drivers/gpu/drm/i915/gt/intel_context.h @@ -97,7 +97,7 @@ void intel_context_bind_parent_child(struct intel_context *parent, /** * intel_context_lock_pinned - Stablises the 'pinned' status of the HW context - * @ce - the context + * @ce: the context * * Acquire a lock on the pinned status of the HW context, such that the context * can neither be bound to the GPU or unbound whilst the lock is held, i.e. @@ -111,7 +111,7 @@ static inline int intel_context_lock_pinned(struct intel_context *ce) /** * intel_context_is_pinned - Reports the 'pinned' status - * @ce - the context + * @ce: the context * * While in use by the GPU, the context, along with its ring and page * tables is pinned into memory and the GTT. @@ -133,7 +133,7 @@ static inline void intel_context_cancel_request(struct intel_context *ce, /** * intel_context_unlock_pinned - Releases the earlier locking of 'pinned' status - * @ce - the context + * @ce: the context * * Releases the lock earlier acquired by intel_context_unlock_pinned(). */ From e798a3b30dcb729fa9b9dc88de00984dc1f5aa04 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Tue, 2 May 2023 18:37:31 +0300 Subject: [PATCH 053/108] drm/i915/gtt: fix i915_vm_resv_put() kernel-doc parameter name drivers/gpu/drm/i915/gt/intel_gtt.h:515: warning: Function parameter or member 'vm' not described in 'i915_vm_resv_put' drivers/gpu/drm/i915/gt/intel_gtt.h:515: warning: Excess function parameter 'resv' description in 'i915_vm_resv_put' Signed-off-by: Jani Nikula Reviewed-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/c22e58e770019667980b3617f6e963b76d7e79a7.1683041799.git.jani.nikula@intel.com --- drivers/gpu/drm/i915/gt/intel_gtt.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gt/intel_gtt.h b/drivers/gpu/drm/i915/gt/intel_gtt.h index 1910683f03b4..9aff343beaa8 100644 --- a/drivers/gpu/drm/i915/gt/intel_gtt.h +++ b/drivers/gpu/drm/i915/gt/intel_gtt.h @@ -509,7 +509,7 @@ static inline void i915_vm_put(struct i915_address_space *vm) /** * i915_vm_resv_put - Release a reference on the vm's reservation lock - * @resv: Pointer to a reservation lock obtained from i915_vm_resv_get() + * @vm: The vm whose reservation lock reference we want to release */ static inline void i915_vm_resv_put(struct i915_address_space *vm) { From 1f5cf999bedcdff85f84b7dc4d780cbb515b6c2f Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Tue, 2 May 2023 18:37:32 +0300 Subject: [PATCH 054/108] drm/i915/engine: hide preempt_hang selftest member from kernel-doc drivers/gpu/drm/i915/gt/intel_engine_types.h:293: warning: Function parameter or member 'preempt_hang' not described in 'intel_engine_execlists' Signed-off-by: Jani Nikula Reviewed-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/dafd771bb75cf14965dd3b666987c58a438de134.1683041799.git.jani.nikula@intel.com --- drivers/gpu/drm/i915/gt/intel_engine_types.h | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h index 960291f88fd6..e99a6fa03d45 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_types.h +++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h @@ -289,6 +289,7 @@ struct intel_engine_execlists { */ u8 csb_head; + /* private: selftest */ I915_SELFTEST_DECLARE(struct st_preempt_hang preempt_hang;) }; From 3a21c6b4298d9d0c99c8dee28edcf317d68cd93e Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Tue, 2 May 2023 18:37:33 +0300 Subject: [PATCH 055/108] drm/i915/guc: add dbgfs_node member kernel-doc drivers/gpu/drm/i915/gt/uc/intel_guc.h:274: warning: Function parameter or member 'dbgfs_node' not described in 'intel_guc' Signed-off-by: Jani Nikula Reviewed-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/b0f681dd82289dd86da78c6242411e8d812e51a1.1683041799.git.jani.nikula@intel.com --- drivers/gpu/drm/i915/gt/uc/intel_guc.h | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.h b/drivers/gpu/drm/i915/gt/uc/intel_guc.h index e46aac1a41e6..8dc291ff0093 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.h @@ -42,6 +42,7 @@ struct intel_guc { /** @capture: the error-state-capture module's data and objects */ struct intel_guc_state_capture *capture; + /** @dbgfs_node: debugfs node */ struct dentry *dbgfs_node; /** @sched_engine: Global engine used to submit requests to GuC */ From f05e526e44cb11e70c5b7d8bb55d7e0c6fb10990 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Tue, 2 May 2023 18:37:34 +0300 Subject: [PATCH 056/108] drm/i915/guc: drop lots of kernel-doc markers The documentation is closer to not being kernel-doc, so just drop the kernel-doc markers. drivers/gpu/drm/i915/gt/uc/guc_capture_fwif.h:27: warning: Function parameter or member 'size' not described in '__guc_capture_bufstate' drivers/gpu/drm/i915/gt/uc/guc_capture_fwif.h:27: warning: Function parameter or member 'data' not described in '__guc_capture_bufstate' drivers/gpu/drm/i915/gt/uc/guc_capture_fwif.h:27: warning: Function parameter or member 'rd' not described in '__guc_capture_bufstate' drivers/gpu/drm/i915/gt/uc/guc_capture_fwif.h:27: warning: Function parameter or member 'wr' not described in '__guc_capture_bufstate' drivers/gpu/drm/i915/gt/uc/guc_capture_fwif.h:59: warning: Function parameter or member 'link' not described in '__guc_capture_parsed_output' drivers/gpu/drm/i915/gt/uc/guc_capture_fwif.h:59: warning: Function parameter or member 'is_partial' not described in '__guc_capture_parsed_output' drivers/gpu/drm/i915/gt/uc/guc_capture_fwif.h:59: warning: Function parameter or member 'eng_class' not described in '__guc_capture_parsed_output' drivers/gpu/drm/i915/gt/uc/guc_capture_fwif.h:59: warning: Function parameter or member 'eng_inst' not described in '__guc_capture_parsed_output' drivers/gpu/drm/i915/gt/uc/guc_capture_fwif.h:59: warning: Function parameter or member 'guc_id' not described in '__guc_capture_parsed_output' drivers/gpu/drm/i915/gt/uc/guc_capture_fwif.h:59: warning: Function parameter or member 'lrca' not described in '__guc_capture_parsed_output' drivers/gpu/drm/i915/gt/uc/guc_capture_fwif.h:59: warning: Function parameter or member 'reginfo' not described in '__guc_capture_parsed_output' drivers/gpu/drm/i915/gt/uc/guc_capture_fwif.h:62: warning: wrong kernel-doc identifier on line: * struct guc_debug_capture_list_header / struct guc_debug_capture_list drivers/gpu/drm/i915/gt/uc/guc_capture_fwif.h:80: warning: wrong kernel-doc identifier on line: * struct __guc_mmio_reg_descr / struct __guc_mmio_reg_descr_group drivers/gpu/drm/i915/gt/uc/guc_capture_fwif.h:105: warning: wrong kernel-doc identifier on line: * struct guc_state_capture_header_t / struct guc_state_capture_t / drivers/gpu/drm/i915/gt/uc/guc_capture_fwif.h:163: warning: Function parameter or member 'is_valid' not described in '__guc_capture_ads_cache' drivers/gpu/drm/i915/gt/uc/guc_capture_fwif.h:163: warning: Function parameter or member 'ptr' not described in '__guc_capture_ads_cache' drivers/gpu/drm/i915/gt/uc/guc_capture_fwif.h:163: warning: Function parameter or member 'size' not described in '__guc_capture_ads_cache' drivers/gpu/drm/i915/gt/uc/guc_capture_fwif.h:163: warning: Function parameter or member 'status' not described in '__guc_capture_ads_cache' drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h:491: warning: Function parameter or member 'marker' not described in 'guc_log_buffer_state' drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h:491: warning: Function parameter or member 'read_ptr' not described in 'guc_log_buffer_state' drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h:491: warning: Function parameter or member 'write_ptr' not described in 'guc_log_buffer_state' drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h:491: warning: Function parameter or member 'size' not described in 'guc_log_buffer_state' drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h:491: warning: Function parameter or member 'sampled_write_ptr' not described in 'guc_log_buffer_state' drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h:491: warning: Function parameter or member 'wrap_offset' not described in 'guc_log_buffer_state' drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h:491: warning: Function parameter or member 'flush_to_file' not described in 'guc_log_buffer_state' drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h:491: warning: Function parameter or member 'buffer_full_cnt' not described in 'guc_log_buffer_state' drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h:491: warning: Function parameter or member 'reserved' not described in 'guc_log_buffer_state' drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h:491: warning: Function parameter or member 'flags' not described in 'guc_log_buffer_state' drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h:491: warning: Function parameter or member 'version' not described in 'guc_log_buffer_state' Signed-off-by: Jani Nikula Reviewed-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/9c210d53fdbd6da5fac42e435855d269504919d7.1683041799.git.jani.nikula@intel.com --- drivers/gpu/drm/i915/gt/uc/guc_capture_fwif.h | 12 ++++++------ drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h | 2 +- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/uc/guc_capture_fwif.h b/drivers/gpu/drm/i915/gt/uc/guc_capture_fwif.h index 9d589c28f40f..1b6c219e1675 100644 --- a/drivers/gpu/drm/i915/gt/uc/guc_capture_fwif.h +++ b/drivers/gpu/drm/i915/gt/uc/guc_capture_fwif.h @@ -12,7 +12,7 @@ struct intel_guc; struct file; -/** +/* * struct __guc_capture_bufstate * * Book-keeping structure used to track read and write pointers @@ -26,7 +26,7 @@ struct __guc_capture_bufstate { u32 wr; }; -/** +/* * struct __guc_capture_parsed_output - extracted error capture node * * A single unit of extracted error-capture output data grouped together @@ -58,7 +58,7 @@ struct __guc_capture_parsed_output { #define GCAP_PARSED_REGLIST_INDEX_ENGINST BIT(GUC_CAPTURE_LIST_TYPE_ENGINE_INSTANCE) }; -/** +/* * struct guc_debug_capture_list_header / struct guc_debug_capture_list * * As part of ADS registration, these header structures (followed by @@ -76,7 +76,7 @@ struct guc_debug_capture_list { struct guc_mmio_reg regs[]; } __packed; -/** +/* * struct __guc_mmio_reg_descr / struct __guc_mmio_reg_descr_group * * intel_guc_capture module uses these structures to maintain static @@ -101,7 +101,7 @@ struct __guc_mmio_reg_descr_group { struct __guc_mmio_reg_descr *extlist; /* only used for steered registers */ }; -/** +/* * struct guc_state_capture_header_t / struct guc_state_capture_t / * guc_state_capture_group_header_t / guc_state_capture_group_t * @@ -148,7 +148,7 @@ struct guc_state_capture_group_t { struct guc_state_capture_t capture_entries[]; } __packed; -/** +/* * struct __guc_capture_ads_cache * * A structure to cache register lists that were populated and registered diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h index 4ae5fc2f6002..8bb9ed7c7b4d 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h @@ -451,7 +451,7 @@ enum guc_log_buffer_type { GUC_MAX_LOG_BUFFER }; -/** +/* * struct guc_log_buffer_state - GuC log buffer state * * Below state structure is used for coordination of retrieval of GuC firmware From 1c519e0628f19e2790ddfa4cb512921047e8f65a Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Tue, 2 May 2023 18:37:35 +0300 Subject: [PATCH 057/108] drm/i915/guc: add intel_guc_state_capture member docs for ads_null_cache and max_mmio_per_node drivers/gpu/drm/i915/gt/uc/guc_capture_fwif.h:216: warning: Function parameter or member 'ads_null_cache' not described in 'intel_guc_state_capture' drivers/gpu/drm/i915/gt/uc/guc_capture_fwif.h:216: warning: Function parameter or member 'max_mmio_per_node' not described in 'intel_guc_state_capture' Signed-off-by: Jani Nikula Reviewed-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/c83878163221ed3684a6de5d5e1c5373ddd5c06f.1683041799.git.jani.nikula@intel.com --- drivers/gpu/drm/i915/gt/uc/guc_capture_fwif.h | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/gpu/drm/i915/gt/uc/guc_capture_fwif.h b/drivers/gpu/drm/i915/gt/uc/guc_capture_fwif.h index 1b6c219e1675..1fc0c17b1230 100644 --- a/drivers/gpu/drm/i915/gt/uc/guc_capture_fwif.h +++ b/drivers/gpu/drm/i915/gt/uc/guc_capture_fwif.h @@ -187,6 +187,10 @@ struct intel_guc_state_capture { struct __guc_capture_ads_cache ads_cache[GUC_CAPTURE_LIST_INDEX_MAX] [GUC_CAPTURE_LIST_TYPE_MAX] [GUC_MAX_ENGINE_CLASSES]; + + /** + * @ads_null_cache: ADS null cache. + */ void *ads_null_cache; /** @@ -202,6 +206,10 @@ struct intel_guc_state_capture { struct list_head cachelist; #define PREALLOC_NODES_MAX_COUNT (3 * GUC_MAX_ENGINE_CLASSES * GUC_MAX_INSTANCES_PER_CLASS) #define PREALLOC_NODES_DEFAULT_NUMREGS 64 + + /** + * @max_mmio_per_node: Max MMIO per node. + */ int max_mmio_per_node; /** From 8802628237ac73bf5a6f878ea0cbd8a4c39a55a1 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Tue, 2 May 2023 18:37:36 +0300 Subject: [PATCH 058/108] drm/i915/active: fix kernel-doc for function parameters drivers/gpu/drm/i915/i915_active.h:66: warning: Function parameter or member 'active' not described in '__i915_active_fence_init' drivers/gpu/drm/i915/i915_active.h:66: warning: Function parameter or member 'fence' not described in '__i915_active_fence_init' drivers/gpu/drm/i915/i915_active.h:66: warning: Function parameter or member 'fn' not described in '__i915_active_fence_init' drivers/gpu/drm/i915/i915_active.h:89: warning: Function parameter or member 'active' not described in 'i915_active_fence_set' drivers/gpu/drm/i915/i915_active.h:89: warning: Function parameter or member 'rq' not described in 'i915_active_fence_set' drivers/gpu/drm/i915/i915_active.h:102: warning: Function parameter or member 'active' not described in 'i915_active_fence_get' drivers/gpu/drm/i915/i915_active.h:122: warning: Function parameter or member 'active' not described in 'i915_active_fence_isset' Signed-off-by: Jani Nikula Reviewed-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/7f201c6cb715e4b3cece78ffa893a75610ecd27d.1683041799.git.jani.nikula@intel.com --- drivers/gpu/drm/i915/i915_active.h | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_active.h b/drivers/gpu/drm/i915/i915_active.h index 7eb44132183a..77c676ecc263 100644 --- a/drivers/gpu/drm/i915/i915_active.h +++ b/drivers/gpu/drm/i915/i915_active.h @@ -49,9 +49,9 @@ void i915_active_noop(struct dma_fence *fence, struct dma_fence_cb *cb); /** * __i915_active_fence_init - prepares the activity tracker for use - * @active - the active tracker - * @fence - initial fence to track, can be NULL - * @func - a callback when then the tracker is retired (becomes idle), + * @active: the active tracker + * @fence: initial fence to track, can be NULL + * @fn: a callback when then the tracker is retired (becomes idle), * can be NULL * * i915_active_fence_init() prepares the embedded @active struct for use as @@ -77,8 +77,8 @@ __i915_active_fence_set(struct i915_active_fence *active, /** * i915_active_fence_set - updates the tracker to watch the current fence - * @active - the active tracker - * @rq - the request to watch + * @active: the active tracker + * @rq: the request to watch * * i915_active_fence_set() watches the given @rq for completion. While * that @rq is busy, the @active reports busy. When that @rq is signaled @@ -89,7 +89,7 @@ i915_active_fence_set(struct i915_active_fence *active, struct i915_request *rq); /** * i915_active_fence_get - return a reference to the active fence - * @active - the active tracker + * @active: the active tracker * * i915_active_fence_get() returns a reference to the active fence, * or NULL if the active tracker is idle. The reference is obtained under RCU, @@ -111,7 +111,7 @@ i915_active_fence_get(struct i915_active_fence *active) /** * i915_active_fence_isset - report whether the active tracker is assigned - * @active - the active tracker + * @active: the active tracker * * i915_active_fence_isset() returns true if the active tracker is currently * assigned to a fence. Due to the lazy retiring, that fence may be idle From 7bc30374f84c5588c952e47248595b1c38d06906 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Tue, 2 May 2023 18:37:37 +0300 Subject: [PATCH 059/108] drm/i915/pmu: drop kernel-doc The comments are closer to not being kernel-doc. drivers/gpu/drm/i915/i915_pmu.h:21: warning: cannot understand function prototype: 'enum i915_pmu_tracked_events ' drivers/gpu/drm/i915/i915_pmu.h:32: warning: cannot understand function prototype: 'enum ' drivers/gpu/drm/i915/i915_pmu.h:41: warning: This comment starts with '/**', but isn't a kernel-doc comment. Refer Documentation/doc-guide/kernel-doc.rst * How many different events we track in the global PMU mask. Signed-off-by: Jani Nikula Reviewed-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/7656b8f58b088c108a2a32f7089329740efabba1.1683041799.git.jani.nikula@intel.com --- drivers/gpu/drm/i915/i915_pmu.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_pmu.h b/drivers/gpu/drm/i915/i915_pmu.h index 449057648f39..c30f43319a78 100644 --- a/drivers/gpu/drm/i915/i915_pmu.h +++ b/drivers/gpu/drm/i915/i915_pmu.h @@ -14,7 +14,7 @@ struct drm_i915_private; -/** +/* * Non-engine events that we need to track enabled-disabled transition and * current state. */ @@ -25,7 +25,7 @@ enum i915_pmu_tracked_events { __I915_PMU_TRACKED_EVENT_COUNT, /* count marker */ }; -/** +/* * Slots used from the sampling timer (non-engine events) with some extras for * convenience. */ @@ -37,7 +37,7 @@ enum { __I915_NUM_PMU_SAMPLERS }; -/** +/* * How many different events we track in the global PMU mask. * * It is also used to know to needed number of event reference counters. From 70b9933c09b8687edff5c2f833dc2a72c1b40362 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Tue, 2 May 2023 18:37:38 +0300 Subject: [PATCH 060/108] drm/i915/pxp: fix kernel-doc for member dev_link Add /** to make it a kernel-doc. drivers/gpu/drm/i915/pxp/intel_pxp_types.h:96: warning: Function parameter or member 'dev_link' not described in 'intel_pxp' Signed-off-by: Jani Nikula Reviewed-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/84f89a3332fa323888f1e3241fb51ae10417ecd7.1683041799.git.jani.nikula@intel.com --- drivers/gpu/drm/i915/pxp/intel_pxp_types.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/pxp/intel_pxp_types.h b/drivers/gpu/drm/i915/pxp/intel_pxp_types.h index 007de49e1ea4..c445f7f2f47a 100644 --- a/drivers/gpu/drm/i915/pxp/intel_pxp_types.h +++ b/drivers/gpu/drm/i915/pxp/intel_pxp_types.h @@ -33,7 +33,9 @@ struct intel_pxp { */ struct i915_pxp_component *pxp_component; - /* @dev_link: Enforce module relationship for power management ordering. */ + /** + * @dev_link: Enforce module relationship for power management ordering. + */ struct device_link *dev_link; /** * @pxp_component_added: track if the pxp component has been added. From 8b2a7394e51d6a49edeae8fc640b8c23a4ab649d Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Tue, 2 May 2023 18:37:40 +0300 Subject: [PATCH 061/108] drm/i915/scatterlist: fix kernel-doc parameter documentation drivers/gpu/drm/i915/i915_scatterlist.h:164: warning: Function parameter or member 'release' not described in 'i915_refct_sgt_ops' drivers/gpu/drm/i915/i915_scatterlist.h:187: warning: Function parameter or member 'rsgt' not described in 'i915_refct_sgt_put' drivers/gpu/drm/i915/i915_scatterlist.h:198: warning: Function parameter or member 'rsgt' not described in 'i915_refct_sgt_get' drivers/gpu/drm/i915/i915_scatterlist.h:214: warning: Function parameter or member 'rsgt' not described in '__i915_refct_sgt_init' Signed-off-by: Jani Nikula Reviewed-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/0b31edbb33116c8002dd1e72d3ad25efe5dd0176.1683041799.git.jani.nikula@intel.com --- drivers/gpu/drm/i915/i915_scatterlist.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_scatterlist.h b/drivers/gpu/drm/i915/i915_scatterlist.h index b0a1db44f895..12c1fd63e9b8 100644 --- a/drivers/gpu/drm/i915/i915_scatterlist.h +++ b/drivers/gpu/drm/i915/i915_scatterlist.h @@ -181,7 +181,7 @@ struct i915_refct_sgt { /** * i915_refct_sgt_put - Put a refcounted sg-table - * @rsgt the struct i915_refct_sgt to put. + * @rsgt: the struct i915_refct_sgt to put. */ static inline void i915_refct_sgt_put(struct i915_refct_sgt *rsgt) { @@ -191,7 +191,7 @@ static inline void i915_refct_sgt_put(struct i915_refct_sgt *rsgt) /** * i915_refct_sgt_get - Get a refcounted sg-table - * @rsgt the struct i915_refct_sgt to get. + * @rsgt: the struct i915_refct_sgt to get. */ static inline struct i915_refct_sgt * i915_refct_sgt_get(struct i915_refct_sgt *rsgt) @@ -203,7 +203,7 @@ i915_refct_sgt_get(struct i915_refct_sgt *rsgt) /** * __i915_refct_sgt_init - Initialize a refcounted sg-list with a custom * operations structure - * @rsgt The struct i915_refct_sgt to initialize. + * @rsgt: The struct i915_refct_sgt to initialize. * @size: Size in bytes of the underlying memory buffer. * @ops: A customized operations structure in case the refcounted sg-list * is embedded into another structure. From e7b05d95cc1bfa1e61557358d936ebb33b0ae4be Mon Sep 17 00:00:00 2001 From: Alexander Usyskin Date: Tue, 2 May 2023 09:38:51 -0700 Subject: [PATCH 062/108] drm/i915/mtl: Define GSC Proxy component interface GSC Proxy component is used for communication between the Intel graphics driver and MEI driver. Cc: Alan Previn Signed-off-by: Alexander Usyskin Signed-off-by: Tomas Winkler Signed-off-by: Daniele Ceraolo Spurio Acked-by: Greg Kroah-Hartman Reviewed-by: Alan Previn Link: https://patchwork.freedesktop.org/patch/msgid/20230502163854.317653-2-daniele.ceraolospurio@intel.com --- include/drm/i915_component.h | 3 +- include/drm/i915_gsc_proxy_mei_interface.h | 53 ++++++++++++++++++++++ 2 files changed, 55 insertions(+), 1 deletion(-) create mode 100644 include/drm/i915_gsc_proxy_mei_interface.h diff --git a/include/drm/i915_component.h b/include/drm/i915_component.h index c1e2a43d2d1e..56a84ee1c64c 100644 --- a/include/drm/i915_component.h +++ b/include/drm/i915_component.h @@ -29,7 +29,8 @@ enum i915_component_type { I915_COMPONENT_AUDIO = 1, I915_COMPONENT_HDCP, - I915_COMPONENT_PXP + I915_COMPONENT_PXP, + I915_COMPONENT_GSC_PROXY, }; /* MAX_PORT is the number of port diff --git a/include/drm/i915_gsc_proxy_mei_interface.h b/include/drm/i915_gsc_proxy_mei_interface.h new file mode 100644 index 000000000000..9462341d3ae1 --- /dev/null +++ b/include/drm/i915_gsc_proxy_mei_interface.h @@ -0,0 +1,53 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright (c) 2022-2023 Intel Corporation + */ + +#ifndef _I915_GSC_PROXY_MEI_INTERFACE_H_ +#define _I915_GSC_PROXY_MEI_INTERFACE_H_ + +#include + +struct device; +struct module; + +/** + * struct i915_gsc_proxy_component_ops - ops for GSC Proxy services. + * @owner: Module providing the ops + * @send: sends a proxy message from GSC FW to ME FW + * @recv: receives a proxy message for GSC FW from ME FW + */ +struct i915_gsc_proxy_component_ops { + struct module *owner; + + /** + * send - Sends a proxy message to ME FW. + * @dev: device struct corresponding to the mei device + * @buf: message buffer to send + * @size: size of the message + * Return: bytes sent on success, negative errno value on failure + */ + int (*send)(struct device *dev, const void *buf, size_t size); + + /** + * recv - Receives a proxy message from ME FW. + * @dev: device struct corresponding to the mei device + * @buf: message buffer to contain the received message + * @size: size of the buffer + * Return: bytes received on success, negative errno value on failure + */ + int (*recv)(struct device *dev, void *buf, size_t size); +}; + +/** + * struct i915_gsc_proxy_component - Used for communication between i915 and + * MEI drivers for GSC proxy services + * @mei_dev: device that provide the GSC proxy service. + * @ops: Ops implemented by GSC proxy driver, used by i915 driver. + */ +struct i915_gsc_proxy_component { + struct device *mei_dev; + const struct i915_gsc_proxy_component_ops *ops; +}; + +#endif /* _I915_GSC_PROXY_MEI_INTERFACE_H_ */ From 1dd924f6885b1f3bddc586c39467dc9471729c5e Mon Sep 17 00:00:00 2001 From: Alexander Usyskin Date: Tue, 2 May 2023 09:38:52 -0700 Subject: [PATCH 063/108] mei: gsc_proxy: add gsc proxy driver Add GSC proxy driver. It to allows messaging between GSC component on Intel graphics card and CSE device. Cc: Alan Previn Signed-off-by: Alexander Usyskin Signed-off-by: Tomas Winkler Signed-off-by: Daniele Ceraolo Spurio Acked-by: Greg Kroah-Hartman Reviewed-by: Alan Previn Link: https://patchwork.freedesktop.org/patch/msgid/20230502163854.317653-3-daniele.ceraolospurio@intel.com --- drivers/misc/mei/Kconfig | 2 +- drivers/misc/mei/Makefile | 1 + drivers/misc/mei/gsc_proxy/Kconfig | 14 ++ drivers/misc/mei/gsc_proxy/Makefile | 7 + drivers/misc/mei/gsc_proxy/mei_gsc_proxy.c | 208 +++++++++++++++++++++ 5 files changed, 231 insertions(+), 1 deletion(-) create mode 100644 drivers/misc/mei/gsc_proxy/Kconfig create mode 100644 drivers/misc/mei/gsc_proxy/Makefile create mode 100644 drivers/misc/mei/gsc_proxy/mei_gsc_proxy.c diff --git a/drivers/misc/mei/Kconfig b/drivers/misc/mei/Kconfig index d21486d69df2..37db142de413 100644 --- a/drivers/misc/mei/Kconfig +++ b/drivers/misc/mei/Kconfig @@ -62,4 +62,4 @@ config INTEL_MEI_GSC source "drivers/misc/mei/hdcp/Kconfig" source "drivers/misc/mei/pxp/Kconfig" - +source "drivers/misc/mei/gsc_proxy/Kconfig" diff --git a/drivers/misc/mei/Makefile b/drivers/misc/mei/Makefile index fb740d754900..14aee253ae48 100644 --- a/drivers/misc/mei/Makefile +++ b/drivers/misc/mei/Makefile @@ -30,3 +30,4 @@ CFLAGS_mei-trace.o = -I$(src) obj-$(CONFIG_INTEL_MEI_HDCP) += hdcp/ obj-$(CONFIG_INTEL_MEI_PXP) += pxp/ +obj-$(CONFIG_INTEL_MEI_GSC_PROXY) += gsc_proxy/ diff --git a/drivers/misc/mei/gsc_proxy/Kconfig b/drivers/misc/mei/gsc_proxy/Kconfig new file mode 100644 index 000000000000..5f68d9f3d691 --- /dev/null +++ b/drivers/misc/mei/gsc_proxy/Kconfig @@ -0,0 +1,14 @@ +# SPDX-License-Identifier: GPL-2.0 +# Copyright (c) 2022-2023, Intel Corporation. All rights reserved. +# +config INTEL_MEI_GSC_PROXY + tristate "Intel GSC Proxy services of ME Interface" + select INTEL_MEI_ME + depends on DRM_I915 + help + MEI Support for GSC Proxy Services on Intel platforms. + + MEI GSC proxy enables messaging between GSC service on + Intel graphics card and services on CSE (MEI) firmware + residing SoC or PCH. + diff --git a/drivers/misc/mei/gsc_proxy/Makefile b/drivers/misc/mei/gsc_proxy/Makefile new file mode 100644 index 000000000000..358847e9aaa9 --- /dev/null +++ b/drivers/misc/mei/gsc_proxy/Makefile @@ -0,0 +1,7 @@ +# SPDX-License-Identifier: GPL-2.0 +# +# Copyright (c) 2022-2023, Intel Corporation. All rights reserved. +# +# Makefile - GSC Proxy client driver for Intel MEI Bus Driver. + +obj-$(CONFIG_INTEL_MEI_GSC_PROXY) += mei_gsc_proxy.o diff --git a/drivers/misc/mei/gsc_proxy/mei_gsc_proxy.c b/drivers/misc/mei/gsc_proxy/mei_gsc_proxy.c new file mode 100644 index 000000000000..be52b113aea9 --- /dev/null +++ b/drivers/misc/mei/gsc_proxy/mei_gsc_proxy.c @@ -0,0 +1,208 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (c) 2022-2023 Intel Corporation + */ + +/** + * DOC: MEI_GSC_PROXY Client Driver + * + * The mei_gsc_proxy driver acts as a translation layer between + * proxy user (I915) and ME FW by proxying messages to ME FW + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/** + * mei_gsc_proxy_send - Sends a proxy message to ME FW. + * @dev: device corresponding to the mei_cl_device + * @buf: a message buffer to send + * @size: size of the message + * Return: bytes sent on Success, <0 on Failure + */ +static int mei_gsc_proxy_send(struct device *dev, const void *buf, size_t size) +{ + ssize_t ret; + + if (!dev || !buf) + return -EINVAL; + + ret = mei_cldev_send(to_mei_cl_device(dev), buf, size); + if (ret < 0) + dev_dbg(dev, "mei_cldev_send failed. %zd\n", ret); + + return ret; +} + +/** + * mei_gsc_proxy_recv - Receives a proxy message from ME FW. + * @dev: device corresponding to the mei_cl_device + * @buf: a message buffer to contain the received message + * @size: size of the buffer + * Return: bytes received on Success, <0 on Failure + */ +static int mei_gsc_proxy_recv(struct device *dev, void *buf, size_t size) +{ + ssize_t ret; + + if (!dev || !buf) + return -EINVAL; + + ret = mei_cldev_recv(to_mei_cl_device(dev), buf, size); + if (ret < 0) + dev_dbg(dev, "mei_cldev_recv failed. %zd\n", ret); + + return ret; +} + +static const struct i915_gsc_proxy_component_ops mei_gsc_proxy_ops = { + .owner = THIS_MODULE, + .send = mei_gsc_proxy_send, + .recv = mei_gsc_proxy_recv, +}; + +static int mei_component_master_bind(struct device *dev) +{ + struct mei_cl_device *cldev = to_mei_cl_device(dev); + struct i915_gsc_proxy_component *comp_master = mei_cldev_get_drvdata(cldev); + + comp_master->ops = &mei_gsc_proxy_ops; + comp_master->mei_dev = dev; + return component_bind_all(dev, comp_master); +} + +static void mei_component_master_unbind(struct device *dev) +{ + struct mei_cl_device *cldev = to_mei_cl_device(dev); + struct i915_gsc_proxy_component *comp_master = mei_cldev_get_drvdata(cldev); + + component_unbind_all(dev, comp_master); +} + +static const struct component_master_ops mei_component_master_ops = { + .bind = mei_component_master_bind, + .unbind = mei_component_master_unbind, +}; + +/** + * mei_gsc_proxy_component_match - compare function for matching mei. + * + * The function checks if the device is pci device and + * Intel VGA adapter, the subcomponent is SW Proxy + * and the parent of MEI PCI and the parent of VGA are the same PCH device. + * + * @dev: master device + * @subcomponent: subcomponent to match (I915_COMPONENT_SWPROXY) + * @data: compare data (mei pci parent) + * + * Return: + * * 1 - if components match + * * 0 - otherwise + */ +static int mei_gsc_proxy_component_match(struct device *dev, int subcomponent, + void *data) +{ + struct pci_dev *pdev; + + if (!dev_is_pci(dev)) + return 0; + + pdev = to_pci_dev(dev); + + if (pdev->class != (PCI_CLASS_DISPLAY_VGA << 8) || + pdev->vendor != PCI_VENDOR_ID_INTEL) + return 0; + + if (subcomponent != I915_COMPONENT_GSC_PROXY) + return 0; + + return component_compare_dev(dev->parent, ((struct device *)data)->parent); +} + +static int mei_gsc_proxy_probe(struct mei_cl_device *cldev, + const struct mei_cl_device_id *id) +{ + struct i915_gsc_proxy_component *comp_master; + struct component_match *master_match = NULL; + int ret; + + ret = mei_cldev_enable(cldev); + if (ret < 0) { + dev_err(&cldev->dev, "mei_cldev_enable Failed. %d\n", ret); + goto enable_err_exit; + } + + comp_master = kzalloc(sizeof(*comp_master), GFP_KERNEL); + if (!comp_master) { + ret = -ENOMEM; + goto err_exit; + } + + component_match_add_typed(&cldev->dev, &master_match, + mei_gsc_proxy_component_match, cldev->dev.parent); + if (IS_ERR_OR_NULL(master_match)) { + ret = -ENOMEM; + goto err_exit; + } + + mei_cldev_set_drvdata(cldev, comp_master); + ret = component_master_add_with_match(&cldev->dev, + &mei_component_master_ops, + master_match); + if (ret < 0) { + dev_err(&cldev->dev, "Master comp add failed %d\n", ret); + goto err_exit; + } + + return 0; + +err_exit: + mei_cldev_set_drvdata(cldev, NULL); + kfree(comp_master); + mei_cldev_disable(cldev); +enable_err_exit: + return ret; +} + +static void mei_gsc_proxy_remove(struct mei_cl_device *cldev) +{ + struct i915_gsc_proxy_component *comp_master = mei_cldev_get_drvdata(cldev); + int ret; + + component_master_del(&cldev->dev, &mei_component_master_ops); + kfree(comp_master); + mei_cldev_set_drvdata(cldev, NULL); + + ret = mei_cldev_disable(cldev); + if (ret) + dev_warn(&cldev->dev, "mei_cldev_disable() failed %d\n", ret); +} + +#define MEI_UUID_GSC_PROXY UUID_LE(0xf73db04, 0x97ab, 0x4125, \ + 0xb8, 0x93, 0xe9, 0x4, 0xad, 0xd, 0x54, 0x64) + +static struct mei_cl_device_id mei_gsc_proxy_tbl[] = { + { .uuid = MEI_UUID_GSC_PROXY, .version = MEI_CL_VERSION_ANY }, + { } +}; +MODULE_DEVICE_TABLE(mei, mei_gsc_proxy_tbl); + +static struct mei_cl_driver mei_gsc_proxy_driver = { + .id_table = mei_gsc_proxy_tbl, + .name = KBUILD_MODNAME, + .probe = mei_gsc_proxy_probe, + .remove = mei_gsc_proxy_remove, +}; + +module_mei_cl_driver(mei_gsc_proxy_driver); + +MODULE_AUTHOR("Intel Corporation"); +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("MEI GSC PROXY"); From 8a9bf29546a13efd3ed7784b890c2534e995348f Mon Sep 17 00:00:00 2001 From: Daniele Ceraolo Spurio Date: Tue, 2 May 2023 09:38:53 -0700 Subject: [PATCH 064/108] drm/i915/gsc: add initial support for GSC proxy The GSC uC needs to communicate with the CSME to perform certain operations. Since the GSC can't perform this communication directly on platforms where it is integrated in GT, i915 needs to transfer the messages from GSC to CSME and back. The proxy flow is as follow: 1 - i915 submits a request to GSC asking for the message to CSME 2 - GSC replies with the proxy header + payload for CSME 3 - i915 sends the reply from GSC as-is to CSME via the mei proxy component 4 - CSME replies with the proxy header + payload for GSC 5 - i915 submits a request to GSC with the reply from CSME 6 - GSC replies either with a new header + payload (same as step 2, so we restart from there) or with an end message. After GSC load, i915 is expected to start the first proxy message chain, while all subsequent ones will be triggered by the GSC via interrupt. To communicate with the CSME, we use a dedicated mei component, which means that we need to wait for it to bind before we can initialize the proxies. This usually happens quite fast, but given that there is a chance that we'll have to wait a few seconds the GSC work has been moved to a dedicated WQ to not stall other processes. v2: fix code style, includes and variable naming (Alan) v3: add extra check for proxy status, fix includes and comments Signed-off-by: Daniele Ceraolo Spurio Cc: Alan Previn Reviewed-by: Alan Previn Link: https://patchwork.freedesktop.org/patch/msgid/20230502163854.317653-4-daniele.ceraolospurio@intel.com --- drivers/gpu/drm/i915/Makefile | 1 + drivers/gpu/drm/i915/gt/uc/intel_gsc_fw.c | 10 + drivers/gpu/drm/i915/gt/uc/intel_gsc_fw.h | 1 + drivers/gpu/drm/i915/gt/uc/intel_gsc_proxy.c | 384 ++++++++++++++++++ drivers/gpu/drm/i915/gt/uc/intel_gsc_proxy.h | 17 + drivers/gpu/drm/i915/gt/uc/intel_gsc_uc.c | 49 ++- drivers/gpu/drm/i915/gt/uc/intel_gsc_uc.h | 14 +- .../i915/gt/uc/intel_gsc_uc_heci_cmd_submit.h | 1 + 8 files changed, 472 insertions(+), 5 deletions(-) create mode 100644 drivers/gpu/drm/i915/gt/uc/intel_gsc_proxy.c create mode 100644 drivers/gpu/drm/i915/gt/uc/intel_gsc_proxy.h diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile index 97b0d4ae221a..1f9071ee6c83 100644 --- a/drivers/gpu/drm/i915/Makefile +++ b/drivers/gpu/drm/i915/Makefile @@ -194,6 +194,7 @@ i915-y += \ # general-purpose microcontroller (GuC) support i915-y += \ gt/uc/intel_gsc_fw.o \ + gt/uc/intel_gsc_proxy.o \ gt/uc/intel_gsc_uc.o \ gt/uc/intel_gsc_uc_heci_cmd_submit.o\ gt/uc/intel_guc.o \ diff --git a/drivers/gpu/drm/i915/gt/uc/intel_gsc_fw.c b/drivers/gpu/drm/i915/gt/uc/intel_gsc_fw.c index 236673c02f9a..f46eb17a7a98 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_gsc_fw.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_gsc_fw.c @@ -13,6 +13,7 @@ #define GSC_FW_STATUS_REG _MMIO(0x116C40) #define GSC_FW_CURRENT_STATE REG_GENMASK(3, 0) #define GSC_FW_CURRENT_STATE_RESET 0 +#define GSC_FW_PROXY_STATE_NORMAL 5 #define GSC_FW_INIT_COMPLETE_BIT REG_BIT(9) static bool gsc_is_in_reset(struct intel_uncore *uncore) @@ -23,6 +24,15 @@ static bool gsc_is_in_reset(struct intel_uncore *uncore) GSC_FW_CURRENT_STATE_RESET; } +bool intel_gsc_uc_fw_proxy_init_done(struct intel_gsc_uc *gsc) +{ + struct intel_uncore *uncore = gsc_uc_to_gt(gsc)->uncore; + u32 fw_status = intel_uncore_read(uncore, GSC_FW_STATUS_REG); + + return REG_FIELD_GET(GSC_FW_CURRENT_STATE, fw_status) == + GSC_FW_PROXY_STATE_NORMAL; +} + bool intel_gsc_uc_fw_init_done(struct intel_gsc_uc *gsc) { struct intel_uncore *uncore = gsc_uc_to_gt(gsc)->uncore; diff --git a/drivers/gpu/drm/i915/gt/uc/intel_gsc_fw.h b/drivers/gpu/drm/i915/gt/uc/intel_gsc_fw.h index f4c1106bb2a9..fff8928218df 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_gsc_fw.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_gsc_fw.h @@ -13,5 +13,6 @@ struct intel_uncore; int intel_gsc_uc_fw_upload(struct intel_gsc_uc *gsc); bool intel_gsc_uc_fw_init_done(struct intel_gsc_uc *gsc); +bool intel_gsc_uc_fw_proxy_init_done(struct intel_gsc_uc *gsc); #endif diff --git a/drivers/gpu/drm/i915/gt/uc/intel_gsc_proxy.c b/drivers/gpu/drm/i915/gt/uc/intel_gsc_proxy.c new file mode 100644 index 000000000000..65c9575863ea --- /dev/null +++ b/drivers/gpu/drm/i915/gt/uc/intel_gsc_proxy.c @@ -0,0 +1,384 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2023 Intel Corporation + */ + +#include + +#include "drm/i915_component.h" +#include "drm/i915_gsc_proxy_mei_interface.h" + +#include "gt/intel_gt.h" +#include "gt/intel_gt_print.h" +#include "intel_gsc_proxy.h" +#include "intel_gsc_uc.h" +#include "intel_gsc_uc_heci_cmd_submit.h" +#include "i915_drv.h" + +/* + * GSC proxy: + * The GSC uC needs to communicate with the CSME to perform certain operations. + * Since the GSC can't perform this communication directly on platforms where it + * is integrated in GT, i915 needs to transfer the messages from GSC to CSME + * and back. i915 must manually start the proxy flow after the GSC is loaded to + * signal to GSC that we're ready to handle its messages and allow it to query + * its init data from CSME; GSC will then trigger an HECI2 interrupt if it needs + * to send messages to CSME again. + * The proxy flow is as follow: + * 1 - i915 submits a request to GSC asking for the message to CSME + * 2 - GSC replies with the proxy header + payload for CSME + * 3 - i915 sends the reply from GSC as-is to CSME via the mei proxy component + * 4 - CSME replies with the proxy header + payload for GSC + * 5 - i915 submits a request to GSC with the reply from CSME + * 6 - GSC replies either with a new header + payload (same as step 2, so we + * restart from there) or with an end message. + */ + +/* + * The component should load quite quickly in most cases, but it could take + * a bit. Using a very big timeout just to cover the worst case scenario + */ +#define GSC_PROXY_INIT_TIMEOUT_MS 20000 + +/* the protocol supports up to 32K in each direction */ +#define GSC_PROXY_BUFFER_SIZE SZ_32K +#define GSC_PROXY_CHANNEL_SIZE (GSC_PROXY_BUFFER_SIZE * 2) +#define GSC_PROXY_MAX_MSG_SIZE (GSC_PROXY_BUFFER_SIZE - sizeof(struct intel_gsc_mtl_header)) + +/* FW-defined proxy header */ +struct intel_gsc_proxy_header { + /* + * hdr: + * Bits 0-7: type of the proxy message (see enum intel_gsc_proxy_type) + * Bits 8-15: rsvd + * Bits 16-31: length in bytes of the payload following the proxy header + */ + u32 hdr; +#define GSC_PROXY_TYPE GENMASK(7, 0) +#define GSC_PROXY_PAYLOAD_LENGTH GENMASK(31, 16) + + u32 source; /* Source of the Proxy message */ + u32 destination; /* Destination of the Proxy message */ +#define GSC_PROXY_ADDRESSING_KMD 0x10000 +#define GSC_PROXY_ADDRESSING_GSC 0x20000 +#define GSC_PROXY_ADDRESSING_CSME 0x30000 + + u32 status; /* Command status */ +} __packed; + +/* FW-defined proxy types */ +enum intel_gsc_proxy_type { + GSC_PROXY_MSG_TYPE_PROXY_INVALID = 0, + GSC_PROXY_MSG_TYPE_PROXY_QUERY = 1, + GSC_PROXY_MSG_TYPE_PROXY_PAYLOAD = 2, + GSC_PROXY_MSG_TYPE_PROXY_END = 3, + GSC_PROXY_MSG_TYPE_PROXY_NOTIFICATION = 4, +}; + +struct gsc_proxy_msg { + struct intel_gsc_mtl_header header; + struct intel_gsc_proxy_header proxy_header; +} __packed; + +static int proxy_send_to_csme(struct intel_gsc_uc *gsc) +{ + struct intel_gt *gt = gsc_uc_to_gt(gsc); + struct i915_gsc_proxy_component *comp = gsc->proxy.component; + struct intel_gsc_mtl_header *hdr; + void *in = gsc->proxy.to_csme; + void *out = gsc->proxy.to_gsc; + u32 in_size; + int ret; + + /* CSME msg only includes the proxy */ + hdr = in; + in += sizeof(struct intel_gsc_mtl_header); + out += sizeof(struct intel_gsc_mtl_header); + + in_size = hdr->message_size - sizeof(struct intel_gsc_mtl_header); + + /* the message must contain at least the proxy header */ + if (in_size < sizeof(struct intel_gsc_proxy_header) || + in_size > GSC_PROXY_MAX_MSG_SIZE) { + gt_err(gt, "Invalid CSME message size: %u\n", in_size); + return -EINVAL; + } + + ret = comp->ops->send(comp->mei_dev, in, in_size); + if (ret < 0) { + gt_err(gt, "Failed to send CSME message\n"); + return ret; + } + + ret = comp->ops->recv(comp->mei_dev, out, GSC_PROXY_MAX_MSG_SIZE); + if (ret < 0) { + gt_err(gt, "Failed to receive CSME message\n"); + return ret; + } + + return ret; +} + +static int proxy_send_to_gsc(struct intel_gsc_uc *gsc) +{ + struct intel_gt *gt = gsc_uc_to_gt(gsc); + u32 *marker = gsc->proxy.to_csme; /* first dw of the reply header */ + u64 addr_in = i915_ggtt_offset(gsc->proxy.vma); + u64 addr_out = addr_in + GSC_PROXY_BUFFER_SIZE; + u32 size = ((struct gsc_proxy_msg *)gsc->proxy.to_gsc)->header.message_size; + int err; + + /* the message must contain at least the gsc and proxy headers */ + if (size < sizeof(struct gsc_proxy_msg) || size > GSC_PROXY_BUFFER_SIZE) { + gt_err(gt, "Invalid GSC proxy message size: %u\n", size); + return -EINVAL; + } + + /* clear the message marker */ + *marker = 0; + + /* make sure the marker write is flushed */ + wmb(); + + /* send the request */ + err = intel_gsc_uc_heci_cmd_submit_packet(gsc, addr_in, size, + addr_out, GSC_PROXY_BUFFER_SIZE); + + if (!err) { + /* wait for the reply to show up */ + err = wait_for(*marker != 0, 300); + if (err) + gt_err(gt, "Failed to get a proxy reply from gsc\n"); + } + + return err; +} + +static int validate_proxy_header(struct intel_gsc_proxy_header *header, + u32 source, u32 dest) +{ + u32 type = FIELD_GET(GSC_PROXY_TYPE, header->hdr); + u32 length = FIELD_GET(GSC_PROXY_PAYLOAD_LENGTH, header->hdr); + int ret = 0; + + if (header->destination != dest || header->source != source) { + ret = -ENOEXEC; + goto fail; + } + + switch (type) { + case GSC_PROXY_MSG_TYPE_PROXY_PAYLOAD: + if (length > 0) + break; + fallthrough; + case GSC_PROXY_MSG_TYPE_PROXY_INVALID: + ret = -EIO; + goto fail; + default: + break; + } + +fail: + return ret; +} + +static int proxy_query(struct intel_gsc_uc *gsc) +{ + struct intel_gt *gt = gsc_uc_to_gt(gsc); + struct gsc_proxy_msg *to_gsc = gsc->proxy.to_gsc; + struct gsc_proxy_msg *to_csme = gsc->proxy.to_csme; + int ret; + + intel_gsc_uc_heci_cmd_emit_mtl_header(&to_gsc->header, + HECI_MEADDRESS_PROXY, + sizeof(struct gsc_proxy_msg), + 0); + + to_gsc->proxy_header.hdr = + FIELD_PREP(GSC_PROXY_TYPE, GSC_PROXY_MSG_TYPE_PROXY_QUERY) | + FIELD_PREP(GSC_PROXY_PAYLOAD_LENGTH, 0); + + to_gsc->proxy_header.source = GSC_PROXY_ADDRESSING_KMD; + to_gsc->proxy_header.destination = GSC_PROXY_ADDRESSING_GSC; + to_gsc->proxy_header.status = 0; + + while (1) { + /* clear the GSC response header space */ + memset(gsc->proxy.to_csme, 0, sizeof(struct gsc_proxy_msg)); + + /* send proxy message to GSC */ + ret = proxy_send_to_gsc(gsc); + if (ret) { + gt_err(gt, "failed to send proxy message to GSC! %d\n", ret); + goto proxy_error; + } + + /* stop if this was the last message */ + if (FIELD_GET(GSC_PROXY_TYPE, to_csme->proxy_header.hdr) == + GSC_PROXY_MSG_TYPE_PROXY_END) + break; + + /* make sure the GSC-to-CSME proxy header is sane */ + ret = validate_proxy_header(&to_csme->proxy_header, + GSC_PROXY_ADDRESSING_GSC, + GSC_PROXY_ADDRESSING_CSME); + if (ret) { + gt_err(gt, "invalid GSC to CSME proxy header! %d\n", ret); + goto proxy_error; + } + + /* send the GSC message to the CSME */ + ret = proxy_send_to_csme(gsc); + if (ret < 0) { + gt_err(gt, "failed to send proxy message to CSME! %d\n", ret); + goto proxy_error; + } + + /* update the GSC message size with the returned value from CSME */ + to_gsc->header.message_size = ret + sizeof(struct intel_gsc_mtl_header); + + /* make sure the CSME-to-GSC proxy header is sane */ + ret = validate_proxy_header(&to_gsc->proxy_header, + GSC_PROXY_ADDRESSING_CSME, + GSC_PROXY_ADDRESSING_GSC); + if (ret) { + gt_err(gt, "invalid CSME to GSC proxy header! %d\n", ret); + goto proxy_error; + } + } + +proxy_error: + return ret < 0 ? ret : 0; +} + +int intel_gsc_proxy_request_handler(struct intel_gsc_uc *gsc) +{ + struct intel_gt *gt = gsc_uc_to_gt(gsc); + int err; + + if (!gsc->proxy.component_added) + return -ENODEV; + + assert_rpm_wakelock_held(gt->uncore->rpm); + + /* when GSC is loaded, we can queue this before the component is bound */ + err = wait_for(gsc->proxy.component, GSC_PROXY_INIT_TIMEOUT_MS); + if (err) { + gt_err(gt, "GSC proxy component didn't bind within the expected timeout\n"); + return -EIO; + } + + mutex_lock(&gsc->proxy.mutex); + if (!gsc->proxy.component) { + gt_err(gt, "GSC proxy worker called without the component being bound!\n"); + err = -EIO; + } else { + err = proxy_query(gsc); + } + mutex_unlock(&gsc->proxy.mutex); + return err; +} + +static int i915_gsc_proxy_component_bind(struct device *i915_kdev, + struct device *mei_kdev, void *data) +{ + struct drm_i915_private *i915 = kdev_to_i915(i915_kdev); + struct intel_gsc_uc *gsc = &i915->media_gt->uc.gsc; + + mutex_lock(&gsc->proxy.mutex); + gsc->proxy.component = data; + gsc->proxy.component->mei_dev = mei_kdev; + mutex_unlock(&gsc->proxy.mutex); + + return 0; +} + +static void i915_gsc_proxy_component_unbind(struct device *i915_kdev, + struct device *mei_kdev, void *data) +{ + struct drm_i915_private *i915 = kdev_to_i915(i915_kdev); + struct intel_gsc_uc *gsc = &i915->media_gt->uc.gsc; + + mutex_lock(&gsc->proxy.mutex); + gsc->proxy.component = NULL; + mutex_unlock(&gsc->proxy.mutex); +} + +static const struct component_ops i915_gsc_proxy_component_ops = { + .bind = i915_gsc_proxy_component_bind, + .unbind = i915_gsc_proxy_component_unbind, +}; + +static int proxy_channel_alloc(struct intel_gsc_uc *gsc) +{ + struct intel_gt *gt = gsc_uc_to_gt(gsc); + struct i915_vma *vma; + void *vaddr; + int err; + + err = intel_guc_allocate_and_map_vma(>->uc.guc, GSC_PROXY_CHANNEL_SIZE, + &vma, &vaddr); + if (err) + return err; + + gsc->proxy.vma = vma; + gsc->proxy.to_gsc = vaddr; + gsc->proxy.to_csme = vaddr + GSC_PROXY_BUFFER_SIZE; + + return 0; +} + +static void proxy_channel_free(struct intel_gsc_uc *gsc) +{ + if (!gsc->proxy.vma) + return; + + gsc->proxy.to_gsc = NULL; + gsc->proxy.to_csme = NULL; + i915_vma_unpin_and_release(&gsc->proxy.vma, I915_VMA_RELEASE_MAP); +} + +void intel_gsc_proxy_fini(struct intel_gsc_uc *gsc) +{ + struct intel_gt *gt = gsc_uc_to_gt(gsc); + struct drm_i915_private *i915 = gt->i915; + + if (fetch_and_zero(&gsc->proxy.component_added)) + component_del(i915->drm.dev, &i915_gsc_proxy_component_ops); + + proxy_channel_free(gsc); +} + +int intel_gsc_proxy_init(struct intel_gsc_uc *gsc) +{ + int err; + struct intel_gt *gt = gsc_uc_to_gt(gsc); + struct drm_i915_private *i915 = gt->i915; + + mutex_init(&gsc->proxy.mutex); + + if (!IS_ENABLED(CONFIG_INTEL_MEI_GSC_PROXY)) { + gt_info(gt, "can't init GSC proxy due to missing mei component\n"); + return -ENODEV; + } + + err = proxy_channel_alloc(gsc); + if (err) + return err; + + err = component_add_typed(i915->drm.dev, &i915_gsc_proxy_component_ops, + I915_COMPONENT_GSC_PROXY); + if (err < 0) { + gt_err(gt, "Failed to add GSC_PROXY component (%d)\n", err); + goto out_free; + } + + gsc->proxy.component_added = true; + + return 0; + +out_free: + proxy_channel_free(gsc); + return err; +} + diff --git a/drivers/gpu/drm/i915/gt/uc/intel_gsc_proxy.h b/drivers/gpu/drm/i915/gt/uc/intel_gsc_proxy.h new file mode 100644 index 000000000000..1f27cb6e62ba --- /dev/null +++ b/drivers/gpu/drm/i915/gt/uc/intel_gsc_proxy.h @@ -0,0 +1,17 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2023 Intel Corporation + */ + +#ifndef _INTEL_GSC_PROXY_H_ +#define _INTEL_GSC_PROXY_H_ + +#include + +struct intel_gsc_uc; + +int intel_gsc_proxy_init(struct intel_gsc_uc *gsc); +void intel_gsc_proxy_fini(struct intel_gsc_uc *gsc); +int intel_gsc_proxy_request_handler(struct intel_gsc_uc *gsc); + +#endif diff --git a/drivers/gpu/drm/i915/gt/uc/intel_gsc_uc.c b/drivers/gpu/drm/i915/gt/uc/intel_gsc_uc.c index 2d5b70b3384c..b234bb585ad3 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_gsc_uc.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_gsc_uc.c @@ -10,15 +10,39 @@ #include "intel_gsc_uc.h" #include "intel_gsc_fw.h" #include "i915_drv.h" +#include "intel_gsc_proxy.h" static void gsc_work(struct work_struct *work) { struct intel_gsc_uc *gsc = container_of(work, typeof(*gsc), work); struct intel_gt *gt = gsc_uc_to_gt(gsc); intel_wakeref_t wakeref; + int ret; - with_intel_runtime_pm(gt->uncore->rpm, wakeref) - intel_gsc_uc_fw_upload(gsc); + wakeref = intel_runtime_pm_get(gt->uncore->rpm); + + ret = intel_gsc_uc_fw_upload(gsc); + if (ret) + goto out_put; + + ret = intel_gsc_proxy_request_handler(gsc); + if (ret) + goto out_put; + + /* + * If there is a proxy establishment error, the GSC might still + * complete the request handling cleanly, so we need to check the + * status register to check if the proxy init was actually successful + */ + if (intel_gsc_uc_fw_proxy_init_done(gsc)) { + drm_dbg(>->i915->drm, "GSC Proxy initialized\n"); + intel_uc_fw_change_status(&gsc->fw, INTEL_UC_FIRMWARE_RUNNING); + } else { + drm_err(>->i915->drm, "GSC status reports proxy init not complete\n"); + } + +out_put: + intel_runtime_pm_put(gt->uncore->rpm, wakeref); } static bool gsc_engine_supported(struct intel_gt *gt) @@ -43,6 +67,8 @@ static bool gsc_engine_supported(struct intel_gt *gt) void intel_gsc_uc_init_early(struct intel_gsc_uc *gsc) { + struct intel_gt *gt = gsc_uc_to_gt(gsc); + intel_uc_fw_init_early(&gsc->fw, INTEL_UC_FW_TYPE_GSC); INIT_WORK(&gsc->work, gsc_work); @@ -50,10 +76,16 @@ void intel_gsc_uc_init_early(struct intel_gsc_uc *gsc) * GT with it being not fully setup hence check device info's * engine mask */ - if (!gsc_engine_supported(gsc_uc_to_gt(gsc))) { + if (!gsc_engine_supported(gt)) { intel_uc_fw_change_status(&gsc->fw, INTEL_UC_FIRMWARE_NOT_SUPPORTED); return; } + + gsc->wq = alloc_ordered_workqueue("i915_gsc", 0); + if (!gsc->wq) { + gt_err(gt, "failed to allocate WQ for GSC, disabling FW\n"); + intel_uc_fw_change_status(&gsc->fw, INTEL_UC_FIRMWARE_NOT_SUPPORTED); + } } int intel_gsc_uc_init(struct intel_gsc_uc *gsc) @@ -88,6 +120,9 @@ int intel_gsc_uc_init(struct intel_gsc_uc *gsc) gsc->ce = ce; + /* if we fail to init proxy we still want to load GSC for PM */ + intel_gsc_proxy_init(gsc); + intel_uc_fw_change_status(&gsc->fw, INTEL_UC_FIRMWARE_LOADABLE); return 0; @@ -107,6 +142,12 @@ void intel_gsc_uc_fini(struct intel_gsc_uc *gsc) return; flush_work(&gsc->work); + if (gsc->wq) { + destroy_workqueue(gsc->wq); + gsc->wq = NULL; + } + + intel_gsc_proxy_fini(gsc); if (gsc->ce) intel_engine_destroy_pinned_context(fetch_and_zero(&gsc->ce)); @@ -151,5 +192,5 @@ void intel_gsc_uc_load_start(struct intel_gsc_uc *gsc) if (intel_gsc_uc_fw_init_done(gsc)) return; - queue_work(system_unbound_wq, &gsc->work); + queue_work(gsc->wq, &gsc->work); } diff --git a/drivers/gpu/drm/i915/gt/uc/intel_gsc_uc.h b/drivers/gpu/drm/i915/gt/uc/intel_gsc_uc.h index 5f50fa1ff8b9..023bded10dde 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_gsc_uc.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_gsc_uc.h @@ -10,6 +10,7 @@ struct i915_vma; struct intel_context; +struct i915_gsc_proxy_component; struct intel_gsc_uc { /* Generic uC firmware management */ @@ -19,7 +20,18 @@ struct intel_gsc_uc { struct i915_vma *local; /* private memory for GSC usage */ struct intel_context *ce; /* for submission to GSC FW via GSC engine */ - struct work_struct work; /* for delayed load */ + /* for delayed load and proxy handling */ + struct workqueue_struct *wq; + struct work_struct work; + + struct { + struct i915_gsc_proxy_component *component; + bool component_added; + struct i915_vma *vma; + void *to_gsc; + void *to_csme; + struct mutex mutex; /* protects the tee channel binding */ + } proxy; }; void intel_gsc_uc_init_early(struct intel_gsc_uc *gsc); diff --git a/drivers/gpu/drm/i915/gt/uc/intel_gsc_uc_heci_cmd_submit.h b/drivers/gpu/drm/i915/gt/uc/intel_gsc_uc_heci_cmd_submit.h index 3d56ae501991..8f199d5f963e 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_gsc_uc_heci_cmd_submit.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_gsc_uc_heci_cmd_submit.h @@ -14,6 +14,7 @@ struct intel_gsc_mtl_header { #define GSC_HECI_VALIDITY_MARKER 0xA578875A u8 heci_client_id; +#define HECI_MEADDRESS_PROXY 10 #define HECI_MEADDRESS_PXP 17 #define HECI_MEADDRESS_HDCP 18 From 31cc65b4c7e37eaf645be2d5fdcade56e3fdf8f2 Mon Sep 17 00:00:00 2001 From: Daniele Ceraolo Spurio Date: Tue, 2 May 2023 09:38:54 -0700 Subject: [PATCH 065/108] drm/i915/gsc: add support for GSC proxy interrupt The GSC notifies us of a proxy request via the HECI2 interrupt. The interrupt must be enabled both in the HECI layer and in our usual gt irq programming; for the latter, the interrupt is enabled via the same enable register as the GSC CS, but it does have its own mask register. When the interrupt is received, we also need to de-assert it in both layers. The handling of the proxy request is deferred to the same worker that we use for GSC load. New flags have been added to distinguish between the init case and the proxy interrupt. v2: Make sure not to set the reset bit when enabling/disabling the GSC interrupts, fix defines (Alan) v3: rebase on proxy status register check Signed-off-by: Daniele Ceraolo Spurio Cc: Alan Previn Reviewed-by: Alan Previn Link: https://patchwork.freedesktop.org/patch/msgid/20230502163854.317653-5-daniele.ceraolospurio@intel.com --- drivers/gpu/drm/i915/gt/intel_gt_irq.c | 22 +++++++- drivers/gpu/drm/i915/gt/intel_gt_regs.h | 3 + drivers/gpu/drm/i915/gt/uc/intel_gsc_proxy.c | 44 ++++++++++++++- drivers/gpu/drm/i915/gt/uc/intel_gsc_proxy.h | 1 + drivers/gpu/drm/i915/gt/uc/intel_gsc_uc.c | 59 ++++++++++++++------ drivers/gpu/drm/i915/gt/uc/intel_gsc_uc.h | 3 + 6 files changed, 111 insertions(+), 21 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_gt_irq.c b/drivers/gpu/drm/i915/gt/intel_gt_irq.c index 1b25a6039152..e13cfbe2daf5 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_irq.c +++ b/drivers/gpu/drm/i915/gt/intel_gt_irq.c @@ -15,6 +15,7 @@ #include "intel_uncore.h" #include "intel_rps.h" #include "pxp/intel_pxp_irq.h" +#include "uc/intel_gsc_proxy.h" static void guc_irq_handler(struct intel_guc *guc, u16 iir) { @@ -81,6 +82,9 @@ gen11_other_irq_handler(struct intel_gt *gt, const u8 instance, if (instance == OTHER_GSC_INSTANCE) return intel_gsc_irq_handler(gt, iir); + if (instance == OTHER_GSC_HECI_2_INSTANCE) + return intel_gsc_proxy_irq_handler(>->uc.gsc, iir); + WARN_ONCE(1, "unhandled other interrupt instance=0x%x, iir=0x%x\n", instance, iir); } @@ -100,6 +104,8 @@ static struct intel_gt *pick_gt(struct intel_gt *gt, u8 class, u8 instance) case VIDEO_ENHANCEMENT_CLASS: return media_gt; case OTHER_CLASS: + if (instance == OTHER_GSC_HECI_2_INSTANCE) + return media_gt; if (instance == OTHER_GSC_INSTANCE && HAS_ENGINE(media_gt, GSC0)) return media_gt; fallthrough; @@ -256,6 +262,7 @@ void gen11_gt_irq_postinstall(struct intel_gt *gt) u32 irqs = GT_RENDER_USER_INTERRUPT; u32 guc_mask = intel_uc_wants_guc(>->uc) ? GUC_INTR_GUC2HOST : 0; u32 gsc_mask = 0; + u32 heci_mask = 0; u32 dmask; u32 smask; @@ -267,10 +274,16 @@ void gen11_gt_irq_postinstall(struct intel_gt *gt) dmask = irqs << 16 | irqs; smask = irqs << 16; - if (HAS_ENGINE(gt, GSC0)) + if (HAS_ENGINE(gt, GSC0)) { + /* + * the heci2 interrupt is enabled via the same register as the + * GSC interrupt, but it has its own mask register. + */ gsc_mask = irqs; - else if (HAS_HECI_GSC(gt->i915)) + heci_mask = GSC_IRQ_INTF(1); /* HECI2 IRQ for SW Proxy*/ + } else if (HAS_HECI_GSC(gt->i915)) { gsc_mask = GSC_IRQ_INTF(0) | GSC_IRQ_INTF(1); + } BUILD_BUG_ON(irqs & 0xffff0000); @@ -280,7 +293,7 @@ void gen11_gt_irq_postinstall(struct intel_gt *gt) if (CCS_MASK(gt)) intel_uncore_write(uncore, GEN12_CCS_RSVD_INTR_ENABLE, smask); if (gsc_mask) - intel_uncore_write(uncore, GEN11_GUNIT_CSME_INTR_ENABLE, gsc_mask); + intel_uncore_write(uncore, GEN11_GUNIT_CSME_INTR_ENABLE, gsc_mask | heci_mask); /* Unmask irqs on RCS, BCS, VCS and VECS engines. */ intel_uncore_write(uncore, GEN11_RCS0_RSVD_INTR_MASK, ~smask); @@ -308,6 +321,9 @@ void gen11_gt_irq_postinstall(struct intel_gt *gt) intel_uncore_write(uncore, GEN12_CCS2_CCS3_INTR_MASK, ~dmask); if (gsc_mask) intel_uncore_write(uncore, GEN11_GUNIT_CSME_INTR_MASK, ~gsc_mask); + if (heci_mask) + intel_uncore_write(uncore, GEN12_HECI2_RSVD_INTR_MASK, + ~REG_FIELD_PREP(ENGINE1_MASK, heci_mask)); if (guc_mask) { /* the enable bit is common for both GTs but the masks are separate */ diff --git a/drivers/gpu/drm/i915/gt/intel_gt_regs.h b/drivers/gpu/drm/i915/gt/intel_gt_regs.h index af80d2fe739b..b8a39c219b60 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_regs.h +++ b/drivers/gpu/drm/i915/gt/intel_gt_regs.h @@ -1596,6 +1596,7 @@ #define GEN11_GT_INTR_DW(x) _MMIO(0x190018 + ((x) * 4)) #define GEN11_CSME (31) +#define GEN12_HECI_2 (30) #define GEN11_GUNIT (28) #define GEN11_GUC (25) #define MTL_MGUC (24) @@ -1637,6 +1638,7 @@ /* irq instances for OTHER_CLASS */ #define OTHER_GUC_INSTANCE 0 #define OTHER_GTPM_INSTANCE 1 +#define OTHER_GSC_HECI_2_INSTANCE 3 #define OTHER_KCR_INSTANCE 4 #define OTHER_GSC_INSTANCE 6 #define OTHER_MEDIA_GUC_INSTANCE 16 @@ -1652,6 +1654,7 @@ #define GEN12_VCS6_VCS7_INTR_MASK _MMIO(0x1900b4) #define GEN11_VECS0_VECS1_INTR_MASK _MMIO(0x1900d0) #define GEN12_VECS2_VECS3_INTR_MASK _MMIO(0x1900d4) +#define GEN12_HECI2_RSVD_INTR_MASK _MMIO(0x1900e4) #define GEN11_GUC_SG_INTR_MASK _MMIO(0x1900e8) #define MTL_GUC_MGUC_INTR_MASK _MMIO(0x1900e8) /* MTL+ */ #define GEN11_GPM_WGBOXPERF_INTR_MASK _MMIO(0x1900ec) diff --git a/drivers/gpu/drm/i915/gt/uc/intel_gsc_proxy.c b/drivers/gpu/drm/i915/gt/uc/intel_gsc_proxy.c index 65c9575863ea..ebee0b5a2c1d 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_gsc_proxy.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_gsc_proxy.c @@ -14,6 +14,7 @@ #include "intel_gsc_uc.h" #include "intel_gsc_uc_heci_cmd_submit.h" #include "i915_drv.h" +#include "i915_reg.h" /* * GSC proxy: @@ -273,17 +274,49 @@ int intel_gsc_proxy_request_handler(struct intel_gsc_uc *gsc) gt_err(gt, "GSC proxy worker called without the component being bound!\n"); err = -EIO; } else { + /* + * write the status bit to clear it and allow new proxy + * interrupts to be generated while we handle the current + * request, but be sure not to write the reset bit + */ + intel_uncore_rmw(gt->uncore, HECI_H_CSR(MTL_GSC_HECI2_BASE), + HECI_H_CSR_RST, HECI_H_CSR_IS); err = proxy_query(gsc); } mutex_unlock(&gsc->proxy.mutex); return err; } +void intel_gsc_proxy_irq_handler(struct intel_gsc_uc *gsc, u32 iir) +{ + struct intel_gt *gt = gsc_uc_to_gt(gsc); + + if (unlikely(!iir)) + return; + + lockdep_assert_held(gt->irq_lock); + + if (!gsc->proxy.component) { + gt_err(gt, "GSC proxy irq received without the component being bound!\n"); + return; + } + + gsc->gsc_work_actions |= GSC_ACTION_SW_PROXY; + queue_work(gsc->wq, &gsc->work); +} + static int i915_gsc_proxy_component_bind(struct device *i915_kdev, struct device *mei_kdev, void *data) { struct drm_i915_private *i915 = kdev_to_i915(i915_kdev); - struct intel_gsc_uc *gsc = &i915->media_gt->uc.gsc; + struct intel_gt *gt = i915->media_gt; + struct intel_gsc_uc *gsc = >->uc.gsc; + intel_wakeref_t wakeref; + + /* enable HECI2 IRQs */ + with_intel_runtime_pm(&i915->runtime_pm, wakeref) + intel_uncore_rmw(gt->uncore, HECI_H_CSR(MTL_GSC_HECI2_BASE), + HECI_H_CSR_RST, HECI_H_CSR_IE); mutex_lock(&gsc->proxy.mutex); gsc->proxy.component = data; @@ -297,11 +330,18 @@ static void i915_gsc_proxy_component_unbind(struct device *i915_kdev, struct device *mei_kdev, void *data) { struct drm_i915_private *i915 = kdev_to_i915(i915_kdev); - struct intel_gsc_uc *gsc = &i915->media_gt->uc.gsc; + struct intel_gt *gt = i915->media_gt; + struct intel_gsc_uc *gsc = >->uc.gsc; + intel_wakeref_t wakeref; mutex_lock(&gsc->proxy.mutex); gsc->proxy.component = NULL; mutex_unlock(&gsc->proxy.mutex); + + /* disable HECI2 IRQs */ + with_intel_runtime_pm(&i915->runtime_pm, wakeref) + intel_uncore_rmw(gt->uncore, HECI_H_CSR(MTL_GSC_HECI2_BASE), + HECI_H_CSR_IE | HECI_H_CSR_RST, 0); } static const struct component_ops i915_gsc_proxy_component_ops = { diff --git a/drivers/gpu/drm/i915/gt/uc/intel_gsc_proxy.h b/drivers/gpu/drm/i915/gt/uc/intel_gsc_proxy.h index 1f27cb6e62ba..fc5aef10bfb4 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_gsc_proxy.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_gsc_proxy.h @@ -13,5 +13,6 @@ struct intel_gsc_uc; int intel_gsc_proxy_init(struct intel_gsc_uc *gsc); void intel_gsc_proxy_fini(struct intel_gsc_uc *gsc); int intel_gsc_proxy_request_handler(struct intel_gsc_uc *gsc); +void intel_gsc_proxy_irq_handler(struct intel_gsc_uc *gsc, u32 iir); #endif diff --git a/drivers/gpu/drm/i915/gt/uc/intel_gsc_uc.c b/drivers/gpu/drm/i915/gt/uc/intel_gsc_uc.c index b234bb585ad3..fb0984f875f9 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_gsc_uc.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_gsc_uc.c @@ -17,28 +17,49 @@ static void gsc_work(struct work_struct *work) struct intel_gsc_uc *gsc = container_of(work, typeof(*gsc), work); struct intel_gt *gt = gsc_uc_to_gt(gsc); intel_wakeref_t wakeref; + u32 actions; int ret; wakeref = intel_runtime_pm_get(gt->uncore->rpm); - ret = intel_gsc_uc_fw_upload(gsc); - if (ret) - goto out_put; + spin_lock_irq(gt->irq_lock); + actions = gsc->gsc_work_actions; + gsc->gsc_work_actions = 0; + spin_unlock_irq(gt->irq_lock); - ret = intel_gsc_proxy_request_handler(gsc); - if (ret) - goto out_put; + if (actions & GSC_ACTION_FW_LOAD) { + ret = intel_gsc_uc_fw_upload(gsc); + if (ret == -EEXIST) /* skip proxy if not a new load */ + actions &= ~GSC_ACTION_FW_LOAD; + else if (ret) + goto out_put; + } - /* - * If there is a proxy establishment error, the GSC might still - * complete the request handling cleanly, so we need to check the - * status register to check if the proxy init was actually successful - */ - if (intel_gsc_uc_fw_proxy_init_done(gsc)) { - drm_dbg(>->i915->drm, "GSC Proxy initialized\n"); - intel_uc_fw_change_status(&gsc->fw, INTEL_UC_FIRMWARE_RUNNING); - } else { - drm_err(>->i915->drm, "GSC status reports proxy init not complete\n"); + if (actions & (GSC_ACTION_FW_LOAD | GSC_ACTION_SW_PROXY)) { + if (!intel_gsc_uc_fw_init_done(gsc)) { + gt_err(gt, "Proxy request received with GSC not loaded!\n"); + goto out_put; + } + + ret = intel_gsc_proxy_request_handler(gsc); + if (ret) + goto out_put; + + /* mark the GSC FW init as done the first time we run this */ + if (actions & GSC_ACTION_FW_LOAD) { + /* + * If there is a proxy establishment error, the GSC might still + * complete the request handling cleanly, so we need to check the + * status register to check if the proxy init was actually successful + */ + if (intel_gsc_uc_fw_proxy_init_done(gsc)) { + drm_dbg(>->i915->drm, "GSC Proxy initialized\n"); + intel_uc_fw_change_status(&gsc->fw, INTEL_UC_FIRMWARE_RUNNING); + } else { + drm_err(>->i915->drm, + "GSC status reports proxy init not complete\n"); + } + } } out_put: @@ -186,11 +207,17 @@ void intel_gsc_uc_resume(struct intel_gsc_uc *gsc) void intel_gsc_uc_load_start(struct intel_gsc_uc *gsc) { + struct intel_gt *gt = gsc_uc_to_gt(gsc); + if (!intel_uc_fw_is_loadable(&gsc->fw)) return; if (intel_gsc_uc_fw_init_done(gsc)) return; + spin_lock_irq(gt->irq_lock); + gsc->gsc_work_actions |= GSC_ACTION_FW_LOAD; + spin_unlock_irq(gt->irq_lock); + queue_work(gsc->wq, &gsc->work); } diff --git a/drivers/gpu/drm/i915/gt/uc/intel_gsc_uc.h b/drivers/gpu/drm/i915/gt/uc/intel_gsc_uc.h index 023bded10dde..a2a0813b8a76 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_gsc_uc.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_gsc_uc.h @@ -23,6 +23,9 @@ struct intel_gsc_uc { /* for delayed load and proxy handling */ struct workqueue_struct *wq; struct work_struct work; + u32 gsc_work_actions; /* protected by gt->irq_lock */ +#define GSC_ACTION_FW_LOAD BIT(0) +#define GSC_ACTION_SW_PROXY BIT(1) struct { struct i915_gsc_proxy_component *component; From e971121350e72ff34a0c1d140127703cc2c085c2 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Tue, 2 May 2023 18:37:22 +0300 Subject: [PATCH 066/108] drm/i915/vma: fix struct i915_vma_bindinfo kernel-doc You can't document both a sub-struct type and a struct member at the same time. Separate them. drivers/gpu/drm/i915/i915_vma_resource.h:91: warning: Incorrect use of kernel-doc format: * struct i915_vma_bindinfo - Information needed for async bind drivers/gpu/drm/i915/i915_vma_resource.h:129: warning: Function parameter or member 'bi' not described in 'i915_vma_resource' Signed-off-by: Jani Nikula Reviewed-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/89ab5cf179566c429383cc57db746038f75cba0d.1683041799.git.jani.nikula@intel.com --- drivers/gpu/drm/i915/i915_vma_resource.h | 45 ++++++++++++++---------- 1 file changed, 27 insertions(+), 18 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_vma_resource.h b/drivers/gpu/drm/i915/i915_vma_resource.h index 2053c037dbfb..ca2b0f7f59bc 100644 --- a/drivers/gpu/drm/i915/i915_vma_resource.h +++ b/drivers/gpu/drm/i915/i915_vma_resource.h @@ -33,6 +33,27 @@ struct i915_page_sizes { unsigned int sg; }; +/** + * struct i915_vma_bindinfo - Information needed for async bind + * only but that can be dropped after the bind has taken place. + * Consider making this a separate argument to the bind_vma + * op, coalescing with other arguments like vm, stash, cache_level + * and flags + * @pages: The pages sg-table. + * @page_sizes: Page sizes of the pages. + * @pages_rsgt: Refcounted sg-table when delayed object destruction + * is supported. May be NULL. + * @readonly: Whether the vma should be bound read-only. + * @lmem: Whether the vma points to lmem. + */ +struct i915_vma_bindinfo { + struct sg_table *pages; + struct i915_page_sizes page_sizes; + struct i915_refct_sgt *pages_rsgt; + bool readonly:1; + bool lmem:1; +}; + /** * struct i915_vma_resource - Snapshotted unbind information. * @unbind_fence: Fence to mark unbinding complete. Note that this fence @@ -89,25 +110,13 @@ struct i915_vma_resource { intel_wakeref_t wakeref; /** - * struct i915_vma_bindinfo - Information needed for async bind - * only but that can be dropped after the bind has taken place. - * Consider making this a separate argument to the bind_vma - * op, coalescing with other arguments like vm, stash, cache_level - * and flags - * @pages: The pages sg-table. - * @page_sizes: Page sizes of the pages. - * @pages_rsgt: Refcounted sg-table when delayed object destruction - * is supported. May be NULL. - * @readonly: Whether the vma should be bound read-only. - * @lmem: Whether the vma points to lmem. + * @bi: Information needed for async bind only but that can be dropped + * after the bind has taken place. + * + * Consider making this a separate argument to the bind_vma op, + * coalescing with other arguments like vm, stash, cache_level and flags */ - struct i915_vma_bindinfo { - struct sg_table *pages; - struct i915_page_sizes page_sizes; - struct i915_refct_sgt *pages_rsgt; - bool readonly:1; - bool lmem:1; - } bi; + struct i915_vma_bindinfo bi; #if IS_ENABLED(CONFIG_DRM_I915_CAPTURE_ERROR) struct intel_memory_region *mr; From 08272a5a41876af38209def142de272e580d446e Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Tue, 2 May 2023 18:37:27 +0300 Subject: [PATCH 067/108] drm/i915/gem: fix function pointer member kernel-doc You can't document function pointer member as functions. drivers/gpu/drm/i915/gem/i915_gem_region.h:25: warning: Incorrect use of kernel-doc format: * process_obj - Process the current object drivers/gpu/drm/i915/gem/i915_gem_region.h:35: warning: Function parameter or member 'process_obj' not described in 'i915_gem_apply_to_region_ops' Signed-off-by: Jani Nikula Reviewed-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/a2a81f9ee78591def0534c81a63dbc652c44bbd3.1683041799.git.jani.nikula@intel.com --- drivers/gpu/drm/i915/gem/i915_gem_region.h | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_region.h b/drivers/gpu/drm/i915/gem/i915_gem_region.h index 2dfcc41c0170..8a7650b27cc2 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_region.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_region.h @@ -22,9 +22,7 @@ struct i915_gem_apply_to_region; */ struct i915_gem_apply_to_region_ops { /** - * process_obj - Process the current object - * @apply: Embed this for private data. - * @obj: The current object. + * @process_obj: Process the current object * * Note that if this function is part of a ww transaction, and * if returns -EDEADLK for one of the objects, it may be From 9d7fe94d35522115734b3c2c37cad3257c88c43b Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Tue, 2 May 2023 18:37:39 +0300 Subject: [PATCH 068/108] drm/i915/scatterlist: fix kernel-doc Can't document function pointer members as if they are functions. drivers/gpu/drm/i915/i915_scatterlist.h:160: warning: Incorrect use of kernel-doc format: * release() - Free the memory of the struct i915_refct_sgt Signed-off-by: Jani Nikula Reviewed-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/a0c099707ec4f1911b14b0f286848a298b2b29e0.1683041799.git.jani.nikula@intel.com --- drivers/gpu/drm/i915/i915_scatterlist.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_scatterlist.h b/drivers/gpu/drm/i915/i915_scatterlist.h index 12c1fd63e9b8..5a10c1a31183 100644 --- a/drivers/gpu/drm/i915/i915_scatterlist.h +++ b/drivers/gpu/drm/i915/i915_scatterlist.h @@ -157,8 +157,7 @@ bool i915_sg_trim(struct sg_table *orig_st); */ struct i915_refct_sgt_ops { /** - * release() - Free the memory of the struct i915_refct_sgt - * @ref: struct kref that is embedded in the struct i915_refct_sgt + * @release: Free the memory of the struct i915_refct_sgt */ void (*release)(struct kref *ref); }; From c6948d8c221a8a61ebeb69567fd1a6c820127b53 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Thu, 4 May 2023 12:23:20 +0300 Subject: [PATCH 069/108] drm/i915/ttm: fix i915_ttm_to_gem() kernel-doc drivers/gpu/drm/i915/gem/i915_gem_ttm.h:50: warning: Function parameter or member 'bo' not described in 'i915_ttm_to_gem' v2: Fix return value documentation while at it (Rodrigo) Signed-off-by: Jani Nikula Reviewed-by: Nirmoy Das Link: https://patchwork.freedesktop.org/patch/msgid/20230504092320.1787627-1-jani.nikula@intel.com --- drivers/gpu/drm/i915/gem/i915_gem_ttm.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm.h b/drivers/gpu/drm/i915/gem/i915_gem_ttm.h index f8f6bed1b297..67347e62e29b 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_ttm.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm.h @@ -42,8 +42,9 @@ static inline bool i915_ttm_is_ghost_object(struct ttm_buffer_object *bo) /** * i915_ttm_to_gem - Convert a struct ttm_buffer_object to an embedding * struct drm_i915_gem_object. + * @bo: Pointer to the ttm buffer object * - * Return: Pointer to the embedding struct ttm_buffer_object. + * Return: Pointer to the embedding struct drm_i915_gem_object. */ static inline struct drm_i915_gem_object * i915_ttm_to_gem(struct ttm_buffer_object *bo) From 83ac5457998ed464032665375dea56da8776a861 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Tue, 2 May 2023 18:37:41 +0300 Subject: [PATCH 070/108] drm/i915: use kernel-doc -Werror when CONFIG_DRM_I915_WERROR=y With CONFIG_DRM_I915_WERROR=y, we enable kernel-doc check for both objects and headers. Now that the kernel-doc warnings have been fixed, also enable kernel-doc -Werror to fail the build on kernel-doc warnings. Signed-off-by: Jani Nikula Reviewed-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/68ffcad0e6ff2b6cd70c6df28822f967898ce197.1683041799.git.jani.nikula@intel.com --- drivers/gpu/drm/i915/Makefile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile index 1f9071ee6c83..4494dcd2eb5d 100644 --- a/drivers/gpu/drm/i915/Makefile +++ b/drivers/gpu/drm/i915/Makefile @@ -374,7 +374,7 @@ obj-$(CONFIG_DRM_I915_GVT_KVMGT) += kvmgt.o # # Enable locally for CONFIG_DRM_I915_WERROR=y. See also scripts/Makefile.build ifdef CONFIG_DRM_I915_WERROR - cmd_checkdoc = $(srctree)/scripts/kernel-doc -none $< + cmd_checkdoc = $(srctree)/scripts/kernel-doc -none -Werror $< endif # header test @@ -389,7 +389,7 @@ always-$(CONFIG_DRM_I915_WERROR) += \ quiet_cmd_hdrtest = HDRTEST $(patsubst %.hdrtest,%.h,$@) cmd_hdrtest = $(CC) $(filter-out $(CFLAGS_GCOV), $(c_flags)) -S -o /dev/null -x c /dev/null -include $<; \ - $(srctree)/scripts/kernel-doc -none $<; touch $@ + $(srctree)/scripts/kernel-doc -none -Werror $<; touch $@ $(obj)/%.hdrtest: $(src)/%.h FORCE $(call if_changed_dep,hdrtest) From d41e14fabc16043664f153b89d3d5d26f7069ff7 Mon Sep 17 00:00:00 2001 From: John Harrison Date: Thu, 4 May 2023 13:22:51 -0700 Subject: [PATCH 071/108] drm/i915/uc: Track patch level versions on reduced version firmware files When reduced version firmware files were added (matching major component being the only strict requirement), the minor version was still tracked and a notification reported if it was older. However, the patch version should really be tracked as well for the same reasons. The KMD can work without the change but if the effort has been taken to release a new firmware with the change then there must be a valid reason for doing so - important bug fix, security fix, etc. And in that case it would be good to alert the user if they are missing out on that new fix. v2: Use correct patch version number and drop redunant debug print (review by Daniele / CI results). Signed-off-by: John Harrison Reviewed-by: Daniele Ceraolo Spurio Signed-off-by: Lucas De Marchi Link: https://patchwork.freedesktop.org/patch/msgid/20230504202252.1104212-2-John.C.Harrison@Intel.com --- drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c | 30 +++++++++++++++--------- 1 file changed, 19 insertions(+), 11 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c index c36e68e23a14..d26eef39f332 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c @@ -79,14 +79,14 @@ void intel_uc_fw_change_status(struct intel_uc_fw *uc_fw, * security fixes, etc. to be enabled. */ #define INTEL_GUC_FIRMWARE_DEFS(fw_def, guc_maj, guc_mmp) \ - fw_def(DG2, 0, guc_maj(dg2, 70, 5)) \ - fw_def(ALDERLAKE_P, 0, guc_maj(adlp, 70, 5)) \ + fw_def(DG2, 0, guc_maj(dg2, 70, 5, 1)) \ + fw_def(ALDERLAKE_P, 0, guc_maj(adlp, 70, 5, 1)) \ fw_def(ALDERLAKE_P, 0, guc_mmp(adlp, 70, 1, 1)) \ fw_def(ALDERLAKE_P, 0, guc_mmp(adlp, 69, 0, 3)) \ - fw_def(ALDERLAKE_S, 0, guc_maj(tgl, 70, 5)) \ + fw_def(ALDERLAKE_S, 0, guc_maj(tgl, 70, 5, 1)) \ fw_def(ALDERLAKE_S, 0, guc_mmp(tgl, 70, 1, 1)) \ fw_def(ALDERLAKE_S, 0, guc_mmp(tgl, 69, 0, 3)) \ - fw_def(DG1, 0, guc_maj(dg1, 70, 5)) \ + fw_def(DG1, 0, guc_maj(dg1, 70, 5, 1)) \ fw_def(ROCKETLAKE, 0, guc_mmp(tgl, 70, 1, 1)) \ fw_def(TIGERLAKE, 0, guc_mmp(tgl, 70, 1, 1)) \ fw_def(JASPERLAKE, 0, guc_mmp(ehl, 70, 1, 1)) \ @@ -140,7 +140,7 @@ void intel_uc_fw_change_status(struct intel_uc_fw *uc_fw, __stringify(patch_) ".bin" /* Minor for internal driver use, not part of file name */ -#define MAKE_GUC_FW_PATH_MAJOR(prefix_, major_, minor_) \ +#define MAKE_GUC_FW_PATH_MAJOR(prefix_, major_, minor_, patch_) \ __MAKE_UC_FW_PATH_MAJOR(prefix_, "guc", major_) #define MAKE_GUC_FW_PATH_MMP(prefix_, major_, minor_, patch_) \ @@ -196,9 +196,9 @@ struct __packed uc_fw_blob { { UC_FW_BLOB_BASE(major_, minor_, patch_, path_) \ .legacy = true } -#define GUC_FW_BLOB(prefix_, major_, minor_) \ - UC_FW_BLOB_NEW(major_, minor_, 0, false, \ - MAKE_GUC_FW_PATH_MAJOR(prefix_, major_, minor_)) +#define GUC_FW_BLOB(prefix_, major_, minor_, patch_) \ + UC_FW_BLOB_NEW(major_, minor_, patch_, false, \ + MAKE_GUC_FW_PATH_MAJOR(prefix_, major_, minor_, patch_)) #define GUC_FW_BLOB_MMP(prefix_, major_, minor_, patch_) \ UC_FW_BLOB_OLD(major_, minor_, patch_, \ @@ -295,6 +295,7 @@ __uc_fw_auto_select(struct drm_i915_private *i915, struct intel_uc_fw *uc_fw) uc_fw->file_wanted.path = blob->path; uc_fw->file_wanted.ver.major = blob->major; uc_fw->file_wanted.ver.minor = blob->minor; + uc_fw->file_wanted.ver.patch = blob->patch; uc_fw->loaded_via_gsc = blob->loaded_via_gsc; found = true; break; @@ -793,6 +794,9 @@ int intel_uc_fw_fetch(struct intel_uc_fw *uc_fw) } else { if (uc_fw->file_selected.ver.minor < uc_fw->file_wanted.ver.minor) old_ver = true; + else if ((uc_fw->file_selected.ver.minor == uc_fw->file_wanted.ver.minor) && + (uc_fw->file_selected.ver.patch < uc_fw->file_wanted.ver.patch)) + old_ver = true; } } @@ -800,12 +804,16 @@ int intel_uc_fw_fetch(struct intel_uc_fw *uc_fw) /* Preserve the version that was really wanted */ memcpy(&uc_fw->file_wanted, &file_ideal, sizeof(uc_fw->file_wanted)); - gt_notice(gt, "%s firmware %s (%d.%d) is recommended, but only %s (%d.%d) was found\n", + gt_notice(gt, "%s firmware %s (%d.%d.%d) is recommended, but only %s (%d.%d.%d) was found\n", intel_uc_fw_type_repr(uc_fw->type), uc_fw->file_wanted.path, - uc_fw->file_wanted.ver.major, uc_fw->file_wanted.ver.minor, + uc_fw->file_wanted.ver.major, + uc_fw->file_wanted.ver.minor, + uc_fw->file_wanted.ver.patch, uc_fw->file_selected.path, - uc_fw->file_selected.ver.major, uc_fw->file_selected.ver.minor); + uc_fw->file_selected.ver.major, + uc_fw->file_selected.ver.minor, + uc_fw->file_selected.ver.patch); gt_info(gt, "Consider updating your linux-firmware pkg or downloading from %s\n", INTEL_UC_FIRMWARE_URL); } From e541022b1ce2fe598b90cd96c88cf6fd2f7550d7 Mon Sep 17 00:00:00 2001 From: John Harrison Date: Thu, 4 May 2023 13:22:52 -0700 Subject: [PATCH 072/108] drm/i915/mtl: Define GuC firmware version for MTL First release of GuC for Meteorlake. Signed-off-by: John Harrison Reviewed-by: Lucas De Marchi Signed-off-by: Lucas De Marchi Link: https://patchwork.freedesktop.org/patch/msgid/20230504202252.1104212-3-John.C.Harrison@Intel.com --- drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c index d26eef39f332..10e48cbcf494 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c @@ -79,6 +79,7 @@ void intel_uc_fw_change_status(struct intel_uc_fw *uc_fw, * security fixes, etc. to be enabled. */ #define INTEL_GUC_FIRMWARE_DEFS(fw_def, guc_maj, guc_mmp) \ + fw_def(METEORLAKE, 0, guc_maj(mtl, 70, 6, 6)) \ fw_def(DG2, 0, guc_maj(dg2, 70, 5, 1)) \ fw_def(ALDERLAKE_P, 0, guc_maj(adlp, 70, 5, 1)) \ fw_def(ALDERLAKE_P, 0, guc_mmp(adlp, 70, 1, 1)) \ From 6b8bfff56b2140396fd28088ad2dca4463aef9c9 Mon Sep 17 00:00:00 2001 From: John Harrison Date: Tue, 2 May 2023 16:40:02 -0700 Subject: [PATCH 073/108] drm/i915/guc: Decode another GuC load failure case Explain another potential firmware failure mode and early exit the long wait if hit. Signed-off-by: John Harrison Reviewed-by: Daniele Ceraolo Spurio Link: https://patchwork.freedesktop.org/patch/msgid/20230502234007.1762014-2-John.C.Harrison@Intel.com --- drivers/gpu/drm/i915/gt/uc/abi/guc_errors_abi.h | 1 + drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c | 6 ++++++ 2 files changed, 7 insertions(+) diff --git a/drivers/gpu/drm/i915/gt/uc/abi/guc_errors_abi.h b/drivers/gpu/drm/i915/gt/uc/abi/guc_errors_abi.h index bcb1129b3610..dabeaf4f245f 100644 --- a/drivers/gpu/drm/i915/gt/uc/abi/guc_errors_abi.h +++ b/drivers/gpu/drm/i915/gt/uc/abi/guc_errors_abi.h @@ -44,6 +44,7 @@ enum intel_guc_load_status { enum intel_bootrom_load_status { INTEL_BOOTROM_STATUS_NO_KEY_FOUND = 0x13, INTEL_BOOTROM_STATUS_AES_PROD_KEY_FOUND = 0x1A, + INTEL_BOOTROM_STATUS_PROD_KEY_CHECK_FAILURE = 0x2B, INTEL_BOOTROM_STATUS_RSA_FAILED = 0x50, INTEL_BOOTROM_STATUS_PAVPC_FAILED = 0x73, INTEL_BOOTROM_STATUS_WOPCM_FAILED = 0x74, diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c index 6fda3aec5c66..0ff088a5e51a 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c @@ -129,6 +129,7 @@ static inline bool guc_load_done(struct intel_uncore *uncore, u32 *status, bool case INTEL_BOOTROM_STATUS_RC6CTXCONFIG_FAILED: case INTEL_BOOTROM_STATUS_MPUMAP_INCORRECT: case INTEL_BOOTROM_STATUS_EXCEPTION: + case INTEL_BOOTROM_STATUS_PROD_KEY_CHECK_FAILURE: *success = false; return true; } @@ -219,6 +220,11 @@ static int guc_wait_ucode(struct intel_guc *guc) guc_info(guc, "firmware signature verification failed\n"); ret = -ENOEXEC; break; + + case INTEL_BOOTROM_STATUS_PROD_KEY_CHECK_FAILURE: + guc_info(guc, "firmware production part check failure\n"); + ret = -ENOEXEC; + break; } switch (ukernel) { From c354feb5895fe80fd3f896443015cf53d5cf98e8 Mon Sep 17 00:00:00 2001 From: John Harrison Date: Tue, 2 May 2023 16:40:03 -0700 Subject: [PATCH 074/108] drm/i915/guc: Print status register when waiting for GuC to load If the GuC load is taking an excessively long time, the wait loop currently prints the GT frequency. Extend that to include the GuC status as well so we can see if the GuC is actually making progress or not. Signed-off-by: John Harrison Reviewed-by: Daniele Ceraolo Spurio Link: https://patchwork.freedesktop.org/patch/msgid/20230502234007.1762014-3-John.C.Harrison@Intel.com --- drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c index 0ff088a5e51a..364d0d546ec8 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c @@ -191,8 +191,10 @@ static int guc_wait_ucode(struct intel_guc *guc) if (!ret || !success) break; - guc_dbg(guc, "load still in progress, count = %d, freq = %dMHz\n", - count, intel_rps_read_actual_frequency(&uncore->gt->rps)); + guc_dbg(guc, "load still in progress, count = %d, freq = %dMHz, status = 0x%08X [0x%02X/%02X]\n", + count, intel_rps_read_actual_frequency(&uncore->gt->rps), status, + REG_FIELD_GET(GS_BOOTROM_MASK, status), + REG_FIELD_GET(GS_UKERNEL_MASK, status)); } after = ktime_get(); delta = ktime_sub(after, before); From edba77891b58a1f0626daf8598717b8efb307bc8 Mon Sep 17 00:00:00 2001 From: John Harrison Date: Tue, 2 May 2023 16:40:05 -0700 Subject: [PATCH 075/108] drm/i915/uc: Enhancements to firmware table validation The validation of the firmware table was being done inside the code for scanning the table for the next available firmware blob. Which is unnecessary. So pull it out into a separate function that is only called once per blob type at init time. Also, drop the CONFIG_SELFTEST requirement and make errors terminal. It was mentioned that potential issues with backports would not be caught by regular pre-merge CI as that only occurs on tip not stable branches. Making the validation unconditional and failing driver load on detecting of a problem ensures that such backports will also be validated correctly. This requires adding a firmware global flag to indicate an issue with any of the per firmware tables. This is done rather than adding a new state enum as a new enum value would be a much more invasive change - lots of places would need updating to support the new error state. Note also that this change means that a table error will cause the driver to wedge even on platforms that don't require firmware files. This is intentional as per the above backport concern - someone doing backports is not guaranteed to test on every platform that they may potential affect. So forcing a failure on all platforms ensures that the problem will be noticed and corrected immediately. v2: Change to unconditionally fail module load on a validation error (review feedback/discussion with Daniele). v3: Add a new flag to track table validation errors (review feedback/discussion with Daniele). Signed-off-by: John Harrison Reviewed-by: Daniele Ceraolo Spurio Link: https://patchwork.freedesktop.org/patch/msgid/20230502234007.1762014-5-John.C.Harrison@Intel.com --- drivers/gpu/drm/i915/gt/uc/intel_uc.c | 3 + drivers/gpu/drm/i915/gt/uc/intel_uc.h | 1 + drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c | 167 +++++++++++++---------- 3 files changed, 102 insertions(+), 69 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc.c b/drivers/gpu/drm/i915/gt/uc/intel_uc.c index 996168312340..1381943b8973 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_uc.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_uc.c @@ -432,6 +432,9 @@ static bool uc_is_wopcm_locked(struct intel_uc *uc) static int __uc_check_hw(struct intel_uc *uc) { + if (uc->fw_table_invalid) + return -EIO; + if (!intel_uc_supports_guc(uc)) return 0; diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc.h b/drivers/gpu/drm/i915/gt/uc/intel_uc.h index 5d0f1bcc381e..d585524d94de 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_uc.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_uc.h @@ -36,6 +36,7 @@ struct intel_uc { struct drm_i915_gem_object *load_err_log; bool reset_in_progress; + bool fw_table_invalid; }; void intel_uc_init_early(struct intel_uc *uc); diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c index 10e48cbcf494..b9ab370ce984 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c @@ -233,20 +233,22 @@ struct fw_blobs_by_type { u32 count; }; +static const struct uc_fw_platform_requirement blobs_guc[] = { + INTEL_GUC_FIRMWARE_DEFS(MAKE_FW_LIST, GUC_FW_BLOB, GUC_FW_BLOB_MMP) +}; + +static const struct uc_fw_platform_requirement blobs_huc[] = { + INTEL_HUC_FIRMWARE_DEFS(MAKE_FW_LIST, HUC_FW_BLOB, HUC_FW_BLOB_MMP, HUC_FW_BLOB_GSC) +}; + +static const struct fw_blobs_by_type blobs_all[INTEL_UC_FW_NUM_TYPES] = { + [INTEL_UC_FW_TYPE_GUC] = { blobs_guc, ARRAY_SIZE(blobs_guc) }, + [INTEL_UC_FW_TYPE_HUC] = { blobs_huc, ARRAY_SIZE(blobs_huc) }, +}; + static void __uc_fw_auto_select(struct drm_i915_private *i915, struct intel_uc_fw *uc_fw) { - static const struct uc_fw_platform_requirement blobs_guc[] = { - INTEL_GUC_FIRMWARE_DEFS(MAKE_FW_LIST, GUC_FW_BLOB, GUC_FW_BLOB_MMP) - }; - static const struct uc_fw_platform_requirement blobs_huc[] = { - INTEL_HUC_FIRMWARE_DEFS(MAKE_FW_LIST, HUC_FW_BLOB, HUC_FW_BLOB_MMP, HUC_FW_BLOB_GSC) - }; - static const struct fw_blobs_by_type blobs_all[INTEL_UC_FW_NUM_TYPES] = { - [INTEL_UC_FW_TYPE_GUC] = { blobs_guc, ARRAY_SIZE(blobs_guc) }, - [INTEL_UC_FW_TYPE_HUC] = { blobs_huc, ARRAY_SIZE(blobs_huc) }, - }; - static bool verified[INTEL_UC_FW_NUM_TYPES]; const struct uc_fw_platform_requirement *fw_blobs; enum intel_platform p = INTEL_INFO(i915)->platform; u32 fw_count; @@ -286,6 +288,11 @@ __uc_fw_auto_select(struct drm_i915_private *i915, struct intel_uc_fw *uc_fw) continue; if (uc_fw->file_selected.path) { + /* + * Continuing an earlier search after a found blob failed to load. + * Once the previously chosen path has been found, clear it out + * and let the search continue from there. + */ if (uc_fw->file_selected.path == blob->path) uc_fw->file_selected.path = NULL; @@ -306,76 +313,91 @@ __uc_fw_auto_select(struct drm_i915_private *i915, struct intel_uc_fw *uc_fw) /* Failed to find a match for the last attempt?! */ uc_fw->file_selected.path = NULL; } +} + +static bool validate_fw_table_type(struct drm_i915_private *i915, enum intel_uc_fw_type type) +{ + const struct uc_fw_platform_requirement *fw_blobs; + u32 fw_count; + int i; + + if (type >= ARRAY_SIZE(blobs_all)) { + drm_err(&i915->drm, "No blob array for %s\n", intel_uc_fw_type_repr(type)); + return false; + } + + fw_blobs = blobs_all[type].blobs; + fw_count = blobs_all[type].count; + + if (!fw_count) + return true; /* make sure the list is ordered as expected */ - if (IS_ENABLED(CONFIG_DRM_I915_SELFTEST) && !verified[uc_fw->type]) { - verified[uc_fw->type] = true; + for (i = 1; i < fw_count; i++) { + /* Next platform is good: */ + if (fw_blobs[i].p < fw_blobs[i - 1].p) + continue; - for (i = 1; i < fw_count; i++) { - /* Next platform is good: */ - if (fw_blobs[i].p < fw_blobs[i - 1].p) + /* Next platform revision is good: */ + if (fw_blobs[i].p == fw_blobs[i - 1].p && + fw_blobs[i].rev < fw_blobs[i - 1].rev) + continue; + + /* Platform/revision must be in order: */ + if (fw_blobs[i].p != fw_blobs[i - 1].p || + fw_blobs[i].rev != fw_blobs[i - 1].rev) + goto bad; + + /* Next major version is good: */ + if (fw_blobs[i].blob.major < fw_blobs[i - 1].blob.major) + continue; + + /* New must be before legacy: */ + if (!fw_blobs[i].blob.legacy && fw_blobs[i - 1].blob.legacy) + goto bad; + + /* New to legacy also means 0.0 to X.Y (HuC), or X.0 to X.Y (GuC) */ + if (fw_blobs[i].blob.legacy && !fw_blobs[i - 1].blob.legacy) { + if (!fw_blobs[i - 1].blob.major) continue; - /* Next platform revision is good: */ - if (fw_blobs[i].p == fw_blobs[i - 1].p && - fw_blobs[i].rev < fw_blobs[i - 1].rev) + if (fw_blobs[i].blob.major == fw_blobs[i - 1].blob.major) continue; + } - /* Platform/revision must be in order: */ - if (fw_blobs[i].p != fw_blobs[i - 1].p || - fw_blobs[i].rev != fw_blobs[i - 1].rev) - goto bad; + /* Major versions must be in order: */ + if (fw_blobs[i].blob.major != fw_blobs[i - 1].blob.major) + goto bad; - /* Next major version is good: */ - if (fw_blobs[i].blob.major < fw_blobs[i - 1].blob.major) - continue; + /* Next minor version is good: */ + if (fw_blobs[i].blob.minor < fw_blobs[i - 1].blob.minor) + continue; - /* New must be before legacy: */ - if (!fw_blobs[i].blob.legacy && fw_blobs[i - 1].blob.legacy) - goto bad; + /* Minor versions must be in order: */ + if (fw_blobs[i].blob.minor != fw_blobs[i - 1].blob.minor) + goto bad; - /* New to legacy also means 0.0 to X.Y (HuC), or X.0 to X.Y (GuC) */ - if (fw_blobs[i].blob.legacy && !fw_blobs[i - 1].blob.legacy) { - if (!fw_blobs[i - 1].blob.major) - continue; - - if (fw_blobs[i].blob.major == fw_blobs[i - 1].blob.major) - continue; - } - - /* Major versions must be in order: */ - if (fw_blobs[i].blob.major != fw_blobs[i - 1].blob.major) - goto bad; - - /* Next minor version is good: */ - if (fw_blobs[i].blob.minor < fw_blobs[i - 1].blob.minor) - continue; - - /* Minor versions must be in order: */ - if (fw_blobs[i].blob.minor != fw_blobs[i - 1].blob.minor) - goto bad; - - /* Patch versions must be in order: */ - if (fw_blobs[i].blob.patch <= fw_blobs[i - 1].blob.patch) - continue; + /* Patch versions must be in order: */ + if (fw_blobs[i].blob.patch <= fw_blobs[i - 1].blob.patch) + continue; bad: - drm_err(&i915->drm, "Invalid %s blob order: %s r%u %s%d.%d.%d comes before %s r%u %s%d.%d.%d\n", - intel_uc_fw_type_repr(uc_fw->type), - intel_platform_name(fw_blobs[i - 1].p), fw_blobs[i - 1].rev, - fw_blobs[i - 1].blob.legacy ? "L" : "v", - fw_blobs[i - 1].blob.major, - fw_blobs[i - 1].blob.minor, - fw_blobs[i - 1].blob.patch, - intel_platform_name(fw_blobs[i].p), fw_blobs[i].rev, - fw_blobs[i].blob.legacy ? "L" : "v", - fw_blobs[i].blob.major, - fw_blobs[i].blob.minor, - fw_blobs[i].blob.patch); - - uc_fw->file_selected.path = NULL; - } + drm_err(&i915->drm, "Invalid %s blob order: %s r%u %s%d.%d.%d comes before %s r%u %s%d.%d.%d\n", + intel_uc_fw_type_repr(type), + intel_platform_name(fw_blobs[i - 1].p), fw_blobs[i - 1].rev, + fw_blobs[i - 1].blob.legacy ? "L" : "v", + fw_blobs[i - 1].blob.major, + fw_blobs[i - 1].blob.minor, + fw_blobs[i - 1].blob.patch, + intel_platform_name(fw_blobs[i].p), fw_blobs[i].rev, + fw_blobs[i].blob.legacy ? "L" : "v", + fw_blobs[i].blob.major, + fw_blobs[i].blob.minor, + fw_blobs[i].blob.patch); + return false; } + + return true; } static const char *__override_guc_firmware_path(struct drm_i915_private *i915) @@ -430,7 +452,8 @@ static void __uc_fw_user_override(struct drm_i915_private *i915, struct intel_uc void intel_uc_fw_init_early(struct intel_uc_fw *uc_fw, enum intel_uc_fw_type type) { - struct drm_i915_private *i915 = ____uc_fw_to_gt(uc_fw, type)->i915; + struct intel_gt *gt = ____uc_fw_to_gt(uc_fw, type); + struct drm_i915_private *i915 = gt->i915; /* * we use FIRMWARE_UNINITIALIZED to detect checks against uc_fw->status @@ -443,6 +466,12 @@ void intel_uc_fw_init_early(struct intel_uc_fw *uc_fw, uc_fw->type = type; if (HAS_GT_UC(i915)) { + if (!validate_fw_table_type(i915, type)) { + gt->uc.fw_table_invalid = true; + intel_uc_fw_change_status(uc_fw, INTEL_UC_FIRMWARE_NOT_SUPPORTED); + return; + } + __uc_fw_auto_select(i915, uc_fw); __uc_fw_user_override(i915, uc_fw); } From bef550c952a71df6a6e86b11bdac5b10dac29163 Mon Sep 17 00:00:00 2001 From: John Harrison Date: Tue, 2 May 2023 16:40:06 -0700 Subject: [PATCH 076/108] drm/i915/uc: Reject duplicate entries in firmware table It was noticed that duplicate entries in the firmware table could cause an infinite loop in the firmware loading code if that entry failed to load. Duplicate entries are a bug anyway and so should never happen. Ensure they don't by tweaking the table validation code to reject duplicates. For full m/m/p files, that can be done by simply tweaking the patch level check to reject matching values. For reduced version entries, the filename itself must be compared. v2: Improve comment (review by Daniele) Signed-off-by: John Harrison Reviewed-by: Daniele Ceraolo Spurio Link: https://patchwork.freedesktop.org/patch/msgid/20230502234007.1762014-6-John.C.Harrison@Intel.com --- drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c | 26 +++++++++++++++++++++--- 1 file changed, 23 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c index b9ab370ce984..63e0049411d2 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c @@ -319,7 +319,7 @@ static bool validate_fw_table_type(struct drm_i915_private *i915, enum intel_uc_ { const struct uc_fw_platform_requirement *fw_blobs; u32 fw_count; - int i; + int i, j; if (type >= ARRAY_SIZE(blobs_all)) { drm_err(&i915->drm, "No blob array for %s\n", intel_uc_fw_type_repr(type)); @@ -334,6 +334,26 @@ static bool validate_fw_table_type(struct drm_i915_private *i915, enum intel_uc_ /* make sure the list is ordered as expected */ for (i = 1; i < fw_count; i++) { + /* Versionless file names must be unique per platform: */ + for (j = i + 1; j < fw_count; j++) { + /* Same platform? */ + if (fw_blobs[i].p != fw_blobs[j].p) + continue; + + if (fw_blobs[i].blob.path != fw_blobs[j].blob.path) + continue; + + drm_err(&i915->drm, "Duplicate %s blobs: %s r%u %s%d.%d.%d [%s] matches %s%d.%d.%d [%s]\n", + intel_uc_fw_type_repr(type), + intel_platform_name(fw_blobs[j].p), fw_blobs[j].rev, + fw_blobs[j].blob.legacy ? "L" : "v", + fw_blobs[j].blob.major, fw_blobs[j].blob.minor, + fw_blobs[j].blob.patch, fw_blobs[j].blob.path, + fw_blobs[i].blob.legacy ? "L" : "v", + fw_blobs[i].blob.major, fw_blobs[i].blob.minor, + fw_blobs[i].blob.patch, fw_blobs[i].blob.path); + } + /* Next platform is good: */ if (fw_blobs[i].p < fw_blobs[i - 1].p) continue; @@ -377,8 +397,8 @@ static bool validate_fw_table_type(struct drm_i915_private *i915, enum intel_uc_ if (fw_blobs[i].blob.minor != fw_blobs[i - 1].blob.minor) goto bad; - /* Patch versions must be in order: */ - if (fw_blobs[i].blob.patch <= fw_blobs[i - 1].blob.patch) + /* Patch versions must be in order and unique: */ + if (fw_blobs[i].blob.patch < fw_blobs[i - 1].blob.patch) continue; bad: From 760133d42f0adc92b8408ad5544c98f8aefbc75f Mon Sep 17 00:00:00 2001 From: John Harrison Date: Tue, 2 May 2023 16:40:07 -0700 Subject: [PATCH 077/108] drm/i915/uc: Make unexpected firmware versions an error in debug builds If the DEBUG_GEM config option is set then escalate the 'unexpected firmware version' message from a notice to an error. This will ensure that the CI system treats such occurences as a failure and logs a bug about it (or fails the pre-merge testing). Signed-off-by: John Harrison Reviewed-by: Daniele Ceraolo Spurio Link: https://patchwork.freedesktop.org/patch/msgid/20230502234007.1762014-7-John.C.Harrison@Intel.com --- drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c | 34 ++++++++++++++---------- 1 file changed, 20 insertions(+), 14 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c index 63e0049411d2..4ec7df9ed5ff 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c @@ -17,6 +17,12 @@ #include "i915_drv.h" #include "i915_reg.h" +#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM) +#define UNEXPECTED gt_err +#else +#define UNEXPECTED gt_notice +#endif + static inline struct intel_gt * ____uc_fw_to_gt(struct intel_uc_fw *uc_fw, enum intel_uc_fw_type type) { @@ -833,10 +839,10 @@ int intel_uc_fw_fetch(struct intel_uc_fw *uc_fw) if (uc_fw->file_wanted.ver.major && uc_fw->file_selected.ver.major) { /* Check the file's major version was as it claimed */ if (uc_fw->file_selected.ver.major != uc_fw->file_wanted.ver.major) { - gt_notice(gt, "%s firmware %s: unexpected version: %u.%u != %u.%u\n", - intel_uc_fw_type_repr(uc_fw->type), uc_fw->file_selected.path, - uc_fw->file_selected.ver.major, uc_fw->file_selected.ver.minor, - uc_fw->file_wanted.ver.major, uc_fw->file_wanted.ver.minor); + UNEXPECTED(gt, "%s firmware %s: unexpected version: %u.%u != %u.%u\n", + intel_uc_fw_type_repr(uc_fw->type), uc_fw->file_selected.path, + uc_fw->file_selected.ver.major, uc_fw->file_selected.ver.minor, + uc_fw->file_wanted.ver.major, uc_fw->file_wanted.ver.minor); if (!intel_uc_fw_is_overridden(uc_fw)) { err = -ENOEXEC; goto fail; @@ -854,16 +860,16 @@ int intel_uc_fw_fetch(struct intel_uc_fw *uc_fw) /* Preserve the version that was really wanted */ memcpy(&uc_fw->file_wanted, &file_ideal, sizeof(uc_fw->file_wanted)); - gt_notice(gt, "%s firmware %s (%d.%d.%d) is recommended, but only %s (%d.%d.%d) was found\n", - intel_uc_fw_type_repr(uc_fw->type), - uc_fw->file_wanted.path, - uc_fw->file_wanted.ver.major, - uc_fw->file_wanted.ver.minor, - uc_fw->file_wanted.ver.patch, - uc_fw->file_selected.path, - uc_fw->file_selected.ver.major, - uc_fw->file_selected.ver.minor, - uc_fw->file_selected.ver.patch); + UNEXPECTED(gt, "%s firmware %s (%d.%d.%d) is recommended, but only %s (%d.%d.%d) was found\n", + intel_uc_fw_type_repr(uc_fw->type), + uc_fw->file_wanted.path, + uc_fw->file_wanted.ver.major, + uc_fw->file_wanted.ver.minor, + uc_fw->file_wanted.ver.patch, + uc_fw->file_selected.path, + uc_fw->file_selected.ver.major, + uc_fw->file_selected.ver.minor, + uc_fw->file_selected.ver.patch); gt_info(gt, "Consider updating your linux-firmware pkg or downloading from %s\n", INTEL_UC_FIRMWARE_URL); } From 00b9dd061eb31c8619878f187e509eabbbdb681a Mon Sep 17 00:00:00 2001 From: Pallavi Mishra Date: Fri, 5 May 2023 16:40:04 +0200 Subject: [PATCH 078/108] drm/i915/mtl: Drop FLAT CCS check Remove FLAT CCS check from XY_FAST_COLOR_BLT usage, thus enabling MTL to use it. Signed-off-by: Pallavi Mishra Signed-off-by: Juha-Pekka Heikkila Reviewed-by: Nirmoy Das Reviewed-by: Andrzej Hajda Reviewed-by: Andi Shyti Signed-off-by: Nirmoy Das Link: https://patchwork.freedesktop.org/patch/msgid/20230505144005.23480-1-nirmoy.das@intel.com --- drivers/gpu/drm/i915/gt/intel_migrate.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_migrate.c b/drivers/gpu/drm/i915/gt/intel_migrate.c index 3f638f198796..e0998879a0e1 100644 --- a/drivers/gpu/drm/i915/gt/intel_migrate.c +++ b/drivers/gpu/drm/i915/gt/intel_migrate.c @@ -920,7 +920,7 @@ static int emit_clear(struct i915_request *rq, u32 offset, int size, GEM_BUG_ON(size >> PAGE_SHIFT > S16_MAX); - if (HAS_FLAT_CCS(i915) && ver >= 12) + if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 50)) ring_sz = XY_FAST_COLOR_BLT_DW; else if (ver >= 8) ring_sz = 8; @@ -931,7 +931,7 @@ static int emit_clear(struct i915_request *rq, u32 offset, int size, if (IS_ERR(cs)) return PTR_ERR(cs); - if (HAS_FLAT_CCS(i915) && ver >= 12) { + if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 50)) { *cs++ = XY_FAST_COLOR_BLT_CMD | XY_FAST_COLOR_BLT_DEPTH_32 | (XY_FAST_COLOR_BLT_DW - 2); *cs++ = FIELD_PREP(XY_FAST_COLOR_BLT_MOCS_MASK, mocs) | From 0da6bfe857ea9399498876cbe6ef428637b6e475 Mon Sep 17 00:00:00 2001 From: Clint Taylor Date: Fri, 5 May 2023 16:40:05 +0200 Subject: [PATCH 079/108] drm/i915/mtl: Add MTL for remapping CCS FBs Add support for remapping CCS FBs on MTL to remove the restriction of the power-of-two sized stride and the 2MB surface offset alignment for these FBs. Signed-off-by: Clint Taylor Signed-off-by: Juha-Pekka Heikkila Reviewed-by: Radhakrishna Sripada Signed-off-by: Nirmoy Das Link: https://patchwork.freedesktop.org/patch/msgid/20230505144005.23480-2-nirmoy.das@intel.com --- drivers/gpu/drm/i915/display/intel_fb.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_fb.c b/drivers/gpu/drm/i915/display/intel_fb.c index e5f637897b5e..c004f08fcfe1 100644 --- a/drivers/gpu/drm/i915/display/intel_fb.c +++ b/drivers/gpu/drm/i915/display/intel_fb.c @@ -1190,7 +1190,8 @@ bool intel_fb_needs_pot_stride_remap(const struct intel_framebuffer *fb) { struct drm_i915_private *i915 = to_i915(fb->base.dev); - return IS_ALDERLAKE_P(i915) && intel_fb_uses_dpt(&fb->base); + return (IS_ALDERLAKE_P(i915) || DISPLAY_VER(i915) >= 14) && + intel_fb_uses_dpt(&fb->base); } static int intel_fb_pitch(const struct intel_framebuffer *fb, int color_plane, unsigned int rotation) @@ -1326,9 +1327,10 @@ plane_view_scanout_stride(const struct intel_framebuffer *fb, int color_plane, unsigned int tile_width, unsigned int src_stride_tiles, unsigned int dst_stride_tiles) { + struct drm_i915_private *i915 = to_i915(fb->base.dev); unsigned int stride_tiles; - if (IS_ALDERLAKE_P(to_i915(fb->base.dev))) + if (IS_ALDERLAKE_P(i915) || DISPLAY_VER(i915) >= 14) stride_tiles = src_stride_tiles; else stride_tiles = dst_stride_tiles; @@ -1522,7 +1524,8 @@ static void intel_fb_view_init(struct drm_i915_private *i915, struct intel_fb_vi memset(view, 0, sizeof(*view)); view->gtt.type = view_type; - if (view_type == I915_GTT_VIEW_REMAPPED && IS_ALDERLAKE_P(i915)) + if (view_type == I915_GTT_VIEW_REMAPPED && + (IS_ALDERLAKE_P(i915) || DISPLAY_VER(i915) >= 14)) view->gtt.remapped.plane_alignment = SZ_2M / PAGE_SIZE; } From 5b8ff071b57e0a50a0e6eb8db55d159a68853f6b Mon Sep 17 00:00:00 2001 From: Radhakrishna Sripada Date: Fri, 5 May 2023 16:45:44 -0700 Subject: [PATCH 080/108] drm/i915/mtl: Fix the wa number for Wa_22016670082 Fixes the right lineage number for the workaround. Fixes: a7fa1537b791 ("drm/i915/mtl: Implement Wa_14019141245") Cc: Matt Roper Signed-off-by: Radhakrishna Sripada Reviewed-by: Matt Roper Reviewed-by: Tejas Upadhyay Link: https://patchwork.freedesktop.org/patch/msgid/20230505234544.4029535-1-radhakrishna.sripada@intel.com --- drivers/gpu/drm/i915/gt/intel_workarounds.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c index ad9e7f49a6fa..786349e95487 100644 --- a/drivers/gpu/drm/i915/gt/intel_workarounds.c +++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c @@ -1699,7 +1699,7 @@ xelpg_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal) wa_mcr_write_or(wal, RENDER_MOD_CTRL, FORCE_MISS_FTLB); wa_mcr_write_or(wal, COMP_MOD_CTRL, FORCE_MISS_FTLB); - /* Wa_14019141245 */ + /* Wa_22016670082 */ wa_write_or(wal, GEN12_SQCNT1, GEN12_STRICT_RAR_ENABLE); if (IS_MTL_GRAPHICS_STEP(gt->i915, M, STEP_A0, STEP_B0) || From 5e352e32aec23570ea948f039e25faf9b9ba362b Mon Sep 17 00:00:00 2001 From: Fei Yang Date: Tue, 9 May 2023 09:51:59 -0700 Subject: [PATCH 081/108] drm/i915: preparation for using PAT index This patch is a preparation for replacing enum i915_cache_level with PAT index. Caching policy for buffer objects is set through the PAT index in PTE, the old i915_cache_level is not sufficient to represent all caching modes supported by the hardware. Preparing the transition by adding some platform dependent data structures and helper functions to translate the cache_level to pat_index. cachelevel_to_pat: a platform dependent array mapping cache_level to pat_index. max_pat_index: the maximum PAT index recommended in hardware specification Needed for validating the PAT index passed in from user space. i915_gem_get_pat_index: function to convert cache_level to PAT index. obj_to_i915(obj): macro moved to header file for wider usage. I915_MAX_CACHE_LEVEL: upper bound of i915_cache_level for the convenience of coding. Cc: Chris Wilson Cc: Matt Roper Signed-off-by: Fei Yang Reviewed-by: Andi Shyti Reviewed-by: Andrzej Hajda Signed-off-by: Andi Shyti Link: https://patchwork.freedesktop.org/patch/msgid/20230509165200.1740-2-fei.yang@intel.com --- drivers/gpu/drm/i915/gem/i915_gem_object.c | 9 +++ drivers/gpu/drm/i915/gem/i915_gem_object.h | 4 + .../gpu/drm/i915/gem/i915_gem_object_types.h | 7 ++ drivers/gpu/drm/i915/gem/i915_gem_shrinker.c | 2 - drivers/gpu/drm/i915/gt/gen8_ppgtt.c | 6 ++ drivers/gpu/drm/i915/gt/intel_ggtt.c | 6 ++ drivers/gpu/drm/i915/i915_pci.c | 79 ++++++++++++++++--- drivers/gpu/drm/i915/intel_device_info.h | 5 ++ .../gpu/drm/i915/selftests/mock_gem_device.c | 9 +++ 9 files changed, 116 insertions(+), 11 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.c b/drivers/gpu/drm/i915/gem/i915_gem_object.c index 4666bb82f312..8c70a0ec7d2f 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_object.c @@ -45,6 +45,15 @@ static struct kmem_cache *slab_objects; static const struct drm_gem_object_funcs i915_gem_object_funcs; +unsigned int i915_gem_get_pat_index(struct drm_i915_private *i915, + enum i915_cache_level level) +{ + if (drm_WARN_ON(&i915->drm, level >= I915_MAX_CACHE_LEVEL)) + return 0; + + return INTEL_INFO(i915)->cachelevel_to_pat[level]; +} + struct drm_i915_gem_object *i915_gem_object_alloc(void) { struct drm_i915_gem_object *obj; diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.h b/drivers/gpu/drm/i915/gem/i915_gem_object.h index bc1291887d4f..284e1aa396cd 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_object.h @@ -20,6 +20,8 @@ enum intel_region_id; +#define obj_to_i915(obj__) to_i915((obj__)->base.dev) + static inline bool i915_gem_object_size_2big(u64 size) { struct drm_i915_gem_object *obj; @@ -30,6 +32,8 @@ static inline bool i915_gem_object_size_2big(u64 size) return false; } +unsigned int i915_gem_get_pat_index(struct drm_i915_private *i915, + enum i915_cache_level level); void i915_gem_init__objects(struct drm_i915_private *i915); void i915_objects_module_exit(void); diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h index 830c11431ee8..bf0bd8e11355 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h @@ -194,6 +194,13 @@ enum i915_cache_level { * engine. */ I915_CACHE_WT, + /** + * @I915_MAX_CACHE_LEVEL: + * + * Mark the last entry in the enum. Used for defining cachelevel_to_pat + * array for cache_level to pat translation table. + */ + I915_MAX_CACHE_LEVEL, }; enum i915_map_type { diff --git a/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c b/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c index b1672e054b21..214763942aa2 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c @@ -460,8 +460,6 @@ void i915_gem_shrinker_taints_mutex(struct drm_i915_private *i915, fs_reclaim_release(GFP_KERNEL); } -#define obj_to_i915(obj__) to_i915((obj__)->base.dev) - /** * i915_gem_object_make_unshrinkable - Hide the object from the shrinker. By * default all object types that support shrinking(see IS_SHRINKABLE), will also diff --git a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c index 22ec1566d2a7..bb6998d67133 100644 --- a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c +++ b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c @@ -78,6 +78,12 @@ static u64 mtl_pte_encode(dma_addr_t addr, case I915_CACHE_WT: pte |= GEN12_PPGTT_PTE_PAT0; break; + default: + /* This should never happen. Added to deal with the compile + * error due to the addition of I915_MAX_CACHE_LEVEL. Will + * be removed by the pat_index patch. + */ + break; } return pte; diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt.c b/drivers/gpu/drm/i915/gt/intel_ggtt.c index 20915edc8bd9..c8390d03fce2 100644 --- a/drivers/gpu/drm/i915/gt/intel_ggtt.c +++ b/drivers/gpu/drm/i915/gt/intel_ggtt.c @@ -242,6 +242,12 @@ static u64 mtl_ggtt_pte_encode(dma_addr_t addr, case I915_CACHE_WT: pte |= MTL_GGTT_PTE_PAT0; break; + default: + /* This should never happen. Added to deal with the compile + * error due to the addition of I915_MAX_CACHE_LEVEL. Will + * be removed by the pat_index patch. + */ + break; } return pte; diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c index 025d32c0b161..1a3247451d0f 100644 --- a/drivers/gpu/drm/i915/i915_pci.c +++ b/drivers/gpu/drm/i915/i915_pci.c @@ -29,6 +29,7 @@ #include "display/intel_display.h" #include "gt/intel_gt_regs.h" #include "gt/intel_sa_media.h" +#include "gem/i915_gem_object_types.h" #include "i915_driver.h" #include "i915_drv.h" @@ -163,6 +164,38 @@ .gamma_lut_tests = DRM_COLOR_LUT_NON_DECREASING, \ } +#define LEGACY_CACHELEVEL \ + .cachelevel_to_pat = { \ + [I915_CACHE_NONE] = 0, \ + [I915_CACHE_LLC] = 1, \ + [I915_CACHE_L3_LLC] = 2, \ + [I915_CACHE_WT] = 3, \ + } + +#define TGL_CACHELEVEL \ + .cachelevel_to_pat = { \ + [I915_CACHE_NONE] = 3, \ + [I915_CACHE_LLC] = 0, \ + [I915_CACHE_L3_LLC] = 0, \ + [I915_CACHE_WT] = 2, \ + } + +#define PVC_CACHELEVEL \ + .cachelevel_to_pat = { \ + [I915_CACHE_NONE] = 0, \ + [I915_CACHE_LLC] = 3, \ + [I915_CACHE_L3_LLC] = 3, \ + [I915_CACHE_WT] = 2, \ + } + +#define MTL_CACHELEVEL \ + .cachelevel_to_pat = { \ + [I915_CACHE_NONE] = 2, \ + [I915_CACHE_LLC] = 3, \ + [I915_CACHE_L3_LLC] = 3, \ + [I915_CACHE_WT] = 1, \ + } + /* Keep in gen based order, and chronological order within a gen */ #define GEN_DEFAULT_PAGE_SIZES \ @@ -188,11 +221,13 @@ .has_snoop = true, \ .has_coherent_ggtt = false, \ .dma_mask_size = 32, \ + .max_pat_index = 3, \ I9XX_PIPE_OFFSETS, \ I9XX_CURSOR_OFFSETS, \ I9XX_COLORS, \ GEN_DEFAULT_PAGE_SIZES, \ - GEN_DEFAULT_REGIONS + GEN_DEFAULT_REGIONS, \ + LEGACY_CACHELEVEL #define I845_FEATURES \ GEN(2), \ @@ -209,11 +244,13 @@ .has_snoop = true, \ .has_coherent_ggtt = false, \ .dma_mask_size = 32, \ + .max_pat_index = 3, \ I845_PIPE_OFFSETS, \ I845_CURSOR_OFFSETS, \ I845_COLORS, \ GEN_DEFAULT_PAGE_SIZES, \ - GEN_DEFAULT_REGIONS + GEN_DEFAULT_REGIONS, \ + LEGACY_CACHELEVEL static const struct intel_device_info i830_info = { I830_FEATURES, @@ -248,11 +285,13 @@ static const struct intel_device_info i865g_info = { .has_snoop = true, \ .has_coherent_ggtt = true, \ .dma_mask_size = 32, \ + .max_pat_index = 3, \ I9XX_PIPE_OFFSETS, \ I9XX_CURSOR_OFFSETS, \ I9XX_COLORS, \ GEN_DEFAULT_PAGE_SIZES, \ - GEN_DEFAULT_REGIONS + GEN_DEFAULT_REGIONS, \ + LEGACY_CACHELEVEL static const struct intel_device_info i915g_info = { GEN3_FEATURES, @@ -340,11 +379,13 @@ static const struct intel_device_info pnv_m_info = { .has_snoop = true, \ .has_coherent_ggtt = true, \ .dma_mask_size = 36, \ + .max_pat_index = 3, \ I9XX_PIPE_OFFSETS, \ I9XX_CURSOR_OFFSETS, \ I9XX_COLORS, \ GEN_DEFAULT_PAGE_SIZES, \ - GEN_DEFAULT_REGIONS + GEN_DEFAULT_REGIONS, \ + LEGACY_CACHELEVEL static const struct intel_device_info i965g_info = { GEN4_FEATURES, @@ -394,11 +435,13 @@ static const struct intel_device_info gm45_info = { /* ilk does support rc6, but we do not implement [power] contexts */ \ .has_rc6 = 0, \ .dma_mask_size = 36, \ + .max_pat_index = 3, \ I9XX_PIPE_OFFSETS, \ I9XX_CURSOR_OFFSETS, \ ILK_COLORS, \ GEN_DEFAULT_PAGE_SIZES, \ - GEN_DEFAULT_REGIONS + GEN_DEFAULT_REGIONS, \ + LEGACY_CACHELEVEL static const struct intel_device_info ilk_d_info = { GEN5_FEATURES, @@ -428,13 +471,15 @@ static const struct intel_device_info ilk_m_info = { .has_rc6p = 0, \ .has_rps = true, \ .dma_mask_size = 40, \ + .max_pat_index = 3, \ .__runtime.ppgtt_type = INTEL_PPGTT_ALIASING, \ .__runtime.ppgtt_size = 31, \ I9XX_PIPE_OFFSETS, \ I9XX_CURSOR_OFFSETS, \ ILK_COLORS, \ GEN_DEFAULT_PAGE_SIZES, \ - GEN_DEFAULT_REGIONS + GEN_DEFAULT_REGIONS, \ + LEGACY_CACHELEVEL #define SNB_D_PLATFORM \ GEN6_FEATURES, \ @@ -481,13 +526,15 @@ static const struct intel_device_info snb_m_gt2_info = { .has_reset_engine = true, \ .has_rps = true, \ .dma_mask_size = 40, \ + .max_pat_index = 3, \ .__runtime.ppgtt_type = INTEL_PPGTT_ALIASING, \ .__runtime.ppgtt_size = 31, \ IVB_PIPE_OFFSETS, \ IVB_CURSOR_OFFSETS, \ IVB_COLORS, \ GEN_DEFAULT_PAGE_SIZES, \ - GEN_DEFAULT_REGIONS + GEN_DEFAULT_REGIONS, \ + LEGACY_CACHELEVEL #define IVB_D_PLATFORM \ GEN7_FEATURES, \ @@ -541,6 +588,7 @@ static const struct intel_device_info vlv_info = { .display.has_gmch = 1, .display.has_hotplug = 1, .dma_mask_size = 40, + .max_pat_index = 3, .__runtime.ppgtt_type = INTEL_PPGTT_ALIASING, .__runtime.ppgtt_size = 31, .has_snoop = true, @@ -552,6 +600,7 @@ static const struct intel_device_info vlv_info = { I9XX_COLORS, GEN_DEFAULT_PAGE_SIZES, GEN_DEFAULT_REGIONS, + LEGACY_CACHELEVEL, }; #define G75_FEATURES \ @@ -639,6 +688,7 @@ static const struct intel_device_info chv_info = { .has_logical_ring_contexts = 1, .display.has_gmch = 1, .dma_mask_size = 39, + .max_pat_index = 3, .__runtime.ppgtt_type = INTEL_PPGTT_FULL, .__runtime.ppgtt_size = 32, .has_reset_engine = 1, @@ -650,6 +700,7 @@ static const struct intel_device_info chv_info = { CHV_COLORS, GEN_DEFAULT_PAGE_SIZES, GEN_DEFAULT_REGIONS, + LEGACY_CACHELEVEL, }; #define GEN9_DEFAULT_PAGE_SIZES \ @@ -731,11 +782,13 @@ static const struct intel_device_info skl_gt4_info = { .has_snoop = true, \ .has_coherent_ggtt = false, \ .display.has_ipc = 1, \ + .max_pat_index = 3, \ HSW_PIPE_OFFSETS, \ IVB_CURSOR_OFFSETS, \ IVB_COLORS, \ GEN9_DEFAULT_PAGE_SIZES, \ - GEN_DEFAULT_REGIONS + GEN_DEFAULT_REGIONS, \ + LEGACY_CACHELEVEL static const struct intel_device_info bxt_info = { GEN9_LP_FEATURES, @@ -889,9 +942,11 @@ static const struct intel_device_info jsl_info = { [TRANSCODER_DSI_1] = TRANSCODER_DSI1_OFFSET, \ }, \ TGL_CURSOR_OFFSETS, \ + TGL_CACHELEVEL, \ .has_global_mocs = 1, \ .has_pxp = 1, \ - .display.has_dsb = 1 + .display.has_dsb = 1, \ + .max_pat_index = 3 static const struct intel_device_info tgl_info = { GEN12_FEATURES, @@ -1013,6 +1068,7 @@ static const struct intel_device_info adl_p_info = { .__runtime.graphics.ip.ver = 12, \ .__runtime.graphics.ip.rel = 50, \ XE_HP_PAGE_SIZES, \ + TGL_CACHELEVEL, \ .dma_mask_size = 46, \ .has_3d_pipeline = 1, \ .has_64bit_reloc = 1, \ @@ -1031,6 +1087,7 @@ static const struct intel_device_info adl_p_info = { .has_reset_engine = 1, \ .has_rps = 1, \ .has_runtime_pm = 1, \ + .max_pat_index = 3, \ .__runtime.ppgtt_size = 48, \ .__runtime.ppgtt_type = INTEL_PPGTT_FULL @@ -1107,11 +1164,13 @@ static const struct intel_device_info pvc_info = { PLATFORM(INTEL_PONTEVECCHIO), NO_DISPLAY, .has_flat_ccs = 0, + .max_pat_index = 7, .__runtime.platform_engine_mask = BIT(BCS0) | BIT(VCS0) | BIT(CCS0) | BIT(CCS1) | BIT(CCS2) | BIT(CCS3), .require_force_probe = 1, + PVC_CACHELEVEL, }; #define XE_LPDP_FEATURES \ @@ -1149,9 +1208,11 @@ static const struct intel_device_info mtl_info = { .has_llc = 0, .has_mslice_steering = 0, .has_snoop = 1, + .max_pat_index = 4, .__runtime.memory_regions = REGION_SMEM | REGION_STOLEN_LMEM, .__runtime.platform_engine_mask = BIT(RCS0) | BIT(BCS0) | BIT(CCS0), .require_force_probe = 1, + MTL_CACHELEVEL, }; #undef PLATFORM diff --git a/drivers/gpu/drm/i915/intel_device_info.h b/drivers/gpu/drm/i915/intel_device_info.h index 080a4557899b..d737dd601a6e 100644 --- a/drivers/gpu/drm/i915/intel_device_info.h +++ b/drivers/gpu/drm/i915/intel_device_info.h @@ -35,6 +35,8 @@ #include "gt/intel_context_types.h" #include "gt/intel_sseu.h" +#include "gem/i915_gem_object_types.h" + struct drm_printer; struct drm_i915_private; struct intel_gt_definition; @@ -308,6 +310,9 @@ struct intel_device_info { * Initial runtime info. Do not access outside of i915_driver_create(). */ const struct intel_runtime_info __runtime; + + u32 cachelevel_to_pat[I915_MAX_CACHE_LEVEL]; + u32 max_pat_index; }; struct intel_driver_caps { diff --git a/drivers/gpu/drm/i915/selftests/mock_gem_device.c b/drivers/gpu/drm/i915/selftests/mock_gem_device.c index f6a7c0bd2955..0eda8b4ee17f 100644 --- a/drivers/gpu/drm/i915/selftests/mock_gem_device.c +++ b/drivers/gpu/drm/i915/selftests/mock_gem_device.c @@ -123,7 +123,9 @@ struct drm_i915_private *mock_gem_device(void) static struct dev_iommu fake_iommu = { .priv = (void *)-1 }; #endif struct drm_i915_private *i915; + struct intel_device_info *i915_info; struct pci_dev *pdev; + unsigned int i; int ret; pdev = kzalloc(sizeof(*pdev), GFP_KERNEL); @@ -180,6 +182,13 @@ struct drm_i915_private *mock_gem_device(void) I915_GTT_PAGE_SIZE_2M; RUNTIME_INFO(i915)->memory_regions = REGION_SMEM; + + /* simply use legacy cache level for mock device */ + i915_info = (struct intel_device_info *)INTEL_INFO(i915); + i915_info->max_pat_index = 3; + for (i = 0; i < I915_MAX_CACHE_LEVEL; i++) + i915_info->cachelevel_to_pat[i] = i; + intel_memory_regions_hw_probe(i915); spin_lock_init(&i915->gpu_error.lock); From 9275277d53248d3f529d7ce66a6954241ae4d5cb Mon Sep 17 00:00:00 2001 From: Fei Yang Date: Tue, 9 May 2023 09:52:00 -0700 Subject: [PATCH 082/108] drm/i915: use pat_index instead of cache_level Currently the KMD is using enum i915_cache_level to set caching policy for buffer objects. This is flaky because the PAT index which really controls the caching behavior in PTE has far more levels than what's defined in the enum. In addition, the PAT index is platform dependent, having to translate between i915_cache_level and PAT index is not reliable, and makes the code more complicated. From UMD's perspective there is also a necessity to set caching policy for performance fine tuning. It's much easier for the UMD to directly use PAT index because the behavior of each PAT index is clearly defined in Bspec. Having the abstracted i915_cache_level sitting in between would only cause more ambiguity. PAT is expected to work much like MOCS already works today, and by design userspace is expected to select the index that exactly matches the desired behavior described in the hardware specification. For these reasons this patch replaces i915_cache_level with PAT index. Also note, the cache_level is not completely removed yet, because the KMD still has the need of creating buffer objects with simple cache settings such as cached, uncached, or writethrough. For kernel objects, cache_level is used for simplicity and backward compatibility. For Pre-gen12 platforms PAT can have 1:1 mapping to i915_cache_level, so these two are interchangeable. see the use of LEGACY_CACHELEVEL. One consequence of this change is that gen8_pte_encode is no longer working for gen12 platforms due to the fact that gen12 platforms has different PAT definitions. In the meantime the mtl_pte_encode introduced specfically for MTL becomes generic for all gen12 platforms. This patch renames the MTL PTE encode function into gen12_pte_encode and apply it to all gen12. Even though this change looks unrelated, but separating them would temporarily break gen12 PTE encoding, thus squash them in one patch. Special note: this patch changes the way caching behavior is controlled in the sense that some objects are left to be managed by userspace. For such objects we need to be careful not to change the userspace settings.There are kerneldoc and comments added around obj->cache_coherent, cache_dirty, and how to bypass the checkings by i915_gem_object_has_cache_level. For full understanding, these changes need to be looked at together with the two follow-up patches, one disables the {set|get}_caching ioctl's and the other adds set_pat extension to the GEM_CREATE uAPI. Bspec: 63019 Cc: Chris Wilson Signed-off-by: Fei Yang Reviewed-by: Andi Shyti Reviewed-by: Matt Roper Signed-off-by: Andi Shyti Link: https://patchwork.freedesktop.org/patch/msgid/20230509165200.1740-3-fei.yang@intel.com --- drivers/gpu/drm/i915/display/intel_dpt.c | 12 +-- drivers/gpu/drm/i915/gem/i915_gem_domain.c | 62 +++++++++----- .../gpu/drm/i915/gem/i915_gem_execbuffer.c | 15 +++- drivers/gpu/drm/i915/gem/i915_gem_mman.c | 11 ++- drivers/gpu/drm/i915/gem/i915_gem_object.c | 51 ++++++++++- drivers/gpu/drm/i915/gem/i915_gem_object.h | 4 + .../gpu/drm/i915/gem/i915_gem_object_types.h | 46 +++++++++- drivers/gpu/drm/i915/gem/i915_gem_stolen.c | 4 +- drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c | 8 +- .../gpu/drm/i915/gem/selftests/huge_pages.c | 2 +- .../drm/i915/gem/selftests/i915_gem_migrate.c | 2 +- .../drm/i915/gem/selftests/i915_gem_mman.c | 2 +- drivers/gpu/drm/i915/gt/gen6_ppgtt.c | 10 ++- drivers/gpu/drm/i915/gt/gen8_ppgtt.c | 84 +++++++++---------- drivers/gpu/drm/i915/gt/gen8_ppgtt.h | 3 +- drivers/gpu/drm/i915/gt/intel_ggtt.c | 82 +++++++++--------- drivers/gpu/drm/i915/gt/intel_gtt.h | 18 ++-- drivers/gpu/drm/i915/gt/intel_migrate.c | 47 ++++++----- drivers/gpu/drm/i915/gt/intel_migrate.h | 13 ++- drivers/gpu/drm/i915/gt/intel_ppgtt.c | 4 +- drivers/gpu/drm/i915/gt/selftest_migrate.c | 47 ++++++----- drivers/gpu/drm/i915/gt/selftest_reset.c | 8 +- drivers/gpu/drm/i915/gt/selftest_timeline.c | 2 +- drivers/gpu/drm/i915/gt/selftest_tlb.c | 4 +- drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c | 10 ++- drivers/gpu/drm/i915/i915_debugfs.c | 51 +++++++++-- drivers/gpu/drm/i915/i915_gem.c | 27 +++++- drivers/gpu/drm/i915/i915_gpu_error.c | 8 +- drivers/gpu/drm/i915/i915_vma.c | 16 ++-- drivers/gpu/drm/i915/i915_vma.h | 2 +- drivers/gpu/drm/i915/i915_vma_types.h | 2 - drivers/gpu/drm/i915/selftests/i915_gem.c | 5 +- .../gpu/drm/i915/selftests/i915_gem_evict.c | 4 +- drivers/gpu/drm/i915/selftests/i915_gem_gtt.c | 15 ++-- .../drm/i915/selftests/intel_memory_region.c | 4 +- drivers/gpu/drm/i915/selftests/mock_gtt.c | 8 +- 36 files changed, 454 insertions(+), 239 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_dpt.c b/drivers/gpu/drm/i915/display/intel_dpt.c index c5eacfdba1a5..7c5fddb203ba 100644 --- a/drivers/gpu/drm/i915/display/intel_dpt.c +++ b/drivers/gpu/drm/i915/display/intel_dpt.c @@ -43,24 +43,24 @@ static void gen8_set_pte(void __iomem *addr, gen8_pte_t pte) static void dpt_insert_page(struct i915_address_space *vm, dma_addr_t addr, u64 offset, - enum i915_cache_level level, + unsigned int pat_index, u32 flags) { struct i915_dpt *dpt = i915_vm_to_dpt(vm); gen8_pte_t __iomem *base = dpt->iomem; gen8_set_pte(base + offset / I915_GTT_PAGE_SIZE, - vm->pte_encode(addr, level, flags)); + vm->pte_encode(addr, pat_index, flags)); } static void dpt_insert_entries(struct i915_address_space *vm, struct i915_vma_resource *vma_res, - enum i915_cache_level level, + unsigned int pat_index, u32 flags) { struct i915_dpt *dpt = i915_vm_to_dpt(vm); gen8_pte_t __iomem *base = dpt->iomem; - const gen8_pte_t pte_encode = vm->pte_encode(0, level, flags); + const gen8_pte_t pte_encode = vm->pte_encode(0, pat_index, flags); struct sgt_iter sgt_iter; dma_addr_t addr; int i; @@ -83,7 +83,7 @@ static void dpt_clear_range(struct i915_address_space *vm, static void dpt_bind_vma(struct i915_address_space *vm, struct i915_vm_pt_stash *stash, struct i915_vma_resource *vma_res, - enum i915_cache_level cache_level, + unsigned int pat_index, u32 flags) { u32 pte_flags; @@ -98,7 +98,7 @@ static void dpt_bind_vma(struct i915_address_space *vm, if (vma_res->bi.lmem) pte_flags |= PTE_LM; - vm->insert_entries(vm, vma_res, cache_level, pte_flags); + vm->insert_entries(vm, vma_res, pat_index, pte_flags); vma_res->page_sizes_gtt = I915_GTT_PAGE_SIZE; diff --git a/drivers/gpu/drm/i915/gem/i915_gem_domain.c b/drivers/gpu/drm/i915/gem/i915_gem_domain.c index d2d5a24301b2..05107a6efe45 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_domain.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_domain.c @@ -27,8 +27,15 @@ static bool gpu_write_needs_clflush(struct drm_i915_gem_object *obj) if (IS_DGFX(i915)) return false; - return !(obj->cache_level == I915_CACHE_NONE || - obj->cache_level == I915_CACHE_WT); + /* + * For objects created by userspace through GEM_CREATE with pat_index + * set by set_pat extension, i915_gem_object_has_cache_level() will + * always return true, because the coherency of such object is managed + * by userspace. Othereise the call here would fall back to checking + * whether the object is un-cached or write-through. + */ + return !(i915_gem_object_has_cache_level(obj, I915_CACHE_NONE) || + i915_gem_object_has_cache_level(obj, I915_CACHE_WT)); } bool i915_gem_cpu_write_needs_clflush(struct drm_i915_gem_object *obj) @@ -267,7 +274,13 @@ int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj, { int ret; - if (obj->cache_level == cache_level) + /* + * For objects created by userspace through GEM_CREATE with pat_index + * set by set_pat extension, simply return 0 here without touching + * the cache setting, because such objects should have an immutable + * cache setting by desgin and always managed by userspace. + */ + if (i915_gem_object_has_cache_level(obj, cache_level)) return 0; ret = i915_gem_object_wait(obj, @@ -278,10 +291,8 @@ int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj, return ret; /* Always invalidate stale cachelines */ - if (obj->cache_level != cache_level) { - i915_gem_object_set_cache_coherency(obj, cache_level); - obj->cache_dirty = true; - } + i915_gem_object_set_cache_coherency(obj, cache_level); + obj->cache_dirty = true; /* The cache-level will be applied when each vma is rebound. */ return i915_gem_object_unbind(obj, @@ -306,20 +317,22 @@ int i915_gem_get_caching_ioctl(struct drm_device *dev, void *data, goto out; } - switch (obj->cache_level) { - case I915_CACHE_LLC: - case I915_CACHE_L3_LLC: - args->caching = I915_CACHING_CACHED; - break; - - case I915_CACHE_WT: - args->caching = I915_CACHING_DISPLAY; - break; - - default: - args->caching = I915_CACHING_NONE; - break; + /* + * This ioctl should be disabled for the objects with pat_index + * set by user space. + */ + if (obj->pat_set_by_user) { + err = -EOPNOTSUPP; + goto out; } + + if (i915_gem_object_has_cache_level(obj, I915_CACHE_LLC) || + i915_gem_object_has_cache_level(obj, I915_CACHE_L3_LLC)) + args->caching = I915_CACHING_CACHED; + else if (i915_gem_object_has_cache_level(obj, I915_CACHE_WT)) + args->caching = I915_CACHING_DISPLAY; + else + args->caching = I915_CACHING_NONE; out: rcu_read_unlock(); return err; @@ -364,6 +377,15 @@ int i915_gem_set_caching_ioctl(struct drm_device *dev, void *data, if (!obj) return -ENOENT; + /* + * This ioctl should be disabled for the objects with pat_index + * set by user space. + */ + if (obj->pat_set_by_user) { + ret = -EOPNOTSUPP; + goto out; + } + /* * The caching mode of proxy object is handled by its generator, and * not allowed to be changed by userspace. diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c index 3aeede6aee4d..5fb459ea4294 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c @@ -640,9 +640,15 @@ static inline int use_cpu_reloc(const struct reloc_cache *cache, if (DBG_FORCE_RELOC == FORCE_GTT_RELOC) return false; + /* + * For objects created by userspace through GEM_CREATE with pat_index + * set by set_pat extension, i915_gem_object_has_cache_level() always + * return true, otherwise the call would fall back to checking whether + * the object is un-cached. + */ return (cache->has_llc || obj->cache_dirty || - obj->cache_level != I915_CACHE_NONE); + !i915_gem_object_has_cache_level(obj, I915_CACHE_NONE)); } static int eb_reserve_vma(struct i915_execbuffer *eb, @@ -1324,7 +1330,10 @@ static void *reloc_iomap(struct i915_vma *batch, if (drm_mm_node_allocated(&cache->node)) { ggtt->vm.insert_page(&ggtt->vm, i915_gem_object_get_dma_address(obj, page), - offset, I915_CACHE_NONE, 0); + offset, + i915_gem_get_pat_index(ggtt->vm.i915, + I915_CACHE_NONE), + 0); } else { offset += page << PAGE_SHIFT; } @@ -1464,7 +1473,7 @@ eb_relocate_entry(struct i915_execbuffer *eb, reloc_cache_unmap(&eb->reloc_cache); mutex_lock(&vma->vm->mutex); err = i915_vma_bind(target->vma, - target->vma->obj->cache_level, + target->vma->obj->pat_index, PIN_GLOBAL, NULL, NULL); mutex_unlock(&vma->vm->mutex); reloc_cache_remap(&eb->reloc_cache, ev->vma->obj); diff --git a/drivers/gpu/drm/i915/gem/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/i915_gem_mman.c index 3dbacdf0911a..4e7a838ab7bd 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_mman.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_mman.c @@ -383,7 +383,16 @@ static vm_fault_t vm_fault_gtt(struct vm_fault *vmf) } /* Access to snoopable pages through the GTT is incoherent. */ - if (obj->cache_level != I915_CACHE_NONE && !HAS_LLC(i915)) { + /* + * For objects created by userspace through GEM_CREATE with pat_index + * set by set_pat extension, coherency is managed by userspace, make + * sure we don't fail handling the vm fault by calling + * i915_gem_object_has_cache_level() which always return true for such + * objects. Otherwise this helper function would fall back to checking + * whether the object is un-cached. + */ + if (!(i915_gem_object_has_cache_level(obj, I915_CACHE_NONE) || + HAS_LLC(i915))) { ret = -EFAULT; goto err_unpin; } diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.c b/drivers/gpu/drm/i915/gem/i915_gem_object.c index 8c70a0ec7d2f..46a19b099ec8 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_object.c @@ -54,6 +54,24 @@ unsigned int i915_gem_get_pat_index(struct drm_i915_private *i915, return INTEL_INFO(i915)->cachelevel_to_pat[level]; } +bool i915_gem_object_has_cache_level(const struct drm_i915_gem_object *obj, + enum i915_cache_level lvl) +{ + /* + * In case the pat_index is set by user space, this kernel mode + * driver should leave the coherency to be managed by user space, + * simply return true here. + */ + if (obj->pat_set_by_user) + return true; + + /* + * Otherwise the pat_index should have been converted from cache_level + * so that the following comparison is valid. + */ + return obj->pat_index == i915_gem_get_pat_index(obj_to_i915(obj), lvl); +} + struct drm_i915_gem_object *i915_gem_object_alloc(void) { struct drm_i915_gem_object *obj; @@ -133,7 +151,7 @@ void i915_gem_object_set_cache_coherency(struct drm_i915_gem_object *obj, { struct drm_i915_private *i915 = to_i915(obj->base.dev); - obj->cache_level = cache_level; + obj->pat_index = i915_gem_get_pat_index(i915, cache_level); if (cache_level != I915_CACHE_NONE) obj->cache_coherent = (I915_BO_CACHE_COHERENT_FOR_READ | @@ -148,6 +166,37 @@ void i915_gem_object_set_cache_coherency(struct drm_i915_gem_object *obj, !IS_DGFX(i915); } +/** + * i915_gem_object_set_pat_index - set PAT index to be used in PTE encode + * @obj: #drm_i915_gem_object + * @pat_index: PAT index + * + * This is a clone of i915_gem_object_set_cache_coherency taking pat index + * instead of cache_level as its second argument. + */ +void i915_gem_object_set_pat_index(struct drm_i915_gem_object *obj, + unsigned int pat_index) +{ + struct drm_i915_private *i915 = to_i915(obj->base.dev); + + if (obj->pat_index == pat_index) + return; + + obj->pat_index = pat_index; + + if (pat_index != i915_gem_get_pat_index(i915, I915_CACHE_NONE)) + obj->cache_coherent = (I915_BO_CACHE_COHERENT_FOR_READ | + I915_BO_CACHE_COHERENT_FOR_WRITE); + else if (HAS_LLC(i915)) + obj->cache_coherent = I915_BO_CACHE_COHERENT_FOR_READ; + else + obj->cache_coherent = 0; + + obj->cache_dirty = + !(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE) && + !IS_DGFX(i915); +} + bool i915_gem_object_can_bypass_llc(struct drm_i915_gem_object *obj) { struct drm_i915_private *i915 = to_i915(obj->base.dev); diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.h b/drivers/gpu/drm/i915/gem/i915_gem_object.h index 284e1aa396cd..884a17275b3a 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_object.h @@ -34,6 +34,8 @@ static inline bool i915_gem_object_size_2big(u64 size) unsigned int i915_gem_get_pat_index(struct drm_i915_private *i915, enum i915_cache_level level); +bool i915_gem_object_has_cache_level(const struct drm_i915_gem_object *obj, + enum i915_cache_level lvl); void i915_gem_init__objects(struct drm_i915_private *i915); void i915_objects_module_exit(void); @@ -764,6 +766,8 @@ bool i915_gem_object_has_unknown_state(struct drm_i915_gem_object *obj); void i915_gem_object_set_cache_coherency(struct drm_i915_gem_object *obj, unsigned int cache_level); +void i915_gem_object_set_pat_index(struct drm_i915_gem_object *obj, + unsigned int pat_index); bool i915_gem_object_can_bypass_llc(struct drm_i915_gem_object *obj); void i915_gem_object_flush_if_display(struct drm_i915_gem_object *obj); void i915_gem_object_flush_if_display_locked(struct drm_i915_gem_object *obj); diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h index bf0bd8e11355..e72c57716bee 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h @@ -364,15 +364,43 @@ struct drm_i915_gem_object { #define I915_BO_FLAG_STRUCT_PAGE BIT(0) /* Object backed by struct pages */ #define I915_BO_FLAG_IOMEM BIT(1) /* Object backed by IO memory */ /** - * @cache_level: The desired GTT caching level. + * @pat_index: The desired PAT index. * - * See enum i915_cache_level for possible values, along with what - * each does. + * See hardware specification for valid PAT indices for each platform. + * This field replaces the @cache_level that contains a value of enum + * i915_cache_level since PAT indices are being used by both userspace + * and kernel mode driver for caching policy control after GEN12. + * In the meantime platform specific tables are created to translate + * i915_cache_level into pat index, for more details check the macros + * defined i915/i915_pci.c, e.g. PVC_CACHELEVEL. + * For backward compatibility, this field contains values exactly match + * the entries of enum i915_cache_level for pre-GEN12 platforms (See + * LEGACY_CACHELEVEL), so that the PTE encode functions for these + * legacy platforms can stay the same. */ - unsigned int cache_level:3; + unsigned int pat_index:6; + /** + * @pat_set_by_user: Indicate whether pat_index is set by user space + * + * This field is set to false by default, only set to true if the + * pat_index is set by user space. By design, user space is capable of + * managing caching behavior by setting pat_index, in which case this + * kernel mode driver should never touch the pat_index. + */ + unsigned int pat_set_by_user:1; /** * @cache_coherent: * + * Note: with the change above which replaced @cache_level with pat_index, + * the use of @cache_coherent is limited to the objects created by kernel + * or by userspace without pat index specified. + * Check for @pat_set_by_user to find out if an object has pat index set + * by userspace. The ioctl's to change cache settings have also been + * disabled for the objects with pat index set by userspace. Please don't + * assume @cache_coherent having the flags set as describe here. A helper + * function i915_gem_object_has_cache_level() provides one way to bypass + * the use of this field. + * * Track whether the pages are coherent with the GPU if reading or * writing through the CPU caches. The largely depends on the * @cache_level setting. @@ -446,6 +474,16 @@ struct drm_i915_gem_object { /** * @cache_dirty: * + * Note: with the change above which replaced cache_level with pat_index, + * the use of @cache_dirty is limited to the objects created by kernel + * or by userspace without pat index specified. + * Check for @pat_set_by_user to find out if an object has pat index set + * by userspace. The ioctl's to change cache settings have also been + * disabled for the objects with pat_index set by userspace. Please don't + * assume @cache_dirty is set as describe here. Also see helper function + * i915_gem_object_has_cache_level() for possible ways to bypass the use + * of this field. + * * Track if we are we dirty with writes through the CPU cache for this * object. As a result reading directly from main memory might yield * stale data. diff --git a/drivers/gpu/drm/i915/gem/i915_gem_stolen.c b/drivers/gpu/drm/i915/gem/i915_gem_stolen.c index ee492d823f1b..3b094d36a0b0 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_stolen.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_stolen.c @@ -565,7 +565,9 @@ static void dbg_poison(struct i915_ggtt *ggtt, ggtt->vm.insert_page(&ggtt->vm, addr, ggtt->error_capture.start, - I915_CACHE_NONE, 0); + i915_gem_get_pat_index(ggtt->vm.i915, + I915_CACHE_NONE), + 0); mb(); s = io_mapping_map_wc(&ggtt->iomap, diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c b/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c index 69eb20ed4d47..7078af2f8f79 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c @@ -214,7 +214,8 @@ static struct dma_fence *i915_ttm_accel_move(struct ttm_buffer_object *bo, intel_engine_pm_get(to_gt(i915)->migrate.context->engine); ret = intel_context_migrate_clear(to_gt(i915)->migrate.context, deps, - dst_st->sgl, dst_level, + dst_st->sgl, + i915_gem_get_pat_index(i915, dst_level), i915_ttm_gtt_binds_lmem(dst_mem), 0, &rq); } else { @@ -228,9 +229,10 @@ static struct dma_fence *i915_ttm_accel_move(struct ttm_buffer_object *bo, intel_engine_pm_get(to_gt(i915)->migrate.context->engine); ret = intel_context_migrate_copy(to_gt(i915)->migrate.context, deps, src_rsgt->table.sgl, - src_level, + i915_gem_get_pat_index(i915, src_level), i915_ttm_gtt_binds_lmem(bo->resource), - dst_st->sgl, dst_level, + dst_st->sgl, + i915_gem_get_pat_index(i915, dst_level), i915_ttm_gtt_binds_lmem(dst_mem), &rq); diff --git a/drivers/gpu/drm/i915/gem/selftests/huge_pages.c b/drivers/gpu/drm/i915/gem/selftests/huge_pages.c index cb5863f37f9d..763df437c20c 100644 --- a/drivers/gpu/drm/i915/gem/selftests/huge_pages.c +++ b/drivers/gpu/drm/i915/gem/selftests/huge_pages.c @@ -354,7 +354,7 @@ fake_huge_pages_object(struct drm_i915_private *i915, u64 size, bool single) obj->write_domain = I915_GEM_DOMAIN_CPU; obj->read_domains = I915_GEM_DOMAIN_CPU; - obj->cache_level = I915_CACHE_NONE; + obj->pat_index = i915_gem_get_pat_index(i915, I915_CACHE_NONE); return obj; } diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_migrate.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_migrate.c index fe6c37fd7859..a93a90b15907 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_migrate.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_migrate.c @@ -219,7 +219,7 @@ static int __igt_lmem_pages_migrate(struct intel_gt *gt, continue; err = intel_migrate_clear(>->migrate, &ww, deps, - obj->mm.pages->sgl, obj->cache_level, + obj->mm.pages->sgl, obj->pat_index, i915_gem_object_is_lmem(obj), 0xdeadbeaf, &rq); if (rq) { diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c index 56279908ed30..a93d8f9f8bc1 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c @@ -1222,7 +1222,7 @@ static int __igt_mmap_migrate(struct intel_memory_region **placements, } err = intel_context_migrate_clear(to_gt(i915)->migrate.context, NULL, - obj->mm.pages->sgl, obj->cache_level, + obj->mm.pages->sgl, obj->pat_index, i915_gem_object_is_lmem(obj), expand32(POISON_INUSE), &rq); i915_gem_object_unpin_pages(obj); diff --git a/drivers/gpu/drm/i915/gt/gen6_ppgtt.c b/drivers/gpu/drm/i915/gt/gen6_ppgtt.c index 5aaacc53fa4c..c2bdc133c89a 100644 --- a/drivers/gpu/drm/i915/gt/gen6_ppgtt.c +++ b/drivers/gpu/drm/i915/gt/gen6_ppgtt.c @@ -109,7 +109,7 @@ static void gen6_ppgtt_clear_range(struct i915_address_space *vm, static void gen6_ppgtt_insert_entries(struct i915_address_space *vm, struct i915_vma_resource *vma_res, - enum i915_cache_level cache_level, + unsigned int pat_index, u32 flags) { struct i915_ppgtt *ppgtt = i915_vm_to_ppgtt(vm); @@ -117,7 +117,7 @@ static void gen6_ppgtt_insert_entries(struct i915_address_space *vm, unsigned int first_entry = vma_res->start / I915_GTT_PAGE_SIZE; unsigned int act_pt = first_entry / GEN6_PTES; unsigned int act_pte = first_entry % GEN6_PTES; - const u32 pte_encode = vm->pte_encode(0, cache_level, flags); + const u32 pte_encode = vm->pte_encode(0, pat_index, flags); struct sgt_dma iter = sgt_dma(vma_res); gen6_pte_t *vaddr; @@ -227,7 +227,9 @@ static int gen6_ppgtt_init_scratch(struct gen6_ppgtt *ppgtt) vm->scratch[0]->encode = vm->pte_encode(px_dma(vm->scratch[0]), - I915_CACHE_NONE, PTE_READ_ONLY); + i915_gem_get_pat_index(vm->i915, + I915_CACHE_NONE), + PTE_READ_ONLY); vm->scratch[1] = vm->alloc_pt_dma(vm, I915_GTT_PAGE_SIZE_4K); if (IS_ERR(vm->scratch[1])) { @@ -278,7 +280,7 @@ static void gen6_ppgtt_cleanup(struct i915_address_space *vm) static void pd_vma_bind(struct i915_address_space *vm, struct i915_vm_pt_stash *stash, struct i915_vma_resource *vma_res, - enum i915_cache_level cache_level, + unsigned int pat_index, u32 unused) { struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm); diff --git a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c index bb6998d67133..f948d33e5ec5 100644 --- a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c +++ b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c @@ -29,7 +29,7 @@ static u64 gen8_pde_encode(const dma_addr_t addr, } static u64 gen8_pte_encode(dma_addr_t addr, - enum i915_cache_level level, + unsigned int pat_index, u32 flags) { gen8_pte_t pte = addr | GEN8_PAGE_PRESENT | GEN8_PAGE_RW; @@ -40,7 +40,12 @@ static u64 gen8_pte_encode(dma_addr_t addr, if (flags & PTE_LM) pte |= GEN12_PPGTT_PTE_LM; - switch (level) { + /* + * For pre-gen12 platforms pat_index is the same as enum + * i915_cache_level, so the switch-case here is still valid. + * See translation table defined by LEGACY_CACHELEVEL. + */ + switch (pat_index) { case I915_CACHE_NONE: pte |= PPAT_UNCACHED; break; @@ -55,9 +60,9 @@ static u64 gen8_pte_encode(dma_addr_t addr, return pte; } -static u64 mtl_pte_encode(dma_addr_t addr, - enum i915_cache_level level, - u32 flags) +static u64 gen12_pte_encode(dma_addr_t addr, + unsigned int pat_index, + u32 flags) { gen8_pte_t pte = addr | GEN8_PAGE_PRESENT | GEN8_PAGE_RW; @@ -67,24 +72,17 @@ static u64 mtl_pte_encode(dma_addr_t addr, if (flags & PTE_LM) pte |= GEN12_PPGTT_PTE_LM; - switch (level) { - case I915_CACHE_NONE: - pte |= GEN12_PPGTT_PTE_PAT1; - break; - case I915_CACHE_LLC: - case I915_CACHE_L3_LLC: - pte |= GEN12_PPGTT_PTE_PAT0 | GEN12_PPGTT_PTE_PAT1; - break; - case I915_CACHE_WT: + if (pat_index & BIT(0)) pte |= GEN12_PPGTT_PTE_PAT0; - break; - default: - /* This should never happen. Added to deal with the compile - * error due to the addition of I915_MAX_CACHE_LEVEL. Will - * be removed by the pat_index patch. - */ - break; - } + + if (pat_index & BIT(1)) + pte |= GEN12_PPGTT_PTE_PAT1; + + if (pat_index & BIT(2)) + pte |= GEN12_PPGTT_PTE_PAT2; + + if (pat_index & BIT(3)) + pte |= MTL_PPGTT_PTE_PAT3; return pte; } @@ -457,11 +455,11 @@ gen8_ppgtt_insert_pte(struct i915_ppgtt *ppgtt, struct i915_page_directory *pdp, struct sgt_dma *iter, u64 idx, - enum i915_cache_level cache_level, + unsigned int pat_index, u32 flags) { struct i915_page_directory *pd; - const gen8_pte_t pte_encode = ppgtt->vm.pte_encode(0, cache_level, flags); + const gen8_pte_t pte_encode = ppgtt->vm.pte_encode(0, pat_index, flags); gen8_pte_t *vaddr; pd = i915_pd_entry(pdp, gen8_pd_index(idx, 2)); @@ -504,10 +502,10 @@ static void xehpsdv_ppgtt_insert_huge(struct i915_address_space *vm, struct i915_vma_resource *vma_res, struct sgt_dma *iter, - enum i915_cache_level cache_level, + unsigned int pat_index, u32 flags) { - const gen8_pte_t pte_encode = vm->pte_encode(0, cache_level, flags); + const gen8_pte_t pte_encode = vm->pte_encode(0, pat_index, flags); unsigned int rem = sg_dma_len(iter->sg); u64 start = vma_res->start; u64 end = start + vma_res->vma_size; @@ -612,10 +610,10 @@ xehpsdv_ppgtt_insert_huge(struct i915_address_space *vm, static void gen8_ppgtt_insert_huge(struct i915_address_space *vm, struct i915_vma_resource *vma_res, struct sgt_dma *iter, - enum i915_cache_level cache_level, + unsigned int pat_index, u32 flags) { - const gen8_pte_t pte_encode = vm->pte_encode(0, cache_level, flags); + const gen8_pte_t pte_encode = vm->pte_encode(0, pat_index, flags); unsigned int rem = sg_dma_len(iter->sg); u64 start = vma_res->start; @@ -735,7 +733,7 @@ static void gen8_ppgtt_insert_huge(struct i915_address_space *vm, static void gen8_ppgtt_insert(struct i915_address_space *vm, struct i915_vma_resource *vma_res, - enum i915_cache_level cache_level, + unsigned int pat_index, u32 flags) { struct i915_ppgtt * const ppgtt = i915_vm_to_ppgtt(vm); @@ -743,9 +741,9 @@ static void gen8_ppgtt_insert(struct i915_address_space *vm, if (vma_res->bi.page_sizes.sg > I915_GTT_PAGE_SIZE) { if (GRAPHICS_VER_FULL(vm->i915) >= IP_VER(12, 50)) - xehpsdv_ppgtt_insert_huge(vm, vma_res, &iter, cache_level, flags); + xehpsdv_ppgtt_insert_huge(vm, vma_res, &iter, pat_index, flags); else - gen8_ppgtt_insert_huge(vm, vma_res, &iter, cache_level, flags); + gen8_ppgtt_insert_huge(vm, vma_res, &iter, pat_index, flags); } else { u64 idx = vma_res->start >> GEN8_PTE_SHIFT; @@ -754,7 +752,7 @@ static void gen8_ppgtt_insert(struct i915_address_space *vm, gen8_pdp_for_page_index(vm, idx); idx = gen8_ppgtt_insert_pte(ppgtt, pdp, &iter, idx, - cache_level, flags); + pat_index, flags); } while (idx); vma_res->page_sizes_gtt = I915_GTT_PAGE_SIZE; @@ -764,7 +762,7 @@ static void gen8_ppgtt_insert(struct i915_address_space *vm, static void gen8_ppgtt_insert_entry(struct i915_address_space *vm, dma_addr_t addr, u64 offset, - enum i915_cache_level level, + unsigned int pat_index, u32 flags) { u64 idx = offset >> GEN8_PTE_SHIFT; @@ -778,14 +776,14 @@ static void gen8_ppgtt_insert_entry(struct i915_address_space *vm, GEM_BUG_ON(pt->is_compact); vaddr = px_vaddr(pt); - vaddr[gen8_pd_index(idx, 0)] = vm->pte_encode(addr, level, flags); + vaddr[gen8_pd_index(idx, 0)] = vm->pte_encode(addr, pat_index, flags); drm_clflush_virt_range(&vaddr[gen8_pd_index(idx, 0)], sizeof(*vaddr)); } static void __xehpsdv_ppgtt_insert_entry_lm(struct i915_address_space *vm, dma_addr_t addr, u64 offset, - enum i915_cache_level level, + unsigned int pat_index, u32 flags) { u64 idx = offset >> GEN8_PTE_SHIFT; @@ -808,20 +806,20 @@ static void __xehpsdv_ppgtt_insert_entry_lm(struct i915_address_space *vm, } vaddr = px_vaddr(pt); - vaddr[gen8_pd_index(idx, 0) / 16] = vm->pte_encode(addr, level, flags); + vaddr[gen8_pd_index(idx, 0) / 16] = vm->pte_encode(addr, pat_index, flags); } static void xehpsdv_ppgtt_insert_entry(struct i915_address_space *vm, dma_addr_t addr, u64 offset, - enum i915_cache_level level, + unsigned int pat_index, u32 flags) { if (flags & PTE_LM) return __xehpsdv_ppgtt_insert_entry_lm(vm, addr, offset, - level, flags); + pat_index, flags); - return gen8_ppgtt_insert_entry(vm, addr, offset, level, flags); + return gen8_ppgtt_insert_entry(vm, addr, offset, pat_index, flags); } static int gen8_init_scratch(struct i915_address_space *vm) @@ -856,7 +854,9 @@ static int gen8_init_scratch(struct i915_address_space *vm) vm->scratch[0]->encode = vm->pte_encode(px_dma(vm->scratch[0]), - I915_CACHE_NONE, pte_flags); + i915_gem_get_pat_index(vm->i915, + I915_CACHE_NONE), + pte_flags); for (i = 1; i <= vm->top; i++) { struct drm_i915_gem_object *obj; @@ -998,8 +998,8 @@ struct i915_ppgtt *gen8_ppgtt_create(struct intel_gt *gt, */ ppgtt->vm.alloc_scratch_dma = alloc_pt_dma; - if (GRAPHICS_VER_FULL(gt->i915) >= IP_VER(12, 70)) - ppgtt->vm.pte_encode = mtl_pte_encode; + if (GRAPHICS_VER(gt->i915) >= 12) + ppgtt->vm.pte_encode = gen12_pte_encode; else ppgtt->vm.pte_encode = gen8_pte_encode; diff --git a/drivers/gpu/drm/i915/gt/gen8_ppgtt.h b/drivers/gpu/drm/i915/gt/gen8_ppgtt.h index f541d19264b4..19c635441642 100644 --- a/drivers/gpu/drm/i915/gt/gen8_ppgtt.h +++ b/drivers/gpu/drm/i915/gt/gen8_ppgtt.h @@ -10,13 +10,12 @@ struct i915_address_space; struct intel_gt; -enum i915_cache_level; struct i915_ppgtt *gen8_ppgtt_create(struct intel_gt *gt, unsigned long lmem_pt_obj_flags); u64 gen8_ggtt_pte_encode(dma_addr_t addr, - enum i915_cache_level level, + unsigned int pat_index, u32 flags); #endif diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt.c b/drivers/gpu/drm/i915/gt/intel_ggtt.c index c8390d03fce2..2a7942fac798 100644 --- a/drivers/gpu/drm/i915/gt/intel_ggtt.c +++ b/drivers/gpu/drm/i915/gt/intel_ggtt.c @@ -221,7 +221,7 @@ static void guc_ggtt_invalidate(struct i915_ggtt *ggtt) } static u64 mtl_ggtt_pte_encode(dma_addr_t addr, - enum i915_cache_level level, + unsigned int pat_index, u32 flags) { gen8_pte_t pte = addr | GEN8_PAGE_PRESENT; @@ -231,30 +231,17 @@ static u64 mtl_ggtt_pte_encode(dma_addr_t addr, if (flags & PTE_LM) pte |= GEN12_GGTT_PTE_LM; - switch (level) { - case I915_CACHE_NONE: - pte |= MTL_GGTT_PTE_PAT1; - break; - case I915_CACHE_LLC: - case I915_CACHE_L3_LLC: - pte |= MTL_GGTT_PTE_PAT0 | MTL_GGTT_PTE_PAT1; - break; - case I915_CACHE_WT: + if (pat_index & BIT(0)) pte |= MTL_GGTT_PTE_PAT0; - break; - default: - /* This should never happen. Added to deal with the compile - * error due to the addition of I915_MAX_CACHE_LEVEL. Will - * be removed by the pat_index patch. - */ - break; - } + + if (pat_index & BIT(1)) + pte |= MTL_GGTT_PTE_PAT1; return pte; } u64 gen8_ggtt_pte_encode(dma_addr_t addr, - enum i915_cache_level level, + unsigned int pat_index, u32 flags) { gen8_pte_t pte = addr | GEN8_PAGE_PRESENT; @@ -273,25 +260,25 @@ static void gen8_set_pte(void __iomem *addr, gen8_pte_t pte) static void gen8_ggtt_insert_page(struct i915_address_space *vm, dma_addr_t addr, u64 offset, - enum i915_cache_level level, + unsigned int pat_index, u32 flags) { struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm); gen8_pte_t __iomem *pte = (gen8_pte_t __iomem *)ggtt->gsm + offset / I915_GTT_PAGE_SIZE; - gen8_set_pte(pte, ggtt->vm.pte_encode(addr, level, flags)); + gen8_set_pte(pte, ggtt->vm.pte_encode(addr, pat_index, flags)); ggtt->invalidate(ggtt); } static void gen8_ggtt_insert_entries(struct i915_address_space *vm, struct i915_vma_resource *vma_res, - enum i915_cache_level level, + unsigned int pat_index, u32 flags) { struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm); - const gen8_pte_t pte_encode = ggtt->vm.pte_encode(0, level, flags); + const gen8_pte_t pte_encode = ggtt->vm.pte_encode(0, pat_index, flags); gen8_pte_t __iomem *gte; gen8_pte_t __iomem *end; struct sgt_iter iter; @@ -348,14 +335,14 @@ static void gen8_ggtt_clear_range(struct i915_address_space *vm, static void gen6_ggtt_insert_page(struct i915_address_space *vm, dma_addr_t addr, u64 offset, - enum i915_cache_level level, + unsigned int pat_index, u32 flags) { struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm); gen6_pte_t __iomem *pte = (gen6_pte_t __iomem *)ggtt->gsm + offset / I915_GTT_PAGE_SIZE; - iowrite32(vm->pte_encode(addr, level, flags), pte); + iowrite32(vm->pte_encode(addr, pat_index, flags), pte); ggtt->invalidate(ggtt); } @@ -368,7 +355,7 @@ static void gen6_ggtt_insert_page(struct i915_address_space *vm, */ static void gen6_ggtt_insert_entries(struct i915_address_space *vm, struct i915_vma_resource *vma_res, - enum i915_cache_level level, + unsigned int pat_index, u32 flags) { struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm); @@ -385,7 +372,7 @@ static void gen6_ggtt_insert_entries(struct i915_address_space *vm, iowrite32(vm->scratch[0]->encode, gte++); end += (vma_res->node_size + vma_res->guard) / I915_GTT_PAGE_SIZE; for_each_sgt_daddr(addr, iter, vma_res->bi.pages) - iowrite32(vm->pte_encode(addr, level, flags), gte++); + iowrite32(vm->pte_encode(addr, pat_index, flags), gte++); GEM_BUG_ON(gte > end); /* Fill the allocated but "unused" space beyond the end of the buffer */ @@ -420,14 +407,15 @@ struct insert_page { struct i915_address_space *vm; dma_addr_t addr; u64 offset; - enum i915_cache_level level; + unsigned int pat_index; }; static int bxt_vtd_ggtt_insert_page__cb(void *_arg) { struct insert_page *arg = _arg; - gen8_ggtt_insert_page(arg->vm, arg->addr, arg->offset, arg->level, 0); + gen8_ggtt_insert_page(arg->vm, arg->addr, arg->offset, + arg->pat_index, 0); bxt_vtd_ggtt_wa(arg->vm); return 0; @@ -436,10 +424,10 @@ static int bxt_vtd_ggtt_insert_page__cb(void *_arg) static void bxt_vtd_ggtt_insert_page__BKL(struct i915_address_space *vm, dma_addr_t addr, u64 offset, - enum i915_cache_level level, + unsigned int pat_index, u32 unused) { - struct insert_page arg = { vm, addr, offset, level }; + struct insert_page arg = { vm, addr, offset, pat_index }; stop_machine(bxt_vtd_ggtt_insert_page__cb, &arg, NULL); } @@ -447,7 +435,7 @@ static void bxt_vtd_ggtt_insert_page__BKL(struct i915_address_space *vm, struct insert_entries { struct i915_address_space *vm; struct i915_vma_resource *vma_res; - enum i915_cache_level level; + unsigned int pat_index; u32 flags; }; @@ -455,7 +443,8 @@ static int bxt_vtd_ggtt_insert_entries__cb(void *_arg) { struct insert_entries *arg = _arg; - gen8_ggtt_insert_entries(arg->vm, arg->vma_res, arg->level, arg->flags); + gen8_ggtt_insert_entries(arg->vm, arg->vma_res, + arg->pat_index, arg->flags); bxt_vtd_ggtt_wa(arg->vm); return 0; @@ -463,10 +452,10 @@ static int bxt_vtd_ggtt_insert_entries__cb(void *_arg) static void bxt_vtd_ggtt_insert_entries__BKL(struct i915_address_space *vm, struct i915_vma_resource *vma_res, - enum i915_cache_level level, + unsigned int pat_index, u32 flags) { - struct insert_entries arg = { vm, vma_res, level, flags }; + struct insert_entries arg = { vm, vma_res, pat_index, flags }; stop_machine(bxt_vtd_ggtt_insert_entries__cb, &arg, NULL); } @@ -495,7 +484,7 @@ static void gen6_ggtt_clear_range(struct i915_address_space *vm, void intel_ggtt_bind_vma(struct i915_address_space *vm, struct i915_vm_pt_stash *stash, struct i915_vma_resource *vma_res, - enum i915_cache_level cache_level, + unsigned int pat_index, u32 flags) { u32 pte_flags; @@ -512,7 +501,7 @@ void intel_ggtt_bind_vma(struct i915_address_space *vm, if (vma_res->bi.lmem) pte_flags |= PTE_LM; - vm->insert_entries(vm, vma_res, cache_level, pte_flags); + vm->insert_entries(vm, vma_res, pat_index, pte_flags); vma_res->page_sizes_gtt = I915_GTT_PAGE_SIZE; } @@ -661,7 +650,7 @@ static int init_ggtt(struct i915_ggtt *ggtt) static void aliasing_gtt_bind_vma(struct i915_address_space *vm, struct i915_vm_pt_stash *stash, struct i915_vma_resource *vma_res, - enum i915_cache_level cache_level, + unsigned int pat_index, u32 flags) { u32 pte_flags; @@ -673,10 +662,10 @@ static void aliasing_gtt_bind_vma(struct i915_address_space *vm, if (flags & I915_VMA_LOCAL_BIND) ppgtt_bind_vma(&i915_vm_to_ggtt(vm)->alias->vm, - stash, vma_res, cache_level, flags); + stash, vma_res, pat_index, flags); if (flags & I915_VMA_GLOBAL_BIND) - vm->insert_entries(vm, vma_res, cache_level, pte_flags); + vm->insert_entries(vm, vma_res, pat_index, pte_flags); vma_res->bound_flags |= flags; } @@ -933,7 +922,9 @@ static int ggtt_probe_common(struct i915_ggtt *ggtt, u64 size) ggtt->vm.scratch[0]->encode = ggtt->vm.pte_encode(px_dma(ggtt->vm.scratch[0]), - I915_CACHE_NONE, pte_flags); + i915_gem_get_pat_index(i915, + I915_CACHE_NONE), + pte_flags); return 0; } @@ -1022,6 +1013,11 @@ static int gen8_gmch_probe(struct i915_ggtt *ggtt) return ggtt_probe_common(ggtt, size); } +/* + * For pre-gen8 platforms pat_index is the same as enum i915_cache_level, + * so these PTE encode functions are left with using cache_level. + * See translation table LEGACY_CACHELEVEL. + */ static u64 snb_pte_encode(dma_addr_t addr, enum i915_cache_level level, u32 flags) @@ -1302,7 +1298,9 @@ bool i915_ggtt_resume_vm(struct i915_address_space *vm) */ vma->resource->bound_flags = 0; vma->ops->bind_vma(vm, NULL, vma->resource, - obj ? obj->cache_level : 0, + obj ? obj->pat_index : + i915_gem_get_pat_index(vm->i915, + I915_CACHE_NONE), was_bound); if (obj) { /* only used during resume => exclusive access */ diff --git a/drivers/gpu/drm/i915/gt/intel_gtt.h b/drivers/gpu/drm/i915/gt/intel_gtt.h index 9aff343beaa8..4d6296cdbcfd 100644 --- a/drivers/gpu/drm/i915/gt/intel_gtt.h +++ b/drivers/gpu/drm/i915/gt/intel_gtt.h @@ -163,8 +163,6 @@ typedef u64 gen8_pte_t; #define MTL_3_COH_2W REG_FIELD_PREP(MTL_PAT_INDEX_COH_MODE_MASK, 3) #define MTL_2_COH_1W REG_FIELD_PREP(MTL_PAT_INDEX_COH_MODE_MASK, 2) -enum i915_cache_level; - struct drm_i915_gem_object; struct i915_fence_reg; struct i915_vma; @@ -232,7 +230,7 @@ struct i915_vma_ops { void (*bind_vma)(struct i915_address_space *vm, struct i915_vm_pt_stash *stash, struct i915_vma_resource *vma_res, - enum i915_cache_level cache_level, + unsigned int pat_index, u32 flags); /* * Unmap an object from an address space. This usually consists of @@ -304,7 +302,7 @@ struct i915_address_space { (*alloc_scratch_dma)(struct i915_address_space *vm, int sz); u64 (*pte_encode)(dma_addr_t addr, - enum i915_cache_level level, + unsigned int pat_index, u32 flags); /* Create a valid PTE */ #define PTE_READ_ONLY BIT(0) #define PTE_LM BIT(1) @@ -319,20 +317,20 @@ struct i915_address_space { void (*insert_page)(struct i915_address_space *vm, dma_addr_t addr, u64 offset, - enum i915_cache_level cache_level, + unsigned int pat_index, u32 flags); void (*insert_entries)(struct i915_address_space *vm, struct i915_vma_resource *vma_res, - enum i915_cache_level cache_level, + unsigned int pat_index, u32 flags); void (*raw_insert_page)(struct i915_address_space *vm, dma_addr_t addr, u64 offset, - enum i915_cache_level cache_level, + unsigned int pat_index, u32 flags); void (*raw_insert_entries)(struct i915_address_space *vm, struct i915_vma_resource *vma_res, - enum i915_cache_level cache_level, + unsigned int pat_index, u32 flags); void (*cleanup)(struct i915_address_space *vm); @@ -579,7 +577,7 @@ void ppgtt_init(struct i915_ppgtt *ppgtt, struct intel_gt *gt, void intel_ggtt_bind_vma(struct i915_address_space *vm, struct i915_vm_pt_stash *stash, struct i915_vma_resource *vma_res, - enum i915_cache_level cache_level, + unsigned int pat_index, u32 flags); void intel_ggtt_unbind_vma(struct i915_address_space *vm, struct i915_vma_resource *vma_res); @@ -657,7 +655,7 @@ void gen6_ggtt_invalidate(struct i915_ggtt *ggtt); void ppgtt_bind_vma(struct i915_address_space *vm, struct i915_vm_pt_stash *stash, struct i915_vma_resource *vma_res, - enum i915_cache_level cache_level, + unsigned int pat_index, u32 flags); void ppgtt_unbind_vma(struct i915_address_space *vm, struct i915_vma_resource *vma_res); diff --git a/drivers/gpu/drm/i915/gt/intel_migrate.c b/drivers/gpu/drm/i915/gt/intel_migrate.c index e0998879a0e1..6023288b0e2d 100644 --- a/drivers/gpu/drm/i915/gt/intel_migrate.c +++ b/drivers/gpu/drm/i915/gt/intel_migrate.c @@ -45,7 +45,9 @@ static void xehpsdv_toggle_pdes(struct i915_address_space *vm, * Insert a dummy PTE into every PT that will map to LMEM to ensure * we have a correctly setup PDE structure for later use. */ - vm->insert_page(vm, 0, d->offset, I915_CACHE_NONE, PTE_LM); + vm->insert_page(vm, 0, d->offset, + i915_gem_get_pat_index(vm->i915, I915_CACHE_NONE), + PTE_LM); GEM_BUG_ON(!pt->is_compact); d->offset += SZ_2M; } @@ -63,7 +65,9 @@ static void xehpsdv_insert_pte(struct i915_address_space *vm, * alignment is 64K underneath for the pt, and we are careful * not to access the space in the void. */ - vm->insert_page(vm, px_dma(pt), d->offset, I915_CACHE_NONE, PTE_LM); + vm->insert_page(vm, px_dma(pt), d->offset, + i915_gem_get_pat_index(vm->i915, I915_CACHE_NONE), + PTE_LM); d->offset += SZ_64K; } @@ -73,7 +77,8 @@ static void insert_pte(struct i915_address_space *vm, { struct insert_pte_data *d = data; - vm->insert_page(vm, px_dma(pt), d->offset, I915_CACHE_NONE, + vm->insert_page(vm, px_dma(pt), d->offset, + i915_gem_get_pat_index(vm->i915, I915_CACHE_NONE), i915_gem_object_is_lmem(pt->base) ? PTE_LM : 0); d->offset += PAGE_SIZE; } @@ -356,13 +361,13 @@ static int max_pte_pkt_size(struct i915_request *rq, int pkt) static int emit_pte(struct i915_request *rq, struct sgt_dma *it, - enum i915_cache_level cache_level, + unsigned int pat_index, bool is_lmem, u64 offset, int length) { bool has_64K_pages = HAS_64K_PAGES(rq->engine->i915); - const u64 encode = rq->context->vm->pte_encode(0, cache_level, + const u64 encode = rq->context->vm->pte_encode(0, pat_index, is_lmem ? PTE_LM : 0); struct intel_ring *ring = rq->ring; int pkt, dword_length; @@ -673,17 +678,17 @@ int intel_context_migrate_copy(struct intel_context *ce, const struct i915_deps *deps, struct scatterlist *src, - enum i915_cache_level src_cache_level, + unsigned int src_pat_index, bool src_is_lmem, struct scatterlist *dst, - enum i915_cache_level dst_cache_level, + unsigned int dst_pat_index, bool dst_is_lmem, struct i915_request **out) { struct sgt_dma it_src = sg_sgt(src), it_dst = sg_sgt(dst), it_ccs; struct drm_i915_private *i915 = ce->engine->i915; u64 ccs_bytes_to_cpy = 0, bytes_to_cpy; - enum i915_cache_level ccs_cache_level; + unsigned int ccs_pat_index; u32 src_offset, dst_offset; u8 src_access, dst_access; struct i915_request *rq; @@ -707,12 +712,12 @@ intel_context_migrate_copy(struct intel_context *ce, dst_sz = scatter_list_length(dst); if (src_is_lmem) { it_ccs = it_dst; - ccs_cache_level = dst_cache_level; + ccs_pat_index = dst_pat_index; ccs_is_src = false; } else if (dst_is_lmem) { bytes_to_cpy = dst_sz; it_ccs = it_src; - ccs_cache_level = src_cache_level; + ccs_pat_index = src_pat_index; ccs_is_src = true; } @@ -773,7 +778,7 @@ intel_context_migrate_copy(struct intel_context *ce, src_sz = calculate_chunk_sz(i915, src_is_lmem, bytes_to_cpy, ccs_bytes_to_cpy); - len = emit_pte(rq, &it_src, src_cache_level, src_is_lmem, + len = emit_pte(rq, &it_src, src_pat_index, src_is_lmem, src_offset, src_sz); if (!len) { err = -EINVAL; @@ -784,7 +789,7 @@ intel_context_migrate_copy(struct intel_context *ce, goto out_rq; } - err = emit_pte(rq, &it_dst, dst_cache_level, dst_is_lmem, + err = emit_pte(rq, &it_dst, dst_pat_index, dst_is_lmem, dst_offset, len); if (err < 0) goto out_rq; @@ -811,7 +816,7 @@ intel_context_migrate_copy(struct intel_context *ce, goto out_rq; ccs_sz = GET_CCS_BYTES(i915, len); - err = emit_pte(rq, &it_ccs, ccs_cache_level, false, + err = emit_pte(rq, &it_ccs, ccs_pat_index, false, ccs_is_src ? src_offset : dst_offset, ccs_sz); if (err < 0) @@ -979,7 +984,7 @@ int intel_context_migrate_clear(struct intel_context *ce, const struct i915_deps *deps, struct scatterlist *sg, - enum i915_cache_level cache_level, + unsigned int pat_index, bool is_lmem, u32 value, struct i915_request **out) @@ -1027,7 +1032,7 @@ intel_context_migrate_clear(struct intel_context *ce, if (err) goto out_rq; - len = emit_pte(rq, &it, cache_level, is_lmem, offset, CHUNK_SZ); + len = emit_pte(rq, &it, pat_index, is_lmem, offset, CHUNK_SZ); if (len <= 0) { err = len; goto out_rq; @@ -1074,10 +1079,10 @@ int intel_migrate_copy(struct intel_migrate *m, struct i915_gem_ww_ctx *ww, const struct i915_deps *deps, struct scatterlist *src, - enum i915_cache_level src_cache_level, + unsigned int src_pat_index, bool src_is_lmem, struct scatterlist *dst, - enum i915_cache_level dst_cache_level, + unsigned int dst_pat_index, bool dst_is_lmem, struct i915_request **out) { @@ -1098,8 +1103,8 @@ int intel_migrate_copy(struct intel_migrate *m, goto out; err = intel_context_migrate_copy(ce, deps, - src, src_cache_level, src_is_lmem, - dst, dst_cache_level, dst_is_lmem, + src, src_pat_index, src_is_lmem, + dst, dst_pat_index, dst_is_lmem, out); intel_context_unpin(ce); @@ -1113,7 +1118,7 @@ intel_migrate_clear(struct intel_migrate *m, struct i915_gem_ww_ctx *ww, const struct i915_deps *deps, struct scatterlist *sg, - enum i915_cache_level cache_level, + unsigned int pat_index, bool is_lmem, u32 value, struct i915_request **out) @@ -1134,7 +1139,7 @@ intel_migrate_clear(struct intel_migrate *m, if (err) goto out; - err = intel_context_migrate_clear(ce, deps, sg, cache_level, + err = intel_context_migrate_clear(ce, deps, sg, pat_index, is_lmem, value, out); intel_context_unpin(ce); diff --git a/drivers/gpu/drm/i915/gt/intel_migrate.h b/drivers/gpu/drm/i915/gt/intel_migrate.h index ccc677ec4aa3..11fc09a00c4b 100644 --- a/drivers/gpu/drm/i915/gt/intel_migrate.h +++ b/drivers/gpu/drm/i915/gt/intel_migrate.h @@ -16,7 +16,6 @@ struct i915_request; struct i915_gem_ww_ctx; struct intel_gt; struct scatterlist; -enum i915_cache_level; int intel_migrate_init(struct intel_migrate *m, struct intel_gt *gt); @@ -26,20 +25,20 @@ int intel_migrate_copy(struct intel_migrate *m, struct i915_gem_ww_ctx *ww, const struct i915_deps *deps, struct scatterlist *src, - enum i915_cache_level src_cache_level, + unsigned int src_pat_index, bool src_is_lmem, struct scatterlist *dst, - enum i915_cache_level dst_cache_level, + unsigned int dst_pat_index, bool dst_is_lmem, struct i915_request **out); int intel_context_migrate_copy(struct intel_context *ce, const struct i915_deps *deps, struct scatterlist *src, - enum i915_cache_level src_cache_level, + unsigned int src_pat_index, bool src_is_lmem, struct scatterlist *dst, - enum i915_cache_level dst_cache_level, + unsigned int dst_pat_index, bool dst_is_lmem, struct i915_request **out); @@ -48,7 +47,7 @@ intel_migrate_clear(struct intel_migrate *m, struct i915_gem_ww_ctx *ww, const struct i915_deps *deps, struct scatterlist *sg, - enum i915_cache_level cache_level, + unsigned int pat_index, bool is_lmem, u32 value, struct i915_request **out); @@ -56,7 +55,7 @@ int intel_context_migrate_clear(struct intel_context *ce, const struct i915_deps *deps, struct scatterlist *sg, - enum i915_cache_level cache_level, + unsigned int pat_index, bool is_lmem, u32 value, struct i915_request **out); diff --git a/drivers/gpu/drm/i915/gt/intel_ppgtt.c b/drivers/gpu/drm/i915/gt/intel_ppgtt.c index 7ecfa672f738..436756bfbb1a 100644 --- a/drivers/gpu/drm/i915/gt/intel_ppgtt.c +++ b/drivers/gpu/drm/i915/gt/intel_ppgtt.c @@ -181,7 +181,7 @@ struct i915_ppgtt *i915_ppgtt_create(struct intel_gt *gt, void ppgtt_bind_vma(struct i915_address_space *vm, struct i915_vm_pt_stash *stash, struct i915_vma_resource *vma_res, - enum i915_cache_level cache_level, + unsigned int pat_index, u32 flags) { u32 pte_flags; @@ -199,7 +199,7 @@ void ppgtt_bind_vma(struct i915_address_space *vm, if (vma_res->bi.lmem) pte_flags |= PTE_LM; - vm->insert_entries(vm, vma_res, cache_level, pte_flags); + vm->insert_entries(vm, vma_res, pat_index, pte_flags); wmb(); } diff --git a/drivers/gpu/drm/i915/gt/selftest_migrate.c b/drivers/gpu/drm/i915/gt/selftest_migrate.c index e677f2da093d..3def5ca72dec 100644 --- a/drivers/gpu/drm/i915/gt/selftest_migrate.c +++ b/drivers/gpu/drm/i915/gt/selftest_migrate.c @@ -137,7 +137,7 @@ static int copy(struct intel_migrate *migrate, static int intel_context_copy_ccs(struct intel_context *ce, const struct i915_deps *deps, struct scatterlist *sg, - enum i915_cache_level cache_level, + unsigned int pat_index, bool write_to_ccs, struct i915_request **out) { @@ -185,7 +185,7 @@ static int intel_context_copy_ccs(struct intel_context *ce, if (err) goto out_rq; - len = emit_pte(rq, &it, cache_level, true, offset, CHUNK_SZ); + len = emit_pte(rq, &it, pat_index, true, offset, CHUNK_SZ); if (len <= 0) { err = len; goto out_rq; @@ -223,7 +223,7 @@ intel_migrate_ccs_copy(struct intel_migrate *m, struct i915_gem_ww_ctx *ww, const struct i915_deps *deps, struct scatterlist *sg, - enum i915_cache_level cache_level, + unsigned int pat_index, bool write_to_ccs, struct i915_request **out) { @@ -243,7 +243,7 @@ intel_migrate_ccs_copy(struct intel_migrate *m, if (err) goto out; - err = intel_context_copy_ccs(ce, deps, sg, cache_level, + err = intel_context_copy_ccs(ce, deps, sg, pat_index, write_to_ccs, out); intel_context_unpin(ce); @@ -300,7 +300,7 @@ static int clear(struct intel_migrate *migrate, /* Write the obj data into ccs surface */ err = intel_migrate_ccs_copy(migrate, &ww, NULL, obj->mm.pages->sgl, - obj->cache_level, + obj->pat_index, true, &rq); if (rq && !err) { if (i915_request_wait(rq, 0, HZ) < 0) { @@ -351,7 +351,7 @@ static int clear(struct intel_migrate *migrate, err = intel_migrate_ccs_copy(migrate, &ww, NULL, obj->mm.pages->sgl, - obj->cache_level, + obj->pat_index, false, &rq); if (rq && !err) { if (i915_request_wait(rq, 0, HZ) < 0) { @@ -414,9 +414,9 @@ static int __migrate_copy(struct intel_migrate *migrate, struct i915_request **out) { return intel_migrate_copy(migrate, ww, NULL, - src->mm.pages->sgl, src->cache_level, + src->mm.pages->sgl, src->pat_index, i915_gem_object_is_lmem(src), - dst->mm.pages->sgl, dst->cache_level, + dst->mm.pages->sgl, dst->pat_index, i915_gem_object_is_lmem(dst), out); } @@ -428,9 +428,9 @@ static int __global_copy(struct intel_migrate *migrate, struct i915_request **out) { return intel_context_migrate_copy(migrate->context, NULL, - src->mm.pages->sgl, src->cache_level, + src->mm.pages->sgl, src->pat_index, i915_gem_object_is_lmem(src), - dst->mm.pages->sgl, dst->cache_level, + dst->mm.pages->sgl, dst->pat_index, i915_gem_object_is_lmem(dst), out); } @@ -455,7 +455,7 @@ static int __migrate_clear(struct intel_migrate *migrate, { return intel_migrate_clear(migrate, ww, NULL, obj->mm.pages->sgl, - obj->cache_level, + obj->pat_index, i915_gem_object_is_lmem(obj), value, out); } @@ -468,7 +468,7 @@ static int __global_clear(struct intel_migrate *migrate, { return intel_context_migrate_clear(migrate->context, NULL, obj->mm.pages->sgl, - obj->cache_level, + obj->pat_index, i915_gem_object_is_lmem(obj), value, out); } @@ -648,7 +648,7 @@ static int live_emit_pte_full_ring(void *arg) */ pr_info("%s emite_pte ring space=%u\n", __func__, rq->ring->space); it = sg_sgt(obj->mm.pages->sgl); - len = emit_pte(rq, &it, obj->cache_level, false, 0, CHUNK_SZ); + len = emit_pte(rq, &it, obj->pat_index, false, 0, CHUNK_SZ); if (!len) { err = -EINVAL; goto out_rq; @@ -844,7 +844,7 @@ static int wrap_ktime_compare(const void *A, const void *B) static int __perf_clear_blt(struct intel_context *ce, struct scatterlist *sg, - enum i915_cache_level cache_level, + unsigned int pat_index, bool is_lmem, size_t sz) { @@ -858,7 +858,7 @@ static int __perf_clear_blt(struct intel_context *ce, t0 = ktime_get(); - err = intel_context_migrate_clear(ce, NULL, sg, cache_level, + err = intel_context_migrate_clear(ce, NULL, sg, pat_index, is_lmem, 0, &rq); if (rq) { if (i915_request_wait(rq, 0, MAX_SCHEDULE_TIMEOUT) < 0) @@ -904,7 +904,8 @@ static int perf_clear_blt(void *arg) err = __perf_clear_blt(gt->migrate.context, dst->mm.pages->sgl, - I915_CACHE_NONE, + i915_gem_get_pat_index(gt->i915, + I915_CACHE_NONE), i915_gem_object_is_lmem(dst), sizes[i]); @@ -919,10 +920,10 @@ static int perf_clear_blt(void *arg) static int __perf_copy_blt(struct intel_context *ce, struct scatterlist *src, - enum i915_cache_level src_cache_level, + unsigned int src_pat_index, bool src_is_lmem, struct scatterlist *dst, - enum i915_cache_level dst_cache_level, + unsigned int dst_pat_index, bool dst_is_lmem, size_t sz) { @@ -937,9 +938,9 @@ static int __perf_copy_blt(struct intel_context *ce, t0 = ktime_get(); err = intel_context_migrate_copy(ce, NULL, - src, src_cache_level, + src, src_pat_index, src_is_lmem, - dst, dst_cache_level, + dst, dst_pat_index, dst_is_lmem, &rq); if (rq) { @@ -994,10 +995,12 @@ static int perf_copy_blt(void *arg) err = __perf_copy_blt(gt->migrate.context, src->mm.pages->sgl, - I915_CACHE_NONE, + i915_gem_get_pat_index(gt->i915, + I915_CACHE_NONE), i915_gem_object_is_lmem(src), dst->mm.pages->sgl, - I915_CACHE_NONE, + i915_gem_get_pat_index(gt->i915, + I915_CACHE_NONE), i915_gem_object_is_lmem(dst), sz); diff --git a/drivers/gpu/drm/i915/gt/selftest_reset.c b/drivers/gpu/drm/i915/gt/selftest_reset.c index a9e0a91bc0e0..79aa6ac66ad2 100644 --- a/drivers/gpu/drm/i915/gt/selftest_reset.c +++ b/drivers/gpu/drm/i915/gt/selftest_reset.c @@ -86,7 +86,9 @@ __igt_reset_stolen(struct intel_gt *gt, ggtt->vm.insert_page(&ggtt->vm, dma, ggtt->error_capture.start, - I915_CACHE_NONE, 0); + i915_gem_get_pat_index(gt->i915, + I915_CACHE_NONE), + 0); mb(); s = io_mapping_map_wc(&ggtt->iomap, @@ -127,7 +129,9 @@ __igt_reset_stolen(struct intel_gt *gt, ggtt->vm.insert_page(&ggtt->vm, dma, ggtt->error_capture.start, - I915_CACHE_NONE, 0); + i915_gem_get_pat_index(gt->i915, + I915_CACHE_NONE), + 0); mb(); s = io_mapping_map_wc(&ggtt->iomap, diff --git a/drivers/gpu/drm/i915/gt/selftest_timeline.c b/drivers/gpu/drm/i915/gt/selftest_timeline.c index 9f536c251179..39c3ec12df1a 100644 --- a/drivers/gpu/drm/i915/gt/selftest_timeline.c +++ b/drivers/gpu/drm/i915/gt/selftest_timeline.c @@ -836,7 +836,7 @@ static int setup_watcher(struct hwsp_watcher *w, struct intel_gt *gt, return PTR_ERR(obj); /* keep the same cache settings as timeline */ - i915_gem_object_set_cache_coherency(obj, tl->hwsp_ggtt->obj->cache_level); + i915_gem_object_set_pat_index(obj, tl->hwsp_ggtt->obj->pat_index); w->map = i915_gem_object_pin_map_unlocked(obj, page_unmask_bits(tl->hwsp_ggtt->obj->mm.mapping)); if (IS_ERR(w->map)) { diff --git a/drivers/gpu/drm/i915/gt/selftest_tlb.c b/drivers/gpu/drm/i915/gt/selftest_tlb.c index e6cac1f15d6e..4493c8518e91 100644 --- a/drivers/gpu/drm/i915/gt/selftest_tlb.c +++ b/drivers/gpu/drm/i915/gt/selftest_tlb.c @@ -36,6 +36,8 @@ pte_tlbinv(struct intel_context *ce, u64 length, struct rnd_state *prng) { + const unsigned int pat_index = + i915_gem_get_pat_index(ce->vm->i915, I915_CACHE_NONE); struct drm_i915_gem_object *batch; struct drm_mm_node vb_node; struct i915_request *rq; @@ -155,7 +157,7 @@ pte_tlbinv(struct intel_context *ce, /* Flip the PTE between A and B */ if (i915_gem_object_is_lmem(vb->obj)) pte_flags |= PTE_LM; - ce->vm->insert_entries(ce->vm, &vb_res, 0, pte_flags); + ce->vm->insert_entries(ce->vm, &vb_res, pat_index, pte_flags); /* Flush the PTE update to concurrent HW */ tlbinv(ce->vm, addr & -length, length); diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c index 4ec7df9ed5ff..e34c79120090 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c @@ -957,9 +957,15 @@ static void uc_fw_bind_ggtt(struct intel_uc_fw *uc_fw) pte_flags |= PTE_LM; if (ggtt->vm.raw_insert_entries) - ggtt->vm.raw_insert_entries(&ggtt->vm, dummy, I915_CACHE_NONE, pte_flags); + ggtt->vm.raw_insert_entries(&ggtt->vm, dummy, + i915_gem_get_pat_index(ggtt->vm.i915, + I915_CACHE_NONE), + pte_flags); else - ggtt->vm.insert_entries(&ggtt->vm, dummy, I915_CACHE_NONE, pte_flags); + ggtt->vm.insert_entries(&ggtt->vm, dummy, + i915_gem_get_pat_index(ggtt->vm.i915, + I915_CACHE_NONE), + pte_flags); } static void uc_fw_unbind_ggtt(struct intel_uc_fw *uc_fw) diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 80c2bf98e341..e39937c82d73 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -138,21 +138,54 @@ static const char *stringify_vma_type(const struct i915_vma *vma) return "ppgtt"; } -static const char *i915_cache_level_str(struct drm_i915_private *i915, int type) +static const char *i915_cache_level_str(struct drm_i915_gem_object *obj) { - switch (type) { - case I915_CACHE_NONE: return " uncached"; - case I915_CACHE_LLC: return HAS_LLC(i915) ? " LLC" : " snooped"; - case I915_CACHE_L3_LLC: return " L3+LLC"; - case I915_CACHE_WT: return " WT"; - default: return ""; + struct drm_i915_private *i915 = obj_to_i915(obj); + + if (IS_METEORLAKE(i915)) { + switch (obj->pat_index) { + case 0: return " WB"; + case 1: return " WT"; + case 2: return " UC"; + case 3: return " WB (1-Way Coh)"; + case 4: return " WB (2-Way Coh)"; + default: return " not defined"; + } + } else if (IS_PONTEVECCHIO(i915)) { + switch (obj->pat_index) { + case 0: return " UC"; + case 1: return " WC"; + case 2: return " WT"; + case 3: return " WB"; + case 4: return " WT (CLOS1)"; + case 5: return " WB (CLOS1)"; + case 6: return " WT (CLOS2)"; + case 7: return " WT (CLOS2)"; + default: return " not defined"; + } + } else if (GRAPHICS_VER(i915) >= 12) { + switch (obj->pat_index) { + case 0: return " WB"; + case 1: return " WC"; + case 2: return " WT"; + case 3: return " UC"; + default: return " not defined"; + } + } else { + switch (obj->pat_index) { + case 0: return " UC"; + case 1: return HAS_LLC(i915) ? + " LLC" : " snooped"; + case 2: return " L3+LLC"; + case 3: return " WT"; + default: return " not defined"; + } } } void i915_debugfs_describe_obj(struct seq_file *m, struct drm_i915_gem_object *obj) { - struct drm_i915_private *dev_priv = to_i915(obj->base.dev); struct i915_vma *vma; int pin_count = 0; @@ -164,7 +197,7 @@ i915_debugfs_describe_obj(struct seq_file *m, struct drm_i915_gem_object *obj) obj->base.size / 1024, obj->read_domains, obj->write_domain, - i915_cache_level_str(dev_priv, obj->cache_level), + i915_cache_level_str(obj), obj->mm.dirty ? " dirty" : "", obj->mm.madv == I915_MADV_DONTNEED ? " purgeable" : ""); if (obj->base.name) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 0a78bdbd36b1..e70b762f0b03 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -420,8 +420,11 @@ i915_gem_gtt_pread(struct drm_i915_gem_object *obj, page_length = remain < page_length ? remain : page_length; if (drm_mm_node_allocated(&node)) { ggtt->vm.insert_page(&ggtt->vm, - i915_gem_object_get_dma_address(obj, offset >> PAGE_SHIFT), - node.start, I915_CACHE_NONE, 0); + i915_gem_object_get_dma_address(obj, + offset >> PAGE_SHIFT), + node.start, + i915_gem_get_pat_index(i915, + I915_CACHE_NONE), 0); } else { page_base += offset & PAGE_MASK; } @@ -598,8 +601,11 @@ i915_gem_gtt_pwrite_fast(struct drm_i915_gem_object *obj, /* flush the write before we modify the GGTT */ intel_gt_flush_ggtt_writes(ggtt->vm.gt); ggtt->vm.insert_page(&ggtt->vm, - i915_gem_object_get_dma_address(obj, offset >> PAGE_SHIFT), - node.start, I915_CACHE_NONE, 0); + i915_gem_object_get_dma_address(obj, + offset >> PAGE_SHIFT), + node.start, + i915_gem_get_pat_index(i915, + I915_CACHE_NONE), 0); wmb(); /* flush modifications to the GGTT (insert_page) */ } else { page_base += offset & PAGE_MASK; @@ -1142,6 +1148,19 @@ int i915_gem_init(struct drm_i915_private *dev_priv) unsigned int i; int ret; + /* + * In the proccess of replacing cache_level with pat_index a tricky + * dependency is created on the definition of the enum i915_cache_level. + * in case this enum is changed, PTE encode would be broken. + * Add a WARNING here. And remove when we completely quit using this + * enum + */ + BUILD_BUG_ON(I915_CACHE_NONE != 0 || + I915_CACHE_LLC != 1 || + I915_CACHE_L3_LLC != 2 || + I915_CACHE_WT != 3 || + I915_MAX_CACHE_LEVEL != 4); + /* We need to fallback to 4K pages if host doesn't support huge gtt. */ if (intel_vgpu_active(dev_priv) && !intel_vgpu_has_huge_gtt(dev_priv)) RUNTIME_INFO(dev_priv)->page_sizes = I915_GTT_PAGE_SIZE_4K; diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c index 8ffdd7f47e93..566536499954 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.c +++ b/drivers/gpu/drm/i915/i915_gpu_error.c @@ -1122,10 +1122,14 @@ i915_vma_coredump_create(const struct intel_gt *gt, mutex_lock(&ggtt->error_mutex); if (ggtt->vm.raw_insert_page) ggtt->vm.raw_insert_page(&ggtt->vm, dma, slot, - I915_CACHE_NONE, 0); + i915_gem_get_pat_index(gt->i915, + I915_CACHE_NONE), + 0); else ggtt->vm.insert_page(&ggtt->vm, dma, slot, - I915_CACHE_NONE, 0); + i915_gem_get_pat_index(gt->i915, + I915_CACHE_NONE), + 0); mb(); s = io_mapping_map_wc(&ggtt->iomap, slot, PAGE_SIZE); diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c index 20a44788999e..a814775a363d 100644 --- a/drivers/gpu/drm/i915/i915_vma.c +++ b/drivers/gpu/drm/i915/i915_vma.c @@ -315,7 +315,7 @@ struct i915_vma_work { struct i915_vma_resource *vma_res; struct drm_i915_gem_object *obj; struct i915_sw_dma_fence_cb cb; - enum i915_cache_level cache_level; + unsigned int pat_index; unsigned int flags; }; @@ -334,7 +334,7 @@ static void __vma_bind(struct dma_fence_work *work) return; vma_res->ops->bind_vma(vma_res->vm, &vw->stash, - vma_res, vw->cache_level, vw->flags); + vma_res, vw->pat_index, vw->flags); } static void __vma_release(struct dma_fence_work *work) @@ -426,7 +426,7 @@ i915_vma_resource_init_from_vma(struct i915_vma_resource *vma_res, /** * i915_vma_bind - Sets up PTEs for an VMA in it's corresponding address space. * @vma: VMA to map - * @cache_level: mapping cache level + * @pat_index: PAT index to set in PTE * @flags: flags like global or local mapping * @work: preallocated worker for allocating and binding the PTE * @vma_res: pointer to a preallocated vma resource. The resource is either @@ -437,7 +437,7 @@ i915_vma_resource_init_from_vma(struct i915_vma_resource *vma_res, * Note that DMA addresses are also the only part of the SG table we care about. */ int i915_vma_bind(struct i915_vma *vma, - enum i915_cache_level cache_level, + unsigned int pat_index, u32 flags, struct i915_vma_work *work, struct i915_vma_resource *vma_res) @@ -507,7 +507,7 @@ int i915_vma_bind(struct i915_vma *vma, struct dma_fence *prev; work->vma_res = i915_vma_resource_get(vma->resource); - work->cache_level = cache_level; + work->pat_index = pat_index; work->flags = bind_flags; /* @@ -537,7 +537,7 @@ int i915_vma_bind(struct i915_vma *vma, return ret; } - vma->ops->bind_vma(vma->vm, NULL, vma->resource, cache_level, + vma->ops->bind_vma(vma->vm, NULL, vma->resource, pat_index, bind_flags); } @@ -814,7 +814,7 @@ i915_vma_insert(struct i915_vma *vma, struct i915_gem_ww_ctx *ww, color = 0; if (i915_vm_has_cache_coloring(vma->vm)) - color = vma->obj->cache_level; + color = vma->obj->pat_index; if (flags & PIN_OFFSET_FIXED) { u64 offset = flags & PIN_OFFSET_MASK; @@ -1518,7 +1518,7 @@ int i915_vma_pin_ww(struct i915_vma *vma, struct i915_gem_ww_ctx *ww, GEM_BUG_ON(!vma->pages); err = i915_vma_bind(vma, - vma->obj->cache_level, + vma->obj->pat_index, flags, work, vma_res); vma_res = NULL; if (err) diff --git a/drivers/gpu/drm/i915/i915_vma.h b/drivers/gpu/drm/i915/i915_vma.h index 38c8c66ed724..9a9729205d5b 100644 --- a/drivers/gpu/drm/i915/i915_vma.h +++ b/drivers/gpu/drm/i915/i915_vma.h @@ -250,7 +250,7 @@ i915_vma_compare(struct i915_vma *vma, struct i915_vma_work *i915_vma_work(void); int i915_vma_bind(struct i915_vma *vma, - enum i915_cache_level cache_level, + unsigned int pat_index, u32 flags, struct i915_vma_work *work, struct i915_vma_resource *vma_res); diff --git a/drivers/gpu/drm/i915/i915_vma_types.h b/drivers/gpu/drm/i915/i915_vma_types.h index 77fda2244d16..64472b7f0e77 100644 --- a/drivers/gpu/drm/i915/i915_vma_types.h +++ b/drivers/gpu/drm/i915/i915_vma_types.h @@ -32,8 +32,6 @@ #include "gem/i915_gem_object_types.h" -enum i915_cache_level; - /** * DOC: Global GTT views * diff --git a/drivers/gpu/drm/i915/selftests/i915_gem.c b/drivers/gpu/drm/i915/selftests/i915_gem.c index d91d0ade8abd..61da4ed9d521 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem.c @@ -57,7 +57,10 @@ static void trash_stolen(struct drm_i915_private *i915) u32 __iomem *s; int x; - ggtt->vm.insert_page(&ggtt->vm, dma, slot, I915_CACHE_NONE, 0); + ggtt->vm.insert_page(&ggtt->vm, dma, slot, + i915_gem_get_pat_index(i915, + I915_CACHE_NONE), + 0); s = io_mapping_map_atomic_wc(&ggtt->iomap, slot); for (x = 0; x < PAGE_SIZE / sizeof(u32); x++) { diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_evict.c b/drivers/gpu/drm/i915/selftests/i915_gem_evict.c index f68ef4074088..f8fe3681c3dc 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_evict.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem_evict.c @@ -246,7 +246,7 @@ static int igt_evict_for_cache_color(void *arg) struct drm_mm_node target = { .start = I915_GTT_PAGE_SIZE * 2, .size = I915_GTT_PAGE_SIZE, - .color = I915_CACHE_LLC, + .color = i915_gem_get_pat_index(gt->i915, I915_CACHE_LLC), }; struct drm_i915_gem_object *obj; struct i915_vma *vma; @@ -309,7 +309,7 @@ static int igt_evict_for_cache_color(void *arg) /* Attempt to remove the first *pinned* vma, by removing the (empty) * neighbour -- this should fail. */ - target.color = I915_CACHE_L3_LLC; + target.color = i915_gem_get_pat_index(gt->i915, I915_CACHE_L3_LLC); mutex_lock(&ggtt->vm.mutex); err = i915_gem_evict_for_node(&ggtt->vm, NULL, &target, 0); diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c index 154801f1c468..36940ef10108 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c @@ -135,7 +135,7 @@ fake_dma_object(struct drm_i915_private *i915, u64 size) obj->write_domain = I915_GEM_DOMAIN_CPU; obj->read_domains = I915_GEM_DOMAIN_CPU; - obj->cache_level = I915_CACHE_NONE; + obj->pat_index = i915_gem_get_pat_index(i915, I915_CACHE_NONE); /* Preallocate the "backing storage" */ if (i915_gem_object_pin_pages_unlocked(obj)) @@ -359,7 +359,9 @@ static int lowlevel_hole(struct i915_address_space *vm, with_intel_runtime_pm(vm->gt->uncore->rpm, wakeref) vm->insert_entries(vm, mock_vma_res, - I915_CACHE_NONE, 0); + i915_gem_get_pat_index(vm->i915, + I915_CACHE_NONE), + 0); } count = n; @@ -1377,7 +1379,10 @@ static int igt_ggtt_page(void *arg) ggtt->vm.insert_page(&ggtt->vm, i915_gem_object_get_dma_address(obj, 0), - offset, I915_CACHE_NONE, 0); + offset, + i915_gem_get_pat_index(i915, + I915_CACHE_NONE), + 0); } order = i915_random_order(count, &prng); @@ -1510,7 +1515,7 @@ static int reserve_gtt_with_resource(struct i915_vma *vma, u64 offset) mutex_lock(&vm->mutex); err = i915_gem_gtt_reserve(vm, NULL, &vma->node, obj->base.size, offset, - obj->cache_level, + obj->pat_index, 0); if (!err) { i915_vma_resource_init_from_vma(vma_res, vma); @@ -1690,7 +1695,7 @@ static int insert_gtt_with_resource(struct i915_vma *vma) mutex_lock(&vm->mutex); err = i915_gem_gtt_insert(vm, NULL, &vma->node, obj->base.size, 0, - obj->cache_level, 0, vm->total, 0); + obj->pat_index, 0, vm->total, 0); if (!err) { i915_vma_resource_init_from_vma(vma_res, vma); vma->resource = vma_res; diff --git a/drivers/gpu/drm/i915/selftests/intel_memory_region.c b/drivers/gpu/drm/i915/selftests/intel_memory_region.c index 3b18e5905c86..d985d9bae2e8 100644 --- a/drivers/gpu/drm/i915/selftests/intel_memory_region.c +++ b/drivers/gpu/drm/i915/selftests/intel_memory_region.c @@ -1070,7 +1070,9 @@ static int igt_lmem_write_cpu(void *arg) /* Put the pages into a known state -- from the gpu for added fun */ intel_engine_pm_get(engine); err = intel_context_migrate_clear(engine->gt->migrate.context, NULL, - obj->mm.pages->sgl, I915_CACHE_NONE, + obj->mm.pages->sgl, + i915_gem_get_pat_index(i915, + I915_CACHE_NONE), true, 0xdeadbeaf, &rq); if (rq) { dma_resv_add_fence(obj->base.resv, &rq->fence, diff --git a/drivers/gpu/drm/i915/selftests/mock_gtt.c b/drivers/gpu/drm/i915/selftests/mock_gtt.c index ece97e4faacb..a516c0aa88fd 100644 --- a/drivers/gpu/drm/i915/selftests/mock_gtt.c +++ b/drivers/gpu/drm/i915/selftests/mock_gtt.c @@ -27,21 +27,21 @@ static void mock_insert_page(struct i915_address_space *vm, dma_addr_t addr, u64 offset, - enum i915_cache_level level, + unsigned int pat_index, u32 flags) { } static void mock_insert_entries(struct i915_address_space *vm, struct i915_vma_resource *vma_res, - enum i915_cache_level level, u32 flags) + unsigned int pat_index, u32 flags) { } static void mock_bind_ppgtt(struct i915_address_space *vm, struct i915_vm_pt_stash *stash, struct i915_vma_resource *vma_res, - enum i915_cache_level cache_level, + unsigned int pat_index, u32 flags) { GEM_BUG_ON(flags & I915_VMA_GLOBAL_BIND); @@ -94,7 +94,7 @@ struct i915_ppgtt *mock_ppgtt(struct drm_i915_private *i915, const char *name) static void mock_bind_ggtt(struct i915_address_space *vm, struct i915_vm_pt_stash *stash, struct i915_vma_resource *vma_res, - enum i915_cache_level cache_level, + unsigned int pat_index, u32 flags) { } From d6bf74de0772e233f97a82cc4dad2ec14b14fb28 Mon Sep 17 00:00:00 2001 From: Alan Previn Date: Thu, 11 May 2023 16:17:31 -0700 Subject: [PATCH 083/108] drm/i915/pxp: Add GSC-CS back-end resource init and cleanup For MTL, the PXP back-end transport uses the GSC engine to submit HECI packets through the HW to the GSC firmware for PXP arb session management. This submission uses a non-priveleged batch buffer, a buffer for the command packet and of course a context targeting the GSC-CS. Thus for MTL, we need to allocate and free a set of execution submission resources for the management of the arbitration session. Lets start with the context creation first since that object and its usage is very straight-forward. We'll add the buffer allocation and freeing later when we introduce the gsccs' send-message function. Do this one time allocation of gsccs specific resources in a new gsccs source file with intel_pxp_gsccs_init / fini functions and hook them up from the PXP front-end. Signed-off-by: Alan Previn Reviewed-by: Daniele Ceraolo Spurio Signed-off-by: Radhakrishna Sripada Link: https://patchwork.freedesktop.org/patch/msgid/20230511231738.1077674-2-alan.previn.teres.alexis@intel.com --- drivers/gpu/drm/i915/Makefile | 1 + drivers/gpu/drm/i915/pxp/intel_pxp.c | 20 +++++-- drivers/gpu/drm/i915/pxp/intel_pxp_gsccs.c | 63 ++++++++++++++++++++++ drivers/gpu/drm/i915/pxp/intel_pxp_gsccs.h | 29 ++++++++++ drivers/gpu/drm/i915/pxp/intel_pxp_tee.c | 2 - drivers/gpu/drm/i915/pxp/intel_pxp_types.h | 8 +++ 6 files changed, 117 insertions(+), 6 deletions(-) create mode 100644 drivers/gpu/drm/i915/pxp/intel_pxp_gsccs.c create mode 100644 drivers/gpu/drm/i915/pxp/intel_pxp_gsccs.h diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile index 4494dcd2eb5d..c58d7b193664 100644 --- a/drivers/gpu/drm/i915/Makefile +++ b/drivers/gpu/drm/i915/Makefile @@ -339,6 +339,7 @@ i915-y += \ i915-$(CONFIG_DRM_I915_PXP) += \ pxp/intel_pxp_cmd.o \ pxp/intel_pxp_debugfs.o \ + pxp/intel_pxp_gsccs.o \ pxp/intel_pxp_irq.o \ pxp/intel_pxp_pm.o \ pxp/intel_pxp_session.o diff --git a/drivers/gpu/drm/i915/pxp/intel_pxp.c b/drivers/gpu/drm/i915/pxp/intel_pxp.c index 9d4c7724e98e..f93aa171aa1e 100644 --- a/drivers/gpu/drm/i915/pxp/intel_pxp.c +++ b/drivers/gpu/drm/i915/pxp/intel_pxp.c @@ -12,6 +12,7 @@ #include "i915_drv.h" #include "intel_pxp.h" +#include "intel_pxp_gsccs.h" #include "intel_pxp_irq.h" #include "intel_pxp_session.h" #include "intel_pxp_tee.h" @@ -132,7 +133,10 @@ static void pxp_init_full(struct intel_pxp *pxp) if (ret) return; - ret = intel_pxp_tee_component_init(pxp); + if (HAS_ENGINE(pxp->ctrl_gt, GSC0)) + ret = intel_pxp_gsccs_init(pxp); + else + ret = intel_pxp_tee_component_init(pxp); if (ret) goto out_context; @@ -165,9 +169,12 @@ static struct intel_gt *find_gt_for_required_protected_content(struct drm_i915_p /* * For MTL onwards, PXP-controller-GT needs to have a valid GSC engine * on the media GT. NOTE: if we have a media-tile with a GSC-engine, - * the VDBOX is already present so skip that check + * the VDBOX is already present so skip that check. We also have to + * ensure the GSC and HUC firmware are coming online */ - if (i915->media_gt && HAS_ENGINE(i915->media_gt, GSC0)) + if (i915->media_gt && HAS_ENGINE(i915->media_gt, GSC0) && + intel_uc_fw_is_loadable(&i915->media_gt->uc.gsc.fw) && + intel_uc_fw_is_loadable(&i915->media_gt->uc.huc.fw)) return i915->media_gt; /* @@ -207,7 +214,9 @@ int intel_pxp_init(struct drm_i915_private *i915) if (!i915->pxp) return -ENOMEM; + /* init common info used by all feature-mode usages*/ i915->pxp->ctrl_gt = gt; + mutex_init(&i915->pxp->tee_mutex); /* * If full PXP feature is not available but HuC is loaded by GSC on pre-MTL @@ -229,7 +238,10 @@ void intel_pxp_fini(struct drm_i915_private *i915) i915->pxp->arb_is_valid = false; - intel_pxp_tee_component_fini(i915->pxp); + if (HAS_ENGINE(i915->pxp->ctrl_gt, GSC0)) + intel_pxp_gsccs_fini(i915->pxp); + else + intel_pxp_tee_component_fini(i915->pxp); destroy_vcs_context(i915->pxp); diff --git a/drivers/gpu/drm/i915/pxp/intel_pxp_gsccs.c b/drivers/gpu/drm/i915/pxp/intel_pxp_gsccs.c new file mode 100644 index 000000000000..bad55719a7ac --- /dev/null +++ b/drivers/gpu/drm/i915/pxp/intel_pxp_gsccs.c @@ -0,0 +1,63 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright(c) 2023 Intel Corporation. + */ + +#include "gem/i915_gem_internal.h" + +#include "gt/intel_context.h" + +#include "i915_drv.h" +#include "intel_pxp_cmd_interface_43.h" +#include "intel_pxp_gsccs.h" +#include "intel_pxp_types.h" + +static void +gsccs_destroy_execution_resource(struct intel_pxp *pxp) +{ + struct gsccs_session_resources *exec_res = &pxp->gsccs_res; + + if (exec_res->ce) + intel_context_put(exec_res->ce); + + memset(exec_res, 0, sizeof(*exec_res)); +} + +static int +gsccs_allocate_execution_resource(struct intel_pxp *pxp) +{ + struct intel_gt *gt = pxp->ctrl_gt; + struct gsccs_session_resources *exec_res = &pxp->gsccs_res; + struct intel_engine_cs *engine = gt->engine[GSC0]; + struct intel_context *ce; + + /* + * First, ensure the GSC engine is present. + * NOTE: Backend would only be called with the correct gt. + */ + if (!engine) + return -ENODEV; + + /* Finally, create an intel_context to be used during the submission */ + ce = intel_context_create(engine); + if (IS_ERR(ce)) { + drm_err(>->i915->drm, "Failed creating gsccs backend ctx\n"); + return PTR_ERR(ce); + } + + i915_vm_put(ce->vm); + ce->vm = i915_vm_get(pxp->ctrl_gt->vm); + exec_res->ce = ce; + + return 0; +} + +void intel_pxp_gsccs_fini(struct intel_pxp *pxp) +{ + gsccs_destroy_execution_resource(pxp); +} + +int intel_pxp_gsccs_init(struct intel_pxp *pxp) +{ + return gsccs_allocate_execution_resource(pxp); +} diff --git a/drivers/gpu/drm/i915/pxp/intel_pxp_gsccs.h b/drivers/gpu/drm/i915/pxp/intel_pxp_gsccs.h new file mode 100644 index 000000000000..354ea9a8f940 --- /dev/null +++ b/drivers/gpu/drm/i915/pxp/intel_pxp_gsccs.h @@ -0,0 +1,29 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright(c) 2022, Intel Corporation. All rights reserved. + */ + +#ifndef __INTEL_PXP_GSCCS_H__ +#define __INTEL_PXP_GSCCS_H__ + +#include + +struct intel_pxp; + +#ifdef CONFIG_DRM_I915_PXP +void intel_pxp_gsccs_fini(struct intel_pxp *pxp); +int intel_pxp_gsccs_init(struct intel_pxp *pxp); + +#else +static inline void intel_pxp_gsccs_fini(struct intel_pxp *pxp) +{ +} + +static inline int intel_pxp_gsccs_init(struct intel_pxp *pxp) +{ + return 0; +} + +#endif + +#endif /*__INTEL_PXP_GSCCS_H__ */ diff --git a/drivers/gpu/drm/i915/pxp/intel_pxp_tee.c b/drivers/gpu/drm/i915/pxp/intel_pxp_tee.c index a2846b1dbbee..1ce07d7e8769 100644 --- a/drivers/gpu/drm/i915/pxp/intel_pxp_tee.c +++ b/drivers/gpu/drm/i915/pxp/intel_pxp_tee.c @@ -284,8 +284,6 @@ int intel_pxp_tee_component_init(struct intel_pxp *pxp) struct intel_gt *gt = pxp->ctrl_gt; struct drm_i915_private *i915 = gt->i915; - mutex_init(&pxp->tee_mutex); - ret = alloc_streaming_command(pxp); if (ret) return ret; diff --git a/drivers/gpu/drm/i915/pxp/intel_pxp_types.h b/drivers/gpu/drm/i915/pxp/intel_pxp_types.h index c445f7f2f47a..1759106b9a8e 100644 --- a/drivers/gpu/drm/i915/pxp/intel_pxp_types.h +++ b/drivers/gpu/drm/i915/pxp/intel_pxp_types.h @@ -26,6 +26,14 @@ struct intel_pxp { */ struct intel_gt *ctrl_gt; + /** + * @gsccs_res: resources for request submission for platforms that have a GSC engine. + */ + struct gsccs_session_resources { + u64 host_session_handle; /* used by firmware to link commands to sessions */ + struct intel_context *ce; /* context for gsc command submission */ + } gsccs_res; + /** * @pxp_component: i915_pxp_component struct of the bound mei_pxp * module. Only set and cleared inside component bind/unbind functions, From 5adacf19f6c3fa7d6119878246e9a253867e14c9 Mon Sep 17 00:00:00 2001 From: Alan Previn Date: Thu, 11 May 2023 16:17:32 -0700 Subject: [PATCH 084/108] drm/i915/pxp: Add MTL hw-plumbing enabling for KCR operation Add MTL hw-plumbing enabling for KCR operation under PXP which includes: 1. Updating 'pick-gt' to get the media tile for KCR interrupt handling 2. Adding MTL's KCR registers for PXP operation (init, status-checking, etc.). While doing #2, lets create a separate registers header file for PXP to be consistent with other i915 global subsystems. Signed-off-by: Alan Previn Reviewed-by: Daniele Ceraolo Spurio Signed-off-by: Radhakrishna Sripada Link: https://patchwork.freedesktop.org/patch/msgid/20230511231738.1077674-3-alan.previn.teres.alexis@intel.com --- drivers/gpu/drm/i915/gt/intel_gt_irq.c | 3 +- drivers/gpu/drm/i915/pxp/intel_pxp.c | 34 ++++++++++++-------- drivers/gpu/drm/i915/pxp/intel_pxp_regs.h | 27 ++++++++++++++++ drivers/gpu/drm/i915/pxp/intel_pxp_session.c | 12 +++---- drivers/gpu/drm/i915/pxp/intel_pxp_types.h | 6 ++++ 5 files changed, 59 insertions(+), 23 deletions(-) create mode 100644 drivers/gpu/drm/i915/pxp/intel_pxp_regs.h diff --git a/drivers/gpu/drm/i915/gt/intel_gt_irq.c b/drivers/gpu/drm/i915/gt/intel_gt_irq.c index e13cfbe2daf5..acec6566b914 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_irq.c +++ b/drivers/gpu/drm/i915/gt/intel_gt_irq.c @@ -106,7 +106,8 @@ static struct intel_gt *pick_gt(struct intel_gt *gt, u8 class, u8 instance) case OTHER_CLASS: if (instance == OTHER_GSC_HECI_2_INSTANCE) return media_gt; - if (instance == OTHER_GSC_INSTANCE && HAS_ENGINE(media_gt, GSC0)) + if ((instance == OTHER_GSC_INSTANCE || instance == OTHER_KCR_INSTANCE) && + HAS_ENGINE(media_gt, GSC0)) return media_gt; fallthrough; default: diff --git a/drivers/gpu/drm/i915/pxp/intel_pxp.c b/drivers/gpu/drm/i915/pxp/intel_pxp.c index f93aa171aa1e..8949d4be7882 100644 --- a/drivers/gpu/drm/i915/pxp/intel_pxp.c +++ b/drivers/gpu/drm/i915/pxp/intel_pxp.c @@ -14,6 +14,7 @@ #include "intel_pxp.h" #include "intel_pxp_gsccs.h" #include "intel_pxp_irq.h" +#include "intel_pxp_regs.h" #include "intel_pxp_session.h" #include "intel_pxp_tee.h" #include "intel_pxp_types.h" @@ -61,21 +62,22 @@ bool intel_pxp_is_active(const struct intel_pxp *pxp) return IS_ENABLED(CONFIG_DRM_I915_PXP) && pxp && pxp->arb_is_valid; } -/* KCR register definitions */ -#define KCR_INIT _MMIO(0x320f0) -/* Setting KCR Init bit is required after system boot */ -#define KCR_INIT_ALLOW_DISPLAY_ME_WRITES REG_BIT(14) - -static void kcr_pxp_enable(struct intel_gt *gt) +static void kcr_pxp_set_status(const struct intel_pxp *pxp, bool enable) { - intel_uncore_write(gt->uncore, KCR_INIT, - _MASKED_BIT_ENABLE(KCR_INIT_ALLOW_DISPLAY_ME_WRITES)); + u32 val = enable ? _MASKED_BIT_ENABLE(KCR_INIT_ALLOW_DISPLAY_ME_WRITES) : + _MASKED_BIT_DISABLE(KCR_INIT_ALLOW_DISPLAY_ME_WRITES); + + intel_uncore_write(pxp->ctrl_gt->uncore, KCR_INIT(pxp->kcr_base), val); } -static void kcr_pxp_disable(struct intel_gt *gt) +static void kcr_pxp_enable(const struct intel_pxp *pxp) { - intel_uncore_write(gt->uncore, KCR_INIT, - _MASKED_BIT_DISABLE(KCR_INIT_ALLOW_DISPLAY_ME_WRITES)); + kcr_pxp_set_status(pxp, true); +} + +static void kcr_pxp_disable(const struct intel_pxp *pxp) +{ + kcr_pxp_set_status(pxp, false); } static int create_vcs_context(struct intel_pxp *pxp) @@ -127,6 +129,11 @@ static void pxp_init_full(struct intel_pxp *pxp) init_completion(&pxp->termination); complete_all(&pxp->termination); + if (pxp->ctrl_gt->type == GT_MEDIA) + pxp->kcr_base = MTL_KCR_BASE; + else + pxp->kcr_base = GEN12_KCR_BASE; + intel_pxp_session_management_init(pxp); ret = create_vcs_context(pxp); @@ -369,14 +376,13 @@ int intel_pxp_start(struct intel_pxp *pxp) void intel_pxp_init_hw(struct intel_pxp *pxp) { - kcr_pxp_enable(pxp->ctrl_gt); + kcr_pxp_enable(pxp); intel_pxp_irq_enable(pxp); } void intel_pxp_fini_hw(struct intel_pxp *pxp) { - kcr_pxp_disable(pxp->ctrl_gt); - + kcr_pxp_disable(pxp); intel_pxp_irq_disable(pxp); } diff --git a/drivers/gpu/drm/i915/pxp/intel_pxp_regs.h b/drivers/gpu/drm/i915/pxp/intel_pxp_regs.h new file mode 100644 index 000000000000..a9e7e6efa4c7 --- /dev/null +++ b/drivers/gpu/drm/i915/pxp/intel_pxp_regs.h @@ -0,0 +1,27 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright(c) 2023, Intel Corporation. All rights reserved. + */ + +#ifndef __INTEL_PXP_REGS_H__ +#define __INTEL_PXP_REGS_H__ + +#include "i915_reg_defs.h" + +/* KCR subsystem register base address */ +#define GEN12_KCR_BASE 0x32000 +#define MTL_KCR_BASE 0x386000 + +/* KCR enable/disable control */ +#define KCR_INIT(base) _MMIO((base) + 0xf0) + +/* Setting KCR Init bit is required after system boot */ +#define KCR_INIT_ALLOW_DISPLAY_ME_WRITES REG_BIT(14) + +/* KCR hwdrm session in play status 0-31 */ +#define KCR_SIP(base) _MMIO((base) + 0x260) + +/* PXP global terminate register for session termination */ +#define KCR_GLOBAL_TERMINATE(base) _MMIO((base) + 0xf8) + +#endif /* __INTEL_PXP_REGS_H__ */ diff --git a/drivers/gpu/drm/i915/pxp/intel_pxp_session.c b/drivers/gpu/drm/i915/pxp/intel_pxp_session.c index 7de849cb6c47..7899079e17b0 100644 --- a/drivers/gpu/drm/i915/pxp/intel_pxp_session.c +++ b/drivers/gpu/drm/i915/pxp/intel_pxp_session.c @@ -10,14 +10,10 @@ #include "intel_pxp_session.h" #include "intel_pxp_tee.h" #include "intel_pxp_types.h" +#include "intel_pxp_regs.h" #define ARB_SESSION I915_PROTECTED_CONTENT_DEFAULT_SESSION /* shorter define */ -#define GEN12_KCR_SIP _MMIO(0x32260) /* KCR hwdrm session in play 0-31 */ - -/* PXP global terminate register for session termination */ -#define PXP_GLOBAL_TERMINATE _MMIO(0x320f8) - static bool intel_pxp_session_is_in_play(struct intel_pxp *pxp, u32 id) { struct intel_uncore *uncore = pxp->ctrl_gt->uncore; @@ -26,7 +22,7 @@ static bool intel_pxp_session_is_in_play(struct intel_pxp *pxp, u32 id) /* if we're suspended the session is considered off */ with_intel_runtime_pm_if_in_use(uncore->rpm, wakeref) - sip = intel_uncore_read(uncore, GEN12_KCR_SIP); + sip = intel_uncore_read(uncore, KCR_SIP(pxp->kcr_base)); return sip & BIT(id); } @@ -44,7 +40,7 @@ static int pxp_wait_for_session_state(struct intel_pxp *pxp, u32 id, bool in_pla return in_play ? -ENODEV : 0; ret = intel_wait_for_register(uncore, - GEN12_KCR_SIP, + KCR_SIP(pxp->kcr_base), mask, in_play ? mask : 0, 100); @@ -108,7 +104,7 @@ static int pxp_terminate_arb_session_and_global(struct intel_pxp *pxp) return ret; } - intel_uncore_write(gt->uncore, PXP_GLOBAL_TERMINATE, 1); + intel_uncore_write(gt->uncore, KCR_GLOBAL_TERMINATE(pxp->kcr_base), 1); intel_pxp_tee_end_arb_fw_session(pxp, ARB_SESSION); diff --git a/drivers/gpu/drm/i915/pxp/intel_pxp_types.h b/drivers/gpu/drm/i915/pxp/intel_pxp_types.h index 1759106b9a8e..87d0d8da98cd 100644 --- a/drivers/gpu/drm/i915/pxp/intel_pxp_types.h +++ b/drivers/gpu/drm/i915/pxp/intel_pxp_types.h @@ -26,6 +26,12 @@ struct intel_pxp { */ struct intel_gt *ctrl_gt; + /** + * @kcr_base: base mmio offset for the KCR engine which is different on legacy platforms + * vs newer platforms where the KCR is inside the media-tile. + */ + u32 kcr_base; + /** * @gsccs_res: resources for request submission for platforms that have a GSC engine. */ From e5e1e6d28ebcc0fe52567b1301c23f05d4c79df7 Mon Sep 17 00:00:00 2001 From: Alan Previn Date: Thu, 11 May 2023 16:17:33 -0700 Subject: [PATCH 085/108] drm/i915/pxp: Add MTL helpers to submit Heci-Cmd-Packet to GSC Add helper functions into a new file for heci-packet-submission. The helpers will handle generating the MTL GSC-CS Memory-Header and submission of the Heci-Cmd-Packet instructions to the engine. NOTE1: These common functions for heci-packet-submission will be used by different i915 callers: 1- GSC-SW-Proxy: This is pending upstream publication awaiting a few remaining opens 2- MTL-HDCP: An equivalent patch has also been published at: https://patchwork.freedesktop.org/series/111876/. (Patch 1) 3- PXP: This series. NOTE2: A difference in this patch vs what is appearing is in bullet 2 above is that HDCP (and SW-Proxy) will be using priveleged submission (GGTT and common gsc-uc-context) while PXP will be using non-priveleged PPGTT, context and batch buffer. Therefore this patch will only slightly overlap with the MTL-HDCP patches despite have very similar function names (emit_foo vs emit_nonpriv_foo). This is because HECI_CMD_PKT instructions require different flows and hw-specific code when done via PPGTT based submission (not different from other engines). MTL-HDCP contains the same intel_gsc_mtl_header_t structures as this but the helpers there are different. Both add the same new file names. NOTE3: Additional clarity about the heci-cmd-pkt layout and where the common helpers come in: - On MTL, when an i915 subsystem needs to send a command request to the security firmware, it will send that via the GSC- engine-command-streamer. - However those commands, (lets call them "gsc_specific_fw_api" calls), are not understood by the GSC command streamer hw. - The GSC CS only looks at the GSC_HECI_CMD_PKT instruction and passes it along to the GSC firmware. - The GSC FW on the other hand needs additional metadata to know which usage service is being called (PXP, HDCP, proxy, etc) along with session specific info. Thus an extra header called GSC-CS HECI Memory Header, (C) in below diagram is prepended before the FW specific API, (D). - Thus, the structural layout of the request submitted would need to look like the diagram below (for non-priv PXP). - In the diagram, the common helper for HDCP, (GSC-Sw-Proxy) and PXP (i.e. new function intel_gsc_uc_heci_cmd_emit_mtl_header) will populate blob (C) while additional helpers, different for PPGGTT (this patch) vs GGTT (HDCP series) will populate blobs (A) and (B) below. ___________________________________________________________ (A) | MI_BATCH_BUFFER_START (ppgtt, batchbuff-addr, ...) | | | | | _|________________________________________________ | | (B)| GSC_HECI_CMD_PKT (pkt-addr-in, pkt-size-in, | | | | pkt-addr-out, pkt-size-out) |-------- | | MI_BATCH_BUFFER_END | | | | |________________________________________________| | | | | | |_________________________________________________________| | | --------------------------------------------------------- | \|/ ______V___________________________________________ | _________________________________________ | |(C)| | | | | struct intel_gsc_mtl_header { | | | | validity marker | | | | heci_clent_id | | | | ... | | | | } | | | |_______________________________________| | |(D)| | | | | struct gsc_fw_specific_api_foobar { | | | | ... | | | | For an example, see | | | | 'struct pxp43_create_arb_in' at | | | | intel_pxp_cmd_interface_43.h | | | | | | | | } | | | | Struture depends on command type | | | | struct gsc_fw_specific_api_foobar { | | | |_______________________________________| | |________________________________________________| That said, this patch provides basic helpers but leaves the PXP subsystem (i.e. the caller) to handle (D) and everything else such as input/output size verification or handling the responses from security firmware (for example, requiring a retry). Signed-off-by: Alan Previn Reviewed-by: Daniele Ceraolo Spurio Signed-off-by: Radhakrishna Sripada Link: https://patchwork.freedesktop.org/patch/msgid/20230511231738.1077674-4-alan.previn.teres.alexis@intel.com --- .../i915/gt/uc/intel_gsc_uc_heci_cmd_submit.c | 102 ++++++++++++++++++ .../i915/gt/uc/intel_gsc_uc_heci_cmd_submit.h | 23 ++++ 2 files changed, 125 insertions(+) diff --git a/drivers/gpu/drm/i915/gt/uc/intel_gsc_uc_heci_cmd_submit.c b/drivers/gpu/drm/i915/gt/uc/intel_gsc_uc_heci_cmd_submit.c index ea0da06e2f39..579c0f5a1438 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_gsc_uc_heci_cmd_submit.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_gsc_uc_heci_cmd_submit.c @@ -3,6 +3,7 @@ * Copyright © 2023 Intel Corporation */ +#include "gt/intel_context.h" #include "gt/intel_engine_pm.h" #include "gt/intel_gpu_commands.h" #include "gt/intel_gt.h" @@ -107,3 +108,104 @@ void intel_gsc_uc_heci_cmd_emit_mtl_header(struct intel_gsc_mtl_header *header, header->header_version = MTL_GSC_HEADER_VERSION; header->message_size = message_size; } + +static void +emit_gsc_heci_pkt_nonpriv(u32 *cmd, struct intel_gsc_heci_non_priv_pkt *pkt) +{ + *cmd++ = GSC_HECI_CMD_PKT; + *cmd++ = lower_32_bits(pkt->addr_in); + *cmd++ = upper_32_bits(pkt->addr_in); + *cmd++ = pkt->size_in; + *cmd++ = lower_32_bits(pkt->addr_out); + *cmd++ = upper_32_bits(pkt->addr_out); + *cmd++ = pkt->size_out; + *cmd++ = 0; + *cmd++ = MI_BATCH_BUFFER_END; +} + +int +intel_gsc_uc_heci_cmd_submit_nonpriv(struct intel_gsc_uc *gsc, + struct intel_context *ce, + struct intel_gsc_heci_non_priv_pkt *pkt, + u32 *cmd, int timeout_ms) +{ + struct intel_engine_cs *engine; + struct i915_gem_ww_ctx ww; + struct i915_request *rq; + int err, trials = 0; + + i915_gem_ww_ctx_init(&ww, false); +retry: + err = i915_gem_object_lock(pkt->bb_vma->obj, &ww); + if (err) + goto out_ww; + err = i915_gem_object_lock(pkt->heci_pkt_vma->obj, &ww); + if (err) + goto out_ww; + err = intel_context_pin_ww(ce, &ww); + if (err) + goto out_ww; + + rq = i915_request_create(ce); + if (IS_ERR(rq)) { + err = PTR_ERR(rq); + goto out_unpin_ce; + } + + emit_gsc_heci_pkt_nonpriv(cmd, pkt); + + err = i915_vma_move_to_active(pkt->bb_vma, rq, 0); + if (err) + goto out_rq; + err = i915_vma_move_to_active(pkt->heci_pkt_vma, rq, EXEC_OBJECT_WRITE); + if (err) + goto out_rq; + + engine = rq->context->engine; + if (engine->emit_init_breadcrumb) { + err = engine->emit_init_breadcrumb(rq); + if (err) + goto out_rq; + } + + err = engine->emit_bb_start(rq, i915_vma_offset(pkt->bb_vma), PAGE_SIZE, 0); + if (err) + goto out_rq; + + err = ce->engine->emit_flush(rq, 0); + if (err) + drm_err(&gsc_uc_to_gt(gsc)->i915->drm, + "Failed emit-flush for gsc-heci-non-priv-pkterr=%d\n", err); + +out_rq: + i915_request_get(rq); + + if (unlikely(err)) + i915_request_set_error_once(rq, err); + + i915_request_add(rq); + + if (!err) { + if (i915_request_wait(rq, I915_WAIT_INTERRUPTIBLE, + msecs_to_jiffies(timeout_ms)) < 0) + err = -ETIME; + } + + i915_request_put(rq); + +out_unpin_ce: + intel_context_unpin(ce); +out_ww: + if (err == -EDEADLK) { + err = i915_gem_ww_ctx_backoff(&ww); + if (!err) { + if (++trials < 10) + goto retry; + else + err = EAGAIN; + } + } + i915_gem_ww_ctx_fini(&ww); + + return err; +} diff --git a/drivers/gpu/drm/i915/gt/uc/intel_gsc_uc_heci_cmd_submit.h b/drivers/gpu/drm/i915/gt/uc/intel_gsc_uc_heci_cmd_submit.h index 8f199d5f963e..e790608745d4 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_gsc_uc_heci_cmd_submit.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_gsc_uc_heci_cmd_submit.h @@ -8,7 +8,10 @@ #include +struct i915_vma; +struct intel_context; struct intel_gsc_uc; + struct intel_gsc_mtl_header { u32 validity_marker; #define GSC_HECI_VALIDITY_MARKER 0xA578875A @@ -59,4 +62,24 @@ int intel_gsc_uc_heci_cmd_submit_packet(struct intel_gsc_uc *gsc, void intel_gsc_uc_heci_cmd_emit_mtl_header(struct intel_gsc_mtl_header *header, u8 heci_client_id, u32 message_size, u64 host_session_id); + +struct intel_gsc_heci_non_priv_pkt { + u64 addr_in; + u32 size_in; + u64 addr_out; + u32 size_out; + struct i915_vma *heci_pkt_vma; + struct i915_vma *bb_vma; +}; + +void +intel_gsc_uc_heci_cmd_emit_mtl_header(struct intel_gsc_mtl_header *header, + u8 heci_client_id, u32 msg_size, + u64 host_session_id); + +int +intel_gsc_uc_heci_cmd_submit_nonpriv(struct intel_gsc_uc *gsc, + struct intel_context *ce, + struct intel_gsc_heci_non_priv_pkt *pkt, + u32 *cs, int timeout_ms); #endif From dc9ac125d81faf4761574a9f613ebc8eb35717e1 Mon Sep 17 00:00:00 2001 From: Alan Previn Date: Thu, 11 May 2023 16:17:34 -0700 Subject: [PATCH 086/108] drm/i915/pxp: Add GSC-CS backend to send GSC fw messages Add GSC engine based method for sending PXP firmware packets to the GSC firmware for MTL (and future) products. Use the newly added helpers to populate the GSC-CS memory header and send the message packet to the FW by dispatching the GSC_HECI_CMD_PKT instruction on the GSC engine. We use non-priveleged batches for submission to GSC engine which require two buffers for the request: - a buffer for the HECI packet that contains PXP FW commands - a batch-buffer that contains the engine instruction for sending the HECI packet to the GSC firmware. Thus, add the allocation and freeing of these buffers in gsccs init and fini. The GSC-fw may reply to commands with a SUCCESS but with an additional pending-bit set in the reply packet. This bit means the GSC-FW is currently busy and the caller needs to try again with the gsc_message_handle the fw returned. Thus, add a wrapper to continuously retry send_message while replaying the gsc_message_handle. Retries need to follow the arch-spec count and delay until GSC-FW replies with the real SUCCESS or timeout after that spec'd delay. The GSC-fw requires a non-zero host_session_handle provided by the caller to enable gsc_message_handle tracking. Thus, allocate the host_session_handle at init and destroy it at fini (the latter requiring an FYI to the gsc-firmware). Signed-off-by: Alan Previn Reviewed-by: Daniele Ceraolo Spurio Signed-off-by: Radhakrishna Sripada Link: https://patchwork.freedesktop.org/patch/msgid/20230511231738.1077674-5-alan.previn.teres.alexis@intel.com --- .../i915/gt/uc/intel_gsc_uc_heci_cmd_submit.h | 3 +- .../drm/i915/pxp/intel_pxp_cmd_interface_43.h | 3 + drivers/gpu/drm/i915/pxp/intel_pxp_gsccs.c | 240 +++++++++++++++++- drivers/gpu/drm/i915/pxp/intel_pxp_gsccs.h | 4 + drivers/gpu/drm/i915/pxp/intel_pxp_types.h | 6 + 5 files changed, 254 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/uc/intel_gsc_uc_heci_cmd_submit.h b/drivers/gpu/drm/i915/gt/uc/intel_gsc_uc_heci_cmd_submit.h index e790608745d4..ef70e304904a 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_gsc_uc_heci_cmd_submit.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_gsc_uc_heci_cmd_submit.h @@ -51,7 +51,8 @@ struct intel_gsc_mtl_header { * we distinguish the flags using OUTFLAG or INFLAG */ u32 flags; -#define GSC_OUTFLAG_MSG_PENDING 1 +#define GSC_OUTFLAG_MSG_PENDING BIT(0) +#define GSC_INFLAG_MSG_CLEANUP BIT(1) u32 status; } __packed; diff --git a/drivers/gpu/drm/i915/pxp/intel_pxp_cmd_interface_43.h b/drivers/gpu/drm/i915/pxp/intel_pxp_cmd_interface_43.h index ad67e3f49c20..c65ada99e54f 100644 --- a/drivers/gpu/drm/i915/pxp/intel_pxp_cmd_interface_43.h +++ b/drivers/gpu/drm/i915/pxp/intel_pxp_cmd_interface_43.h @@ -12,6 +12,9 @@ /* PXP-Cmd-Op definitions */ #define PXP43_CMDID_START_HUC_AUTH 0x0000003A +/* PXP-Packet sizes for MTL's GSCCS-HECI instruction */ +#define PXP43_MAX_HECI_INOUT_SIZE (SZ_32K) + /* PXP-Input-Packet: HUC-Authentication */ struct pxp43_start_huc_auth_in { struct pxp_cmd_header header; diff --git a/drivers/gpu/drm/i915/pxp/intel_pxp_gsccs.c b/drivers/gpu/drm/i915/pxp/intel_pxp_gsccs.c index bad55719a7ac..16e3b73d0653 100644 --- a/drivers/gpu/drm/i915/pxp/intel_pxp_gsccs.c +++ b/drivers/gpu/drm/i915/pxp/intel_pxp_gsccs.c @@ -6,23 +6,232 @@ #include "gem/i915_gem_internal.h" #include "gt/intel_context.h" +#include "gt/uc/intel_gsc_uc_heci_cmd_submit.h" #include "i915_drv.h" #include "intel_pxp_cmd_interface_43.h" #include "intel_pxp_gsccs.h" #include "intel_pxp_types.h" +static int +gsccs_send_message(struct intel_pxp *pxp, + void *msg_in, size_t msg_in_size, + void *msg_out, size_t msg_out_size_max, + size_t *msg_out_len, + u64 *gsc_msg_handle_retry) +{ + struct intel_gt *gt = pxp->ctrl_gt; + struct drm_i915_private *i915 = gt->i915; + struct gsccs_session_resources *exec_res = &pxp->gsccs_res; + struct intel_gsc_mtl_header *header = exec_res->pkt_vaddr; + struct intel_gsc_heci_non_priv_pkt pkt; + size_t max_msg_size; + u32 reply_size; + int ret; + + if (!exec_res->ce) + return -ENODEV; + + max_msg_size = PXP43_MAX_HECI_INOUT_SIZE - sizeof(*header); + + if (msg_in_size > max_msg_size || msg_out_size_max > max_msg_size) + return -ENOSPC; + + if (!exec_res->pkt_vma || !exec_res->bb_vma) + return -ENOENT; + + GEM_BUG_ON(exec_res->pkt_vma->size < (2 * PXP43_MAX_HECI_INOUT_SIZE)); + + mutex_lock(&pxp->tee_mutex); + + memset(header, 0, sizeof(*header)); + intel_gsc_uc_heci_cmd_emit_mtl_header(header, HECI_MEADDRESS_PXP, + msg_in_size + sizeof(*header), + exec_res->host_session_handle); + + /* check if this is a host-session-handle cleanup call (empty packet) */ + if (!msg_in && !msg_out) + header->flags |= GSC_INFLAG_MSG_CLEANUP; + + /* copy caller provided gsc message handle if this is polling for a prior msg completion */ + header->gsc_message_handle = *gsc_msg_handle_retry; + + /* NOTE: zero size packets are used for session-cleanups */ + if (msg_in && msg_in_size) + memcpy(exec_res->pkt_vaddr + sizeof(*header), msg_in, msg_in_size); + + pkt.addr_in = i915_vma_offset(exec_res->pkt_vma); + pkt.size_in = header->message_size; + pkt.addr_out = pkt.addr_in + PXP43_MAX_HECI_INOUT_SIZE; + pkt.size_out = msg_out_size_max + sizeof(*header); + pkt.heci_pkt_vma = exec_res->pkt_vma; + pkt.bb_vma = exec_res->bb_vma; + + /* + * Before submitting, let's clear-out the validity marker on the reply offset. + * We use offset PXP43_MAX_HECI_INOUT_SIZE for reply location so point header there. + */ + header = exec_res->pkt_vaddr + PXP43_MAX_HECI_INOUT_SIZE; + header->validity_marker = 0; + + ret = intel_gsc_uc_heci_cmd_submit_nonpriv(>->uc.gsc, + exec_res->ce, &pkt, exec_res->bb_vaddr, + GSC_REPLY_LATENCY_MS); + if (ret) { + drm_err(&i915->drm, "failed to send gsc PXP msg (%d)\n", ret); + goto unlock; + } + + /* Response validity marker, status and busyness */ + if (header->validity_marker != GSC_HECI_VALIDITY_MARKER) { + drm_err(&i915->drm, "gsc PXP reply with invalid validity marker\n"); + ret = -EINVAL; + goto unlock; + } + if (header->status != 0) { + drm_dbg(&i915->drm, "gsc PXP reply status has error = 0x%08x\n", + header->status); + ret = -EINVAL; + goto unlock; + } + if (header->flags & GSC_OUTFLAG_MSG_PENDING) { + drm_dbg(&i915->drm, "gsc PXP reply is busy\n"); + /* + * When the GSC firmware replies with pending bit, it means that the requested + * operation has begun but the completion is pending and the caller needs + * to re-request with the gsc_message_handle that was returned by the firmware. + * until the pending bit is turned off. + */ + *gsc_msg_handle_retry = header->gsc_message_handle; + ret = -EAGAIN; + goto unlock; + } + + reply_size = header->message_size - sizeof(*header); + if (reply_size > msg_out_size_max) { + drm_warn(&i915->drm, "caller with insufficient PXP reply size %u (%ld)\n", + reply_size, msg_out_size_max); + reply_size = msg_out_size_max; + } + + if (msg_out) + memcpy(msg_out, exec_res->pkt_vaddr + PXP43_MAX_HECI_INOUT_SIZE + sizeof(*header), + reply_size); + if (msg_out_len) + *msg_out_len = reply_size; + +unlock: + mutex_unlock(&pxp->tee_mutex); + return ret; +} + +static int +gsccs_send_message_retry_complete(struct intel_pxp *pxp, + void *msg_in, size_t msg_in_size, + void *msg_out, size_t msg_out_size_max, + size_t *msg_out_len) +{ + u64 gsc_session_retry = 0; + int ret, tries = 0; + + /* + * Keep sending request if GSC firmware was busy. Based on fw specs + + * sw overhead (and testing) we expect a worst case pending-bit delay of + * GSC_PENDING_RETRY_MAXCOUNT x GSC_PENDING_RETRY_PAUSE_MS millisecs. + */ + do { + ret = gsccs_send_message(pxp, msg_in, msg_in_size, msg_out, msg_out_size_max, + msg_out_len, &gsc_session_retry); + /* Only try again if gsc says so */ + if (ret != -EAGAIN) + break; + + msleep(GSC_PENDING_RETRY_PAUSE_MS); + } while (++tries < GSC_PENDING_RETRY_MAXCOUNT); + + return ret; +} + +static void +gsccs_cleanup_fw_host_session_handle(struct intel_pxp *pxp) +{ + struct drm_i915_private *i915 = pxp->ctrl_gt->i915; + int ret; + + ret = gsccs_send_message_retry_complete(pxp, NULL, 0, NULL, 0, NULL); + if (ret) + drm_dbg(&i915->drm, "Failed to send gsccs msg host-session-cleanup: ret=[%d]\n", + ret); +} + static void gsccs_destroy_execution_resource(struct intel_pxp *pxp) { struct gsccs_session_resources *exec_res = &pxp->gsccs_res; + if (exec_res->host_session_handle) + gsccs_cleanup_fw_host_session_handle(pxp); if (exec_res->ce) intel_context_put(exec_res->ce); + if (exec_res->bb_vma) + i915_vma_unpin_and_release(&exec_res->bb_vma, I915_VMA_RELEASE_MAP); + if (exec_res->pkt_vma) + i915_vma_unpin_and_release(&exec_res->pkt_vma, I915_VMA_RELEASE_MAP); memset(exec_res, 0, sizeof(*exec_res)); } +static int +gsccs_create_buffer(struct intel_gt *gt, + const char *bufname, size_t size, + struct i915_vma **vma, void **map) +{ + struct drm_i915_private *i915 = gt->i915; + struct drm_i915_gem_object *obj; + int err = 0; + + obj = i915_gem_object_create_internal(i915, size); + if (IS_ERR(obj)) { + drm_err(&i915->drm, "Failed to allocate gsccs backend %s.\n", bufname); + err = PTR_ERR(obj); + goto out_none; + } + + *vma = i915_vma_instance(obj, gt->vm, NULL); + if (IS_ERR(*vma)) { + drm_err(&i915->drm, "Failed to vma-instance gsccs backend %s.\n", bufname); + err = PTR_ERR(*vma); + goto out_put; + } + + /* return a virtual pointer */ + *map = i915_gem_object_pin_map_unlocked(obj, i915_coherent_map_type(i915, obj, true)); + if (IS_ERR(*map)) { + drm_err(&i915->drm, "Failed to map gsccs backend %s.\n", bufname); + err = PTR_ERR(*map); + goto out_put; + } + + /* all PXP sessions commands are treated as non-privileged */ + err = i915_vma_pin(*vma, 0, 0, PIN_USER); + if (err) { + drm_err(&i915->drm, "Failed to vma-pin gsccs backend %s.\n", bufname); + goto out_unmap; + } + + return 0; + +out_unmap: + i915_gem_object_unpin_map(obj); +out_put: + i915_gem_object_put(obj); +out_none: + *vma = NULL; + *map = NULL; + + return err; +} + static int gsccs_allocate_execution_resource(struct intel_pxp *pxp) { @@ -30,6 +239,7 @@ gsccs_allocate_execution_resource(struct intel_pxp *pxp) struct gsccs_session_resources *exec_res = &pxp->gsccs_res; struct intel_engine_cs *engine = gt->engine[GSC0]; struct intel_context *ce; + int err = 0; /* * First, ensure the GSC engine is present. @@ -38,18 +248,46 @@ gsccs_allocate_execution_resource(struct intel_pxp *pxp) if (!engine) return -ENODEV; + /* + * Now, allocate, pin and map two objects, one for the heci message packet + * and another for the batch buffer we submit into GSC engine (that includes the packet). + * NOTE: GSC-CS backend is currently only supported on MTL, so we allocate shmem. + */ + err = gsccs_create_buffer(pxp->ctrl_gt, "Heci Packet", + 2 * PXP43_MAX_HECI_INOUT_SIZE, + &exec_res->pkt_vma, &exec_res->pkt_vaddr); + if (err) + return err; + + err = gsccs_create_buffer(pxp->ctrl_gt, "Batch Buffer", PAGE_SIZE, + &exec_res->bb_vma, &exec_res->bb_vaddr); + if (err) + goto free_pkt; + /* Finally, create an intel_context to be used during the submission */ ce = intel_context_create(engine); if (IS_ERR(ce)) { drm_err(>->i915->drm, "Failed creating gsccs backend ctx\n"); - return PTR_ERR(ce); + err = PTR_ERR(ce); + goto free_batch; } i915_vm_put(ce->vm); ce->vm = i915_vm_get(pxp->ctrl_gt->vm); exec_res->ce = ce; + /* initialize host-session-handle (for all i915-to-gsc-firmware PXP cmds) */ + get_random_bytes(&exec_res->host_session_handle, sizeof(exec_res->host_session_handle)); + return 0; + +free_batch: + i915_vma_unpin_and_release(&exec_res->bb_vma, I915_VMA_RELEASE_MAP); +free_pkt: + i915_vma_unpin_and_release(&exec_res->pkt_vma, I915_VMA_RELEASE_MAP); + memset(exec_res, 0, sizeof(*exec_res)); + + return err; } void intel_pxp_gsccs_fini(struct intel_pxp *pxp) diff --git a/drivers/gpu/drm/i915/pxp/intel_pxp_gsccs.h b/drivers/gpu/drm/i915/pxp/intel_pxp_gsccs.h index 354ea9a8f940..bd1c028bc80f 100644 --- a/drivers/gpu/drm/i915/pxp/intel_pxp_gsccs.h +++ b/drivers/gpu/drm/i915/pxp/intel_pxp_gsccs.h @@ -10,6 +10,10 @@ struct intel_pxp; +#define GSC_REPLY_LATENCY_MS 200 +#define GSC_PENDING_RETRY_MAXCOUNT 40 +#define GSC_PENDING_RETRY_PAUSE_MS 50 + #ifdef CONFIG_DRM_I915_PXP void intel_pxp_gsccs_fini(struct intel_pxp *pxp); int intel_pxp_gsccs_init(struct intel_pxp *pxp); diff --git a/drivers/gpu/drm/i915/pxp/intel_pxp_types.h b/drivers/gpu/drm/i915/pxp/intel_pxp_types.h index 87d0d8da98cd..1a8765866b8b 100644 --- a/drivers/gpu/drm/i915/pxp/intel_pxp_types.h +++ b/drivers/gpu/drm/i915/pxp/intel_pxp_types.h @@ -38,6 +38,12 @@ struct intel_pxp { struct gsccs_session_resources { u64 host_session_handle; /* used by firmware to link commands to sessions */ struct intel_context *ce; /* context for gsc command submission */ + + struct i915_vma *pkt_vma; /* GSC FW cmd packet vma */ + void *pkt_vaddr; /* GSC FW cmd packet virt pointer */ + + struct i915_vma *bb_vma; /* HECI_PKT batch buffer vma */ + void *bb_vaddr; /* HECI_PKT batch buffer virt pointer */ } gsccs_res; /** From 99afb7cc8c44578615200ea4806b183e1e35a81d Mon Sep 17 00:00:00 2001 From: Alan Previn Date: Thu, 11 May 2023 16:17:35 -0700 Subject: [PATCH 087/108] drm/i915/pxp: Add ARB session creation and cleanup Add MTL's function for ARB session creation using PXP firmware version 4.3 ABI structure format. While relooking at the ARB session creation flow in intel_pxp_start, let's address missing UAPI documentation. Without actually changing backward compatible behavior, update i915's drm-uapi comments that describe the possible error values when creating a context with I915_CONTEXT_PARAM_PROTECTED_CONTENT: Since the first merge of PXP support on ADL, i915 returns -ENXIO if a dependency such as firmware or component driver was yet to be loaded or returns -EIO if the creation attempt failed when requested by the PXP firmware (specific firmware error responses are reported in dmesg). Add MTL's function for ARB session invalidation but this reuses PXP firmware version 4.2 ABI structure format. For both cases, in the back-end gsccs functions for sending messages to the firmware inspect the GSC-CS-Mem-Header's pending-bit which means the GSC firmware is busy and we should retry. Given the last hw requirement, lets also update functions in front-end layer that wait for session creation or teardown completion to use new worst case timeout periods. Signed-off-by: Alan Previn Reviewed-by: Daniele Ceraolo Spurio Signed-off-by: Radhakrishna Sripada Link: https://patchwork.freedesktop.org/patch/msgid/20230511231738.1077674-6-alan.previn.teres.alexis@intel.com --- drivers/gpu/drm/i915/pxp/intel_pxp.c | 27 +++- .../drm/i915/pxp/intel_pxp_cmd_interface_43.h | 21 +++ drivers/gpu/drm/i915/pxp/intel_pxp_gsccs.c | 130 ++++++++++++++++++ drivers/gpu/drm/i915/pxp/intel_pxp_gsccs.h | 12 +- drivers/gpu/drm/i915/pxp/intel_pxp_session.c | 11 +- include/uapi/drm/i915_drm.h | 15 ++ 6 files changed, 209 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/i915/pxp/intel_pxp.c b/drivers/gpu/drm/i915/pxp/intel_pxp.c index 8949d4be7882..b600d68de2a4 100644 --- a/drivers/gpu/drm/i915/pxp/intel_pxp.c +++ b/drivers/gpu/drm/i915/pxp/intel_pxp.c @@ -291,6 +291,8 @@ static bool pxp_component_bound(struct intel_pxp *pxp) static int __pxp_global_teardown_final(struct intel_pxp *pxp) { + int timeout; + if (!pxp->arb_is_valid) return 0; /* @@ -300,7 +302,12 @@ static int __pxp_global_teardown_final(struct intel_pxp *pxp) intel_pxp_mark_termination_in_progress(pxp); intel_pxp_terminate(pxp, false); - if (!wait_for_completion_timeout(&pxp->termination, msecs_to_jiffies(250))) + if (HAS_ENGINE(pxp->ctrl_gt, GSC0)) + timeout = GSCFW_MAX_ROUND_TRIP_LATENCY_MS; + else + timeout = 250; + + if (!wait_for_completion_timeout(&pxp->termination, msecs_to_jiffies(timeout))) return -ETIMEDOUT; return 0; @@ -308,6 +315,8 @@ static int __pxp_global_teardown_final(struct intel_pxp *pxp) static int __pxp_global_teardown_restart(struct intel_pxp *pxp) { + int timeout; + if (pxp->arb_is_valid) return 0; /* @@ -316,7 +325,12 @@ static int __pxp_global_teardown_restart(struct intel_pxp *pxp) */ pxp_queue_termination(pxp); - if (!wait_for_completion_timeout(&pxp->termination, msecs_to_jiffies(250))) + if (HAS_ENGINE(pxp->ctrl_gt, GSC0)) + timeout = GSCFW_MAX_ROUND_TRIP_LATENCY_MS; + else + timeout = 250; + + if (!wait_for_completion_timeout(&pxp->termination, msecs_to_jiffies(timeout))) return -ETIMEDOUT; return 0; @@ -354,8 +368,13 @@ int intel_pxp_start(struct intel_pxp *pxp) if (!intel_pxp_is_enabled(pxp)) return -ENODEV; - if (wait_for(pxp_component_bound(pxp), 250)) - return -ENXIO; + if (HAS_ENGINE(pxp->ctrl_gt, GSC0)) { + if (wait_for(intel_pxp_gsccs_is_ready_for_sessions(pxp), 250)) + return -ENXIO; + } else { + if (wait_for(pxp_component_bound(pxp), 250)) + return -ENXIO; + } mutex_lock(&pxp->arb_mutex); diff --git a/drivers/gpu/drm/i915/pxp/intel_pxp_cmd_interface_43.h b/drivers/gpu/drm/i915/pxp/intel_pxp_cmd_interface_43.h index c65ada99e54f..09777719cd84 100644 --- a/drivers/gpu/drm/i915/pxp/intel_pxp_cmd_interface_43.h +++ b/drivers/gpu/drm/i915/pxp/intel_pxp_cmd_interface_43.h @@ -11,6 +11,7 @@ /* PXP-Cmd-Op definitions */ #define PXP43_CMDID_START_HUC_AUTH 0x0000003A +#define PXP43_CMDID_INIT_SESSION 0x00000036 /* PXP-Packet sizes for MTL's GSCCS-HECI instruction */ #define PXP43_MAX_HECI_INOUT_SIZE (SZ_32K) @@ -26,4 +27,24 @@ struct pxp43_start_huc_auth_out { struct pxp_cmd_header header; } __packed; +/* PXP-Input-Packet: Init PXP session */ +struct pxp43_create_arb_in { + struct pxp_cmd_header header; + /* header.stream_id fields for vesion 4.3 of Init PXP session: */ + #define PXP43_INIT_SESSION_VALID BIT(0) + #define PXP43_INIT_SESSION_APPTYPE BIT(1) + #define PXP43_INIT_SESSION_APPID GENMASK(17, 2) + u32 protection_mode; + #define PXP43_INIT_SESSION_PROTECTION_ARB 0x2 + u32 sub_session_id; + u32 init_flags; + u32 rsvd[12]; +} __packed; + +/* PXP-Input-Packet: Init PXP session */ +struct pxp43_create_arb_out { + struct pxp_cmd_header header; + u32 rsvd[8]; +} __packed; + #endif /* __INTEL_PXP_FW_INTERFACE_43_H__ */ diff --git a/drivers/gpu/drm/i915/pxp/intel_pxp_gsccs.c b/drivers/gpu/drm/i915/pxp/intel_pxp_gsccs.c index 16e3b73d0653..4bc276daca16 100644 --- a/drivers/gpu/drm/i915/pxp/intel_pxp_gsccs.c +++ b/drivers/gpu/drm/i915/pxp/intel_pxp_gsccs.c @@ -6,13 +6,46 @@ #include "gem/i915_gem_internal.h" #include "gt/intel_context.h" +#include "gt/uc/intel_gsc_fw.h" #include "gt/uc/intel_gsc_uc_heci_cmd_submit.h" #include "i915_drv.h" +#include "intel_pxp_cmd_interface_42.h" #include "intel_pxp_cmd_interface_43.h" #include "intel_pxp_gsccs.h" #include "intel_pxp_types.h" +static bool +is_fw_err_platform_config(u32 type) +{ + switch (type) { + case PXP_STATUS_ERROR_API_VERSION: + case PXP_STATUS_PLATFCONFIG_KF1_NOVERIF: + case PXP_STATUS_PLATFCONFIG_KF1_BAD: + return true; + default: + break; + } + return false; +} + +static const char * +fw_err_to_string(u32 type) +{ + switch (type) { + case PXP_STATUS_ERROR_API_VERSION: + return "ERR_API_VERSION"; + case PXP_STATUS_NOT_READY: + return "ERR_NOT_READY"; + case PXP_STATUS_PLATFCONFIG_KF1_NOVERIF: + case PXP_STATUS_PLATFCONFIG_KF1_BAD: + return "ERR_PLATFORM_CONFIG"; + default: + break; + } + return NULL; +} + static int gsccs_send_message(struct intel_pxp *pxp, void *msg_in, size_t msg_in_size, @@ -152,6 +185,103 @@ gsccs_send_message_retry_complete(struct intel_pxp *pxp, return ret; } +bool intel_pxp_gsccs_is_ready_for_sessions(struct intel_pxp *pxp) +{ + /* + * GSC-fw loading, HuC-fw loading, HuC-fw authentication and + * GSC-proxy init flow (requiring an mei component driver) + * must all occur first before we can start requesting for PXP + * sessions. Checking for completion on HuC authentication and + * gsc-proxy init flow (the last set of dependencies that + * are out of order) will suffice. + */ + if (intel_huc_is_authenticated(&pxp->ctrl_gt->uc.huc) && + intel_gsc_uc_fw_proxy_init_done(&pxp->ctrl_gt->uc.gsc)) + return true; + + return false; +} + +int intel_pxp_gsccs_create_session(struct intel_pxp *pxp, + int arb_session_id) +{ + struct drm_i915_private *i915 = pxp->ctrl_gt->i915; + struct pxp43_create_arb_in msg_in = {0}; + struct pxp43_create_arb_out msg_out = {0}; + int ret; + + msg_in.header.api_version = PXP_APIVER(4, 3); + msg_in.header.command_id = PXP43_CMDID_INIT_SESSION; + msg_in.header.stream_id = (FIELD_PREP(PXP43_INIT_SESSION_APPID, arb_session_id) | + FIELD_PREP(PXP43_INIT_SESSION_VALID, 1) | + FIELD_PREP(PXP43_INIT_SESSION_APPTYPE, 0)); + msg_in.header.buffer_len = sizeof(msg_in) - sizeof(msg_in.header); + msg_in.protection_mode = PXP43_INIT_SESSION_PROTECTION_ARB; + + ret = gsccs_send_message_retry_complete(pxp, + &msg_in, sizeof(msg_in), + &msg_out, sizeof(msg_out), NULL); + if (ret) { + drm_err(&i915->drm, "Failed to init session %d, ret=[%d]\n", arb_session_id, ret); + } else if (msg_out.header.status != 0) { + if (is_fw_err_platform_config(msg_out.header.status)) { + drm_info_once(&i915->drm, + "PXP init-session-%d failed due to BIOS/SOC:0x%08x:%s\n", + arb_session_id, msg_out.header.status, + fw_err_to_string(msg_out.header.status)); + } else { + drm_dbg(&i915->drm, "PXP init-session-%d failed 0x%08x:%st:\n", + arb_session_id, msg_out.header.status, + fw_err_to_string(msg_out.header.status)); + drm_dbg(&i915->drm, " cmd-detail: ID=[0x%08x],API-Ver-[0x%08x]\n", + msg_in.header.command_id, msg_in.header.api_version); + } + } + + return ret; +} + +void intel_pxp_gsccs_end_arb_fw_session(struct intel_pxp *pxp, u32 session_id) +{ + struct drm_i915_private *i915 = pxp->ctrl_gt->i915; + struct pxp42_inv_stream_key_in msg_in = {0}; + struct pxp42_inv_stream_key_out msg_out = {0}; + int ret = 0; + + /* + * Stream key invalidation reuses the same version 4.2 input/output + * command format but firmware requires 4.3 API interaction + */ + msg_in.header.api_version = PXP_APIVER(4, 3); + msg_in.header.command_id = PXP42_CMDID_INVALIDATE_STREAM_KEY; + msg_in.header.buffer_len = sizeof(msg_in) - sizeof(msg_in.header); + + msg_in.header.stream_id = FIELD_PREP(PXP_CMDHDR_EXTDATA_SESSION_VALID, 1); + msg_in.header.stream_id |= FIELD_PREP(PXP_CMDHDR_EXTDATA_APP_TYPE, 0); + msg_in.header.stream_id |= FIELD_PREP(PXP_CMDHDR_EXTDATA_SESSION_ID, session_id); + + ret = gsccs_send_message_retry_complete(pxp, + &msg_in, sizeof(msg_in), + &msg_out, sizeof(msg_out), NULL); + if (ret) { + drm_err(&i915->drm, "Failed to inv-stream-key-%u, ret=[%d]\n", + session_id, ret); + } else if (msg_out.header.status != 0) { + if (is_fw_err_platform_config(msg_out.header.status)) { + drm_info_once(&i915->drm, + "PXP inv-stream-key-%u failed due to BIOS/SOC :0x%08x:%s\n", + session_id, msg_out.header.status, + fw_err_to_string(msg_out.header.status)); + } else { + drm_dbg(&i915->drm, "PXP inv-stream-key-%u failed 0x%08x:%s:\n", + session_id, msg_out.header.status, + fw_err_to_string(msg_out.header.status)); + drm_dbg(&i915->drm, " cmd-detail: ID=[0x%08x],API-Ver-[0x%08x]\n", + msg_in.header.command_id, msg_in.header.api_version); + } + } +} + static void gsccs_cleanup_fw_host_session_handle(struct intel_pxp *pxp) { diff --git a/drivers/gpu/drm/i915/pxp/intel_pxp_gsccs.h b/drivers/gpu/drm/i915/pxp/intel_pxp_gsccs.h index bd1c028bc80f..298ad38e6c7d 100644 --- a/drivers/gpu/drm/i915/pxp/intel_pxp_gsccs.h +++ b/drivers/gpu/drm/i915/pxp/intel_pxp_gsccs.h @@ -10,14 +10,22 @@ struct intel_pxp; -#define GSC_REPLY_LATENCY_MS 200 +#define GSC_REPLY_LATENCY_MS 210 +/* + * Max FW response time is 200ms, to which we add 10ms to account for overhead + * such as request preparation, GuC submission to hw and pipeline completion times. + */ #define GSC_PENDING_RETRY_MAXCOUNT 40 #define GSC_PENDING_RETRY_PAUSE_MS 50 +#define GSCFW_MAX_ROUND_TRIP_LATENCY_MS (GSC_PENDING_RETRY_MAXCOUNT * GSC_PENDING_RETRY_PAUSE_MS) #ifdef CONFIG_DRM_I915_PXP void intel_pxp_gsccs_fini(struct intel_pxp *pxp); int intel_pxp_gsccs_init(struct intel_pxp *pxp); +int intel_pxp_gsccs_create_session(struct intel_pxp *pxp, int arb_session_id); +void intel_pxp_gsccs_end_arb_fw_session(struct intel_pxp *pxp, u32 arb_session_id); + #else static inline void intel_pxp_gsccs_fini(struct intel_pxp *pxp) { @@ -30,4 +38,6 @@ static inline int intel_pxp_gsccs_init(struct intel_pxp *pxp) #endif +bool intel_pxp_gsccs_is_ready_for_sessions(struct intel_pxp *pxp); + #endif /*__INTEL_PXP_GSCCS_H__ */ diff --git a/drivers/gpu/drm/i915/pxp/intel_pxp_session.c b/drivers/gpu/drm/i915/pxp/intel_pxp_session.c index 7899079e17b0..e4d8242302c5 100644 --- a/drivers/gpu/drm/i915/pxp/intel_pxp_session.c +++ b/drivers/gpu/drm/i915/pxp/intel_pxp_session.c @@ -7,6 +7,7 @@ #include "intel_pxp.h" #include "intel_pxp_cmd.h" +#include "intel_pxp_gsccs.h" #include "intel_pxp_session.h" #include "intel_pxp_tee.h" #include "intel_pxp_types.h" @@ -62,7 +63,10 @@ static int pxp_create_arb_session(struct intel_pxp *pxp) return -EEXIST; } - ret = intel_pxp_tee_cmd_create_arb_session(pxp, ARB_SESSION); + if (HAS_ENGINE(pxp->ctrl_gt, GSC0)) + ret = intel_pxp_gsccs_create_session(pxp, ARB_SESSION); + else + ret = intel_pxp_tee_cmd_create_arb_session(pxp, ARB_SESSION); if (ret) { drm_err(>->i915->drm, "tee cmd for arb session creation failed\n"); return ret; @@ -106,7 +110,10 @@ static int pxp_terminate_arb_session_and_global(struct intel_pxp *pxp) intel_uncore_write(gt->uncore, KCR_GLOBAL_TERMINATE(pxp->kcr_base), 1); - intel_pxp_tee_end_arb_fw_session(pxp, ARB_SESSION); + if (HAS_ENGINE(gt, GSC0)) + intel_pxp_gsccs_end_arb_fw_session(pxp, ARB_SESSION); + else + intel_pxp_tee_end_arb_fw_session(pxp, ARB_SESSION); return ret; } diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h index dba7c5a5b25e..0aa3190e7654 100644 --- a/include/uapi/drm/i915_drm.h +++ b/include/uapi/drm/i915_drm.h @@ -2096,6 +2096,21 @@ struct drm_i915_gem_context_param { * * -ENODEV: feature not available * -EPERM: trying to mark a recoverable or not bannable context as protected + * -ENXIO: A dependency such as a component driver or firmware is not yet + * loaded so user space may need to attempt again. Depending on the + * device, this error may be reported if protected context creation is + * attempted very early after kernel start because the internal timeout + * waiting for such dependencies is not guaranteed to be larger than + * required (numbers differ depending on system and kernel config): + * - ADL/RPL: dependencies may take up to 3 seconds from kernel start + * while context creation internal timeout is 250 milisecs + * - MTL: dependencies may take up to 8 seconds from kernel start + * while context creation internal timeout is 250 milisecs + * NOTE: such dependencies happen once, so a subsequent call to create a + * protected context after a prior successful call will not experience + * such timeouts and will not return -ENXIO (unless the driver is reloaded, + * or, depending on the device, resumes from a suspended state). + * -EIO: The firmware did not succeed in creating the protected context. */ #define I915_CONTEXT_PARAM_PROTECTED_CONTENT 0xd /* Must be kept compact -- no holes and well documented */ From d1da138f245d4fb46b21d2ddb19504a2831d813f Mon Sep 17 00:00:00 2001 From: Alan Previn Date: Thu, 11 May 2023 16:17:36 -0700 Subject: [PATCH 088/108] drm/i915/uapi/pxp: Add a GET_PARAM for PXP Because of the additional firmware, component-driver and initialization depedencies required on MTL platform before a PXP context can be created, UMD calling for PXP creation as a way to get-caps can take a long time. An actual real world customer stack has seen this happen in the 4-to-8 second range after the kernel starts (which sees MESA's init appear in the middle of this range as the compositor comes up). To avoid unncessary delays experienced by the UMD for get-caps purposes, add a GET_PARAM for I915_PARAM_PXP_SUPPORT. However, some failures can still occur after all the depedencies are met (such as firmware init flow failure, bios configurations or SOC fusing not allowing PXP enablement). Those scenarios will only be known to user space when it attempts creating a PXP context and is documented in the GEM UAPI headers. While making this change, create a helper that is common to both GET_PARAM caller and intel_pxp_start since the latter does similar checks. Signed-off-by: Alan Previn Reviewed-by: Daniele Ceraolo Spurio Acked-by: Jordan Justen Signed-off-by: Radhakrishna Sripada Link: https://patchwork.freedesktop.org/patch/msgid/20230511231738.1077674-7-alan.previn.teres.alexis@intel.com --- drivers/gpu/drm/i915/i915_getparam.c | 7 ++++++ drivers/gpu/drm/i915/pxp/intel_pxp.c | 35 ++++++++++++++++++++-------- drivers/gpu/drm/i915/pxp/intel_pxp.h | 1 + include/uapi/drm/i915_drm.h | 19 +++++++++++++++ 4 files changed, 52 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_getparam.c b/drivers/gpu/drm/i915/i915_getparam.c index 2238e096c957..6f11d7eaa91a 100644 --- a/drivers/gpu/drm/i915/i915_getparam.c +++ b/drivers/gpu/drm/i915/i915_getparam.c @@ -5,6 +5,8 @@ #include "gem/i915_gem_mman.h" #include "gt/intel_engine_user.h" +#include "pxp/intel_pxp.h" + #include "i915_cmd_parser.h" #include "i915_drv.h" #include "i915_getparam.h" @@ -102,6 +104,11 @@ int i915_getparam_ioctl(struct drm_device *dev, void *data, if (value < 0) return value; break; + case I915_PARAM_PXP_STATUS: + value = intel_pxp_get_readiness_status(i915->pxp); + if (value < 0) + return value; + break; case I915_PARAM_MMAP_GTT_VERSION: /* Though we've started our numbering from 1, and so class all * earlier versions as 0, in effect their value is undefined as diff --git a/drivers/gpu/drm/i915/pxp/intel_pxp.c b/drivers/gpu/drm/i915/pxp/intel_pxp.c index b600d68de2a4..f143eadbc253 100644 --- a/drivers/gpu/drm/i915/pxp/intel_pxp.c +++ b/drivers/gpu/drm/i915/pxp/intel_pxp.c @@ -357,6 +357,26 @@ void intel_pxp_end(struct intel_pxp *pxp) intel_runtime_pm_put(&i915->runtime_pm, wakeref); } +/* + * this helper is used by both intel_pxp_start and by + * the GET_PARAM IOCTL that user space calls. Thus, the + * return values here should match the UAPI spec. + */ +int intel_pxp_get_readiness_status(struct intel_pxp *pxp) +{ + if (!intel_pxp_is_enabled(pxp)) + return -ENODEV; + + if (HAS_ENGINE(pxp->ctrl_gt, GSC0)) { + if (wait_for(intel_pxp_gsccs_is_ready_for_sessions(pxp), 250)) + return 2; + } else { + if (wait_for(pxp_component_bound(pxp), 250)) + return 2; + } + return 1; +} + /* * the arb session is restarted from the irq work when we receive the * termination completion interrupt @@ -365,16 +385,11 @@ int intel_pxp_start(struct intel_pxp *pxp) { int ret = 0; - if (!intel_pxp_is_enabled(pxp)) - return -ENODEV; - - if (HAS_ENGINE(pxp->ctrl_gt, GSC0)) { - if (wait_for(intel_pxp_gsccs_is_ready_for_sessions(pxp), 250)) - return -ENXIO; - } else { - if (wait_for(pxp_component_bound(pxp), 250)) - return -ENXIO; - } + ret = intel_pxp_get_readiness_status(pxp); + if (ret < 0) + return ret; + else if (ret > 1) + return -EIO; /* per UAPI spec, user may retry later */ mutex_lock(&pxp->arb_mutex); diff --git a/drivers/gpu/drm/i915/pxp/intel_pxp.h b/drivers/gpu/drm/i915/pxp/intel_pxp.h index 3ded0890cd27..17e6dbc86b38 100644 --- a/drivers/gpu/drm/i915/pxp/intel_pxp.h +++ b/drivers/gpu/drm/i915/pxp/intel_pxp.h @@ -26,6 +26,7 @@ void intel_pxp_fini_hw(struct intel_pxp *pxp); void intel_pxp_mark_termination_in_progress(struct intel_pxp *pxp); void intel_pxp_tee_end_arb_fw_session(struct intel_pxp *pxp, u32 arb_session_id); +int intel_pxp_get_readiness_status(struct intel_pxp *pxp); int intel_pxp_start(struct intel_pxp *pxp); void intel_pxp_end(struct intel_pxp *pxp); diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h index 0aa3190e7654..ba40855dbc93 100644 --- a/include/uapi/drm/i915_drm.h +++ b/include/uapi/drm/i915_drm.h @@ -771,6 +771,25 @@ typedef struct drm_i915_irq_wait { */ #define I915_PARAM_OA_TIMESTAMP_FREQUENCY 57 +/* + * Query the status of PXP support in i915. + * + * The query can fail in the following scenarios with the listed error codes: + * -ENODEV = PXP support is not available on the GPU device or in the + * kernel due to missing component drivers or kernel configs. + * + * If the IOCTL is successful, the returned parameter will be set to one of + * the following values: + * 1 = PXP feature is supported and is ready for use. + * 2 = PXP feature is supported but should be ready soon (pending + * initialization of non-i915 system dependencies). + * + * NOTE: When param is supported (positive return values), user space should + * still refer to the GEM PXP context-creation UAPI header specs to be + * aware of possible failure due to system state machine at the time. + */ +#define I915_PARAM_PXP_STATUS 58 + /* Must be kept compact -- no holes and well documented */ /** From 9e134ed720b6e69a5c857743daedd403101ca078 Mon Sep 17 00:00:00 2001 From: Alan Previn Date: Thu, 11 May 2023 16:17:37 -0700 Subject: [PATCH 089/108] drm/i915/pxp: On MTL, KCR enabling doesn't wait on tee component On legacy platforms, KCR HW enabling is done at the time the mei component interface is bound. It's also disabled during unbind. However, for MTL onwards, we don't depend on a tee component to start sending GSC-CS firmware messages. Thus, immediately enable (or disable) KCR HW on PXP's init, fini and resume. Signed-off-by: Alan Previn Reviewed-by: Daniele Ceraolo Spurio Signed-off-by: Radhakrishna Sripada Link: https://patchwork.freedesktop.org/patch/msgid/20230511231738.1077674-8-alan.previn.teres.alexis@intel.com --- drivers/gpu/drm/i915/pxp/intel_pxp_gsccs.c | 15 ++++++++++++++- drivers/gpu/drm/i915/pxp/intel_pxp_pm.c | 3 ++- 2 files changed, 16 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/pxp/intel_pxp_gsccs.c b/drivers/gpu/drm/i915/pxp/intel_pxp_gsccs.c index 4bc276daca16..8dc41de3f6f7 100644 --- a/drivers/gpu/drm/i915/pxp/intel_pxp_gsccs.c +++ b/drivers/gpu/drm/i915/pxp/intel_pxp_gsccs.c @@ -10,6 +10,7 @@ #include "gt/uc/intel_gsc_uc_heci_cmd_submit.h" #include "i915_drv.h" +#include "intel_pxp.h" #include "intel_pxp_cmd_interface_42.h" #include "intel_pxp_cmd_interface_43.h" #include "intel_pxp_gsccs.h" @@ -422,10 +423,22 @@ gsccs_allocate_execution_resource(struct intel_pxp *pxp) void intel_pxp_gsccs_fini(struct intel_pxp *pxp) { + intel_wakeref_t wakeref; + gsccs_destroy_execution_resource(pxp); + with_intel_runtime_pm(&pxp->ctrl_gt->i915->runtime_pm, wakeref) + intel_pxp_fini_hw(pxp); } int intel_pxp_gsccs_init(struct intel_pxp *pxp) { - return gsccs_allocate_execution_resource(pxp); + int ret; + intel_wakeref_t wakeref; + + ret = gsccs_allocate_execution_resource(pxp); + if (!ret) { + with_intel_runtime_pm(&pxp->ctrl_gt->i915->runtime_pm, wakeref) + intel_pxp_init_hw(pxp); + } + return ret; } diff --git a/drivers/gpu/drm/i915/pxp/intel_pxp_pm.c b/drivers/gpu/drm/i915/pxp/intel_pxp_pm.c index 4f836b317424..1a04067f61fc 100644 --- a/drivers/gpu/drm/i915/pxp/intel_pxp_pm.c +++ b/drivers/gpu/drm/i915/pxp/intel_pxp_pm.c @@ -43,8 +43,9 @@ void intel_pxp_resume_complete(struct intel_pxp *pxp) * The PXP component gets automatically unbound when we go into S3 and * re-bound after we come out, so in that scenario we can defer the * hw init to the bind call. + * NOTE: GSC-CS backend doesn't rely on components. */ - if (!pxp->pxp_component) + if (!HAS_ENGINE(pxp->ctrl_gt, GSC0) && !pxp->pxp_component) return; intel_pxp_init_hw(pxp); From 41e65d8790bd96d1caa3cb136a57ac45a131e66d Mon Sep 17 00:00:00 2001 From: Alan Previn Date: Thu, 11 May 2023 16:17:38 -0700 Subject: [PATCH 090/108] drm/i915/pxp: Enable PXP with MTL-GSC-CS Enable PXP with MTL-GSC-CS: add the has_pxp into device info and increase the debugfs teardown timeouts to align with new GSC-CS + firmware specs. Now that we have 3 places that are selecting pxp timeouts based on tee vs gsccs back-end, let's add a helper. Signed-off-by: Alan Previn Reviewed-by: Daniele Ceraolo Spurio Signed-off-by: Radhakrishna Sripada Link: https://patchwork.freedesktop.org/patch/msgid/20230511231738.1077674-9-alan.previn.teres.alexis@intel.com --- drivers/gpu/drm/i915/i915_pci.c | 1 + drivers/gpu/drm/i915/pxp/intel_pxp.c | 18 ++++++++++-------- drivers/gpu/drm/i915/pxp/intel_pxp.h | 1 + drivers/gpu/drm/i915/pxp/intel_pxp_debugfs.c | 6 +++++- drivers/gpu/drm/i915/pxp/intel_pxp_session.c | 2 +- 5 files changed, 18 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c index 1a3247451d0f..3e38283cbe8f 100644 --- a/drivers/gpu/drm/i915/i915_pci.c +++ b/drivers/gpu/drm/i915/i915_pci.c @@ -1209,6 +1209,7 @@ static const struct intel_device_info mtl_info = { .has_mslice_steering = 0, .has_snoop = 1, .max_pat_index = 4, + .has_pxp = 1, .__runtime.memory_regions = REGION_SMEM | REGION_STOLEN_LMEM, .__runtime.platform_engine_mask = BIT(RCS0) | BIT(BCS0) | BIT(CCS0), .require_force_probe = 1, diff --git a/drivers/gpu/drm/i915/pxp/intel_pxp.c b/drivers/gpu/drm/i915/pxp/intel_pxp.c index f143eadbc253..bb2e15329f34 100644 --- a/drivers/gpu/drm/i915/pxp/intel_pxp.c +++ b/drivers/gpu/drm/i915/pxp/intel_pxp.c @@ -289,6 +289,14 @@ static bool pxp_component_bound(struct intel_pxp *pxp) return bound; } +int intel_pxp_get_backend_timeout_ms(struct intel_pxp *pxp) +{ + if (HAS_ENGINE(pxp->ctrl_gt, GSC0)) + return GSCFW_MAX_ROUND_TRIP_LATENCY_MS; + else + return 250; +} + static int __pxp_global_teardown_final(struct intel_pxp *pxp) { int timeout; @@ -302,10 +310,7 @@ static int __pxp_global_teardown_final(struct intel_pxp *pxp) intel_pxp_mark_termination_in_progress(pxp); intel_pxp_terminate(pxp, false); - if (HAS_ENGINE(pxp->ctrl_gt, GSC0)) - timeout = GSCFW_MAX_ROUND_TRIP_LATENCY_MS; - else - timeout = 250; + timeout = intel_pxp_get_backend_timeout_ms(pxp); if (!wait_for_completion_timeout(&pxp->termination, msecs_to_jiffies(timeout))) return -ETIMEDOUT; @@ -325,10 +330,7 @@ static int __pxp_global_teardown_restart(struct intel_pxp *pxp) */ pxp_queue_termination(pxp); - if (HAS_ENGINE(pxp->ctrl_gt, GSC0)) - timeout = GSCFW_MAX_ROUND_TRIP_LATENCY_MS; - else - timeout = 250; + timeout = intel_pxp_get_backend_timeout_ms(pxp); if (!wait_for_completion_timeout(&pxp->termination, msecs_to_jiffies(timeout))) return -ETIMEDOUT; diff --git a/drivers/gpu/drm/i915/pxp/intel_pxp.h b/drivers/gpu/drm/i915/pxp/intel_pxp.h index 17e6dbc86b38..17254c3f1267 100644 --- a/drivers/gpu/drm/i915/pxp/intel_pxp.h +++ b/drivers/gpu/drm/i915/pxp/intel_pxp.h @@ -27,6 +27,7 @@ void intel_pxp_mark_termination_in_progress(struct intel_pxp *pxp); void intel_pxp_tee_end_arb_fw_session(struct intel_pxp *pxp, u32 arb_session_id); int intel_pxp_get_readiness_status(struct intel_pxp *pxp); +int intel_pxp_get_backend_timeout_ms(struct intel_pxp *pxp); int intel_pxp_start(struct intel_pxp *pxp); void intel_pxp_end(struct intel_pxp *pxp); diff --git a/drivers/gpu/drm/i915/pxp/intel_pxp_debugfs.c b/drivers/gpu/drm/i915/pxp/intel_pxp_debugfs.c index 4b8e70caa3ad..e07c5b380789 100644 --- a/drivers/gpu/drm/i915/pxp/intel_pxp_debugfs.c +++ b/drivers/gpu/drm/i915/pxp/intel_pxp_debugfs.c @@ -14,6 +14,7 @@ #include "intel_pxp.h" #include "intel_pxp_debugfs.h" +#include "intel_pxp_gsccs.h" #include "intel_pxp_irq.h" #include "intel_pxp_types.h" @@ -45,6 +46,7 @@ static int pxp_terminate_set(void *data, u64 val) { struct intel_pxp *pxp = data; struct intel_gt *gt = pxp->ctrl_gt; + int timeout_ms; if (!intel_pxp_is_active(pxp)) return -ENODEV; @@ -54,8 +56,10 @@ static int pxp_terminate_set(void *data, u64 val) intel_pxp_irq_handler(pxp, GEN12_DISPLAY_PXP_STATE_TERMINATED_INTERRUPT); spin_unlock_irq(gt->irq_lock); + timeout_ms = intel_pxp_get_backend_timeout_ms(pxp); + if (!wait_for_completion_timeout(&pxp->termination, - msecs_to_jiffies(100))) + msecs_to_jiffies(timeout_ms))) return -ETIMEDOUT; return 0; diff --git a/drivers/gpu/drm/i915/pxp/intel_pxp_session.c b/drivers/gpu/drm/i915/pxp/intel_pxp_session.c index e4d8242302c5..0a3e66b0265e 100644 --- a/drivers/gpu/drm/i915/pxp/intel_pxp_session.c +++ b/drivers/gpu/drm/i915/pxp/intel_pxp_session.c @@ -44,7 +44,7 @@ static int pxp_wait_for_session_state(struct intel_pxp *pxp, u32 id, bool in_pla KCR_SIP(pxp->kcr_base), mask, in_play ? mask : 0, - 100); + 250); intel_runtime_pm_put(uncore->rpm, wakeref); From db2ce1ab0508cd95efb4be938a146472c56c9461 Mon Sep 17 00:00:00 2001 From: John Harrison Date: Wed, 10 May 2023 13:55:56 -0700 Subject: [PATCH 091/108] drm/i1915/guc: Fix probe injection CI failures after recent change A recent change bumped a 'notice' message up to 'error' level for debug builds to help trap incorrect configurations in CI systems. Unfortunately, the error condition in question is triggered by the error injection probe test. So change the message again to be 'probe error' level instead. Signed-off-by: John Harrison Fixes: 760133d42f0a ("drm/i915/uc: Make unexpected firmware versions an error in debug builds") Cc: John Harrison Cc: Daniele Ceraolo Spurio Cc: Rodrigo Vivi Cc: Alan Previn Cc: Lucas De Marchi Cc: Jani Nikula Reviewed-by: Andi Shyti Link: https://patchwork.freedesktop.org/patch/msgid/20230510205556.312999-1-John.C.Harrison@Intel.com --- drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c index e34c79120090..dc5c96c503a9 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c @@ -18,7 +18,7 @@ #include "i915_reg.h" #if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM) -#define UNEXPECTED gt_err +#define UNEXPECTED gt_probe_error #else #define UNEXPECTED gt_notice #endif From 8ba3ba992fc2e456f4211ac4dc80dcb7775e722f Mon Sep 17 00:00:00 2001 From: John Harrison Date: Thu, 11 May 2023 18:35:44 -0700 Subject: [PATCH 092/108] drm/i915/guc: Fix confused register capture list creation The GuC has a completely separate engine class enum when referring to register capture lists, which combines render and compute. The driver was using the 'normal' GuC specific engine class enum instead. That meant that it thought it was defining a capture list for compute engines, the list was actually being applied to the GSC engine. And if a platform didn't have a render engine, then it would get no compute register captures at all. Fix that. Signed-off-by: John Harrison Reviewed-by: Alan Previn Link: https://patchwork.freedesktop.org/patch/msgid/20230512013544.3367606-1-John.C.Harrison@Intel.com --- drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c | 36 ++++++++++- .../gpu/drm/i915/gt/uc/intel_guc_capture.c | 61 +++++++++---------- drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h | 9 +++ 3 files changed, 72 insertions(+), 34 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c index 69ce06faf8cd..63724e17829a 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c @@ -643,6 +643,39 @@ static void guc_init_golden_context(struct intel_guc *guc) GEM_BUG_ON(guc->ads_golden_ctxt_size != total_size); } +static u32 guc_get_capture_engine_mask(struct iosys_map *info_map, u32 capture_class) +{ + u32 mask; + + switch (capture_class) { + case GUC_CAPTURE_LIST_CLASS_RENDER_COMPUTE: + mask = info_map_read(info_map, engine_enabled_masks[GUC_RENDER_CLASS]); + mask |= info_map_read(info_map, engine_enabled_masks[GUC_COMPUTE_CLASS]); + break; + + case GUC_CAPTURE_LIST_CLASS_VIDEO: + mask = info_map_read(info_map, engine_enabled_masks[GUC_VIDEO_CLASS]); + break; + + case GUC_CAPTURE_LIST_CLASS_VIDEOENHANCE: + mask = info_map_read(info_map, engine_enabled_masks[GUC_VIDEOENHANCE_CLASS]); + break; + + case GUC_CAPTURE_LIST_CLASS_BLITTER: + mask = info_map_read(info_map, engine_enabled_masks[GUC_BLITTER_CLASS]); + break; + + case GUC_CAPTURE_LIST_CLASS_GSC_OTHER: + mask = info_map_read(info_map, engine_enabled_masks[GUC_GSC_OTHER_CLASS]); + break; + + default: + mask = 0; + } + + return mask; +} + static int guc_capture_prep_lists(struct intel_guc *guc) { @@ -678,9 +711,10 @@ guc_capture_prep_lists(struct intel_guc *guc) for (i = 0; i < GUC_CAPTURE_LIST_INDEX_MAX; i++) { for (j = 0; j < GUC_MAX_ENGINE_CLASSES; j++) { + u32 engine_mask = guc_get_capture_engine_mask(&info_map, j); /* null list if we dont have said engine or list */ - if (!info_map_read(&info_map, engine_enabled_masks[j])) { + if (!engine_mask) { if (ads_is_mapped) { ads_blob_write(guc, ads.capture_class[i][j], null_ggtt); ads_blob_write(guc, ads.capture_instance[i][j], null_ggtt); diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_capture.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_capture.c index 1def0b6467c7..0ff864da92df 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_capture.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_capture.c @@ -174,35 +174,31 @@ static const struct __guc_mmio_reg_descr empty_regs_list[] = { /* List of lists */ static const struct __guc_mmio_reg_descr_group gen8_lists[] = { MAKE_REGLIST(gen8_global_regs, PF, GLOBAL, 0), - MAKE_REGLIST(gen8_rc_class_regs, PF, ENGINE_CLASS, GUC_RENDER_CLASS), - MAKE_REGLIST(gen8_rc_inst_regs, PF, ENGINE_INSTANCE, GUC_RENDER_CLASS), - MAKE_REGLIST(gen8_rc_class_regs, PF, ENGINE_CLASS, GUC_COMPUTE_CLASS), - MAKE_REGLIST(gen8_rc_inst_regs, PF, ENGINE_INSTANCE, GUC_COMPUTE_CLASS), - MAKE_REGLIST(empty_regs_list, PF, ENGINE_CLASS, GUC_VIDEO_CLASS), - MAKE_REGLIST(gen8_vd_inst_regs, PF, ENGINE_INSTANCE, GUC_VIDEO_CLASS), - MAKE_REGLIST(empty_regs_list, PF, ENGINE_CLASS, GUC_VIDEOENHANCE_CLASS), - MAKE_REGLIST(gen8_vec_inst_regs, PF, ENGINE_INSTANCE, GUC_VIDEOENHANCE_CLASS), - MAKE_REGLIST(empty_regs_list, PF, ENGINE_CLASS, GUC_BLITTER_CLASS), - MAKE_REGLIST(gen8_blt_inst_regs, PF, ENGINE_INSTANCE, GUC_BLITTER_CLASS), - MAKE_REGLIST(empty_regs_list, PF, ENGINE_CLASS, GUC_GSC_OTHER_CLASS), - MAKE_REGLIST(empty_regs_list, PF, ENGINE_INSTANCE, GUC_GSC_OTHER_CLASS), + MAKE_REGLIST(gen8_rc_class_regs, PF, ENGINE_CLASS, GUC_CAPTURE_LIST_CLASS_RENDER_COMPUTE), + MAKE_REGLIST(gen8_rc_inst_regs, PF, ENGINE_INSTANCE, GUC_CAPTURE_LIST_CLASS_RENDER_COMPUTE), + MAKE_REGLIST(empty_regs_list, PF, ENGINE_CLASS, GUC_CAPTURE_LIST_CLASS_VIDEO), + MAKE_REGLIST(gen8_vd_inst_regs, PF, ENGINE_INSTANCE, GUC_CAPTURE_LIST_CLASS_VIDEO), + MAKE_REGLIST(empty_regs_list, PF, ENGINE_CLASS, GUC_CAPTURE_LIST_CLASS_VIDEOENHANCE), + MAKE_REGLIST(gen8_vec_inst_regs, PF, ENGINE_INSTANCE, GUC_CAPTURE_LIST_CLASS_VIDEOENHANCE), + MAKE_REGLIST(empty_regs_list, PF, ENGINE_CLASS, GUC_CAPTURE_LIST_CLASS_BLITTER), + MAKE_REGLIST(gen8_blt_inst_regs, PF, ENGINE_INSTANCE, GUC_CAPTURE_LIST_CLASS_BLITTER), + MAKE_REGLIST(empty_regs_list, PF, ENGINE_CLASS, GUC_CAPTURE_LIST_CLASS_GSC_OTHER), + MAKE_REGLIST(empty_regs_list, PF, ENGINE_INSTANCE, GUC_CAPTURE_LIST_CLASS_GSC_OTHER), {} }; static const struct __guc_mmio_reg_descr_group xe_lp_lists[] = { MAKE_REGLIST(xe_lp_global_regs, PF, GLOBAL, 0), - MAKE_REGLIST(xe_lp_rc_class_regs, PF, ENGINE_CLASS, GUC_RENDER_CLASS), - MAKE_REGLIST(gen8_rc_inst_regs, PF, ENGINE_INSTANCE, GUC_RENDER_CLASS), - MAKE_REGLIST(xe_lp_rc_class_regs, PF, ENGINE_CLASS, GUC_COMPUTE_CLASS), - MAKE_REGLIST(gen8_rc_inst_regs, PF, ENGINE_INSTANCE, GUC_COMPUTE_CLASS), - MAKE_REGLIST(empty_regs_list, PF, ENGINE_CLASS, GUC_VIDEO_CLASS), - MAKE_REGLIST(gen8_vd_inst_regs, PF, ENGINE_INSTANCE, GUC_VIDEO_CLASS), - MAKE_REGLIST(xe_lp_vec_class_regs, PF, ENGINE_CLASS, GUC_VIDEOENHANCE_CLASS), - MAKE_REGLIST(gen8_vec_inst_regs, PF, ENGINE_INSTANCE, GUC_VIDEOENHANCE_CLASS), - MAKE_REGLIST(empty_regs_list, PF, ENGINE_CLASS, GUC_BLITTER_CLASS), - MAKE_REGLIST(gen8_blt_inst_regs, PF, ENGINE_INSTANCE, GUC_BLITTER_CLASS), - MAKE_REGLIST(empty_regs_list, PF, ENGINE_CLASS, GUC_GSC_OTHER_CLASS), - MAKE_REGLIST(xe_lp_gsc_inst_regs, PF, ENGINE_INSTANCE, GUC_GSC_OTHER_CLASS), + MAKE_REGLIST(xe_lp_rc_class_regs, PF, ENGINE_CLASS, GUC_CAPTURE_LIST_CLASS_RENDER_COMPUTE), + MAKE_REGLIST(gen8_rc_inst_regs, PF, ENGINE_INSTANCE, GUC_CAPTURE_LIST_CLASS_RENDER_COMPUTE), + MAKE_REGLIST(empty_regs_list, PF, ENGINE_CLASS, GUC_CAPTURE_LIST_CLASS_VIDEO), + MAKE_REGLIST(gen8_vd_inst_regs, PF, ENGINE_INSTANCE, GUC_CAPTURE_LIST_CLASS_VIDEO), + MAKE_REGLIST(xe_lp_vec_class_regs, PF, ENGINE_CLASS, GUC_CAPTURE_LIST_CLASS_VIDEOENHANCE), + MAKE_REGLIST(gen8_vec_inst_regs, PF, ENGINE_INSTANCE, GUC_CAPTURE_LIST_CLASS_VIDEOENHANCE), + MAKE_REGLIST(empty_regs_list, PF, ENGINE_CLASS, GUC_CAPTURE_LIST_CLASS_BLITTER), + MAKE_REGLIST(gen8_blt_inst_regs, PF, ENGINE_INSTANCE, GUC_CAPTURE_LIST_CLASS_BLITTER), + MAKE_REGLIST(empty_regs_list, PF, ENGINE_CLASS, GUC_CAPTURE_LIST_CLASS_GSC_OTHER), + MAKE_REGLIST(xe_lp_gsc_inst_regs, PF, ENGINE_INSTANCE, GUC_CAPTURE_LIST_CLASS_GSC_OTHER), {} }; @@ -310,7 +306,8 @@ guc_capture_alloc_steered_lists(struct intel_guc *guc, /* steered registers currently only exist for the render-class */ list = guc_capture_get_one_list(lists, GUC_CAPTURE_LIST_INDEX_PF, - GUC_CAPTURE_LIST_TYPE_ENGINE_CLASS, GUC_RENDER_CLASS); + GUC_CAPTURE_LIST_TYPE_ENGINE_CLASS, + GUC_CAPTURE_LIST_CLASS_RENDER_COMPUTE); /* skip if extlists was previously allocated */ if (!list || guc->capture->extlists) return; @@ -400,17 +397,15 @@ static const char * __stringify_engclass(u32 class) { switch (class) { - case GUC_RENDER_CLASS: - return "Render"; - case GUC_VIDEO_CLASS: + case GUC_CAPTURE_LIST_CLASS_RENDER_COMPUTE: + return "Render/Compute"; + case GUC_CAPTURE_LIST_CLASS_VIDEO: return "Video"; - case GUC_VIDEOENHANCE_CLASS: + case GUC_CAPTURE_LIST_CLASS_VIDEOENHANCE: return "VideoEnhance"; - case GUC_BLITTER_CLASS: + case GUC_CAPTURE_LIST_CLASS_BLITTER: return "Blitter"; - case GUC_COMPUTE_CLASS: - return "Compute"; - case GUC_GSC_OTHER_CLASS: + case GUC_CAPTURE_LIST_CLASS_GSC_OTHER: return "GSC-Other"; default: break; diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h index 8bb9ed7c7b4d..4e57bd09d50d 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h @@ -411,6 +411,15 @@ enum guc_capture_type { GUC_CAPTURE_LIST_TYPE_MAX, }; +/* Class indecies for capture_class and capture_instance arrays */ +enum { + GUC_CAPTURE_LIST_CLASS_RENDER_COMPUTE = 0, + GUC_CAPTURE_LIST_CLASS_VIDEO = 1, + GUC_CAPTURE_LIST_CLASS_VIDEOENHANCE = 2, + GUC_CAPTURE_LIST_CLASS_BLITTER = 3, + GUC_CAPTURE_LIST_CLASS_GSC_OTHER = 4, +}; + /* GuC Additional Data Struct */ struct guc_ads { struct guc_mmio_reg_set reg_state_list[GUC_MAX_ENGINE_CLASSES][GUC_MAX_INSTANCES_PER_CLASS]; From 621b6783c73100067c844c3be0b254c2f350e8ec Mon Sep 17 00:00:00 2001 From: Ashutosh Dixit Date: Fri, 12 May 2023 13:37:35 -0700 Subject: [PATCH 093/108] drm/i915/hwmon: Silence UBSAN uninitialized bool variable warning Loading i915 on UBSAN enabled kernels (CONFIG_UBSAN/CONFIG_UBSAN_BOOL) causes the following warning: UBSAN: invalid-load in drivers/gpu/drm/i915/gt/uc/intel_uc.c:558:2 load of value 255 is not a valid value for type '_Bool' Call Trace: dump_stack_lvl+0x57/0x7d ubsan_epilogue+0x5/0x40 __ubsan_handle_load_invalid_value.cold+0x43/0x48 __uc_init_hw+0x76a/0x903 [i915] ... i915_driver_probe+0xfb1/0x1eb0 [i915] i915_pci_probe+0xbe/0x2d0 [i915] The warning happens because during probe i915_hwmon is still not available which results in the output boolean variable *old remaining uninitialized. Silence the warning by initializing the variable to an arbitrary value. v2: Move variable initialization to the declaration (Andi) Signed-off-by: Ashutosh Dixit Reviewed-by: Andi Shyti Reviewed-by: Andrzej Hajda Signed-off-by: Andi Shyti Link: https://patchwork.freedesktop.org/patch/msgid/20230512203735.2635237-1-ashutosh.dixit@intel.com --- drivers/gpu/drm/i915/gt/uc/intel_uc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc.c b/drivers/gpu/drm/i915/gt/uc/intel_uc.c index 1381943b8973..c8b9cbb7ba3a 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_uc.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_uc.c @@ -465,7 +465,7 @@ static int __uc_init_hw(struct intel_uc *uc) struct intel_guc *guc = &uc->guc; struct intel_huc *huc = &uc->huc; int ret, attempts; - bool pl1en; + bool pl1en = false; GEM_BUG_ON(!intel_uc_supports_guc(uc)); GEM_BUG_ON(!intel_uc_wants_guc(uc)); From 6197cff30df44e4db85fed545fecb7df00ff8cd0 Mon Sep 17 00:00:00 2001 From: John Harrison Date: Tue, 18 Apr 2023 11:17:43 -0700 Subject: [PATCH 094/108] drm/i915: Dump error capture to kernel log This is useful for getting debug information out in certain situations, such as failing kernel selftests and CI runs that don't log error captures. It is especially useful for things like retrieving GuC logs as GuC operation can't be tracked by adding printk or ftrace entries. v2: Add CONFIG_DRM_I915_DEBUG_GEM wrapper (review feedback by Rodrigo). Signed-off-by: John Harrison Reviewed-by: Vinay Belgaumkar Link: https://patchwork.freedesktop.org/patch/msgid/20230418181744.3251240-2-John.C.Harrison@Intel.com --- drivers/gpu/drm/i915/i915_gpu_error.c | 132 ++++++++++++++++++++++++++ drivers/gpu/drm/i915/i915_gpu_error.h | 10 ++ 2 files changed, 142 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c index 566536499954..ec368e700235 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.c +++ b/drivers/gpu/drm/i915/i915_gpu_error.c @@ -2228,3 +2228,135 @@ void i915_disable_error_state(struct drm_i915_private *i915, int err) i915->gpu_error.first_error = ERR_PTR(err); spin_unlock_irq(&i915->gpu_error.lock); } + +#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM) +void intel_klog_error_capture(struct intel_gt *gt, + intel_engine_mask_t engine_mask) +{ + static int g_count; + struct drm_i915_private *i915 = gt->i915; + struct i915_gpu_coredump *error; + intel_wakeref_t wakeref; + size_t buf_size = PAGE_SIZE * 128; + size_t pos_err; + char *buf, *ptr, *next; + int l_count = g_count++; + int line = 0; + + /* Can't allocate memory during a reset */ + if (test_bit(I915_RESET_BACKOFF, >->reset.flags)) { + drm_err(>->i915->drm, "[Capture/%d.%d] Inside GT reset, skipping error capture :(\n", + l_count, line++); + return; + } + + error = READ_ONCE(i915->gpu_error.first_error); + if (error) { + drm_err(&i915->drm, "[Capture/%d.%d] Clearing existing error capture first...\n", + l_count, line++); + i915_reset_error_state(i915); + } + + with_intel_runtime_pm(&i915->runtime_pm, wakeref) + error = i915_gpu_coredump(gt, engine_mask, CORE_DUMP_FLAG_NONE); + + if (IS_ERR(error)) { + drm_err(&i915->drm, "[Capture/%d.%d] Failed to capture error capture: %ld!\n", + l_count, line++, PTR_ERR(error)); + return; + } + + buf = kvmalloc(buf_size, GFP_KERNEL); + if (!buf) { + drm_err(&i915->drm, "[Capture/%d.%d] Failed to allocate buffer for error capture!\n", + l_count, line++); + i915_gpu_coredump_put(error); + return; + } + + drm_info(&i915->drm, "[Capture/%d.%d] Dumping i915 error capture for %ps...\n", + l_count, line++, __builtin_return_address(0)); + + /* Largest string length safe to print via dmesg */ +# define MAX_CHUNK 800 + + pos_err = 0; + while (1) { + ssize_t got = i915_gpu_coredump_copy_to_buffer(error, buf, pos_err, buf_size - 1); + + if (got <= 0) + break; + + buf[got] = 0; + pos_err += got; + + ptr = buf; + while (got > 0) { + size_t count; + char tag[2]; + + next = strnchr(ptr, got, '\n'); + if (next) { + count = next - ptr; + *next = 0; + tag[0] = '>'; + tag[1] = '<'; + } else { + count = got; + tag[0] = '}'; + tag[1] = '{'; + } + + if (count > MAX_CHUNK) { + size_t pos; + char *ptr2 = ptr; + + for (pos = MAX_CHUNK; pos < count; pos += MAX_CHUNK) { + char chr = ptr[pos]; + + ptr[pos] = 0; + drm_info(&i915->drm, "[Capture/%d.%d] }%s{\n", + l_count, line++, ptr2); + ptr[pos] = chr; + ptr2 = ptr + pos; + + /* + * If spewing large amounts of data via a serial console, + * this can be a very slow process. So be friendly and try + * not to cause 'softlockup on CPU' problems. + */ + cond_resched(); + } + + if (ptr2 < (ptr + count)) + drm_info(&i915->drm, "[Capture/%d.%d] %c%s%c\n", + l_count, line++, tag[0], ptr2, tag[1]); + else if (tag[0] == '>') + drm_info(&i915->drm, "[Capture/%d.%d] ><\n", + l_count, line++); + } else { + drm_info(&i915->drm, "[Capture/%d.%d] %c%s%c\n", + l_count, line++, tag[0], ptr, tag[1]); + } + + ptr = next; + got -= count; + if (next) { + ptr++; + got--; + } + + /* As above. */ + cond_resched(); + } + + if (got) + drm_info(&i915->drm, "[Capture/%d.%d] Got %zd bytes remaining!\n", + l_count, line++, got); + } + + kvfree(buf); + + drm_info(&i915->drm, "[Capture/%d.%d] Dumped %zd bytes\n", l_count, line++, pos_err); +} +#endif diff --git a/drivers/gpu/drm/i915/i915_gpu_error.h b/drivers/gpu/drm/i915/i915_gpu_error.h index a91932cc6531..a78c061ce26f 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.h +++ b/drivers/gpu/drm/i915/i915_gpu_error.h @@ -258,6 +258,16 @@ static inline u32 i915_reset_engine_count(struct i915_gpu_error *error, #define CORE_DUMP_FLAG_NONE 0x0 #define CORE_DUMP_FLAG_IS_GUC_CAPTURE BIT(0) +#if IS_ENABLED(CONFIG_DRM_I915_CAPTURE_ERROR) && IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM) +void intel_klog_error_capture(struct intel_gt *gt, + intel_engine_mask_t engine_mask); +#else +static inline void intel_klog_error_capture(struct intel_gt *gt, + intel_engine_mask_t engine_mask) +{ +} +#endif + #if IS_ENABLED(CONFIG_DRM_I915_CAPTURE_ERROR) __printf(2, 3) From f6eeea8d7097a82d1460537146dee670d5014f13 Mon Sep 17 00:00:00 2001 From: John Harrison Date: Tue, 18 Apr 2023 11:17:44 -0700 Subject: [PATCH 095/108] drm/i915/guc: Dump error capture to dmesg on CTB error In the past, There have been sporadic CTB failures which proved hard to reproduce manually. The most effective solution was to dump the GuC log at the point of failure and let the CI system do the repro. It is preferable not to dump the GuC log via dmesg for all issues as it is not always necessary and is not helpful for end users. But rather than trying to re-invent the code to do this each time it is wanted, commit the code but for DEBUG_GUC builds only. v2: Use IS_ENABLED for testing config options. Signed-off-by: John Harrison Reviewed-by: Vinay Belgaumkar Link: https://patchwork.freedesktop.org/patch/msgid/20230418181744.3251240-3-John.C.Harrison@Intel.com --- drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c | 53 +++++++++++++++++++++++ drivers/gpu/drm/i915/gt/uc/intel_guc_ct.h | 6 +++ 2 files changed, 59 insertions(+) diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c index 99a0a89091e7..a22e33f37cae 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c @@ -13,6 +13,30 @@ #include "intel_guc_ct.h" #include "intel_guc_print.h" +#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GUC) +enum { + CT_DEAD_ALIVE = 0, + CT_DEAD_SETUP, + CT_DEAD_WRITE, + CT_DEAD_DEADLOCK, + CT_DEAD_H2G_HAS_ROOM, + CT_DEAD_READ, + CT_DEAD_PROCESS_FAILED, +}; + +static void ct_dead_ct_worker_func(struct work_struct *w); + +#define CT_DEAD(ct, reason) \ + do { \ + if (!(ct)->dead_ct_reported) { \ + (ct)->dead_ct_reason |= 1 << CT_DEAD_##reason; \ + queue_work(system_unbound_wq, &(ct)->dead_ct_worker); \ + } \ + } while (0) +#else +#define CT_DEAD(ct, reason) do { } while (0) +#endif + static inline struct intel_guc *ct_to_guc(struct intel_guc_ct *ct) { return container_of(ct, struct intel_guc, ct); @@ -93,6 +117,9 @@ void intel_guc_ct_init_early(struct intel_guc_ct *ct) spin_lock_init(&ct->requests.lock); INIT_LIST_HEAD(&ct->requests.pending); INIT_LIST_HEAD(&ct->requests.incoming); +#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GUC) + INIT_WORK(&ct->dead_ct_worker, ct_dead_ct_worker_func); +#endif INIT_WORK(&ct->requests.worker, ct_incoming_request_worker_func); tasklet_setup(&ct->receive_tasklet, ct_receive_tasklet_func); init_waitqueue_head(&ct->wq); @@ -319,11 +346,16 @@ int intel_guc_ct_enable(struct intel_guc_ct *ct) ct->enabled = true; ct->stall_time = KTIME_MAX; +#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GUC) + ct->dead_ct_reported = false; + ct->dead_ct_reason = CT_DEAD_ALIVE; +#endif return 0; err_out: CT_PROBE_ERROR(ct, "Failed to enable CTB (%pe)\n", ERR_PTR(err)); + CT_DEAD(ct, SETUP); return err; } @@ -434,6 +466,7 @@ static int ct_write(struct intel_guc_ct *ct, corrupted: CT_ERROR(ct, "Corrupted descriptor head=%u tail=%u status=%#x\n", desc->head, desc->tail, desc->status); + CT_DEAD(ct, WRITE); ctb->broken = true; return -EPIPE; } @@ -504,6 +537,7 @@ static inline bool ct_deadlocked(struct intel_guc_ct *ct) CT_ERROR(ct, "Head: %u\n (Dwords)", ct->ctbs.recv.desc->head); CT_ERROR(ct, "Tail: %u\n (Dwords)", ct->ctbs.recv.desc->tail); + CT_DEAD(ct, DEADLOCK); ct->ctbs.send.broken = true; } @@ -552,6 +586,7 @@ static inline bool h2g_has_room(struct intel_guc_ct *ct, u32 len_dw) head, ctb->size); desc->status |= GUC_CTB_STATUS_OVERFLOW; ctb->broken = true; + CT_DEAD(ct, H2G_HAS_ROOM); return false; } @@ -914,6 +949,7 @@ static int ct_read(struct intel_guc_ct *ct, struct ct_incoming_msg **msg) CT_ERROR(ct, "Corrupted descriptor head=%u tail=%u status=%#x\n", desc->head, desc->tail, desc->status); ctb->broken = true; + CT_DEAD(ct, READ); return -EPIPE; } @@ -1063,6 +1099,7 @@ static bool ct_process_incoming_requests(struct intel_guc_ct *ct) if (unlikely(err)) { CT_ERROR(ct, "Failed to process CT message (%pe) %*ph\n", ERR_PTR(err), 4 * request->size, request->msg); + CT_DEAD(ct, PROCESS_FAILED); ct_free_msg(request); } @@ -1239,3 +1276,19 @@ void intel_guc_ct_print_info(struct intel_guc_ct *ct, drm_printf(p, "Tail: %u\n", ct->ctbs.recv.desc->tail); } + +#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GUC) +static void ct_dead_ct_worker_func(struct work_struct *w) +{ + struct intel_guc_ct *ct = container_of(w, struct intel_guc_ct, dead_ct_worker); + struct intel_guc *guc = ct_to_guc(ct); + + if (ct->dead_ct_reported) + return; + + ct->dead_ct_reported = true; + + guc_info(guc, "CTB is dead - reason=0x%X\n", ct->dead_ct_reason); + intel_klog_error_capture(guc_to_gt(guc), (intel_engine_mask_t)~0U); +} +#endif diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.h index f709a19c7e21..818415b64f4d 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.h @@ -85,6 +85,12 @@ struct intel_guc_ct { /** @stall_time: time of first time a CTB submission is stalled */ ktime_t stall_time; + +#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GUC) + int dead_ct_reason; + bool dead_ct_reported; + struct work_struct dead_ct_worker; +#endif }; void intel_guc_ct_init_early(struct intel_guc_ct *ct); From 6f22587c915c34a4ee02c314cfdb708b11b5eafb Mon Sep 17 00:00:00 2001 From: Vinay Belgaumkar Date: Tue, 16 May 2023 08:49:05 -0700 Subject: [PATCH 096/108] drm/i915/guc/slpc: Disable rps_boost debugfs rps_boost debugfs shows host turbo related info. This is not valid when SLPC is enabled. guc_slpc_info already shows the number of boosts. Add num_waiters there as well and disable rps_boost when SLPC is enabled. v2: Replace Bug with Link to resolve checkpatch warning Link: https://gitlab.freedesktop.org/drm/intel/-/issues/7632 Reviewed-by: Ashutosh Dixit Signed-off-by: Vinay Belgaumkar Signed-off-by: John Harrison Link: https://patchwork.freedesktop.org/patch/msgid/20230516154905.1048006-1-vinay.belgaumkar@intel.com --- drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c | 5 ++++- drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c | 2 ++ 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c b/drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c index 80dbbef86b1d..357e2f865727 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c +++ b/drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c @@ -539,7 +539,10 @@ static bool rps_eval(void *data) { struct intel_gt *gt = data; - return HAS_RPS(gt->i915); + if (intel_guc_slpc_is_used(>->uc.guc)) + return false; + else + return HAS_RPS(gt->i915); } DEFINE_INTEL_GT_DEBUGFS_ATTRIBUTE(rps_boost); diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c index 56dbba1ef668..01b75529311c 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c @@ -837,6 +837,8 @@ int intel_guc_slpc_print_info(struct intel_guc_slpc *slpc, struct drm_printer *p slpc_decode_min_freq(slpc)); drm_printf(p, "\twaitboosts: %u\n", slpc->num_boosts); + drm_printf(p, "\tBoosts outstanding: %u\n", + atomic_read(&slpc->num_waiters)); } } From 25e7976db86bed5f1826bdd5c59e5be424a9d91f Mon Sep 17 00:00:00 2001 From: Andrzej Hajda Date: Thu, 18 May 2023 16:50:52 +0200 Subject: [PATCH 097/108] drm/i915/mtl: do not enable render power-gating on MTL Multiple CI tests fails with forcewake ack timeouts if render power gating is enabled. BSpec 52698 states it should be 0 for MTL, but apparently this info is outdated. Anyway since the patch makes MTL pass basic tests added FIXME tag informing this is temporary workaround. v2: added FIXME tag Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/4983 Signed-off-by: Andrzej Hajda Reviewed-by: Nirmoy Das Reviewed-by: Rodrigo Vivi Reviewed-by: Andi Shyti Link: https://patchwork.freedesktop.org/patch/msgid/20230517-mtl_disable_render_pg-v2-1-0b51180a43f0@intel.com --- drivers/gpu/drm/i915/gt/intel_rc6.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_rc6.c b/drivers/gpu/drm/i915/gt/intel_rc6.c index 908a3d0f2343..58bb1c55294c 100644 --- a/drivers/gpu/drm/i915/gt/intel_rc6.c +++ b/drivers/gpu/drm/i915/gt/intel_rc6.c @@ -117,8 +117,14 @@ static void gen11_rc6_enable(struct intel_rc6 *rc6) GEN6_RC_CTL_RC6_ENABLE | GEN6_RC_CTL_EI_MODE(1); - /* Wa_16011777198 - Render powergating must remain disabled */ - if (IS_DG2_GRAPHICS_STEP(gt->i915, G10, STEP_A0, STEP_C0) || + /* + * Wa_16011777198 and BSpec 52698 - Render powergating must be off. + * FIXME BSpec is outdated, disabling powergating for MTL is just + * temporary wa and should be removed after fixing real cause + * of forcewake timeouts. + */ + if (IS_METEORLAKE(gt->i915) || + IS_DG2_GRAPHICS_STEP(gt->i915, G10, STEP_A0, STEP_C0) || IS_DG2_GRAPHICS_STEP(gt->i915, G11, STEP_A0, STEP_B0)) pg_enable = GEN9_MEDIA_PG_ENABLE | From 5f12c8d0a761af5dda2e798b1af56ff967442b83 Mon Sep 17 00:00:00 2001 From: Radhakrishna Sripada Date: Wed, 17 May 2023 16:31:10 -0700 Subject: [PATCH 098/108] drm/i915/mtl: Add MTL performance tuning changes MTL reuses the tuning parameters for DG2. Extend the dg2 performance tuning parameters to MTL. v2: Add DRAW_WATERMARK tuning parameter. v3: Limit DRAW_WATERMARK tuning to non A0 step. v4: Reorder platform checks. Restrict Blend fill caching optimization to Render GT. v5: Move mtl tuning params to its own function Bspec: 68331 Cc: Haridhar Kalvala Cc: Matt Roper Reviewed-by: Gustavo Sousa Signed-off-by: Radhakrishna Sripada Link: https://patchwork.freedesktop.org/patch/msgid/20230517233111.297542-1-radhakrishna.sripada@intel.com --- drivers/gpu/drm/i915/gt/intel_workarounds.c | 23 ++++++++++++++++++++- 1 file changed, 22 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c index 786349e95487..4d2dece96011 100644 --- a/drivers/gpu/drm/i915/gt/intel_workarounds.c +++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c @@ -812,11 +812,25 @@ static void dg2_ctx_workarounds_init(struct intel_engine_cs *engine, wa_masked_en(wal, CACHE_MODE_1, MSAA_OPTIMIZATION_REDUC_DISABLE); } +static void mtl_ctx_gt_tuning_init(struct intel_engine_cs *engine, + struct i915_wa_list *wal) +{ + struct drm_i915_private *i915 = engine->i915; + + dg2_ctx_gt_tuning_init(engine, wal); + + if (IS_MTL_GRAPHICS_STEP(i915, M, STEP_B0, STEP_FOREVER) || + IS_MTL_GRAPHICS_STEP(i915, P, STEP_B0, STEP_FOREVER)) + wa_add(wal, DRAW_WATERMARK, VERT_WM_VAL, 0x3FF, 0, false); +} + static void mtl_ctx_workarounds_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) { struct drm_i915_private *i915 = engine->i915; + mtl_ctx_gt_tuning_init(engine, wal); + if (IS_MTL_GRAPHICS_STEP(i915, M, STEP_A0, STEP_B0) || IS_MTL_GRAPHICS_STEP(i915, P, STEP_A0, STEP_B0)) { /* Wa_14014947963 */ @@ -1748,6 +1762,13 @@ xelpmp_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal) */ static void gt_tuning_settings(struct intel_gt *gt, struct i915_wa_list *wal) { + if (IS_METEORLAKE(gt->i915)) { + if (gt->type != GT_MEDIA) + wa_mcr_write_or(wal, XEHP_L3SCQREG7, BLEND_FILL_CACHING_OPT_DIS); + + wa_mcr_write_or(wal, XEHP_SQCM, EN_32B_ACCESS); + } + if (IS_PONTEVECCHIO(gt->i915)) { wa_mcr_write(wal, XEHPC_L3SCRUB, SCRUB_CL_DWNGRADE_SHARED | SCRUB_RATE_4B_PER_CLK); @@ -2944,7 +2965,7 @@ static void add_render_compute_tuning_settings(struct drm_i915_private *i915, struct i915_wa_list *wal) { - if (IS_DG2(i915)) + if (IS_METEORLAKE(i915) || IS_DG2(i915)) wa_mcr_write_clr_set(wal, RT_CTRL, STACKID_CTRL, STACKID_CTRL_512); /* From 1a365a2b24cda48ff8d441e91663a6c0ab1353a9 Mon Sep 17 00:00:00 2001 From: Radhakrishna Sripada Date: Wed, 17 May 2023 16:31:11 -0700 Subject: [PATCH 099/108] drm/i915/mtl: Extend Wa_16014892111 to MTL A-step Like DG2, MTL a-step hardware is subject to Wa_16014892111 which requires that any changes made to the DRAW_WATERMARK register be done via an INDIRECT_CTX batch buffer rather than through a regular context workaround. The bspec gives the same non-default recommended tuning value for DRAW_WATERMARK as DG2, so we can re-use the INDIRECT_CTX code to apply that tuning setting on A-step hardware. Application of the tuning setting on B-step and later does not need INDIRECT_CTX handling and is already done in mtl_ctx_workarounds_init() as usual. v2: Limit the WA for A-step v3: Update the commit message. v4: Reorder platform checks and update commit message. Bspec: 68331 Cc: Haridhar Kalvala Cc: Gustavo Sousa Reviewed-by: Matt Roper Signed-off-by: Radhakrishna Sripada Link: https://patchwork.freedesktop.org/patch/msgid/20230517233111.297542-2-radhakrishna.sripada@intel.com --- drivers/gpu/drm/i915/gt/intel_lrc.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c index 81a96c52a92b..a4ec20aaafe2 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc.c +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c @@ -1370,7 +1370,9 @@ gen12_emit_indirect_ctx_rcs(const struct intel_context *ce, u32 *cs) cs, GEN12_GFX_CCS_AUX_NV); /* Wa_16014892111 */ - if (IS_DG2(ce->engine->i915)) + if (IS_MTL_GRAPHICS_STEP(ce->engine->i915, M, STEP_A0, STEP_B0) || + IS_MTL_GRAPHICS_STEP(ce->engine->i915, P, STEP_A0, STEP_B0) || + IS_DG2(ce->engine->i915)) cs = dg2_emit_draw_watermark_setting(cs); return cs; From 25553494b275a1a4cf06e4a7aa4073817cb2b846 Mon Sep 17 00:00:00 2001 From: Cong Liu Date: Wed, 17 May 2023 13:02:03 +0800 Subject: [PATCH 100/108] drm/i915: Fix memory leaks in function live_nop_switch Be sure to properly free the allocated memory before exiting the live_nop_switch function. Signed-off-by: Cong Liu Suggested-by: Rodrigo Vivi Signed-off-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/20230517050204.4111874-1-liucong2@kylinos.cn --- .../gpu/drm/i915/gem/selftests/i915_gem_context.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c index 2697fbaa2ceb..ad6a3b2fb387 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c @@ -66,7 +66,7 @@ static int live_nop_switch(void *arg) ctx[n] = live_context(i915, file); if (IS_ERR(ctx[n])) { err = PTR_ERR(ctx[n]); - goto out_file; + goto out_ctx; } } @@ -82,7 +82,7 @@ static int live_nop_switch(void *arg) this = igt_request_alloc(ctx[n], engine); if (IS_ERR(this)) { err = PTR_ERR(this); - goto out_file; + goto out_ctx; } if (rq) { i915_request_await_dma_fence(this, &rq->fence); @@ -96,7 +96,7 @@ static int live_nop_switch(void *arg) intel_gt_set_wedged(engine->gt); i915_request_put(rq); err = -EIO; - goto out_file; + goto out_ctx; } i915_request_put(rq); @@ -107,7 +107,7 @@ static int live_nop_switch(void *arg) err = igt_live_test_begin(&t, i915, __func__, engine->name); if (err) - goto out_file; + goto out_ctx; end_time = jiffies + i915_selftest.timeout_jiffies; for_each_prime_number_from(prime, 2, 8192) { @@ -120,7 +120,7 @@ static int live_nop_switch(void *arg) this = igt_request_alloc(ctx[n % nctx], engine); if (IS_ERR(this)) { err = PTR_ERR(this); - goto out_file; + goto out_ctx; } if (rq) { /* Force submission order */ @@ -165,7 +165,7 @@ static int live_nop_switch(void *arg) err = igt_live_test_end(&t); if (err) - goto out_file; + goto out_ctx; pr_info("Switch latencies on %s: 1 = %lluns, %lu = %lluns\n", engine->name, @@ -173,6 +173,8 @@ static int live_nop_switch(void *arg) prime - 1, div64_u64(ktime_to_ns(times[1]), prime - 1)); } +out_ctx: + kfree(ctx); out_file: fput(file); return err; From a644fde77ff73ee54970c0fc5a64cf7624c8b5b1 Mon Sep 17 00:00:00 2001 From: Tvrtko Ursulin Date: Fri, 19 May 2023 08:49:40 -0700 Subject: [PATCH 101/108] drm/i915/pmu: Change bitmask of enabled events to u32 Having it as u64 was a confusing (but harmless) mistake. Also add some asserts to make sure the internal field does not overflow in the future. v2: Fix WARN_ON firing for INTERRUPT event (Umesh) Signed-off-by: Tvrtko Ursulin Signed-off-by: Umesh Nerlige Ramappa Reviewed-by: Ashutosh Dixit Link: https://patchwork.freedesktop.org/patch/msgid/20230519154946.3751971-2-umesh.nerlige.ramappa@intel.com --- drivers/gpu/drm/i915/i915_pmu.c | 26 ++++++++++++++++++-------- 1 file changed, 18 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c index 7ece883a7d95..96543dce2db1 100644 --- a/drivers/gpu/drm/i915/i915_pmu.c +++ b/drivers/gpu/drm/i915/i915_pmu.c @@ -50,7 +50,7 @@ static u8 engine_event_instance(struct perf_event *event) return (event->attr.config >> I915_PMU_SAMPLE_BITS) & 0xff; } -static bool is_engine_config(u64 config) +static bool is_engine_config(const u64 config) { return config < __I915_PMU_OTHER(0); } @@ -88,9 +88,20 @@ static unsigned int config_bit(const u64 config) return other_bit(config); } -static u64 config_mask(u64 config) +static u32 config_mask(const u64 config) { - return BIT_ULL(config_bit(config)); + unsigned int bit = config_bit(config); + + if (__builtin_constant_p(config)) + BUILD_BUG_ON(bit > + BITS_PER_TYPE(typeof_member(struct i915_pmu, + enable)) - 1); + else + WARN_ON_ONCE(bit > + BITS_PER_TYPE(typeof_member(struct i915_pmu, + enable)) - 1); + + return BIT(config_bit(config)); } static bool is_engine_event(struct perf_event *event) @@ -633,11 +644,10 @@ static void i915_pmu_enable(struct perf_event *event) { struct drm_i915_private *i915 = container_of(event->pmu, typeof(*i915), pmu.base); + const unsigned int bit = event_bit(event); struct i915_pmu *pmu = &i915->pmu; unsigned long flags; - unsigned int bit; - bit = event_bit(event); if (bit == -1) goto update; @@ -651,7 +661,7 @@ static void i915_pmu_enable(struct perf_event *event) GEM_BUG_ON(bit >= ARRAY_SIZE(pmu->enable_count)); GEM_BUG_ON(pmu->enable_count[bit] == ~0); - pmu->enable |= BIT_ULL(bit); + pmu->enable |= BIT(bit); pmu->enable_count[bit]++; /* @@ -698,7 +708,7 @@ static void i915_pmu_disable(struct perf_event *event) { struct drm_i915_private *i915 = container_of(event->pmu, typeof(*i915), pmu.base); - unsigned int bit = event_bit(event); + const unsigned int bit = event_bit(event); struct i915_pmu *pmu = &i915->pmu; unsigned long flags; @@ -734,7 +744,7 @@ static void i915_pmu_disable(struct perf_event *event) * bitmask when the last listener on an event goes away. */ if (--pmu->enable_count[bit] == 0) { - pmu->enable &= ~BIT_ULL(bit); + pmu->enable &= ~BIT(bit); pmu->timer_enabled &= pmu_needs_timer(pmu, true); } From e367d3c45158ba34bb684227d87c52d8d840fd67 Mon Sep 17 00:00:00 2001 From: Tvrtko Ursulin Date: Fri, 19 May 2023 08:49:41 -0700 Subject: [PATCH 102/108] drm/i915/pmu: Support PMU for all engines Given how the metrics are already exported, we also need to run sampling over engines from all GTs. Problem of GT frequencies is left for later. Signed-off-by: Tvrtko Ursulin Reviewed-by: Umesh Nerlige Ramappa Signed-off-by: Umesh Nerlige Ramappa Link: https://patchwork.freedesktop.org/patch/msgid/20230519154946.3751971-3-umesh.nerlige.ramappa@intel.com --- drivers/gpu/drm/i915/i915_pmu.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c index 96543dce2db1..9edf87ee5d10 100644 --- a/drivers/gpu/drm/i915/i915_pmu.c +++ b/drivers/gpu/drm/i915/i915_pmu.c @@ -10,6 +10,7 @@ #include "gt/intel_engine_pm.h" #include "gt/intel_engine_regs.h" #include "gt/intel_engine_user.h" +#include "gt/intel_gt.h" #include "gt/intel_gt_pm.h" #include "gt/intel_gt_regs.h" #include "gt/intel_rc6.h" @@ -425,8 +426,9 @@ static enum hrtimer_restart i915_sample(struct hrtimer *hrtimer) struct drm_i915_private *i915 = container_of(hrtimer, struct drm_i915_private, pmu.timer); struct i915_pmu *pmu = &i915->pmu; - struct intel_gt *gt = to_gt(i915); unsigned int period_ns; + struct intel_gt *gt; + unsigned int i; ktime_t now; if (!READ_ONCE(pmu->timer_enabled)) @@ -442,8 +444,13 @@ static enum hrtimer_restart i915_sample(struct hrtimer *hrtimer) * grabbing the forcewake. However the potential error from timer call- * back delay greatly dominates this so we keep it simple. */ - engines_sample(gt, period_ns); - frequency_sample(gt, period_ns); + + for_each_gt(gt, i915, i) { + engines_sample(gt, period_ns); + + if (i == 0) /* FIXME */ + frequency_sample(gt, period_ns); + } hrtimer_forward(hrtimer, now, ns_to_ktime(PERIOD)); From 08322dabb5cbce75e210d8df4774fc078ed7161c Mon Sep 17 00:00:00 2001 From: Tvrtko Ursulin Date: Fri, 19 May 2023 08:49:42 -0700 Subject: [PATCH 103/108] drm/i915/pmu: Skip sampling engines with no enabled counters As we have more and more engines do not waste time sampling the ones no- one is monitoring. Signed-off-by: Tvrtko Ursulin Reviewed-by: Umesh Nerlige Ramappa Signed-off-by: Umesh Nerlige Ramappa Link: https://patchwork.freedesktop.org/patch/msgid/20230519154946.3751971-4-umesh.nerlige.ramappa@intel.com --- drivers/gpu/drm/i915/i915_pmu.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c index 9edf87ee5d10..6d594f67f365 100644 --- a/drivers/gpu/drm/i915/i915_pmu.c +++ b/drivers/gpu/drm/i915/i915_pmu.c @@ -350,6 +350,9 @@ engines_sample(struct intel_gt *gt, unsigned int period_ns) return; for_each_engine(engine, gt, id) { + if (!engine->pmu.enable) + continue; + if (!intel_engine_pm_get_if_awake(engine)) continue; From da5d51672874936c08810e63d6dfc670263d5e15 Mon Sep 17 00:00:00 2001 From: Tvrtko Ursulin Date: Fri, 19 May 2023 08:49:43 -0700 Subject: [PATCH 104/108] drm/i915/pmu: Transform PMU parking code to be GT based Trivial prep work for full multi-tile enablement later. Signed-off-by: Tvrtko Ursulin Signed-off-by: Vinay Belgaumkar Reviewed-by: Umesh Nerlige Ramappa Signed-off-by: Umesh Nerlige Ramappa Link: https://patchwork.freedesktop.org/patch/msgid/20230519154946.3751971-5-umesh.nerlige.ramappa@intel.com --- drivers/gpu/drm/i915/gt/intel_gt_pm.c | 4 ++-- drivers/gpu/drm/i915/i915_pmu.c | 16 ++++++++-------- drivers/gpu/drm/i915/i915_pmu.h | 9 +++++---- 3 files changed, 15 insertions(+), 14 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm.c b/drivers/gpu/drm/i915/gt/intel_gt_pm.c index e02cb90723ae..c2e69bafd02b 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_pm.c +++ b/drivers/gpu/drm/i915/gt/intel_gt_pm.c @@ -87,7 +87,7 @@ static int __gt_unpark(struct intel_wakeref *wf) intel_rc6_unpark(>->rc6); intel_rps_unpark(>->rps); - i915_pmu_gt_unparked(i915); + i915_pmu_gt_unparked(gt); intel_guc_busyness_unpark(gt); intel_gt_unpark_requests(gt); @@ -109,7 +109,7 @@ static int __gt_park(struct intel_wakeref *wf) intel_guc_busyness_park(gt); i915_vma_parked(gt); - i915_pmu_gt_parked(i915); + i915_pmu_gt_parked(gt); intel_rps_park(>->rps); intel_rc6_park(>->rc6); diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c index 6d594f67f365..890693fdaf9e 100644 --- a/drivers/gpu/drm/i915/i915_pmu.c +++ b/drivers/gpu/drm/i915/i915_pmu.c @@ -228,11 +228,11 @@ static void init_rc6(struct i915_pmu *pmu) } } -static void park_rc6(struct drm_i915_private *i915) +static void park_rc6(struct intel_gt *gt) { - struct i915_pmu *pmu = &i915->pmu; + struct i915_pmu *pmu = >->i915->pmu; - pmu->sample[__I915_SAMPLE_RC6].cur = __get_rc6(to_gt(i915)); + pmu->sample[__I915_SAMPLE_RC6].cur = __get_rc6(gt); pmu->sleep_last = ktime_get_raw(); } @@ -247,16 +247,16 @@ static void __i915_pmu_maybe_start_timer(struct i915_pmu *pmu) } } -void i915_pmu_gt_parked(struct drm_i915_private *i915) +void i915_pmu_gt_parked(struct intel_gt *gt) { - struct i915_pmu *pmu = &i915->pmu; + struct i915_pmu *pmu = >->i915->pmu; if (!pmu->base.event_init) return; spin_lock_irq(&pmu->lock); - park_rc6(i915); + park_rc6(gt); /* * Signal sampling timer to stop if only engine events are enabled and @@ -267,9 +267,9 @@ void i915_pmu_gt_parked(struct drm_i915_private *i915) spin_unlock_irq(&pmu->lock); } -void i915_pmu_gt_unparked(struct drm_i915_private *i915) +void i915_pmu_gt_unparked(struct intel_gt *gt) { - struct i915_pmu *pmu = &i915->pmu; + struct i915_pmu *pmu = >->i915->pmu; if (!pmu->base.event_init) return; diff --git a/drivers/gpu/drm/i915/i915_pmu.h b/drivers/gpu/drm/i915/i915_pmu.h index c30f43319a78..a686fd7ccedf 100644 --- a/drivers/gpu/drm/i915/i915_pmu.h +++ b/drivers/gpu/drm/i915/i915_pmu.h @@ -13,6 +13,7 @@ #include struct drm_i915_private; +struct intel_gt; /* * Non-engine events that we need to track enabled-disabled transition and @@ -151,15 +152,15 @@ int i915_pmu_init(void); void i915_pmu_exit(void); void i915_pmu_register(struct drm_i915_private *i915); void i915_pmu_unregister(struct drm_i915_private *i915); -void i915_pmu_gt_parked(struct drm_i915_private *i915); -void i915_pmu_gt_unparked(struct drm_i915_private *i915); +void i915_pmu_gt_parked(struct intel_gt *gt); +void i915_pmu_gt_unparked(struct intel_gt *gt); #else static inline int i915_pmu_init(void) { return 0; } static inline void i915_pmu_exit(void) {} static inline void i915_pmu_register(struct drm_i915_private *i915) {} static inline void i915_pmu_unregister(struct drm_i915_private *i915) {} -static inline void i915_pmu_gt_parked(struct drm_i915_private *i915) {} -static inline void i915_pmu_gt_unparked(struct drm_i915_private *i915) {} +static inline void i915_pmu_gt_parked(struct intel_gt *gt) {} +static inline void i915_pmu_gt_unparked(struct intel_gt *gt) {} #endif #endif From b319cc594e666061b22331da654606b92730343a Mon Sep 17 00:00:00 2001 From: Tvrtko Ursulin Date: Fri, 19 May 2023 08:49:44 -0700 Subject: [PATCH 105/108] drm/i915/pmu: Add reference counting to the sampling timer We do not want to have timers per tile and waste CPU cycles and energy via multiple wake-up sources, for a relatively un-important task of PMU sampling, so keeping a single timer works well. But we also do not want the first GT which goes idle to turn off the timer. Add some reference counting, via a mask of unparked GTs, to solve this. v2: Drop the check for unparked in i915_sample (Ashutosh) v3: Revert v2 (Tvrtko) Signed-off-by: Tvrtko Ursulin Reviewed-by: Umesh Nerlige Ramappa Signed-off-by: Umesh Nerlige Ramappa Reviewed-by: Ashutosh Dixit Link: https://patchwork.freedesktop.org/patch/msgid/20230519154946.3751971-6-umesh.nerlige.ramappa@intel.com --- drivers/gpu/drm/i915/i915_pmu.c | 12 ++++++++++-- drivers/gpu/drm/i915/i915_pmu.h | 4 ++++ 2 files changed, 14 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c index 890693fdaf9e..ecb57a94143e 100644 --- a/drivers/gpu/drm/i915/i915_pmu.c +++ b/drivers/gpu/drm/i915/i915_pmu.c @@ -262,7 +262,9 @@ void i915_pmu_gt_parked(struct intel_gt *gt) * Signal sampling timer to stop if only engine events are enabled and * GPU went idle. */ - pmu->timer_enabled = pmu_needs_timer(pmu, false); + pmu->unparked &= ~BIT(gt->info.id); + if (pmu->unparked == 0) + pmu->timer_enabled = pmu_needs_timer(pmu, false); spin_unlock_irq(&pmu->lock); } @@ -279,7 +281,10 @@ void i915_pmu_gt_unparked(struct intel_gt *gt) /* * Re-enable sampling timer when GPU goes active. */ - __i915_pmu_maybe_start_timer(pmu); + if (pmu->unparked == 0) + __i915_pmu_maybe_start_timer(pmu); + + pmu->unparked |= BIT(gt->info.id); spin_unlock_irq(&pmu->lock); } @@ -449,6 +454,9 @@ static enum hrtimer_restart i915_sample(struct hrtimer *hrtimer) */ for_each_gt(gt, i915, i) { + if (!(pmu->unparked & BIT(i))) + continue; + engines_sample(gt, period_ns); if (i == 0) /* FIXME */ diff --git a/drivers/gpu/drm/i915/i915_pmu.h b/drivers/gpu/drm/i915/i915_pmu.h index a686fd7ccedf..3a811266ac6a 100644 --- a/drivers/gpu/drm/i915/i915_pmu.h +++ b/drivers/gpu/drm/i915/i915_pmu.h @@ -76,6 +76,10 @@ struct i915_pmu { * @lock: Lock protecting enable mask and ref count handling. */ spinlock_t lock; + /** + * @unparked: GT unparked mask. + */ + unsigned int unparked; /** * @timer: Timer for internal i915 PMU sampling. */ From bc4be0a38b63b6d4d00a58b10e164f56049be2c2 Mon Sep 17 00:00:00 2001 From: Tvrtko Ursulin Date: Fri, 19 May 2023 08:49:45 -0700 Subject: [PATCH 106/108] drm/i915/pmu: Prepare for multi-tile non-engine counters Reserve some bits in the counter config namespace which will carry the tile id and prepare the code to handle this. No per tile counters have been added yet. v2: - Fix checkpatch issues - Use 4 bits for gt id in non-engine counters. Drop FIXME. - Set MAX GTs to 4. Drop FIXME. v3: (Ashutosh, Tvrtko) - Drop BUG_ON that would never fire - Make enable u64 - Pull in some code from next patch v4: Set I915_PMU_MAX_GTS to 2 (Tvrtko) v5: s/u64/u32 where needed (Ashutosh) Signed-off-by: Tvrtko Ursulin Signed-off-by: Umesh Nerlige Ramappa Reviewed-by: Ashutosh Dixit Link: https://patchwork.freedesktop.org/patch/msgid/20230519154946.3751971-7-umesh.nerlige.ramappa@intel.com --- drivers/gpu/drm/i915/i915_pmu.c | 146 +++++++++++++++++++++++--------- drivers/gpu/drm/i915/i915_pmu.h | 9 +- include/uapi/drm/i915_drm.h | 17 +++- 3 files changed, 127 insertions(+), 45 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c index ecb57a94143e..5cfc322e69b4 100644 --- a/drivers/gpu/drm/i915/i915_pmu.c +++ b/drivers/gpu/drm/i915/i915_pmu.c @@ -56,11 +56,21 @@ static bool is_engine_config(const u64 config) return config < __I915_PMU_OTHER(0); } +static unsigned int config_gt_id(const u64 config) +{ + return config >> __I915_PMU_GT_SHIFT; +} + +static u64 config_counter(const u64 config) +{ + return config & ~(~0ULL << __I915_PMU_GT_SHIFT); +} + static unsigned int other_bit(const u64 config) { unsigned int val; - switch (config) { + switch (config_counter(config)) { case I915_PMU_ACTUAL_FREQUENCY: val = __I915_PMU_ACTUAL_FREQUENCY_ENABLED; break; @@ -78,7 +88,9 @@ static unsigned int other_bit(const u64 config) return -1; } - return I915_ENGINE_SAMPLE_COUNT + val; + return I915_ENGINE_SAMPLE_COUNT + + config_gt_id(config) * __I915_PMU_TRACKED_EVENT_COUNT + + val; } static unsigned int config_bit(const u64 config) @@ -115,6 +127,18 @@ static unsigned int event_bit(struct perf_event *event) return config_bit(event->attr.config); } +static u32 frequency_enabled_mask(void) +{ + unsigned int i; + u32 mask = 0; + + for (i = 0; i < I915_PMU_MAX_GTS; i++) + mask |= config_mask(__I915_PMU_ACTUAL_FREQUENCY(i)) | + config_mask(__I915_PMU_REQUESTED_FREQUENCY(i)); + + return mask; +} + static bool pmu_needs_timer(struct i915_pmu *pmu, bool gpu_active) { struct drm_i915_private *i915 = container_of(pmu, typeof(*i915), pmu); @@ -131,9 +155,7 @@ static bool pmu_needs_timer(struct i915_pmu *pmu, bool gpu_active) * Mask out all the ones which do not need the timer, or in * other words keep all the ones that could need the timer. */ - enable &= config_mask(I915_PMU_ACTUAL_FREQUENCY) | - config_mask(I915_PMU_REQUESTED_FREQUENCY) | - ENGINE_SAMPLE_MASK; + enable &= frequency_enabled_mask() | ENGINE_SAMPLE_MASK; /* * When the GPU is idle per-engine counters do not need to be @@ -175,9 +197,37 @@ static inline s64 ktime_since_raw(const ktime_t kt) return ktime_to_ns(ktime_sub(ktime_get_raw(), kt)); } +static unsigned int +__sample_idx(struct i915_pmu *pmu, unsigned int gt_id, int sample) +{ + unsigned int idx = gt_id * __I915_NUM_PMU_SAMPLERS + sample; + + GEM_BUG_ON(idx >= ARRAY_SIZE(pmu->sample)); + + return idx; +} + +static u64 read_sample(struct i915_pmu *pmu, unsigned int gt_id, int sample) +{ + return pmu->sample[__sample_idx(pmu, gt_id, sample)].cur; +} + +static void +store_sample(struct i915_pmu *pmu, unsigned int gt_id, int sample, u64 val) +{ + pmu->sample[__sample_idx(pmu, gt_id, sample)].cur = val; +} + +static void +add_sample_mult(struct i915_pmu *pmu, unsigned int gt_id, int sample, u32 val, u32 mul) +{ + pmu->sample[__sample_idx(pmu, gt_id, sample)].cur += mul_u32_u32(val, mul); +} + static u64 get_rc6(struct intel_gt *gt) { struct drm_i915_private *i915 = gt->i915; + const unsigned int gt_id = gt->info.id; struct i915_pmu *pmu = &i915->pmu; unsigned long flags; bool awake = false; @@ -192,7 +242,7 @@ static u64 get_rc6(struct intel_gt *gt) spin_lock_irqsave(&pmu->lock, flags); if (awake) { - pmu->sample[__I915_SAMPLE_RC6].cur = val; + store_sample(pmu, gt_id, __I915_SAMPLE_RC6, val); } else { /* * We think we are runtime suspended. @@ -201,14 +251,14 @@ static u64 get_rc6(struct intel_gt *gt) * on top of the last known real value, as the approximated RC6 * counter value. */ - val = ktime_since_raw(pmu->sleep_last); - val += pmu->sample[__I915_SAMPLE_RC6].cur; + val = ktime_since_raw(pmu->sleep_last[gt_id]); + val += read_sample(pmu, gt_id, __I915_SAMPLE_RC6); } - if (val < pmu->sample[__I915_SAMPLE_RC6_LAST_REPORTED].cur) - val = pmu->sample[__I915_SAMPLE_RC6_LAST_REPORTED].cur; + if (val < read_sample(pmu, gt_id, __I915_SAMPLE_RC6_LAST_REPORTED)) + val = read_sample(pmu, gt_id, __I915_SAMPLE_RC6_LAST_REPORTED); else - pmu->sample[__I915_SAMPLE_RC6_LAST_REPORTED].cur = val; + store_sample(pmu, gt_id, __I915_SAMPLE_RC6_LAST_REPORTED, val); spin_unlock_irqrestore(&pmu->lock, flags); @@ -218,13 +268,20 @@ static u64 get_rc6(struct intel_gt *gt) static void init_rc6(struct i915_pmu *pmu) { struct drm_i915_private *i915 = container_of(pmu, typeof(*i915), pmu); - intel_wakeref_t wakeref; + struct intel_gt *gt; + unsigned int i; - with_intel_runtime_pm(to_gt(i915)->uncore->rpm, wakeref) { - pmu->sample[__I915_SAMPLE_RC6].cur = __get_rc6(to_gt(i915)); - pmu->sample[__I915_SAMPLE_RC6_LAST_REPORTED].cur = - pmu->sample[__I915_SAMPLE_RC6].cur; - pmu->sleep_last = ktime_get_raw(); + for_each_gt(gt, i915, i) { + intel_wakeref_t wakeref; + + with_intel_runtime_pm(gt->uncore->rpm, wakeref) { + u64 val = __get_rc6(gt); + + store_sample(pmu, i, __I915_SAMPLE_RC6, val); + store_sample(pmu, i, __I915_SAMPLE_RC6_LAST_REPORTED, + val); + pmu->sleep_last[i] = ktime_get_raw(); + } } } @@ -232,8 +289,8 @@ static void park_rc6(struct intel_gt *gt) { struct i915_pmu *pmu = >->i915->pmu; - pmu->sample[__I915_SAMPLE_RC6].cur = __get_rc6(gt); - pmu->sleep_last = ktime_get_raw(); + store_sample(pmu, gt->info.id, __I915_SAMPLE_RC6, __get_rc6(gt)); + pmu->sleep_last[gt->info.id] = ktime_get_raw(); } static void __i915_pmu_maybe_start_timer(struct i915_pmu *pmu) @@ -373,34 +430,30 @@ engines_sample(struct intel_gt *gt, unsigned int period_ns) } } -static void -add_sample_mult(struct i915_pmu_sample *sample, u32 val, u32 mul) -{ - sample->cur += mul_u32_u32(val, mul); -} - -static bool frequency_sampling_enabled(struct i915_pmu *pmu) +static bool +frequency_sampling_enabled(struct i915_pmu *pmu, unsigned int gt) { return pmu->enable & - (config_mask(I915_PMU_ACTUAL_FREQUENCY) | - config_mask(I915_PMU_REQUESTED_FREQUENCY)); + (config_mask(__I915_PMU_ACTUAL_FREQUENCY(gt)) | + config_mask(__I915_PMU_REQUESTED_FREQUENCY(gt))); } static void frequency_sample(struct intel_gt *gt, unsigned int period_ns) { struct drm_i915_private *i915 = gt->i915; + const unsigned int gt_id = gt->info.id; struct i915_pmu *pmu = &i915->pmu; struct intel_rps *rps = >->rps; - if (!frequency_sampling_enabled(pmu)) + if (!frequency_sampling_enabled(pmu, gt_id)) return; /* Report 0/0 (actual/requested) frequency while parked. */ if (!intel_gt_pm_get_if_awake(gt)) return; - if (pmu->enable & config_mask(I915_PMU_ACTUAL_FREQUENCY)) { + if (pmu->enable & config_mask(__I915_PMU_ACTUAL_FREQUENCY(gt_id))) { u32 val; /* @@ -416,12 +469,12 @@ frequency_sample(struct intel_gt *gt, unsigned int period_ns) if (!val) val = intel_gpu_freq(rps, rps->cur_freq); - add_sample_mult(&pmu->sample[__I915_SAMPLE_FREQ_ACT], + add_sample_mult(pmu, gt_id, __I915_SAMPLE_FREQ_ACT, val, period_ns / 1000); } - if (pmu->enable & config_mask(I915_PMU_REQUESTED_FREQUENCY)) { - add_sample_mult(&pmu->sample[__I915_SAMPLE_FREQ_REQ], + if (pmu->enable & config_mask(__I915_PMU_REQUESTED_FREQUENCY(gt_id))) { + add_sample_mult(pmu, gt_id, __I915_SAMPLE_FREQ_REQ, intel_rps_get_requested_frequency(rps), period_ns / 1000); } @@ -458,9 +511,7 @@ static enum hrtimer_restart i915_sample(struct hrtimer *hrtimer) continue; engines_sample(gt, period_ns); - - if (i == 0) /* FIXME */ - frequency_sample(gt, period_ns); + frequency_sample(gt, period_ns); } hrtimer_forward(hrtimer, now, ns_to_ktime(PERIOD)); @@ -502,7 +553,13 @@ config_status(struct drm_i915_private *i915, u64 config) { struct intel_gt *gt = to_gt(i915); - switch (config) { + unsigned int gt_id = config_gt_id(config); + unsigned int max_gt_id = HAS_EXTRA_GT_LIST(i915) ? 1 : 0; + + if (gt_id > max_gt_id) + return -ENOENT; + + switch (config_counter(config)) { case I915_PMU_ACTUAL_FREQUENCY: if (IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915)) /* Requires a mutex for sampling! */ @@ -513,6 +570,8 @@ config_status(struct drm_i915_private *i915, u64 config) return -ENODEV; break; case I915_PMU_INTERRUPTS: + if (gt_id) + return -ENOENT; break; case I915_PMU_RC6_RESIDENCY: if (!gt->rc6.supported) @@ -610,22 +669,27 @@ static u64 __i915_pmu_event_read(struct perf_event *event) val = engine->pmu.sample[sample].cur; } } else { - switch (event->attr.config) { + const unsigned int gt_id = config_gt_id(event->attr.config); + const u64 config = config_counter(event->attr.config); + + switch (config) { case I915_PMU_ACTUAL_FREQUENCY: val = - div_u64(pmu->sample[__I915_SAMPLE_FREQ_ACT].cur, + div_u64(read_sample(pmu, gt_id, + __I915_SAMPLE_FREQ_ACT), USEC_PER_SEC /* to MHz */); break; case I915_PMU_REQUESTED_FREQUENCY: val = - div_u64(pmu->sample[__I915_SAMPLE_FREQ_REQ].cur, + div_u64(read_sample(pmu, gt_id, + __I915_SAMPLE_FREQ_REQ), USEC_PER_SEC /* to MHz */); break; case I915_PMU_INTERRUPTS: val = READ_ONCE(pmu->irq_count); break; case I915_PMU_RC6_RESIDENCY: - val = get_rc6(to_gt(i915)); + val = get_rc6(i915->gt[gt_id]); break; case I915_PMU_SOFTWARE_GT_AWAKE_TIME: val = ktime_to_ns(intel_gt_get_awake_time(to_gt(i915))); diff --git a/drivers/gpu/drm/i915/i915_pmu.h b/drivers/gpu/drm/i915/i915_pmu.h index 3a811266ac6a..33d80fbaab8b 100644 --- a/drivers/gpu/drm/i915/i915_pmu.h +++ b/drivers/gpu/drm/i915/i915_pmu.h @@ -38,13 +38,16 @@ enum { __I915_NUM_PMU_SAMPLERS }; +#define I915_PMU_MAX_GTS 2 + /* * How many different events we track in the global PMU mask. * * It is also used to know to needed number of event reference counters. */ #define I915_PMU_MASK_BITS \ - (I915_ENGINE_SAMPLE_COUNT + __I915_PMU_TRACKED_EVENT_COUNT) + (I915_ENGINE_SAMPLE_COUNT + \ + I915_PMU_MAX_GTS * __I915_PMU_TRACKED_EVENT_COUNT) #define I915_ENGINE_SAMPLE_COUNT (I915_SAMPLE_SEMA + 1) @@ -124,11 +127,11 @@ struct i915_pmu { * Only global counters are held here, while the per-engine ones are in * struct intel_engine_cs. */ - struct i915_pmu_sample sample[__I915_NUM_PMU_SAMPLERS]; + struct i915_pmu_sample sample[I915_PMU_MAX_GTS * __I915_NUM_PMU_SAMPLERS]; /** * @sleep_last: Last time GT parked for RC6 estimation. */ - ktime_t sleep_last; + ktime_t sleep_last[I915_PMU_MAX_GTS]; /** * @irq_count: Number of interrupts * diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h index ba40855dbc93..f31dfacde601 100644 --- a/include/uapi/drm/i915_drm.h +++ b/include/uapi/drm/i915_drm.h @@ -280,7 +280,16 @@ enum drm_i915_pmu_engine_sample { #define I915_PMU_ENGINE_SEMA(class, instance) \ __I915_PMU_ENGINE(class, instance, I915_SAMPLE_SEMA) -#define __I915_PMU_OTHER(x) (__I915_PMU_ENGINE(0xff, 0xff, 0xf) + 1 + (x)) +/* + * Top 4 bits of every non-engine counter are GT id. + */ +#define __I915_PMU_GT_SHIFT (60) + +#define ___I915_PMU_OTHER(gt, x) \ + (((__u64)__I915_PMU_ENGINE(0xff, 0xff, 0xf) + 1 + (x)) | \ + ((__u64)(gt) << __I915_PMU_GT_SHIFT)) + +#define __I915_PMU_OTHER(x) ___I915_PMU_OTHER(0, x) #define I915_PMU_ACTUAL_FREQUENCY __I915_PMU_OTHER(0) #define I915_PMU_REQUESTED_FREQUENCY __I915_PMU_OTHER(1) @@ -290,6 +299,12 @@ enum drm_i915_pmu_engine_sample { #define I915_PMU_LAST /* Deprecated - do not use */ I915_PMU_RC6_RESIDENCY +#define __I915_PMU_ACTUAL_FREQUENCY(gt) ___I915_PMU_OTHER(gt, 0) +#define __I915_PMU_REQUESTED_FREQUENCY(gt) ___I915_PMU_OTHER(gt, 1) +#define __I915_PMU_INTERRUPTS(gt) ___I915_PMU_OTHER(gt, 2) +#define __I915_PMU_RC6_RESIDENCY(gt) ___I915_PMU_OTHER(gt, 3) +#define __I915_PMU_SOFTWARE_GT_AWAKE_TIME(gt) ___I915_PMU_OTHER(gt, 4) + /* Each region is a minimum of 16k, and there are at most 255 of them. */ #define I915_NR_TEX_REGIONS 255 /* table size 2k - maximum due to use From 906bd0fb137ffc361b3ce0d0db07f288db5582ea Mon Sep 17 00:00:00 2001 From: Tvrtko Ursulin Date: Fri, 19 May 2023 08:49:46 -0700 Subject: [PATCH 107/108] drm/i915/pmu: Export counters from all tiles Start exporting frequency and RC6 counters from all tiles. Existing counters keep their names and config values and new one use the namespace added in the previous patch, with the "-gtN" added to their names. Interrupts counter is an odd one off. Because it is the global device counters (not only GT) we choose not to add per tile versions for now. Signed-off-by: Tvrtko Ursulin Signed-off-by: Aravind Iddamsetty Reviewed-by: Umesh Nerlige Ramappa Signed-off-by: Umesh Nerlige Ramappa Link: https://patchwork.freedesktop.org/patch/msgid/20230519154946.3751971-8-umesh.nerlige.ramappa@intel.com --- drivers/gpu/drm/i915/i915_pmu.c | 82 ++++++++++++++++++++++----------- 1 file changed, 55 insertions(+), 27 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c index 5cfc322e69b4..a814583e19fd 100644 --- a/drivers/gpu/drm/i915/i915_pmu.c +++ b/drivers/gpu/drm/i915/i915_pmu.c @@ -940,11 +940,20 @@ static const struct attribute_group i915_pmu_cpumask_attr_group = { .attrs = i915_cpumask_attrs, }; -#define __event(__config, __name, __unit) \ +#define __event(__counter, __name, __unit) \ { \ - .config = (__config), \ + .counter = (__counter), \ .name = (__name), \ .unit = (__unit), \ + .global = false, \ +} + +#define __global_event(__counter, __name, __unit) \ +{ \ + .counter = (__counter), \ + .name = (__name), \ + .unit = (__unit), \ + .global = true, \ } #define __engine_event(__sample, __name) \ @@ -983,15 +992,16 @@ create_event_attributes(struct i915_pmu *pmu) { struct drm_i915_private *i915 = container_of(pmu, typeof(*i915), pmu); static const struct { - u64 config; + unsigned int counter; const char *name; const char *unit; + bool global; } events[] = { - __event(I915_PMU_ACTUAL_FREQUENCY, "actual-frequency", "M"), - __event(I915_PMU_REQUESTED_FREQUENCY, "requested-frequency", "M"), - __event(I915_PMU_INTERRUPTS, "interrupts", NULL), - __event(I915_PMU_RC6_RESIDENCY, "rc6-residency", "ns"), - __event(I915_PMU_SOFTWARE_GT_AWAKE_TIME, "software-gt-awake-time", "ns"), + __event(0, "actual-frequency", "M"), + __event(1, "requested-frequency", "M"), + __global_event(2, "interrupts", NULL), + __event(3, "rc6-residency", "ns"), + __event(4, "software-gt-awake-time", "ns"), }; static const struct { enum drm_i915_pmu_engine_sample sample; @@ -1006,12 +1016,17 @@ create_event_attributes(struct i915_pmu *pmu) struct i915_ext_attribute *i915_attr = NULL, *i915_iter; struct attribute **attr = NULL, **attr_iter; struct intel_engine_cs *engine; - unsigned int i; + struct intel_gt *gt; + unsigned int i, j; /* Count how many counters we will be exposing. */ - for (i = 0; i < ARRAY_SIZE(events); i++) { - if (!config_status(i915, events[i].config)) - count++; + for_each_gt(gt, i915, j) { + for (i = 0; i < ARRAY_SIZE(events); i++) { + u64 config = ___I915_PMU_OTHER(j, events[i].counter); + + if (!config_status(i915, config)) + count++; + } } for_each_uabi_engine(engine, i915) { @@ -1041,26 +1056,39 @@ create_event_attributes(struct i915_pmu *pmu) attr_iter = attr; /* Initialize supported non-engine counters. */ - for (i = 0; i < ARRAY_SIZE(events); i++) { - char *str; + for_each_gt(gt, i915, j) { + for (i = 0; i < ARRAY_SIZE(events); i++) { + u64 config = ___I915_PMU_OTHER(j, events[i].counter); + char *str; - if (config_status(i915, events[i].config)) - continue; + if (config_status(i915, config)) + continue; - str = kstrdup(events[i].name, GFP_KERNEL); - if (!str) - goto err; - - *attr_iter++ = &i915_iter->attr.attr; - i915_iter = add_i915_attr(i915_iter, str, events[i].config); - - if (events[i].unit) { - str = kasprintf(GFP_KERNEL, "%s.unit", events[i].name); + if (events[i].global || !HAS_EXTRA_GT_LIST(i915)) + str = kstrdup(events[i].name, GFP_KERNEL); + else + str = kasprintf(GFP_KERNEL, "%s-gt%u", + events[i].name, j); if (!str) goto err; - *attr_iter++ = &pmu_iter->attr.attr; - pmu_iter = add_pmu_attr(pmu_iter, str, events[i].unit); + *attr_iter++ = &i915_iter->attr.attr; + i915_iter = add_i915_attr(i915_iter, str, config); + + if (events[i].unit) { + if (events[i].global || !HAS_EXTRA_GT_LIST(i915)) + str = kasprintf(GFP_KERNEL, "%s.unit", + events[i].name); + else + str = kasprintf(GFP_KERNEL, "%s-gt%u.unit", + events[i].name, j); + if (!str) + goto err; + + *attr_iter++ = &pmu_iter->attr.attr; + pmu_iter = add_pmu_attr(pmu_iter, str, + events[i].unit); + } } } From 0fbcf57077c47b444e91b9ce8a243e6f7f53693d Mon Sep 17 00:00:00 2001 From: Fei Yang Date: Thu, 18 May 2023 22:11:02 -0700 Subject: [PATCH 108/108] drm/i915/mtl: end support for set caching ioctl The design is to keep Buffer Object's caching policy immutable through out its life cycle. This patch ends the support for set caching ioctl from MTL onward. While doing that we also set BO's to be 1-way coherent at creation time because GPU is no longer automatically snooping CPU cache. For userspace components needing to fine tune the caching policy for BO's, a follow up patch will extend the GEM_CREATE uAPI to allow them specify caching mode at BO creation time. Signed-off-by: Fei Yang Reviewed-by: Andi Shyti Reviewed-by: Andrzej Hajda Signed-off-by: Andi Shyti Link: https://patchwork.freedesktop.org/patch/msgid/20230519051103.3404990-2-fei.yang@intel.com --- drivers/gpu/drm/i915/gem/i915_gem_domain.c | 3 +++ drivers/gpu/drm/i915/gem/i915_gem_shmem.c | 9 ++++++++- 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_domain.c b/drivers/gpu/drm/i915/gem/i915_gem_domain.c index 05107a6efe45..dfaaa8b66ac3 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_domain.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_domain.c @@ -350,6 +350,9 @@ int i915_gem_set_caching_ioctl(struct drm_device *dev, void *data, if (IS_DGFX(i915)) return -ENODEV; + if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 70)) + return -EOPNOTSUPP; + switch (args->caching) { case I915_CACHING_NONE: level = I915_CACHE_NONE; diff --git a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c index 37d1efcd3ca6..cad4a6017f4b 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c @@ -601,7 +601,14 @@ static int shmem_object_init(struct intel_memory_region *mem, obj->write_domain = I915_GEM_DOMAIN_CPU; obj->read_domains = I915_GEM_DOMAIN_CPU; - if (HAS_LLC(i915)) + /* + * MTL doesn't snoop CPU cache by default for GPU access (namely + * 1-way coherency). However some UMD's are currently depending on + * that. Make 1-way coherent the default setting for MTL. A follow + * up patch will extend the GEM_CREATE uAPI to allow UMD's specify + * caching mode at BO creation time + */ + if (HAS_LLC(i915) || (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 70))) /* On some devices, we can have the GPU use the LLC (the CPU * cache) for about a 10% performance improvement * compared to uncached. Graphics requests other than