From 2d22b63f3a5aae2088708941d08cf0f01f430a58 Mon Sep 17 00:00:00 2001 From: Maxime Ripard Date: Thu, 26 Jun 2025 12:04:59 +0200 Subject: [PATCH 01/41] drm/mipi-dsi: Add dev_is_mipi_dsi function This will be especially useful for generic panels (like panel-simple) which can take different code path depending on if they are MIPI-DSI devices or platform devices. Reviewed-by: Javier Martinez Canillas Tested-by: Francesco Dolcini # Toradex Colibri iMX6 Link: https://lore.kernel.org/r/20250626-drm-panel-simple-fixes-v2-1-5afcaa608bdc@kernel.org Signed-off-by: Maxime Ripard --- drivers/gpu/drm/drm_mipi_dsi.c | 3 ++- include/drm/drm_mipi_dsi.h | 3 +++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/drm_mipi_dsi.c b/drivers/gpu/drm/drm_mipi_dsi.c index e5184a0c2465..21fd647f8ce1 100644 --- a/drivers/gpu/drm/drm_mipi_dsi.c +++ b/drivers/gpu/drm/drm_mipi_dsi.c @@ -91,12 +91,13 @@ static const struct dev_pm_ops mipi_dsi_device_pm_ops = { .restore = pm_generic_restore, }; -static const struct bus_type mipi_dsi_bus_type = { +const struct bus_type mipi_dsi_bus_type = { .name = "mipi-dsi", .match = mipi_dsi_device_match, .uevent = mipi_dsi_uevent, .pm = &mipi_dsi_device_pm_ops, }; +EXPORT_SYMBOL_GPL(mipi_dsi_bus_type); /** * of_find_mipi_dsi_device_by_node() - find the MIPI DSI device matching a diff --git a/include/drm/drm_mipi_dsi.h b/include/drm/drm_mipi_dsi.h index b37860f4a895..6d2c08e81101 100644 --- a/include/drm/drm_mipi_dsi.h +++ b/include/drm/drm_mipi_dsi.h @@ -223,6 +223,9 @@ struct mipi_dsi_multi_context { #define to_mipi_dsi_device(__dev) container_of_const(__dev, struct mipi_dsi_device, dev) +extern const struct bus_type mipi_dsi_bus_type; +#define dev_is_mipi_dsi(dev) ((dev)->bus == &mipi_dsi_bus_type) + /** * mipi_dsi_pixel_format_to_bpp - obtain the number of bits per pixel for any * given pixel format defined by the MIPI DSI From 073667fce1667eab31d6a62cdd7fb795933ec7e8 Mon Sep 17 00:00:00 2001 From: Maxime Ripard Date: Thu, 26 Jun 2025 12:05:00 +0200 Subject: [PATCH 02/41] drm/panel: panel-simple: make panel_dpi_probe return a panel_desc If the panel-simple driver is probed from a panel-dpi compatible, the driver will use an empty panel_desc structure as a descriminant. It will then allocate and fill another panel_desc as part of its probe. However, that allocation needs to happen after the panel_simple structure has been allocated, since panel_dpi_probe(), the function doing the panel_desc allocation and initialization, takes a panel_simple pointer as an argument. This pointer is used to fill the panel_simple->desc pointer that is still initialized with the empty panel_desc when panel_dpi_probe() is called. Since commit de04bb0089a9 ("drm/panel/panel-simple: Use the new allocation in place of devm_kzalloc()"), we will need the panel connector type found in panel_desc to allocate panel_simple. This creates a circular dependency where we need panel_desc to create panel_simple, and need panel_simple to create panel_desc. Let's break that dependency by making panel_dpi_probe simply return the panel_desc it initialized and move the panel_simple->desc assignment to the caller. This will not fix the breaking commit entirely, but will move us towards the right direction. Fixes: de04bb0089a9 ("drm/panel/panel-simple: Use the new allocation in place of devm_kzalloc()") Reviewed-by: Javier Martinez Canillas Tested-by: Francesco Dolcini # Toradex Colibri iMX6 Link: https://lore.kernel.org/r/20250626-drm-panel-simple-fixes-v2-2-5afcaa608bdc@kernel.org Signed-off-by: Maxime Ripard --- drivers/gpu/drm/panel/panel-simple.c | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/drivers/gpu/drm/panel/panel-simple.c b/drivers/gpu/drm/panel/panel-simple.c index 0a3b26bb4d73..89188e683822 100644 --- a/drivers/gpu/drm/panel/panel-simple.c +++ b/drivers/gpu/drm/panel/panel-simple.c @@ -432,8 +432,7 @@ static const struct drm_panel_funcs panel_simple_funcs = { static struct panel_desc panel_dpi; -static int panel_dpi_probe(struct device *dev, - struct panel_simple *panel) +static struct panel_desc *panel_dpi_probe(struct device *dev) { struct display_timing *timing; const struct device_node *np; @@ -445,17 +444,17 @@ static int panel_dpi_probe(struct device *dev, np = dev->of_node; desc = devm_kzalloc(dev, sizeof(*desc), GFP_KERNEL); if (!desc) - return -ENOMEM; + return ERR_PTR(-ENOMEM); timing = devm_kzalloc(dev, sizeof(*timing), GFP_KERNEL); if (!timing) - return -ENOMEM; + return ERR_PTR(-ENOMEM); ret = of_get_display_timing(np, "panel-timing", timing); if (ret < 0) { dev_err(dev, "%pOF: no panel-timing node found for \"panel-dpi\" binding\n", np); - return ret; + return ERR_PTR(ret); } desc->timings = timing; @@ -473,9 +472,7 @@ static int panel_dpi_probe(struct device *dev, /* We do not know the connector for the DT node, so guess it */ desc->connector_type = DRM_MODE_CONNECTOR_DPI; - panel->desc = desc; - - return 0; + return desc; } #define PANEL_SIMPLE_BOUNDS_CHECK(to_check, bounds, field) \ @@ -613,10 +610,13 @@ static int panel_simple_probe(struct device *dev, const struct panel_desc *desc) if (desc == &panel_dpi) { /* Handle the generic panel-dpi binding */ - err = panel_dpi_probe(dev, panel); - if (err) + desc = panel_dpi_probe(dev); + if (IS_ERR(desc)) { + err = PTR_ERR(desc); goto free_ddc; - desc = panel->desc; + } + + panel->desc = desc; } else { if (!of_get_display_timing(dev->of_node, "panel-timing", &dt)) panel_simple_parse_panel_timing_node(dev, panel, &dt); From 921c41e509746aabecbbb2595ebf41a9d8fdc4e2 Mon Sep 17 00:00:00 2001 From: Maxime Ripard Date: Thu, 26 Jun 2025 12:05:01 +0200 Subject: [PATCH 03/41] drm/panel: panel-simple: Make panel_simple_probe return its panel In order to fix the regession introduced by commit de04bb0089a9 ("drm/panel/panel-simple: Use the new allocation in place of devm_kzalloc()"), we need to move the panel_desc lookup into the common panel_simple_probe() function. There's two callers for that function, the probe implementations of the platform and MIPI-DSI drivers panel-simple implements. The MIPI-DSI driver's probe will need to access the current panel_desc to initialize properly, which won't be possible anymore if we make that lookup in panel_simple_probe(). However, we can make panel_simple_probe() return the initialized panel_simple structure it allocated, which will contain a pointer to the associated panel_desc in its desc field. This doesn't fix de04bb0089a9 ("drm/panel/panel-simple: Use the new allocation in place of devm_kzalloc()") still, but makes progress towards that goal. Fixes: de04bb0089a9 ("drm/panel/panel-simple: Use the new allocation in place of devm_kzalloc()") Reviewed-by: Javier Martinez Canillas Tested-by: Francesco Dolcini # Toradex Colibri iMX6 Link: https://lore.kernel.org/r/20250626-drm-panel-simple-fixes-v2-3-5afcaa608bdc@kernel.org Signed-off-by: Maxime Ripard --- drivers/gpu/drm/panel/panel-simple.c | 33 +++++++++++++++++----------- 1 file changed, 20 insertions(+), 13 deletions(-) diff --git a/drivers/gpu/drm/panel/panel-simple.c b/drivers/gpu/drm/panel/panel-simple.c index 89188e683822..e70ee2d4a538 100644 --- a/drivers/gpu/drm/panel/panel-simple.c +++ b/drivers/gpu/drm/panel/panel-simple.c @@ -567,7 +567,7 @@ static int panel_simple_override_nondefault_lvds_datamapping(struct device *dev, return 0; } -static int panel_simple_probe(struct device *dev, const struct panel_desc *desc) +static struct panel_simple *panel_simple_probe(struct device *dev, const struct panel_desc *desc) { struct panel_simple *panel; struct display_timing dt; @@ -579,24 +579,24 @@ static int panel_simple_probe(struct device *dev, const struct panel_desc *desc) panel = devm_drm_panel_alloc(dev, struct panel_simple, base, &panel_simple_funcs, desc->connector_type); if (IS_ERR(panel)) - return PTR_ERR(panel); + return ERR_CAST(panel); panel->desc = desc; panel->supply = devm_regulator_get(dev, "power"); if (IS_ERR(panel->supply)) - return PTR_ERR(panel->supply); + return ERR_CAST(panel->supply); panel->enable_gpio = devm_gpiod_get_optional(dev, "enable", GPIOD_OUT_LOW); if (IS_ERR(panel->enable_gpio)) - return dev_err_probe(dev, PTR_ERR(panel->enable_gpio), - "failed to request GPIO\n"); + return dev_err_cast_probe(dev, panel->enable_gpio, + "failed to request GPIO\n"); err = of_drm_get_panel_orientation(dev->of_node, &panel->orientation); if (err) { dev_err(dev, "%pOF: failed to get orientation %d\n", dev->of_node, err); - return err; + return ERR_PTR(err); } ddc = of_parse_phandle(dev->of_node, "ddc-i2c-bus", 0); @@ -605,7 +605,7 @@ static int panel_simple_probe(struct device *dev, const struct panel_desc *desc) of_node_put(ddc); if (!panel->ddc) - return -EPROBE_DEFER; + return ERR_PTR(-EPROBE_DEFER); } if (desc == &panel_dpi) { @@ -703,7 +703,7 @@ static int panel_simple_probe(struct device *dev, const struct panel_desc *desc) drm_panel_add(&panel->base); - return 0; + return panel; disable_pm_runtime: pm_runtime_dont_use_autosuspend(dev); @@ -712,7 +712,7 @@ static int panel_simple_probe(struct device *dev, const struct panel_desc *desc) if (panel->ddc) put_device(&panel->ddc->dev); - return err; + return ERR_PTR(err); } static void panel_simple_shutdown(struct device *dev) @@ -5377,12 +5377,17 @@ MODULE_DEVICE_TABLE(of, platform_of_match); static int panel_simple_platform_probe(struct platform_device *pdev) { const struct panel_desc *desc; + struct panel_simple *panel; desc = of_device_get_match_data(&pdev->dev); if (!desc) return -ENODEV; - return panel_simple_probe(&pdev->dev, desc); + panel = panel_simple_probe(&pdev->dev, desc); + if (IS_ERR(panel)) + return PTR_ERR(panel); + + return 0; } static void panel_simple_platform_remove(struct platform_device *pdev) @@ -5653,16 +5658,18 @@ MODULE_DEVICE_TABLE(of, dsi_of_match); static int panel_simple_dsi_probe(struct mipi_dsi_device *dsi) { const struct panel_desc_dsi *desc; + struct panel_simple *panel; int err; desc = of_device_get_match_data(&dsi->dev); if (!desc) return -ENODEV; - err = panel_simple_probe(&dsi->dev, &desc->desc); - if (err < 0) - return err; + panel = panel_simple_probe(&dsi->dev, &desc->desc); + if (IS_ERR(panel)) + return PTR_ERR(panel); + desc = container_of(panel->desc, struct panel_desc_dsi, desc); dsi->mode_flags = desc->flags; dsi->format = desc->format; dsi->lanes = desc->lanes; From 47c08262f34e1cd9c48364431e4d32529029b910 Mon Sep 17 00:00:00 2001 From: Maxime Ripard Date: Thu, 26 Jun 2025 12:05:02 +0200 Subject: [PATCH 04/41] drm/panel: panel-simple: Add function to look panel data up Commit de04bb0089a9 ("drm/panel/panel-simple: Use the new allocation in place of devm_kzalloc()") moved the call to drm_panel_init into the devm_drm_panel_alloc(), which needs a connector type to initialize properly. In the panel-dpi compatible case, the passed panel_desc structure is an empty one used as a discriminant, and the connector type it contains isn't actually initialized. It is initialized through a call to panel_dpi_probe() later in the function, which used to be before the call to drm_panel_init() that got merged into devm_drm_panel_alloc(). So, we do need a proper panel_desc pointer before the call to devm_drm_panel_alloc() now. All cases associate their panel_desc with the panel compatible and use of_device_get_match_data, except for the panel-dpi compatible. In that case, we're expected to call panel_dpi_probe, which will allocate and initialize the panel_desc for us. Let's create such a helper function that would be called first in the driver and will lookup the desc by compatible, or allocate one if relevant. Reported-by: Francesco Dolcini Closes: https://lore.kernel.org/all/20250612081834.GA248237@francesco-nb/ Fixes: de04bb0089a9 ("drm/panel/panel-simple: Use the new allocation in place of devm_kzalloc()") Reviewed-by: Javier Martinez Canillas Tested-by: Francesco Dolcini # Toradex Colibri iMX6 Link: https://lore.kernel.org/r/20250626-drm-panel-simple-fixes-v2-4-5afcaa608bdc@kernel.org Signed-off-by: Maxime Ripard --- drivers/gpu/drm/panel/panel-simple.c | 82 +++++++++++++++++----------- 1 file changed, 49 insertions(+), 33 deletions(-) diff --git a/drivers/gpu/drm/panel/panel-simple.c b/drivers/gpu/drm/panel/panel-simple.c index e70ee2d4a538..4c831af86414 100644 --- a/drivers/gpu/drm/panel/panel-simple.c +++ b/drivers/gpu/drm/panel/panel-simple.c @@ -26,6 +26,7 @@ #include #include #include +#include #include #include #include @@ -136,6 +137,14 @@ struct panel_desc { int connector_type; }; +struct panel_desc_dsi { + struct panel_desc desc; + + unsigned long flags; + enum mipi_dsi_pixel_format format; + unsigned int lanes; +}; + struct panel_simple { struct drm_panel base; @@ -567,8 +576,38 @@ static int panel_simple_override_nondefault_lvds_datamapping(struct device *dev, return 0; } -static struct panel_simple *panel_simple_probe(struct device *dev, const struct panel_desc *desc) +static const struct panel_desc *panel_simple_get_desc(struct device *dev) { + if (IS_ENABLED(CONFIG_DRM_MIPI_DSI) && + dev_is_mipi_dsi(dev)) { + const struct panel_desc_dsi *dsi_desc; + + dsi_desc = of_device_get_match_data(dev); + if (!dsi_desc) + return ERR_PTR(-ENODEV); + + return &dsi_desc->desc; + } + + if (dev_is_platform(dev)) { + const struct panel_desc *desc; + + desc = of_device_get_match_data(dev); + if (!desc) + return ERR_PTR(-ENODEV); + + if (desc == &panel_dpi) + return panel_dpi_probe(dev); + + return desc; + } + + return ERR_PTR(-ENODEV); +} + +static struct panel_simple *panel_simple_probe(struct device *dev) +{ + const struct panel_desc *desc; struct panel_simple *panel; struct display_timing dt; struct device_node *ddc; @@ -576,6 +615,10 @@ static struct panel_simple *panel_simple_probe(struct device *dev, const struct u32 bus_flags; int err; + desc = panel_simple_get_desc(dev); + if (IS_ERR(desc)) + return ERR_CAST(desc); + panel = devm_drm_panel_alloc(dev, struct panel_simple, base, &panel_simple_funcs, desc->connector_type); if (IS_ERR(panel)) @@ -608,19 +651,9 @@ static struct panel_simple *panel_simple_probe(struct device *dev, const struct return ERR_PTR(-EPROBE_DEFER); } - if (desc == &panel_dpi) { - /* Handle the generic panel-dpi binding */ - desc = panel_dpi_probe(dev); - if (IS_ERR(desc)) { - err = PTR_ERR(desc); - goto free_ddc; - } - - panel->desc = desc; - } else { - if (!of_get_display_timing(dev->of_node, "panel-timing", &dt)) - panel_simple_parse_panel_timing_node(dev, panel, &dt); - } + if ((desc != &panel_dpi) && + !of_get_display_timing(dev->of_node, "panel-timing", &dt)) + panel_simple_parse_panel_timing_node(dev, panel, &dt); if (desc->connector_type == DRM_MODE_CONNECTOR_LVDS) { /* Optional data-mapping property for overriding bus format */ @@ -5376,14 +5409,9 @@ MODULE_DEVICE_TABLE(of, platform_of_match); static int panel_simple_platform_probe(struct platform_device *pdev) { - const struct panel_desc *desc; struct panel_simple *panel; - desc = of_device_get_match_data(&pdev->dev); - if (!desc) - return -ENODEV; - - panel = panel_simple_probe(&pdev->dev, desc); + panel = panel_simple_probe(&pdev->dev); if (IS_ERR(panel)) return PTR_ERR(panel); @@ -5417,14 +5445,6 @@ static struct platform_driver panel_simple_platform_driver = { .shutdown = panel_simple_platform_shutdown, }; -struct panel_desc_dsi { - struct panel_desc desc; - - unsigned long flags; - enum mipi_dsi_pixel_format format; - unsigned int lanes; -}; - static const struct drm_display_mode auo_b080uan01_mode = { .clock = 154500, .hdisplay = 1200, @@ -5661,11 +5681,7 @@ static int panel_simple_dsi_probe(struct mipi_dsi_device *dsi) struct panel_simple *panel; int err; - desc = of_device_get_match_data(&dsi->dev); - if (!desc) - return -ENODEV; - - panel = panel_simple_probe(&dsi->dev, &desc->desc); + panel = panel_simple_probe(&dsi->dev); if (IS_ERR(panel)) return PTR_ERR(panel); From f6faebc11a8a6d9521e5ab00481bd22ed9c7c67d Mon Sep 17 00:00:00 2001 From: Maxime Ripard Date: Thu, 26 Jun 2025 12:05:03 +0200 Subject: [PATCH 05/41] drm/panel: panel-simple: get rid of panel_dpi hack The empty panel_dpi struct was only ever used as a discriminant, but it's kind of a hack, and with the reworks done in the previous patches, we shouldn't need it anymore. Let's get rid of it. Reviewed-by: Javier Martinez Canillas Tested-by: Francesco Dolcini # Toradex Colibri iMX6 Link: https://lore.kernel.org/r/20250626-drm-panel-simple-fixes-v2-5-5afcaa608bdc@kernel.org Signed-off-by: Maxime Ripard --- drivers/gpu/drm/panel/panel-simple.c | 27 ++++++++++++++++++--------- 1 file changed, 18 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/panel/panel-simple.c b/drivers/gpu/drm/panel/panel-simple.c index 4c831af86414..9f81fa960b46 100644 --- a/drivers/gpu/drm/panel/panel-simple.c +++ b/drivers/gpu/drm/panel/panel-simple.c @@ -439,8 +439,6 @@ static const struct drm_panel_funcs panel_simple_funcs = { .get_timings = panel_simple_get_timings, }; -static struct panel_desc panel_dpi; - static struct panel_desc *panel_dpi_probe(struct device *dev) { struct display_timing *timing; @@ -593,11 +591,17 @@ static const struct panel_desc *panel_simple_get_desc(struct device *dev) const struct panel_desc *desc; desc = of_device_get_match_data(dev); - if (!desc) - return ERR_PTR(-ENODEV); - - if (desc == &panel_dpi) - return panel_dpi_probe(dev); + if (!desc) { + /* + * panel-dpi probes without a descriptor and + * panel_dpi_probe() will initialize one for us + * based on the device tree. + */ + if (of_device_is_compatible(dev->of_node, "panel-dpi")) + return panel_dpi_probe(dev); + else + return ERR_PTR(-ENODEV); + } return desc; } @@ -651,7 +655,7 @@ static struct panel_simple *panel_simple_probe(struct device *dev) return ERR_PTR(-EPROBE_DEFER); } - if ((desc != &panel_dpi) && + if (!of_device_is_compatible(dev->of_node, "panel-dpi") && !of_get_display_timing(dev->of_node, "panel-timing", &dt)) panel_simple_parse_panel_timing_node(dev, panel, &dt); @@ -5400,7 +5404,12 @@ static const struct of_device_id platform_of_match[] = { }, { /* Must be the last entry */ .compatible = "panel-dpi", - .data = &panel_dpi, + + /* + * Explicitly NULL, the panel_desc structure will be + * allocated by panel_dpi_probe(). + */ + .data = NULL, }, { /* sentinel */ } From 615cc4223fcbe1e0e6f68b8494b26bb6c08d917a Mon Sep 17 00:00:00 2001 From: Thomas Zimmermann Date: Tue, 17 Jun 2025 16:09:34 +0200 Subject: [PATCH 06/41] drm/vesadrm: Avoid NULL-ptr deref in vesadrm_pmi_cmap_write() Only set PMI fields if the screen_info's Vesa PM segment has been set. Vesa PMI is the power-management interface. It also provides means to set the color palette. The interface is optional, so not all VESA graphics cards support it. Print vesafb's warning [1] if the hardware palette cannot be set at all. If unsupported the field PrimaryPalette in struct vesadrm.pmi is NULL, which results in a segmentation fault. Happens with qemu's Cirrus emulation. Signed-off-by: Thomas Zimmermann Fixes: 814d270b31d2 ("drm/sysfb: vesadrm: Add gamma correction") Link: https://elixir.bootlin.com/linux/v6.15/source/drivers/video/fbdev/vesafb.c#L375 # 1 Cc: Thomas Zimmermann Cc: Javier Martinez Canillas Cc: dri-devel@lists.freedesktop.org Acked-by: Javier Martinez Canillas Link: https://lore.kernel.org/r/20250617140944.142392-1-tzimmermann@suse.de --- drivers/gpu/drm/sysfb/vesadrm.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/sysfb/vesadrm.c b/drivers/gpu/drm/sysfb/vesadrm.c index 4d62c78e7d1e..f7532db3831f 100644 --- a/drivers/gpu/drm/sysfb/vesadrm.c +++ b/drivers/gpu/drm/sysfb/vesadrm.c @@ -362,14 +362,19 @@ static struct vesadrm_device *vesadrm_device_create(struct drm_driver *drv, if (!__screen_info_vbe_mode_nonvga(si)) { vesa->cmap_write = vesadrm_vga_cmap_write; -#if defined(CONFIG_X86_32) } else { +#if defined(CONFIG_X86_32) phys_addr_t pmi_base = __screen_info_vesapm_info_base(si); - const u16 *pmi_addr = phys_to_virt(pmi_base); - vesa->pmi.PrimaryPalette = (u8 *)pmi_addr + pmi_addr[2]; - vesa->cmap_write = vesadrm_pmi_cmap_write; + if (pmi_base) { + const u16 *pmi_addr = phys_to_virt(pmi_base); + + vesa->pmi.PrimaryPalette = (u8 *)pmi_addr + pmi_addr[2]; + vesa->cmap_write = vesadrm_pmi_cmap_write; + } else #endif + if (format->is_color_indexed) + drm_warn(dev, "hardware palette is unchangeable, colors may be incorrect\n"); } #ifdef CONFIG_X86 From 18665eaa2acbe17da11c2748ac43c2f4ec2fad85 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Wed, 18 Jun 2025 09:52:21 +0200 Subject: [PATCH 07/41] drm/exynos: Don't use %pK through printk MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In the past %pK was preferable to %p as it would not leak raw pointer values into the kernel log. Since commit ad67b74d2469 ("printk: hash addresses printed with %p") the regular %p has been improved to avoid this issue. Furthermore, restricted pointers ("%pK") were never meant to be used through printk(). They can still unintentionally leak raw pointers or acquire sleeping locks in atomic contexts. Switch to the regular pointer formatting which is safer and easier to reason about. Signed-off-by: Thomas Weißschuh Signed-off-by: Inki Dae --- drivers/gpu/drm/exynos/exynos_drm_gem.c | 2 +- drivers/gpu/drm/exynos/exynos_drm_ipp.c | 32 ++++++++++++------------- 2 files changed, 17 insertions(+), 17 deletions(-) diff --git a/drivers/gpu/drm/exynos/exynos_drm_gem.c b/drivers/gpu/drm/exynos/exynos_drm_gem.c index 4787fee4696f..d44401a695e2 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_gem.c +++ b/drivers/gpu/drm/exynos/exynos_drm_gem.c @@ -174,7 +174,7 @@ static struct exynos_drm_gem *exynos_drm_gem_init(struct drm_device *dev, return ERR_PTR(ret); } - DRM_DEV_DEBUG_KMS(dev->dev, "created file object = %pK\n", obj->filp); + DRM_DEV_DEBUG_KMS(dev->dev, "created file object = %p\n", obj->filp); return exynos_gem; } diff --git a/drivers/gpu/drm/exynos/exynos_drm_ipp.c b/drivers/gpu/drm/exynos/exynos_drm_ipp.c index ea9f66037600..03c8490af4f4 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_ipp.c +++ b/drivers/gpu/drm/exynos/exynos_drm_ipp.c @@ -271,7 +271,7 @@ static inline struct exynos_drm_ipp_task * task->src.rect.h = task->dst.rect.h = UINT_MAX; task->transform.rotation = DRM_MODE_ROTATE_0; - DRM_DEV_DEBUG_DRIVER(task->dev, "Allocated task %pK\n", task); + DRM_DEV_DEBUG_DRIVER(task->dev, "Allocated task %p\n", task); return task; } @@ -339,7 +339,7 @@ static int exynos_drm_ipp_task_set(struct exynos_drm_ipp_task *task, } DRM_DEV_DEBUG_DRIVER(task->dev, - "Got task %pK configuration from userspace\n", + "Got task %p configuration from userspace\n", task); return 0; } @@ -394,7 +394,7 @@ static void exynos_drm_ipp_task_release_buf(struct exynos_drm_ipp_buffer *buf) static void exynos_drm_ipp_task_free(struct exynos_drm_ipp *ipp, struct exynos_drm_ipp_task *task) { - DRM_DEV_DEBUG_DRIVER(task->dev, "Freeing task %pK\n", task); + DRM_DEV_DEBUG_DRIVER(task->dev, "Freeing task %p\n", task); exynos_drm_ipp_task_release_buf(&task->src); exynos_drm_ipp_task_release_buf(&task->dst); @@ -559,7 +559,7 @@ static int exynos_drm_ipp_check_format(struct exynos_drm_ipp_task *task, DRM_EXYNOS_IPP_FORMAT_DESTINATION); if (!fmt) { DRM_DEV_DEBUG_DRIVER(task->dev, - "Task %pK: %s format not supported\n", + "Task %p: %s format not supported\n", task, buf == src ? "src" : "dst"); return -EINVAL; } @@ -609,7 +609,7 @@ static int exynos_drm_ipp_task_check(struct exynos_drm_ipp_task *task) bool rotate = (rotation != DRM_MODE_ROTATE_0); bool scale = false; - DRM_DEV_DEBUG_DRIVER(task->dev, "Checking task %pK\n", task); + DRM_DEV_DEBUG_DRIVER(task->dev, "Checking task %p\n", task); if (src->rect.w == UINT_MAX) src->rect.w = src->buf.width; @@ -625,7 +625,7 @@ static int exynos_drm_ipp_task_check(struct exynos_drm_ipp_task *task) dst->rect.x + dst->rect.w > (dst->buf.width) || dst->rect.y + dst->rect.h > (dst->buf.height)) { DRM_DEV_DEBUG_DRIVER(task->dev, - "Task %pK: defined area is outside provided buffers\n", + "Task %p: defined area is outside provided buffers\n", task); return -EINVAL; } @@ -642,7 +642,7 @@ static int exynos_drm_ipp_task_check(struct exynos_drm_ipp_task *task) (!(ipp->capabilities & DRM_EXYNOS_IPP_CAP_SCALE) && scale) || (!(ipp->capabilities & DRM_EXYNOS_IPP_CAP_CONVERT) && src->buf.fourcc != dst->buf.fourcc)) { - DRM_DEV_DEBUG_DRIVER(task->dev, "Task %pK: hw capabilities exceeded\n", + DRM_DEV_DEBUG_DRIVER(task->dev, "Task %p: hw capabilities exceeded\n", task); return -EINVAL; } @@ -655,7 +655,7 @@ static int exynos_drm_ipp_task_check(struct exynos_drm_ipp_task *task) if (ret) return ret; - DRM_DEV_DEBUG_DRIVER(ipp->dev, "Task %pK: all checks done.\n", + DRM_DEV_DEBUG_DRIVER(ipp->dev, "Task %p: all checks done.\n", task); return ret; @@ -667,25 +667,25 @@ static int exynos_drm_ipp_task_setup_buffers(struct exynos_drm_ipp_task *task, struct exynos_drm_ipp_buffer *src = &task->src, *dst = &task->dst; int ret = 0; - DRM_DEV_DEBUG_DRIVER(task->dev, "Setting buffer for task %pK\n", + DRM_DEV_DEBUG_DRIVER(task->dev, "Setting buffer for task %p\n", task); ret = exynos_drm_ipp_task_setup_buffer(src, filp); if (ret) { DRM_DEV_DEBUG_DRIVER(task->dev, - "Task %pK: src buffer setup failed\n", + "Task %p: src buffer setup failed\n", task); return ret; } ret = exynos_drm_ipp_task_setup_buffer(dst, filp); if (ret) { DRM_DEV_DEBUG_DRIVER(task->dev, - "Task %pK: dst buffer setup failed\n", + "Task %p: dst buffer setup failed\n", task); return ret; } - DRM_DEV_DEBUG_DRIVER(task->dev, "Task %pK: buffers prepared.\n", + DRM_DEV_DEBUG_DRIVER(task->dev, "Task %p: buffers prepared.\n", task); return ret; @@ -764,7 +764,7 @@ void exynos_drm_ipp_task_done(struct exynos_drm_ipp_task *task, int ret) struct exynos_drm_ipp *ipp = task->ipp; unsigned long flags; - DRM_DEV_DEBUG_DRIVER(task->dev, "ipp: %d, task %pK done: %d\n", + DRM_DEV_DEBUG_DRIVER(task->dev, "ipp: %d, task %p done: %d\n", ipp->id, task, ret); spin_lock_irqsave(&ipp->lock, flags); @@ -807,7 +807,7 @@ static void exynos_drm_ipp_next_task(struct exynos_drm_ipp *ipp) spin_unlock_irqrestore(&ipp->lock, flags); DRM_DEV_DEBUG_DRIVER(ipp->dev, - "ipp: %d, selected task %pK to run\n", ipp->id, + "ipp: %d, selected task %p to run\n", ipp->id, task); ret = ipp->funcs->commit(ipp, task); @@ -917,14 +917,14 @@ int exynos_drm_ipp_commit_ioctl(struct drm_device *dev, void *data, */ if (arg->flags & DRM_EXYNOS_IPP_FLAG_NONBLOCK) { DRM_DEV_DEBUG_DRIVER(ipp->dev, - "ipp: %d, nonblocking processing task %pK\n", + "ipp: %d, nonblocking processing task %p\n", ipp->id, task); task->flags |= DRM_EXYNOS_IPP_TASK_ASYNC; exynos_drm_ipp_schedule_task(task->ipp, task); ret = 0; } else { - DRM_DEV_DEBUG_DRIVER(ipp->dev, "ipp: %d, processing task %pK\n", + DRM_DEV_DEBUG_DRIVER(ipp->dev, "ipp: %d, processing task %p\n", ipp->id, task); exynos_drm_ipp_schedule_task(ipp, task); ret = wait_event_interruptible(ipp->done_wq, From b846350aa272de99bf6fecfa6b08e64ebfb13173 Mon Sep 17 00:00:00 2001 From: Kaustabh Chakraborty Date: Fri, 27 Jun 2025 00:50:30 +0530 Subject: [PATCH 08/41] drm/exynos: exynos7_drm_decon: add vblank check in IRQ handling If there's support for another console device (such as a TTY serial), the kernel occasionally panics during boot. The panic message and a relevant snippet of the call stack is as follows: Unable to handle kernel NULL pointer dereference at virtual address 000000000000000 Call trace: drm_crtc_handle_vblank+0x10/0x30 (P) decon_irq_handler+0x88/0xb4 [...] Otherwise, the panics don't happen. This indicates that it's some sort of race condition. Add a check to validate if the drm device can handle vblanks before calling drm_crtc_handle_vblank() to avoid this. Cc: stable@vger.kernel.org Fixes: 96976c3d9aff ("drm/exynos: Add DECON driver") Signed-off-by: Kaustabh Chakraborty Signed-off-by: Inki Dae --- drivers/gpu/drm/exynos/exynos7_drm_decon.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/gpu/drm/exynos/exynos7_drm_decon.c b/drivers/gpu/drm/exynos/exynos7_drm_decon.c index f91daefa9d2b..805aa28c1723 100644 --- a/drivers/gpu/drm/exynos/exynos7_drm_decon.c +++ b/drivers/gpu/drm/exynos/exynos7_drm_decon.c @@ -636,6 +636,10 @@ static irqreturn_t decon_irq_handler(int irq, void *dev_id) if (!ctx->drm_dev) goto out; + /* check if crtc and vblank have been initialized properly */ + if (!drm_dev_has_vblank(ctx->drm_dev)) + goto out; + if (!ctx->i80_if) { drm_crtc_handle_vblank(&ctx->crtc->base); From 5d91394f236167ac624b823820faf4aa928b889e Mon Sep 17 00:00:00 2001 From: Marek Szyprowski Date: Wed, 18 Jun 2025 14:06:26 +0200 Subject: [PATCH 09/41] drm/exynos: fimd: Guard display clock control with runtime PM calls Commit c9b1150a68d9 ("drm/atomic-helper: Re-order bridge chain pre-enable and post-disable") changed the call sequence to the CRTC enable/disable and bridge pre_enable/post_disable methods, so those bridge methods are now called when CRTC is not yet enabled. This causes a lockup observed on Samsung Peach-Pit/Pi Chromebooks. The source of this lockup is a call to fimd_dp_clock_enable() function, when FIMD device is not yet runtime resumed. It worked before the mentioned commit only because the CRTC implemented by the FIMD driver was always enabled what guaranteed the FIMD device to be runtime resumed. This patch adds runtime PM guards to the fimd_dp_clock_enable() function to enable its proper operation also when the CRTC implemented by FIMD is not yet enabled. Fixes: 196e059a8a6a ("drm/exynos: convert clock_enable crtc callback to pipeline clock") Signed-off-by: Marek Szyprowski Reviewed-by: Tomi Valkeinen Signed-off-by: Inki Dae --- drivers/gpu/drm/exynos/exynos_drm_fimd.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/drivers/gpu/drm/exynos/exynos_drm_fimd.c b/drivers/gpu/drm/exynos/exynos_drm_fimd.c index c394cc702d7d..205c238cc73a 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_fimd.c +++ b/drivers/gpu/drm/exynos/exynos_drm_fimd.c @@ -187,6 +187,7 @@ struct fimd_context { u32 i80ifcon; bool i80_if; bool suspended; + bool dp_clk_enabled; wait_queue_head_t wait_vsync_queue; atomic_t wait_vsync_event; atomic_t win_updated; @@ -1047,7 +1048,18 @@ static void fimd_dp_clock_enable(struct exynos_drm_clk *clk, bool enable) struct fimd_context *ctx = container_of(clk, struct fimd_context, dp_clk); u32 val = enable ? DP_MIE_CLK_DP_ENABLE : DP_MIE_CLK_DISABLE; + + if (enable == ctx->dp_clk_enabled) + return; + + if (enable) + pm_runtime_resume_and_get(ctx->dev); + + ctx->dp_clk_enabled = enable; writel(val, ctx->regs + DP_MIE_CLKCON); + + if (!enable) + pm_runtime_put(ctx->dev); } static const struct exynos_drm_crtc_ops fimd_crtc_ops = { From 2ab3ba39153dcdc9de7d2eec42bf19f84d4844cf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Wed, 18 Jun 2025 09:52:20 +0200 Subject: [PATCH 10/41] drm/bridge: samsung-dsim: Don't use %pK through printk MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In the past %pK was preferable to %p as it would not leak raw pointer values into the kernel log. Since commit ad67b74d2469 ("printk: hash addresses printed with %p") the regular %p has been improved to avoid this issue. Furthermore, restricted pointers ("%pK") were never meant to be used through printk(). They can still unintentionally leak raw pointers or acquire sleeping locks in atomic contexts. Switch to the regular pointer formatting which is safer and easier to reason about. Signed-off-by: Thomas Weißschuh Signed-off-by: Inki Dae --- drivers/gpu/drm/bridge/samsung-dsim.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/bridge/samsung-dsim.c b/drivers/gpu/drm/bridge/samsung-dsim.c index 0014c497e3fe..bccc88d25948 100644 --- a/drivers/gpu/drm/bridge/samsung-dsim.c +++ b/drivers/gpu/drm/bridge/samsung-dsim.c @@ -1095,7 +1095,7 @@ static void samsung_dsim_send_to_fifo(struct samsung_dsim *dsi, bool first = !xfer->tx_done; u32 reg; - dev_dbg(dev, "< xfer %pK: tx len %u, done %u, rx len %u, done %u\n", + dev_dbg(dev, "< xfer %p: tx len %u, done %u, rx len %u, done %u\n", xfer, length, xfer->tx_done, xfer->rx_len, xfer->rx_done); if (length > DSI_TX_FIFO_SIZE) @@ -1293,7 +1293,7 @@ static bool samsung_dsim_transfer_finish(struct samsung_dsim *dsi) spin_unlock_irqrestore(&dsi->transfer_lock, flags); dev_dbg(dsi->dev, - "> xfer %pK, tx_len %zu, tx_done %u, rx_len %u, rx_done %u\n", + "> xfer %p, tx_len %zu, tx_done %u, rx_len %u, rx_done %u\n", xfer, xfer->packet.payload_length, xfer->tx_done, xfer->rx_len, xfer->rx_done); From caa7c7a76b78ce41d347003f84975125383e6b59 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 25 Jun 2025 10:21:58 -0500 Subject: [PATCH 11/41] drm/i915/selftests: Change mock_request() to return error pointers There was an error pointer vs NULL bug in __igt_breadcrumbs_smoketest(). The __mock_request_alloc() function implements the smoketest->request_alloc() function pointer. It was supposed to return error pointers, but it propogates the NULL return from mock_request() so in the event of a failure, it would lead to a NULL pointer dereference. To fix this, change the mock_request() function to return error pointers and update all the callers to expect that. Fixes: 52c0fdb25c7c ("drm/i915: Replace global breadcrumbs with per-context interrupt tracking") Signed-off-by: Dan Carpenter Reviewed-by: Rodrigo Vivi Link: https://lore.kernel.org/r/685c1417.050a0220.696f5.5c05@mx.google.com Signed-off-by: Rodrigo Vivi (cherry picked from commit 778fa8ad5f0f23397d045c7ebca048ce8def1c43) Signed-off-by: Joonas Lahtinen --- drivers/gpu/drm/i915/selftests/i915_request.c | 20 +++++++++---------- drivers/gpu/drm/i915/selftests/mock_request.c | 2 +- 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/drivers/gpu/drm/i915/selftests/i915_request.c b/drivers/gpu/drm/i915/selftests/i915_request.c index 88870844b5bd..2fb7a9e7efec 100644 --- a/drivers/gpu/drm/i915/selftests/i915_request.c +++ b/drivers/gpu/drm/i915/selftests/i915_request.c @@ -73,8 +73,8 @@ static int igt_add_request(void *arg) /* Basic preliminary test to create a request and let it loose! */ request = mock_request(rcs0(i915)->kernel_context, HZ / 10); - if (!request) - return -ENOMEM; + if (IS_ERR(request)) + return PTR_ERR(request); i915_request_add(request); @@ -91,8 +91,8 @@ static int igt_wait_request(void *arg) /* Submit a request, then wait upon it */ request = mock_request(rcs0(i915)->kernel_context, T); - if (!request) - return -ENOMEM; + if (IS_ERR(request)) + return PTR_ERR(request); i915_request_get(request); @@ -160,8 +160,8 @@ static int igt_fence_wait(void *arg) /* Submit a request, treat it as a fence and wait upon it */ request = mock_request(rcs0(i915)->kernel_context, T); - if (!request) - return -ENOMEM; + if (IS_ERR(request)) + return PTR_ERR(request); if (dma_fence_wait_timeout(&request->fence, false, T) != -ETIME) { pr_err("fence wait success before submit (expected timeout)!\n"); @@ -219,8 +219,8 @@ static int igt_request_rewind(void *arg) GEM_BUG_ON(IS_ERR(ce)); request = mock_request(ce, 2 * HZ); intel_context_put(ce); - if (!request) { - err = -ENOMEM; + if (IS_ERR(request)) { + err = PTR_ERR(request); goto err_context_0; } @@ -237,8 +237,8 @@ static int igt_request_rewind(void *arg) GEM_BUG_ON(IS_ERR(ce)); vip = mock_request(ce, 0); intel_context_put(ce); - if (!vip) { - err = -ENOMEM; + if (IS_ERR(vip)) { + err = PTR_ERR(vip); goto err_context_1; } diff --git a/drivers/gpu/drm/i915/selftests/mock_request.c b/drivers/gpu/drm/i915/selftests/mock_request.c index 09f747228dff..1b0cf073e964 100644 --- a/drivers/gpu/drm/i915/selftests/mock_request.c +++ b/drivers/gpu/drm/i915/selftests/mock_request.c @@ -35,7 +35,7 @@ mock_request(struct intel_context *ce, unsigned long delay) /* NB the i915->requests slab cache is enlarged to fit mock_request */ request = intel_context_create_request(ce); if (IS_ERR(request)) - return NULL; + return request; request->mock.delay = delay; return request; From 7da6c155a67d42a0c1e4e22bd3f492fabcb14f2c Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Thu, 26 Jun 2025 16:33:17 +0200 Subject: [PATCH 12/41] drm/i915/dsi: Fix NULL pointer deref in vlv_dphy_param_init() Commit 77ba0b856225 ("drm/i915/dsi: convert vlv_dsi.[ch] to struct intel_display") added a to_intel_display(connector) call to vlv_dphy_param_init() but when vlv_dphy_param_init() gets called the connector object has not been initialized yet, so this leads to a NULL pointer deref: BUG: kernel NULL pointer dereference, address: 000000000000000c ... Hardware name: ASUSTeK COMPUTER INC. T100TA/T100TA, BIOS T100TA.314 08/13/2015 RIP: 0010:vlv_dsi_init+0x4e6/0x1600 [i915] ... Call Trace: ? intel_step_name+0x4be8/0x5c30 [i915] intel_setup_outputs+0x2d6/0xbd0 [i915] intel_display_driver_probe_nogem+0x13f/0x220 [i915] i915_driver_probe+0x3d9/0xaf0 [i915] Use to_intel_display(&intel_dsi->base) instead to fix this. Fixes: 77ba0b856225 ("drm/i915/dsi: convert vlv_dsi.[ch] to struct intel_display") Signed-off-by: Hans de Goede Reviewed-by: Jani Nikula Link: https://lore.kernel.org/r/20250626143317.101706-1-hansg@kernel.org Signed-off-by: Jani Nikula (cherry picked from commit 0dc6bfb50a5d0759e726cd36a3d3b7529fd2a627) Signed-off-by: Joonas Lahtinen --- drivers/gpu/drm/i915/display/vlv_dsi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/display/vlv_dsi.c b/drivers/gpu/drm/i915/display/vlv_dsi.c index 21c1e10caf68..2007bb9d974d 100644 --- a/drivers/gpu/drm/i915/display/vlv_dsi.c +++ b/drivers/gpu/drm/i915/display/vlv_dsi.c @@ -1589,8 +1589,8 @@ static void vlv_dsi_add_properties(struct intel_connector *connector) static void vlv_dphy_param_init(struct intel_dsi *intel_dsi) { + struct intel_display *display = to_intel_display(&intel_dsi->base); struct intel_connector *connector = intel_dsi->attached_connector; - struct intel_display *display = to_intel_display(connector); struct mipi_config *mipi_config = connector->panel.vbt.dsi.config; u32 tlpx_ns, extra_byte_count, tlpx_ui; u32 ui_num, ui_den; From 2b95a7db6e0f75587bffddbb490399cbb87e4985 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Tue, 28 Jan 2025 10:47:48 +0100 Subject: [PATCH 13/41] dma-buf: fix timeout handling in dma_resv_wait_timeout v2 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Even the kerneldoc says that with a zero timeout the function should not wait for anything, but still return 1 to indicate that the fences are signaled now. Unfortunately that isn't what was implemented, instead of only returning 1 we also waited for at least one jiffies. Fix that by adjusting the handling to what the function is actually documented to do. v2: improve code readability Reported-by: Marek Olšák Reported-by: Lucas Stach Signed-off-by: Christian König Reviewed-by: Lucas Stach Cc: Link: https://lore.kernel.org/r/20250129105841.1806-1-christian.koenig@amd.com --- drivers/dma-buf/dma-resv.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/drivers/dma-buf/dma-resv.c b/drivers/dma-buf/dma-resv.c index b1ef4546346d..bea3e9858aca 100644 --- a/drivers/dma-buf/dma-resv.c +++ b/drivers/dma-buf/dma-resv.c @@ -685,11 +685,13 @@ long dma_resv_wait_timeout(struct dma_resv *obj, enum dma_resv_usage usage, dma_resv_iter_begin(&cursor, obj, usage); dma_resv_for_each_fence_unlocked(&cursor, fence) { - ret = dma_fence_wait_timeout(fence, intr, ret); - if (ret <= 0) { - dma_resv_iter_end(&cursor); - return ret; - } + ret = dma_fence_wait_timeout(fence, intr, timeout); + if (ret <= 0) + break; + + /* Even for zero timeout the return value is 1 */ + if (timeout) + timeout = ret; } dma_resv_iter_end(&cursor); From 97e000acf2e20a86a50a0ec8c2739f0846f37509 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Fri, 13 Jun 2025 13:16:38 +0200 Subject: [PATCH 14/41] drm/ttm: fix error handling in ttm_buffer_object_transfer MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Unlocking the resv object was missing in the error path, additionally to that we should move over the resource only after the fence slot was reserved. Signed-off-by: Christian König Reviewed-by: Matthew Brost Fixes: c8d4c18bfbc4a ("dma-buf/drivers: make reserving a shared slot mandatory v4") Cc: Link: https://lore.kernel.org/r/20250616130726.22863-3-christian.koenig@amd.com --- drivers/gpu/drm/ttm/ttm_bo_util.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/ttm/ttm_bo_util.c b/drivers/gpu/drm/ttm/ttm_bo_util.c index 15cab9bda17f..bd90404ea609 100644 --- a/drivers/gpu/drm/ttm/ttm_bo_util.c +++ b/drivers/gpu/drm/ttm/ttm_bo_util.c @@ -254,6 +254,13 @@ static int ttm_buffer_object_transfer(struct ttm_buffer_object *bo, ret = dma_resv_trylock(&fbo->base.base._resv); WARN_ON(!ret); + ret = dma_resv_reserve_fences(&fbo->base.base._resv, 1); + if (ret) { + dma_resv_unlock(&fbo->base.base._resv); + kfree(fbo); + return ret; + } + if (fbo->base.resource) { ttm_resource_set_bo(fbo->base.resource, &fbo->base); bo->resource = NULL; @@ -262,12 +269,6 @@ static int ttm_buffer_object_transfer(struct ttm_buffer_object *bo, fbo->base.bulk_move = NULL; } - ret = dma_resv_reserve_fences(&fbo->base.base._resv, 1); - if (ret) { - kfree(fbo); - return ret; - } - ttm_bo_get(bo); fbo->bo = bo; From eb028cd884e1b0976ff8c5944ee6650fe3ed0a6c Mon Sep 17 00:00:00 2001 From: Dmitry Baryshkov Date: Thu, 20 Feb 2025 17:07:26 +0200 Subject: [PATCH 15/41] drm/bridge: panel: move prepare_prev_first handling to drm_panel_bridge_add_typed The commit 5ea6b1702781 ("drm/panel: Add prepare_prev_first flag to drm_panel") and commit 0974687a19c3 ("drm/bridge: panel: Set pre_enable_prev_first from drmm_panel_bridge_add") added handling of panel's prepare_prev_first to devm_panel_bridge_add() and drmm_panel_bridge_add(). However if the driver calls drm_panel_bridge_add_typed() directly, then the flag won't be handled and thus the drm_bridge.pre_enable_prev_first will not be set. Move prepare_prev_first handling to the drm_panel_bridge_add_typed() so that there is no way to miss the flag. Fixes: 5ea6b1702781 ("drm/panel: Add prepare_prev_first flag to drm_panel") Fixes: 0974687a19c3 ("drm/bridge: panel: Set pre_enable_prev_first from drmm_panel_bridge_add") Reported-by: Svyatoslav Ryhel Closes: https://lore.kernel.org/dri-devel/CAPVz0n3YZass3Bns1m0XrFxtAC0DKbEPiW6vXimQx97G243sXw@mail.gmail.com/ Signed-off-by: Dmitry Baryshkov Reviewed-by: Neil Armstrong Signed-off-by: Neil Armstrong Link: https://lore.kernel.org/r/20250220-panel_prev_first-v1-1-b9e787825a1a@linaro.org --- drivers/gpu/drm/bridge/panel.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/drivers/gpu/drm/bridge/panel.c b/drivers/gpu/drm/bridge/panel.c index 79b009ab9396..29b0358a7b6d 100644 --- a/drivers/gpu/drm/bridge/panel.c +++ b/drivers/gpu/drm/bridge/panel.c @@ -299,6 +299,7 @@ struct drm_bridge *drm_panel_bridge_add_typed(struct drm_panel *panel, panel_bridge->bridge.of_node = panel->dev->of_node; panel_bridge->bridge.ops = DRM_BRIDGE_OP_MODES; panel_bridge->bridge.type = connector_type; + panel_bridge->bridge.pre_enable_prev_first = panel->prepare_prev_first; drm_bridge_add(&panel_bridge->bridge); @@ -413,8 +414,6 @@ struct drm_bridge *devm_drm_panel_bridge_add_typed(struct device *dev, return bridge; } - bridge->pre_enable_prev_first = panel->prepare_prev_first; - *ptr = bridge; devres_add(dev, ptr); @@ -456,8 +455,6 @@ struct drm_bridge *drmm_panel_bridge_add(struct drm_device *drm, if (ret) return ERR_PTR(ret); - bridge->pre_enable_prev_first = panel->prepare_prev_first; - return bridge; } EXPORT_SYMBOL(drmm_panel_bridge_add); From e8537cad824065b0425fb0429e762e14a08067c2 Mon Sep 17 00:00:00 2001 From: Dmitry Baryshkov Date: Sun, 8 Jun 2025 18:52:04 +0300 Subject: [PATCH 16/41] drm/bridge: aux-hpd-bridge: fix assignment of the of_node Perform fix similar to the one in the commit 85e444a68126 ("drm/bridge: Fix assignment of the of_node of the parent to aux bridge"). The assignment of the of_node to the aux HPD bridge needs to mark the of_node as reused, otherwise driver core will attempt to bind resources like pinctrl, which is going to fail as corresponding pins are already marked as used by the parent device. Fix that by using the device_set_of_node_from_dev() helper instead of assigning it directly. Fixes: e560518a6c2e ("drm/bridge: implement generic DP HPD bridge") Signed-off-by: Dmitry Baryshkov Reviewed-by: Neil Armstrong Signed-off-by: Neil Armstrong Link: https://lore.kernel.org/r/20250608-fix-aud-hpd-bridge-v1-1-4641a6f8e381@oss.qualcomm.com --- drivers/gpu/drm/bridge/aux-hpd-bridge.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/bridge/aux-hpd-bridge.c b/drivers/gpu/drm/bridge/aux-hpd-bridge.c index b3f588b71a7d..af6f79793407 100644 --- a/drivers/gpu/drm/bridge/aux-hpd-bridge.c +++ b/drivers/gpu/drm/bridge/aux-hpd-bridge.c @@ -64,10 +64,11 @@ struct auxiliary_device *devm_drm_dp_hpd_bridge_alloc(struct device *parent, str adev->id = ret; adev->name = "dp_hpd_bridge"; adev->dev.parent = parent; - adev->dev.of_node = of_node_get(parent->of_node); adev->dev.release = drm_aux_hpd_bridge_release; adev->dev.platform_data = of_node_get(np); + device_set_of_node_from_dev(&adev->dev, parent); + ret = auxiliary_device_init(adev); if (ret) { of_node_put(adev->dev.platform_data); From 905967e359f0a3345dce096504e8c0390d0a8f49 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Mon, 16 Jun 2025 14:04:54 -0400 Subject: [PATCH 17/41] drm/amdgpu/sdma5.x: suspend KFD queues in ring reset SDMA 5.x only supports engine soft reset which resets all queues on the engine. As such, we need to suspend KFD queues around resets like we do for SDMA 4.x. Reviewed-by: Jesse Zhang Signed-off-by: Alex Deucher (cherry picked from commit 61feed0baa1a0d094af0e07e968b1e6e875f07d0) --- drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c | 7 ++++++- drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c | 7 ++++++- 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c index 1813c3ed0aa6..37f4b5b4a098 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c @@ -1543,8 +1543,13 @@ static int sdma_v5_0_reset_queue(struct amdgpu_ring *ring, unsigned int vmid) { struct amdgpu_device *adev = ring->adev; u32 inst_id = ring->me; + int r; - return amdgpu_sdma_reset_engine(adev, inst_id); + amdgpu_amdkfd_suspend(adev, true); + r = amdgpu_sdma_reset_engine(adev, inst_id); + amdgpu_amdkfd_resume(adev, true); + + return r; } static int sdma_v5_0_stop_queue(struct amdgpu_ring *ring) diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c index 23f97da62808..0b40411b92a0 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c @@ -1456,8 +1456,13 @@ static int sdma_v5_2_reset_queue(struct amdgpu_ring *ring, unsigned int vmid) { struct amdgpu_device *adev = ring->adev; u32 inst_id = ring->me; + int r; - return amdgpu_sdma_reset_engine(adev, inst_id); + amdgpu_amdkfd_suspend(adev, true); + r = amdgpu_sdma_reset_engine(adev, inst_id); + amdgpu_amdkfd_resume(adev, true); + + return r; } static int sdma_v5_2_stop_queue(struct amdgpu_ring *ring) From 62461367f4c0dcf4fab9cafb4ab3a7d346788df6 Mon Sep 17 00:00:00 2001 From: David Yat Sin Date: Thu, 19 Jun 2025 17:51:13 +0000 Subject: [PATCH 18/41] amdkfd: MTYPE_UC for ext-coherent system memory Set memory mtype to UC host memory when ext-coherent flag is set and memory is registered as a SVM allocation. Reviewed-by: Amber Lin Signed-off-by: David Yat Sin Signed-off-by: Alex Deucher (cherry picked from commit 5d14fdab4778c29cfd39e62c3ce84d232b4a7d8c) --- drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c index 865dca2547de..7763e4742080 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c @@ -1278,7 +1278,7 @@ svm_range_get_pte_flags(struct kfd_node *node, mapping_flags |= ext_coherent ? AMDGPU_VM_MTYPE_UC : AMDGPU_VM_MTYPE_NC; /* system memory accessed by the dGPU */ } else { - if (gc_ip_version < IP_VERSION(9, 5, 0)) + if (gc_ip_version < IP_VERSION(9, 5, 0) || ext_coherent) mapping_flags |= AMDGPU_VM_MTYPE_UC; else mapping_flags |= AMDGPU_VM_MTYPE_NC; From e54c5de901ea56fc68f8d56b3cce9940169346f4 Mon Sep 17 00:00:00 2001 From: Kent Russell Date: Tue, 24 Jun 2025 11:42:06 -0400 Subject: [PATCH 19/41] drm/amdgpu: Include sdma_4_4_4.bin This got missed during SDMA 4.4.4 support. Fixes: 968e3811c3e8 ("drm/amdgpu: add initial support for sdma444") Signed-off-by: Kent Russell Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher (cherry picked from commit 51526efe02714339ed6139f7bc348377d363200a) Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c index cef68df4c663..bb82c652e4c0 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c @@ -45,6 +45,7 @@ #include "amdgpu_ras.h" MODULE_FIRMWARE("amdgpu/sdma_4_4_2.bin"); +MODULE_FIRMWARE("amdgpu/sdma_4_4_4.bin"); MODULE_FIRMWARE("amdgpu/sdma_4_4_5.bin"); static const struct amdgpu_hwip_reg_entry sdma_reg_list_4_4_2[] = { From cf234231fcbc7d391e2135b9518613218cc5347f Mon Sep 17 00:00:00 2001 From: Philip Yang Date: Fri, 20 Jun 2025 18:32:32 -0400 Subject: [PATCH 20/41] drm/amdkfd: Don't call mmput from MMU notifier callback If the process is exiting, the mmput inside mmu notifier callback from compactd or fork or numa balancing could release the last reference of mm struct to call exit_mmap and free_pgtable, this triggers deadlock with below backtrace. The deadlock will leak kfd process as mmu notifier release is not called and cause VRAM leaking. The fix is to take mm reference mmget_non_zero when adding prange to the deferred list to pair with mmput in deferred list work. If prange split and add into pchild list, the pchild work_item.mm is not used, so remove the mm parameter from svm_range_unmap_split and svm_range_add_child. The backtrace of hung task: INFO: task python:348105 blocked for more than 64512 seconds. Call Trace: __schedule+0x1c3/0x550 schedule+0x46/0xb0 rwsem_down_write_slowpath+0x24b/0x4c0 unlink_anon_vmas+0xb1/0x1c0 free_pgtables+0xa9/0x130 exit_mmap+0xbc/0x1a0 mmput+0x5a/0x140 svm_range_cpu_invalidate_pagetables+0x2b/0x40 [amdgpu] mn_itree_invalidate+0x72/0xc0 __mmu_notifier_invalidate_range_start+0x48/0x60 try_to_unmap_one+0x10fa/0x1400 rmap_walk_anon+0x196/0x460 try_to_unmap+0xbb/0x210 migrate_page_unmap+0x54d/0x7e0 migrate_pages_batch+0x1c3/0xae0 migrate_pages_sync+0x98/0x240 migrate_pages+0x25c/0x520 compact_zone+0x29d/0x590 compact_zone_order+0xb6/0xf0 try_to_compact_pages+0xbe/0x220 __alloc_pages_direct_compact+0x96/0x1a0 __alloc_pages_slowpath+0x410/0x930 __alloc_pages_nodemask+0x3a9/0x3e0 do_huge_pmd_anonymous_page+0xd7/0x3e0 __handle_mm_fault+0x5e3/0x5f0 handle_mm_fault+0xf7/0x2e0 hmm_vma_fault.isra.0+0x4d/0xa0 walk_pmd_range.isra.0+0xa8/0x310 walk_pud_range+0x167/0x240 walk_pgd_range+0x55/0x100 __walk_page_range+0x87/0x90 walk_page_range+0xf6/0x160 hmm_range_fault+0x4f/0x90 amdgpu_hmm_range_get_pages+0x123/0x230 [amdgpu] amdgpu_ttm_tt_get_user_pages+0xb1/0x150 [amdgpu] init_user_pages+0xb1/0x2a0 [amdgpu] amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu+0x543/0x7d0 [amdgpu] kfd_ioctl_alloc_memory_of_gpu+0x24c/0x4e0 [amdgpu] kfd_ioctl+0x29d/0x500 [amdgpu] Fixes: fa582c6f3684 ("drm/amdkfd: Use mmget_not_zero in MMU notifier") Signed-off-by: Philip Yang Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher (cherry picked from commit a29e067bd38946f752b0ef855f3dfff87e77bec7) Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 43 +++++++++++++--------------- 1 file changed, 20 insertions(+), 23 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c index 7763e4742080..a0f22ea6d15a 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c @@ -1171,13 +1171,12 @@ svm_range_split_head(struct svm_range *prange, uint64_t new_start, } static void -svm_range_add_child(struct svm_range *prange, struct mm_struct *mm, - struct svm_range *pchild, enum svm_work_list_ops op) +svm_range_add_child(struct svm_range *prange, struct svm_range *pchild, enum svm_work_list_ops op) { pr_debug("add child 0x%p [0x%lx 0x%lx] to prange 0x%p child list %d\n", pchild, pchild->start, pchild->last, prange, op); - pchild->work_item.mm = mm; + pchild->work_item.mm = NULL; pchild->work_item.op = op; list_add_tail(&pchild->child_list, &prange->child_list); } @@ -2394,15 +2393,17 @@ svm_range_add_list_work(struct svm_range_list *svms, struct svm_range *prange, prange->work_item.op != SVM_OP_UNMAP_RANGE) prange->work_item.op = op; } else { - prange->work_item.op = op; - - /* Pairs with mmput in deferred_list_work */ - mmget(mm); - prange->work_item.mm = mm; - list_add_tail(&prange->deferred_list, - &prange->svms->deferred_range_list); - pr_debug("add prange 0x%p [0x%lx 0x%lx] to work list op %d\n", - prange, prange->start, prange->last, op); + /* Pairs with mmput in deferred_list_work. + * If process is exiting and mm is gone, don't update mmu notifier. + */ + if (mmget_not_zero(mm)) { + prange->work_item.mm = mm; + prange->work_item.op = op; + list_add_tail(&prange->deferred_list, + &prange->svms->deferred_range_list); + pr_debug("add prange 0x%p [0x%lx 0x%lx] to work list op %d\n", + prange, prange->start, prange->last, op); + } } spin_unlock(&svms->deferred_list_lock); } @@ -2416,8 +2417,7 @@ void schedule_deferred_list_work(struct svm_range_list *svms) } static void -svm_range_unmap_split(struct mm_struct *mm, struct svm_range *parent, - struct svm_range *prange, unsigned long start, +svm_range_unmap_split(struct svm_range *parent, struct svm_range *prange, unsigned long start, unsigned long last) { struct svm_range *head; @@ -2438,12 +2438,12 @@ svm_range_unmap_split(struct mm_struct *mm, struct svm_range *parent, svm_range_split(tail, last + 1, tail->last, &head); if (head != prange && tail != prange) { - svm_range_add_child(parent, mm, head, SVM_OP_UNMAP_RANGE); - svm_range_add_child(parent, mm, tail, SVM_OP_ADD_RANGE); + svm_range_add_child(parent, head, SVM_OP_UNMAP_RANGE); + svm_range_add_child(parent, tail, SVM_OP_ADD_RANGE); } else if (tail != prange) { - svm_range_add_child(parent, mm, tail, SVM_OP_UNMAP_RANGE); + svm_range_add_child(parent, tail, SVM_OP_UNMAP_RANGE); } else if (head != prange) { - svm_range_add_child(parent, mm, head, SVM_OP_UNMAP_RANGE); + svm_range_add_child(parent, head, SVM_OP_UNMAP_RANGE); } else if (parent != prange) { prange->work_item.op = SVM_OP_UNMAP_RANGE; } @@ -2520,14 +2520,14 @@ svm_range_unmap_from_cpu(struct mm_struct *mm, struct svm_range *prange, l = min(last, pchild->last); if (l >= s) svm_range_unmap_from_gpus(pchild, s, l, trigger); - svm_range_unmap_split(mm, prange, pchild, start, last); + svm_range_unmap_split(prange, pchild, start, last); mutex_unlock(&pchild->lock); } s = max(start, prange->start); l = min(last, prange->last); if (l >= s) svm_range_unmap_from_gpus(prange, s, l, trigger); - svm_range_unmap_split(mm, prange, prange, start, last); + svm_range_unmap_split(prange, prange, start, last); if (unmap_parent) svm_range_add_list_work(svms, prange, mm, SVM_OP_UNMAP_RANGE); @@ -2570,8 +2570,6 @@ svm_range_cpu_invalidate_pagetables(struct mmu_interval_notifier *mni, if (range->event == MMU_NOTIFY_RELEASE) return true; - if (!mmget_not_zero(mni->mm)) - return true; start = mni->interval_tree.start; last = mni->interval_tree.last; @@ -2598,7 +2596,6 @@ svm_range_cpu_invalidate_pagetables(struct mmu_interval_notifier *mni, } svm_range_unlock(prange); - mmput(mni->mm); return true; } From f3e58d8e154dae5015c7400812c80789589fc36e Mon Sep 17 00:00:00 2001 From: "Lin.Cao" Date: Tue, 24 Jun 2025 17:05:34 +0800 Subject: [PATCH 21/41] drm/amdgpu: Fix memory leak in amdgpu_ctx_mgr_entity_fini MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit patch dd64956685fa ("drm/amdgpu: Remove duplicated "context still alive" check") removed ctx put, which will cause amdgpu_ctx_fini() cannot be called and then cause some finished fence that added by amdgpu_ctx_add_fence() cannot be released and cause memleak. Fixes: dd64956685fa ("drm/amdgpu: Remove duplicated "context still alive" check") Signed-off-by: Lin.Cao Reviewed-by: Tvrtko Ursulin Acked-by: Christian König Signed-off-by: Alex Deucher (cherry picked from commit 8cf66089e28108dedd47e6156a48489303cf525c) Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c index 85567d0d9545..f5d5c45ddc0d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c @@ -944,6 +944,7 @@ static void amdgpu_ctx_mgr_entity_fini(struct amdgpu_ctx_mgr *mgr) drm_sched_entity_fini(entity); } } + kref_put(&ctx->refcount, amdgpu_ctx_fini); } } From 34659c1a1f4fd4c148ab13e13b11fd64df01ffcd Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 25 Jun 2025 18:15:37 -0400 Subject: [PATCH 22/41] drm/amdkfd: add hqd_sdma_get_doorbell callbacks for gfx7/8 These were missed when support was added for other generations. The callbacks are called unconditionally so we need to make sure all generations have them. Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/4304 Link: https://github.com/ROCm/ROCm/issues/4965 Fixes: bac38ca8c475 ("drm/amdkfd: implement per queue sdma reset for gfx 9.4+") Cc: Jonathan Kim Reported-by: Johl Brown Reviewed-by: Jonathan Kim Signed-off-by: Alex Deucher (cherry picked from commit 1e9d17a5dcf1242e9518e461d8e63ad35240e49e) Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c | 8 ++++++++ drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c | 8 ++++++++ 2 files changed, 16 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c index ca4a6b82817f..df77558e03ef 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c @@ -561,6 +561,13 @@ static uint32_t read_vmid_from_vmfault_reg(struct amdgpu_device *adev) return REG_GET_FIELD(status, VM_CONTEXT1_PROTECTION_FAULT_STATUS, VMID); } +static uint32_t kgd_hqd_sdma_get_doorbell(struct amdgpu_device *adev, + int engine, int queue) + +{ + return 0; +} + const struct kfd2kgd_calls gfx_v7_kfd2kgd = { .program_sh_mem_settings = kgd_program_sh_mem_settings, .set_pasid_vmid_mapping = kgd_set_pasid_vmid_mapping, @@ -578,4 +585,5 @@ const struct kfd2kgd_calls gfx_v7_kfd2kgd = { .set_scratch_backing_va = set_scratch_backing_va, .set_vm_context_page_table_base = set_vm_context_page_table_base, .read_vmid_from_vmfault_reg = read_vmid_from_vmfault_reg, + .hqd_sdma_get_doorbell = kgd_hqd_sdma_get_doorbell, }; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c index 0f3e2944edd7..e68c0fa8d751 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c @@ -582,6 +582,13 @@ static void set_vm_context_page_table_base(struct amdgpu_device *adev, lower_32_bits(page_table_base)); } +static uint32_t kgd_hqd_sdma_get_doorbell(struct amdgpu_device *adev, + int engine, int queue) + +{ + return 0; +} + const struct kfd2kgd_calls gfx_v8_kfd2kgd = { .program_sh_mem_settings = kgd_program_sh_mem_settings, .set_pasid_vmid_mapping = kgd_set_pasid_vmid_mapping, @@ -599,4 +606,5 @@ const struct kfd2kgd_calls gfx_v8_kfd2kgd = { get_atc_vmid_pasid_mapping_info, .set_scratch_backing_va = set_scratch_backing_va, .set_vm_context_page_table_base = set_vm_context_page_table_base, + .hqd_sdma_get_doorbell = kgd_hqd_sdma_get_doorbell, }; From 84f2902bfcfe79e94adf5dfe9bf85ad334a4c63a Mon Sep 17 00:00:00 2001 From: Harold Sun Date: Thu, 19 Jun 2025 14:52:54 -0400 Subject: [PATCH 23/41] drm/amd/display: Added case for when RR equals panel's max RR using freesync [WHY] Rounding error sometimes occurs when the refresh rate is equal to a panel's max refresh rate, causing HDMI compliance failures. [HOW] Added a case so that we round up to avoid v_total_min to be below a panel's minimum bound. Reviewed-by: Jun Lei Signed-off-by: Harold Sun Signed-off-by: Ray Wu Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher (cherry picked from commit fe7645d22bc0f7c1558296538ec49987bf268ef6) --- drivers/gpu/drm/amd/display/dc/dc_hw_types.h | 1 + drivers/gpu/drm/amd/display/modules/freesync/freesync.c | 8 ++++++++ 2 files changed, 9 insertions(+) diff --git a/drivers/gpu/drm/amd/display/dc/dc_hw_types.h b/drivers/gpu/drm/amd/display/dc/dc_hw_types.h index d562ddeca512..c9f6c6275ca1 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_hw_types.h +++ b/drivers/gpu/drm/amd/display/dc/dc_hw_types.h @@ -974,6 +974,7 @@ struct dc_crtc_timing { uint32_t pix_clk_100hz; uint32_t min_refresh_in_uhz; + uint32_t max_refresh_in_uhz; uint32_t vic; uint32_t hdmi_vic; diff --git a/drivers/gpu/drm/amd/display/modules/freesync/freesync.c b/drivers/gpu/drm/amd/display/modules/freesync/freesync.c index 3ba9b62ba70b..250f09922d2f 100644 --- a/drivers/gpu/drm/amd/display/modules/freesync/freesync.c +++ b/drivers/gpu/drm/amd/display/modules/freesync/freesync.c @@ -155,6 +155,14 @@ unsigned int mod_freesync_calc_v_total_from_refresh( v_total = div64_u64(div64_u64(((unsigned long long)( frame_duration_in_ns) * (stream->timing.pix_clk_100hz / 10)), stream->timing.h_total), 1000000); + } else if (refresh_in_uhz >= stream->timing.max_refresh_in_uhz) { + /* When the target refresh rate is the maximum panel refresh rate + * round up the vtotal value to prevent off-by-one error causing + * v_total_min to be below the panel's lower bound + */ + v_total = div64_u64(div64_u64(((unsigned long long)( + frame_duration_in_ns) * (stream->timing.pix_clk_100hz / 10)), + stream->timing.h_total) + (1000000 - 1), 1000000); } else { v_total = div64_u64(div64_u64(((unsigned long long)( frame_duration_in_ns) * (stream->timing.pix_clk_100hz / 10)), From 39d81457ad3417a98ac826161f9ca0e642677661 Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Thu, 19 Jun 2025 09:29:13 -0500 Subject: [PATCH 24/41] drm/amd/display: Don't allow OLED to go down to fully off [Why] OLED panels can be fully off, but this behavior is unexpected. [How] Ensure that minimum luminance is at least 1. Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/4338 Reviewed-by: Alex Hung Signed-off-by: Mario Limonciello Signed-off-by: Ray Wu Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher (cherry picked from commit 51496c7737d06a74b599d0aa7974c3d5a4b1162e) --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 0b8ac9edc070..f58fa5da7fe5 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -3610,13 +3610,15 @@ static void update_connector_ext_caps(struct amdgpu_dm_connector *aconnector) luminance_range = &conn_base->display_info.luminance_range; - if (luminance_range->max_luminance) { - caps->aux_min_input_signal = luminance_range->min_luminance; + if (luminance_range->max_luminance) caps->aux_max_input_signal = luminance_range->max_luminance; - } else { - caps->aux_min_input_signal = 0; + else caps->aux_max_input_signal = 512; - } + + if (luminance_range->min_luminance) + caps->aux_min_input_signal = luminance_range->min_luminance; + else + caps->aux_min_input_signal = 1; min_input_signal_override = drm_get_panel_min_brightness_quirk(aconnector->drm_edid); if (min_input_signal_override >= 0) From 7dfede7d7edd18c0c91ca854cde8eaaf4ccf97ea Mon Sep 17 00:00:00 2001 From: Marko Kiiskila Date: Wed, 18 Jun 2025 15:29:26 -0400 Subject: [PATCH 25/41] drm/vmwgfx: Fix guests running with TDX/SEV Commit 81256a50aa0f ("x86/mm: Make memremap(MEMREMAP_WB) map memory as encrypted by default") changed the default behavior of memremap(MEMREMAP_WB) and started mapping memory as encrypted. The driver requires the fifo memory to be decrypted to communicate with the host but was relaying on the old default behavior of memremap(MEMREMAP_WB) and thus broke. Fix it by explicitly specifying the desired behavior and passing MEMREMAP_DEC to memremap. Fixes: 81256a50aa0f ("x86/mm: Make memremap(MEMREMAP_WB) map memory as encrypted by default") Signed-off-by: Marko Kiiskila Signed-off-by: Zack Rusin Cc: Kirill A. Shutemov Cc: Ingo Molnar Cc: Andrew Morton Cc: Dave Hansen Cc: Linus Torvalds Cc: Peter Zijlstra Cc: linux-mm@kvack.org Cc: linux-kernel@vger.kernel.org Acked-by: Kirill A. Shutemov Link: https://lore.kernel.org/r/20250618192926.1092450-1-zack.rusin@broadcom.com --- drivers/gpu/drm/vmwgfx/vmwgfx_drv.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c index 0695a342b1ef..5205552b1970 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c @@ -749,7 +749,7 @@ static int vmw_setup_pci_resources(struct vmw_private *dev, dev->fifo_mem = devm_memremap(dev->drm.dev, fifo_start, fifo_size, - MEMREMAP_WB); + MEMREMAP_WB | MEMREMAP_DEC); if (IS_ERR(dev->fifo_mem)) { drm_err(&dev->drm, From a5aa7bc1fca78c7fa127d9e33aa94a0c9066c1d6 Mon Sep 17 00:00:00 2001 From: Janusz Krzysztofik Date: Wed, 11 Jun 2025 12:42:13 +0200 Subject: [PATCH 26/41] drm/i915/gt: Fix timeline left held on VMA alloc error The following error has been reported sporadically by CI when a test unbinds the i915 driver on a ring submission platform: <4> [239.330153] ------------[ cut here ]------------ <4> [239.330166] i915 0000:00:02.0: [drm] drm_WARN_ON(dev_priv->mm.shrink_count) <4> [239.330196] WARNING: CPU: 1 PID: 18570 at drivers/gpu/drm/i915/i915_gem.c:1309 i915_gem_cleanup_early+0x13e/0x150 [i915] ... <4> [239.330640] RIP: 0010:i915_gem_cleanup_early+0x13e/0x150 [i915] ... <4> [239.330942] Call Trace: <4> [239.330944] <4> [239.330949] i915_driver_late_release+0x2b/0xa0 [i915] <4> [239.331202] i915_driver_release+0x86/0xa0 [i915] <4> [239.331482] devm_drm_dev_init_release+0x61/0x90 <4> [239.331494] devm_action_release+0x15/0x30 <4> [239.331504] release_nodes+0x3d/0x120 <4> [239.331517] devres_release_all+0x96/0xd0 <4> [239.331533] device_unbind_cleanup+0x12/0x80 <4> [239.331543] device_release_driver_internal+0x23a/0x280 <4> [239.331550] ? bus_find_device+0xa5/0xe0 <4> [239.331563] device_driver_detach+0x14/0x20 ... <4> [357.719679] ---[ end trace 0000000000000000 ]--- If the test also unloads the i915 module then that's followed with: <3> [357.787478] ============================================================================= <3> [357.788006] BUG i915_vma (Tainted: G U W N ): Objects remaining on __kmem_cache_shutdown() <3> [357.788031] ----------------------------------------------------------------------------- <3> [357.788204] Object 0xffff888109e7f480 @offset=29824 <3> [357.788670] Allocated in i915_vma_instance+0xee/0xc10 [i915] age=292729 cpu=4 pid=2244 <4> [357.788994] i915_vma_instance+0xee/0xc10 [i915] <4> [357.789290] init_status_page+0x7b/0x420 [i915] <4> [357.789532] intel_engines_init+0x1d8/0x980 [i915] <4> [357.789772] intel_gt_init+0x175/0x450 [i915] <4> [357.790014] i915_gem_init+0x113/0x340 [i915] <4> [357.790281] i915_driver_probe+0x847/0xed0 [i915] <4> [357.790504] i915_pci_probe+0xe6/0x220 [i915] ... Closer analysis of CI results history has revealed a dependency of the error on a few IGT tests, namely: - igt@api_intel_allocator@fork-simple-stress-signal, - igt@api_intel_allocator@two-level-inception-interruptible, - igt@gem_linear_blits@interruptible, - igt@prime_mmap_coherency@ioctl-errors, which invisibly trigger the issue, then exhibited with first driver unbind attempt. All of the above tests perform actions which are actively interrupted with signals. Further debugging has allowed to narrow that scope down to DRM_IOCTL_I915_GEM_EXECBUFFER2, and ring_context_alloc(), specific to ring submission, in particular. If successful then that function, or its execlists or GuC submission equivalent, is supposed to be called only once per GEM context engine, followed by raise of a flag that prevents the function from being called again. The function is expected to unwind its internal errors itself, so it may be safely called once more after it returns an error. In case of ring submission, the function first gets a reference to the engine's legacy timeline and then allocates a VMA. If the VMA allocation fails, e.g. when i915_vma_instance() called from inside is interrupted with a signal, then ring_context_alloc() fails, leaving the timeline held referenced. On next I915_GEM_EXECBUFFER2 IOCTL, another reference to the timeline is got, and only that last one is put on successful completion. As a consequence, the legacy timeline, with its underlying engine status page's VMA object, is still held and not released on driver unbind. Get the legacy timeline only after successful allocation of the context engine's VMA. v2: Add a note on other submission methods (Krzysztof Karas): Both execlists and GuC submission use lrc_alloc() which seems free from a similar issue. Fixes: 75d0a7f31eec ("drm/i915: Lift timeline into intel_context") Closes: https://gitlab.freedesktop.org/drm/i915/kernel/-/issues/12061 Cc: Chris Wilson Cc: Matthew Auld Cc: Krzysztof Karas Reviewed-by: Sebastian Brzezinka Reviewed-by: Krzysztof Niemiec Signed-off-by: Janusz Krzysztofik Reviewed-by: Nitin Gote Reviewed-by: Andi Shyti Signed-off-by: Andi Shyti Link: https://lore.kernel.org/r/20250611104352.1014011-2-janusz.krzysztofik@linux.intel.com (cherry picked from commit cc43422b3cc79eacff4c5a8ba0d224688ca9dd4f) Signed-off-by: Joonas Lahtinen --- drivers/gpu/drm/i915/gt/intel_ring_submission.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gt/intel_ring_submission.c b/drivers/gpu/drm/i915/gt/intel_ring_submission.c index a876a34455f1..2a6d79abf25b 100644 --- a/drivers/gpu/drm/i915/gt/intel_ring_submission.c +++ b/drivers/gpu/drm/i915/gt/intel_ring_submission.c @@ -610,7 +610,6 @@ static int ring_context_alloc(struct intel_context *ce) /* One ringbuffer to rule them all */ GEM_BUG_ON(!engine->legacy.ring); ce->ring = engine->legacy.ring; - ce->timeline = intel_timeline_get(engine->legacy.timeline); GEM_BUG_ON(ce->state); if (engine->context_size) { @@ -623,6 +622,8 @@ static int ring_context_alloc(struct intel_context *ce) ce->state = vma; } + ce->timeline = intel_timeline_get(engine->legacy.timeline); + return 0; } From 8cadce97bf264ed478669c6f32d5603b34608335 Mon Sep 17 00:00:00 2001 From: Junxiao Chang Date: Fri, 25 Apr 2025 23:11:07 +0800 Subject: [PATCH 27/41] drm/i915/gsc: mei interrupt top half should be in irq disabled context MEI GSC interrupt comes from i915. It has top half and bottom half. Top half is called from i915 interrupt handler. It should be in irq disabled context. With RT kernel, by default i915 IRQ handler is in threaded IRQ. MEI GSC top half might be in threaded IRQ context. generic_handle_irq_safe API could be called from either IRQ or process context, it disables local IRQ then calls MEI GSC interrupt top half. This change fixes A380/A770 GPU boot hang issue with RT kernel. Fixes: 1e3dc1d8622b ("drm/i915/gsc: add gsc as a mei auxiliary device") Tested-by: Furong Zhou Suggested-by: Sebastian Andrzej Siewior Acked-by: Sebastian Andrzej Siewior Signed-off-by: Junxiao Chang Link: https://lore.kernel.org/r/20250425151108.643649-1-junxiao.chang@intel.com Reviewed-by: Rodrigo Vivi Signed-off-by: Rodrigo Vivi (cherry picked from commit dccf655f69002d496a527ba441b4f008aa5bebbf) Signed-off-by: Joonas Lahtinen --- drivers/gpu/drm/i915/gt/intel_gsc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gt/intel_gsc.c b/drivers/gpu/drm/i915/gt/intel_gsc.c index 1e925c75fb08..c43febc862dc 100644 --- a/drivers/gpu/drm/i915/gt/intel_gsc.c +++ b/drivers/gpu/drm/i915/gt/intel_gsc.c @@ -284,7 +284,7 @@ static void gsc_irq_handler(struct intel_gt *gt, unsigned int intf_id) if (gt->gsc.intf[intf_id].irq < 0) return; - ret = generic_handle_irq(gt->gsc.intf[intf_id].irq); + ret = generic_handle_irq_safe(gt->gsc.intf[intf_id].irq); if (ret) gt_err_ratelimited(gt, "error handling GSC irq: %d\n", ret); } From 2d5cff2b4bc567dcaad7ab5b46c973ba534cc062 Mon Sep 17 00:00:00 2001 From: Jia Yao Date: Thu, 12 Jun 2025 22:46:20 +0000 Subject: [PATCH 28/41] drm/xe: Fix out-of-bounds field write in MI_STORE_DATA_IMM According to Bspec, bits 0~9 of MI_STORE_DATA_IMM must not exceed 0x3FE. The macro MI_SDI_NUM_QW(x) evaluates to 2 * x + 1, which means the condition 2 * x + 1 <= 0x3FE must be satisfied. Therefore, the maximum valid value for x is 0x1FE, not 0x1FF. v2 - Replace 0x1fe with macro MAX_PTE_PER_SDI (Auld, Matthew & Patelczyk, Maciej) v3 - Change macro MAX_PTE_PER_SDI from 0x1fe to 0x1feU (De Marchi, Lucas) Bspec: 60246 Fixes: 9c44fd5f6e8a ("drm/xe: Add migrate layer functions for SVM support") Cc: Matthew Brost Cc: Brian3 Nguyen Cc: Alex Zuo Cc: Matthew Auld Cc: Maciej Patelczyk Cc: Lucas De Marchi Suggested-by: Shuicheng Lin Signed-off-by: Jia Yao Reviewed-by: Matthew Brost Reviewed-by: Lucas De Marchi Reviewed-by: Maciej Patelczyk Link: https://lore.kernel.org/r/20250612224620.161105-1-jia.yao@intel.com Signed-off-by: Lucas De Marchi (cherry picked from commit c038bdba98c9f6a36378044a9d4385531a194d3e) Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_migrate.c | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c index 8f8e9fdfb2a8..7acdc4c78866 100644 --- a/drivers/gpu/drm/xe/xe_migrate.c +++ b/drivers/gpu/drm/xe/xe_migrate.c @@ -82,7 +82,7 @@ struct xe_migrate { * of the instruction. Subtracting the instruction header (1 dword) and * address (2 dwords), that leaves 0x3FD dwords (0x1FE qwords) for PTE values. */ -#define MAX_PTE_PER_SDI 0x1FE +#define MAX_PTE_PER_SDI 0x1FEU /** * xe_tile_migrate_exec_queue() - Get this tile's migrate exec queue. @@ -1553,15 +1553,17 @@ static u32 pte_update_cmd_size(u64 size) u64 entries = DIV_U64_ROUND_UP(size, XE_PAGE_SIZE); XE_WARN_ON(size > MAX_PREEMPTDISABLE_TRANSFER); + /* * MI_STORE_DATA_IMM command is used to update page table. Each - * instruction can update maximumly 0x1ff pte entries. To update - * n (n <= 0x1ff) pte entries, we need: - * 1 dword for the MI_STORE_DATA_IMM command header (opcode etc) - * 2 dword for the page table's physical location - * 2*n dword for value of pte to fill (each pte entry is 2 dwords) + * instruction can update maximumly MAX_PTE_PER_SDI pte entries. To + * update n (n <= MAX_PTE_PER_SDI) pte entries, we need: + * + * - 1 dword for the MI_STORE_DATA_IMM command header (opcode etc) + * - 2 dword for the page table's physical location + * - 2*n dword for value of pte to fill (each pte entry is 2 dwords) */ - num_dword = (1 + 2) * DIV_U64_ROUND_UP(entries, 0x1ff); + num_dword = (1 + 2) * DIV_U64_ROUND_UP(entries, MAX_PTE_PER_SDI); num_dword += entries * 2; return num_dword; @@ -1577,7 +1579,7 @@ static void build_pt_update_batch_sram(struct xe_migrate *m, ptes = DIV_ROUND_UP(size, XE_PAGE_SIZE); while (ptes) { - u32 chunk = min(0x1ffU, ptes); + u32 chunk = min(MAX_PTE_PER_SDI, ptes); bb->cs[bb->len++] = MI_STORE_DATA_IMM | MI_SDI_NUM_QW(chunk); bb->cs[bb->len++] = pt_offset; From afcad92411772a1f361339f22c49f855c6cc7d0f Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Wed, 11 Jun 2025 20:19:25 -0700 Subject: [PATCH 29/41] drm/xe: Make WA BB part of LRC BO No idea why, but without this GuC context switches randomly fail when running IGTs in a loop. Need to follow up why this fixes the aforementioned issue but can live with a stable driver for now. Fixes: 617d824c5323 ("drm/xe: Add WA BB to capture active context utilization") Cc: stable@vger.kernel.org Signed-off-by: Matthew Brost Reviewed-by: Lucas De Marchi Tested-by: Shuicheng Lin Link: https://lore.kernel.org/r/20250612031925.4009701-1-matthew.brost@intel.com (cherry picked from commit 3a1edef8f4b58b0ba826bc68bf4bce4bdf59ecf3) Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_lrc.c | 37 ++++++++++++++++--------------- drivers/gpu/drm/xe/xe_lrc_types.h | 3 --- 2 files changed, 19 insertions(+), 21 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_lrc.c b/drivers/gpu/drm/xe/xe_lrc.c index bf7c3981897d..6e7b70532d11 100644 --- a/drivers/gpu/drm/xe/xe_lrc.c +++ b/drivers/gpu/drm/xe/xe_lrc.c @@ -40,6 +40,7 @@ #define LRC_PPHWSP_SIZE SZ_4K #define LRC_INDIRECT_RING_STATE_SIZE SZ_4K +#define LRC_WA_BB_SIZE SZ_4K static struct xe_device * lrc_to_xe(struct xe_lrc *lrc) @@ -910,7 +911,11 @@ static void xe_lrc_finish(struct xe_lrc *lrc) { xe_hw_fence_ctx_finish(&lrc->fence_ctx); xe_bo_unpin_map_no_vm(lrc->bo); - xe_bo_unpin_map_no_vm(lrc->bb_per_ctx_bo); +} + +static size_t wa_bb_offset(struct xe_lrc *lrc) +{ + return lrc->bo->size - LRC_WA_BB_SIZE; } /* @@ -943,15 +948,16 @@ static void xe_lrc_finish(struct xe_lrc *lrc) #define CONTEXT_ACTIVE 1ULL static int xe_lrc_setup_utilization(struct xe_lrc *lrc) { + const size_t max_size = LRC_WA_BB_SIZE; u32 *cmd, *buf = NULL; - if (lrc->bb_per_ctx_bo->vmap.is_iomem) { - buf = kmalloc(lrc->bb_per_ctx_bo->size, GFP_KERNEL); + if (lrc->bo->vmap.is_iomem) { + buf = kmalloc(max_size, GFP_KERNEL); if (!buf) return -ENOMEM; cmd = buf; } else { - cmd = lrc->bb_per_ctx_bo->vmap.vaddr; + cmd = lrc->bo->vmap.vaddr + wa_bb_offset(lrc); } *cmd++ = MI_STORE_REGISTER_MEM | MI_SRM_USE_GGTT | MI_SRM_ADD_CS_OFFSET; @@ -974,13 +980,14 @@ static int xe_lrc_setup_utilization(struct xe_lrc *lrc) *cmd++ = MI_BATCH_BUFFER_END; if (buf) { - xe_map_memcpy_to(gt_to_xe(lrc->gt), &lrc->bb_per_ctx_bo->vmap, 0, - buf, (cmd - buf) * sizeof(*cmd)); + xe_map_memcpy_to(gt_to_xe(lrc->gt), &lrc->bo->vmap, + wa_bb_offset(lrc), buf, + (cmd - buf) * sizeof(*cmd)); kfree(buf); } - xe_lrc_write_ctx_reg(lrc, CTX_BB_PER_CTX_PTR, - xe_bo_ggtt_addr(lrc->bb_per_ctx_bo) | 1); + xe_lrc_write_ctx_reg(lrc, CTX_BB_PER_CTX_PTR, xe_bo_ggtt_addr(lrc->bo) + + wa_bb_offset(lrc) + 1); return 0; } @@ -1018,20 +1025,13 @@ static int xe_lrc_init(struct xe_lrc *lrc, struct xe_hw_engine *hwe, * FIXME: Perma-pinning LRC as we don't yet support moving GGTT address * via VM bind calls. */ - lrc->bo = xe_bo_create_pin_map(xe, tile, NULL, lrc_size, + lrc->bo = xe_bo_create_pin_map(xe, tile, NULL, + lrc_size + LRC_WA_BB_SIZE, ttm_bo_type_kernel, bo_flags); if (IS_ERR(lrc->bo)) return PTR_ERR(lrc->bo); - lrc->bb_per_ctx_bo = xe_bo_create_pin_map(xe, tile, NULL, SZ_4K, - ttm_bo_type_kernel, - bo_flags); - if (IS_ERR(lrc->bb_per_ctx_bo)) { - err = PTR_ERR(lrc->bb_per_ctx_bo); - goto err_lrc_finish; - } - lrc->size = lrc_size; lrc->ring.size = ring_size; lrc->ring.tail = 0; @@ -1819,7 +1819,8 @@ struct xe_lrc_snapshot *xe_lrc_snapshot_capture(struct xe_lrc *lrc) snapshot->seqno = xe_lrc_seqno(lrc); snapshot->lrc_bo = xe_bo_get(lrc->bo); snapshot->lrc_offset = xe_lrc_pphwsp_offset(lrc); - snapshot->lrc_size = lrc->bo->size - snapshot->lrc_offset; + snapshot->lrc_size = lrc->bo->size - snapshot->lrc_offset - + LRC_WA_BB_SIZE; snapshot->lrc_snapshot = NULL; snapshot->ctx_timestamp = lower_32_bits(xe_lrc_ctx_timestamp(lrc)); snapshot->ctx_job_timestamp = xe_lrc_ctx_job_timestamp(lrc); diff --git a/drivers/gpu/drm/xe/xe_lrc_types.h b/drivers/gpu/drm/xe/xe_lrc_types.h index ae24cf6f8dd9..883e550a9423 100644 --- a/drivers/gpu/drm/xe/xe_lrc_types.h +++ b/drivers/gpu/drm/xe/xe_lrc_types.h @@ -53,9 +53,6 @@ struct xe_lrc { /** @ctx_timestamp: readout value of CTX_TIMESTAMP on last update */ u64 ctx_timestamp; - - /** @bb_per_ctx_bo: buffer object for per context batch wa buffer */ - struct xe_bo *bb_per_ctx_bo; }; struct xe_lrc_snapshot; From d8390768dcf6f5a78af56aa03797a076871b01f3 Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Wed, 18 Jun 2025 11:49:58 -0700 Subject: [PATCH 30/41] drm/xe/guc_pc: Add _locked variant for min/max freq There are places in which the getters/setters are called one after the other causing a multiple lock()/unlock(). These are not currently a problem since they are all happening from the same thread, but there's a race possibility as calls are added outside of the early init when the max/min and stashed values need to be correlated. Add the _locked() variants to prepare for that. Reviewed-by: Rodrigo Vivi Link: https://lore.kernel.org/r/20250618-wa-22019338487-v5-1-b888388477f2@intel.com Signed-off-by: Lucas De Marchi (cherry picked from commit 1beae9aa2b88d3a02eb666e7b777eb2d7bc645f4) Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_guc_pc.c | 123 ++++++++++++++++++--------------- 1 file changed, 69 insertions(+), 54 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_guc_pc.c b/drivers/gpu/drm/xe/xe_guc_pc.c index 3beaaa7b25c1..2d9663acf2f2 100644 --- a/drivers/gpu/drm/xe/xe_guc_pc.c +++ b/drivers/gpu/drm/xe/xe_guc_pc.c @@ -5,6 +5,7 @@ #include "xe_guc_pc.h" +#include #include #include @@ -553,6 +554,25 @@ u32 xe_guc_pc_get_rpn_freq(struct xe_guc_pc *pc) return pc->rpn_freq; } +static int xe_guc_pc_get_min_freq_locked(struct xe_guc_pc *pc, u32 *freq) +{ + int ret; + + lockdep_assert_held(&pc->freq_lock); + + /* Might be in the middle of a gt reset */ + if (!pc->freq_ready) + return -EAGAIN; + + ret = pc_action_query_task_state(pc); + if (ret) + return ret; + + *freq = pc_get_min_freq(pc); + + return 0; +} + /** * xe_guc_pc_get_min_freq - Get the min operational frequency * @pc: The GuC PC @@ -562,27 +582,29 @@ u32 xe_guc_pc_get_rpn_freq(struct xe_guc_pc *pc) * -EAGAIN if GuC PC not ready (likely in middle of a reset). */ int xe_guc_pc_get_min_freq(struct xe_guc_pc *pc, u32 *freq) +{ + guard(mutex)(&pc->freq_lock); + + return xe_guc_pc_get_min_freq_locked(pc, freq); +} + +static int xe_guc_pc_set_min_freq_locked(struct xe_guc_pc *pc, u32 freq) { int ret; - xe_device_assert_mem_access(pc_to_xe(pc)); + lockdep_assert_held(&pc->freq_lock); - mutex_lock(&pc->freq_lock); - if (!pc->freq_ready) { - /* Might be in the middle of a gt reset */ - ret = -EAGAIN; - goto out; - } + /* Might be in the middle of a gt reset */ + if (!pc->freq_ready) + return -EAGAIN; - ret = pc_action_query_task_state(pc); + ret = pc_set_min_freq(pc, freq); if (ret) - goto out; + return ret; - *freq = pc_get_min_freq(pc); + pc->user_requested_min = freq; -out: - mutex_unlock(&pc->freq_lock); - return ret; + return 0; } /** @@ -595,25 +617,29 @@ int xe_guc_pc_get_min_freq(struct xe_guc_pc *pc, u32 *freq) * -EINVAL if value out of bounds. */ int xe_guc_pc_set_min_freq(struct xe_guc_pc *pc, u32 freq) +{ + guard(mutex)(&pc->freq_lock); + + return xe_guc_pc_set_min_freq_locked(pc, freq); +} + +static int xe_guc_pc_get_max_freq_locked(struct xe_guc_pc *pc, u32 *freq) { int ret; - mutex_lock(&pc->freq_lock); - if (!pc->freq_ready) { - /* Might be in the middle of a gt reset */ - ret = -EAGAIN; - goto out; - } + lockdep_assert_held(&pc->freq_lock); - ret = pc_set_min_freq(pc, freq); + /* Might be in the middle of a gt reset */ + if (!pc->freq_ready) + return -EAGAIN; + + ret = pc_action_query_task_state(pc); if (ret) - goto out; + return ret; - pc->user_requested_min = freq; + *freq = pc_get_max_freq(pc); -out: - mutex_unlock(&pc->freq_lock); - return ret; + return 0; } /** @@ -625,25 +651,29 @@ int xe_guc_pc_set_min_freq(struct xe_guc_pc *pc, u32 freq) * -EAGAIN if GuC PC not ready (likely in middle of a reset). */ int xe_guc_pc_get_max_freq(struct xe_guc_pc *pc, u32 *freq) +{ + guard(mutex)(&pc->freq_lock); + + return xe_guc_pc_get_max_freq_locked(pc, freq); +} + +static int xe_guc_pc_set_max_freq_locked(struct xe_guc_pc *pc, u32 freq) { int ret; - mutex_lock(&pc->freq_lock); - if (!pc->freq_ready) { - /* Might be in the middle of a gt reset */ - ret = -EAGAIN; - goto out; - } + lockdep_assert_held(&pc->freq_lock); - ret = pc_action_query_task_state(pc); + /* Might be in the middle of a gt reset */ + if (!pc->freq_ready) + return -EAGAIN; + + ret = pc_set_max_freq(pc, freq); if (ret) - goto out; + return ret; - *freq = pc_get_max_freq(pc); + pc->user_requested_max = freq; -out: - mutex_unlock(&pc->freq_lock); - return ret; + return 0; } /** @@ -657,24 +687,9 @@ int xe_guc_pc_get_max_freq(struct xe_guc_pc *pc, u32 *freq) */ int xe_guc_pc_set_max_freq(struct xe_guc_pc *pc, u32 freq) { - int ret; + guard(mutex)(&pc->freq_lock); - mutex_lock(&pc->freq_lock); - if (!pc->freq_ready) { - /* Might be in the middle of a gt reset */ - ret = -EAGAIN; - goto out; - } - - ret = pc_set_max_freq(pc, freq); - if (ret) - goto out; - - pc->user_requested_max = freq; - -out: - mutex_unlock(&pc->freq_lock); - return ret; + return xe_guc_pc_set_max_freq_locked(pc, freq); } /** From 4cec9099b93a63eb0b1e8cbbe6419fdaeb72e09b Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Wed, 18 Jun 2025 11:49:59 -0700 Subject: [PATCH 31/41] drm/xe/xe_guc_pc: Lock once to update stashed frequencies pc_set_mert_freq_cap() currently lock()/unlock() the mutex multiple times to stash the current frequencies. It's not a problem since xe_guc_pc_restore_stashed_freq() is guaranteed to be called only later in the init sequence. However, now that we have _locked() variants for this functions, use them and avoid potential issues when called from other places or using the same pattern. While at it, prefer and early return for the WA check to reduce indentation. Reviewed-by: Rodrigo Vivi Link: https://lore.kernel.org/r/20250618-wa-22019338487-v5-2-b888388477f2@intel.com Signed-off-by: Lucas De Marchi (cherry picked from commit d878c97daa603573e5af01fd8beec2fffdb42ad1) Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_guc_pc.c | 39 +++++++++++++++++----------------- 1 file changed, 20 insertions(+), 19 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_guc_pc.c b/drivers/gpu/drm/xe/xe_guc_pc.c index 2d9663acf2f2..9f029acbd257 100644 --- a/drivers/gpu/drm/xe/xe_guc_pc.c +++ b/drivers/gpu/drm/xe/xe_guc_pc.c @@ -885,27 +885,28 @@ static int pc_adjust_requested_freq(struct xe_guc_pc *pc) static int pc_set_mert_freq_cap(struct xe_guc_pc *pc) { - int ret = 0; + int ret; - if (XE_WA(pc_to_gt(pc), 22019338487)) { - /* - * Get updated min/max and stash them. - */ - ret = xe_guc_pc_get_min_freq(pc, &pc->stashed_min_freq); - if (!ret) - ret = xe_guc_pc_get_max_freq(pc, &pc->stashed_max_freq); - if (ret) - return ret; + if (!XE_WA(pc_to_gt(pc), 22019338487)) + return 0; - /* - * Ensure min and max are bound by MERT_FREQ_CAP until driver loads. - */ - mutex_lock(&pc->freq_lock); - ret = pc_set_min_freq(pc, min(pc->rpe_freq, pc_max_freq_cap(pc))); - if (!ret) - ret = pc_set_max_freq(pc, min(pc->rp0_freq, pc_max_freq_cap(pc))); - mutex_unlock(&pc->freq_lock); - } + guard(mutex)(&pc->freq_lock); + + /* + * Get updated min/max and stash them. + */ + ret = xe_guc_pc_get_min_freq_locked(pc, &pc->stashed_min_freq); + if (!ret) + ret = xe_guc_pc_get_max_freq_locked(pc, &pc->stashed_max_freq); + if (ret) + return ret; + + /* + * Ensure min and max are bound by MERT_FREQ_CAP until driver loads. + */ + ret = pc_set_min_freq(pc, min(pc->rpe_freq, pc_max_freq_cap(pc))); + if (!ret) + ret = pc_set_max_freq(pc, min(pc->rp0_freq, pc_max_freq_cap(pc))); return ret; } From a1eec6cae95a1a0888cb8370338822ca81cd9436 Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Wed, 18 Jun 2025 11:50:00 -0700 Subject: [PATCH 32/41] drm/xe: Split xe_device_td_flush() xe_device_td_flush() has 2 possible implementations: an entire L2 flush or a transient flush, depending on WA 16023588340. Make this clear by splitting the function so it calls each of them. Reviewed-by: Matthew Auld Link: https://lore.kernel.org/r/20250618-wa-22019338487-v5-3-b888388477f2@intel.com Signed-off-by: Lucas De Marchi (cherry picked from commit 5e300ed8a545bdffc26b579c526b5fef7b2d5365) Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_device.c | 68 ++++++++++++++++++++-------------- 1 file changed, 40 insertions(+), 28 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c index c02c4c4e9412..a798914644fb 100644 --- a/drivers/gpu/drm/xe/xe_device.c +++ b/drivers/gpu/drm/xe/xe_device.c @@ -986,38 +986,15 @@ void xe_device_wmb(struct xe_device *xe) xe_mmio_write32(xe_root_tile_mmio(xe), VF_CAP_REG, 0); } -/** - * xe_device_td_flush() - Flush transient L3 cache entries - * @xe: The device - * - * Display engine has direct access to memory and is never coherent with L3/L4 - * caches (or CPU caches), however KMD is responsible for specifically flushing - * transient L3 GPU cache entries prior to the flip sequence to ensure scanout - * can happen from such a surface without seeing corruption. - * - * Display surfaces can be tagged as transient by mapping it using one of the - * various L3:XD PAT index modes on Xe2. - * - * Note: On non-discrete xe2 platforms, like LNL, the entire L3 cache is flushed - * at the end of each submission via PIPE_CONTROL for compute/render, since SA - * Media is not coherent with L3 and we want to support render-vs-media - * usescases. For other engines like copy/blt the HW internally forces uncached - * behaviour, hence why we can skip the TDF on such platforms. +/* + * Issue a TRANSIENT_FLUSH_REQUEST and wait for completion on each gt. */ -void xe_device_td_flush(struct xe_device *xe) +static void tdf_request_sync(struct xe_device *xe) { - struct xe_gt *gt; unsigned int fw_ref; + struct xe_gt *gt; u8 id; - if (!IS_DGFX(xe) || GRAPHICS_VER(xe) < 20) - return; - - if (XE_WA(xe_root_mmio_gt(xe), 16023588340)) { - xe_device_l2_flush(xe); - return; - } - for_each_gt(gt, xe, id) { if (xe_gt_is_media_type(gt)) continue; @@ -1027,6 +1004,7 @@ void xe_device_td_flush(struct xe_device *xe) return; xe_mmio_write32(>->mmio, XE2_TDF_CTRL, TRANSIENT_FLUSH_REQUEST); + /* * FIXME: We can likely do better here with our choice of * timeout. Currently we just assume the worst case, i.e. 150us, @@ -1057,15 +1035,49 @@ void xe_device_l2_flush(struct xe_device *xe) return; spin_lock(>->global_invl_lock); - xe_mmio_write32(>->mmio, XE2_GLOBAL_INVAL, 0x1); + xe_mmio_write32(>->mmio, XE2_GLOBAL_INVAL, 0x1); if (xe_mmio_wait32(>->mmio, XE2_GLOBAL_INVAL, 0x1, 0x0, 500, NULL, true)) xe_gt_err_once(gt, "Global invalidation timeout\n"); + spin_unlock(>->global_invl_lock); xe_force_wake_put(gt_to_fw(gt), fw_ref); } +/** + * xe_device_td_flush() - Flush transient L3 cache entries + * @xe: The device + * + * Display engine has direct access to memory and is never coherent with L3/L4 + * caches (or CPU caches), however KMD is responsible for specifically flushing + * transient L3 GPU cache entries prior to the flip sequence to ensure scanout + * can happen from such a surface without seeing corruption. + * + * Display surfaces can be tagged as transient by mapping it using one of the + * various L3:XD PAT index modes on Xe2. + * + * Note: On non-discrete xe2 platforms, like LNL, the entire L3 cache is flushed + * at the end of each submission via PIPE_CONTROL for compute/render, since SA + * Media is not coherent with L3 and we want to support render-vs-media + * usescases. For other engines like copy/blt the HW internally forces uncached + * behaviour, hence why we can skip the TDF on such platforms. + */ +void xe_device_td_flush(struct xe_device *xe) +{ + struct xe_gt *root_gt; + + if (!IS_DGFX(xe) || GRAPHICS_VER(xe) < 20) + return; + + root_gt = xe_root_mmio_gt(xe); + if (XE_WA(root_gt, 16023588340)) + /* A transient flush is not sufficient: flush the L2 */ + xe_device_l2_flush(xe); + else + tdf_request_sync(xe); +} + u32 xe_device_ccs_bytes(struct xe_device *xe, u64 size) { return xe_device_has_flat_ccs(xe) ? From a5c7dcdd969f2248cc91d65e5ac852859fc8dac2 Mon Sep 17 00:00:00 2001 From: Vinay Belgaumkar Date: Thu, 12 Jun 2025 00:09:02 -0700 Subject: [PATCH 33/41] drm/xe/bmg: Update Wa_14022085890 Set GT min frequency to 1200Mhz once driver load is complete. v2: Review comments (Rodrigo) v3: Apply Wa earlier so user_req_min is not clobbered. v4: Apply to all GTs (Lucas) Cc: Matt Roper Cc: Rodrigo Vivi Signed-off-by: Vinay Belgaumkar Reviewed-by: Stuart Summers Link: https://lore.kernel.org/r/20250612-wa-14022085890-v4-3-94ba5dcc1e30@intel.com Signed-off-by: Lucas De Marchi (cherry picked from commit bdde16c9ac5cb56ad2ee19792222fa1853577af7) Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_guc_pc.c | 5 +++++ drivers/gpu/drm/xe/xe_wa_oob.rules | 4 ++++ 2 files changed, 9 insertions(+) diff --git a/drivers/gpu/drm/xe/xe_guc_pc.c b/drivers/gpu/drm/xe/xe_guc_pc.c index 9f029acbd257..9d9d0e815fea 100644 --- a/drivers/gpu/drm/xe/xe_guc_pc.c +++ b/drivers/gpu/drm/xe/xe_guc_pc.c @@ -52,6 +52,7 @@ #define LNL_MERT_FREQ_CAP 800 #define BMG_MERT_FREQ_CAP 2133 +#define BMG_MIN_FREQ 1200 #define SLPC_RESET_TIMEOUT_MS 5 /* roughly 5ms, but no need for precision */ #define SLPC_RESET_EXTENDED_TIMEOUT_MS 1000 /* To be used only at pc_start */ @@ -832,6 +833,7 @@ void xe_guc_pc_init_early(struct xe_guc_pc *pc) static int pc_adjust_freq_bounds(struct xe_guc_pc *pc) { + struct xe_tile *tile = gt_to_tile(pc_to_gt(pc)); int ret; lockdep_assert_held(&pc->freq_lock); @@ -858,6 +860,9 @@ static int pc_adjust_freq_bounds(struct xe_guc_pc *pc) if (pc_get_min_freq(pc) > pc->rp0_freq) ret = pc_set_min_freq(pc, pc->rp0_freq); + if (XE_WA(tile->primary_gt, 14022085890)) + ret = pc_set_min_freq(pc, max(BMG_MIN_FREQ, pc_get_min_freq(pc))); + out: return ret; } diff --git a/drivers/gpu/drm/xe/xe_wa_oob.rules b/drivers/gpu/drm/xe/xe_wa_oob.rules index 9efc5accd43d..320766f6c5df 100644 --- a/drivers/gpu/drm/xe/xe_wa_oob.rules +++ b/drivers/gpu/drm/xe/xe_wa_oob.rules @@ -59,3 +59,7 @@ no_media_l3 MEDIA_VERSION(3000) MEDIA_VERSION_RANGE(1301, 3000) 16026508708 GRAPHICS_VERSION_RANGE(1200, 3001) MEDIA_VERSION_RANGE(1300, 3000) + +# SoC workaround - currently applies to all platforms with the following +# primary GT GMDID +14022085890 GRAPHICS_VERSION(2001) From 84c0b4a00610afbde650fdb8ad6db0424f7b2cc3 Mon Sep 17 00:00:00 2001 From: Vinay Belgaumkar Date: Wed, 18 Jun 2025 11:50:01 -0700 Subject: [PATCH 34/41] drm/xe/bmg: Update Wa_22019338487 Limit GT max frequency to 2600MHz and wait for frequency to reduce before proceeding with a transient flush. This is really only needed for the transient flush: if L2 flush is needed due to 16023588340 then there's no need to do this additional wait since we are already using the bigger hammer. v2: Use generic names, ensure user set max frequency requests wait for flush to complete (Rodrigo) v3: - User requests wait via wait_var_event_timeout (Lucas) - Close races on flush + user requests (Lucas) - Fix xe_guc_pc_remove_flush_freq_limit() being called on last gt rather than root gt (Lucas) v4: - Only apply the freq reducing part if a TDF is needed: L2 flush trumps the need for waiting a lower frequency Fixes: aaa08078e725 ("drm/xe/bmg: Apply Wa_22019338487") Reviewed-by: Rodrigo Vivi Signed-off-by: Vinay Belgaumkar Link: https://lore.kernel.org/r/20250618-wa-22019338487-v5-4-b888388477f2@intel.com Signed-off-by: Lucas De Marchi (cherry picked from commit deea6a7d6d803d6bb874a3e6f1b312e560e6c6df) Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_device.c | 8 +- drivers/gpu/drm/xe/xe_guc_pc.c | 125 +++++++++++++++++++++++++++ drivers/gpu/drm/xe/xe_guc_pc.h | 2 + drivers/gpu/drm/xe/xe_guc_pc_types.h | 2 + 4 files changed, 135 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c index a798914644fb..e9f3c1a53db2 100644 --- a/drivers/gpu/drm/xe/xe_device.c +++ b/drivers/gpu/drm/xe/xe_device.c @@ -40,6 +40,7 @@ #include "xe_gt_printk.h" #include "xe_gt_sriov_vf.h" #include "xe_guc.h" +#include "xe_guc_pc.h" #include "xe_hw_engine_group.h" #include "xe_hwmon.h" #include "xe_irq.h" @@ -1071,11 +1072,14 @@ void xe_device_td_flush(struct xe_device *xe) return; root_gt = xe_root_mmio_gt(xe); - if (XE_WA(root_gt, 16023588340)) + if (XE_WA(root_gt, 16023588340)) { /* A transient flush is not sufficient: flush the L2 */ xe_device_l2_flush(xe); - else + } else { + xe_guc_pc_apply_flush_freq_limit(&root_gt->uc.guc.pc); tdf_request_sync(xe); + xe_guc_pc_remove_flush_freq_limit(&root_gt->uc.guc.pc); + } } u32 xe_device_ccs_bytes(struct xe_device *xe, u64 size) diff --git a/drivers/gpu/drm/xe/xe_guc_pc.c b/drivers/gpu/drm/xe/xe_guc_pc.c index 9d9d0e815fea..c0ca61695d76 100644 --- a/drivers/gpu/drm/xe/xe_guc_pc.c +++ b/drivers/gpu/drm/xe/xe_guc_pc.c @@ -7,7 +7,9 @@ #include #include +#include #include +#include #include #include @@ -53,9 +55,11 @@ #define LNL_MERT_FREQ_CAP 800 #define BMG_MERT_FREQ_CAP 2133 #define BMG_MIN_FREQ 1200 +#define BMG_MERT_FLUSH_FREQ_CAP 2600 #define SLPC_RESET_TIMEOUT_MS 5 /* roughly 5ms, but no need for precision */ #define SLPC_RESET_EXTENDED_TIMEOUT_MS 1000 /* To be used only at pc_start */ +#define SLPC_ACT_FREQ_TIMEOUT_MS 100 /** * DOC: GuC Power Conservation (PC) @@ -143,6 +147,36 @@ static int wait_for_pc_state(struct xe_guc_pc *pc, return -ETIMEDOUT; } +static int wait_for_flush_complete(struct xe_guc_pc *pc) +{ + const unsigned long timeout = msecs_to_jiffies(30); + + if (!wait_var_event_timeout(&pc->flush_freq_limit, + !atomic_read(&pc->flush_freq_limit), + timeout)) + return -ETIMEDOUT; + + return 0; +} + +static int wait_for_act_freq_limit(struct xe_guc_pc *pc, u32 freq) +{ + int timeout_us = SLPC_ACT_FREQ_TIMEOUT_MS * USEC_PER_MSEC; + int slept, wait = 10; + + for (slept = 0; slept < timeout_us;) { + if (xe_guc_pc_get_act_freq(pc) <= freq) + return 0; + + usleep_range(wait, wait << 1); + slept += wait; + wait <<= 1; + if (slept + wait > timeout_us) + wait = timeout_us - slept; + } + + return -ETIMEDOUT; +} static int pc_action_reset(struct xe_guc_pc *pc) { struct xe_guc_ct *ct = pc_to_ct(pc); @@ -688,6 +722,11 @@ static int xe_guc_pc_set_max_freq_locked(struct xe_guc_pc *pc, u32 freq) */ int xe_guc_pc_set_max_freq(struct xe_guc_pc *pc, u32 freq) { + if (XE_WA(pc_to_gt(pc), 22019338487)) { + if (wait_for_flush_complete(pc) != 0) + return -EAGAIN; + } + guard(mutex)(&pc->freq_lock); return xe_guc_pc_set_max_freq_locked(pc, freq); @@ -888,6 +927,92 @@ static int pc_adjust_requested_freq(struct xe_guc_pc *pc) return ret; } +static bool needs_flush_freq_limit(struct xe_guc_pc *pc) +{ + struct xe_gt *gt = pc_to_gt(pc); + + return XE_WA(gt, 22019338487) && + pc->rp0_freq > BMG_MERT_FLUSH_FREQ_CAP; +} + +/** + * xe_guc_pc_apply_flush_freq_limit() - Limit max GT freq during L2 flush + * @pc: the xe_guc_pc object + * + * As per the WA, reduce max GT frequency during L2 cache flush + */ +void xe_guc_pc_apply_flush_freq_limit(struct xe_guc_pc *pc) +{ + struct xe_gt *gt = pc_to_gt(pc); + u32 max_freq; + int ret; + + if (!needs_flush_freq_limit(pc)) + return; + + guard(mutex)(&pc->freq_lock); + + ret = xe_guc_pc_get_max_freq_locked(pc, &max_freq); + if (!ret && max_freq > BMG_MERT_FLUSH_FREQ_CAP) { + ret = pc_set_max_freq(pc, BMG_MERT_FLUSH_FREQ_CAP); + if (ret) { + xe_gt_err_once(gt, "Failed to cap max freq on flush to %u, %pe\n", + BMG_MERT_FLUSH_FREQ_CAP, ERR_PTR(ret)); + return; + } + + atomic_set(&pc->flush_freq_limit, 1); + + /* + * If user has previously changed max freq, stash that value to + * restore later, otherwise use the current max. New user + * requests wait on flush. + */ + if (pc->user_requested_max != 0) + pc->stashed_max_freq = pc->user_requested_max; + else + pc->stashed_max_freq = max_freq; + } + + /* + * Wait for actual freq to go below the flush cap: even if the previous + * max was below cap, the current one might still be above it + */ + ret = wait_for_act_freq_limit(pc, BMG_MERT_FLUSH_FREQ_CAP); + if (ret) + xe_gt_err_once(gt, "Actual freq did not reduce to %u, %pe\n", + BMG_MERT_FLUSH_FREQ_CAP, ERR_PTR(ret)); +} + +/** + * xe_guc_pc_remove_flush_freq_limit() - Remove max GT freq limit after L2 flush completes. + * @pc: the xe_guc_pc object + * + * Retrieve the previous GT max frequency value. + */ +void xe_guc_pc_remove_flush_freq_limit(struct xe_guc_pc *pc) +{ + struct xe_gt *gt = pc_to_gt(pc); + int ret = 0; + + if (!needs_flush_freq_limit(pc)) + return; + + if (!atomic_read(&pc->flush_freq_limit)) + return; + + mutex_lock(&pc->freq_lock); + + ret = pc_set_max_freq(>->uc.guc.pc, pc->stashed_max_freq); + if (ret) + xe_gt_err_once(gt, "Failed to restore max freq %u:%d", + pc->stashed_max_freq, ret); + + atomic_set(&pc->flush_freq_limit, 0); + mutex_unlock(&pc->freq_lock); + wake_up_var(&pc->flush_freq_limit); +} + static int pc_set_mert_freq_cap(struct xe_guc_pc *pc) { int ret; diff --git a/drivers/gpu/drm/xe/xe_guc_pc.h b/drivers/gpu/drm/xe/xe_guc_pc.h index 0a2664d5c811..52ecdd5ddbff 100644 --- a/drivers/gpu/drm/xe/xe_guc_pc.h +++ b/drivers/gpu/drm/xe/xe_guc_pc.h @@ -38,5 +38,7 @@ u64 xe_guc_pc_mc6_residency(struct xe_guc_pc *pc); void xe_guc_pc_init_early(struct xe_guc_pc *pc); int xe_guc_pc_restore_stashed_freq(struct xe_guc_pc *pc); void xe_guc_pc_raise_unslice(struct xe_guc_pc *pc); +void xe_guc_pc_apply_flush_freq_limit(struct xe_guc_pc *pc); +void xe_guc_pc_remove_flush_freq_limit(struct xe_guc_pc *pc); #endif /* _XE_GUC_PC_H_ */ diff --git a/drivers/gpu/drm/xe/xe_guc_pc_types.h b/drivers/gpu/drm/xe/xe_guc_pc_types.h index 2978ac9a249b..c02053948a57 100644 --- a/drivers/gpu/drm/xe/xe_guc_pc_types.h +++ b/drivers/gpu/drm/xe/xe_guc_pc_types.h @@ -15,6 +15,8 @@ struct xe_guc_pc { /** @bo: GGTT buffer object that is shared with GuC PC */ struct xe_bo *bo; + /** @flush_freq_limit: 1 when max freq changes are limited by driver */ + atomic_t flush_freq_limit; /** @rp0_freq: HW RP0 frequency - The Maximum one */ u32 rp0_freq; /** @rpa_freq: HW RPa frequency - The Achievable one */ From de6acfdc390ec2eb64d43186721038ddc93228b3 Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Wed, 11 Jun 2025 12:38:37 -0700 Subject: [PATCH 35/41] drm/xe: Fix kconfig prompt The xe driver is the official driver for Intel Xe2 and later, while maintaining experimental support for earlier GPUs. Reword the help message accordingly. Reviewed-by: Maarten Lankhorst Link: https://lore.kernel.org/r/20250611-xe-kconfig-help-v1-1-8bcc6b47d11a@intel.com Signed-off-by: Lucas De Marchi (cherry picked from commit 1488a3089de3d0bcdc9532da7ce04cf0af9d7dd0) Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/Kconfig | 5 +++-- drivers/gpu/drm/xe/xe_drv.h | 2 +- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/xe/Kconfig b/drivers/gpu/drm/xe/Kconfig index fcc2677a4229..884a3199b5be 100644 --- a/drivers/gpu/drm/xe/Kconfig +++ b/drivers/gpu/drm/xe/Kconfig @@ -1,6 +1,6 @@ # SPDX-License-Identifier: GPL-2.0-only config DRM_XE - tristate "Intel Xe Graphics" + tristate "Intel Xe2 Graphics" depends on DRM && PCI && (m || (y && KUNIT=y)) depends on INTEL_VSEC || !INTEL_VSEC depends on X86_PLATFORM_DEVICES || !(X86 && ACPI) @@ -46,7 +46,8 @@ config DRM_XE select AUXILIARY_BUS select HMM_MIRROR help - Experimental driver for Intel Xe series GPUs + Driver for Intel Xe2 series GPUs and later. Experimental support + for Xe series is also available. If "M" is selected, the module will be called xe. diff --git a/drivers/gpu/drm/xe/xe_drv.h b/drivers/gpu/drm/xe/xe_drv.h index d61650d4aa0b..95242a375e54 100644 --- a/drivers/gpu/drm/xe/xe_drv.h +++ b/drivers/gpu/drm/xe/xe_drv.h @@ -9,7 +9,7 @@ #include #define DRIVER_NAME "xe" -#define DRIVER_DESC "Intel Xe Graphics" +#define DRIVER_DESC "Intel Xe2 Graphics" /* Interface history: * From aa18d5769fcafe645a3ba01a9a69dde4f8dc8cc3 Mon Sep 17 00:00:00 2001 From: Harry Austen Date: Fri, 27 Jun 2025 13:30:35 -0700 Subject: [PATCH 36/41] drm/xe: Allow dropping kunit dependency as built-in MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix Kconfig symbol dependency on KUNIT, which isn't actually required for XE to be built-in. However, if KUNIT is enabled, it must be built-in too. Fixes: 08987a8b6820 ("drm/xe: Fix build with KUNIT=m") Cc: Lucas De Marchi Cc: Thomas Hellström Cc: Jani Nikula Cc: Maarten Lankhorst Signed-off-by: Harry Austen Reviewed-by: Lucas De Marchi Acked-by: Randy Dunlap Tested-by: Randy Dunlap Link: https://lore.kernel.org/r/20250627-xe-kunit-v2-2-756fe5cd56cf@intel.com Signed-off-by: Lucas De Marchi (cherry picked from commit a559434880b320b83733d739733250815aecf1b0) Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/Kconfig | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/xe/Kconfig b/drivers/gpu/drm/xe/Kconfig index 884a3199b5be..99a91355842e 100644 --- a/drivers/gpu/drm/xe/Kconfig +++ b/drivers/gpu/drm/xe/Kconfig @@ -1,7 +1,8 @@ # SPDX-License-Identifier: GPL-2.0-only config DRM_XE tristate "Intel Xe2 Graphics" - depends on DRM && PCI && (m || (y && KUNIT=y)) + depends on DRM && PCI + depends on KUNIT || !KUNIT depends on INTEL_VSEC || !INTEL_VSEC depends on X86_PLATFORM_DEVICES || !(X86 && ACPI) select INTERVAL_TREE From 8af39ec5cf2be522c8eb43a3d8005ed59e4daaee Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Fri, 13 Jun 2025 14:20:13 -0700 Subject: [PATCH 37/41] drm/sched: Increment job count before swapping tail spsc queue MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A small race exists between spsc_queue_push and the run-job worker, in which spsc_queue_push may return not-first while the run-job worker has already idled due to the job count being zero. If this race occurs, job scheduling stops, leading to hangs while waiting on the job’s DMA fences. Seal this race by incrementing the job count before appending to the SPSC queue. This race was observed on a drm-tip 6.16-rc1 build with the Xe driver in an SVM test case. Fixes: 1b1f42d8fde4 ("drm: move amd_gpu_scheduler into common location") Fixes: 27105db6c63a ("drm/amdgpu: Add SPSC queue to scheduler.") Cc: stable@vger.kernel.org Signed-off-by: Matthew Brost Reviewed-by: Jonathan Cavitt Link: https://lore.kernel.org/r/20250613212013.719312-1-matthew.brost@intel.com --- include/drm/spsc_queue.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/include/drm/spsc_queue.h b/include/drm/spsc_queue.h index 125f096c88cb..ee9df8cc67b7 100644 --- a/include/drm/spsc_queue.h +++ b/include/drm/spsc_queue.h @@ -70,9 +70,11 @@ static inline bool spsc_queue_push(struct spsc_queue *queue, struct spsc_node *n preempt_disable(); + atomic_inc(&queue->job_count); + smp_mb__after_atomic(); + tail = (struct spsc_node **)atomic_long_xchg(&queue->tail, (long)&node->next); WRITE_ONCE(*tail, node); - atomic_inc(&queue->job_count); /* * In case of first element verify new node will be visible to the consumer From 5307dce878d4126e1b375587318955bd019c3741 Mon Sep 17 00:00:00 2001 From: Thomas Zimmermann Date: Mon, 30 Jun 2025 10:36:47 +0200 Subject: [PATCH 38/41] drm/gem: Acquire references on GEM handles for framebuffers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A GEM handle can be released while the GEM buffer object is attached to a DRM framebuffer. This leads to the release of the dma-buf backing the buffer object, if any. [1] Trying to use the framebuffer in further mode-setting operations leads to a segmentation fault. Most easily happens with driver that use shadow planes for vmap-ing the dma-buf during a page flip. An example is shown below. [ 156.791968] ------------[ cut here ]------------ [ 156.796830] WARNING: CPU: 2 PID: 2255 at drivers/dma-buf/dma-buf.c:1527 dma_buf_vmap+0x224/0x430 [...] [ 156.942028] RIP: 0010:dma_buf_vmap+0x224/0x430 [ 157.043420] Call Trace: [ 157.045898] [ 157.048030] ? show_trace_log_lvl+0x1af/0x2c0 [ 157.052436] ? show_trace_log_lvl+0x1af/0x2c0 [ 157.056836] ? show_trace_log_lvl+0x1af/0x2c0 [ 157.061253] ? drm_gem_shmem_vmap+0x74/0x710 [ 157.065567] ? dma_buf_vmap+0x224/0x430 [ 157.069446] ? __warn.cold+0x58/0xe4 [ 157.073061] ? dma_buf_vmap+0x224/0x430 [ 157.077111] ? report_bug+0x1dd/0x390 [ 157.080842] ? handle_bug+0x5e/0xa0 [ 157.084389] ? exc_invalid_op+0x14/0x50 [ 157.088291] ? asm_exc_invalid_op+0x16/0x20 [ 157.092548] ? dma_buf_vmap+0x224/0x430 [ 157.096663] ? dma_resv_get_singleton+0x6d/0x230 [ 157.101341] ? __pfx_dma_buf_vmap+0x10/0x10 [ 157.105588] ? __pfx_dma_resv_get_singleton+0x10/0x10 [ 157.110697] drm_gem_shmem_vmap+0x74/0x710 [ 157.114866] drm_gem_vmap+0xa9/0x1b0 [ 157.118763] drm_gem_vmap_unlocked+0x46/0xa0 [ 157.123086] drm_gem_fb_vmap+0xab/0x300 [ 157.126979] drm_atomic_helper_prepare_planes.part.0+0x487/0xb10 [ 157.133032] ? lockdep_init_map_type+0x19d/0x880 [ 157.137701] drm_atomic_helper_commit+0x13d/0x2e0 [ 157.142671] ? drm_atomic_nonblocking_commit+0xa0/0x180 [ 157.147988] drm_mode_atomic_ioctl+0x766/0xe40 [...] [ 157.346424] ---[ end trace 0000000000000000 ]--- Acquiring GEM handles for the framebuffer's GEM buffer objects prevents this from happening. The framebuffer's cleanup later puts the handle references. Commit 1a148af06000 ("drm/gem-shmem: Use dma_buf from GEM object instance") triggers the segmentation fault easily by using the dma-buf field more widely. The underlying issue with reference counting has been present before. v2: - acquire the handle instead of the BO (Christian) - fix comment style (Christian) - drop the Fixes tag (Christian) - rename err_ gotos - add missing Link tag Suggested-by: Christian König Signed-off-by: Thomas Zimmermann Link: https://elixir.bootlin.com/linux/v6.15/source/drivers/gpu/drm/drm_gem.c#L241 # [1] Cc: Thomas Zimmermann Cc: Anusha Srivatsa Cc: Christian König Cc: Maarten Lankhorst Cc: Maxime Ripard Cc: Sumit Semwal Cc: "Christian König" Cc: linux-media@vger.kernel.org Cc: dri-devel@lists.freedesktop.org Cc: linaro-mm-sig@lists.linaro.org Cc: Reviewed-by: Christian König Link: https://lore.kernel.org/r/20250630084001.293053-1-tzimmermann@suse.de --- drivers/gpu/drm/drm_gem.c | 44 ++++++++++++++++++-- drivers/gpu/drm/drm_gem_framebuffer_helper.c | 16 +++---- drivers/gpu/drm/drm_internal.h | 2 + 3 files changed, 51 insertions(+), 11 deletions(-) diff --git a/drivers/gpu/drm/drm_gem.c b/drivers/gpu/drm/drm_gem.c index 1e659d2660f7..4bf0a76bb35e 100644 --- a/drivers/gpu/drm/drm_gem.c +++ b/drivers/gpu/drm/drm_gem.c @@ -212,6 +212,35 @@ void drm_gem_private_object_fini(struct drm_gem_object *obj) } EXPORT_SYMBOL(drm_gem_private_object_fini); +static void drm_gem_object_handle_get(struct drm_gem_object *obj) +{ + struct drm_device *dev = obj->dev; + + drm_WARN_ON(dev, !mutex_is_locked(&dev->object_name_lock)); + + if (obj->handle_count++ == 0) + drm_gem_object_get(obj); +} + +/** + * drm_gem_object_handle_get_unlocked - acquire reference on user-space handles + * @obj: GEM object + * + * Acquires a reference on the GEM buffer object's handle. Required + * to keep the GEM object alive. Call drm_gem_object_handle_put_unlocked() + * to release the reference. + */ +void drm_gem_object_handle_get_unlocked(struct drm_gem_object *obj) +{ + struct drm_device *dev = obj->dev; + + guard(mutex)(&dev->object_name_lock); + + drm_WARN_ON(dev, !obj->handle_count); /* first ref taken in create-tail helper */ + drm_gem_object_handle_get(obj); +} +EXPORT_SYMBOL(drm_gem_object_handle_get_unlocked); + /** * drm_gem_object_handle_free - release resources bound to userspace handles * @obj: GEM object to clean up. @@ -242,8 +271,14 @@ static void drm_gem_object_exported_dma_buf_free(struct drm_gem_object *obj) } } -static void -drm_gem_object_handle_put_unlocked(struct drm_gem_object *obj) +/** + * drm_gem_object_handle_put_unlocked - releases reference on user-space handles + * @obj: GEM object + * + * Releases a reference on the GEM buffer object's handle. Possibly releases + * the GEM buffer object and associated dma-buf objects. + */ +void drm_gem_object_handle_put_unlocked(struct drm_gem_object *obj) { struct drm_device *dev = obj->dev; bool final = false; @@ -268,6 +303,7 @@ drm_gem_object_handle_put_unlocked(struct drm_gem_object *obj) if (final) drm_gem_object_put(obj); } +EXPORT_SYMBOL(drm_gem_object_handle_put_unlocked); /* * Called at device or object close to release the file's @@ -389,8 +425,8 @@ drm_gem_handle_create_tail(struct drm_file *file_priv, int ret; WARN_ON(!mutex_is_locked(&dev->object_name_lock)); - if (obj->handle_count++ == 0) - drm_gem_object_get(obj); + + drm_gem_object_handle_get(obj); /* * Get the user-visible handle using idr. Preload and perform diff --git a/drivers/gpu/drm/drm_gem_framebuffer_helper.c b/drivers/gpu/drm/drm_gem_framebuffer_helper.c index 6f72e7a0f427..14a87788695d 100644 --- a/drivers/gpu/drm/drm_gem_framebuffer_helper.c +++ b/drivers/gpu/drm/drm_gem_framebuffer_helper.c @@ -99,7 +99,7 @@ void drm_gem_fb_destroy(struct drm_framebuffer *fb) unsigned int i; for (i = 0; i < fb->format->num_planes; i++) - drm_gem_object_put(fb->obj[i]); + drm_gem_object_handle_put_unlocked(fb->obj[i]); drm_framebuffer_cleanup(fb); kfree(fb); @@ -182,8 +182,10 @@ int drm_gem_fb_init_with_funcs(struct drm_device *dev, if (!objs[i]) { drm_dbg_kms(dev, "Failed to lookup GEM object\n"); ret = -ENOENT; - goto err_gem_object_put; + goto err_gem_object_handle_put_unlocked; } + drm_gem_object_handle_get_unlocked(objs[i]); + drm_gem_object_put(objs[i]); min_size = (height - 1) * mode_cmd->pitches[i] + drm_format_info_min_pitch(info, i, width) @@ -193,22 +195,22 @@ int drm_gem_fb_init_with_funcs(struct drm_device *dev, drm_dbg_kms(dev, "GEM object size (%zu) smaller than minimum size (%u) for plane %d\n", objs[i]->size, min_size, i); - drm_gem_object_put(objs[i]); + drm_gem_object_handle_put_unlocked(objs[i]); ret = -EINVAL; - goto err_gem_object_put; + goto err_gem_object_handle_put_unlocked; } } ret = drm_gem_fb_init(dev, fb, mode_cmd, objs, i, funcs); if (ret) - goto err_gem_object_put; + goto err_gem_object_handle_put_unlocked; return 0; -err_gem_object_put: +err_gem_object_handle_put_unlocked: while (i > 0) { --i; - drm_gem_object_put(objs[i]); + drm_gem_object_handle_put_unlocked(objs[i]); } return ret; } diff --git a/drivers/gpu/drm/drm_internal.h b/drivers/gpu/drm/drm_internal.h index e44f28fd81d3..be77d61a16ce 100644 --- a/drivers/gpu/drm/drm_internal.h +++ b/drivers/gpu/drm/drm_internal.h @@ -161,6 +161,8 @@ void drm_sysfs_lease_event(struct drm_device *dev); /* drm_gem.c */ int drm_gem_init(struct drm_device *dev); +void drm_gem_object_handle_get_unlocked(struct drm_gem_object *obj); +void drm_gem_object_handle_put_unlocked(struct drm_gem_object *obj); int drm_gem_handle_create_tail(struct drm_file *file_priv, struct drm_gem_object *obj, u32 *handlep); From 226862f50a7a88e4e4de9abbf36c64d19acd6fd0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ma=C3=ADra=20Canal?= Date: Sat, 28 Jun 2025 19:42:42 -0300 Subject: [PATCH 39/41] drm/v3d: Disable interrupts before resetting the GPU MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently, an interrupt can be triggered during a GPU reset, which can lead to GPU hangs and NULL pointer dereference in an interrupt context as shown in the following trace: [ 314.035040] Unable to handle kernel NULL pointer dereference at virtual address 00000000000000c0 [ 314.043822] Mem abort info: [ 314.046606] ESR = 0x0000000096000005 [ 314.050347] EC = 0x25: DABT (current EL), IL = 32 bits [ 314.055651] SET = 0, FnV = 0 [ 314.058695] EA = 0, S1PTW = 0 [ 314.061826] FSC = 0x05: level 1 translation fault [ 314.066694] Data abort info: [ 314.069564] ISV = 0, ISS = 0x00000005, ISS2 = 0x00000000 [ 314.075039] CM = 0, WnR = 0, TnD = 0, TagAccess = 0 [ 314.080080] GCS = 0, Overlay = 0, DirtyBit = 0, Xs = 0 [ 314.085382] user pgtable: 4k pages, 39-bit VAs, pgdp=0000000102728000 [ 314.091814] [00000000000000c0] pgd=0000000000000000, p4d=0000000000000000, pud=0000000000000000 [ 314.100511] Internal error: Oops: 0000000096000005 [#1] PREEMPT SMP [ 314.106770] Modules linked in: v3d i2c_brcmstb vc4 snd_soc_hdmi_codec gpu_sched drm_shmem_helper drm_display_helper cec drm_dma_helper drm_kms_helper drm drm_panel_orientation_quirks snd_soc_core snd_compress snd_pcm_dmaengine snd_pcm snd_timer snd backlight [ 314.129654] CPU: 0 UID: 0 PID: 0 Comm: swapper/0 Not tainted 6.12.25+rpt-rpi-v8 #1 Debian 1:6.12.25-1+rpt1 [ 314.139388] Hardware name: Raspberry Pi 4 Model B Rev 1.4 (DT) [ 314.145211] pstate: 600000c5 (nZCv daIF -PAN -UAO -TCO -DIT -SSBS BTYPE=--) [ 314.152165] pc : v3d_irq+0xec/0x2e0 [v3d] [ 314.156187] lr : v3d_irq+0xe0/0x2e0 [v3d] [ 314.160198] sp : ffffffc080003ea0 [ 314.163502] x29: ffffffc080003ea0 x28: ffffffec1f184980 x27: 021202b000000000 [ 314.170633] x26: ffffffec1f17f630 x25: ffffff8101372000 x24: ffffffec1f17d9f0 [ 314.177764] x23: 000000000000002a x22: 000000000000002a x21: ffffff8103252000 [ 314.184895] x20: 0000000000000001 x19: 00000000deadbeef x18: 0000000000000000 [ 314.192026] x17: ffffff94e51d2000 x16: ffffffec1dac3cb0 x15: c306000000000000 [ 314.199156] x14: 0000000000000000 x13: b2fc982e03cc5168 x12: 0000000000000001 [ 314.206286] x11: ffffff8103f8bcc0 x10: ffffffec1f196868 x9 : ffffffec1dac3874 [ 314.213416] x8 : 0000000000000000 x7 : 0000000000042a3a x6 : ffffff810017a180 [ 314.220547] x5 : ffffffec1ebad400 x4 : ffffffec1ebad320 x3 : 00000000000bebeb [ 314.227677] x2 : 0000000000000000 x1 : 0000000000000000 x0 : 0000000000000000 [ 314.234807] Call trace: [ 314.237243] v3d_irq+0xec/0x2e0 [v3d] [ 314.240906] __handle_irq_event_percpu+0x58/0x218 [ 314.245609] handle_irq_event+0x54/0xb8 [ 314.249439] handle_fasteoi_irq+0xac/0x240 [ 314.253527] handle_irq_desc+0x48/0x68 [ 314.257269] generic_handle_domain_irq+0x24/0x38 [ 314.261879] gic_handle_irq+0x48/0xd8 [ 314.265533] call_on_irq_stack+0x24/0x58 [ 314.269448] do_interrupt_handler+0x88/0x98 [ 314.273624] el1_interrupt+0x34/0x68 [ 314.277193] el1h_64_irq_handler+0x18/0x28 [ 314.281281] el1h_64_irq+0x64/0x68 [ 314.284673] default_idle_call+0x3c/0x168 [ 314.288675] do_idle+0x1fc/0x230 [ 314.291895] cpu_startup_entry+0x3c/0x50 [ 314.295810] rest_init+0xe4/0xf0 [ 314.299030] start_kernel+0x5e8/0x790 [ 314.302684] __primary_switched+0x80/0x90 [ 314.306691] Code: 940029eb 360ffc13 f9442ea0 52800001 (f9406017) [ 314.312775] ---[ end trace 0000000000000000 ]--- [ 314.317384] Kernel panic - not syncing: Oops: Fatal exception in interrupt [ 314.324249] SMP: stopping secondary CPUs [ 314.328167] Kernel Offset: 0x2b9da00000 from 0xffffffc080000000 [ 314.334076] PHYS_OFFSET: 0x0 [ 314.336946] CPU features: 0x08,00002013,c0200000,0200421b [ 314.342337] Memory Limit: none [ 314.345382] ---[ end Kernel panic - not syncing: Oops: Fatal exception in interrupt ]--- Before resetting the GPU, it's necessary to disable all interrupts and deal with any interrupt handler still in-flight. Otherwise, the GPU might reset with jobs still running, or yet, an interrupt could be handled during the reset. Cc: stable@vger.kernel.org Fixes: 57692c94dcbe ("drm/v3d: Introduce a new DRM driver for Broadcom V3D V3.x+") Reviewed-by: Juan A. Suarez Reviewed-by: Iago Toral Quiroga Link: https://lore.kernel.org/r/20250628224243.47599-1-mcanal@igalia.com Signed-off-by: Maíra Canal --- drivers/gpu/drm/v3d/v3d_drv.h | 8 ++++++++ drivers/gpu/drm/v3d/v3d_gem.c | 2 ++ drivers/gpu/drm/v3d/v3d_irq.c | 37 +++++++++++++++++++++++++---------- 3 files changed, 37 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/v3d/v3d_drv.h b/drivers/gpu/drm/v3d/v3d_drv.h index b51f0b648a08..411e47702f8a 100644 --- a/drivers/gpu/drm/v3d/v3d_drv.h +++ b/drivers/gpu/drm/v3d/v3d_drv.h @@ -101,6 +101,12 @@ enum v3d_gen { V3D_GEN_71 = 71, }; +enum v3d_irq { + V3D_CORE_IRQ, + V3D_HUB_IRQ, + V3D_MAX_IRQS, +}; + struct v3d_dev { struct drm_device drm; @@ -112,6 +118,8 @@ struct v3d_dev { bool single_irq_line; + int irq[V3D_MAX_IRQS]; + struct v3d_perfmon_info perfmon_info; void __iomem *hub_regs; diff --git a/drivers/gpu/drm/v3d/v3d_gem.c b/drivers/gpu/drm/v3d/v3d_gem.c index d7d16da78db3..37bf5eecdd2c 100644 --- a/drivers/gpu/drm/v3d/v3d_gem.c +++ b/drivers/gpu/drm/v3d/v3d_gem.c @@ -134,6 +134,8 @@ v3d_reset(struct v3d_dev *v3d) if (false) v3d_idle_axi(v3d, 0); + v3d_irq_disable(v3d); + v3d_idle_gca(v3d); v3d_reset_sms(v3d); v3d_reset_v3d(v3d); diff --git a/drivers/gpu/drm/v3d/v3d_irq.c b/drivers/gpu/drm/v3d/v3d_irq.c index 2cca5d3a26a2..a515a301e480 100644 --- a/drivers/gpu/drm/v3d/v3d_irq.c +++ b/drivers/gpu/drm/v3d/v3d_irq.c @@ -260,7 +260,7 @@ v3d_hub_irq(int irq, void *arg) int v3d_irq_init(struct v3d_dev *v3d) { - int irq1, ret, core; + int irq, ret, core; INIT_WORK(&v3d->overflow_mem_work, v3d_overflow_mem_work); @@ -271,17 +271,24 @@ v3d_irq_init(struct v3d_dev *v3d) V3D_CORE_WRITE(core, V3D_CTL_INT_CLR, V3D_CORE_IRQS(v3d->ver)); V3D_WRITE(V3D_HUB_INT_CLR, V3D_HUB_IRQS(v3d->ver)); - irq1 = platform_get_irq_optional(v3d_to_pdev(v3d), 1); - if (irq1 == -EPROBE_DEFER) - return irq1; - if (irq1 > 0) { - ret = devm_request_irq(v3d->drm.dev, irq1, + irq = platform_get_irq_optional(v3d_to_pdev(v3d), 1); + if (irq == -EPROBE_DEFER) + return irq; + if (irq > 0) { + v3d->irq[V3D_CORE_IRQ] = irq; + + ret = devm_request_irq(v3d->drm.dev, v3d->irq[V3D_CORE_IRQ], v3d_irq, IRQF_SHARED, "v3d_core0", v3d); if (ret) goto fail; - ret = devm_request_irq(v3d->drm.dev, - platform_get_irq(v3d_to_pdev(v3d), 0), + + irq = platform_get_irq(v3d_to_pdev(v3d), 0); + if (irq < 0) + return irq; + v3d->irq[V3D_HUB_IRQ] = irq; + + ret = devm_request_irq(v3d->drm.dev, v3d->irq[V3D_HUB_IRQ], v3d_hub_irq, IRQF_SHARED, "v3d_hub", v3d); if (ret) @@ -289,8 +296,12 @@ v3d_irq_init(struct v3d_dev *v3d) } else { v3d->single_irq_line = true; - ret = devm_request_irq(v3d->drm.dev, - platform_get_irq(v3d_to_pdev(v3d), 0), + irq = platform_get_irq(v3d_to_pdev(v3d), 0); + if (irq < 0) + return irq; + v3d->irq[V3D_CORE_IRQ] = irq; + + ret = devm_request_irq(v3d->drm.dev, v3d->irq[V3D_CORE_IRQ], v3d_irq, IRQF_SHARED, "v3d", v3d); if (ret) @@ -331,6 +342,12 @@ v3d_irq_disable(struct v3d_dev *v3d) V3D_CORE_WRITE(core, V3D_CTL_INT_MSK_SET, ~0); V3D_WRITE(V3D_HUB_INT_MSK_SET, ~0); + /* Finish any interrupt handler still in flight. */ + for (int i = 0; i < V3D_MAX_IRQS; i++) { + if (v3d->irq[i]) + synchronize_irq(v3d->irq[i]); + } + /* Clear any pending interrupts we might have left. */ for (core = 0; core < v3d->cores; core++) V3D_CORE_WRITE(core, V3D_CTL_INT_CLR, V3D_CORE_IRQS(v3d->ver)); From d008fc65ebbc7d0f36c5a90c8b1ef117c295d8d8 Mon Sep 17 00:00:00 2001 From: Daniele Ceraolo Spurio Date: Fri, 13 Jun 2025 16:11:29 -0700 Subject: [PATCH 40/41] drm/xe: Extend WA 14018094691 to BMG This WA is applicable to BMG as well. Note that this is a GSC WA and we don't load the GSC on BMG, so extending the WA to BMG won't do anything right now. However, it helps future-proof the driver so that if we ever turn the GSC on we won't have to remember to extend this WA. v2: don't use VERSION_RANGE from 2001 to 2004 (Matt) Signed-off-by: Daniele Ceraolo Spurio Cc: Matt Roper Reviewed-by: Matt Roper Link: https://lore.kernel.org/r/20250613231128.1261815-2-daniele.ceraolospurio@intel.com (cherry picked from commit 1a5ce0c5b95b0624ebd44f574b98003a466973be) Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_wa_oob.rules | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/xe/xe_wa_oob.rules b/drivers/gpu/drm/xe/xe_wa_oob.rules index 320766f6c5df..69c1d7fc695e 100644 --- a/drivers/gpu/drm/xe/xe_wa_oob.rules +++ b/drivers/gpu/drm/xe/xe_wa_oob.rules @@ -21,7 +21,8 @@ GRAPHICS_VERSION_RANGE(1270, 1274) MEDIA_VERSION(1300) PLATFORM(DG2) -14018094691 GRAPHICS_VERSION(2004) +14018094691 GRAPHICS_VERSION_RANGE(2001, 2002) + GRAPHICS_VERSION(2004) 14019882105 GRAPHICS_VERSION(2004), GRAPHICS_STEP(A0, B0) 18024947630 GRAPHICS_VERSION(2001) GRAPHICS_VERSION(2004) From 5459e16b215c18529782e7746992653f00de0779 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Tue, 24 Jun 2025 10:41:03 -0700 Subject: [PATCH 41/41] drm/xe: Do not wedge device on killed exec queues When a user closes an exec queue or interrupts an app with Ctrl-C, this does not warrant wedging the device in mode 2. Avoid this by skipping the wedge check for killed exec queues in the TDR and LR exec queue cleanup worker. Signed-off-by: Matthew Brost Reviewed-by: Rodrigo Vivi Link: https://lore.kernel.org/r/20250624174103.2707941-1-matthew.brost@intel.com (cherry picked from commit 5a2f117a80c207372513ca8964eeb178874f4990) Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_guc_submit.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c index 9567f6700cf2..2ac87ff4a057 100644 --- a/drivers/gpu/drm/xe/xe_guc_submit.c +++ b/drivers/gpu/drm/xe/xe_guc_submit.c @@ -891,12 +891,13 @@ static void xe_guc_exec_queue_lr_cleanup(struct work_struct *w) struct xe_exec_queue *q = ge->q; struct xe_guc *guc = exec_queue_to_guc(q); struct xe_gpu_scheduler *sched = &ge->sched; - bool wedged; + bool wedged = false; xe_gt_assert(guc_to_gt(guc), xe_exec_queue_is_lr(q)); trace_xe_exec_queue_lr_cleanup(q); - wedged = guc_submit_hint_wedged(exec_queue_to_guc(q)); + if (!exec_queue_killed(q)) + wedged = guc_submit_hint_wedged(exec_queue_to_guc(q)); /* Kill the run_job / process_msg entry points */ xe_sched_submission_stop(sched); @@ -1070,7 +1071,7 @@ guc_exec_queue_timedout_job(struct drm_sched_job *drm_job) int err = -ETIME; pid_t pid = -1; int i = 0; - bool wedged, skip_timeout_check; + bool wedged = false, skip_timeout_check; /* * TDR has fired before free job worker. Common if exec queue @@ -1116,7 +1117,8 @@ guc_exec_queue_timedout_job(struct drm_sched_job *drm_job) * doesn't work for SRIOV. For now assuming timeouts in wedged mode are * genuine timeouts. */ - wedged = guc_submit_hint_wedged(exec_queue_to_guc(q)); + if (!exec_queue_killed(q)) + wedged = guc_submit_hint_wedged(exec_queue_to_guc(q)); /* Engine state now stable, disable scheduling to check timestamp */ if (!wedged && exec_queue_registered(q)) {