From 5f69165b7e4215f02247b0c64052c71b2f66d73a Mon Sep 17 00:00:00 2001 From: "Mukesh Kumar Chaurasiya (IBM)" Date: Sun, 26 Apr 2026 15:17:25 +0530 Subject: [PATCH 01/37] rust/drm: import ARef from sync crate ARef is defined in sync and is getting used from types causing the build to fail. Fix this by using ARef from sync module. Fixes: 80df573af9ef ("rust: drm: gem: shmem: Add DRM shmem helper abstraction") Signed-off-by: Mukesh Kumar Chaurasiya (IBM) Link: https://patch.msgid.link/20260426094725.2188668-2-mkchauras@gmail.com [ Add missing Fixes: tag. - Danilo ] Signed-off-by: Danilo Krummrich --- rust/kernel/drm/gem/shmem.rs | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/rust/kernel/drm/gem/shmem.rs b/rust/kernel/drm/gem/shmem.rs index d025fb035195..e1b648920d2f 100644 --- a/rust/kernel/drm/gem/shmem.rs +++ b/rust/kernel/drm/gem/shmem.rs @@ -19,10 +19,8 @@ }, error::to_result, prelude::*, - types::{ - ARef, - Opaque, // - }, // + sync::aref::ARef, + types::Opaque, // }; use core::{ ops::{ From 15e8bae5d930c91b8739a87d75db0a6efca3cb32 Mon Sep 17 00:00:00 2001 From: Danilo Krummrich Date: Thu, 23 Apr 2026 14:46:35 +0200 Subject: [PATCH 02/37] MAINTAINERS: nova: update mailing list The nouveau mailing list has some issues (e.g. with stripping Cc entries from replies when using notmuch + b4 based workflows). Besides that, having a separate mailing list for nova also helps to better distinguish nova from nouveau and makes it easier to track nova-specific discussions. Replace the nouveau mailing list with the new nova-gpu@lists.linux.dev mailing list for both nova-core and nova-drm, and remove the patchwork entries, since those are bound to the nouveau mailing list and not used by nova anyway. 
Link: https://lore.kernel.org/all/bc2517c2-6772-4cbd-8fd7-6dbdcdd13eab@nvidia.com/ Reviewed-by: Joel Fernandes Reviewed-by: John Hubbard Acked-by: Alexandre Courbot Link: https://patch.msgid.link/20260423124649.38793-1-dakr@kernel.org Signed-off-by: Danilo Krummrich --- MAINTAINERS | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index 2fb1c75afd16..5c9272622033 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -8193,10 +8193,9 @@ F: include/uapi/drm/nouveau_drm.h CORE DRIVER FOR NVIDIA GPUS [RUST] M: Danilo Krummrich M: Alexandre Courbot -L: nouveau@lists.freedesktop.org +L: nova-gpu@lists.linux.dev S: Supported W: https://rust-for-linux.com/nova-gpu-driver -Q: https://patchwork.freedesktop.org/project/nouveau/ B: https://gitlab.freedesktop.org/drm/nova/-/issues C: irc://irc.oftc.net/nouveau T: git https://gitlab.freedesktop.org/drm/rust/kernel.git drm-rust-next @@ -8205,10 +8204,9 @@ F: drivers/gpu/nova-core/ DRM DRIVER FOR NVIDIA GPUS [RUST] M: Danilo Krummrich -L: nouveau@lists.freedesktop.org +L: nova-gpu@lists.linux.dev S: Supported W: https://rust-for-linux.com/nova-gpu-driver -Q: https://patchwork.freedesktop.org/project/nouveau/ B: https://gitlab.freedesktop.org/drm/nova/-/issues C: irc://irc.oftc.net/nouveau T: git https://gitlab.freedesktop.org/drm/rust/kernel.git drm-rust-next From 7dd57d7a6350770dfc283287125c409e995200e0 Mon Sep 17 00:00:00 2001 From: Karol Wachowski Date: Thu, 30 Apr 2026 11:56:44 +0200 Subject: [PATCH 03/37] accel/ivpu: Disallow re-exporting imported GEM objects Prevent re-exporting of imported GEM buffers by adding a custom prime_handle_to_fd callback that checks if the object is imported and returns -EOPNOTSUPP if so. Re-exporting imported GEM buffers causes loss of buffer flags settings, leading to incorrect device access and data corruption. 
Reported-by: Yametsu Fixes: 57557964b582 ("accel/ivpu: Add support for userptr buffer objects") Reviewed-by: Andrzej Kacprowski Signed-off-by: Karol Wachowski Cc: # v6.19+ --- drivers/accel/ivpu/ivpu_drv.c | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/drivers/accel/ivpu/ivpu_drv.c b/drivers/accel/ivpu/ivpu_drv.c index 2801378e3e19..3b7b008bccfe 100644 --- a/drivers/accel/ivpu/ivpu_drv.c +++ b/drivers/accel/ivpu/ivpu_drv.c @@ -537,6 +537,26 @@ static const struct file_operations ivpu_fops = { #endif }; +static int ivpu_gem_prime_handle_to_fd(struct drm_device *dev, struct drm_file *file_priv, + u32 handle, u32 flags, int *prime_fd) +{ + struct drm_gem_object *obj; + + obj = drm_gem_object_lookup(file_priv, handle); + if (!obj) + return -ENOENT; + + if (drm_gem_is_imported(obj)) { + /* Do not allow re-exporting */ + drm_gem_object_put(obj); + return -EOPNOTSUPP; + } + + drm_gem_object_put(obj); + + return drm_gem_prime_handle_to_fd(dev, file_priv, handle, flags, prime_fd); +} + static const struct drm_driver driver = { .driver_features = DRIVER_GEM | DRIVER_COMPUTE_ACCEL, @@ -545,6 +565,7 @@ static const struct drm_driver driver = { .gem_create_object = ivpu_gem_create_object, .gem_prime_import = ivpu_gem_prime_import, + .prime_handle_to_fd = ivpu_gem_prime_handle_to_fd, .ioctls = ivpu_drm_ioctls, .num_ioctls = ARRAY_SIZE(ivpu_drm_ioctls), From c9e3878ae2f57fd6786279cf5d9dc6e6e1b52f5a Mon Sep 17 00:00:00 2001 From: Timur Tabi Date: Thu, 30 Apr 2026 17:38:29 -0500 Subject: [PATCH 04/37] Revert "drm/nouveau/gsp: add support for GA100" This reverts commit 20e0c197802c545db220157fafd567a10f2b7672. Despite claiming to add GA100 support, that commit actually has quite a few problems. It falsely claims that there is no VBIOS. GA100 does have a VBIOS, but it has no display engine, so it cannot use the PRAMIN method to read the VBIOS and must fall back to using PROM.
For whatever reason, the VBIOS on GA100 has an "Init-from-ROM" (IFR) header where the PCI Expansion ROM would normally be found. So to find that ROM, Nouveau needs to parse the IFR header. The commit also falsely claimed that there is no graphics (GR) engine. So rather than try to fix that commit, just revert it and start over from scratch. Signed-off-by: Timur Tabi Link: https://patch.msgid.link/20260430223838.2530778-2-ttabi@nvidia.com Signed-off-by: Danilo Krummrich --- .../gpu/drm/nouveau/nvkm/engine/device/base.c | 11 +++++++++-- .../gpu/drm/nouveau/nvkm/subdev/gsp/ga100.c | 4 ++++ .../gpu/drm/nouveau/nvkm/subdev/gsp/tu102.c | 18 +++++------------- 3 files changed, 18 insertions(+), 15 deletions(-) diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c b/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c index 72848ed80df7..b101e14f841e 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c @@ -2513,6 +2513,7 @@ static const struct nvkm_device_chip nv170_chipset = { .name = "GA100", .bar = { 0x00000001, tu102_bar_new }, + .bios = { 0x00000001, nvkm_bios_new }, .devinit = { 0x00000001, ga100_devinit_new }, .fault = { 0x00000001, tu102_fault_new }, .fb = { 0x00000001, ga100_fb_new }, @@ -2529,7 +2530,6 @@ nv170_chipset = { .vfn = { 0x00000001, ga100_vfn_new }, .ce = { 0x000003ff, ga100_ce_new }, .fifo = { 0x00000001, ga100_fifo_new }, - .sec2 = { 0x00000001, tu102_sec2_new }, }; static const struct nvkm_device_chip @@ -3341,7 +3341,6 @@ nvkm_device_ctor(const struct nvkm_device_func *func, case 0x166: device->chip = &nv166_chipset; break; case 0x167: device->chip = &nv167_chipset; break; case 0x168: device->chip = &nv168_chipset; break; - case 0x170: device->chip = &nv170_chipset; break; case 0x172: device->chip = &nv172_chipset; break; case 0x173: device->chip = &nv173_chipset; break; case 0x174: device->chip = &nv174_chipset; break; @@ -3361,6 +3360,14 @@ nvkm_device_ctor(const struct 
nvkm_device_func *func, case 0x1b6: device->chip = &nv1b6_chipset; break; case 0x1b7: device->chip = &nv1b7_chipset; break; default: + if (nvkm_boolopt(device->cfgopt, "NvEnableUnsupportedChipsets", false)) { + switch (device->chipset) { + case 0x170: device->chip = &nv170_chipset; break; + default: + break; + } + } + if (!device->chip) { nvdev_error(device, "unknown chipset (%08x)\n", boot0); ret = -ENODEV; diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/ga100.c b/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/ga100.c index fdd820eeef81..27a13aeccd3c 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/ga100.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/ga100.c @@ -41,11 +41,15 @@ ga100_gsp_flcn = { static const struct nvkm_gsp_func ga100_gsp = { .flcn = &ga100_gsp_flcn, + .fwsec = &tu102_gsp_fwsec, .sig_section = ".fwsignature_ga100", .booter.ctor = tu102_gsp_booter_ctor, + .fwsec_sb.ctor = tu102_gsp_fwsec_sb_ctor, + .fwsec_sb.dtor = tu102_gsp_fwsec_sb_dtor, + .dtor = r535_gsp_dtor, .oneinit = tu102_gsp_oneinit, .init = tu102_gsp_init, diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/tu102.c b/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/tu102.c index dd82c76b8b9a..19cb269e7a26 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/tu102.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/tu102.c @@ -318,13 +318,8 @@ tu102_gsp_oneinit(struct nvkm_gsp *gsp) if (ret) return ret; - /* - * Calculate FB layout. FRTS is a memory region created by the FWSEC-FRTS firmware. - * FWSEC comes from VBIOS. So on systems with no VBIOS (e.g. GA100), the FRTS does - * not exist. Therefore, use the existence of VBIOS to determine whether to reserve - * an FRTS region. - */ - gsp->fb.wpr2.frts.size = device->bios ? 0x100000 : 0; + /* Calculate FB layout. 
*/ + gsp->fb.wpr2.frts.size = 0x100000; gsp->fb.wpr2.frts.addr = ALIGN_DOWN(gsp->fb.bios.addr, 0x20000) - gsp->fb.wpr2.frts.size; gsp->fb.wpr2.boot.size = gsp->boot.fw.size; @@ -348,12 +343,9 @@ tu102_gsp_oneinit(struct nvkm_gsp *gsp) if (ret) return ret; - /* Only boot FWSEC-FRTS if it actually exists */ - if (gsp->fb.wpr2.frts.size) { - ret = nvkm_gsp_fwsec_frts(gsp); - if (WARN_ON(ret)) - return ret; - } + ret = nvkm_gsp_fwsec_frts(gsp); + if (WARN_ON(ret)) + return ret; /* Reset GSP into RISC-V mode. */ ret = gsp->func->reset(gsp); From 2e42a17b8f6bc3c0cd69d7556b588011d3ec2394 Mon Sep 17 00:00:00 2001 From: Eliot Courtney Date: Thu, 23 Apr 2026 21:36:52 +0900 Subject: [PATCH 05/37] rust: drm: gem: clean up GEM state in init failure case MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently, if `drm_gem_object_init` fails, the object is freed without any cleanup. Perform the cleanup in that case. Cc: stable@vger.kernel.org Fixes: c284d3e42338 ("rust: drm: gem: Add GEM object abstraction") Signed-off-by: Eliot Courtney Reviewed-by: Alice Ryhl Reviewed-by: Onur Özkan Link: https://patch.msgid.link/20260423-fix-gem-1-v1-1-e12e35f7bba9@nvidia.com [ Move safety comment closer to unsafe block to avoid a clippy warning. - Danilo ] Signed-off-by: Danilo Krummrich --- rust/kernel/drm/gem/mod.rs | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/rust/kernel/drm/gem/mod.rs b/rust/kernel/drm/gem/mod.rs index 75acda7ba500..01b5bd47a333 100644 --- a/rust/kernel/drm/gem/mod.rs +++ b/rust/kernel/drm/gem/mod.rs @@ -277,8 +277,17 @@ pub fn new(dev: &drm::Device, size: usize, args: T::Args) -> Result` is always treated as pinned. 
let ptr = KBox::into_raw(unsafe { Pin::into_inner_unchecked(obj) }); From 01eb80b767430ae868c48ad106c60eb61a508c85 Mon Sep 17 00:00:00 2001 From: Alok Tiwari Date: Fri, 10 Apr 2026 04:20:12 -0700 Subject: [PATCH 06/37] accel/qaic: fix incorrect counter check in RAS message decode The UE and UE_NF cases check ce_count against UINT_MAX before incrementing their respective counters. This is logically incorrect and prevents ue_count and ue_nf_count from incrementing when ce_count reaches UINT_MAX. Fixes: c11a50b170e7 ("accel/qaic: Add Reliability, Accessibility, Serviceability (RAS)") Signed-off-by: Alok Tiwari Reviewed-by: Jeff Hugo Signed-off-by: Jeff Hugo Link: https://patch.msgid.link/20260410112015.592546-1-alok.a.tiwari@oracle.com --- drivers/accel/qaic/qaic_ras.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/accel/qaic/qaic_ras.c b/drivers/accel/qaic/qaic_ras.c index cc0b75461e1a..6791af366cba 100644 --- a/drivers/accel/qaic/qaic_ras.c +++ b/drivers/accel/qaic/qaic_ras.c @@ -497,11 +497,11 @@ static void decode_ras_msg(struct qaic_device *qdev, struct ras_data *msg) qdev->ce_count++; break; case UE: - if (qdev->ce_count != UINT_MAX) + if (qdev->ue_count != UINT_MAX) qdev->ue_count++; break; case UE_NF: - if (qdev->ce_count != UINT_MAX) + if (qdev->ue_nf_count != UINT_MAX) qdev->ue_nf_count++; break; default: From 0a69ac25bd596d50823d530d0a2004336668c0df Mon Sep 17 00:00:00 2001 From: Eliot Courtney Date: Fri, 1 May 2026 19:49:37 +0900 Subject: [PATCH 07/37] rust: drm: fix unsound initialization in drm::Device::new If pinned initialization of drm::Device::Data fails, it calls drm::Device::release via drm_dev_put. This materializes a reference to &drm::Device, but it's not fully constructed yet, because initializing `data` failed. It should not be dropped either. Instead, if pinned initialization fails, make sure drm::Device::release isn't called. 
Fixes: 2e9fdbe5ec7a ("rust: drm: device: drop_in_place() the drm::Device in release()") Signed-off-by: Eliot Courtney Reviewed-by: Gary Guo Link: https://patch.msgid.link/20260501-fix-drm-1-v2-1-5c4f681837bc@nvidia.com Signed-off-by: Danilo Krummrich --- rust/kernel/drm/device.rs | 22 ++++++++++++++++------ 1 file changed, 16 insertions(+), 6 deletions(-) diff --git a/rust/kernel/drm/device.rs b/rust/kernel/drm/device.rs index adbafe8db54d..403fc35353c7 100644 --- a/rust/kernel/drm/device.rs +++ b/rust/kernel/drm/device.rs @@ -119,13 +119,20 @@ pub fn new(dev: &device::Device, data: impl PinInit) -> Result()); + // Use a temporary vtable without a `release` callback until `data` is initialized, so + // init failure can release the DRM device without dropping uninitialized fields. + let alloc_vtable = bindings::drm_driver { + release: None, + ..Self::VTABLE + }; + // SAFETY: - // - `VTABLE`, as a `const` is pinned to the read-only section of the compilation, + // - `alloc_vtable` reference remains valid until no longer used, // - `dev` is valid by its type invarants, let raw_drm: *mut Self = unsafe { bindings::__drm_dev_alloc( dev.as_raw(), - &Self::VTABLE, + &alloc_vtable, layout.size(), mem::offset_of!(Self, dev), ) @@ -133,6 +140,10 @@ pub fn new(dev: &device::Device, data: impl PinInit) -> Result) -> Result Date: Fri, 1 May 2026 23:53:38 -0700 Subject: [PATCH 08/37] drm/ttm: Fix GPU MM stats during pool shrinking TTM pool shrinking frees pages by calling __free_pages() directly, which bypasses updates to NR_GPU_ACTIVE and leaves GPU MM accounting out of sync. Introduce a helper, __free_pages_gpu_account(), and use it for all page frees in ttm_pool.c so GPU MM statistics are updated consistently. Reported-by: Kenneth Crudup Fixes: ae80122f3896 ("drm/ttm: use gpu mm stats to track gpu memory allocations. 
(v4)") Cc: Christian Koenig Cc: Huang Rui Cc: Matthew Auld Cc: David Airlie Cc: dri-devel@lists.freedesktop.org Signed-off-by: Matthew Brost Tested-by: Kenneth Crudup Reviewed-by: Dave Airlie Link: https://patch.msgid.link/20260502065338.2720646-1-matthew.brost@intel.com --- drivers/gpu/drm/ttm/ttm_pool.c | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/ttm/ttm_pool.c b/drivers/gpu/drm/ttm/ttm_pool.c index 26a3689e5fd9..278bbe7a11ad 100644 --- a/drivers/gpu/drm/ttm/ttm_pool.c +++ b/drivers/gpu/drm/ttm/ttm_pool.c @@ -206,6 +206,14 @@ static struct page *ttm_pool_alloc_page(struct ttm_pool *pool, gfp_t gfp_flags, return NULL; } +static void __free_pages_gpu_account(struct page *p, unsigned int order, + bool reclaim) +{ + mod_lruvec_page_state(p, reclaim ? NR_GPU_RECLAIM : NR_GPU_ACTIVE, + -(1 << order)); + __free_pages(p, order); +} + /* Reset the caching and pages of size 1 << order */ static void ttm_pool_free_page(struct ttm_pool *pool, enum ttm_caching caching, unsigned int order, struct page *p, bool reclaim) @@ -223,9 +231,7 @@ static void ttm_pool_free_page(struct ttm_pool *pool, enum ttm_caching caching, #endif if (!pool || !ttm_pool_uses_dma_alloc(pool)) { - mod_lruvec_page_state(p, reclaim ? 
NR_GPU_RECLAIM : NR_GPU_ACTIVE, - -(1 << order)); - __free_pages(p, order); + __free_pages_gpu_account(p, order, reclaim); return; } @@ -606,7 +612,7 @@ static int ttm_pool_restore_commit(struct ttm_pool_tt_restore *restore, */ ttm_pool_split_for_swap(restore->pool, p); copy_highpage(restore->alloced_page + i, p); - __free_pages(p, 0); + __free_pages_gpu_account(p, 0, false); } restore->restored_pages++; @@ -1068,7 +1074,7 @@ long ttm_pool_backup(struct ttm_pool *pool, struct ttm_tt *tt, if (flags->purge) { shrunken += num_pages; page->private = 0; - __free_pages(page, order); + __free_pages_gpu_account(page, order, false); memset(tt->pages + i, 0, num_pages * sizeof(*tt->pages)); } @@ -1109,7 +1115,7 @@ long ttm_pool_backup(struct ttm_pool *pool, struct ttm_tt *tt, } handle = shandle; tt->pages[i] = ttm_backup_handle_to_page_ptr(handle); - put_page(page); + __free_pages_gpu_account(page, 0, false); shrunken++; } From 8acd2d7e0889ac62bc102bd7b648cd7bee04f902 Mon Sep 17 00:00:00 2001 From: Myeonghun Pak Date: Fri, 24 Apr 2026 20:25:18 +0900 Subject: [PATCH 09/37] drm/qxl: Fix missing KMS poll cleanup drm_kms_helper_poll_init() initializes the output polling work and enables polling for the DRM device. qxl enables polling before calling drm_dev_register(), but the drm_dev_register() failure path tears down the modeset and device state without disabling the polling helper. The remove path also unregisters and shuts down the DRM device without first disabling the polling helper. Add matching drm_kms_helper_poll_fini() calls in both paths so the delayed polling work is cancelled before qxl tears down the associated modeset/device state. 
Signed-off-by: Myeonghun Pak Reviewed-by: Thomas Zimmermann Fixes: 5ff91e442652 ("qxl: use drm helper hotplug support") Signed-off-by: Thomas Zimmermann Link: https://patch.msgid.link/20260424112543.57819-1-mhun512@gmail.com --- drivers/gpu/drm/qxl/qxl_drv.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/qxl/qxl_drv.c b/drivers/gpu/drm/qxl/qxl_drv.c index 2bbb1168a3ff..1e6a2392d7c6 100644 --- a/drivers/gpu/drm/qxl/qxl_drv.c +++ b/drivers/gpu/drm/qxl/qxl_drv.c @@ -118,12 +118,13 @@ qxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent) /* Complete initialization. */ ret = drm_dev_register(&qdev->ddev, ent->driver_data); if (ret) - goto modeset_cleanup; + goto poll_fini; drm_client_setup(&qdev->ddev, NULL); return 0; -modeset_cleanup: +poll_fini: + drm_kms_helper_poll_fini(&qdev->ddev); qxl_modeset_fini(qdev); unload: qxl_device_fini(qdev); @@ -154,6 +155,7 @@ qxl_pci_remove(struct pci_dev *pdev) { struct drm_device *dev = pci_get_drvdata(pdev); + drm_kms_helper_poll_fini(dev); drm_dev_unregister(dev); drm_atomic_helper_shutdown(dev); if (pci_is_vga(pdev) && pdev->revision < 5) From c28c22c8cfbd43f2ad71a157324d9fbebc0d0f2e Mon Sep 17 00:00:00 2001 From: Francesco Lavra Date: Tue, 10 Feb 2026 18:35:45 +0100 Subject: [PATCH 10/37] drm/fb-helper: Fix clipping when damage area spans a single scanline When the damage area resulting from a dirty memory range spans a single scanline, the width of the rectangle is calculated dynamically because it may not coincide with the framebuffer width. If the dirty range ends exactly at the end of the scanline, the `bit_end` variable is incorrectly assigned a 0 value, which results in a bogus clip rectangle where the x2 coordinate is 0. This prevents the dirty scanline from being flushed to the hardware. Change the calculation of the `bit_end` value to fix the x2 coordinate value in the above edge case. 
Fixes: ded74cafeea9 ("drm/fb-helper: Clip damage area horizontally") Signed-off-by: Francesco Lavra Reviewed-by: Thomas Zimmermann Signed-off-by: Thomas Zimmermann Link: https://patch.msgid.link/20260210173545.733937-1-flavra@baylibre.com --- drivers/gpu/drm/drm_fb_helper.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/drm_fb_helper.c b/drivers/gpu/drm/drm_fb_helper.c index a80a335f4148..1541fc8a9ac2 100644 --- a/drivers/gpu/drm/drm_fb_helper.c +++ b/drivers/gpu/drm/drm_fb_helper.c @@ -490,7 +490,7 @@ static void drm_fb_helper_memory_range_to_clip(struct fb_info *info, off_t off, * the number of horizontal pixels that need an update. */ off_t bit_off = (off % line_length) * 8; - off_t bit_end = (end % line_length) * 8; + off_t bit_end = bit_off + len * 8; x1 = bit_off / info->var.bits_per_pixel; x2 = DIV_ROUND_UP(bit_end, info->var.bits_per_pixel); From 2a46a9356ba7b1bdd741c8b41e5374edcd960557 Mon Sep 17 00:00:00 2001 From: "Kory Maincent (TI)" Date: Tue, 28 Apr 2026 11:04:56 +0200 Subject: [PATCH 11/37] drm/bridge: tda998x: Use __be32 for audio port OF property pointer of_get_property() returns a pointer to big-endian (__be32) data, but port_data in tda998x_get_audio_ports() was declared as const u32 *, causing a sparse endianness type mismatch warning. Fix the declaration to use const __be32 *. 
Fixes: 7e567624dc5a4 ("drm/i2c: tda998x: Register ASoC hdmi-codec and add audio DT binding") Cc: stable@vger.kernel.org Signed-off-by: Kory Maincent (TI) Reviewed-by: Russell King (Oracle) Link: https://patch.msgid.link/20260428090457.121894-1-kory.maincent@bootlin.com Signed-off-by: Luca Ceresoli --- drivers/gpu/drm/bridge/tda998x_drv.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/bridge/tda998x_drv.c b/drivers/gpu/drm/bridge/tda998x_drv.c index d9b388165de1..779b976f601c 100644 --- a/drivers/gpu/drm/bridge/tda998x_drv.c +++ b/drivers/gpu/drm/bridge/tda998x_drv.c @@ -1762,7 +1762,7 @@ static const struct drm_bridge_funcs tda998x_bridge_funcs = { static int tda998x_get_audio_ports(struct tda998x_priv *priv, struct device_node *np) { - const u32 *port_data; + const __be32 *port_data; u32 size; int i; From b5d0ad616ca8dd8c7b6b24dc13012e342278a085 Mon Sep 17 00:00:00 2001 From: "Kory Maincent (TI)" Date: Fri, 17 Apr 2026 17:54:45 +0200 Subject: [PATCH 12/37] drm/bridge: tda998x: Return NULL instead of 0 in tda998x_edid_read() tda998x_edid_read() returns a const struct drm_edid pointer, but when tda998x_edid_delay_wait() fails (process killed while waiting for the HPD timeout), the integer literal 0 is returned instead of NULL, triggering a sparse warning: "Using plain integer as NULL pointer" Replace 0 with NULL to fix the sparse warning. 
Fixes: c76a8be4feec ("drm/bridge: tda998x: Add support for DRM_BRIDGE_ATTACH_NO_CONNECTOR") Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202604172257.Imo6GOH9-lkp@intel.com/ Signed-off-by: Kory Maincent (TI) Reviewed-by: Luca Ceresoli Link: https://patch.msgid.link/20260417155446.1068893-1-kory.maincent@bootlin.com Signed-off-by: Luca Ceresoli --- drivers/gpu/drm/bridge/tda998x_drv.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/bridge/tda998x_drv.c b/drivers/gpu/drm/bridge/tda998x_drv.c index 779b976f601c..6c427bc75896 100644 --- a/drivers/gpu/drm/bridge/tda998x_drv.c +++ b/drivers/gpu/drm/bridge/tda998x_drv.c @@ -1293,7 +1293,7 @@ static const struct drm_edid *tda998x_edid_read(struct tda998x_priv *priv, * can't handle signals gracefully. */ if (tda998x_edid_delay_wait(priv)) - return 0; + return NULL; if (priv->rev == TDA19988) reg_clear(priv, REG_TX4, TX4_PD_RAM); From 84ae1840260fece9b6b70d3872b79384bbe5a90b Mon Sep 17 00:00:00 2001 From: Osama Abdelkader Date: Thu, 23 Apr 2026 22:06:19 +0200 Subject: [PATCH 13/37] drm/sti: remove bridge when sti_hda component_add fails MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Use devm_drm_bridge_add() so the bridge is released if probe fails after registration, and drop the manual drm_bridge_remove() in remove(). Check the return value of devm_drm_bridge_add(). 
Signed-off-by: Osama Abdelkader Fixes: d28726efc637 ("drm/sti: hda: add bridge before attaching") Cc: stable@vger.kernel.org Reviewed-by: Luca Ceresoli Acked-by: Raphaël Gallais-Pou Link: https://patch.msgid.link/20260423200622.325076-1-osama.abdelkader@gmail.com Signed-off-by: Raphael Gallais-Pou --- drivers/gpu/drm/sti/sti_hda.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/sti/sti_hda.c b/drivers/gpu/drm/sti/sti_hda.c index b7397827889c..360a88ca8f0c 100644 --- a/drivers/gpu/drm/sti/sti_hda.c +++ b/drivers/gpu/drm/sti/sti_hda.c @@ -741,6 +741,7 @@ static int sti_hda_probe(struct platform_device *pdev) struct device *dev = &pdev->dev; struct sti_hda *hda; struct resource *res; + int ret; DRM_INFO("%s\n", __func__); @@ -779,7 +780,9 @@ static int sti_hda_probe(struct platform_device *pdev) return PTR_ERR(hda->clk_hddac); } - drm_bridge_add(&hda->bridge); + ret = devm_drm_bridge_add(dev, &hda->bridge); + if (ret) + return ret; platform_set_drvdata(pdev, hda); @@ -788,10 +791,7 @@ static int sti_hda_probe(struct platform_device *pdev) static void sti_hda_remove(struct platform_device *pdev) { - struct sti_hda *hda = platform_get_drvdata(pdev); - component_del(&pdev->dev, &sti_hda_ops); - drm_bridge_remove(&hda->bridge); } static const struct of_device_id hda_of_match[] = { From aab3d205a086233c612fee86009265451793e0c2 Mon Sep 17 00:00:00 2001 From: Juha-Pekka Heikkila Date: Mon, 27 Apr 2026 19:57:15 +0300 Subject: [PATCH 14/37] drm/i915/display: enable ccs modifiers on dg2 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Since AuxCCS was enabled for the Xe driver, the DG2 CCS modifiers have been disabled in the i915 driver. Allow DG2 to use CCS for framebuffers again.
Fixes: 6a99e91a6ca8 ("drm/i915/display: Detect AuxCCS support via display parent interface") Signed-off-by: Juha-Pekka Heikkila Reviewed-by: Ville Syrjälä Signed-off-by: Mika Kahola Link: https://patch.msgid.link/20260427165715.864721-1-juhapekka.heikkila@gmail.com (cherry picked from commit aee13ba1448213975f36942ba5d1ce693eb5c002) Signed-off-by: Tvrtko Ursulin --- drivers/gpu/drm/i915/i915_driver.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_driver.c b/drivers/gpu/drm/i915/i915_driver.c index 385a634c3ed0..d9be7a5a239c 100644 --- a/drivers/gpu/drm/i915/i915_driver.c +++ b/drivers/gpu/drm/i915/i915_driver.c @@ -750,9 +750,8 @@ static bool has_auxccs(struct drm_device *drm) { struct drm_i915_private *i915 = to_i915(drm); - return IS_GRAPHICS_VER(i915, 9, 12) || - IS_ALDERLAKE_P(i915) || - IS_METEORLAKE(i915); + return IS_GRAPHICS_VER(i915, 9, 12) && + !HAS_FLAT_CCS(i915); } static bool has_fenced_regions(struct drm_device *drm) From 3780c41460a9ad6d5d4c09a416765c6cc285033b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ma=C3=ADra=20Canal?= Date: Thu, 2 Apr 2026 16:32:35 -0300 Subject: [PATCH 15/37] drm/etnaviv: Fix armed job not being pushed to the DRM scheduler MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When xa_alloc_cyclic() failed in etnaviv_sched_push_job(), the error path skipped drm_sched_entity_push_job(). This is a violation of the DRM scheduler contract, as once a job has been armed with drm_sched_job_arm(), it must be pushed with drm_sched_entity_push_job(). From the DRM scheduler documentation, """ drm_sched_job_arm() is a point of no return since it initializes the fences and their sequence number etc. Once that function has been called, you *must* submit it with drm_sched_entity_push_job() and cannot simply abort it by calling drm_sched_job_cleanup(). 
""" Fix this by splitting the fence ID allocation into two phases: first, alloc an xarray slot before arming the job (which can fail), then fill in the actual fence with xa_store() after arming. This way, allocation failures are handled before the job is armed, and once armed, the job is always pushed to the scheduler. This also fixes a double call to drm_sched_job_cleanup(), as both etnaviv_sched_push_job() and its caller would call it on failure. Fixes: 764be12345c3 ("drm/etnaviv: convert user fence tracking to XArray") Signed-off-by: Maíra Canal Link: https://patch.msgid.link/20260402193424.2023318-1-mcanal@igalia.com Signed-off-by: Christian Gmeiner --- drivers/gpu/drm/etnaviv/etnaviv_sched.c | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/etnaviv/etnaviv_sched.c b/drivers/gpu/drm/etnaviv/etnaviv_sched.c index df4232d7e135..3cc50d697c89 100644 --- a/drivers/gpu/drm/etnaviv/etnaviv_sched.c +++ b/drivers/gpu/drm/etnaviv/etnaviv_sched.c @@ -116,16 +116,18 @@ int etnaviv_sched_push_job(struct etnaviv_gem_submit *submit) */ mutex_lock(&gpu->sched_lock); + ret = xa_alloc_cyclic(&gpu->user_fences, &submit->out_fence_id, + NULL, xa_limit_32b, &gpu->next_user_fence, + GFP_KERNEL); + if (ret < 0) + goto out_unlock; + drm_sched_job_arm(&submit->sched_job); submit->out_fence = dma_fence_get(&submit->sched_job.s_fence->finished); - ret = xa_alloc_cyclic(&gpu->user_fences, &submit->out_fence_id, - submit->out_fence, xa_limit_32b, - &gpu->next_user_fence, GFP_KERNEL); - if (ret < 0) { - drm_sched_job_cleanup(&submit->sched_job); - goto out_unlock; - } + + xa_store(&gpu->user_fences, submit->out_fence_id, + submit->out_fence, GFP_KERNEL); /* the scheduler holds on to the job now */ kref_get(&submit->refcount); From 50987d4e6c55929aa2d4d3976e74ccbae22d5017 Mon Sep 17 00:00:00 2001 From: Chen Ni Date: Fri, 27 Mar 2026 10:17:28 +0800 Subject: [PATCH 16/37] drm/panel: himax-hx83121a: Fix incorrect error check for 
devm_drm_panel_alloc() Check devm_drm_panel_alloc() return value for ERR_PTR instead of NULL. devm_drm_panel_alloc() returns an ERR_PTR on failure, never NULL. Using a NULL check skips the error path and may cause a NULL pointer dereference. Fixes: a7c61963b727 ("drm/panel: Add Himax HX83121A panel driver") Signed-off-by: Chen Ni Reviewed-by: Pengyu Luo Signed-off-by: Neil Armstrong Link: https://patch.msgid.link/20260327021728.647182-1-nichen@iscas.ac.cn --- drivers/gpu/drm/panel/panel-himax-hx83121a.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/panel/panel-himax-hx83121a.c b/drivers/gpu/drm/panel/panel-himax-hx83121a.c index ebe643ba4184..bed79aa06f46 100644 --- a/drivers/gpu/drm/panel/panel-himax-hx83121a.c +++ b/drivers/gpu/drm/panel/panel-himax-hx83121a.c @@ -596,8 +596,8 @@ static int himax_probe(struct mipi_dsi_device *dsi) ctx = devm_drm_panel_alloc(dev, struct himax, panel, &himax_panel_funcs, DRM_MODE_CONNECTOR_DSI); - if (!ctx) - return -ENOMEM; + if (IS_ERR(ctx)) + return PTR_ERR(ctx); ret = devm_regulator_bulk_get_const(&dsi->dev, ARRAY_SIZE(himax_supplies), From defab7b01e0848e004077d7d8dcc04d305ea1a27 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 13 Apr 2026 09:10:19 +0200 Subject: [PATCH 17/37] drm/panel: hx83121a: select DRM_DISPLAY_DSC_HELPER Like a number of other panel drivers, this newly merged driver needs DRM_DISPLAY_DSC_HELPER to be enabled: arm-linux-gnueabi-ld: drivers/gpu/drm/panel/panel-himax-hx83121a.o: in function `himax_prepare': panel-himax-hx83121a.c:(.text+0x1024): undefined reference to `drm_dsc_pps_payload_pack' Fixes: a7c61963b727 ("drm/panel: Add Himax HX83121A panel driver") Signed-off-by: Arnd Bergmann Reviewed-by: Neil Armstrong Reviewed-by: David Heidelberg Signed-off-by: Neil Armstrong Link: https://patch.msgid.link/20260413071043.3829868-1-arnd@kernel.org --- drivers/gpu/drm/panel/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/panel/Kconfig 
b/drivers/gpu/drm/panel/Kconfig index d6863b28ddc5..d592f4f4b939 100644 --- a/drivers/gpu/drm/panel/Kconfig +++ b/drivers/gpu/drm/panel/Kconfig @@ -208,6 +208,7 @@ config DRM_PANEL_HIMAX_HX83121A depends on OF depends on DRM_MIPI_DSI depends on BACKLIGHT_CLASS_DEVICE + select DRM_DISPLAY_DSC_HELPER select DRM_KMS_HELPER help Say Y here if you want to enable support for Himax HX83121A-based From c67e8787f6743101c90c7a9c4bb7cf6f1f739f83 Mon Sep 17 00:00:00 2001 From: Christian Van Date: Sat, 25 Apr 2026 01:39:48 -0400 Subject: [PATCH 18/37] drm/panel: feiyang-fy07024di26a30d: return display-on error mipi_dsi_dcs_set_display_on() returns an error code, but feiyang_enable() currently ignores it and always reports success. Return the DCS command result so callers can observe enable failures. Signed-off-by: Christian Van Reviewed-by: Neil Armstrong Signed-off-by: Neil Armstrong Link: https://patch.msgid.link/20260425053948.117714-1-cvan20191@gmail.com --- drivers/gpu/drm/panel/panel-feiyang-fy07024di26a30d.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/gpu/drm/panel/panel-feiyang-fy07024di26a30d.c b/drivers/gpu/drm/panel/panel-feiyang-fy07024di26a30d.c index 4f8d6d8c07e4..dbdb7e3cb7b6 100644 --- a/drivers/gpu/drm/panel/panel-feiyang-fy07024di26a30d.c +++ b/drivers/gpu/drm/panel/panel-feiyang-fy07024di26a30d.c @@ -98,9 +98,7 @@ static int feiyang_enable(struct drm_panel *panel) /* T12 (video & logic signal rise + backlight rise) T12 >= 200ms */ msleep(200); - mipi_dsi_dcs_set_display_on(ctx->dsi); - - return 0; + return mipi_dsi_dcs_set_display_on(ctx->dsi); } static int feiyang_disable(struct drm_panel *panel) From 570cf799e87ae805eacfab3b4ba66676b5fccdb6 Mon Sep 17 00:00:00 2001 From: Icenowy Zheng Date: Sun, 3 May 2026 17:17:08 +0800 Subject: [PATCH 19/37] drm/panel: boe-tv101wum-nl6: restore MODE_LPM after sending disable cmds When preparing the panel, it seems that it always expects commands to be transferred in LP mode. 
However, the disable function removes the MIPI_DSI_MODE_LPM flag, and no other function re-adds it. As the unprepare function contains no DSI commands, re-adding the flag just after disabling the panel should be safe. Add the code re-adding the flag after the two commands for disabling the panel are sent. This fixes error messages shown in kernel log when unblanking on mt8183-kukui-kodama-sku32 device. Cc: stable@vger.kernel.org Fixes: a869b9db7adf ("drm/panel: support for boe tv101wum-nl6 wuxga dsi video mode panel") Signed-off-by: Icenowy Zheng Reviewed-by: Neil Armstrong Reviewed-by: Douglas Anderson Signed-off-by: Neil Armstrong Link: https://patch.msgid.link/20260503091708.1079962-1-zhengxingda@iscas.ac.cn --- drivers/gpu/drm/panel/panel-boe-tv101wum-nl6.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/panel/panel-boe-tv101wum-nl6.c b/drivers/gpu/drm/panel/panel-boe-tv101wum-nl6.c index d5fe105bdbdd..658ce64c71eb 100644 --- a/drivers/gpu/drm/panel/panel-boe-tv101wum-nl6.c +++ b/drivers/gpu/drm/panel/panel-boe-tv101wum-nl6.c @@ -1324,6 +1324,8 @@ static int boe_panel_disable(struct drm_panel *panel) mipi_dsi_dcs_set_display_off_multi(&ctx); mipi_dsi_dcs_enter_sleep_mode_multi(&ctx); + boe->dsi->mode_flags |= MIPI_DSI_MODE_LPM; + mipi_dsi_msleep(&ctx, 150); return ctx.accum_err; From 2d4e80271f784aa0c7b17676e9762c7e8156be1c Mon Sep 17 00:00:00 2001 From: Icenowy Zheng Date: Sun, 26 Apr 2026 00:57:51 +0800 Subject: [PATCH 20/37] drm/panel: himax-hx83102: restore MODE_LPM after sending disable cmds When preparing the panel, it seems that it always expects commands to be transferred in LP mode. However, the disable function removes the MIPI_DSI_MODE_LPM flag, and no other function re-adds it. As the unprepare function contains no DSI commands, re-adding the flag just after disabling the panel should be safe. Add the code re-adding the flag after the two commands for disabling the panel are sent. 
This fixes screen unblanking (after blanking once) on mt8188-geralt-ciri-sku1 device. Cc: stable@vger.kernel.org # 6.11+ Fixes: 0ef94554dc40 ("drm/panel: himax-hx83102: Break out as separate driver") Signed-off-by: Icenowy Zheng Reviewed-by: Neil Armstrong Reviewed-by: Douglas Anderson Signed-off-by: Neil Armstrong Link: https://patch.msgid.link/20260425165751.1716569-1-zhengxingda@iscas.ac.cn --- drivers/gpu/drm/panel/panel-himax-hx83102.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/panel/panel-himax-hx83102.c b/drivers/gpu/drm/panel/panel-himax-hx83102.c index 8b2a68ee851e..a5e5c9ea7a73 100644 --- a/drivers/gpu/drm/panel/panel-himax-hx83102.c +++ b/drivers/gpu/drm/panel/panel-himax-hx83102.c @@ -937,6 +937,8 @@ static int hx83102_disable(struct drm_panel *panel) mipi_dsi_dcs_set_display_off_multi(&dsi_ctx); mipi_dsi_dcs_enter_sleep_mode_multi(&dsi_ctx); + dsi->mode_flags |= MIPI_DSI_MODE_LPM; + mipi_dsi_msleep(&dsi_ctx, 150); return dsi_ctx.accum_err; From 9b4e3495d1bd2469bf94b74930c153c2d534ddb7 Mon Sep 17 00:00:00 2001 From: Felix Kuehling Date: Mon, 20 Apr 2026 11:55:57 -0400 Subject: [PATCH 21/37] drm/amdkfd: Make all TLB-flushes heavy-weight With only one sequence number we cannot track the need for legacy vs heavy-weight flushes reliably. Always use heavy-weight. 
Signed-off-by: Felix Kuehling Reviewed-by: Philip Yang Signed-off-by: Alex Deucher (cherry picked from commit c1a3ff1d327820cd9a52bc1056b98681fc088949) Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 4 ++-- drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c | 6 +++--- drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 6 +++--- drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 4 ++-- 4 files changed, 10 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c index f829d65a79b4..f95bf6d95534 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c @@ -1360,7 +1360,7 @@ static int kfd_ioctl_map_memory_to_gpu(struct file *filep, peer_pdd = kfd_process_device_data_by_id(p, devices_arr[i]); if (WARN_ON_ONCE(!peer_pdd)) continue; - kfd_flush_tlb(peer_pdd, TLB_FLUSH_LEGACY); + kfd_flush_tlb(peer_pdd); } kfree(devices_arr); @@ -1455,7 +1455,7 @@ static int kfd_ioctl_unmap_memory_from_gpu(struct file *filep, if (WARN_ON_ONCE(!peer_pdd)) continue; if (flush_tlb) - kfd_flush_tlb(peer_pdd, TLB_FLUSH_HEAVYWEIGHT); + kfd_flush_tlb(peer_pdd); /* Remove dma mapping after tlb flush to avoid IO_PAGE_FAULT */ err = amdgpu_amdkfd_gpuvm_dmaunmap_mem(mem, peer_pdd->drm_priv); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c index ab3b2e7be9bd..9185ebe4c079 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c @@ -572,7 +572,7 @@ static int allocate_vmid(struct device_queue_manager *dqm, qpd->vmid, qpd->page_table_base); /* invalidate the VM context after pasid and vmid mapping is set up */ - kfd_flush_tlb(qpd_to_pdd(qpd), TLB_FLUSH_LEGACY); + kfd_flush_tlb(qpd_to_pdd(qpd)); if (dqm->dev->kfd2kgd->set_scratch_backing_va) dqm->dev->kfd2kgd->set_scratch_backing_va(dqm->dev->adev, @@ -610,7 +610,7 @@ static void 
deallocate_vmid(struct device_queue_manager *dqm, if (flush_texture_cache_nocpsch(q->device, qpd)) dev_err(dev, "Failed to flush TC\n"); - kfd_flush_tlb(qpd_to_pdd(qpd), TLB_FLUSH_LEGACY); + kfd_flush_tlb(qpd_to_pdd(qpd)); /* Release the vmid mapping */ set_pasid_vmid_mapping(dqm, 0, qpd->vmid); @@ -1284,7 +1284,7 @@ static int restore_process_queues_nocpsch(struct device_queue_manager *dqm, dqm->dev->adev, qpd->vmid, qpd->page_table_base); - kfd_flush_tlb(pdd, TLB_FLUSH_LEGACY); + kfd_flush_tlb(pdd); } /* Take a safe reference to the mm_struct, which may otherwise diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h index 163d665a6074..7b5b12206919 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h @@ -1554,13 +1554,13 @@ void kfd_signal_reset_event(struct kfd_node *dev); void kfd_signal_poison_consumed_event(struct kfd_node *dev, u32 pasid); void kfd_signal_process_terminate_event(struct kfd_process *p); -static inline void kfd_flush_tlb(struct kfd_process_device *pdd, - enum TLB_FLUSH_TYPE type) +static inline void kfd_flush_tlb(struct kfd_process_device *pdd) { struct amdgpu_device *adev = pdd->dev->adev; struct amdgpu_vm *vm = drm_priv_to_vm(pdd->drm_priv); - amdgpu_vm_flush_compute_tlb(adev, vm, type, pdd->dev->xcc_mask); + amdgpu_vm_flush_compute_tlb(adev, vm, TLB_FLUSH_HEAVYWEIGHT, + pdd->dev->xcc_mask); } static inline bool kfd_flush_tlb_after_unmap(struct kfd_dev *dev) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c index 38085a0a0f58..35ec67d9739b 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c @@ -1424,7 +1424,7 @@ svm_range_unmap_from_gpus(struct svm_range *prange, unsigned long start, if (r) break; } - kfd_flush_tlb(pdd, TLB_FLUSH_HEAVYWEIGHT); + kfd_flush_tlb(pdd); } return r; @@ -1571,7 +1571,7 @@ svm_range_map_to_gpus(struct svm_range *prange, unsigned long offset, } } - 
kfd_flush_tlb(pdd, TLB_FLUSH_LEGACY); + kfd_flush_tlb(pdd); } return r; From 7bbfb2559bcec39d1a4e1182d931a2046112c352 Mon Sep 17 00:00:00 2001 From: "John B. Moore" Date: Tue, 28 Apr 2026 11:35:12 -0500 Subject: [PATCH 22/37] drm/amdgpu/gfx9: drop unnecessary 64-bit fence flag check in KIQ MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Remove the BUG_ON(flags & AMDGPU_FENCE_FLAG_64BIT) assertion from gfx_v9_0_ring_emit_fence_kiq(). The KIQ hardware supports 64-bit fence writes; the 32-bit writeback address constraint is an upper-layer convention, not a hardware limitation. The check serves no purpose and should not be present. Found by code inspection while investigating related BUG_ON assertions in the GFX and compute ring emission paths. Reviewed-by: Christian König Signed-off-by: John B. Moore Signed-off-by: Alex Deucher (cherry picked from commit 1b1101a46a426bb4328116bb5273c326a2780389) Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index 95be105671ec..86c7c2a429b7 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -5660,9 +5660,6 @@ static void gfx_v9_0_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr, { struct amdgpu_device *adev = ring->adev; - /* we only allocate 32bit for each seq wb address */ - BUG_ON(flags & AMDGPU_FENCE_FLAG_64BIT); - /* write fence seq to the "addr" */ amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) | From 2a561b361b7681509710f3cfc3d95d54c87ac69f Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Mon, 27 Apr 2026 11:38:58 -0400 Subject: [PATCH 23/37] drm/amdgpu/pm: add missing revision check for CI MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The ci_populate_all_memory_levels() workaround 
only applies to revision 0 SKUs. Link: https://gitlab.freedesktop.org/drm/amd/-/work_items/1816 Fixes: 9f4b35411cfe ("drm/amd/powerplay: add CI asics support to smumgr (v3)") Reviewed-by: Timur Kristóf Reviewed-by: Kent Russell Signed-off-by: Alex Deucher (cherry picked from commit 1db15ba8f72f400bbad8ae0ce24fafc43429d4bd) Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/pm/powerplay/smumgr/ci_smumgr.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/pm/powerplay/smumgr/ci_smumgr.c b/drivers/gpu/drm/amd/pm/powerplay/smumgr/ci_smumgr.c index 731355bdb9bc..0a3a0722b5c9 100644 --- a/drivers/gpu/drm/amd/pm/powerplay/smumgr/ci_smumgr.c +++ b/drivers/gpu/drm/amd/pm/powerplay/smumgr/ci_smumgr.c @@ -1333,8 +1333,9 @@ static int ci_populate_all_memory_levels(struct pp_hwmgr *hwmgr) dev_id = adev->pdev->device; - if ((dpm_table->mclk_table.count >= 2) - && ((dev_id == 0x67B0) || (dev_id == 0x67B1))) { + if ((dpm_table->mclk_table.count >= 2) && + ((dev_id == 0x67B0) || (dev_id == 0x67B1)) && + (adev->pdev->revision == 0)) { smu_data->smc_state_table.MemoryLevel[1].MinVddci = smu_data->smc_state_table.MemoryLevel[0].MinVddci; smu_data->smc_state_table.MemoryLevel[1].MinMvdd = From 1987c79b4fe5789dfa14423e78b5c25f6acf3e9d Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Tue, 28 Apr 2026 10:42:49 -0400 Subject: [PATCH 24/37] drm/amdgpu/pm: align Hawaii mclk workaround with radeon MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Align the hawaii mclk workaround with radeon and windows. 
Link: https://gitlab.freedesktop.org/drm/amd/-/work_items/1816 Fixes: 9f4b35411cfe ("drm/amd/powerplay: add CI asics support to smumgr (v3)") Reviewed-by: Timur Kristóf Reviewed-by: Kent Russell Signed-off-by: Alex Deucher (cherry picked from commit 9649528b637f668c5af9f2b83ca4ad8576ae2121) Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/pm/powerplay/smumgr/ci_smumgr.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/pm/powerplay/smumgr/ci_smumgr.c b/drivers/gpu/drm/amd/pm/powerplay/smumgr/ci_smumgr.c index 0a3a0722b5c9..3650e7beeb67 100644 --- a/drivers/gpu/drm/amd/pm/powerplay/smumgr/ci_smumgr.c +++ b/drivers/gpu/drm/amd/pm/powerplay/smumgr/ci_smumgr.c @@ -1336,10 +1336,10 @@ static int ci_populate_all_memory_levels(struct pp_hwmgr *hwmgr) if ((dpm_table->mclk_table.count >= 2) && ((dev_id == 0x67B0) || (dev_id == 0x67B1)) && (adev->pdev->revision == 0)) { - smu_data->smc_state_table.MemoryLevel[1].MinVddci = - smu_data->smc_state_table.MemoryLevel[0].MinVddci; - smu_data->smc_state_table.MemoryLevel[1].MinMvdd = - smu_data->smc_state_table.MemoryLevel[0].MinMvdd; + smu_data->smc_state_table.MemoryLevel[1].MinVddc = + smu_data->smc_state_table.MemoryLevel[0].MinVddc; + smu_data->smc_state_table.MemoryLevel[1].MinVddcPhases = + smu_data->smc_state_table.MemoryLevel[0].MinVddcPhases; } smu_data->smc_state_table.MemoryLevel[0].ActivityLevel = 0x1F; CONVERT_FROM_HOST_TO_SMC_US(smu_data->smc_state_table.MemoryLevel[0].ActivityLevel); From 17223816498f7b117d138d18eb0eba63604dc74e Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Mon, 27 Apr 2026 11:40:25 -0400 Subject: [PATCH 25/37] drm/radeon: add missing revision check for CI MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The memory level workarounds only apply to revision 0 SKUs. 
Link: https://gitlab.freedesktop.org/drm/amd/-/work_items/1816 Fixes: 127e056e2a82 ("drm/radeon: fix mclk vddc configuration for cards for hawaii") Fixes: 21b8a369046f ("drm/radeon: fix dram timing for certain hawaii boards") Fixes: 90b2fee35cb9 ("drm/radeon: fix dpm mc init for certain hawaii boards") Reviewed-by: Timur Kristóf Reviewed-by: Kent Russell Signed-off-by: Alex Deucher (cherry picked from commit 4d8dcc14311515077062b5740f39f427075de5c9) Cc: stable@vger.kernel.org --- drivers/gpu/drm/radeon/ci_dpm.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/radeon/ci_dpm.c b/drivers/gpu/drm/radeon/ci_dpm.c index 22321eb95b7d..703848fac189 100644 --- a/drivers/gpu/drm/radeon/ci_dpm.c +++ b/drivers/gpu/drm/radeon/ci_dpm.c @@ -2461,7 +2461,8 @@ static void ci_register_patching_mc_arb(struct radeon_device *rdev, if (patch && ((rdev->pdev->device == 0x67B0) || - (rdev->pdev->device == 0x67B1))) { + (rdev->pdev->device == 0x67B1)) && + (rdev->pdev->revision == 0)) { if ((memory_clock > 100000) && (memory_clock <= 125000)) { tmp2 = (((0x31 * engine_clock) / 125000) - 1) & 0xff; *dram_timimg2 &= ~0x00ff0000; @@ -3304,7 +3305,8 @@ static int ci_populate_all_memory_levels(struct radeon_device *rdev) pi->smc_state_table.MemoryLevel[0].EnabledForActivity = 1; if ((dpm_table->mclk_table.count >= 2) && - ((rdev->pdev->device == 0x67B0) || (rdev->pdev->device == 0x67B1))) { + ((rdev->pdev->device == 0x67B0) || (rdev->pdev->device == 0x67B1)) && + (rdev->pdev->revision == 0)) { pi->smc_state_table.MemoryLevel[1].MinVddc = pi->smc_state_table.MemoryLevel[0].MinVddc; pi->smc_state_table.MemoryLevel[1].MinVddcPhases = @@ -4493,7 +4495,8 @@ static int ci_register_patching_mc_seq(struct radeon_device *rdev, if (patch && ((rdev->pdev->device == 0x67B0) || - (rdev->pdev->device == 0x67B1))) { + (rdev->pdev->device == 0x67B1)) && + (rdev->pdev->revision == 0)) { for (i = 0; i < table->last; i++) { if (table->last >= 
SMU7_DISCRETE_MC_REGISTER_ARRAY_SIZE) return -EINVAL; From 78d2e624fa073c14970aa097adcf3ea31c157a66 Mon Sep 17 00:00:00 2001 From: "John B. Moore" Date: Mon, 27 Apr 2026 16:06:28 -0500 Subject: [PATCH 26/37] drm/amdgpu/sdma4: replace BUG_ON with WARN_ON in fence emission MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit sdma_v4_0_ring_emit_fence() contains two BUG_ON(addr & 0x3) assertions that verify fence writeback addresses are dword-aligned. These assertions can be reached from unprivileged userspace via crafted DRM_IOCTL_AMDGPU_CS submissions, causing a fatal kernel panic in a scheduler worker thread. Replace both BUG_ON() calls with WARN_ON() to log the condition without crashing the kernel. A misaligned fence address at this point indicates a driver bug, but crashing the kernel is never the correct response when the assertion is reachable from userspace. The CS IOCTL path is the correct place to filter invalid submissions; the ring emission callback is too late to do anything about it. Fixes: 2130f89ced2c ("drm/amdgpu: add SDMA v4.0 implementation (v2)") Reviewed-by: Christian König Signed-off-by: John B. 
Moore Signed-off-by: Alex Deucher (cherry picked from commit b90250bd933afd1ba94d86d6b13821997b22b18e) Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c index 44f0f23e1148..e64f2f6df9a9 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c @@ -889,7 +889,7 @@ static void sdma_v4_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 se /* write the fence */ amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_FENCE)); /* zero in first two bits */ - BUG_ON(addr & 0x3); + WARN_ON(addr & 0x3); amdgpu_ring_write(ring, lower_32_bits(addr)); amdgpu_ring_write(ring, upper_32_bits(addr)); amdgpu_ring_write(ring, lower_32_bits(seq)); @@ -899,7 +899,7 @@ static void sdma_v4_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 se addr += 4; amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_FENCE)); /* zero in first two bits */ - BUG_ON(addr & 0x3); + WARN_ON(addr & 0x3); amdgpu_ring_write(ring, lower_32_bits(addr)); amdgpu_ring_write(ring, upper_32_bits(addr)); amdgpu_ring_write(ring, upper_32_bits(seq)); From e6c2e6c2e1fa066968a16aca1cb66cd1bdde7741 Mon Sep 17 00:00:00 2001 From: Philip Yang Date: Mon, 27 Apr 2026 09:30:23 -0400 Subject: [PATCH 27/37] drm/amdgpu: zero-initialize GART table on allocation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit GART TLB is flushed after unmapping but not after mapping. Since amdgpu_bo_create_kernel() does not zero-initialize the buffer, when a single PTE is written the TLB may speculatively load other uninitialized entries from the same cacheline. Those garbage entries can appear valid, and a subsequent write to another PTE in the same cacheline may cause the GPU to use a stale garbage PTE from the TLB. 
Fix this by calling memset_io() to zero-initialize the GART table with gart_pte_flags immediately after allocation. Using AMDGPU_GEM_CREATE_VRAM_CLEARED, SDMA-based clear will not work since SDMA needs GART to be initialized to work. Suggested-by: Felix Kuehling Signed-off-by: Philip Yang Reviewed-by: Christian König Signed-off-by: Alex Deucher (cherry picked from commit d9af8263b82b6eaa60c5718e0c6631c5037e4b24) Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c index bc772ca3dab7..b6f849d51c2e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c @@ -262,12 +262,19 @@ void amdgpu_gart_table_ram_free(struct amdgpu_device *adev) */ int amdgpu_gart_table_vram_alloc(struct amdgpu_device *adev) { + int r; + if (adev->gart.bo != NULL) return 0; - return amdgpu_bo_create_kernel(adev, adev->gart.table_size, PAGE_SIZE, - AMDGPU_GEM_DOMAIN_VRAM, &adev->gart.bo, - NULL, (void *)&adev->gart.ptr); + r = amdgpu_bo_create_kernel(adev, adev->gart.table_size, PAGE_SIZE, + AMDGPU_GEM_DOMAIN_VRAM, &adev->gart.bo, + NULL, (void *)&adev->gart.ptr); + if (r) + return r; + + memset_io(adev->gart.ptr, adev->gart.gart_pte_flags, adev->gart.table_size); + return 0; } /** From 81665e35f143d93adef654f3be1360def9196e72 Mon Sep 17 00:00:00 2001 From: Xiaogang Chen Date: Fri, 24 Apr 2026 13:47:01 -0500 Subject: [PATCH 28/37] drm/amdkfd: Check if there are kfd processes using adev by kfd_processes_count During gpu hot-unplug need check if there are kfd processes still using the being removed gpu before clean resources of the device. Current driver checks if kfd_processes_table is empty. kfd processes are not terminated after removed from kfd_processes_table immediately. They are still alive and may access the device until kfd_process_wq work queue got ran.
Check kfd->kfd_processes_count value that is updated after kfd process got uninitialized when its ref becomes zero. Fixes: 6cca686dfce7 ("drm/amdkfd: kfd driver supports hot unplug/replug amdgpu devices") Signed-off-by: Xiaogang Chen Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher (cherry picked from commit d12d05c4bc4c15585130af43e897923ff292df7b) --- drivers/gpu/drm/amd/amdkfd/kfd_device.c | 33 +------------------------ 1 file changed, 1 insertion(+), 32 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c index 8ff97bf7d95a..b7f8f7ff8198 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c @@ -1737,37 +1737,6 @@ bool kgd2kfd_vmfault_fast_path(struct amdgpu_device *adev, struct amdgpu_iv_entr return false; } -/* check if there is kfd process still uses adev */ -static bool kgd2kfd_check_device_idle(struct amdgpu_device *adev) -{ - struct kfd_process *p; - struct hlist_node *p_temp; - unsigned int temp; - struct kfd_node *dev; - - mutex_lock(&kfd_processes_mutex); - - if (hash_empty(kfd_processes_table)) { - mutex_unlock(&kfd_processes_mutex); - return true; - } - - /* check if there is device still use adev */ - hash_for_each_safe(kfd_processes_table, temp, p_temp, p, kfd_processes) { - for (int i = 0; i < p->n_pdds; i++) { - dev = p->pdds[i]->dev; - if (dev->adev == adev) { - mutex_unlock(&kfd_processes_mutex); - return false; - } - } - } - - mutex_unlock(&kfd_processes_mutex); - - return true; -} - /** kgd2kfd_teardown_processes - gracefully tear down existing * kfd processes that use adev * @@ -1800,7 +1769,7 @@ void kgd2kfd_teardown_processes(struct amdgpu_device *adev) mutex_unlock(&kfd_processes_mutex); /* wait all kfd processes use adev terminate */ - while (!kgd2kfd_check_device_idle(adev)) + while (!!atomic_read(&adev->kfd.dev->kfd_processes_count)) cond_resched(); } From 6da7b1242da4455b11c24ce667d1cab1a348c8ea Mon Sep 17 00:00:00 2001 From: 
Sunil Khatri Date: Mon, 4 May 2026 18:21:17 +0530 Subject: [PATCH 29/37] drm/amdgpu/userq: fix access to stale wptr mapping MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Use drm_exec to take both locks i.e vm root bo and wptr_obj bo to access the mapping data properly. This fixes the security issue of unmap the wptr_obj while a queue creation is in progress and passing other bo at same address. Signed-off-by: Sunil Khatri Reviewed-by: Christian König Signed-off-by: Alex Deucher (cherry picked from commit 1fc6c8ab45dbee096469c08c13f6099d57a52d6c) Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/mes_userqueue.c | 97 +++++++++------------- 1 file changed, 38 insertions(+), 59 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c b/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c index 2fc39a6938f6..5b4121ddc78c 100644 --- a/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c +++ b/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c @@ -30,34 +30,6 @@ #define AMDGPU_USERQ_PROC_CTX_SZ PAGE_SIZE #define AMDGPU_USERQ_GANG_CTX_SZ PAGE_SIZE -static int -mes_userq_map_gtt_bo_to_gart(struct amdgpu_bo *bo) -{ - int ret; - - ret = amdgpu_bo_reserve(bo, true); - if (ret) { - DRM_ERROR("Failed to reserve bo. ret %d\n", ret); - goto err_reserve_bo_failed; - } - - ret = amdgpu_ttm_alloc_gart(&bo->tbo); - if (ret) { - DRM_ERROR("Failed to bind bo to GART. 
ret %d\n", ret); - goto err_map_bo_gart_failed; - } - - amdgpu_bo_unreserve(bo); - bo = amdgpu_bo_ref(bo); - - return 0; - -err_map_bo_gart_failed: - amdgpu_bo_unreserve(bo); -err_reserve_bo_failed: - return ret; -} - static int mes_userq_create_wptr_mapping(struct amdgpu_device *adev, struct amdgpu_userq_mgr *uq_mgr, @@ -65,55 +37,62 @@ mes_userq_create_wptr_mapping(struct amdgpu_device *adev, uint64_t wptr) { struct amdgpu_bo_va_mapping *wptr_mapping; - struct amdgpu_vm *wptr_vm; struct amdgpu_userq_obj *wptr_obj = &queue->wptr_obj; + struct amdgpu_bo *obj; + struct amdgpu_vm *vm = queue->vm; + struct drm_exec exec; int ret; - wptr_vm = queue->vm; - ret = amdgpu_bo_reserve(wptr_vm->root.bo, false); - if (ret) - return ret; - wptr &= AMDGPU_GMC_HOLE_MASK; - wptr_mapping = amdgpu_vm_bo_lookup_mapping(wptr_vm, wptr >> PAGE_SHIFT); - amdgpu_bo_unreserve(wptr_vm->root.bo); - if (!wptr_mapping) { - DRM_ERROR("Failed to lookup wptr bo\n"); - return -EINVAL; + + drm_exec_init(&exec, DRM_EXEC_IGNORE_DUPLICATES, 2); + drm_exec_until_all_locked(&exec) { + ret = amdgpu_vm_lock_pd(vm, &exec, 1); + drm_exec_retry_on_contention(&exec); + if (unlikely(ret)) + goto fail_lock; + + wptr_mapping = amdgpu_vm_bo_lookup_mapping(vm, wptr >> PAGE_SHIFT); + if (!wptr_mapping) { + ret = -EINVAL; + goto fail_lock; + } + + obj = wptr_mapping->bo_va->base.bo; + ret = drm_exec_lock_obj(&exec, &obj->tbo.base); + drm_exec_retry_on_contention(&exec); + if (unlikely(ret)) + goto fail_lock; } - wptr_obj->obj = wptr_mapping->bo_va->base.bo; + wptr_obj->obj = amdgpu_bo_ref(wptr_mapping->bo_va->base.bo); if (wptr_obj->obj->tbo.base.size > PAGE_SIZE) { - DRM_ERROR("Requested GART mapping for wptr bo larger than one page\n"); - return -EINVAL; - } - - ret = mes_userq_map_gtt_bo_to_gart(wptr_obj->obj); - if (ret) { - DRM_ERROR("Failed to map wptr bo to GART\n"); - return ret; - } - - ret = amdgpu_bo_reserve(wptr_obj->obj, true); - if (ret) { - DRM_ERROR("Failed to reserve wptr bo\n"); - return ret; + ret 
= -EINVAL; + goto fail_map; } /* TODO use eviction fence instead of pinning. */ ret = amdgpu_bo_pin(wptr_obj->obj, AMDGPU_GEM_DOMAIN_GTT); if (ret) { - drm_file_err(uq_mgr->file, "[Usermode queues] Failed to pin wptr bo\n"); - goto unresv_bo; + DRM_ERROR("Failed to pin wptr bo. ret %d\n", ret); + goto fail_map; + } + + ret = amdgpu_ttm_alloc_gart(&wptr_obj->obj->tbo); + if (ret) { + DRM_ERROR("Failed to bind bo to GART. ret %d\n", ret); + goto fail_map; } queue->wptr_obj.gpu_addr = amdgpu_bo_gpu_offset(wptr_obj->obj); - amdgpu_bo_unreserve(wptr_obj->obj); + drm_exec_fini(&exec); return 0; -unresv_bo: - amdgpu_bo_unreserve(wptr_obj->obj); +fail_map: + amdgpu_bo_unref(&wptr_obj->obj); +fail_lock: + drm_exec_fini(&exec); return ret; } From 4e02e0afa95f691dc7cc17538cdd648089a843f0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Mon, 13 Oct 2025 15:26:02 +0200 Subject: [PATCH 30/37] drm/amdgpu: nuke amdgpu_userq_fence_slab v2 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit As preparation for independent fences remove the extra slab, kmalloc should do just fine. 
v2: use GFP_KERNEL instead of GFP_ATOMIC Signed-off-by: Christian König Reviewed-by: Prike Liang Reviewed-by: Sunil Khatri Signed-off-by: Alex Deucher (cherry picked from commit 0d831487b5be0ae59cac865a0aa87b0acc3dc717) --- drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 13 ++------- .../gpu/drm/amd/amdgpu/amdgpu_userq_fence.c | 28 +++---------------- .../gpu/drm/amd/amdgpu/amdgpu_userq_fence.h | 3 -- 3 files changed, 7 insertions(+), 37 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index 46aae3fad4bf..60debd543e44 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -3149,11 +3149,7 @@ static int __init amdgpu_init(void) r = amdgpu_sync_init(); if (r) - goto error_sync; - - r = amdgpu_userq_fence_slab_init(); - if (r) - goto error_fence; + return r; amdgpu_register_atpx_handler(); amdgpu_acpi_detect(); @@ -3161,7 +3157,7 @@ static int __init amdgpu_init(void) /* Ignore KFD init failures when CONFIG_HSA_AMD is not set. 
*/ r = amdgpu_amdkfd_init(); if (r && r != -ENOENT) - goto error_fence; + goto error_fini_sync; if (amdgpu_pp_feature_mask & PP_OVERDRIVE_MASK) { add_taint(TAINT_CPU_OUT_OF_SPEC, LOCKDEP_STILL_OK); @@ -3172,10 +3168,8 @@ static int __init amdgpu_init(void) /* let modprobe override vga console setting */ return pci_register_driver(&amdgpu_kms_pci_driver); -error_fence: +error_fini_sync: amdgpu_sync_fini(); - -error_sync: return r; } @@ -3186,7 +3180,6 @@ static void __exit amdgpu_exit(void) amdgpu_unregister_atpx_handler(); amdgpu_acpi_release(); amdgpu_sync_fini(); - amdgpu_userq_fence_slab_fini(); mmu_notifier_synchronize(); amdgpu_xcp_drv_release(); } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c index da39ac862f37..e2d5f04296e1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c @@ -32,29 +32,9 @@ #include "amdgpu.h" #include "amdgpu_userq_fence.h" -static const struct dma_fence_ops amdgpu_userq_fence_ops; -static struct kmem_cache *amdgpu_userq_fence_slab; - #define AMDGPU_USERQ_MAX_HANDLES (1U << 16) -int amdgpu_userq_fence_slab_init(void) -{ - amdgpu_userq_fence_slab = kmem_cache_create("amdgpu_userq_fence", - sizeof(struct amdgpu_userq_fence), - 0, - SLAB_HWCACHE_ALIGN, - NULL); - if (!amdgpu_userq_fence_slab) - return -ENOMEM; - - return 0; -} - -void amdgpu_userq_fence_slab_fini(void) -{ - rcu_barrier(); - kmem_cache_destroy(amdgpu_userq_fence_slab); -} +static const struct dma_fence_ops amdgpu_userq_fence_ops; static inline struct amdgpu_userq_fence *to_amdgpu_userq_fence(struct dma_fence *f) { @@ -231,7 +211,7 @@ void amdgpu_userq_fence_driver_put(struct amdgpu_userq_fence_driver *fence_drv) static int amdgpu_userq_fence_alloc(struct amdgpu_userq_fence **userq_fence) { - *userq_fence = kmem_cache_alloc(amdgpu_userq_fence_slab, GFP_ATOMIC); + *userq_fence = kmalloc(sizeof(**userq_fence), GFP_KERNEL); return *userq_fence ? 
0 : -ENOMEM; } @@ -342,7 +322,7 @@ static void amdgpu_userq_fence_free(struct rcu_head *rcu) amdgpu_userq_fence_driver_put(fence_drv); kvfree(userq_fence->fence_drv_array); - kmem_cache_free(amdgpu_userq_fence_slab, userq_fence); + kfree(userq_fence); } static void amdgpu_userq_fence_release(struct dma_fence *f) @@ -545,7 +525,7 @@ int amdgpu_userq_signal_ioctl(struct drm_device *dev, void *data, r = amdgpu_userq_fence_create(queue, userq_fence, wptr, &fence); if (r) { mutex_unlock(&userq_mgr->userq_mutex); - kmem_cache_free(amdgpu_userq_fence_slab, userq_fence); + kfree(userq_fence); goto put_gobj_write; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.h index d56246ad8c26..d355a0eecc07 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.h @@ -58,9 +58,6 @@ struct amdgpu_userq_fence_driver { char timeline_name[TASK_COMM_LEN]; }; -int amdgpu_userq_fence_slab_init(void); -void amdgpu_userq_fence_slab_fini(void); - void amdgpu_userq_fence_driver_get(struct amdgpu_userq_fence_driver *fence_drv); void amdgpu_userq_fence_driver_put(struct amdgpu_userq_fence_driver *fence_drv); int amdgpu_userq_fence_driver_alloc(struct amdgpu_device *adev, From 26f6654a9a60eb4d241f42a0ec85412e8821480b Mon Sep 17 00:00:00 2001 From: Osama Abdelkader Date: Thu, 23 Apr 2026 22:06:20 +0200 Subject: [PATCH 31/37] drm/exynos: remove bridge when component_add fails MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Use devm_drm_bridge_add() so the bridge is released if probe fails after registration, and drop the manual drm_bridge_remove() in remove(). Check the return value of devm_drm_bridge_add(). 
Signed-off-by: Osama Abdelkader Fixes: 576d72fbfb45 ("drm/exynos: mic: add a bridge at probe") Cc: stable@vger.kernel.org Reviewed-by: Raphaël Gallais-Pou Reviewed-by: Luca Ceresoli Link: https://patch.msgid.link/20260423200622.325076-2-osama.abdelkader@gmail.com Signed-off-by: Luca Ceresoli --- drivers/gpu/drm/exynos/exynos_drm_mic.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/exynos/exynos_drm_mic.c b/drivers/gpu/drm/exynos/exynos_drm_mic.c index 29a8366513fa..e68c954ec3e6 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_mic.c +++ b/drivers/gpu/drm/exynos/exynos_drm_mic.c @@ -423,7 +423,9 @@ static int exynos_mic_probe(struct platform_device *pdev) mic->bridge.of_node = dev->of_node; - drm_bridge_add(&mic->bridge); + ret = devm_drm_bridge_add(dev, &mic->bridge); + if (ret) + goto err; pm_runtime_enable(dev); @@ -443,12 +445,8 @@ static int exynos_mic_probe(struct platform_device *pdev) static void exynos_mic_remove(struct platform_device *pdev) { - struct exynos_mic *mic = platform_get_drvdata(pdev); - component_del(&pdev->dev, &exynos_mic_component_ops); pm_runtime_disable(&pdev->dev); - - drm_bridge_remove(&mic->bridge); } static const struct of_device_id exynos_mic_of_match[] = { From 60a1e131a811b68703da58fd805ab359b704ab03 Mon Sep 17 00:00:00 2001 From: Gustavo Sousa Date: Thu, 16 Apr 2026 15:17:19 -0300 Subject: [PATCH 32/37] drm/xe/hdcp: Add NULL check for media_gt in intel_hdcp_gsc_check_status() When media GT is disabled via configfs, there is no allocation for media_gt, which is kept as NULL. In such scenario, intel_hdcp_gsc_check_status() results in a kernel pagefault error due to &gt->uc.gsc being evaluated as an invalid memory address. Fix that by introducing a NULL check on media_gt and bailing out early if so. While at it, also drop the NULL check for gsc, since it can't be NULL if media_gt is not NULL. v2: - Get address for gsc only after checking that gt is not NULL.
(Shuicheng) - Drop the NULL check for gsc. (Shuicheng) v3: - Add "Fixes" and "Cc: " tags. (Matt) Fixes: 4af50beb4e0f ("drm/xe: Use gsc_proxy_init_done to check proxy status") Cc: # v6.10+ Reviewed-by: Matt Roper Reviewed-by: Shuicheng Lin Link: https://patch.msgid.link/20260416-check-for-null-media_gt-in-intel_hdcp_gsc_check_status-v2-1-9adb9fd3b621@intel.com Signed-off-by: Gustavo Sousa (cherry picked from commit bfaf87e84ca3ca3f6e275f9ae56da47a8b55ffd1) Signed-off-by: Matthew Brost --- drivers/gpu/drm/xe/display/xe_hdcp_gsc.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/xe/display/xe_hdcp_gsc.c b/drivers/gpu/drm/xe/display/xe_hdcp_gsc.c index 29c72aa4b0d2..33494b86205d 100644 --- a/drivers/gpu/drm/xe/display/xe_hdcp_gsc.c +++ b/drivers/gpu/drm/xe/display/xe_hdcp_gsc.c @@ -37,9 +37,17 @@ static bool intel_hdcp_gsc_check_status(struct drm_device *drm) struct xe_device *xe = to_xe_device(drm); struct xe_tile *tile = xe_device_get_root_tile(xe); struct xe_gt *gt = tile->media_gt; - struct xe_gsc *gsc = &gt->uc.gsc; + struct xe_gsc *gsc; - if (!gsc || !xe_uc_fw_is_available(&gsc->fw)) { + if (!gt) { + drm_dbg_kms(&xe->drm, + "not checking GSC status for HDCP2.x: media GT not present or disabled\n"); + return false; + } + + gsc = &gt->uc.gsc; + + if (!xe_uc_fw_is_available(&gsc->fw)) { drm_dbg_kms(&xe->drm, "GSC Components not ready for HDCP2.x\n"); return false; From d01012c740bbb298b957e30cc0848e482c6f486f Mon Sep 17 00:00:00 2001 From: Shuicheng Lin Date: Tue, 28 Apr 2026 20:14:48 +0000 Subject: [PATCH 33/37] drm/xe/pf: Fix EAGAIN sign in pf_migration_consume() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit PTR_ERR() returns a negative value, so comparing against the positive EAGAIN is always true for ERR_PTR(-EAGAIN), causing pf_migration_consume() to bail out instead of continuing to the remaining GTs. On multi-GT platforms this can skip GTs that already have data ready. 
Compare against -EAGAIN to match the intent (and the following line that correctly uses -EAGAIN). While at it, gate PTR_ERR() with IS_ERR(). v2: add IS_ERR() guard before PTR_ERR(). (Gustavo) Fixes: 67df4a5cbc58 ("drm/xe/pf: Add data structures and handlers for migration rings") Cc: Michał Winiarski Reviewed-by: Gustavo Sousa Link: https://patch.msgid.link/20260428201448.3999428-1-shuicheng.lin@intel.com Signed-off-by: Shuicheng Lin (cherry picked from commit 9d770e72e1edb54beacfce5f402edb51632811e3) Signed-off-by: Matthew Brost --- drivers/gpu/drm/xe/xe_sriov_pf_migration.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_sriov_pf_migration.c b/drivers/gpu/drm/xe/xe_sriov_pf_migration.c index 6c4b16409cc9..150a241110fb 100644 --- a/drivers/gpu/drm/xe/xe_sriov_pf_migration.c +++ b/drivers/gpu/drm/xe/xe_sriov_pf_migration.c @@ -149,10 +149,11 @@ pf_migration_consume(struct xe_device *xe, unsigned int vfid) for_each_gt(gt, xe, gt_id) { data = xe_gt_sriov_pf_migration_save_consume(gt, vfid); - if (data && PTR_ERR(data) != EAGAIN) + if (!data) + continue; + if (!IS_ERR(data) || PTR_ERR(data) != -EAGAIN) return data; - if (PTR_ERR(data) == -EAGAIN) - more_data = true; + more_data = true; } if (!more_data) From b87951a0ae9f95ca6590bf0939edced7d36929dd Mon Sep 17 00:00:00 2001 From: Shuicheng Lin Date: Wed, 29 Apr 2026 19:22:59 +0000 Subject: [PATCH 34/37] drm/xe/pf: Fix MMIO access using PF view instead of VF view during migration MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit pf_migration_mmio_save() and pf_migration_mmio_restore() initialize a local VF-specific MMIO view via xe_mmio_init_vf_view() but then pass &gt->mmio (the PF base) to all xe_mmio_read32()/xe_mmio_write32() calls instead of the local &mmio. This causes the PF own SW flag registers to be saved/restored rather than the target VF registers, silently corrupting migration state. 
Use the VF MMIO view for all register accesses, matching the correct pattern used in pf_clear_vf_scratch_regs(). Fixes: b7c1b990f719 ("drm/xe/pf: Handle MMIO migration data as part of PF control") Cc: Michał Winiarski Assisted-by: Claude:claude-opus-4.6 Reviewed-by: Michal Wajdeczko Reviewed-by: Stuart Summers Link: https://patch.msgid.link/20260429192259.4009211-1-shuicheng.lin@intel.com Signed-off-by: Shuicheng Lin (cherry picked from commit 7d9c39cfb31ff389490ca1308767c2807a9829a6) Signed-off-by: Matthew Brost --- drivers/gpu/drm/xe/xe_gt_sriov_pf_migration.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_migration.c b/drivers/gpu/drm/xe/xe_gt_sriov_pf_migration.c index 87a164efcc33..01fe03b9efe8 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_migration.c +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_migration.c @@ -385,10 +385,10 @@ static int pf_migration_mmio_save(struct xe_gt *gt, unsigned int vfid, void *buf if (xe_gt_is_media_type(gt)) for (n = 0; n < MED_VF_SW_FLAG_COUNT; n++) - regs[n] = xe_mmio_read32(&gt->mmio, MED_VF_SW_FLAG(n)); + regs[n] = xe_mmio_read32(&mmio, MED_VF_SW_FLAG(n)); else for (n = 0; n < VF_SW_FLAG_COUNT; n++) - regs[n] = xe_mmio_read32(&gt->mmio, VF_SW_FLAG(n)); + regs[n] = xe_mmio_read32(&mmio, VF_SW_FLAG(n)); return 0; } @@ -407,10 +407,10 @@ static int pf_migration_mmio_restore(struct xe_gt *gt, unsigned int vfid, if (xe_gt_is_media_type(gt)) for (n = 0; n < MED_VF_SW_FLAG_COUNT; n++) - xe_mmio_write32(&gt->mmio, MED_VF_SW_FLAG(n), regs[n]); + xe_mmio_write32(&mmio, MED_VF_SW_FLAG(n), regs[n]); else for (n = 0; n < VF_SW_FLAG_COUNT; n++) - xe_mmio_write32(&gt->mmio, VF_SW_FLAG(n), regs[n]); + xe_mmio_write32(&mmio, VF_SW_FLAG(n), regs[n]); return 0; } From b29987dfd943e655df6e3b641ecffad5cc1509c2 Mon Sep 17 00:00:00 2001 From: Satyanarayana K V P Date: Mon, 4 May 2026 09:49:26 +0000 Subject: [PATCH 35/37] drm/xe/guc: Exclude indirect ring state page from ADS engine state size The engine 
state size reported to GuC via ADS should only include the engine state portion and should not include the indirect ring state page that comes after it in the context image. The GuC uses this size to overwrite the engine state in the LRC on watchdog resets and we don't want it to overwrite the indirect ring state as well. Fixes: d6219e1cd5e3 ("drm/xe: Add Indirect Ring State support") Suggested-by: Daniele Ceraolo Spurio Signed-off-by: Satyanarayana K V P Cc: Michal Wajdeczko Cc: Matthew Brost Reviewed-by: Matthew Brost Reviewed-by: Daniele Ceraolo Spurio Signed-off-by: Daniele Ceraolo Spurio Link: https://patch.msgid.link/20260504094924.3760713-4-satyanarayana.k.v.p@intel.com (cherry picked from commit 3ec5f003f6c377beda8bd5438941f5a7795e1848) Signed-off-by: Matthew Brost --- drivers/gpu/drm/xe/xe_guc_ads.c | 5 +---- drivers/gpu/drm/xe/xe_lrc.c | 11 +++++++++-- drivers/gpu/drm/xe/xe_lrc.h | 2 +- 3 files changed, 11 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_guc_ads.c b/drivers/gpu/drm/xe/xe_guc_ads.c index 81b5f01b1f65..2b835d48b565 100644 --- a/drivers/gpu/drm/xe/xe_guc_ads.c +++ b/drivers/gpu/drm/xe/xe_guc_ads.c @@ -512,12 +512,9 @@ static void guc_golden_lrc_init(struct xe_guc_ads *ads) * that starts after the execlists LRC registers. This is * required to allow the GuC to restore just the engine state * when a watchdog reset occurs. - * We calculate the engine state size by removing the size of - * what comes before it in the context image (which is identical - * on all engines). 
*/ ads_blob_write(ads, ads.eng_state_size[guc_class], - real_size - xe_lrc_skip_size(xe)); + xe_lrc_engine_state_size(gt, class)); ads_blob_write(ads, ads.golden_context_lrca[guc_class], addr_ggtt); diff --git a/drivers/gpu/drm/xe/xe_lrc.c b/drivers/gpu/drm/xe/xe_lrc.c index c725cde4508d..4af9f0d7c6f3 100644 --- a/drivers/gpu/drm/xe/xe_lrc.c +++ b/drivers/gpu/drm/xe/xe_lrc.c @@ -746,9 +746,16 @@ size_t xe_lrc_reg_size(struct xe_device *xe) return 80 * sizeof(u32); } -size_t xe_lrc_skip_size(struct xe_device *xe) +/** + * xe_lrc_engine_state_size() - Get size of the engine state within LRC + * @gt: the &xe_gt struct instance + * @class: Hardware engine class + * + * Returns: Size of the engine state + */ +size_t xe_lrc_engine_state_size(struct xe_gt *gt, enum xe_engine_class class) { - return LRC_PPHWSP_SIZE + xe_lrc_reg_size(xe); + return xe_gt_lrc_hang_replay_size(gt, class) - xe_lrc_reg_size(gt_to_xe(gt)); } static inline u32 __xe_lrc_seqno_offset(struct xe_lrc *lrc) diff --git a/drivers/gpu/drm/xe/xe_lrc.h b/drivers/gpu/drm/xe/xe_lrc.h index e7c975f9e2d9..5440663183f6 100644 --- a/drivers/gpu/drm/xe/xe_lrc.h +++ b/drivers/gpu/drm/xe/xe_lrc.h @@ -130,7 +130,7 @@ u32 xe_lrc_parallel_ggtt_addr(struct xe_lrc *lrc); struct iosys_map xe_lrc_parallel_map(struct xe_lrc *lrc); size_t xe_lrc_reg_size(struct xe_device *xe); -size_t xe_lrc_skip_size(struct xe_device *xe); +size_t xe_lrc_engine_state_size(struct xe_gt *gt, enum xe_engine_class class); void xe_lrc_dump_default(struct drm_printer *p, struct xe_gt *gt, From b15838b03cd0c6cf35651cfde62d17f14bb1d566 Mon Sep 17 00:00:00 2001 From: Myeonghun Pak Date: Fri, 24 Apr 2026 21:34:28 +0900 Subject: [PATCH 36/37] drm/bochs: Drop manual put on probe error path bochs_pci_probe() allocates the DRM device with devm_drm_dev_alloc(), which registers a devres action to drop the initial DRM device reference on driver detach or probe failure. The error path currently calls drm_dev_put() manually. 
If probe then returns an error, devres will run the registered release action and put the same device again, after the first put may already have released it. Return the probe error directly and let devres own the final put. Signed-off-by: Myeonghun Pak Fixes: 04826f588682 ("drm/bochs: Allocate DRM device in struct bochs_device") Signed-off-by: Thomas Zimmermann Reviewed-by: Thomas Zimmermann Link: https://patch.msgid.link/20260424123506.32275-1-mhun512@gmail.com --- drivers/gpu/drm/tiny/bochs.c | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/tiny/bochs.c b/drivers/gpu/drm/tiny/bochs.c index 222e4ae1abbd..5d8dc5efec77 100644 --- a/drivers/gpu/drm/tiny/bochs.c +++ b/drivers/gpu/drm/tiny/bochs.c @@ -761,25 +761,21 @@ static int bochs_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent ret = pcim_enable_device(pdev); if (ret) - goto err_free_dev; + return ret; pci_set_drvdata(pdev, dev); ret = bochs_load(bochs); if (ret) - goto err_free_dev; + return ret; ret = drm_dev_register(dev, 0); if (ret) - goto err_free_dev; + return ret; drm_client_setup(dev, NULL); return ret; - -err_free_dev: - drm_dev_put(dev); - return ret; } static void bochs_pci_remove(struct pci_dev *pdev) From 5e28b7b94408897e41c63477aabc9e1db439bc8c Mon Sep 17 00:00:00 2001 From: "Francis, David" Date: Tue, 28 Apr 2026 19:25:50 +0000 Subject: [PATCH 37/37] drm: Set old handle to NULL before prime swap in change_handle There was a potential race condition in change_handle. The ioctl briefly had a single object with two idr entries; a concurrent gem_close could delete the object and remove one of the handles while leaving the other one dangling, which could subsequently be dereferenced for a use-after-free. To fix this, do the same dance that gem_close itself does. (f6cd7daecff5 drm: Release driver references to handle before making it available again) First idr_replace the old handle to NULL. 
Later, if the prime operations are successful, actually close it. create_tail required a similar dance to avoid a similar problem. (bd46cece51a3 drm/gem: Fix race in drm_gem_handle_create_tail()) It idr_allocs the new handle with NULL, then swaps in the correct object later to avoid races. We don't need to do that here, since the only operations that could race are drm_prime, and change_handle holds the prime lock for the entire duration. v2: cleanups of error paths Signed-off-by: David Francis Co-authored-by: Dave Airlie Reported-by: Puttimet Thammasaeng Tested-by: Vitaly Prosyak Cc: Simona Vetter Cc: stable@vger.kernel.org Cc: Christian Koenig Fixes: 53096728b8910 ("drm: Add DRM prime interface to reassign GEM handle") Signed-off-by: Dave Airlie --- drivers/gpu/drm/drm_gem.c | 25 ++++++++++++++++++++++++- 1 file changed, 24 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/drm_gem.c b/drivers/gpu/drm/drm_gem.c index d6424267260b..51a887cc7fd7 100644 --- a/drivers/gpu/drm/drm_gem.c +++ b/drivers/gpu/drm/drm_gem.c @@ -1019,7 +1019,7 @@ int drm_gem_change_handle_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv) { struct drm_gem_change_handle *args = data; - struct drm_gem_object *obj; + struct drm_gem_object *obj, *idrobj; int handle, ret; if (!drm_core_check_feature(dev, DRIVER_GEM)) @@ -1042,8 +1042,29 @@ int drm_gem_change_handle_ioctl(struct drm_device *dev, void *data, mutex_lock(&file_priv->prime.lock); spin_lock(&file_priv->table_lock); + + /* When create_tail allocs an obj idr, it needs to first alloc as NULL, + * then later replace with the correct object. This is not necessary + * here, because the only operations that could race are drm_prime + * bookkeeping, and we hold the prime lock. 
+ */ ret = idr_alloc(&file_priv->object_idr, obj, handle, handle + 1, GFP_NOWAIT); + + if (ret < 0) { + spin_unlock(&file_priv->table_lock); + goto out_unlock; + } + + idrobj = idr_replace(&file_priv->object_idr, NULL, handle); + if (idrobj != obj) { + idr_replace(&file_priv->object_idr, idrobj, handle); + idr_remove(&file_priv->object_idr, args->new_handle); + spin_unlock(&file_priv->table_lock); + ret = -ENOENT; + goto out_unlock; + } + spin_unlock(&file_priv->table_lock); if (ret < 0) @@ -1055,6 +1076,8 @@ int drm_gem_change_handle_ioctl(struct drm_device *dev, void *data, if (ret < 0) { spin_lock(&file_priv->table_lock); idr_remove(&file_priv->object_idr, handle); + idrobj = idr_replace(&file_priv->object_idr, obj, handle); + WARN_ON(idrobj != NULL); spin_unlock(&file_priv->table_lock); goto out_unlock; }