From a9162439ad792afcddc04718408ec1380b7a5f63 Mon Sep 17 00:00:00 2001 From: Lizhi Hou Date: Tue, 27 Jan 2026 16:23:56 -0800 Subject: [PATCH 01/12] accel/amdxdna: Hold mm structure across iommu_sva_unbind_device() Some tests trigger a crash in iommu_sva_unbind_device() due to accessing iommu_mm after the associated mm structure has been freed. Fix this by taking an explicit reference to the mm structure after successfully binding the device, and releasing it only after the device is unbound. This ensures the mm remains valid for the entire SVA bind/unbind lifetime. Fixes: be462c97b7df ("accel/amdxdna: Add hardware context") Reviewed-by: Mario Limonciello (AMD) Signed-off-by: Lizhi Hou Link: https://patch.msgid.link/20260128002356.1858122-1-lizhi.hou@amd.com --- drivers/accel/amdxdna/amdxdna_pci_drv.c | 3 +++ drivers/accel/amdxdna/amdxdna_pci_drv.h | 1 + 2 files changed, 4 insertions(+) diff --git a/drivers/accel/amdxdna/amdxdna_pci_drv.c b/drivers/accel/amdxdna/amdxdna_pci_drv.c index 45f5c12fc67f..fdefd9ec2066 100644 --- a/drivers/accel/amdxdna/amdxdna_pci_drv.c +++ b/drivers/accel/amdxdna/amdxdna_pci_drv.c @@ -82,6 +82,8 @@ static int amdxdna_drm_open(struct drm_device *ddev, struct drm_file *filp) ret = -ENODEV; goto unbind_sva; } + client->mm = current->mm; + mmgrab(client->mm); init_srcu_struct(&client->hwctx_srcu); xa_init_flags(&client->hwctx_xa, XA_FLAGS_ALLOC); mutex_init(&client->mm_lock); @@ -116,6 +118,7 @@ static void amdxdna_client_cleanup(struct amdxdna_client *client) drm_gem_object_put(to_gobj(client->dev_heap)); iommu_sva_unbind_device(client->sva); + mmdrop(client->mm); kfree(client); } diff --git a/drivers/accel/amdxdna/amdxdna_pci_drv.h b/drivers/accel/amdxdna/amdxdna_pci_drv.h index 6580cb5ec7e2..f08406b8fdf9 100644 --- a/drivers/accel/amdxdna/amdxdna_pci_drv.h +++ b/drivers/accel/amdxdna/amdxdna_pci_drv.h @@ -130,6 +130,7 @@ struct amdxdna_client { struct iommu_sva *sva; int pasid; + struct mm_struct *mm; }; #define amdxdna_for_each_hwctx(client, hwctx_id, entry) \ From f1370241fe8045702bc9d0812b996791f0500f1b Mon Sep 17 00:00:00 2001 From: Lizhi Hou Date: Thu, 29 Jan 2026 16:32:55 -0800 Subject: [PATCH 02/12] accel/amdxdna: Stop job scheduling across aie2_release_resource() Running jobs on a hardware context while it is in the process of releasing resources can lead to use-after-free and crashes. Fix this by stopping job scheduling before calling aie2_release_resource() and restarting it after the release completes. Additionally, aie2_sched_job_run() now checks whether the hardware context is still active. Fixes: 4fd6ca90fc7f ("accel/amdxdna: Refactor hardware context destroy routine") Reviewed-by: Mario Limonciello (AMD) Signed-off-by: Lizhi Hou Link: https://patch.msgid.link/20260130003255.2083255-1-lizhi.hou@amd.com --- drivers/accel/amdxdna/aie2_ctx.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/accel/amdxdna/aie2_ctx.c b/drivers/accel/amdxdna/aie2_ctx.c index 5511ab2ef242..c4a58c00e442 100644 --- a/drivers/accel/amdxdna/aie2_ctx.c +++ b/drivers/accel/amdxdna/aie2_ctx.c @@ -315,6 +315,9 @@ aie2_sched_job_run(struct drm_sched_job *sched_job) struct dma_fence *fence; int ret; + if (hwctx->status != HWCTX_STAT_READY) + return NULL; + if (!mmget_not_zero(job->mm)) return ERR_PTR(-ESRCH); @@ -705,7 +708,10 @@ void aie2_hwctx_fini(struct amdxdna_hwctx *hwctx) aie2_hwctx_wait_for_idle(hwctx); /* Request fw to destroy hwctx and cancel the rest pending requests */ + drm_sched_stop(&hwctx->priv->sched, NULL); aie2_release_resource(hwctx); + hwctx->status = HWCTX_STAT_STOP; + drm_sched_start(&hwctx->priv->sched, 0); mutex_unlock(&xdna->dev_lock); drm_sched_entity_destroy(&hwctx->priv->entity); From 84dd57fb0359500092f1101409ca32091731490d Mon Sep 17 00:00:00 2001 From: Zishun Yi Date: Fri, 30 Jan 2026 01:10:22 +0800 Subject: [PATCH 03/12] accel/amdxdna: Fix memory leak in amdxdna_ubuf_map The amdxdna_ubuf_map() function allocates memory for sg and internal sg table structures, but it fails to free them if subsequent operations (sg_alloc_table_from_pages or dma_map_sgtable) fail. Fixes: bd72d4acda10 ("accel/amdxdna: Support user space allocated buffer") Signed-off-by: Zishun Yi Reviewed-by: Lizhi Hou Reviewed-by: Min Ma Signed-off-by: Lizhi Hou Link: https://patch.msgid.link/20260129171022.68578-1-zishun.yi.dev@gmail.com --- drivers/accel/amdxdna/amdxdna_ubuf.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/accel/amdxdna/amdxdna_ubuf.c b/drivers/accel/amdxdna/amdxdna_ubuf.c index 077b2261cf2a..9e3b3b055caa 100644 --- a/drivers/accel/amdxdna/amdxdna_ubuf.c +++ b/drivers/accel/amdxdna/amdxdna_ubuf.c @@ -34,15 +34,21 @@ static struct sg_table *amdxdna_ubuf_map(struct dma_buf_attachment *attach, ret = sg_alloc_table_from_pages(sg, ubuf->pages, ubuf->nr_pages, 0, ubuf->nr_pages << PAGE_SHIFT, GFP_KERNEL); if (ret) - return ERR_PTR(ret); + goto err_free_sg; if (ubuf->flags & AMDXDNA_UBUF_FLAG_MAP_DMA) { ret = dma_map_sgtable(attach->dev, sg, direction, 0); if (ret) - return ERR_PTR(ret); + goto err_free_table; } return sg; + +err_free_table: + sg_free_table(sg); +err_free_sg: + kfree(sg); + return ERR_PTR(ret); } static void amdxdna_ubuf_unmap(struct dma_buf_attachment *attach, From 45c0a8a70253ab97e676b9791891e389a531664b Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Wed, 14 Jan 2026 13:43:31 -0700 Subject: [PATCH 04/12] drm/panel: ilitek-ili9882t: Remove duplicate initializers in tianma_il79900a_dsc Clang warns (or errors with CONFIG_WERROR=y / W=e): drivers/gpu/drm/panel/panel-ilitek-ili9882t.c:95:16: error: initializer overrides prior initialization of this subobject [-Werror,-Winitializer-overrides] 95 | .vbr_enable = 0, | ^ drivers/gpu/drm/panel/panel-ilitek-ili9882t.c:90:16: note: previous initialization is here 90 | .vbr_enable = false, | ^~~~~ drivers/gpu/drm/panel/panel-ilitek-ili9882t.c:97:19: error: initializer overrides prior initialization of this subobject [-Werror,-Winitializer-overrides] 97 | .rc_model_size = DSC_RC_MODEL_SIZE_CONST, | ^~~~~~~~~~~~~~~~~~~~~~~ include/drm/display/drm_dsc.h:22:38: note: expanded from macro 'DSC_RC_MODEL_SIZE_CONST' 22 | #define DSC_RC_MODEL_SIZE_CONST 8192 | ^~~~ drivers/gpu/drm/panel/panel-ilitek-ili9882t.c:91:19: note: previous initialization is here 91 | .rc_model_size = DSC_RC_MODEL_SIZE_CONST, | ^~~~~~~~~~~~~~~~~~~~~~~ include/drm/display/drm_dsc.h:22:38: note: expanded from macro 'DSC_RC_MODEL_SIZE_CONST' 22 | #define DSC_RC_MODEL_SIZE_CONST 8192 | ^~~~ drivers/gpu/drm/panel/panel-ilitek-ili9882t.c:132:25: error: initializer overrides prior initialization of this subobject [-Werror,-Winitializer-overrides] 132 | .initial_scale_value = 32, | ^~ drivers/gpu/drm/panel/panel-ilitek-ili9882t.c:126:25: note: previous initialization is here 126 | .initial_scale_value = 32, | ^~ drivers/gpu/drm/panel/panel-ilitek-ili9882t.c:133:20: error: initializer overrides prior initialization of this subobject [-Werror,-Winitializer-overrides] 133 | .nfl_bpg_offset = 3511, | ^~~~ drivers/gpu/drm/panel/panel-ilitek-ili9882t.c:108:20: note: previous initialization is here 108 | .nfl_bpg_offset = 1402, | ^~~~ GCC would warn about this in the same manner but its version, -Woverride-init, is disabled for a normal kernel build in scripts/Makefile.warn. For clang, -Wextra in drivers/gpu/drm/Makefile turns it back but GCC respects turning it off earlier in the command line. Of all the duplicate fields in the initializer, only nfl_bpg_offset is a different value. Clear up the duplicate initializers, keeping the 'false' value for .vbr_enable, as it is bool, and the second value for .nfl_bpg_offset, assuming it is the correct one since it was the one tested in the original change. Fixes: 65ce1f5834e9 ("drm/panel: ilitek-ili9882t: Switch Tianma TL121BVMS07 to DSC 120Hz mode") Signed-off-by: Nathan Chancellor Link: https://patch.msgid.link/20260114-panel-ilitek-ili9882t-fix-override-init-v1-1-1d69a2b096df@kernel.org Signed-off-by: Maxime Ripard --- drivers/gpu/drm/panel/panel-ilitek-ili9882t.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/gpu/drm/panel/panel-ilitek-ili9882t.c b/drivers/gpu/drm/panel/panel-ilitek-ili9882t.c index 370424ddfc80..8b2bfb7d3638 100644 --- a/drivers/gpu/drm/panel/panel-ilitek-ili9882t.c +++ b/drivers/gpu/drm/panel/panel-ilitek-ili9882t.c @@ -88,11 +88,9 @@ static const struct drm_dsc_config tianma_il79900a_dsc = { .native_422 = false, .simple_422 = false, .vbr_enable = false, - .rc_model_size = DSC_RC_MODEL_SIZE_CONST, .pic_width = 1600, .pic_height = 2560, .convert_rgb = 0, - .vbr_enable = 0, .rc_buf_thresh = {14, 28, 42, 56, 70, 84, 98, 105, 112, 119, 121, 123, 125, 126}, .rc_model_size = DSC_RC_MODEL_SIZE_CONST, .rc_edge_factor = DSC_RC_EDGE_FACTOR_CONST, @@ -105,7 +103,6 @@ static const struct drm_dsc_config tianma_il79900a_dsc = { .initial_offset = 6144, .rc_quant_incr_limit0 = 11, .rc_quant_incr_limit1 = 11, - .nfl_bpg_offset = 1402, .rc_range_params = { { 0, 4, DSC_BPG_OFFSET(2)}, { 0, 4, DSC_BPG_OFFSET(0)}, @@ -123,7 +120,6 @@ static const struct drm_dsc_config tianma_il79900a_dsc = { { 9, 12, DSC_BPG_OFFSET(-12)}, {12, 13, DSC_BPG_OFFSET(-12)}, }, - .initial_scale_value = 32, .slice_chunk_size = 800, .initial_dec_delay = 657, .final_offset = 4320, From fe6d29b082d49df336ebb92226a3c004ae9e3222 Mon Sep 17 00:00:00 2001 From: Liu Ying Date: Fri, 23 Jan 2026 17:22:17 +0800 Subject: [PATCH 05/12] drm/bridge: imx8qxp-pixel-combiner: Fix bailout for imx8qxp_pc_bridge_probe() In case the channel0 is unavailable and bailing out from free_child is needed when we fail to add a DRM bridge for the available channel1, pointer pc->ch[0] in the bailout path would be NULL and it would be dereferenced as pc->ch[0]->bridge.next_bridge. Fix this by checking pc->ch[0] before dereferencing it. Fixes: ae754f049ce1 ("drm/bridge: imx8qxp-pixel-combiner: get/put the next bridge") Fixes: 99764593528f ("drm/bridge: imx8qxp-pixel-combiner: convert to devm_drm_bridge_alloc() API") Signed-off-by: Liu Ying Reviewed-by: Luca Ceresoli Reviewed-by: Frank Li Link: https://patch.msgid.link/20260123-imx8qxp-drm-bridge-fixes-v1-3-8bb85ada5866@nxp.com Signed-off-by: Luca Ceresoli --- drivers/gpu/drm/bridge/imx/imx8qxp-pixel-combiner.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/bridge/imx/imx8qxp-pixel-combiner.c b/drivers/gpu/drm/bridge/imx/imx8qxp-pixel-combiner.c index 74eda8b54023..99a9280edb4f 100644 --- a/drivers/gpu/drm/bridge/imx/imx8qxp-pixel-combiner.c +++ b/drivers/gpu/drm/bridge/imx/imx8qxp-pixel-combiner.c @@ -348,7 +348,7 @@ static int imx8qxp_pc_bridge_probe(struct platform_device *pdev) free_child: of_node_put(child); - if (i == 1 && pc->ch[0]->bridge.next_bridge) + if (i == 1 && pc->ch[0] && pc->ch[0]->bridge.next_bridge) drm_bridge_remove(&pc->ch[0]->bridge); pm_runtime_disable(dev); From b853007fdcdd64b49601a993c2b30c28279ae15d Mon Sep 17 00:00:00 2001 From: Lizhi Hou Date: Mon, 2 Feb 2026 13:24:50 -0800 Subject: [PATCH 06/12] accel/amdxdna: Remove hardware context status One newly supported command does not require hardware context configuration to be performed upfront. As a result, checking hardware context status causes this command to fail incorrectly. Remove hardware context status handling entirely. For other commands, if userspace submits a request without configuring the hardware context first, the firmware will report an error or time out as appropriate. Fixes: aac243092b70 ("accel/amdxdna: Add command execution") Reviewed-by: Mario Limonciello (AMD) Signed-off-by: Lizhi Hou Link: https://patch.msgid.link/20260202212450.2681273-1-lizhi.hou@amd.com --- drivers/accel/amdxdna/aie2_ctx.c | 25 ++----------------------- drivers/accel/amdxdna/aie2_message.c | 3 +++ drivers/accel/amdxdna/amdxdna_ctx.h | 5 ----- 3 files changed, 5 insertions(+), 28 deletions(-) diff --git a/drivers/accel/amdxdna/aie2_ctx.c b/drivers/accel/amdxdna/aie2_ctx.c index c4a58c00e442..ad5b5cd0bc81 100644 --- a/drivers/accel/amdxdna/aie2_ctx.c +++ b/drivers/accel/amdxdna/aie2_ctx.c @@ -47,17 +47,6 @@ static void aie2_job_put(struct amdxdna_sched_job *job) kref_put(&job->refcnt, aie2_job_release); } -static void aie2_hwctx_status_shift_stop(struct amdxdna_hwctx *hwctx) -{ - hwctx->old_status = hwctx->status; - hwctx->status = HWCTX_STAT_STOP; -} - -static void aie2_hwctx_status_restore(struct amdxdna_hwctx *hwctx) -{ - hwctx->status = hwctx->old_status; -} - /* The bad_job is used in aie2_sched_job_timedout, otherwise, set it to NULL */ static void aie2_hwctx_stop(struct amdxdna_dev *xdna, struct amdxdna_hwctx *hwctx, struct drm_sched_job *bad_job) @@ -84,11 +73,6 @@ static int aie2_hwctx_restart(struct amdxdna_dev *xdna, struct amdxdna_hwctx *hw goto out; } - if (hwctx->status != HWCTX_STAT_READY) { - XDNA_DBG(xdna, "hwctx is not ready, status %d", hwctx->status); - goto out; - } - ret = aie2_config_cu(hwctx, NULL); if (ret) { XDNA_ERR(xdna, "Config cu failed, ret %d", ret); @@ -140,7 +124,6 @@ static int aie2_hwctx_suspend_cb(struct amdxdna_hwctx *hwctx, void *arg) aie2_hwctx_wait_for_idle(hwctx); aie2_hwctx_stop(xdna, hwctx, NULL); - aie2_hwctx_status_shift_stop(hwctx); return 0; } @@ -162,7 +145,6 @@ static int aie2_hwctx_resume_cb(struct amdxdna_hwctx *hwctx, void *arg) { struct amdxdna_dev *xdna = hwctx->client->xdna; - aie2_hwctx_status_restore(hwctx); return aie2_hwctx_restart(xdna, hwctx); } @@ -315,7 +297,7 @@ aie2_sched_job_run(struct drm_sched_job *sched_job) struct dma_fence *fence; int ret; - if (hwctx->status != HWCTX_STAT_READY) + if (!hwctx->priv->mbox_chann) return NULL; if (!mmget_not_zero(job->mm)) @@ -666,7 +648,6 @@ int aie2_hwctx_init(struct amdxdna_hwctx *hwctx) } amdxdna_pm_suspend_put(xdna); - hwctx->status = HWCTX_STAT_INIT; init_waitqueue_head(&priv->job_free_wq); XDNA_DBG(xdna, "hwctx %s init completed", hwctx->name); @@ -710,7 +691,6 @@ void aie2_hwctx_fini(struct amdxdna_hwctx *hwctx) /* Request fw to destroy hwctx and cancel the rest pending requests */ drm_sched_stop(&hwctx->priv->sched, NULL); aie2_release_resource(hwctx); - hwctx->status = HWCTX_STAT_STOP; drm_sched_start(&hwctx->priv->sched, 0); mutex_unlock(&xdna->dev_lock); @@ -755,7 +735,7 @@ static int aie2_hwctx_cu_config(struct amdxdna_hwctx *hwctx, void *buf, u32 size if (XDNA_MBZ_DBG(xdna, config->pad, sizeof(config->pad))) return -EINVAL; - if (hwctx->status != HWCTX_STAT_INIT) { + if (hwctx->cus) { XDNA_ERR(xdna, "Not support re-config CU"); return -EINVAL; } @@ -786,7 +766,6 @@ static int aie2_hwctx_cu_config(struct amdxdna_hwctx *hwctx, void *buf, u32 size } wmb(); /* To avoid locking in command submit when check status */ - hwctx->status = HWCTX_STAT_READY; return 0; diff --git a/drivers/accel/amdxdna/aie2_message.c b/drivers/accel/amdxdna/aie2_message.c index 99215328505e..7d7dcfeaf794 100644 --- a/drivers/accel/amdxdna/aie2_message.c +++ b/drivers/accel/amdxdna/aie2_message.c @@ -493,6 +493,9 @@ int aie2_config_cu(struct amdxdna_hwctx *hwctx, if (!chann) return -ENODEV; + if (!hwctx->cus) + return 0; + if (hwctx->cus->num_cus > MAX_NUM_CUS) { XDNA_DBG(xdna, "Exceed maximum CU %d", MAX_NUM_CUS); return -EINVAL; diff --git a/drivers/accel/amdxdna/amdxdna_ctx.h b/drivers/accel/amdxdna/amdxdna_ctx.h index b29449a92f60..16c85f08f03c 100644 --- a/drivers/accel/amdxdna/amdxdna_ctx.h +++ b/drivers/accel/amdxdna/amdxdna_ctx.h @@ -99,11 +99,6 @@ struct amdxdna_hwctx { u32 start_col; u32 num_col; u32 num_unused_col; -#define HWCTX_STAT_INIT 0 -#define HWCTX_STAT_READY 1 -#define HWCTX_STAT_STOP 2 - u32 status; - u32 old_status; struct amdxdna_qos_info qos; struct amdxdna_hwctx_param_config_cu *cus; From 750817a7c41de083ca5d73052e97bb7b67d7c394 Mon Sep 17 00:00:00 2001 From: Lizhi Hou Date: Tue, 3 Feb 2026 10:40:37 -0800 Subject: [PATCH 07/12] accel/amdxdna: Fix incorrect error code returned for failed chain command The driver currently returns an incorrect error code when a chain command fails. In this case, ERT_CMD_STATE_ERROR is expected to be reported for failed chain commands. Fixes: aac243092b70 ("accel/amdxdna: Add command execution") Reviewed-by: Mario Limonciello (AMD) Reviewed-by: Maciej Falkowski Signed-off-by: Lizhi Hou Link: https://patch.msgid.link/20260203184037.2751889-1-lizhi.hou@amd.com --- drivers/accel/amdxdna/aie2_ctx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/accel/amdxdna/aie2_ctx.c b/drivers/accel/amdxdna/aie2_ctx.c index ad5b5cd0bc81..fe8f9783a73c 100644 --- a/drivers/accel/amdxdna/aie2_ctx.c +++ b/drivers/accel/amdxdna/aie2_ctx.c @@ -274,7 +274,7 @@ aie2_sched_cmdlist_resp_handler(void *handle, void __iomem *data, size_t size) ret = -EINVAL; goto out; } - amdxdna_cmd_set_state(cmd_abo, fail_cmd_status); + amdxdna_cmd_set_state(cmd_abo, ERT_CMD_STATE_ERROR); if (amdxdna_cmd_get_op(cmd_abo) == ERT_CMD_CHAIN) { struct amdxdna_cmd_chain *cc = amdxdna_cmd_get_payload(cmd_abo, NULL); From c4d53e567d3b6f6211d013e6c5c3672b26a49845 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Wed, 4 Feb 2026 13:00:05 +1000 Subject: [PATCH 08/12] nouveau/vmm: rewrite pte tracker using a struct and bitfields. I want to increase the counters here and start tracking LPTs as well as there are certain situations where userspace with mixed page sizes can cause ref/unrefs to live longer so need better reference counting. This should be entirely non-functional. Reviewed-by: Mary Guillemard Tested-by: Mary Guillemard Tested-by: Mel Henning Signed-off-by: Dave Airlie Link: https://patch.msgid.link/20260204030208.2313241-2-airlied@gmail.com --- drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmm.c | 41 ++++++++++--------- drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmm.h | 14 +++++-- 2 files changed, 31 insertions(+), 24 deletions(-) diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmm.c b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmm.c index f95c58b67633..efc334f6104c 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmm.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmm.c @@ -53,7 +53,7 @@ nvkm_vmm_pt_new(const struct nvkm_vmm_desc *desc, bool sparse, } } - if (!(pgt = kzalloc(sizeof(*pgt) + lpte, GFP_KERNEL))) + if (!(pgt = kzalloc(sizeof(*pgt) + (sizeof(pgt->pte[0]) * lpte), GFP_KERNEL))) return NULL; pgt->page = page ? page->shift : 0; pgt->sparse = sparse; @@ -208,7 +208,7 @@ nvkm_vmm_unref_sptes(struct nvkm_vmm_iter *it, struct nvkm_vmm_pt *pgt, */ for (lpti = ptei >> sptb; ptes; spti = 0, lpti++) { const u32 pten = min(sptn - spti, ptes); - pgt->pte[lpti] -= pten; + pgt->pte[lpti].s.sptes -= pten; ptes -= pten; } @@ -218,9 +218,9 @@ nvkm_vmm_unref_sptes(struct nvkm_vmm_iter *it, struct nvkm_vmm_pt *pgt, for (ptei = pteb = ptei >> sptb; ptei < lpti; pteb = ptei) { /* Skip over any LPTEs that still have valid SPTEs. */ - if (pgt->pte[pteb] & NVKM_VMM_PTE_SPTES) { + if (pgt->pte[pteb].s.sptes) { for (ptes = 1, ptei++; ptei < lpti; ptes++, ptei++) { - if (!(pgt->pte[ptei] & NVKM_VMM_PTE_SPTES)) + if (!(pgt->pte[ptei].s.sptes)) break; } continue; @@ -232,14 +232,14 @@ nvkm_vmm_unref_sptes(struct nvkm_vmm_iter *it, struct nvkm_vmm_pt *pgt, * * Determine how many LPTEs need to transition state. */ - pgt->pte[ptei] &= ~NVKM_VMM_PTE_VALID; + pgt->pte[ptei].s.spte_valid = false; for (ptes = 1, ptei++; ptei < lpti; ptes++, ptei++) { - if (pgt->pte[ptei] & NVKM_VMM_PTE_SPTES) + if (pgt->pte[ptei].s.sptes) break; - pgt->pte[ptei] &= ~NVKM_VMM_PTE_VALID; + pgt->pte[ptei].s.spte_valid = false; } - if (pgt->pte[pteb] & NVKM_VMM_PTE_SPARSE) { + if (pgt->pte[pteb].s.sparse) { TRA(it, "LPTE %05x: U -> S %d PTEs", pteb, ptes); pair->func->sparse(vmm, pgt->pt[0], pteb, ptes); } else @@ -307,7 +307,7 @@ nvkm_vmm_ref_sptes(struct nvkm_vmm_iter *it, struct nvkm_vmm_pt *pgt, */ for (lpti = ptei >> sptb; ptes; spti = 0, lpti++) { const u32 pten = min(sptn - spti, ptes); - pgt->pte[lpti] += pten; + pgt->pte[lpti].s.sptes += pten; ptes -= pten; } @@ -317,9 +317,9 @@ nvkm_vmm_ref_sptes(struct nvkm_vmm_iter *it, struct nvkm_vmm_pt *pgt, for (ptei = pteb = ptei >> sptb; ptei < lpti; pteb = ptei) { /* Skip over any LPTEs that already have valid SPTEs. */ - if (pgt->pte[pteb] & NVKM_VMM_PTE_VALID) { + if (pgt->pte[pteb].s.spte_valid) { for (ptes = 1, ptei++; ptei < lpti; ptes++, ptei++) { - if (!(pgt->pte[ptei] & NVKM_VMM_PTE_VALID)) + if (!pgt->pte[ptei].s.spte_valid) break; } continue; @@ -331,14 +331,14 @@ nvkm_vmm_ref_sptes(struct nvkm_vmm_iter *it, struct nvkm_vmm_pt *pgt, * * Determine how many LPTEs need to transition state. */ - pgt->pte[ptei] |= NVKM_VMM_PTE_VALID; + pgt->pte[ptei].s.spte_valid = true; for (ptes = 1, ptei++; ptei < lpti; ptes++, ptei++) { - if (pgt->pte[ptei] & NVKM_VMM_PTE_VALID) + if (pgt->pte[ptei].s.spte_valid) break; - pgt->pte[ptei] |= NVKM_VMM_PTE_VALID; + pgt->pte[ptei].s.spte_valid = true; } - if (pgt->pte[pteb] & NVKM_VMM_PTE_SPARSE) { + if (pgt->pte[pteb].s.sparse) { const u32 spti = pteb * sptn; const u32 sptc = ptes * sptn; /* The entire LPTE is marked as sparse, we need @@ -386,7 +386,8 @@ nvkm_vmm_sparse_ptes(const struct nvkm_vmm_desc *desc, pgt->pde[ptei++] = NVKM_VMM_PDE_SPARSE; } else if (desc->type == LPT) { - memset(&pgt->pte[ptei], NVKM_VMM_PTE_SPARSE, ptes); + union nvkm_pte_tracker sparse = { .s.sparse = 1 }; + memset(&pgt->pte[ptei].u, sparse.u, ptes); } } @@ -398,7 +399,7 @@ nvkm_vmm_sparse_unref_ptes(struct nvkm_vmm_iter *it, bool pfn, u32 ptei, u32 pte memset(&pt->pde[ptei], 0x00, sizeof(pt->pde[0]) * ptes); else if (it->desc->type == LPT) - memset(&pt->pte[ptei], 0x00, sizeof(pt->pte[0]) * ptes); + memset(&pt->pte[ptei].u, 0x00, sizeof(pt->pte[0]) * ptes); return nvkm_vmm_unref_ptes(it, pfn, ptei, ptes); } @@ -445,9 +446,9 @@ nvkm_vmm_ref_hwpt(struct nvkm_vmm_iter *it, struct nvkm_vmm_pt *pgd, u32 pdei) * the SPTEs on some GPUs. */ for (ptei = pteb = 0; ptei < pten; pteb = ptei) { - bool spte = pgt->pte[ptei] & NVKM_VMM_PTE_SPTES; + bool spte = !!pgt->pte[ptei].s.sptes; for (ptes = 1, ptei++; ptei < pten; ptes++, ptei++) { - bool next = pgt->pte[ptei] & NVKM_VMM_PTE_SPTES; + bool next = !!pgt->pte[ptei].s.sptes; if (spte != next) break; } @@ -461,7 +462,7 @@ nvkm_vmm_ref_hwpt(struct nvkm_vmm_iter *it, struct nvkm_vmm_pt *pgd, u32 pdei) } else { desc->func->unmap(vmm, pt, pteb, ptes); while (ptes--) - pgt->pte[pteb++] |= NVKM_VMM_PTE_VALID; + pgt->pte[pteb++].s.spte_valid = true; } } } else { diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmm.h b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmm.h index 4586a425dbe4..a6312a0e6b84 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmm.h +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmm.h @@ -4,6 +4,15 @@ #include enum nvkm_memory_target; +union nvkm_pte_tracker { + u8 u; + struct { + u8 sparse:1; + u8 spte_valid:1; + u8 sptes:6; + } s; +}; + struct nvkm_vmm_pt { /* Some GPUs have a mapping level with a dual page tables to * support large and small pages in the same address-range. @@ -44,10 +53,7 @@ struct nvkm_vmm_pt { * * This information is used to manage LPTE state transitions. */ -#define NVKM_VMM_PTE_SPARSE 0x80 -#define NVKM_VMM_PTE_VALID 0x40 -#define NVKM_VMM_PTE_SPTES 0x3f - u8 pte[]; + union nvkm_pte_tracker pte[]; }; typedef void (*nvkm_vmm_pxe_func)(struct nvkm_vmm *, From 9dc983a85eb9b546bbe2ef3f2345e338aa655789 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Wed, 4 Feb 2026 13:00:06 +1000 Subject: [PATCH 09/12] nouveau/vmm: increase size of vmm pte tracker struct to u32 (v2) We need to tracker large counts of spte than previously due to unref getting delayed sometimes. This doesn't fix LPT tracking yet, it just creates space for it. Reviewed-by: Mary Guillemard Tested-by: Mary Guillemard Tested-by: Mel Henning Signed-off-by: Dave Airlie Link: https://patch.msgid.link/20260204030208.2313241-3-airlied@gmail.com --- drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmm.c | 6 +++--- drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmm.h | 9 +++++---- 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmm.c b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmm.c index efc334f6104c..44daeec0aa6d 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmm.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmm.c @@ -387,7 +387,7 @@ nvkm_vmm_sparse_ptes(const struct nvkm_vmm_desc *desc, } else if (desc->type == LPT) { union nvkm_pte_tracker sparse = { .s.sparse = 1 }; - memset(&pgt->pte[ptei].u, sparse.u, ptes); + memset32(&pgt->pte[ptei].u, sparse.u, ptes); } } @@ -399,7 +399,7 @@ nvkm_vmm_sparse_unref_ptes(struct nvkm_vmm_iter *it, bool pfn, u32 ptei, u32 pte memset(&pt->pde[ptei], 0x00, sizeof(pt->pde[0]) * ptes); else if (it->desc->type == LPT) - memset(&pt->pte[ptei].u, 0x00, sizeof(pt->pte[0]) * ptes); + memset32(&pt->pte[ptei].u, 0x00, ptes); return nvkm_vmm_unref_ptes(it, pfn, ptei, ptes); } @@ -458,7 +458,7 @@ nvkm_vmm_ref_hwpt(struct nvkm_vmm_iter *it, struct nvkm_vmm_pt *pgd, u32 pdei) desc->func->sparse(vmm, pt, pteb, ptes); else desc->func->invalid(vmm, pt, pteb, ptes); - memset(&pgt->pte[pteb], 0x00, ptes); + memset32(&pgt->pte[pteb].u, 0x00, ptes); } else { desc->func->unmap(vmm, pt, pteb, ptes); while (ptes--) diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmm.h b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmm.h index a6312a0e6b84..a8b08126e8dc 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmm.h +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmm.h @@ -5,11 +5,12 @@ enum nvkm_memory_target; union nvkm_pte_tracker { - u8 u; + u32 u; struct { - u8 sparse:1; - u8 spte_valid:1; - u8 sptes:6; + u32 sparse:1; + u32 spte_valid:1; + u32 padding:14; + u32 sptes:16; } s; }; From d19512f5abb198daf29da877f6a02c667a95c03d Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Wed, 4 Feb 2026 13:00:07 +1000 Subject: [PATCH 10/12] nouveau/vmm: start tracking if the LPT PTE is valid. (v6) When NVK enabled large pages userspace tests were seeing fault reports at a valid address. There was a case where an address moving from 64k page to 4k pages could expose a race between unmapping the 4k page, mapping the 64k page and unref the 4k pages. Unref 4k pages would cause the dual-page table handling to always set the LPTE entry to SPARSE or INVALID, but if we'd mapped a valid LPTE in the meantime, it would get trashed. Keep track of when a valid LPTE has been referenced, and don't reset in that case. This adds an lpte valid tracker and lpte reference count. Whenever an lpte is referenced, it gets made valid and the ref count increases, whenever it gets unreference the refcount is tracked. Link: https://gitlab.freedesktop.org/mesa/mesa/-/issues/14610 Reviewed-by: Mary Guillemard Tested-by: Mary Guillemard Tested-by: Mel Henning Signed-off-by: Dave Airlie Link: https://patch.msgid.link/20260204030208.2313241-4-airlied@gmail.com --- drivers/gpu/drm/nouveau/nouveau_drv.h | 4 +- drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmm.c | 39 +++++++++++++++---- drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmm.h | 3 +- 3 files changed, 36 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/nouveau/nouveau_drv.h b/drivers/gpu/drm/nouveau/nouveau_drv.h index 0e409414f44d..1c2523e2f92e 100644 --- a/drivers/gpu/drm/nouveau/nouveau_drv.h +++ b/drivers/gpu/drm/nouveau/nouveau_drv.h @@ -10,7 +10,7 @@ #define DRIVER_MAJOR 1 #define DRIVER_MINOR 4 -#define DRIVER_PATCHLEVEL 1 +#define DRIVER_PATCHLEVEL 2 /* * 1.1.1: @@ -37,6 +37,8 @@ * - implemented limited ABI16/NVIF interop * 1.4.1: * - add variable page sizes and compression for Turing+ + * 1.4.2: + * - tell userspace LPTE/SPTE races are fixed. */ #include diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmm.c b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmm.c index 44daeec0aa6d..19a7407cf702 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmm.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmm.c @@ -242,14 +242,17 @@ nvkm_vmm_unref_sptes(struct nvkm_vmm_iter *it, struct nvkm_vmm_pt *pgt, if (pgt->pte[pteb].s.sparse) { TRA(it, "LPTE %05x: U -> S %d PTEs", pteb, ptes); pair->func->sparse(vmm, pgt->pt[0], pteb, ptes); - } else - if (pair->func->invalid) { - /* If the MMU supports it, restore the LPTE to the - * INVALID state to tell the MMU there is no point - * trying to fetch the corresponding SPTEs. - */ - TRA(it, "LPTE %05x: U -> I %d PTEs", pteb, ptes); - pair->func->invalid(vmm, pgt->pt[0], pteb, ptes); + } else if (!pgt->pte[pteb].s.lpte_valid) { + if (pair->func->invalid) { + /* If the MMU supports it, restore the LPTE to the + * INVALID state to tell the MMU there is no point + * trying to fetch the corresponding SPTEs. + */ + TRA(it, "LPTE %05x: U -> I %d PTEs", pteb, ptes); + pair->func->invalid(vmm, pgt->pt[0], pteb, ptes); + } + } else { + TRA(it, "LPTE %05x: V %d PTEs", pteb, ptes); } } } @@ -280,6 +283,15 @@ nvkm_vmm_unref_ptes(struct nvkm_vmm_iter *it, bool pfn, u32 ptei, u32 ptes) if (desc->type == SPT && (pgt->refs[0] || pgt->refs[1])) nvkm_vmm_unref_sptes(it, pgt, desc, ptei, ptes); + if (desc->type == LPT && (pgt->refs[0] || pgt->refs[1])) { + for (u32 lpti = ptei; ptes; lpti++) { + pgt->pte[lpti].s.lptes--; + if (pgt->pte[lpti].s.lptes == 0) + pgt->pte[lpti].s.lpte_valid = false; + ptes--; + } + } + /* PT no longer needed? Destroy it. */ if (!pgt->refs[type]) { it->lvl++; @@ -332,10 +344,12 @@ nvkm_vmm_ref_sptes(struct nvkm_vmm_iter *it, struct nvkm_vmm_pt *pgt, * Determine how many LPTEs need to transition state. */ pgt->pte[ptei].s.spte_valid = true; + pgt->pte[ptei].s.lpte_valid = false; for (ptes = 1, ptei++; ptei < lpti; ptes++, ptei++) { if (pgt->pte[ptei].s.spte_valid) break; pgt->pte[ptei].s.spte_valid = true; + pgt->pte[ptei].s.lpte_valid = false; } if (pgt->pte[pteb].s.sparse) { @@ -374,6 +388,15 @@ nvkm_vmm_ref_ptes(struct nvkm_vmm_iter *it, bool pfn, u32 ptei, u32 ptes) if (desc->type == SPT) nvkm_vmm_ref_sptes(it, pgt, desc, ptei, ptes); + if (desc->type == LPT) { + for (u32 lpti = ptei; ptes; lpti++) { + pgt->pte[lpti].s.spte_valid = false; + pgt->pte[lpti].s.lpte_valid = true; + pgt->pte[lpti].s.lptes++; + ptes--; + } + } + return true; } diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmm.h b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmm.h index a8b08126e8dc..4ec0a3a21169 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmm.h +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmm.h @@ -9,7 +9,8 @@ union nvkm_pte_tracker { struct { u32 sparse:1; u32 spte_valid:1; - u32 padding:14; + u32 lpte_valid:1; + u32 lptes:13; u32 sptes:16; } s; }; From d19d963d2a4acb5bbf03e25733ba565a7f6e1422 Mon Sep 17 00:00:00 2001 From: Lizhi Hou Date: Wed, 4 Feb 2026 09:10:48 -0800 Subject: [PATCH 11/12] accel/amdxdna: Fix incorrect DPM level after suspend/resume The suspend routine sets the DPM level to 0, which unintentionally overwrites the previously saved DPM level. As a result, the device always resumes with DPM level 0 instead of restoring the original value. Fix this by ensuring the suspend path does not overwrite the saved DPM level, allowing the correct DPM level to be restored during resume. Fixes: f4d7b8a6bc8c ("accel/amdxdna: Enhance power management settings") Reviewed-by: Mario Limonciello (AMD) Signed-off-by: Lizhi Hou Link: https://patch.msgid.link/20260204171048.3165580-1-lizhi.hou@amd.com --- drivers/accel/amdxdna/aie2_pm.c | 3 +++ drivers/accel/amdxdna/aie2_smu.c | 2 -- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/accel/amdxdna/aie2_pm.c b/drivers/accel/amdxdna/aie2_pm.c index afcd6d4683e5..579b8be13b18 100644 --- a/drivers/accel/amdxdna/aie2_pm.c +++ b/drivers/accel/amdxdna/aie2_pm.c @@ -36,6 +36,8 @@ int aie2_pm_set_dpm(struct amdxdna_dev_hdl *ndev, u32 dpm_level) return ret; ret = ndev->priv->hw_ops.set_dpm(ndev, dpm_level); + if (!ret) + ndev->dpm_level = dpm_level; amdxdna_pm_suspend_put(ndev->xdna); return ret; @@ -65,6 +67,7 @@ int aie2_pm_init(struct amdxdna_dev_hdl *ndev) ret = ndev->priv->hw_ops.set_dpm(ndev, ndev->max_dpm_level); if (ret) return ret; + ndev->dpm_level = ndev->max_dpm_level; ret = aie2_pm_set_clk_gating(ndev, AIE2_CLK_GATING_ENABLE); if (ret) diff --git a/drivers/accel/amdxdna/aie2_smu.c b/drivers/accel/amdxdna/aie2_smu.c index 2d195e41f83d..d8c31924e501 100644 --- a/drivers/accel/amdxdna/aie2_smu.c +++ b/drivers/accel/amdxdna/aie2_smu.c @@ -84,7 +84,6 @@ int npu1_set_dpm(struct amdxdna_dev_hdl *ndev, u32 dpm_level) } ndev->hclk_freq = freq; - ndev->dpm_level = dpm_level; ndev->max_tops = 2 * ndev->total_col; ndev->curr_tops = ndev->max_tops * freq / 1028; @@ -114,7 +113,6 @@ int npu4_set_dpm(struct amdxdna_dev_hdl *ndev, u32 dpm_level) ndev->npuclk_freq = ndev->priv->dpm_clk_tbl[dpm_level].npuclk; ndev->hclk_freq = ndev->priv->dpm_clk_tbl[dpm_level].hclk; - ndev->dpm_level = dpm_level; ndev->max_tops = NPU4_DPM_TOPS(ndev, ndev->max_dpm_level); ndev->curr_tops = NPU4_DPM_TOPS(ndev, dpm_level); From 69674c1c704c0199ca7a3947f3cdcd575973175d Mon Sep 17 00:00:00 2001 From: Lizhi Hou Date: Wed, 4 Feb 2026 09:11:17 -0800 Subject: [PATCH 12/12] accel/amdxdna: Move RPM resume into job run function Currently, amdxdna_pm_resume_get() is called during job creation, and amdxdna_pm_suspend_put() is called when the hardware notifies job completion. If a job is canceled before it is run, no hardware completion notification is generated, resulting in an unbalanced runtime PM resume/suspend pair. Fix this by moving amdxdna_pm_resume_get() to the job run path, ensuring runtime PM is only resumed for jobs that are actually executed. Fixes: 063db451832b ("accel/amdxdna: Enhance runtime power management") Reviewed-by: Mario Limonciello (AMD) Signed-off-by: Lizhi Hou Link: https://patch.msgid.link/20260204171118.3165607-1-lizhi.hou@amd.com --- drivers/accel/amdxdna/aie2_ctx.c | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/drivers/accel/amdxdna/aie2_ctx.c b/drivers/accel/amdxdna/aie2_ctx.c index fe8f9783a73c..37d05f2e986f 100644 --- a/drivers/accel/amdxdna/aie2_ctx.c +++ b/drivers/accel/amdxdna/aie2_ctx.c @@ -306,6 +306,10 @@ aie2_sched_job_run(struct drm_sched_job *sched_job) kref_get(&job->refcnt); fence = dma_fence_get(job->fence); + ret = amdxdna_pm_resume_get(hwctx->client->xdna); + if (ret) + goto out; + if (job->drv_cmd) { switch (job->drv_cmd->opcode) { case SYNC_DEBUG_BO: @@ -332,6 +336,7 @@ aie2_sched_job_run(struct drm_sched_job *sched_job) out: if (ret) { + amdxdna_pm_suspend_put(hwctx->client->xdna); dma_fence_put(job->fence); aie2_job_put(job); mmput(job->mm); @@ -988,15 +993,11 @@ int aie2_cmd_submit(struct amdxdna_hwctx *hwctx, struct amdxdna_sched_job *job, goto free_chain; } - ret = amdxdna_pm_resume_get(xdna); - if (ret) - goto cleanup_job; - retry: ret = drm_gem_lock_reservations(job->bos, job->bo_cnt, &acquire_ctx); if (ret) { XDNA_WARN(xdna, "Failed to lock BOs, ret %d", ret); - goto suspend_put; + goto cleanup_job; } for (i = 0; i < job->bo_cnt; i++) { @@ -1004,7 +1005,7 @@ int aie2_cmd_submit(struct amdxdna_hwctx *hwctx, struct amdxdna_sched_job *job, if (ret) { XDNA_WARN(xdna, "Failed to reserve fences %d", ret); drm_gem_unlock_reservations(job->bos, job->bo_cnt, &acquire_ctx); - goto suspend_put; + goto cleanup_job; } } @@ -1019,12 +1020,12 @@ int aie2_cmd_submit(struct amdxdna_hwctx *hwctx, struct amdxdna_sched_job *job, msecs_to_jiffies(HMM_RANGE_DEFAULT_TIMEOUT); } else if (time_after(jiffies, timeout)) { ret = -ETIME; - goto suspend_put; + goto cleanup_job; } ret = aie2_populate_range(abo); if (ret) - goto suspend_put; + goto cleanup_job; goto retry; } } @@ -1050,8 +1051,6 @@ int aie2_cmd_submit(struct amdxdna_hwctx *hwctx, struct amdxdna_sched_job *job, return 0; -suspend_put: - amdxdna_pm_suspend_put(xdna); cleanup_job: drm_sched_job_cleanup(&job->base); free_chain: