From a9162439ad792afcddc04718408ec1380b7a5f63 Mon Sep 17 00:00:00 2001
From: Lizhi Hou <lizhi.hou@amd.com>
Date: Tue, 27 Jan 2026 16:23:56 -0800
Subject: [PATCH 01/12] accel/amdxdna: Hold mm structure across
 iommu_sva_unbind_device()

Some tests trigger a crash in iommu_sva_unbind_device() due to
accessing iommu_mm after the associated mm structure has been
freed.

Fix this by taking an explicit reference to the mm structure
after successfully binding the device, and releasing it only
after the device is unbound. This ensures the mm remains valid
for the entire SVA bind/unbind lifetime.

Fixes: be462c97b7df ("accel/amdxdna: Add hardware context")
Reviewed-by: Mario Limonciello (AMD) <superm1@kernel.org>
Signed-off-by: Lizhi Hou <lizhi.hou@amd.com>
Link: https://patch.msgid.link/20260128002356.1858122-1-lizhi.hou@amd.com
---
 drivers/accel/amdxdna/amdxdna_pci_drv.c | 3 +++
 drivers/accel/amdxdna/amdxdna_pci_drv.h | 1 +
 2 files changed, 4 insertions(+)

diff --git a/drivers/accel/amdxdna/amdxdna_pci_drv.c b/drivers/accel/amdxdna/amdxdna_pci_drv.c
index 45f5c12fc67f..fdefd9ec2066 100644
--- a/drivers/accel/amdxdna/amdxdna_pci_drv.c
+++ b/drivers/accel/amdxdna/amdxdna_pci_drv.c
@@ -82,6 +82,8 @@ static int amdxdna_drm_open(struct drm_device *ddev, struct drm_file *filp)
 		ret = -ENODEV;
 		goto unbind_sva;
 	}
+	client->mm = current->mm;
+	mmgrab(client->mm);
 	init_srcu_struct(&client->hwctx_srcu);
 	xa_init_flags(&client->hwctx_xa, XA_FLAGS_ALLOC);
 	mutex_init(&client->mm_lock);
@@ -116,6 +118,7 @@ static void amdxdna_client_cleanup(struct amdxdna_client *client)
 		drm_gem_object_put(to_gobj(client->dev_heap));
 
 	iommu_sva_unbind_device(client->sva);
+	mmdrop(client->mm);
 
 	kfree(client);
 }
diff --git a/drivers/accel/amdxdna/amdxdna_pci_drv.h b/drivers/accel/amdxdna/amdxdna_pci_drv.h
index 6580cb5ec7e2..f08406b8fdf9 100644
--- a/drivers/accel/amdxdna/amdxdna_pci_drv.h
+++ b/drivers/accel/amdxdna/amdxdna_pci_drv.h
@@ -130,6 +130,7 @@ struct amdxdna_client {
 
 	struct iommu_sva		*sva;
 	int				pasid;
+	struct mm_struct		*mm;
 };
 
 #define amdxdna_for_each_hwctx(client, hwctx_id, entry)		\

From f1370241fe8045702bc9d0812b996791f0500f1b Mon Sep 17 00:00:00 2001
From: Lizhi Hou <lizhi.hou@amd.com>
Date: Thu, 29 Jan 2026 16:32:55 -0800
Subject: [PATCH 02/12] accel/amdxdna: Stop job scheduling across
 aie2_release_resource()

Running jobs on a hardware context while it is in the process of
releasing resources can lead to use-after-free and crashes.

Fix this by stopping job scheduling before calling
aie2_release_resource() and restarting it after the release completes.
Additionally, aie2_sched_job_run() now checks whether the hardware
context is still active.

Fixes: 4fd6ca90fc7f ("accel/amdxdna: Refactor hardware context destroy routine")
Reviewed-by: Mario Limonciello (AMD) <superm1@kernel.org>
Signed-off-by: Lizhi Hou <lizhi.hou@amd.com>
Link: https://patch.msgid.link/20260130003255.2083255-1-lizhi.hou@amd.com
---
 drivers/accel/amdxdna/aie2_ctx.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/drivers/accel/amdxdna/aie2_ctx.c b/drivers/accel/amdxdna/aie2_ctx.c
index 5511ab2ef242..c4a58c00e442 100644
--- a/drivers/accel/amdxdna/aie2_ctx.c
+++ b/drivers/accel/amdxdna/aie2_ctx.c
@@ -315,6 +315,9 @@ aie2_sched_job_run(struct drm_sched_job *sched_job)
 	struct dma_fence *fence;
 	int ret;
 
+	if (hwctx->status != HWCTX_STAT_READY)
+		return NULL;
+
 	if (!mmget_not_zero(job->mm))
 		return ERR_PTR(-ESRCH);
 
@@ -705,7 +708,10 @@ void aie2_hwctx_fini(struct amdxdna_hwctx *hwctx)
 	aie2_hwctx_wait_for_idle(hwctx);
 
 	/* Request fw to destroy hwctx and cancel the rest pending requests */
+	drm_sched_stop(&hwctx->priv->sched, NULL);
 	aie2_release_resource(hwctx);
+	hwctx->status = HWCTX_STAT_STOP;
+	drm_sched_start(&hwctx->priv->sched, 0);
 
 	mutex_unlock(&xdna->dev_lock);
 	drm_sched_entity_destroy(&hwctx->priv->entity);

From 84dd57fb0359500092f1101409ca32091731490d Mon Sep 17 00:00:00 2001
From: Zishun Yi <zishun.yi.dev@gmail.com>
Date: Fri, 30 Jan 2026 01:10:22 +0800
Subject: [PATCH 03/12] accel/amdxdna: Fix memory leak in amdxdna_ubuf_map

The amdxdna_ubuf_map() function allocates memory for sg and
internal sg table structures, but it fails to free them if subsequent
operations (sg_alloc_table_from_pages or dma_map_sgtable) fail.

Fixes: bd72d4acda10 ("accel/amdxdna: Support user space allocated buffer")
Signed-off-by: Zishun Yi <zishun.yi.dev@gmail.com>
Reviewed-by: Lizhi Hou <lizhi.hou@amd.com>
Reviewed-by: Min Ma <mamin506@gmail.com>
Signed-off-by: Lizhi Hou <lizhi.hou@amd.com>
Link: https://patch.msgid.link/20260129171022.68578-1-zishun.yi.dev@gmail.com
---
 drivers/accel/amdxdna/amdxdna_ubuf.c | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/drivers/accel/amdxdna/amdxdna_ubuf.c b/drivers/accel/amdxdna/amdxdna_ubuf.c
index 077b2261cf2a..9e3b3b055caa 100644
--- a/drivers/accel/amdxdna/amdxdna_ubuf.c
+++ b/drivers/accel/amdxdna/amdxdna_ubuf.c
@@ -34,15 +34,21 @@ static struct sg_table *amdxdna_ubuf_map(struct dma_buf_attachment *attach,
 	ret = sg_alloc_table_from_pages(sg, ubuf->pages, ubuf->nr_pages, 0,
 					ubuf->nr_pages << PAGE_SHIFT, GFP_KERNEL);
 	if (ret)
-		return ERR_PTR(ret);
+		goto err_free_sg;
 
 	if (ubuf->flags & AMDXDNA_UBUF_FLAG_MAP_DMA) {
 		ret = dma_map_sgtable(attach->dev, sg, direction, 0);
 		if (ret)
-			return ERR_PTR(ret);
+			goto err_free_table;
 	}
 
 	return sg;
+
+err_free_table:
+	sg_free_table(sg);
+err_free_sg:
+	kfree(sg);
+	return ERR_PTR(ret);
 }
 
 static void amdxdna_ubuf_unmap(struct dma_buf_attachment *attach,

From 45c0a8a70253ab97e676b9791891e389a531664b Mon Sep 17 00:00:00 2001
From: Nathan Chancellor <nathan@kernel.org>
Date: Wed, 14 Jan 2026 13:43:31 -0700
Subject: [PATCH 04/12] drm/panel: ilitek-ili9882t: Remove duplicate
 initializers in tianma_il79900a_dsc

Clang warns (or errors with CONFIG_WERROR=y / W=e):

  drivers/gpu/drm/panel/panel-ilitek-ili9882t.c:95:16: error: initializer overrides prior initialization of this subobject [-Werror,-Winitializer-overrides]
     95 |         .vbr_enable = 0,
        |                       ^
  drivers/gpu/drm/panel/panel-ilitek-ili9882t.c:90:16: note: previous initialization is here
     90 |         .vbr_enable = false,
        |                       ^~~~~
  drivers/gpu/drm/panel/panel-ilitek-ili9882t.c:97:19: error: initializer overrides prior initialization of this subobject [-Werror,-Winitializer-overrides]
     97 |         .rc_model_size = DSC_RC_MODEL_SIZE_CONST,
        |                          ^~~~~~~~~~~~~~~~~~~~~~~
  include/drm/display/drm_dsc.h:22:38: note: expanded from macro 'DSC_RC_MODEL_SIZE_CONST'
     22 | #define DSC_RC_MODEL_SIZE_CONST             8192
        |                                             ^~~~
  drivers/gpu/drm/panel/panel-ilitek-ili9882t.c:91:19: note: previous initialization is here
     91 |         .rc_model_size = DSC_RC_MODEL_SIZE_CONST,
        |                          ^~~~~~~~~~~~~~~~~~~~~~~
  include/drm/display/drm_dsc.h:22:38: note: expanded from macro 'DSC_RC_MODEL_SIZE_CONST'
     22 | #define DSC_RC_MODEL_SIZE_CONST             8192
        |                                             ^~~~
  drivers/gpu/drm/panel/panel-ilitek-ili9882t.c:132:25: error: initializer overrides prior initialization of this subobject [-Werror,-Winitializer-overrides]
    132 |         .initial_scale_value = 32,
        |                                ^~
  drivers/gpu/drm/panel/panel-ilitek-ili9882t.c:126:25: note: previous initialization is here
    126 |         .initial_scale_value = 32,
        |                                ^~
  drivers/gpu/drm/panel/panel-ilitek-ili9882t.c:133:20: error: initializer overrides prior initialization of this subobject [-Werror,-Winitializer-overrides]
    133 |         .nfl_bpg_offset = 3511,
        |                           ^~~~
  drivers/gpu/drm/panel/panel-ilitek-ili9882t.c:108:20: note: previous initialization is here
    108 |         .nfl_bpg_offset = 1402,
        |                           ^~~~

GCC would warn about this in the same manner but its version,
-Woverride-init, is disabled for a normal kernel build in
scripts/Makefile.warn. For clang, -Wextra in drivers/gpu/drm/Makefile
turns it back but GCC respects turning it off earlier in the command
line.

Of all the duplicate fields in the initializer, only nfl_bpg_offset is a
different value. Clear up the duplicate initializers, keeping the
'false' value for .vbr_enable, as it is bool, and the second value for
.nfl_bpg_offset, assuming it is the correct one since it was the one
tested in the original change.

Fixes: 65ce1f5834e9 ("drm/panel: ilitek-ili9882t: Switch Tianma TL121BVMS07 to DSC 120Hz mode")
Signed-off-by: Nathan Chancellor <nathan@kernel.org>
Link: https://patch.msgid.link/20260114-panel-ilitek-ili9882t-fix-override-init-v1-1-1d69a2b096df@kernel.org
Signed-off-by: Maxime Ripard <mripard@kernel.org>
---
 drivers/gpu/drm/panel/panel-ilitek-ili9882t.c | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/drivers/gpu/drm/panel/panel-ilitek-ili9882t.c b/drivers/gpu/drm/panel/panel-ilitek-ili9882t.c
index 370424ddfc80..8b2bfb7d3638 100644
--- a/drivers/gpu/drm/panel/panel-ilitek-ili9882t.c
+++ b/drivers/gpu/drm/panel/panel-ilitek-ili9882t.c
@@ -88,11 +88,9 @@ static const struct drm_dsc_config tianma_il79900a_dsc = {
 	.native_422 = false,
 	.simple_422 = false,
 	.vbr_enable = false,
-	.rc_model_size = DSC_RC_MODEL_SIZE_CONST,
 	.pic_width = 1600,
 	.pic_height = 2560,
 	.convert_rgb = 0,
-	.vbr_enable = 0,
 	.rc_buf_thresh = {14, 28, 42, 56, 70, 84, 98, 105, 112, 119, 121, 123, 125, 126},
 	.rc_model_size = DSC_RC_MODEL_SIZE_CONST,
 	.rc_edge_factor = DSC_RC_EDGE_FACTOR_CONST,
@@ -105,7 +103,6 @@ static const struct drm_dsc_config tianma_il79900a_dsc = {
 	.initial_offset = 6144,
 	.rc_quant_incr_limit0 = 11,
 	.rc_quant_incr_limit1 = 11,
-	.nfl_bpg_offset = 1402,
 	.rc_range_params = {
 		{ 0,  4, DSC_BPG_OFFSET(2)},
 		{ 0,  4, DSC_BPG_OFFSET(0)},
@@ -123,7 +120,6 @@ static const struct drm_dsc_config tianma_il79900a_dsc = {
 		{ 9, 12, DSC_BPG_OFFSET(-12)},
 		{12, 13, DSC_BPG_OFFSET(-12)},
 	},
-	.initial_scale_value = 32,
 	.slice_chunk_size = 800,
 	.initial_dec_delay = 657,
 	.final_offset = 4320,

From fe6d29b082d49df336ebb92226a3c004ae9e3222 Mon Sep 17 00:00:00 2001
From: Liu Ying <victor.liu@nxp.com>
Date: Fri, 23 Jan 2026 17:22:17 +0800
Subject: [PATCH 05/12] drm/bridge: imx8qxp-pixel-combiner: Fix bailout for
 imx8qxp_pc_bridge_probe()

In case the channel0 is unavailable and bailing out from free_child is
needed when we fail to add a DRM bridge for the available channel1,
pointer pc->ch[0] in the bailout path would be NULL and it would be
dereferenced as pc->ch[0]->bridge.next_bridge.  Fix this by checking
pc->ch[0] before dereferencing it.

Fixes: ae754f049ce1 ("drm/bridge: imx8qxp-pixel-combiner: get/put the next bridge")
Fixes: 99764593528f ("drm/bridge: imx8qxp-pixel-combiner: convert to devm_drm_bridge_alloc() API")
Signed-off-by: Liu Ying <victor.liu@nxp.com>
Reviewed-by: Luca Ceresoli <luca.ceresoli@bootlin.com>
Reviewed-by: Frank Li <Frank.Li@nxp.com>
Link: https://patch.msgid.link/20260123-imx8qxp-drm-bridge-fixes-v1-3-8bb85ada5866@nxp.com
Signed-off-by: Luca Ceresoli <luca.ceresoli@bootlin.com>
---
 drivers/gpu/drm/bridge/imx/imx8qxp-pixel-combiner.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/bridge/imx/imx8qxp-pixel-combiner.c b/drivers/gpu/drm/bridge/imx/imx8qxp-pixel-combiner.c
index 74eda8b54023..99a9280edb4f 100644
--- a/drivers/gpu/drm/bridge/imx/imx8qxp-pixel-combiner.c
+++ b/drivers/gpu/drm/bridge/imx/imx8qxp-pixel-combiner.c
@@ -348,7 +348,7 @@ static int imx8qxp_pc_bridge_probe(struct platform_device *pdev)
 free_child:
 	of_node_put(child);
 
-	if (i == 1 && pc->ch[0]->bridge.next_bridge)
+	if (i == 1 && pc->ch[0] && pc->ch[0]->bridge.next_bridge)
 		drm_bridge_remove(&pc->ch[0]->bridge);
 
 	pm_runtime_disable(dev);

From b853007fdcdd64b49601a993c2b30c28279ae15d Mon Sep 17 00:00:00 2001
From: Lizhi Hou <lizhi.hou@amd.com>
Date: Mon, 2 Feb 2026 13:24:50 -0800
Subject: [PATCH 06/12] accel/amdxdna: Remove hardware context status

One newly supported command does not require hardware context configuration
to be performed upfront. As a result, checking hardware context status
causes this command to fail incorrectly.

Remove hardware context status handling entirely. For other commands,
if userspace submits a request without configuring the hardware context
first, the firmware will report an error or time out as appropriate.

Fixes: aac243092b70 ("accel/amdxdna: Add command execution")
Reviewed-by: Mario Limonciello (AMD) <superm1@kernel.org>
Signed-off-by: Lizhi Hou <lizhi.hou@amd.com>
Link: https://patch.msgid.link/20260202212450.2681273-1-lizhi.hou@amd.com
---
 drivers/accel/amdxdna/aie2_ctx.c     | 25 ++-----------------------
 drivers/accel/amdxdna/aie2_message.c |  3 +++
 drivers/accel/amdxdna/amdxdna_ctx.h  |  5 -----
 3 files changed, 5 insertions(+), 28 deletions(-)

diff --git a/drivers/accel/amdxdna/aie2_ctx.c b/drivers/accel/amdxdna/aie2_ctx.c
index c4a58c00e442..ad5b5cd0bc81 100644
--- a/drivers/accel/amdxdna/aie2_ctx.c
+++ b/drivers/accel/amdxdna/aie2_ctx.c
@@ -47,17 +47,6 @@ static void aie2_job_put(struct amdxdna_sched_job *job)
 	kref_put(&job->refcnt, aie2_job_release);
 }
 
-static void aie2_hwctx_status_shift_stop(struct amdxdna_hwctx *hwctx)
-{
-	 hwctx->old_status = hwctx->status;
-	 hwctx->status = HWCTX_STAT_STOP;
-}
-
-static void aie2_hwctx_status_restore(struct amdxdna_hwctx *hwctx)
-{
-	hwctx->status = hwctx->old_status;
-}
-
 /* The bad_job is used in aie2_sched_job_timedout, otherwise, set it to NULL */
 static void aie2_hwctx_stop(struct amdxdna_dev *xdna, struct amdxdna_hwctx *hwctx,
 			    struct drm_sched_job *bad_job)
@@ -84,11 +73,6 @@ static int aie2_hwctx_restart(struct amdxdna_dev *xdna, struct amdxdna_hwctx *hw
 		goto out;
 	}
 
-	if (hwctx->status != HWCTX_STAT_READY) {
-		XDNA_DBG(xdna, "hwctx is not ready, status %d", hwctx->status);
-		goto out;
-	}
-
 	ret = aie2_config_cu(hwctx, NULL);
 	if (ret) {
 		XDNA_ERR(xdna, "Config cu failed, ret %d", ret);
@@ -140,7 +124,6 @@ static int aie2_hwctx_suspend_cb(struct amdxdna_hwctx *hwctx, void *arg)
 
 	aie2_hwctx_wait_for_idle(hwctx);
 	aie2_hwctx_stop(xdna, hwctx, NULL);
-	aie2_hwctx_status_shift_stop(hwctx);
 
 	return 0;
 }
@@ -162,7 +145,6 @@ static int aie2_hwctx_resume_cb(struct amdxdna_hwctx *hwctx, void *arg)
 {
 	struct amdxdna_dev *xdna = hwctx->client->xdna;
 
-	aie2_hwctx_status_restore(hwctx);
 	return aie2_hwctx_restart(xdna, hwctx);
 }
 
@@ -315,7 +297,7 @@ aie2_sched_job_run(struct drm_sched_job *sched_job)
 	struct dma_fence *fence;
 	int ret;
 
-	if (hwctx->status != HWCTX_STAT_READY)
+	if (!hwctx->priv->mbox_chann)
 		return NULL;
 
 	if (!mmget_not_zero(job->mm))
@@ -666,7 +648,6 @@ int aie2_hwctx_init(struct amdxdna_hwctx *hwctx)
 	}
 	amdxdna_pm_suspend_put(xdna);
 
-	hwctx->status = HWCTX_STAT_INIT;
 	init_waitqueue_head(&priv->job_free_wq);
 
 	XDNA_DBG(xdna, "hwctx %s init completed", hwctx->name);
@@ -710,7 +691,6 @@ void aie2_hwctx_fini(struct amdxdna_hwctx *hwctx)
 	/* Request fw to destroy hwctx and cancel the rest pending requests */
 	drm_sched_stop(&hwctx->priv->sched, NULL);
 	aie2_release_resource(hwctx);
-	hwctx->status = HWCTX_STAT_STOP;
 	drm_sched_start(&hwctx->priv->sched, 0);
 
 	mutex_unlock(&xdna->dev_lock);
@@ -755,7 +735,7 @@ static int aie2_hwctx_cu_config(struct amdxdna_hwctx *hwctx, void *buf, u32 size
 	if (XDNA_MBZ_DBG(xdna, config->pad, sizeof(config->pad)))
 		return -EINVAL;
 
-	if (hwctx->status != HWCTX_STAT_INIT) {
+	if (hwctx->cus) {
 		XDNA_ERR(xdna, "Not support re-config CU");
 		return -EINVAL;
 	}
@@ -786,7 +766,6 @@ static int aie2_hwctx_cu_config(struct amdxdna_hwctx *hwctx, void *buf, u32 size
 	}
 
 	wmb(); /* To avoid locking in command submit when check status */
-	hwctx->status = HWCTX_STAT_READY;
 
 	return 0;
 
diff --git a/drivers/accel/amdxdna/aie2_message.c b/drivers/accel/amdxdna/aie2_message.c
index 99215328505e..7d7dcfeaf794 100644
--- a/drivers/accel/amdxdna/aie2_message.c
+++ b/drivers/accel/amdxdna/aie2_message.c
@@ -493,6 +493,9 @@ int aie2_config_cu(struct amdxdna_hwctx *hwctx,
 	if (!chann)
 		return -ENODEV;
 
+	if (!hwctx->cus)
+		return 0;
+
 	if (hwctx->cus->num_cus > MAX_NUM_CUS) {
 		XDNA_DBG(xdna, "Exceed maximum CU %d", MAX_NUM_CUS);
 		return -EINVAL;
diff --git a/drivers/accel/amdxdna/amdxdna_ctx.h b/drivers/accel/amdxdna/amdxdna_ctx.h
index b29449a92f60..16c85f08f03c 100644
--- a/drivers/accel/amdxdna/amdxdna_ctx.h
+++ b/drivers/accel/amdxdna/amdxdna_ctx.h
@@ -99,11 +99,6 @@ struct amdxdna_hwctx {
 	u32				start_col;
 	u32				num_col;
 	u32				num_unused_col;
-#define HWCTX_STAT_INIT  0
-#define HWCTX_STAT_READY 1
-#define HWCTX_STAT_STOP  2
-	u32				status;
-	u32				old_status;
 
 	struct amdxdna_qos_info		     qos;
 	struct amdxdna_hwctx_param_config_cu *cus;

From 750817a7c41de083ca5d73052e97bb7b67d7c394 Mon Sep 17 00:00:00 2001
From: Lizhi Hou <lizhi.hou@amd.com>
Date: Tue, 3 Feb 2026 10:40:37 -0800
Subject: [PATCH 07/12] accel/amdxdna: Fix incorrect error code returned for
 failed chain command

The driver currently returns an incorrect error code when a chain command
fails. In this case, ERT_CMD_STATE_ERROR is expected to be reported for
failed chain commands.

Fixes: aac243092b70 ("accel/amdxdna: Add command execution")
Reviewed-by: Mario Limonciello (AMD) <superm1@kernel.org>
Reviewed-by: Maciej Falkowski <maciej.falkowski@linux.intel.com>
Signed-off-by: Lizhi Hou <lizhi.hou@amd.com>
Link: https://patch.msgid.link/20260203184037.2751889-1-lizhi.hou@amd.com
---
 drivers/accel/amdxdna/aie2_ctx.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/accel/amdxdna/aie2_ctx.c b/drivers/accel/amdxdna/aie2_ctx.c
index ad5b5cd0bc81..fe8f9783a73c 100644
--- a/drivers/accel/amdxdna/aie2_ctx.c
+++ b/drivers/accel/amdxdna/aie2_ctx.c
@@ -274,7 +274,7 @@ aie2_sched_cmdlist_resp_handler(void *handle, void __iomem *data, size_t size)
 		ret = -EINVAL;
 		goto out;
 	}
-	amdxdna_cmd_set_state(cmd_abo, fail_cmd_status);
+	amdxdna_cmd_set_state(cmd_abo, ERT_CMD_STATE_ERROR);
 
 	if (amdxdna_cmd_get_op(cmd_abo) == ERT_CMD_CHAIN) {
 		struct amdxdna_cmd_chain *cc = amdxdna_cmd_get_payload(cmd_abo, NULL);

From c4d53e567d3b6f6211d013e6c5c3672b26a49845 Mon Sep 17 00:00:00 2001
From: Dave Airlie <airlied@redhat.com>
Date: Wed, 4 Feb 2026 13:00:05 +1000
Subject: [PATCH 08/12] nouveau/vmm: rewrite pte tracker using a struct and
 bitfields.

I want to increase the counters here and start tracking LPTs as well
as there are certain situations where userspace with mixed page sizes
can cause ref/unrefs to live longer so need better reference counting.

This should be entirely non-functional.

Reviewed-by: Mary Guillemard <mary@mary.zone>
Tested-by: Mary Guillemard <mary@mary.zone>
Tested-by: Mel Henning <mhenning@darkrefraction.com>
Signed-off-by: Dave Airlie <airlied@redhat.com>
Link: https://patch.msgid.link/20260204030208.2313241-2-airlied@gmail.com
---
 drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmm.c | 41 ++++++++++---------
 drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmm.h | 14 +++++--
 2 files changed, 31 insertions(+), 24 deletions(-)

diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmm.c b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmm.c
index f95c58b67633..efc334f6104c 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmm.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmm.c
@@ -53,7 +53,7 @@ nvkm_vmm_pt_new(const struct nvkm_vmm_desc *desc, bool sparse,
 		}
 	}
 
-	if (!(pgt = kzalloc(sizeof(*pgt) + lpte, GFP_KERNEL)))
+	if (!(pgt = kzalloc(sizeof(*pgt) + (sizeof(pgt->pte[0]) * lpte), GFP_KERNEL)))
 		return NULL;
 	pgt->page = page ? page->shift : 0;
 	pgt->sparse = sparse;
@@ -208,7 +208,7 @@ nvkm_vmm_unref_sptes(struct nvkm_vmm_iter *it, struct nvkm_vmm_pt *pgt,
 	 */
 	for (lpti = ptei >> sptb; ptes; spti = 0, lpti++) {
 		const u32 pten = min(sptn - spti, ptes);
-		pgt->pte[lpti] -= pten;
+		pgt->pte[lpti].s.sptes -= pten;
 		ptes -= pten;
 	}
 
@@ -218,9 +218,9 @@ nvkm_vmm_unref_sptes(struct nvkm_vmm_iter *it, struct nvkm_vmm_pt *pgt,
 
 	for (ptei = pteb = ptei >> sptb; ptei < lpti; pteb = ptei) {
 		/* Skip over any LPTEs that still have valid SPTEs. */
-		if (pgt->pte[pteb] & NVKM_VMM_PTE_SPTES) {
+		if (pgt->pte[pteb].s.sptes) {
 			for (ptes = 1, ptei++; ptei < lpti; ptes++, ptei++) {
-				if (!(pgt->pte[ptei] & NVKM_VMM_PTE_SPTES))
+				if (!(pgt->pte[ptei].s.sptes))
 					break;
 			}
 			continue;
@@ -232,14 +232,14 @@ nvkm_vmm_unref_sptes(struct nvkm_vmm_iter *it, struct nvkm_vmm_pt *pgt,
 		 *
 		 * Determine how many LPTEs need to transition state.
 		 */
-		pgt->pte[ptei] &= ~NVKM_VMM_PTE_VALID;
+		pgt->pte[ptei].s.spte_valid = false;
 		for (ptes = 1, ptei++; ptei < lpti; ptes++, ptei++) {
-			if (pgt->pte[ptei] & NVKM_VMM_PTE_SPTES)
+			if (pgt->pte[ptei].s.sptes)
 				break;
-			pgt->pte[ptei] &= ~NVKM_VMM_PTE_VALID;
+			pgt->pte[ptei].s.spte_valid = false;
 		}
 
-		if (pgt->pte[pteb] & NVKM_VMM_PTE_SPARSE) {
+		if (pgt->pte[pteb].s.sparse) {
 			TRA(it, "LPTE %05x: U -> S %d PTEs", pteb, ptes);
 			pair->func->sparse(vmm, pgt->pt[0], pteb, ptes);
 		} else
@@ -307,7 +307,7 @@ nvkm_vmm_ref_sptes(struct nvkm_vmm_iter *it, struct nvkm_vmm_pt *pgt,
 	 */
 	for (lpti = ptei >> sptb; ptes; spti = 0, lpti++) {
 		const u32 pten = min(sptn - spti, ptes);
-		pgt->pte[lpti] += pten;
+		pgt->pte[lpti].s.sptes += pten;
 		ptes -= pten;
 	}
 
@@ -317,9 +317,9 @@ nvkm_vmm_ref_sptes(struct nvkm_vmm_iter *it, struct nvkm_vmm_pt *pgt,
 
 	for (ptei = pteb = ptei >> sptb; ptei < lpti; pteb = ptei) {
 		/* Skip over any LPTEs that already have valid SPTEs. */
-		if (pgt->pte[pteb] & NVKM_VMM_PTE_VALID) {
+		if (pgt->pte[pteb].s.spte_valid) {
 			for (ptes = 1, ptei++; ptei < lpti; ptes++, ptei++) {
-				if (!(pgt->pte[ptei] & NVKM_VMM_PTE_VALID))
+				if (!pgt->pte[ptei].s.spte_valid)
 					break;
 			}
 			continue;
@@ -331,14 +331,14 @@ nvkm_vmm_ref_sptes(struct nvkm_vmm_iter *it, struct nvkm_vmm_pt *pgt,
 		 *
 		 * Determine how many LPTEs need to transition state.
 		 */
-		pgt->pte[ptei] |= NVKM_VMM_PTE_VALID;
+		pgt->pte[ptei].s.spte_valid = true;
 		for (ptes = 1, ptei++; ptei < lpti; ptes++, ptei++) {
-			if (pgt->pte[ptei] & NVKM_VMM_PTE_VALID)
+			if (pgt->pte[ptei].s.spte_valid)
 				break;
-			pgt->pte[ptei] |= NVKM_VMM_PTE_VALID;
+			pgt->pte[ptei].s.spte_valid = true;
 		}
 
-		if (pgt->pte[pteb] & NVKM_VMM_PTE_SPARSE) {
+		if (pgt->pte[pteb].s.sparse) {
 			const u32 spti = pteb * sptn;
 			const u32 sptc = ptes * sptn;
 			/* The entire LPTE is marked as sparse, we need
@@ -386,7 +386,8 @@ nvkm_vmm_sparse_ptes(const struct nvkm_vmm_desc *desc,
 			pgt->pde[ptei++] = NVKM_VMM_PDE_SPARSE;
 	} else
 	if (desc->type == LPT) {
-		memset(&pgt->pte[ptei], NVKM_VMM_PTE_SPARSE, ptes);
+		union nvkm_pte_tracker sparse = { .s.sparse = 1 };
+		memset(&pgt->pte[ptei].u, sparse.u, ptes);
 	}
 }
 
@@ -398,7 +399,7 @@ nvkm_vmm_sparse_unref_ptes(struct nvkm_vmm_iter *it, bool pfn, u32 ptei, u32 pte
 		memset(&pt->pde[ptei], 0x00, sizeof(pt->pde[0]) * ptes);
 	else
 	if (it->desc->type == LPT)
-		memset(&pt->pte[ptei], 0x00, sizeof(pt->pte[0]) * ptes);
+		memset(&pt->pte[ptei].u, 0x00, sizeof(pt->pte[0]) * ptes);
 	return nvkm_vmm_unref_ptes(it, pfn, ptei, ptes);
 }
 
@@ -445,9 +446,9 @@ nvkm_vmm_ref_hwpt(struct nvkm_vmm_iter *it, struct nvkm_vmm_pt *pgd, u32 pdei)
 		 * the SPTEs on some GPUs.
 		 */
 		for (ptei = pteb = 0; ptei < pten; pteb = ptei) {
-			bool spte = pgt->pte[ptei] & NVKM_VMM_PTE_SPTES;
+			bool spte = !!pgt->pte[ptei].s.sptes;
 			for (ptes = 1, ptei++; ptei < pten; ptes++, ptei++) {
-				bool next = pgt->pte[ptei] & NVKM_VMM_PTE_SPTES;
+				bool next = !!pgt->pte[ptei].s.sptes;
 				if (spte != next)
 					break;
 			}
@@ -461,7 +462,7 @@ nvkm_vmm_ref_hwpt(struct nvkm_vmm_iter *it, struct nvkm_vmm_pt *pgd, u32 pdei)
 			} else {
 				desc->func->unmap(vmm, pt, pteb, ptes);
 				while (ptes--)
-					pgt->pte[pteb++] |= NVKM_VMM_PTE_VALID;
+					pgt->pte[pteb++].s.spte_valid = true;
 			}
 		}
 	} else {
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmm.h b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmm.h
index 4586a425dbe4..a6312a0e6b84 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmm.h
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmm.h
@@ -4,6 +4,15 @@
 #include <core/memory.h>
 enum nvkm_memory_target;
 
+union nvkm_pte_tracker {
+	u8 u;
+	struct {
+		u8 sparse:1;
+		u8 spte_valid:1;
+		u8 sptes:6;
+	} s;
+};
+
 struct nvkm_vmm_pt {
 	/* Some GPUs have a mapping level with a dual page tables to
 	 * support large and small pages in the same address-range.
@@ -44,10 +53,7 @@ struct nvkm_vmm_pt {
 	 *
 	 * This information is used to manage LPTE state transitions.
 	 */
-#define NVKM_VMM_PTE_SPARSE 0x80
-#define NVKM_VMM_PTE_VALID  0x40
-#define NVKM_VMM_PTE_SPTES  0x3f
-	u8 pte[];
+	union nvkm_pte_tracker pte[];
 };
 
 typedef void (*nvkm_vmm_pxe_func)(struct nvkm_vmm *,

From 9dc983a85eb9b546bbe2ef3f2345e338aa655789 Mon Sep 17 00:00:00 2001
From: Dave Airlie <airlied@redhat.com>
Date: Wed, 4 Feb 2026 13:00:06 +1000
Subject: [PATCH 09/12] nouveau/vmm: increase size of vmm pte tracker struct to
 u32 (v2)

We need to tracker large counts of spte than previously due to unref
getting delayed sometimes.

This doesn't fix LPT tracking yet, it just creates space for it.

Reviewed-by: Mary Guillemard <mary@mary.zone>
Tested-by: Mary Guillemard <mary@mary.zone>
Tested-by: Mel Henning <mhenning@darkrefraction.com>
Signed-off-by: Dave Airlie <airlied@redhat.com>
Link: https://patch.msgid.link/20260204030208.2313241-3-airlied@gmail.com
---
 drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmm.c | 6 +++---
 drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmm.h | 9 +++++----
 2 files changed, 8 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmm.c b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmm.c
index efc334f6104c..44daeec0aa6d 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmm.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmm.c
@@ -387,7 +387,7 @@ nvkm_vmm_sparse_ptes(const struct nvkm_vmm_desc *desc,
 	} else
 	if (desc->type == LPT) {
 		union nvkm_pte_tracker sparse = { .s.sparse = 1 };
-		memset(&pgt->pte[ptei].u, sparse.u, ptes);
+		memset32(&pgt->pte[ptei].u, sparse.u, ptes);
 	}
 }
 
@@ -399,7 +399,7 @@ nvkm_vmm_sparse_unref_ptes(struct nvkm_vmm_iter *it, bool pfn, u32 ptei, u32 pte
 		memset(&pt->pde[ptei], 0x00, sizeof(pt->pde[0]) * ptes);
 	else
 	if (it->desc->type == LPT)
-		memset(&pt->pte[ptei].u, 0x00, sizeof(pt->pte[0]) * ptes);
+		memset32(&pt->pte[ptei].u, 0x00, ptes);
 	return nvkm_vmm_unref_ptes(it, pfn, ptei, ptes);
 }
 
@@ -458,7 +458,7 @@ nvkm_vmm_ref_hwpt(struct nvkm_vmm_iter *it, struct nvkm_vmm_pt *pgd, u32 pdei)
 					desc->func->sparse(vmm, pt, pteb, ptes);
 				else
 					desc->func->invalid(vmm, pt, pteb, ptes);
-				memset(&pgt->pte[pteb], 0x00, ptes);
+				memset32(&pgt->pte[pteb].u, 0x00, ptes);
 			} else {
 				desc->func->unmap(vmm, pt, pteb, ptes);
 				while (ptes--)
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmm.h b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmm.h
index a6312a0e6b84..a8b08126e8dc 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmm.h
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmm.h
@@ -5,11 +5,12 @@
 enum nvkm_memory_target;
 
 union nvkm_pte_tracker {
-	u8 u;
+	u32 u;
 	struct {
-		u8 sparse:1;
-		u8 spte_valid:1;
-		u8 sptes:6;
+		u32 sparse:1;
+		u32 spte_valid:1;
+		u32 padding:14;
+		u32 sptes:16;
 	} s;
 };
 

From d19512f5abb198daf29da877f6a02c667a95c03d Mon Sep 17 00:00:00 2001
From: Dave Airlie <airlied@redhat.com>
Date: Wed, 4 Feb 2026 13:00:07 +1000
Subject: [PATCH 10/12] nouveau/vmm: start tracking if the LPT PTE is valid.
 (v6)

When NVK enabled large pages userspace tests were seeing fault
reports at a valid address.

There was a case where an address moving from 64k page to 4k pages
could expose a race between unmapping the 4k page, mapping the 64k
page and unref the 4k pages.

Unref 4k pages would cause the dual-page table handling to always
set the LPTE entry to SPARSE or INVALID, but if we'd mapped a valid
LPTE in the meantime, it would get trashed. Keep track of when
a valid LPTE has been referenced, and don't reset in that case.

This adds an lpte valid tracker and lpte reference count.

Whenever an lpte is referenced, it gets made valid and the ref count
increases, whenever it gets unreference the refcount is tracked.

Link: https://gitlab.freedesktop.org/mesa/mesa/-/issues/14610
Reviewed-by: Mary Guillemard <mary@mary.zone>
Tested-by: Mary Guillemard <mary@mary.zone>
Tested-by: Mel Henning <mhenning@darkrefraction.com>
Signed-off-by: Dave Airlie <airlied@redhat.com>
Link: https://patch.msgid.link/20260204030208.2313241-4-airlied@gmail.com
---
 drivers/gpu/drm/nouveau/nouveau_drv.h         |  4 +-
 drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmm.c | 39 +++++++++++++++----
 drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmm.h |  3 +-
 3 files changed, 36 insertions(+), 10 deletions(-)

diff --git a/drivers/gpu/drm/nouveau/nouveau_drv.h b/drivers/gpu/drm/nouveau/nouveau_drv.h
index 0e409414f44d..1c2523e2f92e 100644
--- a/drivers/gpu/drm/nouveau/nouveau_drv.h
+++ b/drivers/gpu/drm/nouveau/nouveau_drv.h
@@ -10,7 +10,7 @@
 
 #define DRIVER_MAJOR		1
 #define DRIVER_MINOR		4
-#define DRIVER_PATCHLEVEL	1
+#define DRIVER_PATCHLEVEL	2
 
 /*
  * 1.1.1:
@@ -37,6 +37,8 @@
  *      - implemented limited ABI16/NVIF interop
  * 1.4.1:
  *      - add variable page sizes and compression for Turing+
+ * 1.4.2:
+ *      - tell userspace LPTE/SPTE races are fixed.
  */
 
 #include <linux/notifier.h>
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmm.c b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmm.c
index 44daeec0aa6d..19a7407cf702 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmm.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmm.c
@@ -242,14 +242,17 @@ nvkm_vmm_unref_sptes(struct nvkm_vmm_iter *it, struct nvkm_vmm_pt *pgt,
 		if (pgt->pte[pteb].s.sparse) {
 			TRA(it, "LPTE %05x: U -> S %d PTEs", pteb, ptes);
 			pair->func->sparse(vmm, pgt->pt[0], pteb, ptes);
-		} else
-		if (pair->func->invalid) {
-			/* If the MMU supports it, restore the LPTE to the
-			 * INVALID state to tell the MMU there is no point
-			 * trying to fetch the corresponding SPTEs.
-			 */
-			TRA(it, "LPTE %05x: U -> I %d PTEs", pteb, ptes);
-			pair->func->invalid(vmm, pgt->pt[0], pteb, ptes);
+		} else if (!pgt->pte[pteb].s.lpte_valid) {
+			if (pair->func->invalid) {
+				/* If the MMU supports it, restore the LPTE to the
+				 * INVALID state to tell the MMU there is no point
+				 * trying to fetch the corresponding SPTEs.
+				 */
+				TRA(it, "LPTE %05x: U -> I %d PTEs", pteb, ptes);
+				pair->func->invalid(vmm, pgt->pt[0], pteb, ptes);
+			}
+		} else {
+			TRA(it, "LPTE %05x: V %d PTEs", pteb, ptes);
 		}
 	}
 }
@@ -280,6 +283,15 @@ nvkm_vmm_unref_ptes(struct nvkm_vmm_iter *it, bool pfn, u32 ptei, u32 ptes)
 	if (desc->type == SPT && (pgt->refs[0] || pgt->refs[1]))
 		nvkm_vmm_unref_sptes(it, pgt, desc, ptei, ptes);
 
+	if (desc->type == LPT && (pgt->refs[0] || pgt->refs[1])) {
+		for (u32 lpti = ptei; ptes; lpti++) {
+			pgt->pte[lpti].s.lptes--;
+			if (pgt->pte[lpti].s.lptes == 0)
+				pgt->pte[lpti].s.lpte_valid = false;
+			ptes--;
+		}
+	}
+
 	/* PT no longer needed? Destroy it. */
 	if (!pgt->refs[type]) {
 		it->lvl++;
@@ -332,10 +344,12 @@ nvkm_vmm_ref_sptes(struct nvkm_vmm_iter *it, struct nvkm_vmm_pt *pgt,
 		 * Determine how many LPTEs need to transition state.
 		 */
 		pgt->pte[ptei].s.spte_valid = true;
+		pgt->pte[ptei].s.lpte_valid = false;
 		for (ptes = 1, ptei++; ptei < lpti; ptes++, ptei++) {
 			if (pgt->pte[ptei].s.spte_valid)
 				break;
 			pgt->pte[ptei].s.spte_valid = true;
+			pgt->pte[ptei].s.lpte_valid = false;
 		}
 
 		if (pgt->pte[pteb].s.sparse) {
@@ -374,6 +388,15 @@ nvkm_vmm_ref_ptes(struct nvkm_vmm_iter *it, bool pfn, u32 ptei, u32 ptes)
 	if (desc->type == SPT)
 		nvkm_vmm_ref_sptes(it, pgt, desc, ptei, ptes);
 
+	if (desc->type == LPT) {
+		for (u32 lpti = ptei; ptes; lpti++) {
+			pgt->pte[lpti].s.spte_valid = false;
+			pgt->pte[lpti].s.lpte_valid = true;
+			pgt->pte[lpti].s.lptes++;
+			ptes--;
+		}
+	}
+
 	return true;
 }
 
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmm.h b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmm.h
index a8b08126e8dc..4ec0a3a21169 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmm.h
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmm.h
@@ -9,7 +9,8 @@ union nvkm_pte_tracker {
 	struct {
 		u32 sparse:1;
 		u32 spte_valid:1;
-		u32 padding:14;
+		u32 lpte_valid:1;
+		u32 lptes:13;
 		u32 sptes:16;
 	} s;
 };

From d19d963d2a4acb5bbf03e25733ba565a7f6e1422 Mon Sep 17 00:00:00 2001
From: Lizhi Hou <lizhi.hou@amd.com>
Date: Wed, 4 Feb 2026 09:10:48 -0800
Subject: [PATCH 11/12] accel/amdxdna: Fix incorrect DPM level after
 suspend/resume

The suspend routine sets the DPM level to 0, which unintentionally
overwrites the previously saved DPM level. As a result, the device always
resumes with DPM level 0 instead of restoring the original value.

Fix this by ensuring the suspend path does not overwrite the saved DPM
level, allowing the correct DPM level to be restored during resume.

Fixes: f4d7b8a6bc8c ("accel/amdxdna: Enhance power management settings")
Reviewed-by: Mario Limonciello (AMD) <superm1@kernel.org>
Signed-off-by: Lizhi Hou <lizhi.hou@amd.com>
Link: https://patch.msgid.link/20260204171048.3165580-1-lizhi.hou@amd.com
---
 drivers/accel/amdxdna/aie2_pm.c  | 3 +++
 drivers/accel/amdxdna/aie2_smu.c | 2 --
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/drivers/accel/amdxdna/aie2_pm.c b/drivers/accel/amdxdna/aie2_pm.c
index afcd6d4683e5..579b8be13b18 100644
--- a/drivers/accel/amdxdna/aie2_pm.c
+++ b/drivers/accel/amdxdna/aie2_pm.c
@@ -36,6 +36,8 @@ int aie2_pm_set_dpm(struct amdxdna_dev_hdl *ndev, u32 dpm_level)
 		return ret;
 
 	ret = ndev->priv->hw_ops.set_dpm(ndev, dpm_level);
+	if (!ret)
+		ndev->dpm_level = dpm_level;
 	amdxdna_pm_suspend_put(ndev->xdna);
 
 	return ret;
@@ -65,6 +67,7 @@ int aie2_pm_init(struct amdxdna_dev_hdl *ndev)
 	ret = ndev->priv->hw_ops.set_dpm(ndev, ndev->max_dpm_level);
 	if (ret)
 		return ret;
+	ndev->dpm_level = ndev->max_dpm_level;
 
 	ret = aie2_pm_set_clk_gating(ndev, AIE2_CLK_GATING_ENABLE);
 	if (ret)
diff --git a/drivers/accel/amdxdna/aie2_smu.c b/drivers/accel/amdxdna/aie2_smu.c
index 2d195e41f83d..d8c31924e501 100644
--- a/drivers/accel/amdxdna/aie2_smu.c
+++ b/drivers/accel/amdxdna/aie2_smu.c
@@ -84,7 +84,6 @@ int npu1_set_dpm(struct amdxdna_dev_hdl *ndev, u32 dpm_level)
 	}
 
 	ndev->hclk_freq = freq;
-	ndev->dpm_level = dpm_level;
 	ndev->max_tops = 2 * ndev->total_col;
 	ndev->curr_tops = ndev->max_tops * freq / 1028;
 
@@ -114,7 +113,6 @@ int npu4_set_dpm(struct amdxdna_dev_hdl *ndev, u32 dpm_level)
 
 	ndev->npuclk_freq = ndev->priv->dpm_clk_tbl[dpm_level].npuclk;
 	ndev->hclk_freq = ndev->priv->dpm_clk_tbl[dpm_level].hclk;
-	ndev->dpm_level = dpm_level;
 	ndev->max_tops = NPU4_DPM_TOPS(ndev, ndev->max_dpm_level);
 	ndev->curr_tops = NPU4_DPM_TOPS(ndev, dpm_level);
 

From 69674c1c704c0199ca7a3947f3cdcd575973175d Mon Sep 17 00:00:00 2001
From: Lizhi Hou <lizhi.hou@amd.com>
Date: Wed, 4 Feb 2026 09:11:17 -0800
Subject: [PATCH 12/12] accel/amdxdna: Move RPM resume into job run function

Currently, amdxdna_pm_resume_get() is called during job creation, and
amdxdna_pm_suspend_put() is called when the hardware notifies job
completion. If a job is canceled before it is run, no hardware
completion notification is generated, resulting in an unbalanced
runtime PM resume/suspend pair.

Fix this by moving amdxdna_pm_resume_get() to the job run path, ensuring
runtime PM is only resumed for jobs that are actually executed.

Fixes: 063db451832b ("accel/amdxdna: Enhance runtime power management")
Reviewed-by: Mario Limonciello (AMD) <superm1@kernel.org>
Signed-off-by: Lizhi Hou <lizhi.hou@amd.com>
Link: https://patch.msgid.link/20260204171118.3165607-1-lizhi.hou@amd.com
---
 drivers/accel/amdxdna/aie2_ctx.c | 19 +++++++++----------
 1 file changed, 9 insertions(+), 10 deletions(-)

diff --git a/drivers/accel/amdxdna/aie2_ctx.c b/drivers/accel/amdxdna/aie2_ctx.c
index fe8f9783a73c..37d05f2e986f 100644
--- a/drivers/accel/amdxdna/aie2_ctx.c
+++ b/drivers/accel/amdxdna/aie2_ctx.c
@@ -306,6 +306,10 @@ aie2_sched_job_run(struct drm_sched_job *sched_job)
 	kref_get(&job->refcnt);
 	fence = dma_fence_get(job->fence);
 
+	ret = amdxdna_pm_resume_get(hwctx->client->xdna);
+	if (ret)
+		goto out;
+
 	if (job->drv_cmd) {
 		switch (job->drv_cmd->opcode) {
 		case SYNC_DEBUG_BO:
@@ -332,6 +336,7 @@ aie2_sched_job_run(struct drm_sched_job *sched_job)
 
 out:
 	if (ret) {
+		amdxdna_pm_suspend_put(hwctx->client->xdna);
 		dma_fence_put(job->fence);
 		aie2_job_put(job);
 		mmput(job->mm);
@@ -988,15 +993,11 @@ int aie2_cmd_submit(struct amdxdna_hwctx *hwctx, struct amdxdna_sched_job *job,
 		goto free_chain;
 	}
 
-	ret = amdxdna_pm_resume_get(xdna);
-	if (ret)
-		goto cleanup_job;
-
 retry:
 	ret = drm_gem_lock_reservations(job->bos, job->bo_cnt, &acquire_ctx);
 	if (ret) {
 		XDNA_WARN(xdna, "Failed to lock BOs, ret %d", ret);
-		goto suspend_put;
+		goto cleanup_job;
 	}
 
 	for (i = 0; i < job->bo_cnt; i++) {
@@ -1004,7 +1005,7 @@ int aie2_cmd_submit(struct amdxdna_hwctx *hwctx, struct amdxdna_sched_job *job,
 		if (ret) {
 			XDNA_WARN(xdna, "Failed to reserve fences %d", ret);
 			drm_gem_unlock_reservations(job->bos, job->bo_cnt, &acquire_ctx);
-			goto suspend_put;
+			goto cleanup_job;
 		}
 	}
 
@@ -1019,12 +1020,12 @@ int aie2_cmd_submit(struct amdxdna_hwctx *hwctx, struct amdxdna_sched_job *job,
 					msecs_to_jiffies(HMM_RANGE_DEFAULT_TIMEOUT);
 			} else if (time_after(jiffies, timeout)) {
 				ret = -ETIME;
-				goto suspend_put;
+				goto cleanup_job;
 			}
 
 			ret = aie2_populate_range(abo);
 			if (ret)
-				goto suspend_put;
+				goto cleanup_job;
 			goto retry;
 		}
 	}
@@ -1050,8 +1051,6 @@ int aie2_cmd_submit(struct amdxdna_hwctx *hwctx, struct amdxdna_sched_job *job,
 
 	return 0;
 
-suspend_put:
-	amdxdna_pm_suspend_put(xdna);
 cleanup_job:
 	drm_sched_job_cleanup(&job->base);
 free_chain: