From 9d7a1cbebbb691891671def57407ba2f8ee914e8 Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Thu, 31 Jul 2025 10:38:09 +0100 Subject: [PATCH 1/6] drm/xe/migrate: prevent infinite recursion If the buf + offset is not aligned to XE_CAHELINE_BYTES we fallback to using a bounce buffer. However the bounce buffer here is allocated on the stack, and the only alignment requirement here is that it's naturally aligned to u8, and not XE_CACHELINE_BYTES. If the bounce buffer is also misaligned we then recurse back into the function again, however the new bounce buffer might also not be aligned, and might never be until we eventually blow through the stack, as we keep recursing. Instead of using the stack use kmalloc, which should respect the power-of-two alignment request here. Fixes a kernel panic when triggering this path through eudebug. v2 (Stuart): - Add build bug check for power-of-two restriction - s/EINVAL/ENOMEM/ Fixes: 270172f64b11 ("drm/xe: Update xe_ttm_access_memory to use GPU for non-visible access") Signed-off-by: Matthew Auld Cc: Maciej Patelczyk Cc: Stuart Summers Cc: Matthew Brost Reviewed-by: Stuart Summers Link: https://lore.kernel.org/r/20250731093807.207572-6-matthew.auld@intel.com (cherry picked from commit 38b34e928a08ba594c4bbf7118aa3aadacd62fff) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_migrate.c | 29 +++++++++++++++++------------ 1 file changed, 17 insertions(+), 12 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c index ba1cff2e4cda..6193e2ca3741 100644 --- a/drivers/gpu/drm/xe/xe_migrate.c +++ b/drivers/gpu/drm/xe/xe_migrate.c @@ -1820,15 +1820,19 @@ int xe_migrate_access_memory(struct xe_migrate *m, struct xe_bo *bo, if (!IS_ALIGNED(len, XE_CACHELINE_BYTES) || !IS_ALIGNED((unsigned long)buf + offset, XE_CACHELINE_BYTES)) { int buf_offset = 0; + void *bounce; + int err; + + BUILD_BUG_ON(!is_power_of_2(XE_CACHELINE_BYTES)); + bounce = kmalloc(XE_CACHELINE_BYTES, GFP_KERNEL); + if (!bounce) + return -ENOMEM; /* * Less than ideal for large unaligned access but this should be * fairly rare, can fixup if this becomes common. */ do { - u8 bounce[XE_CACHELINE_BYTES]; - void *ptr = (void *)bounce; - int err; int copy_bytes = min_t(int, bytes_left, XE_CACHELINE_BYTES - (offset & XE_CACHELINE_MASK)); @@ -1837,22 +1841,22 @@ int xe_migrate_access_memory(struct xe_migrate *m, struct xe_bo *bo, err = xe_migrate_access_memory(m, bo, offset & ~XE_CACHELINE_MASK, - (void *)ptr, - sizeof(bounce), 0); + bounce, + XE_CACHELINE_BYTES, 0); if (err) - return err; + break; if (write) { - memcpy(ptr + ptr_offset, buf + buf_offset, copy_bytes); + memcpy(bounce + ptr_offset, buf + buf_offset, copy_bytes); err = xe_migrate_access_memory(m, bo, offset & ~XE_CACHELINE_MASK, - (void *)ptr, - sizeof(bounce), write); + bounce, + XE_CACHELINE_BYTES, write); if (err) - return err; + break; } else { - memcpy(buf + buf_offset, ptr + ptr_offset, + memcpy(buf + buf_offset, bounce + ptr_offset, copy_bytes); } @@ -1861,7 +1865,8 @@ int xe_migrate_access_memory(struct xe_migrate *m, struct xe_bo *bo, offset += copy_bytes; } while (bytes_left); - return 0; + kfree(bounce); + return err; } dma_addr = xe_migrate_dma_map(xe, buf, len + page_offset, write); From 4126cb327a2e3273c81fcef1c594c5b7b645c44c Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Thu, 31 Jul 2025 10:38:10 +0100 Subject: [PATCH 2/6] drm/xe/migrate: don't overflow max copy size With non-page aligned copy, we need to use 4 byte aligned pitch, however the size itself might still be close to our maximum of ~8M, and so the dimensions of the copy can easily exceed the S16_MAX limit of the copy command leading to the following assert: xe 0000:03:00.0: [drm] Assertion `size / pitch <= ((s16)(((u16)~0U) >> 1))` failed! platform: BATTLEMAGE subplatform: 1 graphics: Xe2_HPG 20.01 step A0 media: Xe2_HPM 13.01 step A1 tile: 0 VRAM 10.0 GiB GT: 0 type 1 WARNING: CPU: 23 PID: 10605 at drivers/gpu/drm/xe/xe_migrate.c:673 emit_copy+0x4b5/0x4e0 [xe] To fix this account for the pitch when calculating the number of current bytes to copy. Fixes: 270172f64b11 ("drm/xe: Update xe_ttm_access_memory to use GPU for non-visible access") Signed-off-by: Matthew Auld Cc: Maciej Patelczyk Cc: Matthew Brost Reviewed-by: Stuart Summers Link: https://lore.kernel.org/r/20250731093807.207572-7-matthew.auld@intel.com (cherry picked from commit 8c2d61e0e916e077fda7e7b8e67f25ffe0f361fc) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_migrate.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c index 6193e2ca3741..95bcaa427d26 100644 --- a/drivers/gpu/drm/xe/xe_migrate.c +++ b/drivers/gpu/drm/xe/xe_migrate.c @@ -1887,6 +1887,12 @@ int xe_migrate_access_memory(struct xe_migrate *m, struct xe_bo *bo, else current_bytes = min_t(int, bytes_left, cursor.size); + if (current_bytes & ~PAGE_MASK) { + int pitch = 4; + + current_bytes = min_t(int, current_bytes, S16_MAX * pitch); + } + if (fence) dma_fence_put(fence); From 145832fbdd17b1d77ffd6cdd1642259e101d1b7e Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Thu, 31 Jul 2025 10:38:11 +0100 Subject: [PATCH 3/6] drm/xe/migrate: prevent potential UAF If we hit the error path, the previous fence (if there is one) has already been put() prior to this, so doing a fence_wait could lead to UAF. Tweak the flow to do to the put() until after we do the wait. Fixes: 270172f64b11 ("drm/xe: Update xe_ttm_access_memory to use GPU for non-visible access") Signed-off-by: Matthew Auld Cc: Maciej Patelczyk Cc: Matthew Brost Reviewed-by: Stuart Summers Link: https://lore.kernel.org/r/20250731093807.207572-8-matthew.auld@intel.com (cherry picked from commit 9b7ca35ed28fe5fad86e9d9c24ebd1271e4c9c3e) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_migrate.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c index 95bcaa427d26..7d20ac4bb633 100644 --- a/drivers/gpu/drm/xe/xe_migrate.c +++ b/drivers/gpu/drm/xe/xe_migrate.c @@ -1893,9 +1893,6 @@ int xe_migrate_access_memory(struct xe_migrate *m, struct xe_bo *bo, current_bytes = min_t(int, current_bytes, S16_MAX * pitch); } - if (fence) - dma_fence_put(fence); - __fence = xe_migrate_vram(m, current_bytes, (unsigned long)buf & ~PAGE_MASK, dma_addr + current_page, @@ -1903,11 +1900,15 @@ int xe_migrate_access_memory(struct xe_migrate *m, struct xe_bo *bo, XE_MIGRATE_COPY_TO_VRAM : XE_MIGRATE_COPY_TO_SRAM); if (IS_ERR(__fence)) { - if (fence) + if (fence) { dma_fence_wait(fence, false); + dma_fence_put(fence); + } fence = __fence; goto out_err; } + + dma_fence_put(fence); fence = __fence; buf += current_bytes; From 2dd7a47669ae6c1da18c55f8e89c4a44418c7006 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Hellstr=C3=B6m?= Date: Tue, 5 Aug 2025 09:48:42 +0200 Subject: [PATCH 4/6] drm/xe: Defer buffer object shrinker write-backs and GPU waits MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When the xe buffer-object shrinker allows GPU waits and write-back, (typically from kswapd), perform multiple passes, skipping subsequent passes if the shrinker number of scanned objects target is reached. 1) Without GPU waits and write-back 2) Without write-back 3) With both GPU-waits and write-back This is to avoid stalls and costly write- and readbacks unless they are really necessary. v2: - Don't test for scan completion twice. (Stuart Summers) - Update tags. Reported-by: melvyn Closes: https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/5557 Cc: Summers Stuart Fixes: 00c8efc3180f ("drm/xe: Add a shrinker for xe bos") Cc: # v6.15+ Signed-off-by: Thomas Hellström Reviewed-by: Stuart Summers Link: https://lore.kernel.org/r/20250805074842.11359-1-thomas.hellstrom@linux.intel.com (cherry picked from commit 80944d334182ce5eb27d00e2bf20a88bfc32dea1) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_shrinker.c | 51 +++++++++++++++++++++++++++++--- 1 file changed, 47 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_shrinker.c b/drivers/gpu/drm/xe/xe_shrinker.c index 1c3c04d52f55..90244fe59b59 100644 --- a/drivers/gpu/drm/xe/xe_shrinker.c +++ b/drivers/gpu/drm/xe/xe_shrinker.c @@ -54,10 +54,10 @@ xe_shrinker_mod_pages(struct xe_shrinker *shrinker, long shrinkable, long purgea write_unlock(&shrinker->lock); } -static s64 xe_shrinker_walk(struct xe_device *xe, - struct ttm_operation_ctx *ctx, - const struct xe_bo_shrink_flags flags, - unsigned long to_scan, unsigned long *scanned) +static s64 __xe_shrinker_walk(struct xe_device *xe, + struct ttm_operation_ctx *ctx, + const struct xe_bo_shrink_flags flags, + unsigned long to_scan, unsigned long *scanned) { unsigned int mem_type; s64 freed = 0, lret; @@ -93,6 +93,48 @@ static s64 xe_shrinker_walk(struct xe_device *xe, return freed; } +/* + * Try shrinking idle objects without writeback first, then if not sufficient, + * try also non-idle objects and finally if that's not sufficient either, + * add writeback. This avoids stalls and explicit writebacks with light or + * moderate memory pressure. + */ +static s64 xe_shrinker_walk(struct xe_device *xe, + struct ttm_operation_ctx *ctx, + const struct xe_bo_shrink_flags flags, + unsigned long to_scan, unsigned long *scanned) +{ + bool no_wait_gpu = true; + struct xe_bo_shrink_flags save_flags = flags; + s64 lret, freed; + + swap(no_wait_gpu, ctx->no_wait_gpu); + save_flags.writeback = false; + lret = __xe_shrinker_walk(xe, ctx, save_flags, to_scan, scanned); + swap(no_wait_gpu, ctx->no_wait_gpu); + if (lret < 0 || *scanned >= to_scan) + return lret; + + freed = lret; + if (!ctx->no_wait_gpu) { + lret = __xe_shrinker_walk(xe, ctx, save_flags, to_scan, scanned); + if (lret < 0) + return lret; + freed += lret; + if (*scanned >= to_scan) + return freed; + } + + if (flags.writeback) { + lret = __xe_shrinker_walk(xe, ctx, flags, to_scan, scanned); + if (lret < 0) + return lret; + freed += lret; + } + + return freed; +} + static unsigned long xe_shrinker_count(struct shrinker *shrink, struct shrink_control *sc) { @@ -199,6 +241,7 @@ static unsigned long xe_shrinker_scan(struct shrinker *shrink, struct shrink_con runtime_pm = xe_shrinker_runtime_pm_get(shrinker, true, 0, can_backup); shrink_flags.purge = false; + lret = xe_shrinker_walk(shrinker->xe, &ctx, shrink_flags, nr_to_scan, &nr_scanned); if (lret >= 0) From 55d49f06162e45686399df4ae6292167f0deab7c Mon Sep 17 00:00:00 2001 From: Karthik Poosa Date: Sat, 9 Aug 2025 00:23:10 +0530 Subject: [PATCH 5/6] drm/xe/hwmon: Add SW clamp for power limits writes Clamp writes to power limits powerX_crit/currX_crit, powerX_cap, powerX_max, to the maximum supported by the pcode mailbox when sysfs-provided values exceed this limit. Although the pcode already performs clamping, values beyond the pcode mailbox's supported range get truncated, leading to incorrect critical power settings. This patch ensures proper clamping to prevent such truncation. v2: - Address below review comments. (Riana) - Split comments into multiple sentences. - Use local variables for readability. - Add a debug log. - Use u64 instead of unsigned long. v3: - Change drm_dbg logs to drm_info. (Badal) v4: - Rephrase the drm_info log. (Rodrigo, Riana) - Rename variable max_mbx_power_limit to max_supp_power_limit, as limit is same for platforms with and without mailbox power limit support. Signed-off-by: Karthik Poosa Fixes: 92d44a422d0d ("drm/xe/hwmon: Expose card reactive critical power") Fixes: fb1b70607f73 ("drm/xe/hwmon: Expose power attributes") Reviewed-by: Riana Tauro Link: https://lore.kernel.org/r/20250808185310.3466529-1-karthik.poosa@intel.com Signed-off-by: Rodrigo Vivi (cherry picked from commit d301eb950da59f962bafe874cf5eb6d61a85b2c2) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_hwmon.c | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/drivers/gpu/drm/xe/xe_hwmon.c b/drivers/gpu/drm/xe/xe_hwmon.c index f08fc4377d25..c17ed1ae8649 100644 --- a/drivers/gpu/drm/xe/xe_hwmon.c +++ b/drivers/gpu/drm/xe/xe_hwmon.c @@ -332,6 +332,7 @@ static int xe_hwmon_power_max_write(struct xe_hwmon *hwmon, u32 attr, int channe int ret = 0; u32 reg_val, max; struct xe_reg rapl_limit; + u64 max_supp_power_limit = 0; mutex_lock(&hwmon->hwmon_lock); @@ -356,6 +357,20 @@ static int xe_hwmon_power_max_write(struct xe_hwmon *hwmon, u32 attr, int channe goto unlock; } + /* + * If the sysfs value exceeds the maximum pcode supported power limit value, clamp it to + * the supported maximum (U12.3 format). + * This is to avoid truncation during reg_val calculation below and ensure the valid + * power limit is sent for pcode which would clamp it to card-supported value. + */ + max_supp_power_limit = ((PWR_LIM_VAL) >> hwmon->scl_shift_power) * SF_POWER; + if (value > max_supp_power_limit) { + value = max_supp_power_limit; + drm_info(&hwmon->xe->drm, + "Power limit clamped as selected %s exceeds channel %d limit\n", + PWR_ATTR_TO_STR(attr), channel); + } + /* Computation in 64-bits to avoid overflow. Round to nearest. */ reg_val = DIV_ROUND_CLOSEST_ULL((u64)value << hwmon->scl_shift_power, SF_POWER); @@ -739,9 +754,23 @@ static int xe_hwmon_power_curr_crit_write(struct xe_hwmon *hwmon, int channel, { int ret; u32 uval; + u64 max_crit_power_curr = 0; mutex_lock(&hwmon->hwmon_lock); + /* + * If the sysfs value exceeds the pcode mailbox cmd POWER_SETUP_SUBCOMMAND_WRITE_I1 + * max supported value, clamp it to the command's max (U10.6 format). + * This is to avoid truncation during uval calculation below and ensure the valid power + * limit is sent for pcode which would clamp it to card-supported value. + */ + max_crit_power_curr = (POWER_SETUP_I1_DATA_MASK >> POWER_SETUP_I1_SHIFT) * scale_factor; + if (value > max_crit_power_curr) { + value = max_crit_power_curr; + drm_info(&hwmon->xe->drm, + "Power limit clamped as selected exceeds channel %d limit\n", + channel); + } uval = DIV_ROUND_CLOSEST_ULL(value << POWER_SETUP_I1_SHIFT, scale_factor); ret = xe_hwmon_pcode_write_i1(hwmon, uval); From 94eae6ee4c2df2031bca586405e9ec36e0b9ccf8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Winiarski?= Date: Tue, 27 May 2025 14:06:37 +0200 Subject: [PATCH 6/6] drm/xe/pf: Set VF LMEM BAR size MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit LMEM is partitioned between multiple VFs and we expect that the more VFs we have, the less LMEM is assigned to each VF. This means that we can achieve full LMEM BAR access without the need to attempt full VF LMEM BAR resize via pci_resize_resource(). Always try to set the largest possible BAR size that allows to fit the number of enabled VFs and inform the user in case the resize attempt is not successful. Signed-off-by: Michał Winiarski Reviewed-by: Ilpo Järvinen Link: https://lore.kernel.org/r/20250527120637.665506-7-michal.winiarski@intel.com Signed-off-by: Lucas De Marchi (cherry picked from commit 32a4d1b98e6663101fd0abfaf151c48feea7abb1) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/regs/xe_bars.h | 1 + drivers/gpu/drm/xe/xe_pci_sriov.c | 22 ++++++++++++++++++++++ 2 files changed, 23 insertions(+) diff --git a/drivers/gpu/drm/xe/regs/xe_bars.h b/drivers/gpu/drm/xe/regs/xe_bars.h index ce05b6ae832f..880140d6ccdc 100644 --- a/drivers/gpu/drm/xe/regs/xe_bars.h +++ b/drivers/gpu/drm/xe/regs/xe_bars.h @@ -7,5 +7,6 @@ #define GTTMMADR_BAR 0 /* MMIO + GTT */ #define LMEM_BAR 2 /* VRAM */ +#define VF_LMEM_BAR 9 /* VF VRAM */ #endif diff --git a/drivers/gpu/drm/xe/xe_pci_sriov.c b/drivers/gpu/drm/xe/xe_pci_sriov.c index 447a7867eecb..af05db07162e 100644 --- a/drivers/gpu/drm/xe/xe_pci_sriov.c +++ b/drivers/gpu/drm/xe/xe_pci_sriov.c @@ -3,6 +3,10 @@ * Copyright © 2023-2024 Intel Corporation */ +#include +#include + +#include "regs/xe_bars.h" #include "xe_assert.h" #include "xe_device.h" #include "xe_gt_sriov_pf_config.h" @@ -128,6 +132,18 @@ static void pf_engine_activity_stats(struct xe_device *xe, unsigned int num_vfs, } } +static int resize_vf_vram_bar(struct xe_device *xe, int num_vfs) +{ + struct pci_dev *pdev = to_pci_dev(xe->drm.dev); + u32 sizes; + + sizes = pci_iov_vf_bar_get_sizes(pdev, VF_LMEM_BAR, num_vfs); + if (!sizes) + return 0; + + return pci_iov_vf_bar_set_size(pdev, VF_LMEM_BAR, __fls(sizes)); +} + static int pf_enable_vfs(struct xe_device *xe, int num_vfs) { struct pci_dev *pdev = to_pci_dev(xe->drm.dev); @@ -158,6 +174,12 @@ static int pf_enable_vfs(struct xe_device *xe, int num_vfs) if (err < 0) goto failed; + if (IS_DGFX(xe)) { + err = resize_vf_vram_bar(xe, num_vfs); + if (err) + xe_sriov_info(xe, "Failed to set VF LMEM BAR size: %d\n", err); + } + err = pci_enable_sriov(pdev, num_vfs); if (err < 0) goto failed;