- Some more xe_migrate_access_memory fixes (Auld)
- Defer buffer object shrinker write-backs and GPU waits (Thomas)
- HWMON fix for clamping limits (Karthik)
- SRIOV-PF: Set VF LMEM BAR size (Michal)
 -----BEGIN PGP SIGNATURE-----
 
 iQEzBAABCgAdFiEEbSBwaO7dZQkcLOKj+mJfZA7rE8oFAmieCm0ACgkQ+mJfZA7r
 E8of7wf/U55dzKcOu9i4mTYZIGd3cqfbWRuDM5FSJpuXang7cRRZDr6RzSes4XKH
 ary751s97gUIzfiUCzNXCY+1ACQqN2qNUzJsrcAYVC5ertvrgmRLtTbczLSWj4Ey
 vh/ctCns4bxb955pTyi7xg8mAPBUN1OHckdJrrcQyatOaEVeB88N5Z67bGJ66s89
 sb5Nbnd0MImlX90/Ojd4TvfXfFkUwP0JII3+4XeTktNPw3ECMszEHHNV8Uk4LH+d
 8o0qG2f/WMul/c8eVVdcnCpYf42ZMQWithwh1f0eld0tDxQXgrFOg1HPV7cVGOEI
 y/znB2zWiluJ0/21XYyar3CazD38jA==
 =m/qf
 -----END PGP SIGNATURE-----

Merge tag 'drm-xe-fixes-2025-08-14' of https://gitlab.freedesktop.org/drm/xe/kernel into drm-fixes

- Some more xe_migrate_access_memory fixes (Auld)
- Defer buffer object shrinker write-backs and GPU waits (Thomas)
- HWMON fix for clamping limits (Karthik)
- SRIOV-PF: Set VF LMEM BAR size (Michal)

Signed-off-by: Dave Airlie <airlied@redhat.com>

From: Rodrigo Vivi <rodrigo.vivi@intel.com>
Link: https://lore.kernel.org/r/aJ4MIZQurSo0uNxn@intel.com
This commit is contained in:
Dave Airlie 2025-08-15 09:50:17 +10:00
commit 00062ea01d
5 changed files with 126 additions and 19 deletions

View File

@ -7,5 +7,6 @@
#define GTTMMADR_BAR 0 /* MMIO + GTT */
#define LMEM_BAR 2 /* VRAM */
#define VF_LMEM_BAR 9 /* VF VRAM */
#endif

View File

@ -332,6 +332,7 @@ static int xe_hwmon_power_max_write(struct xe_hwmon *hwmon, u32 attr, int channe
int ret = 0;
u32 reg_val, max;
struct xe_reg rapl_limit;
u64 max_supp_power_limit = 0;
mutex_lock(&hwmon->hwmon_lock);
@ -356,6 +357,20 @@ static int xe_hwmon_power_max_write(struct xe_hwmon *hwmon, u32 attr, int channe
goto unlock;
}
/*
* If the sysfs value exceeds the maximum pcode supported power limit value, clamp it to
* the supported maximum (U12.3 format).
* This is to avoid truncation during reg_val calculation below and ensure the valid
* power limit is sent for pcode which would clamp it to card-supported value.
*/
max_supp_power_limit = ((PWR_LIM_VAL) >> hwmon->scl_shift_power) * SF_POWER;
if (value > max_supp_power_limit) {
value = max_supp_power_limit;
drm_info(&hwmon->xe->drm,
"Power limit clamped as selected %s exceeds channel %d limit\n",
PWR_ATTR_TO_STR(attr), channel);
}
/* Computation in 64-bits to avoid overflow. Round to nearest. */
reg_val = DIV_ROUND_CLOSEST_ULL((u64)value << hwmon->scl_shift_power, SF_POWER);
@ -739,9 +754,23 @@ static int xe_hwmon_power_curr_crit_write(struct xe_hwmon *hwmon, int channel,
{
int ret;
u32 uval;
u64 max_crit_power_curr = 0;
mutex_lock(&hwmon->hwmon_lock);
/*
* If the sysfs value exceeds the pcode mailbox cmd POWER_SETUP_SUBCOMMAND_WRITE_I1
* max supported value, clamp it to the command's max (U10.6 format).
* This is to avoid truncation during uval calculation below and ensure the valid power
* limit is sent for pcode which would clamp it to card-supported value.
*/
max_crit_power_curr = (POWER_SETUP_I1_DATA_MASK >> POWER_SETUP_I1_SHIFT) * scale_factor;
if (value > max_crit_power_curr) {
value = max_crit_power_curr;
drm_info(&hwmon->xe->drm,
"Power limit clamped as selected exceeds channel %d limit\n",
channel);
}
uval = DIV_ROUND_CLOSEST_ULL(value << POWER_SETUP_I1_SHIFT, scale_factor);
ret = xe_hwmon_pcode_write_i1(hwmon, uval);

View File

@ -1820,15 +1820,19 @@ int xe_migrate_access_memory(struct xe_migrate *m, struct xe_bo *bo,
if (!IS_ALIGNED(len, XE_CACHELINE_BYTES) ||
!IS_ALIGNED((unsigned long)buf + offset, XE_CACHELINE_BYTES)) {
int buf_offset = 0;
void *bounce;
int err;
BUILD_BUG_ON(!is_power_of_2(XE_CACHELINE_BYTES));
bounce = kmalloc(XE_CACHELINE_BYTES, GFP_KERNEL);
if (!bounce)
return -ENOMEM;
/*
* Less than ideal for large unaligned access but this should be
* fairly rare, can fixup if this becomes common.
*/
do {
u8 bounce[XE_CACHELINE_BYTES];
void *ptr = (void *)bounce;
int err;
int copy_bytes = min_t(int, bytes_left,
XE_CACHELINE_BYTES -
(offset & XE_CACHELINE_MASK));
@ -1837,22 +1841,22 @@ int xe_migrate_access_memory(struct xe_migrate *m, struct xe_bo *bo,
err = xe_migrate_access_memory(m, bo,
offset &
~XE_CACHELINE_MASK,
(void *)ptr,
sizeof(bounce), 0);
bounce,
XE_CACHELINE_BYTES, 0);
if (err)
return err;
break;
if (write) {
memcpy(ptr + ptr_offset, buf + buf_offset, copy_bytes);
memcpy(bounce + ptr_offset, buf + buf_offset, copy_bytes);
err = xe_migrate_access_memory(m, bo,
offset & ~XE_CACHELINE_MASK,
(void *)ptr,
sizeof(bounce), write);
bounce,
XE_CACHELINE_BYTES, write);
if (err)
return err;
break;
} else {
memcpy(buf + buf_offset, ptr + ptr_offset,
memcpy(buf + buf_offset, bounce + ptr_offset,
copy_bytes);
}
@ -1861,7 +1865,8 @@ int xe_migrate_access_memory(struct xe_migrate *m, struct xe_bo *bo,
offset += copy_bytes;
} while (bytes_left);
return 0;
kfree(bounce);
return err;
}
dma_addr = xe_migrate_dma_map(xe, buf, len + page_offset, write);
@ -1882,8 +1887,11 @@ int xe_migrate_access_memory(struct xe_migrate *m, struct xe_bo *bo,
else
current_bytes = min_t(int, bytes_left, cursor.size);
if (fence)
dma_fence_put(fence);
if (current_bytes & ~PAGE_MASK) {
int pitch = 4;
current_bytes = min_t(int, current_bytes, S16_MAX * pitch);
}
__fence = xe_migrate_vram(m, current_bytes,
(unsigned long)buf & ~PAGE_MASK,
@ -1892,11 +1900,15 @@ int xe_migrate_access_memory(struct xe_migrate *m, struct xe_bo *bo,
XE_MIGRATE_COPY_TO_VRAM :
XE_MIGRATE_COPY_TO_SRAM);
if (IS_ERR(__fence)) {
if (fence)
if (fence) {
dma_fence_wait(fence, false);
dma_fence_put(fence);
}
fence = __fence;
goto out_err;
}
dma_fence_put(fence);
fence = __fence;
buf += current_bytes;

View File

@ -3,6 +3,10 @@
* Copyright © 2023-2024 Intel Corporation
*/
#include <linux/bitops.h>
#include <linux/pci.h>
#include "regs/xe_bars.h"
#include "xe_assert.h"
#include "xe_device.h"
#include "xe_gt_sriov_pf_config.h"
@ -128,6 +132,18 @@ static void pf_engine_activity_stats(struct xe_device *xe, unsigned int num_vfs,
}
}
/*
 * Resize the VF LMEM BAR for @num_vfs VFs.
 *
 * Queries pci_iov_vf_bar_get_sizes() for VF_LMEM_BAR and, when the result is
 * non-zero, passes the index of its highest set bit to
 * pci_iov_vf_bar_set_size().
 * NOTE(review): the result is presumably a bitmask of supported BAR sizes,
 * making the highest bit the largest size - confirm against the PCI IOV API.
 *
 * Return: 0 when the query yields zero (the BAR is left untouched), otherwise
 * whatever pci_iov_vf_bar_set_size() returns.
 */
static int resize_vf_vram_bar(struct xe_device *xe, int num_vfs)
{
	struct pci_dev *pdev = to_pci_dev(xe->drm.dev);
	u32 supported = pci_iov_vf_bar_get_sizes(pdev, VF_LMEM_BAR, num_vfs);

	/* Nothing reported: there is no size to pick, so do not resize. */
	return supported ?
		pci_iov_vf_bar_set_size(pdev, VF_LMEM_BAR, __fls(supported)) : 0;
}
static int pf_enable_vfs(struct xe_device *xe, int num_vfs)
{
struct pci_dev *pdev = to_pci_dev(xe->drm.dev);
@ -158,6 +174,12 @@ static int pf_enable_vfs(struct xe_device *xe, int num_vfs)
if (err < 0)
goto failed;
if (IS_DGFX(xe)) {
err = resize_vf_vram_bar(xe, num_vfs);
if (err)
xe_sriov_info(xe, "Failed to set VF LMEM BAR size: %d\n", err);
}
err = pci_enable_sriov(pdev, num_vfs);
if (err < 0)
goto failed;

View File

@ -54,10 +54,10 @@ xe_shrinker_mod_pages(struct xe_shrinker *shrinker, long shrinkable, long purgea
write_unlock(&shrinker->lock);
}
static s64 xe_shrinker_walk(struct xe_device *xe,
struct ttm_operation_ctx *ctx,
const struct xe_bo_shrink_flags flags,
unsigned long to_scan, unsigned long *scanned)
static s64 __xe_shrinker_walk(struct xe_device *xe,
struct ttm_operation_ctx *ctx,
const struct xe_bo_shrink_flags flags,
unsigned long to_scan, unsigned long *scanned)
{
unsigned int mem_type;
s64 freed = 0, lret;
@ -93,6 +93,48 @@ static s64 xe_shrinker_walk(struct xe_device *xe,
return freed;
}
/*
* Try shrinking idle objects without writeback first, then if not sufficient,
* try also non-idle objects and finally if that's not sufficient either,
* add writeback. This avoids stalls and explicit writebacks with light or
* moderate memory pressure.
*/
static s64 xe_shrinker_walk(struct xe_device *xe,
struct ttm_operation_ctx *ctx,
const struct xe_bo_shrink_flags flags,
unsigned long to_scan, unsigned long *scanned)
{
bool no_wait_gpu = true;
struct xe_bo_shrink_flags save_flags = flags;
s64 lret, freed;
swap(no_wait_gpu, ctx->no_wait_gpu);
save_flags.writeback = false;
lret = __xe_shrinker_walk(xe, ctx, save_flags, to_scan, scanned);
swap(no_wait_gpu, ctx->no_wait_gpu);
if (lret < 0 || *scanned >= to_scan)
return lret;
freed = lret;
if (!ctx->no_wait_gpu) {
lret = __xe_shrinker_walk(xe, ctx, save_flags, to_scan, scanned);
if (lret < 0)
return lret;
freed += lret;
if (*scanned >= to_scan)
return freed;
}
if (flags.writeback) {
lret = __xe_shrinker_walk(xe, ctx, flags, to_scan, scanned);
if (lret < 0)
return lret;
freed += lret;
}
return freed;
}
static unsigned long
xe_shrinker_count(struct shrinker *shrink, struct shrink_control *sc)
{
@ -199,6 +241,7 @@ static unsigned long xe_shrinker_scan(struct shrinker *shrink, struct shrink_con
runtime_pm = xe_shrinker_runtime_pm_get(shrinker, true, 0, can_backup);
shrink_flags.purge = false;
lret = xe_shrinker_walk(shrinker->xe, &ctx, shrink_flags,
nr_to_scan, &nr_scanned);
if (lret >= 0)