mirror of
https://github.com/torvalds/linux.git
synced 2026-06-03 03:53:37 +02:00
- Some more xe_migrate_access_memory fixes (Auld)
- Defer buffer object shrinker write-backs and GPU waits (Thomas)
- HWMON fix for clamping limits (Karthik)
- SRIOV-PF: Set VF LMEM BAR size (Michal)

-----BEGIN PGP SIGNATURE-----

iQEzBAABCgAdFiEEbSBwaO7dZQkcLOKj+mJfZA7rE8oFAmieCm0ACgkQ+mJfZA7r
E8of7wf/U55dzKcOu9i4mTYZIGd3cqfbWRuDM5FSJpuXang7cRRZDr6RzSes4XKH
ary751s97gUIzfiUCzNXCY+1ACQqN2qNUzJsrcAYVC5ertvrgmRLtTbczLSWj4Ey
vh/ctCns4bxb955pTyi7xg8mAPBUN1OHckdJrrcQyatOaEVeB88N5Z67bGJ66s89
sb5Nbnd0MImlX90/Ojd4TvfXfFkUwP0JII3+4XeTktNPw3ECMszEHHNV8Uk4LH+d
8o0qG2f/WMul/c8eVVdcnCpYf42ZMQWithwh1f0eld0tDxQXgrFOg1HPV7cVGOEI
y/znB2zWiluJ0/21XYyar3CazD38jA==
=m/qf
-----END PGP SIGNATURE-----

Merge tag 'drm-xe-fixes-2025-08-14' of https://gitlab.freedesktop.org/drm/xe/kernel into drm-fixes

- Some more xe_migrate_access_memory fixes (Auld)
- Defer buffer object shrinker write-backs and GPU waits (Thomas)
- HWMON fix for clamping limits (Karthik)
- SRIOV-PF: Set VF LMEM BAR size (Michal)

Signed-off-by: Dave Airlie <airlied@redhat.com>
From: Rodrigo Vivi <rodrigo.vivi@intel.com>
Link: https://lore.kernel.org/r/aJ4MIZQurSo0uNxn@intel.com
This commit is contained in:
commit
00062ea01d
|
|
@ -7,5 +7,6 @@
|
|||
|
||||
#define GTTMMADR_BAR 0 /* MMIO + GTT */
|
||||
#define LMEM_BAR 2 /* VRAM */
|
||||
#define VF_LMEM_BAR 9 /* VF VRAM */
|
||||
|
||||
#endif
|
||||
|
|
|
|||
|
|
@ -332,6 +332,7 @@ static int xe_hwmon_power_max_write(struct xe_hwmon *hwmon, u32 attr, int channe
|
|||
int ret = 0;
|
||||
u32 reg_val, max;
|
||||
struct xe_reg rapl_limit;
|
||||
u64 max_supp_power_limit = 0;
|
||||
|
||||
mutex_lock(&hwmon->hwmon_lock);
|
||||
|
||||
|
|
@ -356,6 +357,20 @@ static int xe_hwmon_power_max_write(struct xe_hwmon *hwmon, u32 attr, int channe
|
|||
goto unlock;
|
||||
}
|
||||
|
||||
/*
|
||||
* If the sysfs value exceeds the maximum pcode supported power limit value, clamp it to
|
||||
* the supported maximum (U12.3 format).
|
||||
* This is to avoid truncation during reg_val calculation below and ensure the valid
|
||||
* power limit is sent for pcode which would clamp it to card-supported value.
|
||||
*/
|
||||
max_supp_power_limit = ((PWR_LIM_VAL) >> hwmon->scl_shift_power) * SF_POWER;
|
||||
if (value > max_supp_power_limit) {
|
||||
value = max_supp_power_limit;
|
||||
drm_info(&hwmon->xe->drm,
|
||||
"Power limit clamped as selected %s exceeds channel %d limit\n",
|
||||
PWR_ATTR_TO_STR(attr), channel);
|
||||
}
|
||||
|
||||
/* Computation in 64-bits to avoid overflow. Round to nearest. */
|
||||
reg_val = DIV_ROUND_CLOSEST_ULL((u64)value << hwmon->scl_shift_power, SF_POWER);
|
||||
|
||||
|
|
@ -739,9 +754,23 @@ static int xe_hwmon_power_curr_crit_write(struct xe_hwmon *hwmon, int channel,
|
|||
{
|
||||
int ret;
|
||||
u32 uval;
|
||||
u64 max_crit_power_curr = 0;
|
||||
|
||||
mutex_lock(&hwmon->hwmon_lock);
|
||||
|
||||
/*
|
||||
* If the sysfs value exceeds the pcode mailbox cmd POWER_SETUP_SUBCOMMAND_WRITE_I1
|
||||
* max supported value, clamp it to the command's max (U10.6 format).
|
||||
* This is to avoid truncation during uval calculation below and ensure the valid power
|
||||
* limit is sent for pcode which would clamp it to card-supported value.
|
||||
*/
|
||||
max_crit_power_curr = (POWER_SETUP_I1_DATA_MASK >> POWER_SETUP_I1_SHIFT) * scale_factor;
|
||||
if (value > max_crit_power_curr) {
|
||||
value = max_crit_power_curr;
|
||||
drm_info(&hwmon->xe->drm,
|
||||
"Power limit clamped as selected exceeds channel %d limit\n",
|
||||
channel);
|
||||
}
|
||||
uval = DIV_ROUND_CLOSEST_ULL(value << POWER_SETUP_I1_SHIFT, scale_factor);
|
||||
ret = xe_hwmon_pcode_write_i1(hwmon, uval);
|
||||
|
||||
|
|
|
|||
|
|
@ -1820,15 +1820,19 @@ int xe_migrate_access_memory(struct xe_migrate *m, struct xe_bo *bo,
|
|||
if (!IS_ALIGNED(len, XE_CACHELINE_BYTES) ||
|
||||
!IS_ALIGNED((unsigned long)buf + offset, XE_CACHELINE_BYTES)) {
|
||||
int buf_offset = 0;
|
||||
void *bounce;
|
||||
int err;
|
||||
|
||||
BUILD_BUG_ON(!is_power_of_2(XE_CACHELINE_BYTES));
|
||||
bounce = kmalloc(XE_CACHELINE_BYTES, GFP_KERNEL);
|
||||
if (!bounce)
|
||||
return -ENOMEM;
|
||||
|
||||
/*
|
||||
* Less than ideal for large unaligned access but this should be
|
||||
* fairly rare, can fixup if this becomes common.
|
||||
*/
|
||||
do {
|
||||
u8 bounce[XE_CACHELINE_BYTES];
|
||||
void *ptr = (void *)bounce;
|
||||
int err;
|
||||
int copy_bytes = min_t(int, bytes_left,
|
||||
XE_CACHELINE_BYTES -
|
||||
(offset & XE_CACHELINE_MASK));
|
||||
|
|
@ -1837,22 +1841,22 @@ int xe_migrate_access_memory(struct xe_migrate *m, struct xe_bo *bo,
|
|||
err = xe_migrate_access_memory(m, bo,
|
||||
offset &
|
||||
~XE_CACHELINE_MASK,
|
||||
(void *)ptr,
|
||||
sizeof(bounce), 0);
|
||||
bounce,
|
||||
XE_CACHELINE_BYTES, 0);
|
||||
if (err)
|
||||
return err;
|
||||
break;
|
||||
|
||||
if (write) {
|
||||
memcpy(ptr + ptr_offset, buf + buf_offset, copy_bytes);
|
||||
memcpy(bounce + ptr_offset, buf + buf_offset, copy_bytes);
|
||||
|
||||
err = xe_migrate_access_memory(m, bo,
|
||||
offset & ~XE_CACHELINE_MASK,
|
||||
(void *)ptr,
|
||||
sizeof(bounce), write);
|
||||
bounce,
|
||||
XE_CACHELINE_BYTES, write);
|
||||
if (err)
|
||||
return err;
|
||||
break;
|
||||
} else {
|
||||
memcpy(buf + buf_offset, ptr + ptr_offset,
|
||||
memcpy(buf + buf_offset, bounce + ptr_offset,
|
||||
copy_bytes);
|
||||
}
|
||||
|
||||
|
|
@ -1861,7 +1865,8 @@ int xe_migrate_access_memory(struct xe_migrate *m, struct xe_bo *bo,
|
|||
offset += copy_bytes;
|
||||
} while (bytes_left);
|
||||
|
||||
return 0;
|
||||
kfree(bounce);
|
||||
return err;
|
||||
}
|
||||
|
||||
dma_addr = xe_migrate_dma_map(xe, buf, len + page_offset, write);
|
||||
|
|
@ -1882,8 +1887,11 @@ int xe_migrate_access_memory(struct xe_migrate *m, struct xe_bo *bo,
|
|||
else
|
||||
current_bytes = min_t(int, bytes_left, cursor.size);
|
||||
|
||||
if (fence)
|
||||
dma_fence_put(fence);
|
||||
if (current_bytes & ~PAGE_MASK) {
|
||||
int pitch = 4;
|
||||
|
||||
current_bytes = min_t(int, current_bytes, S16_MAX * pitch);
|
||||
}
|
||||
|
||||
__fence = xe_migrate_vram(m, current_bytes,
|
||||
(unsigned long)buf & ~PAGE_MASK,
|
||||
|
|
@ -1892,11 +1900,15 @@ int xe_migrate_access_memory(struct xe_migrate *m, struct xe_bo *bo,
|
|||
XE_MIGRATE_COPY_TO_VRAM :
|
||||
XE_MIGRATE_COPY_TO_SRAM);
|
||||
if (IS_ERR(__fence)) {
|
||||
if (fence)
|
||||
if (fence) {
|
||||
dma_fence_wait(fence, false);
|
||||
dma_fence_put(fence);
|
||||
}
|
||||
fence = __fence;
|
||||
goto out_err;
|
||||
}
|
||||
|
||||
dma_fence_put(fence);
|
||||
fence = __fence;
|
||||
|
||||
buf += current_bytes;
|
||||
|
|
|
|||
|
|
@ -3,6 +3,10 @@
|
|||
* Copyright © 2023-2024 Intel Corporation
|
||||
*/
|
||||
|
||||
#include <linux/bitops.h>
|
||||
#include <linux/pci.h>
|
||||
|
||||
#include "regs/xe_bars.h"
|
||||
#include "xe_assert.h"
|
||||
#include "xe_device.h"
|
||||
#include "xe_gt_sriov_pf_config.h"
|
||||
|
|
@ -128,6 +132,18 @@ static void pf_engine_activity_stats(struct xe_device *xe, unsigned int num_vfs,
|
|||
}
|
||||
}
|
||||
|
||||
static int resize_vf_vram_bar(struct xe_device *xe, int num_vfs)
|
||||
{
|
||||
struct pci_dev *pdev = to_pci_dev(xe->drm.dev);
|
||||
u32 sizes;
|
||||
|
||||
sizes = pci_iov_vf_bar_get_sizes(pdev, VF_LMEM_BAR, num_vfs);
|
||||
if (!sizes)
|
||||
return 0;
|
||||
|
||||
return pci_iov_vf_bar_set_size(pdev, VF_LMEM_BAR, __fls(sizes));
|
||||
}
|
||||
|
||||
static int pf_enable_vfs(struct xe_device *xe, int num_vfs)
|
||||
{
|
||||
struct pci_dev *pdev = to_pci_dev(xe->drm.dev);
|
||||
|
|
@ -158,6 +174,12 @@ static int pf_enable_vfs(struct xe_device *xe, int num_vfs)
|
|||
if (err < 0)
|
||||
goto failed;
|
||||
|
||||
if (IS_DGFX(xe)) {
|
||||
err = resize_vf_vram_bar(xe, num_vfs);
|
||||
if (err)
|
||||
xe_sriov_info(xe, "Failed to set VF LMEM BAR size: %d\n", err);
|
||||
}
|
||||
|
||||
err = pci_enable_sriov(pdev, num_vfs);
|
||||
if (err < 0)
|
||||
goto failed;
|
||||
|
|
|
|||
|
|
@ -54,10 +54,10 @@ xe_shrinker_mod_pages(struct xe_shrinker *shrinker, long shrinkable, long purgea
|
|||
write_unlock(&shrinker->lock);
|
||||
}
|
||||
|
||||
static s64 xe_shrinker_walk(struct xe_device *xe,
|
||||
struct ttm_operation_ctx *ctx,
|
||||
const struct xe_bo_shrink_flags flags,
|
||||
unsigned long to_scan, unsigned long *scanned)
|
||||
static s64 __xe_shrinker_walk(struct xe_device *xe,
|
||||
struct ttm_operation_ctx *ctx,
|
||||
const struct xe_bo_shrink_flags flags,
|
||||
unsigned long to_scan, unsigned long *scanned)
|
||||
{
|
||||
unsigned int mem_type;
|
||||
s64 freed = 0, lret;
|
||||
|
|
@ -93,6 +93,48 @@ static s64 xe_shrinker_walk(struct xe_device *xe,
|
|||
return freed;
|
||||
}
|
||||
|
||||
/*
|
||||
* Try shrinking idle objects without writeback first, then if not sufficient,
|
||||
* try also non-idle objects and finally if that's not sufficient either,
|
||||
* add writeback. This avoids stalls and explicit writebacks with light or
|
||||
* moderate memory pressure.
|
||||
*/
|
||||
static s64 xe_shrinker_walk(struct xe_device *xe,
|
||||
struct ttm_operation_ctx *ctx,
|
||||
const struct xe_bo_shrink_flags flags,
|
||||
unsigned long to_scan, unsigned long *scanned)
|
||||
{
|
||||
bool no_wait_gpu = true;
|
||||
struct xe_bo_shrink_flags save_flags = flags;
|
||||
s64 lret, freed;
|
||||
|
||||
swap(no_wait_gpu, ctx->no_wait_gpu);
|
||||
save_flags.writeback = false;
|
||||
lret = __xe_shrinker_walk(xe, ctx, save_flags, to_scan, scanned);
|
||||
swap(no_wait_gpu, ctx->no_wait_gpu);
|
||||
if (lret < 0 || *scanned >= to_scan)
|
||||
return lret;
|
||||
|
||||
freed = lret;
|
||||
if (!ctx->no_wait_gpu) {
|
||||
lret = __xe_shrinker_walk(xe, ctx, save_flags, to_scan, scanned);
|
||||
if (lret < 0)
|
||||
return lret;
|
||||
freed += lret;
|
||||
if (*scanned >= to_scan)
|
||||
return freed;
|
||||
}
|
||||
|
||||
if (flags.writeback) {
|
||||
lret = __xe_shrinker_walk(xe, ctx, flags, to_scan, scanned);
|
||||
if (lret < 0)
|
||||
return lret;
|
||||
freed += lret;
|
||||
}
|
||||
|
||||
return freed;
|
||||
}
|
||||
|
||||
static unsigned long
|
||||
xe_shrinker_count(struct shrinker *shrink, struct shrink_control *sc)
|
||||
{
|
||||
|
|
@ -199,6 +241,7 @@ static unsigned long xe_shrinker_scan(struct shrinker *shrink, struct shrink_con
|
|||
runtime_pm = xe_shrinker_runtime_pm_get(shrinker, true, 0, can_backup);
|
||||
|
||||
shrink_flags.purge = false;
|
||||
|
||||
lret = xe_shrinker_walk(shrinker->xe, &ctx, shrink_flags,
|
||||
nr_to_scan, &nr_scanned);
|
||||
if (lret >= 0)
|
||||
|
|
|
|||
Loading…
Reference in New Issue
Block a user