- Release guc ids before cancelling work (Tejas)

- Fix new warnings around userptr (Thomas)
 - Temporarily disable D3Cold on BMG (Rodrigo)
 - Retry and wait longer for GuC PC to start (Rodrigo)
 - Remove redundant check in xe_vm_create_ioctl (Xin)
 -----BEGIN PGP SIGNATURE-----
 
 iQEzBAABCAAdFiEEbSBwaO7dZQkcLOKj+mJfZA7rE8oFAmfTB3EACgkQ+mJfZA7r
 E8pkoAgAui5ItatSInI77C15ajI1Mn4kVVu95Pus+rz8pj4NDpWPVwo9dIySjp+h
 BIwDFLzpi9aEAYKT81JmXp6KDtqQDv+pyBzw/hVdMT47hP5nOJax1j6uAa7gAaXB
 KxvMmHP2p4N917n6oUg8tOM0rPrY/GjXRkPETjD3snJ1gP0kqj4xDOWupkU51fec
 Mk+PI+o67fnDFTMZziHNS0BS3MdXGi6uEldNAzm2s6xgoIGwIv2Gl2Sm8uTNxEqY
 olo03ssQGO3NKh78J/laoNEendcKG+aarwt3h7WQhAgx0P1asjn1F2pFX0nNKpyJ
 Y1SRMu+g6lzozo3I1tCB3BaIJ/1XEQ==
 =1WN7
 -----END PGP SIGNATURE-----

Merge tag 'drm-xe-fixes-2025-03-13' of https://gitlab.freedesktop.org/drm/xe/kernel into drm-fixes

- Release guc ids before cancelling work (Tejas)
- Fix new warnings around userptr (Thomas)
- Temporarily disable D3Cold on BMG (Rodrigo)
- Retry and wait longer for GuC PC to start (Rodrigo)
- Remove redundant check in xe_vm_create_ioctl (Xin)

Signed-off-by: Dave Airlie <airlied@redhat.com>

From: Rodrigo Vivi <rodrigo.vivi@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/Z9MJWeIlZPuvXZ_G@intel.com
This commit is contained in:
Dave Airlie 2025-03-14 13:42:13 +10:00
commit d1d77326f5
5 changed files with 58 additions and 19 deletions

View File

@ -6,6 +6,7 @@
#include "xe_guc_pc.h"
#include <linux/delay.h>
#include <linux/ktime.h>
#include <drm/drm_managed.h>
#include <generated/xe_wa_oob.h>
@ -19,6 +20,7 @@
#include "xe_gt.h"
#include "xe_gt_idle.h"
#include "xe_gt_printk.h"
#include "xe_gt_throttle.h"
#include "xe_gt_types.h"
#include "xe_guc.h"
#include "xe_guc_ct.h"
@ -49,6 +51,9 @@
#define LNL_MERT_FREQ_CAP 800
#define BMG_MERT_FREQ_CAP 2133
#define SLPC_RESET_TIMEOUT_MS 5 /* roughly 5ms, but no need for precision */
#define SLPC_RESET_EXTENDED_TIMEOUT_MS 1000 /* To be used only at pc_start */
/**
* DOC: GuC Power Conservation (PC)
*
@ -113,9 +118,10 @@ static struct iosys_map *pc_to_maps(struct xe_guc_pc *pc)
FIELD_PREP(HOST2GUC_PC_SLPC_REQUEST_MSG_1_EVENT_ARGC, count))
static int wait_for_pc_state(struct xe_guc_pc *pc,
enum slpc_global_state state)
enum slpc_global_state state,
int timeout_ms)
{
int timeout_us = 5000; /* rought 5ms, but no need for precision */
int timeout_us = 1000 * timeout_ms;
int slept, wait = 10;
xe_device_assert_mem_access(pc_to_xe(pc));
@ -164,7 +170,8 @@ static int pc_action_query_task_state(struct xe_guc_pc *pc)
};
int ret;
if (wait_for_pc_state(pc, SLPC_GLOBAL_STATE_RUNNING))
if (wait_for_pc_state(pc, SLPC_GLOBAL_STATE_RUNNING,
SLPC_RESET_TIMEOUT_MS))
return -EAGAIN;
/* Blocking here to ensure the results are ready before reading them */
@ -187,7 +194,8 @@ static int pc_action_set_param(struct xe_guc_pc *pc, u8 id, u32 value)
};
int ret;
if (wait_for_pc_state(pc, SLPC_GLOBAL_STATE_RUNNING))
if (wait_for_pc_state(pc, SLPC_GLOBAL_STATE_RUNNING,
SLPC_RESET_TIMEOUT_MS))
return -EAGAIN;
ret = xe_guc_ct_send(ct, action, ARRAY_SIZE(action), 0, 0);
@ -208,7 +216,8 @@ static int pc_action_unset_param(struct xe_guc_pc *pc, u8 id)
struct xe_guc_ct *ct = &pc_to_guc(pc)->ct;
int ret;
if (wait_for_pc_state(pc, SLPC_GLOBAL_STATE_RUNNING))
if (wait_for_pc_state(pc, SLPC_GLOBAL_STATE_RUNNING,
SLPC_RESET_TIMEOUT_MS))
return -EAGAIN;
ret = xe_guc_ct_send(ct, action, ARRAY_SIZE(action), 0, 0);
@ -440,6 +449,15 @@ u32 xe_guc_pc_get_act_freq(struct xe_guc_pc *pc)
return freq;
}
/*
 * get_cur_freq - read and decode the currently requested GT frequency
 * @gt: GT whose MMIO space is read
 *
 * Reads the RPNSWREQ register, extracts the REQ_RATIO_MASK field and
 * converts that raw ratio with decode_freq().
 *
 * NOTE(review): the visible caller drops a forcewake reference right after
 * this read, so forcewake presumably has to be held here - confirm.
 *
 * Return: the decoded value (callers print it as MHz).
 */
static u32 get_cur_freq(struct xe_gt *gt)
{
	u32 rpnswreq = xe_mmio_read32(&gt->mmio, RPNSWREQ);
	u32 ratio = REG_FIELD_GET(REQ_RATIO_MASK, rpnswreq);

	return decode_freq(ratio);
}
/**
* xe_guc_pc_get_cur_freq - Get Current requested frequency
* @pc: The GuC PC
@ -463,10 +481,7 @@ int xe_guc_pc_get_cur_freq(struct xe_guc_pc *pc, u32 *freq)
return -ETIMEDOUT;
}
*freq = xe_mmio_read32(&gt->mmio, RPNSWREQ);
*freq = REG_FIELD_GET(REQ_RATIO_MASK, *freq);
*freq = decode_freq(*freq);
*freq = get_cur_freq(gt);
xe_force_wake_put(gt_to_fw(gt), fw_ref);
return 0;
@ -1002,6 +1017,7 @@ int xe_guc_pc_start(struct xe_guc_pc *pc)
struct xe_gt *gt = pc_to_gt(pc);
u32 size = PAGE_ALIGN(sizeof(struct slpc_shared_data));
unsigned int fw_ref;
ktime_t earlier;
int ret;
xe_gt_assert(gt, xe_device_uc_enabled(xe));
@ -1026,14 +1042,25 @@ int xe_guc_pc_start(struct xe_guc_pc *pc)
memset(pc->bo->vmap.vaddr, 0, size);
slpc_shared_data_write(pc, header.size, size);
earlier = ktime_get();
ret = pc_action_reset(pc);
if (ret)
goto out;
if (wait_for_pc_state(pc, SLPC_GLOBAL_STATE_RUNNING)) {
xe_gt_err(gt, "GuC PC Start failed\n");
ret = -EIO;
goto out;
if (wait_for_pc_state(pc, SLPC_GLOBAL_STATE_RUNNING,
SLPC_RESET_TIMEOUT_MS)) {
xe_gt_warn(gt, "GuC PC start taking longer than normal [freq = %dMHz (req = %dMHz), perf_limit_reasons = 0x%08X]\n",
xe_guc_pc_get_act_freq(pc), get_cur_freq(gt),
xe_gt_throttle_get_limit_reasons(gt));
if (wait_for_pc_state(pc, SLPC_GLOBAL_STATE_RUNNING,
SLPC_RESET_EXTENDED_TIMEOUT_MS)) {
xe_gt_err(gt, "GuC PC Start failed: Dynamic GT frequency control and GT sleep states are now disabled.\n");
goto out;
}
xe_gt_warn(gt, "GuC PC excessive start time: %lldms",
ktime_ms_delta(ktime_get(), earlier));
}
ret = pc_init_freqs(pc);

View File

@ -1246,11 +1246,11 @@ static void __guc_exec_queue_fini_async(struct work_struct *w)
xe_pm_runtime_get(guc_to_xe(guc));
trace_xe_exec_queue_destroy(q);
release_guc_id(guc, q);
if (xe_exec_queue_is_lr(q))
cancel_work_sync(&ge->lr_tdr);
/* Confirm no work left behind accessing device structures */
cancel_delayed_work_sync(&ge->sched.base.work_tdr);
release_guc_id(guc, q);
xe_sched_entity_fini(&ge->entity);
xe_sched_fini(&ge->sched);

View File

@ -138,13 +138,17 @@ static int xe_build_sg(struct xe_device *xe, struct hmm_range *range,
i += size;
if (unlikely(j == st->nents - 1)) {
xe_assert(xe, i >= npages);
if (i > npages)
size -= (i - npages);
sg_mark_end(sgl);
} else {
xe_assert(xe, i < npages);
}
sg_set_page(sgl, page, size << PAGE_SHIFT, 0);
}
xe_assert(xe, i == npages);
return dma_map_sgtable(dev, st, write ? DMA_BIDIRECTIONAL : DMA_TO_DEVICE,
DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_NO_KERNEL_MAPPING);

View File

@ -267,6 +267,15 @@ int xe_pm_init_early(struct xe_device *xe)
}
ALLOW_ERROR_INJECTION(xe_pm_init_early, ERRNO); /* See xe_pci_probe() */
/*
 * vram_threshold_value - pick the initial VRAM threshold for a device
 * @xe: xe device instance
 *
 * Return: 0 on Battlemage, DEFAULT_VRAM_THRESHOLD on every other platform.
 */
static u32 vram_threshold_value(struct xe_device *xe)
{
	/* FIXME: D3Cold temporarily disabled by default on BMG */
	return xe->info.platform == XE_BATTLEMAGE ? 0 : DEFAULT_VRAM_THRESHOLD;
}
/**
* xe_pm_init - Initialize Xe Power Management
* @xe: xe device instance
@ -277,6 +286,7 @@ ALLOW_ERROR_INJECTION(xe_pm_init_early, ERRNO); /* See xe_pci_probe() */
*/
int xe_pm_init(struct xe_device *xe)
{
u32 vram_threshold;
int err;
/* For now suspend/resume is only allowed with GuC */
@ -290,7 +300,8 @@ int xe_pm_init(struct xe_device *xe)
if (err)
return err;
err = xe_pm_set_vram_threshold(xe, DEFAULT_VRAM_THRESHOLD);
vram_threshold = vram_threshold_value(xe);
err = xe_pm_set_vram_threshold(xe, vram_threshold);
if (err)
return err;
}

View File

@ -1809,9 +1809,6 @@ int xe_vm_create_ioctl(struct drm_device *dev, void *data,
args->flags & DRM_XE_VM_CREATE_FLAG_FAULT_MODE))
return -EINVAL;
if (XE_IOCTL_DBG(xe, args->extensions))
return -EINVAL;
if (args->flags & DRM_XE_VM_CREATE_FLAG_SCRATCH_PAGE)
flags |= XE_VM_FLAG_SCRATCH_PAGE;
if (args->flags & DRM_XE_VM_CREATE_FLAG_LR_MODE)