mirror of
https://github.com/torvalds/linux.git
synced 2026-05-30 18:13:41 +02:00
UAPI Changes:
- OA new property: 'unblock after N reports' (Ashutosh) i915 display Changes: - UHBR rates for Thunderbolt (Kahola) Driver Changes: - IRQ related fixes and improvements (Ilia) - Revert some changes that break a mesa debug tool (John) - Fix migration issues (Nirmoy) - Enable GuC's WA_DUAL_QUEUE for newer platforms (Daniele) - Move shrink test out of xe_bo (Nirmoy) - SRIOV PF: Use correct function to check LMEM provisioning (Michal) - Fix a false-positive "Missing outer runtime PM protection" warning (Rodrigo) - Make GSCCS disabling message less alarming (Daniele) - Fix DG1 power gate sequence (Rodrigo) - Xe files fixes (Lucas) - Fix a potential TP_printk UAF (Thomas) - OA Fixes (Umesh) - Fix tlb invalidation when wedging (Lucas) - Documentation fix (Lucas) -----BEGIN PGP SIGNATURE----- iQEzBAABCAAdFiEEbSBwaO7dZQkcLOKj+mJfZA7rE8oFAmd9eTAACgkQ+mJfZA7r E8oZ1Af/e6LI6oi/mckrBpm0/IOJZNS0ABKR2LVk+HaXNLyb7ZUOlK7euTJynoRf oCuRJK7YsGAD7ArUzHT9CIXf9Kj2HUtHcbKFyusg1j91/YoTL76fUUtf26Z9WaM9 Vxl7J841oVvFxNB1U9wbVWQS/gfLE3SHToB/iXqko1LYfh2pnIeq6KmaiEj7WJeh dX6p+PENRjE/Lj+Q6nD7xvcAeh2jJPmnsYA2VJOedYpo90Y/GGEOHwLSfJpUMQ18 M48H3kdxiDyrk8l6HokSscPVxzy2AB/+9wxmNdYsbO8vVqf0KYtKF32i+NB6GqrA fHTBG4jyzQGtHcNVs9EW2md0kuu4Eg== =7JGM -----END PGP SIGNATURE----- Merge tag 'drm-xe-next-2025-01-07' of https://gitlab.freedesktop.org/drm/xe/kernel into drm-next UAPI Changes: - OA new property: 'unblock after N reports' (Ashutosh) i915 display Changes: - UHBR rates for Thunderbolt (Kahola) Driver Changes: - IRQ related fixes and improvements (Ilia) - Revert some changes that break a mesa debug tool (John) - Fix migration issues (Nirmoy) - Enable GuC's WA_DUAL_QUEUE for newer platforms (Daniele) - Move shrink test out of xe_bo (Nirmoy) - SRIOV PF: Use correct function to check LMEM provisioning (Michal) - Fix a false-positive "Missing outer runtime PM protection" warning (Rodrigo) - Make GSCCS disabling message less alarming (Daniele) - Fix DG1 power gate sequence (Rodrigo) - Xe files fixes (Lucas) - 
Fix a potential TP_printk UAF (Thomas) - OA Fixes (Umesh) - Fix tlb invalidation when wedging (Lucas) - Documentation fix (Lucas) Signed-off-by: Dave Airlie <airlied@redhat.com> From: Rodrigo Vivi <rodrigo.vivi@intel.com> Link: https://patchwork.freedesktop.org/patch/msgid/Z31579j3V3XCPFaK@intel.com
This commit is contained in:
commit
9cc3e4e9f4
|
|
@ -3070,7 +3070,10 @@ int intel_mtl_tbt_calc_port_clock(struct intel_encoder *encoder)
|
|||
|
||||
val = intel_de_read(display, XELPDP_PORT_CLOCK_CTL(display, encoder->port));
|
||||
|
||||
clock = REG_FIELD_GET(XELPDP_DDI_CLOCK_SELECT_MASK, val);
|
||||
if (DISPLAY_VER(display) >= 30)
|
||||
clock = REG_FIELD_GET(XE3_DDI_CLOCK_SELECT_MASK, val);
|
||||
else
|
||||
clock = REG_FIELD_GET(XELPDP_DDI_CLOCK_SELECT_MASK, val);
|
||||
|
||||
drm_WARN_ON(display->drm, !(val & XELPDP_FORWARD_CLOCK_UNGATE));
|
||||
drm_WARN_ON(display->drm, !(val & XELPDP_TBT_CLOCK_REQUEST));
|
||||
|
|
@ -3085,13 +3088,18 @@ int intel_mtl_tbt_calc_port_clock(struct intel_encoder *encoder)
|
|||
return 540000;
|
||||
case XELPDP_DDI_CLOCK_SELECT_TBT_810:
|
||||
return 810000;
|
||||
case XELPDP_DDI_CLOCK_SELECT_TBT_312_5:
|
||||
return 1000000;
|
||||
case XELPDP_DDI_CLOCK_SELECT_TBT_625:
|
||||
return 2000000;
|
||||
default:
|
||||
MISSING_CASE(clock);
|
||||
return 162000;
|
||||
}
|
||||
}
|
||||
|
||||
static int intel_mtl_tbt_clock_select(int clock)
|
||||
static int intel_mtl_tbt_clock_select(struct intel_display *display,
|
||||
int clock)
|
||||
{
|
||||
switch (clock) {
|
||||
case 162000:
|
||||
|
|
@ -3102,6 +3110,18 @@ static int intel_mtl_tbt_clock_select(int clock)
|
|||
return XELPDP_DDI_CLOCK_SELECT_TBT_540;
|
||||
case 810000:
|
||||
return XELPDP_DDI_CLOCK_SELECT_TBT_810;
|
||||
case 1000000:
|
||||
if (DISPLAY_VER(display) < 30) {
|
||||
drm_WARN_ON(display->drm, "UHBR10 not supported for the platform\n");
|
||||
return XELPDP_DDI_CLOCK_SELECT_TBT_162;
|
||||
}
|
||||
return XELPDP_DDI_CLOCK_SELECT_TBT_312_5;
|
||||
case 2000000:
|
||||
if (DISPLAY_VER(display) < 30) {
|
||||
drm_WARN_ON(display->drm, "UHBR20 not supported for the platform\n");
|
||||
return XELPDP_DDI_CLOCK_SELECT_TBT_162;
|
||||
}
|
||||
return XELPDP_DDI_CLOCK_SELECT_TBT_625;
|
||||
default:
|
||||
MISSING_CASE(clock);
|
||||
return XELPDP_DDI_CLOCK_SELECT_TBT_162;
|
||||
|
|
@ -3114,15 +3134,26 @@ static void intel_mtl_tbt_pll_enable(struct intel_encoder *encoder,
|
|||
struct intel_display *display = to_intel_display(encoder);
|
||||
enum phy phy = intel_encoder_to_phy(encoder);
|
||||
u32 val = 0;
|
||||
u32 mask;
|
||||
|
||||
/*
|
||||
* 1. Program PORT_CLOCK_CTL REGISTER to configure
|
||||
* clock muxes, gating and SSC
|
||||
*/
|
||||
val |= XELPDP_DDI_CLOCK_SELECT(intel_mtl_tbt_clock_select(crtc_state->port_clock));
|
||||
|
||||
if (DISPLAY_VER(display) >= 30) {
|
||||
mask = XE3_DDI_CLOCK_SELECT_MASK;
|
||||
val |= XE3_DDI_CLOCK_SELECT(intel_mtl_tbt_clock_select(display, crtc_state->port_clock));
|
||||
} else {
|
||||
mask = XELPDP_DDI_CLOCK_SELECT_MASK;
|
||||
val |= XELPDP_DDI_CLOCK_SELECT(intel_mtl_tbt_clock_select(display, crtc_state->port_clock));
|
||||
}
|
||||
|
||||
mask |= XELPDP_FORWARD_CLOCK_UNGATE;
|
||||
val |= XELPDP_FORWARD_CLOCK_UNGATE;
|
||||
|
||||
intel_de_rmw(display, XELPDP_PORT_CLOCK_CTL(display, encoder->port),
|
||||
XELPDP_DDI_CLOCK_SELECT_MASK | XELPDP_FORWARD_CLOCK_UNGATE, val);
|
||||
mask, val);
|
||||
|
||||
/* 2. Read back PORT_CLOCK_CTL REGISTER */
|
||||
val = intel_de_read(display, XELPDP_PORT_CLOCK_CTL(display, encoder->port));
|
||||
|
|
|
|||
|
|
@ -187,7 +187,9 @@
|
|||
#define XELPDP_TBT_CLOCK_REQUEST REG_BIT(19)
|
||||
#define XELPDP_TBT_CLOCK_ACK REG_BIT(18)
|
||||
#define XELPDP_DDI_CLOCK_SELECT_MASK REG_GENMASK(15, 12)
|
||||
#define XE3_DDI_CLOCK_SELECT_MASK REG_GENMASK(16, 12)
|
||||
#define XELPDP_DDI_CLOCK_SELECT(val) REG_FIELD_PREP(XELPDP_DDI_CLOCK_SELECT_MASK, val)
|
||||
#define XE3_DDI_CLOCK_SELECT(val) REG_FIELD_PREP(XE3_DDI_CLOCK_SELECT_MASK, val)
|
||||
#define XELPDP_DDI_CLOCK_SELECT_NONE 0x0
|
||||
#define XELPDP_DDI_CLOCK_SELECT_MAXPCLK 0x8
|
||||
#define XELPDP_DDI_CLOCK_SELECT_DIV18CLK 0x9
|
||||
|
|
@ -195,6 +197,8 @@
|
|||
#define XELPDP_DDI_CLOCK_SELECT_TBT_270 0xd
|
||||
#define XELPDP_DDI_CLOCK_SELECT_TBT_540 0xe
|
||||
#define XELPDP_DDI_CLOCK_SELECT_TBT_810 0xf
|
||||
#define XELPDP_DDI_CLOCK_SELECT_TBT_312_5 0x18
|
||||
#define XELPDP_DDI_CLOCK_SELECT_TBT_625 0x19
|
||||
#define XELPDP_FORWARD_CLOCK_UNGATE REG_BIT(10)
|
||||
#define XELPDP_LANE1_PHY_CLOCK_SELECT REG_BIT(8)
|
||||
#define XELPDP_SSC_ENABLE_PLLA REG_BIT(1)
|
||||
|
|
|
|||
|
|
@ -83,6 +83,8 @@
|
|||
#define RING_IMR(base) XE_REG((base) + 0xa8)
|
||||
#define RING_INT_STATUS_RPT_PTR(base) XE_REG((base) + 0xac)
|
||||
|
||||
#define CS_INT_VEC(base) XE_REG((base) + 0x1b8)
|
||||
|
||||
#define RING_EIR(base) XE_REG((base) + 0xb0)
|
||||
#define RING_EMR(base) XE_REG((base) + 0xb4)
|
||||
#define RING_ESR(base) XE_REG((base) + 0xb8)
|
||||
|
|
@ -138,6 +140,7 @@
|
|||
|
||||
#define RING_MODE(base) XE_REG((base) + 0x29c)
|
||||
#define GFX_DISABLE_LEGACY_MODE REG_BIT(3)
|
||||
#define GFX_MSIX_INTERRUPT_ENABLE REG_BIT(13)
|
||||
|
||||
#define RING_TIMESTAMP(base) XE_REG((base) + 0x358)
|
||||
|
||||
|
|
|
|||
|
|
@ -25,6 +25,9 @@
|
|||
#define CTX_INT_SRC_REPORT_REG (CTX_LRI_INT_REPORT_PTR + 3)
|
||||
#define CTX_INT_SRC_REPORT_PTR (CTX_LRI_INT_REPORT_PTR + 4)
|
||||
|
||||
#define CTX_CS_INT_VEC_REG 0x5a
|
||||
#define CTX_CS_INT_VEC_DATA (CTX_CS_INT_VEC_REG + 1)
|
||||
|
||||
#define INDIRECT_CTX_RING_HEAD (0x02 + 1)
|
||||
#define INDIRECT_CTX_RING_TAIL (0x04 + 1)
|
||||
#define INDIRECT_CTX_RING_START (0x06 + 1)
|
||||
|
|
|
|||
|
|
@ -606,8 +606,6 @@ static void xe_bo_shrink_kunit(struct kunit *test)
|
|||
static struct kunit_case xe_bo_tests[] = {
|
||||
KUNIT_CASE_PARAM(xe_ccs_migrate_kunit, xe_pci_live_device_gen_param),
|
||||
KUNIT_CASE_PARAM(xe_bo_evict_kunit, xe_pci_live_device_gen_param),
|
||||
KUNIT_CASE_PARAM_ATTR(xe_bo_shrink_kunit, xe_pci_live_device_gen_param,
|
||||
{.speed = KUNIT_SPEED_SLOW}),
|
||||
{}
|
||||
};
|
||||
|
||||
|
|
@ -618,3 +616,17 @@ struct kunit_suite xe_bo_test_suite = {
|
|||
.init = xe_kunit_helper_xe_device_live_test_init,
|
||||
};
|
||||
EXPORT_SYMBOL_IF_KUNIT(xe_bo_test_suite);
|
||||
|
||||
static struct kunit_case xe_bo_shrink_test[] = {
|
||||
KUNIT_CASE_PARAM_ATTR(xe_bo_shrink_kunit, xe_pci_live_device_gen_param,
|
||||
{.speed = KUNIT_SPEED_SLOW}),
|
||||
{}
|
||||
};
|
||||
|
||||
VISIBLE_IF_KUNIT
|
||||
struct kunit_suite xe_bo_shrink_test_suite = {
|
||||
.name = "xe_bo_shrink",
|
||||
.test_cases = xe_bo_shrink_test,
|
||||
.init = xe_kunit_helper_xe_device_live_test_init,
|
||||
};
|
||||
EXPORT_SYMBOL_IF_KUNIT(xe_bo_shrink_test_suite);
|
||||
|
|
|
|||
|
|
@ -6,11 +6,13 @@
|
|||
#include <kunit/test.h>
|
||||
|
||||
extern struct kunit_suite xe_bo_test_suite;
|
||||
extern struct kunit_suite xe_bo_shrink_test_suite;
|
||||
extern struct kunit_suite xe_dma_buf_test_suite;
|
||||
extern struct kunit_suite xe_migrate_test_suite;
|
||||
extern struct kunit_suite xe_mocs_test_suite;
|
||||
|
||||
kunit_test_suite(xe_bo_test_suite);
|
||||
kunit_test_suite(xe_bo_shrink_test_suite);
|
||||
kunit_test_suite(xe_dma_buf_test_suite);
|
||||
kunit_test_suite(xe_migrate_test_suite);
|
||||
kunit_test_suite(xe_mocs_test_suite);
|
||||
|
|
|
|||
|
|
@ -733,7 +733,7 @@ static int xe_bo_move(struct ttm_buffer_object *ttm_bo, bool evict,
|
|||
new_mem->mem_type == XE_PL_SYSTEM) {
|
||||
long timeout = dma_resv_wait_timeout(ttm_bo->base.resv,
|
||||
DMA_RESV_USAGE_BOOKKEEP,
|
||||
true,
|
||||
false,
|
||||
MAX_SCHEDULE_TIMEOUT);
|
||||
if (timeout < 0) {
|
||||
ret = timeout;
|
||||
|
|
@ -857,8 +857,16 @@ static int xe_bo_move(struct ttm_buffer_object *ttm_bo, bool evict,
|
|||
|
||||
out:
|
||||
if ((!ttm_bo->resource || ttm_bo->resource->mem_type == XE_PL_SYSTEM) &&
|
||||
ttm_bo->ttm)
|
||||
ttm_bo->ttm) {
|
||||
long timeout = dma_resv_wait_timeout(ttm_bo->base.resv,
|
||||
DMA_RESV_USAGE_KERNEL,
|
||||
false,
|
||||
MAX_SCHEDULE_TIMEOUT);
|
||||
if (timeout < 0)
|
||||
ret = timeout;
|
||||
|
||||
xe_tt_unmap_sg(ttm_bo->ttm);
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -119,7 +119,11 @@ static ssize_t __xe_devcoredump_read(char *buffer, size_t count,
|
|||
drm_puts(&p, "\n**** GuC CT ****\n");
|
||||
xe_guc_ct_snapshot_print(ss->guc.ct, &p);
|
||||
|
||||
drm_puts(&p, "\n**** Contexts ****\n");
|
||||
/*
|
||||
* Don't add a new section header here because the mesa debug decoder
|
||||
* tool expects the context information to be in the 'GuC CT' section.
|
||||
*/
|
||||
/* drm_puts(&p, "\n**** Contexts ****\n"); */
|
||||
xe_guc_exec_queue_snapshot_print(ss->ge, &p);
|
||||
|
||||
drm_puts(&p, "\n**** Job ****\n");
|
||||
|
|
@ -416,6 +420,15 @@ void xe_print_blob_ascii85(struct drm_printer *p, const char *prefix,
|
|||
char buff[ASCII85_BUFSZ], *line_buff;
|
||||
size_t line_pos = 0;
|
||||
|
||||
/*
|
||||
* Splitting blobs across multiple lines is not compatible with the mesa
|
||||
* debug decoder tool. Note that even dropping the explicit '\n' below
|
||||
* doesn't help because the GuC log is so big some underlying implementation
|
||||
* still splits the lines at 512K characters. So just bail completely for
|
||||
* the moment.
|
||||
*/
|
||||
return;
|
||||
|
||||
#define DMESG_MAX_LINE_LEN 800
|
||||
#define MIN_SPACE (ASCII85_BUFSZ + 2) /* 85 + "\n\0" */
|
||||
|
||||
|
|
|
|||
|
|
@ -325,7 +325,9 @@ struct xe_device *xe_device_create(struct pci_dev *pdev,
|
|||
xe->info.revid = pdev->revision;
|
||||
xe->info.force_execlist = xe_modparam.force_execlist;
|
||||
|
||||
spin_lock_init(&xe->irq.lock);
|
||||
err = xe_irq_init(xe);
|
||||
if (err)
|
||||
goto err;
|
||||
|
||||
init_waitqueue_head(&xe->ufence_wq);
|
||||
|
||||
|
|
|
|||
|
|
@ -157,8 +157,7 @@ static inline bool xe_device_has_sriov(struct xe_device *xe)
|
|||
|
||||
static inline bool xe_device_has_msix(struct xe_device *xe)
|
||||
{
|
||||
/* TODO: change this when MSI-X support is fully integrated */
|
||||
return false;
|
||||
return xe->irq.msix.nvec > 0;
|
||||
}
|
||||
|
||||
static inline bool xe_device_has_memirq(struct xe_device *xe)
|
||||
|
|
|
|||
|
|
@ -348,6 +348,14 @@ struct xe_device {
|
|||
|
||||
/** @irq.enabled: interrupts enabled on this device */
|
||||
atomic_t enabled;
|
||||
|
||||
/** @irq.msix: irq info for platforms that support MSI-X */
|
||||
struct {
|
||||
/** @irq.msix.nvec: number of MSI-X interrupts */
|
||||
u16 nvec;
|
||||
/** @irq.msix.indexes: used to allocate MSI-X indexes */
|
||||
struct xarray indexes;
|
||||
} msix;
|
||||
} irq;
|
||||
|
||||
/** @ttm: ttm device */
|
||||
|
|
|
|||
|
|
@ -8,6 +8,7 @@
|
|||
#include <linux/nospec.h>
|
||||
|
||||
#include <drm/drm_device.h>
|
||||
#include <drm/drm_drv.h>
|
||||
#include <drm/drm_file.h>
|
||||
#include <uapi/drm/xe_drm.h>
|
||||
|
||||
|
|
@ -16,6 +17,7 @@
|
|||
#include "xe_hw_engine_class_sysfs.h"
|
||||
#include "xe_hw_engine_group.h"
|
||||
#include "xe_hw_fence.h"
|
||||
#include "xe_irq.h"
|
||||
#include "xe_lrc.h"
|
||||
#include "xe_macros.h"
|
||||
#include "xe_migrate.h"
|
||||
|
|
@ -68,6 +70,7 @@ static struct xe_exec_queue *__xe_exec_queue_alloc(struct xe_device *xe,
|
|||
q->gt = gt;
|
||||
q->class = hwe->class;
|
||||
q->width = width;
|
||||
q->msix_vec = XE_IRQ_DEFAULT_MSIX;
|
||||
q->logical_mask = logical_mask;
|
||||
q->fence_irq = &gt->fence_irq[hwe->class];
|
||||
q->ring_ops = gt->ring_ops[hwe->class];
|
||||
|
|
@ -117,7 +120,7 @@ static int __xe_exec_queue_init(struct xe_exec_queue *q)
|
|||
}
|
||||
|
||||
for (i = 0; i < q->width; ++i) {
|
||||
q->lrc[i] = xe_lrc_create(q->hwe, q->vm, SZ_16K);
|
||||
q->lrc[i] = xe_lrc_create(q->hwe, q->vm, SZ_16K, q->msix_vec);
|
||||
if (IS_ERR(q->lrc[i])) {
|
||||
err = PTR_ERR(q->lrc[i]);
|
||||
goto err_unlock;
|
||||
|
|
@ -766,19 +769,21 @@ bool xe_exec_queue_is_idle(struct xe_exec_queue *q)
|
|||
*/
|
||||
void xe_exec_queue_update_run_ticks(struct xe_exec_queue *q)
|
||||
{
|
||||
struct xe_file *xef;
|
||||
struct xe_device *xe = gt_to_xe(q->gt);
|
||||
struct xe_lrc *lrc;
|
||||
u32 old_ts, new_ts;
|
||||
int idx;
|
||||
|
||||
/*
|
||||
* Jobs that are run during driver load may use an exec_queue, but are
|
||||
* not associated with a user xe file, so avoid accumulating busyness
|
||||
* for kernel specific work.
|
||||
* Jobs that are executed by the kernel don't have a corresponding xe_file
|
||||
* and thus are not accounted.
|
||||
*/
|
||||
if (!q->vm || !q->vm->xef)
|
||||
if (!q->xef)
|
||||
return;
|
||||
|
||||
xef = q->vm->xef;
|
||||
/* Synchronize with unbind while holding the xe file open */
|
||||
if (!drm_dev_enter(&xe->drm, &idx))
|
||||
return;
|
||||
|
||||
/*
|
||||
* Only sample the first LRC. For parallel submission, all of them are
|
||||
|
|
@ -790,7 +795,9 @@ void xe_exec_queue_update_run_ticks(struct xe_exec_queue *q)
|
|||
*/
|
||||
lrc = q->lrc[0];
|
||||
new_ts = xe_lrc_update_timestamp(lrc, &old_ts);
|
||||
xef->run_ticks[q->class] += (new_ts - old_ts) * q->width;
|
||||
q->xef->run_ticks[q->class] += (new_ts - old_ts) * q->width;
|
||||
|
||||
drm_dev_exit(idx);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
|||
|
|
@ -41,7 +41,7 @@ struct xe_exec_queue {
|
|||
/** @xef: Back pointer to xe file if this is user created exec queue */
|
||||
struct xe_file *xef;
|
||||
|
||||
/** @gt: graphics tile this exec queue can submit to */
|
||||
/** @gt: GT structure this exec queue can submit to */
|
||||
struct xe_gt *gt;
|
||||
/**
|
||||
* @hwe: A hardware of the same class. May (physical engine) or may not
|
||||
|
|
@ -63,6 +63,8 @@ struct xe_exec_queue {
|
|||
char name[MAX_FENCE_NAME_LEN];
|
||||
/** @width: width (number BB submitted per exec) of this exec queue */
|
||||
u16 width;
|
||||
/** @msix_vec: MSI-X vector (for platforms that support it) */
|
||||
u16 msix_vec;
|
||||
/** @fence_irq: fence IRQ used to signal job completion */
|
||||
struct xe_hw_fence_irq *fence_irq;
|
||||
|
||||
|
|
|
|||
|
|
@ -17,6 +17,7 @@
|
|||
#include "xe_exec_queue.h"
|
||||
#include "xe_gt.h"
|
||||
#include "xe_hw_fence.h"
|
||||
#include "xe_irq.h"
|
||||
#include "xe_lrc.h"
|
||||
#include "xe_macros.h"
|
||||
#include "xe_mmio.h"
|
||||
|
|
@ -47,6 +48,7 @@ static void __start_lrc(struct xe_hw_engine *hwe, struct xe_lrc *lrc,
|
|||
struct xe_mmio *mmio = &gt->mmio;
|
||||
struct xe_device *xe = gt_to_xe(gt);
|
||||
u64 lrc_desc;
|
||||
u32 ring_mode = _MASKED_BIT_ENABLE(GFX_DISABLE_LEGACY_MODE);
|
||||
|
||||
lrc_desc = xe_lrc_descriptor(lrc);
|
||||
|
||||
|
|
@ -80,8 +82,10 @@ static void __start_lrc(struct xe_hw_engine *hwe, struct xe_lrc *lrc,
|
|||
xe_mmio_write32(mmio, RING_HWS_PGA(hwe->mmio_base),
|
||||
xe_bo_ggtt_addr(hwe->hwsp));
|
||||
xe_mmio_read32(mmio, RING_HWS_PGA(hwe->mmio_base));
|
||||
xe_mmio_write32(mmio, RING_MODE(hwe->mmio_base),
|
||||
_MASKED_BIT_ENABLE(GFX_DISABLE_LEGACY_MODE));
|
||||
|
||||
if (xe_device_has_msix(gt_to_xe(hwe->gt)))
|
||||
ring_mode |= _MASKED_BIT_ENABLE(GFX_MSIX_INTERRUPT_ENABLE);
|
||||
xe_mmio_write32(mmio, RING_MODE(hwe->mmio_base), ring_mode);
|
||||
|
||||
xe_mmio_write32(mmio, RING_EXECLIST_SQ_CONTENTS_LO(hwe->mmio_base),
|
||||
lower_32_bits(lrc_desc));
|
||||
|
|
@ -265,7 +269,7 @@ struct xe_execlist_port *xe_execlist_port_create(struct xe_device *xe,
|
|||
|
||||
port->hwe = hwe;
|
||||
|
||||
port->lrc = xe_lrc_create(hwe, NULL, SZ_16K);
|
||||
port->lrc = xe_lrc_create(hwe, NULL, SZ_16K, XE_IRQ_DEFAULT_MSIX);
|
||||
if (IS_ERR(port->lrc)) {
|
||||
err = PTR_ERR(port->lrc);
|
||||
goto err;
|
||||
|
|
|
|||
|
|
@ -387,6 +387,10 @@ int xe_gt_init_early(struct xe_gt *gt)
|
|||
xe_force_wake_init_gt(gt, gt_to_fw(gt));
|
||||
spin_lock_init(&gt->global_invl_lock);
|
||||
|
||||
err = xe_gt_tlb_invalidation_init_early(gt);
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
|
@ -588,10 +592,6 @@ int xe_gt_init(struct xe_gt *gt)
|
|||
xe_hw_fence_irq_init(&gt->fence_irq[i]);
|
||||
}
|
||||
|
||||
err = xe_gt_tlb_invalidation_init(gt);
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
err = xe_gt_pagefault_init(gt);
|
||||
if (err)
|
||||
return err;
|
||||
|
|
|
|||
|
|
@ -122,10 +122,12 @@ void xe_gt_idle_enable_pg(struct xe_gt *gt)
|
|||
if (!xe_gt_is_media_type(gt))
|
||||
gtidle->powergate_enable |= RENDER_POWERGATE_ENABLE;
|
||||
|
||||
for (i = XE_HW_ENGINE_VCS0, j = 0; i <= XE_HW_ENGINE_VCS7; ++i, ++j) {
|
||||
if ((gt->info.engine_mask & BIT(i)))
|
||||
gtidle->powergate_enable |= (VDN_HCP_POWERGATE_ENABLE(j) |
|
||||
VDN_MFXVDENC_POWERGATE_ENABLE(j));
|
||||
if (xe->info.platform != XE_DG1) {
|
||||
for (i = XE_HW_ENGINE_VCS0, j = 0; i <= XE_HW_ENGINE_VCS7; ++i, ++j) {
|
||||
if ((gt->info.engine_mask & BIT(i)))
|
||||
gtidle->powergate_enable |= (VDN_HCP_POWERGATE_ENABLE(j) |
|
||||
VDN_MFXVDENC_POWERGATE_ENABLE(j));
|
||||
}
|
||||
}
|
||||
|
||||
fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
|
||||
|
|
|
|||
|
|
@ -2120,7 +2120,7 @@ static int pf_validate_vf_config(struct xe_gt *gt, unsigned int vfid)
|
|||
valid_any = valid_any || (valid_ggtt && is_primary);
|
||||
|
||||
if (IS_DGFX(xe)) {
|
||||
bool valid_lmem = pf_get_vf_config_ggtt(primary_gt, vfid);
|
||||
bool valid_lmem = pf_get_vf_config_lmem(primary_gt, vfid);
|
||||
|
||||
valid_any = valid_any || (valid_lmem && is_primary);
|
||||
valid_all = valid_all && valid_lmem;
|
||||
|
|
|
|||
|
|
@ -12,7 +12,7 @@
|
|||
|
||||
/**
|
||||
* xe_gt_stats_incr - Increments the specified stats counter
|
||||
* @gt: graphics tile
|
||||
* @gt: GT structure
|
||||
* @id: xe_gt_stats_id type id that needs to be incremented
|
||||
* @incr: value to be incremented with
|
||||
*
|
||||
|
|
@ -32,7 +32,7 @@ static const char *const stat_description[__XE_GT_STATS_NUM_IDS] = {
|
|||
|
||||
/**
|
||||
* xe_gt_stats_print_info - Print the GT stats
|
||||
* @gt: graphics tile
|
||||
* @gt: GT structure
|
||||
* @p: drm_printer where it will be printed out.
|
||||
*
|
||||
* This prints out all the available GT stats.
|
||||
|
|
|
|||
|
|
@ -106,15 +106,15 @@ static void xe_gt_tlb_fence_timeout(struct work_struct *work)
|
|||
}
|
||||
|
||||
/**
|
||||
* xe_gt_tlb_invalidation_init - Initialize GT TLB invalidation state
|
||||
* @gt: graphics tile
|
||||
* xe_gt_tlb_invalidation_init_early - Initialize GT TLB invalidation state
|
||||
* @gt: GT structure
|
||||
*
|
||||
* Initialize GT TLB invalidation state, purely software initialization, should
|
||||
* be called once during driver load.
|
||||
*
|
||||
* Return: 0 on success, negative error code on error.
|
||||
*/
|
||||
int xe_gt_tlb_invalidation_init(struct xe_gt *gt)
|
||||
int xe_gt_tlb_invalidation_init_early(struct xe_gt *gt)
|
||||
{
|
||||
gt->tlb_invalidation.seqno = 1;
|
||||
INIT_LIST_HEAD(&gt->tlb_invalidation.pending_fences);
|
||||
|
|
@ -128,7 +128,7 @@ int xe_gt_tlb_invalidation_init(struct xe_gt *gt)
|
|||
|
||||
/**
|
||||
* xe_gt_tlb_invalidation_reset - Initialize GT TLB invalidation reset
|
||||
* @gt: graphics tile
|
||||
* @gt: GT structure
|
||||
*
|
||||
* Signal any pending invalidation fences, should be called during a GT reset
|
||||
*/
|
||||
|
|
@ -244,7 +244,7 @@ static int send_tlb_invalidation(struct xe_guc *guc,
|
|||
|
||||
/**
|
||||
* xe_gt_tlb_invalidation_guc - Issue a TLB invalidation on this GT for the GuC
|
||||
* @gt: graphics tile
|
||||
* @gt: GT structure
|
||||
* @fence: invalidation fence which will be signaled on TLB invalidation
|
||||
* completion
|
||||
*
|
||||
|
|
@ -277,7 +277,7 @@ static int xe_gt_tlb_invalidation_guc(struct xe_gt *gt,
|
|||
|
||||
/**
|
||||
* xe_gt_tlb_invalidation_ggtt - Issue a TLB invalidation on this GT for the GGTT
|
||||
* @gt: graphics tile
|
||||
* @gt: GT structure
|
||||
*
|
||||
* Issue a TLB invalidation for the GGTT. Completion of TLB invalidation is
|
||||
* synchronous.
|
||||
|
|
@ -326,7 +326,7 @@ int xe_gt_tlb_invalidation_ggtt(struct xe_gt *gt)
|
|||
* xe_gt_tlb_invalidation_range - Issue a TLB invalidation on this GT for an
|
||||
* address range
|
||||
*
|
||||
* @gt: graphics tile
|
||||
* @gt: GT structure
|
||||
* @fence: invalidation fence which will be signaled on TLB invalidation
|
||||
* completion
|
||||
* @start: start address
|
||||
|
|
@ -412,7 +412,7 @@ int xe_gt_tlb_invalidation_range(struct xe_gt *gt,
|
|||
|
||||
/**
|
||||
* xe_gt_tlb_invalidation_vma - Issue a TLB invalidation on this GT for a VMA
|
||||
* @gt: graphics tile
|
||||
* @gt: GT structure
|
||||
* @fence: invalidation fence which will be signaled on TLB invalidation
|
||||
* completion, can be NULL
|
||||
* @vma: VMA to invalidate
|
||||
|
|
|
|||
|
|
@ -14,7 +14,8 @@ struct xe_gt;
|
|||
struct xe_guc;
|
||||
struct xe_vma;
|
||||
|
||||
int xe_gt_tlb_invalidation_init(struct xe_gt *gt);
|
||||
int xe_gt_tlb_invalidation_init_early(struct xe_gt *gt);
|
||||
|
||||
void xe_gt_tlb_invalidation_reset(struct xe_gt *gt);
|
||||
int xe_gt_tlb_invalidation_ggtt(struct xe_gt *gt);
|
||||
int xe_gt_tlb_invalidation_vma(struct xe_gt *gt,
|
||||
|
|
|
|||
|
|
@ -147,6 +147,34 @@ static u32 guc_ctl_ads_flags(struct xe_guc *guc)
|
|||
return flags;
|
||||
}
|
||||
|
||||
static bool needs_wa_dual_queue(struct xe_gt *gt)
|
||||
{
|
||||
/*
|
||||
* The DUAL_QUEUE_WA tells the GuC to not allow concurrent submissions
|
||||
* on RCS and CCSes with different address spaces, which on DG2 is
|
||||
* required as a WA for an HW bug.
|
||||
*/
|
||||
if (XE_WA(gt, 22011391025))
|
||||
return true;
|
||||
|
||||
/*
|
||||
* On newer platforms, the HW has been updated to not allow parallel
|
||||
* execution of different address spaces, so the RCS/CCS will stall the
|
||||
* context switch if one of the other RCS/CCSes is busy with a different
|
||||
* address space. While functionally correct, having a submission
|
||||
* stalled on the HW limits the GuC ability to shuffle things around and
|
||||
* can cause complications if the non-stalled submission runs for a long
|
||||
* time, because the GuC doesn't know that the stalled submission isn't
|
||||
* actually running and might declare it as hung. Therefore, we enable
|
||||
* the DUAL_QUEUE_WA on all newer platforms on GTs that have CCS engines
|
||||
* to move management back to the GuC.
|
||||
*/
|
||||
if (CCS_MASK(gt) && GRAPHICS_VERx100(gt_to_xe(gt)) >= 1270)
|
||||
return true;
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
static u32 guc_ctl_wa_flags(struct xe_guc *guc)
|
||||
{
|
||||
struct xe_device *xe = guc_to_xe(guc);
|
||||
|
|
@ -159,7 +187,7 @@ static u32 guc_ctl_wa_flags(struct xe_guc *guc)
|
|||
if (XE_WA(gt, 14014475959))
|
||||
flags |= GUC_WA_HOLD_CCS_SWITCHOUT;
|
||||
|
||||
if (XE_WA(gt, 22011391025))
|
||||
if (needs_wa_dual_queue(gt))
|
||||
flags |= GUC_WA_DUAL_QUEUE;
|
||||
|
||||
/*
|
||||
|
|
|
|||
|
|
@ -324,6 +324,7 @@ void xe_hw_engine_enable_ring(struct xe_hw_engine *hwe)
|
|||
{
|
||||
u32 ccs_mask =
|
||||
xe_hw_engine_mask_per_class(hwe->gt, XE_ENGINE_CLASS_COMPUTE);
|
||||
u32 ring_mode = _MASKED_BIT_ENABLE(GFX_DISABLE_LEGACY_MODE);
|
||||
|
||||
if (hwe->class == XE_ENGINE_CLASS_COMPUTE && ccs_mask)
|
||||
xe_mmio_write32(&hwe->gt->mmio, RCU_MODE,
|
||||
|
|
@ -332,8 +333,10 @@ void xe_hw_engine_enable_ring(struct xe_hw_engine *hwe)
|
|||
xe_hw_engine_mmio_write32(hwe, RING_HWSTAM(0), ~0x0);
|
||||
xe_hw_engine_mmio_write32(hwe, RING_HWS_PGA(0),
|
||||
xe_bo_ggtt_addr(hwe->hwsp));
|
||||
xe_hw_engine_mmio_write32(hwe, RING_MODE(0),
|
||||
_MASKED_BIT_ENABLE(GFX_DISABLE_LEGACY_MODE));
|
||||
|
||||
if (xe_device_has_msix(gt_to_xe(hwe->gt)))
|
||||
ring_mode |= _MASKED_BIT_ENABLE(GFX_MSIX_INTERRUPT_ENABLE);
|
||||
xe_hw_engine_mmio_write32(hwe, RING_MODE(0), ring_mode);
|
||||
xe_hw_engine_mmio_write32(hwe, RING_MI_MODE(0),
|
||||
_MASKED_BIT_DISABLE(STOP_RING));
|
||||
xe_hw_engine_mmio_read32(hwe, RING_MI_MODE(0));
|
||||
|
|
@ -772,7 +775,7 @@ static void check_gsc_availability(struct xe_gt *gt)
|
|||
xe_mmio_write32(&gt->mmio, GUNIT_GSC_INTR_ENABLE, 0);
|
||||
xe_mmio_write32(&gt->mmio, GUNIT_GSC_INTR_MASK, ~0);
|
||||
|
||||
drm_info(&xe->drm, "gsccs disabled due to lack of FW\n");
|
||||
drm_dbg(&xe->drm, "GSC FW not used, disabling gsccs\n");
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -106,7 +106,7 @@ struct xe_hw_engine_class_intf {
|
|||
* Contains all the hardware engine state for physical instances.
|
||||
*/
|
||||
struct xe_hw_engine {
|
||||
/** @gt: graphics tile this hw engine belongs to */
|
||||
/** @gt: GT structure this hw engine belongs to */
|
||||
struct xe_gt *gt;
|
||||
/** @name: name of this hw engine */
|
||||
const char *name;
|
||||
|
|
|
|||
|
|
@ -41,7 +41,7 @@ struct xe_hw_fence_irq {
|
|||
* to a xe_hw_fence_irq, maintains serial seqno.
|
||||
*/
|
||||
struct xe_hw_fence_ctx {
|
||||
/** @gt: graphics tile of hardware fence context */
|
||||
/** @gt: GT structure of hardware fence context */
|
||||
struct xe_gt *gt;
|
||||
/** @irq: fence irq handler */
|
||||
struct xe_hw_fence_irq *irq;
|
||||
|
|
|
|||
|
|
@ -10,6 +10,7 @@
|
|||
#include <drm/drm_managed.h>
|
||||
|
||||
#include "display/xe_display.h"
|
||||
#include "regs/xe_guc_regs.h"
|
||||
#include "regs/xe_irq_regs.h"
|
||||
#include "xe_device.h"
|
||||
#include "xe_drv.h"
|
||||
|
|
@ -29,6 +30,11 @@
|
|||
#define IIR(offset) XE_REG(offset + 0x8)
|
||||
#define IER(offset) XE_REG(offset + 0xc)
|
||||
|
||||
static int xe_irq_msix_init(struct xe_device *xe);
|
||||
static void xe_irq_msix_free(struct xe_device *xe);
|
||||
static int xe_irq_msix_request_irqs(struct xe_device *xe);
|
||||
static void xe_irq_msix_synchronize_irq(struct xe_device *xe);
|
||||
|
||||
static void assert_iir_is_zero(struct xe_mmio *mmio, struct xe_reg reg)
|
||||
{
|
||||
u32 val = xe_mmio_read32(mmio, reg);
|
||||
|
|
@ -572,6 +578,11 @@ static void xe_irq_reset(struct xe_device *xe)
|
|||
if (IS_SRIOV_VF(xe))
|
||||
return vf_irq_reset(xe);
|
||||
|
||||
if (xe_device_uses_memirq(xe)) {
|
||||
for_each_tile(tile, xe, id)
|
||||
xe_memirq_reset(&tile->memirq);
|
||||
}
|
||||
|
||||
for_each_tile(tile, xe, id) {
|
||||
if (GRAPHICS_VERx100(xe) >= 1210)
|
||||
dg1_irq_reset(tile);
|
||||
|
|
@ -614,6 +625,14 @@ static void xe_irq_postinstall(struct xe_device *xe)
|
|||
if (IS_SRIOV_VF(xe))
|
||||
return vf_irq_postinstall(xe);
|
||||
|
||||
if (xe_device_uses_memirq(xe)) {
|
||||
struct xe_tile *tile;
|
||||
unsigned int id;
|
||||
|
||||
for_each_tile(tile, xe, id)
|
||||
xe_memirq_postinstall(&tile->memirq);
|
||||
}
|
||||
|
||||
xe_display_irq_postinstall(xe, xe_root_mmio_gt(xe));
|
||||
|
||||
/*
|
||||
|
|
@ -656,27 +675,11 @@ static irq_handler_t xe_irq_handler(struct xe_device *xe)
|
|||
return xelp_irq_handler;
|
||||
}
|
||||
|
||||
static void irq_uninstall(void *arg)
|
||||
{
|
||||
struct xe_device *xe = arg;
|
||||
struct pci_dev *pdev = to_pci_dev(xe->drm.dev);
|
||||
int irq;
|
||||
|
||||
if (!atomic_xchg(&xe->irq.enabled, 0))
|
||||
return;
|
||||
|
||||
xe_irq_reset(xe);
|
||||
|
||||
irq = pci_irq_vector(pdev, 0);
|
||||
free_irq(irq, xe);
|
||||
}
|
||||
|
||||
int xe_irq_install(struct xe_device *xe)
|
||||
static int xe_irq_msi_request_irqs(struct xe_device *xe)
|
||||
{
|
||||
struct pci_dev *pdev = to_pci_dev(xe->drm.dev);
|
||||
unsigned int irq_flags = PCI_IRQ_MSIX;
|
||||
irq_handler_t irq_handler;
|
||||
int err, irq, nvec;
|
||||
int irq, err;
|
||||
|
||||
irq_handler = xe_irq_handler(xe);
|
||||
if (!irq_handler) {
|
||||
|
|
@ -684,32 +687,71 @@ int xe_irq_install(struct xe_device *xe)
|
|||
return -EINVAL;
|
||||
}
|
||||
|
||||
irq = pci_irq_vector(pdev, 0);
|
||||
err = request_irq(irq, irq_handler, IRQF_SHARED, DRIVER_NAME, xe);
|
||||
if (err < 0) {
|
||||
drm_err(&xe->drm, "Failed to request MSI IRQ %d\n", err);
|
||||
return err;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void xe_irq_msi_free(struct xe_device *xe)
|
||||
{
|
||||
struct pci_dev *pdev = to_pci_dev(xe->drm.dev);
|
||||
int irq;
|
||||
|
||||
irq = pci_irq_vector(pdev, 0);
|
||||
free_irq(irq, xe);
|
||||
}
|
||||
|
||||
static void irq_uninstall(void *arg)
|
||||
{
|
||||
struct xe_device *xe = arg;
|
||||
|
||||
if (!atomic_xchg(&xe->irq.enabled, 0))
|
||||
return;
|
||||
|
||||
xe_irq_reset(xe);
|
||||
|
||||
nvec = pci_msix_vec_count(pdev);
|
||||
if (nvec <= 0) {
|
||||
if (nvec == -EINVAL) {
|
||||
/* MSIX capability is not supported in the device, using MSI */
|
||||
irq_flags = PCI_IRQ_MSI;
|
||||
nvec = 1;
|
||||
} else {
|
||||
drm_err(&xe->drm, "MSIX: Failed getting count\n");
|
||||
return nvec;
|
||||
}
|
||||
if (xe_device_has_msix(xe))
|
||||
xe_irq_msix_free(xe);
|
||||
else
|
||||
xe_irq_msi_free(xe);
|
||||
}
|
||||
|
||||
int xe_irq_init(struct xe_device *xe)
|
||||
{
|
||||
spin_lock_init(&xe->irq.lock);
|
||||
|
||||
return xe_irq_msix_init(xe);
|
||||
}
|
||||
|
||||
int xe_irq_install(struct xe_device *xe)
|
||||
{
|
||||
struct pci_dev *pdev = to_pci_dev(xe->drm.dev);
|
||||
unsigned int irq_flags = PCI_IRQ_MSI;
|
||||
int nvec = 1;
|
||||
int err;
|
||||
|
||||
xe_irq_reset(xe);
|
||||
|
||||
if (xe_device_has_msix(xe)) {
|
||||
nvec = xe->irq.msix.nvec;
|
||||
irq_flags = PCI_IRQ_MSIX;
|
||||
}
|
||||
|
||||
err = pci_alloc_irq_vectors(pdev, nvec, nvec, irq_flags);
|
||||
if (err < 0) {
|
||||
drm_err(&xe->drm, "MSI/MSIX: Failed to enable support %d\n", err);
|
||||
drm_err(&xe->drm, "Failed to allocate IRQ vectors: %d\n", err);
|
||||
return err;
|
||||
}
|
||||
|
||||
irq = pci_irq_vector(pdev, 0);
|
||||
err = request_irq(irq, irq_handler, IRQF_SHARED, DRIVER_NAME, xe);
|
||||
if (err < 0) {
|
||||
drm_err(&xe->drm, "Failed to request MSI/MSIX IRQ %d\n", err);
|
||||
err = xe_device_has_msix(xe) ? xe_irq_msix_request_irqs(xe) :
|
||||
xe_irq_msi_request_irqs(xe);
|
||||
if (err)
|
||||
return err;
|
||||
}
|
||||
|
||||
atomic_set(&xe->irq.enabled, 1);
|
||||
|
||||
|
|
@ -722,18 +764,28 @@ int xe_irq_install(struct xe_device *xe)
|
|||
return 0;
|
||||
|
||||
free_irq_handler:
|
||||
free_irq(irq, xe);
|
||||
if (xe_device_has_msix(xe))
|
||||
xe_irq_msix_free(xe);
|
||||
else
|
||||
xe_irq_msi_free(xe);
|
||||
|
||||
return err;
|
||||
}
|
||||
|
||||
static void xe_irq_msi_synchronize_irq(struct xe_device *xe)
|
||||
{
|
||||
synchronize_irq(to_pci_dev(xe->drm.dev)->irq);
|
||||
}
|
||||
|
||||
void xe_irq_suspend(struct xe_device *xe)
|
||||
{
|
||||
int irq = to_pci_dev(xe->drm.dev)->irq;
|
||||
|
||||
atomic_set(&xe->irq.enabled, 0); /* no new irqs */
|
||||
|
||||
synchronize_irq(irq); /* flush irqs */
|
||||
/* flush irqs */
|
||||
if (xe_device_has_msix(xe))
|
||||
xe_irq_msix_synchronize_irq(xe);
|
||||
else
|
||||
xe_irq_msi_synchronize_irq(xe);
|
||||
xe_irq_reset(xe); /* turn irqs off */
|
||||
}
|
||||
|
||||
|
|
@ -754,3 +806,198 @@ void xe_irq_resume(struct xe_device *xe)
|
|||
for_each_gt(gt, xe, id)
|
||||
xe_irq_enable_hwe(gt);
|
||||
}
|
||||
|
||||
/* MSI-X related definitions and functions below. */
|
||||
|
||||
enum xe_irq_msix_static {
|
||||
GUC2HOST_MSIX = 0,
|
||||
DEFAULT_MSIX = XE_IRQ_DEFAULT_MSIX,
|
||||
/* Must be last */
|
||||
NUM_OF_STATIC_MSIX,
|
||||
};
|
||||
|
||||
static int xe_irq_msix_init(struct xe_device *xe)
|
||||
{
|
||||
struct pci_dev *pdev = to_pci_dev(xe->drm.dev);
|
||||
int nvec = pci_msix_vec_count(pdev);
|
||||
|
||||
if (nvec == -EINVAL)
|
||||
return 0; /* MSI */
|
||||
|
||||
if (nvec < 0) {
|
||||
drm_err(&xe->drm, "Failed getting MSI-X vectors count: %d\n", nvec);
|
||||
return nvec;
|
||||
}
|
||||
|
||||
xe->irq.msix.nvec = nvec;
|
||||
xa_init_flags(&xe->irq.msix.indexes, XA_FLAGS_ALLOC);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static irqreturn_t guc2host_irq_handler(int irq, void *arg)
|
||||
{
|
||||
struct xe_device *xe = arg;
|
||||
struct xe_tile *tile;
|
||||
u8 id;
|
||||
|
||||
if (!atomic_read(&xe->irq.enabled))
|
||||
return IRQ_NONE;
|
||||
|
||||
for_each_tile(tile, xe, id)
|
||||
xe_guc_irq_handler(&tile->primary_gt->uc.guc,
|
||||
GUC_INTR_GUC2HOST);
|
||||
|
||||
return IRQ_HANDLED;
|
||||
}
|
||||
|
||||
static irqreturn_t xe_irq_msix_default_hwe_handler(int irq, void *arg)
|
||||
{
|
||||
unsigned int tile_id, gt_id;
|
||||
struct xe_device *xe = arg;
|
||||
struct xe_memirq *memirq;
|
||||
struct xe_hw_engine *hwe;
|
||||
enum xe_hw_engine_id id;
|
||||
struct xe_tile *tile;
|
||||
struct xe_gt *gt;
|
||||
|
||||
if (!atomic_read(&xe->irq.enabled))
|
||||
return IRQ_NONE;
|
||||
|
||||
for_each_tile(tile, xe, tile_id) {
|
||||
memirq = &tile->memirq;
|
||||
if (!memirq->bo)
|
||||
continue;
|
||||
|
||||
for_each_gt(gt, xe, gt_id) {
|
||||
if (gt->tile != tile)
|
||||
continue;
|
||||
|
||||
for_each_hw_engine(hwe, gt, id)
|
||||
xe_memirq_hwe_handler(memirq, hwe);
|
||||
}
|
||||
}
|
||||
|
||||
return IRQ_HANDLED;
|
||||
}
|
||||
|
||||
static int xe_irq_msix_alloc_vector(struct xe_device *xe, void *irq_buf,
|
||||
bool dynamic_msix, u16 *msix)
|
||||
{
|
||||
struct xa_limit limit;
|
||||
int ret;
|
||||
u32 id;
|
||||
|
||||
limit = (dynamic_msix) ? XA_LIMIT(NUM_OF_STATIC_MSIX, xe->irq.msix.nvec - 1) :
|
||||
XA_LIMIT(*msix, *msix);
|
||||
ret = xa_alloc(&xe->irq.msix.indexes, &id, irq_buf, limit, GFP_KERNEL);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
if (dynamic_msix)
|
||||
*msix = id;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Drop index @msix from the MSI-X index xarray. */
static void xe_irq_msix_release_vector(struct xe_device *xe, u16 msix)
{
	struct xarray *indexes = &xe->irq.msix.indexes;

	xa_erase(indexes, msix);
}
|
||||
|
||||
/*
 * Look up the Linux IRQ number of MSI-X vector @msix and request it as a
 * shared interrupt with @handler, @name and @irq_buf as the dev_id.
 *
 * Return: 0 on success, or the negative value returned by
 * pci_irq_vector() or request_irq().
 */
static int xe_irq_msix_request_irq_internal(struct xe_device *xe, irq_handler_t handler,
					    void *irq_buf, const char *name, u16 msix)
{
	int irq = pci_irq_vector(to_pci_dev(xe->drm.dev), msix);
	int err;

	if (irq < 0)
		return irq;

	err = request_irq(irq, handler, IRQF_SHARED, name, irq_buf);

	return err < 0 ? err : 0;
}
|
||||
|
||||
/*
 * Allocate an MSI-X index and request its IRQ.
 *
 * @msix: in/out. With @dynamic_msix false it names the index to use;
 *        with @dynamic_msix true it receives the index that was picked.
 *
 * If requesting the IRQ fails, the index reserved in the first step is
 * released again before returning.
 *
 * Return: 0 on success, negative error code otherwise.
 */
int xe_irq_msix_request_irq(struct xe_device *xe, irq_handler_t handler, void *irq_buf,
			    const char *name, bool dynamic_msix, u16 *msix)
{
	int err = xe_irq_msix_alloc_vector(xe, irq_buf, dynamic_msix, msix);

	if (err)
		return err;

	err = xe_irq_msix_request_irq_internal(xe, handler, irq_buf, name, *msix);
	if (!err)
		return 0;

	drm_err(&xe->drm, "Failed to request IRQ for MSI-X %u\n", *msix);
	xe_irq_msix_release_vector(xe, *msix);

	return err;
}
|
||||
|
||||
/*
 * Free the IRQ of MSI-X index @msix and release the index.
 *
 * The dev_id handed to free_irq() is the pointer stored for @msix in
 * xe->irq.msix.indexes; if nothing is stored the call is a no-op. If no
 * Linux IRQ matches the vector, an error is logged and the index is
 * left in place.
 */
void xe_irq_msix_free_irq(struct xe_device *xe, u16 msix)
{
	void *cookie = xa_load(&xe->irq.msix.indexes, msix);
	int irq;

	if (!cookie)
		return;

	irq = pci_irq_vector(to_pci_dev(xe->drm.dev), msix);
	if (irq >= 0) {
		free_irq(irq, cookie);
		xe_irq_msix_release_vector(xe, msix);
		return;
	}

	drm_err(&xe->drm, "MSI-X %u can't be released, there is no matching IRQ\n", msix);
}
|
||||
|
||||
int xe_irq_msix_request_irqs(struct xe_device *xe)
|
||||
{
|
||||
int err;
|
||||
u16 msix;
|
||||
|
||||
msix = GUC2HOST_MSIX;
|
||||
err = xe_irq_msix_request_irq(xe, guc2host_irq_handler, xe,
|
||||
DRIVER_NAME "-guc2host", false, &msix);
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
msix = DEFAULT_MSIX;
|
||||
err = xe_irq_msix_request_irq(xe, xe_irq_msix_default_hwe_handler, xe,
|
||||
DRIVER_NAME "-default-msix", false, &msix);
|
||||
if (err) {
|
||||
xe_irq_msix_free_irq(xe, GUC2HOST_MSIX);
|
||||
return err;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
void xe_irq_msix_free(struct xe_device *xe)
|
||||
{
|
||||
unsigned long msix;
|
||||
u32 *dummy;
|
||||
|
||||
xa_for_each(&xe->irq.msix.indexes, msix, dummy)
|
||||
xe_irq_msix_free_irq(xe, msix);
|
||||
xa_destroy(&xe->irq.msix.indexes);
|
||||
}
|
||||
|
||||
void xe_irq_msix_synchronize_irq(struct xe_device *xe)
|
||||
{
|
||||
struct pci_dev *pdev = to_pci_dev(xe->drm.dev);
|
||||
unsigned long msix;
|
||||
u32 *dummy;
|
||||
|
||||
xa_for_each(&xe->irq.msix.indexes, msix, dummy)
|
||||
synchronize_irq(pci_irq_vector(pdev, msix));
|
||||
}
|
||||
|
|
|
|||
|
|
@ -6,13 +6,21 @@
|
|||
#ifndef _XE_IRQ_H_
|
||||
#define _XE_IRQ_H_
|
||||
|
||||
#include <linux/interrupt.h>
|
||||
|
||||
#define XE_IRQ_DEFAULT_MSIX 1
|
||||
|
||||
struct xe_device;
|
||||
struct xe_tile;
|
||||
struct xe_gt;
|
||||
|
||||
int xe_irq_init(struct xe_device *xe);
|
||||
int xe_irq_install(struct xe_device *xe);
|
||||
void xe_irq_suspend(struct xe_device *xe);
|
||||
void xe_irq_resume(struct xe_device *xe);
|
||||
void xe_irq_enable_hwe(struct xe_gt *gt);
|
||||
int xe_irq_msix_request_irq(struct xe_device *xe, irq_handler_t handler, void *irq_buf,
|
||||
const char *name, bool dynamic_msix, u16 *msix);
|
||||
void xe_irq_msix_free_irq(struct xe_device *xe, u16 msix);
|
||||
|
||||
#endif
|
||||
|
|
|
|||
|
|
@ -584,6 +584,7 @@ static void set_memory_based_intr(u32 *regs, struct xe_hw_engine *hwe)
|
|||
{
|
||||
struct xe_memirq *memirq = &gt_to_tile(hwe->gt)->memirq;
|
||||
struct xe_device *xe = gt_to_xe(hwe->gt);
|
||||
u8 num_regs;
|
||||
|
||||
if (!xe_device_uses_memirq(xe))
|
||||
return;
|
||||
|
|
@ -593,12 +594,18 @@ static void set_memory_based_intr(u32 *regs, struct xe_hw_engine *hwe)
|
|||
regs[CTX_INT_MASK_ENABLE_REG] = RING_IMR(0).addr;
|
||||
regs[CTX_INT_MASK_ENABLE_PTR] = xe_memirq_enable_ptr(memirq);
|
||||
|
||||
regs[CTX_LRI_INT_REPORT_PTR] = MI_LOAD_REGISTER_IMM | MI_LRI_NUM_REGS(2) |
|
||||
num_regs = xe_device_has_msix(xe) ? 3 : 2;
|
||||
regs[CTX_LRI_INT_REPORT_PTR] = MI_LOAD_REGISTER_IMM | MI_LRI_NUM_REGS(num_regs) |
|
||||
MI_LRI_LRM_CS_MMIO | MI_LRI_FORCE_POSTED;
|
||||
regs[CTX_INT_STATUS_REPORT_REG] = RING_INT_STATUS_RPT_PTR(0).addr;
|
||||
regs[CTX_INT_STATUS_REPORT_PTR] = xe_memirq_status_ptr(memirq, hwe);
|
||||
regs[CTX_INT_SRC_REPORT_REG] = RING_INT_SRC_RPT_PTR(0).addr;
|
||||
regs[CTX_INT_SRC_REPORT_PTR] = xe_memirq_source_ptr(memirq, hwe);
|
||||
|
||||
if (xe_device_has_msix(xe)) {
|
||||
regs[CTX_CS_INT_VEC_REG] = CS_INT_VEC(0).addr;
|
||||
/* CTX_CS_INT_VEC_DATA will be set in xe_lrc_init */
|
||||
}
|
||||
}
|
||||
|
||||
static int lrc_ring_mi_mode(struct xe_hw_engine *hwe)
|
||||
|
|
@ -876,7 +883,7 @@ static void xe_lrc_finish(struct xe_lrc *lrc)
|
|||
#define PVC_CTX_ACC_CTR_THOLD (0x2a + 1)
|
||||
|
||||
static int xe_lrc_init(struct xe_lrc *lrc, struct xe_hw_engine *hwe,
|
||||
struct xe_vm *vm, u32 ring_size)
|
||||
struct xe_vm *vm, u32 ring_size, u16 msix_vec)
|
||||
{
|
||||
struct xe_gt *gt = hwe->gt;
|
||||
struct xe_tile *tile = gt_to_tile(gt);
|
||||
|
|
@ -945,6 +952,14 @@ static int xe_lrc_init(struct xe_lrc *lrc, struct xe_hw_engine *hwe,
|
|||
xe_drm_client_add_bo(vm->xef->client, lrc->bo);
|
||||
}
|
||||
|
||||
if (xe_device_has_msix(xe)) {
|
||||
xe_lrc_write_ctx_reg(lrc, CTX_INT_STATUS_REPORT_PTR,
|
||||
xe_memirq_status_ptr(&tile->memirq, hwe));
|
||||
xe_lrc_write_ctx_reg(lrc, CTX_INT_SRC_REPORT_PTR,
|
||||
xe_memirq_source_ptr(&tile->memirq, hwe));
|
||||
xe_lrc_write_ctx_reg(lrc, CTX_CS_INT_VEC_DATA, msix_vec << 16 | msix_vec);
|
||||
}
|
||||
|
||||
if (xe_gt_has_indirect_ring_state(gt)) {
|
||||
xe_lrc_write_ctx_reg(lrc, CTX_INDIRECT_RING_STATE,
|
||||
__xe_lrc_indirect_ring_ggtt_addr(lrc));
|
||||
|
|
@ -1005,6 +1020,7 @@ static int xe_lrc_init(struct xe_lrc *lrc, struct xe_hw_engine *hwe,
|
|||
* @hwe: Hardware Engine
|
||||
* @vm: The VM (address space)
|
||||
* @ring_size: LRC ring size
|
||||
* @msix_vec: MSI-X interrupt vector (for platforms that support it)
|
||||
*
|
||||
* Allocate and initialize the Logical Ring Context (LRC).
|
||||
*
|
||||
|
|
@ -1012,7 +1028,7 @@ static int xe_lrc_init(struct xe_lrc *lrc, struct xe_hw_engine *hwe,
|
|||
* upon failure.
|
||||
*/
|
||||
struct xe_lrc *xe_lrc_create(struct xe_hw_engine *hwe, struct xe_vm *vm,
|
||||
u32 ring_size)
|
||||
u32 ring_size, u16 msix_vec)
|
||||
{
|
||||
struct xe_lrc *lrc;
|
||||
int err;
|
||||
|
|
@ -1021,7 +1037,7 @@ struct xe_lrc *xe_lrc_create(struct xe_hw_engine *hwe, struct xe_vm *vm,
|
|||
if (!lrc)
|
||||
return ERR_PTR(-ENOMEM);
|
||||
|
||||
err = xe_lrc_init(lrc, hwe, vm, ring_size);
|
||||
err = xe_lrc_init(lrc, hwe, vm, ring_size, msix_vec);
|
||||
if (err) {
|
||||
kfree(lrc);
|
||||
return ERR_PTR(err);
|
||||
|
|
|
|||
|
|
@ -42,7 +42,7 @@ struct xe_lrc_snapshot {
|
|||
#define LRC_PPHWSP_SCRATCH_ADDR (0x34 * 4)
|
||||
|
||||
struct xe_lrc *xe_lrc_create(struct xe_hw_engine *hwe, struct xe_vm *vm,
|
||||
u32 ring_size);
|
||||
u32 ring_size, u16 msix_vec);
|
||||
void xe_lrc_destroy(struct kref *ref);
|
||||
|
||||
/**
|
||||
|
|
|
|||
|
|
@ -16,7 +16,6 @@
|
|||
#include "instructions/xe_mi_commands.h"
|
||||
#include "regs/xe_engine_regs.h"
|
||||
#include "regs/xe_gt_regs.h"
|
||||
#include "regs/xe_lrc_layout.h"
|
||||
#include "regs/xe_oa_regs.h"
|
||||
#include "xe_assert.h"
|
||||
#include "xe_bb.h"
|
||||
|
|
@ -28,7 +27,6 @@
|
|||
#include "xe_gt_mcr.h"
|
||||
#include "xe_gt_printk.h"
|
||||
#include "xe_guc_pc.h"
|
||||
#include "xe_lrc.h"
|
||||
#include "xe_macros.h"
|
||||
#include "xe_mmio.h"
|
||||
#include "xe_oa.h"
|
||||
|
|
@ -74,12 +72,6 @@ struct xe_oa_config {
|
|||
struct rcu_head rcu;
|
||||
};
|
||||
|
||||
struct flex {
|
||||
struct xe_reg reg;
|
||||
u32 offset;
|
||||
u32 value;
|
||||
};
|
||||
|
||||
struct xe_oa_open_param {
|
||||
struct xe_file *xef;
|
||||
u32 oa_unit_id;
|
||||
|
|
@ -97,6 +89,7 @@ struct xe_oa_open_param {
|
|||
int num_syncs;
|
||||
struct xe_sync_entry *syncs;
|
||||
size_t oa_buffer_size;
|
||||
int wait_num_reports;
|
||||
};
|
||||
|
||||
struct xe_oa_config_bo {
|
||||
|
|
@ -241,11 +234,10 @@ static void oa_timestamp_clear(struct xe_oa_stream *stream, u32 *report)
|
|||
static bool xe_oa_buffer_check_unlocked(struct xe_oa_stream *stream)
|
||||
{
|
||||
u32 gtt_offset = xe_bo_ggtt_addr(stream->oa_buffer.bo);
|
||||
u32 tail, hw_tail, partial_report_size, available;
|
||||
int report_size = stream->oa_buffer.format->size;
|
||||
u32 tail, hw_tail;
|
||||
unsigned long flags;
|
||||
bool pollin;
|
||||
u32 partial_report_size;
|
||||
|
||||
spin_lock_irqsave(&stream->oa_buffer.ptr_lock, flags);
|
||||
|
||||
|
|
@ -289,8 +281,8 @@ static bool xe_oa_buffer_check_unlocked(struct xe_oa_stream *stream)
|
|||
|
||||
stream->oa_buffer.tail = tail;
|
||||
|
||||
pollin = xe_oa_circ_diff(stream, stream->oa_buffer.tail,
|
||||
stream->oa_buffer.head) >= report_size;
|
||||
available = xe_oa_circ_diff(stream, stream->oa_buffer.tail, stream->oa_buffer.head);
|
||||
pollin = available >= stream->wait_num_reports * report_size;
|
||||
|
||||
spin_unlock_irqrestore(&stream->oa_buffer.ptr_lock, flags);
|
||||
|
||||
|
|
@ -605,19 +597,38 @@ static __poll_t xe_oa_poll(struct file *file, poll_table *wait)
|
|||
return ret;
|
||||
}
|
||||
|
||||
static void xe_oa_lock_vma(struct xe_exec_queue *q)
|
||||
{
|
||||
if (q->vm) {
|
||||
down_read(&q->vm->lock);
|
||||
xe_vm_lock(q->vm, false);
|
||||
}
|
||||
}
|
||||
|
||||
static void xe_oa_unlock_vma(struct xe_exec_queue *q)
|
||||
{
|
||||
if (q->vm) {
|
||||
xe_vm_unlock(q->vm);
|
||||
up_read(&q->vm->lock);
|
||||
}
|
||||
}
|
||||
|
||||
static struct dma_fence *xe_oa_submit_bb(struct xe_oa_stream *stream, enum xe_oa_submit_deps deps,
|
||||
struct xe_bb *bb)
|
||||
{
|
||||
struct xe_exec_queue *q = stream->exec_q ?: stream->k_exec_q;
|
||||
struct xe_sched_job *job;
|
||||
struct dma_fence *fence;
|
||||
int err = 0;
|
||||
|
||||
/* Kernel configuration is issued on stream->k_exec_q, not stream->exec_q */
|
||||
job = xe_bb_create_job(stream->k_exec_q, bb);
|
||||
xe_oa_lock_vma(q);
|
||||
|
||||
job = xe_bb_create_job(q, bb);
|
||||
if (IS_ERR(job)) {
|
||||
err = PTR_ERR(job);
|
||||
goto exit;
|
||||
}
|
||||
job->ggtt = true;
|
||||
|
||||
if (deps == XE_OA_SUBMIT_ADD_DEPS) {
|
||||
for (int i = 0; i < stream->num_syncs && !err; i++)
|
||||
|
|
@ -632,10 +643,13 @@ static struct dma_fence *xe_oa_submit_bb(struct xe_oa_stream *stream, enum xe_oa
|
|||
fence = dma_fence_get(&job->drm.s_fence->finished);
|
||||
xe_sched_job_push(job);
|
||||
|
||||
xe_oa_unlock_vma(q);
|
||||
|
||||
return fence;
|
||||
err_put_job:
|
||||
xe_sched_job_put(job);
|
||||
exit:
|
||||
xe_oa_unlock_vma(q);
|
||||
return ERR_PTR(err);
|
||||
}
|
||||
|
||||
|
|
@ -684,63 +698,19 @@ static void xe_oa_free_configs(struct xe_oa_stream *stream)
|
|||
dma_fence_put(stream->last_fence);
|
||||
}
|
||||
|
||||
static void xe_oa_store_flex(struct xe_oa_stream *stream, struct xe_lrc *lrc,
|
||||
struct xe_bb *bb, const struct flex *flex, u32 count)
|
||||
{
|
||||
u32 offset = xe_bo_ggtt_addr(lrc->bo);
|
||||
|
||||
do {
|
||||
bb->cs[bb->len++] = MI_STORE_DATA_IMM | MI_SDI_GGTT | MI_SDI_NUM_DW(1);
|
||||
bb->cs[bb->len++] = offset + flex->offset * sizeof(u32);
|
||||
bb->cs[bb->len++] = 0;
|
||||
bb->cs[bb->len++] = flex->value;
|
||||
|
||||
} while (flex++, --count);
|
||||
}
|
||||
|
||||
static int xe_oa_modify_ctx_image(struct xe_oa_stream *stream, struct xe_lrc *lrc,
|
||||
const struct flex *flex, u32 count)
|
||||
static int xe_oa_load_with_lri(struct xe_oa_stream *stream, struct xe_oa_reg *reg_lri, u32 count)
|
||||
{
|
||||
struct dma_fence *fence;
|
||||
struct xe_bb *bb;
|
||||
int err;
|
||||
|
||||
bb = xe_bb_new(stream->gt, 4 * count, false);
|
||||
bb = xe_bb_new(stream->gt, 2 * count + 1, false);
|
||||
if (IS_ERR(bb)) {
|
||||
err = PTR_ERR(bb);
|
||||
goto exit;
|
||||
}
|
||||
|
||||
xe_oa_store_flex(stream, lrc, bb, flex, count);
|
||||
|
||||
fence = xe_oa_submit_bb(stream, XE_OA_SUBMIT_NO_DEPS, bb);
|
||||
if (IS_ERR(fence)) {
|
||||
err = PTR_ERR(fence);
|
||||
goto free_bb;
|
||||
}
|
||||
xe_bb_free(bb, fence);
|
||||
dma_fence_put(fence);
|
||||
|
||||
return 0;
|
||||
free_bb:
|
||||
xe_bb_free(bb, NULL);
|
||||
exit:
|
||||
return err;
|
||||
}
|
||||
|
||||
static int xe_oa_load_with_lri(struct xe_oa_stream *stream, struct xe_oa_reg *reg_lri)
|
||||
{
|
||||
struct dma_fence *fence;
|
||||
struct xe_bb *bb;
|
||||
int err;
|
||||
|
||||
bb = xe_bb_new(stream->gt, 3, false);
|
||||
if (IS_ERR(bb)) {
|
||||
err = PTR_ERR(bb);
|
||||
goto exit;
|
||||
}
|
||||
|
||||
write_cs_mi_lri(bb, reg_lri, 1);
|
||||
write_cs_mi_lri(bb, reg_lri, count);
|
||||
|
||||
fence = xe_oa_submit_bb(stream, XE_OA_SUBMIT_NO_DEPS, bb);
|
||||
if (IS_ERR(fence)) {
|
||||
|
|
@ -760,71 +730,55 @@ static int xe_oa_load_with_lri(struct xe_oa_stream *stream, struct xe_oa_reg *re
|
|||
/*
 * Program the OAR (render) context registers for @stream.
 *
 * @enable: true to turn counters on, false to turn them off.
 *
 * Three register/value pairs are built: OACTXCONTROL, OAR_OACONTROL and
 * RING_CONTEXT_CONTROL. They are handed to xe_oa_load_with_lri() in one
 * call.
 *
 * Return: the result of xe_oa_load_with_lri().
 */
static int xe_oa_configure_oar_context(struct xe_oa_stream *stream, bool enable)
{
	const struct xe_oa_format *format = stream->oa_buffer.format;
	u32 oacontrol = __format_to_oactrl(format, OAR_OACONTROL_COUNTER_SEL_MASK) |
		(enable ? OAR_OACONTROL_COUNTER_ENABLE : 0);

	struct xe_oa_reg reg_lri[] = {
		{
			OACTXCONTROL(stream->hwe->mmio_base),
			enable ? OA_COUNTER_RESUME : 0,
		},
		{
			OAR_OACONTROL,
			oacontrol,
		},
		{
			RING_CONTEXT_CONTROL(stream->hwe->mmio_base),
			/* Masked write: sets or clears only the OAC context-enable bit. */
			_MASKED_FIELD(CTX_CTRL_OAC_CONTEXT_ENABLE,
				      enable ? CTX_CTRL_OAC_CONTEXT_ENABLE : 0)
		},
	};

	return xe_oa_load_with_lri(stream, reg_lri, ARRAY_SIZE(reg_lri));
}
|
||||
|
||||
/*
 * Program the OAC (compute) context registers for @stream.
 *
 * @enable: true to turn counters on, false to turn them off.
 *
 * The CCS select value is first written to the OA control register over
 * MMIO, then three register/value pairs (OACTXCONTROL, OAC_OACONTROL,
 * RING_CONTEXT_CONTROL) are handed to xe_oa_load_with_lri() in one call.
 *
 * Return: the result of xe_oa_load_with_lri().
 */
static int xe_oa_configure_oac_context(struct xe_oa_stream *stream, bool enable)
{
	const struct xe_oa_format *format = stream->oa_buffer.format;
	u32 oacontrol = __format_to_oactrl(format, OAR_OACONTROL_COUNTER_SEL_MASK) |
		(enable ? OAR_OACONTROL_COUNTER_ENABLE : 0);
	struct xe_oa_reg reg_lri[] = {
		{
			OACTXCONTROL(stream->hwe->mmio_base),
			enable ? OA_COUNTER_RESUME : 0,
		},
		{
			OAC_OACONTROL,
			oacontrol
		},
		{
			RING_CONTEXT_CONTROL(stream->hwe->mmio_base),
			/* Masked writes: only the two named bits are touched. */
			_MASKED_FIELD(CTX_CTRL_OAC_CONTEXT_ENABLE,
				      enable ? CTX_CTRL_OAC_CONTEXT_ENABLE : 0) |
			_MASKED_FIELD(CTX_CTRL_RUN_ALONE, enable ? CTX_CTRL_RUN_ALONE : 0),
		},
	};

	/* Set ccs select to enable programming of OAC_OACONTROL */
	xe_mmio_write32(&stream->gt->mmio, __oa_regs(stream)->oa_ctrl,
			__oa_ccs_select(stream));

	return xe_oa_load_with_lri(stream, reg_lri, ARRAY_SIZE(reg_lri));
}
|
||||
|
||||
static int xe_oa_configure_oa_context(struct xe_oa_stream *stream, bool enable)
|
||||
|
|
@ -1285,6 +1239,17 @@ static int xe_oa_set_prop_oa_buffer_size(struct xe_oa *oa, u64 value,
|
|||
return 0;
|
||||
}
|
||||
|
||||
/*
 * Property setter for DRM_XE_OA_PROPERTY_WAIT_NUM_REPORTS.
 *
 * @value: number of reports requested by userspace.
 *
 * param->wait_num_reports is an int, so besides zero any value above
 * INT_MAX is rejected too rather than being truncated on assignment.
 *
 * Return: 0 on success, -EINVAL for an out-of-range @value.
 */
static int xe_oa_set_prop_wait_num_reports(struct xe_oa *oa, u64 value,
					   struct xe_oa_open_param *param)
{
	if (!value || value > INT_MAX) {
		drm_dbg(&oa->xe->drm, "wait_num_reports %llu\n", value);
		return -EINVAL;
	}

	param->wait_num_reports = value;
	return 0;
}
|
||||
|
||||
static int xe_oa_set_prop_ret_inval(struct xe_oa *oa, u64 value,
|
||||
struct xe_oa_open_param *param)
|
||||
{
|
||||
|
|
@ -1306,6 +1271,7 @@ static const xe_oa_set_property_fn xe_oa_set_property_funcs_open[] = {
|
|||
[DRM_XE_OA_PROPERTY_NUM_SYNCS] = xe_oa_set_prop_num_syncs,
|
||||
[DRM_XE_OA_PROPERTY_SYNCS] = xe_oa_set_prop_syncs_user,
|
||||
[DRM_XE_OA_PROPERTY_OA_BUFFER_SIZE] = xe_oa_set_prop_oa_buffer_size,
|
||||
[DRM_XE_OA_PROPERTY_WAIT_NUM_REPORTS] = xe_oa_set_prop_wait_num_reports,
|
||||
};
|
||||
|
||||
static const xe_oa_set_property_fn xe_oa_set_property_funcs_config[] = {
|
||||
|
|
@ -1321,6 +1287,7 @@ static const xe_oa_set_property_fn xe_oa_set_property_funcs_config[] = {
|
|||
[DRM_XE_OA_PROPERTY_NUM_SYNCS] = xe_oa_set_prop_num_syncs,
|
||||
[DRM_XE_OA_PROPERTY_SYNCS] = xe_oa_set_prop_syncs_user,
|
||||
[DRM_XE_OA_PROPERTY_OA_BUFFER_SIZE] = xe_oa_set_prop_ret_inval,
|
||||
[DRM_XE_OA_PROPERTY_WAIT_NUM_REPORTS] = xe_oa_set_prop_ret_inval,
|
||||
};
|
||||
|
||||
static int xe_oa_user_ext_set_property(struct xe_oa *oa, enum xe_oa_user_extn_from from,
|
||||
|
|
@ -1704,81 +1671,6 @@ static const struct file_operations xe_oa_fops = {
|
|||
.mmap = xe_oa_mmap,
|
||||
};
|
||||
|
||||
static bool engine_supports_mi_query(struct xe_hw_engine *hwe)
|
||||
{
|
||||
return hwe->class == XE_ENGINE_CLASS_RENDER ||
|
||||
hwe->class == XE_ENGINE_CLASS_COMPUTE;
|
||||
}
|
||||
|
||||
static bool xe_oa_find_reg_in_lri(u32 *state, u32 reg, u32 *offset, u32 end)
|
||||
{
|
||||
u32 idx = *offset;
|
||||
u32 len = min(MI_LRI_LEN(state[idx]) + idx, end);
|
||||
bool found = false;
|
||||
|
||||
idx++;
|
||||
for (; idx < len; idx += 2) {
|
||||
if (state[idx] == reg) {
|
||||
found = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
*offset = idx;
|
||||
return found;
|
||||
}
|
||||
|
||||
#define IS_MI_LRI_CMD(x) (REG_FIELD_GET(MI_OPCODE, (x)) == \
|
||||
REG_FIELD_GET(MI_OPCODE, MI_LOAD_REGISTER_IMM))
|
||||
|
||||
static u32 xe_oa_context_image_offset(struct xe_oa_stream *stream, u32 reg)
|
||||
{
|
||||
struct xe_lrc *lrc = stream->exec_q->lrc[0];
|
||||
u32 len = (xe_gt_lrc_size(stream->gt, stream->hwe->class) +
|
||||
lrc->ring.size) / sizeof(u32);
|
||||
u32 offset = xe_lrc_regs_offset(lrc) / sizeof(u32);
|
||||
u32 *state = (u32 *)lrc->bo->vmap.vaddr;
|
||||
|
||||
if (drm_WARN_ON(&stream->oa->xe->drm, !state))
|
||||
return U32_MAX;
|
||||
|
||||
for (; offset < len; ) {
|
||||
if (IS_MI_LRI_CMD(state[offset])) {
|
||||
/*
|
||||
* We expect reg-value pairs in MI_LRI command, so
|
||||
* MI_LRI_LEN() should be even
|
||||
*/
|
||||
drm_WARN_ON(&stream->oa->xe->drm,
|
||||
MI_LRI_LEN(state[offset]) & 0x1);
|
||||
|
||||
if (xe_oa_find_reg_in_lri(state, reg, &offset, len))
|
||||
break;
|
||||
} else {
|
||||
offset++;
|
||||
}
|
||||
}
|
||||
|
||||
return offset < len ? offset : U32_MAX;
|
||||
}
|
||||
|
||||
static int xe_oa_set_ctx_ctrl_offset(struct xe_oa_stream *stream)
|
||||
{
|
||||
struct xe_reg reg = OACTXCONTROL(stream->hwe->mmio_base);
|
||||
u32 offset = stream->oa->ctx_oactxctrl_offset[stream->hwe->class];
|
||||
|
||||
/* Do this only once. Failure is stored as offset of U32_MAX */
|
||||
if (offset)
|
||||
goto exit;
|
||||
|
||||
offset = xe_oa_context_image_offset(stream, reg.addr);
|
||||
stream->oa->ctx_oactxctrl_offset[stream->hwe->class] = offset;
|
||||
|
||||
drm_dbg(&stream->oa->xe->drm, "%s oa ctx control at 0x%08x dword offset\n",
|
||||
stream->hwe->name, offset);
|
||||
exit:
|
||||
return offset && offset != U32_MAX ? 0 : -ENODEV;
|
||||
}
|
||||
|
||||
static int xe_oa_stream_init(struct xe_oa_stream *stream,
|
||||
struct xe_oa_open_param *param)
|
||||
{
|
||||
|
|
@ -1797,6 +1689,7 @@ static int xe_oa_stream_init(struct xe_oa_stream *stream,
|
|||
stream->periodic = param->period_exponent > 0;
|
||||
stream->period_exponent = param->period_exponent;
|
||||
stream->no_preempt = param->no_preempt;
|
||||
stream->wait_num_reports = param->wait_num_reports;
|
||||
|
||||
stream->xef = xe_file_get(param->xef);
|
||||
stream->num_syncs = param->num_syncs;
|
||||
|
|
@ -1815,17 +1708,6 @@ static int xe_oa_stream_init(struct xe_oa_stream *stream,
|
|||
else
|
||||
stream->oa_buffer.circ_size = param->oa_buffer_size;
|
||||
|
||||
if (stream->exec_q && engine_supports_mi_query(stream->hwe)) {
|
||||
/* If we don't find the context offset, just return error */
|
||||
ret = xe_oa_set_ctx_ctrl_offset(stream);
|
||||
if (ret) {
|
||||
drm_err(&stream->oa->xe->drm,
|
||||
"xe_oa_set_ctx_ctrl_offset failed for %s\n",
|
||||
stream->hwe->name);
|
||||
goto exit;
|
||||
}
|
||||
}
|
||||
|
||||
stream->oa_config = xe_oa_get_oa_config(stream->oa, param->metric_set);
|
||||
if (!stream->oa_config) {
|
||||
drm_dbg(&stream->oa->xe->drm, "Invalid OA config id=%i\n", param->metric_set);
|
||||
|
|
@ -2094,8 +1976,8 @@ int xe_oa_stream_open_ioctl(struct drm_device *dev, u64 data, struct drm_file *f
|
|||
if (XE_IOCTL_DBG(oa->xe, !param.exec_q))
|
||||
return -ENOENT;
|
||||
|
||||
if (param.exec_q->width > 1)
|
||||
drm_dbg(&oa->xe->drm, "exec_q->width > 1, programming only exec_q->lrc[0]\n");
|
||||
if (XE_IOCTL_DBG(oa->xe, param.exec_q->width > 1))
|
||||
return -EOPNOTSUPP;
|
||||
}
|
||||
|
||||
/*
|
||||
|
|
@ -2156,6 +2038,14 @@ int xe_oa_stream_open_ioctl(struct drm_device *dev, u64 data, struct drm_file *f
|
|||
if (!param.oa_buffer_size)
|
||||
param.oa_buffer_size = DEFAULT_XE_OA_BUFFER_SIZE;
|
||||
|
||||
if (!param.wait_num_reports)
|
||||
param.wait_num_reports = 1;
|
||||
if (param.wait_num_reports > param.oa_buffer_size / f->size) {
|
||||
drm_dbg(&oa->xe->drm, "wait_num_reports %d\n", param.wait_num_reports);
|
||||
ret = -EINVAL;
|
||||
goto err_exec_q;
|
||||
}
|
||||
|
||||
ret = xe_oa_parse_syncs(oa, &param);
|
||||
if (ret)
|
||||
goto err_exec_q;
|
||||
|
|
|
|||
|
|
@ -138,9 +138,6 @@ struct xe_oa {
|
|||
/** @metrics_idr: List of dynamic configurations (struct xe_oa_config) */
|
||||
struct idr metrics_idr;
|
||||
|
||||
/** @ctx_oactxctrl_offset: offset of OACTXCONTROL register in context image */
|
||||
u32 ctx_oactxctrl_offset[XE_ENGINE_CLASS_MAX];
|
||||
|
||||
/** @oa_formats: tracks all OA formats across platforms */
|
||||
const struct xe_oa_format *oa_formats;
|
||||
|
||||
|
|
@ -218,6 +215,9 @@ struct xe_oa_stream {
|
|||
/** @pollin: Whether there is data available to read */
|
||||
bool pollin;
|
||||
|
||||
/** @wait_num_reports: Number of reports to wait for before signalling pollin */
|
||||
int wait_num_reports;
|
||||
|
||||
/** @periodic: Whether periodic sampling is currently enabled */
|
||||
bool periodic;
|
||||
|
||||
|
|
|
|||
|
|
@ -7,6 +7,7 @@
|
|||
|
||||
#include <linux/fault-inject.h>
|
||||
#include <linux/pm_runtime.h>
|
||||
#include <linux/suspend.h>
|
||||
|
||||
#include <drm/drm_managed.h>
|
||||
#include <drm/ttm/ttm_placement.h>
|
||||
|
|
@ -607,7 +608,8 @@ static bool xe_pm_suspending_or_resuming(struct xe_device *xe)
|
|||
struct device *dev = xe->drm.dev;
|
||||
|
||||
return dev->power.runtime_status == RPM_SUSPENDING ||
|
||||
dev->power.runtime_status == RPM_RESUMING;
|
||||
dev->power.runtime_status == RPM_RESUMING ||
|
||||
pm_suspend_target_state != PM_SUSPEND_ON;
|
||||
#else
|
||||
return false;
|
||||
#endif
|
||||
|
|
|
|||
|
|
@ -672,7 +672,8 @@ static int query_oa_units(struct xe_device *xe,
|
|||
du->oa_unit_type = u->type;
|
||||
du->oa_timestamp_freq = xe_oa_timestamp_frequency(gt);
|
||||
du->capabilities = DRM_XE_OA_CAPS_BASE | DRM_XE_OA_CAPS_SYNCS |
|
||||
DRM_XE_OA_CAPS_OA_BUFFER_SIZE;
|
||||
DRM_XE_OA_CAPS_OA_BUFFER_SIZE |
|
||||
DRM_XE_OA_CAPS_WAIT_NUM_REPORTS;
|
||||
|
||||
j = 0;
|
||||
for_each_hw_engine(hwe, gt, hwe_id) {
|
||||
|
|
|
|||
|
|
@ -221,7 +221,10 @@ static int emit_pipe_imm_ggtt(u32 addr, u32 value, bool stall_only, u32 *dw,
|
|||
|
||||
static u32 get_ppgtt_flag(struct xe_sched_job *job)
|
||||
{
|
||||
return job->q->vm ? BIT(8) : 0;
|
||||
if (job->q->vm && !job->ggtt)
|
||||
return BIT(8);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int emit_copy_timestamp(struct xe_lrc *lrc, u32 *dw, int i)
|
||||
|
|
|
|||
|
|
@ -56,6 +56,8 @@ struct xe_sched_job {
|
|||
u32 migrate_flush_flags;
|
||||
/** @ring_ops_flush_tlb: The ring ops need to flush TLB before payload. */
|
||||
bool ring_ops_flush_tlb;
|
||||
/** @ggtt: mapped in ggtt. */
|
||||
bool ggtt;
|
||||
/** @ptrs: per instance pointers. */
|
||||
struct xe_job_ptrs ptrs[];
|
||||
};
|
||||
|
|
|
|||
|
|
@ -60,8 +60,8 @@ TRACE_EVENT(xe_bo_move,
|
|||
TP_STRUCT__entry(
|
||||
__field(struct xe_bo *, bo)
|
||||
__field(size_t, size)
|
||||
__field(u32, new_placement)
|
||||
__field(u32, old_placement)
|
||||
__string(new_placement_name, xe_mem_type_to_name[new_placement])
|
||||
__string(old_placement_name, xe_mem_type_to_name[old_placement])
|
||||
__string(device_id, __dev_name_bo(bo))
|
||||
__field(bool, move_lacks_source)
|
||||
),
|
||||
|
|
@ -69,15 +69,15 @@ TRACE_EVENT(xe_bo_move,
|
|||
TP_fast_assign(
|
||||
__entry->bo = bo;
|
||||
__entry->size = bo->size;
|
||||
__entry->new_placement = new_placement;
|
||||
__entry->old_placement = old_placement;
|
||||
__assign_str(new_placement_name);
|
||||
__assign_str(old_placement_name);
|
||||
__assign_str(device_id);
|
||||
__entry->move_lacks_source = move_lacks_source;
|
||||
),
|
||||
TP_printk("move_lacks_source:%s, migrate object %p [size %zu] from %s to %s device_id:%s",
|
||||
__entry->move_lacks_source ? "yes" : "no", __entry->bo, __entry->size,
|
||||
xe_mem_type_to_name[__entry->old_placement],
|
||||
xe_mem_type_to_name[__entry->new_placement], __get_str(device_id))
|
||||
__get_str(old_placement_name),
|
||||
__get_str(new_placement_name), __get_str(device_id))
|
||||
);
|
||||
|
||||
DECLARE_EVENT_CLASS(xe_vma,
|
||||
|
|
|
|||
|
|
@ -1487,6 +1487,7 @@ struct drm_xe_oa_unit {
|
|||
#define DRM_XE_OA_CAPS_BASE (1 << 0)
|
||||
#define DRM_XE_OA_CAPS_SYNCS (1 << 1)
|
||||
#define DRM_XE_OA_CAPS_OA_BUFFER_SIZE (1 << 2)
|
||||
#define DRM_XE_OA_CAPS_WAIT_NUM_REPORTS (1 << 3)
|
||||
|
||||
/** @oa_timestamp_freq: OA timestamp freq */
|
||||
__u64 oa_timestamp_freq;
|
||||
|
|
@ -1660,6 +1661,12 @@ enum drm_xe_oa_property_id {
|
|||
* buffer is allocated by default.
|
||||
*/
|
||||
DRM_XE_OA_PROPERTY_OA_BUFFER_SIZE,
|
||||
|
||||
/**
|
||||
* @DRM_XE_OA_PROPERTY_WAIT_NUM_REPORTS: Number of reports to wait
|
||||
* for before unblocking poll or read
|
||||
*/
|
||||
DRM_XE_OA_PROPERTY_WAIT_NUM_REPORTS,
|
||||
};
|
||||
|
||||
/**
|
||||
|
|
|
|||
Loading…
Reference in New Issue
Block a user