Merge tag 'drm-intel-gt-next-2024-10-23' of https://gitlab.freedesktop.org/drm/i915/kernel into drm-next

Driver Changes:

Fixes/improvements/new stuff:

- Enable PXP GuC autoteardown flow [guc] (Juston Li)
- Retry RING_HEAD reset until it get sticks [gt] (Nitin Gote)
- Add basic PMU support for gen2 [pmu] (Ville Syrjälä)

Miscellaneous:

- Prevent a possible int overflow in wq offsets [guc] (Nikita Zhandarovich)
- PMU code cleanups (Lucas De Marchi)
- Fixed "CPU" -> "GPU" typo [gt] (Zhang He)
- Gen2/3 interrupt handling cleanup (Ville Syrjälä)

Signed-off-by: Dave Airlie <airlied@redhat.com>
From: Tvrtko Ursulin <tursulin@igalia.com>
Link: https://patchwork.freedesktop.org/patch/msgid/Zxi-3wkIwI-Y1Qvj@linux
This commit is contained in:
Dave Airlie 2024-10-25 05:57:38 +10:00
commit c9ff14d033
10 changed files with 80 additions and 59 deletions

View File

@ -169,7 +169,7 @@ static u32 *__gen2_emit_breadcrumb(struct i915_request *rq, u32 *cs,
return cs;
}
u32 *gen3_emit_breadcrumb(struct i915_request *rq, u32 *cs)
u32 *gen2_emit_breadcrumb(struct i915_request *rq, u32 *cs)
{
return __gen2_emit_breadcrumb(rq, cs, 16, 8);
}
@ -248,7 +248,7 @@ int i830_emit_bb_start(struct i915_request *rq,
return 0;
}
int gen3_emit_bb_start(struct i915_request *rq,
int gen2_emit_bb_start(struct i915_request *rq,
u64 offset, u32 len,
unsigned int dispatch_flags)
{
@ -291,30 +291,13 @@ int gen4_emit_bb_start(struct i915_request *rq,
}
void gen2_irq_enable(struct intel_engine_cs *engine)
{
struct drm_i915_private *i915 = engine->i915;
i915->irq_mask &= ~engine->irq_enable_mask;
intel_uncore_write16(&i915->uncore, GEN2_IMR, i915->irq_mask);
ENGINE_POSTING_READ16(engine, RING_IMR);
}
void gen2_irq_disable(struct intel_engine_cs *engine)
{
struct drm_i915_private *i915 = engine->i915;
i915->irq_mask |= engine->irq_enable_mask;
intel_uncore_write16(&i915->uncore, GEN2_IMR, i915->irq_mask);
}
void gen3_irq_enable(struct intel_engine_cs *engine)
{
engine->i915->irq_mask &= ~engine->irq_enable_mask;
intel_uncore_write(engine->uncore, GEN2_IMR, engine->i915->irq_mask);
intel_uncore_posting_read_fw(engine->uncore, GEN2_IMR);
}
void gen3_irq_disable(struct intel_engine_cs *engine)
void gen2_irq_disable(struct intel_engine_cs *engine)
{
engine->i915->irq_mask |= engine->irq_enable_mask;
intel_uncore_write(engine->uncore, GEN2_IMR, engine->i915->irq_mask);

View File

@ -15,13 +15,13 @@ int gen2_emit_flush(struct i915_request *rq, u32 mode);
int gen4_emit_flush_rcs(struct i915_request *rq, u32 mode);
int gen4_emit_flush_vcs(struct i915_request *rq, u32 mode);
u32 *gen3_emit_breadcrumb(struct i915_request *rq, u32 *cs);
u32 *gen2_emit_breadcrumb(struct i915_request *rq, u32 *cs);
u32 *gen5_emit_breadcrumb(struct i915_request *rq, u32 *cs);
int i830_emit_bb_start(struct i915_request *rq,
u64 offset, u32 len,
unsigned int dispatch_flags);
int gen3_emit_bb_start(struct i915_request *rq,
int gen2_emit_bb_start(struct i915_request *rq,
u64 offset, u32 len,
unsigned int dispatch_flags);
int gen4_emit_bb_start(struct i915_request *rq,
@ -30,8 +30,6 @@ int gen4_emit_bb_start(struct i915_request *rq,
void gen2_irq_enable(struct intel_engine_cs *engine);
void gen2_irq_disable(struct intel_engine_cs *engine);
void gen3_irq_enable(struct intel_engine_cs *engine);
void gen3_irq_disable(struct intel_engine_cs *engine);
void gen5_irq_enable(struct intel_engine_cs *engine);
void gen5_irq_disable(struct intel_engine_cs *engine);

View File

@ -15,6 +15,7 @@
#define HEAD_WRAP_COUNT 0xFFE00000
#define HEAD_WRAP_ONE 0x00200000
#define HEAD_ADDR 0x001FFFFC
#define HEAD_WAIT_I8XX (1 << 0) /* gen2, PRBx_HEAD */
#define RING_START(base) _MMIO((base) + 0x38)
#define RING_CTL(base) _MMIO((base) + 0x3c)
#define RING_CTL_SIZE(size) ((size) - PAGE_SIZE) /* in bytes -> pages */
@ -26,7 +27,6 @@
#define RING_VALID_MASK 0x00000001
#define RING_VALID 0x00000001
#define RING_INVALID 0x00000000
#define RING_WAIT_I8XX (1 << 0) /* gen2, PRBx_HEAD */
#define RING_WAIT (1 << 11) /* gen3+, PRBx_CTL */
#define RING_WAIT_SEMAPHORE (1 << 10) /* gen6+ */
#define RING_SYNC_0(base) _MMIO((base) + 0x40)

View File

@ -431,7 +431,7 @@ static int llc_show(struct seq_file *m, void *data)
max_gpu_freq /= GEN9_FREQ_SCALER;
}
seq_puts(m, "GPU freq (MHz)\tEffective CPU freq (MHz)\tEffective Ring freq (MHz)\n");
seq_puts(m, "GPU freq (MHz)\tEffective GPU freq (MHz)\tEffective Ring freq (MHz)\n");
wakeref = intel_runtime_pm_get(gt->uncore->rpm);
for (gpu_freq = min_gpu_freq; gpu_freq <= max_gpu_freq; gpu_freq++) {

View File

@ -192,6 +192,7 @@ static bool stop_ring(struct intel_engine_cs *engine)
static int xcs_resume(struct intel_engine_cs *engine)
{
struct intel_ring *ring = engine->legacy.ring;
ktime_t kt;
ENGINE_TRACE(engine, "ring:{HEAD:%04x, TAIL:%04x}\n",
ring->head, ring->tail);
@ -230,9 +231,27 @@ static int xcs_resume(struct intel_engine_cs *engine)
set_pp_dir(engine);
/* First wake the ring up to an empty/idle ring */
ENGINE_WRITE_FW(engine, RING_HEAD, ring->head);
for ((kt) = ktime_get() + (2 * NSEC_PER_MSEC);
ktime_before(ktime_get(), (kt)); cpu_relax()) {
/*
* In case of resets fails because engine resumes from
* incorrect RING_HEAD and then GPU may be then fed
* to invalid instrcutions, which may lead to unrecoverable
* hang. So at first write doesn't succeed then try again.
*/
ENGINE_WRITE_FW(engine, RING_HEAD, ring->head);
if (ENGINE_READ_FW(engine, RING_HEAD) == ring->head)
break;
}
ENGINE_WRITE_FW(engine, RING_TAIL, ring->head);
ENGINE_POSTING_READ(engine, RING_TAIL);
if (ENGINE_READ_FW(engine, RING_HEAD) != ENGINE_READ_FW(engine, RING_TAIL)) {
ENGINE_TRACE(engine, "failed to reset empty ring: [%x, %x]: %x\n",
ENGINE_READ_FW(engine, RING_HEAD),
ENGINE_READ_FW(engine, RING_TAIL),
ring->head);
goto err;
}
ENGINE_WRITE_FW(engine, RING_CTL,
RING_CTL_SIZE(ring->size) | RING_VALID);
@ -241,12 +260,16 @@ static int xcs_resume(struct intel_engine_cs *engine)
if (__intel_wait_for_register_fw(engine->uncore,
RING_CTL(engine->mmio_base),
RING_VALID, RING_VALID,
5000, 0, NULL))
5000, 0, NULL)) {
ENGINE_TRACE(engine, "failed to restart\n");
goto err;
}
if (GRAPHICS_VER(engine->i915) > 2)
if (GRAPHICS_VER(engine->i915) > 2) {
ENGINE_WRITE_FW(engine,
RING_MI_MODE, _MASKED_BIT_DISABLE(STOP_RING));
ENGINE_POSTING_READ(engine, RING_MI_MODE);
}
/* Now awake, let it get started */
if (ring->tail != ring->head) {
@ -1090,9 +1113,6 @@ static void setup_irq(struct intel_engine_cs *engine)
} else if (GRAPHICS_VER(i915) >= 5) {
engine->irq_enable = gen5_irq_enable;
engine->irq_disable = gen5_irq_disable;
} else if (GRAPHICS_VER(i915) >= 3) {
engine->irq_enable = gen3_irq_enable;
engine->irq_disable = gen3_irq_disable;
} else {
engine->irq_enable = gen2_irq_enable;
engine->irq_disable = gen2_irq_disable;
@ -1146,7 +1166,7 @@ static void setup_common(struct intel_engine_cs *engine)
* equivalent to our next initial bread so we can elide
* engine->emit_init_breadcrumb().
*/
engine->emit_fini_breadcrumb = gen3_emit_breadcrumb;
engine->emit_fini_breadcrumb = gen2_emit_breadcrumb;
if (GRAPHICS_VER(i915) == 5)
engine->emit_fini_breadcrumb = gen5_emit_breadcrumb;
@ -1159,7 +1179,7 @@ static void setup_common(struct intel_engine_cs *engine)
else if (IS_I830(i915) || IS_I845G(i915))
engine->emit_bb_start = i830_emit_bb_start;
else
engine->emit_bb_start = gen3_emit_bb_start;
engine->emit_bb_start = gen2_emit_bb_start;
}
static void setup_rcs(struct intel_engine_cs *engine)

View File

@ -239,8 +239,16 @@ static u32 guc_ctl_debug_flags(struct intel_guc *guc)
static u32 guc_ctl_feature_flags(struct intel_guc *guc)
{
struct intel_gt *gt = guc_to_gt(guc);
u32 flags = 0;
/*
* Enable PXP GuC autoteardown flow.
* NB: MTL does things differently.
*/
if (HAS_PXP(gt->i915) && !IS_METEORLAKE(gt->i915))
flags |= GUC_CTL_ENABLE_GUC_PXP_CTL;
if (!intel_guc_submission_is_used(guc))
flags |= GUC_CTL_DISABLE_SCHEDULER;

View File

@ -105,6 +105,7 @@
#define GUC_WA_ENABLE_TSC_CHECK_ON_RC6 BIT(22)
#define GUC_CTL_FEATURE 2
#define GUC_CTL_ENABLE_GUC_PXP_CTL BIT(1)
#define GUC_CTL_ENABLE_SLPC BIT(2)
#define GUC_CTL_DISABLE_SCHEDULER BIT(14)

View File

@ -691,6 +691,9 @@ IS_SUBPLATFORM(const struct drm_i915_private *i915,
#define HAS_RPS(i915) (INTEL_INFO(i915)->has_rps)
#define HAS_PXP(i915) \
(IS_ENABLED(CONFIG_DRM_I915_PXP) && INTEL_INFO(i915)->has_pxp)
#define HAS_HECI_PXP(i915) \
(INTEL_INFO(i915)->has_heci_pxp)

View File

@ -356,7 +356,7 @@ static bool exclusive_mmio_access(const struct drm_i915_private *i915)
return GRAPHICS_VER(i915) == 7;
}
static void engine_sample(struct intel_engine_cs *engine, unsigned int period_ns)
static void gen3_engine_sample(struct intel_engine_cs *engine, unsigned int period_ns)
{
struct intel_engine_pmu *pmu = &engine->pmu;
bool busy;
@ -391,6 +391,31 @@ static void engine_sample(struct intel_engine_cs *engine, unsigned int period_ns
add_sample(&pmu->sample[I915_SAMPLE_BUSY], period_ns);
}
static void gen2_engine_sample(struct intel_engine_cs *engine, unsigned int period_ns)
{
struct intel_engine_pmu *pmu = &engine->pmu;
u32 tail, head, acthd;
tail = ENGINE_READ_FW(engine, RING_TAIL);
head = ENGINE_READ_FW(engine, RING_HEAD);
acthd = ENGINE_READ_FW(engine, ACTHD);
if (head & HEAD_WAIT_I8XX)
add_sample(&pmu->sample[I915_SAMPLE_WAIT], period_ns);
if (head & HEAD_WAIT_I8XX || head != acthd ||
(head & HEAD_ADDR) != (tail & TAIL_ADDR))
add_sample(&pmu->sample[I915_SAMPLE_BUSY], period_ns);
}
static void engine_sample(struct intel_engine_cs *engine, unsigned int period_ns)
{
if (GRAPHICS_VER(engine->i915) >= 3)
gen3_engine_sample(engine, period_ns);
else
gen2_engine_sample(engine, period_ns);
}
static void
engines_sample(struct intel_gt *gt, unsigned int period_ns)
{
@ -834,15 +859,14 @@ static void i915_pmu_event_start(struct perf_event *event, int flags)
static void i915_pmu_event_stop(struct perf_event *event, int flags)
{
struct drm_i915_private *i915 =
container_of(event->pmu, typeof(*i915), pmu.base);
struct i915_pmu *pmu = &i915->pmu;
struct i915_pmu *pmu = event_to_pmu(event);
if (pmu->closed)
goto out;
if (flags & PERF_EF_UPDATE)
i915_pmu_event_read(event);
i915_pmu_disable(event);
out:
@ -1232,17 +1256,6 @@ static void i915_pmu_unregister_cpuhp_state(struct i915_pmu *pmu)
cpuhp_state_remove_instance(cpuhp_slot, &pmu->cpuhp.node);
}
static bool is_igp(struct drm_i915_private *i915)
{
struct pci_dev *pdev = to_pci_dev(i915->drm.dev);
/* IGP is 0000:00:02.0 */
return pci_domain_nr(pdev->bus) == 0 &&
pdev->bus->number == 0 &&
PCI_SLOT(pdev->devfn) == 2 &&
PCI_FUNC(pdev->devfn) == 0;
}
void i915_pmu_register(struct drm_i915_private *i915)
{
struct i915_pmu *pmu = &i915->pmu;
@ -1255,18 +1268,13 @@ void i915_pmu_register(struct drm_i915_private *i915)
int ret = -ENOMEM;
if (GRAPHICS_VER(i915) <= 2) {
drm_info(&i915->drm, "PMU not supported for this GPU.");
return;
}
spin_lock_init(&pmu->lock);
hrtimer_init(&pmu->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
pmu->timer.function = i915_sample;
pmu->cpuhp.cpu = -1;
init_rc6(pmu);
if (!is_igp(i915)) {
if (IS_DGFX(i915)) {
pmu->name = kasprintf(GFP_KERNEL,
"i915_%s",
dev_name(i915->drm.dev));
@ -1318,7 +1326,7 @@ void i915_pmu_register(struct drm_i915_private *i915)
pmu->base.event_init = NULL;
free_event_attributes(pmu);
err_name:
if (!is_igp(i915))
if (IS_DGFX(i915))
kfree(pmu->name);
err:
drm_notice(&i915->drm, "Failed to register PMU!\n");
@ -1346,7 +1354,7 @@ void i915_pmu_unregister(struct drm_i915_private *i915)
perf_pmu_unregister(&pmu->base);
pmu->base.event_init = NULL;
kfree(pmu->base.attr_groups);
if (!is_igp(i915))
if (IS_DGFX(i915))
kfree(pmu->name);
free_event_attributes(pmu);
}

View File

@ -170,7 +170,7 @@ static struct intel_gt *find_gt_for_required_teelink(struct drm_i915_private *i9
static struct intel_gt *find_gt_for_required_protected_content(struct drm_i915_private *i915)
{
if (!IS_ENABLED(CONFIG_DRM_I915_PXP) || !INTEL_INFO(i915)->has_pxp)
if (!HAS_PXP(i915))
return NULL;
/*