From a33c9699e73456d08182ad7b87a4af52ac24f779 Mon Sep 17 00:00:00 2001 From: Daniele Ceraolo Spurio Date: Tue, 25 Feb 2025 15:53:28 -0800 Subject: [PATCH] drm/xe/pxp: Don't kill queues while holding PXP locks xe_exec_queue_kill can sleep, so we can't call it from under a spinlock. We can instead move the queues to a separate list and then kill them all after we release the spinlock. Furthermore, xe_exec_queue_kill can take the VM lock so we can't call it while holding the PXP mutex because the mutex is taken under VM lock at queue creation time. Note that while it is safe to call the kill without holding the mutex, we must call it after the PXP state has been updated, otherwise an app might be able to create a queue between the invalidation and the state update, which would break the state machine. Since being in the list is used to track whether RPM cleanup is needed, we can no longer defer that to queue_destroy, so we perform it immediately instead. v2: also avoid calling kill() under pxp->mutex. Reported-by: Dan Carpenter Closes: https://lore.kernel.org/intel-xe/34aaced9-4a9d-4e8c-900a-b8f73452e35c@stanley.mountain/ Fixes: f8caa80154c4 ("drm/xe/pxp: Add PXP queue tracking and session start") Signed-off-by: Daniele Ceraolo Spurio Cc: John Harrison Cc: Matthew Brost Reviewed-by: Matthew Brost Link: https://patchwork.freedesktop.org/patch/msgid/20250225235328.2895877-1-daniele.ceraolospurio@intel.com --- drivers/gpu/drm/xe/xe_pxp.c | 90 ++++++++++++++++++++----------------- 1 file changed, 50 insertions(+), 40 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_pxp.c b/drivers/gpu/drm/xe/xe_pxp.c index 3cd3f83e86b0..47499ca02693 100644 --- a/drivers/gpu/drm/xe/xe_pxp.c +++ b/drivers/gpu/drm/xe/xe_pxp.c @@ -132,14 +132,6 @@ static int pxp_wait_for_session_state(struct xe_pxp *pxp, u32 id, bool in_play) static void pxp_invalidate_queues(struct xe_pxp *pxp); -static void pxp_invalidate_state(struct xe_pxp *pxp) -{ - pxp_invalidate_queues(pxp); - - if (pxp->status == XE_PXP_ACTIVE) - pxp->key_instance++; -} - static int pxp_terminate_hw(struct xe_pxp *pxp) { struct xe_gt *gt = pxp->gt; @@ -193,7 +185,8 @@ static void pxp_terminate(struct xe_pxp *pxp) mutex_lock(&pxp->mutex); - pxp_invalidate_state(pxp); + if (pxp->status == XE_PXP_ACTIVE) + pxp->key_instance++; /* * we'll mark the status as needing termination on resume, so no need to @@ -220,6 +213,8 @@ static void pxp_terminate(struct xe_pxp *pxp) mutex_unlock(&pxp->mutex); + pxp_invalidate_queues(pxp); + ret = pxp_terminate_hw(pxp); if (ret) { drm_err(&xe->drm, "PXP termination failed: %pe\n", ERR_PTR(ret)); @@ -665,6 +660,30 @@ int xe_pxp_exec_queue_add(struct xe_pxp *pxp, struct xe_exec_queue *q) return ret; } +static void __pxp_exec_queue_remove(struct xe_pxp *pxp, struct xe_exec_queue *q, bool lock) +{ + bool need_pm_put = false; + + if (!xe_pxp_is_enabled(pxp)) + return; + + if (lock) + spin_lock_irq(&pxp->queues.lock); + + if (!list_empty(&q->pxp.link)) { + list_del_init(&q->pxp.link); + need_pm_put = true; + } + + q->pxp.type = DRM_XE_PXP_TYPE_NONE; + + if (lock) + spin_unlock_irq(&pxp->queues.lock); + + if (need_pm_put) + xe_pm_runtime_put(pxp->xe); +} + /** * xe_pxp_exec_queue_remove - remove a queue from the PXP list * @pxp: the xe->pxp pointer (it will be NULL if PXP is disabled) @@ -676,50 +695,36 @@ int xe_pxp_exec_queue_add(struct xe_pxp *pxp, struct xe_exec_queue *q) */ void xe_pxp_exec_queue_remove(struct xe_pxp *pxp, struct xe_exec_queue *q) { - bool need_pm_put = false; - - if (!xe_pxp_is_enabled(pxp)) - return; - - spin_lock_irq(&pxp->queues.lock); - - if (!list_empty(&q->pxp.link)) { - list_del_init(&q->pxp.link); - need_pm_put = true; - } - - q->pxp.type = DRM_XE_PXP_TYPE_NONE; - - spin_unlock_irq(&pxp->queues.lock); - - if (need_pm_put) - xe_pm_runtime_put(pxp->xe); + __pxp_exec_queue_remove(pxp, q, true); } static void pxp_invalidate_queues(struct xe_pxp *pxp) { struct xe_exec_queue *tmp, *q; + LIST_HEAD(to_clean); spin_lock_irq(&pxp->queues.lock); - /* - * Removing a queue from the PXP list requires a put of the RPM ref that - * the queue holds to keep the PXP session alive, which can't be done - * under spinlock. Since it is safe to kill a queue multiple times, we - * can leave the invalid queue in the list for now and postpone the - * removal and associated RPM put to when the queue is destroyed. - */ - list_for_each_entry(tmp, &pxp->queues.list, pxp.link) { - q = xe_exec_queue_get_unless_zero(tmp); - + list_for_each_entry_safe(q, tmp, &pxp->queues.list, pxp.link) { + q = xe_exec_queue_get_unless_zero(q); if (!q) continue; + list_move_tail(&q->pxp.link, &to_clean); + } + spin_unlock_irq(&pxp->queues.lock); + + list_for_each_entry_safe(q, tmp, &to_clean, pxp.link) { xe_exec_queue_kill(q); + + /* + * We hold a ref to the queue so there is no risk of racing with + * the calls to exec_queue_remove coming from exec_queue_destroy. + */ + __pxp_exec_queue_remove(pxp, q, false); + xe_exec_queue_put(q); } - - spin_unlock_irq(&pxp->queues.lock); } /** @@ -816,6 +821,7 @@ int xe_pxp_obj_key_check(struct xe_pxp *pxp, struct drm_gem_object *obj) */ int xe_pxp_pm_suspend(struct xe_pxp *pxp) { + bool needs_queue_inval = false; int ret = 0; if (!xe_pxp_is_enabled(pxp)) @@ -848,7 +854,8 @@ int xe_pxp_pm_suspend(struct xe_pxp *pxp) break; fallthrough; case XE_PXP_ACTIVE: - pxp_invalidate_state(pxp); + pxp->key_instance++; + needs_queue_inval = true; break; default: drm_err(&pxp->xe->drm, "unexpected state during PXP suspend: %u", @@ -865,6 +872,9 @@ int xe_pxp_pm_suspend(struct xe_pxp *pxp) mutex_unlock(&pxp->mutex); + if (needs_queue_inval) + pxp_invalidate_queues(pxp); + /* * if there is a termination in progress, wait for it. * We need to wait outside the lock because the completion is done from