mirror of
https://github.com/torvalds/linux.git
synced 2026-05-12 16:18:45 +02:00
sched_ext: Fix stale direct dispatch state in ddsp_dsq_id
@p->scx.ddsp_dsq_id can be left set (non-SCX_DSQ_INVALID), triggering a
spurious warning in mark_direct_dispatch() when the next wakeup's
ops.select_cpu() calls scx_bpf_dsq_insert(), such as:
WARNING: kernel/sched/ext.c:1273 at scx_dsq_insert_commit+0xcd/0x140
The root cause is that ddsp_dsq_id was only cleared in dispatch_enqueue(),
which is not reached in all paths that consume or cancel a direct dispatch
verdict.
Fix it by clearing it at the right places:
- direct_dispatch(): cache the direct dispatch state in local variables
and clear it before dispatch_enqueue() on the synchronous path. For
the deferred path, the direct dispatch state must remain set until
process_ddsp_deferred_locals() consumes it.
- process_ddsp_deferred_locals(): cache the dispatch state in local
variables and clear it before calling dispatch_to_local_dsq(), which
may migrate the task to another rq.
- do_enqueue_task(): clear the dispatch state on the enqueue path
(local/global/bypass fallbacks), where the direct dispatch verdict is
ignored.
- dequeue_task_scx(): clear the dispatch state after dispatch_dequeue()
to handle both the deferred dispatch cancellation and the holding_cpu
race, covering all cases where a pending direct dispatch is
cancelled.
- scx_disable_task(): clear the direct dispatch state when
transitioning a task out of the current scheduler. Waking tasks may
have had the direct dispatch state set by the outgoing scheduler's
ops.select_cpu() and then been queued on a wake_list via
ttwu_queue_wakelist(), when SCX_OPS_ALLOW_QUEUED_WAKEUP is set. Such
tasks are not on the runqueue and are not iterated by scx_bypass(),
so their direct dispatch state won't be cleared. Without this clear,
any subsequent SCX scheduler that tries to direct dispatch the task
will trigger the WARN_ON_ONCE() in mark_direct_dispatch().
Fixes: 5b26f7b920 ("sched_ext: Allow SCX_DSQ_LOCAL_ON for direct dispatches")
Cc: stable@vger.kernel.org # v6.12+
Cc: Daniel Hodges <hodgesd@meta.com>
Cc: Patrick Somaru <patsomaru@meta.com>
Signed-off-by: Andrea Righi <arighi@nvidia.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
This commit is contained in:
parent
0c4a59df37
commit
7e0ffb72de
|
|
@ -1109,15 +1109,6 @@ static void dispatch_enqueue(struct scx_sched *sch, struct scx_dispatch_q *dsq,
|
|||
dsq_mod_nr(dsq, 1);
|
||||
p->scx.dsq = dsq;
|
||||
|
||||
/*
|
||||
* scx.ddsp_dsq_id and scx.ddsp_enq_flags are only relevant on the
|
||||
* direct dispatch path, but we clear them here because the direct
|
||||
* dispatch verdict may be overridden on the enqueue path during e.g.
|
||||
* bypass.
|
||||
*/
|
||||
p->scx.ddsp_dsq_id = SCX_DSQ_INVALID;
|
||||
p->scx.ddsp_enq_flags = 0;
|
||||
|
||||
/*
|
||||
* We're transitioning out of QUEUEING or DISPATCHING. store_release to
|
||||
* match waiters' load_acquire.
|
||||
|
|
@ -1283,12 +1274,34 @@ static void mark_direct_dispatch(struct scx_sched *sch,
|
|||
p->scx.ddsp_enq_flags = enq_flags;
|
||||
}
|
||||
|
||||
/*
|
||||
* Clear @p direct dispatch state when leaving the scheduler.
|
||||
*
|
||||
* Direct dispatch state must be cleared in the following cases:
|
||||
* - direct_dispatch(): cleared on the synchronous enqueue path, deferred
|
||||
* dispatch keeps the state until consumed
|
||||
* - process_ddsp_deferred_locals(): cleared after consuming deferred state,
|
||||
* - do_enqueue_task(): cleared on enqueue fallbacks where the dispatch
|
||||
* verdict is ignored (local/global/bypass)
|
||||
* - dequeue_task_scx(): cleared after dispatch_dequeue(), covering deferred
|
||||
* cancellation and holding_cpu races
|
||||
* - scx_disable_task(): cleared for queued wakeup tasks, which are excluded by
|
||||
* the scx_bypass() loop, so that stale state is not reused by a subsequent
|
||||
* scheduler instance
|
||||
*/
|
||||
/* Reset @p's direct dispatch verdict so a later mark_direct_dispatch() starts clean. */
static inline void clear_direct_dispatch(struct task_struct *p)
|
||||
{
|
||||
p->scx.ddsp_dsq_id = SCX_DSQ_INVALID;
|
||||
p->scx.ddsp_enq_flags = 0;
|
||||
}
|
||||
|
||||
static void direct_dispatch(struct scx_sched *sch, struct task_struct *p,
|
||||
u64 enq_flags)
|
||||
{
|
||||
struct rq *rq = task_rq(p);
|
||||
struct scx_dispatch_q *dsq =
|
||||
find_dsq_for_dispatch(sch, rq, p->scx.ddsp_dsq_id, p);
|
||||
u64 ddsp_enq_flags;
|
||||
|
||||
touch_core_sched_dispatch(rq, p);
|
||||
|
||||
|
|
@ -1329,8 +1342,10 @@ static void direct_dispatch(struct scx_sched *sch, struct task_struct *p,
|
|||
return;
|
||||
}
|
||||
|
||||
dispatch_enqueue(sch, dsq, p,
|
||||
p->scx.ddsp_enq_flags | SCX_ENQ_CLEAR_OPSS);
|
||||
ddsp_enq_flags = p->scx.ddsp_enq_flags;
|
||||
clear_direct_dispatch(p);
|
||||
|
||||
dispatch_enqueue(sch, dsq, p, ddsp_enq_flags | SCX_ENQ_CLEAR_OPSS);
|
||||
}
|
||||
|
||||
static bool scx_rq_online(struct rq *rq)
|
||||
|
|
@ -1439,6 +1454,7 @@ static void do_enqueue_task(struct rq *rq, struct task_struct *p, u64 enq_flags,
|
|||
*/
|
||||
touch_core_sched(rq, p);
|
||||
refill_task_slice_dfl(sch, p);
|
||||
clear_direct_dispatch(p);
|
||||
dispatch_enqueue(sch, dsq, p, enq_flags);
|
||||
}
|
||||
|
||||
|
|
@ -1610,6 +1626,7 @@ static bool dequeue_task_scx(struct rq *rq, struct task_struct *p, int deq_flags
|
|||
sub_nr_running(rq, 1);
|
||||
|
||||
dispatch_dequeue(rq, p);
|
||||
clear_direct_dispatch(p);
|
||||
return true;
|
||||
}
|
||||
|
||||
|
|
@ -2293,13 +2310,15 @@ static void process_ddsp_deferred_locals(struct rq *rq)
|
|||
struct task_struct, scx.dsq_list.node))) {
|
||||
struct scx_sched *sch = scx_root;
|
||||
struct scx_dispatch_q *dsq;
|
||||
u64 dsq_id = p->scx.ddsp_dsq_id;
|
||||
u64 enq_flags = p->scx.ddsp_enq_flags;
|
||||
|
||||
list_del_init(&p->scx.dsq_list.node);
|
||||
clear_direct_dispatch(p);
|
||||
|
||||
dsq = find_dsq_for_dispatch(sch, rq, p->scx.ddsp_dsq_id, p);
|
||||
dsq = find_dsq_for_dispatch(sch, rq, dsq_id, p);
|
||||
if (!WARN_ON_ONCE(dsq->id != SCX_DSQ_LOCAL))
|
||||
dispatch_to_local_dsq(sch, rq, dsq, p,
|
||||
p->scx.ddsp_enq_flags);
|
||||
dispatch_to_local_dsq(sch, rq, dsq, p, enq_flags);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -3015,6 +3034,8 @@ static void scx_disable_task(struct task_struct *p)
|
|||
lockdep_assert_rq_held(rq);
|
||||
WARN_ON_ONCE(scx_get_task_state(p) != SCX_TASK_ENABLED);
|
||||
|
||||
clear_direct_dispatch(p);
|
||||
|
||||
if (SCX_HAS_OP(sch, disable))
|
||||
SCX_CALL_OP_TASK(sch, SCX_KF_REST, disable, rq, p);
|
||||
scx_set_task_state(p, SCX_TASK_READY);
|
||||
|
|
|
|||
Loading…
Reference in New Issue
Block a user