sched_ext: Fixes for v7.0-rc6

 - Fix stale direct dispatch state in ddsp_dsq_id which can cause
   spurious warnings in mark_direct_dispatch() on task wakeup.

 - Fix is_bpf_migration_disabled() false negative on non-PREEMPT_RCU
   configs which can lead to incorrectly dispatching migration-disabled
   tasks to remote CPUs.
 -----BEGIN PGP SIGNATURE-----
 
 iIQEABYKACwWIQTfIjM1kS57o3GsC/uxYfJx3gVYGQUCac//0w4cdGpAa2VybmVs
 Lm9yZwAKCRCxYfJx3gVYGdqUAP9kEuxvB+pxjheSKV0j7zvDHd+ksMxjQTRoBmyu
 PE0hIgEA5gAax8ebef9MlyRVsm9Qh7v/AmovUHt75oeCnDk++Ag=
 =hD7A
 -----END PGP SIGNATURE-----

Merge tag 'sched_ext-for-7.0-rc6-fixes-2' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/sched_ext

Pull sched_ext fixes from Tejun Heo:
 "These are late but both fix subtle yet critical problems and the blast
  radius is limited strictly to sched_ext.

   - Fix stale direct dispatch state in ddsp_dsq_id which can cause
     spurious warnings in mark_direct_dispatch() on task wakeup

   - Fix is_bpf_migration_disabled() false negative on non-PREEMPT_RCU
     configs which can lead to incorrectly dispatching migration-
     disabled tasks to remote CPUs"

* tag 'sched_ext-for-7.0-rc6-fixes-2' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/sched_ext:
  sched_ext: Fix stale direct dispatch state in ddsp_dsq_id
  sched_ext: Fix is_bpf_migration_disabled() false negative on non-PREEMPT_RCU
This commit is contained in:
Linus Torvalds 2026-04-03 12:05:06 -07:00
commit 631919fb12
2 changed files with 54 additions and 26 deletions

View File

@ -1109,15 +1109,6 @@ static void dispatch_enqueue(struct scx_sched *sch, struct scx_dispatch_q *dsq,
dsq_mod_nr(dsq, 1);
p->scx.dsq = dsq;
/*
* scx.ddsp_dsq_id and scx.ddsp_enq_flags are only relevant on the
* direct dispatch path, but we clear them here because the direct
* dispatch verdict may be overridden on the enqueue path during e.g.
* bypass.
*/
p->scx.ddsp_dsq_id = SCX_DSQ_INVALID;
p->scx.ddsp_enq_flags = 0;
/*
* We're transitioning out of QUEUEING or DISPATCHING. store_release to
* match waiters' load_acquire.
@ -1283,12 +1274,34 @@ static void mark_direct_dispatch(struct scx_sched *sch,
p->scx.ddsp_enq_flags = enq_flags;
}
/*
 * Reset @p's direct dispatch verdict when it leaves the scheduler.
 *
 * Each exit path out of the scheduler is responsible for clearing this
 * state so that a stale verdict can never leak into a later enqueue:
 * - direct_dispatch(): the synchronous enqueue path clears it immediately;
 *   a deferred dispatch holds onto the state until it is consumed
 * - process_ddsp_deferred_locals(): clears it once the deferred state has
 *   been consumed
 * - do_enqueue_task(): clears it on the enqueue fallbacks that ignore the
 *   dispatch verdict (local/global/bypass)
 * - dequeue_task_scx(): clears it after dispatch_dequeue(), which covers
 *   deferred cancellation and holding_cpu races
 * - scx_disable_task(): clears it for queued wakeup tasks — those are
 *   skipped by the scx_bypass() loop, and the stale state must not be
 *   reused by a subsequent scheduler instance
 */
static inline void clear_direct_dispatch(struct task_struct *p)
{
	p->scx.ddsp_enq_flags = 0;
	p->scx.ddsp_dsq_id = SCX_DSQ_INVALID;
}
static void direct_dispatch(struct scx_sched *sch, struct task_struct *p,
u64 enq_flags)
{
struct rq *rq = task_rq(p);
struct scx_dispatch_q *dsq =
find_dsq_for_dispatch(sch, rq, p->scx.ddsp_dsq_id, p);
u64 ddsp_enq_flags;
touch_core_sched_dispatch(rq, p);
@ -1329,8 +1342,10 @@ static void direct_dispatch(struct scx_sched *sch, struct task_struct *p,
return;
}
dispatch_enqueue(sch, dsq, p,
p->scx.ddsp_enq_flags | SCX_ENQ_CLEAR_OPSS);
ddsp_enq_flags = p->scx.ddsp_enq_flags;
clear_direct_dispatch(p);
dispatch_enqueue(sch, dsq, p, ddsp_enq_flags | SCX_ENQ_CLEAR_OPSS);
}
static bool scx_rq_online(struct rq *rq)
@ -1439,6 +1454,7 @@ static void do_enqueue_task(struct rq *rq, struct task_struct *p, u64 enq_flags,
*/
touch_core_sched(rq, p);
refill_task_slice_dfl(sch, p);
clear_direct_dispatch(p);
dispatch_enqueue(sch, dsq, p, enq_flags);
}
@ -1610,6 +1626,7 @@ static bool dequeue_task_scx(struct rq *rq, struct task_struct *p, int deq_flags
sub_nr_running(rq, 1);
dispatch_dequeue(rq, p);
clear_direct_dispatch(p);
return true;
}
@ -2293,13 +2310,15 @@ static void process_ddsp_deferred_locals(struct rq *rq)
struct task_struct, scx.dsq_list.node))) {
struct scx_sched *sch = scx_root;
struct scx_dispatch_q *dsq;
u64 dsq_id = p->scx.ddsp_dsq_id;
u64 enq_flags = p->scx.ddsp_enq_flags;
list_del_init(&p->scx.dsq_list.node);
clear_direct_dispatch(p);
dsq = find_dsq_for_dispatch(sch, rq, p->scx.ddsp_dsq_id, p);
dsq = find_dsq_for_dispatch(sch, rq, dsq_id, p);
if (!WARN_ON_ONCE(dsq->id != SCX_DSQ_LOCAL))
dispatch_to_local_dsq(sch, rq, dsq, p,
p->scx.ddsp_enq_flags);
dispatch_to_local_dsq(sch, rq, dsq, p, enq_flags);
}
}
@ -3015,6 +3034,8 @@ static void scx_disable_task(struct task_struct *p)
lockdep_assert_rq_held(rq);
WARN_ON_ONCE(scx_get_task_state(p) != SCX_TASK_ENABLED);
clear_direct_dispatch(p);
if (SCX_HAS_OP(sch, disable))
SCX_CALL_OP_TASK(sch, SCX_KF_REST, disable, rq, p);
scx_set_task_state(p, SCX_TASK_READY);

View File

@ -860,25 +860,32 @@ static bool check_builtin_idle_enabled(struct scx_sched *sch)
* code.
*
* We can't simply check whether @p->migration_disabled is set in a
* sched_ext callback, because migration is always disabled for the current
* task while running BPF code.
* sched_ext callback, because the BPF prolog (__bpf_prog_enter) may disable
* migration for the current task while running BPF code.
*
* The prolog (__bpf_prog_enter) and epilog (__bpf_prog_exit) respectively
* disable and re-enable migration. For this reason, the current task
* inside a sched_ext callback is always a migration-disabled task.
* Since the BPF prolog calls migrate_disable() only when CONFIG_PREEMPT_RCU
* is enabled (via rcu_read_lock_dont_migrate()), migration_disabled == 1 for
* the current task is ambiguous only in that case: it could be from the BPF
* prolog rather than a real migrate_disable() call.
*
* Therefore, when @p->migration_disabled == 1, check whether @p is the
* current task or not: if it is, then migration was not disabled before
* entering the callback, otherwise migration was disabled.
* Without CONFIG_PREEMPT_RCU, the BPF prolog never calls migrate_disable(),
* so migration_disabled == 1 always means the task is truly
* migration-disabled.
*
* Therefore, when migration_disabled == 1 and CONFIG_PREEMPT_RCU is enabled,
* check whether @p is the current task or not: if it is, then migration was
* not disabled before entering the callback, otherwise migration was disabled.
*
* Returns true if @p is migration-disabled, false otherwise.
*/
/* Returns true if @p is genuinely migration-disabled, false otherwise. */
static bool is_bpf_migration_disabled(const struct task_struct *p)
{
	/*
	 * A disable count other than 1 is unambiguous: zero means enabled,
	 * anything higher cannot come solely from the BPF prolog.
	 */
	if (p->migration_disabled != 1)
		return p->migration_disabled;

	/*
	 * A count of exactly 1 on the current task may be the BPF prolog's
	 * own migrate_disable() — but only under CONFIG_PREEMPT_RCU, where
	 * __bpf_prog_enter uses rcu_read_lock_dont_migrate(). Without
	 * PREEMPT_RCU the prolog never disables migration, so a count of 1
	 * always reflects a real migrate_disable() call.
	 */
	return IS_ENABLED(CONFIG_PREEMPT_RCU) ? p != current : true;
}
static s32 select_cpu_from_kfunc(struct scx_sched *sch, struct task_struct *p,