mirror of
https://github.com/torvalds/linux.git
synced 2026-05-25 23:52:08 +02:00
sched_ext: Fixes for v7.1-rc4
- Spurious WARN in ops_dequeue() racing with concurrent dispatch.

- Self-deadlock between scheduler disable and a concurrent sub-sched enable.

-----BEGIN PGP SIGNATURE-----

iIQEABYKACwWIQTfIjM1kS57o3GsC/uxYfJx3gVYGQUCahCHGQ4cdGpAa2VybmVs
Lm9yZwAKCRCxYfJx3gVYGdKOAP9C6xYbZSXuPJSugQg7Ogq7GTcMf0EtK7CGVb9x
0pVJigEA1E17Vqf1WTWTp2DOsoPV1adS51wcoTGvklJc0eFRrw0=
=i40E
-----END PGP SIGNATURE-----

Merge tag 'sched_ext-for-7.1-rc4-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/sched_ext

Pull sched_ext fixes from Tejun Heo:

 - Spurious WARN in ops_dequeue() racing with concurrent dispatch

 - Self-deadlock between scheduler disable and a concurrent sub-sched enable

* tag 'sched_ext-for-7.1-rc4-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/sched_ext:
  sched_ext: Fix spurious WARN on stale ops_state in ops_dequeue()
  sched_ext: Fix deadlock between scx_root_disable() and concurrent forks
This commit is contained in:
commit
79bd2dded1
|
|
@ -2078,6 +2078,7 @@ static void ops_dequeue(struct rq *rq, struct task_struct *p, u64 deq_flags)
|
|||
/* dequeue is always temporary, don't reset runnable_at */
|
||||
clr_task_runnable(p, false);
|
||||
|
||||
retry:
|
||||
/* acquire ensures that we see the preceding updates on QUEUED */
|
||||
opss = atomic_long_read_acquire(&p->scx.ops_state);
|
||||
|
||||
|
|
@ -2091,8 +2092,20 @@ static void ops_dequeue(struct rq *rq, struct task_struct *p, u64 deq_flags)
|
|||
*/
|
||||
BUG();
|
||||
case SCX_OPSS_QUEUED:
|
||||
/* A queued task must always be in BPF scheduler's custody */
|
||||
WARN_ON_ONCE(!(p->scx.flags & SCX_TASK_IN_CUSTODY));
|
||||
/*
|
||||
* A queued task must always be in BPF scheduler's custody. If
|
||||
* SCX_TASK_IN_CUSTODY is clear, finish_dispatch() on another
|
||||
* CPU has already passed call_task_dequeue() (which clears the
|
||||
* flag), but has not yet written SCX_OPSS_NONE. That final
|
||||
* store does not require this rq's lock, so retrying with
|
||||
* cpu_relax() is bounded: we will observe NONE (or DISPATCHING,
|
||||
* handled by the fallthrough) on a subsequent iteration.
|
||||
*/
|
||||
if (unlikely(!(READ_ONCE(p->scx.flags) & SCX_TASK_IN_CUSTODY))) {
|
||||
cpu_relax();
|
||||
goto retry;
|
||||
}
|
||||
|
||||
if (atomic_long_try_cmpxchg(&p->scx.ops_state, &opss,
|
||||
SCX_OPSS_NONE))
|
||||
break;
|
||||
|
|
@ -4946,10 +4959,30 @@ static const struct kset_uevent_ops scx_uevent_ops = {
|
|||
*/
|
||||
bool task_should_scx(int policy)
|
||||
{
|
||||
if (!scx_enabled() || unlikely(scx_enable_state() == SCX_DISABLING))
|
||||
/* if disabled, nothing should be on it */
|
||||
if (!scx_enabled())
|
||||
return false;
|
||||
|
||||
/* scx is taking over all SCHED_OTHER and SCHED_EXT tasks */
|
||||
if (READ_ONCE(scx_switching_all))
|
||||
return true;
|
||||
|
||||
/*
|
||||
* scx is tearing down - keep new SCHED_EXT tasks out.
|
||||
*
|
||||
* Must come after scx_switching_all test, which serves as a proxy
|
||||
* for __scx_switched_all. While __scx_switched_all is set, we must
|
||||
* return true via the branch above: a fork routed to fair would
|
||||
* stall because next_active_class() skips fair.
|
||||
*
|
||||
* This can develop into a deadlock - scx holds scx_enable_mutex across
|
||||
* kthread_create() in scx_alloc_and_add_sched(); if the new kthread is
|
||||
* the stalled task, the disable path can never grab the mutex to clear
|
||||
* scx_switching_all.
|
||||
*/
|
||||
if (unlikely(scx_enable_state() == SCX_DISABLING))
|
||||
return false;
|
||||
|
||||
return policy == SCHED_EXT;
|
||||
}
|
||||
|
||||
|
|
|
|||
Loading…
Reference in New Issue
Block a user