mirror of
https://github.com/torvalds/linux.git
synced 2026-05-12 16:18:45 +02:00
sched_ext: Don't disable tasks in scx_sub_enable_workfn() abort path
scx_sub_enable_workfn()'s prep loop calls __scx_init_task(sch, p, false)
without transitioning task state, then sets SCX_TASK_SUB_INIT. If prep fails
partway, the abort path runs __scx_disable_and_exit_task(sch, p) on the
marked tasks. Task state is still the parent's SCX_TASK_ENABLED, so the call
dispatches to the SCX_TASK_ENABLED arm and invokes scx_disable_task(sch, p) - i.e.
child->ops.disable() - for tasks on which child->ops.enable() never ran. A
BPF sub-scheduler allocating per-task state in enable/freeing in disable
would operate on uninitialized state.
The dying-task branch in scx_disable_and_exit_task() has the same problem,
and scx_enabling_sub_sched was cleared before the abort cleanup loop - a
task exiting during cleanup tripped the WARN and skipped both ops.exit_task
and the SCX_TASK_SUB_INIT clear, leaking per-task resources and leaving the
task stuck.
Introduce scx_sub_init_cancel_task() that calls ops.exit_task with
cancelled=true - matching what the top-level init path does when init_task
itself returns -errno. Use it in the abort loop and in the dying-task
branch. scx_enabling_sub_sched now stays set until the abort loop finishes
clearing SUB_INIT, so concurrent exits hitting the dying-task branch can
still find @sch. That branch now also clears SCX_TASK_SUB_INIT unconditionally
whenever the flag is observed, leaving the task unmarked even if the WARN fires.
Fixes: 337ec00b1d ("sched_ext: Implement cgroup sub-sched enabling and disabling")
Reported-by: Chris Mason <clm@meta.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
Reviewed-by: Andrea Righi <arighi@nvidia.com>
This commit is contained in:
parent
da2d81b411
commit
21a5a97ba4
|
|
@ -3633,6 +3633,22 @@ static void __scx_disable_and_exit_task(struct scx_sched *sch,
|
|||
SCX_CALL_OP_TASK(sch, exit_task, task_rq(p), p, &args);
|
||||
}
|
||||
|
||||
/*
|
||||
* Undo a completed __scx_init_task(sch, p, false) when scx_enable_task() never
|
||||
* ran. The task state has not been transitioned, so this mirrors the
|
||||
* SCX_TASK_INIT branch in __scx_disable_and_exit_task().
|
||||
*/
|
||||
static void scx_sub_init_cancel_task(struct scx_sched *sch, struct task_struct *p)
|
||||
{
|
||||
struct scx_exit_task_args args = { .cancelled = true };
|
||||
|
||||
lockdep_assert_held(&p->pi_lock);
|
||||
lockdep_assert_rq_held(task_rq(p));
|
||||
|
||||
if (SCX_HAS_OP(sch, exit_task))
|
||||
SCX_CALL_OP_TASK(sch, exit_task, task_rq(p), p, &args);
|
||||
}
|
||||
|
||||
static void scx_disable_and_exit_task(struct scx_sched *sch,
|
||||
struct task_struct *p)
|
||||
{
|
||||
|
|
@ -3641,11 +3657,12 @@ static void scx_disable_and_exit_task(struct scx_sched *sch,
|
|||
/*
|
||||
* If set, @p exited between __scx_init_task() and scx_enable_task() in
|
||||
* scx_sub_enable() and is initialized for both the associated sched and
|
||||
* its parent. Disable and exit for the child too.
|
||||
* its parent. Exit for the child too - scx_enable_task() never ran for
|
||||
* it, so undo only init_task.
|
||||
*/
|
||||
if ((p->scx.flags & SCX_TASK_SUB_INIT) &&
|
||||
!WARN_ON_ONCE(!scx_enabling_sub_sched)) {
|
||||
__scx_disable_and_exit_task(scx_enabling_sub_sched, p);
|
||||
if (p->scx.flags & SCX_TASK_SUB_INIT) {
|
||||
if (!WARN_ON_ONCE(!scx_enabling_sub_sched))
|
||||
scx_sub_init_cancel_task(scx_enabling_sub_sched, p);
|
||||
p->scx.flags &= ~SCX_TASK_SUB_INIT;
|
||||
}
|
||||
|
||||
|
|
@ -7124,16 +7141,23 @@ static void scx_sub_enable_workfn(struct kthread_work *work)
|
|||
abort:
|
||||
put_task_struct(p);
|
||||
scx_task_iter_stop(&sti);
|
||||
scx_enabling_sub_sched = NULL;
|
||||
|
||||
/*
|
||||
* Undo __scx_init_task() for tasks we marked. scx_enable_task() never
|
||||
* ran for @sch on them, so calling scx_disable_task() here would invoke
|
||||
* ops.disable() without a matching ops.enable(). scx_enabling_sub_sched
|
||||
* must stay set until SUB_INIT is cleared from every marked task -
|
||||
* scx_disable_and_exit_task() reads it when a task exits concurrently.
|
||||
*/
|
||||
scx_task_iter_start(&sti, sch->cgrp);
|
||||
while ((p = scx_task_iter_next_locked(&sti))) {
|
||||
if (p->scx.flags & SCX_TASK_SUB_INIT) {
|
||||
__scx_disable_and_exit_task(sch, p);
|
||||
scx_sub_init_cancel_task(sch, p);
|
||||
p->scx.flags &= ~SCX_TASK_SUB_INIT;
|
||||
}
|
||||
}
|
||||
scx_task_iter_stop(&sti);
|
||||
scx_enabling_sub_sched = NULL;
|
||||
err_unlock_and_disable:
|
||||
/* we'll soon enter disable path, keep bypass on */
|
||||
scx_cgroup_unlock();
|
||||
|
|
|
|||
Loading…
Reference in New Issue
Block a user