cgroup, sched_ext: Include exiting tasks in cgroup iter

a72f73c4dd ("cgroup: Don't expose dead tasks in cgroup") made
css_task_iter_advance() skip exiting tasks so cgroup.procs stays consistent
with waitpid() visibility. Unfortunately, this broke scx_task_iter.

scx_task_iter walks either scx_tasks (global) or a cgroup subtree via
css_task_iter, and the two modes are expected to cover the same set of
tasks. After the above change, the cgroup-scoped mode silently skips tasks
past exit_signals() that are still on scx_tasks.

scx_sub_enable_workfn()'s abort path is one of the symptoms: an exiting
SCX_TASK_SUB_INIT task can race past the cgroup iter, leaking
__scx_init_task() state. Other iterations share the same gap.

Add CSS_TASK_ITER_WITH_DEAD to opt out of the skip and use it from
scx_task_iter_start() and scx_task_iter_next().

Fixes: b0e4c2f8a0 ("sched_ext: Implement cgroup subtree iteration for scx_task_iter")
Reported-by: Cheng-Yang Chou <yphbchou0911@gmail.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
This commit is contained in:
Tejun Heo 2026-04-27 14:16:34 -10:00
parent d99f7a32f0
commit 60f21a2649
3 changed files with 10 additions and 5 deletions

View File

@ -53,6 +53,7 @@ struct kernel_clone_args;
/*
 * Flags selecting which tasks a css_task_iter walks. Callers pass them as the
 * flags argument of css_task_iter_start().
 */
enum css_task_iter_flags {
CSS_TASK_ITER_PROCS = (1U << 0), /* walk only threadgroup leaders */
CSS_TASK_ITER_THREADED = (1U << 1), /* walk all threaded css_sets in the domain */
CSS_TASK_ITER_WITH_DEAD = (1U << 2), /* include exiting tasks that are hidden by default */
CSS_TASK_ITER_SKIPPED = (1U << 16), /* internal flags */
};

View File

@ -5059,10 +5059,12 @@ static void css_task_iter_advance(struct css_task_iter *it)
task = list_entry(it->task_pos, struct task_struct, cg_list);
/*
* Hide tasks that are exiting but not yet removed. Keep zombie
* leaders with live threads visible.
* Hide tasks that are exiting but not yet removed by default. Keep
* zombie leaders with live threads visible. Usages that need to walk
* every existing task can opt out via CSS_TASK_ITER_WITH_DEAD.
*/
if ((task->flags & PF_EXITING) && !atomic_read(&task->signal->live))
if (!(it->flags & CSS_TASK_ITER_WITH_DEAD) &&
(task->flags & PF_EXITING) && !atomic_read(&task->signal->live))
goto repeat;
if (it->flags & CSS_TASK_ITER_PROCS) {

View File

@ -766,7 +766,8 @@ static void scx_task_iter_start(struct scx_task_iter *iter, struct cgroup *cgrp)
lockdep_assert_held(&cgroup_mutex);
iter->cgrp = cgrp;
iter->css_pos = css_next_descendant_pre(NULL, &iter->cgrp->self);
css_task_iter_start(iter->css_pos, 0, &iter->css_iter);
css_task_iter_start(iter->css_pos, CSS_TASK_ITER_WITH_DEAD,
&iter->css_iter);
return;
}
#endif
@ -866,7 +867,8 @@ static struct task_struct *scx_task_iter_next(struct scx_task_iter *iter)
iter->css_pos = css_next_descendant_pre(iter->css_pos,
&iter->cgrp->self);
if (iter->css_pos)
css_task_iter_start(iter->css_pos, 0, &iter->css_iter);
css_task_iter_start(iter->css_pos, CSS_TASK_ITER_WITH_DEAD,
&iter->css_iter);
}
return NULL;
}