diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c index 8d48a1385835..1ff4cf93413b 100644 --- a/kernel/sched/ext.c +++ b/kernel/sched/ext.c @@ -514,14 +514,31 @@ struct scx_task_iter { struct rq_flags rf; u32 cnt; bool list_locked; +#ifdef CONFIG_CGROUPS + struct cgroup *cgrp; + struct cgroup_subsys_state *css_pos; + struct css_task_iter css_iter; +#endif }; /** * scx_task_iter_start - Lock scx_tasks_lock and start a task iteration * @iter: iterator to init + * @cgrp: Optional root of cgroup subhierarchy to iterate * - * Initialize @iter and return with scx_tasks_lock held. Once initialized, @iter - * must eventually be stopped with scx_task_iter_stop(). + * Initialize @iter. Once initialized, @iter must eventually be stopped with + * scx_task_iter_stop(). + * + * If @cgrp is %NULL, scx_tasks is used for iteration and this function returns + * with scx_tasks_lock held and @iter->cursor inserted into scx_tasks. + * + * If @cgrp is not %NULL, @cgrp and its descendants' tasks are walked using + * @iter->css_iter. The caller must be holding cgroup_lock() to prevent cgroup + * task migrations. + * + * The two modes of iterations are largely independent and it's likely that + * scx_tasks can be removed in favor of always using cgroup iteration if + * CONFIG_SCHED_CLASS_EXT depends on CONFIG_CGROUPS. * * scx_tasks_lock and the rq lock may be released using scx_task_iter_unlock() * between this and the first next() call or between any two next() calls. If @@ -532,10 +549,19 @@ struct scx_task_iter { * All tasks which existed when the iteration started are guaranteed to be * visited as long as they are not dead. */ -static void scx_task_iter_start(struct scx_task_iter *iter) +static void scx_task_iter_start(struct scx_task_iter *iter, struct cgroup *cgrp) { memset(iter, 0, sizeof(*iter)); +#ifdef CONFIG_CGROUPS + if (cgrp) { + lockdep_assert_held(&cgroup_mutex); + iter->cgrp = cgrp; + iter->css_pos = css_next_descendant_pre(NULL, &iter->cgrp->self); + css_task_iter_start(iter->css_pos, 0, &iter->css_iter); + return; + } +#endif raw_spin_lock_irq(&scx_tasks_lock); iter->cursor = (struct sched_ext_entity){ .flags = SCX_TASK_CURSOR }; @@ -588,6 +614,14 @@ static void __scx_task_iter_maybe_relock(struct scx_task_iter *iter) */ static void scx_task_iter_stop(struct scx_task_iter *iter) { +#ifdef CONFIG_CGROUPS + if (iter->cgrp) { + if (iter->css_pos) + css_task_iter_end(&iter->css_iter); + __scx_task_iter_rq_unlock(iter); + return; + } +#endif __scx_task_iter_maybe_relock(iter); list_del_init(&iter->cursor.tasks_node); scx_task_iter_unlock(iter); @@ -611,6 +645,24 @@ static struct task_struct *scx_task_iter_next(struct scx_task_iter *iter) cond_resched(); } +#ifdef CONFIG_CGROUPS + if (iter->cgrp) { + while (iter->css_pos) { + struct task_struct *p; + + p = css_task_iter_next(&iter->css_iter); + if (p) + return p; + + css_task_iter_end(&iter->css_iter); + iter->css_pos = css_next_descendant_pre(iter->css_pos, + &iter->cgrp->self); + if (iter->css_pos) + css_task_iter_start(iter->css_pos, 0, &iter->css_iter); + } + return NULL; + } +#endif __scx_task_iter_maybe_relock(iter); list_for_each_entry(pos, cursor, tasks_node) { @@ -4440,7 +4492,7 @@ static void scx_disable_workfn(struct kthread_work *work) scx_init_task_enabled = false; - scx_task_iter_start(&sti); + scx_task_iter_start(&sti, NULL); while ((p = scx_task_iter_next_locked(&sti))) { unsigned int queue_flags = DEQUEUE_SAVE | DEQUEUE_MOVE | DEQUEUE_NOCLOCK; const struct sched_class *old_class = p->sched_class; @@ -5230,7 +5282,7 @@ static void scx_enable_workfn(struct kthread_work *work) if (ret) goto err_disable_unlock_all; - scx_task_iter_start(&sti); + scx_task_iter_start(&sti, NULL); while ((p = scx_task_iter_next_locked(&sti))) { /* * @p may already be dead, have lost all its usages counts and @@ -5272,7 +5324,7 @@ static void scx_enable_workfn(struct kthread_work *work) * scx_tasks_lock. */ percpu_down_write(&scx_fork_rwsem); - scx_task_iter_start(&sti); + scx_task_iter_start(&sti, NULL); while ((p = scx_task_iter_next_locked(&sti))) { unsigned int queue_flags = DEQUEUE_SAVE | DEQUEUE_MOVE; const struct sched_class *old_class = p->sched_class;