From 0760d62dad5d3e76c2aa175d9bc42b5f664967c2 Mon Sep 17 00:00:00 2001 From: Devaansh Kumar Date: Tue, 11 Feb 2025 22:48:48 +0530 Subject: [PATCH 1/5] sched_ext: selftests: Fix grammar in tests description Fixed grammar for a few tests of sched_ext. Signed-off-by: Devaansh Kumar Signed-off-by: Tejun Heo --- tools/testing/selftests/sched_ext/init_enable_count.c | 2 +- tools/testing/selftests/sched_ext/maybe_null.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/sched_ext/init_enable_count.c b/tools/testing/selftests/sched_ext/init_enable_count.c index 0f3eddc7a17a..eddf9e0e26e7 100644 --- a/tools/testing/selftests/sched_ext/init_enable_count.c +++ b/tools/testing/selftests/sched_ext/init_enable_count.c @@ -150,7 +150,7 @@ static enum scx_test_status run(void *ctx) struct scx_test init_enable_count = { .name = "init_enable_count", - .description = "Verify we do the correct amount of counting of init, " + .description = "Verify we correctly count the occurrences of init, " "enable, etc callbacks.", .run = run, }; diff --git a/tools/testing/selftests/sched_ext/maybe_null.c b/tools/testing/selftests/sched_ext/maybe_null.c index 31cfafb0cf65..aacf0c58ca4f 100644 --- a/tools/testing/selftests/sched_ext/maybe_null.c +++ b/tools/testing/selftests/sched_ext/maybe_null.c @@ -43,7 +43,7 @@ static enum scx_test_status run(void *ctx) struct scx_test maybe_null = { .name = "maybe_null", - .description = "Verify if PTR_MAYBE_NULL work for .dispatch", + .description = "Verify if PTR_MAYBE_NULL works for .dispatch", .run = run, }; REGISTER_SCX_TEST(&maybe_null) From 2e2006c91c842c551521434466f9b4324719c9a7 Mon Sep 17 00:00:00 2001 From: Chuyi Zhou Date: Wed, 12 Feb 2025 15:19:36 +0800 Subject: [PATCH 2/5] sched_ext: Fix the incorrect bpf_list kfunc API in common.bpf.h. Now BPF only supports bpf_list_push_{front,back}_impl kfunc, not bpf_list_ push_{front,back}. This patch fix this issue. Without this patch, if we use bpf_list kfunc in scx, the BPF verifier would complain: libbpf: extern (func ksym) 'bpf_list_push_back': not found in kernel or module BTFs libbpf: failed to load object 'scx_foo' libbpf: failed to load BPF skeleton 'scx_foo': -EINVAL With this patch, the bpf list kfunc will work as expected. Signed-off-by: Chuyi Zhou Fixes: 2a52ca7c98960 ("sched_ext: Add scx_simple and scx_example_qmap example schedulers") Signed-off-by: Tejun Heo --- tools/sched_ext/include/scx/common.bpf.h | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/tools/sched_ext/include/scx/common.bpf.h b/tools/sched_ext/include/scx/common.bpf.h index d72b60a0c582..7849405614b1 100644 --- a/tools/sched_ext/include/scx/common.bpf.h +++ b/tools/sched_ext/include/scx/common.bpf.h @@ -270,8 +270,16 @@ void bpf_obj_drop_impl(void *kptr, void *meta) __ksym; #define bpf_obj_new(type) ((type *)bpf_obj_new_impl(bpf_core_type_id_local(type), NULL)) #define bpf_obj_drop(kptr) bpf_obj_drop_impl(kptr, NULL) -void bpf_list_push_front(struct bpf_list_head *head, struct bpf_list_node *node) __ksym; -void bpf_list_push_back(struct bpf_list_head *head, struct bpf_list_node *node) __ksym; +int bpf_list_push_front_impl(struct bpf_list_head *head, + struct bpf_list_node *node, + void *meta, __u64 off) __ksym; +#define bpf_list_push_front(head, node) bpf_list_push_front_impl(head, node, NULL, 0) + +int bpf_list_push_back_impl(struct bpf_list_head *head, + struct bpf_list_node *node, + void *meta, __u64 off) __ksym; +#define bpf_list_push_back(head, node) bpf_list_push_back_impl(head, node, NULL, 0) + struct bpf_list_node *bpf_list_pop_front(struct bpf_list_head *head) __ksym; struct bpf_list_node *bpf_list_pop_back(struct bpf_list_head *head) __ksym; struct bpf_rb_node *bpf_rbtree_remove(struct bpf_rb_root *root, From f5717c93a1b999970f3a64d771a1a9ee68cc37d0 Mon Sep 17 00:00:00 2001 From: Chuyi Zhou Date: Wed, 12 Feb 2025 21:09:35 +0800 Subject: [PATCH 3/5] sched_ext: Use SCX_CALL_OP_TASK in task_tick_scx Now when we use scx_bpf_task_cgroup() in ops.tick() to get the cgroup of the current task, the following error will occur: scx_foo[3795244] triggered exit kind 1024: runtime error (called on a task not being operated on) The reason is that we are using SCX_CALL_OP() instead of SCX_CALL_OP_TASK() when calling ops.tick(), which triggers the error during the subsequent scx_kf_allowed_on_arg_tasks() check. SCX_CALL_OP_TASK() was first introduced in commit 36454023f50b ("sched_ext: Track tasks that are subjects of the in-flight SCX operation") to ensure task's rq lock is held when accessing task's sched_group. Since ops.tick() is marked as SCX_KF_TERMINAL and task_tick_scx() is protected by the rq lock, we can use SCX_CALL_OP_TASK() to avoid the above issue. Similarly, the same changes should be made for ops.disable() and ops.exit_task(), as they are also protected by task_rq_lock() and it's safe to access the task's task_group. Fixes: 36454023f50b ("sched_ext: Track tasks that are subjects of the in-flight SCX operation") Signed-off-by: Chuyi Zhou Signed-off-by: Tejun Heo --- kernel/sched/ext.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c index 54edd0e2132a..5a81d9a1e31f 100644 --- a/kernel/sched/ext.c +++ b/kernel/sched/ext.c @@ -3899,7 +3899,7 @@ static void task_tick_scx(struct rq *rq, struct task_struct *curr, int queued) curr->scx.slice = 0; touch_core_sched(rq, curr); } else if (SCX_HAS_OP(tick)) { - SCX_CALL_OP(SCX_KF_REST, tick, curr); + SCX_CALL_OP_TASK(SCX_KF_REST, tick, curr); } if (!curr->scx.slice) @@ -4046,7 +4046,7 @@ static void scx_ops_disable_task(struct task_struct *p) WARN_ON_ONCE(scx_get_task_state(p) != SCX_TASK_ENABLED); if (SCX_HAS_OP(disable)) - SCX_CALL_OP(SCX_KF_REST, disable, p); + SCX_CALL_OP_TASK(SCX_KF_REST, disable, p); scx_set_task_state(p, SCX_TASK_READY); } @@ -4075,7 +4075,7 @@ static void scx_ops_exit_task(struct task_struct *p) } if (SCX_HAS_OP(exit_task)) - SCX_CALL_OP(SCX_KF_REST, exit_task, p, &args); + SCX_CALL_OP_TASK(SCX_KF_REST, exit_task, p, &args); scx_set_task_state(p, SCX_TASK_NONE); } From 8fef0a3b17bb258130a4fcbcb5addf94b25e9ec5 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 25 Feb 2025 06:02:23 -1000 Subject: [PATCH 4/5] sched_ext: Fix pick_task_scx() picking non-queued tasks when it's called without balance() a6250aa251ea ("sched_ext: Handle cases where pick_task_scx() is called without preceding balance_scx()") added a workaround to handle the cases where pick_task_scx() is called without prececing balance_scx() which is due to a fair class bug where pick_taks_fair() may return NULL after a true return from balance_fair(). The workaround detects when pick_task_scx() is called without preceding balance_scx() and emulates SCX_RQ_BAL_KEEP and triggers kicking to avoid stalling. Unfortunately, the workaround code was testing whether @prev was on SCX to decide whether to keep the task running. This is incorrect as the task may be on SCX but no longer runnable. This could lead to a non-runnable task to be returned from pick_task_scx() which cause interesting confusions and failures. e.g. A common failure mode is the task ending up with (!on_rq && on_cpu) state which can cause potential wakers to busy loop, which can easily lead to deadlocks. Fix it by testing whether @prev has SCX_TASK_QUEUED set. This makes @prev_on_scx only used in one place. Open code the usage and improve the comment while at it. Signed-off-by: Tejun Heo Reported-by: Pat Cody Fixes: a6250aa251ea ("sched_ext: Handle cases where pick_task_scx() is called without preceding balance_scx()") Cc: stable@vger.kernel.org # v6.12+ Acked-by: Andrea Righi --- kernel/sched/ext.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c index 5a81d9a1e31f..0f1da199cfc7 100644 --- a/kernel/sched/ext.c +++ b/kernel/sched/ext.c @@ -3117,7 +3117,6 @@ static struct task_struct *pick_task_scx(struct rq *rq) { struct task_struct *prev = rq->curr; struct task_struct *p; - bool prev_on_scx = prev->sched_class == &ext_sched_class; bool keep_prev = rq->scx.flags & SCX_RQ_BAL_KEEP; bool kick_idle = false; @@ -3137,14 +3136,18 @@ static struct task_struct *pick_task_scx(struct rq *rq) * if pick_task_scx() is called without preceding balance_scx(). */ if (unlikely(rq->scx.flags & SCX_RQ_BAL_PENDING)) { - if (prev_on_scx) { + if (prev->scx.flags & SCX_TASK_QUEUED) { keep_prev = true; } else { keep_prev = false; kick_idle = true; } - } else if (unlikely(keep_prev && !prev_on_scx)) { - /* only allowed during transitions */ + } else if (unlikely(keep_prev && + prev->sched_class != &ext_sched_class)) { + /* + * Can happen while enabling as SCX_RQ_BAL_PENDING assertion is + * conditional on scx_enabled() and may have been skipped. + */ WARN_ON_ONCE(scx_ops_enable_state() == SCX_OPS_ENABLED); keep_prev = false; } From 9360dfe4cbd62ff1eb8217b815964931523b75b3 Mon Sep 17 00:00:00 2001 From: Andrea Righi Date: Mon, 3 Mar 2025 18:51:59 +0100 Subject: [PATCH 5/5] sched_ext: Validate prev_cpu in scx_bpf_select_cpu_dfl() If a BPF scheduler provides an invalid CPU (outside the nr_cpu_ids range) as prev_cpu to scx_bpf_select_cpu_dfl() it can cause a kernel crash. To prevent this, validate prev_cpu in scx_bpf_select_cpu_dfl() and trigger an scx error if an invalid CPU is specified. Fixes: f0e1a0643a59b ("sched_ext: Implement BPF extensible scheduler class") Cc: stable@vger.kernel.org # v6.12+ Signed-off-by: Andrea Righi Signed-off-by: Tejun Heo --- kernel/sched/ext.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c index 0f1da199cfc7..7b9dfee858e7 100644 --- a/kernel/sched/ext.c +++ b/kernel/sched/ext.c @@ -6422,6 +6422,9 @@ static bool check_builtin_idle_enabled(void) __bpf_kfunc s32 scx_bpf_select_cpu_dfl(struct task_struct *p, s32 prev_cpu, u64 wake_flags, bool *is_idle) { + if (!ops_cpu_valid(prev_cpu, NULL)) + goto prev_cpu; + if (!check_builtin_idle_enabled()) goto prev_cpu;