sched_ext: Save and restore scx_locked_rq across SCX_CALL_OP
SCX_CALL_OP{,_RET}() unconditionally clears scx_locked_rq_state to NULL on
exit. That is correct at the top level, but ops can recurse via
scx_bpf_sub_dispatch(): a parent's ops.dispatch calls the helper, which
invokes the child's ops.dispatch under another SCX_CALL_OP. When the inner
call returns, its NULL write clobbers the outer call's state. The parent's
BPF code then calls kfuncs such as scx_bpf_cpuperf_set(), which see
scx_locked_rq() == NULL and try to re-acquire the rq lock that is already
held.
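
As an illustration, a minimal userspace sketch of the failure mode, assuming
a single global slot in place of the per-CPU scx_locked_rq_state;
SCX_CALL_OP_OLD and this struct rq are illustrative stand-ins, not kernel
code:

  #include <assert.h>
  #include <stddef.h>

  struct rq { int cpu; };

  static struct rq *scx_locked_rq_state;	/* stand-in for the per-CPU slot */

  /* pre-fix behavior: unconditionally clear the slot to NULL on exit */
  #define SCX_CALL_OP_OLD(rq, body)		\
  do {						\
  	if (rq)					\
  		scx_locked_rq_state = (rq);	\
  	body;					\
  	if (rq)					\
  		scx_locked_rq_state = NULL;	\
  } while (0)

  int main(void)
  {
  	struct rq parent = { .cpu = 0 };
  	struct rq *parent_rq = &parent;

  	SCX_CALL_OP_OLD(parent_rq, {
  		/* parent's ops.dispatch triggers a nested dispatch */
  		SCX_CALL_OP_OLD(parent_rq, {});
  		/*
  		 * Back in the parent: the inner exit cleared the slot, so
  		 * a kfunc would now see NULL and try to lock the rq that
  		 * the parent already holds.
  		 */
  		assert(scx_locked_rq_state == NULL);	/* the bug */
  	});
  	return 0;
  }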
Fix it by snapshotting scx_locked_rq_state on entry and restoring it on
exit. Rename the rq parameter to locked_rq across all SCX_CALL_OP* macros:
every occurrence of a macro parameter token is substituted in the expansion,
so a parameter named rq would also replace the rq inside the snapshot
local's 'struct rq *' type (see the sketch below). SCX_CALL_OP_TASK{,_RET}()
and SCX_CALL_OP_2TASKS_RET() funnel through the two base macros and inherit
the fix.
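
To make the token collision concrete, here is a hedged standalone sketch;
CALL_BROKEN and CALL_FIXED are illustrative names, not kernel macros:

  #include <stdio.h>

  struct rq { int cpu; };

  /*
   * BROKEN: 'rq' is a macro parameter, so the 'rq' token inside the type
   * name is substituted too. CALL_BROKEN(&r) expands the declaration to
   * "struct (&r) *__prev = (&r);", which does not compile.
   */
  #define CALL_BROKEN(rq)				\
  do {						\
  	struct rq *__prev = (rq);		\
  	(void)__prev;				\
  } while (0)

  /* FIXED: with the parameter renamed, 'struct rq' survives expansion */
  #define CALL_FIXED(locked_rq)			\
  do {						\
  	struct rq *__prev = (locked_rq);	\
  	printf("cpu=%d\n", __prev->cpu);	\
  } while (0)

  int main(void)
  {
  	struct rq r = { .cpu = 3 };

  	/* CALL_BROKEN(&r);  <- would fail to compile, as described above */
  	CALL_FIXED(&r);
  	return 0;
  }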
Fixes: 4f8b122848 ("sched_ext: Add basic building blocks for nested sub-scheduler dispatching")
Reported-by: Chris Mason <clm@meta.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
Reviewed-by: Andrea Righi <arighi@nvidia.com>
@@ -470,24 +470,35 @@ static inline void update_locked_rq(struct rq *rq)
 	__this_cpu_write(scx_locked_rq_state, rq);
 }
 
-#define SCX_CALL_OP(sch, op, rq, args...)					\
+/*
+ * SCX ops can recurse via scx_bpf_sub_dispatch() - the inner call must not
+ * clobber the outer's scx_locked_rq_state. Save it on entry, restore on exit.
+ */
+#define SCX_CALL_OP(sch, op, locked_rq, args...)				\
 do {										\
-	if (rq)									\
-		update_locked_rq(rq);						\
+	struct rq *__prev_locked_rq;						\
+										\
+	if (locked_rq) {							\
+		__prev_locked_rq = scx_locked_rq();				\
+		update_locked_rq(locked_rq);					\
+	}									\
 	(sch)->ops.op(args);							\
-	if (rq)									\
-		update_locked_rq(NULL);						\
+	if (locked_rq)								\
+		update_locked_rq(__prev_locked_rq);				\
 } while (0)
 
-#define SCX_CALL_OP_RET(sch, op, rq, args...)					\
+#define SCX_CALL_OP_RET(sch, op, locked_rq, args...)				\
 ({										\
+	struct rq *__prev_locked_rq;						\
 	__typeof__((sch)->ops.op(args)) __ret;					\
-	if (rq)									\
-		update_locked_rq(rq);						\
+										\
+	if (locked_rq) {							\
+		__prev_locked_rq = scx_locked_rq();				\
+		update_locked_rq(locked_rq);					\
+	}									\
 	__ret = (sch)->ops.op(args);						\
-	if (rq)									\
-		update_locked_rq(NULL);						\
+	if (locked_rq)								\
+		update_locked_rq(__prev_locked_rq);				\
 	__ret;									\
 })
@@ -499,39 +510,39 @@ do {										\
  * those subject tasks.
  *
  * Every SCX_CALL_OP_TASK*() call site invokes its op with @p's rq lock held -
- * either via the @rq argument here, or (for ops.select_cpu()) via @p's pi_lock
- * held by try_to_wake_up() with rq tracking via scx_rq.in_select_cpu. So if
- * kf_tasks[] is set, @p's scheduler-protected fields are stable.
+ * either via the @locked_rq argument here, or (for ops.select_cpu()) via @p's
+ * pi_lock held by try_to_wake_up() with rq tracking via scx_rq.in_select_cpu.
+ * So if kf_tasks[] is set, @p's scheduler-protected fields are stable.
  *
  * kf_tasks[] can not stack, so task-based SCX ops must not nest. The
  * WARN_ON_ONCE() in each macro catches a re-entry of any of the three variants
  * while a previous one is still in progress.
  */
-#define SCX_CALL_OP_TASK(sch, op, rq, task, args...)				\
+#define SCX_CALL_OP_TASK(sch, op, locked_rq, task, args...)			\
 do {										\
 	WARN_ON_ONCE(current->scx.kf_tasks[0]);					\
 	current->scx.kf_tasks[0] = task;					\
-	SCX_CALL_OP((sch), op, rq, task, ##args);				\
+	SCX_CALL_OP((sch), op, locked_rq, task, ##args);			\
 	current->scx.kf_tasks[0] = NULL;					\
 } while (0)
 
-#define SCX_CALL_OP_TASK_RET(sch, op, rq, task, args...)			\
+#define SCX_CALL_OP_TASK_RET(sch, op, locked_rq, task, args...)		\
 ({										\
 	__typeof__((sch)->ops.op(task, ##args)) __ret;				\
 	WARN_ON_ONCE(current->scx.kf_tasks[0]);					\
 	current->scx.kf_tasks[0] = task;					\
-	__ret = SCX_CALL_OP_RET((sch), op, rq, task, ##args);			\
+	__ret = SCX_CALL_OP_RET((sch), op, locked_rq, task, ##args);		\
 	current->scx.kf_tasks[0] = NULL;					\
 	__ret;									\
 })
 
-#define SCX_CALL_OP_2TASKS_RET(sch, op, rq, task0, task1, args...)		\
+#define SCX_CALL_OP_2TASKS_RET(sch, op, locked_rq, task0, task1, args...)	\
 ({										\
 	__typeof__((sch)->ops.op(task0, task1, ##args)) __ret;			\
 	WARN_ON_ONCE(current->scx.kf_tasks[0]);					\
 	current->scx.kf_tasks[0] = task0;					\
 	current->scx.kf_tasks[1] = task1;					\
-	__ret = SCX_CALL_OP_RET((sch), op, rq, task0, task1, ##args);		\
+	__ret = SCX_CALL_OP_RET((sch), op, locked_rq, task0, task1, ##args);	\
 	current->scx.kf_tasks[0] = NULL;					\
 	current->scx.kf_tasks[1] = NULL;					\
 	__ret;									\
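
With the snapshot/restore in place, nested invocations compose. A hedged
userspace simulation of the new semantics (SCX_CALL_OP_SIM and the single
global slot are illustrative stand-ins, not the kernel macros):

  #include <assert.h>
  #include <stddef.h>

  struct rq { int cpu; };

  static struct rq *scx_locked_rq_state;	/* stand-in for the per-CPU slot */

  /* mirrors the fixed SCX_CALL_OP: snapshot on entry, restore on exit */
  #define SCX_CALL_OP_SIM(locked_rq, body)		\
  do {							\
  	struct rq *__prev_locked_rq;			\
  							\
  	if (locked_rq) {				\
  		__prev_locked_rq = scx_locked_rq_state;	\
  		scx_locked_rq_state = (locked_rq);	\
  	}						\
  	body;						\
  	if (locked_rq)					\
  		scx_locked_rq_state = __prev_locked_rq;	\
  } while (0)

  int main(void)
  {
  	struct rq outer = { .cpu = 0 }, inner = { .cpu = 1 };
  	struct rq *outer_rq = &outer, *inner_rq = &inner;

  	SCX_CALL_OP_SIM(outer_rq, {
  		/* nested sub-scheduler dispatch */
  		SCX_CALL_OP_SIM(inner_rq,
  			assert(scx_locked_rq_state == inner_rq));
  		/* outer state survives the inner call now */
  		assert(scx_locked_rq_state == outer_rq);
  	});
  	assert(scx_locked_rq_state == NULL);	/* initial value restored */
  	return 0;
  }

Each nesting level shadows __prev_locked_rq inside its own do-block, so
every exit restores exactly the value its entry observed, and the outermost
exit still leaves the slot NULL.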