mirror of
https://github.com/torvalds/linux.git
synced 2026-06-01 11:03:43 +02:00
sched_ext: Make watchdog sub-sched aware
Currently, the watchdog checks all tasks as if they are all on scx_root. Move scx_watchdog_timeout inside scx_sched and make check_rq_for_timeouts() use the timeout from the scx_sched associated with each task.

refresh_watchdog() is added, which sets the timer interval to half of the shortest watchdog timeout among all scheds and arms or disarms the timer as necessary. Every scx_sched instance has equivalent or better detection latency while sharing the same timer.

Signed-off-by: Tejun Heo <tj@kernel.org>
Reviewed-by: Andrea Righi <arighi@nvidia.com>
This commit is contained in:
parent
34ecfb3551
commit
cde94c032b
|
|
@ -59,11 +59,10 @@ static atomic_long_t scx_hotplug_seq = ATOMIC_LONG_INIT(0);
|
||||||
static atomic_long_t scx_enable_seq = ATOMIC_LONG_INIT(0);
|
static atomic_long_t scx_enable_seq = ATOMIC_LONG_INIT(0);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* The maximum amount of time in jiffies that a task may be runnable without
|
* Watchdog interval. All scx_sched's share a single watchdog timer and the
|
||||||
* being scheduled on a CPU. If this timeout is exceeded, it will trigger
|
* interval is half of the shortest sch->watchdog_timeout.
|
||||||
* scx_error().
|
|
||||||
*/
|
*/
|
||||||
static unsigned long scx_watchdog_timeout;
|
static unsigned long scx_watchdog_interval;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* The last time the delayed work was run. This delayed work relies on
|
* The last time the delayed work was run. This delayed work relies on
|
||||||
|
|
@ -3038,10 +3037,11 @@ static bool check_rq_for_timeouts(struct rq *rq)
|
||||||
goto out_unlock;
|
goto out_unlock;
|
||||||
|
|
||||||
list_for_each_entry(p, &rq->scx.runnable_list, scx.runnable_node) {
|
list_for_each_entry(p, &rq->scx.runnable_list, scx.runnable_node) {
|
||||||
|
struct scx_sched *sch = scx_task_sched(p);
|
||||||
unsigned long last_runnable = p->scx.runnable_at;
|
unsigned long last_runnable = p->scx.runnable_at;
|
||||||
|
|
||||||
if (unlikely(time_after(jiffies,
|
if (unlikely(time_after(jiffies,
|
||||||
last_runnable + READ_ONCE(scx_watchdog_timeout)))) {
|
last_runnable + READ_ONCE(sch->watchdog_timeout)))) {
|
||||||
u32 dur_ms = jiffies_to_msecs(jiffies - last_runnable);
|
u32 dur_ms = jiffies_to_msecs(jiffies - last_runnable);
|
||||||
|
|
||||||
scx_exit(sch, SCX_EXIT_ERROR_STALL, 0,
|
scx_exit(sch, SCX_EXIT_ERROR_STALL, 0,
|
||||||
|
|
@ -3058,6 +3058,7 @@ static bool check_rq_for_timeouts(struct rq *rq)
|
||||||
|
|
||||||
static void scx_watchdog_workfn(struct work_struct *work)
|
static void scx_watchdog_workfn(struct work_struct *work)
|
||||||
{
|
{
|
||||||
|
unsigned long intv;
|
||||||
int cpu;
|
int cpu;
|
||||||
|
|
||||||
WRITE_ONCE(scx_watchdog_timestamp, jiffies);
|
WRITE_ONCE(scx_watchdog_timestamp, jiffies);
|
||||||
|
|
@ -3068,28 +3069,31 @@ static void scx_watchdog_workfn(struct work_struct *work)
|
||||||
|
|
||||||
cond_resched();
|
cond_resched();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
intv = READ_ONCE(scx_watchdog_interval);
|
||||||
|
if (intv < ULONG_MAX)
|
||||||
queue_delayed_work(system_unbound_wq, to_delayed_work(work),
|
queue_delayed_work(system_unbound_wq, to_delayed_work(work),
|
||||||
READ_ONCE(scx_watchdog_timeout) / 2);
|
intv);
|
||||||
}
|
}
|
||||||
|
|
||||||
void scx_tick(struct rq *rq)
|
void scx_tick(struct rq *rq)
|
||||||
{
|
{
|
||||||
struct scx_sched *sch;
|
struct scx_sched *root;
|
||||||
unsigned long last_check;
|
unsigned long last_check;
|
||||||
|
|
||||||
if (!scx_enabled())
|
if (!scx_enabled())
|
||||||
return;
|
return;
|
||||||
|
|
||||||
sch = rcu_dereference_bh(scx_root);
|
root = rcu_dereference_bh(scx_root);
|
||||||
if (unlikely(!sch))
|
if (unlikely(!root))
|
||||||
return;
|
return;
|
||||||
|
|
||||||
last_check = READ_ONCE(scx_watchdog_timestamp);
|
last_check = READ_ONCE(scx_watchdog_timestamp);
|
||||||
if (unlikely(time_after(jiffies,
|
if (unlikely(time_after(jiffies,
|
||||||
last_check + READ_ONCE(scx_watchdog_timeout)))) {
|
last_check + READ_ONCE(root->watchdog_timeout)))) {
|
||||||
u32 dur_ms = jiffies_to_msecs(jiffies - last_check);
|
u32 dur_ms = jiffies_to_msecs(jiffies - last_check);
|
||||||
|
|
||||||
scx_exit(sch, SCX_EXIT_ERROR_STALL, 0,
|
scx_exit(root, SCX_EXIT_ERROR_STALL, 0,
|
||||||
"watchdog failed to check in for %u.%03us",
|
"watchdog failed to check in for %u.%03us",
|
||||||
dur_ms / 1000, dur_ms % 1000);
|
dur_ms / 1000, dur_ms % 1000);
|
||||||
}
|
}
|
||||||
|
|
@ -4760,6 +4764,26 @@ static void free_kick_syncs(void)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static void refresh_watchdog(void)
|
||||||
|
{
|
||||||
|
struct scx_sched *sch;
|
||||||
|
unsigned long intv = ULONG_MAX;
|
||||||
|
|
||||||
|
/* take the shortest timeout and use its half for watchdog interval */
|
||||||
|
rcu_read_lock();
|
||||||
|
list_for_each_entry_rcu(sch, &scx_sched_all, all)
|
||||||
|
intv = max(min(intv, sch->watchdog_timeout / 2), 1);
|
||||||
|
rcu_read_unlock();
|
||||||
|
|
||||||
|
WRITE_ONCE(scx_watchdog_timestamp, jiffies);
|
||||||
|
WRITE_ONCE(scx_watchdog_interval, intv);
|
||||||
|
|
||||||
|
if (intv < ULONG_MAX)
|
||||||
|
mod_delayed_work(system_unbound_wq, &scx_watchdog_work, intv);
|
||||||
|
else
|
||||||
|
cancel_delayed_work_sync(&scx_watchdog_work);
|
||||||
|
}
|
||||||
|
|
||||||
#ifdef CONFIG_EXT_SUB_SCHED
|
#ifdef CONFIG_EXT_SUB_SCHED
|
||||||
static DECLARE_WAIT_QUEUE_HEAD(scx_unlink_waitq);
|
static DECLARE_WAIT_QUEUE_HEAD(scx_unlink_waitq);
|
||||||
|
|
||||||
|
|
@ -4798,6 +4822,8 @@ static void scx_sub_disable(struct scx_sched *sch)
|
||||||
list_del_rcu(&sch->all);
|
list_del_rcu(&sch->all);
|
||||||
raw_spin_unlock_irq(&scx_sched_lock);
|
raw_spin_unlock_irq(&scx_sched_lock);
|
||||||
|
|
||||||
|
refresh_watchdog();
|
||||||
|
|
||||||
mutex_unlock(&scx_enable_mutex);
|
mutex_unlock(&scx_enable_mutex);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
|
@ -4932,12 +4958,12 @@ static void scx_root_disable(struct scx_sched *sch)
|
||||||
if (sch->ops.exit)
|
if (sch->ops.exit)
|
||||||
SCX_CALL_OP(sch, SCX_KF_UNLOCKED, exit, NULL, ei);
|
SCX_CALL_OP(sch, SCX_KF_UNLOCKED, exit, NULL, ei);
|
||||||
|
|
||||||
cancel_delayed_work_sync(&scx_watchdog_work);
|
|
||||||
|
|
||||||
raw_spin_lock_irq(&scx_sched_lock);
|
raw_spin_lock_irq(&scx_sched_lock);
|
||||||
list_del_rcu(&sch->all);
|
list_del_rcu(&sch->all);
|
||||||
raw_spin_unlock_irq(&scx_sched_lock);
|
raw_spin_unlock_irq(&scx_sched_lock);
|
||||||
|
|
||||||
|
refresh_watchdog();
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* scx_root clearing must be inside cpus_read_lock(). See
|
* scx_root clearing must be inside cpus_read_lock(). See
|
||||||
* handle_hotplug().
|
* handle_hotplug().
|
||||||
|
|
@ -5473,6 +5499,11 @@ static struct scx_sched *scx_alloc_and_add_sched(struct sched_ext_ops *ops,
|
||||||
sch->ancestors[level] = sch;
|
sch->ancestors[level] = sch;
|
||||||
sch->level = level;
|
sch->level = level;
|
||||||
|
|
||||||
|
if (ops->timeout_ms)
|
||||||
|
sch->watchdog_timeout = msecs_to_jiffies(ops->timeout_ms);
|
||||||
|
else
|
||||||
|
sch->watchdog_timeout = SCX_WATCHDOG_MAX_TIMEOUT;
|
||||||
|
|
||||||
sch->slice_dfl = SCX_SLICE_DFL;
|
sch->slice_dfl = SCX_SLICE_DFL;
|
||||||
atomic_set(&sch->exit_kind, SCX_EXIT_NONE);
|
atomic_set(&sch->exit_kind, SCX_EXIT_NONE);
|
||||||
init_irq_work(&sch->error_irq_work, scx_error_irq_workfn);
|
init_irq_work(&sch->error_irq_work, scx_error_irq_workfn);
|
||||||
|
|
@ -5615,7 +5646,6 @@ static void scx_root_enable_workfn(struct kthread_work *work)
|
||||||
struct scx_sched *sch;
|
struct scx_sched *sch;
|
||||||
struct scx_task_iter sti;
|
struct scx_task_iter sti;
|
||||||
struct task_struct *p;
|
struct task_struct *p;
|
||||||
unsigned long timeout;
|
|
||||||
int i, cpu, ret;
|
int i, cpu, ret;
|
||||||
|
|
||||||
mutex_lock(&scx_enable_mutex);
|
mutex_lock(&scx_enable_mutex);
|
||||||
|
|
@ -5667,6 +5697,8 @@ static void scx_root_enable_workfn(struct kthread_work *work)
|
||||||
list_add_tail_rcu(&sch->all, &scx_sched_all);
|
list_add_tail_rcu(&sch->all, &scx_sched_all);
|
||||||
raw_spin_unlock_irq(&scx_sched_lock);
|
raw_spin_unlock_irq(&scx_sched_lock);
|
||||||
|
|
||||||
|
refresh_watchdog();
|
||||||
|
|
||||||
scx_idle_enable(ops);
|
scx_idle_enable(ops);
|
||||||
|
|
||||||
if (sch->ops.init) {
|
if (sch->ops.init) {
|
||||||
|
|
@ -5697,16 +5729,6 @@ static void scx_root_enable_workfn(struct kthread_work *work)
|
||||||
if (ret)
|
if (ret)
|
||||||
goto err_disable;
|
goto err_disable;
|
||||||
|
|
||||||
if (ops->timeout_ms)
|
|
||||||
timeout = msecs_to_jiffies(ops->timeout_ms);
|
|
||||||
else
|
|
||||||
timeout = SCX_WATCHDOG_MAX_TIMEOUT;
|
|
||||||
|
|
||||||
WRITE_ONCE(scx_watchdog_timeout, timeout);
|
|
||||||
WRITE_ONCE(scx_watchdog_timestamp, jiffies);
|
|
||||||
queue_delayed_work(system_unbound_wq, &scx_watchdog_work,
|
|
||||||
READ_ONCE(scx_watchdog_timeout) / 2);
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Once __scx_enabled is set, %current can be switched to SCX anytime.
|
* Once __scx_enabled is set, %current can be switched to SCX anytime.
|
||||||
* This can lead to stalls as some BPF schedulers (e.g. userspace
|
* This can lead to stalls as some BPF schedulers (e.g. userspace
|
||||||
|
|
@ -5928,6 +5950,8 @@ static void scx_sub_enable_workfn(struct kthread_work *work)
|
||||||
list_add_tail_rcu(&sch->all, &scx_sched_all);
|
list_add_tail_rcu(&sch->all, &scx_sched_all);
|
||||||
raw_spin_unlock_irq(&scx_sched_lock);
|
raw_spin_unlock_irq(&scx_sched_lock);
|
||||||
|
|
||||||
|
refresh_watchdog();
|
||||||
|
|
||||||
if (sch->level >= SCX_SUB_MAX_DEPTH) {
|
if (sch->level >= SCX_SUB_MAX_DEPTH) {
|
||||||
scx_error(sch, "max nesting depth %d violated",
|
scx_error(sch, "max nesting depth %d violated",
|
||||||
SCX_SUB_MAX_DEPTH);
|
SCX_SUB_MAX_DEPTH);
|
||||||
|
|
|
||||||
|
|
@ -1019,6 +1019,13 @@ struct scx_sched {
|
||||||
bool sub_attached;
|
bool sub_attached;
|
||||||
#endif /* CONFIG_EXT_SUB_SCHED */
|
#endif /* CONFIG_EXT_SUB_SCHED */
|
||||||
|
|
||||||
|
/*
|
||||||
|
* The maximum amount of time in jiffies that a task may be runnable
|
||||||
|
* without being scheduled on a CPU. If this timeout is exceeded, it
|
||||||
|
* will trigger scx_error().
|
||||||
|
*/
|
||||||
|
unsigned long watchdog_timeout;
|
||||||
|
|
||||||
atomic_t exit_kind;
|
atomic_t exit_kind;
|
||||||
struct scx_exit_info *exit_info;
|
struct scx_exit_info *exit_info;
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue
Block a user