mirror of
https://github.com/torvalds/linux.git
synced 2026-05-30 01:53:29 +02:00
sched/deadline: Don't count nr_running for dl_server proxy tasks
On CPU offline, the kernel stalled with the call trace below:
INFO: task kworker/0:1:11 blocked for more than 120 seconds.
cpuhp holds the CPU hotplug lock endlessly and stalls vmstat_shepherd.
This is because we count nr_running twice when enqueuing cpuhp, which
causes cpuhp's wait condition to fail:
enqueue_task_fair() // pick cpuhp from idle, rq->nr_running = 0
dl_server_start()
[...]
add_nr_running() // rq->nr_running = 1
add_nr_running() // rq->nr_running = 2
[switch to cpuhp, waiting on balance_hotplug_wait()]
rcuwait_wait_event(rq->nr_running == 1 && ...) // failed, rq->nr_running=2
schedule() // wait again
It doesn't make sense to count the dl_server towards runnable tasks,
since it runs other tasks.
Fixes: 63ba8422f8 ("sched/deadline: Introduce deadline servers")
Signed-off-by: Yicong Yang <yangyicong@hisilicon.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lore.kernel.org/r/20250627035420.37712-1-yangyicong@huawei.com
This commit is contained in:
parent
421fc59cf5
commit
52d15521eb
|
|
@@ -1851,7 +1851,9 @@ void inc_dl_tasks(struct sched_dl_entity *dl_se, struct dl_rq *dl_rq)
|
|||
u64 deadline = dl_se->deadline;
|
||||
|
||||
dl_rq->dl_nr_running++;
|
||||
add_nr_running(rq_of_dl_rq(dl_rq), 1);
|
||||
|
||||
if (!dl_server(dl_se))
|
||||
add_nr_running(rq_of_dl_rq(dl_rq), 1);
|
||||
|
||||
inc_dl_deadline(dl_rq, deadline);
|
||||
}
|
||||
|
|
@@ -1861,7 +1863,9 @@ void dec_dl_tasks(struct sched_dl_entity *dl_se, struct dl_rq *dl_rq)
|
|||
{
|
||||
WARN_ON(!dl_rq->dl_nr_running);
|
||||
dl_rq->dl_nr_running--;
|
||||
sub_nr_running(rq_of_dl_rq(dl_rq), 1);
|
||||
|
||||
if (!dl_server(dl_se))
|
||||
sub_nr_running(rq_of_dl_rq(dl_rq), 1);
|
||||
|
||||
dec_dl_deadline(dl_rq, dl_se->deadline);
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in New Issue
Block a user