sched/fair: Avoid re-setting virtual deadline on 'migrations'

During OSPM24, Youssef noted that migrations are re-setting the virtual
deadline. Notably, everything that does a dequeue-enqueue cycle, like setting
nice, changing the preferred NUMA node, and a myriad of other random crap,
will cause this to happen.

This shouldn't be. Preserve the relative virtual deadline across such
dequeue/enqueue cycles.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Valentin Schneider <vschneid@redhat.com>
Tested-by: Valentin Schneider <vschneid@redhat.com>
Link: https://lkml.kernel.org/r/20240727105030.625119246@infradead.org
This commit is contained in:
Peter Zijlstra 2024-05-31 15:49:40 +02:00
parent fc1892becd
commit 82e9d0456e
3 changed files with 26 additions and 7 deletions

View File

@ -544,8 +544,10 @@ struct sched_entity {
u64 min_vruntime; u64 min_vruntime;
struct list_head group_node; struct list_head group_node;
unsigned int on_rq; unsigned char on_rq;
unsigned int sched_delayed; unsigned char sched_delayed;
unsigned char rel_deadline;
/* hole */
u64 exec_start; u64 exec_start;
u64 sum_exec_runtime; u64 sum_exec_runtime;

View File

@ -5270,6 +5270,12 @@ place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
se->vruntime = vruntime - lag; se->vruntime = vruntime - lag;
if (sched_feat(PLACE_REL_DEADLINE) && se->rel_deadline) {
se->deadline += se->vruntime;
se->rel_deadline = 0;
return;
}
/* /*
* When joining the competition; the existing tasks will be, * When joining the competition; the existing tasks will be,
* on average, halfway through their slice, as such start tasks * on average, halfway through their slice, as such start tasks
@ -5382,23 +5388,24 @@ static __always_inline void return_cfs_rq_runtime(struct cfs_rq *cfs_rq);
static bool static bool
dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags) dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
{ {
bool sleep = flags & DEQUEUE_SLEEP;
update_curr(cfs_rq); update_curr(cfs_rq);
if (flags & DEQUEUE_DELAYED) { if (flags & DEQUEUE_DELAYED) {
SCHED_WARN_ON(!se->sched_delayed); SCHED_WARN_ON(!se->sched_delayed);
} else { } else {
bool sleep = flags & DEQUEUE_SLEEP; bool delay = sleep;
/* /*
* DELAY_DEQUEUE relies on spurious wakeups, special task * DELAY_DEQUEUE relies on spurious wakeups, special task
* states must not suffer spurious wakeups, exempt them. * states must not suffer spurious wakeups, exempt them.
*/ */
if (flags & DEQUEUE_SPECIAL) if (flags & DEQUEUE_SPECIAL)
sleep = false; delay = false;
SCHED_WARN_ON(sleep && se->sched_delayed); SCHED_WARN_ON(delay && se->sched_delayed);
if (sched_feat(DELAY_DEQUEUE) && sleep && if (sched_feat(DELAY_DEQUEUE) && delay &&
!entity_eligible(cfs_rq, se)) { !entity_eligible(cfs_rq, se)) {
if (cfs_rq->next == se) if (cfs_rq->next == se)
cfs_rq->next = NULL; cfs_rq->next = NULL;
@ -5429,6 +5436,11 @@ dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
clear_buddies(cfs_rq, se); clear_buddies(cfs_rq, se);
update_entity_lag(cfs_rq, se); update_entity_lag(cfs_rq, se);
if (sched_feat(PLACE_REL_DEADLINE) && !sleep) {
se->deadline -= se->vruntime;
se->rel_deadline = 1;
}
if (se != cfs_rq->curr) if (se != cfs_rq->curr)
__dequeue_entity(cfs_rq, se); __dequeue_entity(cfs_rq, se);
se->on_rq = 0; se->on_rq = 0;
@ -12992,6 +13004,7 @@ static void switched_from_fair(struct rq *rq, struct task_struct *p)
if (p->se.sched_delayed) { if (p->se.sched_delayed) {
dequeue_task(rq, p, DEQUEUE_NOCLOCK | DEQUEUE_SLEEP); dequeue_task(rq, p, DEQUEUE_NOCLOCK | DEQUEUE_SLEEP);
p->se.sched_delayed = 0; p->se.sched_delayed = 0;
p->se.rel_deadline = 0;
if (sched_feat(DELAY_ZERO) && p->se.vlag > 0) if (sched_feat(DELAY_ZERO) && p->se.vlag > 0)
p->se.vlag = 0; p->se.vlag = 0;
} }

View File

@ -9,6 +9,10 @@ SCHED_FEAT(PLACE_LAG, true)
* Give new tasks half a slice to ease into the competition. * Give new tasks half a slice to ease into the competition.
*/ */
SCHED_FEAT(PLACE_DEADLINE_INITIAL, true) SCHED_FEAT(PLACE_DEADLINE_INITIAL, true)
/*
* Preserve relative virtual deadline on 'migration'.
*/
SCHED_FEAT(PLACE_REL_DEADLINE, true)
/* /*
* Inhibit (wakeup) preemption until the current task has either matched the * Inhibit (wakeup) preemption until the current task has either matched the
* 0-lag point or until it has exhausted its slice. * 0-lag point or until it has exhausted its slice.