mirror of
https://github.com/torvalds/linux.git
synced 2026-05-12 16:18:45 +02:00
sched/fair: Revert 6d71a9c616 ("sched/fair: Fix EEVDF entity placement bug causing scheduling lag")
Zicheng Qu reported that, because avg_vruntime() always includes cfs_rq->curr, when ->on_rq, place_entity() doesn't work right. Specifically, the lag scaling in place_entity() relies on avg_vruntime() being the state *before* placement of the new entity. However in this case avg_vruntime() will actually already include the entity, which breaks things. Also, Zicheng Qu argues that avg_vruntime should be invariant under reweight. IOW commit6d71a9c616("sched/fair: Fix EEVDF entity placement bug causing scheduling lag") was wrong! The issue reported in6d71a9c616could possibly be explained by rounding artifacts -- notably the extreme weight '2' is outside of the range of avg_vruntime/sum_w_vruntime, since that uses scale_load_down(). By scaling vruntime by the real weight, but accounting it in vruntime with a factor 1024 more, the average moves significantly. However, that is now cured. Tested by reverting66951e4860("sched/fair: Fix update_cfs_group() vs DELAY_DEQUEUE") and tracing vruntime and vlag figures again. Reported-by: Zicheng Qu <quzicheng@huawei.com> Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> Reviewed-by: Vincent Guittot <vincent.guittot@linaro.org> Tested-by: K Prateek Nayak <kprateek.nayak@amd.com> Tested-by: Shubhang Kaushik <shubhang@os.amperecomputing.com> Link: https://patch.msgid.link/20260219080625.066102672%40infradead.org
This commit is contained in:
parent
4823725d9d
commit
101f3498b4
|
|
@ -822,17 +822,22 @@ static inline u64 cfs_rq_max_slice(struct cfs_rq *cfs_rq);
|
|||
*
|
||||
* -r_max < lag < max(r_max, q)
|
||||
*/
|
||||
static void update_entity_lag(struct cfs_rq *cfs_rq, struct sched_entity *se)
|
||||
static s64 entity_lag(struct cfs_rq *cfs_rq, struct sched_entity *se, u64 avruntime)
|
||||
{
|
||||
u64 max_slice = cfs_rq_max_slice(cfs_rq) + TICK_NSEC;
|
||||
s64 vlag, limit;
|
||||
|
||||
WARN_ON_ONCE(!se->on_rq);
|
||||
|
||||
vlag = avg_vruntime(cfs_rq) - se->vruntime;
|
||||
vlag = avruntime - se->vruntime;
|
||||
limit = calc_delta_fair(max_slice, se);
|
||||
|
||||
se->vlag = clamp(vlag, -limit, limit);
|
||||
return clamp(vlag, -limit, limit);
|
||||
}
|
||||
|
||||
static void update_entity_lag(struct cfs_rq *cfs_rq, struct sched_entity *se)
|
||||
{
|
||||
WARN_ON_ONCE(!se->on_rq);
|
||||
|
||||
se->vlag = entity_lag(cfs_rq, se, avg_vruntime(cfs_rq));
|
||||
}
|
||||
|
||||
/*
|
||||
|
|
@ -3898,23 +3903,125 @@ dequeue_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *se)
|
|||
se_weight(se) * -se->avg.load_sum);
|
||||
}
|
||||
|
||||
static void place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags);
|
||||
static void
|
||||
rescale_entity(struct sched_entity *se, unsigned long weight, bool rel_vprot)
|
||||
{
|
||||
unsigned long old_weight = se->load.weight;
|
||||
|
||||
/*
|
||||
* VRUNTIME
|
||||
* --------
|
||||
*
|
||||
* COROLLARY #1: The virtual runtime of the entity needs to be
|
||||
* adjusted if re-weight at !0-lag point.
|
||||
*
|
||||
* Proof: For contradiction assume this is not true, so we can
|
||||
* re-weight without changing vruntime at !0-lag point.
|
||||
*
|
||||
* Weight VRuntime Avg-VRuntime
|
||||
* before w v V
|
||||
* after w' v' V'
|
||||
*
|
||||
* Since lag needs to be preserved through re-weight:
|
||||
*
|
||||
* lag = (V - v)*w = (V'- v')*w', where v = v'
|
||||
* ==> V' = (V - v)*w/w' + v (1)
|
||||
*
|
||||
* Let W be the total weight of the entities before reweight,
|
||||
* since V' is the new weighted average of entities:
|
||||
*
|
||||
* V' = (WV + w'v - wv) / (W + w' - w) (2)
|
||||
*
|
||||
* by using (1) & (2) we obtain:
|
||||
*
|
||||
* (WV + w'v - wv) / (W + w' - w) = (V - v)*w/w' + v
|
||||
* ==> (WV-Wv+Wv+w'v-wv)/(W+w'-w) = (V - v)*w/w' + v
|
||||
* ==> (WV - Wv)/(W + w' - w) + v = (V - v)*w/w' + v
|
||||
* ==> (V - v)*W/(W + w' - w) = (V - v)*w/w' (3)
|
||||
*
|
||||
* Since we are doing at !0-lag point which means V != v, we
|
||||
* can simplify (3):
|
||||
*
|
||||
* ==> W / (W + w' - w) = w / w'
|
||||
* ==> Ww' = Ww + ww' - ww
|
||||
* ==> W * (w' - w) = w * (w' - w)
|
||||
* ==> W = w (re-weight indicates w' != w)
|
||||
*
|
||||
* So the cfs_rq contains only one entity, hence vruntime of
|
||||
* the entity @v should always equal to the cfs_rq's weighted
|
||||
* average vruntime @V, which means we will always re-weight
|
||||
* at 0-lag point, thus breach assumption. Proof completed.
|
||||
*
|
||||
*
|
||||
* COROLLARY #2: Re-weight does NOT affect weighted average
|
||||
* vruntime of all the entities.
|
||||
*
|
||||
* Proof: According to corollary #1, Eq. (1) should be:
|
||||
*
|
||||
* (V - v)*w = (V' - v')*w'
|
||||
* ==> v' = V' - (V - v)*w/w' (4)
|
||||
*
|
||||
* According to the weighted average formula, we have:
|
||||
*
|
||||
* V' = (WV - wv + w'v') / (W - w + w')
|
||||
* = (WV - wv + w'(V' - (V - v)w/w')) / (W - w + w')
|
||||
* = (WV - wv + w'V' - Vw + wv) / (W - w + w')
|
||||
* = (WV + w'V' - Vw) / (W - w + w')
|
||||
*
|
||||
* ==> V'*(W - w + w') = WV + w'V' - Vw
|
||||
* ==> V' * (W - w) = (W - w) * V (5)
|
||||
*
|
||||
* If the entity is the only one in the cfs_rq, then reweight
|
||||
* always occurs at 0-lag point, so V won't change. Or else
|
||||
* there are other entities, hence W != w, then Eq. (5) turns
|
||||
* into V' = V. So V won't change in either case, proof done.
|
||||
*
|
||||
*
|
||||
* So according to corollary #1 & #2, the effect of re-weight
|
||||
* on vruntime should be:
|
||||
*
|
||||
* v' = V' - (V - v) * w / w' (4)
|
||||
* = V - (V - v) * w / w'
|
||||
* = V - vl * w / w'
|
||||
* = V - vl'
|
||||
*/
|
||||
se->vlag = div64_long(se->vlag * old_weight, weight);
|
||||
|
||||
/*
|
||||
* DEADLINE
|
||||
* --------
|
||||
*
|
||||
* When the weight changes, the virtual time slope changes and
|
||||
* we should adjust the relative virtual deadline accordingly.
|
||||
*
|
||||
* d' = v' + (d - v)*w/w'
|
||||
* = V' - (V - v)*w/w' + (d - v)*w/w'
|
||||
* = V - (V - v)*w/w' + (d - v)*w/w'
|
||||
* = V + (d - V)*w/w'
|
||||
*/
|
||||
if (se->rel_deadline)
|
||||
se->deadline = div64_long(se->deadline * old_weight, weight);
|
||||
|
||||
if (rel_vprot)
|
||||
se->vprot = div64_long(se->vprot * old_weight, weight);
|
||||
}
|
||||
|
||||
static void reweight_entity(struct cfs_rq *cfs_rq, struct sched_entity *se,
|
||||
unsigned long weight)
|
||||
{
|
||||
bool curr = cfs_rq->curr == se;
|
||||
bool rel_vprot = false;
|
||||
u64 vprot;
|
||||
u64 avruntime = 0;
|
||||
|
||||
if (se->on_rq) {
|
||||
/* commit outstanding execution time */
|
||||
update_curr(cfs_rq);
|
||||
update_entity_lag(cfs_rq, se);
|
||||
se->deadline -= se->vruntime;
|
||||
avruntime = avg_vruntime(cfs_rq);
|
||||
se->vlag = entity_lag(cfs_rq, se, avruntime);
|
||||
se->deadline -= avruntime;
|
||||
se->rel_deadline = 1;
|
||||
if (curr && protect_slice(se)) {
|
||||
vprot = se->vprot - se->vruntime;
|
||||
se->vprot -= avruntime;
|
||||
rel_vprot = true;
|
||||
}
|
||||
|
||||
|
|
@ -3925,30 +4032,23 @@ static void reweight_entity(struct cfs_rq *cfs_rq, struct sched_entity *se,
|
|||
}
|
||||
dequeue_load_avg(cfs_rq, se);
|
||||
|
||||
/*
|
||||
* Because we keep se->vlag = V - v_i, while: lag_i = w_i*(V - v_i),
|
||||
* we need to scale se->vlag when w_i changes.
|
||||
*/
|
||||
se->vlag = div64_long(se->vlag * se->load.weight, weight);
|
||||
if (se->rel_deadline)
|
||||
se->deadline = div64_long(se->deadline * se->load.weight, weight);
|
||||
|
||||
if (rel_vprot)
|
||||
vprot = div64_long(vprot * se->load.weight, weight);
|
||||
rescale_entity(se, weight, rel_vprot);
|
||||
|
||||
update_load_set(&se->load, weight);
|
||||
|
||||
do {
|
||||
u32 divider = get_pelt_divider(&se->avg);
|
||||
|
||||
se->avg.load_avg = div_u64(se_weight(se) * se->avg.load_sum, divider);
|
||||
} while (0);
|
||||
|
||||
enqueue_load_avg(cfs_rq, se);
|
||||
if (se->on_rq) {
|
||||
place_entity(cfs_rq, se, 0);
|
||||
if (rel_vprot)
|
||||
se->vprot = se->vruntime + vprot;
|
||||
se->vprot += avruntime;
|
||||
se->deadline += avruntime;
|
||||
se->rel_deadline = 0;
|
||||
se->vruntime = avruntime - se->vlag;
|
||||
|
||||
update_load_add(&cfs_rq->load, se->load.weight);
|
||||
if (!curr)
|
||||
__enqueue_entity(cfs_rq, se);
|
||||
|
|
@ -5306,7 +5406,7 @@ place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
|
|||
|
||||
se->vruntime = vruntime - lag;
|
||||
|
||||
if (se->rel_deadline) {
|
||||
if (sched_feat(PLACE_REL_DEADLINE) && se->rel_deadline) {
|
||||
se->deadline += se->vruntime;
|
||||
se->rel_deadline = 0;
|
||||
return;
|
||||
|
|
|
|||
Loading…
Reference in New Issue
Block a user