diff --git a/include/trace/events/sched.h b/include/trace/events/sched.h index 947dbcae17a7..23cee9056d8e 100644 --- a/include/trace/events/sched.h +++ b/include/trace/events/sched.h @@ -878,9 +878,9 @@ TRACE_EVENT(sched_boost_cpu, TRACE_EVENT(sched_tune_tasks_update, TP_PROTO(struct task_struct *tsk, int cpu, int tasks, int idx, - int boost, int max_boost), + int boost, int max_boost, u64 group_ts), - TP_ARGS(tsk, cpu, tasks, idx, boost, max_boost), + TP_ARGS(tsk, cpu, tasks, idx, boost, max_boost, group_ts), TP_STRUCT__entry( __array( char, comm, TASK_COMM_LEN ) @@ -890,6 +890,7 @@ TRACE_EVENT(sched_tune_tasks_update, __field( int, idx ) __field( int, boost ) __field( int, max_boost ) + __field( u64, group_ts ) ), TP_fast_assign( @@ -900,13 +901,15 @@ TRACE_EVENT(sched_tune_tasks_update, __entry->idx = idx; __entry->boost = boost; __entry->max_boost = max_boost; + __entry->group_ts = group_ts; ), TP_printk("pid=%d comm=%s " - "cpu=%d tasks=%d idx=%d boost=%d max_boost=%d", + "cpu=%d tasks=%d idx=%d boost=%d max_boost=%d timeout=%llu", __entry->pid, __entry->comm, __entry->cpu, __entry->tasks, __entry->idx, - __entry->boost, __entry->max_boost) + __entry->boost, __entry->max_boost, + __entry->group_ts) ); /* diff --git a/kernel/sched/features.h b/kernel/sched/features.h index 5590f47a6e78..50bdfd7c3321 100644 --- a/kernel/sched/features.h +++ b/kernel/sched/features.h @@ -109,3 +109,14 @@ SCHED_FEAT(EAS_PREFER_IDLE, true) * Request max frequency from schedutil whenever a RT task is running. */ SCHED_FEAT(SUGOV_RT_MAX_FREQ, false) + +/* + * Apply schedtune boost hold to tasks of all sched classes. + * If enabled, schedtune will hold the boost applied to a CPU + * for 50ms regardless of task activation - if the task is + * still running 50ms later, the boost hold expires and schedtune + * boost will expire immediately the task stops. + * If disabled, this behaviour will only apply to tasks of the + * RT class. + */ +SCHED_FEAT(SCHEDTUNE_BOOST_HOLD_ALL, false) diff --git a/kernel/sched/tune.c b/kernel/sched/tune.c index 7e0630c52b81..3b231c639fe4 100644 --- a/kernel/sched/tune.c +++ b/kernel/sched/tune.c @@ -13,6 +13,9 @@ bool schedtune_initialized = false; extern struct reciprocal_value schedtune_spc_rdiv; +/* We hold schedtune boost in effect for at least this long */ +#define SCHEDTUNE_BOOST_HOLD_NS 50000000ULL + /* * EAS scheduler tunables for task groups. * @@ -151,6 +154,7 @@ static struct schedtune *allocated_group[BOOSTGROUPS_COUNT] = { struct boost_groups { /* Maximum boost value for all RUNNABLE tasks on a CPU */ int boost_max; + u64 boost_ts; struct { /* True when this boost group maps an actual cgroup */ bool valid; @@ -158,6 +162,8 @@ struct boost_groups { int boost; /* Count of RUNNABLE tasks on that boost group */ unsigned tasks; + /* Timestamp of boost activation */ + u64 ts; } group[BOOSTGROUPS_COUNT]; /* CPU's boost group locking */ raw_spinlock_t lock; @@ -166,15 +172,31 @@ struct boost_groups { /* Boost groups affecting each CPU in the system */ DEFINE_PER_CPU(struct boost_groups, cpu_boost_groups); +static inline bool schedtune_boost_timeout(u64 now, u64 ts) +{ + return ((now - ts) > SCHEDTUNE_BOOST_HOLD_NS); +} + +static inline bool +schedtune_boost_group_active(int idx, struct boost_groups* bg, u64 now) +{ + if (bg->group[idx].tasks) + return true; + + return !schedtune_boost_timeout(now, bg->group[idx].ts); +} + static void -schedtune_cpu_update(int cpu) +schedtune_cpu_update(int cpu, u64 now) { struct boost_groups *bg = &per_cpu(cpu_boost_groups, cpu); int boost_max; + u64 boost_ts; int idx; /* The root boost group is always active */ boost_max = bg->group[0].boost; + boost_ts = now; for (idx = 1; idx < BOOSTGROUPS_COUNT; ++idx) { /* Ignore non boostgroups not mapping a cgroup */ @@ -183,12 +205,18 @@ schedtune_cpu_update(int cpu) /* * A boost group affects a CPU only if it has - * RUNNABLE tasks on that CPU + * RUNNABLE tasks on that CPU or it has hold + * in effect from a previous task. */ - if (bg->group[idx].tasks == 0) + if (!schedtune_boost_group_active(idx, bg, now)) continue; - boost_max = max(boost_max, bg->group[idx].boost); + /* This boost group is active */ + if (boost_max > bg->group[idx].boost) + continue; + + boost_max = bg->group[idx].boost; + boost_ts = bg->group[idx].ts; } /* Ensures boost_max is non-negative when all cgroup boost values @@ -196,6 +224,7 @@ schedtune_cpu_update(int cpu) * task stacking and frequency spikes.*/ boost_max = max(boost_max, 0); bg->boost_max = boost_max; + bg->boost_ts = boost_ts; } static int @@ -205,6 +234,7 @@ schedtune_boostgroup_update(int idx, int boost) int cur_boost_max; int old_boost; int cpu; + u64 now; /* Update per CPU boost groups */ for_each_possible_cpu(cpu) { @@ -225,15 +255,19 @@ schedtune_boostgroup_update(int idx, int boost) bg->group[idx].boost = boost; /* Check if this update increase current max */ - if (boost > cur_boost_max && bg->group[idx].tasks) { + now = sched_clock_cpu(cpu); + if (boost > cur_boost_max && + schedtune_boost_group_active(idx, bg, now)) { bg->boost_max = boost; + bg->boost_ts = bg->group[idx].ts; + trace_sched_tune_boostgroup_update(cpu, 1, bg->boost_max); continue; } /* Check if this update has decreased current max */ if (cur_boost_max == old_boost && old_boost > boost) { - schedtune_cpu_update(cpu); + schedtune_cpu_update(cpu, now); trace_sched_tune_boostgroup_update(cpu, -1, bg->boost_max); continue; } @@ -247,6 +281,15 @@ schedtune_boostgroup_update(int idx, int boost) #define ENQUEUE_TASK 1 #define DEQUEUE_TASK -1 +static inline bool +schedtune_update_timestamp(struct task_struct *p) +{ + if (sched_feat(SCHEDTUNE_BOOST_HOLD_ALL)) + return true; + + return task_has_rt_policy(p); +} + static inline void schedtune_tasks_update(struct task_struct *p, int cpu, int idx, int task_count) { @@ -256,12 +299,21 @@ schedtune_tasks_update(struct task_struct *p, int cpu, int idx, int task_count) /* Update boosted tasks count while avoiding to make it negative */ bg->group[idx].tasks = max(0, tasks); - trace_sched_tune_tasks_update(p, cpu, tasks, idx, - bg->group[idx].boost, bg->boost_max); + /* Update timeout on enqueue */ + if (task_count > 0) { + u64 now = sched_clock_cpu(cpu); - /* Boost group activation or deactivation on that RQ */ - if (tasks == 1 || tasks == 0) - schedtune_cpu_update(cpu); + if (schedtune_update_timestamp(p)) + bg->group[idx].ts = now; + + /* Boost group activation or deactivation on that RQ */ + if (bg->group[idx].tasks == 1) + schedtune_cpu_update(cpu, now); + } + + trace_sched_tune_tasks_update(p, cpu, tasks, idx, + bg->group[idx].boost, bg->boost_max, + bg->group[idx].ts); } /* @@ -305,6 +357,7 @@ int schedtune_can_attach(struct cgroup_taskset *tset) int src_bg; /* Source boost group index */ int dst_bg; /* Destination boost group index */ int tasks; + u64 now; if (unlikely(!schedtune_initialized)) return 0; @@ -355,13 +408,15 @@ int schedtune_can_attach(struct cgroup_taskset *tset) bg->group[src_bg].tasks = max(0, tasks); bg->group[dst_bg].tasks += 1; + /* Update boost hold start for this group */ + now = sched_clock_cpu(cpu); + bg->group[dst_bg].ts = now; + + /* Force boost group re-evaluation at next boost check */ + bg->boost_ts = now - SCHEDTUNE_BOOST_HOLD_NS; + raw_spin_unlock(&bg->lock); task_rq_unlock(rq, task, &rq_flags); - - /* Update CPU boost group */ - if (bg->group[src_bg].tasks == 0 || bg->group[dst_bg].tasks == 1) - schedtune_cpu_update(task_cpu(task)); - } return 0; @@ -408,8 +463,15 @@ void schedtune_dequeue_task(struct task_struct *p, int cpu) int schedtune_cpu_boost(int cpu) { struct boost_groups *bg; + u64 now; bg = &per_cpu(cpu_boost_groups, cpu); + now = sched_clock_cpu(cpu); + + /* Check to see if we have a hold in effect */ + if (schedtune_boost_timeout(now, bg->boost_ts)) + schedtune_cpu_update(cpu, now); + return bg->boost_max; } @@ -515,6 +577,7 @@ schedtune_boostgroup_init(struct schedtune *st, int idx) bg = &per_cpu(cpu_boost_groups, cpu); bg->group[idx].boost = 0; bg->group[idx].valid = true; + bg->group[idx].ts = 0; } /* Keep track of allocated boost groups */