mirror of
https://github.com/torvalds/linux.git
synced 2026-06-07 22:14:04 +02:00
cpufreq: Drop schedfreq governor
We all should be using (and improving) the schedutil governor now. Get rid of the non-upstream governor. Tested on Hikey. Change-Id: Ic660756536e5da51952738c3c18b94e31f58cd57 Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
This commit is contained in:
parent
d07b5deabc
commit
0dae60cb4e
|
|
@ -224,19 +224,6 @@ config CPU_FREQ_GOV_CONSERVATIVE
|
|||
|
||||
If in doubt, say N.
|
||||
|
||||
# Scheduler-driven ("sched") cpufreq governor; requires cpufreq and SMP.
config CPU_FREQ_GOV_SCHED
	bool "'sched' cpufreq governor"
	depends on CPU_FREQ && SMP
	select CPU_FREQ_GOV_COMMON
	help
	  'sched' - this governor scales cpu frequency from the
	  scheduler as a function of cpu capacity utilization. It does
	  not evaluate utilization on a periodic basis (as ondemand
	  does) but instead is event-driven by the scheduler.

	  If in doubt, say N.
|
||||
|
||||
config CPU_FREQ_GOV_SCHEDUTIL
|
||||
bool "'schedutil' cpufreq policy governor"
|
||||
depends on CPU_FREQ && SMP
|
||||
|
|
|
|||
|
|
@ -40,7 +40,6 @@ extern unsigned int sysctl_sched_min_granularity;
|
|||
extern unsigned int sysctl_sched_wakeup_granularity;
|
||||
extern unsigned int sysctl_sched_child_runs_first;
|
||||
extern unsigned int sysctl_sched_sync_hint_enable;
|
||||
extern unsigned int sysctl_sched_initial_task_util;
|
||||
extern unsigned int sysctl_sched_cstate_aware;
|
||||
#ifdef CONFIG_SCHED_WALT
|
||||
extern unsigned int sysctl_sched_use_walt_cpu_util;
|
||||
|
|
|
|||
|
|
@ -22,5 +22,4 @@ obj-$(CONFIG_SCHED_DEBUG) += debug.o
|
|||
obj-$(CONFIG_SCHED_TUNE) += tune.o
|
||||
obj-$(CONFIG_CGROUP_CPUACCT) += cpuacct.o
|
||||
obj-$(CONFIG_CPU_FREQ) += cpufreq.o
|
||||
obj-$(CONFIG_CPU_FREQ_GOV_SCHED) += cpufreq_sched.o
|
||||
obj-$(CONFIG_CPU_FREQ_GOV_SCHEDUTIL) += cpufreq_schedutil.o
|
||||
|
|
|
|||
|
|
@ -2982,91 +2982,6 @@ unsigned long long task_sched_runtime(struct task_struct *p)
|
|||
return ns;
|
||||
}
|
||||
|
||||
#ifdef CONFIG_CPU_FREQ_GOV_SCHED
|
||||
|
||||
static inline
|
||||
unsigned long add_capacity_margin(unsigned long cpu_capacity)
|
||||
{
|
||||
cpu_capacity = cpu_capacity * capacity_margin;
|
||||
cpu_capacity /= SCHED_CAPACITY_SCALE;
|
||||
return cpu_capacity;
|
||||
}
|
||||
|
||||
static inline
|
||||
unsigned long sum_capacity_reqs(unsigned long cfs_cap,
|
||||
struct sched_capacity_reqs *scr)
|
||||
{
|
||||
unsigned long total = add_capacity_margin(cfs_cap + scr->rt);
|
||||
return total += scr->dl;
|
||||
}
|
||||
|
||||
unsigned long boosted_cpu_util(int cpu);
|
||||
static void sched_freq_tick_pelt(int cpu)
|
||||
{
|
||||
unsigned long cpu_utilization = boosted_cpu_util(cpu);
|
||||
unsigned long capacity_curr = capacity_curr_of(cpu);
|
||||
struct sched_capacity_reqs *scr;
|
||||
|
||||
scr = &per_cpu(cpu_sched_capacity_reqs, cpu);
|
||||
if (sum_capacity_reqs(cpu_utilization, scr) < capacity_curr)
|
||||
return;
|
||||
|
||||
/*
|
||||
* To make free room for a task that is building up its "real"
|
||||
* utilization and to harm its performance the least, request
|
||||
* a jump to a higher OPP as soon as the margin of free capacity
|
||||
* is impacted (specified by capacity_margin).
|
||||
* Remember CPU utilization in sched_capacity_reqs should be normalised.
|
||||
*/
|
||||
cpu_utilization = cpu_utilization * SCHED_CAPACITY_SCALE / capacity_orig_of(cpu);
|
||||
set_cfs_cpu_capacity(cpu, true, cpu_utilization);
|
||||
}
|
||||
|
||||
#ifdef CONFIG_SCHED_WALT
/*
 * Tick-time check for @cpu when WALT is built in. Falls back to the PELT
 * variant when WALT is disabled or not selected for CPU utilization;
 * otherwise requests more capacity once the WALT utilization plus margin
 * exceeds the capacity currently provided.
 */
static void sched_freq_tick_walt(int cpu)
{
	unsigned long cpu_utilization = cpu_util_freq(cpu);
	unsigned long capacity_curr = capacity_curr_of(cpu);

	if (walt_disabled || !sysctl_sched_use_walt_cpu_util) {
		/*
		 * A void function must not "return <expr>;" in ISO C, so
		 * call the fallback and return separately.
		 */
		sched_freq_tick_pelt(cpu);
		return;
	}

	/*
	 * Add a margin to the WALT utilization to check if we will need to
	 * increase frequency.
	 * NOTE: WALT tracks a single CPU signal for all the scheduling
	 * classes, thus this margin is going to be added to the DL class as
	 * well, which is something we do not do in sched_freq_tick_pelt case.
	 */
	if (add_capacity_margin(cpu_utilization) <= capacity_curr)
		return;

	/*
	 * It is likely that the load is growing so we
	 * keep the added margin in our request as an
	 * extra boost.
	 * Remember CPU utilization in sched_capacity_reqs should be normalised.
	 */
	cpu_utilization = cpu_utilization * SCHED_CAPACITY_SCALE / capacity_orig_of(cpu);
	set_cfs_cpu_capacity(cpu, true, cpu_utilization);
}
#define _sched_freq_tick(cpu) sched_freq_tick_walt(cpu)
#else
#define _sched_freq_tick(cpu) sched_freq_tick_pelt(cpu)
#endif /* CONFIG_SCHED_WALT */
|
||||
|
||||
/*
 * Scheduler-tick hook: run the tick-time capacity check for @cpu, but
 * only while sched_freq() reports the governor as active.
 */
static void sched_freq_tick(int cpu)
{
	if (sched_freq())
		_sched_freq_tick(cpu);
}
|
||||
#else
|
||||
/* Without CONFIG_CPU_FREQ_GOV_SCHED the tick hook is an empty stub. */
static inline void sched_freq_tick(int cpu) { }
|
||||
#endif /* CONFIG_CPU_FREQ_GOV_SCHED */
|
||||
|
||||
/*
|
||||
* This function gets called by the timer code, with HZ frequency.
|
||||
* We call it with interrupts disabled.
|
||||
|
|
@ -3087,7 +3002,6 @@ void scheduler_tick(void)
|
|||
curr->sched_class->task_tick(rq, curr, 0);
|
||||
update_cpu_load_active(rq);
|
||||
calc_global_load_tick(rq);
|
||||
sched_freq_tick(cpu);
|
||||
raw_spin_unlock(&rq->lock);
|
||||
|
||||
perf_event_task_tick();
|
||||
|
|
|
|||
|
|
@ -1,525 +0,0 @@
|
|||
/*
|
||||
* Copyright (C) 2015 Michael Turquette <mturquette@linaro.org>
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License version 2 as
|
||||
* published by the Free Software Foundation.
|
||||
*/
|
||||
|
||||
#include <linux/cpufreq.h>
|
||||
#include <linux/module.h>
|
||||
#include <linux/kthread.h>
|
||||
#include <linux/percpu.h>
|
||||
#include <linux/irq_work.h>
|
||||
#include <linux/delay.h>
|
||||
#include <linux/string.h>
|
||||
|
||||
#define CREATE_TRACE_POINTS
|
||||
#include <trace/events/cpufreq_sched.h>
|
||||
|
||||
#include "sched.h"
|
||||
|
||||
/* Default throttle periods used to seed the tunables at policy init. */
#define THROTTLE_DOWN_NSEC 50000000 /* 50ms default */
|
||||
#define THROTTLE_UP_NSEC 500000 /* 500us default */
|
||||
|
||||
/* Static key tested by sched_freq(); adjusted on policy init and exit. */
struct static_key __read_mostly __sched_freq = STATIC_KEY_INIT_FALSE;
|
||||
/* Set at policy init when the cpufreq driver is reported as slow. */
static bool __read_mostly cpufreq_driver_slow;
|
||||
|
||||
/* Forward declaration; the governor is non-static when it is the default. */
#ifndef CONFIG_CPU_FREQ_DEFAULT_GOV_SCHED
|
||||
static struct cpufreq_governor cpufreq_gov_sched;
|
||||
#endif
|
||||
|
||||
/* Per-CPU flag: set to 1 on governor start, cleared to 0 on stop. */
static DEFINE_PER_CPU(unsigned long, enabled);
|
||||
/* Per-CPU capacity requests, zeroed for a policy's CPUs at policy init. */
DEFINE_PER_CPU(struct sched_capacity_reqs, cpu_sched_capacity_reqs);
|
||||
|
||||
/*
 * Governor tunables, exposed through the governor attribute set.
 * up_throttle_nsec / down_throttle_nsec are the throttle period lengths
 * (in nanoseconds) armed after each frequency change.
 */
struct gov_tunables {
|
||||
struct gov_attr_set attr_set;
|
||||
unsigned int up_throttle_nsec;
|
||||
unsigned int down_throttle_nsec;
|
||||
};
|
||||
|
||||
/**
 * gov_data - per-policy data internal to the governor
 * @up_throttle: next throttling period expiry if increasing OPP
 * @down_throttle: next throttling period expiry if decreasing OPP
 * @tunables: tunables (throttle period lengths) used by this policy; may be
 *	      the shared global instance
 * @tunables_hook: list hook linking this policy into the tunables' attr set
 * @task: worker thread for dvfs transition that may block/sleep
 * @irq_work: callback used to wake up worker thread
 * @requested_freq: last frequency requested by the sched governor
 *
 * struct gov_data is the per-policy cpufreq_sched-specific data structure. A
 * per-policy instance of it is created when the cpufreq_sched governor
 * handles CPUFREQ_GOV_POLICY_INIT and a pointer to it is stored in the
 * governor_data member of struct cpufreq_policy.
 *
 * Readers of this data must call down_read(policy->rwsem). Writers must
 * call down_write(policy->rwsem).
 */
struct gov_data {
|
||||
ktime_t up_throttle;
|
||||
ktime_t down_throttle;
|
||||
struct gov_tunables *tunables;
|
||||
struct list_head tunables_hook;
|
||||
struct task_struct *task;
|
||||
struct irq_work irq_work;
|
||||
unsigned int requested_freq;
|
||||
};
|
||||
|
||||
static void cpufreq_sched_try_driver_target(struct cpufreq_policy *policy,
|
||||
unsigned int freq)
|
||||
{
|
||||
struct gov_data *gd = policy->governor_data;
|
||||
|
||||
/* avoid race with cpufreq_sched_stop */
|
||||
if (!down_write_trylock(&policy->rwsem))
|
||||
return;
|
||||
|
||||
__cpufreq_driver_target(policy, freq, CPUFREQ_RELATION_L);
|
||||
|
||||
gd->up_throttle = ktime_add_ns(ktime_get(),
|
||||
gd->tunables->up_throttle_nsec);
|
||||
gd->down_throttle = ktime_add_ns(ktime_get(),
|
||||
gd->tunables->down_throttle_nsec);
|
||||
up_write(&policy->rwsem);
|
||||
}
|
||||
|
||||
static bool finish_last_request(struct gov_data *gd, unsigned int cur_freq)
|
||||
{
|
||||
ktime_t now = ktime_get();
|
||||
|
||||
ktime_t throttle = gd->requested_freq < cur_freq ?
|
||||
gd->down_throttle : gd->up_throttle;
|
||||
|
||||
if (ktime_after(now, throttle))
|
||||
return false;
|
||||
|
||||
while (1) {
|
||||
int usec_left = ktime_to_ns(ktime_sub(throttle, now));
|
||||
|
||||
usec_left /= NSEC_PER_USEC;
|
||||
trace_cpufreq_sched_throttled(usec_left);
|
||||
usleep_range(usec_left, usec_left + 100);
|
||||
now = ktime_get();
|
||||
if (ktime_after(now, throttle))
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* we pass in struct cpufreq_policy. This is safe because changing out the
|
||||
* policy requires a call to __cpufreq_governor(policy, CPUFREQ_GOV_STOP),
|
||||
* which tears down all of the data structures and __cpufreq_governor(policy,
|
||||
* CPUFREQ_GOV_START) will do a full rebuild, including this kthread with the
|
||||
* new policy pointer
|
||||
*/
|
||||
static int cpufreq_sched_thread(void *data)
|
||||
{
|
||||
struct sched_param param;
|
||||
struct cpufreq_policy *policy;
|
||||
struct gov_data *gd;
|
||||
unsigned int new_request = 0;
|
||||
unsigned int last_request = 0;
|
||||
int ret;
|
||||
|
||||
policy = (struct cpufreq_policy *) data;
|
||||
gd = policy->governor_data;
|
||||
|
||||
param.sched_priority = 50;
|
||||
ret = sched_setscheduler_nocheck(gd->task, SCHED_FIFO, ¶m);
|
||||
if (ret) {
|
||||
pr_warn("%s: failed to set SCHED_FIFO\n", __func__);
|
||||
do_exit(-EINVAL);
|
||||
} else {
|
||||
pr_debug("%s: kthread (%d) set to SCHED_FIFO\n",
|
||||
__func__, gd->task->pid);
|
||||
}
|
||||
|
||||
do {
|
||||
new_request = gd->requested_freq;
|
||||
if (new_request == last_request) {
|
||||
set_current_state(TASK_INTERRUPTIBLE);
|
||||
if (kthread_should_stop())
|
||||
break;
|
||||
schedule();
|
||||
} else {
|
||||
/*
|
||||
* if the frequency thread sleeps while waiting to be
|
||||
* unthrottled, start over to check for a newer request
|
||||
*/
|
||||
if (finish_last_request(gd, policy->cur))
|
||||
continue;
|
||||
last_request = new_request;
|
||||
cpufreq_sched_try_driver_target(policy, new_request);
|
||||
}
|
||||
} while (!kthread_should_stop());
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void cpufreq_sched_irq_work(struct irq_work *irq_work)
|
||||
{
|
||||
struct gov_data *gd;
|
||||
|
||||
gd = container_of(irq_work, struct gov_data, irq_work);
|
||||
if (!gd)
|
||||
return;
|
||||
|
||||
wake_up_process(gd->task);
|
||||
}
|
||||
|
||||
static void update_fdomain_capacity_request(int cpu)
|
||||
{
|
||||
unsigned int freq_new, index_new, cpu_tmp;
|
||||
struct cpufreq_policy *policy;
|
||||
struct gov_data *gd;
|
||||
unsigned long capacity = 0;
|
||||
|
||||
/*
|
||||
* Avoid grabbing the policy if possible. A test is still
|
||||
* required after locking the CPU's policy to avoid racing
|
||||
* with the governor changing.
|
||||
*/
|
||||
if (!per_cpu(enabled, cpu))
|
||||
return;
|
||||
|
||||
policy = cpufreq_cpu_get(cpu);
|
||||
if (IS_ERR_OR_NULL(policy))
|
||||
return;
|
||||
|
||||
if (policy->governor != &cpufreq_gov_sched ||
|
||||
!policy->governor_data)
|
||||
goto out;
|
||||
|
||||
gd = policy->governor_data;
|
||||
|
||||
/* find max capacity requested by cpus in this policy */
|
||||
for_each_cpu(cpu_tmp, policy->cpus) {
|
||||
struct sched_capacity_reqs *scr;
|
||||
|
||||
scr = &per_cpu(cpu_sched_capacity_reqs, cpu_tmp);
|
||||
capacity = max(capacity, scr->total);
|
||||
}
|
||||
|
||||
/* Convert the new maximum capacity request into a cpu frequency */
|
||||
freq_new = capacity * policy->cpuinfo.max_freq >> SCHED_CAPACITY_SHIFT;
|
||||
if (cpufreq_frequency_table_target(policy, policy->freq_table,
|
||||
freq_new, CPUFREQ_RELATION_L,
|
||||
&index_new))
|
||||
goto out;
|
||||
freq_new = policy->freq_table[index_new].frequency;
|
||||
|
||||
if (freq_new > policy->max)
|
||||
freq_new = policy->max;
|
||||
|
||||
if (freq_new < policy->min)
|
||||
freq_new = policy->min;
|
||||
|
||||
trace_cpufreq_sched_request_opp(cpu, capacity, freq_new,
|
||||
gd->requested_freq);
|
||||
if (freq_new == gd->requested_freq)
|
||||
goto out;
|
||||
|
||||
gd->requested_freq = freq_new;
|
||||
|
||||
/*
|
||||
* Throttling is not yet supported on platforms with fast cpufreq
|
||||
* drivers.
|
||||
*/
|
||||
if (cpufreq_driver_slow)
|
||||
irq_work_queue_on(&gd->irq_work, cpu);
|
||||
else
|
||||
cpufreq_sched_try_driver_target(policy, freq_new);
|
||||
|
||||
out:
|
||||
cpufreq_cpu_put(policy);
|
||||
}
|
||||
|
||||
#ifdef CONFIG_SCHED_WALT
|
||||
static inline unsigned long
|
||||
requested_capacity(struct sched_capacity_reqs *scr)
|
||||
{
|
||||
if (!walt_disabled && sysctl_sched_use_walt_cpu_util)
|
||||
return scr->cfs;
|
||||
return scr->cfs + scr->rt;
|
||||
}
|
||||
#else
|
||||
#define requested_capacity(scr) (scr->cfs + scr->rt)
|
||||
#endif
|
||||
|
||||
/*
 * Recompute the total capacity request of @cpu from its per-class
 * requests. When the total changed, store it and, if @request is true,
 * re-evaluate the frequency of the CPU's policy.
 */
void update_cpu_capacity_request(int cpu, bool request)
{
	struct sched_capacity_reqs *reqs;
	unsigned long total;

	/* The rq lock serializes access to the CPU's sched_capacity_reqs. */
	lockdep_assert_held(&cpu_rq(cpu)->lock);

	reqs = &per_cpu(cpu_sched_capacity_reqs, cpu);

	/* Margin applies to the requested capacity; DL is added as-is. */
	total = requested_capacity(reqs) * capacity_margin / SCHED_CAPACITY_SCALE;
	total += reqs->dl;

	if (total == reqs->total)
		return;

	trace_cpufreq_sched_update_capacity(cpu, request, reqs, total);

	reqs->total = total;
	if (request)
		update_fdomain_capacity_request(cpu);
}
|
||||
|
||||
/* Take one reference on the __sched_freq static key. */
static inline void set_sched_freq(void)
|
||||
{
|
||||
static_key_slow_inc(&__sched_freq);
|
||||
}
|
||||
|
||||
/* Drop one reference on the __sched_freq static key. */
static inline void clear_sched_freq(void)
|
||||
{
|
||||
static_key_slow_dec(&__sched_freq);
|
||||
}
|
||||
|
||||
/* Tunables */
|
||||
/* Shared tunables instance, used when governors are not per-policy. */
static struct gov_tunables *global_tunables;
|
||||
|
||||
/* Map an embedded attribute set back to its enclosing gov_tunables. */
static inline struct gov_tunables *to_tunables(struct gov_attr_set *attr_set)
|
||||
{
|
||||
return container_of(attr_set, struct gov_tunables, attr_set);
|
||||
}
|
||||
|
||||
static ssize_t up_throttle_nsec_show(struct gov_attr_set *attr_set, char *buf)
|
||||
{
|
||||
struct gov_tunables *tunables = to_tunables(attr_set);
|
||||
|
||||
return sprintf(buf, "%u\n", tunables->up_throttle_nsec);
|
||||
}
|
||||
|
||||
/*
 * sysfs store: parse @buf as an unsigned int and set the up-throttle
 * period (nanoseconds).
 *
 * Returns @count on success or the negative error from the parser. Values
 * that do not fit the unsigned int tunable are rejected instead of being
 * silently truncated.
 */
static ssize_t up_throttle_nsec_store(struct gov_attr_set *attr_set,
				      const char *buf, size_t count)
{
	struct gov_tunables *tunables = to_tunables(attr_set);
	unsigned int val;
	int ret;

	ret = kstrtouint(buf, 0, &val);
	if (ret < 0)
		return ret;

	tunables->up_throttle_nsec = val;
	return count;
}
|
||||
|
||||
static ssize_t down_throttle_nsec_show(struct gov_attr_set *attr_set, char *buf)
|
||||
{
|
||||
struct gov_tunables *tunables = to_tunables(attr_set);
|
||||
|
||||
return sprintf(buf, "%u\n", tunables->down_throttle_nsec);
|
||||
}
|
||||
|
||||
/*
 * sysfs store: parse @buf as an unsigned int and set the down-throttle
 * period (nanoseconds).
 *
 * Returns @count on success or the negative error from the parser. Values
 * that do not fit the unsigned int tunable are rejected instead of being
 * silently truncated.
 */
static ssize_t down_throttle_nsec_store(struct gov_attr_set *attr_set,
					const char *buf, size_t count)
{
	struct gov_tunables *tunables = to_tunables(attr_set);
	unsigned int val;
	int ret;

	ret = kstrtouint(buf, 0, &val);
	if (ret < 0)
		return ret;

	tunables->down_throttle_nsec = val;
	return count;
}
|
||||
|
||||
/* Read/write attributes backed by the *_show / *_store handlers above. */
static struct governor_attr up_throttle_nsec = __ATTR_RW(up_throttle_nsec);
|
||||
static struct governor_attr down_throttle_nsec = __ATTR_RW(down_throttle_nsec);
|
||||
|
||||
/* NULL-terminated list of the governor's attributes. */
static struct attribute *schedfreq_attributes[] = {
|
||||
&up_throttle_nsec.attr,
|
||||
&down_throttle_nsec.attr,
|
||||
NULL
|
||||
};
|
||||
|
||||
/* kobject type used for the tunables kobject created at policy init. */
static struct kobj_type tunables_ktype = {
|
||||
.default_attrs = schedfreq_attributes,
|
||||
.sysfs_ops = &governor_sysfs_ops,
|
||||
};
|
||||
|
||||
static int cpufreq_sched_policy_init(struct cpufreq_policy *policy)
|
||||
{
|
||||
struct gov_data *gd;
|
||||
int cpu;
|
||||
int rc;
|
||||
|
||||
for_each_cpu(cpu, policy->cpus)
|
||||
memset(&per_cpu(cpu_sched_capacity_reqs, cpu), 0,
|
||||
sizeof(struct sched_capacity_reqs));
|
||||
|
||||
gd = kzalloc(sizeof(*gd), GFP_KERNEL);
|
||||
if (!gd)
|
||||
return -ENOMEM;
|
||||
|
||||
policy->governor_data = gd;
|
||||
|
||||
if (!global_tunables) {
|
||||
gd->tunables = kzalloc(sizeof(*gd->tunables), GFP_KERNEL);
|
||||
if (!gd->tunables)
|
||||
goto free_gd;
|
||||
|
||||
gd->tunables->up_throttle_nsec =
|
||||
policy->cpuinfo.transition_latency ?
|
||||
policy->cpuinfo.transition_latency :
|
||||
THROTTLE_UP_NSEC;
|
||||
gd->tunables->down_throttle_nsec =
|
||||
THROTTLE_DOWN_NSEC;
|
||||
|
||||
rc = kobject_init_and_add(&gd->tunables->attr_set.kobj,
|
||||
&tunables_ktype,
|
||||
get_governor_parent_kobj(policy),
|
||||
"%s", cpufreq_gov_sched.name);
|
||||
if (rc)
|
||||
goto free_tunables;
|
||||
|
||||
gov_attr_set_init(&gd->tunables->attr_set,
|
||||
&gd->tunables_hook);
|
||||
|
||||
pr_debug("%s: throttle_threshold = %u [ns]\n",
|
||||
__func__, gd->tunables->up_throttle_nsec);
|
||||
|
||||
if (!have_governor_per_policy())
|
||||
global_tunables = gd->tunables;
|
||||
} else {
|
||||
gd->tunables = global_tunables;
|
||||
gov_attr_set_get(&global_tunables->attr_set,
|
||||
&gd->tunables_hook);
|
||||
}
|
||||
|
||||
policy->governor_data = gd;
|
||||
if (cpufreq_driver_is_slow()) {
|
||||
cpufreq_driver_slow = true;
|
||||
gd->task = kthread_create(cpufreq_sched_thread, policy,
|
||||
"kschedfreq:%d",
|
||||
cpumask_first(policy->related_cpus));
|
||||
if (IS_ERR_OR_NULL(gd->task)) {
|
||||
pr_err("%s: failed to create kschedfreq thread\n",
|
||||
__func__);
|
||||
goto free_tunables;
|
||||
}
|
||||
get_task_struct(gd->task);
|
||||
kthread_bind_mask(gd->task, policy->related_cpus);
|
||||
wake_up_process(gd->task);
|
||||
init_irq_work(&gd->irq_work, cpufreq_sched_irq_work);
|
||||
}
|
||||
|
||||
set_sched_freq();
|
||||
|
||||
return 0;
|
||||
|
||||
free_tunables:
|
||||
kfree(gd->tunables);
|
||||
free_gd:
|
||||
policy->governor_data = NULL;
|
||||
kfree(gd);
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
static int cpufreq_sched_policy_exit(struct cpufreq_policy *policy)
|
||||
{
|
||||
unsigned int count;
|
||||
struct gov_data *gd = policy->governor_data;
|
||||
|
||||
clear_sched_freq();
|
||||
if (cpufreq_driver_slow) {
|
||||
kthread_stop(gd->task);
|
||||
put_task_struct(gd->task);
|
||||
}
|
||||
|
||||
count = gov_attr_set_put(&gd->tunables->attr_set, &gd->tunables_hook);
|
||||
if (!count) {
|
||||
if (!have_governor_per_policy())
|
||||
global_tunables = NULL;
|
||||
kfree(gd->tunables);
|
||||
}
|
||||
|
||||
policy->governor_data = NULL;
|
||||
|
||||
kfree(gd);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int cpufreq_sched_start(struct cpufreq_policy *policy)
|
||||
{
|
||||
int cpu;
|
||||
|
||||
for_each_cpu(cpu, policy->cpus)
|
||||
per_cpu(enabled, cpu) = 1;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void cpufreq_sched_limits(struct cpufreq_policy *policy)
|
||||
{
|
||||
unsigned int clamp_freq;
|
||||
struct gov_data *gd = policy->governor_data;;
|
||||
|
||||
pr_debug("limit event for cpu %u: %u - %u kHz, currently %u kHz\n",
|
||||
policy->cpu, policy->min, policy->max,
|
||||
policy->cur);
|
||||
|
||||
clamp_freq = clamp(gd->requested_freq, policy->min, policy->max);
|
||||
|
||||
if (policy->cur != clamp_freq)
|
||||
__cpufreq_driver_target(policy, clamp_freq, CPUFREQ_RELATION_L);
|
||||
}
|
||||
|
||||
static int cpufreq_sched_stop(struct cpufreq_policy *policy)
|
||||
{
|
||||
int cpu;
|
||||
|
||||
for_each_cpu(cpu, policy->cpus)
|
||||
per_cpu(enabled, cpu) = 0;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int cpufreq_sched_setup(struct cpufreq_policy *policy,
|
||||
unsigned int event)
|
||||
{
|
||||
switch (event) {
|
||||
case CPUFREQ_GOV_POLICY_INIT:
|
||||
return cpufreq_sched_policy_init(policy);
|
||||
case CPUFREQ_GOV_POLICY_EXIT:
|
||||
return cpufreq_sched_policy_exit(policy);
|
||||
case CPUFREQ_GOV_START:
|
||||
return cpufreq_sched_start(policy);
|
||||
case CPUFREQ_GOV_STOP:
|
||||
return cpufreq_sched_stop(policy);
|
||||
case CPUFREQ_GOV_LIMITS:
|
||||
cpufreq_sched_limits(policy);
|
||||
break;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
/*
 * The "sched" governor descriptor. It has internal linkage unless
 * CONFIG_CPU_FREQ_DEFAULT_GOV_SCHED is set.
 */
#ifndef CONFIG_CPU_FREQ_DEFAULT_GOV_SCHED
|
||||
static
|
||||
#endif
|
||||
struct cpufreq_governor cpufreq_gov_sched = {
|
||||
.name = "sched",
|
||||
.governor = cpufreq_sched_setup,
|
||||
.owner = THIS_MODULE,
|
||||
};
|
||||
|
||||
/*
 * Init-time setup: clear the enabled flag of every possible CPU, then
 * register the governor with cpufreq. Returns the registration result.
 */
static int __init cpufreq_sched_init(void)
{
	int cpu;

	for_each_possible_cpu(cpu)
		per_cpu(enabled, cpu) = 0;

	return cpufreq_register_governor(&cpufreq_gov_sched);
}

/* Try to make this the default governor */
fs_initcall(cpufreq_sched_init);
|
||||
|
|
@ -54,7 +54,6 @@ unsigned int sysctl_sched_latency = 6000000ULL;
|
|||
unsigned int normalized_sysctl_sched_latency = 6000000ULL;
|
||||
|
||||
unsigned int sysctl_sched_sync_hint_enable = 1;
|
||||
unsigned int sysctl_sched_initial_task_util = 0;
|
||||
unsigned int sysctl_sched_cstate_aware = 1;
|
||||
|
||||
#ifdef CONFIG_SCHED_WALT
|
||||
|
|
@ -750,9 +749,7 @@ void init_entity_runnable_average(struct sched_entity *se)
|
|||
sa->load_sum = sa->load_avg * LOAD_AVG_MAX;
|
||||
/*
|
||||
* In previous Android versions, we used to have:
|
||||
* sa->util_avg = sched_freq() ?
|
||||
* sysctl_sched_initial_task_util :
|
||||
* scale_load_down(SCHED_LOAD_SCALE);
|
||||
* sa->util_avg = scale_load_down(SCHED_LOAD_SCALE);
|
||||
* sa->util_sum = sa->util_avg * LOAD_AVG_MAX;
|
||||
* However, that functionality has been moved to enqueue.
|
||||
* It is unclear if we should restore this in enqueue.
|
||||
|
|
@ -4668,21 +4665,6 @@ unsigned long boosted_cpu_util(int cpu);
|
|||
#define boosted_cpu_util(cpu) cpu_util_freq(cpu)
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_SMP
|
||||
static void update_capacity_of(int cpu)
|
||||
{
|
||||
unsigned long req_cap;
|
||||
|
||||
if (!sched_freq())
|
||||
return;
|
||||
|
||||
/* Normalize scale-invariant capacity to cpu. */
|
||||
req_cap = boosted_cpu_util(cpu);
|
||||
req_cap = req_cap * SCHED_CAPACITY_SCALE / capacity_orig_of(cpu);
|
||||
set_cfs_cpu_capacity(cpu, true, req_cap);
|
||||
}
|
||||
#endif
|
||||
|
||||
/*
|
||||
* The enqueue_task method is called before nr_running is
|
||||
* increased. Here we update the fair scheduling stats and
|
||||
|
|
@ -4695,7 +4677,6 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags)
|
|||
struct sched_entity *se = &p->se;
|
||||
#ifdef CONFIG_SMP
|
||||
int task_new = flags & ENQUEUE_WAKEUP_NEW;
|
||||
int task_wakeup = flags & ENQUEUE_WAKEUP;
|
||||
#endif
|
||||
|
||||
/*
|
||||
|
|
@ -4769,16 +4750,6 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags)
|
|||
rq->rd->overutilized = true;
|
||||
trace_sched_overutilized(true);
|
||||
}
|
||||
|
||||
/*
|
||||
* We want to potentially trigger a freq switch
|
||||
* request only for tasks that are waking up; this is
|
||||
* because we get here also during load balancing, but
|
||||
* in these cases it seems wise to trigger a single
|
||||
* request after load balancing is done.
|
||||
*/
|
||||
if (task_new || task_wakeup)
|
||||
update_capacity_of(cpu_of(rq));
|
||||
}
|
||||
|
||||
#endif /* CONFIG_SMP */
|
||||
|
|
@ -4854,25 +4825,8 @@ static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags)
|
|||
*/
|
||||
schedtune_dequeue_task(p, cpu_of(rq));
|
||||
|
||||
if (!se) {
|
||||
if (!se)
|
||||
walt_dec_cumulative_runnable_avg(rq, p);
|
||||
|
||||
/*
|
||||
* We want to potentially trigger a freq switch
|
||||
* request only for tasks that are going to sleep;
|
||||
* this is because we get here also during load
|
||||
* balancing, but in these cases it seems wise to
|
||||
* trigger a single request after load balancing is
|
||||
* done.
|
||||
*/
|
||||
if (task_sleep) {
|
||||
if (rq->cfs.nr_running)
|
||||
update_capacity_of(cpu_of(rq));
|
||||
else if (sched_freq())
|
||||
set_cfs_cpu_capacity(cpu_of(rq), false, 0); /* no normalization required for 0 */
|
||||
}
|
||||
}
|
||||
|
||||
#endif /* CONFIG_SMP */
|
||||
|
||||
hrtick_update(rq);
|
||||
|
|
@ -7709,10 +7663,6 @@ static void attach_one_task(struct rq *rq, struct task_struct *p)
|
|||
{
|
||||
raw_spin_lock(&rq->lock);
|
||||
attach_task(rq, p);
|
||||
/*
|
||||
* We want to potentially raise target_cpu's OPP.
|
||||
*/
|
||||
update_capacity_of(cpu_of(rq));
|
||||
raw_spin_unlock(&rq->lock);
|
||||
}
|
||||
|
||||
|
|
@ -7734,11 +7684,6 @@ static void attach_tasks(struct lb_env *env)
|
|||
attach_task(env->dst_rq, p);
|
||||
}
|
||||
|
||||
/*
|
||||
* We want to potentially raise env.dst_cpu's OPP.
|
||||
*/
|
||||
update_capacity_of(env->dst_cpu);
|
||||
|
||||
raw_spin_unlock(&env->dst_rq->lock);
|
||||
}
|
||||
|
||||
|
|
@ -9081,11 +9026,6 @@ static int load_balance(int this_cpu, struct rq *this_rq,
|
|||
* ld_moved - cumulative load moved across iterations
|
||||
*/
|
||||
cur_ld_moved = detach_tasks(&env);
|
||||
/*
|
||||
* We want to potentially lower env.src_cpu's OPP.
|
||||
*/
|
||||
if (cur_ld_moved)
|
||||
update_capacity_of(env.src_cpu);
|
||||
|
||||
/*
|
||||
* We've detached some tasks from busiest_rq. Every
|
||||
|
|
@ -9310,7 +9250,6 @@ static int idle_balance(struct rq *this_rq)
|
|||
struct sched_domain *sd;
|
||||
int pulled_task = 0;
|
||||
u64 curr_cost = 0;
|
||||
long removed_util=0;
|
||||
|
||||
idle_enter_fair(this_rq);
|
||||
|
||||
|
|
@ -9334,17 +9273,6 @@ static int idle_balance(struct rq *this_rq)
|
|||
|
||||
raw_spin_unlock(&this_rq->lock);
|
||||
|
||||
/*
|
||||
* If removed_util_avg is !0 we most probably migrated some task away
|
||||
* from this_cpu. In this case we might be willing to trigger an OPP
|
||||
* update, but we want to do so if we don't find anybody else to pull
|
||||
* here (we will trigger an OPP update with the pulled task's enqueue
|
||||
* anyway).
|
||||
*
|
||||
* Record removed_util before calling update_blocked_averages, and use
|
||||
* it below (before returning) to see if an OPP update is required.
|
||||
*/
|
||||
removed_util = atomic_long_read(&(this_rq->cfs).removed_util_avg);
|
||||
update_blocked_averages(this_cpu);
|
||||
rcu_read_lock();
|
||||
for_each_domain(this_cpu, sd) {
|
||||
|
|
@ -9409,12 +9337,6 @@ static int idle_balance(struct rq *this_rq)
|
|||
if (pulled_task) {
|
||||
idle_exit_fair(this_rq);
|
||||
this_rq->idle_stamp = 0;
|
||||
} else if (removed_util) {
|
||||
/*
|
||||
* No task pulled and someone has been migrated away.
|
||||
* Good case to trigger an OPP update.
|
||||
*/
|
||||
update_capacity_of(this_cpu);
|
||||
}
|
||||
|
||||
return pulled_task;
|
||||
|
|
@ -9488,13 +9410,8 @@ static int active_load_balance_cpu_stop(void *data)
|
|||
update_rq_clock(busiest_rq);
|
||||
|
||||
p = detach_one_task(&env);
|
||||
if (p) {
|
||||
if (p)
|
||||
schedstat_inc(sd, alb_pushed);
|
||||
/*
|
||||
* We want to potentially lower env.src_cpu's OPP.
|
||||
*/
|
||||
update_capacity_of(env.src_cpu);
|
||||
}
|
||||
else
|
||||
schedstat_inc(sd, alb_failed);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1489,41 +1489,6 @@ static void check_preempt_curr_rt(struct rq *rq, struct task_struct *p, int flag
|
|||
#endif
|
||||
}
|
||||
|
||||
#ifdef CONFIG_SMP
|
||||
static void sched_rt_update_capacity_req(struct rq *rq)
|
||||
{
|
||||
u64 total, used, age_stamp, avg;
|
||||
s64 delta;
|
||||
|
||||
if (!sched_freq())
|
||||
return;
|
||||
|
||||
sched_avg_update(rq);
|
||||
/*
|
||||
* Since we're reading these variables without serialization make sure
|
||||
* we read them once before doing sanity checks on them.
|
||||
*/
|
||||
age_stamp = READ_ONCE(rq->age_stamp);
|
||||
avg = READ_ONCE(rq->rt_avg);
|
||||
delta = rq_clock(rq) - age_stamp;
|
||||
|
||||
if (unlikely(delta < 0))
|
||||
delta = 0;
|
||||
|
||||
total = sched_avg_period() + delta;
|
||||
|
||||
used = div_u64(avg, total);
|
||||
if (unlikely(used > SCHED_CAPACITY_SCALE))
|
||||
used = SCHED_CAPACITY_SCALE;
|
||||
|
||||
set_rt_cpu_capacity(rq->cpu, 1, (unsigned long)(used));
|
||||
}
|
||||
#else
|
||||
/* Without CONFIG_SMP there is no RT capacity request to update. */
static inline void sched_rt_update_capacity_req(struct rq *rq)
|
||||
{ }
|
||||
|
||||
#endif
|
||||
|
||||
static struct sched_rt_entity *pick_next_rt_entity(struct rq *rq,
|
||||
struct rt_rq *rt_rq)
|
||||
{
|
||||
|
|
@ -1592,17 +1557,8 @@ pick_next_task_rt(struct rq *rq, struct task_struct *prev)
|
|||
if (prev->sched_class == &rt_sched_class)
|
||||
update_curr_rt(rq);
|
||||
|
||||
if (!rt_rq->rt_queued) {
|
||||
/*
|
||||
* The next task to be picked on this rq will have a lower
|
||||
* priority than rt tasks so we can spend some time to update
|
||||
* the capacity used by rt tasks based on the last activity.
|
||||
* This value will be used as an estimation of the next
|
||||
* activity.
|
||||
*/
|
||||
sched_rt_update_capacity_req(rq);
|
||||
if (!rt_rq->rt_queued)
|
||||
return NULL;
|
||||
}
|
||||
|
||||
put_prev_task(rq, prev);
|
||||
|
||||
|
|
@ -2322,9 +2278,6 @@ static void task_tick_rt(struct rq *rq, struct task_struct *p, int queued)
|
|||
|
||||
update_curr_rt(rq);
|
||||
|
||||
if (rq->rt.rt_nr_running)
|
||||
sched_rt_update_capacity_req(rq);
|
||||
|
||||
watchdog(rq, p);
|
||||
|
||||
/*
|
||||
|
|
|
|||
|
|
@ -1628,81 +1628,6 @@ static inline unsigned long cpu_util_freq(int cpu)
|
|||
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_CPU_FREQ_GOV_SCHED
|
||||
/* Upper bound of a normalised capacity value. */
#define capacity_max SCHED_CAPACITY_SCALE
|
||||
extern unsigned int capacity_margin;
|
||||
extern struct static_key __sched_freq;
|
||||
|
||||
/* True while the __sched_freq static key is enabled. */
static inline bool sched_freq(void)
|
||||
{
|
||||
return static_key_false(&__sched_freq);
|
||||
}
|
||||
|
||||
/*
|
||||
* sched_capacity_reqs expects capacity requests to be normalised.
|
||||
* All capacities should sum to the range of 0-1024.
|
||||
*/
|
||||
DECLARE_PER_CPU(struct sched_capacity_reqs, cpu_sched_capacity_reqs);
|
||||
void update_cpu_capacity_request(int cpu, bool request);
|
||||
|
||||
/*
 * Record a new CFS capacity request for @cpu. When the stored value
 * changes, the CPU's total request is recomputed; @request is passed
 * through to update_cpu_capacity_request().
 */
static inline void set_cfs_cpu_capacity(int cpu, bool request,
					unsigned long capacity)
{
	struct sched_capacity_reqs *scr = &per_cpu(cpu_sched_capacity_reqs, cpu);

#ifdef CONFIG_SCHED_WALT
	if (!walt_disabled && sysctl_sched_use_walt_cpu_util) {
		/* Same type as @capacity: no narrowing, no mixed-sign compare. */
		unsigned long rtdl = scr->rt + scr->dl;
		/*
		 * WALT tracks the utilization of a CPU considering the load
		 * generated by all the scheduling classes.
		 * Since the following call to:
		 * update_cpu_capacity_request
		 * is already adding the RT and DL utilizations let's remove
		 * these contributions from the WALT signal.
		 */
		if (capacity > rtdl)
			capacity -= rtdl;
		else
			capacity = 0;
	}
#endif
	if (scr->cfs != capacity) {
		scr->cfs = capacity;
		update_cpu_capacity_request(cpu, request);
	}
}
|
||||
|
||||
/*
 * Record a new RT capacity request for @cpu; when the stored value
 * changes, recompute the CPU's total request.
 */
static inline void set_rt_cpu_capacity(int cpu, bool request,
				       unsigned long capacity)
{
	struct sched_capacity_reqs *scr = &per_cpu(cpu_sched_capacity_reqs, cpu);

	if (scr->rt == capacity)
		return;

	scr->rt = capacity;
	update_cpu_capacity_request(cpu, request);
}
|
||||
|
||||
/*
 * Record a new DL capacity request for @cpu; when the stored value
 * changes, recompute the CPU's total request.
 */
static inline void set_dl_cpu_capacity(int cpu, bool request,
				       unsigned long capacity)
{
	struct sched_capacity_reqs *scr = &per_cpu(cpu_sched_capacity_reqs, cpu);

	if (scr->dl == capacity)
		return;

	scr->dl = capacity;
	update_cpu_capacity_request(cpu, request);
}
|
||||
#else
|
||||
/*
 * Stubs for builds without CONFIG_CPU_FREQ_GOV_SCHED: sched_freq() is
 * always false and the capacity setters do nothing.
 */
static inline bool sched_freq(void) { return false; }
|
||||
static inline void set_cfs_cpu_capacity(int cpu, bool request,
|
||||
unsigned long capacity)
|
||||
{ }
|
||||
static inline void set_rt_cpu_capacity(int cpu, bool request,
|
||||
unsigned long capacity)
|
||||
{ }
|
||||
static inline void set_dl_cpu_capacity(int cpu, bool request,
|
||||
unsigned long capacity)
|
||||
{ }
|
||||
#endif
|
||||
|
||||
static inline void sched_rt_avg_update(struct rq *rq, u64 rt_delta)
|
||||
{
|
||||
rq->rt_avg += rt_delta * arch_scale_freq_capacity(NULL, cpu_of(rq));
|
||||
|
|
|
|||
|
|
@ -342,13 +342,6 @@ static struct ctl_table kern_table[] = {
|
|||
.proc_handler = proc_dointvec,
|
||||
},
|
||||
#endif
|
||||
{
|
||||
.procname = "sched_initial_task_util",
|
||||
.data = &sysctl_sched_initial_task_util,
|
||||
.maxlen = sizeof(unsigned int),
|
||||
.mode = 0644,
|
||||
.proc_handler = proc_dointvec,
|
||||
},
|
||||
{
|
||||
.procname = "sched_cstate_aware",
|
||||
.data = &sysctl_sched_cstate_aware,
|
||||
|
|
|
|||
Loading…
Reference in New Issue
Block a user