Merge remote-tracking branch 'lsk/v3.10/topic/big.LITTLE' into linux-linaro-lsk

This commit is contained in:
Mark Brown 2013-07-18 16:42:46 +01:00
commit 38b5268356
20 changed files with 1873 additions and 66 deletions

View File

@ -16,6 +16,9 @@ Required properties:
"arm,arm1176-pmu"
"arm,arm1136-pmu"
- interrupts : 1 combined interrupt or 1 per core.
- cluster : a phandle to the cluster to which it belongs
If there is more than one cluster with the same CPU type,
then there should be a separate PMU node per cluster.
Example:

View File

@ -1240,6 +1240,15 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
See comment before ip2_setup() in
drivers/char/ip2/ip2base.c.
irqaffinity= [SMP] Set the default irq affinity mask
Format:
<cpu number>,...,<cpu number>
or
<cpu number>-<cpu number>
(must be a positive range in ascending order)
or a mixture
<cpu number>,...,<cpu number>-<cpu number>
irqfixup [HW]
When an interrupt is not handled search all handlers
for it. Intended to get systems with badly broken

View File

@ -1494,6 +1494,90 @@ config SCHED_SMT
MultiThreading at a cost of slightly increased overhead in some
places. If unsure say N here.
config DISABLE_CPU_SCHED_DOMAIN_BALANCE
bool "(EXPERIMENTAL) Disable CPU level scheduler load-balancing"
help
Disables scheduler load-balancing at CPU sched domain level.
config SCHED_HMP
bool "(EXPERIMENTAL) Heterogeneous multiprocessor scheduling"
depends on DISABLE_CPU_SCHED_DOMAIN_BALANCE && SCHED_MC && FAIR_GROUP_SCHED && !SCHED_AUTOGROUP
help
Experimental scheduler optimizations for heterogeneous platforms.
Attempts to introspectively select task affinity to optimize power
and performance. Basic support for multiple (>2) cpu types is in place,
but it has only been tested with two types of cpus.
There is currently no support for migration of task groups, hence
!SCHED_AUTOGROUP. Furthermore, normal load-balancing must be disabled
between cpus of different type (DISABLE_CPU_SCHED_DOMAIN_BALANCE).
config SCHED_HMP_PRIO_FILTER
bool "(EXPERIMENTAL) Filter HMP migrations by task priority"
depends on SCHED_HMP
help
Enables task priority based HMP migration filter. Any task with
a NICE value above the threshold will always be on low-power cpus
with less compute capacity.
config SCHED_HMP_PRIO_FILTER_VAL
int "NICE priority threshold"
default 5
depends on SCHED_HMP_PRIO_FILTER
config HMP_FAST_CPU_MASK
string "HMP scheduler fast CPU mask"
depends on SCHED_HMP
help
Leave empty to use device tree information.
Specify the cpuids of the fast CPUs in the system as a list string,
e.g. cpuid 0+1 should be specified as 0-1.
config HMP_SLOW_CPU_MASK
string "HMP scheduler slow CPU mask"
depends on SCHED_HMP
help
Leave empty to use device tree information.
Specify the cpuids of the slow CPUs in the system as a list string,
e.g. cpuid 0+1 should be specified as 0-1.
config HMP_VARIABLE_SCALE
bool "Allows changing the load tracking scale through sysfs"
depends on SCHED_HMP
help
When turned on, this option exports the thresholds and load average
period value for the load tracking patches through sysfs.
The values can be modified to change the rate of load accumulation
and the thresholds used for HMP migration.
load_avg_period_ms is the time in ms needed to reach a load average of
0.5 by a previously idle task (load average ratio 0) that starts a busy loop.
up_threshold and down_threshold are the load values at which a task
is moved to a faster CPU or moved back to a slower CPU, respectively.
Both thresholds are divided by 1024 before being compared
to the load average.
For example, with load_avg_period_ms = 128 and up_threshold = 512,
a running task with a load of 0 will be migrated to a bigger CPU after
128ms, because after 128ms its load_avg_ratio is 0.5 and the real
up_threshold is 0.5.
This option has the same effect as changing the Y of the load
average computation to
(1002/1024)^(LOAD_AVG_PERIOD/load_avg_period_ms)
but it avoids intermediate overflows in the computation.
config HMP_FREQUENCY_INVARIANT_SCALE
bool "(EXPERIMENTAL) Frequency-Invariant Tracked Load for HMP"
depends on HMP_VARIABLE_SCALE && CPU_FREQ
help
Scales the current load contribution in line with the frequency
of the CPU that the task was executed on.
In this version, we use a simple linear scale derived from the
maximum frequency reported by CPUFreq.
Restricting tracked load to be scaled by the CPU's frequency
represents the consumption of possible compute capacity
(rather than consumption of actual instantaneous capacity as
normal) and allows the HMP migration's simple threshold
migration strategy to interact more predictably with CPUFreq's
asynchronous compute capacity changes.
config HAVE_ARM_SCU
bool
help

View File

@ -62,9 +62,19 @@ struct pmu_hw_events {
raw_spinlock_t pmu_lock;
};
/*
 * Snapshot of one CPU's PMU register state, passed to the arm_pmu
 * save_regs()/restore_regs() callbacks. The cp15 encodings quoted below
 * are the ones the ARMv7 save/restore code in this change uses for each
 * field.
 */
struct cpupmu_regs {
u32 pmc; /* control register (ARMv7: c9, c12, 0); ARMV7_PMNC_E is tested in it */
u32 pmcntenset; /* ARMv7: c9, c12, 1 */
u32 pmuseren; /* ARMv7: c9, c14, 0 */
u32 pmintenset; /* ARMv7: c9, c14, 1 */
u32 pmxevttype[8]; /* per-counter event type (ARMv7: c9, c13, 1), indexed by counter index */
u32 pmxevtcnt[8]; /* [0]: ARMv7 c9, c13, 0 value; others: per-counter count (c9, c13, 2) */
};
struct arm_pmu {
struct pmu pmu;
cpumask_t active_irqs;
cpumask_t valid_cpus;
char *name;
irqreturn_t (*handle_irq)(int irq_num, void *dev);
void (*enable)(struct perf_event *event);
@ -81,6 +91,8 @@ struct arm_pmu {
int (*request_irq)(struct arm_pmu *, irq_handler_t handler);
void (*free_irq)(struct arm_pmu *);
int (*map_event)(struct perf_event *event);
void (*save_regs)(struct arm_pmu *, struct cpupmu_regs *);
void (*restore_regs)(struct arm_pmu *, struct cpupmu_regs *);
int num_events;
atomic_t active_events;
struct mutex reserve_mutex;

View File

@ -26,11 +26,45 @@ extern struct cputopo_arm cpu_topology[NR_CPUS];
void init_cpu_topology(void);
void store_cpu_topology(unsigned int cpuid);
const struct cpumask *cpu_coregroup_mask(int cpu);
int cluster_to_logical_mask(unsigned int socket_id, cpumask_t *cluster_mask);
#ifdef CONFIG_DISABLE_CPU_SCHED_DOMAIN_BALANCE
/* Common values for CPUs; SD_LOAD_BALANCE is cleared (0*SD_LOAD_BALANCE) at this level */
#ifndef SD_CPU_INIT
#define SD_CPU_INIT (struct sched_domain) { \
.min_interval = 1, \
.max_interval = 4, \
.busy_factor = 64, \
.imbalance_pct = 125, \
.cache_nice_tries = 1, \
.busy_idx = 2, \
.idle_idx = 1, \
.newidle_idx = 0, \
.wake_idx = 0, \
.forkexec_idx = 0, \
\
.flags = 0*SD_LOAD_BALANCE \
| 1*SD_BALANCE_NEWIDLE \
| 1*SD_BALANCE_EXEC \
| 1*SD_BALANCE_FORK \
| 0*SD_BALANCE_WAKE \
| 1*SD_WAKE_AFFINE \
| 0*SD_SHARE_CPUPOWER \
| 0*SD_SHARE_PKG_RESOURCES \
| 0*SD_SERIALIZE \
, \
.last_balance = jiffies, \
.balance_interval = 1, \
}
#endif
#endif /* CONFIG_DISABLE_CPU_SCHED_DOMAIN_BALANCE */
#else
static inline void init_cpu_topology(void) { }
static inline void store_cpu_topology(unsigned int cpuid) { }
static inline int cluster_to_logical_mask(unsigned int socket_id,
cpumask_t *cluster_mask) { return -EINVAL; }
#endif

View File

@ -1049,7 +1049,8 @@ static struct notifier_block dbg_cpu_pm_nb = {
static void __init pm_init(void)
{
cpu_pm_register_notifier(&dbg_cpu_pm_nb);
if (has_ossr)
cpu_pm_register_notifier(&dbg_cpu_pm_nb);
}
#else
static inline void pm_init(void)

View File

@ -12,6 +12,7 @@
*/
#define pr_fmt(fmt) "hw perfevents: " fmt
#include <linux/cpumask.h>
#include <linux/kernel.h>
#include <linux/platform_device.h>
#include <linux/pm_runtime.h>
@ -81,6 +82,9 @@ armpmu_map_event(struct perf_event *event,
return armpmu_map_cache_event(cache_map, config);
case PERF_TYPE_RAW:
return armpmu_map_raw_event(raw_event_mask, config);
default:
if (event->attr.type >= PERF_TYPE_MAX)
return armpmu_map_raw_event(raw_event_mask, config);
}
return -ENOENT;
@ -158,6 +162,8 @@ armpmu_stop(struct perf_event *event, int flags)
struct arm_pmu *armpmu = to_arm_pmu(event->pmu);
struct hw_perf_event *hwc = &event->hw;
if (!cpumask_test_cpu(smp_processor_id(), &armpmu->valid_cpus))
return;
/*
* ARM pmu always has to update the counter, so ignore
* PERF_EF_UPDATE, see comments in armpmu_start().
@ -174,6 +180,8 @@ static void armpmu_start(struct perf_event *event, int flags)
struct arm_pmu *armpmu = to_arm_pmu(event->pmu);
struct hw_perf_event *hwc = &event->hw;
if (!cpumask_test_cpu(smp_processor_id(), &armpmu->valid_cpus))
return;
/*
* ARM pmu always has to reprogram the period, so ignore
* PERF_EF_RELOAD, see the comment below.
@ -201,6 +209,9 @@ armpmu_del(struct perf_event *event, int flags)
struct hw_perf_event *hwc = &event->hw;
int idx = hwc->idx;
if (!cpumask_test_cpu(smp_processor_id(), &armpmu->valid_cpus))
return;
armpmu_stop(event, PERF_EF_UPDATE);
hw_events->events[idx] = NULL;
clear_bit(idx, hw_events->used_mask);
@ -217,6 +228,10 @@ armpmu_add(struct perf_event *event, int flags)
int idx;
int err = 0;
/* An event following a process won't be stopped earlier */
if (!cpumask_test_cpu(smp_processor_id(), &armpmu->valid_cpus))
return 0;
perf_pmu_disable(event->pmu);
/* If we don't have a space for the counter then finish early. */
@ -416,6 +431,10 @@ static int armpmu_event_init(struct perf_event *event)
int err = 0;
atomic_t *active_events = &armpmu->active_events;
if (event->cpu != -1 &&
!cpumask_test_cpu(event->cpu, &armpmu->valid_cpus))
return -ENOENT;
/* does not support taken branch sampling */
if (has_branch_stack(event))
return -EOPNOTSUPP;

View File

@ -19,6 +19,7 @@
#define pr_fmt(fmt) "CPU PMU: " fmt
#include <linux/bitmap.h>
#include <linux/cpu_pm.h>
#include <linux/export.h>
#include <linux/kernel.h>
#include <linux/of.h>
@ -31,33 +32,36 @@
#include <asm/pmu.h>
/* Set at runtime when we know what CPU type we are. */
static struct arm_pmu *cpu_pmu;
static DEFINE_PER_CPU(struct arm_pmu *, cpu_pmu);
static DEFINE_PER_CPU(struct perf_event * [ARMPMU_MAX_HWEVENTS], hw_events);
static DEFINE_PER_CPU(unsigned long [BITS_TO_LONGS(ARMPMU_MAX_HWEVENTS)], used_mask);
static DEFINE_PER_CPU(struct pmu_hw_events, cpu_hw_events);
static DEFINE_PER_CPU(struct cpupmu_regs, cpu_pmu_regs);
/*
* Despite the names, these two functions are CPU-specific and are used
* by the OProfile/perf code.
*/
const char *perf_pmu_name(void)
{
if (!cpu_pmu)
struct arm_pmu *pmu = per_cpu(cpu_pmu, 0);
if (!pmu)
return NULL;
return cpu_pmu->name;
return pmu->name;
}
EXPORT_SYMBOL_GPL(perf_pmu_name);
int perf_num_counters(void)
{
int max_events = 0;
struct arm_pmu *pmu = per_cpu(cpu_pmu, 0);
if (cpu_pmu != NULL)
max_events = cpu_pmu->num_events;
if (!pmu)
return 0;
return max_events;
return pmu->num_events;
}
EXPORT_SYMBOL_GPL(perf_num_counters);
@ -75,11 +79,13 @@ static void cpu_pmu_free_irq(struct arm_pmu *cpu_pmu)
{
int i, irq, irqs;
struct platform_device *pmu_device = cpu_pmu->plat_device;
int cpu = -1;
irqs = min(pmu_device->num_resources, num_possible_cpus());
for (i = 0; i < irqs; ++i) {
if (!cpumask_test_and_clear_cpu(i, &cpu_pmu->active_irqs))
cpu = cpumask_next(cpu, &cpu_pmu->valid_cpus);
if (!cpumask_test_and_clear_cpu(cpu, &cpu_pmu->active_irqs))
continue;
irq = platform_get_irq(pmu_device, i);
if (irq >= 0)
@ -91,6 +97,7 @@ static int cpu_pmu_request_irq(struct arm_pmu *cpu_pmu, irq_handler_t handler)
{
int i, err, irq, irqs;
struct platform_device *pmu_device = cpu_pmu->plat_device;
int cpu = -1;
if (!pmu_device)
return -ENODEV;
@ -103,6 +110,7 @@ static int cpu_pmu_request_irq(struct arm_pmu *cpu_pmu, irq_handler_t handler)
for (i = 0; i < irqs; ++i) {
err = 0;
cpu = cpumask_next(cpu, &cpu_pmu->valid_cpus);
irq = platform_get_irq(pmu_device, i);
if (irq < 0)
continue;
@ -112,7 +120,7 @@ static int cpu_pmu_request_irq(struct arm_pmu *cpu_pmu, irq_handler_t handler)
* assume that we're running on a uniprocessor machine and
* continue. Otherwise, continue without this interrupt.
*/
if (irq_set_affinity(irq, cpumask_of(i)) && irqs > 1) {
if (irq_set_affinity(irq, cpumask_of(cpu)) && irqs > 1) {
pr_warning("unable to set irq affinity (irq=%d, cpu=%u)\n",
irq, i);
continue;
@ -126,7 +134,7 @@ static int cpu_pmu_request_irq(struct arm_pmu *cpu_pmu, irq_handler_t handler)
return err;
}
cpumask_set_cpu(i, &cpu_pmu->active_irqs);
cpumask_set_cpu(cpu, &cpu_pmu->active_irqs);
}
return 0;
@ -135,7 +143,7 @@ static int cpu_pmu_request_irq(struct arm_pmu *cpu_pmu, irq_handler_t handler)
static void cpu_pmu_init(struct arm_pmu *cpu_pmu)
{
int cpu;
for_each_possible_cpu(cpu) {
for_each_cpu_mask(cpu, cpu_pmu->valid_cpus) {
struct pmu_hw_events *events = &per_cpu(cpu_hw_events, cpu);
events->events = per_cpu(hw_events, cpu);
events->used_mask = per_cpu(used_mask, cpu);
@ -148,7 +156,7 @@ static void cpu_pmu_init(struct arm_pmu *cpu_pmu)
/* Ensure the PMU has sane values out of reset. */
if (cpu_pmu->reset)
on_each_cpu(cpu_pmu->reset, cpu_pmu, 1);
on_each_cpu_mask(&cpu_pmu->valid_cpus, cpu_pmu->reset, cpu_pmu, 1);
}
/*
@ -160,21 +168,46 @@ static void cpu_pmu_init(struct arm_pmu *cpu_pmu)
static int __cpuinit cpu_pmu_notify(struct notifier_block *b,
unsigned long action, void *hcpu)
{
struct arm_pmu *pmu = per_cpu(cpu_pmu, (long)hcpu);
if ((action & ~CPU_TASKS_FROZEN) != CPU_STARTING)
return NOTIFY_DONE;
if (cpu_pmu && cpu_pmu->reset)
cpu_pmu->reset(cpu_pmu);
if (pmu && pmu->reset)
pmu->reset(pmu);
else
return NOTIFY_DONE;
return NOTIFY_OK;
}
static int cpu_pmu_pm_notify(struct notifier_block *b,
unsigned long action, void *hcpu)
{
int cpu = smp_processor_id();
struct arm_pmu *pmu = per_cpu(cpu_pmu, cpu);
struct cpupmu_regs *pmuregs = &per_cpu(cpu_pmu_regs, cpu);
if (!pmu)
return NOTIFY_DONE;
if (action == CPU_PM_ENTER && pmu->save_regs) {
pmu->save_regs(pmu, pmuregs);
} else if (action == CPU_PM_EXIT && pmu->restore_regs) {
pmu->restore_regs(pmu, pmuregs);
}
return NOTIFY_OK;
}
static struct notifier_block __cpuinitdata cpu_pmu_hotplug_notifier = {
.notifier_call = cpu_pmu_notify,
};
static struct notifier_block __cpuinitdata cpu_pmu_pm_notifier = {
.notifier_call = cpu_pmu_pm_notify,
};
/*
* PMU platform driver and devicetree bindings.
*/
@ -246,6 +279,9 @@ static int probe_current_pmu(struct arm_pmu *pmu)
}
}
/* assume PMU support all the CPUs in this case */
cpumask_setall(&pmu->valid_cpus);
put_cpu();
return ret;
}
@ -253,15 +289,10 @@ static int probe_current_pmu(struct arm_pmu *pmu)
static int cpu_pmu_device_probe(struct platform_device *pdev)
{
const struct of_device_id *of_id;
int (*init_fn)(struct arm_pmu *);
struct device_node *node = pdev->dev.of_node;
struct arm_pmu *pmu;
int ret = -ENODEV;
if (cpu_pmu) {
pr_info("attempt to register multiple PMU devices!");
return -ENOSPC;
}
int ret = 0;
int cpu;
pmu = kzalloc(sizeof(struct arm_pmu), GFP_KERNEL);
if (!pmu) {
@ -270,8 +301,28 @@ static int cpu_pmu_device_probe(struct platform_device *pdev)
}
if (node && (of_id = of_match_node(cpu_pmu_of_device_ids, pdev->dev.of_node))) {
init_fn = of_id->data;
ret = init_fn(pmu);
smp_call_func_t init_fn = (smp_call_func_t)of_id->data;
struct device_node *ncluster;
int cluster = -1;
cpumask_t sibling_mask;
ncluster = of_parse_phandle(node, "cluster", 0);
if (ncluster) {
int len;
const u32 *hwid;
hwid = of_get_property(ncluster, "reg", &len);
if (hwid && len == 4)
cluster = be32_to_cpup(hwid);
}
/* set sibling mask to all cpu mask if socket is not specified */
if (cluster == -1 ||
cluster_to_logical_mask(cluster, &sibling_mask))
cpumask_setall(&sibling_mask);
smp_call_function_any(&sibling_mask, init_fn, pmu, 1);
/* now set the valid_cpus after init */
cpumask_copy(&pmu->valid_cpus, &sibling_mask);
} else {
ret = probe_current_pmu(pmu);
}
@ -281,10 +332,12 @@ static int cpu_pmu_device_probe(struct platform_device *pdev)
goto out_free;
}
cpu_pmu = pmu;
cpu_pmu->plat_device = pdev;
cpu_pmu_init(cpu_pmu);
ret = armpmu_register(cpu_pmu, PERF_TYPE_RAW);
for_each_cpu_mask(cpu, pmu->valid_cpus)
per_cpu(cpu_pmu, cpu) = pmu;
pmu->plat_device = pdev;
cpu_pmu_init(pmu);
ret = armpmu_register(pmu, -1);
if (!ret)
return 0;
@ -313,9 +366,17 @@ static int __init register_pmu_driver(void)
if (err)
return err;
err = platform_driver_register(&cpu_pmu_driver);
if (err)
err = cpu_pm_register_notifier(&cpu_pmu_pm_notifier);
if (err) {
unregister_cpu_notifier(&cpu_pmu_hotplug_notifier);
return err;
}
err = platform_driver_register(&cpu_pmu_driver);
if (err) {
cpu_pm_unregister_notifier(&cpu_pmu_pm_notifier);
unregister_cpu_notifier(&cpu_pmu_hotplug_notifier);
}
return err;
}

View File

@ -950,6 +950,51 @@ static void armv7_pmnc_dump_regs(struct arm_pmu *cpu_pmu)
}
#endif
/*
 * Save this CPU's ARMv7 PMU state into @regs.
 *
 * The control register (cp15 c9, c12, 0) is always read into regs->pmc.
 * If its ARMV7_PMNC_E bit is clear, nothing else is saved. Otherwise the
 * enable/user-enable/interrupt-enable registers, the c9, c13, 0 value and
 * the event type and count of every event counter are captured.
 *
 * NOTE(review): the per-counter arrays in struct cpupmu_regs have 8
 * entries and are indexed by the raw counter index; confirm that
 * ARMV7_IDX_COUNTER_LAST(cpu_pmu) can never reach 8.
 */
static void armv7pmu_save_regs(struct arm_pmu *cpu_pmu,
struct cpupmu_regs *regs)
{
unsigned int cnt;
/* Control register first: it decides whether there is anything to save. */
asm volatile("mrc p15, 0, %0, c9, c12, 0" : "=r" (regs->pmc));
if (!(regs->pmc & ARMV7_PMNC_E))
return;
asm volatile("mrc p15, 0, %0, c9, c12, 1" : "=r" (regs->pmcntenset));
asm volatile("mrc p15, 0, %0, c9, c14, 0" : "=r" (regs->pmuseren));
asm volatile("mrc p15, 0, %0, c9, c14, 1" : "=r" (regs->pmintenset));
/* Slot 0 of pmxevtcnt holds the c9, c13, 0 value rather than an event counter. */
asm volatile("mrc p15, 0, %0, c9, c13, 0" : "=r" (regs->pmxevtcnt[0]));
/* Each event counter must be selected before its type/count can be read. */
for (cnt = ARMV7_IDX_COUNTER0;
cnt <= ARMV7_IDX_COUNTER_LAST(cpu_pmu); cnt++) {
armv7_pmnc_select_counter(cnt);
asm volatile("mrc p15, 0, %0, c9, c13, 1"
: "=r"(regs->pmxevttype[cnt]));
asm volatile("mrc p15, 0, %0, c9, c13, 2"
: "=r"(regs->pmxevtcnt[cnt]));
}
return;
}
/*
 * Write the PMU state held in @regs back to this CPU's ARMv7 PMU.
 *
 * Nothing is written when the saved control value (regs->pmc) has
 * ARMV7_PMNC_E clear. Otherwise the registers are written in the same
 * order armv7pmu_save_regs() read them, and the control register
 * (cp15 c9, c12, 0) is written last.
 */
static void armv7pmu_restore_regs(struct arm_pmu *cpu_pmu,
struct cpupmu_regs *regs)
{
unsigned int cnt;
/* The PMU was not enabled at save time: leave the hardware untouched. */
if (!(regs->pmc & ARMV7_PMNC_E))
return;
asm volatile("mcr p15, 0, %0, c9, c12, 1" : : "r" (regs->pmcntenset));
asm volatile("mcr p15, 0, %0, c9, c14, 0" : : "r" (regs->pmuseren));
asm volatile("mcr p15, 0, %0, c9, c14, 1" : : "r" (regs->pmintenset));
asm volatile("mcr p15, 0, %0, c9, c13, 0" : : "r" (regs->pmxevtcnt[0]));
/* Each event counter must be selected before its type/count can be written. */
for (cnt = ARMV7_IDX_COUNTER0;
cnt <= ARMV7_IDX_COUNTER_LAST(cpu_pmu); cnt++) {
armv7_pmnc_select_counter(cnt);
asm volatile("mcr p15, 0, %0, c9, c13, 1"
: : "r"(regs->pmxevttype[cnt]));
asm volatile("mcr p15, 0, %0, c9, c13, 2"
: : "r"(regs->pmxevtcnt[cnt]));
}
/* Control register goes last, after all counter state is back in place. */
asm volatile("mcr p15, 0, %0, c9, c12, 0" : : "r" (regs->pmc));
}
static void armv7pmu_enable_event(struct perf_event *event)
{
unsigned long flags;
@ -1223,6 +1268,8 @@ static void armv7pmu_init(struct arm_pmu *cpu_pmu)
cpu_pmu->start = armv7pmu_start;
cpu_pmu->stop = armv7pmu_stop;
cpu_pmu->reset = armv7pmu_reset;
cpu_pmu->save_regs = armv7pmu_save_regs;
cpu_pmu->restore_regs = armv7pmu_restore_regs;
cpu_pmu->max_period = (1LLU << 32) - 1;
};
@ -1240,7 +1287,7 @@ static u32 armv7_read_num_pmnc_events(void)
static int armv7_a8_pmu_init(struct arm_pmu *cpu_pmu)
{
armv7pmu_init(cpu_pmu);
cpu_pmu->name = "ARMv7 Cortex-A8";
cpu_pmu->name = "ARMv7_Cortex_A8";
cpu_pmu->map_event = armv7_a8_map_event;
cpu_pmu->num_events = armv7_read_num_pmnc_events();
return 0;
@ -1249,7 +1296,7 @@ static int armv7_a8_pmu_init(struct arm_pmu *cpu_pmu)
static int armv7_a9_pmu_init(struct arm_pmu *cpu_pmu)
{
armv7pmu_init(cpu_pmu);
cpu_pmu->name = "ARMv7 Cortex-A9";
cpu_pmu->name = "ARMv7_Cortex_A9";
cpu_pmu->map_event = armv7_a9_map_event;
cpu_pmu->num_events = armv7_read_num_pmnc_events();
return 0;
@ -1258,7 +1305,7 @@ static int armv7_a9_pmu_init(struct arm_pmu *cpu_pmu)
static int armv7_a5_pmu_init(struct arm_pmu *cpu_pmu)
{
armv7pmu_init(cpu_pmu);
cpu_pmu->name = "ARMv7 Cortex-A5";
cpu_pmu->name = "ARMv7_Cortex_A5";
cpu_pmu->map_event = armv7_a5_map_event;
cpu_pmu->num_events = armv7_read_num_pmnc_events();
return 0;
@ -1267,7 +1314,7 @@ static int armv7_a5_pmu_init(struct arm_pmu *cpu_pmu)
static int armv7_a15_pmu_init(struct arm_pmu *cpu_pmu)
{
armv7pmu_init(cpu_pmu);
cpu_pmu->name = "ARMv7 Cortex-A15";
cpu_pmu->name = "ARMv7_Cortex_A15";
cpu_pmu->map_event = armv7_a15_map_event;
cpu_pmu->num_events = armv7_read_num_pmnc_events();
cpu_pmu->set_event_filter = armv7pmu_set_event_filter;
@ -1277,7 +1324,7 @@ static int armv7_a15_pmu_init(struct arm_pmu *cpu_pmu)
static int armv7_a7_pmu_init(struct arm_pmu *cpu_pmu)
{
armv7pmu_init(cpu_pmu);
cpu_pmu->name = "ARMv7 Cortex-A7";
cpu_pmu->name = "ARMv7_Cortex_A7";
cpu_pmu->map_event = armv7_a7_map_event;
cpu_pmu->num_events = armv7_read_num_pmnc_events();
cpu_pmu->set_event_filter = armv7pmu_set_event_filter;

View File

@ -23,6 +23,7 @@
#include <linux/slab.h>
#include <asm/cputype.h>
#include <asm/smp_plat.h>
#include <asm/topology.h>
/*
@ -289,6 +290,140 @@ void store_cpu_topology(unsigned int cpuid)
cpu_topology[cpuid].socket_id, mpidr);
}
#ifdef CONFIG_SCHED_HMP
static const char * const little_cores[] = {
"arm,cortex-a7",
NULL,
};
static bool is_little_cpu(struct device_node *cn)
{
const char * const *lc;
for (lc = little_cores; *lc; lc++)
if (of_device_is_compatible(cn, *lc))
return true;
return false;
}
/*
 * arch_get_fast_and_slow_cpus - split the CPUs into fast and slow masks
 * @fast: mask that receives the fast CPUs (cleared first)
 * @slow: mask that receives the slow CPUs (cleared first)
 *
 * If both CONFIG_HMP_FAST_CPU_MASK and CONFIG_HMP_SLOW_CPU_MASK are
 * non-empty they are parsed as cpu lists and used as-is. Otherwise every
 * device-tree node of type "cpu" is examined: nodes matched by
 * is_little_cpu() go to @slow, all others to @fast.
 *
 * If the device tree does not yield both a fast and a slow CPU, every CPU
 * is reported as fast and @slow is left empty.
 */
void __init arch_get_fast_and_slow_cpus(struct cpumask *fast,
					struct cpumask *slow)
{
	struct device_node *cn = NULL;
	int cpu;

	cpumask_clear(fast);
	cpumask_clear(slow);

	/*
	 * Use the config options if they are given. This helps testing
	 * HMP scheduling on systems without a big.LITTLE architecture.
	 */
	if (strlen(CONFIG_HMP_FAST_CPU_MASK) && strlen(CONFIG_HMP_SLOW_CPU_MASK)) {
		if (cpulist_parse(CONFIG_HMP_FAST_CPU_MASK, fast))
			WARN(1, "Failed to parse HMP fast cpu mask!\n");
		if (cpulist_parse(CONFIG_HMP_SLOW_CPU_MASK, slow))
			WARN(1, "Failed to parse HMP slow cpu mask!\n");
		return;
	}

	/*
	 * Else, parse device tree for little cores.
	 */
	while ((cn = of_find_node_by_type(cn, "cpu"))) {
		const u32 *mpidr;
		int len;

		mpidr = of_get_property(cn, "reg", &len);
		if (!mpidr || len != 4) {
			pr_err("* %s missing reg property\n", cn->full_name);
			/* The next of_find_node_by_type() call releases cn. */
			continue;
		}

		cpu = get_logical_index(be32_to_cpup(mpidr));
		if (cpu == -EINVAL) {
			pr_err("couldn't get logical index for mpidr %x\n",
			       be32_to_cpup(mpidr));
			/*
			 * Leaving the loop early: drop the reference that
			 * of_find_node_by_type() took on cn, since no further
			 * iteration will release it for us.
			 */
			of_node_put(cn);
			break;
		}

		if (is_little_cpu(cn))
			cpumask_set_cpu(cpu, slow);
		else
			cpumask_set_cpu(cpu, fast);
	}

	if (!cpumask_empty(fast) && !cpumask_empty(slow))
		return;

	/*
	 * We didn't find both big and little cores so let's call all cores
	 * fast as this will keep the system running, with all cores being
	 * treated equal.
	 */
	cpumask_setall(fast);
	cpumask_clear(slow);
}
struct cpumask hmp_slow_cpu_mask;
void __init arch_get_hmp_domains(struct list_head *hmp_domains_list)
{
struct cpumask hmp_fast_cpu_mask;
struct hmp_domain *domain;
arch_get_fast_and_slow_cpus(&hmp_fast_cpu_mask, &hmp_slow_cpu_mask);
/*
* Initialize hmp_domains
* Must be ordered with respect to compute capacity.
* Fastest domain at head of list.
*/
if(!cpumask_empty(&hmp_slow_cpu_mask)) {
domain = (struct hmp_domain *)
kmalloc(sizeof(struct hmp_domain), GFP_KERNEL);
cpumask_copy(&domain->possible_cpus, &hmp_slow_cpu_mask);
cpumask_and(&domain->cpus, cpu_online_mask, &domain->possible_cpus);
list_add(&domain->hmp_domains, hmp_domains_list);
}
domain = (struct hmp_domain *)
kmalloc(sizeof(struct hmp_domain), GFP_KERNEL);
cpumask_copy(&domain->possible_cpus, &hmp_fast_cpu_mask);
cpumask_and(&domain->cpus, cpu_online_mask, &domain->possible_cpus);
list_add(&domain->hmp_domains, hmp_domains_list);
}
#endif /* CONFIG_SCHED_HMP */
/*
 * cluster_to_logical_mask - look up the logical CPU mask of a cluster
 * @socket_id:    hardware identifier of the cluster
 * @cluster_mask: destination mask; written only when 0 is returned
 *
 * Walks the online CPUs and, for the first one whose physical package id
 * equals @socket_id, copies that CPU's core mask into @cluster_mask.
 *
 * Return: 0 on success, -EINVAL when @cluster_mask is NULL or no online
 * CPU belongs to @socket_id.
 */
int cluster_to_logical_mask(unsigned int socket_id, cpumask_t *cluster_mask)
{
	int cpu;

	if (cluster_mask == NULL)
		return -EINVAL;

	for_each_online_cpu(cpu) {
		if (socket_id != topology_physical_package_id(cpu))
			continue;

		cpumask_copy(cluster_mask, topology_core_cpumask(cpu));
		return 0;
	}

	return -EINVAL;
}
/*
* init_cpu_topology is called at boot when only one cpu is running,
* which prevents simultaneous write access to the cpu_topology array

View File

@ -885,6 +885,13 @@ void free_sched_domains(cpumask_var_t doms[], unsigned int ndoms);
bool cpus_share_cache(int this_cpu, int that_cpu);
#ifdef CONFIG_SCHED_HMP
/*
 * One set of CPUs treated as a unit by the HMP scheduler code. Field notes
 * describe how the ARM arch_get_hmp_domains() in this change fills them in.
 */
struct hmp_domain {
struct cpumask cpus; /* possible_cpus ANDed with cpu_online_mask at creation */
struct cpumask possible_cpus; /* every CPU assigned to this domain (fast or slow mask) */
struct list_head hmp_domains; /* linkage in the caller-provided domain list */
};
#endif /* CONFIG_SCHED_HMP */
#else /* CONFIG_SMP */
struct sched_domain_attr;
@ -931,6 +938,12 @@ struct sched_avg {
u64 last_runnable_update;
s64 decay_count;
unsigned long load_avg_contrib;
unsigned long load_avg_ratio;
#ifdef CONFIG_SCHED_HMP
u64 hmp_last_up_migration;
u64 hmp_last_down_migration;
#endif
u32 usage_avg_sum;
};
#ifdef CONFIG_SCHEDSTATS

View File

@ -198,7 +198,7 @@ extern void __inc_zone_state(struct zone *, enum zone_stat_item);
extern void dec_zone_state(struct zone *, enum zone_stat_item);
extern void __dec_zone_state(struct zone *, enum zone_stat_item);
void refresh_cpu_vm_stats(int);
bool refresh_cpu_vm_stats(int);
void refresh_zone_stat_thresholds(void);
void drain_zonestat(struct zone *zone, struct per_cpu_pageset *);

View File

@ -430,6 +430,159 @@ TRACE_EVENT(sched_pi_setprio,
__entry->oldprio, __entry->newprio)
);
/*
* Tracepoint for showing tracked load contribution.
*/
TRACE_EVENT(sched_task_load_contrib,
TP_PROTO(struct task_struct *tsk, unsigned long load_contrib),
TP_ARGS(tsk, load_contrib),
TP_STRUCT__entry(
__array(char, comm, TASK_COMM_LEN)
__field(pid_t, pid)
__field(unsigned long, load_contrib)
),
TP_fast_assign(
memcpy(__entry->comm, tsk->comm, TASK_COMM_LEN);
__entry->pid = tsk->pid;
__entry->load_contrib = load_contrib;
),
TP_printk("comm=%s pid=%d load_contrib=%lu",
__entry->comm, __entry->pid,
__entry->load_contrib)
);
/*
* Tracepoint for showing tracked task runnable ratio [0..1023].
*/
TRACE_EVENT(sched_task_runnable_ratio,
TP_PROTO(struct task_struct *tsk, unsigned long ratio),
TP_ARGS(tsk, ratio),
TP_STRUCT__entry(
__array(char, comm, TASK_COMM_LEN)
__field(pid_t, pid)
__field(unsigned long, ratio)
),
TP_fast_assign(
memcpy(__entry->comm, tsk->comm, TASK_COMM_LEN);
__entry->pid = tsk->pid;
__entry->ratio = ratio;
),
TP_printk("comm=%s pid=%d ratio=%lu",
__entry->comm, __entry->pid,
__entry->ratio)
);
/*
* Tracepoint for showing tracked rq runnable ratio [0..1023].
*/
TRACE_EVENT(sched_rq_runnable_ratio,
TP_PROTO(int cpu, unsigned long ratio),
TP_ARGS(cpu, ratio),
TP_STRUCT__entry(
__field(int, cpu)
__field(unsigned long, ratio)
),
TP_fast_assign(
__entry->cpu = cpu;
__entry->ratio = ratio;
),
TP_printk("cpu=%d ratio=%lu",
__entry->cpu,
__entry->ratio)
);
/*
* Tracepoint for showing tracked rq runnable load.
*/
TRACE_EVENT(sched_rq_runnable_load,
TP_PROTO(int cpu, u64 load),
TP_ARGS(cpu, load),
TP_STRUCT__entry(
__field(int, cpu)
__field(u64, load)
),
TP_fast_assign(
__entry->cpu = cpu;
__entry->load = load;
),
TP_printk("cpu=%d load=%llu",
__entry->cpu,
__entry->load)
);
/*
* Tracepoint for showing tracked task cpu usage ratio [0..1023].
*/
TRACE_EVENT(sched_task_usage_ratio,
TP_PROTO(struct task_struct *tsk, unsigned long ratio),
TP_ARGS(tsk, ratio),
TP_STRUCT__entry(
__array(char, comm, TASK_COMM_LEN)
__field(pid_t, pid)
__field(unsigned long, ratio)
),
TP_fast_assign(
memcpy(__entry->comm, tsk->comm, TASK_COMM_LEN);
__entry->pid = tsk->pid;
__entry->ratio = ratio;
),
TP_printk("comm=%s pid=%d ratio=%lu",
__entry->comm, __entry->pid,
__entry->ratio)
);
/*
* Tracepoint for HMP (CONFIG_SCHED_HMP) task migrations.
*/
TRACE_EVENT(sched_hmp_migrate,
TP_PROTO(struct task_struct *tsk, int dest, int force),
TP_ARGS(tsk, dest, force),
TP_STRUCT__entry(
__array(char, comm, TASK_COMM_LEN)
__field(pid_t, pid)
__field(int, dest)
__field(int, force)
),
TP_fast_assign(
memcpy(__entry->comm, tsk->comm, TASK_COMM_LEN);
__entry->pid = tsk->pid;
__entry->dest = dest;
__entry->force = force;
),
TP_printk("comm=%s pid=%d dest=%d force=%d",
__entry->comm, __entry->pid,
__entry->dest, __entry->force)
);
#endif /* _TRACE_SCHED_H */
/* This part must be outside protection */

View File

@ -23,10 +23,27 @@
static struct lock_class_key irq_desc_lock_class;
#if defined(CONFIG_SMP)
/*
 * Handler for the "irqaffinity=" boot parameter.
 *
 * Allocates a zeroed irq_default_affinity mask, fills it from the cpu list
 * in @str and then forces the bit of the CPU executing this code, so the
 * resulting mask is never empty. Always returns 1.
 */
static int __init irq_affinity_setup(char *str)
{
	const unsigned int boot_cpu = smp_processor_id();

	zalloc_cpumask_var(&irq_default_affinity, GFP_NOWAIT);
	cpulist_parse(str, irq_default_affinity);

	/*
	 * Always include the boot cpu: a bogus command line mask must not
	 * leave us with an unusable default affinity (and bug reports).
	 */
	cpumask_set_cpu(boot_cpu, irq_default_affinity);

	return 1;
}
__setup("irqaffinity=", irq_affinity_setup);
static void __init init_irq_default_affinity(void)
{
alloc_cpumask_var(&irq_default_affinity, GFP_NOWAIT);
cpumask_setall(irq_default_affinity);
#ifdef CONFIG_CPUMASK_OFFSTACK
if (!irq_default_affinity)
zalloc_cpumask_var(&irq_default_affinity, GFP_NOWAIT);
#endif
if (cpumask_empty(irq_default_affinity))
cpumask_setall(irq_default_affinity);
}
#else
static void __init init_irq_default_affinity(void)

View File

@ -1617,6 +1617,20 @@ static void __sched_fork(struct task_struct *p)
#if defined(CONFIG_SMP) && defined(CONFIG_FAIR_GROUP_SCHED)
p->se.avg.runnable_avg_period = 0;
p->se.avg.runnable_avg_sum = 0;
#ifdef CONFIG_SCHED_HMP
/* keep LOAD_AVG_MAX in sync with fair.c if load avg series is changed */
#define LOAD_AVG_MAX 47742
if (p->mm) {
p->se.avg.hmp_last_up_migration = 0;
p->se.avg.hmp_last_down_migration = 0;
p->se.avg.load_avg_ratio = 1023;
p->se.avg.load_avg_contrib =
(1023 * scale_load_down(p->se.load.weight));
p->se.avg.runnable_avg_period = LOAD_AVG_MAX;
p->se.avg.runnable_avg_sum = LOAD_AVG_MAX;
p->se.avg.usage_avg_sum = LOAD_AVG_MAX;
}
#endif
#endif
#ifdef CONFIG_SCHEDSTATS
memset(&p->se.statistics, 0, sizeof(p->se.statistics));
@ -3813,6 +3827,8 @@ static struct task_struct *find_process_by_pid(pid_t pid)
return pid ? find_task_by_vpid(pid) : current;
}
extern struct cpumask hmp_slow_cpu_mask;
/* Actually do priority change: must hold rq lock. */
static void
__setscheduler(struct rq *rq, struct task_struct *p, int policy, int prio)
@ -3822,8 +3838,13 @@ __setscheduler(struct rq *rq, struct task_struct *p, int policy, int prio)
p->normal_prio = normal_prio(p);
/* we are holding p->pi_lock already */
p->prio = rt_mutex_getprio(p);
if (rt_prio(p->prio))
if (rt_prio(p->prio)) {
p->sched_class = &rt_sched_class;
#ifdef CONFIG_SCHED_HMP
if (cpumask_equal(&p->cpus_allowed, cpu_all_mask))
do_set_cpus_allowed(p, &hmp_slow_cpu_mask);
#endif
}
else
p->sched_class = &fair_sched_class;
set_load_weight(p);

View File

@ -94,6 +94,7 @@ static void print_cfs_group_stats(struct seq_file *m, int cpu, struct task_group
#ifdef CONFIG_SMP
P(se->avg.runnable_avg_sum);
P(se->avg.runnable_avg_period);
P(se->avg.usage_avg_sum);
P(se->avg.load_avg_contrib);
P(se->avg.decay_count);
#endif
@ -223,6 +224,8 @@ void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq)
cfs_rq->tg_runnable_contrib);
SEQ_printf(m, " .%-30s: %d\n", "tg->runnable_avg",
atomic_read(&cfs_rq->tg->runnable_avg));
SEQ_printf(m, " .%-30s: %d\n", "tg->usage_avg",
atomic_read(&cfs_rq->tg->usage_avg));
#endif
print_cfs_group_stats(m, cpu, cfs_rq->tg);

File diff suppressed because it is too large Load Diff

View File

@ -142,7 +142,7 @@ struct task_group {
atomic_t load_weight;
atomic64_t load_avg;
atomic_t runnable_avg;
atomic_t runnable_avg, usage_avg;
#endif
#ifdef CONFIG_RT_GROUP_SCHED
@ -279,7 +279,7 @@ struct cfs_rq {
#endif /* CONFIG_FAIR_GROUP_SCHED */
/* These always depend on CONFIG_FAIR_GROUP_SCHED */
#ifdef CONFIG_FAIR_GROUP_SCHED
u32 tg_runnable_contrib;
u32 tg_runnable_contrib, tg_usage_contrib;
u64 tg_load_contrib;
#endif /* CONFIG_FAIR_GROUP_SCHED */
@ -464,6 +464,9 @@ struct rq {
int active_balance;
int push_cpu;
struct cpu_stop_work active_balance_work;
#ifdef CONFIG_SCHED_HMP
struct task_struct *migrate_task;
#endif
/* cpu of this runqueue: */
int cpu;
int online;
@ -642,6 +645,12 @@ static inline unsigned int group_first_cpu(struct sched_group *group)
extern int group_balance_cpu(struct sched_group *sg);
#ifdef CONFIG_SCHED_HMP
static LIST_HEAD(hmp_domains);
DECLARE_PER_CPU(struct hmp_domain *, hmp_cpu_domain);
#define hmp_cpu_domain(cpu) (per_cpu(hmp_cpu_domain, (cpu)))
#endif /* CONFIG_SCHED_HMP */
#endif /* CONFIG_SMP */
#include "stats.h"

View File

@ -0,0 +1,11 @@
CONFIG_CGROUPS=y
CONFIG_CGROUP_SCHED=y
CONFIG_FAIR_GROUP_SCHED=y
CONFIG_NO_HZ=y
CONFIG_SCHED_MC=y
CONFIG_DISABLE_CPU_SCHED_DOMAIN_BALANCE=y
CONFIG_SCHED_HMP=y
CONFIG_HMP_FAST_CPU_MASK=""
CONFIG_HMP_SLOW_CPU_MASK=""
CONFIG_HMP_VARIABLE_SCALE=y
CONFIG_HMP_FREQUENCY_INVARIANT_SCALE=y

View File

@ -14,6 +14,7 @@
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/cpu.h>
#include <linux/cpumask.h>
#include <linux/vmstat.h>
#include <linux/sched.h>
#include <linux/math64.h>
@ -432,11 +433,12 @@ EXPORT_SYMBOL(dec_zone_page_state);
* with the global counters. These could cause remote node cache line
* bouncing and will have to be only done when necessary.
*/
void refresh_cpu_vm_stats(int cpu)
bool refresh_cpu_vm_stats(int cpu)
{
struct zone *zone;
int i;
int global_diff[NR_VM_ZONE_STAT_ITEMS] = { 0, };
bool vm_activity = false;
for_each_populated_zone(zone) {
struct per_cpu_pageset *p;
@ -483,14 +485,21 @@ void refresh_cpu_vm_stats(int cpu)
if (p->expire)
continue;
if (p->pcp.count)
if (p->pcp.count) {
vm_activity = true;
drain_zone_pages(zone, &p->pcp);
}
#endif
}
for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++)
if (global_diff[i])
if (global_diff[i]) {
atomic_long_add(global_diff[i], &vm_stat[i]);
vm_activity = true;
}
return vm_activity;
}
/*
@ -1174,20 +1183,70 @@ static const struct file_operations proc_vmstat_file_operations = {
#ifdef CONFIG_SMP
static DEFINE_PER_CPU(struct delayed_work, vmstat_work);
int sysctl_stat_interval __read_mostly = HZ;
static struct cpumask vmstat_off_cpus;
struct delayed_work vmstat_monitor_work;
static void vmstat_update(struct work_struct *w)
static inline bool need_vmstat(int cpu)
{
refresh_cpu_vm_stats(smp_processor_id());
schedule_delayed_work(&__get_cpu_var(vmstat_work),
round_jiffies_relative(sysctl_stat_interval));
struct zone *zone;
int i;
for_each_populated_zone(zone) {
struct per_cpu_pageset *p;
p = per_cpu_ptr(zone->pageset, cpu);
for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++)
if (p->vm_stat_diff[i])
return true;
if (zone_to_nid(zone) != numa_node_id() && p->pcp.count)
return true;
}
return false;
}
static void __cpuinit start_cpu_timer(int cpu)
static void vmstat_update(struct work_struct *w);
static void start_cpu_timer(int cpu)
{
struct delayed_work *work = &per_cpu(vmstat_work, cpu);
cpumask_clear_cpu(cpu, &vmstat_off_cpus);
schedule_delayed_work_on(cpu, work, __round_jiffies_relative(HZ, cpu));
}
static void __cpuinit setup_cpu_timer(int cpu)
{
struct delayed_work *work = &per_cpu(vmstat_work, cpu);
INIT_DEFERRABLE_WORK(work, vmstat_update);
schedule_delayed_work_on(cpu, work, __round_jiffies_relative(HZ, cpu));
start_cpu_timer(cpu);
}
static void vmstat_update_monitor(struct work_struct *w)
{
int cpu;
for_each_cpu_and(cpu, &vmstat_off_cpus, cpu_online_mask)
if (need_vmstat(cpu))
start_cpu_timer(cpu);
queue_delayed_work(system_unbound_wq, &vmstat_monitor_work,
round_jiffies_relative(sysctl_stat_interval));
}
/*
 * Per-cpu vmstat work item.
 *
 * Folds this CPU's counters via refresh_cpu_vm_stats(). If that reported
 * activity, the work is re-armed for the next sysctl_stat_interval;
 * otherwise the CPU is recorded in vmstat_off_cpus and the work is not
 * rescheduled.
 */
static void vmstat_update(struct work_struct *w)
{
	const int this_cpu = smp_processor_id();

	if (unlikely(!refresh_cpu_vm_stats(this_cpu))) {
		/* Nothing happened on this CPU: park it until the monitor wakes it. */
		cpumask_set_cpu(this_cpu, &vmstat_off_cpus);
		return;
	}

	schedule_delayed_work(&__get_cpu_var(vmstat_work),
			      round_jiffies_relative(sysctl_stat_interval));
}
/*
@ -1204,17 +1263,19 @@ static int __cpuinit vmstat_cpuup_callback(struct notifier_block *nfb,
case CPU_ONLINE:
case CPU_ONLINE_FROZEN:
refresh_zone_stat_thresholds();
start_cpu_timer(cpu);
setup_cpu_timer(cpu);
node_set_state(cpu_to_node(cpu), N_CPU);
break;
case CPU_DOWN_PREPARE:
case CPU_DOWN_PREPARE_FROZEN:
cancel_delayed_work_sync(&per_cpu(vmstat_work, cpu));
per_cpu(vmstat_work, cpu).work.func = NULL;
if (!cpumask_test_cpu(cpu, &vmstat_off_cpus)) {
cancel_delayed_work_sync(&per_cpu(vmstat_work, cpu));
per_cpu(vmstat_work, cpu).work.func = NULL;
}
break;
case CPU_DOWN_FAILED:
case CPU_DOWN_FAILED_FROZEN:
start_cpu_timer(cpu);
setup_cpu_timer(cpu);
break;
case CPU_DEAD:
case CPU_DEAD_FROZEN:
@ -1237,8 +1298,14 @@ static int __init setup_vmstat(void)
register_cpu_notifier(&vmstat_notifier);
INIT_DEFERRABLE_WORK(&vmstat_monitor_work,
vmstat_update_monitor);
queue_delayed_work(system_unbound_wq,
&vmstat_monitor_work,
round_jiffies_relative(HZ));
for_each_online_cpu(cpu)
start_cpu_timer(cpu);
setup_cpu_timer(cpu);
#endif
#ifdef CONFIG_PROC_FS
proc_create("buddyinfo", S_IRUGO, NULL, &fragmentation_file_operations);