mirror of
https://github.com/torvalds/linux.git
synced 2026-05-25 23:52:08 +02:00
Final power management fixes for 3.15
- Taking non-idle time into account when calculating core busy
time was a mistake and led to a performance regression. Since
the problem it was supposed to address is now taken care of in
a different way, we don't need to do it any more, so drop the
non-idle time tracking from intel_pstate. Dirk Brandewie.
- Changing to fixed point math throughout the busy calculation
introduced rounding errors that adversely affect the accuracy
of intel_pstate's computations. Fix from Dirk Brandewie.
- The PID controller algorithm used by intel_pstate assumes that
the time interval between two adjacent samples will always be the
same, which is not the case for deferrable timers (used by
intel_pstate) when the system is idle. This leads to inaccurate
predictions and artificially increases convergence times for
the minimum P-state. Fix from Dirk Brandewie.
- intel_pstate carries out computations using 32-bit variables
that may overflow for large enough values of APERF/MPERF. Switch
to using 64-bit variables for computations, from Doug Smythies.
/
-----BEGIN PGP SIGNATURE-----
Version: GnuPG v2.0.22 (GNU/Linux)
iQIcBAABCAAGBQJTjjxqAAoJEILEb/54YlRxyxYP/RbWoU3ueLJnPuWWfRmdyW++
ebQGku6nVjRheDJxKK/bE5XIvZVx1rk8XPrzhmAI4iWZ8KVwRwezKL4rwaLS4TNo
Q2AuG7nHWjsTdvZH7NhYvBNIxRCPkdxI4GyHeJvuYu+SrphgwgcQ3xW8I9re+c8Q
afy3PK6bfFyPmx/IGL41AD0Tmh7edWpkGIGizI9QYsATn6IzbjNj17IBjLgpUf9s
yyj5OgU0T9J7B/sHHyDgmto0cniQdKgs8mvFLNzfHoytG/H1MCIII4+v1DZJvL4Y
L6cx71jrS+OrbBhJi9Z3n2m09LuA9/cxAGp1ojVDQ3TFZF7NQ+ruGvjDtLDgnqJK
crckpNQP1umL+maWnKbP2//IxvUo8bJi0g0GgOeIO8Ju9hf2oqCRDHR2L6cPJ5c5
DDbN+MmcRTdynXaTE0nMqwsR+ZsKyIbe9vx02roQUbvGlBNH35zbHsh7rsT4O0Cr
XpZET80G8WtggqZKTBj08A1o31rTaGXIu4uGsN4cFO4dNrmTDWsguJg5tB7fMpCH
8rMDo8h+Q2V+h+TWGkhqDxZnChik5jNWJY2lBnhyh88o1Nx5zLhnEAgSddQVnzTN
as4QDSuj2D7wU7UBDqZO9GV9MRtyYSMk/lsAx/lbIvryY6wZYZSiDeWIu82jcdeb
iO1WGBlQJHIkng6OZz7e
=YT7e
-----END PGP SIGNATURE-----
Merge tag 'pm-3.15-final' of git://git.kernel.org/pub/scm/linux/kernel/git/rafael/linux-pm
Pull intel pstate fixes from Rafael Wysocki:
"Final power management fixes for 3.15
- Taking non-idle time into account when calculating core busy time
was a mistake and led to a performance regression. Since the
problem it was supposed to address is now taken care of in a
different way, we don't need to do it any more, so drop the
non-idle time tracking from intel_pstate. Dirk Brandewie.
- Changing to fixed point math throughout the busy calculation
introduced rounding errors that adversely affect the accuracy of
intel_pstate's computations. Fix from Dirk Brandewie.
- The PID controller algorithm used by intel_pstate assumes that the
time interval between two adjacent samples will always be the same,
which is not the case for deferrable timers (used by intel_pstate)
when the system is idle. This leads to inaccurate predictions and
artificially increases convergence times for the minimum P-state.
Fix from Dirk Brandewie.
- intel_pstate carries out computations using 32-bit variables that
may overflow for large enough values of APERF/MPERF. Switch to
using 64-bit variables for computations, from Doug Smythies"
* tag 'pm-3.15-final' of git://git.kernel.org/pub/scm/linux/kernel/git/rafael/linux-pm:
intel_pstate: Improve initial busy calculation
intel_pstate: add sample time scaling
intel_pstate: Correct rounding in busy calculation
intel_pstate: Remove C0 tracking
This commit is contained in:
commit
c717d15614
|
|
@ -40,10 +40,10 @@
|
|||
#define BYT_TURBO_VIDS 0x66d
|
||||
|
||||
|
||||
#define FRAC_BITS 6
|
||||
#define FRAC_BITS 8
|
||||
#define int_tofp(X) ((int64_t)(X) << FRAC_BITS)
|
||||
#define fp_toint(X) ((X) >> FRAC_BITS)
|
||||
#define FP_ROUNDUP(X) ((X) += 1 << FRAC_BITS)
|
||||
|
||||
|
||||
static inline int32_t mul_fp(int32_t x, int32_t y)
|
||||
{
|
||||
|
|
@ -59,8 +59,8 @@ struct sample {
|
|||
int32_t core_pct_busy;
|
||||
u64 aperf;
|
||||
u64 mperf;
|
||||
unsigned long long tsc;
|
||||
int freq;
|
||||
ktime_t time;
|
||||
};
|
||||
|
||||
struct pstate_data {
|
||||
|
|
@ -98,9 +98,9 @@ struct cpudata {
|
|||
struct vid_data vid;
|
||||
struct _pid pid;
|
||||
|
||||
ktime_t last_sample_time;
|
||||
u64 prev_aperf;
|
||||
u64 prev_mperf;
|
||||
unsigned long long prev_tsc;
|
||||
struct sample sample;
|
||||
};
|
||||
|
||||
|
|
@ -200,7 +200,10 @@ static signed int pid_calc(struct _pid *pid, int32_t busy)
|
|||
pid->last_err = fp_error;
|
||||
|
||||
result = pterm + mul_fp(pid->integral, pid->i_gain) + dterm;
|
||||
|
||||
if (result >= 0)
|
||||
result = result + (1 << (FRAC_BITS-1));
|
||||
else
|
||||
result = result - (1 << (FRAC_BITS-1));
|
||||
return (signed int)fp_toint(result);
|
||||
}
|
||||
|
||||
|
|
@ -560,47 +563,42 @@ static void intel_pstate_get_cpu_pstates(struct cpudata *cpu)
|
|||
static inline void intel_pstate_calc_busy(struct cpudata *cpu,
|
||||
struct sample *sample)
|
||||
{
|
||||
int32_t core_pct;
|
||||
int32_t c0_pct;
|
||||
int64_t core_pct;
|
||||
int32_t rem;
|
||||
|
||||
core_pct = div_fp(int_tofp((sample->aperf)),
|
||||
int_tofp((sample->mperf)));
|
||||
core_pct = mul_fp(core_pct, int_tofp(100));
|
||||
FP_ROUNDUP(core_pct);
|
||||
core_pct = int_tofp(sample->aperf) * int_tofp(100);
|
||||
core_pct = div_u64_rem(core_pct, int_tofp(sample->mperf), &rem);
|
||||
|
||||
c0_pct = div_fp(int_tofp(sample->mperf), int_tofp(sample->tsc));
|
||||
if ((rem << 1) >= int_tofp(sample->mperf))
|
||||
core_pct += 1;
|
||||
|
||||
sample->freq = fp_toint(
|
||||
mul_fp(int_tofp(cpu->pstate.max_pstate * 1000), core_pct));
|
||||
|
||||
sample->core_pct_busy = mul_fp(core_pct, c0_pct);
|
||||
sample->core_pct_busy = (int32_t)core_pct;
|
||||
}
|
||||
|
||||
static inline void intel_pstate_sample(struct cpudata *cpu)
|
||||
{
|
||||
u64 aperf, mperf;
|
||||
unsigned long long tsc;
|
||||
|
||||
rdmsrl(MSR_IA32_APERF, aperf);
|
||||
rdmsrl(MSR_IA32_MPERF, mperf);
|
||||
tsc = native_read_tsc();
|
||||
|
||||
aperf = aperf >> FRAC_BITS;
|
||||
mperf = mperf >> FRAC_BITS;
|
||||
tsc = tsc >> FRAC_BITS;
|
||||
|
||||
cpu->last_sample_time = cpu->sample.time;
|
||||
cpu->sample.time = ktime_get();
|
||||
cpu->sample.aperf = aperf;
|
||||
cpu->sample.mperf = mperf;
|
||||
cpu->sample.tsc = tsc;
|
||||
cpu->sample.aperf -= cpu->prev_aperf;
|
||||
cpu->sample.mperf -= cpu->prev_mperf;
|
||||
cpu->sample.tsc -= cpu->prev_tsc;
|
||||
|
||||
intel_pstate_calc_busy(cpu, &cpu->sample);
|
||||
|
||||
cpu->prev_aperf = aperf;
|
||||
cpu->prev_mperf = mperf;
|
||||
cpu->prev_tsc = tsc;
|
||||
}
|
||||
|
||||
static inline void intel_pstate_set_sample_time(struct cpudata *cpu)
|
||||
|
|
@ -614,13 +612,25 @@ static inline void intel_pstate_set_sample_time(struct cpudata *cpu)
|
|||
|
||||
static inline int32_t intel_pstate_get_scaled_busy(struct cpudata *cpu)
|
||||
{
|
||||
int32_t core_busy, max_pstate, current_pstate;
|
||||
int32_t core_busy, max_pstate, current_pstate, sample_ratio;
|
||||
u32 duration_us;
|
||||
u32 sample_time;
|
||||
|
||||
core_busy = cpu->sample.core_pct_busy;
|
||||
max_pstate = int_tofp(cpu->pstate.max_pstate);
|
||||
current_pstate = int_tofp(cpu->pstate.current_pstate);
|
||||
core_busy = mul_fp(core_busy, div_fp(max_pstate, current_pstate));
|
||||
return FP_ROUNDUP(core_busy);
|
||||
|
||||
sample_time = (pid_params.sample_rate_ms * USEC_PER_MSEC);
|
||||
duration_us = (u32) ktime_us_delta(cpu->sample.time,
|
||||
cpu->last_sample_time);
|
||||
if (duration_us > sample_time * 3) {
|
||||
sample_ratio = div_fp(int_tofp(sample_time),
|
||||
int_tofp(duration_us));
|
||||
core_busy = mul_fp(core_busy, sample_ratio);
|
||||
}
|
||||
|
||||
return core_busy;
|
||||
}
|
||||
|
||||
static inline void intel_pstate_adjust_busy_pstate(struct cpudata *cpu)
|
||||
|
|
|
|||
Loading…
Reference in New Issue
Block a user