From d2cd195b57cf5ffbe432be01e96f35637e7bd403 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Thu, 6 Feb 2025 15:22:59 +0100 Subject: [PATCH 1/6] cpuidle: menu: Drop a redundant local variable Local variable min in get_typical_interval() is updated, but never accessed later, so drop it. No functional impact. Signed-off-by: Rafael J. Wysocki Reviewed-by: Christian Loehle Tested-by: Artem Bityutskiy Tested-by: Christian Loehle Tested-by: Aboorva Devarajan Link: https://patch.msgid.link/13699686.uLZWGnKmhe@rjwysocki.net --- drivers/cpuidle/governors/menu.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/drivers/cpuidle/governors/menu.c b/drivers/cpuidle/governors/menu.c index 28363bfa3e4c..b19406502b56 100644 --- a/drivers/cpuidle/governors/menu.c +++ b/drivers/cpuidle/governors/menu.c @@ -117,7 +117,7 @@ static void menu_update(struct cpuidle_driver *drv, struct cpuidle_device *dev); static unsigned int get_typical_interval(struct menu_device *data) { int i, divisor; - unsigned int min, max, thresh, avg; + unsigned int max, thresh, avg; uint64_t sum, variance; thresh = INT_MAX; /* Discard outliers above this value */ @@ -125,7 +125,6 @@ static unsigned int get_typical_interval(struct menu_device *data) again: /* First calculate the average of past intervals */ - min = UINT_MAX; max = 0; sum = 0; divisor = 0; @@ -136,9 +135,6 @@ static unsigned int get_typical_interval(struct menu_device *data) divisor++; if (value > max) max = value; - - if (value < min) - min = value; } } From 13982929fb08ed4691256072856f50bf7b206b9b Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Thu, 6 Feb 2025 15:24:18 +0100 Subject: [PATCH 2/6] cpuidle: menu: Use one loop for average and variance computations Use the observation that one loop is sufficient to compute the average of an array of values and their variance to eliminate one of the loops from get_typical_interval(). While at it, make get_typical_interval() consistently use u64 as the 64-bit unsigned integer data type and rearrange some white space and the declarations of local variables in it (to make them follow the reverse X-mas tree pattern). No intentional functional impact. Signed-off-by: Rafael J. Wysocki Tested-by: Artem Bityutskiy Reviewed-by: Christian Loehle Tested-by: Christian Loehle Tested-by: Aboorva Devarajan Link: https://patch.msgid.link/3339073.aeNJFYEL58@rjwysocki.net --- drivers/cpuidle/governors/menu.c | 61 +++++++++++++++----------------- 1 file changed, 28 insertions(+), 33 deletions(-) diff --git a/drivers/cpuidle/governors/menu.c b/drivers/cpuidle/governors/menu.c index b19406502b56..ec472af38de6 100644 --- a/drivers/cpuidle/governors/menu.c +++ b/drivers/cpuidle/governors/menu.c @@ -116,49 +116,45 @@ static void menu_update(struct cpuidle_driver *drv, struct cpuidle_device *dev); */ static unsigned int get_typical_interval(struct menu_device *data) { - int i, divisor; - unsigned int max, thresh, avg; - uint64_t sum, variance; - - thresh = INT_MAX; /* Discard outliers above this value */ + unsigned int max, divisor, thresh = INT_MAX; + u64 avg, variance, avg_sq; + int i; again: - - /* First calculate the average of past intervals */ + /* Compute the average and variance of past intervals. */ max = 0; - sum = 0; + avg = 0; + variance = 0; divisor = 0; for (i = 0; i < INTERVALS; i++) { unsigned int value = data->intervals[i]; - if (value <= thresh) { - sum += value; - divisor++; - if (value > max) - max = value; - } + + /* Discard data points above the threshold. */ + if (value > thresh) + continue; + + divisor++; + + avg += value; + variance += (u64)value * value; + + if (value > max) + max = value; } if (!max) return UINT_MAX; - if (divisor == INTERVALS) - avg = sum >> INTERVAL_SHIFT; - else - avg = div_u64(sum, divisor); - - /* Then try to determine variance */ - variance = 0; - for (i = 0; i < INTERVALS; i++) { - unsigned int value = data->intervals[i]; - if (value <= thresh) { - int64_t diff = (int64_t)value - avg; - variance += diff * diff; - } - } - if (divisor == INTERVALS) + if (divisor == INTERVALS) { + avg >>= INTERVAL_SHIFT; variance >>= INTERVAL_SHIFT; - else + } else { + do_div(avg, divisor); do_div(variance, divisor); + } + + avg_sq = avg * avg; + variance -= avg_sq; /* * The typical interval is obtained when standard deviation is @@ -173,10 +169,9 @@ static unsigned int get_typical_interval(struct menu_device *data) * Use this result only if there is no timer to wake us up sooner. */ if (likely(variance <= U64_MAX/36)) { - if ((((u64)avg*avg > variance*36) && (divisor * 4 >= INTERVALS * 3)) - || variance <= 400) { + if ((avg_sq > variance * 36 && divisor * 4 >= INTERVALS * 3) || + variance <= 400) return avg; - } } /* From 60256e458e1c29652b2f9e4f2ba71fc7b09bd30c Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Thu, 6 Feb 2025 15:25:18 +0100 Subject: [PATCH 3/6] cpuidle: menu: Tweak threshold use in get_typical_interval() To prepare get_typical_interval() for subsequent changes, rearrange the use of the data point threshold in it a bit and initialize that threshold to UINT_MAX which is more consistent with its data type. No intentional functional impact. Signed-off-by: Rafael J. Wysocki Tested-by: Artem Bityutskiy Reviewed-by: Christian Loehle Tested-by: Christian Loehle Tested-by: Aboorva Devarajan Link: https://patch.msgid.link/8490144.T7Z3S40VBb@rjwysocki.net --- drivers/cpuidle/governors/menu.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/cpuidle/governors/menu.c b/drivers/cpuidle/governors/menu.c index ec472af38de6..680ff20e5528 100644 --- a/drivers/cpuidle/governors/menu.c +++ b/drivers/cpuidle/governors/menu.c @@ -116,7 +116,7 @@ static void menu_update(struct cpuidle_driver *drv, struct cpuidle_device *dev); */ static unsigned int get_typical_interval(struct menu_device *data) { - unsigned int max, divisor, thresh = INT_MAX; + unsigned int max, divisor, thresh = UINT_MAX; u64 avg, variance, avg_sq; int i; @@ -129,8 +129,8 @@ static unsigned int get_typical_interval(struct menu_device *data) for (i = 0; i < INTERVALS; i++) { unsigned int value = data->intervals[i]; - /* Discard data points above the threshold. */ - if (value > thresh) + /* Discard data points above or at the threshold. */ + if (value >= thresh) continue; divisor++; @@ -186,7 +186,7 @@ static unsigned int get_typical_interval(struct menu_device *data) if ((divisor * 4) <= INTERVALS * 3) return UINT_MAX; - thresh = max - 1; + thresh = max; goto again; } From 8de7606f0fe2bf5a918fe97d425e16e190a24fe6 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Thu, 6 Feb 2025 15:26:41 +0100 Subject: [PATCH 4/6] cpuidle: menu: Eliminate outliers on both ends of the sample set Currently, get_typical_interval() attempts to eliminate outliers at the high end of the sample set only (probably in order to bias the prediction toward lower values), but this it problematic because if the outliers are present at the low end of the sample set, discarding the highest values will not help to reduce the variance. Since the presence of outliers at the low end of the sample set is generally as likely as their presence at the high end of the sample set, modify get_typical_interval() to treat samples at the largest distances from the average (on both ends of the sample set) as outliers. This should increase the likelihood of making a meaningful prediction in some cases. Signed-off-by: Rafael J. Wysocki Reported-by: Artem Bityutskiy Tested-by: Artem Bityutskiy Reviewed-by: Christian Loehle Tested-by: Christian Loehle Tested-by: Aboorva Devarajan Link: https://patch.msgid.link/2301940.iZASKD2KPV@rjwysocki.net --- drivers/cpuidle/governors/menu.c | 32 ++++++++++++++++++++++---------- 1 file changed, 22 insertions(+), 10 deletions(-) diff --git a/drivers/cpuidle/governors/menu.c b/drivers/cpuidle/governors/menu.c index 680ff20e5528..48ebbde750e5 100644 --- a/drivers/cpuidle/governors/menu.c +++ b/drivers/cpuidle/governors/menu.c @@ -116,30 +116,37 @@ static void menu_update(struct cpuidle_driver *drv, struct cpuidle_device *dev); */ static unsigned int get_typical_interval(struct menu_device *data) { - unsigned int max, divisor, thresh = UINT_MAX; + s64 value, min_thresh = -1, max_thresh = UINT_MAX; + unsigned int max, min, divisor; u64 avg, variance, avg_sq; int i; again: /* Compute the average and variance of past intervals. */ max = 0; + min = UINT_MAX; avg = 0; variance = 0; divisor = 0; for (i = 0; i < INTERVALS; i++) { - unsigned int value = data->intervals[i]; - - /* Discard data points above or at the threshold. */ - if (value >= thresh) + value = data->intervals[i]; + /* + * Discard the samples outside the interval between the min and + * max thresholds. + */ + if (value <= min_thresh || value >= max_thresh) continue; divisor++; avg += value; - variance += (u64)value * value; + variance += value * value; if (value > max) max = value; + + if (value < min) + min = value; } if (!max) @@ -175,10 +182,10 @@ static unsigned int get_typical_interval(struct menu_device *data) } /* - * If we have outliers to the upside in our distribution, discard - * those by setting the threshold to exclude these outliers, then + * If there are outliers, discard them by setting thresholds to exclude + * data points at a large enough distance from the average, then * calculate the average and standard deviation again. Once we get - * down to the bottom 3/4 of our samples, stop excluding samples. + * down to the last 3/4 of our samples, stop excluding samples. * * This can deal with workloads that have long pauses interspersed * with sporadic activity with a bunch of short pauses. @@ -186,7 +193,12 @@ static unsigned int get_typical_interval(struct menu_device *data) if ((divisor * 4) <= INTERVALS * 3) return UINT_MAX; - thresh = max; + /* Update the thresholds for the next round. */ + if (avg - min > max - avg) + min_thresh = min; + else + max_thresh = max; + goto again; } From 85975daeaa4d6ec560bfcd354fc9c08ad7f38888 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Thu, 6 Feb 2025 15:29:05 +0100 Subject: [PATCH 5/6] cpuidle: menu: Avoid discarding useful information When giving up on making a high-confidence prediction, get_typical_interval() always returns UINT_MAX which means that the next idle interval prediction will be based entirely on the time till the next timer. However, the information represented by the most recent intervals may not be completely useless in those cases. Namely, the largest recent idle interval is an upper bound on the recently observed idle duration, so it is reasonable to assume that the next idle duration is unlikely to exceed it. Moreover, this is still true after eliminating the suspected outliers if the sample set still under consideration is at least as large as 50% of the maximum sample set size. Accordingly, make get_typical_interval() return the current maximum recent interval value in that case instead of UINT_MAX. Signed-off-by: Rafael J. Wysocki Reported-by: Artem Bityutskiy Tested-by: Artem Bityutskiy Reviewed-by: Christian Loehle Tested-by: Christian Loehle Tested-by: Aboorva Devarajan Link: https://patch.msgid.link/7770672.EvYhyI6sBW@rjwysocki.net --- drivers/cpuidle/governors/menu.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/drivers/cpuidle/governors/menu.c b/drivers/cpuidle/governors/menu.c index 48ebbde750e5..30ffb1f69056 100644 --- a/drivers/cpuidle/governors/menu.c +++ b/drivers/cpuidle/governors/menu.c @@ -190,8 +190,19 @@ static unsigned int get_typical_interval(struct menu_device *data) * This can deal with workloads that have long pauses interspersed * with sporadic activity with a bunch of short pauses. */ - if ((divisor * 4) <= INTERVALS * 3) + if (divisor * 4 <= INTERVALS * 3) { + /* + * If there are sufficiently many data points still under + * consideration after the outliers have been eliminated, + * returning without a prediction would be a mistake because it + * is likely that the next interval will not exceed the current + * maximum, so return the latter in that case. + */ + if (divisor >= INTERVALS / 2) + return max; + return UINT_MAX; + } /* Update the thresholds for the next round. */ if (avg - min > max - avg) From 5c350410999653dff8d2975d794088e4c166e8b5 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Thu, 20 Feb 2025 21:13:12 +0100 Subject: [PATCH 6/6] cpuidle: menu: Update documentation after get_typical_interval() changes The documentation of the menu cpuidle governor needs to be updated to match the code behavior after some changes made recently. No functional impact. Signed-off-by: Rafael J. Wysocki Reviewed-by: Christian Loehle Link: https://patch.msgid.link/4998484.31r3eYUQgx@rjwysocki.net [ rjw: More specific subject, two typos fixed in the changelog ] Signed-off-by: Rafael J. Wysocki --- Documentation/admin-guide/pm/cpuidle.rst | 27 +++++++++++++--------- drivers/cpuidle/governors/menu.c | 29 ++++++++---------------- 2 files changed, 26 insertions(+), 30 deletions(-) diff --git a/Documentation/admin-guide/pm/cpuidle.rst b/Documentation/admin-guide/pm/cpuidle.rst index eb58d7a5affd..0c090b076224 100644 --- a/Documentation/admin-guide/pm/cpuidle.rst +++ b/Documentation/admin-guide/pm/cpuidle.rst @@ -275,20 +275,25 @@ values and, when predicting the idle duration next time, it computes the average and variance of them. If the variance is small (smaller than 400 square milliseconds) or it is small relative to the average (the average is greater that 6 times the standard deviation), the average is regarded as the "typical -interval" value. Otherwise, the longest of the saved observed idle duration +interval" value. Otherwise, either the longest or the shortest (depending on +which one is farther from the average) of the saved observed idle duration values is discarded and the computation is repeated for the remaining ones. + Again, if the variance of them is small (in the above sense), the average is taken as the "typical interval" value and so on, until either the "typical -interval" is determined or too many data points are disregarded, in which case -the "typical interval" is assumed to equal "infinity" (the maximum unsigned -integer value). +interval" is determined or too many data points are disregarded. In the latter +case, if the size of the set of data points still under consideration is +sufficiently large, the next idle duration is not likely to be above the largest +idle duration value still in that set, so that value is taken as the predicted +next idle duration. Finally, if the set of data points still under +consideration is too small, no prediction is made. -If the "typical interval" computed this way is long enough, the governor obtains -the time until the closest timer event with the assumption that the scheduler -tick will be stopped. That time, referred to as the *sleep length* in what follows, -is the upper bound on the time before the next CPU wakeup. It is used to determine -the sleep length range, which in turn is needed to get the sleep length correction -factor. +If the preliminary prediction of the next idle duration computed this way is +long enough, the governor obtains the time until the closest timer event with +the assumption that the scheduler tick will be stopped. That time, referred to +as the *sleep length* in what follows, is the upper bound on the time before the +next CPU wakeup. It is used to determine the sleep length range, which in turn +is needed to get the sleep length correction factor. The ``menu`` governor maintains an array containing several correction factor values that correspond to different sleep length ranges organized so that each @@ -302,7 +307,7 @@ to 1 the correction factor becomes (it must fall between 0 and 1 inclusive). The sleep length is multiplied by the correction factor for the range that it falls into to obtain an approximation of the predicted idle duration that is compared to the "typical interval" determined previously and the minimum of -the two is taken as the idle duration prediction. +the two is taken as the final idle duration prediction. If the "typical interval" value is small, which means that the CPU is likely to be woken up soon enough, the sleep length computation is skipped as it may diff --git a/drivers/cpuidle/governors/menu.c b/drivers/cpuidle/governors/menu.c index 30ffb1f69056..39aa0aea61c6 100644 --- a/drivers/cpuidle/governors/menu.c +++ b/drivers/cpuidle/governors/menu.c @@ -41,7 +41,7 @@ * the C state is required to actually break even on this cost. CPUIDLE * provides us this duration in the "target_residency" field. So all that we * need is a good prediction of how long we'll be idle. Like the traditional - * menu governor, we start with the actual known "next timer event" time. + * menu governor, we take the actual known "next timer event" time. * * Since there are other source of wakeups (interrupts for example) than * the next timer event, this estimation is rather optimistic. To get a @@ -50,30 +50,21 @@ * duration always was 50% of the next timer tick, the correction factor will * be 0.5. * - * menu uses a running average for this correction factor, however it uses a - * set of factors, not just a single factor. This stems from the realization - * that the ratio is dependent on the order of magnitude of the expected - * duration; if we expect 500 milliseconds of idle time the likelihood of - * getting an interrupt very early is much higher than if we expect 50 micro - * seconds of idle time. A second independent factor that has big impact on - * the actual factor is if there is (disk) IO outstanding or not. - * (as a special twist, we consider every sleep longer than 50 milliseconds - * as perfect; there are no power gains for sleeping longer than this) - * - * For these two reasons we keep an array of 12 independent factors, that gets - * indexed based on the magnitude of the expected duration as well as the - * "is IO outstanding" property. + * menu uses a running average for this correction factor, but it uses a set of + * factors, not just a single factor. This stems from the realization that the + * ratio is dependent on the order of magnitude of the expected duration; if we + * expect 500 milliseconds of idle time the likelihood of getting an interrupt + * very early is much higher than if we expect 50 micro seconds of idle time. + * For this reason, menu keeps an array of 6 independent factors, that gets + * indexed based on the magnitude of the expected duration. * * Repeatable-interval-detector * ---------------------------- * There are some cases where "next timer" is a completely unusable predictor: * Those cases where the interval is fixed, for example due to hardware - * interrupt mitigation, but also due to fixed transfer rate devices such as - * mice. + * interrupt mitigation, but also due to fixed transfer rate devices like mice. * For this, we use a different predictor: We track the duration of the last 8 - * intervals and if the stand deviation of these 8 intervals is below a - * threshold value, we use the average of these intervals as prediction. - * + * intervals and use them to estimate the duration of the next one. */ struct menu_device {