aboutsummaryrefslogtreecommitdiff
path: root/drivers/cpuidle
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/cpuidle')
-rw-r--r--drivers/cpuidle/governors/gov.h14
-rw-r--r--drivers/cpuidle/governors/menu.c65
-rw-r--r--drivers/cpuidle/governors/teo.c235
3 files changed, 203 insertions, 111 deletions
diff --git a/drivers/cpuidle/governors/gov.h b/drivers/cpuidle/governors/gov.h
new file mode 100644
index 000000000000..99e067d9668c
--- /dev/null
+++ b/drivers/cpuidle/governors/gov.h
@@ -0,0 +1,14 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+/* Common definitions for cpuidle governors. */
+
+#ifndef __CPUIDLE_GOVERNOR_H
+#define __CPUIDLE_GOVERNOR_H
+
+/*
+ * Idle state target residency threshold used for deciding whether or not to
+ * check the time till the closest expected timer event.
+ */
+#define RESIDENCY_THRESHOLD_NS (15 * NSEC_PER_USEC)
+
+#endif /* __CPUIDLE_GOVERNOR_H */
diff --git a/drivers/cpuidle/governors/menu.c b/drivers/cpuidle/governors/menu.c
index c4922684f305..b96e3da0fedd 100644
--- a/drivers/cpuidle/governors/menu.c
+++ b/drivers/cpuidle/governors/menu.c
@@ -19,6 +19,8 @@
#include <linux/sched/stat.h>
#include <linux/math64.h>
+#include "gov.h"
+
#define BUCKETS 12
#define INTERVAL_SHIFT 3
#define INTERVALS (1UL << INTERVAL_SHIFT)
@@ -166,8 +168,7 @@ static void menu_update(struct cpuidle_driver *drv, struct cpuidle_device *dev);
* of points is below a threshold. If it is... then use the
* average of these 8 points as the estimated value.
*/
-static unsigned int get_typical_interval(struct menu_device *data,
- unsigned int predicted_us)
+static unsigned int get_typical_interval(struct menu_device *data)
{
int i, divisor;
unsigned int min, max, thresh, avg;
@@ -195,11 +196,7 @@ again:
}
}
- /*
- * If the result of the computation is going to be discarded anyway,
- * avoid the computation altogether.
- */
- if (min >= predicted_us)
+ if (!max)
return UINT_MAX;
if (divisor == INTERVALS)
@@ -267,7 +264,6 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
{
struct menu_device *data = this_cpu_ptr(&menu_devices);
s64 latency_req = cpuidle_governor_latency_req(dev->cpu);
- unsigned int predicted_us;
u64 predicted_ns;
u64 interactivity_req;
unsigned int nr_iowaiters;
@@ -279,16 +275,41 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
data->needs_update = 0;
}
- /* determine the expected residency time, round up */
- delta = tick_nohz_get_sleep_length(&delta_tick);
- if (unlikely(delta < 0)) {
- delta = 0;
- delta_tick = 0;
- }
- data->next_timer_ns = delta;
-
nr_iowaiters = nr_iowait_cpu(dev->cpu);
- data->bucket = which_bucket(data->next_timer_ns, nr_iowaiters);
+
+ /* Find the shortest expected idle interval. */
+ predicted_ns = get_typical_interval(data) * NSEC_PER_USEC;
+ if (predicted_ns > RESIDENCY_THRESHOLD_NS) {
+ unsigned int timer_us;
+
+ /* Determine the time till the closest timer. */
+ delta = tick_nohz_get_sleep_length(&delta_tick);
+ if (unlikely(delta < 0)) {
+ delta = 0;
+ delta_tick = 0;
+ }
+
+ data->next_timer_ns = delta;
+ data->bucket = which_bucket(data->next_timer_ns, nr_iowaiters);
+
+ /* Round up the result for half microseconds. */
+ timer_us = div_u64((RESOLUTION * DECAY * NSEC_PER_USEC) / 2 +
+ data->next_timer_ns *
+ data->correction_factor[data->bucket],
+ RESOLUTION * DECAY * NSEC_PER_USEC);
+ /* Use the lowest expected idle interval to pick the idle state. */
+ predicted_ns = min((u64)timer_us * NSEC_PER_USEC, predicted_ns);
+ } else {
+ /*
+ * Because the next timer event is not going to be determined
+ * in this case, assume that without the tick the closest timer
+ * will be in distant future and that the closest tick will occur
+ * after 1/2 of the tick period.
+ */
+ data->next_timer_ns = KTIME_MAX;
+ delta_tick = TICK_NSEC / 2;
+ data->bucket = which_bucket(KTIME_MAX, nr_iowaiters);
+ }
if (unlikely(drv->state_count <= 1 || latency_req == 0) ||
((data->next_timer_ns < drv->states[1].target_residency_ns ||
@@ -303,16 +324,6 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
return 0;
}
- /* Round up the result for half microseconds. */
- predicted_us = div_u64(data->next_timer_ns *
- data->correction_factor[data->bucket] +
- (RESOLUTION * DECAY * NSEC_PER_USEC) / 2,
- RESOLUTION * DECAY * NSEC_PER_USEC);
- /* Use the lowest expected idle interval to pick the idle state. */
- predicted_ns = (u64)min(predicted_us,
- get_typical_interval(data, predicted_us)) *
- NSEC_PER_USEC;
-
if (tick_nohz_tick_stopped()) {
/*
* If the tick is already stopped, the cost of possible short
diff --git a/drivers/cpuidle/governors/teo.c b/drivers/cpuidle/governors/teo.c
index 987fc5f3997d..7244f71c59c5 100644
--- a/drivers/cpuidle/governors/teo.c
+++ b/drivers/cpuidle/governors/teo.c
@@ -140,6 +140,8 @@
#include <linux/sched/topology.h>
#include <linux/tick.h>
+#include "gov.h"
+
/*
* The number of bits to shift the CPU's capacity by in order to determine
* the utilized threshold.
@@ -152,7 +154,6 @@
*/
#define UTIL_THRESHOLD_SHIFT 6
-
/*
* The PULSE value is added to metrics when they grow and the DECAY_SHIFT value
* is used for decreasing metrics on a regular basis.
@@ -186,8 +187,8 @@ struct teo_bin {
* @total: Grand total of the "intercepts" and "hits" metrics for all bins.
* @next_recent_idx: Index of the next @recent_idx entry to update.
* @recent_idx: Indices of bins corresponding to recent "intercepts".
+ * @tick_hits: Number of "hits" after TICK_NSEC.
* @util_threshold: Threshold above which the CPU is considered utilized
- * @utilized: Whether the last sleep on the CPU happened while utilized
*/
struct teo_cpu {
s64 time_span_ns;
@@ -196,8 +197,8 @@ struct teo_cpu {
unsigned int total;
int next_recent_idx;
int recent_idx[NR_RECENT];
+ unsigned int tick_hits;
unsigned long util_threshold;
- bool utilized;
};
static DEFINE_PER_CPU(struct teo_cpu, teo_cpus);
@@ -228,6 +229,7 @@ static void teo_update(struct cpuidle_driver *drv, struct cpuidle_device *dev)
{
struct teo_cpu *cpu_data = per_cpu_ptr(&teo_cpus, dev->cpu);
int i, idx_timer = 0, idx_duration = 0;
+ s64 target_residency_ns;
u64 measured_ns;
if (cpu_data->time_span_ns >= cpu_data->sleep_length_ns) {
@@ -268,7 +270,6 @@ static void teo_update(struct cpuidle_driver *drv, struct cpuidle_device *dev)
* fall into.
*/
for (i = 0; i < drv->state_count; i++) {
- s64 target_residency_ns = drv->states[i].target_residency_ns;
struct teo_bin *bin = &cpu_data->state_bins[i];
bin->hits -= bin->hits >> DECAY_SHIFT;
@@ -276,6 +277,8 @@ static void teo_update(struct cpuidle_driver *drv, struct cpuidle_device *dev)
cpu_data->total += bin->hits + bin->intercepts;
+ target_residency_ns = drv->states[i].target_residency_ns;
+
if (target_residency_ns <= cpu_data->sleep_length_ns) {
idx_timer = i;
if (target_residency_ns <= measured_ns)
@@ -291,6 +294,26 @@ static void teo_update(struct cpuidle_driver *drv, struct cpuidle_device *dev)
cpu_data->state_bins[cpu_data->recent_idx[i]].recent--;
/*
+ * If the deepest state's target residency is below the tick length,
+ * make a record of it to help teo_select() decide whether or not
+ * to stop the tick. This effectively adds an extra hits-only bin
+ * beyond the last state-related one.
+ */
+ if (target_residency_ns < TICK_NSEC) {
+ cpu_data->tick_hits -= cpu_data->tick_hits >> DECAY_SHIFT;
+
+ cpu_data->total += cpu_data->tick_hits;
+
+ if (TICK_NSEC <= cpu_data->sleep_length_ns) {
+ idx_timer = drv->state_count;
+ if (TICK_NSEC <= measured_ns) {
+ cpu_data->tick_hits += PULSE;
+ goto end;
+ }
+ }
+ }
+
+ /*
* If the measured idle duration falls into the same bin as the sleep
* length, this is a "hit", so update the "hits" metric for that bin.
* Otherwise, update the "intercepts" metric for the bin fallen into by
@@ -305,18 +328,14 @@ static void teo_update(struct cpuidle_driver *drv, struct cpuidle_device *dev)
cpu_data->recent_idx[i] = idx_duration;
}
+end:
cpu_data->total += PULSE;
}
-static bool teo_time_ok(u64 interval_ns)
+static bool teo_state_ok(int i, struct cpuidle_driver *drv)
{
- return !tick_nohz_tick_stopped() || interval_ns >= TICK_NSEC;
-}
-
-static s64 teo_middle_of_bin(int idx, struct cpuidle_driver *drv)
-{
- return (drv->states[idx].target_residency_ns +
- drv->states[idx+1].target_residency_ns) / 2;
+ return !tick_nohz_tick_stopped() ||
+ drv->states[i].target_residency_ns >= TICK_NSEC;
}
/**
@@ -356,6 +375,8 @@ static int teo_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
{
struct teo_cpu *cpu_data = per_cpu_ptr(&teo_cpus, dev->cpu);
s64 latency_req = cpuidle_governor_latency_req(dev->cpu);
+ ktime_t delta_tick = TICK_NSEC / 2;
+ unsigned int tick_intercept_sum = 0;
unsigned int idx_intercept_sum = 0;
unsigned int intercept_sum = 0;
unsigned int idx_recent_sum = 0;
@@ -365,7 +386,7 @@ static int teo_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
int constraint_idx = 0;
int idx0 = 0, idx = -1;
bool alt_intercepts, alt_recent;
- ktime_t delta_tick;
+ bool cpu_utilized;
s64 duration_ns;
int i;
@@ -375,44 +396,48 @@ static int teo_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
}
cpu_data->time_span_ns = local_clock();
-
- duration_ns = tick_nohz_get_sleep_length(&delta_tick);
- cpu_data->sleep_length_ns = duration_ns;
+ /*
+ * Set the expected sleep length to infinity in case of an early
+ * return.
+ */
+ cpu_data->sleep_length_ns = KTIME_MAX;
/* Check if there is any choice in the first place. */
if (drv->state_count < 2) {
idx = 0;
- goto end;
+ goto out_tick;
}
- if (!dev->states_usage[0].disable) {
+
+ if (!dev->states_usage[0].disable)
idx = 0;
- if (drv->states[1].target_residency_ns > duration_ns)
- goto end;
- }
- cpu_data->utilized = teo_cpu_is_utilized(dev->cpu, cpu_data);
+ cpu_utilized = teo_cpu_is_utilized(dev->cpu, cpu_data);
/*
* If the CPU is being utilized over the threshold and there are only 2
* states to choose from, the metrics need not be considered, so choose
* the shallowest non-polling state and exit.
*/
- if (drv->state_count < 3 && cpu_data->utilized) {
- for (i = 0; i < drv->state_count; ++i) {
- if (!dev->states_usage[i].disable &&
- !(drv->states[i].flags & CPUIDLE_FLAG_POLLING)) {
- idx = i;
- goto end;
- }
+ if (drv->state_count < 3 && cpu_utilized) {
+ /*
+ * If state 0 is enabled and it is not a polling one, select it
+ * right away unless the scheduler tick has been stopped, in
+ * which case care needs to be taken to leave the CPU in a deep
+ * enough state in case it is not woken up any time soon after
+ * all. If state 1 is disabled, though, state 0 must be used
+ * anyway.
+ */
+ if ((!idx && !(drv->states[0].flags & CPUIDLE_FLAG_POLLING) &&
+ teo_state_ok(0, drv)) || dev->states_usage[1].disable) {
+ idx = 0;
+ goto out_tick;
}
+ /* Assume that state 1 is not a polling one and use it. */
+ idx = 1;
+ duration_ns = drv->states[1].target_residency_ns;
+ goto end;
}
- /*
- * Find the deepest idle state whose target residency does not exceed
- * the current sleep length and the deepest idle state not deeper than
- * the former whose exit latency does not exceed the current latency
- * constraint. Compute the sums of metrics for early wakeup pattern
- * detection.
- */
+ /* Compute the sums of metrics for early wakeup pattern detection. */
for (i = 1; i < drv->state_count; i++) {
struct teo_bin *prev_bin = &cpu_data->state_bins[i-1];
struct cpuidle_state *s = &drv->states[i];
@@ -428,19 +453,15 @@ static int teo_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
if (dev->states_usage[i].disable)
continue;
- if (idx < 0) {
- idx = i; /* first enabled state */
- idx0 = i;
- }
-
- if (s->target_residency_ns > duration_ns)
- break;
+ if (idx < 0)
+ idx0 = i; /* first enabled state */
idx = i;
if (s->exit_latency_ns <= latency_req)
constraint_idx = i;
+ /* Save the sums for the current state. */
idx_intercept_sum = intercept_sum;
idx_hit_sum = hit_sum;
idx_recent_sum = recent_sum;
@@ -449,11 +470,21 @@ static int teo_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
/* Avoid unnecessary overhead. */
if (idx < 0) {
idx = 0; /* No states enabled, must use 0. */
- goto end;
- } else if (idx == idx0) {
+ goto out_tick;
+ }
+
+ if (idx == idx0) {
+ /*
+ * Only one idle state is enabled, so use it, but do not
+ * allow the tick to be stopped it is shallow enough.
+ */
+ duration_ns = drv->states[idx].target_residency_ns;
goto end;
}
+ tick_intercept_sum = intercept_sum +
+ cpu_data->state_bins[drv->state_count-1].intercepts;
+
/*
* If the sum of the intercepts metric for all of the idle states
* shallower than the current candidate one (idx) is greater than the
@@ -461,13 +492,11 @@ static int teo_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
* all of the deeper states, or the sum of the numbers of recent
* intercepts over all of the states shallower than the candidate one
* is greater than a half of the number of recent events taken into
- * account, the CPU is likely to wake up early, so find an alternative
- * idle state to select.
+ * account, a shallower idle state is likely to be a better choice.
*/
alt_intercepts = 2 * idx_intercept_sum > cpu_data->total - idx_hit_sum;
alt_recent = idx_recent_sum > NR_RECENT / 2;
if (alt_recent || alt_intercepts) {
- s64 first_suitable_span_ns = duration_ns;
int first_suitable_idx = idx;
/*
@@ -476,44 +505,39 @@ static int teo_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
* cases (both with respect to intercepts overall and with
* respect to the recent intercepts only) in the past.
*
- * Take the possible latency constraint and duration limitation
- * present if the tick has been stopped already into account.
+ * Take the possible duration limitation present if the tick
+ * has been stopped already into account.
*/
intercept_sum = 0;
recent_sum = 0;
for (i = idx - 1; i >= 0; i--) {
struct teo_bin *bin = &cpu_data->state_bins[i];
- s64 span_ns;
intercept_sum += bin->intercepts;
recent_sum += bin->recent;
- span_ns = teo_middle_of_bin(i, drv);
-
if ((!alt_recent || 2 * recent_sum > idx_recent_sum) &&
(!alt_intercepts ||
2 * intercept_sum > idx_intercept_sum)) {
- if (teo_time_ok(span_ns) &&
- !dev->states_usage[i].disable) {
+ /*
+ * Use the current state unless it is too
+ * shallow or disabled, in which case take the
+ * first enabled state that is deep enough.
+ */
+ if (teo_state_ok(i, drv) &&
+ !dev->states_usage[i].disable)
idx = i;
- duration_ns = span_ns;
- } else {
- /*
- * The current state is too shallow or
- * disabled, so take the first enabled
- * deeper state with suitable time span.
- */
+ else
idx = first_suitable_idx;
- duration_ns = first_suitable_span_ns;
- }
+
break;
}
if (dev->states_usage[i].disable)
continue;
- if (!teo_time_ok(span_ns)) {
+ if (!teo_state_ok(i, drv)) {
/*
* The current state is too shallow, but if an
* alternative candidate state has been found,
@@ -525,7 +549,6 @@ static int teo_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
break;
}
- first_suitable_span_ns = span_ns;
first_suitable_idx = i;
}
}
@@ -539,31 +562,75 @@ static int teo_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
/*
* If the CPU is being utilized over the threshold, choose a shallower
- * non-polling state to improve latency
+ * non-polling state to improve latency, unless the scheduler tick has
+ * been stopped already and the shallower state's target residency is
+ * not sufficiently large.
*/
- if (cpu_data->utilized)
- idx = teo_find_shallower_state(drv, dev, idx, duration_ns, true);
+ if (cpu_utilized) {
+ i = teo_find_shallower_state(drv, dev, idx, KTIME_MAX, true);
+ if (teo_state_ok(i, drv))
+ idx = i;
+ }
-end:
/*
- * Don't stop the tick if the selected state is a polling one or if the
- * expected idle duration is shorter than the tick period length.
+ * Skip the timers check if state 0 is the current candidate one,
+ * because an immediate non-timer wakeup is expected in that case.
*/
- if (((drv->states[idx].flags & CPUIDLE_FLAG_POLLING) ||
- duration_ns < TICK_NSEC) && !tick_nohz_tick_stopped()) {
- *stop_tick = false;
+ if (!idx)
+ goto out_tick;
- /*
- * The tick is not going to be stopped, so if the target
- * residency of the state to be returned is not within the time
- * till the closest timer including the tick, try to correct
- * that.
- */
- if (idx > idx0 &&
- drv->states[idx].target_residency_ns > delta_tick)
- idx = teo_find_shallower_state(drv, dev, idx, delta_tick, false);
+ /*
+ * If state 0 is a polling one, check if the target residency of
+ * the current candidate state is low enough and skip the timers
+ * check in that case too.
+ */
+ if ((drv->states[0].flags & CPUIDLE_FLAG_POLLING) &&
+ drv->states[idx].target_residency_ns < RESIDENCY_THRESHOLD_NS)
+ goto out_tick;
+
+ duration_ns = tick_nohz_get_sleep_length(&delta_tick);
+ cpu_data->sleep_length_ns = duration_ns;
+
+ /*
+ * If the closest expected timer is before the terget residency of the
+ * candidate state, a shallower one needs to be found.
+ */
+ if (drv->states[idx].target_residency_ns > duration_ns) {
+ i = teo_find_shallower_state(drv, dev, idx, duration_ns, false);
+ if (teo_state_ok(i, drv))
+ idx = i;
}
+ /*
+ * If the selected state's target residency is below the tick length
+ * and intercepts occurring before the tick length are the majority of
+ * total wakeup events, do not stop the tick.
+ */
+ if (drv->states[idx].target_residency_ns < TICK_NSEC &&
+ tick_intercept_sum > cpu_data->total / 2 + cpu_data->total / 8)
+ duration_ns = TICK_NSEC / 2;
+
+end:
+ /*
+ * Allow the tick to be stopped unless the selected state is a polling
+ * one or the expected idle duration is shorter than the tick period
+ * length.
+ */
+ if ((!(drv->states[idx].flags & CPUIDLE_FLAG_POLLING) &&
+ duration_ns >= TICK_NSEC) || tick_nohz_tick_stopped())
+ return idx;
+
+ /*
+ * The tick is not going to be stopped, so if the target residency of
+ * the state to be returned is not within the time till the closest
+ * timer including the tick, try to correct that.
+ */
+ if (idx > idx0 &&
+ drv->states[idx].target_residency_ns > delta_tick)
+ idx = teo_find_shallower_state(drv, dev, idx, delta_tick, false);
+
+out_tick:
+ *stop_tick = false;
return idx;
}