mirror of
https://github.com/torvalds/linux.git
synced 2026-05-13 00:28:54 +02:00
DAMON_STAT updates 'enabled' parameter value, which represents the running
status of its kdamond, when the user explicitly requests start/stop of the
kdamond. The kdamond can, however, be stopped even when the user has not
explicitly requested the stop, if the ctx->regions_score_histogram allocation
fails at the beginning of the execution of the kdamond. Hence, if the kdamond is
stopped by the allocation failure, the value of the parameter can be
stale.
Users could read the stale value and be confused. The problem will only
rarely happen in real and common setups because the allocation is arguably
too small to fail. Also, unlike the similar bugs that are now fixed in
DAMON_RECLAIM and DAMON_LRU_SORT, kdamond can be restarted in this case,
because DAMON_STAT force-updates the enabled parameter value for user
inputs. The bug is a bug, though.
The issue stems from the fact that there are multiple events that can
change the status, and following all the events is challenging.
Dynamically detect and use the fresh status for the parameters when those
are requested.
The issue was discovered [1] by Sashiko.
Link: https://lore.kernel.org/20260419161003.79176-4-sj@kernel.org
Link: https://lore.kernel.org/20260416040602.88665-1-sj@kernel.org [1]
Fixes: 369c415e60 ("mm/damon: introduce DAMON_STAT module")
Signed-off-by: SeongJae Park <sj@kernel.org>
Cc: Liew Rui Yan <aethernet65535@gmail.com>
Cc: <stable@vger.kernel.org> # 6.17.x
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
336 lines
8.4 KiB
C
336 lines
8.4 KiB
C
// SPDX-License-Identifier: GPL-2.0
|
|
/*
|
|
* Shows data access monitoring results in simple metrics.
|
|
*/
|
|
|
|
#define pr_fmt(fmt) "damon-stat: " fmt
|
|
|
|
#include <linux/damon.h>
|
|
#include <linux/init.h>
|
|
#include <linux/kernel.h>
|
|
#include <linux/module.h>
|
|
#include <linux/sort.h>
|
|
|
|
#ifdef MODULE_PARAM_PREFIX
|
|
#undef MODULE_PARAM_PREFIX
|
|
#endif
|
|
#define MODULE_PARAM_PREFIX "damon_stat."
|
|
|
|
static int damon_stat_enabled_store(
|
|
const char *val, const struct kernel_param *kp);
|
|
|
|
static int damon_stat_enabled_load(char *buffer,
|
|
const struct kernel_param *kp);
|
|
|
|
static const struct kernel_param_ops enabled_param_ops = {
|
|
.set = damon_stat_enabled_store,
|
|
.get = damon_stat_enabled_load,
|
|
};
|
|
|
|
static bool enabled __read_mostly = IS_ENABLED(
|
|
CONFIG_DAMON_STAT_ENABLED_DEFAULT);
|
|
module_param_cb(enabled, &enabled_param_ops, NULL, 0600);
|
|
MODULE_PARM_DESC(enabled, "Enable of disable DAMON_STAT");
|
|
|
|
static unsigned long estimated_memory_bandwidth __read_mostly;
|
|
module_param(estimated_memory_bandwidth, ulong, 0400);
|
|
MODULE_PARM_DESC(estimated_memory_bandwidth,
|
|
"Estimated memory bandwidth usage in bytes per second");
|
|
|
|
static long memory_idle_ms_percentiles[101] = {0,};
|
|
module_param_array(memory_idle_ms_percentiles, long, NULL, 0400);
|
|
MODULE_PARM_DESC(memory_idle_ms_percentiles,
|
|
"Memory idle time percentiles in milliseconds");
|
|
|
|
static unsigned long aggr_interval_us;
|
|
module_param(aggr_interval_us, ulong, 0400);
|
|
MODULE_PARM_DESC(aggr_interval_us,
|
|
"Current tuned aggregation interval in microseconds");
|
|
|
|
/* DAMON context of DAMON_STAT; NULL if never started or explicitly stopped */
static struct damon_ctx *damon_stat_context;

/* time of the last stat parameters refresh, in jiffies */
static unsigned long damon_stat_last_refresh_jiffies;
|
|
|
|
static void damon_stat_set_estimated_memory_bandwidth(struct damon_ctx *c)
|
|
{
|
|
struct damon_target *t;
|
|
struct damon_region *r;
|
|
unsigned long access_bytes = 0;
|
|
|
|
damon_for_each_target(t, c) {
|
|
damon_for_each_region(r, t)
|
|
access_bytes += (r->ar.end - r->ar.start) *
|
|
r->nr_accesses;
|
|
}
|
|
estimated_memory_bandwidth = access_bytes * USEC_PER_MSEC *
|
|
MSEC_PER_SEC / c->attrs.aggr_interval;
|
|
}
|
|
|
|
static int damon_stat_idletime(const struct damon_region *r)
|
|
{
|
|
if (r->nr_accesses)
|
|
return -1 * (r->age + 1);
|
|
return r->age + 1;
|
|
}
|
|
|
|
/*
 * sort() comparator ordering regions by damon_stat_idletime(), ascending:
 * accessed (negative idle time) regions first, coldest regions last.
 */
static int damon_stat_cmp_regions(const void *a, const void *b)
{
	const struct damon_region * const *pa = a;
	const struct damon_region * const *pb = b;

	return damon_stat_idletime(*pa) - damon_stat_idletime(*pb);
}
|
|
|
|
/*
 * Collect pointers to all monitoring results regions of @c, sorted in
 * ascending order of damon_stat_idletime() (accessed regions first, then
 * increasingly idle ones).
 *
 * On success, set *@sorted_ptr to a kmalloc-ed array of the region pointers
 * (the caller should kfree() it), *@nr_regions_ptr to the number of the
 * regions, *@total_sz_ptr to the total size of the regions in DAMON core
 * address units, and return 0.  Return -ENOMEM if the allocation fails.
 */
static int damon_stat_sort_regions(struct damon_ctx *c,
		struct damon_region ***sorted_ptr, int *nr_regions_ptr,
		unsigned long *total_sz_ptr)
{
	struct damon_target *t;
	struct damon_region *r;
	struct damon_region **region_pointers;
	unsigned int nr_regions = 0;
	unsigned long total_sz = 0;

	damon_for_each_target(t, c) {
		/*
		 * there is only one target
		 *
		 * NOTE(review): the per-iteration allocation relies on this.
		 * With multiple targets, the array would be reallocated
		 * (leaking the previous one) while nr_regions keeps growing —
		 * confirm contexts built by damon_stat_build_ctx() always
		 * have exactly one target.
		 */
		region_pointers = kmalloc_objs(*region_pointers,
				damon_nr_regions(t));
		if (!region_pointers)
			return -ENOMEM;
		damon_for_each_region(r, t) {
			region_pointers[nr_regions++] = r;
			total_sz += r->ar.end - r->ar.start;
		}
	}
	sort(region_pointers, nr_regions, sizeof(*region_pointers),
			damon_stat_cmp_regions, NULL);
	*sorted_ptr = region_pointers;
	*nr_regions_ptr = nr_regions;
	*total_sz_ptr = total_sz;
	return 0;
}
|
|
|
|
/*
 * Update 'memory_idle_ms_percentiles' from the monitoring results of @c.
 *
 * Walks the regions in ascending idle time order and, for each size-weighted
 * percentile, records the idle time of the region covering that percentile,
 * converted from aggregation intervals to milliseconds.  Negative entries
 * mean the memory was being accessed, not idle (see damon_stat_idletime()).
 * Silently keeps the old values if the regions sorting fails.
 */
static void damon_stat_set_idletime_percentiles(struct damon_ctx *c)
{
	struct damon_region **sorted_regions, *region;
	int nr_regions;
	unsigned long total_sz, accounted_bytes = 0;
	int err, i, next_percentile = 0;

	err = damon_stat_sort_regions(c, &sorted_regions, &nr_regions,
			&total_sz);
	if (err)
		return;
	for (i = 0; i < nr_regions; i++) {
		region = sorted_regions[i];
		accounted_bytes += region->ar.end - region->ar.start;
		/* fill every percentile this region's span covers */
		while (next_percentile <= accounted_bytes * 100 / total_sz)
			memory_idle_ms_percentiles[next_percentile++] =
				damon_stat_idletime(region) *
				(long)c->attrs.aggr_interval / USEC_PER_MSEC;
	}
	kfree(sorted_regions);
}
|
|
|
|
static int damon_stat_damon_call_fn(void *data)
|
|
{
|
|
struct damon_ctx *c = data;
|
|
|
|
/* avoid unnecessarily frequent stat update */
|
|
if (time_before_eq(jiffies, damon_stat_last_refresh_jiffies +
|
|
msecs_to_jiffies(5 * MSEC_PER_SEC)))
|
|
return 0;
|
|
damon_stat_last_refresh_jiffies = jiffies;
|
|
|
|
aggr_interval_us = c->attrs.aggr_interval;
|
|
damon_stat_set_estimated_memory_bandwidth(c);
|
|
damon_stat_set_idletime_percentiles(c);
|
|
return 0;
|
|
}
|
|
|
|
/* Argument for damon_stat_system_ram_walk_fn() */
struct damon_stat_system_ram_range_walk_arg {
	/* whether at least one System RAM resource has been walked */
	bool walked;
	/* spans from the start of the first to the end of the last resource */
	struct resource res;
};
|
|
|
|
static int damon_stat_system_ram_walk_fn(struct resource *res, void *arg)
|
|
{
|
|
struct damon_stat_system_ram_range_walk_arg *a = arg;
|
|
|
|
if (!a->walked) {
|
|
a->walked = true;
|
|
a->res.start = res->start;
|
|
}
|
|
a->res.end = res->end;
|
|
return 0;
|
|
}
|
|
|
|
/*
 * Convert resource address @ra to a DAMON core address by dividing it by
 * @addr_unit.
 */
static unsigned long damon_stat_res_to_core_addr(resource_size_t ra,
		unsigned long addr_unit)
{
	/*
	 * Use div_u64() for avoiding linking errors related with __udivdi3,
	 * __aeabi_uldivmod, or similar problems. This should also improve the
	 * performance optimization (read div_u64() comment for the detail).
	 */
	if (sizeof(ra) == 8 && sizeof(addr_unit) == 4)
		return div_u64(ra, addr_unit);
	return ra / addr_unit;
}
|
|
|
|
static int damon_stat_set_monitoring_region(struct damon_target *t,
|
|
unsigned long addr_unit, unsigned long min_region_sz)
|
|
{
|
|
struct damon_addr_range addr_range;
|
|
struct damon_stat_system_ram_range_walk_arg arg = {};
|
|
|
|
walk_system_ram_res(0, -1, &arg, damon_stat_system_ram_walk_fn);
|
|
if (!arg.walked)
|
|
return -EINVAL;
|
|
addr_range.start = damon_stat_res_to_core_addr(
|
|
arg.res.start, addr_unit);
|
|
addr_range.end = damon_stat_res_to_core_addr(
|
|
arg.res.end + 1, addr_unit);
|
|
if (addr_range.end <= addr_range.start)
|
|
return -EINVAL;
|
|
return damon_set_regions(t, &addr_range, 1, min_region_sz);
|
|
}
|
|
|
|
/*
 * Build a DAMON context for DAMON_STAT.
 *
 * The context monitors the physical address space (DAMON_OPS_PADDR) over the
 * whole System RAM, with sampling/aggregation intervals auto-tuning enabled.
 *
 * Return the new context on success, or NULL on failure.  The caller owns
 * the returned context (destroy with damon_destroy_ctx()).
 */
static struct damon_ctx *damon_stat_build_ctx(void)
{
	struct damon_ctx *ctx;
	struct damon_attrs attrs;
	struct damon_target *target;

	ctx = damon_new_ctx();
	if (!ctx)
		return NULL;
	/* intervals below are in microseconds: 5 ms, 100 ms and 60 s */
	attrs = (struct damon_attrs) {
		.sample_interval = 5 * USEC_PER_MSEC,
		.aggr_interval = 100 * USEC_PER_MSEC,
		.ops_update_interval = 60 * USEC_PER_MSEC * MSEC_PER_SEC,
		.min_nr_regions = 10,
		.max_nr_regions = 1000,
	};
	/*
	 * auto-tune sampling and aggregation interval aiming 4% DAMON-observed
	 * accesses ratio, keeping sampling interval in [5ms, 10s] range.
	 */
	attrs.intervals_goal = (struct damon_intervals_goal) {
		.access_bp = 400, .aggrs = 3,
		.min_sample_us = 5000, .max_sample_us = 10000000,
	};
	if (damon_set_attrs(ctx, &attrs))
		goto free_out;

	if (damon_select_ops(ctx, DAMON_OPS_PADDR))
		goto free_out;

	target = damon_new_target();
	if (!target)
		goto free_out;
	damon_add_target(ctx, target);
	if (damon_stat_set_monitoring_region(target, ctx->addr_unit,
				ctx->min_region_sz))
		goto free_out;
	return ctx;
free_out:
	/* destroys the target and everything else linked to ctx, too */
	damon_destroy_ctx(ctx);
	return NULL;
}
|
|
|
|
/*
 * Requests damon_stat_damon_call_fn() to be repeatedly invoked from the
 * kdamond's main loop.  '.data' is set to the running context by
 * damon_stat_start().
 */
static struct damon_call_control call_control = {
	.fn = damon_stat_damon_call_fn,
	.repeat = true,
};
|
|
|
|
static int damon_stat_start(void)
|
|
{
|
|
int err;
|
|
|
|
if (damon_stat_context) {
|
|
if (damon_is_running(damon_stat_context))
|
|
return -EAGAIN;
|
|
damon_destroy_ctx(damon_stat_context);
|
|
}
|
|
|
|
damon_stat_context = damon_stat_build_ctx();
|
|
if (!damon_stat_context)
|
|
return -ENOMEM;
|
|
err = damon_start(&damon_stat_context, 1, true);
|
|
if (err) {
|
|
damon_destroy_ctx(damon_stat_context);
|
|
damon_stat_context = NULL;
|
|
return err;
|
|
}
|
|
|
|
damon_stat_last_refresh_jiffies = jiffies;
|
|
call_control.data = damon_stat_context;
|
|
return damon_call(damon_stat_context, &call_control);
|
|
}
|
|
|
|
/*
 * Stop the kdamond of DAMON_STAT and destroy its context.
 */
static void damon_stat_stop(void)
{
	/* wait for the kdamond to terminate before destroying its context */
	damon_stop(&damon_stat_context, 1);
	damon_destroy_ctx(damon_stat_context);
	damon_stat_context = NULL;
}
|
|
|
|
static bool damon_stat_enabled(void)
|
|
{
|
|
if (!damon_stat_context)
|
|
return false;
|
|
return damon_is_running(damon_stat_context);
|
|
}
|
|
|
|
static int damon_stat_enabled_store(
|
|
const char *val, const struct kernel_param *kp)
|
|
{
|
|
int err;
|
|
|
|
err = kstrtobool(val, &enabled);
|
|
if (err)
|
|
return err;
|
|
|
|
if (damon_stat_enabled() == enabled)
|
|
return 0;
|
|
|
|
if (!damon_initialized())
|
|
/*
|
|
* probably called from command line parsing (parse_args()).
|
|
* Cannot call damon_new_ctx(). Let damon_stat_init() handle.
|
|
*/
|
|
return 0;
|
|
|
|
if (enabled)
|
|
return damon_stat_start();
|
|
damon_stat_stop();
|
|
return 0;
|
|
}
|
|
|
|
/*
 * 'enabled' parameter getter: report the dynamically-detected running state
 * of the kdamond, so the shown value is never stale.
 */
static int damon_stat_enabled_load(char *buffer, const struct kernel_param *kp)
{
	char status = damon_stat_enabled() ? 'Y' : 'N';

	return sprintf(buffer, "%c\n", status);
}
|
|
|
|
/*
 * Module init: start the kdamond if 'enabled' was set, possibly via the
 * command line before DAMON core was available.  On failure, reset
 * 'enabled' so the parameter does not claim a kdamond is running.
 */
static int __init damon_stat_init(void)
{
	int err = 0;

	if (!damon_initialized())
		err = -ENOMEM;
	else if (enabled)
		err = damon_stat_start();

	if (err)
		enabled = false;
	return err;
}

module_init(damon_stat_init);
|