linux/mm/damon/lru_sort.c
SeongJae Park b98b7ff602 mm/damon/lru_sort: detect and use fresh enabled and kdamond_pid values
DAMON_LRU_SORT updates the 'enabled' and 'kdamond_pid' parameter values,
which represent the running status of its kdamond, when the user explicitly
requests start/stop of the kdamond.  The kdamond can, however, also be
stopped without an explicit user request, by any of the following three
events.

1. ctx->regions_score_histogram allocation failure at beginning of the
   execution,
2. damon_commit_ctx() failure due to invalid user input, and
3. damon_commit_ctx() failure due to its internal allocation failures.

Hence, if the kdamond is stopped by the above three events, the values of
the status parameters can be stale.  Users could see the stale values and
be confused.  This is already bad, but the real consequence is worse.
DAMON_LRU_SORT avoids unnecessary damon_start() and damon_stop() calls
based on the 'enabled' parameter value.  And the update of the 'enabled'
parameter value depends on the damon_start() and damon_stop() call
results.  Hence, once the kdamond has been stopped by the unintentional
events, the user cannot restart the kdamond until the system reboots.  For
example, the issue can be reproduced via the steps below.

    # cd /sys/module/damon_lru_sort/parameters
    #
    # # start DAMON_LRU_SORT
    # echo Y > enabled
    # ps -ef | grep kdamond
    root         806       2  0 17:53 ?        00:00:00 [kdamond.0]
    root         808     803  0 17:53 pts/4    00:00:00 grep kdamond
    #
    # # commit wrong input to stop kdamond without explicit stop request
    # echo 3 > addr_unit
    # echo Y > commit_inputs
    bash: echo: write error: Invalid argument
    #
    # # confirm kdamond is stopped
    # ps -ef | grep kdamond
    root         811     803  0 17:53 pts/4    00:00:00 grep kdamond
    #
    # # users can now see stale status
    # cat enabled
    Y
    # cat kdamond_pid
    806
    #
    # # even after fixing the wrong parameter,
    # # kdamond cannot be restarted.
    # echo 1 > addr_unit
    # echo Y > enabled
    # ps -ef | grep kdamond
    root         815     803  0 17:54 pts/4    00:00:00 grep kdamond

The problem will only rarely happen in real and common setups for the
following reasons.  The allocation failures are unlikely in such setups
since those allocations are arguably too small to fail.  Also sane users
on real production environments may not commit wrong input parameters. 
But once it happens, the consequence is quite bad.  And the bug is a bug.

The issue stems from the fact that there are multiple events that can
change the status, and following all the events is challenging. 
Dynamically detect and use the fresh status for the parameters when those
are requested.

Link: https://lore.kernel.org/20260419161003.79176-3-sj@kernel.org
Fixes: 40e983cca9 ("mm/damon: introduce DAMON-based LRU-lists Sorting")
Co-developed-by: Liew Rui Yan <aethernet65535@gmail.com>
Signed-off-by: Liew Rui Yan <aethernet65535@gmail.com>
Signed-off-by: SeongJae Park <sj@kernel.org>
Cc: <stable@vger.kernel.org> # 6.0.x
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
2026-04-27 05:54:27 -07:00

519 lines
14 KiB
C

// SPDX-License-Identifier: GPL-2.0
/*
* DAMON-based LRU-lists Sorting
*
* Author: SeongJae Park <sj@kernel.org>
*/
#define pr_fmt(fmt) "damon-lru-sort: " fmt
#include <linux/damon.h>
#include <linux/kstrtox.h>
#include <linux/module.h>
#include "modules-common.h"
#ifdef MODULE_PARAM_PREFIX
#undef MODULE_PARAM_PREFIX
#endif
#define MODULE_PARAM_PREFIX "damon_lru_sort."
/*
 * Enable or disable DAMON_LRU_SORT.
 *
 * You can enable DAMON_LRU_SORT by setting the value of this parameter as
 * ``Y``.  Setting it as ``N`` disables DAMON_LRU_SORT.  Note that
 * DAMON_LRU_SORT could do no real monitoring and LRU-lists sorting due to the
 * watermarks-based activation condition.  Refer to below descriptions for the
 * watermarks parameter for this.
 *
 * This variable holds the value most recently parsed from a write (and is
 * reset to false when damon_lru_sort_init() fails).  Reads of the parameter
 * do not return this variable; damon_lru_sort_enabled_load() reports what
 * damon_is_running() says about the module's context at the time of the read.
 */
static bool enabled __read_mostly;
/*
 * Make DAMON_LRU_SORT read the input parameters again, except ``enabled``.
 *
 * Input parameters that are updated while DAMON_LRU_SORT is running are not
 * applied by default.  Once this parameter is set as ``Y``, DAMON_LRU_SORT
 * reads values of parameters except ``enabled`` again.  Once the re-reading is
 * done, this parameter is set as ``N``.  If invalid parameters are found
 * during the re-reading, DAMON_LRU_SORT will be disabled.
 */
static bool commit_inputs __read_mostly;
module_param(commit_inputs, bool, 0600);
/*
 * Desired active to [in]active memory ratio in bp (1/10,000).
 *
 * While keeping the caps that set by other quotas, DAMON_LRU_SORT
 * automatically increases and decreases the effective level of the quota
 * aiming the LRU [de]prioritizations of the hot and cold memory resulting in
 * this active to [in]active memory ratio.  Value zero means disabling this
 * auto-tuning feature.
 *
 * The matching inactive-memory goal is derived from this value as
 * ``10000 - active_mem_bp + 2`` in damon_lru_sort_add_quota_goals(), so only
 * values up to 10000 are meaningful.
 *
 * Disabled by default.
 */
static unsigned long active_mem_bp __read_mostly;
module_param(active_mem_bp, ulong, 0600);
/*
 * Auto-tune monitoring intervals.
 *
 * If this parameter is set as ``Y``, DAMON_LRU_SORT automatically tunes
 * DAMON's sampling and aggregation intervals.  The auto-tuning aims to capture
 * meaningful amount of access events in each DAMON-snapshot, while keeping the
 * sampling interval 5 milliseconds in minimum, and 10 seconds in maximum.
 * Setting this as ``N`` disables the auto-tuning.
 *
 * When set, damon_lru_sort_apply_parameters() replaces the user-provided
 * sampling and aggregation intervals with 5 ms and 100 ms before handing the
 * attributes to DAMON.
 *
 * Disabled by default.
 */
static bool autotune_monitoring_intervals __read_mostly;
module_param(autotune_monitoring_intervals, bool, 0600);
/*
 * Filter [non-]young pages accordingly for LRU [de]prioritizations.
 *
 * If this is set, check page level access (youngness) once again before each
 * LRU [de]prioritization operation.  LRU prioritization operation is skipped
 * if the page has not been accessed since the last check (not young).  LRU
 * deprioritization operation is skipped if the page has been accessed since
 * the last check (young).  The feature is enabled or disabled if this
 * parameter is set as ``Y`` or ``N``, respectively.
 *
 * Disabled by default.
 */
static bool filter_young_pages __read_mostly;
module_param(filter_young_pages, bool, 0600);
/*
 * Access frequency threshold for hot memory regions identification in permil.
 *
 * If a memory region is accessed in frequency of this or higher,
 * DAMON_LRU_SORT identifies the region as hot, and mark it as accessed on the
 * LRU list, so that it could not be reclaimed under memory pressure.  50% by
 * default.
 *
 * damon_lru_sort_apply_parameters() converts this into the hot scheme's
 * minimum access count as
 * ``damon_max_nr_accesses(&attrs) * hot_thres_access_freq / 1000``.
 */
static unsigned long hot_thres_access_freq = 500;
module_param(hot_thres_access_freq, ulong, 0600);
/*
 * Time threshold for cold memory regions identification in microseconds.
 *
 * If a memory region is not accessed for this or longer time, DAMON_LRU_SORT
 * identifies the region as cold, and mark it as unaccessed on the LRU list, so
 * that it could be reclaimed first under memory pressure.  120 seconds by
 * default.
 *
 * damon_lru_sort_apply_parameters() converts this into the cold scheme's
 * minimum region age as ``cold_min_age / aggr_interval``.
 */
static unsigned long cold_min_age __read_mostly = 120000000;
module_param(cold_min_age, ulong, 0600);
/*
 * Default quota shared by the hot and the cold scheme.
 *
 * damon_lru_sort_new_scheme() copies this structure and halves the copy's
 * ``.ms``, so each of the two schemes gets half of the time quota set here.
 */
static struct damos_quota damon_lru_sort_quota = {
/* Use up to 10 ms per 1 sec, by default */
.ms = 10,
.sz = 0,
.reset_interval = 1000,
/* Within the quota, mark hotter regions accessed first. */
.weight_sz = 0,
.weight_nr_accesses = 1,
.weight_age = 1,
};
/* Expose the time quota fields as module parameters (macro from modules-common.h). */
DEFINE_DAMON_MODULES_DAMOS_TIME_QUOTA(damon_lru_sort_quota);
/*
 * Default watermarks, using the free memory rate as the metric.
 *
 * damon_lru_sort_new_scheme() passes this structure to every scheme it
 * creates, so the hot and the cold scheme share the same watermarks.
 */
static struct damos_watermarks damon_lru_sort_wmarks = {
.metric = DAMOS_WMARK_FREE_MEM_RATE,
.interval = 5000000, /* 5 seconds */
.high = 200, /* 20 percent */
.mid = 150, /* 15 percent */
.low = 50, /* 5 percent */
};
/* Expose the watermark fields as module parameters (macro from modules-common.h). */
DEFINE_DAMON_MODULES_WMARKS_PARAMS(damon_lru_sort_wmarks);
/*
 * Default monitoring attributes.
 *
 * damon_lru_sort_apply_parameters() rejects a zero ``sample_interval`` with
 * -EINVAL and, when autotune_monitoring_intervals is set, overrides the two
 * intervals of its local copy before applying them.
 */
static struct damon_attrs damon_lru_sort_mon_attrs = {
.sample_interval = 5000, /* 5 ms */
.aggr_interval = 100000, /* 100 ms */
.ops_update_interval = 0,
.min_nr_regions = 10,
.max_nr_regions = 1000,
};
/* Expose the monitoring attributes as module parameters (macro from modules-common.h). */
DEFINE_DAMON_MODULES_MON_ATTRS_PARAMS(damon_lru_sort_mon_attrs);
/*
 * Start of the target memory region in physical address.
 *
 * The start physical address of the memory region that DAMON_LRU_SORT will do
 * work against.  By default, the biggest System RAM is used as the region.
 *
 * Passed by pointer to damon_set_region_biggest_system_ram_default() from
 * damon_lru_sort_apply_parameters().
 */
static unsigned long monitor_region_start __read_mostly;
module_param(monitor_region_start, ulong, 0600);
/*
 * End of the target memory region in physical address.
 *
 * The end physical address of the memory region that DAMON_LRU_SORT will do
 * work against.  By default, the biggest System RAM is used as the region.
 *
 * Passed by pointer to damon_set_region_biggest_system_ram_default() from
 * damon_lru_sort_apply_parameters().
 */
static unsigned long monitor_region_end __read_mostly;
module_param(monitor_region_end, ulong, 0600);
/*
 * Scale factor for DAMON_LRU_SORT to ops address conversion.
 *
 * This parameter must not be set to 0.  damon_lru_sort_addr_unit_store()
 * rejects a zero write with -EINVAL, because
 * damon_lru_sort_apply_parameters() uses this value as a divisor.
 */
static unsigned long addr_unit __read_mostly = 1;
/*
 * Snapshot of the hot (DAMOS_LRU_PRIO) scheme's statistics.  Overwritten from
 * the scheme's ``stat`` each time damon_lru_sort_damon_call_fn() runs.
 */
static struct damos_stat damon_lru_sort_hot_stat;
DEFINE_DAMON_MODULES_DAMOS_STATS_PARAMS(damon_lru_sort_hot_stat,
lru_sort_tried_hot_regions, lru_sorted_hot_regions,
hot_quota_exceeds);
/*
 * Snapshot of the cold (DAMOS_LRU_DEPRIO) scheme's statistics.  Overwritten
 * from the scheme's ``stat`` each time damon_lru_sort_damon_call_fn() runs.
 */
static struct damos_stat damon_lru_sort_cold_stat;
DEFINE_DAMON_MODULES_DAMOS_STATS_PARAMS(damon_lru_sort_cold_stat,
lru_sort_tried_cold_regions, lru_sorted_cold_regions,
cold_quota_exceeds);
/*
 * Template access pattern matching any region of PAGE_SIZE or larger.
 *
 * The hot and cold scheme constructors copy this template and then narrow
 * the access-count or age bounds of their copy.
 */
static struct damos_access_pattern damon_lru_sort_stub_pattern = {
/* Find regions having PAGE_SIZE or larger size */
.min_sz_region = PAGE_SIZE,
.max_sz_region = ULONG_MAX,
/* no matter its access frequency */
.min_nr_accesses = 0,
.max_nr_accesses = UINT_MAX,
/* no matter its age */
.min_age_region = 0,
.max_age_region = UINT_MAX,
};
/*
 * The module's DAMON context and its monitoring target.  Both are set up by
 * damon_modules_new_paddr_ctx_target() in damon_lru_sort_init(); ``ctx`` stays
 * NULL until then, which the parameter read handlers check for.
 */
static struct damon_ctx *ctx;
static struct damon_target *target;
static struct damos *damon_lru_sort_new_scheme(
struct damos_access_pattern *pattern, enum damos_action action)
{
struct damos_quota quota = damon_lru_sort_quota;
/* Use half of total quota for hot/cold pages sorting */
quota.ms = quota.ms / 2;
return damon_new_scheme(
/* find the pattern, and */
pattern,
/* (de)prioritize on LRU-lists */
action,
/* for each aggregation interval */
0,
/* under the quota. */
&quota,
/* (De)activate this according to the watermarks. */
&damon_lru_sort_wmarks,
NUMA_NO_NODE);
}
/* Create a DAMON-based operation scheme for hot memory regions */
static struct damos *damon_lru_sort_new_hot_scheme(unsigned int hot_thres)
{
struct damos_access_pattern pattern = damon_lru_sort_stub_pattern;
pattern.min_nr_accesses = hot_thres;
return damon_lru_sort_new_scheme(&pattern, DAMOS_LRU_PRIO);
}
/* Create a DAMON-based operation scheme for cold memory regions */
static struct damos *damon_lru_sort_new_cold_scheme(unsigned int cold_thres)
{
struct damos_access_pattern pattern = damon_lru_sort_stub_pattern;
pattern.max_nr_accesses = 0;
pattern.min_age_region = cold_thres;
return damon_lru_sort_new_scheme(&pattern, DAMOS_LRU_DEPRIO);
}
static int damon_lru_sort_add_quota_goals(struct damos *hot_scheme,
struct damos *cold_scheme)
{
struct damos_quota_goal *goal;
if (!active_mem_bp)
return 0;
goal = damos_new_quota_goal(DAMOS_QUOTA_ACTIVE_MEM_BP, active_mem_bp);
if (!goal)
return -ENOMEM;
damos_add_quota_goal(&hot_scheme->quota, goal);
/* aim 0.2 % goal conflict, to keep little ping pong */
goal = damos_new_quota_goal(DAMOS_QUOTA_INACTIVE_MEM_BP,
10000 - active_mem_bp + 2);
if (!goal)
return -ENOMEM;
damos_add_quota_goal(&cold_scheme->quota, goal);
return 0;
}
/*
 * Install the page-level youngness filters on the two schemes.
 *
 * @hot_scheme:	scheme that must not prioritize not-young pages
 * @cold_scheme:	scheme that must not deprioritize young pages
 *
 * Does nothing unless filter_young_pages is set.
 *
 * Return: 0 on success or when the feature is off, -ENOMEM when a filter
 * cannot be allocated.  When the second allocation fails, the first filter
 * has already been added to @hot_scheme.
 */
static int damon_lru_sort_add_filters(struct damos *hot_scheme,
		struct damos *cold_scheme)
{
	struct damos_filter *young_filter;

	if (filter_young_pages) {
		/* disallow prioritizing not-young pages */
		young_filter = damos_new_filter(DAMOS_FILTER_TYPE_YOUNG,
				false, false);
		if (!young_filter)
			return -ENOMEM;
		damos_add_filter(hot_scheme, young_filter);

		/* disallow de-prioritizing young pages */
		young_filter = damos_new_filter(DAMOS_FILTER_TYPE_YOUNG,
				true, false);
		if (!young_filter)
			return -ENOMEM;
		damos_add_filter(cold_scheme, young_filter);
	}

	return 0;
}
static int damon_lru_sort_apply_parameters(void)
{
struct damon_ctx *param_ctx;
struct damon_target *param_target;
struct damon_attrs attrs;
struct damos *hot_scheme, *cold_scheme;
unsigned int hot_thres, cold_thres;
int err;
err = damon_modules_new_paddr_ctx_target(&param_ctx, &param_target);
if (err)
return err;
param_ctx->addr_unit = addr_unit;
param_ctx->min_region_sz = max(DAMON_MIN_REGION_SZ / addr_unit, 1);
if (!damon_lru_sort_mon_attrs.sample_interval) {
err = -EINVAL;
goto out;
}
attrs = damon_lru_sort_mon_attrs;
if (autotune_monitoring_intervals) {
attrs.sample_interval = 5000;
attrs.aggr_interval = 100000;
attrs.intervals_goal.access_bp = 40;
attrs.intervals_goal.aggrs = 3;
attrs.intervals_goal.min_sample_us = 5000;
attrs.intervals_goal.max_sample_us = 10 * 1000 * 1000;
}
err = damon_set_attrs(param_ctx, &attrs);
if (err)
goto out;
err = -ENOMEM;
hot_thres = damon_max_nr_accesses(&attrs) *
hot_thres_access_freq / 1000;
hot_scheme = damon_lru_sort_new_hot_scheme(hot_thres);
if (!hot_scheme)
goto out;
cold_thres = cold_min_age / attrs.aggr_interval;
cold_scheme = damon_lru_sort_new_cold_scheme(cold_thres);
if (!cold_scheme) {
damon_destroy_scheme(hot_scheme);
goto out;
}
damon_set_schemes(param_ctx, &hot_scheme, 1);
damon_add_scheme(param_ctx, cold_scheme);
err = damon_lru_sort_add_quota_goals(hot_scheme, cold_scheme);
if (err)
goto out;
err = damon_lru_sort_add_filters(hot_scheme, cold_scheme);
if (err)
goto out;
err = damon_set_region_biggest_system_ram_default(param_target,
&monitor_region_start,
&monitor_region_end,
param_ctx->addr_unit,
param_ctx->min_region_sz);
if (err)
goto out;
err = damon_commit_ctx(ctx, param_ctx);
out:
damon_destroy_ctx(param_ctx);
return err;
}
/*
 * Re-apply the module parameters if the user asked for it.
 *
 * When ``commit_inputs`` is set, the parameters are applied and the flag is
 * cleared again, whether or not applying them succeeded.
 *
 * Return: 0 when nothing was requested, otherwise the result of
 * damon_lru_sort_apply_parameters().
 */
static int damon_lru_sort_handle_commit_inputs(void)
{
	int err = 0;

	if (commit_inputs) {
		err = damon_lru_sort_apply_parameters();
		commit_inputs = false;
	}

	return err;
}
/*
 * Callback installed through ``call_control``.
 *
 * @arg:	the DAMON context whose schemes should be inspected
 *
 * Copies the statistics of the hot and the cold scheme into the module's
 * stat parameters, then handles a pending ``commit_inputs`` request.
 *
 * Return: the result of damon_lru_sort_handle_commit_inputs().
 */
static int damon_lru_sort_damon_call_fn(void *arg)
{
	struct damon_ctx *running_ctx = arg;
	struct damos *scheme;

	/* Refresh the stat parameters from the schemes of the context. */
	damon_for_each_scheme(scheme, running_ctx) {
		switch (scheme->action) {
		case DAMOS_LRU_PRIO:
			damon_lru_sort_hot_stat = scheme->stat;
			break;
		case DAMOS_LRU_DEPRIO:
			damon_lru_sort_cold_stat = scheme->stat;
			break;
		default:
			/* Schemes with other actions carry no stats we export. */
			break;
		}
	}

	return damon_lru_sort_handle_commit_inputs();
}
/*
 * Control block handed to damon_call() by damon_lru_sort_turn().  ``.data`` is
 * filled in with the module's context by damon_lru_sort_init() and is what
 * damon_lru_sort_damon_call_fn() receives as its argument.
 * NOTE(review): ``.repeat = true`` presumably asks DAMON to keep invoking the
 * callback rather than calling it once - confirm against damon_call().
 */
static struct damon_call_control call_control = {
.fn = damon_lru_sort_damon_call_fn,
.repeat = true,
};
/*
 * Start or stop DAMON_LRU_SORT.
 *
 * @on:	true to start, false to stop
 *
 * Starting applies the current parameters, starts the module's context and
 * registers ``call_control`` with it; the sequence stops at the first step
 * that fails.  Stopping just calls damon_stop() on the context.
 *
 * Return: 0 on success, otherwise the error of the step that failed.
 */
static int damon_lru_sort_turn(bool on)
{
	int err;

	if (on) {
		err = damon_lru_sort_apply_parameters();
		if (!err)
			err = damon_start(&ctx, 1, true);
		if (!err)
			err = damon_call(ctx, &call_control);
		return err;
	}

	return damon_stop(&ctx, 1);
}
/*
 * Write handler of the ``addr_unit`` parameter.
 *
 * @val:	user-provided string
 * @kp:		parameter descriptor (unused)
 *
 * Parses @val with kstrtoul() using base auto-detection and stores the result
 * in ``addr_unit``.  Zero is refused because the value is later used as a
 * divisor in damon_lru_sort_apply_parameters().
 *
 * Return: 0 on success, the kstrtoul() error when parsing fails, -EINVAL for
 * zero.  ``addr_unit`` is left untouched on failure.
 */
static int damon_lru_sort_addr_unit_store(const char *val,
		const struct kernel_param *kp)
{
	unsigned long parsed;
	int ret;

	ret = kstrtoul(val, 0, &parsed);
	if (ret)
		return ret;

	if (parsed == 0)
		return -EINVAL;

	addr_unit = parsed;
	return 0;
}
/*
 * Parameter operations for ``addr_unit``: writes are validated by
 * damon_lru_sort_addr_unit_store(), reads use the generic unsigned long
 * getter on the ``addr_unit`` variable.
 */
static const struct kernel_param_ops addr_unit_param_ops = {
.set = damon_lru_sort_addr_unit_store,
.get = param_get_ulong,
};
module_param_cb(addr_unit, &addr_unit_param_ops, &addr_unit, 0600);
MODULE_PARM_DESC(addr_unit,
"Scale factor for DAMON_LRU_SORT to ops address conversion (default: 1)");
/*
 * Report the live running status of DAMON_LRU_SORT.
 *
 * Return: false while the module's context has not been created yet,
 * otherwise whatever damon_is_running() reports for it.
 */
static bool damon_lru_sort_enabled(void)
{
	return ctx && damon_is_running(ctx);
}
/*
 * Write handler of the ``enabled`` parameter.
 *
 * @val:	user-provided string
 * @kp:		parameter descriptor (unused)
 *
 * Parses @val into ``enabled`` and, when the requested state differs from the
 * live running status, starts or stops DAMON_LRU_SORT accordingly.
 *
 * Return: 0 when nothing needs to be done, the kstrtobool() error when
 * parsing fails, otherwise the result of damon_lru_sort_turn().
 */
static int damon_lru_sort_enabled_store(const char *val,
		const struct kernel_param *kp)
{
	int err = kstrtobool(val, &enabled);

	if (err)
		return err;

	/*
	 * Nothing to do when the live status already matches the request.
	 * Also nothing to do when called before the init function; the init
	 * function will handle the stored value.
	 */
	if (damon_lru_sort_enabled() == enabled || !damon_initialized())
		return 0;

	return damon_lru_sort_turn(enabled);
}
/*
 * Read handler of the ``enabled`` parameter.
 *
 * @buffer:	output buffer provided by the caller
 * @kp:		parameter descriptor (unused)
 *
 * Writes "Y\n" or "N\n" according to the live running status rather than the
 * stored ``enabled`` variable.
 *
 * Return: the number of characters written, as returned by sprintf().
 */
static int damon_lru_sort_enabled_load(char *buffer,
		const struct kernel_param *kp)
{
	char status = 'N';

	if (damon_lru_sort_enabled())
		status = 'Y';

	return sprintf(buffer, "%c\n", status);
}
/*
 * Parameter operations for ``enabled``: writes go through
 * damon_lru_sort_enabled_store(), reads through
 * damon_lru_sort_enabled_load(), which reports the live running status.
 */
static const struct kernel_param_ops enabled_param_ops = {
.set = damon_lru_sort_enabled_store,
.get = damon_lru_sort_enabled_load,
};
module_param_cb(enabled, &enabled_param_ops, &enabled, 0600);
MODULE_PARM_DESC(enabled,
"Enable or disable DAMON_LRU_SORT (default: disabled)");
/*
 * Write handler of the ``kdamond_pid`` parameter.
 *
 * @val:	user-provided string (ignored)
 * @kp:		parameter descriptor (ignored)
 *
 * kdamond_pid is read-only, but the kernel command line could still write
 * it.  Such writes are accepted and silently dropped.
 *
 * Return: always 0.
 */
static int damon_lru_sort_kdamond_pid_store(const char *val,
		const struct kernel_param *kp)
{
	(void)val;
	(void)kp;

	return 0;
}
/*
 * Read handler of the ``kdamond_pid`` parameter.
 *
 * @buffer:	output buffer provided by the caller
 * @kp:		parameter descriptor (unused)
 *
 * Queries damon_kdamond_pid() for the module's context on every read.  A
 * missing context or any negative result is reported as -1.
 *
 * Return: the number of characters written, as returned by sprintf().
 */
static int damon_lru_sort_kdamond_pid_load(char *buffer,
		const struct kernel_param *kp)
{
	int pid = ctx ? damon_kdamond_pid(ctx) : -1;

	/* Collapse every error value into the single "no kdamond" marker. */
	if (pid < 0)
		pid = -1;

	return sprintf(buffer, "%d\n", pid);
}
/*
 * Parameter operations for ``kdamond_pid``: writes are ignored by
 * damon_lru_sort_kdamond_pid_store(), reads are computed by
 * damon_lru_sort_kdamond_pid_load().
 */
static const struct kernel_param_ops kdamond_pid_param_ops = {
.set = damon_lru_sort_kdamond_pid_store,
.get = damon_lru_sort_kdamond_pid_load,
};
/*
 * PID of the DAMON thread
 *
 * If DAMON_LRU_SORT is enabled, this becomes the PID of the worker thread.
 * Else, -1.
 *
 * The parameter has no backing variable (the argument is NULL); the value is
 * produced by the read handler each time the parameter is read.
 */
module_param_cb(kdamond_pid, &kdamond_pid_param_ops, NULL, 0400);
/*
 * Module initialization.
 *
 * Creates the module's context and target, wires the context into
 * ``call_control``, and starts DAMON_LRU_SORT right away when ``enabled`` was
 * already set before this function ran (probably via the command line).
 *
 * Return: 0 on success, -ENOMEM when DAMON is not initialized, otherwise the
 * error of the step that failed.  On any error ``enabled`` ends up false.
 */
static int __init damon_lru_sort_init(void)
{
	int err = -ENOMEM;

	if (damon_initialized())
		err = damon_modules_new_paddr_ctx_target(&ctx, &target);

	if (!err) {
		call_control.data = ctx;

		/* 'enabled' has set before this function, probably via command line */
		if (enabled)
			err = damon_lru_sort_turn(true);
	}

	/* Do not leave a stale start request behind after a failure. */
	if (err && enabled)
		enabled = false;

	return err;
}
module_init(damon_lru_sort_init);