mirror of
https://github.com/torvalds/linux.git
synced 2026-05-31 10:33:41 +02:00
zram: modernize writeback interface
The writeback interface supports a page_index=N parameter which performs
writeback of the given page. Since we rarely need to writeback just one
single page, the typical use case involves a number of writeback calls,
each performing writeback of one page:
echo page_index=100 > zram0/writeback
...
echo page_index=200 > zram0/writeback
echo page_index=500 > zram0/writeback
...
echo page_index=700 > zram0/writeback
One obvious downside of this is that it increases the number of syscalls.
Less obvious, but a significantly more important downside, is that when
given only one page to post-process zram cannot perform an optimal target
selection. This becomes a critical limitation when writeback_limit is
enabled, because under writeback_limit we want to guarantee the highest
memory savings hence we first need to writeback pages that release the
highest amount of zsmalloc pool memory.
This patch adds page_indexes=LOW-HIGH parameter to the writeback
interface:
echo page_indexes=100-200 page_indexes=500-700 > zram0/writeback
This gives zram a chance to apply an optimal target selection strategy on
each iteration of the writeback loop.
We also now permit multiple page_index parameters per call (previously
zram would recognize only one page_index) and a mix or single pages and
page ranges:
echo page_index=42 page_index=99 page_indexes=100-200 \
page_indexes=500-700 > zram0/writeback
Apart from that the patch also unifies parameters passing and resembles
other "modern" zram device attributes (e.g. recompression), while the old
interface used a mixed scheme: values-less parameters for mode and a
key=value format for page_index. We still support the "old" value-less
format for compatibility reasons.
[senozhatsky@chromium.org: simplify parse_page_index() range checks, per Brian]
nk: https://lkml.kernel.org/r/20250404015327.2427684-1-senozhatsky@chromium.org
[sozhatsky@chromium.org: fix uninitialized variable in zram_writeback_slots(), per Dan]
nk: https://lkml.kernel.org/r/20250409112611.1154282-1-senozhatsky@chromium.org
Link: https://lkml.kernel.org/r/20250327015818.4148660-1-senozhatsky@chromium.org
Signed-off-by: Sergey Senozhatsky <senozhatsky@chromium.org>
Reviewed-by: Brian Geffon <bgeffon@google.com>
Cc: Minchan Kim <minchan@kernel.org>
Cc: Richard Chang <richardycc@google.com>
Cc: Sergey Senozhatsky <senozhatsky@chromium.org>
Cc: Dan Carpenter <dan.carpenter@linaro.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
This commit is contained in:
parent
0bf19a357e
commit
cf42d4cccf
|
|
@ -369,6 +369,23 @@ they could write a page index into the interface::
|
|||
|
||||
echo "page_index=1251" > /sys/block/zramX/writeback
|
||||
|
||||
In Linux 6.16 this interface underwent some rework. First, the interface
|
||||
now supports `key=value` format for all of its parameters (`type=huge_idle`,
|
||||
etc.) Second, the support for `page_indexes` was introduced, which specify
|
||||
`LOW-HIGH` range (or ranges) of pages to be written-back. This reduces the
|
||||
number of syscalls, but more importantly this enables optimal post-processing
|
||||
target selection strategy. Usage example::
|
||||
|
||||
echo "type=idle" > /sys/block/zramX/writeback
|
||||
echo "page_indexes=1-100 page_indexes=200-300" > \
|
||||
/sys/block/zramX/writeback
|
||||
|
||||
We also now permit multiple page_index params per call and a mix of
|
||||
single pages and page ranges::
|
||||
|
||||
echo page_index=42 page_index=99 page_indexes=100-200 \
|
||||
page_indexes=500-700 > /sys/block/zramX/writeback
|
||||
|
||||
If there are lots of write IO with flash device, potentially, it has
|
||||
flash wearout problem so that admin needs to design write limitation
|
||||
to guarantee storage health for entire product life.
|
||||
|
|
|
|||
|
|
@ -734,114 +734,19 @@ static void read_from_bdev_async(struct zram *zram, struct page *page,
|
|||
submit_bio(bio);
|
||||
}
|
||||
|
||||
#define PAGE_WB_SIG "page_index="
|
||||
|
||||
#define PAGE_WRITEBACK 0
|
||||
#define HUGE_WRITEBACK (1<<0)
|
||||
#define IDLE_WRITEBACK (1<<1)
|
||||
#define INCOMPRESSIBLE_WRITEBACK (1<<2)
|
||||
|
||||
static int scan_slots_for_writeback(struct zram *zram, u32 mode,
|
||||
unsigned long nr_pages,
|
||||
unsigned long index,
|
||||
struct zram_pp_ctl *ctl)
|
||||
static int zram_writeback_slots(struct zram *zram, struct zram_pp_ctl *ctl)
|
||||
{
|
||||
for (; nr_pages != 0; index++, nr_pages--) {
|
||||
bool ok = true;
|
||||
|
||||
zram_slot_lock(zram, index);
|
||||
if (!zram_allocated(zram, index))
|
||||
goto next;
|
||||
|
||||
if (zram_test_flag(zram, index, ZRAM_WB) ||
|
||||
zram_test_flag(zram, index, ZRAM_SAME))
|
||||
goto next;
|
||||
|
||||
if (mode & IDLE_WRITEBACK &&
|
||||
!zram_test_flag(zram, index, ZRAM_IDLE))
|
||||
goto next;
|
||||
if (mode & HUGE_WRITEBACK &&
|
||||
!zram_test_flag(zram, index, ZRAM_HUGE))
|
||||
goto next;
|
||||
if (mode & INCOMPRESSIBLE_WRITEBACK &&
|
||||
!zram_test_flag(zram, index, ZRAM_INCOMPRESSIBLE))
|
||||
goto next;
|
||||
|
||||
ok = place_pp_slot(zram, ctl, index);
|
||||
next:
|
||||
zram_slot_unlock(zram, index);
|
||||
if (!ok)
|
||||
break;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static ssize_t writeback_store(struct device *dev,
|
||||
struct device_attribute *attr, const char *buf, size_t len)
|
||||
{
|
||||
struct zram *zram = dev_to_zram(dev);
|
||||
unsigned long nr_pages = zram->disksize >> PAGE_SHIFT;
|
||||
struct zram_pp_ctl *ctl = NULL;
|
||||
struct zram_pp_slot *pps;
|
||||
unsigned long index = 0;
|
||||
struct bio bio;
|
||||
struct bio_vec bio_vec;
|
||||
struct page *page = NULL;
|
||||
ssize_t ret = len;
|
||||
int mode, err;
|
||||
unsigned long blk_idx = 0;
|
||||
|
||||
if (sysfs_streq(buf, "idle"))
|
||||
mode = IDLE_WRITEBACK;
|
||||
else if (sysfs_streq(buf, "huge"))
|
||||
mode = HUGE_WRITEBACK;
|
||||
else if (sysfs_streq(buf, "huge_idle"))
|
||||
mode = IDLE_WRITEBACK | HUGE_WRITEBACK;
|
||||
else if (sysfs_streq(buf, "incompressible"))
|
||||
mode = INCOMPRESSIBLE_WRITEBACK;
|
||||
else {
|
||||
if (strncmp(buf, PAGE_WB_SIG, sizeof(PAGE_WB_SIG) - 1))
|
||||
return -EINVAL;
|
||||
|
||||
if (kstrtol(buf + sizeof(PAGE_WB_SIG) - 1, 10, &index) ||
|
||||
index >= nr_pages)
|
||||
return -EINVAL;
|
||||
|
||||
nr_pages = 1;
|
||||
mode = PAGE_WRITEBACK;
|
||||
}
|
||||
|
||||
down_read(&zram->init_lock);
|
||||
if (!init_done(zram)) {
|
||||
ret = -EINVAL;
|
||||
goto release_init_lock;
|
||||
}
|
||||
|
||||
/* Do not permit concurrent post-processing actions. */
|
||||
if (atomic_xchg(&zram->pp_in_progress, 1)) {
|
||||
up_read(&zram->init_lock);
|
||||
return -EAGAIN;
|
||||
}
|
||||
|
||||
if (!zram->backing_dev) {
|
||||
ret = -ENODEV;
|
||||
goto release_init_lock;
|
||||
}
|
||||
struct page *page = NULL;
|
||||
struct zram_pp_slot *pps;
|
||||
struct bio_vec bio_vec;
|
||||
struct bio bio;
|
||||
int ret = 0, err;
|
||||
u32 index;
|
||||
|
||||
page = alloc_page(GFP_KERNEL);
|
||||
if (!page) {
|
||||
ret = -ENOMEM;
|
||||
goto release_init_lock;
|
||||
}
|
||||
|
||||
ctl = init_pp_ctl();
|
||||
if (!ctl) {
|
||||
ret = -ENOMEM;
|
||||
goto release_init_lock;
|
||||
}
|
||||
|
||||
scan_slots_for_writeback(zram, mode, nr_pages, index, ctl);
|
||||
if (!page)
|
||||
return -ENOMEM;
|
||||
|
||||
while ((pps = select_pp_slot(ctl))) {
|
||||
spin_lock(&zram->wb_limit_lock);
|
||||
|
|
@ -929,10 +834,215 @@ static ssize_t writeback_store(struct device *dev,
|
|||
|
||||
if (blk_idx)
|
||||
free_block_bdev(zram, blk_idx);
|
||||
|
||||
release_init_lock:
|
||||
if (page)
|
||||
__free_page(page);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
#define PAGE_WRITEBACK 0
|
||||
#define HUGE_WRITEBACK (1 << 0)
|
||||
#define IDLE_WRITEBACK (1 << 1)
|
||||
#define INCOMPRESSIBLE_WRITEBACK (1 << 2)
|
||||
|
||||
static int parse_page_index(char *val, unsigned long nr_pages,
|
||||
unsigned long *lo, unsigned long *hi)
|
||||
{
|
||||
int ret;
|
||||
|
||||
ret = kstrtoul(val, 10, lo);
|
||||
if (ret)
|
||||
return ret;
|
||||
if (*lo >= nr_pages)
|
||||
return -ERANGE;
|
||||
*hi = *lo + 1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int parse_page_indexes(char *val, unsigned long nr_pages,
|
||||
unsigned long *lo, unsigned long *hi)
|
||||
{
|
||||
char *delim;
|
||||
int ret;
|
||||
|
||||
delim = strchr(val, '-');
|
||||
if (!delim)
|
||||
return -EINVAL;
|
||||
|
||||
*delim = 0x00;
|
||||
ret = kstrtoul(val, 10, lo);
|
||||
if (ret)
|
||||
return ret;
|
||||
if (*lo >= nr_pages)
|
||||
return -ERANGE;
|
||||
|
||||
ret = kstrtoul(delim + 1, 10, hi);
|
||||
if (ret)
|
||||
return ret;
|
||||
if (*hi >= nr_pages || *lo > *hi)
|
||||
return -ERANGE;
|
||||
*hi += 1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int parse_mode(char *val, u32 *mode)
|
||||
{
|
||||
*mode = 0;
|
||||
|
||||
if (!strcmp(val, "idle"))
|
||||
*mode = IDLE_WRITEBACK;
|
||||
if (!strcmp(val, "huge"))
|
||||
*mode = HUGE_WRITEBACK;
|
||||
if (!strcmp(val, "huge_idle"))
|
||||
*mode = IDLE_WRITEBACK | HUGE_WRITEBACK;
|
||||
if (!strcmp(val, "incompressible"))
|
||||
*mode = INCOMPRESSIBLE_WRITEBACK;
|
||||
|
||||
if (*mode == 0)
|
||||
return -EINVAL;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int scan_slots_for_writeback(struct zram *zram, u32 mode,
|
||||
unsigned long lo, unsigned long hi,
|
||||
struct zram_pp_ctl *ctl)
|
||||
{
|
||||
u32 index = lo;
|
||||
|
||||
while (index < hi) {
|
||||
bool ok = true;
|
||||
|
||||
zram_slot_lock(zram, index);
|
||||
if (!zram_allocated(zram, index))
|
||||
goto next;
|
||||
|
||||
if (zram_test_flag(zram, index, ZRAM_WB) ||
|
||||
zram_test_flag(zram, index, ZRAM_SAME))
|
||||
goto next;
|
||||
|
||||
if (mode & IDLE_WRITEBACK &&
|
||||
!zram_test_flag(zram, index, ZRAM_IDLE))
|
||||
goto next;
|
||||
if (mode & HUGE_WRITEBACK &&
|
||||
!zram_test_flag(zram, index, ZRAM_HUGE))
|
||||
goto next;
|
||||
if (mode & INCOMPRESSIBLE_WRITEBACK &&
|
||||
!zram_test_flag(zram, index, ZRAM_INCOMPRESSIBLE))
|
||||
goto next;
|
||||
|
||||
ok = place_pp_slot(zram, ctl, index);
|
||||
next:
|
||||
zram_slot_unlock(zram, index);
|
||||
if (!ok)
|
||||
break;
|
||||
index++;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static ssize_t writeback_store(struct device *dev,
|
||||
struct device_attribute *attr,
|
||||
const char *buf, size_t len)
|
||||
{
|
||||
struct zram *zram = dev_to_zram(dev);
|
||||
u64 nr_pages = zram->disksize >> PAGE_SHIFT;
|
||||
unsigned long lo = 0, hi = nr_pages;
|
||||
struct zram_pp_ctl *ctl = NULL;
|
||||
char *args, *param, *val;
|
||||
ssize_t ret = len;
|
||||
int err, mode = 0;
|
||||
|
||||
down_read(&zram->init_lock);
|
||||
if (!init_done(zram)) {
|
||||
up_read(&zram->init_lock);
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
/* Do not permit concurrent post-processing actions. */
|
||||
if (atomic_xchg(&zram->pp_in_progress, 1)) {
|
||||
up_read(&zram->init_lock);
|
||||
return -EAGAIN;
|
||||
}
|
||||
|
||||
if (!zram->backing_dev) {
|
||||
ret = -ENODEV;
|
||||
goto release_init_lock;
|
||||
}
|
||||
|
||||
ctl = init_pp_ctl();
|
||||
if (!ctl) {
|
||||
ret = -ENOMEM;
|
||||
goto release_init_lock;
|
||||
}
|
||||
|
||||
args = skip_spaces(buf);
|
||||
while (*args) {
|
||||
args = next_arg(args, ¶m, &val);
|
||||
|
||||
/*
|
||||
* Workaround to support the old writeback interface.
|
||||
*
|
||||
* The old writeback interface has a minor inconsistency and
|
||||
* requires key=value only for page_index parameter, while the
|
||||
* writeback mode is a valueless parameter.
|
||||
*
|
||||
* This is not the case anymore and now all parameters are
|
||||
* required to have values, however, we need to support the
|
||||
* legacy writeback interface format so we check if we can
|
||||
* recognize a valueless parameter as the (legacy) writeback
|
||||
* mode.
|
||||
*/
|
||||
if (!val || !*val) {
|
||||
err = parse_mode(param, &mode);
|
||||
if (err) {
|
||||
ret = err;
|
||||
goto release_init_lock;
|
||||
}
|
||||
|
||||
scan_slots_for_writeback(zram, mode, lo, hi, ctl);
|
||||
break;
|
||||
}
|
||||
|
||||
if (!strcmp(param, "type")) {
|
||||
err = parse_mode(val, &mode);
|
||||
if (err) {
|
||||
ret = err;
|
||||
goto release_init_lock;
|
||||
}
|
||||
|
||||
scan_slots_for_writeback(zram, mode, lo, hi, ctl);
|
||||
break;
|
||||
}
|
||||
|
||||
if (!strcmp(param, "page_index")) {
|
||||
err = parse_page_index(val, nr_pages, &lo, &hi);
|
||||
if (err) {
|
||||
ret = err;
|
||||
goto release_init_lock;
|
||||
}
|
||||
|
||||
scan_slots_for_writeback(zram, mode, lo, hi, ctl);
|
||||
continue;
|
||||
}
|
||||
|
||||
if (!strcmp(param, "page_indexes")) {
|
||||
err = parse_page_indexes(val, nr_pages, &lo, &hi);
|
||||
if (err) {
|
||||
ret = err;
|
||||
goto release_init_lock;
|
||||
}
|
||||
|
||||
scan_slots_for_writeback(zram, mode, lo, hi, ctl);
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
err = zram_writeback_slots(zram, ctl);
|
||||
if (err)
|
||||
ret = err;
|
||||
|
||||
release_init_lock:
|
||||
release_pp_ctl(zram, ctl);
|
||||
atomic_set(&zram->pp_in_progress, 0);
|
||||
up_read(&zram->init_lock);
|
||||
|
|
|
|||
Loading…
Reference in New Issue
Block a user