FROMLIST: f2fs: early updates queued for v4.18-rc1

Cherry-picked from:
  origin/upstream-f2fs-stable-linux-4.4.y

85d2070f60 ("f2fs: turn down IO priority of discard from background")
4738f527db ("f2fs: don't split checkpoint in fstrim")
31e2713935 ("f2fs: issue discard commands proactively in high fs utilization")
70676ef736 ("f2fs: add fsync_mode=nobarrier for non-atomic files")
bb53d06b5f ("f2fs: let fstrim issue discard commands in lower priority")

Signed-off-by: Jaegeuk Kim <jaegeuk@google.com>
This commit is contained in:
Jaegeuk Kim 2018-05-24 13:57:26 -07:00 committed by Amit Pundir
parent 0231b85fc8
commit a89a2131dc
7 changed files with 148 additions and 137 deletions

View File

@ -101,6 +101,7 @@ Date: February 2015
Contact: "Jaegeuk Kim" <jaegeuk@kernel.org>
Description:
Controls the trimming rate in batch mode.
<deprecated>
What: /sys/fs/f2fs/<disk>/cp_interval
Date: October 2015

View File

@ -180,13 +180,15 @@ whint_mode=%s Control which write hints are passed down to block
passes down hints with its policy.
alloc_mode=%s Adjust block allocation policy, which supports "reuse"
and "default".
fsync_mode=%s Control the policy of fsync. Currently supports "posix"
and "strict". In "posix" mode, which is default, fsync
will follow POSIX semantics and does a light operation
to improve the filesystem performance. In "strict" mode,
fsync will be heavy and behaves in line with xfs, ext4
and btrfs, where xfstest generic/342 will pass, but the
performance will regress.
fsync_mode=%s Control the policy of fsync. Currently supports "posix",
"strict", and "nobarrier". In "posix" mode, which is
default, fsync will follow POSIX semantics and does a
light operation to improve the filesystem performance.
In "strict" mode, fsync will be heavy and behaves in line
with xfs, ext4 and btrfs, where xfstest generic/342 will
pass, but the performance will regress. "nobarrier" is
based on "posix", but doesn't issue flush command for
non-atomic files likewise "nobarrier" mount option.
test_dummy_encryption Enable dummy encryption, which provides a fake fscrypt
context. The fake fscrypt context is used by xfstests.

View File

@ -238,15 +238,12 @@ enum {
#define CP_DISCARD 0x00000010
#define CP_TRIMMED 0x00000020
#define DEF_BATCHED_TRIM_SECTIONS 2048
#define BATCHED_TRIM_SEGMENTS(sbi) \
(GET_SEG_FROM_SEC(sbi, SM_I(sbi)->trim_sections))
#define BATCHED_TRIM_BLOCKS(sbi) \
(BATCHED_TRIM_SEGMENTS(sbi) << (sbi)->log_blocks_per_seg)
#define MAX_DISCARD_BLOCKS(sbi) BLKS_PER_SEC(sbi)
#define DEF_MAX_DISCARD_REQUEST 8 /* issue 8 discards per round */
#define DEF_MAX_DISCARD_LEN 512 /* Max. 2MB per discard */
#define DEF_MIN_DISCARD_ISSUE_TIME 50 /* 50 ms, if exists */
#define DEF_MAX_DISCARD_ISSUE_TIME 60000 /* 60 s, if no candidates */
#define DEF_DISCARD_URGENT_UTIL 80 /* do more discard over 80% */
#define DEF_CP_INTERVAL 60 /* 60 secs */
#define DEF_IDLE_INTERVAL 5 /* 5 secs */
@ -753,7 +750,8 @@ static inline void set_extent_info(struct extent_info *ei, unsigned int fofs,
static inline bool __is_discard_mergeable(struct discard_info *back,
struct discard_info *front)
{
return back->lstart + back->len == front->lstart;
return (back->lstart + back->len == front->lstart) &&
(back->len + front->len < DEF_MAX_DISCARD_LEN);
}
static inline bool __is_discard_back_mergeable(struct discard_info *cur,
@ -1139,6 +1137,7 @@ enum {
enum fsync_mode {
FSYNC_MODE_POSIX, /* fsync follows posix semantics */
FSYNC_MODE_STRICT, /* fsync behaves in line with ext4 */
FSYNC_MODE_NOBARRIER, /* fsync behaves nobarrier based on posix */
};
#ifdef CONFIG_F2FS_FS_ENCRYPTION
@ -2853,8 +2852,6 @@ int f2fs_flush_device_cache(struct f2fs_sb_info *sbi);
void destroy_flush_cmd_control(struct f2fs_sb_info *sbi, bool free);
void invalidate_blocks(struct f2fs_sb_info *sbi, block_t addr);
bool is_checkpointed_data(struct f2fs_sb_info *sbi, block_t blkaddr);
void init_discard_policy(struct discard_policy *dpolicy, int discard_type,
unsigned int granularity);
void drop_discard_cmd(struct f2fs_sb_info *sbi);
void stop_discard_thread(struct f2fs_sb_info *sbi);
bool f2fs_wait_discard_bios(struct f2fs_sb_info *sbi);

View File

@ -309,7 +309,7 @@ static int f2fs_do_sync_file(struct file *file, loff_t start, loff_t end,
remove_ino_entry(sbi, ino, APPEND_INO);
clear_inode_flag(inode, FI_APPEND_WRITE);
flush_out:
if (!atomic)
if (!atomic && F2FS_OPTION(sbi).fsync_mode != FSYNC_MODE_NOBARRIER)
ret = f2fs_issue_flush(sbi, inode->i_ino);
if (!ret) {
remove_ino_entry(sbi, ino, UPDATE_INO);

View File

@ -996,6 +996,39 @@ static void __check_sit_bitmap(struct f2fs_sb_info *sbi,
#endif
}
static void __init_discard_policy(struct f2fs_sb_info *sbi,
struct discard_policy *dpolicy,
int discard_type, unsigned int granularity)
{
/* common policy */
dpolicy->type = discard_type;
dpolicy->sync = true;
dpolicy->granularity = granularity;
dpolicy->max_requests = DEF_MAX_DISCARD_REQUEST;
dpolicy->io_aware_gran = MAX_PLIST_NUM;
if (discard_type == DPOLICY_BG) {
dpolicy->min_interval = DEF_MIN_DISCARD_ISSUE_TIME;
dpolicy->max_interval = DEF_MAX_DISCARD_ISSUE_TIME;
dpolicy->io_aware = true;
dpolicy->sync = false;
if (utilization(sbi) > DEF_DISCARD_URGENT_UTIL) {
dpolicy->granularity = 1;
dpolicy->max_interval = DEF_MIN_DISCARD_ISSUE_TIME;
}
} else if (discard_type == DPOLICY_FORCE) {
dpolicy->min_interval = DEF_MIN_DISCARD_ISSUE_TIME;
dpolicy->max_interval = DEF_MAX_DISCARD_ISSUE_TIME;
dpolicy->io_aware = false;
} else if (discard_type == DPOLICY_FSTRIM) {
dpolicy->io_aware = false;
} else if (discard_type == DPOLICY_UMOUNT) {
dpolicy->io_aware = false;
}
}
/* this function is copied from blkdev_issue_discard from block/blk-lib.c */
static void __submit_discard_cmd(struct f2fs_sb_info *sbi,
struct discard_policy *dpolicy,
@ -1210,68 +1243,6 @@ static int __queue_discard_cmd(struct f2fs_sb_info *sbi,
return 0;
}
static void __issue_discard_cmd_range(struct f2fs_sb_info *sbi,
struct discard_policy *dpolicy,
unsigned int start, unsigned int end)
{
struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
struct discard_cmd *prev_dc = NULL, *next_dc = NULL;
struct rb_node **insert_p = NULL, *insert_parent = NULL;
struct discard_cmd *dc;
struct blk_plug plug;
int issued;
next:
issued = 0;
mutex_lock(&dcc->cmd_lock);
f2fs_bug_on(sbi, !__check_rb_tree_consistence(sbi, &dcc->root));
dc = (struct discard_cmd *)__lookup_rb_tree_ret(&dcc->root,
NULL, start,
(struct rb_entry **)&prev_dc,
(struct rb_entry **)&next_dc,
&insert_p, &insert_parent, true);
if (!dc)
dc = next_dc;
blk_start_plug(&plug);
while (dc && dc->lstart <= end) {
struct rb_node *node;
if (dc->len < dpolicy->granularity)
goto skip;
if (dc->state != D_PREP) {
list_move_tail(&dc->list, &dcc->fstrim_list);
goto skip;
}
__submit_discard_cmd(sbi, dpolicy, dc);
if (++issued >= dpolicy->max_requests) {
start = dc->lstart + dc->len;
blk_finish_plug(&plug);
mutex_unlock(&dcc->cmd_lock);
schedule();
goto next;
}
skip:
node = rb_next(&dc->rb_node);
dc = rb_entry_safe(node, struct discard_cmd, rb_node);
if (fatal_signal_pending(current))
break;
}
blk_finish_plug(&plug);
mutex_unlock(&dcc->cmd_lock);
}
static int __issue_discard_cmd(struct f2fs_sb_info *sbi,
struct discard_policy *dpolicy)
{
@ -1412,7 +1383,18 @@ static unsigned int __wait_discard_cmd_range(struct f2fs_sb_info *sbi,
static void __wait_all_discard_cmd(struct f2fs_sb_info *sbi,
struct discard_policy *dpolicy)
{
__wait_discard_cmd_range(sbi, dpolicy, 0, UINT_MAX);
struct discard_policy dp;
if (dpolicy) {
__wait_discard_cmd_range(sbi, dpolicy, 0, UINT_MAX);
return;
}
/* wait all */
__init_discard_policy(sbi, &dp, DPOLICY_FSTRIM, 1);
__wait_discard_cmd_range(sbi, &dp, 0, UINT_MAX);
__init_discard_policy(sbi, &dp, DPOLICY_UMOUNT, 1);
__wait_discard_cmd_range(sbi, &dp, 0, UINT_MAX);
}
/* This should be covered by global mutex, &sit_i->sentry_lock */
@ -1457,11 +1439,13 @@ bool f2fs_wait_discard_bios(struct f2fs_sb_info *sbi)
struct discard_policy dpolicy;
bool dropped;
init_discard_policy(&dpolicy, DPOLICY_UMOUNT, dcc->discard_granularity);
__init_discard_policy(sbi, &dpolicy, DPOLICY_UMOUNT,
dcc->discard_granularity);
__issue_discard_cmd(sbi, &dpolicy);
dropped = __drop_discard_cmd(sbi);
__wait_all_discard_cmd(sbi, &dpolicy);
/* just to make sure there is no pending discard commands */
__wait_all_discard_cmd(sbi, NULL);
return dropped;
}
@ -1477,7 +1461,7 @@ static int issue_discard_thread(void *data)
set_freezable();
do {
init_discard_policy(&dpolicy, DPOLICY_BG,
__init_discard_policy(sbi, &dpolicy, DPOLICY_BG,
dcc->discard_granularity);
wait_event_interruptible_timeout(*q,
@ -1495,7 +1479,7 @@ static int issue_discard_thread(void *data)
dcc->discard_wake = 0;
if (sbi->gc_thread && sbi->gc_thread->gc_urgent)
init_discard_policy(&dpolicy, DPOLICY_FORCE, 1);
__init_discard_policy(sbi, &dpolicy, DPOLICY_FORCE, 1);
sb_start_intwrite(sbi->sb);
@ -1788,32 +1772,6 @@ void clear_prefree_segments(struct f2fs_sb_info *sbi, struct cp_control *cpc)
wake_up_discard_thread(sbi, false);
}
void init_discard_policy(struct discard_policy *dpolicy,
int discard_type, unsigned int granularity)
{
/* common policy */
dpolicy->type = discard_type;
dpolicy->sync = true;
dpolicy->granularity = granularity;
dpolicy->max_requests = DEF_MAX_DISCARD_REQUEST;
dpolicy->io_aware_gran = MAX_PLIST_NUM;
if (discard_type == DPOLICY_BG) {
dpolicy->min_interval = DEF_MIN_DISCARD_ISSUE_TIME;
dpolicy->max_interval = DEF_MAX_DISCARD_ISSUE_TIME;
dpolicy->io_aware = true;
} else if (discard_type == DPOLICY_FORCE) {
dpolicy->min_interval = DEF_MIN_DISCARD_ISSUE_TIME;
dpolicy->max_interval = DEF_MAX_DISCARD_ISSUE_TIME;
dpolicy->io_aware = false;
} else if (discard_type == DPOLICY_FSTRIM) {
dpolicy->io_aware = false;
} else if (discard_type == DPOLICY_UMOUNT) {
dpolicy->io_aware = false;
}
}
static int create_discard_cmd_control(struct f2fs_sb_info *sbi)
{
dev_t dev = sbi->sb->s_bdev->bd_dev;
@ -2453,11 +2411,72 @@ bool exist_trim_candidates(struct f2fs_sb_info *sbi, struct cp_control *cpc)
return has_candidate;
}
static void __issue_discard_cmd_range(struct f2fs_sb_info *sbi,
struct discard_policy *dpolicy,
unsigned int start, unsigned int end)
{
struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
struct discard_cmd *prev_dc = NULL, *next_dc = NULL;
struct rb_node **insert_p = NULL, *insert_parent = NULL;
struct discard_cmd *dc;
struct blk_plug plug;
int issued;
next:
issued = 0;
mutex_lock(&dcc->cmd_lock);
f2fs_bug_on(sbi, !__check_rb_tree_consistence(sbi, &dcc->root));
dc = (struct discard_cmd *)__lookup_rb_tree_ret(&dcc->root,
NULL, start,
(struct rb_entry **)&prev_dc,
(struct rb_entry **)&next_dc,
&insert_p, &insert_parent, true);
if (!dc)
dc = next_dc;
blk_start_plug(&plug);
while (dc && dc->lstart <= end) {
struct rb_node *node;
if (dc->len < dpolicy->granularity)
goto skip;
if (dc->state != D_PREP) {
list_move_tail(&dc->list, &dcc->fstrim_list);
goto skip;
}
__submit_discard_cmd(sbi, dpolicy, dc);
if (++issued >= dpolicy->max_requests) {
start = dc->lstart + dc->len;
blk_finish_plug(&plug);
mutex_unlock(&dcc->cmd_lock);
__wait_all_discard_cmd(sbi, NULL);
congestion_wait(BLK_RW_ASYNC, HZ/50);
goto next;
}
skip:
node = rb_next(&dc->rb_node);
dc = rb_entry_safe(node, struct discard_cmd, rb_node);
if (fatal_signal_pending(current))
break;
}
blk_finish_plug(&plug);
mutex_unlock(&dcc->cmd_lock);
}
int f2fs_trim_fs(struct f2fs_sb_info *sbi, struct fstrim_range *range)
{
__u64 start = F2FS_BYTES_TO_BLK(range->start);
__u64 end = start + F2FS_BYTES_TO_BLK(range->len) - 1;
unsigned int start_segno, end_segno, cur_segno;
unsigned int start_segno, end_segno;
block_t start_block, end_block;
struct cp_control cpc;
struct discard_policy dpolicy;
@ -2483,40 +2502,27 @@ int f2fs_trim_fs(struct f2fs_sb_info *sbi, struct fstrim_range *range)
cpc.reason = CP_DISCARD;
cpc.trim_minlen = max_t(__u64, 1, F2FS_BYTES_TO_BLK(range->minlen));
cpc.trim_start = start_segno;
cpc.trim_end = end_segno;
/* do checkpoint to issue discard commands safely */
for (cur_segno = start_segno; cur_segno <= end_segno;
cur_segno = cpc.trim_end + 1) {
cpc.trim_start = cur_segno;
if (sbi->discard_blks == 0)
goto out;
if (sbi->discard_blks == 0)
break;
else if (sbi->discard_blks < BATCHED_TRIM_BLOCKS(sbi))
cpc.trim_end = end_segno;
else
cpc.trim_end = min_t(unsigned int,
rounddown(cur_segno +
BATCHED_TRIM_SEGMENTS(sbi),
sbi->segs_per_sec) - 1, end_segno);
mutex_lock(&sbi->gc_mutex);
err = write_checkpoint(sbi, &cpc);
mutex_unlock(&sbi->gc_mutex);
if (err)
break;
schedule();
}
mutex_lock(&sbi->gc_mutex);
err = write_checkpoint(sbi, &cpc);
mutex_unlock(&sbi->gc_mutex);
if (err)
goto out;
start_block = START_BLOCK(sbi, start_segno);
end_block = START_BLOCK(sbi, min(cur_segno, end_segno) + 1);
end_block = START_BLOCK(sbi, end_segno + 1);
init_discard_policy(&dpolicy, DPOLICY_FSTRIM, cpc.trim_minlen);
__init_discard_policy(sbi, &dpolicy, DPOLICY_FSTRIM, cpc.trim_minlen);
__issue_discard_cmd_range(sbi, &dpolicy, start_block, end_block);
trimmed = __wait_discard_cmd_range(sbi, &dpolicy,
start_block, end_block);
out:
range->len = F2FS_BLK_TO_BYTES(trimmed);
out:
return err;
}
@ -3904,8 +3910,6 @@ int build_segment_manager(struct f2fs_sb_info *sbi)
sm_info->min_hot_blocks = DEF_MIN_HOT_BLOCKS;
sm_info->min_ssr_sections = reserved_sections(sbi);
sm_info->trim_sections = DEF_BATCHED_TRIM_SECTIONS;
INIT_LIST_HEAD(&sm_info->sit_entry_set);
init_rwsem(&sm_info->curseg_lock);

View File

@ -740,6 +740,10 @@ static int parse_options(struct super_block *sb, char *options)
} else if (strlen(name) == 6 &&
!strncmp(name, "strict", 6)) {
F2FS_OPTION(sbi).fsync_mode = FSYNC_MODE_STRICT;
} else if (strlen(name) == 9 &&
!strncmp(name, "nobarrier", 9)) {
F2FS_OPTION(sbi).fsync_mode =
FSYNC_MODE_NOBARRIER;
} else {
kfree(name);
return -EINVAL;

View File

@ -245,6 +245,9 @@ static ssize_t f2fs_sbi_store(struct f2fs_attr *a,
return count;
}
if (!strcmp(a->attr.name, "trim_sections"))
return -EINVAL;
*ui = t;
if (!strcmp(a->attr.name, "iostat_enable") && *ui == 0)