bcachefs fixes for 6.15

Small stuff, main ones users will be interested in:
 
 - Couple more casefolding fixes; we can now detect and repair casefolded
   dirents in non-casefolded dir and vice versa
 - Fix for massive write inflation with mmapped io, which hit certain
   databases
 -----BEGIN PGP SIGNATURE-----
 
 iQIzBAABCgAdFiEEKnAFLkS8Qha+jvQrE6szbY3KbnYFAmgvz74ACgkQE6szbY3K
 bnY2mw/+Ioz+7dQz4vw2DY48o6Qn9jGst1XANOb2PQD/1RaEcnJF2d8C5gmjxVN6
 ysl/TQf+BHLkiXb6TTOgXVgtLjGBMOk3NNHrv8ebthacFcIfPC7r/cE5KmECRPiT
 0Fs6KTDeQscjLzg8YHI1PHnJ6EU/uYy76wK1nIcF5VJxLYSH6UQ2ZAo7NT0sAc2K
 PyCsvmOb8mev3wrCHgYYUWZyS0Cla6a44bpG83km5NhIC4dcrL8On8TO2E2VwVpX
 A9otPSLmmdIuJd+Q6nUNiEFOVcdtZn/rH9jz9QWokH/kpdoqCysZneTHXEy0kVCG
 NAKuEKv/mv+cQ45pn+4JhWf+FaLih/46w+C/10gNWShHoqdxIZFYOlb3S5ZCvK7L
 ij5zlcQF/ZrT7bZJTov1J6SUM9LShgTvt4gjqcaiokb2BBYqLn6u/TBaNgW9wu0Q
 4BiYfdO/BfCaLiBvIz2WuMmUfh2i+rBYjetW0R+Sq8f/vFfFTw7tWipO/u4zrE7J
 D5FVT9+XGwAvRULk1/jG5df9ZsF/g4L2CMdQuWh/8wXfTp/GrKirFN5x6Ywkz6ew
 rb/m/ukSOLLUcRBJCapZmDrRwBnaQ0MTFi6z8qNb125ENR0WwbYkV0XdkBOi3U/g
 1OM2eC/SWOrWkNXl4mLMv0MUSLEsVyuYUtf75FvVm4zNH9Spqos=
 =a06t
 -----END PGP SIGNATURE-----

Merge tag 'bcachefs-2025-05-22' of git://evilpiepirate.org/bcachefs

Pull bcachefs fixes from Kent Overstreet:
 "Small stuff, main ones users will be interested in:

   - Couple more casefolding fixes; we can now detect and repair
     casefolded dirents in non-casefolded dir and vice versa

   - Fix for massive write inflation with mmapped io, which hit certain
     databases"

* tag 'bcachefs-2025-05-22' of git://evilpiepirate.org/bcachefs:
  bcachefs: Check for casefolded dirents in non casefolded dirs
  bcachefs: Fix bch2_dirent_create_snapshot() for casefolding
  bcachefs: Fix casefold opt via xattr interface
  bcachefs: mkwrite() now only dirties one page
  bcachefs: fix extent_has_stripe_ptr()
  bcachefs: Fix bch2_btree_path_traverse_cached() when paths realloced
This commit is contained in:
Linus Torvalds 2025-05-22 19:17:55 -07:00
commit 040c0f6a18
15 changed files with 143 additions and 86 deletions

View File

@ -1162,7 +1162,7 @@ int bch2_btree_path_traverse_one(struct btree_trans *trans,
}
if (path->cached) {
ret = bch2_btree_path_traverse_cached(trans, path, flags);
ret = bch2_btree_path_traverse_cached(trans, path_idx, flags);
goto out;
}

View File

@ -301,9 +301,11 @@ static noinline_for_stack void do_trace_key_cache_fill(struct btree_trans *trans
}
static noinline int btree_key_cache_fill(struct btree_trans *trans,
struct btree_path *ck_path,
btree_path_idx_t ck_path_idx,
unsigned flags)
{
struct btree_path *ck_path = trans->paths + ck_path_idx;
if (flags & BTREE_ITER_cached_nofill) {
ck_path->l[0].b = NULL;
return 0;
@ -325,6 +327,7 @@ static noinline int btree_key_cache_fill(struct btree_trans *trans,
goto err;
/* Recheck after btree lookup, before allocating: */
ck_path = trans->paths + ck_path_idx;
ret = bch2_btree_key_cache_find(c, ck_path->btree_id, ck_path->pos) ? -EEXIST : 0;
if (unlikely(ret))
goto out;
@ -344,10 +347,11 @@ static noinline int btree_key_cache_fill(struct btree_trans *trans,
}
static inline int btree_path_traverse_cached_fast(struct btree_trans *trans,
struct btree_path *path)
btree_path_idx_t path_idx)
{
struct bch_fs *c = trans->c;
struct bkey_cached *ck;
struct btree_path *path = trans->paths + path_idx;
retry:
ck = bch2_btree_key_cache_find(c, path->btree_id, path->pos);
if (!ck)
@ -373,27 +377,32 @@ static inline int btree_path_traverse_cached_fast(struct btree_trans *trans,
return 0;
}
int bch2_btree_path_traverse_cached(struct btree_trans *trans, struct btree_path *path,
int bch2_btree_path_traverse_cached(struct btree_trans *trans,
btree_path_idx_t path_idx,
unsigned flags)
{
EBUG_ON(path->level);
path->l[1].b = NULL;
EBUG_ON(trans->paths[path_idx].level);
int ret;
do {
ret = btree_path_traverse_cached_fast(trans, path);
ret = btree_path_traverse_cached_fast(trans, path_idx);
if (unlikely(ret == -ENOENT))
ret = btree_key_cache_fill(trans, path, flags);
ret = btree_key_cache_fill(trans, path_idx, flags);
} while (ret == -EEXIST);
struct btree_path *path = trans->paths + path_idx;
if (unlikely(ret)) {
path->uptodate = BTREE_ITER_NEED_TRAVERSE;
if (!bch2_err_matches(ret, BCH_ERR_transaction_restart)) {
btree_node_unlock(trans, path, 0);
path->l[0].b = ERR_PTR(ret);
}
} else {
BUG_ON(path->uptodate);
BUG_ON(!path->nodes_locked);
}
return ret;
}

View File

@ -40,8 +40,7 @@ int bch2_btree_key_cache_journal_flush(struct journal *,
struct bkey_cached *
bch2_btree_key_cache_find(struct bch_fs *, enum btree_id, struct bpos);
int bch2_btree_path_traverse_cached(struct btree_trans *, struct btree_path *,
unsigned);
int bch2_btree_path_traverse_cached(struct btree_trans *, btree_path_idx_t, unsigned);
bool bch2_btree_insert_key_cached(struct btree_trans *, unsigned,
struct btree_insert_entry *);

View File

@ -288,6 +288,7 @@ static void dirent_init_casefolded_name(struct bkey_i_dirent *dirent,
}
static struct bkey_i_dirent *dirent_create_key(struct btree_trans *trans,
const struct bch_hash_info *hash_info,
subvol_inum dir,
u8 type,
const struct qstr *name,
@ -295,10 +296,19 @@ static struct bkey_i_dirent *dirent_create_key(struct btree_trans *trans,
u64 dst)
{
struct bkey_i_dirent *dirent;
struct qstr _cf_name;
if (name->len > BCH_NAME_MAX)
return ERR_PTR(-ENAMETOOLONG);
if (hash_info->cf_encoding && !cf_name) {
int ret = bch2_casefold(trans, hash_info, name, &_cf_name);
if (ret)
return ERR_PTR(ret);
cf_name = &_cf_name;
}
dirent = dirent_alloc_key(trans, dir, type, name->len, cf_name ? cf_name->len : 0, dst);
if (IS_ERR(dirent))
return dirent;
@ -324,7 +334,7 @@ int bch2_dirent_create_snapshot(struct btree_trans *trans,
struct bkey_i_dirent *dirent;
int ret;
dirent = dirent_create_key(trans, dir_inum, type, name, NULL, dst_inum);
dirent = dirent_create_key(trans, hash_info, dir_inum, type, name, NULL, dst_inum);
ret = PTR_ERR_OR_ZERO(dirent);
if (ret)
return ret;
@ -333,8 +343,7 @@ int bch2_dirent_create_snapshot(struct btree_trans *trans,
dirent->k.p.snapshot = snapshot;
ret = bch2_hash_set_in_snapshot(trans, bch2_dirent_hash_desc, hash_info,
dir_inum, snapshot, &dirent->k_i,
flags|BTREE_UPDATE_internal_snapshot_node);
dir_inum, snapshot, &dirent->k_i, flags);
*dir_offset = dirent->k.p.offset;
return ret;
@ -344,28 +353,16 @@ int bch2_dirent_create(struct btree_trans *trans, subvol_inum dir,
const struct bch_hash_info *hash_info,
u8 type, const struct qstr *name, u64 dst_inum,
u64 *dir_offset,
u64 *i_size,
enum btree_iter_update_trigger_flags flags)
{
struct bkey_i_dirent *dirent;
int ret;
if (hash_info->cf_encoding) {
struct qstr cf_name;
ret = bch2_casefold(trans, hash_info, name, &cf_name);
if (ret)
return ret;
dirent = dirent_create_key(trans, dir, type, name, &cf_name, dst_inum);
} else {
dirent = dirent_create_key(trans, dir, type, name, NULL, dst_inum);
}
dirent = dirent_create_key(trans, hash_info, dir, type, name, NULL, dst_inum);
ret = PTR_ERR_OR_ZERO(dirent);
if (ret)
return ret;
*i_size += bkey_bytes(&dirent->k);
ret = bch2_hash_set(trans, bch2_dirent_hash_desc, hash_info,
dir, &dirent->k_i, flags);
*dir_offset = dirent->k.p.offset;
@ -466,7 +463,7 @@ int bch2_dirent_rename(struct btree_trans *trans,
*src_offset = dst_iter.pos.offset;
/* Create new dst key: */
new_dst = dirent_create_key(trans, dst_dir, 0, dst_name,
new_dst = dirent_create_key(trans, dst_hash, dst_dir, 0, dst_name,
dst_hash->cf_encoding ? &dst_name_lookup : NULL, 0);
ret = PTR_ERR_OR_ZERO(new_dst);
if (ret)
@ -477,7 +474,7 @@ int bch2_dirent_rename(struct btree_trans *trans,
/* Create new src key: */
if (mode == BCH_RENAME_EXCHANGE) {
new_src = dirent_create_key(trans, src_dir, 0, src_name,
new_src = dirent_create_key(trans, src_hash, src_dir, 0, src_name,
src_hash->cf_encoding ? &src_name_lookup : NULL, 0);
ret = PTR_ERR_OR_ZERO(new_src);
if (ret)

View File

@ -65,7 +65,7 @@ int bch2_dirent_create_snapshot(struct btree_trans *, u32, u64, u32,
enum btree_iter_update_trigger_flags);
int bch2_dirent_create(struct btree_trans *, subvol_inum,
const struct bch_hash_info *, u8,
const struct qstr *, u64, u64 *, u64 *,
const struct qstr *, u64, u64 *,
enum btree_iter_update_trigger_flags);
static inline unsigned vfs_d_type(unsigned type)

View File

@ -507,20 +507,14 @@ static const struct bch_extent_ptr *bkey_matches_stripe(struct bch_stripe *s,
static bool extent_has_stripe_ptr(struct bkey_s_c k, u64 idx)
{
switch (k.k->type) {
case KEY_TYPE_extent: {
struct bkey_s_c_extent e = bkey_s_c_to_extent(k);
const union bch_extent_entry *entry;
struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
const union bch_extent_entry *entry;
extent_for_each_entry(e, entry)
if (extent_entry_type(entry) ==
BCH_EXTENT_ENTRY_stripe_ptr &&
entry->stripe_ptr.idx == idx)
return true;
break;
}
}
bkey_extent_entry_for_each(ptrs, entry)
if (extent_entry_type(entry) ==
BCH_EXTENT_ENTRY_stripe_ptr &&
entry->stripe_ptr.idx == idx)
return true;
return false;
}

View File

@ -380,13 +380,6 @@ out: \
/* Iterate over pointers in KEY_TYPE_extent: */
#define extent_for_each_entry_from(_e, _entry, _start) \
__bkey_extent_entry_for_each_from(_start, \
extent_entry_last(_e), _entry)
#define extent_for_each_entry(_e, _entry) \
extent_for_each_entry_from(_e, _entry, (_e).v->start)
#define extent_ptr_next(_e, _ptr) \
__bkey_ptr_next(_ptr, extent_entry_last(_e))

View File

@ -605,10 +605,14 @@ vm_fault_t bch2_page_mkwrite(struct vm_fault *vmf)
struct address_space *mapping = file->f_mapping;
struct bch_fs *c = inode->v.i_sb->s_fs_info;
struct bch2_folio_reservation res;
unsigned len;
loff_t isize;
vm_fault_t ret;
loff_t file_offset = round_down(vmf->pgoff << PAGE_SHIFT, block_bytes(c));
unsigned offset = file_offset - folio_pos(folio);
unsigned len = max(PAGE_SIZE, block_bytes(c));
BUG_ON(offset + len > folio_size(folio));
bch2_folio_reservation_init(c, inode, &res);
sb_start_pagefault(inode->v.i_sb);
@ -623,24 +627,24 @@ vm_fault_t bch2_page_mkwrite(struct vm_fault *vmf)
bch2_pagecache_add_get(inode);
folio_lock(folio);
isize = i_size_read(&inode->v);
u64 isize = i_size_read(&inode->v);
if (folio->mapping != mapping || folio_pos(folio) >= isize) {
if (folio->mapping != mapping || file_offset >= isize) {
folio_unlock(folio);
ret = VM_FAULT_NOPAGE;
goto out;
}
len = min_t(loff_t, folio_size(folio), isize - folio_pos(folio));
len = min_t(unsigned, len, isize - file_offset);
if (bch2_folio_set(c, inode_inum(inode), &folio, 1) ?:
bch2_folio_reservation_get(c, inode, folio, &res, 0, len)) {
bch2_folio_reservation_get(c, inode, folio, &res, offset, len)) {
folio_unlock(folio);
ret = VM_FAULT_SIGBUS;
goto out;
}
bch2_set_folio_dirty(c, inode, folio, &res, 0, len);
bch2_set_folio_dirty(c, inode, folio, &res, offset, len);
bch2_folio_reservation_put(c, inode, &res);
folio_wait_stable(folio);

View File

@ -1664,33 +1664,9 @@ static int fssetxattr_inode_update_fn(struct btree_trans *trans,
return -EINVAL;
if (s->casefold != bch2_inode_casefold(c, bi)) {
#ifdef CONFIG_UNICODE
int ret = 0;
/* Not supported on individual files. */
if (!S_ISDIR(bi->bi_mode))
return -EOPNOTSUPP;
/*
* Make sure the dir is empty, as otherwise we'd need to
* rehash everything and update the dirent keys.
*/
ret = bch2_empty_dir_trans(trans, inode_inum(inode));
if (ret < 0)
return ret;
ret = bch2_request_incompat_feature(c, bcachefs_metadata_version_casefolding);
int ret = bch2_inode_set_casefold(trans, inode_inum(inode), bi, s->casefold);
if (ret)
return ret;
bch2_check_set_feature(c, BCH_FEATURE_casefolding);
bi->bi_casefold = s->casefold + 1;
bi->bi_fields_set |= BIT(Inode_opt_casefold);
#else
printk(KERN_ERR "Cannot use casefolding on a kernel without CONFIG_UNICODE\n");
return -EOPNOTSUPP;
#endif
}
if (s->set_project) {

View File

@ -306,6 +306,7 @@ static int lookup_lostfound(struct btree_trans *trans, u32 snapshot,
&lostfound_str,
lostfound->bi_inum,
&lostfound->bi_dir_offset,
BTREE_UPDATE_internal_snapshot_node|
STR_HASH_must_create) ?:
bch2_inode_write_flags(trans, &lostfound_iter, lostfound,
BTREE_UPDATE_internal_snapshot_node);
@ -431,6 +432,7 @@ static int reattach_inode(struct btree_trans *trans, struct bch_inode_unpacked *
&name,
inode->bi_subvol ?: inode->bi_inum,
&inode->bi_dir_offset,
BTREE_UPDATE_internal_snapshot_node|
STR_HASH_must_create);
if (ret) {
bch_err_msg(c, ret, "error creating dirent");
@ -2188,6 +2190,41 @@ static int check_dirent(struct btree_trans *trans, struct btree_iter *iter,
struct bkey_s_c_dirent d = bkey_s_c_to_dirent(k);
/* check casefold */
if (fsck_err_on(d.v->d_casefold != !!hash_info->cf_encoding,
trans, dirent_casefold_mismatch,
"dirent casefold does not match dir casefold\n%s",
(printbuf_reset(&buf),
bch2_bkey_val_to_text(&buf, c, k),
buf.buf))) {
struct qstr name = bch2_dirent_get_name(d);
u32 subvol = d.v->d_type == DT_SUBVOL
? d.v->d_parent_subvol
: 0;
u64 target = d.v->d_type == DT_SUBVOL
? d.v->d_child_subvol
: d.v->d_inum;
u64 dir_offset;
ret = bch2_hash_delete_at(trans,
bch2_dirent_hash_desc, hash_info, iter,
BTREE_UPDATE_internal_snapshot_node) ?:
bch2_dirent_create_snapshot(trans, subvol,
d.k->p.inode, d.k->p.snapshot,
hash_info,
d.v->d_type,
&name,
target,
&dir_offset,
BTREE_ITER_with_updates|
BTREE_UPDATE_internal_snapshot_node|
STR_HASH_must_create) ?:
bch2_trans_commit(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc);
/* might need another check_dirents pass */
goto out;
}
if (d.v->d_type == DT_SUBVOL) {
ret = check_dirent_to_subvol(trans, iter, d);
if (ret)

View File

@ -14,6 +14,7 @@
#include "extent_update.h"
#include "fs.h"
#include "inode.h"
#include "namei.h"
#include "opts.h"
#include "str_hash.h"
#include "snapshot.h"
@ -1204,6 +1205,41 @@ int bch2_inum_opts_get(struct btree_trans *trans, subvol_inum inum, struct bch_i
return 0;
}
int bch2_inode_set_casefold(struct btree_trans *trans, subvol_inum inum,
struct bch_inode_unpacked *bi, unsigned v)
{
struct bch_fs *c = trans->c;
#ifdef CONFIG_UNICODE
int ret = 0;
/* Not supported on individual files. */
if (!S_ISDIR(bi->bi_mode))
return -EOPNOTSUPP;
/*
* Make sure the dir is empty, as otherwise we'd need to
* rehash everything and update the dirent keys.
*/
ret = bch2_empty_dir_trans(trans, inum);
if (ret < 0)
return ret;
ret = bch2_request_incompat_feature(c, bcachefs_metadata_version_casefolding);
if (ret)
return ret;
bch2_check_set_feature(c, BCH_FEATURE_casefolding);
bi->bi_casefold = v + 1;
bi->bi_fields_set |= BIT(Inode_opt_casefold);
return 0;
#else
bch_err(c, "Cannot use casefolding on a kernel without CONFIG_UNICODE");
return -EOPNOTSUPP;
#endif
}
static noinline int __bch2_inode_rm_snapshot(struct btree_trans *trans, u64 inum, u32 snapshot)
{
struct bch_fs *c = trans->c;

View File

@ -292,7 +292,9 @@ static inline bool bch2_inode_should_have_single_bp(struct bch_inode_unpacked *i
struct bch_opts bch2_inode_opts_to_opts(struct bch_inode_unpacked *);
void bch2_inode_opts_get(struct bch_io_opts *, struct bch_fs *,
struct bch_inode_unpacked *);
int bch2_inum_opts_get(struct btree_trans*, subvol_inum, struct bch_io_opts *);
int bch2_inum_opts_get(struct btree_trans *, subvol_inum, struct bch_io_opts *);
int bch2_inode_set_casefold(struct btree_trans *, subvol_inum,
struct bch_inode_unpacked *, unsigned);
#include "rebalance.h"

View File

@ -158,7 +158,6 @@ int bch2_create_trans(struct btree_trans *trans,
name,
dir_target,
&dir_offset,
&dir_u->bi_size,
STR_HASH_must_create|BTREE_ITER_with_updates) ?:
bch2_inode_write(trans, &dir_iter, dir_u);
if (ret)
@ -225,7 +224,6 @@ int bch2_link_trans(struct btree_trans *trans,
mode_to_type(inode_u->bi_mode),
name, inum.inum,
&dir_offset,
&dir_u->bi_size,
STR_HASH_must_create);
if (ret)
goto err;

View File

@ -209,6 +209,7 @@ enum bch_fsck_flags {
x(subvol_to_missing_root, 188, 0) \
x(subvol_root_wrong_bi_subvol, 189, FSCK_AUTOFIX) \
x(bkey_in_missing_snapshot, 190, 0) \
x(bkey_in_deleted_snapshot, 315, 0) \
x(inode_pos_inode_nonzero, 191, 0) \
x(inode_pos_blockdev_range, 192, 0) \
x(inode_alloc_cursor_inode_bad, 301, 0) \
@ -216,6 +217,7 @@ enum bch_fsck_flags {
x(inode_str_hash_invalid, 194, 0) \
x(inode_v3_fields_start_bad, 195, 0) \
x(inode_snapshot_mismatch, 196, 0) \
x(snapshot_key_missing_inode_snapshot, 314, 0) \
x(inode_unlinked_but_clean, 197, 0) \
x(inode_unlinked_but_nlink_nonzero, 198, 0) \
x(inode_unlinked_and_not_open, 281, 0) \
@ -237,6 +239,8 @@ enum bch_fsck_flags {
x(inode_unreachable, 210, FSCK_AUTOFIX) \
x(inode_journal_seq_in_future, 299, FSCK_AUTOFIX) \
x(inode_i_sectors_underflow, 312, FSCK_AUTOFIX) \
x(inode_has_case_insensitive_not_set, 316, FSCK_AUTOFIX) \
x(inode_parent_has_case_insensitive_not_set, 317, FSCK_AUTOFIX) \
x(vfs_inode_i_blocks_underflow, 311, FSCK_AUTOFIX) \
x(vfs_inode_i_blocks_not_zero_at_truncate, 313, FSCK_AUTOFIX) \
x(deleted_inode_but_clean, 211, FSCK_AUTOFIX) \
@ -262,6 +266,7 @@ enum bch_fsck_flags {
x(dirent_to_overwritten_inode, 302, 0) \
x(dirent_to_missing_subvol, 230, 0) \
x(dirent_to_itself, 231, 0) \
x(dirent_casefold_mismatch, 318, FSCK_AUTOFIX) \
x(quota_type_invalid, 232, 0) \
x(xattr_val_size_too_small, 233, 0) \
x(xattr_val_size_too_big, 234, 0) \
@ -301,6 +306,7 @@ enum bch_fsck_flags {
x(btree_ptr_v2_written_0, 268, 0) \
x(subvol_snapshot_bad, 269, 0) \
x(subvol_inode_bad, 270, 0) \
x(subvol_missing, 308, FSCK_AUTOFIX) \
x(alloc_key_stripe_sectors_wrong, 271, FSCK_AUTOFIX) \
x(accounting_mismatch, 272, FSCK_AUTOFIX) \
x(accounting_replicas_not_marked, 273, 0) \
@ -322,7 +328,7 @@ enum bch_fsck_flags {
x(dirent_stray_data_after_cf_name, 305, 0) \
x(rebalance_work_incorrectly_set, 309, FSCK_AUTOFIX) \
x(rebalance_work_incorrectly_unset, 310, FSCK_AUTOFIX) \
x(MAX, 314, 0)
x(MAX, 319, 0)
enum bch_sb_error_id {
#define x(t, n, ...) BCH_FSCK_ERR_##t = n,

View File

@ -473,6 +473,12 @@ static int inode_opt_set_fn(struct btree_trans *trans,
{
struct inode_opt_set *s = p;
if (s->id == Inode_opt_casefold) {
int ret = bch2_inode_set_casefold(trans, inode_inum(inode), bi, s->v);
if (ret)
return ret;
}
if (s->defined)
bi->bi_fields_set |= 1U << s->id;
else