mirror of
https://github.com/torvalds/linux.git
synced 2026-05-28 09:04:39 +02:00
bcachefs fixes for 6.15-rc3
Usual set of small fixes/logging improvements. One bigger user reported fix, for inode <-> dirent inconsistencies reported in fsck, after moving a subvolume that had been snapshotted. -----BEGIN PGP SIGNATURE----- iQIzBAABCgAdFiEEKnAFLkS8Qha+jvQrE6szbY3KbnYFAmgBYaIACgkQE6szbY3K bnYcGQ/+K+LsEvGAZ5wtTwUN4KqJIYWhcHYcuLS2mHKf8PMbgYhL7TjmCwb9VWyr 0+GFQcJgfLsl++kX4j7CjG4gHd22aLiwbhMDmSt3r6c4aF29rG+zCpe4W1+7o60k UIKokfbLUV6b+0vF5bA/W3PmtXK7S8E0yAPMfWxv4/sACu8RUvrUJtrUCKEWwLzC bcrRGsN91456qNhCrOp3e4t3yZjiGtZIz+SbPYIxdNrZYIMURlGUm+f9sLH3O+2R NKsi41sggo/TmgUyspH3KCtMT88IDbN07F7O9/zcxgtdfzfC9l9FI6HnvRVSHDOV boFaH/NdRaIbg+O5kqZXYul+/EPXsYp5B77TL6KQ3jhv3q16uwpv9EL4v6HIwvz9 BTDOfI2y/+YWHMfrtzXgh3C9dZDPS7qxFFWjSjCs/lXwKVz46RjBWVmtQoTJSEmb Ee29kBGMpkwmH8fqr5KQheJUIeYewpyTVeB6orgtshnrr+aezS6zunIbk7fJ6+Ng Tc08H/Aqc2KGcyBS3KTLhbReQ1clQKGOqWJymeb1p2V3SMXfABMbh61B1VU1XulC Al5B7/w/WPwb+T2XZIM2qbmeoRJ8OBara5RWkx4HN8pcYuWV8H6GWJtRJQD/eKSO pOT5bz8z9N2n/otwrfLT5lfO2fNW1mULCAamn6iSzR+EDHyuaMU= =pi0D -----END PGP SIGNATURE----- Merge tag 'bcachefs-2025-04-17' of git://evilpiepirate.org/bcachefs Pull bcachefs fixes from Kent Overstreet: "Usual set of small fixes/logging improvements. One bigger user reported fix, for inode <-> dirent inconsistencies reported in fsck, after moving a subvolume that had been snapshotted" * tag 'bcachefs-2025-04-17' of git://evilpiepirate.org/bcachefs: bcachefs: Fix snapshotting a subvolume, then renaming it bcachefs: Add missing READ_ONCE() for metadata replicas bcachefs: snapshot_node_missing is now autofix bcachefs: Log message when incompat version requested but not enabled bcachefs: Print version_incompat_allowed on startup bcachefs: Silence extent_poisoned error messages bcachefs: btree_root_unreadable_and_scan_found_nothing now AUTOFIX bcachefs: fix bch2_dev_usage_full_read_fast() bcachefs: Don't print data read retry success on non-errors bcachefs: Add missing error handling bcachefs: Prevent granting write refs when filesystem is read-only
This commit is contained in:
commit
9e99c1accb
|
|
@ -788,6 +788,8 @@ struct bch_fs {
|
|||
unsigned long errors_silent[BITS_TO_LONGS(BCH_FSCK_ERR_MAX)];
|
||||
u64 btrees_lost_data;
|
||||
} sb;
|
||||
DARRAY(enum bcachefs_metadata_version)
|
||||
incompat_versions_requested;
|
||||
|
||||
#ifdef CONFIG_UNICODE
|
||||
struct unicode_map *cf_encoding;
|
||||
|
|
|
|||
|
|
@ -1221,7 +1221,7 @@ bch2_btree_update_start(struct btree_trans *trans, struct btree_path *path,
|
|||
|
||||
ret = bch2_disk_reservation_get(c, &as->disk_res,
|
||||
(nr_nodes[0] + nr_nodes[1]) * btree_sectors(c),
|
||||
c->opts.metadata_replicas,
|
||||
READ_ONCE(c->opts.metadata_replicas),
|
||||
disk_res_flags);
|
||||
if (ret)
|
||||
goto err;
|
||||
|
|
|
|||
|
|
@ -37,7 +37,8 @@ void bch2_dev_usage_read_fast(struct bch_dev *ca, struct bch_dev_usage *usage)
|
|||
void bch2_dev_usage_full_read_fast(struct bch_dev *ca, struct bch_dev_usage_full *usage)
|
||||
{
|
||||
memset(usage, 0, sizeof(*usage));
|
||||
acc_u64s_percpu((u64 *) usage, (u64 __percpu *) ca->usage, dev_usage_u64s());
|
||||
acc_u64s_percpu((u64 *) usage, (u64 __percpu *) ca->usage,
|
||||
sizeof(struct bch_dev_usage_full) / sizeof(u64));
|
||||
}
|
||||
|
||||
static u64 reserve_factor(u64 r)
|
||||
|
|
|
|||
|
|
@ -242,11 +242,6 @@ static inline u64 dev_buckets_available(struct bch_dev *ca,
|
|||
|
||||
/* Filesystem usage: */
|
||||
|
||||
static inline unsigned dev_usage_u64s(void)
|
||||
{
|
||||
return sizeof(struct bch_dev_usage) / sizeof(u64);
|
||||
}
|
||||
|
||||
struct bch_fs_usage_short
|
||||
bch2_fs_usage_read_short(struct bch_fs *);
|
||||
|
||||
|
|
|
|||
|
|
@ -287,7 +287,7 @@
|
|||
x(EIO, mark_stripe) \
|
||||
x(EIO, stripe_reconstruct) \
|
||||
x(EIO, key_type_error) \
|
||||
x(EIO, extent_poisened) \
|
||||
x(EIO, extent_poisoned) \
|
||||
x(EIO, missing_indirect_extent) \
|
||||
x(EIO, invalidate_stripe_to_dev) \
|
||||
x(EIO, no_encryption_key) \
|
||||
|
|
|
|||
|
|
@ -139,7 +139,7 @@ int bch2_bkey_pick_read_device(struct bch_fs *c, struct bkey_s_c k,
|
|||
struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
|
||||
|
||||
if (bch2_bkey_extent_ptrs_flags(ptrs) & BIT_ULL(BCH_EXTENT_FLAG_poisoned))
|
||||
return -BCH_ERR_extent_poisened;
|
||||
return -BCH_ERR_extent_poisoned;
|
||||
|
||||
rcu_read_lock();
|
||||
const union bch_extent_entry *entry;
|
||||
|
|
|
|||
|
|
@ -69,7 +69,7 @@ static int bch2_inode_flags_set(struct btree_trans *trans,
|
|||
if (ret < 0)
|
||||
return ret;
|
||||
|
||||
ret = bch2_request_incompat_feature(c,bcachefs_metadata_version_casefolding);
|
||||
ret = bch2_request_incompat_feature(c, bcachefs_metadata_version_casefolding);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
|
|
|
|||
|
|
@ -321,6 +321,31 @@ static inline bool inode_should_reattach(struct bch_inode_unpacked *inode)
|
|||
inode->bi_subvol == BCACHEFS_ROOT_SUBVOL)
|
||||
return false;
|
||||
|
||||
/*
|
||||
* Subvolume roots are special: older versions of subvolume roots may be
|
||||
* disconnected, it's only the newest version that matters.
|
||||
*
|
||||
* We only keep a single dirent pointing to a subvolume root, i.e.
|
||||
* older versions of snapshots will not have a different dirent pointing
|
||||
* to the same subvolume root.
|
||||
*
|
||||
* This is because dirents that point to subvolumes are only visible in
|
||||
* the parent subvolume - versioning is not needed - and keeping them
|
||||
* around would break fsck, because when we're crossing subvolumes we
|
||||
* don't have a consistent snapshot ID to check the inode <-> dirent
|
||||
* relationships.
|
||||
*
|
||||
* Thus, a subvolume root that's been renamed after a snapshot will have
|
||||
* a disconnected older version - that's expected.
|
||||
*
|
||||
* Note that taking a snapshot always updates the root inode (to update
|
||||
* the dirent backpointer), so a subvolume root inode with
|
||||
* BCH_INODE_has_child_snapshot is never visible.
|
||||
*/
|
||||
if (inode->bi_subvol &&
|
||||
(inode->bi_flags & BCH_INODE_has_child_snapshot))
|
||||
return false;
|
||||
|
||||
return !inode->bi_dir && !(inode->bi_flags & BCH_INODE_unlinked);
|
||||
}
|
||||
|
||||
|
|
@ -1007,6 +1032,23 @@ static int check_inode_dirent_inode(struct btree_trans *trans,
|
|||
if (ret && !bch2_err_matches(ret, ENOENT))
|
||||
return ret;
|
||||
|
||||
if ((ret || dirent_points_to_inode_nowarn(d, inode)) &&
|
||||
inode->bi_subvol &&
|
||||
(inode->bi_flags & BCH_INODE_has_child_snapshot)) {
|
||||
/* Older version of a renamed subvolume root: we won't have a
|
||||
* correct dirent for it. That's expected, see
|
||||
* inode_should_reattach().
|
||||
*
|
||||
* We don't clear the backpointer field when doing the rename
|
||||
* because there might be arbitrarily many versions in older
|
||||
* snapshots.
|
||||
*/
|
||||
inode->bi_dir = 0;
|
||||
inode->bi_dir_offset = 0;
|
||||
*write_inode = true;
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (fsck_err_on(ret,
|
||||
trans, inode_points_to_missing_dirent,
|
||||
"inode points to missing dirent\n%s",
|
||||
|
|
@ -1027,7 +1069,7 @@ static int check_inode_dirent_inode(struct btree_trans *trans,
|
|||
inode->bi_dir_offset = 0;
|
||||
*write_inode = true;
|
||||
}
|
||||
|
||||
out:
|
||||
ret = 0;
|
||||
fsck_err:
|
||||
bch2_trans_iter_exit(trans, &dirent_iter);
|
||||
|
|
|
|||
|
|
@ -487,6 +487,8 @@ static void bch2_rbio_retry(struct work_struct *work)
|
|||
.inum = rbio->read_pos.inode,
|
||||
};
|
||||
struct bch_io_failures failed = { .nr = 0 };
|
||||
int orig_error = rbio->ret;
|
||||
|
||||
struct btree_trans *trans = bch2_trans_get(c);
|
||||
|
||||
trace_io_read_retry(&rbio->bio);
|
||||
|
|
@ -519,7 +521,9 @@ static void bch2_rbio_retry(struct work_struct *work)
|
|||
if (ret) {
|
||||
rbio->ret = ret;
|
||||
rbio->bio.bi_status = BLK_STS_IOERR;
|
||||
} else {
|
||||
} else if (orig_error != -BCH_ERR_data_read_retry_csum_err_maybe_userspace &&
|
||||
orig_error != -BCH_ERR_data_read_ptr_stale_race &&
|
||||
!failed.nr) {
|
||||
struct printbuf buf = PRINTBUF;
|
||||
|
||||
lockrestart_do(trans,
|
||||
|
|
@ -1345,14 +1349,16 @@ int __bch2_read(struct btree_trans *trans, struct bch_read_bio *rbio,
|
|||
|
||||
bch2_trans_iter_exit(trans, &iter);
|
||||
|
||||
if (ret) {
|
||||
struct printbuf buf = PRINTBUF;
|
||||
lockrestart_do(trans,
|
||||
bch2_inum_offset_err_msg_trans(trans, &buf, inum,
|
||||
bvec_iter.bi_sector << 9));
|
||||
prt_printf(&buf, "read error: %s", bch2_err_str(ret));
|
||||
bch_err_ratelimited(c, "%s", buf.buf);
|
||||
printbuf_exit(&buf);
|
||||
if (unlikely(ret)) {
|
||||
if (ret != -BCH_ERR_extent_poisoned) {
|
||||
struct printbuf buf = PRINTBUF;
|
||||
lockrestart_do(trans,
|
||||
bch2_inum_offset_err_msg_trans(trans, &buf, inum,
|
||||
bvec_iter.bi_sector << 9));
|
||||
prt_printf(&buf, "data read error: %s", bch2_err_str(ret));
|
||||
bch_err_ratelimited(c, "%s", buf.buf);
|
||||
printbuf_exit(&buf);
|
||||
}
|
||||
|
||||
rbio->bio.bi_status = BLK_STS_IOERR;
|
||||
rbio->ret = ret;
|
||||
|
|
|
|||
|
|
@ -1125,7 +1125,10 @@ int bch2_fs_initialize(struct bch_fs *c)
|
|||
* journal_res_get() will crash if called before this has
|
||||
* set up the journal.pin FIFO and journal.cur pointer:
|
||||
*/
|
||||
bch2_fs_journal_start(&c->journal, 1);
|
||||
ret = bch2_fs_journal_start(&c->journal, 1);
|
||||
if (ret)
|
||||
goto err;
|
||||
|
||||
set_bit(BCH_FS_accounting_replay_done, &c->flags);
|
||||
bch2_journal_set_replay_done(&c->journal);
|
||||
|
||||
|
|
|
|||
|
|
@ -290,8 +290,8 @@ enum bch_fsck_flags {
|
|||
x(btree_node_bkey_bad_u64s, 260, 0) \
|
||||
x(btree_node_topology_empty_interior_node, 261, 0) \
|
||||
x(btree_ptr_v2_min_key_bad, 262, 0) \
|
||||
x(btree_root_unreadable_and_scan_found_nothing, 263, 0) \
|
||||
x(snapshot_node_missing, 264, 0) \
|
||||
x(btree_root_unreadable_and_scan_found_nothing, 263, FSCK_AUTOFIX) \
|
||||
x(snapshot_node_missing, 264, FSCK_AUTOFIX) \
|
||||
x(dup_backpointer_to_bad_csum_extent, 265, 0) \
|
||||
x(btree_bitmap_not_marked, 266, FSCK_AUTOFIX) \
|
||||
x(sb_clean_entry_overrun, 267, 0) \
|
||||
|
|
|
|||
|
|
@ -73,14 +73,30 @@ int bch2_set_version_incompat(struct bch_fs *c, enum bcachefs_metadata_version v
|
|||
? 0
|
||||
: -BCH_ERR_may_not_use_incompat_feature;
|
||||
|
||||
mutex_lock(&c->sb_lock);
|
||||
if (!ret) {
|
||||
mutex_lock(&c->sb_lock);
|
||||
SET_BCH_SB_VERSION_INCOMPAT(c->disk_sb.sb,
|
||||
max(BCH_SB_VERSION_INCOMPAT(c->disk_sb.sb), version));
|
||||
bch2_write_super(c);
|
||||
mutex_unlock(&c->sb_lock);
|
||||
} else {
|
||||
darray_for_each(c->incompat_versions_requested, i)
|
||||
if (version == *i)
|
||||
goto out;
|
||||
|
||||
darray_push(&c->incompat_versions_requested, version);
|
||||
struct printbuf buf = PRINTBUF;
|
||||
prt_str(&buf, "requested incompat feature ");
|
||||
bch2_version_to_text(&buf, version);
|
||||
prt_str(&buf, " currently not enabled");
|
||||
prt_printf(&buf, "\n set version_upgrade=incompat to enable");
|
||||
|
||||
bch_notice(c, "%s", buf.buf);
|
||||
printbuf_exit(&buf);
|
||||
}
|
||||
|
||||
out:
|
||||
mutex_unlock(&c->sb_lock);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -589,6 +589,7 @@ static void __bch2_fs_free(struct bch_fs *c)
|
|||
free_percpu(c->online_reserved);
|
||||
}
|
||||
|
||||
darray_exit(&c->incompat_versions_requested);
|
||||
darray_exit(&c->btree_roots_extra);
|
||||
free_percpu(c->pcpu);
|
||||
free_percpu(c->usage);
|
||||
|
|
@ -1013,6 +1014,11 @@ static void print_mount_opts(struct bch_fs *c)
|
|||
bch2_opt_to_text(&p, c, c->disk_sb.sb, opt, v, OPT_SHOW_MOUNT_STYLE);
|
||||
}
|
||||
|
||||
if (c->sb.version_incompat_allowed != c->sb.version) {
|
||||
prt_printf(&p, "\n allowing incompatible features above ");
|
||||
bch2_version_to_text(&p, c->sb.version_incompat_allowed);
|
||||
}
|
||||
|
||||
bch_info(c, "%s", p.buf);
|
||||
printbuf_exit(&p);
|
||||
}
|
||||
|
|
@ -1757,7 +1763,8 @@ int bch2_dev_remove(struct bch_fs *c, struct bch_dev *ca, int flags)
|
|||
up_write(&c->state_lock);
|
||||
return 0;
|
||||
err:
|
||||
if (ca->mi.state == BCH_MEMBER_STATE_rw &&
|
||||
if (test_bit(BCH_FS_rw, &c->flags) &&
|
||||
ca->mi.state == BCH_MEMBER_STATE_rw &&
|
||||
!percpu_ref_is_zero(&ca->io_ref[READ]))
|
||||
__bch2_dev_read_write(c, ca);
|
||||
up_write(&c->state_lock);
|
||||
|
|
|
|||
Loading…
Reference in New Issue
Block a user