From fa014953f9409edd32b04cb5c0f4167b8a537e65 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Fri, 29 Dec 2023 13:39:07 -0500 Subject: [PATCH 01/13] bcachefs: Fix extents iteration + snapshots interaction peek_upto() checks against the end position and bails out before FILTER_SNAPSHOTS checks; this is because if we end up at a different inode number than the original search key none of the keys we see might be visibile in the current snapshot - we might be looking at inode in a completely different subvolume. But this is broken, because when we're iterating over extents we're checking against the extent start position to decide when to bail out, and the extent start position isn't monotonically increasing until after we've run FILTER_SNAPSHOTS. Fix this by adding a simple inode number check where the old bailout check was, and moving the main check to the correct position. Signed-off-by: Kent Overstreet Reported-by: "Carl E. Thompson" --- fs/bcachefs/btree_iter.c | 35 ++++++++++++++++++++++++----------- 1 file changed, 24 insertions(+), 11 deletions(-) diff --git a/fs/bcachefs/btree_iter.c b/fs/bcachefs/btree_iter.c index 6be79129738d..da594e006769 100644 --- a/fs/bcachefs/btree_iter.c +++ b/fs/bcachefs/btree_iter.c @@ -2085,18 +2085,16 @@ struct bkey_s_c bch2_btree_iter_peek_upto(struct btree_iter *iter, struct bpos e goto out_no_locked; /* - * iter->pos should be mononotically increasing, and always be - * equal to the key we just returned - except extents can - * straddle iter->pos: + * We need to check against @end before FILTER_SNAPSHOTS because + * if we get to a different inode that requested we might be + * seeing keys for a different snapshot tree that will all be + * filtered out. + * + * But we can't do the full check here, because bkey_start_pos() + * isn't monotonically increasing before FILTER_SNAPSHOTS, and + * that's what we check against in extents mode: */ - if (!(iter->flags & BTREE_ITER_IS_EXTENTS)) - iter_pos = k.k->p; - else - iter_pos = bkey_max(iter->pos, bkey_start_pos(k.k)); - - if (unlikely(!(iter->flags & BTREE_ITER_IS_EXTENTS) - ? bkey_gt(iter_pos, end) - : bkey_ge(iter_pos, end))) + if (k.k->p.inode > end.inode) goto end; if (iter->update_path && @@ -2155,6 +2153,21 @@ struct bkey_s_c bch2_btree_iter_peek_upto(struct btree_iter *iter, struct bpos e continue; } + /* + * iter->pos should be mononotically increasing, and always be + * equal to the key we just returned - except extents can + * straddle iter->pos: + */ + if (!(iter->flags & BTREE_ITER_IS_EXTENTS)) + iter_pos = k.k->p; + else + iter_pos = bkey_max(iter->pos, bkey_start_pos(k.k)); + + if (unlikely(!(iter->flags & BTREE_ITER_IS_EXTENTS) + ? bkey_gt(iter_pos, end) + : bkey_ge(iter_pos, end))) + goto end; + break; } From f2eb8434e4fec891ae842183b9193ae759ee6d78 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sat, 30 Dec 2023 20:52:18 -0500 Subject: [PATCH 02/13] bcachefs: fix invalid free in dio write path MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit turns out iterate_iovec() mutates __iov, we need to save our own copy Signed-off-by: Kent Overstreet Reported-by: Marcin Mirosław --- fs/bcachefs/fs-io-direct.c | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/fs/bcachefs/fs-io-direct.c b/fs/bcachefs/fs-io-direct.c index 9a479e4de6b3..84e20c3ada6c 100644 --- a/fs/bcachefs/fs-io-direct.c +++ b/fs/bcachefs/fs-io-direct.c @@ -216,11 +216,11 @@ struct dio_write { struct address_space *mapping; struct bch_inode_info *inode; struct mm_struct *mm; + const struct iovec *iov; unsigned loop:1, extending:1, sync:1, - flush:1, - free_iov:1; + flush:1; struct quota_res quota_res; u64 written; @@ -312,12 +312,10 @@ static noinline int bch2_dio_write_copy_iov(struct dio_write *dio) return -1; if (dio->iter.nr_segs > ARRAY_SIZE(dio->inline_vecs)) { - iov = kmalloc_array(dio->iter.nr_segs, sizeof(*iov), + dio->iov = iov = kmalloc_array(dio->iter.nr_segs, sizeof(*iov), GFP_KERNEL); if (unlikely(!iov)) return -ENOMEM; - - dio->free_iov = true; } memcpy(iov, dio->iter.__iov, dio->iter.nr_segs * sizeof(*iov)); @@ -381,8 +379,7 @@ static __always_inline long bch2_dio_write_done(struct dio_write *dio) bch2_pagecache_block_put(inode); - if (dio->free_iov) - kfree(dio->iter.__iov); + kfree(dio->iov); ret = dio->op.error ?: ((long) dio->written << 9); bio_put(&dio->op.wbio.bio); @@ -626,11 +623,11 @@ ssize_t bch2_direct_write(struct kiocb *req, struct iov_iter *iter) dio->mapping = mapping; dio->inode = inode; dio->mm = current->mm; + dio->iov = NULL; dio->loop = false; dio->extending = extending; dio->sync = is_sync_kiocb(req) || extending; dio->flush = iocb_is_dsync(req) && !c->opts.journal_flush_disabled; - dio->free_iov = false; dio->quota_res.sectors = 0; dio->written = 0; dio->iter = *iter; From f87bf892ea9848696ddfd184964d303202dd19dd Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Fri, 29 Dec 2023 17:18:56 -0500 Subject: [PATCH 03/13] bcachefs: fix setting version_upgrade_complete If a superblock write hasn't happened (i.e. we never had to go rw), then c->sb.version will be out of date w.r.t. c->disk_sb.sb->version. Signed-off-by: Kent Overstreet --- fs/bcachefs/recovery.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/bcachefs/recovery.c b/fs/bcachefs/recovery.c index c7d9074c82d9..c2f183d2ffe8 100644 --- a/fs/bcachefs/recovery.c +++ b/fs/bcachefs/recovery.c @@ -868,8 +868,8 @@ int bch2_fs_recovery(struct bch_fs *c) } mutex_lock(&c->sb_lock); - if (BCH_SB_VERSION_UPGRADE_COMPLETE(c->disk_sb.sb) != c->sb.version) { - SET_BCH_SB_VERSION_UPGRADE_COMPLETE(c->disk_sb.sb, c->sb.version); + if (BCH_SB_VERSION_UPGRADE_COMPLETE(c->disk_sb.sb) != le16_to_cpu(c->disk_sb.sb->version)) { + SET_BCH_SB_VERSION_UPGRADE_COMPLETE(c->disk_sb.sb, le16_to_cpu(c->disk_sb.sb->version)); write_sb = true; } From 73ab9e03861c22ec6723b7b110d62ba60f37c164 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Wed, 8 Nov 2023 17:11:44 -0500 Subject: [PATCH 04/13] bcachefs: Factor out darray resize slowpath Move the slowpath (actually growing the darray) to an out-of-line function; also, add some helpers for the upcoming btree write buffer rewrite. Signed-off-by: Kent Overstreet --- fs/bcachefs/Makefile | 1 + fs/bcachefs/darray.c | 21 +++++++++++++++++++++ fs/bcachefs/darray.h | 32 +++++++++++++++++++------------- 3 files changed, 41 insertions(+), 13 deletions(-) create mode 100644 fs/bcachefs/darray.c diff --git a/fs/bcachefs/Makefile b/fs/bcachefs/Makefile index 45b64f89258c..ed550a40463e 100644 --- a/fs/bcachefs/Makefile +++ b/fs/bcachefs/Makefile @@ -28,6 +28,7 @@ bcachefs-y := \ clock.o \ compress.o \ counters.o \ + darray.o \ debug.o \ dirent.o \ disk_groups.o \ diff --git a/fs/bcachefs/darray.c b/fs/bcachefs/darray.c new file mode 100644 index 000000000000..aae07be1d911 --- /dev/null +++ b/fs/bcachefs/darray.c @@ -0,0 +1,21 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include +#include +#include "darray.h" + +int __bch2_darray_resize(darray_void *d, size_t element_size, size_t new_size, gfp_t gfp) +{ + if (new_size > d->size) { + new_size = roundup_pow_of_two(new_size); + + void *data = krealloc_array(d->data, new_size, element_size, gfp); + if (!data) + return -ENOMEM; + + d->data = data; + d->size = new_size; + } + + return 0; +} diff --git a/fs/bcachefs/darray.h b/fs/bcachefs/darray.h index 87b4b2d1ec76..43ea21ad9ea3 100644 --- a/fs/bcachefs/darray.h +++ b/fs/bcachefs/darray.h @@ -8,7 +8,6 @@ * Inspired by CCAN's darray */ -#include "util.h" #include #define DARRAY(type) \ @@ -19,20 +18,25 @@ struct { \ typedef DARRAY(void) darray_void; +int __bch2_darray_resize(darray_void *, size_t, size_t, gfp_t); + +static inline int __darray_resize(darray_void *d, size_t element_size, + size_t new_size, gfp_t gfp) +{ + return unlikely(new_size > d->size) + ? __bch2_darray_resize(d, element_size, new_size, gfp) + : 0; +} + +#define darray_resize_gfp(_d, _new_size, _gfp) \ + __darray_resize((darray_void *) (_d), sizeof((_d)->data[0]), (_new_size), _gfp) + +#define darray_resize(_d, _new_size) \ + darray_resize_gfp(_d, _new_size, GFP_KERNEL) + static inline int __darray_make_room(darray_void *d, size_t t_size, size_t more, gfp_t gfp) { - if (d->nr + more > d->size) { - size_t new_size = roundup_pow_of_two(d->nr + more); - void *data = krealloc_array(d->data, new_size, t_size, gfp); - - if (!data) - return -ENOMEM; - - d->data = data; - d->size = new_size; - } - - return 0; + return __darray_resize(d, t_size, d->nr + more, gfp); } #define darray_make_room_gfp(_d, _more, _gfp) \ @@ -41,6 +45,8 @@ static inline int __darray_make_room(darray_void *d, size_t t_size, size_t more, #define darray_make_room(_d, _more) \ darray_make_room_gfp(_d, _more, GFP_KERNEL) +#define darray_room(_d) ((_d).size - (_d).nr) + #define darray_top(_d) ((_d).data[(_d).nr]) #define darray_push_gfp(_d, _item, _gfp) \ From a58a6a58f5ad55aa1f5c54598c44f1d9938c1793 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Thu, 16 Nov 2023 19:44:48 -0500 Subject: [PATCH 05/13] bcachefs: Switch darray to kvmalloc() We sometimes use darrays for quite large buffers - the btree write buffer in particular needs large buffers, since it must be sized to hold all the write buffer keys outstanding in the journal. Signed-off-by: Kent Overstreet --- fs/bcachefs/darray.c | 4 +++- fs/bcachefs/darray.h | 2 +- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/fs/bcachefs/darray.c b/fs/bcachefs/darray.c index aae07be1d911..4c900c853268 100644 --- a/fs/bcachefs/darray.c +++ b/fs/bcachefs/darray.c @@ -9,10 +9,12 @@ int __bch2_darray_resize(darray_void *d, size_t element_size, size_t new_size, g if (new_size > d->size) { new_size = roundup_pow_of_two(new_size); - void *data = krealloc_array(d->data, new_size, element_size, gfp); + void *data = kvmalloc_array(new_size, element_size, gfp); if (!data) return -ENOMEM; + memcpy(data, d->data, d->size * element_size); + kvfree(d->data); d->data = data; d->size = new_size; } diff --git a/fs/bcachefs/darray.h b/fs/bcachefs/darray.h index 43ea21ad9ea3..6157c53d5bf0 100644 --- a/fs/bcachefs/darray.h +++ b/fs/bcachefs/darray.h @@ -92,7 +92,7 @@ do { \ #define darray_exit(_d) \ do { \ - kfree((_d)->data); \ + kvfree((_d)->data); \ darray_init(_d); \ } while (0) From 099dc5c29dff52c0c37abcaa6cc747a1ac8df5fe Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sat, 30 Dec 2023 14:38:29 -0500 Subject: [PATCH 06/13] bcachefs: DARRAY_PREALLOCATED() Add support to darray for preallocating some number of elements. Signed-off-by: Kent Overstreet --- fs/bcachefs/darray.c | 5 +++-- fs/bcachefs/darray.h | 28 +++++++++++++++++----------- 2 files changed, 20 insertions(+), 13 deletions(-) diff --git a/fs/bcachefs/darray.c b/fs/bcachefs/darray.c index 4c900c853268..ac35b8b705ae 100644 --- a/fs/bcachefs/darray.c +++ b/fs/bcachefs/darray.c @@ -4,7 +4,7 @@ #include #include "darray.h" -int __bch2_darray_resize(darray_void *d, size_t element_size, size_t new_size, gfp_t gfp) +int __bch2_darray_resize(darray_char *d, size_t element_size, size_t new_size, gfp_t gfp) { if (new_size > d->size) { new_size = roundup_pow_of_two(new_size); @@ -14,7 +14,8 @@ int __bch2_darray_resize(darray_void *d, size_t element_size, size_t new_size, g return -ENOMEM; memcpy(data, d->data, d->size * element_size); - kvfree(d->data); + if (d->data != d->preallocated) + kvfree(d->data); d->data = data; d->size = new_size; } diff --git a/fs/bcachefs/darray.h b/fs/bcachefs/darray.h index 6157c53d5bf0..e367c625f057 100644 --- a/fs/bcachefs/darray.h +++ b/fs/bcachefs/darray.h @@ -10,17 +10,20 @@ #include -#define DARRAY(type) \ +#define DARRAY_PREALLOCATED(_type, _nr) \ struct { \ size_t nr, size; \ - type *data; \ + _type *data; \ + _type preallocated[_nr]; \ } -typedef DARRAY(void) darray_void; +#define DARRAY(_type) DARRAY_PREALLOCATED(_type, 0) -int __bch2_darray_resize(darray_void *, size_t, size_t, gfp_t); +typedef DARRAY(char) darray_char; -static inline int __darray_resize(darray_void *d, size_t element_size, +int __bch2_darray_resize(darray_char *, size_t, size_t, gfp_t); + +static inline int __darray_resize(darray_char *d, size_t element_size, size_t new_size, gfp_t gfp) { return unlikely(new_size > d->size) @@ -29,18 +32,18 @@ static inline int __darray_resize(darray_void *d, size_t element_size, } #define darray_resize_gfp(_d, _new_size, _gfp) \ - __darray_resize((darray_void *) (_d), sizeof((_d)->data[0]), (_new_size), _gfp) + unlikely(__darray_resize((darray_char *) (_d), sizeof((_d)->data[0]), (_new_size), _gfp)) #define darray_resize(_d, _new_size) \ darray_resize_gfp(_d, _new_size, GFP_KERNEL) -static inline int __darray_make_room(darray_void *d, size_t t_size, size_t more, gfp_t gfp) +static inline int __darray_make_room(darray_char *d, size_t t_size, size_t more, gfp_t gfp) { return __darray_resize(d, t_size, d->nr + more, gfp); } #define darray_make_room_gfp(_d, _more, _gfp) \ - __darray_make_room((darray_void *) (_d), sizeof((_d)->data[0]), (_more), _gfp) + __darray_make_room((darray_char *) (_d), sizeof((_d)->data[0]), (_more), _gfp) #define darray_make_room(_d, _more) \ darray_make_room_gfp(_d, _more, GFP_KERNEL) @@ -86,13 +89,16 @@ static inline int __darray_make_room(darray_void *d, size_t t_size, size_t more, #define darray_init(_d) \ do { \ - (_d)->data = NULL; \ - (_d)->nr = (_d)->size = 0; \ + (_d)->nr = 0; \ + (_d)->size = ARRAY_SIZE((_d)->preallocated); \ + (_d)->data = (_d)->size ? (_d)->preallocated : NULL; \ } while (0) #define darray_exit(_d) \ do { \ - kvfree((_d)->data); \ + if (!ARRAY_SIZE((_d)->preallocated) || \ + (_d)->data != (_d)->preallocated) \ + kvfree((_d)->data); \ darray_init(_d); \ } while (0) From d9534cc9fc455214e857bc1d32f44d5a229dc602 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sat, 30 Dec 2023 15:32:05 -0500 Subject: [PATCH 07/13] bcachefs: fix buffer overflow in nocow write path BCH_REPLICAS_MAX isn't the actual maximum number of pointers in an extent, it's the maximum number of dirty pointers. We don't have a real restriction on the number of cached pointers, and we don't want a fixed size array here anyways - so switch to DARRAY_PREALLOCATED(). Signed-off-by: Kent Overstreet Reported-and-tested-by: Daniel J Blueman --- fs/bcachefs/io_write.c | 82 +++++++++++++++++++++--------------------- 1 file changed, 41 insertions(+), 41 deletions(-) diff --git a/fs/bcachefs/io_write.c b/fs/bcachefs/io_write.c index 8ede46b1e354..8c8cb1541ac9 100644 --- a/fs/bcachefs/io_write.c +++ b/fs/bcachefs/io_write.c @@ -1216,6 +1216,12 @@ static CLOSURE_CALLBACK(bch2_nocow_write_done) bch2_write_done(cl); } +struct bucket_to_lock { + struct bpos b; + unsigned gen; + struct nocow_lock_bucket *l; +}; + static void bch2_nocow_write(struct bch_write_op *op) { struct bch_fs *c = op->c; @@ -1224,18 +1230,16 @@ static void bch2_nocow_write(struct bch_write_op *op) struct bkey_s_c k; struct bkey_ptrs_c ptrs; const struct bch_extent_ptr *ptr; - struct { - struct bpos b; - unsigned gen; - struct nocow_lock_bucket *l; - } buckets[BCH_REPLICAS_MAX]; - unsigned nr_buckets = 0; + DARRAY_PREALLOCATED(struct bucket_to_lock, 3) buckets; + struct bucket_to_lock *i; u32 snapshot; - int ret, i; + struct bucket_to_lock *stale_at; + int ret; if (op->flags & BCH_WRITE_MOVE) return; + darray_init(&buckets); trans = bch2_trans_get(c); retry: bch2_trans_begin(trans); @@ -1250,7 +1254,7 @@ static void bch2_nocow_write(struct bch_write_op *op) while (1) { struct bio *bio = &op->wbio.bio; - nr_buckets = 0; + buckets.nr = 0; k = bch2_btree_iter_peek_slot(&iter); ret = bkey_err(k); @@ -1263,26 +1267,26 @@ static void bch2_nocow_write(struct bch_write_op *op) break; if (bch2_keylist_realloc(&op->insert_keys, - op->inline_keys, - ARRAY_SIZE(op->inline_keys), - k.k->u64s)) + op->inline_keys, + ARRAY_SIZE(op->inline_keys), + k.k->u64s)) break; /* Get iorefs before dropping btree locks: */ ptrs = bch2_bkey_ptrs_c(k); bkey_for_each_ptr(ptrs, ptr) { - buckets[nr_buckets].b = PTR_BUCKET_POS(c, ptr); - buckets[nr_buckets].gen = ptr->gen; - buckets[nr_buckets].l = - bucket_nocow_lock(&c->nocow_locks, - bucket_to_u64(buckets[nr_buckets].b)); - - prefetch(buckets[nr_buckets].l); + struct bpos b = PTR_BUCKET_POS(c, ptr); + struct nocow_lock_bucket *l = + bucket_nocow_lock(&c->nocow_locks, bucket_to_u64(b)); + prefetch(l); if (unlikely(!bch2_dev_get_ioref(bch_dev_bkey_exists(c, ptr->dev), WRITE))) goto err_get_ioref; - nr_buckets++; + /* XXX allocating memory with btree locks held - rare */ + darray_push_gfp(&buckets, ((struct bucket_to_lock) { + .b = b, .gen = ptr->gen, .l = l, + }), GFP_KERNEL|__GFP_NOFAIL); if (ptr->unwritten) op->flags |= BCH_WRITE_CONVERT_UNWRITTEN; @@ -1296,21 +1300,21 @@ static void bch2_nocow_write(struct bch_write_op *op) if (op->flags & BCH_WRITE_CONVERT_UNWRITTEN) bch2_cut_back(POS(op->pos.inode, op->pos.offset + bio_sectors(bio)), op->insert_keys.top); - for (i = 0; i < nr_buckets; i++) { - struct bch_dev *ca = bch_dev_bkey_exists(c, buckets[i].b.inode); - struct nocow_lock_bucket *l = buckets[i].l; - bool stale; + darray_for_each(buckets, i) { + struct bch_dev *ca = bch_dev_bkey_exists(c, i->b.inode); - __bch2_bucket_nocow_lock(&c->nocow_locks, l, - bucket_to_u64(buckets[i].b), + __bch2_bucket_nocow_lock(&c->nocow_locks, i->l, + bucket_to_u64(i->b), BUCKET_NOCOW_LOCK_UPDATE); rcu_read_lock(); - stale = gen_after(*bucket_gen(ca, buckets[i].b.offset), buckets[i].gen); + bool stale = gen_after(*bucket_gen(ca, i->b.offset), i->gen); rcu_read_unlock(); - if (unlikely(stale)) + if (unlikely(stale)) { + stale_at = i; goto err_bucket_stale; + } } bio = &op->wbio.bio; @@ -1346,15 +1350,14 @@ static void bch2_nocow_write(struct bch_write_op *op) if (ret) { bch_err_inum_offset_ratelimited(c, - op->pos.inode, - op->pos.offset << 9, - "%s: btree lookup error %s", - __func__, bch2_err_str(ret)); + op->pos.inode, op->pos.offset << 9, + "%s: btree lookup error %s", __func__, bch2_err_str(ret)); op->error = ret; op->flags |= BCH_WRITE_DONE; } bch2_trans_put(trans); + darray_exit(&buckets); /* fallback to cow write path? */ if (!(op->flags & BCH_WRITE_DONE)) { @@ -1374,24 +1377,21 @@ static void bch2_nocow_write(struct bch_write_op *op) } return; err_get_ioref: - for (i = 0; i < nr_buckets; i++) - percpu_ref_put(&bch_dev_bkey_exists(c, buckets[i].b.inode)->io_ref); + darray_for_each(buckets, i) + percpu_ref_put(&bch_dev_bkey_exists(c, i->b.inode)->io_ref); /* Fall back to COW path: */ goto out; err_bucket_stale: - while (i >= 0) { - bch2_bucket_nocow_unlock(&c->nocow_locks, - buckets[i].b, - BUCKET_NOCOW_LOCK_UPDATE); - --i; + darray_for_each(buckets, i) { + bch2_bucket_nocow_unlock(&c->nocow_locks, i->b, BUCKET_NOCOW_LOCK_UPDATE); + if (i == stale_at) + break; } - for (i = 0; i < nr_buckets; i++) - percpu_ref_put(&bch_dev_bkey_exists(c, buckets[i].b.inode)->io_ref); /* We can retry this: */ ret = -BCH_ERR_transaction_restart; - goto out; + goto err_get_ioref; } static void __bch2_write(struct bch_write_op *op) From 6b49b0f7e73817265f80c565037ac812df6f1ae3 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sat, 30 Dec 2023 23:56:41 -0500 Subject: [PATCH 08/13] bcachefs: move BCH_SB_ERRS() to sb-errors_types.h we need BCH_SB_ERR_MAX in bcachefs.h Signed-off-by: Kent Overstreet --- fs/bcachefs/sb-errors.h | 253 ---------------------------------- fs/bcachefs/sb-errors_types.h | 253 ++++++++++++++++++++++++++++++++++ 2 files changed, 253 insertions(+), 253 deletions(-) diff --git a/fs/bcachefs/sb-errors.h b/fs/bcachefs/sb-errors.h index 7557fe94f06d..27e5dc7884bb 100644 --- a/fs/bcachefs/sb-errors.h +++ b/fs/bcachefs/sb-errors.h @@ -4,259 +4,6 @@ #include "sb-errors_types.h" -#define BCH_SB_ERRS() \ - x(clean_but_journal_not_empty, 0) \ - x(dirty_but_no_journal_entries, 1) \ - x(dirty_but_no_journal_entries_post_drop_nonflushes, 2) \ - x(sb_clean_journal_seq_mismatch, 3) \ - x(sb_clean_btree_root_mismatch, 4) \ - x(sb_clean_missing, 5) \ - x(jset_unsupported_version, 6) \ - x(jset_unknown_csum, 7) \ - x(jset_last_seq_newer_than_seq, 8) \ - x(jset_past_bucket_end, 9) \ - x(jset_seq_blacklisted, 10) \ - x(journal_entries_missing, 11) \ - x(journal_entry_replicas_not_marked, 12) \ - x(journal_entry_past_jset_end, 13) \ - x(journal_entry_replicas_data_mismatch, 14) \ - x(journal_entry_bkey_u64s_0, 15) \ - x(journal_entry_bkey_past_end, 16) \ - x(journal_entry_bkey_bad_format, 17) \ - x(journal_entry_bkey_invalid, 18) \ - x(journal_entry_btree_root_bad_size, 19) \ - x(journal_entry_blacklist_bad_size, 20) \ - x(journal_entry_blacklist_v2_bad_size, 21) \ - x(journal_entry_blacklist_v2_start_past_end, 22) \ - x(journal_entry_usage_bad_size, 23) \ - x(journal_entry_data_usage_bad_size, 24) \ - x(journal_entry_clock_bad_size, 25) \ - x(journal_entry_clock_bad_rw, 26) \ - x(journal_entry_dev_usage_bad_size, 27) \ - x(journal_entry_dev_usage_bad_dev, 28) \ - x(journal_entry_dev_usage_bad_pad, 29) \ - x(btree_node_unreadable, 30) \ - x(btree_node_fault_injected, 31) \ - x(btree_node_bad_magic, 32) \ - x(btree_node_bad_seq, 33) \ - x(btree_node_unsupported_version, 34) \ - x(btree_node_bset_older_than_sb_min, 35) \ - x(btree_node_bset_newer_than_sb, 36) \ - x(btree_node_data_missing, 37) \ - x(btree_node_bset_after_end, 38) \ - x(btree_node_replicas_sectors_written_mismatch, 39) \ - x(btree_node_replicas_data_mismatch, 40) \ - x(bset_unknown_csum, 41) \ - x(bset_bad_csum, 42) \ - x(bset_past_end_of_btree_node, 43) \ - x(bset_wrong_sector_offset, 44) \ - x(bset_empty, 45) \ - x(bset_bad_seq, 46) \ - x(bset_blacklisted_journal_seq, 47) \ - x(first_bset_blacklisted_journal_seq, 48) \ - x(btree_node_bad_btree, 49) \ - x(btree_node_bad_level, 50) \ - x(btree_node_bad_min_key, 51) \ - x(btree_node_bad_max_key, 52) \ - x(btree_node_bad_format, 53) \ - x(btree_node_bkey_past_bset_end, 54) \ - x(btree_node_bkey_bad_format, 55) \ - x(btree_node_bad_bkey, 56) \ - x(btree_node_bkey_out_of_order, 57) \ - x(btree_root_bkey_invalid, 58) \ - x(btree_root_read_error, 59) \ - x(btree_root_bad_min_key, 60) \ - x(btree_root_bad_max_key, 61) \ - x(btree_node_read_error, 62) \ - x(btree_node_topology_bad_min_key, 63) \ - x(btree_node_topology_bad_max_key, 64) \ - x(btree_node_topology_overwritten_by_prev_node, 65) \ - x(btree_node_topology_overwritten_by_next_node, 66) \ - x(btree_node_topology_interior_node_empty, 67) \ - x(fs_usage_hidden_wrong, 68) \ - x(fs_usage_btree_wrong, 69) \ - x(fs_usage_data_wrong, 70) \ - x(fs_usage_cached_wrong, 71) \ - x(fs_usage_reserved_wrong, 72) \ - x(fs_usage_persistent_reserved_wrong, 73) \ - x(fs_usage_nr_inodes_wrong, 74) \ - x(fs_usage_replicas_wrong, 75) \ - x(dev_usage_buckets_wrong, 76) \ - x(dev_usage_sectors_wrong, 77) \ - x(dev_usage_fragmented_wrong, 78) \ - x(dev_usage_buckets_ec_wrong, 79) \ - x(bkey_version_in_future, 80) \ - x(bkey_u64s_too_small, 81) \ - x(bkey_invalid_type_for_btree, 82) \ - x(bkey_extent_size_zero, 83) \ - x(bkey_extent_size_greater_than_offset, 84) \ - x(bkey_size_nonzero, 85) \ - x(bkey_snapshot_nonzero, 86) \ - x(bkey_snapshot_zero, 87) \ - x(bkey_at_pos_max, 88) \ - x(bkey_before_start_of_btree_node, 89) \ - x(bkey_after_end_of_btree_node, 90) \ - x(bkey_val_size_nonzero, 91) \ - x(bkey_val_size_too_small, 92) \ - x(alloc_v1_val_size_bad, 93) \ - x(alloc_v2_unpack_error, 94) \ - x(alloc_v3_unpack_error, 95) \ - x(alloc_v4_val_size_bad, 96) \ - x(alloc_v4_backpointers_start_bad, 97) \ - x(alloc_key_data_type_bad, 98) \ - x(alloc_key_empty_but_have_data, 99) \ - x(alloc_key_dirty_sectors_0, 100) \ - x(alloc_key_data_type_inconsistency, 101) \ - x(alloc_key_to_missing_dev_bucket, 102) \ - x(alloc_key_cached_inconsistency, 103) \ - x(alloc_key_cached_but_read_time_zero, 104) \ - x(alloc_key_to_missing_lru_entry, 105) \ - x(alloc_key_data_type_wrong, 106) \ - x(alloc_key_gen_wrong, 107) \ - x(alloc_key_dirty_sectors_wrong, 108) \ - x(alloc_key_cached_sectors_wrong, 109) \ - x(alloc_key_stripe_wrong, 110) \ - x(alloc_key_stripe_redundancy_wrong, 111) \ - x(bucket_sector_count_overflow, 112) \ - x(bucket_metadata_type_mismatch, 113) \ - x(need_discard_key_wrong, 114) \ - x(freespace_key_wrong, 115) \ - x(freespace_hole_missing, 116) \ - x(bucket_gens_val_size_bad, 117) \ - x(bucket_gens_key_wrong, 118) \ - x(bucket_gens_hole_wrong, 119) \ - x(bucket_gens_to_invalid_dev, 120) \ - x(bucket_gens_to_invalid_buckets, 121) \ - x(bucket_gens_nonzero_for_invalid_buckets, 122) \ - x(need_discard_freespace_key_to_invalid_dev_bucket, 123) \ - x(need_discard_freespace_key_bad, 124) \ - x(backpointer_pos_wrong, 125) \ - x(backpointer_to_missing_device, 126) \ - x(backpointer_to_missing_alloc, 127) \ - x(backpointer_to_missing_ptr, 128) \ - x(lru_entry_at_time_0, 129) \ - x(lru_entry_to_invalid_bucket, 130) \ - x(lru_entry_bad, 131) \ - x(btree_ptr_val_too_big, 132) \ - x(btree_ptr_v2_val_too_big, 133) \ - x(btree_ptr_has_non_ptr, 134) \ - x(extent_ptrs_invalid_entry, 135) \ - x(extent_ptrs_no_ptrs, 136) \ - x(extent_ptrs_too_many_ptrs, 137) \ - x(extent_ptrs_redundant_crc, 138) \ - x(extent_ptrs_redundant_stripe, 139) \ - x(extent_ptrs_unwritten, 140) \ - x(extent_ptrs_written_and_unwritten, 141) \ - x(ptr_to_invalid_device, 142) \ - x(ptr_to_duplicate_device, 143) \ - x(ptr_after_last_bucket, 144) \ - x(ptr_before_first_bucket, 145) \ - x(ptr_spans_multiple_buckets, 146) \ - x(ptr_to_missing_backpointer, 147) \ - x(ptr_to_missing_alloc_key, 148) \ - x(ptr_to_missing_replicas_entry, 149) \ - x(ptr_to_missing_stripe, 150) \ - x(ptr_to_incorrect_stripe, 151) \ - x(ptr_gen_newer_than_bucket_gen, 152) \ - x(ptr_too_stale, 153) \ - x(stale_dirty_ptr, 154) \ - x(ptr_bucket_data_type_mismatch, 155) \ - x(ptr_cached_and_erasure_coded, 156) \ - x(ptr_crc_uncompressed_size_too_small, 157) \ - x(ptr_crc_csum_type_unknown, 158) \ - x(ptr_crc_compression_type_unknown, 159) \ - x(ptr_crc_redundant, 160) \ - x(ptr_crc_uncompressed_size_too_big, 161) \ - x(ptr_crc_nonce_mismatch, 162) \ - x(ptr_stripe_redundant, 163) \ - x(reservation_key_nr_replicas_invalid, 164) \ - x(reflink_v_refcount_wrong, 165) \ - x(reflink_p_to_missing_reflink_v, 166) \ - x(stripe_pos_bad, 167) \ - x(stripe_val_size_bad, 168) \ - x(stripe_sector_count_wrong, 169) \ - x(snapshot_tree_pos_bad, 170) \ - x(snapshot_tree_to_missing_snapshot, 171) \ - x(snapshot_tree_to_missing_subvol, 172) \ - x(snapshot_tree_to_wrong_subvol, 173) \ - x(snapshot_tree_to_snapshot_subvol, 174) \ - x(snapshot_pos_bad, 175) \ - x(snapshot_parent_bad, 176) \ - x(snapshot_children_not_normalized, 177) \ - x(snapshot_child_duplicate, 178) \ - x(snapshot_child_bad, 179) \ - x(snapshot_skiplist_not_normalized, 180) \ - x(snapshot_skiplist_bad, 181) \ - x(snapshot_should_not_have_subvol, 182) \ - x(snapshot_to_bad_snapshot_tree, 183) \ - x(snapshot_bad_depth, 184) \ - x(snapshot_bad_skiplist, 185) \ - x(subvol_pos_bad, 186) \ - x(subvol_not_master_and_not_snapshot, 187) \ - x(subvol_to_missing_root, 188) \ - x(subvol_root_wrong_bi_subvol, 189) \ - x(bkey_in_missing_snapshot, 190) \ - x(inode_pos_inode_nonzero, 191) \ - x(inode_pos_blockdev_range, 192) \ - x(inode_unpack_error, 193) \ - x(inode_str_hash_invalid, 194) \ - x(inode_v3_fields_start_bad, 195) \ - x(inode_snapshot_mismatch, 196) \ - x(inode_unlinked_but_clean, 197) \ - x(inode_unlinked_but_nlink_nonzero, 198) \ - x(inode_checksum_type_invalid, 199) \ - x(inode_compression_type_invalid, 200) \ - x(inode_subvol_root_but_not_dir, 201) \ - x(inode_i_size_dirty_but_clean, 202) \ - x(inode_i_sectors_dirty_but_clean, 203) \ - x(inode_i_sectors_wrong, 204) \ - x(inode_dir_wrong_nlink, 205) \ - x(inode_dir_multiple_links, 206) \ - x(inode_multiple_links_but_nlink_0, 207) \ - x(inode_wrong_backpointer, 208) \ - x(inode_wrong_nlink, 209) \ - x(inode_unreachable, 210) \ - x(deleted_inode_but_clean, 211) \ - x(deleted_inode_missing, 212) \ - x(deleted_inode_is_dir, 213) \ - x(deleted_inode_not_unlinked, 214) \ - x(extent_overlapping, 215) \ - x(extent_in_missing_inode, 216) \ - x(extent_in_non_reg_inode, 217) \ - x(extent_past_end_of_inode, 218) \ - x(dirent_empty_name, 219) \ - x(dirent_val_too_big, 220) \ - x(dirent_name_too_long, 221) \ - x(dirent_name_embedded_nul, 222) \ - x(dirent_name_dot_or_dotdot, 223) \ - x(dirent_name_has_slash, 224) \ - x(dirent_d_type_wrong, 225) \ - x(dirent_d_parent_subvol_wrong, 226) \ - x(dirent_in_missing_dir_inode, 227) \ - x(dirent_in_non_dir_inode, 228) \ - x(dirent_to_missing_inode, 229) \ - x(dirent_to_missing_subvol, 230) \ - x(dirent_to_itself, 231) \ - x(quota_type_invalid, 232) \ - x(xattr_val_size_too_small, 233) \ - x(xattr_val_size_too_big, 234) \ - x(xattr_invalid_type, 235) \ - x(xattr_name_invalid_chars, 236) \ - x(xattr_in_missing_inode, 237) \ - x(root_subvol_missing, 238) \ - x(root_dir_missing, 239) \ - x(root_inode_not_dir, 240) \ - x(dir_loop, 241) \ - x(hash_table_key_duplicate, 242) \ - x(hash_table_key_wrong_offset, 243) - -enum bch_sb_error_id { -#define x(t, n) BCH_FSCK_ERR_##t = n, - BCH_SB_ERRS() -#undef x - BCH_SB_ERR_MAX -}; - extern const struct bch_sb_field_ops bch_sb_field_ops_errors; void bch2_sb_error_count(struct bch_fs *, enum bch_sb_error_id); diff --git a/fs/bcachefs/sb-errors_types.h b/fs/bcachefs/sb-errors_types.h index b1c099843a39..3504c2d09c29 100644 --- a/fs/bcachefs/sb-errors_types.h +++ b/fs/bcachefs/sb-errors_types.h @@ -4,6 +4,259 @@ #include "darray.h" +#define BCH_SB_ERRS() \ + x(clean_but_journal_not_empty, 0) \ + x(dirty_but_no_journal_entries, 1) \ + x(dirty_but_no_journal_entries_post_drop_nonflushes, 2) \ + x(sb_clean_journal_seq_mismatch, 3) \ + x(sb_clean_btree_root_mismatch, 4) \ + x(sb_clean_missing, 5) \ + x(jset_unsupported_version, 6) \ + x(jset_unknown_csum, 7) \ + x(jset_last_seq_newer_than_seq, 8) \ + x(jset_past_bucket_end, 9) \ + x(jset_seq_blacklisted, 10) \ + x(journal_entries_missing, 11) \ + x(journal_entry_replicas_not_marked, 12) \ + x(journal_entry_past_jset_end, 13) \ + x(journal_entry_replicas_data_mismatch, 14) \ + x(journal_entry_bkey_u64s_0, 15) \ + x(journal_entry_bkey_past_end, 16) \ + x(journal_entry_bkey_bad_format, 17) \ + x(journal_entry_bkey_invalid, 18) \ + x(journal_entry_btree_root_bad_size, 19) \ + x(journal_entry_blacklist_bad_size, 20) \ + x(journal_entry_blacklist_v2_bad_size, 21) \ + x(journal_entry_blacklist_v2_start_past_end, 22) \ + x(journal_entry_usage_bad_size, 23) \ + x(journal_entry_data_usage_bad_size, 24) \ + x(journal_entry_clock_bad_size, 25) \ + x(journal_entry_clock_bad_rw, 26) \ + x(journal_entry_dev_usage_bad_size, 27) \ + x(journal_entry_dev_usage_bad_dev, 28) \ + x(journal_entry_dev_usage_bad_pad, 29) \ + x(btree_node_unreadable, 30) \ + x(btree_node_fault_injected, 31) \ + x(btree_node_bad_magic, 32) \ + x(btree_node_bad_seq, 33) \ + x(btree_node_unsupported_version, 34) \ + x(btree_node_bset_older_than_sb_min, 35) \ + x(btree_node_bset_newer_than_sb, 36) \ + x(btree_node_data_missing, 37) \ + x(btree_node_bset_after_end, 38) \ + x(btree_node_replicas_sectors_written_mismatch, 39) \ + x(btree_node_replicas_data_mismatch, 40) \ + x(bset_unknown_csum, 41) \ + x(bset_bad_csum, 42) \ + x(bset_past_end_of_btree_node, 43) \ + x(bset_wrong_sector_offset, 44) \ + x(bset_empty, 45) \ + x(bset_bad_seq, 46) \ + x(bset_blacklisted_journal_seq, 47) \ + x(first_bset_blacklisted_journal_seq, 48) \ + x(btree_node_bad_btree, 49) \ + x(btree_node_bad_level, 50) \ + x(btree_node_bad_min_key, 51) \ + x(btree_node_bad_max_key, 52) \ + x(btree_node_bad_format, 53) \ + x(btree_node_bkey_past_bset_end, 54) \ + x(btree_node_bkey_bad_format, 55) \ + x(btree_node_bad_bkey, 56) \ + x(btree_node_bkey_out_of_order, 57) \ + x(btree_root_bkey_invalid, 58) \ + x(btree_root_read_error, 59) \ + x(btree_root_bad_min_key, 60) \ + x(btree_root_bad_max_key, 61) \ + x(btree_node_read_error, 62) \ + x(btree_node_topology_bad_min_key, 63) \ + x(btree_node_topology_bad_max_key, 64) \ + x(btree_node_topology_overwritten_by_prev_node, 65) \ + x(btree_node_topology_overwritten_by_next_node, 66) \ + x(btree_node_topology_interior_node_empty, 67) \ + x(fs_usage_hidden_wrong, 68) \ + x(fs_usage_btree_wrong, 69) \ + x(fs_usage_data_wrong, 70) \ + x(fs_usage_cached_wrong, 71) \ + x(fs_usage_reserved_wrong, 72) \ + x(fs_usage_persistent_reserved_wrong, 73) \ + x(fs_usage_nr_inodes_wrong, 74) \ + x(fs_usage_replicas_wrong, 75) \ + x(dev_usage_buckets_wrong, 76) \ + x(dev_usage_sectors_wrong, 77) \ + x(dev_usage_fragmented_wrong, 78) \ + x(dev_usage_buckets_ec_wrong, 79) \ + x(bkey_version_in_future, 80) \ + x(bkey_u64s_too_small, 81) \ + x(bkey_invalid_type_for_btree, 82) \ + x(bkey_extent_size_zero, 83) \ + x(bkey_extent_size_greater_than_offset, 84) \ + x(bkey_size_nonzero, 85) \ + x(bkey_snapshot_nonzero, 86) \ + x(bkey_snapshot_zero, 87) \ + x(bkey_at_pos_max, 88) \ + x(bkey_before_start_of_btree_node, 89) \ + x(bkey_after_end_of_btree_node, 90) \ + x(bkey_val_size_nonzero, 91) \ + x(bkey_val_size_too_small, 92) \ + x(alloc_v1_val_size_bad, 93) \ + x(alloc_v2_unpack_error, 94) \ + x(alloc_v3_unpack_error, 95) \ + x(alloc_v4_val_size_bad, 96) \ + x(alloc_v4_backpointers_start_bad, 97) \ + x(alloc_key_data_type_bad, 98) \ + x(alloc_key_empty_but_have_data, 99) \ + x(alloc_key_dirty_sectors_0, 100) \ + x(alloc_key_data_type_inconsistency, 101) \ + x(alloc_key_to_missing_dev_bucket, 102) \ + x(alloc_key_cached_inconsistency, 103) \ + x(alloc_key_cached_but_read_time_zero, 104) \ + x(alloc_key_to_missing_lru_entry, 105) \ + x(alloc_key_data_type_wrong, 106) \ + x(alloc_key_gen_wrong, 107) \ + x(alloc_key_dirty_sectors_wrong, 108) \ + x(alloc_key_cached_sectors_wrong, 109) \ + x(alloc_key_stripe_wrong, 110) \ + x(alloc_key_stripe_redundancy_wrong, 111) \ + x(bucket_sector_count_overflow, 112) \ + x(bucket_metadata_type_mismatch, 113) \ + x(need_discard_key_wrong, 114) \ + x(freespace_key_wrong, 115) \ + x(freespace_hole_missing, 116) \ + x(bucket_gens_val_size_bad, 117) \ + x(bucket_gens_key_wrong, 118) \ + x(bucket_gens_hole_wrong, 119) \ + x(bucket_gens_to_invalid_dev, 120) \ + x(bucket_gens_to_invalid_buckets, 121) \ + x(bucket_gens_nonzero_for_invalid_buckets, 122) \ + x(need_discard_freespace_key_to_invalid_dev_bucket, 123) \ + x(need_discard_freespace_key_bad, 124) \ + x(backpointer_pos_wrong, 125) \ + x(backpointer_to_missing_device, 126) \ + x(backpointer_to_missing_alloc, 127) \ + x(backpointer_to_missing_ptr, 128) \ + x(lru_entry_at_time_0, 129) \ + x(lru_entry_to_invalid_bucket, 130) \ + x(lru_entry_bad, 131) \ + x(btree_ptr_val_too_big, 132) \ + x(btree_ptr_v2_val_too_big, 133) \ + x(btree_ptr_has_non_ptr, 134) \ + x(extent_ptrs_invalid_entry, 135) \ + x(extent_ptrs_no_ptrs, 136) \ + x(extent_ptrs_too_many_ptrs, 137) \ + x(extent_ptrs_redundant_crc, 138) \ + x(extent_ptrs_redundant_stripe, 139) \ + x(extent_ptrs_unwritten, 140) \ + x(extent_ptrs_written_and_unwritten, 141) \ + x(ptr_to_invalid_device, 142) \ + x(ptr_to_duplicate_device, 143) \ + x(ptr_after_last_bucket, 144) \ + x(ptr_before_first_bucket, 145) \ + x(ptr_spans_multiple_buckets, 146) \ + x(ptr_to_missing_backpointer, 147) \ + x(ptr_to_missing_alloc_key, 148) \ + x(ptr_to_missing_replicas_entry, 149) \ + x(ptr_to_missing_stripe, 150) \ + x(ptr_to_incorrect_stripe, 151) \ + x(ptr_gen_newer_than_bucket_gen, 152) \ + x(ptr_too_stale, 153) \ + x(stale_dirty_ptr, 154) \ + x(ptr_bucket_data_type_mismatch, 155) \ + x(ptr_cached_and_erasure_coded, 156) \ + x(ptr_crc_uncompressed_size_too_small, 157) \ + x(ptr_crc_csum_type_unknown, 158) \ + x(ptr_crc_compression_type_unknown, 159) \ + x(ptr_crc_redundant, 160) \ + x(ptr_crc_uncompressed_size_too_big, 161) \ + x(ptr_crc_nonce_mismatch, 162) \ + x(ptr_stripe_redundant, 163) \ + x(reservation_key_nr_replicas_invalid, 164) \ + x(reflink_v_refcount_wrong, 165) \ + x(reflink_p_to_missing_reflink_v, 166) \ + x(stripe_pos_bad, 167) \ + x(stripe_val_size_bad, 168) \ + x(stripe_sector_count_wrong, 169) \ + x(snapshot_tree_pos_bad, 170) \ + x(snapshot_tree_to_missing_snapshot, 171) \ + x(snapshot_tree_to_missing_subvol, 172) \ + x(snapshot_tree_to_wrong_subvol, 173) \ + x(snapshot_tree_to_snapshot_subvol, 174) \ + x(snapshot_pos_bad, 175) \ + x(snapshot_parent_bad, 176) \ + x(snapshot_children_not_normalized, 177) \ + x(snapshot_child_duplicate, 178) \ + x(snapshot_child_bad, 179) \ + x(snapshot_skiplist_not_normalized, 180) \ + x(snapshot_skiplist_bad, 181) \ + x(snapshot_should_not_have_subvol, 182) \ + x(snapshot_to_bad_snapshot_tree, 183) \ + x(snapshot_bad_depth, 184) \ + x(snapshot_bad_skiplist, 185) \ + x(subvol_pos_bad, 186) \ + x(subvol_not_master_and_not_snapshot, 187) \ + x(subvol_to_missing_root, 188) \ + x(subvol_root_wrong_bi_subvol, 189) \ + x(bkey_in_missing_snapshot, 190) \ + x(inode_pos_inode_nonzero, 191) \ + x(inode_pos_blockdev_range, 192) \ + x(inode_unpack_error, 193) \ + x(inode_str_hash_invalid, 194) \ + x(inode_v3_fields_start_bad, 195) \ + x(inode_snapshot_mismatch, 196) \ + x(inode_unlinked_but_clean, 197) \ + x(inode_unlinked_but_nlink_nonzero, 198) \ + x(inode_checksum_type_invalid, 199) \ + x(inode_compression_type_invalid, 200) \ + x(inode_subvol_root_but_not_dir, 201) \ + x(inode_i_size_dirty_but_clean, 202) \ + x(inode_i_sectors_dirty_but_clean, 203) \ + x(inode_i_sectors_wrong, 204) \ + x(inode_dir_wrong_nlink, 205) \ + x(inode_dir_multiple_links, 206) \ + x(inode_multiple_links_but_nlink_0, 207) \ + x(inode_wrong_backpointer, 208) \ + x(inode_wrong_nlink, 209) \ + x(inode_unreachable, 210) \ + x(deleted_inode_but_clean, 211) \ + x(deleted_inode_missing, 212) \ + x(deleted_inode_is_dir, 213) \ + x(deleted_inode_not_unlinked, 214) \ + x(extent_overlapping, 215) \ + x(extent_in_missing_inode, 216) \ + x(extent_in_non_reg_inode, 217) \ + x(extent_past_end_of_inode, 218) \ + x(dirent_empty_name, 219) \ + x(dirent_val_too_big, 220) \ + x(dirent_name_too_long, 221) \ + x(dirent_name_embedded_nul, 222) \ + x(dirent_name_dot_or_dotdot, 223) \ + x(dirent_name_has_slash, 224) \ + x(dirent_d_type_wrong, 225) \ + x(dirent_d_parent_subvol_wrong, 226) \ + x(dirent_in_missing_dir_inode, 227) \ + x(dirent_in_non_dir_inode, 228) \ + x(dirent_to_missing_inode, 229) \ + x(dirent_to_missing_subvol, 230) \ + x(dirent_to_itself, 231) \ + x(quota_type_invalid, 232) \ + x(xattr_val_size_too_small, 233) \ + x(xattr_val_size_too_big, 234) \ + x(xattr_invalid_type, 235) \ + x(xattr_name_invalid_chars, 236) \ + x(xattr_in_missing_inode, 237) \ + x(root_subvol_missing, 238) \ + x(root_dir_missing, 239) \ + x(root_inode_not_dir, 240) \ + x(dir_loop, 241) \ + x(hash_table_key_duplicate, 242) \ + x(hash_table_key_wrong_offset, 243) + +enum bch_sb_error_id { +#define x(t, n) BCH_FSCK_ERR_##t = n, + BCH_SB_ERRS() +#undef x + BCH_SB_ERR_MAX +}; + struct bch_sb_error_entry_cpu { u64 id:16, nr:48; From 560661d4ae067276c14bf0dc89fdd0228f993150 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sat, 30 Dec 2023 23:58:50 -0500 Subject: [PATCH 09/13] bcachefs: prt_bitflags_vector() similar to prt_bitflags(), but for ulong arrays Signed-off-by: Kent Overstreet --- fs/bcachefs/printbuf.c | 22 ++++++++++++++++++++++ fs/bcachefs/printbuf.h | 2 ++ fs/bcachefs/util.h | 1 + 3 files changed, 25 insertions(+) diff --git a/fs/bcachefs/printbuf.c b/fs/bcachefs/printbuf.c index 5e653eb81d54..accf246c3233 100644 --- a/fs/bcachefs/printbuf.c +++ b/fs/bcachefs/printbuf.c @@ -1,6 +1,7 @@ // SPDX-License-Identifier: LGPL-2.1+ /* Copyright (C) 2022 Kent Overstreet */ +#include #include #include #include @@ -423,3 +424,24 @@ void bch2_prt_bitflags(struct printbuf *out, flags ^= BIT_ULL(bit); } } + +void bch2_prt_bitflags_vector(struct printbuf *out, + const char * const list[], + unsigned long *v, unsigned nr) +{ + bool first = true; + unsigned i; + + for (i = 0; i < nr; i++) + if (!list[i]) { + nr = i - 1; + break; + } + + for_each_set_bit(i, v, nr) { + if (!first) + bch2_prt_printf(out, ","); + first = false; + bch2_prt_printf(out, "%s", list[i]); + } +} diff --git a/fs/bcachefs/printbuf.h b/fs/bcachefs/printbuf.h index 2191423d9f22..9a4a56c40937 100644 --- a/fs/bcachefs/printbuf.h +++ b/fs/bcachefs/printbuf.h @@ -124,6 +124,8 @@ void bch2_prt_units_u64(struct printbuf *, u64); void bch2_prt_units_s64(struct printbuf *, s64); void bch2_prt_string_option(struct printbuf *, const char * const[], size_t); void bch2_prt_bitflags(struct printbuf *, const char * const[], u64); +void bch2_prt_bitflags_vector(struct printbuf *, const char * const[], + unsigned long *, unsigned); /* Initializer for a heap allocated printbuf: */ #define PRINTBUF ((struct printbuf) { .heap_allocated = true }) diff --git a/fs/bcachefs/util.h b/fs/bcachefs/util.h index 2984b57b2958..b701f7fe0784 100644 --- a/fs/bcachefs/util.h +++ b/fs/bcachefs/util.h @@ -243,6 +243,7 @@ do { \ #define prt_units_s64(...) bch2_prt_units_s64(__VA_ARGS__) #define prt_string_option(...) bch2_prt_string_option(__VA_ARGS__) #define prt_bitflags(...) bch2_prt_bitflags(__VA_ARGS__) +#define prt_bitflags_vector(...) bch2_prt_bitflags_vector(__VA_ARGS__) void bch2_pr_time_units(struct printbuf *, u64); void bch2_prt_datetime(struct printbuf *, time64_t); From 808c680f2a256188951805f05c907d40919db37a Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Fri, 29 Dec 2023 14:40:03 -0500 Subject: [PATCH 10/13] bcachefs: Add persistent identifiers for recovery passes The next patch will start to refer to recovery passes from the superblock; naturally, we now need identifiers that don't change, since the existing enum is in the order in which they are run and is not fixed. Signed-off-by: Kent Overstreet --- fs/bcachefs/recovery.c | 34 +++++++++++++- fs/bcachefs/recovery.h | 3 ++ fs/bcachefs/recovery_types.h | 86 ++++++++++++++++++++---------------- 3 files changed, 84 insertions(+), 39 deletions(-) diff --git a/fs/bcachefs/recovery.c b/fs/bcachefs/recovery.c index c2f183d2ffe8..6f8a84cc2c2e 100644 --- a/fs/bcachefs/recovery.c +++ b/fs/bcachefs/recovery.c @@ -481,7 +481,7 @@ static int bch2_fs_upgrade_for_subvolumes(struct bch_fs *c) } const char * const bch2_recovery_passes[] = { -#define x(_fn, _when) #_fn, +#define x(_fn, ...) #_fn, BCH_RECOVERY_PASSES() #undef x NULL @@ -504,11 +504,41 @@ struct recovery_pass_fn { }; static struct recovery_pass_fn recovery_pass_fns[] = { -#define x(_fn, _when) { .fn = bch2_##_fn, .when = _when }, +#define x(_fn, _id, _when) { .fn = bch2_##_fn, .when = _when }, BCH_RECOVERY_PASSES() #undef x }; +u64 bch2_recovery_passes_to_stable(u64 v) +{ + static const u8 map[] = { +#define x(n, id, ...) [BCH_RECOVERY_PASS_##n] = BCH_RECOVERY_PASS_STABLE_##n, + BCH_RECOVERY_PASSES() +#undef x + }; + + u64 ret = 0; + for (unsigned i = 0; i < ARRAY_SIZE(map); i++) + if (v & BIT_ULL(i)) + ret |= BIT_ULL(map[i]); + return ret; +} + +u64 bch2_recovery_passes_from_stable(u64 v) +{ + static const u8 map[] = { +#define x(n, id, ...) [BCH_RECOVERY_PASS_STABLE_##n] = BCH_RECOVERY_PASS_##n, + BCH_RECOVERY_PASSES() +#undef x + }; + + u64 ret = 0; + for (unsigned i = 0; i < ARRAY_SIZE(map); i++) + if (v & BIT_ULL(i)) + ret |= BIT_ULL(map[i]); + return ret; +} + static void check_version_upgrade(struct bch_fs *c) { unsigned latest_compatible = bch2_latest_compatible_version(c->sb.version); diff --git a/fs/bcachefs/recovery.h b/fs/bcachefs/recovery.h index d266aae90200..3a554b0751d0 100644 --- a/fs/bcachefs/recovery.h +++ b/fs/bcachefs/recovery.h @@ -4,6 +4,9 @@ extern const char * const bch2_recovery_passes[]; +u64 bch2_recovery_passes_to_stable(u64 v); +u64 bch2_recovery_passes_from_stable(u64 v); + /* * For when we need to rewind recovery passes and run a pass we skipped: */ diff --git a/fs/bcachefs/recovery_types.h b/fs/bcachefs/recovery_types.h index 515e3d62c2ac..d37c6fd30e38 100644 --- a/fs/bcachefs/recovery_types.h +++ b/fs/bcachefs/recovery_types.h @@ -7,45 +7,57 @@ #define PASS_UNCLEAN BIT(2) #define PASS_ALWAYS BIT(3) -#define BCH_RECOVERY_PASSES() \ - x(alloc_read, PASS_ALWAYS) \ - x(stripes_read, PASS_ALWAYS) \ - x(initialize_subvolumes, 0) \ - x(snapshots_read, PASS_ALWAYS) \ - x(check_topology, 0) \ - x(check_allocations, PASS_FSCK) \ - x(trans_mark_dev_sbs, PASS_ALWAYS|PASS_SILENT) \ - x(fs_journal_alloc, PASS_ALWAYS|PASS_SILENT) \ - x(set_may_go_rw, PASS_ALWAYS|PASS_SILENT) \ - x(journal_replay, PASS_ALWAYS) \ - x(check_alloc_info, PASS_FSCK) \ - x(check_lrus, PASS_FSCK) \ - x(check_btree_backpointers, PASS_FSCK) \ - x(check_backpointers_to_extents,PASS_FSCK) \ - x(check_extents_to_backpointers,PASS_FSCK) \ - x(check_alloc_to_lru_refs, PASS_FSCK) \ - x(fs_freespace_init, PASS_ALWAYS|PASS_SILENT) \ - x(bucket_gens_init, 0) \ - x(check_snapshot_trees, PASS_FSCK) \ - x(check_snapshots, PASS_FSCK) \ - x(check_subvols, PASS_FSCK) \ - x(delete_dead_snapshots, PASS_FSCK) \ - x(fs_upgrade_for_subvolumes, 0) \ - x(resume_logged_ops, PASS_ALWAYS) \ - x(check_inodes, PASS_FSCK) \ - x(check_extents, PASS_FSCK) \ - x(check_indirect_extents, PASS_FSCK) \ - x(check_dirents, PASS_FSCK) \ - x(check_xattrs, PASS_FSCK) \ - x(check_root, PASS_FSCK) \ - x(check_directory_structure, PASS_FSCK) \ - x(check_nlinks, PASS_FSCK) \ - x(delete_dead_inodes, PASS_FSCK|PASS_UNCLEAN) \ - x(fix_reflink_p, 0) \ - x(set_fs_needs_rebalance, 0) \ +/* + * Passes may be reordered, but the second field is a persistent identifier and + * must never change: + */ +#define BCH_RECOVERY_PASSES() \ + x(alloc_read, 0, PASS_ALWAYS) \ + x(stripes_read, 1, PASS_ALWAYS) \ + x(initialize_subvolumes, 2, 0) \ + x(snapshots_read, 3, PASS_ALWAYS) \ + x(check_topology, 4, 0) \ + x(check_allocations, 5, PASS_FSCK) \ + x(trans_mark_dev_sbs, 6, PASS_ALWAYS|PASS_SILENT) \ + x(fs_journal_alloc, 7, PASS_ALWAYS|PASS_SILENT) \ + x(set_may_go_rw, 8, PASS_ALWAYS|PASS_SILENT) \ + x(journal_replay, 9, PASS_ALWAYS) \ + x(check_alloc_info, 10, PASS_FSCK) \ + x(check_lrus, 11, PASS_FSCK) \ + x(check_btree_backpointers, 12, PASS_FSCK) \ + x(check_backpointers_to_extents, 13, PASS_FSCK) \ + x(check_extents_to_backpointers, 14, PASS_FSCK) \ + x(check_alloc_to_lru_refs, 15, PASS_FSCK) \ + x(fs_freespace_init, 16, PASS_ALWAYS|PASS_SILENT) \ + x(bucket_gens_init, 17, 0) \ + x(check_snapshot_trees, 18, PASS_FSCK) \ + x(check_snapshots, 19, PASS_FSCK) \ + x(check_subvols, 20, PASS_FSCK) \ + x(delete_dead_snapshots, 21, PASS_FSCK) \ + x(fs_upgrade_for_subvolumes, 22, 0) \ + x(resume_logged_ops, 23, PASS_ALWAYS) \ + x(check_inodes, 24, PASS_FSCK) \ + x(check_extents, 25, PASS_FSCK) \ + x(check_indirect_extents, 26, PASS_FSCK) \ + x(check_dirents, 27, PASS_FSCK) \ + x(check_xattrs, 28, PASS_FSCK) \ + x(check_root, 29, PASS_FSCK) \ + x(check_directory_structure, 30, PASS_FSCK) \ + x(check_nlinks, 31, PASS_FSCK) \ + x(delete_dead_inodes, 32, PASS_FSCK|PASS_UNCLEAN) \ + x(fix_reflink_p, 33, 0) \ + x(set_fs_needs_rebalance, 34, 0) \ +/* We normally enumerate recovery passes in the order we run them: */ enum bch_recovery_pass { -#define x(n, when) BCH_RECOVERY_PASS_##n, +#define x(n, id, when) BCH_RECOVERY_PASS_##n, + BCH_RECOVERY_PASSES() +#undef x +}; + +/* But we also need stable identifiers that can be used in the superblock */ +enum bch_recovery_pass_stable { +#define x(n, id, when) BCH_RECOVERY_PASS_STABLE_##n = id, BCH_RECOVERY_PASSES() #undef x }; From 8b16413cda7c46d47f8071165b3071f8a9369199 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Fri, 29 Dec 2023 15:15:14 -0500 Subject: [PATCH 11/13] bcachefs: bch_sb.recovery_passes_required Add two new superblock fields. Since the main section of the superblock is now fully, we have to add a new variable length section for them - bch_sb_field_ext. - recovery_passes_requried: recovery passes that must be run on the next mount - errors_silent: errors that will be silently fixed These are to improve upgrading and dwongrading: these fields won't be cleared until after recovery successfully completes, so there won't be any issues with crashing partway through an upgrade or a downgrade. Signed-off-by: Kent Overstreet --- fs/bcachefs/bcachefs.h | 1 + fs/bcachefs/bcachefs_format.h | 33 +++++++++------ fs/bcachefs/errcode.h | 1 + fs/bcachefs/error.c | 3 ++ fs/bcachefs/recovery.c | 75 ++++++++++++++++++++++++++++------- fs/bcachefs/sb-errors.c | 2 +- fs/bcachefs/sb-errors.h | 2 + fs/bcachefs/super-io.c | 75 ++++++++++++++++++++++++++++++++++- fs/bcachefs/super-io.h | 10 +++++ 9 files changed, 172 insertions(+), 30 deletions(-) diff --git a/fs/bcachefs/bcachefs.h b/fs/bcachefs/bcachefs.h index dfa22f9d9a1d..b62737fdf5ab 100644 --- a/fs/bcachefs/bcachefs.h +++ b/fs/bcachefs/bcachefs.h @@ -737,6 +737,7 @@ struct bch_fs { unsigned nsec_per_time_unit; u64 features; u64 compat; + unsigned long errors_silent[BITS_TO_LONGS(BCH_SB_ERR_MAX)]; } sb; diff --git a/fs/bcachefs/bcachefs_format.h b/fs/bcachefs/bcachefs_format.h index 1ab1f08d763b..bd5af516994a 100644 --- a/fs/bcachefs/bcachefs_format.h +++ b/fs/bcachefs/bcachefs_format.h @@ -1207,19 +1207,20 @@ struct bch_sb_field { }; #define BCH_SB_FIELDS() \ - x(journal, 0) \ - x(members_v1, 1) \ - x(crypt, 2) \ - x(replicas_v0, 3) \ - x(quota, 4) \ - x(disk_groups, 5) \ - x(clean, 6) \ - x(replicas, 7) \ - x(journal_seq_blacklist, 8) \ - x(journal_v2, 9) \ - x(counters, 10) \ - x(members_v2, 11) \ - x(errors, 12) + x(journal, 0) \ + x(members_v1, 1) \ + x(crypt, 2) \ + x(replicas_v0, 3) \ + x(quota, 4) \ + x(disk_groups, 5) \ + x(clean, 6) \ + x(replicas, 7) \ + x(journal_seq_blacklist, 8) \ + x(journal_v2, 9) \ + x(counters, 10) \ + x(members_v2, 11) \ + x(errors, 12) \ + x(ext, 13) enum bch_sb_field_type { #define x(f, nr) BCH_SB_FIELD_##f = nr, @@ -1631,6 +1632,12 @@ struct bch_sb_field_errors { LE64_BITMASK(BCH_SB_ERROR_ENTRY_ID, struct bch_sb_field_error_entry, v, 0, 16); LE64_BITMASK(BCH_SB_ERROR_ENTRY_NR, struct bch_sb_field_error_entry, v, 16, 64); +struct bch_sb_field_ext { + struct bch_sb_field field; + __le64 recovery_passes_required[2]; + __le64 errors_silent[8]; +}; + /* Superblock: */ /* diff --git a/fs/bcachefs/errcode.h b/fs/bcachefs/errcode.h index ae7910bf2228..79327b5cec50 100644 --- a/fs/bcachefs/errcode.h +++ b/fs/bcachefs/errcode.h @@ -218,6 +218,7 @@ x(BCH_ERR_invalid_sb, invalid_sb_quota) \ x(BCH_ERR_invalid_sb, invalid_sb_errors) \ x(BCH_ERR_invalid_sb, invalid_sb_opt_compression) \ + x(BCH_ERR_invalid_sb, invalid_sb_ext) \ x(BCH_ERR_invalid, invalid_bkey) \ x(BCH_ERR_operation_blocked, nocow_lock_blocked) \ x(EIO, btree_node_read_err) \ diff --git a/fs/bcachefs/error.c b/fs/bcachefs/error.c index 7b28d37922fd..25cf78a7b946 100644 --- a/fs/bcachefs/error.c +++ b/fs/bcachefs/error.c @@ -152,6 +152,9 @@ int bch2_fsck_err(struct bch_fs *c, struct printbuf buf = PRINTBUF, *out = &buf; int ret = -BCH_ERR_fsck_ignore; + if (test_bit(err, c->sb.errors_silent)) + return -BCH_ERR_fsck_fix; + bch2_sb_error_count(c, err); va_start(args, fmt); diff --git a/fs/bcachefs/recovery.c b/fs/bcachefs/recovery.c index 6f8a84cc2c2e..b9c84e8cf3fd 100644 --- a/fs/bcachefs/recovery.c +++ b/fs/bcachefs/recovery.c @@ -539,13 +539,12 @@ u64 bch2_recovery_passes_from_stable(u64 v) return ret; } -static void check_version_upgrade(struct bch_fs *c) +static bool check_version_upgrade(struct bch_fs *c) { unsigned latest_compatible = bch2_latest_compatible_version(c->sb.version); unsigned latest_version = bcachefs_metadata_version_current; unsigned old_version = c->sb.version_upgrade_complete ?: c->sb.version; unsigned new_version = 0; - u64 recovery_passes; if (old_version < bcachefs_metadata_required_upgrade_below) { if (c->opts.version_upgrade == BCH_VERSION_UPGRADE_incompatible || @@ -589,7 +588,7 @@ static void check_version_upgrade(struct bch_fs *c) bch2_version_to_text(&buf, new_version); prt_newline(&buf); - recovery_passes = bch2_upgrade_recovery_passes(c, old_version, new_version); + u64 recovery_passes = bch2_upgrade_recovery_passes(c, old_version, new_version); if (recovery_passes) { if ((recovery_passes & RECOVERY_PASS_ALL_FSCK) == RECOVERY_PASS_ALL_FSCK) prt_str(&buf, "fsck required"); @@ -604,12 +603,13 @@ static void check_version_upgrade(struct bch_fs *c) bch_info(c, "%s", buf.buf); - mutex_lock(&c->sb_lock); bch2_sb_upgrade(c, new_version); - mutex_unlock(&c->sb_lock); printbuf_exit(&buf); + return true; } + + return false; } u64 bch2_fsck_recovery_passes(void) @@ -684,7 +684,6 @@ int bch2_fs_recovery(struct bch_fs *c) struct bch_sb_field_clean *clean = NULL; struct jset *last_journal_entry = NULL; u64 last_seq = 0, blacklist_seq, journal_seq; - bool write_sb = false; int ret = 0; if (c->sb.clean) { @@ -712,15 +711,52 @@ int bch2_fs_recovery(struct bch_fs *c) goto err; } - if (c->opts.fsck || !(c->opts.nochanges && c->opts.norecovery)) - check_version_upgrade(c); - if (c->opts.fsck && c->opts.norecovery) { bch_err(c, "cannot select both norecovery and fsck"); ret = -EINVAL; goto err; } + if (!(c->opts.nochanges && c->opts.norecovery)) { + mutex_lock(&c->sb_lock); + bool write_sb = false; + + struct bch_sb_field_ext *ext = + bch2_sb_field_get_minsize(&c->disk_sb, ext, sizeof(*ext) / sizeof(u64)); + if (!ext) { + ret = -BCH_ERR_ENOSPC_sb; + mutex_unlock(&c->sb_lock); + goto err; + } + + if (BCH_SB_HAS_TOPOLOGY_ERRORS(c->disk_sb.sb)) { + ext->recovery_passes_required[0] |= + cpu_to_le64(bch2_recovery_passes_to_stable(BIT_ULL(BCH_RECOVERY_PASS_check_topology))); + write_sb = true; + } + + u64 sb_passes = bch2_recovery_passes_from_stable(le64_to_cpu(ext->recovery_passes_required[0])); + if (sb_passes) { + struct printbuf buf = PRINTBUF; + prt_str(&buf, "superblock requires following recovery passes to be run:\n "); + prt_bitflags(&buf, bch2_recovery_passes, sb_passes); + bch_info(c, "%s", buf.buf); + printbuf_exit(&buf); + } + + if (check_version_upgrade(c)) + write_sb = true; + + if (write_sb) + bch2_write_super(c); + + c->recovery_passes_explicit |= bch2_recovery_passes_from_stable(le64_to_cpu(ext->recovery_passes_required[0])); + mutex_unlock(&c->sb_lock); + } + + if (c->opts.fsck && IS_ENABLED(CONFIG_BCACHEFS_DEBUG)) + c->recovery_passes_explicit |= BIT_ULL(BCH_RECOVERY_PASS_check_topology); + ret = bch2_blacklist_table_initialize(c); if (ret) { bch_err(c, "error initializing blacklist table"); @@ -857,11 +893,6 @@ int bch2_fs_recovery(struct bch_fs *c) if (ret) goto err; - if (c->opts.fsck && - (IS_ENABLED(CONFIG_BCACHEFS_DEBUG) || - BCH_SB_HAS_TOPOLOGY_ERRORS(c->disk_sb.sb))) - c->recovery_passes_explicit |= BIT_ULL(BCH_RECOVERY_PASS_check_topology); - ret = bch2_run_recovery_passes(c); if (ret) goto err; @@ -898,16 +929,30 @@ int bch2_fs_recovery(struct bch_fs *c) } mutex_lock(&c->sb_lock); + bool write_sb = false; + if (BCH_SB_VERSION_UPGRADE_COMPLETE(c->disk_sb.sb) != le16_to_cpu(c->disk_sb.sb->version)) { SET_BCH_SB_VERSION_UPGRADE_COMPLETE(c->disk_sb.sb, le16_to_cpu(c->disk_sb.sb->version)); write_sb = true; } - if (!test_bit(BCH_FS_ERROR, &c->flags)) { + if (!test_bit(BCH_FS_ERROR, &c->flags) && + !(c->disk_sb.sb->compat[0] & cpu_to_le64(1ULL << BCH_COMPAT_alloc_info))) { c->disk_sb.sb->compat[0] |= cpu_to_le64(1ULL << BCH_COMPAT_alloc_info); write_sb = true; } + if (!test_bit(BCH_FS_ERROR, &c->flags)) { + struct bch_sb_field_ext *ext = bch2_sb_field_get(c->disk_sb.sb, ext); + if (ext && + (!bch2_is_zero(ext->recovery_passes_required, sizeof(ext->recovery_passes_required)) || + !bch2_is_zero(ext->errors_silent, sizeof(ext->errors_silent)))) { + memset(ext->recovery_passes_required, 0, sizeof(ext->recovery_passes_required)); + memset(ext->errors_silent, 0, sizeof(ext->errors_silent)); + write_sb = true; + } + } + if (c->opts.fsck && !test_bit(BCH_FS_ERROR, &c->flags) && !test_bit(BCH_FS_ERRORS_NOT_FIXED, &c->flags)) { diff --git a/fs/bcachefs/sb-errors.c b/fs/bcachefs/sb-errors.c index f0930ab7f036..caf7669db6a1 100644 --- a/fs/bcachefs/sb-errors.c +++ b/fs/bcachefs/sb-errors.c @@ -4,7 +4,7 @@ #include "sb-errors.h" #include "super-io.h" -static const char * const bch2_sb_error_strs[] = { +const char * const bch2_sb_error_strs[] = { #define x(t, n, ...) [n] = #t, BCH_SB_ERRS() NULL diff --git a/fs/bcachefs/sb-errors.h b/fs/bcachefs/sb-errors.h index 27e5dc7884bb..8889001e7db4 100644 --- a/fs/bcachefs/sb-errors.h +++ b/fs/bcachefs/sb-errors.h @@ -4,6 +4,8 @@ #include "sb-errors_types.h" +extern const char * const bch2_sb_error_strs[]; + extern const struct bch_sb_field_ops bch_sb_field_ops_errors; void bch2_sb_error_count(struct bch_fs *, enum bch_sb_error_id); diff --git a/fs/bcachefs/super-io.c b/fs/bcachefs/super-io.c index f3e12f7979d5..e085d3b021e8 100644 --- a/fs/bcachefs/super-io.c +++ b/fs/bcachefs/super-io.c @@ -264,6 +264,17 @@ struct bch_sb_field *bch2_sb_field_resize_id(struct bch_sb_handle *sb, return f; } +struct bch_sb_field *bch2_sb_field_get_minsize_id(struct bch_sb_handle *sb, + enum bch_sb_field_type type, + unsigned u64s) +{ + struct bch_sb_field *f = bch2_sb_field_get_id(sb->sb, type); + + if (!f || le32_to_cpu(f->u64s) < u64s) + f = bch2_sb_field_resize_id(sb, type, u64s); + return f; +} + /* Superblock validate: */ static int validate_sb_layout(struct bch_sb_layout *layout, struct printbuf *out) @@ -484,6 +495,21 @@ static int bch2_sb_validate(struct bch_sb_handle *disk_sb, struct printbuf *out, /* device open: */ +static unsigned long le_ulong_to_cpu(unsigned long v) +{ + return sizeof(unsigned long) == 8 + ? le64_to_cpu(v) + : le32_to_cpu(v); +} + +static void le_bitvector_to_cpu(unsigned long *dst, unsigned long *src, unsigned nr) +{ + BUG_ON(nr & (BITS_PER_TYPE(long) - 1)); + + for (unsigned i = 0; i < BITS_TO_LONGS(nr); i++) + dst[i] = le_ulong_to_cpu(src[i]); +} + static void bch2_sb_update(struct bch_fs *c) { struct bch_sb *src = c->disk_sb.sb; @@ -512,8 +538,15 @@ static void bch2_sb_update(struct bch_fs *c) c->sb.features = le64_to_cpu(src->features[0]); c->sb.compat = le64_to_cpu(src->compat[0]); + memset(c->sb.errors_silent, 0, sizeof(c->sb.errors_silent)); + + struct bch_sb_field_ext *ext = bch2_sb_field_get(src, ext); + if (ext) + le_bitvector_to_cpu(c->sb.errors_silent, (void *) ext->errors_silent, + sizeof(c->sb.errors_silent) * 8); + for_each_member_device(ca, c, i) { - struct bch_member m = bch2_sb_member_get(src, i); + struct bch_member m = bch2_sb_member_get(src, ca->dev_idx); ca->mi = bch2_mi_to_cpu(&m); } } @@ -1054,6 +1087,46 @@ void bch2_sb_upgrade(struct bch_fs *c, unsigned new_version) c->disk_sb.sb->features[0] |= cpu_to_le64(BCH_SB_FEATURES_ALL); } +static int bch2_sb_ext_validate(struct bch_sb *sb, struct bch_sb_field *f, + struct printbuf *err) +{ + if (vstruct_bytes(f) < 88) { + prt_printf(err, "field too small (%zu < %u)", vstruct_bytes(f), 88); + return -BCH_ERR_invalid_sb_ext; + } + + return 0; +} + +static void bch2_sb_ext_to_text(struct printbuf *out, struct bch_sb *sb, + struct bch_sb_field *f) +{ + struct bch_sb_field_ext *e = field_to_type(f, ext); + + prt_printf(out, "Recovery passes required:"); + prt_tab(out); + prt_bitflags(out, bch2_recovery_passes, + bch2_recovery_passes_from_stable(le64_to_cpu(e->recovery_passes_required[0]))); + prt_newline(out); + + unsigned long *errors_silent = kmalloc(sizeof(e->errors_silent), GFP_KERNEL); + if (errors_silent) { + le_bitvector_to_cpu(errors_silent, (void *) e->errors_silent, sizeof(e->errors_silent) * 8); + + prt_printf(out, "Errors to silently fix:"); + prt_tab(out); + prt_bitflags_vector(out, bch2_sb_error_strs, errors_silent, sizeof(e->errors_silent) * 8); + prt_newline(out); + + kfree(errors_silent); + } +} + +static const struct bch_sb_field_ops bch_sb_field_ops_ext = { + .validate = bch2_sb_ext_validate, + .to_text = bch2_sb_ext_to_text, +}; + static const struct bch_sb_field_ops *bch2_sb_field_ops[] = { #define x(f, nr) \ [BCH_SB_FIELD_##f] = &bch_sb_field_ops_##f, diff --git a/fs/bcachefs/super-io.h b/fs/bcachefs/super-io.h index f5abd102bff7..589509ebe996 100644 --- a/fs/bcachefs/super-io.h +++ b/fs/bcachefs/super-io.h @@ -40,6 +40,16 @@ struct bch_sb_field *bch2_sb_field_resize_id(struct bch_sb_handle *, #define bch2_sb_field_resize(_sb, _name, _u64s) \ field_to_type(bch2_sb_field_resize_id(_sb, BCH_SB_FIELD_##_name, _u64s), _name) +struct bch_sb_field *bch2_sb_field_get_minsize_id(struct bch_sb_handle *, + enum bch_sb_field_type, unsigned); +#define bch2_sb_field_get_minsize(_sb, _name, _u64s) \ + field_to_type(bch2_sb_field_get_minsize_id(_sb, BCH_SB_FIELD_##_name, _u64s), _name) + +#define bch2_sb_field_nr_entries(_f) \ + (_f ? ((bch2_sb_field_bytes(&_f->field) - sizeof(*_f)) / \ + sizeof(_f->entries[0])) \ + : 0) + void bch2_sb_field_delete(struct bch_sb_handle *, enum bch_sb_field_type); extern const char * const bch2_sb_fields[]; From 84f1638795da1ff2084597de4251e9054f1ad728 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Fri, 29 Dec 2023 15:25:07 -0500 Subject: [PATCH 12/13] bcachefs: bch_sb_field_downgrade Add a new superblock section that contains a list of { minor version, recovery passes, errors_to_fix } that is - a list of recovery passes that must be run when downgrading past a given version, and a list of errors to silently fix. The upcoming disk accounting rewrite is not going to be fully compatible: we're going to have to regenerate accounting both when upgrading to the new version, and also from downgrading from the new version, since the new method of doing disk space accounting is a completely different architecture based on deltas, and synchronizing them for every jounal entry write to maintain compatibility is going to be too expensive and impractical. Signed-off-by: Kent Overstreet --- fs/bcachefs/Makefile | 1 + fs/bcachefs/bcachefs_format.h | 20 +++- fs/bcachefs/errcode.h | 2 + fs/bcachefs/recovery.c | 24 ++++- fs/bcachefs/sb-clean.c | 2 - fs/bcachefs/sb-downgrade.c | 188 ++++++++++++++++++++++++++++++++++ fs/bcachefs/sb-downgrade.h | 10 ++ fs/bcachefs/sb-errors.c | 4 +- fs/bcachefs/super-io.c | 11 +- fs/bcachefs/super-io.h | 2 +- 10 files changed, 255 insertions(+), 9 deletions(-) create mode 100644 fs/bcachefs/sb-downgrade.c create mode 100644 fs/bcachefs/sb-downgrade.h diff --git a/fs/bcachefs/Makefile b/fs/bcachefs/Makefile index ed550a40463e..b81268418174 100644 --- a/fs/bcachefs/Makefile +++ b/fs/bcachefs/Makefile @@ -71,6 +71,7 @@ bcachefs-y := \ reflink.o \ replicas.o \ sb-clean.o \ + sb-downgrade.o \ sb-errors.o \ sb-members.o \ siphash.o \ diff --git a/fs/bcachefs/bcachefs_format.h b/fs/bcachefs/bcachefs_format.h index bd5af516994a..fe78e87603fc 100644 --- a/fs/bcachefs/bcachefs_format.h +++ b/fs/bcachefs/bcachefs_format.h @@ -1220,7 +1220,8 @@ struct bch_sb_field { x(counters, 10) \ x(members_v2, 11) \ x(errors, 12) \ - x(ext, 13) + x(ext, 13) \ + x(downgrade, 14) enum bch_sb_field_type { #define x(f, nr) BCH_SB_FIELD_##f = nr, @@ -1638,6 +1639,18 @@ struct bch_sb_field_ext { __le64 errors_silent[8]; }; +struct bch_sb_field_downgrade_entry { + __le16 version; + __le64 recovery_passes[2]; + __le16 nr_errors; + __le16 errors[] __counted_by(nr_errors); +} __packed __aligned(2); + +struct bch_sb_field_downgrade { + struct bch_sb_field field; + struct bch_sb_field_downgrade_entry entries[]; +}; + /* Superblock: */ /* @@ -1651,6 +1664,11 @@ struct bch_sb_field_ext { #define RECOVERY_PASS_ALL_FSCK (1ULL << 63) +/* + * field 1: version name + * field 2: BCH_VERSION(major, minor) + * field 3: recovery passess required on upgrade + */ #define BCH_METADATA_VERSIONS() \ x(bkey_renumber, BCH_VERSION(0, 10), \ RECOVERY_PASS_ALL_FSCK) \ diff --git a/fs/bcachefs/errcode.h b/fs/bcachefs/errcode.h index 79327b5cec50..9ce29681eec9 100644 --- a/fs/bcachefs/errcode.h +++ b/fs/bcachefs/errcode.h @@ -95,6 +95,7 @@ x(ENOSPC, ENOSPC_sb_members) \ x(ENOSPC, ENOSPC_sb_members_v2) \ x(ENOSPC, ENOSPC_sb_crypt) \ + x(ENOSPC, ENOSPC_sb_downgrade) \ x(ENOSPC, ENOSPC_btree_slot) \ x(ENOSPC, ENOSPC_snapshot_tree) \ x(ENOENT, ENOENT_bkey_type_mismatch) \ @@ -219,6 +220,7 @@ x(BCH_ERR_invalid_sb, invalid_sb_errors) \ x(BCH_ERR_invalid_sb, invalid_sb_opt_compression) \ x(BCH_ERR_invalid_sb, invalid_sb_ext) \ + x(BCH_ERR_invalid_sb, invalid_sb_downgrade) \ x(BCH_ERR_invalid, invalid_bkey) \ x(BCH_ERR_operation_blocked, nocow_lock_blocked) \ x(EIO, btree_node_read_err) \ diff --git a/fs/bcachefs/recovery.c b/fs/bcachefs/recovery.c index b9c84e8cf3fd..5cf7d0532002 100644 --- a/fs/bcachefs/recovery.c +++ b/fs/bcachefs/recovery.c @@ -27,6 +27,7 @@ #include "recovery.h" #include "replicas.h" #include "sb-clean.h" +#include "sb-downgrade.h" #include "snapshot.h" #include "subvolume.h" #include "super-io.h" @@ -744,6 +745,27 @@ int bch2_fs_recovery(struct bch_fs *c) printbuf_exit(&buf); } + if (bch2_check_version_downgrade(c)) { + struct printbuf buf = PRINTBUF; + + prt_str(&buf, "Version downgrade required:\n"); + + __le64 passes = ext->recovery_passes_required[0]; + bch2_sb_set_downgrade(c, + BCH_VERSION_MINOR(bcachefs_metadata_version_current), + BCH_VERSION_MINOR(c->sb.version)); + passes = ext->recovery_passes_required[0] & ~passes; + if (passes) { + prt_str(&buf, " running recovery passes: "); + prt_bitflags(&buf, bch2_recovery_passes, + bch2_recovery_passes_from_stable(le64_to_cpu(passes))); + } + + bch_info(c, "%s", buf.buf); + printbuf_exit(&buf); + write_sb = true; + } + if (check_version_upgrade(c)) write_sb = true; @@ -1022,7 +1044,7 @@ int bch2_fs_initialize(struct bch_fs *c) c->disk_sb.sb->compat[0] |= cpu_to_le64(1ULL << BCH_COMPAT_extents_above_btree_updates_done); c->disk_sb.sb->compat[0] |= cpu_to_le64(1ULL << BCH_COMPAT_bformat_overflow_done); - bch2_sb_maybe_downgrade(c); + bch2_check_version_downgrade(c); if (c->opts.version_upgrade != BCH_VERSION_UPGRADE_none) { bch2_sb_upgrade(c, bcachefs_metadata_version_current); diff --git a/fs/bcachefs/sb-clean.c b/fs/bcachefs/sb-clean.c index e151ada1c8bd..c76ad8ea5e4a 100644 --- a/fs/bcachefs/sb-clean.c +++ b/fs/bcachefs/sb-clean.c @@ -332,8 +332,6 @@ int bch2_fs_mark_dirty(struct bch_fs *c) mutex_lock(&c->sb_lock); SET_BCH_SB_CLEAN(c->disk_sb.sb, false); - - bch2_sb_maybe_downgrade(c); c->disk_sb.sb->features[0] |= cpu_to_le64(BCH_SB_FEATURES_ALWAYS); ret = bch2_write_super(c); diff --git a/fs/bcachefs/sb-downgrade.c b/fs/bcachefs/sb-downgrade.c new file mode 100644 index 000000000000..4919237bbe73 --- /dev/null +++ b/fs/bcachefs/sb-downgrade.c @@ -0,0 +1,188 @@ +// SPDX-License-Identifier: GPL-2.0 + +/* + * Superblock section that contains a list of recovery passes to run when + * downgrading past a given version + */ + +#include "bcachefs.h" +#include "darray.h" +#include "recovery.h" +#include "sb-downgrade.h" +#include "sb-errors.h" +#include "super-io.h" + +/* + * Downgrade table: + * When dowgrading past certain versions, we need to run certain recovery passes + * and fix certain errors: + * + * x(version, recovery_passes, errors...) + */ + +#define DOWNGRADE_TABLE() + +struct downgrade_entry { + u64 recovery_passes; + u16 version; + u16 nr_errors; + const u16 *errors; +}; + +#define x(ver, passes, ...) static const u16 ver_##errors[] = { __VA_ARGS__ }; +DOWNGRADE_TABLE() +#undef x + +static const struct downgrade_entry downgrade_table[] = { +#define x(ver, passes, ...) { \ + .recovery_passes = passes, \ + .version = bcachefs_metadata_version_##ver,\ + .nr_errors = ARRAY_SIZE(ver_##errors), \ + .errors = ver_##errors, \ +}, +DOWNGRADE_TABLE() +#undef x +}; + +static inline const struct bch_sb_field_downgrade_entry * +downgrade_entry_next_c(const struct bch_sb_field_downgrade_entry *e) +{ + return (void *) &e->errors[le16_to_cpu(e->nr_errors)]; +} + +#define for_each_downgrade_entry(_d, _i) \ + for (const struct bch_sb_field_downgrade_entry *_i = (_d)->entries; \ + (void *) _i < vstruct_end(&(_d)->field) && \ + (void *) &_i->errors[0] < vstruct_end(&(_d)->field); \ + _i = downgrade_entry_next_c(_i)) + +static int bch2_sb_downgrade_validate(struct bch_sb *sb, struct bch_sb_field *f, + struct printbuf *err) +{ + struct bch_sb_field_downgrade *e = field_to_type(f, downgrade); + + for_each_downgrade_entry(e, i) { + if (BCH_VERSION_MAJOR(le16_to_cpu(i->version)) != + BCH_VERSION_MAJOR(le16_to_cpu(sb->version))) { + prt_printf(err, "downgrade entry with mismatched major version (%u != %u)", + BCH_VERSION_MAJOR(le16_to_cpu(i->version)), + BCH_VERSION_MAJOR(le16_to_cpu(sb->version))); + return -BCH_ERR_invalid_sb_downgrade; + } + } + + return 0; +} + +static void bch2_sb_downgrade_to_text(struct printbuf *out, struct bch_sb *sb, + struct bch_sb_field *f) +{ + struct bch_sb_field_downgrade *e = field_to_type(f, downgrade); + + if (out->nr_tabstops <= 1) + printbuf_tabstop_push(out, 16); + + for_each_downgrade_entry(e, i) { + prt_str(out, "version:"); + prt_tab(out); + bch2_version_to_text(out, le16_to_cpu(i->version)); + prt_newline(out); + + prt_str(out, "recovery passes:"); + prt_tab(out); + prt_bitflags(out, bch2_recovery_passes, + bch2_recovery_passes_from_stable(le64_to_cpu(i->recovery_passes[0]))); + prt_newline(out); + + prt_str(out, "errors:"); + prt_tab(out); + bool first = true; + for (unsigned j = 0; j < le16_to_cpu(i->nr_errors); j++) { + if (!first) + prt_char(out, ','); + first = false; + unsigned e = le16_to_cpu(i->errors[j]); + prt_str(out, e < BCH_SB_ERR_MAX ? bch2_sb_error_strs[e] : "(unknown)"); + } + prt_newline(out); + } +} + +const struct bch_sb_field_ops bch_sb_field_ops_downgrade = { + .validate = bch2_sb_downgrade_validate, + .to_text = bch2_sb_downgrade_to_text, +}; + +int bch2_sb_downgrade_update(struct bch_fs *c) +{ + darray_char table = {}; + int ret = 0; + + for (const struct downgrade_entry *src = downgrade_table; + src < downgrade_table + ARRAY_SIZE(downgrade_table); + src++) { + if (BCH_VERSION_MAJOR(src->version) != BCH_VERSION_MAJOR(le16_to_cpu(c->disk_sb.sb->version))) + continue; + + struct bch_sb_field_downgrade_entry *dst; + unsigned bytes = sizeof(*dst) + sizeof(dst->errors[0]) * src->nr_errors; + + ret = darray_make_room(&table, bytes); + if (ret) + goto out; + + dst = (void *) &darray_top(table); + dst->version = cpu_to_le16(src->version); + dst->recovery_passes[0] = cpu_to_le64(src->recovery_passes); + dst->recovery_passes[1] = 0; + dst->nr_errors = cpu_to_le16(src->nr_errors); + for (unsigned i = 0; i < src->nr_errors; i++) + dst->errors[i] = cpu_to_le16(src->errors[i]); + + table.nr += bytes; + } + + struct bch_sb_field_downgrade *d = bch2_sb_field_get(c->disk_sb.sb, downgrade); + + unsigned sb_u64s = DIV_ROUND_UP(sizeof(*d) + table.nr, sizeof(u64)); + + if (d && le32_to_cpu(d->field.u64s) > sb_u64s) + goto out; + + d = bch2_sb_field_resize(&c->disk_sb, downgrade, sb_u64s); + if (!d) { + ret = -BCH_ERR_ENOSPC_sb_downgrade; + goto out; + } + + memcpy(d->entries, table.data, table.nr); + memset_u64s_tail(d->entries, 0, table.nr); +out: + darray_exit(&table); + return ret; +} + +void bch2_sb_set_downgrade(struct bch_fs *c, unsigned new_minor, unsigned old_minor) +{ + struct bch_sb_field_downgrade *d = bch2_sb_field_get(c->disk_sb.sb, downgrade); + if (!d) + return; + + struct bch_sb_field_ext *ext = bch2_sb_field_get(c->disk_sb.sb, ext); + + for_each_downgrade_entry(d, i) { + unsigned minor = BCH_VERSION_MINOR(le16_to_cpu(i->version)); + if (new_minor < minor && minor <= old_minor) { + ext->recovery_passes_required[0] |= i->recovery_passes[0]; + ext->recovery_passes_required[1] |= i->recovery_passes[1]; + + for (unsigned j = 0; j < le16_to_cpu(i->nr_errors); j++) { + unsigned e = le16_to_cpu(i->errors[j]); + if (e < BCH_SB_ERR_MAX) + __set_bit(e, c->sb.errors_silent); + if (e < sizeof(ext->errors_silent) * 8) + ext->errors_silent[e / 64] |= cpu_to_le64(BIT_ULL(e % 64)); + } + } + } +} diff --git a/fs/bcachefs/sb-downgrade.h b/fs/bcachefs/sb-downgrade.h new file mode 100644 index 000000000000..bc48fd2ca70e --- /dev/null +++ b/fs/bcachefs/sb-downgrade.h @@ -0,0 +1,10 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _BCACHEFS_SB_DOWNGRADE_H +#define _BCACHEFS_SB_DOWNGRADE_H + +extern const struct bch_sb_field_ops bch_sb_field_ops_downgrade; + +int bch2_sb_downgrade_update(struct bch_fs *); +void bch2_sb_set_downgrade(struct bch_fs *, unsigned, unsigned); + +#endif /* _BCACHEFS_SB_DOWNGRADE_H */ diff --git a/fs/bcachefs/sb-errors.c b/fs/bcachefs/sb-errors.c index caf7669db6a1..5f5bcae391fb 100644 --- a/fs/bcachefs/sb-errors.c +++ b/fs/bcachefs/sb-errors.c @@ -20,9 +20,7 @@ static void bch2_sb_error_id_to_text(struct printbuf *out, enum bch_sb_error_id static inline unsigned bch2_sb_field_errors_nr_entries(struct bch_sb_field_errors *e) { - return e - ? (bch2_sb_field_bytes(&e->field) - sizeof(*e)) / sizeof(e->entries[0]) - : 0; + return bch2_sb_field_nr_entries(e); } static inline unsigned bch2_sb_field_errors_u64s(unsigned nr) diff --git a/fs/bcachefs/super-io.c b/fs/bcachefs/super-io.c index e085d3b021e8..4c98d8cc2a79 100644 --- a/fs/bcachefs/super-io.c +++ b/fs/bcachefs/super-io.c @@ -13,6 +13,7 @@ #include "replicas.h" #include "quota.h" #include "sb-clean.h" +#include "sb-downgrade.h" #include "sb-errors.h" #include "sb-members.h" #include "super-io.h" @@ -939,6 +940,7 @@ int bch2_write_super(struct bch_fs *c) bch2_sb_members_from_cpu(c); bch2_sb_members_cpy_v2_v1(&c->disk_sb); bch2_sb_errors_from_cpu(c); + bch2_sb_downgrade_update(c); for_each_online_member(ca, c, i) bch2_sb_from_fs(c, ca); @@ -1062,8 +1064,10 @@ void __bch2_check_set_feature(struct bch_fs *c, unsigned feat) } /* Downgrade if superblock is at a higher version than currently supported: */ -void bch2_sb_maybe_downgrade(struct bch_fs *c) +bool bch2_check_version_downgrade(struct bch_fs *c) { + bool ret = bcachefs_metadata_version_current < c->sb.version; + lockdep_assert_held(&c->sb_lock); /* @@ -1077,12 +1081,17 @@ void bch2_sb_maybe_downgrade(struct bch_fs *c) if (c->sb.version_min > bcachefs_metadata_version_current) c->disk_sb.sb->version_min = cpu_to_le16(bcachefs_metadata_version_current); c->disk_sb.sb->compat[0] &= cpu_to_le64((1ULL << BCH_COMPAT_NR) - 1); + return ret; } void bch2_sb_upgrade(struct bch_fs *c, unsigned new_version) { lockdep_assert_held(&c->sb_lock); + if (BCH_VERSION_MAJOR(new_version) > + BCH_VERSION_MAJOR(le16_to_cpu(c->disk_sb.sb->version))) + bch2_sb_field_resize(&c->disk_sb, downgrade, 0); + c->disk_sb.sb->version = cpu_to_le16(new_version); c->disk_sb.sb->features[0] |= cpu_to_le64(BCH_SB_FEATURES_ALL); } diff --git a/fs/bcachefs/super-io.h b/fs/bcachefs/super-io.h index 589509ebe996..e41e5de531a0 100644 --- a/fs/bcachefs/super-io.h +++ b/fs/bcachefs/super-io.h @@ -93,7 +93,7 @@ static inline void bch2_check_set_feature(struct bch_fs *c, unsigned feat) __bch2_check_set_feature(c, feat); } -void bch2_sb_maybe_downgrade(struct bch_fs *); +bool bch2_check_version_downgrade(struct bch_fs *); void bch2_sb_upgrade(struct bch_fs *, unsigned); void bch2_sb_field_to_text(struct printbuf *, struct bch_sb *, From 0d72ab35a925d66b044cb62b709e53141c3f0143 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Fri, 29 Dec 2023 21:16:32 -0500 Subject: [PATCH 13/13] bcachefs: make RO snapshots actually RO Add checks to all the VFS paths for "are we in a RO snapshot?". Note - we don't check this when setting inode options via our xattr interface, since those generally only affect data placement, not contents of data. Signed-off-by: Kent Overstreet Reported-by: "Carl E. Thompson" --- fs/bcachefs/acl.c | 3 ++- fs/bcachefs/fs-ioctl.c | 12 +++++------- fs/bcachefs/fs.c | 38 +++++++++++++++++++++++++++++++++----- fs/bcachefs/subvolume.c | 18 ++++++++++++++++++ fs/bcachefs/subvolume.h | 3 +++ fs/bcachefs/xattr.c | 3 ++- 6 files changed, 63 insertions(+), 14 deletions(-) diff --git a/fs/bcachefs/acl.c b/fs/bcachefs/acl.c index f3809897f00a..3640f417cce1 100644 --- a/fs/bcachefs/acl.c +++ b/fs/bcachefs/acl.c @@ -366,7 +366,8 @@ int bch2_set_acl(struct mnt_idmap *idmap, bch2_trans_begin(trans); acl = _acl; - ret = bch2_inode_peek(trans, &inode_iter, &inode_u, inode_inum(inode), + ret = bch2_subvol_is_ro_trans(trans, inode->ei_subvol) ?: + bch2_inode_peek(trans, &inode_iter, &inode_u, inode_inum(inode), BTREE_ITER_INTENT); if (ret) goto btree_err; diff --git a/fs/bcachefs/fs-ioctl.c b/fs/bcachefs/fs-ioctl.c index a70b7a03057d..14d5cc6f90d7 100644 --- a/fs/bcachefs/fs-ioctl.c +++ b/fs/bcachefs/fs-ioctl.c @@ -100,7 +100,8 @@ static int bch2_ioc_setflags(struct bch_fs *c, } mutex_lock(&inode->ei_update_lock); - ret = bch2_write_inode(c, inode, bch2_inode_flags_set, &s, + ret = bch2_subvol_is_ro(c, inode->ei_subvol) ?: + bch2_write_inode(c, inode, bch2_inode_flags_set, &s, ATTR_CTIME); mutex_unlock(&inode->ei_update_lock); @@ -183,13 +184,10 @@ static int bch2_ioc_fssetxattr(struct bch_fs *c, } mutex_lock(&inode->ei_update_lock); - ret = bch2_set_projid(c, inode, fa.fsx_projid); - if (ret) - goto err_unlock; - - ret = bch2_write_inode(c, inode, fssetxattr_inode_update_fn, &s, + ret = bch2_subvol_is_ro(c, inode->ei_subvol) ?: + bch2_set_projid(c, inode, fa.fsx_projid) ?: + bch2_write_inode(c, inode, fssetxattr_inode_update_fn, &s, ATTR_CTIME); -err_unlock: mutex_unlock(&inode->ei_update_lock); err: inode_unlock(&inode->v); diff --git a/fs/bcachefs/fs.c b/fs/bcachefs/fs.c index ba93e32d7708..49da8db1d9e9 100644 --- a/fs/bcachefs/fs.c +++ b/fs/bcachefs/fs.c @@ -258,7 +258,8 @@ __bch2_create(struct mnt_idmap *idmap, retry: bch2_trans_begin(trans); - ret = bch2_create_trans(trans, + ret = bch2_subvol_is_ro_trans(trans, dir->ei_subvol) ?: + bch2_create_trans(trans, inode_inum(dir), &dir_u, &inode_u, !(flags & BCH_CREATE_TMPFILE) ? &dentry->d_name : NULL, @@ -430,7 +431,9 @@ static int bch2_link(struct dentry *old_dentry, struct inode *vdir, lockdep_assert_held(&inode->v.i_rwsem); - ret = __bch2_link(c, inode, dir, dentry); + ret = bch2_subvol_is_ro(c, dir->ei_subvol) ?: + bch2_subvol_is_ro(c, inode->ei_subvol) ?: + __bch2_link(c, inode, dir, dentry); if (unlikely(ret)) return ret; @@ -481,7 +484,11 @@ int __bch2_unlink(struct inode *vdir, struct dentry *dentry, static int bch2_unlink(struct inode *vdir, struct dentry *dentry) { - return __bch2_unlink(vdir, dentry, false); + struct bch_inode_info *dir= to_bch_ei(vdir); + struct bch_fs *c = dir->v.i_sb->s_fs_info; + + return bch2_subvol_is_ro(c, dir->ei_subvol) ?: + __bch2_unlink(vdir, dentry, false); } static int bch2_symlink(struct mnt_idmap *idmap, @@ -562,6 +569,11 @@ static int bch2_rename2(struct mnt_idmap *idmap, src_inode, dst_inode); + ret = bch2_subvol_is_ro_trans(trans, src_dir->ei_subvol) ?: + bch2_subvol_is_ro_trans(trans, dst_dir->ei_subvol); + if (ret) + goto err; + if (inode_attr_changing(dst_dir, src_inode, Inode_opt_project)) { ret = bch2_fs_quota_transfer(c, src_inode, dst_dir->ei_qid, @@ -783,11 +795,13 @@ static int bch2_setattr(struct mnt_idmap *idmap, struct dentry *dentry, struct iattr *iattr) { struct bch_inode_info *inode = to_bch_ei(dentry->d_inode); + struct bch_fs *c = inode->v.i_sb->s_fs_info; int ret; lockdep_assert_held(&inode->v.i_rwsem); - ret = setattr_prepare(idmap, dentry, iattr); + ret = bch2_subvol_is_ro(c, inode->ei_subvol) ?: + setattr_prepare(idmap, dentry, iattr); if (ret) return ret; @@ -1010,12 +1024,26 @@ static int bch2_vfs_readdir(struct file *file, struct dir_context *ctx) return bch2_err_class(ret); } +static int bch2_open(struct inode *vinode, struct file *file) +{ + if (file->f_flags & (O_WRONLY|O_RDWR)) { + struct bch_inode_info *inode = to_bch_ei(vinode); + struct bch_fs *c = inode->v.i_sb->s_fs_info; + + int ret = bch2_subvol_is_ro(c, inode->ei_subvol); + if (ret) + return ret; + } + + return generic_file_open(vinode, file); +} + static const struct file_operations bch_file_operations = { + .open = bch2_open, .llseek = bch2_llseek, .read_iter = bch2_read_iter, .write_iter = bch2_write_iter, .mmap = bch2_mmap, - .open = generic_file_open, .fsync = bch2_fsync, .splice_read = filemap_splice_read, .splice_write = iter_file_splice_write, diff --git a/fs/bcachefs/subvolume.c b/fs/bcachefs/subvolume.c index fccd25aa3242..22b34a8e4d6e 100644 --- a/fs/bcachefs/subvolume.c +++ b/fs/bcachefs/subvolume.c @@ -146,6 +146,24 @@ int bch2_subvolume_get(struct btree_trans *trans, unsigned subvol, return bch2_subvolume_get_inlined(trans, subvol, inconsistent_if_not_found, iter_flags, s); } +int bch2_subvol_is_ro_trans(struct btree_trans *trans, u32 subvol) +{ + struct bch_subvolume s; + int ret = bch2_subvolume_get_inlined(trans, subvol, true, 0, &s); + if (ret) + return ret; + + if (BCH_SUBVOLUME_RO(&s)) + return -EROFS; + return 0; +} + +int bch2_subvol_is_ro(struct bch_fs *c, u32 subvol) +{ + return bch2_trans_do(c, NULL, NULL, 0, + bch2_subvol_is_ro_trans(trans, subvol)); +} + int bch2_snapshot_get_subvol(struct btree_trans *trans, u32 snapshot, struct bch_subvolume *subvol) { diff --git a/fs/bcachefs/subvolume.h b/fs/bcachefs/subvolume.h index a1003d30ab0a..a6f56f66e27c 100644 --- a/fs/bcachefs/subvolume.h +++ b/fs/bcachefs/subvolume.h @@ -23,6 +23,9 @@ int bch2_subvolume_get(struct btree_trans *, unsigned, bool, int, struct bch_subvolume *); int bch2_subvolume_get_snapshot(struct btree_trans *, u32, u32 *); +int bch2_subvol_is_ro_trans(struct btree_trans *, u32); +int bch2_subvol_is_ro(struct bch_fs *, u32); + int bch2_delete_dead_snapshots(struct bch_fs *); void bch2_delete_dead_snapshots_async(struct bch_fs *); diff --git a/fs/bcachefs/xattr.c b/fs/bcachefs/xattr.c index 79d982674c18..5a1858fb9879 100644 --- a/fs/bcachefs/xattr.c +++ b/fs/bcachefs/xattr.c @@ -176,7 +176,8 @@ int bch2_xattr_set(struct btree_trans *trans, subvol_inum inum, struct btree_iter inode_iter = { NULL }; int ret; - ret = bch2_inode_peek(trans, &inode_iter, inode_u, inum, BTREE_ITER_INTENT); + ret = bch2_subvol_is_ro_trans(trans, inum.subvol) ?: + bch2_inode_peek(trans, &inode_iter, inode_u, inum, BTREE_ITER_INTENT); if (ret) return ret;