From bedd6fe4d357f3cffb392f2153b52ef71f810259 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Mon, 11 Dec 2023 18:40:17 -0500 Subject: [PATCH 1/8] bcachefs: Fix nocow locks deadlock On trylock failure we were waiting for outstanding reads to complete - but nocow locks need to be held until the whole move is finished. Signed-off-by: Kent Overstreet --- fs/bcachefs/data_update.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/bcachefs/data_update.c b/fs/bcachefs/data_update.c index 2418c528c533..b05457d284a6 100644 --- a/fs/bcachefs/data_update.c +++ b/fs/bcachefs/data_update.c @@ -560,7 +560,8 @@ int bch2_data_update_init(struct btree_trans *trans, move_ctxt_wait_event(ctxt, (locked = bch2_bucket_nocow_trylock(&c->nocow_locks, PTR_BUCKET_POS(c, &p.ptr), 0)) || - !atomic_read(&ctxt->read_sectors)); + (!atomic_read(&ctxt->read_sectors) && + !atomic_read(&ctxt->write_sectors))); if (!locked) bch2_bucket_nocow_lock(&c->nocow_locks, From 8bf771972b8468b6a841d088141ac2960e6927fd Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Wed, 13 Dec 2023 17:51:04 +0100 Subject: [PATCH 2/8] bcachefs: Fix determining required file handle length The ->encode_fh method is responsible for setting amount of space required for storing the file handle if not enough space was provided. bch2_encode_fh() was not setting required length in that case which breaks e.g. fanotify. Fix it. Reported-by: Petr Vorel Signed-off-by: Jan Kara Signed-off-by: Kent Overstreet --- fs/bcachefs/fs.c | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/fs/bcachefs/fs.c b/fs/bcachefs/fs.c index 371565e02ff2..ba93e32d7708 100644 --- a/fs/bcachefs/fs.c +++ b/fs/bcachefs/fs.c @@ -1143,24 +1143,33 @@ static int bch2_encode_fh(struct inode *vinode, u32 *fh, int *len, { struct bch_inode_info *inode = to_bch_ei(vinode); struct bch_inode_info *dir = to_bch_ei(vdir); - - if (*len < sizeof(struct bcachefs_fid_with_parent) / sizeof(u32)) - return FILEID_INVALID; + int min_len; if (!S_ISDIR(inode->v.i_mode) && dir) { struct bcachefs_fid_with_parent *fid = (void *) fh; + min_len = sizeof(*fid) / sizeof(u32); + if (*len < min_len) { + *len = min_len; + return FILEID_INVALID; + } + fid->fid = bch2_inode_to_fid(inode); fid->dir = bch2_inode_to_fid(dir); - *len = sizeof(*fid) / sizeof(u32); + *len = min_len; return FILEID_BCACHEFS_WITH_PARENT; } else { struct bcachefs_fid *fid = (void *) fh; + min_len = sizeof(*fid) / sizeof(u32); + if (*len < min_len) { + *len = min_len; + return FILEID_INVALID; + } *fid = bch2_inode_to_fid(inode); - *len = sizeof(*fid) / sizeof(u32); + *len = min_len; return FILEID_BCACHEFS_WITHOUT_PARENT; } } From 50a8a732d2db64507ba7cd4ebe66538d9c40bea8 Mon Sep 17 00:00:00 2001 From: Thomas Bertschinger Date: Thu, 14 Dec 2023 12:06:41 -0700 Subject: [PATCH 3/8] bcachefs: fix invalid memory access in bch2_fs_alloc() error path When bch2_fs_alloc() gets an error before calling bch2_fs_btree_iter_init(), bch2_fs_btree_iter_exit() makes an invalid memory access because btree_trans_list is uninitialized. Signed-off-by: Thomas Bertschinger Fixes: 6bd68ec266ad ("bcachefs: Heap allocate btree_trans") Signed-off-by: Kent Overstreet --- fs/bcachefs/btree_iter.c | 8 ++++++-- fs/bcachefs/btree_iter.h | 1 + fs/bcachefs/super.c | 1 + 3 files changed, 8 insertions(+), 2 deletions(-) diff --git a/fs/bcachefs/btree_iter.c b/fs/bcachefs/btree_iter.c index 8e0fe65f6101..6be79129738d 100644 --- a/fs/bcachefs/btree_iter.c +++ b/fs/bcachefs/btree_iter.c @@ -3214,10 +3214,9 @@ void bch2_fs_btree_iter_exit(struct bch_fs *c) mempool_exit(&c->btree_trans_pool); } -int bch2_fs_btree_iter_init(struct bch_fs *c) +void bch2_fs_btree_iter_init_early(struct bch_fs *c) { struct btree_transaction_stats *s; - int ret; for (s = c->btree_transaction_stats; s < c->btree_transaction_stats + ARRAY_SIZE(c->btree_transaction_stats); @@ -3228,6 +3227,11 @@ int bch2_fs_btree_iter_init(struct bch_fs *c) INIT_LIST_HEAD(&c->btree_trans_list); seqmutex_init(&c->btree_trans_lock); +} + +int bch2_fs_btree_iter_init(struct bch_fs *c) +{ + int ret; c->btree_trans_bufs = alloc_percpu(struct btree_trans_buf); if (!c->btree_trans_bufs) diff --git a/fs/bcachefs/btree_iter.h b/fs/bcachefs/btree_iter.h index 85e7cb52f6b6..eaffced4c132 100644 --- a/fs/bcachefs/btree_iter.h +++ b/fs/bcachefs/btree_iter.h @@ -938,6 +938,7 @@ unsigned bch2_trans_get_fn_idx(const char *); void bch2_btree_trans_to_text(struct printbuf *, struct btree_trans *); void bch2_fs_btree_iter_exit(struct bch_fs *); +void bch2_fs_btree_iter_init_early(struct bch_fs *); int bch2_fs_btree_iter_init(struct bch_fs *); #endif /* _BCACHEFS_BTREE_ITER_H */ diff --git a/fs/bcachefs/super.c b/fs/bcachefs/super.c index f63474c5c5a2..cfa91188dd4e 100644 --- a/fs/bcachefs/super.c +++ b/fs/bcachefs/super.c @@ -714,6 +714,7 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts) bch2_fs_copygc_init(c); bch2_fs_btree_key_cache_init_early(&c->btree_key_cache); + bch2_fs_btree_iter_init_early(c); bch2_fs_btree_interior_update_init_early(c); bch2_fs_allocator_background_init(c); bch2_fs_allocator_foreground_init(c); From 85c6db980989ddc119ea1647ad72a4ec5a4e06f2 Mon Sep 17 00:00:00 2001 From: Daniel Hill Date: Tue, 5 Dec 2023 19:10:28 +1300 Subject: [PATCH 4/8] bcachefs: improve modprobe support by providing softdeps We need to help modprobe load architecture specific modules so we don't fall back to generic software implementations, this should help performance when building as a module. Signed-off-by: Daniel Hill Signed-off-by: Kent Overstreet --- fs/bcachefs/super.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/fs/bcachefs/super.c b/fs/bcachefs/super.c index cfa91188dd4e..818ec467a06b 100644 --- a/fs/bcachefs/super.c +++ b/fs/bcachefs/super.c @@ -72,6 +72,12 @@ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Kent Overstreet "); MODULE_DESCRIPTION("bcachefs filesystem"); +MODULE_SOFTDEP("pre: crc32c"); +MODULE_SOFTDEP("pre: crc64"); +MODULE_SOFTDEP("pre: sha256"); +MODULE_SOFTDEP("pre: chacha20"); +MODULE_SOFTDEP("pre: poly1305"); +MODULE_SOFTDEP("pre: xxhash"); #define KTYPE(type) \ static const struct attribute_group type ## _group = { \ From e8c7692718bb001505602aa0eb48f142c389c27a Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sun, 17 Dec 2023 15:41:03 -0500 Subject: [PATCH 5/8] bcachefs: print explicit recovery pass message only once Signed-off-by: Kent Overstreet --- fs/bcachefs/recovery.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/fs/bcachefs/recovery.h b/fs/bcachefs/recovery.h index 852d30567da9..d266aae90200 100644 --- a/fs/bcachefs/recovery.h +++ b/fs/bcachefs/recovery.h @@ -10,6 +10,9 @@ extern const char * const bch2_recovery_passes[]; static inline int bch2_run_explicit_recovery_pass(struct bch_fs *c, enum bch_recovery_pass pass) { + if (c->recovery_passes_explicit & BIT_ULL(pass)) + return 0; + bch_info(c, "running explicit recovery pass %s (%u), currently at %s (%u)", bch2_recovery_passes[pass], pass, bch2_recovery_passes[c->curr_recovery_pass], c->curr_recovery_pass); From 0fa3b97767019be4556a8f081b742aaaabd2bd9e Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sun, 17 Dec 2023 23:20:59 -0500 Subject: [PATCH 6/8] bcachefs: btree_node_u64s_with_format() takes nr keys Signed-off-by: Kent Overstreet --- fs/bcachefs/btree_update_interior.c | 27 ++++++++++++++------------- fs/bcachefs/btree_update_interior.h | 4 ---- 2 files changed, 14 insertions(+), 17 deletions(-) diff --git a/fs/bcachefs/btree_update_interior.c b/fs/bcachefs/btree_update_interior.c index 26be38ab6ecb..19fa160238ea 100644 --- a/fs/bcachefs/btree_update_interior.c +++ b/fs/bcachefs/btree_update_interior.c @@ -99,7 +99,7 @@ static void btree_node_interior_verify(struct bch_fs *c, struct btree *b) /* Calculate ideal packed bkey format for new btree nodes: */ -void __bch2_btree_calc_format(struct bkey_format_state *s, struct btree *b) +static void __bch2_btree_calc_format(struct bkey_format_state *s, struct btree *b) { struct bkey_packed *k; struct bset_tree *t; @@ -125,21 +125,20 @@ static struct bkey_format bch2_btree_calc_format(struct btree *b) return bch2_bkey_format_done(&s); } -static size_t btree_node_u64s_with_format(struct btree *b, +static size_t btree_node_u64s_with_format(struct btree_nr_keys nr, + struct bkey_format *old_f, struct bkey_format *new_f) { - struct bkey_format *old_f = &b->format; - /* stupid integer promotion rules */ ssize_t delta = (((int) new_f->key_u64s - old_f->key_u64s) * - (int) b->nr.packed_keys) + + (int) nr.packed_keys) + (((int) new_f->key_u64s - BKEY_U64s) * - (int) b->nr.unpacked_keys); + (int) nr.unpacked_keys); - BUG_ON(delta + b->nr.live_u64s < 0); + BUG_ON(delta + nr.live_u64s < 0); - return b->nr.live_u64s + delta; + return nr.live_u64s + delta; } /** @@ -147,16 +146,18 @@ static size_t btree_node_u64s_with_format(struct btree *b, * * @c: filesystem handle * @b: btree node to rewrite + * @nr: number of keys for new node (i.e. b->nr) * @new_f: bkey format to translate keys to * * Returns: true if all re-packed keys will be able to fit in a new node. * * Assumes all keys will successfully pack with the new format. */ -bool bch2_btree_node_format_fits(struct bch_fs *c, struct btree *b, +static bool bch2_btree_node_format_fits(struct bch_fs *c, struct btree *b, + struct btree_nr_keys nr, struct bkey_format *new_f) { - size_t u64s = btree_node_u64s_with_format(b, new_f); + size_t u64s = btree_node_u64s_with_format(nr, &b->format, new_f); return __vstruct_bytes(struct btree_node, u64s) < btree_bytes(c); } @@ -391,7 +392,7 @@ static struct btree *bch2_btree_node_alloc_replacement(struct btree_update *as, * The keys might expand with the new format - if they wouldn't fit in * the btree node anymore, use the old format for now: */ - if (!bch2_btree_node_format_fits(as->c, b, &format)) + if (!bch2_btree_node_format_fits(as->c, b, b->nr, &format)) format = b->format; SET_BTREE_NODE_SEQ(n->data, BTREE_NODE_SEQ(b->data) + 1); @@ -1822,8 +1823,8 @@ int __bch2_foreground_maybe_merge(struct btree_trans *trans, bch2_bkey_format_add_pos(&new_s, next->data->max_key); new_f = bch2_bkey_format_done(&new_s); - sib_u64s = btree_node_u64s_with_format(b, &new_f) + - btree_node_u64s_with_format(m, &new_f); + sib_u64s = btree_node_u64s_with_format(b->nr, &b->format, &new_f) + + btree_node_u64s_with_format(m->nr, &m->format, &new_f); if (sib_u64s > BTREE_FOREGROUND_MERGE_HYSTERESIS(c)) { sib_u64s -= BTREE_FOREGROUND_MERGE_HYSTERESIS(c); diff --git a/fs/bcachefs/btree_update_interior.h b/fs/bcachefs/btree_update_interior.h index 031076e75fa1..a6668992a272 100644 --- a/fs/bcachefs/btree_update_interior.h +++ b/fs/bcachefs/btree_update_interior.h @@ -6,10 +6,6 @@ #include "btree_locking.h" #include "btree_update.h" -void __bch2_btree_calc_format(struct bkey_format_state *, struct btree *); -bool bch2_btree_node_format_fits(struct bch_fs *c, struct btree *, - struct bkey_format *); - #define BTREE_UPDATE_NODES_MAX ((BTREE_MAX_DEPTH - 2) * 2 + GC_MERGE_NODES) #define BTREE_UPDATE_JOURNAL_RES (BTREE_UPDATE_NODES_MAX * (BKEY_BTREE_PTR_U64s_MAX + 1)) From 7ba1f6ec97c7afec5787ab8e92a6a7e24f0459aa Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sun, 17 Dec 2023 23:31:26 -0500 Subject: [PATCH 7/8] bcachefs; guard against overflow in btree node split Signed-off-by: Kent Overstreet --- fs/bcachefs/btree_update_interior.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/fs/bcachefs/btree_update_interior.c b/fs/bcachefs/btree_update_interior.c index 19fa160238ea..239fcc3c7c99 100644 --- a/fs/bcachefs/btree_update_interior.c +++ b/fs/bcachefs/btree_update_interior.c @@ -1346,8 +1346,11 @@ static void __btree_split_node(struct btree_update *as, struct bkey_packed *out[2]; struct bkey uk; unsigned u64s, n1_u64s = (b->nr.live_u64s * 3) / 5; + struct { unsigned nr_keys, val_u64s; } nr_keys[2]; int i; + memset(&nr_keys, 0, sizeof(nr_keys)); + for (i = 0; i < 2; i++) { BUG_ON(n[i]->nsets != 1); @@ -1369,6 +1372,9 @@ static void __btree_split_node(struct btree_update *as, if (!i) n1_pos = uk.p; bch2_bkey_format_add_key(&format[i], &uk); + + nr_keys[i].nr_keys++; + nr_keys[i].val_u64s += bkeyp_val_u64s(&b->format, k); } btree_set_min(n[0], b->data->min_key); @@ -1381,6 +1387,12 @@ static void __btree_split_node(struct btree_update *as, bch2_bkey_format_add_pos(&format[i], n[i]->data->max_key); n[i]->data->format = bch2_bkey_format_done(&format[i]); + + unsigned u64s = nr_keys[i].nr_keys * n[i]->data->format.key_u64s + + nr_keys[i].val_u64s; + if (__vstruct_bytes(struct btree_node, u64s) > btree_bytes(as->c)) + n[i]->data->format = b->format; + btree_node_set_format(n[i], n[i]->data->format); } From 247ce5f1bb3ea90879e8552b8edf4885b9a9f849 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Tue, 19 Dec 2023 17:16:34 -0500 Subject: [PATCH 8/8] bcachefs: Fix bch2_alloc_sectors_start_trans() error handling When we fail to allocate because of insufficient open buckets, we don't want to retry from the full set of devices - we just want to retry in blocking mode. But if the retry in blocking mode fails with a different error code, we end up squashing the -BCH_ERR_open_buckets_empty error with an error that makes us thing we won't be able to allocate (insufficient_devices) - which is incorrect when we didn't try to allocate from the full set of devices, and causes the write to fail. Signed-off-by: Kent Overstreet --- fs/bcachefs/alloc_foreground.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/fs/bcachefs/alloc_foreground.c b/fs/bcachefs/alloc_foreground.c index 1ba0eeb7552a..0e6157982607 100644 --- a/fs/bcachefs/alloc_foreground.c +++ b/fs/bcachefs/alloc_foreground.c @@ -1374,8 +1374,17 @@ int bch2_alloc_sectors_start_trans(struct btree_trans *trans, goto alloc_done; /* Don't retry from all devices if we're out of open buckets: */ - if (bch2_err_matches(ret, BCH_ERR_open_buckets_empty)) - goto allocate_blocking; + if (bch2_err_matches(ret, BCH_ERR_open_buckets_empty)) { + int ret = open_bucket_add_buckets(trans, &ptrs, wp, devs_have, + target, erasure_code, + nr_replicas, &nr_effective, + &have_cache, watermark, + flags, cl); + if (!ret || + bch2_err_matches(ret, BCH_ERR_transaction_restart) || + bch2_err_matches(ret, BCH_ERR_open_buckets_empty)) + goto alloc_done; + } /* * Only try to allocate cache (durability = 0 devices) from the @@ -1389,7 +1398,6 @@ int bch2_alloc_sectors_start_trans(struct btree_trans *trans, &have_cache, watermark, flags, cl); } else { -allocate_blocking: ret = open_bucket_add_buckets(trans, &ptrs, wp, devs_have, target, erasure_code, nr_replicas, &nr_effective,