diff --git a/fs/bcachefs/bcachefs.h b/fs/bcachefs/bcachefs.h index 799569d1778a..d5fc5eed73ae 100644 --- a/fs/bcachefs/bcachefs.h +++ b/fs/bcachefs/bcachefs.h @@ -539,11 +539,13 @@ struct journal_keys { struct journal_key { enum btree_id btree_id:8; unsigned level:8; + bool allocated; struct bkey_i *k; u32 journal_seq; u32 journal_offset; } *d; size_t nr; + size_t size; u64 journal_seq_base; }; @@ -840,6 +842,7 @@ mempool_t bio_bounce_pages; struct journal journal; struct list_head journal_entries; struct journal_keys journal_keys; + struct list_head journal_iters; u64 last_bucket_seq_cleanup; diff --git a/fs/bcachefs/btree_gc.c b/fs/bcachefs/btree_gc.c index 6023af960101..8f347ba5b4e6 100644 --- a/fs/bcachefs/btree_gc.c +++ b/fs/bcachefs/btree_gc.c @@ -274,7 +274,6 @@ static int bch2_gc_btree(struct bch_fs *c, enum btree_id btree_id, } static int bch2_gc_btree_init_recurse(struct bch_fs *c, struct btree *b, - struct journal_keys *journal_keys, unsigned target_depth) { struct btree_and_journal_iter iter; @@ -283,7 +282,7 @@ static int bch2_gc_btree_init_recurse(struct bch_fs *c, struct btree *b, u8 max_stale = 0; int ret = 0; - bch2_btree_and_journal_iter_init_node_iter(&iter, journal_keys, b); + bch2_btree_and_journal_iter_init_node_iter(&iter, c, b); bch2_bkey_buf_init(&prev); bch2_bkey_buf_init(&cur); bkey_init(&prev.k->k); @@ -320,7 +319,7 @@ static int bch2_gc_btree_init_recurse(struct bch_fs *c, struct btree *b, break; ret = bch2_gc_btree_init_recurse(c, child, - journal_keys, target_depth); + target_depth); six_unlock_read(&child->c.lock); if (ret) @@ -333,11 +332,11 @@ static int bch2_gc_btree_init_recurse(struct bch_fs *c, struct btree *b, bch2_bkey_buf_exit(&cur, c); bch2_bkey_buf_exit(&prev, c); + bch2_btree_and_journal_iter_exit(&iter); return ret; } static int bch2_gc_btree_init(struct bch_fs *c, - struct journal_keys *journal_keys, enum btree_id btree_id) { struct btree *b; @@ -368,8 +367,7 @@ static int bch2_gc_btree_init(struct bch_fs *c, } if (b->c.level >= target_depth) - ret = bch2_gc_btree_init_recurse(c, b, - journal_keys, target_depth); + ret = bch2_gc_btree_init_recurse(c, b, target_depth); if (!ret) ret = bch2_gc_mark_key(c, bkey_i_to_s_c(&b->key), @@ -386,8 +384,7 @@ static inline int btree_id_gc_phase_cmp(enum btree_id l, enum btree_id r) (int) btree_id_to_gc_phase(r); } -static int bch2_gc_btrees(struct bch_fs *c, struct journal_keys *journal_keys, - bool initial) +static int bch2_gc_btrees(struct bch_fs *c, bool initial) { enum btree_id ids[BTREE_ID_NR]; unsigned i; @@ -399,8 +396,7 @@ static int bch2_gc_btrees(struct bch_fs *c, struct journal_keys *journal_keys, for (i = 0; i < BTREE_ID_NR; i++) { enum btree_id id = ids[i]; int ret = initial - ? bch2_gc_btree_init(c, journal_keys, - id) + ? bch2_gc_btree_init(c, id) : bch2_gc_btree(c, id, initial); if (ret) return ret; @@ -788,8 +784,7 @@ static int bch2_gc_start(struct bch_fs *c) * move around - if references move backwards in the ordering GC * uses, GC could skip past them */ -int bch2_gc(struct bch_fs *c, struct journal_keys *journal_keys, - bool initial) +int bch2_gc(struct bch_fs *c, bool initial) { struct bch_dev *ca; u64 start_time = local_clock(); @@ -811,7 +806,7 @@ int bch2_gc(struct bch_fs *c, struct journal_keys *journal_keys, bch2_mark_superblocks(c); - ret = bch2_gc_btrees(c, journal_keys, initial); + ret = bch2_gc_btrees(c, initial); if (ret) goto out; @@ -1384,7 +1379,7 @@ static int bch2_gc_thread(void *arg) * Full gc is currently incompatible with btree key cache: */ #if 0 - ret = bch2_gc(c, NULL, false, false); + ret = bch2_gc(c, false, false); #else ret = bch2_gc_gens(c); #endif diff --git a/fs/bcachefs/btree_gc.h b/fs/bcachefs/btree_gc.h index f0435a58793b..fa604efc70cc 100644 --- a/fs/bcachefs/btree_gc.h +++ b/fs/bcachefs/btree_gc.h @@ -6,8 +6,7 @@ void bch2_coalesce(struct bch_fs *); -struct journal_keys; -int bch2_gc(struct bch_fs *, struct journal_keys *, bool); +int bch2_gc(struct bch_fs *, bool); int bch2_gc_gens(struct bch_fs *); void bch2_gc_thread_stop(struct bch_fs *); int bch2_gc_thread_start(struct bch_fs *); diff --git a/fs/bcachefs/recovery.c b/fs/bcachefs/recovery.c index 422f2fbe6dfb..88a1d47e6e4b 100644 --- a/fs/bcachefs/recovery.c +++ b/fs/bcachefs/recovery.c @@ -40,78 +40,169 @@ static void drop_alloc_keys(struct journal_keys *keys) /* iterate over keys read from the journal: */ -static struct journal_key *journal_key_search(struct journal_keys *journal_keys, - enum btree_id id, unsigned level, - struct bpos pos) +static int __journal_key_cmp(enum btree_id l_btree_id, + unsigned l_level, + struct bpos l_pos, + struct journal_key *r) +{ + return (cmp_int(l_btree_id, r->btree_id) ?: + cmp_int(l_level, r->level) ?: + bkey_cmp(l_pos, r->k->k.p)); +} + +static int journal_key_cmp(struct journal_key *l, struct journal_key *r) +{ + return (cmp_int(l->btree_id, r->btree_id) ?: + cmp_int(l->level, r->level) ?: + bkey_cmp(l->k->k.p, r->k->k.p)); +} + +static size_t journal_key_search(struct journal_keys *journal_keys, + enum btree_id id, unsigned level, + struct bpos pos) { size_t l = 0, r = journal_keys->nr, m; while (l < r) { m = l + ((r - l) >> 1); - if ((cmp_int(id, journal_keys->d[m].btree_id) ?: - cmp_int(level, journal_keys->d[m].level) ?: - bkey_cmp(pos, journal_keys->d[m].k->k.p)) > 0) + if (__journal_key_cmp(id, level, pos, &journal_keys->d[m]) > 0) l = m + 1; else r = m; } BUG_ON(l < journal_keys->nr && - (cmp_int(id, journal_keys->d[l].btree_id) ?: - cmp_int(level, journal_keys->d[l].level) ?: - bkey_cmp(pos, journal_keys->d[l].k->k.p)) > 0); + __journal_key_cmp(id, level, pos, &journal_keys->d[l]) > 0); BUG_ON(l && - (cmp_int(id, journal_keys->d[l - 1].btree_id) ?: - cmp_int(level, journal_keys->d[l - 1].level) ?: - bkey_cmp(pos, journal_keys->d[l - 1].k->k.p)) <= 0); + __journal_key_cmp(id, level, pos, &journal_keys->d[l - 1]) <= 0); - return l < journal_keys->nr ? journal_keys->d + l : NULL; + return l; +} + +static void journal_iter_fix(struct bch_fs *c, struct journal_iter *iter, unsigned idx) +{ + struct bkey_i *n = iter->keys->d[idx].k; + struct btree_and_journal_iter *biter = + container_of(iter, struct btree_and_journal_iter, journal); + + if (iter->idx > idx || + (iter->idx == idx && + biter->last && + bkey_cmp(n->k.p, biter->unpacked.p) <= 0)) + iter->idx++; +} + +int bch2_journal_key_insert(struct bch_fs *c, enum btree_id id, + unsigned level, struct bkey_i *k) +{ + struct journal_key n = { + .btree_id = id, + .level = level, + .k = k, + .allocated = true + }; + struct journal_keys *keys = &c->journal_keys; + struct journal_iter *iter; + unsigned idx = journal_key_search(keys, id, level, k->k.p); + + if (idx < keys->nr && + journal_key_cmp(&n, &keys->d[idx]) == 0) { + if (keys->d[idx].allocated) + kfree(keys->d[idx].k); + keys->d[idx] = n; + return 0; + } + + if (keys->nr == keys->size) { + struct journal_keys new_keys = { + .nr = keys->nr, + .size = keys->size * 2, + .journal_seq_base = keys->journal_seq_base, + }; + + new_keys.d = kvmalloc(sizeof(new_keys.d[0]) * new_keys.size, GFP_KERNEL); + if (!new_keys.d) + return -ENOMEM; + + memcpy(new_keys.d, keys->d, sizeof(keys->d[0]) * keys->nr); + kvfree(keys->d); + *keys = new_keys; + } + + array_insert_item(keys->d, keys->nr, idx, n); + + list_for_each_entry(iter, &c->journal_iters, list) + journal_iter_fix(c, iter, idx); + + return 0; +} + +int bch2_journal_key_delete(struct bch_fs *c, enum btree_id id, + unsigned level, struct bpos pos) +{ + struct bkey_i *whiteout = + kmalloc(sizeof(struct bkey), GFP_KERNEL); + int ret; + + if (!whiteout) + return -ENOMEM; + + bkey_init(&whiteout->k); + whiteout->k.p = pos; + + ret = bch2_journal_key_insert(c, id, level, whiteout); + if (ret) + kfree(whiteout); + return ret; } static struct bkey_i *bch2_journal_iter_peek(struct journal_iter *iter) { - if (iter->k && - iter->k < iter->keys->d + iter->keys->nr && - iter->k->btree_id == iter->btree_id && - iter->k->level == iter->level) - return iter->k->k; + struct journal_key *k = iter->idx - iter->keys->nr + ? iter->keys->d + iter->idx : NULL; - iter->k = NULL; + if (k && + k->btree_id == iter->btree_id && + k->level == iter->level) + return k->k; + + iter->idx = iter->keys->nr; return NULL; } static void bch2_journal_iter_advance(struct journal_iter *iter) { - if (iter->k) - iter->k++; + if (iter->idx < iter->keys->nr) + iter->idx++; } -static void bch2_journal_iter_init(struct journal_iter *iter, - struct journal_keys *journal_keys, +static void bch2_journal_iter_exit(struct journal_iter *iter) +{ + list_del(&iter->list); +} + +static void bch2_journal_iter_init(struct bch_fs *c, + struct journal_iter *iter, enum btree_id id, unsigned level, struct bpos pos) { iter->btree_id = id; iter->level = level; - iter->keys = journal_keys; - iter->k = journal_key_search(journal_keys, id, level, pos); + iter->keys = &c->journal_keys; + iter->idx = journal_key_search(&c->journal_keys, id, level, pos); + list_add(&iter->list, &c->journal_iters); } static struct bkey_s_c bch2_journal_iter_peek_btree(struct btree_and_journal_iter *iter) { - return iter->btree - ? bch2_btree_iter_peek(iter->btree) - : bch2_btree_node_iter_peek_unpack(&iter->node_iter, - iter->b, &iter->unpacked); + return bch2_btree_node_iter_peek_unpack(&iter->node_iter, + iter->b, &iter->unpacked); } static void bch2_journal_iter_advance_btree(struct btree_and_journal_iter *iter) { - if (iter->btree) - bch2_btree_iter_next(iter->btree); - else - bch2_btree_node_iter_advance(&iter->node_iter, iter->b); + bch2_btree_node_iter_advance(&iter->node_iter, iter->b); } void bch2_btree_and_journal_iter_advance(struct btree_and_journal_iter *iter) @@ -160,7 +251,7 @@ struct bkey_s_c bch2_btree_and_journal_iter_peek(struct btree_and_journal_iter * if (iter->b && bkey_cmp(ret.k->p, iter->b->data->max_key) > 0) { - iter->journal.k = NULL; + iter->journal.idx = iter->journal.keys->nr; iter->last = none; return bkey_s_c_null; } @@ -181,26 +272,20 @@ struct bkey_s_c bch2_btree_and_journal_iter_next(struct btree_and_journal_iter * return bch2_btree_and_journal_iter_peek(iter); } -void bch2_btree_and_journal_iter_init(struct btree_and_journal_iter *iter, - struct btree_trans *trans, - struct journal_keys *journal_keys, - enum btree_id id, struct bpos pos) +void bch2_btree_and_journal_iter_exit(struct btree_and_journal_iter *iter) { - memset(iter, 0, sizeof(*iter)); - - iter->btree = bch2_trans_get_iter(trans, id, pos, BTREE_ITER_PREFETCH); - bch2_journal_iter_init(&iter->journal, journal_keys, id, 0, pos); + bch2_journal_iter_exit(&iter->journal); } void bch2_btree_and_journal_iter_init_node_iter(struct btree_and_journal_iter *iter, - struct journal_keys *journal_keys, + struct bch_fs *c, struct btree *b) { memset(iter, 0, sizeof(*iter)); iter->b = b; bch2_btree_node_iter_init_from_start(&iter->node_iter, iter->b); - bch2_journal_iter_init(&iter->journal, journal_keys, + bch2_journal_iter_init(c, &iter->journal, b->c.btree_id, b->c.level, b->data->min_key); } @@ -244,7 +329,7 @@ static int bch2_btree_and_journal_walk_recurse(struct bch_fs *c, struct btree *b int ret = 0; bch2_bkey_buf_init(&tmp); - bch2_btree_and_journal_iter_init_node_iter(&iter, journal_keys, b); + bch2_btree_and_journal_iter_init_node_iter(&iter, c, b); while ((k = bch2_btree_and_journal_iter_peek(&iter)).k) { ret = key_fn(c, btree_id, b->c.level, k); @@ -277,6 +362,7 @@ static int bch2_btree_and_journal_walk_recurse(struct bch_fs *c, struct btree *b } } + bch2_btree_and_journal_iter_exit(&iter); bch2_bkey_buf_exit(&tmp, c); return ret; } @@ -333,6 +419,12 @@ static int journal_sort_key_cmp(const void *_l, const void *_r) void bch2_journal_keys_free(struct journal_keys *keys) { + struct journal_key *i; + + for (i = keys->d; i < keys->d + keys->nr; i++) + if (i->allocated) + kfree(i->k); + kvfree(keys->d); keys->d = NULL; keys->nr = 0; @@ -361,7 +453,9 @@ static struct journal_keys journal_keys_sort(struct list_head *journal_entries) nr_keys++; } - keys.d = kvmalloc(sizeof(keys.d[0]) * nr_keys, GFP_KERNEL); + keys.size = roundup_pow_of_two(nr_keys); + + keys.d = kvmalloc(sizeof(keys.d[0]) * keys.size, GFP_KERNEL); if (!keys.d) goto err; @@ -545,14 +639,16 @@ static int __bch2_journal_replay_key(struct btree_trans *trans, return ret; } -static int bch2_journal_replay_key(struct bch_fs *c, enum btree_id id, - unsigned level, struct bkey_i *k) +static int bch2_journal_replay_key(struct bch_fs *c, struct journal_key *k) { - return bch2_trans_do(c, NULL, NULL, - BTREE_INSERT_NOFAIL| - BTREE_INSERT_LAZY_RW| - BTREE_INSERT_JOURNAL_REPLAY, - __bch2_journal_replay_key(&trans, id, level, k)); + unsigned commit_flags = BTREE_INSERT_NOFAIL| + BTREE_INSERT_LAZY_RW; + + if (!k->allocated) + commit_flags |= BTREE_INSERT_JOURNAL_REPLAY; + + return bch2_trans_do(c, NULL, NULL, commit_flags, + __bch2_journal_replay_key(&trans, k->btree_id, k->level, k->k)); } static int __bch2_alloc_replay_key(struct btree_trans *trans, struct bkey_i *k) @@ -628,7 +724,7 @@ static int bch2_journal_replay(struct bch_fs *c, if (i->level) { j->replay_journal_seq = keys.journal_seq_base + i->journal_seq; - ret = bch2_journal_replay_key(c, i->btree_id, i->level, i->k); + ret = bch2_journal_replay_key(c, i); if (ret) goto err; } @@ -658,7 +754,7 @@ static int bch2_journal_replay(struct bch_fs *c, ret = i->k->k.size ? bch2_extent_replay_key(c, i->btree_id, i->k) - : bch2_journal_replay_key(c, i->btree_id, i->level, i->k); + : bch2_journal_replay_key(c, i); if (ret) goto err; } @@ -1105,7 +1201,7 @@ int bch2_fs_recovery(struct bch_fs *c) test_bit(BCH_FS_REBUILD_REPLICAS, &c->flags)) { bch_info(c, "starting mark and sweep"); err = "error in mark and sweep"; - ret = bch2_gc(c, &c->journal_keys, true); + ret = bch2_gc(c, true); if (ret) goto err; bch_verbose(c, "mark and sweep done"); diff --git a/fs/bcachefs/recovery.h b/fs/bcachefs/recovery.h index a66827c9addf..fa91851b9ed7 100644 --- a/fs/bcachefs/recovery.h +++ b/fs/bcachefs/recovery.h @@ -6,10 +6,11 @@ for (i = (keys).d; i < (keys).d + (keys).nr; (i)++) struct journal_iter { + struct list_head list; enum btree_id btree_id; unsigned level; + size_t idx; struct journal_keys *keys; - struct journal_key *k; }; /* @@ -17,8 +18,6 @@ struct journal_iter { */ struct btree_and_journal_iter { - struct btree_iter *btree; - struct btree *b; struct btree_node_iter node_iter; struct bkey unpacked; @@ -32,16 +31,18 @@ struct btree_and_journal_iter { } last; }; +int bch2_journal_key_insert(struct bch_fs *, enum btree_id, + unsigned, struct bkey_i *); +int bch2_journal_key_delete(struct bch_fs *, enum btree_id, + unsigned, struct bpos); + void bch2_btree_and_journal_iter_advance(struct btree_and_journal_iter *); struct bkey_s_c bch2_btree_and_journal_iter_peek(struct btree_and_journal_iter *); struct bkey_s_c bch2_btree_and_journal_iter_next(struct btree_and_journal_iter *); -void bch2_btree_and_journal_iter_init(struct btree_and_journal_iter *, - struct btree_trans *, - struct journal_keys *, - enum btree_id, struct bpos); +void bch2_btree_and_journal_iter_exit(struct btree_and_journal_iter *); void bch2_btree_and_journal_iter_init_node_iter(struct btree_and_journal_iter *, - struct journal_keys *, + struct bch_fs *, struct btree *); typedef int (*btree_walk_node_fn)(struct bch_fs *c, struct btree *b); diff --git a/fs/bcachefs/super.c b/fs/bcachefs/super.c index bdaea336be85..d451a29b517b 100644 --- a/fs/bcachefs/super.c +++ b/fs/bcachefs/super.c @@ -692,6 +692,7 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts) bch2_blacklist_entries_gc); INIT_LIST_HEAD(&c->journal_entries); + INIT_LIST_HEAD(&c->journal_iters); INIT_LIST_HEAD(&c->fsck_errors); mutex_init(&c->fsck_error_lock); diff --git a/fs/bcachefs/sysfs.c b/fs/bcachefs/sysfs.c index 57b1dbe04178..521b6d8d518f 100644 --- a/fs/bcachefs/sysfs.c +++ b/fs/bcachefs/sysfs.c @@ -475,7 +475,7 @@ STORE(bch2_fs) */ #if 0 down_read(&c->state_lock); - bch2_gc(c, NULL, false, false); + bch2_gc(c, false, false); up_read(&c->state_lock); #else bch2_gc_gens(c);