bcachefs: Ensure we rewind to run recovery passes

Fix a 6.16 regression from the recovery pass rework, which introduced a
bug where calling bch2_run_explicit_recovery_pass() would only return
the error code to rewind recovery for the first call that scheduled that
recovery pass.

If the error code from the first call was swallowed (because it was
called from an asynchronous codepath), subsequent calls would see that
the pass was already marked as needing to run and return 0, so recovery
was never rewound to actually run it.

Fixing this ensures that check_topology bails out to run btree_node_scan
before doing any repair.

Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
This commit is contained in:
Kent Overstreet 2025-06-25 00:48:14 -04:00
parent 3e72acb78b
commit 64b6a788bd

View File

@@ -313,6 +313,9 @@ static bool recovery_pass_needs_set(struct bch_fs *c,
*/
bool in_recovery = test_bit(BCH_FS_in_recovery, &c->flags);
bool persistent = !in_recovery || !(*flags & RUN_RECOVERY_PASS_nopersistent);
bool rewind = in_recovery &&
r->curr_pass > pass &&
!(r->passes_complete & BIT_ULL(pass));
if (persistent
? !(c->sb.recovery_passes_required & BIT_ULL(pass))
@@ -323,6 +326,9 @@ static bool recovery_pass_needs_set(struct bch_fs *c,
(r->passes_ratelimiting & BIT_ULL(pass)))
return true;
if (rewind)
return true;
return false;
}
@@ -337,7 +343,6 @@ int __bch2_run_explicit_recovery_pass(struct bch_fs *c,
struct bch_fs_recovery *r = &c->recovery;
int ret = 0;
lockdep_assert_held(&c->sb_lock);
bch2_printbuf_make_room(out, 1024);
@@ -408,10 +413,8 @@ int bch2_run_explicit_recovery_pass(struct bch_fs *c,
{
int ret = 0;
scoped_guard(mutex, &c->sb_lock) {
if (!recovery_pass_needs_set(c, pass, &flags))
return 0;
if (recovery_pass_needs_set(c, pass, &flags)) {
guard(mutex)(&c->sb_lock);
ret = __bch2_run_explicit_recovery_pass(c, out, pass, flags);
bch2_write_super(c);
}