btrfs: revalidate cached tree blocks on the uptodate path

read_extent_buffer_pages_nowait() returns immediately when an extent
buffer is already marked uptodate. On that cache-hit path,
the caller-supplied btrfs_tree_parent_check is not re-run.

This can let read_tree_root_path() accept a cached tree block whose
actual header level/owner does not match the expected value derived from
the parent.

For example, a corrupted root item may point to a tree block which
doesn't even belong to that root and has a mismatching level/owner.

If that tree block has already been read and cached, then when the
corrupted tree root is later read from disk it hits the cached tree
block.

Fix this by re-validating cached extent buffers against the supplied
btrfs_tree_parent_check on the uptodate path, and by making
read_tree_root_path() pass its check to btrfs_buffer_uptodate().

This makes cache hits and fresh reads follow the same tree-parent
verification rules, and turns the corruption into a read failure instead
of constructing an inconsistent root object.

Signed-off-by: ZhengYuan Huang <gality369@gmail.com>
Reviewed-by: Qu Wenruo <wqu@suse.com>
[ Resolve the conflict with extent_buffer_uptodate() helper, handle
  transid mismatch case ]
Signed-off-by: Qu Wenruo <wqu@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
This commit is contained in:
ZhengYuan Huang 2026-03-13 17:19:23 +08:00 committed by David Sterba
parent e526779648
commit f04c6475c2
6 changed files with 39 additions and 10 deletions

View File

@ -1499,7 +1499,7 @@ read_block_for_search(struct btrfs_root *root, struct btrfs_path *p,
reada_for_search(fs_info, p, parent_level, slot, key->objectid);
/* first we do an atomic uptodate check */
if (btrfs_buffer_uptodate(tmp, check.transid, true) > 0) {
if (btrfs_buffer_uptodate(tmp, check.transid, true, NULL) > 0) {
/*
* Do extra check for first_key, eb can be stale due to
* being cached, read from scrub, or have multiple

View File

@ -109,13 +109,21 @@ static void csum_tree_block(struct extent_buffer *buf, u8 *result)
* detect blocks that either didn't get written at all or got written
* in the wrong place.
*/
int btrfs_buffer_uptodate(struct extent_buffer *eb, u64 parent_transid, bool atomic)
int btrfs_buffer_uptodate(struct extent_buffer *eb, u64 parent_transid, bool atomic,
const struct btrfs_tree_parent_check *check)
{
if (!extent_buffer_uptodate(eb))
return 0;
if (!parent_transid || btrfs_header_generation(eb) == parent_transid)
if (!parent_transid || btrfs_header_generation(eb) == parent_transid) {
/*
* On a cache hit, the caller may still need tree parent
* verification before reusing the buffer.
*/
if (unlikely(check && btrfs_verify_level_key(eb, check)))
return -EUCLEAN;
return 1;
}
if (atomic)
return -EAGAIN;
@ -992,8 +1000,11 @@ static struct btrfs_root *read_tree_root_path(struct btrfs_root *tree_root,
root->node = NULL;
goto fail;
}
if (unlikely(!btrfs_buffer_uptodate(root->node, generation, false))) {
ret = -EIO;
ret = btrfs_buffer_uptodate(root->node, generation, false, &check);
if (unlikely(ret <= 0)) {
if (ret == 0)
ret = -EIO;
goto fail;
}

View File

@ -107,7 +107,8 @@ static inline struct btrfs_root *btrfs_grab_root(struct btrfs_root *root)
void btrfs_put_root(struct btrfs_root *root);
void btrfs_mark_buffer_dirty(struct btrfs_trans_handle *trans,
struct extent_buffer *buf);
int btrfs_buffer_uptodate(struct extent_buffer *buf, u64 parent_transid, bool atomic);
int btrfs_buffer_uptodate(struct extent_buffer *buf, u64 parent_transid, bool atomic,
const struct btrfs_tree_parent_check *check);
int btrfs_read_extent_buffer(struct extent_buffer *buf,
const struct btrfs_tree_parent_check *check);

View File

@ -5780,7 +5780,7 @@ static int check_next_block_uptodate(struct btrfs_trans_handle *trans,
generation = btrfs_node_ptr_generation(path->nodes[level], path->slots[level]);
if (btrfs_buffer_uptodate(next, generation, false))
if (btrfs_buffer_uptodate(next, generation, false, NULL))
return 0;
check.level = level - 1;

View File

@ -3901,8 +3901,17 @@ int read_extent_buffer_pages_nowait(struct extent_buffer *eb, int mirror_num,
struct btrfs_fs_info *fs_info = eb->fs_info;
struct btrfs_bio *bbio;
if (extent_buffer_uptodate(eb))
if (extent_buffer_uptodate(eb)) {
int ret;
ret = btrfs_buffer_uptodate(eb, 0, true, check);
if (unlikely(ret <= 0)) {
if (ret == 0)
ret = -EIO;
return ret;
}
return 0;
}
/*
* We could have had EXTENT_BUFFER_UPTODATE cleared by the write
@ -3923,7 +3932,15 @@ int read_extent_buffer_pages_nowait(struct extent_buffer *eb, int mirror_num,
* will now be set, and we shouldn't read it in again.
*/
if (unlikely(extent_buffer_uptodate(eb))) {
int ret;
clear_extent_buffer_reading(eb);
ret = btrfs_buffer_uptodate(eb, 0, true, check);
if (unlikely(ret <= 0)) {
if (ret == 0)
ret = -EIO;
return ret;
}
return 0;
}
@ -4636,7 +4653,7 @@ void btrfs_readahead_tree_block(struct btrfs_fs_info *fs_info,
if (IS_ERR(eb))
return;
if (btrfs_buffer_uptodate(eb, gen, true)) {
if (btrfs_buffer_uptodate(eb, gen, true, NULL)) {
free_extent_buffer(eb);
return;
}

View File

@ -457,7 +457,7 @@ static int process_one_buffer(struct extent_buffer *eb,
return ret;
}
if (btrfs_buffer_uptodate(eb, gen, false) && level == 0) {
if (btrfs_buffer_uptodate(eb, gen, false, NULL) && level == 0) {
ret = btrfs_exclude_logged_extents(eb);
if (ret)
btrfs_abort_transaction(trans, ret);