btrfs: add mount time auto fix for orphan fst entries

[BUG]
Before btrfs-progs v6.16.1 release, mkfs.btrfs can leave free space tree
entries for deleted chunks:

  # mkfs.btrfs -f -O fst $dev
  # btrfs ins dump-tree -t chunk $dev
  btrfs-progs v6.16
  chunk tree
  leaf 22036480 items 4 free space 15781 generation 8 owner CHUNK_TREE
  leaf 22036480 flags 0x1(WRITTEN) backref revision 1
	 item 0 key (DEV_ITEMS DEV_ITEM 1) itemoff 16185 itemsize 98
	 item 1 key (FIRST_CHUNK_TREE CHUNK_ITEM 13631488) itemoff 16105 itemsize 80
	 ^^^ The first chunk is at 13631488
	 item 2 key (FIRST_CHUNK_TREE CHUNK_ITEM 22020096) itemoff 15993 itemsize 112
	 item 3 key (FIRST_CHUNK_TREE CHUNK_ITEM 30408704) itemoff 15881 itemsize 112

  # btrfs ins dump-tree -t free-space-tree $dev
  btrfs-progs v6.16
  free space tree key (FREE_SPACE_TREE ROOT_ITEM 0)
  leaf 30556160 items 13 free space 15918 generation 8 owner FREE_SPACE_TREE
  leaf 30556160 flags 0x1(WRITTEN) backref revision 1
	 item 0 key (1048576 FREE_SPACE_INFO 4194304) itemoff 16275 itemsize 8
		 free space info extent count 1 flags 0
	 item 1 key (1048576 FREE_SPACE_EXTENT 4194304) itemoff 16275 itemsize 0
		 free space extent
	 item 2 key (5242880 FREE_SPACE_INFO 8388608) itemoff 16267 itemsize 8
		 free space info extent count 1 flags 0
	 item 3 key (5242880 FREE_SPACE_EXTENT 8388608) itemoff 16267 itemsize 0
		 free space extent
	 ^^^ Above 4 items are all before the first chunk.
	 item 4 key (13631488 FREE_SPACE_INFO 8388608) itemoff 16259 itemsize 8
		 free space info extent count 1 flags 0
	 item 5 key (13631488 FREE_SPACE_EXTENT 8388608) itemoff 16259 itemsize 0
		 free space extent
	 ...

This can trigger btrfs check errors.

[CAUSE]
It's a bug in free space tree implementation of btrfs-progs, which
doesn't delete involved fst entries for the to-be-deleted chunk/block
group.

[ENHANCEMENT]
The most common fix is to clear the space cache and rebuild it, but
that requires a ro->rw remount which may not be possible for rootfs,
and also relies on users to use the "clear_cache" mount option manually.

Introduce a kernel fix for it, which will delete any entries that
are before the first block group automatically at the first RW mount.

For filesystems without such a problem, the overhead is just a single tree
search and no modification to the free space tree, thus the overhead
should be minimal.

Reviewed-by: Filipe Manana <fdmanana@suse.com>
Signed-off-by: Qu Wenruo <wqu@suse.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
This commit is contained in:
Qu Wenruo 2025-12-20 13:07:40 +10:30 committed by David Sterba
parent fdb945f665
commit d1a020a8d7
3 changed files with 113 additions and 0 deletions

View File

@ -3013,6 +3013,15 @@ int btrfs_start_pre_rw_mount(struct btrfs_fs_info *fs_info)
}
}
/*
* Before btrfs-progs v6.16.1 mkfs.btrfs can leave free space entries
* for deleted temporary chunks. Delete them if they exist.
*/
ret = btrfs_delete_orphan_free_space_entries(fs_info);
if (ret < 0) {
btrfs_err(fs_info, "failed to delete orphan free space tree entries: %d", ret);
goto out;
}
/*
* btrfs_find_orphan_roots() is responsible for finding all the dead
* roots (with 0 refs), flag them with BTRFS_ROOT_DEAD_TREE and load

View File

@ -1710,3 +1710,106 @@ int btrfs_load_free_space_tree(struct btrfs_caching_control *caching_ctl)
else
return load_free_space_extents(caching_ctl, path, extent_count);
}
/*
 * Delete every free space tree item whose key objectid is below
 * @first_bg_bytenr.
 *
 * @fst_root:        root of the free space tree to clean up
 * @path:            caller provided path, released before returning
 * @first_bg_bytenr: items with an objectid below this value are deleted
 *
 * The items are removed leaf by leaf: each iteration searches for the all-zero
 * key with COW enabled, counts the leading items of the returned leaf that are
 * below @first_bg_bytenr and deletes them in one batch.
 *
 * Return 0 on success or a negative errno on failure, including a failure to
 * end the transaction.
 */
static int delete_orphan_free_space_entries(struct btrfs_root *fst_root,
					    struct btrfs_path *path,
					    u64 first_bg_bytenr)
{
	struct btrfs_trans_handle *trans;
	int end_ret;
	int ret;

	trans = btrfs_start_transaction(fst_root, 1);
	if (IS_ERR(trans))
		return PTR_ERR(trans);

	while (true) {
		struct btrfs_key key = { 0 };
		int i;

		ret = btrfs_search_slot(trans, fst_root, &key, path, -1, 1);
		if (ret < 0)
			break;
		/* An exact match on the all-zero key is not expected. */
		ASSERT(ret > 0);
		ret = 0;

		/* Count the leading items of this leaf that must be deleted. */
		for (i = 0; i < btrfs_header_nritems(path->nodes[0]); i++) {
			btrfs_item_key_to_cpu(path->nodes[0], &key, i);
			if (key.objectid >= first_bg_bytenr) {
				/*
				 * Only break the for() loop and continue to
				 * delete items.
				 */
				break;
			}
		}
		/* No items to delete, finished. */
		if (i == 0)
			break;

		ret = btrfs_del_items(trans, fst_root, path, 0, i);
		if (ret < 0)
			break;
		btrfs_release_path(path);
	}
	btrfs_release_path(path);

	/*
	 * Do not lose an error from ending the transaction, otherwise the
	 * caller would be told the cleanup succeeded when it did not.
	 */
	end_ret = btrfs_end_transaction(trans);
	if (ret == 0)
		ret = end_ret;
	if (ret == 0)
		btrfs_info(fst_root->fs_info, "deleted orphan free space tree entries");
	return ret;
}
/* Remove any free space entry before the first block group. */
int btrfs_delete_orphan_free_space_entries(struct btrfs_fs_info *fs_info)
{
BTRFS_PATH_AUTO_RELEASE(path);
struct btrfs_key key = {
.objectid = BTRFS_FREE_SPACE_TREE_OBJECTID,
.type = BTRFS_ROOT_ITEM_KEY,
.offset = 0,
};
struct btrfs_root *root;
struct btrfs_block_group *bg;
u64 first_bg_bytenr;
int ret;
/*
* Extent tree v2 has multiple global roots based on the block group.
* This means we cannot easily grab the global free space tree and locate
* orphan items. Furthermore this is still experimental, all users
* should use the latest btrfs-progs anyway.
*/
if (btrfs_fs_incompat(fs_info, EXTENT_TREE_V2))
return 0;
if (!btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE))
return 0;
root = btrfs_global_root(fs_info, &key);
if (!root)
return 0;
key.objectid = 0;
key.type = 0;
key.offset = 0;
bg = btrfs_lookup_first_block_group(fs_info, 0);
if (unlikely(!bg)) {
btrfs_err(fs_info, "no block group found");
return -EUCLEAN;
}
first_bg_bytenr = bg->start;
btrfs_put_block_group(bg);
ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
if (ret < 0)
return ret;
/* There should not be an all-zero key in fst. */
ASSERT(ret > 0);
/* Empty free space tree. */
if (path.slots[0] >= btrfs_header_nritems(path.nodes[0]))
return 0;
btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
if (key.objectid >= first_bg_bytenr)
return 0;
btrfs_release_path(&path);
return delete_orphan_free_space_entries(root, &path, first_bg_bytenr);
}

View File

@ -35,6 +35,7 @@ int btrfs_add_to_free_space_tree(struct btrfs_trans_handle *trans,
u64 start, u64 size);
int btrfs_remove_from_free_space_tree(struct btrfs_trans_handle *trans,
u64 start, u64 size);
int btrfs_delete_orphan_free_space_entries(struct btrfs_fs_info *fs_info);
#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
struct btrfs_free_space_info *