for-6.16-rc3-tag

-----BEGIN PGP SIGNATURE-----
 
 iQIzBAABCgAdFiEE8rQSAMVO+zA4DBdWxWXV+ddtWDsFAmhZQ9MACgkQxWXV+ddt
 WDsyvw/+K5N4zbig9D5QL5SdsQwMe/ZUk1KF0LLu6H3hFetdICeM/Z4K46EBh40X
 c9Sxb13gLnIAm8DR/IFTTlOZVrrbJ3CTazZuJbncCpaZchH863aYb/1KboxjJnpW
 KqOen20KdUh8HdevrJFhkFc7rOjp7KupfIHsbWqIxaWYPf8ORvUyK55lKxQz0HES
 E5tFXLNr6z/8Ws5pc2HnRLgnRcCHuRUNJUb1PEaTfPKxoFvTwjda6cDsYnXOJEO9
 NOnh6lluurqja+3FUEFig2f292/CbKGtByYUDgfhHO21P//IHSDhlouvwipzI/kh
 6WUoH1K+DWCxxNbIVFFbUYLxrDGu7R7/aWFHH2q0dNjqQeiQBbUnbn4WIjAAwDWf
 k9cmE+WgVqwQI+vpfG3eENUafG5MpcQQo2wKrxG0whWaC2fiA6QtI+3DfKyMj4XJ
 JI1jUhfCwHrqzoGQ4XBE3UYENqQw9RICNC+Z3UfZx+5sQMWcb+ac5qIGygvCfU8N
 Gtfx4ladZshpQUSuRneiLozxdxLyXX3LzCt2Ls1s5fPPikZft/+2QRu5rzSbb/Cp
 50TDSn/pE1N/TEMVZaP5M2PxquBVDOZ4TFSsSm3IvceqFInm0UerAGaJ7+T2eZhM
 3XHhIp6xTecHfwukvGqs+XSxB9PMLfF5M0gc+9PR+3oxzFRpowI=
 =XLWR
 -----END PGP SIGNATURE-----

Merge tag 'for-6.16-rc3-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux

Pull btrfs fixes from David Sterba:
 "Fixes:

   - fix invalid inode pointer dereferences during log replay

   - fix a race between renames and directory logging

   - fix shutting down delayed iput worker

   - fix device byte accounting when dropping chunk

   - in zoned mode, fix offset calculations for DUP profile when
     conventional and sequential zones are used together

  Regression fixes:

   - fix possible double unlock of extent buffer tree (xarray
     conversion)

   - in zoned mode, fix extent buffer refcount when writing out extents
     (xarray conversion)

  Error handling fixes and updates:

   - handle unexpected extent type when replaying log

   - check and warn if there are remaining delayed inodes when putting a
     root

   - fix assertion when building free space tree

   - handle csum tree error with mount option 'rescue=ibadroot'

  Other:

   - error message updates: add prefix to all scrub related messages,
     include other information in messages"

* tag 'for-6.16-rc3-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux:
  btrfs: zoned: fix alloc_offset calculation for partly conventional block groups
  btrfs: handle csum tree error with rescue=ibadroots correctly
  btrfs: fix race between async reclaim worker and close_ctree()
  btrfs: fix assertion when building free space tree
  btrfs: don't silently ignore unexpected extent type when replaying log
  btrfs: fix invalid inode pointer dereferences during log replay
  btrfs: fix double unlock of buffer_tree xarray when releasing subpage eb
  btrfs: update superblock's device bytes_used when dropping chunk
  btrfs: fix a race between renames and directory logging
  btrfs: scrub: add prefix for the error messages
  btrfs: warn if leaking delayed_nodes in btrfs_put_root()
  btrfs: fix delayed ref refcount leak in debug assertion
  btrfs: include root in error message when unlinking inode
  btrfs: don't drop a reference if btrfs_check_write_meta_pointer() fails
This commit is contained in:
Linus Torvalds 2025-06-23 11:16:38 -07:00
commit 5ca7fe213b
10 changed files with 220 additions and 83 deletions

View File

@ -1377,7 +1377,10 @@ static int btrfs_wq_run_delayed_node(struct btrfs_delayed_root *delayed_root,
void btrfs_assert_delayed_root_empty(struct btrfs_fs_info *fs_info)
{
WARN_ON(btrfs_first_delayed_node(fs_info->delayed_root));
struct btrfs_delayed_node *node = btrfs_first_delayed_node(fs_info->delayed_root);
if (WARN_ON(node))
refcount_dec(&node->refs);
}
static bool could_end_wait(struct btrfs_delayed_root *delayed_root, int seq)

View File

@ -1835,6 +1835,8 @@ void btrfs_put_root(struct btrfs_root *root)
if (refcount_dec_and_test(&root->refs)) {
if (WARN_ON(!xa_empty(&root->inodes)))
xa_destroy(&root->inodes);
if (WARN_ON(!xa_empty(&root->delayed_nodes)))
xa_destroy(&root->delayed_nodes);
WARN_ON(test_bit(BTRFS_ROOT_DEAD_RELOC_TREE, &root->state));
if (root->anon_dev)
free_anon_bdev(root->anon_dev);
@ -2156,8 +2158,7 @@ static int load_global_roots_objectid(struct btrfs_root *tree_root,
found = true;
root = read_tree_root_path(tree_root, path, &key);
if (IS_ERR(root)) {
if (!btrfs_test_opt(fs_info, IGNOREBADROOTS))
ret = PTR_ERR(root);
ret = PTR_ERR(root);
break;
}
set_bit(BTRFS_ROOT_TRACK_DIRTY, &root->state);
@ -4310,8 +4311,8 @@ void __cold close_ctree(struct btrfs_fs_info *fs_info)
*
* So wait for all ongoing ordered extents to complete and then run
* delayed iputs. This works because once we reach this point no one
* can either create new ordered extents nor create delayed iputs
* through some other means.
* can create new ordered extents, but delayed iputs can still be added
* by a reclaim worker (see comments further below).
*
* Also note that btrfs_wait_ordered_roots() is not safe here, because
* it waits for BTRFS_ORDERED_COMPLETE to be set on an ordered extent,
@ -4322,15 +4323,29 @@ void __cold close_ctree(struct btrfs_fs_info *fs_info)
btrfs_flush_workqueue(fs_info->endio_write_workers);
/* Ordered extents for free space inodes. */
btrfs_flush_workqueue(fs_info->endio_freespace_worker);
/*
* Run delayed iputs in case an async reclaim worker is waiting for them
* to be run as mentioned above.
*/
btrfs_run_delayed_iputs(fs_info);
/* There should be no more workload to generate new delayed iputs. */
set_bit(BTRFS_FS_STATE_NO_DELAYED_IPUT, &fs_info->fs_state);
cancel_work_sync(&fs_info->async_reclaim_work);
cancel_work_sync(&fs_info->async_data_reclaim_work);
cancel_work_sync(&fs_info->preempt_reclaim_work);
cancel_work_sync(&fs_info->em_shrinker_work);
/*
* Run delayed iputs again because an async reclaim worker may have
* added new ones if it was flushing delalloc:
*
* shrink_delalloc() -> btrfs_start_delalloc_roots() ->
* start_delalloc_inodes() -> btrfs_add_delayed_iput()
*/
btrfs_run_delayed_iputs(fs_info);
/* There should be no more workload to generate new delayed iputs. */
set_bit(BTRFS_FS_STATE_NO_DELAYED_IPUT, &fs_info->fs_state);
/* Cancel or finish ongoing discard work */
btrfs_discard_cleanup(fs_info);

View File

@ -4312,7 +4312,6 @@ static int try_release_subpage_extent_buffer(struct folio *folio)
spin_unlock(&eb->refs_lock);
continue;
}
xa_unlock_irq(&fs_info->buffer_tree);
/*
* If tree ref isn't set then we know the ref on this eb is a
@ -4329,6 +4328,7 @@ static int try_release_subpage_extent_buffer(struct folio *folio)
* check the folio private at the end. And
* release_extent_buffer() will release the refs_lock.
*/
xa_unlock_irq(&fs_info->buffer_tree);
release_extent_buffer(eb);
xa_lock_irq(&fs_info->buffer_tree);
}

View File

@ -1115,11 +1115,21 @@ static int populate_free_space_tree(struct btrfs_trans_handle *trans,
ret = btrfs_search_slot_for_read(extent_root, &key, path, 1, 0);
if (ret < 0)
goto out_locked;
ASSERT(ret == 0);
/*
* If ret is 1 (no key found), it means this is an empty block group,
* without any extents allocated from it and there's no block group
* item (key BTRFS_BLOCK_GROUP_ITEM_KEY) located in the extent tree
* because we are using the block group tree feature, so block group
* items are stored in the block group tree. It also means there are no
* extents allocated for block groups with a start offset beyond this
* block group's end offset (this is the last, highest, block group).
*/
if (!btrfs_fs_compat_ro(trans->fs_info, BLOCK_GROUP_TREE))
ASSERT(ret == 0);
start = block_group->start;
end = block_group->start + block_group->length;
while (1) {
while (ret == 0) {
btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
if (key.type == BTRFS_EXTENT_ITEM_KEY ||
@ -1149,8 +1159,6 @@ static int populate_free_space_tree(struct btrfs_trans_handle *trans,
ret = btrfs_next_item(extent_root, path);
if (ret < 0)
goto out_locked;
if (ret)
break;
}
if (start < end) {
ret = __add_to_free_space_tree(trans, block_group, path2,

View File

@ -4250,9 +4250,9 @@ static int __btrfs_unlink_inode(struct btrfs_trans_handle *trans,
ret = btrfs_del_inode_ref(trans, root, name, ino, dir_ino, &index);
if (ret) {
btrfs_info(fs_info,
"failed to delete reference to %.*s, inode %llu parent %llu",
name->len, name->name, ino, dir_ino);
btrfs_crit(fs_info,
"failed to delete reference to %.*s, root %llu inode %llu parent %llu",
name->len, name->name, btrfs_root_id(root), ino, dir_ino);
btrfs_abort_transaction(trans, ret);
goto err;
}
@ -8059,6 +8059,7 @@ static int btrfs_rename_exchange(struct inode *old_dir,
int ret;
int ret2;
bool need_abort = false;
bool logs_pinned = false;
struct fscrypt_name old_fname, new_fname;
struct fscrypt_str *old_name, *new_name;
@ -8182,6 +8183,31 @@ static int btrfs_rename_exchange(struct inode *old_dir,
inode_inc_iversion(new_inode);
simple_rename_timestamp(old_dir, old_dentry, new_dir, new_dentry);
if (old_ino != BTRFS_FIRST_FREE_OBJECTID &&
new_ino != BTRFS_FIRST_FREE_OBJECTID) {
/*
* If we are renaming in the same directory (and it's not for
* root entries) pin the log early to prevent any concurrent
* task from logging the directory after we removed the old
* entries and before we add the new entries, otherwise that
* task can sync a log without any entry for the inodes we are
* renaming and therefore replaying that log, if a power failure
* happens after syncing the log, would result in deleting the
* inodes.
*
* If the rename affects two different directories, we want to
* make sure the that there's no log commit that contains
* updates for only one of the directories but not for the
* other.
*
* If we are renaming an entry for a root, we don't care about
* log updates since we called btrfs_set_log_full_commit().
*/
btrfs_pin_log_trans(root);
btrfs_pin_log_trans(dest);
logs_pinned = true;
}
if (old_dentry->d_parent != new_dentry->d_parent) {
btrfs_record_unlink_dir(trans, BTRFS_I(old_dir),
BTRFS_I(old_inode), true);
@ -8253,30 +8279,23 @@ static int btrfs_rename_exchange(struct inode *old_dir,
BTRFS_I(new_inode)->dir_index = new_idx;
/*
* Now pin the logs of the roots. We do it to ensure that no other task
* can sync the logs while we are in progress with the rename, because
* that could result in an inconsistency in case any of the inodes that
* are part of this rename operation were logged before.
* Do the log updates for all inodes.
*
* If either entry is for a root we don't need to update the logs since
* we've called btrfs_set_log_full_commit() before.
*/
if (old_ino != BTRFS_FIRST_FREE_OBJECTID)
btrfs_pin_log_trans(root);
if (new_ino != BTRFS_FIRST_FREE_OBJECTID)
btrfs_pin_log_trans(dest);
/* Do the log updates for all inodes. */
if (old_ino != BTRFS_FIRST_FREE_OBJECTID)
if (logs_pinned) {
btrfs_log_new_name(trans, old_dentry, BTRFS_I(old_dir),
old_rename_ctx.index, new_dentry->d_parent);
if (new_ino != BTRFS_FIRST_FREE_OBJECTID)
btrfs_log_new_name(trans, new_dentry, BTRFS_I(new_dir),
new_rename_ctx.index, old_dentry->d_parent);
}
/* Now unpin the logs. */
if (old_ino != BTRFS_FIRST_FREE_OBJECTID)
btrfs_end_log_trans(root);
if (new_ino != BTRFS_FIRST_FREE_OBJECTID)
btrfs_end_log_trans(dest);
out_fail:
if (logs_pinned) {
btrfs_end_log_trans(root);
btrfs_end_log_trans(dest);
}
ret2 = btrfs_end_transaction(trans);
ret = ret ? ret : ret2;
out_notrans:
@ -8326,6 +8345,7 @@ static int btrfs_rename(struct mnt_idmap *idmap,
int ret2;
u64 old_ino = btrfs_ino(BTRFS_I(old_inode));
struct fscrypt_name old_fname, new_fname;
bool logs_pinned = false;
if (btrfs_ino(BTRFS_I(new_dir)) == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID)
return -EPERM;
@ -8460,6 +8480,29 @@ static int btrfs_rename(struct mnt_idmap *idmap,
inode_inc_iversion(old_inode);
simple_rename_timestamp(old_dir, old_dentry, new_dir, new_dentry);
if (old_ino != BTRFS_FIRST_FREE_OBJECTID) {
/*
* If we are renaming in the same directory (and it's not a
* root entry) pin the log to prevent any concurrent task from
* logging the directory after we removed the old entry and
* before we add the new entry, otherwise that task can sync
* a log without any entry for the inode we are renaming and
* therefore replaying that log, if a power failure happens
* after syncing the log, would result in deleting the inode.
*
* If the rename affects two different directories, we want to
* make sure the that there's no log commit that contains
* updates for only one of the directories but not for the
* other.
*
* If we are renaming an entry for a root, we don't care about
* log updates since we called btrfs_set_log_full_commit().
*/
btrfs_pin_log_trans(root);
btrfs_pin_log_trans(dest);
logs_pinned = true;
}
if (old_dentry->d_parent != new_dentry->d_parent)
btrfs_record_unlink_dir(trans, BTRFS_I(old_dir),
BTRFS_I(old_inode), true);
@ -8524,7 +8567,7 @@ static int btrfs_rename(struct mnt_idmap *idmap,
if (old_inode->i_nlink == 1)
BTRFS_I(old_inode)->dir_index = index;
if (old_ino != BTRFS_FIRST_FREE_OBJECTID)
if (logs_pinned)
btrfs_log_new_name(trans, old_dentry, BTRFS_I(old_dir),
rename_ctx.index, new_dentry->d_parent);
@ -8540,6 +8583,10 @@ static int btrfs_rename(struct mnt_idmap *idmap,
}
}
out_fail:
if (logs_pinned) {
btrfs_end_log_trans(root);
btrfs_end_log_trans(dest);
}
ret2 = btrfs_end_transaction(trans);
ret = ret ? ret : ret2;
out_notrans:

View File

@ -3139,7 +3139,7 @@ static long btrfs_ioctl_scrub(struct file *file, void __user *arg)
return -EPERM;
if (btrfs_fs_incompat(fs_info, EXTENT_TREE_V2)) {
btrfs_err(fs_info, "scrub is not supported on extent tree v2 yet");
btrfs_err(fs_info, "scrub: extent tree v2 not yet supported");
return -EINVAL;
}

View File

@ -557,7 +557,7 @@ static int scrub_print_warning_inode(u64 inum, u64 offset, u64 num_bytes,
*/
for (i = 0; i < ipath->fspath->elem_cnt; ++i)
btrfs_warn_in_rcu(fs_info,
"%s at logical %llu on dev %s, physical %llu, root %llu, inode %llu, offset %llu, length %u, links %u (path: %s)",
"scrub: %s at logical %llu on dev %s, physical %llu root %llu inode %llu offset %llu length %u links %u (path: %s)",
swarn->errstr, swarn->logical,
btrfs_dev_name(swarn->dev),
swarn->physical,
@ -571,7 +571,7 @@ static int scrub_print_warning_inode(u64 inum, u64 offset, u64 num_bytes,
err:
btrfs_warn_in_rcu(fs_info,
"%s at logical %llu on dev %s, physical %llu, root %llu, inode %llu, offset %llu: path resolving failed with ret=%d",
"scrub: %s at logical %llu on dev %s, physical %llu root %llu inode %llu offset %llu: path resolving failed with ret=%d",
swarn->errstr, swarn->logical,
btrfs_dev_name(swarn->dev),
swarn->physical,
@ -596,7 +596,7 @@ static void scrub_print_common_warning(const char *errstr, struct btrfs_device *
/* Super block error, no need to search extent tree. */
if (is_super) {
btrfs_warn_in_rcu(fs_info, "%s on device %s, physical %llu",
btrfs_warn_in_rcu(fs_info, "scrub: %s on device %s, physical %llu",
errstr, btrfs_dev_name(dev), physical);
return;
}
@ -631,14 +631,14 @@ static void scrub_print_common_warning(const char *errstr, struct btrfs_device *
&ref_level);
if (ret < 0) {
btrfs_warn(fs_info,
"failed to resolve tree backref for logical %llu: %d",
swarn.logical, ret);
"scrub: failed to resolve tree backref for logical %llu: %d",
swarn.logical, ret);
break;
}
if (ret > 0)
break;
btrfs_warn_in_rcu(fs_info,
"%s at logical %llu on dev %s, physical %llu: metadata %s (level %d) in tree %llu",
"scrub: %s at logical %llu on dev %s, physical %llu: metadata %s (level %d) in tree %llu",
errstr, swarn.logical, btrfs_dev_name(dev),
swarn.physical, (ref_level ? "node" : "leaf"),
ref_level, ref_root);
@ -718,7 +718,7 @@ static void scrub_verify_one_metadata(struct scrub_stripe *stripe, int sector_nr
scrub_bitmap_set_meta_error(stripe, sector_nr, sectors_per_tree);
scrub_bitmap_set_error(stripe, sector_nr, sectors_per_tree);
btrfs_warn_rl(fs_info,
"tree block %llu mirror %u has bad bytenr, has %llu want %llu",
"scrub: tree block %llu mirror %u has bad bytenr, has %llu want %llu",
logical, stripe->mirror_num,
btrfs_stack_header_bytenr(header), logical);
return;
@ -728,7 +728,7 @@ static void scrub_verify_one_metadata(struct scrub_stripe *stripe, int sector_nr
scrub_bitmap_set_meta_error(stripe, sector_nr, sectors_per_tree);
scrub_bitmap_set_error(stripe, sector_nr, sectors_per_tree);
btrfs_warn_rl(fs_info,
"tree block %llu mirror %u has bad fsid, has %pU want %pU",
"scrub: tree block %llu mirror %u has bad fsid, has %pU want %pU",
logical, stripe->mirror_num,
header->fsid, fs_info->fs_devices->fsid);
return;
@ -738,7 +738,7 @@ static void scrub_verify_one_metadata(struct scrub_stripe *stripe, int sector_nr
scrub_bitmap_set_meta_error(stripe, sector_nr, sectors_per_tree);
scrub_bitmap_set_error(stripe, sector_nr, sectors_per_tree);
btrfs_warn_rl(fs_info,
"tree block %llu mirror %u has bad chunk tree uuid, has %pU want %pU",
"scrub: tree block %llu mirror %u has bad chunk tree uuid, has %pU want %pU",
logical, stripe->mirror_num,
header->chunk_tree_uuid, fs_info->chunk_tree_uuid);
return;
@ -760,7 +760,7 @@ static void scrub_verify_one_metadata(struct scrub_stripe *stripe, int sector_nr
scrub_bitmap_set_meta_error(stripe, sector_nr, sectors_per_tree);
scrub_bitmap_set_error(stripe, sector_nr, sectors_per_tree);
btrfs_warn_rl(fs_info,
"tree block %llu mirror %u has bad csum, has " CSUM_FMT " want " CSUM_FMT,
"scrub: tree block %llu mirror %u has bad csum, has " CSUM_FMT " want " CSUM_FMT,
logical, stripe->mirror_num,
CSUM_FMT_VALUE(fs_info->csum_size, on_disk_csum),
CSUM_FMT_VALUE(fs_info->csum_size, calculated_csum));
@ -771,7 +771,7 @@ static void scrub_verify_one_metadata(struct scrub_stripe *stripe, int sector_nr
scrub_bitmap_set_meta_gen_error(stripe, sector_nr, sectors_per_tree);
scrub_bitmap_set_error(stripe, sector_nr, sectors_per_tree);
btrfs_warn_rl(fs_info,
"tree block %llu mirror %u has bad generation, has %llu want %llu",
"scrub: tree block %llu mirror %u has bad generation, has %llu want %llu",
logical, stripe->mirror_num,
btrfs_stack_header_generation(header),
stripe->sectors[sector_nr].generation);
@ -814,7 +814,7 @@ static void scrub_verify_one_sector(struct scrub_stripe *stripe, int sector_nr)
*/
if (unlikely(sector_nr + sectors_per_tree > stripe->nr_sectors)) {
btrfs_warn_rl(fs_info,
"tree block at %llu crosses stripe boundary %llu",
"scrub: tree block at %llu crosses stripe boundary %llu",
stripe->logical +
(sector_nr << fs_info->sectorsize_bits),
stripe->logical);
@ -1046,12 +1046,12 @@ static void scrub_stripe_report_errors(struct scrub_ctx *sctx,
if (repaired) {
if (dev) {
btrfs_err_rl_in_rcu(fs_info,
"fixed up error at logical %llu on dev %s physical %llu",
"scrub: fixed up error at logical %llu on dev %s physical %llu",
stripe->logical, btrfs_dev_name(dev),
physical);
} else {
btrfs_err_rl_in_rcu(fs_info,
"fixed up error at logical %llu on mirror %u",
"scrub: fixed up error at logical %llu on mirror %u",
stripe->logical, stripe->mirror_num);
}
continue;
@ -1060,12 +1060,12 @@ static void scrub_stripe_report_errors(struct scrub_ctx *sctx,
/* The remaining are all for unrepaired. */
if (dev) {
btrfs_err_rl_in_rcu(fs_info,
"unable to fixup (regular) error at logical %llu on dev %s physical %llu",
"scrub: unable to fixup (regular) error at logical %llu on dev %s physical %llu",
stripe->logical, btrfs_dev_name(dev),
physical);
} else {
btrfs_err_rl_in_rcu(fs_info,
"unable to fixup (regular) error at logical %llu on mirror %u",
"scrub: unable to fixup (regular) error at logical %llu on mirror %u",
stripe->logical, stripe->mirror_num);
}
@ -1593,8 +1593,7 @@ static int sync_write_pointer_for_zoned(struct scrub_ctx *sctx, u64 logical,
physical,
sctx->write_pointer);
if (ret)
btrfs_err(fs_info,
"zoned: failed to recover write pointer");
btrfs_err(fs_info, "scrub: zoned: failed to recover write pointer");
}
mutex_unlock(&sctx->wr_lock);
btrfs_dev_clear_zone_empty(sctx->wr_tgtdev, physical);
@ -1658,7 +1657,7 @@ static int scrub_find_fill_first_stripe(struct btrfs_block_group *bg,
int ret;
if (unlikely(!extent_root || !csum_root)) {
btrfs_err(fs_info, "no valid extent or csum root for scrub");
btrfs_err(fs_info, "scrub: no valid extent or csum root found");
return -EUCLEAN;
}
memset(stripe->sectors, 0, sizeof(struct scrub_sector_verification) *
@ -1907,7 +1906,7 @@ static bool stripe_has_metadata_error(struct scrub_stripe *stripe)
struct btrfs_fs_info *fs_info = stripe->bg->fs_info;
btrfs_err(fs_info,
"stripe %llu has unrepaired metadata sector at %llu",
"scrub: stripe %llu has unrepaired metadata sector at logical %llu",
stripe->logical,
stripe->logical + (i << fs_info->sectorsize_bits));
return true;
@ -2167,7 +2166,7 @@ static int scrub_raid56_parity_stripe(struct scrub_ctx *sctx,
bitmap_and(&error, &error, &has_extent, stripe->nr_sectors);
if (!bitmap_empty(&error, stripe->nr_sectors)) {
btrfs_err(fs_info,
"unrepaired sectors detected, full stripe %llu data stripe %u errors %*pbl",
"scrub: unrepaired sectors detected, full stripe %llu data stripe %u errors %*pbl",
full_stripe_start, i, stripe->nr_sectors,
&error);
ret = -EIO;
@ -2789,14 +2788,14 @@ int scrub_enumerate_chunks(struct scrub_ctx *sctx,
ro_set = 0;
} else if (ret == -ETXTBSY) {
btrfs_warn(fs_info,
"skipping scrub of block group %llu due to active swapfile",
"scrub: skipping scrub of block group %llu due to active swapfile",
cache->start);
scrub_pause_off(fs_info);
ret = 0;
goto skip_unfreeze;
} else {
btrfs_warn(fs_info,
"failed setting block group ro: %d", ret);
btrfs_warn(fs_info, "scrub: failed setting block group ro: %d",
ret);
btrfs_unfreeze_block_group(cache);
btrfs_put_block_group(cache);
scrub_pause_off(fs_info);
@ -2892,13 +2891,13 @@ static int scrub_one_super(struct scrub_ctx *sctx, struct btrfs_device *dev,
ret = btrfs_check_super_csum(fs_info, sb);
if (ret != 0) {
btrfs_err_rl(fs_info,
"super block at physical %llu devid %llu has bad csum",
"scrub: super block at physical %llu devid %llu has bad csum",
physical, dev->devid);
return -EIO;
}
if (btrfs_super_generation(sb) != generation) {
btrfs_err_rl(fs_info,
"super block at physical %llu devid %llu has bad generation %llu expect %llu",
"scrub: super block at physical %llu devid %llu has bad generation %llu expect %llu",
physical, dev->devid,
btrfs_super_generation(sb), generation);
return -EUCLEAN;
@ -3059,7 +3058,7 @@ int btrfs_scrub_dev(struct btrfs_fs_info *fs_info, u64 devid, u64 start,
!test_bit(BTRFS_DEV_STATE_WRITEABLE, &dev->dev_state)) {
mutex_unlock(&fs_info->fs_devices->device_list_mutex);
btrfs_err_in_rcu(fs_info,
"scrub on devid %llu: filesystem on %s is not writable",
"scrub: devid %llu: filesystem on %s is not writable",
devid, btrfs_dev_name(dev));
ret = -EROFS;
goto out;

View File

@ -668,15 +668,15 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans,
extent_end = ALIGN(start + size,
fs_info->sectorsize);
} else {
ret = 0;
goto out;
btrfs_err(fs_info,
"unexpected extent type=%d root=%llu inode=%llu offset=%llu",
found_type, btrfs_root_id(root), key->objectid, key->offset);
return -EUCLEAN;
}
inode = read_one_inode(root, key->objectid);
if (!inode) {
ret = -EIO;
goto out;
}
if (!inode)
return -EIO;
/*
* first check to see if we already have this extent in the
@ -961,7 +961,8 @@ static noinline int drop_one_dir_item(struct btrfs_trans_handle *trans,
ret = unlink_inode_for_log_replay(trans, dir, inode, &name);
out:
kfree(name.name);
iput(&inode->vfs_inode);
if (inode)
iput(&inode->vfs_inode);
return ret;
}
@ -1176,8 +1177,8 @@ static inline int __add_inode_ref(struct btrfs_trans_handle *trans,
ret = unlink_inode_for_log_replay(trans,
victim_parent,
inode, &victim_name);
iput(&victim_parent->vfs_inode);
}
iput(&victim_parent->vfs_inode);
kfree(victim_name.name);
if (ret)
return ret;

View File

@ -3282,6 +3282,12 @@ int btrfs_remove_chunk(struct btrfs_trans_handle *trans, u64 chunk_offset)
device->bytes_used - dev_extent_len);
atomic64_add(dev_extent_len, &fs_info->free_chunk_space);
btrfs_clear_space_info_full(fs_info);
if (list_empty(&device->post_commit_list)) {
list_add_tail(&device->post_commit_list,
&trans->transaction->dev_update_list);
}
mutex_unlock(&fs_info->chunk_mutex);
}
}

View File

@ -1403,7 +1403,8 @@ static int btrfs_load_block_group_single(struct btrfs_block_group *bg,
static int btrfs_load_block_group_dup(struct btrfs_block_group *bg,
struct btrfs_chunk_map *map,
struct zone_info *zone_info,
unsigned long *active)
unsigned long *active,
u64 last_alloc)
{
struct btrfs_fs_info *fs_info = bg->fs_info;
@ -1426,6 +1427,13 @@ static int btrfs_load_block_group_dup(struct btrfs_block_group *bg,
zone_info[1].physical);
return -EIO;
}
if (zone_info[0].alloc_offset == WP_CONVENTIONAL)
zone_info[0].alloc_offset = last_alloc;
if (zone_info[1].alloc_offset == WP_CONVENTIONAL)
zone_info[1].alloc_offset = last_alloc;
if (zone_info[0].alloc_offset != zone_info[1].alloc_offset) {
btrfs_err(bg->fs_info,
"zoned: write pointer offset mismatch of zones in DUP profile");
@ -1446,7 +1454,8 @@ static int btrfs_load_block_group_dup(struct btrfs_block_group *bg,
static int btrfs_load_block_group_raid1(struct btrfs_block_group *bg,
struct btrfs_chunk_map *map,
struct zone_info *zone_info,
unsigned long *active)
unsigned long *active,
u64 last_alloc)
{
struct btrfs_fs_info *fs_info = bg->fs_info;
int i;
@ -1461,10 +1470,12 @@ static int btrfs_load_block_group_raid1(struct btrfs_block_group *bg,
bg->zone_capacity = min_not_zero(zone_info[0].capacity, zone_info[1].capacity);
for (i = 0; i < map->num_stripes; i++) {
if (zone_info[i].alloc_offset == WP_MISSING_DEV ||
zone_info[i].alloc_offset == WP_CONVENTIONAL)
if (zone_info[i].alloc_offset == WP_MISSING_DEV)
continue;
if (zone_info[i].alloc_offset == WP_CONVENTIONAL)
zone_info[i].alloc_offset = last_alloc;
if ((zone_info[0].alloc_offset != zone_info[i].alloc_offset) &&
!btrfs_test_opt(fs_info, DEGRADED)) {
btrfs_err(fs_info,
@ -1494,7 +1505,8 @@ static int btrfs_load_block_group_raid1(struct btrfs_block_group *bg,
static int btrfs_load_block_group_raid0(struct btrfs_block_group *bg,
struct btrfs_chunk_map *map,
struct zone_info *zone_info,
unsigned long *active)
unsigned long *active,
u64 last_alloc)
{
struct btrfs_fs_info *fs_info = bg->fs_info;
@ -1505,10 +1517,29 @@ static int btrfs_load_block_group_raid0(struct btrfs_block_group *bg,
}
for (int i = 0; i < map->num_stripes; i++) {
if (zone_info[i].alloc_offset == WP_MISSING_DEV ||
zone_info[i].alloc_offset == WP_CONVENTIONAL)
if (zone_info[i].alloc_offset == WP_MISSING_DEV)
continue;
if (zone_info[i].alloc_offset == WP_CONVENTIONAL) {
u64 stripe_nr, full_stripe_nr;
u64 stripe_offset;
int stripe_index;
stripe_nr = div64_u64(last_alloc, map->stripe_size);
stripe_offset = stripe_nr * map->stripe_size;
full_stripe_nr = div_u64(stripe_nr, map->num_stripes);
div_u64_rem(stripe_nr, map->num_stripes, &stripe_index);
zone_info[i].alloc_offset =
full_stripe_nr * map->stripe_size;
if (stripe_index > i)
zone_info[i].alloc_offset += map->stripe_size;
else if (stripe_index == i)
zone_info[i].alloc_offset +=
(last_alloc - stripe_offset);
}
if (test_bit(0, active) != test_bit(i, active)) {
if (!btrfs_zone_activate(bg))
return -EIO;
@ -1526,7 +1557,8 @@ static int btrfs_load_block_group_raid0(struct btrfs_block_group *bg,
static int btrfs_load_block_group_raid10(struct btrfs_block_group *bg,
struct btrfs_chunk_map *map,
struct zone_info *zone_info,
unsigned long *active)
unsigned long *active,
u64 last_alloc)
{
struct btrfs_fs_info *fs_info = bg->fs_info;
@ -1537,8 +1569,7 @@ static int btrfs_load_block_group_raid10(struct btrfs_block_group *bg,
}
for (int i = 0; i < map->num_stripes; i++) {
if (zone_info[i].alloc_offset == WP_MISSING_DEV ||
zone_info[i].alloc_offset == WP_CONVENTIONAL)
if (zone_info[i].alloc_offset == WP_MISSING_DEV)
continue;
if (test_bit(0, active) != test_bit(i, active)) {
@ -1549,6 +1580,29 @@ static int btrfs_load_block_group_raid10(struct btrfs_block_group *bg,
set_bit(BLOCK_GROUP_FLAG_ZONE_IS_ACTIVE, &bg->runtime_flags);
}
if (zone_info[i].alloc_offset == WP_CONVENTIONAL) {
u64 stripe_nr, full_stripe_nr;
u64 stripe_offset;
int stripe_index;
stripe_nr = div64_u64(last_alloc, map->stripe_size);
stripe_offset = stripe_nr * map->stripe_size;
full_stripe_nr = div_u64(stripe_nr,
map->num_stripes / map->sub_stripes);
div_u64_rem(stripe_nr,
(map->num_stripes / map->sub_stripes),
&stripe_index);
zone_info[i].alloc_offset =
full_stripe_nr * map->stripe_size;
if (stripe_index > (i / map->sub_stripes))
zone_info[i].alloc_offset += map->stripe_size;
else if (stripe_index == (i / map->sub_stripes))
zone_info[i].alloc_offset +=
(last_alloc - stripe_offset);
}
if ((i % map->sub_stripes) == 0) {
bg->zone_capacity += zone_info[i].capacity;
bg->alloc_offset += zone_info[i].alloc_offset;
@ -1637,18 +1691,22 @@ int btrfs_load_block_group_zone_info(struct btrfs_block_group *cache, bool new)
ret = btrfs_load_block_group_single(cache, &zone_info[0], active);
break;
case BTRFS_BLOCK_GROUP_DUP:
ret = btrfs_load_block_group_dup(cache, map, zone_info, active);
ret = btrfs_load_block_group_dup(cache, map, zone_info, active,
last_alloc);
break;
case BTRFS_BLOCK_GROUP_RAID1:
case BTRFS_BLOCK_GROUP_RAID1C3:
case BTRFS_BLOCK_GROUP_RAID1C4:
ret = btrfs_load_block_group_raid1(cache, map, zone_info, active);
ret = btrfs_load_block_group_raid1(cache, map, zone_info,
active, last_alloc);
break;
case BTRFS_BLOCK_GROUP_RAID0:
ret = btrfs_load_block_group_raid0(cache, map, zone_info, active);
ret = btrfs_load_block_group_raid0(cache, map, zone_info,
active, last_alloc);
break;
case BTRFS_BLOCK_GROUP_RAID10:
ret = btrfs_load_block_group_raid10(cache, map, zone_info, active);
ret = btrfs_load_block_group_raid10(cache, map, zone_info,
active, last_alloc);
break;
case BTRFS_BLOCK_GROUP_RAID5:
case BTRFS_BLOCK_GROUP_RAID6: