btrfs: implement remove_bdev and shutdown super operation callbacks

For the ->remove_bdev() callback, btrfs will:

- Mark the target device as missing

- Go degraded if the fs can afford it

- Return an error otherwise
  Thus falling back to the shutdown callback

For the ->shutdown callback, btrfs will:

- Set the SHUTDOWN flag
  Which will reject all new incoming operations, and make all writeback
  fail.

  The behavior is the same as the NOLOGFLUSH behavior.

To support the lookup from bdev to a btrfs_device,
btrfs_dev_lookup_args is enhanced to have a new @devt member.
If set, we should be able to use that @devt member to uniquely locate a
btrfs device.

I know the shutdown can be a little overkill: if one has RAID1
metadata and RAID0 data, one can still read data with a 50%
chance of getting some good data.

But a filesystem returning -EIO for half of the time is not really
considered usable.
Furthermore, it can also be as bad as the only device going missing for a
single-device btrfs.

So here we play it safe rather than sorry when handling a missing device.

And the remove_bdev callback will be hidden behind experimental features
for now, the reasons are:

- There are not enough btrfs specific bdev removal test cases
  The existing test cases all remove the only device, and thus only
  exercise the ->shutdown() behavior.

- The expected behavior has not yet been determined
  Although the current auto-degrade behavior is no worse than the old
  behavior, it may not always be what the end users want.

  Before there is a concrete interface, better hide the new feature
  from end users.

Reviewed-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
Reviewed-by: Anand Jain <asj@kernel.org>
Tested-by: Anand Jain <asj@kernel.org>
Signed-off-by: Qu Wenruo <wqu@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
This commit is contained in:
Qu Wenruo 2025-10-13 10:22:05 +10:30 committed by David Sterba
parent 6b1ac78dd0
commit 803e115657
3 changed files with 71 additions and 0 deletions

View File

@ -2430,6 +2430,66 @@ static long btrfs_free_cached_objects(struct super_block *sb, struct shrink_cont
return 0;
}
#ifdef CONFIG_BTRFS_EXPERIMENTAL
static int btrfs_remove_bdev(struct super_block *sb, struct block_device *bdev)
{
struct btrfs_fs_info *fs_info = btrfs_sb(sb);
struct btrfs_device *device;
struct btrfs_dev_lookup_args lookup_args = { .devt = bdev->bd_dev };
bool can_rw;
mutex_lock(&fs_info->fs_devices->device_list_mutex);
device = btrfs_find_device(fs_info->fs_devices, &lookup_args);
if (!device) {
mutex_unlock(&fs_info->fs_devices->device_list_mutex);
/* Device not found, should not affect the running fs, just give a warning. */
btrfs_warn(fs_info, "unable to find btrfs device for block device '%pg'", bdev);
return 0;
}
/*
* The to-be-removed device is already missing?
*
* That's weird but no special handling needed and can exit right now.
*/
if (unlikely(test_and_set_bit(BTRFS_DEV_STATE_MISSING, &device->dev_state))) {
mutex_unlock(&fs_info->fs_devices->device_list_mutex);
btrfs_warn(fs_info, "btrfs device id %llu is already missing", device->devid);
return 0;
}
device->fs_devices->missing_devices++;
if (test_and_clear_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state)) {
list_del_init(&device->dev_alloc_list);
WARN_ON(device->fs_devices->rw_devices < 1);
device->fs_devices->rw_devices--;
}
can_rw = btrfs_check_rw_degradable(fs_info, device);
mutex_unlock(&fs_info->fs_devices->device_list_mutex);
/*
* Now device is considered missing, btrfs_device_name() won't give a
* meaningful result anymore, so only output the devid.
*/
if (unlikely(!can_rw)) {
btrfs_crit(fs_info,
"btrfs device id %llu has gone missing, can not maintain read-write",
device->devid);
return -EIO;
}
btrfs_warn(fs_info,
"btrfs device id %llu has gone missing, continue as degraded",
device->devid);
btrfs_set_opt(fs_info->mount_opt, DEGRADED);
return 0;
}
/*
 * ->shutdown() super operation: force the filesystem backing @sb into the
 * shutdown state via btrfs_force_shutdown().
 */
static void btrfs_shutdown(struct super_block *sb)
{
	btrfs_force_shutdown(btrfs_sb(sb));
}
#endif
static const struct super_operations btrfs_super_ops = {
.drop_inode = btrfs_drop_inode,
.evict_inode = btrfs_evict_inode,
@ -2445,6 +2505,10 @@ static const struct super_operations btrfs_super_ops = {
.unfreeze_fs = btrfs_unfreeze,
.nr_cached_objects = btrfs_nr_cached_objects,
.free_cached_objects = btrfs_free_cached_objects,
#ifdef CONFIG_BTRFS_EXPERIMENTAL
.remove_bdev = btrfs_remove_bdev,
.shutdown = btrfs_shutdown,
#endif
};
static const struct file_operations btrfs_ctl_fops = {

View File

@ -6802,6 +6802,8 @@ static bool dev_args_match_fs_devices(const struct btrfs_dev_lookup_args *args,
static bool dev_args_match_device(const struct btrfs_dev_lookup_args *args,
const struct btrfs_device *device)
{
if (args->devt)
return device->devt == args->devt;
if (args->missing) {
if (test_bit(BTRFS_DEV_STATE_IN_FS_METADATA, &device->dev_state) &&
!device->bdev)

View File

@ -662,6 +662,11 @@ struct btrfs_dev_lookup_args {
u64 devid;
u8 *uuid;
u8 *fsid;
/*
* If devt is specified, all other members will be ignored as it is
* enough to uniquely locate a device.
*/
dev_t devt;
bool missing;
};