diff --git a/fs/btrfs/zoned.c b/fs/btrfs/zoned.c index a10e1076c881..7fa60a44d716 100644 --- a/fs/btrfs/zoned.c +++ b/fs/btrfs/zoned.c @@ -1561,7 +1561,9 @@ static int btrfs_load_block_group_raid0(struct btrfs_block_group *bg, { struct btrfs_fs_info *fs_info = bg->fs_info; u64 stripe_nr = 0, stripe_offset = 0; + u64 prev_offset = 0; u32 stripe_index = 0; + bool has_partial = false, has_conventional = false; if ((map->type & BTRFS_BLOCK_GROUP_DATA) && !fs_info->stripe_root) { btrfs_err(fs_info, "zoned: data %s needs raid-stripe-tree", @@ -1569,6 +1571,35 @@ static int btrfs_load_block_group_raid0(struct btrfs_block_group *bg, return -EINVAL; } + /* + * When the last extent is removed, last_alloc can be smaller than the other write + * pointer. In that case, last_alloc should be moved to the corresponding write + * pointer position. + */ + for (int i = 0; i < map->num_stripes; i++) { + u64 alloc; + + if (zone_info[i].alloc_offset == WP_MISSING_DEV || + zone_info[i].alloc_offset == WP_CONVENTIONAL) + continue; + + stripe_nr = zone_info[i].alloc_offset >> BTRFS_STRIPE_LEN_SHIFT; + stripe_offset = zone_info[i].alloc_offset & BTRFS_STRIPE_LEN_MASK; + if (stripe_offset == 0 && stripe_nr > 0) { + stripe_nr--; + stripe_offset = BTRFS_STRIPE_LEN; + } + alloc = ((stripe_nr * map->num_stripes + i) << BTRFS_STRIPE_LEN_SHIFT) + + stripe_offset; + last_alloc = max(last_alloc, alloc); + + /* Partially written stripe found. It should be last. */ + if (zone_info[i].alloc_offset & BTRFS_STRIPE_LEN_MASK) + break; + } + stripe_nr = 0; + stripe_offset = 0; + if (last_alloc) { u32 factor = map->num_stripes; @@ -1582,7 +1613,7 @@ static int btrfs_load_block_group_raid0(struct btrfs_block_group *bg, continue; if (zone_info[i].alloc_offset == WP_CONVENTIONAL) { - + has_conventional = true; zone_info[i].alloc_offset = btrfs_stripe_nr_to_offset(stripe_nr); if (stripe_index > i) @@ -1591,6 +1622,28 @@ static int btrfs_load_block_group_raid0(struct btrfs_block_group *bg, zone_info[i].alloc_offset += stripe_offset; } + /* Verification */ + if (i != 0) { + if (unlikely(prev_offset < zone_info[i].alloc_offset)) { + btrfs_err(fs_info, + "zoned: stripe position disorder found in block group %llu", + bg->start); + return -EIO; + } + + if (unlikely(has_partial && + (zone_info[i].alloc_offset & BTRFS_STRIPE_LEN_MASK))) { + btrfs_err(fs_info, + "zoned: multiple partial written stripe found in block group %llu", + bg->start); + return -EIO; + } + } + prev_offset = zone_info[i].alloc_offset; + + if ((zone_info[i].alloc_offset & BTRFS_STRIPE_LEN_MASK) != 0) + has_partial = true; + if (test_bit(0, active) != test_bit(i, active)) { if (unlikely(!btrfs_zone_activate(bg))) return -EIO; @@ -1602,6 +1655,19 @@ static int btrfs_load_block_group_raid0(struct btrfs_block_group *bg, bg->alloc_offset += zone_info[i].alloc_offset; } + /* Check if all devices stay in the same stripe row. */ + if (unlikely(zone_info[0].alloc_offset - + zone_info[map->num_stripes - 1].alloc_offset > BTRFS_STRIPE_LEN)) { + btrfs_err(fs_info, "zoned: stripe gap too large in block group %llu", bg->start); + return -EIO; + } + + if (unlikely(has_conventional && bg->alloc_offset < last_alloc)) { + btrfs_err(fs_info, "zoned: allocated extent stays beyond write pointers %llu %llu", + bg->alloc_offset, last_alloc); + return -EIO; + } + return 0; } @@ -1612,8 +1678,11 @@ static int btrfs_load_block_group_raid10(struct btrfs_block_group *bg, u64 last_alloc) { struct btrfs_fs_info *fs_info = bg->fs_info; + u64 AUTO_KFREE(raid0_allocs); u64 stripe_nr = 0, stripe_offset = 0; u32 stripe_index = 0; + bool has_partial = false, has_conventional = false; + u64 prev_offset = 0; if ((map->type & BTRFS_BLOCK_GROUP_DATA) && !fs_info->stripe_root) { btrfs_err(fs_info, "zoned: data %s needs raid-stripe-tree", @@ -1621,6 +1690,60 @@ static int btrfs_load_block_group_raid10(struct btrfs_block_group *bg, return -EINVAL; } + raid0_allocs = kcalloc(map->num_stripes / map->sub_stripes, sizeof(*raid0_allocs), + GFP_NOFS); + if (!raid0_allocs) + return -ENOMEM; + + /* + * When the last extent is removed, last_alloc can be smaller than the other write + * pointer. In that case, last_alloc should be moved to the corresponding write + * pointer position. + */ + for (int i = 0; i < map->num_stripes; i += map->sub_stripes) { + u64 alloc = zone_info[i].alloc_offset; + + for (int j = 1; j < map->sub_stripes; j++) { + int idx = i + j; + + if (zone_info[idx].alloc_offset == WP_MISSING_DEV || + zone_info[idx].alloc_offset == WP_CONVENTIONAL) + continue; + if (alloc == WP_MISSING_DEV || alloc == WP_CONVENTIONAL) { + alloc = zone_info[idx].alloc_offset; + } else if (unlikely(zone_info[idx].alloc_offset != alloc)) { + btrfs_err(fs_info, + "zoned: write pointer mismatch found in block group %llu", + bg->start); + return -EIO; + } + } + + raid0_allocs[i / map->sub_stripes] = alloc; + if (alloc == WP_CONVENTIONAL) + continue; + if (unlikely(alloc == WP_MISSING_DEV)) { + btrfs_err(fs_info, + "zoned: cannot recover write pointer of block group %llu due to missing device", + bg->start); + return -EIO; + } + + stripe_nr = alloc >> BTRFS_STRIPE_LEN_SHIFT; + stripe_offset = alloc & BTRFS_STRIPE_LEN_MASK; + if (stripe_offset == 0 && stripe_nr > 0) { + stripe_nr--; + stripe_offset = BTRFS_STRIPE_LEN; + } + + alloc = ((stripe_nr * (map->num_stripes / map->sub_stripes) + + (i / map->sub_stripes)) << + BTRFS_STRIPE_LEN_SHIFT) + stripe_offset; + last_alloc = max(last_alloc, alloc); + } + stripe_nr = 0; + stripe_offset = 0; + if (last_alloc) { u32 factor = map->num_stripes / map->sub_stripes; @@ -1630,24 +1753,51 @@ static int btrfs_load_block_group_raid10(struct btrfs_block_group *bg, } for (int i = 0; i < map->num_stripes; i++) { - if (zone_info[i].alloc_offset == WP_MISSING_DEV) - continue; + int idx = i / map->sub_stripes; + + if (raid0_allocs[idx] == WP_CONVENTIONAL) { + has_conventional = true; + raid0_allocs[idx] = btrfs_stripe_nr_to_offset(stripe_nr); + + if (stripe_index > idx) + raid0_allocs[idx] += BTRFS_STRIPE_LEN; + else if (stripe_index == idx) + raid0_allocs[idx] += stripe_offset; + } + + if ((i % map->sub_stripes) == 0) { + /* Verification */ + if (i != 0) { + if (unlikely(prev_offset < raid0_allocs[idx])) { + btrfs_err(fs_info, + "zoned: stripe position disorder found in block group %llu", + bg->start); + return -EIO; + } + + if (unlikely(has_partial && + (raid0_allocs[idx] & BTRFS_STRIPE_LEN_MASK))) { + btrfs_err(fs_info, + "zoned: multiple partial written stripe found in block group %llu", + bg->start); + return -EIO; + } + } + prev_offset = raid0_allocs[idx]; + + if ((raid0_allocs[idx] & BTRFS_STRIPE_LEN_MASK) != 0) + has_partial = true; + } + + if (zone_info[i].alloc_offset == WP_MISSING_DEV || + zone_info[i].alloc_offset == WP_CONVENTIONAL) + zone_info[i].alloc_offset = raid0_allocs[idx]; if (test_bit(0, active) != test_bit(i, active)) { if (unlikely(!btrfs_zone_activate(bg))) return -EIO; - } else { - if (test_bit(0, active)) - set_bit(BLOCK_GROUP_FLAG_ZONE_IS_ACTIVE, &bg->runtime_flags); - } - - if (zone_info[i].alloc_offset == WP_CONVENTIONAL) { - zone_info[i].alloc_offset = btrfs_stripe_nr_to_offset(stripe_nr); - - if (stripe_index > (i / map->sub_stripes)) - zone_info[i].alloc_offset += BTRFS_STRIPE_LEN; - else if (stripe_index == (i / map->sub_stripes)) - zone_info[i].alloc_offset += stripe_offset; + } else if (test_bit(0, active)) { + set_bit(BLOCK_GROUP_FLAG_ZONE_IS_ACTIVE, &bg->runtime_flags); } if ((i % map->sub_stripes) == 0) { @@ -1656,6 +1806,20 @@ static int btrfs_load_block_group_raid10(struct btrfs_block_group *bg, } } + /* Check if all devices stay in the same stripe row. */ + if (unlikely(zone_info[0].alloc_offset - + zone_info[map->num_stripes - 1].alloc_offset > BTRFS_STRIPE_LEN)) { + btrfs_err(fs_info, "zoned: stripe gap too large in block group %llu", + bg->start); + return -EIO; + } + + if (unlikely(has_conventional && bg->alloc_offset < last_alloc)) { + btrfs_err(fs_info, "zoned: allocated extent stays beyond write pointers %llu %llu", + bg->alloc_offset, last_alloc); + return -EIO; + } + return 0; }