From f79846ca2f04c9744627c24034d675c88f0da3a0 Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Mon, 3 Jul 2023 11:48:08 +0900 Subject: [PATCH 1/5] scsi: sd_zbc: Set zone limits before revalidating zones In sd_zbc_revalidate_zones(), execute blk_queue_chunk_sectors() and blk_queue_max_zone_append_sectors() to respectively set a ZBC device zone size and maximum zone append sector limit before executing blk_revalidate_disk_zones(). This is to allow the block layer zone reavlidation to check these device characteristics prior to checking all zones of the device. Since blk_queue_max_zone_append_sectors() already caps the device maximum zone append limit to the zone size and to the maximum command size, the max_append value passed to blk_queue_max_zone_append_sectors() is simplified to the maximum number of segments times the number of sectors per page. Signed-off-by: Damien Le Moal Link: https://lore.kernel.org/r/20230703024812.76778-2-dlemoal@kernel.org Reviewed-by: Bart Van Assche Reviewed-by: Johannes Thumshirn Signed-off-by: Martin K. Petersen --- drivers/scsi/sd_zbc.c | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/drivers/scsi/sd_zbc.c b/drivers/scsi/sd_zbc.c index abbd08933ac7..a25215507668 100644 --- a/drivers/scsi/sd_zbc.c +++ b/drivers/scsi/sd_zbc.c @@ -831,7 +831,6 @@ int sd_zbc_revalidate_zones(struct scsi_disk *sdkp) struct request_queue *q = disk->queue; u32 zone_blocks = sdkp->early_zone_info.zone_blocks; unsigned int nr_zones = sdkp->early_zone_info.nr_zones; - u32 max_append; int ret = 0; unsigned int flags; @@ -876,6 +875,11 @@ int sd_zbc_revalidate_zones(struct scsi_disk *sdkp) goto unlock; } + blk_queue_chunk_sectors(q, + logical_to_sectors(sdkp->device, zone_blocks)); + blk_queue_max_zone_append_sectors(q, + q->limits.max_segments << PAGE_SECTORS_SHIFT); + ret = blk_revalidate_disk_zones(disk, sd_zbc_revalidate_zones_cb); memalloc_noio_restore(flags); @@ -888,12 +892,6 @@ int sd_zbc_revalidate_zones(struct scsi_disk *sdkp) goto unlock; } - max_append = min_t(u32, logical_to_sectors(sdkp->device, zone_blocks), - q->limits.max_segments << PAGE_SECTORS_SHIFT); - max_append = min_t(u32, max_append, queue_max_hw_sectors(q)); - - blk_queue_max_zone_append_sectors(q, max_append); - sd_zbc_print_zones(sdkp); unlock: From d226b0a2b683e27fce060669dc7cb8d6917b785a Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Mon, 3 Jul 2023 11:48:09 +0900 Subject: [PATCH 2/5] scsi: nvme: zns: Set zone limits before revalidating zones In nvme_revalidate_zones(), execute blk_queue_chunk_sectors() and blk_queue_max_zone_append_sectors() to respectively set a ZNS namespace zone size and maximum zone append sector limit before executing blk_revalidate_disk_zones(). This is to allow the block layer zone reavlidation to check these device characteristics prior to checking all zones of the device. Signed-off-by: Damien Le Moal Link: https://lore.kernel.org/r/20230703024812.76778-3-dlemoal@kernel.org Reviewed-by: Bart Van Assche Reviewed-by: Johannes Thumshirn Signed-off-by: Martin K. Petersen --- drivers/nvme/host/zns.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/drivers/nvme/host/zns.c b/drivers/nvme/host/zns.c index 12316ab51bda..ec8557810c21 100644 --- a/drivers/nvme/host/zns.c +++ b/drivers/nvme/host/zns.c @@ -10,12 +10,11 @@ int nvme_revalidate_zones(struct nvme_ns *ns) { struct request_queue *q = ns->queue; - int ret; - ret = blk_revalidate_disk_zones(ns->disk, NULL); - if (!ret) - blk_queue_max_zone_append_sectors(q, ns->ctrl->max_zone_append); - return ret; + blk_queue_chunk_sectors(q, ns->zsze); + blk_queue_max_zone_append_sectors(q, ns->ctrl->max_zone_append); + + return blk_revalidate_disk_zones(ns->disk, NULL); } static int nvme_set_max_append(struct nvme_ctrl *ctrl) From a442b899fe17118e672647f664bf7f1dd5d7fcb0 Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Mon, 3 Jul 2023 11:48:10 +0900 Subject: [PATCH 3/5] scsi: block: nullblk: Set zone limits before revalidating zones In null_register_zoned_dev(), execute blk_queue_chunk_sectors() and blk_queue_max_zone_append_sectors() to respectively set the zoned device zone size and maximum zone append sector limit before executing blk_revalidate_disk_zones(). This is to allow the block layer zone reavlidation to check these device characteristics prior to checking all zones of the device. Signed-off-by: Damien Le Moal Link: https://lore.kernel.org/r/20230703024812.76778-4-dlemoal@kernel.org Reviewed-by: Bart Van Assche Reviewed-by: Johannes Thumshirn Signed-off-by: Martin K. Petersen --- drivers/block/null_blk/zoned.c | 16 +++++----------- 1 file changed, 5 insertions(+), 11 deletions(-) diff --git a/drivers/block/null_blk/zoned.c b/drivers/block/null_blk/zoned.c index 635ce0648133..55c5b48bc276 100644 --- a/drivers/block/null_blk/zoned.c +++ b/drivers/block/null_blk/zoned.c @@ -162,21 +162,15 @@ int null_register_zoned_dev(struct nullb *nullb) disk_set_zoned(nullb->disk, BLK_ZONED_HM); blk_queue_flag_set(QUEUE_FLAG_ZONE_RESETALL, q); blk_queue_required_elevator_features(q, ELEVATOR_F_ZBD_SEQ_WRITE); - - if (queue_is_mq(q)) { - int ret = blk_revalidate_disk_zones(nullb->disk, NULL); - - if (ret) - return ret; - } else { - blk_queue_chunk_sectors(q, dev->zone_size_sects); - nullb->disk->nr_zones = bdev_nr_zones(nullb->disk->part0); - } - + blk_queue_chunk_sectors(q, dev->zone_size_sects); + nullb->disk->nr_zones = bdev_nr_zones(nullb->disk->part0); blk_queue_max_zone_append_sectors(q, dev->zone_size_sects); disk_set_max_open_zones(nullb->disk, dev->zone_max_open); disk_set_max_active_zones(nullb->disk, dev->zone_max_active); + if (queue_is_mq(q)) + return blk_revalidate_disk_zones(nullb->disk, NULL); + return 0; } From a3d96ed21507e8d70ddab6c7abc93d5e56aaeeb0 Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Mon, 3 Jul 2023 11:48:11 +0900 Subject: [PATCH 4/5] scsi: block: virtio_blk: Set zone limits before revalidating zones In virtblk_probe_zoned_device(), execute blk_queue_chunk_sectors() and blk_queue_max_zone_append_sectors() to respectively set the zoned device zone size and maximum zone append sector limit before executing blk_revalidate_disk_zones(). This is to allow the block layer zone reavlidation to check these device characteristics prior to checking all zones of the device. Signed-off-by: Damien Le Moal Link: https://lore.kernel.org/r/20230703024812.76778-5-dlemoal@kernel.org Reviewed-by: Bart Van Assche Reviewed-by: Dmitry Fomichev Reviewed-by: Johannes Thumshirn Signed-off-by: Martin K. Petersen --- drivers/block/virtio_blk.c | 34 +++++++++++++++------------------- 1 file changed, 15 insertions(+), 19 deletions(-) diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c index 2b918e28acaa..d6df29566370 100644 --- a/drivers/block/virtio_blk.c +++ b/drivers/block/virtio_blk.c @@ -781,7 +781,6 @@ static int virtblk_probe_zoned_device(struct virtio_device *vdev, { u32 v, wg; u8 model; - int ret; virtio_cread(vdev, struct virtio_blk_config, zoned.model, &model); @@ -836,6 +835,7 @@ static int virtblk_probe_zoned_device(struct virtio_device *vdev, vblk->zone_sectors); return -ENODEV; } + blk_queue_chunk_sectors(q, vblk->zone_sectors); dev_dbg(&vdev->dev, "zone sectors = %u\n", vblk->zone_sectors); if (virtio_has_feature(vdev, VIRTIO_BLK_F_DISCARD)) { @@ -844,26 +844,22 @@ static int virtblk_probe_zoned_device(struct virtio_device *vdev, blk_queue_max_discard_sectors(q, 0); } - ret = blk_revalidate_disk_zones(vblk->disk, NULL); - if (!ret) { - virtio_cread(vdev, struct virtio_blk_config, - zoned.max_append_sectors, &v); - if (!v) { - dev_warn(&vdev->dev, "zero max_append_sectors reported\n"); - return -ENODEV; - } - if ((v << SECTOR_SHIFT) < wg) { - dev_err(&vdev->dev, - "write granularity %u exceeds max_append_sectors %u limit\n", - wg, v); - return -ENODEV; - } - - blk_queue_max_zone_append_sectors(q, v); - dev_dbg(&vdev->dev, "max append sectors = %u\n", v); + virtio_cread(vdev, struct virtio_blk_config, + zoned.max_append_sectors, &v); + if (!v) { + dev_warn(&vdev->dev, "zero max_append_sectors reported\n"); + return -ENODEV; } + if ((v << SECTOR_SHIFT) < wg) { + dev_err(&vdev->dev, + "write granularity %u exceeds max_append_sectors %u limit\n", + wg, v); + return -ENODEV; + } + blk_queue_max_zone_append_sectors(q, v); + dev_dbg(&vdev->dev, "max append sectors = %u\n", v); - return ret; + return blk_revalidate_disk_zones(vblk->disk, NULL); } #else From 03e51c4a74b91b0b1a9ca091029b0b58f014be81 Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Mon, 3 Jul 2023 11:48:12 +0900 Subject: [PATCH 5/5] scsi: block: Improve checks in blk_revalidate_disk_zones() blk_revalidate_disk_zones() implements checks of the zones of a zoned block device, verifying that the zone size is a power of 2 number of sectors, that all zones (except possibly the last one) have the same size and that zones cover the entire addressing space of the device. While these checks are appropriate to verify that well tested hardware devices have an adequate zone configurations, they lack in certain areas which may result in issues with emulated devices implemented with user drivers such as ublk or tcmu. Specifically, this function does not check if the device driver indicated support for the mandatory zone append writes, that is, if the device max_zone_append_sectors queue limit is set to a non-zero value. Additionally, invalid zones such as a zero length zone with a start sector equal to the device capacity will not be detected and result in out of bounds use of the zone bitmaps prepared with the callback function blk_revalidate_zone_cb(). Improve blk_revalidate_disk_zones() to address these inadequate checks, relying on the fact that all device drivers supporting zoned block devices must set the device zone size (chunk_sectors queue limit) and the max_zone_append_sectors queue limit before executing this function. The check for a non-zero max_zone_append_sectors value is done in blk_revalidate_disk_zones() before executing the zone report. The zone report callback function blk_revalidate_zone_cb() is also modified to add a check that a zone start is below the device capacity. The check that the zone size is a power of 2 number of sectors is moved to blk_revalidate_disk_zones() as the zone size is already known. Similarly, the number of zones of the device can be calculated in blk_revalidate_disk_zones() before executing the zone report. The kdoc comment for blk_revalidate_disk_zones() is also updated to mention that device drivers must set the device zone size and the max_zone_append_sectors queue limit before calling this function. Signed-off-by: Damien Le Moal Link: https://lore.kernel.org/r/20230703024812.76778-6-dlemoal@kernel.org Reviewed-by: Bart Van Assche Reviewed-by: Johannes Thumshirn Signed-off-by: Martin K. Petersen --- block/blk-zoned.c | 86 +++++++++++++++++++++++++++-------------------- 1 file changed, 50 insertions(+), 36 deletions(-) diff --git a/block/blk-zoned.c b/block/blk-zoned.c index fce9082384d6..da92ce0c5da9 100644 --- a/block/blk-zoned.c +++ b/block/blk-zoned.c @@ -448,7 +448,6 @@ struct blk_revalidate_zone_args { unsigned long *conv_zones_bitmap; unsigned long *seq_zones_wlock; unsigned int nr_zones; - sector_t zone_sectors; sector_t sector; }; @@ -462,38 +461,34 @@ static int blk_revalidate_zone_cb(struct blk_zone *zone, unsigned int idx, struct gendisk *disk = args->disk; struct request_queue *q = disk->queue; sector_t capacity = get_capacity(disk); + sector_t zone_sectors = q->limits.chunk_sectors; + + /* Check for bad zones and holes in the zone report */ + if (zone->start != args->sector) { + pr_warn("%s: Zone gap at sectors %llu..%llu\n", + disk->disk_name, args->sector, zone->start); + return -ENODEV; + } + + if (zone->start >= capacity || !zone->len) { + pr_warn("%s: Invalid zone start %llu, length %llu\n", + disk->disk_name, zone->start, zone->len); + return -ENODEV; + } /* * All zones must have the same size, with the exception on an eventual * smaller last zone. */ - if (zone->start == 0) { - if (zone->len == 0 || !is_power_of_2(zone->len)) { - pr_warn("%s: Invalid zoned device with non power of two zone size (%llu)\n", - disk->disk_name, zone->len); - return -ENODEV; - } - - args->zone_sectors = zone->len; - args->nr_zones = (capacity + zone->len - 1) >> ilog2(zone->len); - } else if (zone->start + args->zone_sectors < capacity) { - if (zone->len != args->zone_sectors) { + if (zone->start + zone->len < capacity) { + if (zone->len != zone_sectors) { pr_warn("%s: Invalid zoned device with non constant zone size\n", disk->disk_name); return -ENODEV; } - } else { - if (zone->len > args->zone_sectors) { - pr_warn("%s: Invalid zoned device with larger last zone size\n", - disk->disk_name); - return -ENODEV; - } - } - - /* Check for holes in the zone report */ - if (zone->start != args->sector) { - pr_warn("%s: Zone gap at sectors %llu..%llu\n", - disk->disk_name, args->sector, zone->start); + } else if (zone->len > zone_sectors) { + pr_warn("%s: Invalid zoned device with larger last zone size\n", + disk->disk_name); return -ENODEV; } @@ -532,11 +527,13 @@ static int blk_revalidate_zone_cb(struct blk_zone *zone, unsigned int idx, * @disk: Target disk * @update_driver_data: Callback to update driver data on the frozen disk * - * Helper function for low-level device drivers to (re) allocate and initialize - * a disk request queue zone bitmaps. This functions should normally be called - * within the disk ->revalidate method for blk-mq based drivers. For BIO based - * drivers only q->nr_zones needs to be updated so that the sysfs exposed value - * is correct. + * Helper function for low-level device drivers to check and (re) allocate and + * initialize a disk request queue zone bitmaps. This functions should normally + * be called within the disk ->revalidate method for blk-mq based drivers. + * Before calling this function, the device driver must already have set the + * device zone size (chunk_sector limit) and the max zone append limit. + * For BIO based drivers, this function cannot be used. BIO based device drivers + * only need to set disk->nr_zones so that the sysfs exposed value is correct. * If the @update_driver_data callback function is not NULL, the callback is * executed with the device request queue frozen after all zones have been * checked. @@ -545,9 +542,9 @@ int blk_revalidate_disk_zones(struct gendisk *disk, void (*update_driver_data)(struct gendisk *disk)) { struct request_queue *q = disk->queue; - struct blk_revalidate_zone_args args = { - .disk = disk, - }; + sector_t zone_sectors = q->limits.chunk_sectors; + sector_t capacity = get_capacity(disk); + struct blk_revalidate_zone_args args = { }; unsigned int noio_flag; int ret; @@ -556,13 +553,31 @@ int blk_revalidate_disk_zones(struct gendisk *disk, if (WARN_ON_ONCE(!queue_is_mq(q))) return -EIO; - if (!get_capacity(disk)) - return -EIO; + if (!capacity) + return -ENODEV; + + /* + * Checks that the device driver indicated a valid zone size and that + * the max zone append limit is set. + */ + if (!zone_sectors || !is_power_of_2(zone_sectors)) { + pr_warn("%s: Invalid non power of two zone size (%llu)\n", + disk->disk_name, zone_sectors); + return -ENODEV; + } + + if (!q->limits.max_zone_append_sectors) { + pr_warn("%s: Invalid 0 maximum zone append limit\n", + disk->disk_name); + return -ENODEV; + } /* * Ensure that all memory allocations in this context are done as if * GFP_NOIO was specified. */ + args.disk = disk; + args.nr_zones = (capacity + zone_sectors - 1) >> ilog2(zone_sectors); noio_flag = memalloc_noio_save(); ret = disk->fops->report_zones(disk, 0, UINT_MAX, blk_revalidate_zone_cb, &args); @@ -576,7 +591,7 @@ int blk_revalidate_disk_zones(struct gendisk *disk, * If zones where reported, make sure that the entire disk capacity * has been checked. */ - if (ret > 0 && args.sector != get_capacity(disk)) { + if (ret > 0 && args.sector != capacity) { pr_warn("%s: Missing zones from sector %llu\n", disk->disk_name, args.sector); ret = -ENODEV; @@ -589,7 +604,6 @@ int blk_revalidate_disk_zones(struct gendisk *disk, */ blk_mq_freeze_queue(q); if (ret > 0) { - blk_queue_chunk_sectors(q, args.zone_sectors); disk->nr_zones = args.nr_zones; swap(disk->seq_zones_wlock, args.seq_zones_wlock); swap(disk->conv_zones_bitmap, args.conv_zones_bitmap);