From 68999d1dd23a71b991a36201db29c8787f35a23f Mon Sep 17 00:00:00 2001 From: Guixin Liu Date: Tue, 5 Dec 2023 15:37:39 +0800 Subject: [PATCH 01/13] nvme: introduce nvme_check_ctrl_fabric_info helper Inroduce nvme_check_ctrl_fabric_info helper to check fabric controller info returned by target. Signed-off-by: Guixin Liu Reviewed-by: Sagi Grimberg Reviewed-by: Christoph Hellwig Signed-off-by: Keith Busch --- drivers/nvme/host/core.c | 42 +++++++++++++++++++++++----------------- 1 file changed, 24 insertions(+), 18 deletions(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 46a4c9c5ea96..9b38f37c872a 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -3003,6 +3003,28 @@ static int nvme_init_effects(struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id) return 0; } +static int nvme_check_ctrl_fabric_info(struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id) +{ + /* + * In fabrics we need to verify the cntlid matches the + * admin connect + */ + if (ctrl->cntlid != le16_to_cpu(id->cntlid)) { + dev_err(ctrl->device, + "Mismatching cntlid: Connect %u vs Identify %u, rejecting\n", + ctrl->cntlid, le16_to_cpu(id->cntlid)); + return -EINVAL; + } + + if (!nvme_discovery_ctrl(ctrl) && !ctrl->kas) { + dev_err(ctrl->device, + "keep-alive support is mandatory for fabrics\n"); + return -EINVAL; + } + + return 0; +} + static int nvme_init_identify(struct nvme_ctrl *ctrl) { struct nvme_id_ctrl *id; @@ -3115,25 +3137,9 @@ static int nvme_init_identify(struct nvme_ctrl *ctrl) ctrl->iorcsz = le32_to_cpu(id->iorcsz); ctrl->maxcmd = le16_to_cpu(id->maxcmd); - /* - * In fabrics we need to verify the cntlid matches the - * admin connect - */ - if (ctrl->cntlid != le16_to_cpu(id->cntlid)) { - dev_err(ctrl->device, - "Mismatching cntlid: Connect %u vs Identify " - "%u, rejecting\n", - ctrl->cntlid, le16_to_cpu(id->cntlid)); - ret = -EINVAL; + ret = nvme_check_ctrl_fabric_info(ctrl, id); + if (ret) goto out_free; - } - - if (!nvme_discovery_ctrl(ctrl) && !ctrl->kas) { - dev_err(ctrl->device, - "keep-alive support is mandatory for fabrics\n"); - ret = -EINVAL; - goto out_free; - } } else { ctrl->hmpre = le32_to_cpu(id->hmpre); ctrl->hmmin = le32_to_cpu(id->hmmin); From 2fcd3ab398260a113fd4434b4d72929066c73121 Mon Sep 17 00:00:00 2001 From: Guixin Liu Date: Tue, 5 Dec 2023 15:37:40 +0800 Subject: [PATCH 02/13] nvme-fabrics: check ioccsz and iorcsz Make sure that ioccsz and iorcsz returned by target are correct before use it. Per 2.0a base NVMe spec: I/O Queue Command Capsule Supported Size (IOCCSZ): This field defines the maximum I/O command capsule size in 16 byte units. The minimum value that shall be indicated is 4 corresponding to 64 bytes. Signed-off-by: Guixin Liu Reviewed-by: Sagi Grimberg Reviewed-by: Christoph Hellwig Signed-off-by: Keith Busch --- drivers/nvme/host/core.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 9b38f37c872a..590cd4f097c2 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -3022,6 +3022,20 @@ static int nvme_check_ctrl_fabric_info(struct nvme_ctrl *ctrl, struct nvme_id_ct return -EINVAL; } + if (ctrl->ioccsz < 4) { + dev_err(ctrl->device, + "I/O queue command capsule supported size %d < 4\n", + ctrl->ioccsz); + return -EINVAL; + } + + if (ctrl->iorcsz < 1) { + dev_err(ctrl->device, + "I/O queue response capsule supported size %d < 1\n", + ctrl->iorcsz); + return -EINVAL; + } + return 0; } From 906dbc47b1d540961a2ffddc7a095196d1a39b93 Mon Sep 17 00:00:00 2001 From: Guixin Liu Date: Wed, 13 Dec 2023 14:32:49 +0800 Subject: [PATCH 03/13] nvmet: allow identical cntlid_min and cntlid_max settings When the user wants to restrict to only creating one controller, they can set cntlid_min and cntlid_max to the same value. Signed-off-by: Guixin Liu Reviewed-by: Christoph Hellwig Reviewed-by: Sagi Grimberg Signed-off-by: Keith Busch --- drivers/nvme/target/configfs.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/nvme/target/configfs.c b/drivers/nvme/target/configfs.c index e307a044b1a1..bd514d4c4a5b 100644 --- a/drivers/nvme/target/configfs.c +++ b/drivers/nvme/target/configfs.c @@ -1274,7 +1274,7 @@ static ssize_t nvmet_subsys_attr_cntlid_min_store(struct config_item *item, return -EINVAL; down_write(&nvmet_config_sem); - if (cntlid_min >= to_subsys(item)->cntlid_max) + if (cntlid_min > to_subsys(item)->cntlid_max) goto out_unlock; to_subsys(item)->cntlid_min = cntlid_min; up_write(&nvmet_config_sem); @@ -1304,7 +1304,7 @@ static ssize_t nvmet_subsys_attr_cntlid_max_store(struct config_item *item, return -EINVAL; down_write(&nvmet_config_sem); - if (cntlid_max <= to_subsys(item)->cntlid_min) + if (cntlid_max < to_subsys(item)->cntlid_min) goto out_unlock; to_subsys(item)->cntlid_max = cntlid_max; up_write(&nvmet_config_sem); From 4ba8b3f7d368279d3d3bde788394c7f6b3e0c061 Mon Sep 17 00:00:00 2001 From: Guixin Liu Date: Wed, 13 Dec 2023 14:32:50 +0800 Subject: [PATCH 04/13] nvmet: remove cntlid_min and cntlid_max check in nvmet_alloc_ctrl The cntlid_min and cntlid_max are checked in configfs, don't check again in nvmet_alloc_ctrl(). Signed-off-by: Guixin Liu Reviewed-by: Christoph Hellwig Reviewed-by: Sagi Grimberg Signed-off-by: Keith Busch --- drivers/nvme/target/core.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c index 3935165048e7..d26aa30f8702 100644 --- a/drivers/nvme/target/core.c +++ b/drivers/nvme/target/core.c @@ -1425,9 +1425,6 @@ u16 nvmet_alloc_ctrl(const char *subsysnqn, const char *hostnqn, if (!ctrl->sqs) goto out_free_changed_ns_list; - if (subsys->cntlid_min > subsys->cntlid_max) - goto out_free_sqs; - ret = ida_alloc_range(&cntlid_ida, subsys->cntlid_min, subsys->cntlid_max, GFP_KERNEL); From 9419e71b8d67312dbe267968b2bec0ebc449dc73 Mon Sep 17 00:00:00 2001 From: Daniel Wagner Date: Mon, 18 Dec 2023 17:59:49 +0100 Subject: [PATCH 05/13] nvme: move ns id info to struct nvme_ns_head Move the namesapce info to struct nvme_ns_head, because it's the same for all associated namespaces. Note: with multipathing enabled the PI information is shared between all paths. If a path is using a different PI configuration it will overwrite the previous settings. This is obviously not correct and such configuration will be rejected in future. For the time being we expect a correctly configured storage. Reviewed-by: Christoph Hellwig Reviewed-by: Hannes Reinecke Reviewed-by: Sagi Grimberg Signed-off-by: Daniel Wagner Signed-off-by: Keith Busch --- drivers/nvme/host/core.c | 80 ++++++++++++++++++++------------------- drivers/nvme/host/ioctl.c | 8 ++-- drivers/nvme/host/nvme.h | 28 +++++++------- drivers/nvme/host/rdma.c | 2 +- drivers/nvme/host/zns.c | 17 +++++---- 5 files changed, 69 insertions(+), 66 deletions(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 590cd4f097c2..f9a70c70d95c 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -312,12 +312,12 @@ static void nvme_log_error(struct request *req) struct nvme_request *nr = nvme_req(req); if (ns) { - pr_err_ratelimited("%s: %s(0x%x) @ LBA %llu, %llu blocks, %s (sct 0x%x / sc 0x%x) %s%s\n", + pr_err_ratelimited("%s: %s(0x%x) @ LBA %llu, %u blocks, %s (sct 0x%x / sc 0x%x) %s%s\n", ns->disk ? ns->disk->disk_name : "?", nvme_get_opcode_str(nr->cmd->common.opcode), nr->cmd->common.opcode, - (unsigned long long)nvme_sect_to_lba(ns, blk_rq_pos(req)), - (unsigned long long)blk_rq_bytes(req) >> ns->lba_shift, + nvme_sect_to_lba(ns, blk_rq_pos(req)), + blk_rq_bytes(req) >> ns->head->lba_shift, nvme_get_error_status_str(nr->status), nr->status >> 8 & 7, /* Status Code Type */ nr->status & 0xff, /* Status Code */ @@ -792,7 +792,7 @@ static blk_status_t nvme_setup_discard(struct nvme_ns *ns, struct request *req, if (queue_max_discard_segments(req->q) == 1) { u64 slba = nvme_sect_to_lba(ns, blk_rq_pos(req)); - u32 nlb = blk_rq_sectors(req) >> (ns->lba_shift - 9); + u32 nlb = blk_rq_sectors(req) >> (ns->head->lba_shift - 9); range[0].cattr = cpu_to_le32(0); range[0].nlb = cpu_to_le32(nlb); @@ -801,7 +801,7 @@ static blk_status_t nvme_setup_discard(struct nvme_ns *ns, struct request *req, } else { __rq_for_each_bio(bio, req) { u64 slba = nvme_sect_to_lba(ns, bio->bi_iter.bi_sector); - u32 nlb = bio->bi_iter.bi_size >> ns->lba_shift; + u32 nlb = bio->bi_iter.bi_size >> ns->head->lba_shift; if (n < segments) { range[n].cattr = cpu_to_le32(0); @@ -839,7 +839,7 @@ static void nvme_set_ref_tag(struct nvme_ns *ns, struct nvme_command *cmnd, u64 ref48; /* both rw and write zeroes share the same reftag format */ - switch (ns->guard_type) { + switch (ns->head->guard_type) { case NVME_NVM_NS_16B_GUARD: cmnd->rw.reftag = cpu_to_le32(t10_pi_ref_tag(req)); break; @@ -869,15 +869,16 @@ static inline blk_status_t nvme_setup_write_zeroes(struct nvme_ns *ns, cmnd->write_zeroes.slba = cpu_to_le64(nvme_sect_to_lba(ns, blk_rq_pos(req))); cmnd->write_zeroes.length = - cpu_to_le16((blk_rq_bytes(req) >> ns->lba_shift) - 1); + cpu_to_le16((blk_rq_bytes(req) >> ns->head->lba_shift) - 1); - if (!(req->cmd_flags & REQ_NOUNMAP) && (ns->features & NVME_NS_DEAC)) + if (!(req->cmd_flags & REQ_NOUNMAP) && + (ns->head->features & NVME_NS_DEAC)) cmnd->write_zeroes.control |= cpu_to_le16(NVME_WZ_DEAC); if (nvme_ns_has_pi(ns)) { cmnd->write_zeroes.control |= cpu_to_le16(NVME_RW_PRINFO_PRACT); - switch (ns->pi_type) { + switch (ns->head->pi_type) { case NVME_NS_DPS_PI_TYPE1: case NVME_NS_DPS_PI_TYPE2: nvme_set_ref_tag(ns, cmnd, req); @@ -910,12 +911,13 @@ static inline blk_status_t nvme_setup_rw(struct nvme_ns *ns, cmnd->rw.cdw3 = 0; cmnd->rw.metadata = 0; cmnd->rw.slba = cpu_to_le64(nvme_sect_to_lba(ns, blk_rq_pos(req))); - cmnd->rw.length = cpu_to_le16((blk_rq_bytes(req) >> ns->lba_shift) - 1); + cmnd->rw.length = + cpu_to_le16((blk_rq_bytes(req) >> ns->head->lba_shift) - 1); cmnd->rw.reftag = 0; cmnd->rw.apptag = 0; cmnd->rw.appmask = 0; - if (ns->ms) { + if (ns->head->ms) { /* * If formated with metadata, the block layer always provides a * metadata buffer if CONFIG_BLK_DEV_INTEGRITY is enabled. Else @@ -928,7 +930,7 @@ static inline blk_status_t nvme_setup_rw(struct nvme_ns *ns, control |= NVME_RW_PRINFO_PRACT; } - switch (ns->pi_type) { + switch (ns->head->pi_type) { case NVME_NS_DPS_PI_TYPE3: control |= NVME_RW_PRINFO_PRCHK_GUARD; break; @@ -1663,9 +1665,9 @@ static void nvme_init_integrity(struct gendisk *disk, struct nvme_ns *ns, { struct blk_integrity integrity = { }; - switch (ns->pi_type) { + switch (ns->head->pi_type) { case NVME_NS_DPS_PI_TYPE3: - switch (ns->guard_type) { + switch (ns->head->guard_type) { case NVME_NVM_NS_16B_GUARD: integrity.profile = &t10_pi_type3_crc; integrity.tag_size = sizeof(u16) + sizeof(u32); @@ -1683,7 +1685,7 @@ static void nvme_init_integrity(struct gendisk *disk, struct nvme_ns *ns, break; case NVME_NS_DPS_PI_TYPE1: case NVME_NS_DPS_PI_TYPE2: - switch (ns->guard_type) { + switch (ns->head->guard_type) { case NVME_NVM_NS_16B_GUARD: integrity.profile = &t10_pi_type1_crc; integrity.tag_size = sizeof(u16); @@ -1704,7 +1706,7 @@ static void nvme_init_integrity(struct gendisk *disk, struct nvme_ns *ns, break; } - integrity.tuple_size = ns->ms; + integrity.tuple_size = ns->head->ms; blk_integrity_register(disk, &integrity); blk_queue_max_integrity_segments(disk->queue, max_integrity_segments); } @@ -1763,11 +1765,11 @@ static int nvme_init_ms(struct nvme_ns *ns, struct nvme_id_ns *id) int ret = 0; u32 elbaf; - ns->pi_size = 0; - ns->ms = le16_to_cpu(id->lbaf[lbaf].ms); + ns->head->pi_size = 0; + ns->head->ms = le16_to_cpu(id->lbaf[lbaf].ms); if (!(ctrl->ctratt & NVME_CTRL_ATTR_ELBAS)) { - ns->pi_size = sizeof(struct t10_pi_tuple); - ns->guard_type = NVME_NVM_NS_16B_GUARD; + ns->head->pi_size = sizeof(struct t10_pi_tuple); + ns->head->guard_type = NVME_NVM_NS_16B_GUARD; goto set_pi; } @@ -1790,13 +1792,13 @@ static int nvme_init_ms(struct nvme_ns *ns, struct nvme_id_ns *id) if (nvme_elbaf_sts(elbaf)) goto free_data; - ns->guard_type = nvme_elbaf_guard_type(elbaf); - switch (ns->guard_type) { + ns->head->guard_type = nvme_elbaf_guard_type(elbaf); + switch (ns->head->guard_type) { case NVME_NVM_NS_64B_GUARD: - ns->pi_size = sizeof(struct crc64_pi_tuple); + ns->head->pi_size = sizeof(struct crc64_pi_tuple); break; case NVME_NVM_NS_16B_GUARD: - ns->pi_size = sizeof(struct t10_pi_tuple); + ns->head->pi_size = sizeof(struct t10_pi_tuple); break; default: break; @@ -1805,10 +1807,10 @@ static int nvme_init_ms(struct nvme_ns *ns, struct nvme_id_ns *id) free_data: kfree(nvm); set_pi: - if (ns->pi_size && (first || ns->ms == ns->pi_size)) - ns->pi_type = id->dps & NVME_NS_DPS_PI_MASK; + if (ns->head->pi_size && (first || ns->head->ms == ns->head->pi_size)) + ns->head->pi_type = id->dps & NVME_NS_DPS_PI_MASK; else - ns->pi_type = 0; + ns->head->pi_type = 0; return ret; } @@ -1822,8 +1824,8 @@ static int nvme_configure_metadata(struct nvme_ns *ns, struct nvme_id_ns *id) if (ret) return ret; - ns->features &= ~(NVME_NS_METADATA_SUPPORTED | NVME_NS_EXT_LBAS); - if (!ns->ms || !(ctrl->ops->flags & NVME_F_METADATA_SUPPORTED)) + ns->head->features &= ~(NVME_NS_METADATA_SUPPORTED | NVME_NS_EXT_LBAS); + if (!ns->head->ms || !(ctrl->ops->flags & NVME_F_METADATA_SUPPORTED)) return 0; if (ctrl->ops->flags & NVME_F_FABRICS) { @@ -1835,7 +1837,7 @@ static int nvme_configure_metadata(struct nvme_ns *ns, struct nvme_id_ns *id) if (WARN_ON_ONCE(!(id->flbas & NVME_NS_FLBAS_META_EXT))) return 0; - ns->features |= NVME_NS_EXT_LBAS; + ns->head->features |= NVME_NS_EXT_LBAS; /* * The current fabrics transport drivers support namespace @@ -1847,7 +1849,7 @@ static int nvme_configure_metadata(struct nvme_ns *ns, struct nvme_id_ns *id) * gain the ability to use other metadata formats. */ if (ctrl->max_integrity_segments && nvme_ns_has_pi(ns)) - ns->features |= NVME_NS_METADATA_SUPPORTED; + ns->head->features |= NVME_NS_METADATA_SUPPORTED; } else { /* * For PCIe controllers, we can't easily remap the separate @@ -1856,9 +1858,9 @@ static int nvme_configure_metadata(struct nvme_ns *ns, struct nvme_id_ns *id) * We allow extended LBAs for the passthrough interface, though. */ if (id->flbas & NVME_NS_FLBAS_META_EXT) - ns->features |= NVME_NS_EXT_LBAS; + ns->head->features |= NVME_NS_EXT_LBAS; else - ns->features |= NVME_NS_METADATA_SUPPORTED; + ns->head->features |= NVME_NS_METADATA_SUPPORTED; } return 0; } @@ -1885,14 +1887,14 @@ static void nvme_update_disk_info(struct gendisk *disk, struct nvme_ns *ns, struct nvme_id_ns *id) { sector_t capacity = nvme_lba_to_sect(ns, le64_to_cpu(id->nsze)); - u32 bs = 1U << ns->lba_shift; + u32 bs = 1U << ns->head->lba_shift; u32 atomic_bs, phys_bs, io_opt = 0; /* * The block layer can't support LBA sizes larger than the page size * yet, so catch this early and don't allow block I/O. */ - if (ns->lba_shift > PAGE_SHIFT) { + if (ns->head->lba_shift > PAGE_SHIFT) { capacity = 0; bs = (1 << 9); } @@ -1935,9 +1937,9 @@ static void nvme_update_disk_info(struct gendisk *disk, * I/O to namespaces with metadata except when the namespace supports * PI, as it can strip/insert in that case. */ - if (ns->ms) { + if (ns->head->ms) { if (IS_ENABLED(CONFIG_BLK_DEV_INTEGRITY) && - (ns->features & NVME_NS_METADATA_SUPPORTED)) + (ns->head->features & NVME_NS_METADATA_SUPPORTED)) nvme_init_integrity(disk, ns, ns->ctrl->max_integrity_segments); else if (!nvme_ns_has_pi(ns)) @@ -2031,7 +2033,7 @@ static int nvme_update_ns_info_block(struct nvme_ns *ns, blk_mq_freeze_queue(ns->disk->queue); lbaf = nvme_lbaf_index(id->flbas); - ns->lba_shift = id->lbaf[lbaf].ds; + ns->head->lba_shift = id->lbaf[lbaf].ds; nvme_set_queue_limits(ns->ctrl, ns->queue); ret = nvme_configure_metadata(ns, id); @@ -2057,7 +2059,7 @@ static int nvme_update_ns_info_block(struct nvme_ns *ns, * do not return zeroes. */ if ((id->dlfeat & 0x7) == 0x1 && (id->dlfeat & (1 << 3))) - ns->features |= NVME_NS_DEAC; + ns->head->features |= NVME_NS_DEAC; set_disk_ro(ns->disk, nvme_ns_is_readonly(ns, info)); set_bit(NVME_NS_READY, &ns->flags); blk_mq_unfreeze_queue(ns->disk->queue); diff --git a/drivers/nvme/host/ioctl.c b/drivers/nvme/host/ioctl.c index 32c9bcf491a3..883621748318 100644 --- a/drivers/nvme/host/ioctl.c +++ b/drivers/nvme/host/ioctl.c @@ -224,10 +224,10 @@ static int nvme_submit_io(struct nvme_ns *ns, struct nvme_user_io __user *uio) return -EINVAL; } - length = (io.nblocks + 1) << ns->lba_shift; + length = (io.nblocks + 1) << ns->head->lba_shift; if ((io.control & NVME_RW_PRINFO_PRACT) && - ns->ms == sizeof(struct t10_pi_tuple)) { + ns->head->ms == sizeof(struct t10_pi_tuple)) { /* * Protection information is stripped/inserted by the * controller. @@ -237,11 +237,11 @@ static int nvme_submit_io(struct nvme_ns *ns, struct nvme_user_io __user *uio) meta_len = 0; metadata = NULL; } else { - meta_len = (io.nblocks + 1) * ns->ms; + meta_len = (io.nblocks + 1) * ns->head->ms; metadata = nvme_to_user_ptr(io.metadata); } - if (ns->features & NVME_NS_EXT_LBAS) { + if (ns->head->features & NVME_NS_EXT_LBAS) { length += meta_len; meta_len = 0; } else if (meta_len) { diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index 39a90b7cb125..7b3c93d80eb1 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -446,6 +446,17 @@ struct nvme_ns_head { bool shared; int instance; struct nvme_effects_log *effects; + int lba_shift; + u16 ms; + u16 pi_size; + u16 sgs; + u32 sws; + u8 pi_type; + u8 guard_type; +#ifdef CONFIG_BLK_DEV_ZONED + u64 zsze; +#endif + unsigned long features; struct cdev cdev; struct device cdev_device; @@ -487,17 +498,6 @@ struct nvme_ns { struct kref kref; struct nvme_ns_head *head; - int lba_shift; - u16 ms; - u16 pi_size; - u16 sgs; - u32 sws; - u8 pi_type; - u8 guard_type; -#ifdef CONFIG_BLK_DEV_ZONED - u64 zsze; -#endif - unsigned long features; unsigned long flags; #define NVME_NS_REMOVING 0 #define NVME_NS_ANA_PENDING 2 @@ -514,7 +514,7 @@ struct nvme_ns { /* NVMe ns supports metadata actions by the controller (generate/strip) */ static inline bool nvme_ns_has_pi(struct nvme_ns *ns) { - return ns->pi_type && ns->ms == ns->pi_size; + return ns->head->pi_type && ns->head->ms == ns->head->pi_size; } struct nvme_ctrl_ops { @@ -648,7 +648,7 @@ static inline int nvme_reset_subsystem(struct nvme_ctrl *ctrl) */ static inline u64 nvme_sect_to_lba(struct nvme_ns *ns, sector_t sector) { - return sector >> (ns->lba_shift - SECTOR_SHIFT); + return sector >> (ns->head->lba_shift - SECTOR_SHIFT); } /* @@ -656,7 +656,7 @@ static inline u64 nvme_sect_to_lba(struct nvme_ns *ns, sector_t sector) */ static inline sector_t nvme_lba_to_sect(struct nvme_ns *ns, u64 lba) { - return lba << (ns->lba_shift - SECTOR_SHIFT); + return lba << (ns->head->lba_shift - SECTOR_SHIFT); } /* diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index 6d178d555920..be2a3e5884c0 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -1418,7 +1418,7 @@ static int nvme_rdma_map_sg_pi(struct nvme_rdma_queue *queue, goto mr_put; nvme_rdma_set_sig_attrs(blk_get_integrity(bio->bi_bdev->bd_disk), c, - req->mr->sig_attrs, ns->pi_type); + req->mr->sig_attrs, ns->head->pi_type); nvme_rdma_set_prot_checks(c, &req->mr->sig_attrs->check_mask); ib_update_fast_reg_key(req->mr, ib_inc_rkey(req->mr->rkey)); diff --git a/drivers/nvme/host/zns.c b/drivers/nvme/host/zns.c index ec8557810c21..fa9e8f664ae7 100644 --- a/drivers/nvme/host/zns.c +++ b/drivers/nvme/host/zns.c @@ -11,7 +11,7 @@ int nvme_revalidate_zones(struct nvme_ns *ns) { struct request_queue *q = ns->queue; - blk_queue_chunk_sectors(q, ns->zsze); + blk_queue_chunk_sectors(q, ns->head->zsze); blk_queue_max_zone_append_sectors(q, ns->ctrl->max_zone_append); return blk_revalidate_disk_zones(ns->disk, NULL); @@ -99,11 +99,12 @@ int nvme_update_zone_info(struct nvme_ns *ns, unsigned lbaf) goto free_data; } - ns->zsze = nvme_lba_to_sect(ns, le64_to_cpu(id->lbafe[lbaf].zsze)); - if (!is_power_of_2(ns->zsze)) { + ns->head->zsze = + nvme_lba_to_sect(ns, le64_to_cpu(id->lbafe[lbaf].zsze)); + if (!is_power_of_2(ns->head->zsze)) { dev_warn(ns->ctrl->device, "invalid zone size:%llu for namespace:%u\n", - ns->zsze, ns->head->ns_id); + ns->head->zsze, ns->head->ns_id); status = -ENODEV; goto free_data; } @@ -128,7 +129,7 @@ static void *nvme_zns_alloc_report_buffer(struct nvme_ns *ns, sizeof(struct nvme_zone_descriptor); nr_zones = min_t(unsigned int, nr_zones, - get_capacity(ns->disk) >> ilog2(ns->zsze)); + get_capacity(ns->disk) >> ilog2(ns->head->zsze)); bufsize = sizeof(struct nvme_zone_report) + nr_zones * sizeof(struct nvme_zone_descriptor); @@ -162,7 +163,7 @@ static int nvme_zone_parse_entry(struct nvme_ns *ns, zone.type = BLK_ZONE_TYPE_SEQWRITE_REQ; zone.cond = entry->zs >> 4; - zone.len = ns->zsze; + zone.len = ns->head->zsze; zone.capacity = nvme_lba_to_sect(ns, le64_to_cpu(entry->zcap)); zone.start = nvme_lba_to_sect(ns, le64_to_cpu(entry->zslba)); if (zone.cond == BLK_ZONE_COND_FULL) @@ -196,7 +197,7 @@ int nvme_ns_report_zones(struct nvme_ns *ns, sector_t sector, c.zmr.zrasf = NVME_ZRASF_ZONE_REPORT_ALL; c.zmr.pr = NVME_REPORT_ZONE_PARTIAL; - sector &= ~(ns->zsze - 1); + sector &= ~(ns->head->zsze - 1); while (zone_idx < nr_zones && sector < get_capacity(ns->disk)) { memset(report, 0, buflen); @@ -220,7 +221,7 @@ int nvme_ns_report_zones(struct nvme_ns *ns, sector_t sector, zone_idx++; } - sector += ns->zsze * nz; + sector += ns->head->zsze * nz; } if (zone_idx > 0) From 0372dd4e36171708f90192d5d4a3dcab7159df09 Mon Sep 17 00:00:00 2001 From: Daniel Wagner Date: Mon, 18 Dec 2023 17:59:50 +0100 Subject: [PATCH 06/13] nvme: refactor ns info helpers Pass in the nvme_ns_head pointer directly. This reduces the necessity on the caller side have the nvme_ns data structure present. Thus we can refactor the caller side in the next step as well. Reviewed-by: Christoph Hellwig Reviewed-by: Sagi Grimberg Reviewed-by: Hannes Reinecke Signed-off-by: Daniel Wagner Signed-off-by: Keith Busch --- drivers/nvme/host/core.c | 36 +++++++++++++++++++++--------------- drivers/nvme/host/nvme.h | 12 ++++++------ drivers/nvme/host/rdma.c | 2 +- drivers/nvme/host/zns.c | 12 ++++++------ 4 files changed, 34 insertions(+), 28 deletions(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index f9a70c70d95c..7be1db8caff4 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -316,7 +316,7 @@ static void nvme_log_error(struct request *req) ns->disk ? ns->disk->disk_name : "?", nvme_get_opcode_str(nr->cmd->common.opcode), nr->cmd->common.opcode, - nvme_sect_to_lba(ns, blk_rq_pos(req)), + nvme_sect_to_lba(ns->head, blk_rq_pos(req)), blk_rq_bytes(req) >> ns->head->lba_shift, nvme_get_error_status_str(nr->status), nr->status >> 8 & 7, /* Status Code Type */ @@ -372,9 +372,12 @@ static inline enum nvme_disposition nvme_decide_disposition(struct request *req) static inline void nvme_end_req_zoned(struct request *req) { if (IS_ENABLED(CONFIG_BLK_DEV_ZONED) && - req_op(req) == REQ_OP_ZONE_APPEND) - req->__sector = nvme_lba_to_sect(req->q->queuedata, + req_op(req) == REQ_OP_ZONE_APPEND) { + struct nvme_ns *ns = req->q->queuedata; + + req->__sector = nvme_lba_to_sect(ns->head, le64_to_cpu(nvme_req(req)->result.u64)); + } } static inline void nvme_end_req(struct request *req) @@ -791,7 +794,7 @@ static blk_status_t nvme_setup_discard(struct nvme_ns *ns, struct request *req, } if (queue_max_discard_segments(req->q) == 1) { - u64 slba = nvme_sect_to_lba(ns, blk_rq_pos(req)); + u64 slba = nvme_sect_to_lba(ns->head, blk_rq_pos(req)); u32 nlb = blk_rq_sectors(req) >> (ns->head->lba_shift - 9); range[0].cattr = cpu_to_le32(0); @@ -800,7 +803,8 @@ static blk_status_t nvme_setup_discard(struct nvme_ns *ns, struct request *req, n = 1; } else { __rq_for_each_bio(bio, req) { - u64 slba = nvme_sect_to_lba(ns, bio->bi_iter.bi_sector); + u64 slba = nvme_sect_to_lba(ns->head, + bio->bi_iter.bi_sector); u32 nlb = bio->bi_iter.bi_size >> ns->head->lba_shift; if (n < segments) { @@ -867,7 +871,7 @@ static inline blk_status_t nvme_setup_write_zeroes(struct nvme_ns *ns, cmnd->write_zeroes.opcode = nvme_cmd_write_zeroes; cmnd->write_zeroes.nsid = cpu_to_le32(ns->head->ns_id); cmnd->write_zeroes.slba = - cpu_to_le64(nvme_sect_to_lba(ns, blk_rq_pos(req))); + cpu_to_le64(nvme_sect_to_lba(ns->head, blk_rq_pos(req))); cmnd->write_zeroes.length = cpu_to_le16((blk_rq_bytes(req) >> ns->head->lba_shift) - 1); @@ -875,7 +879,7 @@ static inline blk_status_t nvme_setup_write_zeroes(struct nvme_ns *ns, (ns->head->features & NVME_NS_DEAC)) cmnd->write_zeroes.control |= cpu_to_le16(NVME_WZ_DEAC); - if (nvme_ns_has_pi(ns)) { + if (nvme_ns_has_pi(ns->head)) { cmnd->write_zeroes.control |= cpu_to_le16(NVME_RW_PRINFO_PRACT); switch (ns->head->pi_type) { @@ -910,7 +914,8 @@ static inline blk_status_t nvme_setup_rw(struct nvme_ns *ns, cmnd->rw.cdw2 = 0; cmnd->rw.cdw3 = 0; cmnd->rw.metadata = 0; - cmnd->rw.slba = cpu_to_le64(nvme_sect_to_lba(ns, blk_rq_pos(req))); + cmnd->rw.slba = + cpu_to_le64(nvme_sect_to_lba(ns->head, blk_rq_pos(req))); cmnd->rw.length = cpu_to_le16((blk_rq_bytes(req) >> ns->head->lba_shift) - 1); cmnd->rw.reftag = 0; @@ -925,7 +930,7 @@ static inline blk_status_t nvme_setup_rw(struct nvme_ns *ns, * namespace capacity to zero to prevent any I/O. */ if (!blk_integrity_rq(req)) { - if (WARN_ON_ONCE(!nvme_ns_has_pi(ns))) + if (WARN_ON_ONCE(!nvme_ns_has_pi(ns->head))) return BLK_STS_NOTSUPP; control |= NVME_RW_PRINFO_PRACT; } @@ -1723,8 +1728,9 @@ static void nvme_config_discard(struct gendisk *disk, struct nvme_ns *ns) struct request_queue *queue = disk->queue; u32 size = queue_logical_block_size(queue); - if (ctrl->dmrsl && ctrl->dmrsl <= nvme_sect_to_lba(ns, UINT_MAX)) - ctrl->max_discard_sectors = nvme_lba_to_sect(ns, ctrl->dmrsl); + if (ctrl->dmrsl && ctrl->dmrsl <= nvme_sect_to_lba(ns->head, UINT_MAX)) + ctrl->max_discard_sectors = + nvme_lba_to_sect(ns->head, ctrl->dmrsl); if (ctrl->max_discard_sectors == 0) { blk_queue_max_discard_sectors(queue, 0); @@ -1848,7 +1854,7 @@ static int nvme_configure_metadata(struct nvme_ns *ns, struct nvme_id_ns *id) * Note, this check will need to be modified if any drivers * gain the ability to use other metadata formats. */ - if (ctrl->max_integrity_segments && nvme_ns_has_pi(ns)) + if (ctrl->max_integrity_segments && nvme_ns_has_pi(ns->head)) ns->head->features |= NVME_NS_METADATA_SUPPORTED; } else { /* @@ -1886,7 +1892,7 @@ static void nvme_set_queue_limits(struct nvme_ctrl *ctrl, static void nvme_update_disk_info(struct gendisk *disk, struct nvme_ns *ns, struct nvme_id_ns *id) { - sector_t capacity = nvme_lba_to_sect(ns, le64_to_cpu(id->nsze)); + sector_t capacity = nvme_lba_to_sect(ns->head, le64_to_cpu(id->nsze)); u32 bs = 1U << ns->head->lba_shift; u32 atomic_bs, phys_bs, io_opt = 0; @@ -1942,7 +1948,7 @@ static void nvme_update_disk_info(struct gendisk *disk, (ns->head->features & NVME_NS_METADATA_SUPPORTED)) nvme_init_integrity(disk, ns, ns->ctrl->max_integrity_segments); - else if (!nvme_ns_has_pi(ns)) + else if (!nvme_ns_has_pi(ns->head)) capacity = 0; } @@ -1973,7 +1979,7 @@ static void nvme_set_chunk_sectors(struct nvme_ns *ns, struct nvme_id_ns *id) is_power_of_2(ctrl->max_hw_sectors)) iob = ctrl->max_hw_sectors; else - iob = nvme_lba_to_sect(ns, le16_to_cpu(id->noiob)); + iob = nvme_lba_to_sect(ns->head, le16_to_cpu(id->noiob)); if (!iob) return; diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index 7b3c93d80eb1..9e5b9e779fbd 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -512,9 +512,9 @@ struct nvme_ns { }; /* NVMe ns supports metadata actions by the controller (generate/strip) */ -static inline bool nvme_ns_has_pi(struct nvme_ns *ns) +static inline bool nvme_ns_has_pi(struct nvme_ns_head *head) { - return ns->head->pi_type && ns->head->ms == ns->head->pi_size; + return head->pi_type && head->ms == head->pi_size; } struct nvme_ctrl_ops { @@ -646,17 +646,17 @@ static inline int nvme_reset_subsystem(struct nvme_ctrl *ctrl) /* * Convert a 512B sector number to a device logical block number. */ -static inline u64 nvme_sect_to_lba(struct nvme_ns *ns, sector_t sector) +static inline u64 nvme_sect_to_lba(struct nvme_ns_head *head, sector_t sector) { - return sector >> (ns->head->lba_shift - SECTOR_SHIFT); + return sector >> (head->lba_shift - SECTOR_SHIFT); } /* * Convert a device logical block number to a 512B sector number. */ -static inline sector_t nvme_lba_to_sect(struct nvme_ns *ns, u64 lba) +static inline sector_t nvme_lba_to_sect(struct nvme_ns_head *head, u64 lba) { - return lba << (ns->head->lba_shift - SECTOR_SHIFT); + return lba << (head->lba_shift - SECTOR_SHIFT); } /* diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index be2a3e5884c0..bc90ec3c51b0 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -2012,7 +2012,7 @@ static blk_status_t nvme_rdma_queue_rq(struct blk_mq_hw_ctx *hctx, queue->pi_support && (c->common.opcode == nvme_cmd_write || c->common.opcode == nvme_cmd_read) && - nvme_ns_has_pi(ns)) + nvme_ns_has_pi(ns->head)) req->use_sig_mr = true; else req->use_sig_mr = false; diff --git a/drivers/nvme/host/zns.c b/drivers/nvme/host/zns.c index fa9e8f664ae7..ded52ab05424 100644 --- a/drivers/nvme/host/zns.c +++ b/drivers/nvme/host/zns.c @@ -100,7 +100,7 @@ int nvme_update_zone_info(struct nvme_ns *ns, unsigned lbaf) } ns->head->zsze = - nvme_lba_to_sect(ns, le64_to_cpu(id->lbafe[lbaf].zsze)); + nvme_lba_to_sect(ns->head, le64_to_cpu(id->lbafe[lbaf].zsze)); if (!is_power_of_2(ns->head->zsze)) { dev_warn(ns->ctrl->device, "invalid zone size:%llu for namespace:%u\n", @@ -164,12 +164,12 @@ static int nvme_zone_parse_entry(struct nvme_ns *ns, zone.type = BLK_ZONE_TYPE_SEQWRITE_REQ; zone.cond = entry->zs >> 4; zone.len = ns->head->zsze; - zone.capacity = nvme_lba_to_sect(ns, le64_to_cpu(entry->zcap)); - zone.start = nvme_lba_to_sect(ns, le64_to_cpu(entry->zslba)); + zone.capacity = nvme_lba_to_sect(ns->head, le64_to_cpu(entry->zcap)); + zone.start = nvme_lba_to_sect(ns->head, le64_to_cpu(entry->zslba)); if (zone.cond == BLK_ZONE_COND_FULL) zone.wp = zone.start + zone.len; else - zone.wp = nvme_lba_to_sect(ns, le64_to_cpu(entry->wp)); + zone.wp = nvme_lba_to_sect(ns->head, le64_to_cpu(entry->wp)); return cb(&zone, idx, data); } @@ -201,7 +201,7 @@ int nvme_ns_report_zones(struct nvme_ns *ns, sector_t sector, while (zone_idx < nr_zones && sector < get_capacity(ns->disk)) { memset(report, 0, buflen); - c.zmr.slba = cpu_to_le64(nvme_sect_to_lba(ns, sector)); + c.zmr.slba = cpu_to_le64(nvme_sect_to_lba(ns->head, sector)); ret = nvme_submit_sync_cmd(ns->queue, &c, report, buflen); if (ret) { if (ret > 0) @@ -240,7 +240,7 @@ blk_status_t nvme_setup_zone_mgmt_send(struct nvme_ns *ns, struct request *req, c->zms.opcode = nvme_cmd_zone_mgmt_send; c->zms.nsid = cpu_to_le32(ns->head->ns_id); - c->zms.slba = cpu_to_le64(nvme_sect_to_lba(ns, blk_rq_pos(req))); + c->zms.slba = cpu_to_le64(nvme_sect_to_lba(ns->head, blk_rq_pos(req))); c->zms.zsa = action; if (req_op(req) == REQ_OP_ZONE_RESET_ALL) From d386aedc94efe249b0071b0ad969a6bce2e505bd Mon Sep 17 00:00:00 2001 From: Daniel Wagner Date: Mon, 18 Dec 2023 17:59:51 +0100 Subject: [PATCH 07/13] nvme: refactor ns info setup function Use nvme_ns_head instead of nvme_ns where possible. This reduces the coupling between the different data structures. Reviewed-by: Christoph Hellwig Reviewed-by: Sagi Grimberg Reviewed-by: Hannes Reinecke Signed-off-by: Daniel Wagner Signed-off-by: Keith Busch --- drivers/nvme/host/core.c | 106 +++++++++++++++++++-------------------- drivers/nvme/host/zns.c | 16 +++--- 2 files changed, 62 insertions(+), 60 deletions(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 7be1db8caff4..e3e997a437d9 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -1665,14 +1665,14 @@ int nvme_getgeo(struct block_device *bdev, struct hd_geometry *geo) } #ifdef CONFIG_BLK_DEV_INTEGRITY -static void nvme_init_integrity(struct gendisk *disk, struct nvme_ns *ns, - u32 max_integrity_segments) +static void nvme_init_integrity(struct gendisk *disk, + struct nvme_ns_head *head, u32 max_integrity_segments) { struct blk_integrity integrity = { }; - switch (ns->head->pi_type) { + switch (head->pi_type) { case NVME_NS_DPS_PI_TYPE3: - switch (ns->head->guard_type) { + switch (head->guard_type) { case NVME_NVM_NS_16B_GUARD: integrity.profile = &t10_pi_type3_crc; integrity.tag_size = sizeof(u16) + sizeof(u32); @@ -1690,7 +1690,7 @@ static void nvme_init_integrity(struct gendisk *disk, struct nvme_ns *ns, break; case NVME_NS_DPS_PI_TYPE1: case NVME_NS_DPS_PI_TYPE2: - switch (ns->head->guard_type) { + switch (head->guard_type) { case NVME_NVM_NS_16B_GUARD: integrity.profile = &t10_pi_type1_crc; integrity.tag_size = sizeof(u16); @@ -1711,26 +1711,26 @@ static void nvme_init_integrity(struct gendisk *disk, struct nvme_ns *ns, break; } - integrity.tuple_size = ns->head->ms; + integrity.tuple_size = head->ms; blk_integrity_register(disk, &integrity); blk_queue_max_integrity_segments(disk->queue, max_integrity_segments); } #else -static void nvme_init_integrity(struct gendisk *disk, struct nvme_ns *ns, - u32 max_integrity_segments) +static void nvme_init_integrity(struct gendisk *disk, + struct nvme_ns_head *head, u32 max_integrity_segments) { } #endif /* CONFIG_BLK_DEV_INTEGRITY */ -static void nvme_config_discard(struct gendisk *disk, struct nvme_ns *ns) +static void nvme_config_discard(struct nvme_ctrl *ctrl, struct gendisk *disk, + struct nvme_ns_head *head) { - struct nvme_ctrl *ctrl = ns->ctrl; struct request_queue *queue = disk->queue; u32 size = queue_logical_block_size(queue); - if (ctrl->dmrsl && ctrl->dmrsl <= nvme_sect_to_lba(ns->head, UINT_MAX)) + if (ctrl->dmrsl && ctrl->dmrsl <= nvme_sect_to_lba(head, UINT_MAX)) ctrl->max_discard_sectors = - nvme_lba_to_sect(ns->head, ctrl->dmrsl); + nvme_lba_to_sect(head, ctrl->dmrsl); if (ctrl->max_discard_sectors == 0) { blk_queue_max_discard_sectors(queue, 0); @@ -1761,21 +1761,21 @@ static bool nvme_ns_ids_equal(struct nvme_ns_ids *a, struct nvme_ns_ids *b) a->csi == b->csi; } -static int nvme_init_ms(struct nvme_ns *ns, struct nvme_id_ns *id) +static int nvme_init_ms(struct nvme_ctrl *ctrl, struct nvme_ns_head *head, + struct nvme_id_ns *id) { bool first = id->dps & NVME_NS_DPS_PI_FIRST; unsigned lbaf = nvme_lbaf_index(id->flbas); - struct nvme_ctrl *ctrl = ns->ctrl; struct nvme_command c = { }; struct nvme_id_ns_nvm *nvm; int ret = 0; u32 elbaf; - ns->head->pi_size = 0; - ns->head->ms = le16_to_cpu(id->lbaf[lbaf].ms); + head->pi_size = 0; + head->ms = le16_to_cpu(id->lbaf[lbaf].ms); if (!(ctrl->ctratt & NVME_CTRL_ATTR_ELBAS)) { - ns->head->pi_size = sizeof(struct t10_pi_tuple); - ns->head->guard_type = NVME_NVM_NS_16B_GUARD; + head->pi_size = sizeof(struct t10_pi_tuple); + head->guard_type = NVME_NVM_NS_16B_GUARD; goto set_pi; } @@ -1784,11 +1784,11 @@ static int nvme_init_ms(struct nvme_ns *ns, struct nvme_id_ns *id) return -ENOMEM; c.identify.opcode = nvme_admin_identify; - c.identify.nsid = cpu_to_le32(ns->head->ns_id); + c.identify.nsid = cpu_to_le32(head->ns_id); c.identify.cns = NVME_ID_CNS_CS_NS; c.identify.csi = NVME_CSI_NVM; - ret = nvme_submit_sync_cmd(ns->ctrl->admin_q, &c, nvm, sizeof(*nvm)); + ret = nvme_submit_sync_cmd(ctrl->admin_q, &c, nvm, sizeof(*nvm)); if (ret) goto free_data; @@ -1798,13 +1798,13 @@ static int nvme_init_ms(struct nvme_ns *ns, struct nvme_id_ns *id) if (nvme_elbaf_sts(elbaf)) goto free_data; - ns->head->guard_type = nvme_elbaf_guard_type(elbaf); - switch (ns->head->guard_type) { + head->guard_type = nvme_elbaf_guard_type(elbaf); + switch (head->guard_type) { case NVME_NVM_NS_64B_GUARD: - ns->head->pi_size = sizeof(struct crc64_pi_tuple); + head->pi_size = sizeof(struct crc64_pi_tuple); break; case NVME_NVM_NS_16B_GUARD: - ns->head->pi_size = sizeof(struct t10_pi_tuple); + head->pi_size = sizeof(struct t10_pi_tuple); break; default: break; @@ -1813,25 +1813,25 @@ static int nvme_init_ms(struct nvme_ns *ns, struct nvme_id_ns *id) free_data: kfree(nvm); set_pi: - if (ns->head->pi_size && (first || ns->head->ms == ns->head->pi_size)) - ns->head->pi_type = id->dps & NVME_NS_DPS_PI_MASK; + if (head->pi_size && (first || head->ms == head->pi_size)) + head->pi_type = id->dps & NVME_NS_DPS_PI_MASK; else - ns->head->pi_type = 0; + head->pi_type = 0; return ret; } -static int nvme_configure_metadata(struct nvme_ns *ns, struct nvme_id_ns *id) +static int nvme_configure_metadata(struct nvme_ctrl *ctrl, + struct nvme_ns_head *head, struct nvme_id_ns *id) { - struct nvme_ctrl *ctrl = ns->ctrl; int ret; - ret = nvme_init_ms(ns, id); + ret = nvme_init_ms(ctrl, head, id); if (ret) return ret; - ns->head->features &= ~(NVME_NS_METADATA_SUPPORTED | NVME_NS_EXT_LBAS); - if (!ns->head->ms || !(ctrl->ops->flags & NVME_F_METADATA_SUPPORTED)) + head->features &= ~(NVME_NS_METADATA_SUPPORTED | NVME_NS_EXT_LBAS); + if (!head->ms || !(ctrl->ops->flags & NVME_F_METADATA_SUPPORTED)) return 0; if (ctrl->ops->flags & NVME_F_FABRICS) { @@ -1843,7 +1843,7 @@ static int nvme_configure_metadata(struct nvme_ns *ns, struct nvme_id_ns *id) if (WARN_ON_ONCE(!(id->flbas & NVME_NS_FLBAS_META_EXT))) return 0; - ns->head->features |= NVME_NS_EXT_LBAS; + head->features |= NVME_NS_EXT_LBAS; /* * The current fabrics transport drivers support namespace @@ -1854,8 +1854,8 @@ static int nvme_configure_metadata(struct nvme_ns *ns, struct nvme_id_ns *id) * Note, this check will need to be modified if any drivers * gain the ability to use other metadata formats. */ - if (ctrl->max_integrity_segments && nvme_ns_has_pi(ns->head)) - ns->head->features |= NVME_NS_METADATA_SUPPORTED; + if (ctrl->max_integrity_segments && nvme_ns_has_pi(head)) + head->features |= NVME_NS_METADATA_SUPPORTED; } else { /* * For PCIe controllers, we can't easily remap the separate @@ -1864,9 +1864,9 @@ static int nvme_configure_metadata(struct nvme_ns *ns, struct nvme_id_ns *id) * We allow extended LBAs for the passthrough interface, though. */ if (id->flbas & NVME_NS_FLBAS_META_EXT) - ns->head->features |= NVME_NS_EXT_LBAS; + head->features |= NVME_NS_EXT_LBAS; else - ns->head->features |= NVME_NS_METADATA_SUPPORTED; + head->features |= NVME_NS_METADATA_SUPPORTED; } return 0; } @@ -1889,18 +1889,18 @@ static void nvme_set_queue_limits(struct nvme_ctrl *ctrl, blk_queue_write_cache(q, vwc, vwc); } -static void nvme_update_disk_info(struct gendisk *disk, - struct nvme_ns *ns, struct nvme_id_ns *id) +static void nvme_update_disk_info(struct nvme_ctrl *ctrl, struct gendisk *disk, + struct nvme_ns_head *head, struct nvme_id_ns *id) { - sector_t capacity = nvme_lba_to_sect(ns->head, le64_to_cpu(id->nsze)); - u32 bs = 1U << ns->head->lba_shift; + sector_t capacity = nvme_lba_to_sect(head, le64_to_cpu(id->nsze)); + u32 bs = 1U << head->lba_shift; u32 atomic_bs, phys_bs, io_opt = 0; /* * The block layer can't support LBA sizes larger than the page size * yet, so catch this early and don't allow block I/O. */ - if (ns->head->lba_shift > PAGE_SHIFT) { + if (head->lba_shift > PAGE_SHIFT) { capacity = 0; bs = (1 << 9); } @@ -1917,7 +1917,7 @@ static void nvme_update_disk_info(struct gendisk *disk, if (id->nsfeat & NVME_NS_FEAT_ATOMICS && id->nawupf) atomic_bs = (1 + le16_to_cpu(id->nawupf)) * bs; else - atomic_bs = (1 + ns->ctrl->subsys->awupf) * bs; + atomic_bs = (1 + ctrl->subsys->awupf) * bs; } if (id->nsfeat & NVME_NS_FEAT_IO_OPT) { @@ -1943,20 +1943,20 @@ static void nvme_update_disk_info(struct gendisk *disk, * I/O to namespaces with metadata except when the namespace supports * PI, as it can strip/insert in that case. */ - if (ns->head->ms) { + if (head->ms) { if (IS_ENABLED(CONFIG_BLK_DEV_INTEGRITY) && - (ns->head->features & NVME_NS_METADATA_SUPPORTED)) - nvme_init_integrity(disk, ns, - ns->ctrl->max_integrity_segments); - else if (!nvme_ns_has_pi(ns->head)) + (head->features & NVME_NS_METADATA_SUPPORTED)) + nvme_init_integrity(disk, head, + ctrl->max_integrity_segments); + else if (!nvme_ns_has_pi(head)) capacity = 0; } set_capacity_and_notify(disk, capacity); - nvme_config_discard(disk, ns); + nvme_config_discard(ctrl, disk, head); blk_queue_max_write_zeroes_sectors(disk->queue, - ns->ctrl->max_zeroes_sectors); + ctrl->max_zeroes_sectors); } static bool nvme_ns_is_readonly(struct nvme_ns *ns, struct nvme_ns_info *info) @@ -2042,13 +2042,13 @@ static int nvme_update_ns_info_block(struct nvme_ns *ns, ns->head->lba_shift = id->lbaf[lbaf].ds; nvme_set_queue_limits(ns->ctrl, ns->queue); - ret = nvme_configure_metadata(ns, id); + ret = nvme_configure_metadata(ns->ctrl, ns->head, id); if (ret < 0) { blk_mq_unfreeze_queue(ns->disk->queue); goto out; } nvme_set_chunk_sectors(ns, id); - nvme_update_disk_info(ns->disk, ns, id); + nvme_update_disk_info(ns->ctrl, ns->disk, ns->head, id); if (ns->head->ids.csi == NVME_CSI_ZNS) { ret = nvme_update_zone_info(ns, lbaf); @@ -2078,7 +2078,7 @@ static int nvme_update_ns_info_block(struct nvme_ns *ns, if (nvme_ns_head_multipath(ns->head)) { blk_mq_freeze_queue(ns->head->disk->queue); - nvme_update_disk_info(ns->head->disk, ns, id); + nvme_update_disk_info(ns->ctrl, ns->head->disk, ns->head, id); set_disk_ro(ns->head->disk, nvme_ns_is_readonly(ns, info)); nvme_mpath_revalidate_paths(ns); blk_stack_limits(&ns->head->disk->queue->limits, diff --git a/drivers/nvme/host/zns.c b/drivers/nvme/host/zns.c index ded52ab05424..56b27aabcad9 100644 --- a/drivers/nvme/host/zns.c +++ b/drivers/nvme/host/zns.c @@ -148,7 +148,8 @@ static void *nvme_zns_alloc_report_buffer(struct nvme_ns *ns, return NULL; } -static int nvme_zone_parse_entry(struct nvme_ns *ns, +static int nvme_zone_parse_entry(struct nvme_ctrl *ctrl, + struct nvme_ns_head *head, struct nvme_zone_descriptor *entry, unsigned int idx, report_zones_cb cb, void *data) @@ -156,20 +157,20 @@ static int nvme_zone_parse_entry(struct nvme_ns *ns, struct blk_zone zone = { }; if ((entry->zt & 0xf) != NVME_ZONE_TYPE_SEQWRITE_REQ) { - dev_err(ns->ctrl->device, "invalid zone type %#x\n", + dev_err(ctrl->device, "invalid zone type %#x\n", entry->zt); return -EINVAL; } zone.type = BLK_ZONE_TYPE_SEQWRITE_REQ; zone.cond = entry->zs >> 4; - zone.len = ns->head->zsze; - zone.capacity = nvme_lba_to_sect(ns->head, le64_to_cpu(entry->zcap)); - zone.start = nvme_lba_to_sect(ns->head, le64_to_cpu(entry->zslba)); + zone.len = head->zsze; + zone.capacity = nvme_lba_to_sect(head, le64_to_cpu(entry->zcap)); + zone.start = nvme_lba_to_sect(head, le64_to_cpu(entry->zslba)); if (zone.cond == BLK_ZONE_COND_FULL) zone.wp = zone.start + zone.len; else - zone.wp = nvme_lba_to_sect(ns->head, le64_to_cpu(entry->wp)); + zone.wp = nvme_lba_to_sect(head, le64_to_cpu(entry->wp)); return cb(&zone, idx, data); } @@ -214,7 +215,8 @@ int nvme_ns_report_zones(struct nvme_ns *ns, sector_t sector, break; for (i = 0; i < nz && zone_idx < nr_zones; i++) { - ret = nvme_zone_parse_entry(ns, &report->entries[i], + ret = nvme_zone_parse_entry(ns->ctrl, ns->head, + &report->entries[i], zone_idx, cb, data); if (ret) goto out_free; From 83ac678e599f0cee4056594111612946a381fa0b Mon Sep 17 00:00:00 2001 From: Daniel Wagner Date: Mon, 18 Dec 2023 17:59:52 +0100 Subject: [PATCH 08/13] nvme: rename ns attribute group Drop the 'id' part of the attribute group name because we want to expose non 'id' related attributes via the ns attribute group. Reviewed-by: Christoph Hellwig Reviewed-by: Sagi Grimberg Reviewed-by: Hannes Reinecke Signed-off-by: Daniel Wagner Signed-off-by: Keith Busch --- drivers/nvme/host/core.c | 2 +- drivers/nvme/host/multipath.c | 2 +- drivers/nvme/host/nvme.h | 2 +- drivers/nvme/host/sysfs.c | 14 +++++++------- 4 files changed, 10 insertions(+), 10 deletions(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index e3e997a437d9..ba738ae83cba 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -3671,7 +3671,7 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, struct nvme_ns_info *info) up_write(&ctrl->namespaces_rwsem); nvme_get_ctrl(ctrl); - if (device_add_disk(ctrl->device, ns->disk, nvme_ns_id_attr_groups)) + if (device_add_disk(ctrl->device, ns->disk, nvme_ns_attr_groups)) goto out_cleanup_ns_from_list; if (!nvme_ns_head_multipath(ns->head)) diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c index 0a88d7bdc5e3..2dd4137a08b2 100644 --- a/drivers/nvme/host/multipath.c +++ b/drivers/nvme/host/multipath.c @@ -579,7 +579,7 @@ static void nvme_mpath_set_live(struct nvme_ns *ns) */ if (!test_and_set_bit(NVME_NSHEAD_DISK_LIVE, &head->flags)) { rc = device_add_disk(&head->subsys->dev, head->disk, - nvme_ns_id_attr_groups); + nvme_ns_attr_groups); if (rc) { clear_bit(NVME_NSHEAD_DISK_LIVE, &ns->flags); return; diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index 9e5b9e779fbd..919115916449 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -865,7 +865,7 @@ int nvme_ns_head_chr_uring_cmd(struct io_uring_cmd *ioucmd, int nvme_getgeo(struct block_device *bdev, struct hd_geometry *geo); int nvme_dev_uring_cmd(struct io_uring_cmd *ioucmd, unsigned int issue_flags); -extern const struct attribute_group *nvme_ns_id_attr_groups[]; +extern const struct attribute_group *nvme_ns_attr_groups[]; extern const struct pr_ops nvme_pr_ops; extern const struct block_device_operations nvme_ns_head_ops; extern const struct attribute_group nvme_dev_attrs_group; diff --git a/drivers/nvme/host/sysfs.c b/drivers/nvme/host/sysfs.c index c6b7fbd4d34d..d682d0a667a0 100644 --- a/drivers/nvme/host/sysfs.c +++ b/drivers/nvme/host/sysfs.c @@ -114,7 +114,7 @@ static ssize_t nsid_show(struct device *dev, struct device_attribute *attr, } static DEVICE_ATTR_RO(nsid); -static struct attribute *nvme_ns_id_attrs[] = { +static struct attribute *nvme_ns_attrs[] = { &dev_attr_wwid.attr, &dev_attr_uuid.attr, &dev_attr_nguid.attr, @@ -127,7 +127,7 @@ static struct attribute *nvme_ns_id_attrs[] = { NULL, }; -static umode_t nvme_ns_id_attrs_are_visible(struct kobject *kobj, +static umode_t nvme_ns_attrs_are_visible(struct kobject *kobj, struct attribute *a, int n) { struct device *dev = container_of(kobj, struct device, kobj); @@ -157,13 +157,13 @@ static umode_t nvme_ns_id_attrs_are_visible(struct kobject *kobj, return a->mode; } -static const struct attribute_group nvme_ns_id_attr_group = { - .attrs = nvme_ns_id_attrs, - .is_visible = nvme_ns_id_attrs_are_visible, +static const struct attribute_group nvme_ns_attr_group = { + .attrs = nvme_ns_attrs, + .is_visible = nvme_ns_attrs_are_visible, }; -const struct attribute_group *nvme_ns_id_attr_groups[] = { - &nvme_ns_id_attr_group, +const struct attribute_group *nvme_ns_attr_groups[] = { + &nvme_ns_attr_group, NULL, }; From a1a825ab6a60380240ca136596732fdb80bad87a Mon Sep 17 00:00:00 2001 From: Daniel Wagner Date: Mon, 18 Dec 2023 17:59:53 +0100 Subject: [PATCH 09/13] nvme: add csi, ms and nuse to sysfs libnvme is using the sysfs for enumarating the nvme resources. Though there are few missing attritbutes in the sysfs. For these libnvme issues commands during discovering. As the kernel already knows all these attributes and we would like to avoid libnvme to issue commands all the time, expose these missing attributes. The nuse value is updated on request because the nuse is a volatile value. Since any user can read the sysfs attribute, a very simple rate limit is added (update once every 5 seconds). A more sophisticated update strategy can be added later if there is actually a need for it. Reviewed-by: Christoph Hellwig Reviewed-by: Sagi Grimberg Reviewed-by: Hannes Reinecke Signed-off-by: Daniel Wagner Signed-off-by: Keith Busch --- drivers/nvme/host/core.c | 6 ++- drivers/nvme/host/nvme.h | 6 +++ drivers/nvme/host/sysfs.c | 85 +++++++++++++++++++++++++++++++++++++++ 3 files changed, 96 insertions(+), 1 deletion(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index ba738ae83cba..22dae2a26ba4 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -20,6 +20,7 @@ #include #include #include +#include #include #include "nvme.h" @@ -1449,7 +1450,7 @@ static int nvme_identify_ns_descs(struct nvme_ctrl *ctrl, return status; } -static int nvme_identify_ns(struct nvme_ctrl *ctrl, unsigned nsid, +int nvme_identify_ns(struct nvme_ctrl *ctrl, unsigned nsid, struct nvme_id_ns **id) { struct nvme_command c = { }; @@ -2040,6 +2041,7 @@ static int nvme_update_ns_info_block(struct nvme_ns *ns, blk_mq_freeze_queue(ns->disk->queue); lbaf = nvme_lbaf_index(id->flbas); ns->head->lba_shift = id->lbaf[lbaf].ds; + ns->head->nuse = le64_to_cpu(id->nuse); nvme_set_queue_limits(ns->ctrl, ns->queue); ret = nvme_configure_metadata(ns->ctrl, ns->head, id); @@ -3420,6 +3422,8 @@ static struct nvme_ns_head *nvme_alloc_ns_head(struct nvme_ctrl *ctrl, head->ns_id = info->nsid; head->ids = info->ids; head->shared = info->is_shared; + ratelimit_state_init(&head->rs_nuse, 5 * HZ, 1); + ratelimit_set_flags(&head->rs_nuse, RATELIMIT_MSG_ON_RELEASE); kref_init(&head->ref); if (head->ids.csi) { diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index 919115916449..6211f18c53c7 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -16,6 +16,7 @@ #include #include #include +#include #include @@ -451,6 +452,7 @@ struct nvme_ns_head { u16 pi_size; u16 sgs; u32 sws; + u64 nuse; u8 pi_type; u8 guard_type; #ifdef CONFIG_BLK_DEV_ZONED @@ -458,6 +460,8 @@ struct nvme_ns_head { #endif unsigned long features; + struct ratelimit_state rs_nuse; + struct cdev cdev; struct device cdev_device; @@ -862,6 +866,8 @@ int nvme_ns_chr_uring_cmd(struct io_uring_cmd *ioucmd, unsigned int issue_flags); int nvme_ns_head_chr_uring_cmd(struct io_uring_cmd *ioucmd, unsigned int issue_flags); +int nvme_identify_ns(struct nvme_ctrl *ctrl, unsigned nsid, + struct nvme_id_ns **id); int nvme_getgeo(struct block_device *bdev, struct hd_geometry *geo); int nvme_dev_uring_cmd(struct io_uring_cmd *ioucmd, unsigned int issue_flags); diff --git a/drivers/nvme/host/sysfs.c b/drivers/nvme/host/sysfs.c index d682d0a667a0..ac24ad102380 100644 --- a/drivers/nvme/host/sysfs.c +++ b/drivers/nvme/host/sysfs.c @@ -114,12 +114,97 @@ static ssize_t nsid_show(struct device *dev, struct device_attribute *attr, } static DEVICE_ATTR_RO(nsid); +static ssize_t csi_show(struct device *dev, struct device_attribute *attr, + char *buf) +{ + return sysfs_emit(buf, "%u\n", dev_to_ns_head(dev)->ids.csi); +} +static DEVICE_ATTR_RO(csi); + +static ssize_t metadata_bytes_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + return sysfs_emit(buf, "%u\n", dev_to_ns_head(dev)->ms); +} +static DEVICE_ATTR_RO(metadata_bytes); + +static int ns_head_update_nuse(struct nvme_ns_head *head) +{ + struct nvme_id_ns *id; + struct nvme_ns *ns; + int srcu_idx, ret = -EWOULDBLOCK; + + /* Avoid issuing commands too often by rate limiting the update */ + if (!__ratelimit(&head->rs_nuse)) + return 0; + + srcu_idx = srcu_read_lock(&head->srcu); + ns = nvme_find_path(head); + if (!ns) + goto out_unlock; + + ret = nvme_identify_ns(ns->ctrl, head->ns_id, &id); + if (ret) + goto out_unlock; + + head->nuse = le64_to_cpu(id->nuse); + kfree(id); + +out_unlock: + srcu_read_unlock(&head->srcu, srcu_idx); + return ret; +} + +static int ns_update_nuse(struct nvme_ns *ns) +{ + struct nvme_id_ns *id; + int ret; + + /* Avoid issuing commands too often by rate limiting the update. */ + if (!__ratelimit(&ns->head->rs_nuse)) + return 0; + + ret = nvme_identify_ns(ns->ctrl, ns->head->ns_id, &id); + if (ret) + goto out_free_id; + + ns->head->nuse = le64_to_cpu(id->nuse); + +out_free_id: + kfree(id); + + return ret; +} + +static ssize_t nuse_show(struct device *dev, struct device_attribute *attr, + char *buf) +{ + struct nvme_ns_head *head = dev_to_ns_head(dev); + struct gendisk *disk = dev_to_disk(dev); + struct block_device *bdev = disk->part0; + int ret; + + if (IS_ENABLED(CONFIG_NVME_MULTIPATH) && + bdev->bd_disk->fops == &nvme_ns_head_ops) + ret = ns_head_update_nuse(head); + else + ret = ns_update_nuse(bdev->bd_disk->private_data); + if (ret) + return ret; + + return sysfs_emit(buf, "%llu\n", head->nuse); +} +static DEVICE_ATTR_RO(nuse); + static struct attribute *nvme_ns_attrs[] = { &dev_attr_wwid.attr, &dev_attr_uuid.attr, &dev_attr_nguid.attr, &dev_attr_eui.attr, + &dev_attr_csi.attr, &dev_attr_nsid.attr, + &dev_attr_metadata_bytes.attr, + &dev_attr_nuse.attr, #ifdef CONFIG_NVME_MULTIPATH &dev_attr_ana_grpid.attr, &dev_attr_ana_state.attr, From 963929615194d163a25e3b536cd990a05d7e9439 Mon Sep 17 00:00:00 2001 From: Daniel Wagner Date: Mon, 18 Dec 2023 17:59:54 +0100 Subject: [PATCH 10/13] nvme: repack struct nvme_ns_head ns_id, lba_shift and ms are always accessed for every read/write I/O in nvme_setup_rw. By grouping these variables into one cacheline we can safe some cycles. 4k sequential reads: baseline patched Bandwidth: 1620 1634 IOPs 66345579 66910939 Reviewed-by: Christoph Hellwig Reviewed-by: Sagi Grimberg Reviewed-by: Hannes Reinecke Signed-off-by: Daniel Wagner Signed-off-by: Keith Busch --- drivers/nvme/host/nvme.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index 6211f18c53c7..3dbd187896d8 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -440,21 +440,21 @@ struct nvme_ns_head { struct list_head list; struct srcu_struct srcu; struct nvme_subsystem *subsys; - unsigned ns_id; struct nvme_ns_ids ids; struct list_head entry; struct kref ref; bool shared; int instance; struct nvme_effects_log *effects; + u64 nuse; + unsigned ns_id; int lba_shift; u16 ms; u16 pi_size; - u16 sgs; - u32 sws; - u64 nuse; u8 pi_type; u8 guard_type; + u16 sgs; + u32 sws; #ifdef CONFIG_BLK_DEV_ZONED u64 zsze; #endif From 536ecccbaf1fe6319c3af635596748c19208e627 Mon Sep 17 00:00:00 2001 From: Evan Burgess Date: Mon, 18 Dec 2023 19:03:32 +0000 Subject: [PATCH 11/13] nvmet: configfs: use ctrl->instance to track passthru subsystems To prevent enabling more than one passthrough subsystem per NVMe controller, passthru.c maintains an xarray indexed by cntlid values. Passthrough for a given nvmet subsystem cannot be enabled by configfs if the subsystem's passthru_ctrl->cntlid value is already accounted for in the xarray. However, according to the NVMe spec (rev 2.0c, p.145), "The Controller ID (CNTLID) value returned in the Identify Controller data structure may be used to uniquely identify a controller within an NVM subsystem," meaning that cntlid values are not guaranteed to be globally unique across multiple subsystems. Instead, the cntlid only uniquely identifies multiple controllers _within_ a subsystem. As a result, multiple unique & valid NVMe targets can be blocked from enabling passthrough at the same time if their controllers share cntlid values, a behavior allowed by the spec. Fix this by indexing the xarray with passthru_ctrl->instance values, which are allocated per controller by IDA and thus should be truly unique. Reviewed-by: Sagi Grimberg Reviewed-by: Christoph Hellwig Signed-off-by: Evan Burgess Signed-off-by: Keith Busch --- drivers/nvme/target/passthru.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/nvme/target/passthru.c b/drivers/nvme/target/passthru.c index 9fe07d7efa96..f2d963e1fe94 100644 --- a/drivers/nvme/target/passthru.c +++ b/drivers/nvme/target/passthru.c @@ -602,7 +602,7 @@ int nvmet_passthru_ctrl_enable(struct nvmet_subsys *subsys) goto out_put_file; } - old = xa_cmpxchg(&passthru_subsystems, ctrl->cntlid, NULL, + old = xa_cmpxchg(&passthru_subsystems, ctrl->instance, NULL, subsys, GFP_KERNEL); if (xa_is_err(old)) { ret = xa_err(old); @@ -635,7 +635,7 @@ int nvmet_passthru_ctrl_enable(struct nvmet_subsys *subsys) static void __nvmet_passthru_ctrl_disable(struct nvmet_subsys *subsys) { if (subsys->passthru_ctrl) { - xa_erase(&passthru_subsystems, subsys->passthru_ctrl->cntlid); + xa_erase(&passthru_subsystems, subsys->passthru_ctrl->instance); module_put(subsys->passthru_ctrl->ops->module); nvme_put_ctrl(subsys->passthru_ctrl); } From 7642138e17529b48b43c69faf5c6f45bb2b64234 Mon Sep 17 00:00:00 2001 From: Max Gurtovoy Date: Wed, 20 Dec 2023 11:27:45 +0200 Subject: [PATCH 12/13] nvme-fabrics: don't check discovery ioccsz/iorcsz IOCCSZ and IORCSZ are reserved for discovery controllers. Avoid checking their values during identify controller phase. Fixes: 2fcd3ab39826 ("nvme-fabrics: check ioccsz and iorcsz") Reported-by: Daniel Wagner Tested-by: Daniel Wagner Signed-off-by: Max Gurtovoy Signed-off-by: Keith Busch --- drivers/nvme/host/core.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 22dae2a26ba4..d144d1acb09a 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -3032,14 +3032,14 @@ static int nvme_check_ctrl_fabric_info(struct nvme_ctrl *ctrl, struct nvme_id_ct return -EINVAL; } - if (ctrl->ioccsz < 4) { + if (!nvme_discovery_ctrl(ctrl) && ctrl->ioccsz < 4) { dev_err(ctrl->device, "I/O queue command capsule supported size %d < 4\n", ctrl->ioccsz); return -EINVAL; } - if (ctrl->iorcsz < 1) { + if (!nvme_discovery_ctrl(ctrl) && ctrl->iorcsz < 1) { dev_err(ctrl->device, "I/O queue response capsule supported size %d < 1\n", ctrl->iorcsz); From 5d51dc8db10190661232741c93a141bdfc05f5ee Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Mon, 18 Dec 2023 15:22:24 -0800 Subject: [PATCH 13/13] nvme-fc: set numa_node after nvme_init_ctrl nvme_init_ctrl() resets numa_node to NUMA_NO_NODE, so be sure to set the desired value after that function call so it won't be overwritten. Reviewed-by: Sagi Grimberg Reviewed-by: Jens Axboe Reviewed-by: Christoph Hellwig Reviewed-by: Max Gurtovoy Signed-off-by: Keith Busch --- drivers/nvme/host/fc.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c index 9f9a3b35dc64..05c3159d42e9 100644 --- a/drivers/nvme/host/fc.c +++ b/drivers/nvme/host/fc.c @@ -3509,10 +3509,6 @@ nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts, ctrl->ctrl.opts = opts; ctrl->ctrl.nr_reconnects = 0; - if (lport->dev) - ctrl->ctrl.numa_node = dev_to_node(lport->dev); - else - ctrl->ctrl.numa_node = NUMA_NO_NODE; INIT_LIST_HEAD(&ctrl->ctrl_list); ctrl->lport = lport; ctrl->rport = rport; @@ -3557,6 +3553,8 @@ nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts, ret = nvme_init_ctrl(&ctrl->ctrl, dev, &nvme_fc_ctrl_ops, 0); if (ret) goto out_free_queues; + if (lport->dev) + ctrl->ctrl.numa_node = dev_to_node(lport->dev); /* at this point, teardown path changes to ref counting on nvme ctrl */