From dd2c18548964ae7ad48d208a765d909cd35448a1 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Wed, 11 Jun 2025 06:50:47 +0200 Subject: [PATCH 1/3] nvme: reset delayed remove_work after reconnect The remove_work will proceed with permanently disconnecting on the initial final path failure if the head shows no paths after the delay. If a new path connects while the remove_work is pending, and if that new path happens to disconnect before that remove_work executes, the delayed removal should reset based on the most recent path disconnect time, but queue_delayed_work() won't do anything if the work is already pending. Attempt to cancel the delayed work when a new path connects, and use mod_delayed_work() in case the remove_work remains pending anyway. Signed-off-by: Keith Busch Reviewed-by: Nilay Shroff Signed-off-by: Christoph Hellwig --- drivers/nvme/host/core.c | 4 ++++ drivers/nvme/host/multipath.c | 2 +- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 92697f98c601..724f5732786c 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -4036,6 +4036,10 @@ static int nvme_init_ns_head(struct nvme_ns *ns, struct nvme_ns_info *info) list_add_tail_rcu(&ns->siblings, &head->list); ns->head = head; mutex_unlock(&ctrl->subsys->lock); + +#ifdef CONFIG_NVME_MULTIPATH + cancel_delayed_work(&head->remove_work); +#endif return 0; out_put_ns_head: diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c index 140079ff86e6..1062467595f3 100644 --- a/drivers/nvme/host/multipath.c +++ b/drivers/nvme/host/multipath.c @@ -1311,7 +1311,7 @@ void nvme_mpath_remove_disk(struct nvme_ns_head *head) */ if (!try_module_get(THIS_MODULE)) goto out; - queue_delayed_work(nvme_wq, &head->remove_work, + mod_delayed_work(nvme_wq, &head->remove_work, head->delayed_removal_secs * HZ); } else { list_del_init(&head->entry); From b2e607fecac15e07f50269c080e2e71b5049dfa2 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 11 Jun 2025 07:09:21 +0200 Subject: [PATCH 2/3] nvme: refactor the atomic write unit detection Move all the code out of nvme_update_disk_info into the helper, and rename the helper to have a somewhat less clumsy name. Signed-off-by: Christoph Hellwig Reviewed-by: Luis Chamberlain Reviewed-by: John Garry --- drivers/nvme/host/core.c | 72 +++++++++++++++++++++------------------- 1 file changed, 38 insertions(+), 34 deletions(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 724f5732786c..520fb5f1e214 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -2015,21 +2015,51 @@ static void nvme_configure_metadata(struct nvme_ctrl *ctrl, } -static void nvme_update_atomic_write_disk_info(struct nvme_ns *ns, - struct nvme_id_ns *id, struct queue_limits *lim, - u32 bs, u32 atomic_bs) +static u32 nvme_configure_atomic_write(struct nvme_ns *ns, + struct nvme_id_ns *id, struct queue_limits *lim, u32 bs) { - unsigned int boundary = 0; + u32 atomic_bs, boundary = 0; - if (id->nsfeat & NVME_NS_FEAT_ATOMICS && id->nawupf) { - if (le16_to_cpu(id->nabspf)) + /* + * We do not support an offset for the atomic boundaries. + */ + if (id->nabo) + return bs; + + if ((id->nsfeat & NVME_NS_FEAT_ATOMICS) && id->nawupf) { + /* + * Use the per-namespace atomic write unit when available. + */ + atomic_bs = (1 + le16_to_cpu(id->nawupf)) * bs; + if (id->nabspf) boundary = (le16_to_cpu(id->nabspf) + 1) * bs; + } else { + /* + * Use the controller wide atomic write unit. This sucks + * because the limit is defined in terms of logical blocks while + * namespaces can have different formats, and because there is + * no clear language in the specification prohibiting different + * values for different controllers in the subsystem. + */ + atomic_bs = (1 + ns->ctrl->awupf) * bs; } + + if (!ns->ctrl->subsys->atomic_bs) { + ns->ctrl->subsys->atomic_bs = atomic_bs; + } else if (ns->ctrl->subsys->atomic_bs != atomic_bs) { + dev_err_ratelimited(ns->ctrl->device, + "%s: Inconsistent Atomic Write Size, Namespace will not be added: Subsystem=%d bytes, Controller/Namespace=%d bytes\n", + ns->disk ? ns->disk->disk_name : "?", + ns->ctrl->subsys->atomic_bs, + atomic_bs); + } + lim->atomic_write_hw_max = atomic_bs; lim->atomic_write_hw_boundary = boundary; lim->atomic_write_hw_unit_min = bs; lim->atomic_write_hw_unit_max = rounddown_pow_of_two(atomic_bs); lim->features |= BLK_FEAT_ATOMIC_WRITES; + return atomic_bs; } static u32 nvme_max_drv_segments(struct nvme_ctrl *ctrl) @@ -2067,34 +2097,8 @@ static bool nvme_update_disk_info(struct nvme_ns *ns, struct nvme_id_ns *id, valid = false; } - atomic_bs = phys_bs = bs; - if (id->nabo == 0) { - /* - * Bit 1 indicates whether NAWUPF is defined for this namespace - * and whether it should be used instead of AWUPF. If NAWUPF == - * 0 then AWUPF must be used instead. - */ - if (id->nsfeat & NVME_NS_FEAT_ATOMICS && id->nawupf) - atomic_bs = (1 + le16_to_cpu(id->nawupf)) * bs; - else - atomic_bs = (1 + ns->ctrl->awupf) * bs; - - /* - * Set subsystem atomic bs. - */ - if (ns->ctrl->subsys->atomic_bs) { - if (atomic_bs != ns->ctrl->subsys->atomic_bs) { - dev_err_ratelimited(ns->ctrl->device, - "%s: Inconsistent Atomic Write Size, Namespace will not be added: Subsystem=%d bytes, Controller/Namespace=%d bytes\n", - ns->disk ? ns->disk->disk_name : "?", - ns->ctrl->subsys->atomic_bs, - atomic_bs); - } - } else - ns->ctrl->subsys->atomic_bs = atomic_bs; - - nvme_update_atomic_write_disk_info(ns, id, lim, bs, atomic_bs); - } + phys_bs = bs; + atomic_bs = nvme_configure_atomic_write(ns, id, lim, bs); if (id->nsfeat & NVME_NS_FEAT_IO_OPT) { /* NPWG = Namespace Preferred Write Granularity */ From f46d273449ba65afd53f3dd8fe0182c9df877e08 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 11 Jun 2025 06:54:56 +0200 Subject: [PATCH 3/3] nvme: fix atomic write size validation Don't mix the namespace and controller values, and validate the per-controller limit when probing the controller. This avoid spurious failures for controllers with namespaces that have different namespaces with different logical block sizes, or report the per-namespace values only for some namespaces. It also fixes a missing queue_limits_cancel_update in an error path by removing that error path. Fixes: 8695f060a029 ("nvme: all namespaces in a subsystem must adhere to a common atomic write size") Reported-by: Yi Zhang Signed-off-by: Christoph Hellwig Reviewed-by: Luis Chamberlain Reviewed-by: John Garry Tested-by: Yi Zhang --- drivers/nvme/host/core.c | 33 +++++++++++---------------------- drivers/nvme/host/nvme.h | 3 +-- 2 files changed, 12 insertions(+), 24 deletions(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 520fb5f1e214..e533d791955d 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -2041,17 +2041,7 @@ static u32 nvme_configure_atomic_write(struct nvme_ns *ns, * no clear language in the specification prohibiting different * values for different controllers in the subsystem. */ - atomic_bs = (1 + ns->ctrl->awupf) * bs; - } - - if (!ns->ctrl->subsys->atomic_bs) { - ns->ctrl->subsys->atomic_bs = atomic_bs; - } else if (ns->ctrl->subsys->atomic_bs != atomic_bs) { - dev_err_ratelimited(ns->ctrl->device, - "%s: Inconsistent Atomic Write Size, Namespace will not be added: Subsystem=%d bytes, Controller/Namespace=%d bytes\n", - ns->disk ? ns->disk->disk_name : "?", - ns->ctrl->subsys->atomic_bs, - atomic_bs); + atomic_bs = (1 + ns->ctrl->subsys->awupf) * bs; } lim->atomic_write_hw_max = atomic_bs; @@ -2386,16 +2376,6 @@ static int nvme_update_ns_info_block(struct nvme_ns *ns, if (!nvme_update_disk_info(ns, id, &lim)) capacity = 0; - /* - * Validate the max atomic write size fits within the subsystem's - * atomic write capabilities. - */ - if (lim.atomic_write_hw_max > ns->ctrl->subsys->atomic_bs) { - blk_mq_unfreeze_queue(ns->disk->queue, memflags); - ret = -ENXIO; - goto out; - } - nvme_config_discard(ns, &lim); if (IS_ENABLED(CONFIG_BLK_DEV_ZONED) && ns->head->ids.csi == NVME_CSI_ZNS) @@ -3219,6 +3199,7 @@ static int nvme_init_subsystem(struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id) memcpy(subsys->model, id->mn, sizeof(subsys->model)); subsys->vendor_id = le16_to_cpu(id->vid); subsys->cmic = id->cmic; + subsys->awupf = le16_to_cpu(id->awupf); /* Versions prior to 1.4 don't necessarily report a valid type */ if (id->cntrltype == NVME_CTRL_DISC || @@ -3556,6 +3537,15 @@ static int nvme_init_identify(struct nvme_ctrl *ctrl) if (ret) goto out_free; } + + if (le16_to_cpu(id->awupf) != ctrl->subsys->awupf) { + dev_err_ratelimited(ctrl->device, + "inconsistent AWUPF, controller not added (%u/%u).\n", + le16_to_cpu(id->awupf), ctrl->subsys->awupf); + ret = -EINVAL; + goto out_free; + } + memcpy(ctrl->subsys->firmware_rev, id->fr, sizeof(ctrl->subsys->firmware_rev)); @@ -3651,7 +3641,6 @@ static int nvme_init_identify(struct nvme_ctrl *ctrl) dev_pm_qos_expose_latency_tolerance(ctrl->device); else if (!ctrl->apst_enabled && prev_apst_enabled) dev_pm_qos_hide_latency_tolerance(ctrl->device); - ctrl->awupf = le16_to_cpu(id->awupf); out_free: kfree(id); return ret; diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index a468cdc5b5cb..7df2ea21851f 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -410,7 +410,6 @@ struct nvme_ctrl { enum nvme_ctrl_type cntrltype; enum nvme_dctype dctype; - u16 awupf; /* 0's based value. */ }; static inline enum nvme_ctrl_state nvme_ctrl_state(struct nvme_ctrl *ctrl) @@ -443,11 +442,11 @@ struct nvme_subsystem { u8 cmic; enum nvme_subsys_type subtype; u16 vendor_id; + u16 awupf; /* 0's based value. */ struct ida ns_ida; #ifdef CONFIG_NVME_MULTIPATH enum nvme_iopolicy iopolicy; #endif - u32 atomic_bs; }; /*