From b2702aaaa45c2d1028bc8c51b49106041be7aae5 Mon Sep 17 00:00:00 2001 From: Chaitanya Kulkarni Date: Wed, 16 Sep 2020 18:11:02 -0700 Subject: [PATCH 1/9] nvme: lift the file open code from nvme_ctrl_get_by_path Lift opening the file open/close code from nvme_ctrl_get_by_path into the caller, just keeping a simple nvme_ctrl_from_file() helper. Signed-off-by: Chaitanya Kulkarni [hch: refactored a bit, split the bug fixes into a separate prep patch] Signed-off-by: Christoph Hellwig Reviewed-by: Logan Gunthorpe --- drivers/nvme/host/core.c | 25 +++++-------------------- drivers/nvme/host/nvme.h | 2 +- drivers/nvme/target/passthru.c | 27 ++++++++++++++++----------- 3 files changed, 22 insertions(+), 32 deletions(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 64f4bf85c3d1..35cd177bda08 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -4668,28 +4668,13 @@ void nvme_sync_queues(struct nvme_ctrl *ctrl) } EXPORT_SYMBOL_GPL(nvme_sync_queues); -struct nvme_ctrl *nvme_ctrl_get_by_path(const char *path) +struct nvme_ctrl *nvme_ctrl_from_file(struct file *file) { - struct nvme_ctrl *ctrl; - struct file *f; - - f = filp_open(path, O_RDWR, 0); - if (IS_ERR(f)) - return ERR_CAST(f); - - if (f->f_op != &nvme_dev_fops) { - ctrl = ERR_PTR(-EINVAL); - goto out_close; - } - - ctrl = f->private_data; - nvme_get_ctrl(ctrl); - -out_close: - filp_close(f, NULL); - return ctrl; + if (file->f_op != &nvme_dev_fops) + return NULL; + return file->private_data; } -EXPORT_SYMBOL_NS_GPL(nvme_ctrl_get_by_path, NVME_TARGET_PASSTHRU); +EXPORT_SYMBOL_NS_GPL(nvme_ctrl_from_file, NVME_TARGET_PASSTHRU); /* * Check we didn't inadvertently grow the command structure sizes: diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index 398394bcbee4..4749baa457bc 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -822,7 +822,7 @@ static inline void nvme_hwmon_init(struct nvme_ctrl *ctrl) { } u32 nvme_command_effects(struct nvme_ctrl *ctrl, struct nvme_ns *ns, u8 opcode); void nvme_execute_passthru_rq(struct request *rq); -struct nvme_ctrl *nvme_ctrl_get_by_path(const char *path); +struct nvme_ctrl *nvme_ctrl_from_file(struct file *file); struct nvme_ns *nvme_find_get_ns(struct nvme_ctrl *ctrl, unsigned nsid); void nvme_put_ns(struct nvme_ns *ns); diff --git a/drivers/nvme/target/passthru.c b/drivers/nvme/target/passthru.c index dacfa7435d0b..e3a5f8499a0b 100644 --- a/drivers/nvme/target/passthru.c +++ b/drivers/nvme/target/passthru.c @@ -474,6 +474,7 @@ u16 nvmet_parse_passthru_admin_cmd(struct nvmet_req *req) int nvmet_passthru_ctrl_enable(struct nvmet_subsys *subsys) { struct nvme_ctrl *ctrl; + struct file *file; int ret = -EINVAL; void *old; @@ -488,24 +489,29 @@ int nvmet_passthru_ctrl_enable(struct nvmet_subsys *subsys) goto out_unlock; } - ctrl = nvme_ctrl_get_by_path(subsys->passthru_ctrl_path); - if (IS_ERR(ctrl)) { - ret = PTR_ERR(ctrl); + file = filp_open(subsys->passthru_ctrl_path, O_RDWR, 0); + if (IS_ERR(file)) { + ret = PTR_ERR(file); + goto out_unlock; + } + + ctrl = nvme_ctrl_from_file(file); + if (!ctrl) { pr_err("failed to open nvme controller %s\n", subsys->passthru_ctrl_path); - goto out_unlock; + goto out_put_file; } old = xa_cmpxchg(&passthru_subsystems, ctrl->cntlid, NULL, subsys, GFP_KERNEL); if (xa_is_err(old)) { ret = xa_err(old); - goto out_put_ctrl; + goto out_put_file; } if (old) - goto out_put_ctrl; + goto out_put_file; subsys->passthru_ctrl = ctrl; subsys->ver = ctrl->vs; @@ -516,13 +522,12 @@ int nvmet_passthru_ctrl_enable(struct nvmet_subsys *subsys) NVME_TERTIARY(subsys->ver)); subsys->ver = NVME_VS(1, 2, 1); } - + nvme_get_ctrl(ctrl); __module_get(subsys->passthru_ctrl->ops->module); - mutex_unlock(&subsys->lock); - return 0; + ret = 0; -out_put_ctrl: - nvme_put_ctrl(ctrl); +out_put_file: + filp_close(file, NULL); out_unlock: mutex_unlock(&subsys->lock); return ret; From 1cf7a12e09aa48512404bf3f0f429b73182ce793 Mon Sep 17 00:00:00 2001 From: Chaitanya Kulkarni Date: Tue, 22 Sep 2020 14:05:29 -0700 Subject: [PATCH 2/9] nvme: use an xarray to lookup the Commands Supported and Effects log When using linked list we have to open code the locking, search, and destroy operations with the loops even if data structure doesn't fall into the fast path. One of the main advantage of having XArray to store, search, and remove items is that it handles all the locking by itself, avoids the loops when using linked lists, provides clear API to replace the linked list's search and destroy loops. This patch replaces the ctrl->cel list with XArray and removes :- a. Extra code needed for the linked list for ctrl->cel item management such as nvme_find_cel(). b. Destroy loop in the nvme_free_ctrl(). c. Explicit insertion locking in the nvme_get_effects_log(). Signed-off-by: Chaitanya Kulkarni Reviewed-by: Keith Busch Signed-off-by: Christoph Hellwig --- drivers/nvme/host/core.c | 31 ++++--------------------------- drivers/nvme/host/nvme.h | 2 +- 2 files changed, 5 insertions(+), 28 deletions(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 35cd177bda08..0a985c601c62 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -3025,26 +3025,10 @@ int nvme_get_log(struct nvme_ctrl *ctrl, u32 nsid, u8 log_page, u8 lsp, u8 csi, return nvme_submit_sync_cmd(ctrl->admin_q, &c, log, size); } -static struct nvme_cel *nvme_find_cel(struct nvme_ctrl *ctrl, u8 csi) -{ - struct nvme_cel *cel, *ret = NULL; - - spin_lock_irq(&ctrl->lock); - list_for_each_entry(cel, &ctrl->cels, entry) { - if (cel->csi == csi) { - ret = cel; - break; - } - } - spin_unlock_irq(&ctrl->lock); - - return ret; -} - static int nvme_get_effects_log(struct nvme_ctrl *ctrl, u8 csi, struct nvme_effects_log **log) { - struct nvme_cel *cel = nvme_find_cel(ctrl, csi); + struct nvme_cel *cel = xa_load(&ctrl->cels, csi); int ret; if (cel) @@ -3062,10 +3046,7 @@ static int nvme_get_effects_log(struct nvme_ctrl *ctrl, u8 csi, } cel->csi = csi; - - spin_lock_irq(&ctrl->lock); - list_add_tail(&cel->entry, &ctrl->cels); - spin_unlock_irq(&ctrl->lock); + xa_store(&ctrl->cels, cel->csi, cel, GFP_KERNEL); out: *log = &cel->log; return 0; @@ -4448,15 +4429,11 @@ static void nvme_free_ctrl(struct device *dev) struct nvme_ctrl *ctrl = container_of(dev, struct nvme_ctrl, ctrl_device); struct nvme_subsystem *subsys = ctrl->subsys; - struct nvme_cel *cel, *next; if (!subsys || ctrl->instance != subsys->instance) ida_simple_remove(&nvme_instance_ida, ctrl->instance); - list_for_each_entry_safe(cel, next, &ctrl->cels, entry) { - list_del(&cel->entry); - kfree(cel); - } + xa_destroy(&ctrl->cels); nvme_mpath_uninit(ctrl); __free_page(ctrl->discard_page); @@ -4488,7 +4465,7 @@ int nvme_init_ctrl(struct nvme_ctrl *ctrl, struct device *dev, spin_lock_init(&ctrl->lock); mutex_init(&ctrl->scan_lock); INIT_LIST_HEAD(&ctrl->namespaces); - INIT_LIST_HEAD(&ctrl->cels); + xa_init(&ctrl->cels); init_rwsem(&ctrl->namespaces_rwsem); ctrl->dev = dev; ctrl->ops = ops; diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index 4749baa457bc..1096ef1f6aa2 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -300,7 +300,7 @@ struct nvme_ctrl { unsigned long quirks; struct nvme_id_power_state psd[32]; struct nvme_effects_log *effects; - struct list_head cels; + struct xarray cels; struct work_struct scan_work; struct work_struct async_event_work; struct delayed_work ka_work; From 0b85f59d30b91bd2b93ea7ef0816a4b7e7039e8c Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Tue, 18 Aug 2020 11:35:30 +0300 Subject: [PATCH 3/9] nvme-pci: Move enumeration by class to be last in the table It's unusual that we have enumeration by class in the middle of the table. It might potentially be problematic in the future if we add another entry after it. So, move class matching entry to be the last in the ID table. Signed-off-by: Andy Shevchenko Reviewed-by: Keith Busch Reviewed-by: Sagi Grimberg Reviewed-by: Chaitanya Kulkarni Signed-off-by: Christoph Hellwig --- drivers/nvme/host/pci.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 08c5a9b438f3..dabe42f605b1 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -3187,7 +3187,6 @@ static const struct pci_device_id nvme_id_table[] = { NVME_QUIRK_IGNORE_DEV_SUBNQN, }, { PCI_DEVICE(0x1c5c, 0x1504), /* SK Hynix PC400 */ .driver_data = NVME_QUIRK_DISABLE_WRITE_ZEROES, }, - { PCI_DEVICE_CLASS(PCI_CLASS_STORAGE_EXPRESS, 0xffffff) }, { PCI_DEVICE(PCI_VENDOR_ID_APPLE, 0x2001), .driver_data = NVME_QUIRK_SINGLE_VECTOR }, { PCI_DEVICE(PCI_VENDOR_ID_APPLE, 0x2003) }, @@ -3195,6 +3194,8 @@ static const struct pci_device_id nvme_id_table[] = { .driver_data = NVME_QUIRK_SINGLE_VECTOR | NVME_QUIRK_128_BYTES_SQES | NVME_QUIRK_SHARED_TAGS }, + + { PCI_DEVICE_CLASS(PCI_CLASS_STORAGE_EXPRESS, 0xffffff) }, { 0, } }; MODULE_DEVICE_TABLE(pci, nvme_id_table); From f7790e5de92a3b88e74704a6cb2276c09c32f511 Mon Sep 17 00:00:00 2001 From: Mark Wunderlich Date: Fri, 28 Aug 2020 01:00:53 +0000 Subject: [PATCH 4/9] nvmet-tcp: have queue io_work context run on sock incoming cpu No real good need to spread queues artificially. Usually the target will serve multiple hosts, and it's better to run on the socket incoming cpu for better affinitization rather than spread queues on all online cpus. We rely on RSS to spread the work around sufficiently. Signed-off-by: Mark Wunderlich Reviewed-by: Sagi Grimberg Signed-off-by: Christoph Hellwig --- drivers/nvme/target/tcp.c | 21 ++++++++++----------- 1 file changed, 10 insertions(+), 11 deletions(-) diff --git a/drivers/nvme/target/tcp.c b/drivers/nvme/target/tcp.c index 8e0d766d2722..dc1f0f647189 100644 --- a/drivers/nvme/target/tcp.c +++ b/drivers/nvme/target/tcp.c @@ -94,7 +94,6 @@ struct nvmet_tcp_queue { struct socket *sock; struct nvmet_tcp_port *port; struct work_struct io_work; - int cpu; struct nvmet_cq nvme_cq; struct nvmet_sq nvme_sq; @@ -144,7 +143,6 @@ struct nvmet_tcp_port { struct work_struct accept_work; struct nvmet_port *nport; struct sockaddr_storage addr; - int last_cpu; void (*data_ready)(struct sock *); }; @@ -219,6 +217,11 @@ static inline void nvmet_tcp_put_cmd(struct nvmet_tcp_cmd *cmd) list_add_tail(&cmd->entry, &cmd->queue->free_list); } +static inline int queue_cpu(struct nvmet_tcp_queue *queue) +{ + return queue->sock->sk->sk_incoming_cpu; +} + static inline u8 nvmet_tcp_hdgst_len(struct nvmet_tcp_queue *queue) { return queue->hdr_digest ? NVME_TCP_DIGEST_LENGTH : 0; @@ -506,7 +509,7 @@ static void nvmet_tcp_queue_response(struct nvmet_req *req) struct nvmet_tcp_queue *queue = cmd->queue; llist_add(&cmd->lentry, &queue->resp_list); - queue_work_on(cmd->queue->cpu, nvmet_tcp_wq, &cmd->queue->io_work); + queue_work_on(queue_cpu(queue), nvmet_tcp_wq, &cmd->queue->io_work); } static int nvmet_try_send_data_pdu(struct nvmet_tcp_cmd *cmd) @@ -1223,7 +1226,7 @@ static void nvmet_tcp_io_work(struct work_struct *w) * We exahusted our budget, requeue our selves */ if (pending) - queue_work_on(queue->cpu, nvmet_tcp_wq, &queue->io_work); + queue_work_on(queue_cpu(queue), nvmet_tcp_wq, &queue->io_work); } static int nvmet_tcp_alloc_cmd(struct nvmet_tcp_queue *queue, @@ -1383,7 +1386,7 @@ static void nvmet_tcp_data_ready(struct sock *sk) read_lock_bh(&sk->sk_callback_lock); queue = sk->sk_user_data; if (likely(queue)) - queue_work_on(queue->cpu, nvmet_tcp_wq, &queue->io_work); + queue_work_on(queue_cpu(queue), nvmet_tcp_wq, &queue->io_work); read_unlock_bh(&sk->sk_callback_lock); } @@ -1403,7 +1406,7 @@ static void nvmet_tcp_write_space(struct sock *sk) if (sk_stream_is_writeable(sk)) { clear_bit(SOCK_NOSPACE, &sk->sk_socket->flags); - queue_work_on(queue->cpu, nvmet_tcp_wq, &queue->io_work); + queue_work_on(queue_cpu(queue), nvmet_tcp_wq, &queue->io_work); } out: read_unlock_bh(&sk->sk_callback_lock); @@ -1512,9 +1515,6 @@ static int nvmet_tcp_alloc_queue(struct nvmet_tcp_port *port, if (ret) goto out_free_connect; - port->last_cpu = cpumask_next_wrap(port->last_cpu, - cpu_online_mask, -1, false); - queue->cpu = port->last_cpu; nvmet_prepare_receive_pdu(queue); mutex_lock(&nvmet_tcp_queue_mutex); @@ -1525,7 +1525,7 @@ static int nvmet_tcp_alloc_queue(struct nvmet_tcp_port *port, if (ret) goto out_destroy_sq; - queue_work_on(queue->cpu, nvmet_tcp_wq, &queue->io_work); + queue_work_on(queue_cpu(queue), nvmet_tcp_wq, &queue->io_work); return 0; out_destroy_sq: @@ -1612,7 +1612,6 @@ static int nvmet_tcp_add_port(struct nvmet_port *nport) } port->nport = nport; - port->last_cpu = -1; INIT_WORK(&port->accept_work, nvmet_tcp_accept_work); if (port->nport->inline_data_size < 0) port->nport->inline_data_size = NVMET_TCP_DEF_INLINE_DATA_SIZE; From 4e683c48db018e72e81e1e0d12a5020d29be561a Mon Sep 17 00:00:00 2001 From: Amit Engel Date: Wed, 16 Sep 2020 20:47:20 +0300 Subject: [PATCH 5/9] nvmet: handle keep-alive timer when kato is modified by a set features cmd A user may modify the kato by a set features cmd. To properly deal with races or a kato value of 0 (no keep alive enabled) change nvmet_set_feat_kato to first disable the timer, then set the value and then re-enable the timer. Signed-off-by: Amit Engel Reviewed-by: Sagi Grimberg Signed-off-by: Christoph Hellwig --- drivers/nvme/target/admin-cmd.c | 2 ++ drivers/nvme/target/core.c | 4 ++-- drivers/nvme/target/nvmet.h | 2 ++ 3 files changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/nvme/target/admin-cmd.c b/drivers/nvme/target/admin-cmd.c index e9fe91786bbb..dca34489a1dc 100644 --- a/drivers/nvme/target/admin-cmd.c +++ b/drivers/nvme/target/admin-cmd.c @@ -727,7 +727,9 @@ u16 nvmet_set_feat_kato(struct nvmet_req *req) { u32 val32 = le32_to_cpu(req->cmd->common.cdw11); + nvmet_stop_keep_alive_timer(req->sq->ctrl); req->sq->ctrl->kato = DIV_ROUND_UP(val32, 1000); + nvmet_start_keep_alive_timer(req->sq->ctrl); nvmet_set_result(req, req->sq->ctrl->kato); diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c index b7b63330b5ef..25d62d867563 100644 --- a/drivers/nvme/target/core.c +++ b/drivers/nvme/target/core.c @@ -395,7 +395,7 @@ static void nvmet_keep_alive_timer(struct work_struct *work) nvmet_ctrl_fatal_error(ctrl); } -static void nvmet_start_keep_alive_timer(struct nvmet_ctrl *ctrl) +void nvmet_start_keep_alive_timer(struct nvmet_ctrl *ctrl) { if (unlikely(ctrl->kato == 0)) return; @@ -407,7 +407,7 @@ static void nvmet_start_keep_alive_timer(struct nvmet_ctrl *ctrl) schedule_delayed_work(&ctrl->ka_work, ctrl->kato * HZ); } -static void nvmet_stop_keep_alive_timer(struct nvmet_ctrl *ctrl) +void nvmet_stop_keep_alive_timer(struct nvmet_ctrl *ctrl) { if (unlikely(ctrl->kato == 0)) return; diff --git a/drivers/nvme/target/nvmet.h b/drivers/nvme/target/nvmet.h index 47ee3fb193bd..559a15ccc322 100644 --- a/drivers/nvme/target/nvmet.h +++ b/drivers/nvme/target/nvmet.h @@ -395,6 +395,8 @@ void nvmet_get_feat_async_event(struct nvmet_req *req); u16 nvmet_set_feat_kato(struct nvmet_req *req); u16 nvmet_set_feat_async_event(struct nvmet_req *req, u32 mask); void nvmet_execute_async_event(struct nvmet_req *req); +void nvmet_start_keep_alive_timer(struct nvmet_ctrl *ctrl); +void nvmet_stop_keep_alive_timer(struct nvmet_ctrl *ctrl); u16 nvmet_parse_connect_cmd(struct nvmet_req *req); void nvmet_bdev_set_limits(struct block_device *bdev, struct nvme_id_ns *id); From 5b3356d9da88fe6d6f3bcd8a2336d14a767f377d Mon Sep 17 00:00:00 2001 From: Chaitanya Kulkarni Date: Tue, 22 Sep 2020 14:17:18 -0700 Subject: [PATCH 6/9] nvmet: add passthru ZNS support In the default passthru implementation NVMeOF target passthru ctrl is not capable of handling Zoned Namespaces (ZNS). Update the nvmet_parse_pasthru_admin_cmd() to allow NVME_ID_CNS_CS_CTRL/NVME_CIS_ZNS and NVME_ID_CNS_CS_NS/NVME_CIS_ZNS. With this addition NVMeOF Passthru allows Zoned Namespaces. Signed-off-by: Chaitanya Kulkarni Signed-off-by: Christoph Hellwig --- drivers/nvme/target/passthru.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/drivers/nvme/target/passthru.c b/drivers/nvme/target/passthru.c index e3a5f8499a0b..56c571052216 100644 --- a/drivers/nvme/target/passthru.c +++ b/drivers/nvme/target/passthru.c @@ -456,10 +456,26 @@ u16 nvmet_parse_passthru_admin_cmd(struct nvmet_req *req) req->execute = nvmet_passthru_execute_cmd; req->p.use_workqueue = true; return NVME_SC_SUCCESS; + case NVME_ID_CNS_CS_CTRL: + switch (req->cmd->identify.csi) { + case NVME_CSI_ZNS: + req->execute = nvmet_passthru_execute_cmd; + req->p.use_workqueue = true; + return NVME_SC_SUCCESS; + } + return NVME_SC_INVALID_OPCODE | NVME_SC_DNR; case NVME_ID_CNS_NS: req->execute = nvmet_passthru_execute_cmd; req->p.use_workqueue = true; return NVME_SC_SUCCESS; + case NVME_ID_CNS_CS_NS: + switch (req->cmd->identify.csi) { + case NVME_CSI_ZNS: + req->execute = nvmet_passthru_execute_cmd; + req->p.use_workqueue = true; + return NVME_SC_SUCCESS; + } + return NVME_SC_INVALID_OPCODE | NVME_SC_DNR; default: return nvmet_setup_passthru_command(req); } From ddd3d1051797b9f907ab9799f5ba50398c530676 Mon Sep 17 00:00:00 2001 From: James Smart Date: Tue, 22 Sep 2020 14:54:12 -0700 Subject: [PATCH 7/9] nvmet-fc: fix missing check for no hostport struct A hostport port pointer is allowed to be NULL as it is not allocated if the lldd does not support the new interfaces for NVME LS request support. The hostport free routine validates the handle but forgot to validate the hostport pointer. Validate the hostport pointer before using it to validate the handle. Signed-off-by: James Smart Reviewed-by: Himanshu Madhani Signed-off-by: Christoph Hellwig --- drivers/nvme/target/fc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/nvme/target/fc.c b/drivers/nvme/target/fc.c index e6861cc10e7d..cd4e73aa9807 100644 --- a/drivers/nvme/target/fc.c +++ b/drivers/nvme/target/fc.c @@ -1019,7 +1019,7 @@ static void nvmet_fc_free_hostport(struct nvmet_fc_hostport *hostport) { /* if LLDD not implemented, leave as NULL */ - if (!hostport->hosthandle) + if (!hostport || !hostport->hosthandle) return; nvmet_fc_hostport_put(hostport); From 936fab503ff4af94f5f9c0b549f3ab4d435500ec Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sun, 30 Aug 2020 12:00:10 +0200 Subject: [PATCH 8/9] nvme: fix error handling in nvme_ns_report_zones nvme_submit_sync_cmd can return positive NVMe error codes in addition to the negative Linux error code, which are currently ignored. Fix this by removing __nvme_ns_report_zones and handling the errors from nvme_submit_sync_cmd in the caller instead of multiplexing the return value and the number of zones reported into a single return value. Fixes: 240e6ee272c0 ("nvme: support for zoned namespaces") Signed-off-by: Christoph Hellwig Reviewed-by: Damien Le Moal --- drivers/nvme/host/zns.c | 43 ++++++++++++++++------------------------- 1 file changed, 17 insertions(+), 26 deletions(-) diff --git a/drivers/nvme/host/zns.c b/drivers/nvme/host/zns.c index 57cfd78731fb..53efecb67898 100644 --- a/drivers/nvme/host/zns.c +++ b/drivers/nvme/host/zns.c @@ -133,28 +133,6 @@ static void *nvme_zns_alloc_report_buffer(struct nvme_ns *ns, return NULL; } -static int __nvme_ns_report_zones(struct nvme_ns *ns, sector_t sector, - struct nvme_zone_report *report, - size_t buflen) -{ - struct nvme_command c = { }; - int ret; - - c.zmr.opcode = nvme_cmd_zone_mgmt_recv; - c.zmr.nsid = cpu_to_le32(ns->head->ns_id); - c.zmr.slba = cpu_to_le64(nvme_sect_to_lba(ns, sector)); - c.zmr.numd = cpu_to_le32(nvme_bytes_to_numd(buflen)); - c.zmr.zra = NVME_ZRA_ZONE_REPORT; - c.zmr.zrasf = NVME_ZRASF_ZONE_REPORT_ALL; - c.zmr.pr = NVME_REPORT_ZONE_PARTIAL; - - ret = nvme_submit_sync_cmd(ns->queue, &c, report, buflen); - if (ret) - return ret; - - return le64_to_cpu(report->nr_zones); -} - static int nvme_zone_parse_entry(struct nvme_ns *ns, struct nvme_zone_descriptor *entry, unsigned int idx, report_zones_cb cb, @@ -182,6 +160,7 @@ static int nvme_ns_report_zones(struct nvme_ns *ns, sector_t sector, unsigned int nr_zones, report_zones_cb cb, void *data) { struct nvme_zone_report *report; + struct nvme_command c = { }; int ret, zone_idx = 0; unsigned int nz, i; size_t buflen; @@ -190,14 +169,26 @@ static int nvme_ns_report_zones(struct nvme_ns *ns, sector_t sector, if (!report) return -ENOMEM; + c.zmr.opcode = nvme_cmd_zone_mgmt_recv; + c.zmr.nsid = cpu_to_le32(ns->head->ns_id); + c.zmr.numd = cpu_to_le32(nvme_bytes_to_numd(buflen)); + c.zmr.zra = NVME_ZRA_ZONE_REPORT; + c.zmr.zrasf = NVME_ZRASF_ZONE_REPORT_ALL; + c.zmr.pr = NVME_REPORT_ZONE_PARTIAL; + sector &= ~(ns->zsze - 1); while (zone_idx < nr_zones && sector < get_capacity(ns->disk)) { memset(report, 0, buflen); - ret = __nvme_ns_report_zones(ns, sector, report, buflen); - if (ret < 0) - goto out_free; - nz = min_t(unsigned int, ret, nr_zones); + c.zmr.slba = cpu_to_le64(nvme_sect_to_lba(ns, sector)); + ret = nvme_submit_sync_cmd(ns->queue, &c, report, buflen); + if (ret) { + if (ret > 0) + ret = -EIO; + goto out_free; + } + + nz = min((unsigned int)le64_to_cpu(report->nr_zones), nr_zones); if (!nz) break; From 21cc2f3f799fa228f812f7ab971fa8d43c893392 Mon Sep 17 00:00:00 2001 From: Jeffle Xu Date: Thu, 24 Sep 2020 09:01:22 +0200 Subject: [PATCH 9/9] nvme-pci: allocate separate interrupt for the reserved non-polled I/O queue One queue will be reserved for non-polled IO when nvme.poll_queues is greater or equal than the number of IO queues that the nvme controller can provide. Currently the reserved queue for non-polled IO will reuse the interrupt used by admin queue in this case, e.g, vector 0. This can work and the performance may not be an issue since the admin queue is used unfrequently. However this behaviour may be inconsistent with that when nvme.poll_queues is smaller than the number of IO queues available. Thus allocate separate interrupt for this reserved queue, and thus make the behaviour consistent. Signed-off-by: Jeffle Xu [hch: minor cleanups, mostly to the pre-existing surrounding code] Signed-off-by: Christoph Hellwig --- drivers/nvme/host/pci.c | 32 +++++++++++++++----------------- 1 file changed, 15 insertions(+), 17 deletions(-) diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index dabe42f605b1..cb645ac0c2dc 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -2038,32 +2038,30 @@ static int nvme_setup_irqs(struct nvme_dev *dev, unsigned int nr_io_queues) .calc_sets = nvme_calc_irq_sets, .priv = dev, }; - unsigned int irq_queues, this_p_queues; + unsigned int irq_queues, poll_queues; /* - * Poll queues don't need interrupts, but we need at least one IO - * queue left over for non-polled IO. + * Poll queues don't need interrupts, but we need at least one I/O queue + * left over for non-polled I/O. */ - this_p_queues = dev->nr_poll_queues; - if (this_p_queues >= nr_io_queues) { - this_p_queues = nr_io_queues - 1; - irq_queues = 1; - } else { - irq_queues = nr_io_queues - this_p_queues + 1; - } - dev->io_queues[HCTX_TYPE_POLL] = this_p_queues; + poll_queues = min(dev->nr_poll_queues, nr_io_queues - 1); + dev->io_queues[HCTX_TYPE_POLL] = poll_queues; - /* Initialize for the single interrupt case */ + /* + * Initialize for the single interrupt case, will be updated in + * nvme_calc_irq_sets(). + */ dev->io_queues[HCTX_TYPE_DEFAULT] = 1; dev->io_queues[HCTX_TYPE_READ] = 0; /* - * Some Apple controllers require all queues to use the - * first vector. + * We need interrupts for the admin queue and each non-polled I/O queue, + * but some Apple controllers require all queues to use the first + * vector. */ - if (dev->ctrl.quirks & NVME_QUIRK_SINGLE_VECTOR) - irq_queues = 1; - + irq_queues = 1; + if (!(dev->ctrl.quirks & NVME_QUIRK_SINGLE_VECTOR)) + irq_queues += (nr_io_queues - poll_queues); return pci_alloc_irq_vectors_affinity(pdev, 1, irq_queues, PCI_IRQ_ALL_TYPES | PCI_IRQ_AFFINITY, &affd); }