From 0fa0f99fc84e41057cbdd2efbfe91c6b2f47dd9d Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Tue, 1 Feb 2022 14:54:19 +0200 Subject: [PATCH 1/4] nvme: fix a possible use-after-free in controller reset during load Unlike .queue_rq, in .submit_async_event drivers may not check the ctrl readiness for AER submission. This may lead to a use-after-free condition that was observed with nvme-tcp. The race condition may happen in the following scenario: 1. driver executes its reset_ctrl_work 2. -> nvme_stop_ctrl - flushes ctrl async_event_work 3. ctrl sends AEN which is received by the host, which in turn schedules AEN handling 4. teardown admin queue (which releases the queue socket) 5. AEN processed, submits another AER, calling the driver to submit 6. driver attempts to send the cmd ==> use-after-free In order to fix that, add ctrl state check to validate the ctrl is actually able to accept the AER submission. This addresses the above race in controller resets because the driver during teardown should: 1. change ctrl state to RESETTING 2. flush async_event_work (as well as other async work elements) So after 1,2, any other AER command will find the ctrl state to be RESETTING and bail out without submitting the AER. Signed-off-by: Sagi Grimberg --- drivers/nvme/host/core.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 5e0bfda04bd7..961a5f8a44d2 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -4253,7 +4253,14 @@ static void nvme_async_event_work(struct work_struct *work) container_of(work, struct nvme_ctrl, async_event_work); nvme_aen_uevent(ctrl); - ctrl->ops->submit_async_event(ctrl); + + /* + * The transport drivers must guarantee AER submission here is safe by + * flushing ctrl async_event_work after changing the controller state + * from LIVE and before freeing the admin queue. + */ + if (ctrl->state == NVME_CTRL_LIVE) + ctrl->ops->submit_async_event(ctrl); } static bool nvme_ctrl_pp_status(struct nvme_ctrl *ctrl) From ff9fc7ebf5c06de1ef72a69f9b1ab40af8b07f9e Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Tue, 1 Feb 2022 14:54:20 +0200 Subject: [PATCH 2/4] nvme-tcp: fix possible use-after-free in transport error_recovery work While nvme_tcp_submit_async_event_work is checking the ctrl and queue state before preparing the AER command and scheduling io_work, in order to fully prevent a race where this check is not reliable the error recovery work must flush async_event_work before continuing to destroy the admin queue after setting the ctrl state to RESETTING such that there is no race .submit_async_event and the error recovery handler itself changing the ctrl state. Tested-by: Chris Leech Signed-off-by: Sagi Grimberg --- drivers/nvme/host/tcp.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c index 4ceb28675fdf..01e24b5703db 100644 --- a/drivers/nvme/host/tcp.c +++ b/drivers/nvme/host/tcp.c @@ -2096,6 +2096,7 @@ static void nvme_tcp_error_recovery_work(struct work_struct *work) struct nvme_ctrl *ctrl = &tcp_ctrl->ctrl; nvme_stop_keep_alive(ctrl); + flush_work(&ctrl->async_event_work); nvme_tcp_teardown_io_queues(ctrl, false); /* unquiesce to fail fast pending requests */ nvme_start_queues(ctrl); From b6bb1722f34bbdbabed27acdceaf585d300c5fd2 Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Tue, 1 Feb 2022 14:54:21 +0200 Subject: [PATCH 3/4] nvme-rdma: fix possible use-after-free in transport error_recovery work While nvme_rdma_submit_async_event_work is checking the ctrl and queue state before preparing the AER command and scheduling io_work, in order to fully prevent a race where this check is not reliable the error recovery work must flush async_event_work before continuing to destroy the admin queue after setting the ctrl state to RESETTING such that there is no race .submit_async_event and the error recovery handler itself changing the ctrl state. Signed-off-by: Sagi Grimberg --- drivers/nvme/host/rdma.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index 850f84d204d0..9c55e4be8a39 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -1200,6 +1200,7 @@ static void nvme_rdma_error_recovery_work(struct work_struct *work) struct nvme_rdma_ctrl, err_work); nvme_stop_keep_alive(&ctrl->ctrl); + flush_work(&ctrl->ctrl.async_event_work); nvme_rdma_teardown_io_queues(ctrl, false); nvme_start_queues(&ctrl->ctrl); nvme_rdma_teardown_admin_queue(ctrl, false); From 6a51abdeb259a56d95f13cc67e3a0838bcda0377 Mon Sep 17 00:00:00 2001 From: Uday Shankar Date: Thu, 20 Jan 2022 12:17:37 -0800 Subject: [PATCH 4/4] nvme-fabrics: fix state check in nvmf_ctlr_matches_baseopts() Controller deletion/reset, immediately followed by or concurrent with a reconnect, is hard failing the connect attempt resulting in a complete loss of connectivity to the controller. In the connect request, fabrics looks for an existing controller with the same address components and aborts the connect if a controller already exists and the duplicate connect option isn't set. The match routine filters out controllers that are dead or dying, so they don't interfere with the new connect request. When NVME_CTRL_DELETING_NOIO was added, it missed updating the state filters in the nvmf_ctlr_matches_baseopts() routine. Thus, when in this new state, it's seen as a live controller and fails the connect request. Correct by adding the DELETING_NIO state to the match checks. Fixes: ecca390e8056 ("nvme: fix deadlock in disconnect during scan_work and/or ana_work") Cc: # v5.7+ Signed-off-by: Uday Shankar Reviewed-by: James Smart Reviewed-by: Sagi Grimberg Signed-off-by: Christoph Hellwig --- drivers/nvme/host/fabrics.h | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/nvme/host/fabrics.h b/drivers/nvme/host/fabrics.h index c3203ff1c654..1e3a09cad961 100644 --- a/drivers/nvme/host/fabrics.h +++ b/drivers/nvme/host/fabrics.h @@ -170,6 +170,7 @@ nvmf_ctlr_matches_baseopts(struct nvme_ctrl *ctrl, struct nvmf_ctrl_options *opts) { if (ctrl->state == NVME_CTRL_DELETING || + ctrl->state == NVME_CTRL_DELETING_NOIO || ctrl->state == NVME_CTRL_DEAD || strcmp(opts->subsysnqn, ctrl->opts->subsysnqn) || strcmp(opts->host->nqn, ctrl->opts->host->nqn) ||