From 926245c7d22271307606c88b1fbb2539a8550e94 Mon Sep 17 00:00:00 2001 From: Maurizio Lombardi Date: Fri, 15 Oct 2021 10:26:34 +0200 Subject: [PATCH 1/6] nvmet-tcp: fix a memory leak when releasing a queue page_frag_free() won't completely release the memory allocated for the commands, the cache page must be explicitly freed by calling __page_frag_cache_drain(). This bug can be easily reproduced by repeatedly executing the following command on the initiator: $echo 1 > /sys/devices/virtual/nvme-fabrics/ctl/nvme0/reset_controller Signed-off-by: Maurizio Lombardi Reviewed-by: Sagi Grimberg Reviewed-by: John Meneghini Signed-off-by: Christoph Hellwig --- drivers/nvme/target/tcp.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/nvme/target/tcp.c b/drivers/nvme/target/tcp.c index 07ee347ea3f3..c33a0464346f 100644 --- a/drivers/nvme/target/tcp.c +++ b/drivers/nvme/target/tcp.c @@ -1428,6 +1428,7 @@ static void nvmet_tcp_uninit_data_in_cmds(struct nvmet_tcp_queue *queue) static void nvmet_tcp_release_queue_work(struct work_struct *w) { + struct page *page; struct nvmet_tcp_queue *queue = container_of(w, struct nvmet_tcp_queue, release_work); @@ -1447,6 +1448,8 @@ static void nvmet_tcp_release_queue_work(struct work_struct *w) nvmet_tcp_free_crypto(queue); ida_simple_remove(&nvmet_tcp_queue_ida, queue->idx); + page = virt_to_head_page(queue->pf_cache.va); + __page_frag_cache_drain(page, queue->pf_cache.pagecnt_bias); kfree(queue); } From 25e1f67eda4a19c91dc05c84d6d413c53efb447b Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Sun, 24 Oct 2021 10:43:31 +0300 Subject: [PATCH 2/6] nvme-tcp: fix H2CData PDU send accounting (again) We should not access request members after the last send, even to determine if indeed it was the last data payload send. The reason is that a completion could have arrived and trigger a new execution of the request which overridden these members. This was fixed by commit 825619b09ad3 ("nvme-tcp: fix possible use-after-completion"). Commit e371af033c56 broke that assumption again to address cases where multiple r2t pdus are sent per request. To fix it, we need to record the request data_sent and data_len and after the payload network send we reference these counters to determine weather we should advance the request iterator. Fixes: e371af033c56 ("nvme-tcp: fix incorrect h2cdata pdu offset accounting") Reported-by: Keith Busch Cc: stable@vger.kernel.org # 5.10+ Signed-off-by: Sagi Grimberg Reviewed-by: Keith Busch Signed-off-by: Christoph Hellwig --- drivers/nvme/host/tcp.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c index 3c1c29dd3020..0626d14e6d4c 100644 --- a/drivers/nvme/host/tcp.c +++ b/drivers/nvme/host/tcp.c @@ -926,12 +926,14 @@ static void nvme_tcp_fail_request(struct nvme_tcp_request *req) static int nvme_tcp_try_send_data(struct nvme_tcp_request *req) { struct nvme_tcp_queue *queue = req->queue; + int req_data_len = req->data_len; while (true) { struct page *page = nvme_tcp_req_cur_page(req); size_t offset = nvme_tcp_req_cur_offset(req); size_t len = nvme_tcp_req_cur_length(req); bool last = nvme_tcp_pdu_last_send(req, len); + int req_data_sent = req->data_sent; int ret, flags = MSG_DONTWAIT; if (last && !queue->data_digest && !nvme_tcp_queue_more(queue)) @@ -958,7 +960,7 @@ static int nvme_tcp_try_send_data(struct nvme_tcp_request *req) * in the request where we don't want to modify it as we may * compete with the RX path completing the request. */ - if (req->data_sent + ret < req->data_len) + if (req_data_sent + ret < req_data_len) nvme_tcp_advance_req(req, ret); /* fully successful last send in current PDU */ From ce7723e9cdae4eb3030da082876580f4b2dc0861 Mon Sep 17 00:00:00 2001 From: Varun Prakash Date: Tue, 26 Oct 2021 19:01:55 +0530 Subject: [PATCH 3/6] nvme-tcp: fix possible req->offset corruption With commit db5ad6b7f8cd ("nvme-tcp: try to send request in queue_rq context") r2t and response PDU can get processed while send function is executing. Current data digest send code uses req->offset after kernel_sendmsg(), this creates a race condition where req->offset gets reset before it is used in send function. This can happen in two cases - 1. Target sends r2t PDU which resets req->offset. 2. Target send response PDU which completes the req and then req is used for a new command, nvme_tcp_setup_cmd_pdu() resets req->offset. Fix this by storing req->offset in a local variable and using this local variable after kernel_sendmsg(). Fixes: db5ad6b7f8cd ("nvme-tcp: try to send request in queue_rq context") Signed-off-by: Varun Prakash Reviewed-by: Keith Busch Reviewed-by: Sagi Grimberg Signed-off-by: Christoph Hellwig --- drivers/nvme/host/tcp.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c index 0626d14e6d4c..1a209f0d7181 100644 --- a/drivers/nvme/host/tcp.c +++ b/drivers/nvme/host/tcp.c @@ -1050,6 +1050,7 @@ static int nvme_tcp_try_send_data_pdu(struct nvme_tcp_request *req) static int nvme_tcp_try_send_ddgst(struct nvme_tcp_request *req) { struct nvme_tcp_queue *queue = req->queue; + size_t offset = req->offset; int ret; struct msghdr msg = { .msg_flags = MSG_DONTWAIT }; struct kvec iov = { @@ -1066,7 +1067,7 @@ static int nvme_tcp_try_send_ddgst(struct nvme_tcp_request *req) if (unlikely(ret <= 0)) return ret; - if (req->offset + ret == NVME_TCP_DIGEST_LENGTH) { + if (offset + ret == NVME_TCP_DIGEST_LENGTH) { nvme_tcp_done_send_req(queue); return 1; } From d89b9f3bbb58e9e378881209756b0723694f22ff Mon Sep 17 00:00:00 2001 From: Varun Prakash Date: Mon, 25 Oct 2021 22:47:30 +0530 Subject: [PATCH 4/6] nvme-tcp: fix data digest pointer calculation ddgst is of type __le32, &req->ddgst + req->offset increases &req->ddgst by 4 * req->offset, fix this by type casting &req->ddgst to u8 *. Fixes: 3f2304f8c6d6 ("nvme-tcp: add NVMe over TCP host driver") Signed-off-by: Varun Prakash Reviewed-by: Sagi Grimberg Signed-off-by: Christoph Hellwig --- drivers/nvme/host/tcp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c index 1a209f0d7181..4ae562d30d2b 100644 --- a/drivers/nvme/host/tcp.c +++ b/drivers/nvme/host/tcp.c @@ -1054,7 +1054,7 @@ static int nvme_tcp_try_send_ddgst(struct nvme_tcp_request *req) int ret; struct msghdr msg = { .msg_flags = MSG_DONTWAIT }; struct kvec iov = { - .iov_base = &req->ddgst + req->offset, + .iov_base = (u8 *)&req->ddgst + req->offset, .iov_len = NVME_TCP_DIGEST_LENGTH - req->offset }; From e790de54e94a7a15fb725b34724d41d41cbaa60c Mon Sep 17 00:00:00 2001 From: Varun Prakash Date: Mon, 25 Oct 2021 22:46:54 +0530 Subject: [PATCH 5/6] nvmet-tcp: fix data digest pointer calculation exp_ddgst is of type __le32, &cmd->exp_ddgst + cmd->offset increases &cmd->exp_ddgst by 4 * cmd->offset, fix this by type casting &cmd->exp_ddgst to u8 *. Fixes: 872d26a391da ("nvmet-tcp: add NVMe over TCP target driver") Signed-off-by: Varun Prakash Reviewed-by: Sagi Grimberg Signed-off-by: Christoph Hellwig --- drivers/nvme/target/tcp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/nvme/target/tcp.c b/drivers/nvme/target/tcp.c index c33a0464346f..586ca20837e7 100644 --- a/drivers/nvme/target/tcp.c +++ b/drivers/nvme/target/tcp.c @@ -702,7 +702,7 @@ static int nvmet_try_send_ddgst(struct nvmet_tcp_cmd *cmd, bool last_in_batch) struct nvmet_tcp_queue *queue = cmd->queue; struct msghdr msg = { .msg_flags = MSG_DONTWAIT }; struct kvec iov = { - .iov_base = &cmd->exp_ddgst + cmd->offset, + .iov_base = (u8 *)&cmd->exp_ddgst + cmd->offset, .iov_len = NVME_TCP_DIGEST_LENGTH - cmd->offset }; int ret; From 86aeda32b887cdaeb0f4b7bfc9971e36377181c7 Mon Sep 17 00:00:00 2001 From: Amit Engel Date: Wed, 27 Oct 2021 09:49:27 +0300 Subject: [PATCH 6/6] nvmet-tcp: fix header digest verification Pass the correct length to nvmet_tcp_verify_hdgst, which is the pdu header length. This fixes a wrong behaviour where header digest verification passes although the digest is wrong. Signed-off-by: Amit Engel Reviewed-by: Sagi Grimberg Signed-off-by: Christoph Hellwig --- drivers/nvme/target/tcp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/nvme/target/tcp.c b/drivers/nvme/target/tcp.c index 586ca20837e7..46c3b3be7e03 100644 --- a/drivers/nvme/target/tcp.c +++ b/drivers/nvme/target/tcp.c @@ -1096,7 +1096,7 @@ static int nvmet_tcp_try_recv_pdu(struct nvmet_tcp_queue *queue) } if (queue->hdr_digest && - nvmet_tcp_verify_hdgst(queue, &queue->pdu, queue->offset)) { + nvmet_tcp_verify_hdgst(queue, &queue->pdu, hdr->hlen)) { nvmet_tcp_fatal_error(queue); /* fatal */ return -EPROTO; }