From 01e68ce08a30db3d842ce7a55f7f6e0474a55f9a Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Wed, 8 Mar 2023 07:18:51 -0700 Subject: [PATCH 1/3] io_uring/io-wq: stop setting PF_NO_SETAFFINITY on io-wq workers Every now and then reports come in that are puzzled on why changing affinity on the io-wq workers fails with EINVAL. This happens because they set PF_NO_SETAFFINITY as part of their creation, as io-wq organizes workers into groups based on what CPU they are running on. However, this is purely an optimization and not a functional requirement. We can allow setting affinity, and just lazily update our worker to wqe mappings. If a given io-wq thread times out, it normally exits if there's no more work to do. The exception is if it's the last worker available. For the timeout case, check the affinity of the worker against group mask and exit even if it's the last worker. New workers should be created with the right mask and in the right location. Reported-by:Daniel Dao Link: https://lore.kernel.org/io-uring/CA+wXwBQwgxB3_UphSny-yAP5b26meeOu1W4TwYVcD_+5gOhvPw@mail.gmail.com/ Signed-off-by: Jens Axboe --- io_uring/io-wq.c | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/io_uring/io-wq.c b/io_uring/io-wq.c index 411bb2d1acd4..f81c0a7136a5 100644 --- a/io_uring/io-wq.c +++ b/io_uring/io-wq.c @@ -616,7 +616,7 @@ static int io_wqe_worker(void *data) struct io_wqe_acct *acct = io_wqe_get_acct(worker); struct io_wqe *wqe = worker->wqe; struct io_wq *wq = wqe->wq; - bool last_timeout = false; + bool exit_mask = false, last_timeout = false; char buf[TASK_COMM_LEN]; worker->flags |= (IO_WORKER_F_UP | IO_WORKER_F_RUNNING); @@ -632,8 +632,11 @@ static int io_wqe_worker(void *data) io_worker_handle_work(worker); raw_spin_lock(&wqe->lock); - /* timed out, exit unless we're the last worker */ - if (last_timeout && acct->nr_workers > 1) { + /* + * Last sleep timed out. Exit if we're not the last worker, + * or if someone modified our affinity. + */ + if (last_timeout && (exit_mask || acct->nr_workers > 1)) { acct->nr_workers--; raw_spin_unlock(&wqe->lock); __set_current_state(TASK_RUNNING); @@ -652,7 +655,11 @@ static int io_wqe_worker(void *data) continue; break; } - last_timeout = !ret; + if (!ret) { + last_timeout = true; + exit_mask = !cpumask_test_cpu(raw_smp_processor_id(), + wqe->cpu_mask); + } } if (test_bit(IO_WQ_BIT_EXIT, &wq->state)) @@ -704,7 +711,6 @@ static void io_init_new_worker(struct io_wqe *wqe, struct io_worker *worker, tsk->worker_private = worker; worker->task = tsk; set_cpus_allowed_ptr(tsk, wqe->cpu_mask); - tsk->flags |= PF_NO_SETAFFINITY; raw_spin_lock(&wqe->lock); hlist_nulls_add_head_rcu(&worker->nulls_node, &wqe->free_list); From 03b3d6be73e81ddb7c2930d942cdd17f4cfd5ba5 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Wed, 8 Mar 2023 09:26:13 -0700 Subject: [PATCH 2/3] io_uring/uring_cmd: ensure that device supports IOPOLL It's possible for a file type to support uring commands, but not pollable ones. Hence before issuing one of those, we should check that it is supported and error out upfront if it isn't. Cc: stable@vger.kernel.org Fixes: 5756a3a7e713 ("io_uring: add iopoll infrastructure for io_uring_cmd") Link: https://github.com/axboe/liburing/issues/816 Reviewed-by: Kanchan Joshi Signed-off-by: Jens Axboe --- io_uring/uring_cmd.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/io_uring/uring_cmd.c b/io_uring/uring_cmd.c index 446a189b78b0..2e4c483075d3 100644 --- a/io_uring/uring_cmd.c +++ b/io_uring/uring_cmd.c @@ -108,7 +108,7 @@ int io_uring_cmd(struct io_kiocb *req, unsigned int issue_flags) struct file *file = req->file; int ret; - if (!req->file->f_op->uring_cmd) + if (!file->f_op->uring_cmd) return -EOPNOTSUPP; ret = security_uring_cmd(ioucmd); @@ -120,6 +120,8 @@ int io_uring_cmd(struct io_kiocb *req, unsigned int issue_flags) if (ctx->flags & IORING_SETUP_CQE32) issue_flags |= IO_URING_F_CQE32; if (ctx->flags & IORING_SETUP_IOPOLL) { + if (!file->f_op->uring_cmd_iopoll) + return -EOPNOTSUPP; issue_flags |= IO_URING_F_IOPOLL; req->iopoll_completed = 0; WRITE_ONCE(ioucmd->cookie, NULL); From fa780334a8c392d959ae05eb19f2410b3a1e6cb0 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Thu, 9 Mar 2023 09:51:13 -0700 Subject: [PATCH 3/3] =?UTF-8?q?io=5Furing:=20silence=20variable=20?= =?UTF-8?q?=E2=80=98prev=E2=80=99=20set=20but=20not=20used=20warning?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If io_uring.o is built with W=1, it triggers a warning: io_uring/io_uring.c: In function ‘__io_submit_flush_completions’: io_uring/io_uring.c:1502:40: warning: variable ‘prev’ set but not used [-Wunused-but-set-variable] 1502 | struct io_wq_work_node *node, *prev; | ^~~~ which is due to the wq_list_for_each() iterator always keeping a 'prev' variable. Most users need this to remove an entry from a list, for example, but __io_submit_flush_completions() never does that. Add a basic helper that doesn't track prev instead, and use that in that function. Reported-by: Vincenzo Palazzo Reviewed-by: Vincenzo Palazzo Signed-off-by: Jens Axboe --- io_uring/io_uring.c | 4 ++-- io_uring/slist.h | 5 ++++- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c index fd1cc35a1c00..722624b6d0dc 100644 --- a/io_uring/io_uring.c +++ b/io_uring/io_uring.c @@ -1499,14 +1499,14 @@ void io_free_batch_list(struct io_ring_ctx *ctx, struct io_wq_work_node *node) static void __io_submit_flush_completions(struct io_ring_ctx *ctx) __must_hold(&ctx->uring_lock) { - struct io_wq_work_node *node, *prev; struct io_submit_state *state = &ctx->submit_state; + struct io_wq_work_node *node; __io_cq_lock(ctx); /* must come first to preserve CQE ordering in failure cases */ if (state->cqes_count) __io_flush_post_cqes(ctx); - wq_list_for_each(node, prev, &state->compl_reqs) { + __wq_list_for_each(node, &state->compl_reqs) { struct io_kiocb *req = container_of(node, struct io_kiocb, comp_list); diff --git a/io_uring/slist.h b/io_uring/slist.h index 7c198a40d5f1..0eb194817242 100644 --- a/io_uring/slist.h +++ b/io_uring/slist.h @@ -3,6 +3,9 @@ #include +#define __wq_list_for_each(pos, head) \ + for (pos = (head)->first; pos; pos = (pos)->next) + #define wq_list_for_each(pos, prv, head) \ for (pos = (head)->first, prv = NULL; pos; prv = pos, pos = (pos)->next) @@ -113,4 +116,4 @@ static inline struct io_wq_work *wq_next_work(struct io_wq_work *work) return container_of(work->list.next, struct io_wq_work, list); } -#endif // INTERNAL_IO_SLIST_H \ No newline at end of file +#endif // INTERNAL_IO_SLIST_H