io_uring/napi: clear tracked NAPI entries on unregister

IORING_UNREGISTER_NAPI disables NAPI busy polling, but it currently
leaves any previously tracked NAPI IDs on the ring context. The normal
wait path only checks whether the list is empty before entering the busy
poll helper, so an unregistered ring can still observe stale entries and
run an unexpected busy poll pass.

Make unregister switch the context to inactive and free the tracked
entries. Do the same inactive transition while changing the tracking
strategy, and recheck the expected tracking mode under napi_lock before
inserting a newly learned NAPI ID. This prevents a racing poll path from
repopulating the list after unregister or reconfiguration.

Also make the busy poll dispatcher ignore inactive mode explicitly.

Signed-off-by: Yufan Chen <ericterminal@gmail.com>
Fixes: 6bf90bd8c5 ("io_uring/napi: add static napi tracking strategy")
Link: https://patch.msgid.link/20260503175610.35521-1-yufan.chen@linux.dev
Signed-off-by: Jens Axboe <axboe@kernel.dk>
This commit is contained in:
Yufan Chen 2026-05-04 01:56:10 +08:00 committed by Jens Axboe
parent 17666e2d75
commit b8c2e9e276
2 changed files with 25 additions and 10 deletions

View File

@@ -38,7 +38,8 @@ static inline ktime_t net_to_ktime(unsigned long t)
return ns_to_ktime(t << 10);
}
int __io_napi_add_id(struct io_ring_ctx *ctx, unsigned int napi_id)
int __io_napi_add_id(struct io_ring_ctx *ctx, unsigned int napi_id,
unsigned int mode)
{
struct hlist_head *hash_list;
struct io_napi_entry *e;
@@ -69,6 +70,11 @@ int __io_napi_add_id(struct io_ring_ctx *ctx, unsigned int napi_id)
* kfree()
*/
spin_lock(&ctx->napi_lock);
if (unlikely(READ_ONCE(ctx->napi_track_mode) != mode)) {
spin_unlock(&ctx->napi_lock);
kfree(e);
return -EINVAL;
}
if (unlikely(io_napi_hash_find(hash_list, napi_id))) {
spin_unlock(&ctx->napi_lock);
kfree(e);
@@ -196,9 +202,14 @@ __io_napi_do_busy_loop(struct io_ring_ctx *ctx,
bool (*loop_end)(void *, unsigned long),
void *loop_end_arg)
{
if (READ_ONCE(ctx->napi_track_mode) == IO_URING_NAPI_TRACKING_STATIC)
switch (READ_ONCE(ctx->napi_track_mode)) {
case IO_URING_NAPI_TRACKING_STATIC:
return static_tracking_do_busy_loop(ctx, loop_end, loop_end_arg);
return dynamic_tracking_do_busy_loop(ctx, loop_end, loop_end_arg);
case IO_URING_NAPI_TRACKING_DYNAMIC:
return dynamic_tracking_do_busy_loop(ctx, loop_end, loop_end_arg);
default:
return false;
}
}
static void io_napi_blocking_busy_loop(struct io_ring_ctx *ctx,
@@ -273,13 +284,13 @@ static int io_napi_register_napi(struct io_ring_ctx *ctx,
default:
return -EINVAL;
}
/* clean the napi list for new settings */
WRITE_ONCE(ctx->napi_track_mode, IO_URING_NAPI_TRACKING_INACTIVE);
io_napi_free(ctx);
WRITE_ONCE(ctx->napi_track_mode, napi->op_param);
/* cap NAPI at 10 msec of spin time */
napi->busy_poll_to = min(10000, napi->busy_poll_to);
WRITE_ONCE(ctx->napi_busy_poll_dt, napi->busy_poll_to * NSEC_PER_USEC);
WRITE_ONCE(ctx->napi_prefer_busy_poll, !!napi->prefer_busy_poll);
WRITE_ONCE(ctx->napi_track_mode, napi->op_param);
return 0;
}
@@ -315,7 +326,8 @@ int io_register_napi(struct io_ring_ctx *ctx, void __user *arg)
case IO_URING_NAPI_STATIC_ADD_ID:
if (curr.op_param != IO_URING_NAPI_TRACKING_STATIC)
return -EINVAL;
return __io_napi_add_id(ctx, napi.op_param);
return __io_napi_add_id(ctx, napi.op_param,
IO_URING_NAPI_TRACKING_STATIC);
case IO_URING_NAPI_STATIC_DEL_ID:
if (curr.op_param != IO_URING_NAPI_TRACKING_STATIC)
return -EINVAL;
@@ -343,9 +355,10 @@ int io_unregister_napi(struct io_ring_ctx *ctx, void __user *arg)
if (arg && copy_to_user(arg, &curr, sizeof(curr)))
return -EFAULT;
WRITE_ONCE(ctx->napi_track_mode, IO_URING_NAPI_TRACKING_INACTIVE);
WRITE_ONCE(ctx->napi_busy_poll_dt, 0);
WRITE_ONCE(ctx->napi_prefer_busy_poll, false);
WRITE_ONCE(ctx->napi_track_mode, IO_URING_NAPI_TRACKING_INACTIVE);
io_napi_free(ctx);
return 0;
}

View File

@@ -15,7 +15,8 @@ void io_napi_free(struct io_ring_ctx *ctx);
int io_register_napi(struct io_ring_ctx *ctx, void __user *arg);
int io_unregister_napi(struct io_ring_ctx *ctx, void __user *arg);
int __io_napi_add_id(struct io_ring_ctx *ctx, unsigned int napi_id);
int __io_napi_add_id(struct io_ring_ctx *ctx, unsigned int napi_id,
unsigned int mode);
void __io_napi_busy_loop(struct io_ring_ctx *ctx, struct io_wait_queue *iowq);
int io_napi_sqpoll_busy_poll(struct io_ring_ctx *ctx);
@@ -43,13 +44,14 @@ static inline void io_napi_add(struct io_kiocb *req)
{
struct io_ring_ctx *ctx = req->ctx;
struct socket *sock;
unsigned int mode = IO_URING_NAPI_TRACKING_DYNAMIC;
if (READ_ONCE(ctx->napi_track_mode) != IO_URING_NAPI_TRACKING_DYNAMIC)
if (READ_ONCE(ctx->napi_track_mode) != mode)
return;
sock = sock_from_file(req->file);
if (sock && sock->sk)
__io_napi_add_id(ctx, READ_ONCE(sock->sk->sk_napi_id));
__io_napi_add_id(ctx, READ_ONCE(sock->sk->sk_napi_id), mode);
}
#else