io_uring-7.1-20260508
-----BEGIN PGP SIGNATURE-----

iQJEBAABCAAuFiEEwPw5LcreJtl1+l5K99NY+ylx4KYFAmn+FU4QHGF4Ym9lQGtl
cm5lbC5kawAKCRD301j7KXHgpnIsEADVG1zqBrfj6JE0pscyrFbFHwAjoihs7jW2
dHVRzrWKlCG0iflVAxIcoA6WQLAc1W5cmWi9GHuOHtPvWY1rdiZCvp8GMGuq40ye
3OQrMAcDdpowAUBfO9tiZ9L9Bn96HFZCa92V0PEp/fSPxuRv1HGE/yTpWsardbxn
eUGBoOMAclqawMU5thfOXFMT+DetwrY//nd799iEElzyNfk92mDCZZ5n3WPyl1J4
hn/iUu04YVozto9P17SJfEOg1c4kz84wL6ATR+2IuxrWm8/LxXspmbIovJYaCLRr
EkdevTrxABBTJ77dllnnaFg233F75ZdYr0z0xgHoOFT2totSz2lZFxqz8R0/b/NE
mHdshkn4LTU4yDHuILDt0LxImi62i1Bmn7QQIbICcMAEhTGh2hebaM/lmmHp7IlN
R98q4ALm+dTu5vp+MDlve53P4UITxsgclAICqxyY26FrNneHd/TodeDYPGYLwj4F
2EPZyzHe3WpTXMmF6pxLlEr2r8DRqZhBqj+mohN/pZK+ecs6GmCLh1F1zcuaLQyg
VOPf3FY48f6EWku0gCUpOev2iFTQHICf3RC39uR5pVVdC3+Yzc2+yqa9xg+I5ckt
gNTbmD1vkssaCmxIXR0Cj/pHskZxJqtdmDJmcBQfbLm9ytFuCGFK0IqpvpyRya/H
jGjRGJxJ5w==
=y3+7
-----END PGP SIGNATURE-----

Merge tag 'io_uring-7.1-20260508' of git://git.kernel.org/pub/scm/linux/kernel/git/axboe/linux

Pull io_uring fixes from Jens Axboe:

 - Ensure that the absolute timeouts for both the command side and the
   waiting side honor the caller's time namespace

 - Ensure tracked NAPI entries are cleared at unregistration time, as
   the NAPI polling loop checks the list state rather than the general
   NAPI state. This can lead to NAPI polling even after unregistration
   has been done. If unregistered, all NAPI polling should be disabled

 - Fix for eventfd recursive invocation handling

* tag 'io_uring-7.1-20260508' of git://git.kernel.org/pub/scm/linux/kernel/git/axboe/linux:
  io_uring/wait: honour caller's time namespace for IORING_ENTER_ABS_TIMER
  io_uring/timeout: honour caller's time namespace for IORING_TIMEOUT_ABS
  io_uring/eventfd: reset deferred signal state
  io_uring/napi: clear tracked NAPI entries on unregister
commit 8be01e1280
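Background for the two time-namespace fixes in this pull: CLOCK_MONOTONIC and CLOCK_BOOTTIME can be shifted inside a time namespace (CLONE_NEWTIME), so an absolute deadline computed on the caller's clock does not line up with the host clock that kernel timers actually run on. A minimal standalone demo of that shift (not from this tree; plain userspace C, needs root or CAP_SYS_ADMIN):

/* timens_demo.c - show that CLOCK_MONOTONIC inside a time namespace
 * differs from the host's, which is why an absolute io_uring timeout
 * has to be translated with timens_ktime_to_host() in the kernel.
 * Build: gcc -o timens_demo timens_demo.c
 */
#define _GNU_SOURCE
#include <fcntl.h>
#include <sched.h>
#include <stdio.h>
#include <string.h>
#include <sys/wait.h>
#include <time.h>
#include <unistd.h>

#ifndef CLONE_NEWTIME
#define CLONE_NEWTIME 0x00000080        /* from <linux/sched.h> */
#endif

int main(void)
{
        const char *off = "monotonic 1000 0";   /* +1000s in the namespace */
        struct timespec ts;
        int fd;

        clock_gettime(CLOCK_MONOTONIC, &ts);
        printf("host   CLOCK_MONOTONIC: %lld.%09ld\n",
               (long long)ts.tv_sec, ts.tv_nsec);

        if (unshare(CLONE_NEWTIME)) {           /* applies to children only */
                perror("unshare(CLONE_NEWTIME)");
                return 1;
        }
        /* offsets may only be written before the first task enters the ns */
        fd = open("/proc/self/timens_offsets", O_WRONLY);
        if (fd < 0 || write(fd, off, strlen(off)) < 0) {
                perror("timens_offsets");
                return 1;
        }
        close(fd);

        if (fork() == 0) {      /* the child runs inside the new namespace */
                clock_gettime(CLOCK_MONOTONIC, &ts);
                printf("timens CLOCK_MONOTONIC: %lld.%09ld\n",
                       (long long)ts.tv_sec, ts.tv_nsec);
                return 0;
        }
        wait(NULL);
        return 0;
}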
io_uring/eventfd.c:

@@ -43,6 +43,7 @@ static void io_eventfd_do_signal(struct rcu_head *rcu)
 {
        struct io_ev_fd *ev_fd = container_of(rcu, struct io_ev_fd, rcu);
 
+       atomic_andnot(BIT(IO_EVENTFD_OP_SIGNAL_BIT), &ev_fd->ops);
        eventfd_signal_mask(ev_fd->cq_ev_fd, EPOLL_URING_WAKE);
        io_eventfd_put(ev_fd);
 }
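The eventfd one-liner resets the deferred-signal bit before eventfd_signal_mask() runs, so an event that arrives while the RCU callback executes can arm a fresh signal instead of being swallowed by a stale flag. A userspace analogue of the pattern (illustrative sketch only, hypothetical names; C11 atomics stand in for the kernel's atomic ops):

/* defer_signal.c - the "arm once, reset before delivering" pattern */
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

#define OP_SIGNAL_BIT   (1u << 0)

static _Atomic unsigned int ops;

/* returns true if the caller should queue the deferred delivery */
static bool signal_arm(void)
{
        unsigned int old = atomic_fetch_or(&ops, OP_SIGNAL_BIT);

        return !(old & OP_SIGNAL_BIT);  /* only the first setter queues */
}

static void signal_deliver(void)
{
        /* reset the deferred state first, as the atomic_andnot() fix does */
        atomic_fetch_and(&ops, ~OP_SIGNAL_BIT);
        puts("signaled");               /* stand-in for eventfd_signal_mask() */
}

int main(void)
{
        if (signal_arm())
                signal_deliver();
        /* without the reset, this second arm would be lost */
        printf("re-armed after delivery: %s\n", signal_arm() ? "yes" : "no");
        return 0;
}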
io_uring/napi.c:

@@ -38,7 +38,8 @@ static inline ktime_t net_to_ktime(unsigned long t)
        return ns_to_ktime(t << 10);
 }
 
-int __io_napi_add_id(struct io_ring_ctx *ctx, unsigned int napi_id)
+int __io_napi_add_id(struct io_ring_ctx *ctx, unsigned int napi_id,
+                    unsigned int mode)
 {
        struct hlist_head *hash_list;
        struct io_napi_entry *e;
@@ -69,6 +70,11 @@ int __io_napi_add_id(struct io_ring_ctx *ctx, unsigned int napi_id)
         * kfree()
         */
        spin_lock(&ctx->napi_lock);
+       if (unlikely(READ_ONCE(ctx->napi_track_mode) != mode)) {
+               spin_unlock(&ctx->napi_lock);
+               kfree(e);
+               return -EINVAL;
+       }
        if (unlikely(io_napi_hash_find(hash_list, napi_id))) {
                spin_unlock(&ctx->napi_lock);
                kfree(e);
@@ -196,9 +202,14 @@ __io_napi_do_busy_loop(struct io_ring_ctx *ctx,
                               bool (*loop_end)(void *, unsigned long),
                               void *loop_end_arg)
 {
-       if (READ_ONCE(ctx->napi_track_mode) == IO_URING_NAPI_TRACKING_STATIC)
+       switch (READ_ONCE(ctx->napi_track_mode)) {
+       case IO_URING_NAPI_TRACKING_STATIC:
                return static_tracking_do_busy_loop(ctx, loop_end, loop_end_arg);
-       return dynamic_tracking_do_busy_loop(ctx, loop_end, loop_end_arg);
+       case IO_URING_NAPI_TRACKING_DYNAMIC:
+               return dynamic_tracking_do_busy_loop(ctx, loop_end, loop_end_arg);
+       default:
+               return false;
+       }
 }
 
 static void io_napi_blocking_busy_loop(struct io_ring_ctx *ctx,
@@ -273,13 +284,13 @@ static int io_napi_register_napi(struct io_ring_ctx *ctx,
        default:
                return -EINVAL;
        }
-
        /* clean the napi list for new settings */
+       WRITE_ONCE(ctx->napi_track_mode, IO_URING_NAPI_TRACKING_INACTIVE);
        io_napi_free(ctx);
-       WRITE_ONCE(ctx->napi_track_mode, napi->op_param);
        /* cap NAPI at 10 msec of spin time */
        napi->busy_poll_to = min(10000, napi->busy_poll_to);
        WRITE_ONCE(ctx->napi_busy_poll_dt, napi->busy_poll_to * NSEC_PER_USEC);
        WRITE_ONCE(ctx->napi_prefer_busy_poll, !!napi->prefer_busy_poll);
+       WRITE_ONCE(ctx->napi_track_mode, napi->op_param);
        return 0;
 }
 
@@ -315,7 +326,8 @@ int io_register_napi(struct io_ring_ctx *ctx, void __user *arg)
        case IO_URING_NAPI_STATIC_ADD_ID:
                if (curr.op_param != IO_URING_NAPI_TRACKING_STATIC)
                        return -EINVAL;
-               return __io_napi_add_id(ctx, napi.op_param);
+               return __io_napi_add_id(ctx, napi.op_param,
+                                       IO_URING_NAPI_TRACKING_STATIC);
        case IO_URING_NAPI_STATIC_DEL_ID:
                if (curr.op_param != IO_URING_NAPI_TRACKING_STATIC)
                        return -EINVAL;
@@ -343,9 +355,10 @@ int io_unregister_napi(struct io_ring_ctx *ctx, void __user *arg)
        if (arg && copy_to_user(arg, &curr, sizeof(curr)))
                return -EFAULT;
 
-       WRITE_ONCE(ctx->napi_track_mode, IO_URING_NAPI_TRACKING_INACTIVE);
        WRITE_ONCE(ctx->napi_busy_poll_dt, 0);
        WRITE_ONCE(ctx->napi_prefer_busy_poll, false);
+       WRITE_ONCE(ctx->napi_track_mode, IO_URING_NAPI_TRACKING_INACTIVE);
+       io_napi_free(ctx);
        return 0;
 }
 
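The __io_napi_add_id() change closes a race with register/unregister: an entry allocated for one tracking mode could otherwise be linked into a list that a concurrent mode switch had already torn down with io_napi_free(). The shape of the fix, as a self-contained sketch (pthread mutex in place of ctx->napi_lock, all names hypothetical):

/* napi_recheck.c - allocate optimistically, re-check state under the lock */
#include <pthread.h>
#include <stdlib.h>

enum track_mode { TRACKING_INACTIVE, TRACKING_DYNAMIC, TRACKING_STATIC };

struct entry {
        unsigned int napi_id;
        struct entry *next;
};

struct ctx {
        pthread_mutex_t lock;
        enum track_mode mode;   /* published with WRITE_ONCE() in the kernel */
        struct entry *list;
};

static int add_id(struct ctx *ctx, unsigned int napi_id, enum track_mode mode)
{
        struct entry *e = malloc(sizeof(*e));   /* allocated outside the lock */

        if (!e)
                return -1;
        e->napi_id = napi_id;

        pthread_mutex_lock(&ctx->lock);
        if (ctx->mode != mode) {        /* mode switched since we decided to add */
                pthread_mutex_unlock(&ctx->lock);
                free(e);
                return -1;
        }
        e->next = ctx->list;
        ctx->list = e;
        pthread_mutex_unlock(&ctx->lock);
        return 0;
}

int main(void)
{
        struct ctx ctx = { PTHREAD_MUTEX_INITIALIZER, TRACKING_DYNAMIC, NULL };

        return add_id(&ctx, 1, TRACKING_DYNAMIC);       /* modes match: ok */
}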
io_uring/napi.h:

@@ -15,7 +15,8 @@ void io_napi_free(struct io_ring_ctx *ctx);
 int io_register_napi(struct io_ring_ctx *ctx, void __user *arg);
 int io_unregister_napi(struct io_ring_ctx *ctx, void __user *arg);
 
-int __io_napi_add_id(struct io_ring_ctx *ctx, unsigned int napi_id);
+int __io_napi_add_id(struct io_ring_ctx *ctx, unsigned int napi_id,
+                    unsigned int mode);
 
 void __io_napi_busy_loop(struct io_ring_ctx *ctx, struct io_wait_queue *iowq);
 int io_napi_sqpoll_busy_poll(struct io_ring_ctx *ctx);
@@ -43,13 +44,14 @@ static inline void io_napi_add(struct io_kiocb *req)
 {
        struct io_ring_ctx *ctx = req->ctx;
        struct socket *sock;
+       unsigned int mode = IO_URING_NAPI_TRACKING_DYNAMIC;
 
-       if (READ_ONCE(ctx->napi_track_mode) != IO_URING_NAPI_TRACKING_DYNAMIC)
+       if (READ_ONCE(ctx->napi_track_mode) != mode)
                return;
 
        sock = sock_from_file(req->file);
        if (sock && sock->sk)
-               __io_napi_add_id(ctx, READ_ONCE(sock->sk->sk_napi_id));
+               __io_napi_add_id(ctx, READ_ONCE(sock->sk->sk_napi_id), mode);
 }
 
 #else
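io_napi_add() above is the dynamic-tracking path, where the kernel samples sk->sk_napi_id per request. Under static tracking the application registers NAPI IDs itself, typically discovered with the standard SO_INCOMING_NAPI_ID socket option; a small sketch of that discovery step (illustration, not from this tree):

/* query the NAPI ID a connected socket last received traffic on */
#include <stdio.h>
#include <sys/socket.h>

static unsigned int get_napi_id(int sockfd)
{
        unsigned int napi_id = 0;
        socklen_t len = sizeof(napi_id);

        /* nonzero only after traffic has arrived via a NAPI-enabled device */
        if (getsockopt(sockfd, SOL_SOCKET, SO_INCOMING_NAPI_ID, &napi_id, &len))
                perror("getsockopt(SO_INCOMING_NAPI_ID)");
        return napi_id;
}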
io_uring/timeout.c:

@@ -3,6 +3,7 @@
 #include <linux/errno.h>
 #include <linux/file.h>
 #include <linux/io_uring.h>
+#include <linux/time_namespace.h>
 
 #include <trace/events/io_uring.h>
 
@@ -35,6 +36,22 @@ struct io_timeout_rem {
        bool ltimeout;
 };
 
+static clockid_t io_flags_to_clock(unsigned flags)
+{
+       switch (flags & IORING_TIMEOUT_CLOCK_MASK) {
+       case IORING_TIMEOUT_BOOTTIME:
+               return CLOCK_BOOTTIME;
+       case IORING_TIMEOUT_REALTIME:
+               return CLOCK_REALTIME;
+       default:
+               /* can't happen, vetted at prep time */
+               WARN_ON_ONCE(1);
+               fallthrough;
+       case 0:
+               return CLOCK_MONOTONIC;
+       }
+}
+
 static int io_parse_user_time(ktime_t *time, u64 arg, unsigned flags)
 {
        struct timespec64 ts;
@@ -43,7 +60,7 @@ static int io_parse_user_time(ktime_t *time, u64 arg, unsigned flags)
                *time = ns_to_ktime(arg);
                if (*time < 0)
                        return -EINVAL;
-               return 0;
+               goto out;
        }
 
        if (get_timespec64(&ts, u64_to_user_ptr(arg)))
@@ -51,6 +68,9 @@ static int io_parse_user_time(ktime_t *time, u64 arg, unsigned flags)
        if (ts.tv_sec < 0 || ts.tv_nsec < 0)
                return -EINVAL;
        *time = timespec64_to_ktime(ts);
+out:
+       if (flags & IORING_TIMEOUT_ABS)
+               *time = timens_ktime_to_host(io_flags_to_clock(flags), *time);
        return 0;
 }
 
@@ -399,18 +419,7 @@ static enum hrtimer_restart io_link_timeout_fn(struct hrtimer *timer)
 
 static clockid_t io_timeout_get_clock(struct io_timeout_data *data)
 {
-       switch (data->flags & IORING_TIMEOUT_CLOCK_MASK) {
-       case IORING_TIMEOUT_BOOTTIME:
-               return CLOCK_BOOTTIME;
-       case IORING_TIMEOUT_REALTIME:
-               return CLOCK_REALTIME;
-       default:
-               /* can't happen, vetted at prep time */
-               WARN_ON_ONCE(1);
-               fallthrough;
-       case 0:
-               return CLOCK_MONOTONIC;
-       }
+       return io_flags_to_clock(data->flags);
 }
 
 static int io_linked_timeout_update(struct io_ring_ctx *ctx, __u64 user_data,
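The user-visible side of the timeout.c change: an IORING_TIMEOUT_ABS deadline is now interpreted against the caller's (possibly namespaced) clock. A minimal liburing sketch that arms such an absolute timeout (assumes liburing is installed; error handling trimmed):

/* abs_timeout.c - build: gcc abs_timeout.c -luring */
#include <liburing.h>
#include <stdio.h>
#include <time.h>

int main(void)
{
        struct io_uring ring;
        struct io_uring_sqe *sqe;
        struct io_uring_cqe *cqe;
        struct __kernel_timespec ts;
        struct timespec now;

        if (io_uring_queue_init(8, &ring, 0) < 0)
                return 1;

        /* absolute deadline: now + 1s on the caller's CLOCK_MONOTONIC */
        clock_gettime(CLOCK_MONOTONIC, &now);
        ts.tv_sec = now.tv_sec + 1;
        ts.tv_nsec = now.tv_nsec;

        sqe = io_uring_get_sqe(&ring);
        io_uring_prep_timeout(sqe, &ts, 0, IORING_TIMEOUT_ABS);
        io_uring_submit(&ring);

        io_uring_wait_cqe(&ring, &cqe);
        printf("timeout fired: res=%d (-ETIME expected)\n", cqe->res);
        io_uring_cqe_seen(&ring, cqe);
        io_uring_queue_exit(&ring);
        return 0;
}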
io_uring/wait.c:

@@ -5,6 +5,7 @@
 #include <linux/kernel.h>
 #include <linux/sched/signal.h>
 #include <linux/io_uring.h>
+#include <linux/time_namespace.h>
 
 #include <trace/events/io_uring.h>
 
@@ -229,7 +230,10 @@ int io_cqring_wait(struct io_ring_ctx *ctx, int min_events, u32 flags,
 
        if (ext_arg->ts_set) {
                iowq.timeout = timespec64_to_ktime(ext_arg->ts);
-               if (!(flags & IORING_ENTER_ABS_TIMER))
+               if (flags & IORING_ENTER_ABS_TIMER)
+                       iowq.timeout = timens_ktime_to_host(ctx->clockid,
+                                                           iowq.timeout);
+               else
                        iowq.timeout = ktime_add(iowq.timeout, start_time);
        }
 
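Both wait.c and timeout.c now funnel absolute times through timens_ktime_to_host(). Conceptually the conversion just strips the per-namespace clock offset (namespace clock = host clock + offset), so a deadline taken on the caller's clock maps back onto the host timeline; a toy standalone illustration with made-up numbers:

/* host = namespace - offset; without it the wait would overshoot */
#include <stdio.h>

static long long ns_deadline_to_host(long long deadline_ns, long long offset_ns)
{
        return deadline_ns - offset_ns;
}

int main(void)
{
        long long host_now = 5000, offset = 1000;
        long long ns_now = host_now + offset;   /* what the caller's clock reads */
        long long deadline = ns_now + 250;      /* caller: "wake me in 250ns" */

        /* unconverted, the kernel would sleep 1250ns instead of 250ns */
        printf("host deadline %lld, host now %lld\n",
               ns_deadline_to_host(deadline, offset), host_now);
        return 0;
}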