mirror of
https://github.com/torvalds/linux.git
synced 2026-05-12 16:18:45 +02:00
io_uring-7.0-20260403
-----BEGIN PGP SIGNATURE-----

iQJEBAABCAAuFiEEwPw5LcreJtl1+l5K99NY+ylx4KYFAmnPokUQHGF4Ym9lQGtl
cm5lbC5kawAKCRD301j7KXHgpjW6D/91Xg/mGvYUBVvwEhP0ydPncuAsThnkoDHY
6Pu+VxawKW480yAC06nktAeDgJNnpFpJXatPEtk2n8r7Ol3Cx0sDWdQjzoKSlBC7
9wj+MVpCcU970Gb1G6PNLKQoW+DxKuD9Iq6Ph434uCx/bgC2EKthj0vYpssoU48S
OxyFGBTjhbgnmiZaAEMHpLC/LJP27eH24QQbobeVWyY7C6jy6YI0WQaoG4Qt+UMd
S2XdFe97xrVaCVS3E5X5BAyHCcMX4e1D6/Y7bNDGG3Ke673RuUJHhqvk8P1NJnTI
CaMlfoGhNw36FpkzTYIvoZlkCFl48axXmscRcekTg4d9ssnY9aSFVY+xMSHmkhKu
zs1r1tZK970xUbQK0NAoD9T+LsFKU1S0PaEaCL2KMHwz9vG0uY7iUYteKqdM8L/f
jUpYcxn9R6AhdeL77eEu3w6vCdMqP2+OgDv1uEpyJv6oWSdhfI38+EIwmMoEq+Az
BkDipYNh4lAiI23qbS9CDe5aam6pv+hwecDn3x7MZVpGZ6cJjs43QWwk+jZz+KQj
gacQu01q/TN7rpyaFYhkxPGKHVs259/uSLJY647ORgpJNXg4a+6DlB7/4YdW35il
O4gnKECSflmoePm7B4QFh8Q89XPma74hVqtDB7opz0xL3PQMq07EQKQmoNP7RTOp
GLW71uD2MA==
=q/wK
-----END PGP SIGNATURE-----

Merge tag 'io_uring-7.0-20260403' of git://git.kernel.org/pub/scm/linux/kernel/git/axboe/linux

Pull io_uring fixes from Jens Axboe:

- A previous fix in this release covered the case of the rings being
  RCU protected during resize, but it missed a few spots. This covers
  the rest

- Fix the cBPF filters when COW'ed, introduced in this merge window

- Fix for an attempt to import a zero sized buffer

- Fix for a missing clamp in importing bundle buffers

* tag 'io_uring-7.0-20260403' of git://git.kernel.org/pub/scm/linux/kernel/git/axboe/linux:
  io_uring/bpf_filters: retain COW'ed settings on parse failures
  io_uring: protect remaining lockless ctx->rings accesses with RCU
  io_uring/rsrc: reject zero-length fixed buffer import
  io_uring/net: fix slab-out-of-bounds read in io_bundle_nbufs()
This commit is contained in:
commit
e41255ce7a
|
|
@ -2015,7 +2015,7 @@ int io_submit_sqes(struct io_ring_ctx *ctx, unsigned int nr)
|
|||
if (ctx->flags & IORING_SETUP_SQ_REWIND)
|
||||
entries = ctx->sq_entries;
|
||||
else
|
||||
entries = io_sqring_entries(ctx);
|
||||
entries = __io_sqring_entries(ctx);
|
||||
|
||||
entries = min(nr, entries);
|
||||
if (unlikely(!entries))
|
||||
|
|
@ -2250,7 +2250,9 @@ static __poll_t io_uring_poll(struct file *file, poll_table *wait)
|
|||
*/
|
||||
poll_wait(file, &ctx->poll_wq, wait);
|
||||
|
||||
if (!io_sqring_full(ctx))
|
||||
rcu_read_lock();
|
||||
|
||||
if (!__io_sqring_full(ctx))
|
||||
mask |= EPOLLOUT | EPOLLWRNORM;
|
||||
|
||||
/*
|
||||
|
|
@ -2270,6 +2272,7 @@ static __poll_t io_uring_poll(struct file *file, poll_table *wait)
|
|||
if (__io_cqring_events_user(ctx) || io_has_work(ctx))
|
||||
mask |= EPOLLIN | EPOLLRDNORM;
|
||||
|
||||
rcu_read_unlock();
|
||||
return mask;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -142,16 +142,28 @@ struct io_wait_queue {
|
|||
#endif
|
||||
};
|
||||
|
||||
static inline struct io_rings *io_get_rings(struct io_ring_ctx *ctx)
|
||||
{
|
||||
return rcu_dereference_check(ctx->rings_rcu,
|
||||
lockdep_is_held(&ctx->uring_lock) ||
|
||||
lockdep_is_held(&ctx->completion_lock));
|
||||
}
|
||||
|
||||
static inline bool io_should_wake(struct io_wait_queue *iowq)
|
||||
{
|
||||
struct io_ring_ctx *ctx = iowq->ctx;
|
||||
int dist = READ_ONCE(ctx->rings->cq.tail) - (int) iowq->cq_tail;
|
||||
struct io_rings *rings;
|
||||
int dist;
|
||||
|
||||
guard(rcu)();
|
||||
rings = io_get_rings(ctx);
|
||||
|
||||
/*
|
||||
* Wake up if we have enough events, or if a timeout occurred since we
|
||||
* started waiting. For timeouts, we always want to return to userspace,
|
||||
* regardless of event count.
|
||||
*/
|
||||
dist = READ_ONCE(rings->cq.tail) - (int) iowq->cq_tail;
|
||||
return dist >= 0 || atomic_read(&ctx->cq_timeouts) != iowq->nr_timeouts;
|
||||
}
|
||||
|
||||
|
|
@ -431,9 +443,9 @@ static inline void io_cqring_wake(struct io_ring_ctx *ctx)
|
|||
__io_wq_wake(&ctx->cq_wait);
|
||||
}
|
||||
|
||||
static inline bool io_sqring_full(struct io_ring_ctx *ctx)
|
||||
static inline bool __io_sqring_full(struct io_ring_ctx *ctx)
|
||||
{
|
||||
struct io_rings *r = ctx->rings;
|
||||
struct io_rings *r = io_get_rings(ctx);
|
||||
|
||||
/*
|
||||
* SQPOLL must use the actual sqring head, as using the cached_sq_head
|
||||
|
|
@ -445,9 +457,15 @@ static inline bool io_sqring_full(struct io_ring_ctx *ctx)
|
|||
return READ_ONCE(r->sq.tail) - READ_ONCE(r->sq.head) == ctx->sq_entries;
|
||||
}
|
||||
|
||||
static inline unsigned int io_sqring_entries(struct io_ring_ctx *ctx)
|
||||
static inline bool io_sqring_full(struct io_ring_ctx *ctx)
|
||||
{
|
||||
struct io_rings *rings = ctx->rings;
|
||||
guard(rcu)();
|
||||
return __io_sqring_full(ctx);
|
||||
}
|
||||
|
||||
static inline unsigned int __io_sqring_entries(struct io_ring_ctx *ctx)
|
||||
{
|
||||
struct io_rings *rings = io_get_rings(ctx);
|
||||
unsigned int entries;
|
||||
|
||||
/* make sure SQ entry isn't read before tail */
|
||||
|
|
@ -455,6 +473,12 @@ static inline unsigned int io_sqring_entries(struct io_ring_ctx *ctx)
|
|||
return min(entries, ctx->sq_entries);
|
||||
}
|
||||
|
||||
static inline unsigned int io_sqring_entries(struct io_ring_ctx *ctx)
|
||||
{
|
||||
guard(rcu)();
|
||||
return __io_sqring_entries(ctx);
|
||||
}
|
||||
|
||||
/*
|
||||
* Don't complete immediately but use deferred completion infrastructure.
|
||||
* Protected by ->uring_lock and can only be used either with
|
||||
|
|
|
|||
|
|
@ -421,6 +421,8 @@ int io_sendmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
|
|||
|
||||
sr->done_io = 0;
|
||||
sr->len = READ_ONCE(sqe->len);
|
||||
if (unlikely(sr->len < 0))
|
||||
return -EINVAL;
|
||||
sr->flags = READ_ONCE(sqe->ioprio);
|
||||
if (sr->flags & ~SENDMSG_FLAGS)
|
||||
return -EINVAL;
|
||||
|
|
@ -791,6 +793,8 @@ int io_recvmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
|
|||
|
||||
sr->umsg = u64_to_user_ptr(READ_ONCE(sqe->addr));
|
||||
sr->len = READ_ONCE(sqe->len);
|
||||
if (unlikely(sr->len < 0))
|
||||
return -EINVAL;
|
||||
sr->flags = READ_ONCE(sqe->ioprio);
|
||||
if (sr->flags & ~RECVMSG_FLAGS)
|
||||
return -EINVAL;
|
||||
|
|
|
|||
|
|
@ -178,9 +178,17 @@ static __cold int io_register_restrictions(struct io_ring_ctx *ctx,
|
|||
return -EBUSY;
|
||||
|
||||
ret = io_parse_restrictions(arg, nr_args, &ctx->restrictions);
|
||||
/* Reset all restrictions if an error happened */
|
||||
/*
|
||||
* Reset all restrictions if an error happened, but retain any COW'ed
|
||||
* settings.
|
||||
*/
|
||||
if (ret < 0) {
|
||||
struct io_bpf_filters *bpf = ctx->restrictions.bpf_filters;
|
||||
bool cowed = ctx->restrictions.bpf_filters_cow;
|
||||
|
||||
memset(&ctx->restrictions, 0, sizeof(ctx->restrictions));
|
||||
ctx->restrictions.bpf_filters = bpf;
|
||||
ctx->restrictions.bpf_filters_cow = cowed;
|
||||
return ret;
|
||||
}
|
||||
if (ctx->restrictions.op_registered)
|
||||
|
|
|
|||
|
|
@ -1061,6 +1061,10 @@ static int io_import_fixed(int ddir, struct iov_iter *iter,
|
|||
return ret;
|
||||
if (!(imu->dir & (1 << ddir)))
|
||||
return -EFAULT;
|
||||
if (unlikely(!len)) {
|
||||
iov_iter_bvec(iter, ddir, NULL, 0, 0);
|
||||
return 0;
|
||||
}
|
||||
|
||||
offset = buf_addr - imu->ubuf;
|
||||
|
||||
|
|
|
|||
|
|
@ -79,12 +79,15 @@ static enum hrtimer_restart io_cqring_min_timer_wakeup(struct hrtimer *timer)
|
|||
if (io_has_work(ctx))
|
||||
goto out_wake;
|
||||
/* got events since we started waiting, min timeout is done */
|
||||
if (iowq->cq_min_tail != READ_ONCE(ctx->rings->cq.tail))
|
||||
goto out_wake;
|
||||
/* if we have any events and min timeout expired, we're done */
|
||||
if (io_cqring_events(ctx))
|
||||
goto out_wake;
|
||||
scoped_guard(rcu) {
|
||||
struct io_rings *rings = io_get_rings(ctx);
|
||||
|
||||
if (iowq->cq_min_tail != READ_ONCE(rings->cq.tail))
|
||||
goto out_wake;
|
||||
/* if we have any events and min timeout expired, we're done */
|
||||
if (io_cqring_events(ctx))
|
||||
goto out_wake;
|
||||
}
|
||||
/*
|
||||
* If using deferred task_work running and application is waiting on
|
||||
* more than one request, ensure we reset it now where we are switching
|
||||
|
|
@ -186,9 +189,9 @@ int io_cqring_wait(struct io_ring_ctx *ctx, int min_events, u32 flags,
|
|||
struct ext_arg *ext_arg)
|
||||
{
|
||||
struct io_wait_queue iowq;
|
||||
struct io_rings *rings = ctx->rings;
|
||||
struct io_rings *rings;
|
||||
ktime_t start_time;
|
||||
int ret;
|
||||
int ret, nr_wait;
|
||||
|
||||
min_events = min_t(int, min_events, ctx->cq_entries);
|
||||
|
||||
|
|
@ -201,15 +204,23 @@ int io_cqring_wait(struct io_ring_ctx *ctx, int min_events, u32 flags,
|
|||
|
||||
if (unlikely(test_bit(IO_CHECK_CQ_OVERFLOW_BIT, &ctx->check_cq)))
|
||||
io_cqring_do_overflow_flush(ctx);
|
||||
if (__io_cqring_events_user(ctx) >= min_events)
|
||||
|
||||
rcu_read_lock();
|
||||
rings = io_get_rings(ctx);
|
||||
if (__io_cqring_events_user(ctx) >= min_events) {
|
||||
rcu_read_unlock();
|
||||
return 0;
|
||||
}
|
||||
|
||||
init_waitqueue_func_entry(&iowq.wq, io_wake_function);
|
||||
iowq.wq.private = current;
|
||||
INIT_LIST_HEAD(&iowq.wq.entry);
|
||||
iowq.ctx = ctx;
|
||||
iowq.cq_tail = READ_ONCE(ctx->rings->cq.head) + min_events;
|
||||
iowq.cq_min_tail = READ_ONCE(ctx->rings->cq.tail);
|
||||
iowq.cq_tail = READ_ONCE(rings->cq.head) + min_events;
|
||||
iowq.cq_min_tail = READ_ONCE(rings->cq.tail);
|
||||
nr_wait = (int) iowq.cq_tail - READ_ONCE(rings->cq.tail);
|
||||
rcu_read_unlock();
|
||||
rings = NULL;
|
||||
iowq.nr_timeouts = atomic_read(&ctx->cq_timeouts);
|
||||
iowq.hit_timeout = 0;
|
||||
iowq.min_timeout = ext_arg->min_time;
|
||||
|
|
@ -240,14 +251,6 @@ int io_cqring_wait(struct io_ring_ctx *ctx, int min_events, u32 flags,
|
|||
trace_io_uring_cqring_wait(ctx, min_events);
|
||||
do {
|
||||
unsigned long check_cq;
|
||||
int nr_wait;
|
||||
|
||||
/* if min timeout has been hit, don't reset wait count */
|
||||
if (!iowq.hit_timeout)
|
||||
nr_wait = (int) iowq.cq_tail -
|
||||
READ_ONCE(ctx->rings->cq.tail);
|
||||
else
|
||||
nr_wait = 1;
|
||||
|
||||
if (ctx->flags & IORING_SETUP_DEFER_TASKRUN) {
|
||||
atomic_set(&ctx->cq_wait_nr, nr_wait);
|
||||
|
|
@ -298,11 +301,20 @@ int io_cqring_wait(struct io_ring_ctx *ctx, int min_events, u32 flags,
|
|||
break;
|
||||
}
|
||||
cond_resched();
|
||||
|
||||
/* if min timeout has been hit, don't reset wait count */
|
||||
if (!iowq.hit_timeout)
|
||||
scoped_guard(rcu)
|
||||
nr_wait = (int) iowq.cq_tail -
|
||||
READ_ONCE(io_get_rings(ctx)->cq.tail);
|
||||
else
|
||||
nr_wait = 1;
|
||||
} while (1);
|
||||
|
||||
if (!(ctx->flags & IORING_SETUP_DEFER_TASKRUN))
|
||||
finish_wait(&ctx->cq_wait, &iowq.wq);
|
||||
restore_saved_sigmask_unless(ret == -EINTR);
|
||||
|
||||
return READ_ONCE(rings->cq.head) == READ_ONCE(rings->cq.tail) ? ret : 0;
|
||||
guard(rcu)();
|
||||
return READ_ONCE(io_get_rings(ctx)->cq.head) == READ_ONCE(io_get_rings(ctx)->cq.tail) ? ret : 0;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -28,12 +28,15 @@ void io_cqring_do_overflow_flush(struct io_ring_ctx *ctx);
|
|||
|
||||
static inline unsigned int __io_cqring_events(struct io_ring_ctx *ctx)
|
||||
{
|
||||
return ctx->cached_cq_tail - READ_ONCE(ctx->rings->cq.head);
|
||||
struct io_rings *rings = io_get_rings(ctx);
|
||||
return ctx->cached_cq_tail - READ_ONCE(rings->cq.head);
|
||||
}
|
||||
|
||||
static inline unsigned int __io_cqring_events_user(struct io_ring_ctx *ctx)
|
||||
{
|
||||
return READ_ONCE(ctx->rings->cq.tail) - READ_ONCE(ctx->rings->cq.head);
|
||||
struct io_rings *rings = io_get_rings(ctx);
|
||||
|
||||
return READ_ONCE(rings->cq.tail) - READ_ONCE(rings->cq.head);
|
||||
}
|
||||
|
||||
/*
|
||||
|
|
|
|||
Loading…
Reference in New Issue
Block a user