linux/io_uring/kbuf.h
Martin Michaelis 7deba791ad io_uring/kbuf: support min length left for incremental buffers
Incrementally consumed buffer rings are generally fully consumed, but
it's quite possible that the application has a minimum size it needs to
meet to avoid truncation. Currently that minimum limit is 1 byte, but
this should be a setting that is the hands of the application. For
recvmsg multishot, a prime use case for incrementally consumed buffers,
the application may get spurious -EFAULT returned at the end of an
incrementally consumed buffer, as less space is available than the
headers need.

Grab a u32 field in struct io_uring_buf_reg, which the application can
use to inform the kernel of the minimum size that should be available
in an incrementally consumed buffer. If less than that is available,
the current buffer is fully processed and the next one will be picked.

Cc: stable@vger.kernel.org
Fixes: ae98dbf43d ("io_uring/kbuf: add support for incremental buffer consumption")
Link: https://github.com/axboe/liburing/issues/1433
Signed-off-by: Martin Michaelis <code@mgjm.de>
[axboe: write commit message, change io_buffer_list member name]
Reviewed-by: Gabriel Krisman Bertazi <krisman@suse.de>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
2026-04-28 16:08:56 -06:00

142 lines
3.8 KiB
C

// SPDX-License-Identifier: GPL-2.0
#ifndef IOU_KBUF_H
#define IOU_KBUF_H
#include <uapi/linux/io_uring.h>
#include <linux/io_uring_types.h>
enum {
/* ring mapped provided buffers */
IOBL_BUF_RING = 1,
/* buffers are consumed incrementally rather than always fully */
IOBL_INC = 2,
};
struct io_buffer_list {
/*
* If the IOBL_BUF_RING flag is set, then buf_ring is used. If not, then
* these are classic provided buffers and ->buf_list is used.
*/
union {
struct list_head buf_list;
struct io_uring_buf_ring *buf_ring;
};
/* count of classic/legacy buffers in buffer list */
int nbufs;
__u16 bgid;
/* below is for ring provided buffers */
__u16 head;
__u16 mask;
__u16 flags;
/*
* minimum required amount to be left to reuse an incrementally
* consumed buffer. If less than this is left at consumption time,
* buffer is done and head is incremented to the next buffer.
*/
__u32 min_left_sub_one;
struct io_mapped_region region;
};
struct io_buffer {
struct list_head list;
__u64 addr;
__u32 len;
__u16 bid;
__u16 bgid;
};
enum {
/* can alloc a bigger vec */
KBUF_MODE_EXPAND = 1,
/* if bigger vec allocated, free old one */
KBUF_MODE_FREE = 2,
};
struct buf_sel_arg {
struct iovec *iovs;
size_t out_len;
size_t max_len;
unsigned short nr_iovs;
unsigned short mode;
unsigned short buf_group;
unsigned short partial_map;
};
struct io_br_sel io_buffer_select(struct io_kiocb *req, size_t *len,
unsigned buf_group, unsigned int issue_flags);
int io_buffers_select(struct io_kiocb *req, struct buf_sel_arg *arg,
struct io_br_sel *sel, unsigned int issue_flags);
int io_buffers_peek(struct io_kiocb *req, struct buf_sel_arg *arg,
struct io_br_sel *sel);
void io_destroy_buffers(struct io_ring_ctx *ctx);
int io_remove_buffers_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe);
int io_provide_buffers_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe);
int io_manage_buffers_legacy(struct io_kiocb *req, unsigned int issue_flags);
int io_register_pbuf_ring(struct io_ring_ctx *ctx, void __user *arg);
int io_unregister_pbuf_ring(struct io_ring_ctx *ctx, void __user *arg);
int io_register_pbuf_status(struct io_ring_ctx *ctx, void __user *arg);
bool io_kbuf_recycle_legacy(struct io_kiocb *req, unsigned issue_flags);
void io_kbuf_drop_legacy(struct io_kiocb *req);
unsigned int __io_put_kbufs(struct io_kiocb *req, struct io_buffer_list *bl,
int len, int nbufs);
bool io_kbuf_commit(struct io_kiocb *req,
struct io_buffer_list *bl, int len, int nr);
struct io_mapped_region *io_pbuf_get_region(struct io_ring_ctx *ctx,
unsigned int bgid);
static inline bool io_kbuf_recycle_ring(struct io_kiocb *req,
struct io_buffer_list *bl)
{
if (bl) {
req->flags &= ~(REQ_F_BUFFER_RING|REQ_F_BUFFERS_COMMIT);
return true;
}
return false;
}
static inline bool io_do_buffer_select(struct io_kiocb *req)
{
if (!(req->flags & REQ_F_BUFFER_SELECT))
return false;
return !(req->flags & (REQ_F_BUFFER_SELECTED|REQ_F_BUFFER_RING));
}
static inline bool io_kbuf_recycle(struct io_kiocb *req, struct io_buffer_list *bl,
unsigned issue_flags)
{
if (req->flags & REQ_F_BL_NO_RECYCLE)
return false;
if (req->flags & REQ_F_BUFFER_RING)
return io_kbuf_recycle_ring(req, bl);
if (req->flags & REQ_F_BUFFER_SELECTED)
return io_kbuf_recycle_legacy(req, issue_flags);
return false;
}
static inline unsigned int io_put_kbuf(struct io_kiocb *req, int len,
struct io_buffer_list *bl)
{
if (!(req->flags & (REQ_F_BUFFER_RING | REQ_F_BUFFER_SELECTED)))
return 0;
return __io_put_kbufs(req, bl, len, 1);
}
static inline unsigned int io_put_kbufs(struct io_kiocb *req, int len,
struct io_buffer_list *bl, int nbufs)
{
if (!(req->flags & (REQ_F_BUFFER_RING | REQ_F_BUFFER_SELECTED)))
return 0;
return __io_put_kbufs(req, bl, len, nbufs);
}
#endif