selftests: ublk: handle UBLK_U_IO_FETCH_IO_CMDS

Add support for UBLK_U_IO_FETCH_IO_CMDS to enable efficient batch
fetching of I/O commands using multishot io_uring operations.

Key improvements:
- Implement multishot UBLK_U_IO_FETCH_IO_CMDS for continuous command fetching
- Add fetch buffer management with page-aligned, mlocked buffers
- Process fetched I/O command tags from kernel-provided buffers
- Integrate fetch operations with existing batch I/O infrastructure
- Significantly reduce uring_cmd issuing overhead through batching

The implementation uses two fetch buffers per thread with automatic
requeuing to maintain continuous I/O command flow. Each fetch operation
retrieves multiple command tags in a single syscall, dramatically
improving performance compared to individual command fetching.

Technical details:
- Fetch buffers are page-aligned and mlocked for optimal performance
- Uses IORING_URING_CMD_MULTISHOT for continuous operation
- Automatic buffer management and requeuing on completion
- Enhanced CQE handling for fetch command completions

Signed-off-by: Ming Lei <ming.lei@redhat.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
This commit is contained in:
Ming Lei 2026-01-16 22:18:54 +08:00 committed by Jens Axboe
parent dee7024ffe
commit cb5a6b3087
3 changed files with 159 additions and 4 deletions

View File

@ -140,15 +140,63 @@ void ublk_batch_prepare(struct ublk_thread *t)
t->nr_bufs);
}
static void free_batch_fetch_buf(struct ublk_thread *t)
{
int i;
for (i = 0; i < UBLKS_T_NR_FETCH_BUF; i++) {
io_uring_free_buf_ring(&t->ring, t->fetch[i].br, 1, i);
munlock(t->fetch[i].fetch_buf, t->fetch[i].fetch_buf_size);
free(t->fetch[i].fetch_buf);
}
}
static int alloc_batch_fetch_buf(struct ublk_thread *t)
{
/* page aligned fetch buffer, and it is mlocked for speedup delivery */
unsigned pg_sz = getpagesize();
unsigned buf_size = round_up(t->dev->dev_info.queue_depth * 2, pg_sz);
int ret;
int i = 0;
for (i = 0; i < UBLKS_T_NR_FETCH_BUF; i++) {
t->fetch[i].fetch_buf_size = buf_size;
if (posix_memalign((void **)&t->fetch[i].fetch_buf, pg_sz,
t->fetch[i].fetch_buf_size))
return -ENOMEM;
/* lock fetch buffer page for fast fetching */
if (mlock(t->fetch[i].fetch_buf, t->fetch[i].fetch_buf_size))
ublk_err("%s: can't lock fetch buffer %s\n", __func__,
strerror(errno));
t->fetch[i].br = io_uring_setup_buf_ring(&t->ring, 1,
i, IOU_PBUF_RING_INC, &ret);
if (!t->fetch[i].br) {
ublk_err("Buffer ring register failed %d\n", ret);
return ret;
}
}
return 0;
}
int ublk_batch_alloc_buf(struct ublk_thread *t)
{
int ret;
ublk_assert(t->nr_commit_buf < 16);
return alloc_batch_commit_buf(t);
ret = alloc_batch_commit_buf(t);
if (ret)
return ret;
return alloc_batch_fetch_buf(t);
}
void ublk_batch_free_buf(struct ublk_thread *t)
{
free_batch_commit_buf(t);
free_batch_fetch_buf(t);
}
static void ublk_init_batch_cmd(struct ublk_thread *t, __u16 q_id,
@ -199,6 +247,76 @@ static void ublk_setup_commit_sqe(struct ublk_thread *t,
cmd->flags |= t->cmd_flags;
}
static void ublk_batch_queue_fetch(struct ublk_thread *t,
struct ublk_queue *q,
unsigned short buf_idx)
{
unsigned short nr_elem = t->fetch[buf_idx].fetch_buf_size / 2;
struct io_uring_sqe *sqe;
io_uring_buf_ring_add(t->fetch[buf_idx].br, t->fetch[buf_idx].fetch_buf,
t->fetch[buf_idx].fetch_buf_size,
0, 0, 0);
io_uring_buf_ring_advance(t->fetch[buf_idx].br, 1);
ublk_io_alloc_sqes(t, &sqe, 1);
ublk_init_batch_cmd(t, q->q_id, sqe, UBLK_U_IO_FETCH_IO_CMDS, 2, nr_elem,
buf_idx);
sqe->rw_flags= IORING_URING_CMD_MULTISHOT;
sqe->buf_group = buf_idx;
sqe->flags |= IOSQE_BUFFER_SELECT;
t->fetch[buf_idx].fetch_buf_off = 0;
}
void ublk_batch_start_fetch(struct ublk_thread *t,
struct ublk_queue *q)
{
int i;
for (i = 0; i < UBLKS_T_NR_FETCH_BUF; i++)
ublk_batch_queue_fetch(t, q, i);
}
static unsigned short ublk_compl_batch_fetch(struct ublk_thread *t,
struct ublk_queue *q,
const struct io_uring_cqe *cqe)
{
unsigned short buf_idx = user_data_to_tag(cqe->user_data);
unsigned start = t->fetch[buf_idx].fetch_buf_off;
unsigned end = start + cqe->res;
void *buf = t->fetch[buf_idx].fetch_buf;
int i;
if (cqe->res < 0)
return buf_idx;
if ((end - start) / 2 > q->q_depth) {
ublk_err("%s: fetch duplicated ios offset %u count %u\n", __func__, start, cqe->res);
for (i = start; i < end; i += 2) {
unsigned short tag = *(unsigned short *)(buf + i);
ublk_err("%u ", tag);
}
ublk_err("\n");
}
for (i = start; i < end; i += 2) {
unsigned short tag = *(unsigned short *)(buf + i);
if (tag >= q->q_depth)
ublk_err("%s: bad tag %u\n", __func__, tag);
if (q->tgt_ops->queue_io)
q->tgt_ops->queue_io(t, q, tag);
}
t->fetch[buf_idx].fetch_buf_off = end;
return buf_idx;
}
int ublk_batch_queue_prep_io_cmds(struct ublk_thread *t, struct ublk_queue *q)
{
unsigned short nr_elem = q->q_depth;
@ -258,6 +376,9 @@ void ublk_batch_compl_cmd(struct ublk_thread *t,
const struct io_uring_cqe *cqe)
{
unsigned op = user_data_to_op(cqe->user_data);
struct ublk_queue *q;
unsigned buf_idx;
unsigned q_id;
if (op == _IOC_NR(UBLK_U_IO_PREP_IO_CMDS) ||
op == _IOC_NR(UBLK_U_IO_COMMIT_IO_CMDS)) {
@ -265,6 +386,19 @@ void ublk_batch_compl_cmd(struct ublk_thread *t,
ublk_batch_compl_commit_cmd(t, cqe, op);
return;
}
/* FETCH command is per queue */
q_id = user_data_to_q_id(cqe->user_data);
q = &t->dev->q[q_id];
buf_idx = ublk_compl_batch_fetch(t, q, cqe);
if (cqe->res < 0 && cqe->res != -ENOBUFS) {
t->cmd_inflight--;
t->state |= UBLKS_T_STOPPING;
} else if (!(cqe->flags & IORING_CQE_F_MORE) || cqe->res == -ENOBUFS) {
t->cmd_inflight--;
ublk_batch_queue_fetch(t, q, buf_idx);
}
}
void ublk_batch_commit_io_cmds(struct ublk_thread *t)

View File

@ -519,6 +519,10 @@ static int ublk_thread_init(struct ublk_thread *t, unsigned long long extra_flag
int ring_depth = dev->tgt.sq_depth, cq_depth = dev->tgt.cq_depth;
int ret;
/* FETCH_IO_CMDS is multishot, so increase cq depth for BATCH_IO */
if (ublk_dev_batch_io(dev))
cq_depth += dev->dev_info.queue_depth;
ret = ublk_setup_ring(&t->ring, ring_depth, cq_depth,
IORING_SETUP_COOP_TASKRUN |
IORING_SETUP_SINGLE_ISSUER |
@ -878,7 +882,7 @@ static void ublk_handle_cqe(struct ublk_thread *t,
unsigned q_id = user_data_to_q_id(cqe->user_data);
unsigned cmd_op = user_data_to_op(cqe->user_data);
if (cqe->res < 0 && cqe->res != -ENODEV)
if (cqe->res < 0 && cqe->res != -ENODEV && cqe->res != -ENOBUFS)
ublk_err("%s: res %d userdata %llx thread state %x\n", __func__,
cqe->res, cqe->user_data, t->state);
@ -1001,9 +1005,13 @@ static __attribute__((noinline)) int __ublk_io_handler_fn(struct ublk_thread_inf
if (!ublk_thread_batch_io(&t)) {
/* submit all io commands to ublk driver */
ublk_submit_fetch_commands(&t);
} else if (!t.idx) {
} else {
struct ublk_queue *q = &t.dev->q[t.idx];
/* prepare all io commands in the 1st thread context */
ublk_batch_setup_queues(&t);
if (!t.idx)
ublk_batch_setup_queues(&t);
ublk_batch_start_fetch(&t, q);
}
do {

View File

@ -198,6 +198,13 @@ struct batch_commit_buf {
unsigned short count;
};
struct batch_fetch_buf {
struct io_uring_buf_ring *br;
void *fetch_buf;
unsigned int fetch_buf_size;
unsigned int fetch_buf_off;
};
struct ublk_thread {
struct ublk_dev *dev;
unsigned idx;
@ -224,6 +231,9 @@ struct ublk_thread {
#define UBLKS_T_COMMIT_BUF_INV_IDX ((unsigned short)-1)
struct allocator commit_buf_alloc;
struct batch_commit_buf commit;
/* FETCH_IO_CMDS buffer */
#define UBLKS_T_NR_FETCH_BUF 2
struct batch_fetch_buf fetch[UBLKS_T_NR_FETCH_BUF];
struct io_uring ring;
};
@ -515,6 +525,9 @@ static inline unsigned short ublk_batch_io_buf_idx(
/* Queue UBLK_U_IO_PREP_IO_CMDS for a specific queue with batch elements */
int ublk_batch_queue_prep_io_cmds(struct ublk_thread *t, struct ublk_queue *q);
/* Start fetching I/O commands using multishot UBLK_U_IO_FETCH_IO_CMDS */
void ublk_batch_start_fetch(struct ublk_thread *t,
struct ublk_queue *q);
/* Handle completion of batch I/O commands (prep/commit) */
void ublk_batch_compl_cmd(struct ublk_thread *t,
const struct io_uring_cqe *cqe);