ublk: replace xarray with IDA for shmem buffer index allocation

Remove struct ublk_buf, which only contained nr_pages, a field that
was never read after registration. Use an IDA for pure index
allocation instead of an xarray. Make __ublk_ctrl_unreg_buf() return
int so the caller can detect an invalid index without a separate lookup.

Simplify ublk_buf_cleanup() to walk the maple tree directly and
unpin all pages in one pass, instead of iterating the xarray by
buffer index.

Suggested-by: Caleb Sander Mateos <csander@purestorage.com>
Signed-off-by: Ming Lei <tom.leiming@gmail.com>
Link: https://patch.msgid.link/20260409133020.3780098-5-tom.leiming@gmail.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
This commit is contained in:
Ming Lei 2026-04-09 21:30:16 +08:00 committed by Jens Axboe
parent 8ea8566a9a
commit 5e864438e2

View File

@ -297,11 +297,6 @@ struct ublk_queue {
struct ublk_io ios[] __counted_by(q_depth); struct ublk_io ios[] __counted_by(q_depth);
}; };
/* Per-registered shared memory buffer */
struct ublk_buf {
unsigned int nr_pages;
};
/* Maple tree value: maps a PFN range to buffer location */ /* Maple tree value: maps a PFN range to buffer location */
struct ublk_buf_range { struct ublk_buf_range {
unsigned short buf_index; unsigned short buf_index;
@ -345,7 +340,7 @@ struct ublk_device {
/* shared memory zero copy */ /* shared memory zero copy */
struct maple_tree buf_tree; struct maple_tree buf_tree;
struct xarray bufs_xa; struct ida buf_ida;
struct ublk_queue *queues[]; struct ublk_queue *queues[];
}; };
@ -4698,7 +4693,7 @@ static int ublk_ctrl_add_dev(const struct ublksrv_ctrl_cmd *header)
spin_lock_init(&ub->lock); spin_lock_init(&ub->lock);
mutex_init(&ub->cancel_mutex); mutex_init(&ub->cancel_mutex);
mt_init(&ub->buf_tree); mt_init(&ub->buf_tree);
xa_init_flags(&ub->bufs_xa, XA_FLAGS_ALLOC); ida_init(&ub->buf_ida);
INIT_WORK(&ub->partition_scan_work, ublk_partition_scan_work); INIT_WORK(&ub->partition_scan_work, ublk_partition_scan_work);
ret = ublk_alloc_dev_number(ub, header->dev_id); ret = ublk_alloc_dev_number(ub, header->dev_id);
@ -5279,11 +5274,9 @@ static void ublk_buf_erase_ranges(struct ublk_device *ub, int buf_index)
} }
static int __ublk_ctrl_reg_buf(struct ublk_device *ub, static int __ublk_ctrl_reg_buf(struct ublk_device *ub,
struct ublk_buf *ubuf, struct page **pages, unsigned long nr_pages,
struct page **pages, int index, int index, unsigned short flags)
unsigned short flags)
{ {
unsigned long nr_pages = ubuf->nr_pages;
unsigned long i; unsigned long i;
int ret; int ret;
@ -5335,9 +5328,8 @@ static int ublk_ctrl_reg_buf(struct ublk_device *ub,
struct page **pages = NULL; struct page **pages = NULL;
unsigned int gup_flags; unsigned int gup_flags;
struct gendisk *disk; struct gendisk *disk;
struct ublk_buf *ubuf;
long pinned; long pinned;
u32 index; int index;
int ret; int ret;
if (!ublk_dev_support_shmem_zc(ub)) if (!ublk_dev_support_shmem_zc(ub))
@ -5367,16 +5359,10 @@ static int ublk_ctrl_reg_buf(struct ublk_device *ub,
return -ENODEV; return -ENODEV;
/* Pin pages before quiescing (may sleep) */ /* Pin pages before quiescing (may sleep) */
ubuf = kzalloc(sizeof(*ubuf), GFP_KERNEL);
if (!ubuf) {
ret = -ENOMEM;
goto put_disk;
}
pages = kvmalloc_array(nr_pages, sizeof(*pages), GFP_KERNEL); pages = kvmalloc_array(nr_pages, sizeof(*pages), GFP_KERNEL);
if (!pages) { if (!pages) {
ret = -ENOMEM; ret = -ENOMEM;
goto err_free; goto put_disk;
} }
gup_flags = FOLL_LONGTERM; gup_flags = FOLL_LONGTERM;
@ -5392,7 +5378,6 @@ static int ublk_ctrl_reg_buf(struct ublk_device *ub,
ret = -EFAULT; ret = -EFAULT;
goto err_unpin; goto err_unpin;
} }
ubuf->nr_pages = nr_pages;
/* /*
* Drain inflight I/O and quiesce the queue so no new requests * Drain inflight I/O and quiesce the queue so no new requests
@ -5403,13 +5388,15 @@ static int ublk_ctrl_reg_buf(struct ublk_device *ub,
mutex_lock(&ub->mutex); mutex_lock(&ub->mutex);
ret = xa_alloc(&ub->bufs_xa, &index, ubuf, xa_limit_16b, GFP_KERNEL); index = ida_alloc_max(&ub->buf_ida, USHRT_MAX, GFP_KERNEL);
if (ret) if (index < 0) {
ret = index;
goto err_unlock; goto err_unlock;
}
ret = __ublk_ctrl_reg_buf(ub, ubuf, pages, index, buf_reg.flags); ret = __ublk_ctrl_reg_buf(ub, pages, nr_pages, index, buf_reg.flags);
if (ret) { if (ret) {
xa_erase(&ub->bufs_xa, index); ida_free(&ub->buf_ida, index);
goto err_unlock; goto err_unlock;
} }
@ -5427,19 +5414,17 @@ static int ublk_ctrl_reg_buf(struct ublk_device *ub,
unpin_user_pages(pages, pinned); unpin_user_pages(pages, pinned);
err_free_pages: err_free_pages:
kvfree(pages); kvfree(pages);
err_free:
kfree(ubuf);
put_disk: put_disk:
ublk_put_disk(disk); ublk_put_disk(disk);
return ret; return ret;
} }
static void __ublk_ctrl_unreg_buf(struct ublk_device *ub, static int __ublk_ctrl_unreg_buf(struct ublk_device *ub, int buf_index)
struct ublk_buf *ubuf, int buf_index)
{ {
MA_STATE(mas, &ub->buf_tree, 0, ULONG_MAX); MA_STATE(mas, &ub->buf_tree, 0, ULONG_MAX);
struct ublk_buf_range *range; struct ublk_buf_range *range;
struct page *pages[32]; struct page *pages[32];
int ret = -ENOENT;
mas_lock(&mas); mas_lock(&mas);
mas_for_each(&mas, range, ULONG_MAX) { mas_for_each(&mas, range, ULONG_MAX) {
@ -5448,6 +5433,7 @@ static void __ublk_ctrl_unreg_buf(struct ublk_device *ub,
if (range->buf_index != buf_index) if (range->buf_index != buf_index)
continue; continue;
ret = 0;
base = mas.index; base = mas.index;
nr = mas.last - base + 1; nr = mas.last - base + 1;
mas_erase(&mas); mas_erase(&mas);
@ -5465,7 +5451,8 @@ static void __ublk_ctrl_unreg_buf(struct ublk_device *ub,
kfree(range); kfree(range);
} }
mas_unlock(&mas); mas_unlock(&mas);
kfree(ubuf);
return ret;
} }
static int ublk_ctrl_unreg_buf(struct ublk_device *ub, static int ublk_ctrl_unreg_buf(struct ublk_device *ub,
@ -5473,11 +5460,14 @@ static int ublk_ctrl_unreg_buf(struct ublk_device *ub,
{ {
int index = (int)header->data[0]; int index = (int)header->data[0];
struct gendisk *disk; struct gendisk *disk;
struct ublk_buf *ubuf; int ret;
if (!ublk_dev_support_shmem_zc(ub)) if (!ublk_dev_support_shmem_zc(ub))
return -EOPNOTSUPP; return -EOPNOTSUPP;
if (index < 0 || index > USHRT_MAX)
return -EINVAL;
disk = ublk_get_disk(ub); disk = ublk_get_disk(ub);
if (!disk) if (!disk)
return -ENODEV; return -ENODEV;
@ -5487,32 +5477,42 @@ static int ublk_ctrl_unreg_buf(struct ublk_device *ub,
mutex_lock(&ub->mutex); mutex_lock(&ub->mutex);
ubuf = xa_erase(&ub->bufs_xa, index); ret = __ublk_ctrl_unreg_buf(ub, index);
if (!ubuf) { if (!ret)
mutex_unlock(&ub->mutex); ida_free(&ub->buf_ida, index);
ublk_unquiesce_and_resume(disk);
ublk_put_disk(disk);
return -ENOENT;
}
__ublk_ctrl_unreg_buf(ub, ubuf, index);
mutex_unlock(&ub->mutex); mutex_unlock(&ub->mutex);
ublk_unquiesce_and_resume(disk); ublk_unquiesce_and_resume(disk);
ublk_put_disk(disk); ublk_put_disk(disk);
return 0; return ret;
} }
static void ublk_buf_cleanup(struct ublk_device *ub) static void ublk_buf_cleanup(struct ublk_device *ub)
{ {
struct ublk_buf *ubuf; MA_STATE(mas, &ub->buf_tree, 0, ULONG_MAX);
unsigned long index; struct ublk_buf_range *range;
struct page *pages[32];
xa_for_each(&ub->bufs_xa, index, ubuf) mas_for_each(&mas, range, ULONG_MAX) {
__ublk_ctrl_unreg_buf(ub, ubuf, index); unsigned long base = mas.index;
xa_destroy(&ub->bufs_xa); unsigned long nr = mas.last - base + 1;
unsigned long off;
for (off = 0; off < nr; ) {
unsigned int batch = min_t(unsigned long,
nr - off, 32);
unsigned int j;
for (j = 0; j < batch; j++)
pages[j] = pfn_to_page(base + off + j);
unpin_user_pages(pages, batch);
off += batch;
}
kfree(range);
}
mtree_destroy(&ub->buf_tree); mtree_destroy(&ub->buf_tree);
ida_destroy(&ub->buf_ida);
} }
/* Check if request pages match a registered shared memory buffer */ /* Check if request pages match a registered shared memory buffer */