Mirror of https://github.com/torvalds/linux.git, synced 2026-05-12 16:18:45 +02:00.
ublk: eliminate permanent pages[] array from struct ublk_buf
The pages[] array (kvmalloc'd, 8 bytes per page = 2MB for a 1GB buffer) was stored permanently in struct ublk_buf but only needed during pin_user_pages_fast() and maple tree construction. Since the maple tree already stores PFN ranges via ublk_buf_range, struct page pointers can be recovered via pfn_to_page() during unregistration. Make pages[] a temporary allocation in ublk_ctrl_reg_buf(), freed immediately after the maple tree is built. Rewrite __ublk_ctrl_unreg_buf() to iterate the maple tree for matching buf_index entries, recovering struct page pointers via pfn_to_page() and unpinning in batches of 32. Simplify ublk_buf_erase_ranges() to iterate the maple tree by buf_index instead of walking the now-removed pages[] array. Signed-off-by: Ming Lei <ming.lei@redhat.com> Link: https://patch.msgid.link/20260331153207.3635125-5-ming.lei@redhat.com Signed-off-by: Jens Axboe <axboe@kernel.dk>
This commit is contained in: parent 08677040a9 → commit 8a34e88769
|
|
@ -296,7 +296,6 @@ struct ublk_queue {
|
|||
|
||||
/* Per-registered shared memory buffer */
|
||||
struct ublk_buf {
|
||||
struct page **pages;
|
||||
unsigned int nr_pages;
|
||||
};
|
||||
|
||||
|
|
@ -5261,27 +5260,25 @@ static void ublk_unquiesce_and_resume(struct gendisk *disk)
|
|||
blk_mq_unquiesce_queue(disk->queue);
|
||||
}
|
||||
|
||||
/* Erase coalesced PFN ranges from the maple tree for pages [0, nr_pages) */
|
||||
static void ublk_buf_erase_ranges(struct ublk_device *ub,
|
||||
struct ublk_buf *ubuf,
|
||||
unsigned long nr_pages)
|
||||
/* Erase coalesced PFN ranges from the maple tree matching buf_index */
|
||||
static void ublk_buf_erase_ranges(struct ublk_device *ub, int buf_index)
|
||||
{
|
||||
unsigned long i;
|
||||
MA_STATE(mas, &ub->buf_tree, 0, ULONG_MAX);
|
||||
struct ublk_buf_range *range;
|
||||
|
||||
for (i = 0; i < nr_pages; ) {
|
||||
unsigned long pfn = page_to_pfn(ubuf->pages[i]);
|
||||
unsigned long start = i;
|
||||
|
||||
while (i + 1 < nr_pages &&
|
||||
page_to_pfn(ubuf->pages[i + 1]) == pfn + (i - start) + 1)
|
||||
i++;
|
||||
i++;
|
||||
kfree(mtree_erase(&ub->buf_tree, pfn));
|
||||
mas_lock(&mas);
|
||||
mas_for_each(&mas, range, ULONG_MAX) {
|
||||
if (range->buf_index == buf_index) {
|
||||
mas_erase(&mas);
|
||||
kfree(range);
|
||||
}
|
||||
}
|
||||
mas_unlock(&mas);
|
||||
}
|
||||
|
||||
static int __ublk_ctrl_reg_buf(struct ublk_device *ub,
|
||||
struct ublk_buf *ubuf, int index,
|
||||
struct ublk_buf *ubuf,
|
||||
struct page **pages, int index,
|
||||
unsigned short flags)
|
||||
{
|
||||
unsigned long nr_pages = ubuf->nr_pages;
|
||||
|
|
@ -5289,13 +5286,13 @@ static int __ublk_ctrl_reg_buf(struct ublk_device *ub,
|
|||
int ret;
|
||||
|
||||
for (i = 0; i < nr_pages; ) {
|
||||
unsigned long pfn = page_to_pfn(ubuf->pages[i]);
|
||||
unsigned long pfn = page_to_pfn(pages[i]);
|
||||
unsigned long start = i;
|
||||
struct ublk_buf_range *range;
|
||||
|
||||
/* Find run of consecutive PFNs */
|
||||
while (i + 1 < nr_pages &&
|
||||
page_to_pfn(ubuf->pages[i + 1]) == pfn + (i - start) + 1)
|
||||
page_to_pfn(pages[i + 1]) == pfn + (i - start) + 1)
|
||||
i++;
|
||||
i++; /* past the last page in this run */
|
||||
|
||||
|
|
@ -5320,7 +5317,7 @@ static int __ublk_ctrl_reg_buf(struct ublk_device *ub,
|
|||
return 0;
|
||||
|
||||
unwind:
|
||||
ublk_buf_erase_ranges(ub, ubuf, i);
|
||||
ublk_buf_erase_ranges(ub, index);
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
|
@ -5335,6 +5332,7 @@ static int ublk_ctrl_reg_buf(struct ublk_device *ub,
|
|||
void __user *argp = (void __user *)(unsigned long)header->addr;
|
||||
struct ublk_shmem_buf_reg buf_reg;
|
||||
unsigned long addr, size, nr_pages;
|
||||
struct page **pages = NULL;
|
||||
unsigned int gup_flags;
|
||||
struct gendisk *disk;
|
||||
struct ublk_buf *ubuf;
|
||||
|
|
@ -5371,9 +5369,8 @@ static int ublk_ctrl_reg_buf(struct ublk_device *ub,
|
|||
goto put_disk;
|
||||
}
|
||||
|
||||
ubuf->pages = kvmalloc_array(nr_pages, sizeof(*ubuf->pages),
|
||||
GFP_KERNEL);
|
||||
if (!ubuf->pages) {
|
||||
pages = kvmalloc_array(nr_pages, sizeof(*pages), GFP_KERNEL);
|
||||
if (!pages) {
|
||||
ret = -ENOMEM;
|
||||
goto err_free;
|
||||
}
|
||||
|
|
@ -5382,7 +5379,7 @@ static int ublk_ctrl_reg_buf(struct ublk_device *ub,
|
|||
if (!(buf_reg.flags & UBLK_SHMEM_BUF_READ_ONLY))
|
||||
gup_flags |= FOLL_WRITE;
|
||||
|
||||
pinned = pin_user_pages_fast(addr, nr_pages, gup_flags, ubuf->pages);
|
||||
pinned = pin_user_pages_fast(addr, nr_pages, gup_flags, pages);
|
||||
if (pinned < 0) {
|
||||
ret = pinned;
|
||||
goto err_free_pages;
|
||||
|
|
@ -5406,7 +5403,7 @@ static int ublk_ctrl_reg_buf(struct ublk_device *ub,
|
|||
if (ret)
|
||||
goto err_unlock;
|
||||
|
||||
ret = __ublk_ctrl_reg_buf(ub, ubuf, index, buf_reg.flags);
|
||||
ret = __ublk_ctrl_reg_buf(ub, ubuf, pages, index, buf_reg.flags);
|
||||
if (ret) {
|
||||
xa_erase(&ub->bufs_xa, index);
|
||||
goto err_unlock;
|
||||
|
|
@ -5414,6 +5411,7 @@ static int ublk_ctrl_reg_buf(struct ublk_device *ub,
|
|||
|
||||
mutex_unlock(&ub->mutex);
|
||||
|
||||
kvfree(pages);
|
||||
ublk_unquiesce_and_resume(disk);
|
||||
ublk_put_disk(disk);
|
||||
return index;
|
||||
|
|
@ -5422,9 +5420,9 @@ static int ublk_ctrl_reg_buf(struct ublk_device *ub,
|
|||
mutex_unlock(&ub->mutex);
|
||||
ublk_unquiesce_and_resume(disk);
|
||||
err_unpin:
|
||||
unpin_user_pages(ubuf->pages, pinned);
|
||||
unpin_user_pages(pages, pinned);
|
||||
err_free_pages:
|
||||
kvfree(ubuf->pages);
|
||||
kvfree(pages);
|
||||
err_free:
|
||||
kfree(ubuf);
|
||||
put_disk:
|
||||
|
|
@ -5433,11 +5431,36 @@ static int ublk_ctrl_reg_buf(struct ublk_device *ub,
|
|||
}
|
||||
|
||||
static void __ublk_ctrl_unreg_buf(struct ublk_device *ub,
|
||||
struct ublk_buf *ubuf)
|
||||
struct ublk_buf *ubuf, int buf_index)
|
||||
{
|
||||
ublk_buf_erase_ranges(ub, ubuf, ubuf->nr_pages);
|
||||
unpin_user_pages(ubuf->pages, ubuf->nr_pages);
|
||||
kvfree(ubuf->pages);
|
||||
MA_STATE(mas, &ub->buf_tree, 0, ULONG_MAX);
|
||||
struct ublk_buf_range *range;
|
||||
struct page *pages[32];
|
||||
|
||||
mas_lock(&mas);
|
||||
mas_for_each(&mas, range, ULONG_MAX) {
|
||||
unsigned long base, nr, off;
|
||||
|
||||
if (range->buf_index != buf_index)
|
||||
continue;
|
||||
|
||||
base = range->base_pfn;
|
||||
nr = mas.last - mas.index + 1;
|
||||
mas_erase(&mas);
|
||||
|
||||
for (off = 0; off < nr; ) {
|
||||
unsigned int batch = min_t(unsigned long,
|
||||
nr - off, 32);
|
||||
unsigned int j;
|
||||
|
||||
for (j = 0; j < batch; j++)
|
||||
pages[j] = pfn_to_page(base + off + j);
|
||||
unpin_user_pages(pages, batch);
|
||||
off += batch;
|
||||
}
|
||||
kfree(range);
|
||||
}
|
||||
mas_unlock(&mas);
|
||||
kfree(ubuf);
|
||||
}
|
||||
|
||||
|
|
@ -5468,7 +5491,7 @@ static int ublk_ctrl_unreg_buf(struct ublk_device *ub,
|
|||
return -ENOENT;
|
||||
}
|
||||
|
||||
__ublk_ctrl_unreg_buf(ub, ubuf);
|
||||
__ublk_ctrl_unreg_buf(ub, ubuf, index);
|
||||
|
||||
mutex_unlock(&ub->mutex);
|
||||
|
||||
|
|
@ -5483,7 +5506,7 @@ static void ublk_buf_cleanup(struct ublk_device *ub)
|
|||
unsigned long index;
|
||||
|
||||
xa_for_each(&ub->bufs_xa, index, ubuf)
|
||||
__ublk_ctrl_unreg_buf(ub, ubuf);
|
||||
__ublk_ctrl_unreg_buf(ub, ubuf, index);
|
||||
xa_destroy(&ub->bufs_xa);
|
||||
mtree_destroy(&ub->buf_tree);
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in New Issue
Block a user