mirror of
https://github.com/torvalds/linux.git
synced 2026-05-27 00:22:00 +02:00
bpf: arena: use kmalloc_nolock() in place of kvcalloc()
To make arena_alloc_pages() safe to call from any context, replace kvcalloc() with kmalloc_nolock() so that it doesn't sleep or take any locks. kmalloc_nolock() returns NULL for allocations larger than KMALLOC_MAX_CACHE_SIZE, which is (PAGE_SIZE * 2) = 8KB on systems with 4KB pages. So, cap the allocation done by kmalloc_nolock() at 1024 * 8 bytes and reuse the array in a loop. Signed-off-by: Puranjay Mohan <puranjay@kernel.org> Link: https://lore.kernel.org/r/20251222195022.431211-3-puranjay@kernel.org Signed-off-by: Alexei Starovoitov <ast@kernel.org>
This commit is contained in:
parent
c336b0b327
commit
360c35f8ff
|
|
@ -44,6 +44,8 @@
|
|||
#define GUARD_SZ round_up(1ull << sizeof_field(struct bpf_insn, off) * 8, PAGE_SIZE << 1)
|
||||
#define KERN_VM_SZ (SZ_4G + GUARD_SZ)
|
||||
|
||||
static void arena_free_pages(struct bpf_arena *arena, long uaddr, long page_cnt);
|
||||
|
||||
struct bpf_arena {
|
||||
struct bpf_map map;
|
||||
u64 user_vm_start;
|
||||
|
|
@ -500,8 +502,10 @@ static long arena_alloc_pages(struct bpf_arena *arena, long uaddr, long page_cnt
|
|||
/* user_vm_end/start are fixed before bpf prog runs */
|
||||
long page_cnt_max = (arena->user_vm_end - arena->user_vm_start) >> PAGE_SHIFT;
|
||||
u64 kern_vm_start = bpf_arena_get_kern_vm_start(arena);
|
||||
struct apply_range_data data;
|
||||
struct page **pages = NULL;
|
||||
long mapped = 0;
|
||||
long remaining, mapped = 0;
|
||||
long alloc_pages;
|
||||
long pgoff = 0;
|
||||
u32 uaddr32;
|
||||
int ret, i;
|
||||
|
|
@ -518,17 +522,19 @@ static long arena_alloc_pages(struct bpf_arena *arena, long uaddr, long page_cnt
|
|||
return 0;
|
||||
}
|
||||
|
||||
/* zeroing is needed, since alloc_pages_bulk() only fills in non-zero entries */
|
||||
pages = kvcalloc(page_cnt, sizeof(struct page *), GFP_KERNEL);
|
||||
/* Cap allocation size to KMALLOC_MAX_CACHE_SIZE so kmalloc_nolock() can succeed. */
|
||||
alloc_pages = min(page_cnt, KMALLOC_MAX_CACHE_SIZE / sizeof(struct page *));
|
||||
pages = kmalloc_nolock(alloc_pages * sizeof(struct page *), 0, NUMA_NO_NODE);
|
||||
if (!pages)
|
||||
return 0;
|
||||
data.pages = pages;
|
||||
|
||||
mutex_lock(&arena->lock);
|
||||
|
||||
if (uaddr) {
|
||||
ret = is_range_tree_set(&arena->rt, pgoff, page_cnt);
|
||||
if (ret)
|
||||
goto out_free_pages;
|
||||
goto out_unlock_free_pages;
|
||||
ret = range_tree_clear(&arena->rt, pgoff, page_cnt);
|
||||
} else {
|
||||
ret = pgoff = range_tree_find(&arena->rt, page_cnt);
|
||||
|
|
@ -536,40 +542,60 @@ static long arena_alloc_pages(struct bpf_arena *arena, long uaddr, long page_cnt
|
|||
ret = range_tree_clear(&arena->rt, pgoff, page_cnt);
|
||||
}
|
||||
if (ret)
|
||||
goto out_free_pages;
|
||||
|
||||
struct apply_range_data data = { .pages = pages, .i = 0 };
|
||||
ret = bpf_map_alloc_pages(&arena->map, node_id, page_cnt, pages);
|
||||
if (ret)
|
||||
goto out;
|
||||
goto out_unlock_free_pages;
|
||||
|
||||
remaining = page_cnt;
|
||||
uaddr32 = (u32)(arena->user_vm_start + pgoff * PAGE_SIZE);
|
||||
/* Earlier checks made sure that uaddr32 + page_cnt * PAGE_SIZE - 1
|
||||
* will not overflow 32-bit. Lower 32-bit need to represent
|
||||
* contiguous user address range.
|
||||
* Map these pages at kern_vm_start base.
|
||||
* kern_vm_start + uaddr32 + page_cnt * PAGE_SIZE - 1 can overflow
|
||||
* lower 32-bit and it's ok.
|
||||
*/
|
||||
apply_to_page_range(&init_mm, kern_vm_start + uaddr32,
|
||||
page_cnt << PAGE_SHIFT, apply_range_set_cb, &data);
|
||||
mapped = data.i;
|
||||
flush_vmap_cache(kern_vm_start + uaddr32, mapped << PAGE_SHIFT);
|
||||
if (mapped < page_cnt) {
|
||||
for (i = mapped; i < page_cnt; i++)
|
||||
__free_page(pages[i]);
|
||||
goto out;
|
||||
|
||||
while (remaining) {
|
||||
long this_batch = min(remaining, alloc_pages);
|
||||
|
||||
/* zeroing is needed, since alloc_pages_bulk() only fills in non-zero entries */
|
||||
memset(pages, 0, this_batch * sizeof(struct page *));
|
||||
|
||||
ret = bpf_map_alloc_pages(&arena->map, node_id, this_batch, pages);
|
||||
if (ret)
|
||||
goto out;
|
||||
|
||||
/*
|
||||
* Earlier checks made sure that uaddr32 + page_cnt * PAGE_SIZE - 1
|
||||
* will not overflow 32-bit. Lower 32-bit need to represent
|
||||
* contiguous user address range.
|
||||
* Map these pages at kern_vm_start base.
|
||||
* kern_vm_start + uaddr32 + page_cnt * PAGE_SIZE - 1 can overflow
|
||||
* lower 32-bit and it's ok.
|
||||
*/
|
||||
data.i = 0;
|
||||
ret = apply_to_page_range(&init_mm,
|
||||
kern_vm_start + uaddr32 + (mapped << PAGE_SHIFT),
|
||||
this_batch << PAGE_SHIFT, apply_range_set_cb, &data);
|
||||
if (ret) {
|
||||
/* data.i pages were mapped, account them and free the remaining */
|
||||
mapped += data.i;
|
||||
for (i = data.i; i < this_batch; i++)
|
||||
__free_page(pages[i]);
|
||||
goto out;
|
||||
}
|
||||
|
||||
mapped += this_batch;
|
||||
remaining -= this_batch;
|
||||
}
|
||||
flush_vmap_cache(kern_vm_start + uaddr32, mapped << PAGE_SHIFT);
|
||||
mutex_unlock(&arena->lock);
|
||||
kvfree(pages);
|
||||
kfree_nolock(pages);
|
||||
return clear_lo32(arena->user_vm_start) + uaddr32;
|
||||
out:
|
||||
range_tree_set(&arena->rt, pgoff + mapped, page_cnt - mapped);
|
||||
out_free_pages:
|
||||
mutex_unlock(&arena->lock);
|
||||
if (mapped)
|
||||
if (mapped) {
|
||||
flush_vmap_cache(kern_vm_start + uaddr32, mapped << PAGE_SHIFT);
|
||||
arena_free_pages(arena, uaddr32, mapped);
|
||||
kvfree(pages);
|
||||
}
|
||||
goto out_free_pages;
|
||||
out_unlock_free_pages:
|
||||
mutex_unlock(&arena->lock);
|
||||
out_free_pages:
|
||||
kfree_nolock(pages);
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
|
|
|||
Loading…
Reference in New Issue
Block a user