bpf: arena: use kmalloc_nolock() in place of kvcalloc()

To make arena_alloc_pages() safe to call from any context, replace
kvcalloc() with kmalloc_nolock() so that it doesn't sleep or take any
locks. kmalloc_nolock() returns NULL for allocations larger than
KMALLOC_MAX_CACHE_SIZE, which is (PAGE_SIZE * 2) = 8KB on systems with
4KB pages. So, cap the allocation done by kmalloc_nolock() at 1024 * 8
bytes and reuse the array in a loop.

Signed-off-by: Puranjay Mohan <puranjay@kernel.org>
Link: https://lore.kernel.org/r/20251222195022.431211-3-puranjay@kernel.org
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
This commit is contained in:
Puranjay Mohan 2025-12-22 11:50:17 -08:00 committed by Alexei Starovoitov
parent c336b0b327
commit 360c35f8ff

View File

@ -44,6 +44,8 @@
#define GUARD_SZ round_up(1ull << sizeof_field(struct bpf_insn, off) * 8, PAGE_SIZE << 1)
#define KERN_VM_SZ (SZ_4G + GUARD_SZ)
static void arena_free_pages(struct bpf_arena *arena, long uaddr, long page_cnt);
struct bpf_arena {
struct bpf_map map;
u64 user_vm_start;
@ -500,8 +502,10 @@ static long arena_alloc_pages(struct bpf_arena *arena, long uaddr, long page_cnt
/* user_vm_end/start are fixed before bpf prog runs */
long page_cnt_max = (arena->user_vm_end - arena->user_vm_start) >> PAGE_SHIFT;
u64 kern_vm_start = bpf_arena_get_kern_vm_start(arena);
struct apply_range_data data;
struct page **pages = NULL;
long mapped = 0;
long remaining, mapped = 0;
long alloc_pages;
long pgoff = 0;
u32 uaddr32;
int ret, i;
@ -518,17 +522,19 @@ static long arena_alloc_pages(struct bpf_arena *arena, long uaddr, long page_cnt
return 0;
}
/* zeroing is needed, since alloc_pages_bulk() only fills in non-zero entries */
pages = kvcalloc(page_cnt, sizeof(struct page *), GFP_KERNEL);
/* Cap allocation size to KMALLOC_MAX_CACHE_SIZE so kmalloc_nolock() can succeed. */
alloc_pages = min(page_cnt, KMALLOC_MAX_CACHE_SIZE / sizeof(struct page *));
pages = kmalloc_nolock(alloc_pages * sizeof(struct page *), 0, NUMA_NO_NODE);
if (!pages)
return 0;
data.pages = pages;
mutex_lock(&arena->lock);
if (uaddr) {
ret = is_range_tree_set(&arena->rt, pgoff, page_cnt);
if (ret)
goto out_free_pages;
goto out_unlock_free_pages;
ret = range_tree_clear(&arena->rt, pgoff, page_cnt);
} else {
ret = pgoff = range_tree_find(&arena->rt, page_cnt);
@ -536,40 +542,60 @@ static long arena_alloc_pages(struct bpf_arena *arena, long uaddr, long page_cnt
ret = range_tree_clear(&arena->rt, pgoff, page_cnt);
}
if (ret)
goto out_free_pages;
struct apply_range_data data = { .pages = pages, .i = 0 };
ret = bpf_map_alloc_pages(&arena->map, node_id, page_cnt, pages);
if (ret)
goto out;
goto out_unlock_free_pages;
remaining = page_cnt;
uaddr32 = (u32)(arena->user_vm_start + pgoff * PAGE_SIZE);
/* Earlier checks made sure that uaddr32 + page_cnt * PAGE_SIZE - 1
* will not overflow 32-bit. Lower 32-bit need to represent
* contiguous user address range.
* Map these pages at kern_vm_start base.
* kern_vm_start + uaddr32 + page_cnt * PAGE_SIZE - 1 can overflow
* lower 32-bit and it's ok.
*/
apply_to_page_range(&init_mm, kern_vm_start + uaddr32,
page_cnt << PAGE_SHIFT, apply_range_set_cb, &data);
mapped = data.i;
flush_vmap_cache(kern_vm_start + uaddr32, mapped << PAGE_SHIFT);
if (mapped < page_cnt) {
for (i = mapped; i < page_cnt; i++)
__free_page(pages[i]);
goto out;
while (remaining) {
long this_batch = min(remaining, alloc_pages);
/* zeroing is needed, since alloc_pages_bulk() only fills in non-zero entries */
memset(pages, 0, this_batch * sizeof(struct page *));
ret = bpf_map_alloc_pages(&arena->map, node_id, this_batch, pages);
if (ret)
goto out;
/*
* Earlier checks made sure that uaddr32 + page_cnt * PAGE_SIZE - 1
* will not overflow 32-bit. Lower 32-bit need to represent
* contiguous user address range.
* Map these pages at kern_vm_start base.
* kern_vm_start + uaddr32 + page_cnt * PAGE_SIZE - 1 can overflow
* lower 32-bit and it's ok.
*/
data.i = 0;
ret = apply_to_page_range(&init_mm,
kern_vm_start + uaddr32 + (mapped << PAGE_SHIFT),
this_batch << PAGE_SHIFT, apply_range_set_cb, &data);
if (ret) {
/* data.i pages were mapped, account them and free the remaining */
mapped += data.i;
for (i = data.i; i < this_batch; i++)
__free_page(pages[i]);
goto out;
}
mapped += this_batch;
remaining -= this_batch;
}
flush_vmap_cache(kern_vm_start + uaddr32, mapped << PAGE_SHIFT);
mutex_unlock(&arena->lock);
kvfree(pages);
kfree_nolock(pages);
return clear_lo32(arena->user_vm_start) + uaddr32;
out:
range_tree_set(&arena->rt, pgoff + mapped, page_cnt - mapped);
out_free_pages:
mutex_unlock(&arena->lock);
if (mapped)
if (mapped) {
flush_vmap_cache(kern_vm_start + uaddr32, mapped << PAGE_SHIFT);
arena_free_pages(arena, uaddr32, mapped);
kvfree(pages);
}
goto out_free_pages;
out_unlock_free_pages:
mutex_unlock(&arena->lock);
out_free_pages:
kfree_nolock(pages);
return 0;
}