Merge branch 'support-non-linear-skbs-for-bpf_prog_test_run'

Paul Chaignon says:

====================
Support non-linear skbs for BPF_PROG_TEST_RUN

This patchset adds support for non-linear skbs when running tc programs
with BPF_PROG_TEST_RUN.

We've had multiple bugs in the past few years in Cilium caused by
missing calls to bpf_skb_pull_data(). Daniel suggested supporting
non-linear skbs in BPF_PROG_TEST_RUN to uncover these bugs in
our BPF tests.

Changes in v8:
  - Fix uninitialized data pointer spotted by Martin.
  - Error out in test_loader if __linear_size tag is used on unsupported
    program types.
Changes in v7:
  - Refactor use of 'size' variable as suggested by Martin.
  - Support copying back the non-linear area to data_out.
  - Minor code changes for readability, suggested by Martin.
Changes in v6:
  - Disallow non-linear skb in prog_run_skb only for LWT programs
    instead of all non-L2 program types, on suggestion from Martin.
  - Reject non-null ctx->data and ctx->data_meta, as suggested by Amery.
  - Bound linear_size to 'PAGE_SIZE - headroom - tailroom' to be
    consistent with prog_run_xdp, as suggested by Martin.
  - Allocate exactly linear_size bytes in bpf_test_init, spotted by
    Martin.
  - Fix wrong conflict resolution on double-free fix, spotted by Amery.
  - Rebased.
Changes in v5:
  - Fix double free on data in first patch.
Changes in v4:
  - Per Martin's suggestion, follow the XDP code pattern and use
    bpf_test_init only to initialize the linear area. That way data is
    directly copied to the right areas and we avoid the call to
    __pskb_pull_tail.
  - Fixed outdated commit descriptions.
  - Rebased.
Changes in v3:
  - Dropped BPF_F_TEST_SKB_NON_LINEAR and used ctx->data_end to
    determine whether the user wants a non-linear skb, as suggested by Amery.
  - Introduced a second commit with a bit of refactoring to allow for
    the above requested change.
  - Fix bug found by syzkaller on third commit.
  - Rebased.
Changes in v2:
  - Made the linear size configurable via ctx->data_end, as suggested
    by Amery.
  - Reworked the selftests to allow testing the configurable linear
    size.
  - Fix warnings reported by kernel test robot on first commit.
  - Rebased.
====================

Link: https://patch.msgid.link/cover.1760037899.git.paul.chaignon@gmail.com
Signed-off-by: Martin KaFai Lau <martin.lau@kernel.org>
This commit is contained in:
Martin KaFai Lau 2025-10-10 09:47:04 -07:00
commit 7dc484fe48
4 changed files with 193 additions and 42 deletions

View File

@ -447,7 +447,7 @@ static int bpf_test_run(struct bpf_prog *prog, void *ctx, u32 repeat,
static int bpf_test_finish(const union bpf_attr *kattr,
union bpf_attr __user *uattr, const void *data,
struct skb_shared_info *sinfo, u32 size,
struct skb_shared_info *sinfo, u32 size, u32 frag_size,
u32 retval, u32 duration)
{
void __user *data_out = u64_to_user_ptr(kattr->test.data_out);
@ -464,7 +464,7 @@ static int bpf_test_finish(const union bpf_attr *kattr,
}
if (data_out) {
int len = sinfo ? copy_size - sinfo->xdp_frags_size : copy_size;
int len = sinfo ? copy_size - frag_size : copy_size;
if (len < 0) {
err = -ENOSPC;
@ -910,6 +910,12 @@ static int convert___skb_to_skb(struct sk_buff *skb, struct __sk_buff *__skb)
/* cb is allowed */
if (!range_is_zero(__skb, offsetofend(struct __sk_buff, cb),
offsetof(struct __sk_buff, data_end)))
return -EINVAL;
/* data_end is allowed, but not copied to skb */
if (!range_is_zero(__skb, offsetofend(struct __sk_buff, data_end),
offsetof(struct __sk_buff, tstamp)))
return -EINVAL;
@ -984,46 +990,39 @@ static struct proto bpf_dummy_proto = {
int bpf_prog_test_run_skb(struct bpf_prog *prog, const union bpf_attr *kattr,
union bpf_attr __user *uattr)
{
bool is_l2 = false, is_direct_pkt_access = false;
bool is_l2 = false, is_direct_pkt_access = false, is_lwt = false;
u32 tailroom = SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
struct net *net = current->nsproxy->net_ns;
struct net_device *dev = net->loopback_dev;
u32 size = kattr->test.data_size_in;
u32 headroom = NET_SKB_PAD + NET_IP_ALIGN;
u32 linear_sz = kattr->test.data_size_in;
u32 repeat = kattr->test.repeat;
struct __sk_buff *ctx = NULL;
struct sk_buff *skb = NULL;
struct sock *sk = NULL;
u32 retval, duration;
int hh_len = ETH_HLEN;
struct sk_buff *skb;
struct sock *sk;
void *data;
void *data = NULL;
int ret;
if ((kattr->test.flags & ~BPF_F_TEST_SKB_CHECKSUM_COMPLETE) ||
kattr->test.cpu || kattr->test.batch_size)
return -EINVAL;
if (size < ETH_HLEN)
if (kattr->test.data_size_in < ETH_HLEN)
return -EINVAL;
data = bpf_test_init(kattr, kattr->test.data_size_in,
size, NET_SKB_PAD + NET_IP_ALIGN,
SKB_DATA_ALIGN(sizeof(struct skb_shared_info)));
if (IS_ERR(data))
return PTR_ERR(data);
ctx = bpf_ctx_init(kattr, sizeof(struct __sk_buff));
if (IS_ERR(ctx)) {
kfree(data);
return PTR_ERR(ctx);
}
switch (prog->type) {
case BPF_PROG_TYPE_SCHED_CLS:
case BPF_PROG_TYPE_SCHED_ACT:
is_direct_pkt_access = true;
is_l2 = true;
fallthrough;
break;
case BPF_PROG_TYPE_LWT_IN:
case BPF_PROG_TYPE_LWT_OUT:
case BPF_PROG_TYPE_LWT_XMIT:
is_lwt = true;
fallthrough;
case BPF_PROG_TYPE_CGROUP_SKB:
is_direct_pkt_access = true;
break;
@ -1031,25 +1030,88 @@ int bpf_prog_test_run_skb(struct bpf_prog *prog, const union bpf_attr *kattr,
break;
}
ctx = bpf_ctx_init(kattr, sizeof(struct __sk_buff));
if (IS_ERR(ctx))
return PTR_ERR(ctx);
if (ctx) {
if (ctx->data_end > kattr->test.data_size_in || ctx->data || ctx->data_meta) {
ret = -EINVAL;
goto out;
}
if (ctx->data_end) {
/* Non-linear LWT test_run is unsupported for now. */
if (is_lwt) {
ret = -EINVAL;
goto out;
}
linear_sz = max(ETH_HLEN, ctx->data_end);
}
}
linear_sz = min_t(u32, linear_sz, PAGE_SIZE - headroom - tailroom);
data = bpf_test_init(kattr, linear_sz, linear_sz, headroom, tailroom);
if (IS_ERR(data)) {
ret = PTR_ERR(data);
data = NULL;
goto out;
}
sk = sk_alloc(net, AF_UNSPEC, GFP_USER, &bpf_dummy_proto, 1);
if (!sk) {
kfree(data);
kfree(ctx);
return -ENOMEM;
ret = -ENOMEM;
goto out;
}
sock_init_data(NULL, sk);
skb = slab_build_skb(data);
if (!skb) {
kfree(data);
kfree(ctx);
sk_free(sk);
return -ENOMEM;
ret = -ENOMEM;
goto out;
}
skb->sk = sk;
data = NULL; /* data released via kfree_skb */
skb_reserve(skb, NET_SKB_PAD + NET_IP_ALIGN);
__skb_put(skb, size);
__skb_put(skb, linear_sz);
if (unlikely(kattr->test.data_size_in > linear_sz)) {
void __user *data_in = u64_to_user_ptr(kattr->test.data_in);
struct skb_shared_info *sinfo = skb_shinfo(skb);
u32 copied = linear_sz;
while (copied < kattr->test.data_size_in) {
struct page *page;
u32 data_len;
if (sinfo->nr_frags == MAX_SKB_FRAGS) {
ret = -ENOMEM;
goto out;
}
page = alloc_page(GFP_KERNEL);
if (!page) {
ret = -ENOMEM;
goto out;
}
data_len = min_t(u32, kattr->test.data_size_in - copied,
PAGE_SIZE);
skb_fill_page_desc(skb, sinfo->nr_frags, page, 0, data_len);
if (copy_from_user(page_address(page), data_in + copied,
data_len)) {
ret = -EFAULT;
goto out;
}
skb->data_len += data_len;
skb->truesize += PAGE_SIZE;
skb->len += data_len;
copied += data_len;
}
}
if (ctx && ctx->ifindex > 1) {
dev = dev_get_by_index(net, ctx->ifindex);
@ -1129,12 +1191,11 @@ int bpf_prog_test_run_skb(struct bpf_prog *prog, const union bpf_attr *kattr,
convert_skb_to___skb(skb, ctx);
size = skb->len;
/* bpf program can never convert linear skb to non-linear */
if (WARN_ON_ONCE(skb_is_nonlinear(skb)))
size = skb_headlen(skb);
ret = bpf_test_finish(kattr, uattr, skb->data, NULL, size, retval,
duration);
if (skb_is_nonlinear(skb))
/* bpf program can never convert linear skb to non-linear */
WARN_ON_ONCE(linear_sz == kattr->test.data_size_in);
ret = bpf_test_finish(kattr, uattr, skb->data, skb_shinfo(skb), skb->len,
skb->data_len, retval, duration);
if (!ret)
ret = bpf_ctx_finish(kattr, uattr, ctx,
sizeof(struct __sk_buff));
@ -1142,7 +1203,9 @@ int bpf_prog_test_run_skb(struct bpf_prog *prog, const union bpf_attr *kattr,
if (dev && dev != net->loopback_dev)
dev_put(dev);
kfree_skb(skb);
sk_free(sk);
kfree(data);
if (sk)
sk_free(sk);
kfree(ctx);
return ret;
}
@ -1340,7 +1403,7 @@ int bpf_prog_test_run_xdp(struct bpf_prog *prog, const union bpf_attr *kattr,
goto out;
size = xdp.data_end - xdp.data_meta + sinfo->xdp_frags_size;
ret = bpf_test_finish(kattr, uattr, xdp.data_meta, sinfo, size,
ret = bpf_test_finish(kattr, uattr, xdp.data_meta, sinfo, size, sinfo->xdp_frags_size,
retval, duration);
if (!ret)
ret = bpf_ctx_finish(kattr, uattr, ctx,
@ -1431,7 +1494,7 @@ int bpf_prog_test_run_flow_dissector(struct bpf_prog *prog,
goto out;
ret = bpf_test_finish(kattr, uattr, &flow_keys, NULL,
sizeof(flow_keys), retval, duration);
sizeof(flow_keys), 0, retval, duration);
if (!ret)
ret = bpf_ctx_finish(kattr, uattr, user_ctx,
sizeof(struct bpf_flow_keys));
@ -1532,7 +1595,7 @@ int bpf_prog_test_run_sk_lookup(struct bpf_prog *prog, const union bpf_attr *kat
user_ctx->cookie = sock_gen_cookie(ctx.selected_sk);
}
ret = bpf_test_finish(kattr, uattr, NULL, NULL, 0, retval, duration);
ret = bpf_test_finish(kattr, uattr, NULL, NULL, 0, 0, retval, duration);
if (!ret)
ret = bpf_ctx_finish(kattr, uattr, user_ctx, sizeof(*user_ctx));
@ -1732,7 +1795,7 @@ int bpf_prog_test_run_nf(struct bpf_prog *prog,
if (ret)
goto out;
ret = bpf_test_finish(kattr, uattr, NULL, NULL, 0, retval, duration);
ret = bpf_test_finish(kattr, uattr, NULL, NULL, 0, 0, retval, duration);
out:
kfree(user_ctx);

View File

@ -126,6 +126,9 @@
* Several __arch_* annotations could be specified at once.
* When test case is not run on current arch it is marked as skipped.
* __caps_unpriv Specify the capabilities that should be set when running the test.
*
* __linear_size Specify the size of the linear area of non-linear skbs, or
* 0 for linear skbs.
*/
#define __msg(msg) __attribute__((btf_decl_tag("comment:test_expect_msg=" XSTR(__COUNTER__) "=" msg)))
#define __not_msg(msg) __attribute__((btf_decl_tag("comment:test_expect_not_msg=" XSTR(__COUNTER__) "=" msg)))
@ -159,6 +162,7 @@
#define __stderr_unpriv(msg) __attribute__((btf_decl_tag("comment:test_expect_stderr_unpriv=" XSTR(__COUNTER__) "=" msg)))
#define __stdout(msg) __attribute__((btf_decl_tag("comment:test_expect_stdout=" XSTR(__COUNTER__) "=" msg)))
#define __stdout_unpriv(msg) __attribute__((btf_decl_tag("comment:test_expect_stdout_unpriv=" XSTR(__COUNTER__) "=" msg)))
#define __linear_size(sz) __attribute__((btf_decl_tag("comment:test_linear_size=" XSTR(sz))))
/* Define common capabilities tested using __caps_unpriv */
#define CAP_NET_ADMIN 12

View File

@ -1,6 +1,7 @@
// SPDX-License-Identifier: GPL-2.0
/* Converted from tools/testing/selftests/bpf/verifier/direct_packet_access.c */
#include <linux/if_ether.h>
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>
#include "bpf_misc.h"
@ -800,4 +801,62 @@ l0_%=: /* exit(0) */ \
: __clobber_all);
}
/*
 * Generate a naked BPF test program named access_non_linear_<name>.
 *
 *   name      - suffix of the generated function; also embedded in the
 *               test description string
 *   type      - ELF section name passed to SEC() (a string literal, it is
 *               also concatenated into the description)
 *   desc      - short human-readable string appended to the description
 *   retval    - expected program return value, passed to __retval()
 *   linear_sz - value passed to __linear_size()
 *   off       - number of bytes, counted from skb->data, that the program
 *               tries to access directly
 *
 * The generated program loads skb->data and skb->data_end from the context,
 * computes data + off and compares it against data_end:
 *   - if data + off is beyond data_end, it returns 1;
 *   - otherwise it reads the byte at data + off - 1 (the last byte of the
 *     checked range) and returns 0.
 */
#define access_test_non_linear(name, type, desc, retval, linear_sz, off) \
SEC(type) \
__description("direct packet access: " #name " (non-linear, " type ", " desc ")") \
__success __retval(retval) \
__linear_size(linear_sz) \
__naked void access_non_linear_##name(void) \
{ \
asm volatile (" \
r2 = *(u32*)(r1 + %[skb_data]); \
r3 = *(u32*)(r1 + %[skb_data_end]); \
r0 = r2; \
r0 += %[offset]; \
if r0 > r3 goto l0_%=; \
r0 = *(u8*)(r0 - 1); \
r0 = 0; \
exit; \
l0_%=: r0 = 1; \
exit; \
" : \
: __imm_const(skb_data, offsetof(struct __sk_buff, data)), \
__imm_const(skb_data_end, offsetof(struct __sk_buff, data_end)), \
__imm_const(offset, off) \
: __clobber_all); \
}
/*
 * Instantiate the non-linear direct packet access tests for "tc" and
 * "cgroup_skb/ingress" programs. For each program type:
 *   - "too short eth": linear size of ETH_HLEN, access is expected to fail
 *     the bounds check (retval 1);
 *   - "too short 1":   linear size of 1, access is likewise expected to fail
 *     the bounds check (retval 1);
 *   - "long enough":   linear size of 22, the access is expected to pass the
 *     bounds check (retval 0).
 *
 * NOTE(review): the cgroup_skb variants use an offset of 8 instead of 22,
 * presumably because the Ethernet header is not part of the packet data
 * seen by these programs (22 - ETH_HLEN = 8) -- confirm against the
 * test-run implementation.
 */
access_test_non_linear(test31, "tc", "too short eth", 1, ETH_HLEN, 22);
access_test_non_linear(test32, "tc", "too short 1", 1, 1, 22);
access_test_non_linear(test33, "tc", "long enough", 0, 22, 22);
access_test_non_linear(test34, "cgroup_skb/ingress", "too short eth", 1, ETH_HLEN, 8);
access_test_non_linear(test35, "cgroup_skb/ingress", "too short 1", 1, 1, 8);
access_test_non_linear(test36, "cgroup_skb/ingress", "long enough", 0, 22, 8);
/*
 * Non-linear skb whose head is linearized by the program before access.
 *
 * The test requests a linear size of ETH_HLEN via __linear_size(). The
 * program saves the context in r6, calls bpf_skb_pull_data() with r2 = 22
 * (r1 is left untouched from program entry, so presumably it still holds
 * the context), and only afterwards loads skb->data and skb->data_end.
 *
 * If data + 22 is within data_end, the program returns the byte at
 * data + 21; the test expects that value to be 0 (__retval(0)).
 * Otherwise it returns 1, which fails the test.
 */
SEC("tc")
__description("direct packet access: test37 (non-linear, linearized)")
__success __retval(0)
__linear_size(ETH_HLEN)
__naked void access_non_linear_linearized(void)
{
asm volatile (" \
r6 = r1; \
r2 = 22; \
call %[bpf_skb_pull_data]; \
r2 = *(u32*)(r6 + %[skb_data]); \
r3 = *(u32*)(r6 + %[skb_data_end]); \
r0 = r2; \
r0 += 22; \
if r0 > r3 goto l0_%=; \
r0 = *(u8*)(r0 - 1); \
exit; \
l0_%=: r0 = 1; \
exit; \
" :
: __imm(bpf_skb_pull_data),
__imm_const(skb_data, offsetof(struct __sk_buff, data)),
__imm_const(skb_data_end, offsetof(struct __sk_buff, data_end))
: __clobber_all);
}
char _license[] SEC("license") = "GPL";

View File

@ -43,6 +43,7 @@
#define TEST_TAG_EXPECT_STDERR_PFX_UNPRIV "comment:test_expect_stderr_unpriv="
#define TEST_TAG_EXPECT_STDOUT_PFX "comment:test_expect_stdout="
#define TEST_TAG_EXPECT_STDOUT_PFX_UNPRIV "comment:test_expect_stdout_unpriv="
#define TEST_TAG_LINEAR_SIZE "comment:test_linear_size="
/* Warning: duplicated in bpf_misc.h */
#define POINTER_VALUE 0xbadcafe
@ -89,6 +90,7 @@ struct test_spec {
int mode_mask;
int arch_mask;
int load_mask;
int linear_sz;
bool auxiliary;
bool valid;
};
@ -633,6 +635,21 @@ static int parse_test_spec(struct test_loader *tester,
&spec->unpriv.stdout);
if (err)
goto cleanup;
} else if (str_has_pfx(s, TEST_TAG_LINEAR_SIZE)) {
switch (bpf_program__type(prog)) {
case BPF_PROG_TYPE_SCHED_ACT:
case BPF_PROG_TYPE_SCHED_CLS:
case BPF_PROG_TYPE_CGROUP_SKB:
val = s + sizeof(TEST_TAG_LINEAR_SIZE) - 1;
err = parse_int(val, &spec->linear_sz, "test linear size");
if (err)
goto cleanup;
break;
default:
PRINT_FAIL("__linear_size for unsupported program type");
err = -EINVAL;
goto cleanup;
}
}
}
@ -1007,10 +1024,11 @@ static bool is_unpriv_capable_map(struct bpf_map *map)
}
}
static int do_prog_test_run(int fd_prog, int *retval, bool empty_opts)
static int do_prog_test_run(int fd_prog, int *retval, bool empty_opts, int linear_sz)
{
__u8 tmp_out[TEST_DATA_LEN << 2] = {};
__u8 tmp_in[TEST_DATA_LEN] = {};
struct __sk_buff ctx = {};
int err, saved_errno;
LIBBPF_OPTS(bpf_test_run_opts, topts,
.data_in = tmp_in,
@ -1020,6 +1038,12 @@ static int do_prog_test_run(int fd_prog, int *retval, bool empty_opts)
.repeat = 1,
);
if (linear_sz) {
ctx.data_end = linear_sz;
topts.ctx_in = &ctx;
topts.ctx_size_in = sizeof(ctx);
}
if (empty_opts) {
memset(&topts, 0, sizeof(struct bpf_test_run_opts));
topts.sz = sizeof(struct bpf_test_run_opts);
@ -1269,7 +1293,8 @@ void run_subtest(struct test_loader *tester,
}
err = do_prog_test_run(bpf_program__fd(tprog), &retval,
bpf_program__type(tprog) == BPF_PROG_TYPE_SYSCALL ? true : false);
bpf_program__type(tprog) == BPF_PROG_TYPE_SYSCALL ? true : false,
spec->linear_sz);
if (!err && retval != subspec->retval && subspec->retval != POINTER_VALUE) {
PRINT_FAIL("Unexpected retval: %d != %d\n", retval, subspec->retval);
goto tobj_cleanup;