bpf-next-for-netdev

-----BEGIN PGP SIGNATURE-----
 
 iHUEABYKAB0WIQQ6NaUOruQGUkvPdG4raS+Z+3y5EwUCZzZUwAAKCRAraS+Z+3y5
 E54pAP9kim6BVXVngcMBmyAKa1Fr0zLGj/Ds1JB+KFfQ/0v80wD/ebVpoIEoKHs9
 /Xl/3WfN3JzIi9+mqIauENH6DTUQPAo=
 =MWOY
 -----END PGP SIGNATURE-----

Merge tag 'for-netdev' of https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next

Martin KaFai Lau says:

====================
pull-request: bpf-next 2024-11-14

We've added 9 non-merge commits during the last 4 day(s) which contain
a total of 3 files changed, 226 insertions(+), 84 deletions(-).

The main changes are:

1) Fixes to bpf_msg_push/pop_data and test_sockmap. The changes have a
   dependency on the other changes in the bpf-next/net branch,
   from Zijian Zhang.

2) Drop the netns code from the mptcp test and reuse the common helpers
   in test_progs, from Geliang Tang.

* tag 'for-netdev' of https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next:
  bpf, sockmap: Fix sk_msg_reset_curr
  bpf, sockmap: Several fixes to bpf_msg_pop_data
  bpf, sockmap: Several fixes to bpf_msg_push_data
  selftests/bpf: Add more tests for test_txmsg_push_pop in test_sockmap
  selftests/bpf: Add push/pop checking for msg_verify_data in test_sockmap
  selftests/bpf: Fix total_bytes in msg_loop_rx in test_sockmap
  selftests/bpf: Fix SENDPAGE data logic in test_sockmap
  selftests/bpf: Add txmsg_pass to pull/push/pop in test_sockmap
  selftests/bpf: Drop netns helpers in mptcp
====================

Link: https://patch.msgid.link/20241114202832.3187927-1-martin.lau@linux.dev
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
This commit is contained in:
Jakub Kicinski 2024-11-14 19:08:04 -08:00
commit 55c8590129
3 changed files with 227 additions and 85 deletions

View File

@ -2604,18 +2604,16 @@ BPF_CALL_2(bpf_msg_cork_bytes, struct sk_msg *, msg, u32, bytes)
static void sk_msg_reset_curr(struct sk_msg *msg)
{
u32 i = msg->sg.start;
u32 len = 0;
if (!msg->sg.size) {
msg->sg.curr = msg->sg.start;
msg->sg.copybreak = 0;
} else {
u32 i = msg->sg.end;
do {
len += sk_msg_elem(msg, i)->length;
sk_msg_iter_var_next(i);
if (len >= msg->sg.size)
break;
} while (i != msg->sg.end);
msg->sg.curr = i;
msg->sg.copybreak = 0;
sk_msg_iter_var_prev(i);
msg->sg.curr = i;
msg->sg.copybreak = msg->sg.data[i].length;
}
}
static const struct bpf_func_proto bpf_msg_cork_bytes_proto = {
@ -2778,7 +2776,7 @@ BPF_CALL_4(bpf_msg_push_data, struct sk_msg *, msg, u32, start,
sk_msg_iter_var_next(i);
} while (i != msg->sg.end);
if (start >= offset + l)
if (start > offset + l)
return -EINVAL;
space = MAX_MSG_FRAGS - sk_msg_elem_used(msg);
@ -2803,6 +2801,8 @@ BPF_CALL_4(bpf_msg_push_data, struct sk_msg *, msg, u32, start,
raw = page_address(page);
if (i == msg->sg.end)
sk_msg_iter_var_prev(i);
psge = sk_msg_elem(msg, i);
front = start - offset;
back = psge->length - front;
@ -2819,7 +2819,13 @@ BPF_CALL_4(bpf_msg_push_data, struct sk_msg *, msg, u32, start,
}
put_page(sg_page(psge));
} else if (start - offset) {
new = i;
goto place_new;
}
if (start - offset) {
if (i == msg->sg.end)
sk_msg_iter_var_prev(i);
psge = sk_msg_elem(msg, i);
rsge = sk_msg_elem_cpy(msg, i);
@ -2830,39 +2836,44 @@ BPF_CALL_4(bpf_msg_push_data, struct sk_msg *, msg, u32, start,
sk_msg_iter_var_next(i);
sg_unmark_end(psge);
sg_unmark_end(&rsge);
sk_msg_iter_next(msg, end);
}
/* Slot(s) to place newly allocated data */
sk_msg_iter_next(msg, end);
new = i;
sk_msg_iter_var_next(i);
if (i == msg->sg.end) {
if (!rsge.length)
goto place_new;
sk_msg_iter_next(msg, end);
goto place_new;
}
/* Shift one or two slots as needed */
if (!copy) {
sge = sk_msg_elem_cpy(msg, i);
sge = sk_msg_elem_cpy(msg, new);
sg_unmark_end(&sge);
nsge = sk_msg_elem_cpy(msg, i);
if (rsge.length) {
sk_msg_iter_var_next(i);
sg_unmark_end(&sge);
nnsge = sk_msg_elem_cpy(msg, i);
sk_msg_iter_next(msg, end);
}
nsge = sk_msg_elem_cpy(msg, i);
while (i != msg->sg.end) {
msg->sg.data[i] = sge;
sge = nsge;
sk_msg_iter_var_next(i);
if (rsge.length) {
sk_msg_iter_var_next(i);
nsge = nnsge;
nnsge = sk_msg_elem_cpy(msg, i);
}
while (i != msg->sg.end) {
msg->sg.data[i] = sge;
sge = nsge;
sk_msg_iter_var_next(i);
if (rsge.length) {
nsge = nnsge;
nnsge = sk_msg_elem_cpy(msg, i);
} else {
nsge = sk_msg_elem_cpy(msg, i);
}
} else {
nsge = sk_msg_elem_cpy(msg, i);
}
}
place_new:
/* Place newly allocated data buffer */
sk_mem_charge(msg->sk, len);
msg->sg.size += len;
@ -2891,8 +2902,10 @@ static const struct bpf_func_proto bpf_msg_push_data_proto = {
static void sk_msg_shift_left(struct sk_msg *msg, int i)
{
struct scatterlist *sge = sk_msg_elem(msg, i);
int prev;
put_page(sg_page(sge));
do {
prev = i;
sk_msg_iter_var_next(i);
@ -2929,6 +2942,9 @@ BPF_CALL_4(bpf_msg_pop_data, struct sk_msg *, msg, u32, start,
if (unlikely(flags))
return -EINVAL;
if (unlikely(len == 0))
return 0;
/* First find the starting scatterlist element */
i = msg->sg.start;
do {
@ -2941,7 +2957,7 @@ BPF_CALL_4(bpf_msg_pop_data, struct sk_msg *, msg, u32, start,
} while (i != msg->sg.end);
/* Bounds checks: start and pop must be inside message */
if (start >= offset + l || last >= msg->sg.size)
if (start >= offset + l || last > msg->sg.size)
return -EINVAL;
space = MAX_MSG_FRAGS - sk_msg_elem_used(msg);
@ -2970,12 +2986,12 @@ BPF_CALL_4(bpf_msg_pop_data, struct sk_msg *, msg, u32, start,
*/
if (start != offset) {
struct scatterlist *nsge, *sge = sk_msg_elem(msg, i);
int a = start;
int a = start - offset;
int b = sge->length - pop - a;
sk_msg_iter_var_next(i);
if (pop < sge->length - a) {
if (b > 0) {
if (space) {
sge->length = a;
sk_msg_shift_right(msg, i);
@ -2994,7 +3010,6 @@ BPF_CALL_4(bpf_msg_pop_data, struct sk_msg *, msg, u32, start,
if (unlikely(!page))
return -ENOMEM;
sge->length = a;
orig = sg_page(sge);
from = sg_virt(sge);
to = page_address(page);
@ -3004,7 +3019,7 @@ BPF_CALL_4(bpf_msg_pop_data, struct sk_msg *, msg, u32, start,
put_page(orig);
}
pop = 0;
} else if (pop >= sge->length - a) {
} else {
pop -= (sge->length - a);
sge->length = a;
}
@ -3038,7 +3053,6 @@ BPF_CALL_4(bpf_msg_pop_data, struct sk_msg *, msg, u32, start,
pop -= sge->length;
sk_msg_shift_left(msg, i);
}
sk_msg_iter_var_next(i);
}
sk_mem_uncharge(msg->sk, len - pop);

View File

@ -69,24 +69,6 @@ struct mptcp_storage {
char ca_name[TCP_CA_NAME_MAX];
};
static struct nstoken *create_netns(void)
{
SYS(fail, "ip netns add %s", NS_TEST);
SYS(fail, "ip -net %s link set dev lo up", NS_TEST);
return open_netns(NS_TEST);
fail:
return NULL;
}
static void cleanup_netns(struct nstoken *nstoken)
{
if (nstoken)
close_netns(nstoken);
SYS_NOFAIL("ip netns del %s", NS_TEST);
}
static int start_mptcp_server(int family, const char *addr_str, __u16 port,
int timeout_ms)
{
@ -206,15 +188,15 @@ static int run_test(int cgroup_fd, int server_fd, bool is_mptcp)
static void test_base(void)
{
struct nstoken *nstoken = NULL;
struct netns_obj *netns = NULL;
int server_fd, cgroup_fd;
cgroup_fd = test__join_cgroup("/mptcp");
if (!ASSERT_GE(cgroup_fd, 0, "test__join_cgroup"))
return;
nstoken = create_netns();
if (!ASSERT_OK_PTR(nstoken, "create_netns"))
netns = netns_new(NS_TEST, true);
if (!ASSERT_OK_PTR(netns, "netns_new"))
goto fail;
/* without MPTCP */
@ -237,7 +219,7 @@ static void test_base(void)
close(server_fd);
fail:
cleanup_netns(nstoken);
netns_free(netns);
close(cgroup_fd);
}
@ -322,21 +304,21 @@ static int run_mptcpify(int cgroup_fd)
static void test_mptcpify(void)
{
struct nstoken *nstoken = NULL;
struct netns_obj *netns = NULL;
int cgroup_fd;
cgroup_fd = test__join_cgroup("/mptcpify");
if (!ASSERT_GE(cgroup_fd, 0, "test__join_cgroup"))
return;
nstoken = create_netns();
if (!ASSERT_OK_PTR(nstoken, "create_netns"))
netns = netns_new(NS_TEST, true);
if (!ASSERT_OK_PTR(netns, "netns_new"))
goto fail;
ASSERT_OK(run_mptcpify(cgroup_fd), "run_mptcpify");
fail:
cleanup_netns(nstoken);
netns_free(netns);
close(cgroup_fd);
}
@ -414,7 +396,7 @@ static void run_subflow(void)
static void test_subflow(void)
{
struct mptcp_subflow *skel;
struct nstoken *nstoken;
struct netns_obj *netns;
int cgroup_fd;
cgroup_fd = test__join_cgroup("/mptcp_subflow");
@ -437,8 +419,8 @@ static void test_subflow(void)
if (!ASSERT_OK_PTR(skel->links._getsockopt_subflow, "attach _getsockopt_subflow"))
goto skel_destroy;
nstoken = create_netns();
if (!ASSERT_OK_PTR(nstoken, "create_netns: mptcp_subflow"))
netns = netns_new(NS_TEST, true);
if (!ASSERT_OK_PTR(netns, "netns_new: mptcp_subflow"))
goto skel_destroy;
if (endpoint_init("subflow") < 0)
@ -447,7 +429,7 @@ static void test_subflow(void)
run_subflow();
close_netns:
cleanup_netns(nstoken);
netns_free(netns);
skel_destroy:
mptcp_subflow__destroy(skel);
close_cgroup:

View File

@ -88,6 +88,10 @@ int ktls;
int peek_flag;
int skb_use_parser;
int txmsg_omit_skb_parser;
int verify_push_start;
int verify_push_len;
int verify_pop_start;
int verify_pop_len;
static const struct option long_options[] = {
{"help", no_argument, NULL, 'h' },
@ -420,16 +424,18 @@ static int msg_loop_sendpage(int fd, int iov_length, int cnt,
{
bool drop = opt->drop_expected;
unsigned char k = 0;
int i, j, fp;
FILE *file;
int i, fp;
file = tmpfile();
if (!file) {
perror("create file for sendpage");
return 1;
}
for (i = 0; i < iov_length * cnt; i++, k++)
fwrite(&k, sizeof(char), 1, file);
for (i = 0; i < cnt; i++, k = 0) {
for (j = 0; j < iov_length; j++, k++)
fwrite(&k, sizeof(char), 1, file);
}
fflush(file);
fseek(file, 0, SEEK_SET);
@ -512,12 +518,41 @@ static int msg_alloc_iov(struct msghdr *msg,
return -ENOMEM;
}
/* TODO: Add verification logic for push, pull and pop data */
static int msg_verify_data(struct msghdr *msg, int size, int chunk_sz,
unsigned char *k_p, int *bytes_cnt_p)
/* In push or pop test, we need to do some calculations for msg_verify_data */
static void msg_verify_date_prep(void)
{
int i, j, bytes_cnt = *bytes_cnt_p;
int push_range_end = txmsg_start_push + txmsg_end_push - 1;
int pop_range_end = txmsg_start_pop + txmsg_pop - 1;
if (txmsg_end_push && txmsg_pop &&
txmsg_start_push <= pop_range_end && txmsg_start_pop <= push_range_end) {
/* The push range and the pop range overlap */
int overlap_len;
verify_push_start = txmsg_start_push;
verify_pop_start = txmsg_start_pop;
if (txmsg_start_push < txmsg_start_pop)
overlap_len = min(push_range_end - txmsg_start_pop + 1, txmsg_pop);
else
overlap_len = min(pop_range_end - txmsg_start_push + 1, txmsg_end_push);
verify_push_len = max(txmsg_end_push - overlap_len, 0);
verify_pop_len = max(txmsg_pop - overlap_len, 0);
} else {
/* Otherwise */
verify_push_start = txmsg_start_push;
verify_pop_start = txmsg_start_pop;
verify_push_len = txmsg_end_push;
verify_pop_len = txmsg_pop;
}
}
static int msg_verify_data(struct msghdr *msg, int size, int chunk_sz,
unsigned char *k_p, int *bytes_cnt_p,
int *check_cnt_p, int *push_p)
{
int bytes_cnt = *bytes_cnt_p, check_cnt = *check_cnt_p, push = *push_p;
unsigned char k = *k_p;
int i, j;
for (i = 0, j = 0; i < msg->msg_iovlen && size; i++, j = 0) {
unsigned char *d = msg->msg_iov[i].iov_base;
@ -536,6 +571,37 @@ static int msg_verify_data(struct msghdr *msg, int size, int chunk_sz,
}
for (; j < msg->msg_iov[i].iov_len && size; j++) {
if (push > 0 &&
check_cnt == verify_push_start + verify_push_len - push) {
int skipped;
revisit_push:
skipped = push;
if (j + push >= msg->msg_iov[i].iov_len)
skipped = msg->msg_iov[i].iov_len - j;
push -= skipped;
size -= skipped;
j += skipped - 1;
check_cnt += skipped;
continue;
}
if (verify_pop_len > 0 && check_cnt == verify_pop_start) {
bytes_cnt += verify_pop_len;
check_cnt += verify_pop_len;
k += verify_pop_len;
if (bytes_cnt == chunk_sz) {
k = 0;
bytes_cnt = 0;
check_cnt = 0;
push = verify_push_len;
}
if (push > 0 &&
check_cnt == verify_push_start + verify_push_len - push)
goto revisit_push;
}
if (d[j] != k++) {
fprintf(stderr,
"detected data corruption @iov[%i]:%i %02x != %02x, %02x ?= %02x\n",
@ -543,15 +609,20 @@ static int msg_verify_data(struct msghdr *msg, int size, int chunk_sz,
return -EDATAINTEGRITY;
}
bytes_cnt++;
check_cnt++;
if (bytes_cnt == chunk_sz) {
k = 0;
bytes_cnt = 0;
check_cnt = 0;
push = verify_push_len;
}
size--;
}
}
*k_p = k;
*bytes_cnt_p = bytes_cnt;
*check_cnt_p = check_cnt;
*push_p = push;
return 0;
}
@ -604,12 +675,14 @@ static int msg_loop(int fd, int iov_count, int iov_length, int cnt,
}
clock_gettime(CLOCK_MONOTONIC, &s->end);
} else {
float total_bytes, txmsg_pop_total, txmsg_push_total;
int slct, recvp = 0, recv, max_fd = fd;
float total_bytes, txmsg_pop_total;
int fd_flags = O_NONBLOCK;
struct timeval timeout;
unsigned char k = 0;
int bytes_cnt = 0;
int check_cnt = 0;
int push = 0;
fd_set w;
fcntl(fd, fd_flags);
@ -623,12 +696,22 @@ static int msg_loop(int fd, int iov_count, int iov_length, int cnt,
* This is really only useful for testing edge cases in code
* paths.
*/
total_bytes = (float)iov_count * (float)iov_length * (float)cnt;
if (txmsg_apply)
total_bytes = (float)iov_length * (float)cnt;
if (!opt->sendpage)
total_bytes *= (float)iov_count;
if (txmsg_apply) {
txmsg_push_total = txmsg_end_push * (total_bytes / txmsg_apply);
txmsg_pop_total = txmsg_pop * (total_bytes / txmsg_apply);
else
} else {
txmsg_push_total = txmsg_end_push * cnt;
txmsg_pop_total = txmsg_pop * cnt;
}
total_bytes += txmsg_push_total;
total_bytes -= txmsg_pop_total;
if (data) {
msg_verify_date_prep();
push = verify_push_len;
}
err = clock_gettime(CLOCK_MONOTONIC, &s->start);
if (err < 0)
perror("recv start time");
@ -701,10 +784,11 @@ static int msg_loop(int fd, int iov_count, int iov_length, int cnt,
if (data) {
int chunk_sz = opt->sendpage ?
iov_length * cnt :
iov_length :
iov_length * iov_count;
errno = msg_verify_data(&msg, recv, chunk_sz, &k, &bytes_cnt);
errno = msg_verify_data(&msg, recv, chunk_sz, &k, &bytes_cnt,
&check_cnt, &push);
if (errno) {
perror("data verify msg failed");
goto out_errno;
@ -714,7 +798,9 @@ static int msg_loop(int fd, int iov_count, int iov_length, int cnt,
recvp,
chunk_sz,
&k,
&bytes_cnt);
&bytes_cnt,
&check_cnt,
&push);
if (errno) {
perror("data verify msg_peek failed");
goto out_errno;
@ -796,8 +882,6 @@ static int sendmsg_test(struct sockmap_options *opt)
rxpid = fork();
if (rxpid == 0) {
if (txmsg_pop || txmsg_start_pop)
iov_buf -= (txmsg_pop - txmsg_start_pop + 1);
if (opt->drop_expected || txmsg_ktls_skb_drop)
_exit(0);
@ -1466,8 +1550,8 @@ static void test_send_many(struct sockmap_options *opt, int cgrp)
static void test_send_large(struct sockmap_options *opt, int cgrp)
{
opt->iov_length = 256;
opt->iov_count = 1024;
opt->iov_length = 8192;
opt->iov_count = 32;
opt->rate = 2;
test_exec(cgrp, opt);
}
@ -1596,11 +1680,13 @@ static void test_txmsg_cork_hangs(int cgrp, struct sockmap_options *opt)
static void test_txmsg_pull(int cgrp, struct sockmap_options *opt)
{
/* Test basic start/end */
txmsg_pass = 1;
txmsg_start = 1;
txmsg_end = 2;
test_send(opt, cgrp);
/* Test >4k pull */
txmsg_pass = 1;
txmsg_start = 4096;
txmsg_end = 9182;
test_send_large(opt, cgrp);
@ -1628,12 +1714,16 @@ static void test_txmsg_pull(int cgrp, struct sockmap_options *opt)
static void test_txmsg_pop(int cgrp, struct sockmap_options *opt)
{
bool data = opt->data_test;
/* Test basic pop */
txmsg_pass = 1;
txmsg_start_pop = 1;
txmsg_pop = 2;
test_send_many(opt, cgrp);
/* Test pop with >4k */
txmsg_pass = 1;
txmsg_start_pop = 4096;
txmsg_pop = 4096;
test_send_large(opt, cgrp);
@ -1644,6 +1734,12 @@ static void test_txmsg_pop(int cgrp, struct sockmap_options *opt)
txmsg_pop = 2;
test_send_many(opt, cgrp);
/* TODO: Test for pop + cork should be different,
* - It makes the layout of the received data difficult
* - It makes it hard to calculate the total_bytes in the recvmsg
* Temporarily skip the data integrity test for this case now.
*/
opt->data_test = false;
/* Test pop + cork */
txmsg_redir = 0;
txmsg_cork = 512;
@ -1657,16 +1753,21 @@ static void test_txmsg_pop(int cgrp, struct sockmap_options *opt)
txmsg_start_pop = 1;
txmsg_pop = 2;
test_send_many(opt, cgrp);
opt->data_test = data;
}
static void test_txmsg_push(int cgrp, struct sockmap_options *opt)
{
bool data = opt->data_test;
/* Test basic push */
txmsg_pass = 1;
txmsg_start_push = 1;
txmsg_end_push = 1;
test_send(opt, cgrp);
/* Test push 4kB >4k */
txmsg_pass = 1;
txmsg_start_push = 4096;
txmsg_end_push = 4096;
test_send_large(opt, cgrp);
@ -1677,21 +1778,66 @@ static void test_txmsg_push(int cgrp, struct sockmap_options *opt)
txmsg_end_push = 2;
test_send_many(opt, cgrp);
/* TODO: Test for push + cork should be different,
* - It makes the layout of the received data difficult
* - It makes it hard to calculate the total_bytes in the recvmsg
* Temporarily skip the data integrity test for this case now.
*/
opt->data_test = false;
/* Test push + cork */
txmsg_redir = 0;
txmsg_cork = 512;
txmsg_start_push = 1;
txmsg_end_push = 2;
test_send_many(opt, cgrp);
opt->data_test = data;
}
/* Exercise push and pop together in one run.
 *
 * Sets the global push/pop knobs (txmsg_start_push, txmsg_end_push,
 * txmsg_start_pop, txmsg_pop) to six different combinations and calls
 * test_send_large() after each one: four combinations where the two byte
 * ranges overlap and two where they do not.
 *
 * The ranges quoted in the comments below treat txmsg_end_push and txmsg_pop
 * as lengths, i.e. range = [start, start + len - 1].
 * NOTE(review): txmsg_pass is re-armed before every run, presumably because
 * a run resets the txmsg_* globals -- confirm against test_exec().
 *
 * @cgrp: cgroup handle, forwarded unchanged to test_send_large()
 * @opt:  test options, forwarded unchanged to test_send_large()
 */
static void test_txmsg_push_pop(int cgrp, struct sockmap_options *opt)
{
/* Test push/pop range overlapping */
/* Case 1: push [1, 10], pop [5, 8] -- pop range lies inside the push range */
txmsg_pass = 1;
txmsg_start_push = 1;
txmsg_end_push = 10;
txmsg_start_pop = 5;
txmsg_pop = 4;
test_send_large(opt, cgrp);
/* Case 2: push [1, 10], pop [5, 20] -- pop starts inside the push range and
 * runs past its end
 */
txmsg_pass = 1;
txmsg_start_push = 1;
txmsg_end_push = 10;
txmsg_start_pop = 5;
txmsg_pop = 16;
test_send_large(opt, cgrp);
/* Case 3: push [5, 8], pop [1, 10] -- push range lies inside the pop range */
txmsg_pass = 1;
txmsg_start_push = 5;
txmsg_end_push = 4;
txmsg_start_pop = 1;
txmsg_pop = 10;
test_send_large(opt, cgrp);
/* Case 4: push [5, 20], pop [1, 10] -- push starts inside the pop range and
 * runs past its end
 */
txmsg_pass = 1;
txmsg_start_push = 5;
txmsg_end_push = 16;
txmsg_start_pop = 1;
txmsg_pop = 10;
test_send_large(opt, cgrp);
/* Test push/pop range non-overlapping */
/* Case 5: push [1, 10], pop [16, 19] -- pop range is entirely after the push
 * range
 */
txmsg_pass = 1;
txmsg_start_push = 1;
txmsg_end_push = 10;
txmsg_start_pop = 16;
txmsg_pop = 4;
test_send_large(opt, cgrp);
/* Case 6: push [16, 25], pop [5, 8] -- pop range is entirely before the push
 * range
 */
txmsg_pass = 1;
txmsg_start_push = 16;
txmsg_end_push = 10;
txmsg_start_pop = 5;
txmsg_pop = 4;
test_send_large(opt, cgrp);
}
static void test_txmsg_apply(int cgrp, struct sockmap_options *opt)