From f333ee0cdb27ba201e6cc0c99c76b1364aa29b86 Mon Sep 17 00:00:00 2001 From: Andrey Ignatov Date: Wed, 11 Jul 2018 17:33:32 -0700 Subject: [PATCH 1/6] bpf: Add BPF_SOCK_OPS_TCP_LISTEN_CB Add new TCP-BPF callback that is called on listen(2) right after socket transition to TCP_LISTEN state. It fills the gap for listening sockets in TCP-BPF. For example BPF program can set BPF_SOCK_OPS_STATE_CB_FLAG when socket becomes listening and track later transition from TCP_LISTEN to TCP_CLOSE with BPF_SOCK_OPS_STATE_CB callback. Before there was no way to do it with TCP-BPF and other options were much harder to work with. E.g. socket state tracking can be done with tracepoints (either raw or regular) but they can't be attached to cgroup and their lifetime has to be managed separately. Signed-off-by: Andrey Ignatov Acked-by: Alexei Starovoitov Signed-off-by: Daniel Borkmann --- include/uapi/linux/bpf.h | 3 +++ net/ipv4/af_inet.c | 1 + 2 files changed, 4 insertions(+) diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 6bcb287a888d..870113916cac 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -2555,6 +2555,9 @@ enum { * Arg1: old_state * Arg2: new_state */ + BPF_SOCK_OPS_TCP_LISTEN_CB, /* Called on listen(2), right after + * socket transition to LISTEN state. + */ }; /* List of TCP states. There is a build check in net/ipv4/tcp.c to detect diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index c716be13d58c..f2a0a3bab6b5 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -229,6 +229,7 @@ int inet_listen(struct socket *sock, int backlog) err = inet_csk_listen_start(sk, backlog); if (err) goto out; + tcp_call_bpf(sk, BPF_SOCK_OPS_TCP_LISTEN_CB, 0, NULL); } sk->sk_max_ack_backlog = backlog; err = 0; From 060a7fccd394f1600be86536b1d3e8cdd66637c2 Mon Sep 17 00:00:00 2001 From: Andrey Ignatov Date: Wed, 11 Jul 2018 17:33:33 -0700 Subject: [PATCH 2/6] bpf: Sync bpf.h to tools/ Sync BPF_SOCK_OPS_TCP_LISTEN_CB related UAPI changes to tools/. Signed-off-by: Andrey Ignatov Acked-by: Alexei Starovoitov Signed-off-by: Daniel Borkmann --- tools/include/uapi/linux/bpf.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 6bcb287a888d..870113916cac 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -2555,6 +2555,9 @@ enum { * Arg1: old_state * Arg2: new_state */ + BPF_SOCK_OPS_TCP_LISTEN_CB, /* Called on listen(2), right after + * socket transition to LISTEN state. + */ }; /* List of TCP states. There is a build check in net/ipv4/tcp.c to detect From 04c13411151c220b41998020c32ac97f33b58683 Mon Sep 17 00:00:00 2001 From: Andrey Ignatov Date: Wed, 11 Jul 2018 17:33:34 -0700 Subject: [PATCH 3/6] selftests/bpf: Fix const'ness in cgroup_helpers Lack of const in cgroup helpers signatures forces to write ugly client code. Fix it. Signed-off-by: Andrey Ignatov Acked-by: Alexei Starovoitov Signed-off-by: Daniel Borkmann --- tools/testing/selftests/bpf/cgroup_helpers.c | 6 +++--- tools/testing/selftests/bpf/cgroup_helpers.h | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/tools/testing/selftests/bpf/cgroup_helpers.c b/tools/testing/selftests/bpf/cgroup_helpers.c index c87b4e052ce9..cf16948aad4a 100644 --- a/tools/testing/selftests/bpf/cgroup_helpers.c +++ b/tools/testing/selftests/bpf/cgroup_helpers.c @@ -118,7 +118,7 @@ static int join_cgroup_from_top(char *cgroup_path) * * On success, it returns 0, otherwise on failure it returns 1. */ -int join_cgroup(char *path) +int join_cgroup(const char *path) { char cgroup_path[PATH_MAX + 1]; @@ -158,7 +158,7 @@ void cleanup_cgroup_environment(void) * On success, it returns the file descriptor. On failure it returns 0. * If there is a failure, it prints the error to stderr. */ -int create_and_get_cgroup(char *path) +int create_and_get_cgroup(const char *path) { char cgroup_path[PATH_MAX + 1]; int fd; @@ -186,7 +186,7 @@ int create_and_get_cgroup(char *path) * which is an invalid cgroup id. * If there is a failure, it prints the error to stderr. */ -unsigned long long get_cgroup_id(char *path) +unsigned long long get_cgroup_id(const char *path) { int dirfd, err, flags, mount_id, fhsize; union { diff --git a/tools/testing/selftests/bpf/cgroup_helpers.h b/tools/testing/selftests/bpf/cgroup_helpers.h index 20a4a5dcd469..d64bb8957090 100644 --- a/tools/testing/selftests/bpf/cgroup_helpers.h +++ b/tools/testing/selftests/bpf/cgroup_helpers.h @@ -9,10 +9,10 @@ __FILE__, __LINE__, clean_errno(), ##__VA_ARGS__) -int create_and_get_cgroup(char *path); -int join_cgroup(char *path); +int create_and_get_cgroup(const char *path); +int join_cgroup(const char *path); int setup_cgroup_environment(void); void cleanup_cgroup_environment(void); -unsigned long long get_cgroup_id(char *path); +unsigned long long get_cgroup_id(const char *path); #endif From c65267e5ff418c81d7fea7025850ed4f190a1289 Mon Sep 17 00:00:00 2001 From: Andrey Ignatov Date: Wed, 11 Jul 2018 17:33:35 -0700 Subject: [PATCH 4/6] selftests/bpf: Switch test_tcpbpf_user to cgroup_helpers Switch to cgroup_helpers to simplify the code and fix cgroup cleanup: before cgroup was not cleaned up after the test. It also removes SYSTEM macro, that only printed error, but didn't terminate the test. Signed-off-by: Andrey Ignatov Acked-by: Alexei Starovoitov Signed-off-by: Daniel Borkmann --- tools/testing/selftests/bpf/Makefile | 1 + .../testing/selftests/bpf/test_tcpbpf_user.c | 55 +++++++------------ 2 files changed, 22 insertions(+), 34 deletions(-) diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index 7a6214e9ae58..478bf1bcbbf5 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -61,6 +61,7 @@ $(OUTPUT)/test_dev_cgroup: cgroup_helpers.c $(OUTPUT)/test_sock: cgroup_helpers.c $(OUTPUT)/test_sock_addr: cgroup_helpers.c $(OUTPUT)/test_sockmap: cgroup_helpers.c +$(OUTPUT)/test_tcpbpf_user: cgroup_helpers.c $(OUTPUT)/test_progs: trace_helpers.c $(OUTPUT)/get_cgroup_id_user: cgroup_helpers.c diff --git a/tools/testing/selftests/bpf/test_tcpbpf_user.c b/tools/testing/selftests/bpf/test_tcpbpf_user.c index 84ab5163c828..fa97ec6428de 100644 --- a/tools/testing/selftests/bpf/test_tcpbpf_user.c +++ b/tools/testing/selftests/bpf/test_tcpbpf_user.c @@ -1,25 +1,18 @@ // SPDX-License-Identifier: GPL-2.0 #include #include -#include #include #include -#include #include -#include -#include -#include #include -#include -#include #include -#include -#include #include #include -#include "bpf_util.h" + #include "bpf_rlimit.h" -#include +#include "bpf_util.h" +#include "cgroup_helpers.h" + #include "test_tcpbpf.h" static int bpf_find_map(const char *test, struct bpf_object *obj, @@ -35,42 +28,32 @@ static int bpf_find_map(const char *test, struct bpf_object *obj, return bpf_map__fd(map); } -#define SYSTEM(CMD) \ - do { \ - if (system(CMD)) { \ - printf("system(%s) FAILS!\n", CMD); \ - } \ - } while (0) - int main(int argc, char **argv) { const char *file = "test_tcpbpf_kern.o"; struct tcpbpf_globals g = {0}; - int cg_fd, prog_fd, map_fd; + const char *cg_path = "/foo"; bool debug_flag = false; int error = EXIT_FAILURE; struct bpf_object *obj; - char cmd[100], *dir; - struct stat buffer; + int prog_fd, map_fd; + int cg_fd = -1; __u32 key = 0; - int pid; int rv; if (argc > 1 && strcmp(argv[1], "-d") == 0) debug_flag = true; - dir = "/tmp/cgroupv2/foo"; + if (setup_cgroup_environment()) + goto err; - if (stat(dir, &buffer) != 0) { - SYSTEM("mkdir -p /tmp/cgroupv2"); - SYSTEM("mount -t cgroup2 none /tmp/cgroupv2"); - SYSTEM("mkdir -p /tmp/cgroupv2/foo"); - } - pid = (int) getpid(); - sprintf(cmd, "echo %d >> /tmp/cgroupv2/foo/cgroup.procs", pid); - SYSTEM(cmd); + cg_fd = create_and_get_cgroup(cg_path); + if (!cg_fd) + goto err; + + if (join_cgroup(cg_path)) + goto err; - cg_fd = open(dir, O_DIRECTORY, O_RDONLY); if (bpf_prog_load(file, BPF_PROG_TYPE_SOCK_OPS, &obj, &prog_fd)) { printf("FAILED: load_bpf_file failed for: %s\n", file); goto err; @@ -83,7 +66,10 @@ int main(int argc, char **argv) goto err; } - SYSTEM("./tcp_server.py"); + if (system("./tcp_server.py")) { + printf("FAILED: TCP server\n"); + goto err; + } map_fd = bpf_find_map(__func__, obj, "global_map"); if (map_fd < 0) @@ -123,6 +109,7 @@ int main(int argc, char **argv) error = 0; err: bpf_prog_detach(cg_fd, BPF_CGROUP_SOCK_OPS); + close(cg_fd); + cleanup_cgroup_environment(); return error; - } From 2044e4ef0be29f0154b078a956a3d8331315298a Mon Sep 17 00:00:00 2001 From: Andrey Ignatov Date: Wed, 11 Jul 2018 17:33:36 -0700 Subject: [PATCH 5/6] selftests/bpf: Better verification in test_tcpbpf Reduce amount of copy/paste for debug info when result is verified in the test and keep that info together with values being checked so that they won't get out of sync. It also improves debug experience: instead of checking manually what doesn't match in debug output for all fields, only unexpected field is printed. Signed-off-by: Andrey Ignatov Acked-by: Alexei Starovoitov Signed-off-by: Daniel Borkmann --- .../testing/selftests/bpf/test_tcpbpf_user.c | 64 +++++++++++-------- 1 file changed, 39 insertions(+), 25 deletions(-) diff --git a/tools/testing/selftests/bpf/test_tcpbpf_user.c b/tools/testing/selftests/bpf/test_tcpbpf_user.c index fa97ec6428de..971f1644b9c7 100644 --- a/tools/testing/selftests/bpf/test_tcpbpf_user.c +++ b/tools/testing/selftests/bpf/test_tcpbpf_user.c @@ -1,4 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 +#include #include #include #include @@ -15,6 +16,42 @@ #include "test_tcpbpf.h" +#define EXPECT_EQ(expected, actual, fmt) \ + do { \ + if ((expected) != (actual)) { \ + printf(" Value of: " #actual "\n" \ + " Actual: %" fmt "\n" \ + " Expected: %" fmt "\n", \ + (actual), (expected)); \ + goto err; \ + } \ + } while (0) + +int verify_result(const struct tcpbpf_globals *result) +{ + __u32 expected_events; + + expected_events = ((1 << BPF_SOCK_OPS_TIMEOUT_INIT) | + (1 << BPF_SOCK_OPS_RWND_INIT) | + (1 << BPF_SOCK_OPS_TCP_CONNECT_CB) | + (1 << BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB) | + (1 << BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB) | + (1 << BPF_SOCK_OPS_NEEDS_ECN) | + (1 << BPF_SOCK_OPS_STATE_CB)); + + EXPECT_EQ(expected_events, result->event_map, "#" PRIx32); + EXPECT_EQ(501ULL, result->bytes_received, "llu"); + EXPECT_EQ(1002ULL, result->bytes_acked, "llu"); + EXPECT_EQ(1, result->data_segs_in, PRIu32); + EXPECT_EQ(1, result->data_segs_out, PRIu32); + EXPECT_EQ(0x80, result->bad_cb_test_rv, PRIu32); + EXPECT_EQ(0, result->good_cb_test_rv, PRIu32); + + return 0; +err: + return -1; +} + static int bpf_find_map(const char *test, struct bpf_object *obj, const char *name) { @@ -33,7 +70,6 @@ int main(int argc, char **argv) const char *file = "test_tcpbpf_kern.o"; struct tcpbpf_globals g = {0}; const char *cg_path = "/foo"; - bool debug_flag = false; int error = EXIT_FAILURE; struct bpf_object *obj; int prog_fd, map_fd; @@ -41,9 +77,6 @@ int main(int argc, char **argv) __u32 key = 0; int rv; - if (argc > 1 && strcmp(argv[1], "-d") == 0) - debug_flag = true; - if (setup_cgroup_environment()) goto err; @@ -81,30 +114,11 @@ int main(int argc, char **argv) goto err; } - if (g.bytes_received != 501 || g.bytes_acked != 1002 || - g.data_segs_in != 1 || g.data_segs_out != 1 || - (g.event_map ^ 0x47e) != 0 || g.bad_cb_test_rv != 0x80 || - g.good_cb_test_rv != 0) { + if (verify_result(&g)) { printf("FAILED: Wrong stats\n"); - if (debug_flag) { - printf("\n"); - printf("bytes_received: %d (expecting 501)\n", - (int)g.bytes_received); - printf("bytes_acked: %d (expecting 1002)\n", - (int)g.bytes_acked); - printf("data_segs_in: %d (expecting 1)\n", - g.data_segs_in); - printf("data_segs_out: %d (expecting 1)\n", - g.data_segs_out); - printf("event_map: 0x%x (at least 0x47e)\n", - g.event_map); - printf("bad_cb_test_rv: 0x%x (expecting 0x80)\n", - g.bad_cb_test_rv); - printf("good_cb_test_rv:0x%x (expecting 0)\n", - g.good_cb_test_rv); - } goto err; } + printf("PASSED!\n"); error = 0; err: From 78d8e26d46bc2ed73ab6a0e369a342478fda4ce0 Mon Sep 17 00:00:00 2001 From: Andrey Ignatov Date: Wed, 11 Jul 2018 17:33:37 -0700 Subject: [PATCH 6/6] selftests/bpf: Test case for BPF_SOCK_OPS_TCP_LISTEN_CB Cover new TCP-BPF callback in test_tcpbpf: when listen() is called on socket, set BPF_SOCK_OPS_STATE_CB_FLAG so that BPF_SOCK_OPS_STATE_CB callback can be called on future state transition, and when such a transition happens (TCP_LISTEN -> TCP_CLOSE), track it in the map and verify it in user space later. Signed-off-by: Andrey Ignatov Acked-by: Alexei Starovoitov Signed-off-by: Daniel Borkmann --- tools/testing/selftests/bpf/test_tcpbpf.h | 1 + tools/testing/selftests/bpf/test_tcpbpf_kern.c | 17 ++++++++++++----- tools/testing/selftests/bpf/test_tcpbpf_user.c | 4 +++- 3 files changed, 16 insertions(+), 6 deletions(-) diff --git a/tools/testing/selftests/bpf/test_tcpbpf.h b/tools/testing/selftests/bpf/test_tcpbpf.h index 2fe43289943c..7bcfa6207005 100644 --- a/tools/testing/selftests/bpf/test_tcpbpf.h +++ b/tools/testing/selftests/bpf/test_tcpbpf.h @@ -12,5 +12,6 @@ struct tcpbpf_globals { __u32 good_cb_test_rv; __u64 bytes_received; __u64 bytes_acked; + __u32 num_listen; }; #endif diff --git a/tools/testing/selftests/bpf/test_tcpbpf_kern.c b/tools/testing/selftests/bpf/test_tcpbpf_kern.c index 3e645ee41ed5..4b7fd540cea9 100644 --- a/tools/testing/selftests/bpf/test_tcpbpf_kern.c +++ b/tools/testing/selftests/bpf/test_tcpbpf_kern.c @@ -96,15 +96,22 @@ int bpf_testcb(struct bpf_sock_ops *skops) if (!gp) break; g = *gp; - g.total_retrans = skops->total_retrans; - g.data_segs_in = skops->data_segs_in; - g.data_segs_out = skops->data_segs_out; - g.bytes_received = skops->bytes_received; - g.bytes_acked = skops->bytes_acked; + if (skops->args[0] == BPF_TCP_LISTEN) { + g.num_listen++; + } else { + g.total_retrans = skops->total_retrans; + g.data_segs_in = skops->data_segs_in; + g.data_segs_out = skops->data_segs_out; + g.bytes_received = skops->bytes_received; + g.bytes_acked = skops->bytes_acked; + } bpf_map_update_elem(&global_map, &key, &g, BPF_ANY); } break; + case BPF_SOCK_OPS_TCP_LISTEN_CB: + bpf_sock_ops_cb_flags_set(skops, BPF_SOCK_OPS_STATE_CB_FLAG); + break; default: rv = -1; } diff --git a/tools/testing/selftests/bpf/test_tcpbpf_user.c b/tools/testing/selftests/bpf/test_tcpbpf_user.c index 971f1644b9c7..a275c2971376 100644 --- a/tools/testing/selftests/bpf/test_tcpbpf_user.c +++ b/tools/testing/selftests/bpf/test_tcpbpf_user.c @@ -37,7 +37,8 @@ int verify_result(const struct tcpbpf_globals *result) (1 << BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB) | (1 << BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB) | (1 << BPF_SOCK_OPS_NEEDS_ECN) | - (1 << BPF_SOCK_OPS_STATE_CB)); + (1 << BPF_SOCK_OPS_STATE_CB) | + (1 << BPF_SOCK_OPS_TCP_LISTEN_CB)); EXPECT_EQ(expected_events, result->event_map, "#" PRIx32); EXPECT_EQ(501ULL, result->bytes_received, "llu"); @@ -46,6 +47,7 @@ int verify_result(const struct tcpbpf_globals *result) EXPECT_EQ(1, result->data_segs_out, PRIu32); EXPECT_EQ(0x80, result->bad_cb_test_rv, PRIu32); EXPECT_EQ(0, result->good_cb_test_rv, PRIu32); + EXPECT_EQ(1, result->num_listen, PRIu32); return 0; err: