Merge branch 'selftests-bpf-convert-test_tc_tunnel-sh-to-test_progs'

Alexis Lothoré says:

====================
Hello,
this is the v3 of test_tc_tunnel conversion into test_progs framework.
This new revision:
- fixes a few issues spotted by the bot reviewer
- removes any test ensuring connection failure (and so depending on a
  timeout) to keep the execution time reasonable

test_tc_tunnel.sh tests a variety of tunnels based on BPF: packets are
encapsulated by a BPF program on the client egress. We then check that
those packets can be decapsulated on server ingress side, either thanks
to kernel-based or BPF-based decapsulation. Those tests are run thanks
to two veths in two dedicated namespaces.

- patches 1 and 2 are preparatory patches
- patch 3 introduces the tc_tunnel test into test_progs
- patch 4 gets rid of the test_tc_tunnel.sh script

The new test has been executed both in some x86 local qemu machine, as
well as in CI:

  # ./test_progs -a tc_tunnel
  #454/1   tc_tunnel/ipip_none:OK
  #454/2   tc_tunnel/ipip6_none:OK
  #454/3   tc_tunnel/ip6tnl_none:OK
  #454/4   tc_tunnel/sit_none:OK
  #454/5   tc_tunnel/vxlan_eth:OK
  #454/6   tc_tunnel/ip6vxlan_eth:OK
  #454/7   tc_tunnel/gre_none:OK
  #454/8   tc_tunnel/gre_eth:OK
  #454/9   tc_tunnel/gre_mpls:OK
  #454/10  tc_tunnel/ip6gre_none:OK
  #454/11  tc_tunnel/ip6gre_eth:OK
  #454/12  tc_tunnel/ip6gre_mpls:OK
  #454/13  tc_tunnel/udp_none:OK
  #454/14  tc_tunnel/udp_eth:OK
  #454/15  tc_tunnel/udp_mpls:OK
  #454/16  tc_tunnel/ip6udp_none:OK
  #454/17  tc_tunnel/ip6udp_eth:OK
  #454/18  tc_tunnel/ip6udp_mpls:OK
  #454     tc_tunnel:OK
  Summary: 1/18 PASSED, 0 SKIPPED, 0 FAILED
====================

Link: https://patch.msgid.link/20251027-tc_tunnel-v3-0-505c12019f9d@bootlin.com
Signed-off-by: Martin KaFai Lau <martin.lau@kernel.org>
This commit is contained in:
Martin KaFai Lau 2025-10-28 17:38:43 -07:00
commit e2e668bd81
7 changed files with 789 additions and 468 deletions

View File

@ -105,7 +105,6 @@ TEST_FILES = xsk_prereqs.sh $(wildcard progs/btf_dump_test_case_*.c)
# Order correspond to 'make run_tests' order
TEST_PROGS := test_kmod.sh \
test_lirc_mode2.sh \
test_tc_tunnel.sh \
test_tc_edt.sh \
test_xdping.sh \
test_bpftool_build.sh \

View File

@ -766,6 +766,50 @@ int send_recv_data(int lfd, int fd, uint32_t total_bytes)
return err;
}
int tc_prog_attach(const char *dev, int ingress_fd, int egress_fd)
{
int ifindex, ret;
if (!ASSERT_TRUE(ingress_fd >= 0 || egress_fd >= 0,
"at least one program fd is valid"))
return -1;
ifindex = if_nametoindex(dev);
if (!ASSERT_NEQ(ifindex, 0, "get ifindex"))
return -1;
DECLARE_LIBBPF_OPTS(bpf_tc_hook, hook, .ifindex = ifindex,
.attach_point = BPF_TC_INGRESS | BPF_TC_EGRESS);
DECLARE_LIBBPF_OPTS(bpf_tc_opts, opts1, .handle = 1,
.priority = 1, .prog_fd = ingress_fd);
DECLARE_LIBBPF_OPTS(bpf_tc_opts, opts2, .handle = 1,
.priority = 1, .prog_fd = egress_fd);
ret = bpf_tc_hook_create(&hook);
if (!ASSERT_OK(ret, "create tc hook"))
return ret;
if (ingress_fd >= 0) {
hook.attach_point = BPF_TC_INGRESS;
ret = bpf_tc_attach(&hook, &opts1);
if (!ASSERT_OK(ret, "bpf_tc_attach")) {
bpf_tc_hook_destroy(&hook);
return ret;
}
}
if (egress_fd >= 0) {
hook.attach_point = BPF_TC_EGRESS;
ret = bpf_tc_attach(&hook, &opts2);
if (!ASSERT_OK(ret, "bpf_tc_attach")) {
bpf_tc_hook_destroy(&hook);
return ret;
}
}
return 0;
}
#ifdef TRAFFIC_MONITOR
struct tmonitor_ctx {
pcap_t *pcap;

View File

@ -255,6 +255,22 @@ struct tmonitor_ctx;
typedef int (*tm_print_fn_t)(const char *format, va_list args);
/**
 * tc_prog_attach - attach BPF program(s) to an interface
 *
 * Takes file descriptors pointing to at least one, at most two BPF
 * programs, and attaches those programs to an interface ingress, egress or
 * both. A negative file descriptor means that nothing is attached for the
 * corresponding direction.
 *
 * @dev: string containing the interface name
 * @ingress_fd: file descriptor of the program to attach to interface ingress
 * @egress_fd: file descriptor of the program to attach to interface egress
 *
 * Returns 0 on success, -1 if no valid file descriptor has been found or if
 * the interface name is invalid, or the error code returned by
 * bpf_tc_hook_create()/bpf_tc_attach() if an error occurred while creating
 * the hook or attaching a program.
 */
int tc_prog_attach(const char *dev, int ingress_fd, int egress_fd);
#ifdef TRAFFIC_MONITOR
struct tmonitor_ctx *traffic_monitor_start(const char *netns, const char *test_name,
const char *subtest_name);

View File

@ -0,0 +1,674 @@
// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
/*
* End-to-end eBPF tunnel test suite
* The file tests BPF network tunnels implementation. For each tunnel
* type, the test validates that:
* - basic communication can first be established between the two veths
* - a BPF-based encapsulation is then attached on client egress (the
* resulting loss of connectivity is not asserted by the test)
* - when adding a kernel-based decapsulation on server ingress, client
* can now connect
* - when replacing the kernel-based decapsulation with a BPF-based one,
* the client can still connect
*/
#include <stdio.h>
#include <unistd.h>
#include <fcntl.h>
#include <sys/socket.h>
#include <bpf/libbpf.h>
#include "test_progs.h"
#include "network_helpers.h"
#include "test_tc_tunnel.skel.h"
#define SERVER_NS "tc-tunnel-server-ns"
#define CLIENT_NS "tc-tunnel-client-ns"
#define MAC_ADDR_VETH1 "00:11:22:33:44:55"
#define IP4_ADDR_VETH1 "192.168.1.1"
#define IP6_ADDR_VETH1 "fd::1"
#define MAC_ADDR_VETH2 "66:77:88:99:AA:BB"
#define IP4_ADDR_VETH2 "192.168.1.2"
#define IP6_ADDR_VETH2 "fd::2"
#define TEST_NAME_MAX_LEN 64
#define PROG_NAME_MAX_LEN 64
#define TUNNEL_ARGS_MAX_LEN 128
#define BUFFER_LEN 2000
#define DEFAULT_TEST_DATA_SIZE 100
#define GSO_TEST_DATA_SIZE BUFFER_LEN
#define TIMEOUT_MS 1000
#define TEST_PORT 8000
#define UDP_PORT 5555
#define MPLS_UDP_PORT 6635
#define FOU_MPLS_PROTO 137
#define VXLAN_ID 1
#define VXLAN_PORT 8472
#define MPLS_TABLE_ENTRIES_COUNT 65536
static char tx_buffer[BUFFER_LEN], rx_buffer[BUFFER_LEN];
/*
 * Description and runtime state of one tunnel subtest.
 *
 * Static description (filled in the subtests_cfg table):
 * - ebpf_tun_type: tunnel name as used on the BPF side; first half of the
 *   subtest name and of the "__encap_<ebpf_tun_type>_<mac_tun_type>"
 *   program name
 * - iproute_tun_type: link type passed to "ip link add ... type"
 * - mac_tun_type: second half of the subtest/program name; the value
 *   "mpls" selects the MPLS UDP port and FOU protocol
 * - ipproto: 4 or 6, selects the address family of the test connection and
 *   the default tunnel addresses
 * - extra_decap_mod_args_cb: optional callback writing additional
 *   "ip link add" arguments into extra_decap_mod_args
 * - tunnel_need_veth_mac: set the testtun0 MAC address to MAC_ADDR_VETH2
 * - configure_fou_rx_port: add a FOU RX port before creating the tunnel
 * - tmode: optional extra string inserted after the link type
 * - expect_kern_decap_failure: skip the kernel-based decapsulation step
 * - configure_mpls: apply the kernel MPLS configuration on the server
 * - test_gso: additionally send a GSO_TEST_DATA_SIZE buffer
 * - tunnel_client_addr/tunnel_server_addr: remote/local tunnel endpoints;
 *   deduced from ipproto when not both set
 *
 * Runtime state (filled while the subtest is prepared and run):
 * - name: subtest name built by build_subtest_name()
 * - server_addr: address the test server listens on
 * - client_egress_prog_fd/server_ingress_prog_fd: fds of the encap and
 *   decap BPF programs
 * - extra_decap_mod_args: buffer filled by extra_decap_mod_args_cb
 * - server_fd: listening socket array returned by start_reuseport_server()
 */
struct subtest_cfg {
char *ebpf_tun_type;
char *iproute_tun_type;
char *mac_tun_type;
int ipproto;
void (*extra_decap_mod_args_cb)(struct subtest_cfg *cfg, char *dst);
bool tunnel_need_veth_mac;
bool configure_fou_rx_port;
char *tmode;
bool expect_kern_decap_failure;
bool configure_mpls;
bool test_gso;
char *tunnel_client_addr;
char *tunnel_server_addr;
char name[TEST_NAME_MAX_LEN];
char *server_addr;
int client_egress_prog_fd;
int server_ingress_prog_fd;
char extra_decap_mod_args[TUNNEL_ARGS_MAX_LEN];
int *server_fd;
};
/*
 * One established test connection: client_fd is the connecting socket,
 * server_fd is the socket returned by accept() on the server side.
 */
struct connection {
int client_fd;
int server_fd;
};
/*
 * Write "<ebpf_tun_type>_<mac_tun_type>" into @dst.
 *
 * @cfg: subtest configuration providing both name components
 * @dst: destination buffer
 * @size: size of @dst in bytes
 *
 * Returns 0 on success, a negative value if formatting failed or if the
 * name did not fit in @dst (a truncated name would designate the wrong
 * subtest or BPF program).
 */
static int build_subtest_name(struct subtest_cfg *cfg, char *dst, size_t size)
{
	int ret;

	ret = snprintf(dst, size, "%s_%s", cfg->ebpf_tun_type,
		       cfg->mac_tun_type);
	if (ret < 0)
		return ret;

	/* snprintf() returns the untruncated length: reject names cut short */
	if ((size_t)ret >= size)
		return -1;

	return 0;
}
/*
 * Resolve the BPF programs used by a subtest: the client egress program is
 * looked up by name ("__encap_" followed by the subtest name), the server
 * ingress program is always decap_f.
 *
 * Returns 0 on success, a negative value if the name could not be built or
 * the encapsulation program does not exist in the skeleton.
 */
static int set_subtest_progs(struct subtest_cfg *cfg, struct test_tc_tunnel *skel)
{
	char encap_prog_name[PROG_NAME_MAX_LEN];
	struct bpf_program *encap_prog;
	int prefix_len, err;

	prefix_len = snprintf(encap_prog_name, PROG_NAME_MAX_LEN, "__encap_");
	if (prefix_len < 0)
		return prefix_len;

	/* Append the subtest name right after the prefix */
	err = build_subtest_name(cfg, encap_prog_name + prefix_len,
				 PROG_NAME_MAX_LEN - prefix_len);
	if (err < 0)
		return err;

	encap_prog = bpf_object__find_program_by_name(skel->obj, encap_prog_name);
	if (!encap_prog)
		return -1;

	cfg->client_egress_prog_fd = bpf_program__fd(encap_prog);
	cfg->server_ingress_prog_fd = bpf_program__fd(skel->progs.decap_f);

	return 0;
}
/*
 * Fill the addresses used by a subtest. The server address always follows
 * ipproto. The tunnel endpoints are kept as-is when the configuration table
 * already provides both of them, and deduced from ipproto otherwise.
 */
static void set_subtest_addresses(struct subtest_cfg *cfg)
{
	bool is_ipv6 = cfg->ipproto == 6;

	cfg->server_addr = is_ipv6 ? IP6_ADDR_VETH2 : IP4_ADDR_VETH2;

	/* Some tunnel types need specific addressing, set in the table */
	if (cfg->tunnel_client_addr && cfg->tunnel_server_addr)
		return;

	cfg->tunnel_client_addr = is_ipv6 ? IP6_ADDR_VETH1 : IP4_ADDR_VETH1;
	cfg->tunnel_server_addr = is_ipv6 ? IP6_ADDR_VETH2 : IP4_ADDR_VETH2;
}
/*
 * Start the TCP test server inside the server namespace, listening on
 * cfg->server_addr:TEST_PORT. The listening socket array is stored in
 * cfg->server_fd.
 *
 * Returns 0 on success, -1 if the server could not be started.
 */
static int run_server(struct subtest_cfg *cfg)
{
	struct nstoken *server_ns;
	int family;

	family = (cfg->ipproto == 6) ? AF_INET6 : AF_INET;

	server_ns = open_netns(SERVER_NS);
	cfg->server_fd = start_reuseport_server(family, SOCK_STREAM,
						cfg->server_addr, TEST_PORT,
						TIMEOUT_MS, 1);
	close_netns(server_ns);

	return ASSERT_OK_PTR(cfg->server_fd, "start server") ? 0 : -1;
}
/* Release the single listening socket created by run_server(). */
static void stop_server(struct subtest_cfg *cfg)
{
	int *listen_fds = cfg->server_fd;

	free_fds(listen_fds, 1);
}
/*
 * Receive @len bytes on the server side of @conn and compare them with the
 * beginning of tx_buffer.
 *
 * Returns 0 if exactly @len matching bytes were received, 1 otherwise.
 */
static int check_server_rx_data(struct subtest_cfg *cfg,
				struct connection *conn, int len)
{
	int received;

	memset(rx_buffer, 0, BUFFER_LEN);
	received = recv(conn->server_fd, rx_buffer, len, 0);

	if (!ASSERT_EQ(received, len, "check rx data len") ||
	    !ASSERT_MEMEQ(tx_buffer, rx_buffer, len, "check received data"))
		return 1;

	return 0;
}
/*
 * Connect a client socket to the test server and accept the connection on
 * the server side.
 *
 * Returns a heap-allocated connection holding both sockets (to be released
 * with disconnect_client_from_server()), or NULL on any failure.
 */
static struct connection *connect_client_to_server(struct subtest_cfg *cfg)
{
	struct network_helper_opts opts = {.timeout_ms = 500};
	int family = cfg->ipproto == 6 ? AF_INET6 : AF_INET;
	struct connection *conn;

	conn = malloc(sizeof(struct connection));
	if (!conn)
		return NULL;

	conn->client_fd = connect_to_addr_str(family, SOCK_STREAM,
					      cfg->server_addr, TEST_PORT,
					      &opts);
	if (conn->client_fd < 0)
		goto free_conn;

	conn->server_fd = accept(*cfg->server_fd, NULL, NULL);
	if (conn->server_fd < 0)
		goto close_client;

	return conn;

close_client:
	close(conn->client_fd);
free_conn:
	free(conn);
	return NULL;
}
/*
 * Tear down a connection returned by connect_client_to_server(): close the
 * server-side socket, then the client one, and free the descriptor.
 */
static void disconnect_client_from_server(struct subtest_cfg *cfg,
					  struct connection *conn)
{
	int accepted_fd = conn->server_fd;
	int client_fd = conn->client_fd;

	close(accepted_fd);
	close(client_fd);
	free(conn);
}
/*
 * Open a connection from client to server and, when it is expected to
 * succeed, push test data through it and verify what the server receives.
 *
 * @cfg: subtest configuration
 * @must_succeed: whether the connection is expected to be established
 *
 * Returns 0 when the observed behavior matches the expectation, -1
 * otherwise.
 */
static int send_and_test_data(struct subtest_cfg *cfg, bool must_succeed)
{
	struct connection *conn;
	int sent, res = -1;

	conn = connect_client_to_server(cfg);

	if (!must_succeed) {
		if (ASSERT_ERR_PTR(conn, "connection that must fail"))
			return 0;
		/* Unexpected connection: release it and report the failure */
		goto out;
	}

	if (!ASSERT_OK_PTR(conn, "connection that must succeed"))
		return -1;

	sent = send(conn->client_fd, tx_buffer, DEFAULT_TEST_DATA_SIZE, 0);
	if (!ASSERT_EQ(sent, DEFAULT_TEST_DATA_SIZE, "send data from client"))
		goto out;
	if (check_server_rx_data(cfg, conn, DEFAULT_TEST_DATA_SIZE))
		goto out;

	if (cfg->test_gso) {
		sent = send(conn->client_fd, tx_buffer, GSO_TEST_DATA_SIZE, 0);
		if (!ASSERT_EQ(sent, GSO_TEST_DATA_SIZE, "send (large) data from client"))
			goto out;
		/* Only the first DEFAULT_TEST_DATA_SIZE bytes are compared */
		if (check_server_rx_data(cfg, conn, DEFAULT_TEST_DATA_SIZE))
			goto out;
	}

	res = 0;
out:
	disconnect_client_from_server(cfg, conn);
	return res;
}
/*
 * Write the extra "ip link add" arguments needed by VXLAN tunnels into
 * @dst, which must be at least TUNNEL_ARGS_MAX_LEN bytes long.
 */
static void vxlan_decap_mod_args_cb(struct subtest_cfg *cfg, char *dst)
{
	int vni = VXLAN_ID;
	int dstport = VXLAN_PORT;

	snprintf(dst, TUNNEL_ARGS_MAX_LEN, "id %d dstport %d udp6zerocsumrx",
		 vni, dstport);
}
/*
 * Write the extra "ip link add" arguments needed by FOU-encapsulated
 * tunnels into @dst, which must be at least TUNNEL_ARGS_MAX_LEN bytes
 * long. The destination port depends on whether the subtest carries MPLS.
 */
static void udp_decap_mod_args_cb(struct subtest_cfg *cfg, char *dst)
{
	int dport = UDP_PORT;

	if (strcmp(cfg->mac_tun_type, "mpls") == 0)
		dport = MPLS_UDP_PORT;

	snprintf(dst, TUNNEL_ARGS_MAX_LEN,
		 "encap fou encap-sport auto encap-dport %d",
		 dport);
}
static int configure_fou_rx_port(struct subtest_cfg *cfg, bool add)
{
bool is_mpls = strcmp(cfg->mac_tun_type, "mpls") == 0;
int fou_proto;
if (is_mpls)
fou_proto = FOU_MPLS_PROTO;
else
fou_proto = cfg->ipproto == 6 ? 41 : 4;
SYS(fail, "ip fou %s port %d ipproto %d%s", add ? "add" : "del",
is_mpls ? MPLS_UDP_PORT : UDP_PORT, fou_proto,
cfg->ipproto == 6 ? " -6" : "");
return 0;
fail:
return 1;
}
/* Add the FOU RX port of the subtest; see configure_fou_rx_port(). */
static int add_fou_rx_port(struct subtest_cfg *cfg)
{
	const bool add = true;

	return configure_fou_rx_port(cfg, add);
}
/* Delete the FOU RX port of the subtest; see configure_fou_rx_port(). */
static int del_fou_rx_port(struct subtest_cfg *cfg)
{
	const bool add = false;

	return configure_fou_rx_port(cfg, add);
}
static int update_tunnel_intf_addr(struct subtest_cfg *cfg)
{
SYS(fail, "ip link set dev testtun0 address " MAC_ADDR_VETH2);
return 0;
fail:
return -1;
}
static int configure_kernel_for_mpls(struct subtest_cfg *cfg)
{
SYS(fail, "sysctl -qw net.mpls.platform_labels=%d",
MPLS_TABLE_ENTRIES_COUNT);
SYS(fail, "ip -f mpls route add 1000 dev lo");
SYS(fail, "ip link set lo up");
SYS(fail, "sysctl -qw net.mpls.conf.testtun0.input=1");
SYS(fail, "sysctl -qw net.ipv4.conf.lo.rp_filter=0");
return 0;
fail:
return -1;
}
/*
 * Attach the subtest encapsulation program to veth1 egress, in the current
 * network namespace. Returns the tc_prog_attach() result.
 */
static int configure_encapsulation(struct subtest_cfg *cfg)
{
	return tc_prog_attach("veth1", -1, cfg->client_egress_prog_fd);
}
static int configure_kernel_decapsulation(struct subtest_cfg *cfg)
{
struct nstoken *nstoken = open_netns(SERVER_NS);
if (cfg->configure_fou_rx_port &&
!ASSERT_OK(add_fou_rx_port(cfg), "configure FOU RX port"))
goto fail;
SYS(fail, "ip link add name testtun0 type %s %s remote %s local %s %s",
cfg->iproute_tun_type, cfg->tmode ? cfg->tmode : "",
cfg->tunnel_client_addr, cfg->tunnel_server_addr,
cfg->extra_decap_mod_args);
if (cfg->tunnel_need_veth_mac &&
!ASSERT_OK(update_tunnel_intf_addr(cfg), "update testtun0 mac"))
goto fail;
if (cfg->configure_mpls &&
(!ASSERT_OK(configure_kernel_for_mpls(cfg),
"configure MPLS decap")))
goto fail;
SYS(fail, "sysctl -qw net.ipv4.conf.all.rp_filter=0");
SYS(fail, "sysctl -qw net.ipv4.conf.testtun0.rp_filter=0");
SYS(fail, "ip link set dev testtun0 up");
close_netns(nstoken);
return 0;
fail:
close_netns(nstoken);
return -1;
}
/*
 * Best-effort removal, in the current network namespace, of what
 * configure_kernel_decapsulation() created: the testtun0 interface, the
 * MPLS route and the FOU RX port. Failures are ignored.
 */
static void remove_kernel_decapsulation(struct subtest_cfg *cfg)
{
	SYS_NOFAIL("ip link del testtun0");

	if (cfg->configure_mpls)
		SYS_NOFAIL("ip -f mpls route del 1000 dev lo");

	if (cfg->configure_fou_rx_port)
		del_fou_rx_port(cfg);
}
static int configure_ebpf_decapsulation(struct subtest_cfg *cfg)
{
struct nstoken *nstoken = open_netns(SERVER_NS);
if (!cfg->expect_kern_decap_failure)
SYS(fail, "ip link del testtun0");
if (!ASSERT_OK(tc_prog_attach("veth2", cfg->server_ingress_prog_fd, -1),
"attach_program"))
goto fail;
close_netns(nstoken);
return 0;
fail:
close_netns(nstoken);
return -1;
}
/*
 * Run one subtest from the client namespace: plain connection first, then
 * BPF encapsulation with kernel decapsulation (when supported), and finally
 * BPF encapsulation with BPF decapsulation. The first failing step ends the
 * subtest; the server is always stopped before returning.
 */
static void run_test(struct subtest_cfg *cfg)
{
	struct nstoken *client_ns = open_netns(CLIENT_NS);

	if (!ASSERT_OK(run_server(cfg), "run server"))
		goto out;

	/* Basic communication must work */
	if (!ASSERT_OK(send_and_test_data(cfg, true), "connect without any encap"))
		goto out;

	/* Attach encapsulation program to client */
	if (!ASSERT_OK(configure_encapsulation(cfg), "configure encapsulation"))
		goto out;

	/* If supported, insert kernel decap module, connection must succeed */
	if (!cfg->expect_kern_decap_failure) {
		if (!ASSERT_OK(configure_kernel_decapsulation(cfg),
			       "configure kernel decapsulation"))
			goto out;
		if (!ASSERT_OK(send_and_test_data(cfg, true),
			       "connect with encap prog and kern decap"))
			goto out;
	}

	/* Replace kernel decapsulation with BPF decapsulation, test must pass */
	if (!ASSERT_OK(configure_ebpf_decapsulation(cfg), "configure ebpf decapsulation"))
		goto out;
	ASSERT_OK(send_and_test_data(cfg, true), "connect with encap and decap progs");

out:
	stop_server(cfg);
	close_netns(client_ns);
}
static int setup(void)
{
struct nstoken *nstoken = NULL;
int fd, err;
fd = open("/dev/urandom", O_RDONLY);
if (!ASSERT_OK_FD(fd, "open urandom"))
goto fail;
err = read(fd, tx_buffer, BUFFER_LEN);
close(fd);
if (!ASSERT_EQ(err, BUFFER_LEN, "read random bytes"))
goto fail;
/* Configure the testing network */
if (!ASSERT_OK(make_netns(CLIENT_NS), "create client ns") ||
!ASSERT_OK(make_netns(SERVER_NS), "create server ns"))
goto fail;
nstoken = open_netns(CLIENT_NS);
SYS(fail, "ip link add %s type veth peer name %s",
"veth1 mtu 1500 netns " CLIENT_NS " address " MAC_ADDR_VETH1,
"veth2 mtu 1500 netns " SERVER_NS " address " MAC_ADDR_VETH2);
SYS(fail, "ethtool -K veth1 tso off");
SYS(fail, "ip link set veth1 up");
close_netns(nstoken);
nstoken = open_netns(SERVER_NS);
SYS(fail, "ip link set veth2 up");
close_netns(nstoken);
return 0;
fail:
close_netns(nstoken);
return 1;
}
static int subtest_setup(struct test_tc_tunnel *skel, struct subtest_cfg *cfg)
{
struct nstoken *nstoken;
set_subtest_addresses(cfg);
if (!ASSERT_OK(set_subtest_progs(cfg, skel),
"find subtest progs"))
return -1;
if (cfg->extra_decap_mod_args_cb)
cfg->extra_decap_mod_args_cb(cfg, cfg->extra_decap_mod_args);
nstoken = open_netns(CLIENT_NS);
SYS(fail, "ip -4 addr add " IP4_ADDR_VETH1 "/24 dev veth1");
SYS(fail, "ip -4 route flush table main");
SYS(fail, "ip -4 route add " IP4_ADDR_VETH2 " mtu 1450 dev veth1");
SYS(fail, "ip -6 addr add " IP6_ADDR_VETH1 "/64 dev veth1 nodad");
SYS(fail, "ip -6 route flush table main");
SYS(fail, "ip -6 route add " IP6_ADDR_VETH2 " mtu 1430 dev veth1");
close_netns(nstoken);
nstoken = open_netns(SERVER_NS);
SYS(fail, "ip -4 addr add " IP4_ADDR_VETH2 "/24 dev veth2");
SYS(fail, "ip -6 addr add " IP6_ADDR_VETH2 "/64 dev veth2 nodad");
close_netns(nstoken);
return 0;
fail:
close_netns(nstoken);
return -1;
}
/*
 * Best-effort per-subtest cleanup: remove the TC qdisc and flush the
 * addresses on both veths, and remove the kernel decapsulation setup on
 * the server when the subtest used one.
 */
static void subtest_cleanup(struct subtest_cfg *cfg)
{
	struct nstoken *ns;

	ns = open_netns(CLIENT_NS);
	SYS_NOFAIL("tc qdisc delete dev veth1 parent ffff:fff1");
	SYS_NOFAIL("ip a flush veth1");
	close_netns(ns);

	ns = open_netns(SERVER_NS);
	SYS_NOFAIL("tc qdisc delete dev veth2 parent ffff:fff1");
	SYS_NOFAIL("ip a flush veth2");
	if (!cfg->expect_kern_decap_failure)
		remove_kernel_decapsulation(cfg);
	close_netns(ns);
}
/* Global cleanup: remove the client namespace, then the server one. */
static void cleanup(void)
{
	remove_netns(CLIENT_NS);
	remove_netns(SERVER_NS);
}
/*
 * Table of all tunnel subtests. Each entry yields a subtest named
 * "<ebpf_tun_type>_<mac_tun_type>" and uses the BPF encapsulation program
 * of the same name prefixed with "__encap_". Fields left unset are
 * zero-initialized: no extra arguments, no FOU/MPLS configuration, kernel
 * decapsulation exercised, no large send, and tunnel addresses deduced
 * from ipproto.
 */
static struct subtest_cfg subtests_cfg[] = {
{
.ebpf_tun_type = "ipip",
.mac_tun_type = "none",
.iproute_tun_type = "ipip",
.ipproto = 4,
},
{
.ebpf_tun_type = "ipip6",
.mac_tun_type = "none",
.iproute_tun_type = "ip6tnl",
.ipproto = 4,
.tunnel_client_addr = IP6_ADDR_VETH1,
.tunnel_server_addr = IP6_ADDR_VETH2,
},
{
.ebpf_tun_type = "ip6tnl",
.iproute_tun_type = "ip6tnl",
.mac_tun_type = "none",
.ipproto = 6,
},
{
.mac_tun_type = "none",
.ebpf_tun_type = "sit",
.iproute_tun_type = "sit",
.ipproto = 6,
.tunnel_client_addr = IP4_ADDR_VETH1,
.tunnel_server_addr = IP4_ADDR_VETH2,
},
{
.ebpf_tun_type = "vxlan",
.mac_tun_type = "eth",
.iproute_tun_type = "vxlan",
.ipproto = 4,
.extra_decap_mod_args_cb = vxlan_decap_mod_args_cb,
.tunnel_need_veth_mac = true
},
{
.ebpf_tun_type = "ip6vxlan",
.mac_tun_type = "eth",
.iproute_tun_type = "vxlan",
.ipproto = 6,
.extra_decap_mod_args_cb = vxlan_decap_mod_args_cb,
.tunnel_need_veth_mac = true
},
{
.ebpf_tun_type = "gre",
.mac_tun_type = "none",
.iproute_tun_type = "gre",
.ipproto = 4,
.test_gso = true
},
{
.ebpf_tun_type = "gre",
.mac_tun_type = "eth",
.iproute_tun_type = "gretap",
.ipproto = 4,
.tunnel_need_veth_mac = true,
.test_gso = true
},
{
.ebpf_tun_type = "gre",
.mac_tun_type = "mpls",
.iproute_tun_type = "gre",
.ipproto = 4,
.configure_mpls = true,
.test_gso = true
},
{
.ebpf_tun_type = "ip6gre",
.mac_tun_type = "none",
.iproute_tun_type = "ip6gre",
.ipproto = 6,
.test_gso = true,
},
{
.ebpf_tun_type = "ip6gre",
.mac_tun_type = "eth",
.iproute_tun_type = "ip6gretap",
.ipproto = 6,
.tunnel_need_veth_mac = true,
.test_gso = true
},
{
.ebpf_tun_type = "ip6gre",
.mac_tun_type = "mpls",
.iproute_tun_type = "ip6gre",
.ipproto = 6,
.configure_mpls = true,
.test_gso = true
},
{
.ebpf_tun_type = "udp",
.mac_tun_type = "none",
.iproute_tun_type = "ipip",
.ipproto = 4,
.extra_decap_mod_args_cb = udp_decap_mod_args_cb,
.configure_fou_rx_port = true,
.test_gso = true
},
{
.ebpf_tun_type = "udp",
.mac_tun_type = "eth",
.iproute_tun_type = "ipip",
.ipproto = 4,
.extra_decap_mod_args_cb = udp_decap_mod_args_cb,
.configure_fou_rx_port = true,
.expect_kern_decap_failure = true,
.test_gso = true
},
{
.ebpf_tun_type = "udp",
.mac_tun_type = "mpls",
.iproute_tun_type = "ipip",
.ipproto = 4,
.extra_decap_mod_args_cb = udp_decap_mod_args_cb,
.configure_fou_rx_port = true,
.tmode = "mode any ttl 255",
.configure_mpls = true,
.test_gso = true
},
{
.ebpf_tun_type = "ip6udp",
.mac_tun_type = "none",
.iproute_tun_type = "ip6tnl",
.ipproto = 6,
.extra_decap_mod_args_cb = udp_decap_mod_args_cb,
.configure_fou_rx_port = true,
.test_gso = true
},
{
.ebpf_tun_type = "ip6udp",
.mac_tun_type = "eth",
.iproute_tun_type = "ip6tnl",
.ipproto = 6,
.extra_decap_mod_args_cb = udp_decap_mod_args_cb,
.configure_fou_rx_port = true,
.expect_kern_decap_failure = true,
.test_gso = true
},
{
.ebpf_tun_type = "ip6udp",
.mac_tun_type = "mpls",
.iproute_tun_type = "ip6tnl",
.ipproto = 6,
.extra_decap_mod_args_cb = udp_decap_mod_args_cb,
.configure_fou_rx_port = true,
.tmode = "mode any ttl 255",
.expect_kern_decap_failure = true,
.test_gso = true
},
};
/*
 * Test entry point: load the BPF skeleton, build the test network, then
 * run every selected subtest of subtests_cfg.
 *
 * The skeleton is destroyed and the namespaces are removed on every exit
 * path once they may have been created, so a failed run does not leak BPF
 * objects or leave stale namespaces behind for the next run.
 */
void test_tc_tunnel(void)
{
	struct test_tc_tunnel *skel;
	struct subtest_cfg *cfg;
	int i, ret;

	skel = test_tc_tunnel__open_and_load();
	if (!ASSERT_OK_PTR(skel, "skel open and load"))
		return;

	/* setup() may fail after creating namespaces: still clean them up */
	if (!ASSERT_OK(setup(), "global setup"))
		goto out;

	for (i = 0; i < ARRAY_SIZE(subtests_cfg); i++) {
		cfg = &subtests_cfg[i];
		ret = build_subtest_name(cfg, cfg->name, TEST_NAME_MAX_LEN);
		if (ret < 0 || !test__start_subtest(cfg->name))
			continue;

		/*
		 * subtest_setup() reports its own failures; do not run the
		 * subtest on a half-configured network, but always undo
		 * whatever was configured.
		 */
		if (!subtest_setup(skel, cfg))
			run_test(cfg);
		subtest_cleanup(cfg);
	}

out:
	cleanup();
	test_tc_tunnel__destroy(skel);
}

View File

@ -534,85 +534,6 @@ static void ping6_dev1(void)
close_netns(nstoken);
}
static int attach_tc_prog(int ifindex, int igr_fd, int egr_fd)
{
DECLARE_LIBBPF_OPTS(bpf_tc_hook, hook, .ifindex = ifindex,
.attach_point = BPF_TC_INGRESS | BPF_TC_EGRESS);
DECLARE_LIBBPF_OPTS(bpf_tc_opts, opts1, .handle = 1,
.priority = 1, .prog_fd = igr_fd);
DECLARE_LIBBPF_OPTS(bpf_tc_opts, opts2, .handle = 1,
.priority = 1, .prog_fd = egr_fd);
int ret;
ret = bpf_tc_hook_create(&hook);
if (!ASSERT_OK(ret, "create tc hook"))
return ret;
if (igr_fd >= 0) {
hook.attach_point = BPF_TC_INGRESS;
ret = bpf_tc_attach(&hook, &opts1);
if (!ASSERT_OK(ret, "bpf_tc_attach")) {
bpf_tc_hook_destroy(&hook);
return ret;
}
}
if (egr_fd >= 0) {
hook.attach_point = BPF_TC_EGRESS;
ret = bpf_tc_attach(&hook, &opts2);
if (!ASSERT_OK(ret, "bpf_tc_attach")) {
bpf_tc_hook_destroy(&hook);
return ret;
}
}
return 0;
}
/*
 * Attach @igr_fd to the ingress and @egr_fd to the egress of interface
 * @dev; both fds must be valid.
 *
 * Returns -1 on invalid fd or unknown interface, otherwise the
 * attach_tc_prog() result.
 */
static int generic_attach(const char *dev, int igr_fd, int egr_fd)
{
	int ifindex;

	if (!ASSERT_OK_FD(igr_fd, "check ingress fd") ||
	    !ASSERT_OK_FD(egr_fd, "check egress fd"))
		return -1;

	ifindex = if_nametoindex(dev);
	if (!ASSERT_NEQ(ifindex, 0, "get ifindex"))
		return -1;

	return attach_tc_prog(ifindex, igr_fd, egr_fd);
}
/*
 * Attach @igr_fd to the ingress of interface @dev.
 *
 * Returns -1 on invalid fd or unknown interface, otherwise the
 * attach_tc_prog() result.
 */
static int generic_attach_igr(const char *dev, int igr_fd)
{
	int ifindex;

	if (!ASSERT_OK_FD(igr_fd, "check ingress fd"))
		return -1;

	ifindex = if_nametoindex(dev);
	if (!ASSERT_NEQ(ifindex, 0, "get ifindex"))
		return -1;

	/* No egress program */
	return attach_tc_prog(ifindex, igr_fd, -1);
}
/*
 * Attach @egr_fd to the egress of interface @dev.
 *
 * Returns -1 on invalid fd or unknown interface, otherwise the
 * attach_tc_prog() result.
 */
static int generic_attach_egr(const char *dev, int egr_fd)
{
	int ifindex;

	if (!ASSERT_OK_FD(egr_fd, "check egress fd"))
		return -1;

	ifindex = if_nametoindex(dev);
	if (!ASSERT_NEQ(ifindex, 0, "get ifindex"))
		return -1;

	/* No ingress program */
	return attach_tc_prog(ifindex, -1, egr_fd);
}
static void test_vxlan_tunnel(void)
{
struct test_tunnel_kern *skel = NULL;
@ -635,12 +556,12 @@ static void test_vxlan_tunnel(void)
goto done;
get_src_prog_fd = bpf_program__fd(skel->progs.vxlan_get_tunnel_src);
set_src_prog_fd = bpf_program__fd(skel->progs.vxlan_set_tunnel_src);
if (generic_attach(VXLAN_TUNL_DEV1, get_src_prog_fd, set_src_prog_fd))
if (tc_prog_attach(VXLAN_TUNL_DEV1, get_src_prog_fd, set_src_prog_fd))
goto done;
/* load and attach bpf prog to veth dev tc hook point */
set_dst_prog_fd = bpf_program__fd(skel->progs.veth_set_outer_dst);
if (generic_attach_igr("veth1", set_dst_prog_fd))
if (tc_prog_attach("veth1", set_dst_prog_fd, -1))
goto done;
/* load and attach prog set_md to tunnel dev tc hook point at_ns0 */
@ -648,7 +569,7 @@ static void test_vxlan_tunnel(void)
if (!ASSERT_OK_PTR(nstoken, "setns src"))
goto done;
set_dst_prog_fd = bpf_program__fd(skel->progs.vxlan_set_tunnel_dst);
if (generic_attach_egr(VXLAN_TUNL_DEV0, set_dst_prog_fd))
if (tc_prog_attach(VXLAN_TUNL_DEV0, -1, set_dst_prog_fd))
goto done;
close_netns(nstoken);
@ -695,7 +616,7 @@ static void test_ip6vxlan_tunnel(void)
goto done;
get_src_prog_fd = bpf_program__fd(skel->progs.ip6vxlan_get_tunnel_src);
set_src_prog_fd = bpf_program__fd(skel->progs.ip6vxlan_set_tunnel_src);
if (generic_attach(IP6VXLAN_TUNL_DEV1, get_src_prog_fd, set_src_prog_fd))
if (tc_prog_attach(IP6VXLAN_TUNL_DEV1, get_src_prog_fd, set_src_prog_fd))
goto done;
/* load and attach prog set_md to tunnel dev tc hook point at_ns0 */
@ -703,7 +624,7 @@ static void test_ip6vxlan_tunnel(void)
if (!ASSERT_OK_PTR(nstoken, "setns src"))
goto done;
set_dst_prog_fd = bpf_program__fd(skel->progs.ip6vxlan_set_tunnel_dst);
if (generic_attach_egr(IP6VXLAN_TUNL_DEV0, set_dst_prog_fd))
if (tc_prog_attach(IP6VXLAN_TUNL_DEV0, -1, set_dst_prog_fd))
goto done;
close_netns(nstoken);
@ -764,7 +685,7 @@ static void test_ipip_tunnel(enum ipip_encap encap)
skel->progs.ipip_set_tunnel);
}
if (generic_attach(IPIP_TUNL_DEV1, get_src_prog_fd, set_src_prog_fd))
if (tc_prog_attach(IPIP_TUNL_DEV1, get_src_prog_fd, set_src_prog_fd))
goto done;
ping_dev0();
@ -797,7 +718,7 @@ static void test_xfrm_tunnel(void)
/* attach tc prog to tunnel dev */
tc_prog_fd = bpf_program__fd(skel->progs.xfrm_get_state);
if (generic_attach_igr("veth1", tc_prog_fd))
if (tc_prog_attach("veth1", tc_prog_fd, -1))
goto done;
/* attach xdp prog to tunnel dev */
@ -870,7 +791,7 @@ static void test_gre_tunnel(enum gre_test test)
if (!ASSERT_OK(err, "add tunnel"))
goto done;
if (generic_attach(GRE_TUNL_DEV1, get_fd, set_fd))
if (tc_prog_attach(GRE_TUNL_DEV1, get_fd, set_fd))
goto done;
ping_dev0();
@ -911,7 +832,7 @@ static void test_ip6gre_tunnel(enum ip6gre_test test)
set_fd = bpf_program__fd(skel->progs.ip6gretap_set_tunnel);
get_fd = bpf_program__fd(skel->progs.ip6gretap_get_tunnel);
if (generic_attach(IP6GRE_TUNL_DEV1, get_fd, set_fd))
if (tc_prog_attach(IP6GRE_TUNL_DEV1, get_fd, set_fd))
goto done;
ping6_veth0();
@ -954,7 +875,7 @@ static void test_erspan_tunnel(enum erspan_test test)
set_fd = bpf_program__fd(skel->progs.erspan_set_tunnel);
get_fd = bpf_program__fd(skel->progs.erspan_get_tunnel);
if (generic_attach(ERSPAN_TUNL_DEV1, get_fd, set_fd))
if (tc_prog_attach(ERSPAN_TUNL_DEV1, get_fd, set_fd))
goto done;
ping_dev0();
@ -990,7 +911,7 @@ static void test_ip6erspan_tunnel(enum erspan_test test)
set_fd = bpf_program__fd(skel->progs.ip4ip6erspan_set_tunnel);
get_fd = bpf_program__fd(skel->progs.ip4ip6erspan_get_tunnel);
if (generic_attach(IP6ERSPAN_TUNL_DEV1, get_fd, set_fd))
if (tc_prog_attach(IP6ERSPAN_TUNL_DEV1, get_fd, set_fd))
goto done;
ping6_veth0();
@ -1017,7 +938,7 @@ static void test_geneve_tunnel(void)
set_fd = bpf_program__fd(skel->progs.geneve_set_tunnel);
get_fd = bpf_program__fd(skel->progs.geneve_get_tunnel);
if (generic_attach(GENEVE_TUNL_DEV1, get_fd, set_fd))
if (tc_prog_attach(GENEVE_TUNL_DEV1, get_fd, set_fd))
goto done;
ping_dev0();
@ -1044,7 +965,7 @@ static void test_ip6geneve_tunnel(void)
set_fd = bpf_program__fd(skel->progs.ip6geneve_set_tunnel);
get_fd = bpf_program__fd(skel->progs.ip6geneve_get_tunnel);
if (generic_attach(IP6GENEVE_TUNL_DEV1, get_fd, set_fd))
if (tc_prog_attach(IP6GENEVE_TUNL_DEV1, get_fd, set_fd))
goto done;
ping_dev0();
@ -1083,7 +1004,7 @@ static void test_ip6tnl_tunnel(enum ip6tnl_test test)
get_fd = bpf_program__fd(skel->progs.ip6ip6_get_tunnel);
break;
}
if (generic_attach(IP6TNL_TUNL_DEV1, get_fd, set_fd))
if (tc_prog_attach(IP6TNL_TUNL_DEV1, get_fd, set_fd))
goto done;
ping6_veth0();

View File

@ -2,23 +2,11 @@
/* In-place tunneling */
#include <stdbool.h>
#include <string.h>
#include <vmlinux.h>
#include <linux/stddef.h>
#include <linux/bpf.h>
#include <linux/if_ether.h>
#include <linux/in.h>
#include <linux/ip.h>
#include <linux/ipv6.h>
#include <linux/mpls.h>
#include <linux/tcp.h>
#include <linux/udp.h>
#include <linux/pkt_cls.h>
#include <linux/types.h>
#include <bpf/bpf_endian.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_endian.h>
#include "bpf_tracing_net.h"
#include "bpf_compiler.h"
#pragma GCC diagnostic ignored "-Waddress-of-packed-member"
@ -27,6 +15,14 @@ static const int cfg_port = 8000;
static const int cfg_udp_src = 20000;
#define ETH_P_MPLS_UC 0x8847
#define ETH_P_TEB 0x6558
#define MPLS_LS_S_MASK 0x00000100
#define BPF_F_ADJ_ROOM_ENCAP_L2(len) \
(((__u64)len & BPF_ADJ_ROOM_ENCAP_L2_MASK) \
<< BPF_ADJ_ROOM_ENCAP_L2_SHIFT)
#define L2_PAD_SZ (sizeof(struct vxlanhdr) + ETH_HLEN)
#define UDP_PORT 5555
@ -36,10 +32,9 @@ static const int cfg_udp_src = 20000;
#define EXTPROTO_VXLAN 0x1
#define VXLAN_N_VID (1u << 24)
#define VXLAN_VNI_MASK bpf_htonl((VXLAN_N_VID - 1) << 8)
#define VXLAN_FLAGS 0x8
#define VXLAN_VNI 1
#define VXLAN_FLAGS bpf_htonl(1<<27)
#define VNI_ID 1
#define VXLAN_VNI bpf_htonl(VNI_ID << 8)
#ifndef NEXTHDR_DEST
#define NEXTHDR_DEST 60
@ -48,12 +43,6 @@ static const int cfg_udp_src = 20000;
/* MPLS label 1000 with S bit (last label) set and ttl of 255. */
static const __u32 mpls_label = __bpf_constant_htonl(1000 << 12 |
MPLS_LS_S_MASK | 0xff);
struct vxlanhdr {
__be32 vx_flags;
__be32 vx_vni;
} __attribute__((packed));
struct gre_hdr {
__be16 flags;
__be16 protocol;
@ -94,8 +83,8 @@ static __always_inline void set_ipv4_csum(struct iphdr *iph)
static __always_inline int __encap_ipv4(struct __sk_buff *skb, __u8 encap_proto,
__u16 l2_proto, __u16 ext_proto)
{
struct iphdr iph_inner = {0};
__u16 udp_dst = UDP_PORT;
struct iphdr iph_inner;
struct v4hdr h_outer;
struct tcphdr tcph;
int olen, l2_len;
@ -122,7 +111,6 @@ static __always_inline int __encap_ipv4(struct __sk_buff *skb, __u8 encap_proto,
return TC_ACT_OK;
/* Derive the IPv4 header fields from the IPv6 header */
memset(&iph_inner, 0, sizeof(iph_inner));
iph_inner.version = 4;
iph_inner.ihl = 5;
iph_inner.tot_len = bpf_htons(sizeof(iph6_inner) +
@ -210,7 +198,7 @@ static __always_inline int __encap_ipv4(struct __sk_buff *skb, __u8 encap_proto,
struct vxlanhdr *vxlan_hdr = (struct vxlanhdr *)l2_hdr;
vxlan_hdr->vx_flags = VXLAN_FLAGS;
vxlan_hdr->vx_vni = bpf_htonl((VXLAN_VNI & VXLAN_VNI_MASK) << 8);
vxlan_hdr->vx_vni = VXLAN_VNI;
l2_hdr += sizeof(struct vxlanhdr);
}
@ -340,7 +328,7 @@ static __always_inline int __encap_ipv6(struct __sk_buff *skb, __u8 encap_proto,
struct vxlanhdr *vxlan_hdr = (struct vxlanhdr *)l2_hdr;
vxlan_hdr->vx_flags = VXLAN_FLAGS;
vxlan_hdr->vx_vni = bpf_htonl((VXLAN_VNI & VXLAN_VNI_MASK) << 8);
vxlan_hdr->vx_vni = VXLAN_VNI;
l2_hdr += sizeof(struct vxlanhdr);
}
@ -372,8 +360,8 @@ static __always_inline int __encap_ipv6(struct __sk_buff *skb, __u8 encap_proto,
static int encap_ipv6_ipip6(struct __sk_buff *skb)
{
struct v6hdr h_outer = {0};
struct iphdr iph_inner;
struct v6hdr h_outer;
struct tcphdr tcph;
struct ethhdr eth;
__u64 flags;
@ -400,13 +388,12 @@ static int encap_ipv6_ipip6(struct __sk_buff *skb)
return TC_ACT_SHOT;
/* prepare new outer network header */
memset(&h_outer.ip, 0, sizeof(h_outer.ip));
h_outer.ip.version = 6;
h_outer.ip.hop_limit = iph_inner.ttl;
h_outer.ip.saddr.s6_addr[1] = 0xfd;
h_outer.ip.saddr.s6_addr[15] = 1;
h_outer.ip.daddr.s6_addr[1] = 0xfd;
h_outer.ip.daddr.s6_addr[15] = 2;
h_outer.ip.saddr.in6_u.u6_addr8[1] = 0xfd;
h_outer.ip.saddr.in6_u.u6_addr8[15] = 1;
h_outer.ip.daddr.in6_u.u6_addr8[1] = 0xfd;
h_outer.ip.daddr.in6_u.u6_addr8[15] = 2;
h_outer.ip.payload_len = iph_inner.tot_len;
h_outer.ip.nexthdr = IPPROTO_IPIP;
@ -431,7 +418,7 @@ static __always_inline int encap_ipv6(struct __sk_buff *skb, __u8 encap_proto,
return __encap_ipv6(skb, encap_proto, l2_proto, 0);
}
SEC("encap_ipip_none")
SEC("tc")
int __encap_ipip_none(struct __sk_buff *skb)
{
if (skb->protocol == __bpf_constant_htons(ETH_P_IP))
@ -440,7 +427,7 @@ int __encap_ipip_none(struct __sk_buff *skb)
return TC_ACT_OK;
}
SEC("encap_gre_none")
SEC("tc")
int __encap_gre_none(struct __sk_buff *skb)
{
if (skb->protocol == __bpf_constant_htons(ETH_P_IP))
@ -449,7 +436,7 @@ int __encap_gre_none(struct __sk_buff *skb)
return TC_ACT_OK;
}
SEC("encap_gre_mpls")
SEC("tc")
int __encap_gre_mpls(struct __sk_buff *skb)
{
if (skb->protocol == __bpf_constant_htons(ETH_P_IP))
@ -458,7 +445,7 @@ int __encap_gre_mpls(struct __sk_buff *skb)
return TC_ACT_OK;
}
SEC("encap_gre_eth")
SEC("tc")
int __encap_gre_eth(struct __sk_buff *skb)
{
if (skb->protocol == __bpf_constant_htons(ETH_P_IP))
@ -467,7 +454,7 @@ int __encap_gre_eth(struct __sk_buff *skb)
return TC_ACT_OK;
}
SEC("encap_udp_none")
SEC("tc")
int __encap_udp_none(struct __sk_buff *skb)
{
if (skb->protocol == __bpf_constant_htons(ETH_P_IP))
@ -476,7 +463,7 @@ int __encap_udp_none(struct __sk_buff *skb)
return TC_ACT_OK;
}
SEC("encap_udp_mpls")
SEC("tc")
int __encap_udp_mpls(struct __sk_buff *skb)
{
if (skb->protocol == __bpf_constant_htons(ETH_P_IP))
@ -485,7 +472,7 @@ int __encap_udp_mpls(struct __sk_buff *skb)
return TC_ACT_OK;
}
SEC("encap_udp_eth")
SEC("tc")
int __encap_udp_eth(struct __sk_buff *skb)
{
if (skb->protocol == __bpf_constant_htons(ETH_P_IP))
@ -494,7 +481,7 @@ int __encap_udp_eth(struct __sk_buff *skb)
return TC_ACT_OK;
}
SEC("encap_vxlan_eth")
SEC("tc")
int __encap_vxlan_eth(struct __sk_buff *skb)
{
if (skb->protocol == __bpf_constant_htons(ETH_P_IP))
@ -505,7 +492,7 @@ int __encap_vxlan_eth(struct __sk_buff *skb)
return TC_ACT_OK;
}
SEC("encap_sit_none")
SEC("tc")
int __encap_sit_none(struct __sk_buff *skb)
{
if (skb->protocol == __bpf_constant_htons(ETH_P_IPV6))
@ -514,7 +501,7 @@ int __encap_sit_none(struct __sk_buff *skb)
return TC_ACT_OK;
}
SEC("encap_ip6tnl_none")
SEC("tc")
int __encap_ip6tnl_none(struct __sk_buff *skb)
{
if (skb->protocol == __bpf_constant_htons(ETH_P_IPV6))
@ -523,7 +510,7 @@ int __encap_ip6tnl_none(struct __sk_buff *skb)
return TC_ACT_OK;
}
SEC("encap_ipip6_none")
SEC("tc")
int __encap_ipip6_none(struct __sk_buff *skb)
{
if (skb->protocol == __bpf_constant_htons(ETH_P_IP))
@ -532,7 +519,7 @@ int __encap_ipip6_none(struct __sk_buff *skb)
return TC_ACT_OK;
}
SEC("encap_ip6gre_none")
SEC("tc")
int __encap_ip6gre_none(struct __sk_buff *skb)
{
if (skb->protocol == __bpf_constant_htons(ETH_P_IPV6))
@ -541,7 +528,7 @@ int __encap_ip6gre_none(struct __sk_buff *skb)
return TC_ACT_OK;
}
SEC("encap_ip6gre_mpls")
SEC("tc")
int __encap_ip6gre_mpls(struct __sk_buff *skb)
{
if (skb->protocol == __bpf_constant_htons(ETH_P_IPV6))
@ -550,7 +537,7 @@ int __encap_ip6gre_mpls(struct __sk_buff *skb)
return TC_ACT_OK;
}
SEC("encap_ip6gre_eth")
SEC("tc")
int __encap_ip6gre_eth(struct __sk_buff *skb)
{
if (skb->protocol == __bpf_constant_htons(ETH_P_IPV6))
@ -559,7 +546,7 @@ int __encap_ip6gre_eth(struct __sk_buff *skb)
return TC_ACT_OK;
}
SEC("encap_ip6udp_none")
SEC("tc")
int __encap_ip6udp_none(struct __sk_buff *skb)
{
if (skb->protocol == __bpf_constant_htons(ETH_P_IPV6))
@ -568,7 +555,7 @@ int __encap_ip6udp_none(struct __sk_buff *skb)
return TC_ACT_OK;
}
SEC("encap_ip6udp_mpls")
SEC("tc")
int __encap_ip6udp_mpls(struct __sk_buff *skb)
{
if (skb->protocol == __bpf_constant_htons(ETH_P_IPV6))
@ -577,7 +564,7 @@ int __encap_ip6udp_mpls(struct __sk_buff *skb)
return TC_ACT_OK;
}
SEC("encap_ip6udp_eth")
SEC("tc")
int __encap_ip6udp_eth(struct __sk_buff *skb)
{
if (skb->protocol == __bpf_constant_htons(ETH_P_IPV6))
@ -586,7 +573,7 @@ int __encap_ip6udp_eth(struct __sk_buff *skb)
return TC_ACT_OK;
}
SEC("encap_ip6vxlan_eth")
SEC("tc")
int __encap_ip6vxlan_eth(struct __sk_buff *skb)
{
if (skb->protocol == __bpf_constant_htons(ETH_P_IPV6))
@ -693,7 +680,7 @@ static int decap_ipv6(struct __sk_buff *skb)
iph_outer.nexthdr);
}
SEC("decap")
SEC("tc")
int decap_f(struct __sk_buff *skb)
{
switch (skb->protocol) {

View File

@ -1,320 +0,0 @@
#!/bin/bash
# SPDX-License-Identifier: GPL-2.0
#
# In-place tunneling

BPF_FILE="test_tc_tunnel.bpf.o"

# TCP port of the test connection; must match the port that the bpf
# program filters on
port=8000

# One pair of network namespaces per script instance ($$ keeps the names
# unique between concurrently running instances).
ns_prefix="ns-$$-"
ns1="${ns_prefix}1"
ns2="${ns_prefix}2"

# Addresses assigned to veth1 (ns1) and veth2 (ns2)
ns1_v4=192.168.1.1
ns2_v4=192.168.1.2
ns1_v6=fd::1
ns2_v6=fd::2

# Must match port used by bpf program
udpport=5555

# MPLSoverUDP: destination port and the protocol number handed to
# "ip fou add ... ipproto" for MPLS payloads
mplsudpport=6635
mplsproto=137

# Payload sent by the client and payload captured by the server
infile="$(mktemp)"
outfile="$(mktemp)"

readonly port ns_prefix ns1 ns2
readonly ns1_v4 ns2_v4 ns1_v6 ns2_v6
readonly udpport mplsudpport mplsproto
readonly infile outfile
# Create both namespaces, connect them with a veth pair, assign the IPv4 and
# IPv6 test addresses and generate the random payload of ${datalen} bytes
# that the client will send.
setup() {
	local -a in_ns1=(ip -netns "${ns1}")
	local -a in_ns2=(ip -netns "${ns2}")

	ip netns add "${ns1}"
	ip netns add "${ns2}"

	ip link add dev veth1 mtu 1500 netns "${ns1}" type veth \
		peer name veth2 mtu 1500 netns "${ns2}"

	ip netns exec "${ns1}" ethtool -K veth1 tso off

	"${in_ns1[@]}" link set veth1 up
	"${in_ns2[@]}" link set veth2 up

	"${in_ns1[@]}" -4 addr add "${ns1_v4}/24" dev veth1
	"${in_ns2[@]}" -4 addr add "${ns2_v4}/24" dev veth2
	"${in_ns1[@]}" -6 addr add "${ns1_v6}/64" dev veth1 nodad
	"${in_ns2[@]}" -6 addr add "${ns2_v6}/64" dev veth2 nodad

	# clamp route to reserve room for tunnel headers
	"${in_ns1[@]}" -4 route flush table main
	"${in_ns1[@]}" -6 route flush table main
	"${in_ns1[@]}" -4 route add "${ns2_v4}" mtu 1450 dev veth1
	"${in_ns1[@]}" -6 route add "${ns2_v6}" mtu 1430 dev veth1

	sleep 1

	dd if=/dev/urandom of="${infile}" bs="${datalen}" count=1 status=none
}
# EXIT handler: tear down both namespaces, remove the payload files and stop
# a listener that is still running.
#
# Every step is best-effort. The handler runs with "set -e" in effect and may
# be entered after a partial setup (e.g. a namespace that was never created),
# so a failing step must not prevent the remaining ones from running.
cleanup() {
	ip netns del "${ns2}" || true
	ip netns del "${ns1}" || true

	# rm -f is silent when a file is already gone
	rm -f "${outfile}" "${infile}"

	if [[ -n "${server_pid:-}" ]]; then
		kill "${server_pid}" 2> /dev/null || true
	fi
}
# Start a background nc listener in ns2 on ${port}, writing everything it
# receives to ${outfile}. Its pid is left in the global server_pid.
server_listen() {
	local -a listener=(nc "${netcat_opt}" -l "${port}")

	ip netns exec "${ns2}" "${listener[@]}" > "${outfile}" &
	server_pid=$!
}
# Send ${infile} from ns1 to ${addr2}:${port} with nc, bounded by a 2 second
# timeout.
#
# Prints the exit status of the transfer on stdout and returns that same
# status, so callers can test the outcome directly. (Ending the function
# with a bare "echo $?" would make it return echo's status, i.e. always 0.)
client_connect() {
	local rc=0

	ip netns exec "${ns1}" timeout 2 nc "${netcat_opt}" -w 1 "${addr2}" "${port}" < "${infile}" || rc=$?
	echo "${rc}"
	return "${rc}"
}
# Wait for the listener to finish, then check that the data it captured in
# ${outfile} is identical to the data sent from ${infile}. Exits the script
# with status 1 on mismatch.
verify_data() {
	wait "${server_pid}"
	server_pid=

	# sha1sum prints two fields: [sha1] [name]
	# convert to bash array and access first elem. Feeding the files on
	# stdin keeps the temp paths out of the word-split output, so paths
	# containing whitespace or glob characters are handled correctly.
	insum=($(sha1sum < "${infile}"))
	outsum=($(sha1sum < "${outfile}"))

	if [[ "${insum[0]}" != "${outsum[0]}" ]]; then
		echo "data mismatch"
		exit 1
	fi
}
# Poll ns2 until a listening TCP socket matching $1 shows up in ss output.
#   $1: text to look for (the port number)
#   $2: address family option for ss, defaults to -4
# Makes 20 attempts, 0.1s apart. Returns 0 once found, 1 otherwise.
wait_for_port() {
	local wanted="$1"
	local family_opt="${2:--4}"
	local attempt=0

	while [[ "${attempt}" -lt 20 ]]; do
		if ip netns exec "${ns2}" ss ${family_opt}OHntl | grep -q "${wanted}"; then
			return 0
		fi
		sleep 0.1
		attempt=$((attempt + 1))
	done

	return 1
}
set -e

# no arguments: automated test, run all
#
# Each test case is run by re-executing this script with four arguments;
# with "set -e" the first failing case aborts the whole run.
if [[ "$#" -eq "0" ]]; then
	# The payload files were created unconditionally at the top of the
	# script but are not used by this dispatcher, and the cleanup trap
	# is only installed for a single test case. Remove them here so that
	# a full run does not leave temp files behind.
	trap 'rm -f "${infile}" "${outfile}"' EXIT

	echo "ipip"
	"$0" ipv4 ipip none 100

	echo "ipip6"
	"$0" ipv4 ipip6 none 100

	echo "ip6ip6"
	"$0" ipv6 ip6tnl none 100

	echo "sit"
	"$0" ipv6 sit none 100

	echo "ip4 vxlan"
	"$0" ipv4 vxlan eth 2000

	echo "ip6 vxlan"
	"$0" ipv6 ip6vxlan eth 2000

	for mac in none mpls eth ; do
		echo "ip gre $mac"
		"$0" ipv4 gre $mac 100

		echo "ip6 gre $mac"
		"$0" ipv6 ip6gre $mac 100

		echo "ip gre $mac gso"
		"$0" ipv4 gre $mac 2000

		echo "ip6 gre $mac gso"
		"$0" ipv6 ip6gre $mac 2000

		echo "ip udp $mac"
		"$0" ipv4 udp $mac 100

		echo "ip6 udp $mac"
		"$0" ipv6 ip6udp $mac 100

		echo "ip udp $mac gso"
		"$0" ipv4 udp $mac 2000

		echo "ip6 udp $mac gso"
		"$0" ipv6 ip6udp $mac 2000
	done

	echo "OK. All tests passed"
	exit 0
fi
# Single test case: <ipv4|ipv6> <tuntype> <none|mpls|eth> <data_len>
#
# The payload files already exist at this point and no cleanup trap is
# installed yet, so the error exits below remove them explicitly.
if [[ "$#" -ne "4" ]]; then
	echo "Usage: $0"
	echo "   or: $0 <ipv4|ipv6> <tuntype> <none|mpls|eth> <data_len>"
	rm -f "${infile}" "${outfile}"
	exit 1
fi

# Per address family settings: test addresses, nc option, and the fou
# module / tunnel type / protocol arguments used for UDP encapsulation.
case "$1" in
"ipv4")
	readonly addr1="${ns1_v4}"
	readonly addr2="${ns2_v4}"
	readonly ipproto=4
	readonly netcat_opt=-${ipproto}
	readonly foumod=fou
	readonly foutype=ipip
	readonly fouproto=4
	readonly fouproto_mpls=${mplsproto}
	readonly gretaptype=gretap
	;;
"ipv6")
	readonly addr1="${ns1_v6}"
	readonly addr2="${ns2_v6}"
	readonly ipproto=6
	readonly netcat_opt=-${ipproto}
	readonly foumod=fou6
	readonly foutype=ip6tnl
	# expanded unquoted later on, so "-6" becomes a separate argument
	readonly fouproto="41 -6"
	readonly fouproto_mpls="${mplsproto} -6"
	readonly gretaptype=ip6gretap
	;;
*)
	echo "unknown arg: $1"
	rm -f "${infile}" "${outfile}"
	exit 1
	;;
esac

readonly tuntype=$2
readonly mac=$3
readonly datalen=$4

echo "encap ${addr1} to ${addr2}, type ${tuntype}, mac ${mac} len ${datalen}"
trap cleanup EXIT

setup

# basic communication works
echo "test basic connectivity"
server_listen
wait_for_port ${port} ${netcat_opt}
client_connect
verify_data

# clientside, insert bpf program to encap all TCP to port ${port}
# client can no longer connect
ip netns exec "${ns1}" tc qdisc add dev veth1 clsact
ip netns exec "${ns1}" tc filter add dev veth1 egress \
	bpf direct-action object-file ${BPF_FILE} \
	section "encap_${tuntype}_${mac}"
echo "test bpf encap without decap (expect failure)"
server_listen
wait_for_port ${port} ${netcat_opt}

# "set -e" never acts on a command negated with "!", so the expected failure
# has to be checked explicitly. client_connect prints the exit status of the
# transfer as its last line of output; use that rather than its return value.
client_status="$(client_connect || true)"
echo "${client_status}"
if [[ "${client_status##*$'\n'}" == "0" ]]; then
	echo "unexpected success: client connected although nothing decapsulates"
	exit 1
fi
# Pick the kernel tunnel device type (ttype) and the extra "ip link add"
# arguments (tmode before, targs after the endpoints) that decapsulate what
# the selected bpf program produces.
#
# Start from empty values so that nothing is inherited from the environment
# in the branches that do not set all three.
ttype=
targs=
tmode=

if [[ "$tuntype" =~ "udp" ]]; then
	# Set up fou tunnel.
	ttype="${foutype}"
	# fou may be a module; allow this to fail.
	modprobe "${foumod}" ||true
	if [[ "$mac" == "mpls" ]]; then
		dport=${mplsudpport}
		dproto=${fouproto_mpls}
		tmode="mode any ttl 255"
	else
		dport=${udpport}
		dproto=${fouproto}
	fi
	# dproto is expanded unquoted on purpose: for IPv6 it carries "-6"
	# as a second argument.
	ip netns exec "${ns2}" ip fou add port $dport ipproto ${dproto}
	targs="encap fou encap-sport auto encap-dport $dport"
elif [[ "$tuntype" =~ "gre" && "$mac" == "eth" ]]; then
	ttype=$gretaptype
elif [[ "$tuntype" =~ "vxlan" && "$mac" == "eth" ]]; then
	ttype="vxlan"
	targs="id 1 dstport 8472 udp6zerocsumrx"
elif [[ "$tuntype" == "ipip6" ]]; then
	ttype="ip6tnl"
	targs=""
else
	ttype=$tuntype
	targs=""
fi
# Endpoints of the tunnel device. For sit and ipip6 the outer address family
# is not the one of the inner connection, so the addresses of the other
# family are used; every other tunnel type reuses the connection addresses.
case "${tuntype}" in
sit)
	link_addr1="${ns1_v4}"
	link_addr2="${ns2_v4}"
	;;
ipip6)
	link_addr1="${ns1_v6}"
	link_addr2="${ns2_v6}"
	;;
*)
	link_addr1="${addr1}"
	link_addr2="${addr2}"
	;;
esac
# serverside, insert decap module
# server is still running
# client can connect again
ip netns exec "${ns2}" ip link add name testtun0 type "${ttype}" \
	${tmode} remote "${link_addr1}" local "${link_addr2}" $targs

expect_tun_fail=0

if [[ "$tuntype" == "ip6udp" && "$mac" == "mpls" ]]; then
	# No support for MPLS IPv6 fou tunnel; expect failure.
	expect_tun_fail=1
elif [[ "$tuntype" =~ "udp" && "$mac" == "eth" ]]; then
	# No support for TEB fou tunnel; expect failure.
	expect_tun_fail=1
elif [[ "$tuntype" =~ (gre|vxlan) && "$mac" == "eth" ]]; then
	# Share ethernet address between tunnel/veth2 so L2 decap works.
	ethaddr=$(ip netns exec "${ns2}" ip link show veth2 | \
		  awk '/ether/ { print $2 }')
	ip netns exec "${ns2}" ip link set testtun0 address $ethaddr
elif [[ "$mac" == "mpls" ]]; then
	modprobe mpls_iptunnel ||true
	modprobe mpls_gso ||true
	ip netns exec "${ns2}" sysctl -qw net.mpls.platform_labels=65536
	ip netns exec "${ns2}" ip -f mpls route add 1000 dev lo
	ip netns exec "${ns2}" ip link set lo up
	ip netns exec "${ns2}" sysctl -qw net.mpls.conf.testtun0.input=1
	ip netns exec "${ns2}" sysctl -qw net.ipv4.conf.lo.rp_filter=0
fi

# Because packets are decapped by the tunnel they arrive on testtun0 from
# the IP stack perspective.  Ensure reverse path filtering is disabled
# otherwise we drop the TCP SYN as arriving on testtun0 instead of the
# expected veth2 (veth2 is where 192.168.1.2 is configured).
ip netns exec "${ns2}" sysctl -qw net.ipv4.conf.all.rp_filter=0
# rp needs to be disabled for both all and testtun0 as the rp value is
# selected as the max of the "all" and device-specific values.
ip netns exec "${ns2}" sysctl -qw net.ipv4.conf.testtun0.rp_filter=0
ip netns exec "${ns2}" ip link set dev testtun0 up

if [[ "$expect_tun_fail" == 1 ]]; then
	# This tunnel mode is not supported, so we expect failure.
	echo "test bpf encap with tunnel device decap (expect failure)"
	# "set -e" never acts on a command negated with "!", so check the
	# outcome explicitly. client_connect prints the exit status of the
	# transfer as its last line of output.
	client_status="$(client_connect || true)"
	echo "${client_status}"
	if [[ "${client_status##*$'\n'}" == "0" ]]; then
		echo "unexpected success: unsupported tunnel mode decapsulated traffic"
		exit 1
	fi
else
	echo "test bpf encap with tunnel device decap"
	client_connect
	verify_data
	# verify_data consumed the listener; start a new one for the
	# bpf decap test that follows.
	server_listen
	wait_for_port ${port} ${netcat_opt}
fi
# serverside, use BPF for decap
# Replace the tunnel device with the "decap" section of the bpf object,
# attached on veth2 ingress, then run the transfer once more.
server_ns=(ip netns exec "${ns2}")

"${server_ns[@]}" ip link del dev testtun0
"${server_ns[@]}" tc qdisc add dev veth2 clsact
"${server_ns[@]}" tc filter add dev veth2 ingress \
	bpf direct-action object-file ${BPF_FILE} section decap

echo "test bpf encap with bpf decap"
client_connect
verify_data

echo OK