From 699f47e616fed11d5074e7bbee2f4f920028c4a2 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Sun, 29 Mar 2026 11:04:27 -0700 Subject: [PATCH 1/2] net: Clear the dst when performing encap / decap Commit ba9db6f907ac ("net: clear the dst when changing skb protocol") added dst clearing when a BPF program changes the skb protocol (e.g. IPv4 to IPv6). Since that was a fix we only cleared the dst when the L3 protocol actually changes to keep it minimal. As suggested during the discussion (see Link) encap or decap operation which wraps or unwraps a same-protocol header may also render the existing dst incorrect - even if that doesn't result in a crash, just the wrong route for the now-outermost IP dst. Make dropping dst unconditional for bpf_skb_change_proto() and all L3 encap / decap ops. Signed-off-by: Jakub Kicinski Signed-off-by: Martin KaFai Lau Reviewed-by: Willem de Bruijn Acked-by: Daniel Borkmann Link: https://lore.kernel.org/CANP3RGfRaYwve_xgxH6Tp2zenzKn2-DjZ9tg023WVzfdJF3p_w@mail.gmail.com Link: https://patch.msgid.link/20260329180428.2657785-1-kuba@kernel.org --- net/core/filter.c | 50 +++++++++++++++++++++++++---------------------- 1 file changed, 27 insertions(+), 23 deletions(-) diff --git a/net/core/filter.c b/net/core/filter.c index d55525cc5540..cf2113af4bc9 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -3256,13 +3256,6 @@ static const struct bpf_func_proto bpf_skb_vlan_pop_proto = { .arg1_type = ARG_PTR_TO_CTX, }; -static void bpf_skb_change_protocol(struct sk_buff *skb, u16 proto) -{ - skb->protocol = htons(proto); - if (skb_valid_dst(skb)) - skb_dst_drop(skb); -} - static int bpf_skb_generic_push(struct sk_buff *skb, u32 off, u32 len) { /* Caller already did skb_cow() with meta_len+len as headroom, @@ -3361,7 +3354,7 @@ static int bpf_skb_proto_4_to_6(struct sk_buff *skb) shinfo->gso_type |= SKB_GSO_DODGY; } - bpf_skb_change_protocol(skb, ETH_P_IPV6); + skb->protocol = htons(ETH_P_IPV6); skb_clear_hash(skb); return 0; @@ -3392,7 +3385,7 @@ static int bpf_skb_proto_6_to_4(struct sk_buff *skb) shinfo->gso_type |= SKB_GSO_DODGY; } - bpf_skb_change_protocol(skb, ETH_P_IP); + skb->protocol = htons(ETH_P_IP); skb_clear_hash(skb); return 0; @@ -3440,7 +3433,13 @@ BPF_CALL_3(bpf_skb_change_proto, struct sk_buff *, skb, __be16, proto, */ ret = bpf_skb_proto_xlat(skb, proto); bpf_compute_data_pointers(skb); - return ret; + if (ret) + return ret; + + if (skb_valid_dst(skb)) + skb_dst_drop(skb); + + return 0; } static const struct bpf_func_proto bpf_skb_change_proto_proto = { @@ -3582,12 +3581,13 @@ static int bpf_skb_net_grow(struct sk_buff *skb, u32 off, u32 len_diff, } /* Match skb->protocol to new outer l3 protocol */ - if (skb->protocol == htons(ETH_P_IP) && - flags & BPF_F_ADJ_ROOM_ENCAP_L3_IPV6) - bpf_skb_change_protocol(skb, ETH_P_IPV6); - else if (skb->protocol == htons(ETH_P_IPV6) && - flags & BPF_F_ADJ_ROOM_ENCAP_L3_IPV4) - bpf_skb_change_protocol(skb, ETH_P_IP); + if (flags & BPF_F_ADJ_ROOM_ENCAP_L3_IPV6) + skb->protocol = htons(ETH_P_IPV6); + else if (flags & BPF_F_ADJ_ROOM_ENCAP_L3_IPV4) + skb->protocol = htons(ETH_P_IP); + + if (skb_valid_dst(skb)) + skb_dst_drop(skb); } if (skb_is_gso(skb)) { @@ -3615,6 +3615,7 @@ static int bpf_skb_net_grow(struct sk_buff *skb, u32 off, u32 len_diff, static int bpf_skb_net_shrink(struct sk_buff *skb, u32 off, u32 len_diff, u64 flags) { + bool decap = flags & BPF_F_ADJ_ROOM_DECAP_L3_MASK; int ret; if (unlikely(flags & ~(BPF_F_ADJ_ROOM_FIXED_GSO | @@ -3637,13 +3638,16 @@ static int bpf_skb_net_shrink(struct sk_buff *skb, u32 off, u32 len_diff, if (unlikely(ret < 0)) return ret; - /* Match skb->protocol to new outer l3 protocol */ - if (skb->protocol == htons(ETH_P_IP) && - flags & BPF_F_ADJ_ROOM_DECAP_L3_IPV6) - bpf_skb_change_protocol(skb, ETH_P_IPV6); - else if (skb->protocol == htons(ETH_P_IPV6) && - flags & BPF_F_ADJ_ROOM_DECAP_L3_IPV4) - bpf_skb_change_protocol(skb, ETH_P_IP); + if (decap) { + /* Match skb->protocol to new outer l3 protocol */ + if (flags & BPF_F_ADJ_ROOM_DECAP_L3_IPV6) + skb->protocol = htons(ETH_P_IPV6); + else if (flags & BPF_F_ADJ_ROOM_DECAP_L3_IPV4) + skb->protocol = htons(ETH_P_IP); + + if (skb_valid_dst(skb)) + skb_dst_drop(skb); + } if (skb_is_gso(skb)) { struct skb_shared_info *shinfo = skb_shinfo(skb); From 64c535db769e9ab917343d61045802e832d621ed Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Sun, 29 Mar 2026 11:04:28 -0700 Subject: [PATCH 2/2] selftests/bpf: Test that dst is cleared on same-protocol encap Verify that bpf_skb_adjust_room() clears the routing dst even when the encap L3 protocol matches the original packet (e.g. IPIP). The dst selected for the inner packet is not valid for the encapsulated result; a stale dst could lead to misrouting. Signed-off-by: Jakub Kicinski Signed-off-by: Martin KaFai Lau Acked-by: Daniel Borkmann Link: https://patch.msgid.link/20260329180428.2657785-2-kuba@kernel.org --- .../selftests/bpf/prog_tests/test_dst_clear.c | 55 ++++++++++++++++++ .../selftests/bpf/progs/test_dst_clear.c | 57 +++++++++++++++++++ 2 files changed, 112 insertions(+) create mode 100644 tools/testing/selftests/bpf/prog_tests/test_dst_clear.c create mode 100644 tools/testing/selftests/bpf/progs/test_dst_clear.c diff --git a/tools/testing/selftests/bpf/prog_tests/test_dst_clear.c b/tools/testing/selftests/bpf/prog_tests/test_dst_clear.c new file mode 100644 index 000000000000..7c35ca6f4539 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/test_dst_clear.c @@ -0,0 +1,55 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2026 Meta Platforms, Inc. and affiliates. */ + +#include +#include +#include + +#include "test_progs.h" +#include "network_helpers.h" +#include "test_dst_clear.skel.h" + +#define IPV4_IFACE_ADDR "1.0.0.1" +#define UDP_TEST_PORT 7777 + +void test_ns_dst_clear(void) +{ + LIBBPF_OPTS(bpf_tcx_opts, tcx_opts); + struct test_dst_clear *skel; + struct sockaddr_in addr; + struct bpf_link *link; + socklen_t addrlen; + char buf[128] = {}; + int sockfd, err; + + skel = test_dst_clear__open_and_load(); + if (!ASSERT_OK_PTR(skel, "skel open_and_load")) + return; + + SYS(fail, "ip addr add %s/8 dev lo", IPV4_IFACE_ADDR); + + link = bpf_program__attach_tcx(skel->progs.dst_clear, + if_nametoindex("lo"), &tcx_opts); + if (!ASSERT_OK_PTR(link, "attach_tcx")) + goto fail; + skel->links.dst_clear = link; + + addrlen = sizeof(addr); + err = make_sockaddr(AF_INET, IPV4_IFACE_ADDR, UDP_TEST_PORT, + (void *)&addr, &addrlen); + if (!ASSERT_OK(err, "make_sockaddr")) + goto fail; + sockfd = socket(AF_INET, SOCK_DGRAM, 0); + if (!ASSERT_NEQ(sockfd, -1, "socket")) + goto fail; + err = sendto(sockfd, buf, sizeof(buf), 0, (void *)&addr, addrlen); + close(sockfd); + if (!ASSERT_EQ(err, sizeof(buf), "send")) + goto fail; + + ASSERT_TRUE(skel->bss->had_dst, "had_dst"); + ASSERT_TRUE(skel->bss->dst_cleared, "dst_cleared"); + +fail: + test_dst_clear__destroy(skel); +} diff --git a/tools/testing/selftests/bpf/progs/test_dst_clear.c b/tools/testing/selftests/bpf/progs/test_dst_clear.c new file mode 100644 index 000000000000..c22a6eeb4798 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_dst_clear.c @@ -0,0 +1,57 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2026 Meta Platforms, Inc. and affiliates. */ + +#include "vmlinux.h" +#include "bpf_tracing_net.h" +#include +#include + +#define UDP_TEST_PORT 7777 + +void *bpf_cast_to_kern_ctx(void *) __ksym; + +bool had_dst = false; +bool dst_cleared = false; + +SEC("tc/egress") +int dst_clear(struct __sk_buff *skb) +{ + struct sk_buff *kskb; + struct iphdr iph; + struct udphdr udph; + int err; + + if (skb->protocol != __bpf_constant_htons(ETH_P_IP)) + return TC_ACT_OK; + + if (bpf_skb_load_bytes(skb, ETH_HLEN, &iph, sizeof(iph))) + return TC_ACT_OK; + + if (iph.protocol != IPPROTO_UDP) + return TC_ACT_OK; + + if (bpf_skb_load_bytes(skb, ETH_HLEN + sizeof(iph), &udph, sizeof(udph))) + return TC_ACT_OK; + + if (udph.dest != __bpf_constant_htons(UDP_TEST_PORT)) + return TC_ACT_OK; + + kskb = bpf_cast_to_kern_ctx(skb); + had_dst = (kskb->_skb_refdst != 0); + + /* Same-protocol encap (IPIP): protocol stays IPv4, but the dst + * from the original routing is no longer valid for the outer hdr. + */ + err = bpf_skb_adjust_room(skb, (s32)sizeof(struct iphdr), + BPF_ADJ_ROOM_MAC, + BPF_F_ADJ_ROOM_FIXED_GSO | + BPF_F_ADJ_ROOM_ENCAP_L3_IPV4); + if (err) + return TC_ACT_SHOT; + + dst_cleared = (kskb->_skb_refdst == 0); + + return TC_ACT_SHOT; +} + +char __license[] SEC("license") = "GPL";