Merge branch 'seg6-fix-dst_cache-sharing-in-seg6-lwtunnel'

Andrea Mayer says:

====================
seg6: fix dst_cache sharing in seg6 lwtunnel

The seg6 lwtunnel encap uses a single per-route dst_cache shared
between seg6_input_core() and seg6_output_core(). These two paths
can perform the post-encap SID lookup in different routing contexts
(e.g., ip rules matching on the ingress interface, or VRF table
separation). Whichever path runs first populates the cache, and the
other reuses it blindly, bypassing its own lookup.

Patch 1 fixes this by splitting the cache into cache_input and
cache_output. Patch 2 adds a selftest that validates the isolation.
====================

Link: https://patch.msgid.link/20260404004405.4057-1-andrea.mayer@uniroma2.it
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
This commit is contained in:
Jakub Kicinski 2026-04-07 20:20:59 -07:00
commit f821664dde
3 changed files with 221 additions and 11 deletions

View File

@ -48,7 +48,8 @@ static size_t seg6_lwt_headroom(struct seg6_iptunnel_encap *tuninfo)
}
struct seg6_lwt {
struct dst_cache cache;
struct dst_cache cache_input;
struct dst_cache cache_output;
struct seg6_iptunnel_encap tuninfo[];
};
@ -488,7 +489,7 @@ static int seg6_input_core(struct net *net, struct sock *sk,
slwt = seg6_lwt_lwtunnel(lwtst);
local_bh_disable();
dst = dst_cache_get(&slwt->cache);
dst = dst_cache_get(&slwt->cache_input);
local_bh_enable();
err = seg6_do_srh(skb, dst);
@ -504,7 +505,7 @@ static int seg6_input_core(struct net *net, struct sock *sk,
/* cache only if we don't create a dst reference loop */
if (!dst->error && lwtst != dst->lwtstate) {
local_bh_disable();
dst_cache_set_ip6(&slwt->cache, dst,
dst_cache_set_ip6(&slwt->cache_input, dst,
&ipv6_hdr(skb)->saddr);
local_bh_enable();
}
@ -564,7 +565,7 @@ static int seg6_output_core(struct net *net, struct sock *sk,
slwt = seg6_lwt_lwtunnel(orig_dst->lwtstate);
local_bh_disable();
dst = dst_cache_get(&slwt->cache);
dst = dst_cache_get(&slwt->cache_output);
local_bh_enable();
err = seg6_do_srh(skb, dst);
@ -591,7 +592,7 @@ static int seg6_output_core(struct net *net, struct sock *sk,
/* cache only if we don't create a dst reference loop */
if (orig_dst->lwtstate != dst->lwtstate) {
local_bh_disable();
dst_cache_set_ip6(&slwt->cache, dst, &fl6.saddr);
dst_cache_set_ip6(&slwt->cache_output, dst, &fl6.saddr);
local_bh_enable();
}
@ -701,11 +702,13 @@ static int seg6_build_state(struct net *net, struct nlattr *nla,
slwt = seg6_lwt_lwtunnel(newts);
err = dst_cache_init(&slwt->cache, GFP_ATOMIC);
if (err) {
kfree(newts);
return err;
}
err = dst_cache_init(&slwt->cache_input, GFP_ATOMIC);
if (err)
goto err_free_newts;
err = dst_cache_init(&slwt->cache_output, GFP_ATOMIC);
if (err)
goto err_destroy_input;
memcpy(&slwt->tuninfo, tuninfo, tuninfo_len);
@ -720,11 +723,20 @@ static int seg6_build_state(struct net *net, struct nlattr *nla,
*ts = newts;
return 0;
err_destroy_input:
dst_cache_destroy(&slwt->cache_input);
err_free_newts:
kfree(newts);
return err;
}
static void seg6_destroy_state(struct lwtunnel_state *lwt)
{
dst_cache_destroy(&seg6_lwt_lwtunnel(lwt)->cache);
struct seg6_lwt *slwt = seg6_lwt_lwtunnel(lwt);
dst_cache_destroy(&slwt->cache_input);
dst_cache_destroy(&slwt->cache_output);
}
static int seg6_fill_encap_info(struct sk_buff *skb,

View File

@ -89,6 +89,7 @@ TEST_PROGS := \
srv6_end_x_next_csid_l3vpn_test.sh \
srv6_hencap_red_l3vpn_test.sh \
srv6_hl2encap_red_l2vpn_test.sh \
srv6_iptunnel_cache.sh \
stress_reuseport_listen.sh \
tcp_fastopen_backup_key.sh \
test_bpf.sh \

View File

@ -0,0 +1,197 @@
#!/bin/bash
# SPDX-License-Identifier: GPL-2.0
#
# author: Andrea Mayer <andrea.mayer@uniroma2.it>
# This test verifies that the seg6 lwtunnel does not share the dst_cache
# between the input (forwarding) and output (locally generated) paths.
#
# A shared dst_cache allows a forwarded packet to populate the cache and a
# subsequent locally generated packet to silently reuse that entry, bypassing
# its own route lookup. To expose this, the SID is made reachable only for
# forwarded traffic (via an ip rule matching iif) and blackholed for everything
# else. A local ping on ns_router must always hit the blackhole;
# if it succeeds after a forwarded packet has populated the
# cache, the bug is confirmed.
#
# Both forwarded and local packets are pinned to the same CPU with taskset,
# since dst_cache is per-cpu.
#
#
# +--------------------+ +--------------------+
# | ns_src | | ns_dst |
# | | | |
# | veth-s0 | | veth-d0 |
# | fd00::1/64 | | fd01::2/64 |
# +-------+------------+ +----------+---------+
# | |
# | +--------------------+ |
# | | ns_router | |
# | | | |
# +------------+ veth-r0 veth-r1 +--------------+
# | fd00::2 fd01::1 |
# +--------------------+
#
#
# ns_router: encap (main table)
# +---------+---------------------------------------+
# | dst | action |
# +---------+---------------------------------------+
# | cafe::1 | encap seg6 mode encap segs fc00::100 |
# +---------+---------------------------------------+
#
# ns_router: post-encap SID resolution
# +-------+------------+----------------------------+
# | table | dst | action |
# +-------+------------+----------------------------+
# | 100 | fc00::100 | via fd01::2 dev veth-r1 |
# +-------+------------+----------------------------+
# | main | fc00::100 | blackhole |
# +-------+------------+----------------------------+
#
# ns_router: ip rule
# +------------------+------------------------------+
# | match | action |
# +------------------+------------------------------+
# | iif veth-r0 | lookup 100 |
# +------------------+------------------------------+
#
# ns_dst: SRv6 decap (main table)
# +--------------+----------------------------------+
# | SID | action |
# +--------------+----------------------------------+
# | fc00::100 | End.DT6 table 255 (local) |
# +--------------+----------------------------------+
# NOTE(review): setup_ns, cleanup_ns, ksft_skip and ksft_fail are used below
# but not defined in this file - presumably they all come from lib.sh.
source lib.sh
# SRv6 segment pushed by the encap route on ns_router and decapsulated
# (End.DT6) on ns_dst; see setup()
readonly SID="fc00::100"
# address pinged by every probe in test_cache_isolation(); configured on lo
# in ns_dst and routed through the seg6 encap on ns_router
readonly DEST="cafe::1"
# fixed MAC addresses for the four veth ends; setup() assigns them to the
# interfaces and reuses them for the permanent neighbour entries
readonly SRC_MAC="02:00:00:00:00:01"
readonly RTR_R0_MAC="02:00:00:00:00:02"
readonly RTR_R1_MAC="02:00:00:00:00:03"
readonly DST_MAC="02:00:00:00:00:04"
# Release the namespaces this test works with.
#
# The three namespace names held in NS_SRC, NS_RTR and NS_DST are handed, in
# that order, to cleanup_ns (not defined in this file; presumably provided by
# lib.sh). The status of cleanup_ns is returned unchanged.
cleanup()
{
	local -a namespaces=("${NS_SRC}" "${NS_RTR}" "${NS_DST}")

	cleanup_ns "${namespaces[@]}"
}
# Verify that every external tool the test invokes is available.
#
# The tools are probed in a fixed order (ip, ping, sysctl, taskset). On the
# first one that `command -v` cannot resolve, a "SKIP: ..." line is printed
# and the script exits with ${ksft_skip}. Returns 0 when all are present.
check_prerequisites()
{
	local tool label

	for tool in ip ping sysctl taskset; do
		if command -v "${tool}" &>/dev/null; then
			continue
		fi

		# only the message for ip carries the extra "tool" word
		case "${tool}" in
		ip) label="ip tool" ;;
		*) label="${tool}" ;;
		esac

		echo "SKIP: ${label} not found"
		exit "${ksft_skip}"
	done
}
# Build the test topology: ns_src -- ns_router -- ns_dst, connected by two
# veth pairs, with the seg6 encap route, the iif-based policy rule and the
# blackhole route on ns_router, and the End.DT6 decap route on ns_dst.
# No command status is checked here; a broken setup is detected later by the
# sanity pings in test_cache_isolation().
setup()
{
# setup_ns is not defined in this file (presumably lib.sh); the rest of this
# function refers to the namespaces as ${NS_SRC}, ${NS_RTR} and ${NS_DST}
setup_ns NS_SRC NS_RTR NS_DST
# veth pair ns_src <-> ns_router
ip link add veth-s0 netns "${NS_SRC}" type veth \
peer name veth-r0 netns "${NS_RTR}"
# veth pair ns_router <-> ns_dst
ip link add veth-r1 netns "${NS_RTR}" type veth \
peer name veth-d0 netns "${NS_DST}"
# pin the MAC addresses so the static neighbour entries below can refer to them
ip -n "${NS_SRC}" link set veth-s0 address "${SRC_MAC}"
ip -n "${NS_RTR}" link set veth-r0 address "${RTR_R0_MAC}"
ip -n "${NS_RTR}" link set veth-r1 address "${RTR_R1_MAC}"
ip -n "${NS_DST}" link set veth-d0 address "${DST_MAC}"
# ns_src: one address, and DEST reached through the router
ip -n "${NS_SRC}" link set veth-s0 up
ip -n "${NS_SRC}" addr add fd00::1/64 dev veth-s0 nodad
ip -n "${NS_SRC}" -6 route add "${DEST}"/128 via fd00::2
# ns_router: addresses on both sides and IPv6 forwarding enabled
ip -n "${NS_RTR}" link set veth-r0 up
ip -n "${NS_RTR}" addr add fd00::2/64 dev veth-r0 nodad
ip -n "${NS_RTR}" link set veth-r1 up
ip -n "${NS_RTR}" addr add fd01::1/64 dev veth-r1 nodad
ip netns exec "${NS_RTR}" sysctl -qw net.ipv6.conf.all.forwarding=1
# traffic to DEST is SRv6-encapsulated towards SID; this is the route whose
# lwtunnel state is under test
ip -n "${NS_RTR}" -6 route add "${DEST}"/128 \
encap seg6 mode encap segs "${SID}" dev veth-r0
# post-encap SID resolution: table 100 has a working next hop ...
ip -n "${NS_RTR}" -6 route add "${SID}"/128 table 100 \
via fd01::2 dev veth-r1
# ... while the default (main) table blackholes the SID
ip -n "${NS_RTR}" -6 route add blackhole "${SID}"/128
# only packets that entered through veth-r0 (i.e. forwarded from ns_src)
# look up table 100; locally generated packets do not match this rule
ip -n "${NS_RTR}" -6 rule add iif veth-r0 lookup 100
# ns_dst: owns DEST on lo, decapsulates the SID with End.DT6 and has a
# return route to the ns_src subnet via the router
ip -n "${NS_DST}" link set veth-d0 up
ip -n "${NS_DST}" addr add fd01::2/64 dev veth-d0 nodad
ip -n "${NS_DST}" addr add "${DEST}"/128 dev lo nodad
ip -n "${NS_DST}" -6 route add "${SID}"/128 \
encap seg6local action End.DT6 table 255 dev veth-d0
ip -n "${NS_DST}" -6 route add fd00::/64 via fd01::1
# static (permanent) neighbour entries on every link
# NOTE(review): presumably so the single-packet pings do not depend on
# neighbour discovery completing first - confirm with the author
ip -n "${NS_SRC}" -6 neigh add fd00::2 dev veth-s0 \
lladdr "${RTR_R0_MAC}" nud permanent
ip -n "${NS_RTR}" -6 neigh add fd00::1 dev veth-r0 \
lladdr "${SRC_MAC}" nud permanent
ip -n "${NS_RTR}" -6 neigh add fd01::2 dev veth-r1 \
lladdr "${DST_MAC}" nud permanent
ip -n "${NS_DST}" -6 neigh add fd01::1 dev veth-d0 \
lladdr "${RTR_R1_MAC}" nud permanent
}
# Check that the output path does not reuse a dst cached by the input path.
#
# Three single-packet pings to ${DEST} are issued, all pinned to CPU 0:
#   1. from ns_router (local) - expected to fail; success means the
#      topology is broken and the script exits with ${ksft_skip}
#   2. from ns_src (forwarded) - expected to succeed; failure means the
#      topology is broken and the script exits with ${ksft_skip}
#   3. from ns_router again - expected to fail; success is the bug
#
# Sets the global RET (0 or ${ksft_fail}) and returns it.
test_cache_isolation()
{
	# one-shot probe shared by all three steps
	local -a probe=(taskset -c 0 ping -c 1 -W 2 "${DEST}")

	RET=0

	# step 1: local ping with empty cache must hit the blackholed SID
	ip netns exec "${NS_RTR}" "${probe[@]}" &>/dev/null && {
		echo "SKIP: local ping succeeded, topology broken"
		exit "${ksft_skip}"
	}

	# step 2: forwarded ping from ns_src populates the input cache
	ip netns exec "${NS_SRC}" "${probe[@]}" &>/dev/null || {
		echo "SKIP: forwarded ping failed, topology broken"
		exit "${ksft_skip}"
	}

	# step 3: local ping must still fail; if it gets through, the output
	# path picked up the dst cached by the input path
	if ! ip netns exec "${NS_RTR}" "${probe[@]}" &>/dev/null; then
		echo "PASS: output path dst_cache is independent"
	else
		echo "FAIL: output path used dst cached by input path"
		RET="${ksft_fail}"
	fi

	return "${RET}"
}
# Script entry point. The test manipulates namespaces, routes and sysctls,
# so bail out with the skip code when not running as root.
if [ "$(id -u)" -ne 0 ]; then
echo "SKIP: Need root privileges"
exit "${ksft_skip}"
fi
# installed before anything is created: from here on every exit path,
# including the skip exits inside the functions, runs cleanup
trap cleanup EXIT
check_prerequisites
setup
test_cache_isolation
# RET is the global set by test_cache_isolation (0 or ${ksft_fail})
exit "${RET}"