ipsec-2026-05-05

-----BEGIN PGP SIGNATURE-----
 
 iQIzBAABCgAdFiEEH7ZpcWbFyOOp6OJbrB3Eaf9PW7cFAmn57igACgkQrB3Eaf9P
 W7cDig//aXeIEN6VUYPU6lTDYXNCWz2A7sM636rXMMizF1nVjkRtrZlzQFwE9pIm
 LOla+Mu1VLGVsuxaoYfW2NagKt6bUg3xEDrlOt+lL/Bn6hengdjVF9PibvP4XCjt
 5bwtg0xN0AysoktYS2v+2b+fSh5CSnQkcEcn9F2d+3zXmFlLpxuyPJqhHn54nHmI
 JPACVyk9bZdKutdfr86uThgWnTDInPvJ2vMRpRlwpGWx5f2JspJv1g4zzWzc38Ad
 yTcRZQXhZ7zfOaYFGjqMD0eHtFDPC+HqMTi0Ak9ngCBAFpZS8/iBJ3/TlukJjNcy
 q805gPyRqnpiVgm6NH55C8HUguzpD7m8tcjBbVADvIrMA0OzMw3mBxwFsbG2aaCs
 cPXxvtT7crDbKPtxvY5RhVJIvCe4BCMP/uqlmo7wuwPE01arVau5i4miZKGPTzXB
 LRNchWJMDIrwE/+MnAbJBXT5RfiN5RPvPdV5OdTlrofkwDzBjpTev5FeQq7QktSx
 ctPy7I28IRw+eCKlu2FNrUJ4x8C/7Fv1ZPADOSvd3D5PdaOAArUb3RhTGwC9giuo
 qKKv8Q30x5xyOv90MB3M8vQwM7mGUloIfZPN6AhRoaDGikdMyy6gZ8Y5M3noGUUJ
 D4z+kZgHy1ZrdYDM58CdfE1Kz/s96rA5aIHUVZQYonaz35YGRts=
 =WKO1
 -----END PGP SIGNATURE-----

Merge tag 'ipsec-2026-05-05' of git://git.kernel.org/pub/scm/linux/kernel/git/klassert/ipsec

Steffen Klassert says:

====================
pull request (net): ipsec 2026-05-05

1. Fix an IPv6 encapsulation error path that leaked route references
   when UDPv6 ESP decapsulation resolved to an error route.
   From Yilin Zhu.

2. Fix AH with ESN on async crypto paths by accounting for the extra
   high-order sequence number when reconstructing the temporary
   authentication layout in the completion callbacks.
   From Michael Bomarito.

3. Fix XFRM output so it does not overwrite already-correct inner header
   pointers when a tunnel layer such as VXLAN has already saved them.
   The fix comes with new selftests. From Cosmin Ratiu.

4. Add the missing native payload size entry for XFRM_MSG_MAPPING in the
   compat translation path. From Ruijie Li.

5. Harden __xfrm_state_delete() against repeated or inconsistent unhashing
   of state list nodes by keying the removal on actual list membership and
   using delete-and-init helpers. From Michal Kosiorek.

6. Prevent ESP from decrypting shared splice-backed skb fragments in place
   by marking UDP splice frags as shared and forcing copy-on-write in ESP
   input when needed. From Kuan-Ting Chen.

* tag 'ipsec-2026-05-05' of git://git.kernel.org/pub/scm/linux/kernel/git/klassert/ipsec:
  xfrm: esp: avoid in-place decrypt on shared skb frags
  xfrm: defensively unhash xfrm_state lists in __xfrm_state_delete
  xfrm: provide message size for XFRM_MSG_MAPPING
  xfrm: Don't clobber inner headers when already set
  tools/selftests: Add a VXLAN+IPsec traffic test
  tools/selftests: Use a sensible timeout value for iperf3 client
  xfrm: ah: account for ESN high bits in async callbacks
  ipv6: xfrm6: release dst on error in xfrm6_rcv_encap()
====================

Link: https://patch.msgid.link/20260505132326.1362733-1-steffen.klassert@secunet.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
This commit is contained in:
Jakub Kicinski 2026-05-06 16:49:41 -07:00
commit dc61989e37
14 changed files with 270 additions and 20 deletions

View File

@ -124,9 +124,14 @@ static void ah_output_done(void *data, int err)
struct iphdr *top_iph = ip_hdr(skb);
struct ip_auth_hdr *ah = ip_auth_hdr(skb);
int ihl = ip_hdrlen(skb);
int seqhi_len = 0;
__be32 *seqhi;
if (x->props.flags & XFRM_STATE_ESN)
seqhi_len = sizeof(*seqhi);
iph = AH_SKB_CB(skb)->tmp;
icv = ah_tmp_icv(iph, ihl);
seqhi = (__be32 *)((char *)iph + ihl);
icv = ah_tmp_icv(seqhi, seqhi_len);
memcpy(ah->auth_data, icv, ahp->icv_trunc_len);
top_iph->tos = iph->tos;
@ -270,12 +275,17 @@ static void ah_input_done(void *data, int err)
struct ip_auth_hdr *ah = ip_auth_hdr(skb);
int ihl = ip_hdrlen(skb);
int ah_hlen = (ah->hdrlen + 2) << 2;
int seqhi_len = 0;
__be32 *seqhi;
if (err)
goto out;
if (x->props.flags & XFRM_STATE_ESN)
seqhi_len = sizeof(*seqhi);
work_iph = AH_SKB_CB(skb)->tmp;
auth_data = ah_tmp_auth(work_iph, ihl);
seqhi = (__be32 *)((char *)work_iph + ihl);
auth_data = ah_tmp_auth(seqhi, seqhi_len);
icv = ah_tmp_icv(auth_data, ahp->icv_trunc_len);
err = crypto_memneq(icv, auth_data, ahp->icv_trunc_len) ? -EBADMSG : 0;

View File

@ -873,7 +873,8 @@ static int esp_input(struct xfrm_state *x, struct sk_buff *skb)
nfrags = 1;
goto skip_cow;
} else if (!skb_has_frag_list(skb)) {
} else if (!skb_has_frag_list(skb) &&
!skb_has_shared_frag(skb)) {
nfrags = skb_shinfo(skb)->nr_frags;
nfrags++;

View File

@ -1233,6 +1233,8 @@ static int __ip_append_data(struct sock *sk,
if (err < 0)
goto error;
copy = err;
if (!(flags & MSG_NO_SHARED_FRAGS))
skb_shinfo(skb)->flags |= SKBFL_SHARED_FRAG;
wmem_alloc_delta += copy;
} else if (!zc) {
int i = skb_shinfo(skb)->nr_frags;

View File

@ -317,14 +317,19 @@ static void ah6_output_done(void *data, int err)
struct ipv6hdr *top_iph = ipv6_hdr(skb);
struct ip_auth_hdr *ah = ip_auth_hdr(skb);
struct tmp_ext *iph_ext;
int seqhi_len = 0;
__be32 *seqhi;
extlen = skb_network_header_len(skb) - sizeof(struct ipv6hdr);
if (extlen)
extlen += sizeof(*iph_ext);
if (x->props.flags & XFRM_STATE_ESN)
seqhi_len = sizeof(*seqhi);
iph_base = AH_SKB_CB(skb)->tmp;
iph_ext = ah_tmp_ext(iph_base);
icv = ah_tmp_icv(iph_ext, extlen);
seqhi = (__be32 *)((char *)iph_ext + extlen);
icv = ah_tmp_icv(seqhi, seqhi_len);
memcpy(ah->auth_data, icv, ahp->icv_trunc_len);
memcpy(top_iph, iph_base, IPV6HDR_BASELEN);
@ -471,13 +476,18 @@ static void ah6_input_done(void *data, int err)
struct ip_auth_hdr *ah = ip_auth_hdr(skb);
int hdr_len = skb_network_header_len(skb);
int ah_hlen = ipv6_authlen(ah);
int seqhi_len = 0;
__be32 *seqhi;
if (err)
goto out;
if (x->props.flags & XFRM_STATE_ESN)
seqhi_len = sizeof(*seqhi);
work_iph = AH_SKB_CB(skb)->tmp;
auth_data = ah_tmp_auth(work_iph, hdr_len);
icv = ah_tmp_icv(auth_data, ahp->icv_trunc_len);
seqhi = (__be32 *)(auth_data + ahp->icv_trunc_len);
icv = ah_tmp_icv(seqhi, seqhi_len);
err = crypto_memneq(icv, auth_data, ahp->icv_trunc_len) ? -EBADMSG : 0;
if (err)

View File

@ -915,7 +915,8 @@ static int esp6_input(struct xfrm_state *x, struct sk_buff *skb)
nfrags = 1;
goto skip_cow;
} else if (!skb_has_frag_list(skb)) {
} else if (!skb_has_frag_list(skb) &&
!skb_has_shared_frag(skb)) {
nfrags = skb_shinfo(skb)->nr_frags;
nfrags++;

View File

@ -1794,6 +1794,8 @@ static int __ip6_append_data(struct sock *sk,
if (err < 0)
goto error;
copy = err;
if (!(flags & MSG_NO_SHARED_FRAGS))
skb_shinfo(skb)->flags |= SKBFL_SHARED_FRAG;
wmem_alloc_delta += copy;
} else if (!zc) {
int i = skb_shinfo(skb)->nr_frags;

View File

@ -88,8 +88,10 @@ int xfrm6_rcv_encap(struct sk_buff *skb, int nexthdr, __be32 spi,
dst = ip6_route_input_lookup(dev_net(skb->dev), skb->dev, &fl6,
skb, flags);
if (dst->error)
if (dst->error) {
dst_release(dst);
goto drop;
}
skb_dst_set(skb, dst);
}

View File

@ -66,7 +66,9 @@ static int xfrm4_transport_output(struct xfrm_state *x, struct sk_buff *skb)
struct iphdr *iph = ip_hdr(skb);
int ihl = iph->ihl * 4;
skb_set_inner_transport_header(skb, skb_transport_offset(skb));
if (!skb->inner_protocol)
skb_set_inner_transport_header(skb,
skb_transport_offset(skb));
skb_set_network_header(skb, -x->props.header_len);
skb->mac_header = skb->network_header +
@ -167,7 +169,9 @@ static int xfrm6_transport_output(struct xfrm_state *x, struct sk_buff *skb)
int hdr_len;
iph = ipv6_hdr(skb);
skb_set_inner_transport_header(skb, skb_transport_offset(skb));
if (!skb->inner_protocol)
skb_set_inner_transport_header(skb,
skb_transport_offset(skb));
hdr_len = xfrm6_hdr_offset(x, skb, &prevhdr);
if (hdr_len < 0)
@ -276,8 +280,10 @@ static int xfrm4_tunnel_encap_add(struct xfrm_state *x, struct sk_buff *skb)
struct iphdr *top_iph;
int flags;
skb_set_inner_network_header(skb, skb_network_offset(skb));
skb_set_inner_transport_header(skb, skb_transport_offset(skb));
if (!skb->inner_protocol) {
skb_set_inner_network_header(skb, skb_network_offset(skb));
skb_set_inner_transport_header(skb, skb_transport_offset(skb));
}
skb_set_network_header(skb, -x->props.header_len);
skb->mac_header = skb->network_header +
@ -321,8 +327,10 @@ static int xfrm6_tunnel_encap_add(struct xfrm_state *x, struct sk_buff *skb)
struct ipv6hdr *top_iph;
int dsfield;
skb_set_inner_network_header(skb, skb_network_offset(skb));
skb_set_inner_transport_header(skb, skb_transport_offset(skb));
if (!skb->inner_protocol) {
skb_set_inner_network_header(skb, skb_network_offset(skb));
skb_set_inner_transport_header(skb, skb_transport_offset(skb));
}
skb_set_network_header(skb, -x->props.header_len);
skb->mac_header = skb->network_header +

View File

@ -818,17 +818,17 @@ int __xfrm_state_delete(struct xfrm_state *x)
spin_lock(&net->xfrm.xfrm_state_lock);
list_del(&x->km.all);
hlist_del_rcu(&x->bydst);
hlist_del_rcu(&x->bysrc);
if (x->km.seq)
hlist_del_rcu(&x->byseq);
hlist_del_init_rcu(&x->bydst);
hlist_del_init_rcu(&x->bysrc);
if (!hlist_unhashed(&x->byseq))
hlist_del_init_rcu(&x->byseq);
if (!hlist_unhashed(&x->state_cache))
hlist_del_rcu(&x->state_cache);
if (!hlist_unhashed(&x->state_cache_input))
hlist_del_rcu(&x->state_cache_input);
if (x->id.spi)
hlist_del_rcu(&x->byspi);
if (!hlist_unhashed(&x->byspi))
hlist_del_init_rcu(&x->byspi);
net->xfrm.state_num--;
xfrm_nat_keepalive_state_updated(x);
spin_unlock(&net->xfrm.xfrm_state_lock);

View File

@ -3323,6 +3323,7 @@ const int xfrm_msg_min[XFRM_NR_MSGTYPES] = {
[XFRM_MSG_GETSADINFO - XFRM_MSG_BASE] = sizeof(u32),
[XFRM_MSG_NEWSPDINFO - XFRM_MSG_BASE] = sizeof(u32),
[XFRM_MSG_GETSPDINFO - XFRM_MSG_BASE] = sizeof(u32),
[XFRM_MSG_MAPPING - XFRM_MSG_BASE] = XMSGSIZE(xfrm_user_mapping),
[XFRM_MSG_SETDEFAULT - XFRM_MSG_BASE] = XMSGSIZE(xfrm_userpolicy_default),
[XFRM_MSG_GETDEFAULT - XFRM_MSG_BASE] = XMSGSIZE(xfrm_userpolicy_default),
};

View File

@ -30,6 +30,7 @@ TEST_PROGS = \
gro_hw.py \
hw_stats_l3.sh \
hw_stats_l3_gre.sh \
ipsec_vxlan.py \
iou-zcrx.py \
irq.py \
loopback.sh \

View File

@ -12,5 +12,10 @@ CONFIG_NET_IPGRE=y
CONFIG_NET_IPGRE_DEMUX=y
CONFIG_NETKIT=y
CONFIG_NET_SCH_INGRESS=y
CONFIG_INET6_ESP=y
CONFIG_INET6_ESP_OFFLOAD=y
CONFIG_INET_ESP=y
CONFIG_INET_ESP_OFFLOAD=y
CONFIG_UDMABUF=y
CONFIG_VXLAN=y
CONFIG_XFRM_USER=y

View File

@ -0,0 +1,204 @@
#!/usr/bin/env python3
# SPDX-License-Identifier: GPL-2.0
"""Traffic test for VXLAN + IPsec crypto-offload."""
import os
from lib.py import ksft_run, ksft_exit, ksft_eq, ksft_ge
from lib.py import ksft_variants, KsftNamedVariant, KsftSkipEx
from lib.py import CmdExitFailure, NetDrvEpEnv, cmd, defer, ethtool, ip
from lib.py import Iperf3Runner
# Inner tunnel addresses - TEST-NET-2 (RFC 5737) / doc prefix (RFC 3849)
# These are the overlay addresses assigned to the VXLAN devices on the
# local and remote endpoints; both ranges are reserved for documentation.
INNER_V4_LOCAL = "198.51.100.1"
INNER_V4_REMOTE = "198.51.100.2"
INNER_V6_LOCAL = "2001:db8:100::1"
INNER_V6_REMOTE = "2001:db8:100::2"
# ESP parameters
# SPI_OUT identifies the SA for local -> remote traffic, SPI_IN the SA for
# remote -> local traffic (see the state setup in setup_vxlan_ipsec()).
SPI_OUT = "0x1000"
SPI_IN = "0x1001"
# 128-bit key + 32-bit salt = 20 bytes hex, 128-bit ICV
# ("01" * 20 expands to 40 hex digits, i.e. the 20-byte key blob).
ESP_AEAD = "aead 'rfc4106(gcm(aes))' 0x" + "01" * 20 + " 128"
def xfrm(args, host=None):
    """Executes 'ip xfrm <args>', optionally on a remote host.

    The command goes through a shell so that the parentheses in
    algorithm names are preserved.
    """
    command_line = f"ip xfrm {args}"
    cmd(command_line, shell=True, host=host)
def check_xfrm_offload_support():
    """Raises KsftSkipEx when 'ip xfrm state help' never mentions offload."""
    help_result = cmd("ip xfrm state help", fail=False)
    # The help text may land on either stream, so search both.
    help_text = help_result.stdout + help_result.stderr
    if "offload" in help_text:
        return
    raise KsftSkipEx("iproute2 too old, missing xfrm offload")
def check_esp_hw_offload(cfg):
    """Raises KsftSkipEx unless cfg.ifname reports an active esp-hw-offload.

    Also skips when iproute2 lacks xfrm offload support or when the
    device features cannot be queried through ethtool.
    """
    check_xfrm_offload_support()

    try:
        features = ethtool(f"-k {cfg.ifname}", json=True)[0]
    except (CmdExitFailure, IndexError) as err:
        raise KsftSkipEx(f"can't query features: {err}") from err

    esp_offload = features.get("esp-hw-offload", {})
    if esp_offload.get("active"):
        return
    raise KsftSkipEx("Device does not support esp-hw-offload")
def get_tx_drops(cfg):
    """Reads the TX 'dropped' counter of cfg.ifname from 'ip -s -s link'."""
    query = "-s -s link show dev " + cfg.ifname
    link = ip(query, json=True)[0]
    tx_stats = link["stats64"]["tx"]
    return tx_stats["dropped"]
def setup_vxlan_ipsec(cfg, outer_ipver, inner_ipver):
    """Builds a VXLAN tunnel protected by transport-mode ESP on both ends.

    The local SAs request crypto offload on cfg.ifname; the remote SAs
    are plain (software) mirrors. Every object created here registers
    its own removal through defer().

    cfg         -- test environment providing local/remote addresses,
                   interface names and the remote host handle
    outer_ipver -- "4" or "6", IP version of the underlay addresses
    inner_ipver -- "4" or "6", IP version of the overlay addresses
    """
    vxlan_name = f"vx{os.getpid()}"
    local_addr = cfg.addr_v[outer_ipver]
    remote_addr = cfg.remote_addr_v[outer_ipver]

    if inner_ipver == "4":
        inner_local, inner_remote, addr_extra = (
            f"{INNER_V4_LOCAL}/24", f"{INNER_V4_REMOTE}/24", "")
    else:
        inner_local, inner_remote, addr_extra = (
            f"{INNER_V6_LOCAL}/64", f"{INNER_V6_REMOTE}/64", " nodad")

    vxlan_opts = ("udp6zerocsumtx udp6zerocsumrx" if outer_ipver == "6"
                  else "noudpcsum")

    link_add = (f"link add {vxlan_name} type vxlan id 100 dstport 4789 "
                f"{vxlan_opts} ")
    link_del = f"link del {vxlan_name}"
    link_up = f"link set {vxlan_name} up"

    # VXLAN tunnel - local side
    ip(link_add + f"local {local_addr} remote {remote_addr} dev {cfg.ifname}")
    defer(ip, link_del)
    ip(f"addr add {inner_local} dev {vxlan_name}{addr_extra}")
    ip(link_up)

    # VXLAN tunnel - remote side
    ip(link_add +
       f"local {remote_addr} remote {local_addr} dev {cfg.remote_ifname}",
       host=cfg.remote)
    defer(ip, link_del, host=cfg.remote)
    ip(f"addr add {inner_remote} dev {vxlan_name}{addr_extra}",
       host=cfg.remote)
    ip(link_up, host=cfg.remote)

    def add_state(src, dst, spi, mode, host=None):
        """Installs one ESP SA and schedules its deletion."""
        sa_id = f"src {src} dst {dst} proto esp spi {spi}"
        xfrm(f"state add {sa_id} {ESP_AEAD} {mode}", host=host)
        defer(xfrm, f"state del {sa_id}", host=host)

    def add_policy(src, dst, direction, host=None):
        """Installs one policy for VXLAN UDP traffic and schedules its deletion."""
        selector = f"src {src} dst {dst} proto udp dport 4789 dir {direction}"
        xfrm(f"policy add {selector} "
             f"tmpl src {src} dst {dst} proto esp mode transport", host=host)
        defer(xfrm, f"policy del {selector}", host=host)

    offload = f"mode transport offload crypto dev {cfg.ifname}"
    software = "mode transport"

    # xfrm states - local outbound / inbound SA (crypto offload)
    add_state(local_addr, remote_addr, SPI_OUT, f"{offload} dir out")
    add_state(remote_addr, local_addr, SPI_IN, f"{offload} dir in")
    # xfrm states - remote outbound / inbound SA (mirror, software crypto)
    add_state(remote_addr, local_addr, SPI_IN, software, host=cfg.remote)
    add_state(local_addr, remote_addr, SPI_OUT, software, host=cfg.remote)

    # xfrm policies - local out / in
    add_policy(local_addr, remote_addr, "out")
    add_policy(remote_addr, local_addr, "in")
    # xfrm policies - remote out / in
    add_policy(remote_addr, local_addr, "out", host=cfg.remote)
    add_policy(local_addr, remote_addr, "in", host=cfg.remote)
def _vxlan_ipsec_variants():
    """Yields one named variant per (outer, inner) IP version pair."""
    ip_versions = ("4", "6")
    yield from (
        KsftNamedVariant(f"outer_v{outer}_inner_v{inner}", outer, inner)
        for outer in ip_versions
        for inner in ip_versions
    )
@ksft_variants(_vxlan_ipsec_variants())
def test_vxlan_ipsec_crypto_offload(cfg, outer_ipver, inner_ipver):
    """Checks that iperf3 over VXLAN+IPsec crypto-offload drops nothing on TX.

    After a single ping across the tunnel, an iperf3 run in reverse mode
    is measured. The device TX drop counter must not move and the
    reported bandwidth must reach at least 0.1 (the check's comment
    calls this 100Mbps).
    """
    cfg.require_ipver(outer_ipver)
    check_esp_hw_offload(cfg)
    setup_vxlan_ipsec(cfg, outer_ipver, inner_ipver)

    if inner_ipver == "4":
        inner_local, inner_remote, ping = (
            INNER_V4_LOCAL, INNER_V4_REMOTE, "ping")
    else:
        inner_local, inner_remote, ping = (
            INNER_V6_LOCAL, INNER_V6_REMOTE, "ping -6")

    cmd(f"{ping} -c 1 -W 2 {inner_remote}")

    tx_drops_start = get_tx_drops(cfg)

    iperf = Iperf3Runner(cfg, server_ip=inner_local, client_ip=inner_remote)
    bw_gbps = iperf.measure_bandwidth(reverse=True)

    cfg.wait_hw_stats_settle()
    tx_drops_end = get_tx_drops(cfg)

    ksft_eq(tx_drops_end - tx_drops_start, 0,
            comment="TX drops during VXLAN+IPsec")
    ksft_ge(bw_gbps, 0.1,
            comment="Minimum 100Mbps over VXLAN+IPsec")
def main():
    """Entry point: runs the VXLAN+IPsec crypto-offload GSO selftest."""
    tests = [test_vxlan_ipsec_crypto_offload]
    with NetDrvEpEnv(__file__, nsim_test=False) as cfg:
        ksft_run(tests, args=(cfg,))
    ksft_exit()


if __name__ == "__main__":
    main()

View File

@ -48,7 +48,10 @@ class Iperf3Runner:
Starts the iperf3 client with the configured options.
"""
cmdline = self._build_client(streams, duration, reverse)
return cmd(cmdline, background=background, host=self.env.remote)
kwargs = {"background": background, "host": self.env.remote}
if not background:
kwargs["timeout"] = duration + 5
return cmd(cmdline, **kwargs)
def measure_bandwidth(self, reverse=False):
"""