netfilter pull request nf-next-25-09-24

-----BEGIN PGP SIGNATURE-----
 
 iQJBBAABCAArFiEEgKkgxbID4Gn1hq6fcJGo2a1f9gAFAmjT71ENHGZ3QHN0cmxl
 bi5kZQAKCRBwkajZrV/2AN7OEAClGm5eABrjTlHq8REn7S4xUBo9/jOESTvg22Bg
 A4qu14OV7rGMT/mLyV0kuNzHs18NT9bhpTwANL0bjz7YOK+v8QRlsHzNaJ7dzJ00
 WUPPYFnAhCA3qTCWLLvCnzpZgAopnRpFAgpXP6ddZ2kpsT+fo5B67kcasOQoVJzz
 9Kk98r0kgo6YKghmaIhBgchTt+rxZbo4R0Jb9BoBPXeQ32AghacOlngXPiAJA2d/
 Yeqet5p0gg8oI62KhhBC+uXyR29DG4c3iD2pUUhPnxV1Gt3S6AG1PTkt0KYzqVKm
 XEIxjrmtn3+2NYcaaH340mQwBbBV1EdZroHeBPUN9gYr5MpkHjppBJyrQ76cXXcr
 xKw2C357H4O8tT2nxz4LvJagRKhnZa0P6vWL6HeFIlFHXOqEJXYG6X2jbDzomUXd
 NPQrwKJnf2iYiRcc4U5xyABZE98/kVmqcclJDs0Zxz0eX8N8e26d/wjFKAZxfJyY
 pCUjF7lo7Doq6sOiB25OZjbPN3Fz/BcBTYTCTK+8MuQM1EaKXm33RcWjtO2gDUoe
 vyGfQResdwFELj9niPf27Nymezm1nX27x6f/jqCmioLxqQDGAGPsgJIMHy9XO8wz
 3YU/92dbwBdOQ6laKT7f5ClfPGDQlpqQTXXyxVHLFUd3T6JHJGUv0AplwVNZk6l8
 KCXEWg==
 =uK7h
 -----END PGP SIGNATURE-----

Merge tag 'nf-next-25-09-24' of https://git.kernel.org/pub/scm/linux/kernel/git/netfilter/nf-next

Florian Westphal says:

====================
netfilter: fixes for net-next

These fixes target next because the bugs are either not severe or have
existed for so long that there is no reason to cram them in at the last
minute.

1) Fix IPVS ftp unregistering during netns cleanup, broken since netns
   support was introduced in 2011 in the 2.6.39 kernel.
   From Slavin Liu.

2) nfnetlink must reset the 'nlh' pointer back to the original
   address when a batch is replayed, else we emit bogus ACK messages
   and conceal real errno from userspace.
   From Fernando Fernandez Mancera.  This was broken since 6.10.

3) Recent fix for nftables 'pipapo' set type was incomplete, it only
   made things work for the AVX2 version of the algorithm.

4) Testing revealed another problem with the avx2 version that results in
   an out-of-bounds read access; this bug has existed since the feature was
   added in the 5.7 kernel.  This also comes with a selftest update.

Last fix resolves a long-standing bug (since 4.9) in conntrack /proc
interface:
Decrease skip count when we reap an expired entry during dump.
As-is we erroneously elide one conntrack entry from the dump for every
expired entry seen.  From Eric Dumazet.

* tag 'nf-next-25-09-24' of https://git.kernel.org/pub/scm/linux/kernel/git/netfilter/nf-next:
  netfilter: nf_conntrack: do not skip entries in /proc/net/nf_conntrack
  selftests: netfilter: nft_concat_range.sh: add check for double-create bug
  netfilter: nft_set_pipapo_avx2: fix skip of expired entries
  netfilter: nft_set_pipapo: use 0 genmask for packetpath lookups
  netfilter: nfnetlink: reset nlh pointer during batch replay
  ipvs: Defer ip_vs_ftp unregister during netns cleanup
====================

Link: https://patch.msgid.link/20250924140654.10210-1-fw@strlen.de
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
This commit is contained in:
Jakub Kicinski 2025-09-24 17:45:14 -07:00
commit c7ab8024ca
6 changed files with 73 additions and 10 deletions

View File

@ -53,6 +53,7 @@ enum {
IP_VS_FTP_EPSV,
};
static bool exiting_module;
/*
* List of ports (up to IP_VS_APP_MAX_PORTS) to be handled by helper
* First port is set to the default port.
@ -605,7 +606,7 @@ static void __ip_vs_ftp_exit(struct net *net)
{
struct netns_ipvs *ipvs = net_ipvs(net);
if (!ipvs)
if (!ipvs || !exiting_module)
return;
unregister_ip_vs_app(ipvs, &ip_vs_ftp);
@ -627,6 +628,7 @@ static int __init ip_vs_ftp_init(void)
*/
static void __exit ip_vs_ftp_exit(void)
{
exiting_module = true;
unregister_pernet_subsys(&ip_vs_ftp_ops);
/* rcu_barrier() is called by netns */
}

View File

@ -317,6 +317,9 @@ static int ct_seq_show(struct seq_file *s, void *v)
smp_acquire__after_ctrl_dep();
if (nf_ct_should_gc(ct)) {
struct ct_iter_state *st = s->private;
st->skip_elems--;
nf_ct_kill(ct);
goto release;
}

View File

@ -376,6 +376,7 @@ static void nfnetlink_rcv_batch(struct sk_buff *skb, struct nlmsghdr *nlh,
const struct nfnetlink_subsystem *ss;
const struct nfnl_callback *nc;
struct netlink_ext_ack extack;
struct nlmsghdr *onlh = nlh;
LIST_HEAD(err_list);
u32 status;
int err;
@ -386,6 +387,7 @@ static void nfnetlink_rcv_batch(struct sk_buff *skb, struct nlmsghdr *nlh,
status = 0;
replay_abort:
skb = netlink_skb_clone(oskb, GFP_KERNEL);
nlh = onlh;
if (!skb)
return netlink_ack(oskb, nlh, -ENOMEM, NULL);

View File

@ -549,8 +549,7 @@ static struct nft_pipapo_elem *pipapo_get(const struct nft_pipapo_match *m,
*
* This function is called from the data path. It will search for
* an element matching the given key in the current active copy.
* Unlike other set types, this uses NFT_GENMASK_ANY instead of
* nft_genmask_cur().
* Unlike other set types, this uses 0 instead of nft_genmask_cur().
*
* This is because new (future) elements are not reachable from
* priv->match, they get added to priv->clone instead.
@ -560,8 +559,8 @@ static struct nft_pipapo_elem *pipapo_get(const struct nft_pipapo_match *m,
* inconsistent state: matching old entries get skipped but thew
* newly matching entries are unreachable.
*
* GENMASK will still find the 'now old' entries which ensures consistent
* priv->match view.
* GENMASK_ANY doesn't work for the same reason: old-gen entries get
* skipped, new-gen entries are only reachable from priv->clone.
*
* nft_pipapo_commit swaps ->clone and ->match shortly after the
* genbit flip. As ->clone doesn't contain the old entries in the first
@ -578,7 +577,7 @@ nft_pipapo_lookup(const struct net *net, const struct nft_set *set,
const struct nft_pipapo_elem *e;
m = rcu_dereference(priv->match);
e = pipapo_get_slow(m, (const u8 *)key, NFT_GENMASK_ANY, get_jiffies_64());
e = pipapo_get_slow(m, (const u8 *)key, 0, get_jiffies_64());
return e ? &e->ext : NULL;
}

View File

@ -1179,7 +1179,6 @@ struct nft_pipapo_elem *pipapo_get_avx2(const struct nft_pipapo_match *m,
nft_pipapo_avx2_prepare();
next_match:
nft_pipapo_for_each_field(f, i, m) {
bool last = i == m->field_count - 1, first = !i;
int ret = 0;
@ -1226,6 +1225,7 @@ struct nft_pipapo_elem *pipapo_get_avx2(const struct nft_pipapo_match *m,
#undef NFT_SET_PIPAPO_AVX2_LOOKUP
next_match:
if (ret < 0) {
scratch->map_index = map_index;
kernel_fpu_end();
@ -1238,8 +1238,11 @@ struct nft_pipapo_elem *pipapo_get_avx2(const struct nft_pipapo_match *m,
e = f->mt[ret].e;
if (unlikely(__nft_set_elem_expired(&e->ext, tstamp) ||
!nft_set_elem_active(&e->ext, genmask)))
!nft_set_elem_active(&e->ext, genmask))) {
ret = pipapo_refill(res, f->bsize, f->rules,
fill, f->mt, last);
goto next_match;
}
scratch->map_index = map_index;
kernel_fpu_end();
@ -1292,7 +1295,7 @@ nft_pipapo_avx2_lookup(const struct net *net, const struct nft_set *set,
m = rcu_dereference(priv->match);
e = pipapo_get_avx2(m, rp, NFT_GENMASK_ANY, get_jiffies_64());
e = pipapo_get_avx2(m, rp, 0, get_jiffies_64());
local_bh_enable();
return e ? &e->ext : NULL;

View File

@ -29,7 +29,7 @@ TYPES="net_port port_net net6_port port_proto net6_port_mac net6_port_mac_proto
net6_port_net6_port net_port_mac_proto_net"
# Reported bugs, also described by TYPE_ variables below
BUGS="flush_remove_add reload net_port_proto_match avx2_mismatch"
BUGS="flush_remove_add reload net_port_proto_match avx2_mismatch doublecreate"
# List of possible paths to pktgen script from kernel tree for performance tests
PKTGEN_SCRIPT_PATHS="
@ -408,6 +408,18 @@ perf_duration 0
"
# Descriptor for the 'doublecreate' entry listed in BUGS: a two-field
# ipv4_addr . ipv4_addr concatenation matched on ip saddr . ip daddr.
TYPE_doublecreate="
display cannot create same element twice
type_spec ipv4_addr . ipv4_addr
chain_spec ip saddr . ip daddr
dst addr4
proto icmp
race_repeat 0
perf_duration 0
"
# Set template for all tests, types and rules are filled in depending on test
set_template='
flush ruleset
@ -1900,6 +1912,48 @@ test_bug_avx2_mismatch()
fi
}
# Check that 'create' is rejected for an element that already exists.
#
# Steps, in order:
#   1. 'add' the elements twice; both calls must succeed.
#   2. 'create' the same elements; this must fail.
#   3. In one transaction: flush the set, then 'create' the elements twice;
#      the transaction must fail.
#   4. In one transaction: flush the set, then 'create' the elements once;
#      the transaction must succeed.
#
# Returns 0 if all steps behave as expected, 1 on the first unexpected
# result, and ${ksft_skip} if setup fails.
test_bug_doublecreate()
{
local elements="1.2.3.4 . 1.2.4.1, 1.2.4.1 . 1.2.3.4"
local ret=1
# NOTE(review): 'i' is declared but not referenced in this function.
local i
setup veth send_"${proto}" set || return ${ksft_skip}
# Initial insertion of both elements.
add "{ $elements }" || return 1
# expected to work: 'add' on existing should be no-op.
add "{ $elements }" || return 1
# 'create' should return an error.
if nft create element inet filter test "{ $elements }" 2>/dev/null; then
err "Could create an existing element"
return 1
fi
# Same batch: flush, then create the elements twice. The batch is expected
# to be rejected, so a zero exit status is the failure case here.
nft -f - <<EOF 2>/dev/null
flush set inet filter test
create element inet filter test { $elements }
create element inet filter test { $elements }
EOF
# Capture the exit status of the nft batch right away, before any other
# command overwrites $?.
ret=$?
if [ $ret -eq 0 ]; then
err "Could create element twice in one transaction"
err "$(nft -a list ruleset)"
return 1
fi
# Same batch: flush, then create the elements once. This is expected to
# succeed, so a non-zero exit status is the failure case here.
nft -f - <<EOF 2>/dev/null
flush set inet filter test
create element inet filter test { $elements }
EOF
ret=$?
if [ $ret -ne 0 ]; then
err "Could not flush and re-create element in one transaction"
return 1
fi
return 0
}
# Dispatch to the bug-specific test function: builds the name
# test_bug_<subtest> from the value of ${subtest} and runs it via eval.
test_reported_issues() {
eval test_bug_"${subtest}"
}