netfilter pull request nf-next-26-01-20

-----BEGIN PGP SIGNATURE-----
 
 iQJdBAABCABHFiEEgKkgxbID4Gn1hq6fcJGo2a1f9gAFAmlvnjQbFIAAAAAABAAO
 bWFudTIsMi41KzEuMTEsMiwyDRxmd0BzdHJsZW4uZGUACgkQcJGo2a1f9gBGtQ//
 fpGuA96XbcQVHDAkOYYAsjkHk4DPJvTdL4m4Pnw5SO3m5lVq0kw5Cp+6drv3q/Pd
 pMTuAUliVDOlK7wYemsThv/DgzqSO93uxrqTeX2J4tb/TgVYA9040aAfvWKo76iE
 GxSqfM255cMOAJ2zpBbgP3WfwiklGBlB7phPDTP3yoxbwH6TdtDCxcpVJ+M8wVN4
 7CsRY3P8ZWZR1lW2V7sHERRABdsVfRmEtlFCEP+WARKHtkTyMZeqRsUtk3f50iRB
 AeEm3Uryj+q5s2Uof+uO8Lu0RxaBezJID4JksbWXEc/bsxaGKroPXx1qUsruwAJP
 1TW+HL2yJx1xIydinoKFSD6PE7as0LeRvwCLFNbOqTrGefpPFX7sIdQNb/qMh1IN
 JpU0O0cwhWPYKjXD8pGcVNqTFs9CABRGSZBRUkSKMhWqwF1Hu0habF4nL70QkCqv
 FuhrelmNY/pDn7X5EQRII7cZMAxEL2lFtv+HERwZH2uZvDdMjE6Fu/+NdGQT4bCe
 d95dlnd1UkpMhI2CPsDKACXb5aqA5apWb7+2F5WcXzlI7XmNcHJksY8OVkjB0r7p
 +6IeBAYLPEUv+PYsR6g7vf6pAHA6I/axkMoK4vFXRnU3POnrVyZh3JHL/CChokWj
 cy8BYZukYSqvOnEoPJRpWkwO8opWDZmT5gIUSqHgKGk=
 =QWao
 -----END PGP SIGNATURE-----

Merge tag 'nf-next-26-01-20' of https://git.kernel.org/pub/scm/linux/kernel/git/netfilter/nf-next

Florian Westphal says:

====================
Subject: netfilter: updates for net-next

1) Speed up nftables transactions after earlier transaction failed.
   Due to a (harmeless) bug we remained in slow paranoia mode until
   a successful transaction completes.

2) Allow generic tracker to resolve clashes, this avoids very rare
   packet drops.  From Yuto Hamaguchi.

3) Increase the cleanup budget to 64 entries in nf_conncount to reap
   more entries in one go, from Fernando Fernandez Mancera.

4) Allow icmp trackers to resolve clashes, this avoids very rare
   initial packet drop with test cases that have high-frequency pings.
   After this all trackers except tcp and sctp allow clash resolution.

5) Disentangle netfilter headers, don't include nftables/xtables headers
   in subsystems that are unrelated.

6) Don't rely on implicit includes coming from nf_conntrack_proto_gre.h.

7) Allow nfnetlink_queue nfq instance struct to get accounted via memcg,
   from Scott Mitchell.

8) Reject bogus xt target/match data upfront via netlink policiy in
   nft_compat interface rather than relying on x_tables API to do it.

9) Fix nf_conncount breakage when trying to limit loopback flows via
   prerouting rule, from Fernando Fernandez Mancera.
   This is a recent breakage but not seen as urgent enough to rush this
   via net tree at this late stage in development cycle.

10) Fix a possible off-by-one when parsing tcp option in xtables tcpmss
    match.  Also handled via -next due to late stage in development
    cycle.

* tag 'nf-next-26-01-20' of https://git.kernel.org/pub/scm/linux/kernel/git/netfilter/nf-next:
  netfilter: xt_tcpmss: check remaining length before reading optlen
  netfilter: nf_conncount: fix tracking of connections from localhost
  netfilter: nft_compat: add more restrictions on netlink attributes
  netfilter: nfnetlink_queue: nfqnl_instance GFP_ATOMIC -> GFP_KERNEL_ACCOUNT allocation
  netfilter: nf_conntrack: don't rely on implicit includes
  netfilter: don't include xt and nftables.h in unrelated subsystems
  netfilter: nf_conntrack: enable icmp clash support
  netfilter: nf_conncount: increase the connection clean up limit to 64
  netfilter: nf_conntrack: Add allow_clash to generic protocol handler
  netfilter: nf_tables: reset table validation state on abort
====================

Link: https://patch.msgid.link/20260120191803.22208-1-fw@strlen.de
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
This commit is contained in:
Jakub Kicinski 2026-01-21 20:23:11 -08:00
commit a7c708dc0d
29 changed files with 102 additions and 60 deletions

View File

@ -13,7 +13,6 @@
#include <linux/ptrace.h>
#include <linux/audit_arch.h>
#include <uapi/linux/audit.h>
#include <uapi/linux/netfilter/nf_tables.h>
#include <uapi/linux/fanotify.h>
#define AUDIT_INO_UNSET ((unsigned long)-1)

View File

@ -1,9 +1,6 @@
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _CONNTRACK_PROTO_GRE_H
#define _CONNTRACK_PROTO_GRE_H
#include <asm/byteorder.h>
#include <net/gre.h>
#include <net/pptp.h>
struct nf_ct_gre {
unsigned int stream_timeout;

View File

@ -16,6 +16,7 @@
#include <linux/bitops.h>
#include <linux/compiler.h>
#include <net/netns/generic.h>
#include <linux/netfilter/nf_conntrack_common.h>
#include <linux/netfilter/nf_conntrack_tcp.h>
#include <linux/netfilter/nf_conntrack_sctp.h>

View File

@ -13,6 +13,7 @@ struct nf_conncount_list {
u32 last_gc; /* jiffies at most recent gc */
struct list_head head; /* connections with the same filtering key */
unsigned int count; /* length of list */
unsigned int last_gc_count; /* length of list at most recent gc */
};
struct nf_conncount_data *nf_conncount_init(struct net *net, unsigned int keylen);

View File

@ -11,7 +11,7 @@
#ifndef _NF_CONNTRACK_TUPLE_H
#define _NF_CONNTRACK_TUPLE_H
#include <linux/netfilter/x_tables.h>
#include <linux/netfilter.h>
#include <linux/netfilter/nf_conntrack_tuple_common.h>
#include <linux/list_nulls.h>

View File

@ -6,7 +6,6 @@
#include <linux/list.h>
#include <linux/netfilter.h>
#include <linux/netfilter/nfnetlink.h>
#include <linux/netfilter/x_tables.h>
#include <linux/netfilter/nf_tables.h>
#include <linux/u64_stats_sync.h>
#include <linux/rhashtable.h>

View File

@ -16,8 +16,7 @@
#include <net/netfilter/nf_conntrack_helper.h>
#include <net/netfilter/nf_conntrack_bridge.h>
#include <linux/netfilter/nf_tables.h>
#include <net/netfilter/nf_tables.h>
#include <linux/netfilter_ipv4.h>
#include "../br_private.h"

View File

@ -34,8 +34,9 @@
#define CONNCOUNT_SLOTS 256U
#define CONNCOUNT_GC_MAX_NODES 8
#define MAX_KEYLEN 5
#define CONNCOUNT_GC_MAX_NODES 8
#define CONNCOUNT_GC_MAX_COLLECT 64
#define MAX_KEYLEN 5
/* we will save the tuples of all connections we care about */
struct nf_conncount_tuple {
@ -178,16 +179,28 @@ static int __nf_conncount_add(struct net *net,
return -ENOENT;
if (ct && nf_ct_is_confirmed(ct)) {
err = -EEXIST;
goto out_put;
/* local connections are confirmed in postrouting so confirmation
* might have happened before hitting connlimit
*/
if (skb->skb_iif != LOOPBACK_IFINDEX) {
err = -EEXIST;
goto out_put;
}
/* this is likely a local connection, skip optimization to avoid
* adding duplicates from a 'packet train'
*/
goto check_connections;
}
if ((u32)jiffies == list->last_gc)
if ((u32)jiffies == list->last_gc &&
(list->count - list->last_gc_count) < CONNCOUNT_GC_MAX_COLLECT)
goto add_new_node;
check_connections:
/* check the saved connections */
list_for_each_entry_safe(conn, conn_n, &list->head, node) {
if (collect > CONNCOUNT_GC_MAX_NODES)
if (collect > CONNCOUNT_GC_MAX_COLLECT)
break;
found = find_or_evict(net, list, conn);
@ -230,6 +243,7 @@ static int __nf_conncount_add(struct net *net,
nf_ct_put(found_ct);
}
list->last_gc = (u32)jiffies;
list->last_gc_count = list->count;
add_new_node:
if (WARN_ON_ONCE(list->count > INT_MAX)) {
@ -277,6 +291,7 @@ void nf_conncount_list_init(struct nf_conncount_list *list)
spin_lock_init(&list->list_lock);
INIT_LIST_HEAD(&list->head);
list->count = 0;
list->last_gc_count = 0;
list->last_gc = (u32)jiffies;
}
EXPORT_SYMBOL_GPL(nf_conncount_list_init);
@ -316,13 +331,14 @@ static bool __nf_conncount_gc_list(struct net *net,
}
nf_ct_put(found_ct);
if (collected > CONNCOUNT_GC_MAX_NODES)
if (collected > CONNCOUNT_GC_MAX_COLLECT)
break;
}
if (!list->count)
ret = true;
list->last_gc = (u32)jiffies;
list->last_gc_count = list->count;
return ret;
}

View File

@ -14,6 +14,7 @@
#include <linux/types.h>
#include <linux/btf_ids.h>
#include <linux/net_namespace.h>
#include <net/sock.h>
#include <net/xdp.h>
#include <net/netfilter/nf_conntrack_bpf.h>
#include <net/netfilter/nf_conntrack_core.h>

View File

@ -23,6 +23,7 @@
#include <linux/skbuff.h>
#include <net/route.h>
#include <net/ip6_route.h>
#include <linux/netfilter_ipv4.h>
#include <linux/netfilter_ipv6.h>
#include <net/netfilter/nf_conntrack.h>

View File

@ -32,6 +32,7 @@
#include <linux/siphash.h>
#include <linux/netfilter.h>
#include <net/ipv6.h>
#include <net/netlink.h>
#include <net/sock.h>
#include <net/netfilter/nf_conntrack.h>

View File

@ -67,6 +67,7 @@ void nf_conntrack_generic_init_net(struct net *net)
const struct nf_conntrack_l4proto nf_conntrack_l4proto_generic =
{
.l4proto = 255,
.allow_clash = true,
#ifdef CONFIG_NF_CONNTRACK_TIMEOUT
.ctnl_timeout = {
.nlattr_to_obj = generic_timeout_nlattr_to_obj,

View File

@ -33,12 +33,14 @@
#include <linux/skbuff.h>
#include <linux/slab.h>
#include <net/dst.h>
#include <net/gre.h>
#include <net/net_namespace.h>
#include <net/netns/generic.h>
#include <net/netfilter/nf_conntrack_l4proto.h>
#include <net/netfilter/nf_conntrack_helper.h>
#include <net/netfilter/nf_conntrack_core.h>
#include <net/netfilter/nf_conntrack_timeout.h>
#include <net/pptp.h>
#include <linux/netfilter/nf_conntrack_proto_gre.h>
#include <linux/netfilter/nf_conntrack_pptp.h>

View File

@ -365,6 +365,7 @@ void nf_conntrack_icmp_init_net(struct net *net)
const struct nf_conntrack_l4proto nf_conntrack_l4proto_icmp =
{
.l4proto = IPPROTO_ICMP,
.allow_clash = true,
#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
.tuple_to_nlattr = icmp_tuple_to_nlattr,
.nlattr_tuple_size = icmp_nlattr_tuple_size,

View File

@ -343,6 +343,7 @@ void nf_conntrack_icmpv6_init_net(struct net *net)
const struct nf_conntrack_l4proto nf_conntrack_l4proto_icmpv6 =
{
.l4proto = IPPROTO_ICMPV6,
.allow_clash = true,
#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
.tuple_to_nlattr = icmpv6_tuple_to_nlattr,
.nlattr_tuple_size = icmpv6_nlattr_tuple_size,

View File

@ -8,6 +8,8 @@
#include <linux/ipv6.h>
#include <linux/netdevice.h>
#include <linux/if_ether.h>
#include <linux/if_vlan.h>
#include <net/gre.h>
#include <net/gso.h>
#include <net/ip.h>
#include <net/ipv6.h>

View File

@ -6,6 +6,7 @@
#include <linux/netdevice.h>
#include <linux/tc_act/tc_csum.h>
#include <net/flow_offload.h>
#include <net/ip_tunnels.h>
#include <net/netfilter/nf_flow_table.h>
#include <net/netfilter/nf_tables.h>
#include <net/netfilter/nf_conntrack.h>

View File

@ -2,6 +2,7 @@
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/init.h>
#include <linux/etherdevice.h>
#include <linux/netlink.h>
#include <linux/netfilter.h>
#include <linux/spinlock.h>

View File

@ -2,6 +2,9 @@
/* Support nat functions for openvswitch and used by OVS and TC conntrack. */
#include <net/netfilter/nf_nat.h>
#include <net/ipv6.h>
#include <linux/ip.h>
#include <linux/if_vlan.h>
/* Modelled after nf_nat_ipv[46]_fn().
* range is only used for new, uninitialized NAT state.

View File

@ -25,6 +25,7 @@
#include <net/ip6_route.h>
#include <net/xfrm.h>
#include <net/ipv6.h>
#include <net/pptp.h>
#include <net/netfilter/nf_conntrack_core.h>
#include <net/netfilter/nf_conntrack.h>

View File

@ -10,6 +10,7 @@
#include <net/netns/generic.h>
#include <linux/proc_fs.h>
#include <linux/netfilter_ipv4.h>
#include <linux/netfilter_ipv6.h>
#include <linux/netfilter/nf_synproxy.h>

View File

@ -14,6 +14,7 @@
#include <linux/rhashtable.h>
#include <linux/audit.h>
#include <linux/netfilter.h>
#include <linux/netfilter_ipv4.h>
#include <linux/netfilter/nfnetlink.h>
#include <linux/netfilter/nf_tables.h>
#include <net/netfilter/nf_flow_table.h>
@ -11536,6 +11537,13 @@ static int nf_tables_abort(struct net *net, struct sk_buff *skb,
ret = __nf_tables_abort(net, action);
nft_gc_seq_end(nft_net, gc_seq);
if (action == NFNL_ABORT_NONE) {
struct nft_table *table;
list_for_each_entry(table, &nft_net->tables, list)
table->validate_state = NFT_VALIDATE_SKIP;
}
WARN_ON_ONCE(!list_empty(&nft_net->commit_list));
/* module autoload needs to happen after GC sequence update because it

View File

@ -121,17 +121,9 @@ instance_create(struct nfnl_queue_net *q, u_int16_t queue_num, u32 portid)
unsigned int h;
int err;
spin_lock(&q->instances_lock);
if (instance_lookup(q, queue_num)) {
err = -EEXIST;
goto out_unlock;
}
inst = kzalloc(sizeof(*inst), GFP_ATOMIC);
if (!inst) {
err = -ENOMEM;
goto out_unlock;
}
inst = kzalloc(sizeof(*inst), GFP_KERNEL_ACCOUNT);
if (!inst)
return ERR_PTR(-ENOMEM);
inst->queue_num = queue_num;
inst->peer_portid = portid;
@ -141,9 +133,15 @@ instance_create(struct nfnl_queue_net *q, u_int16_t queue_num, u32 portid)
spin_lock_init(&inst->lock);
INIT_LIST_HEAD(&inst->queue_list);
spin_lock(&q->instances_lock);
if (instance_lookup(q, queue_num)) {
err = -EEXIST;
goto out_unlock;
}
if (!try_module_get(THIS_MODULE)) {
err = -EAGAIN;
goto out_free;
goto out_unlock;
}
h = instance_hashfn(queue_num);
@ -153,10 +151,9 @@ instance_create(struct nfnl_queue_net *q, u_int16_t queue_num, u32 portid)
return inst;
out_free:
kfree(inst);
out_unlock:
spin_unlock(&q->instances_lock);
kfree(inst);
return ERR_PTR(err);
}
@ -1498,7 +1495,8 @@ static int nfqnl_recv_config(struct sk_buff *skb, const struct nfnl_info *info,
struct nfqnl_msg_config_cmd *cmd = NULL;
struct nfqnl_instance *queue;
__u32 flags = 0, mask = 0;
int ret = 0;
WARN_ON_ONCE(!lockdep_nfnl_is_held(NFNL_SUBSYS_QUEUE));
if (nfqa[NFQA_CFG_CMD]) {
cmd = nla_data(nfqa[NFQA_CFG_CMD]);
@ -1544,47 +1542,44 @@ static int nfqnl_recv_config(struct sk_buff *skb, const struct nfnl_info *info,
}
}
/* Lookup queue under RCU. After peer_portid check (or for new queue
* in BIND case), the queue is owned by the socket sending this message.
* A socket cannot simultaneously send a message and close, so while
* processing this CONFIG message, nfqnl_rcv_nl_event() (triggered by
* socket close) cannot destroy this queue. Safe to use without RCU.
*/
rcu_read_lock();
queue = instance_lookup(q, queue_num);
if (queue && queue->peer_portid != NETLINK_CB(skb).portid) {
ret = -EPERM;
goto err_out_unlock;
rcu_read_unlock();
return -EPERM;
}
rcu_read_unlock();
if (cmd != NULL) {
switch (cmd->command) {
case NFQNL_CFG_CMD_BIND:
if (queue) {
ret = -EBUSY;
goto err_out_unlock;
}
queue = instance_create(q, queue_num,
NETLINK_CB(skb).portid);
if (IS_ERR(queue)) {
ret = PTR_ERR(queue);
goto err_out_unlock;
}
if (queue)
return -EBUSY;
queue = instance_create(q, queue_num, NETLINK_CB(skb).portid);
if (IS_ERR(queue))
return PTR_ERR(queue);
break;
case NFQNL_CFG_CMD_UNBIND:
if (!queue) {
ret = -ENODEV;
goto err_out_unlock;
}
if (!queue)
return -ENODEV;
instance_destroy(q, queue);
goto err_out_unlock;
return 0;
case NFQNL_CFG_CMD_PF_BIND:
case NFQNL_CFG_CMD_PF_UNBIND:
break;
default:
ret = -ENOTSUPP;
goto err_out_unlock;
return -EOPNOTSUPP;
}
}
if (!queue) {
ret = -ENODEV;
goto err_out_unlock;
}
if (!queue)
return -ENODEV;
if (nfqa[NFQA_CFG_PARAMS]) {
struct nfqnl_msg_config_params *params =
@ -1609,9 +1604,7 @@ static int nfqnl_recv_config(struct sk_buff *skb, const struct nfnl_info *info,
spin_unlock_bh(&queue->lock);
}
err_out_unlock:
rcu_read_unlock();
return ret;
return 0;
}
static const struct nfnl_callback nfqnl_cb[NFQNL_MSG_MAX] = {

View File

@ -134,7 +134,8 @@ static void nft_target_eval_bridge(const struct nft_expr *expr,
}
static const struct nla_policy nft_target_policy[NFTA_TARGET_MAX + 1] = {
[NFTA_TARGET_NAME] = { .type = NLA_NUL_STRING },
[NFTA_TARGET_NAME] = { .type = NLA_NUL_STRING,
.len = XT_EXTENSION_MAXNAMELEN, },
[NFTA_TARGET_REV] = NLA_POLICY_MAX(NLA_BE32, 255),
[NFTA_TARGET_INFO] = { .type = NLA_BINARY },
};
@ -434,7 +435,8 @@ static void nft_match_eval(const struct nft_expr *expr,
}
static const struct nla_policy nft_match_policy[NFTA_MATCH_MAX + 1] = {
[NFTA_MATCH_NAME] = { .type = NLA_NUL_STRING },
[NFTA_MATCH_NAME] = { .type = NLA_NUL_STRING,
.len = XT_EXTENSION_MAXNAMELEN },
[NFTA_MATCH_REV] = NLA_POLICY_MAX(NLA_BE32, 255),
[NFTA_MATCH_INFO] = { .type = NLA_BINARY },
};
@ -693,7 +695,12 @@ static int nfnl_compat_get_rcu(struct sk_buff *skb,
name = nla_data(tb[NFTA_COMPAT_NAME]);
rev = ntohl(nla_get_be32(tb[NFTA_COMPAT_REV]));
target = ntohl(nla_get_be32(tb[NFTA_COMPAT_TYPE]));
/* x_tables api checks for 'target == 1' to mean target,
* everything else means 'match'.
* In x_tables world, the number is set by kernel, not
* userspace.
*/
target = nla_get_be32(tb[NFTA_COMPAT_TYPE]) == htonl(1);
switch(family) {
case AF_INET:

View File

@ -1,4 +1,5 @@
// SPDX-License-Identifier: GPL-2.0-only
#include <linux/etherdevice.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/init.h>

View File

@ -7,6 +7,7 @@
#include <net/netfilter/nf_conntrack.h>
#include <net/netfilter/nf_conntrack_synproxy.h>
#include <net/netfilter/nf_synproxy.h>
#include <linux/netfilter_ipv4.h>
#include <linux/netfilter/nf_tables.h>
#include <linux/netfilter/nf_synproxy.h>

View File

@ -61,7 +61,7 @@ tcpmss_mt(const struct sk_buff *skb, struct xt_action_param *par)
return (mssval >= info->mss_min &&
mssval <= info->mss_max) ^ info->invert;
}
if (op[i] < 2)
if (op[i] < 2 || i == optlen - 1)
i++;
else
i += op[i+1] ? : 1;

View File

@ -13,9 +13,11 @@
#include <linux/skbuff.h>
#include <linux/rtnetlink.h>
#include <linux/pkt_cls.h>
#include <linux/if_tunnel.h>
#include <linux/ip.h>
#include <linux/ipv6.h>
#include <linux/rhashtable.h>
#include <net/gre.h>
#include <net/netlink.h>
#include <net/pkt_sched.h>
#include <net/pkt_cls.h>

View File

@ -16,6 +16,7 @@
#include <net/pkt_sched.h>
#include <net/act_api.h>
#include <net/pkt_cls.h>
#include <net/inet_ecn.h>
#include <uapi/linux/tc_act/tc_ctinfo.h>
#include <net/tc_act/tc_ctinfo.h>
#include <net/tc_wrapper.h>