netfilter pull request nf-25-12-16

-----BEGIN PGP SIGNATURE-----
 
 iQJBBAABCAArFiEEgKkgxbID4Gn1hq6fcJGo2a1f9gAFAmlBqhYNHGZ3QHN0cmxl
 bi5kZQAKCRBwkajZrV/2AAL3EACpOsJRrAyYtltuXzaPyIeXMSFXqyl8r1fl/t35
 BXPX9ZuZLInkmPvbSvBzZjoN7PXhByu31Y2f4/u4qoylwTFs+Cs0WLQF3QVRTV0q
 zo1hTOxm2Lt6pB3GdLLV0W2gz14E9xDAwW2BvjgDVg/RhqlOb+Hn5QTdyJVnFiov
 je1n5mWT2nGtL/p6KwtJdVja23Hx3egqSv5qLYE2sEwTEVBt6wupqCRX8mc8/Gpx
 rir7tmZGHtFbgwgJ+tShfqcBzSfc+uFHvq4tyzoWE+y0REUBApvwrxTFMjb6yA+q
 baGKDcJFugtfXBVMdg3HM1NDKO4n/Rl7qJzdz2OYyqHIh8jHDyYakXtDu6U2quoX
 W2hhrPMpYVYy6TCocFDRIq6xEiORqBMZecnL8dm+LkHVtxhB4vituweJd3hXCyUu
 Zj3dlW3idI3NGzpNh/wBMgUE7ghHfbgLJvPT+sCypYUH59UL5WgvP5uiZrR+rNRF
 gujC1jLIxwDR0Yz6X3/fO7xscNOnx8zu2llX+8y5PeQCntMpUybdeKcHbx3k206D
 fYQV9QSk2AN0XeID26afkzer0Ibk7vC0Q6AD2lRqIPeQ6yGGS5VfFEutCl64M4Mp
 fyBvxp7336W3YOKwF7vxH1BjHgzqfdVyw97vXA/2oPs85+4tWzqNex+X5HMzruM/
 hrI1iA==
 =8bX+
 -----END PGP SIGNATURE-----

Merge tag 'nf-25-12-16' of https://git.kernel.org/pub/scm/linux/kernel/git/netfilter/nf

Florian Westphal says:

====================
netfilter: updates for net

The following patchset contains Netfilter fixes for *net*:

1)  Jozsef Kadlecsik is retiring.  Fortunately Jozsef will still keep an
    eye on ipset patches.

2)  remove a bogus direction check from nat core, this caused spurious
    flakes in the 'reverse clash' selftest, from myself.

3) nf_tables doesn't need to do chain validation on register store,
   from Pablo Neira Ayuso.

4) nf_tables shouldn't revisit chains during ruleset (graph) validation
   if possible.  Both 3 and 4 were slated for -next initially but there
   are now two independent reports of people hitting soft lockup errors
   during ruleset validation, so it makes no sense anymore to route
   this via -next given this is -stable material. From myself.

5) call cond_resched() in a more frequently visited place during nf_tables
   chain validation, this wasn't possible earlier due to rcu read lock,
   but nowadays its not held anymore during set walks.

6) Don't fail conntrack packetdrill test with HZ=100 kernels.

netfilter pull request nf-25-12-16

* tag 'nf-25-12-16' of https://git.kernel.org/pub/scm/linux/kernel/git/netfilter/nf:
  selftests: netfilter: packetdrill: avoid failure on HZ=100 kernel
  netfilter: nf_tables: avoid softlockup warnings in nft_chain_validate
  netfilter: nf_tables: avoid chain re-validation if possible
  netfilter: nf_tables: remove redundant chain validation on register store
  netfilter: nf_nat: remove bogus direction check
  MAINTAINERS: Remove Jozsef Kadlecsik from MAINTAINERS file
====================

Link: https://patch.msgid.link/20251216190904.14507-1-fw@strlen.de
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
This commit is contained in:
Paolo Abeni 2025-12-18 13:55:01 +01:00
commit 3e82accd3e
8 changed files with 107 additions and 44 deletions

View File

@ -1983,6 +1983,7 @@ D: netfilter: TCP window tracking code
D: netfilter: raw table
D: netfilter: iprange match
D: netfilter: new logging interfaces
D: netfilter: ipset
D: netfilter: various other hacks
S: Tata
S: Hungary

View File

@ -17808,7 +17808,6 @@ F: drivers/net/ethernet/neterion/
NETFILTER
M: Pablo Neira Ayuso <pablo@netfilter.org>
M: Jozsef Kadlecsik <kadlec@netfilter.org>
M: Florian Westphal <fw@strlen.de>
R: Phil Sutter <phil@nwl.cc>
L: netfilter-devel@vger.kernel.org

View File

@ -1091,6 +1091,29 @@ struct nft_rule_blob {
__attribute__((aligned(__alignof__(struct nft_rule_dp))));
};
enum nft_chain_types {
NFT_CHAIN_T_DEFAULT = 0,
NFT_CHAIN_T_ROUTE,
NFT_CHAIN_T_NAT,
NFT_CHAIN_T_MAX
};
/**
* struct nft_chain_validate_state - validation state
*
* If a chain is encountered again during table validation it is
* possible to avoid revalidation provided the calling context is
* compatible. This structure stores relevant calling context of
* previous validations.
*
* @hook_mask: the hook numbers and locations the chain is linked to
* @depth: the deepest call chain level the chain is linked to
*/
struct nft_chain_validate_state {
u8 hook_mask[NFT_CHAIN_T_MAX];
u8 depth;
};
/**
* struct nft_chain - nf_tables chain
*
@ -1109,6 +1132,7 @@ struct nft_rule_blob {
* @udlen: user data length
* @udata: user data in the chain
* @blob_next: rule blob pointer to the next in the chain
* @vstate: validation state
*/
struct nft_chain {
struct nft_rule_blob __rcu *blob_gen_0;
@ -1128,9 +1152,10 @@ struct nft_chain {
/* Only used during control plane commit phase: */
struct nft_rule_blob *blob_next;
struct nft_chain_validate_state vstate;
};
int nft_chain_validate(const struct nft_ctx *ctx, const struct nft_chain *chain);
int nft_chain_validate(const struct nft_ctx *ctx, struct nft_chain *chain);
int nft_setelem_validate(const struct nft_ctx *ctx, struct nft_set *set,
const struct nft_set_iter *iter,
struct nft_elem_priv *elem_priv);
@ -1138,13 +1163,6 @@ int nft_set_catchall_validate(const struct nft_ctx *ctx, struct nft_set *set);
int nf_tables_bind_chain(const struct nft_ctx *ctx, struct nft_chain *chain);
void nf_tables_unbind_chain(const struct nft_ctx *ctx, struct nft_chain *chain);
enum nft_chain_types {
NFT_CHAIN_T_DEFAULT = 0,
NFT_CHAIN_T_ROUTE,
NFT_CHAIN_T_NAT,
NFT_CHAIN_T_MAX
};
/**
* struct nft_chain_type - nf_tables chain type info
*

View File

@ -294,25 +294,13 @@ nf_nat_used_tuple_new(const struct nf_conntrack_tuple *tuple,
ct = nf_ct_tuplehash_to_ctrack(thash);
/* NB: IP_CT_DIR_ORIGINAL should be impossible because
* nf_nat_used_tuple() handles origin collisions.
*
* Handle remote chance other CPU confirmed its ct right after.
*/
if (thash->tuple.dst.dir != IP_CT_DIR_REPLY)
goto out;
/* clashing connection subject to NAT? Retry with new tuple. */
if (READ_ONCE(ct->status) & uses_nat)
goto out;
if (nf_ct_tuple_equal(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple,
&ignored_ct->tuplehash[IP_CT_DIR_REPLY].tuple) &&
nf_ct_tuple_equal(&ct->tuplehash[IP_CT_DIR_REPLY].tuple,
&ignored_ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple)) {
&ignored_ct->tuplehash[IP_CT_DIR_REPLY].tuple))
taken = false;
goto out;
}
out:
nf_ct_put(ct);
return taken;

View File

@ -123,6 +123,29 @@ static void nft_validate_state_update(struct nft_table *table, u8 new_validate_s
table->validate_state = new_validate_state;
}
static bool nft_chain_vstate_valid(const struct nft_ctx *ctx,
const struct nft_chain *chain)
{
const struct nft_base_chain *base_chain;
enum nft_chain_types type;
u8 hooknum;
if (WARN_ON_ONCE(!nft_is_base_chain(ctx->chain)))
return false;
base_chain = nft_base_chain(ctx->chain);
hooknum = base_chain->ops.hooknum;
type = base_chain->type->type;
/* chain is already validated for this call depth */
if (chain->vstate.depth >= ctx->level &&
chain->vstate.hook_mask[type] & BIT(hooknum))
return true;
return false;
}
static void nf_tables_trans_destroy_work(struct work_struct *w);
static void nft_trans_gc_work(struct work_struct *work);
@ -4079,6 +4102,29 @@ static void nf_tables_rule_release(const struct nft_ctx *ctx, struct nft_rule *r
nf_tables_rule_destroy(ctx, rule);
}
static void nft_chain_vstate_update(const struct nft_ctx *ctx, struct nft_chain *chain)
{
const struct nft_base_chain *base_chain;
enum nft_chain_types type;
u8 hooknum;
/* ctx->chain must hold the calling base chain. */
if (WARN_ON_ONCE(!nft_is_base_chain(ctx->chain))) {
memset(&chain->vstate, 0, sizeof(chain->vstate));
return;
}
base_chain = nft_base_chain(ctx->chain);
hooknum = base_chain->ops.hooknum;
type = base_chain->type->type;
BUILD_BUG_ON(BIT(NF_INET_NUMHOOKS) > U8_MAX);
chain->vstate.hook_mask[type] |= BIT(hooknum);
if (chain->vstate.depth < ctx->level)
chain->vstate.depth = ctx->level;
}
/** nft_chain_validate - loop detection and hook validation
*
* @ctx: context containing call depth and base chain
@ -4088,15 +4134,25 @@ static void nf_tables_rule_release(const struct nft_ctx *ctx, struct nft_rule *r
* and set lookups until either the jump limit is hit or all reachable
* chains have been validated.
*/
int nft_chain_validate(const struct nft_ctx *ctx, const struct nft_chain *chain)
int nft_chain_validate(const struct nft_ctx *ctx, struct nft_chain *chain)
{
struct nft_expr *expr, *last;
struct nft_rule *rule;
int err;
BUILD_BUG_ON(NFT_JUMP_STACK_SIZE > 255);
if (ctx->level == NFT_JUMP_STACK_SIZE)
return -EMLINK;
if (ctx->level > 0) {
/* jumps to base chains are not allowed. */
if (nft_is_base_chain(chain))
return -ELOOP;
if (nft_chain_vstate_valid(ctx, chain))
return 0;
}
list_for_each_entry(rule, &chain->rules, list) {
if (fatal_signal_pending(current))
return -EINTR;
@ -4115,8 +4171,11 @@ int nft_chain_validate(const struct nft_ctx *ctx, const struct nft_chain *chain)
if (err < 0)
return err;
}
cond_resched();
}
nft_chain_vstate_update(ctx, chain);
return 0;
}
EXPORT_SYMBOL_GPL(nft_chain_validate);
@ -4128,7 +4187,7 @@ static int nft_table_validate(struct net *net, const struct nft_table *table)
.net = net,
.family = table->family,
};
int err;
int err = 0;
list_for_each_entry(chain, &table->chains, list) {
if (!nft_is_base_chain(chain))
@ -4137,12 +4196,14 @@ static int nft_table_validate(struct net *net, const struct nft_table *table)
ctx.chain = chain;
err = nft_chain_validate(&ctx, chain);
if (err < 0)
return err;
cond_resched();
goto err;
}
return 0;
err:
list_for_each_entry(chain, &table->chains, list)
memset(&chain->vstate, 0, sizeof(chain->vstate));
return err;
}
int nft_setelem_validate(const struct nft_ctx *ctx, struct nft_set *set,
@ -11676,21 +11737,10 @@ static int nft_validate_register_store(const struct nft_ctx *ctx,
enum nft_data_types type,
unsigned int len)
{
int err;
switch (reg) {
case NFT_REG_VERDICT:
if (type != NFT_DATA_VERDICT)
return -EINVAL;
if (data != NULL &&
(data->verdict.code == NFT_GOTO ||
data->verdict.code == NFT_JUMP)) {
err = nft_chain_validate(ctx, data->verdict.chain);
if (err < 0)
return err;
}
break;
default:
if (type != NFT_DATA_VALUE)

View File

@ -33,9 +33,14 @@ static void die(const char *e)
exit(111);
}
static void die_port(uint16_t got, uint16_t want)
static void die_port(const struct sockaddr_in *sin, uint16_t want)
{
fprintf(stderr, "Port number changed, wanted %d got %d\n", want, ntohs(got));
uint16_t got = ntohs(sin->sin_port);
char str[INET_ADDRSTRLEN];
inet_ntop(AF_INET, &sin->sin_addr, str, sizeof(str));
fprintf(stderr, "Port number changed, wanted %d got %d from %s\n", want, got, str);
exit(1);
}
@ -100,7 +105,7 @@ int main(int argc, char *argv[])
die("child recvfrom");
if (peer.sin_port != htons(PORT))
die_port(peer.sin_port, PORT);
die_port(&peer, PORT);
} else {
if (sendto(s2, buf, LEN, 0, (struct sockaddr *)&sa1, sizeof(sa1)) != LEN)
continue;
@ -109,7 +114,7 @@ int main(int argc, char *argv[])
die("parent recvfrom");
if (peer.sin_port != htons((PORT + 1)))
die_port(peer.sin_port, PORT + 1);
die_port(&peer, PORT + 1);
}
}

View File

@ -45,6 +45,8 @@ if ip netns exec "$ns0" ./conntrack_reverse_clash; then
echo "PASS: No SNAT performed for null bindings"
else
echo "ERROR: SNAT performed without any matching snat rule"
ip netns exec "$ns0" conntrack -L
ip netns exec "$ns0" conntrack -S
exit 1
fi

View File

@ -26,7 +26,7 @@
+0.01 > R 643160523:643160523(0) win 0
+0.01 `conntrack -f $NFCT_IP_VERSION -L -p tcp --dport 8080 2>/dev/null | grep UNREPLIED | grep -q SYN_SENT`
+0.1 `conntrack -f $NFCT_IP_VERSION -L -p tcp --dport 8080 2>/dev/null | grep UNREPLIED | grep -q SYN_SENT`
// Must go through.
+0.01 > S 0:0(0) win 65535 <mss 1460,sackOK,TS val 1 ecr 0,nop,wscale 8>