mirror of
https://github.com/torvalds/linux.git
synced 2026-05-28 09:04:39 +02:00
netfilter pull request nf-next-26-01-22
-----BEGIN PGP SIGNATURE----- iQJdBAABCABHFiEEgKkgxbID4Gn1hq6fcJGo2a1f9gAFAmlyTdIbFIAAAAAABAAO bWFudTIsMi41KzEuMTEsMiwyDRxmd0BzdHJsZW4uZGUACgkQcJGo2a1f9gCiQA// RPmUUytzSbNAFHjB0RttYUJI0f5YnuFA59BUL6qtRlyC74RXvgHAKlbYYwYwnf1h TYxOu2qSLDn/NsQvIJtF2bbNfKITV0nbliwGoPKdIlTInQM0+PE6L9E0uVqZdCI9 Tpkqi7Bmr8l7W88qcUMqfNjMAMT63JS+fp2GmjcIJ5Iv3Zc5wjTGr3nGZxaBF/IS aN5RpOSdHhKhk8jC15E4OtuyrJuGqpU21OlJthz8F5wUhmDM80cRA4ZFTMij+cEC tJbm4ysAaB0HKz1qpm4/GQ8fysmAQ/GH6lINGctYQrelmoVZBHfvr54oV6ZNPiNv +u2daQU11+9MRR7z4OCcdXunKv5hwm1jiHpgPQ5xuNfdAF+dPglCYQcIOiY+CP0c fSVPjqFibS3DgS3enYywl2narh7w71EHnFswqVNWTTmmizbWcnM78s1HNtmCJCK6 dqLk7+mGrZ+0IVGIkYWiLlXjQPkaNj3AiaCWe4h4su+igfDaUsS6HIEEaJeQA/wu ORPbh+KYZNHAT6t5jzOO1XhABLsEOHXLOJZRJ7y4FDIfX/2UiX7wWZARaBRhEDz+ bPipHbSEAmFqgCuDIQF4pqKic4oxXzeImUbiXUGQ5Y+UwlOQaKsUjzvd1aNJhTrQ s8iAnXoe5/1xFGZs8357P5I5BzJUodafxVt8Gsed0uE= =JUmj -----END PGP SIGNATURE----- Merge tag 'nf-next-26-01-22' of https://git.kernel.org/pub/scm/linux/kernel/git/netfilter/nf-next Florian Westphal says: ==================== netfilter: updates for net-next There is an issue with interval matching in nftables rbtree set type: When userspace sends us set updates, there is a brief window where false negative lookups may occur from the data plane. Quoting Pablos original cover letter: This series addresses this issue by translating the rbtree, which keeps the intervals in order, to binary search. The array is published to packet path through RCU. The idea is to keep using the rbtree datastructure for control plane, which needs to deal with updates, then generate an array using this rbtree for binary search lookups. Patch #1 allows to call .remove in case .abort is defined, which is needed by this new approach. Only pipapo needs to skip .remove to speed. Patch #2 add the binary search array approach for interval matching. Patch #3 updates .get to use the binary search array to find for (closest or exact) interval matching. Patch #4 removes seqcount_rwlock_t as it is not needed anymore (new in this series). * tag 'nf-next-26-01-22' of https://git.kernel.org/pub/scm/linux/kernel/git/netfilter/nf-next: netfilter: nft_set_rbtree: remove seqcount_rwlock_t netfilter: nft_set_rbtree: use binary search array in get command netfilter: nft_set_rbtree: translate rbtree to array for binary search netfilter: nf_tables: add .abort_skip_removal flag for set types ==================== Link: https://patch.msgid.link/20260122162935.8581-1-fw@strlen.de Signed-off-by: Jakub Kicinski <kuba@kernel.org>
This commit is contained in:
commit
31d44a3782
|
|
@ -451,6 +451,7 @@ struct nft_set_ext;
|
|||
* @init: initialize private data of new set instance
|
||||
* @destroy: destroy private data of set instance
|
||||
* @gc_init: initialize garbage collection
|
||||
* @abort_skip_removal: skip removal of elements from abort path
|
||||
* @elemsize: element private size
|
||||
*
|
||||
* Operations lookup, update and delete have simpler interfaces, are faster
|
||||
|
|
@ -508,6 +509,7 @@ struct nft_set_ops {
|
|||
const struct nft_set *set);
|
||||
void (*gc_init)(const struct nft_set *set);
|
||||
|
||||
bool abort_skip_removal;
|
||||
unsigned int elemsize;
|
||||
};
|
||||
|
||||
|
|
|
|||
|
|
@ -7807,7 +7807,8 @@ static bool nft_trans_elems_new_abort(const struct nft_ctx *ctx,
|
|||
continue;
|
||||
}
|
||||
|
||||
if (!te->set->ops->abort || nft_setelem_is_catchall(te->set, te->elems[i].priv))
|
||||
if (!te->set->ops->abort_skip_removal ||
|
||||
nft_setelem_is_catchall(te->set, te->elems[i].priv))
|
||||
nft_setelem_remove(ctx->net, te->set, te->elems[i].priv);
|
||||
|
||||
if (!nft_setelem_is_catchall(te->set, te->elems[i].priv))
|
||||
|
|
|
|||
|
|
@ -2370,6 +2370,7 @@ const struct nft_set_type nft_set_pipapo_type = {
|
|||
.gc_init = nft_pipapo_gc_init,
|
||||
.commit = nft_pipapo_commit,
|
||||
.abort = nft_pipapo_abort,
|
||||
.abort_skip_removal = true,
|
||||
.elemsize = offsetof(struct nft_pipapo_elem, ext),
|
||||
},
|
||||
};
|
||||
|
|
@ -2394,6 +2395,7 @@ const struct nft_set_type nft_set_pipapo_avx2_type = {
|
|||
.gc_init = nft_pipapo_gc_init,
|
||||
.commit = nft_pipapo_commit,
|
||||
.abort = nft_pipapo_abort,
|
||||
.abort_skip_removal = true,
|
||||
.elemsize = offsetof(struct nft_pipapo_elem, ext),
|
||||
},
|
||||
};
|
||||
|
|
|
|||
|
|
@ -10,15 +10,29 @@
|
|||
#include <linux/module.h>
|
||||
#include <linux/list.h>
|
||||
#include <linux/rbtree.h>
|
||||
#include <linux/bsearch.h>
|
||||
#include <linux/netlink.h>
|
||||
#include <linux/netfilter.h>
|
||||
#include <linux/netfilter/nf_tables.h>
|
||||
#include <net/netfilter/nf_tables_core.h>
|
||||
|
||||
struct nft_array_interval {
|
||||
struct nft_set_ext *from;
|
||||
struct nft_set_ext *to;
|
||||
};
|
||||
|
||||
struct nft_array {
|
||||
u32 max_intervals;
|
||||
u32 num_intervals;
|
||||
struct nft_array_interval *intervals;
|
||||
struct rcu_head rcu_head;
|
||||
};
|
||||
|
||||
struct nft_rbtree {
|
||||
struct rb_root root;
|
||||
rwlock_t lock;
|
||||
seqcount_rwlock_t count;
|
||||
struct nft_array __rcu *array;
|
||||
struct nft_array *array_next;
|
||||
unsigned long last_gc;
|
||||
};
|
||||
|
||||
|
|
@ -47,67 +61,33 @@ static int nft_rbtree_cmp(const struct nft_set *set,
|
|||
set->klen);
|
||||
}
|
||||
|
||||
static bool nft_rbtree_elem_expired(const struct nft_rbtree_elem *rbe)
|
||||
struct nft_array_lookup_ctx {
|
||||
const u32 *key;
|
||||
u32 klen;
|
||||
};
|
||||
|
||||
static int nft_array_lookup_cmp(const void *pkey, const void *entry)
|
||||
{
|
||||
return nft_set_elem_expired(&rbe->ext);
|
||||
}
|
||||
const struct nft_array_interval *interval = entry;
|
||||
const struct nft_array_lookup_ctx *ctx = pkey;
|
||||
int a, b;
|
||||
|
||||
static const struct nft_set_ext *
|
||||
__nft_rbtree_lookup(const struct net *net, const struct nft_set *set,
|
||||
const u32 *key, unsigned int seq)
|
||||
{
|
||||
struct nft_rbtree *priv = nft_set_priv(set);
|
||||
const struct nft_rbtree_elem *rbe, *interval = NULL;
|
||||
u8 genmask = nft_genmask_cur(net);
|
||||
const struct rb_node *parent;
|
||||
int d;
|
||||
if (!interval->from)
|
||||
return 1;
|
||||
|
||||
parent = rcu_dereference_raw(priv->root.rb_node);
|
||||
while (parent != NULL) {
|
||||
if (read_seqcount_retry(&priv->count, seq))
|
||||
return NULL;
|
||||
a = memcmp(ctx->key, nft_set_ext_key(interval->from), ctx->klen);
|
||||
if (!interval->to)
|
||||
b = -1;
|
||||
else
|
||||
b = memcmp(ctx->key, nft_set_ext_key(interval->to), ctx->klen);
|
||||
|
||||
rbe = rb_entry(parent, struct nft_rbtree_elem, node);
|
||||
if (a >= 0 && b < 0)
|
||||
return 0;
|
||||
|
||||
d = memcmp(nft_set_ext_key(&rbe->ext), key, set->klen);
|
||||
if (d < 0) {
|
||||
parent = rcu_dereference_raw(parent->rb_left);
|
||||
if (interval &&
|
||||
!nft_rbtree_cmp(set, rbe, interval) &&
|
||||
nft_rbtree_interval_end(rbe) &&
|
||||
nft_rbtree_interval_start(interval))
|
||||
continue;
|
||||
if (nft_set_elem_active(&rbe->ext, genmask) &&
|
||||
!nft_rbtree_elem_expired(rbe))
|
||||
interval = rbe;
|
||||
} else if (d > 0)
|
||||
parent = rcu_dereference_raw(parent->rb_right);
|
||||
else {
|
||||
if (!nft_set_elem_active(&rbe->ext, genmask)) {
|
||||
parent = rcu_dereference_raw(parent->rb_left);
|
||||
continue;
|
||||
}
|
||||
if (a < 0)
|
||||
return -1;
|
||||
|
||||
if (nft_rbtree_elem_expired(rbe))
|
||||
return NULL;
|
||||
|
||||
if (nft_rbtree_interval_end(rbe)) {
|
||||
if (nft_set_is_anonymous(set))
|
||||
return NULL;
|
||||
parent = rcu_dereference_raw(parent->rb_left);
|
||||
interval = NULL;
|
||||
continue;
|
||||
}
|
||||
|
||||
return &rbe->ext;
|
||||
}
|
||||
}
|
||||
|
||||
if (set->flags & NFT_SET_INTERVAL && interval != NULL &&
|
||||
nft_rbtree_interval_start(interval))
|
||||
return &interval->ext;
|
||||
|
||||
return NULL;
|
||||
return 1;
|
||||
}
|
||||
|
||||
INDIRECT_CALLABLE_SCOPE
|
||||
|
|
@ -116,83 +96,57 @@ nft_rbtree_lookup(const struct net *net, const struct nft_set *set,
|
|||
const u32 *key)
|
||||
{
|
||||
struct nft_rbtree *priv = nft_set_priv(set);
|
||||
unsigned int seq = read_seqcount_begin(&priv->count);
|
||||
const struct nft_set_ext *ext;
|
||||
struct nft_array *array = rcu_dereference(priv->array);
|
||||
const struct nft_array_interval *interval;
|
||||
struct nft_array_lookup_ctx ctx = {
|
||||
.key = key,
|
||||
.klen = set->klen,
|
||||
};
|
||||
|
||||
ext = __nft_rbtree_lookup(net, set, key, seq);
|
||||
if (ext || !read_seqcount_retry(&priv->count, seq))
|
||||
return ext;
|
||||
if (!array)
|
||||
return NULL;
|
||||
|
||||
read_lock_bh(&priv->lock);
|
||||
seq = read_seqcount_begin(&priv->count);
|
||||
ext = __nft_rbtree_lookup(net, set, key, seq);
|
||||
read_unlock_bh(&priv->lock);
|
||||
interval = bsearch(&ctx, array->intervals, array->num_intervals,
|
||||
sizeof(struct nft_array_interval),
|
||||
nft_array_lookup_cmp);
|
||||
if (!interval || nft_set_elem_expired(interval->from))
|
||||
return NULL;
|
||||
|
||||
return ext;
|
||||
return interval->from;
|
||||
}
|
||||
|
||||
static bool __nft_rbtree_get(const struct net *net, const struct nft_set *set,
|
||||
const u32 *key, struct nft_rbtree_elem **elem,
|
||||
unsigned int seq, unsigned int flags, u8 genmask)
|
||||
struct nft_array_get_ctx {
|
||||
const u32 *key;
|
||||
unsigned int flags;
|
||||
u32 klen;
|
||||
};
|
||||
|
||||
static int nft_array_get_cmp(const void *pkey, const void *entry)
|
||||
{
|
||||
struct nft_rbtree_elem *rbe, *interval = NULL;
|
||||
struct nft_rbtree *priv = nft_set_priv(set);
|
||||
const struct rb_node *parent;
|
||||
const void *this;
|
||||
int d;
|
||||
const struct nft_array_interval *interval = entry;
|
||||
const struct nft_array_get_ctx *ctx = pkey;
|
||||
int a, b;
|
||||
|
||||
parent = rcu_dereference_raw(priv->root.rb_node);
|
||||
while (parent != NULL) {
|
||||
if (read_seqcount_retry(&priv->count, seq))
|
||||
return false;
|
||||
if (!interval->from)
|
||||
return 1;
|
||||
|
||||
rbe = rb_entry(parent, struct nft_rbtree_elem, node);
|
||||
a = memcmp(ctx->key, nft_set_ext_key(interval->from), ctx->klen);
|
||||
if (!interval->to)
|
||||
b = -1;
|
||||
else
|
||||
b = memcmp(ctx->key, nft_set_ext_key(interval->to), ctx->klen);
|
||||
|
||||
this = nft_set_ext_key(&rbe->ext);
|
||||
d = memcmp(this, key, set->klen);
|
||||
if (d < 0) {
|
||||
parent = rcu_dereference_raw(parent->rb_left);
|
||||
if (!(flags & NFT_SET_ELEM_INTERVAL_END))
|
||||
interval = rbe;
|
||||
} else if (d > 0) {
|
||||
parent = rcu_dereference_raw(parent->rb_right);
|
||||
if (flags & NFT_SET_ELEM_INTERVAL_END)
|
||||
interval = rbe;
|
||||
} else {
|
||||
if (!nft_set_elem_active(&rbe->ext, genmask)) {
|
||||
parent = rcu_dereference_raw(parent->rb_left);
|
||||
continue;
|
||||
}
|
||||
|
||||
if (nft_set_elem_expired(&rbe->ext))
|
||||
return false;
|
||||
|
||||
if (!nft_set_ext_exists(&rbe->ext, NFT_SET_EXT_FLAGS) ||
|
||||
(*nft_set_ext_flags(&rbe->ext) & NFT_SET_ELEM_INTERVAL_END) ==
|
||||
(flags & NFT_SET_ELEM_INTERVAL_END)) {
|
||||
*elem = rbe;
|
||||
return true;
|
||||
}
|
||||
|
||||
if (nft_rbtree_interval_end(rbe))
|
||||
interval = NULL;
|
||||
|
||||
parent = rcu_dereference_raw(parent->rb_left);
|
||||
}
|
||||
if (a >= 0) {
|
||||
if (ctx->flags & NFT_SET_ELEM_INTERVAL_END && b <= 0)
|
||||
return 0;
|
||||
else if (b < 0)
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (set->flags & NFT_SET_INTERVAL && interval != NULL &&
|
||||
nft_set_elem_active(&interval->ext, genmask) &&
|
||||
!nft_set_elem_expired(&interval->ext) &&
|
||||
((!nft_rbtree_interval_end(interval) &&
|
||||
!(flags & NFT_SET_ELEM_INTERVAL_END)) ||
|
||||
(nft_rbtree_interval_end(interval) &&
|
||||
(flags & NFT_SET_ELEM_INTERVAL_END)))) {
|
||||
*elem = interval;
|
||||
return true;
|
||||
}
|
||||
if (a < 0)
|
||||
return -1;
|
||||
|
||||
return false;
|
||||
return 1;
|
||||
}
|
||||
|
||||
static struct nft_elem_priv *
|
||||
|
|
@ -200,24 +154,28 @@ nft_rbtree_get(const struct net *net, const struct nft_set *set,
|
|||
const struct nft_set_elem *elem, unsigned int flags)
|
||||
{
|
||||
struct nft_rbtree *priv = nft_set_priv(set);
|
||||
unsigned int seq = read_seqcount_begin(&priv->count);
|
||||
struct nft_rbtree_elem *rbe = ERR_PTR(-ENOENT);
|
||||
const u32 *key = (const u32 *)&elem->key.val;
|
||||
u8 genmask = nft_genmask_cur(net);
|
||||
bool ret;
|
||||
struct nft_array *array = rcu_dereference(priv->array);
|
||||
const struct nft_array_interval *interval;
|
||||
struct nft_array_get_ctx ctx = {
|
||||
.key = (const u32 *)&elem->key.val,
|
||||
.flags = flags,
|
||||
.klen = set->klen,
|
||||
};
|
||||
struct nft_rbtree_elem *rbe;
|
||||
|
||||
ret = __nft_rbtree_get(net, set, key, &rbe, seq, flags, genmask);
|
||||
if (ret || !read_seqcount_retry(&priv->count, seq))
|
||||
return &rbe->priv;
|
||||
|
||||
read_lock_bh(&priv->lock);
|
||||
seq = read_seqcount_begin(&priv->count);
|
||||
ret = __nft_rbtree_get(net, set, key, &rbe, seq, flags, genmask);
|
||||
read_unlock_bh(&priv->lock);
|
||||
|
||||
if (!ret)
|
||||
if (!array)
|
||||
return ERR_PTR(-ENOENT);
|
||||
|
||||
interval = bsearch(&ctx, array->intervals, array->num_intervals,
|
||||
sizeof(struct nft_array_interval), nft_array_get_cmp);
|
||||
if (!interval || nft_set_elem_expired(interval->from))
|
||||
return ERR_PTR(-ENOENT);
|
||||
|
||||
if (flags & NFT_SET_ELEM_INTERVAL_END)
|
||||
rbe = container_of(interval->to, struct nft_rbtree_elem, ext);
|
||||
else
|
||||
rbe = container_of(interval->from, struct nft_rbtree_elem, ext);
|
||||
|
||||
return &rbe->priv;
|
||||
}
|
||||
|
||||
|
|
@ -481,6 +439,87 @@ static int __nft_rbtree_insert(const struct net *net, const struct nft_set *set,
|
|||
return 0;
|
||||
}
|
||||
|
||||
static int nft_array_intervals_alloc(struct nft_array *array, u32 max_intervals)
|
||||
{
|
||||
struct nft_array_interval *intervals;
|
||||
|
||||
intervals = kvcalloc(max_intervals, sizeof(struct nft_array_interval),
|
||||
GFP_KERNEL_ACCOUNT);
|
||||
if (!intervals)
|
||||
return -ENOMEM;
|
||||
|
||||
if (array->intervals)
|
||||
kvfree(array->intervals);
|
||||
|
||||
array->intervals = intervals;
|
||||
array->max_intervals = max_intervals;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static struct nft_array *nft_array_alloc(u32 max_intervals)
|
||||
{
|
||||
struct nft_array *array;
|
||||
|
||||
array = kzalloc(sizeof(*array), GFP_KERNEL_ACCOUNT);
|
||||
if (!array)
|
||||
return NULL;
|
||||
|
||||
if (nft_array_intervals_alloc(array, max_intervals) < 0) {
|
||||
kfree(array);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
return array;
|
||||
}
|
||||
|
||||
#define NFT_ARRAY_EXTRA_SIZE 10240
|
||||
|
||||
/* Similar to nft_rbtree_{u,k}size to hide details to userspace, but consider
|
||||
* packed representation coming from userspace for anonymous sets too.
|
||||
*/
|
||||
static u32 nft_array_elems(const struct nft_set *set)
|
||||
{
|
||||
u32 nelems = atomic_read(&set->nelems);
|
||||
|
||||
/* Adjacent intervals are represented with a single start element in
|
||||
* anonymous sets, use the current element counter as is.
|
||||
*/
|
||||
if (nft_set_is_anonymous(set))
|
||||
return nelems;
|
||||
|
||||
/* Add extra room for never matching interval at the beginning and open
|
||||
* interval at the end which only use a single element to represent it.
|
||||
* The conversion to array will compact intervals, this allows reduce
|
||||
* memory consumption.
|
||||
*/
|
||||
return (nelems / 2) + 2;
|
||||
}
|
||||
|
||||
static int nft_array_may_resize(const struct nft_set *set)
|
||||
{
|
||||
u32 nelems = nft_array_elems(set), new_max_intervals;
|
||||
struct nft_rbtree *priv = nft_set_priv(set);
|
||||
struct nft_array *array;
|
||||
|
||||
if (!priv->array_next) {
|
||||
array = nft_array_alloc(nelems + NFT_ARRAY_EXTRA_SIZE);
|
||||
if (!array)
|
||||
return -ENOMEM;
|
||||
|
||||
priv->array_next = array;
|
||||
}
|
||||
|
||||
if (nelems < priv->array_next->max_intervals)
|
||||
return 0;
|
||||
|
||||
new_max_intervals = priv->array_next->max_intervals + NFT_ARRAY_EXTRA_SIZE;
|
||||
if (nft_array_intervals_alloc(priv->array_next, new_max_intervals) < 0)
|
||||
return -ENOMEM;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int nft_rbtree_insert(const struct net *net, const struct nft_set *set,
|
||||
const struct nft_set_elem *elem,
|
||||
struct nft_elem_priv **elem_priv)
|
||||
|
|
@ -489,6 +528,9 @@ static int nft_rbtree_insert(const struct net *net, const struct nft_set *set,
|
|||
struct nft_rbtree *priv = nft_set_priv(set);
|
||||
int err;
|
||||
|
||||
if (nft_array_may_resize(set) < 0)
|
||||
return -ENOMEM;
|
||||
|
||||
do {
|
||||
if (fatal_signal_pending(current))
|
||||
return -EINTR;
|
||||
|
|
@ -496,9 +538,7 @@ static int nft_rbtree_insert(const struct net *net, const struct nft_set *set,
|
|||
cond_resched();
|
||||
|
||||
write_lock_bh(&priv->lock);
|
||||
write_seqcount_begin(&priv->count);
|
||||
err = __nft_rbtree_insert(net, set, rbe, elem_priv);
|
||||
write_seqcount_end(&priv->count);
|
||||
write_unlock_bh(&priv->lock);
|
||||
} while (err == -EAGAIN);
|
||||
|
||||
|
|
@ -508,9 +548,7 @@ static int nft_rbtree_insert(const struct net *net, const struct nft_set *set,
|
|||
static void nft_rbtree_erase(struct nft_rbtree *priv, struct nft_rbtree_elem *rbe)
|
||||
{
|
||||
write_lock_bh(&priv->lock);
|
||||
write_seqcount_begin(&priv->count);
|
||||
rb_erase(&rbe->node, &priv->root);
|
||||
write_seqcount_end(&priv->count);
|
||||
write_unlock_bh(&priv->lock);
|
||||
}
|
||||
|
||||
|
|
@ -553,6 +591,9 @@ nft_rbtree_deactivate(const struct net *net, const struct nft_set *set,
|
|||
u64 tstamp = nft_net_tstamp(net);
|
||||
int d;
|
||||
|
||||
if (nft_array_may_resize(set) < 0)
|
||||
return NULL;
|
||||
|
||||
while (parent != NULL) {
|
||||
rbe = rb_entry(parent, struct nft_rbtree_elem, node);
|
||||
|
||||
|
|
@ -615,6 +656,11 @@ static void nft_rbtree_walk(const struct nft_ctx *ctx,
|
|||
switch (iter->type) {
|
||||
case NFT_ITER_UPDATE:
|
||||
lockdep_assert_held(&nft_pernet(ctx->net)->commit_mutex);
|
||||
|
||||
if (nft_array_may_resize(set) < 0) {
|
||||
iter->err = -ENOMEM;
|
||||
break;
|
||||
}
|
||||
nft_rbtree_do_walk(ctx, set, iter);
|
||||
break;
|
||||
case NFT_ITER_READ:
|
||||
|
|
@ -714,17 +760,26 @@ static int nft_rbtree_init(const struct nft_set *set,
|
|||
BUILD_BUG_ON(offsetof(struct nft_rbtree_elem, priv) != 0);
|
||||
|
||||
rwlock_init(&priv->lock);
|
||||
seqcount_rwlock_init(&priv->count, &priv->lock);
|
||||
priv->root = RB_ROOT;
|
||||
|
||||
priv->array = NULL;
|
||||
priv->array_next = NULL;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void __nft_array_free(struct nft_array *array)
|
||||
{
|
||||
kvfree(array->intervals);
|
||||
kfree(array);
|
||||
}
|
||||
|
||||
static void nft_rbtree_destroy(const struct nft_ctx *ctx,
|
||||
const struct nft_set *set)
|
||||
{
|
||||
struct nft_rbtree *priv = nft_set_priv(set);
|
||||
struct nft_rbtree_elem *rbe;
|
||||
struct nft_array *array;
|
||||
struct rb_node *node;
|
||||
|
||||
while ((node = priv->root.rb_node) != NULL) {
|
||||
|
|
@ -732,6 +787,12 @@ static void nft_rbtree_destroy(const struct nft_ctx *ctx,
|
|||
rbe = rb_entry(node, struct nft_rbtree_elem, node);
|
||||
nf_tables_set_elem_destroy(ctx, set, &rbe->priv);
|
||||
}
|
||||
|
||||
array = rcu_dereference_protected(priv->array, true);
|
||||
if (array)
|
||||
__nft_array_free(array);
|
||||
if (priv->array_next)
|
||||
__nft_array_free(priv->array_next);
|
||||
}
|
||||
|
||||
static bool nft_rbtree_estimate(const struct nft_set_desc *desc, u32 features,
|
||||
|
|
@ -752,12 +813,91 @@ static bool nft_rbtree_estimate(const struct nft_set_desc *desc, u32 features,
|
|||
return true;
|
||||
}
|
||||
|
||||
static void nft_array_free_rcu(struct rcu_head *rcu_head)
|
||||
{
|
||||
struct nft_array *array = container_of(rcu_head, struct nft_array, rcu_head);
|
||||
|
||||
__nft_array_free(array);
|
||||
}
|
||||
|
||||
static void nft_rbtree_commit(struct nft_set *set)
|
||||
{
|
||||
struct nft_rbtree *priv = nft_set_priv(set);
|
||||
struct nft_rbtree_elem *rbe, *prev_rbe;
|
||||
struct nft_array *old;
|
||||
u32 num_intervals = 0;
|
||||
struct rb_node *node;
|
||||
|
||||
if (time_after_eq(jiffies, priv->last_gc + nft_set_gc_interval(set)))
|
||||
nft_rbtree_gc(set);
|
||||
|
||||
/* No changes, skip, eg. elements updates only. */
|
||||
if (!priv->array_next)
|
||||
return;
|
||||
|
||||
/* Reverse walk to create an array from smaller to largest interval. */
|
||||
node = rb_last(&priv->root);
|
||||
if (node)
|
||||
prev_rbe = rb_entry(node, struct nft_rbtree_elem, node);
|
||||
else
|
||||
prev_rbe = NULL;
|
||||
|
||||
while (prev_rbe) {
|
||||
rbe = prev_rbe;
|
||||
|
||||
if (nft_rbtree_interval_start(rbe))
|
||||
priv->array_next->intervals[num_intervals].from = &rbe->ext;
|
||||
else if (nft_rbtree_interval_end(rbe))
|
||||
priv->array_next->intervals[num_intervals++].to = &rbe->ext;
|
||||
|
||||
if (num_intervals >= priv->array_next->max_intervals) {
|
||||
pr_warn_once("malformed interval set from userspace?");
|
||||
goto err_out;
|
||||
}
|
||||
|
||||
node = rb_prev(node);
|
||||
if (!node)
|
||||
break;
|
||||
|
||||
prev_rbe = rb_entry(node, struct nft_rbtree_elem, node);
|
||||
|
||||
/* For anonymous sets, when adjacent ranges are found,
|
||||
* the end element is not added to the set to pack the set
|
||||
* representation. Use next start element to complete this
|
||||
* interval.
|
||||
*/
|
||||
if (nft_rbtree_interval_start(rbe) &&
|
||||
nft_rbtree_interval_start(prev_rbe) &&
|
||||
priv->array_next->intervals[num_intervals].from)
|
||||
priv->array_next->intervals[num_intervals++].to = &prev_rbe->ext;
|
||||
|
||||
if (num_intervals >= priv->array_next->max_intervals) {
|
||||
pr_warn_once("malformed interval set from userspace?");
|
||||
goto err_out;
|
||||
}
|
||||
}
|
||||
|
||||
if (priv->array_next->intervals[num_intervals].from)
|
||||
num_intervals++;
|
||||
err_out:
|
||||
priv->array_next->num_intervals = num_intervals;
|
||||
old = rcu_replace_pointer(priv->array, priv->array_next, true);
|
||||
priv->array_next = NULL;
|
||||
if (old)
|
||||
call_rcu(&old->rcu_head, nft_array_free_rcu);
|
||||
}
|
||||
|
||||
static void nft_rbtree_abort(const struct nft_set *set)
|
||||
{
|
||||
struct nft_rbtree *priv = nft_set_priv(set);
|
||||
struct nft_array *array_next;
|
||||
|
||||
if (!priv->array_next)
|
||||
return;
|
||||
|
||||
array_next = priv->array_next;
|
||||
priv->array_next = NULL;
|
||||
__nft_array_free(array_next);
|
||||
}
|
||||
|
||||
static void nft_rbtree_gc_init(const struct nft_set *set)
|
||||
|
|
@ -821,6 +961,7 @@ const struct nft_set_type nft_set_rbtree_type = {
|
|||
.flush = nft_rbtree_flush,
|
||||
.activate = nft_rbtree_activate,
|
||||
.commit = nft_rbtree_commit,
|
||||
.abort = nft_rbtree_abort,
|
||||
.gc_init = nft_rbtree_gc_init,
|
||||
.lookup = nft_rbtree_lookup,
|
||||
.walk = nft_rbtree_walk,
|
||||
|
|
|
|||
Loading…
Reference in New Issue
Block a user