linux/net/sched/cls_fw.c
Davide Caratti 65782b2db7 net/sched: cls_fw: fix NULL dereference of "old" filters before change()
Like pointed out by Sashiko [1], since commit ed76f5edcc ("net: sched:
protect filter_chain list with filter_chain_lock mutex") TC filters are
added to a shared block and published to datapath before their ->change()
function is called. This is a problem for cls_fw: an invalid filter
created with the "old" method can still classify some packets before it
is destroyed by the validation logic added by Xiang.
Therefore, insisting with repeated runs of the following script:

 # ip link add dev crash0 type dummy
 # ip link set dev crash0 up
 # mausezahn  crash0 -c 100000 -P 10 \
 > -A 4.3.2.1 -B 1.2.3.4 -t udp "dp=1234" -q &
 # sleep 1
 # tc qdisc add dev crash0 egress_block 1 clsact
 # tc filter add block 1 protocol ip prio 1 matchall \
 > action skbedit mark 65536 continue
 # tc filter add block 1 protocol ip prio 2 fw
 # ip link del dev crash0

can still make fw_classify() hit the WARN_ON() in [2]:

 WARNING: ./include/net/pkt_cls.h:88 at fw_classify+0x244/0x250 [cls_fw], CPU#18: mausezahn/1399
 Modules linked in: cls_fw(E) act_skbedit(E)
 CPU: 18 UID: 0 PID: 1399 Comm: mausezahn Tainted: G            E       7.0.0-rc6-virtme #17 PREEMPT(full)
 Tainted: [E]=UNSIGNED_MODULE
 Hardware name: Red Hat KVM, BIOS 1.16.3-2.el9 04/01/2014
 RIP: 0010:fw_classify+0x244/0x250 [cls_fw]
 Code: 5c 49 c7 45 00 00 00 00 00 41 5d 41 5e 41 5f 5d c3 cc cc cc cc 5b b8 ff ff ff ff 41 5c 41 5d 41 5e 41 5f 5d c3 cc cc cc cc 90 <0f> 0b 90 eb a0 0f 1f 80 00 00 00 00 90 90 90 90 90 90 90 90 90 90
 RSP: 0018:ffffd1b7026bf8a8 EFLAGS: 00010202
 RAX: ffff8c5ac9c60800 RBX: ffff8c5ac99322c0 RCX: 0000000000000004
 RDX: 0000000000000001 RSI: ffff8c5b74d7a000 RDI: ffff8c5ac8284f40
 RBP: ffffd1b7026bf8d0 R08: 0000000000000000 R09: ffffd1b7026bf9b0
 R10: 00000000ffffffff R11: 0000000000000000 R12: 0000000000010000
 R13: ffffd1b7026bf930 R14: ffff8c5ac8284f40 R15: 0000000000000000
 FS:  00007fca40c37740(0000) GS:ffff8c5b74d7a000(0000) knlGS:0000000000000000
 CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
 CR2: 00007fca40e822a0 CR3: 0000000005ca0001 CR4: 0000000000172ef0
 Call Trace:
  <TASK>
  tcf_classify+0x17d/0x5c0
  tc_run+0x9d/0x150
  __dev_queue_xmit+0x2ab/0x14d0
  ip_finish_output2+0x340/0x8f0
  ip_output+0xa4/0x250
  raw_sendmsg+0x147d/0x14b0
  __sys_sendto+0x1cc/0x1f0
  __x64_sys_sendto+0x24/0x30
  do_syscall_64+0x126/0xf80
  entry_SYSCALL_64_after_hwframe+0x77/0x7f
 RIP: 0033:0x7fca40e822ba
 Code: d8 64 89 02 48 c7 c0 ff ff ff ff eb b8 0f 1f 00 f3 0f 1e fa 41 89 ca 64 8b 04 25 18 00 00 00 85 c0 75 15 b8 2c 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 7e c3 0f 1f 44 00 00 41 54 48 83 ec 30 44 89
 RSP: 002b:00007ffc248a42c8 EFLAGS: 00000246 ORIG_RAX: 000000000000002c
 RAX: ffffffffffffffda RBX: 000055ef233289d0 RCX: 00007fca40e822ba
 RDX: 000000000000001e RSI: 000055ef23328c30 RDI: 0000000000000003
 RBP: 000055ef233289d0 R08: 00007ffc248a42d0 R09: 0000000000000010
 R10: 0000000000000000 R11: 0000000000000246 R12: 000000000000001e
 R13: 00000000000186a0 R14: 0000000000000000 R15: 00007fca41043000
  </TASK>
 irq event stamp: 1045778
 hardirqs last  enabled at (1045784): [<ffffffff864ec042>] __up_console_sem+0x52/0x60
 hardirqs last disabled at (1045789): [<ffffffff864ec027>] __up_console_sem+0x37/0x60
 softirqs last  enabled at (1045426): [<ffffffff874d48c7>] __alloc_skb+0x207/0x260
 softirqs last disabled at (1045434): [<ffffffff874fe8f8>] __dev_queue_xmit+0x78/0x14d0

Then, because of the value in the packet's mark, dereference on 'q->handle'
with NULL 'q' occurs:

 BUG: kernel NULL  pointer dereference, address: 0000000000000038
 [...]
 RIP: 0010:fw_classify+0x1fe/0x250 [cls_fw]
 [...]

Skip "old-style" classification on shared blocks, so that the NULL
dereference is fixed and WARN_ON() is not hit anymore in the short
lifetime of invalid cls_fw "old-style" filters.

[1] https://sashiko.dev/#/patchset/20260331050217.504278-1-xmei5%40asu.edu
[2] https://elixir.bootlin.com/linux/v7.0-rc6/source/include/net/pkt_cls.h#L86

Fixes: faeea8bbf6 ("net/sched: cls_fw: fix NULL pointer dereference on shared blocks")
Fixes: ed76f5edcc ("net: sched: protect filter_chain list with filter_chain_lock mutex")
Acked-by: Jamal Hadi Salim <jhs@mojatatu.com>
Signed-off-by: Davide Caratti <dcaratti@redhat.com>
Link: https://patch.msgid.link/e39cbd3103a337f1e515d186fe697b4459d24757.1775661704.git.dcaratti@redhat.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2026-04-12 08:49:13 -07:00

466 lines
9.8 KiB
C

// SPDX-License-Identifier: GPL-2.0-or-later
/*
* net/sched/cls_fw.c Classifier mapping ipchains' fwmark to traffic class.
*
* Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
*
* Changes:
* Karlis Peisenieks <karlis@mt.lv> : 990415 : fw_walk off by one
* Karlis Peisenieks <karlis@mt.lv> : 990415 : fw_delete killed all the filter (and kernel).
* Alex <alex@pilotsoft.com> : 2004xxyy: Added Action extension
*/
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/errno.h>
#include <linux/skbuff.h>
#include <net/netlink.h>
#include <net/act_api.h>
#include <net/pkt_cls.h>
#include <net/sch_generic.h>
#include <net/tc_wrapper.h>
#define HTSIZE 256
struct fw_head {
u32 mask;
struct fw_filter __rcu *ht[HTSIZE];
struct rcu_head rcu;
};
struct fw_filter {
struct fw_filter __rcu *next;
u32 id;
struct tcf_result res;
int ifindex;
struct tcf_exts exts;
struct tcf_proto *tp;
struct rcu_work rwork;
};
static u32 fw_hash(u32 handle)
{
handle ^= (handle >> 16);
handle ^= (handle >> 8);
return handle % HTSIZE;
}
TC_INDIRECT_SCOPE int fw_classify(struct sk_buff *skb,
const struct tcf_proto *tp,
struct tcf_result *res)
{
struct fw_head *head = rcu_dereference_bh(tp->root);
struct fw_filter *f;
int r;
u32 id = skb->mark;
if (head != NULL) {
id &= head->mask;
for (f = rcu_dereference_bh(head->ht[fw_hash(id)]); f;
f = rcu_dereference_bh(f->next)) {
if (f->id == id) {
*res = f->res;
if (!tcf_match_indev(skb, f->ifindex))
continue;
r = tcf_exts_exec(skb, &f->exts, res);
if (r < 0)
continue;
return r;
}
}
} else {
struct Qdisc *q;
/* Old method: classify the packet using its skb mark. */
if (tcf_block_shared(tp->chain->block))
return -1;
q = tcf_block_q(tp->chain->block);
if (id && (TC_H_MAJ(id) == 0 ||
!(TC_H_MAJ(id ^ q->handle)))) {
res->classid = id;
res->class = 0;
return 0;
}
}
return -1;
}
static void *fw_get(struct tcf_proto *tp, u32 handle)
{
struct fw_head *head = rtnl_dereference(tp->root);
struct fw_filter *f;
if (head == NULL)
return NULL;
f = rtnl_dereference(head->ht[fw_hash(handle)]);
for (; f; f = rtnl_dereference(f->next)) {
if (f->id == handle)
return f;
}
return NULL;
}
static int fw_init(struct tcf_proto *tp)
{
/* We don't allocate fw_head here, because in the old method
* we don't need it at all.
*/
return 0;
}
static void __fw_delete_filter(struct fw_filter *f)
{
tcf_exts_destroy(&f->exts);
tcf_exts_put_net(&f->exts);
kfree(f);
}
static void fw_delete_filter_work(struct work_struct *work)
{
struct fw_filter *f = container_of(to_rcu_work(work),
struct fw_filter,
rwork);
rtnl_lock();
__fw_delete_filter(f);
rtnl_unlock();
}
static void fw_destroy(struct tcf_proto *tp, bool rtnl_held,
struct netlink_ext_ack *extack)
{
struct fw_head *head = rtnl_dereference(tp->root);
struct fw_filter *f;
int h;
if (head == NULL)
return;
for (h = 0; h < HTSIZE; h++) {
while ((f = rtnl_dereference(head->ht[h])) != NULL) {
RCU_INIT_POINTER(head->ht[h],
rtnl_dereference(f->next));
tcf_unbind_filter(tp, &f->res);
if (tcf_exts_get_net(&f->exts))
tcf_queue_work(&f->rwork, fw_delete_filter_work);
else
__fw_delete_filter(f);
}
}
kfree_rcu(head, rcu);
}
static int fw_delete(struct tcf_proto *tp, void *arg, bool *last,
bool rtnl_held, struct netlink_ext_ack *extack)
{
struct fw_head *head = rtnl_dereference(tp->root);
struct fw_filter *f = arg;
struct fw_filter __rcu **fp;
struct fw_filter *pfp;
int ret = -EINVAL;
int h;
if (head == NULL || f == NULL)
goto out;
fp = &head->ht[fw_hash(f->id)];
for (pfp = rtnl_dereference(*fp); pfp;
fp = &pfp->next, pfp = rtnl_dereference(*fp)) {
if (pfp == f) {
RCU_INIT_POINTER(*fp, rtnl_dereference(f->next));
tcf_unbind_filter(tp, &f->res);
tcf_exts_get_net(&f->exts);
tcf_queue_work(&f->rwork, fw_delete_filter_work);
ret = 0;
break;
}
}
*last = true;
for (h = 0; h < HTSIZE; h++) {
if (rcu_access_pointer(head->ht[h])) {
*last = false;
break;
}
}
out:
return ret;
}
static const struct nla_policy fw_policy[TCA_FW_MAX + 1] = {
[TCA_FW_CLASSID] = { .type = NLA_U32 },
[TCA_FW_INDEV] = { .type = NLA_STRING, .len = IFNAMSIZ },
[TCA_FW_MASK] = { .type = NLA_U32 },
};
static int fw_set_parms(struct net *net, struct tcf_proto *tp,
struct fw_filter *f, struct nlattr **tb,
struct nlattr **tca, unsigned long base, u32 flags,
struct netlink_ext_ack *extack)
{
struct fw_head *head = rtnl_dereference(tp->root);
u32 mask;
int err;
err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &f->exts, flags,
extack);
if (err < 0)
return err;
if (tb[TCA_FW_INDEV]) {
int ret;
ret = tcf_change_indev(net, tb[TCA_FW_INDEV], extack);
if (ret < 0)
return ret;
f->ifindex = ret;
}
err = -EINVAL;
if (tb[TCA_FW_MASK]) {
mask = nla_get_u32(tb[TCA_FW_MASK]);
if (mask != head->mask)
return err;
} else if (head->mask != 0xFFFFFFFF)
return err;
if (tb[TCA_FW_CLASSID]) {
f->res.classid = nla_get_u32(tb[TCA_FW_CLASSID]);
tcf_bind_filter(tp, &f->res, base);
}
return 0;
}
static int fw_change(struct net *net, struct sk_buff *in_skb,
struct tcf_proto *tp, unsigned long base,
u32 handle, struct nlattr **tca, void **arg,
u32 flags, struct netlink_ext_ack *extack)
{
struct fw_head *head = rtnl_dereference(tp->root);
struct fw_filter *f = *arg;
struct nlattr *opt = tca[TCA_OPTIONS];
struct nlattr *tb[TCA_FW_MAX + 1];
int err;
if (!opt) {
if (handle)
return -EINVAL;
if (tcf_block_shared(tp->chain->block)) {
NL_SET_ERR_MSG(extack,
"Must specify mark when attaching fw filter to block");
return -EINVAL;
}
return 0; /* Succeed if it is old method. */
}
err = nla_parse_nested_deprecated(tb, TCA_FW_MAX, opt, fw_policy,
NULL);
if (err < 0)
return err;
if (f) {
struct fw_filter *pfp, *fnew;
struct fw_filter __rcu **fp;
if (f->id != handle && handle)
return -EINVAL;
fnew = kzalloc_obj(struct fw_filter);
if (!fnew)
return -ENOBUFS;
fnew->id = f->id;
fnew->ifindex = f->ifindex;
fnew->tp = f->tp;
err = tcf_exts_init(&fnew->exts, net, TCA_FW_ACT,
TCA_FW_POLICE);
if (err < 0) {
kfree(fnew);
return err;
}
err = fw_set_parms(net, tp, fnew, tb, tca, base, flags, extack);
if (err < 0) {
tcf_exts_destroy(&fnew->exts);
kfree(fnew);
return err;
}
fp = &head->ht[fw_hash(fnew->id)];
for (pfp = rtnl_dereference(*fp); pfp;
fp = &pfp->next, pfp = rtnl_dereference(*fp))
if (pfp == f)
break;
RCU_INIT_POINTER(fnew->next, rtnl_dereference(pfp->next));
rcu_assign_pointer(*fp, fnew);
tcf_unbind_filter(tp, &f->res);
tcf_exts_get_net(&f->exts);
tcf_queue_work(&f->rwork, fw_delete_filter_work);
*arg = fnew;
return err;
}
if (!handle)
return -EINVAL;
if (!head) {
u32 mask = 0xFFFFFFFF;
if (tb[TCA_FW_MASK])
mask = nla_get_u32(tb[TCA_FW_MASK]);
head = kzalloc_obj(*head);
if (!head)
return -ENOBUFS;
head->mask = mask;
rcu_assign_pointer(tp->root, head);
}
f = kzalloc_obj(struct fw_filter);
if (f == NULL)
return -ENOBUFS;
err = tcf_exts_init(&f->exts, net, TCA_FW_ACT, TCA_FW_POLICE);
if (err < 0)
goto errout;
f->id = handle;
f->tp = tp;
err = fw_set_parms(net, tp, f, tb, tca, base, flags, extack);
if (err < 0)
goto errout;
RCU_INIT_POINTER(f->next, head->ht[fw_hash(handle)]);
rcu_assign_pointer(head->ht[fw_hash(handle)], f);
*arg = f;
return 0;
errout:
tcf_exts_destroy(&f->exts);
kfree(f);
return err;
}
static void fw_walk(struct tcf_proto *tp, struct tcf_walker *arg,
bool rtnl_held)
{
struct fw_head *head = rtnl_dereference(tp->root);
int h;
if (head == NULL)
arg->stop = 1;
if (arg->stop)
return;
for (h = 0; h < HTSIZE; h++) {
struct fw_filter *f;
for (f = rtnl_dereference(head->ht[h]); f;
f = rtnl_dereference(f->next)) {
if (!tc_cls_stats_dump(tp, arg, f))
return;
}
}
}
static int fw_dump(struct net *net, struct tcf_proto *tp, void *fh,
struct sk_buff *skb, struct tcmsg *t, bool rtnl_held)
{
struct fw_head *head = rtnl_dereference(tp->root);
struct fw_filter *f = fh;
struct nlattr *nest;
if (f == NULL)
return skb->len;
t->tcm_handle = f->id;
if (!f->res.classid && !tcf_exts_has_actions(&f->exts))
return skb->len;
nest = nla_nest_start_noflag(skb, TCA_OPTIONS);
if (nest == NULL)
goto nla_put_failure;
if (f->res.classid &&
nla_put_u32(skb, TCA_FW_CLASSID, f->res.classid))
goto nla_put_failure;
if (f->ifindex) {
struct net_device *dev;
dev = __dev_get_by_index(net, f->ifindex);
if (dev && nla_put_string(skb, TCA_FW_INDEV, dev->name))
goto nla_put_failure;
}
if (head->mask != 0xFFFFFFFF &&
nla_put_u32(skb, TCA_FW_MASK, head->mask))
goto nla_put_failure;
if (tcf_exts_dump(skb, &f->exts) < 0)
goto nla_put_failure;
nla_nest_end(skb, nest);
if (tcf_exts_dump_stats(skb, &f->exts) < 0)
goto nla_put_failure;
return skb->len;
nla_put_failure:
nla_nest_cancel(skb, nest);
return -1;
}
static void fw_bind_class(void *fh, u32 classid, unsigned long cl, void *q,
unsigned long base)
{
struct fw_filter *f = fh;
tc_cls_bind_class(classid, cl, q, &f->res, base);
}
static struct tcf_proto_ops cls_fw_ops __read_mostly = {
.kind = "fw",
.classify = fw_classify,
.init = fw_init,
.destroy = fw_destroy,
.get = fw_get,
.change = fw_change,
.delete = fw_delete,
.walk = fw_walk,
.dump = fw_dump,
.bind_class = fw_bind_class,
.owner = THIS_MODULE,
};
MODULE_ALIAS_NET_CLS("fw");
static int __init init_fw(void)
{
return register_tcf_proto_ops(&cls_fw_ops);
}
static void __exit exit_fw(void)
{
unregister_tcf_proto_ops(&cls_fw_ops);
}
module_init(init_fw)
module_exit(exit_fw)
MODULE_DESCRIPTION("SKB mark based TC classifier");
MODULE_LICENSE("GPL");