mirror of
https://github.com/torvalds/linux.git
synced 2026-06-02 19:43:40 +02:00
Like pointed out by Sashiko [1], since commited76f5edcc("net: sched: protect filter_chain list with filter_chain_lock mutex") TC filters are added to a shared block and published to datapath before their ->change() function is called. This is a problem for cls_fw: an invalid filter created with the "old" method can still classify some packets before it is destroyed by the validation logic added by Xiang. Therefore, insisting with repeated runs of the following script: # ip link add dev crash0 type dummy # ip link set dev crash0 up # mausezahn crash0 -c 100000 -P 10 \ > -A 4.3.2.1 -B 1.2.3.4 -t udp "dp=1234" -q & # sleep 1 # tc qdisc add dev crash0 egress_block 1 clsact # tc filter add block 1 protocol ip prio 1 matchall \ > action skbedit mark 65536 continue # tc filter add block 1 protocol ip prio 2 fw # ip link del dev crash0 can still make fw_classify() hit the WARN_ON() in [2]: WARNING: ./include/net/pkt_cls.h:88 at fw_classify+0x244/0x250 [cls_fw], CPU#18: mausezahn/1399 Modules linked in: cls_fw(E) act_skbedit(E) CPU: 18 UID: 0 PID: 1399 Comm: mausezahn Tainted: G E 7.0.0-rc6-virtme #17 PREEMPT(full) Tainted: [E]=UNSIGNED_MODULE Hardware name: Red Hat KVM, BIOS 1.16.3-2.el9 04/01/2014 RIP: 0010:fw_classify+0x244/0x250 [cls_fw] Code: 5c 49 c7 45 00 00 00 00 00 41 5d 41 5e 41 5f 5d c3 cc cc cc cc 5b b8 ff ff ff ff 41 5c 41 5d 41 5e 41 5f 5d c3 cc cc cc cc 90 <0f> 0b 90 eb a0 0f 1f 80 00 00 00 00 90 90 90 90 90 90 90 90 90 90 RSP: 0018:ffffd1b7026bf8a8 EFLAGS: 00010202 RAX: ffff8c5ac9c60800 RBX: ffff8c5ac99322c0 RCX: 0000000000000004 RDX: 0000000000000001 RSI: ffff8c5b74d7a000 RDI: ffff8c5ac8284f40 RBP: ffffd1b7026bf8d0 R08: 0000000000000000 R09: ffffd1b7026bf9b0 R10: 00000000ffffffff R11: 0000000000000000 R12: 0000000000010000 R13: ffffd1b7026bf930 R14: ffff8c5ac8284f40 R15: 0000000000000000 FS: 00007fca40c37740(0000) GS:ffff8c5b74d7a000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007fca40e822a0 CR3: 0000000005ca0001 CR4: 0000000000172ef0 Call Trace: <TASK> tcf_classify+0x17d/0x5c0 tc_run+0x9d/0x150 __dev_queue_xmit+0x2ab/0x14d0 ip_finish_output2+0x340/0x8f0 ip_output+0xa4/0x250 raw_sendmsg+0x147d/0x14b0 __sys_sendto+0x1cc/0x1f0 __x64_sys_sendto+0x24/0x30 do_syscall_64+0x126/0xf80 entry_SYSCALL_64_after_hwframe+0x77/0x7f RIP: 0033:0x7fca40e822ba Code: d8 64 89 02 48 c7 c0 ff ff ff ff eb b8 0f 1f 00 f3 0f 1e fa 41 89 ca 64 8b 04 25 18 00 00 00 85 c0 75 15 b8 2c 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 7e c3 0f 1f 44 00 00 41 54 48 83 ec 30 44 89 RSP: 002b:00007ffc248a42c8 EFLAGS: 00000246 ORIG_RAX: 000000000000002c RAX: ffffffffffffffda RBX: 000055ef233289d0 RCX: 00007fca40e822ba RDX: 000000000000001e RSI: 000055ef23328c30 RDI: 0000000000000003 RBP: 000055ef233289d0 R08: 00007ffc248a42d0 R09: 0000000000000010 R10: 0000000000000000 R11: 0000000000000246 R12: 000000000000001e R13: 00000000000186a0 R14: 0000000000000000 R15: 00007fca41043000 </TASK> irq event stamp: 1045778 hardirqs last enabled at (1045784): [<ffffffff864ec042>] __up_console_sem+0x52/0x60 hardirqs last disabled at (1045789): [<ffffffff864ec027>] __up_console_sem+0x37/0x60 softirqs last enabled at (1045426): [<ffffffff874d48c7>] __alloc_skb+0x207/0x260 softirqs last disabled at (1045434): [<ffffffff874fe8f8>] __dev_queue_xmit+0x78/0x14d0 Then, because of the value in the packet's mark, dereference on 'q->handle' with NULL 'q' occurs: BUG: kernel NULL pointer dereference, address: 0000000000000038 [...] RIP: 0010:fw_classify+0x1fe/0x250 [cls_fw] [...] Skip "old-style" classification on shared blocks, so that the NULL dereference is fixed and WARN_ON() is not hit anymore in the short lifetime of invalid cls_fw "old-style" filters. [1] https://sashiko.dev/#/patchset/20260331050217.504278-1-xmei5%40asu.edu [2] https://elixir.bootlin.com/linux/v7.0-rc6/source/include/net/pkt_cls.h#L86 Fixes:faeea8bbf6("net/sched: cls_fw: fix NULL pointer dereference on shared blocks") Fixes:ed76f5edcc("net: sched: protect filter_chain list with filter_chain_lock mutex") Acked-by: Jamal Hadi Salim <jhs@mojatatu.com> Signed-off-by: Davide Caratti <dcaratti@redhat.com> Link: https://patch.msgid.link/e39cbd3103a337f1e515d186fe697b4459d24757.1775661704.git.dcaratti@redhat.com Signed-off-by: Jakub Kicinski <kuba@kernel.org>
466 lines
9.8 KiB
C
466 lines
9.8 KiB
C
// SPDX-License-Identifier: GPL-2.0-or-later
|
|
/*
|
|
* net/sched/cls_fw.c Classifier mapping ipchains' fwmark to traffic class.
|
|
*
|
|
* Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
|
|
*
|
|
* Changes:
|
|
* Karlis Peisenieks <karlis@mt.lv> : 990415 : fw_walk off by one
|
|
* Karlis Peisenieks <karlis@mt.lv> : 990415 : fw_delete killed all the filter (and kernel).
|
|
* Alex <alex@pilotsoft.com> : 2004xxyy: Added Action extension
|
|
*/
|
|
|
|
#include <linux/module.h>
|
|
#include <linux/slab.h>
|
|
#include <linux/types.h>
|
|
#include <linux/kernel.h>
|
|
#include <linux/string.h>
|
|
#include <linux/errno.h>
|
|
#include <linux/skbuff.h>
|
|
#include <net/netlink.h>
|
|
#include <net/act_api.h>
|
|
#include <net/pkt_cls.h>
|
|
#include <net/sch_generic.h>
|
|
#include <net/tc_wrapper.h>
|
|
|
|
#define HTSIZE 256
|
|
|
|
struct fw_head {
|
|
u32 mask;
|
|
struct fw_filter __rcu *ht[HTSIZE];
|
|
struct rcu_head rcu;
|
|
};
|
|
|
|
struct fw_filter {
|
|
struct fw_filter __rcu *next;
|
|
u32 id;
|
|
struct tcf_result res;
|
|
int ifindex;
|
|
struct tcf_exts exts;
|
|
struct tcf_proto *tp;
|
|
struct rcu_work rwork;
|
|
};
|
|
|
|
static u32 fw_hash(u32 handle)
|
|
{
|
|
handle ^= (handle >> 16);
|
|
handle ^= (handle >> 8);
|
|
return handle % HTSIZE;
|
|
}
|
|
|
|
TC_INDIRECT_SCOPE int fw_classify(struct sk_buff *skb,
|
|
const struct tcf_proto *tp,
|
|
struct tcf_result *res)
|
|
{
|
|
struct fw_head *head = rcu_dereference_bh(tp->root);
|
|
struct fw_filter *f;
|
|
int r;
|
|
u32 id = skb->mark;
|
|
|
|
if (head != NULL) {
|
|
id &= head->mask;
|
|
|
|
for (f = rcu_dereference_bh(head->ht[fw_hash(id)]); f;
|
|
f = rcu_dereference_bh(f->next)) {
|
|
if (f->id == id) {
|
|
*res = f->res;
|
|
if (!tcf_match_indev(skb, f->ifindex))
|
|
continue;
|
|
r = tcf_exts_exec(skb, &f->exts, res);
|
|
if (r < 0)
|
|
continue;
|
|
|
|
return r;
|
|
}
|
|
}
|
|
} else {
|
|
struct Qdisc *q;
|
|
|
|
/* Old method: classify the packet using its skb mark. */
|
|
if (tcf_block_shared(tp->chain->block))
|
|
return -1;
|
|
|
|
q = tcf_block_q(tp->chain->block);
|
|
if (id && (TC_H_MAJ(id) == 0 ||
|
|
!(TC_H_MAJ(id ^ q->handle)))) {
|
|
res->classid = id;
|
|
res->class = 0;
|
|
return 0;
|
|
}
|
|
}
|
|
|
|
return -1;
|
|
}
|
|
|
|
static void *fw_get(struct tcf_proto *tp, u32 handle)
|
|
{
|
|
struct fw_head *head = rtnl_dereference(tp->root);
|
|
struct fw_filter *f;
|
|
|
|
if (head == NULL)
|
|
return NULL;
|
|
|
|
f = rtnl_dereference(head->ht[fw_hash(handle)]);
|
|
for (; f; f = rtnl_dereference(f->next)) {
|
|
if (f->id == handle)
|
|
return f;
|
|
}
|
|
return NULL;
|
|
}
|
|
|
|
static int fw_init(struct tcf_proto *tp)
|
|
{
|
|
/* We don't allocate fw_head here, because in the old method
|
|
* we don't need it at all.
|
|
*/
|
|
return 0;
|
|
}
|
|
|
|
static void __fw_delete_filter(struct fw_filter *f)
|
|
{
|
|
tcf_exts_destroy(&f->exts);
|
|
tcf_exts_put_net(&f->exts);
|
|
kfree(f);
|
|
}
|
|
|
|
static void fw_delete_filter_work(struct work_struct *work)
|
|
{
|
|
struct fw_filter *f = container_of(to_rcu_work(work),
|
|
struct fw_filter,
|
|
rwork);
|
|
rtnl_lock();
|
|
__fw_delete_filter(f);
|
|
rtnl_unlock();
|
|
}
|
|
|
|
static void fw_destroy(struct tcf_proto *tp, bool rtnl_held,
|
|
struct netlink_ext_ack *extack)
|
|
{
|
|
struct fw_head *head = rtnl_dereference(tp->root);
|
|
struct fw_filter *f;
|
|
int h;
|
|
|
|
if (head == NULL)
|
|
return;
|
|
|
|
for (h = 0; h < HTSIZE; h++) {
|
|
while ((f = rtnl_dereference(head->ht[h])) != NULL) {
|
|
RCU_INIT_POINTER(head->ht[h],
|
|
rtnl_dereference(f->next));
|
|
tcf_unbind_filter(tp, &f->res);
|
|
if (tcf_exts_get_net(&f->exts))
|
|
tcf_queue_work(&f->rwork, fw_delete_filter_work);
|
|
else
|
|
__fw_delete_filter(f);
|
|
}
|
|
}
|
|
kfree_rcu(head, rcu);
|
|
}
|
|
|
|
static int fw_delete(struct tcf_proto *tp, void *arg, bool *last,
|
|
bool rtnl_held, struct netlink_ext_ack *extack)
|
|
{
|
|
struct fw_head *head = rtnl_dereference(tp->root);
|
|
struct fw_filter *f = arg;
|
|
struct fw_filter __rcu **fp;
|
|
struct fw_filter *pfp;
|
|
int ret = -EINVAL;
|
|
int h;
|
|
|
|
if (head == NULL || f == NULL)
|
|
goto out;
|
|
|
|
fp = &head->ht[fw_hash(f->id)];
|
|
|
|
for (pfp = rtnl_dereference(*fp); pfp;
|
|
fp = &pfp->next, pfp = rtnl_dereference(*fp)) {
|
|
if (pfp == f) {
|
|
RCU_INIT_POINTER(*fp, rtnl_dereference(f->next));
|
|
tcf_unbind_filter(tp, &f->res);
|
|
tcf_exts_get_net(&f->exts);
|
|
tcf_queue_work(&f->rwork, fw_delete_filter_work);
|
|
ret = 0;
|
|
break;
|
|
}
|
|
}
|
|
|
|
*last = true;
|
|
for (h = 0; h < HTSIZE; h++) {
|
|
if (rcu_access_pointer(head->ht[h])) {
|
|
*last = false;
|
|
break;
|
|
}
|
|
}
|
|
|
|
out:
|
|
return ret;
|
|
}
|
|
|
|
static const struct nla_policy fw_policy[TCA_FW_MAX + 1] = {
|
|
[TCA_FW_CLASSID] = { .type = NLA_U32 },
|
|
[TCA_FW_INDEV] = { .type = NLA_STRING, .len = IFNAMSIZ },
|
|
[TCA_FW_MASK] = { .type = NLA_U32 },
|
|
};
|
|
|
|
static int fw_set_parms(struct net *net, struct tcf_proto *tp,
|
|
struct fw_filter *f, struct nlattr **tb,
|
|
struct nlattr **tca, unsigned long base, u32 flags,
|
|
struct netlink_ext_ack *extack)
|
|
{
|
|
struct fw_head *head = rtnl_dereference(tp->root);
|
|
u32 mask;
|
|
int err;
|
|
|
|
err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &f->exts, flags,
|
|
extack);
|
|
if (err < 0)
|
|
return err;
|
|
|
|
if (tb[TCA_FW_INDEV]) {
|
|
int ret;
|
|
ret = tcf_change_indev(net, tb[TCA_FW_INDEV], extack);
|
|
if (ret < 0)
|
|
return ret;
|
|
f->ifindex = ret;
|
|
}
|
|
|
|
err = -EINVAL;
|
|
if (tb[TCA_FW_MASK]) {
|
|
mask = nla_get_u32(tb[TCA_FW_MASK]);
|
|
if (mask != head->mask)
|
|
return err;
|
|
} else if (head->mask != 0xFFFFFFFF)
|
|
return err;
|
|
|
|
if (tb[TCA_FW_CLASSID]) {
|
|
f->res.classid = nla_get_u32(tb[TCA_FW_CLASSID]);
|
|
tcf_bind_filter(tp, &f->res, base);
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
static int fw_change(struct net *net, struct sk_buff *in_skb,
|
|
struct tcf_proto *tp, unsigned long base,
|
|
u32 handle, struct nlattr **tca, void **arg,
|
|
u32 flags, struct netlink_ext_ack *extack)
|
|
{
|
|
struct fw_head *head = rtnl_dereference(tp->root);
|
|
struct fw_filter *f = *arg;
|
|
struct nlattr *opt = tca[TCA_OPTIONS];
|
|
struct nlattr *tb[TCA_FW_MAX + 1];
|
|
int err;
|
|
|
|
if (!opt) {
|
|
if (handle)
|
|
return -EINVAL;
|
|
|
|
if (tcf_block_shared(tp->chain->block)) {
|
|
NL_SET_ERR_MSG(extack,
|
|
"Must specify mark when attaching fw filter to block");
|
|
return -EINVAL;
|
|
}
|
|
|
|
return 0; /* Succeed if it is old method. */
|
|
}
|
|
|
|
err = nla_parse_nested_deprecated(tb, TCA_FW_MAX, opt, fw_policy,
|
|
NULL);
|
|
if (err < 0)
|
|
return err;
|
|
|
|
if (f) {
|
|
struct fw_filter *pfp, *fnew;
|
|
struct fw_filter __rcu **fp;
|
|
|
|
if (f->id != handle && handle)
|
|
return -EINVAL;
|
|
|
|
fnew = kzalloc_obj(struct fw_filter);
|
|
if (!fnew)
|
|
return -ENOBUFS;
|
|
|
|
fnew->id = f->id;
|
|
fnew->ifindex = f->ifindex;
|
|
fnew->tp = f->tp;
|
|
|
|
err = tcf_exts_init(&fnew->exts, net, TCA_FW_ACT,
|
|
TCA_FW_POLICE);
|
|
if (err < 0) {
|
|
kfree(fnew);
|
|
return err;
|
|
}
|
|
|
|
err = fw_set_parms(net, tp, fnew, tb, tca, base, flags, extack);
|
|
if (err < 0) {
|
|
tcf_exts_destroy(&fnew->exts);
|
|
kfree(fnew);
|
|
return err;
|
|
}
|
|
|
|
fp = &head->ht[fw_hash(fnew->id)];
|
|
for (pfp = rtnl_dereference(*fp); pfp;
|
|
fp = &pfp->next, pfp = rtnl_dereference(*fp))
|
|
if (pfp == f)
|
|
break;
|
|
|
|
RCU_INIT_POINTER(fnew->next, rtnl_dereference(pfp->next));
|
|
rcu_assign_pointer(*fp, fnew);
|
|
tcf_unbind_filter(tp, &f->res);
|
|
tcf_exts_get_net(&f->exts);
|
|
tcf_queue_work(&f->rwork, fw_delete_filter_work);
|
|
|
|
*arg = fnew;
|
|
return err;
|
|
}
|
|
|
|
if (!handle)
|
|
return -EINVAL;
|
|
|
|
if (!head) {
|
|
u32 mask = 0xFFFFFFFF;
|
|
if (tb[TCA_FW_MASK])
|
|
mask = nla_get_u32(tb[TCA_FW_MASK]);
|
|
|
|
head = kzalloc_obj(*head);
|
|
if (!head)
|
|
return -ENOBUFS;
|
|
head->mask = mask;
|
|
|
|
rcu_assign_pointer(tp->root, head);
|
|
}
|
|
|
|
f = kzalloc_obj(struct fw_filter);
|
|
if (f == NULL)
|
|
return -ENOBUFS;
|
|
|
|
err = tcf_exts_init(&f->exts, net, TCA_FW_ACT, TCA_FW_POLICE);
|
|
if (err < 0)
|
|
goto errout;
|
|
f->id = handle;
|
|
f->tp = tp;
|
|
|
|
err = fw_set_parms(net, tp, f, tb, tca, base, flags, extack);
|
|
if (err < 0)
|
|
goto errout;
|
|
|
|
RCU_INIT_POINTER(f->next, head->ht[fw_hash(handle)]);
|
|
rcu_assign_pointer(head->ht[fw_hash(handle)], f);
|
|
|
|
*arg = f;
|
|
return 0;
|
|
|
|
errout:
|
|
tcf_exts_destroy(&f->exts);
|
|
kfree(f);
|
|
return err;
|
|
}
|
|
|
|
static void fw_walk(struct tcf_proto *tp, struct tcf_walker *arg,
|
|
bool rtnl_held)
|
|
{
|
|
struct fw_head *head = rtnl_dereference(tp->root);
|
|
int h;
|
|
|
|
if (head == NULL)
|
|
arg->stop = 1;
|
|
|
|
if (arg->stop)
|
|
return;
|
|
|
|
for (h = 0; h < HTSIZE; h++) {
|
|
struct fw_filter *f;
|
|
|
|
for (f = rtnl_dereference(head->ht[h]); f;
|
|
f = rtnl_dereference(f->next)) {
|
|
if (!tc_cls_stats_dump(tp, arg, f))
|
|
return;
|
|
}
|
|
}
|
|
}
|
|
|
|
static int fw_dump(struct net *net, struct tcf_proto *tp, void *fh,
|
|
struct sk_buff *skb, struct tcmsg *t, bool rtnl_held)
|
|
{
|
|
struct fw_head *head = rtnl_dereference(tp->root);
|
|
struct fw_filter *f = fh;
|
|
struct nlattr *nest;
|
|
|
|
if (f == NULL)
|
|
return skb->len;
|
|
|
|
t->tcm_handle = f->id;
|
|
|
|
if (!f->res.classid && !tcf_exts_has_actions(&f->exts))
|
|
return skb->len;
|
|
|
|
nest = nla_nest_start_noflag(skb, TCA_OPTIONS);
|
|
if (nest == NULL)
|
|
goto nla_put_failure;
|
|
|
|
if (f->res.classid &&
|
|
nla_put_u32(skb, TCA_FW_CLASSID, f->res.classid))
|
|
goto nla_put_failure;
|
|
if (f->ifindex) {
|
|
struct net_device *dev;
|
|
dev = __dev_get_by_index(net, f->ifindex);
|
|
if (dev && nla_put_string(skb, TCA_FW_INDEV, dev->name))
|
|
goto nla_put_failure;
|
|
}
|
|
if (head->mask != 0xFFFFFFFF &&
|
|
nla_put_u32(skb, TCA_FW_MASK, head->mask))
|
|
goto nla_put_failure;
|
|
|
|
if (tcf_exts_dump(skb, &f->exts) < 0)
|
|
goto nla_put_failure;
|
|
|
|
nla_nest_end(skb, nest);
|
|
|
|
if (tcf_exts_dump_stats(skb, &f->exts) < 0)
|
|
goto nla_put_failure;
|
|
|
|
return skb->len;
|
|
|
|
nla_put_failure:
|
|
nla_nest_cancel(skb, nest);
|
|
return -1;
|
|
}
|
|
|
|
static void fw_bind_class(void *fh, u32 classid, unsigned long cl, void *q,
|
|
unsigned long base)
|
|
{
|
|
struct fw_filter *f = fh;
|
|
|
|
tc_cls_bind_class(classid, cl, q, &f->res, base);
|
|
}
|
|
|
|
static struct tcf_proto_ops cls_fw_ops __read_mostly = {
|
|
.kind = "fw",
|
|
.classify = fw_classify,
|
|
.init = fw_init,
|
|
.destroy = fw_destroy,
|
|
.get = fw_get,
|
|
.change = fw_change,
|
|
.delete = fw_delete,
|
|
.walk = fw_walk,
|
|
.dump = fw_dump,
|
|
.bind_class = fw_bind_class,
|
|
.owner = THIS_MODULE,
|
|
};
|
|
MODULE_ALIAS_NET_CLS("fw");
|
|
|
|
static int __init init_fw(void)
|
|
{
|
|
return register_tcf_proto_ops(&cls_fw_ops);
|
|
}
|
|
|
|
static void __exit exit_fw(void)
|
|
{
|
|
unregister_tcf_proto_ops(&cls_fw_ops);
|
|
}
|
|
|
|
module_init(init_fw)
|
|
module_exit(exit_fw)
|
|
MODULE_DESCRIPTION("SKB mark based TC classifier");
|
|
MODULE_LICENSE("GPL");
|