mirror of
https://github.com/torvalds/linux.git
synced 2026-05-29 17:43:52 +02:00
The following typical script is extremely disruptive,
because each graft operation calls dev_deactivate(),
which resets all the queues of the device.
QPARAM="limit 100000 flow_limit 1000 buckets 4096"
TXQS=64
for ETH in eth1
do
tc qd del dev $ETH root 2>/dev/null
tc qd add dev $ETH root handle 1: mq
for i in `seq 1 $TXQS`
do
slot=$( printf %x $(( i )) )
tc qd add dev $ETH parent 1:$slot fq $QPARAM
done
done
One can add "ip link set dev $ETH down/up" to reduce the disruption time:
QPARAM="limit 100000 flow_limit 1000 buckets 4096"
TXQS=64
for ETH in eth1
do
ip link set dev $ETH down
tc qd del dev $ETH root 2>/dev/null
tc qd add dev $ETH root handle 1: mq
for i in `seq 1 $TXQS`
do
slot=$( printf %x $(( i )) )
tc qd add dev $ETH parent 1:$slot fq $QPARAM
done
ip link set dev $ETH up
done
Or we can add a @reset_needed flag to dev_deactivate() and
dev_deactivate_many().
This flag is set to true at device dismantle or linkwatch_do_dev(),
and to false for graft operations.
In the future, we might only stop one queue instead of the whole
device, i.e. call dev_deactivate_queue() instead of dev_deactivate().
I think the problem (quadratic behavior) was added in commit
2fb541c862 ("net: sch_generic: aviod concurrent reset and enqueue op
for lockless qdisc") but this does not look serious enough to deserve
risky backports.
Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Yunsheng Lin <linyunsheng@huawei.com>
Reviewed-by: Jamal Hadi Salim <jhs@mojatatu.com>
Reviewed-by: Toke Høiland-Jørgensen <toke@redhat.com>
Reviewed-by: Victor Nogueira <victor@mojatatu.com>
Link: https://patch.msgid.link/20260307163430.470644-1-edumazet@google.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
302 lines
7.6 KiB
C
302 lines
7.6 KiB
C
// SPDX-License-Identifier: GPL-2.0-only
|
|
/*
|
|
* net/sched/sch_mq.c Classful multiqueue dummy scheduler
|
|
*
|
|
* Copyright (c) 2009 Patrick McHardy <kaber@trash.net>
|
|
*/
|
|
|
|
#include <linux/types.h>
|
|
#include <linux/slab.h>
|
|
#include <linux/kernel.h>
|
|
#include <linux/export.h>
|
|
#include <linux/string.h>
|
|
#include <linux/errno.h>
|
|
#include <linux/skbuff.h>
|
|
#include <net/netlink.h>
|
|
#include <net/pkt_cls.h>
|
|
#include <net/pkt_sched.h>
|
|
#include <net/sch_priv.h>
|
|
|
|
static int mq_offload(struct Qdisc *sch, enum tc_mq_command cmd)
|
|
{
|
|
struct net_device *dev = qdisc_dev(sch);
|
|
struct tc_mq_qopt_offload opt = {
|
|
.command = cmd,
|
|
.handle = sch->handle,
|
|
};
|
|
|
|
if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc)
|
|
return -EOPNOTSUPP;
|
|
|
|
return dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_MQ, &opt);
|
|
}
|
|
|
|
static int mq_offload_stats(struct Qdisc *sch)
|
|
{
|
|
struct tc_mq_qopt_offload opt = {
|
|
.command = TC_MQ_STATS,
|
|
.handle = sch->handle,
|
|
.stats = {
|
|
.bstats = &sch->bstats,
|
|
.qstats = &sch->qstats,
|
|
},
|
|
};
|
|
|
|
return qdisc_offload_dump_helper(sch, TC_SETUP_QDISC_MQ, &opt);
|
|
}
|
|
|
|
void mq_destroy_common(struct Qdisc *sch)
|
|
{
|
|
struct net_device *dev = qdisc_dev(sch);
|
|
struct mq_sched *priv = qdisc_priv(sch);
|
|
unsigned int ntx;
|
|
|
|
if (!priv->qdiscs)
|
|
return;
|
|
for (ntx = 0; ntx < dev->num_tx_queues && priv->qdiscs[ntx]; ntx++)
|
|
qdisc_put(priv->qdiscs[ntx]);
|
|
kfree(priv->qdiscs);
|
|
}
|
|
EXPORT_SYMBOL_NS_GPL(mq_destroy_common, "NET_SCHED_INTERNAL");
|
|
|
|
static void mq_destroy(struct Qdisc *sch)
|
|
{
|
|
mq_offload(sch, TC_MQ_DESTROY);
|
|
mq_destroy_common(sch);
|
|
}
|
|
|
|
int mq_init_common(struct Qdisc *sch, struct nlattr *opt,
|
|
struct netlink_ext_ack *extack,
|
|
const struct Qdisc_ops *qdisc_ops)
|
|
{
|
|
struct net_device *dev = qdisc_dev(sch);
|
|
struct mq_sched *priv = qdisc_priv(sch);
|
|
struct netdev_queue *dev_queue;
|
|
struct Qdisc *qdisc;
|
|
unsigned int ntx;
|
|
|
|
if (sch->parent != TC_H_ROOT)
|
|
return -EOPNOTSUPP;
|
|
|
|
if (!netif_is_multiqueue(dev))
|
|
return -EOPNOTSUPP;
|
|
|
|
/* pre-allocate qdiscs, attachment can't fail */
|
|
priv->qdiscs = kzalloc_objs(priv->qdiscs[0], dev->num_tx_queues);
|
|
if (!priv->qdiscs)
|
|
return -ENOMEM;
|
|
|
|
for (ntx = 0; ntx < dev->num_tx_queues; ntx++) {
|
|
dev_queue = netdev_get_tx_queue(dev, ntx);
|
|
qdisc = qdisc_create_dflt(dev_queue,
|
|
qdisc_ops ?: get_default_qdisc_ops(dev, ntx),
|
|
TC_H_MAKE(TC_H_MAJ(sch->handle),
|
|
TC_H_MIN(ntx + 1)),
|
|
extack);
|
|
if (!qdisc)
|
|
return -ENOMEM;
|
|
priv->qdiscs[ntx] = qdisc;
|
|
qdisc->flags |= TCQ_F_ONETXQUEUE | TCQ_F_NOPARENT;
|
|
}
|
|
|
|
sch->flags |= TCQ_F_MQROOT;
|
|
return 0;
|
|
}
|
|
EXPORT_SYMBOL_NS_GPL(mq_init_common, "NET_SCHED_INTERNAL");
|
|
|
|
static int mq_init(struct Qdisc *sch, struct nlattr *opt,
|
|
struct netlink_ext_ack *extack)
|
|
{
|
|
int ret;
|
|
|
|
ret = mq_init_common(sch, opt, extack, NULL);
|
|
if (ret)
|
|
return ret;
|
|
|
|
mq_offload(sch, TC_MQ_CREATE);
|
|
return 0;
|
|
}
|
|
|
|
void mq_attach(struct Qdisc *sch)
|
|
{
|
|
struct net_device *dev = qdisc_dev(sch);
|
|
struct mq_sched *priv = qdisc_priv(sch);
|
|
struct Qdisc *qdisc, *old;
|
|
unsigned int ntx;
|
|
|
|
for (ntx = 0; ntx < dev->num_tx_queues; ntx++) {
|
|
qdisc = priv->qdiscs[ntx];
|
|
old = dev_graft_qdisc(qdisc->dev_queue, qdisc);
|
|
if (old)
|
|
qdisc_put(old);
|
|
#ifdef CONFIG_NET_SCHED
|
|
if (ntx < dev->real_num_tx_queues)
|
|
qdisc_hash_add(qdisc, false);
|
|
#endif
|
|
|
|
}
|
|
kfree(priv->qdiscs);
|
|
priv->qdiscs = NULL;
|
|
}
|
|
EXPORT_SYMBOL_NS_GPL(mq_attach, "NET_SCHED_INTERNAL");
|
|
|
|
void mq_dump_common(struct Qdisc *sch, struct sk_buff *skb)
|
|
{
|
|
struct net_device *dev = qdisc_dev(sch);
|
|
struct Qdisc *qdisc;
|
|
unsigned int ntx;
|
|
|
|
sch->q.qlen = 0;
|
|
gnet_stats_basic_sync_init(&sch->bstats);
|
|
memset(&sch->qstats, 0, sizeof(sch->qstats));
|
|
|
|
/* MQ supports lockless qdiscs. However, statistics accounting needs
|
|
* to account for all, none, or a mix of locked and unlocked child
|
|
* qdiscs. Percpu stats are added to counters in-band and locking
|
|
* qdisc totals are added at end.
|
|
*/
|
|
for (ntx = 0; ntx < dev->num_tx_queues; ntx++) {
|
|
qdisc = rtnl_dereference(netdev_get_tx_queue(dev, ntx)->qdisc_sleeping);
|
|
spin_lock_bh(qdisc_lock(qdisc));
|
|
|
|
gnet_stats_add_basic(&sch->bstats, qdisc->cpu_bstats,
|
|
&qdisc->bstats, false);
|
|
gnet_stats_add_queue(&sch->qstats, qdisc->cpu_qstats,
|
|
&qdisc->qstats);
|
|
sch->q.qlen += qdisc_qlen(qdisc);
|
|
|
|
spin_unlock_bh(qdisc_lock(qdisc));
|
|
}
|
|
}
|
|
EXPORT_SYMBOL_NS_GPL(mq_dump_common, "NET_SCHED_INTERNAL");
|
|
|
|
/*
 * ->dump hook: refresh the aggregated software statistics, then let the
 * offload stats helper run. Returns the result of mq_offload_stats().
 */
static int mq_dump(struct Qdisc *sch, struct sk_buff *skb)
{
	mq_dump_common(sch, skb);

	return mq_offload_stats(sch);
}
|
|
|
|
static struct netdev_queue *mq_queue_get(struct Qdisc *sch, unsigned long cl)
|
|
{
|
|
struct net_device *dev = qdisc_dev(sch);
|
|
unsigned long ntx = cl - 1;
|
|
|
|
if (ntx >= dev->num_tx_queues)
|
|
return NULL;
|
|
return netdev_get_tx_queue(dev, ntx);
|
|
}
|
|
|
|
struct netdev_queue *mq_select_queue(struct Qdisc *sch,
|
|
struct tcmsg *tcm)
|
|
{
|
|
return mq_queue_get(sch, TC_H_MIN(tcm->tcm_parent));
|
|
}
|
|
EXPORT_SYMBOL_NS_GPL(mq_select_queue, "NET_SCHED_INTERNAL");
|
|
|
|
static int mq_graft(struct Qdisc *sch, unsigned long cl, struct Qdisc *new,
|
|
struct Qdisc **old, struct netlink_ext_ack *extack)
|
|
{
|
|
struct netdev_queue *dev_queue = mq_queue_get(sch, cl);
|
|
struct tc_mq_qopt_offload graft_offload;
|
|
struct net_device *dev = qdisc_dev(sch);
|
|
|
|
if (dev->flags & IFF_UP)
|
|
dev_deactivate(dev, false);
|
|
|
|
*old = dev_graft_qdisc(dev_queue, new);
|
|
if (new)
|
|
new->flags |= TCQ_F_ONETXQUEUE | TCQ_F_NOPARENT;
|
|
if (dev->flags & IFF_UP)
|
|
dev_activate(dev);
|
|
|
|
graft_offload.handle = sch->handle;
|
|
graft_offload.graft_params.queue = cl - 1;
|
|
graft_offload.graft_params.child_handle = new ? new->handle : 0;
|
|
graft_offload.command = TC_MQ_GRAFT;
|
|
|
|
qdisc_offload_graft_helper(qdisc_dev(sch), sch, new, *old,
|
|
TC_SETUP_QDISC_MQ, &graft_offload, extack);
|
|
return 0;
|
|
}
|
|
|
|
struct Qdisc *mq_leaf(struct Qdisc *sch, unsigned long cl)
|
|
{
|
|
struct netdev_queue *dev_queue = mq_queue_get(sch, cl);
|
|
|
|
return rtnl_dereference(dev_queue->qdisc_sleeping);
|
|
}
|
|
EXPORT_SYMBOL_NS_GPL(mq_leaf, "NET_SCHED_INTERNAL");
|
|
|
|
unsigned long mq_find(struct Qdisc *sch, u32 classid)
|
|
{
|
|
unsigned int ntx = TC_H_MIN(classid);
|
|
|
|
if (!mq_queue_get(sch, ntx))
|
|
return 0;
|
|
return ntx;
|
|
}
|
|
EXPORT_SYMBOL_NS_GPL(mq_find, "NET_SCHED_INTERNAL");
|
|
|
|
int mq_dump_class(struct Qdisc *sch, unsigned long cl,
|
|
struct sk_buff *skb, struct tcmsg *tcm)
|
|
{
|
|
struct netdev_queue *dev_queue = mq_queue_get(sch, cl);
|
|
|
|
tcm->tcm_parent = TC_H_ROOT;
|
|
tcm->tcm_handle |= TC_H_MIN(cl);
|
|
tcm->tcm_info = rtnl_dereference(dev_queue->qdisc_sleeping)->handle;
|
|
return 0;
|
|
}
|
|
EXPORT_SYMBOL_NS_GPL(mq_dump_class, "NET_SCHED_INTERNAL");
|
|
|
|
int mq_dump_class_stats(struct Qdisc *sch, unsigned long cl,
|
|
struct gnet_dump *d)
|
|
{
|
|
struct netdev_queue *dev_queue = mq_queue_get(sch, cl);
|
|
|
|
sch = rtnl_dereference(dev_queue->qdisc_sleeping);
|
|
if (gnet_stats_copy_basic(d, sch->cpu_bstats, &sch->bstats, true) < 0 ||
|
|
qdisc_qstats_copy(d, sch) < 0)
|
|
return -1;
|
|
return 0;
|
|
}
|
|
EXPORT_SYMBOL_NS_GPL(mq_dump_class_stats, "NET_SCHED_INTERNAL");
|
|
|
|
void mq_walk(struct Qdisc *sch, struct qdisc_walker *arg)
|
|
{
|
|
struct net_device *dev = qdisc_dev(sch);
|
|
unsigned int ntx;
|
|
|
|
if (arg->stop)
|
|
return;
|
|
|
|
arg->count = arg->skip;
|
|
for (ntx = arg->skip; ntx < dev->num_tx_queues; ntx++) {
|
|
if (!tc_qdisc_stats_dump(sch, ntx + 1, arg))
|
|
break;
|
|
}
|
|
}
|
|
EXPORT_SYMBOL_NS_GPL(mq_walk, "NET_SCHED_INTERNAL");
|
|
|
|
static const struct Qdisc_class_ops mq_class_ops = {
|
|
.select_queue = mq_select_queue,
|
|
.graft = mq_graft,
|
|
.leaf = mq_leaf,
|
|
.find = mq_find,
|
|
.walk = mq_walk,
|
|
.dump = mq_dump_class,
|
|
.dump_stats = mq_dump_class_stats,
|
|
};
|
|
|
|
/* Qdisc operations table registered under the "mq" id. */
struct Qdisc_ops mq_qdisc_ops __read_mostly = {
	.cl_ops			= &mq_class_ops,
	.id			= "mq",
	.priv_size		= sizeof(struct mq_sched),
	.init			= mq_init,
	.destroy		= mq_destroy,
	.attach			= mq_attach,
	.change_real_num_tx	= mq_change_real_num_tx,
	.dump			= mq_dump,
	.owner			= THIS_MODULE,
};
|