Merge branch 'dev_watchdog-less-intrusive'

Eric Dumazet says:

====================
net: make dev_watchdog() less intrusive

dev_watchdog() is used on many NIC to periodically monitor TX queues
to detect hangs.

Problem is : It stops all queues, then check them, then 'unfreeze' them.

Not only this stops feeding the NIC, it also migrates all qdiscs
to be serviced on the cpu calling netif_tx_unlock(), causing
a potential latency artifact.

With many TX queues, this is becoming more visible.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
David S. Miller 2021-11-17 14:56:16 +00:00
commit 17a7555bf2
15 changed files with 94 additions and 70 deletions

View File

@ -869,7 +869,7 @@ static void xgene_enet_timeout(struct net_device *ndev, unsigned int txqueue)
for (i = 0; i < pdata->txq_cnt; i++) {
txq = netdev_get_tx_queue(ndev, i);
txq->trans_start = jiffies;
txq_trans_cond_update(txq);
netif_tx_start_queue(txq);
}
}

View File

@ -766,7 +766,7 @@ static bool ag71xx_check_dma_stuck(struct ag71xx *ag)
unsigned long timestamp;
u32 rx_sm, tx_sm, rx_fd;
timestamp = netdev_get_tx_queue(ag->ndev, 0)->trans_start;
timestamp = READ_ONCE(netdev_get_tx_queue(ag->ndev, 0)->trans_start);
if (likely(time_before(jiffies, timestamp + HZ / 10)))
return false;

View File

@ -2325,7 +2325,7 @@ dpaa_start_xmit(struct sk_buff *skb, struct net_device *net_dev)
txq = netdev_get_tx_queue(net_dev, queue_mapping);
/* LLTX requires to do our own update of trans_start */
txq->trans_start = jiffies;
txq_trans_cond_update(txq);
if (priv->tx_tstamp && skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP) {
fd.cmd |= cpu_to_be32(FM_FD_CMD_UPD);
@ -2531,7 +2531,7 @@ static int dpaa_xdp_xmit_frame(struct net_device *net_dev,
/* Bump the trans_start */
txq = netdev_get_tx_queue(net_dev, smp_processor_id());
txq->trans_start = jiffies;
txq_trans_cond_update(txq);
err = dpaa_xmit(priv, percpu_stats, smp_processor_id(), &fd);
if (err) {

View File

@ -2679,7 +2679,7 @@ static bool hns3_get_tx_timeo_queue_info(struct net_device *ndev)
unsigned long trans_start;
q = netdev_get_tx_queue(ndev, i);
trans_start = q->trans_start;
trans_start = READ_ONCE(q->trans_start);
if (netif_xmit_stopped(q) &&
time_after(jiffies,
(trans_start + ndev->watchdog_timeo))) {

View File

@ -2058,7 +2058,7 @@ static netdev_tx_t ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev)
tx_packets++;
tx_bytes += skb->len;
txq->trans_start = jiffies;
txq_trans_cond_update(txq);
ret = NETDEV_TX_OK;
goto out;

View File

@ -2927,7 +2927,7 @@ static int igb_xdp_xmit_back(struct igb_adapter *adapter, struct xdp_buff *xdp)
nq = txring_txq(tx_ring);
__netif_tx_lock(nq, cpu);
/* Avoid transmit queue timeout since we share it with the slow path */
nq->trans_start = jiffies;
txq_trans_cond_update(nq);
ret = igb_xmit_xdp_ring(adapter, tx_ring, xdpf);
__netif_tx_unlock(nq);
@ -2961,7 +2961,7 @@ static int igb_xdp_xmit(struct net_device *dev, int n,
__netif_tx_lock(nq, cpu);
/* Avoid transmit queue timeout since we share it with the slow path */
nq->trans_start = jiffies;
txq_trans_cond_update(nq);
for (i = 0; i < n; i++) {
struct xdp_frame *xdpf = frames[i];

View File

@ -565,7 +565,7 @@ int mlx5e_reporter_tx_timeout(struct mlx5e_txqsq *sq)
snprintf(err_str, sizeof(err_str),
"TX timeout on queue: %d, SQ: 0x%x, CQ: 0x%x, SQ Cons: 0x%x SQ Prod: 0x%x, usecs since last trans: %u",
sq->ch_ix, sq->sqn, sq->cq.mcq.cqn, sq->cc, sq->pc,
jiffies_to_usecs(jiffies - sq->txq->trans_start));
jiffies_to_usecs(jiffies - READ_ONCE(sq->txq->trans_start)));
mlx5e_health_report(priv, priv->tx_reporter, err_str, &err_ctx);
return to_ctx.status;

View File

@ -2356,7 +2356,7 @@ static bool stmmac_xdp_xmit_zc(struct stmmac_priv *priv, u32 queue, u32 budget)
bool work_done = true;
/* Avoids TX time-out as we are sharing with slow path */
nq->trans_start = jiffies;
txq_trans_cond_update(nq->trans_start);
budget = min(budget, stmmac_tx_avail(priv, queue));
@ -4657,7 +4657,7 @@ static int stmmac_xdp_xmit_back(struct stmmac_priv *priv,
__netif_tx_lock(nq, cpu);
/* Avoids TX time-out as we are sharing with slow path */
nq->trans_start = jiffies;
txq_trans_cond_update(nq->trans_start);
res = stmmac_xdp_xmit_xdpf(priv, queue, xdpf, false);
if (res == STMMAC_XDP_TX)
@ -6293,7 +6293,7 @@ static int stmmac_xdp_xmit(struct net_device *dev, int num_frames,
__netif_tx_lock(nq, cpu);
/* Avoids TX time-out as we are sharing with slow path */
nq->trans_start = jiffies;
txq_trans_cond_update(nq);
for (i = 0; i < num_frames; i++) {
int res;

View File

@ -345,7 +345,7 @@ static void am65_cpsw_nuss_ndo_host_tx_timeout(struct net_device *ndev,
netif_txq = netdev_get_tx_queue(ndev, txqueue);
tx_chn = &common->tx_chns[txqueue];
trans_start = netif_txq->trans_start;
trans_start = READ_ONCE(netif_txq->trans_start);
netdev_err(ndev, "txq:%d DRV_XOFF:%d tmo:%u dql_avail:%d free_desc:%zu\n",
txqueue,

View File

@ -2694,7 +2694,7 @@ static void virtnet_tx_timeout(struct net_device *dev, unsigned int txqueue)
netdev_err(dev, "TX timeout on queue: %u, sq: %s, vq: 0x%x, name: %s, %u usecs ago\n",
txqueue, sq->name, sq->vq->index, sq->vq->name,
jiffies_to_usecs(jiffies - txq->trans_start));
jiffies_to_usecs(jiffies - READ_ONCE(txq->trans_start)));
}
static const struct net_device_ops virtnet_netdev = {

View File

@ -332,7 +332,7 @@ void mwifiex_set_trans_start(struct net_device *dev)
int i;
for (i = 0; i < dev->num_tx_queues; i++)
netdev_get_tx_queue(dev, i)->trans_start = jiffies;
txq_trans_cond_update(netdev_get_tx_queue(dev, i));
netif_trans_update(dev);
}

View File

@ -2515,7 +2515,7 @@ void rtllib_stop_all_queues(struct rtllib_device *ieee)
unsigned int i;
for (i = 0; i < ieee->dev->num_tx_queues; i++)
netdev_get_tx_queue(ieee->dev, i)->trans_start = jiffies;
txq_trans_cond_update(netdev_get_tx_queue(ieee->dev, i));
netif_tx_stop_all_queues(ieee->dev);
}

View File

@ -592,7 +592,7 @@ struct netdev_queue {
* Number of TX timeouts for this queue
* (/sys/class/net/DEV/Q/trans_timeout)
*/
unsigned long trans_timeout;
atomic_long_t trans_timeout;
/* Subordinate device that the queue has been assigned to */
struct net_device *sb_dev;
@ -4095,10 +4095,21 @@ static inline void __netif_tx_unlock_bh(struct netdev_queue *txq)
spin_unlock_bh(&txq->_xmit_lock);
}
/*
* txq->trans_start can be read locklessly from dev_watchdog()
*/
static inline void txq_trans_update(struct netdev_queue *txq)
{
if (txq->xmit_lock_owner != -1)
txq->trans_start = jiffies;
WRITE_ONCE(txq->trans_start, jiffies);
}
static inline void txq_trans_cond_update(struct netdev_queue *txq)
{
unsigned long now = jiffies;
if (READ_ONCE(txq->trans_start) != now)
WRITE_ONCE(txq->trans_start, now);
}
/* legacy drivers only, netdev_start_xmit() sets txq->trans_start */
@ -4106,8 +4117,7 @@ static inline void netif_trans_update(struct net_device *dev)
{
struct netdev_queue *txq = netdev_get_tx_queue(dev, 0);
if (txq->trans_start != jiffies)
txq->trans_start = jiffies;
txq_trans_cond_update(txq);
}
/**
@ -4116,27 +4126,7 @@ static inline void netif_trans_update(struct net_device *dev)
*
* Get network device transmit lock
*/
static inline void netif_tx_lock(struct net_device *dev)
{
unsigned int i;
int cpu;
spin_lock(&dev->tx_global_lock);
cpu = smp_processor_id();
for (i = 0; i < dev->num_tx_queues; i++) {
struct netdev_queue *txq = netdev_get_tx_queue(dev, i);
/* We are the only thread of execution doing a
* freeze, but we have to grab the _xmit_lock in
* order to synchronize with threads which are in
* the ->hard_start_xmit() handler and already
* checked the frozen bit.
*/
__netif_tx_lock(txq, cpu);
set_bit(__QUEUE_STATE_FROZEN, &txq->state);
__netif_tx_unlock(txq);
}
}
void netif_tx_lock(struct net_device *dev);
static inline void netif_tx_lock_bh(struct net_device *dev)
{
@ -4144,22 +4134,7 @@ static inline void netif_tx_lock_bh(struct net_device *dev)
netif_tx_lock(dev);
}
static inline void netif_tx_unlock(struct net_device *dev)
{
unsigned int i;
for (i = 0; i < dev->num_tx_queues; i++) {
struct netdev_queue *txq = netdev_get_tx_queue(dev, i);
/* No need to grab the _xmit_lock here. If the
* queue is not stopped for another reason, we
* force a schedule.
*/
clear_bit(__QUEUE_STATE_FROZEN, &txq->state);
netif_schedule_queue(txq);
}
spin_unlock(&dev->tx_global_lock);
}
void netif_tx_unlock(struct net_device *dev);
static inline void netif_tx_unlock_bh(struct net_device *dev)
{

View File

@ -1201,11 +1201,7 @@ static const struct sysfs_ops netdev_queue_sysfs_ops = {
static ssize_t tx_timeout_show(struct netdev_queue *queue, char *buf)
{
unsigned long trans_timeout;
spin_lock_irq(&queue->_xmit_lock);
trans_timeout = queue->trans_timeout;
spin_unlock_irq(&queue->_xmit_lock);
unsigned long trans_timeout = atomic_long_read(&queue->trans_timeout);
return sprintf(buf, fmt_ulong, trans_timeout);
}

View File

@ -434,9 +434,9 @@ unsigned long dev_trans_start(struct net_device *dev)
dev = vlan_dev_real_dev(dev);
else if (netif_is_macvlan(dev))
dev = macvlan_dev_real_dev(dev);
res = netdev_get_tx_queue(dev, 0)->trans_start;
res = READ_ONCE(netdev_get_tx_queue(dev, 0)->trans_start);
for (i = 1; i < dev->num_tx_queues; i++) {
val = netdev_get_tx_queue(dev, i)->trans_start;
val = READ_ONCE(netdev_get_tx_queue(dev, i)->trans_start);
if (val && time_after(val, res))
res = val;
}
@ -445,11 +445,62 @@ unsigned long dev_trans_start(struct net_device *dev)
}
EXPORT_SYMBOL(dev_trans_start);
static void netif_freeze_queues(struct net_device *dev)
{
unsigned int i;
int cpu;
cpu = smp_processor_id();
for (i = 0; i < dev->num_tx_queues; i++) {
struct netdev_queue *txq = netdev_get_tx_queue(dev, i);
/* We are the only thread of execution doing a
* freeze, but we have to grab the _xmit_lock in
* order to synchronize with threads which are in
* the ->hard_start_xmit() handler and already
* checked the frozen bit.
*/
__netif_tx_lock(txq, cpu);
set_bit(__QUEUE_STATE_FROZEN, &txq->state);
__netif_tx_unlock(txq);
}
}
void netif_tx_lock(struct net_device *dev)
{
spin_lock(&dev->tx_global_lock);
netif_freeze_queues(dev);
}
EXPORT_SYMBOL(netif_tx_lock);
static void netif_unfreeze_queues(struct net_device *dev)
{
unsigned int i;
for (i = 0; i < dev->num_tx_queues; i++) {
struct netdev_queue *txq = netdev_get_tx_queue(dev, i);
/* No need to grab the _xmit_lock here. If the
* queue is not stopped for another reason, we
* force a schedule.
*/
clear_bit(__QUEUE_STATE_FROZEN, &txq->state);
netif_schedule_queue(txq);
}
}
void netif_tx_unlock(struct net_device *dev)
{
netif_unfreeze_queues(dev);
spin_unlock(&dev->tx_global_lock);
}
EXPORT_SYMBOL(netif_tx_unlock);
static void dev_watchdog(struct timer_list *t)
{
struct net_device *dev = from_timer(dev, t, watchdog_timer);
netif_tx_lock(dev);
spin_lock(&dev->tx_global_lock);
if (!qdisc_tx_is_noop(dev)) {
if (netif_device_present(dev) &&
netif_running(dev) &&
@ -462,21 +513,23 @@ static void dev_watchdog(struct timer_list *t)
struct netdev_queue *txq;
txq = netdev_get_tx_queue(dev, i);
trans_start = txq->trans_start;
trans_start = READ_ONCE(txq->trans_start);
if (netif_xmit_stopped(txq) &&
time_after(jiffies, (trans_start +
dev->watchdog_timeo))) {
some_queue_timedout = 1;
txq->trans_timeout++;
atomic_long_inc(&txq->trans_timeout);
break;
}
}
if (some_queue_timedout) {
if (unlikely(some_queue_timedout)) {
trace_net_dev_xmit_timeout(dev, i);
WARN_ONCE(1, KERN_INFO "NETDEV WATCHDOG: %s (%s): transmit queue %u timed out\n",
dev->name, netdev_drivername(dev), i);
netif_freeze_queues(dev);
dev->netdev_ops->ndo_tx_timeout(dev, i);
netif_unfreeze_queues(dev);
}
if (!mod_timer(&dev->watchdog_timer,
round_jiffies(jiffies +
@ -484,7 +537,7 @@ static void dev_watchdog(struct timer_list *t)
dev_hold(dev);
}
}
netif_tx_unlock(dev);
spin_unlock(&dev->tx_global_lock);
dev_put(dev);
}
@ -1148,7 +1201,7 @@ static void transition_one_qdisc(struct net_device *dev,
rcu_assign_pointer(dev_queue->qdisc, new_qdisc);
if (need_watchdog_p) {
dev_queue->trans_start = 0;
WRITE_ONCE(dev_queue->trans_start, 0);
*need_watchdog_p = 1;
}
}