From cfbc8b6babf24ab252b6b5db936adbb5f3ca45dd Mon Sep 17 00:00:00 2001 From: Shahar Shitrit Date: Mon, 12 Jan 2026 11:16:21 +0200 Subject: [PATCH 1/3] net: Introduce netif_xmit_timeout_ms() helper Introduce a new helper function netif_xmit_timeout_ms() to check if a TX queue is stopped and has timed out and report the timeout duration. This makes the timeout logic reusable, and will be used in several places in subsequent patches. Signed-off-by: Shahar Shitrit Reviewed-by: Yael Chemla Signed-off-by: Tariq Toukan Link: https://patch.msgid.link/1768209383-1546791-2-git-send-email-tariqt@nvidia.com Reviewed-by: Simon Horman Signed-off-by: Paolo Abeni --- include/net/netdev_queues.h | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/include/net/netdev_queues.h b/include/net/netdev_queues.h index cd00e0406cf4..b55d3b9cb9c2 100644 --- a/include/net/netdev_queues.h +++ b/include/net/netdev_queues.h @@ -310,6 +310,17 @@ static inline void netif_subqueue_sent(const struct net_device *dev, netdev_tx_sent_queue(txq, bytes); } +static inline unsigned int netif_xmit_timeout_ms(struct netdev_queue *txq) +{ + unsigned long trans_start = READ_ONCE(txq->trans_start); + + if (netif_xmit_stopped(txq) && + time_after(jiffies, trans_start + txq->dev->watchdog_timeo)) + return jiffies_to_msecs(jiffies - trans_start); + + return 0; +} + #define netif_subqueue_maybe_stop(dev, idx, get_desc, stop_thrs, start_thrs) \ ({ \ struct netdev_queue *_txq; \ From 3ae02d659773f8f3aa0cebe04d42c88113a2dd0d Mon Sep 17 00:00:00 2001 From: Shahar Shitrit Date: Mon, 12 Jan 2026 11:16:22 +0200 Subject: [PATCH 2/3] net: hns3: Use netif_xmit_timeout_ms() helper Replace the open-coded TX queue timeout check in hns3_get_timeout_queue() with a call to netif_xmit_timeout_ms() helper. Signed-off-by: Shahar Shitrit Reviewed-by: Yael Chemla Signed-off-by: Tariq Toukan Reviewed-by: Jijie Shao Link: https://patch.msgid.link/1768209383-1546791-3-git-send-email-tariqt@nvidia.com Reviewed-by: Simon Horman Signed-off-by: Paolo Abeni --- drivers/net/ethernet/hisilicon/hns3/hns3_enet.c | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c index 7a0654e2d3dd..7b9269f6fdfc 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c @@ -25,6 +25,7 @@ #include #include #include +#include #include "hnae3.h" #include "hns3_enet.h" @@ -2807,14 +2808,12 @@ static int hns3_get_timeout_queue(struct net_device *ndev) /* Find the stopped queue the same way the stack does */ for (i = 0; i < ndev->num_tx_queues; i++) { + unsigned int timedout_ms; struct netdev_queue *q; - unsigned long trans_start; q = netdev_get_tx_queue(ndev, i); - trans_start = READ_ONCE(q->trans_start); - if (netif_xmit_stopped(q) && - time_after(jiffies, - (trans_start + ndev->watchdog_timeo))) { + timedout_ms = netif_xmit_timeout_ms(q); + if (timedout_ms) { #ifdef CONFIG_BQL struct dql *dql = &q->dql; @@ -2823,8 +2822,7 @@ static int hns3_get_timeout_queue(struct net_device *ndev) dql->adj_limit, dql->num_completed); #endif netdev_info(ndev, "queue state: 0x%lx, delta msecs: %u\n", - q->state, - jiffies_to_msecs(jiffies - trans_start)); + q->state, timedout_ms); break; } } From b0ba734516d201c217b23bf39782792b5404fb1c Mon Sep 17 00:00:00 2001 From: Shahar Shitrit Date: Mon, 12 Jan 2026 11:16:23 +0200 Subject: [PATCH 3/3] net/mlx5e: Refine TX timeout handling to skip non-timed-out SQ MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit mlx5e_tx_timeout_work() is invoked when the dev_watchdog reports a timed-out TX queue. Currently, the recovery flow is triggered for all stopped SQs, which is not always correct — some SQs may be temporarily stopped without actually timing out. Attempting to recover such SQs results in no EQE being polled (since no real timeout occurred), which the driver misinterprets as a recovery failure, unnecessarily causing channel reopening. Improve the logic to initiate recovery only for SQs that are both stopped and timed out. Utilize the helper introduced in the previous patch to determine whether the netdevice watchdog timeout period has elapsed since the SQ’s last transmit timestamp. Signed-off-by: Shahar Shitrit Reviewed-by: Yael Chemla Signed-off-by: Tariq Toukan Link: https://patch.msgid.link/1768209383-1546791-4-git-send-email-tariqt@nvidia.com Reviewed-by: Simon Horman Signed-off-by: Paolo Abeni --- drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 3ac47df83ac8..7dbcf71404d8 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -5137,7 +5137,7 @@ static void mlx5e_tx_timeout_work(struct work_struct *work) netdev_get_tx_queue(netdev, i); struct mlx5e_txqsq *sq = priv->txq2sq[i]; - if (!netif_xmit_stopped(dev_queue)) + if (!netif_xmit_timeout_ms(dev_queue)) continue; if (mlx5e_reporter_tx_timeout(sq))