net: mana: Implement ndo_tx_timeout and serialize queue resets per port.

Implement .ndo_tx_timeout for MANA so that any stalled TX queue can be
detected and a device-controlled port reset for all queues can be scheduled
on an ordered workqueue. Resetting all queues on stall detection is
recommended by the hardware team.

Reviewed-by: Pavan Chebbi <pavan.chebbi@broadcom.com>
Reviewed-by: Haiyang Zhang <haiyangz@microsoft.com>
Signed-off-by: Dipayaan Roy <dipayanroy@linux.microsoft.com>
Link: https://patch.msgid.link/20260112130552.GA11785@linuxonhyperv3.guj3yctzbm1etfxqx2vob5hsef.xx.internal.cloudapp.net
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
This commit is contained in:
Dipayaan Roy 2026-01-12 05:05:52 -08:00 committed by Jakub Kicinski
parent 969994f032
commit 3b194343c2
3 changed files with 84 additions and 3 deletions

View File

@ -299,6 +299,39 @@ static int mana_get_gso_hs(struct sk_buff *skb)
return gso_hs; return gso_hs;
} }
/*
 * Work handler for a port's queue_reset_work item.
 *
 * Recovers the owning port_context from @work, then, while holding the
 * RTNL lock, pre-allocates RX buffers, detaches the net device and
 * attaches it again. Failures are logged via netdev_err(); nothing is
 * returned to the caller.
 */
static void mana_per_port_queue_reset_work_handler(struct work_struct *work)
{
	struct mana_port_context *port;
	struct net_device *netdev;
	int rc;

	port = container_of(work, struct mana_port_context, queue_reset_work);
	netdev = port->ndev;

	rtnl_lock();

	/* Reserve the RX buffers up front so the later mana_attach() does
	 * not fail for lack of memory.
	 */
	rc = mana_pre_alloc_rxbufs(port, netdev->mtu, port->num_queues);
	if (rc) {
		netdev_err(netdev, "Insufficient memory for reset post tx stall detection\n");
		/* Nothing was pre-allocated, so only the lock needs releasing. */
		rtnl_unlock();
		return;
	}

	rc = mana_detach(netdev, false);
	if (rc) {
		netdev_err(netdev, "mana_detach failed: %d\n", rc);
	} else {
		/* Only re-attach when the detach step succeeded. */
		rc = mana_attach(netdev);
		if (rc)
			netdev_err(netdev, "mana_attach failed: %d\n", rc);
	}

	/* The pre-allocated buffers are released on every path that got
	 * past the pre-allocation step, whether or not attach succeeded.
	 */
	mana_pre_dealloc_rxbufs(port);
	rtnl_unlock();
}
netdev_tx_t mana_start_xmit(struct sk_buff *skb, struct net_device *ndev) netdev_tx_t mana_start_xmit(struct sk_buff *skb, struct net_device *ndev)
{ {
enum mana_tx_pkt_format pkt_fmt = MANA_SHORT_PKT_FMT; enum mana_tx_pkt_format pkt_fmt = MANA_SHORT_PKT_FMT;
@ -839,6 +872,23 @@ static int mana_change_mtu(struct net_device *ndev, int new_mtu)
return err; return err;
} }
/*
 * .ndo_tx_timeout callback: schedule the per-port queue reset work.
 *
 * @netdev:  net device whose private data is the mana_port_context.
 * @txqueue: index of the timed-out TX queue; not used here, the same
 *           per-port work item is queued regardless of its value.
 */
static void mana_tx_timeout(struct net_device *netdev, unsigned int txqueue)
{
	struct mana_port_context *port = netdev_priv(netdev);
	struct mana_context *mctx = port->ac;
	struct gdma_context *gdma = mctx->gdma_dev->gdma_context;

	/* Queue the reset only when the device is not already in service;
	 * if it is, a tx queue reset is not required.
	 *
	 * Each port (apc) owns a single work instance, so a request made
	 * while that work is still pending is ignored rather than queued a
	 * second time.
	 */
	if (!gdma->in_service)
		queue_work(mctx->per_port_queue_reset_wq,
			   &port->queue_reset_work);
}
static int mana_shaper_set(struct net_shaper_binding *binding, static int mana_shaper_set(struct net_shaper_binding *binding,
const struct net_shaper *shaper, const struct net_shaper *shaper,
struct netlink_ext_ack *extack) struct netlink_ext_ack *extack)
@ -924,6 +974,7 @@ static const struct net_device_ops mana_devops = {
.ndo_bpf = mana_bpf, .ndo_bpf = mana_bpf,
.ndo_xdp_xmit = mana_xdp_xmit, .ndo_xdp_xmit = mana_xdp_xmit,
.ndo_change_mtu = mana_change_mtu, .ndo_change_mtu = mana_change_mtu,
.ndo_tx_timeout = mana_tx_timeout,
.net_shaper_ops = &mana_shaper_ops, .net_shaper_ops = &mana_shaper_ops,
}; };
@ -3287,6 +3338,8 @@ static int mana_probe_port(struct mana_context *ac, int port_idx,
ndev->min_mtu = ETH_MIN_MTU; ndev->min_mtu = ETH_MIN_MTU;
ndev->needed_headroom = MANA_HEADROOM; ndev->needed_headroom = MANA_HEADROOM;
ndev->dev_port = port_idx; ndev->dev_port = port_idx;
/* Recommended timeout based on HW FPGA re-config scenario. */
ndev->watchdog_timeo = 15 * HZ;
SET_NETDEV_DEV(ndev, gc->dev); SET_NETDEV_DEV(ndev, gc->dev);
netif_set_tso_max_size(ndev, GSO_MAX_SIZE); netif_set_tso_max_size(ndev, GSO_MAX_SIZE);
@ -3303,6 +3356,10 @@ static int mana_probe_port(struct mana_context *ac, int port_idx,
if (err) if (err)
goto reset_apc; goto reset_apc;
/* Initialize the per port queue reset work.*/
INIT_WORK(&apc->queue_reset_work,
mana_per_port_queue_reset_work_handler);
netdev_lockdep_set_classes(ndev); netdev_lockdep_set_classes(ndev);
ndev->hw_features = NETIF_F_SG | NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM; ndev->hw_features = NETIF_F_SG | NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM;
@ -3492,6 +3549,7 @@ int mana_probe(struct gdma_dev *gd, bool resuming)
{ {
struct gdma_context *gc = gd->gdma_context; struct gdma_context *gc = gd->gdma_context;
struct mana_context *ac = gd->driver_data; struct mana_context *ac = gd->driver_data;
struct mana_port_context *apc = NULL;
struct device *dev = gc->dev; struct device *dev = gc->dev;
u8 bm_hostmode = 0; u8 bm_hostmode = 0;
u16 num_ports = 0; u16 num_ports = 0;
@ -3549,6 +3607,14 @@ int mana_probe(struct gdma_dev *gd, bool resuming)
if (ac->num_ports > MAX_PORTS_IN_MANA_DEV) if (ac->num_ports > MAX_PORTS_IN_MANA_DEV)
ac->num_ports = MAX_PORTS_IN_MANA_DEV; ac->num_ports = MAX_PORTS_IN_MANA_DEV;
ac->per_port_queue_reset_wq =
create_singlethread_workqueue("mana_per_port_queue_reset_wq");
if (!ac->per_port_queue_reset_wq) {
dev_err(dev, "Failed to allocate per port queue reset workqueue\n");
err = -ENOMEM;
goto out;
}
if (!resuming) { if (!resuming) {
for (i = 0; i < ac->num_ports; i++) { for (i = 0; i < ac->num_ports; i++) {
err = mana_probe_port(ac, i, &ac->ports[i]); err = mana_probe_port(ac, i, &ac->ports[i]);
@ -3565,6 +3631,8 @@ int mana_probe(struct gdma_dev *gd, bool resuming)
} else { } else {
for (i = 0; i < ac->num_ports; i++) { for (i = 0; i < ac->num_ports; i++) {
rtnl_lock(); rtnl_lock();
apc = netdev_priv(ac->ports[i]);
enable_work(&apc->queue_reset_work);
err = mana_attach(ac->ports[i]); err = mana_attach(ac->ports[i]);
rtnl_unlock(); rtnl_unlock();
/* we log the port for which the attach failed and stop /* we log the port for which the attach failed and stop
@ -3616,13 +3684,15 @@ void mana_remove(struct gdma_dev *gd, bool suspending)
for (i = 0; i < ac->num_ports; i++) { for (i = 0; i < ac->num_ports; i++) {
ndev = ac->ports[i]; ndev = ac->ports[i];
apc = netdev_priv(ndev);
if (!ndev) { if (!ndev) {
if (i == 0) if (i == 0)
dev_err(dev, "No net device to remove\n"); dev_err(dev, "No net device to remove\n");
goto out; goto out;
} }
apc = netdev_priv(ndev);
disable_work_sync(&apc->queue_reset_work);
/* All cleanup actions should stay after rtnl_lock(), otherwise /* All cleanup actions should stay after rtnl_lock(), otherwise
* other functions may access partially cleaned up data. * other functions may access partially cleaned up data.
*/ */
@ -3649,6 +3719,11 @@ void mana_remove(struct gdma_dev *gd, bool suspending)
mana_destroy_eq(ac); mana_destroy_eq(ac);
out: out:
if (ac->per_port_queue_reset_wq) {
destroy_workqueue(ac->per_port_queue_reset_wq);
ac->per_port_queue_reset_wq = NULL;
}
mana_gd_deregister_device(gd); mana_gd_deregister_device(gd);
if (suspending) if (suspending)

View File

@ -598,6 +598,10 @@ enum {
/* Driver can self reset on FPGA Reconfig EQE notification */ /* Driver can self reset on FPGA Reconfig EQE notification */
#define GDMA_DRV_CAP_FLAG_1_HANDLE_RECONFIG_EQE BIT(17) #define GDMA_DRV_CAP_FLAG_1_HANDLE_RECONFIG_EQE BIT(17)
/* Driver detects stalled send queues and recovers them */
#define GDMA_DRV_CAP_FLAG_1_HANDLE_STALL_SQ_RECOVERY BIT(18)
#define GDMA_DRV_CAP_FLAG_1_HW_VPORT_LINK_AWARE BIT(6) #define GDMA_DRV_CAP_FLAG_1_HW_VPORT_LINK_AWARE BIT(6)
/* Driver supports linearizing the skb when num_sge exceeds hardware limit */ /* Driver supports linearizing the skb when num_sge exceeds hardware limit */
@ -621,7 +625,8 @@ enum {
GDMA_DRV_CAP_FLAG_1_HW_VPORT_LINK_AWARE | \ GDMA_DRV_CAP_FLAG_1_HW_VPORT_LINK_AWARE | \
GDMA_DRV_CAP_FLAG_1_PERIODIC_STATS_QUERY | \ GDMA_DRV_CAP_FLAG_1_PERIODIC_STATS_QUERY | \
GDMA_DRV_CAP_FLAG_1_SKB_LINEARIZE | \ GDMA_DRV_CAP_FLAG_1_SKB_LINEARIZE | \
GDMA_DRV_CAP_FLAG_1_PROBE_RECOVERY) GDMA_DRV_CAP_FLAG_1_PROBE_RECOVERY | \
GDMA_DRV_CAP_FLAG_1_HANDLE_STALL_SQ_RECOVERY)
#define GDMA_DRV_CAP_FLAGS2 0 #define GDMA_DRV_CAP_FLAGS2 0

View File

@ -480,7 +480,7 @@ struct mana_context {
struct mana_ethtool_hc_stats hc_stats; struct mana_ethtool_hc_stats hc_stats;
struct mana_eq *eqs; struct mana_eq *eqs;
struct dentry *mana_eqs_debugfs; struct dentry *mana_eqs_debugfs;
struct workqueue_struct *per_port_queue_reset_wq;
/* Workqueue for querying hardware stats */ /* Workqueue for querying hardware stats */
struct delayed_work gf_stats_work; struct delayed_work gf_stats_work;
bool hwc_timeout_occurred; bool hwc_timeout_occurred;
@ -495,6 +495,7 @@ struct mana_context {
struct mana_port_context { struct mana_port_context {
struct mana_context *ac; struct mana_context *ac;
struct net_device *ndev; struct net_device *ndev;
struct work_struct queue_reset_work;
u8 mac_addr[ETH_ALEN]; u8 mac_addr[ETH_ALEN];