net: mana: Add NULL guards in teardown path to prevent panic on attach failure

When queue allocation fails partway through, the error cleanup frees
apc->tx_qp and apc->rxqs and sets them to NULL. Several teardown paths,
such as mana_remove(), mana_change_mtu() recovery, and the internal
error handling in mana_alloc_queues(), can subsequently call into
functions that dereference these pointers without NULL checks:

- mana_chn_setxdp() dereferences apc->rxqs[0], causing a NULL pointer
  dereference panic (CR2: 0000000000000000 at mana_chn_setxdp+0x26).
- mana_destroy_vport() iterates apc->rxqs without a NULL check.
- mana_fence_rqs() iterates apc->rxqs without a NULL check.
- mana_dealloc_queues() iterates apc->tx_qp without a NULL check.

Add NULL guards for apc->rxqs in mana_fence_rqs() and
mana_destroy_vport(), and before the mana_chn_setxdp() call in
mana_dealloc_queues(). Also add a NULL guard for apc->tx_qp in
mana_dealloc_queues() to skip TX queue draining when the TX queues
were never allocated or have already been freed.

Fixes: ca9c54d2d6 ("net: mana: Add a driver for Microsoft Azure Network Adapter (MANA)")
Reviewed-by: Haiyang Zhang <haiyangz@microsoft.com>
Signed-off-by: Dipayaan Roy <dipayanroy@linux.microsoft.com>
Link: https://patch.msgid.link/20260525081129.1230035-2-dipayanroy@linux.microsoft.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
This commit is contained in:
Dipayaan Roy 2026-05-25 01:08:24 -07:00 committed by Jakub Kicinski
parent 3e20009988
commit 17bfe0a8c0

View File

@ -1727,6 +1727,9 @@ static void mana_fence_rqs(struct mana_port_context *apc)
struct mana_rxq *rxq;
int err;
if (!apc->rxqs)
return;
for (rxq_idx = 0; rxq_idx < apc->num_queues; rxq_idx++) {
rxq = apc->rxqs[rxq_idx];
err = mana_fence_rq(apc, rxq);
@ -2858,13 +2861,16 @@ static void mana_destroy_vport(struct mana_port_context *apc)
struct mana_rxq *rxq;
u32 rxq_idx;
for (rxq_idx = 0; rxq_idx < apc->num_queues; rxq_idx++) {
rxq = apc->rxqs[rxq_idx];
if (!rxq)
continue;
if (apc->rxqs) {
mana_destroy_rxq(apc, rxq, true);
apc->rxqs[rxq_idx] = NULL;
for (rxq_idx = 0; rxq_idx < apc->num_queues; rxq_idx++) {
rxq = apc->rxqs[rxq_idx];
if (!rxq)
continue;
mana_destroy_rxq(apc, rxq, true);
apc->rxqs[rxq_idx] = NULL;
}
}
mana_destroy_txq(apc);
@ -3269,7 +3275,8 @@ static int mana_dealloc_queues(struct net_device *ndev)
if (apc->port_is_up)
return -EINVAL;
mana_chn_setxdp(apc, NULL);
if (apc->rxqs)
mana_chn_setxdp(apc, NULL);
if (gd->gdma_context->is_pf && !apc->ac->bm_hostmode)
mana_pf_deregister_filter(apc);
@ -3287,33 +3294,38 @@ static int mana_dealloc_queues(struct net_device *ndev)
* number of queues.
*/
for (i = 0; i < apc->num_queues; i++) {
txq = &apc->tx_qp[i].txq;
tsleep = 1000;
while (atomic_read(&txq->pending_sends) > 0 &&
time_before(jiffies, timeout)) {
usleep_range(tsleep, tsleep + 1000);
tsleep <<= 1;
}
if (atomic_read(&txq->pending_sends)) {
err = pcie_flr(to_pci_dev(gd->gdma_context->dev));
if (err) {
netdev_err(ndev, "flr failed %d with %d pkts pending in txq %u\n",
err, atomic_read(&txq->pending_sends),
txq->gdma_txq_id);
if (apc->tx_qp) {
for (i = 0; i < apc->num_queues; i++) {
txq = &apc->tx_qp[i].txq;
tsleep = 1000;
while (atomic_read(&txq->pending_sends) > 0 &&
time_before(jiffies, timeout)) {
usleep_range(tsleep, tsleep + 1000);
tsleep <<= 1;
}
break;
if (atomic_read(&txq->pending_sends)) {
err =
pcie_flr(to_pci_dev(gd->gdma_context->dev));
if (err) {
netdev_err(ndev, "flr failed %d with %d pkts pending in txq %u\n",
err,
atomic_read(&txq->pending_sends),
txq->gdma_txq_id);
}
break;
}
}
for (i = 0; i < apc->num_queues; i++) {
txq = &apc->tx_qp[i].txq;
while ((skb = skb_dequeue(&txq->pending_skbs))) {
mana_unmap_skb(skb, apc);
dev_kfree_skb_any(skb);
}
atomic_set(&txq->pending_sends, 0);
}
}
for (i = 0; i < apc->num_queues; i++) {
txq = &apc->tx_qp[i].txq;
while ((skb = skb_dequeue(&txq->pending_skbs))) {
mana_unmap_skb(skb, apc);
dev_kfree_skb_any(skb);
}
atomic_set(&txq->pending_sends, 0);
}
/* We're 100% sure the queues can no longer be woken up, because
* we're sure now mana_poll_tx_cq() can't be running.
*/