From 0d5fd7a9323ce6bedd170e21e1e90b8904917c75 Mon Sep 17 00:00:00 2001 From: Nicolin Chen Date: Fri, 24 Apr 2026 18:15:24 -0700 Subject: [PATCH] iommu: Fix nested pci_dev_reset_iommu_prepare/done() Shuai found that cxl_reset_bus_function() calls pci_reset_bus_function() internally, and both of them call pci_dev_reset_iommu_prepare/done(). As pci_dev_reset_iommu_prepare() doesn't support re-entry, the inner call triggers a WARN_ON and returns -EBUSY, causing the entire device reset to fail. On the other hand, removing the outer calls in the PCI callers is unsafe. As pointed out by Kevin, device-specific quirks like reset_hinic_vf_dev() execute custom firmware waits after their inner pcie_flr() completes. If the IOMMU protection relied solely on the inner reset, the IOMMU would be unblocked prematurely while the device is still resetting. Instead, fix this by making pci_dev_reset_iommu_prepare/done() reentrant. Introduce gdev->reset_depth to count nested calls on the same device: only the outermost prepare() blocks the device, and only the matching outermost done() unblocks it. 
Fixes: c279e83953d9 ("iommu: Introduce pci_dev_reset_iommu_prepare/done()") Cc: stable@vger.kernel.org Reported-by: Shuai Xue Closes: https://lore.kernel.org/all/absKsk7qQOwzhpzv@Asurada-Nvidia/ Suggested-by: Kevin Tian Reviewed-by: Shuai Xue Reviewed-by: Jason Gunthorpe Reviewed-by: Kevin Tian Reviewed-by: Lu Baolu Signed-off-by: Nicolin Chen Signed-off-by: Joerg Roedel --- drivers/iommu/iommu.c | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index 221be84db5ad..301c76c40e3d 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -83,6 +83,7 @@ struct group_device { * - Device is undergoing a reset */ bool blocked; + unsigned int reset_depth; }; /* Iterate over each struct group_device in a struct iommu_group */ @@ -4045,20 +4046,23 @@ int pci_dev_reset_iommu_prepare(struct pci_dev *pdev) if (WARN_ON(!gdev)) return -ENODEV; - /* Re-entry is not allowed */ - if (WARN_ON(gdev->blocked)) - return -EBUSY; + if (gdev->reset_depth++) + return 0; ret = __iommu_group_alloc_blocking_domain(group); - if (ret) + if (ret) { + gdev->reset_depth--; return ret; + } /* Stage RID domain at blocking_domain while retaining group->domain */ if (group->domain != group->blocking_domain) { ret = __iommu_attach_device(group->blocking_domain, &pdev->dev, group->domain); - if (ret) + if (ret) { + gdev->reset_depth--; return ret; + } } /* @@ -4118,7 +4122,10 @@ void pci_dev_reset_iommu_done(struct pci_dev *pdev) if (WARN_ON(!gdev)) return; - if (!gdev->blocked) + /* Unbalanced done() calls would underflow the counter */ + if (WARN_ON(gdev->reset_depth == 0)) + return; + if (--gdev->reset_depth) return; if (WARN_ON(!group->blocking_domain))