Merge branch 'pci/virtualization'

- Mark ASM1164 SATA controller to avoid bus reset since it fails to train
  the Link after reset (Alex Williamson)

- Mark Nvidia GB10 Root Ports to avoid bus reset since they may fail to
  retrain the link after reset (Johnny-CC Chang)

- Add lockdep and other lock assertions (Ilpo Järvinen)

- Add ACS quirk for Qualcomm Hamoa & Glymur, which provide ACS-like
  features but don't advertise an ACS Capability (Krishna Chaitanya
  Chundru)

- Add ACS quirk for Pericom PI7C9X2G404 switches, which fail under load
  when P2P Redirect Request is enabled (Nicolas Cavallari)

- Remove an incorrect unlock in pci_slot_trylock() error handling (Jinhui
  Guo)

- Lock the bridge device for slot reset (Keith Busch)

- Enable ACS after IOMMU configuration on OF platforms so ACS is enabled on
  all devices; previously the first device enumeration (typically a Root
  Port) was omitted (Manivannan Sadhasivam)

- Disable ACS Source Validation for IDT 0x80b5 and 0x8090 switches to work
  around hardware erratum; previously ACS SV was temporarily disabled,
  which worked for enumeration but not after reset (Manivannan Sadhasivam)

* pci/virtualization:
  PCI: Disable ACS SV for IDT 0x8090 switch
  PCI: Disable ACS SV for IDT 0x80b5 switch
  PCI: Cache ACS Capabilities register
  PCI: Enable ACS after configuring IOMMU for OF platforms
  PCI: Add ACS quirk for Pericom PI7C9X2G404 switches [12d8:b404]
  PCI: Add ACS quirk for Qualcomm Hamoa & Glymur
  PCI: Use device_lock_assert() to verify device lock is held
  PCI: Use lockdep_assert_held(pci_bus_sem) to verify lock is held
  PCI: Fix pci_slot_lock () device locking
  PCI: Fix pci_slot_trylock() error handling
  PCI: Mark Nvidia GB10 to avoid bus reset
  PCI: Mark ASM1164 SATA controller to avoid bus reset
This commit is contained in:
Bjorn Helgaas 2026-02-06 17:09:26 -06:00
commit 2095b9dd2e
6 changed files with 96 additions and 78 deletions

View File

@ -1650,6 +1650,14 @@ static int pci_dma_configure(struct device *dev)
ret = acpi_dma_configure(dev, acpi_get_dma_attr(adev));
}
/*
* Attempt to enable ACS regardless of capability because some Root
* Ports (e.g. those quirked with *_intel_pch_acs_*) do not have
* the standard ACS capability but still support ACS via those
* quirks.
*/
pci_enable_acs(to_pci_dev(dev));
pci_put_host_bridge_device(bridge);
/* @drv may not be valid when we're called from the IOMMU layer */

View File

@ -13,6 +13,7 @@
#include <linux/delay.h>
#include <linux/dmi.h>
#include <linux/init.h>
#include <linux/lockdep.h>
#include <linux/msi.h>
#include <linux/of.h>
#include <linux/pci.h>
@ -886,7 +887,6 @@ static const char *disable_acs_redir_param;
static const char *config_acs_param;
struct pci_acs {
u16 cap;
u16 ctrl;
u16 fw_ctrl;
};
@ -989,27 +989,27 @@ static void __pci_config_acs(struct pci_dev *dev, struct pci_acs *caps,
static void pci_std_enable_acs(struct pci_dev *dev, struct pci_acs *caps)
{
/* Source Validation */
caps->ctrl |= (caps->cap & PCI_ACS_SV);
caps->ctrl |= (dev->acs_capabilities & PCI_ACS_SV);
/* P2P Request Redirect */
caps->ctrl |= (caps->cap & PCI_ACS_RR);
caps->ctrl |= (dev->acs_capabilities & PCI_ACS_RR);
/* P2P Completion Redirect */
caps->ctrl |= (caps->cap & PCI_ACS_CR);
caps->ctrl |= (dev->acs_capabilities & PCI_ACS_CR);
/* Upstream Forwarding */
caps->ctrl |= (caps->cap & PCI_ACS_UF);
caps->ctrl |= (dev->acs_capabilities & PCI_ACS_UF);
/* Enable Translation Blocking for external devices and noats */
if (pci_ats_disabled() || dev->external_facing || dev->untrusted)
caps->ctrl |= (caps->cap & PCI_ACS_TB);
caps->ctrl |= (dev->acs_capabilities & PCI_ACS_TB);
}
/**
* pci_enable_acs - enable ACS if hardware supports it
* @dev: the PCI device
*/
static void pci_enable_acs(struct pci_dev *dev)
void pci_enable_acs(struct pci_dev *dev)
{
struct pci_acs caps;
bool enable_acs = false;
@ -1025,7 +1025,6 @@ static void pci_enable_acs(struct pci_dev *dev)
if (!pos)
return;
pci_read_config_word(dev, pos + PCI_ACS_CAP, &caps.cap);
pci_read_config_word(dev, pos + PCI_ACS_CTRL, &caps.ctrl);
caps.fw_ctrl = caps.ctrl;
@ -3517,7 +3516,7 @@ void pci_configure_ari(struct pci_dev *dev)
static bool pci_acs_flags_enabled(struct pci_dev *pdev, u16 acs_flags)
{
int pos;
u16 cap, ctrl;
u16 ctrl;
pos = pdev->acs_cap;
if (!pos)
@ -3528,8 +3527,7 @@ static bool pci_acs_flags_enabled(struct pci_dev *pdev, u16 acs_flags)
* or only required if controllable. Features missing from the
* capability field can therefore be assumed as hard-wired enabled.
*/
pci_read_config_word(pdev, pos + PCI_ACS_CAP, &cap);
acs_flags &= (cap | PCI_ACS_EC);
acs_flags &= (pdev->acs_capabilities | PCI_ACS_EC);
pci_read_config_word(pdev, pos + PCI_ACS_CTRL, &ctrl);
return (ctrl & acs_flags) == acs_flags;
@ -3650,15 +3648,15 @@ bool pci_acs_path_enabled(struct pci_dev *start,
*/
void pci_acs_init(struct pci_dev *dev)
{
dev->acs_cap = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_ACS);
int pos;
/*
* Attempt to enable ACS regardless of capability because some Root
* Ports (e.g. those quirked with *_intel_pch_acs_*) do not have
* the standard ACS capability but still support ACS via those
* quirks.
*/
pci_enable_acs(dev);
dev->acs_cap = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_ACS);
pos = dev->acs_cap;
if (!pos)
return;
pci_read_config_word(dev, pos + PCI_ACS_CAP, &dev->acs_capabilities);
pci_disable_broken_acs_cap(dev);
}
/**
@ -4625,7 +4623,7 @@ bool pcie_wait_for_link(struct pci_dev *pdev, bool active)
* spec says 100 ms, but firmware can lower it and we allow drivers to
* increase it as well.
*
* Called with @pci_bus_sem locked for reading.
* Context: Called with @pci_bus_sem locked for reading.
*/
static int pci_bus_max_d3cold_delay(const struct pci_bus *bus)
{
@ -4633,6 +4631,8 @@ static int pci_bus_max_d3cold_delay(const struct pci_bus *bus)
int min_delay = 100;
int max_delay = 0;
lockdep_assert_held(&pci_bus_sem);
list_for_each_entry(pdev, &bus->devices, bus_list) {
if (pdev->d3cold_delay < min_delay)
min_delay = pdev->d3cold_delay;
@ -4970,6 +4970,7 @@ static void pci_dev_save_and_disable(struct pci_dev *dev)
* races with ->remove() by the device lock, which must be held by
* the caller.
*/
device_lock_assert(&dev->dev);
if (err_handler && err_handler->reset_prepare)
err_handler->reset_prepare(dev);
else if (dev->driver)
@ -5040,7 +5041,9 @@ const struct pci_reset_fn_method pci_reset_fn_methods[] = {
* device including MSI, bus mastering, BARs, decoding IO and memory spaces,
* etc.
*
* Returns 0 if the device function was successfully reset or negative if the
* Context: The caller must hold the device lock.
*
* Return: 0 if the device function was successfully reset or negative if the
* device doesn't support resetting a single function.
*/
int __pci_reset_function_locked(struct pci_dev *dev)
@ -5049,6 +5052,7 @@ int __pci_reset_function_locked(struct pci_dev *dev)
const struct pci_reset_fn_method *method;
might_sleep();
device_lock_assert(&dev->dev);
/*
* A reset method returns -ENOTTY if it doesn't support this device and
@ -5171,13 +5175,17 @@ EXPORT_SYMBOL_GPL(pci_reset_function);
* over the reset. It also differs from pci_reset_function() in that it
* requires the PCI device lock to be held.
*
* Returns 0 if the device function was successfully reset or negative if the
* Context: The caller must hold the device lock.
*
* Return: 0 if the device function was successfully reset or negative if the
* device doesn't support resetting a single function.
*/
int pci_reset_function_locked(struct pci_dev *dev)
{
int rc;
device_lock_assert(&dev->dev);
if (!pci_reset_supported(dev))
return -ENOTTY;
@ -5293,10 +5301,9 @@ static int pci_bus_trylock(struct pci_bus *bus)
/* Do any devices on or below this slot prevent a bus reset? */
static bool pci_slot_resettable(struct pci_slot *slot)
{
struct pci_dev *dev;
struct pci_dev *dev, *bridge = slot->bus->self;
if (slot->bus->self &&
(slot->bus->self->dev_flags & PCI_DEV_FLAGS_NO_BUS_RESET))
if (bridge && (bridge->dev_flags & PCI_DEV_FLAGS_NO_BUS_RESET))
return false;
list_for_each_entry(dev, &slot->bus->devices, bus_list) {
@ -5313,7 +5320,10 @@ static bool pci_slot_resettable(struct pci_slot *slot)
/* Lock devices from the top of the tree down */
static void pci_slot_lock(struct pci_slot *slot)
{
struct pci_dev *dev;
struct pci_dev *dev, *bridge = slot->bus->self;
if (bridge)
pci_dev_lock(bridge);
list_for_each_entry(dev, &slot->bus->devices, bus_list) {
if (!dev->slot || dev->slot != slot)
@ -5328,7 +5338,7 @@ static void pci_slot_lock(struct pci_slot *slot)
/* Unlock devices from the bottom of the tree up */
static void pci_slot_unlock(struct pci_slot *slot)
{
struct pci_dev *dev;
struct pci_dev *dev, *bridge = slot->bus->self;
list_for_each_entry(dev, &slot->bus->devices, bus_list) {
if (!dev->slot || dev->slot != slot)
@ -5338,21 +5348,25 @@ static void pci_slot_unlock(struct pci_slot *slot)
else
pci_dev_unlock(dev);
}
if (bridge)
pci_dev_unlock(bridge);
}
/* Return 1 on successful lock, 0 on contention */
static int pci_slot_trylock(struct pci_slot *slot)
{
struct pci_dev *dev;
struct pci_dev *dev, *bridge = slot->bus->self;
if (bridge && !pci_dev_trylock(bridge))
return 0;
list_for_each_entry(dev, &slot->bus->devices, bus_list) {
if (!dev->slot || dev->slot != slot)
continue;
if (dev->subordinate) {
if (!pci_bus_trylock(dev->subordinate)) {
pci_dev_unlock(dev);
if (!pci_bus_trylock(dev->subordinate))
goto unlock;
}
} else if (!pci_dev_trylock(dev))
goto unlock;
}
@ -5368,6 +5382,9 @@ static int pci_slot_trylock(struct pci_slot *slot)
else
pci_dev_unlock(dev);
}
if (bridge)
pci_dev_unlock(bridge);
return 0;
}

View File

@ -469,7 +469,6 @@ bool pci_bus_read_dev_vendor_id(struct pci_bus *bus, int devfn, u32 *pl,
int rrs_timeout);
bool pci_bus_generic_read_dev_vendor_id(struct pci_bus *bus, int devfn, u32 *pl,
int rrs_timeout);
int pci_idt_bus_quirk(struct pci_bus *bus, int devfn, u32 *pl, int rrs_timeout);
int pci_setup_device(struct pci_dev *dev);
void __pci_size_stdbars(struct pci_dev *dev, int count,
@ -1000,10 +999,12 @@ static inline resource_size_t pci_resource_alignment(struct pci_dev *dev,
}
void pci_acs_init(struct pci_dev *dev);
void pci_enable_acs(struct pci_dev *dev);
#ifdef CONFIG_PCI_QUIRKS
int pci_dev_specific_acs_enabled(struct pci_dev *dev, u16 acs_flags);
int pci_dev_specific_enable_acs(struct pci_dev *dev);
int pci_dev_specific_disable_acs_redir(struct pci_dev *dev);
void pci_disable_broken_acs_cap(struct pci_dev *pdev);
int pcie_failed_link_retrain(struct pci_dev *dev);
#else
static inline int pci_dev_specific_acs_enabled(struct pci_dev *dev,
@ -1019,6 +1020,7 @@ static inline int pci_dev_specific_disable_acs_redir(struct pci_dev *dev)
{
return -ENOTTY;
}
static inline void pci_disable_broken_acs_cap(struct pci_dev *dev) { }
static inline int pcie_failed_link_retrain(struct pci_dev *dev)
{
return -ENOTTY;

View File

@ -2549,18 +2549,6 @@ bool pci_bus_generic_read_dev_vendor_id(struct pci_bus *bus, int devfn, u32 *l,
bool pci_bus_read_dev_vendor_id(struct pci_bus *bus, int devfn, u32 *l,
int timeout)
{
#ifdef CONFIG_PCI_QUIRKS
struct pci_dev *bridge = bus->self;
/*
* Certain IDT switches have an issue where they improperly trigger
* ACS Source Validation errors on completions for config reads.
*/
if (bridge && bridge->vendor == PCI_VENDOR_ID_IDT &&
bridge->device == 0x80b5)
return pci_idt_bus_quirk(bus, devfn, l, timeout);
#endif
return pci_bus_generic_read_dev_vendor_id(bus, devfn, l, timeout);
}
EXPORT_SYMBOL(pci_bus_read_dev_vendor_id);

View File

@ -3758,6 +3758,14 @@ static void quirk_no_bus_reset(struct pci_dev *dev)
dev->dev_flags |= PCI_DEV_FLAGS_NO_BUS_RESET;
}
/*
* After asserting Secondary Bus Reset to downstream devices via a GB10
* Root Port, the link may not retrain correctly.
* https://lore.kernel.org/r/20251113084441.2124737-1-Johnny-CC.Chang@mediatek.com
*/
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_NVIDIA, 0x22CE, quirk_no_bus_reset);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_NVIDIA, 0x22D0, quirk_no_bus_reset);
/*
* Some NVIDIA GPU devices do not work with bus reset, SBR needs to be
* prevented for those affected devices.
@ -3801,6 +3809,16 @@ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_CAVIUM, 0xa100, quirk_no_bus_reset);
*/
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_TI, 0xb005, quirk_no_bus_reset);
/*
* Reports from users making use of PCI device assignment with ASM1164
* controllers indicate an issue with bus reset where the device fails to
* retrain. The issue appears more common in configurations with multiple
* controllers. The device does indicate PM reset support (NoSoftRst-),
* therefore this still leaves a viable reset method.
* https://forum.proxmox.com/threads/problems-with-pcie-passthrough-with-two-identical-devices.149003/
*/
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_ASMEDIA, 0x1164, quirk_no_bus_reset);
static void quirk_no_pm_reset(struct pci_dev *dev)
{
/*
@ -5127,6 +5145,10 @@ static const struct pci_dev_acs_enabled {
{ PCI_VENDOR_ID_QCOM, 0x0401, pci_quirk_qcom_rp_acs },
/* QCOM SA8775P root port */
{ PCI_VENDOR_ID_QCOM, 0x0115, pci_quirk_qcom_rp_acs },
/* QCOM Hamoa root port */
{ PCI_VENDOR_ID_QCOM, 0x0111, pci_quirk_qcom_rp_acs },
/* QCOM Glymur root port */
{ PCI_VENDOR_ID_QCOM, 0x0120, pci_quirk_qcom_rp_acs },
/* HXT SD4800 root ports. The ACS design is same as QCOM QDF2xxx */
{ PCI_VENDOR_ID_HXT, 0x0401, pci_quirk_qcom_rp_acs },
/* Intel PCH root ports */
@ -5800,7 +5822,7 @@ DECLARE_PCI_FIXUP_CLASS_RESUME_EARLY(PCI_VENDOR_ID_NVIDIA, PCI_ANY_ID,
/*
* Some IDT switches incorrectly flag an ACS Source Validation error on
* completions for config read requests even though PCIe r4.0, sec
* completions for config read requests even though PCIe r7.0, sec
* 6.12.1.1, says that completions are never affected by ACS Source
* Validation. Here's the text of IDT 89H32H8G3-YC, erratum #36:
*
@ -5813,44 +5835,20 @@ DECLARE_PCI_FIXUP_CLASS_RESUME_EARLY(PCI_VENDOR_ID_NVIDIA, PCI_ANY_ID,
*
* The workaround suggested by IDT is to issue a config write to the
* downstream device before issuing the first config read. This allows the
* downstream device to capture its bus and device numbers (see PCIe r4.0,
* sec 2.2.9), thus avoiding the ACS error on the completion.
* downstream device to capture its bus and device numbers (see PCIe r7.0,
* sec 2.2.9.1), thus avoiding the ACS error on the completion.
*
* However, we don't know when the device is ready to accept the config
* write, so we do config reads until we receive a non-Config Request Retry
* Status, then do the config write.
*
* To avoid hitting the erratum when doing the config reads, we disable ACS
* SV around this process.
* write, and the issue affects resets of the switch as well as enumeration,
* so disable use of ACS SV for these devices altogether.
*/
int pci_idt_bus_quirk(struct pci_bus *bus, int devfn, u32 *l, int timeout)
void pci_disable_broken_acs_cap(struct pci_dev *pdev)
{
int pos;
u16 ctrl = 0;
bool found;
struct pci_dev *bridge = bus->self;
pos = bridge->acs_cap;
/* Disable ACS SV before initial config reads */
if (pos) {
pci_read_config_word(bridge, pos + PCI_ACS_CTRL, &ctrl);
if (ctrl & PCI_ACS_SV)
pci_write_config_word(bridge, pos + PCI_ACS_CTRL,
ctrl & ~PCI_ACS_SV);
if (pdev->vendor == PCI_VENDOR_ID_IDT &&
(pdev->device == 0x80b5 || pdev->device == 0x8090)) {
pci_info(pdev, "Disabling broken ACS SV; downstream device isolation reduced\n");
pdev->acs_capabilities &= ~PCI_ACS_SV;
}
found = pci_bus_generic_read_dev_vendor_id(bus, devfn, l, timeout);
/* Write Vendor ID (read-only) so the endpoint latches its bus/dev */
if (found)
pci_bus_write_config_word(bus, devfn, PCI_VENDOR_ID, 0);
/* Re-enable ACS_SV if it was previously enabled */
if (ctrl & PCI_ACS_SV)
pci_write_config_word(bridge, pos + PCI_ACS_CTRL, ctrl);
return found;
}
/*
@ -6209,6 +6207,10 @@ DECLARE_PCI_FIXUP_ENABLE(PCI_VENDOR_ID_PERICOM, 0x2303,
pci_fixup_pericom_acs_store_forward);
DECLARE_PCI_FIXUP_RESUME(PCI_VENDOR_ID_PERICOM, 0x2303,
pci_fixup_pericom_acs_store_forward);
DECLARE_PCI_FIXUP_ENABLE(PCI_VENDOR_ID_PERICOM, 0xb404,
pci_fixup_pericom_acs_store_forward);
DECLARE_PCI_FIXUP_RESUME(PCI_VENDOR_ID_PERICOM, 0xb404,
pci_fixup_pericom_acs_store_forward);
static void nvidia_ion_ahci_fixup(struct pci_dev *pdev)
{

View File

@ -564,6 +564,7 @@ struct pci_dev {
struct pci_tsm *tsm; /* TSM operation state */
#endif
u16 acs_cap; /* ACS Capability offset */
u16 acs_capabilities; /* ACS Capabilities */
u8 supported_speeds; /* Supported Link Speeds Vector */
phys_addr_t rom; /* Physical address if not from BAR */
size_t romlen; /* Length if not from BAR */