From 2389d8dc38fee18176c49e9c4804f5ecc55807fa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= Date: Tue, 22 Apr 2025 14:55:47 +0300 Subject: [PATCH 1/2] PCI/bwctrl: Replace lbms_count with PCI_LINK_LBMS_SEEN flag MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The PCIe BW controller counted LBMS assertions for the purposes of the Target Speed quirk (pcie_failed_link_retrain()). There was also a plan to expose the LBMS count through sysfs to allow better diagnosis of link-related issues. Lukas Wunner suggested, however, that adding a trace event would be better for diagnostics purposes, leaving only pcie_failed_link_retrain() as a user of the lbms_count. The logic in pcie_failed_link_retrain() does not require keeping count of LBMS assertions, so replace lbms_count with a simple flag in pci_dev's priv_flags. The reduced complexity allows removing pcie_bwctrl_lbms_rwsem. Since pcie_failed_link_retrain() runs before bwctrl is probed during boot, the LBMS bit in the Link Status register still has to be checked by the quirk. The priv_flags numbering is not contiguous because hotplug code added a few flags to fill numbers 4-5 (hotplug and bwctrl changes are routed through different branches). 
Suggested-by: Lukas Wunner Signed-off-by: Ilpo Järvinen [bhelgaas: commit log] Signed-off-by: Bjorn Helgaas [kwilczynski: squashed a fix to resolve build failures from https://lore.kernel.org/all/20250508090036.1528-1-ilpo.jarvinen@linux.intel.com] Signed-off-by: Krzysztof Wilczyński Reviewed-by: Lukas Wunner Link: https://patch.msgid.link/20250422115548.1483-1-ilpo.jarvinen@linux.intel.com --- drivers/pci/hotplug/pciehp_ctrl.c | 2 +- drivers/pci/pci.c | 2 +- drivers/pci/pci.h | 10 ++--- drivers/pci/pcie/bwctrl.c | 73 ++++++++----------------------- drivers/pci/quirks.c | 10 ++--- 5 files changed, 26 insertions(+), 71 deletions(-) diff --git a/drivers/pci/hotplug/pciehp_ctrl.c b/drivers/pci/hotplug/pciehp_ctrl.c index d603a7aa7483..bcc938d4420f 100644 --- a/drivers/pci/hotplug/pciehp_ctrl.c +++ b/drivers/pci/hotplug/pciehp_ctrl.c @@ -131,7 +131,7 @@ static void remove_board(struct controller *ctrl, bool safe_removal) INDICATOR_NOOP); /* Don't carry LBMS indications across */ - pcie_reset_lbms_count(ctrl->pcie->port); + pcie_reset_lbms(ctrl->pcie->port); } static int pciehp_enable_slot(struct controller *ctrl); diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index 4d7c9f64ea24..3d94cf33c1b6 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -4757,7 +4757,7 @@ int pcie_retrain_link(struct pci_dev *pdev, bool use_lt) * to track link speed or width changes made by hardware itself * in attempt to correct unreliable link operation. 
*/ - pcie_reset_lbms_count(pdev); + pcie_reset_lbms(pdev); return rc; } diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h index b81e99cd4b62..887811fbe722 100644 --- a/drivers/pci/pci.h +++ b/drivers/pci/pci.h @@ -557,6 +557,7 @@ static inline int pci_dev_set_disconnected(struct pci_dev *dev, void *unused) #define PCI_DPC_RECOVERED 1 #define PCI_DPC_RECOVERING 2 #define PCI_DEV_REMOVED 3 +#define PCI_LINK_LBMS_SEEN 6 static inline void pci_dev_assign_added(struct pci_dev *dev) { @@ -824,14 +825,9 @@ static inline void pcie_ecrc_get_policy(char *str) { } #endif #ifdef CONFIG_PCIEPORTBUS -void pcie_reset_lbms_count(struct pci_dev *port); -int pcie_lbms_count(struct pci_dev *port, unsigned long *val); +void pcie_reset_lbms(struct pci_dev *port); #else -static inline void pcie_reset_lbms_count(struct pci_dev *port) {} -static inline int pcie_lbms_count(struct pci_dev *port, unsigned long *val) -{ - return -EOPNOTSUPP; -} +static inline void pcie_reset_lbms(struct pci_dev *port) {} #endif struct pci_dev_reset_methods { diff --git a/drivers/pci/pcie/bwctrl.c b/drivers/pci/pcie/bwctrl.c index d8d2aa85a229..f31fbbd51490 100644 --- a/drivers/pci/pcie/bwctrl.c +++ b/drivers/pci/pcie/bwctrl.c @@ -38,24 +38,14 @@ /** * struct pcie_bwctrl_data - PCIe bandwidth controller * @set_speed_mutex: Serializes link speed changes - * @lbms_count: Count for LBMS (since last reset) * @cdev: Thermal cooling device associated with the port */ struct pcie_bwctrl_data { struct mutex set_speed_mutex; - atomic_t lbms_count; struct thermal_cooling_device *cdev; }; -/* - * Prevent port removal during LBMS count accessors and Link Speed changes. - * - * These have to be differentiated because pcie_bwctrl_change_speed() calls - * pcie_retrain_link() which uses LBMS count reset accessor on success - * (using just one rwsem triggers "possible recursive locking detected" - * warning). - */ -static DECLARE_RWSEM(pcie_bwctrl_lbms_rwsem); +/* Prevent port removal during Link Speed changes. 
*/ static DECLARE_RWSEM(pcie_bwctrl_setspeed_rwsem); static bool pcie_valid_speed(enum pci_bus_speed speed) @@ -202,15 +192,14 @@ int pcie_set_target_speed(struct pci_dev *port, enum pci_bus_speed speed_req, static void pcie_bwnotif_enable(struct pcie_device *srv) { - struct pcie_bwctrl_data *data = srv->port->link_bwctrl; struct pci_dev *port = srv->port; u16 link_status; int ret; - /* Count LBMS seen so far as one */ + /* Note if LBMS has been seen so far */ ret = pcie_capability_read_word(port, PCI_EXP_LNKSTA, &link_status); if (ret == PCIBIOS_SUCCESSFUL && link_status & PCI_EXP_LNKSTA_LBMS) - atomic_inc(&data->lbms_count); + set_bit(PCI_LINK_LBMS_SEEN, &port->priv_flags); pcie_capability_set_word(port, PCI_EXP_LNKCTL, PCI_EXP_LNKCTL_LBMIE | PCI_EXP_LNKCTL_LABIE); @@ -233,7 +222,6 @@ static void pcie_bwnotif_disable(struct pci_dev *port) static irqreturn_t pcie_bwnotif_irq(int irq, void *context) { struct pcie_device *srv = context; - struct pcie_bwctrl_data *data = srv->port->link_bwctrl; struct pci_dev *port = srv->port; u16 link_status, events; int ret; @@ -247,7 +235,7 @@ static irqreturn_t pcie_bwnotif_irq(int irq, void *context) return IRQ_NONE; if (events & PCI_EXP_LNKSTA_LBMS) - atomic_inc(&data->lbms_count); + set_bit(PCI_LINK_LBMS_SEEN, &port->priv_flags); pcie_capability_write_word(port, PCI_EXP_LNKSTA, events); @@ -262,31 +250,10 @@ static irqreturn_t pcie_bwnotif_irq(int irq, void *context) return IRQ_HANDLED; } -void pcie_reset_lbms_count(struct pci_dev *port) +void pcie_reset_lbms(struct pci_dev *port) { - struct pcie_bwctrl_data *data; - - guard(rwsem_read)(&pcie_bwctrl_lbms_rwsem); - data = port->link_bwctrl; - if (data) - atomic_set(&data->lbms_count, 0); - else - pcie_capability_write_word(port, PCI_EXP_LNKSTA, - PCI_EXP_LNKSTA_LBMS); -} - -int pcie_lbms_count(struct pci_dev *port, unsigned long *val) -{ - struct pcie_bwctrl_data *data; - - guard(rwsem_read)(&pcie_bwctrl_lbms_rwsem); - data = port->link_bwctrl; - if (!data) - return -ENOTTY; - 
- *val = atomic_read(&data->lbms_count); - - return 0; + clear_bit(PCI_LINK_LBMS_SEEN, &port->priv_flags); + pcie_capability_write_word(port, PCI_EXP_LNKSTA, PCI_EXP_LNKSTA_LBMS); } static int pcie_bwnotif_probe(struct pcie_device *srv) @@ -308,18 +275,16 @@ static int pcie_bwnotif_probe(struct pcie_device *srv) return ret; scoped_guard(rwsem_write, &pcie_bwctrl_setspeed_rwsem) { - scoped_guard(rwsem_write, &pcie_bwctrl_lbms_rwsem) { - port->link_bwctrl = data; + port->link_bwctrl = data; - ret = request_irq(srv->irq, pcie_bwnotif_irq, - IRQF_SHARED, "PCIe bwctrl", srv); - if (ret) { - port->link_bwctrl = NULL; - return ret; - } - - pcie_bwnotif_enable(srv); + ret = request_irq(srv->irq, pcie_bwnotif_irq, + IRQF_SHARED, "PCIe bwctrl", srv); + if (ret) { + port->link_bwctrl = NULL; + return ret; } + + pcie_bwnotif_enable(srv); } pci_dbg(port, "enabled with IRQ %d\n", srv->irq); @@ -339,13 +304,11 @@ static void pcie_bwnotif_remove(struct pcie_device *srv) pcie_cooling_device_unregister(data->cdev); scoped_guard(rwsem_write, &pcie_bwctrl_setspeed_rwsem) { - scoped_guard(rwsem_write, &pcie_bwctrl_lbms_rwsem) { - pcie_bwnotif_disable(srv->port); + pcie_bwnotif_disable(srv->port); - free_irq(srv->irq, srv); + free_irq(srv->irq, srv); - srv->port->link_bwctrl = NULL; - } + srv->port->link_bwctrl = NULL; } } diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c index 8d610c17e0f2..64ac1ee944d3 100644 --- a/drivers/pci/quirks.c +++ b/drivers/pci/quirks.c @@ -38,14 +38,10 @@ static bool pcie_lbms_seen(struct pci_dev *dev, u16 lnksta) { - unsigned long count; - int ret; + if (test_bit(PCI_LINK_LBMS_SEEN, &dev->priv_flags)) + return true; - ret = pcie_lbms_count(dev, &count); - if (ret < 0) - return lnksta & PCI_EXP_LNKSTA_LBMS; - - return count > 0; + return lnksta & PCI_EXP_LNKSTA_LBMS; } /* From 6ade6e81f898f7f533207b23849ac8cc0ea8c755 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= Date: Wed, 14 May 2025 16:28:21 +0300 Subject: [PATCH 2/2] PCI: Update 
Link Speed after retraining MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit PCIe Link Retraining can alter Link Speed. pcie_retrain_link(), which performs the Link Training, is called from bwctrl and the ASPM driver. While bwctrl listens for Link Bandwidth Management Status (LBMS) to pick up changes in Link Speed, there is a race between pcie_reset_lbms() clearing LBMS after the Link Training and pcie_bwnotif_irq() reading the Link Status register. If LBMS is already cleared when the irq handler reads the register, the interrupt handler will return early with IRQ_NONE and won't update the Link Speed. When the Link Speed update originates from bwctrl, pcie_bwctrl_change_speed() ensures Link Speed is updated after the retraining. The ASPM driver, however, calls pcie_retrain_link() but does not update the Link Speed after retraining, which can result in a stale Link Speed. Also, it is possible to have ASPM support with CONFIG_PCIEPORTBUS=n, in which case bwctrl will not be built in (and thus won't update the Link Speed at all). To ensure Link Speed is not left stale after Link Training, move the call to pcie_update_link_speed() from pcie_bwctrl_change_speed() into pcie_retrain_link(). Suggested-by: Lukas Wunner Signed-off-by: Ilpo Järvinen Signed-off-by: Krzysztof Wilczyński Reviewed-by: Lukas Wunner Link: https://lore.kernel.org/linux-pci/aBCjpfyYmlkJ12AZ@wunner.de Link: https://lore.kernel.org/r/20250514132821.15705-1-ilpo.jarvinen@linux.intel.com --- drivers/pci/pci.c | 17 +++++++++++++++++ drivers/pci/pcie/bwctrl.c | 13 +------------ 2 files changed, 18 insertions(+), 12 deletions(-) diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index 3d94cf33c1b6..eb0c55078d5e 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -4718,6 +4718,11 @@ static int pcie_wait_for_link_status(struct pci_dev *pdev, * @pdev: Device whose link to retrain. * @use_lt: Use the LT bit if TRUE, or the DLLLA bit if FALSE, for status. 
* + * Trigger retraining of the PCIe Link and wait for the completion of the + * retraining. As link retraining is known to assert LBMS and may change + * the Link Speed, LBMS is cleared after the retraining and the Link Speed + * of the subordinate bus is updated. + * + * Retrain completion status is retrieved from the Link Status Register * according to @use_lt. It is not verified whether the use of the DLLLA * bit is valid. @@ -4758,6 +4763,18 @@ int pcie_retrain_link(struct pci_dev *pdev, bool use_lt) * in attempt to correct unreliable link operation. */ pcie_reset_lbms(pdev); + + /* + * Ensure the Link Speed updates after retraining in case the Link + * Speed was changed because of the retraining. While the bwctrl's + * IRQ handler normally picks up the new Link Speed, clearing LBMS + * races with the IRQ handler reading the Link Status register and + * can result in the handler returning early without updating the + * Link Speed. + */ + if (pdev->subordinate) + pcie_update_link_speed(pdev->subordinate); + return rc; } diff --git a/drivers/pci/pcie/bwctrl.c b/drivers/pci/pcie/bwctrl.c index f31fbbd51490..36f939f23d34 100644 --- a/drivers/pci/pcie/bwctrl.c +++ b/drivers/pci/pcie/bwctrl.c @@ -117,18 +117,7 @@ static int pcie_bwctrl_change_speed(struct pci_dev *port, u16 target_speed, bool if (ret != PCIBIOS_SUCCESSFUL) return pcibios_err_to_errno(ret); - ret = pcie_retrain_link(port, use_lt); - if (ret < 0) - return ret; - - /* - * Ensure link speed updates also with platforms that have problems - * with notifications. - */ - if (port->subordinate) - pcie_update_link_speed(port->subordinate); - - return 0; + return pcie_retrain_link(port, use_lt); } /**