From 01daacfb9035e5b86d43a01f11a0614648f306c1 Mon Sep 17 00:00:00 2001 From: Yicong Yang Date: Fri, 13 Dec 2019 19:44:34 +0800 Subject: [PATCH 1/3] PCI/AER: Log which device prevents error recovery PCI error recovery will fail if any device under the Root Port doesn't have an error_detected callback. Currently only the failure result is printed, which is not enough to identify the driver that lacks the callback. Log a message to identify the device with no error_detected callback. [bhelgaas: tweak log message] Link: https://lore.kernel.org/r/1576237474-32021-1-git-send-email-yangyicong@hisilicon.com Signed-off-by: Yicong Yang Signed-off-by: Bjorn Helgaas --- drivers/pci/pcie/err.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/pci/pcie/err.c b/drivers/pci/pcie/err.c index b0e6048a9208..98acf944a27f 100644 --- a/drivers/pci/pcie/err.c +++ b/drivers/pci/pcie/err.c @@ -61,10 +61,12 @@ static int report_error_detected(struct pci_dev *dev, * error callbacks of "any" device in the subtree, and will * exit in the disconnected error state. */ - if (dev->hdr_type != PCI_HEADER_TYPE_BRIDGE) + if (dev->hdr_type != PCI_HEADER_TYPE_BRIDGE) { vote = PCI_ERS_RESULT_NO_AER_DRIVER; - else + pci_info(dev, "AER: Can't recover (no error_detected callback)\n"); + } else { vote = PCI_ERS_RESULT_NONE; + } } else { err_handler = dev->driver->err_handler; vote = err_handler->error_detected(dev, state); From 8d077c3ce0109c406c265cafc334258caee47e6d Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Fri, 13 Dec 2019 16:46:05 -0600 Subject: [PATCH 2/3] PCI/AER: Factor message prefixes with dev_fmt() Define dev_fmt() with the common prefix of log messages so we don't have to repeat it in every printk. No functional change intended. Link: https://lore.kernel.org/r/20191213225709.GA213811@google.com Signed-off-by: Bjorn Helgaas --- drivers/pci/pcie/err.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/pci/pcie/err.c b/drivers/pci/pcie/err.c index 98acf944a27f..01dfc8bb7ca0 100644 --- a/drivers/pci/pcie/err.c +++ b/drivers/pci/pcie/err.c @@ -10,6 +10,8 @@ * Zhang Yanmin (yanmin.zhang@intel.com) */ +#define dev_fmt(fmt) "AER: " fmt + #include #include #include @@ -63,7 +65,7 @@ static int report_error_detected(struct pci_dev *dev, */ if (dev->hdr_type != PCI_HEADER_TYPE_BRIDGE) { vote = PCI_ERS_RESULT_NO_AER_DRIVER; - pci_info(dev, "AER: Can't recover (no error_detected callback)\n"); + pci_info(dev, "can't recover (no error_detected callback)\n"); } else { vote = PCI_ERS_RESULT_NONE; } @@ -235,12 +237,12 @@ void pcie_do_recovery(struct pci_dev *dev, enum pci_channel_state state, pci_aer_clear_device_status(dev); pci_cleanup_aer_uncorrect_error_status(dev); - pci_info(dev, "AER: Device recovery successful\n"); + pci_info(dev, "device recovery successful\n"); return; failed: pci_uevent_ers(dev, PCI_ERS_RESULT_DISCONNECT); /* TODO: Should kernel panic here? */ - pci_info(dev, "AER: Device recovery failed\n"); + pci_info(dev, "device recovery failed\n"); } From d95f20c4f07020ebc605f3b46af4b6db9eb5fc99 Mon Sep 17 00:00:00 2001 From: Dongdong Liu Date: Thu, 23 Jan 2020 16:26:31 +0800 Subject: [PATCH 3/3] PCI/AER: Initialize aer_fifo Previously we did not call INIT_KFIFO() for aer_fifo. This leads to kfifo_put() sometimes returning 0 (queue full) when in fact it is not. It is easy to reproduce the problem by using aer-inject: $ aer-inject -s :82:00.0 multiple-corr-nonfatal The content of the multiple-corr-nonfatal file is as below: AER COR RCVR HL 0 1 2 3 AER UNCOR POISON_TLP HL 4 5 6 7 Fixes: 27c1ce8bbed7 ("PCI/AER: Use kfifo for tracking events instead of reimplementing it") Link: https://lore.kernel.org/r/1579767991-103898-1-git-send-email-liudongdong3@huawei.com Signed-off-by: Dongdong Liu Signed-off-by: Bjorn Helgaas --- drivers/pci/pcie/aer.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/pci/pcie/aer.c b/drivers/pci/pcie/aer.c index 1ca86f2e0166..4a818b07a1af 100644 --- a/drivers/pci/pcie/aer.c +++ b/drivers/pci/pcie/aer.c @@ -1445,6 +1445,7 @@ static int aer_probe(struct pcie_device *dev) return -ENOMEM; rpc->rpd = port; + INIT_KFIFO(rpc->aer_fifo); set_service_data(dev, rpc); status = devm_request_threaded_irq(device, dev->irq, aer_irq, aer_isr,