From f4d84941832fca4b1939b1683dd1e40fdc59a32d Mon Sep 17 00:00:00 2001 From: Steve French Date: Thu, 16 Jul 2020 00:34:21 -0500 Subject: [PATCH 01/91] smb3: warn on confusing error scenario with sec=krb5 commit 0a018944eee913962bce8ffebbb121960d5125d9 upstream. When mounting with Kerberos, users have been confused about the default error returned in scenarios in which either keyutils is not installed or the user did not properly acquire a krb5 ticket. Log a warning message in the case that "ENOKEY" is returned from the get_spnego_key upcall so that users can better understand why mount failed in those two cases. CC: Stable Signed-off-by: Steve French Signed-off-by: Greg Kroah-Hartman --- fs/cifs/smb2pdu.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c index e2d2b749c8f3..379ac8caa29a 100644 --- a/fs/cifs/smb2pdu.c +++ b/fs/cifs/smb2pdu.c @@ -1132,6 +1132,8 @@ SMB2_auth_kerberos(struct SMB2_sess_data *sess_data) spnego_key = cifs_get_spnego_key(ses); if (IS_ERR(spnego_key)) { rc = PTR_ERR(spnego_key); + if (rc == -ENOKEY) + cifs_dbg(VFS, "Verify user has a krb5 ticket and keyutils is installed\n"); spnego_key = NULL; goto out; } From 5c4d9eefd314e763dcb2a499797176c17ad6ab69 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 24 Jul 2020 22:44:41 +0200 Subject: [PATCH 02/91] genirq/affinity: Make affinity setting if activated opt-in commit f0c7baca180046824e07fc5f1326e83a8fd150c7 upstream. John reported that on a RK3288 system the perf per CPU interrupts are all affine to CPU0 and provided the analysis: "It looks like what happens is that because the interrupts are not per-CPU in the hardware, armpmu_request_irq() calls irq_force_affinity() while the interrupt is deactivated and then request_irq() with IRQF_PERCPU | IRQF_NOBALANCING. Now when irq_startup() runs with IRQ_STARTUP_NORMAL, it calls irq_setup_affinity() which returns early because IRQF_PERCPU and IRQF_NOBALANCING are set, leaving the interrupt on its original CPU." This was broken by the recent commit which blocked interrupt affinity setting in hardware before activation of the interrupt. While this works in general, it does not work for this particular case. As contrary to the initial analysis not all interrupt chip drivers implement an activate callback, the safe cure is to make the deferred interrupt affinity setting at activation time opt-in. Implement the necessary core logic and make the two irqchip implementations for which this is required opt-in. In hindsight this would have been the right thing to do, but ... Fixes: baedb87d1b53 ("genirq/affinity: Handle affinity setting on inactive interrupts correctly") Reported-by: John Keeping Signed-off-by: Thomas Gleixner Tested-by: Marc Zyngier Acked-by: Marc Zyngier Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/87blk4tzgm.fsf@nanos.tec.linutronix.de Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/apic/vector.c | 4 ++++ drivers/irqchip/irq-gic-v3-its.c | 5 ++++- include/linux/irq.h | 13 +++++++++++++ kernel/irq/manage.c | 6 +++++- 4 files changed, 26 insertions(+), 2 deletions(-) diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c index 99c28c02b7a5..8b7e0b46e86e 100644 --- a/arch/x86/kernel/apic/vector.c +++ b/arch/x86/kernel/apic/vector.c @@ -556,6 +556,10 @@ static int x86_vector_alloc_irqs(struct irq_domain *domain, unsigned int virq, irqd->chip_data = apicd; irqd->hwirq = virq + i; irqd_set_single_target(irqd); + + /* Don't invoke affinity setter on deactivated interrupts */ + irqd_set_affinity_on_activate(irqd); + /* * Legacy vectors are already assigned when the IOAPIC * takes them over. They stay on the same vector. This is diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c index fe7d63cdfb1d..d5cc32e80f5e 100644 --- a/drivers/irqchip/irq-gic-v3-its.c +++ b/drivers/irqchip/irq-gic-v3-its.c @@ -2458,6 +2458,7 @@ static int its_irq_domain_alloc(struct irq_domain *domain, unsigned int virq, { msi_alloc_info_t *info = args; struct its_device *its_dev = info->scratchpad[0].ptr; + struct irq_data *irqd; irq_hw_number_t hwirq; int err; int i; @@ -2473,7 +2474,9 @@ static int its_irq_domain_alloc(struct irq_domain *domain, unsigned int virq, irq_domain_set_hwirq_and_chip(domain, virq + i, hwirq + i, &its_irq_chip, its_dev); - irqd_set_single_target(irq_desc_get_irq_data(irq_to_desc(virq + i))); + irqd = irq_get_irq_data(virq + i); + irqd_set_single_target(irqd); + irqd_set_affinity_on_activate(irqd); pr_debug("ID:%d pID:%d vID:%d\n", (int)(hwirq + i - its_dev->event_map.lpi_base), (int)(hwirq + i), virq + i); diff --git a/include/linux/irq.h b/include/linux/irq.h index 6ecaf056ab63..a042faefb9b7 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -210,6 +210,8 @@ struct irq_data { * IRQD_CAN_RESERVE - Can use reservation mode * IRQD_MSI_NOMASK_QUIRK - Non-maskable MSI quirk for affinity change * required + * IRQD_AFFINITY_ON_ACTIVATE - Affinity is set on activation. Don't call + * irq_chip::irq_set_affinity() when deactivated. */ enum { IRQD_TRIGGER_MASK = 0xf, @@ -233,6 +235,7 @@ enum { IRQD_DEFAULT_TRIGGER_SET = (1 << 25), IRQD_CAN_RESERVE = (1 << 26), IRQD_MSI_NOMASK_QUIRK = (1 << 27), + IRQD_AFFINITY_ON_ACTIVATE = (1 << 29), }; #define __irqd_to_state(d) ACCESS_PRIVATE((d)->common, state_use_accessors) @@ -407,6 +410,16 @@ static inline bool irqd_msi_nomask_quirk(struct irq_data *d) return __irqd_to_state(d) & IRQD_MSI_NOMASK_QUIRK; } +static inline void irqd_set_affinity_on_activate(struct irq_data *d) +{ + __irqd_to_state(d) |= IRQD_AFFINITY_ON_ACTIVATE; +} + +static inline bool irqd_affinity_on_activate(struct irq_data *d) +{ + return __irqd_to_state(d) & IRQD_AFFINITY_ON_ACTIVATE; +} + #undef __irqd_to_state static inline irq_hw_number_t irqd_to_hwirq(struct irq_data *d) diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c index 025fcd029f83..3b66c77670d9 100644 --- a/kernel/irq/manage.c +++ b/kernel/irq/manage.c @@ -280,12 +280,16 @@ static bool irq_set_affinity_deactivated(struct irq_data *data, struct irq_desc *desc = irq_data_to_desc(data); /* + * Handle irq chips which can handle affinity only in activated + * state correctly + * * If the interrupt is not yet activated, just store the affinity * mask and do not call the chip driver at all. On activation the * driver has to make sure anyway that the interrupt is in a * useable state so startup works. */ - if (!IS_ENABLED(CONFIG_IRQ_DOMAIN_HIERARCHY) || irqd_is_activated(data)) + if (!IS_ENABLED(CONFIG_IRQ_DOMAIN_HIERARCHY) || + irqd_is_activated(data) || !irqd_affinity_on_activate(data)) return false; cpumask_copy(desc->irq_common_data.affinity, mask); From c59ea9bde42ede641006940a339eabe6669cc1be Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Fri, 26 Jun 2020 19:42:34 +0200 Subject: [PATCH 03/91] PCI: hotplug: ACPI: Fix context refcounting in acpiphp_grab_context() commit dae68d7fd4930315389117e9da35b763f12238f9 upstream. If context is not NULL in acpiphp_grab_context(), but the is_going_away flag is set for the device's parent, the reference counter of the context needs to be decremented before returning NULL or the context will never be freed, so make that happen. Fixes: edf5bf34d408 ("ACPI / dock: Use callback pointers from devices' ACPI hotplug contexts") Reported-by: Vasily Averin Cc: 3.15+ # 3.15+ Signed-off-by: Rafael J. Wysocki Signed-off-by: Greg Kroah-Hartman --- drivers/pci/hotplug/acpiphp_glue.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/drivers/pci/hotplug/acpiphp_glue.c b/drivers/pci/hotplug/acpiphp_glue.c index c94c13525447..be35bbfa6968 100644 --- a/drivers/pci/hotplug/acpiphp_glue.c +++ b/drivers/pci/hotplug/acpiphp_glue.c @@ -122,13 +122,21 @@ static struct acpiphp_context *acpiphp_grab_context(struct acpi_device *adev) struct acpiphp_context *context; acpi_lock_hp_context(); + context = acpiphp_get_context(adev); - if (!context || context->func.parent->is_going_away) { - acpi_unlock_hp_context(); - return NULL; + if (!context) + goto unlock; + + if (context->func.parent->is_going_away) { + acpiphp_put_context(context); + context = NULL; + goto unlock; } + get_bridge(context->func.parent); acpiphp_put_context(context); + +unlock: acpi_unlock_hp_context(); return context; } From 71c6716cb61a7f474a47186c459acf7fbc780b3d Mon Sep 17 00:00:00 2001 From: Kai-Heng Feng Date: Tue, 28 Jul 2020 18:45:53 +0800 Subject: [PATCH 04/91] PCI: Mark AMD Navi10 GPU rev 0x00 ATS as broken commit 45beb31d3afb651bb5c41897e46bd4fa9980c51c upstream. We are seeing AMD Radeon Pro W5700 doesn't work when IOMMU is enabled: iommu ivhd0: AMD-Vi: Event logged [IOTLB_INV_TIMEOUT device=63:00.0 address=0x42b5b01a0] iommu ivhd0: AMD-Vi: Event logged [IOTLB_INV_TIMEOUT device=63:00.0 address=0x42b5b01c0] The error also makes graphics driver fail to probe the device. It appears to be the same issue as commit 5e89cd303e3a ("PCI: Mark AMD Navi14 GPU rev 0xc5 ATS as broken") addresses, and indeed the same ATS quirk can workaround the issue. See-also: 5e89cd303e3a ("PCI: Mark AMD Navi14 GPU rev 0xc5 ATS as broken") See-also: d28ca864c493 ("PCI: Mark AMD Stoney Radeon R7 GPU ATS as broken") See-also: 9b44b0b09dec ("PCI: Mark AMD Stoney GPU ATS as broken") Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=208725 Link: https://lore.kernel.org/r/20200728104554.28927-1-kai.heng.feng@canonical.com Signed-off-by: Kai-Heng Feng Signed-off-by: Bjorn Helgaas Acked-by: Alex Deucher Cc: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/pci/quirks.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c index 9129ccd593d1..af2149632102 100644 --- a/drivers/pci/quirks.c +++ b/drivers/pci/quirks.c @@ -5068,7 +5068,8 @@ DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_SERVERWORKS, 0x0422, quirk_no_ext_tags); */ static void quirk_amd_harvest_no_ats(struct pci_dev *pdev) { - if (pdev->device == 0x7340 && pdev->revision != 0xc5) + if ((pdev->device == 0x7312 && pdev->revision != 0x00) || + (pdev->device == 0x7340 && pdev->revision != 0xc5)) return; pci_info(pdev, "disabling ATS\n"); @@ -5079,6 +5080,8 @@ static void quirk_amd_harvest_no_ats(struct pci_dev *pdev) DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_ATI, 0x98e4, quirk_amd_harvest_no_ats); /* AMD Iceland dGPU */ DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_ATI, 0x6900, quirk_amd_harvest_no_ats); +/* AMD Navi10 dGPU */ +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_ATI, 0x7312, quirk_amd_harvest_no_ats); /* AMD Navi14 dGPU */ DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_ATI, 0x7340, quirk_amd_harvest_no_ats); #endif /* CONFIG_PCI_ATS */ From ae33b1ebbce825c85dbabfdbbea7db72f51298d5 Mon Sep 17 00:00:00 2001 From: Rajat Jain Date: Mon, 6 Jul 2020 16:32:40 -0700 Subject: [PATCH 05/91] PCI: Add device even if driver attach failed commit 2194bc7c39610be7cabe7456c5f63a570604f015 upstream. device_attach() returning failure indicates a driver error while trying to probe the device. In such a scenario, the PCI device should still be added in the system and be visible to the user. When device_attach() fails, merely warn about it and keep the PCI device in the system. This partially reverts ab1a187bba5c ("PCI: Check device_attach() return value always"). Link: https://lore.kernel.org/r/20200706233240.3245512-1-rajatja@google.com Signed-off-by: Rajat Jain Signed-off-by: Bjorn Helgaas Reviewed-by: Greg Kroah-Hartman Cc: stable@vger.kernel.org # v4.6+ Signed-off-by: Greg Kroah-Hartman --- drivers/pci/bus.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/drivers/pci/bus.c b/drivers/pci/bus.c index 5cb40b2518f9..87a2829dffd4 100644 --- a/drivers/pci/bus.c +++ b/drivers/pci/bus.c @@ -323,12 +323,8 @@ void pci_bus_add_device(struct pci_dev *dev) dev->match_driver = true; retval = device_attach(&dev->dev); - if (retval < 0 && retval != -EPROBE_DEFER) { + if (retval < 0 && retval != -EPROBE_DEFER) pci_warn(dev, "device attach failed (%d)\n", retval); - pci_proc_detach_device(dev); - pci_remove_sysfs_dev_files(dev); - return; - } pci_dev_assign_added(dev, true); } From 56e2a4456647942e5f3aca88da164f30dcbf95ad Mon Sep 17 00:00:00 2001 From: Ansuel Smith Date: Mon, 15 Jun 2020 23:06:03 +0200 Subject: [PATCH 06/91] PCI: qcom: Define some PARF params needed for ipq8064 SoC commit 5149901e9e6deca487c01cc434a3ac4125c7b00b upstream. Set some specific value for Tx De-Emphasis, Tx Swing and Rx equalization needed on some ipq8064 based device (Netgear R7800 for example). Without this the system locks on kernel load. Link: https://lore.kernel.org/r/20200615210608.21469-8-ansuelsmth@gmail.com Fixes: 82a823833f4e ("PCI: qcom: Add Qualcomm PCIe controller driver") Signed-off-by: Ansuel Smith Signed-off-by: Lorenzo Pieralisi Reviewed-by: Rob Herring Acked-by: Stanimir Varbanov Cc: stable@vger.kernel.org # v4.5+ Signed-off-by: Greg Kroah-Hartman --- drivers/pci/controller/dwc/pcie-qcom.c | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/drivers/pci/controller/dwc/pcie-qcom.c b/drivers/pci/controller/dwc/pcie-qcom.c index e292801fff7f..06d951ad44ae 100644 --- a/drivers/pci/controller/dwc/pcie-qcom.c +++ b/drivers/pci/controller/dwc/pcie-qcom.c @@ -76,6 +76,18 @@ #define DBI_RO_WR_EN 1 #define PERST_DELAY_US 1000 +/* PARF registers */ +#define PCIE20_PARF_PCS_DEEMPH 0x34 +#define PCS_DEEMPH_TX_DEEMPH_GEN1(x) ((x) << 16) +#define PCS_DEEMPH_TX_DEEMPH_GEN2_3_5DB(x) ((x) << 8) +#define PCS_DEEMPH_TX_DEEMPH_GEN2_6DB(x) ((x) << 0) + +#define PCIE20_PARF_PCS_SWING 0x38 +#define PCS_SWING_TX_SWING_FULL(x) ((x) << 8) +#define PCS_SWING_TX_SWING_LOW(x) ((x) << 0) + +#define PCIE20_PARF_CONFIG_BITS 0x50 +#define PHY_RX0_EQ(x) ((x) << 24) #define PCIE20_v3_PARF_SLV_ADDR_SPACE_SIZE 0x358 #define SLV_ADDR_SPACE_SZ 0x10000000 @@ -275,6 +287,7 @@ static int qcom_pcie_init_2_1_0(struct qcom_pcie *pcie) struct qcom_pcie_resources_2_1_0 *res = &pcie->res.v2_1_0; struct dw_pcie *pci = pcie->pci; struct device *dev = pci->dev; + struct device_node *node = dev->of_node; u32 val; int ret; @@ -319,6 +332,17 @@ static int qcom_pcie_init_2_1_0(struct qcom_pcie *pcie) val &= ~BIT(0); writel(val, pcie->parf + PCIE20_PARF_PHY_CTRL); + if (of_device_is_compatible(node, "qcom,pcie-ipq8064")) { + writel(PCS_DEEMPH_TX_DEEMPH_GEN1(24) | + PCS_DEEMPH_TX_DEEMPH_GEN2_3_5DB(24) | + PCS_DEEMPH_TX_DEEMPH_GEN2_6DB(34), + pcie->parf + PCIE20_PARF_PCS_DEEMPH); + writel(PCS_SWING_TX_SWING_FULL(120) | + PCS_SWING_TX_SWING_LOW(120), + pcie->parf + PCIE20_PARF_PCS_SWING); + writel(PHY_RX0_EQ(4), pcie->parf + PCIE20_PARF_CONFIG_BITS); + } + /* enable external reference clock */ val = readl(pcie->parf + PCIE20_PARF_PHY_REFCLK); val |= BIT(16); From dd6dc2fd66824c2fc6bdf07ff44a00a9695636d2 Mon Sep 17 00:00:00 2001 From: Ansuel Smith Date: Mon, 15 Jun 2020 23:06:04 +0200 Subject: [PATCH 07/91] PCI: qcom: Add support for tx term offset for rev 2.1.0 commit de3c4bf648975ea0b1d344d811e9b0748907b47c upstream. Add tx term offset support to pcie qcom driver need in some revision of the ipq806x SoC. Ipq8064 needs tx term offset set to 7. Link: https://lore.kernel.org/r/20200615210608.21469-9-ansuelsmth@gmail.com Fixes: 82a823833f4e ("PCI: qcom: Add Qualcomm PCIe controller driver") Signed-off-by: Sham Muthayyan Signed-off-by: Ansuel Smith Signed-off-by: Lorenzo Pieralisi Acked-by: Stanimir Varbanov Cc: stable@vger.kernel.org # v4.5+ Signed-off-by: Greg Kroah-Hartman --- drivers/pci/controller/dwc/pcie-qcom.c | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/drivers/pci/controller/dwc/pcie-qcom.c b/drivers/pci/controller/dwc/pcie-qcom.c index 06d951ad44ae..1bdac298a943 100644 --- a/drivers/pci/controller/dwc/pcie-qcom.c +++ b/drivers/pci/controller/dwc/pcie-qcom.c @@ -45,7 +45,13 @@ #define PCIE_CAP_CPL_TIMEOUT_DISABLE 0x10 #define PCIE20_PARF_PHY_CTRL 0x40 +#define PHY_CTRL_PHY_TX0_TERM_OFFSET_MASK GENMASK(20, 16) +#define PHY_CTRL_PHY_TX0_TERM_OFFSET(x) ((x) << 16) + #define PCIE20_PARF_PHY_REFCLK 0x4C +#define PHY_REFCLK_SSP_EN BIT(16) +#define PHY_REFCLK_USE_PAD BIT(12) + #define PCIE20_PARF_DBI_BASE_ADDR 0x168 #define PCIE20_PARF_SLV_ADDR_SPACE_SIZE 0x16C #define PCIE20_PARF_MHI_CLOCK_RESET_CTRL 0x174 @@ -343,9 +349,18 @@ static int qcom_pcie_init_2_1_0(struct qcom_pcie *pcie) writel(PHY_RX0_EQ(4), pcie->parf + PCIE20_PARF_CONFIG_BITS); } + if (of_device_is_compatible(node, "qcom,pcie-ipq8064")) { + /* set TX termination offset */ + val = readl(pcie->parf + PCIE20_PARF_PHY_CTRL); + val &= ~PHY_CTRL_PHY_TX0_TERM_OFFSET_MASK; + val |= PHY_CTRL_PHY_TX0_TERM_OFFSET(7); + writel(val, pcie->parf + PCIE20_PARF_PHY_CTRL); + } + /* enable external reference clock */ val = readl(pcie->parf + PCIE20_PARF_PHY_REFCLK); - val |= BIT(16); + val &= ~PHY_REFCLK_USE_PAD; + val |= PHY_REFCLK_SSP_EN; writel(val, pcie->parf + PCIE20_PARF_PHY_REFCLK); ret = reset_control_deassert(res->phy_reset); From 54a7a9d75c0727433feb634b1025c84589949e02 Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Sat, 19 Jan 2019 11:35:04 -0600 Subject: [PATCH 08/91] PCI: Probe bridge window attributes once at enumeration-time commit 51c48b310183ab6ba5419edfc6a8de889cc04521 upstream. pci_bridge_check_ranges() determines whether a bridge supports the optional I/O and prefetchable memory windows and sets the flag bits in the bridge resources. This *could* be done once during enumeration except that the resource allocation code completely clears the flag bits, e.g., in the pci_assign_unassigned_bridge_resources() path. The problem with pci_bridge_check_ranges() in the resource allocation path is that we may allocate resources after devices have been claimed by drivers, and pci_bridge_check_ranges() *changes* the window registers to determine whether they're writable. This may break concurrent accesses to devices behind the bridge. Add a new pci_read_bridge_windows() to determine whether a bridge supports the optional windows, call it once during enumeration, remember the results, and change pci_bridge_check_ranges() so it doesn't touch the bridge windows but sets the flag bits based on those remembered results. Link: https://lore.kernel.org/linux-pci/1506151482-113560-1-git-send-email-wangzhou1@hisilicon.com Link: https://lists.gnu.org/archive/html/qemu-devel/2018-12/msg02082.html Reported-by: Yandong Xu Tested-by: Yandong Xu Signed-off-by: Bjorn Helgaas Cc: Michael S. Tsirkin Cc: Sagi Grimberg Cc: Ofer Hayut Cc: Roy Shterman Cc: Keith Busch Cc: Zhou Wang Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=208371 Signed-off-by: Dima Stepanov Signed-off-by: Greg Kroah-Hartman --- drivers/pci/probe.c | 52 +++++++++++++++++++++++++++++++++++++++++ drivers/pci/setup-bus.c | 45 ++++------------------------------- include/linux/pci.h | 3 +++ 3 files changed, 59 insertions(+), 41 deletions(-) diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c index cbc0d8da7483..9a5b6a8e2502 100644 --- a/drivers/pci/probe.c +++ b/drivers/pci/probe.c @@ -348,6 +348,57 @@ static void pci_read_bases(struct pci_dev *dev, unsigned int howmany, int rom) } } +static void pci_read_bridge_windows(struct pci_dev *bridge) +{ + u16 io; + u32 pmem, tmp; + + pci_read_config_word(bridge, PCI_IO_BASE, &io); + if (!io) { + pci_write_config_word(bridge, PCI_IO_BASE, 0xe0f0); + pci_read_config_word(bridge, PCI_IO_BASE, &io); + pci_write_config_word(bridge, PCI_IO_BASE, 0x0); + } + if (io) + bridge->io_window = 1; + + /* + * DECchip 21050 pass 2 errata: the bridge may miss an address + * disconnect boundary by one PCI data phase. Workaround: do not + * use prefetching on this device. + */ + if (bridge->vendor == PCI_VENDOR_ID_DEC && bridge->device == 0x0001) + return; + + pci_read_config_dword(bridge, PCI_PREF_MEMORY_BASE, &pmem); + if (!pmem) { + pci_write_config_dword(bridge, PCI_PREF_MEMORY_BASE, + 0xffe0fff0); + pci_read_config_dword(bridge, PCI_PREF_MEMORY_BASE, &pmem); + pci_write_config_dword(bridge, PCI_PREF_MEMORY_BASE, 0x0); + } + if (!pmem) + return; + + bridge->pref_window = 1; + + if ((pmem & PCI_PREF_RANGE_TYPE_MASK) == PCI_PREF_RANGE_TYPE_64) { + + /* + * Bridge claims to have a 64-bit prefetchable memory + * window; verify that the upper bits are actually + * writable. + */ + pci_read_config_dword(bridge, PCI_PREF_BASE_UPPER32, &pmem); + pci_write_config_dword(bridge, PCI_PREF_BASE_UPPER32, + 0xffffffff); + pci_read_config_dword(bridge, PCI_PREF_BASE_UPPER32, &tmp); + pci_write_config_dword(bridge, PCI_PREF_BASE_UPPER32, pmem); + if (tmp) + bridge->pref_64_window = 1; + } +} + static void pci_read_bridge_io(struct pci_bus *child) { struct pci_dev *dev = child->self; @@ -1712,6 +1763,7 @@ int pci_setup_device(struct pci_dev *dev) pci_read_irq(dev); dev->transparent = ((dev->class & 0xff) == 1); pci_read_bases(dev, 2, PCI_ROM_ADDRESS1); + pci_read_bridge_windows(dev); set_pcie_hotplug_bridge(dev); pos = pci_find_capability(dev, PCI_CAP_ID_SSVID); if (pos) { diff --git a/drivers/pci/setup-bus.c b/drivers/pci/setup-bus.c index 8e5b00a420a5..87c8190de622 100644 --- a/drivers/pci/setup-bus.c +++ b/drivers/pci/setup-bus.c @@ -735,58 +735,21 @@ int pci_claim_bridge_resource(struct pci_dev *bridge, int i) base/limit registers must be read-only and read as 0. */ static void pci_bridge_check_ranges(struct pci_bus *bus) { - u16 io; - u32 pmem; struct pci_dev *bridge = bus->self; - struct resource *b_res; + struct resource *b_res = &bridge->resource[PCI_BRIDGE_RESOURCES]; - b_res = &bridge->resource[PCI_BRIDGE_RESOURCES]; b_res[1].flags |= IORESOURCE_MEM; - pci_read_config_word(bridge, PCI_IO_BASE, &io); - if (!io) { - pci_write_config_word(bridge, PCI_IO_BASE, 0xe0f0); - pci_read_config_word(bridge, PCI_IO_BASE, &io); - pci_write_config_word(bridge, PCI_IO_BASE, 0x0); - } - if (io) + if (bridge->io_window) b_res[0].flags |= IORESOURCE_IO; - /* DECchip 21050 pass 2 errata: the bridge may miss an address - disconnect boundary by one PCI data phase. - Workaround: do not use prefetching on this device. */ - if (bridge->vendor == PCI_VENDOR_ID_DEC && bridge->device == 0x0001) - return; - - pci_read_config_dword(bridge, PCI_PREF_MEMORY_BASE, &pmem); - if (!pmem) { - pci_write_config_dword(bridge, PCI_PREF_MEMORY_BASE, - 0xffe0fff0); - pci_read_config_dword(bridge, PCI_PREF_MEMORY_BASE, &pmem); - pci_write_config_dword(bridge, PCI_PREF_MEMORY_BASE, 0x0); - } - if (pmem) { + if (bridge->pref_window) { b_res[2].flags |= IORESOURCE_MEM | IORESOURCE_PREFETCH; - if ((pmem & PCI_PREF_RANGE_TYPE_MASK) == - PCI_PREF_RANGE_TYPE_64) { + if (bridge->pref_64_window) { b_res[2].flags |= IORESOURCE_MEM_64; b_res[2].flags |= PCI_PREF_RANGE_TYPE_64; } } - - /* double check if bridge does support 64 bit pref */ - if (b_res[2].flags & IORESOURCE_MEM_64) { - u32 mem_base_hi, tmp; - pci_read_config_dword(bridge, PCI_PREF_BASE_UPPER32, - &mem_base_hi); - pci_write_config_dword(bridge, PCI_PREF_BASE_UPPER32, - 0xffffffff); - pci_read_config_dword(bridge, PCI_PREF_BASE_UPPER32, &tmp); - if (!tmp) - b_res[2].flags &= ~IORESOURCE_MEM_64; - pci_write_config_dword(bridge, PCI_PREF_BASE_UPPER32, - mem_base_hi); - } } /* Helper function for sizing routines: find first available diff --git a/include/linux/pci.h b/include/linux/pci.h index b1f297f4b7b0..2517492dd185 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -373,6 +373,9 @@ struct pci_dev { bool match_driver; /* Skip attaching driver */ unsigned int transparent:1; /* Subtractive decode bridge */ + unsigned int io_window:1; /* Bridge has I/O window */ + unsigned int pref_window:1; /* Bridge has pref mem window */ + unsigned int pref_64_window:1; /* Pref mem window is 64-bit */ unsigned int multifunction:1; /* Multi-function device */ unsigned int is_busmaster:1; /* Is busmaster */ From 3b5318a963bddf2bb4e6a1a3d46f25ed7580c97f Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Tue, 16 Jun 2020 10:17:37 +0800 Subject: [PATCH 09/91] btrfs: free anon block device right after subvolume deletion commit 082b6c970f02fefd278c7833880cda29691a5f34 upstream. [BUG] When a lot of subvolumes are created, there is a user report about transaction aborted caused by slow anonymous block device reclaim: BTRFS: Transaction aborted (error -24) WARNING: CPU: 17 PID: 17041 at fs/btrfs/transaction.c:1576 create_pending_snapshot+0xbc4/0xd10 [btrfs] RIP: 0010:create_pending_snapshot+0xbc4/0xd10 [btrfs] Call Trace: create_pending_snapshots+0x82/0xa0 [btrfs] btrfs_commit_transaction+0x275/0x8c0 [btrfs] btrfs_mksubvol+0x4b9/0x500 [btrfs] btrfs_ioctl_snap_create_transid+0x174/0x180 [btrfs] btrfs_ioctl_snap_create_v2+0x11c/0x180 [btrfs] btrfs_ioctl+0x11a4/0x2da0 [btrfs] do_vfs_ioctl+0xa9/0x640 ksys_ioctl+0x67/0x90 __x64_sys_ioctl+0x1a/0x20 do_syscall_64+0x5a/0x110 entry_SYSCALL_64_after_hwframe+0x44/0xa9 ---[ end trace 33f2f83f3d5250e9 ]--- BTRFS: error (device sda1) in create_pending_snapshot:1576: errno=-24 unknown BTRFS info (device sda1): forced readonly BTRFS warning (device sda1): Skipping commit of aborted transaction. BTRFS: error (device sda1) in cleanup_transaction:1831: errno=-24 unknown [CAUSE] The anonymous device pool is shared and its size is 1M. It's possible to hit that limit if the subvolume deletion is not fast enough and the subvolumes to be cleaned keep the ids allocated. [WORKAROUND] We can't avoid the anon device pool exhaustion but we can shorten the time the id is attached to the subvolume root once the subvolume becomes invisible to the user. Reported-by: Greed Rong Link: https://lore.kernel.org/linux-btrfs/CA+UqX+NTrZ6boGnWHhSeZmEY5J76CTqmYjO2S+=tHJX7nb9DPw@mail.gmail.com/ CC: stable@vger.kernel.org # 4.4+ Reviewed-by: Josef Bacik Signed-off-by: Qu Wenruo Reviewed-by: David Sterba Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/inode.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 7befb7c12bd3..6154fbaf43e1 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -4458,6 +4458,8 @@ int btrfs_delete_subvolume(struct inode *dir, struct dentry *dentry) } } + free_anon_bdev(dest->anon_dev); + dest->anon_dev = 0; out_end_trans: trans->block_rsv = NULL; trans->bytes_reserved = 0; From 8eadf67bc216537337655cb1d73af0b00861a022 Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Tue, 16 Jun 2020 10:17:34 +0800 Subject: [PATCH 10/91] btrfs: don't allocate anonymous block device for user invisible roots commit 851fd730a743e072badaf67caf39883e32439431 upstream. [BUG] When a lot of subvolumes are created, there is a user report about transaction aborted: BTRFS: Transaction aborted (error -24) WARNING: CPU: 17 PID: 17041 at fs/btrfs/transaction.c:1576 create_pending_snapshot+0xbc4/0xd10 [btrfs] RIP: 0010:create_pending_snapshot+0xbc4/0xd10 [btrfs] Call Trace: create_pending_snapshots+0x82/0xa0 [btrfs] btrfs_commit_transaction+0x275/0x8c0 [btrfs] btrfs_mksubvol+0x4b9/0x500 [btrfs] btrfs_ioctl_snap_create_transid+0x174/0x180 [btrfs] btrfs_ioctl_snap_create_v2+0x11c/0x180 [btrfs] btrfs_ioctl+0x11a4/0x2da0 [btrfs] do_vfs_ioctl+0xa9/0x640 ksys_ioctl+0x67/0x90 __x64_sys_ioctl+0x1a/0x20 do_syscall_64+0x5a/0x110 entry_SYSCALL_64_after_hwframe+0x44/0xa9 ---[ end trace 33f2f83f3d5250e9 ]--- BTRFS: error (device sda1) in create_pending_snapshot:1576: errno=-24 unknown BTRFS info (device sda1): forced readonly BTRFS warning (device sda1): Skipping commit of aborted transaction. BTRFS: error (device sda1) in cleanup_transaction:1831: errno=-24 unknown [CAUSE] The error is EMFILE (Too many files open) and comes from the anonymous block device allocation. The ids are in a shared pool of size 1<<20. The ids are assigned to live subvolumes, ie. the root structure exists in memory (eg. after creation or after the root appears in some path). The pool could be exhausted if the numbers are not reclaimed fast enough, after subvolume deletion or if other system component uses the anon block devices. [WORKAROUND] Since it's not possible to completely solve the problem, we can only minimize the time the id is allocated to a subvolume root. Firstly, we can reduce the use of anon_dev by trees that are not subvolume roots, like data reloc tree. This patch will do extra check on root objectid, to skip roots that don't need anon_dev. Currently it's only data reloc tree and orphan roots. Reported-by: Greed Rong Link: https://lore.kernel.org/linux-btrfs/CA+UqX+NTrZ6boGnWHhSeZmEY5J76CTqmYjO2S+=tHJX7nb9DPw@mail.gmail.com/ CC: stable@vger.kernel.org # 4.4+ Reviewed-by: Josef Bacik Signed-off-by: Qu Wenruo Reviewed-by: David Sterba Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/disk-io.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 9740f7b5d4fb..3130844e219c 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1500,9 +1500,16 @@ int btrfs_init_fs_root(struct btrfs_root *root) spin_lock_init(&root->ino_cache_lock); init_waitqueue_head(&root->ino_cache_wait); - ret = get_anon_bdev(&root->anon_dev); - if (ret) - goto fail; + /* + * Don't assign anonymous block device to roots that are not exposed to + * userspace, the id pool is limited to 1M + */ + if (is_fstree(root->root_key.objectid) && + btrfs_root_refs(&root->root_item) > 0) { + ret = get_anon_bdev(&root->anon_dev); + if (ret) + goto fail; + } mutex_lock(&root->objectid_mutex); ret = btrfs_find_highest_objectid(root, From 6bf983c8db01d69eb3eb0a1ee90e43ad3a7b8709 Mon Sep 17 00:00:00 2001 From: Tom Rix Date: Tue, 7 Jul 2020 06:29:08 -0700 Subject: [PATCH 11/91] btrfs: ref-verify: fix memory leak in add_block_entry commit d60ba8de1164e1b42e296ff270c622a070ef8fe7 upstream. clang static analysis flags this error fs/btrfs/ref-verify.c:290:3: warning: Potential leak of memory pointed to by 're' [unix.Malloc] kfree(be); ^~~~~ The problem is in this block of code: if (root_objectid) { struct root_entry *exist_re; exist_re = insert_root_entry(&exist->roots, re); if (exist_re) kfree(re); } There is no 'else' block freeing when root_objectid is 0. Add the missing kfree to the else branch. Fixes: fd708b81d972 ("Btrfs: add a extent ref verify tool") CC: stable@vger.kernel.org # 4.19+ Signed-off-by: Tom Rix Reviewed-by: David Sterba Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/ref-verify.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/btrfs/ref-verify.c b/fs/btrfs/ref-verify.c index dbc685ca017f..5dec52bd2897 100644 --- a/fs/btrfs/ref-verify.c +++ b/fs/btrfs/ref-verify.c @@ -297,6 +297,8 @@ static struct block_entry *add_block_entry(struct btrfs_fs_info *fs_info, exist_re = insert_root_entry(&exist->roots, re); if (exist_re) kfree(re); + } else { + kfree(re); } kfree(be); return exist; From fa511954694cbea4d0cb59c81c8670276920c08c Mon Sep 17 00:00:00 2001 From: Anand Jain Date: Fri, 10 Jul 2020 14:37:38 +0800 Subject: [PATCH 12/91] btrfs: don't traverse into the seed devices in show_devname commit 4faf55b03823e96c44dc4e364520000ed3b12fdb upstream. ->show_devname currently shows the lowest devid in the list. As the seed devices have the lowest devid in the sprouted filesystem, the userland tool such as findmnt end up seeing seed device instead of the device from the read-writable sprouted filesystem. As shown below. mount /dev/sda /btrfs mount: /btrfs: WARNING: device write-protected, mounted read-only. findmnt --output SOURCE,TARGET,UUID /btrfs SOURCE TARGET UUID /dev/sda /btrfs 899f7027-3e46-4626-93e7-7d4c9ad19111 btrfs dev add -f /dev/sdb /btrfs umount /btrfs mount /dev/sdb /btrfs findmnt --output SOURCE,TARGET,UUID /btrfs SOURCE TARGET UUID /dev/sda /btrfs 899f7027-3e46-4626-93e7-7d4c9ad19111 All sprouts from a single seed will show the same seed device and the same fsid. That's confusing. This is causing problems in our prototype as there isn't any reference to the sprout file-system(s) which is being used for actual read and write. This was added in the patch which implemented the show_devname in btrfs commit 9c5085c14798 ("Btrfs: implement ->show_devname"). I tried to look for any particular reason that we need to show the seed device, there isn't any. So instead, do not traverse through the seed devices, just show the lowest devid in the sprouted fsid. After the patch: mount /dev/sda /btrfs mount: /btrfs: WARNING: device write-protected, mounted read-only. findmnt --output SOURCE,TARGET,UUID /btrfs SOURCE TARGET UUID /dev/sda /btrfs 899f7027-3e46-4626-93e7-7d4c9ad19111 btrfs dev add -f /dev/sdb /btrfs mount -o rw,remount /dev/sdb /btrfs findmnt --output SOURCE,TARGET,UUID /btrfs SOURCE TARGET UUID /dev/sdb /btrfs 595ca0e6-b82e-46b5-b9e2-c72a6928be48 mount /dev/sda /btrfs1 mount: /btrfs1: WARNING: device write-protected, mounted read-only. btrfs dev add -f /dev/sdc /btrfs1 findmnt --output SOURCE,TARGET,UUID /btrfs1 SOURCE TARGET UUID /dev/sdc /btrfs1 ca1dbb7a-8446-4f95-853c-a20f3f82bdbb cat /proc/self/mounts | grep btrfs /dev/sdb /btrfs btrfs rw,relatime,noacl,space_cache,subvolid=5,subvol=/ 0 0 /dev/sdc /btrfs1 btrfs ro,relatime,noacl,space_cache,subvolid=5,subvol=/ 0 0 Reported-by: Martin K. Petersen CC: stable@vger.kernel.org # 4.19+ Tested-by: Martin K. Petersen Signed-off-by: Anand Jain Reviewed-by: David Sterba Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/super.c | 21 +++++++-------------- 1 file changed, 7 insertions(+), 14 deletions(-) diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 6a5b16a119ed..a670205c9808 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -2314,9 +2314,7 @@ static int btrfs_unfreeze(struct super_block *sb) static int btrfs_show_devname(struct seq_file *m, struct dentry *root) { struct btrfs_fs_info *fs_info = btrfs_sb(root->d_sb); - struct btrfs_fs_devices *cur_devices; struct btrfs_device *dev, *first_dev = NULL; - struct list_head *head; /* * Lightweight locking of the devices. We should not need @@ -2326,18 +2324,13 @@ static int btrfs_show_devname(struct seq_file *m, struct dentry *root) * least until until the rcu_read_unlock. */ rcu_read_lock(); - cur_devices = fs_info->fs_devices; - while (cur_devices) { - head = &cur_devices->devices; - list_for_each_entry_rcu(dev, head, dev_list) { - if (test_bit(BTRFS_DEV_STATE_MISSING, &dev->dev_state)) - continue; - if (!dev->name) - continue; - if (!first_dev || dev->devid < first_dev->devid) - first_dev = dev; - } - cur_devices = cur_devices->seed; + list_for_each_entry_rcu(dev, &fs_info->fs_devices->devices, dev_list) { + if (test_bit(BTRFS_DEV_STATE_MISSING, &dev->dev_state)) + continue; + if (!dev->name) + continue; + if (!first_dev || dev->devid < first_dev->devid) + first_dev = dev; } if (first_dev) From 35b4a28051b237a4e3f71f6778b1f536d6a82a5e Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Fri, 17 Jul 2020 15:12:27 -0400 Subject: [PATCH 13/91] btrfs: open device without device_list_mutex commit 18c850fdc5a801bad4977b0f1723761d42267e45 upstream. There's long existed a lockdep splat because we open our bdev's under the ->device_list_mutex at mount time, which acquires the bd_mutex. Usually this goes unnoticed, but if you do loopback devices at all suddenly the bd_mutex comes with a whole host of other dependencies, which results in the splat when you mount a btrfs file system. ====================================================== WARNING: possible circular locking dependency detected 5.8.0-0.rc3.1.fc33.x86_64+debug #1 Not tainted ------------------------------------------------------ systemd-journal/509 is trying to acquire lock: ffff970831f84db0 (&fs_info->reloc_mutex){+.+.}-{3:3}, at: btrfs_record_root_in_trans+0x44/0x70 [btrfs] but task is already holding lock: ffff97083144d598 (sb_pagefaults){.+.+}-{0:0}, at: btrfs_page_mkwrite+0x59/0x560 [btrfs] which lock already depends on the new lock. the existing dependency chain (in reverse order) is: -> #6 (sb_pagefaults){.+.+}-{0:0}: __sb_start_write+0x13e/0x220 btrfs_page_mkwrite+0x59/0x560 [btrfs] do_page_mkwrite+0x4f/0x130 do_wp_page+0x3b0/0x4f0 handle_mm_fault+0xf47/0x1850 do_user_addr_fault+0x1fc/0x4b0 exc_page_fault+0x88/0x300 asm_exc_page_fault+0x1e/0x30 -> #5 (&mm->mmap_lock#2){++++}-{3:3}: __might_fault+0x60/0x80 _copy_from_user+0x20/0xb0 get_sg_io_hdr+0x9a/0xb0 scsi_cmd_ioctl+0x1ea/0x2f0 cdrom_ioctl+0x3c/0x12b4 sr_block_ioctl+0xa4/0xd0 block_ioctl+0x3f/0x50 ksys_ioctl+0x82/0xc0 __x64_sys_ioctl+0x16/0x20 do_syscall_64+0x52/0xb0 entry_SYSCALL_64_after_hwframe+0x44/0xa9 -> #4 (&cd->lock){+.+.}-{3:3}: __mutex_lock+0x7b/0x820 sr_block_open+0xa2/0x180 __blkdev_get+0xdd/0x550 blkdev_get+0x38/0x150 do_dentry_open+0x16b/0x3e0 path_openat+0x3c9/0xa00 do_filp_open+0x75/0x100 do_sys_openat2+0x8a/0x140 __x64_sys_openat+0x46/0x70 do_syscall_64+0x52/0xb0 entry_SYSCALL_64_after_hwframe+0x44/0xa9 -> #3 (&bdev->bd_mutex){+.+.}-{3:3}: __mutex_lock+0x7b/0x820 __blkdev_get+0x6a/0x550 blkdev_get+0x85/0x150 blkdev_get_by_path+0x2c/0x70 btrfs_get_bdev_and_sb+0x1b/0xb0 [btrfs] open_fs_devices+0x88/0x240 [btrfs] btrfs_open_devices+0x92/0xa0 [btrfs] btrfs_mount_root+0x250/0x490 [btrfs] legacy_get_tree+0x30/0x50 vfs_get_tree+0x28/0xc0 vfs_kern_mount.part.0+0x71/0xb0 btrfs_mount+0x119/0x380 [btrfs] legacy_get_tree+0x30/0x50 vfs_get_tree+0x28/0xc0 do_mount+0x8c6/0xca0 __x64_sys_mount+0x8e/0xd0 do_syscall_64+0x52/0xb0 entry_SYSCALL_64_after_hwframe+0x44/0xa9 -> #2 (&fs_devs->device_list_mutex){+.+.}-{3:3}: __mutex_lock+0x7b/0x820 btrfs_run_dev_stats+0x36/0x420 [btrfs] commit_cowonly_roots+0x91/0x2d0 [btrfs] btrfs_commit_transaction+0x4e6/0x9f0 [btrfs] btrfs_sync_file+0x38a/0x480 [btrfs] __x64_sys_fdatasync+0x47/0x80 do_syscall_64+0x52/0xb0 entry_SYSCALL_64_after_hwframe+0x44/0xa9 -> #1 (&fs_info->tree_log_mutex){+.+.}-{3:3}: __mutex_lock+0x7b/0x820 btrfs_commit_transaction+0x48e/0x9f0 [btrfs] btrfs_sync_file+0x38a/0x480 [btrfs] __x64_sys_fdatasync+0x47/0x80 do_syscall_64+0x52/0xb0 entry_SYSCALL_64_after_hwframe+0x44/0xa9 -> #0 (&fs_info->reloc_mutex){+.+.}-{3:3}: __lock_acquire+0x1241/0x20c0 lock_acquire+0xb0/0x400 __mutex_lock+0x7b/0x820 btrfs_record_root_in_trans+0x44/0x70 [btrfs] start_transaction+0xd2/0x500 [btrfs] btrfs_dirty_inode+0x44/0xd0 [btrfs] file_update_time+0xc6/0x120 btrfs_page_mkwrite+0xda/0x560 [btrfs] do_page_mkwrite+0x4f/0x130 do_wp_page+0x3b0/0x4f0 handle_mm_fault+0xf47/0x1850 do_user_addr_fault+0x1fc/0x4b0 exc_page_fault+0x88/0x300 asm_exc_page_fault+0x1e/0x30 other info that might help us debug this: Chain exists of: &fs_info->reloc_mutex --> &mm->mmap_lock#2 --> sb_pagefaults Possible unsafe locking scenario: CPU0 CPU1 ---- ---- lock(sb_pagefaults); lock(&mm->mmap_lock#2); lock(sb_pagefaults); lock(&fs_info->reloc_mutex); *** DEADLOCK *** 3 locks held by systemd-journal/509: #0: ffff97083bdec8b8 (&mm->mmap_lock#2){++++}-{3:3}, at: do_user_addr_fault+0x12e/0x4b0 #1: ffff97083144d598 (sb_pagefaults){.+.+}-{0:0}, at: btrfs_page_mkwrite+0x59/0x560 [btrfs] #2: ffff97083144d6a8 (sb_internal){.+.+}-{0:0}, at: start_transaction+0x3f8/0x500 [btrfs] stack backtrace: CPU: 0 PID: 509 Comm: systemd-journal Not tainted 5.8.0-0.rc3.1.fc33.x86_64+debug #1 Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 0.0.0 02/06/2015 Call Trace: dump_stack+0x92/0xc8 check_noncircular+0x134/0x150 __lock_acquire+0x1241/0x20c0 lock_acquire+0xb0/0x400 ? btrfs_record_root_in_trans+0x44/0x70 [btrfs] ? lock_acquire+0xb0/0x400 ? btrfs_record_root_in_trans+0x44/0x70 [btrfs] __mutex_lock+0x7b/0x820 ? btrfs_record_root_in_trans+0x44/0x70 [btrfs] ? kvm_sched_clock_read+0x14/0x30 ? sched_clock+0x5/0x10 ? sched_clock_cpu+0xc/0xb0 btrfs_record_root_in_trans+0x44/0x70 [btrfs] start_transaction+0xd2/0x500 [btrfs] btrfs_dirty_inode+0x44/0xd0 [btrfs] file_update_time+0xc6/0x120 btrfs_page_mkwrite+0xda/0x560 [btrfs] ? sched_clock+0x5/0x10 do_page_mkwrite+0x4f/0x130 do_wp_page+0x3b0/0x4f0 handle_mm_fault+0xf47/0x1850 do_user_addr_fault+0x1fc/0x4b0 exc_page_fault+0x88/0x300 ? asm_exc_page_fault+0x8/0x30 asm_exc_page_fault+0x1e/0x30 RIP: 0033:0x7fa3972fdbfe Code: Bad RIP value. Fix this by not holding the ->device_list_mutex at this point. The device_list_mutex exists to protect us from modifying the device list while the file system is running. However it can also be modified by doing a scan on a device. But this action is specifically protected by the uuid_mutex, which we are holding here. We cannot race with opening at this point because we have the ->s_mount lock held during the mount. Not having the ->device_list_mutex here is perfectly safe as we're not going to change the devices at this point. CC: stable@vger.kernel.org # 4.19+ Signed-off-by: Josef Bacik Reviewed-by: David Sterba [ add some comments ] Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/volumes.c | 21 ++++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index e0ba1e9ddcdf..4abb2a155ac5 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -155,7 +155,9 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, * * global::fs_devs - add, remove, updates to the global list * - * does not protect: manipulation of the fs_devices::devices list! + * does not protect: manipulation of the fs_devices::devices list in general + * but in mount context it could be used to exclude list modifications by eg. + * scan ioctl * * btrfs_device::name - renames (write side), read is RCU * @@ -168,6 +170,9 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, * may be used to exclude some operations from running concurrently without any * modifications to the list (see write_all_supers) * + * Is not required at mount and close times, because our device list is + * protected by the uuid_mutex at that point. + * * balance_mutex * ------------- * protects balance structures (status, state) and context accessed from @@ -656,6 +661,11 @@ static void btrfs_free_stale_devices(const char *path, } } +/* + * This is only used on mount, and we are protected from competing things + * messing with our fs_devices by the uuid_mutex, thus we do not need the + * fs_devices->device_list_mutex here. + */ static int btrfs_open_one_device(struct btrfs_fs_devices *fs_devices, struct btrfs_device *device, fmode_t flags, void *holder) @@ -1153,8 +1163,14 @@ int btrfs_open_devices(struct btrfs_fs_devices *fs_devices, int ret; lockdep_assert_held(&uuid_mutex); + /* + * The device_list_mutex cannot be taken here in case opening the + * underlying device takes further locks like bd_mutex. + * + * We also don't need the lock here as this is called during mount and + * exclusion is provided by uuid_mutex + */ - mutex_lock(&fs_devices->device_list_mutex); if (fs_devices->opened) { fs_devices->opened++; ret = 0; @@ -1162,7 +1178,6 @@ int btrfs_open_devices(struct btrfs_fs_devices *fs_devices, list_sort(NULL, &fs_devices->devices, devid_cmp); ret = open_fs_devices(fs_devices, flags, holder); } - mutex_unlock(&fs_devices->device_list_mutex); return ret; } From 7c1ddfc98703433b556b827ffc10dd3a133866e1 Mon Sep 17 00:00:00 2001 From: David Sterba Date: Thu, 23 Jul 2020 19:08:55 +0200 Subject: [PATCH 14/91] btrfs: fix messages after changing compression level by remount commit 27942c9971cc405c60432eca9395e514a2ae9f5e upstream. Reported by Forza on IRC that remounting with compression options does not reflect the change in level, or at least it does not appear to do so according to the messages: mount -o compress=zstd:1 /dev/sda /mnt mount -o remount,compress=zstd:15 /mnt does not print the change to the level to syslog: [ 41.366060] BTRFS info (device vda): use zstd compression, level 1 [ 41.368254] BTRFS info (device vda): disk space caching is enabled [ 41.390429] BTRFS info (device vda): disk space caching is enabled What really happens is that the message is lost but the level is actualy changed. There's another weird output, if compression is reset to 'no': [ 45.413776] BTRFS info (device vda): use no compression, level 4 To fix that, save the previous compression level and print the message in that case too and use separate message for 'no' compression. CC: stable@vger.kernel.org # 4.19+ Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/super.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index a670205c9808..ed539496089f 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -432,6 +432,7 @@ int btrfs_parse_options(struct btrfs_fs_info *info, char *options, char *compress_type; bool compress_force = false; enum btrfs_compression_type saved_compress_type; + int saved_compress_level; bool saved_compress_force; int no_compress = 0; @@ -514,6 +515,7 @@ int btrfs_parse_options(struct btrfs_fs_info *info, char *options, info->compress_type : BTRFS_COMPRESS_NONE; saved_compress_force = btrfs_test_opt(info, FORCE_COMPRESS); + saved_compress_level = info->compress_level; if (token == Opt_compress || token == Opt_compress_force || strncmp(args[0].from, "zlib", 4) == 0) { @@ -552,6 +554,8 @@ int btrfs_parse_options(struct btrfs_fs_info *info, char *options, no_compress = 0; } else if (strncmp(args[0].from, "no", 2) == 0) { compress_type = "no"; + info->compress_level = 0; + info->compress_type = 0; btrfs_clear_opt(info->mount_opt, COMPRESS); btrfs_clear_opt(info->mount_opt, FORCE_COMPRESS); compress_force = false; @@ -572,11 +576,11 @@ int btrfs_parse_options(struct btrfs_fs_info *info, char *options, */ btrfs_clear_opt(info->mount_opt, FORCE_COMPRESS); } - if ((btrfs_test_opt(info, COMPRESS) && - (info->compress_type != saved_compress_type || - compress_force != saved_compress_force)) || - (!btrfs_test_opt(info, COMPRESS) && - no_compress == 1)) { + if (no_compress == 1) { + btrfs_info(info, "use no compression"); + } else if ((info->compress_type != saved_compress_type) || + (compress_force != saved_compress_force) || + (info->compress_level != saved_compress_level)) { btrfs_info(info, "%s %s compression, level %d", (compress_force) ? "force" : "use", compress_type, info->compress_level); From 627fa9d8071daad6aa84316c1fcb114a62db914f Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Mon, 27 Jul 2020 10:28:05 -0400 Subject: [PATCH 15/91] btrfs: only search for left_info if there is no right_info in try_merge_free_space commit bf53d4687b8f3f6b752f091eb85f62369a515dfd upstream. In try_to_merge_free_space we attempt to find entries to the left and right of the entry we are adding to see if they can be merged. We search for an entry past our current info (saved into right_info), and then if right_info exists and it has a rb_prev() we save the rb_prev() into left_info. However there's a slight problem in the case that we have a right_info, but no entry previous to that entry. At that point we will search for an entry just before the info we're attempting to insert. This will simply find right_info again, and assign it to left_info, making them both the same pointer. Now if right_info _can_ be merged with the range we're inserting, we'll add it to the info and free right_info. However further down we'll access left_info, which was right_info, and thus get a use-after-free. Fix this by only searching for the left entry if we don't find a right entry at all. The CVE referenced had a specially crafted file system that could trigger this use-after-free. However with the tree checker improvements we no longer trigger the conditions for the UAF. But the original conditions still apply, hence this fix. Reference: CVE-2019-19448 Fixes: 963030817060 ("Btrfs: use hybrid extents+bitmap rb tree for free space") CC: stable@vger.kernel.org # 4.4+ Signed-off-by: Josef Bacik Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/free-space-cache.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c index c9965e89097f..4c65305fd418 100644 --- a/fs/btrfs/free-space-cache.c +++ b/fs/btrfs/free-space-cache.c @@ -2169,7 +2169,7 @@ static int insert_into_bitmap(struct btrfs_free_space_ctl *ctl, static bool try_merge_free_space(struct btrfs_free_space_ctl *ctl, struct btrfs_free_space *info, bool update_stat) { - struct btrfs_free_space *left_info; + struct btrfs_free_space *left_info = NULL; struct btrfs_free_space *right_info; bool merged = false; u64 offset = info->offset; @@ -2184,7 +2184,7 @@ static bool try_merge_free_space(struct btrfs_free_space_ctl *ctl, if (right_info && rb_prev(&right_info->offset_index)) left_info = rb_entry(rb_prev(&right_info->offset_index), struct btrfs_free_space, offset_index); - else + else if (!right_info) left_info = tree_search_offset(ctl, offset - 1, 0, 0); if (right_info && !right_info->bitmap) { From 183af2d27dfced3167a88c80a2df776cb28a1255 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Wed, 29 Jul 2020 10:17:50 +0100 Subject: [PATCH 16/91] btrfs: fix memory leaks after failure to lookup checksums during inode logging commit 4f26433e9b3eb7a55ed70d8f882ae9cd48ba448b upstream. While logging an inode, at copy_items(), if we fail to lookup the checksums for an extent we release the destination path, free the ins_data array and then return immediately. However a previous iteration of the for loop may have added checksums to the ordered_sums list, in which case we leak the memory used by them. So fix this by making sure we iterate the ordered_sums list and free all its checksums before returning. Fixes: 3650860b90cc2a ("Btrfs: remove almost all of the BUG()'s from tree-log.c") CC: stable@vger.kernel.org # 4.4+ Reviewed-by: Johannes Thumshirn Signed-off-by: Filipe Manana Reviewed-by: David Sterba Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/tree-log.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index 928ac2c4899e..090315f4ac78 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -3988,11 +3988,8 @@ static noinline int copy_items(struct btrfs_trans_handle *trans, fs_info->csum_root, ds + cs, ds + cs + cl - 1, &ordered_sums, 0); - if (ret) { - btrfs_release_path(dst_path); - kfree(ins_data); - return ret; - } + if (ret) + break; } } } @@ -4005,7 +4002,6 @@ static noinline int copy_items(struct btrfs_trans_handle *trans, * we have to do this after the loop above to avoid changing the * log tree while trying to change the log tree. */ - ret = 0; while (!list_empty(&ordered_sums)) { struct btrfs_ordered_sum *sums = list_entry(ordered_sums.next, struct btrfs_ordered_sum, From a34b58b5b43be7df523188dd2fa639075658ed0b Mon Sep 17 00:00:00 2001 From: Pavel Machek Date: Mon, 3 Aug 2020 11:35:06 +0200 Subject: [PATCH 17/91] btrfs: fix return value mixup in btrfs_get_extent commit 881a3a11c2b858fe9b69ef79ac5ee9978a266dc9 upstream. btrfs_get_extent() sets variable ret, but out: error path expect error to be in variable err so the error code is lost. Fixes: 6bf9e4bd6a27 ("btrfs: inode: Verify inode mode to avoid NULL pointer dereference") CC: stable@vger.kernel.org # 5.4+ Reviewed-by: Nikolay Borisov Signed-off-by: Pavel Machek (CIP) Reviewed-by: David Sterba Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/inode.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 6154fbaf43e1..1656ef0e959f 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -7014,7 +7014,7 @@ struct extent_map *btrfs_get_extent(struct btrfs_inode *inode, found_type == BTRFS_FILE_EXTENT_PREALLOC) { /* Only regular file could have regular/prealloc extent */ if (!S_ISREG(inode->vfs_inode.i_mode)) { - ret = -EUCLEAN; + err = -EUCLEAN; btrfs_crit(fs_info, "regular/prealloc extent found for non-regular inode %llu", btrfs_ino(inode)); From 0d4abc3512b0e79cd79c8d41dd3d7ed4a8bdbac1 Mon Sep 17 00:00:00 2001 From: Christian Eggers Date: Mon, 27 Jul 2020 12:16:05 +0200 Subject: [PATCH 18/91] dt-bindings: iio: io-channel-mux: Fix compatible string in example code commit add48ba425192c6e04ce70549129cacd01e2a09e upstream. The correct compatible string is "gpio-mux" (see bindings/mux/gpio-mux.txt). Cc: stable@vger.kernel.org # v4.13+ Reviewed-by: Peter Rosin Signed-off-by: Christian Eggers Link: https://lore.kernel.org/r/20200727101605.24384-1-ceggers@arri.de Signed-off-by: Rob Herring Signed-off-by: Greg Kroah-Hartman --- .../devicetree/bindings/iio/multiplexer/io-channel-mux.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/devicetree/bindings/iio/multiplexer/io-channel-mux.txt b/Documentation/devicetree/bindings/iio/multiplexer/io-channel-mux.txt index c82794002595..89647d714387 100644 --- a/Documentation/devicetree/bindings/iio/multiplexer/io-channel-mux.txt +++ b/Documentation/devicetree/bindings/iio/multiplexer/io-channel-mux.txt @@ -21,7 +21,7 @@ controller state. The mux controller state is described in Example: mux: mux-controller { - compatible = "mux-gpio"; + compatible = "gpio-mux"; #mux-control-cells = <0>; mux-gpios = <&pioA 0 GPIO_ACTIVE_HIGH>, From b86f06e13cc6700ec38e6226f4d2f1e5bbfc96b5 Mon Sep 17 00:00:00 2001 From: Alexandru Ardelean Date: Mon, 6 Jul 2020 14:02:57 +0300 Subject: [PATCH 19/91] iio: dac: ad5592r: fix unbalanced mutex unlocks in ad5592r_read_raw() commit 65afb0932a81c1de719ceee0db0b276094b10ac8 upstream. There are 2 exit paths where the lock isn't held, but try to unlock the mutex when exiting. In these places we should just return from the function. A neater approach would be to cleanup the ad5592r_read_raw(), but that would make this patch more difficult to backport to stable versions. Fixes 56ca9db862bf3: ("iio: dac: Add support for the AD5592R/AD5593R ADCs/DACs") Reported-by: Charles Stanhope Signed-off-by: Alexandru Ardelean Cc: Signed-off-by: Jonathan Cameron Signed-off-by: Greg Kroah-Hartman --- drivers/iio/dac/ad5592r-base.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/iio/dac/ad5592r-base.c b/drivers/iio/dac/ad5592r-base.c index 095530c233e4..7549abd544c0 100644 --- a/drivers/iio/dac/ad5592r-base.c +++ b/drivers/iio/dac/ad5592r-base.c @@ -417,7 +417,7 @@ static int ad5592r_read_raw(struct iio_dev *iio_dev, s64 tmp = *val * (3767897513LL / 25LL); *val = div_s64_rem(tmp, 1000000000LL, val2); - ret = IIO_VAL_INT_PLUS_MICRO; + return IIO_VAL_INT_PLUS_MICRO; } else { int mult; @@ -448,7 +448,7 @@ static int ad5592r_read_raw(struct iio_dev *iio_dev, ret = IIO_VAL_INT; break; default: - ret = -EINVAL; + return -EINVAL; } unlock: From 6ffc89cadbd02b83f23e572bb7c43ad9638f441f Mon Sep 17 00:00:00 2001 From: Max Filippov Date: Fri, 31 Jul 2020 12:37:32 -0700 Subject: [PATCH 20/91] xtensa: fix xtensa_pmu_setup prototype MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 6d65d3769d1910379e1cfa61ebf387efc6bfb22c upstream. Fix the following build error in configurations with CONFIG_XTENSA_VARIANT_HAVE_PERF_EVENTS=y: arch/xtensa/kernel/perf_event.c:420:29: error: passing argument 3 of ‘cpuhp_setup_state’ from incompatible pointer type Cc: stable@vger.kernel.org Fixes: 25a77b55e74c ("xtensa/perf: Convert the hotplug notifier to state machine callbacks") Signed-off-by: Max Filippov Signed-off-by: Greg Kroah-Hartman --- arch/xtensa/kernel/perf_event.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/xtensa/kernel/perf_event.c b/arch/xtensa/kernel/perf_event.c index ff1d81385ed7..768e1f7ab871 100644 --- a/arch/xtensa/kernel/perf_event.c +++ b/arch/xtensa/kernel/perf_event.c @@ -404,7 +404,7 @@ static struct pmu xtensa_pmu = { .read = xtensa_pmu_read, }; -static int xtensa_pmu_setup(int cpu) +static int xtensa_pmu_setup(unsigned int cpu) { unsigned i; From d9710cc6bd97679f5ab38918cb67c6641e962453 Mon Sep 17 00:00:00 2001 From: Paul Aurich Date: Thu, 9 Jul 2020 22:01:16 -0700 Subject: [PATCH 21/91] cifs: Fix leak when handling lease break for cached root fid commit baf57b56d3604880ccb3956ec6c62ea894f5de99 upstream. Handling a lease break for the cached root didn't free the smb2_lease_break_work allocation, resulting in a leak: unreferenced object 0xffff98383a5af480 (size 128): comm "cifsd", pid 684, jiffies 4294936606 (age 534.868s) hex dump (first 32 bytes): c0 ff ff ff 1f 00 00 00 88 f4 5a 3a 38 98 ff ff ..........Z:8... 88 f4 5a 3a 38 98 ff ff 80 88 d6 8a ff ff ff ff ..Z:8........... backtrace: [<0000000068957336>] smb2_is_valid_oplock_break+0x1fa/0x8c0 [<0000000073b70b9e>] cifs_demultiplex_thread+0x73d/0xcc0 [<00000000905fa372>] kthread+0x11c/0x150 [<0000000079378e4e>] ret_from_fork+0x22/0x30 Avoid this leak by only allocating when necessary. Fixes: a93864d93977 ("cifs: add lease tracking to the cached root fid") Signed-off-by: Paul Aurich CC: Stable # v4.18+ Reviewed-by: Aurelien Aptel Signed-off-by: Steve French Signed-off-by: Greg Kroah-Hartman --- fs/cifs/smb2misc.c | 75 ++++++++++++++++++++++++++++++++-------------- 1 file changed, 53 insertions(+), 22 deletions(-) diff --git a/fs/cifs/smb2misc.c b/fs/cifs/smb2misc.c index 14265b4bbcc0..2fc96f7923ee 100644 --- a/fs/cifs/smb2misc.c +++ b/fs/cifs/smb2misc.c @@ -509,15 +509,31 @@ cifs_ses_oplock_break(struct work_struct *work) kfree(lw); } -static bool -smb2_tcon_has_lease(struct cifs_tcon *tcon, struct smb2_lease_break *rsp, - struct smb2_lease_break_work *lw) +static void +smb2_queue_pending_open_break(struct tcon_link *tlink, __u8 *lease_key, + __le32 new_lease_state) +{ + struct smb2_lease_break_work *lw; + + lw = kmalloc(sizeof(struct smb2_lease_break_work), GFP_KERNEL); + if (!lw) { + cifs_put_tlink(tlink); + return; + } + + INIT_WORK(&lw->lease_break, cifs_ses_oplock_break); + lw->tlink = tlink; + lw->lease_state = new_lease_state; + memcpy(lw->lease_key, lease_key, SMB2_LEASE_KEY_SIZE); + queue_work(cifsiod_wq, &lw->lease_break); +} + +static bool +smb2_tcon_has_lease(struct cifs_tcon *tcon, struct smb2_lease_break *rsp) { - bool found; __u8 lease_state; struct list_head *tmp; struct cifsFileInfo *cfile; - struct cifs_pending_open *open; struct cifsInodeInfo *cinode; int ack_req = le32_to_cpu(rsp->Flags & SMB2_NOTIFY_BREAK_LEASE_FLAG_ACK_REQUIRED); @@ -556,22 +572,29 @@ smb2_tcon_has_lease(struct cifs_tcon *tcon, struct smb2_lease_break *rsp, &cinode->flags); cifs_queue_oplock_break(cfile); - kfree(lw); return true; } - found = false; + return false; +} + +static struct cifs_pending_open * +smb2_tcon_find_pending_open_lease(struct cifs_tcon *tcon, + struct smb2_lease_break *rsp) +{ + __u8 lease_state = le32_to_cpu(rsp->NewLeaseState); + int ack_req = le32_to_cpu(rsp->Flags & + SMB2_NOTIFY_BREAK_LEASE_FLAG_ACK_REQUIRED); + struct cifs_pending_open *open; + struct cifs_pending_open *found = NULL; + list_for_each_entry(open, &tcon->pending_opens, olist) { if (memcmp(open->lease_key, rsp->LeaseKey, SMB2_LEASE_KEY_SIZE)) continue; if (!found && ack_req) { - found = true; - memcpy(lw->lease_key, open->lease_key, - SMB2_LEASE_KEY_SIZE); - lw->tlink = cifs_get_tlink(open->tlink); - queue_work(cifsiod_wq, &lw->lease_break); + found = open; } cifs_dbg(FYI, "found in the pending open list\n"); @@ -592,14 +615,7 @@ smb2_is_valid_lease_break(char *buffer) struct TCP_Server_Info *server; struct cifs_ses *ses; struct cifs_tcon *tcon; - struct smb2_lease_break_work *lw; - - lw = kmalloc(sizeof(struct smb2_lease_break_work), GFP_KERNEL); - if (!lw) - return false; - - INIT_WORK(&lw->lease_break, cifs_ses_oplock_break); - lw->lease_state = rsp->NewLeaseState; + struct cifs_pending_open *open; cifs_dbg(FYI, "Checking for lease break\n"); @@ -617,11 +633,27 @@ smb2_is_valid_lease_break(char *buffer) spin_lock(&tcon->open_file_lock); cifs_stats_inc( &tcon->stats.cifs_stats.num_oplock_brks); - if (smb2_tcon_has_lease(tcon, rsp, lw)) { + if (smb2_tcon_has_lease(tcon, rsp)) { spin_unlock(&tcon->open_file_lock); spin_unlock(&cifs_tcp_ses_lock); return true; } + open = smb2_tcon_find_pending_open_lease(tcon, + rsp); + if (open) { + __u8 lease_key[SMB2_LEASE_KEY_SIZE]; + struct tcon_link *tlink; + + tlink = cifs_get_tlink(open->tlink); + memcpy(lease_key, open->lease_key, + SMB2_LEASE_KEY_SIZE); + spin_unlock(&tcon->open_file_lock); + spin_unlock(&cifs_tcp_ses_lock); + smb2_queue_pending_open_break(tlink, + lease_key, + rsp->NewLeaseState); + return true; + } spin_unlock(&tcon->open_file_lock); if (tcon->crfid.is_valid && @@ -639,7 +671,6 @@ smb2_is_valid_lease_break(char *buffer) } } spin_unlock(&cifs_tcp_ses_lock); - kfree(lw); cifs_dbg(FYI, "Can not process lease break - no lease matched\n"); return false; } From b11ac832808158b2df38482988aea703640127f5 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Fri, 24 Jul 2020 19:25:25 +1000 Subject: [PATCH 22/91] powerpc: Allow 4224 bytes of stack expansion for the signal frame commit 63dee5df43a31f3844efabc58972f0a206ca4534 upstream. We have powerpc specific logic in our page fault handling to decide if an access to an unmapped address below the stack pointer should expand the stack VMA. The code was originally added in 2004 "ported from 2.4". The rough logic is that the stack is allowed to grow to 1MB with no extra checking. Over 1MB the access must be within 2048 bytes of the stack pointer, or be from a user instruction that updates the stack pointer. The 2048 byte allowance below the stack pointer is there to cover the 288 byte "red zone" as well as the "about 1.5kB" needed by the signal delivery code. Unfortunately since then the signal frame has expanded, and is now 4224 bytes on 64-bit kernels with transactional memory enabled. This means if a process has consumed more than 1MB of stack, and its stack pointer lies less than 4224 bytes from the next page boundary, signal delivery will fault when trying to expand the stack and the process will see a SEGV. The total size of the signal frame is the size of struct rt_sigframe (which includes the red zone) plus __SIGNAL_FRAMESIZE (128 bytes on 64-bit). The 2048 byte allowance was correct until 2008 as the signal frame was: struct rt_sigframe { struct ucontext uc; /* 0 1440 */ /* --- cacheline 11 boundary (1408 bytes) was 32 bytes ago --- */ long unsigned int _unused[2]; /* 1440 16 */ unsigned int tramp[6]; /* 1456 24 */ struct siginfo * pinfo; /* 1480 8 */ void * puc; /* 1488 8 */ struct siginfo info; /* 1496 128 */ /* --- cacheline 12 boundary (1536 bytes) was 88 bytes ago --- */ char abigap[288]; /* 1624 288 */ /* size: 1920, cachelines: 15, members: 7 */ /* padding: 8 */ }; 1920 + 128 = 2048 Then in commit ce48b2100785 ("powerpc: Add VSX context save/restore, ptrace and signal support") (Jul 2008) the signal frame expanded to 2304 bytes: struct rt_sigframe { struct ucontext uc; /* 0 1696 */ <-- /* --- cacheline 13 boundary (1664 bytes) was 32 bytes ago --- */ long unsigned int _unused[2]; /* 1696 16 */ unsigned int tramp[6]; /* 1712 24 */ struct siginfo * pinfo; /* 1736 8 */ void * puc; /* 1744 8 */ struct siginfo info; /* 1752 128 */ /* --- cacheline 14 boundary (1792 bytes) was 88 bytes ago --- */ char abigap[288]; /* 1880 288 */ /* size: 2176, cachelines: 17, members: 7 */ /* padding: 8 */ }; 2176 + 128 = 2304 At this point we should have been exposed to the bug, though as far as I know it was never reported. I no longer have a system old enough to easily test on. Then in 2010 commit 320b2b8de126 ("mm: keep a guard page below a grow-down stack segment") caused our stack expansion code to never trigger, as there was always a VMA found for a write up to PAGE_SIZE below r1. That meant the bug was hidden as we continued to expand the signal frame in commit 2b0a576d15e0 ("powerpc: Add new transactional memory state to the signal context") (Feb 2013): struct rt_sigframe { struct ucontext uc; /* 0 1696 */ /* --- cacheline 13 boundary (1664 bytes) was 32 bytes ago --- */ struct ucontext uc_transact; /* 1696 1696 */ <-- /* --- cacheline 26 boundary (3328 bytes) was 64 bytes ago --- */ long unsigned int _unused[2]; /* 3392 16 */ unsigned int tramp[6]; /* 3408 24 */ struct siginfo * pinfo; /* 3432 8 */ void * puc; /* 3440 8 */ struct siginfo info; /* 3448 128 */ /* --- cacheline 27 boundary (3456 bytes) was 120 bytes ago --- */ char abigap[288]; /* 3576 288 */ /* size: 3872, cachelines: 31, members: 8 */ /* padding: 8 */ /* last cacheline: 32 bytes */ }; 3872 + 128 = 4000 And commit 573ebfa6601f ("powerpc: Increase stack redzone for 64-bit userspace to 512 bytes") (Feb 2014): struct rt_sigframe { struct ucontext uc; /* 0 1696 */ /* --- cacheline 13 boundary (1664 bytes) was 32 bytes ago --- */ struct ucontext uc_transact; /* 1696 1696 */ /* --- cacheline 26 boundary (3328 bytes) was 64 bytes ago --- */ long unsigned int _unused[2]; /* 3392 16 */ unsigned int tramp[6]; /* 3408 24 */ struct siginfo * pinfo; /* 3432 8 */ void * puc; /* 3440 8 */ struct siginfo info; /* 3448 128 */ /* --- cacheline 27 boundary (3456 bytes) was 120 bytes ago --- */ char abigap[512]; /* 3576 512 */ <-- /* size: 4096, cachelines: 32, members: 8 */ /* padding: 8 */ }; 4096 + 128 = 4224 Then finally in 2017, commit 1be7107fbe18 ("mm: larger stack guard gap, between vmas") exposed us to the existing bug, because it changed the stack VMA to be the correct/real size, meaning our stack expansion code is now triggered. Fix it by increasing the allowance to 4224 bytes. Hard-coding 4224 is obviously unsafe against future expansions of the signal frame in the same way as the existing code. We can't easily use sizeof() because the signal frame structure is not in a header. We will either fix that, or rip out all the custom stack expansion checking logic entirely. Fixes: ce48b2100785 ("powerpc: Add VSX context save/restore, ptrace and signal support") Cc: stable@vger.kernel.org # v2.6.27+ Reported-by: Tom Lane Tested-by: Daniel Axtens Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200724092528.1578671-2-mpe@ellerman.id.au Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/mm/fault.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c index 6e0ff8b600ce..eb5252177b66 100644 --- a/arch/powerpc/mm/fault.c +++ b/arch/powerpc/mm/fault.c @@ -233,6 +233,9 @@ static bool bad_kernel_fault(bool is_exec, unsigned long error_code, return is_exec || (address >= TASK_SIZE); } +// This comes from 64-bit struct rt_sigframe + __SIGNAL_FRAMESIZE +#define SIGFRAME_MAX_SIZE (4096 + 128) + static bool bad_stack_expansion(struct pt_regs *regs, unsigned long address, struct vm_area_struct *vma, unsigned int flags, bool *must_retry) @@ -240,7 +243,7 @@ static bool bad_stack_expansion(struct pt_regs *regs, unsigned long address, /* * N.B. The POWER/Open ABI allows programs to access up to * 288 bytes below the stack pointer. - * The kernel signal delivery code writes up to about 1.5kB + * The kernel signal delivery code writes a bit over 4KB * below the stack pointer (r1) before decrementing it. * The exec code can write slightly over 640kB to the stack * before setting the user r1. Thus we allow the stack to @@ -265,7 +268,7 @@ static bool bad_stack_expansion(struct pt_regs *regs, unsigned long address, * between the last mapped region and the stack will * expand the stack rather than segfaulting. */ - if (address + 2048 >= uregs->gpr[1]) + if (address + SIGFRAME_MAX_SIZE >= uregs->gpr[1]) return false; if ((flags & FAULT_FLAG_WRITE) && (flags & FAULT_FLAG_USER) && From e83f99c428000d5e347b953da8d539ee67113134 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Tue, 4 Aug 2020 22:44:06 +1000 Subject: [PATCH 23/91] powerpc: Fix circular dependency between percpu.h and mmu.h commit 0c83b277ada72b585e6a3e52b067669df15bcedb upstream. Recently random.h started including percpu.h (see commit f227e3ec3b5c ("random32: update the net random state on interrupt and activity")), which broke corenet64_smp_defconfig: In file included from /linux/arch/powerpc/include/asm/paca.h:18, from /linux/arch/powerpc/include/asm/percpu.h:13, from /linux/include/linux/random.h:14, from /linux/lib/uuid.c:14: /linux/arch/powerpc/include/asm/mmu.h:139:22: error: unknown type name 'next_tlbcam_idx' 139 | DECLARE_PER_CPU(int, next_tlbcam_idx); This is due to a circular header dependency: asm/mmu.h includes asm/percpu.h, which includes asm/paca.h, which includes asm/mmu.h Which means DECLARE_PER_CPU() isn't defined when mmu.h needs it. We can fix it by moving the include of paca.h below the include of asm-generic/percpu.h. This moves the include of paca.h out of the #ifdef __powerpc64__, but that is OK because paca.h is almost entirely inside #ifdef CONFIG_PPC64 anyway. It also moves the include of paca.h out of the #ifdef CONFIG_SMP, which could possibly break something, but seems to have no ill effects. Fixes: f227e3ec3b5c ("random32: update the net random state on interrupt and activity") Cc: stable@vger.kernel.org # v5.8 Reported-by: Stephen Rothwell Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200804130558.292328-1-mpe@ellerman.id.au Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/include/asm/percpu.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/include/asm/percpu.h b/arch/powerpc/include/asm/percpu.h index dce863a7635c..8e5b7d0b851c 100644 --- a/arch/powerpc/include/asm/percpu.h +++ b/arch/powerpc/include/asm/percpu.h @@ -10,8 +10,6 @@ #ifdef CONFIG_SMP -#include - #define __my_cpu_offset local_paca->data_offset #endif /* CONFIG_SMP */ @@ -19,4 +17,6 @@ #include +#include + #endif /* _ASM_POWERPC_PERCPU_H_ */ From 62f8d71408990bc26690e02370c1ae7973625d2b Mon Sep 17 00:00:00 2001 From: Eugeniu Rosca Date: Tue, 2 Jun 2020 21:50:16 +0200 Subject: [PATCH 24/91] media: vsp1: dl: Fix NULL pointer dereference on unbind commit c92d30e4b78dc331909f8c6056c2792aa14e2166 upstream. In commit f3b98e3c4d2e16 ("media: vsp1: Provide support for extended command pools"), the vsp pointer used for referencing the VSP1 device structure from a command pool during vsp1_dl_ext_cmd_pool_destroy() was not populated. Correctly assign the pointer to prevent the following null-pointer-dereference when removing the device: [*] h3ulcb-kf #> echo fea28000.vsp > /sys/bus/platform/devices/fea28000.vsp/driver/unbind Unable to handle kernel NULL pointer dereference at virtual address 0000000000000028 Mem abort info: ESR = 0x96000006 EC = 0x25: DABT (current EL), IL = 32 bits SET = 0, FnV = 0 EA = 0, S1PTW = 0 Data abort info: ISV = 0, ISS = 0x00000006 CM = 0, WnR = 0 user pgtable: 4k pages, 48-bit VAs, pgdp=00000007318be000 [0000000000000028] pgd=00000007333a1003, pud=00000007333a6003, pmd=0000000000000000 Internal error: Oops: 96000006 [#1] PREEMPT SMP Modules linked in: CPU: 1 PID: 486 Comm: sh Not tainted 5.7.0-rc6-arm64-renesas-00118-ge644645abf47 #185 Hardware name: Renesas H3ULCB Kingfisher board based on r8a77951 (DT) pstate: 40000005 (nZcv daif -PAN -UAO) pc : vsp1_dlm_destroy+0xe4/0x11c lr : vsp1_dlm_destroy+0xc8/0x11c sp : ffff800012963b60 x29: ffff800012963b60 x28: ffff0006f83fc440 x27: 0000000000000000 x26: ffff0006f5e13e80 x25: ffff0006f5e13ed0 x24: ffff0006f5e13ed0 x23: ffff0006f5e13ed0 x22: dead000000000122 x21: ffff0006f5e3a080 x20: ffff0006f5df2938 x19: ffff0006f5df2980 x18: 0000000000000003 x17: 0000000000000000 x16: 0000000000000016 x15: 0000000000000003 x14: 00000000000393c0 x13: ffff800011a5ec18 x12: ffff800011d8d000 x11: ffff0006f83fcc68 x10: ffff800011a53d70 x9 : ffff8000111f3000 x8 : 0000000000000000 x7 : 0000000000210d00 x6 : 0000000000000000 x5 : ffff800010872e60 x4 : 0000000000000004 x3 : 0000000078068000 x2 : ffff800012781000 x1 : 0000000000002c00 x0 : 0000000000000000 Call trace: vsp1_dlm_destroy+0xe4/0x11c vsp1_wpf_destroy+0x10/0x20 vsp1_entity_destroy+0x24/0x4c vsp1_destroy_entities+0x54/0x130 vsp1_remove+0x1c/0x40 platform_drv_remove+0x28/0x50 __device_release_driver+0x178/0x220 device_driver_detach+0x44/0xc0 unbind_store+0xe0/0x104 drv_attr_store+0x20/0x30 sysfs_kf_write+0x48/0x70 kernfs_fop_write+0x148/0x230 __vfs_write+0x18/0x40 vfs_write+0xdc/0x1c4 ksys_write+0x68/0xf0 __arm64_sys_write+0x18/0x20 el0_svc_common.constprop.0+0x70/0x170 do_el0_svc+0x20/0x80 el0_sync_handler+0x134/0x1b0 el0_sync+0x140/0x180 Code: b40000c2 f9403a60 d2800084 a9400663 (f9401400) ---[ end trace 3875369841fb288a ]--- Fixes: f3b98e3c4d2e16 ("media: vsp1: Provide support for extended command pools") Cc: stable@vger.kernel.org # v4.19+ Signed-off-by: Eugeniu Rosca Reviewed-by: Kieran Bingham Tested-by: Kieran Bingham Reviewed-by: Laurent Pinchart Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Greg Kroah-Hartman --- drivers/media/platform/vsp1/vsp1_dl.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/media/platform/vsp1/vsp1_dl.c b/drivers/media/platform/vsp1/vsp1_dl.c index a5634ca85a31..a07caf981e15 100644 --- a/drivers/media/platform/vsp1/vsp1_dl.c +++ b/drivers/media/platform/vsp1/vsp1_dl.c @@ -431,6 +431,8 @@ vsp1_dl_cmd_pool_create(struct vsp1_device *vsp1, enum vsp1_extcmd_type type, if (!pool) return NULL; + pool->vsp1 = vsp1; + spin_lock_init(&pool->lock); INIT_LIST_HEAD(&pool->free); From 26f0092f35e239e072276aa6c33e7d90bf0ac66e Mon Sep 17 00:00:00 2001 From: Jonathan McDowell Date: Wed, 12 Aug 2020 20:37:23 +0100 Subject: [PATCH 25/91] net: ethernet: stmmac: Disable hardware multicast filter commit df43dd526e6609769ae513a81443c7aa727c8ca3 upstream. The IPQ806x does not appear to have a functional multicast ethernet address filter. This was observed as a failure to correctly receive IPv6 packets on a LAN to the all stations address. Checking the vendor driver shows that it does not attempt to enable the multicast filter and instead falls back to receiving all multicast packets, internally setting ALLMULTI. Use the new fallback support in the dwmac1000 driver to correctly achieve the same with the mainline IPQ806x driver. Confirmed to fix IPv6 functionality on an RB3011 router. Cc: stable@vger.kernel.org Signed-off-by: Jonathan McDowell Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/stmicro/stmmac/dwmac-ipq806x.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-ipq806x.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-ipq806x.c index 4d75158c64b2..826626e870d5 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-ipq806x.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-ipq806x.c @@ -350,6 +350,7 @@ static int ipq806x_gmac_probe(struct platform_device *pdev) plat_dat->has_gmac = true; plat_dat->bsp_priv = gmac; plat_dat->fix_mac_speed = ipq806x_gmac_fix_mac_speed; + plat_dat->multicast_filter_bins = 0; err = stmmac_dvr_probe(&pdev->dev, plat_dat, &stmmac_res); if (err) From c334db67ebb3b5af540481f22b2d1b4f837c0992 Mon Sep 17 00:00:00 2001 From: Jonathan McDowell Date: Wed, 12 Aug 2020 20:37:01 +0100 Subject: [PATCH 26/91] net: stmmac: dwmac1000: provide multicast filter fallback commit 592d751c1e174df5ff219946908b005eb48934b3 upstream. If we don't have a hardware multicast filter available then instead of silently failing to listen for the requested ethernet broadcast addresses fall back to receiving all multicast packets, in a similar fashion to other drivers with no multicast filter. Cc: stable@vger.kernel.org Signed-off-by: Jonathan McDowell Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c b/drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c index e4e9a7591efe..4d617ba11ecb 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c @@ -176,6 +176,9 @@ static void dwmac1000_set_filter(struct mac_device_info *hw, value = GMAC_FRAME_FILTER_PR; } else if (dev->flags & IFF_ALLMULTI) { value = GMAC_FRAME_FILTER_PM; /* pass all multi */ + } else if (!netdev_mc_empty(dev) && (mcbitslog2 == 0)) { + /* Fall back to all multicast if we've no filter */ + value = GMAC_FRAME_FILTER_PM; } else if (!netdev_mc_empty(dev)) { struct netdev_hw_addr *ha; From f90339a4eccf1768f044ac98ec6d2a8afeeab58d Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Tue, 9 Jun 2020 16:11:29 -0700 Subject: [PATCH 27/91] net/compat: Add missing sock updates for SCM_RIGHTS commit d9539752d23283db4692384a634034f451261e29 upstream. Add missed sock updates to compat path via a new helper, which will be used more in coming patches. (The net/core/scm.c code is left as-is here to assist with -stable backports for the compat path.) Cc: Christoph Hellwig Cc: Sargun Dhillon Cc: Jakub Kicinski Cc: stable@vger.kernel.org Fixes: 48a87cc26c13 ("net: netprio: fd passed in SCM_RIGHTS datagram not set correctly") Fixes: d84295067fc7 ("net: net_cls: fd passed in SCM_RIGHTS datagram not set correctly") Acked-by: Christian Brauner Signed-off-by: Kees Cook Signed-off-by: Greg Kroah-Hartman --- include/net/sock.h | 4 ++++ net/compat.c | 1 + net/core/sock.c | 21 +++++++++++++++++++++ 3 files changed, 26 insertions(+) diff --git a/include/net/sock.h b/include/net/sock.h index e2df102e669e..77f36257cac9 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -845,6 +845,8 @@ static inline int sk_memalloc_socks(void) { return static_branch_unlikely(&memalloc_socks_key); } + +void __receive_sock(struct file *file); #else static inline int sk_memalloc_socks(void) @@ -852,6 +854,8 @@ static inline int sk_memalloc_socks(void) return 0; } +static inline void __receive_sock(struct file *file) +{ } #endif static inline gfp_t sk_gfp_mask(const struct sock *sk, gfp_t gfp_mask) diff --git a/net/compat.c b/net/compat.c index 3c4b0283b29a..2a8c7cb5f06a 100644 --- a/net/compat.c +++ b/net/compat.c @@ -289,6 +289,7 @@ void scm_detach_fds_compat(struct msghdr *kmsg, struct scm_cookie *scm) break; } /* Bump the usage count and install the file. */ + __receive_sock(fp[i]); fd_install(new_fd, get_file(fp[i])); } diff --git a/net/core/sock.c b/net/core/sock.c index 6c3b031b6ad6..e6cbe137cb6f 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -2636,6 +2636,27 @@ int sock_no_mmap(struct file *file, struct socket *sock, struct vm_area_struct * } EXPORT_SYMBOL(sock_no_mmap); +/* + * When a file is received (via SCM_RIGHTS, etc), we must bump the + * various sock-based usage counts. + */ +void __receive_sock(struct file *file) +{ + struct socket *sock; + int error; + + /* + * The resulting value of "error" is ignored here since we only + * need to take action when the file is a socket and testing + * "sock" for NULL is sufficient. + */ + sock = sock_from_file(file, &error); + if (sock) { + sock_update_netprioidx(&sock->sk->sk_cgrp_data); + sock_update_classid(&sock->sk->sk_cgrp_data); + } +} + ssize_t sock_no_sendpage(struct socket *sock, struct page *page, int offset, size_t size, int flags) { ssize_t res; From 566cba3c7d1fe14fdc9c43931035e9ae4320f973 Mon Sep 17 00:00:00 2001 From: ChangSyun Peng Date: Fri, 31 Jul 2020 17:50:17 +0800 Subject: [PATCH 28/91] md/raid5: Fix Force reconstruct-write io stuck in degraded raid5 commit a1c6ae3d9f3dd6aa5981a332a6f700cf1c25edef upstream. In degraded raid5, we need to read parity to do reconstruct-write when data disks fail. However, we can not read parity from handle_stripe_dirtying() in force reconstruct-write mode. Reproducible Steps: 1. Create degraded raid5 mdadm -C /dev/md2 --assume-clean -l5 -n3 /dev/sda2 /dev/sdb2 missing 2. Set rmw_level to 0 echo 0 > /sys/block/md2/md/rmw_level 3. IO to raid5 Now some io may be stuck in raid5. We can use handle_stripe_fill() to read the parity in this situation. Cc: # v4.4+ Reviewed-by: Alex Wu Reviewed-by: BingJing Chang Reviewed-by: Danny Shih Signed-off-by: ChangSyun Peng Signed-off-by: Song Liu Signed-off-by: Greg Kroah-Hartman --- drivers/md/raid5.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 01021382131b..d91154d65455 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -3596,6 +3596,7 @@ static int need_this_block(struct stripe_head *sh, struct stripe_head_state *s, * is missing/faulty, then we need to read everything we can. */ if (sh->raid_conf->level != 6 && + sh->raid_conf->rmw_level != PARITY_DISABLE_RMW && sh->sector < sh->raid_conf->mddev->recovery_cp) /* reconstruct-write isn't being forced */ return 0; @@ -4832,7 +4833,7 @@ static void handle_stripe(struct stripe_head *sh) * or to load a block that is being partially written. */ if (s.to_read || s.non_overwrite - || (conf->level == 6 && s.to_write && s.failed) + || (s.to_write && s.failed) || (s.syncing && (s.uptodate + s.compute < disks)) || s.replacing || s.expanding) From d6e2394ce6c9554a78a88fab2a779e3168088a47 Mon Sep 17 00:00:00 2001 From: Coly Li Date: Sat, 25 Jul 2020 20:00:16 +0800 Subject: [PATCH 29/91] bcache: allocate meta data pages as compound pages commit 5fe48867856367142d91a82f2cbf7a57a24cbb70 upstream. There are some meta data of bcache are allocated by multiple pages, and they are used as bio bv_page for I/Os to the cache device. for example cache_set->uuids, cache->disk_buckets, journal_write->data, bset_tree->data. For such meta data memory, all the allocated pages should be treated as a single memory block. Then the memory management and underlying I/O code can treat them more clearly. This patch adds __GFP_COMP flag to all the location allocating >0 order pages for the above mentioned meta data. Then their pages are treated as compound pages now. Signed-off-by: Coly Li Cc: stable@vger.kernel.org Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman --- drivers/md/bcache/bset.c | 2 +- drivers/md/bcache/btree.c | 2 +- drivers/md/bcache/journal.c | 4 ++-- drivers/md/bcache/super.c | 2 +- 4 files changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/md/bcache/bset.c b/drivers/md/bcache/bset.c index 268f1b685084..ec48cf86cab6 100644 --- a/drivers/md/bcache/bset.c +++ b/drivers/md/bcache/bset.c @@ -321,7 +321,7 @@ int bch_btree_keys_alloc(struct btree_keys *b, b->page_order = page_order; - t->data = (void *) __get_free_pages(gfp, b->page_order); + t->data = (void *) __get_free_pages(__GFP_COMP|gfp, b->page_order); if (!t->data) goto err; diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c index 38a8f8d2a908..d320574b9a4c 100644 --- a/drivers/md/bcache/btree.c +++ b/drivers/md/bcache/btree.c @@ -830,7 +830,7 @@ int bch_btree_cache_alloc(struct cache_set *c) mutex_init(&c->verify_lock); c->verify_ondisk = (void *) - __get_free_pages(GFP_KERNEL, ilog2(bucket_pages(c))); + __get_free_pages(GFP_KERNEL|__GFP_COMP, ilog2(bucket_pages(c))); c->verify_data = mca_bucket_alloc(c, &ZERO_KEY, GFP_KERNEL); diff --git a/drivers/md/bcache/journal.c b/drivers/md/bcache/journal.c index 7bb15cddca5e..182c2b7bd960 100644 --- a/drivers/md/bcache/journal.c +++ b/drivers/md/bcache/journal.c @@ -864,8 +864,8 @@ int bch_journal_alloc(struct cache_set *c) j->w[1].c = c; if (!(init_fifo(&j->pin, JOURNAL_PIN, GFP_KERNEL)) || - !(j->w[0].data = (void *) __get_free_pages(GFP_KERNEL, JSET_BITS)) || - !(j->w[1].data = (void *) __get_free_pages(GFP_KERNEL, JSET_BITS))) + !(j->w[0].data = (void *) __get_free_pages(GFP_KERNEL|__GFP_COMP, JSET_BITS)) || + !(j->w[1].data = (void *) __get_free_pages(GFP_KERNEL|__GFP_COMP, JSET_BITS))) return -ENOMEM; return 0; diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c index 46ad0bf18e1f..825bfde10c69 100644 --- a/drivers/md/bcache/super.c +++ b/drivers/md/bcache/super.c @@ -1693,7 +1693,7 @@ void bch_cache_set_unregister(struct cache_set *c) } #define alloc_bucket_pages(gfp, c) \ - ((void *) __get_free_pages(__GFP_ZERO|gfp, ilog2(bucket_pages(c)))) + ((void *) __get_free_pages(__GFP_ZERO|__GFP_COMP|gfp, ilog2(bucket_pages(c)))) struct cache_set *bch_cache_set_alloc(struct cache_sb *sb) { From 2a72c283319c2be9b4667630d8d0c98b59371930 Mon Sep 17 00:00:00 2001 From: Coly Li Date: Sat, 25 Jul 2020 20:00:22 +0800 Subject: [PATCH 30/91] bcache: fix overflow in offset_to_stripe() commit 7a1481267999c02abf4a624515c1b5c7c1fccbd6 upstream. offset_to_stripe() returns the stripe number (in type unsigned int) from an offset (in type uint64_t) by the following calculation, do_div(offset, d->stripe_size); For large capacity backing device (e.g. 18TB) with small stripe size (e.g. 4KB), the result is 4831838208 and exceeds UINT_MAX. The actual returned value which caller receives is 536870912, due to the overflow. Indeed in bcache_device_init(), bcache_device->nr_stripes is limited in range [1, INT_MAX]. Therefore all valid stripe numbers in bcache are in range [0, bcache_dev->nr_stripes - 1]. This patch adds a upper limition check in offset_to_stripe(): the max valid stripe number should be less than bcache_device->nr_stripes. If the calculated stripe number from do_div() is equal to or larger than bcache_device->nr_stripe, -EINVAL will be returned. (Normally nr_stripes is less than INT_MAX, exceeding upper limitation doesn't mean overflow, therefore -EOVERFLOW is not used as error code.) This patch also changes nr_stripes' type of struct bcache_device from 'unsigned int' to 'int', and return value type of offset_to_stripe() from 'unsigned int' to 'int', to match their exact data ranges. All locations where bcache_device->nr_stripes and offset_to_stripe() are referenced also get updated for the above type change. Reported-and-tested-by: Ken Raeburn Signed-off-by: Coly Li Cc: stable@vger.kernel.org Link: https://bugzilla.redhat.com/show_bug.cgi?id=1783075 Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman --- drivers/md/bcache/bcache.h | 2 +- drivers/md/bcache/writeback.c | 14 +++++++++----- drivers/md/bcache/writeback.h | 19 +++++++++++++++++-- 3 files changed, 27 insertions(+), 8 deletions(-) diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h index 2a2f189dd37c..1cc6ae3e058c 100644 --- a/drivers/md/bcache/bcache.h +++ b/drivers/md/bcache/bcache.h @@ -264,7 +264,7 @@ struct bcache_device { #define BCACHE_DEV_UNLINK_DONE 2 #define BCACHE_DEV_WB_RUNNING 3 #define BCACHE_DEV_RATE_DW_RUNNING 4 - unsigned int nr_stripes; + int nr_stripes; unsigned int stripe_size; atomic_t *stripe_sectors_dirty; unsigned long *full_dirty_stripes; diff --git a/drivers/md/bcache/writeback.c b/drivers/md/bcache/writeback.c index b5fc3c6c7178..aa58833fb012 100644 --- a/drivers/md/bcache/writeback.c +++ b/drivers/md/bcache/writeback.c @@ -506,15 +506,19 @@ void bcache_dev_sectors_dirty_add(struct cache_set *c, unsigned int inode, uint64_t offset, int nr_sectors) { struct bcache_device *d = c->devices[inode]; - unsigned int stripe_offset, stripe, sectors_dirty; + unsigned int stripe_offset, sectors_dirty; + int stripe; if (!d) return; + stripe = offset_to_stripe(d, offset); + if (stripe < 0) + return; + if (UUID_FLASH_ONLY(&c->uuids[inode])) atomic_long_add(nr_sectors, &c->flash_dev_dirty_sectors); - stripe = offset_to_stripe(d, offset); stripe_offset = offset & (d->stripe_size - 1); while (nr_sectors) { @@ -554,12 +558,12 @@ static bool dirty_pred(struct keybuf *buf, struct bkey *k) static void refill_full_stripes(struct cached_dev *dc) { struct keybuf *buf = &dc->writeback_keys; - unsigned int start_stripe, stripe, next_stripe; + unsigned int start_stripe, next_stripe; + int stripe; bool wrapped = false; stripe = offset_to_stripe(&dc->disk, KEY_OFFSET(&buf->last_scanned)); - - if (stripe >= dc->disk.nr_stripes) + if (stripe < 0) stripe = 0; start_stripe = stripe; diff --git a/drivers/md/bcache/writeback.h b/drivers/md/bcache/writeback.h index e75dc33339f6..b902e574c5c4 100644 --- a/drivers/md/bcache/writeback.h +++ b/drivers/md/bcache/writeback.h @@ -28,10 +28,22 @@ static inline uint64_t bcache_dev_sectors_dirty(struct bcache_device *d) return ret; } -static inline unsigned int offset_to_stripe(struct bcache_device *d, +static inline int offset_to_stripe(struct bcache_device *d, uint64_t offset) { do_div(offset, d->stripe_size); + + /* d->nr_stripes is in range [1, INT_MAX] */ + if (unlikely(offset >= d->nr_stripes)) { + pr_err("Invalid stripe %llu (>= nr_stripes %d).\n", + offset, d->nr_stripes); + return -EINVAL; + } + + /* + * Here offset is definitly smaller than INT_MAX, + * return it as int will never overflow. + */ return offset; } @@ -39,7 +51,10 @@ static inline bool bcache_dev_stripe_dirty(struct cached_dev *dc, uint64_t offset, unsigned int nr_sectors) { - unsigned int stripe = offset_to_stripe(&dc->disk, offset); + int stripe = offset_to_stripe(&dc->disk, offset); + + if (stripe < 0) + return false; while (1) { if (atomic_read(dc->disk.stripe_sectors_dirty + stripe)) From 4cf1d191f77f8b81d0ed1cea344ca73c0d4cb2bc Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 3 Aug 2020 11:02:10 +0200 Subject: [PATCH 31/91] mac80211: fix misplaced while instead of if commit 5981fe5b0529ba25d95f37d7faa434183ad618c5 upstream. This never was intended to be a 'while' loop, it should've just been an 'if' instead of 'while'. Fix this. I noticed this while applying another patch from Ben that intended to fix a busy loop at this spot. Cc: stable@vger.kernel.org Fixes: b16798f5b907 ("mac80211: mark station unauthorized before key removal") Reported-by: Ben Greear Link: https://lore.kernel.org/r/20200803110209.253009ae41ff.I3522aad099392b31d5cf2dcca34cbac7e5832dde@changeid Signed-off-by: Johannes Berg Signed-off-by: Greg Kroah-Hartman --- net/mac80211/sta_info.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c index ec2e83272f9d..2a82d438991b 100644 --- a/net/mac80211/sta_info.c +++ b/net/mac80211/sta_info.c @@ -979,7 +979,7 @@ static void __sta_info_destroy_part2(struct sta_info *sta) might_sleep(); lockdep_assert_held(&local->sta_mtx); - while (sta->sta_state == IEEE80211_STA_AUTHORIZED) { + if (sta->sta_state == IEEE80211_STA_AUTHORIZED) { ret = sta_info_move_state(sta, IEEE80211_STA_ASSOC); WARN_ON_ONCE(ret); } From 706695d477fb16a5920098535380ee39337e7ea8 Mon Sep 17 00:00:00 2001 From: Lukas Wunner Date: Wed, 8 Jul 2020 15:27:01 +0200 Subject: [PATCH 32/91] driver core: Avoid binding drivers to dead devices commit 654888327e9f655a9d55ad477a9583e90e8c9b5c upstream. Commit 3451a495ef24 ("driver core: Establish order of operations for device_add and device_del via bitflag") sought to prevent asynchronous driver binding to a device which is being removed. It added a per-device "dead" flag which is checked in the following code paths: * asynchronous binding in __driver_attach_async_helper() * synchronous binding in device_driver_attach() * asynchronous binding in __device_attach_async_helper() It did *not* check the flag upon: * synchronous binding in __device_attach() However __device_attach() may also be called asynchronously from: deferred_probe_work_func() bus_probe_device() device_initial_probe() __device_attach() So if the commit's intention was to check the "dead" flag in all asynchronous code paths, then a check is also necessary in __device_attach(). Add the missing check. Fixes: 3451a495ef24 ("driver core: Establish order of operations for device_add and device_del via bitflag") Signed-off-by: Lukas Wunner Cc: stable@vger.kernel.org # v5.1+ Cc: Alexander Duyck Link: https://lore.kernel.org/r/de88a23a6fe0ef70f7cfd13c8aea9ab51b4edab6.1594214103.git.lukas@wunner.de Signed-off-by: Greg Kroah-Hartman --- drivers/base/dd.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/base/dd.c b/drivers/base/dd.c index caaeb7910a04..0047bbdd43c0 100644 --- a/drivers/base/dd.c +++ b/drivers/base/dd.c @@ -792,7 +792,9 @@ static int __device_attach(struct device *dev, bool allow_async) int ret = 0; device_lock(dev); - if (dev->driver) { + if (dev->p->dead) { + goto out_unlock; + } else if (dev->driver) { if (device_is_bound(dev)) { ret = 1; goto out_unlock; From baa5bd366835ceb752e5f75ba993042a5b872dbf Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Thu, 16 Jul 2020 18:40:23 +0800 Subject: [PATCH 33/91] MIPS: CPU#0 is not hotpluggable commit 9cce844abf07b683cff5f0273977d5f8d0af94c7 upstream. Now CPU#0 is not hotpluggable on MIPS, so prevent to create /sys/devices /system/cpu/cpu0/online which confuses some user-space tools. Cc: stable@vger.kernel.org Signed-off-by: Huacai Chen Signed-off-by: Thomas Bogendoerfer Signed-off-by: Greg Kroah-Hartman --- arch/mips/kernel/topology.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/mips/kernel/topology.c b/arch/mips/kernel/topology.c index cd3e1f82e1a5..08ad6371fbe0 100644 --- a/arch/mips/kernel/topology.c +++ b/arch/mips/kernel/topology.c @@ -20,7 +20,7 @@ static int __init topology_init(void) for_each_present_cpu(i) { struct cpu *c = &per_cpu(cpu_devices, i); - c->hotpluggable = 1; + c->hotpluggable = !!i; ret = register_cpu(c, i); if (ret) printk(KERN_WARNING "topology_init: register_cpu %d " From 41d71ef2e791506bc3a8c5db155bb75daf623ce6 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Mon, 20 Apr 2020 16:02:21 -0400 Subject: [PATCH 34/91] ext2: fix missing percpu_counter_inc commit bc2fbaa4d3808aef82dd1064a8e61c16549fe956 upstream. sbi->s_freeinodes_counter is only decreased by the ext2 code, it is never increased. This patch fixes it. Note that sbi->s_freeinodes_counter is only used in the algorithm that tries to find the group for new allocations, so this bug is not easily visible (the only visibility is that the group finding algorithm selects inoptinal result). Link: https://lore.kernel.org/r/alpine.LRH.2.02.2004201538300.19436@file01.intranet.prod.int.rdu2.redhat.com Signed-off-by: Mikulas Patocka Cc: stable@vger.kernel.org Signed-off-by: Jan Kara Signed-off-by: Greg Kroah-Hartman --- fs/ext2/ialloc.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/ext2/ialloc.c b/fs/ext2/ialloc.c index 5c3d7b7e4975..d8a03b1afbc3 100644 --- a/fs/ext2/ialloc.c +++ b/fs/ext2/ialloc.c @@ -80,6 +80,7 @@ static void ext2_release_inode(struct super_block *sb, int group, int dir) if (dir) le16_add_cpu(&desc->bg_used_dirs_count, -1); spin_unlock(sb_bgl_lock(EXT2_SB(sb), group)); + percpu_counter_inc(&EXT2_SB(sb)->s_freeinodes_counter); if (dir) percpu_counter_dec(&EXT2_SB(sb)->s_dirs_counter); mark_buffer_dirty(bh); @@ -531,7 +532,7 @@ struct inode *ext2_new_inode(struct inode *dir, umode_t mode, goto fail; } - percpu_counter_add(&sbi->s_freeinodes_counter, -1); + percpu_counter_dec(&sbi->s_freeinodes_counter); if (S_ISDIR(mode)) percpu_counter_inc(&sbi->s_dirs_counter); From 73cbb8af7e8a80715e79b78dba1057b966849a37 Mon Sep 17 00:00:00 2001 From: Junxiao Bi Date: Thu, 6 Aug 2020 23:18:02 -0700 Subject: [PATCH 35/91] ocfs2: change slot number type s16 to u16 commit 38d51b2dd171ad973afc1f5faab825ed05a2d5e9 upstream. Dan Carpenter reported the following static checker warning. fs/ocfs2/super.c:1269 ocfs2_parse_options() warn: '(-1)' 65535 can't fit into 32767 'mopt->slot' fs/ocfs2/suballoc.c:859 ocfs2_init_inode_steal_slot() warn: '(-1)' 65535 can't fit into 32767 'osb->s_inode_steal_slot' fs/ocfs2/suballoc.c:867 ocfs2_init_meta_steal_slot() warn: '(-1)' 65535 can't fit into 32767 'osb->s_meta_steal_slot' That's because OCFS2_INVALID_SLOT is (u16)-1. Slot number in ocfs2 can be never negative, so change s16 to u16. Fixes: 9277f8334ffc ("ocfs2: fix value of OCFS2_INVALID_SLOT") Reported-by: Dan Carpenter Signed-off-by: Junxiao Bi Signed-off-by: Andrew Morton Reviewed-by: Joseph Qi Reviewed-by: Gang He Cc: Mark Fasheh Cc: Joel Becker Cc: Junxiao Bi Cc: Changwei Ge Cc: Jun Piao Cc: Link: http://lkml.kernel.org/r/20200627001259.19757-1-junxiao.bi@oracle.com Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- fs/ocfs2/ocfs2.h | 4 ++-- fs/ocfs2/suballoc.c | 4 ++-- fs/ocfs2/super.c | 4 ++-- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h index 231933618300..b9f62d29355b 100644 --- a/fs/ocfs2/ocfs2.h +++ b/fs/ocfs2/ocfs2.h @@ -338,8 +338,8 @@ struct ocfs2_super spinlock_t osb_lock; u32 s_next_generation; unsigned long osb_flags; - s16 s_inode_steal_slot; - s16 s_meta_steal_slot; + u16 s_inode_steal_slot; + u16 s_meta_steal_slot; atomic_t s_num_inodes_stolen; atomic_t s_num_meta_stolen; diff --git a/fs/ocfs2/suballoc.c b/fs/ocfs2/suballoc.c index 15a89c513da2..0230b4ece0f0 100644 --- a/fs/ocfs2/suballoc.c +++ b/fs/ocfs2/suballoc.c @@ -893,9 +893,9 @@ static void __ocfs2_set_steal_slot(struct ocfs2_super *osb, int slot, int type) { spin_lock(&osb->osb_lock); if (type == INODE_ALLOC_SYSTEM_INODE) - osb->s_inode_steal_slot = slot; + osb->s_inode_steal_slot = (u16)slot; else if (type == EXTENT_ALLOC_SYSTEM_INODE) - osb->s_meta_steal_slot = slot; + osb->s_meta_steal_slot = (u16)slot; spin_unlock(&osb->osb_lock); } diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c index 3415e0b09398..2658d91c1f7b 100644 --- a/fs/ocfs2/super.c +++ b/fs/ocfs2/super.c @@ -92,7 +92,7 @@ struct mount_options unsigned long commit_interval; unsigned long mount_opt; unsigned int atime_quantum; - signed short slot; + unsigned short slot; int localalloc_opt; unsigned int resv_level; int dir_resv_level; @@ -1384,7 +1384,7 @@ static int ocfs2_parse_options(struct super_block *sb, goto bail; } if (option) - mopt->slot = (s16)option; + mopt->slot = (u16)option; break; case Opt_commit: if (match_int(&args[0], &option)) { From e88a72e86bd0c0c19ba00865ed68dce22824aac6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michal=20Koutn=C3=BD?= Date: Thu, 6 Aug 2020 23:22:18 -0700 Subject: [PATCH 36/91] mm/page_counter.c: fix protection usage propagation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit a6f23d14ec7d7d02220ad8bb2774be3322b9aeec upstream. When workload runs in cgroups that aren't directly below root cgroup and their parent specifies reclaim protection, it may end up ineffective. The reason is that propagate_protected_usage() is not called in all hierarchy up. All the protected usage is incorrectly accumulated in the workload's parent. This means that siblings_low_usage is overestimated and effective protection underestimated. Even though it is transitional phenomenon (uncharge path does correct propagation and fixes the wrong children_low_usage), it can undermine the intended protection unexpectedly. We have noticed this problem while seeing a swap out in a descendant of a protected memcg (intermediate node) while the parent was conveniently under its protection limit and the memory pressure was external to that hierarchy. Michal has pinpointed this down to the wrong siblings_low_usage which led to the unwanted reclaim. The fix is simply updating children_low_usage in respective ancestors also in the charging path. Fixes: 230671533d64 ("mm: memory.low hierarchical behavior") Signed-off-by: Michal Koutný Signed-off-by: Michal Hocko Signed-off-by: Andrew Morton Acked-by: Michal Hocko Acked-by: Roman Gushchin Cc: Johannes Weiner Cc: Tejun Heo Cc: [4.18+] Link: http://lkml.kernel.org/r/20200803153231.15477-1-mhocko@kernel.org Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- mm/page_counter.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/mm/page_counter.c b/mm/page_counter.c index de31470655f6..147ff99187b8 100644 --- a/mm/page_counter.c +++ b/mm/page_counter.c @@ -77,7 +77,7 @@ void page_counter_charge(struct page_counter *counter, unsigned long nr_pages) long new; new = atomic_long_add_return(nr_pages, &c->usage); - propagate_protected_usage(counter, new); + propagate_protected_usage(c, new); /* * This is indeed racy, but we can live with some * inaccuracy in the watermark. @@ -121,7 +121,7 @@ bool page_counter_try_charge(struct page_counter *counter, new = atomic_long_add_return(nr_pages, &c->usage); if (new > c->max) { atomic_long_sub(nr_pages, &c->usage); - propagate_protected_usage(counter, new); + propagate_protected_usage(c, new); /* * This is racy, but we can live with some * inaccuracy in the failcnt. @@ -130,7 +130,7 @@ bool page_counter_try_charge(struct page_counter *counter, *fail = c; goto failed; } - propagate_protected_usage(counter, new); + propagate_protected_usage(c, new); /* * Just like with failcnt, we can live with some * inaccuracy in the watermark. From 892fd3637a2c0c29d3dd7402bbb0802eb231b69d Mon Sep 17 00:00:00 2001 From: Chengming Zhou Date: Wed, 29 Jul 2020 02:05:53 +0800 Subject: [PATCH 37/91] ftrace: Setup correct FTRACE_FL_REGS flags for module commit 8a224ffb3f52b0027f6b7279854c71a31c48fc97 upstream. When module loaded and enabled, we will use __ftrace_replace_code for module if any ftrace_ops referenced it found. But we will get wrong ftrace_addr for module rec in ftrace_get_addr_new, because rec->flags has not been setup correctly. It can cause the callback function of a ftrace_ops has FTRACE_OPS_FL_SAVE_REGS to be called with pt_regs set to NULL. So setup correct FTRACE_FL_REGS flags for rec when we call referenced_filters to find ftrace_ops references it. Link: https://lkml.kernel.org/r/20200728180554.65203-1-zhouchengming@bytedance.com Cc: stable@vger.kernel.org Fixes: 8c4f3c3fa9681 ("ftrace: Check module functions being traced on reload") Signed-off-by: Chengming Zhou Signed-off-by: Muchun Song Signed-off-by: Steven Rostedt (VMware) Signed-off-by: Greg Kroah-Hartman --- kernel/trace/ftrace.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 0c379cd40bea..8960fe94e8ee 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -5665,8 +5665,11 @@ static int referenced_filters(struct dyn_ftrace *rec) int cnt = 0; for (ops = ftrace_ops_list; ops != &ftrace_list_end; ops = ops->next) { - if (ops_references_rec(ops, rec)) - cnt++; + if (ops_references_rec(ops, rec)) { + cnt++; + if (ops->flags & FTRACE_OPS_FL_SAVE_REGS) + rec->flags |= FTRACE_FL_REGS; + } } return cnt; @@ -5843,8 +5846,8 @@ void ftrace_module_enable(struct module *mod) if (ftrace_start_up) cnt += referenced_filters(rec); - /* This clears FTRACE_FL_DISABLED */ - rec->flags = cnt; + rec->flags &= ~FTRACE_FL_DISABLED; + rec->flags += cnt; if (ftrace_start_up && cnt) { int failed = __ftrace_replace_code(rec, 1); From 46c9d3925ab0ceb4d19cee4be1a061b87faf1e11 Mon Sep 17 00:00:00 2001 From: Muchun Song Date: Tue, 28 Jul 2020 14:45:36 +0800 Subject: [PATCH 38/91] kprobes: Fix NULL pointer dereference at kprobe_ftrace_handler commit 0cb2f1372baa60af8456388a574af6133edd7d80 upstream. We found a case of kernel panic on our server. The stack trace is as follows(omit some irrelevant information): BUG: kernel NULL pointer dereference, address: 0000000000000080 RIP: 0010:kprobe_ftrace_handler+0x5e/0xe0 RSP: 0018:ffffb512c6550998 EFLAGS: 00010282 RAX: 0000000000000000 RBX: ffff8e9d16eea018 RCX: 0000000000000000 RDX: ffffffffbe1179c0 RSI: ffffffffc0535564 RDI: ffffffffc0534ec0 RBP: ffffffffc0534ec1 R08: ffff8e9d1bbb0f00 R09: 0000000000000004 R10: 0000000000000000 R11: 0000000000000000 R12: 0000000000000000 R13: ffff8e9d1f797060 R14: 000000000000bacc R15: ffff8e9ce13eca00 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000080 CR3: 00000008453d0005 CR4: 00000000003606e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: ftrace_ops_assist_func+0x56/0xe0 ftrace_call+0x5/0x34 tcpa_statistic_send+0x5/0x130 [ttcp_engine] The tcpa_statistic_send is the function being kprobed. After analysis, the root cause is that the fourth parameter regs of kprobe_ftrace_handler is NULL. Why regs is NULL? We use the crash tool to analyze the kdump. crash> dis tcpa_statistic_send -r : callq 0xffffffffbd8018c0 The tcpa_statistic_send calls ftrace_caller instead of ftrace_regs_caller. So it is reasonable that the fourth parameter regs of kprobe_ftrace_handler is NULL. In theory, we should call the ftrace_regs_caller instead of the ftrace_caller. After in-depth analysis, we found a reproducible path. Writing a simple kernel module which starts a periodic timer. The timer's handler is named 'kprobe_test_timer_handler'. The module name is kprobe_test.ko. 1) insmod kprobe_test.ko 2) bpftrace -e 'kretprobe:kprobe_test_timer_handler {}' 3) echo 0 > /proc/sys/kernel/ftrace_enabled 4) rmmod kprobe_test 5) stop step 2) kprobe 6) insmod kprobe_test.ko 7) bpftrace -e 'kretprobe:kprobe_test_timer_handler {}' We mark the kprobe as GONE but not disarm the kprobe in the step 4). The step 5) also do not disarm the kprobe when unregister kprobe. So we do not remove the ip from the filter. In this case, when the module loads again in the step 6), we will replace the code to ftrace_caller via the ftrace_module_enable(). When we register kprobe again, we will not replace ftrace_caller to ftrace_regs_caller because the ftrace is disabled in the step 3). So the step 7) will trigger kernel panic. Fix this problem by disarming the kprobe when the module is going away. Link: https://lkml.kernel.org/r/20200728064536.24405-1-songmuchun@bytedance.com Cc: stable@vger.kernel.org Fixes: ae6aa16fdc16 ("kprobes: introduce ftrace based optimization") Acked-by: Masami Hiramatsu Signed-off-by: Muchun Song Co-developed-by: Chengming Zhou Signed-off-by: Chengming Zhou Signed-off-by: Steven Rostedt (VMware) Signed-off-by: Greg Kroah-Hartman --- kernel/kprobes.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/kernel/kprobes.c b/kernel/kprobes.c index 178327a75e73..eb4bffe6d764 100644 --- a/kernel/kprobes.c +++ b/kernel/kprobes.c @@ -2077,6 +2077,13 @@ static void kill_kprobe(struct kprobe *p) * the original probed function (which will be freed soon) any more. */ arch_remove_kprobe(p); + + /* + * The module is going away. We should disarm the kprobe which + * is using ftrace. + */ + if (kprobe_ftrace(p)) + disarm_kprobe_ftrace(p); } /* Disable one kprobe */ From b3b77736dd517ce1f03364486d88c375e0060679 Mon Sep 17 00:00:00 2001 From: Kevin Hao Date: Thu, 30 Jul 2020 16:23:18 +0800 Subject: [PATCH 39/91] tracing/hwlat: Honor the tracing_cpumask commit 96b4833b6827a62c295b149213c68b559514c929 upstream. In calculation of the cpu mask for the hwlat kernel thread, the wrong cpu mask is used instead of the tracing_cpumask, this causes the tracing/tracing_cpumask useless for hwlat tracer. Fixes it. Link: https://lkml.kernel.org/r/20200730082318.42584-2-haokexin@gmail.com Cc: Ingo Molnar Cc: stable@vger.kernel.org Fixes: 0330f7aa8ee6 ("tracing: Have hwlat trace migrate across tracing_cpumask CPUs") Signed-off-by: Kevin Hao Signed-off-by: Steven Rostedt (VMware) Signed-off-by: Greg Kroah-Hartman --- kernel/trace/trace_hwlat.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/kernel/trace/trace_hwlat.c b/kernel/trace/trace_hwlat.c index 8030e24dbf14..568918fae8d4 100644 --- a/kernel/trace/trace_hwlat.c +++ b/kernel/trace/trace_hwlat.c @@ -270,6 +270,7 @@ static bool disable_migrate; static void move_to_next_cpu(void) { struct cpumask *current_mask = &save_cpumask; + struct trace_array *tr = hwlat_trace; int next_cpu; if (disable_migrate) @@ -283,7 +284,7 @@ static void move_to_next_cpu(void) goto disable; get_online_cpus(); - cpumask_and(current_mask, cpu_online_mask, tracing_buffer_mask); + cpumask_and(current_mask, cpu_online_mask, tr->tracing_cpumask); next_cpu = cpumask_next(smp_processor_id(), current_mask); put_online_cpus(); @@ -360,7 +361,7 @@ static int start_kthread(struct trace_array *tr) /* Just pick the first CPU on first iteration */ current_mask = &save_cpumask; get_online_cpus(); - cpumask_and(current_mask, cpu_online_mask, tracing_buffer_mask); + cpumask_and(current_mask, cpu_online_mask, tr->tracing_cpumask); put_online_cpus(); next_cpu = cpumask_first(current_mask); From 2c98c4a0c35117926dcb0b0ec4d5034c64415149 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Tue, 4 Aug 2020 20:00:02 -0400 Subject: [PATCH 40/91] tracing: Use trace_sched_process_free() instead of exit() for pid tracing commit afcab636657421f7ebfa0783a91f90256bba0091 upstream. On exit, if a process is preempted after the trace_sched_process_exit() tracepoint but before the process is done exiting, then when it gets scheduled in, the function tracers will not filter it properly against the function tracing pid filters. That is because the function tracing pid filters hooks to the sched_process_exit() tracepoint to remove the exiting task's pid from the filter list. Because the filtering happens at the sched_switch tracepoint, when the exiting task schedules back in to finish up the exit, it will no longer be in the function pid filtering tables. This was noticeable in the notrace self tests on a preemptable kernel, as the tests would fail as it exits and preempted after being taken off the notrace filter table and on scheduling back in it would not be in the notrace list, and then the ending of the exit function would trace. The test detected this and would fail. Cc: stable@vger.kernel.org Cc: Namhyung Kim Fixes: 1e10486ffee0a ("ftrace: Add 'function-fork' trace option") Fixes: c37775d57830a ("tracing: Add infrastructure to allow set_event_pid to follow children" Signed-off-by: Steven Rostedt (VMware) Signed-off-by: Greg Kroah-Hartman --- kernel/trace/ftrace.c | 4 ++-- kernel/trace/trace_events.c | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 8960fe94e8ee..70f7743c1672 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -6450,12 +6450,12 @@ void ftrace_pid_follow_fork(struct trace_array *tr, bool enable) if (enable) { register_trace_sched_process_fork(ftrace_pid_follow_sched_process_fork, tr); - register_trace_sched_process_exit(ftrace_pid_follow_sched_process_exit, + register_trace_sched_process_free(ftrace_pid_follow_sched_process_exit, tr); } else { unregister_trace_sched_process_fork(ftrace_pid_follow_sched_process_fork, tr); - unregister_trace_sched_process_exit(ftrace_pid_follow_sched_process_exit, + unregister_trace_sched_process_free(ftrace_pid_follow_sched_process_exit, tr); } } diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index ec340e1cbffc..27726121d332 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -534,12 +534,12 @@ void trace_event_follow_fork(struct trace_array *tr, bool enable) if (enable) { register_trace_prio_sched_process_fork(event_filter_pid_sched_process_fork, tr, INT_MIN); - register_trace_prio_sched_process_exit(event_filter_pid_sched_process_exit, + register_trace_prio_sched_process_free(event_filter_pid_sched_process_exit, tr, INT_MAX); } else { unregister_trace_sched_process_fork(event_filter_pid_sched_process_fork, tr); - unregister_trace_sched_process_exit(event_filter_pid_sched_process_exit, + unregister_trace_sched_process_free(event_filter_pid_sched_process_exit, tr); } } From 203dbe7cda02b85d22f093617f0330cd03870161 Mon Sep 17 00:00:00 2001 From: Ahmad Fatoum Date: Thu, 11 Jun 2020 21:17:43 +0200 Subject: [PATCH 41/91] watchdog: f71808e_wdt: indicate WDIOF_CARDRESET support in watchdog_info.options commit e871e93fb08a619dfc015974a05768ed6880fd82 upstream. The driver supports populating bootstatus with WDIOF_CARDRESET, but so far userspace couldn't portably determine whether absence of this flag meant no watchdog reset or no driver support. Or-in the bit to fix this. Fixes: b97cb21a4634 ("watchdog: f71808e_wdt: Fix WDTMOUT_STS register read") Cc: stable@vger.kernel.org Signed-off-by: Ahmad Fatoum Reviewed-by: Guenter Roeck Link: https://lore.kernel.org/r/20200611191750.28096-3-a.fatoum@pengutronix.de Signed-off-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck Signed-off-by: Greg Kroah-Hartman --- drivers/watchdog/f71808e_wdt.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/watchdog/f71808e_wdt.c b/drivers/watchdog/f71808e_wdt.c index 9a1c761258ce..7683936e0b10 100644 --- a/drivers/watchdog/f71808e_wdt.c +++ b/drivers/watchdog/f71808e_wdt.c @@ -690,7 +690,8 @@ static int __init watchdog_init(int sioaddr) watchdog.sioaddr = sioaddr; watchdog.ident.options = WDIOC_SETTIMEOUT | WDIOF_MAGICCLOSE - | WDIOF_KEEPALIVEPING; + | WDIOF_KEEPALIVEPING + | WDIOF_CARDRESET; snprintf(watchdog.ident.identity, sizeof(watchdog.ident.identity), "%s watchdog", From 49d0707efdad02d15c627ba19629c81a367748b1 Mon Sep 17 00:00:00 2001 From: Ahmad Fatoum Date: Thu, 11 Jun 2020 21:17:44 +0200 Subject: [PATCH 42/91] watchdog: f71808e_wdt: remove use of wrong watchdog_info option commit 802141462d844f2e6a4d63a12260d79b7afc4c34 upstream. The flags that should be or-ed into the watchdog_info.options by drivers all start with WDIOF_, e.g. WDIOF_SETTIMEOUT, which indicates that the driver's watchdog_ops has a usable set_timeout. WDIOC_SETTIMEOUT was used instead, which expands to 0xc0045706, which equals: WDIOF_FANFAULT | WDIOF_EXTERN1 | WDIOF_PRETIMEOUT | WDIOF_ALARMONLY | WDIOF_MAGICCLOSE | 0xc0045000 These were so far indicated to userspace on WDIOC_GETSUPPORT. As the driver has not yet been migrated to the new watchdog kernel API, the constant can just be dropped without substitute. Fixes: 96cb4eb019ce ("watchdog: f71808e_wdt: new watchdog driver for Fintek F71808E and F71882FG") Cc: stable@vger.kernel.org Signed-off-by: Ahmad Fatoum Reviewed-by: Guenter Roeck Link: https://lore.kernel.org/r/20200611191750.28096-4-a.fatoum@pengutronix.de Signed-off-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck Signed-off-by: Greg Kroah-Hartman --- drivers/watchdog/f71808e_wdt.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/watchdog/f71808e_wdt.c b/drivers/watchdog/f71808e_wdt.c index 7683936e0b10..608eeba3270b 100644 --- a/drivers/watchdog/f71808e_wdt.c +++ b/drivers/watchdog/f71808e_wdt.c @@ -688,8 +688,7 @@ static int __init watchdog_init(int sioaddr) * into the module have been registered yet. */ watchdog.sioaddr = sioaddr; - watchdog.ident.options = WDIOC_SETTIMEOUT - | WDIOF_MAGICCLOSE + watchdog.ident.options = WDIOF_MAGICCLOSE | WDIOF_KEEPALIVEPING | WDIOF_CARDRESET; From e1462b5e6a052221d06d064d28a2f16c4e1782f6 Mon Sep 17 00:00:00 2001 From: Ahmad Fatoum Date: Thu, 11 Jun 2020 21:17:45 +0200 Subject: [PATCH 43/91] watchdog: f71808e_wdt: clear watchdog timeout occurred flag commit 4f39d575844148fbf3081571a1f3b4ae04150958 upstream. The flag indicating a watchdog timeout having occurred normally persists till Power-On Reset of the Fintek Super I/O chip. The user can clear it by writing a `1' to the bit. The driver doesn't offer a restart method, so regular system reboot might not reset the Super I/O and if the watchdog isn't enabled, we won't touch the register containing the bit on the next boot. In this case all subsequent regular reboots will be wrongly flagged by the driver as being caused by the watchdog. Fix this by having the flag cleared after read. This is also done by other drivers like those for the i6300esb and mpc8xxx_wdt. Fixes: b97cb21a4634 ("watchdog: f71808e_wdt: Fix WDTMOUT_STS register read") Cc: stable@vger.kernel.org Signed-off-by: Ahmad Fatoum Reviewed-by: Guenter Roeck Link: https://lore.kernel.org/r/20200611191750.28096-5-a.fatoum@pengutronix.de Signed-off-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck Signed-off-by: Greg Kroah-Hartman --- drivers/watchdog/f71808e_wdt.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/watchdog/f71808e_wdt.c b/drivers/watchdog/f71808e_wdt.c index 608eeba3270b..5d0ea419070d 100644 --- a/drivers/watchdog/f71808e_wdt.c +++ b/drivers/watchdog/f71808e_wdt.c @@ -704,6 +704,13 @@ static int __init watchdog_init(int sioaddr) wdt_conf = superio_inb(sioaddr, F71808FG_REG_WDT_CONF); watchdog.caused_reboot = wdt_conf & BIT(F71808FG_FLAG_WDTMOUT_STS); + /* + * We don't want WDTMOUT_STS to stick around till regular reboot. + * Write 1 to the bit to clear it to zero. + */ + superio_outb(sioaddr, F71808FG_REG_WDT_CONF, + wdt_conf | BIT(F71808FG_FLAG_WDTMOUT_STS)); + superio_exit(sioaddr); err = watchdog_set_timeout(timeout); From 73f74fc311d4d7fc94b9e130e3eb108c67314178 Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Wed, 15 Jul 2020 10:08:20 +1000 Subject: [PATCH 44/91] pseries: Fix 64 bit logical memory block panic commit 89c140bbaeee7a55ed0360a88f294ead2b95201b upstream. Booting with a 4GB LMB size causes us to panic: qemu-system-ppc64: OS terminated: OS panic: Memory block size not suitable: 0x0 Fix pseries_memory_block_size() to handle 64 bit LMBs. Cc: stable@vger.kernel.org Signed-off-by: Anton Blanchard Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200715000820.1255764-1-anton@ozlabs.org Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/platforms/pseries/hotplug-memory.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/platforms/pseries/hotplug-memory.c b/arch/powerpc/platforms/pseries/hotplug-memory.c index b168c3742b43..afabe6918619 100644 --- a/arch/powerpc/platforms/pseries/hotplug-memory.c +++ b/arch/powerpc/platforms/pseries/hotplug-memory.c @@ -31,7 +31,7 @@ static bool rtas_hp_event; unsigned long pseries_memory_block_size(void) { struct device_node *np; - unsigned int memblock_size = MIN_MEMORY_BLOCK_SIZE; + u64 memblock_size = MIN_MEMORY_BLOCK_SIZE; struct resource r; np = of_find_node_by_path("/ibm,dynamic-reconfiguration-memory"); From cec9fbfe39594e9fae46b17f6b0d8283ffcf1942 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Thu, 6 Aug 2020 14:15:23 -0700 Subject: [PATCH 45/91] module: Correctly truncate sysfs sections output commit 11990a5bd7e558e9203c1070fc52fb6f0488e75b upstream. The only-root-readable /sys/module/$module/sections/$section files did not truncate their output to the available buffer size. While most paths into the kernfs read handlers end up using PAGE_SIZE buffers, it's possible to get there through other paths (e.g. splice, sendfile). Actually limit the output to the "count" passed into the read function, and report it back correctly. *sigh* Reported-by: kernel test robot Link: https://lore.kernel.org/lkml/20200805002015.GE23458@shao2-debian Fixes: ed66f991bb19 ("module: Refactor section attr into bin attribute") Cc: stable@vger.kernel.org Reviewed-by: Greg Kroah-Hartman Acked-by: Jessica Yu Signed-off-by: Kees Cook Signed-off-by: Greg Kroah-Hartman --- kernel/module.c | 22 +++++++++++++++++++--- 1 file changed, 19 insertions(+), 3 deletions(-) diff --git a/kernel/module.c b/kernel/module.c index ae8e7a1fa74a..d05e1bfdd355 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -1461,18 +1461,34 @@ struct module_sect_attrs { struct module_sect_attr attrs[0]; }; +#define MODULE_SECT_READ_SIZE (3 /* "0x", "\n" */ + (BITS_PER_LONG / 4)) static ssize_t module_sect_read(struct file *file, struct kobject *kobj, struct bin_attribute *battr, char *buf, loff_t pos, size_t count) { struct module_sect_attr *sattr = container_of(battr, struct module_sect_attr, battr); + char bounce[MODULE_SECT_READ_SIZE + 1]; + size_t wrote; if (pos != 0) return -EINVAL; - return sprintf(buf, "0x%px\n", - kallsyms_show_value(file->f_cred) ? (void *)sattr->address : NULL); + /* + * Since we're a binary read handler, we must account for the + * trailing NUL byte that sprintf will write: if "buf" is + * too small to hold the NUL, or the NUL is exactly the last + * byte, the read will look like it got truncated by one byte. + * Since there is no way to ask sprintf nicely to not write + * the NUL, we have to use a bounce buffer. + */ + wrote = scnprintf(bounce, sizeof(bounce), "0x%px\n", + kallsyms_show_value(file->f_cred) + ? (void *)sattr->address : NULL); + count = min(count, wrote); + memcpy(buf, bounce, count); + + return count; } static void free_sect_attrs(struct module_sect_attrs *sect_attrs) @@ -1521,7 +1537,7 @@ static void add_sect_attrs(struct module *mod, const struct load_info *info) goto out; sect_attrs->nsections++; sattr->battr.read = module_sect_read; - sattr->battr.size = 3 /* "0x", "\n" */ + (BITS_PER_LONG / 4); + sattr->battr.size = MODULE_SECT_READ_SIZE; sattr->battr.attr.mode = 0400; *(gattr++) = &(sattr++)->battr; } From 721df98627dc8add25252e59535f548e3a6f7d96 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Fri, 10 Jul 2020 18:10:53 +0300 Subject: [PATCH 46/91] perf intel-pt: Fix FUP packet state commit 401136bb084fd021acd9f8c51b52fe0a25e326b2 upstream. While walking code towards a FUP ip, the packet state is INTEL_PT_STATE_FUP or INTEL_PT_STATE_FUP_NO_TIP. That was mishandled resulting in the state becoming INTEL_PT_STATE_IN_SYNC prematurely. The result was an occasional lost EXSTOP event. Signed-off-by: Adrian Hunter Reviewed-by: Andi Kleen Cc: Jiri Olsa Cc: stable@vger.kernel.org Link: http://lore.kernel.org/lkml/20200710151104.15137-2-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Greg Kroah-Hartman --- .../util/intel-pt-decoder/intel-pt-decoder.c | 21 +++++++------------ 1 file changed, 7 insertions(+), 14 deletions(-) diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c index 4357141c7c92..6522b6513895 100644 --- a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c +++ b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c @@ -1129,6 +1129,7 @@ static int intel_pt_walk_fup(struct intel_pt_decoder *decoder) return 0; if (err == -EAGAIN || intel_pt_fup_with_nlip(decoder, &intel_pt_insn, ip, err)) { + decoder->pkt_state = INTEL_PT_STATE_IN_SYNC; if (intel_pt_fup_event(decoder)) return 0; return -EAGAIN; @@ -1780,17 +1781,13 @@ static int intel_pt_walk_trace(struct intel_pt_decoder *decoder) } if (decoder->set_fup_mwait) no_tip = true; + if (no_tip) + decoder->pkt_state = INTEL_PT_STATE_FUP_NO_TIP; + else + decoder->pkt_state = INTEL_PT_STATE_FUP; err = intel_pt_walk_fup(decoder); - if (err != -EAGAIN) { - if (err) - return err; - if (no_tip) - decoder->pkt_state = - INTEL_PT_STATE_FUP_NO_TIP; - else - decoder->pkt_state = INTEL_PT_STATE_FUP; - return 0; - } + if (err != -EAGAIN) + return err; if (no_tip) { no_tip = false; break; @@ -2375,15 +2372,11 @@ const struct intel_pt_state *intel_pt_decode(struct intel_pt_decoder *decoder) err = intel_pt_walk_tip(decoder); break; case INTEL_PT_STATE_FUP: - decoder->pkt_state = INTEL_PT_STATE_IN_SYNC; err = intel_pt_walk_fup(decoder); if (err == -EAGAIN) err = intel_pt_walk_fup_tip(decoder); - else if (!err) - decoder->pkt_state = INTEL_PT_STATE_FUP; break; case INTEL_PT_STATE_FUP_NO_TIP: - decoder->pkt_state = INTEL_PT_STATE_IN_SYNC; err = intel_pt_walk_fup(decoder); if (err == -EAGAIN) err = intel_pt_walk_trace(decoder); From 7d3146f3b1b75e45599e270c63fc88a6ce9ff644 Mon Sep 17 00:00:00 2001 From: Sibi Sankar Date: Tue, 2 Jun 2020 22:02:56 +0530 Subject: [PATCH 47/91] remoteproc: qcom: q6v5: Update running state before requesting stop commit 5b7be880074c73540948f8fc597e0407b98fabfa upstream. Sometimes the stop triggers a watchdog rather than a stop-ack. Update the running state to false on requesting stop to skip the watchdog instead. Error Logs: $ echo stop > /sys/class/remoteproc/remoteproc0/state ipa 1e40000.ipa: received modem stopping event remoteproc-modem: watchdog received: sys_m_smsm_mpss.c:291:APPS force stop qcom-q6v5-mss 4080000.remoteproc-modem: port failed halt ipa 1e40000.ipa: received modem offline event remoteproc0: stopped remote processor 4080000.remoteproc-modem Reviewed-by: Evan Green Fixes: 3b415c8fb263 ("remoteproc: q6v5: Extract common resource handling") Cc: stable@vger.kernel.org Signed-off-by: Sibi Sankar Link: https://lore.kernel.org/r/20200602163257.26978-1-sibis@codeaurora.org Signed-off-by: Bjorn Andersson Signed-off-by: Greg Kroah-Hartman --- drivers/remoteproc/qcom_q6v5.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/remoteproc/qcom_q6v5.c b/drivers/remoteproc/qcom_q6v5.c index 0d33e3079f0d..ef61cb709acd 100644 --- a/drivers/remoteproc/qcom_q6v5.c +++ b/drivers/remoteproc/qcom_q6v5.c @@ -151,6 +151,8 @@ int qcom_q6v5_request_stop(struct qcom_q6v5 *q6v5) { int ret; + q6v5->running = false; + qcom_smem_state_update_bits(q6v5->state, BIT(q6v5->stop_bit), BIT(q6v5->stop_bit)); From 87a56a59ad2427afc358fe1d50fedcbb8a21c31f Mon Sep 17 00:00:00 2001 From: Liu Ying Date: Thu, 9 Jul 2020 10:28:52 +0800 Subject: [PATCH 48/91] drm/imx: imx-ldb: Disable both channels for split mode in enc->disable() commit 3b2a999582c467d1883716b37ffcc00178a13713 upstream. Both of the two LVDS channels should be disabled for split mode in the encoder's ->disable() callback, because they are enabled in the encoder's ->enable() callback. Fixes: 6556f7f82b9c ("drm: imx: Move imx-drm driver out of staging") Cc: Philipp Zabel Cc: Sascha Hauer Cc: Pengutronix Kernel Team Cc: NXP Linux Team Cc: Signed-off-by: Liu Ying Signed-off-by: Philipp Zabel Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/imx/imx-ldb.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/imx/imx-ldb.c b/drivers/gpu/drm/imx/imx-ldb.c index 42daa5c9ff8e..221a8cbc57f9 100644 --- a/drivers/gpu/drm/imx/imx-ldb.c +++ b/drivers/gpu/drm/imx/imx-ldb.c @@ -311,18 +311,19 @@ static void imx_ldb_encoder_disable(struct drm_encoder *encoder) { struct imx_ldb_channel *imx_ldb_ch = enc_to_imx_ldb_ch(encoder); struct imx_ldb *ldb = imx_ldb_ch->ldb; + int dual = ldb->ldb_ctrl & LDB_SPLIT_MODE_EN; int mux, ret; drm_panel_disable(imx_ldb_ch->panel); - if (imx_ldb_ch == &ldb->channel[0]) + if (imx_ldb_ch == &ldb->channel[0] || dual) ldb->ldb_ctrl &= ~LDB_CH0_MODE_EN_MASK; - else if (imx_ldb_ch == &ldb->channel[1]) + if (imx_ldb_ch == &ldb->channel[1] || dual) ldb->ldb_ctrl &= ~LDB_CH1_MODE_EN_MASK; regmap_write(ldb->regmap, IOMUXC_GPR2, ldb->ldb_ctrl); - if (ldb->ldb_ctrl & LDB_SPLIT_MODE_EN) { + if (dual) { clk_disable_unprepare(ldb->clk[0]); clk_disable_unprepare(ldb->clk[1]); } From d12296621cf98b0e9820a28ec077e599d37a7193 Mon Sep 17 00:00:00 2001 From: Charles Keepax Date: Mon, 15 Jun 2020 14:53:21 +0100 Subject: [PATCH 49/91] mfd: arizona: Ensure 32k clock is put on driver unbind and error [ Upstream commit ddff6c45b21d0437ce0c85f8ac35d7b5480513d7 ] Whilst it doesn't matter if the internal 32k clock register settings are cleaned up on exit, as the part will be turned off losing any settings, hence the driver hasn't historially bothered. The external clock should however be cleaned up, as it could cause clocks to be left on, and will at best generate a warning on unbind. Add clean up on both the probe error path and unbind for the 32k clock. Fixes: cdd8da8cc66b ("mfd: arizona: Add gating of external MCLKn clocks") Signed-off-by: Charles Keepax Signed-off-by: Lee Jones Signed-off-by: Sasha Levin --- drivers/mfd/arizona-core.c | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/drivers/mfd/arizona-core.c b/drivers/mfd/arizona-core.c index a4403a57ddc8..09acaa2cf74a 100644 --- a/drivers/mfd/arizona-core.c +++ b/drivers/mfd/arizona-core.c @@ -1433,6 +1433,15 @@ int arizona_dev_init(struct arizona *arizona) arizona_irq_exit(arizona); err_pm: pm_runtime_disable(arizona->dev); + + switch (arizona->pdata.clk32k_src) { + case ARIZONA_32KZ_MCLK1: + case ARIZONA_32KZ_MCLK2: + arizona_clk32k_disable(arizona); + break; + default: + break; + } err_reset: arizona_enable_reset(arizona); regulator_disable(arizona->dcvdd); @@ -1455,6 +1464,15 @@ int arizona_dev_exit(struct arizona *arizona) regulator_disable(arizona->dcvdd); regulator_put(arizona->dcvdd); + switch (arizona->pdata.clk32k_src) { + case ARIZONA_32KZ_MCLK1: + case ARIZONA_32KZ_MCLK2: + arizona_clk32k_disable(arizona); + break; + default: + break; + } + mfd_remove_devices(arizona->dev); arizona_free_irq(arizona, ARIZONA_IRQ_UNDERCLOCKED, arizona); arizona_free_irq(arizona, ARIZONA_IRQ_OVERCLOCKED, arizona); From 0bcab21cf6eecdcc63fc9210447e0925a3100eb0 Mon Sep 17 00:00:00 2001 From: Kamal Heib Date: Tue, 23 Jun 2020 13:52:36 +0300 Subject: [PATCH 50/91] RDMA/ipoib: Return void from ipoib_ib_dev_stop() [ Upstream commit 95a5631f6c9f3045f26245e6045244652204dfdb ] The return value from ipoib_ib_dev_stop() is always 0 - change it to be void. Link: https://lore.kernel.org/r/20200623105236.18683-1-kamalheib1@gmail.com Signed-off-by: Kamal Heib Signed-off-by: Jason Gunthorpe Signed-off-by: Sasha Levin --- drivers/infiniband/ulp/ipoib/ipoib.h | 2 +- drivers/infiniband/ulp/ipoib/ipoib_ib.c | 4 +--- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h index 85267bbf4836..ef1222101705 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib.h +++ b/drivers/infiniband/ulp/ipoib/ipoib.h @@ -513,7 +513,7 @@ void ipoib_ib_dev_cleanup(struct net_device *dev); int ipoib_ib_dev_open_default(struct net_device *dev); int ipoib_ib_dev_open(struct net_device *dev); -int ipoib_ib_dev_stop(struct net_device *dev); +void ipoib_ib_dev_stop(struct net_device *dev); void ipoib_ib_dev_up(struct net_device *dev); void ipoib_ib_dev_down(struct net_device *dev); int ipoib_ib_dev_stop_default(struct net_device *dev); diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ib.c b/drivers/infiniband/ulp/ipoib/ipoib_ib.c index 0f2e80f54d33..925258ffbde3 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_ib.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_ib.c @@ -846,7 +846,7 @@ int ipoib_ib_dev_stop_default(struct net_device *dev) return 0; } -int ipoib_ib_dev_stop(struct net_device *dev) +void ipoib_ib_dev_stop(struct net_device *dev) { struct ipoib_dev_priv *priv = ipoib_priv(dev); @@ -854,8 +854,6 @@ int ipoib_ib_dev_stop(struct net_device *dev) clear_bit(IPOIB_FLAG_INITIALIZED, &priv->flags); ipoib_flush_ah(dev); - - return 0; } int ipoib_ib_dev_open_default(struct net_device *dev) From 2cb3b14eb6b2af74ec146289f86963194c834eb8 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Thu, 25 Jun 2020 20:42:19 +0300 Subject: [PATCH 51/91] RDMA/ipoib: Fix ABBA deadlock with ipoib_reap_ah() [ Upstream commit 65936bf25f90fe440bb2d11624c7d10fab266639 ] ipoib_mcast_carrier_on_task() insanely open codes a rtnl_lock() such that the only time flush_workqueue() can be called is if it also clears IPOIB_FLAG_OPER_UP. Thus the flush inside ipoib_flush_ah() will deadlock if it gets unlucky enough, and lockdep doesn't help us to find it early: CPU0 CPU1 CPU2 __ipoib_ib_dev_flush() down_read(vlan_rwsem) ipoib_vlan_add() rtnl_trylock() down_write(vlan_rwsem) ipoib_mcast_carrier_on_task() while (!rtnl_trylock()) msleep(20); ipoib_flush_ah() flush_workqueue(priv->wq) Clean up the ah_reaper related functions and lifecycle to make sense: - Start/Stop of the reaper should only be done in open/stop NDOs, not in any other places - cancel and flush of the reaper should only happen in the stop NDO. cancel is only functional when combined with IPOIB_STOP_REAPER. - Non-stop places were flushing the AH's just need to flush out dead AH's synchronously and ignore the background task completely. It is fully locked and harmless to leave running. Which ultimately fixes the ABBA deadlock by removing the unnecessary flush_workqueue() from the problematic place under the vlan_rwsem. Fixes: efc82eeeae4e ("IB/ipoib: No longer use flush as a parameter") Link: https://lore.kernel.org/r/20200625174219.290842-1-kamalheib1@gmail.com Reported-by: Kamal Heib Tested-by: Kamal Heib Signed-off-by: Jason Gunthorpe Signed-off-by: Sasha Levin --- drivers/infiniband/ulp/ipoib/ipoib_ib.c | 65 ++++++++++------------- drivers/infiniband/ulp/ipoib/ipoib_main.c | 2 + 2 files changed, 31 insertions(+), 36 deletions(-) diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ib.c b/drivers/infiniband/ulp/ipoib/ipoib_ib.c index 925258ffbde3..82b9c5b6e3e6 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_ib.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_ib.c @@ -669,14 +669,13 @@ int ipoib_send(struct net_device *dev, struct sk_buff *skb, return rc; } -static void __ipoib_reap_ah(struct net_device *dev) +static void ipoib_reap_dead_ahs(struct ipoib_dev_priv *priv) { - struct ipoib_dev_priv *priv = ipoib_priv(dev); struct ipoib_ah *ah, *tah; LIST_HEAD(remove_list); unsigned long flags; - netif_tx_lock_bh(dev); + netif_tx_lock_bh(priv->dev); spin_lock_irqsave(&priv->lock, flags); list_for_each_entry_safe(ah, tah, &priv->dead_ahs, list) @@ -687,37 +686,37 @@ static void __ipoib_reap_ah(struct net_device *dev) } spin_unlock_irqrestore(&priv->lock, flags); - netif_tx_unlock_bh(dev); + netif_tx_unlock_bh(priv->dev); } void ipoib_reap_ah(struct work_struct *work) { struct ipoib_dev_priv *priv = container_of(work, struct ipoib_dev_priv, ah_reap_task.work); - struct net_device *dev = priv->dev; - __ipoib_reap_ah(dev); + ipoib_reap_dead_ahs(priv); if (!test_bit(IPOIB_STOP_REAPER, &priv->flags)) queue_delayed_work(priv->wq, &priv->ah_reap_task, round_jiffies_relative(HZ)); } -static void ipoib_flush_ah(struct net_device *dev) +static void ipoib_start_ah_reaper(struct ipoib_dev_priv *priv) { - struct ipoib_dev_priv *priv = ipoib_priv(dev); - - cancel_delayed_work(&priv->ah_reap_task); - flush_workqueue(priv->wq); - ipoib_reap_ah(&priv->ah_reap_task.work); + clear_bit(IPOIB_STOP_REAPER, &priv->flags); + queue_delayed_work(priv->wq, &priv->ah_reap_task, + round_jiffies_relative(HZ)); } -static void ipoib_stop_ah(struct net_device *dev) +static void ipoib_stop_ah_reaper(struct ipoib_dev_priv *priv) { - struct ipoib_dev_priv *priv = ipoib_priv(dev); - set_bit(IPOIB_STOP_REAPER, &priv->flags); - ipoib_flush_ah(dev); + cancel_delayed_work(&priv->ah_reap_task); + /* + * After ipoib_stop_ah_reaper() we always go through + * ipoib_reap_dead_ahs() which ensures the work is really stopped and + * does a final flush out of the dead_ah's list + */ } static int recvs_pending(struct net_device *dev) @@ -846,16 +845,6 @@ int ipoib_ib_dev_stop_default(struct net_device *dev) return 0; } -void ipoib_ib_dev_stop(struct net_device *dev) -{ - struct ipoib_dev_priv *priv = ipoib_priv(dev); - - priv->rn_ops->ndo_stop(dev); - - clear_bit(IPOIB_FLAG_INITIALIZED, &priv->flags); - ipoib_flush_ah(dev); -} - int ipoib_ib_dev_open_default(struct net_device *dev) { struct ipoib_dev_priv *priv = ipoib_priv(dev); @@ -899,10 +888,7 @@ int ipoib_ib_dev_open(struct net_device *dev) return -1; } - clear_bit(IPOIB_STOP_REAPER, &priv->flags); - queue_delayed_work(priv->wq, &priv->ah_reap_task, - round_jiffies_relative(HZ)); - + ipoib_start_ah_reaper(priv); if (priv->rn_ops->ndo_open(dev)) { pr_warn("%s: Failed to open dev\n", dev->name); goto dev_stop; @@ -913,13 +899,20 @@ int ipoib_ib_dev_open(struct net_device *dev) return 0; dev_stop: - set_bit(IPOIB_STOP_REAPER, &priv->flags); - cancel_delayed_work(&priv->ah_reap_task); - set_bit(IPOIB_FLAG_INITIALIZED, &priv->flags); - ipoib_ib_dev_stop(dev); + ipoib_stop_ah_reaper(priv); return -1; } +void ipoib_ib_dev_stop(struct net_device *dev) +{ + struct ipoib_dev_priv *priv = ipoib_priv(dev); + + priv->rn_ops->ndo_stop(dev); + + clear_bit(IPOIB_FLAG_INITIALIZED, &priv->flags); + ipoib_stop_ah_reaper(priv); +} + void ipoib_pkey_dev_check_presence(struct net_device *dev) { struct ipoib_dev_priv *priv = ipoib_priv(dev); @@ -1230,7 +1223,7 @@ static void __ipoib_ib_dev_flush(struct ipoib_dev_priv *priv, ipoib_mcast_dev_flush(dev); if (oper_up) set_bit(IPOIB_FLAG_OPER_UP, &priv->flags); - ipoib_flush_ah(dev); + ipoib_reap_dead_ahs(priv); } if (level >= IPOIB_FLUSH_NORMAL) @@ -1305,7 +1298,7 @@ void ipoib_ib_dev_cleanup(struct net_device *dev) * the neighbor garbage collection is stopped and reaped. * That should all be done now, so make a final ah flush. */ - ipoib_stop_ah(dev); + ipoib_reap_dead_ahs(priv); clear_bit(IPOIB_PKEY_ASSIGNED, &priv->flags); diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c index 6093e8268583..d0c35eb687ae 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_main.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c @@ -1979,6 +1979,8 @@ static void ipoib_ndo_uninit(struct net_device *dev) /* no more works over the priv->wq */ if (priv->wq) { + /* See ipoib_mcast_carrier_on_task() */ + WARN_ON(test_bit(IPOIB_FLAG_OPER_UP, &priv->flags)); flush_workqueue(priv->wq); destroy_workqueue(priv->wq); priv->wq = NULL; From a676d83b8f87ec1e4f4bd5540ff8c17a27deede8 Mon Sep 17 00:00:00 2001 From: Paul Kocialkowski Date: Thu, 30 Apr 2020 18:42:44 +0200 Subject: [PATCH 52/91] media: rockchip: rga: Introduce color fmt macros and refactor CSC mode logic [ Upstream commit ded874ece29d3fe2abd3775810a06056067eb68c ] This introduces two macros: RGA_COLOR_FMT_IS_YUV and RGA_COLOR_FMT_IS_RGB which allow quick checking of the colorspace familily of a RGA color format. These macros are then used to refactor the logic for CSC mode selection. The two nested tests for input colorspace are simplified into a single one, with a logical and, making the whole more readable. Signed-off-by: Paul Kocialkowski Reviewed-by: Ezequiel Garcia Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Sasha Levin --- drivers/media/platform/rockchip/rga/rga-hw.c | 23 +++++++++----------- drivers/media/platform/rockchip/rga/rga-hw.h | 5 +++++ 2 files changed, 15 insertions(+), 13 deletions(-) diff --git a/drivers/media/platform/rockchip/rga/rga-hw.c b/drivers/media/platform/rockchip/rga/rga-hw.c index 96d1b1b3fe8e..6d12491b79d7 100644 --- a/drivers/media/platform/rockchip/rga/rga-hw.c +++ b/drivers/media/platform/rockchip/rga/rga-hw.c @@ -208,22 +208,19 @@ static void rga_cmd_set_trans_info(struct rga_ctx *ctx) dst_info.data.format = ctx->out.fmt->hw_format; dst_info.data.swap = ctx->out.fmt->color_swap; - if (ctx->in.fmt->hw_format >= RGA_COLOR_FMT_YUV422SP) { - if (ctx->out.fmt->hw_format < RGA_COLOR_FMT_YUV422SP) { - switch (ctx->in.colorspace) { - case V4L2_COLORSPACE_REC709: - src_info.data.csc_mode = - RGA_SRC_CSC_MODE_BT709_R0; - break; - default: - src_info.data.csc_mode = - RGA_SRC_CSC_MODE_BT601_R0; - break; - } + if (RGA_COLOR_FMT_IS_YUV(ctx->in.fmt->hw_format) && + RGA_COLOR_FMT_IS_RGB(ctx->out.fmt->hw_format)) { + switch (ctx->in.colorspace) { + case V4L2_COLORSPACE_REC709: + src_info.data.csc_mode = RGA_SRC_CSC_MODE_BT709_R0; + break; + default: + src_info.data.csc_mode = RGA_SRC_CSC_MODE_BT601_R0; + break; } } - if (ctx->out.fmt->hw_format >= RGA_COLOR_FMT_YUV422SP) { + if (RGA_COLOR_FMT_IS_YUV(ctx->out.fmt->hw_format)) { switch (ctx->out.colorspace) { case V4L2_COLORSPACE_REC709: dst_info.data.csc_mode = RGA_SRC_CSC_MODE_BT709_R0; diff --git a/drivers/media/platform/rockchip/rga/rga-hw.h b/drivers/media/platform/rockchip/rga/rga-hw.h index ca3c204abe42..3e4b70eb9ced 100644 --- a/drivers/media/platform/rockchip/rga/rga-hw.h +++ b/drivers/media/platform/rockchip/rga/rga-hw.h @@ -103,6 +103,11 @@ #define RGA_COLOR_FMT_CP_8BPP 15 #define RGA_COLOR_FMT_MASK 15 +#define RGA_COLOR_FMT_IS_YUV(fmt) \ + (((fmt) >= RGA_COLOR_FMT_YUV422SP) && ((fmt) < RGA_COLOR_FMT_CP_1BPP)) +#define RGA_COLOR_FMT_IS_RGB(fmt) \ + ((fmt) < RGA_COLOR_FMT_YUV422SP) + #define RGA_COLOR_NONE_SWAP 0 #define RGA_COLOR_RB_SWAP 1 #define RGA_COLOR_ALPHA_SWAP 2 From d7ee731744d7c6396116aa93b776ed2ce9afa912 Mon Sep 17 00:00:00 2001 From: Paul Kocialkowski Date: Thu, 30 Apr 2020 18:42:45 +0200 Subject: [PATCH 53/91] media: rockchip: rga: Only set output CSC mode for RGB input [ Upstream commit 0f879bab72f47e8ba2421a984e7acfa763d3e84e ] Setting the output CSC mode is required for a YUV output, but must not be set when the input is also YUV. Doing this (as tested with a YUV420P to YUV420P conversion) results in wrong colors. Adapt the logic to only set the output CSC mode when the output is YUV and the input is RGB. Also add a comment to clarify the rationale. Fixes: f7e7b48e6d79 ("[media] rockchip/rga: v4l2 m2m support") Signed-off-by: Paul Kocialkowski Reviewed-by: Ezequiel Garcia Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Sasha Levin --- drivers/media/platform/rockchip/rga/rga-hw.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/media/platform/rockchip/rga/rga-hw.c b/drivers/media/platform/rockchip/rga/rga-hw.c index 6d12491b79d7..681de42f12e9 100644 --- a/drivers/media/platform/rockchip/rga/rga-hw.c +++ b/drivers/media/platform/rockchip/rga/rga-hw.c @@ -208,6 +208,11 @@ static void rga_cmd_set_trans_info(struct rga_ctx *ctx) dst_info.data.format = ctx->out.fmt->hw_format; dst_info.data.swap = ctx->out.fmt->color_swap; + /* + * CSC mode must only be set when the colorspace families differ between + * input and output. It must remain unset (zeroed) if both are the same. + */ + if (RGA_COLOR_FMT_IS_YUV(ctx->in.fmt->hw_format) && RGA_COLOR_FMT_IS_RGB(ctx->out.fmt->hw_format)) { switch (ctx->in.colorspace) { @@ -220,7 +225,8 @@ static void rga_cmd_set_trans_info(struct rga_ctx *ctx) } } - if (RGA_COLOR_FMT_IS_YUV(ctx->out.fmt->hw_format)) { + if (RGA_COLOR_FMT_IS_RGB(ctx->in.fmt->hw_format) && + RGA_COLOR_FMT_IS_YUV(ctx->out.fmt->hw_format)) { switch (ctx->out.colorspace) { case V4L2_COLORSPACE_REC709: dst_info.data.csc_mode = RGA_SRC_CSC_MODE_BT709_R0; From fe34945c7898deb4cae866ae0330b781abdff5ac Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Wed, 8 Jul 2020 14:49:51 +0200 Subject: [PATCH 54/91] USB: serial: ftdi_sio: make process-packet buffer unsigned [ Upstream commit ab4cc4ef6724ea588e835fc1e764c4b4407a70b7 ] Use an unsigned type for the process-packet buffer argument and give it a more apt name. Reviewed-by: Greg Kroah-Hartman Signed-off-by: Johan Hovold Signed-off-by: Sasha Levin --- drivers/usb/serial/ftdi_sio.c | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/drivers/usb/serial/ftdi_sio.c b/drivers/usb/serial/ftdi_sio.c index 3c0f38cd3a5a..d0ae6318d6e9 100644 --- a/drivers/usb/serial/ftdi_sio.c +++ b/drivers/usb/serial/ftdi_sio.c @@ -2037,12 +2037,12 @@ static int ftdi_prepare_write_buffer(struct usb_serial_port *port, #define FTDI_RS_ERR_MASK (FTDI_RS_BI | FTDI_RS_PE | FTDI_RS_FE | FTDI_RS_OE) static int ftdi_process_packet(struct usb_serial_port *port, - struct ftdi_private *priv, char *packet, int len) + struct ftdi_private *priv, unsigned char *buf, int len) { + unsigned char status; + unsigned char *ch; int i; - char status; char flag; - char *ch; if (len < 2) { dev_dbg(&port->dev, "malformed packet\n"); @@ -2052,7 +2052,7 @@ static int ftdi_process_packet(struct usb_serial_port *port, /* Compare new line status to the old one, signal if different/ N.B. packet may be processed more than once, but differences are only processed once. */ - status = packet[0] & FTDI_STATUS_B0_MASK; + status = buf[0] & FTDI_STATUS_B0_MASK; if (status != priv->prev_status) { char diff_status = status ^ priv->prev_status; @@ -2078,7 +2078,7 @@ static int ftdi_process_packet(struct usb_serial_port *port, } /* save if the transmitter is empty or not */ - if (packet[1] & FTDI_RS_TEMT) + if (buf[1] & FTDI_RS_TEMT) priv->transmit_empty = 1; else priv->transmit_empty = 0; @@ -2092,29 +2092,29 @@ static int ftdi_process_packet(struct usb_serial_port *port, * data payload to avoid over-reporting. */ flag = TTY_NORMAL; - if (packet[1] & FTDI_RS_ERR_MASK) { + if (buf[1] & FTDI_RS_ERR_MASK) { /* Break takes precedence over parity, which takes precedence * over framing errors */ - if (packet[1] & FTDI_RS_BI) { + if (buf[1] & FTDI_RS_BI) { flag = TTY_BREAK; port->icount.brk++; usb_serial_handle_break(port); - } else if (packet[1] & FTDI_RS_PE) { + } else if (buf[1] & FTDI_RS_PE) { flag = TTY_PARITY; port->icount.parity++; - } else if (packet[1] & FTDI_RS_FE) { + } else if (buf[1] & FTDI_RS_FE) { flag = TTY_FRAME; port->icount.frame++; } /* Overrun is special, not associated with a char */ - if (packet[1] & FTDI_RS_OE) { + if (buf[1] & FTDI_RS_OE) { port->icount.overrun++; tty_insert_flip_char(&port->port, 0, TTY_OVERRUN); } } port->icount.rx += len; - ch = packet + 2; + ch = buf + 2; if (port->port.console && port->sysrq) { for (i = 0; i < len; i++, ch++) { From e9bca40189aec751786c44dfc89fe14a960a4a88 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Wed, 8 Jul 2020 14:49:52 +0200 Subject: [PATCH 55/91] USB: serial: ftdi_sio: clean up receive processing [ Upstream commit ce054039ba5e47b75a3be02a00274e52b06a6456 ] Clean up receive processing by dropping the character pointer and keeping the length argument unchanged throughout the function. Also make it more apparent that sysrq processing can consume a characters by adding an explicit continue. Reviewed-by: Greg Kroah-Hartman Signed-off-by: Johan Hovold Signed-off-by: Sasha Levin --- drivers/usb/serial/ftdi_sio.c | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/drivers/usb/serial/ftdi_sio.c b/drivers/usb/serial/ftdi_sio.c index d0ae6318d6e9..ce9cc1f90b05 100644 --- a/drivers/usb/serial/ftdi_sio.c +++ b/drivers/usb/serial/ftdi_sio.c @@ -2040,7 +2040,6 @@ static int ftdi_process_packet(struct usb_serial_port *port, struct ftdi_private *priv, unsigned char *buf, int len) { unsigned char status; - unsigned char *ch; int i; char flag; @@ -2083,8 +2082,7 @@ static int ftdi_process_packet(struct usb_serial_port *port, else priv->transmit_empty = 0; - len -= 2; - if (!len) + if (len == 2) return 0; /* status only */ /* @@ -2113,19 +2111,20 @@ static int ftdi_process_packet(struct usb_serial_port *port, } } - port->icount.rx += len; - ch = buf + 2; + port->icount.rx += len - 2; if (port->port.console && port->sysrq) { - for (i = 0; i < len; i++, ch++) { - if (!usb_serial_handle_sysrq_char(port, *ch)) - tty_insert_flip_char(&port->port, *ch, flag); + for (i = 2; i < len; i++) { + if (usb_serial_handle_sysrq_char(port, buf[i])) + continue; + tty_insert_flip_char(&port->port, buf[i], flag); } } else { - tty_insert_flip_string_fixed_flag(&port->port, ch, flag, len); + tty_insert_flip_string_fixed_flag(&port->port, buf + 2, flag, + len - 2); } - return len; + return len - 2; } static void ftdi_process_read_urb(struct urb *urb) From 388a802632cf66c2d0a447110b87bae2ed90b1e2 Mon Sep 17 00:00:00 2001 From: Yoshihiro Shimoda Date: Thu, 21 May 2020 16:01:05 +0900 Subject: [PATCH 56/91] mmc: renesas_sdhi_internal_dmac: clean up the code for dma complete [ Upstream commit 2b26e34e9af3fa24fa1266e9ea2d66a1f7d62dc0 ] To add end() operation in the future, clean the code of renesas_sdhi_internal_dmac_complete_tasklet_fn(). No behavior change. Signed-off-by: Yoshihiro Shimoda Link: https://lore.kernel.org/r/1590044466-28372-3-git-send-email-yoshihiro.shimoda.uh@renesas.com Tested-by: Wolfram Sang Signed-off-by: Ulf Hansson Signed-off-by: Sasha Levin --- drivers/mmc/host/renesas_sdhi_internal_dmac.c | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/drivers/mmc/host/renesas_sdhi_internal_dmac.c b/drivers/mmc/host/renesas_sdhi_internal_dmac.c index 382172fb3da8..74eea8247490 100644 --- a/drivers/mmc/host/renesas_sdhi_internal_dmac.c +++ b/drivers/mmc/host/renesas_sdhi_internal_dmac.c @@ -222,15 +222,12 @@ static void renesas_sdhi_internal_dmac_issue_tasklet_fn(unsigned long arg) DTRAN_CTRL_DM_START); } -static void renesas_sdhi_internal_dmac_complete_tasklet_fn(unsigned long arg) +static bool renesas_sdhi_internal_dmac_complete(struct tmio_mmc_host *host) { - struct tmio_mmc_host *host = (struct tmio_mmc_host *)arg; enum dma_data_direction dir; - spin_lock_irq(&host->lock); - if (!host->data) - goto out; + return false; if (host->data->flags & MMC_DATA_READ) dir = DMA_FROM_DEVICE; @@ -243,6 +240,17 @@ static void renesas_sdhi_internal_dmac_complete_tasklet_fn(unsigned long arg) if (dir == DMA_FROM_DEVICE) clear_bit(SDHI_INTERNAL_DMAC_RX_IN_USE, &global_flags); + return true; +} + +static void renesas_sdhi_internal_dmac_complete_tasklet_fn(unsigned long arg) +{ + struct tmio_mmc_host *host = (struct tmio_mmc_host *)arg; + + spin_lock_irq(&host->lock); + if (!renesas_sdhi_internal_dmac_complete(host)) + goto out; + tmio_mmc_do_data_irq(host); out: spin_unlock_irq(&host->lock); From c00c5131441cbc1775bb1c3164217aa52358396d Mon Sep 17 00:00:00 2001 From: Steve Longerbeam Date: Wed, 17 Jun 2020 15:40:37 -0700 Subject: [PATCH 57/91] gpu: ipu-v3: image-convert: Combine rotate/no-rotate irq handlers [ Upstream commit 0f6245f42ce9b7e4d20f2cda8d5f12b55a44d7d1 ] Combine the rotate_irq() and norotate_irq() handlers into a single eof_irq() handler. Signed-off-by: Steve Longerbeam Signed-off-by: Philipp Zabel Signed-off-by: Sasha Levin --- drivers/gpu/ipu-v3/ipu-image-convert.c | 60 +++++++++----------------- 1 file changed, 21 insertions(+), 39 deletions(-) diff --git a/drivers/gpu/ipu-v3/ipu-image-convert.c b/drivers/gpu/ipu-v3/ipu-image-convert.c index 91653adc41cc..cdaf1d74e31a 100644 --- a/drivers/gpu/ipu-v3/ipu-image-convert.c +++ b/drivers/gpu/ipu-v3/ipu-image-convert.c @@ -998,38 +998,7 @@ static irqreturn_t do_irq(struct ipu_image_convert_run *run) return IRQ_WAKE_THREAD; } -static irqreturn_t norotate_irq(int irq, void *data) -{ - struct ipu_image_convert_chan *chan = data; - struct ipu_image_convert_ctx *ctx; - struct ipu_image_convert_run *run; - unsigned long flags; - irqreturn_t ret; - - spin_lock_irqsave(&chan->irqlock, flags); - - /* get current run and its context */ - run = chan->current_run; - if (!run) { - ret = IRQ_NONE; - goto out; - } - - ctx = run->ctx; - - if (ipu_rot_mode_is_irt(ctx->rot_mode)) { - /* this is a rotation operation, just ignore */ - spin_unlock_irqrestore(&chan->irqlock, flags); - return IRQ_HANDLED; - } - - ret = do_irq(run); -out: - spin_unlock_irqrestore(&chan->irqlock, flags); - return ret; -} - -static irqreturn_t rotate_irq(int irq, void *data) +static irqreturn_t eof_irq(int irq, void *data) { struct ipu_image_convert_chan *chan = data; struct ipu_image_convert_priv *priv = chan->priv; @@ -1049,11 +1018,24 @@ static irqreturn_t rotate_irq(int irq, void *data) ctx = run->ctx; - if (!ipu_rot_mode_is_irt(ctx->rot_mode)) { - /* this was NOT a rotation operation, shouldn't happen */ - dev_err(priv->ipu->dev, "Unexpected rotation interrupt\n"); - spin_unlock_irqrestore(&chan->irqlock, flags); - return IRQ_HANDLED; + if (irq == chan->out_eof_irq) { + if (ipu_rot_mode_is_irt(ctx->rot_mode)) { + /* this is a rotation op, just ignore */ + ret = IRQ_HANDLED; + goto out; + } + } else if (irq == chan->rot_out_eof_irq) { + if (!ipu_rot_mode_is_irt(ctx->rot_mode)) { + /* this was NOT a rotation op, shouldn't happen */ + dev_err(priv->ipu->dev, + "Unexpected rotation interrupt\n"); + ret = IRQ_HANDLED; + goto out; + } + } else { + dev_err(priv->ipu->dev, "Received unknown irq %d\n", irq); + ret = IRQ_NONE; + goto out; } ret = do_irq(run); @@ -1148,7 +1130,7 @@ static int get_ipu_resources(struct ipu_image_convert_chan *chan) chan->out_chan, IPU_IRQ_EOF); - ret = request_threaded_irq(chan->out_eof_irq, norotate_irq, do_bh, + ret = request_threaded_irq(chan->out_eof_irq, eof_irq, do_bh, 0, "ipu-ic", chan); if (ret < 0) { dev_err(priv->ipu->dev, "could not acquire irq %d\n", @@ -1161,7 +1143,7 @@ static int get_ipu_resources(struct ipu_image_convert_chan *chan) chan->rotation_out_chan, IPU_IRQ_EOF); - ret = request_threaded_irq(chan->rot_out_eof_irq, rotate_irq, do_bh, + ret = request_threaded_irq(chan->rot_out_eof_irq, eof_irq, do_bh, 0, "ipu-ic", chan); if (ret < 0) { dev_err(priv->ipu->dev, "could not acquire irq %d\n", From a24e10abad8ff9df527fc3ceed74c0d361f68230 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Fri, 19 Jun 2020 16:42:14 +0800 Subject: [PATCH 58/91] dm rq: don't call blk_mq_queue_stopped() in dm_stop_queue() [ Upstream commit e766668c6cd49d741cfb49eaeb38998ba34d27bc ] dm_stop_queue() only uses blk_mq_quiesce_queue() so it doesn't formally stop the blk-mq queue; therefore there is no point making the blk_mq_queue_stopped() check -- it will never be stopped. In addition, even though dm_stop_queue() actually tries to quiesce hw queues via blk_mq_quiesce_queue(), checking with blk_queue_quiesced() to avoid unnecessary queue quiesce isn't reliable because: the QUEUE_FLAG_QUIESCED flag is set before synchronize_rcu() and dm_stop_queue() may be called when synchronize_rcu() from another blk_mq_quiesce_queue() is in-progress. Fixes: 7b17c2f7292ba ("dm: Fix a race condition related to stopping and starting queues") Signed-off-by: Ming Lei Signed-off-by: Mike Snitzer Signed-off-by: Sasha Levin --- drivers/md/dm-rq.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/md/dm-rq.c b/drivers/md/dm-rq.c index 4d36373e1c0f..9fde174ce396 100644 --- a/drivers/md/dm-rq.c +++ b/drivers/md/dm-rq.c @@ -95,9 +95,6 @@ static void dm_old_stop_queue(struct request_queue *q) static void dm_mq_stop_queue(struct request_queue *q) { - if (blk_mq_queue_stopped(q)) - return; - blk_mq_quiesce_queue(q); } From 9f00bbb7a21ac7f695aa449981330d706ab46fb9 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Thu, 9 Jul 2020 08:59:43 +0530 Subject: [PATCH 59/91] selftests/powerpc: ptrace-pkey: Rename variables to make it easier to follow code [ Upstream commit 9a11f12e0a6c374b3ef1ce81e32ce477d28eb1b8 ] Rename variable to indicate that they are invalid values which we will use to test ptrace update of pkeys. Signed-off-by: Aneesh Kumar K.V Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200709032946.881753-21-aneesh.kumar@linux.ibm.com Signed-off-by: Sasha Levin --- .../selftests/powerpc/ptrace/ptrace-pkey.c | 26 +++++++++---------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/tools/testing/selftests/powerpc/ptrace/ptrace-pkey.c b/tools/testing/selftests/powerpc/ptrace/ptrace-pkey.c index bdbbbe8431e0..f9216c7a1829 100644 --- a/tools/testing/selftests/powerpc/ptrace/ptrace-pkey.c +++ b/tools/testing/selftests/powerpc/ptrace/ptrace-pkey.c @@ -44,7 +44,7 @@ struct shared_info { unsigned long amr2; /* AMR value that ptrace should refuse to write to the child. */ - unsigned long amr3; + unsigned long invalid_amr; /* IAMR value the parent expects to read from the child. */ unsigned long expected_iamr; @@ -57,8 +57,8 @@ struct shared_info { * (even though they're valid ones) because userspace doesn't have * access to those registers. */ - unsigned long new_iamr; - unsigned long new_uamor; + unsigned long invalid_iamr; + unsigned long invalid_uamor; }; static int sys_pkey_alloc(unsigned long flags, unsigned long init_access_rights) @@ -100,7 +100,7 @@ static int child(struct shared_info *info) info->amr1 |= 3ul << pkeyshift(pkey1); info->amr2 |= 3ul << pkeyshift(pkey2); - info->amr3 |= info->amr2 | 3ul << pkeyshift(pkey3); + info->invalid_amr |= info->amr2 | 3ul << pkeyshift(pkey3); if (disable_execute) info->expected_iamr |= 1ul << pkeyshift(pkey1); @@ -111,8 +111,8 @@ static int child(struct shared_info *info) info->expected_uamor |= 3ul << pkeyshift(pkey1) | 3ul << pkeyshift(pkey2); - info->new_iamr |= 1ul << pkeyshift(pkey1) | 1ul << pkeyshift(pkey2); - info->new_uamor |= 3ul << pkeyshift(pkey1); + info->invalid_iamr |= 1ul << pkeyshift(pkey1) | 1ul << pkeyshift(pkey2); + info->invalid_uamor |= 3ul << pkeyshift(pkey1); /* * We won't use pkey3. We just want a plausible but invalid key to test @@ -196,9 +196,9 @@ static int parent(struct shared_info *info, pid_t pid) PARENT_SKIP_IF_UNSUPPORTED(ret, &info->child_sync); PARENT_FAIL_IF(ret, &info->child_sync); - info->amr1 = info->amr2 = info->amr3 = regs[0]; - info->expected_iamr = info->new_iamr = regs[1]; - info->expected_uamor = info->new_uamor = regs[2]; + info->amr1 = info->amr2 = info->invalid_amr = regs[0]; + info->expected_iamr = info->invalid_iamr = regs[1]; + info->expected_uamor = info->invalid_uamor = regs[2]; /* Wake up child so that it can set itself up. */ ret = prod_child(&info->child_sync); @@ -234,10 +234,10 @@ static int parent(struct shared_info *info, pid_t pid) return ret; /* Write invalid AMR value in child. */ - ret = ptrace_write_regs(pid, NT_PPC_PKEY, &info->amr3, 1); + ret = ptrace_write_regs(pid, NT_PPC_PKEY, &info->invalid_amr, 1); PARENT_FAIL_IF(ret, &info->child_sync); - printf("%-30s AMR: %016lx\n", ptrace_write_running, info->amr3); + printf("%-30s AMR: %016lx\n", ptrace_write_running, info->invalid_amr); /* Wake up child so that it can verify it didn't change. */ ret = prod_child(&info->child_sync); @@ -249,7 +249,7 @@ static int parent(struct shared_info *info, pid_t pid) /* Try to write to IAMR. */ regs[0] = info->amr1; - regs[1] = info->new_iamr; + regs[1] = info->invalid_iamr; ret = ptrace_write_regs(pid, NT_PPC_PKEY, regs, 2); PARENT_FAIL_IF(!ret, &info->child_sync); @@ -257,7 +257,7 @@ static int parent(struct shared_info *info, pid_t pid) ptrace_write_running, regs[0], regs[1]); /* Try to write to IAMR and UAMOR. */ - regs[2] = info->new_uamor; + regs[2] = info->invalid_uamor; ret = ptrace_write_regs(pid, NT_PPC_PKEY, regs, 3); PARENT_FAIL_IF(!ret, &info->child_sync); From a075f690c28eaea5616fad4fd8c698ee8b23e2d9 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Thu, 9 Jul 2020 08:59:44 +0530 Subject: [PATCH 60/91] selftests/powerpc: ptrace-pkey: Update the test to mark an invalid pkey correctly [ Upstream commit 0eaa3b5ca7b5a76e3783639c828498343be66a01 ] Signed-off-by: Aneesh Kumar K.V Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200709032946.881753-22-aneesh.kumar@linux.ibm.com Signed-off-by: Sasha Levin --- .../selftests/powerpc/ptrace/ptrace-pkey.c | 30 ++++++++----------- 1 file changed, 12 insertions(+), 18 deletions(-) diff --git a/tools/testing/selftests/powerpc/ptrace/ptrace-pkey.c b/tools/testing/selftests/powerpc/ptrace/ptrace-pkey.c index f9216c7a1829..bc33d748d95b 100644 --- a/tools/testing/selftests/powerpc/ptrace/ptrace-pkey.c +++ b/tools/testing/selftests/powerpc/ptrace/ptrace-pkey.c @@ -66,11 +66,6 @@ static int sys_pkey_alloc(unsigned long flags, unsigned long init_access_rights) return syscall(__NR_pkey_alloc, flags, init_access_rights); } -static int sys_pkey_free(int pkey) -{ - return syscall(__NR_pkey_free, pkey); -} - static int child(struct shared_info *info) { unsigned long reg; @@ -100,7 +95,11 @@ static int child(struct shared_info *info) info->amr1 |= 3ul << pkeyshift(pkey1); info->amr2 |= 3ul << pkeyshift(pkey2); - info->invalid_amr |= info->amr2 | 3ul << pkeyshift(pkey3); + /* + * invalid amr value where we try to force write + * things which are deined by a uamor setting. + */ + info->invalid_amr = info->amr2 | (~0x0UL & ~info->expected_uamor); if (disable_execute) info->expected_iamr |= 1ul << pkeyshift(pkey1); @@ -111,17 +110,12 @@ static int child(struct shared_info *info) info->expected_uamor |= 3ul << pkeyshift(pkey1) | 3ul << pkeyshift(pkey2); - info->invalid_iamr |= 1ul << pkeyshift(pkey1) | 1ul << pkeyshift(pkey2); - info->invalid_uamor |= 3ul << pkeyshift(pkey1); - /* - * We won't use pkey3. We just want a plausible but invalid key to test - * whether ptrace will let us write to AMR bits we are not supposed to. - * - * This also tests whether the kernel restores the UAMOR permissions - * after a key is freed. + * Create an IAMR value different from expected value. + * Kernel will reject an IAMR and UAMOR change. */ - sys_pkey_free(pkey3); + info->invalid_iamr = info->expected_iamr | (1ul << pkeyshift(pkey1) | 1ul << pkeyshift(pkey2)); + info->invalid_uamor = info->expected_uamor & ~(0x3ul << pkeyshift(pkey1)); printf("%-30s AMR: %016lx pkey1: %d pkey2: %d pkey3: %d\n", user_write, info->amr1, pkey1, pkey2, pkey3); @@ -196,9 +190,9 @@ static int parent(struct shared_info *info, pid_t pid) PARENT_SKIP_IF_UNSUPPORTED(ret, &info->child_sync); PARENT_FAIL_IF(ret, &info->child_sync); - info->amr1 = info->amr2 = info->invalid_amr = regs[0]; - info->expected_iamr = info->invalid_iamr = regs[1]; - info->expected_uamor = info->invalid_uamor = regs[2]; + info->amr1 = info->amr2 = regs[0]; + info->expected_iamr = regs[1]; + info->expected_uamor = regs[2]; /* Wake up child so that it can set itself up. */ ret = prod_child(&info->child_sync); From 0b80d3cdb0fe028ecb63ea9d330d8fba1cecf294 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Thu, 9 Jul 2020 08:59:45 +0530 Subject: [PATCH 61/91] selftests/powerpc: ptrace-pkey: Don't update expected UAMOR value [ Upstream commit 3563b9bea0ca7f53e4218b5e268550341a49f333 ] With commit 4a4a5e5d2aad ("powerpc/pkeys: key allocation/deallocation must not change pkey registers") we are not updating UAMOR on key allocation. So don't update the expected uamor value in the test. Fixes: 4a4a5e5d2aad ("powerpc/pkeys: key allocation/deallocation must not change pkey registers") Signed-off-by: Aneesh Kumar K.V Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200709032946.881753-23-aneesh.kumar@linux.ibm.com Signed-off-by: Sasha Levin --- tools/testing/selftests/powerpc/ptrace/ptrace-pkey.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/tools/testing/selftests/powerpc/ptrace/ptrace-pkey.c b/tools/testing/selftests/powerpc/ptrace/ptrace-pkey.c index bc33d748d95b..3694613f418f 100644 --- a/tools/testing/selftests/powerpc/ptrace/ptrace-pkey.c +++ b/tools/testing/selftests/powerpc/ptrace/ptrace-pkey.c @@ -101,15 +101,20 @@ static int child(struct shared_info *info) */ info->invalid_amr = info->amr2 | (~0x0UL & ~info->expected_uamor); + /* + * if PKEY_DISABLE_EXECUTE succeeded we should update the expected_iamr + */ if (disable_execute) info->expected_iamr |= 1ul << pkeyshift(pkey1); else info->expected_iamr &= ~(1ul << pkeyshift(pkey1)); - info->expected_iamr &= ~(1ul << pkeyshift(pkey2) | 1ul << pkeyshift(pkey3)); + /* + * We allocated pkey2 and pkey 3 above. Clear the IAMR bits. + */ + info->expected_iamr &= ~(1ul << pkeyshift(pkey2)); + info->expected_iamr &= ~(1ul << pkeyshift(pkey3)); - info->expected_uamor |= 3ul << pkeyshift(pkey1) | - 3ul << pkeyshift(pkey2); /* * Create an IAMR value different from expected value. * Kernel will reject an IAMR and UAMOR change. From b9b2092af3c1ac84fa30f72b767a5a50cab2ba04 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Tue, 14 Jul 2020 20:22:11 +0100 Subject: [PATCH 62/91] iommu/omap: Check for failure of a call to omap_iommu_dump_ctx [ Upstream commit dee9d154f40c58d02f69acdaa5cfd1eae6ebc28b ] It is possible for the call to omap_iommu_dump_ctx to return a negative error number, so check for the failure and return the error number rather than pass the negative value to simple_read_from_buffer. Fixes: 14e0e6796a0d ("OMAP: iommu: add initial debugfs support") Signed-off-by: Colin Ian King Link: https://lore.kernel.org/r/20200714192211.744776-1-colin.king@canonical.com Addresses-Coverity: ("Improper use of negative value") Signed-off-by: Joerg Roedel Signed-off-by: Sasha Levin --- drivers/iommu/omap-iommu-debug.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/iommu/omap-iommu-debug.c b/drivers/iommu/omap-iommu-debug.c index 50217548c3b8..5ce55fabc9d8 100644 --- a/drivers/iommu/omap-iommu-debug.c +++ b/drivers/iommu/omap-iommu-debug.c @@ -101,8 +101,11 @@ static ssize_t debug_read_regs(struct file *file, char __user *userbuf, mutex_lock(&iommu_debug_lock); bytes = omap_iommu_dump_ctx(obj, p, count); + if (bytes < 0) + goto err; bytes = simple_read_from_buffer(userbuf, count, ppos, buf, bytes); +err: mutex_unlock(&iommu_debug_lock); kfree(buf); From a5040830b48953d89ec5254e65e6c5b161165762 Mon Sep 17 00:00:00 2001 From: Liu Yi L Date: Fri, 24 Jul 2020 09:49:14 +0800 Subject: [PATCH 63/91] iommu/vt-d: Enforce PASID devTLB field mask [ Upstream commit 5f77d6ca5ca74e4b4a5e2e010f7ff50c45dea326 ] Set proper masks to avoid invalid input spillover to reserved bits. Signed-off-by: Liu Yi L Signed-off-by: Jacob Pan Signed-off-by: Lu Baolu Reviewed-by: Eric Auger Link: https://lore.kernel.org/r/20200724014925.15523-2-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel Signed-off-by: Sasha Levin --- include/linux/intel-iommu.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h index b1b4411b4c6b..539f4a84412f 100644 --- a/include/linux/intel-iommu.h +++ b/include/linux/intel-iommu.h @@ -308,8 +308,8 @@ enum { #define QI_DEV_EIOTLB_ADDR(a) ((u64)(a) & VTD_PAGE_MASK) #define QI_DEV_EIOTLB_SIZE (((u64)1) << 11) -#define QI_DEV_EIOTLB_GLOB(g) ((u64)g) -#define QI_DEV_EIOTLB_PASID(p) (((u64)p) << 32) +#define QI_DEV_EIOTLB_GLOB(g) ((u64)(g) & 0x1) +#define QI_DEV_EIOTLB_PASID(p) ((u64)((p) & 0xfffff) << 32) #define QI_DEV_EIOTLB_SID(sid) ((u64)((sid) & 0xffff) << 16) #define QI_DEV_EIOTLB_QDEP(qd) ((u64)((qd) & 0x1f) << 4) #define QI_DEV_EIOTLB_PFSID(pfsid) (((u64)(pfsid & 0xf) << 12) | \ From 7fc9f681fad28ac720acbb157445810fe0b3bf3c Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Mon, 29 Jun 2020 17:38:07 +0200 Subject: [PATCH 64/91] i2c: rcar: slave: only send STOP event when we have been addressed [ Upstream commit 314139f9f0abdba61ed9a8463bbcb0bf900ac5a2 ] When the SSR interrupt is activated, it will detect every STOP condition on the bus, not only the ones after we have been addressed. So, enable this interrupt only after we have been addressed, and disable it otherwise. Fixes: de20d1857dd6 ("i2c: rcar: add slave support") Signed-off-by: Wolfram Sang Signed-off-by: Wolfram Sang Signed-off-by: Sasha Levin --- drivers/i2c/busses/i2c-rcar.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/i2c/busses/i2c-rcar.c b/drivers/i2c/busses/i2c-rcar.c index 6e49e438ef5a..11d197761685 100644 --- a/drivers/i2c/busses/i2c-rcar.c +++ b/drivers/i2c/busses/i2c-rcar.c @@ -587,13 +587,14 @@ static bool rcar_i2c_slave_irq(struct rcar_i2c_priv *priv) rcar_i2c_write(priv, ICSIER, SDR | SSR | SAR); } - rcar_i2c_write(priv, ICSSR, ~SAR & 0xff); + /* Clear SSR, too, because of old STOPs to other clients than us */ + rcar_i2c_write(priv, ICSSR, ~(SAR | SSR) & 0xff); } /* master sent stop */ if (ssr_filtered & SSR) { i2c_slave_event(priv->slave, I2C_SLAVE_STOP, &value); - rcar_i2c_write(priv, ICSIER, SAR | SSR); + rcar_i2c_write(priv, ICSIER, SAR); rcar_i2c_write(priv, ICSSR, ~SSR & 0xff); } @@ -848,7 +849,7 @@ static int rcar_reg_slave(struct i2c_client *slave) priv->slave = slave; rcar_i2c_write(priv, ICSAR, slave->addr); rcar_i2c_write(priv, ICSSR, 0); - rcar_i2c_write(priv, ICSIER, SAR | SSR); + rcar_i2c_write(priv, ICSIER, SAR); rcar_i2c_write(priv, ICSCR, SIE | SDBS); return 0; From f0b9f54f4763091d4616d4fcf104d65967820f93 Mon Sep 17 00:00:00 2001 From: Xu Wang Date: Mon, 13 Jul 2020 03:21:43 +0000 Subject: [PATCH 65/91] clk: clk-atlas6: fix return value check in atlas6_clk_init() [ Upstream commit 12b90b40854a8461a02ef19f6f4474cc88d64b66 ] In case of error, the function clk_register() returns ERR_PTR() and never returns NULL. The NULL test in the return value check should be replaced with IS_ERR(). Signed-off-by: Xu Wang Link: https://lore.kernel.org/r/20200713032143.21362-1-vulab@iscas.ac.cn Acked-by: Barry Song Fixes: 7bf21bc81f28 ("clk: sirf: re-arch to make the codes support both prima2 and atlas6") Signed-off-by: Stephen Boyd Signed-off-by: Sasha Levin --- drivers/clk/sirf/clk-atlas6.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/clk/sirf/clk-atlas6.c b/drivers/clk/sirf/clk-atlas6.c index 0cd11e6893af..25ed60776560 100644 --- a/drivers/clk/sirf/clk-atlas6.c +++ b/drivers/clk/sirf/clk-atlas6.c @@ -136,7 +136,7 @@ static void __init atlas6_clk_init(struct device_node *np) for (i = pll1; i < maxclk; i++) { atlas6_clks[i] = clk_register(NULL, atlas6_clk_hw_array[i]); - BUG_ON(!atlas6_clks[i]); + BUG_ON(IS_ERR(atlas6_clks[i])); } clk_register_clkdev(atlas6_clks[cpu], NULL, "cpu"); clk_register_clkdev(atlas6_clks[io], NULL, "io"); From 17dc3213fbc0e7f9fd962ba9dd4ca6d215f53fa7 Mon Sep 17 00:00:00 2001 From: Rayagonda Kokatanur Date: Fri, 17 Jul 2020 21:46:06 -0700 Subject: [PATCH 66/91] pwm: bcm-iproc: handle clk_get_rate() return MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 6ced5ff0be8e94871ba846dfbddf69d21363f3d7 ] Handle clk_get_rate() returning 0 to avoid possible division by zero. Fixes: daa5abc41c80 ("pwm: Add support for Broadcom iProc PWM controller") Signed-off-by: Rayagonda Kokatanur Signed-off-by: Scott Branden Reviewed-by: Ray Jui Reviewed-by: Uwe Kleine-König Signed-off-by: Thierry Reding Signed-off-by: Sasha Levin --- drivers/pwm/pwm-bcm-iproc.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/pwm/pwm-bcm-iproc.c b/drivers/pwm/pwm-bcm-iproc.c index 31b01035d0ab..8cfba3614e60 100644 --- a/drivers/pwm/pwm-bcm-iproc.c +++ b/drivers/pwm/pwm-bcm-iproc.c @@ -85,8 +85,6 @@ static void iproc_pwmc_get_state(struct pwm_chip *chip, struct pwm_device *pwm, u64 tmp, multi, rate; u32 value, prescale; - rate = clk_get_rate(ip->clk); - value = readl(ip->base + IPROC_PWM_CTRL_OFFSET); if (value & BIT(IPROC_PWM_CTRL_EN_SHIFT(pwm->hwpwm))) @@ -99,6 +97,13 @@ static void iproc_pwmc_get_state(struct pwm_chip *chip, struct pwm_device *pwm, else state->polarity = PWM_POLARITY_INVERSED; + rate = clk_get_rate(ip->clk); + if (rate == 0) { + state->period = 0; + state->duty_cycle = 0; + return; + } + value = readl(ip->base + IPROC_PWM_PRESCALE_OFFSET); prescale = value >> IPROC_PWM_PRESCALE_SHIFT(pwm->hwpwm); prescale &= IPROC_PWM_PRESCALE_MAX; From bb1da23aa45bbe1edb74379d5b541c62f0d2836a Mon Sep 17 00:00:00 2001 From: Thomas Hebb Date: Sun, 26 Jul 2020 21:08:14 -0700 Subject: [PATCH 67/91] tools build feature: Use CC and CXX from parent [ Upstream commit e3232c2f39acafd5a29128425bc30b9884642cfa ] commit c8c188679ccf ("tools build: Use the same CC for feature detection and actual build") changed these assignments from unconditional (:=) to conditional (?=) so that they wouldn't clobber values from the environment. However, conditional assignment does not work properly for variables that Make implicitly sets, among which are CC and CXX. To quote tools/scripts/Makefile.include, which handles this properly: # Makefiles suck: This macro sets a default value of $(2) for the # variable named by $(1), unless the variable has been set by # environment or command line. This is necessary for CC and AR # because make sets default values, so the simpler ?= approach # won't work as expected. In other words, the conditional assignments will not run even if the variables are not overridden in the environment; Make will set CC to "cc" and CXX to "g++" when it starts[1], meaning the variables are not empty by the time the conditional assignments are evaluated. This breaks cross-compilation when CROSS_COMPILE is set but CC isn't, since "cc" gets used for feature detection instead of the cross compiler (and likewise for CXX). To fix the issue, just pass down the values of CC and CXX computed by the parent Makefile, which gets included by the Makefile that actually builds whatever we're detecting features for and so is guaranteed to have good values. This is a better solution anyway, since it means we aren't trying to replicate the logic of the parent build system and so don't risk it getting out of sync. Leave PKG_CONFIG alone, since 1) there's no common logic to compute it in Makefile.include, and 2) it's not an implicit variable, so conditional assignment works properly. [1] https://www.gnu.org/software/make/manual/html_node/Implicit-Variables.html Fixes: c8c188679ccf ("tools build: Use the same CC for feature detection and actual build") Signed-off-by: Thomas Hebb Acked-by: Jiri Olsa Cc: David Carrillo-Cisneros Cc: Ian Rogers Cc: Igor Lubashev Cc: Namhyung Kim Cc: Quentin Monnet Cc: Song Liu Cc: Stephane Eranian Cc: thomas hebb Link: http://lore.kernel.org/lkml/0a6e69d1736b0fa231a648f50b0cce5d8a6734ef.1595822871.git.tommyhebb@gmail.com Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Sasha Levin --- tools/build/Makefile.feature | 2 +- tools/build/feature/Makefile | 2 -- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/tools/build/Makefile.feature b/tools/build/Makefile.feature index 42a787856cd8..7d9d70c0b380 100644 --- a/tools/build/Makefile.feature +++ b/tools/build/Makefile.feature @@ -7,7 +7,7 @@ endif feature_check = $(eval $(feature_check_code)) define feature_check_code - feature-$(1) := $(shell $(MAKE) OUTPUT=$(OUTPUT_FEATURES) CFLAGS="$(EXTRA_CFLAGS) $(FEATURE_CHECK_CFLAGS-$(1))" CXXFLAGS="$(EXTRA_CXXFLAGS) $(FEATURE_CHECK_CXXFLAGS-$(1))" LDFLAGS="$(LDFLAGS) $(FEATURE_CHECK_LDFLAGS-$(1))" -C $(feature_dir) $(OUTPUT_FEATURES)test-$1.bin >/dev/null 2>/dev/null && echo 1 || echo 0) + feature-$(1) := $(shell $(MAKE) OUTPUT=$(OUTPUT_FEATURES) CC=$(CC) CXX=$(CXX) CFLAGS="$(EXTRA_CFLAGS) $(FEATURE_CHECK_CFLAGS-$(1))" CXXFLAGS="$(EXTRA_CXXFLAGS) $(FEATURE_CHECK_CXXFLAGS-$(1))" LDFLAGS="$(LDFLAGS) $(FEATURE_CHECK_LDFLAGS-$(1))" -C $(feature_dir) $(OUTPUT_FEATURES)test-$1.bin >/dev/null 2>/dev/null && echo 1 || echo 0) endef feature_set = $(eval $(feature_set_code)) diff --git a/tools/build/feature/Makefile b/tools/build/feature/Makefile index bf8a8ebcca1e..c4845b66b9ba 100644 --- a/tools/build/feature/Makefile +++ b/tools/build/feature/Makefile @@ -62,8 +62,6 @@ FILES= \ FILES := $(addprefix $(OUTPUT),$(FILES)) -CC ?= $(CROSS_COMPILE)gcc -CXX ?= $(CROSS_COMPILE)g++ PKG_CONFIG ?= $(CROSS_COMPILE)pkg-config LLVM_CONFIG ?= llvm-config From 49b30a64d320b23d7c3a3a67f2501a6ab5518a29 Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Sun, 26 Jul 2020 18:16:06 +0200 Subject: [PATCH 68/91] i2c: rcar: avoid race when unregistering slave MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit c7c9e914f9a0478fba4dc6f227cfd69cf84a4063 ] Due to the lockless design of the driver, it is theoretically possible to access a NULL pointer, if a slave interrupt was running while we were unregistering the slave. To make this rock solid, disable the interrupt for a short time while we are clearing the interrupt_enable register. This patch is purely based on code inspection. The OOPS is super-hard to trigger because clearing SAR (the address) makes interrupts even more unlikely to happen as well. While here, reinit SCR to SDBS because this bit should always be set according to documentation. There is no effect, though, because the interface is disabled. Fixes: 7b814d852af6 ("i2c: rcar: avoid race when unregistering slave client") Signed-off-by: Wolfram Sang Reviewed-by: Niklas Söderlund Signed-off-by: Wolfram Sang Signed-off-by: Sasha Levin --- drivers/i2c/busses/i2c-rcar.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/i2c/busses/i2c-rcar.c b/drivers/i2c/busses/i2c-rcar.c index 11d197761685..dcdce18fc706 100644 --- a/drivers/i2c/busses/i2c-rcar.c +++ b/drivers/i2c/busses/i2c-rcar.c @@ -861,12 +861,14 @@ static int rcar_unreg_slave(struct i2c_client *slave) WARN_ON(!priv->slave); - /* disable irqs and ensure none is running before clearing ptr */ + /* ensure no irq is running before clearing ptr */ + disable_irq(priv->irq); rcar_i2c_write(priv, ICSIER, 0); - rcar_i2c_write(priv, ICSCR, 0); + rcar_i2c_write(priv, ICSSR, 0); + enable_irq(priv->irq); + rcar_i2c_write(priv, ICSCR, SDBS); rcar_i2c_write(priv, ICSAR, 0); /* Gen2: must be 0 if not using slave */ - synchronize_irq(priv->irq); priv->slave = NULL; pm_runtime_put(rcar_i2c_priv_to_dev(priv)); From ff62a41403720bccbdfa7dc681410d68548621b7 Mon Sep 17 00:00:00 2001 From: Stafford Horne Date: Tue, 16 Jun 2020 06:19:46 +0900 Subject: [PATCH 69/91] openrisc: Fix oops caused when dumping stack [ Upstream commit 57b8e277c33620e115633cdf700a260b55095460 ] When dumping a stack with 'cat /proc/#/stack' the kernel would oops. For example: # cat /proc/690/stack Unable to handle kernel access at virtual address 0x7fc60f58 Oops#: 0000 CPU #: 0 PC: c00097fc SR: 0000807f SP: d6f09b9c GPR00: 00000000 GPR01: d6f09b9c GPR02: d6f09bb8 GPR03: d6f09bc4 GPR04: 7fc60f5c GPR05: c00099b4 GPR06: 00000000 GPR07: d6f09ba3 GPR08: ffffff00 GPR09: c0009804 GPR10: d6f08000 GPR11: 00000000 GPR12: ffffe000 GPR13: dbb86000 GPR14: 00000001 GPR15: dbb86250 GPR16: 7fc60f63 GPR17: 00000f5c GPR18: d6f09bc4 GPR19: 00000000 GPR20: c00099b4 GPR21: ffffffc0 GPR22: 00000000 GPR23: 00000000 GPR24: 00000001 GPR25: 000002c6 GPR26: d78b6850 GPR27: 00000001 GPR28: 00000000 GPR29: dbb86000 GPR30: ffffffff GPR31: dbb862fc RES: 00000000 oGPR11: ffffffff Process cat (pid: 702, stackpage=d79d6000) Stack: Call trace: [<598977f2>] save_stack_trace_tsk+0x40/0x74 [<95063f0e>] stack_trace_save_tsk+0x44/0x58 [] proc_pid_stack+0xd0/0x13c [] proc_single_show+0x6c/0xf0 [] seq_read+0x1b4/0x688 [<2d6c7480>] do_iter_read+0x208/0x248 [<2182a2fb>] vfs_readv+0x64/0x90 This was caused by the stack trace code in save_stack_trace_tsk using the wrong stack pointer. It was using the user stack pointer instead of the kernel stack pointer. Fix this by using the right stack. Also for good measure we add try_get_task_stack/put_task_stack to ensure the task is not lost while we are walking it's stack. Fixes: eecac38b0423a ("openrisc: support framepointers and STACKTRACE_SUPPORT") Signed-off-by: Stafford Horne Signed-off-by: Sasha Levin --- arch/openrisc/kernel/stacktrace.c | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/arch/openrisc/kernel/stacktrace.c b/arch/openrisc/kernel/stacktrace.c index 43f140a28bc7..54d38809e22c 100644 --- a/arch/openrisc/kernel/stacktrace.c +++ b/arch/openrisc/kernel/stacktrace.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include @@ -68,12 +69,25 @@ void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace) { unsigned long *sp = NULL; + if (!try_get_task_stack(tsk)) + return; + if (tsk == current) sp = (unsigned long *) &sp; - else - sp = (unsigned long *) KSTK_ESP(tsk); + else { + unsigned long ksp; + + /* Locate stack from kernel context */ + ksp = task_thread_info(tsk)->ksp; + ksp += STACK_FRAME_OVERHEAD; /* redzone */ + ksp += sizeof(struct pt_regs); + + sp = (unsigned long *) ksp; + } unwind_stack(trace, sp, save_stack_address_nosched); + + put_task_stack(tsk); } EXPORT_SYMBOL_GPL(save_stack_trace_tsk); From fe9a71128e3dfb4f0fdd6edb7a0ef1d980dfcf1d Mon Sep 17 00:00:00 2001 From: "Ewan D. Milne" Date: Wed, 29 Jul 2020 19:10:11 -0400 Subject: [PATCH 70/91] scsi: lpfc: nvmet: Avoid hang / use-after-free again when destroying targetport [ Upstream commit af6de8c60fe9433afa73cea6fcccdccd98ad3e5e ] We cannot wait on a completion object in the lpfc_nvme_targetport structure in the _destroy_targetport() code path because the NVMe/fc transport will free that structure immediately after the .targetport_delete() callback. This results in a use-after-free, and a crash if slub_debug=FZPU is enabled. An earlier fix put put the completion on the stack, but commit 2a0fb340fcc8 ("scsi: lpfc: Correct localport timeout duration error") subsequently changed the code to reference the completion through a pointer in the object rather than the local stack variable. Fix this by using the stack variable directly. Link: https://lore.kernel.org/r/20200729231011.13240-1-emilne@redhat.com Fixes: 2a0fb340fcc8 ("scsi: lpfc: Correct localport timeout duration error") Reviewed-by: James Smart Signed-off-by: Ewan D. Milne Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin --- drivers/scsi/lpfc/lpfc_nvmet.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/lpfc/lpfc_nvmet.c b/drivers/scsi/lpfc/lpfc_nvmet.c index 768eba8c111d..5bc33817568e 100644 --- a/drivers/scsi/lpfc/lpfc_nvmet.c +++ b/drivers/scsi/lpfc/lpfc_nvmet.c @@ -1712,7 +1712,7 @@ lpfc_nvmet_destroy_targetport(struct lpfc_hba *phba) } tgtp->tport_unreg_cmp = &tport_unreg_cmp; nvmet_fc_unregister_targetport(phba->targetport); - if (!wait_for_completion_timeout(tgtp->tport_unreg_cmp, + if (!wait_for_completion_timeout(&tport_unreg_cmp, msecs_to_jiffies(LPFC_NVMET_WAIT_TMO))) lpfc_printf_log(phba, KERN_ERR, LOG_NVME, "6179 Unreg targetport %p timeout " From ddf2b7891323fe30bfac761d2e12d3378a0d7eb2 Mon Sep 17 00:00:00 2001 From: Krzysztof Sobota Date: Fri, 17 Jul 2020 12:31:09 +0200 Subject: [PATCH 71/91] watchdog: initialize device before misc_register [ Upstream commit cb36e29bb0e4b0c33c3d5866a0a4aebace4c99b7 ] When watchdog device is being registered, it calls misc_register that makes watchdog available for systemd to open. This is a data race scenario, because when device is open it may still have device struct not initialized - this in turn causes a crash. This patch moves device initialization before misc_register call and it solves the problem printed below. ------------[ cut here ]------------ WARNING: CPU: 3 PID: 1 at lib/kobject.c:612 kobject_get+0x50/0x54 kobject: '(null)' ((ptrval)): is not initialized, yet kobject_get() is being called. Modules linked in: k2_reset_status(O) davinci_wdt(+) sfn_platform_hwbcn(O) fsmddg_sfn(O) clk_misc_mmap(O) clk_sw_bcn(O) fsp_reset(O) cma_mod(O) slave_sup_notif(O) fpga_master(O) latency(O+) evnotify(O) enable_arm_pmu(O) xge(O) rio_mport_cdev br_netfilter bridge stp llc nvrd_checksum(O) ipv6 CPU: 3 PID: 1 Comm: systemd Tainted: G O 4.19.113-g2579778-fsm4_k2 #1 Hardware name: Keystone [] (unwind_backtrace) from [] (show_stack+0x18/0x1c) [] (show_stack) from [] (dump_stack+0xb4/0xe8) [] (dump_stack) from [] (__warn+0xfc/0x114) [] (__warn) from [] (warn_slowpath_fmt+0x50/0x74) [] (warn_slowpath_fmt) from [] (kobject_get+0x50/0x54) [] (kobject_get) from [] (get_device+0x1c/0x24) [] (get_device) from [] (watchdog_open+0x90/0xf0) [] (watchdog_open) from [] (misc_open+0x130/0x17c) [] (misc_open) from [] (chrdev_open+0xec/0x1a8) [] (chrdev_open) from [] (do_dentry_open+0x204/0x3cc) [] (do_dentry_open) from [] (path_openat+0x330/0x1148) [] (path_openat) from [] (do_filp_open+0x78/0xec) [] (do_filp_open) from [] (do_sys_open+0x130/0x1f4) [] (do_sys_open) from [] (ret_fast_syscall+0x0/0x28) Exception stack(0xd2ceffa8 to 0xd2cefff0) ffa0: b6f69968 00000000 ffffff9c b6ebd210 000a0001 00000000 ffc0: b6f69968 00000000 00000000 00000142 fffffffd ffffffff 00b65530 bed7bb78 ffe0: 00000142 bed7ba70 b6cc2503 b6cc41d6 ---[ end trace 7b16eb105513974f ]--- ------------[ cut here ]------------ WARNING: CPU: 3 PID: 1 at lib/refcount.c:153 kobject_get+0x24/0x54 refcount_t: increment on 0; use-after-free. Modules linked in: k2_reset_status(O) davinci_wdt(+) sfn_platform_hwbcn(O) fsmddg_sfn(O) clk_misc_mmap(O) clk_sw_bcn(O) fsp_reset(O) cma_mod(O) slave_sup_notif(O) fpga_master(O) latency(O+) evnotify(O) enable_arm_pmu(O) xge(O) rio_mport_cdev br_netfilter bridge stp llc nvrd_checksum(O) ipv6 CPU: 3 PID: 1 Comm: systemd Tainted: G W O 4.19.113-g2579778-fsm4_k2 #1 Hardware name: Keystone [] (unwind_backtrace) from [] (show_stack+0x18/0x1c) [] (show_stack) from [] (dump_stack+0xb4/0xe8) [] (dump_stack) from [] (__warn+0xfc/0x114) [] (__warn) from [] (warn_slowpath_fmt+0x50/0x74) [] (warn_slowpath_fmt) from [] (kobject_get+0x24/0x54) [] (kobject_get) from [] (get_device+0x1c/0x24) [] (get_device) from [] (watchdog_open+0x90/0xf0) [] (watchdog_open) from [] (misc_open+0x130/0x17c) [] (misc_open) from [] (chrdev_open+0xec/0x1a8) [] (chrdev_open) from [] (do_dentry_open+0x204/0x3cc) [] (do_dentry_open) from [] (path_openat+0x330/0x1148) [] (path_openat) from [] (do_filp_open+0x78/0xec) [] (do_filp_open) from [] (do_sys_open+0x130/0x1f4) [] (do_sys_open) from [] (ret_fast_syscall+0x0/0x28) Exception stack(0xd2ceffa8 to 0xd2cefff0) ffa0: b6f69968 00000000 ffffff9c b6ebd210 000a0001 00000000 ffc0: b6f69968 00000000 00000000 00000142 fffffffd ffffffff 00b65530 bed7bb78 ffe0: 00000142 bed7ba70 b6cc2503 b6cc41d6 ---[ end trace 7b16eb1055139750 ]--- Fixes: 72139dfa2464 ("watchdog: Fix the race between the release of watchdog_core_data and cdev") Reviewed-by: Guenter Roeck Reviewed-by: Alexander Sverdlin Signed-off-by: Krzysztof Sobota Link: https://lore.kernel.org/r/20200717103109.14660-1-krzysztof.sobota@nokia.com Signed-off-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck Signed-off-by: Sasha Levin --- drivers/watchdog/watchdog_dev.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/drivers/watchdog/watchdog_dev.c b/drivers/watchdog/watchdog_dev.c index 10b2090f3e5e..1c322caecf7f 100644 --- a/drivers/watchdog/watchdog_dev.c +++ b/drivers/watchdog/watchdog_dev.c @@ -947,6 +947,15 @@ static int watchdog_cdev_register(struct watchdog_device *wdd) if (IS_ERR_OR_NULL(watchdog_kworker)) return -ENODEV; + device_initialize(&wd_data->dev); + wd_data->dev.devt = MKDEV(MAJOR(watchdog_devt), wdd->id); + wd_data->dev.class = &watchdog_class; + wd_data->dev.parent = wdd->parent; + wd_data->dev.groups = wdd->groups; + wd_data->dev.release = watchdog_core_data_release; + dev_set_drvdata(&wd_data->dev, wdd); + dev_set_name(&wd_data->dev, "watchdog%d", wdd->id); + kthread_init_work(&wd_data->work, watchdog_ping_work); hrtimer_init(&wd_data->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); wd_data->timer.function = watchdog_timer_expired; @@ -967,15 +976,6 @@ static int watchdog_cdev_register(struct watchdog_device *wdd) } } - device_initialize(&wd_data->dev); - wd_data->dev.devt = MKDEV(MAJOR(watchdog_devt), wdd->id); - wd_data->dev.class = &watchdog_class; - wd_data->dev.parent = wdd->parent; - wd_data->dev.groups = wdd->groups; - wd_data->dev.release = watchdog_core_data_release; - dev_set_drvdata(&wd_data->dev, wdd); - dev_set_name(&wd_data->dev, "watchdog%d", wdd->id); - /* Fill in the data structures */ cdev_init(&wd_data->cdev, &watchdog_fops); From 27a545d597ddecc87cf20e1e4809985fd32736c7 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Thu, 6 Aug 2020 15:35:34 -0700 Subject: [PATCH 72/91] Input: sentelic - fix error return when fsp_reg_write fails [ Upstream commit ea38f06e0291986eb93beb6d61fd413607a30ca4 ] Currently when the call to fsp_reg_write fails -EIO is not being returned because the count is being returned instead of the return value in retval. Fix this by returning the value in retval instead of count. Addresses-Coverity: ("Unused value") Fixes: fc69f4a6af49 ("Input: add new driver for Sentelic Finger Sensing Pad") Signed-off-by: Colin Ian King Link: https://lore.kernel.org/r/20200603141218.131663-1-colin.king@canonical.com Signed-off-by: Dmitry Torokhov Signed-off-by: Sasha Levin --- drivers/input/mouse/sentelic.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/input/mouse/sentelic.c b/drivers/input/mouse/sentelic.c index 1d6010d463e2..022a8cb58a06 100644 --- a/drivers/input/mouse/sentelic.c +++ b/drivers/input/mouse/sentelic.c @@ -454,7 +454,7 @@ static ssize_t fsp_attr_set_setreg(struct psmouse *psmouse, void *data, fsp_reg_write_enable(psmouse, false); - return count; + return retval; } PSMOUSE_DEFINE_WO_ATTR(setreg, S_IWUSR, NULL, fsp_attr_set_setreg); From 3cfd94ed90ee1eb4854aef1a4f9d8a3334654671 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 26 Jun 2020 13:34:37 +0300 Subject: [PATCH 73/91] drm/vmwgfx: Use correct vmw_legacy_display_unit pointer [ Upstream commit 1d2c0c565bc0da25f5e899a862fb58e612b222df ] The "entry" pointer is an offset from the list head and it doesn't point to a valid vmw_legacy_display_unit struct. Presumably the intent was to point to the last entry. Also the "i++" wasn't used so I have removed that as well. Fixes: d7e1958dbe4a ("drm/vmwgfx: Support older hardware.") Signed-off-by: Dan Carpenter Reviewed-by: Roland Scheidegger Signed-off-by: Roland Scheidegger Signed-off-by: Sasha Levin --- drivers/gpu/drm/vmwgfx/vmwgfx_ldu.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_ldu.c b/drivers/gpu/drm/vmwgfx/vmwgfx_ldu.c index 723578117191..0743a7311700 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_ldu.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_ldu.c @@ -79,7 +79,7 @@ static int vmw_ldu_commit_list(struct vmw_private *dev_priv) struct vmw_legacy_display_unit *entry; struct drm_framebuffer *fb = NULL; struct drm_crtc *crtc = NULL; - int i = 0; + int i; /* If there is no display topology the host just assumes * that the guest will set the same layout as the host. @@ -90,12 +90,11 @@ static int vmw_ldu_commit_list(struct vmw_private *dev_priv) crtc = &entry->base.crtc; w = max(w, crtc->x + crtc->mode.hdisplay); h = max(h, crtc->y + crtc->mode.vdisplay); - i++; } if (crtc == NULL) return 0; - fb = entry->base.crtc.primary->state->fb; + fb = crtc->primary->state->fb; return vmw_kms_write_svga(dev_priv, w, h, fb->pitches[0], fb->format->cpp[0] * 8, From ae71f731f061272806651be14f355cafd9560462 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 26 Jun 2020 13:39:59 +0300 Subject: [PATCH 74/91] drm/vmwgfx: Fix two list_for_each loop exit tests [ Upstream commit 4437c1152ce0e57ab8f401aa696ea6291cc07ab1 ] These if statements are supposed to be true if we ended the list_for_each_entry() loops without hitting a break statement but they don't work. In the first loop, we increment "i" after the "if (i == unit)" condition so we don't necessarily know that "i" is not equal to unit at the end of the loop. In the second loop we exit when mode is not pointing to a valid drm_display_mode struct so it doesn't make sense to check "mode->type". Fixes: a278724aa23c ("drm/vmwgfx: Implement fbdev on kms v2") Signed-off-by: Dan Carpenter Reviewed-by: Roland Scheidegger Signed-off-by: Roland Scheidegger Signed-off-by: Sasha Levin --- drivers/gpu/drm/vmwgfx/vmwgfx_kms.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c index 6a712a8d59e9..e486b6517ac5 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c @@ -2861,7 +2861,7 @@ int vmw_kms_fbdev_init_data(struct vmw_private *dev_priv, ++i; } - if (i != unit) { + if (&con->head == &dev_priv->dev->mode_config.connector_list) { DRM_ERROR("Could not find initial display unit.\n"); ret = -EINVAL; goto out_unlock; @@ -2885,13 +2885,13 @@ int vmw_kms_fbdev_init_data(struct vmw_private *dev_priv, break; } - if (mode->type & DRM_MODE_TYPE_PREFERRED) - *p_mode = mode; - else { + if (&mode->head == &con->modes) { WARN_ONCE(true, "Could not find initial preferred mode.\n"); *p_mode = list_first_entry(&con->modes, struct drm_display_mode, head); + } else { + *p_mode = mode; } out_unlock: From c08be09b2c1c5389f1d24677b595822ac7a75a21 Mon Sep 17 00:00:00 2001 From: Wang Hai Date: Mon, 10 Aug 2020 10:57:05 +0800 Subject: [PATCH 75/91] net: qcom/emac: add missed clk_disable_unprepare in error path of emac_clks_phase1_init [ Upstream commit 50caa777a3a24d7027748e96265728ce748b41ef ] Fix the missing clk_disable_unprepare() before return from emac_clks_phase1_init() in the error handling case. Fixes: b9b17debc69d ("net: emac: emac gigabit ethernet controller driver") Reported-by: Hulk Robot Signed-off-by: Wang Hai Acked-by: Timur Tabi Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/qualcomm/emac/emac.c | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/qualcomm/emac/emac.c b/drivers/net/ethernet/qualcomm/emac/emac.c index 2a0cbc535a2e..19673ed929e6 100644 --- a/drivers/net/ethernet/qualcomm/emac/emac.c +++ b/drivers/net/ethernet/qualcomm/emac/emac.c @@ -493,13 +493,24 @@ static int emac_clks_phase1_init(struct platform_device *pdev, ret = clk_prepare_enable(adpt->clk[EMAC_CLK_CFG_AHB]); if (ret) - return ret; + goto disable_clk_axi; ret = clk_set_rate(adpt->clk[EMAC_CLK_HIGH_SPEED], 19200000); if (ret) - return ret; + goto disable_clk_cfg_ahb; - return clk_prepare_enable(adpt->clk[EMAC_CLK_HIGH_SPEED]); + ret = clk_prepare_enable(adpt->clk[EMAC_CLK_HIGH_SPEED]); + if (ret) + goto disable_clk_cfg_ahb; + + return 0; + +disable_clk_cfg_ahb: + clk_disable_unprepare(adpt->clk[EMAC_CLK_CFG_AHB]); +disable_clk_axi: + clk_disable_unprepare(adpt->clk[EMAC_CLK_AXI]); + + return ret; } /* Enable clocks; needs emac_clks_phase1_init to be called before */ From a906b868953a9c9bba44649a8fe760e818dd7224 Mon Sep 17 00:00:00 2001 From: Jeffrey Mitchell Date: Wed, 5 Aug 2020 12:23:19 -0500 Subject: [PATCH 76/91] nfs: Fix getxattr kernel panic and memory overflow [ Upstream commit b4487b93545214a9db8cbf32e86411677b0cca21 ] Move the buffer size check to decode_attr_security_label() before memcpy() Only call memcpy() if the buffer is large enough Fixes: aa9c2669626c ("NFS: Client implementation of Labeled-NFS") Signed-off-by: Jeffrey Mitchell [Trond: clean up duplicate test of label->len != 0] Signed-off-by: Trond Myklebust Signed-off-by: Sasha Levin --- fs/nfs/nfs4proc.c | 2 -- fs/nfs/nfs4xdr.c | 6 +++++- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 05cb68ca1ba1..1ef75b1deffa 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -5603,8 +5603,6 @@ static int _nfs4_get_security_label(struct inode *inode, void *buf, return ret; if (!(fattr.valid & NFS_ATTR_FATTR_V4_SECURITY_LABEL)) return -ENOENT; - if (buflen < label.len) - return -ERANGE; return 0; } diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index c4cf0192d7bb..0a5cae8f8aff 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -4280,7 +4280,11 @@ static int decode_attr_security_label(struct xdr_stream *xdr, uint32_t *bitmap, goto out_overflow; if (len < NFS4_MAXLABELLEN) { if (label) { - memcpy(label->label, p, len); + if (label->len) { + if (label->len < len) + return -ERANGE; + memcpy(label->label, p, len); + } label->len = len; label->pi = pi; label->lfs = lfs; From 95fc841ea8b681eea44e766c182c18f17818acbf Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Tue, 11 Aug 2020 18:35:33 -0700 Subject: [PATCH 77/91] fs/minix: set s_maxbytes correctly [ Upstream commit 32ac86efff91a3e4ef8c3d1cadd4559e23c8e73a ] The minix filesystem leaves super_block::s_maxbytes at MAX_NON_LFS rather than setting it to the actual filesystem-specific limit. This is broken because it means userspace doesn't see the standard behavior like getting EFBIG and SIGXFSZ when exceeding the maximum file size. Fix this by setting s_maxbytes correctly. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Eric Biggers Signed-off-by: Andrew Morton Cc: Alexander Viro Cc: Qiujun Huang Link: http://lkml.kernel.org/r/20200628060846.682158-5-ebiggers@kernel.org Signed-off-by: Linus Torvalds Signed-off-by: Sasha Levin --- fs/minix/inode.c | 12 +++++++----- fs/minix/itree_v1.c | 2 +- fs/minix/itree_v2.c | 3 +-- fs/minix/minix.h | 1 - 4 files changed, 9 insertions(+), 9 deletions(-) diff --git a/fs/minix/inode.c b/fs/minix/inode.c index 4f994de46e6b..03fe8bac36cf 100644 --- a/fs/minix/inode.c +++ b/fs/minix/inode.c @@ -155,8 +155,10 @@ static int minix_remount (struct super_block * sb, int * flags, char * data) return 0; } -static bool minix_check_superblock(struct minix_sb_info *sbi) +static bool minix_check_superblock(struct super_block *sb) { + struct minix_sb_info *sbi = minix_sb(sb); + if (sbi->s_imap_blocks == 0 || sbi->s_zmap_blocks == 0) return false; @@ -166,7 +168,7 @@ static bool minix_check_superblock(struct minix_sb_info *sbi) * of indirect blocks which places the limit well above U32_MAX. */ if (sbi->s_version == MINIX_V1 && - sbi->s_max_size > (7 + 512 + 512*512) * BLOCK_SIZE) + sb->s_maxbytes > (7 + 512 + 512*512) * BLOCK_SIZE) return false; return true; @@ -207,7 +209,7 @@ static int minix_fill_super(struct super_block *s, void *data, int silent) sbi->s_zmap_blocks = ms->s_zmap_blocks; sbi->s_firstdatazone = ms->s_firstdatazone; sbi->s_log_zone_size = ms->s_log_zone_size; - sbi->s_max_size = ms->s_max_size; + s->s_maxbytes = ms->s_max_size; s->s_magic = ms->s_magic; if (s->s_magic == MINIX_SUPER_MAGIC) { sbi->s_version = MINIX_V1; @@ -238,7 +240,7 @@ static int minix_fill_super(struct super_block *s, void *data, int silent) sbi->s_zmap_blocks = m3s->s_zmap_blocks; sbi->s_firstdatazone = m3s->s_firstdatazone; sbi->s_log_zone_size = m3s->s_log_zone_size; - sbi->s_max_size = m3s->s_max_size; + s->s_maxbytes = m3s->s_max_size; sbi->s_ninodes = m3s->s_ninodes; sbi->s_nzones = m3s->s_zones; sbi->s_dirsize = 64; @@ -250,7 +252,7 @@ static int minix_fill_super(struct super_block *s, void *data, int silent) } else goto out_no_fs; - if (!minix_check_superblock(sbi)) + if (!minix_check_superblock(s)) goto out_illegal_sb; /* diff --git a/fs/minix/itree_v1.c b/fs/minix/itree_v1.c index 046cc96ee7ad..c0d418209ead 100644 --- a/fs/minix/itree_v1.c +++ b/fs/minix/itree_v1.c @@ -29,7 +29,7 @@ static int block_to_path(struct inode * inode, long block, int offsets[DEPTH]) if (block < 0) { printk("MINIX-fs: block_to_path: block %ld < 0 on dev %pg\n", block, inode->i_sb->s_bdev); - } else if (block >= (minix_sb(inode->i_sb)->s_max_size/BLOCK_SIZE)) { + } else if (block >= inode->i_sb->s_maxbytes/BLOCK_SIZE) { if (printk_ratelimit()) printk("MINIX-fs: block_to_path: " "block %ld too big on dev %pg\n", diff --git a/fs/minix/itree_v2.c b/fs/minix/itree_v2.c index f7fc7ecccccc..ee8af2f9e282 100644 --- a/fs/minix/itree_v2.c +++ b/fs/minix/itree_v2.c @@ -32,8 +32,7 @@ static int block_to_path(struct inode * inode, long block, int offsets[DEPTH]) if (block < 0) { printk("MINIX-fs: block_to_path: block %ld < 0 on dev %pg\n", block, sb->s_bdev); - } else if ((u64)block * (u64)sb->s_blocksize >= - minix_sb(sb)->s_max_size) { + } else if ((u64)block * (u64)sb->s_blocksize >= sb->s_maxbytes) { if (printk_ratelimit()) printk("MINIX-fs: block_to_path: " "block %ld too big on dev %pg\n", diff --git a/fs/minix/minix.h b/fs/minix/minix.h index df081e8afcc3..168d45d3de73 100644 --- a/fs/minix/minix.h +++ b/fs/minix/minix.h @@ -32,7 +32,6 @@ struct minix_sb_info { unsigned long s_zmap_blocks; unsigned long s_firstdatazone; unsigned long s_log_zone_size; - unsigned long s_max_size; int s_dirsize; int s_namelen; struct buffer_head ** s_imap; From c7ac366be04e49147498f0837bca5c020c3fb994 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Tue, 11 Aug 2020 18:35:36 -0700 Subject: [PATCH 78/91] fs/minix: fix block limit check for V1 filesystems [ Upstream commit 0a12c4a8069607247cb8edc3b035a664e636fd9a ] The minix filesystem reads its maximum file size from its on-disk superblock. This value isn't necessarily a multiple of the block size. When it's not, the V1 block mapping code doesn't allow mapping the last possible block. Commit 6ed6a722f9ab ("minixfs: fix block limit check") fixed this in the V2 mapping code. Fix it in the V1 mapping code too. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Eric Biggers Signed-off-by: Andrew Morton Cc: Alexander Viro Cc: Qiujun Huang Link: http://lkml.kernel.org/r/20200628060846.682158-6-ebiggers@kernel.org Signed-off-by: Linus Torvalds Signed-off-by: Sasha Levin --- fs/minix/itree_v1.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/minix/itree_v1.c b/fs/minix/itree_v1.c index c0d418209ead..405573a79aab 100644 --- a/fs/minix/itree_v1.c +++ b/fs/minix/itree_v1.c @@ -29,7 +29,7 @@ static int block_to_path(struct inode * inode, long block, int offsets[DEPTH]) if (block < 0) { printk("MINIX-fs: block_to_path: block %ld < 0 on dev %pg\n", block, inode->i_sb->s_bdev); - } else if (block >= inode->i_sb->s_maxbytes/BLOCK_SIZE) { + } else if ((u64)block * BLOCK_SIZE >= inode->i_sb->s_maxbytes) { if (printk_ratelimit()) printk("MINIX-fs: block_to_path: " "block %ld too big on dev %pg\n", From 9f3fb90d30db4969c41cc22d72c0f6ca2f29954c Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Tue, 11 Aug 2020 18:35:39 -0700 Subject: [PATCH 79/91] fs/minix: remove expected error message in block_to_path() [ Upstream commit f666f9fb9a36f1c833b9d18923572f0e4d304754 ] When truncating a file to a size within the last allowed logical block, block_to_path() is called with the *next* block. This exceeds the limit, causing the "block %ld too big" error message to be printed. This case isn't actually an error; there are just no more blocks past that point. So, remove this error message. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Eric Biggers Signed-off-by: Andrew Morton Cc: Alexander Viro Cc: Qiujun Huang Link: http://lkml.kernel.org/r/20200628060846.682158-7-ebiggers@kernel.org Signed-off-by: Linus Torvalds Signed-off-by: Sasha Levin --- fs/minix/itree_v1.c | 12 ++++++------ fs/minix/itree_v2.c | 12 ++++++------ 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/fs/minix/itree_v1.c b/fs/minix/itree_v1.c index 405573a79aab..1fed906042aa 100644 --- a/fs/minix/itree_v1.c +++ b/fs/minix/itree_v1.c @@ -29,12 +29,12 @@ static int block_to_path(struct inode * inode, long block, int offsets[DEPTH]) if (block < 0) { printk("MINIX-fs: block_to_path: block %ld < 0 on dev %pg\n", block, inode->i_sb->s_bdev); - } else if ((u64)block * BLOCK_SIZE >= inode->i_sb->s_maxbytes) { - if (printk_ratelimit()) - printk("MINIX-fs: block_to_path: " - "block %ld too big on dev %pg\n", - block, inode->i_sb->s_bdev); - } else if (block < 7) { + return 0; + } + if ((u64)block * BLOCK_SIZE >= inode->i_sb->s_maxbytes) + return 0; + + if (block < 7) { offsets[n++] = block; } else if ((block -= 7) < 512) { offsets[n++] = 7; diff --git a/fs/minix/itree_v2.c b/fs/minix/itree_v2.c index ee8af2f9e282..9d00f31a2d9d 100644 --- a/fs/minix/itree_v2.c +++ b/fs/minix/itree_v2.c @@ -32,12 +32,12 @@ static int block_to_path(struct inode * inode, long block, int offsets[DEPTH]) if (block < 0) { printk("MINIX-fs: block_to_path: block %ld < 0 on dev %pg\n", block, sb->s_bdev); - } else if ((u64)block * (u64)sb->s_blocksize >= sb->s_maxbytes) { - if (printk_ratelimit()) - printk("MINIX-fs: block_to_path: " - "block %ld too big on dev %pg\n", - block, sb->s_bdev); - } else if (block < DIRCOUNT) { + return 0; + } + if ((u64)block * (u64)sb->s_blocksize >= sb->s_maxbytes) + return 0; + + if (block < DIRCOUNT) { offsets[n++] = block; } else if ((block -= DIRCOUNT) < INDIRCOUNT(sb)) { offsets[n++] = DIRCOUNT; From aefe207d95d02a1b4711bee17cde0014d7b8223f Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Tue, 11 Aug 2020 18:35:53 -0700 Subject: [PATCH 80/91] fs/ufs: avoid potential u32 multiplication overflow [ Upstream commit 88b2e9b06381551b707d980627ad0591191f7a2d ] The 64 bit ino is being compared to the product of two u32 values, however, the multiplication is being performed using a 32 bit multiply so there is a potential of an overflow. To be fully safe, cast uspi->s_ncg to a u64 to ensure a 64 bit multiplication occurs to avoid any chance of overflow. Fixes: f3e2a520f5fb ("ufs: NFS support") Signed-off-by: Colin Ian King Signed-off-by: Andrew Morton Cc: Evgeniy Dushistov Cc: Alexey Dobriyan Link: http://lkml.kernel.org/r/20200715170355.1081713-1-colin.king@canonical.com Addresses-Coverity: ("Unintentional integer overflow") Signed-off-by: Linus Torvalds Signed-off-by: Sasha Levin --- fs/ufs/super.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/ufs/super.c b/fs/ufs/super.c index a4e07e910f1b..6e59e45d7bfb 100644 --- a/fs/ufs/super.c +++ b/fs/ufs/super.c @@ -100,7 +100,7 @@ static struct inode *ufs_nfs_get_inode(struct super_block *sb, u64 ino, u32 gene struct ufs_sb_private_info *uspi = UFS_SB(sb)->s_uspi; struct inode *inode; - if (ino < UFS_ROOTINO || ino > uspi->s_ncg * uspi->s_ipg) + if (ino < UFS_ROOTINO || ino > (u64)uspi->s_ncg * uspi->s_ipg) return ERR_PTR(-ESTALE); inode = ufs_iget(sb, ino); From 8e69ac04403c0f3d721a552d7987fd8cd1daa517 Mon Sep 17 00:00:00 2001 From: Tiezhu Yang Date: Tue, 11 Aug 2020 18:36:16 -0700 Subject: [PATCH 81/91] test_kmod: avoid potential double free in trigger_config_run_type() [ Upstream commit 0776d1231bec0c7ab43baf440a3f5ef5f49dd795 ] Reset the member "test_fs" of the test configuration after a call of the function "kfree_const" to a null pointer so that a double memory release will not be performed. Fixes: d9c6a72d6fa2 ("kmod: add test driver to stress test the module loader") Signed-off-by: Tiezhu Yang Signed-off-by: Luis Chamberlain Signed-off-by: Andrew Morton Acked-by: Luis Chamberlain Cc: Alexei Starovoitov Cc: Al Viro Cc: Christian Brauner Cc: Chuck Lever Cc: David Howells Cc: David S. Miller Cc: Greg Kroah-Hartman Cc: Jakub Kicinski Cc: James Morris Cc: Jarkko Sakkinen Cc: J. Bruce Fields Cc: Jens Axboe Cc: Josh Triplett Cc: Kees Cook Cc: Lars Ellenberg Cc: Nikolay Aleksandrov Cc: Philipp Reisner Cc: Roopa Prabhu Cc: "Serge E. Hallyn" Cc: Sergei Trofimovich Cc: Sergey Kvachonok Cc: Shuah Khan Cc: Tony Vroon Cc: Christoph Hellwig Link: http://lkml.kernel.org/r/20200610154923.27510-4-mcgrof@kernel.org Signed-off-by: Linus Torvalds Signed-off-by: Sasha Levin --- lib/test_kmod.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/test_kmod.c b/lib/test_kmod.c index 9cf77628fc91..87a0cc750ea2 100644 --- a/lib/test_kmod.c +++ b/lib/test_kmod.c @@ -745,7 +745,7 @@ static int trigger_config_run_type(struct kmod_test_device *test_dev, break; case TEST_KMOD_FS_TYPE: kfree_const(config->test_fs); - config->test_driver = NULL; + config->test_fs = NULL; copied = config_copy_test_fs(config, test_str, strlen(test_str)); break; From 59547851489e5168b7291eac5ecc5bb24a71d7f5 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Thu, 23 Jul 2020 16:02:46 +0300 Subject: [PATCH 82/91] mfd: dln2: Run event handler loop under spinlock [ Upstream commit 3d858942250820b9adc35f963a257481d6d4c81d ] The event handler loop must be run with interrupts disabled. Otherwise we will have a warning: [ 1970.785649] irq 31 handler lineevent_irq_handler+0x0/0x20 enabled interrupts [ 1970.792739] WARNING: CPU: 0 PID: 0 at kernel/irq/handle.c:159 __handle_irq_event_percpu+0x162/0x170 [ 1970.860732] RIP: 0010:__handle_irq_event_percpu+0x162/0x170 ... [ 1970.946994] Call Trace: [ 1970.949446] [ 1970.951471] handle_irq_event_percpu+0x2c/0x80 [ 1970.955921] handle_irq_event+0x23/0x43 [ 1970.959766] handle_simple_irq+0x57/0x70 [ 1970.963695] generic_handle_irq+0x42/0x50 [ 1970.967717] dln2_rx+0xc1/0x210 [dln2] [ 1970.971479] ? usb_hcd_unmap_urb_for_dma+0xa6/0x1c0 [ 1970.976362] __usb_hcd_giveback_urb+0x77/0xe0 [ 1970.980727] usb_giveback_urb_bh+0x8e/0xe0 [ 1970.984837] tasklet_action_common.isra.0+0x4a/0xe0 ... Recently xHCI driver switched to tasklets in the commit 36dc01657b49 ("usb: host: xhci: Support running urb giveback in tasklet context"). The handle_irq_event_* functions are expected to be called with interrupts disabled and they rightfully complain here because we run in tasklet context with interrupts enabled. Use a event spinlock to protect event handler from being interrupted. Note, that there are only two users of this GPIO and ADC drivers and both of them are using generic_handle_irq() which makes above happen. Fixes: 338a12814297 ("mfd: Add support for Diolan DLN-2 devices") Signed-off-by: Andy Shevchenko Signed-off-by: Lee Jones Signed-off-by: Sasha Levin --- drivers/mfd/dln2.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/mfd/dln2.c b/drivers/mfd/dln2.c index 6ea0dd37b453..fe614ba5fec9 100644 --- a/drivers/mfd/dln2.c +++ b/drivers/mfd/dln2.c @@ -290,7 +290,11 @@ static void dln2_rx(struct urb *urb) len = urb->actual_length - sizeof(struct dln2_header); if (handle == DLN2_HANDLE_EVENT) { + unsigned long flags; + + spin_lock_irqsave(&dln2->event_cb_lock, flags); dln2_run_event_callbacks(dln2, id, echo, data, len); + spin_unlock_irqrestore(&dln2->event_cb_lock, flags); } else { /* URB will be re-submitted in _dln2_transfer (free_rx_slot) */ if (dln2_transfer_complete(dln2, urb, handle, echo)) From 9a1b7ced0fd9305d25708ff7fd9d7d4fb794f7a1 Mon Sep 17 00:00:00 2001 From: Dinghao Liu Date: Thu, 13 Aug 2020 15:46:30 +0800 Subject: [PATCH 83/91] ALSA: echoaudio: Fix potential Oops in snd_echo_resume() [ Upstream commit 5a25de6df789cc805a9b8ba7ab5deef5067af47e ] Freeing chip on error may lead to an Oops at the next time the system goes to resume. Fix this by removing all snd_echo_free() calls on error. Fixes: 47b5d028fdce8 ("ALSA: Echoaudio - Add suspend support #2") Signed-off-by: Dinghao Liu Link: https://lore.kernel.org/r/20200813074632.17022-1-dinghao.liu@zju.edu.cn Signed-off-by: Takashi Iwai Signed-off-by: Sasha Levin --- sound/pci/echoaudio/echoaudio.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/sound/pci/echoaudio/echoaudio.c b/sound/pci/echoaudio/echoaudio.c index 3ef2b27ebbe8..f32c55ffffc7 100644 --- a/sound/pci/echoaudio/echoaudio.c +++ b/sound/pci/echoaudio/echoaudio.c @@ -2216,7 +2216,6 @@ static int snd_echo_resume(struct device *dev) if (err < 0) { kfree(commpage_bak); dev_err(dev, "resume init_hw err=%d\n", err); - snd_echo_free(chip); return err; } @@ -2243,7 +2242,6 @@ static int snd_echo_resume(struct device *dev) if (request_irq(pci->irq, snd_echo_interrupt, IRQF_SHARED, KBUILD_MODNAME, chip)) { dev_err(chip->card->dev, "cannot grab irq\n"); - snd_echo_free(chip); return -EBUSY; } chip->irq = pci->irq; From 3b87dc3e0ba624cc2a9df08a14d1653122d383f9 Mon Sep 17 00:00:00 2001 From: Vincent Whitchurch Date: Mon, 10 Aug 2020 15:34:04 +0200 Subject: [PATCH 84/91] perf bench mem: Always memset source before memcpy [ Upstream commit 1beaef29c34154ccdcb3f1ae557f6883eda18840 ] For memcpy, the source pages are memset to zero only when --cycles is used. This leads to wildly different results with or without --cycles, since all sources pages are likely to be mapped to the same zero page without explicit writes. Before this fix: $ export cmd="./perf stat -e LLC-loads -- ./perf bench \ mem memcpy -s 1024MB -l 100 -f default" $ $cmd 2,935,826 LLC-loads 3.821677452 seconds time elapsed $ $cmd --cycles 217,533,436 LLC-loads 8.616725985 seconds time elapsed After this fix: $ $cmd 214,459,686 LLC-loads 8.674301124 seconds time elapsed $ $cmd --cycles 214,758,651 LLC-loads 8.644480006 seconds time elapsed Fixes: 47b5757bac03c338 ("perf bench mem: Move boilerplate memory allocation to the infrastructure") Signed-off-by: Vincent Whitchurch Cc: Alexander Shishkin Cc: Jiri Olsa Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Cc: kernel@axis.com Link: http://lore.kernel.org/lkml/20200810133404.30829-1-vincent.whitchurch@axis.com Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Sasha Levin --- tools/perf/bench/mem-functions.c | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/tools/perf/bench/mem-functions.c b/tools/perf/bench/mem-functions.c index 0251dd348124..4864fc67d01b 100644 --- a/tools/perf/bench/mem-functions.c +++ b/tools/perf/bench/mem-functions.c @@ -222,12 +222,8 @@ static int bench_mem_common(int argc, const char **argv, struct bench_mem_info * return 0; } -static u64 do_memcpy_cycles(const struct function *r, size_t size, void *src, void *dst) +static void memcpy_prefault(memcpy_t fn, size_t size, void *src, void *dst) { - u64 cycle_start = 0ULL, cycle_end = 0ULL; - memcpy_t fn = r->fn.memcpy; - int i; - /* Make sure to always prefault zero pages even if MMAP_THRESH is crossed: */ memset(src, 0, size); @@ -236,6 +232,15 @@ static u64 do_memcpy_cycles(const struct function *r, size_t size, void *src, vo * to not measure page fault overhead: */ fn(dst, src, size); +} + +static u64 do_memcpy_cycles(const struct function *r, size_t size, void *src, void *dst) +{ + u64 cycle_start = 0ULL, cycle_end = 0ULL; + memcpy_t fn = r->fn.memcpy; + int i; + + memcpy_prefault(fn, size, src, dst); cycle_start = get_cycles(); for (i = 0; i < nr_loops; ++i) @@ -251,11 +256,7 @@ static double do_memcpy_gettimeofday(const struct function *r, size_t size, void memcpy_t fn = r->fn.memcpy; int i; - /* - * We prefault the freshly allocated memory range here, - * to not measure page fault overhead: - */ - fn(dst, src, size); + memcpy_prefault(fn, size, src, dst); BUG_ON(gettimeofday(&tv_start, NULL)); for (i = 0; i < nr_loops; ++i) From df8caaf9ef87cbc050bff4d9569d2c1c87d23389 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Daniel=20D=C3=ADaz?= Date: Wed, 12 Aug 2020 17:15:17 -0500 Subject: [PATCH 85/91] tools build feature: Quote CC and CXX for their arguments MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit fa5c893181ed2ca2f96552f50073786d2cfce6c0 ] When using a cross-compilation environment, such as OpenEmbedded, the CC an CXX variables are set to something more than just a command: there are arguments (such as --sysroot) that need to be passed on to the compiler so that the right set of headers and libraries are used. For the particular case that our systems detected, CC is set to the following: export CC="aarch64-linaro-linux-gcc --sysroot=/oe/build/tmp/work/machine/perf/1.0-r9/recipe-sysroot" Without quotes, detection is as follows: Auto-detecting system features: ... dwarf: [ OFF ] ... dwarf_getlocations: [ OFF ] ... glibc: [ OFF ] ... gtk2: [ OFF ] ... libbfd: [ OFF ] ... libcap: [ OFF ] ... libelf: [ OFF ] ... libnuma: [ OFF ] ... numa_num_possible_cpus: [ OFF ] ... libperl: [ OFF ] ... libpython: [ OFF ] ... libcrypto: [ OFF ] ... libunwind: [ OFF ] ... libdw-dwarf-unwind: [ OFF ] ... zlib: [ OFF ] ... lzma: [ OFF ] ... get_cpuid: [ OFF ] ... bpf: [ OFF ] ... libaio: [ OFF ] ... libzstd: [ OFF ] ... disassembler-four-args: [ OFF ] Makefile.config:414: *** No gnu/libc-version.h found, please install glibc-dev[el]. Stop. Makefile.perf:230: recipe for target 'sub-make' failed make[1]: *** [sub-make] Error 2 Makefile:69: recipe for target 'all' failed make: *** [all] Error 2 With CC and CXX quoted, some of those features are now detected. Fixes: e3232c2f39ac ("tools build feature: Use CC and CXX from parent") Signed-off-by: Daniel Díaz Reviewed-by: Thomas Hebb Cc: Alexei Starovoitov Cc: Andrii Nakryiko Cc: Daniel Borkmann Cc: Jiri Olsa Cc: John Fastabend Cc: KP Singh Cc: Martin KaFai Lau Cc: Namhyung Kim Cc: Song Liu Cc: Stephane Eranian Cc: Yonghong Song Link: http://lore.kernel.org/lkml/20200812221518.2869003-1-daniel.diaz@linaro.org Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Sasha Levin --- tools/build/Makefile.feature | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/build/Makefile.feature b/tools/build/Makefile.feature index 7d9d70c0b380..7c17f17ea2cd 100644 --- a/tools/build/Makefile.feature +++ b/tools/build/Makefile.feature @@ -7,7 +7,7 @@ endif feature_check = $(eval $(feature_check_code)) define feature_check_code - feature-$(1) := $(shell $(MAKE) OUTPUT=$(OUTPUT_FEATURES) CC=$(CC) CXX=$(CXX) CFLAGS="$(EXTRA_CFLAGS) $(FEATURE_CHECK_CFLAGS-$(1))" CXXFLAGS="$(EXTRA_CXXFLAGS) $(FEATURE_CHECK_CXXFLAGS-$(1))" LDFLAGS="$(LDFLAGS) $(FEATURE_CHECK_LDFLAGS-$(1))" -C $(feature_dir) $(OUTPUT_FEATURES)test-$1.bin >/dev/null 2>/dev/null && echo 1 || echo 0) + feature-$(1) := $(shell $(MAKE) OUTPUT=$(OUTPUT_FEATURES) CC="$(CC)" CXX="$(CXX)" CFLAGS="$(EXTRA_CFLAGS) $(FEATURE_CHECK_CFLAGS-$(1))" CXXFLAGS="$(EXTRA_CXXFLAGS) $(FEATURE_CHECK_CXXFLAGS-$(1))" LDFLAGS="$(LDFLAGS) $(FEATURE_CHECK_LDFLAGS-$(1))" -C $(feature_dir) $(OUTPUT_FEATURES)test-$1.bin >/dev/null 2>/dev/null && echo 1 || echo 0) endef feature_set = $(eval $(feature_set_code)) From 014ec97717f4794a1c085aa4fdc924018c4fd999 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Fri, 14 Aug 2020 14:42:45 +0200 Subject: [PATCH 86/91] sh: landisk: Add missing initialization of sh_io_port_base [ Upstream commit 0c64a0dce51faa9c706fdf1f957d6f19878f4b81 ] The Landisk setup code maps the CF IDE area using ioremap_prot(), and passes the resulting virtual addresses to the pata_platform driver, disguising them as I/O port addresses. Hence the pata_platform driver translates them again using ioport_map(). As CONFIG_GENERIC_IOMAP=n, and CONFIG_HAS_IOPORT_MAP=y, the SuperH-specific mapping code in arch/sh/kernel/ioport.c translates I/O port addresses to virtual addresses by adding sh_io_port_base, which defaults to -1, thus breaking the assumption of an identity mapping. Fix this by setting sh_io_port_base to zero. Fixes: 37b7a97884ba64bf ("sh: machvec IO death.") Signed-off-by: Geert Uytterhoeven Signed-off-by: Rich Felker Signed-off-by: Sasha Levin --- arch/sh/boards/mach-landisk/setup.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/sh/boards/mach-landisk/setup.c b/arch/sh/boards/mach-landisk/setup.c index f1147caebacf..af69fb7fef7c 100644 --- a/arch/sh/boards/mach-landisk/setup.c +++ b/arch/sh/boards/mach-landisk/setup.c @@ -85,6 +85,9 @@ device_initcall(landisk_devices_setup); static void __init landisk_setup(char **cmdline_p) { + /* I/O port identity mapping */ + __set_io_port_base(0); + /* LED ON */ __raw_writeb(__raw_readb(PA_LED) | 0x03, PA_LED); From 2406c45db3de8da8052eccb1dcfde69ea5988b19 Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Thu, 6 Aug 2020 23:26:22 -0700 Subject: [PATCH 87/91] khugepaged: retract_page_tables() remember to test exit commit 18e77600f7a1ed69f8ce46c9e11cad0985712dfa upstream. Only once have I seen this scenario (and forgot even to notice what forced the eventual crash): a sequence of "BUG: Bad page map" alerts from vm_normal_page(), from zap_pte_range() servicing exit_mmap(); pmd:00000000, pte values corresponding to data in physical page 0. The pte mappings being zapped in this case were supposed to be from a huge page of ext4 text (but could as well have been shmem): my belief is that it was racing with collapse_file()'s retract_page_tables(), found *pmd pointing to a page table, locked it, but *pmd had become 0 by the time start_pte was decided. In most cases, that possibility is excluded by holding mmap lock; but exit_mmap() proceeds without mmap lock. Most of what's run by khugepaged checks khugepaged_test_exit() after acquiring mmap lock: khugepaged_collapse_pte_mapped_thps() and hugepage_vma_revalidate() do so, for example. But retract_page_tables() did not: fix that. The fix is for retract_page_tables() to check khugepaged_test_exit(), after acquiring mmap lock, before doing anything to the page table. Getting the mmap lock serializes with __mmput(), which briefly takes and drops it in __khugepaged_exit(); then the khugepaged_test_exit() check on mm_users makes sure we don't touch the page table once exit_mmap() might reach it, since exit_mmap() will be proceeding without mmap lock, not expecting anyone to be racing with it. Fixes: f3f0e1d2150b ("khugepaged: add support of collapse for tmpfs/shmem pages") Signed-off-by: Hugh Dickins Signed-off-by: Andrew Morton Acked-by: Kirill A. Shutemov Cc: Andrea Arcangeli Cc: Mike Kravetz Cc: Song Liu Cc: [4.8+] Link: http://lkml.kernel.org/r/alpine.LSU.2.11.2008021215400.27773@eggly.anvils Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- mm/khugepaged.c | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/mm/khugepaged.c b/mm/khugepaged.c index ecefdba4b0dd..483c4573695a 100644 --- a/mm/khugepaged.c +++ b/mm/khugepaged.c @@ -1251,6 +1251,7 @@ static void collect_mm_slot(struct mm_slot *mm_slot) static void retract_page_tables(struct address_space *mapping, pgoff_t pgoff) { struct vm_area_struct *vma; + struct mm_struct *mm; unsigned long addr; pmd_t *pmd, _pmd; @@ -1264,7 +1265,8 @@ static void retract_page_tables(struct address_space *mapping, pgoff_t pgoff) continue; if (vma->vm_end < addr + HPAGE_PMD_SIZE) continue; - pmd = mm_find_pmd(vma->vm_mm, addr); + mm = vma->vm_mm; + pmd = mm_find_pmd(mm, addr); if (!pmd) continue; /* @@ -1273,14 +1275,16 @@ static void retract_page_tables(struct address_space *mapping, pgoff_t pgoff) * re-fault. Not ideal, but it's more important to not disturb * the system too much. */ - if (down_write_trylock(&vma->vm_mm->mmap_sem)) { - spinlock_t *ptl = pmd_lock(vma->vm_mm, pmd); - /* assume page table is clear */ - _pmd = pmdp_collapse_flush(vma, addr, pmd); - spin_unlock(ptl); - up_write(&vma->vm_mm->mmap_sem); - mm_dec_nr_ptes(vma->vm_mm); - pte_free(vma->vm_mm, pmd_pgtable(_pmd)); + if (down_write_trylock(&mm->mmap_sem)) { + if (!khugepaged_test_exit(mm)) { + spinlock_t *ptl = pmd_lock(mm, pmd); + /* assume page table is clear */ + _pmd = pmdp_collapse_flush(vma, addr, pmd); + spin_unlock(ptl); + mm_dec_nr_ptes(mm); + pte_free(mm, pmd_pgtable(_pmd)); + } + up_write(&mm->mmap_sem); } } i_mmap_unlock_write(mapping); From c4368ab3ef78880ea27f5400e84e424184e8cee6 Mon Sep 17 00:00:00 2001 From: Tomasz Maciej Nowak Date: Thu, 27 Feb 2020 17:52:32 +0100 Subject: [PATCH 88/91] arm64: dts: marvell: espressobin: add ethernet alias MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 5253cb8c00a6f4356760efb38bca0e0393aa06de upstream. The maker of this board and its variants, stores MAC address in U-Boot environment. Add alias for bootloader to recognise, to which ethernet node inject the factory MAC address. Signed-off-by: Tomasz Maciej Nowak Signed-off-by: Gregory CLEMENT [pali: Backported to 5.4 and older versions] Signed-off-by: Pali Rohár Signed-off-by: Greg Kroah-Hartman --- arch/arm64/boot/dts/marvell/armada-3720-espressobin.dts | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/arch/arm64/boot/dts/marvell/armada-3720-espressobin.dts b/arch/arm64/boot/dts/marvell/armada-3720-espressobin.dts index 3ab25ad402b9..6cbdd66921aa 100644 --- a/arch/arm64/boot/dts/marvell/armada-3720-espressobin.dts +++ b/arch/arm64/boot/dts/marvell/armada-3720-espressobin.dts @@ -19,6 +19,12 @@ / { model = "Globalscale Marvell ESPRESSOBin Board"; compatible = "globalscale,espressobin", "marvell,armada3720", "marvell,armada3710"; + aliases { + ethernet0 = ð0; + serial0 = &uart0; + serial1 = &uart1; + }; + chosen { stdout-path = "serial0:115200n8"; }; From bfdd8596994870126a62e673b82e6208f96727cd Mon Sep 17 00:00:00 2001 From: Marius Iacob Date: Sat, 1 Aug 2020 15:34:46 +0300 Subject: [PATCH 89/91] drm: Added orientation quirk for ASUS tablet model T103HAF commit b5ac98cbb8e5e30c34ebc837d1e5a3982d2b5f5c upstream. Signed-off-by: Marius Iacob Cc: stable@vger.kernel.org Signed-off-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20200801123445.1514567-1-themariusus@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/drm_panel_orientation_quirks.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/gpu/drm/drm_panel_orientation_quirks.c b/drivers/gpu/drm/drm_panel_orientation_quirks.c index fa5c25d36d3d..652de972c3ae 100644 --- a/drivers/gpu/drm/drm_panel_orientation_quirks.c +++ b/drivers/gpu/drm/drm_panel_orientation_quirks.c @@ -107,6 +107,12 @@ static const struct dmi_system_id orientation_data[] = { DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "T101HA"), }, .driver_data = (void *)&lcd800x1280_rightside_up, + }, { /* Asus T103HAF */ + .matches = { + DMI_EXACT_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."), + DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "T103HAF"), + }, + .driver_data = (void *)&lcd800x1280_rightside_up, }, { /* GPD MicroPC (generic strings, also match on bios date) */ .matches = { DMI_EXACT_MATCH(DMI_SYS_VENDOR, "Default string"), From 2c1684171564a7e91afce2897469c81bf7d6085a Mon Sep 17 00:00:00 2001 From: Sandeep Raghuraman Date: Thu, 6 Aug 2020 22:52:20 +0530 Subject: [PATCH 90/91] drm/amdgpu: Fix bug where DPM is not enabled after hibernate and resume commit f87812284172a9809820d10143b573d833cd3f75 upstream. Reproducing bug report here: After hibernating and resuming, DPM is not enabled. This remains the case even if you test hibernate using the steps here: https://www.kernel.org/doc/html/latest/power/basic-pm-debugging.html I debugged the problem, and figured out that in the file hardwaremanager.c, in the function, phm_enable_dynamic_state_management(), the check 'if (!hwmgr->pp_one_vf && smum_is_dpm_running(hwmgr) && !amdgpu_passthrough(adev) && adev->in_suspend)' returns true for the hibernate case, and false for the suspend case. This means that for the hibernate case, the AMDGPU driver doesn't enable DPM (even though it should) and simply returns from that function. In the suspend case, it goes ahead and enables DPM, even though it doesn't need to. I debugged further, and found out that in the case of suspend, for the CIK/Hawaii GPUs, smum_is_dpm_running(hwmgr) returns false, while in the case of hibernate, smum_is_dpm_running(hwmgr) returns true. For CIK, the ci_is_dpm_running() function calls the ci_is_smc_ram_running() function, which is ultimately used to determine if DPM is currently enabled or not, and this seems to provide the wrong answer. I've changed the ci_is_dpm_running() function to instead use the same method that some other AMD GPU chips do (e.g Fiji), which seems to read the voltage controller. I've tested on my R9 390 and it seems to work correctly for both suspend and hibernate use cases, and has been stable so far. Bug: https://bugzilla.kernel.org/show_bug.cgi?id=208839 Signed-off-by: Sandeep Raghuraman Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/amd/powerplay/smumgr/ci_smumgr.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/powerplay/smumgr/ci_smumgr.c b/drivers/gpu/drm/amd/powerplay/smumgr/ci_smumgr.c index 083aa71487e8..db87cb8930d2 100644 --- a/drivers/gpu/drm/amd/powerplay/smumgr/ci_smumgr.c +++ b/drivers/gpu/drm/amd/powerplay/smumgr/ci_smumgr.c @@ -2723,7 +2723,10 @@ static int ci_initialize_mc_reg_table(struct pp_hwmgr *hwmgr) static bool ci_is_dpm_running(struct pp_hwmgr *hwmgr) { - return ci_is_smc_ram_running(hwmgr); + return (1 == PHM_READ_INDIRECT_FIELD(hwmgr->device, + CGS_IND_REG__SMC, FEATURE_STATUS, + VOLTAGE_CONTROLLER_ON)) + ? true : false; } static int ci_smu_init(struct pp_hwmgr *hwmgr) From d18b78abc0c6e7d3119367c931c583e02d466495 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Fri, 21 Aug 2020 11:05:39 +0200 Subject: [PATCH 91/91] Linux 4.19.141 Tested-by: Guenter Roeck Tested-by: Shuah Khan Signed-off-by: Greg Kroah-Hartman --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index c5ee1c10a39c..5b64e1141984 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 VERSION = 4 PATCHLEVEL = 19 -SUBLEVEL = 140 +SUBLEVEL = 141 EXTRAVERSION = NAME = "People's Front"