From 08f0a15ee8adb4846b08ca5d5c175fbf0f652bc9 Mon Sep 17 00:00:00 2001 From: Mika Westerberg Date: Tue, 31 Jan 2023 11:24:03 +0200 Subject: [PATCH 1/4] PCI: Align extra resources for hotplug bridges properly After division the extra resource space per hotplug bridge may not be aligned according to the window alignment, so align it before passing it down for further distribution. Link: https://lore.kernel.org/r/20230131092405.29121-2-mika.westerberg@linux.intel.com Signed-off-by: Mika Westerberg Signed-off-by: Bjorn Helgaas --- drivers/pci/setup-bus.c | 25 +++++++++++++++++++------ 1 file changed, 19 insertions(+), 6 deletions(-) diff --git a/drivers/pci/setup-bus.c b/drivers/pci/setup-bus.c index b4096598dbcb..e440f264accb 100644 --- a/drivers/pci/setup-bus.c +++ b/drivers/pci/setup-bus.c @@ -1891,6 +1891,7 @@ static void pci_bus_distribute_available_resources(struct pci_bus *bus, * resource space between hotplug bridges. */ for_each_pci_bridge(dev, bus) { + struct resource *res; struct pci_bus *b; b = dev->subordinate; @@ -1902,16 +1903,28 @@ static void pci_bus_distribute_available_resources(struct pci_bus *bus, * hotplug-capable downstream ports taking alignment into * account. */ - io.end = io.start + io_per_hp - 1; - mmio.end = mmio.start + mmio_per_hp - 1; - mmio_pref.end = mmio_pref.start + mmio_pref_per_hp - 1; + res = &dev->resource[PCI_BRIDGE_IO_WINDOW]; + align = pci_resource_alignment(dev, res); + io.end = align ? io.start + ALIGN_DOWN(io_per_hp, align) - 1 + : io.start + io_per_hp - 1; + + res = &dev->resource[PCI_BRIDGE_MEM_WINDOW]; + align = pci_resource_alignment(dev, res); + mmio.end = align ? mmio.start + ALIGN_DOWN(mmio_per_hp, align) - 1 + : mmio.start + mmio_per_hp - 1; + + res = &dev->resource[PCI_BRIDGE_PREF_MEM_WINDOW]; + align = pci_resource_alignment(dev, res); + mmio_pref.end = align ? mmio_pref.start + + ALIGN_DOWN(mmio_pref_per_hp, align) - 1 + : mmio_pref.start + mmio_pref_per_hp - 1; pci_bus_distribute_available_resources(b, add_list, io, mmio, mmio_pref); - io.start += io_per_hp; - mmio.start += mmio_per_hp; - mmio_pref.start += mmio_pref_per_hp; + io.start += io.end + 1; + mmio.start += mmio.end + 1; + mmio_pref.start += mmio_pref.end + 1; } } From 9db0b9b6a14249ef65a5f1e5e3b37762af96f425 Mon Sep 17 00:00:00 2001 From: Mika Westerberg Date: Tue, 31 Jan 2023 11:24:04 +0200 Subject: [PATCH 2/4] PCI: Take other bus devices into account when distributing resources A PCI bridge may reside on a bus with other devices as well. The resource distribution code does not take this into account and therefore it expands the bridge resource windows too much, not leaving space for the other devices (or functions of a multifunction device). This leads to an issue that Jonathan reported when running QEMU with the following topology (QEMU parameters): -device pcie-root-port,port=0,id=root_port13,chassis=0,slot=2 \ -device x3130-upstream,id=sw1,bus=root_port13,multifunction=on \ -device e1000,bus=root_port13,addr=0.1 \ -device xio3130-downstream,id=fun1,bus=sw1,chassis=0,slot=3 \ -device e1000,bus=fun1 The first e1000 NIC here is another function in the switch upstream port. This leads to following errors: pci 0000:00:04.0: bridge window [mem 0x10200000-0x103fffff] to [bus 02-04] pci 0000:02:00.0: bridge window [mem 0x10200000-0x103fffff] to [bus 03-04] pci 0000:02:00.1: BAR 0: failed to assign [mem size 0x00020000] e1000 0000:02:00.1: can't ioremap BAR 0: [??? 0x00000000 flags 0x0] Fix this by taking into account bridge windows, device BARs and SR-IOV PF BARs on the bus (PF BARs include space for VF BARS so only account PF BARs), including the ones belonging to bridges themselves if it has any. Link: https://lore.kernel.org/linux-pci/20221014124553.0000696f@huawei.com/ Link: https://lore.kernel.org/linux-pci/6053736d-1923-41e7-def9-7585ce1772d9@ixsystems.com/ Link: https://lore.kernel.org/r/20230131092405.29121-3-mika.westerberg@linux.intel.com Reported-by: Jonathan Cameron Reported-by: Alexander Motin Signed-off-by: Mika Westerberg Signed-off-by: Bjorn Helgaas --- drivers/pci/setup-bus.c | 178 ++++++++++++++++++++++++---------------- 1 file changed, 107 insertions(+), 71 deletions(-) diff --git a/drivers/pci/setup-bus.c b/drivers/pci/setup-bus.c index e440f264accb..b7b8dddb7772 100644 --- a/drivers/pci/setup-bus.c +++ b/drivers/pci/setup-bus.c @@ -1765,12 +1765,67 @@ static void adjust_bridge_window(struct pci_dev *bridge, struct resource *res, add_size = size - new_size; pci_dbg(bridge, "bridge window %pR shrunken by %pa\n", res, &add_size); + } else { + return; } res->end = res->start + new_size - 1; remove_from_list(add_list, res); } +static void remove_dev_resource(struct resource *avail, struct pci_dev *dev, + struct resource *res) +{ + resource_size_t size, align, tmp; + + size = resource_size(res); + if (!size) + return; + + align = pci_resource_alignment(dev, res); + align = align ? ALIGN(avail->start, align) - avail->start : 0; + tmp = align + size; + avail->start = min(avail->start + tmp, avail->end + 1); +} + +static void remove_dev_resources(struct pci_dev *dev, struct resource *io, + struct resource *mmio, + struct resource *mmio_pref) +{ + int i; + + for (i = 0; i < PCI_NUM_RESOURCES; i++) { + struct resource *res = &dev->resource[i]; + + if (resource_type(res) == IORESOURCE_IO) { + remove_dev_resource(io, dev, res); + } else if (resource_type(res) == IORESOURCE_MEM) { + + /* + * Make sure prefetchable memory is reduced from + * the correct resource. Specifically we put 32-bit + * prefetchable memory in non-prefetchable window + * if there is an 64-bit pretchable window. + * + * See comments in __pci_bus_size_bridges() for + * more information. + */ + if ((res->flags & IORESOURCE_PREFETCH) && + ((res->flags & IORESOURCE_MEM_64) == + (mmio_pref->flags & IORESOURCE_MEM_64))) + remove_dev_resource(mmio_pref, dev, res); + else + remove_dev_resource(mmio, dev, res); + } + } +} + +/* + * io, mmio and mmio_pref contain the total amount of bridge window space + * available. This includes the minimal space needed to cover all the + * existing devices on the bus and the possible extra space that can be + * shared with the bridges. + */ static void pci_bus_distribute_available_resources(struct pci_bus *bus, struct list_head *add_list, struct resource io, @@ -1780,7 +1835,7 @@ static void pci_bus_distribute_available_resources(struct pci_bus *bus, unsigned int normal_bridges = 0, hotplug_bridges = 0; struct resource *io_res, *mmio_res, *mmio_pref_res; struct pci_dev *dev, *bridge = bus->self; - resource_size_t io_per_hp, mmio_per_hp, mmio_pref_per_hp, align; + resource_size_t io_per_b, mmio_per_b, mmio_pref_per_b, align; io_res = &bridge->resource[PCI_BRIDGE_IO_WINDOW]; mmio_res = &bridge->resource[PCI_BRIDGE_MEM_WINDOW]; @@ -1824,100 +1879,81 @@ static void pci_bus_distribute_available_resources(struct pci_bus *bus, normal_bridges++; } - /* - * There is only one bridge on the bus so it gets all available - * resources which it can then distribute to the possible hotplug - * bridges below. - */ - if (hotplug_bridges + normal_bridges == 1) { - dev = list_first_entry(&bus->devices, struct pci_dev, bus_list); - if (dev->subordinate) - pci_bus_distribute_available_resources(dev->subordinate, - add_list, io, mmio, mmio_pref); - return; - } - - if (hotplug_bridges == 0) + if (!(hotplug_bridges + normal_bridges)) return; /* - * Calculate the total amount of extra resource space we can - * pass to bridges below this one. This is basically the - * extra space reduced by the minimal required space for the - * non-hotplug bridges. + * Calculate the amount of space we can forward from "bus" to any + * downstream buses, i.e., the space left over after assigning the + * BARs and windows on "bus". */ - for_each_pci_bridge(dev, bus) { - resource_size_t used_size; - struct resource *res; - - if (dev->is_hotplug_bridge) - continue; - - /* - * Reduce the available resource space by what the - * bridge and devices below it occupy. - */ - res = &dev->resource[PCI_BRIDGE_IO_WINDOW]; - align = pci_resource_alignment(dev, res); - align = align ? ALIGN(io.start, align) - io.start : 0; - used_size = align + resource_size(res); - if (!res->parent) - io.start = min(io.start + used_size, io.end + 1); - - res = &dev->resource[PCI_BRIDGE_MEM_WINDOW]; - align = pci_resource_alignment(dev, res); - align = align ? ALIGN(mmio.start, align) - mmio.start : 0; - used_size = align + resource_size(res); - if (!res->parent) - mmio.start = min(mmio.start + used_size, mmio.end + 1); - - res = &dev->resource[PCI_BRIDGE_PREF_MEM_WINDOW]; - align = pci_resource_alignment(dev, res); - align = align ? ALIGN(mmio_pref.start, align) - - mmio_pref.start : 0; - used_size = align + resource_size(res); - if (!res->parent) - mmio_pref.start = min(mmio_pref.start + used_size, - mmio_pref.end + 1); + list_for_each_entry(dev, &bus->devices, bus_list) { + if (!dev->is_virtfn) + remove_dev_resources(dev, &io, &mmio, &mmio_pref); } - io_per_hp = div64_ul(resource_size(&io), hotplug_bridges); - mmio_per_hp = div64_ul(resource_size(&mmio), hotplug_bridges); - mmio_pref_per_hp = div64_ul(resource_size(&mmio_pref), - hotplug_bridges); - /* - * Go over devices on this bus and distribute the remaining - * resource space between hotplug bridges. + * If there is at least one hotplug bridge on this bus it gets all + * the extra resource space that was left after the reductions + * above. + * + * If there are no hotplug bridges the extra resource space is + * split between non-hotplug bridges. This is to allow possible + * hotplug bridges below them to get the extra space as well. */ + if (hotplug_bridges) { + io_per_b = div64_ul(resource_size(&io), hotplug_bridges); + mmio_per_b = div64_ul(resource_size(&mmio), hotplug_bridges); + mmio_pref_per_b = div64_ul(resource_size(&mmio_pref), + hotplug_bridges); + } else { + io_per_b = div64_ul(resource_size(&io), normal_bridges); + mmio_per_b = div64_ul(resource_size(&mmio), normal_bridges); + mmio_pref_per_b = div64_ul(resource_size(&mmio_pref), + normal_bridges); + } + for_each_pci_bridge(dev, bus) { struct resource *res; struct pci_bus *b; b = dev->subordinate; - if (!b || !dev->is_hotplug_bridge) + if (!b) + continue; + if (hotplug_bridges && !dev->is_hotplug_bridge) continue; - /* - * Distribute available extra resources equally between - * hotplug-capable downstream ports taking alignment into - * account. - */ res = &dev->resource[PCI_BRIDGE_IO_WINDOW]; + + /* + * Make sure the split resource space is properly aligned + * for bridge windows (align it down to avoid going above + * what is available). + */ align = pci_resource_alignment(dev, res); - io.end = align ? io.start + ALIGN_DOWN(io_per_hp, align) - 1 - : io.start + io_per_hp - 1; + io.end = align ? io.start + ALIGN_DOWN(io_per_b, align) - 1 + : io.start + io_per_b - 1; + + /* + * The x_per_b holds the extra resource space that can be + * added for each bridge but there is the minimal already + * reserved as well so adjust x.start down accordingly to + * cover the whole space. + */ + io.start -= resource_size(res); res = &dev->resource[PCI_BRIDGE_MEM_WINDOW]; align = pci_resource_alignment(dev, res); - mmio.end = align ? mmio.start + ALIGN_DOWN(mmio_per_hp, align) - 1 - : mmio.start + mmio_per_hp - 1; + mmio.end = align ? mmio.start + ALIGN_DOWN(mmio_per_b, align) - 1 + : mmio.start + mmio_per_b - 1; + mmio.start -= resource_size(res); res = &dev->resource[PCI_BRIDGE_PREF_MEM_WINDOW]; align = pci_resource_alignment(dev, res); mmio_pref.end = align ? mmio_pref.start + - ALIGN_DOWN(mmio_pref_per_hp, align) - 1 - : mmio_pref.start + mmio_pref_per_hp - 1; + ALIGN_DOWN(mmio_pref_per_b, align) - 1 + : mmio_pref.start + mmio_pref_per_b - 1; + mmio_pref.start -= resource_size(res); pci_bus_distribute_available_resources(b, add_list, io, mmio, mmio_pref); From 7180c1d08639f28e63110ad35815f7a1785b8a19 Mon Sep 17 00:00:00 2001 From: Mika Westerberg Date: Tue, 31 Jan 2023 11:24:05 +0200 Subject: [PATCH 3/4] PCI: Distribute available resources for root buses, too Previously we distributed spare resources only upon hot-add, so if the initial root bus scan found devices that had not been fully configured by the BIOS, we allocated only enough resources to cover what was then present. If some of those devices were hotplug bridges, we did not leave any additional resource space for future expansion. Distribute the available resources for root buses, too, to make this work the same way as the normal hotplug case. A previous commit to do this was reverted due to a regression reported by Jonathan Cameron: e96e27fc6f79 ("PCI: Distribute available resources for root buses, too") 5632e2beaf9d ("Revert "PCI: Distribute available resources for root buses, too"") This commit changes pci_bridge_resources_not_assigned() to work with bridges that do not have all the resource windows programmed by the boot firmware (previously we expected all I/O, memory and prefetchable memory were programmed). Link: https://bugzilla.kernel.org/show_bug.cgi?id=216000 Link: https://lore.kernel.org/r/20220905080232.36087-5-mika.westerberg@linux.intel.com Link: https://lore.kernel.org/r/20230131092405.29121-4-mika.westerberg@linux.intel.com Reported-by: Chris Chiu Signed-off-by: Mika Westerberg Signed-off-by: Bjorn Helgaas --- drivers/pci/setup-bus.c | 57 ++++++++++++++++++++++++++++++++++++++++- 1 file changed, 56 insertions(+), 1 deletion(-) diff --git a/drivers/pci/setup-bus.c b/drivers/pci/setup-bus.c index b7b8dddb7772..c690572b10ce 100644 --- a/drivers/pci/setup-bus.c +++ b/drivers/pci/setup-bus.c @@ -1770,7 +1770,10 @@ static void adjust_bridge_window(struct pci_dev *bridge, struct resource *res, } res->end = res->start + new_size - 1; - remove_from_list(add_list, res); + + /* If the resource is part of the add_list, remove it now */ + if (add_list) + remove_from_list(add_list, res); } static void remove_dev_resource(struct resource *avail, struct pci_dev *dev, @@ -1972,6 +1975,8 @@ static void pci_bridge_distribute_available_resources(struct pci_dev *bridge, if (!bridge->is_hotplug_bridge) return; + pci_dbg(bridge, "distributing available resources\n"); + /* Take the initial extra resources from the hotplug port */ available_io = bridge->resource[PCI_BRIDGE_IO_WINDOW]; available_mmio = bridge->resource[PCI_BRIDGE_MEM_WINDOW]; @@ -1983,6 +1988,54 @@ static void pci_bridge_distribute_available_resources(struct pci_dev *bridge, available_mmio_pref); } +static bool pci_bridge_resources_not_assigned(struct pci_dev *dev) +{ + const struct resource *r; + + /* + * If the child device's resources are not yet assigned it means we + * are configuring them (not the boot firmware), so we should be + * able to extend the upstream bridge resources in the same way we + * do with the normal hotplug case. + */ + r = &dev->resource[PCI_BRIDGE_IO_WINDOW]; + if (r->flags && !(r->flags & IORESOURCE_STARTALIGN)) + return false; + r = &dev->resource[PCI_BRIDGE_MEM_WINDOW]; + if (r->flags && !(r->flags & IORESOURCE_STARTALIGN)) + return false; + r = &dev->resource[PCI_BRIDGE_PREF_MEM_WINDOW]; + if (r->flags && !(r->flags & IORESOURCE_STARTALIGN)) + return false; + + return true; +} + +static void +pci_root_bus_distribute_available_resources(struct pci_bus *bus, + struct list_head *add_list) +{ + struct pci_dev *dev, *bridge = bus->self; + + for_each_pci_bridge(dev, bus) { + struct pci_bus *b; + + b = dev->subordinate; + if (!b) + continue; + + /* + * Need to check "bridge" here too because it is NULL + * in case of root bus. + */ + if (bridge && pci_bridge_resources_not_assigned(dev)) + pci_bridge_distribute_available_resources(bridge, + add_list); + else + pci_root_bus_distribute_available_resources(b, add_list); + } +} + /* * First try will not touch PCI bridge res. * Second and later try will clear small leaf bridge res. @@ -2022,6 +2075,8 @@ void pci_assign_unassigned_root_bus_resources(struct pci_bus *bus) */ __pci_bus_size_bridges(bus, add_list); + pci_root_bus_distribute_available_resources(bus, add_list); + /* Depth last, allocate resources and update the hardware. */ __pci_bus_assign_resources(bus, add_list, &fail_head); if (add_list) From 9d8ba74a181b1c81def21168795ed96cbe6f05ed Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Fri, 10 Feb 2023 14:46:39 +0100 Subject: [PATCH 4/4] PCI: Fix dropping valid root bus resources with .end = zero On r8a7791/koelsch: kmemleak: 1 new suspected memory leaks (see /sys/kernel/debug/kmemleak) # cat /sys/kernel/debug/kmemleak unreferenced object 0xc3a34e00 (size 64): comm "swapper/0", pid 1, jiffies 4294937460 (age 199.080s) hex dump (first 32 bytes): b4 5d 81 f0 b4 5d 81 f0 c0 b0 a2 c3 00 00 00 00 .]...].......... 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ backtrace: [] __kmalloc+0xf0/0x140 [<34bd6bc0>] resource_list_create_entry+0x18/0x38 [<767046bc>] pci_add_resource_offset+0x20/0x68 [] devm_of_pci_get_host_bridge_resources.constprop.0+0xb0/0x390 When coalescing two resources for a contiguous aperture, the second resource is enlarged to cover the full contiguous range, while the first resource is marked invalid. This invalidation is done by clearing the flags, start, and end members. When adding the initial resources to the bus later, invalid resources are skipped. Unfortunately, the check for an invalid resource considers only the end member, causing false positives. E.g. on r8a7791/koelsch, root bus resource 0 ("bus 00") is skipped, and no longer registered with pci_bus_insert_busn_res() (causing the memory leak), nor printed: pci-rcar-gen2 ee090000.pci: host bridge /soc/pci@ee090000 ranges: pci-rcar-gen2 ee090000.pci: MEM 0x00ee080000..0x00ee08ffff -> 0x00ee080000 pci-rcar-gen2 ee090000.pci: PCI: revision 11 pci-rcar-gen2 ee090000.pci: PCI host bridge to bus 0000:00 -pci_bus 0000:00: root bus resource [bus 00] pci_bus 0000:00: root bus resource [mem 0xee080000-0xee08ffff] Fix this by only skipping resources where all of the flags, start, and end members are zero. Fixes: 7c3855c423b17f6c ("PCI: Coalesce host bridge contiguous apertures") Link: https://lore.kernel.org/r/da0fcd5e86c74239be79c7cb03651c0fce31b515.1676036673.git.geert+renesas@glider.be Tested-by: Niklas Schnelle Signed-off-by: Geert Uytterhoeven Signed-off-by: Bjorn Helgaas Acked-by: Kai-Heng Feng --- drivers/pci/probe.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c index 1779582fb500..598858482548 100644 --- a/drivers/pci/probe.c +++ b/drivers/pci/probe.c @@ -996,7 +996,7 @@ static int pci_register_host_bridge(struct pci_host_bridge *bridge) resource_list_for_each_entry_safe(window, n, &resources) { offset = window->offset; res = window->res; - if (!res->end) + if (!res->flags && !res->start && !res->end) continue; list_move_tail(&window->node, &bridge->windows);