mirror of
https://github.com/torvalds/linux.git
synced 2026-05-12 16:18:45 +02:00
Bernd reports passthrough failure of a Digital Devices Cine S2 V6 DVB adapter plugged into an ASRock X570S PG Riptide board with BIOS version P5.41 (09/07/2023):

  ddbridge 0000:05:00.0: detected Digital Devices Cine S2 V6 DVB adapter
  ddbridge 0000:05:00.0: cannot read registers
  ddbridge 0000:05:00.0: fail

BIOS assigns an incorrect BAR to the DVB adapter which doesn't fit into the upstream bridge window. The kernel corrects the BAR assignment:

  pci 0000:07:00.0: BAR 0 [mem 0xfffffffffc500000-0xfffffffffc50ffff 64bit]: can't claim; no compatible bridge window
  pci 0000:07:00.0: BAR 0 [mem 0xfc500000-0xfc50ffff 64bit]: assigned

Correction of the BAR assignment happens in an x86-specific fs_initcall, pcibios_assign_resources(), after device enumeration in a subsys_initcall. This order was introduced at the behest of Linus in 2004:

  https://git.kernel.org/tglx/history/c/a06a30144bbc

No other architecture performs such a late BAR correction.

Bernd bisected the issue to commit a2f1e22390 ("PCI/ERR: Ensure error recoverability at all times"), but it only occurs in the absence of commit 4d4c10f763 ("PCI: Explicitly put devices into D0 when initializing"). This combination exists in stable kernel v6.12.70, but not in mainline, hence Bernd cannot reproduce the issue with mainline.

Since a2f1e22390, config space is saved on enumeration, prior to BAR correction. Upon passthrough, the corrected BAR is overwritten with the incorrect saved value by:

  vfio_pci_core_register_device()
    vfio_pci_set_power_state()
      pci_restore_state()

But only if the device's current_state is PCI_UNKNOWN, as it was prior to commit 4d4c10f763. Since the commit, it is PCI_D0, which changes the behavior of vfio_pci_set_power_state() to no longer restore the state without saving it first.

Alexandre is reporting the same issue as Bernd, but in his case, mainline is affected as well.
The difference is that on Alexandre's system, the host kernel binds a driver to the device which is unbound prior to passthrough, whereas on Bernd's system no driver gets bound by the host kernel. Unbinding sets current_state to PCI_UNKNOWN in pci_device_remove(), so when vfio-pci is subsequently bound to the device, pci_restore_state() is once again called without invoking pci_save_state() first.

To robustly fix the issue, always update saved_config_space upon resource assignment.

Reported-by: Bernd Schumacher <bernd@bschu.de>
Closes: https://lore.kernel.org/r/acfZrlP0Ua_5D3U4@eldamar.lan/
Reported-by: Alexandre N. <an.tech@mailo.com>
Closes: https://lore.kernel.org/r/dd3c3358-de0f-4a56-9c81-04aceaab4058@mailo.com/
Fixes: a2f1e22390 ("PCI/ERR: Ensure error recoverability at all times")
Signed-off-by: Lukas Wunner <lukas@wunner.de>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Tested-by: Bernd Schumacher <bernd@bschu.de>
Tested-by: Alexandre N. <an.tech@mailo.com>
Cc: stable@vger.kernel.org # v6.12+
Link: https://patch.msgid.link/febc3f354e0c1f5a9f5b3ee9ffddaa44caccf651.1776268054.git.lukas@wunner.de
523 lines
14 KiB
C
523 lines
14 KiB
C
// SPDX-License-Identifier: GPL-2.0
|
|
/*
|
|
* Support routines for initializing a PCI subsystem
|
|
*
|
|
* Extruded from code written by
|
|
* Dave Rusling (david.rusling@reo.mts.dec.com)
|
|
* David Mosberger (davidm@cs.arizona.edu)
|
|
* David Miller (davem@redhat.com)
|
|
*
|
|
* Fixed for multiple PCI buses, 1999 Andrea Arcangeli <andrea@suse.de>
|
|
*
|
|
* Nov 2000, Ivan Kokshaysky <ink@jurassic.park.msu.ru>
|
|
* Resource sorting
|
|
*/
|
|
|
|
#include <linux/kernel.h>
|
|
#include <linux/export.h>
|
|
#include <linux/pci.h>
|
|
#include <linux/errno.h>
|
|
#include <linux/ioport.h>
|
|
#include <linux/cache.h>
|
|
#include <linux/slab.h>
|
|
#include "pci.h"
|
|
|
|
static void pci_std_update_resource(struct pci_dev *dev, int resno)
|
|
{
|
|
struct pci_bus_region region;
|
|
bool disable;
|
|
u16 cmd;
|
|
u32 new, check, mask;
|
|
int reg;
|
|
struct resource *res = pci_resource_n(dev, resno);
|
|
const char *res_name = pci_resource_name(dev, resno);
|
|
|
|
/* Per SR-IOV spec 3.4.1.11, VF BARs are RO zero */
|
|
if (dev->is_virtfn)
|
|
return;
|
|
|
|
/*
|
|
* Ignore resources for unimplemented BARs and unused resource slots
|
|
* for 64 bit BARs.
|
|
*/
|
|
if (!res->flags)
|
|
return;
|
|
|
|
if (res->flags & IORESOURCE_UNSET)
|
|
return;
|
|
|
|
/*
|
|
* Ignore non-moveable resources. This might be legacy resources for
|
|
* which no functional BAR register exists or another important
|
|
* system resource we shouldn't move around.
|
|
*/
|
|
if (res->flags & IORESOURCE_PCI_FIXED)
|
|
return;
|
|
|
|
pcibios_resource_to_bus(dev->bus, ®ion, res);
|
|
new = region.start;
|
|
|
|
if (res->flags & IORESOURCE_IO) {
|
|
mask = (u32)PCI_BASE_ADDRESS_IO_MASK;
|
|
new |= res->flags & ~PCI_BASE_ADDRESS_IO_MASK;
|
|
} else if (resno == PCI_ROM_RESOURCE) {
|
|
mask = PCI_ROM_ADDRESS_MASK;
|
|
} else {
|
|
mask = (u32)PCI_BASE_ADDRESS_MEM_MASK;
|
|
new |= res->flags & ~PCI_BASE_ADDRESS_MEM_MASK;
|
|
}
|
|
|
|
if (resno < PCI_ROM_RESOURCE) {
|
|
reg = PCI_BASE_ADDRESS_0 + 4 * resno;
|
|
} else if (resno == PCI_ROM_RESOURCE) {
|
|
|
|
/*
|
|
* Apparently some Matrox devices have ROM BARs that read
|
|
* as zero when disabled, so don't update ROM BARs unless
|
|
* they're enabled. See
|
|
* https://lore.kernel.org/r/43147B3D.1030309@vc.cvut.cz/
|
|
* But we must update ROM BAR for buggy devices where even a
|
|
* disabled ROM can conflict with other BARs.
|
|
*/
|
|
if (!(res->flags & IORESOURCE_ROM_ENABLE) &&
|
|
!dev->rom_bar_overlap)
|
|
return;
|
|
|
|
reg = dev->rom_base_reg;
|
|
if (res->flags & IORESOURCE_ROM_ENABLE)
|
|
new |= PCI_ROM_ADDRESS_ENABLE;
|
|
} else
|
|
return;
|
|
|
|
/*
|
|
* We can't update a 64-bit BAR atomically, so when possible,
|
|
* disable decoding so that a half-updated BAR won't conflict
|
|
* with another device.
|
|
*/
|
|
disable = (res->flags & IORESOURCE_MEM_64) && !dev->mmio_always_on;
|
|
if (disable) {
|
|
pci_read_config_word(dev, PCI_COMMAND, &cmd);
|
|
pci_write_config_word(dev, PCI_COMMAND,
|
|
cmd & ~PCI_COMMAND_MEMORY);
|
|
}
|
|
|
|
pci_write_config_dword(dev, reg, new);
|
|
dev->saved_config_space[reg / 4] = new;
|
|
pci_read_config_dword(dev, reg, &check);
|
|
|
|
if ((new ^ check) & mask) {
|
|
pci_err(dev, "%s: error updating (%#010x != %#010x)\n",
|
|
res_name, new, check);
|
|
}
|
|
|
|
if (res->flags & IORESOURCE_MEM_64) {
|
|
new = region.start >> 16 >> 16;
|
|
pci_write_config_dword(dev, reg + 4, new);
|
|
dev->saved_config_space[(reg + 4) / 4] = new;
|
|
pci_read_config_dword(dev, reg + 4, &check);
|
|
if (check != new) {
|
|
pci_err(dev, "%s: error updating (high %#010x != %#010x)\n",
|
|
res_name, new, check);
|
|
}
|
|
}
|
|
|
|
if (disable)
|
|
pci_write_config_word(dev, PCI_COMMAND, cmd);
|
|
}
|
|
|
|
void pci_update_resource(struct pci_dev *dev, int resno)
|
|
{
|
|
if (resno <= PCI_ROM_RESOURCE)
|
|
pci_std_update_resource(dev, resno);
|
|
else if (pci_resource_is_iov(resno))
|
|
pci_iov_update_resource(dev, resno);
|
|
}
|
|
|
|
int pci_claim_resource(struct pci_dev *dev, int resource)
|
|
{
|
|
struct resource *res = &dev->resource[resource];
|
|
const char *res_name = pci_resource_name(dev, resource);
|
|
struct resource *root, *conflict;
|
|
|
|
if (res->flags & IORESOURCE_UNSET) {
|
|
pci_info(dev, "%s %pR: can't claim; no address assigned\n",
|
|
res_name, res);
|
|
return -EINVAL;
|
|
}
|
|
|
|
/*
|
|
* If we have a shadow copy in RAM, the PCI device doesn't respond
|
|
* to the shadow range, so we don't need to claim it, and upstream
|
|
* bridges don't need to route the range to the device.
|
|
*/
|
|
if (res->flags & IORESOURCE_ROM_SHADOW)
|
|
return 0;
|
|
|
|
root = pci_find_parent_resource(dev, res);
|
|
if (!root) {
|
|
pci_info(dev, "%s %pR: can't claim; no compatible bridge window\n",
|
|
res_name, res);
|
|
res->flags |= IORESOURCE_UNSET;
|
|
return -EINVAL;
|
|
}
|
|
|
|
conflict = request_resource_conflict(root, res);
|
|
if (conflict) {
|
|
pci_info(dev, "%s %pR: can't claim; address conflict with %s %pR\n",
|
|
res_name, res, conflict->name, conflict);
|
|
res->flags |= IORESOURCE_UNSET;
|
|
return -EBUSY;
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
EXPORT_SYMBOL(pci_claim_resource);
|
|
|
|
void pci_disable_bridge_window(struct pci_dev *dev)
|
|
{
|
|
/* MMIO Base/Limit */
|
|
pci_write_config_dword(dev, PCI_MEMORY_BASE, 0x0000fff0);
|
|
|
|
/* Prefetchable MMIO Base/Limit */
|
|
pci_write_config_dword(dev, PCI_PREF_LIMIT_UPPER32, 0);
|
|
pci_write_config_dword(dev, PCI_PREF_MEMORY_BASE, 0x0000fff0);
|
|
pci_write_config_dword(dev, PCI_PREF_BASE_UPPER32, 0xffffffff);
|
|
}
|
|
|
|
/*
|
|
* Generic function that returns a value indicating that the device's
|
|
* original BIOS BAR address was not saved and so is not available for
|
|
* reinstatement.
|
|
*
|
|
* Can be over-ridden by architecture specific code that implements
|
|
* reinstatement functionality rather than leaving it disabled when
|
|
* normal allocation attempts fail.
|
|
*/
|
|
resource_size_t __weak pcibios_retrieve_fw_addr(struct pci_dev *dev, int idx)
|
|
{
|
|
return 0;
|
|
}
|
|
|
|
/*
 * Try to place @res (resource @resno of @dev, @size bytes) at the address
 * reported by pcibios_retrieve_fw_addr().
 *
 * Returns 0 on success, -ENOMEM when no firmware address is known,
 * -ENXIO when no parent resource is found and @dev sits behind a bridge,
 * and -EBUSY on a conflict; in the -EBUSY case the previous start/end are
 * put back and the resource is flagged IORESOURCE_UNSET again.
 */
static int pci_revert_fw_address(struct resource *res, struct pci_dev *dev,
				 int resno, resource_size_t size)
{
	const char *name = pci_resource_name(dev, resno);
	resource_size_t fw_addr, old_start, old_end;
	struct resource *parent, *busy;

	fw_addr = pcibios_retrieve_fw_addr(dev, resno);
	if (!fw_addr)
		return -ENOMEM;

	/* Remember the current range so a conflict can be rolled back */
	old_start = res->start;
	old_end = res->end;

	resource_set_range(res, fw_addr, size);
	res->flags &= ~IORESOURCE_UNSET;

	parent = pci_find_parent_resource(dev, res);
	if (!parent) {
		/*
		 * If dev is behind a bridge, accesses will only reach it
		 * if res is inside the relevant bridge window.
		 */
		if (pci_upstream_bridge(dev))
			return -ENXIO;

		/*
		 * On the root bus, assume the host bridge will forward
		 * everything.
		 */
		parent = (res->flags & IORESOURCE_IO) ? &ioport_resource
						      : &iomem_resource;
	}

	pci_info(dev, "%s: trying firmware assignment %pR\n", name, res);

	busy = request_resource_conflict(parent, res);
	if (!busy)
		return 0;

	pci_info(dev, "%s %pR: conflicts with %s %pR\n", name, res,
		 busy->name, busy);
	res->start = old_start;
	res->end = old_end;
	res->flags |= IORESOURCE_UNSET;
	return -EBUSY;
}
|
|
|
|
/*
|
|
* For mem bridge windows, try to relocate tail remainder space to space
|
|
* before res->start if there's enough free space there. This enables
|
|
* tighter packing for resources.
|
|
*/
|
|
resource_size_t pci_align_resource(struct pci_dev *dev,
|
|
const struct resource *res,
|
|
const struct resource *empty_res,
|
|
resource_size_t size,
|
|
resource_size_t align)
|
|
{
|
|
resource_size_t remainder, start_addr;
|
|
|
|
if (!(res->flags & IORESOURCE_MEM))
|
|
return res->start;
|
|
|
|
if (IS_ALIGNED(size, align))
|
|
return res->start;
|
|
|
|
remainder = size - ALIGN_DOWN(size, align);
|
|
/* Don't mess with size that doesn't align with window size granularity */
|
|
if (!IS_ALIGNED(remainder, pci_min_window_alignment(dev->bus, res->flags)))
|
|
return res->start;
|
|
/* Try to place remainder that doesn't fill align before */
|
|
if (res->start < remainder)
|
|
return res->start;
|
|
start_addr = res->start - remainder;
|
|
if (empty_res->start > start_addr)
|
|
return res->start;
|
|
|
|
pci_dbg(dev, "%pR: moving candidate start address below align to %llx\n",
|
|
res, (unsigned long long)start_addr);
|
|
return start_addr;
|
|
}
|
|
|
|
/*
|
|
* We don't have to worry about legacy ISA devices, so nothing to do here.
|
|
* This is marked as __weak because multiple architectures define it; it should
|
|
* eventually go away.
|
|
*/
|
|
resource_size_t __weak pcibios_align_resource(void *data,
|
|
const struct resource *res,
|
|
const struct resource *empty_res,
|
|
resource_size_t size,
|
|
resource_size_t align)
|
|
{
|
|
struct pci_dev *dev = data;
|
|
|
|
return pci_align_resource(dev, res, empty_res, size, align);
|
|
}
|
|
|
|
/*
 * Allocate space for resource @resno of @dev from the windows of @bus,
 * making up to three attempts with progressively looser type matching.
 * Returns 0 as soon as one attempt succeeds, otherwise the result of the
 * last attempt made.
 */
static int __pci_assign_resource(struct pci_bus *bus, struct pci_dev *dev,
				 int resno, resource_size_t size,
				 resource_size_t align)
{
	const unsigned long pref64 = IORESOURCE_PREFETCH | IORESOURCE_MEM_64;
	struct resource *res = pci_resource_n(dev, resno);
	resource_size_t min_addr;
	int ret;

	if (res->flags & IORESOURCE_IO)
		min_addr = PCIBIOS_MIN_IO;
	else
		min_addr = PCIBIOS_MIN_MEM;

	/*
	 * First, try exact prefetching match.  Even if a 64-bit
	 * prefetchable bridge window is below 4GB, we can't put a 32-bit
	 * prefetchable resource in it because pbus_size_mem() assumes a
	 * 64-bit window will contain no 32-bit resources.  If we assign
	 * things differently than they were sized, not everything will fit.
	 */
	ret = pci_bus_alloc_resource(bus, res, size, align, min_addr,
				     pref64, pcibios_align_resource, dev);
	if (!ret)
		return 0;

	/*
	 * If the prefetchable window is only 32 bits wide, we can put
	 * 64-bit prefetchable resources in it.
	 */
	if ((res->flags & pref64) == pref64) {
		ret = pci_bus_alloc_resource(bus, res, size, align, min_addr,
					     IORESOURCE_PREFETCH,
					     pcibios_align_resource, dev);
		if (!ret)
			return 0;
	}

	/*
	 * If we didn't find a better match, we can put any memory resource
	 * in a non-prefetchable window.  If this resource is 32 bits and
	 * non-prefetchable, the first call already tried the only possibility
	 * so we don't need to try again.
	 */
	if (!(res->flags & pref64))
		return ret;

	return pci_bus_alloc_resource(bus, res, size, align, min_addr, 0,
				      pcibios_align_resource, dev);
}
|
|
|
|
/*
 * Try to assign resource @resno of @dev on the device's own bus.  While
 * that fails and the bridge leading to the current bus is transparent,
 * retry on the parent bus.  Returns the result of the last attempt.
 */
static int _pci_assign_resource(struct pci_dev *dev, int resno,
				resource_size_t size, resource_size_t min_align)
{
	struct pci_bus *bus;
	int ret;

	for (bus = dev->bus; ; bus = bus->parent) {
		ret = __pci_assign_resource(bus, dev, resno, size, min_align);
		if (!ret)
			break;

		/* Only climb past a bridge that is transparent */
		if (!bus->parent || !bus->self->transparent)
			break;
	}

	return ret;
}
|
|
|
|
int pci_assign_resource(struct pci_dev *dev, int resno)
|
|
{
|
|
struct resource *res = pci_resource_n(dev, resno);
|
|
const char *res_name = pci_resource_name(dev, resno);
|
|
resource_size_t align, size;
|
|
int ret;
|
|
|
|
if (res->flags & IORESOURCE_PCI_FIXED)
|
|
return 0;
|
|
|
|
res->flags |= IORESOURCE_UNSET;
|
|
align = pci_resource_alignment(dev, res);
|
|
if (!align) {
|
|
pci_info(dev, "%s %pR: can't assign; bogus alignment\n",
|
|
res_name, res);
|
|
return -EINVAL;
|
|
}
|
|
|
|
size = resource_size(res);
|
|
ret = _pci_assign_resource(dev, resno, size, align);
|
|
|
|
/*
|
|
* If we failed to assign anything, let's try the address
|
|
* where firmware left it. That at least has a chance of
|
|
* working, which is better than just leaving it disabled.
|
|
*/
|
|
if (ret < 0) {
|
|
pci_info(dev, "%s %pR: can't assign; no space\n", res_name, res);
|
|
ret = pci_revert_fw_address(res, dev, resno, size);
|
|
}
|
|
|
|
if (ret < 0) {
|
|
pci_info(dev, "%s %pR: failed to assign\n", res_name, res);
|
|
return ret;
|
|
}
|
|
|
|
res->flags &= ~IORESOURCE_UNSET;
|
|
res->flags &= ~IORESOURCE_STARTALIGN;
|
|
if (pci_resource_is_bridge_win(resno))
|
|
res->flags &= ~IORESOURCE_DISABLED;
|
|
|
|
pci_info(dev, "%s %pR: assigned\n", res_name, res);
|
|
if (resno < PCI_BRIDGE_RESOURCES)
|
|
pci_update_resource(dev, resno);
|
|
|
|
return 0;
|
|
}
|
|
EXPORT_SYMBOL(pci_assign_resource);
|
|
|
|
/*
 * Grow resource @resno of @dev by @addsize bytes and assign it again with
 * alignment @min_align.
 *
 * Resources flagged IORESOURCE_PCI_FIXED are left alone (returns 0).
 * Returns -EINVAL when the resource has no parent, i.e. was never
 * assigned.  When the new assignment fails, the original flags are
 * restored and the error is returned.  Returns 0 on success.
 */
int pci_reassign_resource(struct pci_dev *dev, int resno,
			  resource_size_t addsize, resource_size_t min_align)
{
	struct resource *res = pci_resource_n(dev, resno);
	const char *name = pci_resource_name(dev, resno);
	unsigned long saved_flags;
	resource_size_t grown;
	int rc;

	if (res->flags & IORESOURCE_PCI_FIXED)
		return 0;

	saved_flags = res->flags;
	res->flags |= IORESOURCE_UNSET;

	if (!res->parent) {
		pci_info(dev, "%s %pR: can't reassign; unassigned resource\n",
			 name, res);
		return -EINVAL;
	}

	grown = resource_size(res) + addsize;
	rc = _pci_assign_resource(dev, resno, grown, min_align);
	if (rc) {
		res->flags = saved_flags;
		pci_info(dev, "%s %pR: failed to expand by %#llx\n",
			 name, res, (unsigned long long) addsize);
		return rc;
	}

	res->flags &= ~(IORESOURCE_UNSET | IORESOURCE_STARTALIGN);
	pci_info(dev, "%s %pR: reassigned; expanded by %#llx\n",
		 name, res, (unsigned long long) addsize);

	if (resno < PCI_BRIDGE_RESOURCES)
		pci_update_resource(dev, resno);

	return 0;
}
|
|
|
|
int pci_release_resource(struct pci_dev *dev, int resno)
|
|
{
|
|
struct resource *res = pci_resource_n(dev, resno);
|
|
const char *res_name = pci_resource_name(dev, resno);
|
|
int ret;
|
|
|
|
if (!res->parent)
|
|
return 0;
|
|
|
|
pci_info(dev, "%s %pR: releasing\n", res_name, res);
|
|
|
|
ret = release_resource(res);
|
|
if (ret)
|
|
return ret;
|
|
res->end = resource_size(res) - 1;
|
|
res->start = 0;
|
|
res->flags |= IORESOURCE_UNSET;
|
|
|
|
return 0;
|
|
}
|
|
EXPORT_SYMBOL(pci_release_resource);
|
|
|
|
int pci_enable_resources(struct pci_dev *dev, int mask)
|
|
{
|
|
u16 cmd, old_cmd;
|
|
int i;
|
|
struct resource *r;
|
|
const char *r_name;
|
|
|
|
pci_read_config_word(dev, PCI_COMMAND, &cmd);
|
|
old_cmd = cmd;
|
|
|
|
pci_dev_for_each_resource(dev, r, i) {
|
|
if (!(mask & (1 << i)))
|
|
continue;
|
|
|
|
r_name = pci_resource_name(dev, i);
|
|
|
|
if (!(r->flags & (IORESOURCE_IO | IORESOURCE_MEM)))
|
|
continue;
|
|
if (pci_resource_is_optional(dev, i))
|
|
continue;
|
|
|
|
if (i < PCI_BRIDGE_RESOURCES) {
|
|
if (r->flags & IORESOURCE_UNSET) {
|
|
pci_err(dev, "%s %pR: not assigned; can't enable device\n",
|
|
r_name, r);
|
|
return -EINVAL;
|
|
}
|
|
|
|
if (!r->parent) {
|
|
pci_err(dev, "%s %pR: not claimed; can't enable device\n",
|
|
r_name, r);
|
|
return -EINVAL;
|
|
}
|
|
}
|
|
|
|
if (r->parent) {
|
|
if (r->flags & IORESOURCE_IO)
|
|
cmd |= PCI_COMMAND_IO;
|
|
if (r->flags & IORESOURCE_MEM)
|
|
cmd |= PCI_COMMAND_MEMORY;
|
|
}
|
|
}
|
|
|
|
if (cmd != old_cmd) {
|
|
pci_info(dev, "enabling device (%04x -> %04x)\n", old_cmd, cmd);
|
|
pci_write_config_word(dev, PCI_COMMAND, cmd);
|
|
}
|
|
return 0;
|
|
}
|