VFIO fixes for v7.1-rc4

  - Convert vfio-pci BAR resource requests and iomaps initialization
    from a lazy, on-demand model to an eager pre-allocation model to
    avoid races while preserving legacy error behavior.  Fix unchecked
    barmap access in dma-buf export path. (Matt Evans)
 
  - Introduce an implicit unsigned cast in converting vfio-pci device
    offsets to region indexes, closing a potential out-of-bounds
    access through the vfio_pci_ioeventfd() interface. (Matt Evans)
 
  - Fix a dma-buf kref underflow and stuck wait_for_completion() when
    closing a previously revoked dma-buf. (Alex Williamson)
 -----BEGIN PGP SIGNATURE-----
 
 iQJFBAABCgAvFiEEQvbATlQL0amee4qQI5ubbjuwiyIFAmoHU9QRHGFsZXhAc2hh
 emJvdC5vcmcACgkQI5ubbjuwiyIp6xAAqu4qVp1IsMj5jgG3S3aFFpTSAwpHPvu6
 A/U99/pmniU/F714kujKyWS+w75k2L9De3T+u0numQ5qrrF8MDcyzVHGxuWSolHL
 h2Y2D74zCS2J9q6VGW9VU7isplitjEYLgC/eXwmfaZ5YBH/No2KurCbYXowSjuMU
 BQTgjvoT9tsj5AOcVhdladImuwf7SZ+8CvxHE3seybDmb8EDjWTgNgU7F1PNblEG
 Bx2sSa7zLgRzmsh8R78lqKeknrRSLi3IIW4RqWXNRA5+z9Cs3Ibd9a2zCu8yFuj6
 7FInwh3EBKrHGCpZiom5VhYRUojVk+rOMQOTIXepucn6igZmSJWirUJneF7LmbNm
 bUxzZZn9DHzFupl9CuzXLW+C2A0ZQVCXJ6rPxAbBPeguWIZvRKUS48l4RFaknPzY
 wEOEeBZQgzcxwNj4P7ji0rJOT8taa2C4Hh8Fada1D5zS7ITavMNkAMm0M5DsH2So
 85rTYMN2pPPUos16oWkhvPgRJRMwBRR2yB+XH/joFN3Y9QbGG0nhkARe9mbD+FE8
 KyBw+pNpwuQCDqqXboxiQrEP9jEMCf/16KrUNpHgqyNvcfmX316jtONWYGJunDu9
 yYgMQlpuxssy8Z3542CA+QWGt+hwmzr5MlVlTzaG8C1uWfpkoOqaJBVuFyza1OxV
 2C/XRHcEB8E=
 =2Sn8
 -----END PGP SIGNATURE-----

Merge tag 'vfio-v7.1-rc4' of https://github.com/awilliam/linux-vfio

Pull VFIO fixes from Alex Williamson:

 - Convert vfio-pci BAR resource requests and iomaps initialization
   from a lazy, on-demand model to an eager pre-allocation model to
   avoid races while preserving legacy error behavior.  Fix unchecked
   barmap access in dma-buf export path (Matt Evans)

 - Introduce an implicit unsigned cast in converting vfio-pci device
   offsets to region indexes, closing a potential out-of-bounds
   access through the vfio_pci_ioeventfd() interface (Matt Evans)

 - Fix a dma-buf kref underflow and stuck wait_for_completion() when
   closing a previously revoked dma-buf (Alex Williamson)

* tag 'vfio-v7.1-rc4' of https://github.com/awilliam/linux-vfio:
  vfio/pci: Check BAR resources before exporting a DMABUF
  vfio/pci: Set up BAR resources and maps in vfio_pci_core_enable()
  vfio/pci: Make VFIO_PCI_OFFSET_TO_INDEX() return unsigned
  vfio/pci: fix dma-buf kref underflow after revoke
This commit is contained in:
Linus Torvalds 2026-05-15 15:13:02 -07:00
commit 3bf83e47b4
4 changed files with 66 additions and 41 deletions

View File

@ -482,6 +482,40 @@ static int vfio_pci_core_runtime_resume(struct device *dev)
}
#endif /* CONFIG_PM */
/*
 * Request and iomap every standard BAR up front.
 *
 * Failures here are soft: nothing is returned to the caller.  Instead
 * each vdev->barmap[] slot ends up holding either the iomap cookie or
 * an error pointer describing why the BAR is unusable:
 *
 *   -ENODEV  the BAR has zero length
 *   -EBUSY   the region could not be reserved
 *   -ENOMEM  the region was reserved but pci_iomap() failed
 *
 * Consumers must check the barmap entry before using it, which keeps
 * the user-visible behaviour compatible with the previous on-demand
 * allocation method.
 */
static void vfio_pci_core_map_bars(struct vfio_pci_core_device *vdev)
{
	struct pci_dev *pdev = vdev->pdev;
	int idx;

	for (idx = 0; idx < PCI_STD_NUM_BARS; idx++) {
		int bar = idx + PCI_STD_RESOURCES;
		int mask = 1 << bar;

		/* Start from "no such BAR"; overwritten below if one exists. */
		vdev->barmap[bar] = IOMEM_ERR_PTR(-ENODEV);
		if (pci_resource_len(pdev, idx) == 0)
			continue;

		if (pci_request_selected_regions(pdev, mask, "vfio") != 0) {
			pci_dbg(pdev, "Failed to reserve region %d\n", bar);
			vdev->barmap[bar] = IOMEM_ERR_PTR(-EBUSY);
			continue;
		}

		vdev->barmap[bar] = pci_iomap(pdev, bar, 0);
		if (vdev->barmap[bar])
			continue;

		/* Mapping failed: hand the region back and record the error. */
		pci_dbg(pdev, "Failed to iomap region %d\n", bar);
		pci_release_selected_regions(pdev, mask);
		vdev->barmap[bar] = IOMEM_ERR_PTR(-ENOMEM);
	}
}
/*
* The pci-driver core runtime PM routines always save the device state
* before going into suspended state. If the device is going into low power
@ -568,6 +602,7 @@ int vfio_pci_core_enable(struct vfio_pci_core_device *vdev)
if (!vfio_vga_disabled() && vfio_pci_is_vga(pdev))
vdev->has_vga = true;
vfio_pci_core_map_bars(vdev);
return 0;
@ -648,7 +683,7 @@ void vfio_pci_core_disable(struct vfio_pci_core_device *vdev)
for (i = 0; i < PCI_STD_NUM_BARS; i++) {
bar = i + PCI_STD_RESOURCES;
if (!vdev->barmap[bar])
if (IS_ERR_OR_NULL(vdev->barmap[bar]))
continue;
pci_iounmap(pdev, vdev->barmap[bar]);
pci_release_selected_regions(pdev, 1 << bar);

View File

@ -244,9 +244,11 @@ int vfio_pci_core_feature_dma_buf(struct vfio_pci_core_device *vdev, u32 flags,
return -EINVAL;
/*
* For PCI the region_index is the BAR number like everything else.
* For PCI the region_index is the BAR number like everything
* else. Check that PCI resources have been claimed for it.
*/
if (get_dma_buf.region_index >= VFIO_PCI_ROM_REGION_INDEX)
if (get_dma_buf.region_index >= VFIO_PCI_ROM_REGION_INDEX ||
vfio_pci_core_setup_barmap(vdev, get_dma_buf.region_index))
return -ENODEV;
dma_ranges = memdup_array_user(&arg->dma_ranges, get_dma_buf.nr_ranges,
@ -354,19 +356,18 @@ void vfio_pci_dma_buf_move(struct vfio_pci_core_device *vdev, bool revoked)
if (revoked) {
kref_put(&priv->kref, vfio_pci_dma_buf_done);
wait_for_completion(&priv->comp);
} else {
/*
* Kref is initialize again, because when revoke
* was performed the reference counter was decreased
* to zero to trigger completion.
* Re-arm the registered kref reference and the
* completion so the post-revoke state matches the
* post-creation state. An un-revoke followed by a
* new mapping needs the kref to be non-zero before
* kref_get(), and vfio_pci_dma_buf_cleanup()
* delegates its drain back through this revoke
* path on a possibly-already-revoked dma-buf.
*/
kref_init(&priv->kref);
/*
* There is no need to wait as no mapping was
* performed when the previous status was
* priv->revoked == true.
*/
reinit_completion(&priv->comp);
} else {
dma_resv_lock(priv->dmabuf->resv, NULL);
priv->revoked = false;
dma_resv_unlock(priv->dmabuf->resv);
@ -382,21 +383,22 @@ void vfio_pci_dma_buf_cleanup(struct vfio_pci_core_device *vdev)
struct vfio_pci_dma_buf *tmp;
down_write(&vdev->memory_lock);
/*
* Drain any active mappings via the revoke path. The move is
* idempotent for dma-bufs already in the revoked state and
* leaves every priv with the kref re-armed and the completion
* ready, so cleanup itself does not need to participate in kref
* bookkeeping.
*/
vfio_pci_dma_buf_move(vdev, true);
list_for_each_entry_safe(priv, tmp, &vdev->dmabufs, dmabufs_elm) {
if (!get_file_active(&priv->dmabuf->file))
continue;
dma_resv_lock(priv->dmabuf->resv, NULL);
list_del_init(&priv->dmabufs_elm);
priv->vdev = NULL;
priv->revoked = true;
dma_buf_invalidate_mappings(priv->dmabuf);
dma_resv_wait_timeout(priv->dmabuf->resv,
DMA_RESV_USAGE_BOOKKEEP, false,
MAX_SCHEDULE_TIMEOUT);
dma_resv_unlock(priv->dmabuf->resv);
kref_put(&priv->kref, vfio_pci_dma_buf_done);
wait_for_completion(&priv->comp);
vfio_device_put_registration(&vdev->vdev);
fput(priv->dmabuf->file);
}

View File

@ -198,27 +198,15 @@ ssize_t vfio_pci_core_do_io_rw(struct vfio_pci_core_device *vdev, bool test_mem,
}
EXPORT_SYMBOL_GPL(vfio_pci_core_do_io_rw);
/*
* The barmap is set up in vfio_pci_core_enable(). Callers use this
* function to check that the BAR resources are requested or that the
* pci_iomap() was done.
*/
int vfio_pci_core_setup_barmap(struct vfio_pci_core_device *vdev, int bar)
{
struct pci_dev *pdev = vdev->pdev;
int ret;
void __iomem *io;
if (vdev->barmap[bar])
return 0;
ret = pci_request_selected_regions(pdev, 1 << bar, "vfio");
if (ret)
return ret;
io = pci_iomap(pdev, bar, 0);
if (!io) {
pci_release_selected_regions(pdev, 1 << bar);
return -ENOMEM;
}
vdev->barmap[bar] = io;
if (IS_ERR(vdev->barmap[bar]))
return PTR_ERR(vdev->barmap[bar]);
return 0;
}
EXPORT_SYMBOL_GPL(vfio_pci_core_setup_barmap);

View File

@ -21,7 +21,7 @@
#define VFIO_PCI_CORE_H
#define VFIO_PCI_OFFSET_SHIFT 40
#define VFIO_PCI_OFFSET_TO_INDEX(off) (off >> VFIO_PCI_OFFSET_SHIFT)
#define VFIO_PCI_OFFSET_TO_INDEX(off) ((u64)(off) >> VFIO_PCI_OFFSET_SHIFT)
#define VFIO_PCI_INDEX_TO_OFFSET(index) ((u64)(index) << VFIO_PCI_OFFSET_SHIFT)
#define VFIO_PCI_OFFSET_MASK (((u64)(1) << VFIO_PCI_OFFSET_SHIFT) - 1)