VFIO updates for v6.15-rc1

  - Relax IGD support code to match display class device rather than
    specifically requiring a VGA device. (Tomita Moeko)
 
  - Accelerate DMA mapping of device MMIO by iterating at PMD and PUD
    levels to take advantage of huge pfnmap support added in v6.12.
    (Alex Williamson)
 
  - Extend virtio vfio-pci variant driver to include migration support
    for block devices where enabled by the PF. (Yishai Hadas)
 
  - Virtualize INTx PIN register for devices where the platform does
    not route legacy PCI interrupts for the device and the interrupt
    is reported as IRQ_NOTCONNECTED. (Alex Williamson)
 -----BEGIN PGP SIGNATURE-----
 
 iQJPBAABCAA5FiEEQvbATlQL0amee4qQI5ubbjuwiyIFAmfq5nEbHGFsZXgud2ls
 bGlhbXNvbkByZWRoYXQuY29tAAoJECObm247sIsi3KAP/2MQcQaKTZ6/+dG6YdKT
 ZFaY4+xJ14DnUN/z96UlIWLk8bWgSyDFxdoFMbtFGENKRslEWxZ7In9Caow7f6ux
 7/usBjSvJa5Yx9YWRGsblrx7IyYfSW6R1V+jH3xPd+K8Ir4K7SUvb1CJLVPdfEYh
 OWer8eRpZ5tw3R2X4o+QxZ+H4Fx1zVQourW35h4daqrjnn7kOQMJIzGYOwHSDlCy
 lW0X0yD3sGgw9w7qAmEDmw9UbKGf245AVylIl5T1a7c3RaO+eKdKPZfNa18g0J/Q
 5pRMK+2PvZ+S0OTYxotcF9GtEJ3iBxY8W4QnlLiyTs9XyZ7tLMzGvLEKmCDKA0U8
 yAtoJ5T00PVXjMxkZx1+oMGja9Hx+b7gABTYpbf5wRtab6EdNWln++I1HCLKgZZ+
 yStvQNsMYGbJsLfwiGouMwD24JT+xg3A+Dv2Cx+Ai4NVJebxTD8Lhc0lz2I6IpOh
 wFBpBzBIPpcG53oQ1Syb9GLESQ0Acb4LUMjsSxIg7QFSrWgAAlq/PiLXv852S3xJ
 pUEh7r/YByQytUsQajgE7ekKqyXw0gn99Z+UTk0LUIq/y7SxrIPeqzq2qRf490RV
 wnkOrMxrAWj84lkIv8hLCiLsXMmvV4rsMJgV+s8KQZ+hPv38mPbkFYGdHj4h5RPA
 5J5h32dDkHLK+X5u//gBY8xh
 =fJPO
 -----END PGP SIGNATURE-----

Merge tag 'vfio-v6.15-rc1' of https://github.com/awilliam/linux-vfio

Pull VFIO updates from Alex Williamson:

 - Relax IGD support code to match display class device rather than
   specifically requiring a VGA device (Tomita Moeko)

 - Accelerate DMA mapping of device MMIO by iterating at PMD and PUD
   levels to take advantage of huge pfnmap support added in v6.12
   (Alex Williamson)

 - Extend virtio vfio-pci variant driver to include migration support
   for block devices where enabled by the PF (Yishai Hadas)

 - Virtualize INTx PIN register for devices where the platform does not
   route legacy PCI interrupts for the device and the interrupt is
   reported as IRQ_NOTCONNECTED (Alex Williamson)

* tag 'vfio-v6.15-rc1' of https://github.com/awilliam/linux-vfio:
  vfio/pci: Handle INTx IRQ_NOTCONNECTED
  vfio/virtio: Enable support for virtio-block live migration
  vfio/type1: Use mapping page mask for pfnmaps
  mm: Provide address mask in struct follow_pfnmap_args
  vfio/type1: Use consistent types for page counts
  vfio/type1: Use vfio_batch for vaddr_get_pfns()
  vfio/type1: Convert all vaddr_get_pfns() callers to use vfio_batch
  vfio/type1: Catch zero from pin_user_pages_remote()
  vfio/pci: match IGD devices in display controller class
This commit is contained in:
Linus Torvalds 2025-04-01 19:35:19 -07:00
commit 3491aa0478
12 changed files with 106 additions and 66 deletions

View File

@ -111,9 +111,7 @@ static int vfio_pci_open_device(struct vfio_device *core_vdev)
if (ret)
return ret;
if (vfio_pci_is_vga(pdev) &&
pdev->vendor == PCI_VENDOR_ID_INTEL &&
IS_ENABLED(CONFIG_VFIO_PCI_IGD)) {
if (vfio_pci_is_intel_display(pdev)) {
ret = vfio_pci_igd_init(vdev);
if (ret && ret != -ENODEV) {
pci_warn(pdev, "Failed to setup Intel IGD regions\n");

View File

@ -1814,7 +1814,8 @@ int vfio_config_init(struct vfio_pci_core_device *vdev)
cpu_to_le16(PCI_COMMAND_MEMORY);
}
if (!IS_ENABLED(CONFIG_VFIO_PCI_INTX) || vdev->nointx)
if (!IS_ENABLED(CONFIG_VFIO_PCI_INTX) || vdev->nointx ||
vdev->pdev->irq == IRQ_NOTCONNECTED)
vconfig[PCI_INTERRUPT_PIN] = 0;
ret = vfio_cap_init(vdev);

View File

@ -727,15 +727,7 @@ EXPORT_SYMBOL_GPL(vfio_pci_core_finish_enable);
static int vfio_pci_get_irq_count(struct vfio_pci_core_device *vdev, int irq_type)
{
if (irq_type == VFIO_PCI_INTX_IRQ_INDEX) {
u8 pin;
if (!IS_ENABLED(CONFIG_VFIO_PCI_INTX) ||
vdev->nointx || vdev->pdev->is_virtfn)
return 0;
pci_read_config_byte(vdev->pdev, PCI_INTERRUPT_PIN, &pin);
return pin ? 1 : 0;
return vdev->vconfig[PCI_INTERRUPT_PIN] ? 1 : 0;
} else if (irq_type == VFIO_PCI_MSI_IRQ_INDEX) {
u8 pos;
u16 flags;

View File

@ -435,6 +435,12 @@ static int vfio_pci_igd_cfg_init(struct vfio_pci_core_device *vdev)
return 0;
}
/*
 * Report whether @pdev is an Intel display device.
 *
 * The device qualifies when its vendor ID is PCI_VENDOR_ID_INTEL and the
 * value of pdev->class shifted right by 16 bits equals
 * PCI_BASE_CLASS_DISPLAY, so any display-class device matches rather than
 * only a specific display subclass.
 */
bool vfio_pci_is_intel_display(struct pci_dev *pdev)
{
	/* Non-Intel devices never qualify; skip the class check entirely. */
	if (pdev->vendor != PCI_VENDOR_ID_INTEL)
		return false;

	return (pdev->class >> 16) == PCI_BASE_CLASS_DISPLAY;
}
int vfio_pci_igd_init(struct vfio_pci_core_device *vdev)
{
int ret;

View File

@ -259,7 +259,7 @@ static int vfio_intx_enable(struct vfio_pci_core_device *vdev,
if (!is_irq_none(vdev))
return -EINVAL;
if (!pdev->irq)
if (!pdev->irq || pdev->irq == IRQ_NOTCONNECTED)
return -ENODEV;
name = kasprintf(GFP_KERNEL_ACCOUNT, "vfio-intx(%s)", pci_name(pdev));

View File

@ -67,8 +67,14 @@ void vfio_pci_memory_unlock_and_restore(struct vfio_pci_core_device *vdev,
u16 cmd);
#ifdef CONFIG_VFIO_PCI_IGD
bool vfio_pci_is_intel_display(struct pci_dev *pdev);
int vfio_pci_igd_init(struct vfio_pci_core_device *vdev);
#else
/*
 * Stub used when CONFIG_VFIO_PCI_IGD is not defined: no device is ever
 * reported as an Intel display, so callers guarded by this check do nothing.
 */
static inline bool vfio_pci_is_intel_display(struct pci_dev *pdev)
{
return false;
}
static inline int vfio_pci_igd_init(struct vfio_pci_core_device *vdev)
{
return -ENODEV;

View File

@ -1,11 +1,11 @@
# SPDX-License-Identifier: GPL-2.0-only
config VIRTIO_VFIO_PCI
tristate "VFIO support for VIRTIO NET PCI VF devices"
tristate "VFIO support for VIRTIO PCI VF devices"
depends on VIRTIO_PCI
select VFIO_PCI_CORE
help
This provides migration support for VIRTIO NET PCI VF devices
using the VFIO framework. Migration support requires the
This provides migration support for VIRTIO NET and BLOCK PCI VF
devices using the VFIO framework. Migration support requires the
SR-IOV PF device to support specific VIRTIO extensions,
otherwise this driver provides no additional functionality
beyond vfio-pci.

View File

@ -382,7 +382,9 @@ static bool virtiovf_bar0_exists(struct pci_dev *pdev)
bool virtiovf_support_legacy_io(struct pci_dev *pdev)
{
return virtio_pci_admin_has_legacy_io(pdev) && !virtiovf_bar0_exists(pdev);
/* For now, the legacy IO functionality is supported only for virtio-net */
return pdev->device == 0x1041 && virtio_pci_admin_has_legacy_io(pdev) &&
!virtiovf_bar0_exists(pdev);
}
int virtiovf_init_legacy_io(struct virtiovf_pci_core_device *virtvdev)

View File

@ -187,8 +187,9 @@ static void virtiovf_pci_remove(struct pci_dev *pdev)
}
static const struct pci_device_id virtiovf_pci_table[] = {
/* Only virtio-net is supported/tested so far */
/* Only virtio-net and virtio-block are supported/tested so far */
{ PCI_DRIVER_OVERRIDE_DEVICE_VFIO(PCI_VENDOR_ID_REDHAT_QUMRANET, 0x1041) },
{ PCI_DRIVER_OVERRIDE_DEVICE_VFIO(PCI_VENDOR_ID_REDHAT_QUMRANET, 0x1042) },
{}
};
@ -221,4 +222,4 @@ module_pci_driver(virtiovf_pci_driver);
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Yishai Hadas <yishaih@nvidia.com>");
MODULE_DESCRIPTION(
"VIRTIO VFIO PCI - User Level meta-driver for VIRTIO NET devices");
"VIRTIO VFIO PCI - User Level meta-driver for VIRTIO NET and BLOCK devices");

View File

@ -103,9 +103,9 @@ struct vfio_dma {
struct vfio_batch {
struct page **pages; /* for pin_user_pages_remote */
struct page *fallback_page; /* if pages alloc fails */
int capacity; /* length of pages array */
int size; /* of batch currently */
int offset; /* of next entry in pages */
unsigned int capacity; /* length of pages array */
unsigned int size; /* of batch currently */
unsigned int offset; /* of next entry in pages */
};
struct vfio_iommu_group {
@ -471,12 +471,12 @@ static int put_pfn(unsigned long pfn, int prot)
#define VFIO_BATCH_MAX_CAPACITY (PAGE_SIZE / sizeof(struct page *))
static void vfio_batch_init(struct vfio_batch *batch)
static void __vfio_batch_init(struct vfio_batch *batch, bool single)
{
batch->size = 0;
batch->offset = 0;
if (unlikely(disable_hugepages))
if (single || unlikely(disable_hugepages))
goto fallback;
batch->pages = (struct page **) __get_free_page(GFP_KERNEL);
@ -491,6 +491,16 @@ static void vfio_batch_init(struct vfio_batch *batch)
batch->capacity = 1;
}
/*
 * Initialize @batch for regular (multi-page) use.
 *
 * Calls __vfio_batch_init() with single == false, so the helper attempts to
 * allocate a page for the pages array unless disable_hugepages is set.
 */
static void vfio_batch_init(struct vfio_batch *batch)
{
__vfio_batch_init(batch, false);
}
/*
 * Initialize @batch for a caller that only ever needs one page.
 *
 * Calls __vfio_batch_init() with single == true, which makes the helper jump
 * straight to its fallback path instead of allocating a page for the pages
 * array.
 * NOTE(review): the fallback path appears to leave the batch with a
 * capacity of 1 - confirm against the full __vfio_batch_init() body.
 */
static void vfio_batch_init_single(struct vfio_batch *batch)
{
__vfio_batch_init(batch, true);
}
static void vfio_batch_unpin(struct vfio_batch *batch, struct vfio_dma *dma)
{
while (batch->size) {
@ -510,7 +520,7 @@ static void vfio_batch_fini(struct vfio_batch *batch)
static int follow_fault_pfn(struct vm_area_struct *vma, struct mm_struct *mm,
unsigned long vaddr, unsigned long *pfn,
bool write_fault)
unsigned long *addr_mask, bool write_fault)
{
struct follow_pfnmap_args args = { .vma = vma, .address = vaddr };
int ret;
@ -534,10 +544,12 @@ static int follow_fault_pfn(struct vm_area_struct *vma, struct mm_struct *mm,
return ret;
}
if (write_fault && !args.writable)
if (write_fault && !args.writable) {
ret = -EFAULT;
else
} else {
*pfn = args.pfn;
*addr_mask = args.addr_mask;
}
follow_pfnmap_end(&args);
return ret;
@ -545,25 +557,33 @@ static int follow_fault_pfn(struct vm_area_struct *vma, struct mm_struct *mm,
/*
* Returns the positive number of pfns successfully obtained or a negative
* error code.
* error code. The initial pfn is stored in the pfn arg. For page-backed
* pfns, the provided batch is also updated to indicate the filled pages and
* initial offset. For VM_PFNMAP pfns, only the returned number of pfns and
* returned initial pfn are provided; subsequent pfns are contiguous.
*/
static int vaddr_get_pfns(struct mm_struct *mm, unsigned long vaddr,
long npages, int prot, unsigned long *pfn,
struct page **pages)
static long vaddr_get_pfns(struct mm_struct *mm, unsigned long vaddr,
unsigned long npages, int prot, unsigned long *pfn,
struct vfio_batch *batch)
{
unsigned long pin_pages = min_t(unsigned long, npages, batch->capacity);
struct vm_area_struct *vma;
unsigned int flags = 0;
int ret;
long ret;
if (prot & IOMMU_WRITE)
flags |= FOLL_WRITE;
mmap_read_lock(mm);
ret = pin_user_pages_remote(mm, vaddr, npages, flags | FOLL_LONGTERM,
pages, NULL);
ret = pin_user_pages_remote(mm, vaddr, pin_pages, flags | FOLL_LONGTERM,
batch->pages, NULL);
if (ret > 0) {
*pfn = page_to_pfn(pages[0]);
*pfn = page_to_pfn(batch->pages[0]);
batch->size = ret;
batch->offset = 0;
goto done;
} else if (!ret) {
ret = -EFAULT;
}
vaddr = untagged_addr_remote(mm, vaddr);
@ -572,15 +592,22 @@ static int vaddr_get_pfns(struct mm_struct *mm, unsigned long vaddr,
vma = vma_lookup(mm, vaddr);
if (vma && vma->vm_flags & VM_PFNMAP) {
ret = follow_fault_pfn(vma, mm, vaddr, pfn, prot & IOMMU_WRITE);
unsigned long addr_mask;
ret = follow_fault_pfn(vma, mm, vaddr, pfn, &addr_mask,
prot & IOMMU_WRITE);
if (ret == -EAGAIN)
goto retry;
if (!ret) {
if (is_invalid_reserved_pfn(*pfn))
ret = 1;
else
if (is_invalid_reserved_pfn(*pfn)) {
unsigned long epfn;
epfn = (*pfn | (~addr_mask >> PAGE_SHIFT)) + 1;
ret = min_t(long, npages, epfn - *pfn);
} else {
ret = -EFAULT;
}
}
}
done:
@ -594,7 +621,7 @@ static int vaddr_get_pfns(struct mm_struct *mm, unsigned long vaddr,
* first page and all consecutive pages with the same locking.
*/
static long vfio_pin_pages_remote(struct vfio_dma *dma, unsigned long vaddr,
long npage, unsigned long *pfn_base,
unsigned long npage, unsigned long *pfn_base,
unsigned long limit, struct vfio_batch *batch)
{
unsigned long pfn;
@ -616,32 +643,42 @@ static long vfio_pin_pages_remote(struct vfio_dma *dma, unsigned long vaddr,
*pfn_base = 0;
}
if (unlikely(disable_hugepages))
npage = 1;
while (npage) {
if (!batch->size) {
/* Empty batch, so refill it. */
long req_pages = min_t(long, npage, batch->capacity);
ret = vaddr_get_pfns(mm, vaddr, req_pages, dma->prot,
&pfn, batch->pages);
ret = vaddr_get_pfns(mm, vaddr, npage, dma->prot,
&pfn, batch);
if (ret < 0)
goto unpin_out;
batch->size = ret;
batch->offset = 0;
if (!*pfn_base) {
*pfn_base = pfn;
rsvd = is_invalid_reserved_pfn(*pfn_base);
}
/* Handle pfnmap */
if (!batch->size) {
if (pfn != *pfn_base + pinned || !rsvd)
goto out;
pinned += ret;
npage -= ret;
vaddr += (PAGE_SIZE * ret);
iova += (PAGE_SIZE * ret);
continue;
}
}
/*
* pfn is preset for the first iteration of this inner loop and
* updated at the end to handle a VM_PFNMAP pfn. In that case,
* batch->pages isn't valid (there's no struct page), so allow
* batch->pages to be touched only when there's more than one
* pfn to check, which guarantees the pfns are from a
* !VM_PFNMAP vma.
* pfn is preset for the first iteration of this inner loop
* due to the fact that vaddr_get_pfns() needs to provide the
* initial pfn for pfnmaps. Therefore to reduce redundancy,
* the next pfn is fetched at the end of the loop.
* A PageReserved() page could still qualify as page backed
* and rsvd here, and therefore continues to use the batch.
*/
while (true) {
if (pfn != *pfn_base + pinned ||
@ -676,21 +713,12 @@ static long vfio_pin_pages_remote(struct vfio_dma *dma, unsigned long vaddr,
pfn = page_to_pfn(batch->pages[batch->offset]);
}
if (unlikely(disable_hugepages))
break;
}
out:
ret = vfio_lock_acct(dma, lock_acct, false);
unpin_out:
if (batch->size == 1 && !batch->offset) {
/* May be a VM_PFNMAP pfn, which the batch can't remember. */
put_pfn(pfn, dma->prot);
batch->size = 0;
}
if (ret < 0) {
if (pinned && !rsvd) {
for (pfn = *pfn_base ; pinned ; pfn++, pinned--)
@ -705,7 +733,7 @@ static long vfio_pin_pages_remote(struct vfio_dma *dma, unsigned long vaddr,
}
static long vfio_unpin_pages_remote(struct vfio_dma *dma, dma_addr_t iova,
unsigned long pfn, long npage,
unsigned long pfn, unsigned long npage,
bool do_accounting)
{
long unlocked = 0, locked = 0;
@ -728,7 +756,7 @@ static long vfio_unpin_pages_remote(struct vfio_dma *dma, dma_addr_t iova,
static int vfio_pin_page_external(struct vfio_dma *dma, unsigned long vaddr,
unsigned long *pfn_base, bool do_accounting)
{
struct page *pages[1];
struct vfio_batch batch;
struct mm_struct *mm;
int ret;
@ -736,7 +764,9 @@ static int vfio_pin_page_external(struct vfio_dma *dma, unsigned long vaddr,
if (!mmget_not_zero(mm))
return -ENODEV;
ret = vaddr_get_pfns(mm, vaddr, 1, dma->prot, pfn_base, pages);
vfio_batch_init_single(&batch);
ret = vaddr_get_pfns(mm, vaddr, 1, dma->prot, pfn_base, &batch);
if (ret != 1)
goto out;
@ -755,6 +785,7 @@ static int vfio_pin_page_external(struct vfio_dma *dma, unsigned long vaddr,
}
out:
vfio_batch_fini(&batch);
mmput(mm);
return ret;
}

View File

@ -2495,11 +2495,13 @@ struct follow_pfnmap_args {
* Outputs:
*
* @pfn: the PFN of the address
* @addr_mask: address mask covering pfn
* @pgprot: the pgprot_t of the mapping
* @writable: whether the mapping is writable
* @special: whether the mapping is a special mapping (real PFN maps)
*/
unsigned long pfn;
unsigned long addr_mask;
pgprot_t pgprot;
bool writable;
bool special;

View File

@ -6670,6 +6670,7 @@ static inline void pfnmap_args_setup(struct follow_pfnmap_args *args,
args->lock = lock;
args->ptep = ptep;
args->pfn = pfn_base + ((args->address & ~addr_mask) >> PAGE_SHIFT);
args->addr_mask = addr_mask;
args->pgprot = pgprot;
args->writable = writable;
args->special = special;