mirror of
https://github.com/torvalds/linux.git
synced 2026-05-25 15:41:52 +02:00
vfio/pci: Use RCU for error/request triggers to avoid circular locking
Thanks to a device generating an ACS violation during bus reset,
lockdep reported the following circular locking issue:
CPU0: SET_IRQS (MSI/X): holds igate, acquires memory_lock
CPU1: HOT_RESET: holds memory_lock, acquires pci_bus_sem
CPU2: AER: holds pci_bus_sem, acquires igate
This results in a potential 3-way deadlock.
Remove the pci_bus_sem->igate leg of the triangle by using RCU
to peek at the eventfd rather than locking it with igate.
Fixes: 3be3a074cf ("vfio-pci: Don't use device_lock around AER interrupt setup")
Signed-off-by: Alex Williamson <alex.williamson@nvidia.com>
Reviewed-by: Jason Gunthorpe <jgg@nvidia.com>
Link: https://lore.kernel.org/r/20251124223623.2770706-1-alex@shazbot.org
Signed-off-by: Alex Williamson <alex@shazbot.org>
This commit is contained in:
parent
fa804aa4ac
commit
98693e0897
|
|
@ -42,6 +42,40 @@ static bool nointxmask;
|
|||
static bool disable_vga;
|
||||
static bool disable_idle_d3;
|
||||
|
||||
/*
 * RCU callback that releases a retired struct vfio_pci_eventfd.
 *
 * Queued with call_rcu() by vfio_pci_eventfd_replace_locked() once the
 * wrapper has been unpublished, so it runs only after readers that might
 * still hold the old pointer under rcu_read_lock() have finished.
 *
 * @rcu: the rcu_head embedded in the wrapper being released.
 */
static void vfio_pci_eventfd_rcu_free(struct rcu_head *rcu)
|
||||
{
|
||||
/* Recover the enclosing wrapper from its embedded rcu_head. */
struct vfio_pci_eventfd *eventfd =
|
||||
container_of(rcu, struct vfio_pci_eventfd, rcu);
|
||||
|
||||
/* Drop the eventfd context reference stored in the wrapper, then the wrapper. */
eventfd_ctx_put(eventfd->ctx);
|
||||
kfree(eventfd);
|
||||
}
|
||||
|
||||
/*
 * Install, swap or clear an RCU-published eventfd trigger.
 *
 * @vdev:     device whose igate mutex the caller must already hold.
 * @peventfd: RCU-managed slot to update (used in this file with
 *            &vdev->err_trigger and &vdev->req_trigger).
 * @ctx:      eventfd context to publish, or NULL to clear the slot.
 *
 * When @ctx is non-NULL a new wrapper is allocated to carry it; the stored
 * context is later released with eventfd_ctx_put() by
 * vfio_pci_eventfd_rcu_free() when that wrapper is itself replaced.
 *
 * Return: 0 on success, -ENOMEM if the wrapper cannot be allocated. On
 * -ENOMEM the slot is left untouched and @ctx is not released here.
 */
int vfio_pci_eventfd_replace_locked(struct vfio_pci_core_device *vdev,
|
||||
struct vfio_pci_eventfd __rcu **peventfd,
|
||||
struct eventfd_ctx *ctx)
|
||||
{
|
||||
/* Stays NULL when clearing, so the slot is published as empty. */
struct vfio_pci_eventfd *new = NULL;
|
||||
struct vfio_pci_eventfd *old;
|
||||
|
||||
/* Updaters are serialized by igate; readers use rcu_read_lock() instead. */
lockdep_assert_held(&vdev->igate);
|
||||
|
||||
if (ctx) {
|
||||
/* Allocate before publishing so a failure leaves the old trigger in place. */
new = kzalloc(sizeof(*new), GFP_KERNEL_ACCOUNT);
|
||||
if (!new)
|
||||
return -ENOMEM;
|
||||
|
||||
new->ctx = ctx;
|
||||
}
|
||||
|
||||
/* Publish the new wrapper (or NULL) and fetch the one it replaces. */
old = rcu_replace_pointer(*peventfd, new,
|
||||
lockdep_is_held(&vdev->igate));
|
||||
/*
 * Readers may still be using the old wrapper under rcu_read_lock(),
 * so defer its release to the RCU callback instead of freeing it here.
 */
if (old)
|
||||
call_rcu(&old->rcu, vfio_pci_eventfd_rcu_free);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* List of PFs that vfio_pci_core_sriov_configure() has been called on */
|
||||
static DEFINE_MUTEX(vfio_pci_sriov_pfs_mutex);
|
||||
static LIST_HEAD(vfio_pci_sriov_pfs);
|
||||
|
|
@ -697,14 +731,8 @@ void vfio_pci_core_close_device(struct vfio_device *core_vdev)
|
|||
vfio_pci_dma_buf_cleanup(vdev);
|
||||
|
||||
mutex_lock(&vdev->igate);
|
||||
if (vdev->err_trigger) {
|
||||
eventfd_ctx_put(vdev->err_trigger);
|
||||
vdev->err_trigger = NULL;
|
||||
}
|
||||
if (vdev->req_trigger) {
|
||||
eventfd_ctx_put(vdev->req_trigger);
|
||||
vdev->req_trigger = NULL;
|
||||
}
|
||||
vfio_pci_eventfd_replace_locked(vdev, &vdev->err_trigger, NULL);
|
||||
vfio_pci_eventfd_replace_locked(vdev, &vdev->req_trigger, NULL);
|
||||
mutex_unlock(&vdev->igate);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(vfio_pci_core_close_device);
|
||||
|
|
@ -1784,21 +1812,21 @@ void vfio_pci_core_request(struct vfio_device *core_vdev, unsigned int count)
|
|||
struct vfio_pci_core_device *vdev =
|
||||
container_of(core_vdev, struct vfio_pci_core_device, vdev);
|
||||
struct pci_dev *pdev = vdev->pdev;
|
||||
struct vfio_pci_eventfd *eventfd;
|
||||
|
||||
mutex_lock(&vdev->igate);
|
||||
|
||||
if (vdev->req_trigger) {
|
||||
rcu_read_lock();
|
||||
eventfd = rcu_dereference(vdev->req_trigger);
|
||||
if (eventfd) {
|
||||
if (!(count % 10))
|
||||
pci_notice_ratelimited(pdev,
|
||||
"Relaying device request to user (#%u)\n",
|
||||
count);
|
||||
eventfd_signal(vdev->req_trigger);
|
||||
eventfd_signal(eventfd->ctx);
|
||||
} else if (count == 0) {
|
||||
pci_warn(pdev,
|
||||
"No device request channel registered, blocked until released by user\n");
|
||||
}
|
||||
|
||||
mutex_unlock(&vdev->igate);
|
||||
rcu_read_unlock();
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(vfio_pci_core_request);
|
||||
|
||||
|
|
@ -2216,13 +2244,13 @@ pci_ers_result_t vfio_pci_core_aer_err_detected(struct pci_dev *pdev,
|
|||
pci_channel_state_t state)
|
||||
{
|
||||
struct vfio_pci_core_device *vdev = dev_get_drvdata(&pdev->dev);
|
||||
struct vfio_pci_eventfd *eventfd;
|
||||
|
||||
mutex_lock(&vdev->igate);
|
||||
|
||||
if (vdev->err_trigger)
|
||||
eventfd_signal(vdev->err_trigger);
|
||||
|
||||
mutex_unlock(&vdev->igate);
|
||||
rcu_read_lock();
|
||||
eventfd = rcu_dereference(vdev->err_trigger);
|
||||
if (eventfd)
|
||||
eventfd_signal(eventfd->ctx);
|
||||
rcu_read_unlock();
|
||||
|
||||
return PCI_ERS_RESULT_CAN_RECOVER;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -731,21 +731,27 @@ static int vfio_pci_set_msi_trigger(struct vfio_pci_core_device *vdev,
|
|||
return 0;
|
||||
}
|
||||
|
||||
static int vfio_pci_set_ctx_trigger_single(struct eventfd_ctx **ctx,
|
||||
static int vfio_pci_set_ctx_trigger_single(struct vfio_pci_core_device *vdev,
|
||||
struct vfio_pci_eventfd __rcu **peventfd,
|
||||
unsigned int count, uint32_t flags,
|
||||
void *data)
|
||||
{
|
||||
/* DATA_NONE/DATA_BOOL enables loopback testing */
|
||||
if (flags & VFIO_IRQ_SET_DATA_NONE) {
|
||||
if (*ctx) {
|
||||
if (count) {
|
||||
eventfd_signal(*ctx);
|
||||
} else {
|
||||
eventfd_ctx_put(*ctx);
|
||||
*ctx = NULL;
|
||||
}
|
||||
struct vfio_pci_eventfd *eventfd;
|
||||
|
||||
eventfd = rcu_dereference_protected(*peventfd,
|
||||
lockdep_is_held(&vdev->igate));
|
||||
|
||||
if (!eventfd)
|
||||
return -EINVAL;
|
||||
|
||||
if (count) {
|
||||
eventfd_signal(eventfd->ctx);
|
||||
return 0;
|
||||
}
|
||||
|
||||
return vfio_pci_eventfd_replace_locked(vdev, peventfd, NULL);
|
||||
} else if (flags & VFIO_IRQ_SET_DATA_BOOL) {
|
||||
uint8_t trigger;
|
||||
|
||||
|
|
@ -753,8 +759,15 @@ static int vfio_pci_set_ctx_trigger_single(struct eventfd_ctx **ctx,
|
|||
return -EINVAL;
|
||||
|
||||
trigger = *(uint8_t *)data;
|
||||
if (trigger && *ctx)
|
||||
eventfd_signal(*ctx);
|
||||
|
||||
if (trigger) {
|
||||
struct vfio_pci_eventfd *eventfd =
|
||||
rcu_dereference_protected(*peventfd,
|
||||
lockdep_is_held(&vdev->igate));
|
||||
|
||||
if (eventfd)
|
||||
eventfd_signal(eventfd->ctx);
|
||||
}
|
||||
|
||||
return 0;
|
||||
} else if (flags & VFIO_IRQ_SET_DATA_EVENTFD) {
|
||||
|
|
@ -765,22 +778,23 @@ static int vfio_pci_set_ctx_trigger_single(struct eventfd_ctx **ctx,
|
|||
|
||||
fd = *(int32_t *)data;
|
||||
if (fd == -1) {
|
||||
if (*ctx)
|
||||
eventfd_ctx_put(*ctx);
|
||||
*ctx = NULL;
|
||||
return vfio_pci_eventfd_replace_locked(vdev,
|
||||
peventfd, NULL);
|
||||
} else if (fd >= 0) {
|
||||
struct eventfd_ctx *efdctx;
|
||||
int ret;
|
||||
|
||||
efdctx = eventfd_ctx_fdget(fd);
|
||||
if (IS_ERR(efdctx))
|
||||
return PTR_ERR(efdctx);
|
||||
|
||||
if (*ctx)
|
||||
eventfd_ctx_put(*ctx);
|
||||
ret = vfio_pci_eventfd_replace_locked(vdev,
|
||||
peventfd, efdctx);
|
||||
if (ret)
|
||||
eventfd_ctx_put(efdctx);
|
||||
|
||||
*ctx = efdctx;
|
||||
return ret;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
return -EINVAL;
|
||||
|
|
@ -793,7 +807,7 @@ static int vfio_pci_set_err_trigger(struct vfio_pci_core_device *vdev,
|
|||
if (index != VFIO_PCI_ERR_IRQ_INDEX || start != 0 || count > 1)
|
||||
return -EINVAL;
|
||||
|
||||
return vfio_pci_set_ctx_trigger_single(&vdev->err_trigger,
|
||||
return vfio_pci_set_ctx_trigger_single(vdev, &vdev->err_trigger,
|
||||
count, flags, data);
|
||||
}
|
||||
|
||||
|
|
@ -804,7 +818,7 @@ static int vfio_pci_set_req_trigger(struct vfio_pci_core_device *vdev,
|
|||
if (index != VFIO_PCI_REQ_IRQ_INDEX || start != 0 || count > 1)
|
||||
return -EINVAL;
|
||||
|
||||
return vfio_pci_set_ctx_trigger_single(&vdev->req_trigger,
|
||||
return vfio_pci_set_ctx_trigger_single(vdev, &vdev->req_trigger,
|
||||
count, flags, data);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -26,6 +26,10 @@ struct vfio_pci_ioeventfd {
|
|||
bool vfio_pci_intx_mask(struct vfio_pci_core_device *vdev);
|
||||
void vfio_pci_intx_unmask(struct vfio_pci_core_device *vdev);
|
||||
|
||||
int vfio_pci_eventfd_replace_locked(struct vfio_pci_core_device *vdev,
|
||||
struct vfio_pci_eventfd __rcu **peventfd,
|
||||
struct eventfd_ctx *ctx);
|
||||
|
||||
int vfio_pci_set_irqs_ioctl(struct vfio_pci_core_device *vdev, uint32_t flags,
|
||||
unsigned index, unsigned start, unsigned count,
|
||||
void *data);
|
||||
|
|
|
|||
|
|
@ -12,6 +12,7 @@
|
|||
#include <linux/pci.h>
|
||||
#include <linux/vfio.h>
|
||||
#include <linux/irqbypass.h>
|
||||
#include <linux/rcupdate.h>
|
||||
#include <linux/types.h>
|
||||
#include <linux/uuid.h>
|
||||
#include <linux/notifier.h>
|
||||
|
|
@ -29,6 +30,11 @@ struct vfio_pci_region;
|
|||
struct p2pdma_provider;
|
||||
struct dma_buf_phys_vec;
|
||||
|
||||
/*
 * Wrapper that lets an eventfd context be published through an __rcu
 * pointer (err_trigger / req_trigger in struct vfio_pci_core_device).
 */
struct vfio_pci_eventfd {
|
||||
/* eventfd context signalled by readers; put when the wrapper is freed. */
struct eventfd_ctx *ctx;
|
||||
/* Handle passed to call_rcu() to defer freeing of a replaced wrapper. */
struct rcu_head rcu;
|
||||
};
|
||||
|
||||
struct vfio_pci_regops {
|
||||
ssize_t (*rw)(struct vfio_pci_core_device *vdev, char __user *buf,
|
||||
size_t count, loff_t *ppos, bool iswrite);
|
||||
|
|
@ -124,8 +130,8 @@ struct vfio_pci_core_device {
|
|||
struct pci_saved_state *pci_saved_state;
|
||||
struct pci_saved_state *pm_save;
|
||||
int ioeventfds_nr;
|
||||
struct eventfd_ctx *err_trigger;
|
||||
struct eventfd_ctx *req_trigger;
|
||||
struct vfio_pci_eventfd __rcu *err_trigger;
|
||||
struct vfio_pci_eventfd __rcu *req_trigger;
|
||||
struct eventfd_ctx *pm_wake_eventfd_ctx;
|
||||
struct list_head dummy_resources_list;
|
||||
struct mutex ioeventfds_lock;
|
||||
|
|
|
|||
Loading…
Reference in New Issue
Block a user