From a39d13e291c2681e475d9fd41655764dab09be7b Mon Sep 17 00:00:00 2001 From: Liming Wu Date: Thu, 31 Jul 2025 17:27:57 +0800 Subject: [PATCH 1/6] virtio_pci: Fix misleading comment for queue vector This patch fixes misleading comments in both legacy and modern virtio-pci device implementations. The comments previously referred to the "config vector" for parameters and return values of the `vp_legacy_queue_vector()` and `vp_modern_queue_vector()` functions, which is incorrect. Signed-off-by: Liming Wu Message-Id: <20250731092757.1000-1-liming.wu@jaguarmicro.com> Signed-off-by: Michael S. Tsirkin --- drivers/virtio/virtio_pci_legacy_dev.c | 4 ++-- drivers/virtio/virtio_pci_modern_dev.c | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/virtio/virtio_pci_legacy_dev.c b/drivers/virtio/virtio_pci_legacy_dev.c index 677d1f68bc9b..bbbf89c22880 100644 --- a/drivers/virtio/virtio_pci_legacy_dev.c +++ b/drivers/virtio/virtio_pci_legacy_dev.c @@ -140,9 +140,9 @@ EXPORT_SYMBOL_GPL(vp_legacy_set_status); * vp_legacy_queue_vector - set the MSIX vector for a specific virtqueue * @ldev: the legacy virtio-pci device * @index: queue index - * @vector: the config vector + * @vector: the queue vector * - * Returns the config vector read from the device + * Returns the queue vector read from the device */ u16 vp_legacy_queue_vector(struct virtio_pci_legacy_device *ldev, u16 index, u16 vector) diff --git a/drivers/virtio/virtio_pci_modern_dev.c b/drivers/virtio/virtio_pci_modern_dev.c index d665f8f73ea8..9e503b7a58d8 100644 --- a/drivers/virtio/virtio_pci_modern_dev.c +++ b/drivers/virtio/virtio_pci_modern_dev.c @@ -546,9 +546,9 @@ EXPORT_SYMBOL_GPL(vp_modern_set_queue_reset); * vp_modern_queue_vector - set the MSIX vector for a specific virtqueue * @mdev: the modern virtio-pci device * @index: queue index - * @vector: the config vector + * @vector: the queue vector * - * Returns the config vector read from the device + * Returns the queue vector read from the device */ u16 vp_modern_queue_vector(struct virtio_pci_modern_device *mdev, u16 index, u16 vector) From dd54bcf86c91a4455b1f95cbc8e9ac91205f3193 Mon Sep 17 00:00:00 2001 From: Nikolay Kuratov Date: Tue, 5 Aug 2025 16:09:17 +0300 Subject: [PATCH 2/6] vhost/net: Protect ubufs with rcu read lock in vhost_net_ubuf_put() When operating on struct vhost_net_ubuf_ref, the following execution sequence is theoretically possible: CPU0 is finalizing DMA operation CPU1 is doing VHOST_NET_SET_BACKEND // ubufs->refcount == 2 vhost_net_ubuf_put() vhost_net_ubuf_put_wait_and_free(oldubufs) vhost_net_ubuf_put_and_wait() vhost_net_ubuf_put() int r = atomic_sub_return(1, &ubufs->refcount); // r = 1 int r = atomic_sub_return(1, &ubufs->refcount); // r = 0 wait_event(ubufs->wait, !atomic_read(&ubufs->refcount)); // no wait occurs here because condition is already true kfree(ubufs); if (unlikely(!r)) wake_up(&ubufs->wait); // use-after-free This leads to use-after-free on ubufs access. This happens because CPU1 skips waiting for wake_up() when refcount is already zero. To prevent that use a read-side RCU critical section in vhost_net_ubuf_put(), as suggested by Hillf Danton. For this lock to take effect, free ubufs with kfree_rcu(). Cc: stable@vger.kernel.org Fixes: 0ad8b480d6ee9 ("vhost: fix ref cnt checking deadlock") Reported-by: Andrey Ryabinin Suggested-by: Hillf Danton Signed-off-by: Nikolay Kuratov Message-Id: <20250805130917.727332-1-kniv@yandex-team.ru> Signed-off-by: Michael S. Tsirkin --- drivers/vhost/net.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c index 6edac0c1ba9b..c6508fe0d5c8 100644 --- a/drivers/vhost/net.c +++ b/drivers/vhost/net.c @@ -99,6 +99,7 @@ struct vhost_net_ubuf_ref { atomic_t refcount; wait_queue_head_t wait; struct vhost_virtqueue *vq; + struct rcu_head rcu; }; #define VHOST_NET_BATCH 64 @@ -250,9 +251,13 @@ vhost_net_ubuf_alloc(struct vhost_virtqueue *vq, bool zcopy) static int vhost_net_ubuf_put(struct vhost_net_ubuf_ref *ubufs) { - int r = atomic_sub_return(1, &ubufs->refcount); + int r; + + rcu_read_lock(); + r = atomic_sub_return(1, &ubufs->refcount); if (unlikely(!r)) wake_up(&ubufs->wait); + rcu_read_unlock(); return r; } @@ -265,7 +270,7 @@ static void vhost_net_ubuf_put_and_wait(struct vhost_net_ubuf_ref *ubufs) static void vhost_net_ubuf_put_wait_and_free(struct vhost_net_ubuf_ref *ubufs) { vhost_net_ubuf_put_and_wait(ubufs); - kfree(ubufs); + kfree_rcu(ubufs, rcu); } static void vhost_net_clear_ubuf_info(struct vhost_net *n) From ced17ee32a9988b8a260628e7c31a100d7dc082e Mon Sep 17 00:00:00 2001 From: Igor Torrente Date: Thu, 7 Aug 2025 09:41:45 -0300 Subject: [PATCH 3/6] Revert "virtio: reject shm region if length is zero" The commit 206cc44588f7 ("virtio: reject shm region if length is zero") breaks the Virtio-gpu `host_visible` feature. As you can see in the snippet below, host_visible_region is zero because of the `kzalloc`. It's using the `vm_get_shm_region` (drivers/virtio/virtio_mmio.c:536) to read the `addr` and `len` from qemu/crosvm. ``` drivers/gpu/drm/virtio/virtgpu_kms.c 132 vgdev = drmm_kzalloc(dev, sizeof(struct virtio_gpu_device), GFP_KERNEL); [...] 177 if (virtio_get_shm_region(vgdev->vdev, &vgdev->host_visible_region, 178 VIRTIO_GPU_SHM_ID_HOST_VISIBLE)) { ``` Now it always fails. To fix, revert the offending commit. Fixes: 206cc44588f7 ("virtio: reject shm region if length is zero") Signed-off-by: Igor Torrente Message-Id: <20250807124145.81816-1-igor.torrente@collabora.com> Signed-off-by: Michael S. Tsirkin --- include/linux/virtio_config.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/include/linux/virtio_config.h b/include/linux/virtio_config.h index 918cf25cd3c6..8bf156dde554 100644 --- a/include/linux/virtio_config.h +++ b/include/linux/virtio_config.h @@ -328,8 +328,6 @@ static inline bool virtio_get_shm_region(struct virtio_device *vdev, struct virtio_shm_region *region, u8 id) { - if (!region->len) - return false; if (!vdev->config->get_shm_region) return false; return vdev->config->get_shm_region(vdev, region, id); From 24fc631539cc78225f5c61f99c7666fcff48024d Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Mon, 18 Aug 2025 23:39:57 -0700 Subject: [PATCH 4/6] vhost: Fix ioctl # for VHOST_[GS]ET_FORK_FROM_OWNER MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The VHOST_[GS]ET_FEATURES_ARRAY ioctl already took 0x83 and it would result in a build error when the vhost uapi header is used for perf tool build like below. In file included from trace/beauty/ioctl.c:93: tools/perf/trace/beauty/generated/ioctl/vhost_virtio_ioctl_array.c: In function ‘ioctl__scnprintf_vhost_virtio_cmd’: tools/perf/trace/beauty/generated/ioctl/vhost_virtio_ioctl_array.c:36:18: error: initialized field overwritten [-Werror=override-init] 36 | [0x83] = "SET_FORK_FROM_OWNER", | ^~~~~~~~~~~~~~~~~~~~~ tools/perf/trace/beauty/generated/ioctl/vhost_virtio_ioctl_array.c:36:18: note: (near initialization for ‘vhost_virtio_ioctl_cmds[131]’) Fixes: 7d9896e9f6d02d8a ("vhost: Reintroduce kthread API and add mode selection") Signed-off-by: Namhyung Kim Message-Id: <20250819063958.833770-1-namhyung@kernel.org> Signed-off-by: Michael S. Tsirkin Tested-by: Lei Yang --- include/uapi/linux/vhost.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/uapi/linux/vhost.h b/include/uapi/linux/vhost.h index 283348b64af9..c57674a6aa0d 100644 --- a/include/uapi/linux/vhost.h +++ b/include/uapi/linux/vhost.h @@ -260,7 +260,7 @@ * When fork_owner is set to VHOST_FORK_OWNER_KTHREAD: * - Vhost will create vhost workers as kernel threads. */ -#define VHOST_SET_FORK_FROM_OWNER _IOW(VHOST_VIRTIO, 0x83, __u8) +#define VHOST_SET_FORK_FROM_OWNER _IOW(VHOST_VIRTIO, 0x84, __u8) /** * VHOST_GET_FORK_OWNER - Get the current fork_owner flag for the vhost device. @@ -268,6 +268,6 @@ * * @return: An 8-bit value indicating the current thread mode. */ -#define VHOST_GET_FORK_FROM_OWNER _IOR(VHOST_VIRTIO, 0x84, __u8) +#define VHOST_GET_FORK_FROM_OWNER _IOR(VHOST_VIRTIO, 0x85, __u8) #endif From 528d92bfc0937a6a1ec837dbbcb3612a8545cd37 Mon Sep 17 00:00:00 2001 From: Ying Gao Date: Tue, 12 Aug 2025 17:51:18 +0800 Subject: [PATCH 5/6] virtio_input: Improve freeze handling MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When executing suspend to ram, if lacking the operations to reset device and free unused buffers before deleting a vq, resource leaks and inconsistent device status will appear. According to chapter "3.3.1 Driver Requirements: Device Cleanup:" of virtio-specification: Driver MUST ensure a virtqueue isn’t live (by device reset) before removing exposed buffers. Therefore, modify the virtinput_freeze function to reset the device and delete the unused buffers before deleting the virtqueue, just like virtinput_remove does. Co-developed-by: Ying Xu Signed-off-by: Ying Xu Co-developed-by: Junnan Wu Signed-off-by: Junnan Wu Signed-off-by: Ying Gao Message-Id: <20250812095118.3622717-1-ying01.gao@samsung.com> Signed-off-by: Michael S. Tsirkin --- drivers/virtio/virtio_input.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/virtio/virtio_input.c b/drivers/virtio/virtio_input.c index a5d63269f20b..d0728285b6ce 100644 --- a/drivers/virtio/virtio_input.c +++ b/drivers/virtio/virtio_input.c @@ -360,11 +360,15 @@ static int virtinput_freeze(struct virtio_device *vdev) { struct virtio_input *vi = vdev->priv; unsigned long flags; + void *buf; spin_lock_irqsave(&vi->lock, flags); vi->ready = false; spin_unlock_irqrestore(&vi->lock, flags); + virtio_reset_device(vdev); + while ((buf = virtqueue_detach_unused_buf(vi->sts)) != NULL) + kfree(buf); vdev->config->del_vqs(vdev); return 0; } From 45d8ef6322b8a828d3b1e2cfb8893e2ff882cb23 Mon Sep 17 00:00:00 2001 From: Junnan Wu Date: Tue, 12 Aug 2025 17:08:17 +0800 Subject: [PATCH 6/6] virtio_net: adjust the execution order of function `virtnet_close` during freeze "Use after free" issue appears in suspend once race occurs when napi poll scheduls after `netif_device_detach` and before napi disables. For details, during suspend flow of virtio-net, the tx queue state is set to "__QUEUE_STATE_DRV_XOFF" by CPU-A. And at some coincidental times, if a TCP connection is still working, CPU-B does `virtnet_poll` before napi disable. In this flow, the state "__QUEUE_STATE_DRV_XOFF" of tx queue will be cleared. This is not the normal process it expects. After that, CPU-A continues to close driver then virtqueue is removed. Sequence likes below: -------------------------------------------------------------------------- CPU-A CPU-B ----- ----- suspend is called A TCP based on virtio-net still work virtnet_freeze |- virtnet_freeze_down | |- netif_device_detach | | |- netif_tx_stop_all_queues | | |- netif_tx_stop_queue | | |- set_bit | | (__QUEUE_STATE_DRV_XOFF,...) | | softirq rasied | | |- net_rx_action | | |- napi_poll | | |- virtnet_poll | | |- virtnet_poll_cleantx | | |- netif_tx_wake_queue | | |- test_and_clear_bit | | (__QUEUE_STATE_DRV_XOFF,...) | |- virtnet_close | |- virtnet_disable_queue_pair | |- virtnet_napi_tx_disable |- remove_vq_common -------------------------------------------------------------------------- When TCP delayack timer is up, a cpu gets softirq and irq handler `tcp_delack_timer_handler` will be called, which will finally call `start_xmit` in virtio net driver. Then the access to tx virtq will cause panic. The root cause of this issue is that napi tx is not disable before `netif_tx_stop_queue`, once `virnet_poll` schedules in such coincidental time, the tx queue state will be cleared. To solve this issue, adjusts the order of function `virtnet_close` in `virtnet_freeze_down`. Co-developed-by: Ying Xu Signed-off-by: Ying Xu Signed-off-by: Junnan Wu Message-Id: <20250812090817.3463403-1-junnan01.wu@samsung.com> Signed-off-by: Michael S. Tsirkin --- drivers/net/virtio_net.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index d14e6d602273..975bdc5dab84 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -5758,14 +5758,15 @@ static void virtnet_freeze_down(struct virtio_device *vdev) disable_rx_mode_work(vi); flush_work(&vi->rx_mode_work); - netif_tx_lock_bh(vi->dev); - netif_device_detach(vi->dev); - netif_tx_unlock_bh(vi->dev); if (netif_running(vi->dev)) { rtnl_lock(); virtnet_close(vi->dev); rtnl_unlock(); } + + netif_tx_lock_bh(vi->dev); + netif_device_detach(vi->dev); + netif_tx_unlock_bh(vi->dev); } static int init_vqs(struct virtnet_info *vi);