From b7efeb081ed3b80c6af38c285f5c3b5d15c560e9 Mon Sep 17 00:00:00 2001 From: Peng Jiang Date: Fri, 20 Jun 2025 08:41:04 +0800 Subject: [PATCH 1/6] xen/xenbus: fix W=1 build warning in xenbus_va_dev_error function This patch fixes a W=1 format-string warning reported by GCC 12.3.0 by annotating xenbus_switch_fatal() and xenbus_va_dev_error() with the __printf attribute. The attribute enables compile-time validation of printf-style format strings in these functions. The original warning trace: drivers/xen/xenbus/xenbus_client.c:304:9: warning: function 'xenbus_va_dev_error' might be a candidate for 'gnu_printf' format attribute [-Wsuggest-attribute=format] Signed-off-by: Peng Jiang Reviewed-by: Juergen Gross Message-ID: <20250620084104786r5xoR16_AmYZMJLnno3_Q@zte.com.cn> Signed-off-by: Juergen Gross --- drivers/xen/xenbus/xenbus_client.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/xen/xenbus/xenbus_client.c b/drivers/xen/xenbus/xenbus_client.c index 51b3124b0d56..e73ec225d4a6 100644 --- a/drivers/xen/xenbus/xenbus_client.c +++ b/drivers/xen/xenbus/xenbus_client.c @@ -202,6 +202,7 @@ int xenbus_watch_pathfmt(struct xenbus_device *dev, } EXPORT_SYMBOL_GPL(xenbus_watch_pathfmt); +__printf(4, 5) static void xenbus_switch_fatal(struct xenbus_device *, int, int, const char *, ...); @@ -287,6 +288,7 @@ int xenbus_frontend_closed(struct xenbus_device *dev) } EXPORT_SYMBOL_GPL(xenbus_frontend_closed); +__printf(3, 0) static void xenbus_va_dev_error(struct xenbus_device *dev, int err, const char *fmt, va_list ap) { From c79626899ddb613a1119dd2e8176bdcb26cd9100 Mon Sep 17 00:00:00 2001 From: Ryan Chung Date: Tue, 8 Jul 2025 09:14:40 +0900 Subject: [PATCH 2/6] xen-pciback: Replace scnprintf() with sysfs_emit_at() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This is the third revision (v3) of this patch series. No changes since v2—only adding Reviewed-by lines. Reviewed-by: Juergen Gross Signed-off-by: Ryan Chung Signed-off-by: Juergen Gross Message-ID: <20250708001444.86155-1-seokwoo.chung130@gmail.com> --- drivers/xen/xen-pciback/pci_stub.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/xen/xen-pciback/pci_stub.c b/drivers/xen/xen-pciback/pci_stub.c index 5c2f829d5b0b..045e74847fe6 100644 --- a/drivers/xen/xen-pciback/pci_stub.c +++ b/drivers/xen/xen-pciback/pci_stub.c @@ -1261,7 +1261,7 @@ static ssize_t slots_show(struct device_driver *drv, char *buf) if (count >= PAGE_SIZE) break; - count += scnprintf(buf + count, PAGE_SIZE - count, + count += sysfs_emit_at(buf, count, "%04x:%02x:%02x.%d\n", pci_dev_id->domain, pci_dev_id->bus, PCI_SLOT(pci_dev_id->devfn), @@ -1290,7 +1290,7 @@ static ssize_t irq_handlers_show(struct device_driver *drv, char *buf) if (!dev_data) continue; count += - scnprintf(buf + count, PAGE_SIZE - count, + sysfs_emit_at(buf, count, "%s:%s:%sing:%ld\n", pci_name(psdev->dev), dev_data->isr_on ? "on" : "off", @@ -1375,7 +1375,7 @@ static ssize_t quirks_show(struct device_driver *drv, char *buf) if (count >= PAGE_SIZE) goto out; - count += scnprintf(buf + count, PAGE_SIZE - count, + count += sysfs_emit_at(buf, count, "%02x:%02x.%01x\n\t%04x:%04x:%04x:%04x\n", quirk->pdev->bus->number, PCI_SLOT(quirk->pdev->devfn), @@ -1391,7 +1391,7 @@ static ssize_t quirks_show(struct device_driver *drv, char *buf) if (count >= PAGE_SIZE) goto out; - count += scnprintf(buf + count, PAGE_SIZE - count, + count += sysfs_emit_at(buf, count, "\t\t%08x:%01x:%08x\n", cfg_entry->base_offset + field->offset, field->size, @@ -1462,7 +1462,7 @@ static ssize_t permissive_show(struct device_driver *drv, char *buf) if (!dev_data || !dev_data->permissive) continue; count += - scnprintf(buf + count, PAGE_SIZE - count, "%s\n", + sysfs_emit_at(buf, count, "%s\n", pci_name(psdev->dev)); } spin_unlock_irqrestore(&pcistub_devices_lock, flags); @@ -1521,7 +1521,7 @@ static ssize_t allow_interrupt_control_show(struct device_driver *drv, if (!dev_data || !dev_data->allow_interrupt_control) continue; count += - scnprintf(buf + count, PAGE_SIZE - count, "%s\n", + sysfs_emit_at(buf, count, "%s\n", pci_name(psdev->dev)); } spin_unlock_irqrestore(&pcistub_devices_lock, flags); From 0df11950099887520cc8bf22a790a5535be30e8d Mon Sep 17 00:00:00 2001 From: "Dr. David Alan Gilbert" Date: Sun, 13 Jul 2025 14:26:25 +0100 Subject: [PATCH 3/6] xen: Remove some deadcode (x) Remove three uncalled functions: xenbus_mkdir() was added in 2007 by commit 4bac07c993d0 ("xen: add the Xenbus sysfs and virtual device hotplug driver") but has remained unused. xen_get_runstate_snapshot() last use was removed in 2016 by commit 6ba286ad8457 ("xen: support runqueue steal time on xen") which replaces the use by the _cpu version. xen_resume_notifier_unregister() last use was removed in 2017 by commit 1914f0cd203c ("xen/acpi: upload PM state from init-domain to Xen") Remove them. Signed-off-by: "Dr. David Alan Gilbert" Reviewed-by: Juergen Gross Signed-off-by: Juergen Gross Message-ID: <20250713132625.164728-1-linux@treblig.org> --- drivers/xen/manage.c | 6 ------ drivers/xen/time.c | 8 -------- drivers/xen/xenbus/xenbus_xs.c | 17 ----------------- include/xen/xen-ops.h | 2 -- include/xen/xenbus.h | 2 -- 5 files changed, 35 deletions(-) diff --git a/drivers/xen/manage.c b/drivers/xen/manage.c index b4b4ebed68da..841afa4933c7 100644 --- a/drivers/xen/manage.c +++ b/drivers/xen/manage.c @@ -52,12 +52,6 @@ void xen_resume_notifier_register(struct notifier_block *nb) } EXPORT_SYMBOL_GPL(xen_resume_notifier_register); -void xen_resume_notifier_unregister(struct notifier_block *nb) -{ - raw_notifier_chain_unregister(&xen_resume_notifier, nb); -} -EXPORT_SYMBOL_GPL(xen_resume_notifier_unregister); - #ifdef CONFIG_HIBERNATE_CALLBACKS static int xen_suspend(void *data) { diff --git a/drivers/xen/time.c b/drivers/xen/time.c index 152dd33bb223..5683383d2305 100644 --- a/drivers/xen/time.c +++ b/drivers/xen/time.c @@ -136,14 +136,6 @@ void xen_manage_runstate_time(int action) } } -/* - * Runstate accounting - */ -void xen_get_runstate_snapshot(struct vcpu_runstate_info *res) -{ - xen_get_runstate_snapshot_cpu(res, smp_processor_id()); -} - /* return true when a vcpu could run but has no real cpu to run on */ bool xen_vcpu_stolen(int vcpu) { diff --git a/drivers/xen/xenbus/xenbus_xs.c b/drivers/xen/xenbus/xenbus_xs.c index dcf9182c8451..3c9da446b85d 100644 --- a/drivers/xen/xenbus/xenbus_xs.c +++ b/drivers/xen/xenbus/xenbus_xs.c @@ -512,23 +512,6 @@ int xenbus_write(struct xenbus_transaction t, } EXPORT_SYMBOL_GPL(xenbus_write); -/* Create a new directory. */ -int xenbus_mkdir(struct xenbus_transaction t, - const char *dir, const char *node) -{ - char *path; - int ret; - - path = join(dir, node); - if (IS_ERR(path)) - return PTR_ERR(path); - - ret = xs_error(xs_single(t, XS_MKDIR, path, NULL)); - kfree(path); - return ret; -} -EXPORT_SYMBOL_GPL(xenbus_mkdir); - /* Destroy a file or directory (directories must be empty). */ int xenbus_rm(struct xenbus_transaction t, const char *dir, const char *node) { diff --git a/include/xen/xen-ops.h b/include/xen/xen-ops.h index 47f11bec5e90..9e2a769b0d96 100644 --- a/include/xen/xen-ops.h +++ b/include/xen/xen-ops.h @@ -30,13 +30,11 @@ void xen_arch_suspend(void); void xen_reboot(int reason); void xen_resume_notifier_register(struct notifier_block *nb); -void xen_resume_notifier_unregister(struct notifier_block *nb); bool xen_vcpu_stolen(int vcpu); void xen_setup_runstate_info(int cpu); void xen_time_setup_guest(void); void xen_manage_runstate_time(int action); -void xen_get_runstate_snapshot(struct vcpu_runstate_info *res); u64 xen_steal_clock(int cpu); int xen_setup_shutdown_event(void); diff --git a/include/xen/xenbus.h b/include/xen/xenbus.h index 3f90bdd387b6..1c23e6387f13 100644 --- a/include/xen/xenbus.h +++ b/include/xen/xenbus.h @@ -154,8 +154,6 @@ void *xenbus_read(struct xenbus_transaction t, const char *dir, const char *node, unsigned int *len); int xenbus_write(struct xenbus_transaction t, const char *dir, const char *node, const char *string); -int xenbus_mkdir(struct xenbus_transaction t, - const char *dir, const char *node); int xenbus_exists(struct xenbus_transaction t, const char *dir, const char *node); int xenbus_rm(struct xenbus_transaction t, const char *dir, const char *node); From 532c8b51b3a8676cbf533a291f8156774f30ea87 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 12 Jul 2025 06:09:16 +0100 Subject: [PATCH 4/6] xen: fix UAF in dmabuf_exp_from_pages() [dma_buf_fd() fixes; no preferences regarding the tree it goes through - up to xen folks] As soon as we'd inserted a file reference into descriptor table, another thread could close it. That's fine for the case when all we are doing is returning that descriptor to userland (it's a race, but it's a userland race and there's nothing the kernel can do about it). However, if we follow fd_install() with any kind of access to objects that would be destroyed on close (be it the struct file itself or anything destroyed by its ->release()), we have a UAF. dma_buf_fd() is a combination of reserving a descriptor and fd_install(). gntdev dmabuf_exp_from_pages() calls it and then proceeds to access the objects destroyed on close - starting with gntdev_dmabuf itself. Fix that by doing reserving descriptor before anything else and do fd_install() only when everything had been set up. Fixes: a240d6e42e28 ("xen/gntdev: Implement dma-buf export functionality") Signed-off-by: Al Viro Acked-by: Juergen Gross Message-ID: <20250712050916.GY1880847@ZenIV> Signed-off-by: Juergen Gross --- drivers/xen/gntdev-dmabuf.c | 28 ++++++++++------------------ 1 file changed, 10 insertions(+), 18 deletions(-) diff --git a/drivers/xen/gntdev-dmabuf.c b/drivers/xen/gntdev-dmabuf.c index 5453d86324f6..82855105ab85 100644 --- a/drivers/xen/gntdev-dmabuf.c +++ b/drivers/xen/gntdev-dmabuf.c @@ -357,8 +357,11 @@ struct gntdev_dmabuf_export_args { static int dmabuf_exp_from_pages(struct gntdev_dmabuf_export_args *args) { DEFINE_DMA_BUF_EXPORT_INFO(exp_info); - struct gntdev_dmabuf *gntdev_dmabuf; - int ret; + struct gntdev_dmabuf *gntdev_dmabuf __free(kfree) = NULL; + CLASS(get_unused_fd, ret)(O_CLOEXEC); + + if (ret < 0) + return ret; gntdev_dmabuf = kzalloc(sizeof(*gntdev_dmabuf), GFP_KERNEL); if (!gntdev_dmabuf) @@ -383,32 +386,21 @@ static int dmabuf_exp_from_pages(struct gntdev_dmabuf_export_args *args) exp_info.priv = gntdev_dmabuf; gntdev_dmabuf->dmabuf = dma_buf_export(&exp_info); - if (IS_ERR(gntdev_dmabuf->dmabuf)) { - ret = PTR_ERR(gntdev_dmabuf->dmabuf); - gntdev_dmabuf->dmabuf = NULL; - goto fail; - } - - ret = dma_buf_fd(gntdev_dmabuf->dmabuf, O_CLOEXEC); - if (ret < 0) - goto fail; + if (IS_ERR(gntdev_dmabuf->dmabuf)) + return PTR_ERR(gntdev_dmabuf->dmabuf); gntdev_dmabuf->fd = ret; args->fd = ret; pr_debug("Exporting DMA buffer with fd %d\n", ret); + get_file(gntdev_dmabuf->priv->filp); mutex_lock(&args->dmabuf_priv->lock); list_add(&gntdev_dmabuf->next, &args->dmabuf_priv->exp_list); mutex_unlock(&args->dmabuf_priv->lock); - get_file(gntdev_dmabuf->priv->filp); - return 0; -fail: - if (gntdev_dmabuf->dmabuf) - dma_buf_put(gntdev_dmabuf->dmabuf); - kfree(gntdev_dmabuf); - return ret; + fd_install(take_fd(ret), no_free_ptr(gntdev_dmabuf)->dmabuf->file); + return 0; } static struct gntdev_grant_map * From 70045cf6593cbf0740956ea9b7b4269142c6ee38 Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Thu, 3 Jul 2025 09:32:59 +0200 Subject: [PATCH 5/6] xen/gntdev: remove struct gntdev_copy_batch from stack When compiling the kernel with LLVM, the following warning was issued: drivers/xen/gntdev.c:991: warning: stack frame size (1160) exceeds limit (1024) in function 'gntdev_ioctl' The main reason is struct gntdev_copy_batch which is located on the stack and has a size of nearly 1kb. For performance reasons it shouldn't by just dynamically allocated instead, so allocate a new instance when needed and instead of freeing it put it into a list of free structs anchored in struct gntdev_priv. Fixes: a4cdb556cae0 ("xen/gntdev: add ioctl for grant copy") Reported-by: Abinash Singh Reviewed-by: Stefano Stabellini Signed-off-by: Juergen Gross Message-ID: <20250703073259.17356-1-jgross@suse.com> --- drivers/xen/gntdev-common.h | 4 +++ drivers/xen/gntdev.c | 71 ++++++++++++++++++++++++++----------- 2 files changed, 54 insertions(+), 21 deletions(-) diff --git a/drivers/xen/gntdev-common.h b/drivers/xen/gntdev-common.h index 9c286b2a1900..ac8ce3179ba2 100644 --- a/drivers/xen/gntdev-common.h +++ b/drivers/xen/gntdev-common.h @@ -26,6 +26,10 @@ struct gntdev_priv { /* lock protects maps and freeable_maps. */ struct mutex lock; + /* Free instances of struct gntdev_copy_batch. */ + struct gntdev_copy_batch *batch; + struct mutex batch_lock; + #ifdef CONFIG_XEN_GRANT_DMA_ALLOC /* Device for which DMA memory is allocated. */ struct device *dma_dev; diff --git a/drivers/xen/gntdev.c b/drivers/xen/gntdev.c index 61faea1f0663..1f2160765618 100644 --- a/drivers/xen/gntdev.c +++ b/drivers/xen/gntdev.c @@ -56,6 +56,18 @@ MODULE_AUTHOR("Derek G. Murray , " "Gerd Hoffmann "); MODULE_DESCRIPTION("User-space granted page access driver"); +#define GNTDEV_COPY_BATCH 16 + +struct gntdev_copy_batch { + struct gnttab_copy ops[GNTDEV_COPY_BATCH]; + struct page *pages[GNTDEV_COPY_BATCH]; + s16 __user *status[GNTDEV_COPY_BATCH]; + unsigned int nr_ops; + unsigned int nr_pages; + bool writeable; + struct gntdev_copy_batch *next; +}; + static unsigned int limit = 64*1024; module_param(limit, uint, 0644); MODULE_PARM_DESC(limit, @@ -584,6 +596,8 @@ static int gntdev_open(struct inode *inode, struct file *flip) INIT_LIST_HEAD(&priv->maps); mutex_init(&priv->lock); + mutex_init(&priv->batch_lock); + #ifdef CONFIG_XEN_GNTDEV_DMABUF priv->dmabuf_priv = gntdev_dmabuf_init(flip); if (IS_ERR(priv->dmabuf_priv)) { @@ -608,6 +622,7 @@ static int gntdev_release(struct inode *inode, struct file *flip) { struct gntdev_priv *priv = flip->private_data; struct gntdev_grant_map *map; + struct gntdev_copy_batch *batch; pr_debug("priv %p\n", priv); @@ -620,6 +635,14 @@ static int gntdev_release(struct inode *inode, struct file *flip) } mutex_unlock(&priv->lock); + mutex_lock(&priv->batch_lock); + while (priv->batch) { + batch = priv->batch; + priv->batch = batch->next; + kfree(batch); + } + mutex_unlock(&priv->batch_lock); + #ifdef CONFIG_XEN_GNTDEV_DMABUF gntdev_dmabuf_fini(priv->dmabuf_priv); #endif @@ -785,17 +808,6 @@ static long gntdev_ioctl_notify(struct gntdev_priv *priv, void __user *u) return rc; } -#define GNTDEV_COPY_BATCH 16 - -struct gntdev_copy_batch { - struct gnttab_copy ops[GNTDEV_COPY_BATCH]; - struct page *pages[GNTDEV_COPY_BATCH]; - s16 __user *status[GNTDEV_COPY_BATCH]; - unsigned int nr_ops; - unsigned int nr_pages; - bool writeable; -}; - static int gntdev_get_page(struct gntdev_copy_batch *batch, void __user *virt, unsigned long *gfn) { @@ -953,36 +965,53 @@ static int gntdev_grant_copy_seg(struct gntdev_copy_batch *batch, static long gntdev_ioctl_grant_copy(struct gntdev_priv *priv, void __user *u) { struct ioctl_gntdev_grant_copy copy; - struct gntdev_copy_batch batch; + struct gntdev_copy_batch *batch; unsigned int i; int ret = 0; if (copy_from_user(©, u, sizeof(copy))) return -EFAULT; - batch.nr_ops = 0; - batch.nr_pages = 0; + mutex_lock(&priv->batch_lock); + if (!priv->batch) { + batch = kmalloc(sizeof(*batch), GFP_KERNEL); + } else { + batch = priv->batch; + priv->batch = batch->next; + } + mutex_unlock(&priv->batch_lock); + if (!batch) + return -ENOMEM; + + batch->nr_ops = 0; + batch->nr_pages = 0; for (i = 0; i < copy.count; i++) { struct gntdev_grant_copy_segment seg; if (copy_from_user(&seg, ©.segments[i], sizeof(seg))) { ret = -EFAULT; + gntdev_put_pages(batch); goto out; } - ret = gntdev_grant_copy_seg(&batch, &seg, ©.segments[i].status); - if (ret < 0) + ret = gntdev_grant_copy_seg(batch, &seg, ©.segments[i].status); + if (ret < 0) { + gntdev_put_pages(batch); goto out; + } cond_resched(); } - if (batch.nr_ops) - ret = gntdev_copy(&batch); - return ret; + if (batch->nr_ops) + ret = gntdev_copy(batch); + + out: + mutex_lock(&priv->batch_lock); + batch->next = priv->batch; + priv->batch = batch; + mutex_unlock(&priv->batch_lock); - out: - gntdev_put_pages(&batch); return ret; } From 114a2de6fa86d99ed9546cc9113a3cad58beef79 Mon Sep 17 00:00:00 2001 From: Anthoine Bourgeois Date: Mon, 21 Jul 2025 09:34:54 +0000 Subject: [PATCH 6/6] xen/netfront: Fix TX response spurious interrupts We found at Vates that there are lot of spurious interrupts when benchmarking the xen-net PV driver frontend. This issue appeared with a patch that addresses security issue XSA-391 (b27d47950e48 "xen/netfront: harden netfront against event channel storms"). On an iperf benchmark, spurious interrupts can represent up to 50% of the interrupts. Spurious interrupts are interrupts that are rised for nothing, there is no work to do. This appends because the function that handles the interrupts ("xennet_tx_buf_gc") is also called at the end of the request path to garbage collect the responses received during the transmission load. The request path is doing the work that the interrupt handler should have done otherwise. This is particurary true when there is more than one vcpu and get worse linearly with the number of vcpu/queue. Moreover, this problem is amplifyed by the penalty imposed by a spurious interrupt. When an interrupt is found spurious the interrupt chip will delay the EOI to slowdown the backend. This delay will allow more responses to be handled by the request path and then there will be more chance the next interrupt will not find any work to do, creating a new spurious interrupt. This causes performance issue. The solution here is to remove the calls from the request path and let the interrupt handler do the processing of the responses. This approch removes most of the spurious interrupts (<0.05%) and also has the benefit of freeing up cycles in the request path, allowing it to process more work, which improves performance compared to masking the spurious interrupt one way or another. This optimization changes a part of the code that is present since the net frontend driver was upstreamed. There is no similar pattern in the other xen PV drivers. Since the first commit of xen-netfront is a blob that doesn't explain all the design choices I can only guess why this specific mecanism was here. This could have been introduce to compensate a slow backend at the time (maybe the backend was fixed or optimize later) or a small queue. In 18 years, both frontend and backend gain lot of features and optimizations that could have obsolete the feature of reaping completions from the TX path. Some vif throughput performance figures from a 8 vCPUs, 4GB of RAM HVM guest(s): Without this patch on the : vm -> dom0: 4.5Gb/s vm -> vm: 7.0Gb/s Without XSA-391 patch (revert of b27d47950e48): vm -> dom0: 8.3Gb/s vm -> vm: 8.7Gb/s With XSA-391 and this patch: vm -> dom0: 11.5Gb/s vm -> vm: 12.6Gb/s v2: - add revewed and tested by tags - resend with the maintainers in the recipients list v3: - remove Fixes tag but keep the commit ref in the explanation - add a paragraph on why this code was here Signed-off-by: Anthoine Bourgeois Reviewed-by: Juergen Gross Tested-by: Elliott Mitchell Signed-off-by: Juergen Gross Message-ID: <20250721093316.23560-1-anthoine.bourgeois@vates.tech> --- drivers/net/xen-netfront.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c index 9bac50963477..a11a0e949400 100644 --- a/drivers/net/xen-netfront.c +++ b/drivers/net/xen-netfront.c @@ -638,8 +638,6 @@ static int xennet_xdp_xmit_one(struct net_device *dev, tx_stats->packets++; u64_stats_update_end(&tx_stats->syncp); - xennet_tx_buf_gc(queue); - return 0; } @@ -849,9 +847,6 @@ static netdev_tx_t xennet_start_xmit(struct sk_buff *skb, struct net_device *dev tx_stats->packets++; u64_stats_update_end(&tx_stats->syncp); - /* Note: It is not safe to access skb after xennet_tx_buf_gc()! */ - xennet_tx_buf_gc(queue); - if (!netfront_tx_slot_available(queue)) netif_tx_stop_queue(netdev_get_tx_queue(dev, queue->id));