From 2e7b5cf72e51c9cf9c8b75190189c757df31ddd9 Mon Sep 17 00:00:00 2001 From: Joy Zou Date: Wed, 17 Sep 2025 17:53:42 +0800 Subject: [PATCH 001/401] dmaengine: fsl-edma: fix channel parameter config for fixed channel requests Configure only the requested channel when a fixed channel is specified to avoid modifying other channels unintentionally. Fix parameter configuration when a fixed DMA channel is requested on i.MX9 AON domain and i.MX8QM/QXP/DXL platforms. When a client requests a fixed channel (e.g., channel 6), the driver traverses channels 0-5 and may unintentionally modify their configuration if they are unused. This leads to issues such as setting the `is_multi_fifo` flag unexpectedly, causing memcpy tests to fail when using the dmatest tool. Only affect edma memcpy test when the channel is fixed. Fixes: 72f5801a4e2b ("dmaengine: fsl-edma: integrate v3 support") Signed-off-by: Joy Zou Cc: stable@vger.kernel.org Reviewed-by: Frank Li Link: https://patch.msgid.link/20250917-b4-edma-chanconf-v1-1-886486e02e91@nxp.com Signed-off-by: Vinod Koul --- drivers/dma/fsl-edma-main.c | 26 +++++++++++--------------- 1 file changed, 11 insertions(+), 15 deletions(-) diff --git a/drivers/dma/fsl-edma-main.c b/drivers/dma/fsl-edma-main.c index dbcdd1e68319..b596baa0a182 100644 --- a/drivers/dma/fsl-edma-main.c +++ b/drivers/dma/fsl-edma-main.c @@ -317,10 +317,8 @@ static struct dma_chan *fsl_edma3_xlate(struct of_phandle_args *dma_spec, return NULL; i = fsl_chan - fsl_edma->chans; - fsl_chan->priority = dma_spec->args[1]; - fsl_chan->is_rxchan = dma_spec->args[2] & FSL_EDMA_RX; - fsl_chan->is_remote = dma_spec->args[2] & FSL_EDMA_REMOTE; - fsl_chan->is_multi_fifo = dma_spec->args[2] & FSL_EDMA_MULTI_FIFO; + if (!b_chmux && i != dma_spec->args[0]) + continue; if ((dma_spec->args[2] & FSL_EDMA_EVEN_CH) && (i & 0x1)) continue; @@ -328,17 +326,15 @@ static struct dma_chan *fsl_edma3_xlate(struct of_phandle_args *dma_spec, if ((dma_spec->args[2] & FSL_EDMA_ODD_CH) && !(i & 0x1)) continue; - if (!b_chmux && i == dma_spec->args[0]) { - chan = dma_get_slave_channel(chan); - chan->device->privatecnt++; - return chan; - } else if (b_chmux && !fsl_chan->srcid) { - /* if controller support channel mux, choose a free channel */ - chan = dma_get_slave_channel(chan); - chan->device->privatecnt++; - fsl_chan->srcid = dma_spec->args[0]; - return chan; - } + fsl_chan->srcid = dma_spec->args[0]; + fsl_chan->priority = dma_spec->args[1]; + fsl_chan->is_rxchan = dma_spec->args[2] & FSL_EDMA_RX; + fsl_chan->is_remote = dma_spec->args[2] & FSL_EDMA_REMOTE; + fsl_chan->is_multi_fifo = dma_spec->args[2] & FSL_EDMA_MULTI_FIFO; + + chan = dma_get_slave_channel(chan); + chan->device->privatecnt++; + return chan; } return NULL; } From 77b19d053ac2cce9e873007ad4b09f2323c93576 Mon Sep 17 00:00:00 2001 From: Shenghui Shi Date: Mon, 9 Feb 2026 18:37:25 +0800 Subject: [PATCH 002/401] dmaengine: dw-edma: fix MSI data programming for multi-IRQ case When using MSI (not MSI-X) with multiple IRQs, the MSI data value must be unique per vector to ensure correct interrupt delivery. Currently, the driver fails to increment the MSI data per vector, causing interrupts to be misrouted. Fix this by caching the base MSI data and adjusting each vector's data accordingly during IRQ setup. Fixes: e63d79d1ff04 ("dmaengine: dw-edma: Add Synopsys DesignWare eDMA IP core driver") Signed-off-by: Shenghui Shi Reviewed-by: Frank Li Link: https://patch.msgid.link/20260209103726.414-1-brody.shi@m2semi.com Signed-off-by: Vinod Koul --- drivers/dma/dw-edma/dw-edma-core.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/dma/dw-edma/dw-edma-core.c b/drivers/dma/dw-edma/dw-edma-core.c index e7d698b352d3..5397dbda4f72 100644 --- a/drivers/dma/dw-edma/dw-edma-core.c +++ b/drivers/dma/dw-edma/dw-edma-core.c @@ -844,6 +844,7 @@ static int dw_edma_irq_request(struct dw_edma *dw, { struct dw_edma_chip *chip = dw->chip; struct device *dev = dw->chip->dev; + struct msi_desc *msi_desc; u32 wr_mask = 1; u32 rd_mask = 1; int i, err = 0; @@ -895,9 +896,12 @@ static int dw_edma_irq_request(struct dw_edma *dw, &dw->irq[i]); if (err) goto err_irq_free; - - if (irq_get_msi_desc(irq)) + msi_desc = irq_get_msi_desc(irq); + if (msi_desc) { get_cached_msi_msg(irq, &dw->irq[i].msi); + if (!msi_desc->pci.msi_attrib.is_msix) + dw->irq[i].msi.data = dw->irq[0].msi.data + i; + } } dw->nr_irqs = i; From caf91cdf2de8b7134749d32cd4ae5520b108abb7 Mon Sep 17 00:00:00 2001 From: Vinicius Costa Gomes Date: Wed, 21 Jan 2026 10:34:27 -0800 Subject: [PATCH 003/401] dmaengine: idxd: Fix lockdep warnings when calling idxd_device_config() Move the check for IDXD_FLAG_CONFIGURABLE and the locking to "inside" idxd_device_config(), as this is common to all callers, and the one that wasn't holding the lock was an error (that was causing the lockdep warning). Suggested-by: Dave Jiang Reviewed-by: Dave Jiang Signed-off-by: Vinicius Costa Gomes Link: https://patch.msgid.link/20260121-idxd-fix-flr-on-kernel-queues-v3-v3-1-7ed70658a9d1@intel.com Signed-off-by: Vinod Koul --- drivers/dma/idxd/device.c | 17 +++++++---------- drivers/dma/idxd/init.c | 10 ++++------ 2 files changed, 11 insertions(+), 16 deletions(-) diff --git a/drivers/dma/idxd/device.c b/drivers/dma/idxd/device.c index c26128529ff4..a704475d87b3 100644 --- a/drivers/dma/idxd/device.c +++ b/drivers/dma/idxd/device.c @@ -1125,7 +1125,11 @@ int idxd_device_config(struct idxd_device *idxd) { int rc; - lockdep_assert_held(&idxd->dev_lock); + guard(spinlock)(&idxd->dev_lock); + + if (!test_bit(IDXD_FLAG_CONFIGURABLE, &idxd->flags)) + return 0; + rc = idxd_wqs_setup(idxd); if (rc < 0) return rc; @@ -1454,11 +1458,7 @@ int idxd_drv_enable_wq(struct idxd_wq *wq) } } - rc = 0; - spin_lock(&idxd->dev_lock); - if (test_bit(IDXD_FLAG_CONFIGURABLE, &idxd->flags)) - rc = idxd_device_config(idxd); - spin_unlock(&idxd->dev_lock); + rc = idxd_device_config(idxd); if (rc < 0) { dev_dbg(dev, "Writing wq %d config failed: %d\n", wq->id, rc); goto err; @@ -1554,10 +1554,7 @@ int idxd_device_drv_probe(struct idxd_dev *idxd_dev) } /* Device configuration */ - spin_lock(&idxd->dev_lock); - if (test_bit(IDXD_FLAG_CONFIGURABLE, &idxd->flags)) - rc = idxd_device_config(idxd); - spin_unlock(&idxd->dev_lock); + rc = idxd_device_config(idxd); if (rc < 0) return -ENXIO; diff --git a/drivers/dma/idxd/init.c b/drivers/dma/idxd/init.c index fb80803d5b57..dd32b81a3108 100644 --- a/drivers/dma/idxd/init.c +++ b/drivers/dma/idxd/init.c @@ -1104,12 +1104,10 @@ static void idxd_reset_done(struct pci_dev *pdev) idxd_device_config_restore(idxd, idxd->idxd_saved); /* Re-configure IDXD device if allowed. */ - if (test_bit(IDXD_FLAG_CONFIGURABLE, &idxd->flags)) { - rc = idxd_device_config(idxd); - if (rc < 0) { - dev_err(dev, "HALT: %s config fails\n", idxd_name); - goto out; - } + rc = idxd_device_config(idxd); + if (rc < 0) { + dev_err(dev, "HALT: %s config fails\n", idxd_name); + goto out; } /* Bind IDXD device to driver. */ From 52d2edea0d63c935e82631e4b9e4a94eccf97b5b Mon Sep 17 00:00:00 2001 From: Vinicius Costa Gomes Date: Wed, 21 Jan 2026 10:34:28 -0800 Subject: [PATCH 004/401] dmaengine: idxd: Fix crash when the event log is disabled If reporting errors to the event log is not supported by the hardware, and an error that causes Function Level Reset (FLR) is received, the driver will try to restore the event log even if it was not allocated. Also, only try to free the event log if it was properly allocated. Fixes: 6078a315aec1 ("dmaengine: idxd: Add idxd_device_config_save() and idxd_device_config_restore() helpers") Reviewed-by: Dave Jiang Signed-off-by: Vinicius Costa Gomes Link: https://patch.msgid.link/20260121-idxd-fix-flr-on-kernel-queues-v3-v3-2-7ed70658a9d1@intel.com Signed-off-by: Vinod Koul --- drivers/dma/idxd/device.c | 3 +++ drivers/dma/idxd/init.c | 3 ++- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/dma/idxd/device.c b/drivers/dma/idxd/device.c index a704475d87b3..5265925f3076 100644 --- a/drivers/dma/idxd/device.c +++ b/drivers/dma/idxd/device.c @@ -831,6 +831,9 @@ static void idxd_device_evl_free(struct idxd_device *idxd) struct device *dev = &idxd->pdev->dev; struct idxd_evl *evl = idxd->evl; + if (!evl) + return; + gencfg.bits = ioread32(idxd->reg_base + IDXD_GENCFG_OFFSET); if (!gencfg.evl_en) return; diff --git a/drivers/dma/idxd/init.c b/drivers/dma/idxd/init.c index dd32b81a3108..1c3f9bc7364b 100644 --- a/drivers/dma/idxd/init.c +++ b/drivers/dma/idxd/init.c @@ -973,7 +973,8 @@ static void idxd_device_config_restore(struct idxd_device *idxd, idxd->rdbuf_limit = idxd_saved->saved_idxd.rdbuf_limit; - idxd->evl->size = saved_evl->size; + if (idxd->evl) + idxd->evl->size = saved_evl->size; for (i = 0; i < idxd->max_groups; i++) { struct idxd_group *saved_group, *group; From d6077df7b75d26e4edf98983836c05d00ebabd8d Mon Sep 17 00:00:00 2001 From: Vinicius Costa Gomes Date: Wed, 21 Jan 2026 10:34:29 -0800 Subject: [PATCH 005/401] dmaengine: idxd: Fix possible invalid memory access after FLR In the case that the first Function Level Reset (FLR) concludes correctly, but in the second FLR the scratch area for the saved configuration cannot be allocated, it's possible for a invalid memory access to happen. Always set the deallocated scratch area to NULL after FLR completes. Fixes: 98d187a98903 ("dmaengine: idxd: Enable Function Level Reset (FLR) for halt") Reviewed-by: Dave Jiang Signed-off-by: Vinicius Costa Gomes Link: https://patch.msgid.link/20260121-idxd-fix-flr-on-kernel-queues-v3-v3-3-7ed70658a9d1@intel.com Signed-off-by: Vinod Koul --- drivers/dma/idxd/init.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/dma/idxd/init.c b/drivers/dma/idxd/init.c index 1c3f9bc7364b..f1cfc7790d95 100644 --- a/drivers/dma/idxd/init.c +++ b/drivers/dma/idxd/init.c @@ -1146,6 +1146,7 @@ static void idxd_reset_done(struct pci_dev *pdev) } out: kfree(idxd->idxd_saved); + idxd->idxd_saved = NULL; } static const struct pci_error_handlers idxd_error_handler = { From f019d7814bceb6d8a017b3e55cb53deb1e6fd36b Mon Sep 17 00:00:00 2001 From: Vinicius Costa Gomes Date: Wed, 21 Jan 2026 10:34:30 -0800 Subject: [PATCH 006/401] dmaengine: idxd: Flush kernel workqueues on Function Level Reset When a Function Level Reset (FLR) happens, terminate the pending descriptors that were issued by in-kernel users and disable the interrupts associated with those. They will be re-enabled after FLR finishes. idxd_wq_flush_desc() is declared on idxd.h because it's going to be used in by the DMA backend in a future patch. Signed-off-by: Vinicius Costa Gomes Reviewed-by: Dave Jiang Link: https://patch.msgid.link/20260121-idxd-fix-flr-on-kernel-queues-v3-v3-4-7ed70658a9d1@intel.com Signed-off-by: Vinod Koul --- drivers/dma/idxd/device.c | 22 ++++++++++++++++++++++ drivers/dma/idxd/idxd.h | 1 + drivers/dma/idxd/irq.c | 16 ++++++++++++++++ 3 files changed, 39 insertions(+) diff --git a/drivers/dma/idxd/device.c b/drivers/dma/idxd/device.c index 5265925f3076..5e890b6771cb 100644 --- a/drivers/dma/idxd/device.c +++ b/drivers/dma/idxd/device.c @@ -1339,6 +1339,11 @@ void idxd_wq_free_irq(struct idxd_wq *wq) free_irq(ie->vector, ie); idxd_flush_pending_descs(ie); + + /* The interrupt might have been already released by FLR */ + if (ie->int_handle == INVALID_INT_HANDLE) + return; + if (idxd->request_int_handles) idxd_device_release_int_handle(idxd, ie->int_handle, IDXD_IRQ_MSIX); idxd_device_clear_perm_entry(idxd, ie); @@ -1347,6 +1352,23 @@ void idxd_wq_free_irq(struct idxd_wq *wq) ie->pasid = IOMMU_PASID_INVALID; } +void idxd_wq_flush_descs(struct idxd_wq *wq) +{ + struct idxd_irq_entry *ie = &wq->ie; + struct idxd_device *idxd = wq->idxd; + + guard(mutex)(&wq->wq_lock); + + if (wq->state != IDXD_WQ_ENABLED || wq->type != IDXD_WQT_KERNEL) + return; + + idxd_flush_pending_descs(ie); + if (idxd->request_int_handles) + idxd_device_release_int_handle(idxd, ie->int_handle, IDXD_IRQ_MSIX); + idxd_device_clear_perm_entry(idxd, ie); + ie->int_handle = INVALID_INT_HANDLE; +} + int idxd_wq_request_irq(struct idxd_wq *wq) { struct idxd_device *idxd = wq->idxd; diff --git a/drivers/dma/idxd/idxd.h b/drivers/dma/idxd/idxd.h index ea8c4daed38d..ce78b9a7c641 100644 --- a/drivers/dma/idxd/idxd.h +++ b/drivers/dma/idxd/idxd.h @@ -803,6 +803,7 @@ void idxd_wq_quiesce(struct idxd_wq *wq); int idxd_wq_init_percpu_ref(struct idxd_wq *wq); void idxd_wq_free_irq(struct idxd_wq *wq); int idxd_wq_request_irq(struct idxd_wq *wq); +void idxd_wq_flush_descs(struct idxd_wq *wq); /* submission */ int idxd_submit_desc(struct idxd_wq *wq, struct idxd_desc *desc); diff --git a/drivers/dma/idxd/irq.c b/drivers/dma/idxd/irq.c index 7782f8c51c32..6a25e1fd0e62 100644 --- a/drivers/dma/idxd/irq.c +++ b/drivers/dma/idxd/irq.c @@ -397,6 +397,17 @@ static void idxd_device_flr(struct work_struct *work) dev_err(&idxd->pdev->dev, "FLR failed\n"); } +static void idxd_wqs_flush_descs(struct idxd_device *idxd) +{ + int i; + + for (i = 0; i < idxd->max_wqs; i++) { + struct idxd_wq *wq = idxd->wqs[i]; + + idxd_wq_flush_descs(wq); + } +} + static irqreturn_t idxd_halt(struct idxd_device *idxd) { union gensts_reg gensts; @@ -415,6 +426,11 @@ static irqreturn_t idxd_halt(struct idxd_device *idxd) } else if (gensts.reset_type == IDXD_DEVICE_RESET_FLR) { idxd->state = IDXD_DEV_HALTED; idxd_mask_error_interrupts(idxd); + /* Flush all pending descriptors, and disable + * interrupts, they will be re-enabled when FLR + * concludes. + */ + idxd_wqs_flush_descs(idxd); dev_dbg(&idxd->pdev->dev, "idxd halted, doing FLR. After FLR, configs are restored\n"); INIT_WORK(&idxd->work, idxd_device_flr); From 2a93f5747d0eef89a3158c91d185d37d0bca2491 Mon Sep 17 00:00:00 2001 From: Vinicius Costa Gomes Date: Wed, 21 Jan 2026 10:34:31 -0800 Subject: [PATCH 007/401] dmaengine: idxd: Flush all pending descriptors When used as a dmaengine, the DMA "core" might ask the driver to terminate all pending requests, when that happens, flush all pending descriptors. In this context, flush means removing the requests from the pending lists, so even if they are completed after, the user is not notified. Reviewed-by: Dave Jiang Signed-off-by: Vinicius Costa Gomes Link: https://patch.msgid.link/20260121-idxd-fix-flr-on-kernel-queues-v3-v3-5-7ed70658a9d1@intel.com Signed-off-by: Vinod Koul --- drivers/dma/idxd/dma.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/drivers/dma/idxd/dma.c b/drivers/dma/idxd/dma.c index dbecd699237e..e4f9788aa635 100644 --- a/drivers/dma/idxd/dma.c +++ b/drivers/dma/idxd/dma.c @@ -194,6 +194,15 @@ static void idxd_dma_release(struct dma_device *device) kfree(idxd_dma); } +static int idxd_dma_terminate_all(struct dma_chan *c) +{ + struct idxd_wq *wq = to_idxd_wq(c); + + idxd_wq_flush_descs(wq); + + return 0; +} + int idxd_register_dma_device(struct idxd_device *idxd) { struct idxd_dma_dev *idxd_dma; @@ -224,6 +233,7 @@ int idxd_register_dma_device(struct idxd_device *idxd) dma->device_issue_pending = idxd_dma_issue_pending; dma->device_alloc_chan_resources = idxd_dma_alloc_chan_resources; dma->device_free_chan_resources = idxd_dma_free_chan_resources; + dma->device_terminate_all = idxd_dma_terminate_all; rc = dma_async_device_register(dma); if (rc < 0) { From 4fd3c4679f4f33873d7cb90b3eb553bea4db1038 Mon Sep 17 00:00:00 2001 From: Vinicius Costa Gomes Date: Wed, 21 Jan 2026 10:34:32 -0800 Subject: [PATCH 008/401] dmaengine: idxd: Wait for submitted operations on .device_synchronize() When the dmaengine "core" asks the driver to synchronize, send a Drain operation to the device workqueue, which will wait for the already submitted operations to finish. Reviewed-by: Dave Jiang Signed-off-by: Vinicius Costa Gomes Link: https://patch.msgid.link/20260121-idxd-fix-flr-on-kernel-queues-v3-v3-6-7ed70658a9d1@intel.com Signed-off-by: Vinod Koul --- drivers/dma/idxd/dma.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/dma/idxd/dma.c b/drivers/dma/idxd/dma.c index e4f9788aa635..9937b671f637 100644 --- a/drivers/dma/idxd/dma.c +++ b/drivers/dma/idxd/dma.c @@ -203,6 +203,13 @@ static int idxd_dma_terminate_all(struct dma_chan *c) return 0; } +static void idxd_dma_synchronize(struct dma_chan *c) +{ + struct idxd_wq *wq = to_idxd_wq(c); + + idxd_wq_drain(wq); +} + int idxd_register_dma_device(struct idxd_device *idxd) { struct idxd_dma_dev *idxd_dma; @@ -234,6 +241,7 @@ int idxd_register_dma_device(struct idxd_device *idxd) dma->device_alloc_chan_resources = idxd_dma_alloc_chan_resources; dma->device_free_chan_resources = idxd_dma_free_chan_resources; dma->device_terminate_all = idxd_dma_terminate_all; + dma->device_synchronize = idxd_dma_synchronize; rc = dma_async_device_register(dma); if (rc < 0) { From 3d33de353b1ff9023d5ec73b9becf80ea87af695 Mon Sep 17 00:00:00 2001 From: Vinicius Costa Gomes Date: Wed, 21 Jan 2026 10:34:33 -0800 Subject: [PATCH 009/401] dmaengine: idxd: Fix not releasing workqueue on .release() The workqueue associated with an DSA/IAA device is not released when the object is freed. Fixes: 47c16ac27d4c ("dmaengine: idxd: fix idxd conf_dev 'struct device' lifetime") Reviewed-by: Dave Jiang Signed-off-by: Vinicius Costa Gomes Link: https://patch.msgid.link/20260121-idxd-fix-flr-on-kernel-queues-v3-v3-7-7ed70658a9d1@intel.com Signed-off-by: Vinod Koul --- drivers/dma/idxd/sysfs.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/dma/idxd/sysfs.c b/drivers/dma/idxd/sysfs.c index cc2c83d7f710..6d251095c350 100644 --- a/drivers/dma/idxd/sysfs.c +++ b/drivers/dma/idxd/sysfs.c @@ -1836,6 +1836,7 @@ static void idxd_conf_device_release(struct device *dev) { struct idxd_device *idxd = confdev_to_idxd(dev); + destroy_workqueue(idxd->wq); kfree(idxd->groups); bitmap_free(idxd->wq_enable_map); kfree(idxd->wqs); From d9cfb5193a047a92a4d3c0e91ea4cc87c8f7c478 Mon Sep 17 00:00:00 2001 From: Vinicius Costa Gomes Date: Wed, 21 Jan 2026 10:34:34 -0800 Subject: [PATCH 010/401] dmaengine: idxd: Fix memory leak when a wq is reset idxd_wq_disable_cleanup() which is called from the reset path for a workqueue, sets the wq type to NONE, which for other parts of the driver mean that the wq is empty (all its resources were released). Only set the wq type to NONE after its resources are released. Fixes: da32b28c95a7 ("dmaengine: idxd: cleanup workqueue config after disabling") Reviewed-by: Dave Jiang Signed-off-by: Vinicius Costa Gomes Link: https://patch.msgid.link/20260121-idxd-fix-flr-on-kernel-queues-v3-v3-8-7ed70658a9d1@intel.com Signed-off-by: Vinod Koul --- drivers/dma/idxd/device.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/dma/idxd/device.c b/drivers/dma/idxd/device.c index 5e890b6771cb..efd7bfccc51f 100644 --- a/drivers/dma/idxd/device.c +++ b/drivers/dma/idxd/device.c @@ -175,6 +175,7 @@ void idxd_wq_free_resources(struct idxd_wq *wq) free_descs(wq); dma_free_coherent(dev, wq->compls_size, wq->compls, wq->compls_addr); sbitmap_queue_free(&wq->sbq); + wq->type = IDXD_WQT_NONE; } EXPORT_SYMBOL_NS_GPL(idxd_wq_free_resources, "IDXD"); @@ -382,7 +383,6 @@ static void idxd_wq_disable_cleanup(struct idxd_wq *wq) lockdep_assert_held(&wq->wq_lock); wq->state = IDXD_WQ_DISABLED; memset(wq->wqcfg, 0, idxd->wqcfg_size); - wq->type = IDXD_WQT_NONE; wq->threshold = 0; wq->priority = 0; wq->enqcmds_retries = IDXD_ENQCMDS_RETRIES; @@ -1558,7 +1558,6 @@ void idxd_drv_disable_wq(struct idxd_wq *wq) idxd_wq_reset(wq); idxd_wq_free_resources(wq); percpu_ref_exit(&wq->wq_active); - wq->type = IDXD_WQT_NONE; wq->client_count = 0; } EXPORT_SYMBOL_NS_GPL(idxd_drv_disable_wq, "IDXD"); From c311f5e9248471a950f0a524c2fd736414d98900 Mon Sep 17 00:00:00 2001 From: Vinicius Costa Gomes Date: Wed, 21 Jan 2026 10:34:35 -0800 Subject: [PATCH 011/401] dmaengine: idxd: Fix freeing the allocated ida too late It can happen that when the cdev .release() is called, the driver already called ida_destroy(). Move ida_free() to the _del() path. We see with DEBUG_KOBJECT_RELEASE enabled and forcing an early PCI unbind. Fixes: 04922b7445a1 ("dmaengine: idxd: fix cdev setup and free device lifetime issues") Reviewed-by: Dave Jiang Signed-off-by: Vinicius Costa Gomes Link: https://patch.msgid.link/20260121-idxd-fix-flr-on-kernel-queues-v3-v3-9-7ed70658a9d1@intel.com Signed-off-by: Vinod Koul --- drivers/dma/idxd/cdev.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/dma/idxd/cdev.c b/drivers/dma/idxd/cdev.c index c37d233535f9..0366c7cf3502 100644 --- a/drivers/dma/idxd/cdev.c +++ b/drivers/dma/idxd/cdev.c @@ -158,11 +158,7 @@ static const struct device_type idxd_cdev_file_type = { static void idxd_cdev_dev_release(struct device *dev) { struct idxd_cdev *idxd_cdev = dev_to_cdev(dev); - struct idxd_cdev_context *cdev_ctx; - struct idxd_wq *wq = idxd_cdev->wq; - cdev_ctx = &ictx[wq->idxd->data->type]; - ida_free(&cdev_ctx->minor_ida, idxd_cdev->minor); kfree(idxd_cdev); } @@ -582,11 +578,15 @@ int idxd_wq_add_cdev(struct idxd_wq *wq) void idxd_wq_del_cdev(struct idxd_wq *wq) { + struct idxd_cdev_context *cdev_ctx; struct idxd_cdev *idxd_cdev; idxd_cdev = wq->idxd_cdev; wq->idxd_cdev = NULL; cdev_device_del(&idxd_cdev->cdev, cdev_dev(idxd_cdev)); + + cdev_ctx = &ictx[wq->idxd->data->type]; + ida_free(&cdev_ctx->minor_ida, idxd_cdev->minor); put_device(cdev_dev(idxd_cdev)); } From ee66bc29578391c9b48523dc9119af67bd5c7c0f Mon Sep 17 00:00:00 2001 From: Vinicius Costa Gomes Date: Wed, 21 Jan 2026 10:34:36 -0800 Subject: [PATCH 012/401] dmaengine: idxd: Fix leaking event log memory During the device remove process, the device is reset, causing the configuration registers to go back to their default state, which is zero. As the driver is checking if the event log support was enabled before deallocating, it will fail if a reset happened before. Do not check if the support was enabled, the check for 'idxd->evl' being valid (only allocated if the HW capability is available) is enough. Fixes: 244da66cda35 ("dmaengine: idxd: setup event log configuration") Reviewed-by: Dave Jiang Signed-off-by: Vinicius Costa Gomes Link: https://patch.msgid.link/20260121-idxd-fix-flr-on-kernel-queues-v3-v3-10-7ed70658a9d1@intel.com Signed-off-by: Vinod Koul --- drivers/dma/idxd/device.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/dma/idxd/device.c b/drivers/dma/idxd/device.c index efd7bfccc51f..131138483b87 100644 --- a/drivers/dma/idxd/device.c +++ b/drivers/dma/idxd/device.c @@ -834,10 +834,6 @@ static void idxd_device_evl_free(struct idxd_device *idxd) if (!evl) return; - gencfg.bits = ioread32(idxd->reg_base + IDXD_GENCFG_OFFSET); - if (!gencfg.evl_en) - return; - mutex_lock(&evl->lock); gencfg.evl_en = 0; iowrite32(gencfg.bits, idxd->reg_base + IDXD_GENCFG_OFFSET); From 48fafffcf29bb968c9dee6bf507c1e57d0ccb6b5 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Thu, 26 Feb 2026 17:33:15 +0200 Subject: [PATCH 013/401] phy: make PHY_COMMON_PROPS Kconfig symbol conditionally user-selectable Geert reports that enabling CONFIG_KUNIT_ALL_TESTS shouldn't enable features that aren't enabled without it. That isn't what "*all* tests" means, but as the prompt puts it, "All KUnit tests with satisfied dependencies". The impact is that enabling CONFIG_KUNIT_ALL_TESTS brings features which cannot be disabled as built-in into the kernel. Keep the pattern where consumer drivers have to "select PHY_COMMON_PROPS", but if KUNIT_ALL_TESTS is enabled, also make PHY_COMMON_PROPS user selectable, so it can be turned off. Modify PHY_COMMON_PROPS_TEST to depend on PHY_COMMON_PROPS rather than select it. Fixes: e7556b59ba65 ("phy: add phy_get_rx_polarity() and phy_get_tx_polarity()") Reported-by: Geert Uytterhoeven Closes: https://lore.kernel.org/linux-phy/CAMuHMdUBaoYKNj52gn8DQeZFZ42Cvm6xT6fvo0-_twNv1k3Jhg@mail.gmail.com/ Signed-off-by: Vladimir Oltean Link: https://patch.msgid.link/20260226153315.3530378-1-vladimir.oltean@nxp.com Signed-off-by: Vinod Koul --- drivers/phy/Kconfig | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/phy/Kconfig b/drivers/phy/Kconfig index 02467dfd4fb0..1875d5b784f6 100644 --- a/drivers/phy/Kconfig +++ b/drivers/phy/Kconfig @@ -6,7 +6,7 @@ menu "PHY Subsystem" config PHY_COMMON_PROPS - bool + bool "PHY common properties" if KUNIT_ALL_TESTS help This parses properties common between generic PHYs and Ethernet PHYs. @@ -16,8 +16,7 @@ config PHY_COMMON_PROPS config PHY_COMMON_PROPS_TEST tristate "KUnit tests for PHY common props" if !KUNIT_ALL_TESTS - select PHY_COMMON_PROPS - depends on KUNIT + depends on KUNIT && PHY_COMMON_PROPS default KUNIT_ALL_TESTS help This builds KUnit tests for the PHY common property API. From a258d843a3e4cb687da19437f8f81fee55ad7d35 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Thu, 26 Feb 2026 20:28:53 +0200 Subject: [PATCH 014/401] phy: lynx-28g: skip CDR lock workaround for lanes disabled in the device tree The blamed commit introduced support for specifying individual lanes as OF nodes in the device, and these can have status = "disabled". When that happens, for_each_available_child_of_node() skips them and lynx_28g_probe_lane() -> devm_phy_create() is not called, so lane->phy will be NULL. Yet it will be dereferenced in lynx_28g_cdr_lock_check(), resulting in a crash. This used to be well handled in v3 of that patch: https://lore.kernel.org/linux-phy/20250926180505.760089-14-vladimir.oltean@nxp.com/ but until v5 was merged, the logic to support per-lane OF nodes was split into a separate change, and the per-SoC compatible strings patch was deferred to a "part 2" set. The splitting was done improperly, and that handling of NULL lane->phy pointers was not integrated into the proper commit. Fixes: 7df7d58abbd6 ("phy: lynx-28g: support individual lanes as OF PHY providers") Signed-off-by: Vladimir Oltean Link: https://patch.msgid.link/20260226182853.1103616-1-vladimir.oltean@nxp.com Signed-off-by: Vinod Koul --- drivers/phy/freescale/phy-fsl-lynx-28g.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/phy/freescale/phy-fsl-lynx-28g.c b/drivers/phy/freescale/phy-fsl-lynx-28g.c index 2b0fd95ba62f..63427fc34e26 100644 --- a/drivers/phy/freescale/phy-fsl-lynx-28g.c +++ b/drivers/phy/freescale/phy-fsl-lynx-28g.c @@ -1069,6 +1069,8 @@ static void lynx_28g_cdr_lock_check(struct work_struct *work) for (i = 0; i < LYNX_28G_NUM_LANE; i++) { lane = &priv->lane[i]; + if (!lane->phy) + continue; mutex_lock(&lane->phy->mutex); From f0cf0a882a02dcf28547f32264f6fd37e9a7b147 Mon Sep 17 00:00:00 2001 From: Yixun Lan Date: Mon, 16 Feb 2026 23:26:53 +0800 Subject: [PATCH 015/401] phy: k1-usb: add disconnect function support A disconnect status BIT of USB2 PHY need to be cleared, otherwise it will fail to work properly during next connection when devices connect to roothub directly. Fixes: fe4bc1a08638 ("phy: spacemit: support K1 USB2.0 PHY controller") Signed-off-by: Yixun Lan Reviewed-by: Vladimir Oltean Link: https://patch.msgid.link/20260216152653.25244-1-dlan@kernel.org Signed-off-by: Vinod Koul --- drivers/phy/spacemit/phy-k1-usb2.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/drivers/phy/spacemit/phy-k1-usb2.c b/drivers/phy/spacemit/phy-k1-usb2.c index 342061380012..9215d0b223b2 100644 --- a/drivers/phy/spacemit/phy-k1-usb2.c +++ b/drivers/phy/spacemit/phy-k1-usb2.c @@ -48,6 +48,9 @@ #define PHY_CLK_HSTXP_EN BIT(3) /* clock hstxp enable */ #define PHY_HSTXP_MODE BIT(4) /* 0: force en_txp to be 1; 1: no force */ +#define PHY_K1_HS_HOST_DISC 0x40 +#define PHY_K1_HS_HOST_DISC_CLR BIT(0) + #define PHY_PLL_DIV_CFG 0x98 #define PHY_FDIV_FRACT_8_15 GENMASK(7, 0) #define PHY_FDIV_FRACT_16_19 GENMASK(11, 8) @@ -142,9 +145,20 @@ static int spacemit_usb2phy_exit(struct phy *phy) return 0; } +static int spacemit_usb2phy_disconnect(struct phy *phy, int port) +{ + struct spacemit_usb2phy *sphy = phy_get_drvdata(phy); + + regmap_update_bits(sphy->regmap_base, PHY_K1_HS_HOST_DISC, + PHY_K1_HS_HOST_DISC_CLR, PHY_K1_HS_HOST_DISC_CLR); + + return 0; +} + static const struct phy_ops spacemit_usb2phy_ops = { .init = spacemit_usb2phy_init, .exit = spacemit_usb2phy_exit, + .disconnect = spacemit_usb2phy_disconnect, .owner = THIS_MODULE, }; From 584b457f4166293bdfa50f930228e9fb91a38392 Mon Sep 17 00:00:00 2001 From: Felix Gu Date: Thu, 12 Feb 2026 18:39:19 +0800 Subject: [PATCH 016/401] phy: ti: j721e-wiz: Fix device node reference leak in wiz_get_lane_phy_types() The serdes device_node is obtained using of_get_child_by_name(), which increments the reference count. However, it is never put, leading to a reference leak. Add the missing of_node_put() calls to ensure the reference count is properly balanced. Fixes: 7ae14cf581f2 ("phy: ti: j721e-wiz: Implement DisplayPort mode to the wiz driver") Suggested-by: Vladimir Oltean Signed-off-by: Felix Gu Reviewed-by: Vladimir Oltean Link: https://patch.msgid.link/20260212-wiz-v2-1-6e8bd4cc7a4a@gmail.com Signed-off-by: Vinod Koul --- drivers/phy/ti/phy-j721e-wiz.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/phy/ti/phy-j721e-wiz.c b/drivers/phy/ti/phy-j721e-wiz.c index 6e9ecb88dc8b..6b584706b913 100644 --- a/drivers/phy/ti/phy-j721e-wiz.c +++ b/drivers/phy/ti/phy-j721e-wiz.c @@ -1425,6 +1425,7 @@ static int wiz_get_lane_phy_types(struct device *dev, struct wiz *wiz) dev_err(dev, "%s: Reading \"reg\" from \"%s\" failed: %d\n", __func__, subnode->name, ret); + of_node_put(serdes); return ret; } of_property_read_u32(subnode, "cdns,num-lanes", &num_lanes); @@ -1439,6 +1440,7 @@ static int wiz_get_lane_phy_types(struct device *dev, struct wiz *wiz) } } + of_node_put(serdes); return 0; } From 81af9e40e2e4e1aa95f09fb34811760be6742c58 Mon Sep 17 00:00:00 2001 From: Abel Vesa Date: Thu, 19 Feb 2026 13:11:48 +0200 Subject: [PATCH 017/401] phy: qcom: qmp-ufs: Fix SM8650 PCS table for Gear 4 According to internal documentation, on SM8650, when the PHY is configured in Gear 4, the QPHY_V6_PCS_UFS_PLL_CNTL register needs to have the same value as for Gear 5. At the moment, there is no board that comes with a UFS 3.x device, so this issue doesn't show up, but with the new Eliza SoC, which uses the same init sequence as SM8650, on the MTP board, the link startup fails with the current Gear 4 PCS table. So fix that by moving the entry into the PCS generic table instead, while keeping the value from Gear 5 configuration. Cc: stable@vger.kernel.org # v6.10 Fixes: b9251e64a96f ("phy: qcom: qmp-ufs: update SM8650 tables for Gear 4 & 5") Suggested-by: Nitin Rawat Signed-off-by: Abel Vesa Reviewed-by: Konrad Dybcio Reviewed-by: Neil Armstrong Tested-by: Neil Armstrong # on SM8650-HDK Link: https://patch.msgid.link/20260219-phy-qcom-qmp-ufs-fix-sm8650-pcs-g4-table-v1-1-f136505b57f6@oss.qualcomm.com Signed-off-by: Vinod Koul --- drivers/phy/qualcomm/phy-qcom-qmp-ufs.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-ufs.c b/drivers/phy/qualcomm/phy-qcom-qmp-ufs.c index df138a5442eb..771bc7c2ab50 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-ufs.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp-ufs.c @@ -990,6 +990,7 @@ static const struct qmp_phy_init_tbl sm8650_ufsphy_pcs[] = { QMP_PHY_INIT_CFG(QPHY_V6_PCS_UFS_MULTI_LANE_CTRL1, 0x02), QMP_PHY_INIT_CFG(QPHY_V6_PCS_UFS_TX_MID_TERM_CTRL1, 0x43), QMP_PHY_INIT_CFG(QPHY_V6_PCS_UFS_PCS_CTRL1, 0xc1), + QMP_PHY_INIT_CFG(QPHY_V6_PCS_UFS_PLL_CNTL, 0x33), QMP_PHY_INIT_CFG(QPHY_V6_PCS_UFS_TX_LARGE_AMP_DRV_LVL, 0x0f), QMP_PHY_INIT_CFG(QPHY_V6_PCS_UFS_RX_SIGDET_CTRL2, 0x68), QMP_PHY_INIT_CFG(QPHY_V6_PCS_UFS_TX_POST_EMP_LVL_S4, 0x0e), @@ -999,13 +1000,11 @@ static const struct qmp_phy_init_tbl sm8650_ufsphy_pcs[] = { }; static const struct qmp_phy_init_tbl sm8650_ufsphy_g4_pcs[] = { - QMP_PHY_INIT_CFG(QPHY_V6_PCS_UFS_PLL_CNTL, 0x13), QMP_PHY_INIT_CFG(QPHY_V6_PCS_UFS_TX_HSGEAR_CAPABILITY, 0x04), QMP_PHY_INIT_CFG(QPHY_V6_PCS_UFS_RX_HSGEAR_CAPABILITY, 0x04), }; static const struct qmp_phy_init_tbl sm8650_ufsphy_g5_pcs[] = { - QMP_PHY_INIT_CFG(QPHY_V6_PCS_UFS_PLL_CNTL, 0x33), QMP_PHY_INIT_CFG(QPHY_V6_PCS_UFS_TX_HSGEAR_CAPABILITY, 0x05), QMP_PHY_INIT_CFG(QPHY_V6_PCS_UFS_RX_HSGEAR_CAPABILITY, 0x05), QMP_PHY_INIT_CFG(QPHY_V6_PCS_UFS_RX_HS_G5_SYNC_LENGTH_CAPABILITY, 0x4d), From 398c0c8bbc8f5a9d2f43863275a427a9d3720b6f Mon Sep 17 00:00:00 2001 From: Frank Li Date: Mon, 2 Mar 2026 16:59:55 -0500 Subject: [PATCH 018/401] dt-bindings: auxdisplay: ht16k33: Use unevaluatedProperties to fix common property warning Change additionalProperties to unevaluatedProperties because it refs to /schemas/input/matrix-keymap.yaml. Fix below CHECK_DTBS warnings: arch/arm/boot/dts/nxp/imx/imx6dl-victgo.dtb: keypad@70 (holtek,ht16k33): 'keypad,num-columns', 'keypad,num-rows' do not match any of the regexes: '^pinctrl-[0-9]+$' from schema $id: http://devicetree.org/schemas/auxdisplay/holtek,ht16k33.yaml# Fixes: f12b457c6b25c ("dt-bindings: auxdisplay: ht16k33: Convert to json-schema") Acked-by: Rob Herring (Arm) Signed-off-by: Frank Li Signed-off-by: Andy Shevchenko --- .../devicetree/bindings/auxdisplay/holtek,ht16k33.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/devicetree/bindings/auxdisplay/holtek,ht16k33.yaml b/Documentation/devicetree/bindings/auxdisplay/holtek,ht16k33.yaml index b90eec2077b4..fe1272e86467 100644 --- a/Documentation/devicetree/bindings/auxdisplay/holtek,ht16k33.yaml +++ b/Documentation/devicetree/bindings/auxdisplay/holtek,ht16k33.yaml @@ -66,7 +66,7 @@ then: required: - refresh-rate-hz -additionalProperties: false +unevaluatedProperties: false examples: - | From 29a3edd7004bb635d299fb9bc6f0ea4ef13ed5a2 Mon Sep 17 00:00:00 2001 From: Jacob Moroni Date: Fri, 27 Feb 2026 15:27:43 +0000 Subject: [PATCH 019/401] RDMA/irdma: Fix double free related to rereg_user_mr If IB_MR_REREG_TRANS is set during rereg_user_mr, the umem will be released and a new one will be allocated in irdma_rereg_mr_trans. If any step of irdma_rereg_mr_trans fails after the new umem is allocated, it releases the umem, but does not set iwmr->region to NULL. The problem is that this failure is propagated to the user, who will then call ibv_dereg_mr (as they should). Then, the dereg_mr path will see a non-NULL umem and attempt to call ib_umem_release again. Fix this by setting iwmr->region to NULL after ib_umem_release. Fixed: 5ac388db27c4 ("RDMA/irdma: Add support to re-register a memory region") Signed-off-by: Jacob Moroni Link: https://patch.msgid.link/20260227152743.1183388-1-jmoroni@google.com Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/irdma/verbs.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/infiniband/hw/irdma/verbs.c b/drivers/infiniband/hw/irdma/verbs.c index 7251cd7a2147..a20d25585993 100644 --- a/drivers/infiniband/hw/irdma/verbs.c +++ b/drivers/infiniband/hw/irdma/verbs.c @@ -3723,6 +3723,7 @@ static int irdma_rereg_mr_trans(struct irdma_mr *iwmr, u64 start, u64 len, err: ib_umem_release(region); + iwmr->region = NULL; return err; } From a08aaf3968aec5d05cd32c801b8cc0c61da69c41 Mon Sep 17 00:00:00 2001 From: Abhijit Gangurde Date: Fri, 27 Feb 2026 11:48:09 +0530 Subject: [PATCH 020/401] RDMA/ionic: Preserve and set Ethernet source MAC after ib_ud_header_init() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ionic_build_hdr() populated the Ethernet source MAC (hdr->eth.smac_h) by passing the header’s storage directly to rdma_read_gid_l2_fields(). However, ib_ud_header_init() is called after that and re-initializes the UD header, which wipes the previously written smac_h. As a result, packets are emitted with an zero source MAC address on the wire. Correct the source MAC by reading the GID-derived smac into a temporary buffer and copy it after ib_ud_header_init() completes. Fixes: e8521822c733 ("RDMA/ionic: Register device ops for control path") Cc: stable@vger.kernel.org # 6.18 Signed-off-by: Abhijit Gangurde Link: https://patch.msgid.link/20260227061809.2979990-1-abhijit.gangurde@amd.com Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/ionic/ionic_controlpath.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/ionic/ionic_controlpath.c b/drivers/infiniband/hw/ionic/ionic_controlpath.c index 4842931f5316..a5671da3db64 100644 --- a/drivers/infiniband/hw/ionic/ionic_controlpath.c +++ b/drivers/infiniband/hw/ionic/ionic_controlpath.c @@ -508,6 +508,7 @@ static int ionic_build_hdr(struct ionic_ibdev *dev, { const struct ib_global_route *grh; enum rdma_network_type net; + u8 smac[ETH_ALEN]; u16 vlan; int rc; @@ -518,7 +519,7 @@ static int ionic_build_hdr(struct ionic_ibdev *dev, grh = rdma_ah_read_grh(attr); - rc = rdma_read_gid_l2_fields(grh->sgid_attr, &vlan, &hdr->eth.smac_h[0]); + rc = rdma_read_gid_l2_fields(grh->sgid_attr, &vlan, smac); if (rc) return rc; @@ -536,6 +537,7 @@ static int ionic_build_hdr(struct ionic_ibdev *dev, if (rc) return rc; + ether_addr_copy(hdr->eth.smac_h, smac); ether_addr_copy(hdr->eth.dmac_h, attr->roce.dmac); if (net == RDMA_NETWORK_IPV4) { From 405ca72dc589dd746e5ee5378bb9d9ee7f844010 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micka=C3=ABl=20Sala=C3=BCn?= Date: Tue, 3 Mar 2026 18:36:31 +0100 Subject: [PATCH 021/401] landlock: Fix formatting MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Auto-format with clang-format -i security/landlock/*.[ch] Cc: Günther Noack Cc: Kees Cook Fixes: 69050f8d6d07 ("treewide: Replace kmalloc with kmalloc_obj for non-scalar types") Reviewed-by: Günther Noack Link: https://lore.kernel.org/r/20260303173632.88040-1-mic@digikod.net Signed-off-by: Mickaël Salaün --- security/landlock/domain.c | 3 +-- security/landlock/ruleset.c | 9 ++++----- 2 files changed, 5 insertions(+), 7 deletions(-) diff --git a/security/landlock/domain.c b/security/landlock/domain.c index f5b78d4766cd..f0d83f43afa1 100644 --- a/security/landlock/domain.c +++ b/security/landlock/domain.c @@ -94,8 +94,7 @@ static struct landlock_details *get_current_details(void) * allocate with GFP_KERNEL_ACCOUNT because it is independent from the * caller. */ - details = - kzalloc_flex(*details, exe_path, path_size); + details = kzalloc_flex(*details, exe_path, path_size); if (!details) return ERR_PTR(-ENOMEM); diff --git a/security/landlock/ruleset.c b/security/landlock/ruleset.c index 319873586385..73018dc8d6c7 100644 --- a/security/landlock/ruleset.c +++ b/security/landlock/ruleset.c @@ -32,9 +32,8 @@ static struct landlock_ruleset *create_ruleset(const u32 num_layers) { struct landlock_ruleset *new_ruleset; - new_ruleset = - kzalloc_flex(*new_ruleset, access_masks, num_layers, - GFP_KERNEL_ACCOUNT); + new_ruleset = kzalloc_flex(*new_ruleset, access_masks, num_layers, + GFP_KERNEL_ACCOUNT); if (!new_ruleset) return ERR_PTR(-ENOMEM); refcount_set(&new_ruleset->usage, 1); @@ -559,8 +558,8 @@ landlock_merge_ruleset(struct landlock_ruleset *const parent, if (IS_ERR(new_dom)) return new_dom; - new_dom->hierarchy = kzalloc_obj(*new_dom->hierarchy, - GFP_KERNEL_ACCOUNT); + new_dom->hierarchy = + kzalloc_obj(*new_dom->hierarchy, GFP_KERNEL_ACCOUNT); if (!new_dom->hierarchy) return ERR_PTR(-ENOMEM); From 929553bbb4cdda9be22175e1adb4d5814b770855 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micka=C3=ABl=20Sala=C3=BCn?= Date: Tue, 17 Feb 2026 13:23:39 +0100 Subject: [PATCH 022/401] landlock: Fully release unused TSYNC work entries MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If task_work_add() failed, ctx->task is put but the tsync_works struct is not reset to its previous state. The first consequence is that the kernel allocates memory for dying threads, which could lead to user-accounted memory exhaustion (not very useful nor specific to this case). The second consequence is that task_work_cancel(), called by cancel_tsync_works(), can dereference a NULL task pointer. Fix this issues by keeping a consistent works->size wrt the added task work. This is done in a new tsync_works_trim() helper which also cleans up the shared_ctx and work fields. As a safeguard, add a pointer check to cancel_tsync_works() and update tsync_works_release() accordingly. Cc: Jann Horn Reviewed-by: Günther Noack Link: https://lore.kernel.org/r/20260217122341.2359582-1-mic@digikod.net [mic: Replace memset() with compound literal] Signed-off-by: Mickaël Salaün --- security/landlock/tsync.c | 47 ++++++++++++++++++++++++++++++++++----- 1 file changed, 41 insertions(+), 6 deletions(-) diff --git a/security/landlock/tsync.c b/security/landlock/tsync.c index de01aa899751..3e44be4f66e3 100644 --- a/security/landlock/tsync.c +++ b/security/landlock/tsync.c @@ -203,6 +203,40 @@ static struct tsync_work *tsync_works_provide(struct tsync_works *s, return ctx; } +/** + * tsync_works_trim - Put the last tsync_work element + * + * @s: TSYNC works to trim. + * + * Put the last task and decrement the size of @s. + * + * This helper does not cancel a running task, but just reset the last element + * to zero. + */ +static void tsync_works_trim(struct tsync_works *s) +{ + struct tsync_work *ctx; + + if (WARN_ON_ONCE(s->size <= 0)) + return; + + ctx = s->works[s->size - 1]; + + /* + * For consistency, remove the task from ctx so that it does not look like + * we handed it a task_work. + */ + put_task_struct(ctx->task); + *ctx = (typeof(*ctx)){}; + + /* + * Cancel the tsync_works_provide() change to recycle the reserved memory + * for the next thread, if any. This also ensures that cancel_tsync_works() + * and tsync_works_release() do not see any NULL task pointers. + */ + s->size--; +} + /* * tsync_works_grow_by - preallocates space for n more contexts in s * @@ -276,7 +310,7 @@ static void tsync_works_release(struct tsync_works *s) size_t i; for (i = 0; i < s->size; i++) { - if (!s->works[i]->task) + if (WARN_ON_ONCE(!s->works[i]->task)) continue; put_task_struct(s->works[i]->task); @@ -379,16 +413,14 @@ static bool schedule_task_work(struct tsync_works *works, init_task_work(&ctx->work, restrict_one_thread_callback); err = task_work_add(thread, &ctx->work, TWA_SIGNAL); - if (err) { + if (unlikely(err)) { /* * task_work_add() only fails if the task is about to exit. We * checked that earlier, but it can happen as a race. Resume * without setting an error, as the task is probably gone in the - * next loop iteration. For consistency, remove the task from ctx - * so that it does not look like we handed it a task_work. + * next loop iteration. */ - put_task_struct(ctx->task); - ctx->task = NULL; + tsync_works_trim(works); atomic_dec(&shared_ctx->num_preparing); atomic_dec(&shared_ctx->num_unfinished); @@ -412,6 +444,9 @@ static void cancel_tsync_works(struct tsync_works *works, int i; for (i = 0; i < works->size; i++) { + if (WARN_ON_ONCE(!works->works[i]->task)) + continue; + if (!task_work_cancel(works->works[i]->task, &works->works[i]->work)) continue; From bb8369ead40771b9550e5dbc287d6b707dd6c2b3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micka=C3=ABl=20Sala=C3=BCn?= Date: Tue, 17 Feb 2026 13:23:40 +0100 Subject: [PATCH 023/401] landlock: Improve TSYNC types MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Constify pointers when it makes sense. Consistently use size_t for loops, especially to match works->size type. Add new lines to improve readability. Cc: Jann Horn Reviewed-by: Günther Noack Link: https://lore.kernel.org/r/20260217122341.2359582-2-mic@digikod.net Signed-off-by: Mickaël Salaün --- security/landlock/tsync.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/security/landlock/tsync.c b/security/landlock/tsync.c index 3e44be4f66e3..1e738ef2cff5 100644 --- a/security/landlock/tsync.c +++ b/security/landlock/tsync.c @@ -290,13 +290,14 @@ static int tsync_works_grow_by(struct tsync_works *s, size_t n, gfp_t flags) * tsync_works_contains - checks for presence of task in s */ static bool tsync_works_contains_task(const struct tsync_works *s, - struct task_struct *task) + const struct task_struct *task) { size_t i; for (i = 0; i < s->size; i++) if (s->works[i]->task == task) return true; + return false; } @@ -318,6 +319,7 @@ static void tsync_works_release(struct tsync_works *s) for (i = 0; i < s->capacity; i++) kfree(s->works[i]); + kfree(s->works); s->works = NULL; s->size = 0; @@ -329,7 +331,7 @@ static void tsync_works_release(struct tsync_works *s) */ static size_t count_additional_threads(const struct tsync_works *works) { - struct task_struct *thread, *caller; + const struct task_struct *caller, *thread; size_t n = 0; caller = current; @@ -368,7 +370,8 @@ static bool schedule_task_work(struct tsync_works *works, struct tsync_shared_context *shared_ctx) { int err; - struct task_struct *thread, *caller; + const struct task_struct *caller; + struct task_struct *thread; struct tsync_work *ctx; bool found_more_threads = false; @@ -438,10 +441,10 @@ static bool schedule_task_work(struct tsync_works *works, * shared_ctx->num_preparing and shared_ctx->num_unfished and mark the two * completions if needed, as if the task was never scheduled. */ -static void cancel_tsync_works(struct tsync_works *works, +static void cancel_tsync_works(const struct tsync_works *works, struct tsync_shared_context *shared_ctx) { - int i; + size_t i; for (i = 0; i < works->size; i++) { if (WARN_ON_ONCE(!works->works[i]->task)) From f8e2019c3bd1ea73ca25cd69a8141555243c9a11 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?G=C3=BCnther=20Noack?= Date: Fri, 20 Feb 2026 17:06:27 +0100 Subject: [PATCH 024/401] samples/landlock: Bump ABI version to 8 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The sample tool should print a warning if it is not running on a kernel that provides the newest Landlock ABI version. Link: https://lore.kernel.org/all/20260218.ufao5Vaefa2u@digikod.net/ Suggested-by: Mickaël Salaün Signed-off-by: Günther Noack Link: https://lore.kernel.org/r/20260220160627.53913-1-gnoack3000@gmail.com Signed-off-by: Mickaël Salaün --- samples/landlock/sandboxer.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/samples/landlock/sandboxer.c b/samples/landlock/sandboxer.c index e7af02f98208..9f21088c0855 100644 --- a/samples/landlock/sandboxer.c +++ b/samples/landlock/sandboxer.c @@ -299,7 +299,7 @@ static bool check_ruleset_scope(const char *const env_var, /* clang-format on */ -#define LANDLOCK_ABI_LAST 7 +#define LANDLOCK_ABI_LAST 8 #define XSTR(s) #s #define STR(s) XSTR(s) @@ -436,7 +436,8 @@ int main(const int argc, char *const argv[], char *const *const envp) /* Removes LANDLOCK_RESTRICT_SELF_LOG_NEW_EXEC_ON for ABI < 7 */ supported_restrict_flags &= ~LANDLOCK_RESTRICT_SELF_LOG_NEW_EXEC_ON; - + __attribute__((fallthrough)); + case 7: /* Must be printed for any ABI < LANDLOCK_ABI_LAST. */ fprintf(stderr, "Hint: You should update the running kernel " From c242e92c9da456d361d1d4482fb6e93ee95bd8cf Mon Sep 17 00:00:00 2001 From: Kamal Heib Date: Mon, 2 Mar 2026 23:36:45 -0500 Subject: [PATCH 025/401] RDMA/bng_re: Fix silent failure in HWRM version query If the firmware version query fails, the driver currently ignores the error and continues initializing. This leaves the device in a bad state. Fix this by making bng_re_query_hwrm_version() return the error code and update the driver to check for this error and stop the setup process safely if it happens. Fixes: 745065770c2d ("RDMA/bng_re: Register and get the resources from bnge driver") Signed-off-by: Kamal Heib Link: https://patch.msgid.link/20260303043645.425724-1-kheib@redhat.com Reviewed-by: Siva Reddy Kallam Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/bng_re/bng_dev.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/drivers/infiniband/hw/bng_re/bng_dev.c b/drivers/infiniband/hw/bng_re/bng_dev.c index d34b5f88cd40..71a7ca2196ad 100644 --- a/drivers/infiniband/hw/bng_re/bng_dev.c +++ b/drivers/infiniband/hw/bng_re/bng_dev.c @@ -210,7 +210,7 @@ static int bng_re_stats_ctx_alloc(struct bng_re_dev *rdev) return rc; } -static void bng_re_query_hwrm_version(struct bng_re_dev *rdev) +static int bng_re_query_hwrm_version(struct bng_re_dev *rdev) { struct bnge_auxr_dev *aux_dev = rdev->aux_dev; struct hwrm_ver_get_output ver_get_resp = {}; @@ -230,7 +230,7 @@ static void bng_re_query_hwrm_version(struct bng_re_dev *rdev) if (rc) { ibdev_err(&rdev->ibdev, "Failed to query HW version, rc = 0x%x", rc); - return; + return rc; } cctx = rdev->chip_ctx; @@ -244,6 +244,8 @@ static void bng_re_query_hwrm_version(struct bng_re_dev *rdev) if (!cctx->hwrm_cmd_max_timeout) cctx->hwrm_cmd_max_timeout = BNG_ROCE_FW_MAX_TIMEOUT; + + return 0; } static void bng_re_dev_uninit(struct bng_re_dev *rdev) @@ -306,13 +308,15 @@ static int bng_re_dev_init(struct bng_re_dev *rdev) goto msix_ctx_fail; } - bng_re_query_hwrm_version(rdev); + rc = bng_re_query_hwrm_version(rdev); + if (rc) + goto destroy_chip_ctx; rc = bng_re_alloc_fw_channel(&rdev->bng_res, &rdev->rcfw); if (rc) { ibdev_err(&rdev->ibdev, "Failed to allocate RCFW Channel: %#x\n", rc); - goto alloc_fw_chl_fail; + goto destroy_chip_ctx; } /* Allocate nq record memory */ @@ -391,7 +395,7 @@ static int bng_re_dev_init(struct bng_re_dev *rdev) kfree(rdev->nqr); nq_alloc_fail: bng_re_free_rcfw_channel(&rdev->rcfw); -alloc_fw_chl_fail: +destroy_chip_ctx: bng_re_destroy_chip_ctx(rdev); msix_ctx_fail: bnge_unregister_dev(rdev->aux_dev); From ef3b06742c8a201d0e83edc9a33a89a4fe3009f8 Mon Sep 17 00:00:00 2001 From: Yonatan Nachum Date: Sun, 8 Mar 2026 16:53:50 +0000 Subject: [PATCH 026/401] RDMA/efa: Fix use of completion ctx after free On admin queue completion handling, if the admin command completed with error we print data from the completion context. The issue is that we already freed the completion context in polling/interrupts handler which means we print data from context in an unknown state (it might be already used again). Change the admin submission flow so alloc/dealloc of the context will be symmetric and dealloc will be called after any potential use of the context. Fixes: 68fb9f3e312a ("RDMA/efa: Remove redundant NULL pointer check of CQE") Reviewed-by: Daniel Kranzdorf Reviewed-by: Michael Margolin Signed-off-by: Yonatan Nachum Link: https://patch.msgid.link/20260308165350.18219-1-ynachum@amazon.com Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/efa/efa_com.c | 87 +++++++++++++---------------- 1 file changed, 39 insertions(+), 48 deletions(-) diff --git a/drivers/infiniband/hw/efa/efa_com.c b/drivers/infiniband/hw/efa/efa_com.c index 229b0ad3b0cb..56caba612139 100644 --- a/drivers/infiniband/hw/efa/efa_com.c +++ b/drivers/infiniband/hw/efa/efa_com.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause /* - * Copyright 2018-2025 Amazon.com, Inc. or its affiliates. All rights reserved. + * Copyright 2018-2026 Amazon.com, Inc. or its affiliates. All rights reserved. */ #include @@ -310,23 +310,19 @@ static inline struct efa_comp_ctx *efa_com_get_comp_ctx_by_cmd_id(struct efa_com return &aq->comp_ctx[ctx_id]; } -static struct efa_comp_ctx *__efa_com_submit_admin_cmd(struct efa_com_admin_queue *aq, - struct efa_admin_aq_entry *cmd, - size_t cmd_size_in_bytes, - struct efa_admin_acq_entry *comp, - size_t comp_size_in_bytes) +static void __efa_com_submit_admin_cmd(struct efa_com_admin_queue *aq, + struct efa_comp_ctx *comp_ctx, + struct efa_admin_aq_entry *cmd, + size_t cmd_size_in_bytes, + struct efa_admin_acq_entry *comp, + size_t comp_size_in_bytes) { struct efa_admin_aq_entry *aqe; - struct efa_comp_ctx *comp_ctx; u16 queue_size_mask; u16 cmd_id; u16 ctx_id; u16 pi; - comp_ctx = efa_com_alloc_comp_ctx(aq); - if (!comp_ctx) - return ERR_PTR(-EINVAL); - queue_size_mask = aq->depth - 1; pi = aq->sq.pc & queue_size_mask; ctx_id = efa_com_get_comp_ctx_id(aq, comp_ctx); @@ -360,8 +356,6 @@ static struct efa_comp_ctx *__efa_com_submit_admin_cmd(struct efa_com_admin_queu /* barrier not needed in case of writel */ writel(aq->sq.pc, aq->sq.db_addr); - - return comp_ctx; } static inline int efa_com_init_comp_ctxt(struct efa_com_admin_queue *aq) @@ -394,28 +388,25 @@ static inline int efa_com_init_comp_ctxt(struct efa_com_admin_queue *aq) return 0; } -static struct efa_comp_ctx *efa_com_submit_admin_cmd(struct efa_com_admin_queue *aq, - struct efa_admin_aq_entry *cmd, - size_t cmd_size_in_bytes, - struct efa_admin_acq_entry *comp, - size_t comp_size_in_bytes) +static int efa_com_submit_admin_cmd(struct efa_com_admin_queue *aq, + struct efa_comp_ctx *comp_ctx, + struct efa_admin_aq_entry *cmd, + size_t cmd_size_in_bytes, + struct efa_admin_acq_entry *comp, + size_t comp_size_in_bytes) { - struct efa_comp_ctx *comp_ctx; - spin_lock(&aq->sq.lock); if (!test_bit(EFA_AQ_STATE_RUNNING_BIT, &aq->state)) { ibdev_err_ratelimited(aq->efa_dev, "Admin queue is closed\n"); spin_unlock(&aq->sq.lock); - return ERR_PTR(-ENODEV); + return -ENODEV; } - comp_ctx = __efa_com_submit_admin_cmd(aq, cmd, cmd_size_in_bytes, comp, - comp_size_in_bytes); + __efa_com_submit_admin_cmd(aq, comp_ctx, cmd, cmd_size_in_bytes, comp, + comp_size_in_bytes); spin_unlock(&aq->sq.lock); - if (IS_ERR(comp_ctx)) - clear_bit(EFA_AQ_STATE_RUNNING_BIT, &aq->state); - return comp_ctx; + return 0; } static int efa_com_handle_single_admin_completion(struct efa_com_admin_queue *aq, @@ -512,7 +503,6 @@ static int efa_com_wait_and_process_admin_cq_polling(struct efa_comp_ctx *comp_c { unsigned long timeout; unsigned long flags; - int err; timeout = jiffies + usecs_to_jiffies(aq->completion_timeout); @@ -532,24 +522,20 @@ static int efa_com_wait_and_process_admin_cq_polling(struct efa_comp_ctx *comp_c atomic64_inc(&aq->stats.no_completion); clear_bit(EFA_AQ_STATE_RUNNING_BIT, &aq->state); - err = -ETIME; - goto out; + return -ETIME; } msleep(aq->poll_interval); } - err = efa_com_comp_status_to_errno(comp_ctx->user_cqe->acq_common_descriptor.status); -out: - efa_com_dealloc_comp_ctx(aq, comp_ctx); - return err; + return efa_com_comp_status_to_errno( + comp_ctx->user_cqe->acq_common_descriptor.status); } static int efa_com_wait_and_process_admin_cq_interrupts(struct efa_comp_ctx *comp_ctx, struct efa_com_admin_queue *aq) { unsigned long flags; - int err; wait_for_completion_timeout(&comp_ctx->wait_event, usecs_to_jiffies(aq->completion_timeout)); @@ -585,14 +571,11 @@ static int efa_com_wait_and_process_admin_cq_interrupts(struct efa_comp_ctx *com aq->cq.cc); clear_bit(EFA_AQ_STATE_RUNNING_BIT, &aq->state); - err = -ETIME; - goto out; + return -ETIME; } - err = efa_com_comp_status_to_errno(comp_ctx->user_cqe->acq_common_descriptor.status); -out: - efa_com_dealloc_comp_ctx(aq, comp_ctx); - return err; + return efa_com_comp_status_to_errno( + comp_ctx->user_cqe->acq_common_descriptor.status); } /* @@ -642,30 +625,38 @@ int efa_com_cmd_exec(struct efa_com_admin_queue *aq, ibdev_dbg(aq->efa_dev, "%s (opcode %d)\n", efa_com_cmd_str(cmd->aq_common_descriptor.opcode), cmd->aq_common_descriptor.opcode); - comp_ctx = efa_com_submit_admin_cmd(aq, cmd, cmd_size, comp, comp_size); - if (IS_ERR(comp_ctx)) { + + comp_ctx = efa_com_alloc_comp_ctx(aq); + if (!comp_ctx) { + clear_bit(EFA_AQ_STATE_RUNNING_BIT, &aq->state); + return -EINVAL; + } + + err = efa_com_submit_admin_cmd(aq, comp_ctx, cmd, cmd_size, comp, comp_size); + if (err) { ibdev_err_ratelimited( aq->efa_dev, - "Failed to submit command %s (opcode %u) err %pe\n", + "Failed to submit command %s (opcode %u) err %d\n", efa_com_cmd_str(cmd->aq_common_descriptor.opcode), - cmd->aq_common_descriptor.opcode, comp_ctx); + cmd->aq_common_descriptor.opcode, err); + efa_com_dealloc_comp_ctx(aq, comp_ctx); up(&aq->avail_cmds); atomic64_inc(&aq->stats.cmd_err); - return PTR_ERR(comp_ctx); + return err; } err = efa_com_wait_and_process_admin_cq(comp_ctx, aq); if (err) { ibdev_err_ratelimited( aq->efa_dev, - "Failed to process command %s (opcode %u) comp_status %d err %d\n", + "Failed to process command %s (opcode %u) err %d\n", efa_com_cmd_str(cmd->aq_common_descriptor.opcode), - cmd->aq_common_descriptor.opcode, - comp_ctx->user_cqe->acq_common_descriptor.status, err); + cmd->aq_common_descriptor.opcode, err); atomic64_inc(&aq->stats.cmd_err); } + efa_com_dealloc_comp_ctx(aq, comp_ctx); up(&aq->avail_cmds); return err; From 3f63297ff61a994b99d710dcb6dbde41c4003233 Mon Sep 17 00:00:00 2001 From: LUO Haowen Date: Wed, 4 Mar 2026 14:45:09 +0800 Subject: [PATCH 027/401] dmaengine: dw-edma: Fix multiple times setting of the CYCLE_STATE and CYCLE_BIT bits for HDMA. Others have submitted this issue (https://lore.kernel.org/dmaengine/ 20240722030405.3385-1-zhengdongxiong@gxmicro.cn/), but it has not been fixed yet. Therefore, more supplementary information is provided here. As mentioned in the "PCS-CCS-CB-TCB" Producer-Consumer Synchronization of "DesignWare Cores PCI Express Controller Databook, version 6.00a": 1. The Consumer CYCLE_STATE (CCS) bit in the register only needs to be initialized once; the value will update automatically to be ~CYCLE_BIT (CB) in the next chunk. 2. The Consumer CYCLE_BIT bit in the register is loaded from the LL element and tested against CCS. When CB = CCS, the data transfer is executed. Otherwise not. The current logic sets customer (HDMA) CS and CB bits to 1 in each chunk while setting the producer (software) CB of odd chunks to 0 and even chunks to 1 in the linked list. This is leading to a mismatch between the producer CB and consumer CS bits. This issue can be reproduced by setting the transmission data size to exceed one chunk. By the way, in the EDMA using the same "PCS-CCS-CB-TCB" mechanism, the CS bit is only initialized once and this issue was not found. Refer to drivers/dma/dw-edma/dw-edma-v0-core.c:dw_edma_v0_core_start. So fix this issue by initializing the CYCLE_STATE and CYCLE_BIT bits only once. Fixes: e74c39573d35 ("dmaengine: dw-edma: Add support for native HDMA") Signed-off-by: LUO Haowen Reviewed-by: Frank Li Link: https://patch.msgid.link/tencent_CB11AA9F3920C1911AF7477A9BD8EFE0AD05@qq.com Signed-off-by: Vinod Koul --- drivers/dma/dw-edma/dw-hdma-v0-core.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/dma/dw-edma/dw-hdma-v0-core.c b/drivers/dma/dw-edma/dw-hdma-v0-core.c index e3f8db4fe909..ce8f7254bab2 100644 --- a/drivers/dma/dw-edma/dw-hdma-v0-core.c +++ b/drivers/dma/dw-edma/dw-hdma-v0-core.c @@ -252,10 +252,10 @@ static void dw_hdma_v0_core_start(struct dw_edma_chunk *chunk, bool first) lower_32_bits(chunk->ll_region.paddr)); SET_CH_32(dw, chan->dir, chan->id, llp.msb, upper_32_bits(chunk->ll_region.paddr)); + /* Set consumer cycle */ + SET_CH_32(dw, chan->dir, chan->id, cycle_sync, + HDMA_V0_CONSUMER_CYCLE_STAT | HDMA_V0_CONSUMER_CYCLE_BIT); } - /* Set consumer cycle */ - SET_CH_32(dw, chan->dir, chan->id, cycle_sync, - HDMA_V0_CONSUMER_CYCLE_STAT | HDMA_V0_CONSUMER_CYCLE_BIT); dw_hdma_v0_sync_ll_data(chunk); From e0adbf74e2a0455a6bc9628726ba87bcd0b42bf8 Mon Sep 17 00:00:00 2001 From: Alexander Stein Date: Tue, 14 Oct 2025 08:13:08 +0200 Subject: [PATCH 028/401] dmaengine: xilinx: xdma: Fix regmap init error handling devm_regmap_init_mmio returns an ERR_PTR() upon error, not NULL. Fix the error check and also fix the error message. Use the error code from ERR_PTR() instead of the wrong value in ret. Fixes: 17ce252266c7 ("dmaengine: xilinx: xdma: Add xilinx xdma driver") Signed-off-by: Alexander Stein Reviewed-by: Frank Li Link: https://patch.msgid.link/20251014061309.283468-1-alexander.stein@ew.tq-group.com Signed-off-by: Vinod Koul --- drivers/dma/xilinx/xdma.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/dma/xilinx/xdma.c b/drivers/dma/xilinx/xdma.c index d02a4dac2291..782a55edc55b 100644 --- a/drivers/dma/xilinx/xdma.c +++ b/drivers/dma/xilinx/xdma.c @@ -1234,8 +1234,8 @@ static int xdma_probe(struct platform_device *pdev) xdev->rmap = devm_regmap_init_mmio(&pdev->dev, reg_base, &xdma_regmap_config); - if (!xdev->rmap) { - xdma_err(xdev, "config regmap failed: %d", ret); + if (IS_ERR(xdev->rmap)) { + xdma_err(xdev, "config regmap failed: %pe", xdev->rmap); goto failed; } INIT_LIST_HEAD(&xdev->dma_dev.channels); From 67e467a11f62ff64ad219dc6aa5459e132c79d14 Mon Sep 17 00:00:00 2001 From: Deepanshu Kartikey Date: Sat, 7 Mar 2026 14:30:41 +0530 Subject: [PATCH 029/401] netfs: Fix kernel BUG in netfs_limit_iter() for ITER_KVEC iterators When a process crashes and the kernel writes a core dump to a 9P filesystem, __kernel_write() creates an ITER_KVEC iterator. This iterator reaches netfs_limit_iter() via netfs_unbuffered_write(), which only handles ITER_FOLIOQ, ITER_BVEC and ITER_XARRAY iterator types, hitting the BUG() for any other type. Fix this by adding netfs_limit_kvec() following the same pattern as netfs_limit_bvec(), since both kvec and bvec are simple segment arrays with pointer and length fields. Dispatch it from netfs_limit_iter() when the iterator type is ITER_KVEC. Fixes: cae932d3aee5 ("netfs: Add func to calculate pagecount/size-limited span of an iterator") Reported-by: syzbot+9c058f0d63475adc97fd@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=9c058f0d63475adc97fd Tested-by: syzbot+9c058f0d63475adc97fd@syzkaller.appspotmail.com Signed-off-by: Deepanshu Kartikey Link: https://patch.msgid.link/20260307090041.359870-1-kartikey406@gmail.com Signed-off-by: Christian Brauner --- fs/netfs/iterator.c | 43 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 43 insertions(+) diff --git a/fs/netfs/iterator.c b/fs/netfs/iterator.c index 72a435e5fc6d..154a14bb2d7f 100644 --- a/fs/netfs/iterator.c +++ b/fs/netfs/iterator.c @@ -142,6 +142,47 @@ static size_t netfs_limit_bvec(const struct iov_iter *iter, size_t start_offset, return min(span, max_size); } +/* + * Select the span of a kvec iterator we're going to use. Limit it by both + * maximum size and maximum number of segments. Returns the size of the span + * in bytes. + */ +static size_t netfs_limit_kvec(const struct iov_iter *iter, size_t start_offset, + size_t max_size, size_t max_segs) +{ + const struct kvec *kvecs = iter->kvec; + unsigned int nkv = iter->nr_segs, ix = 0, nsegs = 0; + size_t len, span = 0, n = iter->count; + size_t skip = iter->iov_offset + start_offset; + + if (WARN_ON(!iov_iter_is_kvec(iter)) || + WARN_ON(start_offset > n) || + n == 0) + return 0; + + while (n && ix < nkv && skip) { + len = kvecs[ix].iov_len; + if (skip < len) + break; + skip -= len; + n -= len; + ix++; + } + + while (n && ix < nkv) { + len = min3(n, kvecs[ix].iov_len - skip, max_size); + span += len; + nsegs++; + ix++; + if (span >= max_size || nsegs >= max_segs) + break; + skip = 0; + n -= len; + } + + return min(span, max_size); +} + /* * Select the span of an xarray iterator we're going to use. Limit it by both * maximum size and maximum number of segments. It is assumed that segments @@ -245,6 +286,8 @@ size_t netfs_limit_iter(const struct iov_iter *iter, size_t start_offset, return netfs_limit_bvec(iter, start_offset, max_size, max_segs); if (iov_iter_is_xarray(iter)) return netfs_limit_xarray(iter, start_offset, max_size, max_segs); + if (iov_iter_is_kvec(iter)) + return netfs_limit_kvec(iter, start_offset, max_size, max_segs); BUG(); } EXPORT_SYMBOL(netfs_limit_iter); From e9075e420a1eb3b52c60f3b95893a55e77419ce8 Mon Sep 17 00:00:00 2001 From: Deepanshu Kartikey Date: Sat, 7 Mar 2026 10:09:47 +0530 Subject: [PATCH 030/401] netfs: Fix NULL pointer dereference in netfs_unbuffered_write() on retry When a write subrequest is marked NETFS_SREQ_NEED_RETRY, the retry path in netfs_unbuffered_write() unconditionally calls stream->prepare_write() without checking if it is NULL. Filesystems such as 9P do not set the prepare_write operation, so stream->prepare_write remains NULL. When get_user_pages() fails with -EFAULT and the subrequest is flagged for retry, this results in a NULL pointer dereference at fs/netfs/direct_write.c:189. Fix this by mirroring the pattern already used in write_retry.c: if stream->prepare_write is NULL, skip renegotiation and directly reissue the subrequest via netfs_reissue_write(), which handles iterator reset, IN_PROGRESS flag, stats update and reissue internally. Fixes: a0b4c7a49137 ("netfs: Fix unbuffered/DIO writes to dispatch subrequests in strict sequence") Reported-by: syzbot+7227db0fbac9f348dba0@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=7227db0fbac9f348dba0 Signed-off-by: Deepanshu Kartikey Link: https://patch.msgid.link/20260307043947.347092-1-kartikey406@gmail.com Tested-by: syzbot+7227db0fbac9f348dba0@syzkaller.appspotmail.com Signed-off-by: Christian Brauner --- fs/netfs/direct_write.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/fs/netfs/direct_write.c b/fs/netfs/direct_write.c index dd1451bf7543..4d9760e36c11 100644 --- a/fs/netfs/direct_write.c +++ b/fs/netfs/direct_write.c @@ -186,10 +186,18 @@ static int netfs_unbuffered_write(struct netfs_io_request *wreq) stream->sreq_max_segs = INT_MAX; netfs_get_subrequest(subreq, netfs_sreq_trace_get_resubmit); - stream->prepare_write(subreq); - __set_bit(NETFS_SREQ_IN_PROGRESS, &subreq->flags); - netfs_stat(&netfs_n_wh_retry_write_subreq); + if (stream->prepare_write) { + stream->prepare_write(subreq); + __set_bit(NETFS_SREQ_IN_PROGRESS, &subreq->flags); + netfs_stat(&netfs_n_wh_retry_write_subreq); + } else { + struct iov_iter source; + + netfs_reset_iter(subreq); + source = subreq->io_iter; + netfs_reissue_write(stream, subreq, &source); + } } netfs_unbuffered_write_done(wreq); From e1c9866173c5f8521f2d0768547a01508cb9ff27 Mon Sep 17 00:00:00 2001 From: Tuo Li Date: Tue, 6 Jan 2026 11:24:28 +0800 Subject: [PATCH 031/401] dmaengine: idxd: fix possible wrong descriptor completion in llist_abort_desc() At the end of this function, d is the traversal cursor of flist, but the code completes found instead. This can lead to issues such as NULL pointer dereferences, double completion, or descriptor leaks. Fix this by completing d instead of found in the final list_for_each_entry_safe() loop. Fixes: aa8d18becc0c ("dmaengine: idxd: add callback support for iaa crypto") Signed-off-by: Tuo Li Reviewed-by: Dave Jiang Link: https://patch.msgid.link/20260106032428.162445-1-islituo@gmail.com Signed-off-by: Vinod Koul --- drivers/dma/idxd/submit.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/dma/idxd/submit.c b/drivers/dma/idxd/submit.c index 6db1c5fcedc5..03217041b8b3 100644 --- a/drivers/dma/idxd/submit.c +++ b/drivers/dma/idxd/submit.c @@ -138,7 +138,7 @@ static void llist_abort_desc(struct idxd_wq *wq, struct idxd_irq_entry *ie, */ list_for_each_entry_safe(d, t, &flist, list) { list_del_init(&d->list); - idxd_dma_complete_txd(found, IDXD_COMPLETE_ABORT, true, + idxd_dma_complete_txd(d, IDXD_COMPLETE_ABORT, true, NULL, NULL); } } From ff88df67dbf78b5eb909f8a3da4115b1cfd998ab Mon Sep 17 00:00:00 2001 From: Yihan Ding Date: Fri, 6 Mar 2026 10:16:50 +0800 Subject: [PATCH 032/401] landlock: Serialize TSYNC thread restriction MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit syzbot found a deadlock in landlock_restrict_sibling_threads(). When multiple threads concurrently call landlock_restrict_self() with sibling thread restriction enabled, they can deadlock by mutually queueing task_works on each other and then blocking in kernel space (waiting for the other to finish). Fix this by serializing the TSYNC operations within the same process using the exec_update_lock. This prevents concurrent invocations from deadlocking. We use down_write_trylock() and restart the syscall if the lock cannot be acquired immediately. This ensures that if a thread fails to get the lock, it will return to userspace, allowing it to process any pending TSYNC task_works from the lock holder, and then transparently restart the syscall. Fixes: 42fc7e6543f6 ("landlock: Multithreading support for landlock_restrict_self()") Reported-by: syzbot+7ea2f5e9dfd468201817@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=7ea2f5e9dfd468201817 Suggested-by: Günther Noack Suggested-by: Tingmao Wang Tested-by: Justin Suess Signed-off-by: Yihan Ding Tested-by: Günther Noack Reviewed-by: Günther Noack Link: https://lore.kernel.org/r/20260306021651.744723-2-dingyihan@uniontech.com Signed-off-by: Mickaël Salaün --- security/landlock/tsync.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/security/landlock/tsync.c b/security/landlock/tsync.c index 1e738ef2cff5..f0e5f8102001 100644 --- a/security/landlock/tsync.c +++ b/security/landlock/tsync.c @@ -485,6 +485,16 @@ int landlock_restrict_sibling_threads(const struct cred *old_cred, shared_ctx.new_cred = new_cred; shared_ctx.set_no_new_privs = task_no_new_privs(current); + /* + * Serialize concurrent TSYNC operations to prevent deadlocks when + * multiple threads call landlock_restrict_self() simultaneously. + * If the lock is already held, we gracefully yield by restarting the + * syscall. This allows the current thread to process pending + * task_works before retrying. + */ + if (!down_write_trylock(¤t->signal->exec_update_lock)) + return restart_syscall(); + /* * We schedule a pseudo-signal task_work for each of the calling task's * sibling threads. In the task work, each thread: @@ -594,6 +604,6 @@ int landlock_restrict_sibling_threads(const struct cred *old_cred, wait_for_completion(&shared_ctx.all_finished); tsync_works_release(&works); - + up_write(¤t->signal->exec_update_lock); return atomic_read(&shared_ctx.preparation_error); } From 697f514ad9dbe600a808326d80b02caab03b7f90 Mon Sep 17 00:00:00 2001 From: Yihan Ding Date: Fri, 6 Mar 2026 10:16:51 +0800 Subject: [PATCH 033/401] landlock: Clean up interrupted thread logic in TSYNC MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In landlock_restrict_sibling_threads(), when the calling thread is interrupted while waiting for sibling threads to prepare, it executes a recovery path. Previously, this path included a wait_for_completion() call on all_prepared to prevent a Use-After-Free of the local shared_ctx. However, this wait is redundant. Exiting the main do-while loop already leads to a bottom cleanup section that unconditionally waits for all_finished. Therefore, replacing the wait with a simple break is safe, prevents UAF, and correctly unblocks the remaining task_works. Clean up the error path by breaking the loop and updating the surrounding comments to accurately reflect the state machine. Suggested-by: Günther Noack Signed-off-by: Yihan Ding Tested-by: Günther Noack Reviewed-by: Günther Noack Link: https://lore.kernel.org/r/20260306021651.744723-3-dingyihan@uniontech.com Signed-off-by: Mickaël Salaün --- security/landlock/tsync.c | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/security/landlock/tsync.c b/security/landlock/tsync.c index f0e5f8102001..4d4427ba8d93 100644 --- a/security/landlock/tsync.c +++ b/security/landlock/tsync.c @@ -575,24 +575,30 @@ int landlock_restrict_sibling_threads(const struct cred *old_cred, -ERESTARTNOINTR); /* - * Cancel task works for tasks that did not start running yet, - * and decrement all_prepared and num_unfinished accordingly. + * Opportunistic improvement: try to cancel task + * works for tasks that did not start running + * yet. We do not have a guarantee that it + * cancels any of the enqueued task works + * because task_work_run() might already have + * dequeued them. */ cancel_tsync_works(&works, &shared_ctx); /* - * The remaining task works have started running, so waiting for - * their completion will finish. + * Break the loop with error. The cleanup code + * after the loop unblocks the remaining + * task_works. */ - wait_for_completion(&shared_ctx.all_prepared); + break; } } } while (found_more_threads && !atomic_read(&shared_ctx.preparation_error)); /* - * We now have all sibling threads blocking and in "prepared" state in the - * task work. Ask all threads to commit. + * We now have either (a) all sibling threads blocking and in "prepared" + * state in the task work, or (b) the preparation error is set. Ask all + * threads to commit (or abort). */ complete_all(&shared_ctx.ready_to_commit); From 995a418a6ca33e466e5e1527663ae3d5eee18304 Mon Sep 17 00:00:00 2001 From: Wang Jun <1742789905@qq.com> Date: Thu, 12 Mar 2026 22:51:36 +0800 Subject: [PATCH 034/401] auxdisplay: lcd2s: add error handling for i2c transfers The lcd2s_print() and lcd2s_gotoxy() functions currently ignore the return value of lcd2s_i2c_master_send(), which can fail. This can lead to silent data loss or incorrect cursor positioning. Add proper error checking: if the number of bytes sent does not match the expected length, return -EIO; otherwise propagate any error code from the I2C transfer. Signed-off-by: Wang Jun <1742789905@qq.com> Signed-off-by: Andy Shevchenko --- drivers/auxdisplay/lcd2s.c | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/drivers/auxdisplay/lcd2s.c b/drivers/auxdisplay/lcd2s.c index defb0573e43c..c7a962728752 100644 --- a/drivers/auxdisplay/lcd2s.c +++ b/drivers/auxdisplay/lcd2s.c @@ -99,8 +99,13 @@ static int lcd2s_print(struct charlcd *lcd, int c) { struct lcd2s_data *lcd2s = lcd->drvdata; u8 buf[2] = { LCD2S_CMD_WRITE, c }; + int ret; - lcd2s_i2c_master_send(lcd2s->i2c, buf, sizeof(buf)); + ret = lcd2s_i2c_master_send(lcd2s->i2c, buf, sizeof(buf)); + if (ret < 0) + return ret; + if (ret != sizeof(buf)) + return -EIO; return 0; } @@ -108,9 +113,13 @@ static int lcd2s_gotoxy(struct charlcd *lcd, unsigned int x, unsigned int y) { struct lcd2s_data *lcd2s = lcd->drvdata; u8 buf[3] = { LCD2S_CMD_CUR_POS, y + 1, x + 1 }; + int ret; - lcd2s_i2c_master_send(lcd2s->i2c, buf, sizeof(buf)); - + ret = lcd2s_i2c_master_send(lcd2s->i2c, buf, sizeof(buf)); + if (ret < 0) + return ret; + if (ret != sizeof(buf)) + return -EIO; return 0; } From 7bae956cac0433c4d41aac9f1d04e42694e0b706 Mon Sep 17 00:00:00 2001 From: Zhang Heng Date: Fri, 13 Mar 2026 16:06:24 +0800 Subject: [PATCH 035/401] ALSA: hda/realtek: add quirk for Lenovo Yoga 7 2-in-1 16AKP10 This machine is equipped with ALC287 and requires the quirk ALC287_FIXUP_YOGA9_14IAP7_BASS_SPK_PIN to fix the issue where the bass speakers are not configured and the speaker volume cannot be controlled. Link: https://bugzilla.kernel.org/show_bug.cgi?id=221210 Signed-off-by: Zhang Heng Link: https://patch.msgid.link/20260313080624.1395362-1-zhangheng@kylinos.cn Signed-off-by: Takashi Iwai --- sound/hda/codecs/realtek/alc269.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/hda/codecs/realtek/alc269.c b/sound/hda/codecs/realtek/alc269.c index ab4b22fcb72e..8d0767db9802 100644 --- a/sound/hda/codecs/realtek/alc269.c +++ b/sound/hda/codecs/realtek/alc269.c @@ -7575,6 +7575,7 @@ static const struct hda_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x17aa, 0x38ab, "Thinkbook 16P", ALC287_FIXUP_MG_RTKC_CSAMP_CS35L41_I2C_THINKPAD), SND_PCI_QUIRK(0x17aa, 0x38b4, "Legion Slim 7 16IRH8", ALC287_FIXUP_CS35L41_I2C_2), HDA_CODEC_QUIRK(0x17aa, 0x391c, "Lenovo Yoga 7 2-in-1 14AKP10", ALC287_FIXUP_YOGA9_14IAP7_BASS_SPK_PIN), + HDA_CODEC_QUIRK(0x17aa, 0x391d, "Lenovo Yoga 7 2-in-1 16AKP10", ALC287_FIXUP_YOGA9_14IAP7_BASS_SPK_PIN), SND_PCI_QUIRK(0x17aa, 0x38b5, "Legion Slim 7 16IRH8", ALC287_FIXUP_CS35L41_I2C_2), SND_PCI_QUIRK(0x17aa, 0x38b6, "Legion Slim 7 16APH8", ALC287_FIXUP_CS35L41_I2C_2), SND_PCI_QUIRK(0x17aa, 0x38b7, "Legion Slim 7 16APH8", ALC287_FIXUP_CS35L41_I2C_2), From 59f68dc1d8df3142cb58fd2568966a9bb7b0ed8a Mon Sep 17 00:00:00 2001 From: Matthew Schwartz Date: Fri, 13 Mar 2026 10:25:03 -0700 Subject: [PATCH 036/401] ALSA: hda/realtek: Add quirk for ASUS ROG Flow Z13-KJP GZ302EAC Fixes lack of audio output on the ASUS ROG Flow Z13-KJP GZ302EAC model, similar to the ASUS ROG Flow Z13 GZ302EA. Signed-off-by: Matthew Schwartz Link: https://patch.msgid.link/20260313172503.285846-1-matthew.schwartz@linux.dev Signed-off-by: Takashi Iwai --- sound/hda/codecs/realtek/alc269.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/hda/codecs/realtek/alc269.c b/sound/hda/codecs/realtek/alc269.c index 8d0767db9802..b8aba01ef261 100644 --- a/sound/hda/codecs/realtek/alc269.c +++ b/sound/hda/codecs/realtek/alc269.c @@ -7206,6 +7206,7 @@ static const struct hda_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1043, 0x14e3, "ASUS G513PI/PU/PV", ALC287_FIXUP_CS35L41_I2C_2), SND_PCI_QUIRK(0x1043, 0x14f2, "ASUS VivoBook X515JA", ALC256_FIXUP_ASUS_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1043, 0x1503, "ASUS G733PY/PZ/PZV/PYV", ALC287_FIXUP_CS35L41_I2C_2), + SND_PCI_QUIRK(0x1043, 0x1514, "ASUS ROG Flow Z13 GZ302EAC", ALC287_FIXUP_CS35L41_I2C_2), SND_PCI_QUIRK(0x1043, 0x1517, "Asus Zenbook UX31A", ALC269VB_FIXUP_ASUS_ZENBOOK_UX31A), SND_PCI_QUIRK(0x1043, 0x1533, "ASUS GV302XA/XJ/XQ/XU/XV/XI", ALC287_FIXUP_CS35L41_I2C_2), SND_PCI_QUIRK(0x1043, 0x1573, "ASUS GZ301VV/VQ/VU/VJ/VA/VC/VE/VVC/VQC/VUC/VJC/VEC/VCC", ALC285_FIXUP_ASUS_HEADSET_MIC), From 793b008cd39516385791a1d1d223d817e947a471 Mon Sep 17 00:00:00 2001 From: Andrii Kovalchuk Date: Sun, 15 Mar 2026 01:08:51 +0000 Subject: [PATCH 037/401] ALSA: hda/realtek: Add HP ENVY Laptop 13-ba0xxx quirk Add a PCI quirk for HP ENVY Laptop 13-ba0xxx (PCI device ID 0x8756) to enable proper mute LED and mic mute behavior using the ALC245_FIXUP_HP_X360_MUTE_LEDS fixup. Signed-off-by: Andrii Kovalchuk Link: https://patch.msgid.link/u0s-uRVegF9BN0t-4JnOUwsIAR-mVc4U4FJfJHdEHX7ro_laErHD9y35NebWybcN16gVaVHPJo1ap3AoJ1a2gqJImPvThgeNt_SYVY1KaDw=@proton.me Signed-off-by: Takashi Iwai --- sound/hda/codecs/realtek/alc269.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/hda/codecs/realtek/alc269.c b/sound/hda/codecs/realtek/alc269.c index b8aba01ef261..7294298fce4a 100644 --- a/sound/hda/codecs/realtek/alc269.c +++ b/sound/hda/codecs/realtek/alc269.c @@ -6854,6 +6854,7 @@ static const struct hda_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x103c, 0x8730, "HP ProBook 445 G7", ALC236_FIXUP_HP_MUTE_LED_MICMUTE_VREF), SND_PCI_QUIRK(0x103c, 0x8735, "HP ProBook 435 G7", ALC236_FIXUP_HP_MUTE_LED_MICMUTE_VREF), SND_PCI_QUIRK(0x103c, 0x8736, "HP", ALC285_FIXUP_HP_GPIO_AMP_INIT), + SND_PCI_QUIRK(0x103c, 0x8756, "HP ENVY Laptop 13-ba0xxx", ALC245_FIXUP_HP_X360_MUTE_LEDS), SND_PCI_QUIRK(0x103c, 0x8760, "HP EliteBook 8{4,5}5 G7", ALC285_FIXUP_HP_BEEP_MICMUTE_LED), SND_PCI_QUIRK(0x103c, 0x876e, "HP ENVY x360 Convertible 13-ay0xxx", ALC245_FIXUP_HP_X360_MUTE_LEDS), SND_PCI_QUIRK(0x103c, 0x877a, "HP", ALC285_FIXUP_HP_MUTE_LED), From fe757092d2329c397ecb32f2bf68a5b1c4bd9193 Mon Sep 17 00:00:00 2001 From: Guangshuo Li Date: Fri, 13 Mar 2026 12:06:11 +0800 Subject: [PATCH 038/401] ASoC: sma1307: fix double free of devm_kzalloc() memory A previous change added NULL checks and cleanup for allocation failures in sma1307_setting_loaded(). However, the cleanup for mode_set entries is wrong. Those entries are allocated with devm_kzalloc(), so they are device-managed resources and must not be freed with kfree(). Manually freeing them in the error path can lead to a double free when devres later releases the same memory. Drop the manual kfree() loop and let devres handle the cleanup. Fixes: 0ec6bd16705fe ("ASoC: sma1307: Add NULL check in sma1307_setting_loaded()") Cc: stable@vger.kernel.org Signed-off-by: Guangshuo Li Link: https://patch.msgid.link/20260313040611.391479-1-lgs201920130244@gmail.com Signed-off-by: Mark Brown --- sound/soc/codecs/sma1307.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/sound/soc/codecs/sma1307.c b/sound/soc/codecs/sma1307.c index 4bb59e5c0891..5850bf6e71ca 100644 --- a/sound/soc/codecs/sma1307.c +++ b/sound/soc/codecs/sma1307.c @@ -1759,8 +1759,10 @@ static void sma1307_setting_loaded(struct sma1307_priv *sma1307, const char *fil sma1307->set.mode_size * 2 * sizeof(int), GFP_KERNEL); if (!sma1307->set.mode_set[i]) { - for (int j = 0; j < i; j++) - kfree(sma1307->set.mode_set[j]); + for (int j = 0; j < i; j++) { + devm_kfree(sma1307->dev, sma1307->set.mode_set[j]); + sma1307->set.mode_set[j] = NULL; + } sma1307->set.status = false; return; } From d0426510a9e1fabf074e274ceff26ffc6500980a Mon Sep 17 00:00:00 2001 From: Hasun Park Date: Mon, 9 Mar 2026 00:16:52 +0900 Subject: [PATCH 039/401] ASoC: amd: acp: add DMI override for ACP70 flag Some ASUS ProArt PX13 systems expose ACP ACPI config flags that can select a non-working fallback path. Add a DMI override in snd_amd_acp_find_config() for ACP70+ boards and return 0 so ACP ACPI flag-based selection is skipped on this platform. This keeps machine driver selection on the intended SoundWire path. Signed-off-by: Hasun Park Link: https://patch.msgid.link/20260308151654.29059-2-hasunpark@gmail.com Signed-off-by: Mark Brown --- sound/soc/amd/acp-config.c | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/sound/soc/amd/acp-config.c b/sound/soc/amd/acp-config.c index 365209ea53f3..1604ed679224 100644 --- a/sound/soc/amd/acp-config.c +++ b/sound/soc/amd/acp-config.c @@ -23,6 +23,16 @@ static int acp_quirk_data; +static const struct dmi_system_id acp70_acpi_flag_override_table[] = { + { + .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "ASUSTeK COMPUTER INC."), + DMI_MATCH(DMI_PRODUCT_NAME, "HN7306EA"), + }, + }, + {} +}; + static const struct config_entry config_table[] = { { .flags = FLAG_AMD_SOF, @@ -186,8 +196,11 @@ int snd_amd_acp_find_config(struct pci_dev *pci) */ if (!pci->revision) return 0; - else if (pci->revision >= ACP_7_0_REV) + else if (pci->revision >= ACP_7_0_REV) { + if (dmi_check_system(acp70_acpi_flag_override_table)) + return 0; return snd_amd_acp_acpi_find_config(pci); + } for (i = 0; i < ARRAY_SIZE(config_table); i++, table++) { if (table->device != device) From 399b6fd37a102c73cefa32a0ec945d76d80fa35f Mon Sep 17 00:00:00 2001 From: Hasun Park Date: Mon, 9 Mar 2026 00:16:53 +0900 Subject: [PATCH 040/401] ASoC: amd: acp: add PX13 SoundWire machine link for rt721+tas2783x2 Add an ACP70 SoundWire machine entry for ASUS PX13 (HN7306EA/HN7306EAC) with rt721 and two TAS2783 amps on link1. Describe rt721 with jack/DMIC endpoints on this platform and add explicit left/right TAS2783 speaker endpoint mapping via name prefixes. Signed-off-by: Hasun Park Link: https://patch.msgid.link/20260308151654.29059-3-hasunpark@gmail.com Signed-off-by: Mark Brown --- sound/soc/amd/acp/amd-acp70-acpi-match.c | 61 ++++++++++++++++++++++++ 1 file changed, 61 insertions(+) diff --git a/sound/soc/amd/acp/amd-acp70-acpi-match.c b/sound/soc/amd/acp/amd-acp70-acpi-match.c index 7a567ba02292..1ae43df5da6c 100644 --- a/sound/soc/amd/acp/amd-acp70-acpi-match.c +++ b/sound/soc/amd/acp/amd-acp70-acpi-match.c @@ -69,6 +69,28 @@ static const struct snd_soc_acpi_endpoint jack_amp_g1_dmic_endpoints[] = { }, }; +static const struct snd_soc_acpi_endpoint jack_dmic_endpoints[] = { + /* Jack Endpoint */ + { + .num = 0, + .aggregated = 0, + .group_position = 0, + .group_id = 0, + }, + /* DMIC Endpoint */ + { + /* + * rt721 endpoint #2 maps to AIF3 (internal DMIC capture). + * Endpoint #1 is AIF2 amp path and is handled by external amps + * on this platform. + */ + .num = 2, + .aggregated = 0, + .group_position = 0, + .group_id = 0, + }, +}; + static const struct snd_soc_acpi_adr_device rt712_vb_1_group1_adr[] = { { .adr = 0x000130025D071201ull, @@ -563,6 +585,40 @@ static const struct snd_soc_acpi_link_adr acp70_rt1320_l0_rt722_l1[] = { {} }; +static const struct snd_soc_acpi_adr_device rt721_l1u0_tas2783x2_l1u8b_adr[] = { + { + .adr = 0x000130025D072101ull, + /* + * On this platform speakers are provided by two TAS2783 amps. + * Keep rt721 as UAJ + DMIC only. + */ + .num_endpoints = ARRAY_SIZE(jack_dmic_endpoints), + .endpoints = jack_dmic_endpoints, + .name_prefix = "rt721", + }, + { + .adr = 0x0001380102000001ull, + .num_endpoints = 1, + .endpoints = &spk_l_endpoint, + .name_prefix = "tas2783-1", + }, + { + .adr = 0x00013B0102000001ull, + .num_endpoints = 1, + .endpoints = &spk_r_endpoint, + .name_prefix = "tas2783-2", + }, +}; + +static const struct snd_soc_acpi_link_adr acp70_rt721_l1u0_tas2783x2_l1u8b[] = { + { + .mask = BIT(1), + .num_adr = ARRAY_SIZE(rt721_l1u0_tas2783x2_l1u8b_adr), + .adr_d = rt721_l1u0_tas2783x2_l1u8b_adr, + }, + {} +}; + struct snd_soc_acpi_mach snd_soc_acpi_amd_acp70_sdw_machines[] = { { .link_mask = BIT(0) | BIT(1), @@ -650,6 +706,11 @@ struct snd_soc_acpi_mach snd_soc_acpi_amd_acp70_sdw_machines[] = { .machine_check = snd_soc_acpi_amd_sdca_is_device_rt712_vb, .drv_name = "amd_sdw", }, + { + .link_mask = BIT(1), + .links = acp70_rt721_l1u0_tas2783x2_l1u8b, + .drv_name = "amd_sdw", + }, {}, }; EXPORT_SYMBOL(snd_soc_acpi_amd_acp70_sdw_machines); From 09e70e4f119ff650d24c96161fd2f62ac7e424b0 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Mon, 2 Mar 2026 19:43:31 +0100 Subject: [PATCH 041/401] regmap: Synchronize cache for the page selector If the selector register is represented in each page, its value according to the debugfs is stale because it gets synchronized only after the real page switch happens. Hence the regmap cache initialisation from the HW inherits outdated data in the selector register. Synchronize cache for the page selector just in time. Before (offset followed by hexdump, the first byte is selector): // Real registers 18: 05 ff 00 00 ff 0f 00 00 f0 00 00 00 ... // Virtual (per port) 40: 05 ff 00 00 e0 e0 00 00 00 00 00 1f 50: 00 ff 00 00 e0 e0 00 00 00 00 00 1f 60: 01 ff 00 00 ff ff 00 00 00 00 00 00 70: 02 ff 00 00 cf f3 00 00 00 00 00 0c 80: 03 ff 00 00 00 00 00 00 00 00 00 ff 90: 04 ff 00 00 ff 0f 00 00 f0 00 00 00 After: // Real registers 18: 05 ff 00 00 ff 0f 00 00 f0 00 00 00 ... // Virtual (per port) 40: 00 ff 00 00 e0 e0 00 00 00 00 00 1f 50: 01 ff 00 00 e0 e0 00 00 00 00 00 1f 60: 02 ff 00 00 ff ff 00 00 00 00 00 00 70: 03 ff 00 00 cf f3 00 00 00 00 00 0c 80: 04 ff 00 00 00 00 00 00 00 00 00 ff 90: 05 ff 00 00 ff 0f 00 00 f0 00 00 00 Fixes: 6863ca622759 ("regmap: Add support for register indirect addressing.") Signed-off-by: Andy Shevchenko Link: https://patch.msgid.link/20260302184753.2693803-1-andriy.shevchenko@linux.intel.com Tested-by: Marek Szyprowski Signed-off-by: Mark Brown --- drivers/base/regmap/regmap.c | 30 ++++++++++++++++++++++++++---- 1 file changed, 26 insertions(+), 4 deletions(-) diff --git a/drivers/base/regmap/regmap.c b/drivers/base/regmap/regmap.c index 607c1246d994..e388b19850e3 100644 --- a/drivers/base/regmap/regmap.c +++ b/drivers/base/regmap/regmap.c @@ -1545,6 +1545,7 @@ static int _regmap_select_page(struct regmap *map, unsigned int *reg, unsigned int val_num) { void *orig_work_buf; + unsigned int selector_reg; unsigned int win_offset; unsigned int win_page; bool page_chg; @@ -1563,10 +1564,31 @@ static int _regmap_select_page(struct regmap *map, unsigned int *reg, return -EINVAL; } - /* It is possible to have selector register inside data window. - In that case, selector register is located on every page and - it needs no page switching, when accessed alone. */ + /* + * Calculate the address of the selector register in the corresponding + * data window if it is located on every page. + */ + page_chg = in_range(range->selector_reg, range->window_start, range->window_len); + if (page_chg) + selector_reg = range->range_min + win_page * range->window_len + + range->selector_reg - range->window_start; + + /* + * It is possible to have selector register inside data window. + * In that case, selector register is located on every page and it + * needs no page switching, when accessed alone. + * + * Nevertheless we should synchronize the cache values for it. + * This can't be properly achieved if the selector register is + * the first and the only one to be read inside the data window. + * That's why we update it in that case as well. + * + * However, we specifically avoid updating it for the default page, + * when it's overlapped with the real data window, to prevent from + * infinite looping. + */ if (val_num > 1 || + (page_chg && selector_reg != range->selector_reg) || range->window_start + win_offset != range->selector_reg) { /* Use separate work_buf during page switching */ orig_work_buf = map->work_buf; @@ -1575,7 +1597,7 @@ static int _regmap_select_page(struct regmap *map, unsigned int *reg, ret = _regmap_update_bits(map, range->selector_reg, range->selector_mask, win_page << range->selector_shift, - &page_chg, false); + NULL, false); map->work_buf = orig_work_buf; From a6919f2a01f8fbf807b015e5b26aecae7db8117b Mon Sep 17 00:00:00 2001 From: Sean Rhodes Date: Sun, 15 Mar 2026 20:11:27 +0000 Subject: [PATCH 042/401] ALSA: hda/realtek: Sequence GPIO2 on Star Labs StarFighter The initial StarFighter quirk fixed the runtime suspend pop by muting speakers in the shutup callback before power-down. Further hardware validation showed that the speaker path is controlled directly by LINE2 EAPD on NID 0x1b together with GPIO2 for the external amplifier. Replace the shutup-delay workaround with explicit sequencing of those controls at playback start and stop: - assert LINE2 EAPD and drive GPIO2 high on PREPARE - deassert LINE2 EAPD and drive GPIO2 low on CLEANUP This avoids the runtime suspend pop without a sleep, and also fixes pops around G3 entry and display-manager start that the original workaround did not cover. Fixes: 1cb3c20688fc ("ALSA: hda/realtek: Fix speaker pop on Star Labs StarFighter") Tested-by: Sean Rhodes Signed-off-by: Sean Rhodes Link: https://patch.msgid.link/20260315201127.33744-1-sean@starlabs.systems Signed-off-by: Takashi Iwai --- sound/hda/codecs/realtek/alc269.c | 38 ++++++++++++++++++++++++++----- 1 file changed, 32 insertions(+), 6 deletions(-) diff --git a/sound/hda/codecs/realtek/alc269.c b/sound/hda/codecs/realtek/alc269.c index 7294298fce4a..190c1dd11bee 100644 --- a/sound/hda/codecs/realtek/alc269.c +++ b/sound/hda/codecs/realtek/alc269.c @@ -1017,12 +1017,30 @@ static int alc269_resume(struct hda_codec *codec) return 0; } -#define STARLABS_STARFIGHTER_SHUTUP_DELAY_MS 30 +#define ALC233_STARFIGHTER_SPK_PIN 0x1b +#define ALC233_STARFIGHTER_GPIO2 0x04 -static void starlabs_starfighter_shutup(struct hda_codec *codec) +static void alc233_starfighter_update_amp(struct hda_codec *codec, bool on) { - if (snd_hda_gen_shutup_speakers(codec)) - msleep(STARLABS_STARFIGHTER_SHUTUP_DELAY_MS); + snd_hda_codec_write(codec, ALC233_STARFIGHTER_SPK_PIN, 0, + AC_VERB_SET_EAPD_BTLENABLE, + on ? AC_EAPDBTL_EAPD : 0); + alc_update_gpio_data(codec, ALC233_STARFIGHTER_GPIO2, on); +} + +static void alc233_starfighter_pcm_hook(struct hda_pcm_stream *hinfo, + struct hda_codec *codec, + struct snd_pcm_substream *substream, + int action) +{ + switch (action) { + case HDA_GEN_PCM_ACT_PREPARE: + alc233_starfighter_update_amp(codec, true); + break; + case HDA_GEN_PCM_ACT_CLEANUP: + alc233_starfighter_update_amp(codec, false); + break; + } } static void alc233_fixup_starlabs_starfighter(struct hda_codec *codec, @@ -1031,8 +1049,16 @@ static void alc233_fixup_starlabs_starfighter(struct hda_codec *codec, { struct alc_spec *spec = codec->spec; - if (action == HDA_FIXUP_ACT_PRE_PROBE) - spec->shutup = starlabs_starfighter_shutup; + switch (action) { + case HDA_FIXUP_ACT_PRE_PROBE: + spec->gpio_mask |= ALC233_STARFIGHTER_GPIO2; + spec->gpio_dir |= ALC233_STARFIGHTER_GPIO2; + spec->gpio_data &= ~ALC233_STARFIGHTER_GPIO2; + break; + case HDA_FIXUP_ACT_PROBE: + spec->gen.pcm_playback_hook = alc233_starfighter_pcm_hook; + break; + } } static void alc269_fixup_pincfg_no_hp_to_lineout(struct hda_codec *codec, From 0bdf27abaf8940592207be939142451436afe39f Mon Sep 17 00:00:00 2001 From: Zhang Heng Date: Mon, 16 Mar 2026 10:28:43 +0800 Subject: [PATCH 043/401] ALSA: hda/realtek: add quirk for ASUS Strix G16 G615JMR The machine is equipped with ALC294 and requires the ALC287_FIXUP_TXNW2781_I2C_ASUS quirk for the amplifier to work properly. Since the machine's PCI SSID is also 1043:1204, HDA_CODEC_QUIRK is used to retain the previous quirk. Link: https://bugzilla.kernel.org/show_bug.cgi?id=221173 Cc: Signed-off-by: Zhang Heng Link: https://patch.msgid.link/20260316022843.2809968-1-zhangheng@kylinos.cn Signed-off-by: Takashi Iwai --- sound/hda/codecs/realtek/alc269.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/hda/codecs/realtek/alc269.c b/sound/hda/codecs/realtek/alc269.c index 190c1dd11bee..327f4dc1b09f 100644 --- a/sound/hda/codecs/realtek/alc269.c +++ b/sound/hda/codecs/realtek/alc269.c @@ -7204,6 +7204,7 @@ static const struct hda_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1043, 0x115d, "Asus 1015E", ALC269_FIXUP_LIMIT_INT_MIC_BOOST), SND_PCI_QUIRK(0x1043, 0x1194, "ASUS UM3406KA", ALC287_FIXUP_CS35L41_I2C_2), SND_PCI_QUIRK(0x1043, 0x11c0, "ASUS X556UR", ALC255_FIXUP_ASUS_MIC_NO_PRESENCE), + HDA_CODEC_QUIRK(0x1043, 0x1204, "ASUS Strix G16 G615JMR", ALC287_FIXUP_TXNW2781_I2C_ASUS), SND_PCI_QUIRK(0x1043, 0x1204, "ASUS Strix G615JHR_JMR_JPR", ALC287_FIXUP_TAS2781_I2C), SND_PCI_QUIRK(0x1043, 0x1214, "ASUS Strix G615LH_LM_LP", ALC287_FIXUP_TAS2781_I2C), SND_PCI_QUIRK(0x1043, 0x125e, "ASUS Q524UQK", ALC255_FIXUP_ASUS_MIC_NO_PRESENCE), From bb120ad57def62e3f23e3d999c5fbed11f610993 Mon Sep 17 00:00:00 2001 From: Alexey Nepomnyashih Date: Mon, 16 Mar 2026 19:18:22 +0000 Subject: [PATCH 044/401] ALSA: firewire-lib: fix uninitialized local variable Similar to commit d8dc8720468a ("ALSA: firewire-lib: fix uninitialized local variable"), the local variable `curr_cycle_time` in process_rx_packets() is declared without initialization. When the tracepoint event is not probed, the variable may appear to be used without being initialized. In practice the value is only relevant when the tracepoint is enabled, however initializing it avoids potential use of an uninitialized value and improves code safety. Initialize `curr_cycle_time` to zero. Fixes: fef4e61b0b76 ("ALSA: firewire-lib: extend tracepoints event including CYCLE_TIME of 1394 OHCI") Cc: stable@vger.kernel.org Signed-off-by: Alexey Nepomnyashih Link: https://patch.msgid.link/20260316191824.83249-1-sdl@nppct.ru Signed-off-by: Takashi Iwai --- sound/firewire/amdtp-stream.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/firewire/amdtp-stream.c b/sound/firewire/amdtp-stream.c index e97721f80f65..8e70da850fac 100644 --- a/sound/firewire/amdtp-stream.c +++ b/sound/firewire/amdtp-stream.c @@ -1164,7 +1164,7 @@ static void process_rx_packets(struct fw_iso_context *context, u32 tstamp, size_ struct pkt_desc *desc = s->packet_descs_cursor; unsigned int pkt_header_length; unsigned int packets; - u32 curr_cycle_time; + u32 curr_cycle_time = 0; bool need_hw_irq; int i; From abb863e6213dc41a58ef8bb3289b7e77460dabf3 Mon Sep 17 00:00:00 2001 From: Claudiu Beznea Date: Mon, 16 Mar 2026 15:32:45 +0200 Subject: [PATCH 045/401] dmaengine: sh: rz-dmac: Protect the driver specific lists The driver lists (ld_free, ld_queue) are used in rz_dmac_free_chan_resources(), rz_dmac_terminate_all(), rz_dmac_issue_pending(), and rz_dmac_irq_handler_thread(), all under the virtual channel lock. Take the same lock in rz_dmac_prep_slave_sg() and rz_dmac_prep_dma_memcpy() as well to avoid concurrency issues, since these functions also check whether the lists are empty and update or remove list entries. Fixes: 5000d37042a6 ("dmaengine: sh: Add DMAC driver for RZ/G2L SoC") Cc: stable@vger.kernel.org Reviewed-by: Frank Li Signed-off-by: Claudiu Beznea Link: https://patch.msgid.link/20260316133252.240348-2-claudiu.beznea.uj@bp.renesas.com Signed-off-by: Vinod Koul --- drivers/dma/sh/rz-dmac.c | 59 ++++++++++++++++++++++------------------ 1 file changed, 33 insertions(+), 26 deletions(-) diff --git a/drivers/dma/sh/rz-dmac.c b/drivers/dma/sh/rz-dmac.c index d84ca551b2bf..089e1ab29159 100644 --- a/drivers/dma/sh/rz-dmac.c +++ b/drivers/dma/sh/rz-dmac.c @@ -10,6 +10,7 @@ */ #include +#include #include #include #include @@ -447,6 +448,7 @@ static int rz_dmac_alloc_chan_resources(struct dma_chan *chan) if (!desc) break; + /* No need to lock. This is called only for the 1st client. */ list_add_tail(&desc->node, &channel->ld_free); channel->descs_allocated++; } @@ -502,18 +504,21 @@ rz_dmac_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dest, dma_addr_t src, dev_dbg(dmac->dev, "%s channel: %d src=0x%pad dst=0x%pad len=%zu\n", __func__, channel->index, &src, &dest, len); - if (list_empty(&channel->ld_free)) - return NULL; + scoped_guard(spinlock_irqsave, &channel->vc.lock) { + if (list_empty(&channel->ld_free)) + return NULL; - desc = list_first_entry(&channel->ld_free, struct rz_dmac_desc, node); + desc = list_first_entry(&channel->ld_free, struct rz_dmac_desc, node); - desc->type = RZ_DMAC_DESC_MEMCPY; - desc->src = src; - desc->dest = dest; - desc->len = len; - desc->direction = DMA_MEM_TO_MEM; + desc->type = RZ_DMAC_DESC_MEMCPY; + desc->src = src; + desc->dest = dest; + desc->len = len; + desc->direction = DMA_MEM_TO_MEM; + + list_move_tail(channel->ld_free.next, &channel->ld_queue); + } - list_move_tail(channel->ld_free.next, &channel->ld_queue); return vchan_tx_prep(&channel->vc, &desc->vd, flags); } @@ -529,27 +534,29 @@ rz_dmac_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl, int dma_length = 0; int i = 0; - if (list_empty(&channel->ld_free)) - return NULL; + scoped_guard(spinlock_irqsave, &channel->vc.lock) { + if (list_empty(&channel->ld_free)) + return NULL; - desc = list_first_entry(&channel->ld_free, struct rz_dmac_desc, node); + desc = list_first_entry(&channel->ld_free, struct rz_dmac_desc, node); - for_each_sg(sgl, sg, sg_len, i) { - dma_length += sg_dma_len(sg); + for_each_sg(sgl, sg, sg_len, i) + dma_length += sg_dma_len(sg); + + desc->type = RZ_DMAC_DESC_SLAVE_SG; + desc->sg = sgl; + desc->sgcount = sg_len; + desc->len = dma_length; + desc->direction = direction; + + if (direction == DMA_DEV_TO_MEM) + desc->src = channel->src_per_address; + else + desc->dest = channel->dst_per_address; + + list_move_tail(channel->ld_free.next, &channel->ld_queue); } - desc->type = RZ_DMAC_DESC_SLAVE_SG; - desc->sg = sgl; - desc->sgcount = sg_len; - desc->len = dma_length; - desc->direction = direction; - - if (direction == DMA_DEV_TO_MEM) - desc->src = channel->src_per_address; - else - desc->dest = channel->dst_per_address; - - list_move_tail(channel->ld_free.next, &channel->ld_queue); return vchan_tx_prep(&channel->vc, &desc->vd, flags); } From 89a8567d84bde88cb7cdbbac2ab2299c4f991490 Mon Sep 17 00:00:00 2001 From: Claudiu Beznea Date: Mon, 16 Mar 2026 15:32:46 +0200 Subject: [PATCH 046/401] dmaengine: sh: rz-dmac: Move CHCTRL updates under spinlock Both rz_dmac_disable_hw() and rz_dmac_irq_handle_channel() update the CHCTRL register. To avoid concurrency issues when configuring functionalities exposed by this registers, take the virtual channel lock. All other CHCTRL updates were already protected by the same lock. Previously, rz_dmac_disable_hw() disabled and re-enabled local IRQs, before accessing CHCTRL registers but this does not ensure race-free access. Remove the local IRQ disable/enable code as well. Fixes: 5000d37042a6 ("dmaengine: sh: Add DMAC driver for RZ/G2L SoC") Cc: stable@vger.kernel.org Reviewed-by: Biju Das Reviewed-by: Frank Li Signed-off-by: Claudiu Beznea Link: https://patch.msgid.link/20260316133252.240348-3-claudiu.beznea.uj@bp.renesas.com Signed-off-by: Vinod Koul --- drivers/dma/sh/rz-dmac.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/drivers/dma/sh/rz-dmac.c b/drivers/dma/sh/rz-dmac.c index 089e1ab29159..f30bdf69c740 100644 --- a/drivers/dma/sh/rz-dmac.c +++ b/drivers/dma/sh/rz-dmac.c @@ -297,13 +297,10 @@ static void rz_dmac_disable_hw(struct rz_dmac_chan *channel) { struct dma_chan *chan = &channel->vc.chan; struct rz_dmac *dmac = to_rz_dmac(chan->device); - unsigned long flags; dev_dbg(dmac->dev, "%s channel %d\n", __func__, channel->index); - local_irq_save(flags); rz_dmac_ch_writel(channel, CHCTRL_DEFAULT, CHCTRL, 1); - local_irq_restore(flags); } static void rz_dmac_set_dmars_register(struct rz_dmac *dmac, int nr, u32 dmars) @@ -568,8 +565,8 @@ static int rz_dmac_terminate_all(struct dma_chan *chan) unsigned int i; LIST_HEAD(head); - rz_dmac_disable_hw(channel); spin_lock_irqsave(&channel->vc.lock, flags); + rz_dmac_disable_hw(channel); for (i = 0; i < DMAC_NR_LMDESC; i++) lmdesc[i].header = 0; @@ -706,7 +703,9 @@ static void rz_dmac_irq_handle_channel(struct rz_dmac_chan *channel) if (chstat & CHSTAT_ER) { dev_err(dmac->dev, "DMAC err CHSTAT_%d = %08X\n", channel->index, chstat); - rz_dmac_ch_writel(channel, CHCTRL_DEFAULT, CHCTRL, 1); + + scoped_guard(spinlock_irqsave, &channel->vc.lock) + rz_dmac_ch_writel(channel, CHCTRL_DEFAULT, CHCTRL, 1); goto done; } From e9cc95397bb7da13fe8a5b53a2f23cfaf9018ade Mon Sep 17 00:00:00 2001 From: Marek Vasut Date: Mon, 16 Mar 2026 23:16:54 +0100 Subject: [PATCH 047/401] dmaengine: xilinx: xilinx_dma: Fix dma_device directions Unlike chan->direction , struct dma_device .directions field is a bitfield. Turn chan->direction into a bitfield to make it compatible with struct dma_device .directions . Fixes: 7e01511443c3 ("dmaengine: xilinx_dma: Set dma_device directions") Signed-off-by: Marek Vasut Link: https://patch.msgid.link/20260316221728.160139-1-marex@nabladev.com Signed-off-by: Vinod Koul --- drivers/dma/xilinx/xilinx_dma.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/dma/xilinx/xilinx_dma.c b/drivers/dma/xilinx/xilinx_dma.c index b53292e02448..bbecf18b04ac 100644 --- a/drivers/dma/xilinx/xilinx_dma.c +++ b/drivers/dma/xilinx/xilinx_dma.c @@ -3024,7 +3024,7 @@ static int xilinx_dma_chan_probe(struct xilinx_dma_device *xdev, return -EINVAL; } - xdev->common.directions |= chan->direction; + xdev->common.directions |= BIT(chan->direction); /* Request the interrupt */ chan->irq = of_irq_get(node, chan->tdest); From f61d145999d61948a23cd436ebbfa4c3b9ab8987 Mon Sep 17 00:00:00 2001 From: Marek Vasut Date: Mon, 16 Mar 2026 23:18:57 +0100 Subject: [PATCH 048/401] dmaengine: xilinx: xilinx_dma: Fix residue calculation for cyclic DMA The cyclic DMA calculation is currently entirely broken and reports residue only for the first segment. The problem is twofold. First, when the first descriptor finishes, it is moved from active_list to done_list, but it is never returned back into the active_list. The xilinx_dma_tx_status() expects the descriptor to be in the active_list to report any meaningful residue information, which never happens after the first descriptor finishes. Fix this up in xilinx_dma_start_transfer() and if the descriptor is cyclic, lift it from done_list and place it back into active_list list. Second, the segment .status fields of the descriptor remain dirty. Once the DMA did one pass on the descriptor, the .status fields are populated with data by the DMA, but the .status fields are not cleared before reuse during the next cyclic DMA round. The xilinx_dma_get_residue() recognizes that as if the descriptor was complete and had 0 residue, which is bogus. Reinitialize the status field before placing the descriptor back into the active_list. Fixes: c0bba3a99f07 ("dmaengine: vdma: Add Support for Xilinx AXI Direct Memory Access Engine") Signed-off-by: Marek Vasut Link: https://patch.msgid.link/20260316221943.160375-1-marex@nabladev.com Signed-off-by: Vinod Koul --- drivers/dma/xilinx/xilinx_dma.c | 23 ++++++++++++++++++++++- 1 file changed, 22 insertions(+), 1 deletion(-) diff --git a/drivers/dma/xilinx/xilinx_dma.c b/drivers/dma/xilinx/xilinx_dma.c index bbecf18b04ac..9dd5d7388e1d 100644 --- a/drivers/dma/xilinx/xilinx_dma.c +++ b/drivers/dma/xilinx/xilinx_dma.c @@ -1564,8 +1564,29 @@ static void xilinx_dma_start_transfer(struct xilinx_dma_chan *chan) if (chan->err) return; - if (list_empty(&chan->pending_list)) + if (list_empty(&chan->pending_list)) { + if (chan->cyclic) { + struct xilinx_dma_tx_descriptor *desc; + struct list_head *entry; + + desc = list_last_entry(&chan->done_list, + struct xilinx_dma_tx_descriptor, node); + list_for_each(entry, &desc->segments) { + struct xilinx_axidma_tx_segment *axidma_seg; + struct xilinx_axidma_desc_hw *axidma_hw; + axidma_seg = list_entry(entry, + struct xilinx_axidma_tx_segment, + node); + axidma_hw = &axidma_seg->hw; + axidma_hw->status = 0; + } + + list_splice_tail_init(&chan->done_list, &chan->active_list); + chan->desc_pendingcount = 0; + chan->idle = false; + } return; + } if (!chan->idle) return; From c7d812e33f3e8ca0fa9eeabf71d1c7bc3acedc09 Mon Sep 17 00:00:00 2001 From: Marek Vasut Date: Mon, 16 Mar 2026 23:25:24 +0100 Subject: [PATCH 049/401] dmaengine: xilinx: xilinx_dma: Fix unmasked residue subtraction The segment .control and .status fields both contain top bits which are not part of the buffer size, the buffer size is located only in the bottom max_buffer_len bits. To avoid interference from those top bits, mask out the size using max_buffer_len first, and only then subtract the values. Fixes: a575d0b4e663 ("dmaengine: xilinx_dma: Introduce xilinx_dma_get_residue") Signed-off-by: Marek Vasut Link: https://patch.msgid.link/20260316222530.163815-1-marex@nabladev.com Signed-off-by: Vinod Koul --- drivers/dma/xilinx/xilinx_dma.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/dma/xilinx/xilinx_dma.c b/drivers/dma/xilinx/xilinx_dma.c index 9dd5d7388e1d..969343342e86 100644 --- a/drivers/dma/xilinx/xilinx_dma.c +++ b/drivers/dma/xilinx/xilinx_dma.c @@ -997,16 +997,16 @@ static u32 xilinx_dma_get_residue(struct xilinx_dma_chan *chan, struct xilinx_cdma_tx_segment, node); cdma_hw = &cdma_seg->hw; - residue += (cdma_hw->control - cdma_hw->status) & - chan->xdev->max_buffer_len; + residue += (cdma_hw->control & chan->xdev->max_buffer_len) - + (cdma_hw->status & chan->xdev->max_buffer_len); } else if (chan->xdev->dma_config->dmatype == XDMA_TYPE_AXIDMA) { axidma_seg = list_entry(entry, struct xilinx_axidma_tx_segment, node); axidma_hw = &axidma_seg->hw; - residue += (axidma_hw->control - axidma_hw->status) & - chan->xdev->max_buffer_len; + residue += (axidma_hw->control & chan->xdev->max_buffer_len) - + (axidma_hw->status & chan->xdev->max_buffer_len); } else { aximcdma_seg = list_entry(entry, @@ -1014,8 +1014,8 @@ static u32 xilinx_dma_get_residue(struct xilinx_dma_chan *chan, node); aximcdma_hw = &aximcdma_seg->hw; residue += - (aximcdma_hw->control - aximcdma_hw->status) & - chan->xdev->max_buffer_len; + (aximcdma_hw->control & chan->xdev->max_buffer_len) - + (aximcdma_hw->status & chan->xdev->max_buffer_len); } } From a17ce4bc6f4f9acf77ba416c36791a15602e53aa Mon Sep 17 00:00:00 2001 From: Tomi Valkeinen Date: Wed, 11 Mar 2026 07:34:46 +0200 Subject: [PATCH 050/401] dmaengine: xilinx_dma: Fix reset related timeout with two-channel AXIDMA A single AXIDMA controller can have one or two channels. When it has two channels, the reset for both are tied together: resetting one channel resets the other as well. This creates a problem where resetting one channel will reset the registers for both channels, including clearing interrupt enable bits for the other channel, which can then lead to timeouts as the driver is waiting for an interrupt which never comes. The driver currently has a probe-time work around for this: when a channel is created, the driver also resets and enables the interrupts. With two channels the reset for the second channel will clear the interrupt enables for the first one. The work around in the driver is just to manually enable the interrupts again in xilinx_dma_alloc_chan_resources(). This workaround only addresses the probe-time issue. When channels are reset at runtime (e.g., in xilinx_dma_terminate_all() or during error recovery), there's no corresponding mechanism to restore the other channel's interrupt enables. This leads to one channel having its interrupts disabled while the driver expects them to work, causing timeouts and DMA failures. A proper fix is a complicated matter, as we should not reset the other channel when it's operating normally. So, perhaps, there should be some kind of synchronization for a common reset, which is not trivial to implement. To add to the complexity, the driver also supports other DMA types, like VDMA, CDMA and MCDMA, which don't have a shared reset. However, when the two-channel AXIDMA is used in the (assumably) normal use case, providing DMA for a single memory-to-memory device, the common reset is a bit smaller issue: when something bad happens on one channel, or when one channel is terminated, the assumption is that we also want to terminate the other channel. And thus resetting both at the same time is "ok". With that line of thinking we can implement a bit better work around than just the current probe time work around: let's enable the AXIDMA interrupts at xilinx_dma_start_transfer() instead. This ensures interrupts are enabled whenever a transfer starts, regardless of any prior resets that may have cleared them. This approach is also more logical: enable interrupts only when needed for a transfer, rather than at resource allocation time, and, I think, all the other DMA types should also use this model, but I'm reluctant to do such changes as I cannot test them. The reset function still enables interrupts even though it's not needed for AXIDMA anymore, but it's common code for all DMA types (VDMA, CDMA, MCDMA), so leave it unchanged to avoid affecting other variants. Signed-off-by: Tomi Valkeinen Fixes: c0bba3a99f07 ("dmaengine: vdma: Add Support for Xilinx AXI Direct Memory Access Engine") Link: https://patch.msgid.link/20260311-xilinx-dma-fix-v2-1-a725abb66e3c@ideasonboard.com Signed-off-by: Vinod Koul --- drivers/dma/xilinx/xilinx_dma.c | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/drivers/dma/xilinx/xilinx_dma.c b/drivers/dma/xilinx/xilinx_dma.c index 969343342e86..e3a18ee42aa2 100644 --- a/drivers/dma/xilinx/xilinx_dma.c +++ b/drivers/dma/xilinx/xilinx_dma.c @@ -1235,14 +1235,6 @@ static int xilinx_dma_alloc_chan_resources(struct dma_chan *dchan) dma_cookie_init(dchan); - if (chan->xdev->dma_config->dmatype == XDMA_TYPE_AXIDMA) { - /* For AXI DMA resetting once channel will reset the - * other channel as well so enable the interrupts here. - */ - dma_ctrl_set(chan, XILINX_DMA_REG_DMACR, - XILINX_DMA_DMAXR_ALL_IRQ_MASK); - } - if ((chan->xdev->dma_config->dmatype == XDMA_TYPE_CDMA) && chan->has_sg) dma_ctrl_set(chan, XILINX_DMA_REG_DMACR, XILINX_CDMA_CR_SGMODE); @@ -1612,6 +1604,7 @@ static void xilinx_dma_start_transfer(struct xilinx_dma_chan *chan) head_desc->async_tx.phys); reg &= ~XILINX_DMA_CR_DELAY_MAX; reg |= chan->irq_delay << XILINX_DMA_CR_DELAY_SHIFT; + reg |= XILINX_DMA_DMAXR_ALL_IRQ_MASK; dma_ctrl_write(chan, XILINX_DMA_REG_DMACR, reg); xilinx_dma_start(chan); From 4eae391a8e4cb065b900afcb95a3b0f97c75184d Mon Sep 17 00:00:00 2001 From: Sebastian Reichel Date: Mon, 16 Mar 2026 19:23:00 +0100 Subject: [PATCH 051/401] ASoC: dt-bindings: rockchip: Add compatible for RK3576 SPDIF Add a compatible string for SPDIF on RK3576, which is similar to the one on RK3568. Signed-off-by: Sebastian Reichel Link: https://patch.msgid.link/20260316-rk3576-spdif-v1-1-acb75088b560@collabora.com Signed-off-by: Mark Brown --- Documentation/devicetree/bindings/sound/rockchip-spdif.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/Documentation/devicetree/bindings/sound/rockchip-spdif.yaml b/Documentation/devicetree/bindings/sound/rockchip-spdif.yaml index 56c755c22945..502907dd28b3 100644 --- a/Documentation/devicetree/bindings/sound/rockchip-spdif.yaml +++ b/Documentation/devicetree/bindings/sound/rockchip-spdif.yaml @@ -33,6 +33,7 @@ properties: - const: rockchip,rk3066-spdif - items: - enum: + - rockchip,rk3576-spdif - rockchip,rk3588-spdif - const: rockchip,rk3568-spdif From f8b8820a4a16c4ef673e90ded41e8348514e53f0 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 17 Mar 2026 14:44:04 +0100 Subject: [PATCH 052/401] fs: clear I_DIRTY_TIME in sync_lazytime For file systems implementing ->sync_lazytime, I_DIRTY_TIME fails to get cleared in sync_lazytime, and might cause additional calls to sync_lazytime during inode deactivation. Use the same pattern as in __mark_inode_dirty to clear the flag under the inode lock. Fixes: 5cf06ea56ee6 ("fs: add a ->sync_lazytime method") Signed-off-by: Christoph Hellwig Link: https://patch.msgid.link/20260317134409.1691317-1-hch@lst.de Signed-off-by: Christian Brauner --- fs/fs-writeback.c | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index 7c75ed7e8979..d8dac1931595 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c @@ -1711,6 +1711,19 @@ static void requeue_inode(struct inode *inode, struct bdi_writeback *wb, } } +static bool __sync_lazytime(struct inode *inode) +{ + spin_lock(&inode->i_lock); + if (!(inode_state_read(inode) & I_DIRTY_TIME)) { + spin_unlock(&inode->i_lock); + return false; + } + inode_state_clear(inode, I_DIRTY_TIME); + spin_unlock(&inode->i_lock); + inode->i_op->sync_lazytime(inode); + return true; +} + bool sync_lazytime(struct inode *inode) { if (!(inode_state_read_once(inode) & I_DIRTY_TIME)) @@ -1718,9 +1731,8 @@ bool sync_lazytime(struct inode *inode) trace_writeback_lazytime(inode); if (inode->i_op->sync_lazytime) - inode->i_op->sync_lazytime(inode); - else - mark_inode_dirty_sync(inode); + return __sync_lazytime(inode); + mark_inode_dirty_sync(inode); return true; } From f200b2f9a810c440c6750b56fc647b73337749a1 Mon Sep 17 00:00:00 2001 From: Vee Satayamas Date: Sun, 15 Mar 2026 21:25:12 +0700 Subject: [PATCH 053/401] ASoC: amd: yc: Add DMI quirk for ASUS EXPERTBOOK BM1403CDA Add a DMI quirk for the Asus Expertbook BM1403CDA to resolve the issue of the internal microphone not being detected. Link: https://bugzilla.kernel.org/show_bug.cgi?id=221236 Signed-off-by: Vee Satayamas Reviewed-by: Zhang Heng Link: https://patch.msgid.link/20260315142511.66029-2-vsatayamas@gmail.com Signed-off-by: Mark Brown --- sound/soc/amd/yc/acp6x-mach.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/sound/soc/amd/yc/acp6x-mach.c b/sound/soc/amd/yc/acp6x-mach.c index 1324543b42d7..c536de1bb94a 100644 --- a/sound/soc/amd/yc/acp6x-mach.c +++ b/sound/soc/amd/yc/acp6x-mach.c @@ -717,6 +717,13 @@ static const struct dmi_system_id yc_acp_quirk_table[] = { DMI_MATCH(DMI_BOARD_NAME, "PM1503CDA"), } }, + { + .driver_data = &acp6x_card, + .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "ASUSTeK COMPUTER INC."), + DMI_MATCH(DMI_BOARD_NAME, "BM1403CDA"), + } + }, {} }; From c465f5591aa84a6f85d66d152e28b92844a45d4f Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Tue, 17 Mar 2026 16:59:45 +0100 Subject: [PATCH 054/401] selftests/mount_setattr: increase tmpfs size for idmapped mount tests The mount_setattr_idmapped fixture mounts a 2 MB tmpfs at /mnt and then creates a 2 GB sparse ext4 image at /mnt/C/ext4.img. While ftruncate() succeeds (sparse file), mkfs.ext4 needs to write actual metadata blocks (inode tables, journal, bitmaps) which easily exceeds the 2 MB tmpfs limit, causing ENOSPC and failing the fixture setup for all mount_setattr_idmapped tests. This was introduced by commit d37d4720c3e7 ("selftests/mount_settattr: ensure that ext4 filesystem can be created") which increased the image size from 2 MB to 2 GB but didn't adjust the tmpfs size. Bump the tmpfs size to 256 MB which is sufficient for the ext4 metadata. Fixes: d37d4720c3e7 ("selftests/mount_settattr: ensure that ext4 filesystem can be created") Signed-off-by: Christian Brauner --- tools/testing/selftests/mount_setattr/mount_setattr_test.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/mount_setattr/mount_setattr_test.c b/tools/testing/selftests/mount_setattr/mount_setattr_test.c index 7aec3ae82a44..c6dafb3cc116 100644 --- a/tools/testing/selftests/mount_setattr/mount_setattr_test.c +++ b/tools/testing/selftests/mount_setattr/mount_setattr_test.c @@ -1020,7 +1020,7 @@ FIXTURE_SETUP(mount_setattr_idmapped) "size=100000,mode=700"), 0); ASSERT_EQ(mount("testing", "/mnt", "tmpfs", MS_NOATIME | MS_NODEV, - "size=2m,mode=700"), 0); + "size=256m,mode=700"), 0); ASSERT_EQ(mkdir("/mnt/A", 0777), 0); From 1f182ec9d7084db7dfdb2372d453c28f0e5c3f0a Mon Sep 17 00:00:00 2001 From: Zhang Heng Date: Mon, 16 Mar 2026 16:02:18 +0800 Subject: [PATCH 055/401] ASoC: amd: yc: Add DMI quirk for Thin A15 B7VF Add a DMI quirk for the Thin A15 B7VF fixing the issue where the internal microphone was not detected. Link: https://bugzilla.kernel.org/show_bug.cgi?id=220833 Signed-off-by: Zhang Heng Link: https://patch.msgid.link/20260316080218.2931304-1-zhangheng@kylinos.cn Signed-off-by: Mark Brown --- sound/soc/amd/yc/acp6x-mach.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/sound/soc/amd/yc/acp6x-mach.c b/sound/soc/amd/yc/acp6x-mach.c index c536de1bb94a..6f1c105ca77e 100644 --- a/sound/soc/amd/yc/acp6x-mach.c +++ b/sound/soc/amd/yc/acp6x-mach.c @@ -724,6 +724,13 @@ static const struct dmi_system_id yc_acp_quirk_table[] = { DMI_MATCH(DMI_BOARD_NAME, "BM1403CDA"), } }, + { + .driver_data = &acp6x_card, + .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "Micro-Star International Co., Ltd."), + DMI_MATCH(DMI_PRODUCT_NAME, "Thin A15 B7VE"), + } + }, {} }; From 00da250c21b074ea9494c375d0117b69e5b1d0a4 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Fri, 13 Mar 2026 15:41:58 -0400 Subject: [PATCH 056/401] RDMA/rw: Fall back to direct SGE on MR pool exhaustion When IOMMU passthrough mode is active, ib_dma_map_sgtable_attrs() produces no coalescing: each scatterlist page maps 1:1 to a DMA entry, so sgt.nents equals the raw page count. A 1 MB transfer yields 256 DMA entries. If that count exceeds the device's max_sgl_rd threshold (an optimization hint from mlx5 firmware), rdma_rw_io_needs_mr() steers the operation into the MR registration path. Each such operation consumes one or more MRs from a pool sized at max_rdma_ctxs -- roughly one MR per concurrent context. Under write-intensive workloads that issue many concurrent RDMA READs, the pool is rapidly exhausted, ib_mr_pool_get() returns NULL, and rdma_rw_init_one_mr() returns -EAGAIN. Upper layer protocols treat this as a fatal DMA mapping failure and tear down the connection. The max_sgl_rd check is a performance optimization, not a correctness requirement: the device can handle large SGE counts via direct posting, just less efficiently than with MR registration. When the MR pool cannot satisfy a request, falling back to the direct SGE (map_wrs) path avoids the connection reset while preserving the MR optimization for the common case where pool resources are available. Add a fallback in rdma_rw_ctx_init() so that -EAGAIN from rdma_rw_init_mr_wrs() triggers direct SGE posting instead of propagating the error. iWARP devices, which mandate MR registration for RDMA READs, and force_mr debug mode continue to treat -EAGAIN as terminal. Fixes: 00bd1439f464 ("RDMA/rw: Support threshold for registration vs scattering to local pages") Signed-off-by: Chuck Lever Reviewed-by: Christoph Hellwig Link: https://patch.msgid.link/20260313194201.5818-2-cel@kernel.org Signed-off-by: Leon Romanovsky --- drivers/infiniband/core/rw.c | 27 +++++++++++++++++++++------ 1 file changed, 21 insertions(+), 6 deletions(-) diff --git a/drivers/infiniband/core/rw.c b/drivers/infiniband/core/rw.c index fc45c384833f..c01d5e605053 100644 --- a/drivers/infiniband/core/rw.c +++ b/drivers/infiniband/core/rw.c @@ -608,14 +608,29 @@ int rdma_rw_ctx_init(struct rdma_rw_ctx *ctx, struct ib_qp *qp, u32 port_num, if (rdma_rw_io_needs_mr(qp->device, port_num, dir, sg_cnt)) { ret = rdma_rw_init_mr_wrs(ctx, qp, port_num, sg, sg_cnt, sg_offset, remote_addr, rkey, dir); - } else if (sg_cnt > 1) { - ret = rdma_rw_init_map_wrs(ctx, qp, sg, sg_cnt, sg_offset, - remote_addr, rkey, dir); - } else { - ret = rdma_rw_init_single_wr(ctx, qp, sg, sg_offset, - remote_addr, rkey, dir); + /* + * If MR init succeeded or failed for a reason other + * than pool exhaustion, that result is final. + * + * Pool exhaustion (-EAGAIN) from the max_sgl_rd + * optimization is recoverable: fall back to + * direct SGE posting. iWARP and force_mr require + * MRs unconditionally, so -EAGAIN is terminal. + */ + if (ret != -EAGAIN || + rdma_protocol_iwarp(qp->device, port_num) || + unlikely(rdma_rw_force_mr)) + goto out; } + if (sg_cnt > 1) + ret = rdma_rw_init_map_wrs(ctx, qp, sg, sg_cnt, sg_offset, + remote_addr, rkey, dir); + else + ret = rdma_rw_init_single_wr(ctx, qp, sg, sg_offset, + remote_addr, rkey, dir); + +out: if (ret < 0) goto out_unmap_sg; return ret; From f28599f3969d1d928276772f1306872344c967f0 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Fri, 13 Mar 2026 15:41:59 -0400 Subject: [PATCH 057/401] RDMA/rw: Fix MR pool exhaustion in bvec RDMA READ path When IOVA-based DMA mapping is unavailable (e.g., IOMMU passthrough mode), rdma_rw_ctx_init_bvec() falls back to checking rdma_rw_io_needs_mr() with the raw bvec count. Unlike the scatterlist path in rdma_rw_ctx_init(), which passes a post-DMA-mapping entry count that reflects coalescing of physically contiguous pages, the bvec path passes the pre-mapping page count. This overstates the number of DMA entries, causing every multi-bvec RDMA READ to consume an MR from the QP's pool. Under NFS WRITE workloads the server performs RDMA READs to pull data from the client. With the inflated MR demand, the pool is rapidly exhausted, ib_mr_pool_get() returns NULL, and rdma_rw_init_one_mr() returns -EAGAIN. svcrdma treats this as a DMA mapping failure, closes the connection, and the client reconnects -- producing a cycle of 71% RPC retransmissions and ~100 reconnections per test run. RDMA WRITEs (NFS READ direction) are unaffected because DMA_TO_DEVICE never triggers the max_sgl_rd check. Remove the rdma_rw_io_needs_mr() gate from the bvec path entirely, so that bvec RDMA operations always use the map_wrs path (direct WR posting without MR allocation). The bvec caller has no post-DMA-coalescing segment count available -- xdr_buf and svc_rqst hold pages as individual pointers, and physical contiguity is discovered only during DMA mapping -- so the raw page count cannot serve as a reliable input to rdma_rw_io_needs_mr(). iWARP devices, which require MRs unconditionally, are handled by an earlier check in rdma_rw_ctx_init_bvec() and are unaffected. Fixes: bea28ac14cab ("RDMA/core: add MR support for bvec-based RDMA operations") Signed-off-by: Chuck Lever Reviewed-by: Christoph Hellwig Link: https://patch.msgid.link/20260313194201.5818-3-cel@kernel.org Signed-off-by: Leon Romanovsky --- drivers/infiniband/core/rw.c | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/drivers/infiniband/core/rw.c b/drivers/infiniband/core/rw.c index c01d5e605053..4fafe393a48c 100644 --- a/drivers/infiniband/core/rw.c +++ b/drivers/infiniband/core/rw.c @@ -701,14 +701,16 @@ int rdma_rw_ctx_init_bvec(struct rdma_rw_ctx *ctx, struct ib_qp *qp, return ret; /* - * IOVA mapping not available. Check if MR registration provides - * better performance than multiple SGE entries. + * IOVA not available; fall back to the map_wrs path, which maps + * each bvec as a direct SGE. This is always correct: the MR path + * is a throughput optimization, not a correctness requirement. + * (iWARP, which does require MRs, is handled by the check above.) + * + * The rdma_rw_io_needs_mr() gate is not used here because nr_bvec + * is a raw page count that overstates DMA entry demand -- the bvec + * caller has no post-DMA-coalescing segment count, and feeding the + * inflated count into the MR path exhausts the pool on RDMA READs. */ - if (rdma_rw_io_needs_mr(dev, port_num, dir, nr_bvec)) - return rdma_rw_init_mr_wrs_bvec(ctx, qp, port_num, bvecs, - nr_bvec, &iter, remote_addr, - rkey, dir); - return rdma_rw_init_map_wrs_bvec(ctx, qp, bvecs, nr_bvec, &iter, remote_addr, rkey, dir); } From 0f2055db7b630559870afb40fc84490816ab8ec5 Mon Sep 17 00:00:00 2001 From: Ethan Tidmore Date: Fri, 13 Mar 2026 23:57:30 -0500 Subject: [PATCH 058/401] RDMA/efa: Fix possible deadlock In the error path for efa_com_alloc_comp_ctx() the semaphore assigned to &aq->avail_cmds is not released. Detected by Smatch: drivers/infiniband/hw/efa/efa_com.c:662 efa_com_cmd_exec() warn: inconsistent returns '&aq->avail_cmds' Add release for &aq->avail_cmds in efa_com_alloc_comp_ctx() error path. Fixes: ef3b06742c8a2 ("RDMA/efa: Fix use of completion ctx after free") Signed-off-by: Ethan Tidmore Link: https://patch.msgid.link/20260314045730.1143862-1-ethantidmore06@gmail.com Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/efa/efa_com.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/infiniband/hw/efa/efa_com.c b/drivers/infiniband/hw/efa/efa_com.c index 56caba612139..e97b5f0d7003 100644 --- a/drivers/infiniband/hw/efa/efa_com.c +++ b/drivers/infiniband/hw/efa/efa_com.c @@ -629,6 +629,7 @@ int efa_com_cmd_exec(struct efa_com_admin_queue *aq, comp_ctx = efa_com_alloc_comp_ctx(aq); if (!comp_ctx) { clear_bit(EFA_AQ_STATE_RUNNING_BIT, &aq->state); + up(&aq->avail_cmds); return -EINVAL; } From 8780f561f6717dec52351251881bff79e960eb46 Mon Sep 17 00:00:00 2001 From: "Geoffrey D. Bennett" Date: Wed, 18 Mar 2026 06:02:30 +1030 Subject: [PATCH 059/401] ALSA: usb-audio: Exclude Scarlett 2i2 1st Gen from SKIP_IFACE_SETUP The Focusrite Scarlett 2i2 1st Gen (1235:8006) produces distorted/silent audio when QUIRK_FLAG_SKIP_IFACE_SETUP is active, as that flag causes the feedback format to be detected as 17.15 instead of 16.16. Add a DEVICE_FLG entry for this device before the Focusrite VENDOR_FLG entry so that it gets no quirk flags, overriding the vendor-wide SKIP_IFACE_SETUP. This device doesn't have the internal mixer, Air, or Safe modes that the quirk was designed to protect. Fixes: 38c322068a26 ("ALSA: usb-audio: Add QUIRK_FLAG_SKIP_IFACE_SETUP") Reported-by: pairomaniac [https://github.com/geoffreybennett/linux-fcp/issues/54] Tested-by: pairomaniac [https://github.com/geoffreybennett/linux-fcp/issues/54] Signed-off-by: Geoffrey D. Bennett Link: https://patch.msgid.link/abmsTjKmQMKbhYtK@m.b4.vu Signed-off-by: Takashi Iwai --- sound/usb/quirks.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/usb/quirks.c b/sound/usb/quirks.c index 049a94079f9e..cd3a9fe8edf2 100644 --- a/sound/usb/quirks.c +++ b/sound/usb/quirks.c @@ -2429,6 +2429,7 @@ static const struct usb_audio_quirk_flags_table quirk_flags_table[] = { QUIRK_FLAG_CTL_MSG_DELAY | QUIRK_FLAG_IFACE_DELAY), VENDOR_FLG(0x07fd, /* MOTU */ QUIRK_FLAG_VALIDATE_RATES), + DEVICE_FLG(0x1235, 0x8006, 0), /* Focusrite Scarlett 2i2 1st Gen */ VENDOR_FLG(0x1235, /* Focusrite Novation */ QUIRK_FLAG_SKIP_CLOCK_SELECTOR | QUIRK_FLAG_SKIP_IFACE_SETUP), From 4f24a767e3d64a5f58c595b5c29b6063a201f1e3 Mon Sep 17 00:00:00 2001 From: Yuto Ohnuki Date: Tue, 10 Mar 2026 18:38:37 +0000 Subject: [PATCH 060/401] xfs: stop reclaim before pushing AIL during unmount The unmount sequence in xfs_unmount_flush_inodes() pushed the AIL while background reclaim and inodegc are still running. This is broken independently of any use-after-free issues - background reclaim and inodegc should not be running while the AIL is being pushed during unmount, as inodegc can dirty and insert inodes into the AIL during the flush, and background reclaim can race to abort and free dirty inodes. Reorder xfs_unmount_flush_inodes() to stop inodegc and cancel background reclaim before pushing the AIL. Stop inodegc before cancelling m_reclaim_work because the inodegc worker can re-queue m_reclaim_work via xfs_inodegc_set_reclaimable. Reported-by: syzbot+652af2b3c5569c4ab63c@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=652af2b3c5569c4ab63c Fixes: 90c60e164012 ("xfs: xfs_iflush() is no longer necessary") Cc: stable@vger.kernel.org # v5.9 Signed-off-by: Yuto Ohnuki Reviewed-by: Darrick J. Wong Signed-off-by: Carlos Maiolino --- fs/xfs/xfs_mount.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c index 9c295abd0a0a..ef1ea8a1238c 100644 --- a/fs/xfs/xfs_mount.c +++ b/fs/xfs/xfs_mount.c @@ -608,8 +608,9 @@ xfs_unmount_check( * have been retrying in the background. This will prevent never-ending * retries in AIL pushing from hanging the unmount. * - * Finally, we can push the AIL to clean all the remaining dirty objects, then - * reclaim the remaining inodes that are still in memory at this point in time. + * Stop inodegc and background reclaim before pushing the AIL so that they + * are not running while the AIL is being flushed. Then push the AIL to + * clean all the remaining dirty objects and reclaim the remaining inodes. */ static void xfs_unmount_flush_inodes( @@ -621,9 +622,9 @@ xfs_unmount_flush_inodes( xfs_set_unmounting(mp); - xfs_ail_push_all_sync(mp->m_ail); xfs_inodegc_stop(mp); cancel_delayed_work_sync(&mp->m_reclaim_work); + xfs_ail_push_all_sync(mp->m_ail); xfs_reclaim_inodes(mp); xfs_health_unmount(mp); xfs_healthmon_unmount(mp); From 79ef34ec0554ec04bdbafafbc9836423734e1bd6 Mon Sep 17 00:00:00 2001 From: Yuto Ohnuki Date: Tue, 10 Mar 2026 18:38:38 +0000 Subject: [PATCH 061/401] xfs: avoid dereferencing log items after push callbacks After xfsaild_push_item() calls iop_push(), the log item may have been freed if the AIL lock was dropped during the push. Background inode reclaim or the dquot shrinker can free the log item while the AIL lock is not held, and the tracepoints in the switch statement dereference the log item after iop_push() returns. Fix this by capturing the log item type, flags, and LSN before calling xfsaild_push_item(), and introducing a new xfs_ail_push_class trace event class that takes these pre-captured values and the ailp pointer instead of the log item pointer. Reported-by: syzbot+652af2b3c5569c4ab63c@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=652af2b3c5569c4ab63c Fixes: 90c60e164012 ("xfs: xfs_iflush() is no longer necessary") Cc: stable@vger.kernel.org # v5.9 Signed-off-by: Yuto Ohnuki Reviewed-by: Darrick J. Wong Signed-off-by: Carlos Maiolino --- fs/xfs/xfs_trace.h | 36 ++++++++++++++++++++++++++++++++---- fs/xfs/xfs_trans_ail.c | 26 +++++++++++++++++++------- 2 files changed, 51 insertions(+), 11 deletions(-) diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h index 813e5a9f57eb..0e994b3f768f 100644 --- a/fs/xfs/xfs_trace.h +++ b/fs/xfs/xfs_trace.h @@ -56,6 +56,7 @@ #include struct xfs_agf; +struct xfs_ail; struct xfs_alloc_arg; struct xfs_attr_list_context; struct xfs_buf_log_item; @@ -1650,16 +1651,43 @@ TRACE_EVENT(xfs_log_force, DEFINE_EVENT(xfs_log_item_class, name, \ TP_PROTO(struct xfs_log_item *lip), \ TP_ARGS(lip)) -DEFINE_LOG_ITEM_EVENT(xfs_ail_push); -DEFINE_LOG_ITEM_EVENT(xfs_ail_pinned); -DEFINE_LOG_ITEM_EVENT(xfs_ail_locked); -DEFINE_LOG_ITEM_EVENT(xfs_ail_flushing); DEFINE_LOG_ITEM_EVENT(xfs_cil_whiteout_mark); DEFINE_LOG_ITEM_EVENT(xfs_cil_whiteout_skip); DEFINE_LOG_ITEM_EVENT(xfs_cil_whiteout_unpin); DEFINE_LOG_ITEM_EVENT(xlog_ail_insert_abort); DEFINE_LOG_ITEM_EVENT(xfs_trans_free_abort); +DECLARE_EVENT_CLASS(xfs_ail_push_class, + TP_PROTO(struct xfs_ail *ailp, uint type, unsigned long flags, xfs_lsn_t lsn), + TP_ARGS(ailp, type, flags, lsn), + TP_STRUCT__entry( + __field(dev_t, dev) + __field(uint, type) + __field(unsigned long, flags) + __field(xfs_lsn_t, lsn) + ), + TP_fast_assign( + __entry->dev = ailp->ail_log->l_mp->m_super->s_dev; + __entry->type = type; + __entry->flags = flags; + __entry->lsn = lsn; + ), + TP_printk("dev %d:%d lsn %d/%d type %s flags %s", + MAJOR(__entry->dev), MINOR(__entry->dev), + CYCLE_LSN(__entry->lsn), BLOCK_LSN(__entry->lsn), + __print_symbolic(__entry->type, XFS_LI_TYPE_DESC), + __print_flags(__entry->flags, "|", XFS_LI_FLAGS)) +) + +#define DEFINE_AIL_PUSH_EVENT(name) \ +DEFINE_EVENT(xfs_ail_push_class, name, \ + TP_PROTO(struct xfs_ail *ailp, uint type, unsigned long flags, xfs_lsn_t lsn), \ + TP_ARGS(ailp, type, flags, lsn)) +DEFINE_AIL_PUSH_EVENT(xfs_ail_push); +DEFINE_AIL_PUSH_EVENT(xfs_ail_pinned); +DEFINE_AIL_PUSH_EVENT(xfs_ail_locked); +DEFINE_AIL_PUSH_EVENT(xfs_ail_flushing); + DECLARE_EVENT_CLASS(xfs_ail_class, TP_PROTO(struct xfs_log_item *lip, xfs_lsn_t old_lsn, xfs_lsn_t new_lsn), TP_ARGS(lip, old_lsn, new_lsn), diff --git a/fs/xfs/xfs_trans_ail.c b/fs/xfs/xfs_trans_ail.c index 923729af4206..63266d31b514 100644 --- a/fs/xfs/xfs_trans_ail.c +++ b/fs/xfs/xfs_trans_ail.c @@ -365,6 +365,12 @@ xfsaild_resubmit_item( return XFS_ITEM_SUCCESS; } +/* + * Push a single log item from the AIL. + * + * @lip may have been released and freed by the time this function returns, + * so callers must not dereference the log item afterwards. + */ static inline uint xfsaild_push_item( struct xfs_ail *ailp, @@ -505,7 +511,10 @@ xfsaild_push( lsn = lip->li_lsn; while ((XFS_LSN_CMP(lip->li_lsn, ailp->ail_target) <= 0)) { - int lock_result; + int lock_result; + uint type = lip->li_type; + unsigned long flags = lip->li_flags; + xfs_lsn_t item_lsn = lip->li_lsn; if (test_bit(XFS_LI_FLUSHING, &lip->li_flags)) goto next_item; @@ -514,14 +523,17 @@ xfsaild_push( * Note that iop_push may unlock and reacquire the AIL lock. We * rely on the AIL cursor implementation to be able to deal with * the dropped lock. + * + * The log item may have been freed by the push, so it must not + * be accessed or dereferenced below this line. */ lock_result = xfsaild_push_item(ailp, lip); switch (lock_result) { case XFS_ITEM_SUCCESS: XFS_STATS_INC(mp, xs_push_ail_success); - trace_xfs_ail_push(lip); + trace_xfs_ail_push(ailp, type, flags, item_lsn); - ailp->ail_last_pushed_lsn = lsn; + ailp->ail_last_pushed_lsn = item_lsn; break; case XFS_ITEM_FLUSHING: @@ -537,22 +549,22 @@ xfsaild_push( * AIL is being flushed. */ XFS_STATS_INC(mp, xs_push_ail_flushing); - trace_xfs_ail_flushing(lip); + trace_xfs_ail_flushing(ailp, type, flags, item_lsn); flushing++; - ailp->ail_last_pushed_lsn = lsn; + ailp->ail_last_pushed_lsn = item_lsn; break; case XFS_ITEM_PINNED: XFS_STATS_INC(mp, xs_push_ail_pinned); - trace_xfs_ail_pinned(lip); + trace_xfs_ail_pinned(ailp, type, flags, item_lsn); stuck++; ailp->ail_log_flush++; break; case XFS_ITEM_LOCKED: XFS_STATS_INC(mp, xs_push_ail_locked); - trace_xfs_ail_locked(lip); + trace_xfs_ail_locked(ailp, type, flags, item_lsn); stuck++; break; From 394d70b86fae9fe865e7e6d9540b7696f73aa9b6 Mon Sep 17 00:00:00 2001 From: Yuto Ohnuki Date: Tue, 10 Mar 2026 18:38:39 +0000 Subject: [PATCH 062/401] xfs: save ailp before dropping the AIL lock in push callbacks In xfs_inode_item_push() and xfs_qm_dquot_logitem_push(), the AIL lock is dropped to perform buffer IO. Once the cluster buffer no longer protects the log item from reclaim, the log item may be freed by background reclaim or the dquot shrinker. The subsequent spin_lock() call dereferences lip->li_ailp, which is a use-after-free. Fix this by saving the ailp pointer in a local variable while the AIL lock is held and the log item is guaranteed to be valid. Reported-by: syzbot+652af2b3c5569c4ab63c@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=652af2b3c5569c4ab63c Fixes: 90c60e164012 ("xfs: xfs_iflush() is no longer necessary") Cc: stable@vger.kernel.org # v5.9 Reviewed-by: Darrick J. Wong Reviewed-by: Dave Chinner Signed-off-by: Yuto Ohnuki Signed-off-by: Carlos Maiolino --- fs/xfs/xfs_dquot_item.c | 9 +++++++-- fs/xfs/xfs_inode_item.c | 9 +++++++-- 2 files changed, 14 insertions(+), 4 deletions(-) diff --git a/fs/xfs/xfs_dquot_item.c b/fs/xfs/xfs_dquot_item.c index 491e2a7053a3..65a0e69c3d08 100644 --- a/fs/xfs/xfs_dquot_item.c +++ b/fs/xfs/xfs_dquot_item.c @@ -125,6 +125,7 @@ xfs_qm_dquot_logitem_push( struct xfs_dq_logitem *qlip = DQUOT_ITEM(lip); struct xfs_dquot *dqp = qlip->qli_dquot; struct xfs_buf *bp; + struct xfs_ail *ailp = lip->li_ailp; uint rval = XFS_ITEM_SUCCESS; int error; @@ -153,7 +154,7 @@ xfs_qm_dquot_logitem_push( goto out_unlock; } - spin_unlock(&lip->li_ailp->ail_lock); + spin_unlock(&ailp->ail_lock); error = xfs_dquot_use_attached_buf(dqp, &bp); if (error == -EAGAIN) { @@ -172,9 +173,13 @@ xfs_qm_dquot_logitem_push( rval = XFS_ITEM_FLUSHING; } xfs_buf_relse(bp); + /* + * The buffer no longer protects the log item from reclaim, so + * do not reference lip after this point. + */ out_relock_ail: - spin_lock(&lip->li_ailp->ail_lock); + spin_lock(&ailp->ail_lock); out_unlock: mutex_unlock(&dqp->q_qlock); return rval; diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c index 8913036b8024..4ae81eed0442 100644 --- a/fs/xfs/xfs_inode_item.c +++ b/fs/xfs/xfs_inode_item.c @@ -746,6 +746,7 @@ xfs_inode_item_push( struct xfs_inode_log_item *iip = INODE_ITEM(lip); struct xfs_inode *ip = iip->ili_inode; struct xfs_buf *bp = lip->li_buf; + struct xfs_ail *ailp = lip->li_ailp; uint rval = XFS_ITEM_SUCCESS; int error; @@ -771,7 +772,7 @@ xfs_inode_item_push( if (!xfs_buf_trylock(bp)) return XFS_ITEM_LOCKED; - spin_unlock(&lip->li_ailp->ail_lock); + spin_unlock(&ailp->ail_lock); /* * We need to hold a reference for flushing the cluster buffer as it may @@ -795,7 +796,11 @@ xfs_inode_item_push( rval = XFS_ITEM_LOCKED; } - spin_lock(&lip->li_ailp->ail_lock); + /* + * The buffer no longer protects the log item from reclaim, so + * do not reference lip after this point. + */ + spin_lock(&ailp->ail_lock); return rval; } From 7cac60947335f8d88a6390814840590a61134484 Mon Sep 17 00:00:00 2001 From: Yuto Ohnuki Date: Tue, 10 Mar 2026 18:38:40 +0000 Subject: [PATCH 063/401] xfs: refactor xfsaild_push loop into helper Factor the loop body of xfsaild_push() into a separate xfsaild_process_logitem() helper to improve readability. This is a pure code movement with no functional change. Signed-off-by: Yuto Ohnuki Reviewed-by: Darrick J. Wong Signed-off-by: Carlos Maiolino --- fs/xfs/xfs_trans_ail.c | 127 ++++++++++++++++++++++------------------- 1 file changed, 69 insertions(+), 58 deletions(-) diff --git a/fs/xfs/xfs_trans_ail.c b/fs/xfs/xfs_trans_ail.c index 63266d31b514..99a9bf3762b7 100644 --- a/fs/xfs/xfs_trans_ail.c +++ b/fs/xfs/xfs_trans_ail.c @@ -464,6 +464,74 @@ xfs_ail_calc_push_target( return target_lsn; } +static void +xfsaild_process_logitem( + struct xfs_ail *ailp, + struct xfs_log_item *lip, + int *stuck, + int *flushing) +{ + struct xfs_mount *mp = ailp->ail_log->l_mp; + uint type = lip->li_type; + unsigned long flags = lip->li_flags; + xfs_lsn_t item_lsn = lip->li_lsn; + int lock_result; + + /* + * Note that iop_push may unlock and reacquire the AIL lock. We + * rely on the AIL cursor implementation to be able to deal with + * the dropped lock. + * + * The log item may have been freed by the push, so it must not + * be accessed or dereferenced below this line. + */ + lock_result = xfsaild_push_item(ailp, lip); + switch (lock_result) { + case XFS_ITEM_SUCCESS: + XFS_STATS_INC(mp, xs_push_ail_success); + trace_xfs_ail_push(ailp, type, flags, item_lsn); + + ailp->ail_last_pushed_lsn = item_lsn; + break; + + case XFS_ITEM_FLUSHING: + /* + * The item or its backing buffer is already being + * flushed. The typical reason for that is that an + * inode buffer is locked because we already pushed the + * updates to it as part of inode clustering. + * + * We do not want to stop flushing just because lots + * of items are already being flushed, but we need to + * re-try the flushing relatively soon if most of the + * AIL is being flushed. + */ + XFS_STATS_INC(mp, xs_push_ail_flushing); + trace_xfs_ail_flushing(ailp, type, flags, item_lsn); + + (*flushing)++; + ailp->ail_last_pushed_lsn = item_lsn; + break; + + case XFS_ITEM_PINNED: + XFS_STATS_INC(mp, xs_push_ail_pinned); + trace_xfs_ail_pinned(ailp, type, flags, item_lsn); + + (*stuck)++; + ailp->ail_log_flush++; + break; + case XFS_ITEM_LOCKED: + XFS_STATS_INC(mp, xs_push_ail_locked); + trace_xfs_ail_locked(ailp, type, flags, item_lsn); + + (*stuck)++; + break; + default: + ASSERT(0); + break; + } +} + static long xfsaild_push( struct xfs_ail *ailp) @@ -511,68 +579,11 @@ xfsaild_push( lsn = lip->li_lsn; while ((XFS_LSN_CMP(lip->li_lsn, ailp->ail_target) <= 0)) { - int lock_result; - uint type = lip->li_type; - unsigned long flags = lip->li_flags; - xfs_lsn_t item_lsn = lip->li_lsn; if (test_bit(XFS_LI_FLUSHING, &lip->li_flags)) goto next_item; - /* - * Note that iop_push may unlock and reacquire the AIL lock. We - * rely on the AIL cursor implementation to be able to deal with - * the dropped lock. - * - * The log item may have been freed by the push, so it must not - * be accessed or dereferenced below this line. - */ - lock_result = xfsaild_push_item(ailp, lip); - switch (lock_result) { - case XFS_ITEM_SUCCESS: - XFS_STATS_INC(mp, xs_push_ail_success); - trace_xfs_ail_push(ailp, type, flags, item_lsn); - - ailp->ail_last_pushed_lsn = item_lsn; - break; - - case XFS_ITEM_FLUSHING: - /* - * The item or its backing buffer is already being - * flushed. The typical reason for that is that an - * inode buffer is locked because we already pushed the - * updates to it as part of inode clustering. - * - * We do not want to stop flushing just because lots - * of items are already being flushed, but we need to - * re-try the flushing relatively soon if most of the - * AIL is being flushed. - */ - XFS_STATS_INC(mp, xs_push_ail_flushing); - trace_xfs_ail_flushing(ailp, type, flags, item_lsn); - - flushing++; - ailp->ail_last_pushed_lsn = item_lsn; - break; - - case XFS_ITEM_PINNED: - XFS_STATS_INC(mp, xs_push_ail_pinned); - trace_xfs_ail_pinned(ailp, type, flags, item_lsn); - - stuck++; - ailp->ail_log_flush++; - break; - case XFS_ITEM_LOCKED: - XFS_STATS_INC(mp, xs_push_ail_locked); - trace_xfs_ail_locked(ailp, type, flags, item_lsn); - - stuck++; - break; - default: - ASSERT(0); - break; - } - + xfsaild_process_logitem(ailp, lip, &stuck, &flushing); count++; /* From 268378b6ad20569af0d1957992de1c8b16c6e900 Mon Sep 17 00:00:00 2001 From: hongao Date: Thu, 12 Mar 2026 20:10:26 +0800 Subject: [PATCH 064/401] xfs: scrub: unlock dquot before early return in quota scrub xchk_quota_item can return early after calling xchk_fblock_process_error. When that helper returns false, the function returned immediately without dropping dq->q_qlock, which can leave the dquot lock held and risk lock leaks or deadlocks in later quota operations. Fix this by unlocking dq->q_qlock before the early return. Signed-off-by: hongao Fixes: 7d1f0e167a067e ("xfs: check the ondisk space mapping behind a dquot") Cc: # v6.8 Reviewed-by: Darrick J. Wong Signed-off-by: Carlos Maiolino --- fs/xfs/scrub/quota.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/xfs/scrub/quota.c b/fs/xfs/scrub/quota.c index 1d25bd5b892e..222812fe202c 100644 --- a/fs/xfs/scrub/quota.c +++ b/fs/xfs/scrub/quota.c @@ -171,8 +171,10 @@ xchk_quota_item( error = xchk_quota_item_bmap(sc, dq, offset); xchk_iunlock(sc, XFS_ILOCK_SHARED); - if (!xchk_fblock_process_error(sc, XFS_DATA_FORK, offset, &error)) + if (!xchk_fblock_process_error(sc, XFS_DATA_FORK, offset, &error)) { + mutex_unlock(&dq->q_qlock); return error; + } /* * Warn if the hard limits are larger than the fs. From 0c98524ab20193d8772cff9c71b00ad004fb1349 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 16 Mar 2026 16:35:29 +0100 Subject: [PATCH 065/401] xfs: cleanup buftarg handling in XFS_IOC_VERIFY_MEDIA The newly added XFS_IOC_VERIFY_MEDIA is a bit unusual in how it handles buftarg fields. Update it to be more in line with other XFS code: - use btp->bt_dev instead of btp->bt_bdev->bd_dev to retrieve the device number for tracing - use btp->bt_logical_sectorsize instead of bdev_logical_block_size(btp->bt_bdev) to retrieve the logical sector size - compare the buftarg and not the bdev to see if there is a separate log buftarg Signed-off-by: Christoph Hellwig Reviewed-by: Darrick J. Wong Signed-off-by: Carlos Maiolino --- fs/xfs/xfs_verify_media.c | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/fs/xfs/xfs_verify_media.c b/fs/xfs/xfs_verify_media.c index 8bbd4ec567f8..5ead3976d511 100644 --- a/fs/xfs/xfs_verify_media.c +++ b/fs/xfs/xfs_verify_media.c @@ -183,10 +183,9 @@ xfs_verify_iosize( min_not_zero(SZ_1M, me->me_max_io_size); BUILD_BUG_ON(BBSHIFT != SECTOR_SHIFT); - ASSERT(BBTOB(bbcount) >= bdev_logical_block_size(btp->bt_bdev)); + ASSERT(BBTOB(bbcount) >= btp->bt_logical_sectorsize); - return clamp(iosize, bdev_logical_block_size(btp->bt_bdev), - BBTOB(bbcount)); + return clamp(iosize, btp->bt_logical_sectorsize, BBTOB(bbcount)); } /* Allocate as much memory as we can get for verification buffer. */ @@ -218,8 +217,8 @@ xfs_verify_media_error( unsigned int bio_bbcount, blk_status_t bio_status) { - trace_xfs_verify_media_error(mp, me, btp->bt_bdev->bd_dev, daddr, - bio_bbcount, bio_status); + trace_xfs_verify_media_error(mp, me, btp->bt_dev, daddr, bio_bbcount, + bio_status); /* * Pass any error, I/O or otherwise, up to the caller if we didn't @@ -280,7 +279,7 @@ xfs_verify_media( btp = mp->m_ddev_targp; break; case XFS_DEV_LOG: - if (mp->m_logdev_targp->bt_bdev != mp->m_ddev_targp->bt_bdev) + if (mp->m_logdev_targp != mp->m_ddev_targp) btp = mp->m_logdev_targp; break; case XFS_DEV_RT: @@ -299,7 +298,7 @@ xfs_verify_media( /* start and end have to be aligned to the lba size */ if (!IS_ALIGNED(BBTOB(me->me_start_daddr | me->me_end_daddr), - bdev_logical_block_size(btp->bt_bdev))) + btp->bt_logical_sectorsize)) return -EINVAL; /* @@ -331,8 +330,7 @@ xfs_verify_media( if (!folio) return -ENOMEM; - trace_xfs_verify_media(mp, me, btp->bt_bdev->bd_dev, daddr, bbcount, - folio); + trace_xfs_verify_media(mp, me, btp->bt_dev, daddr, bbcount, folio); bio = bio_alloc(btp->bt_bdev, 1, REQ_OP_READ, GFP_KERNEL); if (!bio) { @@ -400,7 +398,7 @@ xfs_verify_media( * an operational error. */ me->me_start_daddr = daddr; - trace_xfs_verify_media_end(mp, me, btp->bt_bdev->bd_dev); + trace_xfs_verify_media_end(mp, me, btp->bt_dev); return 0; } From e5966096d0856d071269cb5928d6bc33342d2dfd Mon Sep 17 00:00:00 2001 From: Bill Wendling Date: Mon, 16 Mar 2026 18:41:58 +0000 Subject: [PATCH 066/401] xfs: annotate struct xfs_attr_list_context with __counted_by_ptr Add the `__counted_by_ptr` attribute to the `buffer` field of `struct xfs_attr_list_context`. This field is used to point to a buffer of size `bufsize`. The `buffer` field is assigned in: 1. `xfs_ioc_attr_list` in `fs/xfs/xfs_handle.c` 2. `xfs_xattr_list` in `fs/xfs/xfs_xattr.c` 3. `xfs_getparents` in `fs/xfs/xfs_handle.c` (implicitly initialized to NULL) In `xfs_ioc_attr_list`, `buffer` was assigned before `bufsize`. Reorder them to ensure `bufsize` is set before `buffer` is assigned, although no access happens between them. In `xfs_xattr_list`, `buffer` was assigned before `bufsize`. Reorder them to ensure `bufsize` is set before `buffer` is assigned. In `xfs_getparents`, `buffer` is NULL (from zero initialization) and remains NULL. `bufsize` is set to a non-zero value, but since `buffer` is NULL, no access occurs. In all cases, the pointer `buffer` is not accessed before `bufsize` is set. This patch was generated by CodeMender and reviewed by Bill Wendling. Tested by running xfstests. Signed-off-by: Bill Wendling Reviewed-by: Christoph Hellwig Reviewed-by: Darrick J. Wong Signed-off-by: Carlos Maiolino --- fs/xfs/libxfs/xfs_attr.h | 3 ++- fs/xfs/xfs_handle.c | 2 +- fs/xfs/xfs_xattr.c | 2 +- 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/fs/xfs/libxfs/xfs_attr.h b/fs/xfs/libxfs/xfs_attr.h index 8244305949de..67fd9c75ac3f 100644 --- a/fs/xfs/libxfs/xfs_attr.h +++ b/fs/xfs/libxfs/xfs_attr.h @@ -55,7 +55,8 @@ struct xfs_attr_list_context { struct xfs_trans *tp; struct xfs_inode *dp; /* inode */ struct xfs_attrlist_cursor_kern cursor; /* position in list */ - void *buffer; /* output buffer */ + /* output buffer */ + void *buffer __counted_by_ptr(bufsize); /* * Abort attribute list iteration if non-zero. Can be used to pass diff --git a/fs/xfs/xfs_handle.c b/fs/xfs/xfs_handle.c index d1291ca15239..2b8617ae7ec2 100644 --- a/fs/xfs/xfs_handle.c +++ b/fs/xfs/xfs_handle.c @@ -443,8 +443,8 @@ xfs_ioc_attr_list( context.dp = dp; context.resynch = 1; context.attr_filter = xfs_attr_filter(flags); - context.buffer = buffer; context.bufsize = round_down(bufsize, sizeof(uint32_t)); + context.buffer = buffer; context.firstu = context.bufsize; context.put_listent = xfs_ioc_attr_put_listent; diff --git a/fs/xfs/xfs_xattr.c b/fs/xfs/xfs_xattr.c index a735f16d9cd8..544213067d59 100644 --- a/fs/xfs/xfs_xattr.c +++ b/fs/xfs/xfs_xattr.c @@ -332,8 +332,8 @@ xfs_vn_listxattr( memset(&context, 0, sizeof(context)); context.dp = XFS_I(inode); context.resynch = 1; - context.buffer = size ? data : NULL; context.bufsize = size; + context.buffer = size ? data : NULL; context.firstu = context.bufsize; context.put_listent = xfs_xattr_put_listent; From bd71fb3fea9945987053968f028a948997cba8cc Mon Sep 17 00:00:00 2001 From: Joanne Koong Date: Tue, 17 Mar 2026 13:39:35 -0700 Subject: [PATCH 067/401] iomap: fix invalid folio access when i_blkbits differs from I/O granularity Commit aa35dd5cbc06 ("iomap: fix invalid folio access after folio_end_read()") partially addressed invalid folio access for folios without an ifs attached, but it did not handle the case where 1 << inode->i_blkbits matches the folio size but is different from the granularity used for the IO, which means IO can be submitted for less than the full folio for the !ifs case. In this case, the condition: if (*bytes_submitted == folio_len) ctx->cur_folio = NULL; in iomap_read_folio_iter() will not invalidate ctx->cur_folio, and iomap_read_end() will still be called on the folio even though the IO helper owns it and will finish the read on it. Fix this by unconditionally invalidating ctx->cur_folio for the !ifs case. Reported-by: Johannes Thumshirn Tested-by: Johannes Thumshirn Link: https://lore.kernel.org/linux-fsdevel/b3dfe271-4e3d-4922-b618-e73731242bca@wdc.com/ Fixes: b2f35ac4146d ("iomap: add caller-provided callbacks for read and readahead") Cc: stable@vger.kernel.org Signed-off-by: Joanne Koong Link: https://patch.msgid.link/20260317203935.830549-1-joannelkoong@gmail.com Reviewed-by: Christoph Hellwig Signed-off-by: Christian Brauner --- fs/iomap/buffered-io.c | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c index 00f0efaf12b2..92a831cf4bf1 100644 --- a/fs/iomap/buffered-io.c +++ b/fs/iomap/buffered-io.c @@ -514,6 +514,7 @@ static int iomap_read_folio_iter(struct iomap_iter *iter, loff_t length = iomap_length(iter); struct folio *folio = ctx->cur_folio; size_t folio_len = folio_size(folio); + struct iomap_folio_state *ifs; size_t poff, plen; loff_t pos_diff; int ret; @@ -525,7 +526,7 @@ static int iomap_read_folio_iter(struct iomap_iter *iter, return iomap_iter_advance(iter, length); } - ifs_alloc(iter->inode, folio, iter->flags); + ifs = ifs_alloc(iter->inode, folio, iter->flags); length = min_t(loff_t, length, folio_len - offset_in_folio(folio, pos)); while (length) { @@ -560,11 +561,15 @@ static int iomap_read_folio_iter(struct iomap_iter *iter, *bytes_submitted += plen; /* - * If the entire folio has been read in by the IO - * helper, then the helper owns the folio and will end - * the read on it. + * Hand off folio ownership to the IO helper when: + * 1) The entire folio has been submitted for IO, or + * 2) There is no ifs attached to the folio + * + * Case (2) occurs when 1 << i_blkbits matches the folio + * size but the underlying filesystem or block device + * uses a smaller granularity for IO. */ - if (*bytes_submitted == folio_len) + if (*bytes_submitted == folio_len || !ifs) ctx->cur_folio = NULL; } From 11a95521fb93c91e2d4ef9d53dc80ef0a755549b Mon Sep 17 00:00:00 2001 From: Jacob Moroni Date: Mon, 16 Mar 2026 13:39:38 -0500 Subject: [PATCH 068/401] RDMA/irdma: Initialize free_qp completion before using it In irdma_create_qp, if ib_copy_to_udata fails, it will call irdma_destroy_qp to clean up which will attempt to wait on the free_qp completion, which is not initialized yet. Fix this by initializing the completion before the ib_copy_to_udata call. Fixes: b48c24c2d710 ("RDMA/irdma: Implement device supported verb APIs") Signed-off-by: Jacob Moroni Signed-off-by: Tatyana Nikolova Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/irdma/verbs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/irdma/verbs.c b/drivers/infiniband/hw/irdma/verbs.c index a20d25585993..ce1050bdd191 100644 --- a/drivers/infiniband/hw/irdma/verbs.c +++ b/drivers/infiniband/hw/irdma/verbs.c @@ -1105,6 +1105,7 @@ static int irdma_create_qp(struct ib_qp *ibqp, spin_lock_init(&iwqp->sc_qp.pfpdu.lock); iwqp->sig_all = init_attr->sq_sig_type == IB_SIGNAL_ALL_WR; rf->qp_table[qp_num] = iwqp; + init_completion(&iwqp->free_qp); if (udata) { /* GEN_1 legacy support with libi40iw does not have expanded uresp struct */ @@ -1129,7 +1130,6 @@ static int irdma_create_qp(struct ib_qp *ibqp, } } - init_completion(&iwqp->free_qp); return 0; error: From 8c1f19a2225cf37b3f8ab0b5a8a5322291cda620 Mon Sep 17 00:00:00 2001 From: Tatyana Nikolova Date: Mon, 16 Mar 2026 13:39:41 -0500 Subject: [PATCH 069/401] RDMA/irdma: Update ibqp state to error if QP is already in error state In irdma_modify_qp() update ibqp state to error if the irdma QP is already in error state, otherwise the ibqp state which is visible to the consumer app remains stale. Fixes: b48c24c2d710 ("RDMA/irdma: Implement device supported verb APIs") Signed-off-by: Tatyana Nikolova Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/irdma/verbs.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/infiniband/hw/irdma/verbs.c b/drivers/infiniband/hw/irdma/verbs.c index ce1050bdd191..a13ed000fa19 100644 --- a/drivers/infiniband/hw/irdma/verbs.c +++ b/drivers/infiniband/hw/irdma/verbs.c @@ -1540,6 +1540,7 @@ int irdma_modify_qp_roce(struct ib_qp *ibqp, struct ib_qp_attr *attr, case IB_QPS_ERR: case IB_QPS_RESET: if (iwqp->iwarp_state == IRDMA_QP_STATE_ERROR) { + iwqp->ibqp_state = attr->qp_state; spin_unlock_irqrestore(&iwqp->lock, flags); if (udata && udata->inlen) { if (ib_copy_from_udata(&ureq, udata, @@ -1745,6 +1746,7 @@ int irdma_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask, case IB_QPS_ERR: case IB_QPS_RESET: if (iwqp->iwarp_state == IRDMA_QP_STATE_ERROR) { + iwqp->ibqp_state = attr->qp_state; spin_unlock_irqrestore(&iwqp->lock, flags); if (udata && udata->inlen) { if (ib_copy_from_udata(&ureq, udata, From 5e8f0239731a83753473b7aa91bda67bbdff5053 Mon Sep 17 00:00:00 2001 From: Tatyana Nikolova Date: Mon, 16 Mar 2026 13:39:42 -0500 Subject: [PATCH 070/401] RDMA/irdma: Remove a NOP wait_event() in irdma_modify_qp_roce() Remove a NOP wait_event() in irdma_modify_qp_roce() which is relevant for iWARP and likely a copy and paste artifact for RoCEv2. The wait event is for sending a reset on a TCP connection, after the reset has been requested in irdma_modify_qp(), which occurs only in iWarp mode. Fixes: b48c24c2d710 ("RDMA/irdma: Implement device supported verb APIs") Signed-off-by: Tatyana Nikolova Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/irdma/verbs.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/infiniband/hw/irdma/verbs.c b/drivers/infiniband/hw/irdma/verbs.c index a13ed000fa19..9920a3304be3 100644 --- a/drivers/infiniband/hw/irdma/verbs.c +++ b/drivers/infiniband/hw/irdma/verbs.c @@ -1462,8 +1462,6 @@ int irdma_modify_qp_roce(struct ib_qp *ibqp, struct ib_qp_attr *attr, ctx_info->remote_atomics_en = true; } - wait_event(iwqp->mod_qp_waitq, !atomic_read(&iwqp->hw_mod_qp_pend)); - ibdev_dbg(&iwdev->ibdev, "VERBS: caller: %pS qp_id=%d to_ibqpstate=%d ibqpstate=%d irdma_qpstate=%d attr_mask=0x%x\n", __builtin_return_address(0), ibqp->qp_num, attr->qp_state, From b415399c9a024d574b65479636f0d4eb625b9abd Mon Sep 17 00:00:00 2001 From: Ivan Barrera Date: Mon, 16 Mar 2026 13:39:43 -0500 Subject: [PATCH 071/401] RDMA/irdma: Clean up unnecessary dereference of event->cm_node The cm_node is available and the usage of cm_node and event->cm_node seems arbitrary. Clean up unnecessary dereference of event->cm_node. Fixes: 146b9756f14c ("RDMA/irdma: Add connection manager") Signed-off-by: Ivan Barrera Signed-off-by: Tatyana Nikolova Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/irdma/cm.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/infiniband/hw/irdma/cm.c b/drivers/infiniband/hw/irdma/cm.c index 3d084d4ff577..13b24ffca265 100644 --- a/drivers/infiniband/hw/irdma/cm.c +++ b/drivers/infiniband/hw/irdma/cm.c @@ -4239,21 +4239,21 @@ static void irdma_cm_event_handler(struct work_struct *work) irdma_cm_event_reset(event); break; case IRDMA_CM_EVENT_CONNECTED: - if (!event->cm_node->cm_id || - event->cm_node->state != IRDMA_CM_STATE_OFFLOADED) + if (!cm_node->cm_id || + cm_node->state != IRDMA_CM_STATE_OFFLOADED) break; irdma_cm_event_connected(event); break; case IRDMA_CM_EVENT_MPA_REJECT: - if (!event->cm_node->cm_id || + if (!cm_node->cm_id || cm_node->state == IRDMA_CM_STATE_OFFLOADED) break; irdma_send_cm_event(cm_node, cm_node->cm_id, IW_CM_EVENT_CONNECT_REPLY, -ECONNREFUSED); break; case IRDMA_CM_EVENT_ABORTED: - if (!event->cm_node->cm_id || - event->cm_node->state == IRDMA_CM_STATE_OFFLOADED) + if (!cm_node->cm_id || + cm_node->state == IRDMA_CM_STATE_OFFLOADED) break; irdma_event_connect_error(event); break; @@ -4263,7 +4263,7 @@ static void irdma_cm_event_handler(struct work_struct *work) break; } - irdma_rem_ref_cm_node(event->cm_node); + irdma_rem_ref_cm_node(cm_node); kfree(event); } From c45c6ebd693b944f1ffe429fdfb6cc1674c237be Mon Sep 17 00:00:00 2001 From: Tatyana Nikolova Date: Mon, 16 Mar 2026 13:39:44 -0500 Subject: [PATCH 072/401] RDMA/irdma: Remove reset check from irdma_modify_qp_to_err() During reset, irdma_modify_qp() to error should be called to disconnect the QP. Without this fix, if not preceded by irdma_modify_qp() to error, the API call irdma_destroy_qp() gets stuck waiting for the QP refcount to go to zero, because the cm_node associated with this QP isn't disconnected. Fixes: 915cc7ac0f8e ("RDMA/irdma: Add miscellaneous utility definitions") Signed-off-by: Tatyana Nikolova Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/irdma/utils.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/infiniband/hw/irdma/utils.c b/drivers/infiniband/hw/irdma/utils.c index ab8c5284d4be..495e5daff4b4 100644 --- a/drivers/infiniband/hw/irdma/utils.c +++ b/drivers/infiniband/hw/irdma/utils.c @@ -2322,8 +2322,6 @@ void irdma_modify_qp_to_err(struct irdma_sc_qp *sc_qp) struct irdma_qp *qp = sc_qp->qp_uk.back_qp; struct ib_qp_attr attr; - if (qp->iwdev->rf->reset) - return; attr.qp_state = IB_QPS_ERR; if (rdma_protocol_roce(qp->ibqp.device, 1)) From 6f52370970ac07d352a7af4089e55e0e6425f827 Mon Sep 17 00:00:00 2001 From: Anil Samal Date: Mon, 16 Mar 2026 13:39:45 -0500 Subject: [PATCH 073/401] RDMA/irdma: Fix deadlock during netdev reset with active connections Resolve deadlock that occurs when user executes netdev reset while RDMA applications (e.g., rping) are active. The netdev reset causes ice driver to remove irdma auxiliary driver, triggering device_delete and subsequent client removal. During client removal, uverbs_client waits for QP reference count to reach zero while cma_client holds the final reference, creating circular dependency and indefinite wait in iWARP mode. Skip QP reference count wait during device reset to prevent deadlock. Fixes: c8f304d75f6c ("RDMA/irdma: Prevent QP use after free") Signed-off-by: Anil Samal Signed-off-by: Tatyana Nikolova Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/irdma/verbs.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/irdma/verbs.c b/drivers/infiniband/hw/irdma/verbs.c index 9920a3304be3..95f590c10c05 100644 --- a/drivers/infiniband/hw/irdma/verbs.c +++ b/drivers/infiniband/hw/irdma/verbs.c @@ -558,7 +558,8 @@ static int irdma_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata) } irdma_qp_rem_ref(&iwqp->ibqp); - wait_for_completion(&iwqp->free_qp); + if (!iwdev->rf->reset) + wait_for_completion(&iwqp->free_qp); irdma_free_lsmm_rsrc(iwqp); irdma_cqp_qp_destroy_cmd(&iwdev->rf->sc_dev, &iwqp->sc_qp); From 7221f581eefa79ead06e171044f393fb7ee22f87 Mon Sep 17 00:00:00 2001 From: Tatyana Nikolova Date: Mon, 16 Mar 2026 13:39:46 -0500 Subject: [PATCH 074/401] RDMA/irdma: Return EINVAL for invalid arp index error When rdma_connect() fails due to an invalid arp index, user space rdma core reports ENOMEM which is confusing. Modify irdma_make_cm_node() to return the correct error code. Fixes: 146b9756f14c ("RDMA/irdma: Add connection manager") Signed-off-by: Tatyana Nikolova Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/irdma/cm.c | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/drivers/infiniband/hw/irdma/cm.c b/drivers/infiniband/hw/irdma/cm.c index 13b24ffca265..91c0e7298283 100644 --- a/drivers/infiniband/hw/irdma/cm.c +++ b/drivers/infiniband/hw/irdma/cm.c @@ -2241,11 +2241,12 @@ irdma_make_cm_node(struct irdma_cm_core *cm_core, struct irdma_device *iwdev, int oldarpindex; int arpindex; struct net_device *netdev = iwdev->netdev; + int ret; /* create an hte and cm_node for this instance */ cm_node = kzalloc_obj(*cm_node, GFP_ATOMIC); if (!cm_node) - return NULL; + return ERR_PTR(-ENOMEM); /* set our node specific transport info */ cm_node->ipv4 = cm_info->ipv4; @@ -2348,8 +2349,10 @@ irdma_make_cm_node(struct irdma_cm_core *cm_core, struct irdma_device *iwdev, arpindex = -EINVAL; } - if (arpindex < 0) + if (arpindex < 0) { + ret = -EINVAL; goto err; + } ether_addr_copy(cm_node->rem_mac, iwdev->rf->arp_table[arpindex].mac_addr); @@ -2360,7 +2363,7 @@ irdma_make_cm_node(struct irdma_cm_core *cm_core, struct irdma_device *iwdev, err: kfree(cm_node); - return NULL; + return ERR_PTR(ret); } static void irdma_destroy_connection(struct irdma_cm_node *cm_node) @@ -3021,8 +3024,8 @@ static int irdma_create_cm_node(struct irdma_cm_core *cm_core, /* create a CM connection node */ cm_node = irdma_make_cm_node(cm_core, iwdev, cm_info, NULL); - if (!cm_node) - return -ENOMEM; + if (IS_ERR(cm_node)) + return PTR_ERR(cm_node); /* set our node side to client (active) side */ cm_node->tcp_cntxt.client = 1; @@ -3219,9 +3222,9 @@ void irdma_receive_ilq(struct irdma_sc_vsi *vsi, struct irdma_puda_buf *rbuf) cm_info.cm_id = listener->cm_id; cm_node = irdma_make_cm_node(cm_core, iwdev, &cm_info, listener); - if (!cm_node) { + if (IS_ERR(cm_node)) { ibdev_dbg(&cm_core->iwdev->ibdev, - "CM: allocate node failed\n"); + "CM: allocate node failed ret=%ld\n", PTR_ERR(cm_node)); refcount_dec(&listener->refcnt); return; } From e37afcb56ae070477741fe2d6e61fc0c542cce2d Mon Sep 17 00:00:00 2001 From: Shiraz Saleem Date: Mon, 16 Mar 2026 13:39:47 -0500 Subject: [PATCH 075/401] RDMA/irdma: Harden depth calculation functions An issue was exposed where OS can pass in U32_MAX for SQ/RQ/SRQ size. This can cause integer overflow and truncation of SQ/RQ/SRQ depth returning a success when it should have failed. Harden the functions to do all depth calculations and boundary checking in u64 sizes. Fixes: 563e1feb5f6e ("RDMA/irdma: Add SRQ support") Signed-off-by: Shiraz Saleem Signed-off-by: Tatyana Nikolova Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/irdma/uk.c | 39 ++++++++++++++++++-------------- 1 file changed, 22 insertions(+), 17 deletions(-) diff --git a/drivers/infiniband/hw/irdma/uk.c b/drivers/infiniband/hw/irdma/uk.c index ac3721a5747a..4718acf6c6fd 100644 --- a/drivers/infiniband/hw/irdma/uk.c +++ b/drivers/infiniband/hw/irdma/uk.c @@ -1438,7 +1438,7 @@ int irdma_uk_cq_poll_cmpl(struct irdma_cq_uk *cq, * irdma_round_up_wq - return round up qp wq depth * @wqdepth: wq depth in quanta to round up */ -static int irdma_round_up_wq(u32 wqdepth) +static u64 irdma_round_up_wq(u64 wqdepth) { int scount = 1; @@ -1491,15 +1491,16 @@ void irdma_get_wqe_shift(struct irdma_uk_attrs *uk_attrs, u32 sge, int irdma_get_sqdepth(struct irdma_uk_attrs *uk_attrs, u32 sq_size, u8 shift, u32 *sqdepth) { - u32 min_size = (u32)uk_attrs->min_hw_wq_size << shift; + u32 min_hw_quanta = (u32)uk_attrs->min_hw_wq_size << shift; + u64 hw_quanta = + irdma_round_up_wq(((u64)sq_size << shift) + IRDMA_SQ_RSVD); - *sqdepth = irdma_round_up_wq((sq_size << shift) + IRDMA_SQ_RSVD); - - if (*sqdepth < min_size) - *sqdepth = min_size; - else if (*sqdepth > uk_attrs->max_hw_wq_quanta) + if (hw_quanta < min_hw_quanta) + hw_quanta = min_hw_quanta; + else if (hw_quanta > uk_attrs->max_hw_wq_quanta) return -EINVAL; + *sqdepth = hw_quanta; return 0; } @@ -1513,15 +1514,16 @@ int irdma_get_sqdepth(struct irdma_uk_attrs *uk_attrs, u32 sq_size, u8 shift, int irdma_get_rqdepth(struct irdma_uk_attrs *uk_attrs, u32 rq_size, u8 shift, u32 *rqdepth) { - u32 min_size = (u32)uk_attrs->min_hw_wq_size << shift; + u32 min_hw_quanta = (u32)uk_attrs->min_hw_wq_size << shift; + u64 hw_quanta = + irdma_round_up_wq(((u64)rq_size << shift) + IRDMA_RQ_RSVD); - *rqdepth = irdma_round_up_wq((rq_size << shift) + IRDMA_RQ_RSVD); - - if (*rqdepth < min_size) - *rqdepth = min_size; - else if (*rqdepth > uk_attrs->max_hw_rq_quanta) + if (hw_quanta < min_hw_quanta) + hw_quanta = min_hw_quanta; + else if (hw_quanta > uk_attrs->max_hw_rq_quanta) return -EINVAL; + *rqdepth = hw_quanta; return 0; } @@ -1535,13 +1537,16 @@ int irdma_get_rqdepth(struct irdma_uk_attrs *uk_attrs, u32 rq_size, u8 shift, int irdma_get_srqdepth(struct irdma_uk_attrs *uk_attrs, u32 srq_size, u8 shift, u32 *srqdepth) { - *srqdepth = irdma_round_up_wq((srq_size << shift) + IRDMA_RQ_RSVD); + u32 min_hw_quanta = (u32)uk_attrs->min_hw_wq_size << shift; + u64 hw_quanta = + irdma_round_up_wq(((u64)srq_size << shift) + IRDMA_RQ_RSVD); - if (*srqdepth < ((u32)uk_attrs->min_hw_wq_size << shift)) - *srqdepth = uk_attrs->min_hw_wq_size << shift; - else if (*srqdepth > uk_attrs->max_hw_srq_quanta) + if (hw_quanta < min_hw_quanta) + hw_quanta = min_hw_quanta; + else if (hw_quanta > uk_attrs->max_hw_srq_quanta) return -EINVAL; + *srqdepth = hw_quanta; return 0; } From b8bee48e38f2ddbdba5e58bc54ef54bb7d8d341b Mon Sep 17 00:00:00 2001 From: Frank Zhang Date: Tue, 17 Mar 2026 18:25:27 +0800 Subject: [PATCH 076/401] ALSA:usb:qcom: add AUXILIARY_BUS to Kconfig dependencies The build can fail with: ERROR: modpost: "__auxiliary_driver_register" [sound/usb/qcom/snd-usb-audio-qmi.ko] undefined! ERROR: modpost: "auxiliary_driver_unregister" [sound/usb/qcom/snd-usb-audio-qmi.ko] undefined! Select AUXILIARY_BUS when SND_USB_AUDIO_QMI is enabled. Signed-off-by: Frank Zhang Link: https://patch.msgid.link/20260317102527.556248-1-rmxpzlb@gmail.com Signed-off-by: Takashi Iwai --- sound/usb/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/usb/Kconfig b/sound/usb/Kconfig index 9b890abd96d3..b4588915efa1 100644 --- a/sound/usb/Kconfig +++ b/sound/usb/Kconfig @@ -192,6 +192,7 @@ config SND_USB_AUDIO_QMI tristate "Qualcomm Audio Offload driver" depends on QCOM_QMI_HELPERS && SND_USB_AUDIO && SND_SOC_USB depends on USB_XHCI_HCD && USB_XHCI_SIDEBAND + select AUXILIARY_BUS help Say Y here to enable the Qualcomm USB audio offloading feature. From 8306a78a1c04cf87bfa9ae6451cc9d8f0f9dc0e0 Mon Sep 17 00:00:00 2001 From: Daniel Lezcano Date: Tue, 17 Mar 2026 19:09:38 +0100 Subject: [PATCH 077/401] ALSA: usb-audio: qcom: Fix the license marking The Copyright for Qualcomm changed its format and replaces the old Qualcomm Innovative Center by Qualcomm Technology Inc. Signed-off-by: Daniel Lezcano Link: https://patch.msgid.link/20260317180943.3062085-1-daniel.lezcano@oss.qualcomm.com Signed-off-by: Takashi Iwai --- sound/usb/qcom/qc_audio_offload.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/usb/qcom/qc_audio_offload.c b/sound/usb/qcom/qc_audio_offload.c index 510b68cced33..f161eb29f911 100644 --- a/sound/usb/qcom/qc_audio_offload.c +++ b/sound/usb/qcom/qc_audio_offload.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 /* - * Copyright (c) 2022-2025 Qualcomm Innovation Center, Inc. All rights reserved. + * Copyright (c) Qualcomm Technologies, Inc. and/or its subsidiaries. */ #include From 0e9fc79132ce7ea1e48c388b864382aa38eb0ed4 Mon Sep 17 00:00:00 2001 From: Shengjiu Wang Date: Wed, 18 Mar 2026 18:28:49 +0800 Subject: [PATCH 078/401] ASoC: simple-card-utils: Check value of is_playback_only and is_capture_only The audio-graph-card2 gets the value of 'playback-only' and 'capture_only' property in below sequence, if there is 'playback_only' or 'capture_only' property in port_cpu and port_codec nodes, but no these properties in ep_cpu and ep_codec nodes, the value of playback_only and capture_only will be flushed to zero in the end. graph_util_parse_link_direction(lnk, &playback_only, &capture_only); graph_util_parse_link_direction(ports_cpu, &playback_only, &capture_only); graph_util_parse_link_direction(ports_codec, &playback_only, &capture_only); graph_util_parse_link_direction(port_cpu, &playback_only, &capture_only); graph_util_parse_link_direction(port_codec, &playback_only, &capture_only); graph_util_parse_link_direction(ep_cpu, &playback_only, &capture_only); graph_util_parse_link_direction(ep_codec, &playback_only, &capture_only); So check the value of is_playback_only and is_capture_only in graph_util_parse_link_direction() function, if they are true, then rewrite the values, and no need to check the np variable as of_property_read_bool() will ignore if it was NULL. Fixes: 3cc393d2232e ("ASoC: simple-card-utils: Fix pointer check in graph_util_parse_link_direction") Fixes: 22a507d7680f ("ASoC: simple-card-utils: Check device node before overwrite direction") Suggested-by: Kuninori Morimoto Acked-by: Kuninori Morimoto Signed-off-by: Shengjiu Wang Link: https://patch.msgid.link/20260318102850.2794029-2-shengjiu.wang@nxp.com Signed-off-by: Mark Brown --- sound/soc/generic/simple-card-utils.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sound/soc/generic/simple-card-utils.c b/sound/soc/generic/simple-card-utils.c index 9e5be0eaa77f..89d694c2cbdd 100644 --- a/sound/soc/generic/simple-card-utils.c +++ b/sound/soc/generic/simple-card-utils.c @@ -1183,9 +1183,9 @@ void graph_util_parse_link_direction(struct device_node *np, bool is_playback_only = of_property_read_bool(np, "playback-only"); bool is_capture_only = of_property_read_bool(np, "capture-only"); - if (np && playback_only) + if (playback_only && is_playback_only) *playback_only = is_playback_only; - if (np && capture_only) + if (capture_only && is_capture_only) *capture_only = is_capture_only; } EXPORT_SYMBOL_GPL(graph_util_parse_link_direction); From ca67bd564e94aaa898a2cbb90922ca3cccd0612b Mon Sep 17 00:00:00 2001 From: Shengjiu Wang Date: Wed, 18 Mar 2026 18:28:50 +0800 Subject: [PATCH 079/401] ASoC: fsl: imx-card: initialize playback_only and capture_only Fix uninitialized variable playback_only and capture_only because graph_util_parse_link_direction() may not write them. Fixes: 1877c3e7937f ("ASoC: imx-card: Add playback_only or capture_only support") Suggested-by: Kuninori Morimoto Acked-by: Kuninori Morimoto Signed-off-by: Shengjiu Wang Link: https://patch.msgid.link/20260318102850.2794029-3-shengjiu.wang@nxp.com Signed-off-by: Mark Brown --- sound/soc/fsl/imx-card.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sound/soc/fsl/imx-card.c b/sound/soc/fsl/imx-card.c index 05b4e971a366..a4518fefad69 100644 --- a/sound/soc/fsl/imx-card.c +++ b/sound/soc/fsl/imx-card.c @@ -710,6 +710,8 @@ static int imx_card_parse_of(struct imx_card_data *data) link->ops = &imx_aif_ops; } + playback_only = false; + capture_only = false; graph_util_parse_link_direction(np, &playback_only, &capture_only); link->playback_only = playback_only; link->capture_only = capture_only; From 591721223be9e28f83489a59289579493b8e3d83 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 18 Mar 2026 13:40:07 +0100 Subject: [PATCH 080/401] ALSA: asihpi: avoid write overflow check warning clang-22 rightfully warns that the memcpy() in adapter_prepare() copies between different structures, crossing the boundary of nested structures inside it: In file included from sound/pci/asihpi/hpimsgx.c:13: In file included from include/linux/string.h:386: include/linux/fortify-string.h:569:4: error: call to '__write_overflow_field' declared with 'warning' attribute: detected write beyond size of field (1st parameter); maybe use struct_group()? [-Werror,-Wattribute-warning] 569 | __write_overflow_field(p_size_field, size); The two structures seem to refer to the same layout, despite the separate definitions, so the code is in fact correct. Avoid the warning by copying the two inner structures separately. I see the same pattern happens in other functions in the same file, so there is a chance that this may come back in the future, but this instance is the only one that I saw in practice, hitting it multiple times per day in randconfig build. Signed-off-by: Arnd Bergmann Link: https://patch.msgid.link/20260318124016.3488566-1-arnd@kernel.org Signed-off-by: Takashi Iwai --- sound/pci/asihpi/hpimsgx.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/sound/pci/asihpi/hpimsgx.c b/sound/pci/asihpi/hpimsgx.c index b68e6bfbbfba..ed1c7b774436 100644 --- a/sound/pci/asihpi/hpimsgx.c +++ b/sound/pci/asihpi/hpimsgx.c @@ -581,8 +581,10 @@ static u16 adapter_prepare(u16 adapter) HPI_ADAPTER_OPEN); hm.adapter_index = adapter; hw_entry_point(&hm, &hr); - memcpy(&rESP_HPI_ADAPTER_OPEN[adapter], &hr, - sizeof(rESP_HPI_ADAPTER_OPEN[0])); + memcpy(&rESP_HPI_ADAPTER_OPEN[adapter].h, &hr, + sizeof(rESP_HPI_ADAPTER_OPEN[adapter].h)); + memcpy(&rESP_HPI_ADAPTER_OPEN[adapter].a, &hr.u.ax.info, + sizeof(rESP_HPI_ADAPTER_OPEN[adapter].a)); if (hr.error) return hr.error; From 8fb6857f2f5e35179ff35e7d25358b9add681b7e Mon Sep 17 00:00:00 2001 From: Jori Koolstra Date: Wed, 18 Mar 2026 21:39:52 +0100 Subject: [PATCH 081/401] vfs: fix docstring of hash_name() The docstring of hash_name() is falsely reporting that it returns the component length, whereas it returns a pointer to the terminating '/' or NUL character in the pathname being resolved. Signed-off-by: Jori Koolstra Link: https://patch.msgid.link/20260318203953.5770-1-jkoolstra@xs4all.nl Signed-off-by: Christian Brauner --- fs/namei.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/fs/namei.c b/fs/namei.c index 58f715f7657e..9e5500dad14f 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -2437,8 +2437,14 @@ u64 hashlen_string(const void *salt, const char *name) EXPORT_SYMBOL(hashlen_string); /* - * Calculate the length and hash of the path component, and - * return the length as the result. + * hash_name - Calculate the length and hash of the path component + * @nd: the path resolution state + * @name: the pathname to read the component from + * @lastword: if the component fits in a single word, LAST_WORD_IS_DOT, + * LAST_WORD_IS_DOTDOT, or some other value depending on whether the + * component is '.', '..', or something else. Otherwise, @lastword is 0. + * + * Returns: a pointer to the terminating '/' or NUL character in @name. */ static inline const char *hash_name(struct nameidata *nd, const char *name, From 7e57523490cd2efb52b1ea97f2e0a74c0fb634cd Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 18 Mar 2026 15:38:58 +0000 Subject: [PATCH 082/401] netfs: Fix read abandonment during retry Under certain circumstances, all the remaining subrequests from a read request will get abandoned during retry. The abandonment process expects the 'subreq' variable to be set to the place to start abandonment from, but it doesn't always have a useful value (it will be uninitialised on the first pass through the loop and it may point to a deleted subrequest on later passes). Fix the first jump to "abandon:" to set subreq to the start of the first subrequest expected to need retry (which, in this abandonment case, turned out unexpectedly to no longer have NEED_RETRY set). Also clear the subreq pointer after discarding superfluous retryable subrequests to cause an oops if we do try to access it. Fixes: ee4cdf7ba857 ("netfs: Speed up buffered reading") Signed-off-by: David Howells Link: https://patch.msgid.link/3775287.1773848338@warthog.procyon.org.uk Reviewed-by: Paulo Alcantara (Red Hat) cc: Paulo Alcantara cc: netfs@lists.linux.dev cc: linux-fsdevel@vger.kernel.org Signed-off-by: Christian Brauner --- fs/netfs/read_retry.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/fs/netfs/read_retry.c b/fs/netfs/read_retry.c index 7793ba5e3e8f..cca9ac43c077 100644 --- a/fs/netfs/read_retry.c +++ b/fs/netfs/read_retry.c @@ -93,8 +93,10 @@ static void netfs_retry_read_subrequests(struct netfs_io_request *rreq) from->start, from->transferred, from->len); if (test_bit(NETFS_SREQ_FAILED, &from->flags) || - !test_bit(NETFS_SREQ_NEED_RETRY, &from->flags)) + !test_bit(NETFS_SREQ_NEED_RETRY, &from->flags)) { + subreq = from; goto abandon; + } list_for_each_continue(next, &stream->subrequests) { subreq = list_entry(next, struct netfs_io_subrequest, rreq_link); @@ -178,6 +180,7 @@ static void netfs_retry_read_subrequests(struct netfs_io_request *rreq) if (subreq == to) break; } + subreq = NULL; continue; } From a437601a0a1383260222223440a95dd4322eb7fa Mon Sep 17 00:00:00 2001 From: Baojun Xu Date: Thu, 19 Mar 2026 17:07:47 +0800 Subject: [PATCH 083/401] ASoC: tas2781: Add null check for calibration data For avoid null pointer problem if no calibration data exist. Fixes: 55137f5a68b5e ("ASoC: tas2781: Put three different calibrated data solution into the same data structure") Signed-off-by: Baojun Xu Link: https://patch.msgid.link/20260319090747.2090-1-baojun.xu@ti.com Signed-off-by: Mark Brown --- sound/soc/codecs/tas2781-fmwlib.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/sound/soc/codecs/tas2781-fmwlib.c b/sound/soc/codecs/tas2781-fmwlib.c index 5798d518d94c..a1d86bd309f4 100644 --- a/sound/soc/codecs/tas2781-fmwlib.c +++ b/sound/soc/codecs/tas2781-fmwlib.c @@ -2550,6 +2550,9 @@ static void tasdev_load_calibrated_data(struct tasdevice_priv *priv, int i) int k = i * (cali_data->cali_dat_sz_per_dev + 1); int rc; + if (!data || !cali_data->total_sz) + return; + if (data[k] != i) { dev_err(priv->dev, "%s: no cal-data for dev %d from usr-spc\n", __func__, i); From a54142d9ff49dadb8bd063b8d016546e5706184c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micka=C3=ABl=20Sala=C3=BCn?= Date: Tue, 10 Mar 2026 20:04:15 +0100 Subject: [PATCH 084/401] selftests/landlock: Test tsync interruption and cancellation paths MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add tsync_interrupt test to exercise the signal interruption path in landlock_restrict_sibling_threads(). When a signal interrupts wait_for_completion_interruptible() while the calling thread waits for sibling threads to finish credential preparation, the kernel: 1. Sets ERESTARTNOINTR to request a transparent syscall restart. 2. Calls cancel_tsync_works() to opportunistically dequeue task works that have not started running yet. 3. Breaks out of the preparation loop, then unblocks remaining task works via complete_all() and waits for them to finish. 4. Returns the error, causing abort_creds() in the syscall handler. Specifically, cancel_tsync_works() in its entirety, the ERESTARTNOINTR error branch in landlock_restrict_sibling_threads(), and the abort_creds() error branch in the landlock_restrict_self() syscall handler are timing-dependent and not exercised by the existing tsync tests, making code coverage measurements non-deterministic. The test spawns a signaler thread that rapidly sends SIGUSR1 to the calling thread while it performs landlock_restrict_self() with LANDLOCK_RESTRICT_SELF_TSYNC. Since ERESTARTNOINTR causes a transparent restart, userspace always sees the syscall succeed. This is a best-effort coverage test: the interruption path is exercised when the signal lands during the preparation wait, which depends on thread scheduling. The test creates enough idle sibling threads (200) to ensure multiple serialized waves of credential preparation even on machines with many cores (e.g., 64), widening the window for the signaler. Deterministic coverage would require wrapping the wait call with ALLOW_ERROR_INJECTION() and using CONFIG_FAIL_FUNCTION. Test coverage for security/landlock was 90.2% of 2105 lines according to LLVM 21, and it is now 91.1% of 2105 lines with this new test. Cc: Günther Noack Cc: Justin Suess Cc: Tingmao Wang Cc: Yihan Ding Link: https://lore.kernel.org/r/20260310190416.1913908-1-mic@digikod.net Signed-off-by: Mickaël Salaün --- tools/testing/selftests/landlock/tsync_test.c | 93 ++++++++++++++++++- 1 file changed, 91 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/landlock/tsync_test.c b/tools/testing/selftests/landlock/tsync_test.c index 37ef0d2270db..2b9ad4f154f4 100644 --- a/tools/testing/selftests/landlock/tsync_test.c +++ b/tools/testing/selftests/landlock/tsync_test.c @@ -6,9 +6,10 @@ */ #define _GNU_SOURCE -#include -#include #include +#include +#include +#include #include "common.h" @@ -158,4 +159,92 @@ TEST(competing_enablement) EXPECT_EQ(0, close(ruleset_fd)); } +static void signal_nop_handler(int sig) +{ +} + +struct signaler_data { + pthread_t target; + volatile bool stop; +}; + +static void *signaler_thread(void *data) +{ + struct signaler_data *sd = data; + + while (!sd->stop) + pthread_kill(sd->target, SIGUSR1); + + return NULL; +} + +/* + * Number of idle sibling threads. This must be large enough that even on + * machines with many cores, the sibling threads cannot all complete their + * credential preparation in a single parallel wave, otherwise the signaler + * thread has no window to interrupt wait_for_completion_interruptible(). + * 200 threads on a 64-core machine yields ~3 serialized waves, giving the + * tight signal loop enough time to land an interruption. + */ +#define NUM_IDLE_THREADS 200 + +/* + * Exercises the tsync interruption and cancellation paths in tsync.c. + * + * When a signal interrupts the calling thread while it waits for sibling + * threads to finish their credential preparation + * (wait_for_completion_interruptible in landlock_restrict_sibling_threads), + * the kernel sets ERESTARTNOINTR, cancels queued task works that have not + * started yet (cancel_tsync_works), then waits for the remaining works to + * finish. On the error return, syscalls.c aborts the prepared credentials. + * The kernel automatically restarts the syscall, so userspace sees success. + */ +TEST(tsync_interrupt) +{ + size_t i; + pthread_t threads[NUM_IDLE_THREADS]; + pthread_t signaler; + struct signaler_data sd; + struct sigaction sa = {}; + const int ruleset_fd = create_ruleset(_metadata); + + disable_caps(_metadata); + + /* Install a no-op SIGUSR1 handler so the signal does not kill us. */ + sa.sa_handler = signal_nop_handler; + sigemptyset(&sa.sa_mask); + ASSERT_EQ(0, sigaction(SIGUSR1, &sa, NULL)); + + ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)); + + for (i = 0; i < NUM_IDLE_THREADS; i++) + ASSERT_EQ(0, pthread_create(&threads[i], NULL, idle, NULL)); + + /* + * Start a signaler thread that continuously sends SIGUSR1 to the + * calling thread. This maximizes the chance of interrupting + * wait_for_completion_interruptible() in the kernel's tsync path. + */ + sd.target = pthread_self(); + sd.stop = false; + ASSERT_EQ(0, pthread_create(&signaler, NULL, signaler_thread, &sd)); + + /* + * The syscall may be interrupted and transparently restarted by the + * kernel (ERESTARTNOINTR). From userspace, it should always succeed. + */ + EXPECT_EQ(0, landlock_restrict_self(ruleset_fd, + LANDLOCK_RESTRICT_SELF_TSYNC)); + + sd.stop = true; + ASSERT_EQ(0, pthread_join(signaler, NULL)); + + for (i = 0; i < NUM_IDLE_THREADS; i++) { + ASSERT_EQ(0, pthread_cancel(threads[i])); + ASSERT_EQ(0, pthread_join(threads[i], NULL)); + } + + EXPECT_EQ(0, close(ruleset_fd)); +} + TEST_HARNESS_MAIN From 7a9f448d44127217fabc4065c5ba070d4e0b5d37 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Mon, 23 Feb 2026 16:44:59 +0100 Subject: [PATCH 085/401] scsi: ses: Handle positive SCSI error from ses_recv_diag() ses_recv_diag() can return a positive value, which also means that an error happened, so do not only test for negative values. Cc: James E.J. Bottomley Cc: Martin K. Petersen Cc: stable Assisted-by: gkh_clanker_2000 Signed-off-by: Greg Kroah-Hartman Reviewed-by: Hannes Reinecke Link: https://patch.msgid.link/2026022301-bony-overstock-a07f@gregkh Signed-off-by: Martin K. Petersen --- drivers/scsi/ses.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/ses.c b/drivers/scsi/ses.c index 8e1686358e25..4c348645b04e 100644 --- a/drivers/scsi/ses.c +++ b/drivers/scsi/ses.c @@ -215,7 +215,7 @@ static unsigned char *ses_get_page2_descriptor(struct enclosure_device *edev, unsigned char *type_ptr = ses_dev->page1_types; unsigned char *desc_ptr = ses_dev->page2 + 8; - if (ses_recv_diag(sdev, 2, ses_dev->page2, ses_dev->page2_len) < 0) + if (ses_recv_diag(sdev, 2, ses_dev->page2, ses_dev->page2_len)) return NULL; for (i = 0; i < ses_dev->page1_num_types; i++, type_ptr += 4) { From 61d099ac4a7a8fb11ebdb6e2ec8d77f38e77362f Mon Sep 17 00:00:00 2001 From: Tyllis Xu Date: Sat, 14 Mar 2026 12:01:50 -0500 Subject: [PATCH 086/401] scsi: ibmvfc: Fix OOB access in ibmvfc_discover_targets_done() A malicious or compromised VIO server can return a num_written value in the discover targets MAD response that exceeds max_targets. This value is stored directly in vhost->num_targets without validation, and is then used as the loop bound in ibmvfc_alloc_targets() to index into disc_buf[], which is only allocated for max_targets entries. Indices at or beyond max_targets access kernel memory outside the DMA-coherent allocation. The out-of-bounds data is subsequently embedded in Implicit Logout and PLOGI MADs that are sent back to the VIO server, leaking kernel memory. Fix by clamping num_written to max_targets before storing it. Fixes: 072b91f9c651 ("[SCSI] ibmvfc: IBM Power Virtual Fibre Channel Adapter Client Driver") Reported-by: Yuhao Jiang Cc: stable@vger.kernel.org Signed-off-by: Tyllis Xu Reviewed-by: Dave Marquardt Acked-by: Tyrel Datwyler Link: https://patch.msgid.link/20260314170151.548614-1-LivelyCarpet87@gmail.com Signed-off-by: Martin K. Petersen --- drivers/scsi/ibmvscsi/ibmvfc.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/scsi/ibmvscsi/ibmvfc.c b/drivers/scsi/ibmvscsi/ibmvfc.c index a20fce04fe79..3dd2adda195e 100644 --- a/drivers/scsi/ibmvscsi/ibmvfc.c +++ b/drivers/scsi/ibmvscsi/ibmvfc.c @@ -4966,7 +4966,8 @@ static void ibmvfc_discover_targets_done(struct ibmvfc_event *evt) switch (mad_status) { case IBMVFC_MAD_SUCCESS: ibmvfc_dbg(vhost, "Discover Targets succeeded\n"); - vhost->num_targets = be32_to_cpu(rsp->num_written); + vhost->num_targets = min_t(u32, be32_to_cpu(rsp->num_written), + max_targets); ibmvfc_set_host_action(vhost, IBMVFC_HOST_ACTION_ALLOC_TGTS); break; case IBMVFC_MAD_FAILED: From 1333eee56cdf3f0cf67c6ab4114c2c9e0a952026 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Mon, 16 Mar 2026 20:23:29 -0400 Subject: [PATCH 087/401] scsi: target: tcm_loop: Drain commands in target_reset handler MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit tcm_loop_target_reset() violates the SCSI EH contract: it returns SUCCESS without draining any in-flight commands. The SCSI EH documentation (scsi_eh.rst) requires that when a reset handler returns SUCCESS the driver has made lower layers "forget about timed out scmds" and is ready for new commands. Every other SCSI LLD (virtio_scsi, mpt3sas, ipr, scsi_debug, mpi3mr) enforces this by draining or completing outstanding commands before returning SUCCESS. Because tcm_loop_target_reset() doesn't drain, the SCSI EH reuses in-flight scsi_cmnd structures for recovery commands (e.g. TUR) while the target core still has async completion work queued for the old se_cmd. The memset in queuecommand zeroes se_lun and lun_ref_active, causing transport_lun_remove_cmd() to skip its percpu_ref_put(). The leaked LUN reference prevents transport_clear_lun_ref() from completing, hanging configfs LUN unlink forever in D-state: INFO: task rm:264 blocked for more than 122 seconds. rm D 0 264 258 0x00004000 Call Trace: __schedule+0x3d0/0x8e0 schedule+0x36/0xf0 transport_clear_lun_ref+0x78/0x90 [target_core_mod] core_tpg_remove_lun+0x28/0xb0 [target_core_mod] target_fabric_port_unlink+0x50/0x60 [target_core_mod] configfs_unlink+0x156/0x1f0 [configfs] vfs_unlink+0x109/0x290 do_unlinkat+0x1d5/0x2d0 Fix this by making tcm_loop_target_reset() actually drain commands: 1. Issue TMR_LUN_RESET via tcm_loop_issue_tmr() to drain all commands that the target core knows about (those not yet CMD_T_COMPLETE). 2. Use blk_mq_tagset_busy_iter() to iterate all started requests and flush_work() on each se_cmd — this drains any deferred completion work for commands that already had CMD_T_COMPLETE set before the TMR (which the TMR skips via __target_check_io_state()). This is the same pattern used by mpi3mr, scsi_debug, and libsas to drain outstanding commands during reset. Fixes: e0eb5d38b732 ("scsi: target: tcm_loop: Use block cmd allocator for se_cmds") Cc: stable@vger.kernel.org Assisted-by: Claude:claude-opus-4-6 Signed-off-by: Josef Bacik Link: https://patch.msgid.link/27011aa34c8f6b1b94d2e3cf5655b6d037f53428.1773706803.git.josef@toxicpanda.com Signed-off-by: Martin K. Petersen --- drivers/target/loopback/tcm_loop.c | 52 ++++++++++++++++++++++++++---- 1 file changed, 46 insertions(+), 6 deletions(-) diff --git a/drivers/target/loopback/tcm_loop.c b/drivers/target/loopback/tcm_loop.c index d668bd19fd4a..528883d989b8 100644 --- a/drivers/target/loopback/tcm_loop.c +++ b/drivers/target/loopback/tcm_loop.c @@ -26,6 +26,7 @@ #include #include #include +#include #include #include #include @@ -269,15 +270,27 @@ static int tcm_loop_device_reset(struct scsi_cmnd *sc) return (ret == TMR_FUNCTION_COMPLETE) ? SUCCESS : FAILED; } +static bool tcm_loop_flush_work_iter(struct request *rq, void *data) +{ + struct scsi_cmnd *sc = blk_mq_rq_to_pdu(rq); + struct tcm_loop_cmd *tl_cmd = scsi_cmd_priv(sc); + struct se_cmd *se_cmd = &tl_cmd->tl_se_cmd; + + flush_work(&se_cmd->work); + return true; +} + static int tcm_loop_target_reset(struct scsi_cmnd *sc) { struct tcm_loop_hba *tl_hba; struct tcm_loop_tpg *tl_tpg; + struct Scsi_Host *sh = sc->device->host; + int ret; /* * Locate the tcm_loop_hba_t pointer */ - tl_hba = *(struct tcm_loop_hba **)shost_priv(sc->device->host); + tl_hba = *(struct tcm_loop_hba **)shost_priv(sh); if (!tl_hba) { pr_err("Unable to perform device reset without active I_T Nexus\n"); return FAILED; @@ -286,11 +299,38 @@ static int tcm_loop_target_reset(struct scsi_cmnd *sc) * Locate the tl_tpg pointer from TargetID in sc->device->id */ tl_tpg = &tl_hba->tl_hba_tpgs[sc->device->id]; - if (tl_tpg) { - tl_tpg->tl_transport_status = TCM_TRANSPORT_ONLINE; - return SUCCESS; - } - return FAILED; + if (!tl_tpg) + return FAILED; + + /* + * Issue a LUN_RESET to drain all commands that the target core + * knows about. This handles commands not yet marked CMD_T_COMPLETE. + */ + ret = tcm_loop_issue_tmr(tl_tpg, sc->device->lun, 0, TMR_LUN_RESET); + if (ret != TMR_FUNCTION_COMPLETE) + return FAILED; + + /* + * Flush any deferred target core completion work that may still be + * queued. Commands that already had CMD_T_COMPLETE set before the TMR + * are skipped by the TMR drain, but their async completion work + * (transport_lun_remove_cmd → percpu_ref_put, release_cmd → scsi_done) + * may still be pending in target_completion_wq. + * + * The SCSI EH will reuse in-flight scsi_cmnd structures for recovery + * commands (e.g. TUR) immediately after this handler returns SUCCESS — + * if deferred work is still pending, the memset in queuecommand would + * zero the se_cmd while the work accesses it, leaking the LUN + * percpu_ref and hanging configfs unlink forever. + * + * Use blk_mq_tagset_busy_iter() to find all started requests and + * flush_work() on each — the same pattern used by mpi3mr, scsi_debug, + * and other SCSI drivers to drain outstanding commands during reset. + */ + blk_mq_tagset_busy_iter(&sh->tag_set, tcm_loop_flush_work_iter, NULL); + + tl_tpg->tl_transport_status = TCM_TRANSPORT_ONLINE; + return SUCCESS; } static const struct scsi_host_template tcm_loop_driver_template = { From d71afa9deb4d413232ba16d693f7d43b321931b4 Mon Sep 17 00:00:00 2001 From: Yihang Li Date: Tue, 17 Mar 2026 14:31:47 +0800 Subject: [PATCH 088/401] scsi: scsi_transport_sas: Fix the maximum channel scanning issue After commit 37c4e72b0651 ("scsi: Fix sas_user_scan() to handle wildcard and multi-channel scans"), if the device supports multiple channels (0 to shost->max_channel), user_scan() invokes updated sas_user_scan() to perform the scan behavior for a specific transfer. However, when the user specifies shost->max_channel, it will return -EINVAL, which is not expected. Fix and support specifying the scan shost->max_channel for scanning. Fixes: 37c4e72b0651 ("scsi: Fix sas_user_scan() to handle wildcard and multi-channel scans") Signed-off-by: Yihang Li Reviewed-by: John Garry Link: https://patch.msgid.link/20260317063147.2182562-1-liyihang9@huawei.com Signed-off-by: Martin K. Petersen --- drivers/scsi/scsi_transport_sas.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/scsi_transport_sas.c b/drivers/scsi/scsi_transport_sas.c index 12124f9d5ccd..13412702188e 100644 --- a/drivers/scsi/scsi_transport_sas.c +++ b/drivers/scsi/scsi_transport_sas.c @@ -1734,7 +1734,7 @@ static int sas_user_scan(struct Scsi_Host *shost, uint channel, break; default: - if (channel < shost->max_channel) { + if (channel <= shost->max_channel) { res = scsi_scan_host_selected(shost, channel, id, lun, SCSI_SCAN_MANUAL); } else { From 01f784fc9d0ab2a6dac45ee443620e517cb2a19b Mon Sep 17 00:00:00 2001 From: Thinh Nguyen Date: Sat, 14 Mar 2026 01:17:40 +0000 Subject: [PATCH 089/401] scsi: target: file: Use kzalloc_flex for aio_cmd The target_core_file doesn't initialize the aio_cmd->iocb for the ki_write_stream. When a write command fd_execute_rw_aio() is executed, we may get a bogus ki_write_stream value, causing unintended write failure status when checking iocb->ki_write_stream > max_write_streams in the block device. Let's just use kzalloc_flex when allocating the aio_cmd and let ki_write_stream=0 to fix this issue. Fixes: 732f25a2895a ("fs: add a write stream field to the kiocb") Fixes: c27683da6406 ("block: expose write streams for block device nodes") Cc: stable@vger.kernel.org Signed-off-by: Thinh Nguyen Link: https://patch.msgid.link/f1a2f81c62f043e31f80bb92d5f29893400c8ee2.1773450782.git.Thinh.Nguyen@synopsys.com Signed-off-by: Martin K. Petersen --- drivers/target/target_core_file.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/target/target_core_file.c b/drivers/target/target_core_file.c index 3ae1f7137d9d..3d593af30aa5 100644 --- a/drivers/target/target_core_file.c +++ b/drivers/target/target_core_file.c @@ -276,7 +276,7 @@ fd_execute_rw_aio(struct se_cmd *cmd, struct scatterlist *sgl, u32 sgl_nents, ssize_t len = 0; int ret = 0, i; - aio_cmd = kmalloc_flex(*aio_cmd, bvecs, sgl_nents); + aio_cmd = kzalloc_flex(*aio_cmd, bvecs, sgl_nents); if (!aio_cmd) return TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE; From fc3bbf34e643faa8678aabdc3810c60109f3435a Mon Sep 17 00:00:00 2001 From: Pedro Demarchi Gomes Date: Wed, 18 Mar 2026 22:52:24 -0300 Subject: [PATCH 090/401] drm/shmem-helper: Fix huge page mapping in fault handler When running ./tools/testing/selftests/mm/split_huge_page_test multiple times with /sys/kernel/mm/transparent_hugepage/shmem_enabled and /sys/kernel/mm/transparent_hugepage/enabled set as always the following BUG occurs: [ 232.728858] ------------[ cut here ]------------ [ 232.729458] kernel BUG at mm/memory.c:2276! [ 232.729726] Oops: invalid opcode: 0000 [#1] SMP DEBUG_PAGEALLOC KASAN PTI [ 232.730217] CPU: 19 UID: 60578 PID: 1497 Comm: llvmpipe-9 Not tainted 7.0.0-rc1mm-new+ #19 PREEMPT(lazy) [ 232.730855] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.17.0-9.fc43 06/10/2025 [ 232.731360] RIP: 0010:walk_to_pmd+0x29e/0x3c0 [ 232.731569] Code: d8 5b 5d 41 5c 41 5d 41 5e 41 5f c3 cc cc cc cc 48 89 ea 48 89 de 4c 89 f7 e8 ae 85 ff ff 85 c0 0f 84 1f fe ff ff 31 db eb d0 <0f> 0b 48 89 ea 48 89 de 4c 89 f7 e8 92 8b ff ff 85 c0 75 e8 48 b8 [ 232.732614] RSP: 0000:ffff8881aa6ff9a8 EFLAGS: 00010282 [ 232.732991] RAX: 8000000142e002e7 RBX: ffff8881433cae10 RCX: dffffc0000000000 [ 232.733362] RDX: 0000000000000000 RSI: 00007fb47840b000 RDI: 8000000142e002e7 [ 232.733801] RBP: 00007fb47840b000 R08: 0000000000000000 R09: 1ffff110354dff46 [ 232.734168] R10: fffffbfff0cb921d R11: 00000000910da5ce R12: 1ffffffff0c1fcdd [ 232.734459] R13: 1ffffffff0c23f36 R14: ffff888171628040 R15: 0000000000000000 [ 232.734861] FS: 00007fb4907f86c0(0000) GS:ffff888791f2c000(0000) knlGS:0000000000000000 [ 232.735265] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 232.735548] CR2: 00007fb47840be00 CR3: 000000015e6dc000 CR4: 00000000000006f0 [ 232.736031] Call Trace: [ 232.736273] [ 232.736500] get_locked_pte+0x1f/0xa0 [ 232.736878] insert_pfn+0x9f/0x350 [ 232.737190] ? __pfx_pat_pagerange_is_ram+0x10/0x10 [ 232.737614] ? __pfx_insert_pfn+0x10/0x10 [ 232.737990] ? __pfx_css_rstat_updated+0x10/0x10 [ 232.738281] ? __pfx_pfn_modify_allowed+0x10/0x10 [ 232.738552] ? lookup_memtype+0x62/0x180 [ 232.738761] vmf_insert_pfn_prot+0x14b/0x340 [ 232.739012] ? __pfx_vmf_insert_pfn_prot+0x10/0x10 [ 232.739247] ? __pfx___might_resched+0x10/0x10 [ 232.739475] drm_gem_shmem_fault.cold+0x18/0x39 [ 232.739677] ? rcu_read_unlock+0x20/0x70 [ 232.739882] __do_fault+0x251/0x7b0 [ 232.740028] do_fault+0x6e1/0xc00 [ 232.740167] ? __lock_acquire+0x590/0xc40 [ 232.740335] handle_pte_fault+0x439/0x760 [ 232.740498] ? mtree_range_walk+0x252/0xae0 [ 232.740669] ? __pfx_handle_pte_fault+0x10/0x10 [ 232.740899] __handle_mm_fault+0xa02/0xf30 [ 232.741066] ? __pfx___handle_mm_fault+0x10/0x10 [ 232.741255] ? find_vma+0xa1/0x120 [ 232.741403] handle_mm_fault+0x2bf/0x8f0 [ 232.741564] do_user_addr_fault+0x2d3/0xed0 [ 232.741736] ? trace_page_fault_user+0x1bf/0x240 [ 232.741969] exc_page_fault+0x87/0x120 [ 232.742124] asm_exc_page_fault+0x26/0x30 [ 232.742288] RIP: 0033:0x7fb4d73ed546 [ 232.742441] Code: 66 41 0f 6f fb 66 44 0f 6d dc 66 44 0f 6f c6 66 41 0f 6d f1 66 0f 6c fc 66 45 0f 6c c1 66 44 0f 6f c9 66 0f 6d ca 66 0f db f0 <66> 0f df 04 08 66 44 0f 6c ca 66 45 0f db c2 66 44 0f df 10 66 44 [ 232.743193] RSP: 002b:00007fb4907f68a0 EFLAGS: 00010206 [ 232.743565] RAX: 00007fb47840aa00 RBX: 00007fb4d73ec070 RCX: 0000000000001400 [ 232.743871] RDX: 0000000000002800 RSI: 0000000000003c00 RDI: 0000000000000001 [ 232.744150] RBP: 0000000000000004 R08: 0000000000001400 R09: 00007fb4d73ec060 [ 232.744433] R10: 000055f0261a4288 R11: 00007fb4c013da40 R12: 0000000000000008 [ 232.744712] R13: 0000000000000000 R14: 4332322132212110 R15: 0000000000000004 [ 232.746616] [ 232.746711] Modules linked in: nft_nat nft_masq veth bridge stp llc snd_seq_dummy snd_hrtimer snd_seq snd_seq_device snd_timer snd soundcore overlay rfkill nf_conntrack_netbios_ns nf_conntrack_broadcast nft_fib_inet nft_fib_ipv4 nft_fib_ipv6 nft_fib nft_reject_inet nf_reject_ipv4 nf_reject_ipv6 nft_reject nft_ct nft_chain_nat nf_nat nf_conntrack nf_defrag_ipv6 nf_defrag_ipv4 nf_tables qrtr ppdev 9pnet_virtio 9pnet parport_pc i2c_piix4 netfs pcspkr parport i2c_smbus joydev sunrpc vfat fat loop dm_multipath nfnetlink vsock_loopback vmw_vsock_virtio_transport_common vmw_vsock_vmci_transport zram lz4hc_compress vmw_vmci lz4_compress vsock e1000 bochs serio_raw ata_generic pata_acpi scsi_dh_rdac scsi_dh_emc scsi_dh_alua i2c_dev fuse qemu_fw_cfg [ 232.749308] ---[ end trace 0000000000000000 ]--- [ 232.749507] RIP: 0010:walk_to_pmd+0x29e/0x3c0 [ 232.749692] Code: d8 5b 5d 41 5c 41 5d 41 5e 41 5f c3 cc cc cc cc 48 89 ea 48 89 de 4c 89 f7 e8 ae 85 ff ff 85 c0 0f 84 1f fe ff ff 31 db eb d0 <0f> 0b 48 89 ea 48 89 de 4c 89 f7 e8 92 8b ff ff 85 c0 75 e8 48 b8 [ 232.750428] RSP: 0000:ffff8881aa6ff9a8 EFLAGS: 00010282 [ 232.750645] RAX: 8000000142e002e7 RBX: ffff8881433cae10 RCX: dffffc0000000000 [ 232.750954] RDX: 0000000000000000 RSI: 00007fb47840b000 RDI: 8000000142e002e7 [ 232.751232] RBP: 00007fb47840b000 R08: 0000000000000000 R09: 1ffff110354dff46 [ 232.751514] R10: fffffbfff0cb921d R11: 00000000910da5ce R12: 1ffffffff0c1fcdd [ 232.751837] R13: 1ffffffff0c23f36 R14: ffff888171628040 R15: 0000000000000000 [ 232.752124] FS: 00007fb4907f86c0(0000) GS:ffff888791f2c000(0000) knlGS:0000000000000000 [ 232.752441] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 232.752674] CR2: 00007fb47840be00 CR3: 000000015e6dc000 CR4: 00000000000006f0 [ 232.752983] Kernel panic - not syncing: Fatal exception [ 232.753510] Kernel Offset: disabled [ 232.754643] ---[ end Kernel panic - not syncing: Fatal exception ]--- This happens when two concurrent page faults occur within the same PMD range. One fault installs a PMD mapping through vmf_insert_pfn_pmd(), while the other attempts to install a PTE mapping via vmf_insert_pfn(). The bug is triggered because a pmd_trans_huge is not expected when walking the page table inside vmf_insert_pfn. Avoid this race by adding a huge_fault callback to drm_gem_shmem_vm_ops so that PMD-sized mappings are handled through the appropriate huge page fault path. Fixes: 211b9a39f261 ("drm/shmem-helper: Map huge pages in fault handler") Signed-off-by: Pedro Demarchi Gomes Reviewed-by: Boris Brezillon Link: https://patch.msgid.link/20260319015224.46896-1-pedrodemargomes@gmail.com Signed-off-by: Boris Brezillon --- drivers/gpu/drm/drm_gem_shmem_helper.c | 48 +++++++++++++++----------- 1 file changed, 27 insertions(+), 21 deletions(-) diff --git a/drivers/gpu/drm/drm_gem_shmem_helper.c b/drivers/gpu/drm/drm_gem_shmem_helper.c index 7b5a49935ae4..c549293b5bb6 100644 --- a/drivers/gpu/drm/drm_gem_shmem_helper.c +++ b/drivers/gpu/drm/drm_gem_shmem_helper.c @@ -550,27 +550,27 @@ int drm_gem_shmem_dumb_create(struct drm_file *file, struct drm_device *dev, } EXPORT_SYMBOL_GPL(drm_gem_shmem_dumb_create); -static bool drm_gem_shmem_try_map_pmd(struct vm_fault *vmf, unsigned long addr, - struct page *page) +static vm_fault_t try_insert_pfn(struct vm_fault *vmf, unsigned int order, + unsigned long pfn) { + if (!order) { + return vmf_insert_pfn(vmf->vma, vmf->address, pfn); #ifdef CONFIG_ARCH_SUPPORTS_PMD_PFNMAP - unsigned long pfn = page_to_pfn(page); - unsigned long paddr = pfn << PAGE_SHIFT; - bool aligned = (addr & ~PMD_MASK) == (paddr & ~PMD_MASK); + } else if (order == PMD_ORDER) { + unsigned long paddr = pfn << PAGE_SHIFT; + bool aligned = (vmf->address & ~PMD_MASK) == (paddr & ~PMD_MASK); - if (aligned && - pmd_none(*vmf->pmd) && - folio_test_pmd_mappable(page_folio(page))) { - pfn &= PMD_MASK >> PAGE_SHIFT; - if (vmf_insert_pfn_pmd(vmf, pfn, false) == VM_FAULT_NOPAGE) - return true; - } + if (aligned && + folio_test_pmd_mappable(page_folio(pfn_to_page(pfn)))) { + pfn &= PMD_MASK >> PAGE_SHIFT; + return vmf_insert_pfn_pmd(vmf, pfn, false); + } #endif - - return false; + } + return VM_FAULT_FALLBACK; } -static vm_fault_t drm_gem_shmem_fault(struct vm_fault *vmf) +static vm_fault_t drm_gem_shmem_any_fault(struct vm_fault *vmf, unsigned int order) { struct vm_area_struct *vma = vmf->vma; struct drm_gem_object *obj = vma->vm_private_data; @@ -581,6 +581,9 @@ static vm_fault_t drm_gem_shmem_fault(struct vm_fault *vmf) pgoff_t page_offset; unsigned long pfn; + if (order && order != PMD_ORDER) + return VM_FAULT_FALLBACK; + /* Offset to faulty address in the VMA. */ page_offset = vmf->pgoff - vma->vm_pgoff; @@ -593,13 +596,8 @@ static vm_fault_t drm_gem_shmem_fault(struct vm_fault *vmf) goto out; } - if (drm_gem_shmem_try_map_pmd(vmf, vmf->address, pages[page_offset])) { - ret = VM_FAULT_NOPAGE; - goto out; - } - pfn = page_to_pfn(pages[page_offset]); - ret = vmf_insert_pfn(vma, vmf->address, pfn); + ret = try_insert_pfn(vmf, order, pfn); out: dma_resv_unlock(shmem->base.resv); @@ -607,6 +605,11 @@ static vm_fault_t drm_gem_shmem_fault(struct vm_fault *vmf) return ret; } +static vm_fault_t drm_gem_shmem_fault(struct vm_fault *vmf) +{ + return drm_gem_shmem_any_fault(vmf, 0); +} + static void drm_gem_shmem_vm_open(struct vm_area_struct *vma) { struct drm_gem_object *obj = vma->vm_private_data; @@ -643,6 +646,9 @@ static void drm_gem_shmem_vm_close(struct vm_area_struct *vma) const struct vm_operations_struct drm_gem_shmem_vm_ops = { .fault = drm_gem_shmem_fault, +#ifdef CONFIG_ARCH_SUPPORTS_PMD_PFNMAP + .huge_fault = drm_gem_shmem_any_fault, +#endif .open = drm_gem_shmem_vm_open, .close = drm_gem_shmem_vm_close, }; From 215e5fe75881a7e2425df04aeeed47a903d5cd5d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?C=C3=A1ssio=20Gabriel?= Date: Thu, 19 Mar 2026 21:45:26 -0300 Subject: [PATCH 091/401] ASoC: SOF: topology: reject invalid vendor array size in token parser MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit sof_parse_token_sets() accepts array->size values that can be invalid for a vendor tuple array header. In particular, a zero size does not advance the parser state and can lead to non-progress parsing on malformed topology data. Validate array->size against the minimum header size and reject values smaller than sizeof(*array) before parsing. This preserves behavior for valid topologies and hardens malformed-input handling. Signed-off-by: Cássio Gabriel Acked-by: Peter Ujfalusi Link: https://patch.msgid.link/20260319-sof-topology-array-size-fix-v1-1-f9191b16b1b7@gmail.com Signed-off-by: Mark Brown --- sound/soc/sof/topology.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/sof/topology.c b/sound/soc/sof/topology.c index 18e2401152c8..35200d801fb7 100644 --- a/sound/soc/sof/topology.c +++ b/sound/soc/sof/topology.c @@ -736,7 +736,7 @@ static int sof_parse_token_sets(struct snd_soc_component *scomp, asize = le32_to_cpu(array->size); /* validate asize */ - if (asize < 0) { /* FIXME: A zero-size array makes no sense */ + if (asize < sizeof(*array)) { dev_err(scomp->dev, "error: invalid array size 0x%x\n", asize); return -EINVAL; From 2594196f4e3bd70782e7cf1e22e3e398cdb74f78 Mon Sep 17 00:00:00 2001 From: Hasun Park Date: Fri, 20 Mar 2026 01:33:21 +0900 Subject: [PATCH 092/401] ASoC: amd: acp: add ASUS HN7306EA quirk for legacy SDW machine Add a DMI quirk entry for ASUS HN7306EA in the ACP SoundWire legacy machine driver. Set driver_data to ASOC_SDW_ACP_DMIC for this board so the platform-specific DMIC quirk path is selected. Signed-off-by: Hasun Park Link: https://patch.msgid.link/20260319163321.30326-1-hasunpark@gmail.com Signed-off-by: Mark Brown --- sound/soc/amd/acp/acp-sdw-legacy-mach.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/sound/soc/amd/acp/acp-sdw-legacy-mach.c b/sound/soc/amd/acp/acp-sdw-legacy-mach.c index c30ccf23005a..6388cd7cb28e 100644 --- a/sound/soc/amd/acp/acp-sdw-legacy-mach.c +++ b/sound/soc/amd/acp/acp-sdw-legacy-mach.c @@ -111,6 +111,14 @@ static const struct dmi_system_id soc_sdw_quirk_table[] = { }, .driver_data = (void *)(ASOC_SDW_CODEC_SPKR), }, + { + .callback = soc_sdw_quirk_cb, + .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "ASUSTeK COMPUTER INC."), + DMI_MATCH(DMI_PRODUCT_NAME, "HN7306EA"), + }, + .driver_data = (void *)(ASOC_SDW_ACP_DMIC), + }, {} }; From 76f9377cd2ab7a9220c25d33940d9ca20d368172 Mon Sep 17 00:00:00 2001 From: Joanne Koong Date: Thu, 19 Mar 2026 17:51:45 -0700 Subject: [PATCH 093/401] writeback: don't block sync for filesystems with no data integrity guarantees Add a SB_I_NO_DATA_INTEGRITY superblock flag for filesystems that cannot guarantee data persistence on sync (eg fuse). For superblocks with this flag set, sync kicks off writeback of dirty inodes but does not wait for the flusher threads to complete the writeback. This replaces the per-inode AS_NO_DATA_INTEGRITY mapping flag added in commit f9a49aa302a0 ("fs/writeback: skip AS_NO_DATA_INTEGRITY mappings in wait_sb_inodes()"). The flag belongs at the superblock level because data integrity is a filesystem-wide property, not a per-inode one. Having this flag at the superblock level also allows us to skip having to iterate every dirty inode in wait_sb_inodes() only to skip each inode individually. Prior to this commit, mappings with no data integrity guarantees skipped waiting on writeback completion but still waited on the flusher threads to finish initiating the writeback. Waiting on the flusher threads is unnecessary. This commit kicks off writeback but does not wait on the flusher threads. This change properly addresses a recent report [1] for a suspend-to-RAM hang seen on fuse-overlayfs that was caused by waiting on the flusher threads to finish: Workqueue: pm_fs_sync pm_fs_sync_work_fn Call Trace: __schedule+0x457/0x1720 schedule+0x27/0xd0 wb_wait_for_completion+0x97/0xe0 sync_inodes_sb+0xf8/0x2e0 __iterate_supers+0xdc/0x160 ksys_sync+0x43/0xb0 pm_fs_sync_work_fn+0x17/0xa0 process_one_work+0x193/0x350 worker_thread+0x1a1/0x310 kthread+0xfc/0x240 ret_from_fork+0x243/0x280 ret_from_fork_asm+0x1a/0x30 On fuse this is problematic because there are paths that may cause the flusher thread to block (eg if systemd freezes the user session cgroups first, which freezes the fuse daemon, before invoking the kernel suspend. The kernel suspend triggers ->write_node() which on fuse issues a synchronous setattr request, which cannot be processed since the daemon is frozen. Or if the daemon is buggy and cannot properly complete writeback, initiating writeback on a dirty folio already under writeback leads to writeback_get_folio() -> folio_prepare_writeback() -> unconditional wait on writeback to finish, which will cause a hang). This commit restores fuse to its prior behavior before tmp folios were removed, where sync was essentially a no-op. [1] https://lore.kernel.org/linux-fsdevel/CAJnrk1a-asuvfrbKXbEwwDSctvemF+6zfhdnuzO65Pt8HsFSRw@mail.gmail.com/T/#m632c4648e9cafc4239299887109ebd880ac6c5c1 Fixes: 0c58a97f919c ("fuse: remove tmp folio for writebacks and internal rb tree") Reported-by: John Cc: stable@vger.kernel.org Signed-off-by: Joanne Koong Link: https://patch.msgid.link/20260320005145.2483161-2-joannelkoong@gmail.com Reviewed-by: Jan Kara Reviewed-by: David Hildenbrand (Arm) Signed-off-by: Christian Brauner --- fs/fs-writeback.c | 18 ++++++++++++------ fs/fuse/file.c | 4 +--- fs/fuse/inode.c | 1 + include/linux/fs/super_types.h | 1 + include/linux/pagemap.h | 11 ----------- 5 files changed, 15 insertions(+), 20 deletions(-) diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index d8dac1931595..3c75ee025bda 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c @@ -2787,13 +2787,8 @@ static void wait_sb_inodes(struct super_block *sb) * The mapping can appear untagged while still on-list since we * do not have the mapping lock. Skip it here, wb completion * will remove it. - * - * If the mapping does not have data integrity semantics, - * there's no need to wait for the writeout to complete, as the - * mapping cannot guarantee that data is persistently stored. */ - if (!mapping_tagged(mapping, PAGECACHE_TAG_WRITEBACK) || - mapping_no_data_integrity(mapping)) + if (!mapping_tagged(mapping, PAGECACHE_TAG_WRITEBACK)) continue; spin_unlock_irq(&sb->s_inode_wblist_lock); @@ -2928,6 +2923,17 @@ void sync_inodes_sb(struct super_block *sb) */ if (bdi == &noop_backing_dev_info) return; + + /* + * If the superblock has SB_I_NO_DATA_INTEGRITY set, there's no need to + * wait for the writeout to complete, as the filesystem cannot guarantee + * data persistence on sync. Just kick off writeback and return. + */ + if (sb->s_iflags & SB_I_NO_DATA_INTEGRITY) { + wakeup_flusher_threads_bdi(bdi, WB_REASON_SYNC); + return; + } + WARN_ON(!rwsem_is_locked(&sb->s_umount)); /* protect against inode wb switch, see inode_switch_wbs_work_fn() */ diff --git a/fs/fuse/file.c b/fs/fuse/file.c index b1bb7153cb78..676fd9856bfb 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -3201,10 +3201,8 @@ void fuse_init_file_inode(struct inode *inode, unsigned int flags) inode->i_fop = &fuse_file_operations; inode->i_data.a_ops = &fuse_file_aops; - if (fc->writeback_cache) { + if (fc->writeback_cache) mapping_set_writeback_may_deadlock_on_reclaim(&inode->i_data); - mapping_set_no_data_integrity(&inode->i_data); - } INIT_LIST_HEAD(&fi->write_files); INIT_LIST_HEAD(&fi->queued_writes); diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index e57b8af06be9..c795abe47a4f 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -1709,6 +1709,7 @@ static void fuse_sb_defaults(struct super_block *sb) sb->s_export_op = &fuse_export_operations; sb->s_iflags |= SB_I_IMA_UNVERIFIABLE_SIGNATURE; sb->s_iflags |= SB_I_NOIDMAP; + sb->s_iflags |= SB_I_NO_DATA_INTEGRITY; if (sb->s_user_ns != &init_user_ns) sb->s_iflags |= SB_I_UNTRUSTED_MOUNTER; sb->s_flags &= ~(SB_NOSEC | SB_I_VERSION); diff --git a/include/linux/fs/super_types.h b/include/linux/fs/super_types.h index fa7638b81246..383050e7fdf5 100644 --- a/include/linux/fs/super_types.h +++ b/include/linux/fs/super_types.h @@ -338,5 +338,6 @@ struct super_block { #define SB_I_NOUMASK 0x00001000 /* VFS does not apply umask */ #define SB_I_NOIDMAP 0x00002000 /* No idmapped mounts on this superblock */ #define SB_I_ALLOW_HSM 0x00004000 /* Allow HSM events on this superblock */ +#define SB_I_NO_DATA_INTEGRITY 0x00008000 /* fs cannot guarantee data persistence on sync */ #endif /* _LINUX_FS_SUPER_TYPES_H */ diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index ec442af3f886..31a848485ad9 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -210,7 +210,6 @@ enum mapping_flags { AS_WRITEBACK_MAY_DEADLOCK_ON_RECLAIM = 9, AS_KERNEL_FILE = 10, /* mapping for a fake kernel file that shouldn't account usage to user cgroups */ - AS_NO_DATA_INTEGRITY = 11, /* no data integrity guarantees */ /* Bits 16-25 are used for FOLIO_ORDER */ AS_FOLIO_ORDER_BITS = 5, AS_FOLIO_ORDER_MIN = 16, @@ -346,16 +345,6 @@ static inline bool mapping_writeback_may_deadlock_on_reclaim(const struct addres return test_bit(AS_WRITEBACK_MAY_DEADLOCK_ON_RECLAIM, &mapping->flags); } -static inline void mapping_set_no_data_integrity(struct address_space *mapping) -{ - set_bit(AS_NO_DATA_INTEGRITY, &mapping->flags); -} - -static inline bool mapping_no_data_integrity(const struct address_space *mapping) -{ - return test_bit(AS_NO_DATA_INTEGRITY, &mapping->flags); -} - static inline gfp_t mapping_gfp_mask(const struct address_space *mapping) { return mapping->gfp_mask; From 217c0a5c177a3d4f7c8497950cbf5c36756e8bbb Mon Sep 17 00:00:00 2001 From: "Mike Rapoport (Microsoft)" Date: Fri, 20 Mar 2026 15:59:48 +0200 Subject: [PATCH 094/401] x86/efi: efi_unmap_boot_services: fix calculation of ranges_to_free size ranges_to_free array should have enough room to store the entire EFI memmap plus an extra element for NULL entry. The calculation of this array size wrongly adds 1 to the overall size instead of adding 1 to the number of elements. Add parentheses to properly size the array. Reported-by: Guenter Roeck Fixes: a4b0bf6a40f3 ("x86/efi: defer freeing of boot services memory") Signed-off-by: Mike Rapoport (Microsoft) Signed-off-by: Ard Biesheuvel --- arch/x86/platform/efi/quirks.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/platform/efi/quirks.c b/arch/x86/platform/efi/quirks.c index 35caa5746115..79f0818131e8 100644 --- a/arch/x86/platform/efi/quirks.c +++ b/arch/x86/platform/efi/quirks.c @@ -424,7 +424,7 @@ void __init efi_unmap_boot_services(void) if (efi_enabled(EFI_DBG)) return; - sz = sizeof(*ranges_to_free) * efi.memmap.nr_map + 1; + sz = sizeof(*ranges_to_free) * (efi.memmap.nr_map + 1); ranges_to_free = kzalloc(sz, GFP_KERNEL); if (!ranges_to_free) { pr_err("Failed to allocate storage for freeable EFI regions\n"); From c3fd16c3b98ed726294feab2f94f876290bf7b61 Mon Sep 17 00:00:00 2001 From: Zubin Mithra Date: Wed, 18 Mar 2026 13:40:13 +0000 Subject: [PATCH 095/401] virt: tdx-guest: Fix handling of host controlled 'quote' buffer length Validate host controlled value `quote_buf->out_len` that determines how many bytes of the quote are copied out to guest userspace. In TDX environments with remote attestation, quotes are not considered private, and can be forwarded to an attestation server. Catch scenarios where the host specifies a response length larger than the guest's allocation, or otherwise races modifying the response while the guest consumes it. This prevents contents beyond the pages allocated for `quote_buf` (up to TSM_REPORT_OUTBLOB_MAX) from being read out to guest userspace, and possibly forwarded in attestation requests. Recall that some deployments want per-container configs-tsm-report interfaces, so the leak may cross container protection boundaries, not just local root. Fixes: f4738f56d1dc ("virt: tdx-guest: Add Quote generation support using TSM_REPORTS") Cc: stable@vger.kernel.org Signed-off-by: Zubin Mithra Reviewed-by: Dan Williams Reviewed-by: Kiryl Shutsemau (Meta) Reviewed-by: Kuppuswamy Sathyanarayanan Signed-off-by: Dan Williams --- drivers/virt/coco/tdx-guest/tdx-guest.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/drivers/virt/coco/tdx-guest/tdx-guest.c b/drivers/virt/coco/tdx-guest/tdx-guest.c index 4252b147593a..7cee97559ba2 100644 --- a/drivers/virt/coco/tdx-guest/tdx-guest.c +++ b/drivers/virt/coco/tdx-guest/tdx-guest.c @@ -171,6 +171,8 @@ static void tdx_mr_deinit(const struct attribute_group *mr_grp) #define GET_QUOTE_SUCCESS 0 #define GET_QUOTE_IN_FLIGHT 0xffffffffffffffff +#define TDX_QUOTE_MAX_LEN (GET_QUOTE_BUF_SIZE - sizeof(struct tdx_quote_buf)) + /* struct tdx_quote_buf: Format of Quote request buffer. * @version: Quote format version, filled by TD. * @status: Status code of Quote request, filled by VMM. @@ -269,6 +271,7 @@ static int tdx_report_new_locked(struct tsm_report *report, void *data) u8 *buf; struct tdx_quote_buf *quote_buf = quote_data; struct tsm_report_desc *desc = &report->desc; + u32 out_len; int ret; u64 err; @@ -306,12 +309,17 @@ static int tdx_report_new_locked(struct tsm_report *report, void *data) return ret; } - buf = kvmemdup(quote_buf->data, quote_buf->out_len, GFP_KERNEL); + out_len = READ_ONCE(quote_buf->out_len); + + if (out_len > TDX_QUOTE_MAX_LEN) + return -EFBIG; + + buf = kvmemdup(quote_buf->data, out_len, GFP_KERNEL); if (!buf) return -ENOMEM; report->outblob = buf; - report->outblob_len = quote_buf->out_len; + report->outblob_len = out_len; /* * TODO: parse the PEM-formatted cert chain out of the quote buffer when From db08b1940f4beb25460b4a4e9da3446454f2e8fe Mon Sep 17 00:00:00 2001 From: Cheng-Yang Chou Date: Sat, 21 Mar 2026 18:54:58 +0800 Subject: [PATCH 096/401] sched_ext: Fix inconsistent NUMA node lookup in scx_select_cpu_dfl() In the WAKE_SYNC path of scx_select_cpu_dfl(), waker_node was computed with cpu_to_node(), while node (for prev_cpu) was computed with scx_cpu_node_if_enabled(). When scx_builtin_idle_per_node is disabled, idle_cpumask(waker_node) is called with a real node ID even though per-node idle tracking is disabled, resulting in undefined behavior. Fix by using scx_cpu_node_if_enabled() for waker_node as well, ensuring both variables are computed consistently. Fixes: 48849271e6611 ("sched_ext: idle: Per-node idle cpumasks") Cc: stable@vger.kernel.org # v6.15+ Signed-off-by: Cheng-Yang Chou Reviewed-by: Andrea Righi Signed-off-by: Tejun Heo --- kernel/sched/ext_idle.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/sched/ext_idle.c b/kernel/sched/ext_idle.c index ba298ac3ce6c..0ae93cd64004 100644 --- a/kernel/sched/ext_idle.c +++ b/kernel/sched/ext_idle.c @@ -543,7 +543,7 @@ s32 scx_select_cpu_dfl(struct task_struct *p, s32 prev_cpu, u64 wake_flags, * piled up on it even if there is an idle core elsewhere on * the system. */ - waker_node = cpu_to_node(cpu); + waker_node = scx_cpu_node_if_enabled(cpu); if (!(current->flags & PF_EXITING) && cpu_rq(cpu)->scx.local_dsq.nr == 0 && (!(flags & SCX_PICK_IDLE_IN_NODE) || (waker_node == node)) && From c7f27a8ab9f2f43570f0725256597a0d7abe2c5b Mon Sep 17 00:00:00 2001 From: Song Liu Date: Sat, 21 Mar 2026 20:30:45 -0700 Subject: [PATCH 097/401] workqueue: Fix false positive stall reports On weakly ordered architectures (e.g., arm64), the lockless check in wq_watchdog_timer_fn() can observe a reordering between the worklist insertion and the last_progress_ts update. Specifically, the watchdog can see a non-empty worklist (from a list_add) while reading a stale last_progress_ts value, causing a false positive stall report. This was confirmed by reading pool->last_progress_ts again after holding pool->lock in wq_watchdog_timer_fn(): workqueue watchdog: pool 7 false positive detected! lockless_ts=4784580465 locked_ts=4785033728 diff=453263ms worklist_empty=0 To avoid slowing down the hot path (queue_work, etc.), recheck last_progress_ts with pool->lock held. This will eliminate the false positive with minimal overhead. Remove two extra empty lines in wq_watchdog_timer_fn() as we are on it. Fixes: 82607adcf9cd ("workqueue: implement lockup detector") Cc: stable@vger.kernel.org # v4.5+ Assisted-by: claude-code:claude-opus-4-6 Signed-off-by: Song Liu Signed-off-by: Tejun Heo --- kernel/workqueue.c | 24 +++++++++++++++++++++--- 1 file changed, 21 insertions(+), 3 deletions(-) diff --git a/kernel/workqueue.c b/kernel/workqueue.c index b77119d71641..ff97b705f25e 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -7699,8 +7699,28 @@ static void wq_watchdog_timer_fn(struct timer_list *unused) else ts = touched; - /* did we stall? */ + /* + * Did we stall? + * + * Do a lockless check first. On weakly ordered + * architectures, the lockless check can observe a + * reordering between worklist insert_work() and + * last_progress_ts update from __queue_work(). Since + * __queue_work() is a much hotter path than the timer + * function, we handle false positive here by reading + * last_progress_ts again with pool->lock held. + */ if (time_after(now, ts + thresh)) { + scoped_guard(raw_spinlock_irqsave, &pool->lock) { + pool_ts = pool->last_progress_ts; + if (time_after(pool_ts, touched)) + ts = pool_ts; + else + ts = touched; + } + if (!time_after(now, ts + thresh)) + continue; + lockup_detected = true; stall_time = jiffies_to_msecs(now - pool_ts) / 1000; max_stall_time = max(max_stall_time, stall_time); @@ -7712,8 +7732,6 @@ static void wq_watchdog_timer_fn(struct timer_list *unused) pr_cont_pool_info(pool); pr_cont(" stuck for %us!\n", stall_time); } - - } if (lockup_detected) From 4cfdfeb6ac06079f92fccd977fa742d6c5b8dd3a Mon Sep 17 00:00:00 2001 From: Luca Leonardo Scorcia Date: Wed, 25 Feb 2026 09:38:41 +0000 Subject: [PATCH 098/401] drm/mediatek: dsi: Store driver data before invoking mipi_dsi_host_register The call to mipi_dsi_host_register triggers a callback to mtk_dsi_bind, which uses dev_get_drvdata to retrieve the mtk_dsi struct, so this structure needs to be stored inside the driver data before invoking it. As drvdata is currently uninitialized it leads to a crash when registering the DSI DRM encoder right after acquiring the mode_config.idr_mutex, blocking all subsequent DRM operations. Fixes the following crash during mediatek-drm probe (tested on Xiaomi Smart Clock x04g): Unable to handle kernel NULL pointer dereference at virtual address 0000000000000040 [...] Modules linked in: mediatek_drm(+) drm_display_helper cec drm_client_lib drm_dma_helper drm_kms_helper panel_simple [...] Call trace: drm_mode_object_add+0x58/0x98 (P) __drm_encoder_init+0x48/0x140 drm_encoder_init+0x6c/0xa0 drm_simple_encoder_init+0x20/0x34 [drm_kms_helper] mtk_dsi_bind+0x34/0x13c [mediatek_drm] component_bind_all+0x120/0x280 mtk_drm_bind+0x284/0x67c [mediatek_drm] try_to_bring_up_aggregate_device+0x23c/0x320 __component_add+0xa4/0x198 component_add+0x14/0x20 mtk_dsi_host_attach+0x78/0x100 [mediatek_drm] mipi_dsi_attach+0x2c/0x50 panel_simple_dsi_probe+0x4c/0x9c [panel_simple] mipi_dsi_drv_probe+0x1c/0x28 really_probe+0xc0/0x3dc __driver_probe_device+0x80/0x160 driver_probe_device+0x40/0x120 __device_attach_driver+0xbc/0x17c bus_for_each_drv+0x88/0xf0 __device_attach+0x9c/0x1cc device_initial_probe+0x54/0x60 bus_probe_device+0x34/0xa0 device_add+0x5b0/0x800 mipi_dsi_device_register_full+0xdc/0x16c mipi_dsi_host_register+0xc4/0x17c mtk_dsi_probe+0x10c/0x260 [mediatek_drm] platform_probe+0x5c/0xa4 really_probe+0xc0/0x3dc __driver_probe_device+0x80/0x160 driver_probe_device+0x40/0x120 __driver_attach+0xc8/0x1f8 bus_for_each_dev+0x7c/0xe0 driver_attach+0x24/0x30 bus_add_driver+0x11c/0x240 driver_register+0x68/0x130 __platform_register_drivers+0x64/0x160 mtk_drm_init+0x24/0x1000 [mediatek_drm] do_one_initcall+0x60/0x1d0 do_init_module+0x54/0x240 load_module+0x1838/0x1dc0 init_module_from_file+0xd8/0xf0 __arm64_sys_finit_module+0x1b4/0x428 invoke_syscall.constprop.0+0x48/0xc8 do_el0_svc+0x3c/0xb8 el0_svc+0x34/0xe8 el0t_64_sync_handler+0xa0/0xe4 el0t_64_sync+0x198/0x19c Code: 52800022 941004ab 2a0003f3 37f80040 (29005a80) Fixes: e4732b590a77 ("drm/mediatek: dsi: Register DSI host after acquiring clocks and PHY") Signed-off-by: Luca Leonardo Scorcia Reviewed-by: AngeloGioacchino Del Regno Reviewed-by: CK Hu Link: https://patchwork.kernel.org/project/dri-devel/patch/20260225094047.76780-1-l.scorcia@gmail.com/ Signed-off-by: Chun-Kuang Hu --- drivers/gpu/drm/mediatek/mtk_dsi.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/mediatek/mtk_dsi.c b/drivers/gpu/drm/mediatek/mtk_dsi.c index 17c67f02016b..aaf6c9ebd319 100644 --- a/drivers/gpu/drm/mediatek/mtk_dsi.c +++ b/drivers/gpu/drm/mediatek/mtk_dsi.c @@ -1236,6 +1236,11 @@ static int mtk_dsi_probe(struct platform_device *pdev) dsi->host.ops = &mtk_dsi_ops; dsi->host.dev = dev; + + init_waitqueue_head(&dsi->irq_wait_queue); + + platform_set_drvdata(pdev, dsi); + ret = mipi_dsi_host_register(&dsi->host); if (ret < 0) return dev_err_probe(dev, ret, "Failed to register DSI host\n"); @@ -1247,10 +1252,6 @@ static int mtk_dsi_probe(struct platform_device *pdev) return dev_err_probe(&pdev->dev, ret, "Failed to request DSI irq\n"); } - init_waitqueue_head(&dsi->irq_wait_queue); - - platform_set_drvdata(pdev, dsi); - dsi->bridge.of_node = dev->of_node; dsi->bridge.type = DRM_MODE_CONNECTOR_DSI; From 9bbb19d21ded7d78645506f20d8c44895e3d0fb9 Mon Sep 17 00:00:00 2001 From: Hyunwoo Kim Date: Tue, 17 Mar 2026 08:52:01 +0900 Subject: [PATCH 099/401] ksmbd: do not expire session on binding failure When a multichannel session binding request fails (e.g. wrong password), the error path unconditionally sets sess->state = SMB2_SESSION_EXPIRED. However, during binding, sess points to the target session looked up via ksmbd_session_lookup_slowpath() -- which belongs to another connection's user. This allows a remote attacker to invalidate any active session by simply sending a binding request with a wrong password (DoS). Fix this by skipping session expiration when the failed request was a binding attempt, since the session does not belong to the current connection. The reference taken by ksmbd_session_lookup_slowpath() is still correctly released via ksmbd_user_session_put(). Cc: stable@vger.kernel.org Signed-off-by: Hyunwoo Kim Acked-by: Namjae Jeon Signed-off-by: Steve French --- fs/smb/server/smb2pdu.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/fs/smb/server/smb2pdu.c b/fs/smb/server/smb2pdu.c index 9c44e71e3c3b..8fa780e8efd0 100644 --- a/fs/smb/server/smb2pdu.c +++ b/fs/smb/server/smb2pdu.c @@ -1939,8 +1939,14 @@ int smb2_sess_setup(struct ksmbd_work *work) if (sess->user && sess->user->flags & KSMBD_USER_FLAG_DELAY_SESSION) try_delay = true; - sess->last_active = jiffies; - sess->state = SMB2_SESSION_EXPIRED; + /* + * For binding requests, session belongs to another + * connection. Do not expire it. + */ + if (!(req->Flags & SMB2_SESSION_REQ_FLAG_BINDING)) { + sess->last_active = jiffies; + sess->state = SMB2_SESSION_EXPIRED; + } ksmbd_user_session_put(sess); work->sess = NULL; if (try_delay) { From 48623ec358c1c600fa1e38368746f933e0f1a617 Mon Sep 17 00:00:00 2001 From: Werner Kasselman Date: Mon, 16 Mar 2026 11:38:47 +0000 Subject: [PATCH 100/401] ksmbd: fix use-after-free and NULL deref in smb_grant_oplock() smb_grant_oplock() has two issues in the oplock publication sequence: 1) opinfo is linked into ci->m_op_list (via opinfo_add) before add_lease_global_list() is called. If add_lease_global_list() fails (kmalloc returns NULL), the error path frees the opinfo via __free_opinfo() while it is still linked in ci->m_op_list. Concurrent m_op_list readers (opinfo_get_list, or direct iteration in smb_break_all_levII_oplock) dereference the freed node. 2) opinfo->o_fp is assigned after add_lease_global_list() publishes the opinfo on the global lease list. A concurrent find_same_lease_key() can walk the lease list and dereference opinfo->o_fp->f_ci while o_fp is still NULL. Fix by restructuring the publication sequence to eliminate post-publish failure: - Set opinfo->o_fp before any list publication (fixes NULL deref). - Preallocate lease_table via alloc_lease_table() before opinfo_add() so add_lease_global_list() becomes infallible after publication. - Keep the original m_op_list publication order (opinfo_add before lease list) so concurrent opens via same_client_has_lease() and opinfo_get_list() still see the in-flight grant. - Use opinfo_put() instead of __free_opinfo() on err_out so that the RCU-deferred free path is used. This also requires splitting add_lease_global_list() to take a preallocated lease_table and changing its return type from int to void, since it can no longer fail. Fixes: 1dfd062caa16 ("ksmbd: fix use-after-free by using call_rcu() for oplock_info") Cc: stable@vger.kernel.org Signed-off-by: Werner Kasselman Reviewed-by: ChenXiaoSong Acked-by: Namjae Jeon Signed-off-by: Steve French --- fs/smb/server/oplock.c | 72 ++++++++++++++++++++++++++---------------- 1 file changed, 45 insertions(+), 27 deletions(-) diff --git a/fs/smb/server/oplock.c b/fs/smb/server/oplock.c index 393a4ae47cc1..9b2bb8764a80 100644 --- a/fs/smb/server/oplock.c +++ b/fs/smb/server/oplock.c @@ -82,11 +82,19 @@ static void lease_del_list(struct oplock_info *opinfo) spin_unlock(&lb->lb_lock); } -static void lb_add(struct lease_table *lb) +static struct lease_table *alloc_lease_table(struct oplock_info *opinfo) { - write_lock(&lease_list_lock); - list_add(&lb->l_entry, &lease_table_list); - write_unlock(&lease_list_lock); + struct lease_table *lb; + + lb = kmalloc_obj(struct lease_table, KSMBD_DEFAULT_GFP); + if (!lb) + return NULL; + + memcpy(lb->client_guid, opinfo->conn->ClientGUID, + SMB2_CLIENT_GUID_SIZE); + INIT_LIST_HEAD(&lb->lease_list); + spin_lock_init(&lb->lb_lock); + return lb; } static int alloc_lease(struct oplock_info *opinfo, struct lease_ctx_info *lctx) @@ -1042,34 +1050,27 @@ static void copy_lease(struct oplock_info *op1, struct oplock_info *op2) lease2->version = lease1->version; } -static int add_lease_global_list(struct oplock_info *opinfo) +static void add_lease_global_list(struct oplock_info *opinfo, + struct lease_table *new_lb) { struct lease_table *lb; - read_lock(&lease_list_lock); + write_lock(&lease_list_lock); list_for_each_entry(lb, &lease_table_list, l_entry) { if (!memcmp(lb->client_guid, opinfo->conn->ClientGUID, SMB2_CLIENT_GUID_SIZE)) { opinfo->o_lease->l_lb = lb; lease_add_list(opinfo); - read_unlock(&lease_list_lock); - return 0; + write_unlock(&lease_list_lock); + kfree(new_lb); + return; } } - read_unlock(&lease_list_lock); - lb = kmalloc_obj(struct lease_table, KSMBD_DEFAULT_GFP); - if (!lb) - return -ENOMEM; - - memcpy(lb->client_guid, opinfo->conn->ClientGUID, - SMB2_CLIENT_GUID_SIZE); - INIT_LIST_HEAD(&lb->lease_list); - spin_lock_init(&lb->lb_lock); - opinfo->o_lease->l_lb = lb; + opinfo->o_lease->l_lb = new_lb; lease_add_list(opinfo); - lb_add(lb); - return 0; + list_add(&new_lb->l_entry, &lease_table_list); + write_unlock(&lease_list_lock); } static void set_oplock_level(struct oplock_info *opinfo, int level, @@ -1189,6 +1190,7 @@ int smb_grant_oplock(struct ksmbd_work *work, int req_op_level, u64 pid, int err = 0; struct oplock_info *opinfo = NULL, *prev_opinfo = NULL; struct ksmbd_inode *ci = fp->f_ci; + struct lease_table *new_lb = NULL; bool prev_op_has_lease; __le32 prev_op_state = 0; @@ -1291,21 +1293,37 @@ int smb_grant_oplock(struct ksmbd_work *work, int req_op_level, u64 pid, set_oplock_level(opinfo, req_op_level, lctx); out: + /* + * Set o_fp before any publication so that concurrent readers + * (e.g. find_same_lease_key() on the lease list) that + * dereference opinfo->o_fp don't hit a NULL pointer. + * + * Keep the original publication order so concurrent opens can + * still observe the in-flight grant via ci->m_op_list, but make + * everything after opinfo_add() no-fail by preallocating any new + * lease_table first. + */ + opinfo->o_fp = fp; + if (opinfo->is_lease) { + new_lb = alloc_lease_table(opinfo); + if (!new_lb) { + err = -ENOMEM; + goto err_out; + } + } + opinfo_count_inc(fp); opinfo_add(opinfo, fp); - if (opinfo->is_lease) { - err = add_lease_global_list(opinfo); - if (err) - goto err_out; - } + if (opinfo->is_lease) + add_lease_global_list(opinfo, new_lb); rcu_assign_pointer(fp->f_opinfo, opinfo); - opinfo->o_fp = fp; return 0; err_out: - __free_opinfo(opinfo); + kfree(new_lb); + opinfo_put(opinfo); return err; } From 309b44ed684496ed3f9c5715d10b899338623512 Mon Sep 17 00:00:00 2001 From: Werner Kasselman Date: Tue, 17 Mar 2026 07:55:37 +0000 Subject: [PATCH 101/401] ksmbd: fix memory leaks and NULL deref in smb2_lock() smb2_lock() has three error handling issues after list_del() detaches smb_lock from lock_list at no_check_cl: 1) If vfs_lock_file() returns an unexpected error in the non-UNLOCK path, goto out leaks smb_lock and its flock because the out: handler only iterates lock_list and rollback_list, neither of which contains the detached smb_lock. 2) If vfs_lock_file() returns -ENOENT in the UNLOCK path, goto out leaks smb_lock and flock for the same reason. The error code returned to the dispatcher is also stale. 3) In the rollback path, smb_flock_init() can return NULL on allocation failure. The result is dereferenced unconditionally, causing a kernel NULL pointer dereference. Add a NULL check to prevent the crash and clean up the bookkeeping; the VFS lock itself cannot be rolled back without the allocation and will be released at file or connection teardown. Fix cases 1 and 2 by hoisting the locks_free_lock()/kfree() to before the if(!rc) check in the UNLOCK branch so all exit paths share one free site, and by freeing smb_lock and flock before goto out in the non-UNLOCK branch. Propagate the correct error code in both cases. Fix case 3 by wrapping the VFS unlock in an if(rlock) guard and adding a NULL check for locks_free_lock(rlock) in the shared cleanup. Found via call-graph analysis using sqry. Fixes: e2f34481b24d ("cifsd: add server-side procedures for SMB3") Cc: stable@vger.kernel.org Suggested-by: ChenXiaoSong Signed-off-by: Werner Kasselman Reviewed-by: ChenXiaoSong Acked-by: Namjae Jeon Signed-off-by: Steve French --- fs/smb/server/smb2pdu.c | 27 ++++++++++++++++++--------- 1 file changed, 18 insertions(+), 9 deletions(-) diff --git a/fs/smb/server/smb2pdu.c b/fs/smb/server/smb2pdu.c index 8fa780e8efd0..24f8d58493d0 100644 --- a/fs/smb/server/smb2pdu.c +++ b/fs/smb/server/smb2pdu.c @@ -7592,14 +7592,15 @@ int smb2_lock(struct ksmbd_work *work) rc = vfs_lock_file(filp, smb_lock->cmd, flock, NULL); skip: if (smb_lock->flags & SMB2_LOCKFLAG_UNLOCK) { + locks_free_lock(flock); + kfree(smb_lock); if (!rc) { ksmbd_debug(SMB, "File unlocked\n"); } else if (rc == -ENOENT) { rsp->hdr.Status = STATUS_NOT_LOCKED; + err = rc; goto out; } - locks_free_lock(flock); - kfree(smb_lock); } else { if (rc == FILE_LOCK_DEFERRED) { void **argv; @@ -7668,6 +7669,9 @@ int smb2_lock(struct ksmbd_work *work) spin_unlock(&work->conn->llist_lock); ksmbd_debug(SMB, "successful in taking lock\n"); } else { + locks_free_lock(flock); + kfree(smb_lock); + err = rc; goto out; } } @@ -7698,13 +7702,17 @@ int smb2_lock(struct ksmbd_work *work) struct file_lock *rlock = NULL; rlock = smb_flock_init(filp); - rlock->c.flc_type = F_UNLCK; - rlock->fl_start = smb_lock->start; - rlock->fl_end = smb_lock->end; + if (rlock) { + rlock->c.flc_type = F_UNLCK; + rlock->fl_start = smb_lock->start; + rlock->fl_end = smb_lock->end; - rc = vfs_lock_file(filp, F_SETLK, rlock, NULL); - if (rc) - pr_err("rollback unlock fail : %d\n", rc); + rc = vfs_lock_file(filp, F_SETLK, rlock, NULL); + if (rc) + pr_err("rollback unlock fail : %d\n", rc); + } else { + pr_err("rollback unlock alloc failed\n"); + } list_del(&smb_lock->llist); spin_lock(&work->conn->llist_lock); @@ -7714,7 +7722,8 @@ int smb2_lock(struct ksmbd_work *work) spin_unlock(&work->conn->llist_lock); locks_free_lock(smb_lock->fl); - locks_free_lock(rlock); + if (rlock) + locks_free_lock(rlock); kfree(smb_lock); } out2: From 0e55f63dd08f09651d39e1b709a91705a8a0ddcb Mon Sep 17 00:00:00 2001 From: Namjae Jeon Date: Fri, 13 Mar 2026 14:45:58 +0900 Subject: [PATCH 102/401] ksmbd: replace hardcoded hdr2_len with offsetof() in smb2_calc_max_out_buf_len() After this commit (e2b76ab8b5c9 "ksmbd: add support for read compound"), response buffer management was changed to use dynamic iov array. In the new design, smb2_calc_max_out_buf_len() expects the second argument (hdr2_len) to be the offset of ->Buffer field in the response structure, not a hardcoded magic number. Fix the remaining call sites to use the correct offsetof() value. Cc: stable@vger.kernel.org Fixes: e2b76ab8b5c9 ("ksmbd: add support for read compound") Signed-off-by: Namjae Jeon Signed-off-by: Steve French --- fs/smb/server/smb2pdu.c | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/fs/smb/server/smb2pdu.c b/fs/smb/server/smb2pdu.c index 24f8d58493d0..f5f1bf5f642e 100644 --- a/fs/smb/server/smb2pdu.c +++ b/fs/smb/server/smb2pdu.c @@ -4452,8 +4452,9 @@ int smb2_query_dir(struct ksmbd_work *work) d_info.wptr = (char *)rsp->Buffer; d_info.rptr = (char *)rsp->Buffer; d_info.out_buf_len = - smb2_calc_max_out_buf_len(work, 8, - le32_to_cpu(req->OutputBufferLength)); + smb2_calc_max_out_buf_len(work, + offsetof(struct smb2_query_directory_rsp, Buffer), + le32_to_cpu(req->OutputBufferLength)); if (d_info.out_buf_len < 0) { rc = -EINVAL; goto err_out; @@ -4720,8 +4721,9 @@ static int smb2_get_ea(struct ksmbd_work *work, struct ksmbd_file *fp, } buf_free_len = - smb2_calc_max_out_buf_len(work, 8, - le32_to_cpu(req->OutputBufferLength)); + smb2_calc_max_out_buf_len(work, + offsetof(struct smb2_query_info_rsp, Buffer), + le32_to_cpu(req->OutputBufferLength)); if (buf_free_len < 0) return -EINVAL; @@ -5047,8 +5049,9 @@ static int get_file_stream_info(struct ksmbd_work *work, file_info = (struct smb2_file_stream_info *)rsp->Buffer; buf_free_len = - smb2_calc_max_out_buf_len(work, 8, - le32_to_cpu(req->OutputBufferLength)); + smb2_calc_max_out_buf_len(work, + offsetof(struct smb2_query_info_rsp, Buffer), + le32_to_cpu(req->OutputBufferLength)); if (buf_free_len < 0) goto out; @@ -8206,8 +8209,9 @@ int smb2_ioctl(struct ksmbd_work *work) buffer = (char *)req + le32_to_cpu(req->InputOffset); cnt_code = le32_to_cpu(req->CtlCode); - ret = smb2_calc_max_out_buf_len(work, 48, - le32_to_cpu(req->MaxOutputResponse)); + ret = smb2_calc_max_out_buf_len(work, + offsetof(struct smb2_ioctl_rsp, Buffer), + le32_to_cpu(req->MaxOutputResponse)); if (ret < 0) { rsp->hdr.Status = STATUS_INVALID_PARAMETER; goto out; From 08441f10f4dc09fdeb64529953ac308abc79dd38 Mon Sep 17 00:00:00 2001 From: Samasth Norway Ananda Date: Mon, 16 Mar 2026 16:19:19 -0700 Subject: [PATCH 103/401] drm/i915/gmbus: fix spurious timeout on 512-byte burst reads When reading exactly 512 bytes with burst read enabled, the extra_byte_added path breaks out of the inner do-while without decrementing len. The outer while(len) then re-enters and gmbus_wait() times out since all data has been delivered. Decrement len before the break so the outer loop terminates correctly. Fixes: d5dc0f43f268 ("drm/i915/gmbus: Enable burst read") Signed-off-by: Samasth Norway Ananda Reviewed-by: Jani Nikula Link: https://patch.msgid.link/20260316231920.135438-2-samasth.norway.ananda@oracle.com Signed-off-by: Jani Nikula (cherry picked from commit 4ab0f09ee73fc853d00466682635f67c531f909c) Signed-off-by: Joonas Lahtinen --- drivers/gpu/drm/i915/display/intel_gmbus.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/display/intel_gmbus.c b/drivers/gpu/drm/i915/display/intel_gmbus.c index a7bce0c6a17e..264e6843bff1 100644 --- a/drivers/gpu/drm/i915/display/intel_gmbus.c +++ b/drivers/gpu/drm/i915/display/intel_gmbus.c @@ -496,8 +496,10 @@ gmbus_xfer_read_chunk(struct intel_display *display, val = intel_de_read_fw(display, GMBUS3(display)); do { - if (extra_byte_added && len == 1) + if (extra_byte_added && len == 1) { + len--; break; + } *buf++ = val & 0xff; val >>= 8; From 6ad2a661ff0d3d94884947d2a593311ba46d34c2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Fri, 13 Mar 2026 13:07:40 +0200 Subject: [PATCH 104/401] drm/i915: Order OP vs. timeout correctly in __wait_for() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Put the barrier() before the OP so that anything we read out in OP and check in COND will actually be read out after the timeout has been evaluated. Currently the only place where we use OP is __intel_wait_for_register(), but the use there is precisely susceptible to this reordering, assuming the ktime_*() stuff itself doesn't act as a sufficient barrier: __intel_wait_for_register(...) { ... ret = __wait_for(reg_value = intel_uncore_read_notrace(...), (reg_value & mask) == value, ...); ... } Cc: stable@vger.kernel.org Fixes: 1c3c1dc66a96 ("drm/i915: Add compiler barrier to wait_for") Signed-off-by: Ville Syrjälä Link: https://patch.msgid.link/20260313110740.24620-1-ville.syrjala@linux.intel.com Reviewed-by: Jani Nikula (cherry picked from commit a464bace0482aa9a83e9aa7beefbaf44cd58e6cf) Signed-off-by: Joonas Lahtinen --- drivers/gpu/drm/i915/i915_wait_util.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_wait_util.h b/drivers/gpu/drm/i915/i915_wait_util.h index 7376898e3bf8..e1ed7921ec70 100644 --- a/drivers/gpu/drm/i915/i915_wait_util.h +++ b/drivers/gpu/drm/i915/i915_wait_util.h @@ -25,9 +25,9 @@ might_sleep(); \ for (;;) { \ const bool expired__ = ktime_after(ktime_get_raw(), end__); \ - OP; \ /* Guarantee COND check prior to timeout */ \ barrier(); \ + OP; \ if (COND) { \ ret__ = 0; \ break; \ From bfa71b7a9dc6b5b8af157686e03308291141d00c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Mon, 16 Mar 2026 18:39:51 +0200 Subject: [PATCH 105/401] drm/i915: Unlink NV12 planes earlier MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit unlink_nv12_plane() will clobber parts of the plane state potentially already set up by plane_atomic_check(), so we must make sure not to call the two in the wrong order. The problem happens when a plane previously selected as a Y plane is now configured as a normal plane by user space. plane_atomic_check() will first compute the proper plane state based on the userspace request, and unlink_nv12_plane() later clears some of the state. This used to work on account of unlink_nv12_plane() skipping the state clearing based on the plane visibility. But I removed that check, thinking it was an impossible situation. Now when that situation happens unlink_nv12_plane() will just WARN and proceed to clobber the state. Rather than reverting to the old way of doing things, I think it's more clear if we unlink the NV12 planes before we even compute the new plane state. Cc: stable@vger.kernel.org Reported-by: Khaled Almahallawy Closes: https://lore.kernel.org/intel-gfx/20260212004852.1920270-1-khaled.almahallawy@intel.com/ Tested-by: Khaled Almahallawy Fixes: 6a01df2f1b2a ("drm/i915: Remove pointless visible check in unlink_nv12_plane()") Signed-off-by: Ville Syrjälä Link: https://patch.msgid.link/20260316163953.12905-2-ville.syrjala@linux.intel.com Reviewed-by: Uma Shankar (cherry picked from commit 017ecd04985573eeeb0745fa2c23896fb22ee0cc) Signed-off-by: Joonas Lahtinen --- drivers/gpu/drm/i915/display/intel_plane.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_plane.c b/drivers/gpu/drm/i915/display/intel_plane.c index e06a0618b4c6..076b9b356481 100644 --- a/drivers/gpu/drm/i915/display/intel_plane.c +++ b/drivers/gpu/drm/i915/display/intel_plane.c @@ -436,11 +436,16 @@ void intel_plane_copy_hw_state(struct intel_plane_state *plane_state, drm_framebuffer_get(plane_state->hw.fb); } +static void unlink_nv12_plane(struct intel_crtc_state *crtc_state, + struct intel_plane_state *plane_state); + void intel_plane_set_invisible(struct intel_crtc_state *crtc_state, struct intel_plane_state *plane_state) { struct intel_plane *plane = to_intel_plane(plane_state->uapi.plane); + unlink_nv12_plane(crtc_state, plane_state); + crtc_state->active_planes &= ~BIT(plane->id); crtc_state->scaled_planes &= ~BIT(plane->id); crtc_state->nv12_planes &= ~BIT(plane->id); @@ -1513,6 +1518,9 @@ static void unlink_nv12_plane(struct intel_crtc_state *crtc_state, struct intel_display *display = to_intel_display(plane_state); struct intel_plane *plane = to_intel_plane(plane_state->uapi.plane); + if (!plane_state->planar_linked_plane) + return; + plane_state->planar_linked_plane = NULL; if (!plane_state->is_y_plane) @@ -1550,8 +1558,7 @@ static int icl_check_nv12_planes(struct intel_atomic_state *state, if (plane->pipe != crtc->pipe) continue; - if (plane_state->planar_linked_plane) - unlink_nv12_plane(crtc_state, plane_state); + unlink_nv12_plane(crtc_state, plane_state); } if (!crtc_state->nv12_planes) From e942498385bf80f4d6d075b47174035545eb6a2e Mon Sep 17 00:00:00 2001 From: Long Li Date: Tue, 17 Mar 2026 09:51:52 +0800 Subject: [PATCH 106/401] xfs: only assert new size for datafork during truncate extents The assertion functions properly because we currently only truncate the attr to a zero size. Any other new size of the attr is not preempted. Make this assertion is specific to the datafork, preparing for subsequent patches to truncate the attribute to a non-zero size. Reviewed-by: Darrick J. Wong Signed-off-by: Long Li Signed-off-by: Carlos Maiolino --- fs/xfs/xfs_inode.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index 50c0404f9064..beaa26ec62da 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -1048,7 +1048,8 @@ xfs_itruncate_extents_flags( xfs_assert_ilocked(ip, XFS_ILOCK_EXCL); if (icount_read(VFS_I(ip))) xfs_assert_ilocked(ip, XFS_IOLOCK_EXCL); - ASSERT(new_size <= XFS_ISIZE(ip)); + if (whichfork == XFS_DATA_FORK) + ASSERT(new_size <= XFS_ISIZE(ip)); ASSERT(tp->t_flags & XFS_TRANS_PERM_LOG_RES); ASSERT(ip->i_itemp != NULL); ASSERT(ip->i_itemp->ili_lock_flags == 0); From ce4e789cf3561c9fac73cc24445bfed9ea0c514b Mon Sep 17 00:00:00 2001 From: Long Li Date: Tue, 17 Mar 2026 09:51:53 +0800 Subject: [PATCH 107/401] xfs: factor out xfs_attr3_node_entry_remove Factor out wrapper xfs_attr3_node_entry_remove function, which exported for external use. Reviewed-by: Darrick J. Wong Signed-off-by: Long Li Signed-off-by: Carlos Maiolino --- fs/xfs/libxfs/xfs_da_btree.c | 53 ++++++++++++++++++++++++++++-------- fs/xfs/libxfs/xfs_da_btree.h | 2 ++ 2 files changed, 44 insertions(+), 11 deletions(-) diff --git a/fs/xfs/libxfs/xfs_da_btree.c b/fs/xfs/libxfs/xfs_da_btree.c index 09d4c17b3e7b..ad801b7bd2dd 100644 --- a/fs/xfs/libxfs/xfs_da_btree.c +++ b/fs/xfs/libxfs/xfs_da_btree.c @@ -1506,21 +1506,20 @@ xfs_da3_fixhashpath( } /* - * Remove an entry from an intermediate node. + * Internal implementation to remove an entry from an intermediate node. */ STATIC void -xfs_da3_node_remove( - struct xfs_da_state *state, - struct xfs_da_state_blk *drop_blk) +__xfs_da3_node_remove( + struct xfs_trans *tp, + struct xfs_inode *dp, + struct xfs_da_geometry *geo, + struct xfs_da_state_blk *drop_blk) { struct xfs_da_intnode *node; struct xfs_da3_icnode_hdr nodehdr; struct xfs_da_node_entry *btree; int index; int tmp; - struct xfs_inode *dp = state->args->dp; - - trace_xfs_da_node_remove(state->args); node = drop_blk->bp->b_addr; xfs_da3_node_hdr_from_disk(dp->i_mount, &nodehdr, node); @@ -1536,17 +1535,17 @@ xfs_da3_node_remove( tmp = nodehdr.count - index - 1; tmp *= (uint)sizeof(xfs_da_node_entry_t); memmove(&btree[index], &btree[index + 1], tmp); - xfs_trans_log_buf(state->args->trans, drop_blk->bp, + xfs_trans_log_buf(tp, drop_blk->bp, XFS_DA_LOGRANGE(node, &btree[index], tmp)); index = nodehdr.count - 1; } memset(&btree[index], 0, sizeof(xfs_da_node_entry_t)); - xfs_trans_log_buf(state->args->trans, drop_blk->bp, + xfs_trans_log_buf(tp, drop_blk->bp, XFS_DA_LOGRANGE(node, &btree[index], sizeof(btree[index]))); nodehdr.count -= 1; xfs_da3_node_hdr_to_disk(dp->i_mount, node, &nodehdr); - xfs_trans_log_buf(state->args->trans, drop_blk->bp, - XFS_DA_LOGRANGE(node, &node->hdr, state->args->geo->node_hdr_size)); + xfs_trans_log_buf(tp, drop_blk->bp, + XFS_DA_LOGRANGE(node, &node->hdr, geo->node_hdr_size)); /* * Copy the last hash value from the block to propagate upwards. @@ -1554,6 +1553,38 @@ xfs_da3_node_remove( drop_blk->hashval = be32_to_cpu(btree[index - 1].hashval); } +/* + * Remove an entry from an intermediate node. + */ +STATIC void +xfs_da3_node_remove( + struct xfs_da_state *state, + struct xfs_da_state_blk *drop_blk) +{ + trace_xfs_da_node_remove(state->args); + + __xfs_da3_node_remove(state->args->trans, state->args->dp, + state->args->geo, drop_blk); +} + +/* + * Remove an entry from an intermediate attr node at the specified index. + */ +void +xfs_attr3_node_entry_remove( + struct xfs_trans *tp, + struct xfs_inode *dp, + struct xfs_buf *bp, + int index) +{ + struct xfs_da_state_blk blk = { + .index = index, + .bp = bp, + }; + + __xfs_da3_node_remove(tp, dp, dp->i_mount->m_attr_geo, &blk); +} + /* * Unbalance the elements between two intermediate nodes, * move all Btree elements from one node into another. diff --git a/fs/xfs/libxfs/xfs_da_btree.h b/fs/xfs/libxfs/xfs_da_btree.h index 354d5d65043e..afcf2d3c7a21 100644 --- a/fs/xfs/libxfs/xfs_da_btree.h +++ b/fs/xfs/libxfs/xfs_da_btree.h @@ -184,6 +184,8 @@ int xfs_da3_split(xfs_da_state_t *state); int xfs_da3_join(xfs_da_state_t *state); void xfs_da3_fixhashpath(struct xfs_da_state *state, struct xfs_da_state_path *path_to_to_fix); +void xfs_attr3_node_entry_remove(struct xfs_trans *tp, struct xfs_inode *dp, + struct xfs_buf *bp, int index); /* * Routines used for finding things in the Btree. From e65bb55d7f8c2041c8fdb73cd29b0b4cad4ed847 Mon Sep 17 00:00:00 2001 From: Long Li Date: Tue, 17 Mar 2026 09:51:54 +0800 Subject: [PATCH 108/401] xfs: factor out xfs_attr3_leaf_init Factor out wrapper xfs_attr3_leaf_init function, which exported for external use. Reviewed-by: Darrick J. Wong Signed-off-by: Long Li Signed-off-by: Carlos Maiolino --- fs/xfs/libxfs/xfs_attr_leaf.c | 22 ++++++++++++++++++++++ fs/xfs/libxfs/xfs_attr_leaf.h | 3 +++ 2 files changed, 25 insertions(+) diff --git a/fs/xfs/libxfs/xfs_attr_leaf.c b/fs/xfs/libxfs/xfs_attr_leaf.c index 47f48ae555c0..2b78041e8672 100644 --- a/fs/xfs/libxfs/xfs_attr_leaf.c +++ b/fs/xfs/libxfs/xfs_attr_leaf.c @@ -1415,6 +1415,28 @@ xfs_attr3_leaf_create( return 0; } +/* + * Reinitialize an existing attr fork block as an empty leaf, and attach + * the buffer to tp. + */ +int +xfs_attr3_leaf_init( + struct xfs_trans *tp, + struct xfs_inode *dp, + xfs_dablk_t blkno) +{ + struct xfs_buf *bp = NULL; + struct xfs_da_args args = { + .trans = tp, + .dp = dp, + .owner = dp->i_ino, + .geo = dp->i_mount->m_attr_geo, + }; + + ASSERT(tp != NULL); + + return xfs_attr3_leaf_create(&args, blkno, &bp); +} /* * Split the leaf node, rebalance, then add the new entry. * diff --git a/fs/xfs/libxfs/xfs_attr_leaf.h b/fs/xfs/libxfs/xfs_attr_leaf.h index aca46da2bc50..72639efe6ac3 100644 --- a/fs/xfs/libxfs/xfs_attr_leaf.h +++ b/fs/xfs/libxfs/xfs_attr_leaf.h @@ -87,6 +87,9 @@ int xfs_attr3_leaf_list_int(struct xfs_buf *bp, /* * Routines used for shrinking the Btree. */ + +int xfs_attr3_leaf_init(struct xfs_trans *tp, struct xfs_inode *dp, + xfs_dablk_t blkno); int xfs_attr3_leaf_toosmall(struct xfs_da_state *state, int *retval); void xfs_attr3_leaf_unbalance(struct xfs_da_state *state, struct xfs_da_state_blk *drop_blk, From b854e1c4eff3473b6d3a9ae74129ac5c48bc0b61 Mon Sep 17 00:00:00 2001 From: Long Li Date: Tue, 17 Mar 2026 09:51:55 +0800 Subject: [PATCH 109/401] xfs: close crash window in attr dabtree inactivation When inactivating an inode with node-format extended attributes, xfs_attr3_node_inactive() invalidates all child leaf/node blocks via xfs_trans_binval(), but intentionally does not remove the corresponding entries from their parent node blocks. The implicit assumption is that xfs_attr_inactive() will truncate the entire attr fork to zero extents afterwards, so log recovery will never reach the root node and follow those stale pointers. However, if a log shutdown occurs after the leaf/node block cancellations commit but before the attr bmap truncation commits, this assumption breaks. Recovery replays the attr bmap intact (the inode still has attr fork extents), but suppresses replay of all cancelled leaf/node blocks, maybe leaving them as stale data on disk. On the next mount, xlog_recover_process_iunlinks() retries inactivation and attempts to read the root node via the attr bmap. If the root node was not replayed, reading the unreplayed root block triggers a metadata verification failure immediately; if it was replayed, following its child pointers to unreplayed child blocks triggers the same failure: XFS (pmem0): Metadata corruption detected at xfs_da3_node_read_verify+0x53/0x220, xfs_da3_node block 0x78 XFS (pmem0): Unmount and run xfs_repair XFS (pmem0): First 128 bytes of corrupted metadata buffer: 00000000: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ 00000010: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ 00000020: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ 00000030: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ 00000040: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ 00000050: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ 00000060: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ 00000070: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ XFS (pmem0): metadata I/O error in "xfs_da_read_buf+0x104/0x190" at daddr 0x78 len 8 error 117 Fix this in two places: In xfs_attr3_node_inactive(), after calling xfs_trans_binval() on a child block, immediately remove the entry that references it from the parent node in the same transaction. This eliminates the window where the parent holds a pointer to a cancelled block. Once all children are removed, the now-empty root node is converted to a leaf block within the same transaction. This node-to-leaf conversion is necessary for crash safety. If the system shutdown after the empty node is written to the log but before the second-phase bmap truncation commits, log recovery will attempt to verify the root block on disk. xfs_da3_node_verify() does not permit a node block with count == 0; such a block will fail verification and trigger a metadata corruption shutdown. on the other hand, leaf blocks are allowed to have this transient state. In xfs_attr_inactive(), split the attr fork truncation into two explicit phases. First, truncate all extents beyond the root block (the child extents whose parent references have already been removed above). Second, invalidate the root block and truncate the attr bmap to zero in a single transaction. The two operations in the second phase must be atomic: as long as the attr bmap has any non-zero length, recovery can follow it to the root block, so the root block invalidation must commit together with the bmap-to-zero truncation. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Cc: stable@vger.kernel.org Signed-off-by: Long Li Reviewed-by: Darrick J. Wong Signed-off-by: Carlos Maiolino --- fs/xfs/xfs_attr_inactive.c | 99 +++++++++++++++++++++++--------------- 1 file changed, 59 insertions(+), 40 deletions(-) diff --git a/fs/xfs/xfs_attr_inactive.c b/fs/xfs/xfs_attr_inactive.c index 92331991f9fd..a5b69c0fbfd0 100644 --- a/fs/xfs/xfs_attr_inactive.c +++ b/fs/xfs/xfs_attr_inactive.c @@ -140,7 +140,7 @@ xfs_attr3_node_inactive( xfs_daddr_t parent_blkno, child_blkno; struct xfs_buf *child_bp; struct xfs_da3_icnode_hdr ichdr; - int error, i; + int error; /* * Since this code is recursive (gasp!) we must protect ourselves. @@ -152,7 +152,7 @@ xfs_attr3_node_inactive( return -EFSCORRUPTED; } - xfs_da3_node_hdr_from_disk(dp->i_mount, &ichdr, bp->b_addr); + xfs_da3_node_hdr_from_disk(mp, &ichdr, bp->b_addr); parent_blkno = xfs_buf_daddr(bp); if (!ichdr.count) { xfs_trans_brelse(*trans, bp); @@ -167,7 +167,7 @@ xfs_attr3_node_inactive( * over the leaves removing all of them. If this is higher up * in the tree, recurse downward. */ - for (i = 0; i < ichdr.count; i++) { + while (ichdr.count > 0) { /* * Read the subsidiary block to see what we have to work with. * Don't do this in a transaction. This is a depth-first @@ -218,29 +218,32 @@ xfs_attr3_node_inactive( xfs_trans_binval(*trans, child_bp); child_bp = NULL; - /* - * If we're not done, re-read the parent to get the next - * child block number. - */ - if (i + 1 < ichdr.count) { - struct xfs_da3_icnode_hdr phdr; + error = xfs_da3_node_read_mapped(*trans, dp, + parent_blkno, &bp, XFS_ATTR_FORK); + if (error) + return error; - error = xfs_da3_node_read_mapped(*trans, dp, - parent_blkno, &bp, XFS_ATTR_FORK); + /* + * Remove entry from parent node, prevents being indexed to. + */ + xfs_attr3_node_entry_remove(*trans, dp, bp, 0); + + xfs_da3_node_hdr_from_disk(mp, &ichdr, bp->b_addr); + bp = NULL; + + if (ichdr.count > 0) { + /* + * If we're not done, get the next child block number. + */ + child_fsb = be32_to_cpu(ichdr.btree[0].before); + + /* + * Atomically commit the whole invalidate stuff. + */ + error = xfs_trans_roll_inode(trans, dp); if (error) return error; - xfs_da3_node_hdr_from_disk(dp->i_mount, &phdr, - bp->b_addr); - child_fsb = be32_to_cpu(phdr.btree[i + 1].before); - xfs_trans_brelse(*trans, bp); - bp = NULL; } - /* - * Atomically commit the whole invalidate stuff. - */ - error = xfs_trans_roll_inode(trans, dp); - if (error) - return error; } return 0; @@ -257,10 +260,8 @@ xfs_attr3_root_inactive( struct xfs_trans **trans, struct xfs_inode *dp) { - struct xfs_mount *mp = dp->i_mount; struct xfs_da_blkinfo *info; struct xfs_buf *bp; - xfs_daddr_t blkno; int error; /* @@ -272,7 +273,6 @@ xfs_attr3_root_inactive( error = xfs_da3_node_read(*trans, dp, 0, &bp, XFS_ATTR_FORK); if (error) return error; - blkno = xfs_buf_daddr(bp); /* * Invalidate the tree, even if the "tree" is only a single leaf block. @@ -283,10 +283,26 @@ xfs_attr3_root_inactive( case cpu_to_be16(XFS_DA_NODE_MAGIC): case cpu_to_be16(XFS_DA3_NODE_MAGIC): error = xfs_attr3_node_inactive(trans, dp, bp, 1); + /* + * Empty root node block are not allowed, convert it to leaf. + */ + if (!error) + error = xfs_attr3_leaf_init(*trans, dp, 0); + if (!error) + error = xfs_trans_roll_inode(trans, dp); break; case cpu_to_be16(XFS_ATTR_LEAF_MAGIC): case cpu_to_be16(XFS_ATTR3_LEAF_MAGIC): error = xfs_attr3_leaf_inactive(trans, dp, bp); + /* + * Reinit the leaf before truncating extents so that a crash + * mid-truncation leaves an empty leaf rather than one with + * entries that may reference freed remote value blocks. + */ + if (!error) + error = xfs_attr3_leaf_init(*trans, dp, 0); + if (!error) + error = xfs_trans_roll_inode(trans, dp); break; default: xfs_dirattr_mark_sick(dp, XFS_ATTR_FORK); @@ -295,21 +311,6 @@ xfs_attr3_root_inactive( xfs_trans_brelse(*trans, bp); break; } - if (error) - return error; - - /* - * Invalidate the incore copy of the root block. - */ - error = xfs_trans_get_buf(*trans, mp->m_ddev_targp, blkno, - XFS_FSB_TO_BB(mp, mp->m_attr_geo->fsbcount), 0, &bp); - if (error) - return error; - xfs_trans_binval(*trans, bp); /* remove from cache */ - /* - * Commit the invalidate and start the next transaction. - */ - error = xfs_trans_roll_inode(trans, dp); return error; } @@ -328,6 +329,7 @@ xfs_attr_inactive( { struct xfs_trans *trans; struct xfs_mount *mp; + struct xfs_buf *bp; int lock_mode = XFS_ILOCK_SHARED; int error = 0; @@ -363,10 +365,27 @@ xfs_attr_inactive( * removal below. */ if (dp->i_af.if_nextents > 0) { + /* + * Invalidate and truncate all blocks but leave the root block. + */ error = xfs_attr3_root_inactive(&trans, dp); if (error) goto out_cancel; + error = xfs_itruncate_extents(&trans, dp, XFS_ATTR_FORK, + XFS_FSB_TO_B(mp, mp->m_attr_geo->fsbcount)); + if (error) + goto out_cancel; + + /* + * Invalidate and truncate the root block and ensure that the + * operation is completed within a single transaction. + */ + error = xfs_da_get_buf(trans, dp, 0, &bp, XFS_ATTR_FORK); + if (error) + goto out_cancel; + + xfs_trans_binval(trans, bp); error = xfs_itruncate_extents(&trans, dp, XFS_ATTR_FORK, 0); if (error) goto out_cancel; From d72f2084e30966097c8eae762e31986a33c3c0ae Mon Sep 17 00:00:00 2001 From: Long Li Date: Fri, 20 Mar 2026 10:11:29 +0800 Subject: [PATCH 110/401] xfs: fix ri_total validation in xlog_recover_attri_commit_pass2 The ri_total checks for SET/REPLACE operations are hardcoded to 3, but xfs_attri_item_size() only emits a value iovec when value_len > 0, so ri_total is 2 when value_len == 0. For PPTR_SET/PPTR_REMOVE/PPTR_REPLACE, value_len is validated by xfs_attri_validate() to be exactly sizeof(struct xfs_parent_rec) and is never zero, so their hardcoded checks remain correct. This problem may cause log recovery failures. The following script can be used to reproduce the problem: #!/bin/bash mkfs.xfs -f /dev/sda mount /dev/sda /mnt/test/ touch /mnt/test/file for i in {1..200}; do attr -s "user.attr_$i" -V "value_$i" /mnt/test/file > /dev/null done echo 1 > /sys/fs/xfs/debug/larp echo 1 > /sys/fs/xfs/sda/errortag/larp attr -s "user.zero" -V "" /mnt/test/file echo 0 > /sys/fs/xfs/sda/errortag/larp umount /mnt/test mount /dev/sda /mnt/test/ # mount failed Fix this by deriving the expected count dynamically as "2 + !!value_len" for SET/REPLACE operations. Cc: stable@vger.kernel.org # v6.9 Fixes: ad206ae50eca ("xfs: check opcode and iovec count match in xlog_recover_attri_commit_pass2") Reviewed-by: Darrick J. Wong Signed-off-by: Long Li Reviewed-by: Christoph Hellwig Signed-off-by: Carlos Maiolino --- fs/xfs/xfs_attr_item.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/xfs/xfs_attr_item.c b/fs/xfs/xfs_attr_item.c index 354472bf45f1..83d09635b200 100644 --- a/fs/xfs/xfs_attr_item.c +++ b/fs/xfs/xfs_attr_item.c @@ -1047,8 +1047,8 @@ xlog_recover_attri_commit_pass2( break; case XFS_ATTRI_OP_FLAGS_SET: case XFS_ATTRI_OP_FLAGS_REPLACE: - /* Log item, attr name, attr value */ - if (item->ri_total != 3) { + /* Log item, attr name, optional attr value */ + if (item->ri_total != 2 + !!attri_formatp->alfi_value_len) { XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, attri_formatp, len); return -EFSCORRUPTED; From c6c56ff975f046be25f527231a239e37920aca5e Mon Sep 17 00:00:00 2001 From: Long Li Date: Fri, 20 Mar 2026 10:11:30 +0800 Subject: [PATCH 111/401] xfs: remove redundant validation in xlog_recover_attri_commit_pass2 Remove the redundant post-parse validation switch. By the time that block is reached, xfs_attri_validate() has already guaranteed all name lengths are non-zero via xfs_attri_validate_namelen(), and xfs_attri_validate_name_iovec() has already returned -EFSCORRUPTED for NULL names. For the REMOVE case, attr_value and value_len are structurally guaranteed to be NULL/zero because the parsing loop only populates them when value_len != 0. All checks in that switch are therefore dead code. Reviewed-by: Darrick J. Wong Signed-off-by: Long Li Reviewed-by: Christoph Hellwig Signed-off-by: Carlos Maiolino --- fs/xfs/xfs_attr_item.c | 46 ------------------------------------------ 1 file changed, 46 deletions(-) diff --git a/fs/xfs/xfs_attr_item.c b/fs/xfs/xfs_attr_item.c index 83d09635b200..82324f42537b 100644 --- a/fs/xfs/xfs_attr_item.c +++ b/fs/xfs/xfs_attr_item.c @@ -1132,52 +1132,6 @@ xlog_recover_attri_commit_pass2( return -EFSCORRUPTED; } - switch (op) { - case XFS_ATTRI_OP_FLAGS_REMOVE: - /* Regular remove operations operate only on names. */ - if (attr_value != NULL || value_len != 0) { - XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, - attri_formatp, len); - return -EFSCORRUPTED; - } - fallthrough; - case XFS_ATTRI_OP_FLAGS_PPTR_REMOVE: - case XFS_ATTRI_OP_FLAGS_PPTR_SET: - case XFS_ATTRI_OP_FLAGS_SET: - case XFS_ATTRI_OP_FLAGS_REPLACE: - /* - * Regular xattr set/remove/replace operations require a name - * and do not take a newname. Values are optional for set and - * replace. - * - * Name-value set/remove operations must have a name, do not - * take a newname, and can take a value. - */ - if (attr_name == NULL || name_len == 0) { - XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, - attri_formatp, len); - return -EFSCORRUPTED; - } - break; - case XFS_ATTRI_OP_FLAGS_PPTR_REPLACE: - /* - * Name-value replace operations require the caller to - * specify the old and new names and values explicitly. - * Values are optional. - */ - if (attr_name == NULL || name_len == 0) { - XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, - attri_formatp, len); - return -EFSCORRUPTED; - } - if (attr_new_name == NULL || new_name_len == 0) { - XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, - attri_formatp, len); - return -EFSCORRUPTED; - } - break; - } - /* * Memory alloc failure will cause replay to abort. We attach the * name/value buffer to the recovered incore log item and drop our From 05243d490bb7852a8acca7b5b5658019c7797a52 Mon Sep 17 00:00:00 2001 From: Nikunj A Dadhania Date: Wed, 18 Mar 2026 07:56:52 +0000 Subject: [PATCH 112/401] x86/cpu: Enable FSGSBASE early in cpu_init_exception_handling() Move FSGSBASE enablement from identify_cpu() to cpu_init_exception_handling() to ensure it is enabled before any exceptions can occur on both boot and secondary CPUs. == Background == Exception entry code (paranoid_entry()) uses ALTERNATIVE patching based on X86_FEATURE_FSGSBASE to decide whether to use RDGSBASE/WRGSBASE instructions or the slower RDMSR/SWAPGS sequence for saving/restoring GSBASE. On boot CPU, ALTERNATIVE patching happens after enabling FSGSBASE in CR4. When the feature is available, the code is permanently patched to use RDGSBASE/WRGSBASE, which require CR4.FSGSBASE=1 to execute without triggering == Boot Sequence == Boot CPU (with CR pinning enabled): trap_init() cpu_init() <- Uses unpatched code (RDMSR/SWAPGS) x2apic_setup() ... arch_cpu_finalize_init() identify_boot_cpu() identify_cpu() cr4_set_bits(X86_CR4_FSGSBASE) # Enables the feature # This becomes part of cr4_pinned_bits ... alternative_instructions() <- Patches code to use RDGSBASE/WRGSBASE Secondary CPUs (with CR pinning enabled): start_secondary() cr4_init() <- Code already patched, CR4.FSGSBASE=1 set implicitly via cr4_pinned_bits cpu_init() <- exceptions work because FSGSBASE is already enabled Secondary CPU (with CR pinning disabled): start_secondary() cr4_init() <- Code already patched, CR4.FSGSBASE=0 cpu_init() x2apic_setup() rdmsrq(MSR_IA32_APICBASE) <- Triggers #VC in SNP guests exc_vmm_communication() paranoid_entry() <- Uses RDGSBASE with CR4.FSGSBASE=0 (patched code) ... ap_starting() identify_secondary_cpu() identify_cpu() cr4_set_bits(X86_CR4_FSGSBASE) <- Enables the feature, which is too late == CR Pinning == Currently, for secondary CPUs, CR4.FSGSBASE is set implicitly through CR-pinning: the boot CPU sets it during identify_cpu(), it becomes part of cr4_pinned_bits, and cr4_init() applies those pinned bits to secondary CPUs. This works but creates an undocumented dependency between cr4_init() and the pinning mechanism. == Problem == Secondary CPUs boot after alternatives have been applied globally. They execute already-patched paranoid_entry() code that uses RDGSBASE/WRGSBASE instructions, which require CR4.FSGSBASE=1. Upcoming changes to CR pinning behavior will break the implicit dependency, causing secondary CPUs to generate #UD. This issue manifests itself on AMD SEV-SNP guests, where the rdmsrq() in x2apic_setup() triggers a #VC exception early during cpu_init(). The #VC handler (exc_vmm_communication()) executes the patched paranoid_entry() path. Without CR4.FSGSBASE enabled, RDGSBASE instructions trigger #UD. == Fix == Enable FSGSBASE explicitly in cpu_init_exception_handling() before loading exception handlers. This makes the dependency explicit and ensures both boot and secondary CPUs have FSGSBASE enabled before paranoid_entry() executes. Fixes: c82965f9e530 ("x86/entry/64: Handle FSGSBASE enabled paranoid entry/exit") Reported-by: Borislav Petkov Suggested-by: Sohil Mehta Signed-off-by: Nikunj A Dadhania Signed-off-by: Borislav Petkov (AMD) Reviewed-by: Sohil Mehta Cc: Link: https://patch.msgid.link/20260318075654.1792916-2-nikunj@amd.com --- arch/x86/kernel/cpu/common.c | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index a8ff4376c286..7840b224d6a7 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -2050,12 +2050,6 @@ static void identify_cpu(struct cpuinfo_x86 *c) setup_umip(c); setup_lass(c); - /* Enable FSGSBASE instructions if available. */ - if (cpu_has(c, X86_FEATURE_FSGSBASE)) { - cr4_set_bits(X86_CR4_FSGSBASE); - elf_hwcap2 |= HWCAP2_FSGSBASE; - } - /* * The vendor-specific functions might have changed features. * Now we do "generic changes." @@ -2416,6 +2410,18 @@ void cpu_init_exception_handling(bool boot_cpu) /* GHCB needs to be setup to handle #VC. */ setup_ghcb(); + /* + * On CPUs with FSGSBASE support, paranoid_entry() uses + * ALTERNATIVE-patched RDGSBASE/WRGSBASE instructions. Secondary CPUs + * boot after alternatives are patched globally, so early exceptions + * execute patched code that depends on FSGSBASE. Enable the feature + * before any exceptions occur. + */ + if (cpu_feature_enabled(X86_FEATURE_FSGSBASE)) { + cr4_set_bits(X86_CR4_FSGSBASE); + elf_hwcap2 |= HWCAP2_FSGSBASE; + } + if (cpu_feature_enabled(X86_FEATURE_FRED)) { /* The boot CPU has enabled FRED during early boot */ if (!boot_cpu) From 8f13c0c6cb75cc4421d5a60fc060e9e6fd9d1097 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Fri, 20 Mar 2026 14:54:06 +0530 Subject: [PATCH 113/401] cpufreq: Don't skip cpufreq_frequency_table_cpuinfo() The commit 6db0f533d320 ("cpufreq: preserve freq_table_sorted across suspend/hibernate") unintentionally made a change where cpufreq_frequency_table_cpuinfo() isn't getting called anymore for old policies getting re-initialized. This leads to potentially invalid values of policy->max and policy->cpuinfo_max_freq. Fix the issue by reverting the original commit and adding the condition for just the sorting function. Fixes: 6db0f533d320 ("cpufreq: preserve freq_table_sorted across suspend/hibernate") Signed-off-by: Viresh Kumar Cc: 6.19+ # 6.19+ Link: https://patch.msgid.link/65ba5c45749267c82e8a87af3dc788b37a0b3f48.1773998611.git.viresh.kumar@linaro.org Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/cpufreq.c | 9 +++------ drivers/cpufreq/freq_table.c | 4 ++++ 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index 277884d91913..1f794524a1d9 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -1427,12 +1427,9 @@ static int cpufreq_policy_online(struct cpufreq_policy *policy, * If there is a problem with its frequency table, take it * offline and drop it. */ - if (policy->freq_table_sorted != CPUFREQ_TABLE_SORTED_ASCENDING && - policy->freq_table_sorted != CPUFREQ_TABLE_SORTED_DESCENDING) { - ret = cpufreq_table_validate_and_sort(policy); - if (ret) - goto out_offline_policy; - } + ret = cpufreq_table_validate_and_sort(policy); + if (ret) + goto out_offline_policy; /* related_cpus should at least include policy->cpus. */ cpumask_copy(policy->related_cpus, policy->cpus); diff --git a/drivers/cpufreq/freq_table.c b/drivers/cpufreq/freq_table.c index 7f251daf03ce..5b364d8da4f9 100644 --- a/drivers/cpufreq/freq_table.c +++ b/drivers/cpufreq/freq_table.c @@ -360,6 +360,10 @@ int cpufreq_table_validate_and_sort(struct cpufreq_policy *policy) if (policy_has_boost_freq(policy)) policy->boost_supported = true; + if (policy->freq_table_sorted == CPUFREQ_TABLE_SORTED_ASCENDING || + policy->freq_table_sorted == CPUFREQ_TABLE_SORTED_DESCENDING) + return 0; + return set_freq_table_sorted(policy); } From 6a28fb8cb28b9eb39a392e531d938a889eacafc5 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Fri, 20 Mar 2026 15:08:14 +0530 Subject: [PATCH 114/401] cpufreq: conservative: Reset requested_freq on limits change A recently reported issue highlighted that the cached requested_freq is not guaranteed to stay in sync with policy->cur. If the platform changes the actual CPU frequency after the governor sets one (e.g. due to platform-specific frequency scaling) and a re-sync occurs later, policy->cur may diverge from requested_freq. This can lead to incorrect behavior in the conservative governor. For example, the governor may assume the CPU is already running at the maximum frequency and skip further increases even though there is still headroom. Avoid this by resetting the cached requested_freq to policy->cur on detecting a change in policy limits. Reported-by: Lifeng Zheng Tested-by: Lifeng Zheng Link: https://lore.kernel.org/all/20260210115458.3493646-1-zhenglifeng1@huawei.com/ Signed-off-by: Viresh Kumar Reviewed-by: Zhongqiu Han Cc: All applicable Link: https://patch.msgid.link/d846a141a98ac0482f20560fcd7525c0f0ec2f30.1773999467.git.viresh.kumar@linaro.org Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/cpufreq_conservative.c | 12 ++++++++++++ drivers/cpufreq/cpufreq_governor.c | 3 +++ drivers/cpufreq/cpufreq_governor.h | 1 + 3 files changed, 16 insertions(+) diff --git a/drivers/cpufreq/cpufreq_conservative.c b/drivers/cpufreq/cpufreq_conservative.c index e0e847764511..df01d33993d8 100644 --- a/drivers/cpufreq/cpufreq_conservative.c +++ b/drivers/cpufreq/cpufreq_conservative.c @@ -313,6 +313,17 @@ static void cs_start(struct cpufreq_policy *policy) dbs_info->requested_freq = policy->cur; } +static void cs_limits(struct cpufreq_policy *policy) +{ + struct cs_policy_dbs_info *dbs_info = to_dbs_info(policy->governor_data); + + /* + * The limits have changed, so may have the current frequency. Reset + * requested_freq to avoid any unintended outcomes due to the mismatch. + */ + dbs_info->requested_freq = policy->cur; +} + static struct dbs_governor cs_governor = { .gov = CPUFREQ_DBS_GOVERNOR_INITIALIZER("conservative"), .kobj_type = { .default_groups = cs_groups }, @@ -322,6 +333,7 @@ static struct dbs_governor cs_governor = { .init = cs_init, .exit = cs_exit, .start = cs_start, + .limits = cs_limits, }; #define CPU_FREQ_GOV_CONSERVATIVE (cs_governor.gov) diff --git a/drivers/cpufreq/cpufreq_governor.c b/drivers/cpufreq/cpufreq_governor.c index 36eb7aee4bcd..acf101878733 100644 --- a/drivers/cpufreq/cpufreq_governor.c +++ b/drivers/cpufreq/cpufreq_governor.c @@ -563,6 +563,7 @@ EXPORT_SYMBOL_GPL(cpufreq_dbs_governor_stop); void cpufreq_dbs_governor_limits(struct cpufreq_policy *policy) { + struct dbs_governor *gov = dbs_governor_of(policy); struct policy_dbs_info *policy_dbs; /* Protect gov->gdbs_data against cpufreq_dbs_governor_exit() */ @@ -574,6 +575,8 @@ void cpufreq_dbs_governor_limits(struct cpufreq_policy *policy) mutex_lock(&policy_dbs->update_mutex); cpufreq_policy_apply_limits(policy); gov_update_sample_delay(policy_dbs, 0); + if (gov->limits) + gov->limits(policy); mutex_unlock(&policy_dbs->update_mutex); out: diff --git a/drivers/cpufreq/cpufreq_governor.h b/drivers/cpufreq/cpufreq_governor.h index 168c23fd7fca..1462d59277bd 100644 --- a/drivers/cpufreq/cpufreq_governor.h +++ b/drivers/cpufreq/cpufreq_governor.h @@ -138,6 +138,7 @@ struct dbs_governor { int (*init)(struct dbs_data *dbs_data); void (*exit)(struct dbs_data *dbs_data); void (*start)(struct cpufreq_policy *policy); + void (*limits)(struct cpufreq_policy *policy); }; static inline struct dbs_governor *dbs_governor_of(struct cpufreq_policy *policy) From 734eba62cd32cb9ceffa09e57cdc03d761528525 Mon Sep 17 00:00:00 2001 From: Alberto Garcia Date: Mon, 9 Mar 2026 18:39:41 +0100 Subject: [PATCH 115/401] PM: hibernate: Drain trailing zero pages on userspace restore Commit 005e8dddd497 ("PM: hibernate: don't store zero pages in the image file") added an optimization to skip zero-filled pages in the hibernation image. On restore, zero pages are handled internally by snapshot_write_next() in a loop that processes them without returning to the caller. With the userspace restore interface, writing the last non-zero page to /dev/snapshot is followed by the SNAPSHOT_ATOMIC_RESTORE ioctl. At this point there are no more calls to snapshot_write_next() so any trailing zero pages are not processed, snapshot_image_loaded() fails because handle->cur is smaller than expected, the ioctl returns -EPERM and the image is not restored. The in-kernel restore path is not affected by this because the loop in load_image() in swap.c calls snapshot_write_next() until it returns 0. It is this final call that drains any trailing zero pages. Fixed by calling snapshot_write_next() in snapshot_write_finalize(), giving the kernel the chance to drain any trailing zero pages. Fixes: 005e8dddd497 ("PM: hibernate: don't store zero pages in the image file") Signed-off-by: Alberto Garcia Acked-by: Brian Geffon Link: https://patch.msgid.link/ef5a7c5e3e3dbd17dcb20efaa0c53a47a23498bb.1773075892.git.berto@igalia.com Signed-off-by: Rafael J. Wysocki --- kernel/power/snapshot.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c index 6e1321837c66..a564650734dc 100644 --- a/kernel/power/snapshot.c +++ b/kernel/power/snapshot.c @@ -2855,6 +2855,17 @@ int snapshot_write_finalize(struct snapshot_handle *handle) { int error; + /* + * Call snapshot_write_next() to drain any trailing zero pages, + * but make sure we're in the data page region first. + * This function can return PAGE_SIZE if the kernel was expecting + * another copy page. Return -ENODATA in that situation. + */ + if (handle->cur > nr_meta_pages + 1) { + error = snapshot_write_next(handle); + if (error) + return error > 0 ? -ENODATA : error; + } copy_last_highmem_page(); error = hibernate_restore_protect_page(handle->buffer); /* Do that only if we have loaded the image entirely */ From a8d51efb5929ae308895455a3e496b5eca2cd143 Mon Sep 17 00:00:00 2001 From: Youngjun Park Date: Sun, 22 Mar 2026 21:05:28 +0900 Subject: [PATCH 116/401] PM: sleep: Drop spurious WARN_ON() from pm_restore_gfp_mask() Commit 35e4a69b2003f ("PM: sleep: Allow pm_restrict_gfp_mask() stacking") introduced refcount-based GFP mask management that warns when pm_restore_gfp_mask() is called with saved_gfp_count == 0. Some hibernation paths call pm_restore_gfp_mask() defensively where the GFP mask may or may not be restricted depending on the execution path. For example, the uswsusp interface invokes it in SNAPSHOT_CREATE_IMAGE, SNAPSHOT_UNFREEZE, and snapshot_release(). Before the stacking change this was a silent no-op; it now triggers a spurious WARNING. Remove the WARN_ON() wrapper from the !saved_gfp_count check while retaining the check itself, so that defensive calls remain harmless without producing false warnings. Fixes: 35e4a69b2003f ("PM: sleep: Allow pm_restrict_gfp_mask() stacking") Signed-off-by: Youngjun Park [ rjw: Subject tweak ] Link: https://patch.msgid.link/20260322120528.750178-1-youngjun.park@lge.com Signed-off-by: Rafael J. Wysocki --- kernel/power/main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/power/main.c b/kernel/power/main.c index 5f8c9e12eaec..5429e9f19b65 100644 --- a/kernel/power/main.c +++ b/kernel/power/main.c @@ -40,7 +40,7 @@ void pm_restore_gfp_mask(void) { WARN_ON(!mutex_is_locked(&system_transition_mutex)); - if (WARN_ON(!saved_gfp_count) || --saved_gfp_count) + if (!saved_gfp_count || --saved_gfp_count) return; gfp_allowed_mask = saved_gfp_mask; From 411df123c017169922cc767affce76282b8e6c85 Mon Sep 17 00:00:00 2001 From: "Borislav Petkov (AMD)" Date: Thu, 19 Mar 2026 12:07:59 +0100 Subject: [PATCH 117/401] x86/cpu: Remove X86_CR4_FRED from the CR4 pinned bits mask Commit in Fixes added the FRED CR4 bit to the CR4 pinned bits mask so that whenever something else modifies CR4, that bit remains set. Which in itself is a perfectly fine idea. However, there's an issue when during boot FRED is initialized: first on the BSP and later on the APs. Thus, there's a window in time when exceptions cannot be handled. This becomes particularly nasty when running as SEV-{ES,SNP} or TDX guests which, when they manage to trigger exceptions during that short window described above, triple fault due to FRED MSRs not being set up yet. See Link tag below for a much more detailed explanation of the situation. So, as a result, the commit in that Link URL tried to address this shortcoming by temporarily disabling CR4 pinning when an AP is not online yet. However, that is a problem in itself because in this case, an attack on the kernel needs to only modify the online bit - a single bit in RW memory - and then disable CR4 pinning and then disable SM*P, leading to more and worse things to happen to the system. So, instead, remove the FRED bit from the CR4 pinning mask, thus obviating the need to temporarily disable CR4 pinning. If someone manages to disable FRED when poking at CR4, then idt_invalidate() would make sure the system would crash'n'burn on the first exception triggered, which is a much better outcome security-wise. Fixes: ff45746fbf00 ("x86/cpu: Add X86_CR4_FRED macro") Suggested-by: Dave Hansen Suggested-by: Peter Zijlstra Signed-off-by: Borislav Petkov (AMD) Cc: # 6.12+ Link: https://lore.kernel.org/r/177385987098.1647592.3381141860481415647.tip-bot2@tip-bot2 --- arch/x86/kernel/cpu/common.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 7840b224d6a7..c57e8972d30f 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -433,7 +433,7 @@ static __always_inline void setup_lass(struct cpuinfo_x86 *c) /* These bits should not change their value after CPU init is finished. */ static const unsigned long cr4_pinned_mask = X86_CR4_SMEP | X86_CR4_SMAP | X86_CR4_UMIP | - X86_CR4_FSGSBASE | X86_CR4_CET | X86_CR4_FRED; + X86_CR4_FSGSBASE | X86_CR4_CET; static DEFINE_STATIC_KEY_FALSE_RO(cr_pinning); static unsigned long cr4_pinned_bits __ro_after_init; From 34420cb92dbb9e37ff6c6603f4f5e1807db3f1de Mon Sep 17 00:00:00 2001 From: Huiwen He Date: Mon, 23 Mar 2026 17:08:12 +0800 Subject: [PATCH 118/401] smb/client: ensure smb2_mapping_table rebuild on cmd changes The current rule for smb2_mapping_table.c uses `$(call cmd,...)`, which fails to track command line modifications in the Makefile (e.g., modifying the command to `perl -d` or `perl -w` for debug will not trigger a rebuild) and does not generate the required .cmd file for Kbuild. Fix this by transitioning to the standard `$(call if_changed,...)` macro. This includes adding the `FORCE` prerequisite and appending the output file to the `targets` variable so Kbuild can track it properly. As a result, Kbuild now automatically handles the cleaning of the generated file, allowing us to safely drop the redundant `clean-files` assignment. Fixes: c527e13a7a66 ("cifs: Autogenerate SMB2 error mapping table") Signed-off-by: Huiwen He Reviewed-by: ChenXiaoSong Signed-off-by: Steve French --- fs/smb/client/Makefile | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/fs/smb/client/Makefile b/fs/smb/client/Makefile index 26b6105f04d1..1a6e1e1c9764 100644 --- a/fs/smb/client/Makefile +++ b/fs/smb/client/Makefile @@ -48,8 +48,8 @@ cifs-$(CONFIG_CIFS_COMPRESSION) += compress.o compress/lz77.o # Build the SMB2 error mapping table from smb2status.h # $(obj)/smb2_mapping_table.c: $(src)/../common/smb2status.h \ - $(src)/gen_smb2_mapping - $(call cmd,gen_smb2_mapping) + $(src)/gen_smb2_mapping FORCE + $(call if_changed,gen_smb2_mapping) $(obj)/smb2maperror.o: $(obj)/smb2_mapping_table.c @@ -58,4 +58,5 @@ quiet_cmd_gen_smb2_mapping = GEN $@ obj-$(CONFIG_SMB_KUNIT_TESTS) += smb2maperror_test.o -clean-files += smb2_mapping_table.c +# Let Kbuild handle tracking and cleaning +targets += smb2_mapping_table.c From 3645eb7e3915990a149460c151a00894cb586253 Mon Sep 17 00:00:00 2001 From: Nikunj A Dadhania Date: Wed, 18 Mar 2026 07:56:54 +0000 Subject: [PATCH 119/401] x86/fred: Fix early boot failures on SEV-ES/SNP guests FRED-enabled SEV-(ES,SNP) guests fail to boot due to the following issues in the early boot sequence: * FRED does not have a #VC exception handler in the dispatch logic * Early FRED #VC exceptions attempt to use uninitialized per-CPU GHCBs instead of boot_ghcb Add X86_TRAP_VC case to fred_hwexc() with a new exc_vmm_communication() function that provides the unified entry point FRED requires, dispatching to existing user/kernel handlers based on privilege level. The function is already declared via DECLARE_IDTENTRY_VC(). Fix early GHCB access by falling back to boot_ghcb in __sev_{get,put}_ghcb() when per-CPU GHCBs are not yet initialized. Fixes: 14619d912b65 ("x86/fred: FRED entry/exit and dispatch code") Signed-off-by: Nikunj A Dadhania Signed-off-by: Borislav Petkov (AMD) Reviewed-by: Tom Lendacky Cc: # 6.12+ Link: https://patch.msgid.link/20260318075654.1792916-4-nikunj@amd.com --- arch/x86/coco/sev/noinstr.c | 6 ++++++ arch/x86/entry/entry_fred.c | 14 ++++++++++++++ 2 files changed, 20 insertions(+) diff --git a/arch/x86/coco/sev/noinstr.c b/arch/x86/coco/sev/noinstr.c index 9d94aca4a698..5afd663a1c21 100644 --- a/arch/x86/coco/sev/noinstr.c +++ b/arch/x86/coco/sev/noinstr.c @@ -121,6 +121,9 @@ noinstr struct ghcb *__sev_get_ghcb(struct ghcb_state *state) WARN_ON(!irqs_disabled()); + if (!sev_cfg.ghcbs_initialized) + return boot_ghcb; + data = this_cpu_read(runtime_data); ghcb = &data->ghcb_page; @@ -164,6 +167,9 @@ noinstr void __sev_put_ghcb(struct ghcb_state *state) WARN_ON(!irqs_disabled()); + if (!sev_cfg.ghcbs_initialized) + return; + data = this_cpu_read(runtime_data); ghcb = &data->ghcb_page; diff --git a/arch/x86/entry/entry_fred.c b/arch/x86/entry/entry_fred.c index 88c757ac8ccd..fbe2d10dd737 100644 --- a/arch/x86/entry/entry_fred.c +++ b/arch/x86/entry/entry_fred.c @@ -177,6 +177,16 @@ static noinstr void fred_extint(struct pt_regs *regs) } } +#ifdef CONFIG_AMD_MEM_ENCRYPT +noinstr void exc_vmm_communication(struct pt_regs *regs, unsigned long error_code) +{ + if (user_mode(regs)) + return user_exc_vmm_communication(regs, error_code); + else + return kernel_exc_vmm_communication(regs, error_code); +} +#endif + static noinstr void fred_hwexc(struct pt_regs *regs, unsigned long error_code) { /* Optimize for #PF. That's the only exception which matters performance wise */ @@ -207,6 +217,10 @@ static noinstr void fred_hwexc(struct pt_regs *regs, unsigned long error_code) #ifdef CONFIG_X86_CET case X86_TRAP_CP: return exc_control_protection(regs, error_code); #endif +#ifdef CONFIG_AMD_MEM_ENCRYPT + case X86_TRAP_VC: return exc_vmm_communication(regs, error_code); +#endif + default: return fred_bad_type(regs, error_code); } From a3e93cac25316aad03bf561e3c205f4ca0b8f452 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 20 Mar 2026 10:25:21 +0100 Subject: [PATCH 120/401] x86/cpu: Add comment clarifying CRn pinning To avoid future confusion on the purpose and design of the CRn pinning code. Also note that if the attacker controls page-tables, the CRn bits lose much of the attraction anyway. Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Borislav Petkov (AMD) Link: https://patch.msgid.link/20260320092521.GG3739106@noisy.programming.kicks-ass.net --- arch/x86/kernel/cpu/common.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index c57e8972d30f..ec0670114efa 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -434,6 +434,19 @@ static __always_inline void setup_lass(struct cpuinfo_x86 *c) /* These bits should not change their value after CPU init is finished. */ static const unsigned long cr4_pinned_mask = X86_CR4_SMEP | X86_CR4_SMAP | X86_CR4_UMIP | X86_CR4_FSGSBASE | X86_CR4_CET; + +/* + * The CR pinning protects against ROP on the 'mov %reg, %CRn' instruction(s). + * Since you can ROP directly to these instructions (barring shadow stack), + * any protection must follow immediately and unconditionally after that. + * + * Specifically, the CR[04] write functions below will have the value + * validation controlled by the @cr_pinning static_branch which is + * __ro_after_init, just like the cr4_pinned_bits value. + * + * Once set, an attacker will have to defeat page-tables to get around these + * restrictions. Which is a much bigger ask than 'simple' ROP. + */ static DEFINE_STATIC_KEY_FALSE_RO(cr_pinning); static unsigned long cr4_pinned_bits __ro_after_init; From 87997b6c6516e049cbaf2fc6810b213d587a06b1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Winiarski?= Date: Tue, 17 Feb 2026 16:41:18 +0100 Subject: [PATCH 121/401] drm/xe/pf: Fix use-after-free in migration restore MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When an error is returned from xe_sriov_pf_migration_restore_produce(), the data pointer is not set to NULL, which can trigger use-after-free in subsequent .write() calls. Set the pointer to NULL upon error to fix the problem. Fixes: 1ed30397c0b92 ("drm/xe/pf: Add support for encap/decap of bitstream to/from packet") Reported-by: Sebastian Österlund Closes: https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/7230 Reviewed-by: Shuicheng Lin Link: https://patch.msgid.link/20260217154118.176902-1-michal.winiarski@intel.com Signed-off-by: Michał Winiarski (cherry picked from commit 4f53d8c6d23527d734fe3531d08e15cb170a0819) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_sriov_packet.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/xe/xe_sriov_packet.c b/drivers/gpu/drm/xe/xe_sriov_packet.c index 968f32496282..2ae9eff2a7c0 100644 --- a/drivers/gpu/drm/xe/xe_sriov_packet.c +++ b/drivers/gpu/drm/xe/xe_sriov_packet.c @@ -341,6 +341,8 @@ ssize_t xe_sriov_packet_write_single(struct xe_device *xe, unsigned int vfid, ret = xe_sriov_pf_migration_restore_produce(xe, vfid, *data); if (ret) { xe_sriov_packet_free(*data); + *data = NULL; + return ret; } From ef3d549e1deb3466c61f3b01d22fc3fe3e5efb08 Mon Sep 17 00:00:00 2001 From: Felix Gu Date: Thu, 19 Mar 2026 00:12:34 +0800 Subject: [PATCH 122/401] spi: sn-f-ospi: Fix resource leak in f_ospi_probe() In f_ospi_probe(), when num_cs validation fails, it returns without calling spi_controller_put() on the SPI controller, which causes a resource leak. Use devm_spi_alloc_host() instead of spi_alloc_host() to ensure the SPI controller is properly freed when probe fails. Fixes: 1b74dd64c861 ("spi: Add Socionext F_OSPI SPI flash controller driver") Signed-off-by: Felix Gu Link: https://patch.msgid.link/20260319-sn-f-v1-1-33a6738d2da8@gmail.com Signed-off-by: Mark Brown --- drivers/spi/spi-sn-f-ospi.c | 17 +++++------------ 1 file changed, 5 insertions(+), 12 deletions(-) diff --git a/drivers/spi/spi-sn-f-ospi.c b/drivers/spi/spi-sn-f-ospi.c index bfcc140df810..1f554aa7ca27 100644 --- a/drivers/spi/spi-sn-f-ospi.c +++ b/drivers/spi/spi-sn-f-ospi.c @@ -612,7 +612,7 @@ static int f_ospi_probe(struct platform_device *pdev) u32 num_cs = OSPI_NUM_CS; int ret; - ctlr = spi_alloc_host(dev, sizeof(*ospi)); + ctlr = devm_spi_alloc_host(dev, sizeof(*ospi)); if (!ctlr) return -ENOMEM; @@ -635,16 +635,12 @@ static int f_ospi_probe(struct platform_device *pdev) platform_set_drvdata(pdev, ospi); ospi->base = devm_platform_ioremap_resource(pdev, 0); - if (IS_ERR(ospi->base)) { - ret = PTR_ERR(ospi->base); - goto err_put_ctlr; - } + if (IS_ERR(ospi->base)) + return PTR_ERR(ospi->base); ospi->clk = devm_clk_get_enabled(dev, NULL); - if (IS_ERR(ospi->clk)) { - ret = PTR_ERR(ospi->clk); - goto err_put_ctlr; - } + if (IS_ERR(ospi->clk)) + return PTR_ERR(ospi->clk); mutex_init(&ospi->mlock); @@ -661,9 +657,6 @@ static int f_ospi_probe(struct platform_device *pdev) err_destroy_mutex: mutex_destroy(&ospi->mlock); -err_put_ctlr: - spi_controller_put(ctlr); - return ret; } From a42c9b8b0c00ecd9b7467844f2fbfc766898bf54 Mon Sep 17 00:00:00 2001 From: Felix Gu Date: Thu, 19 Mar 2026 00:12:35 +0800 Subject: [PATCH 123/401] spi: sn-f-ospi: Use devm_mutex_init() to simplify code Switch to devm_mutex_init() to handle mutex destruction automatically. This simplifies the error paths in probe() and removes the need for an explicit mutex_destroy() in remove() callback. Signed-off-by: Felix Gu Link: https://patch.msgid.link/20260319-sn-f-v1-2-33a6738d2da8@gmail.com Signed-off-by: Mark Brown --- drivers/spi/spi-sn-f-ospi.c | 25 +++++-------------------- 1 file changed, 5 insertions(+), 20 deletions(-) diff --git a/drivers/spi/spi-sn-f-ospi.c b/drivers/spi/spi-sn-f-ospi.c index 1f554aa7ca27..3c61c799723b 100644 --- a/drivers/spi/spi-sn-f-ospi.c +++ b/drivers/spi/spi-sn-f-ospi.c @@ -642,29 +642,15 @@ static int f_ospi_probe(struct platform_device *pdev) if (IS_ERR(ospi->clk)) return PTR_ERR(ospi->clk); - mutex_init(&ospi->mlock); + ret = devm_mutex_init(dev, &ospi->mlock); + if (ret) + return ret; ret = f_ospi_init(ospi); if (ret) - goto err_destroy_mutex; + return ret; - ret = devm_spi_register_controller(dev, ctlr); - if (ret) - goto err_destroy_mutex; - - return 0; - -err_destroy_mutex: - mutex_destroy(&ospi->mlock); - - return ret; -} - -static void f_ospi_remove(struct platform_device *pdev) -{ - struct f_ospi *ospi = platform_get_drvdata(pdev); - - mutex_destroy(&ospi->mlock); + return devm_spi_register_controller(dev, ctlr); } static const struct of_device_id f_ospi_dt_ids[] = { @@ -679,7 +665,6 @@ static struct platform_driver f_ospi_driver = { .of_match_table = f_ospi_dt_ids, }, .probe = f_ospi_probe, - .remove = f_ospi_remove, }; module_platform_driver(f_ospi_driver); From 5a184f1cb43a8e035251c635f5c47da5dc3e3049 Mon Sep 17 00:00:00 2001 From: Cezary Rojewski Date: Fri, 20 Mar 2026 11:12:17 +0100 Subject: [PATCH 124/401] ASoC: Intel: catpt: Fix the device initialization The DMA mask shall be coerced before any buffer allocations for the device are done. At the same time explain why DMA mask of 31 bits is used in the first place. Cc: Andy Shevchenko Fixes: 7a10b66a5df9 ("ASoC: Intel: catpt: Device driver lifecycle") Signed-off-by: Cezary Rojewski Reviewed-by: Andy Shevchenko Link: https://patch.msgid.link/20260320101217.1243688-1-cezary.rojewski@intel.com Signed-off-by: Mark Brown --- sound/soc/intel/catpt/device.c | 10 +++++++++- sound/soc/intel/catpt/dsp.c | 3 --- 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/sound/soc/intel/catpt/device.c b/sound/soc/intel/catpt/device.c index 0638aecba40d..0b3f20e384c7 100644 --- a/sound/soc/intel/catpt/device.c +++ b/sound/soc/intel/catpt/device.c @@ -281,7 +281,15 @@ static int catpt_acpi_probe(struct platform_device *pdev) if (IS_ERR(cdev->pci_ba)) return PTR_ERR(cdev->pci_ba); - /* alloc buffer for storing DRAM context during dx transitions */ + /* + * As per design HOST is responsible for preserving firmware's runtime + * context during D0 -> D3 -> D0 transitions. Addresses used for DMA + * to/from HOST memory shall be outside the reserved range of 0xFFFxxxxx. + */ + ret = dma_coerce_mask_and_coherent(cdev->dev, DMA_BIT_MASK(31)); + if (ret) + return ret; + cdev->dxbuf_vaddr = dmam_alloc_coherent(dev, catpt_dram_size(cdev), &cdev->dxbuf_paddr, GFP_KERNEL); if (!cdev->dxbuf_vaddr) diff --git a/sound/soc/intel/catpt/dsp.c b/sound/soc/intel/catpt/dsp.c index 008a20a2acbd..677f348909c8 100644 --- a/sound/soc/intel/catpt/dsp.c +++ b/sound/soc/intel/catpt/dsp.c @@ -125,9 +125,6 @@ int catpt_dmac_probe(struct catpt_dev *cdev) dmac->dev = cdev->dev; dmac->irq = cdev->irq; - ret = dma_coerce_mask_and_coherent(cdev->dev, DMA_BIT_MASK(31)); - if (ret) - return ret; /* * Caller is responsible for putting device in D0 to allow * for I/O and memory access before probing DW. From 63542bb402b7013171c9f621c28b609eda4dbf1f Mon Sep 17 00:00:00 2001 From: Felix Gu Date: Sun, 22 Mar 2026 21:29:56 +0800 Subject: [PATCH 125/401] spi: meson-spicc: Fix double-put in remove path meson_spicc_probe() registers the controller with devm_spi_register_controller(), so teardown already drops the controller reference via devm cleanup. Calling spi_controller_put() again in meson_spicc_remove() causes a double-put. Fixes: 8311ee2164c5 ("spi: meson-spicc: fix memory leak in meson_spicc_remove") Signed-off-by: Felix Gu Reviewed-by: Johan Hovold Link: https://patch.msgid.link/20260322-rockchip-v1-1-fac3f0c6dad8@gmail.com Signed-off-by: Mark Brown --- drivers/spi/spi-meson-spicc.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/spi/spi-meson-spicc.c b/drivers/spi/spi-meson-spicc.c index a7001b9e36e6..57768da3205d 100644 --- a/drivers/spi/spi-meson-spicc.c +++ b/drivers/spi/spi-meson-spicc.c @@ -1101,8 +1101,6 @@ static void meson_spicc_remove(struct platform_device *pdev) /* Disable SPI */ writel(0, spicc->base + SPICC_CONREG); - - spi_controller_put(spicc->host); } static const struct meson_spicc_data meson_spicc_gx_data = { From aed3d041ab061ec8a64f50a3edda0f4db7280025 Mon Sep 17 00:00:00 2001 From: Yussuf Khalil Date: Fri, 6 Mar 2026 12:06:35 +0000 Subject: [PATCH 126/401] drm/amd/display: Do not skip unrelated mode changes in DSC validation Starting with commit 17ce8a6907f7 ("drm/amd/display: Add dsc pre-validation in atomic check"), amdgpu resets the CRTC state mode_changed flag to false when recomputing the DSC configuration results in no timing change for a particular stream. However, this is incorrect in scenarios where a change in MST/DSC configuration happens in the same KMS commit as another (unrelated) mode change. For example, the integrated panel of a laptop may be configured differently (e.g., HDR enabled/disabled) depending on whether external screens are attached. In this case, plugging in external DP-MST screens may result in the mode_changed flag being dropped incorrectly for the integrated panel if its DSC configuration did not change during precomputation in pre_validate_dsc(). At this point, however, dm_update_crtc_state() has already created new streams for CRTCs with DSC-independent mode changes. In turn, amdgpu_dm_commit_streams() will never release the old stream, resulting in a memory leak. amdgpu_dm_atomic_commit_tail() will never acquire a reference to the new stream either, which manifests as a use-after-free when the stream gets disabled later on: BUG: KASAN: use-after-free in dc_stream_release+0x25/0x90 [amdgpu] Write of size 4 at addr ffff88813d836524 by task kworker/9:9/29977 Workqueue: events drm_mode_rmfb_work_fn Call Trace: dump_stack_lvl+0x6e/0xa0 print_address_description.constprop.0+0x88/0x320 ? dc_stream_release+0x25/0x90 [amdgpu] print_report+0xfc/0x1ff ? srso_alias_return_thunk+0x5/0xfbef5 ? __virt_addr_valid+0x225/0x4e0 ? dc_stream_release+0x25/0x90 [amdgpu] kasan_report+0xe1/0x180 ? dc_stream_release+0x25/0x90 [amdgpu] kasan_check_range+0x125/0x200 dc_stream_release+0x25/0x90 [amdgpu] dc_state_destruct+0x14d/0x5c0 [amdgpu] dc_state_release.part.0+0x4e/0x130 [amdgpu] dm_atomic_destroy_state+0x3f/0x70 [amdgpu] drm_atomic_state_default_clear+0x8ee/0xf30 ? drm_mode_object_put.part.0+0xb1/0x130 __drm_atomic_state_free+0x15c/0x2d0 atomic_remove_fb+0x67e/0x980 Since there is no reliable way of figuring out whether a CRTC has unrelated mode changes pending at the time of DSC validation, remember the value of the mode_changed flag from before the point where a CRTC was marked as potentially affected by a change in DSC configuration. Reset the mode_changed flag to this earlier value instead in pre_validate_dsc(). Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/5004 Fixes: 17ce8a6907f7 ("drm/amd/display: Add dsc pre-validation in atomic check") Signed-off-by: Yussuf Khalil Reviewed-by: Harry Wentland Signed-off-by: Alex Deucher (cherry picked from commit cc7c7121ae082b7b82891baa7280f1ff2608f22b) --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 5 +++++ drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h | 1 + drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c | 4 +++- 3 files changed, 9 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 085cc98bd875..a9c398b1516b 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -12523,6 +12523,11 @@ static int amdgpu_dm_atomic_check(struct drm_device *dev, } if (dc_resource_is_dsc_encoding_supported(dc)) { + for_each_oldnew_crtc_in_state(state, crtc, old_crtc_state, new_crtc_state, i) { + dm_new_crtc_state = to_dm_crtc_state(new_crtc_state); + dm_new_crtc_state->mode_changed_independent_from_dsc = new_crtc_state->mode_changed; + } + for_each_oldnew_crtc_in_state(state, crtc, old_crtc_state, new_crtc_state, i) { if (drm_atomic_crtc_needs_modeset(new_crtc_state)) { ret = add_affected_mst_dsc_crtcs(state, crtc); diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h index 800813671748..d15812d51d72 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h @@ -984,6 +984,7 @@ struct dm_crtc_state { bool freesync_vrr_info_changed; + bool mode_changed_independent_from_dsc; bool dsc_force_changed; bool vrr_supported; struct mod_freesync_config freesync_config; diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c index 7be50e8c0636..5d8c4c7020b1 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c @@ -1744,9 +1744,11 @@ int pre_validate_dsc(struct drm_atomic_state *state, int ind = find_crtc_index_in_state_by_stream(state, stream); if (ind >= 0) { + struct dm_crtc_state *dm_new_crtc_state = to_dm_crtc_state(state->crtcs[ind].new_state); + DRM_INFO_ONCE("%s:%d MST_DSC no mode changed for stream 0x%p\n", __func__, __LINE__, stream); - state->crtcs[ind].new_state->mode_changed = 0; + dm_new_crtc_state->base.mode_changed = dm_new_crtc_state->mode_changed_independent_from_dsc; } } } From 2d300ebfc411205fa31ba7741c5821d381912381 Mon Sep 17 00:00:00 2001 From: Ruijing Dong Date: Tue, 17 Mar 2026 13:54:11 -0400 Subject: [PATCH 127/401] drm/amdgpu: fix strsep() corrupting lockup_timeout on multi-GPU (v3) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit amdgpu_device_get_job_timeout_settings() passes a pointer directly to the global amdgpu_lockup_timeout[] buffer into strsep(). strsep() destructively replaces delimiter characters with '\0' in-place. On multi-GPU systems, this function is called once per device. When a multi-value setting like "0,0,0,-1" is used, the first GPU's call transforms the global buffer into "0\00\00\0-1". The second GPU then sees only "0" (terminated at the first '\0'), parses a single value, hits the single-value fallthrough (index == 1), and applies timeout=0 to all rings — causing immediate false job timeouts. Fix this by copying into a stack-local array before calling strsep(), so the global module parameter buffer remains intact across calls. The buffer is AMDGPU_MAX_TIMEOUT_PARAM_LENGTH (256) bytes, which is safe for the stack. v2: wrap commit message to 72 columns, add Assisted-by tag. v3: use stack array with strscpy() instead of kstrdup()/kfree() to avoid unnecessary heap allocation (Christian). This patch was developed with assistance from Claude (claude-opus-4-6). Assisted-by: Claude:claude-opus-4-6 Reviewed-by: Christian König Reviewed-by: Alex Deucher Signed-off-by: Ruijing Dong Signed-off-by: Alex Deucher (cherry picked from commit 94d79f51efecb74be1d88dde66bdc8bfcca17935) Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index d8296dfc5e8a..6d8531f9b882 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -4207,7 +4207,8 @@ static void amdgpu_device_xgmi_reset_func(struct work_struct *__work) static int amdgpu_device_get_job_timeout_settings(struct amdgpu_device *adev) { - char *input = amdgpu_lockup_timeout; + char buf[AMDGPU_MAX_TIMEOUT_PARAM_LENGTH]; + char *input = buf; char *timeout_setting = NULL; int index = 0; long timeout; @@ -4217,9 +4218,17 @@ static int amdgpu_device_get_job_timeout_settings(struct amdgpu_device *adev) adev->gfx_timeout = adev->compute_timeout = adev->sdma_timeout = adev->video_timeout = msecs_to_jiffies(2000); - if (!strnlen(input, AMDGPU_MAX_TIMEOUT_PARAM_LENGTH)) + if (!strnlen(amdgpu_lockup_timeout, AMDGPU_MAX_TIMEOUT_PARAM_LENGTH)) return 0; + /* + * strsep() destructively modifies its input by replacing delimiters + * with '\0'. Use a stack copy so the global module parameter buffer + * remains intact for multi-GPU systems where this function is called + * once per device. + */ + strscpy(buf, amdgpu_lockup_timeout, sizeof(buf)); + while ((timeout_setting = strsep(&input, ",")) && strnlen(timeout_setting, AMDGPU_MAX_TIMEOUT_PARAM_LENGTH)) { ret = kstrtol(timeout_setting, 0, &timeout); From 14b81abe7bdc25f8097906fc2f91276ffedb2d26 Mon Sep 17 00:00:00 2001 From: Eric Huang Date: Mon, 16 Mar 2026 11:01:30 -0400 Subject: [PATCH 128/401] drm/amdgpu: prevent immediate PASID reuse case MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit PASID resue could cause interrupt issue when process immediately runs into hw state left by previous process exited with the same PASID, it's possible that page faults are still pending in the IH ring buffer when the process exits and frees up its PASID. To prevent the case, it uses idr cyclic allocator same as kernel pid's. Signed-off-by: Eric Huang Reviewed-by: Christian König Signed-off-by: Alex Deucher (cherry picked from commit 8f1de51f49be692de137c8525106e0fce2d1912d) Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c | 45 ++++++++++++++++++------- drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h | 1 + drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 1 + 3 files changed, 34 insertions(+), 13 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c index 64c519cd7395..d88523568b62 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c @@ -35,10 +35,13 @@ * PASIDs are global address space identifiers that can be shared * between the GPU, an IOMMU and the driver. VMs on different devices * may use the same PASID if they share the same address - * space. Therefore PASIDs are allocated using a global IDA. VMs are - * looked up from the PASID per amdgpu_device. + * space. Therefore PASIDs are allocated using IDR cyclic allocator + * (similar to kernel PID allocation) which naturally delays reuse. + * VMs are looked up from the PASID per amdgpu_device. */ -static DEFINE_IDA(amdgpu_pasid_ida); + +static DEFINE_IDR(amdgpu_pasid_idr); +static DEFINE_SPINLOCK(amdgpu_pasid_idr_lock); /* Helper to free pasid from a fence callback */ struct amdgpu_pasid_cb { @@ -50,8 +53,8 @@ struct amdgpu_pasid_cb { * amdgpu_pasid_alloc - Allocate a PASID * @bits: Maximum width of the PASID in bits, must be at least 1 * - * Allocates a PASID of the given width while keeping smaller PASIDs - * available if possible. + * Uses kernel's IDR cyclic allocator (same as PID allocation). + * Allocates sequentially with automatic wrap-around. * * Returns a positive integer on success. Returns %-EINVAL if bits==0. * Returns %-ENOSPC if no PASID was available. Returns %-ENOMEM on @@ -59,14 +62,15 @@ struct amdgpu_pasid_cb { */ int amdgpu_pasid_alloc(unsigned int bits) { - int pasid = -EINVAL; + int pasid; - for (bits = min(bits, 31U); bits > 0; bits--) { - pasid = ida_alloc_range(&amdgpu_pasid_ida, 1U << (bits - 1), - (1U << bits) - 1, GFP_KERNEL); - if (pasid != -ENOSPC) - break; - } + if (bits == 0) + return -EINVAL; + + spin_lock(&amdgpu_pasid_idr_lock); + pasid = idr_alloc_cyclic(&amdgpu_pasid_idr, NULL, 1, + 1U << bits, GFP_KERNEL); + spin_unlock(&amdgpu_pasid_idr_lock); if (pasid >= 0) trace_amdgpu_pasid_allocated(pasid); @@ -81,7 +85,10 @@ int amdgpu_pasid_alloc(unsigned int bits) void amdgpu_pasid_free(u32 pasid) { trace_amdgpu_pasid_freed(pasid); - ida_free(&amdgpu_pasid_ida, pasid); + + spin_lock(&amdgpu_pasid_idr_lock); + idr_remove(&amdgpu_pasid_idr, pasid); + spin_unlock(&amdgpu_pasid_idr_lock); } static void amdgpu_pasid_free_cb(struct dma_fence *fence, @@ -616,3 +623,15 @@ void amdgpu_vmid_mgr_fini(struct amdgpu_device *adev) } } } + +/** + * amdgpu_pasid_mgr_cleanup - cleanup PASID manager + * + * Cleanup the IDR allocator. + */ +void amdgpu_pasid_mgr_cleanup(void) +{ + spin_lock(&amdgpu_pasid_idr_lock); + idr_destroy(&amdgpu_pasid_idr); + spin_unlock(&amdgpu_pasid_idr_lock); +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h index b3649cd3af56..a57919478d3b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h @@ -74,6 +74,7 @@ int amdgpu_pasid_alloc(unsigned int bits); void amdgpu_pasid_free(u32 pasid); void amdgpu_pasid_free_delayed(struct dma_resv *resv, u32 pasid); +void amdgpu_pasid_mgr_cleanup(void); bool amdgpu_vmid_had_gpu_reset(struct amdgpu_device *adev, struct amdgpu_vmid *id); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index c60cbce356cf..d54afeb7b2a7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -2898,6 +2898,7 @@ void amdgpu_vm_manager_fini(struct amdgpu_device *adev) xa_destroy(&adev->vm_manager.pasids); amdgpu_vmid_mgr_fini(adev); + amdgpu_pasid_mgr_cleanup(); } /** From 37c2caa167b0b8aca4f74c32404c5288b876a2a3 Mon Sep 17 00:00:00 2001 From: Alex Hung Date: Mon, 9 Mar 2026 11:16:08 -0600 Subject: [PATCH 129/401] drm/amd/display: Fix drm_edid leak in amdgpu_dm [WHAT] When a sink is connected, aconnector->drm_edid was overwritten without freeing the previous allocation, causing a memory leak on resume. [HOW] Free the previous drm_edid before updating it. Reviewed-by: Roman Li Signed-off-by: Alex Hung Signed-off-by: Chuanyu Tseng Signed-off-by: Alex Deucher (cherry picked from commit 52024a94e7111366141cfc5d888b2ef011f879e5) Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index a9c398b1516b..acc3d8dad4a3 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -3909,8 +3909,9 @@ void amdgpu_dm_update_connector_after_detect( aconnector->dc_sink = sink; dc_sink_retain(aconnector->dc_sink); + drm_edid_free(aconnector->drm_edid); + aconnector->drm_edid = NULL; if (sink->dc_edid.length == 0) { - aconnector->drm_edid = NULL; hdmi_cec_unset_edid(aconnector); if (aconnector->dc_link->aux_mode) { drm_dp_cec_unset_edid(&aconnector->dm_dp_aux.aux); From cdbc3b62cfc2785da32b82260f852370cc1f2a6a Mon Sep 17 00:00:00 2001 From: Asad Kamal Date: Wed, 18 Mar 2026 13:48:30 +0800 Subject: [PATCH 130/401] drm/amd/pm: Skip redundant UCLK restore in smu_v13_0_6 Only reapply UCLK soft limits during PP_OD_RESTORE_DEFAULT when the current max differs from the DPM table max. This avoids redundant SMC updates and prevents -EINVAL on restore when no change is needed. Fixes: b7a900344546 ("drm/amd/pm: Allow setting max UCLK on SMU v13.0.6") Signed-off-by: Asad Kamal Reviewed-by: Lijo Lazar Signed-off-by: Alex Deucher (cherry picked from commit 17f11bbbc76c8e83c8474ea708316b1e3631d927) --- .../drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c index 896b51c8a9a7..b44a85697a30 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c @@ -2122,6 +2122,7 @@ static int smu_v13_0_6_usr_edit_dpm_table(struct smu_context *smu, { struct smu_dpm_context *smu_dpm = &(smu->smu_dpm); struct smu_13_0_dpm_context *dpm_context = smu_dpm->dpm_context; + struct smu_dpm_table *uclk_table = &dpm_context->dpm_tables.uclk_table; struct smu_umd_pstate_table *pstate_table = &smu->pstate_table; uint32_t min_clk; uint32_t max_clk; @@ -2221,14 +2222,16 @@ static int smu_v13_0_6_usr_edit_dpm_table(struct smu_context *smu, if (ret) return ret; - min_clk = SMU_DPM_TABLE_MIN( - &dpm_context->dpm_tables.uclk_table); - max_clk = SMU_DPM_TABLE_MAX( - &dpm_context->dpm_tables.uclk_table); - ret = smu_v13_0_6_set_soft_freq_limited_range( - smu, SMU_UCLK, min_clk, max_clk, false); - if (ret) - return ret; + if (SMU_DPM_TABLE_MAX(uclk_table) != + pstate_table->uclk_pstate.curr.max) { + min_clk = SMU_DPM_TABLE_MIN(&dpm_context->dpm_tables.uclk_table); + max_clk = SMU_DPM_TABLE_MAX(&dpm_context->dpm_tables.uclk_table); + ret = smu_v13_0_6_set_soft_freq_limited_range(smu, + SMU_UCLK, min_clk, + max_clk, false); + if (ret) + return ret; + } smu_v13_0_reset_custom_level(smu); } break; From 2f0e491faee43181b6a86e90f34016b256042fe1 Mon Sep 17 00:00:00 2001 From: Asad Kamal Date: Wed, 18 Mar 2026 13:52:57 +0800 Subject: [PATCH 131/401] drm/amd/pm: Return -EOPNOTSUPP for unsupported OD_MCLK on smu_v13_0_6 When SET_UCLK_MAX capability is absent, return -EOPNOTSUPP from smu_v13_0_6_emit_clk_levels() for OD_MCLK instead of 0. This makes unsupported OD_MCLK reporting consistent with other clock types and allows callers to skip the entry cleanly. Signed-off-by: Asad Kamal Reviewed-by: Lijo Lazar Signed-off-by: Alex Deucher (cherry picked from commit d82e0a72d9189e8acd353988e1a57f85ce479e37) Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c index b44a85697a30..870bcc86fd79 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c @@ -1391,7 +1391,7 @@ static int smu_v13_0_6_emit_clk_levels(struct smu_context *smu, break; case SMU_OD_MCLK: if (!smu_v13_0_6_cap_supported(smu, SMU_CAP(SET_UCLK_MAX))) - return 0; + return -EOPNOTSUPP; size += sysfs_emit_at(buf, size, "%s:\n", "OD_MCLK"); size += sysfs_emit_at(buf, size, "0: %uMhz\n1: %uMhz\n", From 3e6dd28a11083e83e11a284d99fcc9eb748c321c Mon Sep 17 00:00:00 2001 From: Yang Wang Date: Thu, 19 Mar 2026 21:17:38 -0400 Subject: [PATCH 132/401] drm/amd/pm: disable OD_FAN_CURVE if temp or pwm range invalid for smu v13 Forcibly disable the OD_FAN_CURVE feature when temperature or PWM range is invalid, otherwise PMFW will reject this configuration on smu v13.0.x example: $ sudo cat /sys/bus/pci/devices//gpu_od/fan_ctrl/fan_curve OD_FAN_CURVE: 0: 0C 0% 1: 0C 0% 2: 0C 0% 3: 0C 0% 4: 0C 0% OD_RANGE: FAN_CURVE(hotspot temp): 0C 0C FAN_CURVE(fan speed): 0% 0% $ echo "0 50 40" | sudo tee fan_curve kernel log: [ 756.442527] amdgpu 0000:03:00.0: amdgpu: Fan curve temp setting(50) must be within [0, 0]! [ 777.345800] amdgpu 0000:03:00.0: amdgpu: Fan curve temp setting(50) must be within [0, 0]! Closes: https://github.com/ROCm/amdgpu/issues/208 Signed-off-by: Yang Wang Acked-by: Alex Deucher Signed-off-by: Alex Deucher (cherry picked from commit 470891606c5a97b1d0d937e0aa67a3bed9fcb056) Cc: stable@vger.kernel.org --- .../drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c | 33 ++++++++++++++++++- .../drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c | 33 ++++++++++++++++++- 2 files changed, 64 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c index a8d63d4d1f6e..554f616328c3 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c @@ -59,6 +59,10 @@ #define to_amdgpu_device(x) (container_of(x, struct amdgpu_device, pm.smu_i2c)) +static void smu_v13_0_0_get_od_setting_limits(struct smu_context *smu, + int od_feature_bit, + int32_t *min, int32_t *max); + static const struct smu_feature_bits smu_v13_0_0_dpm_features = { .bits = { SMU_FEATURE_BIT_INIT(FEATURE_DPM_GFXCLK_BIT), @@ -1043,8 +1047,35 @@ static bool smu_v13_0_0_is_od_feature_supported(struct smu_context *smu, PPTable_t *pptable = smu->smu_table.driver_pptable; const OverDriveLimits_t * const overdrive_upperlimits = &pptable->SkuTable.OverDriveLimitsBasicMax; + int32_t min_value, max_value; + bool feature_enabled; - return overdrive_upperlimits->FeatureCtrlMask & (1U << od_feature_bit); + switch (od_feature_bit) { + case PP_OD_FEATURE_FAN_CURVE_BIT: + feature_enabled = !!(overdrive_upperlimits->FeatureCtrlMask & (1U << od_feature_bit)); + if (feature_enabled) { + smu_v13_0_0_get_od_setting_limits(smu, PP_OD_FEATURE_FAN_CURVE_TEMP, + &min_value, &max_value); + if (!min_value && !max_value) { + feature_enabled = false; + goto out; + } + + smu_v13_0_0_get_od_setting_limits(smu, PP_OD_FEATURE_FAN_CURVE_PWM, + &min_value, &max_value); + if (!min_value && !max_value) { + feature_enabled = false; + goto out; + } + } + break; + default: + feature_enabled = !!(overdrive_upperlimits->FeatureCtrlMask & (1U << od_feature_bit)); + break; + } + +out: + return feature_enabled; } static void smu_v13_0_0_get_od_setting_limits(struct smu_context *smu, diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c index 5500a0f12f0e..f331e87858c9 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c @@ -59,6 +59,10 @@ #define to_amdgpu_device(x) (container_of(x, struct amdgpu_device, pm.smu_i2c)) +static void smu_v13_0_7_get_od_setting_limits(struct smu_context *smu, + int od_feature_bit, + int32_t *min, int32_t *max); + static const struct smu_feature_bits smu_v13_0_7_dpm_features = { .bits = { SMU_FEATURE_BIT_INIT(FEATURE_DPM_GFXCLK_BIT), @@ -1053,8 +1057,35 @@ static bool smu_v13_0_7_is_od_feature_supported(struct smu_context *smu, PPTable_t *pptable = smu->smu_table.driver_pptable; const OverDriveLimits_t * const overdrive_upperlimits = &pptable->SkuTable.OverDriveLimitsBasicMax; + int32_t min_value, max_value; + bool feature_enabled; - return overdrive_upperlimits->FeatureCtrlMask & (1U << od_feature_bit); + switch (od_feature_bit) { + case PP_OD_FEATURE_FAN_CURVE_BIT: + feature_enabled = !!(overdrive_upperlimits->FeatureCtrlMask & (1U << od_feature_bit)); + if (feature_enabled) { + smu_v13_0_7_get_od_setting_limits(smu, PP_OD_FEATURE_FAN_CURVE_TEMP, + &min_value, &max_value); + if (!min_value && !max_value) { + feature_enabled = false; + goto out; + } + + smu_v13_0_7_get_od_setting_limits(smu, PP_OD_FEATURE_FAN_CURVE_PWM, + &min_value, &max_value); + if (!min_value && !max_value) { + feature_enabled = false; + goto out; + } + } + break; + default: + feature_enabled = !!(overdrive_upperlimits->FeatureCtrlMask & (1U << od_feature_bit)); + break; + } + +out: + return feature_enabled; } static void smu_v13_0_7_get_od_setting_limits(struct smu_context *smu, From b52fe51f724385b3ed81e37e510a4a33107e8161 Mon Sep 17 00:00:00 2001 From: Mark Harmstone Date: Tue, 17 Feb 2026 17:35:42 +0000 Subject: [PATCH 133/401] btrfs: fix super block offset in error message in btrfs_validate_super() Fix the superblock offset mismatch error message in btrfs_validate_super(): we changed it so that it considers all the superblocks, but the message still assumes we're only looking at the first one. The change from %u to %llu is because we're changing from a constant to a u64. Fixes: 069ec957c35e ("btrfs: Refactor btrfs_check_super_valid") Reviewed-by: Qu Wenruo Signed-off-by: Mark Harmstone Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/disk-io.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index b1b53d713ee9..3524976ccc1d 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -2531,8 +2531,8 @@ int btrfs_validate_super(const struct btrfs_fs_info *fs_info, if (mirror_num >= 0 && btrfs_super_bytenr(sb) != btrfs_sb_offset(mirror_num)) { - btrfs_err(fs_info, "super offset mismatch %llu != %u", - btrfs_super_bytenr(sb), BTRFS_SUPER_INFO_OFFSET); + btrfs_err(fs_info, "super offset mismatch %llu != %llu", + btrfs_super_bytenr(sb), btrfs_sb_offset(mirror_num)); ret = -EINVAL; } From 5254d4181add9dfaa5e3519edd71cc8f752b2f85 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Tue, 17 Feb 2026 14:46:50 +0000 Subject: [PATCH 134/401] btrfs: fix zero size inode with non-zero size after log replay When logging that an inode exists, as part of logging a new name or logging new dir entries for a directory, we always set the generation of the logged inode item to 0. This is to signal during log replay (in overwrite_item()), that we should not set the i_size since we only logged that an inode exists, so the i_size of the inode in the subvolume tree must be preserved (as when we log new names or that an inode exists, we don't log extents). This works fine except when we have already logged an inode in full mode or it's the first time we are logging an inode created in a past transaction, that inode has a new i_size of 0 and then we log a new name for the inode (due to a new hardlink or a rename), in which case we log an i_size of 0 for the inode and a generation of 0, which causes the log replay code to not update the inode's i_size to 0 (in overwrite_item()). An example scenario: mkdir /mnt/dir xfs_io -f -c "pwrite 0 64K" /mnt/dir/foo sync xfs_io -c "truncate 0" -c "fsync" /mnt/dir/foo ln /mnt/dir/foo /mnt/dir/bar xfs_io -c "fsync" /mnt/dir After log replay the file remains with a size of 64K. This is because when we first log the inode, when we fsync file foo, we log its current i_size of 0, and then when we create a hard link we log again the inode in exists mode (LOG_INODE_EXISTS) but we set a generation of 0 for the inode item we add to the log tree, so during log replay overwrite_item() sees that the generation is 0 and i_size is 0 so we skip updating the inode's i_size from 64K to 0. Fix this by making sure at fill_inode_item() we always log the real generation of the inode if it was logged in the current transaction with the i_size we logged before. Also if an inode created in a previous transaction is logged in exists mode only, make sure we log the i_size stored in the inode item located from the commit root, so that if we log multiple times that the inode exists we get the correct i_size. A test case for fstests will follow soon. Reported-by: Vyacheslav Kovalevsky Link: https://lore.kernel.org/linux-btrfs/af8c15fa-4e41-4bb2-885c-0bc4e97532a6@gmail.com/ Signed-off-by: Filipe Manana Signed-off-by: David Sterba --- fs/btrfs/tree-log.c | 98 ++++++++++++++++++++++++++++++--------------- 1 file changed, 65 insertions(+), 33 deletions(-) diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index 9ff3933bc382..fce1b16a882b 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -4616,21 +4616,32 @@ static void fill_inode_item(struct btrfs_trans_handle *trans, struct inode *inode, bool log_inode_only, u64 logged_isize) { + u64 gen = BTRFS_I(inode)->generation; u64 flags; if (log_inode_only) { - /* set the generation to zero so the recover code - * can tell the difference between an logging - * just to say 'this inode exists' and a logging - * to say 'update this inode with these values' + /* + * Set the generation to zero so the recover code can tell the + * difference between a logging just to say 'this inode exists' + * and a logging to say 'update this inode with these values'. + * But only if the inode was not already logged before. + * We access ->logged_trans directly since it was already set + * up in the call chain by btrfs_log_inode(), and data_race() + * to avoid false alerts from KCSAN and since it was set already + * and one can set it to 0 since that only happens on eviction + * and we are holding a ref on the inode. */ - btrfs_set_inode_generation(leaf, item, 0); + ASSERT(data_race(BTRFS_I(inode)->logged_trans) > 0); + if (data_race(BTRFS_I(inode)->logged_trans) < trans->transid) + gen = 0; + btrfs_set_inode_size(leaf, item, logged_isize); } else { - btrfs_set_inode_generation(leaf, item, BTRFS_I(inode)->generation); btrfs_set_inode_size(leaf, item, inode->i_size); } + btrfs_set_inode_generation(leaf, item, gen); + btrfs_set_inode_uid(leaf, item, i_uid_read(inode)); btrfs_set_inode_gid(leaf, item, i_gid_read(inode)); btrfs_set_inode_mode(leaf, item, inode->i_mode); @@ -5448,42 +5459,63 @@ static int btrfs_log_changed_extents(struct btrfs_trans_handle *trans, return 0; } -static int logged_inode_size(struct btrfs_root *log, struct btrfs_inode *inode, - struct btrfs_path *path, u64 *size_ret) +static int get_inode_size_to_log(struct btrfs_trans_handle *trans, + struct btrfs_inode *inode, + struct btrfs_path *path, u64 *size_ret) { struct btrfs_key key; + struct btrfs_inode_item *item; int ret; key.objectid = btrfs_ino(inode); key.type = BTRFS_INODE_ITEM_KEY; key.offset = 0; - ret = btrfs_search_slot(NULL, log, &key, path, 0, 0); - if (ret < 0) { - return ret; - } else if (ret > 0) { - *size_ret = 0; - } else { - struct btrfs_inode_item *item; + /* + * Our caller called inode_logged(), so logged_trans is up to date. + * Use data_race() to silence any warning from KCSAN. Once logged_trans + * is set, it can only be reset to 0 after inode eviction. + */ + if (data_race(inode->logged_trans) == trans->transid) { + ret = btrfs_search_slot(NULL, inode->root->log_root, &key, path, 0, 0); + } else if (inode->generation < trans->transid) { + path->search_commit_root = true; + path->skip_locking = true; + ret = btrfs_search_slot(NULL, inode->root, &key, path, 0, 0); + path->search_commit_root = false; + path->skip_locking = false; - item = btrfs_item_ptr(path->nodes[0], path->slots[0], - struct btrfs_inode_item); - *size_ret = btrfs_inode_size(path->nodes[0], item); - /* - * If the in-memory inode's i_size is smaller then the inode - * size stored in the btree, return the inode's i_size, so - * that we get a correct inode size after replaying the log - * when before a power failure we had a shrinking truncate - * followed by addition of a new name (rename / new hard link). - * Otherwise return the inode size from the btree, to avoid - * data loss when replaying a log due to previously doing a - * write that expands the inode's size and logging a new name - * immediately after. - */ - if (*size_ret > inode->vfs_inode.i_size) - *size_ret = inode->vfs_inode.i_size; + } else { + *size_ret = 0; + return 0; } + /* + * If the inode was logged before or is from a past transaction, then + * its inode item must exist in the log root or in the commit root. + */ + ASSERT(ret <= 0); + if (WARN_ON_ONCE(ret > 0)) + ret = -ENOENT; + + if (ret < 0) + return ret; + + item = btrfs_item_ptr(path->nodes[0], path->slots[0], + struct btrfs_inode_item); + *size_ret = btrfs_inode_size(path->nodes[0], item); + /* + * If the in-memory inode's i_size is smaller then the inode size stored + * in the btree, return the inode's i_size, so that we get a correct + * inode size after replaying the log when before a power failure we had + * a shrinking truncate followed by addition of a new name (rename / new + * hard link). Otherwise return the inode size from the btree, to avoid + * data loss when replaying a log due to previously doing a write that + * expands the inode's size and logging a new name immediately after. + */ + if (*size_ret > inode->vfs_inode.i_size) + *size_ret = inode->vfs_inode.i_size; + btrfs_release_path(path); return 0; } @@ -6996,7 +7028,7 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans, ret = drop_inode_items(trans, log, path, inode, BTRFS_XATTR_ITEM_KEY); } else { - if (inode_only == LOG_INODE_EXISTS && ctx->logged_before) { + if (inode_only == LOG_INODE_EXISTS) { /* * Make sure the new inode item we write to the log has * the same isize as the current one (if it exists). @@ -7010,7 +7042,7 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans, * (zeroes), as if an expanding truncate happened, * instead of getting a file of 4Kb only. */ - ret = logged_inode_size(log, inode, path, &logged_isize); + ret = get_inode_size_to_log(trans, inode, path, &logged_isize); if (ret) goto out_unlock; } From a4376d9a5d4c9610e69def3fc0b32c86a7ab7a41 Mon Sep 17 00:00:00 2001 From: Shin'ichiro Kawasaki Date: Sun, 1 Mar 2026 21:17:04 +0900 Subject: [PATCH 135/401] btrfs: fix leak of kobject name for sub-group space_info When create_space_info_sub_group() allocates elements of space_info->sub_group[], kobject_init_and_add() is called for each element via btrfs_sysfs_add_space_info_type(). However, when check_removing_space_info() frees these elements, it does not call btrfs_sysfs_remove_space_info() on them. As a result, kobject_put() is not called and the associated kobj->name objects are leaked. This memory leak is reproduced by running the blktests test case zbd/009 on kernels built with CONFIG_DEBUG_KMEMLEAK. The kmemleak feature reports the following error: unreferenced object 0xffff888112877d40 (size 16): comm "mount", pid 1244, jiffies 4294996972 hex dump (first 16 bytes): 64 61 74 61 2d 72 65 6c 6f 63 00 c4 c6 a7 cb 7f data-reloc...... backtrace (crc 53ffde4d): __kmalloc_node_track_caller_noprof+0x619/0x870 kstrdup+0x42/0xc0 kobject_set_name_vargs+0x44/0x110 kobject_init_and_add+0xcf/0x150 btrfs_sysfs_add_space_info_type+0xfc/0x210 [btrfs] create_space_info_sub_group.constprop.0+0xfb/0x1b0 [btrfs] create_space_info+0x211/0x320 [btrfs] btrfs_init_space_info+0x15a/0x1b0 [btrfs] open_ctree+0x33c7/0x4a50 [btrfs] btrfs_get_tree.cold+0x9f/0x1ee [btrfs] vfs_get_tree+0x87/0x2f0 vfs_cmd_create+0xbd/0x280 __do_sys_fsconfig+0x3df/0x990 do_syscall_64+0x136/0x1540 entry_SYSCALL_64_after_hwframe+0x76/0x7e To avoid the leak, call btrfs_sysfs_remove_space_info() instead of kfree() for the elements. Fixes: f92ee31e031c ("btrfs: introduce btrfs_space_info sub-group") Link: https://lore.kernel.org/linux-block/b9488881-f18d-4f47-91a5-3c9bf63955a5@wdc.com/ Reviewed-by: Johannes Thumshirn Signed-off-by: Shin'ichiro Kawasaki Signed-off-by: David Sterba --- fs/btrfs/block-group.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/btrfs/block-group.c b/fs/btrfs/block-group.c index fa55d868ecd8..f7fcff7dca8f 100644 --- a/fs/btrfs/block-group.c +++ b/fs/btrfs/block-group.c @@ -4584,7 +4584,7 @@ static void check_removing_space_info(struct btrfs_space_info *space_info) for (int i = 0; i < BTRFS_SPACE_INFO_SUB_GROUP_MAX; i++) { if (space_info->sub_group[i]) { check_removing_space_info(space_info->sub_group[i]); - kfree(space_info->sub_group[i]); + btrfs_sysfs_remove_space_info(space_info->sub_group[i]); space_info->sub_group[i] = NULL; } } From 0dcabcb920a5c143c568f37c26c6f2b4b9206bd1 Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Fri, 13 Mar 2026 18:35:26 +1030 Subject: [PATCH 136/401] btrfs: zlib: handle page aligned compressed size correctly [BUG] Since commit 3d74a7556fba ("btrfs: zlib: introduce zlib_compress_bio() helper"), there are some reports about different crashes in zlib compression path. One of the symptoms is list corruption like the following: list_del corruption. next->prev should be fffffbb340204a08, but was ffff8d6517cb7de0. (next=fffffbb3402d62c8) ------------[ cut here ]------------ kernel BUG at lib/list_debug.c:65! Oops: invalid opcode: 0000 [#1] SMP NOPTI CPU: 1 UID: 0 PID: 21436 Comm: kworker/u16:7 Not tainted 7.0.0-rc2-jcg+ #1 PREEMPT Hardware name: LENOVO 10VGS02P00/3130, BIOS M1XKT57A 02/10/2022 Workqueue: btrfs-delalloc btrfs_work_helper [btrfs] RIP: 0010:__list_del_entry_valid_or_report+0xec/0xf0 Call Trace: btrfs_alloc_compr_folio+0xae/0xc0 [btrfs] zlib_compress_bio+0x39d/0x6a0 [btrfs] btrfs_compress_bio+0x2e3/0x3d0 [btrfs] compress_file_range+0x2b0/0x660 [btrfs] btrfs_work_helper+0xdb/0x3e0 [btrfs] process_one_work+0x192/0x3d0 worker_thread+0x19a/0x310 kthread+0xdf/0x120 ret_from_fork+0x22e/0x310 ret_from_fork_asm+0x1a/0x30 ---[ end trace 0000000000000000 ]--- Other symptoms include VM_BUG_ON() during folio_put() but it's rarer. David Sterba firstly reported this during his CI runs but unfortunately I'm unable to hit it. Meanwhile zstd/lzo doesn't seem to have the same problem. [CAUSE] During zlib_compress_bio() every time the output buffer is full, we queue the full folio into the compressed bio, and allocate a new folio as the output folio. After the input has finished, we loop through zlib_deflate() with Z_FINISH to flush all output. And when that is done, we still need to check if the last folio has any content, and if so we still need to queue that part into the compressed bio. The problem is in the final folio handling, if the final folio is full (for x86_64 the folio size is 4K), the length to queue is calculated by u32 cur_len = offset_in_folio(out_folio, workspace->strm.total_out); But since total_out is 4K aligned, the resulted @cur_len will be 0, then we hit the bio_add_folio(), which has a quirk that if bio_add_folio() got an length 0, it will still queue the folio into the bio, but return false. In that case we go to out: tag, which calls btrfs_free_compr_folio() to release @out_folio, which may put the out folio into the btrfs global pool list. On the other hand, that @out_folio is already added to the compressed bio, and will later be released again by cleanup_compressed_bio(), which results double release. And if this time we still need to put the folio into the btrfs global pool list, it will result a list corruption because it's already in the list. [FIX] Instead of offset_inside_folio(), directly use the difference between strm.total_out and bi_size. So that if the last folio is completely full, we can still properly queue the full folio other than queueing zero byte. Fixes: 3d74a7556fba ("btrfs: zlib: introduce zlib_compress_bio() helper") Reported-by: David Sterba Reported-by: Jean-Christophe Guillain Reported-by: syzbot+3c4d8371d65230f852a2@syzkaller.appspotmail.com Link: https://bugzilla.kernel.org/show_bug.cgi?id=221176 Signed-off-by: Qu Wenruo Signed-off-by: David Sterba --- fs/btrfs/zlib.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/zlib.c b/fs/btrfs/zlib.c index 0a8fcee16428..27fc2b828002 100644 --- a/fs/btrfs/zlib.c +++ b/fs/btrfs/zlib.c @@ -308,7 +308,9 @@ int zlib_compress_bio(struct list_head *ws, struct compressed_bio *cb) } /* Queue the remaining part of the folio. */ if (workspace->strm.total_out > bio->bi_iter.bi_size) { - u32 cur_len = offset_in_folio(out_folio, workspace->strm.total_out); + const u32 cur_len = workspace->strm.total_out - bio->bi_iter.bi_size; + + ASSERT(cur_len <= folio_size(out_folio)); if (!bio_add_folio(bio, out_folio, cur_len, 0)) { ret = -E2BIG; From a85b46db143fda5869e7d8df8f258ccef5fa1719 Mon Sep 17 00:00:00 2001 From: Goldwyn Rodrigues Date: Fri, 13 Mar 2026 14:11:39 -0400 Subject: [PATCH 137/401] btrfs: tracepoints: get correct superblock from dentry in event btrfs_sync_file() If overlay is used on top of btrfs, dentry->d_sb translates to overlay's super block and fsid assignment will lead to a crash. Use file_inode(file)->i_sb to always get btrfs_sb. Reviewed-by: Boris Burkov Signed-off-by: Goldwyn Rodrigues Signed-off-by: David Sterba --- include/trace/events/btrfs.h | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/include/trace/events/btrfs.h b/include/trace/events/btrfs.h index 125bdc166bfe..0864700f76e0 100644 --- a/include/trace/events/btrfs.h +++ b/include/trace/events/btrfs.h @@ -769,12 +769,15 @@ TRACE_EVENT(btrfs_sync_file, ), TP_fast_assign( - const struct dentry *dentry = file->f_path.dentry; - const struct inode *inode = d_inode(dentry); + struct dentry *dentry = file_dentry(file); + struct inode *inode = file_inode(file); + struct dentry *parent = dget_parent(dentry); + struct inode *parent_inode = d_inode(parent); - TP_fast_assign_fsid(btrfs_sb(file->f_path.dentry->d_sb)); + dput(parent); + TP_fast_assign_fsid(btrfs_sb(inode->i_sb)); __entry->ino = btrfs_ino(BTRFS_I(inode)); - __entry->parent = btrfs_ino(BTRFS_I(d_inode(dentry->d_parent))); + __entry->parent = btrfs_ino(BTRFS_I(parent_inode)); __entry->datasync = datasync; __entry->root_objectid = btrfs_root_id(BTRFS_I(inode)->root); ), From 1c37d896b12dfd0d4c96e310b0033c6676933917 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Wed, 18 Mar 2026 16:17:59 +0000 Subject: [PATCH 138/401] btrfs: fix lost error when running device stats on multiple devices fs Whenever we get an error updating the device stats item for a device in btrfs_run_dev_stats() we allow the loop to go to the next device, and if updating the stats item for the next device succeeds, we end up losing the error we had from the previous device. Fix this by breaking out of the loop once we get an error and make sure it's returned to the caller. Since we are in the transaction commit path (and in the critical section actually), returning the error will result in a transaction abort. Fixes: 733f4fbbc108 ("Btrfs: read device stats on mount, write modified ones during commit") Signed-off-by: Filipe Manana Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/volumes.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 8fbd736aad9f..117e13d245f6 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -8099,8 +8099,9 @@ int btrfs_run_dev_stats(struct btrfs_trans_handle *trans) smp_rmb(); ret = update_dev_stat_item(trans, device); - if (!ret) - atomic_sub(stats_cnt, &device->dev_stats_ccnt); + if (ret) + break; + atomic_sub(stats_cnt, &device->dev_stats_ccnt); } mutex_unlock(&fs_devices->device_list_mutex); From 2d8c5098b847f37dde8351fb5b5d190f1bb5c576 Mon Sep 17 00:00:00 2001 From: Chen-Yu Tsai Date: Thu, 26 Feb 2026 17:22:33 +0800 Subject: [PATCH 139/401] PCI/pwrctrl: Do not power off on pwrctrl device removal With the move to explicit pwrctrl power on/off APIs, the caller, i.e., the PCI controller driver, should manage the power state. The pwrctrl drivers should not try to clean up or power off when they are removed, as this might end up disabling an already disabled regulator, causing a big warning. This can be triggered if a PCI controller driver's .remove() callback calls pci_pwrctrl_destroy_devices() after pci_pwrctrl_power_off_devices(). Drop the devm cleanup parts that turn off regulators from the pwrctrl drivers. Fixes: b921aa3f8dec ("PCI/pwrctrl: Switch to pwrctrl create, power on/off, destroy APIs") Signed-off-by: Chen-Yu Tsai Signed-off-by: Bjorn Helgaas Reviewed-by: Bartosz Golaszewski Reviewed-by: Manivannan Sadhasivam Link: https://patch.msgid.link/20260226092234.3859740-1-wenst@chromium.org --- drivers/pci/pwrctrl/pci-pwrctrl-pwrseq.c | 12 ------------ drivers/pci/pwrctrl/slot.c | 1 - 2 files changed, 13 deletions(-) diff --git a/drivers/pci/pwrctrl/pci-pwrctrl-pwrseq.c b/drivers/pci/pwrctrl/pci-pwrctrl-pwrseq.c index 0d0377283c37..c7e4beec160a 100644 --- a/drivers/pci/pwrctrl/pci-pwrctrl-pwrseq.c +++ b/drivers/pci/pwrctrl/pci-pwrctrl-pwrseq.c @@ -68,13 +68,6 @@ static int pwrseq_pwrctrl_power_off(struct pci_pwrctrl *pwrctrl) return pwrseq_power_off(pwrseq->pwrseq); } -static void devm_pwrseq_pwrctrl_power_off(void *data) -{ - struct pwrseq_pwrctrl *pwrseq = data; - - pwrseq_pwrctrl_power_off(&pwrseq->pwrctrl); -} - static int pwrseq_pwrctrl_probe(struct platform_device *pdev) { const struct pwrseq_pwrctrl_pdata *pdata; @@ -101,11 +94,6 @@ static int pwrseq_pwrctrl_probe(struct platform_device *pdev) return dev_err_probe(dev, PTR_ERR(pwrseq->pwrseq), "Failed to get the power sequencer\n"); - ret = devm_add_action_or_reset(dev, devm_pwrseq_pwrctrl_power_off, - pwrseq); - if (ret) - return ret; - pwrseq->pwrctrl.power_on = pwrseq_pwrctrl_power_on; pwrseq->pwrctrl.power_off = pwrseq_pwrctrl_power_off; diff --git a/drivers/pci/pwrctrl/slot.c b/drivers/pci/pwrctrl/slot.c index 082af81efe25..b87639253ae2 100644 --- a/drivers/pci/pwrctrl/slot.c +++ b/drivers/pci/pwrctrl/slot.c @@ -63,7 +63,6 @@ static void devm_slot_pwrctrl_release(void *data) { struct slot_pwrctrl *slot = data; - slot_pwrctrl_power_off(&slot->pwrctrl); regulator_bulk_free(slot->num_supplies, slot->supplies); } From 70bb843794d150db8e653c9ab288c8533da00837 Mon Sep 17 00:00:00 2001 From: Felix Gu Date: Mon, 23 Mar 2026 19:05:22 +0800 Subject: [PATCH 140/401] PCI/pwrctrl: Fix pci_pwrctrl_is_required() device node leak The for_each_endpoint_of_node() macro requires calling of_node_put() on the endpoint node when breaking out of the loop early. Add of_node_put(endpoint) before the early return to release the reference. Fixes: cf3287fb2c1f ("PCI/pwrctrl: Ensure that remote endpoint node parent has supply requirement") Signed-off-by: Felix Gu Signed-off-by: Bjorn Helgaas Reviewed-by: Bartosz Golaszewski Reviewed-by: Manivannan Sadhasivam Link: https://patch.msgid.link/20260323-pwctrl-v1-1-f5c03a2df7fb@gmail.com --- drivers/pci/pwrctrl/core.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/pci/pwrctrl/core.c b/drivers/pci/pwrctrl/core.c index 7754baed67f2..97cff5b8ca88 100644 --- a/drivers/pci/pwrctrl/core.c +++ b/drivers/pci/pwrctrl/core.c @@ -299,8 +299,10 @@ static bool pci_pwrctrl_is_required(struct device_node *np) struct device_node *remote __free(device_node) = of_graph_get_remote_port_parent(endpoint); if (remote) { - if (of_pci_supply_present(remote)) + if (of_pci_supply_present(remote)) { + of_node_put(endpoint); return true; + } } } } From 05f643d6f7e699198ccc47e634de3879a8ec26a3 Mon Sep 17 00:00:00 2001 From: Lukas Wunner Date: Mon, 23 Mar 2026 07:52:39 +0100 Subject: [PATCH 141/401] Documentation: PCI: Document PCIe TLP Header decoder for AER messages The prefix/header of a TLP that caused an error may be recorded in the AER Capability and emitted to the kernel log in raw hex format. Document the existence and usage of tlp-tool, which decodes the TLP Header into human-readable form. The TLP Header hints at the root cause of an error, yet is often ignored because of its seeming opaqueness. Instead, PCIe errors are frequently worked around by a change in the kernel without fully understanding the actual source of the problem. With more documentation on available tools we'll hopefully come up with better solutions. There are also wireshark dissectors for TLPs, but it seems they expect a complete TLP, not just the header, and they cannot grok the hex format emitted by the kernel directly. tlp-tool appears to be the most cut and dried solution out there. Signed-off-by: Lukas Wunner Signed-off-by: Bjorn Helgaas Reviewed-by: Mika Westerberg Cc: Maciej Grochowski Link: https://patch.msgid.link/bf826c41b4c1d255c7dcb16e266b52f774d944ed.1774246067.git.lukas@wunner.de --- Documentation/PCI/pcieaer-howto.rst | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/Documentation/PCI/pcieaer-howto.rst b/Documentation/PCI/pcieaer-howto.rst index 3210c4792978..90fdfddd3ae5 100644 --- a/Documentation/PCI/pcieaer-howto.rst +++ b/Documentation/PCI/pcieaer-howto.rst @@ -85,6 +85,16 @@ In the example, 'Requester ID' means the ID of the device that sent the error message to the Root Port. Please refer to PCIe specs for other fields. +The 'TLP Header' is the prefix/header of the TLP that caused the error +in raw hex format. To decode the TLP Header into human-readable form +one may use tlp-tool: + +https://github.com/mmpg-x86/tlp-tool + +Example usage:: + + curl -L https://git.kernel.org/linus/2ca1c94ce0b6 | rtlp-tool --aer + AER Ratelimits -------------- From 77fcf58df15edcf3f5b5421f24814fb72796def9 Mon Sep 17 00:00:00 2001 From: Imre Deak Date: Fri, 20 Mar 2026 11:29:00 +0200 Subject: [PATCH 142/401] drm/i915/dp_tunnel: Fix error handling when clearing stream BW in atomic state MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Clearing the DP tunnel stream BW in the atomic state involves getting the tunnel group state, which can fail. Handle the error accordingly. This fixes at least one issue where drm_dp_tunnel_atomic_set_stream_bw() failed to get the tunnel group state returning -EDEADLK, which wasn't handled. This lead to the ctx->contended warn later in modeset_lock() while taking a WW mutex for another object in the same atomic state, and thus within the same already contended WW context. Moving intel_crtc_state_alloc() later would avoid freeing saved_state on the error path; this stable patch leaves that simplification for a follow-up. Cc: Uma Shankar Cc: Ville Syrjälä Cc: # v6.9+ Fixes: a4efae87ecb2 ("drm/i915/dp: Compute DP tunnel BW during encoder state computation") Closes: https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/7617 Reviewed-by: Michał Grzelak Reviewed-by: Uma Shankar Signed-off-by: Imre Deak Link: https://patch.msgid.link/20260320092900.13210-1-imre.deak@intel.com (cherry picked from commit fb69d0076e687421188bc8103ab0e8e5825b1df1) Signed-off-by: Joonas Lahtinen --- drivers/gpu/drm/i915/display/intel_display.c | 8 +++++++- .../gpu/drm/i915/display/intel_dp_tunnel.c | 20 +++++++++++++------ .../gpu/drm/i915/display/intel_dp_tunnel.h | 11 ++++++---- 3 files changed, 28 insertions(+), 11 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c index c4246481fc2f..0f82bf771a92 100644 --- a/drivers/gpu/drm/i915/display/intel_display.c +++ b/drivers/gpu/drm/i915/display/intel_display.c @@ -4602,6 +4602,7 @@ intel_crtc_prepare_cleared_state(struct intel_atomic_state *state, struct intel_crtc_state *crtc_state = intel_atomic_get_new_crtc_state(state, crtc); struct intel_crtc_state *saved_state; + int err; saved_state = intel_crtc_state_alloc(crtc); if (!saved_state) @@ -4610,7 +4611,12 @@ intel_crtc_prepare_cleared_state(struct intel_atomic_state *state, /* free the old crtc_state->hw members */ intel_crtc_free_hw_state(crtc_state); - intel_dp_tunnel_atomic_clear_stream_bw(state, crtc_state); + err = intel_dp_tunnel_atomic_clear_stream_bw(state, crtc_state); + if (err) { + kfree(saved_state); + + return err; + } /* FIXME: before the switch to atomic started, a new pipe_config was * kzalloc'd. Code that depends on any field being zero should be diff --git a/drivers/gpu/drm/i915/display/intel_dp_tunnel.c b/drivers/gpu/drm/i915/display/intel_dp_tunnel.c index 83865c02d477..55b423fd6b6f 100644 --- a/drivers/gpu/drm/i915/display/intel_dp_tunnel.c +++ b/drivers/gpu/drm/i915/display/intel_dp_tunnel.c @@ -621,19 +621,27 @@ int intel_dp_tunnel_atomic_compute_stream_bw(struct intel_atomic_state *state, * * Clear any DP tunnel stream BW requirement set by * intel_dp_tunnel_atomic_compute_stream_bw(). + * + * Returns 0 in case of success, a negative error code otherwise. */ -void intel_dp_tunnel_atomic_clear_stream_bw(struct intel_atomic_state *state, - struct intel_crtc_state *crtc_state) +int intel_dp_tunnel_atomic_clear_stream_bw(struct intel_atomic_state *state, + struct intel_crtc_state *crtc_state) { struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); + int err; if (!crtc_state->dp_tunnel_ref.tunnel) - return; + return 0; + + err = drm_dp_tunnel_atomic_set_stream_bw(&state->base, + crtc_state->dp_tunnel_ref.tunnel, + crtc->pipe, 0); + if (err) + return err; - drm_dp_tunnel_atomic_set_stream_bw(&state->base, - crtc_state->dp_tunnel_ref.tunnel, - crtc->pipe, 0); drm_dp_tunnel_ref_put(&crtc_state->dp_tunnel_ref); + + return 0; } /** diff --git a/drivers/gpu/drm/i915/display/intel_dp_tunnel.h b/drivers/gpu/drm/i915/display/intel_dp_tunnel.h index 7f0f720e8dca..10ab9eebcef6 100644 --- a/drivers/gpu/drm/i915/display/intel_dp_tunnel.h +++ b/drivers/gpu/drm/i915/display/intel_dp_tunnel.h @@ -40,8 +40,8 @@ int intel_dp_tunnel_atomic_compute_stream_bw(struct intel_atomic_state *state, struct intel_dp *intel_dp, const struct intel_connector *connector, struct intel_crtc_state *crtc_state); -void intel_dp_tunnel_atomic_clear_stream_bw(struct intel_atomic_state *state, - struct intel_crtc_state *crtc_state); +int intel_dp_tunnel_atomic_clear_stream_bw(struct intel_atomic_state *state, + struct intel_crtc_state *crtc_state); int intel_dp_tunnel_atomic_add_state_for_crtc(struct intel_atomic_state *state, struct intel_crtc *crtc); @@ -88,9 +88,12 @@ intel_dp_tunnel_atomic_compute_stream_bw(struct intel_atomic_state *state, return 0; } -static inline void +static inline int intel_dp_tunnel_atomic_clear_stream_bw(struct intel_atomic_state *state, - struct intel_crtc_state *crtc_state) {} + struct intel_crtc_state *crtc_state) +{ + return 0; +} static inline int intel_dp_tunnel_atomic_add_state_for_crtc(struct intel_atomic_state *state, From f621324dfb3d6719cc9ffe65e8ec6051664ca059 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Mon, 23 Mar 2026 14:00:17 -0700 Subject: [PATCH 143/401] iomap: fix lockdep complaint when reads fail Zorro Lang reported the following lockdep splat: "While running fstests xfs/556 on kernel 7.0.0-rc4+ (HEAD=04a9f1766954), a lockdep warning was triggered indicating an inconsistent lock state for sb->s_type->i_lock_key. "The deadlock might occur because iomap_read_end_io (called from a hardware interrupt completion path) invokes fserror_report, which then calls igrab. igrab attempts to acquire the i_lock spinlock. However, the i_lock is frequently acquired in process context with interrupts enabled. If an interrupt occurs while a process holds the i_lock, and that interrupt handler calls fserror_report, the system deadlocks. "I hit this warning several times by running xfs/556 (mostly) or generic/648 on xfs. More details refer to below console log." along with this dmesg, for which I've cleaned up the stacktraces: run fstests xfs/556 at 2026-03-18 20:05:30 XFS (sda3): Mounting V5 Filesystem 396e9164-c45a-4e05-be9d-b38c2c5c6477 XFS (sda3): Ending clean mount XFS (sda3): Unmounting Filesystem 396e9164-c45a-4e05-be9d-b38c2c5c6477 XFS (sda3): Mounting V5 Filesystem bf3f89c3-3c45-4650-a9c7-744f39c0191e XFS (sda3): Ending clean mount XFS (sda3): Unmounting Filesystem bf3f89c3-3c45-4650-a9c7-744f39c0191e XFS (dm-0): Mounting V5 Filesystem bf3f89c3-3c45-4650-a9c7-744f39c0191e XFS (dm-0): Ending clean mount device-mapper: table: 253:0: adding target device (start sect 209 len 1) caused an alignment inconsistency device-mapper: table: 253:0: adding target device (start sect 210 len 62914350) caused an alignment inconsistency buffer_io_error: 6 callbacks suppressed Buffer I/O error on dev dm-0, logical block 209, async page read Buffer I/O error on dev dm-0, logical block 209, async page read XFS (dm-0): Unmounting Filesystem bf3f89c3-3c45-4650-a9c7-744f39c0191e XFS (dm-0): Mounting V5 Filesystem bf3f89c3-3c45-4650-a9c7-744f39c0191e XFS (dm-0): Ending clean mount ================================ WARNING: inconsistent lock state 7.0.0-rc4+ #1 Tainted: G S W -------------------------------- inconsistent {HARDIRQ-ON-W} -> {IN-HARDIRQ-W} usage. od/2368602 [HC1[1]:SC0[0]:HE0:SE1] takes: ff1100069f2b4a98 (&sb->s_type->i_lock_key#31){?.+.}-{3:3}, at: igrab+0x28/0x1a0 {HARDIRQ-ON-W} state was registered at: __lock_acquire+0x40d/0xbd0 lock_acquire.part.0+0xbd/0x260 _raw_spin_lock+0x37/0x80 unlock_new_inode+0x66/0x2a0 xfs_iget+0x67b/0x7b0 [xfs] xfs_mountfs+0xde4/0x1c80 [xfs] xfs_fs_fill_super+0xe86/0x17a0 [xfs] get_tree_bdev_flags+0x312/0x590 vfs_get_tree+0x8d/0x2f0 vfs_cmd_create+0xb2/0x240 __do_sys_fsconfig+0x3d8/0x9a0 do_syscall_64+0x13a/0x1520 entry_SYSCALL_64_after_hwframe+0x76/0x7e irq event stamp: 3118 hardirqs last enabled at (3117): [] _raw_spin_unlock_irq+0x28/0x50 hardirqs last disabled at (3118): [] common_interrupt+0x19/0xe0 softirqs last enabled at (3040): [] handle_softirqs+0x6b8/0x950 softirqs last disabled at (3023): [] __irq_exit_rcu+0xfd/0x250 other info that might help us debug this: Possible unsafe locking scenario: CPU0 ---- lock(&sb->s_type->i_lock_key#31); lock(&sb->s_type->i_lock_key#31); *** DEADLOCK *** 1 lock held by od/2368602: #0: ff1100069f2b4b58 (&sb->s_type->i_mutex_key#19){++++}-{4:4}, at: xfs_ilock+0x324/0x4b0 [xfs] stack backtrace: CPU: 15 UID: 0 PID: 2368602 Comm: od Kdump: loaded Tainted: G S W 7.0.0-rc4+ #1 PREEMPT(full) Tainted: [S]=CPU_OUT_OF_SPEC, [W]=WARN Hardware name: Dell Inc. PowerEdge R660/0R5JJC, BIOS 2.1.5 03/14/2024 Call Trace: dump_stack_lvl+0x6f/0xb0 print_usage_bug.part.0+0x230/0x2c0 mark_lock_irq+0x3ce/0x5b0 mark_lock+0x1cb/0x3d0 mark_usage+0x109/0x120 __lock_acquire+0x40d/0xbd0 lock_acquire.part.0+0xbd/0x260 _raw_spin_lock+0x37/0x80 igrab+0x28/0x1a0 fserror_report+0x127/0x2d0 iomap_finish_folio_read+0x13c/0x280 iomap_read_end_io+0x10e/0x2c0 clone_endio+0x37e/0x780 [dm_mod] blk_update_request+0x448/0xf00 scsi_end_request+0x74/0x750 scsi_io_completion+0xe9/0x7c0 _scsih_io_done+0x6ba/0x1ca0 [mpt3sas] _base_process_reply_queue+0x249/0x15b0 [mpt3sas] _base_interrupt+0x95/0xe0 [mpt3sas] __handle_irq_event_percpu+0x1f0/0x780 handle_irq_event+0xa9/0x1c0 handle_edge_irq+0x2ef/0x8a0 __common_interrupt+0xa0/0x170 common_interrupt+0xb7/0xe0 asm_common_interrupt+0x26/0x40 RIP: 0010:_raw_spin_unlock_irq+0x2e/0x50 Code: 0f 1f 44 00 00 53 48 8b 74 24 08 48 89 fb 48 83 c7 18 e8 b5 73 5e fd 48 89 df e8 ed e2 5e fd e8 08 78 8f fd fb bf 01 00 00 00 8d 56 4d fd 65 8b 05 46 d5 1d 03 85 c0 74 06 5b c3 cc cc cc cc RSP: 0018:ffa0000027d07538 EFLAGS: 00000206 RAX: 0000000000000c2d RBX: ffffffffb6614bc8 RCX: 0000000000000080 RDX: 0000000000000000 RSI: ffffffffb6306a01 RDI: 0000000000000001 RBP: 0000000000000000 R08: 0000000000000001 R09: 0000000000000000 R10: ffffffffb75efc67 R11: 0000000000000001 R12: ff1100015ada0000 R13: 0000000000000083 R14: 0000000000000002 R15: ffffffffb6614c10 folio_wait_bit_common+0x407/0x780 filemap_update_page+0x8e7/0xbd0 filemap_get_pages+0x904/0xc50 filemap_read+0x320/0xc20 xfs_file_buffered_read+0x2aa/0x380 [xfs] xfs_file_read_iter+0x263/0x4a0 [xfs] vfs_read+0x6cb/0xb70 ksys_read+0xf9/0x1d0 do_syscall_64+0x13a/0x1520 Zorro's diagnosis makes sense, so the solution is to kick the failed read handling to a workqueue much like we added for writeback ioends in commit 294f54f849d846 ("fserror: fix lockdep complaint when igrabbing inode"). Cc: Zorro Lang Link: https://lore.kernel.org/linux-xfs/20260319194303.efw4wcu7c4idhthz@doltdoltdolt/ Fixes: a9d573ee88af98 ("iomap: report file I/O errors to the VFS") Signed-off-by: "Darrick J. Wong" Link: https://patch.msgid.link/20260323210017.GL6223@frogsfrogsfrogs Reviewed-by: Christoph Hellwig Signed-off-by: Christian Brauner --- fs/iomap/bio.c | 51 +++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 50 insertions(+), 1 deletion(-) diff --git a/fs/iomap/bio.c b/fs/iomap/bio.c index fc045f2e4c45..edd908183058 100644 --- a/fs/iomap/bio.c +++ b/fs/iomap/bio.c @@ -8,7 +8,10 @@ #include "internal.h" #include "trace.h" -static void iomap_read_end_io(struct bio *bio) +static DEFINE_SPINLOCK(failed_read_lock); +static struct bio_list failed_read_list = BIO_EMPTY_LIST; + +static void __iomap_read_end_io(struct bio *bio) { int error = blk_status_to_errno(bio->bi_status); struct folio_iter fi; @@ -18,6 +21,52 @@ static void iomap_read_end_io(struct bio *bio) bio_put(bio); } +static void +iomap_fail_reads( + struct work_struct *work) +{ + struct bio *bio; + struct bio_list tmp = BIO_EMPTY_LIST; + unsigned long flags; + + spin_lock_irqsave(&failed_read_lock, flags); + bio_list_merge_init(&tmp, &failed_read_list); + spin_unlock_irqrestore(&failed_read_lock, flags); + + while ((bio = bio_list_pop(&tmp)) != NULL) { + __iomap_read_end_io(bio); + cond_resched(); + } +} + +static DECLARE_WORK(failed_read_work, iomap_fail_reads); + +static void iomap_fail_buffered_read(struct bio *bio) +{ + unsigned long flags; + + /* + * Bounce I/O errors to a workqueue to avoid nested i_lock acquisitions + * in the fserror code. The caller no longer owns the bio reference + * after the spinlock drops. + */ + spin_lock_irqsave(&failed_read_lock, flags); + if (bio_list_empty(&failed_read_list)) + WARN_ON_ONCE(!schedule_work(&failed_read_work)); + bio_list_add(&failed_read_list, bio); + spin_unlock_irqrestore(&failed_read_lock, flags); +} + +static void iomap_read_end_io(struct bio *bio) +{ + if (bio->bi_status) { + iomap_fail_buffered_read(bio); + return; + } + + __iomap_read_end_io(bio); +} + static void iomap_bio_submit_read(struct iomap_read_folio_ctx *ctx) { struct bio *bio = ctx->read_ctx; From e8ab57b56402697a9bef50b71aecc613f0d61846 Mon Sep 17 00:00:00 2001 From: Karol Wachowski Date: Mon, 23 Mar 2026 10:50:29 +0100 Subject: [PATCH 144/401] accel/ivpu: Add disable clock relinquish workaround for NVL-A0 Turn on disable clock relinquish workaround for Nova Lake A0. Without this workaround NPU may not power off correctly after inference, leading to unexpected system behavior. Fixes: 550f4dd2cedd ("accel/ivpu: Add support for Nova Lake's NPU") Cc: # v6.19+ Reviewed-by: Lizhi.hou Signed-off-by: Karol Wachowski Link: https://patch.msgid.link/20260323095029.64613-1-karol.wachowski@linux.intel.com --- drivers/accel/ivpu/ivpu_drv.h | 1 + drivers/accel/ivpu/ivpu_hw.c | 6 ++++-- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/accel/ivpu/ivpu_drv.h b/drivers/accel/ivpu/ivpu_drv.h index 5b34b6f50e69..f1b6155065ff 100644 --- a/drivers/accel/ivpu/ivpu_drv.h +++ b/drivers/accel/ivpu/ivpu_drv.h @@ -35,6 +35,7 @@ #define IVPU_HW_IP_60XX 60 #define IVPU_HW_IP_REV_LNL_B0 4 +#define IVPU_HW_IP_REV_NVL_A0 0 #define IVPU_HW_BTRS_MTL 1 #define IVPU_HW_BTRS_LNL 2 diff --git a/drivers/accel/ivpu/ivpu_hw.c b/drivers/accel/ivpu/ivpu_hw.c index d69cd0d93569..d4a9bcda4100 100644 --- a/drivers/accel/ivpu/ivpu_hw.c +++ b/drivers/accel/ivpu/ivpu_hw.c @@ -70,8 +70,10 @@ static void wa_init(struct ivpu_device *vdev) if (ivpu_hw_btrs_gen(vdev) == IVPU_HW_BTRS_MTL) vdev->wa.interrupt_clear_with_0 = ivpu_hw_btrs_irqs_clear_with_0_mtl(vdev); - if (ivpu_device_id(vdev) == PCI_DEVICE_ID_LNL && - ivpu_revision(vdev) < IVPU_HW_IP_REV_LNL_B0) + if ((ivpu_device_id(vdev) == PCI_DEVICE_ID_LNL && + ivpu_revision(vdev) < IVPU_HW_IP_REV_LNL_B0) || + (ivpu_device_id(vdev) == PCI_DEVICE_ID_NVL && + ivpu_revision(vdev) == IVPU_HW_IP_REV_NVL_A0)) vdev->wa.disable_clock_relinquish = true; if (ivpu_test_mode & IVPU_TEST_MODE_CLK_RELINQ_ENABLE) From ec66ec6a5a8f53e7c70085749e8d68f4431c630f Mon Sep 17 00:00:00 2001 From: Pagadala Yesu Anjaneyulu Date: Tue, 24 Mar 2026 11:33:24 +0200 Subject: [PATCH 145/401] wifi: iwlwifi: mld: Fix MLO scan timing Calculate MLO scan start time based on actual scan start notification from firmware instead of recording time when scan command is sent. Currently, MLO scan start time was captured immediately after sending the scan command to firmware. However, the actual scan start time may differ due to the FW being busy with a previous scan. In that case, the link selection code will think that the MLO scan is too old, and will warn. To fix it, Implement start scan notification handling to capture the precise moment when firmware begins the scan operation. Fixes: 9324731b9985 ("wifi: iwlwifi: mld: avoid selecting bad links") Signed-off-by: Pagadala Yesu Anjaneyulu Signed-off-by: Miri Korenblit Link: https://patch.msgid.link/20260324113316.4c56b8bac533.I6e656d8cc30bb82c96aabadedd62bd67f4c46bf9@changeid --- .../wireless/intel/iwlwifi/fw/api/commands.h | 5 ++++ .../net/wireless/intel/iwlwifi/fw/api/scan.h | 10 +++++++ drivers/net/wireless/intel/iwlwifi/mld/mld.c | 1 + drivers/net/wireless/intel/iwlwifi/mld/mlo.c | 4 +-- .../net/wireless/intel/iwlwifi/mld/notif.c | 5 ++++ drivers/net/wireless/intel/iwlwifi/mld/scan.c | 30 +++++++++++++++++-- drivers/net/wireless/intel/iwlwifi/mld/scan.h | 9 ++++-- 7 files changed, 56 insertions(+), 8 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/fw/api/commands.h b/drivers/net/wireless/intel/iwlwifi/fw/api/commands.h index 8d64a271bb94..36159a769916 100644 --- a/drivers/net/wireless/intel/iwlwifi/fw/api/commands.h +++ b/drivers/net/wireless/intel/iwlwifi/fw/api/commands.h @@ -296,6 +296,11 @@ enum iwl_legacy_cmds { */ SCAN_OFFLOAD_UPDATE_PROFILES_CMD = 0x6E, + /** + * @SCAN_START_NOTIFICATION_UMAC: uses &struct iwl_umac_scan_start + */ + SCAN_START_NOTIFICATION_UMAC = 0xb2, + /** * @MATCH_FOUND_NOTIFICATION: scan match found */ diff --git a/drivers/net/wireless/intel/iwlwifi/fw/api/scan.h b/drivers/net/wireless/intel/iwlwifi/fw/api/scan.h index 60f0a4924ddf..46fcc32608e3 100644 --- a/drivers/net/wireless/intel/iwlwifi/fw/api/scan.h +++ b/drivers/net/wireless/intel/iwlwifi/fw/api/scan.h @@ -1156,6 +1156,16 @@ enum iwl_umac_scan_abort_status { IWL_UMAC_SCAN_ABORT_STATUS_NOT_FOUND, }; +/** + * struct iwl_umac_scan_start - scan start notification + * @uid: scan id, &enum iwl_umac_scan_uid_offsets + * @reserved: for future use + */ +struct iwl_umac_scan_start { + __le32 uid; + __le32 reserved; +} __packed; /* SCAN_START_UMAC_API_S_VER_1 */ + /** * struct iwl_umac_scan_complete - scan complete notification * @uid: scan id, &enum iwl_umac_scan_uid_offsets diff --git a/drivers/net/wireless/intel/iwlwifi/mld/mld.c b/drivers/net/wireless/intel/iwlwifi/mld/mld.c index 495e9d8f3af6..9af79297c3b6 100644 --- a/drivers/net/wireless/intel/iwlwifi/mld/mld.c +++ b/drivers/net/wireless/intel/iwlwifi/mld/mld.c @@ -171,6 +171,7 @@ static const struct iwl_hcmd_names iwl_mld_legacy_names[] = { HCMD_NAME(MISSED_BEACONS_NOTIFICATION), HCMD_NAME(MAC_PM_POWER_TABLE), HCMD_NAME(MFUART_LOAD_NOTIFICATION), + HCMD_NAME(SCAN_START_NOTIFICATION_UMAC), HCMD_NAME(RSS_CONFIG_CMD), HCMD_NAME(SCAN_ITERATION_COMPLETE_UMAC), HCMD_NAME(REPLY_RX_MPDU_CMD), diff --git a/drivers/net/wireless/intel/iwlwifi/mld/mlo.c b/drivers/net/wireless/intel/iwlwifi/mld/mlo.c index f842f5183223..fbff5915f7fd 100644 --- a/drivers/net/wireless/intel/iwlwifi/mld/mlo.c +++ b/drivers/net/wireless/intel/iwlwifi/mld/mlo.c @@ -739,7 +739,7 @@ iwl_mld_set_link_sel_data(struct iwl_mld *mld, /* Ignore any BSS that was not seen in the last MLO scan */ if (ktime_before(link_conf->bss->ts_boottime, - mld->scan.last_mlo_scan_time)) + mld->scan.last_mlo_scan_start_time)) continue; data[n_data].link_id = link_id; @@ -945,7 +945,7 @@ static void _iwl_mld_select_links(struct iwl_mld *mld, if (!mld_vif->authorized || hweight16(usable_links) <= 1) return; - if (WARN(ktime_before(mld->scan.last_mlo_scan_time, + if (WARN(ktime_before(mld->scan.last_mlo_scan_start_time, ktime_sub_ns(ktime_get_boottime_ns(), 5ULL * NSEC_PER_SEC)), "Last MLO scan was too long ago, can't select links\n")) diff --git a/drivers/net/wireless/intel/iwlwifi/mld/notif.c b/drivers/net/wireless/intel/iwlwifi/mld/notif.c index 240526d8b632..9c88a8579a75 100644 --- a/drivers/net/wireless/intel/iwlwifi/mld/notif.c +++ b/drivers/net/wireless/intel/iwlwifi/mld/notif.c @@ -287,6 +287,8 @@ static void iwl_mld_handle_beacon_notification(struct iwl_mld *mld, * at least enough bytes to cover the structure listed in the CMD_VER_ENTRY. */ +CMD_VERSIONS(scan_start_notif, + CMD_VER_ENTRY(1, iwl_umac_scan_start)) CMD_VERSIONS(scan_complete_notif, CMD_VER_ENTRY(1, iwl_umac_scan_complete)) CMD_VERSIONS(scan_iter_complete_notif, @@ -360,6 +362,7 @@ DEFINE_SIMPLE_CANCELLATION(datapath_monitor, iwl_datapath_monitor_notif, link_id) DEFINE_SIMPLE_CANCELLATION(roc, iwl_roc_notif, activity) DEFINE_SIMPLE_CANCELLATION(scan_complete, iwl_umac_scan_complete, uid) +DEFINE_SIMPLE_CANCELLATION(scan_start, iwl_umac_scan_start, uid) DEFINE_SIMPLE_CANCELLATION(probe_resp_data, iwl_probe_resp_data_notif, mac_id) DEFINE_SIMPLE_CANCELLATION(uapsd_misbehaving_ap, iwl_uapsd_misbehaving_ap_notif, @@ -402,6 +405,8 @@ const struct iwl_rx_handler iwl_mld_rx_handlers[] = { RX_HANDLER_SYNC) RX_HANDLER_NO_OBJECT(LEGACY_GROUP, BA_NOTIF, compressed_ba_notif, RX_HANDLER_SYNC) + RX_HANDLER_OF_SCAN(LEGACY_GROUP, SCAN_START_NOTIFICATION_UMAC, + scan_start_notif) RX_HANDLER_OF_SCAN(LEGACY_GROUP, SCAN_COMPLETE_UMAC, scan_complete_notif) RX_HANDLER_NO_OBJECT(LEGACY_GROUP, SCAN_ITERATION_COMPLETE_UMAC, diff --git a/drivers/net/wireless/intel/iwlwifi/mld/scan.c b/drivers/net/wireless/intel/iwlwifi/mld/scan.c index a1a4cf3ab3d3..abd4281b4b0e 100644 --- a/drivers/net/wireless/intel/iwlwifi/mld/scan.c +++ b/drivers/net/wireless/intel/iwlwifi/mld/scan.c @@ -473,6 +473,9 @@ iwl_mld_scan_get_cmd_gen_flags(struct iwl_mld *mld, params->flags & NL80211_SCAN_FLAG_COLOCATED_6GHZ) flags |= IWL_UMAC_SCAN_GEN_FLAGS_V2_TRIGGER_UHB_SCAN; + if (scan_status == IWL_MLD_SCAN_INT_MLO) + flags |= IWL_UMAC_SCAN_GEN_FLAGS_V2_NTF_START; + if (params->enable_6ghz_passive) flags |= IWL_UMAC_SCAN_GEN_FLAGS_V2_6GHZ_PASSIVE_SCAN; @@ -1817,9 +1820,6 @@ static void iwl_mld_int_mlo_scan_start(struct iwl_mld *mld, ret = _iwl_mld_single_scan_start(mld, vif, req, &ies, IWL_MLD_SCAN_INT_MLO); - if (!ret) - mld->scan.last_mlo_scan_time = ktime_get_boottime_ns(); - IWL_DEBUG_SCAN(mld, "Internal MLO scan: ret=%d\n", ret); } @@ -1904,6 +1904,30 @@ void iwl_mld_handle_match_found_notif(struct iwl_mld *mld, ieee80211_sched_scan_results(mld->hw); } +void iwl_mld_handle_scan_start_notif(struct iwl_mld *mld, + struct iwl_rx_packet *pkt) +{ + struct iwl_umac_scan_complete *notif = (void *)pkt->data; + u32 uid = le32_to_cpu(notif->uid); + + if (IWL_FW_CHECK(mld, uid >= ARRAY_SIZE(mld->scan.uid_status), + "FW reports out-of-range scan UID %d\n", uid)) + return; + + if (IWL_FW_CHECK(mld, !(mld->scan.uid_status[uid] & mld->scan.status), + "FW reports scan UID %d we didn't trigger\n", uid)) + return; + + IWL_DEBUG_SCAN(mld, "Scan started: uid=%u type=%u\n", uid, + mld->scan.uid_status[uid]); + if (IWL_FW_CHECK(mld, mld->scan.uid_status[uid] != IWL_MLD_SCAN_INT_MLO, + "FW reports scan start notification %d we didn't trigger\n", + mld->scan.uid_status[uid])) + return; + + mld->scan.last_mlo_scan_start_time = ktime_get_boottime_ns(); +} + void iwl_mld_handle_scan_complete_notif(struct iwl_mld *mld, struct iwl_rx_packet *pkt) { diff --git a/drivers/net/wireless/intel/iwlwifi/mld/scan.h b/drivers/net/wireless/intel/iwlwifi/mld/scan.h index 69110f0cfc8e..de5620e7f463 100644 --- a/drivers/net/wireless/intel/iwlwifi/mld/scan.h +++ b/drivers/net/wireless/intel/iwlwifi/mld/scan.h @@ -27,6 +27,9 @@ int iwl_mld_sched_scan_start(struct iwl_mld *mld, void iwl_mld_handle_match_found_notif(struct iwl_mld *mld, struct iwl_rx_packet *pkt); +void iwl_mld_handle_scan_start_notif(struct iwl_mld *mld, + struct iwl_rx_packet *pkt); + void iwl_mld_handle_scan_complete_notif(struct iwl_mld *mld, struct iwl_rx_packet *pkt); @@ -114,8 +117,8 @@ enum iwl_mld_traffic_load { * in jiffies. * @last_start_time_jiffies: stores the last start time in jiffies * (interface up/reset/resume). - * @last_mlo_scan_time: start time of the last MLO scan in nanoseconds since - * boot. + * @last_mlo_scan_start_time: start time of the last MLO scan in nanoseconds + * since boot. */ struct iwl_mld_scan { /* Add here fields that need clean up on restart */ @@ -136,7 +139,7 @@ struct iwl_mld_scan { void *cmd; unsigned long last_6ghz_passive_jiffies; unsigned long last_start_time_jiffies; - u64 last_mlo_scan_time; + u64 last_mlo_scan_start_time; }; /** From 323156c3541e23da7e582008a7ac30cd51b60acd Mon Sep 17 00:00:00 2001 From: Emmanuel Grumbach Date: Tue, 24 Mar 2026 11:33:25 +0200 Subject: [PATCH 146/401] wifi: iwlwifi: mvm: don't send a 6E related command when not supported MCC_ALLOWED_AP_TYPE_CMD is related to 6E support. Do not send it if the device doesn't support 6E. Apparently, the firmware is mistakenly advertising support for this command even on AX201 which does not support 6E and then the firmware crashes. Fixes: 0d2fc8821a7d ("wifi: iwlwifi: nvm: parse the VLP/AFC bit from regulatory") Closes: https://bugzilla.kernel.org/show_bug.cgi?id=220804 Signed-off-by: Emmanuel Grumbach Reviewed-by: Johannes Berg Signed-off-by: Miri Korenblit Link: https://patch.msgid.link/20260324113316.e171f0163f2a.I0c444d1f82d1773054e7ffc391ad49697d58f44e@changeid --- drivers/net/wireless/intel/iwlwifi/mvm/fw.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/fw.c b/drivers/net/wireless/intel/iwlwifi/mvm/fw.c index 43cf94c9a36b..6cc78661116e 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/fw.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/fw.c @@ -470,7 +470,8 @@ static void iwl_mvm_uats_init(struct iwl_mvm *mvm) .dataflags[0] = IWL_HCMD_DFL_NOCOPY, }; - if (mvm->trans->mac_cfg->device_family < IWL_DEVICE_FAMILY_AX210) { + if (mvm->trans->mac_cfg->device_family < IWL_DEVICE_FAMILY_AX210 || + !mvm->trans->cfg->uhb_supported) { IWL_DEBUG_RADIO(mvm, "UATS feature is not supported\n"); return; } From 687a95d204e72e52f2e6bc7a994cc82f76b2678f Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 24 Mar 2026 11:33:26 +0200 Subject: [PATCH 147/401] wifi: iwlwifi: mld: correctly set wifi generation data In each MAC context, the firmware expects the wifi generation data, i.e. whether or not HE/EHT (and in the future UHR) is enabled on that MAC. However, this is currently handled wrong in two ways: - EHT is only enabled when the interface is also an MLD, but we currently allow (despite the spec) connecting with EHT but without MLO. - when HE or EHT are used by TDLS peers, the firmware needs to have them enabled regardless of the AP Fix this by iterating setting up the data depending on the interface type: - for AP, just set it according to the BSS configuration - for monitor, set it according to HW capabilities - otherwise, particularly for client, iterate all stations and then their links on the interface in question and set according to their capabilities, this handles the AP and TDLS peers. Re-calculate this whenever a TDLS station is marked associated or removed so that it's kept updated, for the AP it's already updated on assoc/disassoc. Fixes: d1e879ec600f ("wifi: iwlwifi: add iwlmld sub-driver") Signed-off-by: Johannes Berg Signed-off-by: Miri Korenblit Link: https://patch.msgid.link/20260319110722.404713b22177.Ic972b5e557d011a5438f8f97c1e793cc829e2ea9@changeid Link: https://patch.msgid.link/20260324093333.2953495-1-miriam.rachel.korenblit@intel.com --- .../net/wireless/intel/iwlwifi/mld/iface.c | 101 ++++++++++++------ .../net/wireless/intel/iwlwifi/mld/mac80211.c | 19 ++++ 2 files changed, 88 insertions(+), 32 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mld/iface.c b/drivers/net/wireless/intel/iwlwifi/mld/iface.c index 29df747c8938..9215fc7e2eca 100644 --- a/drivers/net/wireless/intel/iwlwifi/mld/iface.c +++ b/drivers/net/wireless/intel/iwlwifi/mld/iface.c @@ -111,14 +111,75 @@ static bool iwl_mld_is_nic_ack_enabled(struct iwl_mld *mld, IEEE80211_HE_MAC_CAP2_ACK_EN); } -static void iwl_mld_set_he_support(struct iwl_mld *mld, - struct ieee80211_vif *vif, - struct iwl_mac_config_cmd *cmd) +struct iwl_mld_mac_wifi_gen_sta_iter_data { + struct ieee80211_vif *vif; + struct iwl_mac_wifi_gen_support *support; +}; + +static void iwl_mld_mac_wifi_gen_sta_iter(void *_data, + struct ieee80211_sta *sta) { - if (vif->type == NL80211_IFTYPE_AP) - cmd->wifi_gen.he_ap_support = 1; - else - cmd->wifi_gen.he_support = 1; + struct iwl_mld_sta *mld_sta = iwl_mld_sta_from_mac80211(sta); + struct iwl_mld_mac_wifi_gen_sta_iter_data *data = _data; + struct ieee80211_link_sta *link_sta; + unsigned int link_id; + + if (mld_sta->vif != data->vif) + return; + + for_each_sta_active_link(data->vif, sta, link_sta, link_id) { + if (link_sta->he_cap.has_he) + data->support->he_support = 1; + if (link_sta->eht_cap.has_eht) + data->support->eht_support = 1; + } +} + +static void iwl_mld_set_wifi_gen(struct iwl_mld *mld, + struct ieee80211_vif *vif, + struct iwl_mac_wifi_gen_support *support) +{ + struct iwl_mld_mac_wifi_gen_sta_iter_data sta_iter_data = { + .vif = vif, + .support = support, + }; + struct ieee80211_bss_conf *link_conf; + unsigned int link_id; + + switch (vif->type) { + case NL80211_IFTYPE_MONITOR: + /* for sniffer, set to HW capabilities */ + support->he_support = 1; + support->eht_support = mld->trans->cfg->eht_supported; + break; + case NL80211_IFTYPE_AP: + /* for AP set according to the link configs */ + for_each_vif_active_link(vif, link_conf, link_id) { + support->he_ap_support |= link_conf->he_support; + support->eht_support |= link_conf->eht_support; + } + break; + default: + /* + * If we have MLO enabled, then the firmware needs to enable + * address translation for the station(s) we add. That depends + * on having EHT enabled in firmware, which in turn depends on + * mac80211 in the iteration below. + * However, mac80211 doesn't enable capabilities on the AP STA + * until it has parsed the association response successfully, + * so set EHT (and HE as a pre-requisite for EHT) when the vif + * is an MLD. + */ + if (ieee80211_vif_is_mld(vif)) { + support->he_support = 1; + support->eht_support = 1; + } + + ieee80211_iterate_stations_mtx(mld->hw, + iwl_mld_mac_wifi_gen_sta_iter, + &sta_iter_data); + break; + } } /* fill the common part for all interface types */ @@ -128,8 +189,6 @@ static void iwl_mld_mac_cmd_fill_common(struct iwl_mld *mld, u32 action) { struct iwl_mld_vif *mld_vif = iwl_mld_vif_from_mac80211(vif); - struct ieee80211_bss_conf *link_conf; - unsigned int link_id; lockdep_assert_wiphy(mld->wiphy); @@ -147,29 +206,7 @@ static void iwl_mld_mac_cmd_fill_common(struct iwl_mld *mld, cmd->nic_not_ack_enabled = cpu_to_le32(!iwl_mld_is_nic_ack_enabled(mld, vif)); - /* If we have MLO enabled, then the firmware needs to enable - * address translation for the station(s) we add. That depends - * on having EHT enabled in firmware, which in turn depends on - * mac80211 in the code below. - * However, mac80211 doesn't enable HE/EHT until it has parsed - * the association response successfully, so just skip all that - * and enable both when we have MLO. - */ - if (ieee80211_vif_is_mld(vif)) { - iwl_mld_set_he_support(mld, vif, cmd); - cmd->wifi_gen.eht_support = 1; - return; - } - - for_each_vif_active_link(vif, link_conf, link_id) { - if (!link_conf->he_support) - continue; - - iwl_mld_set_he_support(mld, vif, cmd); - - /* EHT, if supported, was already set above */ - break; - } + iwl_mld_set_wifi_gen(mld, vif, &cmd->wifi_gen); } static void iwl_mld_fill_mac_cmd_sta(struct iwl_mld *mld, diff --git a/drivers/net/wireless/intel/iwlwifi/mld/mac80211.c b/drivers/net/wireless/intel/iwlwifi/mld/mac80211.c index 0c53d6bd9651..71a9a72c9ac0 100644 --- a/drivers/net/wireless/intel/iwlwifi/mld/mac80211.c +++ b/drivers/net/wireless/intel/iwlwifi/mld/mac80211.c @@ -1761,6 +1761,16 @@ static int iwl_mld_move_sta_state_up(struct iwl_mld *mld, if (vif->type == NL80211_IFTYPE_STATION) iwl_mld_link_set_2mhz_block(mld, vif, sta); + + if (sta->tdls) { + /* + * update MAC since wifi generation flags may change, + * we also update MAC on association to the AP via the + * vif assoc change + */ + iwl_mld_mac_fw_action(mld, vif, FW_CTXT_ACTION_MODIFY); + } + /* Now the link_sta's capabilities are set, update the FW */ iwl_mld_config_tlc(mld, vif, sta); @@ -1873,6 +1883,15 @@ static int iwl_mld_move_sta_state_down(struct iwl_mld *mld, /* just removed last TDLS STA, so enable PM */ iwl_mld_update_mac_power(mld, vif, false); } + + if (sta->tdls) { + /* + * update MAC since wifi generation flags may change, + * we also update MAC on disassociation to the AP via + * the vif assoc change + */ + iwl_mld_mac_fw_action(mld, vif, FW_CTXT_ACTION_MODIFY); + } } else { return -EINVAL; } From 91049ec2e18376ec2192e73ef7be4c7110436350 Mon Sep 17 00:00:00 2001 From: Jihed Chaibi Date: Sat, 21 Mar 2026 02:20:11 +0100 Subject: [PATCH 148/401] ASoC: dt-bindings: stm32: Fix incorrect compatible string in stm32h7-sai match The conditional block that defines clock constraints for the stm32h7-sai variant references "st,stm32mph7-sai", which does not match any compatible string in the enum. As a result, clock validation for the h7 variant is silently skipped. Correct the compatible string to "st,stm32h7-sai". Fixes: 8509bb1f11a1f ("ASoC: dt-bindings: add stm32mp25 support for sai") Signed-off-by: Jihed Chaibi Reviewed-by: Olivier Moysan Link: https://patch.msgid.link/20260321012011.125791-1-jihed.chaibi.dev@gmail.com Signed-off-by: Mark Brown --- Documentation/devicetree/bindings/sound/st,stm32-sai.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/devicetree/bindings/sound/st,stm32-sai.yaml b/Documentation/devicetree/bindings/sound/st,stm32-sai.yaml index 4a7129d0b157..551edf39e766 100644 --- a/Documentation/devicetree/bindings/sound/st,stm32-sai.yaml +++ b/Documentation/devicetree/bindings/sound/st,stm32-sai.yaml @@ -164,7 +164,7 @@ allOf: properties: compatible: contains: - const: st,stm32mph7-sai + const: st,stm32h7-sai then: properties: clocks: From 8121353a4bf8e38afee26299419a78ec108e14a6 Mon Sep 17 00:00:00 2001 From: Alice Ryhl Date: Tue, 24 Mar 2026 10:49:59 +0000 Subject: [PATCH 149/401] rust: regulator: do not assume that regulator_get() returns non-null The Rust `Regulator` abstraction uses `NonNull` to wrap the underlying `struct regulator` pointer. When `CONFIG_REGULATOR` is disabled, the C stub for `regulator_get` returns `NULL`. `from_err_ptr` does not treat `NULL` as an error, so it was passed to `NonNull::new_unchecked`, causing undefined behavior. Fix this by using a raw pointer `*mut bindings::regulator` instead of `NonNull`. This allows `inner` to be `NULL` when `CONFIG_REGULATOR` is disabled, and leverages the C stubs which are designed to handle `NULL` or are no-ops. Fixes: 9b614ceada7c ("rust: regulator: add a bare minimum regulator abstraction") Reported-by: Miguel Ojeda Closes: https://lore.kernel.org/r/20260322193830.89324-1-ojeda@kernel.org Signed-off-by: Alice Ryhl Reviewed-by: Daniel Almeida Link: https://patch.msgid.link/20260324-regulator-fix-v1-1-a5244afa3c15@google.com Signed-off-by: Mark Brown --- rust/kernel/regulator.rs | 33 ++++++++++++++++++--------------- 1 file changed, 18 insertions(+), 15 deletions(-) diff --git a/rust/kernel/regulator.rs b/rust/kernel/regulator.rs index 4f7837c7e53a..41e730cedc81 100644 --- a/rust/kernel/regulator.rs +++ b/rust/kernel/regulator.rs @@ -23,7 +23,10 @@ prelude::*, }; -use core::{marker::PhantomData, mem::ManuallyDrop, ptr::NonNull}; +use core::{ + marker::PhantomData, + mem::ManuallyDrop, // +}; mod private { pub trait Sealed {} @@ -229,15 +232,17 @@ pub fn devm_enable_optional(dev: &Device, name: &CStr) -> Result { /// /// # Invariants /// -/// - `inner` is a non-null wrapper over a pointer to a `struct -/// regulator` obtained from [`regulator_get()`]. +/// - `inner` is a pointer obtained from a successful call to +/// [`regulator_get()`]. It is treated as an opaque token that may only be +/// accessed using C API methods (e.g., it may be `NULL` if the C API returns +/// `NULL`). /// /// [`regulator_get()`]: https://docs.kernel.org/driver-api/regulator.html#c.regulator_get pub struct Regulator where State: RegulatorState, { - inner: NonNull, + inner: *mut bindings::regulator, _phantom: PhantomData, } @@ -249,7 +254,7 @@ pub fn set_voltage(&self, min_voltage: Voltage, max_voltage: Voltage) -> Result // SAFETY: Safe as per the type invariants of `Regulator`. to_result(unsafe { bindings::regulator_set_voltage( - self.inner.as_ptr(), + self.inner, min_voltage.as_microvolts(), max_voltage.as_microvolts(), ) @@ -259,7 +264,7 @@ pub fn set_voltage(&self, min_voltage: Voltage, max_voltage: Voltage) -> Result /// Gets the current voltage of the regulator. pub fn get_voltage(&self) -> Result { // SAFETY: Safe as per the type invariants of `Regulator`. - let voltage = unsafe { bindings::regulator_get_voltage(self.inner.as_ptr()) }; + let voltage = unsafe { bindings::regulator_get_voltage(self.inner) }; to_result(voltage).map(|()| Voltage::from_microvolts(voltage)) } @@ -270,10 +275,8 @@ fn get_internal(dev: &Device, name: &CStr) -> Result> { // received from the C code. from_err_ptr(unsafe { bindings::regulator_get(dev.as_raw(), name.as_char_ptr()) })?; - // SAFETY: We can safely trust `inner` to be a pointer to a valid - // regulator if `ERR_PTR` was not returned. - let inner = unsafe { NonNull::new_unchecked(inner) }; - + // INVARIANT: `inner` is a pointer obtained from `regulator_get()`, and + // the call was successful. Ok(Self { inner, _phantom: PhantomData, @@ -282,12 +285,12 @@ fn get_internal(dev: &Device, name: &CStr) -> Result> { fn enable_internal(&self) -> Result { // SAFETY: Safe as per the type invariants of `Regulator`. - to_result(unsafe { bindings::regulator_enable(self.inner.as_ptr()) }) + to_result(unsafe { bindings::regulator_enable(self.inner) }) } fn disable_internal(&self) -> Result { // SAFETY: Safe as per the type invariants of `Regulator`. - to_result(unsafe { bindings::regulator_disable(self.inner.as_ptr()) }) + to_result(unsafe { bindings::regulator_disable(self.inner) }) } } @@ -349,7 +352,7 @@ impl Regulator { /// Checks if the regulator is enabled. pub fn is_enabled(&self) -> bool { // SAFETY: Safe as per the type invariants of `Regulator`. - unsafe { bindings::regulator_is_enabled(self.inner.as_ptr()) != 0 } + unsafe { bindings::regulator_is_enabled(self.inner) != 0 } } } @@ -359,11 +362,11 @@ fn drop(&mut self) { // SAFETY: By the type invariants, we know that `self` owns a // reference on the enabled refcount, so it is safe to relinquish it // now. - unsafe { bindings::regulator_disable(self.inner.as_ptr()) }; + unsafe { bindings::regulator_disable(self.inner) }; } // SAFETY: By the type invariants, we know that `self` owns a reference, // so it is safe to relinquish it now. - unsafe { bindings::regulator_put(self.inner.as_ptr()) }; + unsafe { bindings::regulator_put(self.inner) }; } } From cfb385a8dc88d86a805a5682eaa68f59fa5c0ec3 Mon Sep 17 00:00:00 2001 From: Srinivas Kandagatla Date: Mon, 23 Mar 2026 23:17:48 +0000 Subject: [PATCH 150/401] ASoC: codecs: wcd934x: fix typo in dt parsing Looks like we ended up with a typo during device tree data parsing as part of 4f16b6351bbff ("ASoC: codecs: wcd: add common helper for wcd codecs") patch. This will result in not parsing the device tree data and results in zero mic bias values. Fix this by calling wcd_dt_parse_micbias_info instead of wcd_dt_parse_mbhc_data. Fixes: 4f16b6351bbff ("ASoC: codecs: wcd: add common helper for wcd codecs") Cc: Stable@vger.kernel.org Reported-by: Joel Selvaraj Signed-off-by: Srinivas Kandagatla Reviewed-by: Konrad Dybcio Link: https://patch.msgid.link/20260323231748.2217967-1-srinivas.kandagatla@oss.qualcomm.com Signed-off-by: Mark Brown --- sound/soc/codecs/wcd934x.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/codecs/wcd934x.c b/sound/soc/codecs/wcd934x.c index c8db33f78a1b..bc41a1466c70 100644 --- a/sound/soc/codecs/wcd934x.c +++ b/sound/soc/codecs/wcd934x.c @@ -2172,7 +2172,7 @@ static int wcd934x_init_dmic(struct snd_soc_component *comp) u32 def_dmic_rate, dmic_clk_drv; int ret; - ret = wcd_dt_parse_mbhc_data(comp->dev, &wcd->mbhc_cfg); + ret = wcd_dt_parse_micbias_info(&wcd->common); if (ret) return ret; From 56781a4597706cd25185b1dedc38841ec6c31496 Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Thu, 19 Mar 2026 15:30:34 -0700 Subject: [PATCH 151/401] drm/xe: Implement recent spec updates to Wa_16025250150 The hardware teams noticed that the originally documented workaround steps for Wa_16025250150 may not be sufficient to fully avoid a hardware issue. The workaround documentation has been augmented to suggest programming one additional register; make the corresponding change in the driver. Fixes: 7654d51f1fd8 ("drm/xe/xe2hpg: Add Wa_16025250150") Reviewed-by: Matt Atwood Link: https://patch.msgid.link/20260319-wa_16025250150_part2-v1-1-46b1de1a31b2@intel.com Signed-off-by: Matt Roper (cherry picked from commit a31566762d4075646a8a2214586158b681e94305) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/regs/xe_gt_regs.h | 1 + drivers/gpu/drm/xe/xe_wa.c | 3 ++- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/xe/regs/xe_gt_regs.h b/drivers/gpu/drm/xe/regs/xe_gt_regs.h index 24fc64fc832e..9d66f168ab8a 100644 --- a/drivers/gpu/drm/xe/regs/xe_gt_regs.h +++ b/drivers/gpu/drm/xe/regs/xe_gt_regs.h @@ -553,6 +553,7 @@ #define ENABLE_SMP_LD_RENDER_SURFACE_CONTROL REG_BIT(44 - 32) #define FORCE_SLM_FENCE_SCOPE_TO_TILE REG_BIT(42 - 32) #define FORCE_UGM_FENCE_SCOPE_TO_TILE REG_BIT(41 - 32) +#define L3_128B_256B_WRT_DIS REG_BIT(40 - 32) #define MAXREQS_PER_BANK REG_GENMASK(39 - 32, 37 - 32) #define DISABLE_128B_EVICTION_COMMAND_UDW REG_BIT(36 - 32) diff --git a/drivers/gpu/drm/xe/xe_wa.c b/drivers/gpu/drm/xe/xe_wa.c index 462c2fa712e0..d7e309ad9aba 100644 --- a/drivers/gpu/drm/xe/xe_wa.c +++ b/drivers/gpu/drm/xe/xe_wa.c @@ -247,7 +247,8 @@ static const struct xe_rtp_entry_sr gt_was[] = { LSN_DIM_Z_WGT_MASK, LSN_LNI_WGT(1) | LSN_LNE_WGT(1) | LSN_DIM_X_WGT(1) | LSN_DIM_Y_WGT(1) | - LSN_DIM_Z_WGT(1))) + LSN_DIM_Z_WGT(1)), + SET(LSC_CHICKEN_BIT_0_UDW, L3_128B_256B_WRT_DIS)) }, /* Xe2_HPM */ From e225b36f83d7926c1f2035923bb0359d851fdb73 Mon Sep 17 00:00:00 2001 From: Reshma Immaculate Rajkumar Date: Thu, 19 Mar 2026 12:26:08 +0530 Subject: [PATCH 152/401] wifi: ath11k: Pass the correct value of each TID during a stop AMPDU session During ongoing traffic, a request to stop an AMPDU session for one TID could incorrectly affect other active sessions. This can happen because an incorrect TID reference would be passed when updating the BA session state, causing the wrong session to be stopped. As a result, the affected session would be reduced to a minimal BA size, leading to a noticeable throughput degradation. Fix this issue by passing the correct argument from ath11k_dp_rx_ampdu_stop() to ath11k_peer_rx_tid_reo_update() during a stop AMPDU session. Instead of passing peer->tx_tid, which is the base address of the array, corresponding to TID 0; pass the value of &peer->rx_tid[params->tid], where the different TID numbers are accounted for. Tested-on: QCN9074 hw1.0 PCI WLAN.HK.2.9.0.1-02146-QCAHKSWPL_SILICONZ-1 Fixes: d5c65159f2895 ("ath11k: driver for Qualcomm IEEE 802.11ax devices") Signed-off-by: Reshma Immaculate Rajkumar Reviewed-by: Baochen Qiang Reviewed-by: Vasanthakumar Thiagarajan Link: https://patch.msgid.link/20260319065608.2408179-1-reshma.rajkumar@oss.qualcomm.com Signed-off-by: Jeff Johnson --- drivers/net/wireless/ath/ath11k/dp_rx.c | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/drivers/net/wireless/ath/ath11k/dp_rx.c b/drivers/net/wireless/ath/ath11k/dp_rx.c index 49d959b2e148..85defe11750d 100644 --- a/drivers/net/wireless/ath/ath11k/dp_rx.c +++ b/drivers/net/wireless/ath/ath11k/dp_rx.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: BSD-3-Clause-Clear /* * Copyright (c) 2018-2019 The Linux Foundation. All rights reserved. - * Copyright (c) 2021-2025 Qualcomm Innovation Center, Inc. All rights reserved. + * Copyright (c) Qualcomm Technologies, Inc. and/or its subsidiaries. */ #include @@ -1110,9 +1110,8 @@ int ath11k_dp_rx_ampdu_stop(struct ath11k *ar, struct ath11k_base *ab = ar->ab; struct ath11k_peer *peer; struct ath11k_sta *arsta = ath11k_sta_to_arsta(params->sta); + struct dp_rx_tid *rx_tid; int vdev_id = arsta->arvif->vdev_id; - dma_addr_t paddr; - bool active; int ret; spin_lock_bh(&ab->base_lock); @@ -1124,15 +1123,14 @@ int ath11k_dp_rx_ampdu_stop(struct ath11k *ar, return -ENOENT; } - paddr = peer->rx_tid[params->tid].paddr; - active = peer->rx_tid[params->tid].active; + rx_tid = &peer->rx_tid[params->tid]; - if (!active) { + if (!rx_tid->active) { spin_unlock_bh(&ab->base_lock); return 0; } - ret = ath11k_peer_rx_tid_reo_update(ar, peer, peer->rx_tid, 1, 0, false); + ret = ath11k_peer_rx_tid_reo_update(ar, peer, rx_tid, 1, 0, false); spin_unlock_bh(&ab->base_lock); if (ret) { ath11k_warn(ab, "failed to update reo for rx tid %d: %d\n", @@ -1141,7 +1139,8 @@ int ath11k_dp_rx_ampdu_stop(struct ath11k *ar, } ret = ath11k_wmi_peer_rx_reorder_queue_setup(ar, vdev_id, - params->sta->addr, paddr, + params->sta->addr, + rx_tid->paddr, params->tid, 1, 1); if (ret) ath11k_warn(ab, "failed to send wmi to delete rx tid %d\n", From 4242625f272974dd1947f73b10d884eab3b277cd Mon Sep 17 00:00:00 2001 From: Reshma Immaculate Rajkumar Date: Fri, 27 Feb 2026 16:31:23 +0530 Subject: [PATCH 153/401] wifi: ath12k: Pass the correct value of each TID during a stop AMPDU session With traffic ongoing for data TID [TID 0], an DELBA request to stop AMPDU for the BA session was received on management TID [TID 4]. The corresponding TID number was incorrectly passed to stop the BA session, resulting in the BA session for data TIDs being stopped and the BA size being reduced to 1, causing an overall dip in TCP throughput. Fix this issue by passing the correct argument from ath12k_dp_rx_ampdu_stop() to ath12k_dp_arch_peer_rx_tid_reo_update() during an AMPDU stop session. Instead of passing peer->dp_peer->rx_tid, which is the base address of the array, corresponding to TID 0, pass the value of &peer->dp_peer->rx_tid[params->tid]. With this, the different TID numbers are accounted for. Tested-on: QCN9274 hw2.0 PCI WLAN.WBE.1.5-01651-QCAHKSWPL_SILICONZ-1 Fixes: d889913205cf ("wifi: ath12k: driver for Qualcomm Wi-Fi 7 devices") Signed-off-by: Reshma Immaculate Rajkumar Reviewed-by: Baochen Qiang Reviewed-by: Vasanthakumar Thiagarajan Link: https://patch.msgid.link/20260227110123.3726354-1-reshma.rajkumar@oss.qualcomm.com Signed-off-by: Jeff Johnson --- drivers/net/wireless/ath/ath12k/dp_rx.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/ath/ath12k/dp_rx.c b/drivers/net/wireless/ath/ath12k/dp_rx.c index 5a82ede65dd3..244d5230a5bd 100644 --- a/drivers/net/wireless/ath/ath12k/dp_rx.c +++ b/drivers/net/wireless/ath/ath12k/dp_rx.c @@ -735,6 +735,7 @@ int ath12k_dp_rx_ampdu_stop(struct ath12k *ar, struct ath12k_dp *dp = ath12k_ab_to_dp(ab); struct ath12k_dp_link_peer *peer; struct ath12k_sta *ahsta = ath12k_sta_to_ahsta(params->sta); + struct ath12k_dp_rx_tid *rx_tid; struct ath12k_link_sta *arsta; int vdev_id; bool active; @@ -770,7 +771,8 @@ int ath12k_dp_rx_ampdu_stop(struct ath12k *ar, return 0; } - ret = ath12k_dp_arch_peer_rx_tid_reo_update(dp, peer, peer->dp_peer->rx_tid, + rx_tid = &peer->dp_peer->rx_tid[params->tid]; + ret = ath12k_dp_arch_peer_rx_tid_reo_update(dp, peer, rx_tid, 1, 0, false); spin_unlock_bh(&dp->dp_lock); if (ret) { From d049e56b1739101d1c4d81deedb269c52a8dbba0 Mon Sep 17 00:00:00 2001 From: Yasuaki Torimaru Date: Tue, 24 Mar 2026 19:06:24 +0900 Subject: [PATCH 154/401] wifi: wilc1000: fix u8 overflow in SSID scan buffer size calculation The variable valuesize is declared as u8 but accumulates the total length of all SSIDs to scan. Each SSID contributes up to 33 bytes (IEEE80211_MAX_SSID_LEN + 1), and with WILC_MAX_NUM_PROBED_SSID (10) SSIDs the total can reach 330, which wraps around to 74 when stored in a u8. This causes kmalloc to allocate only 75 bytes while the subsequent memcpy writes up to 331 bytes into the buffer, resulting in a 256-byte heap buffer overflow. Widen valuesize from u8 to u32 to accommodate the full range. Fixes: c5c77ba18ea6 ("staging: wilc1000: Add SDIO/SPI 802.11 driver") Cc: stable@vger.kernel.org Signed-off-by: Yasuaki Torimaru Link: https://patch.msgid.link/20260324100624.983458-1-yasuakitorimaru@gmail.com Signed-off-by: Johannes Berg --- drivers/net/wireless/microchip/wilc1000/hif.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/microchip/wilc1000/hif.c b/drivers/net/wireless/microchip/wilc1000/hif.c index f354b11cb919..944b2a812b63 100644 --- a/drivers/net/wireless/microchip/wilc1000/hif.c +++ b/drivers/net/wireless/microchip/wilc1000/hif.c @@ -163,7 +163,7 @@ int wilc_scan(struct wilc_vif *vif, u8 scan_source, u32 index = 0; u32 i, scan_timeout; u8 *buffer; - u8 valuesize = 0; + u32 valuesize = 0; u8 *search_ssid_vals = NULL; const u8 ch_list_len = request->n_channels; struct host_if_drv *hif_drv = vif->hif_drv; From 0fd56fad9c56356e7fa7a7c52e7ecbf807a44eb0 Mon Sep 17 00:00:00 2001 From: Pengpeng Hou Date: Mon, 23 Mar 2026 16:08:45 +0800 Subject: [PATCH 155/401] wifi: wl1251: validate packet IDs before indexing tx_frames wl1251_tx_packet_cb() uses the firmware completion ID directly to index the fixed 16-entry wl->tx_frames[] array. The ID is a raw u8 from the completion block, and the callback does not currently verify that it fits the array before dereferencing it. Reject completion IDs that fall outside wl->tx_frames[] and keep the existing NULL check in the same guard. This keeps the fix local to the trust boundary and avoids touching the rest of the completion flow. Signed-off-by: Pengpeng Hou Link: https://patch.msgid.link/20260323080845.40033-1-pengpeng@iscas.ac.cn Signed-off-by: Johannes Berg --- drivers/net/wireless/ti/wl1251/tx.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/net/wireless/ti/wl1251/tx.c b/drivers/net/wireless/ti/wl1251/tx.c index 2da8c0d5105b..4489aa77bb0f 100644 --- a/drivers/net/wireless/ti/wl1251/tx.c +++ b/drivers/net/wireless/ti/wl1251/tx.c @@ -402,12 +402,14 @@ static void wl1251_tx_packet_cb(struct wl1251 *wl, int hdrlen; u8 *frame; - skb = wl->tx_frames[result->id]; - if (skb == NULL) { - wl1251_error("SKB for packet %d is NULL", result->id); + if (unlikely(result->id >= ARRAY_SIZE(wl->tx_frames) || + wl->tx_frames[result->id] == NULL)) { + wl1251_error("invalid packet id %u", result->id); return; } + skb = wl->tx_frames[result->id]; + info = IEEE80211_SKB_CB(skb); if (!(info->flags & IEEE80211_TX_CTL_NO_ACK) && From f7e775c4694782844c66da5316fed82881835cf8 Mon Sep 17 00:00:00 2001 From: Sanman Pradhan Date: Thu, 19 Mar 2026 17:31:19 +0000 Subject: [PATCH 156/401] hwmon: (pmbus/ina233) Fix error handling and sign extension in shunt voltage read ina233_read_word_data() reads MFR_READ_VSHUNT via pmbus_read_word_data() but has two issues: 1. The return value is not checked for errors before being used in arithmetic. A negative error code from a failed I2C transaction is passed directly to DIV_ROUND_CLOSEST(), producing garbage data. 2. MFR_READ_VSHUNT is a 16-bit two's complement value. Negative shunt voltages (values with bit 15 set) are treated as large positive values since pmbus_read_word_data() returns them zero-extended in an int. This leads to incorrect scaling in the VIN coefficient conversion. Fix both issues by adding an error check, casting to s16 for proper sign extension, and clamping the result to a valid non-negative range. The clamp is necessary because read_word_data callbacks must return non-negative values on success (negative values indicate errors to the pmbus core). Fixes: b64b6cb163f16 ("hwmon: Add driver for TI INA233 Current and Power Monitor") Cc: stable@vger.kernel.org Signed-off-by: Sanman Pradhan Link: https://lore.kernel.org/r/20260319173055.125271-2-sanman.pradhan@hpe.com [groeck: Fixed clamp to avoid losing the sign bit] Signed-off-by: Guenter Roeck --- drivers/hwmon/pmbus/ina233.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/hwmon/pmbus/ina233.c b/drivers/hwmon/pmbus/ina233.c index 2d8b5a5347ed..7aebd854763a 100644 --- a/drivers/hwmon/pmbus/ina233.c +++ b/drivers/hwmon/pmbus/ina233.c @@ -72,7 +72,8 @@ static int ina233_read_word_data(struct i2c_client *client, int page, /* Adjust returned value to match VIN coefficients */ /* VIN: 1.25 mV VSHUNT: 2.5 uV LSB */ - ret = DIV_ROUND_CLOSEST(ret * 25, 12500); + ret = clamp_val(DIV_ROUND_CLOSEST((s16)ret * 25, 12500), + S16_MIN, S16_MAX) & 0xffff; break; default: ret = -ENODATA; From 3075a3951f7708da5a8ab47b0b7d068a32f69e58 Mon Sep 17 00:00:00 2001 From: Sanman Pradhan Date: Thu, 19 Mar 2026 17:31:29 +0000 Subject: [PATCH 157/401] hwmon: (pmbus/isl68137) Add mutex protection for AVS enable sysfs attributes The custom avs0_enable and avs1_enable sysfs attributes access PMBus registers through the exported API helpers (pmbus_read_byte_data, pmbus_read_word_data, pmbus_write_word_data, pmbus_update_byte_data) without holding the PMBus update_lock mutex. These exported helpers do not acquire the mutex internally, unlike the core's internal callers which hold the lock before invoking them. The store callback is especially vulnerable: it performs a multi-step read-modify-write sequence (read VOUT_COMMAND, write VOUT_COMMAND, then update OPERATION) where concurrent access from another thread could interleave and corrupt the register state. Add pmbus_lock_interruptible()/pmbus_unlock() around both the show and store callbacks to serialize PMBus register access with the rest of the driver. Fixes: 038a9c3d1e424 ("hwmon: (pmbus/isl68137) Add driver for Intersil ISL68137 PWM Controller") Cc: stable@vger.kernel.org Signed-off-by: Sanman Pradhan Link: https://lore.kernel.org/r/20260319173055.125271-3-sanman.pradhan@hpe.com Signed-off-by: Guenter Roeck --- drivers/hwmon/pmbus/isl68137.c | 21 ++++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) diff --git a/drivers/hwmon/pmbus/isl68137.c b/drivers/hwmon/pmbus/isl68137.c index e7dac26b5be6..3e3a887aad05 100644 --- a/drivers/hwmon/pmbus/isl68137.c +++ b/drivers/hwmon/pmbus/isl68137.c @@ -96,7 +96,15 @@ static ssize_t isl68137_avs_enable_show_page(struct i2c_client *client, int page, char *buf) { - int val = pmbus_read_byte_data(client, page, PMBUS_OPERATION); + int val; + + val = pmbus_lock_interruptible(client); + if (val) + return val; + + val = pmbus_read_byte_data(client, page, PMBUS_OPERATION); + + pmbus_unlock(client); if (val < 0) return val; @@ -118,6 +126,10 @@ static ssize_t isl68137_avs_enable_store_page(struct i2c_client *client, op_val = result ? ISL68137_VOUT_AVS : 0; + rc = pmbus_lock_interruptible(client); + if (rc) + return rc; + /* * Writes to VOUT setpoint over AVSBus will persist after the VRM is * switched to PMBus control. Switching back to AVSBus control @@ -129,17 +141,20 @@ static ssize_t isl68137_avs_enable_store_page(struct i2c_client *client, rc = pmbus_read_word_data(client, page, 0xff, PMBUS_VOUT_COMMAND); if (rc < 0) - return rc; + goto unlock; rc = pmbus_write_word_data(client, page, PMBUS_VOUT_COMMAND, rc); if (rc < 0) - return rc; + goto unlock; } rc = pmbus_update_byte_data(client, page, PMBUS_OPERATION, ISL68137_VOUT_AVS, op_val); +unlock: + pmbus_unlock(client); + return (rc < 0) ? rc : count; } From 0adc752b4f7d82af7bd14f7cad3091b3b5d702ba Mon Sep 17 00:00:00 2001 From: Sanman Pradhan Date: Mon, 23 Mar 2026 00:24:25 +0000 Subject: [PATCH 158/401] hwmon: (peci/cputemp) Fix crit_hyst returning delta instead of absolute temperature The hwmon sysfs ABI expects tempN_crit_hyst to report the temperature at which the critical condition clears, not the hysteresis delta from the critical limit. The peci cputemp driver currently returns tjmax - tcontrol for crit_hyst_type, which is the hysteresis margin rather than the corresponding absolute temperature. Return tcontrol directly, and update the documentation accordingly. Fixes: bf3608f338e9 ("hwmon: peci: Add cputemp driver") Cc: stable@vger.kernel.org Signed-off-by: Sanman Pradhan Link: https://lore.kernel.org/r/20260323002352.93417-2-sanman.pradhan@hpe.com Signed-off-by: Guenter Roeck --- Documentation/hwmon/peci-cputemp.rst | 10 ++++++---- drivers/hwmon/peci/cputemp.c | 2 +- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/Documentation/hwmon/peci-cputemp.rst b/Documentation/hwmon/peci-cputemp.rst index fe0422248dc5..266b62a46f49 100644 --- a/Documentation/hwmon/peci-cputemp.rst +++ b/Documentation/hwmon/peci-cputemp.rst @@ -51,8 +51,9 @@ temp1_max Provides thermal control temperature of the CPU package temp1_crit Provides shutdown temperature of the CPU package which is also known as the maximum processor junction temperature, Tjmax or Tprochot. -temp1_crit_hyst Provides the hysteresis value from Tcontrol to Tjmax of - the CPU package. +temp1_crit_hyst Provides the hysteresis temperature of the CPU + package. Returns Tcontrol, the temperature at which + the critical condition clears. temp2_label "DTS" temp2_input Provides current temperature of the CPU package scaled @@ -62,8 +63,9 @@ temp2_max Provides thermal control temperature of the CPU package temp2_crit Provides shutdown temperature of the CPU package which is also known as the maximum processor junction temperature, Tjmax or Tprochot. -temp2_crit_hyst Provides the hysteresis value from Tcontrol to Tjmax of - the CPU package. +temp2_crit_hyst Provides the hysteresis temperature of the CPU + package. Returns Tcontrol, the temperature at which + the critical condition clears. temp3_label "Tcontrol" temp3_input Provides current Tcontrol temperature of the CPU diff --git a/drivers/hwmon/peci/cputemp.c b/drivers/hwmon/peci/cputemp.c index b2fc936851e1..badec53ff446 100644 --- a/drivers/hwmon/peci/cputemp.c +++ b/drivers/hwmon/peci/cputemp.c @@ -131,7 +131,7 @@ static int get_temp_target(struct peci_cputemp *priv, enum peci_temp_target_type *val = priv->temp.target.tjmax; break; case crit_hyst_type: - *val = priv->temp.target.tjmax - priv->temp.target.tcontrol; + *val = priv->temp.target.tcontrol; break; default: ret = -EOPNOTSUPP; From b0c9d8ae71509f25690d57f2efddebf7f4b12194 Mon Sep 17 00:00:00 2001 From: Sanman Pradhan Date: Mon, 23 Mar 2026 00:24:37 +0000 Subject: [PATCH 159/401] hwmon: (peci/cputemp) Fix off-by-one in cputemp_is_visible() cputemp_is_visible() validates the channel index against CPUTEMP_CHANNEL_NUMS, but currently uses '>' instead of '>='. As a result, channel == CPUTEMP_CHANNEL_NUMS is not rejected even though valid indices are 0 .. CPUTEMP_CHANNEL_NUMS - 1. Fix the bounds check by using '>=' so invalid channel indices are rejected before indexing the core bitmap. Fixes: bf3608f338e9 ("hwmon: peci: Add cputemp driver") Cc: stable@vger.kernel.org Signed-off-by: Sanman Pradhan Link: https://lore.kernel.org/r/20260323002352.93417-3-sanman.pradhan@hpe.com Signed-off-by: Guenter Roeck --- drivers/hwmon/peci/cputemp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/hwmon/peci/cputemp.c b/drivers/hwmon/peci/cputemp.c index badec53ff446..457089c561b4 100644 --- a/drivers/hwmon/peci/cputemp.c +++ b/drivers/hwmon/peci/cputemp.c @@ -319,7 +319,7 @@ static umode_t cputemp_is_visible(const void *data, enum hwmon_sensor_types type { const struct peci_cputemp *priv = data; - if (channel > CPUTEMP_CHANNEL_NUMS) + if (channel >= CPUTEMP_CHANNEL_NUMS) return 0; if (channel < channel_core) From cc34d77dd48708d810c12bfd6f5bf03304f6c824 Mon Sep 17 00:00:00 2001 From: Danilo Krummrich Date: Tue, 24 Mar 2026 01:59:15 +0100 Subject: [PATCH 160/401] spi: use generic driver_override infrastructure When a driver is probed through __driver_attach(), the bus' match() callback is called without the device lock held, thus accessing the driver_override field without a lock, which can cause a UAF. Fix this by using the driver-core driver_override infrastructure taking care of proper locking internally. Note that calling match() from __driver_attach() without the device lock held is intentional. [1] Also note that we do not enable the driver_override feature of struct bus_type, as SPI - in contrast to most other buses - passes "" to sysfs_emit() when the driver_override pointer is NULL. Thus, printing "\n" instead of "(null)\n". Link: https://lore.kernel.org/driver-core/DGRGTIRHA62X.3RY09D9SOK77P@kernel.org/ [1] Reported-by: Gui-Dong Han Closes: https://bugzilla.kernel.org/show_bug.cgi?id=220789 Fixes: 5039563e7c25 ("spi: Add driver_override SPI device attribute") Signed-off-by: Danilo Krummrich Link: https://patch.msgid.link/20260324005919.2408620-12-dakr@kernel.org Signed-off-by: Mark Brown --- drivers/spi/spi.c | 19 +++++++------------ include/linux/spi/spi.h | 5 ----- 2 files changed, 7 insertions(+), 17 deletions(-) diff --git a/drivers/spi/spi.c b/drivers/spi/spi.c index 53dee314d76a..4101c2803eb3 100644 --- a/drivers/spi/spi.c +++ b/drivers/spi/spi.c @@ -50,7 +50,6 @@ static void spidev_release(struct device *dev) struct spi_device *spi = to_spi_device(dev); spi_controller_put(spi->controller); - kfree(spi->driver_override); free_percpu(spi->pcpu_statistics); kfree(spi); } @@ -73,10 +72,9 @@ static ssize_t driver_override_store(struct device *dev, struct device_attribute *a, const char *buf, size_t count) { - struct spi_device *spi = to_spi_device(dev); int ret; - ret = driver_set_override(dev, &spi->driver_override, buf, count); + ret = __device_set_driver_override(dev, buf, count); if (ret) return ret; @@ -86,13 +84,8 @@ static ssize_t driver_override_store(struct device *dev, static ssize_t driver_override_show(struct device *dev, struct device_attribute *a, char *buf) { - const struct spi_device *spi = to_spi_device(dev); - ssize_t len; - - device_lock(dev); - len = sysfs_emit(buf, "%s\n", spi->driver_override ? : ""); - device_unlock(dev); - return len; + guard(spinlock)(&dev->driver_override.lock); + return sysfs_emit(buf, "%s\n", dev->driver_override.name ?: ""); } static DEVICE_ATTR_RW(driver_override); @@ -376,10 +369,12 @@ static int spi_match_device(struct device *dev, const struct device_driver *drv) { const struct spi_device *spi = to_spi_device(dev); const struct spi_driver *sdrv = to_spi_driver(drv); + int ret; /* Check override first, and if set, only use the named driver */ - if (spi->driver_override) - return strcmp(spi->driver_override, drv->name) == 0; + ret = device_match_driver_override(dev, drv); + if (ret >= 0) + return ret; /* Attempt an OF style match */ if (of_driver_match_device(dev, drv)) diff --git a/include/linux/spi/spi.h b/include/linux/spi/spi.h index af7cfee7b8f6..0dc671c07d3a 100644 --- a/include/linux/spi/spi.h +++ b/include/linux/spi/spi.h @@ -159,10 +159,6 @@ extern void spi_transfer_cs_change_delay_exec(struct spi_message *msg, * @modalias: Name of the driver to use with this device, or an alias * for that name. This appears in the sysfs "modalias" attribute * for driver coldplugging, and in uevents used for hotplugging - * @driver_override: If the name of a driver is written to this attribute, then - * the device will bind to the named driver and only the named driver. - * Do not set directly, because core frees it; use driver_set_override() to - * set or clear it. * @pcpu_statistics: statistics for the spi_device * @word_delay: delay to be inserted between consecutive * words of a transfer @@ -224,7 +220,6 @@ struct spi_device { void *controller_state; void *controller_data; char modalias[SPI_NAME_SIZE]; - const char *driver_override; /* The statistics */ struct spi_statistics __percpu *pcpu_statistics; From 744fabc338e87b95c4d1ff7c95bc8c0f834c6d99 Mon Sep 17 00:00:00 2001 From: Alexey Velichayshiy Date: Sat, 7 Feb 2026 18:03:22 +0300 Subject: [PATCH 161/401] wifi: iwlwifi: mvm: fix potential out-of-bounds read in iwl_mvm_nd_match_info_handler() The memcpy function assumes the dynamic array notif->matches is at least as large as the number of bytes to copy. Otherwise, results->matches may contain unwanted data. To guarantee safety, extend the validation in one of the checks to ensure sufficient packet length. Found by Linux Verification Center (linuxtesting.org) with SVACE. Cc: stable@vger.kernel.org Fixes: 5ac54afd4d97 ("wifi: iwlwifi: mvm: Add handling for scan offload match info notification") Signed-off-by: Alexey Velichayshiy Link: https://patch.msgid.link/20260207150335.1013646-1-a.velichayshiy@ispras.ru Signed-off-by: Johannes Berg --- drivers/net/wireless/intel/iwlwifi/mvm/d3.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/d3.c b/drivers/net/wireless/intel/iwlwifi/mvm/d3.c index a19f9d2e9346..9a74f60c9185 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/d3.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/d3.c @@ -2807,7 +2807,7 @@ static void iwl_mvm_nd_match_info_handler(struct iwl_mvm *mvm, if (IS_ERR_OR_NULL(vif)) return; - if (len < sizeof(struct iwl_scan_offload_match_info)) { + if (len < sizeof(struct iwl_scan_offload_match_info) + matches_len) { IWL_ERR(mvm, "Invalid scan match info notification\n"); return; } From 53a7c171e9dd833f0a96b545adcb89bd57387239 Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Sun, 8 Mar 2026 12:02:21 +0100 Subject: [PATCH 162/401] ovl: fix wrong detection of 32bit inode numbers The implicit FILEID_INO32_GEN encoder was changed to be explicit, so we need to fix the detection. When mounting overlayfs with upperdir and lowerdir on different ext4 filesystems, the expected kmsg log is: overlayfs: "xino" feature enabled using 32 upper inode bits. But instead, since the regressing commit, the kmsg log was: overlayfs: "xino" feature enabled using 2 upper inode bits. Fixes: e21fc2038c1b9 ("exportfs: make ->encode_fh() a mandatory method for NFS export") Cc: stable@vger.kernel.org # v6.7+ Signed-off-by: Amir Goldstein --- fs/overlayfs/util.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/fs/overlayfs/util.c b/fs/overlayfs/util.c index 3f1b763a8bb4..2ea769f311c3 100644 --- a/fs/overlayfs/util.c +++ b/fs/overlayfs/util.c @@ -85,7 +85,10 @@ int ovl_can_decode_fh(struct super_block *sb) if (!exportfs_can_decode_fh(sb->s_export_op)) return 0; - return sb->s_export_op->encode_fh ? -1 : FILEID_INO32_GEN; + if (sb->s_export_op->encode_fh == generic_encode_ino32_fh) + return FILEID_INO32_GEN; + + return -1; } struct dentry *ovl_indexdir(struct super_block *sb) From f6484cadbcaf26b5844b51bd7307a663dda48ef6 Mon Sep 17 00:00:00 2001 From: Weiming Shi Date: Wed, 25 Mar 2026 00:54:59 +0800 Subject: [PATCH 163/401] ACPI: EC: clean up handlers on probe failure in acpi_ec_setup() When ec_install_handlers() returns -EPROBE_DEFER on reduced-hardware platforms, it has already started the EC and installed the address space handler with the struct acpi_ec pointer as handler context. However, acpi_ec_setup() propagates the error without any cleanup. The caller acpi_ec_add() then frees the struct acpi_ec for non-boot instances, leaving a dangling handler context in ACPICA. Any subsequent AML evaluation that accesses an EC OpRegion field dispatches into acpi_ec_space_handler() with the freed pointer, causing a use-after-free: BUG: KASAN: slab-use-after-free in mutex_lock (kernel/locking/mutex.c:289) Write of size 8 at addr ffff88800721de38 by task init/1 Call Trace: mutex_lock (kernel/locking/mutex.c:289) acpi_ec_space_handler (drivers/acpi/ec.c:1362) acpi_ev_address_space_dispatch (drivers/acpi/acpica/evregion.c:293) acpi_ex_access_region (drivers/acpi/acpica/exfldio.c:246) acpi_ex_field_datum_io (drivers/acpi/acpica/exfldio.c:509) acpi_ex_extract_from_field (drivers/acpi/acpica/exfldio.c:700) acpi_ex_read_data_from_field (drivers/acpi/acpica/exfield.c:327) acpi_ex_resolve_node_to_value (drivers/acpi/acpica/exresolv.c:392) Allocated by task 1: acpi_ec_alloc (drivers/acpi/ec.c:1424) acpi_ec_add (drivers/acpi/ec.c:1692) Freed by task 1: kfree (mm/slub.c:6876) acpi_ec_add (drivers/acpi/ec.c:1751) The bug triggers on reduced-hardware EC platforms (ec->gpe < 0) when the GPIO IRQ provider defers probing. Once the stale handler exists, any unprivileged sysfs read that causes AML to touch an EC OpRegion (battery, thermal, backlight) exercises the dangling pointer. Fix this by calling ec_remove_handlers() in the error path of acpi_ec_setup() before clearing first_ec. ec_remove_handlers() checks each EC_FLAGS_* bit before acting, so it is safe to call regardless of how far ec_install_handlers() progressed: -ENODEV (handler not installed): only calls acpi_ec_stop() -EPROBE_DEFER (handler installed): removes handler, stops EC Fixes: 03e9a0e05739 ("ACPI: EC: Consolidate event handler installation code") Reported-by: Xiang Mei Signed-off-by: Weiming Shi Link: https://patch.msgid.link/20260324165458.1337233-2-bestswngs@gmail.com Signed-off-by: Rafael J. Wysocki --- drivers/acpi/ec.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/acpi/ec.c b/drivers/acpi/ec.c index 5f63ed120a2c..6f0065257a77 100644 --- a/drivers/acpi/ec.c +++ b/drivers/acpi/ec.c @@ -1656,6 +1656,8 @@ static int acpi_ec_setup(struct acpi_ec *ec, struct acpi_device *device, bool ca ret = ec_install_handlers(ec, device, call_reg); if (ret) { + ec_remove_handlers(ec); + if (ec == first_ec) first_ec = NULL; From 7150850146ebfa4ca998f653f264b8df6f7f85be Mon Sep 17 00:00:00 2001 From: Srinivasan Shanmugam Date: Mon, 23 Mar 2026 13:41:18 +0530 Subject: [PATCH 164/401] drm/amdgpu: Fix fence put before wait in amdgpu_amdkfd_submit_ib MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit amdgpu_amdkfd_submit_ib() submits a GPU job and gets a fence from amdgpu_ib_schedule(). This fence is used to wait for job completion. Currently, the code drops the fence reference using dma_fence_put() before calling dma_fence_wait(). If dma_fence_put() releases the last reference, the fence may be freed before dma_fence_wait() is called. This can lead to a use-after-free. Fix this by waiting on the fence first and releasing the reference only after dma_fence_wait() completes. Fixes the below: drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c:697 amdgpu_amdkfd_submit_ib() warn: passing freed memory 'f' (line 696) Fixes: 9ae55f030dc5 ("drm/amdgpu: Follow up change to previous drm scheduler change.") Cc: Felix Kuehling Cc: Dan Carpenter Cc: Christian König Cc: Alex Deucher Signed-off-by: Srinivasan Shanmugam Reviewed-by: Christian König Signed-off-by: Alex Deucher (cherry picked from commit 8b9e5259adc385b61a6590a13b82ae0ac2bd3482) --- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c index 40c22438b1d2..4f27c75abedb 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c @@ -692,9 +692,9 @@ int amdgpu_amdkfd_submit_ib(struct amdgpu_device *adev, goto err_ib_sched; } - /* Drop the initial kref_init count (see drm_sched_main as example) */ - dma_fence_put(f); ret = dma_fence_wait(f, false); + /* Drop the returned fence reference after the wait completes */ + dma_fence_put(f); err_ib_sched: amdgpu_job_free(job); From 9da4f9964abcaeb6e19797d5e3b10faad338a786 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Fri, 20 Mar 2026 12:33:48 -0400 Subject: [PATCH 165/401] drm/amd/display: check if ext_caps is valid in BL setup LVDS connectors don't have extended backlight caps so check if the pointer is valid before accessing it. Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/5012 Fixes: 1454642960b0 ("drm/amd: Re-introduce property to control adaptive backlight modulation") Cc: Mario Limonciello Reviewed-by: Mario Limonciello (AMD) Signed-off-by: Alex Deucher (cherry picked from commit 3f797396d7f4eb9bb6eded184bbc6f033628a6f6) Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index acc3d8dad4a3..2328c1aa0ead 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -5423,7 +5423,7 @@ static void setup_backlight_device(struct amdgpu_display_manager *dm, caps = &dm->backlight_caps[aconnector->bl_idx]; /* Only offer ABM property when non-OLED and user didn't turn off by module parameter */ - if (!caps->ext_caps->bits.oled && amdgpu_dm_abm_level < 0) + if (caps->ext_caps && !caps->ext_caps->bits.oled && amdgpu_dm_abm_level < 0) drm_object_attach_property(&aconnector->base.base, dm->adev->mode_info.abm_level_property, ABM_SYSFS_CONTROL); From 429aec2bc0ae1e20ce96066d57e9f91f79b660df Mon Sep 17 00:00:00 2001 From: Srinivasan Shanmugam Date: Mon, 23 Mar 2026 14:28:57 +0530 Subject: [PATCH 166/401] drm/amdkfd: Fix NULL pointer check order in kfd_ioctl_create_process MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In kfd_ioctl_create_process(), the pointer 'p' is used before checking if it is NULL. The code accesses p->context_id before validating 'p'. This can lead to a possible NULL pointer dereference. Move the NULL check before using 'p' so that the pointer is validated before access. Fixes the below: drivers/gpu/drm/amd/amdgpu/../amdkfd/kfd_chardev.c:3177 kfd_ioctl_create_process() warn: variable dereferenced before check 'p' (see line 3174) Fixes: cc6b66d661fd ("amdkfd: introduce new ioctl AMDKFD_IOC_CREATE_PROCESS") Cc: Zhu Lingshan Cc: Felix Kuehling Cc: Christian König Cc: Alex Deucher Cc: Dan Carpenter Signed-off-by: Srinivasan Shanmugam Reviewed-by: Christian König Signed-off-by: Alex Deucher (cherry picked from commit 19d4149b22f57094bfc4b86b742381b3ca394ead) --- drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c index 09dabb3b3297..462a32abf720 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c @@ -3170,11 +3170,11 @@ static int kfd_ioctl_create_process(struct file *filep, struct kfd_process *p, v struct kfd_process *process; int ret; - /* Each FD owns only one kfd_process */ - if (p->context_id != KFD_CONTEXT_ID_PRIMARY) + if (!filep->private_data || !p) return -EINVAL; - if (!filep->private_data || !p) + /* Each FD owns only one kfd_process */ + if (p->context_id != KFD_CONTEXT_ID_PRIMARY) return -EINVAL; mutex_lock(&kfd_processes_mutex); From 28922a43fdab715ed771175e8326ad7e13808be3 Mon Sep 17 00:00:00 2001 From: Yang Wang Date: Thu, 19 Mar 2026 03:36:50 -0400 Subject: [PATCH 167/401] drm/amd/pm: disable OD_FAN_CURVE if temp or pwm range invalid for smu v14 Forcibly disable the OD_FAN_CURVE feature when temperature or PWM range is invalid, otherwise PMFW will reject this configuration on smu v14.0.2/14.0.3. example: $ sudo cat /sys/bus/pci/devices//gpu_od/fan_ctrl/fan_curve OD_FAN_CURVE: 0: 0C 0% 1: 0C 0% 2: 0C 0% 3: 0C 0% 4: 0C 0% OD_RANGE: FAN_CURVE(hotspot temp): 0C 0C FAN_CURVE(fan speed): 0% 0% $ echo "0 50 40" | sudo tee fan_curve kernel log: [ 969.761627] amdgpu 0000:03:00.0: amdgpu: Fan curve temp setting(50) must be within [0, 0]! [ 1010.897800] amdgpu 0000:03:00.0: amdgpu: Fan curve temp setting(50) must be within [0, 0]! Signed-off-by: Yang Wang Acked-by: Alex Deucher Signed-off-by: Alex Deucher (cherry picked from commit ab4905d466b60f170d85e19ca2a5d2b159aeb780) Cc: stable@vger.kernel.org --- .../drm/amd/pm/swsmu/smu14/smu_v14_0_2_ppt.c | 33 ++++++++++++++++++- 1 file changed, 32 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_2_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_2_ppt.c index 73762d9b5969..c3ebfac062a7 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_2_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_2_ppt.c @@ -56,6 +56,10 @@ #define to_amdgpu_device(x) (container_of(x, struct amdgpu_device, pm.smu_i2c)) +static void smu_v14_0_2_get_od_setting_limits(struct smu_context *smu, + int od_feature_bit, + int32_t *min, int32_t *max); + static const struct smu_feature_bits smu_v14_0_2_dpm_features = { .bits = { SMU_FEATURE_BIT_INIT(FEATURE_DPM_GFXCLK_BIT), SMU_FEATURE_BIT_INIT(FEATURE_DPM_UCLK_BIT), @@ -922,8 +926,35 @@ static bool smu_v14_0_2_is_od_feature_supported(struct smu_context *smu, PPTable_t *pptable = smu->smu_table.driver_pptable; const OverDriveLimits_t * const overdrive_upperlimits = &pptable->SkuTable.OverDriveLimitsBasicMax; + int32_t min_value, max_value; + bool feature_enabled; - return overdrive_upperlimits->FeatureCtrlMask & (1U << od_feature_bit); + switch (od_feature_bit) { + case PP_OD_FEATURE_FAN_CURVE_BIT: + feature_enabled = !!(overdrive_upperlimits->FeatureCtrlMask & (1U << od_feature_bit)); + if (feature_enabled) { + smu_v14_0_2_get_od_setting_limits(smu, PP_OD_FEATURE_FAN_CURVE_TEMP, + &min_value, &max_value); + if (!min_value && !max_value) { + feature_enabled = false; + goto out; + } + + smu_v14_0_2_get_od_setting_limits(smu, PP_OD_FEATURE_FAN_CURVE_PWM, + &min_value, &max_value); + if (!min_value && !max_value) { + feature_enabled = false; + goto out; + } + } + break; + default: + feature_enabled = !!(overdrive_upperlimits->FeatureCtrlMask & (1U << od_feature_bit)); + break; + } + +out: + return feature_enabled; } static void smu_v14_0_2_get_od_setting_limits(struct smu_context *smu, From 4e9597f22a3cb8600c72fc266eaac57981d834c8 Mon Sep 17 00:00:00 2001 From: Donet Tom Date: Mon, 23 Mar 2026 09:58:36 +0530 Subject: [PATCH 168/401] drm/amdgpu: Handle GPU page faults correctly on non-4K page systems MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit During a GPU page fault, the driver restores the SVM range and then maps it into the GPU page tables. The current implementation passes a GPU-page-size (4K-based) PFN to svm_range_restore_pages() to restore the range. SVM ranges are tracked using system-page-size PFNs. On systems where the system page size is larger than 4K, using GPU-page-size PFNs to restore the range causes two problems: Range lookup fails: Because the restore function receives PFNs in GPU (4K) units, the SVM range lookup does not find the existing range. This will result in a duplicate SVM range being created. VMA lookup failure: The restore function also tries to locate the VMA for the faulting address. It converts the GPU-page-size PFN into an address using the system page size, which results in an incorrect address on non-4K page-size systems. As a result, the VMA lookup fails with the message: "address 0xxxx VMA is removed". This patch passes the system-page-size PFN to svm_range_restore_pages() so that the SVM range is restored correctly on non-4K page systems. Acked-by: Christian König Signed-off-by: Donet Tom Signed-off-by: Alex Deucher (cherry picked from commit 074fe395fb13247b057f60004c7ebcca9f38ef46) --- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index d54afeb7b2a7..a677e38a493b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -2974,14 +2974,14 @@ bool amdgpu_vm_handle_fault(struct amdgpu_device *adev, u32 pasid, if (!root) return false; - addr /= AMDGPU_GPU_PAGE_SIZE; - if (is_compute_context && !svm_range_restore_pages(adev, pasid, vmid, - node_id, addr, ts, write_fault)) { + node_id, addr >> PAGE_SHIFT, ts, write_fault)) { amdgpu_bo_unref(&root); return true; } + addr /= AMDGPU_GPU_PAGE_SIZE; + r = amdgpu_bo_reserve(root, true); if (r) goto error_unref; From 90d239cc53723c1a3f89ce08eac17bf3a9e9f2d4 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Thu, 26 Feb 2026 17:12:08 -0500 Subject: [PATCH 169/401] drm/amd/display: Fix DCE LVDS handling LVDS does not use an HPD pin so it may be invalid. Handle this case correctly in link encoder creation. Fixes: 7c8fb3b8e9ba ("drm/amd/display: Add hpd_source index check for DCE60/80/100/110/112/120 link encoders") Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/5012 Cc: Srinivasan Shanmugam Cc: Roman Li Reviewed-by: Roman Li Reviewed-by: Srinivasan Shanmugam Signed-off-by: Alex Deucher (cherry picked from commit 3b5620f7ee688177fcf65cf61588c5435bce1872) Cc: stable@vger.kernel.org --- .../display/dc/resource/dce100/dce100_resource.c | 6 ++---- .../display/dc/resource/dce110/dce110_resource.c | 5 +++-- .../display/dc/resource/dce112/dce112_resource.c | 5 +++-- .../display/dc/resource/dce120/dce120_resource.c | 5 +++-- .../amd/display/dc/resource/dce60/dce60_resource.c | 14 ++++++-------- .../amd/display/dc/resource/dce80/dce80_resource.c | 6 ++---- 6 files changed, 19 insertions(+), 22 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/resource/dce100/dce100_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dce100/dce100_resource.c index 92c123aca0c9..fdcf8db6be50 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dce100/dce100_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dce100/dce100_resource.c @@ -650,9 +650,6 @@ static struct link_encoder *dce100_link_encoder_create( return &enc110->base; } - if (enc_init_data->hpd_source >= ARRAY_SIZE(link_enc_hpd_regs)) - return NULL; - link_regs_id = map_transmitter_id_to_phy_instance(enc_init_data->transmitter); @@ -661,7 +658,8 @@ static struct link_encoder *dce100_link_encoder_create( &link_enc_feature, &link_enc_regs[link_regs_id], &link_enc_aux_regs[enc_init_data->channel - 1], - &link_enc_hpd_regs[enc_init_data->hpd_source]); + enc_init_data->hpd_source >= ARRAY_SIZE(link_enc_hpd_regs) ? + NULL : &link_enc_hpd_regs[enc_init_data->hpd_source]); return &enc110->base; } diff --git a/drivers/gpu/drm/amd/display/dc/resource/dce110/dce110_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dce110/dce110_resource.c index 95852d277c22..ab71f645c90e 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dce110/dce110_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dce110/dce110_resource.c @@ -671,7 +671,7 @@ static struct link_encoder *dce110_link_encoder_create( kzalloc_obj(struct dce110_link_encoder); int link_regs_id; - if (!enc110 || enc_init_data->hpd_source >= ARRAY_SIZE(link_enc_hpd_regs)) + if (!enc110) return NULL; link_regs_id = @@ -682,7 +682,8 @@ static struct link_encoder *dce110_link_encoder_create( &link_enc_feature, &link_enc_regs[link_regs_id], &link_enc_aux_regs[enc_init_data->channel - 1], - &link_enc_hpd_regs[enc_init_data->hpd_source]); + enc_init_data->hpd_source >= ARRAY_SIZE(link_enc_hpd_regs) ? + NULL : &link_enc_hpd_regs[enc_init_data->hpd_source]); return &enc110->base; } diff --git a/drivers/gpu/drm/amd/display/dc/resource/dce112/dce112_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dce112/dce112_resource.c index 58c6a00397cf..b7051bfd4326 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dce112/dce112_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dce112/dce112_resource.c @@ -632,7 +632,7 @@ static struct link_encoder *dce112_link_encoder_create( kzalloc_obj(struct dce110_link_encoder); int link_regs_id; - if (!enc110 || enc_init_data->hpd_source >= ARRAY_SIZE(link_enc_hpd_regs)) + if (!enc110) return NULL; link_regs_id = @@ -643,7 +643,8 @@ static struct link_encoder *dce112_link_encoder_create( &link_enc_feature, &link_enc_regs[link_regs_id], &link_enc_aux_regs[enc_init_data->channel - 1], - &link_enc_hpd_regs[enc_init_data->hpd_source]); + enc_init_data->hpd_source >= ARRAY_SIZE(link_enc_hpd_regs) ? + NULL : &link_enc_hpd_regs[enc_init_data->hpd_source]); return &enc110->base; } diff --git a/drivers/gpu/drm/amd/display/dc/resource/dce120/dce120_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dce120/dce120_resource.c index 71d76b021375..7ee70f7b3aa7 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dce120/dce120_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dce120/dce120_resource.c @@ -716,7 +716,7 @@ static struct link_encoder *dce120_link_encoder_create( kzalloc_obj(struct dce110_link_encoder); int link_regs_id; - if (!enc110 || enc_init_data->hpd_source >= ARRAY_SIZE(link_enc_hpd_regs)) + if (!enc110) return NULL; link_regs_id = @@ -727,7 +727,8 @@ static struct link_encoder *dce120_link_encoder_create( &link_enc_feature, &link_enc_regs[link_regs_id], &link_enc_aux_regs[enc_init_data->channel - 1], - &link_enc_hpd_regs[enc_init_data->hpd_source]); + enc_init_data->hpd_source >= ARRAY_SIZE(link_enc_hpd_regs) ? + NULL : &link_enc_hpd_regs[enc_init_data->hpd_source]); return &enc110->base; } diff --git a/drivers/gpu/drm/amd/display/dc/resource/dce60/dce60_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dce60/dce60_resource.c index c27645708286..3d52973dd7f2 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dce60/dce60_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dce60/dce60_resource.c @@ -746,18 +746,16 @@ static struct link_encoder *dce60_link_encoder_create( return &enc110->base; } - if (enc_init_data->hpd_source >= ARRAY_SIZE(link_enc_hpd_regs)) - return NULL; - link_regs_id = map_transmitter_id_to_phy_instance(enc_init_data->transmitter); dce60_link_encoder_construct(enc110, - enc_init_data, - &link_enc_feature, - &link_enc_regs[link_regs_id], - &link_enc_aux_regs[enc_init_data->channel - 1], - &link_enc_hpd_regs[enc_init_data->hpd_source]); + enc_init_data, + &link_enc_feature, + &link_enc_regs[link_regs_id], + &link_enc_aux_regs[enc_init_data->channel - 1], + enc_init_data->hpd_source >= ARRAY_SIZE(link_enc_hpd_regs) ? + NULL : &link_enc_hpd_regs[enc_init_data->hpd_source]); return &enc110->base; } diff --git a/drivers/gpu/drm/amd/display/dc/resource/dce80/dce80_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dce80/dce80_resource.c index d66d8ac6d897..89927727a0d9 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dce80/dce80_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dce80/dce80_resource.c @@ -752,9 +752,6 @@ static struct link_encoder *dce80_link_encoder_create( return &enc110->base; } - if (enc_init_data->hpd_source >= ARRAY_SIZE(link_enc_hpd_regs)) - return NULL; - link_regs_id = map_transmitter_id_to_phy_instance(enc_init_data->transmitter); @@ -763,7 +760,8 @@ static struct link_encoder *dce80_link_encoder_create( &link_enc_feature, &link_enc_regs[link_regs_id], &link_enc_aux_regs[enc_init_data->channel - 1], - &link_enc_hpd_regs[enc_init_data->hpd_source]); + enc_init_data->hpd_source >= ARRAY_SIZE(link_enc_hpd_regs) ? + NULL : &link_enc_hpd_regs[enc_init_data->hpd_source]); return &enc110->base; } From 7dfe9846016b15816e287a4650be1ff1b48c5ab4 Mon Sep 17 00:00:00 2001 From: Srinivas Pandruvada Date: Tue, 24 Mar 2026 10:23:46 -0700 Subject: [PATCH 170/401] thermal: intel: int340x: soc_slider: Set offset only for balanced mode The slider offset can be set via debugfs for balanced mode. The offset should be only applicable in balanced mode. For other modes, it should be 0 when writing to MMIO offset, Fixes: 8306bcaba06d ("thermal: intel: int340x: Add module parameter to change slider offset") Tested-by: Erin Park Signed-off-by: Srinivas Pandruvada Cc: 6.18+ # 6.18+ [ rjw: Subject and changelog tweaks ] Link: https://patch.msgid.link/20260324172346.3317145-1-srinivas.pandruvada@linux.intel.com Signed-off-by: Rafael J. Wysocki --- .../intel/int340x_thermal/processor_thermal_soc_slider.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/thermal/intel/int340x_thermal/processor_thermal_soc_slider.c b/drivers/thermal/intel/int340x_thermal/processor_thermal_soc_slider.c index 49ff3bae7271..91f291627132 100644 --- a/drivers/thermal/intel/int340x_thermal/processor_thermal_soc_slider.c +++ b/drivers/thermal/intel/int340x_thermal/processor_thermal_soc_slider.c @@ -176,15 +176,21 @@ static inline void write_soc_slider(struct proc_thermal_device *proc_priv, u64 v static void set_soc_power_profile(struct proc_thermal_device *proc_priv, int slider) { + u8 offset; u64 val; val = read_soc_slider(proc_priv); val &= ~SLIDER_MASK; val |= FIELD_PREP(SLIDER_MASK, slider) | BIT(SLIDER_ENABLE_BIT); + if (slider == SOC_SLIDER_VALUE_MINIMUM || slider == SOC_SLIDER_VALUE_MAXIMUM) + offset = 0; + else + offset = slider_offset; + /* Set the slider offset from module params */ val &= ~SLIDER_OFFSET_MASK; - val |= FIELD_PREP(SLIDER_OFFSET_MASK, slider_offset); + val |= FIELD_PREP(SLIDER_OFFSET_MASK, offset); write_soc_slider(proc_priv, val); } From a23811061a553c70c42de0e811b2ec15b2d54157 Mon Sep 17 00:00:00 2001 From: "Panagiotis \"Ivory\" Vasilopoulos" Date: Wed, 4 Mar 2026 19:13:04 +0100 Subject: [PATCH 171/401] landlock: Expand restrict flags example for ABI version 8 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add LANDLOCK_RESTRICT_SELF_TSYNC to the backwards compatibility example for restrict flags. This introduces completeness, similar to that of the ruleset attributes example. However, as the new example can impact enforcement in certain cases, an appropriate warning is also included. Additionally, I modified the two comments of the example to make them more consistent with the ruleset attributes example's. Signed-off-by: Panagiotis "Ivory" Vasilopoulos Co-developed-by: Dan Cojocaru Signed-off-by: Dan Cojocaru Reviewed-by: Günther Noack Link: https://lore.kernel.org/r/20260304-landlock-docs-add-tsync-example-v4-1-819a276f05c5@n0toose.net [mic: Update date, improve comments consistency, fix newline issue] Signed-off-by: Mickaël Salaün --- Documentation/userspace-api/landlock.rst | 23 +++++++++++++++++++---- 1 file changed, 19 insertions(+), 4 deletions(-) diff --git a/Documentation/userspace-api/landlock.rst b/Documentation/userspace-api/landlock.rst index 13134bccdd39..7f86d7a37dc2 100644 --- a/Documentation/userspace-api/landlock.rst +++ b/Documentation/userspace-api/landlock.rst @@ -8,7 +8,7 @@ Landlock: unprivileged access control ===================================== :Author: Mickaël Salaün -:Date: January 2026 +:Date: March 2026 The goal of Landlock is to enable restriction of ambient rights (e.g. global filesystem or network access) for a set of processes. Because Landlock @@ -197,12 +197,27 @@ similar backwards compatibility check is needed for the restrict flags .. code-block:: c - __u32 restrict_flags = LANDLOCK_RESTRICT_SELF_LOG_NEW_EXEC_ON; - if (abi < 7) { - /* Clear logging flags unsupported before ABI 7. */ + __u32 restrict_flags = + LANDLOCK_RESTRICT_SELF_LOG_NEW_EXEC_ON | + LANDLOCK_RESTRICT_SELF_TSYNC; + switch (abi) { + case 1 ... 6: + /* Removes logging flags for ABI < 7 */ restrict_flags &= ~(LANDLOCK_RESTRICT_SELF_LOG_SAME_EXEC_OFF | LANDLOCK_RESTRICT_SELF_LOG_NEW_EXEC_ON | LANDLOCK_RESTRICT_SELF_LOG_SUBDOMAINS_OFF); + __attribute__((fallthrough)); + case 7: + /* + * Removes multithreaded enforcement flag for ABI < 8 + * + * WARNING: Without this flag, calling landlock_restrict_self(2) is + * only equivalent if the calling process is single-threaded. Below + * ABI v8 (and as of ABI v8, when not using this flag), a Landlock + * policy would only be enforced for the calling thread and its + * children (and not for all threads, including parents and siblings). + */ + restrict_flags &= ~LANDLOCK_RESTRICT_SELF_TSYNC; } The next step is to restrict the current thread from gaining more privileges From 1b164b876c36c3eb5561dd9b37702b04401b0166 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 24 Mar 2026 10:21:25 -1000 Subject: [PATCH 172/401] cgroup: Wait for dying tasks to leave on rmdir a72f73c4dd9b ("cgroup: Don't expose dead tasks in cgroup") hid PF_EXITING tasks from cgroup.procs so that systemd doesn't see tasks that have already been reaped via waitpid(). However, the populated counter (nr_populated_csets) is only decremented when the task later passes through cgroup_task_dead() in finish_task_switch(). This means cgroup.procs can appear empty while the cgroup is still populated, causing rmdir to fail with -EBUSY. Fix this by making cgroup_rmdir() wait for dying tasks to fully leave. If the cgroup is populated but all remaining tasks have PF_EXITING set (the task iterator returns none due to the existing filter), wait for a kick from cgroup_task_dead() and retry. The wait is brief as tasks are removed from the cgroup's css_set between PF_EXITING assertion in do_exit() and cgroup_task_dead() in finish_task_switch(). v2: cgroup_is_populated() true to false transition happens under css_set_lock not cgroup_mutex, so retest under css_set_lock before sleeping to avoid missed wakeups (Sebastian). Fixes: a72f73c4dd9b ("cgroup: Don't expose dead tasks in cgroup") Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-lkp/202603222104.2c81684e-lkp@intel.com Reported-by: Sebastian Andrzej Siewior Signed-off-by: Tejun Heo Reviewed-by: Sebastian Andrzej Siewior Cc: Bert Karwatzki Cc: Michal Koutny Cc: cgroups@vger.kernel.org --- include/linux/cgroup-defs.h | 3 ++ kernel/cgroup/cgroup.c | 86 +++++++++++++++++++++++++++++++++++-- 2 files changed, 86 insertions(+), 3 deletions(-) diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h index bb92f5c169ca..7f87399938fa 100644 --- a/include/linux/cgroup-defs.h +++ b/include/linux/cgroup-defs.h @@ -609,6 +609,9 @@ struct cgroup { /* used to wait for offlining of csses */ wait_queue_head_t offline_waitq; + /* used by cgroup_rmdir() to wait for dying tasks to leave */ + wait_queue_head_t dying_populated_waitq; + /* used to schedule release agent */ struct work_struct release_agent_work; diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c index 01fc2a93f3ef..2163054e1aa6 100644 --- a/kernel/cgroup/cgroup.c +++ b/kernel/cgroup/cgroup.c @@ -2126,6 +2126,7 @@ static void init_cgroup_housekeeping(struct cgroup *cgrp) #endif init_waitqueue_head(&cgrp->offline_waitq); + init_waitqueue_head(&cgrp->dying_populated_waitq); INIT_WORK(&cgrp->release_agent_work, cgroup1_release_agent); } @@ -6224,6 +6225,76 @@ static int cgroup_destroy_locked(struct cgroup *cgrp) return 0; }; +/** + * cgroup_drain_dying - wait for dying tasks to leave before rmdir + * @cgrp: the cgroup being removed + * + * The PF_EXITING filter in css_task_iter_advance() hides exiting tasks from + * cgroup.procs so that userspace (e.g. systemd) doesn't see tasks that have + * already been reaped via waitpid(). However, the populated counter + * (nr_populated_csets) is only decremented when the task later passes through + * cgroup_task_dead() in finish_task_switch(). This creates a window where + * cgroup.procs appears empty but cgroup_is_populated() is still true, causing + * rmdir to fail with -EBUSY. + * + * This function bridges that gap. If the cgroup is populated but all remaining + * tasks have PF_EXITING set, we wait for cgroup_task_dead() to process them. + * Tasks are removed from the cgroup's css_set in cgroup_task_dead() called from + * finish_task_switch(). As the window between PF_EXITING and cgroup_task_dead() + * is short, the number of PF_EXITING tasks on the list is small and the wait + * is brief. + * + * Each cgroup_task_dead() kicks the waitqueue via cset->cgrp_links, and we + * retry the full check from scratch. + * + * Must be called with cgroup_mutex held. + */ +static int cgroup_drain_dying(struct cgroup *cgrp) + __releases(&cgroup_mutex) __acquires(&cgroup_mutex) +{ + struct css_task_iter it; + struct task_struct *task; + DEFINE_WAIT(wait); + + lockdep_assert_held(&cgroup_mutex); +retry: + if (!cgroup_is_populated(cgrp)) + return 0; + + /* Same iterator as cgroup.threads - if any task is visible, it's busy */ + css_task_iter_start(&cgrp->self, 0, &it); + task = css_task_iter_next(&it); + css_task_iter_end(&it); + + if (task) + return -EBUSY; + + /* + * All remaining tasks are PF_EXITING and will pass through + * cgroup_task_dead() shortly. Wait for a kick and retry. + * + * cgroup_is_populated() can't transition from false to true while + * we're holding cgroup_mutex, but the true to false transition + * happens under css_set_lock (via cgroup_task_dead()). We must + * retest and prepare_to_wait() under css_set_lock. Otherwise, the + * transition can happen between our first test and + * prepare_to_wait(), and we sleep with no one to wake us. + */ + spin_lock_irq(&css_set_lock); + if (!cgroup_is_populated(cgrp)) { + spin_unlock_irq(&css_set_lock); + return 0; + } + prepare_to_wait(&cgrp->dying_populated_waitq, &wait, + TASK_UNINTERRUPTIBLE); + spin_unlock_irq(&css_set_lock); + mutex_unlock(&cgroup_mutex); + schedule(); + finish_wait(&cgrp->dying_populated_waitq, &wait); + mutex_lock(&cgroup_mutex); + goto retry; +} + int cgroup_rmdir(struct kernfs_node *kn) { struct cgroup *cgrp; @@ -6233,9 +6304,12 @@ int cgroup_rmdir(struct kernfs_node *kn) if (!cgrp) return 0; - ret = cgroup_destroy_locked(cgrp); - if (!ret) - TRACE_CGROUP_PATH(rmdir, cgrp); + ret = cgroup_drain_dying(cgrp); + if (!ret) { + ret = cgroup_destroy_locked(cgrp); + if (!ret) + TRACE_CGROUP_PATH(rmdir, cgrp); + } cgroup_kn_unlock(kn); return ret; @@ -6995,6 +7069,7 @@ void cgroup_task_exit(struct task_struct *tsk) static void do_cgroup_task_dead(struct task_struct *tsk) { + struct cgrp_cset_link *link; struct css_set *cset; unsigned long flags; @@ -7008,6 +7083,11 @@ static void do_cgroup_task_dead(struct task_struct *tsk) if (thread_group_leader(tsk) && atomic_read(&tsk->signal->live)) list_add_tail(&tsk->cg_list, &cset->dying_tasks); + /* kick cgroup_drain_dying() waiters, see cgroup_rmdir() */ + list_for_each_entry(link, &cset->cgrp_links, cgrp_link) + if (waitqueue_active(&link->cgrp->dying_populated_waitq)) + wake_up(&link->cgrp->dying_populated_waitq); + if (dl_task(tsk)) dec_dl_tasks_cs(tsk); From 6680c162b4850976ee52b57372eddc4450c1d074 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 24 Mar 2026 10:21:47 -1000 Subject: [PATCH 173/401] selftests/cgroup: Don't require synchronous populated update on task exit test_cgcore_populated (test_core) and test_cgkill_{simple,tree,forkbomb} (test_kill) check cgroup.events "populated 0" immediately after reaping child tasks with waitpid(). This used to work because cgroup_task_exit() in do_exit() unlinked tasks from css_sets before exit_notify() woke up waitpid(). d245698d727a ("cgroup: Defer task cgroup unlink until after the task is done switching out") moved the unlink to cgroup_task_dead() in finish_task_switch(), which runs after exit_notify(). The populated counter is now decremented after the parent's waitpid() can return, so there is no longer a synchronous ordering guarantee. On PREEMPT_RT, where cgroup_task_dead() is further deferred through lazy irq_work, the race window is even larger. The synchronous populated transition was never part of the cgroup interface contract - it was an implementation artifact. Use cg_read_strcmp_wait() which retries for up to 1 second, matching what these tests actually need to verify: that the cgroup eventually becomes unpopulated after all tasks exit. Fixes: d245698d727a ("cgroup: Defer task cgroup unlink until after the task is done switching out") Reported-by: Sebastian Andrzej Siewior Signed-off-by: Tejun Heo Tested-by: Sebastian Andrzej Siewior Cc: Christian Brauner Cc: cgroups@vger.kernel.org --- tools/testing/selftests/cgroup/lib/cgroup_util.c | 15 +++++++++++++++ .../selftests/cgroup/lib/include/cgroup_util.h | 2 ++ tools/testing/selftests/cgroup/test_core.c | 3 ++- tools/testing/selftests/cgroup/test_kill.c | 7 ++++--- 4 files changed, 23 insertions(+), 4 deletions(-) diff --git a/tools/testing/selftests/cgroup/lib/cgroup_util.c b/tools/testing/selftests/cgroup/lib/cgroup_util.c index ce6c2642fd9b..6a7295347e90 100644 --- a/tools/testing/selftests/cgroup/lib/cgroup_util.c +++ b/tools/testing/selftests/cgroup/lib/cgroup_util.c @@ -123,6 +123,21 @@ int cg_read_strcmp(const char *cgroup, const char *control, return ret; } +int cg_read_strcmp_wait(const char *cgroup, const char *control, + const char *expected) +{ + int i, ret; + + for (i = 0; i < 100; i++) { + ret = cg_read_strcmp(cgroup, control, expected); + if (!ret) + return ret; + usleep(10000); + } + + return ret; +} + int cg_read_strstr(const char *cgroup, const char *control, const char *needle) { char buf[PAGE_SIZE]; diff --git a/tools/testing/selftests/cgroup/lib/include/cgroup_util.h b/tools/testing/selftests/cgroup/lib/include/cgroup_util.h index 77f386dab5e8..567b1082974c 100644 --- a/tools/testing/selftests/cgroup/lib/include/cgroup_util.h +++ b/tools/testing/selftests/cgroup/lib/include/cgroup_util.h @@ -61,6 +61,8 @@ extern int cg_read(const char *cgroup, const char *control, char *buf, size_t len); extern int cg_read_strcmp(const char *cgroup, const char *control, const char *expected); +extern int cg_read_strcmp_wait(const char *cgroup, const char *control, + const char *expected); extern int cg_read_strstr(const char *cgroup, const char *control, const char *needle); extern long cg_read_long(const char *cgroup, const char *control); diff --git a/tools/testing/selftests/cgroup/test_core.c b/tools/testing/selftests/cgroup/test_core.c index 102262555a59..7b83c7e7c9d4 100644 --- a/tools/testing/selftests/cgroup/test_core.c +++ b/tools/testing/selftests/cgroup/test_core.c @@ -233,7 +233,8 @@ static int test_cgcore_populated(const char *root) if (err) goto cleanup; - if (cg_read_strcmp(cg_test_d, "cgroup.events", "populated 0\n")) + if (cg_read_strcmp_wait(cg_test_d, "cgroup.events", + "populated 0\n")) goto cleanup; /* Remove cgroup. */ diff --git a/tools/testing/selftests/cgroup/test_kill.c b/tools/testing/selftests/cgroup/test_kill.c index c8c9d306925b..f6cd23a8ecc7 100644 --- a/tools/testing/selftests/cgroup/test_kill.c +++ b/tools/testing/selftests/cgroup/test_kill.c @@ -86,7 +86,7 @@ static int test_cgkill_simple(const char *root) wait_for_pid(pids[i]); if (ret == KSFT_PASS && - cg_read_strcmp(cgroup, "cgroup.events", "populated 0\n")) + cg_read_strcmp_wait(cgroup, "cgroup.events", "populated 0\n")) ret = KSFT_FAIL; if (cgroup) @@ -190,7 +190,8 @@ static int test_cgkill_tree(const char *root) wait_for_pid(pids[i]); if (ret == KSFT_PASS && - cg_read_strcmp(cgroup[0], "cgroup.events", "populated 0\n")) + cg_read_strcmp_wait(cgroup[0], "cgroup.events", + "populated 0\n")) ret = KSFT_FAIL; for (i = 9; i >= 0 && cgroup[i]; i--) { @@ -251,7 +252,7 @@ static int test_cgkill_forkbomb(const char *root) wait_for_pid(pid); if (ret == KSFT_PASS && - cg_read_strcmp(cgroup, "cgroup.events", "populated 0\n")) + cg_read_strcmp_wait(cgroup, "cgroup.events", "populated 0\n")) ret = KSFT_FAIL; if (cgroup) From 5d16467ae56343b9205caedf85e3a131e0914ad8 Mon Sep 17 00:00:00 2001 From: Zhan Xusheng Date: Mon, 23 Mar 2026 14:11:30 +0800 Subject: [PATCH 174/401] alarmtimer: Fix argument order in alarm_timer_forward() alarm_timer_forward() passes arguments to alarm_forward() in the wrong order: alarm_forward(alarm, timr->it_interval, now); However, alarm_forward() is defined as: u64 alarm_forward(struct alarm *alarm, ktime_t now, ktime_t interval); and uses the second argument as the current time: delta = ktime_sub(now, alarm->node.expires); Passing the interval as "now" results in incorrect delta computation, which can lead to missed expirations or incorrect overrun accounting. This issue has been present since the introduction of alarm_timer_forward(). Fix this by swapping the arguments. Fixes: e7561f1633ac ("alarmtimer: Implement forward callback") Signed-off-by: Zhan Xusheng Signed-off-by: Thomas Gleixner Cc: stable@vger.kernel.org Link: https://patch.msgid.link/20260323061130.29991-1-zhanxusheng@xiaomi.com --- kernel/time/alarmtimer.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/time/alarmtimer.c b/kernel/time/alarmtimer.c index 069d93bfb0c7..b64db405ba5c 100644 --- a/kernel/time/alarmtimer.c +++ b/kernel/time/alarmtimer.c @@ -540,7 +540,7 @@ static s64 alarm_timer_forward(struct k_itimer *timr, ktime_t now) { struct alarm *alarm = &timr->it.alarm.alarmtimer; - return alarm_forward(alarm, timr->it_interval, now); + return alarm_forward(alarm, now, timr->it_interval); } /** From 06f4297134db37fb326047b1ed8194a23cdf057d Mon Sep 17 00:00:00 2001 From: Tvrtko Ursulin Date: Tue, 24 Mar 2026 11:10:19 +0000 Subject: [PATCH 175/401] drm/syncobj: Fix xa_alloc allocation flags MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The xarray conversion blindly and wrongly replaced idr_alloc with xa_alloc and kept the GFP_NOWAIT. It should have been GFP_KERNEL to account for idr_preload it removed. Fix it. Signed-off-by: Tvrtko Ursulin Fixes: fec2c3c01f1c ("drm/syncobj: Convert syncobj idr to xarray") Reported-by: Himanshu Girotra Cc: Matthew Brost Cc: Thomas Hellström Reviewed-by: Thomas Hellström Reviewed-by: Himanshu Girotra Reviewed-by: Matthew Brost Signed-off-by: Tvrtko Ursulin Link: https://lore.kernel.org/r/20260324111019.22467-1-tvrtko.ursulin@igalia.com --- drivers/gpu/drm/drm_syncobj.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/drm_syncobj.c b/drivers/gpu/drm/drm_syncobj.c index 250734dee928..8d9fd1917c6e 100644 --- a/drivers/gpu/drm/drm_syncobj.c +++ b/drivers/gpu/drm/drm_syncobj.c @@ -602,7 +602,7 @@ int drm_syncobj_get_handle(struct drm_file *file_private, drm_syncobj_get(syncobj); ret = xa_alloc(&file_private->syncobj_xa, handle, syncobj, xa_limit_32b, - GFP_NOWAIT); + GFP_KERNEL); if (ret) drm_syncobj_put(syncobj); @@ -716,7 +716,7 @@ static int drm_syncobj_fd_to_handle(struct drm_file *file_private, drm_syncobj_get(syncobj); ret = xa_alloc(&file_private->syncobj_xa, handle, syncobj, xa_limit_32b, - GFP_NOWAIT); + GFP_KERNEL); if (ret) drm_syncobj_put(syncobj); From bfe9e314d7574d1c5c851972e7aee342733819d2 Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Wed, 18 Mar 2026 10:02:09 +0000 Subject: [PATCH 176/401] drm/xe: always keep track of remap prev/next During 3D workload, user is reporting hitting: [ 413.361679] WARNING: drivers/gpu/drm/xe/xe_vm.c:1217 at vm_bind_ioctl_ops_unwind+0x1e2/0x2e0 [xe], CPU#7: vkd3d_queue/9925 [ 413.361944] CPU: 7 UID: 1000 PID: 9925 Comm: vkd3d_queue Kdump: loaded Not tainted 7.0.0-070000rc3-generic #202603090038 PREEMPT(lazy) [ 413.361949] RIP: 0010:vm_bind_ioctl_ops_unwind+0x1e2/0x2e0 [xe] [ 413.362074] RSP: 0018:ffffd4c25c3df930 EFLAGS: 00010282 [ 413.362077] RAX: 0000000000000000 RBX: ffff8f3ee817ed10 RCX: 0000000000000000 [ 413.362078] RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000000000000 [ 413.362079] RBP: ffffd4c25c3df980 R08: 0000000000000000 R09: 0000000000000000 [ 413.362081] R10: 0000000000000000 R11: 0000000000000000 R12: ffff8f41fbf99380 [ 413.362082] R13: ffff8f3ee817e968 R14: 00000000ffffffef R15: ffff8f43d00bd380 [ 413.362083] FS: 00000001040ff6c0(0000) GS:ffff8f4696d89000(0000) knlGS:00000000330b0000 [ 413.362085] CS: 0010 DS: 002b ES: 002b CR0: 0000000080050033 [ 413.362086] CR2: 00007ddfc4747000 CR3: 00000002e6262005 CR4: 0000000000f72ef0 [ 413.362088] PKRU: 55555554 [ 413.362089] Call Trace: [ 413.362092] [ 413.362096] xe_vm_bind_ioctl+0xa9a/0xc60 [xe] Which seems to hint that the vma we are re-inserting for the ops unwind is either invalid or overlapping with something already inserted in the vm. It shouldn't be invalid since this is a re-insertion, so must have worked before. Leaving the likely culprit as something already placed where we want to insert the vma. Following from that, for the case where we do something like a rebind in the middle of a vma, and one or both mapped ends are already compatible, we skip doing the rebind of those vma and set next/prev to NULL. As well as then adjust the original unmap va range, to avoid unmapping the ends. However, if we trigger the unwind path, we end up with three va, with the two ends never being removed and the original va range in the middle still being the shrunken size. If this occurs, one failure mode is when another unwind op needs to interact with that range, which can happen with a vector of binds. For example, if we need to re-insert something in place of the original va. In this case the va is still the shrunken version, so when removing it and then doing a re-insert it can overlap with the ends, which were never removed, triggering a warning like above, plus leaving the vm in a bad state. With that, we need two things here: 1) Stop nuking the prev/next tracking for the skip cases. Instead relying on checking for skip prev/next, where needed. That way on the unwind path, we now correctly remove both ends. 2) Undo the unmap va shrinkage, on the unwind path. With the two ends now removed the unmap va should expand back to the original size again, before re-insertion. v2: - Update the explanation in the commit message, based on an actual IGT of triggering this issue, rather than conjecture. - Also undo the unmap shrinkage, for the skip case. With the two ends now removed, the original unmap va range should expand back to the original range. v3: - Track the old start/range separately. vma_size/start() uses the va info directly. Link: https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/7602 Fixes: 8f33b4f054fc ("drm/xe: Avoid doing rebinds") Signed-off-by: Matthew Auld Cc: Matthew Brost Cc: # v6.8+ Reviewed-by: Matthew Brost Link: https://patch.msgid.link/20260318100208.78097-2-matthew.auld@intel.com (cherry picked from commit aec6969f75afbf4e01fd5fb5850ed3e9c27043ac) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_pt.c | 12 ++++++------ drivers/gpu/drm/xe/xe_vm.c | 22 ++++++++++++++++++---- drivers/gpu/drm/xe/xe_vm_types.h | 4 ++++ 3 files changed, 28 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_pt.c b/drivers/gpu/drm/xe/xe_pt.c index 2d9ce2c4cb4f..713a303c9053 100644 --- a/drivers/gpu/drm/xe/xe_pt.c +++ b/drivers/gpu/drm/xe/xe_pt.c @@ -1442,9 +1442,9 @@ static int op_check_svm_userptr(struct xe_vm *vm, struct xe_vma_op *op, err = vma_check_userptr(vm, op->map.vma, pt_update); break; case DRM_GPUVA_OP_REMAP: - if (op->remap.prev) + if (op->remap.prev && !op->remap.skip_prev) err = vma_check_userptr(vm, op->remap.prev, pt_update); - if (!err && op->remap.next) + if (!err && op->remap.next && !op->remap.skip_next) err = vma_check_userptr(vm, op->remap.next, pt_update); break; case DRM_GPUVA_OP_UNMAP: @@ -2198,12 +2198,12 @@ static int op_prepare(struct xe_vm *vm, err = unbind_op_prepare(tile, pt_update_ops, old); - if (!err && op->remap.prev) { + if (!err && op->remap.prev && !op->remap.skip_prev) { err = bind_op_prepare(vm, tile, pt_update_ops, op->remap.prev, false); pt_update_ops->wait_vm_bookkeep = true; } - if (!err && op->remap.next) { + if (!err && op->remap.next && !op->remap.skip_next) { err = bind_op_prepare(vm, tile, pt_update_ops, op->remap.next, false); pt_update_ops->wait_vm_bookkeep = true; @@ -2428,10 +2428,10 @@ static void op_commit(struct xe_vm *vm, unbind_op_commit(vm, tile, pt_update_ops, old, fence, fence2); - if (op->remap.prev) + if (op->remap.prev && !op->remap.skip_prev) bind_op_commit(vm, tile, pt_update_ops, op->remap.prev, fence, fence2, false); - if (op->remap.next) + if (op->remap.next && !op->remap.skip_next) bind_op_commit(vm, tile, pt_update_ops, op->remap.next, fence, fence2, false); break; diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index a82e3a4fb389..ffdbab106a58 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -2554,7 +2554,6 @@ static int xe_vma_op_commit(struct xe_vm *vm, struct xe_vma_op *op) if (!err && op->remap.skip_prev) { op->remap.prev->tile_present = tile_present; - op->remap.prev = NULL; } } if (op->remap.next) { @@ -2564,11 +2563,13 @@ static int xe_vma_op_commit(struct xe_vm *vm, struct xe_vma_op *op) if (!err && op->remap.skip_next) { op->remap.next->tile_present = tile_present; - op->remap.next = NULL; } } - /* Adjust for partial unbind after removing VMA from VM */ + /* + * Adjust for partial unbind after removing VMA from VM. In case + * of unwind we might need to undo this later. + */ if (!err) { op->base.remap.unmap->va->va.addr = op->remap.start; op->base.remap.unmap->va->va.range = op->remap.range; @@ -2687,6 +2688,8 @@ static int vm_bind_ioctl_ops_parse(struct xe_vm *vm, struct drm_gpuva_ops *ops, op->remap.start = xe_vma_start(old); op->remap.range = xe_vma_size(old); + op->remap.old_start = op->remap.start; + op->remap.old_range = op->remap.range; flags |= op->base.remap.unmap->va->flags & XE_VMA_CREATE_MASK; if (op->base.remap.prev) { @@ -2835,8 +2838,19 @@ static void xe_vma_op_unwind(struct xe_vm *vm, struct xe_vma_op *op, xe_svm_notifier_lock(vm); vma->gpuva.flags &= ~XE_VMA_DESTROYED; xe_svm_notifier_unlock(vm); - if (post_commit) + if (post_commit) { + /* + * Restore the old va range, in case of the + * prev/next skip optimisation. Otherwise what + * we re-insert here could be smaller than the + * original range. + */ + op->base.remap.unmap->va->va.addr = + op->remap.old_start; + op->base.remap.unmap->va->va.range = + op->remap.old_range; xe_vm_insert_vma(vm, vma); + } } break; } diff --git a/drivers/gpu/drm/xe/xe_vm_types.h b/drivers/gpu/drm/xe/xe_vm_types.h index 437f64202f3b..e2946e311d7a 100644 --- a/drivers/gpu/drm/xe/xe_vm_types.h +++ b/drivers/gpu/drm/xe/xe_vm_types.h @@ -373,6 +373,10 @@ struct xe_vma_op_remap { u64 start; /** @range: range of the VMA unmap */ u64 range; + /** @old_start: Original start of the VMA we unmap */ + u64 old_start; + /** @old_range: Original range of the VMA we unmap */ + u64 old_range; /** @skip_prev: skip prev rebind */ bool skip_prev; /** @skip_next: skip next rebind */ From c991ca3238410b611a2ce59adeca9b55850aff69 Mon Sep 17 00:00:00 2001 From: Shuming Fan Date: Wed, 25 Mar 2026 17:20:17 +0800 Subject: [PATCH 177/401] ASoC: SDCA: remove the max count of initialization table The number of the initialization table may exceed 2048. Therefore, this patch removes the limitation and allows the driver to allocate memory dynamically based on the size of the initialization table. Signed-off-by: Shuming Fan Reviewed-by: Charles Keepax Link: https://patch.msgid.link/20260325092017.3221640-1-shumingf@realtek.com Signed-off-by: Mark Brown --- include/sound/sdca_function.h | 5 ----- sound/soc/sdca/sdca_functions.c | 3 --- 2 files changed, 8 deletions(-) diff --git a/include/sound/sdca_function.h b/include/sound/sdca_function.h index 79bd5a7a0f88..0e871c786513 100644 --- a/include/sound/sdca_function.h +++ b/include/sound/sdca_function.h @@ -26,11 +26,6 @@ struct sdca_function_desc; */ #define SDCA_MAX_ENTITY_COUNT 128 -/* - * Sanity check on number of initialization writes, can be expanded if needed. - */ -#define SDCA_MAX_INIT_COUNT 2048 - /* * The Cluster IDs are 16-bit, so a maximum of 65535 Clusters per * function can be represented, however limit this to a slightly diff --git a/sound/soc/sdca/sdca_functions.c b/sound/soc/sdca/sdca_functions.c index e0ed593697ba..d27ffb25ad97 100644 --- a/sound/soc/sdca/sdca_functions.c +++ b/sound/soc/sdca/sdca_functions.c @@ -216,9 +216,6 @@ static int find_sdca_init_table(struct device *dev, } else if (num_init_writes % sizeof(*raw) != 0) { dev_err(dev, "%pfwP: init table size invalid\n", function_node); return -EINVAL; - } else if ((num_init_writes / sizeof(*raw)) > SDCA_MAX_INIT_COUNT) { - dev_err(dev, "%pfwP: maximum init table size exceeded\n", function_node); - return -EINVAL; } raw = kzalloc(num_init_writes, GFP_KERNEL); From bf08749a6abb6d1959bfdc0edc32c640df407558 Mon Sep 17 00:00:00 2001 From: Sanman Pradhan Date: Wed, 25 Mar 2026 05:13:06 +0000 Subject: [PATCH 178/401] hwmon: (adm1177) fix sysfs ABI violation and current unit conversion MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The adm1177 driver exposes the current alert threshold through hwmon_curr_max_alarm. This violates the hwmon sysfs ABI, where *_alarm attributes are read-only status flags and writable thresholds must use currN_max. The driver also stores the threshold internally in microamps, while currN_max is defined in milliamps. Convert the threshold accordingly on both the read and write paths. Widen the cached threshold and related calculations to 64 bits so that small shunt resistor values do not cause truncation or overflow. Also use 64-bit arithmetic for the mA/uA conversions, clamp writes to the range the hardware can represent, and propagate failures from adm1177_write_alert_thr() instead of silently ignoring them. Update the hwmon documentation to reflect the attribute rename and the correct units returned by the driver. Fixes: 09b08ac9e8d5 ("hwmon: (adm1177) Add ADM1177 Hot Swap Controller and Digital Power Monitor driver") Signed-off-by: Sanman Pradhan Acked-by: Nuno Sá Link: https://lore.kernel.org/r/20260325051246.28262-1-sanman.pradhan@hpe.com Signed-off-by: Guenter Roeck --- Documentation/hwmon/adm1177.rst | 8 ++--- drivers/hwmon/adm1177.c | 54 +++++++++++++++++++-------------- 2 files changed, 35 insertions(+), 27 deletions(-) diff --git a/Documentation/hwmon/adm1177.rst b/Documentation/hwmon/adm1177.rst index 1c85a2af92bf..375f6d6e03a7 100644 --- a/Documentation/hwmon/adm1177.rst +++ b/Documentation/hwmon/adm1177.rst @@ -27,10 +27,10 @@ for details. Sysfs entries ------------- -The following attributes are supported. Current maxim attribute +The following attributes are supported. Current maximum attribute is read-write, all other attributes are read-only. -in0_input Measured voltage in microvolts. +in0_input Measured voltage in millivolts. -curr1_input Measured current in microamperes. -curr1_max_alarm Overcurrent alarm in microamperes. +curr1_input Measured current in milliamperes. +curr1_max Overcurrent shutdown threshold in milliamperes. diff --git a/drivers/hwmon/adm1177.c b/drivers/hwmon/adm1177.c index 8b2c965480e3..7888afe8dafd 100644 --- a/drivers/hwmon/adm1177.c +++ b/drivers/hwmon/adm1177.c @@ -10,6 +10,8 @@ #include #include #include +#include +#include #include #include @@ -33,7 +35,7 @@ struct adm1177_state { struct i2c_client *client; u32 r_sense_uohm; - u32 alert_threshold_ua; + u64 alert_threshold_ua; bool vrange_high; }; @@ -48,7 +50,7 @@ static int adm1177_write_cmd(struct adm1177_state *st, u8 cmd) } static int adm1177_write_alert_thr(struct adm1177_state *st, - u32 alert_threshold_ua) + u64 alert_threshold_ua) { u64 val; int ret; @@ -91,8 +93,8 @@ static int adm1177_read(struct device *dev, enum hwmon_sensor_types type, *val = div_u64((105840000ull * dummy), 4096 * st->r_sense_uohm); return 0; - case hwmon_curr_max_alarm: - *val = st->alert_threshold_ua; + case hwmon_curr_max: + *val = div_u64(st->alert_threshold_ua, 1000); return 0; default: return -EOPNOTSUPP; @@ -126,9 +128,10 @@ static int adm1177_write(struct device *dev, enum hwmon_sensor_types type, switch (type) { case hwmon_curr: switch (attr) { - case hwmon_curr_max_alarm: - adm1177_write_alert_thr(st, val); - return 0; + case hwmon_curr_max: + val = clamp_val(val, 0, + div_u64(105840000ULL, st->r_sense_uohm)); + return adm1177_write_alert_thr(st, (u64)val * 1000); default: return -EOPNOTSUPP; } @@ -156,7 +159,7 @@ static umode_t adm1177_is_visible(const void *data, if (st->r_sense_uohm) return 0444; return 0; - case hwmon_curr_max_alarm: + case hwmon_curr_max: if (st->r_sense_uohm) return 0644; return 0; @@ -170,7 +173,7 @@ static umode_t adm1177_is_visible(const void *data, static const struct hwmon_channel_info * const adm1177_info[] = { HWMON_CHANNEL_INFO(curr, - HWMON_C_INPUT | HWMON_C_MAX_ALARM), + HWMON_C_INPUT | HWMON_C_MAX), HWMON_CHANNEL_INFO(in, HWMON_I_INPUT), NULL @@ -192,7 +195,8 @@ static int adm1177_probe(struct i2c_client *client) struct device *dev = &client->dev; struct device *hwmon_dev; struct adm1177_state *st; - u32 alert_threshold_ua; + u64 alert_threshold_ua; + u32 prop; int ret; st = devm_kzalloc(dev, sizeof(*st), GFP_KERNEL); @@ -208,22 +212,26 @@ static int adm1177_probe(struct i2c_client *client) if (device_property_read_u32(dev, "shunt-resistor-micro-ohms", &st->r_sense_uohm)) st->r_sense_uohm = 0; - if (device_property_read_u32(dev, "adi,shutdown-threshold-microamp", - &alert_threshold_ua)) { - if (st->r_sense_uohm) - /* - * set maximum default value from datasheet based on - * shunt-resistor - */ - alert_threshold_ua = div_u64(105840000000, - st->r_sense_uohm); - else - alert_threshold_ua = 0; + if (!device_property_read_u32(dev, "adi,shutdown-threshold-microamp", + &prop)) { + alert_threshold_ua = prop; + } else if (st->r_sense_uohm) { + /* + * set maximum default value from datasheet based on + * shunt-resistor + */ + alert_threshold_ua = div_u64(105840000000ULL, + st->r_sense_uohm); + } else { + alert_threshold_ua = 0; } st->vrange_high = device_property_read_bool(dev, "adi,vrange-high-enable"); - if (alert_threshold_ua && st->r_sense_uohm) - adm1177_write_alert_thr(st, alert_threshold_ua); + if (alert_threshold_ua && st->r_sense_uohm) { + ret = adm1177_write_alert_thr(st, alert_threshold_ua); + if (ret) + return ret; + } ret = adm1177_write_cmd(st, ADM1177_CMD_V_CONT | ADM1177_CMD_I_CONT | From 87a70013be7d1b96e7e160aea6dad4564b459868 Mon Sep 17 00:00:00 2001 From: Jianmin Lv Date: Fri, 20 Mar 2026 18:10:12 +0800 Subject: [PATCH 179/401] MAINTAINERS: Update GPU driver maintainer information I and Qianhai are GPU R&D engineers at Loongson, specializing in kernel driver development. We understand that the current Loongson GPU driver lacks dedicated maintenance resources because of some reasons. As Loongson GPU driver developers, we have both the capability and the responsibility to continuously maintain the Loongson GPU driver, ensuring minimal impact on its users. After internal discussions, our team has decided to recommend me and Qianhai to take over the maintenance responsibilities, and recommend Huacai, Mingcong and Ruoyao to help to review. And We'll continue to maintain it for current supported chips and drive future updates according to chip support plan. Signed-off-by: Jianmin Lv Acked-by: Thomas Zimmermann Signed-off-by: Thomas Zimmermann Link: https://patch.msgid.link/20260320101012.22714-1-lvjianmin@loongson.cn --- MAINTAINERS | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index b35fc8e062c3..3e31d0df4144 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -8630,8 +8630,14 @@ F: drivers/gpu/drm/lima/ F: include/uapi/drm/lima_drm.h DRM DRIVERS FOR LOONGSON +M: Jianmin Lv +M: Qianhai Wu +R: Huacai Chen +R: Mingcong Bai +R: Xi Ruoyao +R: Icenowy Zheng L: dri-devel@lists.freedesktop.org -S: Orphan +S: Maintained T: git https://gitlab.freedesktop.org/drm/misc/kernel.git F: drivers/gpu/drm/loongson/ From c673efd5db2223c2e8b885025bcd96bca6cdb171 Mon Sep 17 00:00:00 2001 From: Shuming Fan Date: Wed, 25 Mar 2026 19:04:06 +0800 Subject: [PATCH 180/401] ASoC: SDCA: fix finding wrong entity This patch fixes an issue like: where searching for the entity 'FU 11' could incorrectly match 'FU 113' first. The driver should first perform an exact match on the full string name. If no exact match is found, it can then fall back to a partial match. Fixes: 48fa77af2f4a ("ASoC: SDCA: Add terminal type into input/output widget name") Reviewed-by: Charles Keepax Signed-off-by: Shuming Fan Link: https://patch.msgid.link/20260325110406.3232420-1-shumingf@realtek.com Signed-off-by: Mark Brown --- sound/soc/sdca/sdca_functions.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/sound/soc/sdca/sdca_functions.c b/sound/soc/sdca/sdca_functions.c index d27ffb25ad97..dca60ee8e62c 100644 --- a/sound/soc/sdca/sdca_functions.c +++ b/sound/soc/sdca/sdca_functions.c @@ -1601,10 +1601,19 @@ static int find_sdca_entities(struct device *dev, struct sdw_slave *sdw, static struct sdca_entity *find_sdca_entity_by_label(struct sdca_function_data *function, const char *entity_label) { + struct sdca_entity *entity = NULL; int i; for (i = 0; i < function->num_entities; i++) { - struct sdca_entity *entity = &function->entities[i]; + entity = &function->entities[i]; + + /* check whole string first*/ + if (!strcmp(entity->label, entity_label)) + return entity; + } + + for (i = 0; i < function->num_entities; i++) { + entity = &function->entities[i]; if (!strncmp(entity->label, entity_label, strlen(entity_label))) return entity; From e398978ddf18fe5a2fc8299c77e6fe50e6c306c4 Mon Sep 17 00:00:00 2001 From: Petr Mladek Date: Wed, 25 Mar 2026 13:34:18 +0100 Subject: [PATCH 181/401] workqueue: Better describe stall check Try to be more explicit why the workqueue watchdog does not take pool->lock by default. Spin locks are full memory barriers which delay anything. Obviously, they would primary delay operations on the related worker pools. Explain why it is enough to prevent the false positive by re-checking the timestamp under the pool->lock. Finally, make it clear what would be the alternative solution in __queue_work() which is a hotter path. Signed-off-by: Petr Mladek Acked-by: Song Liu Signed-off-by: Tejun Heo --- kernel/workqueue.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/kernel/workqueue.c b/kernel/workqueue.c index ff97b705f25e..eda756556341 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -7702,13 +7702,14 @@ static void wq_watchdog_timer_fn(struct timer_list *unused) /* * Did we stall? * - * Do a lockless check first. On weakly ordered - * architectures, the lockless check can observe a - * reordering between worklist insert_work() and - * last_progress_ts update from __queue_work(). Since - * __queue_work() is a much hotter path than the timer - * function, we handle false positive here by reading - * last_progress_ts again with pool->lock held. + * Do a lockless check first to do not disturb the system. + * + * Prevent false positives by double checking the timestamp + * under pool->lock. The lock makes sure that the check reads + * an updated pool->last_progress_ts when this CPU saw + * an already updated pool->worklist above. It seems better + * than adding another barrier into __queue_work() which + * is a hotter path. */ if (time_after(now, ts + thresh)) { scoped_guard(raw_spinlock_irqsave, &pool->lock) { From 789b06f9f39cdc7e895bdab2c034e39c41c8f8d6 Mon Sep 17 00:00:00 2001 From: Alexander Popov Date: Wed, 25 Mar 2026 01:46:02 +0300 Subject: [PATCH 182/401] wifi: virt_wifi: remove SET_NETDEV_DEV to avoid use-after-free Currently we execute `SET_NETDEV_DEV(dev, &priv->lowerdev->dev)` for the virt_wifi net devices. However, unregistering a virt_wifi device in netdev_run_todo() can happen together with the device referenced by SET_NETDEV_DEV(). It can result in use-after-free during the ethtool operations performed on a virt_wifi device that is currently being unregistered. Such a net device can have the `dev.parent` field pointing to the freed memory, but ethnl_ops_begin() calls `pm_runtime_get_sync(dev->dev.parent)`. Let's remove SET_NETDEV_DEV for virt_wifi to avoid bugs like this: ================================================================== BUG: KASAN: slab-use-after-free in __pm_runtime_resume+0xe2/0xf0 Read of size 2 at addr ffff88810cfc46f8 by task pm/606 Call Trace: dump_stack_lvl+0x4d/0x70 print_report+0x170/0x4f3 ? __pfx__raw_spin_lock_irqsave+0x10/0x10 kasan_report+0xda/0x110 ? __pm_runtime_resume+0xe2/0xf0 ? __pm_runtime_resume+0xe2/0xf0 __pm_runtime_resume+0xe2/0xf0 ethnl_ops_begin+0x49/0x270 ethnl_set_features+0x23c/0xab0 ? __pfx_ethnl_set_features+0x10/0x10 ? kvm_sched_clock_read+0x11/0x20 ? local_clock_noinstr+0xf/0xf0 ? local_clock+0x10/0x30 ? kasan_save_track+0x25/0x60 ? __kasan_kmalloc+0x7f/0x90 ? genl_family_rcv_msg_attrs_parse.isra.0+0x150/0x2c0 genl_family_rcv_msg_doit+0x1e7/0x2c0 ? __pfx_genl_family_rcv_msg_doit+0x10/0x10 ? __pfx_cred_has_capability.isra.0+0x10/0x10 ? stack_trace_save+0x8e/0xc0 genl_rcv_msg+0x411/0x660 ? __pfx_genl_rcv_msg+0x10/0x10 ? __pfx_ethnl_set_features+0x10/0x10 netlink_rcv_skb+0x121/0x380 ? __pfx_genl_rcv_msg+0x10/0x10 ? __pfx_netlink_rcv_skb+0x10/0x10 ? __pfx_down_read+0x10/0x10 genl_rcv+0x23/0x30 netlink_unicast+0x60f/0x830 ? __pfx_netlink_unicast+0x10/0x10 ? __pfx___alloc_skb+0x10/0x10 netlink_sendmsg+0x6ea/0xbc0 ? __pfx_netlink_sendmsg+0x10/0x10 ? __futex_queue+0x10b/0x1f0 ____sys_sendmsg+0x7a2/0x950 ? copy_msghdr_from_user+0x26b/0x430 ? __pfx_____sys_sendmsg+0x10/0x10 ? __pfx_copy_msghdr_from_user+0x10/0x10 ___sys_sendmsg+0xf8/0x180 ? __pfx____sys_sendmsg+0x10/0x10 ? __pfx_futex_wait+0x10/0x10 ? fdget+0x2e4/0x4a0 __sys_sendmsg+0x11f/0x1c0 ? __pfx___sys_sendmsg+0x10/0x10 do_syscall_64+0xe2/0x570 ? exc_page_fault+0x66/0xb0 entry_SYSCALL_64_after_hwframe+0x77/0x7f This fix may be combined with another one in the ethtool subsystem: https://lore.kernel.org/all/20260322075917.254874-1-alex.popov@linux.com/T/#u Fixes: d43c65b05b848e0b ("ethtool: runtime-resume netdev parent in ethnl_ops_begin") Cc: stable@vger.kernel.org Signed-off-by: Alexander Popov Acked-by: Greg Kroah-Hartman Reviewed-by: Breno Leitao Link: https://patch.msgid.link/20260324224607.374327-1-alex.popov@linux.com Signed-off-by: Johannes Berg --- drivers/net/wireless/virtual/virt_wifi.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/net/wireless/virtual/virt_wifi.c b/drivers/net/wireless/virtual/virt_wifi.c index 885dc7243e8d..97bd39d89e98 100644 --- a/drivers/net/wireless/virtual/virt_wifi.c +++ b/drivers/net/wireless/virtual/virt_wifi.c @@ -557,7 +557,6 @@ static int virt_wifi_newlink(struct net_device *dev, eth_hw_addr_inherit(dev, priv->lowerdev); netif_stacked_transfer_operstate(priv->lowerdev, dev); - SET_NETDEV_DEV(dev, &priv->lowerdev->dev); dev->ieee80211_ptr = kzalloc_obj(*dev->ieee80211_ptr); if (!dev->ieee80211_ptr) { From 805a5bd1c3f307d45ae4e9cf8915ef16d585a54a Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Tue, 24 Mar 2026 16:41:07 -0700 Subject: [PATCH 183/401] hwmon: (pmbus) Mark lowest/average/highest/rated attributes as read-only Writing those attributes is not supported, so mark them as read-only. Prior to this change, attempts to write into these attributes returned an error. Mark boolean fields in struct pmbus_limit_attr and in struct pmbus_sensor_attr as bit fields to reduce configuration data size. The data is scanned only while probing, so performance is not a concern. Fixes: 6f183d33a02e6 ("hwmon: (pmbus) Add support for peak attributes") Reviewed-by: Sanman Pradhan Signed-off-by: Guenter Roeck --- drivers/hwmon/pmbus/pmbus_core.c | 48 ++++++++++++++++++++++++++++---- 1 file changed, 42 insertions(+), 6 deletions(-) diff --git a/drivers/hwmon/pmbus/pmbus_core.c b/drivers/hwmon/pmbus/pmbus_core.c index be6d05def115..ecd1dddcbe0f 100644 --- a/drivers/hwmon/pmbus/pmbus_core.c +++ b/drivers/hwmon/pmbus/pmbus_core.c @@ -1495,8 +1495,9 @@ static int pmbus_add_label(struct pmbus_data *data, struct pmbus_limit_attr { u16 reg; /* Limit register */ u16 sbit; /* Alarm attribute status bit */ - bool update; /* True if register needs updates */ - bool low; /* True if low limit; for limits with compare functions only */ + bool readonly:1; /* True if the attribute is read-only */ + bool update:1; /* True if register needs updates */ + bool low:1; /* True if low limit; for limits with compare functions only */ const char *attr; /* Attribute name */ const char *alarm; /* Alarm attribute name */ }; @@ -1511,9 +1512,9 @@ struct pmbus_sensor_attr { u8 nlimit; /* # of limit registers */ enum pmbus_sensor_classes class;/* sensor class */ const char *label; /* sensor label */ - bool paged; /* true if paged sensor */ - bool update; /* true if update needed */ - bool compare; /* true if compare function needed */ + bool paged:1; /* true if paged sensor */ + bool update:1; /* true if update needed */ + bool compare:1; /* true if compare function needed */ u32 func; /* sensor mask */ u32 sfunc; /* sensor status mask */ int sreg; /* status register */ @@ -1544,7 +1545,7 @@ static int pmbus_add_limit_attrs(struct i2c_client *client, curr = pmbus_add_sensor(data, name, l->attr, index, page, 0xff, l->reg, attr->class, attr->update || l->update, - false, true); + l->readonly, true); if (!curr) return -ENOMEM; if (l->sbit && (info->func[page] & attr->sfunc)) { @@ -1707,23 +1708,28 @@ static const struct pmbus_limit_attr vin_limit_attrs[] = { }, { .reg = PMBUS_VIRT_READ_VIN_AVG, .update = true, + .readonly = true, .attr = "average", }, { .reg = PMBUS_VIRT_READ_VIN_MIN, .update = true, + .readonly = true, .attr = "lowest", }, { .reg = PMBUS_VIRT_READ_VIN_MAX, .update = true, + .readonly = true, .attr = "highest", }, { .reg = PMBUS_VIRT_RESET_VIN_HISTORY, .attr = "reset_history", }, { .reg = PMBUS_MFR_VIN_MIN, + .readonly = true, .attr = "rated_min", }, { .reg = PMBUS_MFR_VIN_MAX, + .readonly = true, .attr = "rated_max", }, }; @@ -1776,23 +1782,28 @@ static const struct pmbus_limit_attr vout_limit_attrs[] = { }, { .reg = PMBUS_VIRT_READ_VOUT_AVG, .update = true, + .readonly = true, .attr = "average", }, { .reg = PMBUS_VIRT_READ_VOUT_MIN, .update = true, + .readonly = true, .attr = "lowest", }, { .reg = PMBUS_VIRT_READ_VOUT_MAX, .update = true, + .readonly = true, .attr = "highest", }, { .reg = PMBUS_VIRT_RESET_VOUT_HISTORY, .attr = "reset_history", }, { .reg = PMBUS_MFR_VOUT_MIN, + .readonly = true, .attr = "rated_min", }, { .reg = PMBUS_MFR_VOUT_MAX, + .readonly = true, .attr = "rated_max", }, }; @@ -1852,20 +1863,24 @@ static const struct pmbus_limit_attr iin_limit_attrs[] = { }, { .reg = PMBUS_VIRT_READ_IIN_AVG, .update = true, + .readonly = true, .attr = "average", }, { .reg = PMBUS_VIRT_READ_IIN_MIN, .update = true, + .readonly = true, .attr = "lowest", }, { .reg = PMBUS_VIRT_READ_IIN_MAX, .update = true, + .readonly = true, .attr = "highest", }, { .reg = PMBUS_VIRT_RESET_IIN_HISTORY, .attr = "reset_history", }, { .reg = PMBUS_MFR_IIN_MAX, + .readonly = true, .attr = "rated_max", }, }; @@ -1889,20 +1904,24 @@ static const struct pmbus_limit_attr iout_limit_attrs[] = { }, { .reg = PMBUS_VIRT_READ_IOUT_AVG, .update = true, + .readonly = true, .attr = "average", }, { .reg = PMBUS_VIRT_READ_IOUT_MIN, .update = true, + .readonly = true, .attr = "lowest", }, { .reg = PMBUS_VIRT_READ_IOUT_MAX, .update = true, + .readonly = true, .attr = "highest", }, { .reg = PMBUS_VIRT_RESET_IOUT_HISTORY, .attr = "reset_history", }, { .reg = PMBUS_MFR_IOUT_MAX, + .readonly = true, .attr = "rated_max", }, }; @@ -1943,20 +1962,24 @@ static const struct pmbus_limit_attr pin_limit_attrs[] = { }, { .reg = PMBUS_VIRT_READ_PIN_AVG, .update = true, + .readonly = true, .attr = "average", }, { .reg = PMBUS_VIRT_READ_PIN_MIN, .update = true, + .readonly = true, .attr = "input_lowest", }, { .reg = PMBUS_VIRT_READ_PIN_MAX, .update = true, + .readonly = true, .attr = "input_highest", }, { .reg = PMBUS_VIRT_RESET_PIN_HISTORY, .attr = "reset_history", }, { .reg = PMBUS_MFR_PIN_MAX, + .readonly = true, .attr = "rated_max", }, }; @@ -1980,20 +2003,24 @@ static const struct pmbus_limit_attr pout_limit_attrs[] = { }, { .reg = PMBUS_VIRT_READ_POUT_AVG, .update = true, + .readonly = true, .attr = "average", }, { .reg = PMBUS_VIRT_READ_POUT_MIN, .update = true, + .readonly = true, .attr = "input_lowest", }, { .reg = PMBUS_VIRT_READ_POUT_MAX, .update = true, + .readonly = true, .attr = "input_highest", }, { .reg = PMBUS_VIRT_RESET_POUT_HISTORY, .attr = "reset_history", }, { .reg = PMBUS_MFR_POUT_MAX, + .readonly = true, .attr = "rated_max", }, }; @@ -2049,18 +2076,22 @@ static const struct pmbus_limit_attr temp_limit_attrs[] = { .sbit = PB_TEMP_OT_FAULT, }, { .reg = PMBUS_VIRT_READ_TEMP_MIN, + .readonly = true, .attr = "lowest", }, { .reg = PMBUS_VIRT_READ_TEMP_AVG, + .readonly = true, .attr = "average", }, { .reg = PMBUS_VIRT_READ_TEMP_MAX, + .readonly = true, .attr = "highest", }, { .reg = PMBUS_VIRT_RESET_TEMP_HISTORY, .attr = "reset_history", }, { .reg = PMBUS_MFR_MAX_TEMP_1, + .readonly = true, .attr = "rated_max", }, }; @@ -2090,18 +2121,22 @@ static const struct pmbus_limit_attr temp_limit_attrs2[] = { .sbit = PB_TEMP_OT_FAULT, }, { .reg = PMBUS_VIRT_READ_TEMP2_MIN, + .readonly = true, .attr = "lowest", }, { .reg = PMBUS_VIRT_READ_TEMP2_AVG, + .readonly = true, .attr = "average", }, { .reg = PMBUS_VIRT_READ_TEMP2_MAX, + .readonly = true, .attr = "highest", }, { .reg = PMBUS_VIRT_RESET_TEMP2_HISTORY, .attr = "reset_history", }, { .reg = PMBUS_MFR_MAX_TEMP_2, + .readonly = true, .attr = "rated_max", }, }; @@ -2131,6 +2166,7 @@ static const struct pmbus_limit_attr temp_limit_attrs3[] = { .sbit = PB_TEMP_OT_FAULT, }, { .reg = PMBUS_MFR_MAX_TEMP_3, + .readonly = true, .attr = "rated_max", }, }; From cd658475e7694d58e1c40dabc1dacf8431ccedb2 Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Tue, 24 Mar 2026 18:54:11 -0700 Subject: [PATCH 184/401] hwmon: (pmbus) Introduce the concept of "write-only" attributes Attributes intended to clear sensor history are intended to be writeable only. Reading those attributes today results in reporting more or less random values. To avoid ABI surprises, have those attributes explicitly return 0 when reading. Fixes: 787c095edaa9d ("hwmon: (pmbus/core) Add support for rated attributes") Reviewed-by: Sanman Pradhan Signed-off-by: Guenter Roeck --- drivers/hwmon/pmbus/pmbus_core.c | 32 ++++++++++++++++++++++++-------- 1 file changed, 24 insertions(+), 8 deletions(-) diff --git a/drivers/hwmon/pmbus/pmbus_core.c b/drivers/hwmon/pmbus/pmbus_core.c index ecd1dddcbe0f..cbc36f0ba4bf 100644 --- a/drivers/hwmon/pmbus/pmbus_core.c +++ b/drivers/hwmon/pmbus/pmbus_core.c @@ -1209,6 +1209,12 @@ static ssize_t pmbus_show_boolean(struct device *dev, return sysfs_emit(buf, "%d\n", val); } +static ssize_t pmbus_show_zero(struct device *dev, + struct device_attribute *devattr, char *buf) +{ + return sysfs_emit(buf, "0\n"); +} + static ssize_t pmbus_show_sensor(struct device *dev, struct device_attribute *devattr, char *buf) { @@ -1407,7 +1413,7 @@ static struct pmbus_sensor *pmbus_add_sensor(struct pmbus_data *data, int reg, enum pmbus_sensor_classes class, bool update, bool readonly, - bool convert) + bool writeonly, bool convert) { struct pmbus_sensor *sensor; struct device_attribute *a; @@ -1436,7 +1442,8 @@ static struct pmbus_sensor *pmbus_add_sensor(struct pmbus_data *data, sensor->data = -ENODATA; pmbus_dev_attr_init(a, sensor->name, readonly ? 0444 : 0644, - pmbus_show_sensor, pmbus_set_sensor); + writeonly ? pmbus_show_zero : pmbus_show_sensor, + pmbus_set_sensor); if (pmbus_add_attribute(data, &a->attr)) return NULL; @@ -1496,6 +1503,7 @@ struct pmbus_limit_attr { u16 reg; /* Limit register */ u16 sbit; /* Alarm attribute status bit */ bool readonly:1; /* True if the attribute is read-only */ + bool writeonly:1; /* True if the attribute is write-only */ bool update:1; /* True if register needs updates */ bool low:1; /* True if low limit; for limits with compare functions only */ const char *attr; /* Attribute name */ @@ -1545,7 +1553,7 @@ static int pmbus_add_limit_attrs(struct i2c_client *client, curr = pmbus_add_sensor(data, name, l->attr, index, page, 0xff, l->reg, attr->class, attr->update || l->update, - l->readonly, true); + l->readonly, l->writeonly, true); if (!curr) return -ENOMEM; if (l->sbit && (info->func[page] & attr->sfunc)) { @@ -1585,7 +1593,7 @@ static int pmbus_add_sensor_attrs_one(struct i2c_client *client, return ret; } base = pmbus_add_sensor(data, name, "input", index, page, phase, - attr->reg, attr->class, true, true, true); + attr->reg, attr->class, true, true, false, true); if (!base) return -ENOMEM; /* No limit and alarm attributes for phase specific sensors */ @@ -1722,6 +1730,7 @@ static const struct pmbus_limit_attr vin_limit_attrs[] = { .attr = "highest", }, { .reg = PMBUS_VIRT_RESET_VIN_HISTORY, + .writeonly = true, .attr = "reset_history", }, { .reg = PMBUS_MFR_VIN_MIN, @@ -1796,6 +1805,7 @@ static const struct pmbus_limit_attr vout_limit_attrs[] = { .attr = "highest", }, { .reg = PMBUS_VIRT_RESET_VOUT_HISTORY, + .writeonly = true, .attr = "reset_history", }, { .reg = PMBUS_MFR_VOUT_MIN, @@ -1877,6 +1887,7 @@ static const struct pmbus_limit_attr iin_limit_attrs[] = { .attr = "highest", }, { .reg = PMBUS_VIRT_RESET_IIN_HISTORY, + .writeonly = true, .attr = "reset_history", }, { .reg = PMBUS_MFR_IIN_MAX, @@ -1918,6 +1929,7 @@ static const struct pmbus_limit_attr iout_limit_attrs[] = { .attr = "highest", }, { .reg = PMBUS_VIRT_RESET_IOUT_HISTORY, + .writeonly = true, .attr = "reset_history", }, { .reg = PMBUS_MFR_IOUT_MAX, @@ -1976,6 +1988,7 @@ static const struct pmbus_limit_attr pin_limit_attrs[] = { .attr = "input_highest", }, { .reg = PMBUS_VIRT_RESET_PIN_HISTORY, + .writeonly = true, .attr = "reset_history", }, { .reg = PMBUS_MFR_PIN_MAX, @@ -2017,6 +2030,7 @@ static const struct pmbus_limit_attr pout_limit_attrs[] = { .attr = "input_highest", }, { .reg = PMBUS_VIRT_RESET_POUT_HISTORY, + .writeonly = true, .attr = "reset_history", }, { .reg = PMBUS_MFR_POUT_MAX, @@ -2088,6 +2102,7 @@ static const struct pmbus_limit_attr temp_limit_attrs[] = { .attr = "highest", }, { .reg = PMBUS_VIRT_RESET_TEMP_HISTORY, + .writeonly = true, .attr = "reset_history", }, { .reg = PMBUS_MFR_MAX_TEMP_1, @@ -2133,6 +2148,7 @@ static const struct pmbus_limit_attr temp_limit_attrs2[] = { .attr = "highest", }, { .reg = PMBUS_VIRT_RESET_TEMP2_HISTORY, + .writeonly = true, .attr = "reset_history", }, { .reg = PMBUS_MFR_MAX_TEMP_2, @@ -2250,7 +2266,7 @@ static int pmbus_add_fan_ctrl(struct i2c_client *client, sensor = pmbus_add_sensor(data, "fan", "target", index, page, 0xff, PMBUS_VIRT_FAN_TARGET_1 + id, PSC_FAN, - false, false, true); + false, false, false, true); if (!sensor) return -ENOMEM; @@ -2261,14 +2277,14 @@ static int pmbus_add_fan_ctrl(struct i2c_client *client, sensor = pmbus_add_sensor(data, "pwm", NULL, index, page, 0xff, PMBUS_VIRT_PWM_1 + id, PSC_PWM, - false, false, true); + false, false, false, true); if (!sensor) return -ENOMEM; sensor = pmbus_add_sensor(data, "pwm", "enable", index, page, 0xff, PMBUS_VIRT_PWM_ENABLE_1 + id, PSC_PWM, - true, false, false); + true, false, false, false); if (!sensor) return -ENOMEM; @@ -2310,7 +2326,7 @@ static int pmbus_add_fan_attributes(struct i2c_client *client, if (pmbus_add_sensor(data, "fan", "input", index, page, 0xff, pmbus_fan_registers[f], - PSC_FAN, true, true, true) == NULL) + PSC_FAN, true, true, false, true) == NULL) return -ENOMEM; /* Fan control */ From 754bd2b4a084b90b5e7b630e1f423061a9b9b761 Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Sun, 22 Mar 2026 09:12:33 -0700 Subject: [PATCH 185/401] hwmon: (pmbus/core) Protect regulator operations with mutex The regulator operations pmbus_regulator_get_voltage(), pmbus_regulator_set_voltage(), and pmbus_regulator_list_voltage() access PMBus registers and shared data but were not protected by the update_lock mutex. This could lead to race conditions. However, adding mutex protection directly to these functions causes a deadlock because pmbus_regulator_notify() (which calls regulator_notifier_call_chain()) is often called with the mutex already held (e.g., from pmbus_fault_handler()). If a regulator callback then calls one of the now-protected voltage functions, it will attempt to acquire the same mutex. Rework pmbus_regulator_notify() to utilize a worker function to send notifications outside of the mutex protection. Events are stored as atomics in a per-page bitmask and processed by the worker. Initialize the worker and its associated data during regulator registration, and ensure it is cancelled on device removal using devm_add_action_or_reset(). While at it, remove the unnecessary include of linux/of.h. Cc: Sanman Pradhan Fixes: ddbb4db4ced1b ("hwmon: (pmbus) Add regulator support") Reviewed-by: Sanman Pradhan Signed-off-by: Guenter Roeck --- drivers/hwmon/pmbus/pmbus_core.c | 114 ++++++++++++++++++++++++------- 1 file changed, 89 insertions(+), 25 deletions(-) diff --git a/drivers/hwmon/pmbus/pmbus_core.c b/drivers/hwmon/pmbus/pmbus_core.c index cbc36f0ba4bf..572be3ebc03d 100644 --- a/drivers/hwmon/pmbus/pmbus_core.c +++ b/drivers/hwmon/pmbus/pmbus_core.c @@ -6,6 +6,7 @@ * Copyright (c) 2012 Guenter Roeck */ +#include #include #include #include @@ -21,8 +22,8 @@ #include #include #include -#include #include +#include #include "pmbus.h" /* @@ -112,6 +113,11 @@ struct pmbus_data { struct mutex update_lock; +#if IS_ENABLED(CONFIG_REGULATOR) + atomic_t regulator_events[PMBUS_PAGES]; + struct work_struct regulator_notify_work; +#endif + bool has_status_word; /* device uses STATUS_WORD register */ int (*read_status)(struct i2c_client *client, int page); @@ -3228,12 +3234,19 @@ static int pmbus_regulator_get_voltage(struct regulator_dev *rdev) .class = PSC_VOLTAGE_OUT, .convert = true, }; + int ret; + mutex_lock(&data->update_lock); s.data = _pmbus_read_word_data(client, s.page, 0xff, PMBUS_READ_VOUT); - if (s.data < 0) - return s.data; + if (s.data < 0) { + ret = s.data; + goto unlock; + } - return (int)pmbus_reg2data(data, &s) * 1000; /* unit is uV */ + ret = (int)pmbus_reg2data(data, &s) * 1000; /* unit is uV */ +unlock: + mutex_unlock(&data->update_lock); + return ret; } static int pmbus_regulator_set_voltage(struct regulator_dev *rdev, int min_uv, @@ -3250,16 +3263,22 @@ static int pmbus_regulator_set_voltage(struct regulator_dev *rdev, int min_uv, }; int val = DIV_ROUND_CLOSEST(min_uv, 1000); /* convert to mV */ int low, high; + int ret; *selector = 0; + mutex_lock(&data->update_lock); low = pmbus_regulator_get_low_margin(client, s.page); - if (low < 0) - return low; + if (low < 0) { + ret = low; + goto unlock; + } high = pmbus_regulator_get_high_margin(client, s.page); - if (high < 0) - return high; + if (high < 0) { + ret = high; + goto unlock; + } /* Make sure we are within margins */ if (low > val) @@ -3269,7 +3288,10 @@ static int pmbus_regulator_set_voltage(struct regulator_dev *rdev, int min_uv, val = pmbus_data2reg(data, &s, val); - return _pmbus_write_word_data(client, s.page, PMBUS_VOUT_COMMAND, (u16)val); + ret = _pmbus_write_word_data(client, s.page, PMBUS_VOUT_COMMAND, (u16)val); +unlock: + mutex_unlock(&data->update_lock); + return ret; } static int pmbus_regulator_list_voltage(struct regulator_dev *rdev, @@ -3279,6 +3301,7 @@ static int pmbus_regulator_list_voltage(struct regulator_dev *rdev, struct i2c_client *client = to_i2c_client(dev->parent); struct pmbus_data *data = i2c_get_clientdata(client); int val, low, high; + int ret; if (data->flags & PMBUS_VOUT_PROTECTED) return 0; @@ -3291,18 +3314,29 @@ static int pmbus_regulator_list_voltage(struct regulator_dev *rdev, val = DIV_ROUND_CLOSEST(rdev->desc->min_uV + (rdev->desc->uV_step * selector), 1000); /* convert to mV */ + mutex_lock(&data->update_lock); + low = pmbus_regulator_get_low_margin(client, rdev_get_id(rdev)); - if (low < 0) - return low; + if (low < 0) { + ret = low; + goto unlock; + } high = pmbus_regulator_get_high_margin(client, rdev_get_id(rdev)); - if (high < 0) - return high; + if (high < 0) { + ret = high; + goto unlock; + } - if (val >= low && val <= high) - return val * 1000; /* unit is uV */ + if (val >= low && val <= high) { + ret = val * 1000; /* unit is uV */ + goto unlock; + } - return 0; + ret = 0; +unlock: + mutex_unlock(&data->update_lock); + return ret; } const struct regulator_ops pmbus_regulator_ops = { @@ -3333,12 +3367,42 @@ int pmbus_regulator_init_cb(struct regulator_dev *rdev, } EXPORT_SYMBOL_NS_GPL(pmbus_regulator_init_cb, "PMBUS"); +static void pmbus_regulator_notify_work_cancel(void *data) +{ + struct pmbus_data *pdata = data; + + cancel_work_sync(&pdata->regulator_notify_work); +} + +static void pmbus_regulator_notify_worker(struct work_struct *work) +{ + struct pmbus_data *data = + container_of(work, struct pmbus_data, regulator_notify_work); + int i, j; + + for (i = 0; i < data->info->pages; i++) { + int event; + + event = atomic_xchg(&data->regulator_events[i], 0); + if (!event) + continue; + + for (j = 0; j < data->info->num_regulators; j++) { + if (i == rdev_get_id(data->rdevs[j])) { + regulator_notifier_call_chain(data->rdevs[j], + event, NULL); + break; + } + } + } +} + static int pmbus_regulator_register(struct pmbus_data *data) { struct device *dev = data->dev; const struct pmbus_driver_info *info = data->info; const struct pmbus_platform_data *pdata = dev_get_platdata(dev); - int i; + int i, ret; data->rdevs = devm_kzalloc(dev, sizeof(struct regulator_dev *) * info->num_regulators, GFP_KERNEL); @@ -3362,19 +3426,19 @@ static int pmbus_regulator_register(struct pmbus_data *data) info->reg_desc[i].name); } + INIT_WORK(&data->regulator_notify_work, pmbus_regulator_notify_worker); + + ret = devm_add_action_or_reset(dev, pmbus_regulator_notify_work_cancel, data); + if (ret) + return ret; + return 0; } static void pmbus_regulator_notify(struct pmbus_data *data, int page, int event) { - int j; - - for (j = 0; j < data->info->num_regulators; j++) { - if (page == rdev_get_id(data->rdevs[j])) { - regulator_notifier_call_chain(data->rdevs[j], event, NULL); - break; - } - } + atomic_or(event, &data->regulator_events[page]); + schedule_work(&data->regulator_notify_work); } #else static int pmbus_regulator_register(struct pmbus_data *data) From beef2634f81f1c086208191f7228bce1d366493d Mon Sep 17 00:00:00 2001 From: Namjae Jeon Date: Thu, 19 Mar 2026 21:00:02 +0900 Subject: [PATCH 186/401] ksmbd: fix potencial OOB in get_file_all_info() for compound requests When a compound request consists of QUERY_DIRECTORY + QUERY_INFO (FILE_ALL_INFORMATION) and the first command consumes nearly the entire max_trans_size, get_file_all_info() would blindly call smbConvertToUTF16() with PATH_MAX, causing out-of-bounds write beyond the response buffer. In get_file_all_info(), there was a missing validation check for the client-provided OutputBufferLength before copying the filename into FileName field of the smb2_file_all_info structure. If the filename length exceeds the available buffer space, it could lead to potential buffer overflows or memory corruption during smbConvertToUTF16 conversion. This calculating the actual free buffer size using smb2_calc_max_out_buf_len() and returning -EINVAL if the buffer is insufficient and updating smbConvertToUTF16 to use the actual filename length (clamped by PATH_MAX) to ensure a safe copy operation. Cc: stable@vger.kernel.org Fixes: e2b76ab8b5c9 ("ksmbd: add support for read compound") Reported-by: Asim Viladi Oglu Manizada Signed-off-by: Namjae Jeon Signed-off-by: Steve French --- fs/smb/server/smb2pdu.c | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/fs/smb/server/smb2pdu.c b/fs/smb/server/smb2pdu.c index f5f1bf5f642e..6fb7a795ff5d 100644 --- a/fs/smb/server/smb2pdu.c +++ b/fs/smb/server/smb2pdu.c @@ -4940,7 +4940,8 @@ static int get_file_all_info(struct ksmbd_work *work, int conv_len; char *filename; u64 time; - int ret; + int ret, buf_free_len, filename_len; + struct smb2_query_info_req *req = ksmbd_req_buf_next(work); if (!(fp->daccess & FILE_READ_ATTRIBUTES_LE)) { ksmbd_debug(SMB, "no right to read the attributes : 0x%x\n", @@ -4952,6 +4953,16 @@ static int get_file_all_info(struct ksmbd_work *work, if (IS_ERR(filename)) return PTR_ERR(filename); + filename_len = strlen(filename); + buf_free_len = smb2_calc_max_out_buf_len(work, + offsetof(struct smb2_query_info_rsp, Buffer) + + offsetof(struct smb2_file_all_info, FileName), + le32_to_cpu(req->OutputBufferLength)); + if (buf_free_len < (filename_len + 1) * 2) { + kfree(filename); + return -EINVAL; + } + ret = vfs_getattr(&fp->filp->f_path, &stat, STATX_BASIC_STATS, AT_STATX_SYNC_AS_STAT); if (ret) { @@ -4995,7 +5006,8 @@ static int get_file_all_info(struct ksmbd_work *work, file_info->Mode = fp->coption; file_info->AlignmentRequirement = 0; conv_len = smbConvertToUTF16((__le16 *)file_info->FileName, filename, - PATH_MAX, conn->local_nls, 0); + min(filename_len, PATH_MAX), + conn->local_nls, 0); conv_len *= 2; file_info->FileNameLength = cpu_to_le32(conv_len); rsp->OutputBufferLength = From 4c56a8ac6869855866de0bb368a4189739e1d24f Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 25 Mar 2026 07:23:48 -1000 Subject: [PATCH 187/401] cgroup: Fix cgroup_drain_dying() testing the wrong condition cgroup_drain_dying() was using cgroup_is_populated() to test whether there are dying tasks to wait for. cgroup_is_populated() tests nr_populated_csets, nr_populated_domain_children and nr_populated_threaded_children, but cgroup_drain_dying() only needs to care about this cgroup's own tasks - whether there are children is cgroup_destroy_locked()'s concern. This caused hangs during shutdown. When systemd tried to rmdir a cgroup that had no direct tasks but had a populated child, cgroup_drain_dying() would enter its wait loop because cgroup_is_populated() was true from nr_populated_domain_children. The task iterator found nothing to wait for, yet the populated state never cleared because it was driven by live tasks in the child cgroup. Fix it by using cgroup_has_tasks() which only tests nr_populated_csets. v3: Fix cgroup_is_populated() -> cgroup_has_tasks() (Sebastian). v2: https://lore.kernel.org/r/20260323200205.1063629-1-tj@kernel.org Reported-by: Sebastian Andrzej Siewior Fixes: 1b164b876c36 ("cgroup: Wait for dying tasks to leave on rmdir") Signed-off-by: Tejun Heo Tested-by: Sebastian Andrzej Siewior --- kernel/cgroup/cgroup.c | 42 ++++++++++++++++++++++-------------------- 1 file changed, 22 insertions(+), 20 deletions(-) diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c index 2163054e1aa6..4ca3cb993da2 100644 --- a/kernel/cgroup/cgroup.c +++ b/kernel/cgroup/cgroup.c @@ -6229,20 +6229,22 @@ static int cgroup_destroy_locked(struct cgroup *cgrp) * cgroup_drain_dying - wait for dying tasks to leave before rmdir * @cgrp: the cgroup being removed * - * The PF_EXITING filter in css_task_iter_advance() hides exiting tasks from - * cgroup.procs so that userspace (e.g. systemd) doesn't see tasks that have - * already been reaped via waitpid(). However, the populated counter - * (nr_populated_csets) is only decremented when the task later passes through + * cgroup.procs and cgroup.threads use css_task_iter which filters out + * PF_EXITING tasks so that userspace doesn't see tasks that have already been + * reaped via waitpid(). However, cgroup_has_tasks() - which tests whether the + * cgroup has non-empty css_sets - is only updated when dying tasks pass through * cgroup_task_dead() in finish_task_switch(). This creates a window where - * cgroup.procs appears empty but cgroup_is_populated() is still true, causing - * rmdir to fail with -EBUSY. + * cgroup.procs reads empty but cgroup_has_tasks() is still true, making rmdir + * fail with -EBUSY from cgroup_destroy_locked() even though userspace sees no + * tasks. * - * This function bridges that gap. If the cgroup is populated but all remaining - * tasks have PF_EXITING set, we wait for cgroup_task_dead() to process them. - * Tasks are removed from the cgroup's css_set in cgroup_task_dead() called from - * finish_task_switch(). As the window between PF_EXITING and cgroup_task_dead() - * is short, the number of PF_EXITING tasks on the list is small and the wait - * is brief. + * This function aligns cgroup_has_tasks() with what userspace can observe. If + * cgroup_has_tasks() but the task iterator sees nothing (all remaining tasks are + * PF_EXITING), we wait for cgroup_task_dead() to finish processing them. As the + * window between PF_EXITING and cgroup_task_dead() is short, the wait is brief. + * + * This function only concerns itself with this cgroup's own dying tasks. + * Whether the cgroup has children is cgroup_destroy_locked()'s problem. * * Each cgroup_task_dead() kicks the waitqueue via cset->cgrp_links, and we * retry the full check from scratch. @@ -6258,7 +6260,7 @@ static int cgroup_drain_dying(struct cgroup *cgrp) lockdep_assert_held(&cgroup_mutex); retry: - if (!cgroup_is_populated(cgrp)) + if (!cgroup_has_tasks(cgrp)) return 0; /* Same iterator as cgroup.threads - if any task is visible, it's busy */ @@ -6273,15 +6275,15 @@ static int cgroup_drain_dying(struct cgroup *cgrp) * All remaining tasks are PF_EXITING and will pass through * cgroup_task_dead() shortly. Wait for a kick and retry. * - * cgroup_is_populated() can't transition from false to true while - * we're holding cgroup_mutex, but the true to false transition - * happens under css_set_lock (via cgroup_task_dead()). We must - * retest and prepare_to_wait() under css_set_lock. Otherwise, the - * transition can happen between our first test and - * prepare_to_wait(), and we sleep with no one to wake us. + * cgroup_has_tasks() can't transition from false to true while we're + * holding cgroup_mutex, but the true to false transition happens + * under css_set_lock (via cgroup_task_dead()). We must retest and + * prepare_to_wait() under css_set_lock. Otherwise, the transition + * can happen between our first test and prepare_to_wait(), and we + * sleep with no one to wake us. */ spin_lock_irq(&css_set_lock); - if (!cgroup_is_populated(cgrp)) { + if (!cgroup_has_tasks(cgrp)) { spin_unlock_irq(&css_set_lock); return 0; } From 3a28daa9b7d7c2ddf2c722e9e95d7e0928bf0cd1 Mon Sep 17 00:00:00 2001 From: Li Jun Date: Thu, 26 Mar 2026 14:29:08 +0800 Subject: [PATCH 188/401] LoongArch: Fix missing NULL checks for kstrdup() 1. Replace "of_find_node_by_path("/")" with "of_root" to avoid multiple calls to "of_node_put()". 2. Fix a potential kernel oops during early boot when memory allocation fails while parsing CPU model from device tree. Cc: stable@vger.kernel.org Signed-off-by: Li Jun Signed-off-by: Huacai Chen --- arch/loongarch/kernel/env.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/arch/loongarch/kernel/env.c b/arch/loongarch/kernel/env.c index 841206fde3ab..652456768b55 100644 --- a/arch/loongarch/kernel/env.c +++ b/arch/loongarch/kernel/env.c @@ -42,16 +42,15 @@ static int __init init_cpu_fullname(void) int cpu, ret; char *cpuname; const char *model; - struct device_node *root; /* Parsing cpuname from DTS model property */ - root = of_find_node_by_path("/"); - ret = of_property_read_string(root, "model", &model); + ret = of_property_read_string(of_root, "model", &model); if (ret == 0) { cpuname = kstrdup(model, GFP_KERNEL); + if (!cpuname) + return -ENOMEM; loongson_sysconf.cpuname = strsep(&cpuname, " "); } - of_node_put(root); if (loongson_sysconf.cpuname && !strncmp(loongson_sysconf.cpuname, "Loongson", 8)) { for (cpu = 0; cpu < NR_CPUS; cpu++) From 95db0c9f526d583634cddb2e5914718570fbac87 Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Thu, 26 Mar 2026 14:29:09 +0800 Subject: [PATCH 189/401] LoongArch: Workaround LS2K/LS7A GPU DMA hang bug 1. Hardware limitation: GPU, DC and VPU are typically PCI device 06.0, 06.1 and 06.2. They share some hardware resources, so when configure the PCI 06.0 device BAR1, DMA memory access cannot be performed through this BAR, otherwise it will cause hardware abnormalities. 2. In typical scenarios of reboot or S3/S4, DC access to memory through BAR is not prohibited, resulting in GPU DMA hangs. 3. Workaround method: When configuring the 06.0 device BAR1, turn off the memory access of DC, GPU and VPU (via DC's CRTC registers). Cc: stable@vger.kernel.org Signed-off-by: Qianhai Wu Signed-off-by: Huacai Chen --- arch/loongarch/pci/pci.c | 80 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 80 insertions(+) diff --git a/arch/loongarch/pci/pci.c b/arch/loongarch/pci/pci.c index d923295ab8c6..d233ea2218fe 100644 --- a/arch/loongarch/pci/pci.c +++ b/arch/loongarch/pci/pci.c @@ -5,9 +5,11 @@ #include #include #include +#include #include #include #include +#include #include #include @@ -15,6 +17,9 @@ #define PCI_DEVICE_ID_LOONGSON_DC1 0x7a06 #define PCI_DEVICE_ID_LOONGSON_DC2 0x7a36 #define PCI_DEVICE_ID_LOONGSON_DC3 0x7a46 +#define PCI_DEVICE_ID_LOONGSON_GPU1 0x7a15 +#define PCI_DEVICE_ID_LOONGSON_GPU2 0x7a25 +#define PCI_DEVICE_ID_LOONGSON_GPU3 0x7a35 int raw_pci_read(unsigned int domain, unsigned int bus, unsigned int devfn, int reg, int len, u32 *val) @@ -99,3 +104,78 @@ static void pci_fixup_vgadev(struct pci_dev *pdev) DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_LOONGSON, PCI_DEVICE_ID_LOONGSON_DC1, pci_fixup_vgadev); DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_LOONGSON, PCI_DEVICE_ID_LOONGSON_DC2, pci_fixup_vgadev); DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_LOONGSON, PCI_DEVICE_ID_LOONGSON_DC3, pci_fixup_vgadev); + +#define CRTC_NUM_MAX 2 +#define CRTC_OUTPUT_ENABLE 0x100 + +static void loongson_gpu_fixup_dma_hang(struct pci_dev *pdev, bool on) +{ + u32 i, val, count, crtc_offset, device; + void __iomem *crtc_reg, *base, *regbase; + static u32 crtc_status[CRTC_NUM_MAX] = { 0 }; + + base = pdev->bus->ops->map_bus(pdev->bus, pdev->devfn + 1, 0); + device = readw(base + PCI_DEVICE_ID); + + regbase = ioremap(readq(base + PCI_BASE_ADDRESS_0) & ~0xffull, SZ_64K); + if (!regbase) { + pci_err(pdev, "Failed to ioremap()\n"); + return; + } + + switch (device) { + case PCI_DEVICE_ID_LOONGSON_DC2: + crtc_reg = regbase + 0x1240; + crtc_offset = 0x10; + break; + case PCI_DEVICE_ID_LOONGSON_DC3: + crtc_reg = regbase; + crtc_offset = 0x400; + break; + } + + for (i = 0; i < CRTC_NUM_MAX; i++, crtc_reg += crtc_offset) { + val = readl(crtc_reg); + + if (!on) + crtc_status[i] = val; + + /* No need to fixup if the status is off at startup. */ + if (!(crtc_status[i] & CRTC_OUTPUT_ENABLE)) + continue; + + if (on) + val |= CRTC_OUTPUT_ENABLE; + else + val &= ~CRTC_OUTPUT_ENABLE; + + mb(); + writel(val, crtc_reg); + + for (count = 0; count < 40; count++) { + val = readl(crtc_reg) & CRTC_OUTPUT_ENABLE; + if ((on && val) || (!on && !val)) + break; + udelay(1000); + } + + pci_info(pdev, "DMA hang fixup at reg[0x%lx]: 0x%x\n", + (unsigned long)crtc_reg & 0xffff, readl(crtc_reg)); + } + + iounmap(regbase); +} + +static void pci_fixup_dma_hang_early(struct pci_dev *pdev) +{ + loongson_gpu_fixup_dma_hang(pdev, false); +} +DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_LOONGSON, PCI_DEVICE_ID_LOONGSON_GPU2, pci_fixup_dma_hang_early); +DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_LOONGSON, PCI_DEVICE_ID_LOONGSON_GPU3, pci_fixup_dma_hang_early); + +static void pci_fixup_dma_hang_final(struct pci_dev *pdev) +{ + loongson_gpu_fixup_dma_hang(pdev, true); +} +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_LOONGSON, PCI_DEVICE_ID_LOONGSON_GPU2, pci_fixup_dma_hang_final); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_LOONGSON, PCI_DEVICE_ID_LOONGSON_GPU3, pci_fixup_dma_hang_final); From e4878c37f6679fdea91b27a0f4e60a871f0b7bad Mon Sep 17 00:00:00 2001 From: Xi Ruoyao Date: Thu, 26 Mar 2026 14:29:09 +0800 Subject: [PATCH 190/401] LoongArch: vDSO: Emit GNU_EH_FRAME correctly With -fno-asynchronous-unwind-tables and --no-eh-frame-hdr (the default of the linker), the GNU_EH_FRAME segment (specified by vdso.lds.S) is empty. This is not valid, as the current DWARF specification mandates the first byte of the EH frame to be the version number 1. It causes some unwinders to complain, for example the ClickHouse query profiler spams the log with messages: clickhouse-server[365854]: libunwind: unsupported .eh_frame_hdr version: 127 at 7ffffffb0000 Here "127" is just the byte located at the p_vaddr (0, i.e. the beginning of the vDSO) of the empty GNU_EH_FRAME segment. Cross- checking with /proc/365854/maps has also proven 7ffffffb0000 is the start of vDSO in the process VM image. In LoongArch the -fno-asynchronous-unwind-tables option seems just a MIPS legacy, and MIPS only uses this option to satisfy the MIPS-specific "genvdso" program, per the commit cfd75c2db17e ("MIPS: VDSO: Explicitly use -fno-asynchronous-unwind-tables"). IIRC it indicates some inherent limitation of the MIPS ELF ABI and has nothing to do with LoongArch. So we can simply flip it over to -fasynchronous-unwind-tables and pass --eh-frame-hdr for linking the vDSO, allowing the profilers to unwind the stack for statistics even if the sample point is taken when the PC is in the vDSO. However simply adjusting the options above would exploit an issue: when the libgcc unwinder saw the invalid GNU_EH_FRAME segment, it silently falled back to a machine-specific routine to match the code pattern of rt_sigreturn() and extract the registers saved in the sigframe if the code pattern is matched. As unwinding from signal handlers is vital for libgcc to support pthread cancellation etc., the fall-back routine had been silently keeping the LoongArch Linux systems functioning since Linux 5.19. But when we start to emit GNU_EH_FRAME with the correct format, fall-back routine will no longer be used and libgcc will fail to unwind the sigframe, and unwinding from signal handlers will no longer work, causing dozens of glibc test failures. To make it possible to unwind from signal handlers again, it's necessary to code the unwind info in __vdso_rt_sigreturn via .cfi_* directives. The offsets in the .cfi_* directives depend on the layout of struct sigframe, notably the offset of sigcontext in the sigframe. To use the offset in the assembly file, factor out struct sigframe into a header to allow asm-offsets.c to output the offset for assembly. To work around a long-term issue in the libgcc unwinder (the pc is unconditionally substracted by 1: doing so is technically incorrect for a signal frame), a nop instruction is included with the two real instructions in __vdso_rt_sigreturn in the same FDE PC range. The same hack has been used on x86 for a long time. Cc: stable@vger.kernel.org Fixes: c6b99bed6b8f ("LoongArch: Add VDSO and VSYSCALL support") Signed-off-by: Xi Ruoyao Signed-off-by: Huacai Chen --- arch/loongarch/include/asm/linkage.h | 36 +++++++++++++++++++++++++++ arch/loongarch/include/asm/sigframe.h | 9 +++++++ arch/loongarch/kernel/asm-offsets.c | 2 ++ arch/loongarch/kernel/signal.c | 6 +---- arch/loongarch/vdso/Makefile | 4 +-- arch/loongarch/vdso/sigreturn.S | 6 ++--- 6 files changed, 53 insertions(+), 10 deletions(-) create mode 100644 arch/loongarch/include/asm/sigframe.h diff --git a/arch/loongarch/include/asm/linkage.h b/arch/loongarch/include/asm/linkage.h index e2eca1a25b4e..a1bd6a3ee03a 100644 --- a/arch/loongarch/include/asm/linkage.h +++ b/arch/loongarch/include/asm/linkage.h @@ -41,4 +41,40 @@ .cfi_endproc; \ SYM_END(name, SYM_T_NONE) +/* + * This is for the signal handler trampoline, which is used as the return + * address of the signal handlers in userspace instead of called normally. + * The long standing libgcc bug https://gcc.gnu.org/PR124050 requires a + * nop between .cfi_startproc and the actual address of the trampoline, so + * we cannot simply use SYM_FUNC_START. + * + * This wrapper also contains all the .cfi_* directives for recovering + * the content of the GPRs and the "return address" (where the rt_sigreturn + * syscall will jump to), assuming there is a struct rt_sigframe (where + * a struct sigcontext containing those information we need to recover) at + * $sp. The "DWARF for the LoongArch(TM) Architecture" manual states + * column 0 is for $zero, but it does not make too much sense to + * save/restore the hardware zero register. Repurpose this column here + * for the return address (here it's not the content of $ra we cannot use + * the default column 3). + */ +#define SYM_SIGFUNC_START(name) \ + .cfi_startproc; \ + .cfi_signal_frame; \ + .cfi_def_cfa 3, RT_SIGFRAME_SC; \ + .cfi_return_column 0; \ + .cfi_offset 0, SC_PC; \ + \ + .irp num, 1, 2, 3, 4, 5, 6, 7, 8, \ + 9, 10, 11, 12, 13, 14, 15, 16, \ + 17, 18, 19, 20, 21, 22, 23, 24, \ + 25, 26, 27, 28, 29, 30, 31; \ + .cfi_offset \num, SC_REGS + \num * SZREG; \ + .endr; \ + \ + nop; \ + SYM_START(name, SYM_L_GLOBAL, SYM_A_ALIGN) + +#define SYM_SIGFUNC_END(name) SYM_FUNC_END(name) + #endif diff --git a/arch/loongarch/include/asm/sigframe.h b/arch/loongarch/include/asm/sigframe.h new file mode 100644 index 000000000000..109298b8d7e0 --- /dev/null +++ b/arch/loongarch/include/asm/sigframe.h @@ -0,0 +1,9 @@ +/* SPDX-License-Identifier: GPL-2.0+ */ + +#include +#include + +struct rt_sigframe { + struct siginfo rs_info; + struct ucontext rs_uctx; +}; diff --git a/arch/loongarch/kernel/asm-offsets.c b/arch/loongarch/kernel/asm-offsets.c index 3017c7157600..2cc953f113ac 100644 --- a/arch/loongarch/kernel/asm-offsets.c +++ b/arch/loongarch/kernel/asm-offsets.c @@ -16,6 +16,7 @@ #include #include #include +#include #include static void __used output_ptreg_defines(void) @@ -220,6 +221,7 @@ static void __used output_sc_defines(void) COMMENT("Linux sigcontext offsets."); OFFSET(SC_REGS, sigcontext, sc_regs); OFFSET(SC_PC, sigcontext, sc_pc); + OFFSET(RT_SIGFRAME_SC, rt_sigframe, rs_uctx.uc_mcontext); BLANK(); } diff --git a/arch/loongarch/kernel/signal.c b/arch/loongarch/kernel/signal.c index c9f7ca778364..d4151d2fb82e 100644 --- a/arch/loongarch/kernel/signal.c +++ b/arch/loongarch/kernel/signal.c @@ -35,6 +35,7 @@ #include #include #include +#include #include #include @@ -51,11 +52,6 @@ #define lock_lbt_owner() ({ preempt_disable(); pagefault_disable(); }) #define unlock_lbt_owner() ({ pagefault_enable(); preempt_enable(); }) -struct rt_sigframe { - struct siginfo rs_info; - struct ucontext rs_uctx; -}; - struct _ctx_layout { struct sctx_info *addr; unsigned int size; diff --git a/arch/loongarch/vdso/Makefile b/arch/loongarch/vdso/Makefile index 520f1513f07d..294c16b9517f 100644 --- a/arch/loongarch/vdso/Makefile +++ b/arch/loongarch/vdso/Makefile @@ -26,7 +26,7 @@ cflags-vdso := $(ccflags-vdso) \ $(filter -W%,$(filter-out -Wa$(comma)%,$(KBUILD_CFLAGS))) \ -std=gnu11 -fms-extensions -O2 -g -fno-strict-aliasing -fno-common -fno-builtin \ -fno-stack-protector -fno-jump-tables -DDISABLE_BRANCH_PROFILING \ - $(call cc-option, -fno-asynchronous-unwind-tables) \ + $(call cc-option, -fasynchronous-unwind-tables) \ $(call cc-option, -fno-stack-protector) aflags-vdso := $(ccflags-vdso) \ -D__ASSEMBLY__ -Wa,-gdwarf-2 @@ -41,7 +41,7 @@ endif # VDSO linker flags. ldflags-y := -Bsymbolic --no-undefined -soname=linux-vdso.so.1 \ - $(filter -E%,$(KBUILD_CFLAGS)) -shared --build-id -T + $(filter -E%,$(KBUILD_CFLAGS)) -shared --build-id --eh-frame-hdr -T # # Shared build commands. diff --git a/arch/loongarch/vdso/sigreturn.S b/arch/loongarch/vdso/sigreturn.S index 9cb3c58fad03..59f940d928de 100644 --- a/arch/loongarch/vdso/sigreturn.S +++ b/arch/loongarch/vdso/sigreturn.S @@ -12,13 +12,13 @@ #include #include +#include .section .text - .cfi_sections .debug_frame -SYM_FUNC_START(__vdso_rt_sigreturn) +SYM_SIGFUNC_START(__vdso_rt_sigreturn) li.w a7, __NR_rt_sigreturn syscall 0 -SYM_FUNC_END(__vdso_rt_sigreturn) +SYM_SIGFUNC_END(__vdso_rt_sigreturn) From 2db06c15d8c7a0ccb6108524e16cd9163753f354 Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Thu, 26 Mar 2026 14:29:09 +0800 Subject: [PATCH 191/401] LoongArch: KVM: Make kvm_get_vcpu_by_cpuid() more robust kvm_get_vcpu_by_cpuid() takes a cpuid parameter whose type is int, so cpuid can be negative. Let kvm_get_vcpu_by_cpuid() return NULL for this case so as to make it more robust. This fix an out-of-bounds access to kvm_arch::phyid_map::phys_map[]. Cc: Fixes: 73516e9da512adc ("LoongArch: KVM: Add vcpu mapping from physical cpuid") Reported-by: Aurelien Jarno Link: https://bugs.debian.org/cgi-bin/bugreport.cgi?bug=1131431 Signed-off-by: Huacai Chen --- arch/loongarch/kvm/vcpu.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/loongarch/kvm/vcpu.c b/arch/loongarch/kvm/vcpu.c index 8ffd50a470e6..831f381a8fd1 100644 --- a/arch/loongarch/kvm/vcpu.c +++ b/arch/loongarch/kvm/vcpu.c @@ -588,6 +588,9 @@ struct kvm_vcpu *kvm_get_vcpu_by_cpuid(struct kvm *kvm, int cpuid) { struct kvm_phyid_map *map; + if (cpuid < 0) + return NULL; + if (cpuid >= KVM_MAX_PHYID) return NULL; From b97bd69eb0f67b5f961b304d28e9ba45e202d841 Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Thu, 26 Mar 2026 14:29:09 +0800 Subject: [PATCH 192/401] LoongArch: KVM: Handle the case that EIOINTC's coremap is empty EIOINTC's coremap in eiointc_update_sw_coremap() can be empty, currently we get a cpuid with -1 in this case, but we actually need 0 because it's similar as the case that cpuid >= 4. This fix an out-of-bounds access to kvm_arch::phyid_map::phys_map[]. Cc: Fixes: 3956a52bc05bd81 ("LoongArch: KVM: Add EIOINTC read and write functions") Reported-by: Aurelien Jarno Link: https://bugs.debian.org/cgi-bin/bugreport.cgi?bug=1131431 Signed-off-by: Huacai Chen --- arch/loongarch/kvm/intc/eiointc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/loongarch/kvm/intc/eiointc.c b/arch/loongarch/kvm/intc/eiointc.c index d2acb4d09e73..c7badc813923 100644 --- a/arch/loongarch/kvm/intc/eiointc.c +++ b/arch/loongarch/kvm/intc/eiointc.c @@ -83,7 +83,7 @@ static inline void eiointc_update_sw_coremap(struct loongarch_eiointc *s, if (!(s->status & BIT(EIOINTC_ENABLE_CPU_ENCODE))) { cpuid = ffs(cpuid) - 1; - cpuid = (cpuid >= 4) ? 0 : cpuid; + cpuid = ((cpuid < 0) || (cpuid >= 4)) ? 0 : cpuid; } vcpu = kvm_get_vcpu_by_cpuid(s->kvm, cpuid); From 6bcfb7f46d667b04bd1a1169ccedf5fb699c60df Mon Sep 17 00:00:00 2001 From: Bibo Mao Date: Thu, 26 Mar 2026 14:29:09 +0800 Subject: [PATCH 193/401] LoongArch: KVM: Fix base address calculation in kvm_eiointc_regs_access() In function kvm_eiointc_regs_access(), the register base address is caculated from array base address plus offset, the offset is absolute value from the base address. The data type of array base address is u64, it should be converted into the "void *" type and then plus the offset. Cc: Fixes: d3e43a1f34ac ("LoongArch: KVM: Use 64-bit register definition for EIOINTC"). Reported-by: Aurelien Jarno Link: https://bugs.debian.org/cgi-bin/bugreport.cgi?bug=1131431 Signed-off-by: Bibo Mao Signed-off-by: Huacai Chen --- arch/loongarch/kvm/intc/eiointc.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/arch/loongarch/kvm/intc/eiointc.c b/arch/loongarch/kvm/intc/eiointc.c index c7badc813923..003bd773e11c 100644 --- a/arch/loongarch/kvm/intc/eiointc.c +++ b/arch/loongarch/kvm/intc/eiointc.c @@ -472,34 +472,34 @@ static int kvm_eiointc_regs_access(struct kvm_device *dev, switch (addr) { case EIOINTC_NODETYPE_START ... EIOINTC_NODETYPE_END: offset = (addr - EIOINTC_NODETYPE_START) / 4; - p = s->nodetype + offset * 4; + p = (void *)s->nodetype + offset * 4; break; case EIOINTC_IPMAP_START ... EIOINTC_IPMAP_END: offset = (addr - EIOINTC_IPMAP_START) / 4; - p = &s->ipmap + offset * 4; + p = (void *)&s->ipmap + offset * 4; break; case EIOINTC_ENABLE_START ... EIOINTC_ENABLE_END: offset = (addr - EIOINTC_ENABLE_START) / 4; - p = s->enable + offset * 4; + p = (void *)s->enable + offset * 4; break; case EIOINTC_BOUNCE_START ... EIOINTC_BOUNCE_END: offset = (addr - EIOINTC_BOUNCE_START) / 4; - p = s->bounce + offset * 4; + p = (void *)s->bounce + offset * 4; break; case EIOINTC_ISR_START ... EIOINTC_ISR_END: offset = (addr - EIOINTC_ISR_START) / 4; - p = s->isr + offset * 4; + p = (void *)s->isr + offset * 4; break; case EIOINTC_COREISR_START ... EIOINTC_COREISR_END: if (cpu >= s->num_cpu) return -EINVAL; offset = (addr - EIOINTC_COREISR_START) / 4; - p = s->coreisr[cpu] + offset * 4; + p = (void *)s->coreisr[cpu] + offset * 4; break; case EIOINTC_COREMAP_START ... EIOINTC_COREMAP_END: offset = (addr - EIOINTC_COREMAP_START) / 4; - p = s->coremap + offset * 4; + p = (void *)s->coremap + offset * 4; break; default: kvm_err("%s: unknown eiointc register, addr = %d\n", __func__, addr); From cd7e1fef5a1ca1c4fcd232211962ac2395601636 Mon Sep 17 00:00:00 2001 From: GuoHan Zhao Date: Wed, 25 Mar 2026 20:02:46 +0800 Subject: [PATCH 194/401] xen/privcmd: unregister xenstore notifier on module exit Commit 453b8fb68f36 ("xen/privcmd: restrict usage in unprivileged domU") added a xenstore notifier to defer setting the restriction target until Xenstore is ready. XEN_PRIVCMD can be built as a module, but privcmd_exit() leaves that notifier behind. Balance the notifier lifecycle by unregistering it on module exit. This is harmless even if xenstore was already ready at registration time and the notifier was never queued on the chain. Fixes: 453b8fb68f3641fe ("xen/privcmd: restrict usage in unprivileged domU") Signed-off-by: GuoHan Zhao Reviewed-by: Juergen Gross Signed-off-by: Juergen Gross Message-ID: <20260325120246.252899-1-zhaoguohan@kylinos.cn> --- drivers/xen/privcmd.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/xen/privcmd.c b/drivers/xen/privcmd.c index bbf9ee21306c..15ba592236e8 100644 --- a/drivers/xen/privcmd.c +++ b/drivers/xen/privcmd.c @@ -1765,6 +1765,9 @@ static int __init privcmd_init(void) static void __exit privcmd_exit(void) { + if (!xen_initial_domain()) + unregister_xenstore_notifier(&xenstore_notifier); + privcmd_ioeventfd_exit(); privcmd_irqfd_exit(); misc_deregister(&privcmd_dev); From f63a9df7e3f9f842945d292a19d9938924f066f9 Mon Sep 17 00:00:00 2001 From: Marc Buerg Date: Wed, 25 Mar 2026 23:29:50 +0100 Subject: [PATCH 195/401] sysctl: fix uninitialized variable in proc_do_large_bitmap proc_do_large_bitmap() does not initialize variable c, which is expected to be set to a trailing character by proc_get_long(). However, proc_get_long() only sets c when the input buffer contains a trailing character after the parsed value. If c is not initialized it may happen to contain a '-'. If this is the case proc_do_large_bitmap() expects to be able to parse a second part of the input buffer. If there is no second part an unjustified -EINVAL will be returned. Initialize c to 0 to prevent returning -EINVAL on valid input. Fixes: 9f977fb7ae9d ("sysctl: add proc_do_large_bitmap") Signed-off-by: Marc Buerg Reviewed-by: Joel Granados Signed-off-by: Joel Granados --- kernel/sysctl.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 9d3a666ffde1..c9efb17cc255 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -1118,7 +1118,7 @@ int proc_do_large_bitmap(const struct ctl_table *table, int dir, unsigned long bitmap_len = table->maxlen; unsigned long *bitmap = *(unsigned long **) table->data; unsigned long *tmp_bitmap = NULL; - char tr_a[] = { '-', ',', '\n' }, tr_b[] = { ',', '\n', 0 }, c; + char tr_a[] = { '-', ',', '\n' }, tr_b[] = { ',', '\n', 0 }, c = 0; if (!bitmap || !bitmap_len || !left || (*ppos && SYSCTL_KERN_TO_USER(dir))) { *lenp = 0; From 326fe8104a4020d30080d37ac8b6b43893cdebca Mon Sep 17 00:00:00 2001 From: Jihed Chaibi Date: Wed, 25 Mar 2026 22:07:03 +0100 Subject: [PATCH 196/401] ASoC: adau1372: Fix unchecked clk_prepare_enable() return value MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit adau1372_set_power() calls clk_prepare_enable() but discards the return value. If the clock enable fails, the driver proceeds to access registers on unpowered hardware, potentially causing silent corruption. Make adau1372_set_power() return int and propagate the error from clk_prepare_enable(). Update adau1372_set_bias_level() to return the error directly for the STANDBY and OFF cases. Signed-off-by: Jihed Chaibi Fixes: 6cd4c6459e47 ("ASoC: Add ADAU1372 audio CODEC support") Reviewed-by: Nuno Sá Link: https://patch.msgid.link/20260325210704.76847-2-jihed.chaibi.dev@gmail.com Signed-off-by: Mark Brown --- sound/soc/codecs/adau1372.c | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/sound/soc/codecs/adau1372.c b/sound/soc/codecs/adau1372.c index fdee689cae53..6345342218d6 100644 --- a/sound/soc/codecs/adau1372.c +++ b/sound/soc/codecs/adau1372.c @@ -782,15 +782,18 @@ static void adau1372_enable_pll(struct adau1372 *adau1372) dev_err(adau1372->dev, "Failed to lock PLL\n"); } -static void adau1372_set_power(struct adau1372 *adau1372, bool enable) +static int adau1372_set_power(struct adau1372 *adau1372, bool enable) { if (adau1372->enabled == enable) - return; + return 0; if (enable) { unsigned int clk_ctrl = ADAU1372_CLK_CTRL_MCLK_EN; + int ret; - clk_prepare_enable(adau1372->mclk); + ret = clk_prepare_enable(adau1372->mclk); + if (ret) + return ret; if (adau1372->pd_gpio) gpiod_set_value(adau1372->pd_gpio, 0); @@ -829,6 +832,8 @@ static void adau1372_set_power(struct adau1372 *adau1372, bool enable) } adau1372->enabled = enable; + + return 0; } static int adau1372_set_bias_level(struct snd_soc_component *component, @@ -842,11 +847,9 @@ static int adau1372_set_bias_level(struct snd_soc_component *component, case SND_SOC_BIAS_PREPARE: break; case SND_SOC_BIAS_STANDBY: - adau1372_set_power(adau1372, true); - break; + return adau1372_set_power(adau1372, true); case SND_SOC_BIAS_OFF: - adau1372_set_power(adau1372, false); - break; + return adau1372_set_power(adau1372, false); } return 0; From bfe6a264effcb6fe99ad7ceaf9e8c7439fc9555b Mon Sep 17 00:00:00 2001 From: Jihed Chaibi Date: Wed, 25 Mar 2026 22:07:04 +0100 Subject: [PATCH 197/401] ASoC: adau1372: Fix clock leak on PLL lock failure MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit adau1372_enable_pll() was a void function that logged a dev_err() on PLL lock timeout but did not propagate the error. As a result, adau1372_set_power() would continue with adau1372->enabled set to true despite the PLL being unlocked, and the mclk left enabled with no corresponding disable on the error path. Convert adau1372_enable_pll() to return int, using -ETIMEDOUT on lock timeout and propagating regmap errors directly. In adau1372_set_power(), check the return value and unwind in reverse order: restore regcache to cache-only mode, reassert GPIO power-down, and disable the clock before returning the error. Signed-off-by: Jihed Chaibi Fixes: 6cd4c6459e47 ("ASoC: Add ADAU1372 audio CODEC support") Reviewed-by: Nuno Sá Link: https://patch.msgid.link/20260325210704.76847-3-jihed.chaibi.dev@gmail.com Signed-off-by: Mark Brown --- sound/soc/codecs/adau1372.c | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/sound/soc/codecs/adau1372.c b/sound/soc/codecs/adau1372.c index 6345342218d6..d7363f9d53bb 100644 --- a/sound/soc/codecs/adau1372.c +++ b/sound/soc/codecs/adau1372.c @@ -762,7 +762,7 @@ static int adau1372_startup(struct snd_pcm_substream *substream, struct snd_soc_ return 0; } -static void adau1372_enable_pll(struct adau1372 *adau1372) +static int adau1372_enable_pll(struct adau1372 *adau1372) { unsigned int val, timeout = 0; int ret; @@ -778,8 +778,12 @@ static void adau1372_enable_pll(struct adau1372 *adau1372) timeout++; } while (!(val & 1) && timeout < 3); - if (ret < 0 || !(val & 1)) + if (ret < 0 || !(val & 1)) { dev_err(adau1372->dev, "Failed to lock PLL\n"); + return ret < 0 ? ret : -ETIMEDOUT; + } + + return 0; } static int adau1372_set_power(struct adau1372 *adau1372, bool enable) @@ -807,7 +811,14 @@ static int adau1372_set_power(struct adau1372 *adau1372, bool enable) * accessed. */ if (adau1372->use_pll) { - adau1372_enable_pll(adau1372); + ret = adau1372_enable_pll(adau1372); + if (ret) { + regcache_cache_only(adau1372->regmap, true); + if (adau1372->pd_gpio) + gpiod_set_value(adau1372->pd_gpio, 1); + clk_disable_unprepare(adau1372->mclk); + return ret; + } clk_ctrl |= ADAU1372_CLK_CTRL_CLKSRC; } From 8d2e0cb3224c89275c5471c92850e7f74df80c20 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Wed, 25 Mar 2026 15:53:19 +0100 Subject: [PATCH 198/401] spi: fix use-after-free on managed registration failure The SPI API is asymmetric and the controller is freed as part of deregistration (unless it has been allocated using devm_spi_alloc_host/target()). A recent change converting the managed registration function to use devm_add_action_or_reset() inadvertently introduced a (mostly theoretical) regression where a non-devres managed controller could be freed as part of failed registration. This in turn would lead to use-after-free in controller driver error paths. Fix this by taking another reference before calling devm_add_action_or_reset() and not releasing it on errors for non-devres allocated controllers. An alternative would be a partial revert of the offending commit, but it is better to handle this explicitly until the API has been fixed (e.g. see 5e844cc37a5c ("spi: Introduce device-managed SPI controller allocation")). Fixes: b6376dbed8e1 ("spi: Simplify devm_spi_*_controller()") Reported-by: Felix Gu Link: https://lore.kernel.org/all/20260324145548.139952-1-ustc.gu@gmail.com/ Cc: Andy Shevchenko Signed-off-by: Johan Hovold Acked-by: Andy Shevchenko Link: https://patch.msgid.link/20260325145319.1132072-1-johan@kernel.org Signed-off-by: Mark Brown --- drivers/spi/spi.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/drivers/spi/spi.c b/drivers/spi/spi.c index 4101c2803eb3..9b1125556d29 100644 --- a/drivers/spi/spi.c +++ b/drivers/spi/spi.c @@ -3534,8 +3534,19 @@ int devm_spi_register_controller(struct device *dev, if (ret) return ret; - return devm_add_action_or_reset(dev, devm_spi_unregister_controller, ctlr); + /* + * Prevent controller from being freed by spi_unregister_controller() + * if devm_add_action_or_reset() fails for a non-devres allocated + * controller. + */ + spi_controller_get(ctlr); + ret = devm_add_action_or_reset(dev, devm_spi_unregister_controller, ctlr); + + if (ret == 0 || ctlr->devm_allocated) + spi_controller_put(ctlr); + + return ret; } EXPORT_SYMBOL_GPL(devm_spi_register_controller); From d40a198e2b7821197c5c77b89d0130cc90f400f5 Mon Sep 17 00:00:00 2001 From: Peter Ujfalusi Date: Thu, 26 Mar 2026 09:56:18 +0200 Subject: [PATCH 199/401] ASoC: SOF: ipc4-topology: Allow bytes controls without initial payload It is unexpected, but allowed to have no initial payload for a bytes control and the code is prepared to handle this case, but the size check missed this corner case. Update the check for minimal size to allow the initial size to be 0. Cc: stable@vger.kernel.org Fixes: a653820700b8 ("ASoC: SOF: ipc4-topology: Correct the allocation size for bytes controls") Signed-off-by: Peter Ujfalusi Reviewed-by: Bard Liao Reviewed-by: Liam Girdwood Reviewed-by: Seppo Ingalsuo Reviewed-by: Kai Vehmanen Link: https://patch.msgid.link/20260326075618.1603-1-peter.ujfalusi@linux.intel.com Signed-off-by: Mark Brown --- sound/soc/sof/ipc4-topology.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/sof/ipc4-topology.c b/sound/soc/sof/ipc4-topology.c index db077e9d5644..c12ffdcfe4e3 100644 --- a/sound/soc/sof/ipc4-topology.c +++ b/sound/soc/sof/ipc4-topology.c @@ -2950,7 +2950,7 @@ static int sof_ipc4_control_load_bytes(struct snd_sof_dev *sdev, struct snd_sof_ return -EINVAL; } - if (scontrol->priv_size < sizeof(struct sof_abi_hdr)) { + if (scontrol->priv_size && scontrol->priv_size < sizeof(struct sof_abi_hdr)) { dev_err(sdev->dev, "bytes control %s initial data size %zu is insufficient.\n", scontrol->name, scontrol->priv_size); From 7c39f48568e0aec9bf6988cdbf833fdf8af19901 Mon Sep 17 00:00:00 2001 From: Ricardo Ribalda Date: Fri, 20 Mar 2026 07:49:10 +0000 Subject: [PATCH 200/401] media: uvcvideo: Fix bug in error path of uvc_alloc_urb_buffers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Recent cleanup introduced a bug in the error path of uvc_alloc_urb_buffers(). If there is not enough memory for the allocation the following error will be triggered: [ 739.196672] UBSAN: shift-out-of-bounds in mm/page_alloc.c:1403:22 [ 739.196710] shift exponent 52 is too large for 32-bit type 'int' Resulting in: [ 740.464422] BUG: unable to handle page fault for address: fffffac1c0800000 The reason for the bug is that usb_free_noncoherent is called with an invalid size (0) instead of the actual size of the urb. This patch takes care of that. Reported-by: Marek Marczykowski-Górecki Closes: https://lore.kernel.org/linux-media/abycbXzYupZpGkvR@hyeyoo/T/#t Tested-by: Marek Marczykowski-Górecki Fixes: c824345288d1 ("media: uvcvideo: Pass allocation size directly to uvc_alloc_urb_buffer") Signed-off-by: Ricardo Ribalda Reviewed-by: Laurent Pinchart Link: https://patch.msgid.link/20260320-uvc-urb-free-error-v1-1-b12cc3762a19@chromium.org Signed-off-by: Laurent Pinchart Signed-off-by: Hans Verkuil --- drivers/media/usb/uvc/uvc_video.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/media/usb/uvc/uvc_video.c b/drivers/media/usb/uvc/uvc_video.c index 40c76c051da2..f6c8e3223796 100644 --- a/drivers/media/usb/uvc/uvc_video.c +++ b/drivers/media/usb/uvc/uvc_video.c @@ -1751,7 +1751,8 @@ static void uvc_video_complete(struct urb *urb) /* * Free transfer buffers. */ -static void uvc_free_urb_buffers(struct uvc_streaming *stream) +static void uvc_free_urb_buffers(struct uvc_streaming *stream, + unsigned int size) { struct usb_device *udev = stream->dev->udev; struct uvc_urb *uvc_urb; @@ -1760,7 +1761,7 @@ static void uvc_free_urb_buffers(struct uvc_streaming *stream) if (!uvc_urb->buffer) continue; - usb_free_noncoherent(udev, stream->urb_size, uvc_urb->buffer, + usb_free_noncoherent(udev, size, uvc_urb->buffer, uvc_stream_dir(stream), uvc_urb->sgt); uvc_urb->buffer = NULL; uvc_urb->sgt = NULL; @@ -1820,7 +1821,7 @@ static int uvc_alloc_urb_buffers(struct uvc_streaming *stream, if (!uvc_alloc_urb_buffer(stream, uvc_urb, urb_size, gfp_flags)) { - uvc_free_urb_buffers(stream); + uvc_free_urb_buffers(stream, urb_size); break; } @@ -1868,7 +1869,7 @@ static void uvc_video_stop_transfer(struct uvc_streaming *stream, } if (free_buffers) - uvc_free_urb_buffers(stream); + uvc_free_urb_buffers(stream, stream->urb_size); } /* From 7587fbf5adc23d180a5ea9aa6944292c22328703 Mon Sep 17 00:00:00 2001 From: Sakari Ailus Date: Sat, 21 Mar 2026 23:21:44 +0200 Subject: [PATCH 201/401] media: ccs: Avoid deadlock in ccs_init_state() The sub-device state lock has been already acquired when ccs_init_state() is called. Do not try to acquire it again. Reported-by: David Heidelberg Fixes: a88883d1209c ("media: ccs: Rely on sub-device state locking") Signed-off-by: Sakari Ailus Signed-off-by: Hans Verkuil --- drivers/media/i2c/ccs/ccs-core.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/media/i2c/ccs/ccs-core.c b/drivers/media/i2c/ccs/ccs-core.c index aa4dd7e7cf5a..8e25f970fd12 100644 --- a/drivers/media/i2c/ccs/ccs-core.c +++ b/drivers/media/i2c/ccs/ccs-core.c @@ -3080,8 +3080,6 @@ static int ccs_init_state(struct v4l2_subdev *sd, struct v4l2_rect *crop = v4l2_subdev_state_get_crop(sd_state, pad); - guard(mutex)(&sensor->mutex); - ccs_get_native_size(ssd, crop); fmt->width = crop->width; From b341c1176f2e001b3adf0b47154fc31589f7410e Mon Sep 17 00:00:00 2001 From: Marc Kleine-Budde Date: Thu, 19 Mar 2026 19:38:12 +0100 Subject: [PATCH 202/401] spi: spi-fsl-lpspi: fix teardown order issue (UAF) There is a teardown order issue in the driver. The SPI controller is registered using devm_spi_register_controller(), which delays unregistration of the SPI controller until after the fsl_lpspi_remove() function returns. As the fsl_lpspi_remove() function synchronously tears down the DMA channels, a running SPI transfer triggers the following NULL pointer dereference due to use after free: | fsl_lpspi 42550000.spi: I/O Error in DMA RX | Unable to handle kernel NULL pointer dereference at virtual address 0000000000000000 [...] | Call trace: | fsl_lpspi_dma_transfer+0x260/0x340 [spi_fsl_lpspi] | fsl_lpspi_transfer_one+0x198/0x448 [spi_fsl_lpspi] | spi_transfer_one_message+0x49c/0x7c8 | __spi_pump_transfer_message+0x120/0x420 | __spi_sync+0x2c4/0x520 | spi_sync+0x34/0x60 | spidev_message+0x20c/0x378 [spidev] | spidev_ioctl+0x398/0x750 [spidev] [...] Switch from devm_spi_register_controller() to spi_register_controller() in fsl_lpspi_probe() and add the corresponding spi_unregister_controller() in fsl_lpspi_remove(). Fixes: 5314987de5e5 ("spi: imx: add lpspi bus driver") Signed-off-by: Marc Kleine-Budde Link: https://patch.msgid.link/20260319-spi-fsl-lpspi-fixes-v1-1-b433e435b2d8@pengutronix.de Signed-off-by: Mark Brown --- drivers/spi/spi-fsl-lpspi.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/spi/spi-fsl-lpspi.c b/drivers/spi/spi-fsl-lpspi.c index b361c1bb3e43..45390e9b8cae 100644 --- a/drivers/spi/spi-fsl-lpspi.c +++ b/drivers/spi/spi-fsl-lpspi.c @@ -1009,7 +1009,7 @@ static int fsl_lpspi_probe(struct platform_device *pdev) enable_irq(irq); } - ret = devm_spi_register_controller(&pdev->dev, controller); + ret = spi_register_controller(controller); if (ret < 0) { dev_err_probe(&pdev->dev, ret, "spi_register_controller error\n"); goto free_dma; @@ -1035,6 +1035,7 @@ static void fsl_lpspi_remove(struct platform_device *pdev) struct fsl_lpspi_data *fsl_lpspi = spi_controller_get_devdata(controller); + spi_unregister_controller(controller); fsl_lpspi_dma_exit(controller); pm_runtime_dont_use_autosuspend(fsl_lpspi->dev); From e98137f0a874ab36d0946de4707aa48cb7137d1c Mon Sep 17 00:00:00 2001 From: Alex Williamson Date: Mon, 23 Mar 2026 15:56:58 -0600 Subject: [PATCH 203/401] vfio/pci: Fix double free in dma-buf feature The error path through vfio_pci_core_feature_dma_buf() ignores its own advice to only use dma_buf_put() after dma_buf_export(), instead falling through the entire unwind chain. In the unlikely event that we encounter file descriptor exhaustion, this can result in an unbalanced refcount on the vfio device and double free of allocated objects. Avoid this by moving the "put" directly into the error path and return the errno rather than entering the unwind chain. Reported-by: Renato Marziano Fixes: 5d74781ebc86 ("vfio/pci: Add dma-buf export support for MMIO regions") Cc: stable@vger.kernel.org Acked-by: Leon Romanovsky Signed-off-by: Alex Williamson Link: https://lore.kernel.org/r/20260323215659.2108191-3-alex.williamson@nvidia.com Reviewed-by: Jason Gunthorpe Signed-off-by: Alex Williamson --- drivers/vfio/pci/vfio_pci_dmabuf.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/vfio/pci/vfio_pci_dmabuf.c b/drivers/vfio/pci/vfio_pci_dmabuf.c index 478beafc6ac3..b1d658b8f7b5 100644 --- a/drivers/vfio/pci/vfio_pci_dmabuf.c +++ b/drivers/vfio/pci/vfio_pci_dmabuf.c @@ -301,11 +301,10 @@ int vfio_pci_core_feature_dma_buf(struct vfio_pci_core_device *vdev, u32 flags, */ ret = dma_buf_fd(priv->dmabuf, get_dma_buf.open_flags); if (ret < 0) - goto err_dma_buf; + dma_buf_put(priv->dmabuf); + return ret; -err_dma_buf: - dma_buf_put(priv->dmabuf); err_dev_put: vfio_device_put_registration(&vdev->vdev); err_free_phys: From b59efde9e6c122207c16169d3d0deb623956eae9 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Thu, 26 Mar 2026 07:02:53 -0600 Subject: [PATCH 204/401] io_uring/fdinfo: fix SQE_MIXED SQE displaying When displaying pending SQEs for a MIXED ring, each 128-byte SQE increments sq_head to skip the second slot, but the loop counter is not adjusted. This can cause the loop to read past sq_tail by one entry for each 128-byte SQE encountered, displaying SQEs that haven't been made consumable yet by the application. Match the kernel's own consumption logic in io_init_req() which decrements what's left when consuming the extra slot. Fixes: 1cba30bf9fdd ("io_uring: add support for IORING_SETUP_SQE_MIXED") Signed-off-by: Jens Axboe --- io_uring/fdinfo.c | 1 + 1 file changed, 1 insertion(+) diff --git a/io_uring/fdinfo.c b/io_uring/fdinfo.c index 80178b69e05a..25c92ace18bd 100644 --- a/io_uring/fdinfo.c +++ b/io_uring/fdinfo.c @@ -125,6 +125,7 @@ static void __io_uring_show_fdinfo(struct io_ring_ctx *ctx, struct seq_file *m) sq_idx); break; } + i++; sqe128 = true; } seq_printf(m, "%5u: opcode:%s, fd:%d, flags:%x, off:%llu, " From 70685c291ef82269180758130394ecdc4496b52c Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Mon, 23 Mar 2026 14:01:57 -0700 Subject: [PATCH 205/401] xfs: don't irele after failing to iget in xfs_attri_recover_work xlog_recovery_iget* never set @ip to a valid pointer if they return an error, so this irele will walk off a dangling pointer. Fix that. Cc: stable@vger.kernel.org # v6.10 Fixes: ae673f534a3097 ("xfs: record inode generation in xattr update log intent items") Signed-off-by: Darrick J. Wong Reviewed-by: Long Li Reviewed-by: Christoph Hellwig Reviewed-by: Carlos Maiolino Signed-off-by: Carlos Maiolino --- fs/xfs/xfs_attr_item.c | 1 - 1 file changed, 1 deletion(-) diff --git a/fs/xfs/xfs_attr_item.c b/fs/xfs/xfs_attr_item.c index 82324f42537b..deab14f31b38 100644 --- a/fs/xfs/xfs_attr_item.c +++ b/fs/xfs/xfs_attr_item.c @@ -653,7 +653,6 @@ xfs_attri_recover_work( break; } if (error) { - xfs_irele(ip); XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, attrp, sizeof(*attrp)); return ERR_PTR(-EFSCORRUPTED); From e31c53a8060e134111ed095783fee0aa0c43b080 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Mon, 23 Mar 2026 14:04:33 -0700 Subject: [PATCH 206/401] xfs: remove file_path tracepoint data The xfile/xmbuf shmem file descriptions are no longer as detailed as they were when online fsck was first merged, because moving to static strings in commit 60382993a2e180 ("xfs: get rid of the xchk_xfile_*_descr calls") removed a memory allocation and hence a source of failure. However this makes encoding the description in the tracepoints sort of a waste of memory. David Laight also points out that file_path doesn't zero the whole buffer which causes exposure of stale trace bytes, and Steven Rostedt wonders why we're not using a dynamic array for the file path. I don't think this is worth fixing, so let's just rip it out. Cc: rostedt@goodmis.org Cc: david.laight.linux@gmail.com Link: https://lore.kernel.org/linux-xfs/20260323172204.work.979-kees@kernel.org/ Cc: stable@vger.kernel.org # v6.11 Fixes: 19ebc8f84ea12e ("xfs: fix file_path handling in tracepoints") Signed-off-by: Darrick J. Wong Reviewed-by: Carlos Maiolino Reviewed-by: Christoph Hellwig Signed-off-by: Carlos Maiolino --- fs/xfs/scrub/trace.h | 12 ++---------- fs/xfs/xfs_trace.h | 11 ++--------- 2 files changed, 4 insertions(+), 19 deletions(-) diff --git a/fs/xfs/scrub/trace.h b/fs/xfs/scrub/trace.h index 39ea651cbb75..286c5f5e0544 100644 --- a/fs/xfs/scrub/trace.h +++ b/fs/xfs/scrub/trace.h @@ -972,20 +972,12 @@ TRACE_EVENT(xfile_create, TP_STRUCT__entry( __field(dev_t, dev) __field(unsigned long, ino) - __array(char, pathname, MAXNAMELEN) ), TP_fast_assign( - char *path; - __entry->ino = file_inode(xf->file)->i_ino; - path = file_path(xf->file, __entry->pathname, MAXNAMELEN); - if (IS_ERR(path)) - strncpy(__entry->pathname, "(unknown)", - sizeof(__entry->pathname)); ), - TP_printk("xfino 0x%lx path '%s'", - __entry->ino, - __entry->pathname) + TP_printk("xfino 0x%lx", + __entry->ino) ); TRACE_EVENT(xfile_destroy, diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h index 0e994b3f768f..5e8190fe2be9 100644 --- a/fs/xfs/xfs_trace.h +++ b/fs/xfs/xfs_trace.h @@ -5119,23 +5119,16 @@ TRACE_EVENT(xmbuf_create, TP_STRUCT__entry( __field(dev_t, dev) __field(unsigned long, ino) - __array(char, pathname, MAXNAMELEN) ), TP_fast_assign( - char *path; struct file *file = btp->bt_file; __entry->dev = btp->bt_mount->m_super->s_dev; __entry->ino = file_inode(file)->i_ino; - path = file_path(file, __entry->pathname, MAXNAMELEN); - if (IS_ERR(path)) - strncpy(__entry->pathname, "(unknown)", - sizeof(__entry->pathname)); ), - TP_printk("dev %d:%d xmino 0x%lx path '%s'", + TP_printk("dev %d:%d xmino 0x%lx", MAJOR(__entry->dev), MINOR(__entry->dev), - __entry->ino, - __entry->pathname) + __entry->ino) ); TRACE_EVENT(xmbuf_free, From 0e764b9d46071668969410ec5429be0e2f38c6d3 Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 25 Mar 2026 08:20:17 +0000 Subject: [PATCH 207/401] netfs: Fix the handling of stream->front by removing it The netfs_io_stream::front member is meant to point to the subrequest currently being collected on a stream, but it isn't actually used this way by direct write (which mostly ignores it). However, there's a tracepoint which looks at it. Further, stream->front is actually redundant with stream->subrequests.next. Fix the potential problem in the direct code by just removing the member and using stream->subrequests.next instead, thereby also simplifying the code. Fixes: a0b4c7a49137 ("netfs: Fix unbuffered/DIO writes to dispatch subrequests in strict sequence") Reported-by: Paulo Alcantara Signed-off-by: David Howells Link: https://patch.msgid.link/4158599.1774426817@warthog.procyon.org.uk Reviewed-by: Paulo Alcantara (Red Hat) cc: netfs@lists.linux.dev cc: linux-fsdevel@vger.kernel.org Signed-off-by: Christian Brauner --- fs/netfs/buffered_read.c | 3 +-- fs/netfs/direct_read.c | 3 +-- fs/netfs/direct_write.c | 1 - fs/netfs/read_collect.c | 4 ++-- fs/netfs/read_single.c | 1 - fs/netfs/write_collect.c | 4 ++-- fs/netfs/write_issue.c | 3 +-- include/linux/netfs.h | 1 - include/trace/events/netfs.h | 8 ++++---- 9 files changed, 11 insertions(+), 17 deletions(-) diff --git a/fs/netfs/buffered_read.c b/fs/netfs/buffered_read.c index 88a0d801525f..a8c0d86118c5 100644 --- a/fs/netfs/buffered_read.c +++ b/fs/netfs/buffered_read.c @@ -171,9 +171,8 @@ static void netfs_queue_read(struct netfs_io_request *rreq, spin_lock(&rreq->lock); list_add_tail(&subreq->rreq_link, &stream->subrequests); if (list_is_first(&subreq->rreq_link, &stream->subrequests)) { - stream->front = subreq; if (!stream->active) { - stream->collected_to = stream->front->start; + stream->collected_to = subreq->start; /* Store list pointers before active flag */ smp_store_release(&stream->active, true); } diff --git a/fs/netfs/direct_read.c b/fs/netfs/direct_read.c index a498ee8d6674..f72e6da88cca 100644 --- a/fs/netfs/direct_read.c +++ b/fs/netfs/direct_read.c @@ -71,9 +71,8 @@ static int netfs_dispatch_unbuffered_reads(struct netfs_io_request *rreq) spin_lock(&rreq->lock); list_add_tail(&subreq->rreq_link, &stream->subrequests); if (list_is_first(&subreq->rreq_link, &stream->subrequests)) { - stream->front = subreq; if (!stream->active) { - stream->collected_to = stream->front->start; + stream->collected_to = subreq->start; /* Store list pointers before active flag */ smp_store_release(&stream->active, true); } diff --git a/fs/netfs/direct_write.c b/fs/netfs/direct_write.c index 4d9760e36c11..f9ab69de3e29 100644 --- a/fs/netfs/direct_write.c +++ b/fs/netfs/direct_write.c @@ -111,7 +111,6 @@ static int netfs_unbuffered_write(struct netfs_io_request *wreq) netfs_prepare_write(wreq, stream, wreq->start + wreq->transferred); subreq = stream->construct; stream->construct = NULL; - stream->front = NULL; } /* Check if (re-)preparation failed. */ diff --git a/fs/netfs/read_collect.c b/fs/netfs/read_collect.c index 137f0e28a44c..e5f6665b3341 100644 --- a/fs/netfs/read_collect.c +++ b/fs/netfs/read_collect.c @@ -205,7 +205,8 @@ static void netfs_collect_read_results(struct netfs_io_request *rreq) * in progress. The issuer thread may be adding stuff to the tail * whilst we're doing this. */ - front = READ_ONCE(stream->front); + front = list_first_entry_or_null(&stream->subrequests, + struct netfs_io_subrequest, rreq_link); while (front) { size_t transferred; @@ -301,7 +302,6 @@ static void netfs_collect_read_results(struct netfs_io_request *rreq) list_del_init(&front->rreq_link); front = list_first_entry_or_null(&stream->subrequests, struct netfs_io_subrequest, rreq_link); - stream->front = front; spin_unlock(&rreq->lock); netfs_put_subrequest(remove, notes & ABANDON_SREQ ? diff --git a/fs/netfs/read_single.c b/fs/netfs/read_single.c index 8e6264f62a8f..d0e23bc42445 100644 --- a/fs/netfs/read_single.c +++ b/fs/netfs/read_single.c @@ -107,7 +107,6 @@ static int netfs_single_dispatch_read(struct netfs_io_request *rreq) spin_lock(&rreq->lock); list_add_tail(&subreq->rreq_link, &stream->subrequests); trace_netfs_sreq(subreq, netfs_sreq_trace_added); - stream->front = subreq; /* Store list pointers before active flag */ smp_store_release(&stream->active, true); spin_unlock(&rreq->lock); diff --git a/fs/netfs/write_collect.c b/fs/netfs/write_collect.c index 83eb3dc1adf8..b194447f4b11 100644 --- a/fs/netfs/write_collect.c +++ b/fs/netfs/write_collect.c @@ -228,7 +228,8 @@ static void netfs_collect_write_results(struct netfs_io_request *wreq) if (!smp_load_acquire(&stream->active)) continue; - front = stream->front; + front = list_first_entry_or_null(&stream->subrequests, + struct netfs_io_subrequest, rreq_link); while (front) { trace_netfs_collect_sreq(wreq, front); //_debug("sreq [%x] %llx %zx/%zx", @@ -279,7 +280,6 @@ static void netfs_collect_write_results(struct netfs_io_request *wreq) list_del_init(&front->rreq_link); front = list_first_entry_or_null(&stream->subrequests, struct netfs_io_subrequest, rreq_link); - stream->front = front; spin_unlock(&wreq->lock); netfs_put_subrequest(remove, notes & SAW_FAILURE ? diff --git a/fs/netfs/write_issue.c b/fs/netfs/write_issue.c index 437268f65640..2db688f94125 100644 --- a/fs/netfs/write_issue.c +++ b/fs/netfs/write_issue.c @@ -206,9 +206,8 @@ void netfs_prepare_write(struct netfs_io_request *wreq, spin_lock(&wreq->lock); list_add_tail(&subreq->rreq_link, &stream->subrequests); if (list_is_first(&subreq->rreq_link, &stream->subrequests)) { - stream->front = subreq; if (!stream->active) { - stream->collected_to = stream->front->start; + stream->collected_to = subreq->start; /* Write list pointers before active flag */ smp_store_release(&stream->active, true); } diff --git a/include/linux/netfs.h b/include/linux/netfs.h index 72ee7d210a74..ba17ac5bf356 100644 --- a/include/linux/netfs.h +++ b/include/linux/netfs.h @@ -140,7 +140,6 @@ struct netfs_io_stream { void (*issue_write)(struct netfs_io_subrequest *subreq); /* Collection tracking */ struct list_head subrequests; /* Contributory I/O operations */ - struct netfs_io_subrequest *front; /* Op being collected */ unsigned long long collected_to; /* Position we've collected results to */ size_t transferred; /* The amount transferred from this stream */ unsigned short error; /* Aggregate error for the stream */ diff --git a/include/trace/events/netfs.h b/include/trace/events/netfs.h index 2d366be46a1c..cbe28211106c 100644 --- a/include/trace/events/netfs.h +++ b/include/trace/events/netfs.h @@ -740,19 +740,19 @@ TRACE_EVENT(netfs_collect_stream, __field(unsigned int, wreq) __field(unsigned char, stream) __field(unsigned long long, collected_to) - __field(unsigned long long, front) + __field(unsigned long long, issued_to) ), TP_fast_assign( __entry->wreq = wreq->debug_id; __entry->stream = stream->stream_nr; __entry->collected_to = stream->collected_to; - __entry->front = stream->front ? stream->front->start : UINT_MAX; + __entry->issued_to = atomic64_read(&wreq->issued_to); ), - TP_printk("R=%08x[%x:] cto=%llx frn=%llx", + TP_printk("R=%08x[%x:] cto=%llx ito=%llx", __entry->wreq, __entry->stream, - __entry->collected_to, __entry->front) + __entry->collected_to, __entry->issued_to) ); TRACE_EVENT(netfs_folioq, From cfe02147e86307a17057ee4e3604f5f5919571d2 Mon Sep 17 00:00:00 2001 From: Jassi Brar Date: Sun, 22 Mar 2026 12:15:33 -0500 Subject: [PATCH 208/401] irqchip/qcom-mpm: Add missing mailbox TX done acknowledgment The mbox_client for qcom-mpm sends NULL doorbell messages via mbox_send_message() but never signals TX completion. Set knows_txdone=true and call mbox_client_txdone() after a successful send, matching the pattern used by other Qualcomm mailbox clients (smp2p, smsm, qcom_aoss etc). Fixes: a6199bb514d8a6 "irqchip: Add Qualcomm MPM controller driver" Signed-off-by: Jassi Brar Signed-off-by: Thomas Gleixner Reviewed-by: Douglas Anderson Cc: stable@vger.kernel.org Link: https://patch.msgid.link/20260322171533.608436-1-jassisinghbrar@gmail.com --- drivers/irqchip/irq-qcom-mpm.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/irqchip/irq-qcom-mpm.c b/drivers/irqchip/irq-qcom-mpm.c index 83f31ea657b7..181320528a47 100644 --- a/drivers/irqchip/irq-qcom-mpm.c +++ b/drivers/irqchip/irq-qcom-mpm.c @@ -306,6 +306,8 @@ static int mpm_pd_power_off(struct generic_pm_domain *genpd) if (ret < 0) return ret; + mbox_client_txdone(priv->mbox_chan, 0); + return 0; } @@ -434,6 +436,7 @@ static int qcom_mpm_probe(struct platform_device *pdev, struct device_node *pare } priv->mbox_client.dev = dev; + priv->mbox_client.knows_txdone = true; priv->mbox_chan = mbox_request_channel(&priv->mbox_client, 0); if (IS_ERR(priv->mbox_chan)) { ret = PTR_ERR(priv->mbox_chan); From 0f54755343f56ac108cfd55173bc1b5c5376384d Mon Sep 17 00:00:00 2001 From: Claudio Imbrenda Date: Thu, 26 Mar 2026 14:17:10 +0100 Subject: [PATCH 209/401] KVM: s390: vsie: Fix dat_split_ste() If the guest misbehaves and puts the page tables for its nested guest inside the memory of the nested guest itself, and the guest and nested guest are being mapped with large pages, the shadow mapping will lose synchronization with the actual mapping, since this will cause the large page with the vsie notification bit to be split, but the vsie notification bit will not be propagated to the resulting small pages. Fix this by propagating the vsie_notif bit from large pages to normal pages when splitting a large page. Fixes: 2db149a0a6c5 ("KVM: s390: KVM page table management functions: walks") Reviewed-by: Christoph Schlameuss Reviewed-by: Steffen Eiden Reviewed-by: Janosch Frank Signed-off-by: Claudio Imbrenda --- arch/s390/kvm/dat.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/s390/kvm/dat.c b/arch/s390/kvm/dat.c index 670404d4fa44..48b5f2bcf172 100644 --- a/arch/s390/kvm/dat.c +++ b/arch/s390/kvm/dat.c @@ -292,6 +292,7 @@ static int dat_split_ste(struct kvm_s390_mmu_cache *mc, union pmd *pmdp, gfn_t g pt->ptes[i].val = init.val | i * PAGE_SIZE; /* No need to take locks as the page table is not installed yet. */ pgste_init.prefix_notif = old.s.fc1.prefix_notif; + pgste_init.vsie_notif = old.s.fc1.vsie_notif; pgste_init.pcl = uses_skeys && init.h.i; dat_init_pgstes(pt, pgste_init.val); } else { From 897cf98926429c8671a9009442883c2f62deae96 Mon Sep 17 00:00:00 2001 From: Biju Das Date: Mon, 23 Mar 2026 12:49:14 +0000 Subject: [PATCH 210/401] irqchip/renesas-rzv2h: Fix error path in rzv2h_icu_probe_common() Replace pm_runtime_put() with pm_runtime_put_sync() when irq_domain_create_hierarchy() fails to ensure the device suspends synchronously before devres cleanup disables runtime PM via pm_runtime_disable(). Fixes: 5ec8cabc3b86 ("irqchip/renesas-rzv2h: Use devm_pm_runtime_enable()") Signed-off-by: Biju Das Signed-off-by: Thomas Gleixner Link: https://patch.msgid.link/20260323124917.41602-1-biju.das.jz@bp.renesas.com --- drivers/irqchip/irq-renesas-rzv2h.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/irqchip/irq-renesas-rzv2h.c b/drivers/irqchip/irq-renesas-rzv2h.c index da2bc43a0e12..03e93b061edd 100644 --- a/drivers/irqchip/irq-renesas-rzv2h.c +++ b/drivers/irqchip/irq-renesas-rzv2h.c @@ -621,7 +621,7 @@ static int rzv2h_icu_probe_common(struct platform_device *pdev, struct device_no return 0; pm_put: - pm_runtime_put(&pdev->dev); + pm_runtime_put_sync(&pdev->dev); return ret; } From b827ef02f409bd42c7e7fb82663b84753c0e5d14 Mon Sep 17 00:00:00 2001 From: Claudio Imbrenda Date: Thu, 26 Mar 2026 14:17:11 +0100 Subject: [PATCH 211/401] KVM: s390: Remove non-atomic dat_crstep_xchg() In practice dat_crstep_xchg() is racy and hard to use correctly. Simply remove it and replace its uses with dat_crstep_xchg_atomic(). This solves some actual races that lead to system hangs / crashes. Opportunistically fix an alignment issue in _gmap_crstep_xchg_atomic(). Fixes: 589071eaaa8f ("KVM: s390: KVM page table management functions: clear and replace") Fixes: 94fd9b16cc67 ("KVM: s390: KVM page table management functions: lifecycle management") Reviewed-by: Steffen Eiden Signed-off-by: Claudio Imbrenda --- arch/s390/kvm/dat.c | 51 +++++++------------------ arch/s390/kvm/dat.h | 9 +++-- arch/s390/kvm/gaccess.c | 24 +++++++----- arch/s390/kvm/gmap.c | 82 ++++++++++++++++++++++++----------------- arch/s390/kvm/gmap.h | 29 +++++++++------ 5 files changed, 99 insertions(+), 96 deletions(-) diff --git a/arch/s390/kvm/dat.c b/arch/s390/kvm/dat.c index 48b5f2bcf172..4d44c0f9ad45 100644 --- a/arch/s390/kvm/dat.c +++ b/arch/s390/kvm/dat.c @@ -134,32 +134,6 @@ int dat_set_asce_limit(struct kvm_s390_mmu_cache *mc, union asce *asce, int newt return 0; } -/** - * dat_crstep_xchg() - Exchange a gmap CRSTE with another. - * @crstep: Pointer to the CRST entry - * @new: Replacement entry. - * @gfn: The affected guest address. - * @asce: The ASCE of the address space. - * - * Context: This function is assumed to be called with kvm->mmu_lock held. - */ -void dat_crstep_xchg(union crste *crstep, union crste new, gfn_t gfn, union asce asce) -{ - if (crstep->h.i) { - WRITE_ONCE(*crstep, new); - return; - } else if (cpu_has_edat2()) { - crdte_crste(crstep, *crstep, new, gfn, asce); - return; - } - - if (machine_has_tlb_guest()) - idte_crste(crstep, gfn, IDTE_GUEST_ASCE, asce, IDTE_GLOBAL); - else - idte_crste(crstep, gfn, 0, NULL_ASCE, IDTE_GLOBAL); - WRITE_ONCE(*crstep, new); -} - /** * dat_crstep_xchg_atomic() - Atomically exchange a gmap CRSTE with another. * @crstep: Pointer to the CRST entry. @@ -175,8 +149,8 @@ void dat_crstep_xchg(union crste *crstep, union crste new, gfn_t gfn, union asce * * Return: %true if the exchange was successful. */ -bool dat_crstep_xchg_atomic(union crste *crstep, union crste old, union crste new, gfn_t gfn, - union asce asce) +bool __must_check dat_crstep_xchg_atomic(union crste *crstep, union crste old, union crste new, + gfn_t gfn, union asce asce) { if (old.h.i) return arch_try_cmpxchg((long *)crstep, &old.val, new.val); @@ -894,7 +868,8 @@ static long _dat_slot_crste(union crste *crstep, gfn_t gfn, gfn_t next, struct d /* This table entry needs to be updated. */ if (walk->start <= gfn && walk->end >= next) { - dat_crstep_xchg_atomic(crstep, crste, new_crste, gfn, walk->asce); + if (!dat_crstep_xchg_atomic(crstep, crste, new_crste, gfn, walk->asce)) + return -EINVAL; /* A lower level table was present, needs to be freed. */ if (!crste.h.fc && !crste.h.i) { if (is_pmd(crste)) @@ -1072,17 +1047,19 @@ int dat_link(struct kvm_s390_mmu_cache *mc, union asce asce, int level, static long dat_set_pn_crste(union crste *crstep, gfn_t gfn, gfn_t next, struct dat_walk *walk) { - union crste crste = READ_ONCE(*crstep); + union crste newcrste, oldcrste; int *n = walk->priv; - if (!crste.h.fc || crste.h.i || crste.h.p) - return 0; - + do { + oldcrste = READ_ONCE(*crstep); + if (!oldcrste.h.fc || oldcrste.h.i || oldcrste.h.p) + return 0; + if (oldcrste.s.fc1.prefix_notif) + break; + newcrste = oldcrste; + newcrste.s.fc1.prefix_notif = 1; + } while (!dat_crstep_xchg_atomic(crstep, oldcrste, newcrste, gfn, walk->asce)); *n = 2; - if (crste.s.fc1.prefix_notif) - return 0; - crste.s.fc1.prefix_notif = 1; - dat_crstep_xchg(crstep, crste, gfn, walk->asce); return 0; } diff --git a/arch/s390/kvm/dat.h b/arch/s390/kvm/dat.h index 123e11dcd70d..22dafc775335 100644 --- a/arch/s390/kvm/dat.h +++ b/arch/s390/kvm/dat.h @@ -938,11 +938,14 @@ static inline bool dat_pudp_xchg_atomic(union pud *pudp, union pud old, union pu return dat_crstep_xchg_atomic(_CRSTEP(pudp), _CRSTE(old), _CRSTE(new), gfn, asce); } -static inline void dat_crstep_clear(union crste *crstep, gfn_t gfn, union asce asce) +static inline union crste dat_crstep_clear_atomic(union crste *crstep, gfn_t gfn, union asce asce) { - union crste newcrste = _CRSTE_EMPTY(crstep->h.tt); + union crste oldcrste, empty = _CRSTE_EMPTY(crstep->h.tt); - dat_crstep_xchg(crstep, newcrste, gfn, asce); + do { + oldcrste = READ_ONCE(*crstep); + } while (!dat_crstep_xchg_atomic(crstep, oldcrste, empty, gfn, asce)); + return oldcrste; } static inline int get_level(union crste *crstep, union pte *ptep) diff --git a/arch/s390/kvm/gaccess.c b/arch/s390/kvm/gaccess.c index a9da9390867d..4ee862424ca0 100644 --- a/arch/s390/kvm/gaccess.c +++ b/arch/s390/kvm/gaccess.c @@ -1456,7 +1456,7 @@ static int _do_shadow_pte(struct gmap *sg, gpa_t raddr, union pte *ptep_h, union static int _do_shadow_crste(struct gmap *sg, gpa_t raddr, union crste *host, union crste *table, struct guest_fault *f, bool p) { - union crste newcrste; + union crste newcrste, oldcrste; gfn_t gfn; int rc; @@ -1469,16 +1469,20 @@ static int _do_shadow_crste(struct gmap *sg, gpa_t raddr, union crste *host, uni if (rc) return rc; - newcrste = _crste_fc1(f->pfn, host->h.tt, f->writable, !p); - newcrste.s.fc1.d |= host->s.fc1.d; - newcrste.s.fc1.sd |= host->s.fc1.sd; - newcrste.h.p &= host->h.p; - newcrste.s.fc1.vsie_notif = 1; - newcrste.s.fc1.prefix_notif = host->s.fc1.prefix_notif; - _gmap_crstep_xchg(sg->parent, host, newcrste, f->gfn, false); + do { + oldcrste = READ_ONCE(*host); + newcrste = _crste_fc1(f->pfn, oldcrste.h.tt, f->writable, !p); + newcrste.s.fc1.d |= oldcrste.s.fc1.d; + newcrste.s.fc1.sd |= oldcrste.s.fc1.sd; + newcrste.h.p &= oldcrste.h.p; + newcrste.s.fc1.vsie_notif = 1; + newcrste.s.fc1.prefix_notif = oldcrste.s.fc1.prefix_notif; + } while (!_gmap_crstep_xchg_atomic(sg->parent, host, oldcrste, newcrste, f->gfn, false)); - newcrste = _crste_fc1(f->pfn, host->h.tt, 0, !p); - dat_crstep_xchg(table, newcrste, gpa_to_gfn(raddr), sg->asce); + newcrste = _crste_fc1(f->pfn, oldcrste.h.tt, 0, !p); + gfn = gpa_to_gfn(raddr); + while (!dat_crstep_xchg_atomic(table, READ_ONCE(*table), newcrste, gfn, sg->asce)) + ; return 0; } diff --git a/arch/s390/kvm/gmap.c b/arch/s390/kvm/gmap.c index ef0c6ebfdde2..956be4c01797 100644 --- a/arch/s390/kvm/gmap.c +++ b/arch/s390/kvm/gmap.c @@ -313,13 +313,16 @@ static long gmap_clear_young_crste(union crste *crstep, gfn_t gfn, gfn_t end, st struct clear_young_pte_priv *priv = walk->priv; union crste crste, new; - crste = READ_ONCE(*crstep); + do { + crste = READ_ONCE(*crstep); + + if (!crste.h.fc) + return 0; + if (!crste.s.fc1.y && crste.h.i) + return 0; + if (crste_prefix(crste) && !gmap_mkold_prefix(priv->gmap, gfn, end)) + break; - if (!crste.h.fc) - return 0; - if (!crste.s.fc1.y && crste.h.i) - return 0; - if (!crste_prefix(crste) || gmap_mkold_prefix(priv->gmap, gfn, end)) { new = crste; new.h.i = 1; new.s.fc1.y = 0; @@ -328,8 +331,8 @@ static long gmap_clear_young_crste(union crste *crstep, gfn_t gfn, gfn_t end, st folio_set_dirty(phys_to_folio(crste_origin_large(crste))); new.s.fc1.d = 0; new.h.p = 1; - dat_crstep_xchg(crstep, new, gfn, walk->asce); - } + } while (!dat_crstep_xchg_atomic(crstep, crste, new, gfn, walk->asce)); + priv->young = 1; return 0; } @@ -391,14 +394,18 @@ static long _gmap_unmap_crste(union crste *crstep, gfn_t gfn, gfn_t next, struct { struct gmap_unmap_priv *priv = walk->priv; struct folio *folio = NULL; + union crste old = *crstep; - if (crstep->h.fc) { - if (crstep->s.fc1.pr && test_bit(GMAP_FLAG_EXPORT_ON_UNMAP, &priv->gmap->flags)) - folio = phys_to_folio(crste_origin_large(*crstep)); - gmap_crstep_xchg(priv->gmap, crstep, _CRSTE_EMPTY(crstep->h.tt), gfn); - if (folio) - uv_convert_from_secure_folio(folio); - } + if (!old.h.fc) + return 0; + + if (old.s.fc1.pr && test_bit(GMAP_FLAG_EXPORT_ON_UNMAP, &priv->gmap->flags)) + folio = phys_to_folio(crste_origin_large(old)); + /* No races should happen because kvm->mmu_lock is held in write mode */ + KVM_BUG_ON(!gmap_crstep_xchg_atomic(priv->gmap, crstep, old, _CRSTE_EMPTY(old.h.tt), gfn), + priv->gmap->kvm); + if (folio) + uv_convert_from_secure_folio(folio); return 0; } @@ -474,23 +481,24 @@ static long _crste_test_and_clear_softdirty(union crste *table, gfn_t gfn, gfn_t if (fatal_signal_pending(current)) return 1; - crste = READ_ONCE(*table); - if (!crste.h.fc) - return 0; - if (crste.h.p && !crste.s.fc1.sd) - return 0; + do { + crste = READ_ONCE(*table); + if (!crste.h.fc) + return 0; + if (crste.h.p && !crste.s.fc1.sd) + return 0; - /* - * If this large page contains one or more prefixes of vCPUs that are - * currently running, do not reset the protection, leave it marked as - * dirty. - */ - if (!crste.s.fc1.prefix_notif || gmap_mkold_prefix(gmap, gfn, end)) { + /* + * If this large page contains one or more prefixes of vCPUs that are + * currently running, do not reset the protection, leave it marked as + * dirty. + */ + if (crste.s.fc1.prefix_notif && !gmap_mkold_prefix(gmap, gfn, end)) + break; new = crste; new.h.p = 1; new.s.fc1.sd = 0; - gmap_crstep_xchg(gmap, table, new, gfn); - } + } while (!gmap_crstep_xchg_atomic(gmap, table, crste, new, gfn)); for ( ; gfn < end; gfn++) mark_page_dirty(gmap->kvm, gfn); @@ -646,8 +654,8 @@ int gmap_link(struct kvm_s390_mmu_cache *mc, struct gmap *gmap, struct guest_fau static int gmap_ucas_map_one(struct kvm_s390_mmu_cache *mc, struct gmap *gmap, gfn_t p_gfn, gfn_t c_gfn, bool force_alloc) { + union crste newcrste, oldcrste; struct page_table *pt; - union crste newcrste; union crste *crstep; union pte *ptep; int rc; @@ -673,7 +681,11 @@ static int gmap_ucas_map_one(struct kvm_s390_mmu_cache *mc, struct gmap *gmap, &crstep, &ptep); if (rc) return rc; - dat_crstep_xchg(crstep, newcrste, c_gfn, gmap->asce); + do { + oldcrste = READ_ONCE(*crstep); + if (oldcrste.val == newcrste.val) + break; + } while (!dat_crstep_xchg_atomic(crstep, oldcrste, newcrste, c_gfn, gmap->asce)); return 0; } @@ -777,8 +789,10 @@ static void gmap_ucas_unmap_one(struct gmap *gmap, gfn_t c_gfn) int rc; rc = dat_entry_walk(NULL, c_gfn, gmap->asce, 0, TABLE_TYPE_SEGMENT, &crstep, &ptep); - if (!rc) - dat_crstep_xchg(crstep, _PMD_EMPTY, c_gfn, gmap->asce); + if (rc) + return; + while (!dat_crstep_xchg_atomic(crstep, READ_ONCE(*crstep), _PMD_EMPTY, c_gfn, gmap->asce)) + ; } void gmap_ucas_unmap(struct gmap *gmap, gfn_t c_gfn, unsigned long count) @@ -1017,8 +1031,8 @@ static void gmap_unshadow_level(struct gmap *sg, gfn_t r_gfn, int level) dat_ptep_xchg(ptep, _PTE_EMPTY, r_gfn, sg->asce, uses_skeys(sg)); return; } - crste = READ_ONCE(*crstep); - dat_crstep_clear(crstep, r_gfn, sg->asce); + + crste = dat_crstep_clear_atomic(crstep, r_gfn, sg->asce); if (crste_leaf(crste) || crste.h.i) return; if (is_pmd(crste)) diff --git a/arch/s390/kvm/gmap.h b/arch/s390/kvm/gmap.h index ccb5cd751e31..150e91e15ee0 100644 --- a/arch/s390/kvm/gmap.h +++ b/arch/s390/kvm/gmap.h @@ -194,35 +194,40 @@ static inline union pgste gmap_ptep_xchg(struct gmap *gmap, union pte *ptep, uni return _gmap_ptep_xchg(gmap, ptep, newpte, pgste, gfn, true); } -static inline void _gmap_crstep_xchg(struct gmap *gmap, union crste *crstep, union crste ne, - gfn_t gfn, bool needs_lock) +static inline bool __must_check _gmap_crstep_xchg_atomic(struct gmap *gmap, union crste *crstep, + union crste oldcrste, union crste newcrste, + gfn_t gfn, bool needs_lock) { - unsigned long align = 8 + (is_pmd(*crstep) ? 0 : 11); + unsigned long align = is_pmd(newcrste) ? _PAGE_ENTRIES : _PAGE_ENTRIES * _CRST_ENTRIES; + + if (KVM_BUG_ON(crstep->h.tt != oldcrste.h.tt || newcrste.h.tt != oldcrste.h.tt, gmap->kvm)) + return true; lockdep_assert_held(&gmap->kvm->mmu_lock); if (!needs_lock) lockdep_assert_held(&gmap->children_lock); gfn = ALIGN_DOWN(gfn, align); - if (crste_prefix(*crstep) && (ne.h.p || ne.h.i || !crste_prefix(ne))) { - ne.s.fc1.prefix_notif = 0; + if (crste_prefix(oldcrste) && (newcrste.h.p || newcrste.h.i || !crste_prefix(newcrste))) { + newcrste.s.fc1.prefix_notif = 0; gmap_unmap_prefix(gmap, gfn, gfn + align); } - if (crste_leaf(*crstep) && crstep->s.fc1.vsie_notif && - (ne.h.p || ne.h.i || !ne.s.fc1.vsie_notif)) { - ne.s.fc1.vsie_notif = 0; + if (crste_leaf(oldcrste) && oldcrste.s.fc1.vsie_notif && + (newcrste.h.p || newcrste.h.i || !newcrste.s.fc1.vsie_notif)) { + newcrste.s.fc1.vsie_notif = 0; if (needs_lock) gmap_handle_vsie_unshadow_event(gmap, gfn); else _gmap_handle_vsie_unshadow_event(gmap, gfn); } - dat_crstep_xchg(crstep, ne, gfn, gmap->asce); + return dat_crstep_xchg_atomic(crstep, oldcrste, newcrste, gfn, gmap->asce); } -static inline void gmap_crstep_xchg(struct gmap *gmap, union crste *crstep, union crste ne, - gfn_t gfn) +static inline bool __must_check gmap_crstep_xchg_atomic(struct gmap *gmap, union crste *crstep, + union crste oldcrste, union crste newcrste, + gfn_t gfn) { - return _gmap_crstep_xchg(gmap, crstep, ne, gfn, true); + return _gmap_crstep_xchg_atomic(gmap, crstep, oldcrste, newcrste, gfn, true); } /** From 6f93d1ed6f46b7b0be288cc45250d67bceb28982 Mon Sep 17 00:00:00 2001 From: Claudio Imbrenda Date: Thu, 26 Mar 2026 14:17:12 +0100 Subject: [PATCH 212/401] KVM: s390: vsie: Fix check for pre-existing shadow mapping When shadowing a nested guest, a check is performed and no shadowing is attempted if the nested guest is already shadowed. The existing check was incomplete; fix it by also checking whether the leaf DAT table entry in the existing shadow gmap has the same protection as the one specified in the guest DAT entry. Fixes: e38c884df921 ("KVM: s390: Switch to new gmap") Reviewed-by: Steffen Eiden Reviewed-by: Janosch Frank Signed-off-by: Claudio Imbrenda --- arch/s390/kvm/gaccess.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/arch/s390/kvm/gaccess.c b/arch/s390/kvm/gaccess.c index 4ee862424ca0..8fd690255e1b 100644 --- a/arch/s390/kvm/gaccess.c +++ b/arch/s390/kvm/gaccess.c @@ -1506,8 +1506,9 @@ static int _gaccess_do_shadow(struct kvm_s390_mmu_cache *mc, struct gmap *sg, if (rc) return rc; - /* A race occourred. The shadow mapping is already valid, nothing to do */ - if ((ptep && !ptep->h.i) || (!ptep && crste_leaf(*table))) + /* A race occurred. The shadow mapping is already valid, nothing to do */ + if ((ptep && !ptep->h.i && ptep->h.p == w->p) || + (!ptep && crste_leaf(*table) && !table->h.i && table->h.p == w->p)) return 0; gl = get_level(table, ptep); From 45921d0212d4a335680854c89a14efd01eae911a Mon Sep 17 00:00:00 2001 From: Claudio Imbrenda Date: Thu, 26 Mar 2026 14:17:13 +0100 Subject: [PATCH 213/401] KVM: s390: Fix gmap_link() The slow path of the fault handler ultimately called gmap_link(), which assumed the fault was a major fault, and blindly called dat_link(). In case of minor faults, things were not always handled properly; in particular the prefix and vsie marker bits were ignored. Move dat_link() into gmap.c, renaming it accordingly. Once moved, the new _gmap_link() function will be able to correctly honour the prefix and vsie markers. This will cause spurious unshadows in some uncommon cases. Fixes: 94fd9b16cc67 ("KVM: s390: KVM page table management functions: lifecycle management") Fixes: a2c17f9270cc ("KVM: s390: New gmap code") Reviewed-by: Steffen Eiden Signed-off-by: Claudio Imbrenda --- arch/s390/kvm/dat.c | 48 ------------------------------------- arch/s390/kvm/dat.h | 2 -- arch/s390/kvm/gmap.c | 56 ++++++++++++++++++++++++++++++++++++++++---- 3 files changed, 52 insertions(+), 54 deletions(-) diff --git a/arch/s390/kvm/dat.c b/arch/s390/kvm/dat.c index 4d44c0f9ad45..7b8d70fe406d 100644 --- a/arch/s390/kvm/dat.c +++ b/arch/s390/kvm/dat.c @@ -997,54 +997,6 @@ bool dat_test_age_gfn(union asce asce, gfn_t start, gfn_t end) return _dat_walk_gfn_range(start, end, asce, &test_age_ops, 0, NULL) > 0; } -int dat_link(struct kvm_s390_mmu_cache *mc, union asce asce, int level, - bool uses_skeys, struct guest_fault *f) -{ - union crste oldval, newval; - union pte newpte, oldpte; - union pgste pgste; - int rc = 0; - - rc = dat_entry_walk(mc, f->gfn, asce, DAT_WALK_ALLOC_CONTINUE, level, &f->crstep, &f->ptep); - if (rc == -EINVAL || rc == -ENOMEM) - return rc; - if (rc) - return -EAGAIN; - - if (WARN_ON_ONCE(unlikely(get_level(f->crstep, f->ptep) > level))) - return -EINVAL; - - if (f->ptep) { - pgste = pgste_get_lock(f->ptep); - oldpte = *f->ptep; - newpte = _pte(f->pfn, f->writable, f->write_attempt | oldpte.s.d, !f->page); - newpte.s.sd = oldpte.s.sd; - oldpte.s.sd = 0; - if (oldpte.val == _PTE_EMPTY.val || oldpte.h.pfra == f->pfn) { - pgste = __dat_ptep_xchg(f->ptep, pgste, newpte, f->gfn, asce, uses_skeys); - if (f->callback) - f->callback(f); - } else { - rc = -EAGAIN; - } - pgste_set_unlock(f->ptep, pgste); - } else { - oldval = READ_ONCE(*f->crstep); - newval = _crste_fc1(f->pfn, oldval.h.tt, f->writable, - f->write_attempt | oldval.s.fc1.d); - newval.s.fc1.sd = oldval.s.fc1.sd; - if (oldval.val != _CRSTE_EMPTY(oldval.h.tt).val && - crste_origin_large(oldval) != crste_origin_large(newval)) - return -EAGAIN; - if (!dat_crstep_xchg_atomic(f->crstep, oldval, newval, f->gfn, asce)) - return -EAGAIN; - if (f->callback) - f->callback(f); - } - - return rc; -} - static long dat_set_pn_crste(union crste *crstep, gfn_t gfn, gfn_t next, struct dat_walk *walk) { union crste newcrste, oldcrste; diff --git a/arch/s390/kvm/dat.h b/arch/s390/kvm/dat.h index 22dafc775335..efedcf96110c 100644 --- a/arch/s390/kvm/dat.h +++ b/arch/s390/kvm/dat.h @@ -540,8 +540,6 @@ int dat_set_slot(struct kvm_s390_mmu_cache *mc, union asce asce, gfn_t start, gf u16 type, u16 param); int dat_set_prefix_notif_bit(union asce asce, gfn_t gfn); bool dat_test_age_gfn(union asce asce, gfn_t start, gfn_t end); -int dat_link(struct kvm_s390_mmu_cache *mc, union asce asce, int level, - bool uses_skeys, struct guest_fault *f); int dat_perform_essa(union asce asce, gfn_t gfn, int orc, union essa_state *state, bool *dirty); long dat_reset_cmma(union asce asce, gfn_t start_gfn); diff --git a/arch/s390/kvm/gmap.c b/arch/s390/kvm/gmap.c index 956be4c01797..03e15b5e0b9a 100644 --- a/arch/s390/kvm/gmap.c +++ b/arch/s390/kvm/gmap.c @@ -631,10 +631,60 @@ static inline bool gmap_1m_allowed(struct gmap *gmap, gfn_t gfn) return test_bit(GMAP_FLAG_ALLOW_HPAGE_1M, &gmap->flags); } +static int _gmap_link(struct kvm_s390_mmu_cache *mc, struct gmap *gmap, int level, + struct guest_fault *f) +{ + union crste oldval, newval; + union pte newpte, oldpte; + union pgste pgste; + int rc = 0; + + rc = dat_entry_walk(mc, f->gfn, gmap->asce, DAT_WALK_ALLOC_CONTINUE, level, + &f->crstep, &f->ptep); + if (rc == -ENOMEM) + return rc; + if (KVM_BUG_ON(rc == -EINVAL, gmap->kvm)) + return rc; + if (rc) + return -EAGAIN; + if (KVM_BUG_ON(get_level(f->crstep, f->ptep) > level, gmap->kvm)) + return -EINVAL; + + if (f->ptep) { + pgste = pgste_get_lock(f->ptep); + oldpte = *f->ptep; + newpte = _pte(f->pfn, f->writable, f->write_attempt | oldpte.s.d, !f->page); + newpte.s.sd = oldpte.s.sd; + oldpte.s.sd = 0; + if (oldpte.val == _PTE_EMPTY.val || oldpte.h.pfra == f->pfn) { + pgste = gmap_ptep_xchg(gmap, f->ptep, newpte, pgste, f->gfn); + if (f->callback) + f->callback(f); + } else { + rc = -EAGAIN; + } + pgste_set_unlock(f->ptep, pgste); + } else { + do { + oldval = READ_ONCE(*f->crstep); + newval = _crste_fc1(f->pfn, oldval.h.tt, f->writable, + f->write_attempt | oldval.s.fc1.d); + newval.s.fc1.sd = oldval.s.fc1.sd; + if (oldval.val != _CRSTE_EMPTY(oldval.h.tt).val && + crste_origin_large(oldval) != crste_origin_large(newval)) + return -EAGAIN; + } while (!gmap_crstep_xchg_atomic(gmap, f->crstep, oldval, newval, f->gfn)); + if (f->callback) + f->callback(f); + } + + return rc; +} + int gmap_link(struct kvm_s390_mmu_cache *mc, struct gmap *gmap, struct guest_fault *f) { unsigned int order; - int rc, level; + int level; lockdep_assert_held(&gmap->kvm->mmu_lock); @@ -646,9 +696,7 @@ int gmap_link(struct kvm_s390_mmu_cache *mc, struct gmap *gmap, struct guest_fau else if (order >= get_order(_SEGMENT_SIZE) && gmap_1m_allowed(gmap, f->gfn)) level = TABLE_TYPE_SEGMENT; } - rc = dat_link(mc, gmap->asce, level, uses_skeys(gmap), f); - KVM_BUG_ON(rc == -EINVAL, gmap->kvm); - return rc; + return _gmap_link(mc, gmap, level, f); } static int gmap_ucas_map_one(struct kvm_s390_mmu_cache *mc, struct gmap *gmap, From 0f2b760a17126cb7940d410c99edfa14e928554c Mon Sep 17 00:00:00 2001 From: Claudio Imbrenda Date: Thu, 26 Mar 2026 14:17:14 +0100 Subject: [PATCH 214/401] KVM: s390: Correctly handle guest mappings without struct page Introduce a new special softbit for large pages, like already presend for normal pages, and use it to mark guest mappings that do not have struct pages. Whenever a leaf DAT entry becomes dirty, check the special softbit and only call SetPageDirty() if there is an actual struct page. Move the logic to mark pages dirty inside _gmap_ptep_xchg() and _gmap_crstep_xchg_atomic(), to avoid needlessly duplicating the code. Fixes: 5a74e3d93417 ("KVM: s390: KVM-specific bitfields and helper functions") Fixes: a2c17f9270cc ("KVM: s390: New gmap code") Reviewed-by: Christian Borntraeger Signed-off-by: Claudio Imbrenda --- arch/s390/kvm/dat.h | 12 ++++++------ arch/s390/kvm/gmap.c | 11 ++++------- arch/s390/kvm/gmap.h | 4 ++++ 3 files changed, 14 insertions(+), 13 deletions(-) diff --git a/arch/s390/kvm/dat.h b/arch/s390/kvm/dat.h index efedcf96110c..874cc962e196 100644 --- a/arch/s390/kvm/dat.h +++ b/arch/s390/kvm/dat.h @@ -160,14 +160,14 @@ union pmd { unsigned long :44; /* HW */ unsigned long : 3; /* Unused */ unsigned long : 1; /* HW */ + unsigned long s : 1; /* Special */ unsigned long w : 1; /* Writable soft-bit */ unsigned long r : 1; /* Readable soft-bit */ unsigned long d : 1; /* Dirty */ unsigned long y : 1; /* Young */ - unsigned long prefix_notif : 1; /* Guest prefix invalidation notification */ unsigned long : 3; /* HW */ + unsigned long prefix_notif : 1; /* Guest prefix invalidation notification */ unsigned long vsie_notif : 1; /* Referenced in a shadow table */ - unsigned long : 1; /* Unused */ unsigned long : 4; /* HW */ unsigned long sd : 1; /* Soft-Dirty */ unsigned long pr : 1; /* Present */ @@ -183,14 +183,14 @@ union pud { unsigned long :33; /* HW */ unsigned long :14; /* Unused */ unsigned long : 1; /* HW */ + unsigned long s : 1; /* Special */ unsigned long w : 1; /* Writable soft-bit */ unsigned long r : 1; /* Readable soft-bit */ unsigned long d : 1; /* Dirty */ unsigned long y : 1; /* Young */ - unsigned long prefix_notif : 1; /* Guest prefix invalidation notification */ unsigned long : 3; /* HW */ + unsigned long prefix_notif : 1; /* Guest prefix invalidation notification */ unsigned long vsie_notif : 1; /* Referenced in a shadow table */ - unsigned long : 1; /* Unused */ unsigned long : 4; /* HW */ unsigned long sd : 1; /* Soft-Dirty */ unsigned long pr : 1; /* Present */ @@ -254,14 +254,14 @@ union crste { struct { unsigned long :47; unsigned long : 1; /* HW (should be 0) */ + unsigned long s : 1; /* Special */ unsigned long w : 1; /* Writable */ unsigned long r : 1; /* Readable */ unsigned long d : 1; /* Dirty */ unsigned long y : 1; /* Young */ - unsigned long prefix_notif : 1; /* Guest prefix invalidation notification */ unsigned long : 3; /* HW */ + unsigned long prefix_notif : 1; /* Guest prefix invalidation notification */ unsigned long vsie_notif : 1; /* Referenced in a shadow table */ - unsigned long : 1; unsigned long : 4; /* HW */ unsigned long sd : 1; /* Soft-Dirty */ unsigned long pr : 1; /* Present */ diff --git a/arch/s390/kvm/gmap.c b/arch/s390/kvm/gmap.c index 03e15b5e0b9a..c8b79ad04ac9 100644 --- a/arch/s390/kvm/gmap.c +++ b/arch/s390/kvm/gmap.c @@ -519,7 +519,7 @@ void gmap_sync_dirty_log(struct gmap *gmap, gfn_t start, gfn_t end) _dat_walk_gfn_range(start, end, gmap->asce, &walk_ops, 0, gmap); } -static int gmap_handle_minor_crste_fault(union asce asce, struct guest_fault *f) +static int gmap_handle_minor_crste_fault(struct gmap *gmap, struct guest_fault *f) { union crste newcrste, oldcrste = READ_ONCE(*f->crstep); @@ -544,10 +544,8 @@ static int gmap_handle_minor_crste_fault(union asce asce, struct guest_fault *f) newcrste.s.fc1.d = 1; newcrste.s.fc1.sd = 1; } - if (!oldcrste.s.fc1.d && newcrste.s.fc1.d) - SetPageDirty(phys_to_page(crste_origin_large(newcrste))); /* In case of races, let the slow path deal with it. */ - return !dat_crstep_xchg_atomic(f->crstep, oldcrste, newcrste, f->gfn, asce); + return !gmap_crstep_xchg_atomic(gmap, f->crstep, oldcrste, newcrste, f->gfn); } /* Trying to write on a read-only page, let the slow path deal with it. */ return 1; @@ -576,8 +574,6 @@ static int _gmap_handle_minor_pte_fault(struct gmap *gmap, union pgste *pgste, newpte.s.d = 1; newpte.s.sd = 1; } - if (!oldpte.s.d && newpte.s.d) - SetPageDirty(pfn_to_page(newpte.h.pfra)); *pgste = gmap_ptep_xchg(gmap, f->ptep, newpte, *pgste, f->gfn); return 0; @@ -614,7 +610,7 @@ int gmap_try_fixup_minor(struct gmap *gmap, struct guest_fault *fault) fault->callback(fault); pgste_set_unlock(fault->ptep, pgste); } else { - rc = gmap_handle_minor_crste_fault(gmap->asce, fault); + rc = gmap_handle_minor_crste_fault(gmap, fault); if (!rc && fault->callback) fault->callback(fault); } @@ -669,6 +665,7 @@ static int _gmap_link(struct kvm_s390_mmu_cache *mc, struct gmap *gmap, int leve oldval = READ_ONCE(*f->crstep); newval = _crste_fc1(f->pfn, oldval.h.tt, f->writable, f->write_attempt | oldval.s.fc1.d); + newval.s.fc1.s = !f->page; newval.s.fc1.sd = oldval.s.fc1.sd; if (oldval.val != _CRSTE_EMPTY(oldval.h.tt).val && crste_origin_large(oldval) != crste_origin_large(newval)) diff --git a/arch/s390/kvm/gmap.h b/arch/s390/kvm/gmap.h index 150e91e15ee0..579399ef5480 100644 --- a/arch/s390/kvm/gmap.h +++ b/arch/s390/kvm/gmap.h @@ -185,6 +185,8 @@ static inline union pgste _gmap_ptep_xchg(struct gmap *gmap, union pte *ptep, un else _gmap_handle_vsie_unshadow_event(gmap, gfn); } + if (!ptep->s.d && newpte.s.d && !newpte.s.s) + SetPageDirty(pfn_to_page(newpte.h.pfra)); return __dat_ptep_xchg(ptep, pgste, newpte, gfn, gmap->asce, uses_skeys(gmap)); } @@ -220,6 +222,8 @@ static inline bool __must_check _gmap_crstep_xchg_atomic(struct gmap *gmap, unio else _gmap_handle_vsie_unshadow_event(gmap, gfn); } + if (!oldcrste.s.fc1.d && newcrste.s.fc1.d && !newcrste.s.fc1.s) + SetPageDirty(phys_to_page(crste_origin_large(newcrste))); return dat_crstep_xchg_atomic(crstep, oldcrste, newcrste, gfn, gmap->asce); } From fd7bc612cf27f7c98764e86e2fba3511610fff20 Mon Sep 17 00:00:00 2001 From: Claudio Imbrenda Date: Thu, 26 Mar 2026 14:17:15 +0100 Subject: [PATCH 215/401] KVM: s390: vsie: Fix nested guest memory shadowing Fix _do_shadow_pte() to use the correct pointer (guest pte instead of nested guest) to set up the new pte. Add a check to return -EOPNOTSUPP if the mapping for the nested guest is writeable but the same page in the guest is only read-only. Fixes: e38c884df921 ("KVM: s390: Switch to new gmap") Signed-off-by: Claudio Imbrenda --- arch/s390/kvm/gaccess.c | 21 +++++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) diff --git a/arch/s390/kvm/gaccess.c b/arch/s390/kvm/gaccess.c index 8fd690255e1b..6bc30f678921 100644 --- a/arch/s390/kvm/gaccess.c +++ b/arch/s390/kvm/gaccess.c @@ -1436,13 +1436,19 @@ static int _do_shadow_pte(struct gmap *sg, gpa_t raddr, union pte *ptep_h, union if (!pgste_get_trylock(ptep_h, &pgste)) return -EAGAIN; - newpte = _pte(f->pfn, f->writable, !p, 0); - newpte.s.d |= ptep->s.d; - newpte.s.sd |= ptep->s.sd; - newpte.h.p &= ptep->h.p; - pgste = _gmap_ptep_xchg(sg->parent, ptep_h, newpte, pgste, f->gfn, false); - pgste.vsie_notif = 1; + newpte = _pte(f->pfn, f->writable, !p, ptep_h->s.s); + newpte.s.d |= ptep_h->s.d; + newpte.s.sd |= ptep_h->s.sd; + newpte.h.p &= ptep_h->h.p; + if (!newpte.h.p && !f->writable) { + rc = -EOPNOTSUPP; + } else { + pgste = _gmap_ptep_xchg(sg->parent, ptep_h, newpte, pgste, f->gfn, false); + pgste.vsie_notif = 1; + } pgste_set_unlock(ptep_h, pgste); + if (rc) + return rc; newpte = _pte(f->pfn, 0, !p, 0); if (!pgste_get_trylock(ptep, &pgste)) @@ -1477,6 +1483,9 @@ static int _do_shadow_crste(struct gmap *sg, gpa_t raddr, union crste *host, uni newcrste.h.p &= oldcrste.h.p; newcrste.s.fc1.vsie_notif = 1; newcrste.s.fc1.prefix_notif = oldcrste.s.fc1.prefix_notif; + newcrste.s.fc1.s = oldcrste.s.fc1.s; + if (!newcrste.h.p && !f->writable) + return -EOPNOTSUPP; } while (!_gmap_crstep_xchg_atomic(sg->parent, host, oldcrste, newcrste, f->gfn, false)); newcrste = _crste_fc1(f->pfn, oldcrste.h.tt, 0, !p); From 0ec456b8a53d78644af7363a225c182494c1dacf Mon Sep 17 00:00:00 2001 From: Claudio Imbrenda Date: Thu, 26 Mar 2026 14:17:16 +0100 Subject: [PATCH 216/401] KVM: s390: vsie: Fix refcount overflow for shadow gmaps In most cases gmap_put() was not called when it should have. Add the missing gmap_put() in vsie_run(). Fixes: e38c884df921 ("KVM: s390: Switch to new gmap") Reviewed-by: Steffen Eiden Reviewed-by: Janosch Frank Signed-off-by: Claudio Imbrenda --- arch/s390/kvm/vsie.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/s390/kvm/vsie.c b/arch/s390/kvm/vsie.c index 0330829b4046..72895dddc39a 100644 --- a/arch/s390/kvm/vsie.c +++ b/arch/s390/kvm/vsie.c @@ -1328,7 +1328,7 @@ static void unregister_shadow_scb(struct kvm_vcpu *vcpu) static int vsie_run(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page) { struct kvm_s390_sie_block *scb_s = &vsie_page->scb_s; - struct gmap *sg; + struct gmap *sg = NULL; int rc = 0; while (1) { @@ -1368,6 +1368,8 @@ static int vsie_run(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page) sg = gmap_put(sg); cond_resched(); } + if (sg) + sg = gmap_put(sg); if (rc == -EFAULT) { /* From 19d6c5b8044366c88c1b1f6e831c0661ff1ddd20 Mon Sep 17 00:00:00 2001 From: Claudio Imbrenda Date: Thu, 26 Mar 2026 14:17:17 +0100 Subject: [PATCH 217/401] KVM: s390: vsie: Fix unshadowing while shadowing If shadowing causes the shadow gmap to get unshadowed, exit early to prevent an attempt to dereference the parent pointer, which at this point is NULL. Opportunistically add some more checks to prevent NULL parents. Fixes: a2c17f9270cc ("KVM: s390: New gmap code") Fixes: e5f98a6899bd ("KVM: s390: Add some helper functions needed for vSIE") Fixes: e38c884df921 ("KVM: s390: Switch to new gmap") Signed-off-by: Claudio Imbrenda --- arch/s390/kvm/gaccess.c | 9 +++++++++ arch/s390/kvm/gmap.c | 11 ++++++++++- 2 files changed, 19 insertions(+), 1 deletion(-) diff --git a/arch/s390/kvm/gaccess.c b/arch/s390/kvm/gaccess.c index 6bc30f678921..8d99667e7d34 100644 --- a/arch/s390/kvm/gaccess.c +++ b/arch/s390/kvm/gaccess.c @@ -1449,6 +1449,8 @@ static int _do_shadow_pte(struct gmap *sg, gpa_t raddr, union pte *ptep_h, union pgste_set_unlock(ptep_h, pgste); if (rc) return rc; + if (!sg->parent) + return -EAGAIN; newpte = _pte(f->pfn, 0, !p, 0); if (!pgste_get_trylock(ptep, &pgste)) @@ -1476,6 +1478,9 @@ static int _do_shadow_crste(struct gmap *sg, gpa_t raddr, union crste *host, uni return rc; do { + /* _gmap_crstep_xchg_atomic() could have unshadowed this shadow gmap */ + if (!sg->parent) + return -EAGAIN; oldcrste = READ_ONCE(*host); newcrste = _crste_fc1(f->pfn, oldcrste.h.tt, f->writable, !p); newcrste.s.fc1.d |= oldcrste.s.fc1.d; @@ -1487,6 +1492,8 @@ static int _do_shadow_crste(struct gmap *sg, gpa_t raddr, union crste *host, uni if (!newcrste.h.p && !f->writable) return -EOPNOTSUPP; } while (!_gmap_crstep_xchg_atomic(sg->parent, host, oldcrste, newcrste, f->gfn, false)); + if (!sg->parent) + return -EAGAIN; newcrste = _crste_fc1(f->pfn, oldcrste.h.tt, 0, !p); gfn = gpa_to_gfn(raddr); @@ -1531,6 +1538,8 @@ static int _gaccess_do_shadow(struct kvm_s390_mmu_cache *mc, struct gmap *sg, entries[i - 1].pfn, i, entries[i - 1].writable); if (rc) return rc; + if (!sg->parent) + return -EAGAIN; } rc = dat_entry_walk(NULL, entries[LEVEL_MEM].gfn, sg->parent->asce, DAT_WALK_LEAF, diff --git a/arch/s390/kvm/gmap.c b/arch/s390/kvm/gmap.c index c8b79ad04ac9..645c32c767d2 100644 --- a/arch/s390/kvm/gmap.c +++ b/arch/s390/kvm/gmap.c @@ -1160,6 +1160,7 @@ struct gmap_protect_asce_top_level { static inline int __gmap_protect_asce_top_level(struct kvm_s390_mmu_cache *mc, struct gmap *sg, struct gmap_protect_asce_top_level *context) { + struct gmap *parent; int rc, i; guard(write_lock)(&sg->kvm->mmu_lock); @@ -1167,7 +1168,12 @@ static inline int __gmap_protect_asce_top_level(struct kvm_s390_mmu_cache *mc, s if (kvm_s390_array_needs_retry_safe(sg->kvm, context->seq, context->f)) return -EAGAIN; - scoped_guard(spinlock, &sg->parent->children_lock) { + parent = READ_ONCE(sg->parent); + if (!parent) + return -EAGAIN; + scoped_guard(spinlock, &parent->children_lock) { + if (READ_ONCE(sg->parent) != parent) + return -EAGAIN; for (i = 0; i < CRST_TABLE_PAGES; i++) { if (!context->f[i].valid) continue; @@ -1250,6 +1256,9 @@ struct gmap *gmap_create_shadow(struct kvm_s390_mmu_cache *mc, struct gmap *pare struct gmap *sg, *new; int rc; + if (WARN_ON(!parent)) + return ERR_PTR(-EINVAL); + scoped_guard(spinlock, &parent->children_lock) { sg = gmap_find_shadow(parent, asce, edat_level); if (sg) { From a12cc7e3d6a62f26262c1940a526f0682fefa3ba Mon Sep 17 00:00:00 2001 From: Claudio Imbrenda Date: Thu, 26 Mar 2026 14:17:18 +0100 Subject: [PATCH 218/401] KVM: s390: vsie: Fix guest page tables protection When shadowing, the guest page tables are write-protected, in order to trap changes and properly unshadow the shadow mapping for the nested guest. Already shadowed levels are skipped, so that only the needed levels are write protected. Currently the levels that get write protected are exactly one level too deep: the last level (nested guest memory) gets protected in the wrong way, and will be protected again correctly a few lines afterwards; most importantly, the highest non-shadowed level does *not* get write protected. Moreover, if the nested guest is running in a real address space, there are no DAT tables to shadow. Write protect the correct levels, so that all the levels that need to be protected are protected, and avoid double protecting the last level; skip attempting to shadow the DAT tables when the nested guest is running in a real address space. Fixes: e38c884df921 ("KVM: s390: Switch to new gmap") Tested-by: Christian Borntraeger Reviewed-by: Janosch Frank Signed-off-by: Claudio Imbrenda --- arch/s390/kvm/gaccess.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/arch/s390/kvm/gaccess.c b/arch/s390/kvm/gaccess.c index 8d99667e7d34..53a8550e7102 100644 --- a/arch/s390/kvm/gaccess.c +++ b/arch/s390/kvm/gaccess.c @@ -1529,13 +1529,20 @@ static int _gaccess_do_shadow(struct kvm_s390_mmu_cache *mc, struct gmap *sg, gl = get_level(table, ptep); + /* In case of a real address space */ + if (w->level <= LEVEL_MEM) { + l = TABLE_TYPE_PAGE_TABLE; + hl = TABLE_TYPE_REGION1; + goto real_address_space; + } + /* * Skip levels that are already protected. For each level, protect * only the page containing the entry, not the whole table. */ for (i = gl ; i >= w->level; i--) { - rc = gmap_protect_rmap(mc, sg, entries[i - 1].gfn, gpa_to_gfn(saddr), - entries[i - 1].pfn, i, entries[i - 1].writable); + rc = gmap_protect_rmap(mc, sg, entries[i].gfn, gpa_to_gfn(saddr), + entries[i].pfn, i + 1, entries[i].writable); if (rc) return rc; if (!sg->parent) @@ -1551,6 +1558,7 @@ static int _gaccess_do_shadow(struct kvm_s390_mmu_cache *mc, struct gmap *sg, /* Get the smallest granularity */ l = min3(gl, hl, w->level); +real_address_space: flags = DAT_WALK_SPLIT_ALLOC | (uses_skeys(sg->parent) ? DAT_WALK_USES_SKEYS : 0); /* If necessary, create the shadow mapping */ if (l < gl) { From 0a28e06575b3f3b30c1e99fc08fa0907956f35a4 Mon Sep 17 00:00:00 2001 From: Claudio Imbrenda Date: Thu, 26 Mar 2026 14:17:19 +0100 Subject: [PATCH 219/401] KVM: s390: Fix KVM_S390_VCPU_FAULT ioctl A previous commit changed the behaviour of the KVM_S390_VCPU_FAULT ioctl. The current (wrong) implementation will trigger a guest addressing exception if the requested address lies outside of a memslot, unless the VM is UCONTROL. Restore the previous behaviour by open coding the fault-in logic. Fixes: 3762e905ec2e ("KVM: s390: use __kvm_faultin_pfn()") Acked-by: Christian Borntraeger Reviewed-by: Steffen Eiden Signed-off-by: Claudio Imbrenda --- arch/s390/kvm/kvm-s390.c | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index b2c01fa7b852..d7838334a338 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -5520,9 +5520,21 @@ long kvm_arch_vcpu_ioctl(struct file *filp, } #endif case KVM_S390_VCPU_FAULT: { - idx = srcu_read_lock(&vcpu->kvm->srcu); - r = vcpu_dat_fault_handler(vcpu, arg, 0); - srcu_read_unlock(&vcpu->kvm->srcu, idx); + gpa_t gaddr = arg; + + scoped_guard(srcu, &vcpu->kvm->srcu) { + r = vcpu_ucontrol_translate(vcpu, &gaddr); + if (r) + break; + + r = kvm_s390_faultin_gfn_simple(vcpu, NULL, gpa_to_gfn(gaddr), false); + if (r == PGM_ADDRESSING) + r = -EFAULT; + if (r <= 0) + break; + r = -EIO; + KVM_BUG_ON(r, vcpu->kvm); + } break; } case KVM_ENABLE_CAP: From 19f94b39058681dec64a10ebeb6f23fe7fc3f77a Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 26 Mar 2026 13:35:53 +0100 Subject: [PATCH 220/401] futex: Require sys_futex_requeue() to have identical flags Nicholas reported that his LLM found it was possible to create a UaF when sys_futex_requeue() is used with different flags. The initial motivation for allowing different flags was the variable sized futex, but since that hasn't been merged (yet), simply mandate the flags are identical, as is the case for the old style sys_futex() requeue operations. Fixes: 0f4b5f972216 ("futex: Add sys_futex_requeue()") Reported-by: Nicholas Carlini Signed-off-by: Peter Zijlstra (Intel) --- kernel/futex/syscalls.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/kernel/futex/syscalls.c b/kernel/futex/syscalls.c index 743c7a728237..77ad9691f6a6 100644 --- a/kernel/futex/syscalls.c +++ b/kernel/futex/syscalls.c @@ -459,6 +459,14 @@ SYSCALL_DEFINE4(futex_requeue, if (ret) return ret; + /* + * For now mandate both flags are identical, like the sys_futex() + * interface has. If/when we merge the variable sized futex support, + * that patch can modify this test to allow a difference in size. + */ + if (futexes[0].w.flags != futexes[1].w.flags) + return -EINVAL; + cmpval = futexes[0].w.val; return futex_requeue(u64_to_user_ptr(futexes[0].w.uaddr), futexes[0].w.flags, From 190a8c48ff623c3d67cb295b4536a660db2012aa Mon Sep 17 00:00:00 2001 From: Hao-Yu Yang Date: Fri, 13 Mar 2026 20:47:56 +0800 Subject: [PATCH 221/401] futex: Fix UaF between futex_key_to_node_opt() and vma_replace_policy() During futex_key_to_node_opt() execution, vma->vm_policy is read under speculative mmap lock and RCU. Concurrently, mbind() may call vma_replace_policy() which frees the old mempolicy immediately via kmem_cache_free(). This creates a race where __futex_key_to_node() dereferences a freed mempolicy pointer, causing a use-after-free read of mpol->mode. [ 151.412631] BUG: KASAN: slab-use-after-free in __futex_key_to_node (kernel/futex/core.c:349) [ 151.414046] Read of size 2 at addr ffff888001c49634 by task e/87 [ 151.415969] Call Trace: [ 151.416732] __asan_load2 (mm/kasan/generic.c:271) [ 151.416777] __futex_key_to_node (kernel/futex/core.c:349) [ 151.416822] get_futex_key (kernel/futex/core.c:374 kernel/futex/core.c:386 kernel/futex/core.c:593) Fix by adding rcu to __mpol_put(). Fixes: c042c505210d ("futex: Implement FUTEX2_MPOL") Reported-by: Hao-Yu Yang Suggested-by: Eric Dumazet Signed-off-by: Hao-Yu Yang Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Eric Dumazet Acked-by: David Hildenbrand (Arm) Link: https://patch.msgid.link/20260324174418.GB1850007@noisy.programming.kicks-ass.net --- include/linux/mempolicy.h | 1 + kernel/futex/core.c | 2 +- mm/mempolicy.c | 10 ++++++++-- 3 files changed, 10 insertions(+), 3 deletions(-) diff --git a/include/linux/mempolicy.h b/include/linux/mempolicy.h index 0fe96f3ab3ef..65c732d440d2 100644 --- a/include/linux/mempolicy.h +++ b/include/linux/mempolicy.h @@ -55,6 +55,7 @@ struct mempolicy { nodemask_t cpuset_mems_allowed; /* relative to these nodes */ nodemask_t user_nodemask; /* nodemask passed by user */ } w; + struct rcu_head rcu; }; /* diff --git a/kernel/futex/core.c b/kernel/futex/core.c index cf7e610eac42..31e83a09789e 100644 --- a/kernel/futex/core.c +++ b/kernel/futex/core.c @@ -342,7 +342,7 @@ static int __futex_key_to_node(struct mm_struct *mm, unsigned long addr) if (!vma) return FUTEX_NO_NODE; - mpol = vma_policy(vma); + mpol = READ_ONCE(vma->vm_policy); if (!mpol) return FUTEX_NO_NODE; diff --git a/mm/mempolicy.c b/mm/mempolicy.c index 0e5175f1c767..cf92bd6a8226 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c @@ -487,7 +487,13 @@ void __mpol_put(struct mempolicy *pol) { if (!atomic_dec_and_test(&pol->refcnt)) return; - kmem_cache_free(policy_cache, pol); + /* + * Required to allow mmap_lock_speculative*() access, see for example + * futex_key_to_node_opt(). All accesses are serialized by mmap_lock, + * however the speculative lock section unbound by the normal lock + * boundaries, requiring RCU freeing. + */ + kfree_rcu(pol, rcu); } EXPORT_SYMBOL_FOR_MODULES(__mpol_put, "kvm"); @@ -1020,7 +1026,7 @@ static int vma_replace_policy(struct vm_area_struct *vma, } old = vma->vm_policy; - vma->vm_policy = new; /* protected by mmap_lock */ + WRITE_ONCE(vma->vm_policy, new); /* protected by mmap_lock */ mpol_put(old); return 0; From 629ec78ef8608d955ce217880cdc3e1873af3a15 Mon Sep 17 00:00:00 2001 From: Sabrina Dubroca Date: Tue, 24 Mar 2026 00:25:57 +0100 Subject: [PATCH 222/401] mpls: add seqcount to protect the platform_label{,s} pair The RCU-protected codepaths (mpls_forward, mpls_dump_routes) can have an inconsistent view of platform_labels vs platform_label in case of a concurrent resize (resize_platform_label_table, under platform_mutex). This can lead to OOB accesses. This patch adds a seqcount, so that we get a consistent snapshot. Note that mpls_label_ok is also susceptible to this, so the check against RTA_DST in rtm_to_route_config, done outside platform_mutex, is not sufficient. This value gets passed to mpls_label_ok once more in both mpls_route_add and mpls_route_del, so there is no issue, but that additional check must not be removed. Reported-by: Yuan Tan Reported-by: Yifan Wu Reported-by: Juefei Pu Reported-by: Xin Liu Fixes: 7720c01f3f590 ("mpls: Add a sysctl to control the size of the mpls label table") Fixes: dde1b38e873c ("mpls: Convert mpls_dump_routes() to RCU.") Signed-off-by: Sabrina Dubroca Link: https://patch.msgid.link/cd8fca15e3eb7e212b094064cd83652e20fd9d31.1774284088.git.sd@queasysnail.net Signed-off-by: Jakub Kicinski --- include/net/netns/mpls.h | 1 + net/mpls/af_mpls.c | 29 +++++++++++++++++++++++++---- 2 files changed, 26 insertions(+), 4 deletions(-) diff --git a/include/net/netns/mpls.h b/include/net/netns/mpls.h index 6682e51513ef..2073cbac2afb 100644 --- a/include/net/netns/mpls.h +++ b/include/net/netns/mpls.h @@ -17,6 +17,7 @@ struct netns_mpls { size_t platform_labels; struct mpls_route __rcu * __rcu *platform_label; struct mutex platform_mutex; + seqcount_mutex_t platform_label_seq; struct ctl_table_header *ctl; }; diff --git a/net/mpls/af_mpls.c b/net/mpls/af_mpls.c index d5417688f69e..18d3da8ab384 100644 --- a/net/mpls/af_mpls.c +++ b/net/mpls/af_mpls.c @@ -83,14 +83,30 @@ static struct mpls_route *mpls_route_input(struct net *net, unsigned int index) return mpls_dereference(net, platform_label[index]); } +static struct mpls_route __rcu **mpls_platform_label_rcu(struct net *net, size_t *platform_labels) +{ + struct mpls_route __rcu **platform_label; + unsigned int sequence; + + do { + sequence = read_seqcount_begin(&net->mpls.platform_label_seq); + platform_label = rcu_dereference(net->mpls.platform_label); + *platform_labels = net->mpls.platform_labels; + } while (read_seqcount_retry(&net->mpls.platform_label_seq, sequence)); + + return platform_label; +} + static struct mpls_route *mpls_route_input_rcu(struct net *net, unsigned int index) { struct mpls_route __rcu **platform_label; + size_t platform_labels; - if (index >= net->mpls.platform_labels) + platform_label = mpls_platform_label_rcu(net, &platform_labels); + + if (index >= platform_labels) return NULL; - platform_label = rcu_dereference(net->mpls.platform_label); return rcu_dereference(platform_label[index]); } @@ -2240,8 +2256,7 @@ static int mpls_dump_routes(struct sk_buff *skb, struct netlink_callback *cb) if (index < MPLS_LABEL_FIRST_UNRESERVED) index = MPLS_LABEL_FIRST_UNRESERVED; - platform_label = rcu_dereference(net->mpls.platform_label); - platform_labels = net->mpls.platform_labels; + platform_label = mpls_platform_label_rcu(net, &platform_labels); if (filter.filter_set) flags |= NLM_F_DUMP_FILTERED; @@ -2645,8 +2660,12 @@ static int resize_platform_label_table(struct net *net, size_t limit) } /* Update the global pointers */ + local_bh_disable(); + write_seqcount_begin(&net->mpls.platform_label_seq); net->mpls.platform_labels = limit; rcu_assign_pointer(net->mpls.platform_label, labels); + write_seqcount_end(&net->mpls.platform_label_seq); + local_bh_enable(); mutex_unlock(&net->mpls.platform_mutex); @@ -2728,6 +2747,8 @@ static __net_init int mpls_net_init(struct net *net) int i; mutex_init(&net->mpls.platform_mutex); + seqcount_mutex_init(&net->mpls.platform_label_seq, &net->mpls.platform_mutex); + net->mpls.platform_labels = 0; net->mpls.platform_label = NULL; net->mpls.ip_ttl_propagate = 1; From f73896b4197ed53cf0894657c899265ef7c86b7a Mon Sep 17 00:00:00 2001 From: Dipayaan Roy Date: Tue, 24 Mar 2026 11:14:28 -0700 Subject: [PATCH 223/401] net: mana: Fix RX skb truesize accounting MANA passes rxq->alloc_size to napi_build_skb() for all RX buffers. It is correct for fragment-backed RX buffers, where alloc_size matches the actual backing allocation used for each packet buffer. However, in the non-fragment RX path mana allocates a full page, or a higher-order page, per RX buffer. In that case alloc_size only reflects the usable packet area and not the actual backing memory. This causes napi_build_skb() to underestimate the skb backing allocation in the single-buffer RX path, so skb->truesize is derived from a value smaller than the real RX buffer allocation. Fix this by updating alloc_size in the non-fragment RX path to the actual backing allocation size before it is passed to napi_build_skb(). Fixes: 730ff06d3f5c ("net: mana: Use page pool fragments for RX buffers instead of full pages to improve memory efficiency.") Signed-off-by: Dipayaan Roy Reviewed-by: Haiyang Zhang Link: https://patch.msgid.link/acLUhLpLum6qrD/N@linuxonhyperv3.guj3yctzbm1etfxqx2vob5hsef.xx.internal.cloudapp.net Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/microsoft/mana/mana_en.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/net/ethernet/microsoft/mana/mana_en.c b/drivers/net/ethernet/microsoft/mana/mana_en.c index dca62fb9a3a9..09a53c977545 100644 --- a/drivers/net/ethernet/microsoft/mana/mana_en.c +++ b/drivers/net/ethernet/microsoft/mana/mana_en.c @@ -766,6 +766,13 @@ static void mana_get_rxbuf_cfg(struct mana_port_context *apc, } *frag_count = 1; + + /* In the single-buffer path, napi_build_skb() must see the + * actual backing allocation size so skb->truesize reflects + * the full page (or higher-order page), not just the usable + * packet area. + */ + *alloc_size = PAGE_SIZE << get_order(*alloc_size); return; } From 976ff48c2ac6e6b25b01428c9d7997bcd0fb2949 Mon Sep 17 00:00:00 2001 From: "Sven Eckelmann (Plasma Cloud)" Date: Tue, 24 Mar 2026 09:36:01 +0100 Subject: [PATCH 224/401] net: ethernet: mtk_ppe: avoid NULL deref when gmac0 is disabled If the gmac0 is disabled, the precheck for a valid ingress device will cause a NULL pointer deref and crash the system. This happens because eth->netdev[0] will be NULL but the code will directly try to access netdev_ops. Instead of just checking for the first net_device, it must be checked if any of the mtk_eth net_devices is matching the netdev_ops of the ingress device. Cc: stable@vger.kernel.org Fixes: 73cfd947dbdb ("net: ethernet: mtk_eth_soc: ppe: prevent ppe update for non-mtk devices") Signed-off-by: Sven Eckelmann (Plasma Cloud) Reviewed-by: Simon Horman Link: https://patch.msgid.link/20260324-wed-crash-gmac0-disabled-v1-1-3bc388aee565@simonwunderlich.de Signed-off-by: Jakub Kicinski --- .../net/ethernet/mediatek/mtk_ppe_offload.c | 21 ++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mediatek/mtk_ppe_offload.c b/drivers/net/ethernet/mediatek/mtk_ppe_offload.c index cb30108f2bf6..cc8c4ef8038f 100644 --- a/drivers/net/ethernet/mediatek/mtk_ppe_offload.c +++ b/drivers/net/ethernet/mediatek/mtk_ppe_offload.c @@ -244,6 +244,25 @@ mtk_flow_set_output_device(struct mtk_eth *eth, struct mtk_foe_entry *foe, return 0; } +static bool +mtk_flow_is_valid_idev(const struct mtk_eth *eth, const struct net_device *idev) +{ + size_t i; + + if (!idev) + return false; + + for (i = 0; i < ARRAY_SIZE(eth->netdev); i++) { + if (!eth->netdev[i]) + continue; + + if (idev->netdev_ops == eth->netdev[i]->netdev_ops) + return true; + } + + return false; +} + static int mtk_flow_offload_replace(struct mtk_eth *eth, struct flow_cls_offload *f, int ppe_index) @@ -270,7 +289,7 @@ mtk_flow_offload_replace(struct mtk_eth *eth, struct flow_cls_offload *f, flow_rule_match_meta(rule, &match); if (mtk_is_netsys_v2_or_greater(eth)) { idev = __dev_get_by_index(&init_net, match.key->ingress_ifindex); - if (idev && idev->netdev_ops == eth->netdev[0]->netdev_ops) { + if (mtk_flow_is_valid_idev(eth, idev)) { struct mtk_mac *mac = netdev_priv(idev); if (WARN_ON(mac->ppe_idx >= eth->soc->ppe_num)) From 5170efd9c344c68a8075dcb8ed38d3f8a60e7ed4 Mon Sep 17 00:00:00 2001 From: Nicholas Carlini Date: Fri, 27 Mar 2026 02:18:23 +0000 Subject: [PATCH 225/401] io_uring/fdinfo: fix OOB read in SQE_MIXED wrap check __io_uring_show_fdinfo() iterates over pending SQEs and, for 128-byte SQEs on an IORING_SETUP_SQE_MIXED ring, needs to detect when the second half of the SQE would be past the end of the sq_sqes array. The current check tests (++sq_head & sq_mask) == 0, but sq_head is only incremented when a 128-byte SQE is encountered, not on every iteration. The actual array index is sq_idx = (i + sq_head) & sq_mask, which can be sq_mask (the last slot) while the wrap check passes. Fix by checking sq_idx directly. Keep the sq_head increment so the loop still skips the second half of the 128-byte SQE on the next iteration. Fixes: 1cba30bf9fdd ("io_uring: add support for IORING_SETUP_SQE_MIXED") Signed-off-by: Nicholas Carlini Link: https://patch.msgid.link/20260327021823.3138396-1-nicholas@carlini.com Signed-off-by: Jens Axboe --- io_uring/fdinfo.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/io_uring/fdinfo.c b/io_uring/fdinfo.c index 25c92ace18bd..c2d3e45544bb 100644 --- a/io_uring/fdinfo.c +++ b/io_uring/fdinfo.c @@ -119,12 +119,13 @@ static void __io_uring_show_fdinfo(struct io_ring_ctx *ctx, struct seq_file *m) sq_idx); break; } - if ((++sq_head & sq_mask) == 0) { + if (sq_idx == sq_mask) { seq_printf(m, "%5u: corrupted sqe, wrapping 128B entry\n", sq_idx); break; } + sq_head++; i++; sqe128 = true; } From 57a04a13aac1f247d171c3f3aef93efc69e6979e Mon Sep 17 00:00:00 2001 From: Qingfang Deng Date: Tue, 24 Mar 2026 22:08:56 +0800 Subject: [PATCH 226/401] netdevsim: fix build if SKB_EXTENSIONS=n __skb_ext_put() is not declared if SKB_EXTENSIONS is not enabled, which causes a build error: drivers/net/netdevsim/netdev.c: In function 'nsim_forward_skb': drivers/net/netdevsim/netdev.c:114:25: error: implicit declaration of function '__skb_ext_put'; did you mean 'skb_ext_put'? [-Werror=implicit-function-declaration] 114 | __skb_ext_put(psp_ext); | ^~~~~~~~~~~~~ | skb_ext_put cc1: some warnings being treated as errors Add a stub to fix the build. Fixes: 7d9351435ebb ("netdevsim: drop PSP ext ref on forward failure") Signed-off-by: Qingfang Deng Link: https://patch.msgid.link/20260324140857.783-1-dqfext@gmail.com Signed-off-by: Jakub Kicinski --- include/linux/skbuff.h | 1 + 1 file changed, 1 insertion(+) diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index daa4e4944ce3..2f278ce376b7 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -5097,6 +5097,7 @@ static inline bool skb_has_extensions(struct sk_buff *skb) return unlikely(skb->active_extensions); } #else +static inline void __skb_ext_put(struct skb_ext *ext) {} static inline void skb_ext_put(struct sk_buff *skb) {} static inline void skb_ext_reset(struct sk_buff *skb) {} static inline void skb_ext_del(struct sk_buff *skb, int unused) {} From e8e44c98f789dee45cfd24ffb9d4936e0606d7c6 Mon Sep 17 00:00:00 2001 From: Buday Csaba Date: Tue, 24 Mar 2026 14:32:30 +0100 Subject: [PATCH 227/401] net: fec: fix the PTP periodic output sysfs interface When the PPS channel configuration was implemented, the channel index for the periodic outputs was configured as the hardware channel number. The sysfs interface uses a logical channel index, and rejects numbers greater than `n_per_out` (see period_store() in ptp_sysfs.c). That property was left at 1, since the driver implements channel selection, not simultaneous operation of multiple PTP hardware timer channels. A second check in fec_ptp_enable() returns -EOPNOTSUPP when the two channel numbers disagree, making channels 1..3 unusable from sysfs. Fix by removing this redundant check in the FEC PTP driver. Fixes: 566c2d83887f ("net: fec: make PPS channel configurable") Signed-off-by: Buday Csaba Link: https://patch.msgid.link/8ec2afe88423c2231f9cf8044d212ce57846670e.1774359059.git.buday.csaba@prolan.hu Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/freescale/fec_ptp.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/net/ethernet/freescale/fec_ptp.c b/drivers/net/ethernet/freescale/fec_ptp.c index 4b7bad9a485d..56801c2009d5 100644 --- a/drivers/net/ethernet/freescale/fec_ptp.c +++ b/drivers/net/ethernet/freescale/fec_ptp.c @@ -545,9 +545,6 @@ static int fec_ptp_enable(struct ptp_clock_info *ptp, if (rq->perout.flags) return -EOPNOTSUPP; - if (rq->perout.index != fep->pps_channel) - return -EOPNOTSUPP; - period.tv_sec = rq->perout.period.sec; period.tv_nsec = rq->perout.period.nsec; period_ns = timespec64_to_ns(&period); From 0239fd701d33475a39428daa3dc627407cd417a6 Mon Sep 17 00:00:00 2001 From: Wei Fang Date: Tue, 24 Mar 2026 14:21:19 +0800 Subject: [PATCH 228/401] net: enetc: reset PIR and CIR if they are not equal when initializing TX ring Currently the driver does not reset the producer index register (PIR) and consumer index register (CIR) when initializing a TX BD ring. The driver only reads the PIR and CIR and initializes the software indexes. If the TX BD ring is reinitialized when it still contains unsent frames, its PIR and CIR will not be equal after the reinitialization. However, the BDs between CIR and PIR have been freed and become invalid and this can lead to a hardware malfunction, causing the TX BD ring will not work properly. For ENETC v4, it supports software to set the PIR and CIR, so the driver can reset these two registers if they are not equal when reinitializing the TX BD ring. Therefore, add this solution for ENETC v4. Note that this patch does not work for ENETC v1 because it does not support software to set the PIR and CIR. Fixes: 99100d0d9922 ("net: enetc: add preliminary support for i.MX95 ENETC PF") Signed-off-by: Wei Fang Reviewed-by: Claudiu Manoil Reviewed-by: Simon Horman Link: https://patch.msgid.link/20260324062121.2745033-2-wei.fang@nxp.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/freescale/enetc/enetc.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/freescale/enetc/enetc.c b/drivers/net/ethernet/freescale/enetc/enetc.c index a146ceaf2ed6..aa8a87124b10 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc.c +++ b/drivers/net/ethernet/freescale/enetc/enetc.c @@ -2578,6 +2578,7 @@ EXPORT_SYMBOL_GPL(enetc_free_si_resources); static void enetc_setup_txbdr(struct enetc_hw *hw, struct enetc_bdr *tx_ring) { + struct enetc_si *si = container_of(hw, struct enetc_si, hw); int idx = tx_ring->index; u32 tbmr; @@ -2591,10 +2592,20 @@ static void enetc_setup_txbdr(struct enetc_hw *hw, struct enetc_bdr *tx_ring) enetc_txbdr_wr(hw, idx, ENETC_TBLENR, ENETC_RTBLENR_LEN(tx_ring->bd_count)); - /* clearing PI/CI registers for Tx not supported, adjust sw indexes */ + /* For ENETC v1, clearing PI/CI registers for Tx not supported, + * adjust sw indexes + */ tx_ring->next_to_use = enetc_txbdr_rd(hw, idx, ENETC_TBPIR); tx_ring->next_to_clean = enetc_txbdr_rd(hw, idx, ENETC_TBCIR); + if (tx_ring->next_to_use != tx_ring->next_to_clean && + !is_enetc_rev1(si)) { + tx_ring->next_to_use = 0; + tx_ring->next_to_clean = 0; + enetc_txbdr_wr(hw, idx, ENETC_TBPIR, 0); + enetc_txbdr_wr(hw, idx, ENETC_TBCIR, 0); + } + /* enable Tx ints by setting pkt thr to 1 */ enetc_txbdr_wr(hw, idx, ENETC_TBICR0, ENETC_TBICR0_ICEN | 0x1); From 2725d84efe2582c0a4b907e74a689d26b2dbd382 Mon Sep 17 00:00:00 2001 From: Wei Fang Date: Tue, 24 Mar 2026 14:21:20 +0800 Subject: [PATCH 229/401] net: enetc: add graceful stop to safely reinitialize the TX Ring For ENETC v4, the PIR and CIR will be reset if they are not equal when reinitializing the TX BD ring. However, resetting the PIR and CIR alone is insufficient. When a link-down event occurs while the TX BD ring is transmitting frames, subsequent reinitialization of the TX BD ring may cause it to malfunction. For example, the below steps can reproduce the problem. 1. Unplug the cable when the TX BD ring is busy transmitting frames. 2. Disable the network interface (ifconfig eth0 down). 3. Re-enable the network interface (ifconfig eth0 up). 4. Plug in the cable, the TX BD ring may fail to transmit packets. When the link-down event occurs, enetc4_pl_mac_link_down() only clears PMa_COMMAND_CONFIG[TX_EN] to disable MAC transmit data path. It doesn't set PORT[TXDIS] to 1 to flush the TX BD ring. Therefore, reinitializing the TX BD ring at this point is unsafe. To safely reinitialize the TX BD ring after a link-down event, we checked with the NETC IP team, a proper Ethernet MAC graceful stop is necessary. Therefore, add the Ethernet MAC graceful stop to the link-down event handler enetc4_pl_mac_link_down(). Fixes: 99100d0d9922 ("net: enetc: add preliminary support for i.MX95 ENETC PF") Signed-off-by: Wei Fang Reviewed-by: Claudiu Manoil Reviewed-by: Simon Horman Link: https://patch.msgid.link/20260324062121.2745033-3-wei.fang@nxp.com Signed-off-by: Jakub Kicinski --- .../net/ethernet/freescale/enetc/enetc4_hw.h | 11 ++ .../net/ethernet/freescale/enetc/enetc4_pf.c | 111 +++++++++++++++--- 2 files changed, 108 insertions(+), 14 deletions(-) diff --git a/drivers/net/ethernet/freescale/enetc/enetc4_hw.h b/drivers/net/ethernet/freescale/enetc/enetc4_hw.h index 3ed0f7a02767..719c88ceb801 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc4_hw.h +++ b/drivers/net/ethernet/freescale/enetc/enetc4_hw.h @@ -134,6 +134,12 @@ /* Port operational register */ #define ENETC4_POR 0x4100 +#define POR_TXDIS BIT(0) +#define POR_RXDIS BIT(1) + +/* Port status register */ +#define ENETC4_PSR 0x4104 +#define PSR_RX_BUSY BIT(1) /* Port traffic class a transmit maximum SDU register */ #define ENETC4_PTCTMSDUR(a) ((a) * 0x20 + 0x4208) @@ -173,6 +179,11 @@ /* Port internal MDIO base address, use to access PCS */ #define ENETC4_PM_IMDIO_BASE 0x5030 +/* Port MAC 0/1 Interrupt Event Register */ +#define ENETC4_PM_IEVENT(mac) (0x5040 + (mac) * 0x400) +#define PM_IEVENT_TX_EMPTY BIT(5) +#define PM_IEVENT_RX_EMPTY BIT(6) + /* Port MAC 0/1 Pause Quanta Register */ #define ENETC4_PM_PAUSE_QUANTA(mac) (0x5054 + (mac) * 0x400) diff --git a/drivers/net/ethernet/freescale/enetc/enetc4_pf.c b/drivers/net/ethernet/freescale/enetc/enetc4_pf.c index 689b9f13c5eb..53cecbb23a97 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc4_pf.c +++ b/drivers/net/ethernet/freescale/enetc/enetc4_pf.c @@ -444,20 +444,11 @@ static void enetc4_set_trx_frame_size(struct enetc_pf *pf) enetc4_pf_reset_tc_msdu(&si->hw); } -static void enetc4_enable_trx(struct enetc_pf *pf) -{ - struct enetc_hw *hw = &pf->si->hw; - - /* Enable port transmit/receive */ - enetc_port_wr(hw, ENETC4_POR, 0); -} - static void enetc4_configure_port(struct enetc_pf *pf) { enetc4_configure_port_si(pf); enetc4_set_trx_frame_size(pf); enetc_set_default_rss_key(pf); - enetc4_enable_trx(pf); } static int enetc4_init_ntmp_user(struct enetc_si *si) @@ -801,15 +792,105 @@ static void enetc4_set_tx_pause(struct enetc_pf *pf, int num_rxbdr, bool tx_paus enetc_port_wr(hw, ENETC4_PPAUOFFTR, pause_off_thresh); } -static void enetc4_enable_mac(struct enetc_pf *pf, bool en) +static void enetc4_mac_wait_tx_empty(struct enetc_si *si, int mac) { + u32 val; + + if (read_poll_timeout(enetc_port_rd, val, + val & PM_IEVENT_TX_EMPTY, + 100, 10000, false, &si->hw, + ENETC4_PM_IEVENT(mac))) + dev_warn(&si->pdev->dev, + "MAC %d TX is not empty\n", mac); +} + +static void enetc4_mac_tx_graceful_stop(struct enetc_pf *pf) +{ + struct enetc_hw *hw = &pf->si->hw; + struct enetc_si *si = pf->si; + u32 val; + + val = enetc_port_rd(hw, ENETC4_POR); + val |= POR_TXDIS; + enetc_port_wr(hw, ENETC4_POR, val); + + enetc4_mac_wait_tx_empty(si, 0); + if (si->hw_features & ENETC_SI_F_QBU) + enetc4_mac_wait_tx_empty(si, 1); + + val = enetc_port_mac_rd(si, ENETC4_PM_CMD_CFG(0)); + val &= ~PM_CMD_CFG_TX_EN; + enetc_port_mac_wr(si, ENETC4_PM_CMD_CFG(0), val); +} + +static void enetc4_mac_tx_enable(struct enetc_pf *pf) +{ + struct enetc_hw *hw = &pf->si->hw; struct enetc_si *si = pf->si; u32 val; val = enetc_port_mac_rd(si, ENETC4_PM_CMD_CFG(0)); - val &= ~(PM_CMD_CFG_TX_EN | PM_CMD_CFG_RX_EN); - val |= en ? (PM_CMD_CFG_TX_EN | PM_CMD_CFG_RX_EN) : 0; + val |= PM_CMD_CFG_TX_EN; + enetc_port_mac_wr(si, ENETC4_PM_CMD_CFG(0), val); + val = enetc_port_rd(hw, ENETC4_POR); + val &= ~POR_TXDIS; + enetc_port_wr(hw, ENETC4_POR, val); +} + +static void enetc4_mac_wait_rx_empty(struct enetc_si *si, int mac) +{ + u32 val; + + if (read_poll_timeout(enetc_port_rd, val, + val & PM_IEVENT_RX_EMPTY, + 100, 10000, false, &si->hw, + ENETC4_PM_IEVENT(mac))) + dev_warn(&si->pdev->dev, + "MAC %d RX is not empty\n", mac); +} + +static void enetc4_mac_rx_graceful_stop(struct enetc_pf *pf) +{ + struct enetc_hw *hw = &pf->si->hw; + struct enetc_si *si = pf->si; + u32 val; + + if (si->hw_features & ENETC_SI_F_QBU) { + val = enetc_port_rd(hw, ENETC4_PM_CMD_CFG(1)); + val &= ~PM_CMD_CFG_RX_EN; + enetc_port_wr(hw, ENETC4_PM_CMD_CFG(1), val); + enetc4_mac_wait_rx_empty(si, 1); + } + + val = enetc_port_rd(hw, ENETC4_PM_CMD_CFG(0)); + val &= ~PM_CMD_CFG_RX_EN; + enetc_port_wr(hw, ENETC4_PM_CMD_CFG(0), val); + enetc4_mac_wait_rx_empty(si, 0); + + if (read_poll_timeout(enetc_port_rd, val, + !(val & PSR_RX_BUSY), + 100, 10000, false, hw, + ENETC4_PSR)) + dev_warn(&si->pdev->dev, "Port RX busy\n"); + + val = enetc_port_rd(hw, ENETC4_POR); + val |= POR_RXDIS; + enetc_port_wr(hw, ENETC4_POR, val); +} + +static void enetc4_mac_rx_enable(struct enetc_pf *pf) +{ + struct enetc_hw *hw = &pf->si->hw; + struct enetc_si *si = pf->si; + u32 val; + + val = enetc_port_rd(hw, ENETC4_POR); + val &= ~POR_RXDIS; + enetc_port_wr(hw, ENETC4_POR, val); + + val = enetc_port_mac_rd(si, ENETC4_PM_CMD_CFG(0)); + val |= PM_CMD_CFG_RX_EN; enetc_port_mac_wr(si, ENETC4_PM_CMD_CFG(0), val); } @@ -853,7 +934,8 @@ static void enetc4_pl_mac_link_up(struct phylink_config *config, enetc4_set_hd_flow_control(pf, hd_fc); enetc4_set_tx_pause(pf, priv->num_rx_rings, tx_pause); enetc4_set_rx_pause(pf, rx_pause); - enetc4_enable_mac(pf, true); + enetc4_mac_tx_enable(pf); + enetc4_mac_rx_enable(pf); } static void enetc4_pl_mac_link_down(struct phylink_config *config, @@ -862,7 +944,8 @@ static void enetc4_pl_mac_link_down(struct phylink_config *config, { struct enetc_pf *pf = phylink_to_enetc_pf(config); - enetc4_enable_mac(pf, false); + enetc4_mac_rx_graceful_stop(pf); + enetc4_mac_tx_graceful_stop(pf); } static const struct phylink_mac_ops enetc_pl_mac_ops = { From f2df9567b123145a07ee4ea7440e233f5d0232cc Mon Sep 17 00:00:00 2001 From: Wei Fang Date: Tue, 24 Mar 2026 14:21:21 +0800 Subject: [PATCH 230/401] net: enetc: do not access non-existent registers on pseudo MAC The ENETC4_PM_IEVENT and ENETC4_PM_CMD_CFG registers do not exist on the ENETC pseudo MAC, so the driver should prevent from accessing them. Fixes: 5175c1e4adca ("net: enetc: add basic support for the ENETC with pseudo MAC for i.MX94") Signed-off-by: Wei Fang Tested-by: Claudiu Manoil Reviewed-by: Claudiu Manoil Reviewed-by: Simon Horman Link: https://patch.msgid.link/20260324062121.2745033-4-wei.fang@nxp.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/freescale/enetc/enetc4_pf.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/net/ethernet/freescale/enetc/enetc4_pf.c b/drivers/net/ethernet/freescale/enetc/enetc4_pf.c index 53cecbb23a97..56899f2254aa 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc4_pf.c +++ b/drivers/net/ethernet/freescale/enetc/enetc4_pf.c @@ -814,6 +814,9 @@ static void enetc4_mac_tx_graceful_stop(struct enetc_pf *pf) val |= POR_TXDIS; enetc_port_wr(hw, ENETC4_POR, val); + if (enetc_is_pseudo_mac(si)) + return; + enetc4_mac_wait_tx_empty(si, 0); if (si->hw_features & ENETC_SI_F_QBU) enetc4_mac_wait_tx_empty(si, 1); @@ -856,6 +859,9 @@ static void enetc4_mac_rx_graceful_stop(struct enetc_pf *pf) struct enetc_si *si = pf->si; u32 val; + if (enetc_is_pseudo_mac(si)) + goto check_rx_busy; + if (si->hw_features & ENETC_SI_F_QBU) { val = enetc_port_rd(hw, ENETC4_PM_CMD_CFG(1)); val &= ~PM_CMD_CFG_RX_EN; @@ -868,6 +874,7 @@ static void enetc4_mac_rx_graceful_stop(struct enetc_pf *pf) enetc_port_wr(hw, ENETC4_PM_CMD_CFG(0), val); enetc4_mac_wait_rx_empty(si, 0); +check_rx_busy: if (read_poll_timeout(enetc_port_rd, val, !(val & PSR_RX_BUSY), 100, 10000, false, hw, From 2428083101f6883f979cceffa76cd8440751ffe6 Mon Sep 17 00:00:00 2001 From: Jiayuan Chen Date: Tue, 24 Mar 2026 16:06:44 +0800 Subject: [PATCH 231/401] net: qrtr: replace qrtr_tx_flow radix_tree with xarray to fix memory leak MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit __radix_tree_create() allocates and links intermediate nodes into the tree one by one. If a subsequent allocation fails, the already-linked nodes remain in the tree with no corresponding leaf entry. These orphaned internal nodes are never reclaimed because radix_tree_for_each_slot() only visits slots containing leaf values. The radix_tree API is deprecated in favor of xarray. As suggested by Matthew Wilcox, migrate qrtr_tx_flow from radix_tree to xarray instead of fixing the radix_tree itself [1]. xarray properly handles cleanup of internal nodes — xa_destroy() frees all internal xarray nodes when the qrtr_node is released, preventing the leak. [1] https://lore.kernel.org/all/20260225071623.41275-1-jiayuan.chen@linux.dev/T/ Reported-by: syzbot+006987d1be3586e13555@syzkaller.appspotmail.com Closes: https://lore.kernel.org/all/000000000000bfba3a060bf4ffcf@google.com/T/ Fixes: 5fdeb0d372ab ("net: qrtr: Implement outgoing flow control") Signed-off-by: Jiayuan Chen Reviewed-by: Simon Horman Link: https://patch.msgid.link/20260324080645.290197-1-jiayuan.chen@linux.dev Signed-off-by: Jakub Kicinski --- net/qrtr/af_qrtr.c | 31 +++++++++++++------------------ 1 file changed, 13 insertions(+), 18 deletions(-) diff --git a/net/qrtr/af_qrtr.c b/net/qrtr/af_qrtr.c index 55fd2dd37588..d77e9c8212da 100644 --- a/net/qrtr/af_qrtr.c +++ b/net/qrtr/af_qrtr.c @@ -118,7 +118,7 @@ static DEFINE_XARRAY_ALLOC(qrtr_ports); * @ep: endpoint * @ref: reference count for node * @nid: node id - * @qrtr_tx_flow: tree of qrtr_tx_flow, keyed by node << 32 | port + * @qrtr_tx_flow: xarray of qrtr_tx_flow, keyed by node << 32 | port * @qrtr_tx_lock: lock for qrtr_tx_flow inserts * @rx_queue: receive queue * @item: list item for broadcast list @@ -129,7 +129,7 @@ struct qrtr_node { struct kref ref; unsigned int nid; - struct radix_tree_root qrtr_tx_flow; + struct xarray qrtr_tx_flow; struct mutex qrtr_tx_lock; /* for qrtr_tx_flow */ struct sk_buff_head rx_queue; @@ -172,6 +172,7 @@ static void __qrtr_node_release(struct kref *kref) struct qrtr_tx_flow *flow; unsigned long flags; void __rcu **slot; + unsigned long index; spin_lock_irqsave(&qrtr_nodes_lock, flags); /* If the node is a bridge for other nodes, there are possibly @@ -189,11 +190,9 @@ static void __qrtr_node_release(struct kref *kref) skb_queue_purge(&node->rx_queue); /* Free tx flow counters */ - radix_tree_for_each_slot(slot, &node->qrtr_tx_flow, &iter, 0) { - flow = *slot; - radix_tree_iter_delete(&node->qrtr_tx_flow, &iter, slot); + xa_for_each(&node->qrtr_tx_flow, index, flow) kfree(flow); - } + xa_destroy(&node->qrtr_tx_flow); kfree(node); } @@ -228,9 +227,7 @@ static void qrtr_tx_resume(struct qrtr_node *node, struct sk_buff *skb) key = remote_node << 32 | remote_port; - rcu_read_lock(); - flow = radix_tree_lookup(&node->qrtr_tx_flow, key); - rcu_read_unlock(); + flow = xa_load(&node->qrtr_tx_flow, key); if (flow) { spin_lock(&flow->resume_tx.lock); flow->pending = 0; @@ -269,12 +266,13 @@ static int qrtr_tx_wait(struct qrtr_node *node, int dest_node, int dest_port, return 0; mutex_lock(&node->qrtr_tx_lock); - flow = radix_tree_lookup(&node->qrtr_tx_flow, key); + flow = xa_load(&node->qrtr_tx_flow, key); if (!flow) { flow = kzalloc_obj(*flow); if (flow) { init_waitqueue_head(&flow->resume_tx); - if (radix_tree_insert(&node->qrtr_tx_flow, key, flow)) { + if (xa_err(xa_store(&node->qrtr_tx_flow, key, flow, + GFP_KERNEL))) { kfree(flow); flow = NULL; } @@ -326,9 +324,7 @@ static void qrtr_tx_flow_failed(struct qrtr_node *node, int dest_node, unsigned long key = (u64)dest_node << 32 | dest_port; struct qrtr_tx_flow *flow; - rcu_read_lock(); - flow = radix_tree_lookup(&node->qrtr_tx_flow, key); - rcu_read_unlock(); + flow = xa_load(&node->qrtr_tx_flow, key); if (flow) { spin_lock_irq(&flow->resume_tx.lock); flow->tx_failed = 1; @@ -599,7 +595,7 @@ int qrtr_endpoint_register(struct qrtr_endpoint *ep, unsigned int nid) node->nid = QRTR_EP_NID_AUTO; node->ep = ep; - INIT_RADIX_TREE(&node->qrtr_tx_flow, GFP_KERNEL); + xa_init(&node->qrtr_tx_flow); mutex_init(&node->qrtr_tx_lock); qrtr_node_assign(node, nid); @@ -627,6 +623,7 @@ void qrtr_endpoint_unregister(struct qrtr_endpoint *ep) struct qrtr_tx_flow *flow; struct sk_buff *skb; unsigned long flags; + unsigned long index; void __rcu **slot; mutex_lock(&node->ep_lock); @@ -649,10 +646,8 @@ void qrtr_endpoint_unregister(struct qrtr_endpoint *ep) /* Wake up any transmitters waiting for resume-tx from the node */ mutex_lock(&node->qrtr_tx_lock); - radix_tree_for_each_slot(slot, &node->qrtr_tx_flow, &iter, 0) { - flow = *slot; + xa_for_each(&node->qrtr_tx_flow, index, flow) wake_up_interruptible_all(&flow->resume_tx); - } mutex_unlock(&node->qrtr_tx_lock); qrtr_node_release(node); From ae05340ccaa9d347fe85415609e075545bec589f Mon Sep 17 00:00:00 2001 From: Yochai Eisenrich Date: Wed, 25 Mar 2026 00:49:25 +0200 Subject: [PATCH 232/401] net: ipv6: ndisc: fix ndisc_ra_useropt to initialize nduseropt_padX fields to zero to prevent an info-leak When processing Router Advertisements with user options the kernel builds an RTM_NEWNDUSEROPT netlink message. The nduseroptmsg struct has three padding fields that are never zeroed and can leak kernel data The fix is simple, just zeroes the padding fields. Fixes: 31910575a9de ("[IPv6]: Export userland ND options through netlink (RDNSS support)") Signed-off-by: Yochai Eisenrich Reviewed-by: Simon Horman Link: https://patch.msgid.link/20260324224925.2437775-1-echelonh@gmail.com Signed-off-by: Jakub Kicinski --- net/ipv6/ndisc.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index f6a5d8c73af9..186e60c79214 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -1209,6 +1209,9 @@ static void ndisc_ra_useropt(struct sk_buff *ra, struct nd_opt_hdr *opt) ndmsg->nduseropt_icmp_type = icmp6h->icmp6_type; ndmsg->nduseropt_icmp_code = icmp6h->icmp6_code; ndmsg->nduseropt_opts_len = opt->nd_opt_len << 3; + ndmsg->nduseropt_pad1 = 0; + ndmsg->nduseropt_pad2 = 0; + ndmsg->nduseropt_pad3 = 0; memcpy(ndmsg + 1, opt, opt->nd_opt_len << 3); From 90c5def10bea574b101b7a520c015ca81742183f Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Mon, 2 Mar 2026 18:22:52 -0400 Subject: [PATCH 233/401] iommu: Do not call drivers for empty gathers An empty gather is coded with start=U64_MAX, end=0 and several drivers go on to convert that to a size with: end - start + 1 Which gives 2 for an empty gather. This then causes Weird Stuff to happen (for example an UBSAN splat in VT-d) that is hopefully harmless, but maybe not. Prevent drivers from being called right in iommu_iotlb_sync(). Auditing shows that AMD, Intel, Mediatek and RSIC-V drivers all do things on these empty gathers. Further, there are several callers that can trigger empty gathers, especially in unusual conditions. For example iommu_map_nosync() will call a 0 size unmap on some error paths. Also in VFIO, iommupt and other places. Cc: stable@vger.kernel.org Reported-by: Janusz Krzysztofik Closes: https://lore.kernel.org/r/11145826.aFP6jjVeTY@jkrzyszt-mobl2.ger.corp.intel.com Signed-off-by: Jason Gunthorpe Reviewed-by: Lu Baolu Reviewed-by: Samiullah Khawaja Reviewed-by: Robin Murphy Reviewed-by: Vasant Hegde Signed-off-by: Joerg Roedel --- include/linux/iommu.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/include/linux/iommu.h b/include/linux/iommu.h index 54b8b48c762e..555597b54083 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -980,7 +980,8 @@ static inline void iommu_flush_iotlb_all(struct iommu_domain *domain) static inline void iommu_iotlb_sync(struct iommu_domain *domain, struct iommu_iotlb_gather *iotlb_gather) { - if (domain->ops->iotlb_sync) + if (domain->ops->iotlb_sync && + likely(iotlb_gather->start < iotlb_gather->end)) domain->ops->iotlb_sync(domain, iotlb_gather); iommu_iotlb_gather_init(iotlb_gather); From ee6e69d032550687a3422504bfca3f834c7b5061 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Mon, 2 Mar 2026 18:22:53 -0400 Subject: [PATCH 234/401] iommupt: Fix short gather if the unmap goes into a large mapping unmap has the odd behavior that it can unmap more than requested if the ending point lands within the middle of a large or contiguous IOPTE. In this case the gather should flush everything unmapped which can be larger than what was requested to be unmapped. The gather was only flushing the range requested to be unmapped, not extending to the extra range, resulting in a short invalidation if the caller hits this special condition. This was found by the new invalidation/gather test I am adding in preparation for ARMv8. Claude deduced the root cause. As far as I remember nothing relies on unmapping a large entry, so this is likely not a triggerable bug. Cc: stable@vger.kernel.org Fixes: 7c53f4238aa8 ("iommupt: Add unmap_pages op") Signed-off-by: Jason Gunthorpe Reviewed-by: Lu Baolu Reviewed-by: Samiullah Khawaja Reviewed-by: Vasant Hegde Signed-off-by: Joerg Roedel --- drivers/iommu/generic_pt/iommu_pt.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iommu/generic_pt/iommu_pt.h b/drivers/iommu/generic_pt/iommu_pt.h index 3e33fe64feab..7e7a6e7abdee 100644 --- a/drivers/iommu/generic_pt/iommu_pt.h +++ b/drivers/iommu/generic_pt/iommu_pt.h @@ -1057,7 +1057,7 @@ size_t DOMAIN_NS(unmap_pages)(struct iommu_domain *domain, unsigned long iova, pt_walk_range(&range, __unmap_range, &unmap); - gather_range_pages(iotlb_gather, iommu_table, iova, len, + gather_range_pages(iotlb_gather, iommu_table, iova, unmap.unmapped, &unmap.free_list); return unmap.unmapped; From 8b72aa5704c77380742346d4ac755b074b7f9eaa Mon Sep 17 00:00:00 2001 From: Sherry Yang Date: Thu, 26 Mar 2026 09:17:19 -0700 Subject: [PATCH 235/401] iommupt/amdv1: mark amdv1pt_install_leaf_entry as __always_inline MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit After enabling CONFIG_GCOV_KERNEL and CONFIG_GCOV_PROFILE_ALL, following build failure is observed under GCC 14.2.1: In function 'amdv1pt_install_leaf_entry', inlined from '__do_map_single_page' at drivers/iommu/generic_pt/fmt/../iommu_pt.h:650:3, inlined from '__map_single_page0' at drivers/iommu/generic_pt/fmt/../iommu_pt.h:661:1, inlined from 'pt_descend' at drivers/iommu/generic_pt/fmt/../pt_iter.h:391:9, inlined from '__do_map_single_page' at drivers/iommu/generic_pt/fmt/../iommu_pt.h:657:10, inlined from '__map_single_page1.constprop' at drivers/iommu/generic_pt/fmt/../iommu_pt.h:661:1: ././include/linux/compiler_types.h:706:45: error: call to '__compiletime_assert_71' declared with attribute error: FIELD_PREP: value too large for the field 706 | _compiletime_assert(condition, msg, __compiletime_assert_, __COUNTER__) | ...... drivers/iommu/generic_pt/fmt/amdv1.h:220:26: note: in expansion of macro 'FIELD_PREP' 220 | FIELD_PREP(AMDV1PT_FMT_OA, | ^~~~~~~~~~ In the path '__do_map_single_page()', level 0 always invokes 'pt_install_leaf_entry(&pts, map->oa, PAGE_SHIFT, …)'. At runtime that lands in the 'if (oasz_lg2 == isz_lg2)' arm of 'amdv1pt_install_leaf_entry()'; the contiguous-only 'else' block is unreachable for 4 KiB pages. With CONFIG_GCOV_KERNEL + CONFIG_GCOV_PROFILE_ALL, the extra instrumentation changes GCC's inlining so that the "dead" 'else' branch still gets instantiated. The compiler constant-folds the contiguous OA expression, runs the 'FIELD_PREP()' compile-time check, and produces: FIELD_PREP: value too large for the field gcov-enabled builds therefore fail even though the code path never executes. Fix this by marking amdv1pt_install_leaf_entry as __always_inline. Fixes: dcd6a011a8d5 ("iommupt: Add map_pages op") Suggested-by: Jason Gunthorpe Signed-off-by: Sherry Yang Signed-off-by: Joerg Roedel --- drivers/iommu/generic_pt/fmt/amdv1.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iommu/generic_pt/fmt/amdv1.h b/drivers/iommu/generic_pt/fmt/amdv1.h index 3b2c41d9654d..8d11b08291d7 100644 --- a/drivers/iommu/generic_pt/fmt/amdv1.h +++ b/drivers/iommu/generic_pt/fmt/amdv1.h @@ -191,7 +191,7 @@ static inline enum pt_entry_type amdv1pt_load_entry_raw(struct pt_state *pts) } #define pt_load_entry_raw amdv1pt_load_entry_raw -static inline void +static __always_inline void amdv1pt_install_leaf_entry(struct pt_state *pts, pt_oaddr_t oa, unsigned int oasz_lg2, const struct pt_write_attrs *attrs) From 7f138de156b20d9f9da6f72f90b63c01941d97d3 Mon Sep 17 00:00:00 2001 From: Guangshuo Li Date: Fri, 27 Mar 2026 01:14:12 +0800 Subject: [PATCH 236/401] auxdisplay: line-display: fix NULL dereference in linedisp_release linedisp_release() currently retrieves the enclosing struct linedisp via to_linedisp(). That lookup depends on the attachment list, but the attachment may already have been removed before put_device() invokes the release callback. This can happen in linedisp_unregister(), and can also be reached from some linedisp_register() error paths. In that case, to_linedisp() returns NULL and linedisp_release() dereferences it while freeing the display resources. The struct device released here is the embedded linedisp->dev used by linedisp_register(), so retrieve the enclosing object directly with container_of() instead. Fixes: 66c93809487e ("auxdisplay: linedisp: encapsulate container_of usage within to_linedisp") Cc: stable@vger.kernel.org Signed-off-by: Guangshuo Li Reviewed-by: Geert Uytterhoeven Signed-off-by: Andy Shevchenko --- drivers/auxdisplay/line-display.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/auxdisplay/line-display.c b/drivers/auxdisplay/line-display.c index 81b4aac65807..fb6d9294140d 100644 --- a/drivers/auxdisplay/line-display.c +++ b/drivers/auxdisplay/line-display.c @@ -365,7 +365,7 @@ static DEFINE_IDA(linedisp_id); static void linedisp_release(struct device *dev) { - struct linedisp *linedisp = to_linedisp(dev); + struct linedisp *linedisp = container_of(dev, struct linedisp, dev); kfree(linedisp->map); kfree(linedisp->message); From d3be95efc6a1e03230ef646b498050152efe2888 Mon Sep 17 00:00:00 2001 From: Kailang Yang Date: Thu, 19 Mar 2026 15:49:46 +0800 Subject: [PATCH 237/401] ALSA: hda/realtek - Fixed Speaker Mute LED for HP EliteBoard G1a platform On the HP EliteBoard G1a platform (models without a headphone jack). the speaker mute LED failed to function. The Sysfs ctl-led info showed empty values because the standard LED registration couldn't correctly bind to the master switch. Adding this patch will fix and enable the speaker mute LED feature. Tested-by: Chris Chiu Signed-off-by: Kailang Yang Link: https://lore.kernel.org/279e929e884849df84687dbd67f20037@realtek.com Signed-off-by: Takashi Iwai --- sound/hda/codecs/realtek/alc269.c | 22 +++++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) diff --git a/sound/hda/codecs/realtek/alc269.c b/sound/hda/codecs/realtek/alc269.c index 327f4dc1b09f..9106498eb19f 100644 --- a/sound/hda/codecs/realtek/alc269.c +++ b/sound/hda/codecs/realtek/alc269.c @@ -3725,22 +3725,42 @@ static void alc245_tas2781_spi_hp_fixup_muteled(struct hda_codec *codec, alc_fixup_hp_gpio_led(codec, action, 0x04, 0x0); alc285_fixup_hp_coef_micmute_led(codec, fix, action); } + +static void alc245_hp_spk_mute_led_update(void *private_data, int enabled) +{ + struct hda_codec *codec = private_data; + unsigned int val; + + val = enabled ? 0x08 : 0x04; /* 0x08 led on, 0x04 led off */ + alc_update_coef_idx(codec, 0x0b, 0x0c, val); +} + /* JD2: mute led GPIO3: micmute led */ static void alc245_tas2781_i2c_hp_fixup_muteled(struct hda_codec *codec, const struct hda_fixup *fix, int action) { struct alc_spec *spec = codec->spec; + hda_nid_t hp_pin = alc_get_hp_pin(spec); static const hda_nid_t conn[] = { 0x02 }; switch (action) { case HDA_FIXUP_ACT_PRE_PROBE: + if (!hp_pin) { + spec->gen.vmaster_mute.hook = alc245_hp_spk_mute_led_update; + spec->gen.vmaster_mute_led = 1; + } spec->gen.auto_mute_via_amp = 1; snd_hda_override_conn_list(codec, 0x17, ARRAY_SIZE(conn), conn); break; + case HDA_FIXUP_ACT_INIT: + if (!hp_pin) + alc245_hp_spk_mute_led_update(codec, !spec->gen.master_mute); + break; } tas2781_fixup_txnw_i2c(codec, fix, action); - alc245_fixup_hp_mute_led_coefbit(codec, fix, action); + if (hp_pin) + alc245_fixup_hp_mute_led_coefbit(codec, fix, action); alc285_fixup_hp_coef_micmute_led(codec, fix, action); } /* From 2f388b4e8fdd6b0f27cafd281658daacfd85807e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?C=C3=A9sar=20Montoya?= Date: Sat, 21 Mar 2026 10:36:03 -0500 Subject: [PATCH 238/401] ALSA: hda/realtek: Add mute LED quirk for HP Pavilion 15-eg0xxx MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The HP Pavilion 15-eg0xxx with subsystem ID 0x103c87cb uses a Realtek ALC287 codec with a mute LED wired to GPIO pin 4 (mask 0x10). The existing ALC287_FIXUP_HP_GPIO_LED fixup already handles this correctly, but the subsystem ID was missing from the quirk table. GPIO pin confirmed via manual hda-verb testing: hda-verb SET_GPIO_MASK 0x10 hda-verb SET_GPIO_DIRECTION 0x10 hda-verb SET_GPIO_DATA 0x10 Signed-off-by: César Montoya Link: https://patch.msgid.link/20260321153603.12771-1-sprit152009@gmail.com Signed-off-by: Takashi Iwai --- sound/hda/codecs/realtek/alc269.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/hda/codecs/realtek/alc269.c b/sound/hda/codecs/realtek/alc269.c index 9106498eb19f..a72149d6fc90 100644 --- a/sound/hda/codecs/realtek/alc269.c +++ b/sound/hda/codecs/realtek/alc269.c @@ -6914,6 +6914,7 @@ static const struct hda_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x103c, 0x8788, "HP OMEN 15", ALC285_FIXUP_HP_MUTE_LED), SND_PCI_QUIRK(0x103c, 0x87b7, "HP Laptop 14-fq0xxx", ALC236_FIXUP_HP_MUTE_LED_COEFBIT2), SND_PCI_QUIRK(0x103c, 0x87c8, "HP", ALC287_FIXUP_HP_GPIO_LED), + SND_PCI_QUIRK(0x103c, 0x87cb, "HP Pavilion 15-eg0xxx", ALC287_FIXUP_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x87cc, "HP Pavilion 15-eg0xxx", ALC287_FIXUP_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x87d3, "HP Laptop 15-gw0xxx", ALC236_FIXUP_HP_MUTE_LED_COEFBIT2), SND_PCI_QUIRK(0x103c, 0x87df, "HP ProBook 430 G8 Notebook PC", ALC236_FIXUP_HP_GPIO_LED), From 990a8b0732cf899d4a0f847b0a67efeb9a384c82 Mon Sep 17 00:00:00 2001 From: "Geoffrey D. Bennett" Date: Mon, 23 Mar 2026 21:59:21 +1030 Subject: [PATCH 239/401] ALSA: usb-audio: Exclude Scarlett 2i4 1st Gen from SKIP_IFACE_SETUP Same issue that the Scarlett 2i2 1st Gen had: QUIRK_FLAG_SKIP_IFACE_SETUP causes distorted/flanging audio on the Scarlett 2i4 1st Gen (1235:800a). Fixes: 38c322068a26 ("ALSA: usb-audio: Add QUIRK_FLAG_SKIP_IFACE_SETUP") Reported-by: dcferreira [https://github.com/geoffreybennett/linux-fcp/issues/54] Signed-off-by: Geoffrey D. Bennett Link: https://patch.msgid.link/acEkEbftzyNe8W7C@m.b4.vu Signed-off-by: Takashi Iwai --- sound/usb/quirks.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/usb/quirks.c b/sound/usb/quirks.c index cd3a9fe8edf2..125230464ab5 100644 --- a/sound/usb/quirks.c +++ b/sound/usb/quirks.c @@ -2430,6 +2430,7 @@ static const struct usb_audio_quirk_flags_table quirk_flags_table[] = { VENDOR_FLG(0x07fd, /* MOTU */ QUIRK_FLAG_VALIDATE_RATES), DEVICE_FLG(0x1235, 0x8006, 0), /* Focusrite Scarlett 2i2 1st Gen */ + DEVICE_FLG(0x1235, 0x800a, 0), /* Focusrite Scarlett 2i4 1st Gen */ VENDOR_FLG(0x1235, /* Focusrite Novation */ QUIRK_FLAG_SKIP_CLOCK_SELECTOR | QUIRK_FLAG_SKIP_IFACE_SETUP), From faceb5cf5d7a08f4a40335d22d833bb75f05d99e Mon Sep 17 00:00:00 2001 From: Kshamendra Kumar Mishra Date: Mon, 23 Mar 2026 22:07:50 +0530 Subject: [PATCH 240/401] ALSA: hda/realtek: add HP Laptop 15-fd0xxx mute LED quirk HP Laptop 15-fd0xxx with ALC236 codec does not handle the toggling of the mute LED. This patch adds a quirk entry for subsystem ID 0x8dd7 using ALC236_FIXUP_HP_MUTE_LED_COEFBIT2 fixup, enabling correct mute LED behavior. Signed-off-by: Kshamendra Kumar Mishra Link: https://patch.msgid.link/DHAB51ISUM96.2K9SZIABIDEQ0@gmail.com Signed-off-by: Takashi Iwai --- sound/hda/codecs/realtek/alc269.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/hda/codecs/realtek/alc269.c b/sound/hda/codecs/realtek/alc269.c index a72149d6fc90..b6a58852752a 100644 --- a/sound/hda/codecs/realtek/alc269.c +++ b/sound/hda/codecs/realtek/alc269.c @@ -7146,6 +7146,7 @@ static const struct hda_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x103c, 0x8da7, "HP 14 Enstrom OmniBook X", ALC287_FIXUP_CS35L41_I2C_2), SND_PCI_QUIRK(0x103c, 0x8da8, "HP 16 Piston OmniBook X", ALC287_FIXUP_CS35L41_I2C_2), SND_PCI_QUIRK(0x103c, 0x8dd4, "HP EliteStudio 8 AIO", ALC274_FIXUP_HP_AIO_BIND_DACS), + SND_PCI_QUIRK(0x103c, 0x8dd7, "HP Laptop 15-fd0xxx", ALC236_FIXUP_HP_MUTE_LED_COEFBIT2), SND_PCI_QUIRK(0x103c, 0x8de8, "HP Gemtree", ALC245_FIXUP_TAS2781_SPI_2), SND_PCI_QUIRK(0x103c, 0x8de9, "HP Gemtree", ALC245_FIXUP_TAS2781_SPI_2), SND_PCI_QUIRK(0x103c, 0x8dec, "HP EliteBook 640 G12", ALC236_FIXUP_HP_GPIO_LED), From ee6c551a7d84fde084e10cc02fa8c7d03e6438f9 Mon Sep 17 00:00:00 2001 From: Lianqin Hu Date: Wed, 25 Mar 2026 06:26:48 +0000 Subject: [PATCH 241/401] ALSA: usb-audio: Add iface reset and delay quirk for AB17X USB Audio Setting up the interface when suspended/resumeing fail on this card. Adding a reset and delay quirk will eliminate this problem. usb 1-1: new full-speed USB device number 2 using xhci-hcd usb 1-1: New USB device found, idVendor=001f, idProduct=0b23 usb 1-1: New USB device strings: Mfr=1, Product=2, SerialNumber=3 usb 1-1: Product: AB17X USB Audio usb 1-1: Manufacturer: Generic usb 1-1: SerialNumber: 20241228172028 Signed-off-by: Lianqin Hu Signed-off-by: Takashi Iwai Link: https://patch.msgid.link/PUZPR06MB6224CA59AD2B26054120B276D249A@PUZPR06MB6224.apcprd06.prod.outlook.com --- sound/usb/quirks.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sound/usb/quirks.c b/sound/usb/quirks.c index 125230464ab5..1f82e9e02d4b 100644 --- a/sound/usb/quirks.c +++ b/sound/usb/quirks.c @@ -2148,6 +2148,8 @@ static const struct usb_audio_quirk_flags_table quirk_flags_table[] = { /* Device matches */ DEVICE_FLG(0x001f, 0x0b21, /* AB13X USB Audio */ QUIRK_FLAG_FORCE_IFACE_RESET | QUIRK_FLAG_IFACE_DELAY), + DEVICE_FLG(0x001f, 0x0b23, /* AB17X USB Audio */ + QUIRK_FLAG_FORCE_IFACE_RESET | QUIRK_FLAG_IFACE_DELAY), DEVICE_FLG(0x0020, 0x0b21, /* GHW-123P */ QUIRK_FLAG_FORCE_IFACE_RESET | QUIRK_FLAG_IFACE_DELAY), DEVICE_FLG(0x03f0, 0x654a, /* HP 320 FHD Webcam */ From ed4da361bf943b9041fc63e5cb6af01b3c0de978 Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Thu, 26 Mar 2026 14:05:38 -0500 Subject: [PATCH 242/401] Revert "ALSA: hda/intel: Add MSI X870E Tomahawk to denylist" commit 30b3211aa2416 ("ALSA: hda/intel: Add MSI X870E Tomahawk to denylist") was added to silence a warning, but this effectively reintroduced commit df42ee7e22f03 ("ALSA: hda: Add ASRock X670E Taichi to denylist") which was already reported to cause problems and reverted in commit ee8f1613596ad ("Revert "ALSA: hda: Add ASRock X670E Taichi to denylist"") Revert it yet again. Cc: stable@vger.kernel.org Reported-by: Juhyun Song Closes: https://bugzilla.kernel.org/show_bug.cgi?id=221274 Cc: Stuart Hayhurst Signed-off-by: Mario Limonciello Link: https://patch.msgid.link/20260326190542.524515-1-mario.limonciello@amd.com Signed-off-by: Takashi Iwai --- sound/hda/controllers/intel.c | 1 - 1 file changed, 1 deletion(-) diff --git a/sound/hda/controllers/intel.c b/sound/hda/controllers/intel.c index 3f434994c18d..2edbcab597c8 100644 --- a/sound/hda/controllers/intel.c +++ b/sound/hda/controllers/intel.c @@ -2077,7 +2077,6 @@ static const struct pci_device_id driver_denylist[] = { { PCI_DEVICE_SUB(0x1022, 0x1487, 0x1043, 0x874f) }, /* ASUS ROG Zenith II / Strix */ { PCI_DEVICE_SUB(0x1022, 0x1487, 0x1462, 0xcb59) }, /* MSI TRX40 Creator */ { PCI_DEVICE_SUB(0x1022, 0x1487, 0x1462, 0xcb60) }, /* MSI TRX40 */ - { PCI_DEVICE_SUB(0x1022, 0x15e3, 0x1462, 0xee59) }, /* MSI X870E Tomahawk WiFi */ {} }; From 1f6ee9be92f8df85a8c9a5a78c20fd39c0c21a95 Mon Sep 17 00:00:00 2001 From: Fei Lv Date: Mon, 22 Jul 2024 18:14:43 +0800 Subject: [PATCH 243/401] ovl: make fsync after metadata copy-up opt-in mount option Commit 7d6899fb69d25 ("ovl: fsync after metadata copy-up") was done to fix durability of overlayfs copy up on an upper filesystem which does not enforce ordering on storing of metadata changes (e.g. ubifs). In an earlier revision of the regressing commit by Lei Lv, the metadata fsync behavior was opt-in via a new "fsync=strict" mount option. We were hoping that the opt-in mount option could be avoided, so the change was only made to depend on metacopy=off, in the hope of not hurting performance of metadata heavy workloads, which are more likely to be using metacopy=on. This hope was proven wrong by a performance regression report from Google COS workload after upgrade to kernel 6.12. This is an adaptation of Lei's original "fsync=strict" mount option to the existing upstream code. The new mount option is mutually exclusive with the "volatile" mount option, so the latter is now an alias to the "fsync=volatile" mount option. Reported-by: Chenglong Tang Closes: https://lore.kernel.org/linux-unionfs/CAOdxtTadAFH01Vui1FvWfcmQ8jH1O45owTzUcpYbNvBxnLeM7Q@mail.gmail.com/ Link: https://lore.kernel.org/linux-unionfs/CAOQ4uxgKC1SgjMWre=fUb00v8rxtd6sQi-S+dxR8oDzAuiGu8g@mail.gmail.com/ Fixes: 7d6899fb69d25 ("ovl: fsync after metadata copy-up") Depends: 50e638beb67e0 ("ovl: Use str_on_off() helper in ovl_show_options()") Cc: stable@vger.kernel.org # v6.12+ Signed-off-by: Fei Lv Signed-off-by: Amir Goldstein --- Documentation/filesystems/overlayfs.rst | 50 +++++++++++++++++++++++++ fs/overlayfs/copy_up.c | 6 +-- fs/overlayfs/overlayfs.h | 21 +++++++++++ fs/overlayfs/ovl_entry.h | 7 +--- fs/overlayfs/params.c | 33 +++++++++++++--- fs/overlayfs/super.c | 2 +- 6 files changed, 104 insertions(+), 15 deletions(-) diff --git a/Documentation/filesystems/overlayfs.rst b/Documentation/filesystems/overlayfs.rst index af5a69f87da4..eb846518e6ac 100644 --- a/Documentation/filesystems/overlayfs.rst +++ b/Documentation/filesystems/overlayfs.rst @@ -783,6 +783,56 @@ controlled by the "uuid" mount option, which supports these values: mounted with "uuid=on". +Durability and copy up +---------------------- + +The fsync(2) system call ensures that the data and metadata of a file +are safely written to the backing storage, which is expected to +guarantee the existence of the information post system crash. + +Without an fsync(2) call, there is no guarantee that the observed +data after a system crash will be either the old or the new data, but +in practice, the observed data after crash is often the old or new data +or a mix of both. + +When an overlayfs file is modified for the first time, copy up will +create a copy of the lower file and its parent directories in the upper +layer. Since the Linux filesystem API does not enforce any particular +ordering on storing changes without explicit fsync(2) calls, in case +of a system crash, the upper file could end up with no data at all +(i.e. zeros), which would be an unusual outcome. To avoid this +experience, overlayfs calls fsync(2) on the upper file before completing +data copy up with rename(2) or link(2) to make the copy up "atomic". + +By default, overlayfs does not explicitly call fsync(2) on copied up +directories or on metadata-only copy up, so it provides no guarantee to +persist the user's modification unless the user calls fsync(2). +The fsync during copy up only guarantees that if a copy up is observed +after a crash, the observed data is not zeroes or intermediate values +from the copy up staging area. + +On traditional local filesystems with a single journal (e.g. ext4, xfs), +fsync on a file also persists the parent directory changes, because they +are usually modified in the same transaction, so metadata durability during +data copy up effectively comes for free. Overlayfs further limits risk by +disallowing network filesystems as upper layer. + +Overlayfs can be tuned to prefer performance or durability when storing +to the underlying upper layer. This is controlled by the "fsync" mount +option, which supports these values: + +- "auto": (default) + Call fsync(2) on upper file before completion of data copy up. + No explicit fsync(2) on directory or metadata-only copy up. +- "strict": + Call fsync(2) on upper file and directories before completion of any + copy up. +- "volatile": [*] + Prefer performance over durability (see `Volatile mount`_) + +[*] The mount option "volatile" is an alias to "fsync=volatile". + + Volatile mount -------------- diff --git a/fs/overlayfs/copy_up.c b/fs/overlayfs/copy_up.c index 758611ee4475..13cb60b52bd6 100644 --- a/fs/overlayfs/copy_up.c +++ b/fs/overlayfs/copy_up.c @@ -1146,15 +1146,15 @@ static int ovl_copy_up_one(struct dentry *parent, struct dentry *dentry, return -EOVERFLOW; /* - * With metacopy disabled, we fsync after final metadata copyup, for + * With "fsync=strict", we fsync after final metadata copyup, for * both regular files and directories to get atomic copyup semantics * on filesystems that do not use strict metadata ordering (e.g. ubifs). * - * With metacopy enabled we want to avoid fsync on all meta copyup + * By default, we want to avoid fsync on all meta copyup, because * that will hurt performance of workloads such as chown -R, so we * only fsync on data copyup as legacy behavior. */ - ctx.metadata_fsync = !OVL_FS(dentry->d_sb)->config.metacopy && + ctx.metadata_fsync = ovl_should_sync_metadata(OVL_FS(dentry->d_sb)) && (S_ISREG(ctx.stat.mode) || S_ISDIR(ctx.stat.mode)); ctx.metacopy = ovl_need_meta_copy_up(dentry, ctx.stat.mode, flags); diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h index cad2055ebf18..63b299bf12f7 100644 --- a/fs/overlayfs/overlayfs.h +++ b/fs/overlayfs/overlayfs.h @@ -99,6 +99,12 @@ enum { OVL_VERITY_REQUIRE, }; +enum { + OVL_FSYNC_VOLATILE, + OVL_FSYNC_AUTO, + OVL_FSYNC_STRICT, +}; + /* * The tuple (fh,uuid) is a universal unique identifier for a copy up origin, * where: @@ -656,6 +662,21 @@ static inline bool ovl_xino_warn(struct ovl_fs *ofs) return ofs->config.xino == OVL_XINO_ON; } +static inline bool ovl_should_sync(struct ovl_fs *ofs) +{ + return ofs->config.fsync_mode != OVL_FSYNC_VOLATILE; +} + +static inline bool ovl_should_sync_metadata(struct ovl_fs *ofs) +{ + return ofs->config.fsync_mode == OVL_FSYNC_STRICT; +} + +static inline bool ovl_is_volatile(struct ovl_config *config) +{ + return config->fsync_mode == OVL_FSYNC_VOLATILE; +} + /* * To avoid regressions in existing setups with overlay lower offline changes, * we allow lower changes only if none of the new features are used. diff --git a/fs/overlayfs/ovl_entry.h b/fs/overlayfs/ovl_entry.h index 1d4828dbcf7a..80cad4ea96a3 100644 --- a/fs/overlayfs/ovl_entry.h +++ b/fs/overlayfs/ovl_entry.h @@ -18,7 +18,7 @@ struct ovl_config { int xino; bool metacopy; bool userxattr; - bool ovl_volatile; + int fsync_mode; }; struct ovl_sb { @@ -120,11 +120,6 @@ static inline struct ovl_fs *OVL_FS(struct super_block *sb) return (struct ovl_fs *)sb->s_fs_info; } -static inline bool ovl_should_sync(struct ovl_fs *ofs) -{ - return !ofs->config.ovl_volatile; -} - static inline unsigned int ovl_numlower(struct ovl_entry *oe) { return oe ? oe->__numlower : 0; diff --git a/fs/overlayfs/params.c b/fs/overlayfs/params.c index 8111b437ae5d..c93fcaa45d4a 100644 --- a/fs/overlayfs/params.c +++ b/fs/overlayfs/params.c @@ -58,6 +58,7 @@ enum ovl_opt { Opt_xino, Opt_metacopy, Opt_verity, + Opt_fsync, Opt_volatile, Opt_override_creds, }; @@ -140,6 +141,23 @@ static int ovl_verity_mode_def(void) return OVL_VERITY_OFF; } +static const struct constant_table ovl_parameter_fsync[] = { + { "volatile", OVL_FSYNC_VOLATILE }, + { "auto", OVL_FSYNC_AUTO }, + { "strict", OVL_FSYNC_STRICT }, + {} +}; + +static const char *ovl_fsync_mode(struct ovl_config *config) +{ + return ovl_parameter_fsync[config->fsync_mode].name; +} + +static int ovl_fsync_mode_def(void) +{ + return OVL_FSYNC_AUTO; +} + const struct fs_parameter_spec ovl_parameter_spec[] = { fsparam_string_empty("lowerdir", Opt_lowerdir), fsparam_file_or_string("lowerdir+", Opt_lowerdir_add), @@ -155,6 +173,7 @@ const struct fs_parameter_spec ovl_parameter_spec[] = { fsparam_enum("xino", Opt_xino, ovl_parameter_xino), fsparam_enum("metacopy", Opt_metacopy, ovl_parameter_bool), fsparam_enum("verity", Opt_verity, ovl_parameter_verity), + fsparam_enum("fsync", Opt_fsync, ovl_parameter_fsync), fsparam_flag("volatile", Opt_volatile), fsparam_flag_no("override_creds", Opt_override_creds), {} @@ -665,8 +684,11 @@ static int ovl_parse_param(struct fs_context *fc, struct fs_parameter *param) case Opt_verity: config->verity_mode = result.uint_32; break; + case Opt_fsync: + config->fsync_mode = result.uint_32; + break; case Opt_volatile: - config->ovl_volatile = true; + config->fsync_mode = OVL_FSYNC_VOLATILE; break; case Opt_userxattr: config->userxattr = true; @@ -800,6 +822,7 @@ int ovl_init_fs_context(struct fs_context *fc) ofs->config.nfs_export = ovl_nfs_export_def; ofs->config.xino = ovl_xino_def(); ofs->config.metacopy = ovl_metacopy_def; + ofs->config.fsync_mode = ovl_fsync_mode_def(); fc->s_fs_info = ofs; fc->fs_private = ctx; @@ -870,9 +893,9 @@ int ovl_fs_params_verify(const struct ovl_fs_context *ctx, config->index = false; } - if (!config->upperdir && config->ovl_volatile) { + if (!config->upperdir && ovl_is_volatile(config)) { pr_info("option \"volatile\" is meaningless in a non-upper mount, ignoring it.\n"); - config->ovl_volatile = false; + config->fsync_mode = ovl_fsync_mode_def(); } if (!config->upperdir && config->uuid == OVL_UUID_ON) { @@ -1070,8 +1093,8 @@ int ovl_show_options(struct seq_file *m, struct dentry *dentry) seq_printf(m, ",xino=%s", ovl_xino_mode(&ofs->config)); if (ofs->config.metacopy != ovl_metacopy_def) seq_printf(m, ",metacopy=%s", str_on_off(ofs->config.metacopy)); - if (ofs->config.ovl_volatile) - seq_puts(m, ",volatile"); + if (ofs->config.fsync_mode != ovl_fsync_mode_def()) + seq_printf(m, ",fsync=%s", ovl_fsync_mode(&ofs->config)); if (ofs->config.userxattr) seq_puts(m, ",userxattr"); if (ofs->config.verity_mode != ovl_verity_mode_def()) diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c index d4c12feec039..0822987cfb51 100644 --- a/fs/overlayfs/super.c +++ b/fs/overlayfs/super.c @@ -776,7 +776,7 @@ static int ovl_make_workdir(struct super_block *sb, struct ovl_fs *ofs, * For volatile mount, create a incompat/volatile/dirty file to keep * track of it. */ - if (ofs->config.ovl_volatile) { + if (ovl_is_volatile(&ofs->config)) { err = ovl_create_volatile_dirty(ofs); if (err < 0) { pr_err("Failed to create volatile/dirty file.\n"); From 5e67ba9bb531e1ec6599a82a065dea9040b9ce50 Mon Sep 17 00:00:00 2001 From: Pengpeng Hou Date: Wed, 25 Mar 2026 15:41:52 +0800 Subject: [PATCH 244/401] net/ipv6: ioam6: prevent schema length wraparound in trace fill ioam6_fill_trace_data() stores the schema contribution to the trace length in a u8. With bit 22 enabled and the largest schema payload, sclen becomes 1 + 1020 / 4, wraps from 256 to 0, and bypasses the remaining-space check. __ioam6_fill_trace_data() then positions the write cursor without reserving the schema area but still copies the 4-byte schema header and the full schema payload, overrunning the trace buffer. Keep sclen in an unsigned int so the remaining-space check and the write cursor calculation both see the full schema length. Fixes: 8c6f6fa67726 ("ipv6: ioam: IOAM Generic Netlink API") Signed-off-by: Pengpeng Hou Reviewed-by: Justin Iurman Signed-off-by: David S. Miller --- net/ipv6/ioam6.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/ipv6/ioam6.c b/net/ipv6/ioam6.c index b76f89d92e7b..3978773bec42 100644 --- a/net/ipv6/ioam6.c +++ b/net/ipv6/ioam6.c @@ -708,7 +708,7 @@ static void __ioam6_fill_trace_data(struct sk_buff *skb, struct ioam6_namespace *ns, struct ioam6_trace_hdr *trace, struct ioam6_schema *sc, - u8 sclen, bool is_input) + unsigned int sclen, bool is_input) { struct net_device *dev = skb_dst_dev(skb); struct timespec64 ts; @@ -939,7 +939,7 @@ void ioam6_fill_trace_data(struct sk_buff *skb, bool is_input) { struct ioam6_schema *sc; - u8 sclen = 0; + unsigned int sclen = 0; /* Skip if Overflow flag is set */ From bb417456c7814d1493d98b7dd9c040bf3ce3b4ed Mon Sep 17 00:00:00 2001 From: Thomas Bogendoerfer Date: Wed, 25 Mar 2026 12:20:53 +0100 Subject: [PATCH 245/401] tg3: Fix race for querying speed/duplex When driver signals carrier up via netif_carrier_on() its internal link_up state isn't updated immediately. This leads to inconsistent speed/duplex in /proc/net/bonding/bondX where the speed and duplex is shown as unknown while ethtool shows correct values. Fix this by using netif_carrier_ok() for link checking in get_ksettings function. Fixes: 84421b99cedc ("tg3: Update link_up flag for phylib devices") Signed-off-by: Thomas Bogendoerfer Reviewed-by: Pavan Chebbi Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/tg3.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/broadcom/tg3.c b/drivers/net/ethernet/broadcom/tg3.c index 21a5dd342724..73a4b569b03e 100644 --- a/drivers/net/ethernet/broadcom/tg3.c +++ b/drivers/net/ethernet/broadcom/tg3.c @@ -12299,7 +12299,7 @@ static int tg3_get_link_ksettings(struct net_device *dev, ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.advertising, advertising); - if (netif_running(dev) && tp->link_up) { + if (netif_running(dev) && netif_carrier_ok(dev)) { cmd->base.speed = tp->link_config.active_speed; cmd->base.duplex = tp->link_config.active_duplex; ethtool_convert_legacy_u32_to_link_mode( From 5597dd284ff8c556c0b00f6a34473677426e3f81 Mon Sep 17 00:00:00 2001 From: David Carlier Date: Wed, 25 Mar 2026 12:51:30 +0000 Subject: [PATCH 246/401] net: ti: icssg-prueth: fix missing data copy and wrong recycle in ZC RX dispatch emac_dispatch_skb_zc() allocates a new skb via napi_alloc_skb() but never copies the packet data from the XDP buffer into it. The skb is passed up the stack containing uninitialized heap memory instead of the actual received packet, leaking kernel heap contents to userspace. Copy the received packet data from the XDP buffer into the skb using skb_copy_to_linear_data(). Additionally, remove the skb_mark_for_recycle() call since the skb is backed by the NAPI page frag allocator, not page_pool. Marking a non-page_pool skb for recycle causes the free path to return pages to a page_pool that does not own them, corrupting page_pool state. The non-ZC path (emac_rx_packet) does not have these issues because it uses napi_build_skb() to wrap the existing page_pool page directly, requiring no copy, and correctly marks for recycle since the page comes from page_pool_dev_alloc_pages(). Fixes: 7a64bb388df3 ("net: ti: icssg-prueth: Add AF_XDP zero copy for RX") Signed-off-by: David Carlier Reviewed-by: Simon Horman Signed-off-by: David S. Miller --- drivers/net/ethernet/ti/icssg/icssg_common.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/ti/icssg/icssg_common.c b/drivers/net/ethernet/ti/icssg/icssg_common.c index fd4e7622f123..a28a608f9bf4 100644 --- a/drivers/net/ethernet/ti/icssg/icssg_common.c +++ b/drivers/net/ethernet/ti/icssg/icssg_common.c @@ -902,6 +902,7 @@ static void emac_dispatch_skb_zc(struct prueth_emac *emac, struct xdp_buff *xdp, skb_reserve(skb, headroom); skb_put(skb, pkt_len); + skb_copy_to_linear_data(skb, xdp->data, pkt_len); skb->dev = ndev; /* RX HW timestamp */ @@ -912,7 +913,6 @@ static void emac_dispatch_skb_zc(struct prueth_emac *emac, struct xdp_buff *xdp, skb->offload_fwd_mark = emac->offload_fwd_mark; skb->protocol = eth_type_trans(skb, ndev); - skb_mark_for_recycle(skb); napi_gro_receive(&emac->napi_rx, skb); ndev->stats.rx_bytes += pkt_len; ndev->stats.rx_packets++; From f88e2e748a1fc3cb4b8d163a9be790812f578850 Mon Sep 17 00:00:00 2001 From: Stefan Eichenberger Date: Wed, 18 Feb 2026 16:08:49 +0100 Subject: [PATCH 247/401] i2c: imx: fix i2c issue when reading multiple messages When reading multiple messages, meaning a repeated start is required, polling the bus busy bit must be avoided. This must only be done for the last message. Otherwise, the driver will timeout. Here an example of such a sequence that fails with an error: i2ctransfer -y -a 0 w1@0x00 0x02 r1 w1@0x00 0x02 r1 Error: Sending messages failed: Connection timed out Fixes: 5f5c2d4579ca ("i2c: imx: prevent rescheduling in non dma mode") Cc: stable@vger.kernel.org # v6.13+ Signed-off-by: Stefan Eichenberger Reviewed-by: Frank Li Signed-off-by: Andi Shyti Link: https://lore.kernel.org/r/20260218150940.131354-2-eichest@gmail.com --- drivers/i2c/busses/i2c-imx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/i2c/busses/i2c-imx.c b/drivers/i2c/busses/i2c-imx.c index 85f554044cf1..56e2a14495a9 100644 --- a/drivers/i2c/busses/i2c-imx.c +++ b/drivers/i2c/busses/i2c-imx.c @@ -1522,7 +1522,7 @@ static int i2c_imx_read(struct imx_i2c_struct *i2c_imx, struct i2c_msg *msgs, dev_err(&i2c_imx->adapter.dev, "<%s> read timedout\n", __func__); return -ETIMEDOUT; } - if (!i2c_imx->stopped) + if (i2c_imx->is_lastmsg && !i2c_imx->stopped) return i2c_imx_bus_busy(i2c_imx, 0, false); return 0; From 13101db735bdb29c5f60e95fb578690bd178b30f Mon Sep 17 00:00:00 2001 From: Stefan Eichenberger Date: Wed, 18 Feb 2026 16:08:50 +0100 Subject: [PATCH 248/401] i2c: imx: ensure no clock is generated after last read When reading from the I2DR register, right after releasing the bus by clearing MSTA and MTX, the I2C controller might still generate an additional clock cycle which can cause devices to misbehave. Ensure to only read from I2DR after the bus is not busy anymore. Because this requires polling, the read of the last byte is moved outside of the interrupt handler. An example for such a failing transfer is this: i2ctransfer -y -a 0 w1@0x00 0x02 r1 Error: Sending messages failed: Connection timed out It does not happen with every device because not all devices react to the additional clock cycle. Fixes: 5f5c2d4579ca ("i2c: imx: prevent rescheduling in non dma mode") Cc: stable@vger.kernel.org # v6.13+ Signed-off-by: Stefan Eichenberger Signed-off-by: Andi Shyti Link: https://lore.kernel.org/r/20260218150940.131354-3-eichest@gmail.com --- drivers/i2c/busses/i2c-imx.c | 51 ++++++++++++++++++++++-------------- 1 file changed, 32 insertions(+), 19 deletions(-) diff --git a/drivers/i2c/busses/i2c-imx.c b/drivers/i2c/busses/i2c-imx.c index 56e2a14495a9..452d120a210b 100644 --- a/drivers/i2c/busses/i2c-imx.c +++ b/drivers/i2c/busses/i2c-imx.c @@ -1018,8 +1018,9 @@ static inline int i2c_imx_isr_read(struct imx_i2c_struct *i2c_imx) return 0; } -static inline void i2c_imx_isr_read_continue(struct imx_i2c_struct *i2c_imx) +static inline enum imx_i2c_state i2c_imx_isr_read_continue(struct imx_i2c_struct *i2c_imx) { + enum imx_i2c_state next_state = IMX_I2C_STATE_READ_CONTINUE; unsigned int temp; if ((i2c_imx->msg->len - 1) == i2c_imx->msg_buf_idx) { @@ -1033,18 +1034,20 @@ static inline void i2c_imx_isr_read_continue(struct imx_i2c_struct *i2c_imx) i2c_imx->stopped = 1; temp &= ~(I2CR_MSTA | I2CR_MTX); imx_i2c_write_reg(temp, i2c_imx, IMX_I2C_I2CR); - } else { - /* - * For i2c master receiver repeat restart operation like: - * read -> repeat MSTA -> read/write - * The controller must set MTX before read the last byte in - * the first read operation, otherwise the first read cost - * one extra clock cycle. - */ - temp = imx_i2c_read_reg(i2c_imx, IMX_I2C_I2CR); - temp |= I2CR_MTX; - imx_i2c_write_reg(temp, i2c_imx, IMX_I2C_I2CR); + + return IMX_I2C_STATE_DONE; } + /* + * For i2c master receiver repeat restart operation like: + * read -> repeat MSTA -> read/write + * The controller must set MTX before read the last byte in + * the first read operation, otherwise the first read cost + * one extra clock cycle. + */ + temp = imx_i2c_read_reg(i2c_imx, IMX_I2C_I2CR); + temp |= I2CR_MTX; + imx_i2c_write_reg(temp, i2c_imx, IMX_I2C_I2CR); + next_state = IMX_I2C_STATE_DONE; } else if (i2c_imx->msg_buf_idx == (i2c_imx->msg->len - 2)) { temp = imx_i2c_read_reg(i2c_imx, IMX_I2C_I2CR); temp |= I2CR_TXAK; @@ -1052,6 +1055,7 @@ static inline void i2c_imx_isr_read_continue(struct imx_i2c_struct *i2c_imx) } i2c_imx->msg->buf[i2c_imx->msg_buf_idx++] = imx_i2c_read_reg(i2c_imx, IMX_I2C_I2DR); + return next_state; } static inline void i2c_imx_isr_read_block_data_len(struct imx_i2c_struct *i2c_imx) @@ -1088,11 +1092,9 @@ static irqreturn_t i2c_imx_master_isr(struct imx_i2c_struct *i2c_imx, unsigned i break; case IMX_I2C_STATE_READ_CONTINUE: - i2c_imx_isr_read_continue(i2c_imx); - if (i2c_imx->msg_buf_idx == i2c_imx->msg->len) { - i2c_imx->state = IMX_I2C_STATE_DONE; + i2c_imx->state = i2c_imx_isr_read_continue(i2c_imx); + if (i2c_imx->state == IMX_I2C_STATE_DONE) wake_up(&i2c_imx->queue); - } break; case IMX_I2C_STATE_READ_BLOCK_DATA: @@ -1490,6 +1492,7 @@ static int i2c_imx_read(struct imx_i2c_struct *i2c_imx, struct i2c_msg *msgs, bool is_lastmsg) { int block_data = msgs->flags & I2C_M_RECV_LEN; + int ret = 0; dev_dbg(&i2c_imx->adapter.dev, "<%s> write slave address: addr=0x%x\n", @@ -1522,10 +1525,20 @@ static int i2c_imx_read(struct imx_i2c_struct *i2c_imx, struct i2c_msg *msgs, dev_err(&i2c_imx->adapter.dev, "<%s> read timedout\n", __func__); return -ETIMEDOUT; } - if (i2c_imx->is_lastmsg && !i2c_imx->stopped) - return i2c_imx_bus_busy(i2c_imx, 0, false); + if (i2c_imx->is_lastmsg) { + if (!i2c_imx->stopped) + ret = i2c_imx_bus_busy(i2c_imx, 0, false); + /* + * Only read the last byte of the last message after the bus is + * not busy. Else the controller generates another clock which + * might confuse devices. + */ + if (!ret) + i2c_imx->msg->buf[i2c_imx->msg_buf_idx++] = imx_i2c_read_reg(i2c_imx, + IMX_I2C_I2DR); + } - return 0; + return ret; } static int i2c_imx_xfer_common(struct i2c_adapter *adapter, From e2f1ada8e089dd5a331bcd8b88125ae2af8d188f Mon Sep 17 00:00:00 2001 From: Pratap Nirujogi Date: Fri, 20 Mar 2026 16:12:22 -0400 Subject: [PATCH 249/401] i2c: designware: amdisp: Fix resume-probe race condition issue Identified resume-probe race condition in kernel v7.0 with the commit 38fa29b01a6a ("i2c: designware: Combine the init functions"),but this issue existed from the beginning though not detected. The amdisp i2c device requires ISP to be in power-on state for probe to succeed. To meet this requirement, this device is added to genpd to control ISP power using runtime PM. The pm_runtime_get_sync() called before i2c_dw_probe() triggers PM resume, which powers on ISP and also invokes the amdisp i2c runtime resume before the probe completes resulting in this race condition and a NULL dereferencing issue in v7.0 Fix this race condition by using the genpd APIs directly during probe: - Call dev_pm_genpd_resume() to Power ON ISP before probe - Call dev_pm_genpd_suspend() to Power OFF ISP after probe - Set the device to suspended state with pm_runtime_set_suspended() - Enable runtime PM only after the device is fully initialized Fixes: d6263c468a761 ("i2c: amd-isp: Add ISP i2c-designware driver") Co-developed-by: Bin Du Signed-off-by: Bin Du Signed-off-by: Pratap Nirujogi Cc: # v6.16+ Acked-by: Mika Westerberg Reviewed-by: Mario Limonciello (AMD) Reviewed-by: Andy Shevchenko Signed-off-by: Andi Shyti Link: https://lore.kernel.org/r/20260320201302.3490570-1-pratap.nirujogi@amd.com --- drivers/i2c/busses/i2c-designware-amdisp.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/drivers/i2c/busses/i2c-designware-amdisp.c b/drivers/i2c/busses/i2c-designware-amdisp.c index c48728ad9f6f..9f0ec0fae6f2 100644 --- a/drivers/i2c/busses/i2c-designware-amdisp.c +++ b/drivers/i2c/busses/i2c-designware-amdisp.c @@ -7,6 +7,7 @@ #include #include +#include #include #include @@ -76,22 +77,20 @@ static int amd_isp_dw_i2c_plat_probe(struct platform_device *pdev) device_enable_async_suspend(&pdev->dev); - pm_runtime_enable(&pdev->dev); - pm_runtime_get_sync(&pdev->dev); - + dev_pm_genpd_resume(&pdev->dev); ret = i2c_dw_probe(isp_i2c_dev); if (ret) { dev_err_probe(&pdev->dev, ret, "i2c_dw_probe failed\n"); goto error_release_rpm; } - - pm_runtime_put_sync(&pdev->dev); + dev_pm_genpd_suspend(&pdev->dev); + pm_runtime_set_suspended(&pdev->dev); + pm_runtime_enable(&pdev->dev); return 0; error_release_rpm: amd_isp_dw_i2c_plat_pm_cleanup(isp_i2c_dev); - pm_runtime_put_sync(&pdev->dev); return ret; } From 73ff3916d803f7ca3a4325af649e46ff89d6c3a7 Mon Sep 17 00:00:00 2001 From: Zhang Heng Date: Fri, 27 Mar 2026 18:12:15 +0800 Subject: [PATCH 250/401] ALSA: hda/realtek: change quirk for HP OmniBook 7 Laptop 16-bh0xxx HP OmniBook 7 Laptop 16-bh0xxx has the same PCI subsystem ID 0x103c8e60, and the ALC245 on it needs this quirk to control the mute LED. Link: https://bugzilla.kernel.org/show_bug.cgi?id=221214 Cc: Tested-by: Artem S. Tashkinov Signed-off-by: Zhang Heng Link: https://patch.msgid.link/20260327101215.481108-1-zhangheng@kylinos.cn Signed-off-by: Takashi Iwai --- sound/hda/codecs/realtek/alc269.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/sound/hda/codecs/realtek/alc269.c b/sound/hda/codecs/realtek/alc269.c index b6a58852752a..6787e54fcfe6 100644 --- a/sound/hda/codecs/realtek/alc269.c +++ b/sound/hda/codecs/realtek/alc269.c @@ -4122,6 +4122,7 @@ enum { ALC233_FIXUP_LENOVO_GPIO2_MIC_HOTKEY, ALC245_FIXUP_BASS_HP_DAC, ALC245_FIXUP_ACER_MICMUTE_LED, + ALC245_FIXUP_CS35L41_I2C_2_MUTE_LED, }; /* A special fixup for Lenovo C940 and Yoga Duet 7; @@ -6651,6 +6652,12 @@ static const struct hda_fixup alc269_fixups[] = { .v.func = alc285_fixup_hp_coef_micmute_led, .chained = true, .chain_id = ALC2XX_FIXUP_HEADSET_MIC, + }, + [ALC245_FIXUP_CS35L41_I2C_2_MUTE_LED] = { + .type = HDA_FIXUP_FUNC, + .v.func = alc245_fixup_hp_mute_led_coefbit, + .chained = true, + .chain_id = ALC287_FIXUP_CS35L41_I2C_2, } }; @@ -7177,7 +7184,7 @@ static const struct hda_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x103c, 0x8e37, "HP 16 Piston OmniBook X", ALC287_FIXUP_CS35L41_I2C_2), SND_PCI_QUIRK(0x103c, 0x8e3a, "HP Agusta", ALC287_FIXUP_CS35L41_I2C_2), SND_PCI_QUIRK(0x103c, 0x8e3b, "HP Agusta", ALC287_FIXUP_CS35L41_I2C_2), - SND_PCI_QUIRK(0x103c, 0x8e60, "HP Trekker ", ALC287_FIXUP_CS35L41_I2C_2), + SND_PCI_QUIRK(0x103c, 0x8e60, "HP OmniBook 7 Laptop 16-bh0xxx", ALC245_FIXUP_CS35L41_I2C_2_MUTE_LED), SND_PCI_QUIRK(0x103c, 0x8e61, "HP Trekker ", ALC287_FIXUP_CS35L41_I2C_2), SND_PCI_QUIRK(0x103c, 0x8e62, "HP Trekker ", ALC287_FIXUP_CS35L41_I2C_2), SND_PCI_QUIRK(0x103c, 0x8e8a, "HP NexusX", ALC245_FIXUP_HP_TAS2781_I2C_MUTE_LED), From fffca572f9ca51607f180a37d0c898404c8f9112 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Thu, 26 Mar 2026 15:06:31 +0100 Subject: [PATCH 251/401] mpage: Provide variant of mpage_writepages() with own optional folio handler Some filesystems need to treat some folios specially (for example for inodes with inline data). Doing the handling in their .writepages method in a race-free manner results in duplicating some of the writeback internals. So provide generalized version of mpage_writepages() that allows filesystem to provide a handler called for each folio which can handle the folio in a special way. Reviewed-by: Christoph Hellwig Link: https://patch.msgid.link/20260326140635.15895-3-jack@suse.cz Signed-off-by: Jan Kara --- fs/mpage.c | 30 ++++++++++++++++++++++++------ include/linux/mpage.h | 11 +++++++++-- 2 files changed, 33 insertions(+), 8 deletions(-) diff --git a/fs/mpage.c b/fs/mpage.c index 7dae5afc2b9e..b3d9f231a04a 100644 --- a/fs/mpage.c +++ b/fs/mpage.c @@ -646,17 +646,24 @@ static int mpage_write_folio(struct writeback_control *wbc, struct folio *folio, } /** - * mpage_writepages - walk the list of dirty pages of the given address space & writepage() all of them + * __mpage_writepages - walk the list of dirty pages of the given address space + * & writepage() all of them * @mapping: address space structure to write * @wbc: subtract the number of written pages from *@wbc->nr_to_write * @get_block: the filesystem's block mapper function. + * @write_folio: handler to call for each folio before calling + * mpage_write_folio() * * This is a library function, which implements the writepages() - * address_space_operation. + * address_space_operation. It calls @write_folio handler for each folio. If + * the handler returns value > 0, it calls mpage_write_folio() to do the + * folio writeback. */ int -mpage_writepages(struct address_space *mapping, - struct writeback_control *wbc, get_block_t get_block) +__mpage_writepages(struct address_space *mapping, + struct writeback_control *wbc, get_block_t get_block, + int (*write_folio)(struct folio *folio, + struct writeback_control *wbc)) { struct mpage_data mpd = { .get_block = get_block, @@ -666,11 +673,22 @@ mpage_writepages(struct address_space *mapping, int error; blk_start_plug(&plug); - while ((folio = writeback_iter(mapping, wbc, folio, &error))) + while ((folio = writeback_iter(mapping, wbc, folio, &error))) { + if (write_folio) { + error = write_folio(folio, wbc); + /* + * == 0 means folio is handled, < 0 means error. In + * both cases hand back control to writeback_iter() + */ + if (error <= 0) + continue; + /* Let mpage_write_folio() handle the folio. */ + } error = mpage_write_folio(wbc, folio, &mpd); + } if (mpd.bio) mpage_bio_submit_write(mpd.bio); blk_finish_plug(&plug); return error; } -EXPORT_SYMBOL(mpage_writepages); +EXPORT_SYMBOL(__mpage_writepages); diff --git a/include/linux/mpage.h b/include/linux/mpage.h index 1bdc39daac0a..358946990bfa 100644 --- a/include/linux/mpage.h +++ b/include/linux/mpage.h @@ -17,7 +17,14 @@ struct readahead_control; void mpage_readahead(struct readahead_control *, get_block_t get_block); int mpage_read_folio(struct folio *folio, get_block_t get_block); -int mpage_writepages(struct address_space *mapping, - struct writeback_control *wbc, get_block_t get_block); +int __mpage_writepages(struct address_space *mapping, + struct writeback_control *wbc, get_block_t get_block, + int (*write_folio)(struct folio *folio, + struct writeback_control *wbc)); +static inline int mpage_writepages(struct address_space *mapping, + struct writeback_control *wbc, get_block_t get_block) +{ + return __mpage_writepages(mapping, wbc, get_block, NULL); +} #endif From 102e57d56f81fa5c5ed78f576101d1bf1b3e6fe2 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Thu, 26 Mar 2026 15:06:32 +0100 Subject: [PATCH 252/401] udf: Fix race between file type conversion and writeback udf_setsize() can race with udf_writepages() as follows: udf_setsize() udf_writepages() if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_IN_ICB) err = udf_expand_file_adinicb(inode); err = udf_extend_file(inode, newsize); udf_adinicb_writepages() memcpy_from_file_folio() - crash because inode size is too big. Fix the problem by checking the file type under folio lock in udf_handle_page_wb() handler called from __mpage_writepages() which properly serializes with udf_expand_file_adinicb(). Reported-by: Jianzhou Zhao Link: https://lore.kernel.org/all/f622c01.67ac.19cdbdd777d.Coremail.luckd0g@163.com Reviewed-by: Christoph Hellwig Link: https://patch.msgid.link/20260326140635.15895-4-jack@suse.cz Signed-off-by: Jan Kara --- fs/udf/inode.c | 33 +++++++++++++++------------------ 1 file changed, 15 insertions(+), 18 deletions(-) diff --git a/fs/udf/inode.c b/fs/udf/inode.c index 7fae8002344a..23e894092dab 100644 --- a/fs/udf/inode.c +++ b/fs/udf/inode.c @@ -181,22 +181,23 @@ static void udf_write_failed(struct address_space *mapping, loff_t to) } } -static int udf_adinicb_writepages(struct address_space *mapping, - struct writeback_control *wbc) +static int udf_handle_page_wb(struct folio *folio, + struct writeback_control *wbc) { - struct inode *inode = mapping->host; + struct inode *inode = folio->mapping->host; struct udf_inode_info *iinfo = UDF_I(inode); - struct folio *folio = NULL; - int error = 0; - while ((folio = writeback_iter(mapping, wbc, folio, &error))) { - BUG_ON(!folio_test_locked(folio)); - BUG_ON(folio->index != 0); - memcpy_from_file_folio(iinfo->i_data + iinfo->i_lenEAttr, folio, + /* + * Inodes in the normal format are handled by the generic code. This + * check is race-free as the folio lock protects us from inode type + * conversion. + */ + if (iinfo->i_alloc_type != ICBTAG_FLAG_AD_IN_ICB) + return 1; + + memcpy_from_file_folio(iinfo->i_data + iinfo->i_lenEAttr, folio, 0, i_size_read(inode)); - folio_unlock(folio); - } - + folio_unlock(folio); mark_inode_dirty(inode); return 0; } @@ -204,12 +205,8 @@ static int udf_adinicb_writepages(struct address_space *mapping, static int udf_writepages(struct address_space *mapping, struct writeback_control *wbc) { - struct inode *inode = mapping->host; - struct udf_inode_info *iinfo = UDF_I(inode); - - if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_IN_ICB) - return udf_adinicb_writepages(mapping, wbc); - return mpage_writepages(mapping, wbc, udf_get_block_wb); + return __mpage_writepages(mapping, wbc, udf_get_block_wb, + udf_handle_page_wb); } static void udf_adinicb_read_folio(struct folio *folio) From 1f9885732248d22f788e4992c739a98c88ab8a55 Mon Sep 17 00:00:00 2001 From: Luo Haiyang Date: Thu, 26 Mar 2026 14:19:53 +0800 Subject: [PATCH 253/401] tracing: Fix potential deadlock in cpu hotplug with osnoise The following sequence may leads deadlock in cpu hotplug: task1 task2 task3 ----- ----- ----- mutex_lock(&interface_lock) [CPU GOING OFFLINE] cpus_write_lock(); osnoise_cpu_die(); kthread_stop(task3); wait_for_completion(); osnoise_sleep(); mutex_lock(&interface_lock); cpus_read_lock(); [DEAD LOCK] Fix by swap the order of cpus_read_lock() and mutex_lock(&interface_lock). Cc: stable@vger.kernel.org Cc: Cc: Cc: Cc: Fixes: bce29ac9ce0bb ("trace: Add osnoise tracer") Link: https://patch.msgid.link/20260326141953414bVSj33dAYktqp9Oiyizq8@zte.com.cn Reviewed-by: Masami Hiramatsu (Google) Signed-off-by: Luo Haiyang Signed-off-by: Steven Rostedt (Google) --- kernel/trace/trace_osnoise.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/kernel/trace/trace_osnoise.c b/kernel/trace/trace_osnoise.c index dee610e465b9..be6cf0bb3c03 100644 --- a/kernel/trace/trace_osnoise.c +++ b/kernel/trace/trace_osnoise.c @@ -2073,8 +2073,8 @@ static void osnoise_hotplug_workfn(struct work_struct *dummy) if (!osnoise_has_registered_instances()) return; - guard(mutex)(&interface_lock); guard(cpus_read_lock)(); + guard(mutex)(&interface_lock); if (!cpu_online(cpu)) return; @@ -2237,11 +2237,11 @@ static ssize_t osnoise_options_write(struct file *filp, const char __user *ubuf, if (running) stop_per_cpu_kthreads(); - mutex_lock(&interface_lock); /* * avoid CPU hotplug operations that might read options. */ cpus_read_lock(); + mutex_lock(&interface_lock); retval = cnt; @@ -2257,8 +2257,8 @@ static ssize_t osnoise_options_write(struct file *filp, const char __user *ubuf, clear_bit(option, &osnoise_options); } - cpus_read_unlock(); mutex_unlock(&interface_lock); + cpus_read_unlock(); if (running) start_per_cpu_kthreads(); @@ -2345,16 +2345,16 @@ osnoise_cpus_write(struct file *filp, const char __user *ubuf, size_t count, if (running) stop_per_cpu_kthreads(); - mutex_lock(&interface_lock); /* * osnoise_cpumask is read by CPU hotplug operations. */ cpus_read_lock(); + mutex_lock(&interface_lock); cpumask_copy(&osnoise_cpumask, osnoise_cpumask_new); - cpus_read_unlock(); mutex_unlock(&interface_lock); + cpus_read_unlock(); if (running) start_per_cpu_kthreads(); From e5046823f8fa3677341b541a25af2fcb99a5b1e0 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Wed, 25 Mar 2026 20:29:20 -0700 Subject: [PATCH 254/401] lib/crypto: chacha: Zeroize permuted_state before it leaves scope Since the ChaCha permutation is invertible, the local variable 'permuted_state' is sufficient to compute the original 'state', and thus the key, even after the permutation has been done. While the kernel is quite inconsistent about zeroizing secrets on the stack (and some prominent userspace crypto libraries don't bother at all since it's not guaranteed to work anyway), the kernel does try to do it as a best practice, especially in cases involving the RNG. Thus, explicitly zeroize 'permuted_state' before it goes out of scope. Fixes: c08d0e647305 ("crypto: chacha20 - Add a generic ChaCha20 stream cipher implementation") Cc: stable@vger.kernel.org Acked-by: Ard Biesheuvel Link: https://lore.kernel.org/r/20260326032920.39408-1-ebiggers@kernel.org Signed-off-by: Eric Biggers --- lib/crypto/chacha-block-generic.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/lib/crypto/chacha-block-generic.c b/lib/crypto/chacha-block-generic.c index 77f68de71066..4a6d627580cb 100644 --- a/lib/crypto/chacha-block-generic.c +++ b/lib/crypto/chacha-block-generic.c @@ -87,6 +87,8 @@ void chacha_block_generic(struct chacha_state *state, &out[i * sizeof(u32)]); state->x[12]++; + + chacha_zeroize_state(&permuted_state); } EXPORT_SYMBOL(chacha_block_generic); @@ -110,5 +112,7 @@ void hchacha_block_generic(const struct chacha_state *state, memcpy(&out[0], &permuted_state.x[0], 16); memcpy(&out[4], &permuted_state.x[12], 16); + + chacha_zeroize_state(&permuted_state); } EXPORT_SYMBOL(hchacha_block_generic); From aad885e774966e97b675dfe928da164214a71605 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Thu, 5 Mar 2026 17:28:04 -0800 Subject: [PATCH 255/401] KVM: x86/mmu: Drop/zap existing present SPTE even when creating an MMIO SPTE When installing an emulated MMIO SPTE, do so *after* dropping/zapping the existing SPTE (if it's shadow-present). While commit a54aa15c6bda3 was right about it being impossible to convert a shadow-present SPTE to an MMIO SPTE due to a _guest_ write, it failed to account for writes to guest memory that are outside the scope of KVM. E.g. if host userspace modifies a shadowed gPTE to switch from a memslot to emulted MMIO and then the guest hits a relevant page fault, KVM will install the MMIO SPTE without first zapping the shadow-present SPTE. ------------[ cut here ]------------ is_shadow_present_pte(*sptep) WARNING: arch/x86/kvm/mmu/mmu.c:484 at mark_mmio_spte+0xb2/0xc0 [kvm], CPU#0: vmx_ept_stale_r/4292 Modules linked in: kvm_intel kvm irqbypass CPU: 0 UID: 1000 PID: 4292 Comm: vmx_ept_stale_r Not tainted 7.0.0-rc2-eafebd2d2ab0-sink-vm #319 PREEMPT Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 0.0.0 02/06/2015 RIP: 0010:mark_mmio_spte+0xb2/0xc0 [kvm] Call Trace: mmu_set_spte+0x237/0x440 [kvm] ept_page_fault+0x535/0x7f0 [kvm] kvm_mmu_do_page_fault+0xee/0x1f0 [kvm] kvm_mmu_page_fault+0x8d/0x620 [kvm] vmx_handle_exit+0x18c/0x5a0 [kvm_intel] kvm_arch_vcpu_ioctl_run+0xc55/0x1c20 [kvm] kvm_vcpu_ioctl+0x2d5/0x980 [kvm] __x64_sys_ioctl+0x8a/0xd0 do_syscall_64+0xb5/0x730 entry_SYSCALL_64_after_hwframe+0x4b/0x53 RIP: 0033:0x47fa3f ---[ end trace 0000000000000000 ]--- Reported-by: Alexander Bulekov Debugged-by: Alexander Bulekov Suggested-by: Fred Griffoul Fixes: a54aa15c6bda3 ("KVM: x86/mmu: Handle MMIO SPTEs directly in mmu_set_spte()") Cc: stable@vger.kernel.org Signed-off-by: Sean Christopherson --- arch/x86/kvm/mmu/mmu.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c index b922a8b00057..98406d6aa2d6 100644 --- a/arch/x86/kvm/mmu/mmu.c +++ b/arch/x86/kvm/mmu/mmu.c @@ -3044,12 +3044,6 @@ static int mmu_set_spte(struct kvm_vcpu *vcpu, struct kvm_memory_slot *slot, bool prefetch = !fault || fault->prefetch; bool write_fault = fault && fault->write; - if (unlikely(is_noslot_pfn(pfn))) { - vcpu->stat.pf_mmio_spte_created++; - mark_mmio_spte(vcpu, sptep, gfn, pte_access); - return RET_PF_EMULATE; - } - if (is_shadow_present_pte(*sptep)) { if (prefetch && is_last_spte(*sptep, level) && pfn == spte_to_pfn(*sptep)) @@ -3073,6 +3067,14 @@ static int mmu_set_spte(struct kvm_vcpu *vcpu, struct kvm_memory_slot *slot, was_rmapped = 1; } + if (unlikely(is_noslot_pfn(pfn))) { + vcpu->stat.pf_mmio_spte_created++; + mark_mmio_spte(vcpu, sptep, gfn, pte_access); + if (flush) + kvm_flush_remote_tlbs_gfn(vcpu->kvm, gfn, level); + return RET_PF_EMULATE; + } + wrprot = make_spte(vcpu, sp, slot, pte_access, gfn, pfn, *sptep, prefetch, false, host_writable, &spte); From df83746075778958954aa0460cca55f4b3fc9c02 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Thu, 5 Mar 2026 17:42:14 -0800 Subject: [PATCH 256/401] KVM: x86/mmu: Only WARN in direct MMUs when overwriting shadow-present SPTE Adjust KVM's sanity check against overwriting a shadow-present SPTE with a another SPTE with a different target PFN to only apply to direct MMUs, i.e. only to MMUs without shadowed gPTEs. While it's impossible for KVM to overwrite a shadow-present SPTE in response to a guest write, writes from outside the scope of KVM, e.g. from host userspace, aren't detected by KVM's write tracking and so can break KVM's shadow paging rules. ------------[ cut here ]------------ pfn != spte_to_pfn(*sptep) WARNING: arch/x86/kvm/mmu/mmu.c:3069 at mmu_set_spte+0x1e4/0x440 [kvm], CPU#0: vmx_ept_stale_r/872 Modules linked in: kvm_intel kvm irqbypass CPU: 0 UID: 1000 PID: 872 Comm: vmx_ept_stale_r Not tainted 7.0.0-rc2-eafebd2d2ab0-sink-vm #319 PREEMPT Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 0.0.0 02/06/2015 RIP: 0010:mmu_set_spte+0x1e4/0x440 [kvm] Call Trace: ept_page_fault+0x535/0x7f0 [kvm] kvm_mmu_do_page_fault+0xee/0x1f0 [kvm] kvm_mmu_page_fault+0x8d/0x620 [kvm] vmx_handle_exit+0x18c/0x5a0 [kvm_intel] kvm_arch_vcpu_ioctl_run+0xc55/0x1c20 [kvm] kvm_vcpu_ioctl+0x2d5/0x980 [kvm] __x64_sys_ioctl+0x8a/0xd0 do_syscall_64+0xb5/0x730 entry_SYSCALL_64_after_hwframe+0x4b/0x53 ---[ end trace 0000000000000000 ]--- Fixes: 11d45175111d ("KVM: x86/mmu: Warn if PFN changes on shadow-present SPTE in shadow MMU") Cc: stable@vger.kernel.org Signed-off-by: Sean Christopherson --- arch/x86/kvm/mmu/mmu.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c index 98406d6aa2d6..dd06453d5b72 100644 --- a/arch/x86/kvm/mmu/mmu.c +++ b/arch/x86/kvm/mmu/mmu.c @@ -3060,7 +3060,8 @@ static int mmu_set_spte(struct kvm_vcpu *vcpu, struct kvm_memory_slot *slot, child = spte_to_child_sp(pte); drop_parent_pte(vcpu->kvm, child, sptep); flush = true; - } else if (WARN_ON_ONCE(pfn != spte_to_pfn(*sptep))) { + } else if (pfn != spte_to_pfn(*sptep)) { + WARN_ON_ONCE(vcpu->arch.mmu->root_role.direct); drop_spte(vcpu->kvm, sptep); flush = true; } else From c5c0a268b38adffbb2e70e6957017537ff54c157 Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Thu, 26 Mar 2026 14:38:44 +0100 Subject: [PATCH 257/401] s390/barrier: Make array_index_mask_nospec() __always_inline Mark array_index_mask_nospec() as __always_inline to guarantee the mitigation is emitted inline regardless of compiler inlining decisions. Fixes: e2dd833389cc ("s390: add optimized array_index_mask_nospec") Cc: stable@kernel.org Reviewed-by: Ilya Leoshkevich Signed-off-by: Vasily Gorbik --- arch/s390/include/asm/barrier.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/s390/include/asm/barrier.h b/arch/s390/include/asm/barrier.h index f3184073e754..dad02f5b3c8d 100644 --- a/arch/s390/include/asm/barrier.h +++ b/arch/s390/include/asm/barrier.h @@ -62,8 +62,8 @@ do { \ * @size: number of elements in array */ #define array_index_mask_nospec array_index_mask_nospec -static inline unsigned long array_index_mask_nospec(unsigned long index, - unsigned long size) +static __always_inline unsigned long array_index_mask_nospec(unsigned long index, + unsigned long size) { unsigned long mask; From 48b8814e25d073dd84daf990a879a820bad2bcbd Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Tue, 24 Mar 2026 17:34:05 +0100 Subject: [PATCH 258/401] s390/syscalls: Add spectre boundary for syscall dispatch table The s390 syscall number is directly controlled by userspace, but does not have an array_index_nospec() boundary to prevent access past the syscall function pointer tables. Cc: Heiko Carstens Cc: Vasily Gorbik Cc: Alexander Gordeev Cc: Christian Borntraeger Cc: Sven Schnelle Cc: Arnd Bergmann Fixes: 56e62a737028 ("s390: convert to generic entry") Cc: stable@kernel.org Assisted-by: gkh_clanker_2000 Signed-off-by: Greg Kroah-Hartman Reviewed-by: Vasily Gorbik Link: https://lore.kernel.org/r/2026032404-sterling-swoosh-43e6@gregkh Signed-off-by: Vasily Gorbik --- arch/s390/kernel/syscall.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/arch/s390/kernel/syscall.c b/arch/s390/kernel/syscall.c index 795b6cca74c9..d103c853e120 100644 --- a/arch/s390/kernel/syscall.c +++ b/arch/s390/kernel/syscall.c @@ -13,6 +13,7 @@ */ #include +#include #include #include #include @@ -131,8 +132,10 @@ void noinstr __do_syscall(struct pt_regs *regs, int per_trap) if (unlikely(test_and_clear_pt_regs_flag(regs, PIF_SYSCALL_RET_SET))) goto out; regs->gprs[2] = -ENOSYS; - if (likely(nr < NR_syscalls)) + if (likely(nr < NR_syscalls)) { + nr = array_index_nospec(nr, NR_syscalls); regs->gprs[2] = sys_call_table[nr](regs); + } out: syscall_exit_to_user_mode(regs); } From 0738d395aab8fae3b5a3ad3fc640630c91693c27 Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Thu, 26 Mar 2026 19:50:14 +0100 Subject: [PATCH 259/401] s390/entry: Scrub r12 register on kernel entry Before commit f33f2d4c7c80 ("s390/bp: remove TIF_ISOLATE_BP"), all entry handlers loaded r12 with the current task pointer (lg %r12,__LC_CURRENT) for use by the BPENTER/BPEXIT macros. That commit removed TIF_ISOLATE_BP, dropping both the branch prediction macros and the r12 load, but did not add r12 to the register clearing sequence. Add the missing xgr %r12,%r12 to make the register scrub consistent across all entry points. Fixes: f33f2d4c7c80 ("s390/bp: remove TIF_ISOLATE_BP") Cc: stable@kernel.org Reviewed-by: Ilya Leoshkevich Signed-off-by: Vasily Gorbik --- arch/s390/kernel/entry.S | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S index 4873fe9d891b..689d253e1afc 100644 --- a/arch/s390/kernel/entry.S +++ b/arch/s390/kernel/entry.S @@ -271,6 +271,7 @@ SYM_CODE_START(system_call) xgr %r9,%r9 xgr %r10,%r10 xgr %r11,%r11 + xgr %r12,%r12 la %r2,STACK_FRAME_OVERHEAD(%r15) # pointer to pt_regs mvc __PT_R8(64,%r2),__LC_SAVE_AREA(%r13) MBEAR %r2,%r13 @@ -407,6 +408,7 @@ SYM_CODE_START(\name) xgr %r6,%r6 xgr %r7,%r7 xgr %r10,%r10 + xgr %r12,%r12 xc __PT_FLAGS(8,%r11),__PT_FLAGS(%r11) mvc __PT_R8(64,%r11),__LC_SAVE_AREA(%r13) MBEAR %r11,%r13 @@ -496,6 +498,7 @@ SYM_CODE_START(mcck_int_handler) xgr %r6,%r6 xgr %r7,%r7 xgr %r10,%r10 + xgr %r12,%r12 stmg %r8,%r9,__PT_PSW(%r11) xc __PT_FLAGS(8,%r11),__PT_FLAGS(%r11) xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15) From 82c4f23d2757c2fc6751a5805c1feecdd4c430fb Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Thu, 19 Feb 2026 10:22:18 -0500 Subject: [PATCH 260/401] Update MAINTAINERS file to add reviewers for ext4 Signed-off-by: Theodore Ts'o Reviewed-by: Ritesh Harjani (IBM) Reviewed-by: Ojaswin Mujoo --- MAINTAINERS | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index 96ea84948d76..bf2a5ef7aaeb 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -9611,7 +9611,12 @@ F: include/linux/ext2* EXT4 FILE SYSTEM M: "Theodore Ts'o" -M: Andreas Dilger +R: Andreas Dilger +R: Baokun Li +R: Jan Kara +R: Ojaswin Mujoo +R: Ritesh Harjani (IBM) +R: Zhang Yi L: linux-ext4@vger.kernel.org S: Maintained W: http://ext4.wiki.kernel.org From 84e21e3fb8fd99ea460eb7274584750d11cf3e9f Mon Sep 17 00:00:00 2001 From: Zhang Yi Date: Sat, 31 Jan 2026 17:11:56 +0800 Subject: [PATCH 261/401] ext4: do not check fast symlink during orphan recovery Commit '5f920d5d6083 ("ext4: verify fast symlink length")' causes the generic/475 test to fail during orphan cleanup of zero-length symlinks. generic/475 84s ... _check_generic_filesystem: filesystem on /dev/vde is inconsistent The fsck reports are provided below: Deleted inode 9686 has zero dtime. Deleted inode 158230 has zero dtime. ... Inode bitmap differences: -9686 -158230 Orphan file (inode 12) block 13 is not clean. Failed to initialize orphan file. In ext4_symlink(), a newly created symlink can be added to the orphan list due to ENOSPC. Its data has not been initialized, and its size is zero. Therefore, we need to disregard the length check of the symbolic link when cleaning up orphan inodes. Instead, we should ensure that the nlink count is zero. Fixes: 5f920d5d6083 ("ext4: verify fast symlink length") Signed-off-by: Zhang Yi Reviewed-by: Jan Kara Link: https://patch.msgid.link/20260131091156.1733648-1-yi.zhang@huaweicloud.com Signed-off-by: Theodore Ts'o Cc: stable@kernel.org --- fs/ext4/inode.c | 40 +++++++++++++++++++++++++++++----------- 1 file changed, 29 insertions(+), 11 deletions(-) diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 396dc3a5d16b..af6d1759c8de 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -5401,18 +5401,36 @@ struct inode *__ext4_iget(struct super_block *sb, unsigned long ino, inode->i_op = &ext4_encrypted_symlink_inode_operations; } else if (ext4_inode_is_fast_symlink(inode)) { inode->i_op = &ext4_fast_symlink_inode_operations; - if (inode->i_size == 0 || - inode->i_size >= sizeof(ei->i_data) || - strnlen((char *)ei->i_data, inode->i_size + 1) != - inode->i_size) { - ext4_error_inode(inode, function, line, 0, - "invalid fast symlink length %llu", - (unsigned long long)inode->i_size); - ret = -EFSCORRUPTED; - goto bad_inode; + + /* + * Orphan cleanup can see inodes with i_size == 0 + * and i_data uninitialized. Skip size checks in + * that case. This is safe because the first thing + * ext4_evict_inode() does for fast symlinks is + * clearing of i_data and i_size. + */ + if ((EXT4_SB(sb)->s_mount_state & EXT4_ORPHAN_FS)) { + if (inode->i_nlink != 0) { + ext4_error_inode(inode, function, line, 0, + "invalid orphan symlink nlink %d", + inode->i_nlink); + ret = -EFSCORRUPTED; + goto bad_inode; + } + } else { + if (inode->i_size == 0 || + inode->i_size >= sizeof(ei->i_data) || + strnlen((char *)ei->i_data, inode->i_size + 1) != + inode->i_size) { + ext4_error_inode(inode, function, line, 0, + "invalid fast symlink length %llu", + (unsigned long long)inode->i_size); + ret = -EFSCORRUPTED; + goto bad_inode; + } + inode_set_cached_link(inode, (char *)ei->i_data, + inode->i_size); } - inode_set_cached_link(inode, (char *)ei->i_data, - inode->i_size); } else { inode->i_op = &ext4_symlink_inode_operations; } From 86ab3e55673a7a49a841838776f1ab18d23a67b5 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 26 Mar 2026 20:26:08 +0000 Subject: [PATCH 262/401] ipv6: icmp: clear skb2->cb[] in ip6_err_gen_icmpv6_unreach() Sashiko AI-review observed: In ip6_err_gen_icmpv6_unreach(), the skb is an outer IPv4 ICMP error packet where its cb contains an IPv4 inet_skb_parm. When skb is cloned into skb2 and passed to icmp6_send(), it uses IP6CB(skb2). IP6CB interprets the IPv4 inet_skb_parm as an inet6_skb_parm. The cipso offset in inet_skb_parm.opt directly overlaps with dsthao in inet6_skb_parm at offset 18. If an attacker sends a forged ICMPv4 error with a CIPSO IP option, dsthao would be a non-zero offset. Inside icmp6_send(), mip6_addr_swap() is called and uses ipv6_find_tlv(skb, opt->dsthao, IPV6_TLV_HAO). This would scan the inner, attacker-controlled IPv6 packet starting at that offset, potentially returning a fake TLV without checking if the remaining packet length can hold the full 18-byte struct ipv6_destopt_hao. Could mip6_addr_swap() then perform a 16-byte swap that extends past the end of the packet data into skb_shared_info? Should the cb array also be cleared in ip6_err_gen_icmpv6_unreach() and ip6ip6_err() to prevent this? This patch implements the first suggestion. I am not sure if ip6ip6_err() needs to be changed. A separate patch would be better anyway. Fixes: ca15a078bd90 ("sit: generate icmpv6 error when receiving icmpv4 error") Reported-by: Ido Schimmel Closes: https://sashiko.dev/#/patchset/20260326155138.2429480-1-edumazet%40google.com Signed-off-by: Eric Dumazet Cc: Oskar Kjos Reviewed-by: Ido Schimmel Link: https://patch.msgid.link/20260326202608.2976021-1-edumazet@google.com Signed-off-by: Jakub Kicinski --- net/ipv6/icmp.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index 813d2e9edb8b..d5d23a9296ea 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -875,6 +875,9 @@ int ip6_err_gen_icmpv6_unreach(struct sk_buff *skb, int nhs, int type, if (!skb2) return 1; + /* Remove debris left by IPv4 stack. */ + memset(IP6CB(skb2), 0, sizeof(*IP6CB(skb2))); + skb_dst_drop(skb2); skb_pull(skb2, nhs); skb_reset_network_header(skb2); From 2edfa31769a4add828a7e604b21cb82aaaa05925 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 26 Mar 2026 15:51:38 +0000 Subject: [PATCH 263/401] ip6_tunnel: clear skb2->cb[] in ip4ip6_err() Oskar Kjos reported the following problem. ip4ip6_err() calls icmp_send() on a cloned skb whose cb[] was written by the IPv6 receive path as struct inet6_skb_parm. icmp_send() passes IPCB(skb2) to __ip_options_echo(), which interprets that cb[] region as struct inet_skb_parm (IPv4). The layouts differ: inet6_skb_parm.nhoff at offset 14 overlaps inet_skb_parm.opt.rr, producing a non-zero rr value. __ip_options_echo() then reads optlen from attacker-controlled packet data at sptr[rr+1] and copies that many bytes into dopt->__data, a fixed 40-byte stack buffer (IP_OPTIONS_DATA_FIXED_SIZE). To fix this we clear skb2->cb[], as suggested by Oskar Kjos. Also add minimal IPv4 header validation (version == 4, ihl >= 5). Fixes: c4d3efafcc93 ("[IPV6] IP6TUNNEL: Add support to IPv4 over IPv6 tunnel.") Reported-by: Oskar Kjos Signed-off-by: Eric Dumazet Reviewed-by: Ido Schimmel Link: https://patch.msgid.link/20260326155138.2429480-1-edumazet@google.com Signed-off-by: Jakub Kicinski --- net/ipv6/ip6_tunnel.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index 4c29aa94e86e..0b53488a9229 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -601,11 +601,16 @@ ip4ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, if (!skb2) return 0; + /* Remove debris left by IPv6 stack. */ + memset(IPCB(skb2), 0, sizeof(*IPCB(skb2))); + skb_dst_drop(skb2); skb_pull(skb2, offset); skb_reset_network_header(skb2); eiph = ip_hdr(skb2); + if (eiph->version != 4 || eiph->ihl < 5) + goto out; /* Try to guess incoming interface */ rt = ip_route_output_ports(dev_net(skb->dev), &fl4, NULL, eiph->saddr, From f4a2b42e78914ff15630e71289adc589c3a8eb45 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Thu, 5 Feb 2026 10:22:24 +0100 Subject: [PATCH 264/401] ext4: fix stale xarray tags after writeback There are cases where ext4_bio_write_page() gets called for a page which has no buffers to submit. This happens e.g. when the part of the file is actually a hole, when we cannot allocate blocks due to being called from jbd2, or in data=journal mode when checkpointing writes the buffers earlier. In these cases we just return from ext4_bio_write_page() however if the page didn't need redirtying, we will leave stale DIRTY and/or TOWRITE tags in xarray because those get cleared only in __folio_start_writeback(). As a result we can leave these tags set in mappings even after a final sync on filesystem that's getting remounted read-only or that's being frozen. Various assertions can then get upset when writeback is started on such filesystems (Gerald reported assertion in ext4_journal_check_start() firing). Fix the problem by cycling the page through writeback state even if we decide nothing needs to be written for it so that xarray tags get properly updated. This is slightly silly (we could update the xarray tags directly) but I don't think a special helper messing with xarray tags is really worth it in this relatively rare corner case. Reported-by: Gerald Yang Link: https://lore.kernel.org/all/20260128074515.2028982-1-gerald.yang@canonical.com Fixes: dff4ac75eeee ("ext4: move keep_towrite handling to ext4_bio_write_page()") Signed-off-by: Jan Kara Link: https://patch.msgid.link/20260205092223.21287-2-jack@suse.cz Signed-off-by: Theodore Ts'o Cc: stable@kernel.org --- fs/ext4/page-io.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c index a8c95eee91b7..39fe50b3c662 100644 --- a/fs/ext4/page-io.c +++ b/fs/ext4/page-io.c @@ -524,9 +524,15 @@ int ext4_bio_write_folio(struct ext4_io_submit *io, struct folio *folio, nr_to_submit++; } while ((bh = bh->b_this_page) != head); - /* Nothing to submit? Just unlock the folio... */ - if (!nr_to_submit) + if (!nr_to_submit) { + /* + * We have nothing to submit. Just cycle the folio through + * writeback state to properly update xarray tags. + */ + __folio_start_writeback(folio, keep_towrite); + folio_end_writeback(folio); return 0; + } bh = head = folio_buffers(folio); From ed9356a30e59c7cc3198e7fc46cfedf3767b9b17 Mon Sep 17 00:00:00 2001 From: Deepanshu Kartikey Date: Sat, 7 Feb 2026 10:06:07 +0530 Subject: [PATCH 265/401] ext4: convert inline data to extents when truncate exceeds inline size Add a check in ext4_setattr() to convert files from inline data storage to extent-based storage when truncate() grows the file size beyond the inline capacity. This prevents the filesystem from entering an inconsistent state where the inline data flag is set but the file size exceeds what can be stored inline. Without this fix, the following sequence causes a kernel BUG_ON(): 1. Mount filesystem with inode that has inline flag set and small size 2. truncate(file, 50MB) - grows size but inline flag remains set 3. sendfile() attempts to write data 4. ext4_write_inline_data() hits BUG_ON(write_size > inline_capacity) The crash occurs because ext4_write_inline_data() expects inline storage to accommodate the write, but the actual inline capacity (~60 bytes for i_block + ~96 bytes for xattrs) is far smaller than the file size and write request. The fix checks if the new size from setattr exceeds the inode's actual inline capacity (EXT4_I(inode)->i_inline_size) and converts the file to extent-based storage before proceeding with the size change. This addresses the root cause by ensuring the inline data flag and file size remain consistent during truncate operations. Reported-by: syzbot+7de5fe447862fc37576f@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=7de5fe447862fc37576f Tested-by: syzbot+7de5fe447862fc37576f@syzkaller.appspotmail.com Signed-off-by: Deepanshu Kartikey Link: https://patch.msgid.link/20260207043607.1175976-1-kartikey406@gmail.com Signed-off-by: Theodore Ts'o Cc: stable@kernel.org --- fs/ext4/inode.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index af6d1759c8de..252191632f56 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -5867,6 +5867,18 @@ int ext4_setattr(struct mnt_idmap *idmap, struct dentry *dentry, if (attr->ia_size == inode->i_size) inc_ivers = false; + /* + * If file has inline data but new size exceeds inline capacity, + * convert to extent-based storage first to prevent inconsistent + * state (inline flag set but size exceeds inline capacity). + */ + if (ext4_has_inline_data(inode) && + attr->ia_size > EXT4_I(inode)->i_inline_size) { + error = ext4_convert_inline_data(inode); + if (error) + goto err_out; + } + if (shrink) { if (ext4_should_order_data(inode)) { error = ext4_begin_ordered_truncate(inode, From b38c55320bf85a84a4f04803c57b261fc87e9b4b Mon Sep 17 00:00:00 2001 From: Dimitri Daskalakis Date: Tue, 24 Mar 2026 12:51:22 -0700 Subject: [PATCH 266/401] eth: fbnic: Account for page fragments when updating BDQ tail FBNIC supports fixed size buffers of 4K. When PAGE_SIZE > 4K, we fragment the page across multiple descriptors (FBNIC_BD_FRAG_COUNT). When refilling the BDQ, the correct number of entries are populated, but tail was only incremented by one. So on a system with 64K pages, HW would get one descriptor refilled for every 16 we populate. Additionally, we program the ring size in the HW when enabling the BDQ. This was not accounting for page fragments, so on systems with 64K pages, the HW used 1/16th of the ring. Fixes: 0cb4c0a13723 ("eth: fbnic: Implement Rx queue alloc/start/stop/free") Signed-off-by: Dimitri Daskalakis Link: https://patch.msgid.link/20260324195123.3486219-2-dimitri.daskalakis1@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/meta/fbnic/fbnic_txrx.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_txrx.c b/drivers/net/ethernet/meta/fbnic/fbnic_txrx.c index 9fb91d4f3971..9cd85a0d0c3a 100644 --- a/drivers/net/ethernet/meta/fbnic/fbnic_txrx.c +++ b/drivers/net/ethernet/meta/fbnic/fbnic_txrx.c @@ -927,7 +927,7 @@ static void fbnic_fill_bdq(struct fbnic_ring *bdq) /* Force DMA writes to flush before writing to tail */ dma_wmb(); - writel(i, bdq->doorbell); + writel(i * FBNIC_BD_FRAG_COUNT, bdq->doorbell); } } @@ -2564,7 +2564,7 @@ static void fbnic_enable_bdq(struct fbnic_ring *hpq, struct fbnic_ring *ppq) hpq->tail = 0; hpq->head = 0; - log_size = fls(hpq->size_mask); + log_size = fls(hpq->size_mask) + ilog2(FBNIC_BD_FRAG_COUNT); /* Store descriptor ring address and size */ fbnic_ring_wr32(hpq, FBNIC_QUEUE_BDQ_HPQ_BAL, lower_32_bits(hpq->dma)); @@ -2576,7 +2576,7 @@ static void fbnic_enable_bdq(struct fbnic_ring *hpq, struct fbnic_ring *ppq) if (!ppq->size_mask) goto write_ctl; - log_size = fls(ppq->size_mask); + log_size = fls(ppq->size_mask) + ilog2(FBNIC_BD_FRAG_COUNT); /* Add enabling of PPQ to BDQ control */ bdq_ctl |= FBNIC_QUEUE_BDQ_CTL_PPQ_ENABLE; From f3567dd428b264b3f06f881e5e85a738c7c910df Mon Sep 17 00:00:00 2001 From: Dimitri Daskalakis Date: Tue, 24 Mar 2026 12:51:23 -0700 Subject: [PATCH 267/401] eth: fbnic: Fix debugfs output for BDQ's with page frags The rings size_mask represents the number of pages, so we need to determine the number of page frags when dumping the descriptors. Fixes: df04373b0dab ("eth fbnic: Add debugfs hooks for tx/rx rings") Signed-off-by: Dimitri Daskalakis Link: https://patch.msgid.link/20260324195123.3486219-3-dimitri.daskalakis1@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/meta/fbnic/fbnic_debugfs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_debugfs.c b/drivers/net/ethernet/meta/fbnic/fbnic_debugfs.c index 08270db2dee8..3c4563c8f403 100644 --- a/drivers/net/ethernet/meta/fbnic/fbnic_debugfs.c +++ b/drivers/net/ethernet/meta/fbnic/fbnic_debugfs.c @@ -197,7 +197,7 @@ static int fbnic_dbg_bdq_desc_seq_show(struct seq_file *s, void *v) return 0; } - for (i = 0; i <= ring->size_mask; i++) { + for (i = 0; i < (ring->size_mask + 1) * FBNIC_BD_FRAG_COUNT; i++) { u64 bd = le64_to_cpu(ring->desc[i]); seq_printf(s, "%04x %#04llx %#014llx\n", i, From b1d682f1990c19fb1d5b97d13266210457092bcd Mon Sep 17 00:00:00 2001 From: Simon Weber Date: Sat, 7 Feb 2026 10:53:03 +0100 Subject: [PATCH 268/401] ext4: fix journal credit check when setting fscrypt context Fix an issue arising when ext4 features has_journal, ea_inode, and encrypt are activated simultaneously, leading to ENOSPC when creating an encrypted file. Fix by passing XATTR_CREATE flag to xattr_set_handle function if a handle is specified, i.e., when the function is called in the control flow of creating a new inode. This aligns the number of jbd2 credits set_handle checks for with the number allocated for creating a new inode. ext4_set_context must not be called with a non-null handle (fs_data) if fscrypt context xattr is not guaranteed to not exist yet. The only other usage of this function currently is when handling the ioctl FS_IOC_SET_ENCRYPTION_POLICY, which calls it with fs_data=NULL. Fixes: c1a5d5f6ab21eb7e ("ext4: improve journal credit handling in set xattr paths") Co-developed-by: Anthony Durrer Signed-off-by: Anthony Durrer Signed-off-by: Simon Weber Reviewed-by: Eric Biggers Link: https://patch.msgid.link/20260207100148.724275-4-simon.weber.39@gmail.com Signed-off-by: Theodore Ts'o Cc: stable@kernel.org --- fs/ext4/crypto.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/fs/ext4/crypto.c b/fs/ext4/crypto.c index cf0a0970c095..f41f320f4437 100644 --- a/fs/ext4/crypto.c +++ b/fs/ext4/crypto.c @@ -163,10 +163,17 @@ static int ext4_set_context(struct inode *inode, const void *ctx, size_t len, */ if (handle) { + /* + * Since the inode is new it is ok to pass the + * XATTR_CREATE flag. This is necessary to match the + * remaining journal credits check in the set_handle + * function with the credits allocated for the new + * inode. + */ res = ext4_xattr_set_handle(handle, inode, EXT4_XATTR_INDEX_ENCRYPTION, EXT4_XATTR_NAME_ENCRYPTION_CONTEXT, - ctx, len, 0); + ctx, len, XATTR_CREATE); if (!res) { ext4_set_inode_flag(inode, EXT4_INODE_ENCRYPT); ext4_clear_inode_state(inode, From bd060afa7cc3e0ad30afa9ecc544a78638498555 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Mon, 16 Feb 2026 17:48:43 +0100 Subject: [PATCH 269/401] ext4: make recently_deleted() properly work with lazy itable initialization recently_deleted() checks whether inode has been used in the near past. However this can give false positive result when inode table is not initialized yet and we are in fact comparing to random garbage (or stale itable block of a filesystem before mkfs). Ultimately this results in uninitialized inodes being skipped during inode allocation and possibly they are never initialized and thus e2fsck complains. Verify if the inode has been initialized before checking for dtime. Signed-off-by: Jan Kara Reviewed-by: Zhang Yi Link: https://patch.msgid.link/20260216164848.3074-3-jack@suse.cz Signed-off-by: Theodore Ts'o Cc: stable@kernel.org --- fs/ext4/ialloc.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c index b20a1bf866ab..b1bc1950c9f0 100644 --- a/fs/ext4/ialloc.c +++ b/fs/ext4/ialloc.c @@ -686,6 +686,12 @@ static int recently_deleted(struct super_block *sb, ext4_group_t group, int ino) if (unlikely(!gdp)) return 0; + /* Inode was never used in this filesystem? */ + if (ext4_has_group_desc_csum(sb) && + (gdp->bg_flags & cpu_to_le16(EXT4_BG_INODE_UNINIT) || + ino >= EXT4_INODES_PER_GROUP(sb) - ext4_itable_unused_count(sb, gdp))) + return 0; + bh = sb_find_get_block(sb, ext4_inode_table(sb, gdp) + (ino / inodes_per_block)); if (!bh || !buffer_uptodate(bh)) From 1308255bbf8452762f89f44f7447ce137ecdbcff Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Mon, 16 Feb 2026 17:48:44 +0100 Subject: [PATCH 270/401] ext4: fix fsync(2) for nojournal mode When inode metadata is changed, we sometimes just call ext4_mark_inode_dirty() to track modified metadata. This copies inode metadata into block buffer which is enough when we are journalling metadata. However when we are running in nojournal mode we currently fail to write the dirtied inode buffer during fsync(2) because the inode is not marked as dirty. Use explicit ext4_write_inode() call to make sure the inode table buffer is written to the disk. This is a band aid solution but proper solution requires a much larger rewrite including changes in metadata bh tracking infrastructure. Reported-by: Free Ekanayaka Link: https://lore.kernel.org/all/87il8nhxdm.fsf@x1.mail-host-address-is-not-set/ CC: stable@vger.kernel.org Signed-off-by: Jan Kara Reviewed-by: Zhang Yi Link: https://patch.msgid.link/20260216164848.3074-4-jack@suse.cz Signed-off-by: Theodore Ts'o Cc: stable@kernel.org --- fs/ext4/fsync.c | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/fs/ext4/fsync.c b/fs/ext4/fsync.c index e476c6de3074..bd8f230fa507 100644 --- a/fs/ext4/fsync.c +++ b/fs/ext4/fsync.c @@ -83,11 +83,23 @@ static int ext4_fsync_nojournal(struct file *file, loff_t start, loff_t end, int datasync, bool *needs_barrier) { struct inode *inode = file->f_inode; + struct writeback_control wbc = { + .sync_mode = WB_SYNC_ALL, + .nr_to_write = 0, + }; int ret; ret = generic_buffers_fsync_noflush(file, start, end, datasync); - if (!ret) - ret = ext4_sync_parent(inode); + if (ret) + return ret; + + /* Force writeout of inode table buffer to disk */ + ret = ext4_write_inode(inode, &wbc); + if (ret) + return ret; + + ret = ext4_sync_parent(inode); + if (test_opt(inode->i_sb, BARRIER)) *needs_barrier = true; From 356227096eb66e41b23caf7045e6304877322edf Mon Sep 17 00:00:00 2001 From: Yuto Ohnuki Date: Mon, 23 Feb 2026 12:33:46 +0000 Subject: [PATCH 271/401] ext4: replace BUG_ON with proper error handling in ext4_read_inline_folio Replace BUG_ON() with proper error handling when inline data size exceeds PAGE_SIZE. This prevents kernel panic and allows the system to continue running while properly reporting the filesystem corruption. The error is logged via ext4_error_inode(), the buffer head is released to prevent memory leak, and -EFSCORRUPTED is returned to indicate filesystem corruption. Signed-off-by: Yuto Ohnuki Link: https://patch.msgid.link/20260223123345.14838-2-ytohnuki@amazon.com Signed-off-by: Theodore Ts'o Cc: stable@kernel.org --- fs/ext4/inline.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/fs/ext4/inline.c b/fs/ext4/inline.c index 1f6bc05593df..408677fa8196 100644 --- a/fs/ext4/inline.c +++ b/fs/ext4/inline.c @@ -522,7 +522,15 @@ static int ext4_read_inline_folio(struct inode *inode, struct folio *folio) goto out; len = min_t(size_t, ext4_get_inline_size(inode), i_size_read(inode)); - BUG_ON(len > PAGE_SIZE); + + if (len > PAGE_SIZE) { + ext4_error_inode(inode, __func__, __LINE__, 0, + "inline size %zu exceeds PAGE_SIZE", len); + ret = -EFSCORRUPTED; + brelse(iloc.bh); + goto out; + } + kaddr = kmap_local_folio(folio, 0); ret = ext4_read_inline_data(inode, kaddr, len, &iloc); kaddr = folio_zero_tail(folio, len, kaddr + len); From 1aec30021edd410b986c156f195f3d23959a9d11 Mon Sep 17 00:00:00 2001 From: Li Chen Date: Wed, 25 Feb 2026 16:26:16 +0800 Subject: [PATCH 272/401] ext4: publish jinode after initialization ext4_inode_attach_jinode() publishes ei->jinode to concurrent users. It used to set ei->jinode before jbd2_journal_init_jbd_inode(), allowing a reader to observe a non-NULL jinode with i_vfs_inode still unset. The fast commit flush path can then pass this jinode to jbd2_wait_inode_data(), which dereferences i_vfs_inode->i_mapping and may crash. Below is the crash I observe: ``` BUG: unable to handle page fault for address: 000000010beb47f4 PGD 110e51067 P4D 110e51067 PUD 0 Oops: Oops: 0000 [#1] SMP NOPTI CPU: 1 UID: 0 PID: 4850 Comm: fc_fsync_bench_ Not tainted 6.18.0-00764-g795a690c06a5 #1 PREEMPT(voluntary) Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Arch Linux 1.17.0-2-2 04/01/2014 RIP: 0010:xas_find_marked+0x3d/0x2e0 Code: e0 03 48 83 f8 02 0f 84 f0 01 00 00 48 8b 47 08 48 89 c3 48 39 c6 0f 82 fd 01 00 00 48 85 c9 74 3d 48 83 f9 03 77 63 4c 8b 0f <49> 8b 71 08 48 c7 47 18 00 00 00 00 48 89 f1 83 e1 03 48 83 f9 02 RSP: 0018:ffffbbee806e7bf0 EFLAGS: 00010246 RAX: 000000000010beb4 RBX: 000000000010beb4 RCX: 0000000000000003 RDX: 0000000000000001 RSI: 0000002000300000 RDI: ffffbbee806e7c10 RBP: 0000000000000001 R08: 0000002000300000 R09: 000000010beb47ec R10: ffff9ea494590090 R11: 0000000000000000 R12: 0000002000300000 R13: ffffbbee806e7c90 R14: ffff9ea494513788 R15: ffffbbee806e7c88 FS: 00007fc2f9e3e6c0(0000) GS:ffff9ea6b1444000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 000000010beb47f4 CR3: 0000000119ac5000 CR4: 0000000000750ef0 PKRU: 55555554 Call Trace: filemap_get_folios_tag+0x87/0x2a0 __filemap_fdatawait_range+0x5f/0xd0 ? srso_alias_return_thunk+0x5/0xfbef5 ? __schedule+0x3e7/0x10c0 ? srso_alias_return_thunk+0x5/0xfbef5 ? srso_alias_return_thunk+0x5/0xfbef5 ? srso_alias_return_thunk+0x5/0xfbef5 ? preempt_count_sub+0x5f/0x80 ? srso_alias_return_thunk+0x5/0xfbef5 ? cap_safe_nice+0x37/0x70 ? srso_alias_return_thunk+0x5/0xfbef5 ? preempt_count_sub+0x5f/0x80 ? srso_alias_return_thunk+0x5/0xfbef5 filemap_fdatawait_range_keep_errors+0x12/0x40 ext4_fc_commit+0x697/0x8b0 ? ext4_file_write_iter+0x64b/0x950 ? srso_alias_return_thunk+0x5/0xfbef5 ? preempt_count_sub+0x5f/0x80 ? srso_alias_return_thunk+0x5/0xfbef5 ? vfs_write+0x356/0x480 ? srso_alias_return_thunk+0x5/0xfbef5 ? preempt_count_sub+0x5f/0x80 ext4_sync_file+0xf7/0x370 do_fsync+0x3b/0x80 ? syscall_trace_enter+0x108/0x1d0 __x64_sys_fdatasync+0x16/0x20 do_syscall_64+0x62/0x2c0 entry_SYSCALL_64_after_hwframe+0x76/0x7e ... ``` Fix this by initializing the jbd2_inode first. Use smp_wmb() and WRITE_ONCE() to publish ei->jinode after initialization. Readers use READ_ONCE() to fetch the pointer. Fixes: a361293f5fede ("jbd2: Fix oops in jbd2_journal_file_inode()") Cc: stable@vger.kernel.org Signed-off-by: Li Chen Reviewed-by: Jan Kara Link: https://patch.msgid.link/20260225082617.147957-1-me@linux.beauty Signed-off-by: Theodore Ts'o Cc: stable@kernel.org --- fs/ext4/fast_commit.c | 4 ++-- fs/ext4/inode.c | 15 +++++++++++---- 2 files changed, 13 insertions(+), 6 deletions(-) diff --git a/fs/ext4/fast_commit.c b/fs/ext4/fast_commit.c index f575751f1cae..6e949c21842d 100644 --- a/fs/ext4/fast_commit.c +++ b/fs/ext4/fast_commit.c @@ -975,13 +975,13 @@ static int ext4_fc_flush_data(journal_t *journal) int ret = 0; list_for_each_entry(ei, &sbi->s_fc_q[FC_Q_MAIN], i_fc_list) { - ret = jbd2_submit_inode_data(journal, ei->jinode); + ret = jbd2_submit_inode_data(journal, READ_ONCE(ei->jinode)); if (ret) return ret; } list_for_each_entry(ei, &sbi->s_fc_q[FC_Q_MAIN], i_fc_list) { - ret = jbd2_wait_inode_data(journal, ei->jinode); + ret = jbd2_wait_inode_data(journal, READ_ONCE(ei->jinode)); if (ret) return ret; } diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 252191632f56..ac5f3446c731 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -128,6 +128,8 @@ void ext4_inode_csum_set(struct inode *inode, struct ext4_inode *raw, static inline int ext4_begin_ordered_truncate(struct inode *inode, loff_t new_size) { + struct jbd2_inode *jinode = READ_ONCE(EXT4_I(inode)->jinode); + trace_ext4_begin_ordered_truncate(inode, new_size); /* * If jinode is zero, then we never opened the file for @@ -135,10 +137,10 @@ static inline int ext4_begin_ordered_truncate(struct inode *inode, * jbd2_journal_begin_ordered_truncate() since there's no * outstanding writes we need to flush. */ - if (!EXT4_I(inode)->jinode) + if (!jinode) return 0; return jbd2_journal_begin_ordered_truncate(EXT4_JOURNAL(inode), - EXT4_I(inode)->jinode, + jinode, new_size); } @@ -4451,8 +4453,13 @@ int ext4_inode_attach_jinode(struct inode *inode) spin_unlock(&inode->i_lock); return -ENOMEM; } - ei->jinode = jinode; - jbd2_journal_init_jbd_inode(ei->jinode, inode); + jbd2_journal_init_jbd_inode(jinode, inode); + /* + * Publish ->jinode only after it is fully initialized so that + * readers never observe a partially initialized jbd2_inode. + */ + smp_wmb(); + WRITE_ONCE(ei->jinode, jinode); jinode = NULL; } spin_unlock(&inode->i_lock); From afe376d2c1fa78c8a1063a357c6971bba3f6da91 Mon Sep 17 00:00:00 2001 From: "Ritesh Harjani (IBM)" Date: Sun, 1 Mar 2026 21:44:26 +0530 Subject: [PATCH 273/401] ext4: kunit: extents-test: lix percpu_counters list corruption commit 82f80e2e3b23 ("ext4: add extent status cache support to kunit tests"), added ext4_es_register_shrinker() in extents_kunit_init() function but failed to add the unregister shrinker routine in extents_kunit_exit(). This could cause the following percpu_counters list corruption bug. ok 1 split unwrit extent to 2 extents and convert 1st half writ slab kmalloc-4k start c0000002007ff000 pointer offset 1448 size 4096 list_add corruption. next->prev should be prev (c000000004bc9e60), but was 0000000000000000. (next=c0000002007ff5a8). ------------[ cut here ]------------ kernel BUG at lib/list_debug.c:29! cpu 0x2: Vector: 700 (Program Check) at [c000000241927a30] pc: c000000000f26ed0: __list_add_valid_or_report+0x120/0x164 lr: c000000000f26ecc: __list_add_valid_or_report+0x11c/0x164 sp: c000000241927cd0 msr: 800000000282b033 current = 0xc000000241215200 paca = 0xc0000003fffff300 irqmask: 0x03 irq_happened: 0x09 pid = 258, comm = kunit_try_catch kernel BUG at lib/list_debug.c:29! enter ? for help __percpu_counter_init_many+0x148/0x184 ext4_es_register_shrinker+0x74/0x23c extents_kunit_init+0x100/0x308 kunit_try_run_case+0x78/0x1f8 kunit_generic_run_threadfn_adapter+0x40/0x70 kthread+0x190/0x1a0 start_kernel_thread+0x14/0x18 2:mon> This happens because: extents_kunit_init(test N): ext4_es_register_shrinker(sbi) percpu_counters_init() x 4; // this adds 4 list nodes to global percpu_counters list list_add(&fbc->list, &percpu_counters); shrinker_register(); extents_kunit_exit(test N): kfree(sbi); // frees sbi w/o removing those 4 list nodes. // So, those list node now becomes dangling pointers extents_kunit_init(test N+1): kzalloc_obj(ext4_sb_info) // allocator returns same page, but zeroed. ext4_es_register_shrinker(sbi) percpu_counters_init() list_add(&fbc->list, &percpu_counters); __list_add_valid(new, prev, next); next->prev != prev // list corruption bug detected, since next->prev = NULL Fixes: 82f80e2e3b23 ("ext4: add extent status cache support to kunit tests") Signed-off-by: Ritesh Harjani (IBM) Reviewed-by: Ojaswin Mujoo Link: https://patch.msgid.link/5bb9041471dab8ce870c191c19cbe4df57473be8.1772381213.git.ritesh.list@gmail.com Signed-off-by: Theodore Ts'o Cc: stable@kernel.org --- fs/ext4/extents-test.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/ext4/extents-test.c b/fs/ext4/extents-test.c index 7c4690eb7dad..a6b3e6b592a5 100644 --- a/fs/ext4/extents-test.c +++ b/fs/ext4/extents-test.c @@ -142,8 +142,10 @@ static struct file_system_type ext_fs_type = { static void extents_kunit_exit(struct kunit *test) { - struct ext4_sb_info *sbi = k_ctx.k_ei->vfs_inode.i_sb->s_fs_info; + struct super_block *sb = k_ctx.k_ei->vfs_inode.i_sb; + struct ext4_sb_info *sbi = sb->s_fs_info; + ext4_es_unregister_shrinker(sbi); kfree(sbi); kfree(k_ctx.k_ei); kfree(k_ctx.k_data); From 46066e3a06647c5b186cc6334409722622d05c44 Mon Sep 17 00:00:00 2001 From: Ye Bin Date: Mon, 2 Mar 2026 21:46:19 +0800 Subject: [PATCH 274/401] ext4: avoid allocate block from corrupted group in ext4_mb_find_by_goal() There's issue as follows: ... EXT4-fs (mmcblk0p1): Delayed block allocation failed for inode 206 at logical offset 0 with max blocks 1 with error 117 EXT4-fs (mmcblk0p1): This should not happen!! Data will be lost EXT4-fs (mmcblk0p1): Delayed block allocation failed for inode 206 at logical offset 0 with max blocks 1 with error 117 EXT4-fs (mmcblk0p1): This should not happen!! Data will be lost EXT4-fs (mmcblk0p1): Delayed block allocation failed for inode 206 at logical offset 0 with max blocks 1 with error 117 EXT4-fs (mmcblk0p1): This should not happen!! Data will be lost EXT4-fs (mmcblk0p1): Delayed block allocation failed for inode 206 at logical offset 0 with max blocks 1 with error 117 EXT4-fs (mmcblk0p1): This should not happen!! Data will be lost EXT4-fs (mmcblk0p1): Delayed block allocation failed for inode 2243 at logical offset 0 with max blocks 1 with error 117 EXT4-fs (mmcblk0p1): This should not happen!! Data will be lost EXT4-fs (mmcblk0p1): Delayed block allocation failed for inode 2239 at logical offset 0 with max blocks 1 with error 117 EXT4-fs (mmcblk0p1): This should not happen!! Data will be lost EXT4-fs (mmcblk0p1): error count since last fsck: 1 EXT4-fs (mmcblk0p1): initial error at time 1765597433: ext4_mb_generate_buddy:760 EXT4-fs (mmcblk0p1): last error at time 1765597433: ext4_mb_generate_buddy:760 ... According to the log analysis, blocks are always requested from the corrupted block group. This may happen as follows: ext4_mb_find_by_goal ext4_mb_load_buddy ext4_mb_load_buddy_gfp ext4_mb_init_cache ext4_read_block_bitmap_nowait ext4_wait_block_bitmap ext4_validate_block_bitmap if (!grp || EXT4_MB_GRP_BBITMAP_CORRUPT(grp)) return -EFSCORRUPTED; // There's no logs. if (err) return err; // Will return error ext4_lock_group(ac->ac_sb, group); if (unlikely(EXT4_MB_GRP_BBITMAP_CORRUPT(e4b->bd_info))) // Unreachable goto out; After commit 9008a58e5dce ("ext4: make the bitmap read routines return real error codes") merged, Commit 163a203ddb36 ("ext4: mark block group as corrupt on block bitmap error") is no real solution for allocating blocks from corrupted block groups. This is because if 'EXT4_MB_GRP_BBITMAP_CORRUPT(e4b->bd_info)' is true, then 'ext4_mb_load_buddy()' may return an error. This means that the block allocation will fail. Therefore, check block group if corrupted when ext4_mb_load_buddy() returns error. Fixes: 163a203ddb36 ("ext4: mark block group as corrupt on block bitmap error") Fixes: 9008a58e5dce ("ext4: make the bitmap read routines return real error codes") Signed-off-by: Ye Bin Reviewed-by: Ritesh Harjani (IBM) Reviewed-by: Zhang Yi Reviewed-by: Andreas Dilger Reviewed-by: Jan Kara Link: https://patch.msgid.link/20260302134619.3145520-1-yebin@huaweicloud.com Signed-off-by: Theodore Ts'o Cc: stable@kernel.org --- fs/ext4/mballoc.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index 20e9fdaf4301..705a879f13d3 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -2443,8 +2443,12 @@ int ext4_mb_find_by_goal(struct ext4_allocation_context *ac, return 0; err = ext4_mb_load_buddy(ac->ac_sb, group, e4b); - if (err) + if (err) { + if (EXT4_MB_GRP_BBITMAP_CORRUPT(e4b->bd_info) && + !(ac->ac_flags & EXT4_MB_HINT_GOAL_ONLY)) + return 0; return err; + } ext4_lock_group(ac->ac_sb, group); if (unlikely(EXT4_MB_GRP_BBITMAP_CORRUPT(e4b->bd_info))) From c4a48e9eeefd610ae0d26e9ff085277f751c8e53 Mon Sep 17 00:00:00 2001 From: Ojaswin Mujoo Date: Mon, 2 Mar 2026 20:08:11 +0530 Subject: [PATCH 275/401] ext4: minor fix for ext4_split_extent_zeroout() We missed storing the error which triggerd smatch warning: fs/ext4/extents.c:3369 ext4_split_extent_zeroout() warn: duplicate zero check 'err' (previous on line 3363) fs/ext4/extents.c 3361 3362 err = ext4_ext_get_access(handle, inode, path + depth); 3363 if (err) 3364 return err; 3365 3366 ext4_ext_mark_initialized(ex); 3367 3368 ext4_ext_dirty(handle, inode, path + depth); --> 3369 if (err) 3370 return err; 3371 3372 return 0; 3373 } Fix it by correctly storing the err value from ext4_ext_dirty(). Link: https://lore.kernel.org/all/aYXvVgPnKltX79KE@stanley.mountain/ Reported-by: Dan Carpenter Fixes: a985e07c26455 ("ext4: refactor zeroout path and handle all cases") Reviewed-by: Ritesh Harjani (IBM) Signed-off-by: Ojaswin Mujoo Reviewed-by: Zhang Yi Reviewed-by: Baokun Li Reviewed-by: Andreas Dilger Link: https://patch.msgid.link/20260302143811.605174-1-ojaswin@linux.ibm.com Signed-off-by: Theodore Ts'o Cc: stable@kernel.org --- fs/ext4/extents.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index ae3804f36535..b41a44dc245c 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -3363,7 +3363,7 @@ static int ext4_split_extent_zeroout(handle_t *handle, struct inode *inode, ext4_ext_mark_initialized(ex); - ext4_ext_dirty(handle, inode, path + depth); + err = ext4_ext_dirty(handle, inode, path + depth); if (err) return err; From 73bf12adbea10b13647864cd1c62410d19e21086 Mon Sep 17 00:00:00 2001 From: Ye Bin Date: Tue, 3 Mar 2026 09:22:42 +0800 Subject: [PATCH 276/401] ext4: test if inode's all dirty pages are submitted to disk The commit aa373cf55099 ("writeback: stop background/kupdate works from livelocking other works") introduced an issue where unmounting a filesystem in a multi-logical-partition scenario could lead to batch file data loss. This problem was not fixed until the commit d92109891f21 ("fs/writeback: bail out if there is no more inodes for IO and queued once"). It took considerable time to identify the root cause. Additionally, in actual production environments, we frequently encountered file data loss after normal system reboots. Therefore, we are adding a check in the inode release flow to verify whether all dirty pages have been flushed to disk, in order to determine whether the data loss is caused by a logic issue in the filesystem code. Signed-off-by: Ye Bin Reviewed-by: Jan Kara Link: https://patch.msgid.link/20260303012242.3206465-1-yebin@huaweicloud.com Signed-off-by: Theodore Ts'o Cc: stable@kernel.org --- fs/ext4/inode.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index ac5f3446c731..1123d995494b 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -186,6 +186,14 @@ void ext4_evict_inode(struct inode *inode) if (EXT4_I(inode)->i_flags & EXT4_EA_INODE_FL) ext4_evict_ea_inode(inode); if (inode->i_nlink) { + /* + * If there's dirty page will lead to data loss, user + * could see stale data. + */ + if (unlikely(!ext4_emergency_state(inode->i_sb) && + mapping_tagged(&inode->i_data, PAGECACHE_TAG_DIRTY))) + ext4_warning_inode(inode, "data will be lost"); + truncate_inode_pages_final(&inode->i_data); goto no_delete; From 2acb5c12ebd860f30e4faf67e6cc8c44ddfe5fe8 Mon Sep 17 00:00:00 2001 From: Tejas Bharambe Date: Tue, 3 Mar 2026 23:14:34 -0800 Subject: [PATCH 277/401] ext4: validate p_idx bounds in ext4_ext_correct_indexes ext4_ext_correct_indexes() walks up the extent tree correcting index entries when the first extent in a leaf is modified. Before accessing path[k].p_idx->ei_block, there is no validation that p_idx falls within the valid range of index entries for that level. If the on-disk extent header contains a corrupted or crafted eh_entries value, p_idx can point past the end of the allocated buffer, causing a slab-out-of-bounds read. Fix this by validating path[k].p_idx against EXT_LAST_INDEX() at both access sites: before the while loop and inside it. Return -EFSCORRUPTED if the index pointer is out of range, consistent with how other bounds violations are handled in the ext4 extent tree code. Reported-by: syzbot+04c4e65cab786a2e5b7e@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=04c4e65cab786a2e5b7e Signed-off-by: Tejas Bharambe Link: https://patch.msgid.link/JH0PR06MB66326016F9B6AD24097D232B897CA@JH0PR06MB6632.apcprd06.prod.outlook.com Signed-off-by: Theodore Ts'o Cc: stable@kernel.org --- fs/ext4/extents.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index b41a44dc245c..c99e0802d700 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -1736,6 +1736,13 @@ static int ext4_ext_correct_indexes(handle_t *handle, struct inode *inode, err = ext4_ext_get_access(handle, inode, path + k); if (err) return err; + if (unlikely(path[k].p_idx > EXT_LAST_INDEX(path[k].p_hdr))) { + EXT4_ERROR_INODE(inode, + "path[%d].p_idx %p > EXT_LAST_INDEX %p", + k, path[k].p_idx, + EXT_LAST_INDEX(path[k].p_hdr)); + return -EFSCORRUPTED; + } path[k].p_idx->ei_block = border; err = ext4_ext_dirty(handle, inode, path + k); if (err) @@ -1748,6 +1755,14 @@ static int ext4_ext_correct_indexes(handle_t *handle, struct inode *inode, err = ext4_ext_get_access(handle, inode, path + k); if (err) goto clean; + if (unlikely(path[k].p_idx > EXT_LAST_INDEX(path[k].p_hdr))) { + EXT4_ERROR_INODE(inode, + "path[%d].p_idx %p > EXT_LAST_INDEX %p", + k, path[k].p_idx, + EXT_LAST_INDEX(path[k].p_hdr)); + err = -EFSCORRUPTED; + goto clean; + } path[k].p_idx->ei_block = border; err = ext4_ext_dirty(handle, inode, path + k); if (err) From 5422fe71d26d42af6c454ca9527faaad4e677d6c Mon Sep 17 00:00:00 2001 From: Edward Adam Davis Date: Fri, 6 Mar 2026 09:31:58 +0800 Subject: [PATCH 278/401] ext4: avoid infinite loops caused by residual data On the mkdir/mknod path, when mapping logical blocks to physical blocks, if inserting a new extent into the extent tree fails (in this example, because the file system disabled the huge file feature when marking the inode as dirty), ext4_ext_map_blocks() only calls ext4_free_blocks() to reclaim the physical block without deleting the corresponding data in the extent tree. This causes subsequent mkdir operations to reference the previously reclaimed physical block number again, even though this physical block is already being used by the xattr block. Therefore, a situation arises where both the directory and xattr are using the same buffer head block in memory simultaneously. The above causes ext4_xattr_block_set() to enter an infinite loop about "inserted" and cannot release the inode lock, ultimately leading to the 143s blocking problem mentioned in [1]. If the metadata is corrupted, then trying to remove some extent space can do even more harm. Also in case EXT4_GET_BLOCKS_DELALLOC_RESERVE was passed, remove space wrongly update quota information. Jan Kara suggests distinguishing between two cases: 1) The error is ENOSPC or EDQUOT - in this case the filesystem is fully consistent and we must maintain its consistency including all the accounting. However these errors can happen only early before we've inserted the extent into the extent tree. So current code works correctly for this case. 2) Some other error - this means metadata is corrupted. We should strive to do as few modifications as possible to limit damage. So I'd just skip freeing of allocated blocks. [1] INFO: task syz.0.17:5995 blocked for more than 143 seconds. Call Trace: inode_lock_nested include/linux/fs.h:1073 [inline] __start_dirop fs/namei.c:2923 [inline] start_dirop fs/namei.c:2934 [inline] Reported-by: syzbot+512459401510e2a9a39f@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=1659aaaaa8d9d11265d7 Tested-by: syzbot+1659aaaaa8d9d11265d7@syzkaller.appspotmail.com Reported-by: syzbot+1659aaaaa8d9d11265d7@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=512459401510e2a9a39f Tested-by: syzbot+1659aaaaa8d9d11265d7@syzkaller.appspotmail.com Signed-off-by: Edward Adam Davis Reviewed-by: Jan Kara Tested-by: syzbot+512459401510e2a9a39f@syzkaller.appspotmail.com Link: https://patch.msgid.link/tencent_43696283A68450B761D76866C6F360E36705@qq.com Signed-off-by: Theodore Ts'o Cc: stable@kernel.org --- fs/ext4/extents.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index c99e0802d700..8744d3845577 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -4472,9 +4472,13 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode, path = ext4_ext_insert_extent(handle, inode, path, &newex, flags); if (IS_ERR(path)) { err = PTR_ERR(path); - if (allocated_clusters) { + /* + * Gracefully handle out of space conditions. If the filesystem + * is inconsistent, we'll just leak allocated blocks to avoid + * causing even more damage. + */ + if (allocated_clusters && (err == -EDQUOT || err == -ENOSPC)) { int fb_flags = 0; - /* * free data blocks we just allocated. * not a good idea to call discard here directly, From bac3190a8e79beff6ed221975e0c9b1b5f2a21da Mon Sep 17 00:00:00 2001 From: Milos Nikic Date: Tue, 10 Mar 2026 21:15:48 -0700 Subject: [PATCH 279/401] jbd2: gracefully abort on checkpointing state corruptions This patch targets two internal state machine invariants in checkpoint.c residing inside functions that natively return integer error codes. - In jbd2_cleanup_journal_tail(): A blocknr of 0 indicates a severely corrupted journal superblock. Replaced the J_ASSERT with a WARN_ON_ONCE and a graceful journal abort, returning -EFSCORRUPTED. - In jbd2_log_do_checkpoint(): Replaced the J_ASSERT_BH checking for an unexpected buffer_jwrite state. If the warning triggers, we explicitly drop the just-taken get_bh() reference and call __flush_batch() to safely clean up any previously queued buffers in the j_chkpt_bhs array, preventing a memory leak before returning -EFSCORRUPTED. Signed-off-by: Milos Nikic Reviewed-by: Andreas Dilger Reviewed-by: Zhang Yi Reviewed-by: Baokun Li Reviewed-by: Jan Kara Link: https://patch.msgid.link/20260311041548.159424-1-nikic.milos@gmail.com Signed-off-by: Theodore Ts'o Cc: stable@kernel.org --- fs/jbd2/checkpoint.c | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/fs/jbd2/checkpoint.c b/fs/jbd2/checkpoint.c index de89c5bef607..1508e2f54462 100644 --- a/fs/jbd2/checkpoint.c +++ b/fs/jbd2/checkpoint.c @@ -267,7 +267,15 @@ int jbd2_log_do_checkpoint(journal_t *journal) */ BUFFER_TRACE(bh, "queue"); get_bh(bh); - J_ASSERT_BH(bh, !buffer_jwrite(bh)); + if (WARN_ON_ONCE(buffer_jwrite(bh))) { + put_bh(bh); /* drop the ref we just took */ + spin_unlock(&journal->j_list_lock); + /* Clean up any previously batched buffers */ + if (batch_count) + __flush_batch(journal, &batch_count); + jbd2_journal_abort(journal, -EFSCORRUPTED); + return -EFSCORRUPTED; + } journal->j_chkpt_bhs[batch_count++] = bh; transaction->t_chp_stats.cs_written++; transaction->t_checkpoint_list = jh->b_cpnext; @@ -325,7 +333,10 @@ int jbd2_cleanup_journal_tail(journal_t *journal) if (!jbd2_journal_get_log_tail(journal, &first_tid, &blocknr)) return 1; - J_ASSERT(blocknr != 0); + if (WARN_ON_ONCE(blocknr == 0)) { + jbd2_journal_abort(journal, -EFSCORRUPTED); + return -EFSCORRUPTED; + } /* * We need to make sure that any blocks that were recently written out From 49504a512587147dd6da3b4b08832ccc157b97dc Mon Sep 17 00:00:00 2001 From: Ye Bin Date: Sat, 14 Mar 2026 15:52:56 +0800 Subject: [PATCH 280/401] ext4: introduce EXPORT_SYMBOL_FOR_EXT4_TEST() helper Introduce EXPORT_SYMBOL_FOR_EXT4_TEST() helper for kuint test. Signed-off-by: Ye Bin Reviewed-by: Jan Kara Link: https://patch.msgid.link/20260314075258.1317579-2-yebin@huaweicloud.com Signed-off-by: Theodore Ts'o --- fs/ext4/ext4.h | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 293f698b7042..c579f68b3c11 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -3944,6 +3944,11 @@ static inline bool ext4_inode_can_atomic_write(struct inode *inode) extern int ext4_block_write_begin(handle_t *handle, struct folio *folio, loff_t pos, unsigned len, get_block_t *get_block); + +#if IS_ENABLED(CONFIG_EXT4_KUNIT_TESTS) +#define EXPORT_SYMBOL_FOR_EXT4_TEST(sym) \ + EXPORT_SYMBOL_FOR_MODULES(sym, "ext4-test") +#endif #endif /* __KERNEL__ */ #endif /* _EXT4_H */ From 519b76ac0b31d86b45784735d4ef964e8efdc56b Mon Sep 17 00:00:00 2001 From: Ye Bin Date: Sat, 14 Mar 2026 15:52:57 +0800 Subject: [PATCH 281/401] ext4: fix mballoc-test.c is not compiled when EXT4_KUNIT_TESTS=M Now, only EXT4_KUNIT_TESTS=Y testcase will be compiled in 'mballoc.c'. To solve this issue, the ext4 test code needs to be decoupled. The ext4 test module is compiled into a separate module. Reported-by: ChenXiaoSong Closes: https://patchwork.kernel.org/project/cifs-client/patch/20260118091313.1988168-2-chenxiaosong.chenxiaosong@linux.dev/ Fixes: 7c9fa399a369 ("ext4: add first unit test for ext4_mb_new_blocks_simple in mballoc") Signed-off-by: Ye Bin Reviewed-by: Jan Kara Link: https://patch.msgid.link/20260314075258.1317579-3-yebin@huaweicloud.com Signed-off-by: Theodore Ts'o --- fs/ext4/Makefile | 4 +- fs/ext4/mballoc-test.c | 81 ++++++++++++++++---------------- fs/ext4/mballoc.c | 102 +++++++++++++++++++++++++++++++++++++++-- fs/ext4/mballoc.h | 30 ++++++++++++ 4 files changed, 172 insertions(+), 45 deletions(-) diff --git a/fs/ext4/Makefile b/fs/ext4/Makefile index 72206a292676..d836c3fe311b 100644 --- a/fs/ext4/Makefile +++ b/fs/ext4/Makefile @@ -14,7 +14,7 @@ ext4-y := balloc.o bitmap.o block_validity.o dir.o ext4_jbd2.o extents.o \ ext4-$(CONFIG_EXT4_FS_POSIX_ACL) += acl.o ext4-$(CONFIG_EXT4_FS_SECURITY) += xattr_security.o -ext4-inode-test-objs += inode-test.o -obj-$(CONFIG_EXT4_KUNIT_TESTS) += ext4-inode-test.o +ext4-test-objs += inode-test.o mballoc-test.o +obj-$(CONFIG_EXT4_KUNIT_TESTS) += ext4-test.o ext4-$(CONFIG_FS_VERITY) += verity.o ext4-$(CONFIG_FS_ENCRYPTION) += crypto.o diff --git a/fs/ext4/mballoc-test.c b/fs/ext4/mballoc-test.c index 9fbdf6a09489..6f5bfbb0e8a4 100644 --- a/fs/ext4/mballoc-test.c +++ b/fs/ext4/mballoc-test.c @@ -8,6 +8,7 @@ #include #include "ext4.h" +#include "mballoc.h" struct mbt_grp_ctx { struct buffer_head bitmap_bh; @@ -336,7 +337,7 @@ ext4_mb_mark_context_stub(handle_t *handle, struct super_block *sb, bool state, if (state) mb_set_bits(bitmap_bh->b_data, blkoff, len); else - mb_clear_bits(bitmap_bh->b_data, blkoff, len); + mb_clear_bits_test(bitmap_bh->b_data, blkoff, len); return 0; } @@ -413,14 +414,14 @@ static void test_new_blocks_simple(struct kunit *test) /* get block at goal */ ar.goal = ext4_group_first_block_no(sb, goal_group); - found = ext4_mb_new_blocks_simple(&ar, &err); + found = ext4_mb_new_blocks_simple_test(&ar, &err); KUNIT_ASSERT_EQ_MSG(test, ar.goal, found, "failed to alloc block at goal, expected %llu found %llu", ar.goal, found); /* get block after goal in goal group */ ar.goal = ext4_group_first_block_no(sb, goal_group); - found = ext4_mb_new_blocks_simple(&ar, &err); + found = ext4_mb_new_blocks_simple_test(&ar, &err); KUNIT_ASSERT_EQ_MSG(test, ar.goal + EXT4_C2B(sbi, 1), found, "failed to alloc block after goal in goal group, expected %llu found %llu", ar.goal + 1, found); @@ -428,7 +429,7 @@ static void test_new_blocks_simple(struct kunit *test) /* get block after goal group */ mbt_ctx_mark_used(sb, goal_group, 0, EXT4_CLUSTERS_PER_GROUP(sb)); ar.goal = ext4_group_first_block_no(sb, goal_group); - found = ext4_mb_new_blocks_simple(&ar, &err); + found = ext4_mb_new_blocks_simple_test(&ar, &err); KUNIT_ASSERT_EQ_MSG(test, ext4_group_first_block_no(sb, goal_group + 1), found, "failed to alloc block after goal group, expected %llu found %llu", @@ -438,7 +439,7 @@ static void test_new_blocks_simple(struct kunit *test) for (i = goal_group; i < ext4_get_groups_count(sb); i++) mbt_ctx_mark_used(sb, i, 0, EXT4_CLUSTERS_PER_GROUP(sb)); ar.goal = ext4_group_first_block_no(sb, goal_group); - found = ext4_mb_new_blocks_simple(&ar, &err); + found = ext4_mb_new_blocks_simple_test(&ar, &err); KUNIT_ASSERT_EQ_MSG(test, ext4_group_first_block_no(sb, 0) + EXT4_C2B(sbi, 1), found, "failed to alloc block before goal group, expected %llu found %llu", @@ -448,7 +449,7 @@ static void test_new_blocks_simple(struct kunit *test) for (i = 0; i < ext4_get_groups_count(sb); i++) mbt_ctx_mark_used(sb, i, 0, EXT4_CLUSTERS_PER_GROUP(sb)); ar.goal = ext4_group_first_block_no(sb, goal_group); - found = ext4_mb_new_blocks_simple(&ar, &err); + found = ext4_mb_new_blocks_simple_test(&ar, &err); KUNIT_ASSERT_NE_MSG(test, err, 0, "unexpectedly get block when no block is available"); } @@ -492,16 +493,16 @@ validate_free_blocks_simple(struct kunit *test, struct super_block *sb, continue; bitmap = mbt_ctx_bitmap(sb, i); - bit = mb_find_next_zero_bit(bitmap, max, 0); + bit = mb_find_next_zero_bit_test(bitmap, max, 0); KUNIT_ASSERT_EQ_MSG(test, bit, max, "free block on unexpected group %d", i); } bitmap = mbt_ctx_bitmap(sb, goal_group); - bit = mb_find_next_zero_bit(bitmap, max, 0); + bit = mb_find_next_zero_bit_test(bitmap, max, 0); KUNIT_ASSERT_EQ(test, bit, start); - bit = mb_find_next_bit(bitmap, max, bit + 1); + bit = mb_find_next_bit_test(bitmap, max, bit + 1); KUNIT_ASSERT_EQ(test, bit, start + len); } @@ -524,7 +525,7 @@ test_free_blocks_simple_range(struct kunit *test, ext4_group_t goal_group, block = ext4_group_first_block_no(sb, goal_group) + EXT4_C2B(sbi, start); - ext4_free_blocks_simple(inode, block, len); + ext4_free_blocks_simple_test(inode, block, len); validate_free_blocks_simple(test, sb, goal_group, start, len); mbt_ctx_mark_used(sb, goal_group, 0, EXT4_CLUSTERS_PER_GROUP(sb)); } @@ -566,15 +567,15 @@ test_mark_diskspace_used_range(struct kunit *test, bitmap = mbt_ctx_bitmap(sb, TEST_GOAL_GROUP); memset(bitmap, 0, sb->s_blocksize); - ret = ext4_mb_mark_diskspace_used(ac, NULL); + ret = ext4_mb_mark_diskspace_used_test(ac, NULL); KUNIT_ASSERT_EQ(test, ret, 0); max = EXT4_CLUSTERS_PER_GROUP(sb); - i = mb_find_next_bit(bitmap, max, 0); + i = mb_find_next_bit_test(bitmap, max, 0); KUNIT_ASSERT_EQ(test, i, start); - i = mb_find_next_zero_bit(bitmap, max, i + 1); + i = mb_find_next_zero_bit_test(bitmap, max, i + 1); KUNIT_ASSERT_EQ(test, i, start + len); - i = mb_find_next_bit(bitmap, max, i + 1); + i = mb_find_next_bit_test(bitmap, max, i + 1); KUNIT_ASSERT_EQ(test, max, i); } @@ -617,54 +618,54 @@ static void mbt_generate_buddy(struct super_block *sb, void *buddy, max = EXT4_CLUSTERS_PER_GROUP(sb); bb_h = buddy + sbi->s_mb_offsets[1]; - off = mb_find_next_zero_bit(bb, max, 0); + off = mb_find_next_zero_bit_test(bb, max, 0); grp->bb_first_free = off; while (off < max) { grp->bb_counters[0]++; grp->bb_free++; - if (!(off & 1) && !mb_test_bit(off + 1, bb)) { + if (!(off & 1) && !mb_test_bit_test(off + 1, bb)) { grp->bb_free++; grp->bb_counters[0]--; - mb_clear_bit(off >> 1, bb_h); + mb_clear_bit_test(off >> 1, bb_h); grp->bb_counters[1]++; grp->bb_largest_free_order = 1; off++; } - off = mb_find_next_zero_bit(bb, max, off + 1); + off = mb_find_next_zero_bit_test(bb, max, off + 1); } for (order = 1; order < MB_NUM_ORDERS(sb) - 1; order++) { bb = buddy + sbi->s_mb_offsets[order]; bb_h = buddy + sbi->s_mb_offsets[order + 1]; max = max >> 1; - off = mb_find_next_zero_bit(bb, max, 0); + off = mb_find_next_zero_bit_test(bb, max, 0); while (off < max) { - if (!(off & 1) && !mb_test_bit(off + 1, bb)) { + if (!(off & 1) && !mb_test_bit_test(off + 1, bb)) { mb_set_bits(bb, off, 2); grp->bb_counters[order] -= 2; - mb_clear_bit(off >> 1, bb_h); + mb_clear_bit_test(off >> 1, bb_h); grp->bb_counters[order + 1]++; grp->bb_largest_free_order = order + 1; off++; } - off = mb_find_next_zero_bit(bb, max, off + 1); + off = mb_find_next_zero_bit_test(bb, max, off + 1); } } max = EXT4_CLUSTERS_PER_GROUP(sb); - off = mb_find_next_zero_bit(bitmap, max, 0); + off = mb_find_next_zero_bit_test(bitmap, max, 0); while (off < max) { grp->bb_fragments++; - off = mb_find_next_bit(bitmap, max, off + 1); + off = mb_find_next_bit_test(bitmap, max, off + 1); if (off + 1 >= max) break; - off = mb_find_next_zero_bit(bitmap, max, off + 1); + off = mb_find_next_zero_bit_test(bitmap, max, off + 1); } } @@ -706,7 +707,7 @@ do_test_generate_buddy(struct kunit *test, struct super_block *sb, void *bitmap, /* needed by validation in ext4_mb_generate_buddy */ ext4_grp->bb_free = mbt_grp->bb_free; memset(ext4_buddy, 0xff, sb->s_blocksize); - ext4_mb_generate_buddy(sb, ext4_buddy, bitmap, TEST_GOAL_GROUP, + ext4_mb_generate_buddy_test(sb, ext4_buddy, bitmap, TEST_GOAL_GROUP, ext4_grp); KUNIT_ASSERT_EQ(test, memcmp(mbt_buddy, ext4_buddy, sb->s_blocksize), @@ -760,7 +761,7 @@ test_mb_mark_used_range(struct kunit *test, struct ext4_buddy *e4b, ex.fe_group = TEST_GOAL_GROUP; ext4_lock_group(sb, TEST_GOAL_GROUP); - mb_mark_used(e4b, &ex); + mb_mark_used_test(e4b, &ex); ext4_unlock_group(sb, TEST_GOAL_GROUP); mb_set_bits(bitmap, start, len); @@ -769,7 +770,7 @@ test_mb_mark_used_range(struct kunit *test, struct ext4_buddy *e4b, memset(buddy, 0xff, sb->s_blocksize); for (i = 0; i < MB_NUM_ORDERS(sb); i++) grp->bb_counters[i] = 0; - ext4_mb_generate_buddy(sb, buddy, bitmap, 0, grp); + ext4_mb_generate_buddy_test(sb, buddy, bitmap, 0, grp); KUNIT_ASSERT_EQ(test, memcmp(buddy, e4b->bd_buddy, sb->s_blocksize), 0); @@ -798,7 +799,7 @@ static void test_mb_mark_used(struct kunit *test) bb_counters[MB_NUM_ORDERS(sb)]), GFP_KERNEL); KUNIT_ASSERT_NOT_ERR_OR_NULL(test, grp); - ret = ext4_mb_load_buddy(sb, TEST_GOAL_GROUP, &e4b); + ret = ext4_mb_load_buddy_test(sb, TEST_GOAL_GROUP, &e4b); KUNIT_ASSERT_EQ(test, ret, 0); grp->bb_free = EXT4_CLUSTERS_PER_GROUP(sb); @@ -809,7 +810,7 @@ static void test_mb_mark_used(struct kunit *test) test_mb_mark_used_range(test, &e4b, ranges[i].start, ranges[i].len, bitmap, buddy, grp); - ext4_mb_unload_buddy(&e4b); + ext4_mb_unload_buddy_test(&e4b); } static void @@ -825,16 +826,16 @@ test_mb_free_blocks_range(struct kunit *test, struct ext4_buddy *e4b, return; ext4_lock_group(sb, e4b->bd_group); - mb_free_blocks(NULL, e4b, start, len); + mb_free_blocks_test(NULL, e4b, start, len); ext4_unlock_group(sb, e4b->bd_group); - mb_clear_bits(bitmap, start, len); + mb_clear_bits_test(bitmap, start, len); /* bypass bb_free validatoin in ext4_mb_generate_buddy */ grp->bb_free += len; memset(buddy, 0xff, sb->s_blocksize); for (i = 0; i < MB_NUM_ORDERS(sb); i++) grp->bb_counters[i] = 0; - ext4_mb_generate_buddy(sb, buddy, bitmap, 0, grp); + ext4_mb_generate_buddy_test(sb, buddy, bitmap, 0, grp); KUNIT_ASSERT_EQ(test, memcmp(buddy, e4b->bd_buddy, sb->s_blocksize), 0); @@ -865,7 +866,7 @@ static void test_mb_free_blocks(struct kunit *test) bb_counters[MB_NUM_ORDERS(sb)]), GFP_KERNEL); KUNIT_ASSERT_NOT_ERR_OR_NULL(test, grp); - ret = ext4_mb_load_buddy(sb, TEST_GOAL_GROUP, &e4b); + ret = ext4_mb_load_buddy_test(sb, TEST_GOAL_GROUP, &e4b); KUNIT_ASSERT_EQ(test, ret, 0); ex.fe_start = 0; @@ -873,7 +874,7 @@ static void test_mb_free_blocks(struct kunit *test) ex.fe_group = TEST_GOAL_GROUP; ext4_lock_group(sb, TEST_GOAL_GROUP); - mb_mark_used(&e4b, &ex); + mb_mark_used_test(&e4b, &ex); ext4_unlock_group(sb, TEST_GOAL_GROUP); grp->bb_free = 0; @@ -886,7 +887,7 @@ static void test_mb_free_blocks(struct kunit *test) test_mb_free_blocks_range(test, &e4b, ranges[i].start, ranges[i].len, bitmap, buddy, grp); - ext4_mb_unload_buddy(&e4b); + ext4_mb_unload_buddy_test(&e4b); } #define COUNT_FOR_ESTIMATE 100000 @@ -904,7 +905,7 @@ static void test_mb_mark_used_cost(struct kunit *test) if (sb->s_blocksize > PAGE_SIZE) kunit_skip(test, "blocksize exceeds pagesize"); - ret = ext4_mb_load_buddy(sb, TEST_GOAL_GROUP, &e4b); + ret = ext4_mb_load_buddy_test(sb, TEST_GOAL_GROUP, &e4b); KUNIT_ASSERT_EQ(test, ret, 0); ex.fe_group = TEST_GOAL_GROUP; @@ -918,7 +919,7 @@ static void test_mb_mark_used_cost(struct kunit *test) ex.fe_start = ranges[i].start; ex.fe_len = ranges[i].len; ext4_lock_group(sb, TEST_GOAL_GROUP); - mb_mark_used(&e4b, &ex); + mb_mark_used_test(&e4b, &ex); ext4_unlock_group(sb, TEST_GOAL_GROUP); } end = jiffies; @@ -929,14 +930,14 @@ static void test_mb_mark_used_cost(struct kunit *test) continue; ext4_lock_group(sb, TEST_GOAL_GROUP); - mb_free_blocks(NULL, &e4b, ranges[i].start, + mb_free_blocks_test(NULL, &e4b, ranges[i].start, ranges[i].len); ext4_unlock_group(sb, TEST_GOAL_GROUP); } } kunit_info(test, "costed jiffies %lu\n", all); - ext4_mb_unload_buddy(&e4b); + ext4_mb_unload_buddy_test(&e4b); } static const struct mbt_ext4_block_layout mbt_test_layouts[] = { diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index 705a879f13d3..93d37f6cf9c3 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -4088,7 +4088,7 @@ void ext4_exit_mballoc(void) #define EXT4_MB_BITMAP_MARKED_CHECK 0x0001 #define EXT4_MB_SYNC_UPDATE 0x0002 -static int +int ext4_mb_mark_context(handle_t *handle, struct super_block *sb, bool state, ext4_group_t group, ext4_grpblk_t blkoff, ext4_grpblk_t len, int flags, ext4_grpblk_t *ret_changed) @@ -7192,6 +7192,102 @@ ext4_mballoc_query_range( return error; } -#ifdef CONFIG_EXT4_KUNIT_TESTS -#include "mballoc-test.c" +#if IS_ENABLED(CONFIG_EXT4_KUNIT_TESTS) +void mb_clear_bits_test(void *bm, int cur, int len) +{ + mb_clear_bits(bm, cur, len); +} +EXPORT_SYMBOL_FOR_EXT4_TEST(mb_clear_bits_test); + +ext4_fsblk_t +ext4_mb_new_blocks_simple_test(struct ext4_allocation_request *ar, + int *errp) +{ + return ext4_mb_new_blocks_simple(ar, errp); +} +EXPORT_SYMBOL_FOR_EXT4_TEST(ext4_mb_new_blocks_simple_test); + +int mb_find_next_zero_bit_test(void *addr, int max, int start) +{ + return mb_find_next_zero_bit(addr, max, start); +} +EXPORT_SYMBOL_FOR_EXT4_TEST(mb_find_next_zero_bit_test); + +int mb_find_next_bit_test(void *addr, int max, int start) +{ + return mb_find_next_bit(addr, max, start); +} +EXPORT_SYMBOL_FOR_EXT4_TEST(mb_find_next_bit_test); + +void mb_clear_bit_test(int bit, void *addr) +{ + mb_clear_bit(bit, addr); +} +EXPORT_SYMBOL_FOR_EXT4_TEST(mb_clear_bit_test); + +int mb_test_bit_test(int bit, void *addr) +{ + return mb_test_bit(bit, addr); +} +EXPORT_SYMBOL_FOR_EXT4_TEST(mb_test_bit_test); + +int ext4_mb_mark_diskspace_used_test(struct ext4_allocation_context *ac, + handle_t *handle) +{ + return ext4_mb_mark_diskspace_used(ac, handle); +} +EXPORT_SYMBOL_FOR_EXT4_TEST(ext4_mb_mark_diskspace_used_test); + +int mb_mark_used_test(struct ext4_buddy *e4b, struct ext4_free_extent *ex) +{ + return mb_mark_used(e4b, ex); +} +EXPORT_SYMBOL_FOR_EXT4_TEST(mb_mark_used_test); + +void ext4_mb_generate_buddy_test(struct super_block *sb, void *buddy, + void *bitmap, ext4_group_t group, + struct ext4_group_info *grp) +{ + ext4_mb_generate_buddy(sb, buddy, bitmap, group, grp); +} +EXPORT_SYMBOL_FOR_EXT4_TEST(ext4_mb_generate_buddy_test); + +int ext4_mb_load_buddy_test(struct super_block *sb, ext4_group_t group, + struct ext4_buddy *e4b) +{ + return ext4_mb_load_buddy(sb, group, e4b); +} +EXPORT_SYMBOL_FOR_EXT4_TEST(ext4_mb_load_buddy_test); + +void ext4_mb_unload_buddy_test(struct ext4_buddy *e4b) +{ + ext4_mb_unload_buddy(e4b); +} +EXPORT_SYMBOL_FOR_EXT4_TEST(ext4_mb_unload_buddy_test); + +void mb_free_blocks_test(struct inode *inode, struct ext4_buddy *e4b, + int first, int count) +{ + mb_free_blocks(inode, e4b, first, count); +} +EXPORT_SYMBOL_FOR_EXT4_TEST(mb_free_blocks_test); + +void ext4_free_blocks_simple_test(struct inode *inode, ext4_fsblk_t block, + unsigned long count) +{ + return ext4_free_blocks_simple(inode, block, count); +} +EXPORT_SYMBOL_FOR_EXT4_TEST(ext4_free_blocks_simple_test); + +EXPORT_SYMBOL_FOR_EXT4_TEST(ext4_wait_block_bitmap); +EXPORT_SYMBOL_FOR_EXT4_TEST(ext4_mb_init); +EXPORT_SYMBOL_FOR_EXT4_TEST(ext4_get_group_desc); +EXPORT_SYMBOL_FOR_EXT4_TEST(ext4_count_free_clusters); +EXPORT_SYMBOL_FOR_EXT4_TEST(ext4_get_group_info); +EXPORT_SYMBOL_FOR_EXT4_TEST(ext4_free_group_clusters_set); +EXPORT_SYMBOL_FOR_EXT4_TEST(ext4_mb_release); +EXPORT_SYMBOL_FOR_EXT4_TEST(ext4_read_block_bitmap_nowait); +EXPORT_SYMBOL_FOR_EXT4_TEST(mb_set_bits); +EXPORT_SYMBOL_FOR_EXT4_TEST(ext4_fc_init_inode); +EXPORT_SYMBOL_FOR_EXT4_TEST(ext4_mb_mark_context); #endif diff --git a/fs/ext4/mballoc.h b/fs/ext4/mballoc.h index 15a049f05d04..39333ce72cbd 100644 --- a/fs/ext4/mballoc.h +++ b/fs/ext4/mballoc.h @@ -270,4 +270,34 @@ ext4_mballoc_query_range( ext4_mballoc_query_range_fn formatter, void *priv); +extern int ext4_mb_mark_context(handle_t *handle, + struct super_block *sb, bool state, + ext4_group_t group, ext4_grpblk_t blkoff, + ext4_grpblk_t len, int flags, + ext4_grpblk_t *ret_changed); +#if IS_ENABLED(CONFIG_EXT4_KUNIT_TESTS) +extern void mb_clear_bits_test(void *bm, int cur, int len); +extern ext4_fsblk_t +ext4_mb_new_blocks_simple_test(struct ext4_allocation_request *ar, + int *errp); +extern int mb_find_next_zero_bit_test(void *addr, int max, int start); +extern int mb_find_next_bit_test(void *addr, int max, int start); +extern void mb_clear_bit_test(int bit, void *addr); +extern int mb_test_bit_test(int bit, void *addr); +extern int +ext4_mb_mark_diskspace_used_test(struct ext4_allocation_context *ac, + handle_t *handle); +extern int mb_mark_used_test(struct ext4_buddy *e4b, + struct ext4_free_extent *ex); +extern void ext4_mb_generate_buddy_test(struct super_block *sb, + void *buddy, void *bitmap, ext4_group_t group, + struct ext4_group_info *grp); +extern int ext4_mb_load_buddy_test(struct super_block *sb, + ext4_group_t group, struct ext4_buddy *e4b); +extern void ext4_mb_unload_buddy_test(struct ext4_buddy *e4b); +extern void mb_free_blocks_test(struct inode *inode, + struct ext4_buddy *e4b, int first, int count); +extern void ext4_free_blocks_simple_test(struct inode *inode, + ext4_fsblk_t block, unsigned long count); +#endif #endif From 9e1b14320b154094bb2c1bee6d8c6cb851fc3215 Mon Sep 17 00:00:00 2001 From: Ye Bin Date: Sat, 14 Mar 2026 15:52:58 +0800 Subject: [PATCH 282/401] ext4: fix extents-test.c is not compiled when EXT4_KUNIT_TESTS=M Now, only EXT4_KUNIT_TESTS=Y testcase will be compiled in 'extents.c'. To solve this issue, the ext4 test code needs to be decoupled. The 'extents-test' module is compiled into 'ext4-test' module. Fixes: cb1e0c1d1fad ("ext4: kunit tests for extent splitting and conversion") Signed-off-by: Ye Bin Reviewed-by: Jan Kara Link: https://patch.msgid.link/20260314075258.1317579-4-yebin@huaweicloud.com Signed-off-by: Theodore Ts'o --- fs/ext4/Makefile | 3 ++- fs/ext4/ext4_extents.h | 12 ++++++++++++ fs/ext4/extents-test.c | 8 ++++---- fs/ext4/extents.c | 39 +++++++++++++++++++++++++++++++++------ 4 files changed, 51 insertions(+), 11 deletions(-) diff --git a/fs/ext4/Makefile b/fs/ext4/Makefile index d836c3fe311b..3baee4e7c1cf 100644 --- a/fs/ext4/Makefile +++ b/fs/ext4/Makefile @@ -14,7 +14,8 @@ ext4-y := balloc.o bitmap.o block_validity.o dir.o ext4_jbd2.o extents.o \ ext4-$(CONFIG_EXT4_FS_POSIX_ACL) += acl.o ext4-$(CONFIG_EXT4_FS_SECURITY) += xattr_security.o -ext4-test-objs += inode-test.o mballoc-test.o +ext4-test-objs += inode-test.o mballoc-test.o \ + extents-test.o obj-$(CONFIG_EXT4_KUNIT_TESTS) += ext4-test.o ext4-$(CONFIG_FS_VERITY) += verity.o ext4-$(CONFIG_FS_ENCRYPTION) += crypto.o diff --git a/fs/ext4/ext4_extents.h b/fs/ext4/ext4_extents.h index c484125d963f..ebaf7cc42430 100644 --- a/fs/ext4/ext4_extents.h +++ b/fs/ext4/ext4_extents.h @@ -264,5 +264,17 @@ static inline void ext4_idx_store_pblock(struct ext4_extent_idx *ix, 0xffff); } +extern int __ext4_ext_dirty(const char *where, unsigned int line, + handle_t *handle, struct inode *inode, + struct ext4_ext_path *path); +extern int ext4_ext_zeroout(struct inode *inode, struct ext4_extent *ex); +#if IS_ENABLED(CONFIG_EXT4_KUNIT_TESTS) +extern int ext4_ext_space_root_idx_test(struct inode *inode, int check); +extern struct ext4_ext_path *ext4_split_convert_extents_test( + handle_t *handle, struct inode *inode, + struct ext4_map_blocks *map, + struct ext4_ext_path *path, + int flags, unsigned int *allocated); +#endif #endif /* _EXT4_EXTENTS */ diff --git a/fs/ext4/extents-test.c b/fs/ext4/extents-test.c index a6b3e6b592a5..5496b2c8e2cd 100644 --- a/fs/ext4/extents-test.c +++ b/fs/ext4/extents-test.c @@ -282,8 +282,8 @@ static int extents_kunit_init(struct kunit *test) eh->eh_depth = 0; eh->eh_entries = cpu_to_le16(1); eh->eh_magic = EXT4_EXT_MAGIC; - eh->eh_max = - cpu_to_le16(ext4_ext_space_root_idx(&k_ctx.k_ei->vfs_inode, 0)); + eh->eh_max = cpu_to_le16(ext4_ext_space_root_idx_test( + &k_ctx.k_ei->vfs_inode, 0)); eh->eh_generation = 0; /* @@ -386,8 +386,8 @@ static void test_split_convert(struct kunit *test) switch (param->type) { case TEST_SPLIT_CONVERT: - path = ext4_split_convert_extents(NULL, inode, &map, path, - param->split_flags, NULL); + path = ext4_split_convert_extents_test(NULL, inode, &map, + path, param->split_flags, NULL); break; case TEST_CREATE_BLOCKS: ext4_map_create_blocks_helper(test, inode, &map, param->split_flags); diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 8744d3845577..da78eb23aaa6 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -184,9 +184,9 @@ static int ext4_ext_get_access(handle_t *handle, struct inode *inode, * - ENOMEM * - EIO */ -static int __ext4_ext_dirty(const char *where, unsigned int line, - handle_t *handle, struct inode *inode, - struct ext4_ext_path *path) +int __ext4_ext_dirty(const char *where, unsigned int line, + handle_t *handle, struct inode *inode, + struct ext4_ext_path *path) { int err; @@ -3159,7 +3159,7 @@ static void ext4_zeroout_es(struct inode *inode, struct ext4_extent *ex) } /* FIXME!! we need to try to merge to left or right after zero-out */ -static int ext4_ext_zeroout(struct inode *inode, struct ext4_extent *ex) +int ext4_ext_zeroout(struct inode *inode, struct ext4_extent *ex) { ext4_fsblk_t ee_pblock; unsigned int ee_len; @@ -6257,6 +6257,33 @@ int ext4_ext_clear_bb(struct inode *inode) return 0; } -#ifdef CONFIG_EXT4_KUNIT_TESTS -#include "extents-test.c" +#if IS_ENABLED(CONFIG_EXT4_KUNIT_TESTS) +int ext4_ext_space_root_idx_test(struct inode *inode, int check) +{ + return ext4_ext_space_root_idx(inode, check); +} +EXPORT_SYMBOL_FOR_EXT4_TEST(ext4_ext_space_root_idx_test); + +struct ext4_ext_path *ext4_split_convert_extents_test(handle_t *handle, + struct inode *inode, struct ext4_map_blocks *map, + struct ext4_ext_path *path, int flags, + unsigned int *allocated) +{ + return ext4_split_convert_extents(handle, inode, map, path, + flags, allocated); +} +EXPORT_SYMBOL_FOR_EXT4_TEST(ext4_split_convert_extents_test); + +EXPORT_SYMBOL_FOR_EXT4_TEST(__ext4_ext_dirty); +EXPORT_SYMBOL_FOR_EXT4_TEST(ext4_ext_zeroout); +EXPORT_SYMBOL_FOR_EXT4_TEST(ext4_es_register_shrinker); +EXPORT_SYMBOL_FOR_EXT4_TEST(ext4_es_unregister_shrinker); +EXPORT_SYMBOL_FOR_EXT4_TEST(ext4_map_create_blocks); +EXPORT_SYMBOL_FOR_EXT4_TEST(ext4_es_init_tree); +EXPORT_SYMBOL_FOR_EXT4_TEST(ext4_es_lookup_extent); +EXPORT_SYMBOL_FOR_EXT4_TEST(ext4_es_insert_extent); +EXPORT_SYMBOL_FOR_EXT4_TEST(ext4_ext_insert_extent); +EXPORT_SYMBOL_FOR_EXT4_TEST(ext4_find_extent); +EXPORT_SYMBOL_FOR_EXT4_TEST(ext4_issue_zeroout); +EXPORT_SYMBOL_FOR_EXT4_TEST(ext4_map_query_blocks); #endif From 3822743dc20386d9897e999dbb990befa3a5b3f8 Mon Sep 17 00:00:00 2001 From: Helen Koike Date: Tue, 17 Mar 2026 11:23:10 -0300 Subject: [PATCH 283/401] ext4: reject mount if bigalloc with s_first_data_block != 0 bigalloc with s_first_data_block != 0 is not supported, reject mounting it. Signed-off-by: Helen Koike Suggested-by: Theodore Ts'o Reported-by: syzbot+b73703b873a33d8eb8f6@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=b73703b873a33d8eb8f6 Link: https://patch.msgid.link/20260317142325.135074-1-koike@igalia.com Signed-off-by: Theodore Ts'o Cc: stable@kernel.org --- fs/ext4/super.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 43f680c750ae..152c58fe8e01 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -3633,6 +3633,13 @@ int ext4_feature_set_ok(struct super_block *sb, int readonly) "extents feature\n"); return 0; } + if (ext4_has_feature_bigalloc(sb) && + le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block)) { + ext4_msg(sb, KERN_WARNING, + "bad geometry: bigalloc file system with non-zero " + "first_data_block\n"); + return 0; + } #if !IS_ENABLED(CONFIG_QUOTA) || !IS_ENABLED(CONFIG_QFMT_V2) if (!readonly && (ext4_has_feature_quota(sb) || From 496bb99b7e66f48b178126626f47e9ba79e2d0fa Mon Sep 17 00:00:00 2001 From: Zqiang Date: Thu, 19 Mar 2026 17:45:45 +0800 Subject: [PATCH 284/401] ext4: fix the might_sleep() warnings in kvfree() Use the kvfree() in the RCU read critical section can trigger the following warnings: EXT4-fs (vdb): unmounting filesystem cd983e5b-3c83-4f5a-a136-17b00eb9d018. WARNING: suspicious RCU usage ./include/linux/rcupdate.h:409 Illegal context switch in RCU read-side critical section! other info that might help us debug this: rcu_scheduler_active = 2, debug_locks = 1 Call Trace: dump_stack_lvl+0xbb/0xd0 dump_stack+0x14/0x20 lockdep_rcu_suspicious+0x15a/0x1b0 __might_resched+0x375/0x4d0 ? put_object.part.0+0x2c/0x50 __might_sleep+0x108/0x160 vfree+0x58/0x910 ? ext4_group_desc_free+0x27/0x270 kvfree+0x23/0x40 ext4_group_desc_free+0x111/0x270 ext4_put_super+0x3c8/0xd40 generic_shutdown_super+0x14c/0x4a0 ? __pfx_shrinker_free+0x10/0x10 kill_block_super+0x40/0x90 ext4_kill_sb+0x6d/0xb0 deactivate_locked_super+0xb4/0x180 deactivate_super+0x7e/0xa0 cleanup_mnt+0x296/0x3e0 __cleanup_mnt+0x16/0x20 task_work_run+0x157/0x250 ? __pfx_task_work_run+0x10/0x10 ? exit_to_user_mode_loop+0x6a/0x550 exit_to_user_mode_loop+0x102/0x550 do_syscall_64+0x44a/0x500 entry_SYSCALL_64_after_hwframe+0x77/0x7f BUG: sleeping function called from invalid context at mm/vmalloc.c:3441 in_atomic(): 1, irqs_disabled(): 0, non_block: 0, pid: 556, name: umount preempt_count: 1, expected: 0 CPU: 3 UID: 0 PID: 556 Comm: umount Call Trace: dump_stack_lvl+0xbb/0xd0 dump_stack+0x14/0x20 __might_resched+0x275/0x4d0 ? put_object.part.0+0x2c/0x50 __might_sleep+0x108/0x160 vfree+0x58/0x910 ? ext4_group_desc_free+0x27/0x270 kvfree+0x23/0x40 ext4_group_desc_free+0x111/0x270 ext4_put_super+0x3c8/0xd40 generic_shutdown_super+0x14c/0x4a0 ? __pfx_shrinker_free+0x10/0x10 kill_block_super+0x40/0x90 ext4_kill_sb+0x6d/0xb0 deactivate_locked_super+0xb4/0x180 deactivate_super+0x7e/0xa0 cleanup_mnt+0x296/0x3e0 __cleanup_mnt+0x16/0x20 task_work_run+0x157/0x250 ? __pfx_task_work_run+0x10/0x10 ? exit_to_user_mode_loop+0x6a/0x550 exit_to_user_mode_loop+0x102/0x550 do_syscall_64+0x44a/0x500 entry_SYSCALL_64_after_hwframe+0x77/0x7f The above scenarios occur in initialization failures and teardown paths, there are no parallel operations on the resources released by kvfree(), this commit therefore remove rcu_read_lock/unlock() and use rcu_access_pointer() instead of rcu_dereference() operations. Fixes: 7c990728b99e ("ext4: fix potential race between s_flex_groups online resizing and access") Fixes: df3da4ea5a0f ("ext4: fix potential race between s_group_info online resizing and access") Signed-off-by: Zqiang Reviewed-by: Baokun Li Link: https://patch.msgid.link/20260319094545.19291-1-qiang.zhang@linux.dev Signed-off-by: Theodore Ts'o Cc: stable@kernel.org --- fs/ext4/mballoc.c | 10 +++------- fs/ext4/super.c | 8 ++------ 2 files changed, 5 insertions(+), 13 deletions(-) diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index 93d37f6cf9c3..bb6faebf9b6d 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -3584,9 +3584,7 @@ static int ext4_mb_init_backend(struct super_block *sb) rcu_read_unlock(); iput(sbi->s_buddy_cache); err_freesgi: - rcu_read_lock(); - kvfree(rcu_dereference(sbi->s_group_info)); - rcu_read_unlock(); + kvfree(rcu_access_pointer(sbi->s_group_info)); return -ENOMEM; } @@ -3901,7 +3899,8 @@ void ext4_mb_release(struct super_block *sb) WARN_ON_ONCE(!list_empty(&sbi->s_discard_list)); } - if (sbi->s_group_info) { + group_info = rcu_access_pointer(sbi->s_group_info); + if (group_info) { for (i = 0; i < ngroups; i++) { cond_resched(); grinfo = ext4_get_group_info(sb, i); @@ -3919,12 +3918,9 @@ void ext4_mb_release(struct super_block *sb) num_meta_group_infos = (ngroups + EXT4_DESC_PER_BLOCK(sb) - 1) >> EXT4_DESC_PER_BLOCK_BITS(sb); - rcu_read_lock(); - group_info = rcu_dereference(sbi->s_group_info); for (i = 0; i < num_meta_group_infos; i++) kfree(group_info[i]); kvfree(group_info); - rcu_read_unlock(); } ext4_mb_avg_fragment_size_destroy(sbi); ext4_mb_largest_free_orders_destroy(sbi); diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 152c58fe8e01..baa067eb8cf4 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -1254,12 +1254,10 @@ static void ext4_group_desc_free(struct ext4_sb_info *sbi) struct buffer_head **group_desc; int i; - rcu_read_lock(); - group_desc = rcu_dereference(sbi->s_group_desc); + group_desc = rcu_access_pointer(sbi->s_group_desc); for (i = 0; i < sbi->s_gdb_count; i++) brelse(group_desc[i]); kvfree(group_desc); - rcu_read_unlock(); } static void ext4_flex_groups_free(struct ext4_sb_info *sbi) @@ -1267,14 +1265,12 @@ static void ext4_flex_groups_free(struct ext4_sb_info *sbi) struct flex_groups **flex_groups; int i; - rcu_read_lock(); - flex_groups = rcu_dereference(sbi->s_flex_groups); + flex_groups = rcu_access_pointer(sbi->s_flex_groups); if (flex_groups) { for (i = 0; i < sbi->s_flex_groups_allocated; i++) kvfree(flex_groups[i]); kvfree(flex_groups); } - rcu_read_unlock(); } static void ext4_put_super(struct super_block *sb) From a01aee7cafc575bb82f5529e8734e7052f9b16ea Mon Sep 17 00:00:00 2001 From: Yang Yang Date: Thu, 26 Mar 2026 03:44:39 +0000 Subject: [PATCH 285/401] bridge: br_nd_send: linearize skb before parsing ND options br_nd_send() parses neighbour discovery options from ns->opt[] and assumes that these options are in the linear part of request. Its callers only guarantee that the ICMPv6 header and target address are available, so the option area can still be non-linear. Parsing ns->opt[] in that case can access data past the linear buffer. Linearize request before option parsing and derive ns from the linear network header. Fixes: ed842faeb2bd ("bridge: suppress nd pkts on BR_NEIGH_SUPPRESS ports") Reported-by: Yifan Wu Reported-by: Juefei Pu Tested-by: Ao Zhou Co-developed-by: Yuan Tan Signed-off-by: Yuan Tan Suggested-by: Xin Liu Signed-off-by: Yang Yang Reviewed-by: Ido Schimmel Acked-by: Nikolay Aleksandrov Link: https://patch.msgid.link/20260326034441.2037420-2-n05ec@lzu.edu.cn Signed-off-by: Jakub Kicinski --- net/bridge/br_arp_nd_proxy.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/net/bridge/br_arp_nd_proxy.c b/net/bridge/br_arp_nd_proxy.c index 1e2b51769eec..af3d1e33f50b 100644 --- a/net/bridge/br_arp_nd_proxy.c +++ b/net/bridge/br_arp_nd_proxy.c @@ -251,12 +251,12 @@ struct nd_msg *br_is_nd_neigh_msg(const struct sk_buff *skb, struct nd_msg *msg) static void br_nd_send(struct net_bridge *br, struct net_bridge_port *p, struct sk_buff *request, struct neighbour *n, - __be16 vlan_proto, u16 vlan_tci, struct nd_msg *ns) + __be16 vlan_proto, u16 vlan_tci) { struct net_device *dev = request->dev; struct net_bridge_vlan_group *vg; + struct nd_msg *na, *ns; struct sk_buff *reply; - struct nd_msg *na; struct ipv6hdr *pip6; int na_olen = 8; /* opt hdr + ETH_ALEN for target */ int ns_olen; @@ -264,7 +264,7 @@ static void br_nd_send(struct net_bridge *br, struct net_bridge_port *p, u8 *daddr; u16 pvid; - if (!dev) + if (!dev || skb_linearize(request)) return; len = LL_RESERVED_SPACE(dev) + sizeof(struct ipv6hdr) + @@ -281,6 +281,8 @@ static void br_nd_send(struct net_bridge *br, struct net_bridge_port *p, skb_set_mac_header(reply, 0); daddr = eth_hdr(request)->h_source; + ns = (struct nd_msg *)(skb_network_header(request) + + sizeof(struct ipv6hdr)); /* Do we need option processing ? */ ns_olen = request->len - (skb_network_offset(request) + @@ -472,9 +474,9 @@ void br_do_suppress_nd(struct sk_buff *skb, struct net_bridge *br, if (vid != 0) br_nd_send(br, p, skb, n, skb->vlan_proto, - skb_vlan_tag_get(skb), msg); + skb_vlan_tag_get(skb)); else - br_nd_send(br, p, skb, n, 0, 0, msg); + br_nd_send(br, p, skb, n, 0, 0); replied = true; } From 850837965af15707fd3142c1cf3c5bfaf022299b Mon Sep 17 00:00:00 2001 From: Yang Yang Date: Thu, 26 Mar 2026 03:44:40 +0000 Subject: [PATCH 286/401] bridge: br_nd_send: validate ND option lengths br_nd_send() walks ND options according to option-provided lengths. A malformed option can make the parser advance beyond the computed option span or use a too-short source LLADDR option payload. Validate option lengths against the remaining NS option area before advancing, and only read source LLADDR when the option is large enough for an Ethernet address. Fixes: ed842faeb2bd ("bridge: suppress nd pkts on BR_NEIGH_SUPPRESS ports") Cc: stable@vger.kernel.org Reported-by: Yifan Wu Reported-by: Juefei Pu Tested-by: Ao Zhou Co-developed-by: Yuan Tan Signed-off-by: Yuan Tan Suggested-by: Xin Liu Signed-off-by: Yang Yang Reviewed-by: Ido Schimmel Acked-by: Nikolay Aleksandrov Link: https://patch.msgid.link/20260326034441.2037420-3-n05ec@lzu.edu.cn Signed-off-by: Jakub Kicinski --- net/bridge/br_arp_nd_proxy.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/net/bridge/br_arp_nd_proxy.c b/net/bridge/br_arp_nd_proxy.c index af3d1e33f50b..6b5595868a39 100644 --- a/net/bridge/br_arp_nd_proxy.c +++ b/net/bridge/br_arp_nd_proxy.c @@ -288,12 +288,14 @@ static void br_nd_send(struct net_bridge *br, struct net_bridge_port *p, ns_olen = request->len - (skb_network_offset(request) + sizeof(struct ipv6hdr)) - sizeof(*ns); for (i = 0; i < ns_olen - 1; i += (ns->opt[i + 1] << 3)) { - if (!ns->opt[i + 1]) { + if (!ns->opt[i + 1] || i + (ns->opt[i + 1] << 3) > ns_olen) { kfree_skb(reply); return; } if (ns->opt[i] == ND_OPT_SOURCE_LL_ADDR) { - daddr = ns->opt + i + sizeof(struct nd_opt_hdr); + if ((ns->opt[i + 1] << 3) >= + sizeof(struct nd_opt_hdr) + ETH_ALEN) + daddr = ns->opt + i + sizeof(struct nd_opt_hdr); break; } } From afa9a05e6c4971bd5586f1b304e14d61fb3d9385 Mon Sep 17 00:00:00 2001 From: Yang Yang Date: Thu, 26 Mar 2026 03:44:41 +0000 Subject: [PATCH 287/401] vxlan: validate ND option lengths in vxlan_na_create vxlan_na_create() walks ND options according to option-provided lengths. A malformed option can make the parser advance beyond the computed option span or use a too-short source LLADDR option payload. Validate option lengths against the remaining NS option area before advancing, and only read source LLADDR when the option is large enough for an Ethernet address. Fixes: 4b29dba9c085 ("vxlan: fix nonfunctional neigh_reduce()") Cc: stable@vger.kernel.org Reported-by: Yifan Wu Reported-by: Juefei Pu Tested-by: Ao Zhou Co-developed-by: Yuan Tan Signed-off-by: Yuan Tan Suggested-by: Xin Liu Signed-off-by: Yang Yang Reviewed-by: Ido Schimmel Acked-by: Nikolay Aleksandrov Link: https://patch.msgid.link/20260326034441.2037420-4-n05ec@lzu.edu.cn Signed-off-by: Jakub Kicinski --- drivers/net/vxlan/vxlan_core.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/net/vxlan/vxlan_core.c b/drivers/net/vxlan/vxlan_core.c index 17c941aac32d..a94ac82a6136 100644 --- a/drivers/net/vxlan/vxlan_core.c +++ b/drivers/net/vxlan/vxlan_core.c @@ -1965,12 +1965,14 @@ static struct sk_buff *vxlan_na_create(struct sk_buff *request, ns_olen = request->len - skb_network_offset(request) - sizeof(struct ipv6hdr) - sizeof(*ns); for (i = 0; i < ns_olen-1; i += (ns->opt[i+1]<<3)) { - if (!ns->opt[i + 1]) { + if (!ns->opt[i + 1] || i + (ns->opt[i + 1] << 3) > ns_olen) { kfree_skb(reply); return NULL; } if (ns->opt[i] == ND_OPT_SOURCE_LL_ADDR) { - daddr = ns->opt + i + sizeof(struct nd_opt_hdr); + if ((ns->opt[i + 1] << 3) >= + sizeof(struct nd_opt_hdr) + ETH_ALEN) + daddr = ns->opt + i + sizeof(struct nd_opt_hdr); break; } } From d15e4b0a418537aafa56b2cb80d44add83e83697 Mon Sep 17 00:00:00 2001 From: Jiayuan Chen Date: Thu, 19 Mar 2026 20:03:35 +0800 Subject: [PATCH 288/401] ext4: fix use-after-free in update_super_work when racing with umount Commit b98535d09179 ("ext4: fix bug_on in start_this_handle during umount filesystem") moved ext4_unregister_sysfs() before flushing s_sb_upd_work to prevent new error work from being queued via /proc/fs/ext4/xx/mb_groups reads during unmount. However, this introduced a use-after-free because update_super_work calls ext4_notify_error_sysfs() -> sysfs_notify() which accesses the kobject's kernfs_node after it has been freed by kobject_del() in ext4_unregister_sysfs(): update_super_work ext4_put_super ----------------- -------------- ext4_unregister_sysfs(sb) kobject_del(&sbi->s_kobj) __kobject_del() sysfs_remove_dir() kobj->sd = NULL sysfs_put(sd) kernfs_put() // RCU free ext4_notify_error_sysfs(sbi) sysfs_notify(&sbi->s_kobj) kn = kobj->sd // stale pointer kernfs_get(kn) // UAF on freed kernfs_node ext4_journal_destroy() flush_work(&sbi->s_sb_upd_work) Instead of reordering the teardown sequence, fix this by making ext4_notify_error_sysfs() detect that sysfs has already been torn down by checking s_kobj.state_in_sysfs, and skipping the sysfs_notify() call in that case. A dedicated mutex (s_error_notify_mutex) serializes ext4_notify_error_sysfs() against kobject_del() in ext4_unregister_sysfs() to prevent TOCTOU races where the kobject could be deleted between the state_in_sysfs check and the sysfs_notify() call. Fixes: b98535d09179 ("ext4: fix bug_on in start_this_handle during umount filesystem") Cc: Jiayuan Chen Suggested-by: Jan Kara Signed-off-by: Jiayuan Chen Reviewed-by: Ritesh Harjani (IBM) Reviewed-by: Jan Kara Link: https://patch.msgid.link/20260319120336.157873-1-jiayuan.chen@linux.dev Signed-off-by: Theodore Ts'o Cc: stable@kernel.org --- fs/ext4/ext4.h | 1 + fs/ext4/super.c | 1 + fs/ext4/sysfs.c | 10 +++++++++- 3 files changed, 11 insertions(+), 1 deletion(-) diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index c579f68b3c11..7617e2d454ea 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -1570,6 +1570,7 @@ struct ext4_sb_info { struct proc_dir_entry *s_proc; struct kobject s_kobj; struct completion s_kobj_unregister; + struct mutex s_error_notify_mutex; /* protects sysfs_notify vs kobject_del */ struct super_block *s_sb; struct buffer_head *s_mmp_bh; diff --git a/fs/ext4/super.c b/fs/ext4/super.c index baa067eb8cf4..cb69a9b38b7c 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -5406,6 +5406,7 @@ static int __ext4_fill_super(struct fs_context *fc, struct super_block *sb) timer_setup(&sbi->s_err_report, print_daily_error_info, 0); spin_lock_init(&sbi->s_error_lock); + mutex_init(&sbi->s_error_notify_mutex); INIT_WORK(&sbi->s_sb_upd_work, update_super_work); err = ext4_group_desc_init(sb, es, logical_sb_block, &first_not_zeroed); diff --git a/fs/ext4/sysfs.c b/fs/ext4/sysfs.c index b87d7bdab06a..923b375e017f 100644 --- a/fs/ext4/sysfs.c +++ b/fs/ext4/sysfs.c @@ -597,7 +597,10 @@ static const struct kobj_type ext4_feat_ktype = { void ext4_notify_error_sysfs(struct ext4_sb_info *sbi) { - sysfs_notify(&sbi->s_kobj, NULL, "errors_count"); + mutex_lock(&sbi->s_error_notify_mutex); + if (sbi->s_kobj.state_in_sysfs) + sysfs_notify(&sbi->s_kobj, NULL, "errors_count"); + mutex_unlock(&sbi->s_error_notify_mutex); } static struct kobject *ext4_root; @@ -610,8 +613,10 @@ int ext4_register_sysfs(struct super_block *sb) int err; init_completion(&sbi->s_kobj_unregister); + mutex_lock(&sbi->s_error_notify_mutex); err = kobject_init_and_add(&sbi->s_kobj, &ext4_sb_ktype, ext4_root, "%s", sb->s_id); + mutex_unlock(&sbi->s_error_notify_mutex); if (err) { kobject_put(&sbi->s_kobj); wait_for_completion(&sbi->s_kobj_unregister); @@ -644,7 +649,10 @@ void ext4_unregister_sysfs(struct super_block *sb) if (sbi->s_proc) remove_proc_subtree(sb->s_id, ext4_proc_root); + + mutex_lock(&sbi->s_error_notify_mutex); kobject_del(&sbi->s_kobj); + mutex_unlock(&sbi->s_error_notify_mutex); } int __init ext4_init_sysfs(void) From 0c90eed1b95335eba4f546e6742a8e4503d79349 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Fri, 20 Mar 2026 10:04:29 +0100 Subject: [PATCH 289/401] ext4: fix deadlock on inode reallocation Currently there is a race in ext4 when reallocating freed inode resulting in a deadlock: Task1 Task2 ext4_evict_inode() handle = ext4_journal_start(); ... if (IS_SYNC(inode)) handle->h_sync = 1; ext4_free_inode() ext4_new_inode() handle = ext4_journal_start() finds the bit in inode bitmap already clear insert_inode_locked() waits for inode to be removed from the hash. ext4_journal_stop(handle) jbd2_journal_stop(handle) jbd2_log_wait_commit(journal, tid); - deadlocks waiting for transaction handle Task2 holds Fix the problem by removing inode from the hash already in ext4_clear_inode() by which time all IO for the inode is done so reuse is already fine but we are still before possibly blocking on transaction commit. Reported-by: "Lai, Yi" Link: https://lore.kernel.org/all/abNvb2PcrKj1FBeC@ly-workstation Fixes: 88ec797c4680 ("fs: make insert_inode_locked() wait for inode destruction") CC: stable@vger.kernel.org Signed-off-by: Jan Kara Link: https://patch.msgid.link/20260320090428.24899-2-jack@suse.cz Signed-off-by: Theodore Ts'o Cc: stable@kernel.org --- fs/ext4/super.c | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/fs/ext4/super.c b/fs/ext4/super.c index cb69a9b38b7c..a34efb44e73d 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -1523,6 +1523,27 @@ void ext4_clear_inode(struct inode *inode) invalidate_inode_buffers(inode); clear_inode(inode); ext4_discard_preallocations(inode); + /* + * We must remove the inode from the hash before ext4_free_inode() + * clears the bit in inode bitmap as otherwise another process reusing + * the inode will block in insert_inode_hash() waiting for inode + * eviction to complete while holding transaction handle open, but + * ext4_evict_inode() still running for that inode could block waiting + * for transaction commit if the inode is marked as IS_SYNC => deadlock. + * + * Removing the inode from the hash here is safe. There are two cases + * to consider: + * 1) The inode still has references to it (i_nlink > 0). In that case + * we are keeping the inode and once we remove the inode from the hash, + * iget() can create the new inode structure for the same inode number + * and we are fine with that as all IO on behalf of the inode is + * finished. + * 2) We are deleting the inode (i_nlink == 0). In that case inode + * number cannot be reused until ext4_free_inode() clears the bit in + * the inode bitmap, at which point all IO is done and reuse is fine + * again. + */ + remove_inode_hash(inode); ext4_es_remove_extent(inode, 0, EXT_MAX_BLOCKS); dquot_drop(inode); if (EXT4_I(inode)->jinode) { From ec0a7500d8eace5b4f305fa0c594dd148f0e8d29 Mon Sep 17 00:00:00 2001 From: Baokun Li Date: Mon, 23 Mar 2026 14:08:36 +0800 Subject: [PATCH 290/401] ext4: fix iloc.bh leak in ext4_fc_replay_inode() error paths During code review, Joseph found that ext4_fc_replay_inode() calls ext4_get_fc_inode_loc() to get the inode location, which holds a reference to iloc.bh that must be released via brelse(). However, several error paths jump to the 'out' label without releasing iloc.bh: - ext4_handle_dirty_metadata() failure - sync_dirty_buffer() failure - ext4_mark_inode_used() failure - ext4_iget() failure Fix this by introducing an 'out_brelse' label placed just before the existing 'out' label to ensure iloc.bh is always released. Additionally, make ext4_fc_replay_inode() propagate errors properly instead of always returning 0. Reported-by: Joseph Qi Fixes: 8016e29f4362 ("ext4: fast commit recovery path") Signed-off-by: Baokun Li Reviewed-by: Zhang Yi Reviewed-by: Jan Kara Link: https://patch.msgid.link/20260323060836.3452660-1-libaokun@linux.alibaba.com Signed-off-by: Theodore Ts'o Cc: stable@kernel.org --- fs/ext4/fast_commit.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/fs/ext4/fast_commit.c b/fs/ext4/fast_commit.c index 6e949c21842d..2f0057e04934 100644 --- a/fs/ext4/fast_commit.c +++ b/fs/ext4/fast_commit.c @@ -1613,19 +1613,21 @@ static int ext4_fc_replay_inode(struct super_block *sb, /* Immediately update the inode on disk. */ ret = ext4_handle_dirty_metadata(NULL, NULL, iloc.bh); if (ret) - goto out; + goto out_brelse; ret = sync_dirty_buffer(iloc.bh); if (ret) - goto out; + goto out_brelse; ret = ext4_mark_inode_used(sb, ino); if (ret) - goto out; + goto out_brelse; /* Given that we just wrote the inode on disk, this SHOULD succeed. */ inode = ext4_iget(sb, ino, EXT4_IGET_NORMAL); if (IS_ERR(inode)) { ext4_debug("Inode not found."); - return -EFSCORRUPTED; + inode = NULL; + ret = -EFSCORRUPTED; + goto out_brelse; } /* @@ -1642,13 +1644,14 @@ static int ext4_fc_replay_inode(struct super_block *sb, ext4_inode_csum_set(inode, ext4_raw_inode(&iloc), EXT4_I(inode)); ret = ext4_handle_dirty_metadata(NULL, NULL, iloc.bh); sync_dirty_buffer(iloc.bh); +out_brelse: brelse(iloc.bh); out: iput(inode); if (!ret) blkdev_issue_flush(sb->s_bdev); - return 0; + return ret; } /* From 3ceda17325fc2600f66fd85b526592bc8a9dfb9d Mon Sep 17 00:00:00 2001 From: hongao Date: Tue, 24 Mar 2026 09:58:15 +0800 Subject: [PATCH 291/401] ext4: skip split extent recovery on corruption ext4_split_extent_at() retries after ext4_ext_insert_extent() fails by refinding the original extent and restoring its length. That recovery is only safe for transient resource failures such as -ENOSPC, -EDQUOT, and -ENOMEM. When ext4_ext_insert_extent() fails because the extent tree is already corrupted, ext4_find_extent() can return a leaf path without p_ext. ext4_split_extent_at() then dereferences path[depth].p_ext while trying to fix up the original extent length, causing a NULL pointer dereference while handling a pre-existing filesystem corruption. Do not enter the recovery path for corruption errors, and validate p_ext after refinding the extent before touching it. This keeps the recovery path limited to cases it can actually repair and turns the syzbot-triggered crash into a proper corruption report. Fixes: 716b9c23b862 ("ext4: refactor split and convert extents") Reported-by: syzbot+1ffa5d865557e51cb604@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=1ffa5d865557e51cb604 Reviewed-by: Jan Kara Reviewed-by: Zhang Yi Signed-off-by: hongao Link: https://patch.msgid.link/EF77870F23FF9C90+20260324015815.35248-1-hongao@uniontech.com Signed-off-by: Theodore Ts'o Cc: stable@kernel.org --- fs/ext4/extents.c | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index da78eb23aaa6..8cce1479be6d 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -3254,6 +3254,9 @@ static struct ext4_ext_path *ext4_split_extent_at(handle_t *handle, insert_err = PTR_ERR(path); err = 0; + if (insert_err != -ENOSPC && insert_err != -EDQUOT && + insert_err != -ENOMEM) + goto out_path; /* * Get a new path to try to zeroout or fix the extent length. @@ -3270,13 +3273,20 @@ static struct ext4_ext_path *ext4_split_extent_at(handle_t *handle, goto out_path; } + depth = ext_depth(inode); + ex = path[depth].p_ext; + if (!ex) { + EXT4_ERROR_INODE(inode, + "bad extent address lblock: %lu, depth: %d pblock %llu", + (unsigned long)ee_block, depth, path[depth].p_block); + err = -EFSCORRUPTED; + goto out; + } + err = ext4_ext_get_access(handle, inode, path + depth); if (err) goto out; - depth = ext_depth(inode); - ex = path[depth].p_ext; - fix_extent_len: ex->ee_len = orig_ex.ee_len; err = ext4_ext_dirty(handle, inode, path + path->p_depth); From bb81702370fad22c06ca12b6e1648754dbc37e0f Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Thu, 26 Mar 2026 00:58:34 -0400 Subject: [PATCH 292/401] ext4: handle wraparound when searching for blocks for indirect mapped blocks Commit 4865c768b563 ("ext4: always allocate blocks only from groups inode can use") restricts what blocks will be allocated for indirect block based files to block numbers that fit within 32-bit block numbers. However, when using a review bot running on the latest Gemini LLM to check this commit when backporting into an LTS based kernel, it raised this concern: If ac->ac_g_ex.fe_group is >= ngroups (for instance, if the goal group was populated via stream allocation from s_mb_last_groups), then start will be >= ngroups. Does this allow allocating blocks beyond the 32-bit limit for indirect block mapped files? The commit message mentions that ext4_mb_scan_groups_linear() takes care to not select unsupported groups. However, its loop uses group = *start, and the very first iteration will call ext4_mb_scan_group() with this unsupported group because next_linear_group() is only called at the end of the iteration. After reviewing the code paths involved and considering the LLM review, I determined that this can happen when there is a file system where some files/directories are extent-mapped and others are indirect-block mapped. To address this, add a safety clamp in ext4_mb_scan_groups(). Fixes: 4865c768b563 ("ext4: always allocate blocks only from groups inode can use") Cc: Jan Kara Reviewed-by: Baokun Li Reviewed-by: Jan Kara Signed-off-by: Theodore Ts'o Link: https://patch.msgid.link/20260326045834.1175822-1-tytso@mit.edu Signed-off-by: Theodore Ts'o Cc: stable@kernel.org --- fs/ext4/mballoc.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index bb6faebf9b6d..cb2bd87c355c 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -1199,6 +1199,8 @@ static int ext4_mb_scan_groups(struct ext4_allocation_context *ac) /* searching for the right group start from the goal value specified */ start = ac->ac_g_ex.fe_group; + if (start >= ngroups) + start = 0; ac->ac_prefetch_grp = start; ac->ac_prefetch_nr = 0; From 9ee29d20aab228adfb02ca93f87fb53c56c2f3af Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Fri, 27 Mar 2026 02:13:15 -0400 Subject: [PATCH 293/401] ext4: always drain queued discard work in ext4_mb_release() While reviewing recent ext4 patch[1], Sashiko raised the following concern[2]: > If the filesystem is initially mounted with the discard option, > deleting files will populate sbi->s_discard_list and queue > s_discard_work. If it is then remounted with nodiscard, the > EXT4_MOUNT_DISCARD flag is cleared, but the pending s_discard_work is > neither cancelled nor flushed. [1] https://lore.kernel.org/r/20260319094545.19291-1-qiang.zhang@linux.dev/ [2] https://sashiko.dev/#/patchset/20260319094545.19291-1-qiang.zhang%40linux.dev The concern was valid, but it had nothing to do with the patch[1]. One of the problems with Sashiko in its current (early) form is that it will detect pre-existing issues and report it as a problem with the patch that it is reviewing. In practice, it would be hard to hit deliberately (unless you are a malicious syzkaller fuzzer), since it would involve mounting the file system with -o discard, and then deleting a large number of files, remounting the file system with -o nodiscard, and then immediately unmounting the file system before the queued discard work has a change to drain on its own. Fix it because it's a real bug, and to avoid Sashiko from raising this concern when analyzing future patches to mballoc.c. Signed-off-by: Theodore Ts'o Fixes: 55cdd0af2bc5 ("ext4: get discard out of jbd2 commit kthread contex") Cc: stable@kernel.org --- fs/ext4/mballoc.c | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index cb2bd87c355c..bb58eafb87bc 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -3893,13 +3893,11 @@ void ext4_mb_release(struct super_block *sb) struct kmem_cache *cachep = get_groupinfo_cache(sb->s_blocksize_bits); int count; - if (test_opt(sb, DISCARD)) { - /* - * wait the discard work to drain all of ext4_free_data - */ - flush_work(&sbi->s_discard_work); - WARN_ON_ONCE(!list_empty(&sbi->s_discard_list)); - } + /* + * wait the discard work to drain all of ext4_free_data + */ + flush_work(&sbi->s_discard_work); + WARN_ON_ONCE(!list_empty(&sbi->s_discard_list)); group_info = rcu_access_pointer(sbi->s_group_info); if (group_info) { From 4576100b8cd03118267513cafacde164b498b322 Mon Sep 17 00:00:00 2001 From: Xiang Mei Date: Thu, 26 Mar 2026 13:43:09 -0700 Subject: [PATCH 294/401] net/sched: sch_hfsc: fix divide-by-zero in rtsc_min() m2sm() converts a u32 slope to a u64 scaled value. For large inputs (e.g. m1=4000000000), the result can reach 2^32. rtsc_min() stores the difference of two such u64 values in a u32 variable `dsm` and uses it as a divisor. When the difference is exactly 2^32 the truncation yields zero, causing a divide-by-zero oops in the concave-curve intersection path: Oops: divide error: 0000 RIP: 0010:rtsc_min (net/sched/sch_hfsc.c:601) Call Trace: init_ed (net/sched/sch_hfsc.c:629) hfsc_enqueue (net/sched/sch_hfsc.c:1569) [...] Widen `dsm` to u64 and replace do_div() with div64_u64() so the full difference is preserved. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Reported-by: Weiming Shi Signed-off-by: Xiang Mei Acked-by: Jamal Hadi Salim Link: https://patch.msgid.link/20260326204310.1549327-1-xmei5@asu.edu Signed-off-by: Jakub Kicinski --- net/sched/sch_hfsc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c index b5657ffbbf84..83b2ca2e37fc 100644 --- a/net/sched/sch_hfsc.c +++ b/net/sched/sch_hfsc.c @@ -555,7 +555,7 @@ static void rtsc_min(struct runtime_sc *rtsc, struct internal_sc *isc, u64 x, u64 y) { u64 y1, y2, dx, dy; - u32 dsm; + u64 dsm; if (isc->sm1 <= isc->sm2) { /* service curve is convex */ @@ -598,7 +598,7 @@ rtsc_min(struct runtime_sc *rtsc, struct internal_sc *isc, u64 x, u64 y) */ dx = (y1 - y) << SM_SHIFT; dsm = isc->sm1 - isc->sm2; - do_div(dx, dsm); + dx = div64_u64(dx, dsm); /* * check if (x, y1) belongs to the 1st segment of rtsc. * if so, add the offset. From 5d17af9eb2dd3de6846ed344c883de7812e6cc09 Mon Sep 17 00:00:00 2001 From: Xiang Mei Date: Thu, 26 Mar 2026 13:43:10 -0700 Subject: [PATCH 295/401] selftests/tc-testing: add test for HFSC divide-by-zero in rtsc_min() Add a regression test for the divide-by-zero in rtsc_min() triggered when m2sm() converts a large m1 value (e.g. 32gbit) to a u64 scaled slope reaching 2^32. rtsc_min() stores the difference of two such u64 values (sm1 - sm2) in a u32 variable `dsm`, truncating 2^32 to zero and causing a divide-by-zero oops in the concave-curve intersection path. The test configures an HFSC class with m1=32gbit d=1ms m2=0bit, sends a packet to activate the class, waits for it to drain and go idle, then sends another packet to trigger reactivation through rtsc_min(). Signed-off-by: Xiang Mei Acked-by: Jamal Hadi Salim Reviewed-by: Victor Nogueira Link: https://patch.msgid.link/20260326204310.1549327-2-xmei5@asu.edu Signed-off-by: Jakub Kicinski --- .../tc-testing/tc-tests/infra/qdiscs.json | 25 +++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json b/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json index 6a39640aa2a8..1e5efb2a31eb 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json +++ b/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json @@ -1111,5 +1111,30 @@ "teardown": [ "$TC qdisc del dev $DUMMY root handle 1:" ] + }, + { + "id": "a3d7", + "name": "HFSC with large m1 - no divide-by-zero on class reactivation", + "category": [ + "qdisc", + "hfsc" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + "$TC qdisc replace dev $DUMMY root handle 1: hfsc default 1", + "$TC class replace dev $DUMMY parent 1: classid 1:1 hfsc rt m1 32gbit d 1ms m2 0bit ls m1 32gbit d 1ms m2 0bit", + "ping -I$DUMMY -f -c1 -s64 -W1 10.10.10.1 || true", + "sleep 1" + ], + "cmdUnderTest": "ping -I$DUMMY -f -c1 -s64 -W1 10.10.10.1 || true", + "expExitCode": "0", + "verifyCmd": "$TC qdisc show dev $DUMMY", + "matchPattern": "qdisc hfsc 1: root", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DUMMY handle 1: root" + ] } ] From 4c5e7f0fcd592801c9cc18f29f80fbee84eb8669 Mon Sep 17 00:00:00 2001 From: Jinjiang Tu Date: Thu, 19 Mar 2026 09:25:41 +0800 Subject: [PATCH 296/401] mm/huge_memory: fix folio isn't locked in softleaf_to_folio() On arm64 server, we found folio that get from migration entry isn't locked in softleaf_to_folio(). This issue triggers when mTHP splitting and zap_nonpresent_ptes() races, and the root cause is lack of memory barrier in softleaf_to_folio(). The race is as follows: CPU0 CPU1 deferred_split_scan() zap_nonpresent_ptes() lock folio split_folio() unmap_folio() change ptes to migration entries __split_folio_to_order() softleaf_to_folio() set flags(including PG_locked) for tail pages folio = pfn_folio(softleaf_to_pfn(entry)) smp_wmb() VM_WARN_ON_ONCE(!folio_test_locked(folio)) prep_compound_page() for tail pages In __split_folio_to_order(), smp_wmb() guarantees page flags of tail pages are visible before the tail page becomes non-compound. smp_wmb() should be paired with smp_rmb() in softleaf_to_folio(), which is missed. As a result, if zap_nonpresent_ptes() accesses migration entry that stores tail pfn, softleaf_to_folio() may see the updated compound_head of tail page before page->flags. This issue will trigger VM_WARN_ON_ONCE() in pfn_swap_entry_folio() because of the race between folio split and zap_nonpresent_ptes() leading to a folio incorrectly undergoing modification without a folio lock being held. This is a BUG_ON() before commit 93976a20345b ("mm: eliminate further swapops predicates"), which in merged in v6.19-rc1. To fix it, add missing smp_rmb() if the softleaf entry is migration entry in softleaf_to_folio() and softleaf_to_page(). [tujinjiang@huawei.com: update function name and comments] Link: https://lkml.kernel.org/r/20260321075214.3305564-1-tujinjiang@huawei.com Link: https://lkml.kernel.org/r/20260319012541.4158561-1-tujinjiang@huawei.com Fixes: e9b61f19858a ("thp: reintroduce split_huge_page()") Signed-off-by: Jinjiang Tu Acked-by: David Hildenbrand (Arm) Reviewed-by: Lorenzo Stoakes (Oracle) Cc: Barry Song Cc: Kefeng Wang Cc: Liam Howlett Cc: Michal Hocko Cc: Mike Rapoport Cc: Nanyong Sun Cc: Ryan Roberts Cc: Suren Baghdasaryan Cc: Vlastimil Babka Cc: Signed-off-by: Andrew Morton --- include/linux/leafops.h | 32 +++++++++++++++++++++----------- 1 file changed, 21 insertions(+), 11 deletions(-) diff --git a/include/linux/leafops.h b/include/linux/leafops.h index a9ff94b744f2..05673d3529e7 100644 --- a/include/linux/leafops.h +++ b/include/linux/leafops.h @@ -363,6 +363,23 @@ static inline unsigned long softleaf_to_pfn(softleaf_t entry) return swp_offset(entry) & SWP_PFN_MASK; } +static inline void softleaf_migration_sync(softleaf_t entry, + struct folio *folio) +{ + /* + * Ensure we do not race with split, which might alter tail pages into new + * folios and thus result in observing an unlocked folio. + * This matches the write barrier in __split_folio_to_order(). + */ + smp_rmb(); + + /* + * Any use of migration entries may only occur while the + * corresponding page is locked + */ + VM_WARN_ON_ONCE(!folio_test_locked(folio)); +} + /** * softleaf_to_page() - Obtains struct page for PFN encoded within leaf entry. * @entry: Leaf entry, softleaf_has_pfn(@entry) must return true. @@ -374,11 +391,8 @@ static inline struct page *softleaf_to_page(softleaf_t entry) struct page *page = pfn_to_page(softleaf_to_pfn(entry)); VM_WARN_ON_ONCE(!softleaf_has_pfn(entry)); - /* - * Any use of migration entries may only occur while the - * corresponding page is locked - */ - VM_WARN_ON_ONCE(softleaf_is_migration(entry) && !PageLocked(page)); + if (softleaf_is_migration(entry)) + softleaf_migration_sync(entry, page_folio(page)); return page; } @@ -394,12 +408,8 @@ static inline struct folio *softleaf_to_folio(softleaf_t entry) struct folio *folio = pfn_folio(softleaf_to_pfn(entry)); VM_WARN_ON_ONCE(!softleaf_has_pfn(entry)); - /* - * Any use of migration entries may only occur while the - * corresponding folio is locked. - */ - VM_WARN_ON_ONCE(softleaf_is_migration(entry) && - !folio_test_locked(folio)); + if (softleaf_is_migration(entry)) + softleaf_migration_sync(entry, folio); return folio; } From 26d3dca201f3662e51d25022cfce0f642a150a90 Mon Sep 17 00:00:00 2001 From: "Harry Yoo (Oracle)" Date: Fri, 20 Mar 2026 21:59:25 +0900 Subject: [PATCH 297/401] MAINTAINERS, mailmap: update email address for Harry Yoo Update my email address to harry@kernel.org. Link: https://lkml.kernel.org/r/20260320125925.2259998-1-harry@kernel.org Signed-off-by: Harry Yoo (Oracle) Signed-off-by: Andrew Morton --- .mailmap | 1 + MAINTAINERS | 4 ++-- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/.mailmap b/.mailmap index 7d14504daf24..2d04aeba68b4 100644 --- a/.mailmap +++ b/.mailmap @@ -316,6 +316,7 @@ Hans Verkuil Hans Verkuil Hao Ge Harry Yoo <42.hyeyoo@gmail.com> +Harry Yoo Heiko Carstens Heiko Carstens Heiko Stuebner diff --git a/MAINTAINERS b/MAINTAINERS index 7d10988cbc62..915f074c3577 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -16877,7 +16877,7 @@ M: Lorenzo Stoakes R: Rik van Riel R: Liam R. Howlett R: Vlastimil Babka -R: Harry Yoo +R: Harry Yoo R: Jann Horn L: linux-mm@kvack.org S: Maintained @@ -24343,7 +24343,7 @@ F: drivers/nvmem/layouts/sl28vpd.c SLAB ALLOCATOR M: Vlastimil Babka -M: Harry Yoo +M: Harry Yoo M: Andrew Morton R: Hao Li R: Christoph Lameter From 9e0d0ddfbc0e3491da7e2db73faa08d8d4f322b2 Mon Sep 17 00:00:00 2001 From: Alexandre Ghiti Date: Fri, 20 Mar 2026 06:05:59 +0100 Subject: [PATCH 298/401] mm/swap: fix swap cache memcg accounting The swap readahead path was recently refactored and while doing this, the order between the charging of the folio in the memcg and the addition of the folio in the swap cache was inverted. Since the accounting of the folio is done while adding the folio to the swap cache and the folio is not charged in the memcg yet, the accounting is then done at the node level, which is wrong. Fix this by charging the folio in the memcg before adding it to the swap cache. Link: https://lkml.kernel.org/r/20260320050601.1833108-1-alex@ghiti.fr Fixes: 2732acda82c9 ("mm, swap: use swap cache as the swap in synchronize layer") Signed-off-by: Alexandre Ghiti Acked-by: Kairui Song Acked-by: Johannes Weiner Reviewed-by: Nhat Pham Acked-by: Chris Li Cc: Alexandre Ghiti Cc: Baoquan He Cc: Barry Song Cc: Kemeng Shi Cc: Signed-off-by: Andrew Morton --- mm/swap_state.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/mm/swap_state.c b/mm/swap_state.c index 6d0eef7470be..48aff2c917c0 100644 --- a/mm/swap_state.c +++ b/mm/swap_state.c @@ -494,6 +494,10 @@ static struct folio *__swap_cache_prepare_and_add(swp_entry_t entry, __folio_set_locked(folio); __folio_set_swapbacked(folio); + + if (!charged && mem_cgroup_swapin_charge_folio(folio, NULL, gfp, entry)) + goto failed; + for (;;) { ret = swap_cache_add_folio(folio, entry, &shadow); if (!ret) @@ -514,11 +518,6 @@ static struct folio *__swap_cache_prepare_and_add(swp_entry_t entry, goto failed; } - if (!charged && mem_cgroup_swapin_charge_folio(folio, NULL, gfp, entry)) { - swap_cache_del_folio(folio); - goto failed; - } - memcg1_swapin(entry, folio_nr_pages(folio)); if (shadow) workingset_refault(folio, shadow); From 7fe000eb32904758a85e62f6ea9483f89d5dabfc Mon Sep 17 00:00:00 2001 From: Josh Law Date: Sat, 21 Mar 2026 10:54:24 -0700 Subject: [PATCH 299/401] mm/damon/sysfs: fix param_ctx leak on damon_sysfs_new_test_ctx() failure Patch series "mm/damon/sysfs: fix memory leak and NULL dereference issues", v4. DAMON_SYSFS can leak memory under allocation failure, and do NULL pointer dereference when a privileged user make wrong sequences of control. Fix those. This patch (of 3): When damon_sysfs_new_test_ctx() fails in damon_sysfs_commit_input(), param_ctx is leaked because the early return skips the cleanup at the out label. Destroy param_ctx before returning. Link: https://lkml.kernel.org/r/20260321175427.86000-1-sj@kernel.org Link: https://lkml.kernel.org/r/20260321175427.86000-2-sj@kernel.org Fixes: f0c5118ebb0e ("mm/damon/sysfs: catch commit test ctx alloc failure") Signed-off-by: Josh Law Reviewed-by: SeongJae Park Signed-off-by: SeongJae Park Cc: [6.18+] Signed-off-by: Andrew Morton --- mm/damon/sysfs.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/mm/damon/sysfs.c b/mm/damon/sysfs.c index 576d1ddd736b..b573b9d60784 100644 --- a/mm/damon/sysfs.c +++ b/mm/damon/sysfs.c @@ -1524,8 +1524,10 @@ static int damon_sysfs_commit_input(void *data) if (IS_ERR(param_ctx)) return PTR_ERR(param_ctx); test_ctx = damon_sysfs_new_test_ctx(kdamond->damon_ctx); - if (!test_ctx) + if (!test_ctx) { + damon_destroy_ctx(param_ctx); return -ENOMEM; + } err = damon_commit_ctx(test_ctx, param_ctx); if (err) goto out; From 1bfe9fb5ed2667fb075682408b776b5273162615 Mon Sep 17 00:00:00 2001 From: Josh Law Date: Sat, 21 Mar 2026 10:54:25 -0700 Subject: [PATCH 300/401] mm/damon/sysfs: check contexts->nr before accessing contexts_arr[0] Multiple sysfs command paths dereference contexts_arr[0] without first verifying that kdamond->contexts->nr == 1. A user can set nr_contexts to 0 via sysfs while DAMON is running, causing NULL pointer dereferences. In more detail, the issue can be triggered by privileged users like below. First, start DAMON and make contexts directory empty (kdamond->contexts->nr == 0). # damo start # cd /sys/kernel/mm/damon/admin/kdamonds/0 # echo 0 > contexts/nr_contexts Then, each of below commands will cause the NULL pointer dereference. # echo update_schemes_stats > state # echo update_schemes_tried_regions > state # echo update_schemes_tried_bytes > state # echo update_schemes_effective_quotas > state # echo update_tuned_intervals > state Guard all commands (except OFF) at the entry point of damon_sysfs_handle_cmd(). Link: https://lkml.kernel.org/r/20260321175427.86000-3-sj@kernel.org Fixes: 0ac32b8affb5 ("mm/damon/sysfs: support DAMOS stats") Signed-off-by: Josh Law Reviewed-by: SeongJae Park Signed-off-by: SeongJae Park Cc: [5.18+] Signed-off-by: Andrew Morton --- mm/damon/sysfs.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/mm/damon/sysfs.c b/mm/damon/sysfs.c index b573b9d60784..ddc30586c0e6 100644 --- a/mm/damon/sysfs.c +++ b/mm/damon/sysfs.c @@ -1749,6 +1749,9 @@ static int damon_sysfs_update_schemes_tried_regions( static int damon_sysfs_handle_cmd(enum damon_sysfs_cmd cmd, struct damon_sysfs_kdamond *kdamond) { + if (cmd != DAMON_SYSFS_CMD_OFF && kdamond->contexts->nr != 1) + return -EINVAL; + switch (cmd) { case DAMON_SYSFS_CMD_ON: return damon_sysfs_turn_damon_on(kdamond); From 6557004a8b59c7701e695f02be03c7e20ed1cc15 Mon Sep 17 00:00:00 2001 From: Josh Law Date: Sat, 21 Mar 2026 10:54:26 -0700 Subject: [PATCH 301/401] mm/damon/sysfs: check contexts->nr in repeat_call_fn damon_sysfs_repeat_call_fn() calls damon_sysfs_upd_tuned_intervals(), damon_sysfs_upd_schemes_stats(), and damon_sysfs_upd_schemes_effective_quotas() without checking contexts->nr. If nr_contexts is set to 0 via sysfs while DAMON is running, these functions dereference contexts_arr[0] and cause a NULL pointer dereference. Add the missing check. For example, the issue can be reproduced using DAMON sysfs interface and DAMON user-space tool (damo) [1] like below. $ sudo damo start --refresh_interval 1s $ echo 0 | sudo tee \ /sys/kernel/mm/damon/admin/kdamonds/0/contexts/nr_contexts Link: https://patch.msgid.link/20260320163559.178101-3-objecting@objecting.org Link: https://lkml.kernel.org/r/20260321175427.86000-4-sj@kernel.org Link: https://github.com/damonitor/damo [1] Fixes: d809a7c64ba8 ("mm/damon/sysfs: implement refresh_ms file internal work") Signed-off-by: Josh Law Reviewed-by: SeongJae Park Signed-off-by: SeongJae Park Cc: [6.17+] Signed-off-by: Andrew Morton --- mm/damon/sysfs.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/mm/damon/sysfs.c b/mm/damon/sysfs.c index ddc30586c0e6..6a44a2f3d8fc 100644 --- a/mm/damon/sysfs.c +++ b/mm/damon/sysfs.c @@ -1620,9 +1620,12 @@ static int damon_sysfs_repeat_call_fn(void *data) if (!mutex_trylock(&damon_sysfs_lock)) return 0; + if (sysfs_kdamond->contexts->nr != 1) + goto out; damon_sysfs_upd_tuned_intervals(sysfs_kdamond); damon_sysfs_upd_schemes_stats(sysfs_kdamond); damon_sysfs_upd_schemes_effective_quotas(sysfs_kdamond); +out: mutex_unlock(&damon_sysfs_lock); return 0; } From ffef67b93aa352b34e6aeba3d52c19a63885409a Mon Sep 17 00:00:00 2001 From: "David Hildenbrand (Arm)" Date: Mon, 23 Mar 2026 21:20:18 +0100 Subject: [PATCH 302/401] mm/memory: fix PMD/PUD checks in follow_pfnmap_start() follow_pfnmap_start() suffers from two problems: (1) We are not re-fetching the pmd/pud after taking the PTL Therefore, we are not properly stabilizing what the lock actually protects. If there is concurrent zapping, we would indicate to the caller that we found an entry, however, that entry might already have been invalidated, or contain a different PFN after taking the lock. Properly use pmdp_get() / pudp_get() after taking the lock. (2) pmd_leaf() / pud_leaf() are not well defined on non-present entries pmd_leaf()/pud_leaf() could wrongly trigger on non-present entries. There is no real guarantee that pmd_leaf()/pud_leaf() returns something reasonable on non-present entries. Most architectures indeed either perform a present check or make it work by smart use of flags. However, for example loongarch checks the _PAGE_HUGE flag in pmd_leaf(), and always sets the _PAGE_HUGE flag in __swp_entry_to_pmd(). Whereby pmd_trans_huge() explicitly checks pmd_present(), pmd_leaf() does not do that. Let's check pmd_present()/pud_present() before assuming "the is a present PMD leaf" when spotting pmd_leaf()/pud_leaf(), like other page table handling code that traverses user page tables does. Given that non-present PMD entries are likely rare in VM_IO|VM_PFNMAP, (1) is likely more relevant than (2). It is questionable how often (1) would actually trigger, but let's CC stable to be sure. This was found by code inspection. Link: https://lkml.kernel.org/r/20260323-follow_pfnmap_fix-v1-1-5b0ec10872b3@kernel.org Fixes: 6da8e9634bb7 ("mm: new follow_pfnmap API") Signed-off-by: David Hildenbrand (Arm) Acked-by: Mike Rapoport (Microsoft) Reviewed-by: Lorenzo Stoakes (Oracle) Cc: Liam Howlett Cc: Michal Hocko Cc: Peter Xu Cc: Suren Baghdasaryan Cc: Vlastimil Babka Cc: Signed-off-by: Andrew Morton --- mm/memory.c | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/mm/memory.c b/mm/memory.c index 2f815a34d924..c65e82c86fed 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -6815,11 +6815,16 @@ int follow_pfnmap_start(struct follow_pfnmap_args *args) pudp = pud_offset(p4dp, address); pud = pudp_get(pudp); - if (pud_none(pud)) + if (!pud_present(pud)) goto out; if (pud_leaf(pud)) { lock = pud_lock(mm, pudp); - if (!unlikely(pud_leaf(pud))) { + pud = pudp_get(pudp); + + if (unlikely(!pud_present(pud))) { + spin_unlock(lock); + goto out; + } else if (unlikely(!pud_leaf(pud))) { spin_unlock(lock); goto retry; } @@ -6831,9 +6836,16 @@ int follow_pfnmap_start(struct follow_pfnmap_args *args) pmdp = pmd_offset(pudp, address); pmd = pmdp_get_lockless(pmdp); + if (!pmd_present(pmd)) + goto out; if (pmd_leaf(pmd)) { lock = pmd_lock(mm, pmdp); - if (!unlikely(pmd_leaf(pmd))) { + pmd = pmdp_get(pmdp); + + if (unlikely(!pmd_present(pmd))) { + spin_unlock(lock); + goto out; + } else if (unlikely(!pmd_leaf(pmd))) { spin_unlock(lock); goto retry; } From 3b89863c3fa482912911cd65a12a3aeef662c250 Mon Sep 17 00:00:00 2001 From: Max Boone Date: Wed, 25 Mar 2026 10:59:16 +0100 Subject: [PATCH 303/401] mm/pagewalk: fix race between concurrent split and refault The splitting of a PUD entry in walk_pud_range() can race with a concurrent thread refaulting the PUD leaf entry causing it to try walking a PMD range that has disappeared. An example and reproduction of this is to try reading numa_maps of a process while VFIO-PCI is setting up DMA (specifically the vfio_pin_pages_remote call) on a large BAR for that process. This will trigger a kernel BUG: vfio-pci 0000:03:00.0: enabling device (0000 -> 0002) BUG: unable to handle page fault for address: ffffa23980000000 PGD 0 P4D 0 Oops: Oops: 0000 [#1] SMP NOPTI ... RIP: 0010:walk_pgd_range+0x3b5/0x7a0 Code: 8d 43 ff 48 89 44 24 28 4d 89 ce 4d 8d a7 00 00 20 00 48 8b 4c 24 28 49 81 e4 00 00 e0 ff 49 8d 44 24 ff 48 39 c8 4c 0f 43 e3 <49> f7 06 9f ff ff ff 75 3b 48 8b 44 24 20 48 8b 40 28 48 85 c0 74 RSP: 0018:ffffac23e1ecf808 EFLAGS: 00010287 RAX: 00007f44c01fffff RBX: 00007f4500000000 RCX: 00007f44ffffffff RDX: 0000000000000000 RSI: 000ffffffffff000 RDI: ffffffff93378fe0 RBP: ffffac23e1ecf918 R08: 0000000000000004 R09: ffffa23980000000 R10: 0000000000000020 R11: 0000000000000004 R12: 00007f44c0200000 R13: 00007f44c0000000 R14: ffffa23980000000 R15: 00007f44c0000000 FS: 00007fe884739580(0000) GS:ffff9b7d7a9c0000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: ffffa23980000000 CR3: 000000c0650e2005 CR4: 0000000000770ef0 PKRU: 55555554 Call Trace: __walk_page_range+0x195/0x1b0 walk_page_vma+0x62/0xc0 show_numa_map+0x12b/0x3b0 seq_read_iter+0x297/0x440 seq_read+0x11d/0x140 vfs_read+0xc2/0x340 ksys_read+0x5f/0xe0 do_syscall_64+0x68/0x130 ? get_page_from_freelist+0x5c2/0x17e0 ? mas_store_prealloc+0x17e/0x360 ? vma_set_page_prot+0x4c/0xa0 ? __alloc_pages_noprof+0x14e/0x2d0 ? __mod_memcg_lruvec_state+0x8d/0x140 ? __lruvec_stat_mod_folio+0x76/0xb0 ? __folio_mod_stat+0x26/0x80 ? do_anonymous_page+0x705/0x900 ? __handle_mm_fault+0xa8d/0x1000 ? __count_memcg_events+0x53/0xf0 ? handle_mm_fault+0xa5/0x360 ? do_user_addr_fault+0x342/0x640 ? arch_exit_to_user_mode_prepare.constprop.0+0x16/0xa0 ? irqentry_exit_to_user_mode+0x24/0x100 entry_SYSCALL_64_after_hwframe+0x76/0x7e RIP: 0033:0x7fe88464f47e Code: c0 e9 b6 fe ff ff 50 48 8d 3d be 07 0b 00 e8 69 01 02 00 66 0f 1f 84 00 00 00 00 00 64 8b 04 25 18 00 00 00 85 c0 75 14 0f 05 <48> 3d 00 f0 ff ff 77 5a c3 66 0f 1f 84 00 00 00 00 00 48 83 ec 28 RSP: 002b:00007ffe6cd9a9b8 EFLAGS: 00000246 ORIG_RAX: 0000000000000000 RAX: ffffffffffffffda RBX: 0000000000020000 RCX: 00007fe88464f47e RDX: 0000000000020000 RSI: 00007fe884543000 RDI: 0000000000000003 RBP: 00007fe884543000 R08: 00007fe884542010 R09: 0000000000000000 R10: fffffffffffffbc5 R11: 0000000000000246 R12: 0000000000000000 R13: 0000000000000003 R14: 0000000000020000 R15: 0000000000020000 Fix this by validating the PUD entry in walk_pmd_range() using a stable snapshot (pudp_get()). If the PUD is not present or is a leaf, retry the walk via ACTION_AGAIN instead of descending further. This mirrors the retry logic in walk_pte_range(), which lets walk_pmd_range() retry if the PTE is not being got by pte_offset_map_lock(). Link: https://lkml.kernel.org/r/20260325-pagewalk-check-pmd-refault-v2-1-707bff33bc60@akamai.com Fixes: f9e54c3a2f5b ("vfio/pci: implement huge_fault support") Co-developed-by: David Hildenbrand (Arm) Signed-off-by: David Hildenbrand (Arm) Signed-off-by: Max Boone Acked-by: David Hildenbrand (Arm) Cc: Liam Howlett Cc: Lorenzo Stoakes (Oracle) Cc: Michal Hocko Cc: Mike Rapoport Cc: Suren Baghdasaryan Cc: Vlastimil Babka Cc: Signed-off-by: Andrew Morton --- mm/pagewalk.c | 25 ++++++++++++++++++++++--- 1 file changed, 22 insertions(+), 3 deletions(-) diff --git a/mm/pagewalk.c b/mm/pagewalk.c index a94c401ab2cf..4e7bcd975c54 100644 --- a/mm/pagewalk.c +++ b/mm/pagewalk.c @@ -97,6 +97,7 @@ static int walk_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end, static int walk_pmd_range(pud_t *pud, unsigned long addr, unsigned long end, struct mm_walk *walk) { + pud_t pudval = pudp_get(pud); pmd_t *pmd; unsigned long next; const struct mm_walk_ops *ops = walk->ops; @@ -105,6 +106,24 @@ static int walk_pmd_range(pud_t *pud, unsigned long addr, unsigned long end, int err = 0; int depth = real_depth(3); + /* + * For PTE handling, pte_offset_map_lock() takes care of checking + * whether there actually is a page table. But it also has to be + * very careful about concurrent page table reclaim. + * + * Similarly, we have to be careful here - a PUD entry that points + * to a PMD table cannot go away, so we can just walk it. But if + * it's something else, we need to ensure we didn't race something, + * so need to retry. + * + * A pertinent example of this is a PUD refault after PUD split - + * we will need to split again or risk accessing invalid memory. + */ + if (!pud_present(pudval) || pud_leaf(pudval)) { + walk->action = ACTION_AGAIN; + return 0; + } + pmd = pmd_offset(pud, addr); do { again: @@ -218,12 +237,12 @@ static int walk_pud_range(p4d_t *p4d, unsigned long addr, unsigned long end, else if (pud_leaf(*pud) || !pud_present(*pud)) continue; /* Nothing to do. */ - if (pud_none(*pud)) - goto again; - err = walk_pmd_range(pud, addr, next, walk); if (err) break; + + if (walk->action == ACTION_AGAIN) + goto again; } while (pud++, addr = next, addr != end); return err; From 2598ab9d63f41160c7081998857fef409182933d Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 23 Mar 2026 21:55:16 +0100 Subject: [PATCH 304/401] bug: avoid format attribute warning for clang as well Like gcc, clang-22 now also warns about a function that it incorrectly identifies as a printf-style format: lib/bug.c:190:22: error: diagnostic behavior may be improved by adding the 'format(printf, 1, 0)' attribute to the declaration of '__warn_printf' [-Werror,-Wmissing-format-attribute] 179 | static void __warn_printf(const char *fmt, struct pt_regs *regs) | __attribute__((format(printf, 1, 0))) 180 | { 181 | if (!fmt) 182 | return; 183 | 184 | #ifdef HAVE_ARCH_BUG_FORMAT_ARGS 185 | if (regs) { 186 | struct arch_va_list _args; 187 | va_list *args = __warn_args(&_args, regs); 188 | 189 | if (args) { 190 | vprintk(fmt, *args); | ^ Revert the change that added a gcc-specific workaround, and instead add the generic annotation that avoid the warning. Link: https://lkml.kernel.org/r/20260323205534.1284284-1-arnd@kernel.org Fixes: d36067d6ea00 ("bug: Hush suggest-attribute=format for __warn_printf()") Suggested-by: Andy Shevchenko Suggested-by: Brendan Jackman Link: https://lore.kernel.org/all/20251208141618.2805983-1-andriy.shevchenko@linux.intel.com/T/#u Signed-off-by: Arnd Bergmann Reviewed-by: Brendan Jackman Reviewed-by: Andy Shevchenko Cc: Bill Wendling Cc: Ingo Molnar Cc: Justin Stitt Cc: Nathan Chancellor Cc: Peter Zijlstra Signed-off-by: Andrew Morton --- lib/bug.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/lib/bug.c b/lib/bug.c index 623c467a8b76..aab9e6a40c5f 100644 --- a/lib/bug.c +++ b/lib/bug.c @@ -173,10 +173,8 @@ struct bug_entry *find_bug(unsigned long bugaddr) return module_find_bug(bugaddr); } -__diag_push(); -__diag_ignore(GCC, all, "-Wsuggest-attribute=format", - "Not a valid __printf() conversion candidate."); -static void __warn_printf(const char *fmt, struct pt_regs *regs) +static __printf(1, 0) +void __warn_printf(const char *fmt, struct pt_regs *regs) { if (!fmt) return; @@ -195,7 +193,6 @@ static void __warn_printf(const char *fmt, struct pt_regs *regs) printk("%s", fmt); } -__diag_pop(); static enum bug_trap_type __report_bug(struct bug_entry *bug, unsigned long bugaddr, struct pt_regs *regs) { From 2697dd8ae721db4f6a53d4f4cbd438212a80f8dc Mon Sep 17 00:00:00 2001 From: "Lorenzo Stoakes (Oracle)" Date: Fri, 27 Mar 2026 17:31:04 +0000 Subject: [PATCH 305/401] mm/mseal: update VMA end correctly on merge Previously we stored the end of the current VMA in curr_end, and then upon iterating to the next VMA updated curr_start to curr_end to advance to the next VMA. However, this doesn't take into account the fact that a VMA might be updated due to a merge by vma_modify_flags(), which can result in curr_end being stale and thus, upon setting curr_start to curr_end, ending up with an incorrect curr_start on the next iteration. Resolve the issue by setting curr_end to vma->vm_end unconditionally to ensure this value remains updated should this occur. While we're here, eliminate this entire class of bug by simply setting const curr_[start/end] to be clamped to the input range and VMAs, which also happens to simplify the logic. Link: https://lkml.kernel.org/r/20260327173104.322405-1-ljs@kernel.org Fixes: 6c2da14ae1e0 ("mm/mseal: rework mseal apply logic") Signed-off-by: Lorenzo Stoakes (Oracle) Reported-by: Antonius Closes: https://lore.kernel.org/linux-mm/CAK8a0jwWGj9-SgFk0yKFh7i8jMkwKm5b0ao9=kmXWjO54veX2g@mail.gmail.com/ Suggested-by: David Hildenbrand (ARM) Acked-by: Vlastimil Babka (SUSE) Reviewed-by: Pedro Falcato Acked-by: David Hildenbrand (Arm) Cc: Jann Horn Cc: Jeff Xu Cc: Liam Howlett Cc: Signed-off-by: Andrew Morton --- mm/mseal.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/mm/mseal.c b/mm/mseal.c index 316b5e1dec78..ac58643181f7 100644 --- a/mm/mseal.c +++ b/mm/mseal.c @@ -56,7 +56,6 @@ static int mseal_apply(struct mm_struct *mm, unsigned long start, unsigned long end) { struct vm_area_struct *vma, *prev; - unsigned long curr_start = start; VMA_ITERATOR(vmi, mm, start); /* We know there are no gaps so this will be non-NULL. */ @@ -66,6 +65,7 @@ static int mseal_apply(struct mm_struct *mm, prev = vma; for_each_vma_range(vmi, vma, end) { + const unsigned long curr_start = MAX(vma->vm_start, start); const unsigned long curr_end = MIN(vma->vm_end, end); if (!(vma->vm_flags & VM_SEALED)) { @@ -79,7 +79,6 @@ static int mseal_apply(struct mm_struct *mm, } prev = vma; - curr_start = curr_end; } return 0; From eeee5a710f26ce57807024ef330fe5a850eaecd8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Beh=C3=BAn?= Date: Thu, 26 Mar 2026 13:20:38 +0100 Subject: [PATCH 306/401] net: sfp: Fix Ubiquiti U-Fiber Instant SFP module on mvneta MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In commit 8110633db49d7de2 ("net: sfp-bus: allow SFP quirks to override Autoneg and pause bits") we moved the setting of Autoneg and pause bits before the call to SFP quirk when parsing SFP module support. Since the quirk for Ubiquiti U-Fiber Instant SFP module zeroes the support bits and sets 1000baseX_Full only, the above mentioned commit changed the overall computed support from 1000baseX_Full, Autoneg, Pause, Asym_Pause to just 1000baseX_Full. This broke the SFP module for mvneta, which requires Autoneg for 1000baseX since commit c762b7fac1b249a9 ("net: mvneta: deny disabling autoneg for 802.3z modes"). Fix this by setting back the Autoneg, Pause and Asym_Pause bits in the quirk. Fixes: 8110633db49d7de2 ("net: sfp-bus: allow SFP quirks to override Autoneg and pause bits") Signed-off-by: Marek Behún Reviewed-by: Russell King (Oracle) Link: https://patch.msgid.link/20260326122038.2489589-1-kabel@kernel.org Signed-off-by: Jakub Kicinski --- drivers/net/phy/sfp.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/net/phy/sfp.c b/drivers/net/phy/sfp.c index 5db841377199..ce8924613363 100644 --- a/drivers/net/phy/sfp.c +++ b/drivers/net/phy/sfp.c @@ -480,11 +480,16 @@ static void sfp_quirk_ubnt_uf_instant(const struct sfp_eeprom_id *id, { /* Ubiquiti U-Fiber Instant module claims that support all transceiver * types including 10G Ethernet which is not truth. So clear all claimed - * modes and set only one mode which module supports: 1000baseX_Full. + * modes and set only one mode which module supports: 1000baseX_Full, + * along with the Autoneg and pause bits. */ linkmode_zero(caps->link_modes); linkmode_set_bit(ETHTOOL_LINK_MODE_1000baseX_Full_BIT, caps->link_modes); + linkmode_set_bit(ETHTOOL_LINK_MODE_Autoneg_BIT, caps->link_modes); + linkmode_set_bit(ETHTOOL_LINK_MODE_Pause_BIT, caps->link_modes); + linkmode_set_bit(ETHTOOL_LINK_MODE_Asym_Pause_BIT, caps->link_modes); + phy_interface_zero(caps->interfaces); __set_bit(PHY_INTERFACE_MODE_1000BASEX, caps->interfaces); } From d389954a6cae7bf76b7b082ac3511d177b77ef2d Mon Sep 17 00:00:00 2001 From: Wei Fang Date: Thu, 26 Mar 2026 15:52:32 +0800 Subject: [PATCH 307/401] net: enetc: check whether the RSS algorithm is Toeplitz Both ENETC v1 and v4 only provide Toeplitz RSS support. This patch adds a validation check to reject attempts to configure other RSS algorithms, avoiding misleading configuration options for users. Fixes: d382563f541b ("enetc: Add RFS and RSS support") Signed-off-by: Wei Fang Reviewed-by: Clark Wang Reviewed-by: Claudiu Manoil Link: https://patch.msgid.link/20260326075233.3628047-2-wei.fang@nxp.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/freescale/enetc/enetc_ethtool.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/ethernet/freescale/enetc/enetc_ethtool.c b/drivers/net/ethernet/freescale/enetc/enetc_ethtool.c index 2fe140ddebb2..a393647e6062 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc_ethtool.c +++ b/drivers/net/ethernet/freescale/enetc/enetc_ethtool.c @@ -795,6 +795,10 @@ static int enetc_set_rxfh(struct net_device *ndev, struct enetc_si *si = priv->si; int err = 0; + if (rxfh->hfunc != ETH_RSS_HASH_NO_CHANGE && + rxfh->hfunc != ETH_RSS_HASH_TOP) + return -EOPNOTSUPP; + /* set hash key, if PF */ if (rxfh->key && enetc_si_is_pf(si)) enetc_set_rss_key(si, rxfh->key); From a142d139168cce8d5776245b5494c7f7f5d7fb7d Mon Sep 17 00:00:00 2001 From: Wei Fang Date: Thu, 26 Mar 2026 15:52:33 +0800 Subject: [PATCH 308/401] net: enetc: do not allow VF to configure the RSS key VFs do not have privilege to configure the RSS key because the registers are owned by the PF. Currently, if VF attempts to configure the RSS key, enetc_set_rxfh() simply skips the configuration and does not generate a warning, which may mislead users into thinking the feature is supported. To improve this situation, add a check to reject RSS key configuration on VFs. Fixes: d382563f541b ("enetc: Add RFS and RSS support") Signed-off-by: Wei Fang Reviewed-by: Clark Wang Reviewed-by: Claudiu Manoil Link: https://patch.msgid.link/20260326075233.3628047-3-wei.fang@nxp.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/freescale/enetc/enetc_ethtool.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/freescale/enetc/enetc_ethtool.c b/drivers/net/ethernet/freescale/enetc/enetc_ethtool.c index a393647e6062..7c17acaf7a38 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc_ethtool.c +++ b/drivers/net/ethernet/freescale/enetc/enetc_ethtool.c @@ -800,8 +800,12 @@ static int enetc_set_rxfh(struct net_device *ndev, return -EOPNOTSUPP; /* set hash key, if PF */ - if (rxfh->key && enetc_si_is_pf(si)) + if (rxfh->key) { + if (!enetc_si_is_pf(si)) + return -EOPNOTSUPP; + enetc_set_rss_key(si, rxfh->key); + } /* set RSS table */ if (rxfh->indir) From bc5b4e5ae1a67700a618328217b6a3bd0f296e97 Mon Sep 17 00:00:00 2001 From: Phil Willoughby Date: Sat, 28 Mar 2026 08:07:34 +0000 Subject: [PATCH 309/401] ALSA: usb-audio: Fix quirk flags for NeuralDSP Quad Cortex The NeuralDSP Quad Cortex does not support DSD playback. We need this product-specific entry with zero quirks because otherwise it falls through to the vendor-specific entry which marks it as supporting DSD playback. Cc: Yue Wang Cc: Jaroslav Kysela Cc: Takashi Iwai Signed-off-by: Phil Willoughby Link: https://patch.msgid.link/20260328080921.3310-1-willerz@gmail.com Signed-off-by: Takashi Iwai --- sound/usb/quirks.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sound/usb/quirks.c b/sound/usb/quirks.c index 1f82e9e02d4b..6e40c18c37f9 100644 --- a/sound/usb/quirks.c +++ b/sound/usb/quirks.c @@ -2305,6 +2305,8 @@ static const struct usb_audio_quirk_flags_table quirk_flags_table[] = { QUIRK_FLAG_PLAYBACK_FIRST | QUIRK_FLAG_GENERIC_IMPLICIT_FB), DEVICE_FLG(0x13e5, 0x0001, /* Serato Phono */ QUIRK_FLAG_IGNORE_CTL_ERROR), + DEVICE_FLG(0x152a, 0x880a, /* NeuralDSP Quad Cortex */ + 0), /* Doesn't have the vendor quirk which would otherwise apply */ DEVICE_FLG(0x154e, 0x1002, /* Denon DCD-1500RE */ QUIRK_FLAG_ITF_USB_DSD_DAC | QUIRK_FLAG_CTL_MSG_DELAY), DEVICE_FLG(0x154e, 0x1003, /* Denon DA-300USB */ From bac1e57adf08c9ee33e95fb09cd032f330294e70 Mon Sep 17 00:00:00 2001 From: "Dustin L. Howett" Date: Fri, 27 Mar 2026 10:54:40 -0500 Subject: [PATCH 310/401] ALSA: hda/realtek: add quirk for Framework F111:000F Similar to commit 7b509910b3ad ("ALSA hda/realtek: Add quirk for Framework F111:000C") and previous quirks for Framework systems with Realtek codecs. 000F is another new platform with an ALC285 which needs the same quirk. Signed-off-by: Dustin L. Howett Link: https://patch.msgid.link/20260327-framework-alsa-000f-v1-1-74013aba1c00@howett.net Signed-off-by: Takashi Iwai --- sound/hda/codecs/realtek/alc269.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/hda/codecs/realtek/alc269.c b/sound/hda/codecs/realtek/alc269.c index 6787e54fcfe6..393910857e89 100644 --- a/sound/hda/codecs/realtek/alc269.c +++ b/sound/hda/codecs/realtek/alc269.c @@ -7757,6 +7757,7 @@ static const struct hda_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0xf111, 0x0009, "Framework Laptop", ALC295_FIXUP_FRAMEWORK_LAPTOP_MIC_NO_PRESENCE), SND_PCI_QUIRK(0xf111, 0x000b, "Framework Laptop", ALC295_FIXUP_FRAMEWORK_LAPTOP_MIC_NO_PRESENCE), SND_PCI_QUIRK(0xf111, 0x000c, "Framework Laptop", ALC295_FIXUP_FRAMEWORK_LAPTOP_MIC_NO_PRESENCE), + SND_PCI_QUIRK(0xf111, 0x000f, "Framework Laptop", ALC295_FIXUP_FRAMEWORK_LAPTOP_MIC_NO_PRESENCE), #if 0 /* Below is a quirk table taken from the old code. From 696b0a9bd2e4b4c7082369202fe9406345a6d11e Mon Sep 17 00:00:00 2001 From: Stuart Hayhurst Date: Fri, 27 Mar 2026 15:57:36 +0000 Subject: [PATCH 311/401] ALSA: hda/intel: Add MSI X870E Tomahawk to denylist by DMI ID This motherboard uses USB audio instead, causing this driver to complain about "no codecs found!". Add it to the denylist to silence the warning. The first attempt only matched on the PCI device, but this caused issues for some laptops, so DMI match against the board as well. Signed-off-by: Stuart Hayhurst Link: https://patch.msgid.link/20260327155737.21818-2-stuart.a.hayhurst@gmail.com Signed-off-by: Takashi Iwai --- sound/hda/controllers/intel.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/sound/hda/controllers/intel.c b/sound/hda/controllers/intel.c index 2edbcab597c8..8a7bd49e411f 100644 --- a/sound/hda/controllers/intel.c +++ b/sound/hda/controllers/intel.c @@ -2085,6 +2085,11 @@ static struct pci_device_id driver_denylist_ideapad_z570[] = { {} }; +static struct pci_device_id driver_denylist_msi_x870e[] = { + { PCI_DEVICE_SUB(0x1022, 0x15e3, 0x1462, 0xee59) }, /* MSI X870E Tomahawk WiFi */ + {} +}; + /* DMI-based denylist, to be used when: * - PCI subsystem IDs are zero, impossible to distinguish from valid sound cards. * - Different modifications of the same laptop use different GPU models. @@ -2098,6 +2103,14 @@ static const struct dmi_system_id driver_denylist_dmi[] = { }, .driver_data = &driver_denylist_ideapad_z570, }, + { + /* PCI device matching alone incorrectly matches some laptops */ + .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "Micro-Star International Co., Ltd."), + DMI_MATCH(DMI_BOARD_NAME, "MAG X870E TOMAHAWK WIFI (MS-7E59)"), + }, + .driver_data = &driver_denylist_msi_x870e, + }, {} }; From 1fbf85dbf02c96c318e056fb5b8fc614758fee3c Mon Sep 17 00:00:00 2001 From: Sourav Nayak Date: Fri, 27 Mar 2026 19:58:05 +0530 Subject: [PATCH 312/401] ALSA: hda/realtek: add quirk for HP Victus 15-fb0xxx This adds a mute led quirck for HP Victus 15-fb0xxx (103c:8a3d) model - As it used 0x8(full bright)/0x7f(little dim) for mute led on and other values as 0ff (0x0, 0x4, ...) - So, use ALC245_FIXUP_HP_MUTE_LED_V2_COEFBIT insted for safer approach Cc: Signed-off-by: Sourav Nayak Link: https://patch.msgid.link/20260327142805.17139-1-nonameblank007@gmail.com Signed-off-by: Takashi Iwai --- sound/hda/codecs/realtek/alc269.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/hda/codecs/realtek/alc269.c b/sound/hda/codecs/realtek/alc269.c index 393910857e89..78ab0e86621b 100644 --- a/sound/hda/codecs/realtek/alc269.c +++ b/sound/hda/codecs/realtek/alc269.c @@ -7009,6 +7009,7 @@ static const struct hda_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x103c, 0x8a30, "HP Envy 17", ALC287_FIXUP_CS35L41_I2C_2), SND_PCI_QUIRK(0x103c, 0x8a31, "HP Envy 15", ALC287_FIXUP_CS35L41_I2C_2), SND_PCI_QUIRK(0x103c, 0x8a34, "HP Pavilion x360 2-in-1 Laptop 14-ek0xxx", ALC245_FIXUP_HP_MUTE_LED_COEFBIT), + SND_PCI_QUIRK(0x103c, 0x8a3d, "HP Victus 15-fb0xxx (MB 8A3D)", ALC245_FIXUP_HP_MUTE_LED_V2_COEFBIT), SND_PCI_QUIRK(0x103c, 0x8a4f, "HP Victus 15-fa0xxx (MB 8A4F)", ALC245_FIXUP_HP_MUTE_LED_COEFBIT), SND_PCI_QUIRK(0x103c, 0x8a6e, "HP EDNA 360", ALC287_FIXUP_CS35L41_I2C_4), SND_PCI_QUIRK(0x103c, 0x8a74, "HP ProBook 440 G8 Notebook PC", ALC236_FIXUP_HP_GPIO_LED), From 250ab25391edeeab8462b68be42e4904506c409c Mon Sep 17 00:00:00 2001 From: Wesley Atwell Date: Tue, 24 Mar 2026 16:13:26 -0600 Subject: [PATCH 313/401] tracing: Drain deferred trigger frees if kthread creation fails Boot-time trigger registration can fail before the trigger-data cleanup kthread exists. Deferring those frees until late init is fine, but the post-boot fallback must still drain the deferred list if kthread creation never succeeds. Otherwise, boot-deferred nodes can accumulate on trigger_data_free_list, later frees fall back to synchronously freeing only the current object, and the older queued entries are leaked forever. To trigger this, add the following to the kernel command line: trace_event=sched_switch trace_trigger=sched_switch.traceon,sched_switch.traceon The second traceon trigger will fail and be freed. This triggers a NULL pointer dereference and crashes the kernel. Keep the deferred boot-time behavior, but when kthread creation fails, drain the whole queued list synchronously. Do the same in the late-init drain path so queued entries are not stranded there either. Cc: stable@vger.kernel.org Link: https://patch.msgid.link/20260324221326.1395799-3-atwellwea@gmail.com Fixes: 61d445af0a7c ("tracing: Add bulk garbage collection of freeing event_trigger_data") Signed-off-by: Wesley Atwell Signed-off-by: Steven Rostedt (Google) --- kernel/trace/trace_events_trigger.c | 85 +++++++++++++++++++++++------ 1 file changed, 69 insertions(+), 16 deletions(-) diff --git a/kernel/trace/trace_events_trigger.c b/kernel/trace/trace_events_trigger.c index d5230b759a2d..655db2e82513 100644 --- a/kernel/trace/trace_events_trigger.c +++ b/kernel/trace/trace_events_trigger.c @@ -22,6 +22,39 @@ static struct task_struct *trigger_kthread; static struct llist_head trigger_data_free_list; static DEFINE_MUTEX(trigger_data_kthread_mutex); +static int trigger_kthread_fn(void *ignore); + +static void trigger_create_kthread_locked(void) +{ + lockdep_assert_held(&trigger_data_kthread_mutex); + + if (!trigger_kthread) { + struct task_struct *kthread; + + kthread = kthread_create(trigger_kthread_fn, NULL, + "trigger_data_free"); + if (!IS_ERR(kthread)) + WRITE_ONCE(trigger_kthread, kthread); + } +} + +static void trigger_data_free_queued_locked(void) +{ + struct event_trigger_data *data, *tmp; + struct llist_node *llnodes; + + lockdep_assert_held(&trigger_data_kthread_mutex); + + llnodes = llist_del_all(&trigger_data_free_list); + if (!llnodes) + return; + + tracepoint_synchronize_unregister(); + + llist_for_each_entry_safe(data, tmp, llnodes, llist) + kfree(data); +} + /* Bulk garbage collection of event_trigger_data elements */ static int trigger_kthread_fn(void *ignore) { @@ -56,30 +89,50 @@ void trigger_data_free(struct event_trigger_data *data) if (data->cmd_ops->set_filter) data->cmd_ops->set_filter(NULL, data, NULL); - if (unlikely(!trigger_kthread)) { - guard(mutex)(&trigger_data_kthread_mutex); - /* Check again after taking mutex */ - if (!trigger_kthread) { - struct task_struct *kthread; - - kthread = kthread_create(trigger_kthread_fn, NULL, - "trigger_data_free"); - if (!IS_ERR(kthread)) - WRITE_ONCE(trigger_kthread, kthread); - } + /* + * Boot-time trigger registration can fail before kthread creation + * works. Keep the deferred-free semantics during boot and let late + * init start the kthread to drain the list. + */ + if (system_state == SYSTEM_BOOTING && !trigger_kthread) { + llist_add(&data->llist, &trigger_data_free_list); + return; } - if (!trigger_kthread) { - /* Do it the slow way */ - tracepoint_synchronize_unregister(); - kfree(data); - return; + if (unlikely(!trigger_kthread)) { + guard(mutex)(&trigger_data_kthread_mutex); + + trigger_create_kthread_locked(); + /* Check again after taking mutex */ + if (!trigger_kthread) { + llist_add(&data->llist, &trigger_data_free_list); + /* Drain the queued frees synchronously if creation failed. */ + trigger_data_free_queued_locked(); + return; + } } llist_add(&data->llist, &trigger_data_free_list); wake_up_process(trigger_kthread); } +static int __init trigger_data_free_init(void) +{ + guard(mutex)(&trigger_data_kthread_mutex); + + if (llist_empty(&trigger_data_free_list)) + return 0; + + trigger_create_kthread_locked(); + if (trigger_kthread) + wake_up_process(trigger_kthread); + else + trigger_data_free_queued_locked(); + + return 0; +} +late_initcall(trigger_data_free_init); + static inline void data_ops_trigger(struct event_trigger_data *data, struct trace_buffer *buffer, void *rec, struct ring_buffer_event *event) From 210d36d892de5195e6766c45519dfb1e65f3eb83 Mon Sep 17 00:00:00 2001 From: Davidlohr Bueso Date: Wed, 25 Mar 2026 17:17:59 -0700 Subject: [PATCH 314/401] futex: Clear stale exiting pointer in futex_lock_pi() retry path Fuzzying/stressing futexes triggered: WARNING: kernel/futex/core.c:825 at wait_for_owner_exiting+0x7a/0x80, CPU#11: futex_lock_pi_s/524 When futex_lock_pi_atomic() sees the owner is exiting, it returns -EBUSY and stores a refcounted task pointer in 'exiting'. After wait_for_owner_exiting() consumes that reference, the local pointer is never reset to nil. Upon a retry, if futex_lock_pi_atomic() returns a different error, the bogus pointer is passed to wait_for_owner_exiting(). CPU0 CPU1 CPU2 futex_lock_pi(uaddr) // acquires the PI futex exit() futex_cleanup_begin() futex_state = EXITING; futex_lock_pi(uaddr) futex_lock_pi_atomic() attach_to_pi_owner() // observes EXITING *exiting = owner; // takes ref return -EBUSY wait_for_owner_exiting(-EBUSY, owner) put_task_struct(); // drops ref // exiting still points to owner goto retry; futex_lock_pi_atomic() lock_pi_update_atomic() cmpxchg(uaddr) *uaddr ^= WAITERS // whatever // value changed return -EAGAIN; wait_for_owner_exiting(-EAGAIN, exiting) // stale WARN_ON_ONCE(exiting) Fix this by resetting upon retry, essentially aligning it with requeue_pi. Fixes: 3ef240eaff36 ("futex: Prevent exit livelock") Signed-off-by: Davidlohr Bueso Signed-off-by: Thomas Gleixner Cc: stable@vger.kernel.org Link: https://patch.msgid.link/20260326001759.4129680-1-dave@stgolabs.net --- kernel/futex/pi.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/kernel/futex/pi.c b/kernel/futex/pi.c index bc1f7e83a37e..7808068fa59e 100644 --- a/kernel/futex/pi.c +++ b/kernel/futex/pi.c @@ -918,7 +918,7 @@ int fixup_pi_owner(u32 __user *uaddr, struct futex_q *q, int locked) int futex_lock_pi(u32 __user *uaddr, unsigned int flags, ktime_t *time, int trylock) { struct hrtimer_sleeper timeout, *to; - struct task_struct *exiting = NULL; + struct task_struct *exiting; struct rt_mutex_waiter rt_waiter; struct futex_q q = futex_q_init; DEFINE_WAKE_Q(wake_q); @@ -933,6 +933,7 @@ int futex_lock_pi(u32 __user *uaddr, unsigned int flags, ktime_t *time, int tryl to = futex_setup_timer(time, &timeout, flags, 0); retry: + exiting = NULL; ret = get_futex_key(uaddr, flags, &q.key, FUTEX_WRITE); if (unlikely(ret != 0)) goto out; From b0faf733fc1cf7f198c299dcc1638571d7cfd0f7 Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Fri, 27 Mar 2026 16:18:24 +0100 Subject: [PATCH 315/401] MAINTAINERS: drop outdated I2C website As stated on the website: "This wiki has been archived and the content is no longer updated." No need to reference it. Signed-off-by: Wolfram Sang --- MAINTAINERS | 2 -- 1 file changed, 2 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index 7d10988cbc62..6593e5c7d821 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -12009,7 +12009,6 @@ I2C SUBSYSTEM M: Wolfram Sang L: linux-i2c@vger.kernel.org S: Maintained -W: https://i2c.wiki.kernel.org/ Q: https://patchwork.ozlabs.org/project/linux-i2c/list/ T: git git://git.kernel.org/pub/scm/linux/kernel/git/wsa/linux.git F: Documentation/i2c/ @@ -12035,7 +12034,6 @@ I2C SUBSYSTEM HOST DRIVERS M: Andi Shyti L: linux-i2c@vger.kernel.org S: Maintained -W: https://i2c.wiki.kernel.org/ Q: https://patchwork.ozlabs.org/project/linux-i2c/list/ T: git git://git.kernel.org/pub/scm/linux/kernel/git/andi.shyti/linux.git F: Documentation/devicetree/bindings/i2c/ From b045ab3dff97edae6d538eeff900a34c098761f8 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Sun, 29 Mar 2026 11:12:37 +0200 Subject: [PATCH 316/401] ALSA: ctxfi: Fix missing SPDIFI1 index handling SPDIF1 DAIO type isn't properly handled in daio_device_index() for hw20k2, and it returned -EINVAL, which ended up with the out-of-bounds array access. Follow the hw20k1 pattern and return the proper index for this type, too. Reported-and-tested-by: Karsten Hohmeier Closes: https://lore.kernel.org/20260315155004.15633-1-linux@hohmatik.de Cc: Link: https://patch.msgid.link/20260329091240.420194-1-tiwai@suse.de Signed-off-by: Takashi Iwai --- sound/pci/ctxfi/ctdaio.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/ctxfi/ctdaio.c b/sound/pci/ctxfi/ctdaio.c index b8bde27f3a1d..19faa81d5400 100644 --- a/sound/pci/ctxfi/ctdaio.c +++ b/sound/pci/ctxfi/ctdaio.c @@ -118,6 +118,7 @@ static unsigned int daio_device_index(enum DAIOTYP type, struct hw *hw) switch (type) { case SPDIFOO: return 0; case SPDIFIO: return 0; + case SPDIFI1: return 1; case LINEO1: return 4; case LINEO2: return 7; case LINEO3: return 5; From 277c6960d4ddb94d16198afd70c92c3d4593d131 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Sun, 29 Mar 2026 11:12:38 +0200 Subject: [PATCH 317/401] ALSA: ctxfi: Check the error for index mapping The ctxfi driver blindly assumed a proper value returned from daio_device_index(), but it's not always true. Add a proper error check to deal with the error from the function. Cc: Link: https://lore.kernel.org/87cy149n6k.wl-tiwai@suse.de Link: https://patch.msgid.link/20260329091240.420194-2-tiwai@suse.de Signed-off-by: Takashi Iwai --- sound/pci/ctxfi/ctdaio.c | 81 +++++++++++++++++++++++++--------------- 1 file changed, 50 insertions(+), 31 deletions(-) diff --git a/sound/pci/ctxfi/ctdaio.c b/sound/pci/ctxfi/ctdaio.c index 19faa81d5400..4dbb1dd7af32 100644 --- a/sound/pci/ctxfi/ctdaio.c +++ b/sound/pci/ctxfi/ctdaio.c @@ -99,7 +99,7 @@ static const struct rsc_ops daio_in_rsc_ops_20k2 = { .output_slot = daio_index, }; -static unsigned int daio_device_index(enum DAIOTYP type, struct hw *hw) +static int daio_device_index(enum DAIOTYP type, struct hw *hw) { switch (hw->chip_type) { case ATC20K1: @@ -112,7 +112,9 @@ static unsigned int daio_device_index(enum DAIOTYP type, struct hw *hw) case LINEO3: return 5; case LINEO4: return 6; case LINEIM: return 7; - default: return -EINVAL; + default: + pr_err("ctxfi: Invalid type %d for hw20k1\n", type); + return -EINVAL; } case ATC20K2: switch (type) { @@ -126,9 +128,12 @@ static unsigned int daio_device_index(enum DAIOTYP type, struct hw *hw) case LINEIM: return 4; case MIC: return 5; case RCA: return 3; - default: return -EINVAL; + default: + pr_err("ctxfi: Invalid type %d for hw20k2\n", type); + return -EINVAL; } default: + pr_err("ctxfi: Invalid chip type %d\n", hw->chip_type); return -EINVAL; } } @@ -149,8 +154,11 @@ static int dao_spdif_set_spos(struct dao *dao, unsigned int spos) static int dao_commit_write(struct dao *dao) { - dao->hw->dao_commit_write(dao->hw, - daio_device_index(dao->daio.type, dao->hw), dao->ctrl_blk); + int idx = daio_device_index(dao->daio.type, dao->hw); + + if (idx < 0) + return idx; + dao->hw->dao_commit_write(dao->hw, idx, dao->ctrl_blk); return 0; } @@ -288,8 +296,11 @@ static int dai_set_enb_srt(struct dai *dai, unsigned int enb) static int dai_commit_write(struct dai *dai) { - dai->hw->dai_commit_write(dai->hw, - daio_device_index(dai->daio.type, dai->hw), dai->ctrl_blk); + int idx = daio_device_index(dai->daio.type, dai->hw); + + if (idx < 0) + return idx; + dai->hw->dai_commit_write(dai->hw, idx, dai->ctrl_blk); return 0; } @@ -368,7 +379,7 @@ static int dao_rsc_init(struct dao *dao, { struct hw *hw = mgr->mgr.hw; unsigned int conf; - int err; + int idx, err; err = daio_rsc_init(&dao->daio, desc, mgr->mgr.hw); if (err) @@ -387,15 +398,18 @@ static int dao_rsc_init(struct dao *dao, if (err) goto error2; - hw->daio_mgr_dsb_dao(mgr->mgr.ctrl_blk, - daio_device_index(dao->daio.type, hw)); + idx = daio_device_index(dao->daio.type, hw); + if (idx < 0) { + err = idx; + goto error2; + } + + hw->daio_mgr_dsb_dao(mgr->mgr.ctrl_blk, idx); hw->daio_mgr_commit_write(hw, mgr->mgr.ctrl_blk); conf = (desc->msr & 0x7) | (desc->passthru << 3); - hw->daio_mgr_dao_init(hw, mgr->mgr.ctrl_blk, - daio_device_index(dao->daio.type, hw), conf); - hw->daio_mgr_enb_dao(mgr->mgr.ctrl_blk, - daio_device_index(dao->daio.type, hw)); + hw->daio_mgr_dao_init(hw, mgr->mgr.ctrl_blk, idx, conf); + hw->daio_mgr_enb_dao(mgr->mgr.ctrl_blk, idx); hw->daio_mgr_commit_write(hw, mgr->mgr.ctrl_blk); return 0; @@ -444,7 +458,7 @@ static int dai_rsc_init(struct dai *dai, const struct daio_desc *desc, struct daio_mgr *mgr) { - int err; + int idx, err; struct hw *hw = mgr->mgr.hw; unsigned int rsr, msr; @@ -458,6 +472,12 @@ static int dai_rsc_init(struct dai *dai, if (err) goto error1; + idx = daio_device_index(dai->daio.type, dai->hw); + if (idx < 0) { + err = idx; + goto error1; + } + for (rsr = 0, msr = desc->msr; msr > 1; msr >>= 1) rsr++; @@ -466,8 +486,7 @@ static int dai_rsc_init(struct dai *dai, /* default to disabling control of a SRC */ hw->dai_srt_set_ec(dai->ctrl_blk, 0); hw->dai_srt_set_et(dai->ctrl_blk, 0); /* default to disabling SRT */ - hw->dai_commit_write(hw, - daio_device_index(dai->daio.type, dai->hw), dai->ctrl_blk); + hw->dai_commit_write(hw, idx, dai->ctrl_blk); return 0; @@ -582,28 +601,28 @@ static int put_daio_rsc(struct daio_mgr *mgr, struct daio *daio) static int daio_mgr_enb_daio(struct daio_mgr *mgr, struct daio *daio) { struct hw *hw = mgr->mgr.hw; + int idx = daio_device_index(daio->type, hw); - if (daio->output) { - hw->daio_mgr_enb_dao(mgr->mgr.ctrl_blk, - daio_device_index(daio->type, hw)); - } else { - hw->daio_mgr_enb_dai(mgr->mgr.ctrl_blk, - daio_device_index(daio->type, hw)); - } + if (idx < 0) + return idx; + if (daio->output) + hw->daio_mgr_enb_dao(mgr->mgr.ctrl_blk, idx); + else + hw->daio_mgr_enb_dai(mgr->mgr.ctrl_blk, idx); return 0; } static int daio_mgr_dsb_daio(struct daio_mgr *mgr, struct daio *daio) { struct hw *hw = mgr->mgr.hw; + int idx = daio_device_index(daio->type, hw); - if (daio->output) { - hw->daio_mgr_dsb_dao(mgr->mgr.ctrl_blk, - daio_device_index(daio->type, hw)); - } else { - hw->daio_mgr_dsb_dai(mgr->mgr.ctrl_blk, - daio_device_index(daio->type, hw)); - } + if (idx < 0) + return idx; + if (daio->output) + hw->daio_mgr_dsb_dao(mgr->mgr.ctrl_blk, idx); + else + hw->daio_mgr_dsb_dai(mgr->mgr.ctrl_blk, idx); return 0; } From 7aaa8047eafd0bd628065b15757d9b48c5f9c07d Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 29 Mar 2026 15:40:00 -0700 Subject: [PATCH 318/401] Linux 7.0-rc6 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index e1279c4d5b24..6b1d9fb1a6b4 100644 --- a/Makefile +++ b/Makefile @@ -2,7 +2,7 @@ VERSION = 7 PATCHLEVEL = 0 SUBLEVEL = 0 -EXTRAVERSION = -rc5 +EXTRAVERSION = -rc6 NAME = Baby Opossum Posse # *DOCUMENTATION* From 45424e871abf2a152e247a9cff78359f18dd95c0 Mon Sep 17 00:00:00 2001 From: Berk Cem Goksel Date: Sun, 29 Mar 2026 16:38:25 +0300 Subject: [PATCH 319/401] ALSA: caiaq: fix stack out-of-bounds read in init_card The loop creates a whitespace-stripped copy of the card shortname where `len < sizeof(card->id)` is used for the bounds check. Since sizeof(card->id) is 16 and the local id buffer is also 16 bytes, writing 16 non-space characters fills the entire buffer, overwriting the terminating nullbyte. When this non-null-terminated string is later passed to snd_card_set_id() -> copy_valid_id_string(), the function scans forward with `while (*nid && ...)` and reads past the end of the stack buffer, reading the contents of the stack. A USB device with a product name containing many non-ASCII, non-space characters (e.g. multibyte UTF-8) will reliably trigger this as follows: BUG: KASAN: stack-out-of-bounds in copy_valid_id_string sound/core/init.c:696 [inline] BUG: KASAN: stack-out-of-bounds in snd_card_set_id_no_lock+0x698/0x74c sound/core/init.c:718 The off-by-one has been present since commit bafeee5b1f8d ("ALSA: snd_usb_caiaq: give better shortname") from June 2009 (v2.6.31-rc1), which first introduced this whitespace-stripping loop. The original code never accounted for the null terminator when bounding the copy. Fix this by changing the loop bound to `sizeof(card->id) - 1`, ensuring at least one byte remains as the null terminator. Fixes: bafeee5b1f8d ("ALSA: snd_usb_caiaq: give better shortname") Cc: stable@vger.kernel.org Cc: Andrey Konovalov Reported-by: Berk Cem Goksel Signed-off-by: Berk Cem Goksel Link: https://patch.msgid.link/20260329133825.581585-1-berkcgoksel@gmail.com Signed-off-by: Takashi Iwai --- sound/usb/caiaq/device.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/usb/caiaq/device.c b/sound/usb/caiaq/device.c index dfd820483849..3a71bab8a477 100644 --- a/sound/usb/caiaq/device.c +++ b/sound/usb/caiaq/device.c @@ -488,7 +488,7 @@ static int init_card(struct snd_usb_caiaqdev *cdev) memset(id, 0, sizeof(id)); for (c = card->shortname, len = 0; - *c && len < sizeof(card->id); c++) + *c && len < sizeof(card->id) - 1; c++) if (*c != ' ') id[len++] = *c; From f025ac8c698ac7d29eb3b5025bcdaf7ad675785d Mon Sep 17 00:00:00 2001 From: Dag Smedberg Date: Sun, 29 Mar 2026 19:04:20 +0200 Subject: [PATCH 320/401] ALSA: usb-audio: Exclude Scarlett Solo 1st Gen from SKIP_IFACE_SETUP Same issue that the Scarlett 2i2 1st Gen had: QUIRK_FLAG_SKIP_IFACE_SETUP causes distorted audio on the Scarlett Solo 1st Gen (1235:801c). Fixes: 38c322068a26 ("ALSA: usb-audio: Add QUIRK_FLAG_SKIP_IFACE_SETUP") Reported-by: Dag Smedberg Tested-by: Dag Smedberg Signed-off-by: Dag Smedberg Link: https://patch.msgid.link/20260329170420.4122-1-dag@dsmedberg.se Signed-off-by: Takashi Iwai --- sound/usb/quirks.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/usb/quirks.c b/sound/usb/quirks.c index 6e40c18c37f9..116da076a194 100644 --- a/sound/usb/quirks.c +++ b/sound/usb/quirks.c @@ -2435,6 +2435,7 @@ static const struct usb_audio_quirk_flags_table quirk_flags_table[] = { QUIRK_FLAG_VALIDATE_RATES), DEVICE_FLG(0x1235, 0x8006, 0), /* Focusrite Scarlett 2i2 1st Gen */ DEVICE_FLG(0x1235, 0x800a, 0), /* Focusrite Scarlett 2i4 1st Gen */ + DEVICE_FLG(0x1235, 0x801c, 0), /* Focusrite Scarlett Solo 1st Gen */ VENDOR_FLG(0x1235, /* Focusrite Novation */ QUIRK_FLAG_SKIP_CLOCK_SELECTOR | QUIRK_FLAG_SKIP_IFACE_SETUP), From f1af71d568e55536d9297bfa7907ad497108cf30 Mon Sep 17 00:00:00 2001 From: Zhang Heng Date: Mon, 30 Mar 2026 15:53:34 +0800 Subject: [PATCH 321/401] ALSA: hda/realtek: Add quirk for ASUS ROG Strix SCAR 15 ASUS ROG Strix SCAR 15, like the Strix G15, requires the ALC285_FIXUP_ASUS_G533Z_PINS quirk to work properly. Link: https://bugzilla.kernel.org/show_bug.cgi?id=221247 Cc: Signed-off-by: Zhang Heng Link: https://patch.msgid.link/20260330075334.50962-2-zhangheng@kylinos.cn Signed-off-by: Takashi Iwai --- sound/hda/codecs/realtek/alc269.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/hda/codecs/realtek/alc269.c b/sound/hda/codecs/realtek/alc269.c index 78ab0e86621b..c14046e09aa4 100644 --- a/sound/hda/codecs/realtek/alc269.c +++ b/sound/hda/codecs/realtek/alc269.c @@ -7269,6 +7269,7 @@ static const struct hda_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1043, 0x1533, "ASUS GV302XA/XJ/XQ/XU/XV/XI", ALC287_FIXUP_CS35L41_I2C_2), SND_PCI_QUIRK(0x1043, 0x1573, "ASUS GZ301VV/VQ/VU/VJ/VA/VC/VE/VVC/VQC/VUC/VJC/VEC/VCC", ALC285_FIXUP_ASUS_HEADSET_MIC), SND_PCI_QUIRK(0x1043, 0x1584, "ASUS UM3406GA ", ALC287_FIXUP_CS35L41_I2C_2), + SND_PCI_QUIRK(0x1043, 0x1602, "ASUS ROG Strix SCAR 15", ALC285_FIXUP_ASUS_G533Z_PINS), SND_PCI_QUIRK(0x1043, 0x1652, "ASUS ROG Zephyrus Do 15 SE", ALC289_FIXUP_ASUS_ZEPHYRUS_DUAL_SPK), SND_PCI_QUIRK(0x1043, 0x1662, "ASUS GV301QH", ALC294_FIXUP_ASUS_DUAL_SPK), SND_PCI_QUIRK(0x1043, 0x1663, "ASUS GU603ZI/ZJ/ZQ/ZU/ZV", ALC285_FIXUP_ASUS_HEADSET_MIC), From 27c299698464c515c5cd97b4fcf1a0e38600b2ac Mon Sep 17 00:00:00 2001 From: Zhang Heng Date: Mon, 30 Mar 2026 17:51:33 +0800 Subject: [PATCH 322/401] ASoC: amd: yc: Add DMI quirk for ASUS Vivobook Pro 16X OLED M7601RM Add a DMI quirk for the ASUS Vivobook Pro 16X OLED M7601RM fixing the issue where the internal microphone was not detected. Link: https://bugzilla.kernel.org/show_bug.cgi?id=221288 Signed-off-by: Zhang Heng Link: https://patch.msgid.link/20260330095133.81786-1-zhangheng@kylinos.cn Signed-off-by: Mark Brown --- sound/soc/amd/yc/acp6x-mach.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/sound/soc/amd/yc/acp6x-mach.c b/sound/soc/amd/yc/acp6x-mach.c index 6f1c105ca77e..87d6aeb78807 100644 --- a/sound/soc/amd/yc/acp6x-mach.c +++ b/sound/soc/amd/yc/acp6x-mach.c @@ -731,6 +731,13 @@ static const struct dmi_system_id yc_acp_quirk_table[] = { DMI_MATCH(DMI_PRODUCT_NAME, "Thin A15 B7VE"), } }, + { + .driver_data = &acp6x_card, + .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "ASUSTeK COMPUTER INC."), + DMI_MATCH(DMI_PRODUCT_NAME, "M7601RM"), + } + }, {} }; From 8ec017cf31299c4b6287ebe27afe81c986aeef88 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Gilson=20Marquato=20J=C3=BAnior?= Date: Mon, 30 Mar 2026 02:43:48 +0100 Subject: [PATCH 323/401] ASoC: amd: yc: Add DMI entry for HP Laptop 15-fc0xxx MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The HP Laptop 15-fc0xxx (subsystem ID 0x103c8dc9) has an internal DMIC connected to the AMD ACP6x audio coprocessor. Add a DMI quirk entry so the internal microphone is properly detected on this model. Tested on HP Laptop 15-fc0237ns with Fedora 43 (kernel 6.19.9). Signed-off-by: Gilson Marquato Júnior Link: https://patch.msgid.link/20260330-hp-15-fc0xxx-dmic-v2-v1-1-6dd6f53a1917@hotmail.com Signed-off-by: Mark Brown --- sound/soc/amd/yc/acp6x-mach.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/sound/soc/amd/yc/acp6x-mach.c b/sound/soc/amd/yc/acp6x-mach.c index 87d6aeb78807..aa6200933182 100644 --- a/sound/soc/amd/yc/acp6x-mach.c +++ b/sound/soc/amd/yc/acp6x-mach.c @@ -45,6 +45,13 @@ static struct snd_soc_card acp6x_card = { }; static const struct dmi_system_id yc_acp_quirk_table[] = { + { + .driver_data = &acp6x_card, + .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "HP"), + DMI_MATCH(DMI_PRODUCT_NAME, "HP Laptop 15-fc0xxx"), + } + }, { .driver_data = &acp6x_card, .matches = { From ea31be8a2c8c99eac198f3b7f2dc770111f2b182 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Mon, 30 Mar 2026 18:22:20 +0200 Subject: [PATCH 324/401] ALSA: hda/realtek: Add quirk for Samsung Book2 Pro 360 (NP950QED) There is another Book2 Pro model (NP950QED) that seems equipped with the same speaker module as the non-360 model, which requires ALC298_FIXUP_SAMSUNG_AMP_V2_2_AMPS quirk. Reported-by: Throw Link: https://patch.msgid.link/20260330162249.147665-1-tiwai@suse.de Signed-off-by: Takashi Iwai --- sound/hda/codecs/realtek/alc269.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/hda/codecs/realtek/alc269.c b/sound/hda/codecs/realtek/alc269.c index c14046e09aa4..766d7bb2d4f5 100644 --- a/sound/hda/codecs/realtek/alc269.c +++ b/sound/hda/codecs/realtek/alc269.c @@ -7409,6 +7409,7 @@ static const struct hda_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x144d, 0xc188, "Samsung Galaxy Book Flex (NT950QCT-A38A)", ALC298_FIXUP_SAMSUNG_AMP), SND_PCI_QUIRK(0x144d, 0xc189, "Samsung Galaxy Book Flex (NT950QCG-X716)", ALC298_FIXUP_SAMSUNG_AMP), SND_PCI_QUIRK(0x144d, 0xc18a, "Samsung Galaxy Book Ion (NP930XCJ-K01US)", ALC298_FIXUP_SAMSUNG_AMP), + SND_PCI_QUIRK(0x144d, 0xc1ac, "Samsung Galaxy Book2 Pro 360 (NP950QED)", ALC298_FIXUP_SAMSUNG_AMP_V2_2_AMPS), SND_PCI_QUIRK(0x144d, 0xc1a3, "Samsung Galaxy Book Pro (NP935XDB-KC1SE)", ALC298_FIXUP_SAMSUNG_AMP), SND_PCI_QUIRK(0x144d, 0xc1a4, "Samsung Galaxy Book Pro 360 (NT935QBD)", ALC298_FIXUP_SAMSUNG_AMP), SND_PCI_QUIRK(0x144d, 0xc1a6, "Samsung Galaxy Book Pro 360 (NP930QBD)", ALC298_FIXUP_SAMSUNG_AMP), From 2e8b1a1d12ae3338efeb1c3de3eb4e9324b87a28 Mon Sep 17 00:00:00 2001 From: Tomas Glozar Date: Mon, 30 Mar 2026 11:12:07 +0200 Subject: [PATCH 325/401] rtla: Fix build without libbpf header rtla supports building without libbpf. However, BPF actions patchset [1] adds an include of bpf/libbpf.h into timerlat_bpf.h, which breaks build on systems that don't have libbpf headers installed. This is a leftover from a draft version of the patchset where timerlat_bpf_set_action() (which takes a struct bpf_program * argument) was defined in the header. timerlat_bpf.c already includes bpf/libbpf.h via timerlat.skel.h when libbpf is present. Remove the redundant include to fix build on systems without libbpf headers. [1] https://lore.kernel.org/linux-trace-kernel/20251126144205.331954-1-tglozar@redhat.com/T/ Cc: John Kacur Cc: Luis Goncalves Cc: Crystal Wood Cc: Costa Shulyupin Link: https://patch.msgid.link/20260330091207.16184-1-tglozar@redhat.com Reported-by: Steven Rostedt (Google) Closes: https://lore.kernel.org/linux-trace-kernel/20260329122202.65a8b575@robin/ Fixes: 8cd0f08ac72e ("rtla/timerlat: Support tail call from BPF program") Signed-off-by: Tomas Glozar Reviewed-by: Wander Lairson Costa Signed-off-by: Steven Rostedt (Google) --- tools/tracing/rtla/src/timerlat_bpf.h | 1 - 1 file changed, 1 deletion(-) diff --git a/tools/tracing/rtla/src/timerlat_bpf.h b/tools/tracing/rtla/src/timerlat_bpf.h index 169abeaf4363..f7c5675737fe 100644 --- a/tools/tracing/rtla/src/timerlat_bpf.h +++ b/tools/tracing/rtla/src/timerlat_bpf.h @@ -12,7 +12,6 @@ enum summary_field { }; #ifndef __bpf__ -#include #ifdef HAVE_BPF_SKEL int timerlat_bpf_init(struct timerlat_params *params); int timerlat_bpf_attach(void); From b9eff9732cb0f86a68c9d1592a98ceab47c01e95 Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Fri, 27 Mar 2026 02:43:54 +0000 Subject: [PATCH 326/401] ASoC: soc-core: call missing INIT_LIST_HEAD() for card_aux_list Component has "card_aux_list" which is added/deled in bind/unbind aux dev function (A), and used in for_each_card_auxs() loop (B). static void soc_unbind_aux_dev(...) { ... for_each_card_auxs_safe(...) { ... (A) list_del(&component->card_aux_list); } ^^^^^^^^^^^^^ } static int soc_bind_aux_dev(...) { ... for_each_card_pre_auxs(...) { ... (A) list_add(&component->card_aux_list, ...); } ^^^^^^^^^^^^^ ... } #define for_each_card_auxs(card, component) \ (B) list_for_each_entry(component, ..., card_aux_list) ^^^^^^^^^^^^^ But it has been used without calling INIT_LIST_HEAD(). > git grep card_aux_list sound/soc sound/soc/soc-core.c: list_del(&component->card_aux_list); sound/soc/soc-core.c: list_add(&component->card_aux_list, ...); call missing INIT_LIST_HEAD() for it. Signed-off-by: Kuninori Morimoto Link: https://patch.msgid.link/87341mxa8l.wl-kuninori.morimoto.gx@renesas.com Signed-off-by: Mark Brown --- sound/soc/soc-core.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/soc/soc-core.c b/sound/soc/soc-core.c index 573693e21780..ff6eb6bfc63b 100644 --- a/sound/soc/soc-core.c +++ b/sound/soc/soc-core.c @@ -2859,6 +2859,7 @@ int snd_soc_component_initialize(struct snd_soc_component *component, INIT_LIST_HEAD(&component->dobj_list); INIT_LIST_HEAD(&component->card_list); INIT_LIST_HEAD(&component->list); + INIT_LIST_HEAD(&component->card_aux_list); mutex_init(&component->io_mutex); if (!component->name) { From 415cb193bb9736f0e830286c72a6fa8eb2a9cc5c Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Sat, 28 Mar 2026 14:18:55 -1000 Subject: [PATCH 327/401] sched_ext: Fix SCX_KICK_WAIT deadlock by deferring wait to balance callback SCX_KICK_WAIT busy-waits in kick_cpus_irq_workfn() using smp_cond_load_acquire() until the target CPU's kick_sync advances. Because the irq_work runs in hardirq context, the waiting CPU cannot reschedule and its own kick_sync never advances. If multiple CPUs form a wait cycle, all CPUs deadlock. Replace the busy-wait in kick_cpus_irq_workfn() with resched_curr() to force the CPU through do_pick_task_scx(), which queues a balance callback to perform the wait. The balance callback drops the rq lock and enables IRQs following the sched_core_balance() pattern, so the CPU can process IPIs while waiting. The local CPU's kick_sync is advanced on entry to do_pick_task_scx() and continuously during the wait, ensuring any CPU that starts waiting for us sees the advancement and cannot form cyclic dependencies. Fixes: 90e55164dad4 ("sched_ext: Implement SCX_KICK_WAIT") Cc: stable@vger.kernel.org # v6.12+ Reported-by: Christian Loehle Link: https://lore.kernel.org/r/20260316100249.1651641-1-christian.loehle@arm.com Signed-off-by: Tejun Heo Tested-by: Christian Loehle --- kernel/sched/ext.c | 95 ++++++++++++++++++++++++++++++++------------ kernel/sched/sched.h | 3 ++ 2 files changed, 73 insertions(+), 25 deletions(-) diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c index 26a6ac2f8826..d5bdcdb3f700 100644 --- a/kernel/sched/ext.c +++ b/kernel/sched/ext.c @@ -2404,7 +2404,7 @@ static void put_prev_task_scx(struct rq *rq, struct task_struct *p, { struct scx_sched *sch = scx_root; - /* see kick_cpus_irq_workfn() */ + /* see kick_sync_wait_bal_cb() */ smp_store_release(&rq->scx.kick_sync, rq->scx.kick_sync + 1); update_curr_scx(rq); @@ -2447,6 +2447,48 @@ static void put_prev_task_scx(struct rq *rq, struct task_struct *p, switch_class(rq, next); } +static void kick_sync_wait_bal_cb(struct rq *rq) +{ + struct scx_kick_syncs __rcu *ks = __this_cpu_read(scx_kick_syncs); + unsigned long *ksyncs = rcu_dereference_sched(ks)->syncs; + bool waited; + s32 cpu; + + /* + * Drop rq lock and enable IRQs while waiting. IRQs must be enabled + * — a target CPU may be waiting for us to process an IPI (e.g. TLB + * flush) while we wait for its kick_sync to advance. + * + * Also, keep advancing our own kick_sync so that new kick_sync waits + * targeting us, which can start after we drop the lock, cannot form + * cyclic dependencies. + */ +retry: + waited = false; + for_each_cpu(cpu, rq->scx.cpus_to_sync) { + /* + * smp_load_acquire() pairs with smp_store_release() on + * kick_sync updates on the target CPUs. + */ + if (cpu == cpu_of(rq) || + smp_load_acquire(&cpu_rq(cpu)->scx.kick_sync) != ksyncs[cpu]) { + cpumask_clear_cpu(cpu, rq->scx.cpus_to_sync); + continue; + } + + raw_spin_rq_unlock_irq(rq); + while (READ_ONCE(cpu_rq(cpu)->scx.kick_sync) == ksyncs[cpu]) { + smp_store_release(&rq->scx.kick_sync, rq->scx.kick_sync + 1); + cpu_relax(); + } + raw_spin_rq_lock_irq(rq); + waited = true; + } + + if (waited) + goto retry; +} + static struct task_struct *first_local_task(struct rq *rq) { return list_first_entry_or_null(&rq->scx.local_dsq.list, @@ -2460,7 +2502,7 @@ do_pick_task_scx(struct rq *rq, struct rq_flags *rf, bool force_scx) bool keep_prev; struct task_struct *p; - /* see kick_cpus_irq_workfn() */ + /* see kick_sync_wait_bal_cb() */ smp_store_release(&rq->scx.kick_sync, rq->scx.kick_sync + 1); rq_modified_begin(rq, &ext_sched_class); @@ -2470,6 +2512,17 @@ do_pick_task_scx(struct rq *rq, struct rq_flags *rf, bool force_scx) rq_repin_lock(rq, rf); maybe_queue_balance_callback(rq); + /* + * Defer to a balance callback which can drop rq lock and enable + * IRQs. Waiting directly in the pick path would deadlock against + * CPUs sending us IPIs (e.g. TLB flushes) while we wait for them. + */ + if (unlikely(rq->scx.kick_sync_pending)) { + rq->scx.kick_sync_pending = false; + queue_balance_callback(rq, &rq->scx.kick_sync_bal_cb, + kick_sync_wait_bal_cb); + } + /* * If any higher-priority sched class enqueued a runnable task on * this rq during balance_one(), abort and return RETRY_TASK, so @@ -4713,6 +4766,9 @@ static void scx_dump_state(struct scx_exit_info *ei, size_t dump_len) if (!cpumask_empty(rq->scx.cpus_to_wait)) dump_line(&ns, " cpus_to_wait : %*pb", cpumask_pr_args(rq->scx.cpus_to_wait)); + if (!cpumask_empty(rq->scx.cpus_to_sync)) + dump_line(&ns, " cpus_to_sync : %*pb", + cpumask_pr_args(rq->scx.cpus_to_sync)); used = seq_buf_used(&ns); if (SCX_HAS_OP(sch, dump_cpu)) { @@ -5610,11 +5666,11 @@ static bool kick_one_cpu(s32 cpu, struct rq *this_rq, unsigned long *ksyncs) if (cpumask_test_cpu(cpu, this_scx->cpus_to_wait)) { if (cur_class == &ext_sched_class) { + cpumask_set_cpu(cpu, this_scx->cpus_to_sync); ksyncs[cpu] = rq->scx.kick_sync; should_wait = true; - } else { - cpumask_clear_cpu(cpu, this_scx->cpus_to_wait); } + cpumask_clear_cpu(cpu, this_scx->cpus_to_wait); } resched_curr(rq); @@ -5669,27 +5725,15 @@ static void kick_cpus_irq_workfn(struct irq_work *irq_work) cpumask_clear_cpu(cpu, this_scx->cpus_to_kick_if_idle); } - if (!should_wait) - return; - - for_each_cpu(cpu, this_scx->cpus_to_wait) { - unsigned long *wait_kick_sync = &cpu_rq(cpu)->scx.kick_sync; - - /* - * Busy-wait until the task running at the time of kicking is no - * longer running. This can be used to implement e.g. core - * scheduling. - * - * smp_cond_load_acquire() pairs with store_releases in - * pick_task_scx() and put_prev_task_scx(). The former breaks - * the wait if SCX's scheduling path is entered even if the same - * task is picked subsequently. The latter is necessary to break - * the wait when $cpu is taken by a higher sched class. - */ - if (cpu != cpu_of(this_rq)) - smp_cond_load_acquire(wait_kick_sync, VAL != ksyncs[cpu]); - - cpumask_clear_cpu(cpu, this_scx->cpus_to_wait); + /* + * Can't wait in hardirq — kick_sync can't advance, deadlocking if + * CPUs wait for each other. Defer to kick_sync_wait_bal_cb(). + */ + if (should_wait) { + raw_spin_rq_lock(this_rq); + this_scx->kick_sync_pending = true; + resched_curr(this_rq); + raw_spin_rq_unlock(this_rq); } } @@ -5794,6 +5838,7 @@ void __init init_sched_ext_class(void) BUG_ON(!zalloc_cpumask_var_node(&rq->scx.cpus_to_kick_if_idle, GFP_KERNEL, n)); BUG_ON(!zalloc_cpumask_var_node(&rq->scx.cpus_to_preempt, GFP_KERNEL, n)); BUG_ON(!zalloc_cpumask_var_node(&rq->scx.cpus_to_wait, GFP_KERNEL, n)); + BUG_ON(!zalloc_cpumask_var_node(&rq->scx.cpus_to_sync, GFP_KERNEL, n)); rq->scx.deferred_irq_work = IRQ_WORK_INIT_HARD(deferred_irq_workfn); rq->scx.kick_cpus_irq_work = IRQ_WORK_INIT_HARD(kick_cpus_irq_workfn); diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index 43bbf0693cca..1ef9ba480f51 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -805,9 +805,12 @@ struct scx_rq { cpumask_var_t cpus_to_kick_if_idle; cpumask_var_t cpus_to_preempt; cpumask_var_t cpus_to_wait; + cpumask_var_t cpus_to_sync; + bool kick_sync_pending; unsigned long kick_sync; local_t reenq_local_deferred; struct balance_callback deferred_bal_cb; + struct balance_callback kick_sync_bal_cb; struct irq_work deferred_irq_work; struct irq_work kick_cpus_irq_work; struct scx_dispatch_q bypass_dsq; From 090d34f0f0285124452373225bcc520a31e305e4 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Sat, 28 Mar 2026 14:18:56 -1000 Subject: [PATCH 328/401] selftests/sched_ext: Add cyclic SCX_KICK_WAIT stress test Add a test that creates a 3-CPU kick_wait cycle (A->B->C->A). A BPF scheduler kicks the next CPU in the ring with SCX_KICK_WAIT on every enqueue while userspace workers generate continuous scheduling churn via sched_yield(). Without the preceding fix, this hangs the machine within seconds. Signed-off-by: Tejun Heo Reviewed-by: Christian Loehle Tested-by: Christian Loehle --- tools/testing/selftests/sched_ext/Makefile | 1 + .../sched_ext/cyclic_kick_wait.bpf.c | 68 ++++++ .../selftests/sched_ext/cyclic_kick_wait.c | 194 ++++++++++++++++++ 3 files changed, 263 insertions(+) create mode 100644 tools/testing/selftests/sched_ext/cyclic_kick_wait.bpf.c create mode 100644 tools/testing/selftests/sched_ext/cyclic_kick_wait.c diff --git a/tools/testing/selftests/sched_ext/Makefile b/tools/testing/selftests/sched_ext/Makefile index 006300ac6dff..1c9ca328cca1 100644 --- a/tools/testing/selftests/sched_ext/Makefile +++ b/tools/testing/selftests/sched_ext/Makefile @@ -188,6 +188,7 @@ auto-test-targets := \ rt_stall \ test_example \ total_bw \ + cyclic_kick_wait \ testcase-targets := $(addsuffix .o,$(addprefix $(SCXOBJ_DIR)/,$(auto-test-targets))) diff --git a/tools/testing/selftests/sched_ext/cyclic_kick_wait.bpf.c b/tools/testing/selftests/sched_ext/cyclic_kick_wait.bpf.c new file mode 100644 index 000000000000..cb34d3335917 --- /dev/null +++ b/tools/testing/selftests/sched_ext/cyclic_kick_wait.bpf.c @@ -0,0 +1,68 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Stress concurrent SCX_KICK_WAIT calls to reproduce wait-cycle deadlock. + * + * Three CPUs are designated from userspace. Every enqueue from one of the + * three CPUs kicks the next CPU in the ring with SCX_KICK_WAIT, creating a + * persistent A -> B -> C -> A wait cycle pressure. + */ +#include + +char _license[] SEC("license") = "GPL"; + +const volatile s32 test_cpu_a; +const volatile s32 test_cpu_b; +const volatile s32 test_cpu_c; + +u64 nr_enqueues; +u64 nr_wait_kicks; + +UEI_DEFINE(uei); + +static s32 target_cpu(s32 cpu) +{ + if (cpu == test_cpu_a) + return test_cpu_b; + if (cpu == test_cpu_b) + return test_cpu_c; + if (cpu == test_cpu_c) + return test_cpu_a; + return -1; +} + +void BPF_STRUCT_OPS(cyclic_kick_wait_enqueue, struct task_struct *p, + u64 enq_flags) +{ + s32 this_cpu = bpf_get_smp_processor_id(); + s32 tgt; + + __sync_fetch_and_add(&nr_enqueues, 1); + + if (p->flags & PF_KTHREAD) { + scx_bpf_dsq_insert(p, SCX_DSQ_LOCAL, SCX_SLICE_INF, + enq_flags | SCX_ENQ_PREEMPT); + return; + } + + scx_bpf_dsq_insert(p, SCX_DSQ_GLOBAL, SCX_SLICE_DFL, enq_flags); + + tgt = target_cpu(this_cpu); + if (tgt < 0 || tgt == this_cpu) + return; + + __sync_fetch_and_add(&nr_wait_kicks, 1); + scx_bpf_kick_cpu(tgt, SCX_KICK_WAIT); +} + +void BPF_STRUCT_OPS(cyclic_kick_wait_exit, struct scx_exit_info *ei) +{ + UEI_RECORD(uei, ei); +} + +SEC(".struct_ops.link") +struct sched_ext_ops cyclic_kick_wait_ops = { + .enqueue = cyclic_kick_wait_enqueue, + .exit = cyclic_kick_wait_exit, + .name = "cyclic_kick_wait", + .timeout_ms = 1000U, +}; diff --git a/tools/testing/selftests/sched_ext/cyclic_kick_wait.c b/tools/testing/selftests/sched_ext/cyclic_kick_wait.c new file mode 100644 index 000000000000..c2e5aa9de715 --- /dev/null +++ b/tools/testing/selftests/sched_ext/cyclic_kick_wait.c @@ -0,0 +1,194 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Test SCX_KICK_WAIT forward progress under cyclic wait pressure. + * + * SCX_KICK_WAIT busy-waits until the target CPU enters the scheduling path. + * If multiple CPUs form a wait cycle (A waits for B, B waits for C, C waits + * for A), all CPUs deadlock unless the implementation breaks the cycle. + * + * This test creates that scenario: three CPUs are arranged in a ring. The BPF + * scheduler's ops.enqueue() kicks the next CPU in the ring with SCX_KICK_WAIT + * on every enqueue. Userspace pins 4 worker threads per CPU that loop calling + * sched_yield(), generating a steady stream of enqueues and thus sustained + * A->B->C->A kick_wait cycle pressure. The test passes if the system remains + * responsive for 5 seconds without the scheduler being killed by the watchdog. + */ +#define _GNU_SOURCE + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "scx_test.h" +#include "cyclic_kick_wait.bpf.skel.h" + +#define WORKERS_PER_CPU 4 +#define NR_TEST_CPUS 3 +#define NR_WORKERS (NR_TEST_CPUS * WORKERS_PER_CPU) + +struct worker_ctx { + pthread_t tid; + int cpu; + volatile bool stop; + volatile __u64 iters; + bool started; +}; + +static void *worker_fn(void *arg) +{ + struct worker_ctx *worker = arg; + cpu_set_t mask; + + CPU_ZERO(&mask); + CPU_SET(worker->cpu, &mask); + + if (sched_setaffinity(0, sizeof(mask), &mask)) + return (void *)(uintptr_t)errno; + + while (!worker->stop) { + sched_yield(); + worker->iters++; + } + + return NULL; +} + +static int join_worker(struct worker_ctx *worker) +{ + void *ret; + struct timespec ts; + int err; + + if (!worker->started) + return 0; + + if (clock_gettime(CLOCK_REALTIME, &ts)) + return -errno; + + ts.tv_sec += 2; + err = pthread_timedjoin_np(worker->tid, &ret, &ts); + if (err == ETIMEDOUT) + pthread_detach(worker->tid); + if (err) + return -err; + + if ((uintptr_t)ret) + return -(int)(uintptr_t)ret; + + return 0; +} + +static enum scx_test_status setup(void **ctx) +{ + struct cyclic_kick_wait *skel; + + skel = cyclic_kick_wait__open(); + SCX_FAIL_IF(!skel, "Failed to open skel"); + SCX_ENUM_INIT(skel); + + *ctx = skel; + return SCX_TEST_PASS; +} + +static enum scx_test_status run(void *ctx) +{ + struct cyclic_kick_wait *skel = ctx; + struct worker_ctx workers[NR_WORKERS] = {}; + struct bpf_link *link = NULL; + enum scx_test_status status = SCX_TEST_PASS; + int test_cpus[NR_TEST_CPUS]; + int nr_cpus = 0; + cpu_set_t mask; + int ret, i; + + if (sched_getaffinity(0, sizeof(mask), &mask)) { + SCX_ERR("Failed to get affinity (%d)", errno); + return SCX_TEST_FAIL; + } + + for (i = 0; i < CPU_SETSIZE; i++) { + if (CPU_ISSET(i, &mask)) + test_cpus[nr_cpus++] = i; + if (nr_cpus == NR_TEST_CPUS) + break; + } + + if (nr_cpus < NR_TEST_CPUS) + return SCX_TEST_SKIP; + + skel->rodata->test_cpu_a = test_cpus[0]; + skel->rodata->test_cpu_b = test_cpus[1]; + skel->rodata->test_cpu_c = test_cpus[2]; + + if (cyclic_kick_wait__load(skel)) { + SCX_ERR("Failed to load skel"); + return SCX_TEST_FAIL; + } + + link = bpf_map__attach_struct_ops(skel->maps.cyclic_kick_wait_ops); + if (!link) { + SCX_ERR("Failed to attach scheduler"); + return SCX_TEST_FAIL; + } + + for (i = 0; i < NR_WORKERS; i++) + workers[i].cpu = test_cpus[i / WORKERS_PER_CPU]; + + for (i = 0; i < NR_WORKERS; i++) { + ret = pthread_create(&workers[i].tid, NULL, worker_fn, &workers[i]); + if (ret) { + SCX_ERR("Failed to create worker thread %d (%d)", i, ret); + status = SCX_TEST_FAIL; + goto out; + } + workers[i].started = true; + } + + sleep(5); + + if (skel->data->uei.kind != EXIT_KIND(SCX_EXIT_NONE)) { + SCX_ERR("Scheduler exited unexpectedly (kind=%llu code=%lld)", + (unsigned long long)skel->data->uei.kind, + (long long)skel->data->uei.exit_code); + status = SCX_TEST_FAIL; + } + +out: + for (i = 0; i < NR_WORKERS; i++) + workers[i].stop = true; + + for (i = 0; i < NR_WORKERS; i++) { + ret = join_worker(&workers[i]); + if (ret && status == SCX_TEST_PASS) { + SCX_ERR("Failed to join worker thread %d (%d)", i, ret); + status = SCX_TEST_FAIL; + } + } + + if (link) + bpf_link__destroy(link); + + return status; +} + +static void cleanup(void *ctx) +{ + struct cyclic_kick_wait *skel = ctx; + + cyclic_kick_wait__destroy(skel); +} + +struct scx_test cyclic_kick_wait = { + .name = "cyclic_kick_wait", + .description = "Verify SCX_KICK_WAIT forward progress under a 3-CPU wait cycle", + .setup = setup, + .run = run, + .cleanup = cleanup, +}; +REGISTER_SCX_TEST(&cyclic_kick_wait) From 622363757b2286dd2c2984b0d80255cbb35a0495 Mon Sep 17 00:00:00 2001 From: Jihed Chaibi Date: Tue, 24 Mar 2026 22:09:09 +0100 Subject: [PATCH 329/401] ASoC: ep93xx: Fix unchecked clk_prepare_enable() and add rollback on failure MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ep93xx_i2s_enable() calls clk_prepare_enable() on three clocks in sequence (mclk, sclk, lrclk) without checking the return value of any of them. If an intermediate enable fails, the clocks that were already enabled are never rolled back, leaking them until the next disable cycle — which may never come if the stream never started cleanly. Change ep93xx_i2s_enable() from void to int. Add error checking after each clk_prepare_enable() call and unwind already-enabled clocks on failure. Propagate the error through ep93xx_i2s_startup() and ep93xx_i2s_resume(), both of which already return int. Signed-off-by: Jihed Chaibi Fixes: f4ff6b56bc8a ("ASoC: cirrus: i2s: Prepare clock before using it") Link: https://patch.msgid.link/20260324210909.45494-1-jihed.chaibi.dev@gmail.com Signed-off-by: Mark Brown --- sound/soc/cirrus/ep93xx-i2s.c | 34 ++++++++++++++++++++++++---------- 1 file changed, 24 insertions(+), 10 deletions(-) diff --git a/sound/soc/cirrus/ep93xx-i2s.c b/sound/soc/cirrus/ep93xx-i2s.c index cca01c03f048..5dba741594fa 100644 --- a/sound/soc/cirrus/ep93xx-i2s.c +++ b/sound/soc/cirrus/ep93xx-i2s.c @@ -91,16 +91,28 @@ static inline unsigned ep93xx_i2s_read_reg(struct ep93xx_i2s_info *info, return __raw_readl(info->regs + reg); } -static void ep93xx_i2s_enable(struct ep93xx_i2s_info *info, int stream) +static int ep93xx_i2s_enable(struct ep93xx_i2s_info *info, int stream) { unsigned base_reg; + int err; if ((ep93xx_i2s_read_reg(info, EP93XX_I2S_TX0EN) & 0x1) == 0 && (ep93xx_i2s_read_reg(info, EP93XX_I2S_RX0EN) & 0x1) == 0) { /* Enable clocks */ - clk_prepare_enable(info->mclk); - clk_prepare_enable(info->sclk); - clk_prepare_enable(info->lrclk); + err = clk_prepare_enable(info->mclk); + if (err) + return err; + err = clk_prepare_enable(info->sclk); + if (err) { + clk_disable_unprepare(info->mclk); + return err; + } + err = clk_prepare_enable(info->lrclk); + if (err) { + clk_disable_unprepare(info->sclk); + clk_disable_unprepare(info->mclk); + return err; + } /* Enable i2s */ ep93xx_i2s_write_reg(info, EP93XX_I2S_GLCTRL, 1); @@ -119,6 +131,8 @@ static void ep93xx_i2s_enable(struct ep93xx_i2s_info *info, int stream) ep93xx_i2s_write_reg(info, EP93XX_I2S_TXCTRL, EP93XX_I2S_TXCTRL_TXEMPTY_LVL | EP93XX_I2S_TXCTRL_TXUFIE); + + return 0; } static void ep93xx_i2s_disable(struct ep93xx_i2s_info *info, int stream) @@ -195,9 +209,7 @@ static int ep93xx_i2s_startup(struct snd_pcm_substream *substream, { struct ep93xx_i2s_info *info = snd_soc_dai_get_drvdata(dai); - ep93xx_i2s_enable(info, substream->stream); - - return 0; + return ep93xx_i2s_enable(info, substream->stream); } static void ep93xx_i2s_shutdown(struct snd_pcm_substream *substream, @@ -373,14 +385,16 @@ static int ep93xx_i2s_suspend(struct snd_soc_component *component) static int ep93xx_i2s_resume(struct snd_soc_component *component) { struct ep93xx_i2s_info *info = snd_soc_component_get_drvdata(component); + int err; if (!snd_soc_component_active(component)) return 0; - ep93xx_i2s_enable(info, SNDRV_PCM_STREAM_PLAYBACK); - ep93xx_i2s_enable(info, SNDRV_PCM_STREAM_CAPTURE); + err = ep93xx_i2s_enable(info, SNDRV_PCM_STREAM_PLAYBACK); + if (err) + return err; - return 0; + return ep93xx_i2s_enable(info, SNDRV_PCM_STREAM_CAPTURE); } #else #define ep93xx_i2s_suspend NULL From fd63f185979b047fb22a0dfc6bd94d0cab6a6a70 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Fri, 27 Mar 2026 10:52:57 +0100 Subject: [PATCH 330/401] ipv6: prevent possible UaF in addrconf_permanent_addr() The mentioned helper try to warn the user about an exceptional condition, but the message is delivered too late, accessing the ipv6 after its possible deletion. Reorder the statement to avoid the possible UaF; while at it, place the warning outside the idev->lock as it needs no protection. Reported-by: Jakub Kicinski Closes: https://sashiko.dev/#/patchset/8c8bfe2e1a324e501f0e15fef404a77443fd8caf.1774365668.git.pabeni%40redhat.com Fixes: f1705ec197e7 ("net: ipv6: Make address flushing on ifdown optional") Signed-off-by: Paolo Abeni Link: https://patch.msgid.link/ef973c3a8cb4f8f1787ed469f3e5391b9fe95aa0.1774601542.git.pabeni@redhat.com Signed-off-by: Jakub Kicinski --- net/ipv6/addrconf.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index f4e23b543585..dd0b4d80e0f8 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -3625,12 +3625,12 @@ static void addrconf_permanent_addr(struct net *net, struct net_device *dev) if ((ifp->flags & IFA_F_PERMANENT) && fixup_permanent_addr(net, idev, ifp) < 0) { write_unlock_bh(&idev->lock); - in6_ifa_hold(ifp); - ipv6_del_addr(ifp); - write_lock_bh(&idev->lock); net_info_ratelimited("%s: Failed to add prefix route for address %pI6c; dropping\n", idev->dev->name, &ifp->addr); + in6_ifa_hold(ifp); + ipv6_del_addr(ifp); + write_lock_bh(&idev->lock); } } From 514aac3599879a7ed48b7dc19e31145beb6958ac Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Fri, 27 Mar 2026 10:48:21 +0100 Subject: [PATCH 331/401] net: airoha: Add missing cleanup bits in airoha_qdma_cleanup_rx_queue() In order to properly cleanup hw rx QDMA queues and bring the device to the initial state, reset rx DMA queue head/tail index. Moreover, reset queued DMA descriptor fields. Fixes: 23020f049327 ("net: airoha: Introduce ethernet support for EN7581 SoC") Tested-by: Madhur Agrawal Signed-off-by: Lorenzo Bianconi Link: https://patch.msgid.link/20260327-airoha_qdma_cleanup_rx_queue-fix-v1-1-369d6ab1511a@kernel.org Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/airoha/airoha_eth.c | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/airoha/airoha_eth.c b/drivers/net/ethernet/airoha/airoha_eth.c index 56cf9a926a83..c2a54dbcbb0d 100644 --- a/drivers/net/ethernet/airoha/airoha_eth.c +++ b/drivers/net/ethernet/airoha/airoha_eth.c @@ -794,18 +794,34 @@ static int airoha_qdma_init_rx_queue(struct airoha_queue *q, static void airoha_qdma_cleanup_rx_queue(struct airoha_queue *q) { - struct airoha_eth *eth = q->qdma->eth; + struct airoha_qdma *qdma = q->qdma; + struct airoha_eth *eth = qdma->eth; + int qid = q - &qdma->q_rx[0]; while (q->queued) { struct airoha_queue_entry *e = &q->entry[q->tail]; + struct airoha_qdma_desc *desc = &q->desc[q->tail]; struct page *page = virt_to_head_page(e->buf); dma_sync_single_for_cpu(eth->dev, e->dma_addr, e->dma_len, page_pool_get_dma_dir(q->page_pool)); page_pool_put_full_page(q->page_pool, page, false); + /* Reset DMA descriptor */ + WRITE_ONCE(desc->ctrl, 0); + WRITE_ONCE(desc->addr, 0); + WRITE_ONCE(desc->data, 0); + WRITE_ONCE(desc->msg0, 0); + WRITE_ONCE(desc->msg1, 0); + WRITE_ONCE(desc->msg2, 0); + WRITE_ONCE(desc->msg3, 0); + q->tail = (q->tail + 1) % q->ndesc; q->queued--; } + + q->head = q->tail; + airoha_qdma_rmw(qdma, REG_RX_DMA_IDX(qid), RX_RING_DMA_IDX_MASK, + FIELD_PREP(RX_RING_DMA_IDX_MASK, q->tail)); } static int airoha_qdma_init_rx(struct airoha_qdma *qdma) From ddc748a391dd8642ba6b2e4fe22e7f2ddf84b7f0 Mon Sep 17 00:00:00 2001 From: Guoyu Su Date: Fri, 27 Mar 2026 23:35:07 +0800 Subject: [PATCH 332/401] net: use skb_header_pointer() for TCPv4 GSO frag_off check Syzbot reported a KMSAN uninit-value warning in gso_features_check() called from netif_skb_features() [1]. gso_features_check() reads iph->frag_off to decide whether to clear mangleid_features. Accessing the IPv4 header via ip_hdr()/inner_ip_hdr() can rely on skb header offsets that are not always safe for direct dereference on packets injected from PF_PACKET paths. Use skb_header_pointer() for the TCPv4 frag_off check so the header read is robust whether data is already linear or needs copying. [1] https://syzkaller.appspot.com/bug?extid=1543a7d954d9c6d00407 Link: https://lore.kernel.org/netdev/willemdebruijn.kernel.1a9f35039caab@gmail.com/ Fixes: cbc53e08a793 ("GSO: Add GSO type for fixed IPv4 ID") Reported-by: syzbot+1543a7d954d9c6d00407@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=1543a7d954d9c6d00407 Tested-by: syzbot+1543a7d954d9c6d00407@syzkaller.appspotmail.com Signed-off-by: Guoyu Su Reviewed-by: Willem de Bruijn Link: https://patch.msgid.link/20260327153507.39742-1-yss2813483011xxl@gmail.com Signed-off-by: Jakub Kicinski --- net/core/dev.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/net/core/dev.c b/net/core/dev.c index fc5557062414..831129f2a69b 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -3821,10 +3821,15 @@ static netdev_features_t gso_features_check(const struct sk_buff *skb, * segmentation-offloads.rst). */ if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV4) { - struct iphdr *iph = skb->encapsulation ? - inner_ip_hdr(skb) : ip_hdr(skb); + const struct iphdr *iph; + struct iphdr _iph; + int nhoff = skb->encapsulation ? + skb_inner_network_offset(skb) : + skb_network_offset(skb); - if (!(iph->frag_off & htons(IP_DF))) + iph = skb_header_pointer(skb, nhoff, sizeof(_iph), &_iph); + + if (!iph || !(iph->frag_off & htons(IP_DF))) features &= ~dev->mangleid_features; } From e6e3eb5ee89ac4c163d46429391c889a1bb5e404 Mon Sep 17 00:00:00 2001 From: Yochai Eisenrich Date: Sun, 29 Mar 2026 00:14:36 +0300 Subject: [PATCH 333/401] net: sched: cls_api: fix tc_chain_fill_node to initialize tcm_info to zero to prevent an info-leak When building netlink messages, tc_chain_fill_node() never initializes the tcm_info field of struct tcmsg. Since the allocation is not zeroed, kernel heap memory is leaked to userspace through this 4-byte field. The fix simply zeroes tcm_info alongside the other fields that are already initialized. Fixes: 32a4f5ecd738 ("net: sched: introduce chain object to uapi") Signed-off-by: Yochai Eisenrich Acked-by: Jamal Hadi Salim Link: https://patch.msgid.link/20260328211436.1010152-1-echelonh@gmail.com Signed-off-by: Jakub Kicinski --- net/sched/cls_api.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index 4829c27446e3..20f7f9ee0b35 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -2969,6 +2969,7 @@ static int tc_chain_fill_node(const struct tcf_proto_ops *tmplt_ops, tcm->tcm__pad1 = 0; tcm->tcm__pad2 = 0; tcm->tcm_handle = 0; + tcm->tcm_info = 0; if (block->q) { tcm->tcm_ifindex = qdisc_dev(block->q)->ifindex; tcm->tcm_parent = block->q->handle; From cedc1bf327de62ec30af9743bd1f601c2de30553 Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Sun, 29 Mar 2026 12:32:27 +0200 Subject: [PATCH 334/401] net: airoha: Delay offloading until all net_devices are fully registered Netfilter flowtable can theoretically try to offload flower rules as soon as a net_device is registered while all the other ones are not registered or initialized, triggering a possible NULL pointer dereferencing of qdma pointer in airoha_ppe_set_cpu_port routine. Moreover, if register_netdev() fails for a particular net_device, there is a small race if Netfilter tries to offload flowtable rules before all the net_devices are properly unregistered in airoha_probe() error patch, triggering a NULL pointer dereferencing in airoha_ppe_set_cpu_port routine. In order to avoid any possible race, delay offloading until all net_devices are registered in the networking subsystem. Signed-off-by: Lorenzo Bianconi Link: https://patch.msgid.link/20260329-airoha-regiser-race-fix-v2-1-f4ebb139277b@kernel.org Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/airoha/airoha_eth.c | 2 ++ drivers/net/ethernet/airoha/airoha_eth.h | 1 + drivers/net/ethernet/airoha/airoha_ppe.c | 7 +++++++ 3 files changed, 10 insertions(+) diff --git a/drivers/net/ethernet/airoha/airoha_eth.c b/drivers/net/ethernet/airoha/airoha_eth.c index c2a54dbcbb0d..95ba99b89428 100644 --- a/drivers/net/ethernet/airoha/airoha_eth.c +++ b/drivers/net/ethernet/airoha/airoha_eth.c @@ -2962,6 +2962,8 @@ static int airoha_register_gdm_devices(struct airoha_eth *eth) return err; } + set_bit(DEV_STATE_REGISTERED, ð->state); + return 0; } diff --git a/drivers/net/ethernet/airoha/airoha_eth.h b/drivers/net/ethernet/airoha/airoha_eth.h index 20e602d61e61..a97903569335 100644 --- a/drivers/net/ethernet/airoha/airoha_eth.h +++ b/drivers/net/ethernet/airoha/airoha_eth.h @@ -88,6 +88,7 @@ enum { enum { DEV_STATE_INITIALIZED, + DEV_STATE_REGISTERED, }; enum { diff --git a/drivers/net/ethernet/airoha/airoha_ppe.c b/drivers/net/ethernet/airoha/airoha_ppe.c index 5724f8f2defd..c2c32b6833df 100644 --- a/drivers/net/ethernet/airoha/airoha_ppe.c +++ b/drivers/net/ethernet/airoha/airoha_ppe.c @@ -1368,6 +1368,13 @@ int airoha_ppe_setup_tc_block_cb(struct airoha_ppe_dev *dev, void *type_data) struct airoha_eth *eth = ppe->eth; int err = 0; + /* Netfilter flowtable can try to offload flower rules while not all + * the net_devices are registered or initialized. Delay offloading + * until all net_devices are registered in the system. + */ + if (!test_bit(DEV_STATE_REGISTERED, ð->state)) + return -EBUSY; + mutex_lock(&flow_offload_mutex); if (!eth->npu) From 4ee937107d52f9e5c350e4b5e629760e328b3d9f Mon Sep 17 00:00:00 2001 From: Pengpeng Hou Date: Sun, 29 Mar 2026 07:43:56 +0800 Subject: [PATCH 335/401] bnxt_en: set backing store type from query type bnxt_hwrm_func_backing_store_qcaps_v2() stores resp->type from the firmware response in ctxm->type and later uses that value to index fixed backing-store metadata arrays such as ctx_arr[] and bnxt_bstore_to_trace[]. ctxm->type is fixed by the current backing-store query type and matches the array index of ctx->ctx_arr. Set ctxm->type from the current loop variable instead of depending on resp->type. Also update the loop to advance type from next_valid_type in the for statement, which keeps the control flow simpler for non-valid and unchanged entries. Fixes: 6a4d0774f02d ("bnxt_en: Add support for new backing store query firmware API") Signed-off-by: Pengpeng Hou Reviewed-by: Michael Chan Tested-by: Michael Chan Link: https://patch.msgid.link/20260328234357.43669-1-pengpeng@iscas.ac.cn Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 0751c0e4581a..7ed805713fbb 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -8671,7 +8671,7 @@ static int bnxt_hwrm_func_backing_store_qcaps_v2(struct bnxt *bp) struct hwrm_func_backing_store_qcaps_v2_output *resp; struct hwrm_func_backing_store_qcaps_v2_input *req; struct bnxt_ctx_mem_info *ctx = bp->ctx; - u16 type; + u16 type, next_type = 0; int rc; rc = hwrm_req_init(bp, req, HWRM_FUNC_BACKING_STORE_QCAPS_V2); @@ -8687,7 +8687,7 @@ static int bnxt_hwrm_func_backing_store_qcaps_v2(struct bnxt *bp) resp = hwrm_req_hold(bp, req); - for (type = 0; type < BNXT_CTX_V2_MAX; ) { + for (type = 0; type < BNXT_CTX_V2_MAX; type = next_type) { struct bnxt_ctx_mem_type *ctxm = &ctx->ctx_arr[type]; u8 init_val, init_off, i; u32 max_entries; @@ -8700,7 +8700,7 @@ static int bnxt_hwrm_func_backing_store_qcaps_v2(struct bnxt *bp) if (rc) goto ctx_done; flags = le32_to_cpu(resp->flags); - type = le16_to_cpu(resp->next_valid_type); + next_type = le16_to_cpu(resp->next_valid_type); if (!(flags & BNXT_CTX_MEM_TYPE_VALID)) { bnxt_free_one_ctx_mem(bp, ctxm, true); continue; @@ -8715,7 +8715,7 @@ static int bnxt_hwrm_func_backing_store_qcaps_v2(struct bnxt *bp) else continue; } - ctxm->type = le16_to_cpu(resp->type); + ctxm->type = type; ctxm->entry_size = entry_size; ctxm->flags = flags; ctxm->instance_bmap = le32_to_cpu(resp->instance_bit_map); From 720460722310c7ab35421aa81a3153ff96b6c82b Mon Sep 17 00:00:00 2001 From: Zhang Heng Date: Tue, 31 Mar 2026 09:35:36 +0800 Subject: [PATCH 336/401] ALSA: hda/realtek: add quirk for HP Laptop 15-fc0xxx For the HP Laptop 15-fc0xxx with ALC236, the built-in mic 0x12 was not set up, making it unusable; after adding it, it now works properly. Link: https://bugzilla.kernel.org/show_bug.cgi?id=221233 Signed-off-by: Zhang Heng Link: https://patch.msgid.link/20260331013536.13778-1-zhangheng@kylinos.cn Signed-off-by: Takashi Iwai --- sound/hda/codecs/realtek/alc269.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/sound/hda/codecs/realtek/alc269.c b/sound/hda/codecs/realtek/alc269.c index 766d7bb2d4f5..57ba642252b0 100644 --- a/sound/hda/codecs/realtek/alc269.c +++ b/sound/hda/codecs/realtek/alc269.c @@ -4123,6 +4123,7 @@ enum { ALC245_FIXUP_BASS_HP_DAC, ALC245_FIXUP_ACER_MICMUTE_LED, ALC245_FIXUP_CS35L41_I2C_2_MUTE_LED, + ALC236_FIXUP_HP_DMIC, }; /* A special fixup for Lenovo C940 and Yoga Duet 7; @@ -6658,6 +6659,13 @@ static const struct hda_fixup alc269_fixups[] = { .v.func = alc245_fixup_hp_mute_led_coefbit, .chained = true, .chain_id = ALC287_FIXUP_CS35L41_I2C_2, + }, + [ALC236_FIXUP_HP_DMIC] = { + .type = HDA_FIXUP_PINS, + .v.pins = (const struct hda_pintbl[]) { + { 0x12, 0x90a60160 }, /* use as internal mic */ + { } + }, } }; @@ -7153,6 +7161,7 @@ static const struct hda_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x103c, 0x8da1, "HP 16 Clipper OmniBook X", ALC287_FIXUP_CS35L41_I2C_2), SND_PCI_QUIRK(0x103c, 0x8da7, "HP 14 Enstrom OmniBook X", ALC287_FIXUP_CS35L41_I2C_2), SND_PCI_QUIRK(0x103c, 0x8da8, "HP 16 Piston OmniBook X", ALC287_FIXUP_CS35L41_I2C_2), + SND_PCI_QUIRK(0x103c, 0x8dc9, "HP Laptop 15-fc0xxx", ALC236_FIXUP_HP_DMIC), SND_PCI_QUIRK(0x103c, 0x8dd4, "HP EliteStudio 8 AIO", ALC274_FIXUP_HP_AIO_BIND_DACS), SND_PCI_QUIRK(0x103c, 0x8dd7, "HP Laptop 15-fd0xxx", ALC236_FIXUP_HP_MUTE_LED_COEFBIT2), SND_PCI_QUIRK(0x103c, 0x8de8, "HP Gemtree", ALC245_FIXUP_TAS2781_SPI_2), From e6c888202297eca21860b669edb74fc600e679d9 Mon Sep 17 00:00:00 2001 From: songxiebing Date: Tue, 31 Mar 2026 11:36:50 +0800 Subject: [PATCH 337/401] ALSA: hda/realtek: Add quirk for Lenovo Yoga Slim 7 14AKP10 The Pin Complex 0x17 (bass/woofer speakers) is incorrectly reported as unconnected in the BIOS (pin default 0x411111f0 = N/A). This causes the kernel to configure speaker_outs=0, meaning only the tweeters (pin 0x14) are used. The result is very low, tinny audio with no bass. The existing quirk ALC287_FIXUP_YOGA9_14IAP7_BASS_SPK_PIN (already present in patch_realtek.c for SSID 0x17aa3801) fixes the issue completely. Reported-by: Garcicasti Link: https://bugzilla.kernel.org/show_bug.cgi?id=221298 Signed-off-by: songxiebing Link: https://patch.msgid.link/20260331033650.285601-1-songxiebing@kylinos.cn Signed-off-by: Takashi Iwai --- sound/hda/codecs/realtek/alc269.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/hda/codecs/realtek/alc269.c b/sound/hda/codecs/realtek/alc269.c index 57ba642252b0..fa5f08cd2c65 100644 --- a/sound/hda/codecs/realtek/alc269.c +++ b/sound/hda/codecs/realtek/alc269.c @@ -7676,6 +7676,7 @@ static const struct hda_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x17aa, 0x3902, "Lenovo E50-80", ALC269_FIXUP_DMIC_THINKPAD_ACPI), SND_PCI_QUIRK(0x17aa, 0x390d, "Lenovo Yoga Pro 7 14ASP10", ALC287_FIXUP_YOGA9_14IAP7_BASS_SPK_PIN), SND_PCI_QUIRK(0x17aa, 0x3913, "Lenovo 145", ALC236_FIXUP_LENOVO_INV_DMIC), + SND_PCI_QUIRK(0x17aa, 0x391a, "Lenovo Yoga Slim 7 14AKP10", ALC287_FIXUP_YOGA9_14IAP7_BASS_SPK_PIN), SND_PCI_QUIRK(0x17aa, 0x391f, "Yoga S990-16 pro Quad YC Quad", ALC287_FIXUP_TXNW2781_I2C), SND_PCI_QUIRK(0x17aa, 0x3920, "Yoga S990-16 pro Quad VECO Quad", ALC287_FIXUP_TXNW2781_I2C), SND_PCI_QUIRK(0x17aa, 0x3929, "Thinkbook 13x Gen 5", ALC287_FIXUP_MG_RTKC_CSAMP_CS35L41_I2C_THINKPAD), From 75dc1980cf48826287e43dc7a49e310c6691f97e Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 31 Mar 2026 10:12:17 +0200 Subject: [PATCH 338/401] ALSA: ctxfi: Don't enumerate SPDIF1 at DAIO initialization The recent refactoring of xfi driver changed the assignment of atc->daios[] at atc_get_resources(); now it loops over all enum DAIOTYP entries while it looped formerly only a part of them. The problem is that the last entry, SPDIF1, is a special type that is used only for hw20k1 CTSB073X model (as a replacement of SPDIFIO), and there is no corresponding definition for hw20k2. Due to the lack of the info, it caused a kernel crash on hw20k2, which was already worked around by the commit b045ab3dff97 ("ALSA: ctxfi: Fix missing SPDIFI1 index handling"). This patch addresses the root cause of the regression above properly, simply by skipping the incorrect SPDIF1 type in the parser loop. For making the change clearer, the code is slightly arranged, too. Fixes: a2dbaeb5c61e ("ALSA: ctxfi: Refactor resource alloc for sparse mappings") Cc: Link: https://bugzilla.suse.com/show_bug.cgi?id=1259925 Link: https://patch.msgid.link/20260331081227.216134-1-tiwai@suse.de Signed-off-by: Takashi Iwai --- sound/pci/ctxfi/ctatc.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/sound/pci/ctxfi/ctatc.c b/sound/pci/ctxfi/ctatc.c index f122e396bc55..da2667cb2489 100644 --- a/sound/pci/ctxfi/ctatc.c +++ b/sound/pci/ctxfi/ctatc.c @@ -1427,10 +1427,14 @@ static int atc_get_resources(struct ct_atc *atc) daio_mgr = (struct daio_mgr *)atc->rsc_mgrs[DAIO]; da_desc.msr = atc->msr; for (i = 0; i < NUM_DAIOTYP; i++) { - if (((i == MIC) && !cap.dedicated_mic) || ((i == RCA) && !cap.dedicated_rca)) + if (((i == MIC) && !cap.dedicated_mic) || + ((i == RCA) && !cap.dedicated_rca) || + i == SPDIFI1) continue; - da_desc.type = (atc->model != CTSB073X) ? i : - ((i == SPDIFIO) ? SPDIFI1 : i); + if (atc->model == CTSB073X && i == SPDIFIO) + da_desc.type = SPDIFI1; + else + da_desc.type = i; da_desc.output = (i < LINEIM) || (i == RCA); err = daio_mgr->get_daio(daio_mgr, &da_desc, (struct daio **)&atc->daios[i]); From 2884bf72fb8f03409e423397319205de48adca16 Mon Sep 17 00:00:00 2001 From: Xiang Mei Date: Thu, 26 Mar 2026 00:55:53 -0700 Subject: [PATCH 339/401] net: bonding: fix use-after-free in bond_xmit_broadcast() bond_xmit_broadcast() reuses the original skb for the last slave (determined by bond_is_last_slave()) and clones it for others. Concurrent slave enslave/release can mutate the slave list during RCU-protected iteration, changing which slave is "last" mid-loop. This causes the original skb to be double-consumed (double-freed). Replace the racy bond_is_last_slave() check with a simple index comparison (i + 1 == slaves_count) against the pre-snapshot slave count taken via READ_ONCE() before the loop. This preserves the zero-copy optimization for the last slave while making the "last" determination stable against concurrent list mutations. The UAF can trigger the following crash: ================================================================== BUG: KASAN: slab-use-after-free in skb_clone Read of size 8 at addr ffff888100ef8d40 by task exploit/147 CPU: 1 UID: 0 PID: 147 Comm: exploit Not tainted 7.0.0-rc3+ #4 PREEMPTLAZY Call Trace: dump_stack_lvl (lib/dump_stack.c:123) print_report (mm/kasan/report.c:379 mm/kasan/report.c:482) kasan_report (mm/kasan/report.c:597) skb_clone (include/linux/skbuff.h:1724 include/linux/skbuff.h:1792 include/linux/skbuff.h:3396 net/core/skbuff.c:2108) bond_xmit_broadcast (drivers/net/bonding/bond_main.c:5334) bond_start_xmit (drivers/net/bonding/bond_main.c:5567 drivers/net/bonding/bond_main.c:5593) dev_hard_start_xmit (include/linux/netdevice.h:5325 include/linux/netdevice.h:5334 net/core/dev.c:3871 net/core/dev.c:3887) __dev_queue_xmit (include/linux/netdevice.h:3601 net/core/dev.c:4838) ip6_finish_output2 (include/net/neighbour.h:540 include/net/neighbour.h:554 net/ipv6/ip6_output.c:136) ip6_finish_output (net/ipv6/ip6_output.c:208 net/ipv6/ip6_output.c:219) ip6_output (net/ipv6/ip6_output.c:250) ip6_send_skb (net/ipv6/ip6_output.c:1985) udp_v6_send_skb (net/ipv6/udp.c:1442) udpv6_sendmsg (net/ipv6/udp.c:1733) __sys_sendto (net/socket.c:730 net/socket.c:742 net/socket.c:2206) __x64_sys_sendto (net/socket.c:2209) do_syscall_64 (arch/x86/entry/syscall_64.c:63 arch/x86/entry/syscall_64.c:94) entry_SYSCALL_64_after_hwframe (arch/x86/entry/entry_64.S:130) Allocated by task 147: Freed by task 147: The buggy address belongs to the object at ffff888100ef8c80 which belongs to the cache skbuff_head_cache of size 224 The buggy address is located 192 bytes inside of freed 224-byte region [ffff888100ef8c80, ffff888100ef8d60) Memory state around the buggy address: ffff888100ef8c00: fb fb fb fb fc fc fc fc fc fc fc fc fc fc fc fc ffff888100ef8c80: fa fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb >ffff888100ef8d00: fb fb fb fb fb fb fb fb fb fb fb fb fc fc fc fc ^ ffff888100ef8d80: fc fc fc fc fc fc fc fc fa fb fb fb fb fb fb fb ffff888100ef8e00: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ================================================================== Fixes: 4e5bd03ae346 ("net: bonding: fix bond_xmit_broadcast return value error bug") Reported-by: Weiming Shi Signed-off-by: Xiang Mei Link: https://patch.msgid.link/20260326075553.3960562-1-xmei5@asu.edu Signed-off-by: Paolo Abeni --- drivers/net/bonding/bond_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 33f414d03ab9..a5484d11553d 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -5326,7 +5326,7 @@ static netdev_tx_t bond_xmit_broadcast(struct sk_buff *skb, if (!(bond_slave_is_up(slave) && slave->link == BOND_LINK_UP)) continue; - if (bond_is_last_slave(bond, slave)) { + if (i + 1 == slaves_count) { skb2 = skb; skb_used = true; } else { From 30fe3f5f6494f827d812ff179f295a8e532709d6 Mon Sep 17 00:00:00 2001 From: Pengpeng Hou Date: Thu, 26 Mar 2026 22:20:33 +0800 Subject: [PATCH 340/401] NFC: pn533: bound the UART receive buffer pn532_receive_buf() appends every incoming byte to dev->recv_skb and only resets the buffer after pn532_uart_rx_is_frame() recognizes a complete frame. A continuous stream of bytes without a valid PN532 frame header therefore keeps growing the skb until skb_put_u8() hits the tail limit. Drop the accumulated partial frame once the fixed receive buffer is full so malformed UART traffic cannot grow the skb past PN532_UART_SKB_BUFF_LEN. Fixes: c656aa4c27b1 ("nfc: pn533: add UART phy driver") Signed-off-by: Pengpeng Hou Link: https://patch.msgid.link/20260326142033.82297-1-pengpeng@iscas.ac.cn Signed-off-by: Paolo Abeni --- drivers/nfc/pn533/uart.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/nfc/pn533/uart.c b/drivers/nfc/pn533/uart.c index 6d2f520a5bc8..1b82b7b2a5fa 100644 --- a/drivers/nfc/pn533/uart.c +++ b/drivers/nfc/pn533/uart.c @@ -211,6 +211,9 @@ static size_t pn532_receive_buf(struct serdev_device *serdev, timer_delete(&dev->cmd_timeout); for (i = 0; i < count; i++) { + if (unlikely(!skb_tailroom(dev->recv_skb))) + skb_trim(dev->recv_skb, 0); + skb_put_u8(dev->recv_skb, *data++); if (!pn532_uart_rx_is_frame(dev->recv_skb)) continue; From 393e0b4f178ec7fce1141dacc3304e3607a92ee9 Mon Sep 17 00:00:00 2001 From: Suraj Gupta Date: Fri, 27 Mar 2026 13:02:37 +0530 Subject: [PATCH 341/401] net: xilinx: axienet: Correct BD length masks to match AXIDMA IP spec The XAXIDMA_BD_CTRL_LENGTH_MASK and XAXIDMA_BD_STS_ACTUAL_LEN_MASK macros were defined as 0x007FFFFF (23 bits), but the AXI DMA IP product guide (PG021) specifies the buffer length field as bits 25:0 (26 bits). Update both masks to match the IP documentation. In practice this had no functional impact, since Ethernet frames are far smaller than 2^23 bytes and the extra bits were always zero, but the masks should still reflect the hardware specification. Fixes: 8a3b7a252dca ("drivers/net/ethernet/xilinx: added Xilinx AXI Ethernet driver") Signed-off-by: Suraj Gupta Reviewed-by: Sean Anderson Link: https://patch.msgid.link/20260327073238.134948-2-suraj.gupta2@amd.com Signed-off-by: Paolo Abeni --- drivers/net/ethernet/xilinx/xilinx_axienet.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/xilinx/xilinx_axienet.h b/drivers/net/ethernet/xilinx/xilinx_axienet.h index 5ff742103beb..fcd3aaef27fc 100644 --- a/drivers/net/ethernet/xilinx/xilinx_axienet.h +++ b/drivers/net/ethernet/xilinx/xilinx_axienet.h @@ -105,7 +105,7 @@ #define XAXIDMA_BD_HAS_DRE_MASK 0xF00 /* Whether has DRE mask */ #define XAXIDMA_BD_WORDLEN_MASK 0xFF /* Whether has DRE mask */ -#define XAXIDMA_BD_CTRL_LENGTH_MASK 0x007FFFFF /* Requested len */ +#define XAXIDMA_BD_CTRL_LENGTH_MASK GENMASK(25, 0) /* Requested len */ #define XAXIDMA_BD_CTRL_TXSOF_MASK 0x08000000 /* First tx packet */ #define XAXIDMA_BD_CTRL_TXEOF_MASK 0x04000000 /* Last tx packet */ #define XAXIDMA_BD_CTRL_ALL_MASK 0x0C000000 /* All control bits */ @@ -130,7 +130,7 @@ #define XAXIDMA_BD_CTRL_TXEOF_MASK 0x04000000 /* Last tx packet */ #define XAXIDMA_BD_CTRL_ALL_MASK 0x0C000000 /* All control bits */ -#define XAXIDMA_BD_STS_ACTUAL_LEN_MASK 0x007FFFFF /* Actual len */ +#define XAXIDMA_BD_STS_ACTUAL_LEN_MASK GENMASK(25, 0) /* Actual len */ #define XAXIDMA_BD_STS_COMPLETE_MASK 0x80000000 /* Completed */ #define XAXIDMA_BD_STS_DEC_ERR_MASK 0x40000000 /* Decode error */ #define XAXIDMA_BD_STS_SLV_ERR_MASK 0x20000000 /* Slave error */ From d1978d03e86785872871bff9c2623174b10740de Mon Sep 17 00:00:00 2001 From: Suraj Gupta Date: Fri, 27 Mar 2026 13:02:38 +0530 Subject: [PATCH 342/401] net: xilinx: axienet: Fix BQL accounting for multi-BD TX packets When a TX packet spans multiple buffer descriptors (scatter-gather), axienet_free_tx_chain sums the per-BD actual length from descriptor status into a caller-provided accumulator. That sum is reset on each NAPI poll. If the BDs for a single packet complete across different polls, the earlier bytes are lost and never credited to BQL. This causes BQL to think bytes are permanently in-flight, eventually stalling the TX queue. The SKB pointer is stored only on the last BD of a packet. When that BD completes, use skb->len for the byte count instead of summing per-BD status lengths. This matches netdev_sent_queue(), which debits skb->len, and naturally survives across polls because no partial packet contributes to the accumulator. Fixes: c900e49d58eb ("net: xilinx: axienet: Implement BQL") Signed-off-by: Suraj Gupta Reviewed-by: Sean Anderson Link: https://patch.msgid.link/20260327073238.134948-3-suraj.gupta2@amd.com Signed-off-by: Paolo Abeni --- drivers/net/ethernet/xilinx/xilinx_axienet_main.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c index b06e4c37ff61..263c4b67fd5a 100644 --- a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c +++ b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c @@ -770,8 +770,8 @@ static int axienet_device_reset(struct net_device *ndev) * @first_bd: Index of first descriptor to clean up * @nr_bds: Max number of descriptors to clean up * @force: Whether to clean descriptors even if not complete - * @sizep: Pointer to a u32 filled with the total sum of all bytes - * in all cleaned-up descriptors. Ignored if NULL. + * @sizep: Pointer to a u32 accumulating the total byte count of + * completed packets (using skb->len). Ignored if NULL. * @budget: NAPI budget (use 0 when not called from NAPI poll) * * Would either be called after a successful transmit operation, or after @@ -805,6 +805,8 @@ static int axienet_free_tx_chain(struct axienet_local *lp, u32 first_bd, DMA_TO_DEVICE); if (cur_p->skb && (status & XAXIDMA_BD_STS_COMPLETE_MASK)) { + if (sizep) + *sizep += cur_p->skb->len; napi_consume_skb(cur_p->skb, budget); packets++; } @@ -818,9 +820,6 @@ static int axienet_free_tx_chain(struct axienet_local *lp, u32 first_bd, wmb(); cur_p->cntrl = 0; cur_p->status = 0; - - if (sizep) - *sizep += status & XAXIDMA_BD_STS_ACTUAL_LEN_MASK; } if (!force) { From 51e3eb3d074a8c7c306d447612011cf8568c8d6f Mon Sep 17 00:00:00 2001 From: Sachin Mokashi Date: Fri, 27 Mar 2026 09:14:39 -0400 Subject: [PATCH 343/401] ASoC: Intel: ehl_rt5660: Use the correct rtd->dev device in hw_params In rt5660_hw_params(), the error path for snd_soc_dai_set_sysclk() correctly uses rtd->dev as the logging device, but the error path for snd_soc_dai_set_pll() uses codec_dai->dev instead. These two devices are distinct: - rtd->dev is the platform device of the PCM runtime (the Intel HDA/SSP controller, e.g. 0000:00:1f.3), which owns the machine driver callback. - codec_dai->dev is the I2C device of the rt5660 codec itself (i2c-10EC5660:00). Since hw_params is a machine driver operation and both calls are made within the same function from the machine driver's context, all error messages should be attributed to rtd->dev. Using codec_dai->dev for one of them would suggest the error originates inside the codec driver, which is misleading. Align the PLL error log with the sysclk one to use rtd->dev, matching the convention used by all other Intel board drivers in this directory. Signed-off-by: Sachin Mokashi Link: https://patch.msgid.link/20260327131439.1330373-1-sachin.mokashi@intel.com Signed-off-by: Mark Brown --- sound/soc/intel/boards/ehl_rt5660.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/intel/boards/ehl_rt5660.c b/sound/soc/intel/boards/ehl_rt5660.c index 5c7b218f22b7..c40cd9fb1a26 100644 --- a/sound/soc/intel/boards/ehl_rt5660.c +++ b/sound/soc/intel/boards/ehl_rt5660.c @@ -127,7 +127,7 @@ static int rt5660_hw_params(struct snd_pcm_substream *substream, params_rate(params) * 50, params_rate(params) * 512); if (ret < 0) - dev_err(codec_dai->dev, "can't set codec pll: %d\n", ret); + dev_err(rtd->dev, "can't set codec pll: %d\n", ret); return ret; } From e920c36f2073d533bdf19ba6ab690432c8173b63 Mon Sep 17 00:00:00 2001 From: Julian Braha Date: Wed, 25 Mar 2026 00:15:21 +0000 Subject: [PATCH 344/401] ASoC: Intel: boards: fix unmet dependency on PINCTRL This reverts commit c073f0757663 ("ASoC: Intel: sof_sdw: select PINCTRL_CS42L43 and SPI_CS42L43") Currently, SND_SOC_INTEL_SOUNDWIRE_SOF_MACH selects PINCTRL_CS42L43 without also selecting or depending on PINCTRL, despite PINCTRL_CS42L43 depending on PINCTRL. See the following Kbuild warning: WARNING: unmet direct dependencies detected for PINCTRL_CS42L43 Depends on [n]: PINCTRL [=n] && MFD_CS42L43 [=m] Selected by [m]: - SND_SOC_INTEL_SOUNDWIRE_SOF_MACH [=m] && SOUND [=y] && SND [=m] && SND_SOC [=m] && SND_SOC_INTEL_MACH [=y] && (SND_SOC_SOF_INTEL_COMMON [=m] || !SND_SOC_SOF_INTEL_COMMON [=m]) && SND_SOC_SOF_INTEL_SOUNDWIRE [=m] && I2C [=y] && SPI_MASTER [=y] && ACPI [=y] && (MFD_INTEL_LPSS [=n] || COMPILE_TEST [=y]) && (SND_SOC_INTEL_USER_FRIENDLY_LONG_NAMES [=n] || COMPILE_TEST [=y]) && SOUNDWIRE [=m] In response to v1 of this patch [1], Arnd pointed out that there is no compile-time dependency sof_sdw and the PINCTRL_CS42L43 driver. After testing, I can confirm that the kernel compiled with SND_SOC_INTEL_SOUNDWIRE_SOF_MACH enabled and PINCTRL_CS42L43 disabled. This unmet dependency was detected by kconfirm, a static analysis tool for Kconfig. Link: https://lore.kernel.org/all/b8aecc71-1fed-4f52-9f6c-263fbe56d493@app.fastmail.com/ [1] Fixes: c073f0757663 ("ASoC: Intel: sof_sdw: select PINCTRL_CS42L43 and SPI_CS42L43") Signed-off-by: Julian Braha Acked-by: Arnd Bergmann Link: https://patch.msgid.link/20260325001522.1727678-1-julianbraha@gmail.com Signed-off-by: Mark Brown --- sound/soc/intel/boards/Kconfig | 2 -- 1 file changed, 2 deletions(-) diff --git a/sound/soc/intel/boards/Kconfig b/sound/soc/intel/boards/Kconfig index c5942b5655d3..d53af8f7e55b 100644 --- a/sound/soc/intel/boards/Kconfig +++ b/sound/soc/intel/boards/Kconfig @@ -530,8 +530,6 @@ config SND_SOC_INTEL_SOUNDWIRE_SOF_MACH select SND_SOC_CS42L43_SDW select MFD_CS42L43 select MFD_CS42L43_SDW - select PINCTRL_CS42L43 - select SPI_CS42L43 select SND_SOC_CS35L56_SPI select SND_SOC_CS35L56_SDW select SND_SOC_DMIC From fa6e24963342de4370e3a3c9af41e38277b74cf3 Mon Sep 17 00:00:00 2001 From: Xiang Mei Date: Fri, 27 Mar 2026 23:30:00 -0700 Subject: [PATCH 345/401] bridge: mrp: reject zero test interval to avoid OOM panic br_mrp_start_test() and br_mrp_start_in_test() accept the user-supplied interval value from netlink without validation. When interval is 0, usecs_to_jiffies(0) yields 0, causing the delayed work (br_mrp_test_work_expired / br_mrp_in_test_work_expired) to reschedule itself with zero delay. This creates a tight loop on system_percpu_wq that allocates and transmits MRP test frames at maximum rate, exhausting all system memory and causing a kernel panic via OOM deadlock. The same zero-interval issue applies to br_mrp_start_in_test_parse() for interconnect test frames. Use NLA_POLICY_MIN(NLA_U32, 1) in the nla_policy tables for both IFLA_BRIDGE_MRP_START_TEST_INTERVAL and IFLA_BRIDGE_MRP_START_IN_TEST_INTERVAL, so zero is rejected at the netlink attribute parsing layer before the value ever reaches the workqueue scheduling code. This is consistent with how other bridge subsystems (br_fdb, br_mst) enforce range constraints on netlink attributes. Fixes: 20f6a05ef635 ("bridge: mrp: Rework the MRP netlink interface") Fixes: 7ab1748e4ce6 ("bridge: mrp: Extend MRP netlink interface for configuring MRP interconnect") Reported-by: Weiming Shi Signed-off-by: Xiang Mei Acked-by: Nikolay Aleksandrov Reviewed-by: Ido Schimmel Link: https://patch.msgid.link/20260328063000.1845376-1-xmei5@asu.edu Signed-off-by: Paolo Abeni --- net/bridge/br_mrp_netlink.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/bridge/br_mrp_netlink.c b/net/bridge/br_mrp_netlink.c index ce6f63c77cc0..86f0e75d6e34 100644 --- a/net/bridge/br_mrp_netlink.c +++ b/net/bridge/br_mrp_netlink.c @@ -196,7 +196,7 @@ static const struct nla_policy br_mrp_start_test_policy[IFLA_BRIDGE_MRP_START_TEST_MAX + 1] = { [IFLA_BRIDGE_MRP_START_TEST_UNSPEC] = { .type = NLA_REJECT }, [IFLA_BRIDGE_MRP_START_TEST_RING_ID] = { .type = NLA_U32 }, - [IFLA_BRIDGE_MRP_START_TEST_INTERVAL] = { .type = NLA_U32 }, + [IFLA_BRIDGE_MRP_START_TEST_INTERVAL] = NLA_POLICY_MIN(NLA_U32, 1), [IFLA_BRIDGE_MRP_START_TEST_MAX_MISS] = { .type = NLA_U32 }, [IFLA_BRIDGE_MRP_START_TEST_PERIOD] = { .type = NLA_U32 }, [IFLA_BRIDGE_MRP_START_TEST_MONITOR] = { .type = NLA_U32 }, @@ -316,7 +316,7 @@ static const struct nla_policy br_mrp_start_in_test_policy[IFLA_BRIDGE_MRP_START_IN_TEST_MAX + 1] = { [IFLA_BRIDGE_MRP_START_IN_TEST_UNSPEC] = { .type = NLA_REJECT }, [IFLA_BRIDGE_MRP_START_IN_TEST_IN_ID] = { .type = NLA_U32 }, - [IFLA_BRIDGE_MRP_START_IN_TEST_INTERVAL] = { .type = NLA_U32 }, + [IFLA_BRIDGE_MRP_START_IN_TEST_INTERVAL] = NLA_POLICY_MIN(NLA_U32, 1), [IFLA_BRIDGE_MRP_START_IN_TEST_MAX_MISS] = { .type = NLA_U32 }, [IFLA_BRIDGE_MRP_START_IN_TEST_PERIOD] = { .type = NLA_U32 }, }; From 217d5bc9f96272316ac5a3215c7cc32a5127bbf3 Mon Sep 17 00:00:00 2001 From: Alexander Savenko Date: Tue, 31 Mar 2026 11:29:28 +0300 Subject: [PATCH 346/401] ALSA: hda/realtek: Add quirk for Lenovo Yoga Pro 7 14IMH9 The Lenovo Yoga Pro 7 14IMH9 (DMI: 83E2) shares PCI SSID 17aa:3847 with the Legion 7 16ACHG6, but has a different codec subsystem ID (17aa:38cf). The existing SND_PCI_QUIRK for 17aa:3847 applies ALC287_FIXUP_LEGION_16ACHG6, which attempts to initialize an external I2C amplifier (CLSA0100) that is not present on the Yoga Pro 7 14IMH9. As a result, pin 0x17 (bass speakers) is connected to DAC 0x06 which has no volume control, making hardware volume adjustment completely non-functional. Audio is either silent or at maximum volume regardless of the slider position. Add a HDA_CODEC_QUIRK entry using the codec subsystem ID (17aa:38cf) to correctly identify the Yoga Pro 7 14IMH9 and apply ALC287_FIXUP_YOGA9_14IMH9_BASS_SPK_PIN, which redirects pin 0x17 to DAC 0x02 and restores proper volume control. The existing Legion entry is preserved unchanged. This follows the same pattern used for 17aa:386e, where Legion Y9000X and Yoga Pro 7 14ARP8 share a PCI SSID but are distinguished via HDA_CODEC_QUIRK. Link: https://github.com/nomad4tech/lenovo-yoga-pro-7-linux Tested-by: Alexander Savenko Signed-off-by: Alexander Savenko Link: https://patch.msgid.link/20260331082929.44890-1-alex.sav4387@gmail.com Signed-off-by: Takashi Iwai --- sound/hda/codecs/realtek/alc269.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/sound/hda/codecs/realtek/alc269.c b/sound/hda/codecs/realtek/alc269.c index fa5f08cd2c65..c6f878678986 100644 --- a/sound/hda/codecs/realtek/alc269.c +++ b/sound/hda/codecs/realtek/alc269.c @@ -7614,6 +7614,10 @@ static const struct hda_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x17aa, 0x3834, "Lenovo IdeaPad Slim 9i 14ITL5", ALC287_FIXUP_YOGA7_14ITL_SPEAKERS), SND_PCI_QUIRK(0x17aa, 0x383d, "Legion Y9000X 2019", ALC285_FIXUP_LEGION_Y9000X_SPEAKERS), SND_PCI_QUIRK(0x17aa, 0x3843, "Lenovo Yoga 9i / Yoga Book 9i", ALC287_FIXUP_LENOVO_YOGA_BOOK_9I), + /* Yoga Pro 7 14IMH9 shares PCI SSID 17aa:3847 with Legion 7 16ACHG6; + * use codec SSID to distinguish them + */ + HDA_CODEC_QUIRK(0x17aa, 0x38cf, "Lenovo Yoga Pro 7 14IMH9", ALC287_FIXUP_YOGA9_14IMH9_BASS_SPK_PIN), SND_PCI_QUIRK(0x17aa, 0x3847, "Legion 7 16ACHG6", ALC287_FIXUP_LEGION_16ACHG6), SND_PCI_QUIRK(0x17aa, 0x384a, "Lenovo Yoga 7 15ITL5", ALC287_FIXUP_YOGA7_14ITL_SPEAKERS), SND_PCI_QUIRK(0x17aa, 0x3852, "Lenovo Yoga 7 14ITL5", ALC287_FIXUP_YOGA7_14ITL_SPEAKERS), From dd9b99b822684f421f9b7e1e5a69d791ffc1d48f Mon Sep 17 00:00:00 2001 From: Zhang Heng Date: Tue, 31 Mar 2026 17:46:14 +0800 Subject: [PATCH 347/401] ALSA: hda/realtek: add quirk for Acer Swift SFG14-73 fix mute/micmute LEDs and headset microphone for Acer Swift SFG14-73. Link: https://bugzilla.kernel.org/show_bug.cgi?id=220279 Cc: stable@vger.kernel.org Signed-off-by: Zhang Heng Link: https://patch.msgid.link/20260331094614.186063-1-zhangheng@kylinos.cn Signed-off-by: Takashi Iwai --- sound/hda/codecs/realtek/alc269.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/hda/codecs/realtek/alc269.c b/sound/hda/codecs/realtek/alc269.c index c6f878678986..6db23ce8a5fe 100644 --- a/sound/hda/codecs/realtek/alc269.c +++ b/sound/hda/codecs/realtek/alc269.c @@ -6720,6 +6720,7 @@ static const struct hda_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1025, 0x1597, "Acer Nitro 5 AN517-55", ALC2XX_FIXUP_HEADSET_MIC), SND_PCI_QUIRK(0x1025, 0x169a, "Acer Swift SFG16", ALC256_FIXUP_ACER_SFG16_MICMUTE_LED), SND_PCI_QUIRK(0x1025, 0x171e, "Acer Nitro ANV15-51", ALC245_FIXUP_ACER_MICMUTE_LED), + SND_PCI_QUIRK(0x1025, 0x173a, "Acer Swift SFG14-73", ALC245_FIXUP_ACER_MICMUTE_LED), SND_PCI_QUIRK(0x1025, 0x1826, "Acer Helios ZPC", ALC287_FIXUP_PREDATOR_SPK_CS35L41_I2C_2), SND_PCI_QUIRK(0x1025, 0x182c, "Acer Helios ZPD", ALC287_FIXUP_PREDATOR_SPK_CS35L41_I2C_2), SND_PCI_QUIRK(0x1025, 0x1844, "Acer Helios ZPS", ALC287_FIXUP_PREDATOR_SPK_CS35L41_I2C_2), From bbe5ab8191a33572c11be8628c55b79246307125 Mon Sep 17 00:00:00 2001 From: Waiman Long Date: Tue, 31 Mar 2026 11:11:07 -0400 Subject: [PATCH 348/401] cgroup/cpuset: Simplify setsched decision check in task iteration loop of cpuset_can_attach() Centralize the check required to run security_task_setscheduler() in the task iteration loop of cpuset_can_attach() outside of the loop as it has no dependency on the characteristics of the tasks themselves. There is no functional change. Signed-off-by: Waiman Long Signed-off-by: Tejun Heo --- kernel/cgroup/cpuset.c | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c index d21868455341..58c5b7b72cca 100644 --- a/kernel/cgroup/cpuset.c +++ b/kernel/cgroup/cpuset.c @@ -2988,7 +2988,7 @@ static int cpuset_can_attach(struct cgroup_taskset *tset) struct cgroup_subsys_state *css; struct cpuset *cs, *oldcs; struct task_struct *task; - bool cpus_updated, mems_updated; + bool setsched_check; int ret; /* used later by cpuset_attach() */ @@ -3003,20 +3003,21 @@ static int cpuset_can_attach(struct cgroup_taskset *tset) if (ret) goto out_unlock; - cpus_updated = !cpumask_equal(cs->effective_cpus, oldcs->effective_cpus); - mems_updated = !nodes_equal(cs->effective_mems, oldcs->effective_mems); + /* + * Skip rights over task setsched check in v2 when nothing changes, + * migration permission derives from hierarchy ownership in + * cgroup_procs_write_permission()). + */ + setsched_check = !cpuset_v2() || + !cpumask_equal(cs->effective_cpus, oldcs->effective_cpus) || + !nodes_equal(cs->effective_mems, oldcs->effective_mems); cgroup_taskset_for_each(task, css, tset) { ret = task_can_attach(task); if (ret) goto out_unlock; - /* - * Skip rights over task check in v2 when nothing changes, - * migration permission derives from hierarchy ownership in - * cgroup_procs_write_permission()). - */ - if (!cpuset_v2() || (cpus_updated || mems_updated)) { + if (setsched_check) { ret = security_task_setscheduler(task); if (ret) goto out_unlock; From 089f3fcd690c71cb3d8ca09f34027764e28920a0 Mon Sep 17 00:00:00 2001 From: Waiman Long Date: Tue, 31 Mar 2026 11:11:08 -0400 Subject: [PATCH 349/401] cgroup/cpuset: Skip security check for hotplug induced v1 task migration When a CPU hot removal causes a v1 cpuset to lose all its CPUs, the cpuset hotplug handler will schedule a work function to migrate tasks in that cpuset with no CPU to its ancestor to enable those tasks to continue running. If a strict security policy is in place, however, the task migration may fail when security_task_setscheduler() call in cpuset_can_attach() returns a -EACCES error. That will mean that those tasks will have no CPU to run on. The system administrators will have to explicitly intervene to either add CPUs to that cpuset or move the tasks elsewhere if they are aware of it. This problem was found by a reported test failure in the LTP's cpuset_hotplug_test.sh. Fix this problem by treating this special case as an exception to skip the setsched security check in cpuset_can_attach() when a v1 cpuset with tasks have no CPU left. With that patch applied, the cpuset_hotplug_test.sh test can be run successfully without failure. Signed-off-by: Waiman Long Signed-off-by: Tejun Heo --- kernel/cgroup/cpuset.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c index 58c5b7b72cca..1335e437098e 100644 --- a/kernel/cgroup/cpuset.c +++ b/kernel/cgroup/cpuset.c @@ -3012,6 +3012,16 @@ static int cpuset_can_attach(struct cgroup_taskset *tset) !cpumask_equal(cs->effective_cpus, oldcs->effective_cpus) || !nodes_equal(cs->effective_mems, oldcs->effective_mems); + /* + * A v1 cpuset with tasks will have no CPU left only when CPU hotplug + * brings the last online CPU offline as users are not allowed to empty + * cpuset.cpus when there are active tasks inside. When that happens, + * we should allow tasks to migrate out without security check to make + * sure they will be able to run after migration. + */ + if (!is_in_v2_mode() && cpumask_empty(oldcs->effective_cpus)) + setsched_check = false; + cgroup_taskset_for_each(task, css, tset) { ret = task_can_attach(task); if (ret) From 9ca562bb8e66978b53028fa32b1a190708e6a091 Mon Sep 17 00:00:00 2001 From: Zhengchuan Liang Date: Mon, 30 Mar 2026 16:46:24 +0800 Subject: [PATCH 350/401] net: ipv6: flowlabel: defer exclusive option free until RCU teardown `ip6fl_seq_show()` walks the global flowlabel hash under the seq-file RCU read-side lock and prints `fl->opt->opt_nflen` when an option block is present. Exclusive flowlabels currently free `fl->opt` as soon as `fl->users` drops to zero in `fl_release()`. However, the surrounding `struct ip6_flowlabel` remains visible in the global hash table until later garbage collection removes it and `fl_free_rcu()` finally tears it down. A concurrent `/proc/net/ip6_flowlabel` reader can therefore race that early `kfree()` and dereference freed option state, triggering a crash in `ip6fl_seq_show()`. Fix this by keeping `fl->opt` alive until `fl_free_rcu()`. That matches the lifetime already required for the enclosing flowlabel while readers can still reach it under RCU. Fixes: d3aedd5ebd4b ("ipv6 flowlabel: Convert hash list to RCU.") Reported-by: Yifan Wu Reported-by: Juefei Pu Co-developed-by: Yuan Tan Signed-off-by: Yuan Tan Suggested-by: Xin Liu Tested-by: Ren Wei Signed-off-by: Zhengchuan Liang Signed-off-by: Ren Wei Reviewed-by: Eric Dumazet Link: https://patch.msgid.link/07351f0ec47bcee289576f39f9354f4a64add6e4.1774855883.git.zcliangcn@gmail.com Signed-off-by: Jakub Kicinski --- net/ipv6/ip6_flowlabel.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/net/ipv6/ip6_flowlabel.c b/net/ipv6/ip6_flowlabel.c index 7c12bf75beed..c92f98c6f6ec 100644 --- a/net/ipv6/ip6_flowlabel.c +++ b/net/ipv6/ip6_flowlabel.c @@ -133,11 +133,6 @@ static void fl_release(struct ip6_flowlabel *fl) if (time_after(ttd, fl->expires)) fl->expires = ttd; ttd = fl->expires; - if (fl->opt && fl->share == IPV6_FL_S_EXCL) { - struct ipv6_txoptions *opt = fl->opt; - fl->opt = NULL; - kfree(opt); - } if (!timer_pending(&ip6_fl_gc_timer) || time_after(ip6_fl_gc_timer.expires, ttd)) mod_timer(&ip6_fl_gc_timer, ttd); From 5dd8025a49c268ab6b94d978532af3ad341132a7 Mon Sep 17 00:00:00 2001 From: Li Xiasong Date: Mon, 30 Mar 2026 20:03:35 +0800 Subject: [PATCH 351/401] mptcp: fix soft lockup in mptcp_recvmsg() syzbot reported a soft lockup in mptcp_recvmsg() [0]. When receiving data with MSG_PEEK | MSG_WAITALL flags, the skb is not removed from the sk_receive_queue. This causes sk_wait_data() to always find available data and never perform actual waiting, leading to a soft lockup. Fix this by adding a 'last' parameter to track the last peeked skb. This allows sk_wait_data() to make informed waiting decisions and prevent infinite loops when MSG_PEEK is used. [0]: watchdog: BUG: soft lockup - CPU#2 stuck for 156s! [server:1963] Modules linked in: CPU: 2 UID: 0 PID: 1963 Comm: server Not tainted 6.19.0-rc8 #61 PREEMPT(none) Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.15.0-1 04/01/2014 RIP: 0010:sk_wait_data+0x15/0x190 Code: 80 00 00 00 00 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 f3 0f 1e fa 41 56 41 55 41 54 49 89 f4 55 48 89 d5 53 48 89 fb <48> 83 ec 30 65 48 8b 05 17 a4 6b 01 48 89 44 24 28 31 c0 65 48 8b RSP: 0018:ffffc90000603ca0 EFLAGS: 00000246 RAX: 0000000000000000 RBX: ffff888102bf0800 RCX: 0000000000000001 RDX: 0000000000000000 RSI: ffffc90000603d18 RDI: ffff888102bf0800 RBP: 0000000000000000 R08: 0000000000000002 R09: 0000000000000101 R10: 0000000000000000 R11: 0000000000000075 R12: ffffc90000603d18 R13: ffff888102bf0800 R14: ffff888102bf0800 R15: 0000000000000000 FS: 00007f6e38b8c4c0(0000) GS:ffff8881b877e000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 000055aa7bff1680 CR3: 0000000105cbe000 CR4: 00000000000006f0 Call Trace: mptcp_recvmsg+0x547/0x8c0 net/mptcp/protocol.c:2329 inet_recvmsg+0x11f/0x130 net/ipv4/af_inet.c:891 sock_recvmsg+0x94/0xc0 net/socket.c:1100 __sys_recvfrom+0xb2/0x130 net/socket.c:2256 __x64_sys_recvfrom+0x1f/0x30 net/socket.c:2267 do_syscall_64+0x59/0x2d0 arch/x86/entry/syscall_64.c:94 entry_SYSCALL_64_after_hwframe+0x76/0x7e arch/x86/entry/entry_64.S:131 RIP: 0033:0x7f6e386a4a1d Code: 0f 1f 84 00 00 00 00 00 0f 1f 44 00 00 48 8d 05 f1 de 2c 00 41 89 ca 8b 00 85 c0 75 20 45 31 c9 45 31 c0 b8 2d 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 6b f3 c3 66 0f 1f 84 00 00 00 00 00 41 56 41 RSP: 002b:00007ffc3c4bb078 EFLAGS: 00000246 ORIG_RAX: 000000000000002d RAX: ffffffffffffffda RBX: 000000000000861e RCX: 00007f6e386a4a1d RDX: 00000000000003ff RSI: 00007ffc3c4bb150 RDI: 0000000000000004 RBP: 00007ffc3c4bb570 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000103 R11: 0000000000000246 R12: 00005605dbc00be0 R13: 00007ffc3c4bb650 R14: 0000000000000000 R15: 0000000000000000 Fixes: 8e04ce45a8db ("mptcp: fix MSG_PEEK stream corruption") Signed-off-by: Li Xiasong Reviewed-by: Matthieu Baerts (NGI0) Link: https://patch.msgid.link/20260330120335.659027-1-lixiasong1@huawei.com Signed-off-by: Jakub Kicinski --- net/mptcp/protocol.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index cf1852b99963..65c3bb8016f4 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -2006,7 +2006,7 @@ static void mptcp_eat_recv_skb(struct sock *sk, struct sk_buff *skb) static int __mptcp_recvmsg_mskq(struct sock *sk, struct msghdr *msg, size_t len, int flags, int copied_total, struct scm_timestamping_internal *tss, - int *cmsg_flags) + int *cmsg_flags, struct sk_buff **last) { struct mptcp_sock *msk = mptcp_sk(sk); struct sk_buff *skb, *tmp; @@ -2023,6 +2023,7 @@ static int __mptcp_recvmsg_mskq(struct sock *sk, struct msghdr *msg, /* skip already peeked skbs */ if (total_data_len + data_len <= copied_total) { total_data_len += data_len; + *last = skb; continue; } @@ -2058,6 +2059,8 @@ static int __mptcp_recvmsg_mskq(struct sock *sk, struct msghdr *msg, } mptcp_eat_recv_skb(sk, skb); + } else { + *last = skb; } if (copied >= len) @@ -2288,10 +2291,12 @@ static int mptcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, cmsg_flags = MPTCP_CMSG_INQ; while (copied < len) { + struct sk_buff *last = NULL; int err, bytes_read; bytes_read = __mptcp_recvmsg_mskq(sk, msg, len - copied, flags, - copied, &tss, &cmsg_flags); + copied, &tss, &cmsg_flags, + &last); if (unlikely(bytes_read < 0)) { if (!copied) copied = bytes_read; @@ -2343,7 +2348,7 @@ static int mptcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, pr_debug("block timeout %ld\n", timeo); mptcp_cleanup_rbuf(msk, copied); - err = sk_wait_data(sk, &timeo, NULL); + err = sk_wait_data(sk, &timeo, last); if (err < 0) { err = copied ? : err; goto out_err; From c0fd0fe745f5e8c568d898cd1513d0083e46204a Mon Sep 17 00:00:00 2001 From: Yufan Chen Date: Sun, 29 Mar 2026 00:32:57 +0800 Subject: [PATCH 352/401] net: ftgmac100: fix ring allocation unwind on open failure ftgmac100_alloc_rings() allocates rx_skbs, tx_skbs, rxdes, txdes, and rx_scratch in stages. On intermediate failures it returned -ENOMEM directly, leaking resources allocated earlier in the function. Rework the failure path to use staged local unwind labels and free allocated resources in reverse order before returning -ENOMEM. This matches common netdev allocation cleanup style. Fixes: d72e01a0430f ("ftgmac100: Use a scratch buffer for failed RX allocations") Cc: stable@vger.kernel.org Signed-off-by: Yufan Chen Link: https://patch.msgid.link/20260328163257.60836-1-yufan.chen@linux.dev Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/faraday/ftgmac100.c | 28 ++++++++++++++++++++---- 1 file changed, 24 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/faraday/ftgmac100.c b/drivers/net/ethernet/faraday/ftgmac100.c index 1e91e79c8134..6d2fe5c2f390 100644 --- a/drivers/net/ethernet/faraday/ftgmac100.c +++ b/drivers/net/ethernet/faraday/ftgmac100.c @@ -977,19 +977,19 @@ static int ftgmac100_alloc_rings(struct ftgmac100 *priv) priv->tx_skbs = kcalloc(MAX_TX_QUEUE_ENTRIES, sizeof(void *), GFP_KERNEL); if (!priv->tx_skbs) - return -ENOMEM; + goto err_free_rx_skbs; /* Allocate descriptors */ priv->rxdes = dma_alloc_coherent(priv->dev, MAX_RX_QUEUE_ENTRIES * sizeof(struct ftgmac100_rxdes), &priv->rxdes_dma, GFP_KERNEL); if (!priv->rxdes) - return -ENOMEM; + goto err_free_tx_skbs; priv->txdes = dma_alloc_coherent(priv->dev, MAX_TX_QUEUE_ENTRIES * sizeof(struct ftgmac100_txdes), &priv->txdes_dma, GFP_KERNEL); if (!priv->txdes) - return -ENOMEM; + goto err_free_rxdes; /* Allocate scratch packet buffer */ priv->rx_scratch = dma_alloc_coherent(priv->dev, @@ -997,9 +997,29 @@ static int ftgmac100_alloc_rings(struct ftgmac100 *priv) &priv->rx_scratch_dma, GFP_KERNEL); if (!priv->rx_scratch) - return -ENOMEM; + goto err_free_txdes; return 0; + +err_free_txdes: + dma_free_coherent(priv->dev, + MAX_TX_QUEUE_ENTRIES * + sizeof(struct ftgmac100_txdes), + priv->txdes, priv->txdes_dma); + priv->txdes = NULL; +err_free_rxdes: + dma_free_coherent(priv->dev, + MAX_RX_QUEUE_ENTRIES * + sizeof(struct ftgmac100_rxdes), + priv->rxdes, priv->rxdes_dma); + priv->rxdes = NULL; +err_free_tx_skbs: + kfree(priv->tx_skbs); + priv->tx_skbs = NULL; +err_free_rx_skbs: + kfree(priv->rx_skbs); + priv->rx_skbs = NULL; + return -ENOMEM; } static void ftgmac100_init_rings(struct ftgmac100 *priv) From 48b3cd69265f346f64b93064723492da46206e9b Mon Sep 17 00:00:00 2001 From: Michal Piekos Date: Sat, 28 Mar 2026 09:55:51 +0100 Subject: [PATCH 353/401] net: stmmac: skip VLAN restore when VLAN hash ops are missing stmmac_vlan_restore() unconditionally calls stmmac_vlan_update() when NETIF_F_VLAN_FEATURES is set. On platforms where priv->hw->vlan (or ->update_vlan_hash) is not provided, stmmac_update_vlan_hash() returns -EINVAL via stmmac_do_void_callback(), resulting in a spurious "Failed to restore VLANs" error even when no VLAN filtering is in use. Remove not needed comment. Remove not used return value from stmmac_vlan_restore(). Tested on Orange Pi Zero 3. Fixes: bd7ad51253a7 ("net: stmmac: Fix VLAN HW state restore") Signed-off-by: Michal Piekos Link: https://patch.msgid.link/20260328-vlan-restore-error-v4-1-f88624c530dc@mmpsystems.pl Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 14 ++++---------- 1 file changed, 4 insertions(+), 10 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index 6827c99bde8c..13d3cac056be 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -156,7 +156,7 @@ static void stmmac_tx_timer_arm(struct stmmac_priv *priv, u32 queue); static void stmmac_flush_tx_descriptors(struct stmmac_priv *priv, int queue); static void stmmac_set_dma_operation_mode(struct stmmac_priv *priv, u32 txmode, u32 rxmode, u32 chan); -static int stmmac_vlan_restore(struct stmmac_priv *priv); +static void stmmac_vlan_restore(struct stmmac_priv *priv); #ifdef CONFIG_DEBUG_FS static const struct net_device_ops stmmac_netdev_ops; @@ -6859,21 +6859,15 @@ static int stmmac_vlan_rx_kill_vid(struct net_device *ndev, __be16 proto, u16 vi return ret; } -static int stmmac_vlan_restore(struct stmmac_priv *priv) +static void stmmac_vlan_restore(struct stmmac_priv *priv) { - int ret; - if (!(priv->dev->features & NETIF_F_VLAN_FEATURES)) - return 0; + return; if (priv->hw->num_vlan) stmmac_restore_hw_vlan_rx_fltr(priv, priv->dev, priv->hw); - ret = stmmac_vlan_update(priv, priv->num_double_vlans); - if (ret) - netdev_err(priv->dev, "Failed to restore VLANs\n"); - - return ret; + stmmac_vlan_update(priv, priv->num_double_vlans); } static int stmmac_bpf(struct net_device *dev, struct netdev_bpf *bpf) From a0dafdbd1049a8ea661a1a471be1b840bd8aed13 Mon Sep 17 00:00:00 2001 From: "Geoffrey D. Bennett" Date: Wed, 1 Apr 2026 16:01:27 +1030 Subject: [PATCH 354/401] ALSA: usb-audio: Exclude Scarlett 2i2 1st Gen (8016) from SKIP_IFACE_SETUP Same issue as the other 1st Gen Scarletts: QUIRK_FLAG_SKIP_IFACE_SETUP causes distorted audio on this revision of the Scarlett 2i2 1st Gen (1235:8016). Fixes: 38c322068a26 ("ALSA: usb-audio: Add QUIRK_FLAG_SKIP_IFACE_SETUP") Reported-by: lukas-reineke [https://github.com/geoffreybennett/linux-fcp/issues/54] Signed-off-by: Geoffrey D. Bennett Link: https://patch.msgid.link/acytr8aEUba4VXmZ@m.b4.vu Signed-off-by: Takashi Iwai --- sound/usb/quirks.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/usb/quirks.c b/sound/usb/quirks.c index 116da076a194..4cfa24c06fcd 100644 --- a/sound/usb/quirks.c +++ b/sound/usb/quirks.c @@ -2435,6 +2435,7 @@ static const struct usb_audio_quirk_flags_table quirk_flags_table[] = { QUIRK_FLAG_VALIDATE_RATES), DEVICE_FLG(0x1235, 0x8006, 0), /* Focusrite Scarlett 2i2 1st Gen */ DEVICE_FLG(0x1235, 0x800a, 0), /* Focusrite Scarlett 2i4 1st Gen */ + DEVICE_FLG(0x1235, 0x8016, 0), /* Focusrite Scarlett 2i2 1st Gen */ DEVICE_FLG(0x1235, 0x801c, 0), /* Focusrite Scarlett Solo 1st Gen */ VENDOR_FLG(0x1235, /* Focusrite Novation */ QUIRK_FLAG_SKIP_CLOCK_SELECTOR | From 76522fcdbc3a02b568f5d957f7e66fc194abb893 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Thu, 26 Mar 2026 00:17:09 +0100 Subject: [PATCH 355/401] netfilter: flowtable: strictly check for maximum number of actions The maximum number of flowtable hardware offload actions in IPv6 is: * ethernet mangling (4 payload actions, 2 for each ethernet address) * SNAT (4 payload actions) * DNAT (4 payload actions) * Double VLAN (4 vlan actions, 2 for popping vlan, and 2 for pushing) for QinQ. * Redirect (1 action) Which makes 17, while the maximum is 16. But act_ct supports for tunnels actions too. Note that payload action operates at 32-bit word level, so mangling an IPv6 address takes 4 payload actions. Update flow_action_entry_next() calls to check for the maximum number of supported actions. While at it, rise the maximum number of actions per flow from 16 to 24 so this works fine with IPv6 setups. Fixes: c29f74e0df7a ("netfilter: nf_flow_table: hardware offload support") Reported-by: Hyunwoo Kim Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_flow_table_offload.c | 196 +++++++++++++++++--------- 1 file changed, 130 insertions(+), 66 deletions(-) diff --git a/net/netfilter/nf_flow_table_offload.c b/net/netfilter/nf_flow_table_offload.c index 9b677e116487..93d0aa7f8fcc 100644 --- a/net/netfilter/nf_flow_table_offload.c +++ b/net/netfilter/nf_flow_table_offload.c @@ -14,6 +14,8 @@ #include #include +#define NF_FLOW_RULE_ACTION_MAX 24 + static struct workqueue_struct *nf_flow_offload_add_wq; static struct workqueue_struct *nf_flow_offload_del_wq; static struct workqueue_struct *nf_flow_offload_stats_wq; @@ -216,7 +218,12 @@ static void flow_offload_mangle(struct flow_action_entry *entry, static inline struct flow_action_entry * flow_action_entry_next(struct nf_flow_rule *flow_rule) { - int i = flow_rule->rule->action.num_entries++; + int i; + + if (unlikely(flow_rule->rule->action.num_entries >= NF_FLOW_RULE_ACTION_MAX)) + return NULL; + + i = flow_rule->rule->action.num_entries++; return &flow_rule->rule->action.entries[i]; } @@ -234,6 +241,9 @@ static int flow_offload_eth_src(struct net *net, u32 mask, val; u16 val16; + if (!entry0 || !entry1) + return -E2BIG; + this_tuple = &flow->tuplehash[dir].tuple; switch (this_tuple->xmit_type) { @@ -284,6 +294,9 @@ static int flow_offload_eth_dst(struct net *net, u8 nud_state; u16 val16; + if (!entry0 || !entry1) + return -E2BIG; + this_tuple = &flow->tuplehash[dir].tuple; switch (this_tuple->xmit_type) { @@ -325,16 +338,19 @@ static int flow_offload_eth_dst(struct net *net, return 0; } -static void flow_offload_ipv4_snat(struct net *net, - const struct flow_offload *flow, - enum flow_offload_tuple_dir dir, - struct nf_flow_rule *flow_rule) +static int flow_offload_ipv4_snat(struct net *net, + const struct flow_offload *flow, + enum flow_offload_tuple_dir dir, + struct nf_flow_rule *flow_rule) { struct flow_action_entry *entry = flow_action_entry_next(flow_rule); u32 mask = ~htonl(0xffffffff); __be32 addr; u32 offset; + if (!entry) + return -E2BIG; + switch (dir) { case FLOW_OFFLOAD_DIR_ORIGINAL: addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_v4.s_addr; @@ -345,23 +361,27 @@ static void flow_offload_ipv4_snat(struct net *net, offset = offsetof(struct iphdr, daddr); break; default: - return; + return -EOPNOTSUPP; } flow_offload_mangle(entry, FLOW_ACT_MANGLE_HDR_TYPE_IP4, offset, &addr, &mask); + return 0; } -static void flow_offload_ipv4_dnat(struct net *net, - const struct flow_offload *flow, - enum flow_offload_tuple_dir dir, - struct nf_flow_rule *flow_rule) +static int flow_offload_ipv4_dnat(struct net *net, + const struct flow_offload *flow, + enum flow_offload_tuple_dir dir, + struct nf_flow_rule *flow_rule) { struct flow_action_entry *entry = flow_action_entry_next(flow_rule); u32 mask = ~htonl(0xffffffff); __be32 addr; u32 offset; + if (!entry) + return -E2BIG; + switch (dir) { case FLOW_OFFLOAD_DIR_ORIGINAL: addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.src_v4.s_addr; @@ -372,14 +392,15 @@ static void flow_offload_ipv4_dnat(struct net *net, offset = offsetof(struct iphdr, saddr); break; default: - return; + return -EOPNOTSUPP; } flow_offload_mangle(entry, FLOW_ACT_MANGLE_HDR_TYPE_IP4, offset, &addr, &mask); + return 0; } -static void flow_offload_ipv6_mangle(struct nf_flow_rule *flow_rule, +static int flow_offload_ipv6_mangle(struct nf_flow_rule *flow_rule, unsigned int offset, const __be32 *addr, const __be32 *mask) { @@ -388,15 +409,20 @@ static void flow_offload_ipv6_mangle(struct nf_flow_rule *flow_rule, for (i = 0; i < sizeof(struct in6_addr) / sizeof(u32); i++) { entry = flow_action_entry_next(flow_rule); + if (!entry) + return -E2BIG; + flow_offload_mangle(entry, FLOW_ACT_MANGLE_HDR_TYPE_IP6, offset + i * sizeof(u32), &addr[i], mask); } + + return 0; } -static void flow_offload_ipv6_snat(struct net *net, - const struct flow_offload *flow, - enum flow_offload_tuple_dir dir, - struct nf_flow_rule *flow_rule) +static int flow_offload_ipv6_snat(struct net *net, + const struct flow_offload *flow, + enum flow_offload_tuple_dir dir, + struct nf_flow_rule *flow_rule) { u32 mask = ~htonl(0xffffffff); const __be32 *addr; @@ -412,16 +438,16 @@ static void flow_offload_ipv6_snat(struct net *net, offset = offsetof(struct ipv6hdr, daddr); break; default: - return; + return -EOPNOTSUPP; } - flow_offload_ipv6_mangle(flow_rule, offset, addr, &mask); + return flow_offload_ipv6_mangle(flow_rule, offset, addr, &mask); } -static void flow_offload_ipv6_dnat(struct net *net, - const struct flow_offload *flow, - enum flow_offload_tuple_dir dir, - struct nf_flow_rule *flow_rule) +static int flow_offload_ipv6_dnat(struct net *net, + const struct flow_offload *flow, + enum flow_offload_tuple_dir dir, + struct nf_flow_rule *flow_rule) { u32 mask = ~htonl(0xffffffff); const __be32 *addr; @@ -437,10 +463,10 @@ static void flow_offload_ipv6_dnat(struct net *net, offset = offsetof(struct ipv6hdr, saddr); break; default: - return; + return -EOPNOTSUPP; } - flow_offload_ipv6_mangle(flow_rule, offset, addr, &mask); + return flow_offload_ipv6_mangle(flow_rule, offset, addr, &mask); } static int flow_offload_l4proto(const struct flow_offload *flow) @@ -462,15 +488,18 @@ static int flow_offload_l4proto(const struct flow_offload *flow) return type; } -static void flow_offload_port_snat(struct net *net, - const struct flow_offload *flow, - enum flow_offload_tuple_dir dir, - struct nf_flow_rule *flow_rule) +static int flow_offload_port_snat(struct net *net, + const struct flow_offload *flow, + enum flow_offload_tuple_dir dir, + struct nf_flow_rule *flow_rule) { struct flow_action_entry *entry = flow_action_entry_next(flow_rule); u32 mask, port; u32 offset; + if (!entry) + return -E2BIG; + switch (dir) { case FLOW_OFFLOAD_DIR_ORIGINAL: port = ntohs(flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_port); @@ -485,22 +514,26 @@ static void flow_offload_port_snat(struct net *net, mask = ~htonl(0xffff); break; default: - return; + return -EOPNOTSUPP; } flow_offload_mangle(entry, flow_offload_l4proto(flow), offset, &port, &mask); + return 0; } -static void flow_offload_port_dnat(struct net *net, - const struct flow_offload *flow, - enum flow_offload_tuple_dir dir, - struct nf_flow_rule *flow_rule) +static int flow_offload_port_dnat(struct net *net, + const struct flow_offload *flow, + enum flow_offload_tuple_dir dir, + struct nf_flow_rule *flow_rule) { struct flow_action_entry *entry = flow_action_entry_next(flow_rule); u32 mask, port; u32 offset; + if (!entry) + return -E2BIG; + switch (dir) { case FLOW_OFFLOAD_DIR_ORIGINAL: port = ntohs(flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.src_port); @@ -515,20 +548,24 @@ static void flow_offload_port_dnat(struct net *net, mask = ~htonl(0xffff0000); break; default: - return; + return -EOPNOTSUPP; } flow_offload_mangle(entry, flow_offload_l4proto(flow), offset, &port, &mask); + return 0; } -static void flow_offload_ipv4_checksum(struct net *net, - const struct flow_offload *flow, - struct nf_flow_rule *flow_rule) +static int flow_offload_ipv4_checksum(struct net *net, + const struct flow_offload *flow, + struct nf_flow_rule *flow_rule) { u8 protonum = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.l4proto; struct flow_action_entry *entry = flow_action_entry_next(flow_rule); + if (!entry) + return -E2BIG; + entry->id = FLOW_ACTION_CSUM; entry->csum_flags = TCA_CSUM_UPDATE_FLAG_IPV4HDR; @@ -540,12 +577,14 @@ static void flow_offload_ipv4_checksum(struct net *net, entry->csum_flags |= TCA_CSUM_UPDATE_FLAG_UDP; break; } + + return 0; } -static void flow_offload_redirect(struct net *net, - const struct flow_offload *flow, - enum flow_offload_tuple_dir dir, - struct nf_flow_rule *flow_rule) +static int flow_offload_redirect(struct net *net, + const struct flow_offload *flow, + enum flow_offload_tuple_dir dir, + struct nf_flow_rule *flow_rule) { const struct flow_offload_tuple *this_tuple, *other_tuple; struct flow_action_entry *entry; @@ -563,21 +602,28 @@ static void flow_offload_redirect(struct net *net, ifindex = other_tuple->iifidx; break; default: - return; + return -EOPNOTSUPP; } dev = dev_get_by_index(net, ifindex); if (!dev) - return; + return -ENODEV; entry = flow_action_entry_next(flow_rule); + if (!entry) { + dev_put(dev); + return -E2BIG; + } + entry->id = FLOW_ACTION_REDIRECT; entry->dev = dev; + + return 0; } -static void flow_offload_encap_tunnel(const struct flow_offload *flow, - enum flow_offload_tuple_dir dir, - struct nf_flow_rule *flow_rule) +static int flow_offload_encap_tunnel(const struct flow_offload *flow, + enum flow_offload_tuple_dir dir, + struct nf_flow_rule *flow_rule) { const struct flow_offload_tuple *this_tuple; struct flow_action_entry *entry; @@ -585,7 +631,7 @@ static void flow_offload_encap_tunnel(const struct flow_offload *flow, this_tuple = &flow->tuplehash[dir].tuple; if (this_tuple->xmit_type == FLOW_OFFLOAD_XMIT_DIRECT) - return; + return 0; dst = this_tuple->dst_cache; if (dst && dst->lwtstate) { @@ -594,15 +640,19 @@ static void flow_offload_encap_tunnel(const struct flow_offload *flow, tun_info = lwt_tun_info(dst->lwtstate); if (tun_info && (tun_info->mode & IP_TUNNEL_INFO_TX)) { entry = flow_action_entry_next(flow_rule); + if (!entry) + return -E2BIG; entry->id = FLOW_ACTION_TUNNEL_ENCAP; entry->tunnel = tun_info; } } + + return 0; } -static void flow_offload_decap_tunnel(const struct flow_offload *flow, - enum flow_offload_tuple_dir dir, - struct nf_flow_rule *flow_rule) +static int flow_offload_decap_tunnel(const struct flow_offload *flow, + enum flow_offload_tuple_dir dir, + struct nf_flow_rule *flow_rule) { const struct flow_offload_tuple *other_tuple; struct flow_action_entry *entry; @@ -610,7 +660,7 @@ static void flow_offload_decap_tunnel(const struct flow_offload *flow, other_tuple = &flow->tuplehash[!dir].tuple; if (other_tuple->xmit_type == FLOW_OFFLOAD_XMIT_DIRECT) - return; + return 0; dst = other_tuple->dst_cache; if (dst && dst->lwtstate) { @@ -619,9 +669,13 @@ static void flow_offload_decap_tunnel(const struct flow_offload *flow, tun_info = lwt_tun_info(dst->lwtstate); if (tun_info && (tun_info->mode & IP_TUNNEL_INFO_TX)) { entry = flow_action_entry_next(flow_rule); + if (!entry) + return -E2BIG; entry->id = FLOW_ACTION_TUNNEL_DECAP; } } + + return 0; } static int @@ -633,8 +687,9 @@ nf_flow_rule_route_common(struct net *net, const struct flow_offload *flow, const struct flow_offload_tuple *tuple; int i; - flow_offload_decap_tunnel(flow, dir, flow_rule); - flow_offload_encap_tunnel(flow, dir, flow_rule); + if (flow_offload_decap_tunnel(flow, dir, flow_rule) < 0 || + flow_offload_encap_tunnel(flow, dir, flow_rule) < 0) + return -1; if (flow_offload_eth_src(net, flow, dir, flow_rule) < 0 || flow_offload_eth_dst(net, flow, dir, flow_rule) < 0) @@ -650,6 +705,8 @@ nf_flow_rule_route_common(struct net *net, const struct flow_offload *flow, if (tuple->encap[i].proto == htons(ETH_P_8021Q)) { entry = flow_action_entry_next(flow_rule); + if (!entry) + return -1; entry->id = FLOW_ACTION_VLAN_POP; } } @@ -663,6 +720,8 @@ nf_flow_rule_route_common(struct net *net, const struct flow_offload *flow, continue; entry = flow_action_entry_next(flow_rule); + if (!entry) + return -1; switch (other_tuple->encap[i].proto) { case htons(ETH_P_PPP_SES): @@ -688,18 +747,22 @@ int nf_flow_rule_route_ipv4(struct net *net, struct flow_offload *flow, return -1; if (test_bit(NF_FLOW_SNAT, &flow->flags)) { - flow_offload_ipv4_snat(net, flow, dir, flow_rule); - flow_offload_port_snat(net, flow, dir, flow_rule); + if (flow_offload_ipv4_snat(net, flow, dir, flow_rule) < 0 || + flow_offload_port_snat(net, flow, dir, flow_rule) < 0) + return -1; } if (test_bit(NF_FLOW_DNAT, &flow->flags)) { - flow_offload_ipv4_dnat(net, flow, dir, flow_rule); - flow_offload_port_dnat(net, flow, dir, flow_rule); + if (flow_offload_ipv4_dnat(net, flow, dir, flow_rule) < 0 || + flow_offload_port_dnat(net, flow, dir, flow_rule) < 0) + return -1; } if (test_bit(NF_FLOW_SNAT, &flow->flags) || test_bit(NF_FLOW_DNAT, &flow->flags)) - flow_offload_ipv4_checksum(net, flow, flow_rule); + if (flow_offload_ipv4_checksum(net, flow, flow_rule) < 0) + return -1; - flow_offload_redirect(net, flow, dir, flow_rule); + if (flow_offload_redirect(net, flow, dir, flow_rule) < 0) + return -1; return 0; } @@ -713,22 +776,23 @@ int nf_flow_rule_route_ipv6(struct net *net, struct flow_offload *flow, return -1; if (test_bit(NF_FLOW_SNAT, &flow->flags)) { - flow_offload_ipv6_snat(net, flow, dir, flow_rule); - flow_offload_port_snat(net, flow, dir, flow_rule); + if (flow_offload_ipv6_snat(net, flow, dir, flow_rule) < 0 || + flow_offload_port_snat(net, flow, dir, flow_rule) < 0) + return -1; } if (test_bit(NF_FLOW_DNAT, &flow->flags)) { - flow_offload_ipv6_dnat(net, flow, dir, flow_rule); - flow_offload_port_dnat(net, flow, dir, flow_rule); + if (flow_offload_ipv6_dnat(net, flow, dir, flow_rule) < 0 || + flow_offload_port_dnat(net, flow, dir, flow_rule) < 0) + return -1; } - flow_offload_redirect(net, flow, dir, flow_rule); + if (flow_offload_redirect(net, flow, dir, flow_rule) < 0) + return -1; return 0; } EXPORT_SYMBOL_GPL(nf_flow_rule_route_ipv6); -#define NF_FLOW_RULE_ACTION_MAX 16 - static struct nf_flow_rule * nf_flow_offload_rule_alloc(struct net *net, const struct flow_offload_work *offload, From 6d52a4a0520a6696bdde51caa11f2d6821cd0c01 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Thu, 26 Mar 2026 16:17:24 +0100 Subject: [PATCH 356/401] netfilter: nfnetlink_log: account for netlink header size This is a followup to an old bug fix: NLMSG_DONE needs to account for the netlink header size, not just the attribute size. This can result in a WARN splat + drop of the netlink message, but other than this there are no ill effects. Fixes: 9dfa1dfe4d5e ("netfilter: nf_log: account for size of NLMSG_DONE attribute") Reported-by: Yiming Qian Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nfnetlink_log.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c index fcbe54940b2e..f80978c06fa0 100644 --- a/net/netfilter/nfnetlink_log.c +++ b/net/netfilter/nfnetlink_log.c @@ -726,7 +726,7 @@ nfulnl_log_packet(struct net *net, + nla_total_size(plen) /* prefix */ + nla_total_size(sizeof(struct nfulnl_msg_packet_hw)) + nla_total_size(sizeof(struct nfulnl_msg_packet_timestamp)) - + nla_total_size(sizeof(struct nfgenmsg)); /* NLMSG_DONE */ + + nlmsg_total_size(sizeof(struct nfgenmsg)); /* NLMSG_DONE */ if (in && skb_mac_header_was_set(skb)) { size += nla_total_size(skb->dev->hard_header_len) From a958a4f90ddd7de0800b33ca9d7b886b7d40f74e Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 31 Mar 2026 23:13:36 +0200 Subject: [PATCH 357/401] netfilter: x_tables: ensure names are nul-terminated Reject names that lack a \0 character before feeding them to functions that expect c-strings. Fixes tag is the most recent commit that needs this change. Fixes: c38c4597e4bf ("netfilter: implement xt_cgroup cgroup2 path match") Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/xt_cgroup.c | 6 ++++++ net/netfilter/xt_rateest.c | 5 +++++ 2 files changed, 11 insertions(+) diff --git a/net/netfilter/xt_cgroup.c b/net/netfilter/xt_cgroup.c index c437fbd59ec1..43d2ae2be628 100644 --- a/net/netfilter/xt_cgroup.c +++ b/net/netfilter/xt_cgroup.c @@ -65,6 +65,9 @@ static int cgroup_mt_check_v1(const struct xt_mtchk_param *par) info->priv = NULL; if (info->has_path) { + if (strnlen(info->path, sizeof(info->path)) >= sizeof(info->path)) + return -ENAMETOOLONG; + cgrp = cgroup_get_from_path(info->path); if (IS_ERR(cgrp)) { pr_info_ratelimited("invalid path, errno=%ld\n", @@ -102,6 +105,9 @@ static int cgroup_mt_check_v2(const struct xt_mtchk_param *par) info->priv = NULL; if (info->has_path) { + if (strnlen(info->path, sizeof(info->path)) >= sizeof(info->path)) + return -ENAMETOOLONG; + cgrp = cgroup_get_from_path(info->path); if (IS_ERR(cgrp)) { pr_info_ratelimited("invalid path, errno=%ld\n", diff --git a/net/netfilter/xt_rateest.c b/net/netfilter/xt_rateest.c index 72324bd976af..b1d736c15fcb 100644 --- a/net/netfilter/xt_rateest.c +++ b/net/netfilter/xt_rateest.c @@ -91,6 +91,11 @@ static int xt_rateest_mt_checkentry(const struct xt_mtchk_param *par) goto err1; } + if (strnlen(info->name1, sizeof(info->name1)) >= sizeof(info->name1)) + return -ENAMETOOLONG; + if (strnlen(info->name2, sizeof(info->name2)) >= sizeof(info->name2)) + return -ENAMETOOLONG; + ret = -ENOENT; est1 = xt_rateest_lookup(par->net, info->name1); if (!est1) From b7e8590987aa94c9dc51518fad0e58cb887b1db5 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Mon, 30 Mar 2026 14:16:34 +0200 Subject: [PATCH 358/401] netfilter: ipset: use nla_strcmp for IPSET_ATTR_NAME attr IPSET_ATTR_NAME and IPSET_ATTR_NAMEREF are of NLA_STRING type, they cannot be treated like a c-string. They either have to be switched to NLA_NUL_STRING, or the compare operations need to use the nla functions. Fixes: f830837f0eed ("netfilter: ipset: list:set set type support") Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter/ipset/ip_set.h | 2 +- net/netfilter/ipset/ip_set_core.c | 4 ++-- net/netfilter/ipset/ip_set_list_set.c | 4 ++-- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/include/linux/netfilter/ipset/ip_set.h b/include/linux/netfilter/ipset/ip_set.h index e9f4f845d760..b98331572ad2 100644 --- a/include/linux/netfilter/ipset/ip_set.h +++ b/include/linux/netfilter/ipset/ip_set.h @@ -309,7 +309,7 @@ enum { /* register and unregister set references */ extern ip_set_id_t ip_set_get_byname(struct net *net, - const char *name, struct ip_set **set); + const struct nlattr *name, struct ip_set **set); extern void ip_set_put_byindex(struct net *net, ip_set_id_t index); extern void ip_set_name_byindex(struct net *net, ip_set_id_t index, char *name); extern ip_set_id_t ip_set_nfnl_get_byindex(struct net *net, ip_set_id_t index); diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c index a2fe711cb5e3..d0c9fe59c67d 100644 --- a/net/netfilter/ipset/ip_set_core.c +++ b/net/netfilter/ipset/ip_set_core.c @@ -821,7 +821,7 @@ EXPORT_SYMBOL_GPL(ip_set_del); * */ ip_set_id_t -ip_set_get_byname(struct net *net, const char *name, struct ip_set **set) +ip_set_get_byname(struct net *net, const struct nlattr *name, struct ip_set **set) { ip_set_id_t i, index = IPSET_INVALID_ID; struct ip_set *s; @@ -830,7 +830,7 @@ ip_set_get_byname(struct net *net, const char *name, struct ip_set **set) rcu_read_lock(); for (i = 0; i < inst->ip_set_max; i++) { s = rcu_dereference(inst->ip_set_list)[i]; - if (s && STRNCMP(s->name, name)) { + if (s && nla_strcmp(name, s->name) == 0) { __ip_set_get(s); index = i; *set = s; diff --git a/net/netfilter/ipset/ip_set_list_set.c b/net/netfilter/ipset/ip_set_list_set.c index 98b91b34c314..1cef84f15e8c 100644 --- a/net/netfilter/ipset/ip_set_list_set.c +++ b/net/netfilter/ipset/ip_set_list_set.c @@ -367,7 +367,7 @@ list_set_uadt(struct ip_set *set, struct nlattr *tb[], ret = ip_set_get_extensions(set, tb, &ext); if (ret) return ret; - e.id = ip_set_get_byname(map->net, nla_data(tb[IPSET_ATTR_NAME]), &s); + e.id = ip_set_get_byname(map->net, tb[IPSET_ATTR_NAME], &s); if (e.id == IPSET_INVALID_ID) return -IPSET_ERR_NAME; /* "Loop detection" */ @@ -389,7 +389,7 @@ list_set_uadt(struct ip_set *set, struct nlattr *tb[], if (tb[IPSET_ATTR_NAMEREF]) { e.refid = ip_set_get_byname(map->net, - nla_data(tb[IPSET_ATTR_NAMEREF]), + tb[IPSET_ATTR_NAMEREF], &s); if (e.refid == IPSET_INVALID_ID) { ret = -IPSET_ERR_NAMEREF; From a242a9ae58aa46ff7dae51ce64150a93957abe65 Mon Sep 17 00:00:00 2001 From: Qi Tang Date: Mon, 30 Mar 2026 00:50:36 +0800 Subject: [PATCH 359/401] netfilter: nf_conntrack_helper: pass helper to expect cleanup nf_conntrack_helper_unregister() calls nf_ct_expect_iterate_destroy() to remove expectations belonging to the helper being unregistered. However, it passes NULL instead of the helper pointer as the data argument, so expect_iter_me() never matches any expectation and all of them survive the cleanup. After unregister returns, nfnl_cthelper_del() frees the helper object immediately. Subsequent expectation dumps or packet-driven init_conntrack() calls then dereference the freed exp->helper, causing a use-after-free. Pass the actual helper pointer so expectations referencing it are properly destroyed before the helper object is freed. BUG: KASAN: slab-use-after-free in string+0x38f/0x430 Read of size 1 at addr ffff888003b14d20 by task poc/103 Call Trace: string+0x38f/0x430 vsnprintf+0x3cc/0x1170 seq_printf+0x17a/0x240 exp_seq_show+0x2e5/0x560 seq_read_iter+0x419/0x1280 proc_reg_read+0x1ac/0x270 vfs_read+0x179/0x930 ksys_read+0xef/0x1c0 Freed by task 103: The buggy address is located 32 bytes inside of freed 192-byte region [ffff888003b14d00, ffff888003b14dc0) Fixes: ac7b84839003 ("netfilter: expect: add and use nf_ct_expect_iterate helpers") Signed-off-by: Qi Tang Reviewed-by: Phil Sutter Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_conntrack_helper.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/netfilter/nf_conntrack_helper.c b/net/netfilter/nf_conntrack_helper.c index 1b330ba6613b..a715304a53d8 100644 --- a/net/netfilter/nf_conntrack_helper.c +++ b/net/netfilter/nf_conntrack_helper.c @@ -415,7 +415,7 @@ void nf_conntrack_helper_unregister(struct nf_conntrack_helper *me) */ synchronize_rcu(); - nf_ct_expect_iterate_destroy(expect_iter_me, NULL); + nf_ct_expect_iterate_destroy(expect_iter_me, me); nf_ct_iterate_destroy(unhelp, me); /* nf_ct_iterate_destroy() does an unconditional synchronize_rcu() as From 35177c6877134a21315f37d57a5577846225623e Mon Sep 17 00:00:00 2001 From: Qi Tang Date: Tue, 31 Mar 2026 14:17:12 +0800 Subject: [PATCH 360/401] netfilter: ctnetlink: zero expect NAT fields when CTA_EXPECT_NAT absent ctnetlink_alloc_expect() allocates expectations from a non-zeroing slab cache via nf_ct_expect_alloc(). When CTA_EXPECT_NAT is not present in the netlink message, saved_addr and saved_proto are never initialized. Stale data from a previous slab occupant can then be dumped to userspace by ctnetlink_exp_dump_expect(), which checks these fields to decide whether to emit CTA_EXPECT_NAT. The safe sibling nf_ct_expect_init(), used by the packet path, explicitly zeroes these fields. Zero saved_addr, saved_proto and dir in the else branch, guarded by IS_ENABLED(CONFIG_NF_NAT) since these fields only exist when NAT is enabled. Confirmed by priming the expect slab with NAT-bearing expectations, freeing them, creating a new expectation without CTA_EXPECT_NAT, and observing that the ctnetlink dump emits a spurious CTA_EXPECT_NAT containing stale data from the prior allocation. Fixes: 076a0ca02644 ("netfilter: ctnetlink: add NAT support for expectations") Reported-by: kernel test robot Signed-off-by: Qi Tang Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_conntrack_netlink.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index 3f408f3713bb..38bd7124d9f7 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -3588,6 +3588,12 @@ ctnetlink_alloc_expect(const struct nlattr * const cda[], struct nf_conn *ct, exp, nf_ct_l3num(ct)); if (err < 0) goto err_out; +#if IS_ENABLED(CONFIG_NF_NAT) + } else { + memset(&exp->saved_addr, 0, sizeof(exp->saved_addr)); + memset(&exp->saved_proto, 0, sizeof(exp->saved_proto)); + exp->dir = 0; +#endif } return exp; err_out: From 917b61fa2042f11e2af4c428e43f08199586633a Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Mon, 30 Mar 2026 11:26:22 +0200 Subject: [PATCH 361/401] netfilter: ctnetlink: ignore explicit helper on new expectations Use the existing master conntrack helper, anything else is not really supported and it just makes validation more complicated, so just ignore what helper userspace suggests for this expectation. This was uncovered when validating CTA_EXPECT_CLASS via different helper provided by userspace than the existing master conntrack helper: BUG: KASAN: slab-out-of-bounds in nf_ct_expect_related_report+0x2479/0x27c0 Read of size 4 at addr ffff8880043fe408 by task poc/102 Call Trace: nf_ct_expect_related_report+0x2479/0x27c0 ctnetlink_create_expect+0x22b/0x3b0 ctnetlink_new_expect+0x4bd/0x5c0 nfnetlink_rcv_msg+0x67a/0x950 netlink_rcv_skb+0x120/0x350 Allowing to read kernel memory bytes off the expectation boundary. CTA_EXPECT_HELP_NAME is still used to offer the helper name to userspace via netlink dump. Fixes: bd0779370588 ("netfilter: nfnetlink_queue: allow to attach expectations to conntracks") Reported-by: Qi Tang Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_conntrack_netlink.c | 54 +++++----------------------- 1 file changed, 9 insertions(+), 45 deletions(-) diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index 38bd7124d9f7..a20cd82446c5 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -2636,7 +2636,6 @@ static const struct nla_policy exp_nla_policy[CTA_EXPECT_MAX+1] = { static struct nf_conntrack_expect * ctnetlink_alloc_expect(const struct nlattr *const cda[], struct nf_conn *ct, - struct nf_conntrack_helper *helper, struct nf_conntrack_tuple *tuple, struct nf_conntrack_tuple *mask); @@ -2865,7 +2864,6 @@ ctnetlink_glue_attach_expect(const struct nlattr *attr, struct nf_conn *ct, { struct nlattr *cda[CTA_EXPECT_MAX+1]; struct nf_conntrack_tuple tuple, mask; - struct nf_conntrack_helper *helper = NULL; struct nf_conntrack_expect *exp; int err; @@ -2879,17 +2877,8 @@ ctnetlink_glue_attach_expect(const struct nlattr *attr, struct nf_conn *ct, if (err < 0) return err; - if (cda[CTA_EXPECT_HELP_NAME]) { - const char *helpname = nla_data(cda[CTA_EXPECT_HELP_NAME]); - - helper = __nf_conntrack_helper_find(helpname, nf_ct_l3num(ct), - nf_ct_protonum(ct)); - if (helper == NULL) - return -EOPNOTSUPP; - } - exp = ctnetlink_alloc_expect((const struct nlattr * const *)cda, ct, - helper, &tuple, &mask); + &tuple, &mask); if (IS_ERR(exp)) return PTR_ERR(exp); @@ -3528,11 +3517,11 @@ ctnetlink_parse_expect_nat(const struct nlattr *attr, static struct nf_conntrack_expect * ctnetlink_alloc_expect(const struct nlattr * const cda[], struct nf_conn *ct, - struct nf_conntrack_helper *helper, struct nf_conntrack_tuple *tuple, struct nf_conntrack_tuple *mask) { struct net *net = read_pnet(&ct->ct_net); + struct nf_conntrack_helper *helper; struct nf_conntrack_expect *exp; struct nf_conn_help *help; u32 class = 0; @@ -3542,7 +3531,11 @@ ctnetlink_alloc_expect(const struct nlattr * const cda[], struct nf_conn *ct, if (!help) return ERR_PTR(-EOPNOTSUPP); - if (cda[CTA_EXPECT_CLASS] && helper) { + helper = rcu_dereference(help->helper); + if (!helper) + return ERR_PTR(-EOPNOTSUPP); + + if (cda[CTA_EXPECT_CLASS]) { class = ntohl(nla_get_be32(cda[CTA_EXPECT_CLASS])); if (class > helper->expect_class_max) return ERR_PTR(-EINVAL); @@ -3576,8 +3569,6 @@ ctnetlink_alloc_expect(const struct nlattr * const cda[], struct nf_conn *ct, #ifdef CONFIG_NF_CONNTRACK_ZONES exp->zone = ct->zone; #endif - if (!helper) - helper = rcu_dereference(help->helper); rcu_assign_pointer(exp->helper, helper); exp->tuple = *tuple; exp->mask.src.u3 = mask->src.u3; @@ -3609,7 +3600,6 @@ ctnetlink_create_expect(struct net *net, { struct nf_conntrack_tuple tuple, mask, master_tuple; struct nf_conntrack_tuple_hash *h = NULL; - struct nf_conntrack_helper *helper = NULL; struct nf_conntrack_expect *exp; struct nf_conn *ct; int err; @@ -3635,33 +3625,7 @@ ctnetlink_create_expect(struct net *net, ct = nf_ct_tuplehash_to_ctrack(h); rcu_read_lock(); - if (cda[CTA_EXPECT_HELP_NAME]) { - const char *helpname = nla_data(cda[CTA_EXPECT_HELP_NAME]); - - helper = __nf_conntrack_helper_find(helpname, u3, - nf_ct_protonum(ct)); - if (helper == NULL) { - rcu_read_unlock(); -#ifdef CONFIG_MODULES - if (request_module("nfct-helper-%s", helpname) < 0) { - err = -EOPNOTSUPP; - goto err_ct; - } - rcu_read_lock(); - helper = __nf_conntrack_helper_find(helpname, u3, - nf_ct_protonum(ct)); - if (helper) { - err = -EAGAIN; - goto err_rcu; - } - rcu_read_unlock(); -#endif - err = -EOPNOTSUPP; - goto err_ct; - } - } - - exp = ctnetlink_alloc_expect(cda, ct, helper, &tuple, &mask); + exp = ctnetlink_alloc_expect(cda, ct, &tuple, &mask); if (IS_ERR(exp)) { err = PTR_ERR(exp); goto err_rcu; @@ -3671,8 +3635,8 @@ ctnetlink_create_expect(struct net *net, nf_ct_expect_put(exp); err_rcu: rcu_read_unlock(); -err_ct: nf_ct_put(ct); + return err; } From 9862ef9ab0a116c6dca98842aab7de13a252ae02 Mon Sep 17 00:00:00 2001 From: Yifan Wu Date: Mon, 30 Mar 2026 14:39:24 -0700 Subject: [PATCH 362/401] netfilter: ipset: drop logically empty buckets in mtype_del mtype_del() counts empty slots below n->pos in k, but it only drops the bucket when both n->pos and k are zero. This misses buckets whose live entries have all been removed while n->pos still points past deleted slots. Treat a bucket as empty when all positions below n->pos are unused and release it directly instead of shrinking it further. Fixes: 8af1c6fbd923 ("netfilter: ipset: Fix forceadd evaluation path") Cc: stable@vger.kernel.org Reported-by: Juefei Pu Reported-by: Xin Liu Signed-off-by: Yifan Wu Co-developed-by: Yuan Tan Signed-off-by: Yuan Tan Reviewed-by: Phil Sutter Signed-off-by: Pablo Neira Ayuso --- net/netfilter/ipset/ip_set_hash_gen.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/netfilter/ipset/ip_set_hash_gen.h b/net/netfilter/ipset/ip_set_hash_gen.h index 181daa9c2019..b79e5dd2af03 100644 --- a/net/netfilter/ipset/ip_set_hash_gen.h +++ b/net/netfilter/ipset/ip_set_hash_gen.h @@ -1098,7 +1098,7 @@ mtype_del(struct ip_set *set, void *value, const struct ip_set_ext *ext, if (!test_bit(i, n->used)) k++; } - if (n->pos == 0 && k == 0) { + if (k == n->pos) { t->hregion[r].ext_size -= ext_size(n->size, dsize); rcu_assign_pointer(hbucket(t, key), NULL); kfree_rcu(n, rcu); From 3d5d488f11776738deab9da336038add95d342d1 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Tue, 31 Mar 2026 16:41:25 +0200 Subject: [PATCH 363/401] netfilter: x_tables: restrict xt_check_match/xt_check_target extensions for NFPROTO_ARP Weiming Shi says: xt_match and xt_target structs registered with NFPROTO_UNSPEC can be loaded by any protocol family through nft_compat. When such a match/target sets .hooks to restrict which hooks it may run on, the bitmask uses NF_INET_* constants. This is only correct for families whose hook layout matches NF_INET_*: IPv4, IPv6, INET, and bridge all share the same five hooks (PRE_ROUTING ... POST_ROUTING). ARP only has three hooks (IN=0, OUT=1, FORWARD=2) with different semantics. Because NF_ARP_OUT == 1 == NF_INET_LOCAL_IN, the .hooks validation silently passes for the wrong reasons, allowing matches to run on ARP chains where the hook assumptions (e.g. state->in being set on input hooks) do not hold. This leads to NULL pointer dereferences; xt_devgroup is one concrete example: Oops: general protection fault, probably for non-canonical address 0xdffffc0000000044: 0000 [#1] SMP KASAN NOPTI KASAN: null-ptr-deref in range [0x0000000000000220-0x0000000000000227] RIP: 0010:devgroup_mt+0xff/0x350 Call Trace: nft_match_eval (net/netfilter/nft_compat.c:407) nft_do_chain (net/netfilter/nf_tables_core.c:285) nft_do_chain_arp (net/netfilter/nft_chain_filter.c:61) nf_hook_slow (net/netfilter/core.c:623) arp_xmit (net/ipv4/arp.c:666) Kernel panic - not syncing: Fatal exception in interrupt Fix it by restricting arptables to NFPROTO_ARP extensions only. Note that arptables-legacy only supports: - arpt_CLASSIFY - arpt_mangle - arpt_MARK that provide explicit NFPROTO_ARP match/target declarations. Fixes: 9291747f118d ("netfilter: xtables: add device group match") Reported-by: Xiang Mei Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/x_tables.c | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c index e594b3b7ad82..b39017c80548 100644 --- a/net/netfilter/x_tables.c +++ b/net/netfilter/x_tables.c @@ -501,6 +501,17 @@ int xt_check_match(struct xt_mtchk_param *par, par->match->table, par->table); return -EINVAL; } + + /* NFPROTO_UNSPEC implies NF_INET_* hooks which do not overlap with + * NF_ARP_IN,OUT,FORWARD, allow explicit extensions with NFPROTO_ARP + * support. + */ + if (par->family == NFPROTO_ARP && + par->match->family != NFPROTO_ARP) { + pr_info_ratelimited("%s_tables: %s match: not valid for this family\n", + xt_prefix[par->family], par->match->name); + return -EINVAL; + } if (par->match->hooks && (par->hook_mask & ~par->match->hooks) != 0) { char used[64], allow[64]; @@ -1016,6 +1027,18 @@ int xt_check_target(struct xt_tgchk_param *par, par->target->table, par->table); return -EINVAL; } + + /* NFPROTO_UNSPEC implies NF_INET_* hooks which do not overlap with + * NF_ARP_IN,OUT,FORWARD, allow explicit extensions with NFPROTO_ARP + * support. + */ + if (par->family == NFPROTO_ARP && + par->target->family != NFPROTO_ARP) { + pr_info_ratelimited("%s_tables: %s target: not valid for this family\n", + xt_prefix[par->family], par->target->name); + return -EINVAL; + } + if (par->target->hooks && (par->hook_mask & ~par->target->hooks) != 0) { char used[64], allow[64]; From da107398cbd4bbdb6bffecb2ce86d5c9384f4cec Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Tue, 31 Mar 2026 23:08:02 +0200 Subject: [PATCH 364/401] netfilter: nf_tables: reject immediate NF_QUEUE verdict nft_queue is always used from userspace nftables to deliver the NF_QUEUE verdict. Immediately emitting an NF_QUEUE verdict is never used by the userspace nft tools, so reject immediate NF_QUEUE verdicts. The arp family does not provide queue support, but such an immediate verdict is still reachable. Globally reject NF_QUEUE immediate verdicts to address this issue. Fixes: f342de4e2f33 ("netfilter: nf_tables: reject QUEUE/DROP verdict parameters") Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_tables_api.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 3922cff1bb3d..8c42247a176c 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -11667,8 +11667,6 @@ static int nft_verdict_init(const struct nft_ctx *ctx, struct nft_data *data, switch (data->verdict.code) { case NF_ACCEPT: case NF_DROP: - case NF_QUEUE: - break; case NFT_CONTINUE: case NFT_BREAK: case NFT_RETURN: @@ -11703,6 +11701,11 @@ static int nft_verdict_init(const struct nft_ctx *ctx, struct nft_data *data, data->verdict.chain = chain; break; + case NF_QUEUE: + /* The nft_queue expression is used for this purpose, an + * immediate NF_QUEUE verdict should not ever be seen here. + */ + fallthrough; default: return -EINVAL; } From e74c38ef6f170179c0029b5744d6a14dfd543108 Mon Sep 17 00:00:00 2001 From: Simon Trimmer Date: Tue, 31 Mar 2026 13:19:16 +0000 Subject: [PATCH 365/401] ASoC: amd: ps: Fix missing leading zeros in subsystem_device SSID log Ensure that subsystem_device is printed with leading zeros when combined with subsystem_vendor to form the SSID. Without this, devices with upper bits unset may appear to have an incorrect SSID in the debug output. Signed-off-by: Simon Trimmer Link: https://patch.msgid.link/20260331131916.145546-1-simont@opensource.cirrus.com Signed-off-by: Mark Brown --- sound/soc/amd/ps/pci-ps.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/amd/ps/pci-ps.c b/sound/soc/amd/ps/pci-ps.c index 3a20cc10d61f..9751cf0784a6 100644 --- a/sound/soc/amd/ps/pci-ps.c +++ b/sound/soc/amd/ps/pci-ps.c @@ -339,7 +339,7 @@ static struct snd_soc_acpi_mach *acp63_sdw_machine_select(struct device *dev) mach->mach_params.subsystem_device = acp_data->subsystem_device; mach->mach_params.subsystem_id_set = true; - dev_dbg(dev, "SSID %x%x\n", mach->mach_params.subsystem_vendor, + dev_dbg(dev, "SSID %x%04x\n", mach->mach_params.subsystem_vendor, mach->mach_params.subsystem_device); return mach; } From a834a0b66ec6fb743377201a0f4229bb2503f4ce Mon Sep 17 00:00:00 2001 From: Pauli Virtanen Date: Wed, 25 Mar 2026 21:07:46 +0200 Subject: [PATCH 366/401] Bluetooth: hci_sync: call destroy in hci_cmd_sync_run if immediate hci_cmd_sync_run() may run the work immediately if called from existing sync work (otherwise it queues a new sync work). In this case it fails to call the destroy() function. On immediate run, make it behave same way as if item was queued successfully: call destroy, and return 0. The only callsite is hci_abort_conn() via hci_cmd_sync_run_once(), and this changes its return value. However, its return value is not used except as the return value for hci_disconnect(), and nothing uses the return value of hci_disconnect(). Hence there should be no behavior change anywhere. Fixes: c898f6d7b093b ("Bluetooth: hci_sync: Introduce hci_cmd_sync_run/hci_cmd_sync_run_once") Signed-off-by: Pauli Virtanen Signed-off-by: Luiz Augusto von Dentz --- net/bluetooth/hci_sync.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/net/bluetooth/hci_sync.c b/net/bluetooth/hci_sync.c index 45d16639874a..6283a4df78b0 100644 --- a/net/bluetooth/hci_sync.c +++ b/net/bluetooth/hci_sync.c @@ -801,8 +801,15 @@ int hci_cmd_sync_run(struct hci_dev *hdev, hci_cmd_sync_work_func_t func, return -ENETDOWN; /* If on cmd_sync_work then run immediately otherwise queue */ - if (current_work() == &hdev->cmd_sync_work) - return func(hdev, data); + if (current_work() == &hdev->cmd_sync_work) { + int err; + + err = func(hdev, data); + if (destroy) + destroy(hdev, data, err); + + return 0; + } return hci_cmd_sync_submit(hdev, func, data, destroy); } From 8a5b0135d4a5d9683203a3d9a12a711ccec5936b Mon Sep 17 00:00:00 2001 From: Cen Zhang Date: Thu, 26 Mar 2026 23:16:45 +0800 Subject: [PATCH 367/401] Bluetooth: SCO: fix race conditions in sco_sock_connect() sco_sock_connect() checks sk_state and sk_type without holding the socket lock. Two concurrent connect() syscalls on the same socket can both pass the check and enter sco_connect(), leading to use-after-free. The buggy scenario involves three participants and was confirmed with additional logging instrumentation: Thread A (connect): HCI disconnect: Thread B (connect): sco_sock_connect(sk) sco_sock_connect(sk) sk_state==BT_OPEN sk_state==BT_OPEN (pass, no lock) (pass, no lock) sco_connect(sk): sco_connect(sk): hci_dev_lock hci_dev_lock hci_connect_sco <- blocked -> hcon1 sco_conn_add->conn1 lock_sock(sk) sco_chan_add: conn1->sk = sk sk->conn = conn1 sk_state=BT_CONNECT release_sock hci_dev_unlock hci_dev_lock sco_conn_del: lock_sock(sk) sco_chan_del: sk->conn=NULL conn1->sk=NULL sk_state= BT_CLOSED SOCK_ZAPPED release_sock hci_dev_unlock (unblocked) hci_connect_sco -> hcon2 sco_conn_add -> conn2 lock_sock(sk) sco_chan_add: sk->conn=conn2 sk_state= BT_CONNECT // zombie sk! release_sock hci_dev_unlock Thread B revives a BT_CLOSED + SOCK_ZAPPED socket back to BT_CONNECT. Subsequent cleanup triggers double sock_put() and use-after-free. Meanwhile conn1 is leaked as it was orphaned when sco_conn_del() cleared the association. Fix this by: - Moving lock_sock() before the sk_state/sk_type checks in sco_sock_connect() to serialize concurrent connect attempts - Fixing the sk_type != SOCK_SEQPACKET check to actually return the error instead of just assigning it - Adding a state re-check in sco_connect() after lock_sock() to catch state changes during the window between the locks - Adding sco_pi(sk)->conn check in sco_chan_add() to prevent double-attach of a socket to multiple connections - Adding hci_conn_drop() on sco_chan_add failure to prevent HCI connection leaks Fixes: 9a8ec9e8ebb5 ("Bluetooth: SCO: Fix possible circular locking dependency on sco_connect_cfm") Signed-off-by: Cen Zhang Signed-off-by: Luiz Augusto von Dentz --- net/bluetooth/sco.c | 30 +++++++++++++++++++++++------- 1 file changed, 23 insertions(+), 7 deletions(-) diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c index 584e059de20a..b84587811ef4 100644 --- a/net/bluetooth/sco.c +++ b/net/bluetooth/sco.c @@ -298,7 +298,7 @@ static int sco_chan_add(struct sco_conn *conn, struct sock *sk, int err = 0; sco_conn_lock(conn); - if (conn->sk) + if (conn->sk || sco_pi(sk)->conn) err = -EBUSY; else __sco_chan_add(conn, sk, parent); @@ -353,9 +353,20 @@ static int sco_connect(struct sock *sk) lock_sock(sk); + /* Recheck state after reacquiring the socket lock, as another + * thread may have changed it (e.g., closed the socket). + */ + if (sk->sk_state != BT_OPEN && sk->sk_state != BT_BOUND) { + release_sock(sk); + hci_conn_drop(hcon); + err = -EBADFD; + goto unlock; + } + err = sco_chan_add(conn, sk, NULL); if (err) { release_sock(sk); + hci_conn_drop(hcon); goto unlock; } @@ -656,13 +667,18 @@ static int sco_sock_connect(struct socket *sock, struct sockaddr_unsized *addr, addr->sa_family != AF_BLUETOOTH) return -EINVAL; - if (sk->sk_state != BT_OPEN && sk->sk_state != BT_BOUND) - return -EBADFD; - - if (sk->sk_type != SOCK_SEQPACKET) - err = -EINVAL; - lock_sock(sk); + + if (sk->sk_state != BT_OPEN && sk->sk_state != BT_BOUND) { + release_sock(sk); + return -EBADFD; + } + + if (sk->sk_type != SOCK_SEQPACKET) { + release_sock(sk); + return -EINVAL; + } + /* Set destination address and psm */ bacpy(&sco_pi(sk)->dst, &sa->sco_bdaddr); release_sock(sk); From 2b2bf47cd75518c36fa2d41380e4a40641cc89cd Mon Sep 17 00:00:00 2001 From: Oleh Konko Date: Thu, 26 Mar 2026 17:31:24 +0000 Subject: [PATCH 368/401] Bluetooth: hci_event: move wake reason storage into validated event handlers hci_store_wake_reason() is called from hci_event_packet() immediately after stripping the HCI event header but before hci_event_func() enforces the per-event minimum payload length from hci_ev_table. This means a short HCI event frame can reach bacpy() before any bounds check runs. Rather than duplicating skb parsing and per-event length checks inside hci_store_wake_reason(), move wake-address storage into the individual event handlers after their existing event-length validation has succeeded. Convert hci_store_wake_reason() into a small helper that only stores an already-validated bdaddr while the caller holds hci_dev_lock(). Use the same helper after hci_event_func() with a NULL address to preserve the existing unexpected-wake fallback semantics when no validated event handler records a wake address. Annotate the helper with __must_hold(&hdev->lock) and add lockdep_assert_held(&hdev->lock) so future call paths keep the lock contract explicit. Call the helper from hci_conn_request_evt(), hci_conn_complete_evt(), hci_sync_conn_complete_evt(), le_conn_complete_evt(), hci_le_adv_report_evt(), hci_le_ext_adv_report_evt(), hci_le_direct_adv_report_evt(), hci_le_pa_sync_established_evt(), and hci_le_past_received_evt(). Fixes: 2f20216c1d6f ("Bluetooth: Emit controller suspend and resume events") Cc: stable@vger.kernel.org Signed-off-by: Oleh Konko Signed-off-by: Luiz Augusto von Dentz --- net/bluetooth/hci_event.c | 94 +++++++++++++++------------------------ 1 file changed, 35 insertions(+), 59 deletions(-) diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index 286529d2e554..81d2f9a3eec9 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -80,6 +80,10 @@ static void *hci_le_ev_skb_pull(struct hci_dev *hdev, struct sk_buff *skb, return data; } +static void hci_store_wake_reason(struct hci_dev *hdev, + const bdaddr_t *bdaddr, u8 addr_type) + __must_hold(&hdev->lock); + static u8 hci_cc_inquiry_cancel(struct hci_dev *hdev, void *data, struct sk_buff *skb) { @@ -3111,6 +3115,7 @@ static void hci_conn_complete_evt(struct hci_dev *hdev, void *data, bt_dev_dbg(hdev, "status 0x%2.2x", status); hci_dev_lock(hdev); + hci_store_wake_reason(hdev, &ev->bdaddr, BDADDR_BREDR); /* Check for existing connection: * @@ -3274,6 +3279,10 @@ static void hci_conn_request_evt(struct hci_dev *hdev, void *data, bt_dev_dbg(hdev, "bdaddr %pMR type 0x%x", &ev->bdaddr, ev->link_type); + hci_dev_lock(hdev); + hci_store_wake_reason(hdev, &ev->bdaddr, BDADDR_BREDR); + hci_dev_unlock(hdev); + /* Reject incoming connection from device with same BD ADDR against * CVE-2020-26555 */ @@ -5021,6 +5030,7 @@ static void hci_sync_conn_complete_evt(struct hci_dev *hdev, void *data, bt_dev_dbg(hdev, "status 0x%2.2x", status); hci_dev_lock(hdev); + hci_store_wake_reason(hdev, &ev->bdaddr, BDADDR_BREDR); conn = hci_conn_hash_lookup_ba(hdev, ev->link_type, &ev->bdaddr); if (!conn) { @@ -5713,6 +5723,7 @@ static void le_conn_complete_evt(struct hci_dev *hdev, u8 status, int err; hci_dev_lock(hdev); + hci_store_wake_reason(hdev, bdaddr, bdaddr_type); /* All controllers implicitly stop advertising in the event of a * connection, so ensure that the state bit is cleared. @@ -6005,6 +6016,7 @@ static void hci_le_past_received_evt(struct hci_dev *hdev, void *data, bt_dev_dbg(hdev, "status 0x%2.2x", ev->status); hci_dev_lock(hdev); + hci_store_wake_reason(hdev, &ev->bdaddr, ev->bdaddr_type); hci_dev_clear_flag(hdev, HCI_PA_SYNC); @@ -6403,6 +6415,8 @@ static void hci_le_adv_report_evt(struct hci_dev *hdev, void *data, info->length + 1)) break; + hci_store_wake_reason(hdev, &info->bdaddr, info->bdaddr_type); + if (info->length <= max_adv_len(hdev)) { rssi = info->data[info->length]; process_adv_report(hdev, info->type, &info->bdaddr, @@ -6491,6 +6505,8 @@ static void hci_le_ext_adv_report_evt(struct hci_dev *hdev, void *data, info->length)) break; + hci_store_wake_reason(hdev, &info->bdaddr, info->bdaddr_type); + evt_type = __le16_to_cpu(info->type) & LE_EXT_ADV_EVT_TYPE_MASK; legacy_evt_type = ext_evt_type_to_legacy(hdev, evt_type); @@ -6536,6 +6552,7 @@ static void hci_le_pa_sync_established_evt(struct hci_dev *hdev, void *data, bt_dev_dbg(hdev, "status 0x%2.2x", ev->status); hci_dev_lock(hdev); + hci_store_wake_reason(hdev, &ev->bdaddr, ev->bdaddr_type); hci_dev_clear_flag(hdev, HCI_PA_SYNC); @@ -6834,6 +6851,8 @@ static void hci_le_direct_adv_report_evt(struct hci_dev *hdev, void *data, for (i = 0; i < ev->num; i++) { struct hci_ev_le_direct_adv_info *info = &ev->info[i]; + hci_store_wake_reason(hdev, &info->bdaddr, info->bdaddr_type); + process_adv_report(hdev, info->type, &info->bdaddr, info->bdaddr_type, &info->direct_addr, info->direct_addr_type, HCI_ADV_PHY_1M, 0, @@ -7517,73 +7536,29 @@ static bool hci_get_cmd_complete(struct hci_dev *hdev, u16 opcode, return true; } -static void hci_store_wake_reason(struct hci_dev *hdev, u8 event, - struct sk_buff *skb) +static void hci_store_wake_reason(struct hci_dev *hdev, + const bdaddr_t *bdaddr, u8 addr_type) + __must_hold(&hdev->lock) { - struct hci_ev_le_advertising_info *adv; - struct hci_ev_le_direct_adv_info *direct_adv; - struct hci_ev_le_ext_adv_info *ext_adv; - const struct hci_ev_conn_complete *conn_complete = (void *)skb->data; - const struct hci_ev_conn_request *conn_request = (void *)skb->data; - - hci_dev_lock(hdev); + lockdep_assert_held(&hdev->lock); /* If we are currently suspended and this is the first BT event seen, * save the wake reason associated with the event. */ if (!hdev->suspended || hdev->wake_reason) - goto unlock; + return; + + if (!bdaddr) { + hdev->wake_reason = MGMT_WAKE_REASON_UNEXPECTED; + return; + } /* Default to remote wake. Values for wake_reason are documented in the * Bluez mgmt api docs. */ hdev->wake_reason = MGMT_WAKE_REASON_REMOTE_WAKE; - - /* Once configured for remote wakeup, we should only wake up for - * reconnections. It's useful to see which device is waking us up so - * keep track of the bdaddr of the connection event that woke us up. - */ - if (event == HCI_EV_CONN_REQUEST) { - bacpy(&hdev->wake_addr, &conn_request->bdaddr); - hdev->wake_addr_type = BDADDR_BREDR; - } else if (event == HCI_EV_CONN_COMPLETE) { - bacpy(&hdev->wake_addr, &conn_complete->bdaddr); - hdev->wake_addr_type = BDADDR_BREDR; - } else if (event == HCI_EV_LE_META) { - struct hci_ev_le_meta *le_ev = (void *)skb->data; - u8 subevent = le_ev->subevent; - u8 *ptr = &skb->data[sizeof(*le_ev)]; - u8 num_reports = *ptr; - - if ((subevent == HCI_EV_LE_ADVERTISING_REPORT || - subevent == HCI_EV_LE_DIRECT_ADV_REPORT || - subevent == HCI_EV_LE_EXT_ADV_REPORT) && - num_reports) { - adv = (void *)(ptr + 1); - direct_adv = (void *)(ptr + 1); - ext_adv = (void *)(ptr + 1); - - switch (subevent) { - case HCI_EV_LE_ADVERTISING_REPORT: - bacpy(&hdev->wake_addr, &adv->bdaddr); - hdev->wake_addr_type = adv->bdaddr_type; - break; - case HCI_EV_LE_DIRECT_ADV_REPORT: - bacpy(&hdev->wake_addr, &direct_adv->bdaddr); - hdev->wake_addr_type = direct_adv->bdaddr_type; - break; - case HCI_EV_LE_EXT_ADV_REPORT: - bacpy(&hdev->wake_addr, &ext_adv->bdaddr); - hdev->wake_addr_type = ext_adv->bdaddr_type; - break; - } - } - } else { - hdev->wake_reason = MGMT_WAKE_REASON_UNEXPECTED; - } - -unlock: - hci_dev_unlock(hdev); + bacpy(&hdev->wake_addr, bdaddr); + hdev->wake_addr_type = addr_type; } #define HCI_EV_VL(_op, _func, _min_len, _max_len) \ @@ -7830,14 +7805,15 @@ void hci_event_packet(struct hci_dev *hdev, struct sk_buff *skb) skb_pull(skb, HCI_EVENT_HDR_SIZE); - /* Store wake reason if we're suspended */ - hci_store_wake_reason(hdev, event, skb); - bt_dev_dbg(hdev, "event 0x%2.2x", event); hci_event_func(hdev, event, skb, &opcode, &status, &req_complete, &req_complete_skb); + hci_dev_lock(hdev); + hci_store_wake_reason(hdev, NULL, 0); + hci_dev_unlock(hdev); + if (req_complete) { req_complete(hdev, status, opcode); } else if (req_complete_skb) { From 2969554bcfccb5c609f6b6cd4a014933f3a66dd0 Mon Sep 17 00:00:00 2001 From: Pauli Virtanen Date: Wed, 25 Mar 2026 21:07:43 +0200 Subject: [PATCH 369/401] Bluetooth: hci_sync: hci_cmd_sync_queue_once() return -EEXIST if exists hci_cmd_sync_queue_once() needs to indicate whether a queue item was added, so caller can know if callbacks are called, so it can avoid leaking resources. Change the function to return -EEXIST if queue item already exists. Modify all callsites to handle that. Signed-off-by: Pauli Virtanen Signed-off-by: Luiz Augusto von Dentz --- net/bluetooth/hci_sync.c | 53 +++++++++++++++++++++++++++------------- 1 file changed, 36 insertions(+), 17 deletions(-) diff --git a/net/bluetooth/hci_sync.c b/net/bluetooth/hci_sync.c index 6283a4df78b0..97745710e3ce 100644 --- a/net/bluetooth/hci_sync.c +++ b/net/bluetooth/hci_sync.c @@ -780,7 +780,7 @@ int hci_cmd_sync_queue_once(struct hci_dev *hdev, hci_cmd_sync_work_func_t func, void *data, hci_cmd_sync_work_destroy_t destroy) { if (hci_cmd_sync_lookup_entry(hdev, func, data, destroy)) - return 0; + return -EEXIST; return hci_cmd_sync_queue(hdev, func, data, destroy); } @@ -3262,6 +3262,8 @@ static int update_passive_scan_sync(struct hci_dev *hdev, void *data) int hci_update_passive_scan(struct hci_dev *hdev) { + int err; + /* Only queue if it would have any effect */ if (!test_bit(HCI_UP, &hdev->flags) || test_bit(HCI_INIT, &hdev->flags) || @@ -3271,8 +3273,9 @@ int hci_update_passive_scan(struct hci_dev *hdev) hci_dev_test_flag(hdev, HCI_UNREGISTER)) return 0; - return hci_cmd_sync_queue_once(hdev, update_passive_scan_sync, NULL, - NULL); + err = hci_cmd_sync_queue_once(hdev, update_passive_scan_sync, NULL, + NULL); + return (err == -EEXIST) ? 0 : err; } int hci_write_sc_support_sync(struct hci_dev *hdev, u8 val) @@ -6965,8 +6968,11 @@ static int hci_acl_create_conn_sync(struct hci_dev *hdev, void *data) int hci_connect_acl_sync(struct hci_dev *hdev, struct hci_conn *conn) { - return hci_cmd_sync_queue_once(hdev, hci_acl_create_conn_sync, conn, - NULL); + int err; + + err = hci_cmd_sync_queue_once(hdev, hci_acl_create_conn_sync, conn, + NULL); + return (err == -EEXIST) ? 0 : err; } static void create_le_conn_complete(struct hci_dev *hdev, void *data, int err) @@ -7002,8 +7008,11 @@ static void create_le_conn_complete(struct hci_dev *hdev, void *data, int err) int hci_connect_le_sync(struct hci_dev *hdev, struct hci_conn *conn) { - return hci_cmd_sync_queue_once(hdev, hci_le_create_conn_sync, conn, - create_le_conn_complete); + int err; + + err = hci_cmd_sync_queue_once(hdev, hci_le_create_conn_sync, conn, + create_le_conn_complete); + return (err == -EEXIST) ? 0 : err; } int hci_cancel_connect_sync(struct hci_dev *hdev, struct hci_conn *conn) @@ -7210,8 +7219,11 @@ static int hci_le_pa_create_sync(struct hci_dev *hdev, void *data) int hci_connect_pa_sync(struct hci_dev *hdev, struct hci_conn *conn) { - return hci_cmd_sync_queue_once(hdev, hci_le_pa_create_sync, conn, - create_pa_complete); + int err; + + err = hci_cmd_sync_queue_once(hdev, hci_le_pa_create_sync, conn, + create_pa_complete); + return (err == -EEXIST) ? 0 : err; } static void create_big_complete(struct hci_dev *hdev, void *data, int err) @@ -7273,8 +7285,11 @@ static int hci_le_big_create_sync(struct hci_dev *hdev, void *data) int hci_connect_big_sync(struct hci_dev *hdev, struct hci_conn *conn) { - return hci_cmd_sync_queue_once(hdev, hci_le_big_create_sync, conn, - create_big_complete); + int err; + + err = hci_cmd_sync_queue_once(hdev, hci_le_big_create_sync, conn, + create_big_complete); + return (err == -EEXIST) ? 0 : err; } struct past_data { @@ -7366,7 +7381,7 @@ int hci_past_sync(struct hci_conn *conn, struct hci_conn *le) if (err) kfree(data); - return err; + return (err == -EEXIST) ? 0 : err; } static void le_read_features_complete(struct hci_dev *hdev, void *data, int err) @@ -7453,7 +7468,7 @@ int hci_le_read_remote_features(struct hci_conn *conn) else err = -EOPNOTSUPP; - return err; + return (err == -EEXIST) ? 0 : err; } static void pkt_type_changed(struct hci_dev *hdev, void *data, int err) @@ -7479,6 +7494,7 @@ int hci_acl_change_pkt_type(struct hci_conn *conn, u16 pkt_type) { struct hci_dev *hdev = conn->hdev; struct hci_cp_change_conn_ptype *cp; + int err; cp = kmalloc_obj(*cp); if (!cp) @@ -7487,8 +7503,9 @@ int hci_acl_change_pkt_type(struct hci_conn *conn, u16 pkt_type) cp->handle = cpu_to_le16(conn->handle); cp->pkt_type = cpu_to_le16(pkt_type); - return hci_cmd_sync_queue_once(hdev, hci_change_conn_ptype_sync, cp, - pkt_type_changed); + err = hci_cmd_sync_queue_once(hdev, hci_change_conn_ptype_sync, cp, + pkt_type_changed); + return (err == -EEXIST) ? 0 : err; } static void le_phy_update_complete(struct hci_dev *hdev, void *data, int err) @@ -7514,6 +7531,7 @@ int hci_le_set_phy(struct hci_conn *conn, u8 tx_phys, u8 rx_phys) { struct hci_dev *hdev = conn->hdev; struct hci_cp_le_set_phy *cp; + int err; cp = kmalloc_obj(*cp); if (!cp) @@ -7524,6 +7542,7 @@ int hci_le_set_phy(struct hci_conn *conn, u8 tx_phys, u8 rx_phys) cp->tx_phys = tx_phys; cp->rx_phys = rx_phys; - return hci_cmd_sync_queue_once(hdev, hci_le_set_phy_sync, cp, - le_phy_update_complete); + err = hci_cmd_sync_queue_once(hdev, hci_le_set_phy_sync, cp, + le_phy_update_complete); + return (err == -EEXIST) ? 0 : err; } From aca377208e7f7322bf4e107cdec6e7d7e8aa7a88 Mon Sep 17 00:00:00 2001 From: Pauli Virtanen Date: Wed, 25 Mar 2026 21:07:44 +0200 Subject: [PATCH 370/401] Bluetooth: hci_sync: fix leaks when hci_cmd_sync_queue_once fails When hci_cmd_sync_queue_once() returns with error, the destroy callback will not be called. Fix leaking references / memory on these failures. Signed-off-by: Pauli Virtanen Signed-off-by: Luiz Augusto von Dentz --- net/bluetooth/hci_sync.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/net/bluetooth/hci_sync.c b/net/bluetooth/hci_sync.c index 97745710e3ce..8cbbba50e77e 100644 --- a/net/bluetooth/hci_sync.c +++ b/net/bluetooth/hci_sync.c @@ -7460,13 +7460,16 @@ int hci_le_read_remote_features(struct hci_conn *conn) * role is possible. Otherwise just transition into the * connected state without requesting the remote features. */ - if (conn->out || (hdev->le_features[0] & HCI_LE_PERIPHERAL_FEATURES)) + if (conn->out || (hdev->le_features[0] & HCI_LE_PERIPHERAL_FEATURES)) { err = hci_cmd_sync_queue_once(hdev, hci_le_read_remote_features_sync, hci_conn_hold(conn), le_read_features_complete); - else + if (err) + hci_conn_drop(conn); + } else { err = -EOPNOTSUPP; + } return (err == -EEXIST) ? 0 : err; } @@ -7505,6 +7508,9 @@ int hci_acl_change_pkt_type(struct hci_conn *conn, u16 pkt_type) err = hci_cmd_sync_queue_once(hdev, hci_change_conn_ptype_sync, cp, pkt_type_changed); + if (err) + kfree(cp); + return (err == -EEXIST) ? 0 : err; } @@ -7544,5 +7550,8 @@ int hci_le_set_phy(struct hci_conn *conn, u8 tx_phys, u8 rx_phys) err = hci_cmd_sync_queue_once(hdev, hci_le_set_phy_sync, cp, le_phy_update_complete); + if (err) + kfree(cp); + return (err == -EEXIST) ? 0 : err; } From 035c25007c9e698bef3826070ee34bb6d778020c Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Wed, 25 Mar 2026 11:11:46 -0400 Subject: [PATCH 371/401] Bluetooth: hci_sync: Fix UAF in le_read_features_complete This fixes the following backtrace caused by hci_conn being freed before le_read_features_complete but after hci_le_read_remote_features_sync so hci_conn_del -> hci_cmd_sync_dequeue is not able to prevent it: ================================================================== BUG: KASAN: slab-use-after-free in instrument_atomic_read_write include/linux/instrumented.h:96 [inline] BUG: KASAN: slab-use-after-free in atomic_dec_and_test include/linux/atomic/atomic-instrumented.h:1383 [inline] BUG: KASAN: slab-use-after-free in hci_conn_drop include/net/bluetooth/hci_core.h:1688 [inline] BUG: KASAN: slab-use-after-free in le_read_features_complete+0x5b/0x340 net/bluetooth/hci_sync.c:7344 Write of size 4 at addr ffff8880796b0010 by task kworker/u9:0/52 CPU: 0 UID: 0 PID: 52 Comm: kworker/u9:0 Not tainted syzkaller #0 PREEMPT(full) Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 10/25/2025 Workqueue: hci0 hci_cmd_sync_work Call Trace: __dump_stack lib/dump_stack.c:94 [inline] dump_stack_lvl+0x116/0x1f0 lib/dump_stack.c:120 print_address_description mm/kasan/report.c:378 [inline] print_report+0xcd/0x630 mm/kasan/report.c:482 kasan_report+0xe0/0x110 mm/kasan/report.c:595 check_region_inline mm/kasan/generic.c:194 [inline] kasan_check_range+0x100/0x1b0 mm/kasan/generic.c:200 instrument_atomic_read_write include/linux/instrumented.h:96 [inline] atomic_dec_and_test include/linux/atomic/atomic-instrumented.h:1383 [inline] hci_conn_drop include/net/bluetooth/hci_core.h:1688 [inline] le_read_features_complete+0x5b/0x340 net/bluetooth/hci_sync.c:7344 hci_cmd_sync_work+0x1ff/0x430 net/bluetooth/hci_sync.c:334 process_one_work+0x9ba/0x1b20 kernel/workqueue.c:3257 process_scheduled_works kernel/workqueue.c:3340 [inline] worker_thread+0x6c8/0xf10 kernel/workqueue.c:3421 kthread+0x3c5/0x780 kernel/kthread.c:463 ret_from_fork+0x983/0xb10 arch/x86/kernel/process.c:158 ret_from_fork_asm+0x1a/0x30 arch/x86/entry/entry_64.S:246 Allocated by task 5932: kasan_save_stack+0x33/0x60 mm/kasan/common.c:56 kasan_save_track+0x14/0x30 mm/kasan/common.c:77 poison_kmalloc_redzone mm/kasan/common.c:400 [inline] __kasan_kmalloc+0xaa/0xb0 mm/kasan/common.c:417 kmalloc_noprof include/linux/slab.h:957 [inline] kzalloc_noprof include/linux/slab.h:1094 [inline] __hci_conn_add+0xf8/0x1c70 net/bluetooth/hci_conn.c:963 hci_conn_add_unset+0x76/0x100 net/bluetooth/hci_conn.c:1084 le_conn_complete_evt+0x639/0x1f20 net/bluetooth/hci_event.c:5714 hci_le_enh_conn_complete_evt+0x23d/0x380 net/bluetooth/hci_event.c:5861 hci_le_meta_evt+0x357/0x5e0 net/bluetooth/hci_event.c:7408 hci_event_func net/bluetooth/hci_event.c:7716 [inline] hci_event_packet+0x685/0x11c0 net/bluetooth/hci_event.c:7773 hci_rx_work+0x2c9/0xeb0 net/bluetooth/hci_core.c:4076 process_one_work+0x9ba/0x1b20 kernel/workqueue.c:3257 process_scheduled_works kernel/workqueue.c:3340 [inline] worker_thread+0x6c8/0xf10 kernel/workqueue.c:3421 kthread+0x3c5/0x780 kernel/kthread.c:463 ret_from_fork+0x983/0xb10 arch/x86/kernel/process.c:158 ret_from_fork_asm+0x1a/0x30 arch/x86/entry/entry_64.S:246 Freed by task 5932: kasan_save_stack+0x33/0x60 mm/kasan/common.c:56 kasan_save_track+0x14/0x30 mm/kasan/common.c:77 __kasan_save_free_info+0x3b/0x60 mm/kasan/generic.c:587 kasan_save_free_info mm/kasan/kasan.h:406 [inline] poison_slab_object mm/kasan/common.c:252 [inline] __kasan_slab_free+0x5f/0x80 mm/kasan/common.c:284 kasan_slab_free include/linux/kasan.h:234 [inline] slab_free_hook mm/slub.c:2540 [inline] slab_free mm/slub.c:6663 [inline] kfree+0x2f8/0x6e0 mm/slub.c:6871 device_release+0xa4/0x240 drivers/base/core.c:2565 kobject_cleanup lib/kobject.c:689 [inline] kobject_release lib/kobject.c:720 [inline] kref_put include/linux/kref.h:65 [inline] kobject_put+0x1e7/0x590 lib/kobject.c:737 put_device drivers/base/core.c:3797 [inline] device_unregister+0x2f/0xc0 drivers/base/core.c:3920 hci_conn_del_sysfs+0xb4/0x180 net/bluetooth/hci_sysfs.c:79 hci_conn_cleanup net/bluetooth/hci_conn.c:173 [inline] hci_conn_del+0x657/0x1180 net/bluetooth/hci_conn.c:1234 hci_disconn_complete_evt+0x410/0xa00 net/bluetooth/hci_event.c:3451 hci_event_func net/bluetooth/hci_event.c:7719 [inline] hci_event_packet+0xa10/0x11c0 net/bluetooth/hci_event.c:7773 hci_rx_work+0x2c9/0xeb0 net/bluetooth/hci_core.c:4076 process_one_work+0x9ba/0x1b20 kernel/workqueue.c:3257 process_scheduled_works kernel/workqueue.c:3340 [inline] worker_thread+0x6c8/0xf10 kernel/workqueue.c:3421 kthread+0x3c5/0x780 kernel/kthread.c:463 ret_from_fork+0x983/0xb10 arch/x86/kernel/process.c:158 ret_from_fork_asm+0x1a/0x30 arch/x86/entry/entry_64.S:246 The buggy address belongs to the object at ffff8880796b0000 which belongs to the cache kmalloc-8k of size 8192 The buggy address is located 16 bytes inside of freed 8192-byte region [ffff8880796b0000, ffff8880796b2000) The buggy address belongs to the physical page: page: refcount:0 mapcount:0 mapping:0000000000000000 index:0x0 pfn:0x796b0 head: order:3 mapcount:0 entire_mapcount:0 nr_pages_mapped:0 pincount:0 anon flags: 0xfff00000000040(head|node=0|zone=1|lastcpupid=0x7ff) page_type: f5(slab) raw: 00fff00000000040 ffff88813ff27280 0000000000000000 0000000000000001 raw: 0000000000000000 0000000000020002 00000000f5000000 0000000000000000 head: 00fff00000000040 ffff88813ff27280 0000000000000000 0000000000000001 head: 0000000000000000 0000000000020002 00000000f5000000 0000000000000000 head: 00fff00000000003 ffffea0001e5ac01 00000000ffffffff 00000000ffffffff head: ffffffffffffffff 0000000000000000 00000000ffffffff 0000000000000008 page dumped because: kasan: bad access detected page_owner tracks the page as allocated page last allocated via order 3, migratetype Unmovable, gfp_mask 0xd2040(__GFP_IO|__GFP_NOWARN|__GFP_NORETRY|__GFP_COMP|__GFP_NOMEMALLOC), pid 5657, tgid 5657 (dhcpcd-run-hook), ts 79819636908, free_ts 79814310558 set_page_owner include/linux/page_owner.h:32 [inline] post_alloc_hook+0x1af/0x220 mm/page_alloc.c:1845 prep_new_page mm/page_alloc.c:1853 [inline] get_page_from_freelist+0xd0b/0x31a0 mm/page_alloc.c:3879 __alloc_frozen_pages_noprof+0x25f/0x2440 mm/page_alloc.c:5183 alloc_pages_mpol+0x1fb/0x550 mm/mempolicy.c:2416 alloc_slab_page mm/slub.c:3075 [inline] allocate_slab mm/slub.c:3248 [inline] new_slab+0x2c3/0x430 mm/slub.c:3302 ___slab_alloc+0xe18/0x1c90 mm/slub.c:4651 __slab_alloc.constprop.0+0x63/0x110 mm/slub.c:4774 __slab_alloc_node mm/slub.c:4850 [inline] slab_alloc_node mm/slub.c:5246 [inline] __kmalloc_cache_noprof+0x477/0x800 mm/slub.c:5766 kmalloc_noprof include/linux/slab.h:957 [inline] kzalloc_noprof include/linux/slab.h:1094 [inline] tomoyo_print_bprm security/tomoyo/audit.c:26 [inline] tomoyo_init_log+0xc8a/0x2140 security/tomoyo/audit.c:264 tomoyo_supervisor+0x302/0x13b0 security/tomoyo/common.c:2198 tomoyo_audit_env_log security/tomoyo/environ.c:36 [inline] tomoyo_env_perm+0x191/0x200 security/tomoyo/environ.c:63 tomoyo_environ security/tomoyo/domain.c:672 [inline] tomoyo_find_next_domain+0xec1/0x20b0 security/tomoyo/domain.c:888 tomoyo_bprm_check_security security/tomoyo/tomoyo.c:102 [inline] tomoyo_bprm_check_security+0x12d/0x1d0 security/tomoyo/tomoyo.c:92 security_bprm_check+0x1b9/0x1e0 security/security.c:794 search_binary_handler fs/exec.c:1659 [inline] exec_binprm fs/exec.c:1701 [inline] bprm_execve fs/exec.c:1753 [inline] bprm_execve+0x81e/0x1620 fs/exec.c:1729 do_execveat_common.isra.0+0x4a5/0x610 fs/exec.c:1859 page last free pid 5657 tgid 5657 stack trace: reset_page_owner include/linux/page_owner.h:25 [inline] free_pages_prepare mm/page_alloc.c:1394 [inline] __free_frozen_pages+0x7df/0x1160 mm/page_alloc.c:2901 discard_slab mm/slub.c:3346 [inline] __put_partials+0x130/0x170 mm/slub.c:3886 qlink_free mm/kasan/quarantine.c:163 [inline] qlist_free_all+0x4c/0xf0 mm/kasan/quarantine.c:179 kasan_quarantine_reduce+0x195/0x1e0 mm/kasan/quarantine.c:286 __kasan_slab_alloc+0x69/0x90 mm/kasan/common.c:352 kasan_slab_alloc include/linux/kasan.h:252 [inline] slab_post_alloc_hook mm/slub.c:4948 [inline] slab_alloc_node mm/slub.c:5258 [inline] __kmalloc_cache_noprof+0x274/0x800 mm/slub.c:5766 kmalloc_noprof include/linux/slab.h:957 [inline] tomoyo_print_header security/tomoyo/audit.c:156 [inline] tomoyo_init_log+0x197/0x2140 security/tomoyo/audit.c:255 tomoyo_supervisor+0x302/0x13b0 security/tomoyo/common.c:2198 tomoyo_audit_env_log security/tomoyo/environ.c:36 [inline] tomoyo_env_perm+0x191/0x200 security/tomoyo/environ.c:63 tomoyo_environ security/tomoyo/domain.c:672 [inline] tomoyo_find_next_domain+0xec1/0x20b0 security/tomoyo/domain.c:888 tomoyo_bprm_check_security security/tomoyo/tomoyo.c:102 [inline] tomoyo_bprm_check_security+0x12d/0x1d0 security/tomoyo/tomoyo.c:92 security_bprm_check+0x1b9/0x1e0 security/security.c:794 search_binary_handler fs/exec.c:1659 [inline] exec_binprm fs/exec.c:1701 [inline] bprm_execve fs/exec.c:1753 [inline] bprm_execve+0x81e/0x1620 fs/exec.c:1729 do_execveat_common.isra.0+0x4a5/0x610 fs/exec.c:1859 do_execve fs/exec.c:1933 [inline] __do_sys_execve fs/exec.c:2009 [inline] __se_sys_execve fs/exec.c:2004 [inline] __x64_sys_execve+0x8e/0xb0 fs/exec.c:2004 do_syscall_x64 arch/x86/entry/syscall_64.c:63 [inline] do_syscall_64+0xcd/0xf80 arch/x86/entry/syscall_64.c:94 Memory state around the buggy address: ffff8880796aff00: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc ffff8880796aff80: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc >ffff8880796b0000: fa fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ^ ffff8880796b0080: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ffff8880796b0100: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ================================================================== Fixes: a106e50be74b ("Bluetooth: HCI: Add support for LL Extended Feature Set") Reported-by: syzbot+87badbb9094e008e0685@syzkaller.appspotmail.com Tested-by: syzbot+87badbb9094e008e0685@syzkaller.appspotmail.com Closes: https://syzbot.org/bug?extid=87badbb9094e008e0685 Signed-off-by: Luiz Augusto von Dentz Signed-off-by: Pauli Virtanen --- net/bluetooth/hci_sync.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/net/bluetooth/hci_sync.c b/net/bluetooth/hci_sync.c index 8cbbba50e77e..ffb0ceda6f7b 100644 --- a/net/bluetooth/hci_sync.c +++ b/net/bluetooth/hci_sync.c @@ -7390,10 +7390,8 @@ static void le_read_features_complete(struct hci_dev *hdev, void *data, int err) bt_dev_dbg(hdev, "err %d", err); - if (err == -ECANCELED) - return; - hci_conn_drop(conn); + hci_conn_put(conn); } static int hci_le_read_all_remote_features_sync(struct hci_dev *hdev, @@ -7463,10 +7461,12 @@ int hci_le_read_remote_features(struct hci_conn *conn) if (conn->out || (hdev->le_features[0] & HCI_LE_PERIPHERAL_FEATURES)) { err = hci_cmd_sync_queue_once(hdev, hci_le_read_remote_features_sync, - hci_conn_hold(conn), + hci_conn_hold(hci_conn_get(conn)), le_read_features_complete); - if (err) + if (err) { hci_conn_drop(conn); + hci_conn_put(conn); + } } else { err = -EOPNOTSUPP; } From 0ffac654e95c1bdfe2d4edf28fb18d6ba1f103e6 Mon Sep 17 00:00:00 2001 From: Jonathan Rissanen Date: Fri, 27 Mar 2026 11:47:21 +0100 Subject: [PATCH 372/401] Bluetooth: hci_h4: Fix race during initialization Commit 5df5dafc171b ("Bluetooth: hci_uart: Fix another race during initialization") fixed a race for hci commands sent during initialization. However, there is still a race that happens if an hci event from one of these commands is received before HCI_UART_REGISTERED has been set at the end of hci_uart_register_dev(). The event will be ignored which causes the command to fail with a timeout in the log: "Bluetooth: hci0: command 0x1003 tx timeout" This is because the hci event receive path (hci_uart_tty_receive -> h4_recv) requires HCI_UART_REGISTERED to be set in h4_recv(), while the hci command transmit path (hci_uart_send_frame -> h4_enqueue) only requires HCI_UART_PROTO_INIT to be set in hci_uart_send_frame(). The check for HCI_UART_REGISTERED was originally added in commit c2578202919a ("Bluetooth: Fix H4 crash from incoming UART packets") to fix a crash caused by hu->hdev being null dereferenced. That can no longer happen: once HCI_UART_PROTO_INIT is set in hci_uart_register_dev() all pointers (hu, hu->priv and hu->hdev) are valid, and hci_uart_tty_receive() already calls h4_recv() on HCI_UART_PROTO_INIT or HCI_UART_PROTO_READY. Remove the check for HCI_UART_REGISTERED in h4_recv() to fix the race condition. Fixes: 5df5dafc171b ("Bluetooth: hci_uart: Fix another race during initialization") Signed-off-by: Jonathan Rissanen Signed-off-by: Luiz Augusto von Dentz --- drivers/bluetooth/hci_h4.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/bluetooth/hci_h4.c b/drivers/bluetooth/hci_h4.c index 07cc2a79a77b..a889a66a326f 100644 --- a/drivers/bluetooth/hci_h4.c +++ b/drivers/bluetooth/hci_h4.c @@ -109,9 +109,6 @@ static int h4_recv(struct hci_uart *hu, const void *data, int count) { struct h4_struct *h4 = hu->priv; - if (!test_bit(HCI_UART_REGISTERED, &hu->flags)) - return -EUNATCH; - h4->rx_skb = h4_recv_buf(hu, h4->rx_skb, data, count, h4_recv_pkts, ARRAY_SIZE(h4_recv_pkts)); if (IS_ERR(h4->rx_skb)) { From b8dbe9648d69059cfe3a28917bfbf7e61efd7f15 Mon Sep 17 00:00:00 2001 From: Keenan Dong Date: Sat, 28 Mar 2026 16:46:47 +0800 Subject: [PATCH 373/401] Bluetooth: MGMT: validate LTK enc_size on load Load Long Term Keys stores the user-provided enc_size and later uses it to size fixed-size stack operations when replying to LE LTK requests. An enc_size larger than the 16-byte key buffer can therefore overflow the reply stack buffer. Reject oversized enc_size values while validating the management LTK record so invalid keys never reach the stored key state. Fixes: 346af67b8d11 ("Bluetooth: Add MGMT handlers for dealing with SMP LTK's") Reported-by: Keenan Dong Signed-off-by: Keenan Dong Signed-off-by: Luiz Augusto von Dentz --- net/bluetooth/mgmt.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index e5f9287fb826..adcd86c15b4e 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -7248,6 +7248,9 @@ static bool ltk_is_valid(struct mgmt_ltk_info *key) if (key->initiator != 0x00 && key->initiator != 0x01) return false; + if (key->enc_size > sizeof(key->val)) + return false; + switch (key->addr.type) { case BDADDR_LE_PUBLIC: return true; From a2639a7f0f5bf7d73f337f8f077c19415c62ed2c Mon Sep 17 00:00:00 2001 From: Pauli Virtanen Date: Sun, 29 Mar 2026 16:43:01 +0300 Subject: [PATCH 374/401] Bluetooth: hci_conn: fix potential UAF in set_cig_params_sync hci_conn lookup and field access must be covered by hdev lock in set_cig_params_sync, otherwise it's possible it is freed concurrently. Take hdev lock to prevent hci_conn from being deleted or modified concurrently. Just RCU lock is not suitable here, as we also want to avoid "tearing" in the configuration. Fixes: a091289218202 ("Bluetooth: hci_conn: Fix hci_le_set_cig_params") Signed-off-by: Pauli Virtanen Signed-off-by: Luiz Augusto von Dentz --- net/bluetooth/hci_conn.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c index e6393f17576b..11d3ad8d2551 100644 --- a/net/bluetooth/hci_conn.c +++ b/net/bluetooth/hci_conn.c @@ -1843,9 +1843,13 @@ static int set_cig_params_sync(struct hci_dev *hdev, void *data) u8 aux_num_cis = 0; u8 cis_id; + hci_dev_lock(hdev); + conn = hci_conn_hash_lookup_cig(hdev, cig_id); - if (!conn) + if (!conn) { + hci_dev_unlock(hdev); return 0; + } qos = &conn->iso_qos; pdu->cig_id = cig_id; @@ -1884,6 +1888,8 @@ static int set_cig_params_sync(struct hci_dev *hdev, void *data) } pdu->num_cis = aux_num_cis; + hci_dev_unlock(hdev); + if (!pdu->num_cis) return 0; From b255531b27da336571411248c2a72a350662bd09 Mon Sep 17 00:00:00 2001 From: Pauli Virtanen Date: Sun, 29 Mar 2026 16:43:02 +0300 Subject: [PATCH 375/401] Bluetooth: hci_event: fix potential UAF in hci_le_remote_conn_param_req_evt hci_conn lookup and field access must be covered by hdev lock in hci_le_remote_conn_param_req_evt, otherwise it's possible it is freed concurrently. Extend the hci_dev_lock critical section to cover all conn usage. Fixes: 95118dd4edfec ("Bluetooth: hci_event: Use of a function table to handle LE subevents") Signed-off-by: Pauli Virtanen Signed-off-by: Luiz Augusto von Dentz --- net/bluetooth/hci_event.c | 33 ++++++++++++++++++++------------- 1 file changed, 20 insertions(+), 13 deletions(-) diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index 81d2f9a3eec9..3ebc5e6d45d9 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -6784,25 +6784,31 @@ static void hci_le_remote_conn_param_req_evt(struct hci_dev *hdev, void *data, latency = le16_to_cpu(ev->latency); timeout = le16_to_cpu(ev->timeout); + hci_dev_lock(hdev); + hcon = hci_conn_hash_lookup_handle(hdev, handle); - if (!hcon || hcon->state != BT_CONNECTED) - return send_conn_param_neg_reply(hdev, handle, - HCI_ERROR_UNKNOWN_CONN_ID); + if (!hcon || hcon->state != BT_CONNECTED) { + send_conn_param_neg_reply(hdev, handle, + HCI_ERROR_UNKNOWN_CONN_ID); + goto unlock; + } - if (max > hcon->le_conn_max_interval) - return send_conn_param_neg_reply(hdev, handle, - HCI_ERROR_INVALID_LL_PARAMS); + if (max > hcon->le_conn_max_interval) { + send_conn_param_neg_reply(hdev, handle, + HCI_ERROR_INVALID_LL_PARAMS); + goto unlock; + } - if (hci_check_conn_params(min, max, latency, timeout)) - return send_conn_param_neg_reply(hdev, handle, - HCI_ERROR_INVALID_LL_PARAMS); + if (hci_check_conn_params(min, max, latency, timeout)) { + send_conn_param_neg_reply(hdev, handle, + HCI_ERROR_INVALID_LL_PARAMS); + goto unlock; + } if (hcon->role == HCI_ROLE_MASTER) { struct hci_conn_params *params; u8 store_hint; - hci_dev_lock(hdev); - params = hci_conn_params_lookup(hdev, &hcon->dst, hcon->dst_type); if (params) { @@ -6815,8 +6821,6 @@ static void hci_le_remote_conn_param_req_evt(struct hci_dev *hdev, void *data, store_hint = 0x00; } - hci_dev_unlock(hdev); - mgmt_new_conn_param(hdev, &hcon->dst, hcon->dst_type, store_hint, min, max, latency, timeout); } @@ -6830,6 +6834,9 @@ static void hci_le_remote_conn_param_req_evt(struct hci_dev *hdev, void *data, cp.max_ce_len = 0; hci_send_cmd(hdev, HCI_OP_LE_CONN_PARAM_REQ_REPLY, sizeof(cp), &cp); + +unlock: + hci_dev_unlock(hdev); } static void hci_le_direct_adv_report_evt(struct hci_dev *hdev, void *data, From bda93eec78cdbfe5cda00785cefebd443e56b88b Mon Sep 17 00:00:00 2001 From: Keenan Dong Date: Wed, 1 Apr 2026 22:25:26 +0800 Subject: [PATCH 376/401] Bluetooth: MGMT: validate mesh send advertising payload length mesh_send() currently bounds MGMT_OP_MESH_SEND by total command length, but it never verifies that the bytes supplied for the flexible adv_data[] array actually match the embedded adv_data_len field. MGMT_MESH_SEND_SIZE only covers the fixed header, so a truncated command can still pass the existing 20..50 byte range check and later drive the async mesh send path past the end of the queued command buffer. Keep rejecting zero-length and oversized advertising payloads, but validate adv_data_len explicitly and require the command length to exactly match the flexible array size before queueing the request. Fixes: b338d91703fa ("Bluetooth: Implement support for Mesh") Reported-by: Keenan Dong Signed-off-by: Keenan Dong Signed-off-by: Luiz Augusto von Dentz --- net/bluetooth/mgmt.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index adcd86c15b4e..b05bb380e5f8 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -2478,6 +2478,7 @@ static int mesh_send(struct sock *sk, struct hci_dev *hdev, void *data, u16 len) struct mgmt_mesh_tx *mesh_tx; struct mgmt_cp_mesh_send *send = data; struct mgmt_rp_mesh_read_features rp; + u16 expected_len; bool sending; int err = 0; @@ -2485,12 +2486,19 @@ static int mesh_send(struct sock *sk, struct hci_dev *hdev, void *data, u16 len) !hci_dev_test_flag(hdev, HCI_MESH_EXPERIMENTAL)) return mgmt_cmd_status(sk, hdev->id, MGMT_OP_MESH_SEND, MGMT_STATUS_NOT_SUPPORTED); - if (!hci_dev_test_flag(hdev, HCI_LE_ENABLED) || - len <= MGMT_MESH_SEND_SIZE || - len > (MGMT_MESH_SEND_SIZE + 31)) + if (!hci_dev_test_flag(hdev, HCI_LE_ENABLED)) return mgmt_cmd_status(sk, hdev->id, MGMT_OP_MESH_SEND, MGMT_STATUS_REJECTED); + if (!send->adv_data_len || send->adv_data_len > 31) + return mgmt_cmd_status(sk, hdev->id, MGMT_OP_MESH_SEND, + MGMT_STATUS_REJECTED); + + expected_len = struct_size(send, adv_data, send->adv_data_len); + if (expected_len != len) + return mgmt_cmd_status(sk, hdev->id, MGMT_OP_MESH_SEND, + MGMT_STATUS_INVALID_PARAMS); + hci_dev_lock(hdev); memset(&rp, 0, sizeof(rp)); From d05111bfe37bfd8bd4d2dfe6675d6bdeef43f7c7 Mon Sep 17 00:00:00 2001 From: Oleh Konko Date: Tue, 31 Mar 2026 11:52:12 +0000 Subject: [PATCH 377/401] Bluetooth: SMP: force responder MITM requirements before building the pairing response smp_cmd_pairing_req() currently builds the pairing response from the initiator auth_req before enforcing the local BT_SECURITY_HIGH requirement. If the initiator omits SMP_AUTH_MITM, the response can also omit it even though the local side still requires MITM. tk_request() then sees an auth value without SMP_AUTH_MITM and may select JUST_CFM, making method selection inconsistent with the pairing policy the responder already enforces. When the local side requires HIGH security, first verify that MITM can be achieved from the IO capabilities and then force SMP_AUTH_MITM in the response in both rsp.auth_req and auth. This keeps the responder auth bits and later method selection aligned. Fixes: 2b64d153a0cc ("Bluetooth: Add MITM mechanism to LE-SMP") Cc: stable@vger.kernel.org Suggested-by: Luiz Augusto von Dentz Signed-off-by: Oleh Konko Signed-off-by: Luiz Augusto von Dentz --- net/bluetooth/smp.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/net/bluetooth/smp.c b/net/bluetooth/smp.c index 485e3468bd26..deb8dd244b77 100644 --- a/net/bluetooth/smp.c +++ b/net/bluetooth/smp.c @@ -1826,7 +1826,7 @@ static u8 smp_cmd_pairing_req(struct l2cap_conn *conn, struct sk_buff *skb) if (sec_level > conn->hcon->pending_sec_level) conn->hcon->pending_sec_level = sec_level; - /* If we need MITM check that it can be achieved */ + /* If we need MITM check that it can be achieved. */ if (conn->hcon->pending_sec_level >= BT_SECURITY_HIGH) { u8 method; @@ -1834,6 +1834,10 @@ static u8 smp_cmd_pairing_req(struct l2cap_conn *conn, struct sk_buff *skb) req->io_capability); if (method == JUST_WORKS || method == JUST_CFM) return SMP_AUTH_REQUIREMENTS; + + /* Force MITM bit if it isn't set by the initiator. */ + auth |= SMP_AUTH_MITM; + rsp.auth_req |= SMP_AUTH_MITM; } key_size = min(req->max_key_size, rsp.max_key_size); From 20756fec2f0108cb88e815941f1ffff88dc286fe Mon Sep 17 00:00:00 2001 From: Oleh Konko Date: Tue, 31 Mar 2026 11:52:13 +0000 Subject: [PATCH 378/401] Bluetooth: SMP: derive legacy responder STK authentication from MITM state The legacy responder path in smp_random() currently labels the stored STK as authenticated whenever pending_sec_level is BT_SECURITY_HIGH. That reflects what the local service requested, not what the pairing flow actually achieved. For Just Works/Confirm legacy pairing, SMP_FLAG_MITM_AUTH stays clear and the resulting STK should remain unauthenticated even if the local side requested HIGH security. Use the established MITM state when storing the responder STK so the key metadata matches the pairing result. This also keeps the legacy path aligned with the Secure Connections code, which already treats JUST_WORKS/JUST_CFM as unauthenticated. Fixes: fff3490f4781 ("Bluetooth: Fix setting correct authentication information for SMP STK") Cc: stable@vger.kernel.org Signed-off-by: Oleh Konko Signed-off-by: Luiz Augusto von Dentz --- net/bluetooth/smp.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/net/bluetooth/smp.c b/net/bluetooth/smp.c index deb8dd244b77..98f1da4f5f55 100644 --- a/net/bluetooth/smp.c +++ b/net/bluetooth/smp.c @@ -1018,10 +1018,7 @@ static u8 smp_random(struct smp_chan *smp) smp_s1(smp->tk, smp->prnd, smp->rrnd, stk); - if (hcon->pending_sec_level == BT_SECURITY_HIGH) - auth = 1; - else - auth = 0; + auth = test_bit(SMP_FLAG_MITM_AUTH, &smp->flags) ? 1 : 0; /* Even though there's no _RESPONDER suffix this is the * responder STK we're adding for later lookup (the initiator From bc39a094730ce062fa034a529c93147c096cb488 Mon Sep 17 00:00:00 2001 From: hkbinbin Date: Tue, 31 Mar 2026 05:39:16 +0000 Subject: [PATCH 379/401] Bluetooth: hci_sync: fix stack buffer overflow in hci_le_big_create_sync MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit hci_le_big_create_sync() uses DEFINE_FLEX to allocate a struct hci_cp_le_big_create_sync on the stack with room for 0x11 (17) BIS entries. However, conn->num_bis can hold up to HCI_MAX_ISO_BIS (31) entries — validated against ISO_MAX_NUM_BIS (0x1f) in the caller hci_conn_big_create_sync(). When conn->num_bis is between 18 and 31, the memcpy that copies conn->bis into cp->bis writes up to 14 bytes past the stack buffer, corrupting adjacent stack memory. This is trivially reproducible: binding an ISO socket with bc_num_bis = ISO_MAX_NUM_BIS (31) and calling listen() will eventually trigger hci_le_big_create_sync() from the HCI command sync worker, causing a KASAN-detectable stack-out-of-bounds write: BUG: KASAN: stack-out-of-bounds in hci_le_big_create_sync+0x256/0x3b0 Write of size 31 at addr ffffc90000487b48 by task kworker/u9:0/71 Fix this by changing the DEFINE_FLEX count from the incorrect 0x11 to HCI_MAX_ISO_BIS, which matches the maximum number of BIS entries that conn->bis can actually carry. Fixes: 42ecf1947135 ("Bluetooth: ISO: Do not emit LE BIG Create Sync if previous is pending") Cc: stable@vger.kernel.org Signed-off-by: hkbinbin Reviewed-by: Paul Menzel Signed-off-by: Luiz Augusto von Dentz --- net/bluetooth/hci_sync.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/bluetooth/hci_sync.c b/net/bluetooth/hci_sync.c index ffb0ceda6f7b..919ec275dd23 100644 --- a/net/bluetooth/hci_sync.c +++ b/net/bluetooth/hci_sync.c @@ -7241,7 +7241,8 @@ static void create_big_complete(struct hci_dev *hdev, void *data, int err) static int hci_le_big_create_sync(struct hci_dev *hdev, void *data) { - DEFINE_FLEX(struct hci_cp_le_big_create_sync, cp, bis, num_bis, 0x11); + DEFINE_FLEX(struct hci_cp_le_big_create_sync, cp, bis, num_bis, + HCI_MAX_ISO_BIS); struct hci_conn *conn = data; struct bt_iso_qos *qos = &conn->iso_qos; int err; From ffb5a4843c5bde702ed17cbcdbda98b37f7a6dad Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Tue, 31 Mar 2026 12:17:18 +0800 Subject: [PATCH 380/401] ipv6: fix data race in fib6_metric_set() using cmpxchg fib6_metric_set() may be called concurrently from softirq context without holding the FIB table lock. A typical path is: ndisc_router_discovery() spin_unlock_bh(&table->tb6_lock) <- lock released fib6_metric_set(rt, RTAX_HOPLIMIT, ...) <- lockless call When two CPUs process Router Advertisement packets for the same router simultaneously, they can both arrive at fib6_metric_set() with the same fib6_info pointer whose fib6_metrics still points to dst_default_metrics. if (f6i->fib6_metrics == &dst_default_metrics) { /* both CPUs: true */ struct dst_metrics *p = kzalloc_obj(*p, GFP_ATOMIC); refcount_set(&p->refcnt, 1); f6i->fib6_metrics = p; /* CPU1 overwrites CPU0's p -> p0 leaked */ } The dst_metrics allocated by the losing CPU has refcnt=1 but no pointer to it anywhere in memory, producing a kmemleak report: unreferenced object 0xff1100025aca1400 (size 96): comm "softirq", pid 0, jiffies 4299271239 backtrace: kmalloc_trace+0x28a/0x380 fib6_metric_set+0xcd/0x180 ndisc_router_discovery+0x12dc/0x24b0 icmpv6_rcv+0xc16/0x1360 Fix this by: - Set val for p->metrics before published via cmpxchg() so the metrics value is ready before the pointer becomes visible to other CPUs. - Replace the plain pointer store with cmpxchg() and free the allocation safely when competition failed. - Add READ_ONCE()/WRITE_ONCE() for metrics[] setting in the non-default metrics path to prevent compiler-based data races. Fixes: d4ead6b34b67 ("net/ipv6: move metrics from dst to rt6_info") Reported-by: Fei Liu Reviewed-by: Jiayuan Chen Signed-off-by: Hangbin Liu Reviewed-by: Eric Dumazet Link: https://patch.msgid.link/20260331-b4-fib6_metric_set-kmemleak-v3-1-88d27f4d8825@gmail.com Signed-off-by: Jakub Kicinski --- net/ipv6/ip6_fib.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index dd26657b6a4a..45ef4d65dcbc 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -727,20 +727,28 @@ static int inet6_dump_fib(struct sk_buff *skb, struct netlink_callback *cb) void fib6_metric_set(struct fib6_info *f6i, int metric, u32 val) { + struct dst_metrics *m; + if (!f6i) return; - if (f6i->fib6_metrics == &dst_default_metrics) { + if (READ_ONCE(f6i->fib6_metrics) == &dst_default_metrics) { + struct dst_metrics *dflt = (struct dst_metrics *)&dst_default_metrics; struct dst_metrics *p = kzalloc_obj(*p, GFP_ATOMIC); if (!p) return; + p->metrics[metric - 1] = val; refcount_set(&p->refcnt, 1); - f6i->fib6_metrics = p; + if (cmpxchg(&f6i->fib6_metrics, dflt, p) != dflt) + kfree(p); + else + return; } - f6i->fib6_metrics->metrics[metric - 1] = val; + m = READ_ONCE(f6i->fib6_metrics); + WRITE_ONCE(m->metrics[metric - 1], val); } /* From a54ecccfae62c5c85259ae5ea5d9c20009519049 Mon Sep 17 00:00:00 2001 From: Weiming Shi Date: Tue, 31 Mar 2026 00:32:38 +0800 Subject: [PATCH 381/401] rds: ib: reject FRMR registration before IB connection is established rds_ib_get_mr() extracts the rds_ib_connection from conn->c_transport_data and passes it to rds_ib_reg_frmr() for FRWR memory registration. On a fresh outgoing connection, ic is allocated in rds_ib_conn_alloc() with i_cm_id = NULL because the connection worker has not yet called rds_ib_conn_path_connect() to create the rdma_cm_id. When sendmsg() with RDS_CMSG_RDMA_MAP is called on such a connection, the sendmsg path parses the control message before any connection establishment, allowing rds_ib_post_reg_frmr() to dereference ic->i_cm_id->qp and crash the kernel. The existing guard in rds_ib_reg_frmr() only checks for !ic (added in commit 9e630bcb7701), which does not catch this case since ic is allocated early and is always non-NULL once the connection object exists. KASAN: null-ptr-deref in range [0x0000000000000010-0x0000000000000017] RIP: 0010:rds_ib_post_reg_frmr+0x50e/0x920 Call Trace: rds_ib_post_reg_frmr (net/rds/ib_frmr.c:167) rds_ib_map_frmr (net/rds/ib_frmr.c:252) rds_ib_reg_frmr (net/rds/ib_frmr.c:430) rds_ib_get_mr (net/rds/ib_rdma.c:615) __rds_rdma_map (net/rds/rdma.c:295) rds_cmsg_rdma_map (net/rds/rdma.c:860) rds_sendmsg (net/rds/send.c:1363) ____sys_sendmsg do_syscall_64 Add a check in rds_ib_get_mr() that verifies ic, i_cm_id, and qp are all non-NULL before proceeding with FRMR registration, mirroring the guard already present in rds_ib_post_inv(). Return -ENODEV when the connection is not ready, which the existing error handling in rds_cmsg_send() converts to -EAGAIN for userspace retry and triggers rds_conn_connect_if_down() to start the connection worker. Fixes: 1659185fb4d0 ("RDS: IB: Support Fastreg MR (FRMR) memory registration mode") Reported-by: Xiang Mei Signed-off-by: Weiming Shi Reviewed-by: Allison Henderson Link: https://patch.msgid.link/20260330163237.2752440-2-bestswngs@gmail.com Signed-off-by: Jakub Kicinski --- net/rds/ib_rdma.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/net/rds/ib_rdma.c b/net/rds/ib_rdma.c index 077f7041df15..2cfec252eeac 100644 --- a/net/rds/ib_rdma.c +++ b/net/rds/ib_rdma.c @@ -604,8 +604,13 @@ void *rds_ib_get_mr(struct scatterlist *sg, unsigned long nents, return ibmr; } - if (conn) + if (conn) { ic = conn->c_transport_data; + if (!ic || !ic->i_cm_id || !ic->i_cm_id->qp) { + ret = -ENODEV; + goto out; + } + } if (!rds_ibdev->mr_8k_pool || !rds_ibdev->mr_1m_pool) { ret = -ENODEV; From d64cb81dcbd54927515a7f65e5e24affdc73c14b Mon Sep 17 00:00:00 2001 From: Yucheng Lu Date: Tue, 31 Mar 2026 16:00:21 +0800 Subject: [PATCH 382/401] net/sched: sch_netem: fix out-of-bounds access in packet corruption In netem_enqueue(), the packet corruption logic uses get_random_u32_below(skb_headlen(skb)) to select an index for modifying skb->data. When an AF_PACKET TX_RING sends fully non-linear packets over an IPIP tunnel, skb_headlen(skb) evaluates to 0. Passing 0 to get_random_u32_below() takes the variable-ceil slow path which returns an unconstrained 32-bit random integer. Using this unconstrained value as an offset into skb->data results in an out-of-bounds memory access. Fix this by verifying skb_headlen(skb) is non-zero before attempting to corrupt the linear data area. Fully non-linear packets will silently bypass the corruption logic. Fixes: c865e5d99e25 ("[PKT_SCHED] netem: packet corruption option") Reported-by: Yifan Wu Reported-by: Juefei Pu Signed-off-by: Yuan Tan Signed-off-by: Xin Liu Signed-off-by: Yuhang Zheng Signed-off-by: Yucheng Lu Reviewed-by: Stephen Hemminger Link: https://patch.msgid.link/45435c0935df877853a81e6d06205ac738ec65fa.1774941614.git.kanolyc@gmail.com Signed-off-by: Jakub Kicinski --- net/sched/sch_netem.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c index 5de1c932944a..20df1c08b1e9 100644 --- a/net/sched/sch_netem.c +++ b/net/sched/sch_netem.c @@ -519,8 +519,9 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch, goto finish_segs; } - skb->data[get_random_u32_below(skb_headlen(skb))] ^= - 1<data[get_random_u32_below(skb_headlen(skb))] ^= + 1 << get_random_u32_below(8); } if (unlikely(q->t_len >= sch->limit)) { From b4e5f04c58a29c499faa85d12952ca9a4faf1cb9 Mon Sep 17 00:00:00 2001 From: Srujana Challa Date: Thu, 26 Mar 2026 19:53:44 +0530 Subject: [PATCH 383/401] virtio_net: clamp rss_max_key_size to NETDEV_RSS_KEY_LEN rss_max_key_size in the virtio spec is the maximum key size supported by the device, not a mandatory size the driver must use. Also the value 40 is a spec minimum, not a spec maximum. The current code rejects RSS and can fail probe when the device reports a larger rss_max_key_size than the driver buffer limit. Instead, clamp the effective key length to min(device rss_max_key_size, NETDEV_RSS_KEY_LEN) and keep RSS enabled. This keeps probe working on devices that advertise larger maximum key sizes while respecting the netdev RSS key buffer size limit. Fixes: 3f7d9c1964fc ("virtio_net: Add hash_key_length check") Cc: stable@vger.kernel.org Signed-off-by: Srujana Challa Acked-by: Michael S. Tsirkin Link: https://patch.msgid.link/20260326142344.1171317-1-schalla@marvell.com Signed-off-by: Jakub Kicinski --- drivers/net/virtio_net.c | 20 +++++++++----------- 1 file changed, 9 insertions(+), 11 deletions(-) diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index ab2108ee206a..c0b9bc5574e2 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -381,8 +381,6 @@ struct receive_queue { struct xdp_buff **xsk_buffs; }; -#define VIRTIO_NET_RSS_MAX_KEY_SIZE 40 - /* Control VQ buffers: protected by the rtnl lock */ struct control_buf { struct virtio_net_ctrl_hdr hdr; @@ -486,7 +484,7 @@ struct virtnet_info { /* Must be last as it ends in a flexible-array member. */ TRAILING_OVERLAP(struct virtio_net_rss_config_trailer, rss_trailer, hash_key_data, - u8 rss_hash_key_data[VIRTIO_NET_RSS_MAX_KEY_SIZE]; + u8 rss_hash_key_data[NETDEV_RSS_KEY_LEN]; ); }; static_assert(offsetof(struct virtnet_info, rss_trailer.hash_key_data) == @@ -6708,6 +6706,7 @@ static int virtnet_probe(struct virtio_device *vdev) struct virtnet_info *vi; u16 max_queue_pairs; int mtu = 0; + u16 key_sz; /* Find if host supports multiqueue/rss virtio_net device */ max_queue_pairs = 1; @@ -6842,14 +6841,13 @@ static int virtnet_probe(struct virtio_device *vdev) } if (vi->has_rss || vi->has_rss_hash_report) { - vi->rss_key_size = - virtio_cread8(vdev, offsetof(struct virtio_net_config, rss_max_key_size)); - if (vi->rss_key_size > VIRTIO_NET_RSS_MAX_KEY_SIZE) { - dev_err(&vdev->dev, "rss_max_key_size=%u exceeds the limit %u.\n", - vi->rss_key_size, VIRTIO_NET_RSS_MAX_KEY_SIZE); - err = -EINVAL; - goto free; - } + key_sz = virtio_cread8(vdev, offsetof(struct virtio_net_config, rss_max_key_size)); + + vi->rss_key_size = min_t(u16, key_sz, NETDEV_RSS_KEY_LEN); + if (key_sz > vi->rss_key_size) + dev_warn(&vdev->dev, + "rss_max_key_size=%u exceeds driver limit %u, clamping\n", + key_sz, vi->rss_key_size); vi->rss_hash_types_supported = virtio_cread32(vdev, offsetof(struct virtio_net_config, supported_hash_types)); From ce8fe5287b87e24e225c342f3b0ec04f0b3680fe Mon Sep 17 00:00:00 2001 From: Fedor Pchelkin Date: Mon, 30 Mar 2026 21:45:40 +0300 Subject: [PATCH 384/401] net: macb: fix clk handling on PCI glue driver removal platform_device_unregister() may still want to use the registered clks during runtime resume callback. Note that there is a commit d82d5303c4c5 ("net: macb: fix use after free on rmmod") that addressed the similar problem of clk vs platform device unregistration but just moved the bug to another place. Save the pointers to clks into local variables for reuse after platform device is unregistered. BUG: KASAN: use-after-free in clk_prepare+0x5a/0x60 Read of size 8 at addr ffff888104f85e00 by task modprobe/597 CPU: 2 PID: 597 Comm: modprobe Not tainted 6.1.164+ #114 Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.16.1-0-g3208b098f51a-prebuilt.qemu.org 04/01/2014 Call Trace: dump_stack_lvl+0x8d/0xba print_report+0x17f/0x496 kasan_report+0xd9/0x180 clk_prepare+0x5a/0x60 macb_runtime_resume+0x13d/0x410 [macb] pm_generic_runtime_resume+0x97/0xd0 __rpm_callback+0xc8/0x4d0 rpm_callback+0xf6/0x230 rpm_resume+0xeeb/0x1a70 __pm_runtime_resume+0xb4/0x170 bus_remove_device+0x2e3/0x4b0 device_del+0x5b3/0xdc0 platform_device_del+0x4e/0x280 platform_device_unregister+0x11/0x50 pci_device_remove+0xae/0x210 device_remove+0xcb/0x180 device_release_driver_internal+0x529/0x770 driver_detach+0xd4/0x1a0 bus_remove_driver+0x135/0x260 driver_unregister+0x72/0xb0 pci_unregister_driver+0x26/0x220 __do_sys_delete_module+0x32e/0x550 do_syscall_64+0x35/0x80 entry_SYSCALL_64_after_hwframe+0x6e/0xd8 Allocated by task 519: kasan_save_stack+0x2c/0x50 kasan_set_track+0x21/0x30 __kasan_kmalloc+0x8e/0x90 __clk_register+0x458/0x2890 clk_hw_register+0x1a/0x60 __clk_hw_register_fixed_rate+0x255/0x410 clk_register_fixed_rate+0x3c/0xa0 macb_probe+0x1d8/0x42e [macb_pci] local_pci_probe+0xd7/0x190 pci_device_probe+0x252/0x600 really_probe+0x255/0x7f0 __driver_probe_device+0x1ee/0x330 driver_probe_device+0x4c/0x1f0 __driver_attach+0x1df/0x4e0 bus_for_each_dev+0x15d/0x1f0 bus_add_driver+0x486/0x5e0 driver_register+0x23a/0x3d0 do_one_initcall+0xfd/0x4d0 do_init_module+0x18b/0x5a0 load_module+0x5663/0x7950 __do_sys_finit_module+0x101/0x180 do_syscall_64+0x35/0x80 entry_SYSCALL_64_after_hwframe+0x6e/0xd8 Freed by task 597: kasan_save_stack+0x2c/0x50 kasan_set_track+0x21/0x30 kasan_save_free_info+0x2a/0x50 __kasan_slab_free+0x106/0x180 __kmem_cache_free+0xbc/0x320 clk_unregister+0x6de/0x8d0 macb_remove+0x73/0xc0 [macb_pci] pci_device_remove+0xae/0x210 device_remove+0xcb/0x180 device_release_driver_internal+0x529/0x770 driver_detach+0xd4/0x1a0 bus_remove_driver+0x135/0x260 driver_unregister+0x72/0xb0 pci_unregister_driver+0x26/0x220 __do_sys_delete_module+0x32e/0x550 do_syscall_64+0x35/0x80 entry_SYSCALL_64_after_hwframe+0x6e/0xd8 Fixes: d82d5303c4c5 ("net: macb: fix use after free on rmmod") Signed-off-by: Fedor Pchelkin Link: https://patch.msgid.link/20260330184542.626619-1-pchelkin@ispras.ru Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/cadence/macb_pci.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/cadence/macb_pci.c b/drivers/net/ethernet/cadence/macb_pci.c index fc4f5aee6ab3..0ce5b736ea43 100644 --- a/drivers/net/ethernet/cadence/macb_pci.c +++ b/drivers/net/ethernet/cadence/macb_pci.c @@ -109,10 +109,12 @@ static void macb_remove(struct pci_dev *pdev) { struct platform_device *plat_dev = pci_get_drvdata(pdev); struct macb_platform_data *plat_data = dev_get_platdata(&plat_dev->dev); + struct clk *pclk = plat_data->pclk; + struct clk *hclk = plat_data->hclk; - clk_unregister(plat_data->pclk); - clk_unregister(plat_data->hclk); platform_device_unregister(plat_dev); + clk_unregister(pclk); + clk_unregister(hclk); } static const struct pci_device_id dev_id_table[] = { From f0f367a4f459cc8118aadc43c6bba53c60d93f8d Mon Sep 17 00:00:00 2001 From: Fedor Pchelkin Date: Mon, 30 Mar 2026 21:45:41 +0300 Subject: [PATCH 385/401] net: macb: properly unregister fixed rate clocks The additional resources allocated with clk_register_fixed_rate() need to be released with clk_unregister_fixed_rate(), otherwise they are lost. Fixes: 83a77e9ec415 ("net: macb: Added PCI wrapper for Platform Driver.") Signed-off-by: Fedor Pchelkin Link: https://patch.msgid.link/20260330184542.626619-2-pchelkin@ispras.ru Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/cadence/macb_pci.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/cadence/macb_pci.c b/drivers/net/ethernet/cadence/macb_pci.c index 0ce5b736ea43..b79dec17e6b0 100644 --- a/drivers/net/ethernet/cadence/macb_pci.c +++ b/drivers/net/ethernet/cadence/macb_pci.c @@ -96,10 +96,10 @@ static int macb_probe(struct pci_dev *pdev, const struct pci_device_id *id) return 0; err_plat_dev_register: - clk_unregister(plat_data.hclk); + clk_unregister_fixed_rate(plat_data.hclk); err_hclk_register: - clk_unregister(plat_data.pclk); + clk_unregister_fixed_rate(plat_data.pclk); err_pclk_register: return err; @@ -113,8 +113,8 @@ static void macb_remove(struct pci_dev *pdev) struct clk *hclk = plat_data->hclk; platform_device_unregister(plat_dev); - clk_unregister(pclk); - clk_unregister(hclk); + clk_unregister_fixed_rate(pclk); + clk_unregister_fixed_rate(hclk); } static const struct pci_device_id dev_id_table[] = { From bf16bca6653679d8a514d6c1c5a2c67065033f14 Mon Sep 17 00:00:00 2001 From: Shay Drory Date: Mon, 30 Mar 2026 22:40:13 +0300 Subject: [PATCH 386/401] net/mlx5: lag: Check for LAG device before creating debugfs __mlx5_lag_dev_add_mdev() may return 0 (success) even when an error occurs that is handled gracefully. Consequently, the initialization flow proceeds to call mlx5_ldev_add_debugfs() even when there is no valid LAG context. mlx5_ldev_add_debugfs() blindly created the debugfs directory and attributes. This exposed interfaces (like the members file) that rely on a valid ldev pointer, leading to potential NULL pointer dereferences if accessed when ldev is NULL. Add a check to verify that mlx5_lag_dev(dev) returns a valid pointer before attempting to create the debugfs entries. Fixes: 7f46a0b7327a ("net/mlx5: Lag, add debugfs to query hardware lag state") Signed-off-by: Shay Drory Reviewed-by: Mark Bloch Signed-off-by: Tariq Toukan Link: https://patch.msgid.link/20260330194015.53585-2-tariqt@nvidia.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlx5/core/lag/debugfs.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/debugfs.c b/drivers/net/ethernet/mellanox/mlx5/core/lag/debugfs.c index 62b6faa4276a..b8d5f6a44d26 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lag/debugfs.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/debugfs.c @@ -160,8 +160,11 @@ DEFINE_SHOW_ATTRIBUTE(members); void mlx5_ldev_add_debugfs(struct mlx5_core_dev *dev) { + struct mlx5_lag *ldev = mlx5_lag_dev(dev); struct dentry *dbg; + if (!ldev) + return; dbg = debugfs_create_dir("lag", mlx5_debugfs_get_dev_root(dev)); dev->priv.dbg.lag_debugfs = dbg; From 10dc35f6a443d488f219d1a1e3fb8f8dac422070 Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Mon, 30 Mar 2026 22:40:14 +0300 Subject: [PATCH 387/401] net/mlx5: Avoid "No data available" when FW version queries fail Avoid printing the misleading "kernel answers: No data available" devlink output when querying firmware or pending firmware version fails (e.g. MLX5 fw state errors / flash failures). FW can fail on loading the pending flash image and get its version due to various reasons, examples: mlxfw: Firmware flash failed: key not applicable, err (7) mlx5_fw_image_pending: can't read pending fw version while fw state is 1 and the resulting: $ devlink dev info kernel answers: No data available Instead, just report 0 or 0xfff.. versions in case of failure to indicate a problem, and let other information be shown. after the fix: $ devlink dev info pci/0000:00:06.0: driver mlx5_core serial_number xxx... board.serial_number MT2225300179 versions: fixed: fw.psid MT_0000000436 running: fw.version 22.41.0188 fw 22.41.0188 stored: fw.version 255.255.65535 fw 255.255.65535 Fixes: 9c86b07e3069 ("net/mlx5: Added fw version query command") Signed-off-by: Saeed Mahameed Reviewed-by: Moshe Shemesh Signed-off-by: Tariq Toukan Link: https://patch.msgid.link/20260330194015.53585-3-tariqt@nvidia.com Signed-off-by: Jakub Kicinski --- .../net/ethernet/mellanox/mlx5/core/devlink.c | 4 +- drivers/net/ethernet/mellanox/mlx5/core/fw.c | 49 ++++++++++++------- .../ethernet/mellanox/mlx5/core/mlx5_core.h | 4 +- 3 files changed, 35 insertions(+), 22 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/devlink.c b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c index 6698ac55a4bf..73cf0321bb86 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/devlink.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c @@ -107,9 +107,7 @@ mlx5_devlink_info_get(struct devlink *devlink, struct devlink_info_req *req, if (err) return err; - err = mlx5_fw_version_query(dev, &running_fw, &stored_fw); - if (err) - return err; + mlx5_fw_version_query(dev, &running_fw, &stored_fw); snprintf(version_str, sizeof(version_str), "%d.%d.%04d", mlx5_fw_ver_major(running_fw), mlx5_fw_ver_minor(running_fw), diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fw.c b/drivers/net/ethernet/mellanox/mlx5/core/fw.c index eeb4437975f2..c1f220e5fe18 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fw.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fw.c @@ -822,48 +822,63 @@ mlx5_fw_image_pending(struct mlx5_core_dev *dev, return 0; } -int mlx5_fw_version_query(struct mlx5_core_dev *dev, - u32 *running_ver, u32 *pending_ver) +void mlx5_fw_version_query(struct mlx5_core_dev *dev, + u32 *running_ver, u32 *pending_ver) { u32 reg_mcqi_version[MLX5_ST_SZ_DW(mcqi_version)] = {}; bool pending_version_exists; int component_index; int err; + *running_ver = 0; + *pending_ver = 0; + if (!MLX5_CAP_GEN(dev, mcam_reg) || !MLX5_CAP_MCAM_REG(dev, mcqi) || !MLX5_CAP_MCAM_REG(dev, mcqs)) { mlx5_core_warn(dev, "fw query isn't supported by the FW\n"); - return -EOPNOTSUPP; + return; } component_index = mlx5_get_boot_img_component_index(dev); - if (component_index < 0) - return component_index; + if (component_index < 0) { + mlx5_core_warn(dev, "fw query failed to find boot img component index, err %d\n", + component_index); + return; + } + *running_ver = U32_MAX; /* indicate failure */ err = mlx5_reg_mcqi_version_query(dev, component_index, MCQI_FW_RUNNING_VERSION, reg_mcqi_version); - if (err) - return err; - - *running_ver = MLX5_GET(mcqi_version, reg_mcqi_version, version); + if (!err) + *running_ver = MLX5_GET(mcqi_version, reg_mcqi_version, + version); + else + mlx5_core_warn(dev, "failed to query running version, err %d\n", + err); + *pending_ver = U32_MAX; /* indicate failure */ err = mlx5_fw_image_pending(dev, component_index, &pending_version_exists); - if (err) - return err; + if (err) { + mlx5_core_warn(dev, "failed to query pending image, err %d\n", + err); + return; + } if (!pending_version_exists) { *pending_ver = 0; - return 0; + return; } err = mlx5_reg_mcqi_version_query(dev, component_index, MCQI_FW_STORED_VERSION, reg_mcqi_version); - if (err) - return err; + if (!err) + *pending_ver = MLX5_GET(mcqi_version, reg_mcqi_version, + version); + else + mlx5_core_warn(dev, "failed to query pending version, err %d\n", + err); - *pending_ver = MLX5_GET(mcqi_version, reg_mcqi_version, version); - - return 0; + return; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h index b635b423d972..1507e881d962 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h @@ -393,8 +393,8 @@ int mlx5_port_max_linkspeed(struct mlx5_core_dev *mdev, u32 *speed); int mlx5_firmware_flash(struct mlx5_core_dev *dev, const struct firmware *fw, struct netlink_ext_ack *extack); -int mlx5_fw_version_query(struct mlx5_core_dev *dev, - u32 *running_ver, u32 *stored_ver); +void mlx5_fw_version_query(struct mlx5_core_dev *dev, u32 *running_ver, + u32 *stored_ver); #ifdef CONFIG_MLX5_CORE_EN int mlx5e_init(void); From 403186400a1a6166efe7031edc549c15fee4723f Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Mon, 30 Mar 2026 22:40:15 +0300 Subject: [PATCH 388/401] net/mlx5: Fix switchdev mode rollback in case of failure If for some internal reason switchdev mode fails, we rollback to legacy mode, before this patch, rollback will unregister the uplink netdev and leave it unregistered causing the below kernel bug. To fix this, we need to avoid netdev unregister by setting the proper rollback flag 'MLX5_PRIV_FLAGS_SWITCH_LEGACY' to indicate legacy mode. devlink (431) used greatest stack depth: 11048 bytes left mlx5_core 0000:00:03.0: E-Switch: Disable: mode(LEGACY), nvfs(0), \ necvfs(0), active vports(0) mlx5_core 0000:00:03.0: E-Switch: Supported tc chains and prios offload mlx5_core 0000:00:03.0: Loading uplink representor for vport 65535 mlx5_core 0000:00:03.0: mlx5_cmd_out_err:816:(pid 456): \ QUERY_HCA_CAP(0x100) op_mod(0x0) failed, \ status bad parameter(0x3), syndrome (0x3a3846), err(-22) mlx5_core 0000:00:03.0 enp0s3np0 (unregistered): Unloading uplink \ representor for vport 65535 ------------[ cut here ]------------ kernel BUG at net/core/dev.c:12070! Oops: invalid opcode: 0000 [#1] SMP NOPTI CPU: 2 UID: 0 PID: 456 Comm: devlink Not tainted 6.16.0-rc3+ \ #9 PREEMPT(voluntary) RIP: 0010:unregister_netdevice_many_notify+0x123/0xae0 ... Call Trace: [ 90.923094] unregister_netdevice_queue+0xad/0xf0 [ 90.923323] unregister_netdev+0x1c/0x40 [ 90.923522] mlx5e_vport_rep_unload+0x61/0xc6 [ 90.923736] esw_offloads_enable+0x8e6/0x920 [ 90.923947] mlx5_eswitch_enable_locked+0x349/0x430 [ 90.924182] ? is_mp_supported+0x57/0xb0 [ 90.924376] mlx5_devlink_eswitch_mode_set+0x167/0x350 [ 90.924628] devlink_nl_eswitch_set_doit+0x6f/0xf0 [ 90.924862] genl_family_rcv_msg_doit+0xe8/0x140 [ 90.925088] genl_rcv_msg+0x18b/0x290 [ 90.925269] ? __pfx_devlink_nl_pre_doit+0x10/0x10 [ 90.925506] ? __pfx_devlink_nl_eswitch_set_doit+0x10/0x10 [ 90.925766] ? __pfx_devlink_nl_post_doit+0x10/0x10 [ 90.926001] ? __pfx_genl_rcv_msg+0x10/0x10 [ 90.926206] netlink_rcv_skb+0x52/0x100 [ 90.926393] genl_rcv+0x28/0x40 [ 90.926557] netlink_unicast+0x27d/0x3d0 [ 90.926749] netlink_sendmsg+0x1f7/0x430 [ 90.926942] __sys_sendto+0x213/0x220 [ 90.927127] ? __sys_recvmsg+0x6a/0xd0 [ 90.927312] __x64_sys_sendto+0x24/0x30 [ 90.927504] do_syscall_64+0x50/0x1c0 [ 90.927687] entry_SYSCALL_64_after_hwframe+0x76/0x7e [ 90.927929] RIP: 0033:0x7f7d0363e047 Fixes: 2a4f56fbcc47 ("net/mlx5e: Keep netdev when leave switchdev for devlink set legacy only") Signed-off-by: Saeed Mahameed Reviewed-by: Jianbo Liu Signed-off-by: Tariq Toukan Link: https://patch.msgid.link/20260330194015.53585-4-tariqt@nvidia.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index 7a9ee36b8dca..01f6aecc4fcc 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -3761,6 +3761,8 @@ int esw_offloads_enable(struct mlx5_eswitch *esw) return 0; err_vports: + /* rollback to legacy, indicates don't unregister the uplink netdev */ + esw->dev->priv.flags |= MLX5_PRIV_FLAGS_SWITCH_LEGACY; mlx5_esw_offloads_rep_unload(esw, MLX5_VPORT_UPLINK); err_uplink: esw_offloads_steering_cleanup(esw); From ceee35e5674aa84cf9e504c2a9dae4587511556c Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Mon, 30 Mar 2026 23:51:36 -0700 Subject: [PATCH 389/401] bnxt_en: Refactor some basic ring setup and adjustment logic Refactor out the basic code that trims the default rings, sets up and adjusts XDP TX rings and CP rings. There is no change in behavior. This is to prepare for the next bug fix patch. Reviewed-by: Kalesh AP Reviewed-by: Pavan Chebbi Reviewed-by: Andy Gospodarek Signed-off-by: Michael Chan Link: https://patch.msgid.link/20260331065138.948205-2-michael.chan@broadcom.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 53 +++++++++++++------ drivers/net/ethernet/broadcom/bnxt/bnxt.h | 1 + .../net/ethernet/broadcom/bnxt/bnxt_ethtool.c | 5 +- drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c | 5 +- 4 files changed, 41 insertions(+), 23 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 7ed805713fbb..173f962fc2ab 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -12992,6 +12992,21 @@ static int bnxt_tx_nr_rings_per_tc(struct bnxt *bp) return bp->num_tc ? bp->tx_nr_rings / bp->num_tc : bp->tx_nr_rings; } +static void bnxt_set_xdp_tx_rings(struct bnxt *bp) +{ + bp->tx_nr_rings_xdp = bp->tx_nr_rings_per_tc; + bp->tx_nr_rings += bp->tx_nr_rings_xdp; +} + +static void bnxt_adj_tx_rings(struct bnxt *bp) +{ + /* Make adjustments if reserved TX rings are less than requested */ + bp->tx_nr_rings -= bp->tx_nr_rings_xdp; + bp->tx_nr_rings_per_tc = bnxt_tx_nr_rings_per_tc(bp); + if (bp->tx_nr_rings_xdp) + bnxt_set_xdp_tx_rings(bp); +} + static int __bnxt_open_nic(struct bnxt *bp, bool irq_re_init, bool link_re_init) { int rc = 0; @@ -13009,13 +13024,7 @@ static int __bnxt_open_nic(struct bnxt *bp, bool irq_re_init, bool link_re_init) if (rc) return rc; - /* Make adjustments if reserved TX rings are less than requested */ - bp->tx_nr_rings -= bp->tx_nr_rings_xdp; - bp->tx_nr_rings_per_tc = bnxt_tx_nr_rings_per_tc(bp); - if (bp->tx_nr_rings_xdp) { - bp->tx_nr_rings_xdp = bp->tx_nr_rings_per_tc; - bp->tx_nr_rings += bp->tx_nr_rings_xdp; - } + bnxt_adj_tx_rings(bp); rc = bnxt_alloc_mem(bp, irq_re_init); if (rc) { netdev_err(bp->dev, "bnxt_alloc_mem err: %x\n", rc); @@ -15436,11 +15445,19 @@ static int bnxt_change_mtu(struct net_device *dev, int new_mtu) return 0; } +void bnxt_set_cp_rings(struct bnxt *bp, bool sh) +{ + int tx_cp = bnxt_num_tx_to_cp(bp, bp->tx_nr_rings); + + bp->cp_nr_rings = sh ? max_t(int, tx_cp, bp->rx_nr_rings) : + tx_cp + bp->rx_nr_rings; +} + int bnxt_setup_mq_tc(struct net_device *dev, u8 tc) { struct bnxt *bp = netdev_priv(dev); bool sh = false; - int rc, tx_cp; + int rc; if (tc > bp->max_tc) { netdev_err(dev, "Too many traffic classes requested: %d. Max supported is %d.\n", @@ -15473,9 +15490,7 @@ int bnxt_setup_mq_tc(struct net_device *dev, u8 tc) bp->num_tc = 0; } bp->tx_nr_rings += bp->tx_nr_rings_xdp; - tx_cp = bnxt_num_tx_to_cp(bp, bp->tx_nr_rings); - bp->cp_nr_rings = sh ? max_t(int, tx_cp, bp->rx_nr_rings) : - tx_cp + bp->rx_nr_rings; + bnxt_set_cp_rings(bp, sh); if (netif_running(bp->dev)) return bnxt_open_nic(bp, true, false); @@ -16525,6 +16540,15 @@ static void bnxt_trim_dflt_sh_rings(struct bnxt *bp) bp->tx_nr_rings = bnxt_tx_nr_rings(bp); } +static void bnxt_adj_dflt_rings(struct bnxt *bp, bool sh) +{ + if (sh) + bnxt_trim_dflt_sh_rings(bp); + else + bp->cp_nr_rings = bp->tx_nr_rings_per_tc + bp->rx_nr_rings; + bp->tx_nr_rings = bnxt_tx_nr_rings(bp); +} + static int bnxt_set_dflt_rings(struct bnxt *bp, bool sh) { int dflt_rings, max_rx_rings, max_tx_rings, rc; @@ -16550,11 +16574,8 @@ static int bnxt_set_dflt_rings(struct bnxt *bp, bool sh) return rc; bp->rx_nr_rings = min_t(int, dflt_rings, max_rx_rings); bp->tx_nr_rings_per_tc = min_t(int, dflt_rings, max_tx_rings); - if (sh) - bnxt_trim_dflt_sh_rings(bp); - else - bp->cp_nr_rings = bp->tx_nr_rings_per_tc + bp->rx_nr_rings; - bp->tx_nr_rings = bnxt_tx_nr_rings(bp); + + bnxt_adj_dflt_rings(bp, sh); avail_msix = bnxt_get_max_func_irqs(bp) - bp->cp_nr_rings; if (avail_msix >= BNXT_MIN_ROCE_CP_RINGS) { diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h b/drivers/net/ethernet/broadcom/bnxt/bnxt.h index a97d651130df..4bc7f7aeaab3 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h @@ -2985,6 +2985,7 @@ int bnxt_check_rings(struct bnxt *bp, int tx, int rx, bool sh, int tcs, int tx_xdp); int bnxt_fw_init_one(struct bnxt *bp); bool bnxt_hwrm_reset_permitted(struct bnxt *bp); +void bnxt_set_cp_rings(struct bnxt *bp, bool sh); int bnxt_setup_mq_tc(struct net_device *dev, u8 tc); struct bnxt_ntuple_filter *bnxt_lookup_ntp_filter_from_idx(struct bnxt *bp, struct bnxt_ntuple_filter *fltr, u32 idx); diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c index 28d0ece2e7b1..0407aa1b3190 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c @@ -945,7 +945,6 @@ static int bnxt_set_channels(struct net_device *dev, bool sh = false; int tx_xdp = 0; int rc = 0; - int tx_cp; if (channel->other_count) return -EINVAL; @@ -1013,9 +1012,7 @@ static int bnxt_set_channels(struct net_device *dev, if (tcs > 1) bp->tx_nr_rings = bp->tx_nr_rings_per_tc * tcs + tx_xdp; - tx_cp = bnxt_num_tx_to_cp(bp, bp->tx_nr_rings); - bp->cp_nr_rings = sh ? max_t(int, tx_cp, bp->rx_nr_rings) : - tx_cp + bp->rx_nr_rings; + bnxt_set_cp_rings(bp, sh); /* After changing number of rx channels, update NTUPLE feature. */ netdev_update_features(dev); diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c index 85cbeb35681c..bebe37e139c9 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c @@ -384,7 +384,7 @@ int bnxt_xdp_xmit(struct net_device *dev, int num_frames, static int bnxt_xdp_set(struct bnxt *bp, struct bpf_prog *prog) { struct net_device *dev = bp->dev; - int tx_xdp = 0, tx_cp, rc, tc; + int tx_xdp = 0, rc, tc; struct bpf_prog *old; netdev_assert_locked(dev); @@ -431,8 +431,7 @@ static int bnxt_xdp_set(struct bnxt *bp, struct bpf_prog *prog) } bp->tx_nr_rings_xdp = tx_xdp; bp->tx_nr_rings = bp->tx_nr_rings_per_tc * tc + tx_xdp; - tx_cp = bnxt_num_tx_to_cp(bp, bp->tx_nr_rings); - bp->cp_nr_rings = max_t(int, tx_cp, bp->rx_nr_rings); + bnxt_set_cp_rings(bp, true); bnxt_set_tpa_flags(bp); bnxt_set_ring_params(bp); From e4bf81dcad0a6fff2bbe5331d2c7fb30d45a788c Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Mon, 30 Mar 2026 23:51:37 -0700 Subject: [PATCH 390/401] bnxt_en: Don't assume XDP is never enabled in bnxt_init_dflt_ring_mode() The original code made the assumption that when we set up the initial default ring mode, we must be just loading the driver and XDP cannot be enabled yet. This is not true when the FW goes through a resource or capability change. Resource reservations will be cancelled and reinitialized with XDP already enabled. devlink reload with XDP enabled will also have the same issue. This scenario will cause the ring arithmetic to be all wrong in the bnxt_init_dflt_ring_mode() path causing failure: bnxt_en 0000:a1:00.0 ens2f0np0: bnxt_setup_int_mode err: ffffffea bnxt_en 0000:a1:00.0 ens2f0np0: bnxt_request_irq err: ffffffea bnxt_en 0000:a1:00.0 ens2f0np0: nic open fail (rc: ffffffea) Fix it by properly accounting for XDP in the bnxt_init_dflt_ring_mode() path by using the refactored helper functions in the previous patch. Reviewed-by: Andy Gospodarek Reviewed-by: Pavan Chebbi Reviewed-by: Kalesh AP Fixes: ec5d31e3c15d ("bnxt_en: Handle firmware reset status during IF_UP.") Fixes: 228ea8c187d8 ("bnxt_en: implement devlink dev reload driver_reinit") Signed-off-by: Michael Chan Link: https://patch.msgid.link/20260331065138.948205-3-michael.chan@broadcom.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 173f962fc2ab..f11f3a704da5 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -16547,6 +16547,10 @@ static void bnxt_adj_dflt_rings(struct bnxt *bp, bool sh) else bp->cp_nr_rings = bp->tx_nr_rings_per_tc + bp->rx_nr_rings; bp->tx_nr_rings = bnxt_tx_nr_rings(bp); + if (sh && READ_ONCE(bp->xdp_prog)) { + bnxt_set_xdp_tx_rings(bp); + bnxt_set_cp_rings(bp, true); + } } static int bnxt_set_dflt_rings(struct bnxt *bp, bool sh) @@ -16588,16 +16592,17 @@ static int bnxt_set_dflt_rings(struct bnxt *bp, bool sh) rc = __bnxt_reserve_rings(bp); if (rc && rc != -ENODEV) netdev_warn(bp->dev, "Unable to reserve tx rings\n"); - bp->tx_nr_rings_per_tc = bnxt_tx_nr_rings_per_tc(bp); + + bnxt_adj_tx_rings(bp); if (sh) - bnxt_trim_dflt_sh_rings(bp); + bnxt_adj_dflt_rings(bp, true); /* Rings may have been trimmed, re-reserve the trimmed rings. */ if (bnxt_need_reserve_rings(bp)) { rc = __bnxt_reserve_rings(bp); if (rc && rc != -ENODEV) netdev_warn(bp->dev, "2nd rings reservation failed.\n"); - bp->tx_nr_rings_per_tc = bnxt_tx_nr_rings_per_tc(bp); + bnxt_adj_tx_rings(bp); } if (BNXT_CHIP_TYPE_NITRO_A0(bp)) { bp->rx_nr_rings++; @@ -16631,7 +16636,7 @@ static int bnxt_init_dflt_ring_mode(struct bnxt *bp) if (rc) goto init_dflt_ring_err; - bp->tx_nr_rings_per_tc = bnxt_tx_nr_rings_per_tc(bp); + bnxt_adj_tx_rings(bp); bnxt_set_dflt_rfs(bp); From 071dbfa304e85a6b04a593e950d18fa170997288 Mon Sep 17 00:00:00 2001 From: Pavan Chebbi Date: Mon, 30 Mar 2026 23:51:38 -0700 Subject: [PATCH 391/401] bnxt_en: Restore default stat ctxs for ULP when resource is available During resource reservation, if the L2 driver does not have enough MSIX vectors to provide to the RoCE driver, it sets the stat ctxs for ULP also to 0 so that we don't have to reserve it unnecessarily. However, subsequently the user may reduce L2 rings thereby freeing up some resources that the L2 driver can now earmark for RoCE. In this case, the driver should restore the default ULP stat ctxs to make sure that all RoCE resources are ready for use. The RoCE driver may fail to initialize in this scenario without this fix. Fixes: d630624ebd70 ("bnxt_en: Utilize ulp client resources if RoCE is not registered") Reviewed-by: Kalesh AP Signed-off-by: Pavan Chebbi Signed-off-by: Michael Chan Link: https://patch.msgid.link/20260331065138.948205-4-michael.chan@broadcom.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index f11f3a704da5..3f775196ef81 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -8045,6 +8045,8 @@ static int __bnxt_reserve_rings(struct bnxt *bp) ulp_msix = bnxt_get_avail_msix(bp, bp->ulp_num_msix_want); if (!ulp_msix) bnxt_set_ulp_stat_ctxs(bp, 0); + else + bnxt_set_dflt_ulp_stat_ctxs(bp); if (ulp_msix > bp->ulp_num_msix_want) ulp_msix = bp->ulp_num_msix_want; From d10a26aa4d072320530e6968ef945c8c575edf61 Mon Sep 17 00:00:00 2001 From: Martin Schiller Date: Tue, 31 Mar 2026 09:43:17 +0200 Subject: [PATCH 392/401] net/x25: Fix potential double free of skb When alloc_skb fails in x25_queue_rx_frame it calls kfree_skb(skb) at line 48 and returns 1 (error). This error propagates back through the call chain: x25_queue_rx_frame returns 1 | v x25_state3_machine receives the return value 1 and takes the else branch at line 278, setting queued=0 and returning 0 | v x25_process_rx_frame returns queued=0 | v x25_backlog_rcv at line 452 sees queued=0 and calls kfree_skb(skb) again This would free the same skb twice. Looking at x25_backlog_rcv: net/x25/x25_in.c:x25_backlog_rcv() { ... queued = x25_process_rx_frame(sk, skb); ... if (!queued) kfree_skb(skb); } Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Martin Schiller Link: https://patch.msgid.link/20260331-x25_fraglen-v4-1-3e69f18464b4@dev.tdt.de Signed-off-by: Paolo Abeni --- net/x25/x25_in.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/net/x25/x25_in.c b/net/x25/x25_in.c index b981a4828d08..0dbc73efab1c 100644 --- a/net/x25/x25_in.c +++ b/net/x25/x25_in.c @@ -44,10 +44,9 @@ static int x25_queue_rx_frame(struct sock *sk, struct sk_buff *skb, int more) if (x25->fraglen > 0) { /* End of fragment */ int len = x25->fraglen + skb->len; - if ((skbn = alloc_skb(len, GFP_ATOMIC)) == NULL){ - kfree_skb(skb); + skbn = alloc_skb(len, GFP_ATOMIC); + if (!skbn) return 1; - } skb_queue_tail(&x25->fragment_queue, skb); From a1822cb524e89b4cd2cf0b82e484a2335496a6d9 Mon Sep 17 00:00:00 2001 From: Martin Schiller Date: Tue, 31 Mar 2026 09:43:18 +0200 Subject: [PATCH 393/401] net/x25: Fix overflow when accumulating packets Add a check to ensure that `x25_sock.fraglen` does not overflow. The `fraglen` also needs to be resetted when purging `fragment_queue` in `x25_clear_queues()`. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Suggested-by: Yiming Qian Signed-off-by: Martin Schiller Link: https://patch.msgid.link/20260331-x25_fraglen-v4-2-3e69f18464b4@dev.tdt.de Signed-off-by: Paolo Abeni --- net/x25/x25_in.c | 4 ++++ net/x25/x25_subr.c | 1 + 2 files changed, 5 insertions(+) diff --git a/net/x25/x25_in.c b/net/x25/x25_in.c index 0dbc73efab1c..e47ebd8acd21 100644 --- a/net/x25/x25_in.c +++ b/net/x25/x25_in.c @@ -34,6 +34,10 @@ static int x25_queue_rx_frame(struct sock *sk, struct sk_buff *skb, int more) struct sk_buff *skbo, *skbn = skb; struct x25_sock *x25 = x25_sk(sk); + /* make sure we don't overflow */ + if (x25->fraglen + skb->len > USHRT_MAX) + return 1; + if (more) { x25->fraglen += skb->len; skb_queue_tail(&x25->fragment_queue, skb); diff --git a/net/x25/x25_subr.c b/net/x25/x25_subr.c index 0285aaa1e93c..159708d9ad20 100644 --- a/net/x25/x25_subr.c +++ b/net/x25/x25_subr.c @@ -40,6 +40,7 @@ void x25_clear_queues(struct sock *sk) skb_queue_purge(&x25->interrupt_in_queue); skb_queue_purge(&x25->interrupt_out_queue); skb_queue_purge(&x25->fragment_queue); + x25->fraglen = 0; } From faeea8bbf6e958bf3c00cb08263109661975987c Mon Sep 17 00:00:00 2001 From: Xiang Mei Date: Mon, 30 Mar 2026 22:02:15 -0700 Subject: [PATCH 394/401] net/sched: cls_fw: fix NULL pointer dereference on shared blocks The old-method path in fw_classify() calls tcf_block_q() and dereferences q->handle. Shared blocks leave block->q NULL, causing a NULL deref when an empty cls_fw filter is attached to a shared block and a packet with a nonzero major skb mark is classified. Reject the configuration in fw_change() when the old method (no TCA_OPTIONS) is used on a shared block, since fw_classify()'s old-method path needs block->q which is NULL for shared blocks. The fixed null-ptr-deref calling stack: KASAN: null-ptr-deref in range [0x0000000000000038-0x000000000000003f] RIP: 0010:fw_classify (net/sched/cls_fw.c:81) Call Trace: tcf_classify (./include/net/tc_wrapper.h:197 net/sched/cls_api.c:1764 net/sched/cls_api.c:1860) tc_run (net/core/dev.c:4401) __dev_queue_xmit (net/core/dev.c:4535 net/core/dev.c:4790) Fixes: 1abf272022cf ("net: sched: tcindex, fw, flow: use tcf_block_q helper to get struct Qdisc") Reported-by: Weiming Shi Signed-off-by: Xiang Mei Acked-by: Jamal Hadi Salim Link: https://patch.msgid.link/20260331050217.504278-1-xmei5@asu.edu Signed-off-by: Paolo Abeni --- net/sched/cls_fw.c | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/net/sched/cls_fw.c b/net/sched/cls_fw.c index be81c108179d..23884ef8b80c 100644 --- a/net/sched/cls_fw.c +++ b/net/sched/cls_fw.c @@ -247,8 +247,18 @@ static int fw_change(struct net *net, struct sk_buff *in_skb, struct nlattr *tb[TCA_FW_MAX + 1]; int err; - if (!opt) - return handle ? -EINVAL : 0; /* Succeed if it is old method. */ + if (!opt) { + if (handle) + return -EINVAL; + + if (tcf_block_shared(tp->chain->block)) { + NL_SET_ERR_MSG(extack, + "Must specify mark when attaching fw filter to block"); + return -EINVAL; + } + + return 0; /* Succeed if it is old method. */ + } err = nla_parse_nested_deprecated(tb, TCA_FW_MAX, opt, fw_policy, NULL); From 1a280dd4bd1d616a01d6ffe0de284c907b555504 Mon Sep 17 00:00:00 2001 From: Xiang Mei Date: Mon, 30 Mar 2026 22:02:16 -0700 Subject: [PATCH 395/401] net/sched: cls_flow: fix NULL pointer dereference on shared blocks flow_change() calls tcf_block_q() and dereferences q->handle to derive a default baseclass. Shared blocks leave block->q NULL, causing a NULL deref when a flow filter without a fully qualified baseclass is created on a shared block. Check tcf_block_shared() before accessing block->q and return -EINVAL for shared blocks. This avoids the null-deref shown below: ======================================================================= KASAN: null-ptr-deref in range [0x0000000000000038-0x000000000000003f] RIP: 0010:flow_change (net/sched/cls_flow.c:508) Call Trace: tc_new_tfilter (net/sched/cls_api.c:2432) rtnetlink_rcv_msg (net/core/rtnetlink.c:6980) [...] ======================================================================= Fixes: 1abf272022cf ("net: sched: tcindex, fw, flow: use tcf_block_q helper to get struct Qdisc") Reported-by: Weiming Shi Signed-off-by: Xiang Mei Acked-by: Jamal Hadi Salim Link: https://patch.msgid.link/20260331050217.504278-2-xmei5@asu.edu Signed-off-by: Paolo Abeni --- net/sched/cls_flow.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/net/sched/cls_flow.c b/net/sched/cls_flow.c index 339c664beff6..ab364e4e4686 100644 --- a/net/sched/cls_flow.c +++ b/net/sched/cls_flow.c @@ -503,8 +503,16 @@ static int flow_change(struct net *net, struct sk_buff *in_skb, } if (TC_H_MAJ(baseclass) == 0) { - struct Qdisc *q = tcf_block_q(tp->chain->block); + struct tcf_block *block = tp->chain->block; + struct Qdisc *q; + if (tcf_block_shared(block)) { + NL_SET_ERR_MSG(extack, + "Must specify baseclass when attaching flow filter to block"); + goto err2; + } + + q = tcf_block_q(block); baseclass = TC_H_MAKE(q->handle, baseclass); } if (TC_H_MIN(baseclass) == 0) From 70f73562d278d9f88e7095e327f2a50082a82c65 Mon Sep 17 00:00:00 2001 From: Xiang Mei Date: Mon, 30 Mar 2026 22:02:17 -0700 Subject: [PATCH 396/401] selftests/tc-testing: add tests for cls_fw and cls_flow on shared blocks Regression tests for the shared-block NULL derefs fixed in the previous two patches: - fw: attempt to attach an empty fw filter to a shared block and verify the configuration is rejected with EINVAL. - flow: create a flow filter on a shared block without a baseclass and verify the configuration is rejected with EINVAL. Signed-off-by: Xiang Mei Acked-by: Jamal Hadi Salim Reviewed-by: Victor Nogueira Link: https://patch.msgid.link/20260331050217.504278-3-xmei5@asu.edu Signed-off-by: Paolo Abeni --- .../tc-testing/tc-tests/infra/filter.json | 44 +++++++++++++++++++ 1 file changed, 44 insertions(+) diff --git a/tools/testing/selftests/tc-testing/tc-tests/infra/filter.json b/tools/testing/selftests/tc-testing/tc-tests/infra/filter.json index 8d10042b489b..dbce6436ed26 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/infra/filter.json +++ b/tools/testing/selftests/tc-testing/tc-tests/infra/filter.json @@ -22,5 +22,49 @@ "teardown": [ "$TC qdisc del dev $DUMMY root handle 1: htb default 1" ] + }, + { + "id": "b7e3", + "name": "Empty fw filter on shared block - rejected at config time", + "category": [ + "filter", + "fw" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + "$TC qdisc add dev $DEV1 egress_block 1 clsact" + ], + "cmdUnderTest": "$TC filter add block 1 protocol ip prio 1 fw", + "expExitCode": "2", + "verifyCmd": "$TC filter show block 1", + "matchPattern": "fw", + "matchCount": "0", + "teardown": [ + "$TC qdisc del dev $DEV1 clsact" + ] + }, + { + "id": "c8f4", + "name": "Flow filter on shared block without baseclass - rejected at config time", + "category": [ + "filter", + "flow" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + "$TC qdisc add dev $DEV1 ingress_block 1 clsact" + ], + "cmdUnderTest": "$TC filter add block 1 protocol ip prio 1 handle 1 flow map key dst", + "expExitCode": "2", + "verifyCmd": "$TC filter show block 1", + "matchPattern": "flow", + "matchCount": "0", + "teardown": [ + "$TC qdisc del dev $DEV1 clsact" + ] } ] From b18c833888742ca9de80c250f9d40d0e97caa9f6 Mon Sep 17 00:00:00 2001 From: Stefano Garzarella Date: Wed, 1 Apr 2026 11:21:53 +0200 Subject: [PATCH 397/401] vsock: initialize child_ns_mode_locked in vsock_net_init() The `child_ns_mode_locked` field lives in `struct net`, which persists across vsock module reloads. When the module is unloaded and reloaded, `vsock_net_init()` resets `mode` and `child_ns_mode` back to their default values, but does not reset `child_ns_mode_locked`. The stale lock from the previous module load causes subsequent writes to `child_ns_mode` to silently fail: `vsock_net_set_child_mode()` sees the old lock, skips updating the actual value, and returns success when the requested mode matches the stale lock. The sysctl handler reports no error, but `child_ns_mode` remains unchanged. Steps to reproduce: $ modprobe vsock $ echo local > /proc/sys/net/vsock/child_ns_mode $ cat /proc/sys/net/vsock/child_ns_mode local $ modprobe -r vsock $ modprobe vsock $ echo local > /proc/sys/net/vsock/child_ns_mode $ cat /proc/sys/net/vsock/child_ns_mode global <--- expected "local" Fix this by initializing `child_ns_mode_locked` to 0 (unlocked) in `vsock_net_init()`, so the write-once mechanism works correctly after module reload. Fixes: 102eab95f025 ("vsock: lock down child_ns_mode as write-once") Reported-by: Jin Liu Signed-off-by: Stefano Garzarella Reviewed-by: Bobby Eshleman Link: https://patch.msgid.link/20260401092153.28462-1-sgarzare@redhat.com Signed-off-by: Jakub Kicinski --- net/vmw_vsock/af_vsock.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c index 2f7d94d682cb..d912ed2f012a 100644 --- a/net/vmw_vsock/af_vsock.c +++ b/net/vmw_vsock/af_vsock.c @@ -2928,6 +2928,7 @@ static void vsock_net_init(struct net *net) net->vsock.mode = vsock_net_child_mode(current->nsproxy->net_ns); net->vsock.child_ns_mode = net->vsock.mode; + net->vsock.child_ns_mode_locked = 0; } static __net_init int vsock_sysctl_init_net(struct net *net) From f5df2990c364d1ac596d24b3118dbc56503f7cd4 Mon Sep 17 00:00:00 2001 From: Luka Gejak Date: Wed, 1 Apr 2026 11:22:42 +0200 Subject: [PATCH 398/401] net: hsr: serialize seq_blocks merge across nodes During node merging, hsr_handle_sup_frame() walks node_curr->seq_blocks to update node_real without holding node_curr->seq_out_lock. This allows concurrent mutations from duplicate registration paths, risking inconsistent state or XArray/bitmap corruption. Fix this by locking both nodes' seq_out_lock during the merge. To prevent ABBA deadlocks, locks are acquired in order of memory address. Reviewed-by: Felix Maurer Fixes: 415e6367512b ("hsr: Implement more robust duplicate discard for PRP") Signed-off-by: Luka Gejak Link: https://patch.msgid.link/20260401092243.52121-2-luka.gejak@linux.dev Signed-off-by: Jakub Kicinski --- net/hsr/hsr_framereg.c | 38 ++++++++++++++++++++++++++++++++++++-- 1 file changed, 36 insertions(+), 2 deletions(-) diff --git a/net/hsr/hsr_framereg.c b/net/hsr/hsr_framereg.c index 50996f4de7f9..d41863593674 100644 --- a/net/hsr/hsr_framereg.c +++ b/net/hsr/hsr_framereg.c @@ -123,6 +123,40 @@ static void hsr_free_node_rcu(struct rcu_head *rn) hsr_free_node(node); } +static void hsr_lock_seq_out_pair(struct hsr_node *node_a, + struct hsr_node *node_b) +{ + if (node_a == node_b) { + spin_lock_bh(&node_a->seq_out_lock); + return; + } + + if (node_a < node_b) { + spin_lock_bh(&node_a->seq_out_lock); + spin_lock_nested(&node_b->seq_out_lock, SINGLE_DEPTH_NESTING); + } else { + spin_lock_bh(&node_b->seq_out_lock); + spin_lock_nested(&node_a->seq_out_lock, SINGLE_DEPTH_NESTING); + } +} + +static void hsr_unlock_seq_out_pair(struct hsr_node *node_a, + struct hsr_node *node_b) +{ + if (node_a == node_b) { + spin_unlock_bh(&node_a->seq_out_lock); + return; + } + + if (node_a < node_b) { + spin_unlock(&node_b->seq_out_lock); + spin_unlock_bh(&node_a->seq_out_lock); + } else { + spin_unlock(&node_a->seq_out_lock); + spin_unlock_bh(&node_b->seq_out_lock); + } +} + void hsr_del_nodes(struct list_head *node_db) { struct hsr_node *node; @@ -432,7 +466,7 @@ void hsr_handle_sup_frame(struct hsr_frame_info *frame) } ether_addr_copy(node_real->macaddress_B, ethhdr->h_source); - spin_lock_bh(&node_real->seq_out_lock); + hsr_lock_seq_out_pair(node_real, node_curr); for (i = 0; i < HSR_PT_PORTS; i++) { if (!node_curr->time_in_stale[i] && time_after(node_curr->time_in[i], node_real->time_in[i])) { @@ -455,7 +489,7 @@ void hsr_handle_sup_frame(struct hsr_frame_info *frame) src_blk->seq_nrs[i], HSR_SEQ_BLOCK_SIZE); } } - spin_unlock_bh(&node_real->seq_out_lock); + hsr_unlock_seq_out_pair(node_real, node_curr); node_real->addr_B_port = port_rcv->type; spin_lock_bh(&hsr->list_lock); From 2e3514e63bfb0e972b1f19668547a455d0129e88 Mon Sep 17 00:00:00 2001 From: Luka Gejak Date: Wed, 1 Apr 2026 11:22:43 +0200 Subject: [PATCH 399/401] net: hsr: fix VLAN add unwind on slave errors When vlan_vid_add() fails for a secondary slave, the error path calls vlan_vid_del() on the failing port instead of the peer slave that had already succeeded. This results in asymmetric VLAN state across the HSR pair. Fix this by switching to a centralized unwind path that removes the VID from any slave device that was already programmed. Fixes: 1a8a63a5305e ("net: hsr: Add VLAN CTAG filter support") Signed-off-by: Luka Gejak Link: https://patch.msgid.link/20260401092243.52121-3-luka.gejak@linux.dev Signed-off-by: Jakub Kicinski --- net/hsr/hsr_device.c | 32 +++++++++++++++++--------------- 1 file changed, 17 insertions(+), 15 deletions(-) diff --git a/net/hsr/hsr_device.c b/net/hsr/hsr_device.c index d1bfc49b5f01..fd2fea25eff0 100644 --- a/net/hsr/hsr_device.c +++ b/net/hsr/hsr_device.c @@ -532,8 +532,8 @@ static void hsr_change_rx_flags(struct net_device *dev, int change) static int hsr_ndo_vlan_rx_add_vid(struct net_device *dev, __be16 proto, u16 vid) { - bool is_slave_a_added = false; - bool is_slave_b_added = false; + struct net_device *slave_a_dev = NULL; + struct net_device *slave_b_dev = NULL; struct hsr_port *port; struct hsr_priv *hsr; int ret = 0; @@ -549,33 +549,35 @@ static int hsr_ndo_vlan_rx_add_vid(struct net_device *dev, switch (port->type) { case HSR_PT_SLAVE_A: if (ret) { - /* clean up Slave-B */ netdev_err(dev, "add vid failed for Slave-A\n"); - if (is_slave_b_added) - vlan_vid_del(port->dev, proto, vid); - return ret; + goto unwind; } - - is_slave_a_added = true; + slave_a_dev = port->dev; break; - case HSR_PT_SLAVE_B: if (ret) { - /* clean up Slave-A */ netdev_err(dev, "add vid failed for Slave-B\n"); - if (is_slave_a_added) - vlan_vid_del(port->dev, proto, vid); - return ret; + goto unwind; } - - is_slave_b_added = true; + slave_b_dev = port->dev; break; default: + if (ret) + goto unwind; break; } } return 0; + +unwind: + if (slave_a_dev) + vlan_vid_del(slave_a_dev, proto, vid); + + if (slave_b_dev) + vlan_vid_del(slave_b_dev, proto, vid); + + return ret; } static int hsr_ndo_vlan_rx_kill_vid(struct net_device *dev, From 4e453375561fc60820e6b9d8ebeb6b3ee177d42e Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 1 Apr 2026 15:47:21 +0000 Subject: [PATCH 400/401] ipv6: avoid overflows in ip6_datagram_send_ctl() Yiming Qian reported : I believe I found a locally triggerable kernel bug in the IPv6 sendmsg ancillary-data path that can panic the kernel via `skb_under_panic()` (local DoS). The core issue is a mismatch between: - a 16-bit length accumulator (`struct ipv6_txoptions::opt_flen`, type `__u16`) and - a pointer to the *last* provided destination-options header (`opt->dst1opt`) when multiple `IPV6_DSTOPTS` control messages (cmsgs) are provided. - `include/net/ipv6.h`: - `struct ipv6_txoptions::opt_flen` is `__u16` (wrap possible). (lines 291-307, especially 298) - `net/ipv6/datagram.c:ip6_datagram_send_ctl()`: - Accepts repeated `IPV6_DSTOPTS` and accumulates into `opt_flen` without rejecting duplicates. (lines 909-933) - `net/ipv6/ip6_output.c:__ip6_append_data()`: - Uses `opt->opt_flen + opt->opt_nflen` to compute header sizes/headroom decisions. (lines 1448-1466, especially 1463-1465) - `net/ipv6/ip6_output.c:__ip6_make_skb()`: - Calls `ipv6_push_frag_opts()` if `opt->opt_flen` is non-zero. (lines 1930-1934) - `net/ipv6/exthdrs.c:ipv6_push_frag_opts()` / `ipv6_push_exthdr()`: - Push size comes from `ipv6_optlen(opt->dst1opt)` (based on the pointed-to header). (lines 1179-1185 and 1206-1211) 1. `opt_flen` is a 16-bit accumulator: - `include/net/ipv6.h:298` defines `__u16 opt_flen; /* after fragment hdr */`. 2. `ip6_datagram_send_ctl()` accepts *repeated* `IPV6_DSTOPTS` cmsgs and increments `opt_flen` each time: - In `net/ipv6/datagram.c:909-933`, for `IPV6_DSTOPTS`: - It computes `len = ((hdr->hdrlen + 1) << 3);` - It checks `CAP_NET_RAW` using `ns_capable(net->user_ns, CAP_NET_RAW)`. (line 922) - Then it does: - `opt->opt_flen += len;` (line 927) - `opt->dst1opt = hdr;` (line 928) There is no duplicate rejection here (unlike the legacy `IPV6_2292DSTOPTS` path which rejects duplicates at `net/ipv6/datagram.c:901-904`). If enough large `IPV6_DSTOPTS` cmsgs are provided, `opt_flen` wraps while `dst1opt` still points to a large (2048-byte) destination-options header. In the attached PoC (`poc.c`): - 32 cmsgs with `hdrlen=255` => `len = (255+1)*8 = 2048` - 1 cmsg with `hdrlen=0` => `len = 8` - Total increment: `32*2048 + 8 = 65544`, so `(__u16)opt_flen == 8` - The last cmsg is 2048 bytes, so `dst1opt` points to a 2048-byte header. 3. The transmit path sizes headers using the wrapped `opt_flen`: - In `net/ipv6/ip6_output.c:1463-1465`: - `headersize = sizeof(struct ipv6hdr) + (opt ? opt->opt_flen + opt->opt_nflen : 0) + ...;` With wrapped `opt_flen`, `headersize`/headroom decisions underestimate what will be pushed later. 4. When building the final skb, the actual push length comes from `dst1opt` and is not limited by wrapped `opt_flen`: - In `net/ipv6/ip6_output.c:1930-1934`: - `if (opt->opt_flen) proto = ipv6_push_frag_opts(skb, opt, proto);` - In `net/ipv6/exthdrs.c:1206-1211`, `ipv6_push_frag_opts()` pushes `dst1opt` via `ipv6_push_exthdr()`. - In `net/ipv6/exthdrs.c:1179-1184`, `ipv6_push_exthdr()` does: - `skb_push(skb, ipv6_optlen(opt));` - `memcpy(h, opt, ipv6_optlen(opt));` With insufficient headroom, `skb_push()` underflows and triggers `skb_under_panic()` -> `BUG()`: - `net/core/skbuff.c:2669-2675` (`skb_push()` calls `skb_under_panic()`) - `net/core/skbuff.c:207-214` (`skb_panic()` ends in `BUG()`) - The `IPV6_DSTOPTS` cmsg path requires `CAP_NET_RAW` in the target netns user namespace (`ns_capable(net->user_ns, CAP_NET_RAW)`). - Root (or any task with `CAP_NET_RAW`) can trigger this without user namespaces. - An unprivileged `uid=1000` user can trigger this if unprivileged user namespaces are enabled and it can create a userns+netns to obtain namespaced `CAP_NET_RAW` (the attached PoC does this). - Local denial of service: kernel BUG/panic (system crash). - Reproducible with a small userspace PoC. This patch does not reject duplicated options, as this might break some user applications. Instead, it makes sure to adjust opt_flen and opt_nflen to correctly reflect the size of the current option headers, preventing the overflows and the potential for panics. This applies to IPV6_DSTOPTS, IPV6_HOPOPTS, and IPV6_RTHDR. Specifically: When a new IPV6_DSTOPTS is processed, the length of the old opt->dst1opt is subtracted from opt->opt_flen before adding the new length. When a new IPV6_HOPOPTS is processed, the length of the old opt->dst0opt is subtracted from opt->opt_nflen. When a new Routing Header (IPV6_RTHDR or IPV6_2292RTHDR) is processed, the length of the old opt->srcrt is subtracted from opt->opt_nflen. In the special case within IPV6_2292RTHDR handling where dst1opt is moved to dst0opt, the length of the old opt->dst0opt is subtracted from opt->opt_nflen before the new one is added. Fixes: 333fad5364d6 ("[IPV6]: Support several new sockopt / ancillary data in Advanced API (RFC3542).") Reported-by: Yiming Qian Closes: https://lore.kernel.org/netdev/CAL_bE8JNzawgr5OX5m+3jnQDHry2XxhQT5=jThW1zDPtUikRYA@mail.gmail.com/ Signed-off-by: Eric Dumazet Link: https://patch.msgid.link/20260401154721.3740056-1-edumazet@google.com Signed-off-by: Jakub Kicinski --- net/ipv6/datagram.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c index c564b68a0562..993e2d76fc1f 100644 --- a/net/ipv6/datagram.c +++ b/net/ipv6/datagram.c @@ -763,6 +763,7 @@ int ip6_datagram_send_ctl(struct net *net, struct sock *sk, { struct in6_pktinfo *src_info; struct cmsghdr *cmsg; + struct ipv6_rt_hdr *orthdr; struct ipv6_rt_hdr *rthdr; struct ipv6_opt_hdr *hdr; struct ipv6_txoptions *opt = ipc6->opt; @@ -924,9 +925,13 @@ int ip6_datagram_send_ctl(struct net *net, struct sock *sk, goto exit_f; } if (cmsg->cmsg_type == IPV6_DSTOPTS) { + if (opt->dst1opt) + opt->opt_flen -= ipv6_optlen(opt->dst1opt); opt->opt_flen += len; opt->dst1opt = hdr; } else { + if (opt->dst0opt) + opt->opt_nflen -= ipv6_optlen(opt->dst0opt); opt->opt_nflen += len; opt->dst0opt = hdr; } @@ -969,12 +974,17 @@ int ip6_datagram_send_ctl(struct net *net, struct sock *sk, goto exit_f; } + orthdr = opt->srcrt; + if (orthdr) + opt->opt_nflen -= ((orthdr->hdrlen + 1) << 3); opt->opt_nflen += len; opt->srcrt = rthdr; if (cmsg->cmsg_type == IPV6_2292RTHDR && opt->dst1opt) { int dsthdrlen = ((opt->dst1opt->hdrlen+1)<<3); + if (opt->dst0opt) + opt->opt_nflen -= ipv6_optlen(opt->dst0opt); opt->opt_nflen += dsthdrlen; opt->dst0opt = opt->dst1opt; opt->dst1opt = NULL; From ec7067e661193403a7a00980bda8612db5954142 Mon Sep 17 00:00:00 2001 From: Dimitri Daskalakis Date: Wed, 1 Apr 2026 09:28:48 -0700 Subject: [PATCH 401/401] eth: fbnic: Increase FBNIC_QUEUE_SIZE_MIN to 64 On systems with 64K pages, RX queues will be wedged if users set the descriptor count to the current minimum (16). Fbnic fragments large pages into 4K chunks, and scales down the ring size accordingly. With 64K pages and 16 descriptors, the ring size mask is 0 and will never be filled. 32 descriptors is another special case that wedges the RX rings. Internally, the rings track pages for the head/tail pointers, not page fragments. So with 32 descriptors, there's only 1 usable page as one ring slot is kept empty to disambiguate between an empty/full ring. As a result, the head pointer never advances and the HW stalls after consuming 16 page fragments. Fixes: 0cb4c0a13723 ("eth: fbnic: Implement Rx queue alloc/start/stop/free") Signed-off-by: Dimitri Daskalakis Link: https://patch.msgid.link/20260401162848.2335350-1-dimitri.daskalakis1@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/meta/fbnic/fbnic_txrx.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_txrx.h b/drivers/net/ethernet/meta/fbnic/fbnic_txrx.h index 980965274079..e03c9d2c38dc 100644 --- a/drivers/net/ethernet/meta/fbnic/fbnic_txrx.h +++ b/drivers/net/ethernet/meta/fbnic/fbnic_txrx.h @@ -38,7 +38,7 @@ struct fbnic_net; #define FBNIC_MAX_XDPQS 128u /* These apply to TWQs, TCQ, RCQ */ -#define FBNIC_QUEUE_SIZE_MIN 16u +#define FBNIC_QUEUE_SIZE_MIN 64u #define FBNIC_QUEUE_SIZE_MAX SZ_64K #define FBNIC_TXQ_SIZE_DEFAULT 1024