From 29d1f56c4f3001b7f547123e0a307c009ac717f8 Mon Sep 17 00:00:00 2001 From: Heiko Stuebner Date: Tue, 10 Feb 2026 13:01:42 +0100 Subject: [PATCH 001/248] Revert "arm64: dts: rockchip: Further describe the WiFi for the Pinebook Pro" This reverts commit 6d54d935062e2d4a7d3f779ceb9eeff108d0535d. It seems there are different variants of the Wifi chipset in use on the Pinebook Pro. And according to the reported regression - see Closes below, the reverted change causes issues with one Wifi chipset. The original commit message indicates a "further description" only and does not indicate this would fix an actual problem, so a revert should not cause further problems. Fixes: 6d54d935062e ("arm64: dts: rockchip: Further describe the WiFi for the Pinebook Pro") Cc: Jan Palus Cc: Peter Robinson Cc: Thorsten Leemhuis Cc: stable@vger.kernel.org Closes: https://lore.kernel.org/r/aUKOlj-RvTYlrpiS@rock.grzadka/ Tested-by: Jan Palus Reviewed-by: Dragan Simic Signed-off-by: Heiko Stuebner Link: https://patch.msgid.link/20260210120142.698512-1-heiko@sntech.de Signed-off-by: Heiko Stuebner --- .../boot/dts/rockchip/rk3399-pinebook-pro.dts | 18 ------------------ 1 file changed, 18 deletions(-) diff --git a/arch/arm64/boot/dts/rockchip/rk3399-pinebook-pro.dts b/arch/arm64/boot/dts/rockchip/rk3399-pinebook-pro.dts index 753d51344954..ae937a3afa11 100644 --- a/arch/arm64/boot/dts/rockchip/rk3399-pinebook-pro.dts +++ b/arch/arm64/boot/dts/rockchip/rk3399-pinebook-pro.dts @@ -879,12 +879,6 @@ vcc5v0_host_en_pin: vcc5v0-host-en-pin { }; }; - wifi { - wifi_host_wake_l: wifi-host-wake-l { - rockchip,pins = <0 RK_PA3 RK_FUNC_GPIO &pcfg_pull_none>; - }; - }; - wireless-bluetooth { bt_wake_pin: bt-wake-pin { rockchip,pins = <2 RK_PD3 RK_FUNC_GPIO &pcfg_pull_none>; @@ -942,19 +936,7 @@ &sdio0 { pinctrl-names = "default"; pinctrl-0 = <&sdio0_bus4 &sdio0_cmd &sdio0_clk>; sd-uhs-sdr104; - #address-cells = <1>; - #size-cells = <0>; status = "okay"; - - brcmf: wifi@1 { - compatible = "brcm,bcm4329-fmac"; - reg = <1>; - interrupt-parent = <&gpio0>; - interrupts = ; - interrupt-names = "host-wake"; - pinctrl-names = "default"; - pinctrl-0 = <&wifi_host_wake_l>; - }; }; &sdhci { From 7ec1bd3d9be671d04325b9e06149b8813f6a4836 Mon Sep 17 00:00:00 2001 From: Potin Lai Date: Thu, 22 Jan 2026 16:37:56 +0800 Subject: [PATCH 002/248] soc: aspeed: socinfo: Mask table entries for accurate SoC ID matching The siliconid_to_name() function currently masks the input silicon ID with 0xff00ffff, but compares it against unmasked table entries. This causes matching to fail if the table entries contain non-zero values in the bits covered by the mask (bits 16-23). Update the logic to apply the 0xff00ffff mask to the table entries during comparison. This ensures that only the relevant model and revision bits are considered, providing a consistent match across different manufacturing batches. [arj: Add Fixes: tag, fix 'soninfo' typo, clarify function reference] Fixes: e0218dca5787 ("soc: aspeed: Add soc info driver") Signed-off-by: Potin Lai Link: https://patch.msgid.link/20260122-soc_aspeed_name_fix-v1-1-33a847f2581c@gmail.com Signed-off-by: Andrew Jeffery --- drivers/soc/aspeed/aspeed-socinfo.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/soc/aspeed/aspeed-socinfo.c b/drivers/soc/aspeed/aspeed-socinfo.c index 5e34e01ad26d..fb8fde94b651 100644 --- a/drivers/soc/aspeed/aspeed-socinfo.c +++ b/drivers/soc/aspeed/aspeed-socinfo.c @@ -39,7 +39,7 @@ static const char *siliconid_to_name(u32 siliconid) unsigned int i; for (i = 0 ; i < ARRAY_SIZE(rev_table) ; ++i) { - if (rev_table[i].id == id) + if ((rev_table[i].id & 0xff00ffff) == id) return rev_table[i].name; } From 3ecea84d2b90bbf934d5ca75514fa902fd71e03f Mon Sep 17 00:00:00 2001 From: Daniel J Blueman Date: Fri, 20 Feb 2026 20:44:58 +0800 Subject: [PATCH 003/248] arm64: dts: qcom: hamoa/x1: fix idle exit latency Designs based on the Qualcomm X1 Hamoa reference platform report: driver: Idle state 1 target residency too low This is because the declared X1 idle entry plus exit latency of 680us exceeds the declared minimum 600us residency time: entry-latency-us = <180>; exit-latency-us = <500>; min-residency-us = <600>; Fix this to be 320us so the sum of the entry and exit latencies matches the downstream 500us exit latency, as directed by Maulik. Tested on a Lenovo Yoga Slim 7x with Qualcomm X1E-80-100. Fixes: 2e65616ef07f ("arm64: dts: qcom: x1e80100: Update C4/C5 residency/exit numbers") Signed-off-by: Daniel J Blueman Reviewed-by: Konrad Dybcio Link: https://lore.kernel.org/r/20260220124626.8611-1-daniel@quora.org Signed-off-by: Bjorn Andersson --- arch/arm64/boot/dts/qcom/hamoa.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/boot/dts/qcom/hamoa.dtsi b/arch/arm64/boot/dts/qcom/hamoa.dtsi index db65c392e618..4b0784af4bd3 100644 --- a/arch/arm64/boot/dts/qcom/hamoa.dtsi +++ b/arch/arm64/boot/dts/qcom/hamoa.dtsi @@ -269,7 +269,7 @@ cluster_c4: cpu-sleep-0 { idle-state-name = "ret"; arm,psci-suspend-param = <0x00000004>; entry-latency-us = <180>; - exit-latency-us = <500>; + exit-latency-us = <320>; min-residency-us = <600>; }; }; From b7df21c59739cceb7b866c6c5e8a6ba03875ab71 Mon Sep 17 00:00:00 2001 From: Ravi Hothi Date: Fri, 20 Feb 2026 14:32:20 +0530 Subject: [PATCH 004/248] arm64: dts: qcom: qcm6490-idp: Fix WCD9370 reset GPIO polarity The WCD9370 audio codec reset line on QCM6490 IDP should be active-low, but the device tree described it as active-high. As a result, the codec is kept in reset and fails to reset the SoundWire, leading to timeouts and ASoC card probe failure (-ETIMEDOUT). Fix the reset GPIO polarity to GPIO_ACTIVE_LOW so the codec can properly initialize. Fixes: aa04c298619f ("arm64: dts: qcom: qcm6490-idp: Add WSA8830 speakers and WCD9370 headset codec") Signed-off-by: Ravi Hothi Reviewed-by: Krzysztof Kozlowski Reviewed-by: Konrad Dybcio Link: https://lore.kernel.org/r/20260220090220.2992193-1-ravi.hothi@oss.qualcomm.com Signed-off-by: Bjorn Andersson --- arch/arm64/boot/dts/qcom/qcm6490-idp.dts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/boot/dts/qcom/qcm6490-idp.dts b/arch/arm64/boot/dts/qcom/qcm6490-idp.dts index 089a027c57d5..b2f00e107643 100644 --- a/arch/arm64/boot/dts/qcom/qcm6490-idp.dts +++ b/arch/arm64/boot/dts/qcom/qcm6490-idp.dts @@ -177,7 +177,7 @@ wcd9370: audio-codec-0 { pinctrl-0 = <&wcd_default>; pinctrl-names = "default"; - reset-gpios = <&tlmm 83 GPIO_ACTIVE_HIGH>; + reset-gpios = <&tlmm 83 GPIO_ACTIVE_LOW>; vdd-buck-supply = <&vreg_l17b_1p7>; vdd-rxtx-supply = <&vreg_l18b_1p8>; From 1f99b5d93d99ca17d50b386a674d0ce1f20932d8 Mon Sep 17 00:00:00 2001 From: Sebastian Krzyszkowiak Date: Wed, 28 Jan 2026 00:28:28 +0100 Subject: [PATCH 005/248] arm64: dts: imx8mq: Set the correct gpu_ahb clock frequency According to i.MX 8M Quad Reference Manual, GPU_AHB_CLK_ROOT's maximum frequency is 400MHz. Fixes: 45d2c84eb3a2 ("arm64: dts: imx8mq: add GPU node") Reviewed-by: Frank Li Signed-off-by: Sebastian Krzyszkowiak Reviewed-by: Peng Fan Reviewed-by: Fabio Estevam Signed-off-by: Frank Li --- arch/arm64/boot/dts/freescale/imx8mq.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/boot/dts/freescale/imx8mq.dtsi b/arch/arm64/boot/dts/freescale/imx8mq.dtsi index 607962f807be..6a25e219832c 100644 --- a/arch/arm64/boot/dts/freescale/imx8mq.dtsi +++ b/arch/arm64/boot/dts/freescale/imx8mq.dtsi @@ -1632,7 +1632,7 @@ gpu: gpu@38000000 { <&clk IMX8MQ_GPU_PLL_OUT>, <&clk IMX8MQ_GPU_PLL>; assigned-clock-rates = <800000000>, <800000000>, - <800000000>, <800000000>, <0>; + <800000000>, <400000000>, <0>; power-domains = <&pgc_gpu>; }; From 08903184553def7ba1ad6ba4fa8afe1ba2ee0a21 Mon Sep 17 00:00:00 2001 From: Luke Wang Date: Tue, 3 Feb 2026 19:23:08 +0800 Subject: [PATCH 006/248] arm64: dts: imx93-9x9-qsb: change usdhc tuning step for eMMC and SD During system resume, the following errors occurred: [ 430.638625] mmc1: error -84 writing Cache Enable bit [ 430.643618] mmc1: error -84 doing runtime resume For eMMC and SD, there are two tuning pass windows and the gap between those two windows may only have one cell. If tuning step > 1, the gap may just be skipped and host assumes those two windows as a continuous windows. This will cause a wrong delay cell near the gap to be selected. Set the tuning step to 1 to avoid selecting the wrong delay cell. For SDIO, the gap is sufficiently large, so the default tuning step does not cause this issue. Fixes: 0565d20cd8c2 ("arm64: dts: freescale: Support i.MX93 9x9 Quick Start Board") Signed-off-by: Luke Wang Reviewed-by: Frank Li Signed-off-by: Frank Li --- arch/arm64/boot/dts/freescale/imx93-9x9-qsb.dts | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/arm64/boot/dts/freescale/imx93-9x9-qsb.dts b/arch/arm64/boot/dts/freescale/imx93-9x9-qsb.dts index 0852067eab2c..197c8f8b7f66 100644 --- a/arch/arm64/boot/dts/freescale/imx93-9x9-qsb.dts +++ b/arch/arm64/boot/dts/freescale/imx93-9x9-qsb.dts @@ -507,6 +507,7 @@ &usdhc1 { pinctrl-2 = <&pinctrl_usdhc1_200mhz>; bus-width = <8>; non-removable; + fsl,tuning-step = <1>; status = "okay"; }; @@ -519,6 +520,7 @@ &usdhc2 { vmmc-supply = <®_usdhc2_vmmc>; bus-width = <4>; no-mmc; + fsl,tuning-step = <1>; status = "okay"; }; From 44db7bc66eb38e85bb32777c5fd3a4e7baa84147 Mon Sep 17 00:00:00 2001 From: Markus Niebel Date: Mon, 9 Feb 2026 16:50:13 +0100 Subject: [PATCH 007/248] arm64: dts: imx91-tqma9131: improve eMMC pad configuration MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Use DSE x4 an PullUp for CMD an DAT, DSE x4 and PullDown for CLK to improve stability and detection at low temperatures under -25°C. Fixes: e71db39f0c7c ("arm64: dts: freescale: add initial device tree for TQMa91xx/MBa91xxCA") Signed-off-by: Markus Niebel Signed-off-by: Alexander Stein Reviewed-by: Frank Li Signed-off-by: Frank Li --- .../boot/dts/freescale/imx91-tqma9131.dtsi | 20 +++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/arch/arm64/boot/dts/freescale/imx91-tqma9131.dtsi b/arch/arm64/boot/dts/freescale/imx91-tqma9131.dtsi index 5792952b7a8e..c99d7bc16848 100644 --- a/arch/arm64/boot/dts/freescale/imx91-tqma9131.dtsi +++ b/arch/arm64/boot/dts/freescale/imx91-tqma9131.dtsi @@ -272,20 +272,20 @@ pinctrl_reg_usdhc2_vmmc: regusdhc2vmmcgrp { /* enable SION for data and cmd pad due to ERR052021 */ pinctrl_usdhc1: usdhc1grp { fsl,pins = /* PD | FSEL 3 | DSE X5 */ - , + , /* HYS | FSEL 0 | no drive */ , /* HYS | FSEL 3 | X5 */ - , + , /* HYS | FSEL 3 | X4 */ - , - , - , - , - , - , - , - ; + , + , + , + , + , + , + , + ; }; pinctrl_wdog: wdoggrp { From b6c94c71f349479b76fcc0ef0dc7147f3f326dff Mon Sep 17 00:00:00 2001 From: Markus Niebel Date: Mon, 9 Feb 2026 16:50:14 +0100 Subject: [PATCH 008/248] arm64: dts: imx93-tqma9352: improve eMMC pad configuration MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Use DSE x4 an PullUp for CMD an DAT, DSE x4 and PullDown for CLK to improve stability and detection at low temperatures under -25°C. Fixes: 0b5fdfaa8e45 ("arm64: dts: freescale: imx93-tqma9352: set SION for cmd and data pad of USDHC") Signed-off-by: Markus Niebel Signed-off-by: Alexander Stein Reviewed-by: Frank Li Signed-off-by: Frank Li --- .../boot/dts/freescale/imx93-tqma9352.dtsi | 26 +++++++++---------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/arch/arm64/boot/dts/freescale/imx93-tqma9352.dtsi b/arch/arm64/boot/dts/freescale/imx93-tqma9352.dtsi index 3a23e2eb9feb..ce34a296495c 100644 --- a/arch/arm64/boot/dts/freescale/imx93-tqma9352.dtsi +++ b/arch/arm64/boot/dts/freescale/imx93-tqma9352.dtsi @@ -271,21 +271,21 @@ MX93_PAD_SD2_RESET_B__GPIO3_IO07 0x106 /* enable SION for data and cmd pad due to ERR052021 */ pinctrl_usdhc1: usdhc1grp { fsl,pins = < - /* PD | FSEL 3 | DSE X5 */ - MX93_PAD_SD1_CLK__USDHC1_CLK 0x5be + /* PD | FSEL 3 | DSE X4 */ + MX93_PAD_SD1_CLK__USDHC1_CLK 0x59e /* HYS | FSEL 0 | no drive */ MX93_PAD_SD1_STROBE__USDHC1_STROBE 0x1000 - /* HYS | FSEL 3 | X5 */ - MX93_PAD_SD1_CMD__USDHC1_CMD 0x400011be - /* HYS | FSEL 3 | X4 */ - MX93_PAD_SD1_DATA0__USDHC1_DATA0 0x4000119e - MX93_PAD_SD1_DATA1__USDHC1_DATA1 0x4000119e - MX93_PAD_SD1_DATA2__USDHC1_DATA2 0x4000119e - MX93_PAD_SD1_DATA3__USDHC1_DATA3 0x4000119e - MX93_PAD_SD1_DATA4__USDHC1_DATA4 0x4000119e - MX93_PAD_SD1_DATA5__USDHC1_DATA5 0x4000119e - MX93_PAD_SD1_DATA6__USDHC1_DATA6 0x4000119e - MX93_PAD_SD1_DATA7__USDHC1_DATA7 0x4000119e + /* HYS | PU | FSEL 3 | DSE X4 */ + MX93_PAD_SD1_CMD__USDHC1_CMD 0x4000139e + /* HYS | PU | FSEL 3 | DSE X4 */ + MX93_PAD_SD1_DATA0__USDHC1_DATA0 0x4000139e + MX93_PAD_SD1_DATA1__USDHC1_DATA1 0x4000139e + MX93_PAD_SD1_DATA2__USDHC1_DATA2 0x4000139e + MX93_PAD_SD1_DATA3__USDHC1_DATA3 0x4000139e + MX93_PAD_SD1_DATA4__USDHC1_DATA4 0x4000139e + MX93_PAD_SD1_DATA5__USDHC1_DATA5 0x4000139e + MX93_PAD_SD1_DATA6__USDHC1_DATA6 0x4000139e + MX93_PAD_SD1_DATA7__USDHC1_DATA7 0x4000139e >; }; From 5b2a16ab0dbd090dc545c05ee79a077cc7a9c1e0 Mon Sep 17 00:00:00 2001 From: Loic Poulain Date: Mon, 2 Feb 2026 16:56:11 +0100 Subject: [PATCH 009/248] arm64: dts: qcom: monaco: Fix UART10 pinconf UART10 RTS and TX pins were incorrectly mapped to gpio84 and gpio85. Correct them to gpio85 (RTS) and gpio86 (TX) to match the hardware I/O mapping. Fixes: 467284a3097f ("arm64: dts: qcom: qcs8300: Add QUPv3 configuration") Signed-off-by: Loic Poulain Reviewed-by: Konrad Dybcio Reviewed-by: Dmitry Baryshkov Link: https://lore.kernel.org/r/20260202155611.1568-1-loic.poulain@oss.qualcomm.com Signed-off-by: Bjorn Andersson --- arch/arm64/boot/dts/qcom/monaco.dtsi | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/boot/dts/qcom/monaco.dtsi b/arch/arm64/boot/dts/qcom/monaco.dtsi index 5d2df4305d1c..dbb2273b0ee8 100644 --- a/arch/arm64/boot/dts/qcom/monaco.dtsi +++ b/arch/arm64/boot/dts/qcom/monaco.dtsi @@ -6414,12 +6414,12 @@ qup_uart10_cts: qup-uart10-cts-state { }; qup_uart10_rts: qup-uart10-rts-state { - pins = "gpio84"; + pins = "gpio85"; function = "qup1_se2"; }; qup_uart10_tx: qup-uart10-tx-state { - pins = "gpio85"; + pins = "gpio86"; function = "qup1_se2"; }; From 57d2371d52be1d574b33382bfbf8052485b99d8b Mon Sep 17 00:00:00 2001 From: Zhang Rui Date: Wed, 4 Mar 2026 08:30:12 +0800 Subject: [PATCH 010/248] tools/power turbostat: Fix illegal memory access when SMT is present and disabled When SMT is present and disabled, turbostat may under-size the thread_data array. This can corrupt results or cause turbostat to exit with a segmentation fault. [lenb: commit message] Fixes: a2b4d0f8bf07 ("tools/power turbostat: Favor cpu# over core#") Signed-off-by: Zhang Rui Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index 1a2671c28209..ae827485950d 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -9702,13 +9702,12 @@ void allocate_counters(struct counters *counters) { int i; int num_cores = topo.cores_per_node * topo.nodes_per_pkg * topo.num_packages; - int num_threads = topo.threads_per_core * num_cores; - counters->threads = calloc(num_threads, sizeof(struct thread_data)); + counters->threads = calloc(topo.max_cpu_num + 1, sizeof(struct thread_data)); if (counters->threads == NULL) goto error; - for (i = 0; i < num_threads; i++) + for (i = 0; i < topo.max_cpu_num + 1; i++) (counters->threads)[i].cpu_id = -1; counters->cores = calloc(num_cores, sizeof(struct core_data)); From 2619da73bb2f10d88f7e1087125c40144fdf0987 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Thu, 5 Mar 2026 20:49:55 +0100 Subject: [PATCH 011/248] KVM: x86: Use __DECLARE_FLEX_ARRAY() for UAPI structures with VLAs Commit 94dfc73e7cf4 ("treewide: uapi: Replace zero-length arrays with flexible-array members") broke the userspace API for C++. These structures ending in VLAs are typically a *header*, which can be followed by an arbitrary number of entries. Userspace typically creates a larger structure with some non-zero number of entries, for example in QEMU's kvm_arch_get_supported_msr_feature(): struct { struct kvm_msrs info; struct kvm_msr_entry entries[1]; } msr_data = {}; While that works in C, it fails in C++ with an error like: flexible array member 'kvm_msrs::entries' not at end of 'struct msr_data' Fix this by using __DECLARE_FLEX_ARRAY() for the VLA, which uses [0] for C++ compilation. Fixes: 94dfc73e7cf4 ("treewide: uapi: Replace zero-length arrays with flexible-array members") Cc: stable@vger.kernel.org Signed-off-by: David Woodhouse Link: https://patch.msgid.link/3abaf6aefd6e5efeff3b860ac38421d9dec908db.camel@infradead.org [sean: tag for stable@] Signed-off-by: Sean Christopherson --- arch/x86/include/uapi/asm/kvm.h | 12 ++++++------ include/uapi/linux/kvm.h | 11 ++++++----- 2 files changed, 12 insertions(+), 11 deletions(-) diff --git a/arch/x86/include/uapi/asm/kvm.h b/arch/x86/include/uapi/asm/kvm.h index 0d4538fa6c31..5f2b30d0405c 100644 --- a/arch/x86/include/uapi/asm/kvm.h +++ b/arch/x86/include/uapi/asm/kvm.h @@ -197,13 +197,13 @@ struct kvm_msrs { __u32 nmsrs; /* number of msrs in entries */ __u32 pad; - struct kvm_msr_entry entries[]; + __DECLARE_FLEX_ARRAY(struct kvm_msr_entry, entries); }; /* for KVM_GET_MSR_INDEX_LIST */ struct kvm_msr_list { __u32 nmsrs; /* number of msrs in entries */ - __u32 indices[]; + __DECLARE_FLEX_ARRAY(__u32, indices); }; /* Maximum size of any access bitmap in bytes */ @@ -245,7 +245,7 @@ struct kvm_cpuid_entry { struct kvm_cpuid { __u32 nent; __u32 padding; - struct kvm_cpuid_entry entries[]; + __DECLARE_FLEX_ARRAY(struct kvm_cpuid_entry, entries); }; struct kvm_cpuid_entry2 { @@ -267,7 +267,7 @@ struct kvm_cpuid_entry2 { struct kvm_cpuid2 { __u32 nent; __u32 padding; - struct kvm_cpuid_entry2 entries[]; + __DECLARE_FLEX_ARRAY(struct kvm_cpuid_entry2, entries); }; /* for KVM_GET_PIT and KVM_SET_PIT */ @@ -398,7 +398,7 @@ struct kvm_xsave { * the contents of CPUID leaf 0xD on the host. */ __u32 region[1024]; - __u32 extra[]; + __DECLARE_FLEX_ARRAY(__u32, extra); }; #define KVM_MAX_XCRS 16 @@ -566,7 +566,7 @@ struct kvm_pmu_event_filter { __u32 fixed_counter_bitmap; __u32 flags; __u32 pad[4]; - __u64 events[]; + __DECLARE_FLEX_ARRAY(__u64, events); }; #define KVM_PMU_EVENT_ALLOW 0 diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 80364d4dbebb..3f0d8d3c3daf 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -11,6 +11,7 @@ #include #include #include +#include #include #include @@ -542,7 +543,7 @@ struct kvm_coalesced_mmio { struct kvm_coalesced_mmio_ring { __u32 first, last; - struct kvm_coalesced_mmio coalesced_mmio[]; + __DECLARE_FLEX_ARRAY(struct kvm_coalesced_mmio, coalesced_mmio); }; #define KVM_COALESCED_MMIO_MAX \ @@ -592,7 +593,7 @@ struct kvm_clear_dirty_log { /* for KVM_SET_SIGNAL_MASK */ struct kvm_signal_mask { __u32 len; - __u8 sigset[]; + __DECLARE_FLEX_ARRAY(__u8, sigset); }; /* for KVM_TPR_ACCESS_REPORTING */ @@ -1051,7 +1052,7 @@ struct kvm_irq_routing_entry { struct kvm_irq_routing { __u32 nr; __u32 flags; - struct kvm_irq_routing_entry entries[]; + __DECLARE_FLEX_ARRAY(struct kvm_irq_routing_entry, entries); }; #define KVM_IRQFD_FLAG_DEASSIGN (1 << 0) @@ -1142,7 +1143,7 @@ struct kvm_dirty_tlb { struct kvm_reg_list { __u64 n; /* number of regs */ - __u64 reg[]; + __DECLARE_FLEX_ARRAY(__u64, reg); }; struct kvm_one_reg { @@ -1608,7 +1609,7 @@ struct kvm_stats_desc { #ifdef __KERNEL__ char name[KVM_STATS_NAME_SIZE]; #else - char name[]; + __DECLARE_FLEX_ARRAY(char, name); #endif }; From 641f6fda143b879da1515f821ee475073678cf2a Mon Sep 17 00:00:00 2001 From: Mukesh Ojha Date: Thu, 29 Jan 2026 20:53:20 +0530 Subject: [PATCH 012/248] soc: qcom: pd-mapper: Fix element length in servreg_loc_pfr_req_ei It looks element length declared in servreg_loc_pfr_req_ei for reason not matching servreg_loc_pfr_req's reason field due which we could observe decoding error on PD crash. qmi_decode_string_elem: String len 81 >= Max Len 65 Fix this by matching with servreg_loc_pfr_req's reason field. Fixes: 1ebcde047c54 ("soc: qcom: add pd-mapper implementation") Signed-off-by: Mukesh Ojha Reviewed-by: Dmitry Baryshkov Tested-by: Nikita Travkin Link: https://lore.kernel.org/r/20260129152320.3658053-2-mukesh.ojha@oss.qualcomm.com Signed-off-by: Bjorn Andersson --- drivers/soc/qcom/pdr_internal.h | 2 +- drivers/soc/qcom/qcom_pdr_msg.c | 2 +- include/linux/soc/qcom/pdr.h | 1 + 3 files changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/soc/qcom/pdr_internal.h b/drivers/soc/qcom/pdr_internal.h index 039508c1bbf7..047c0160b617 100644 --- a/drivers/soc/qcom/pdr_internal.h +++ b/drivers/soc/qcom/pdr_internal.h @@ -84,7 +84,7 @@ struct servreg_set_ack_resp { struct servreg_loc_pfr_req { char service[SERVREG_NAME_LENGTH + 1]; - char reason[257]; + char reason[SERVREG_PFR_LENGTH + 1]; }; struct servreg_loc_pfr_resp { diff --git a/drivers/soc/qcom/qcom_pdr_msg.c b/drivers/soc/qcom/qcom_pdr_msg.c index ca98932140d8..02022b11ecf0 100644 --- a/drivers/soc/qcom/qcom_pdr_msg.c +++ b/drivers/soc/qcom/qcom_pdr_msg.c @@ -325,7 +325,7 @@ const struct qmi_elem_info servreg_loc_pfr_req_ei[] = { }, { .data_type = QMI_STRING, - .elem_len = SERVREG_NAME_LENGTH + 1, + .elem_len = SERVREG_PFR_LENGTH + 1, .elem_size = sizeof(char), .array_type = VAR_LEN_ARRAY, .tlv_type = 0x02, diff --git a/include/linux/soc/qcom/pdr.h b/include/linux/soc/qcom/pdr.h index 83a8ea612e69..2b7691e47c2a 100644 --- a/include/linux/soc/qcom/pdr.h +++ b/include/linux/soc/qcom/pdr.h @@ -5,6 +5,7 @@ #include #define SERVREG_NAME_LENGTH 64 +#define SERVREG_PFR_LENGTH 256 struct pdr_service; struct pdr_handle; From d487085006109e5981e059476818243759d2e925 Mon Sep 17 00:00:00 2001 From: Konrad Dybcio Date: Fri, 6 Mar 2026 12:20:14 +0100 Subject: [PATCH 013/248] soc: qcom: pmic_glink_altmode: Fix SVID=DP && unconnected edge case The commit referenced in Fixes started evaluating the value of alt_port->mux_ctrl before checking the active SVID. This led to drm_aux_hpd_bridge_notify() no longer being called for the 'DP unplug' case. Perhaps somewhat interestingly, the firmware sends a notification with SVID=DP, mux_ctrl=MUX_CTRL_STATE_NO_CONN and pin_assignment=0 on unplug. 'pin_assignment' was previously interpreted as a bitfield excerpt from the second byte of the DP pg_altmode payload (and stored as an u8). That value is used in pmic_glink_altmode_sc8280xp_notify(), decremented by 1 (DPAM_HPD_A). Previously, this would result in an u8 underflow that would rollover to 0xff (which prior to the Fixes patch would have caused a pmic_glink_altmode_safe() and 'disconnected' bridge notification). That check was removed, without a replacement. Resolve this issue by making sure the SID=DP && mux_ctrl=NO_CONN combo once again results in a HPD bridge notification. Fixes: 0539c5a6fdef ("soc: qcom: pmic_glink_altmode: Consume TBT3/USB4 mode notifications") Reported-by: Abel Vesa Tested-by: Abel Vesa Signed-off-by: Konrad Dybcio Link: https://lore.kernel.org/r/20260306-topic-pgaltmode_fixup-v1-1-ec154b2d8e89@oss.qualcomm.com Signed-off-by: Bjorn Andersson --- drivers/soc/qcom/pmic_glink_altmode.c | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/drivers/soc/qcom/pmic_glink_altmode.c b/drivers/soc/qcom/pmic_glink_altmode.c index d0afdcb96ee1..b496b88842a2 100644 --- a/drivers/soc/qcom/pmic_glink_altmode.c +++ b/drivers/soc/qcom/pmic_glink_altmode.c @@ -350,15 +350,17 @@ static void pmic_glink_altmode_worker(struct work_struct *work) typec_switch_set(alt_port->typec_switch, alt_port->orientation); - if (alt_port->mux_ctrl == MUX_CTRL_STATE_NO_CONN) { - pmic_glink_altmode_safe(altmode, alt_port); - } else if (alt_port->svid == USB_TYPEC_TBT_SID) { + if (alt_port->svid == USB_TYPEC_TBT_SID) { pmic_glink_altmode_enable_tbt(altmode, alt_port); } else if (alt_port->svid == USB_TYPEC_DP_SID) { - pmic_glink_altmode_enable_dp(altmode, alt_port, - alt_port->mode, - alt_port->hpd_state, - alt_port->hpd_irq); + if (alt_port->mux_ctrl == MUX_CTRL_STATE_NO_CONN) { + pmic_glink_altmode_safe(altmode, alt_port); + } else { + pmic_glink_altmode_enable_dp(altmode, alt_port, + alt_port->mode, + alt_port->hpd_state, + alt_port->hpd_irq); + } if (alt_port->hpd_state) conn_status = connector_status_connected; @@ -368,6 +370,8 @@ static void pmic_glink_altmode_worker(struct work_struct *work) drm_aux_hpd_bridge_notify(&alt_port->bridge->dev, conn_status); } else if (alt_port->mux_ctrl == MUX_CTRL_STATE_TUNNELING) { pmic_glink_altmode_enable_usb4(altmode, alt_port); + } else if (alt_port->mux_ctrl == MUX_CTRL_STATE_NO_CONN) { + pmic_glink_altmode_safe(altmode, alt_port); } else { pmic_glink_altmode_enable_usb(altmode, alt_port); } From d56c9cca2e7f8f0ad91f37abd4e6357c2318b0db Mon Sep 17 00:00:00 2001 From: Even Xu Date: Tue, 17 Mar 2026 13:56:28 +0800 Subject: [PATCH 014/248] HID: Intel-thc-hid: Intel-quicki2c: Add NVL Device IDs Add Nova Lake THC QuickI2C device IDs to support list. Signed-off-by: Even Xu Signed-off-by: Jiri Kosina --- drivers/hid/intel-thc-hid/intel-quicki2c/pci-quicki2c.c | 7 +++++++ drivers/hid/intel-thc-hid/intel-quicki2c/quicki2c-dev.h | 4 ++++ 2 files changed, 11 insertions(+) diff --git a/drivers/hid/intel-thc-hid/intel-quicki2c/pci-quicki2c.c b/drivers/hid/intel-thc-hid/intel-quicki2c/pci-quicki2c.c index f178017352ba..46d3e9a01999 100644 --- a/drivers/hid/intel-thc-hid/intel-quicki2c/pci-quicki2c.c +++ b/drivers/hid/intel-thc-hid/intel-quicki2c/pci-quicki2c.c @@ -26,6 +26,11 @@ static struct quicki2c_ddata ptl_ddata = { .max_interrupt_delay = MAX_RX_INTERRUPT_DELAY, }; +static struct quicki2c_ddata nvl_ddata = { + .max_detect_size = MAX_RX_DETECT_SIZE_NVL, + .max_interrupt_delay = MAX_RX_INTERRUPT_DELAY, +}; + /* THC QuickI2C ACPI method to get device properties */ /* HIDI2C device method */ static guid_t i2c_hid_guid = @@ -1032,6 +1037,8 @@ static const struct pci_device_id quicki2c_pci_tbl[] = { { PCI_DEVICE_DATA(INTEL, THC_PTL_U_DEVICE_ID_I2C_PORT2, &ptl_ddata) }, { PCI_DEVICE_DATA(INTEL, THC_WCL_DEVICE_ID_I2C_PORT1, &ptl_ddata) }, { PCI_DEVICE_DATA(INTEL, THC_WCL_DEVICE_ID_I2C_PORT2, &ptl_ddata) }, + { PCI_DEVICE_DATA(INTEL, THC_NVL_H_DEVICE_ID_I2C_PORT1, &nvl_ddata) }, + { PCI_DEVICE_DATA(INTEL, THC_NVL_H_DEVICE_ID_I2C_PORT2, &nvl_ddata) }, { } }; MODULE_DEVICE_TABLE(pci, quicki2c_pci_tbl); diff --git a/drivers/hid/intel-thc-hid/intel-quicki2c/quicki2c-dev.h b/drivers/hid/intel-thc-hid/intel-quicki2c/quicki2c-dev.h index 33a1e3db1cb2..61dbdece59a1 100644 --- a/drivers/hid/intel-thc-hid/intel-quicki2c/quicki2c-dev.h +++ b/drivers/hid/intel-thc-hid/intel-quicki2c/quicki2c-dev.h @@ -15,6 +15,8 @@ #define PCI_DEVICE_ID_INTEL_THC_PTL_U_DEVICE_ID_I2C_PORT2 0xE44A #define PCI_DEVICE_ID_INTEL_THC_WCL_DEVICE_ID_I2C_PORT1 0x4D48 #define PCI_DEVICE_ID_INTEL_THC_WCL_DEVICE_ID_I2C_PORT2 0x4D4A +#define PCI_DEVICE_ID_INTEL_THC_NVL_H_DEVICE_ID_I2C_PORT1 0xD348 +#define PCI_DEVICE_ID_INTEL_THC_NVL_H_DEVICE_ID_I2C_PORT2 0xD34A /* Packet size value, the unit is 16 bytes */ #define MAX_PACKET_SIZE_VALUE_LNL 256 @@ -40,6 +42,8 @@ /* PTL Max packet size detection capability is 255 Bytes */ #define MAX_RX_DETECT_SIZE_PTL 255 +/* NVL Max packet size detection capability is 64K Bytes */ +#define MAX_RX_DETECT_SIZE_NVL 65535 /* Max interrupt delay capability is 2.56ms */ #define MAX_RX_INTERRUPT_DELAY 256 From 48e91af0cbe942d50ef6257d850accdca1d01378 Mon Sep 17 00:00:00 2001 From: Even Xu Date: Tue, 17 Mar 2026 13:56:29 +0800 Subject: [PATCH 015/248] HID: Intel-thc-hid: Intel-quickspi: Add NVL Device IDs Add Nova Lake THC QuickSPI device IDs to support list. Signed-off-by: Even Xu Signed-off-by: Jiri Kosina --- drivers/hid/intel-thc-hid/intel-quickspi/pci-quickspi.c | 6 ++++++ drivers/hid/intel-thc-hid/intel-quickspi/quickspi-dev.h | 2 ++ 2 files changed, 8 insertions(+) diff --git a/drivers/hid/intel-thc-hid/intel-quickspi/pci-quickspi.c b/drivers/hid/intel-thc-hid/intel-quickspi/pci-quickspi.c index ad6bd59963b2..b6a69995692c 100644 --- a/drivers/hid/intel-thc-hid/intel-quickspi/pci-quickspi.c +++ b/drivers/hid/intel-thc-hid/intel-quickspi/pci-quickspi.c @@ -37,6 +37,10 @@ struct quickspi_driver_data arl = { .max_packet_size_value = MAX_PACKET_SIZE_VALUE_MTL, }; +struct quickspi_driver_data nvl = { + .max_packet_size_value = MAX_PACKET_SIZE_VALUE_LNL, +}; + /* THC QuickSPI ACPI method to get device properties */ /* HIDSPI Method: {6e2ac436-0fcf-41af-a265-b32a220dcfab} */ static guid_t hidspi_guid = @@ -982,6 +986,8 @@ static const struct pci_device_id quickspi_pci_tbl[] = { {PCI_DEVICE_DATA(INTEL, THC_WCL_DEVICE_ID_SPI_PORT2, &ptl), }, {PCI_DEVICE_DATA(INTEL, THC_ARL_DEVICE_ID_SPI_PORT1, &arl), }, {PCI_DEVICE_DATA(INTEL, THC_ARL_DEVICE_ID_SPI_PORT2, &arl), }, + {PCI_DEVICE_DATA(INTEL, THC_NVL_H_DEVICE_ID_SPI_PORT1, &nvl), }, + {PCI_DEVICE_DATA(INTEL, THC_NVL_H_DEVICE_ID_SPI_PORT2, &nvl), }, {} }; MODULE_DEVICE_TABLE(pci, quickspi_pci_tbl); diff --git a/drivers/hid/intel-thc-hid/intel-quickspi/quickspi-dev.h b/drivers/hid/intel-thc-hid/intel-quickspi/quickspi-dev.h index c30e1a42eb09..bf5e18f5a5f4 100644 --- a/drivers/hid/intel-thc-hid/intel-quickspi/quickspi-dev.h +++ b/drivers/hid/intel-thc-hid/intel-quickspi/quickspi-dev.h @@ -23,6 +23,8 @@ #define PCI_DEVICE_ID_INTEL_THC_WCL_DEVICE_ID_SPI_PORT2 0x4D4B #define PCI_DEVICE_ID_INTEL_THC_ARL_DEVICE_ID_SPI_PORT1 0x7749 #define PCI_DEVICE_ID_INTEL_THC_ARL_DEVICE_ID_SPI_PORT2 0x774B +#define PCI_DEVICE_ID_INTEL_THC_NVL_H_DEVICE_ID_SPI_PORT1 0xD349 +#define PCI_DEVICE_ID_INTEL_THC_NVL_H_DEVICE_ID_SPI_PORT2 0xD34B /* HIDSPI special ACPI parameters DSM methods */ #define ACPI_QUICKSPI_REVISION_NUM 2 From b8ead30e2b2c7f32c8d2782e805160b110766592 Mon Sep 17 00:00:00 2001 From: Len Brown Date: Sat, 7 Mar 2026 21:43:11 -0500 Subject: [PATCH 016/248] tools/power turbostat: Fix swidle header vs data display I changed my mind about displaying swidle statistics, which are "added counters". Recently I reverted the column headers to 8-columns, but kept print_decimal_value() padding out to 16-columns for all 64-bit counters. Simplify by keeping print_decimial_value() at %lld -- which will often fit into 8-columns, and live with the fact that it can overflow and shift the other columns, which continue to tab-delimited. This is a better compromise than inserting a bunch of space characters that most users don't like. Fixes: 1a23ba6a1ba2 ("tools/power turbostat: Print wide names only for RAW 64-bit columns") Reported-by: Artem Bityutskiy Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index ae827485950d..791b9154f662 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -2852,10 +2852,9 @@ static inline int print_hex_value(int width, int *printed, char *delim, unsigned static inline int print_decimal_value(int width, int *printed, char *delim, unsigned long long value) { - if (width <= 32) - return (sprintf(outp, "%s%d", (*printed++ ? delim : ""), (unsigned int)value)); - else - return (sprintf(outp, "%s%-8lld", (*printed++ ? delim : ""), value)); + UNUSED(width); + + return (sprintf(outp, "%s%lld", (*printed++ ? delim : ""), value)); } static inline int print_float_value(int *printed, char *delim, double value) From 99b38fa34342b227a863948460b7937a97dbce28 Mon Sep 17 00:00:00 2001 From: Zhang Rui Date: Tue, 10 Mar 2026 14:02:01 +0800 Subject: [PATCH 017/248] tools/power turbostat: Eliminate unnecessary data structure allocation Linux core_id's are a per-package namespace, not a per-node namespace. Rename topo.cores_per_node to topo.cores_per_pkg to reflect this. Eliminate topo.nodes_per_pkg from the sizing for core data structures, since it has no role except to unnecessarily bloat the allocation. Validated on multiple Intel platforms (ICX/SPR/SRF/EMR/GNR/CWF) with various CPU online/offline configurations and SMT enabled/disabled scenarios. No functional changes. [lenb: commit message] Signed-off-by: Zhang Rui Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index 791b9154f662..14021a6ed717 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -2409,7 +2409,7 @@ struct topo_params { int max_l3_id; int max_node_num; int nodes_per_pkg; - int cores_per_node; + int cores_per_pkg; int threads_per_core; } topo; @@ -9633,9 +9633,9 @@ void topology_probe(bool startup) topo.max_core_id = max_core_id; /* within a package */ topo.max_package_id = max_package_id; - topo.cores_per_node = max_core_id + 1; + topo.cores_per_pkg = max_core_id + 1; if (debug > 1) - fprintf(outf, "max_core_id %d, sizing for %d cores per package\n", max_core_id, topo.cores_per_node); + fprintf(outf, "max_core_id %d, sizing for %d cores per package\n", max_core_id, topo.cores_per_pkg); if (!summary_only) BIC_PRESENT(BIC_Core); @@ -9700,7 +9700,7 @@ void allocate_counters_1(struct counters *counters) void allocate_counters(struct counters *counters) { int i; - int num_cores = topo.cores_per_node * topo.nodes_per_pkg * topo.num_packages; + int num_cores = topo.cores_per_pkg * topo.num_packages; counters->threads = calloc(topo.max_cpu_num + 1, sizeof(struct thread_data)); if (counters->threads == NULL) From a444083286434ec1fd127c5da11a3091e6013008 Mon Sep 17 00:00:00 2001 From: Serhii Pievniev Date: Wed, 25 Feb 2026 18:16:03 -0500 Subject: [PATCH 018/248] tools/power/turbostat: Fix microcode patch level output for AMD/Hygon turbostat always used the same logic to read the microcode patch level, which is correct for Intel but not for AMD/Hygon. While Intel stores the patch level in the upper 32 bits of MSR, AMD stores it in the lower 32 bits, which causes turbostat to report the microcode version as 0x0 on AMD/Hygon. Fix by shifting right by 32 for non-AMD/Hygon, preserving the existing behavior for Intel and unknown vendors. Fixes: 3e4048466c39 ("tools/power turbostat: Add --no-msr option") Signed-off-by: Serhii Pievniev Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index 14021a6ed717..b985bce69142 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -9121,10 +9121,13 @@ void process_cpuid() cpuid_has_hv = ecx_flags & (1 << 31); if (!no_msr) { - if (get_msr(sched_getcpu(), MSR_IA32_UCODE_REV, &ucode_patch)) + if (get_msr(sched_getcpu(), MSR_IA32_UCODE_REV, &ucode_patch)) { warnx("get_msr(UCODE)"); - else + } else { ucode_patch_valid = true; + if (!authentic_amd && !hygon_genuine) + ucode_patch >>= 32; + } } /* @@ -9138,7 +9141,7 @@ void process_cpuid() if (!quiet) { fprintf(outf, "CPUID(1): family:model:stepping 0x%x:%x:%x (%d:%d:%d)", family, model, stepping, family, model, stepping); if (ucode_patch_valid) - fprintf(outf, " microcode 0x%x", (unsigned int)((ucode_patch >> 32) & 0xFFFFFFFF)); + fprintf(outf, " microcode 0x%x", (unsigned int)ucode_patch); fputc('\n', outf); fprintf(outf, "CPUID(0x80000000): max_extended_levels: 0x%x\n", max_extended_level); From bccaf98c1077f2216ecae12923e21ffaebd67d95 Mon Sep 17 00:00:00 2001 From: Max Krummenacher Date: Thu, 19 Feb 2026 16:11:49 +0100 Subject: [PATCH 019/248] Revert "ARM: dts: imx: move nand related property under nand@0" This reverts commit 8124b4a4a96b57d6cc3705a9df9623c52baa047b. The change introduced a regression: at least Colibri iMX6ULL and Colibri iMX7 no longer boot with that commit applied, while they boot again after reverting it. Although this has only been verified on these two modules, the issue is expected to affect all device trees using the gpmi-nand driver. [ 0.876938] Creating 5 MTD partitions on "gpmi-nand": [ 0.876974] 0x000000000000-0x000000080000 : "mx7-bcb" [ 0.879860] 0x000000080000-0x000000200000 : "u-boot1" [ 0.884761] 0x000000200000-0x000000380000 : "u-boot2" [ 0.886993] 0x000000380000-0x000000400000 : "u-boot-env" [ 0.894686] 0x000000400000-0x000020000000 : "ubi" [ 0.899054] gpmi-nand 33002000.nand-controller: driver registered. ... [ 0.960443] ubi0: default fastmap pool size: 200 [ 0.960476] ubi0: default fastmap WL pool size: 100 [ 0.960500] ubi0: attaching mtd4 [ 1.636355] ubi0 error: scan_peb: bad image sequence number 1588722158 in PEB 4060, expected 1574791632 ... [ 1.649889] ubi0 error: ubi_attach_mtd_dev: failed to attach mtd4, error -22 [ 1.650029] UBI error: cannot attach mtd4 ... [ 1.670262] Kernel panic - not syncing: VFS: Unable to mount root fs on unknown-block(0,253) Fixes: 8124b4a4a96b ("ARM: dts: imx: move nand related property under nand@0") Signed-off-by: Max Krummenacher Signed-off-by: Frank Li --- arch/arm/boot/dts/nxp/imx/imx6-logicpd-som.dtsi | 6 +----- arch/arm/boot/dts/nxp/imx/imx6qdl-icore.dtsi | 6 +----- arch/arm/boot/dts/nxp/imx/imx6qdl-phytec-pfla02.dtsi | 6 +----- .../boot/dts/nxp/imx/imx6qdl-phytec-phycore-som.dtsi | 6 +----- arch/arm/boot/dts/nxp/imx/imx6qdl-skov-cpu.dtsi | 6 +----- arch/arm/boot/dts/nxp/imx/imx6qdl-tx6.dtsi | 6 +----- arch/arm/boot/dts/nxp/imx/imx6ul-geam.dts | 6 +----- arch/arm/boot/dts/nxp/imx/imx6ul-isiot.dtsi | 6 +----- .../boot/dts/nxp/imx/imx6ul-phytec-phycore-som.dtsi | 6 +----- arch/arm/boot/dts/nxp/imx/imx6ul-tx6ul.dtsi | 6 +----- arch/arm/boot/dts/nxp/imx/imx6ull-colibri.dtsi | 12 ++++-------- .../boot/dts/nxp/imx/imx6ull-engicam-microgea.dtsi | 12 ++++-------- arch/arm/boot/dts/nxp/imx/imx6ull-myir-mys-6ulx.dtsi | 6 +----- arch/arm/boot/dts/nxp/imx/imx6ulz-bsh-smm-m2.dts | 6 +----- arch/arm/boot/dts/nxp/imx/imx7-colibri.dtsi | 8 ++------ 15 files changed, 22 insertions(+), 82 deletions(-) diff --git a/arch/arm/boot/dts/nxp/imx/imx6-logicpd-som.dtsi b/arch/arm/boot/dts/nxp/imx/imx6-logicpd-som.dtsi index f452764fae00..547fb141ec0c 100644 --- a/arch/arm/boot/dts/nxp/imx/imx6-logicpd-som.dtsi +++ b/arch/arm/boot/dts/nxp/imx/imx6-logicpd-som.dtsi @@ -36,12 +36,8 @@ &clks { &gpmi { pinctrl-names = "default"; pinctrl-0 = <&pinctrl_gpmi_nand>; + nand-on-flash-bbt; status = "okay"; - - nand@0 { - reg = <0>; - nand-on-flash-bbt; - }; }; &i2c3 { diff --git a/arch/arm/boot/dts/nxp/imx/imx6qdl-icore.dtsi b/arch/arm/boot/dts/nxp/imx/imx6qdl-icore.dtsi index 58ecdb87c6d4..9975b6ee433d 100644 --- a/arch/arm/boot/dts/nxp/imx/imx6qdl-icore.dtsi +++ b/arch/arm/boot/dts/nxp/imx/imx6qdl-icore.dtsi @@ -172,12 +172,8 @@ eth_phy: ethernet-phy@0 { &gpmi { pinctrl-names = "default"; pinctrl-0 = <&pinctrl_gpmi_nand>; + nand-on-flash-bbt; status = "okay"; - - nand@0 { - reg = <0>; - nand-on-flash-bbt; - }; }; &i2c1 { diff --git a/arch/arm/boot/dts/nxp/imx/imx6qdl-phytec-pfla02.dtsi b/arch/arm/boot/dts/nxp/imx/imx6qdl-phytec-pfla02.dtsi index 6f3becd33a5b..aa9a442852f4 100644 --- a/arch/arm/boot/dts/nxp/imx/imx6qdl-phytec-pfla02.dtsi +++ b/arch/arm/boot/dts/nxp/imx/imx6qdl-phytec-pfla02.dtsi @@ -102,12 +102,8 @@ ethphy: ethernet-phy@0 { &gpmi { pinctrl-names = "default"; pinctrl-0 = <&pinctrl_gpmi_nand>; + nand-on-flash-bbt; status = "okay"; - - nand@0 { - reg = <0>; - nand-on-flash-bbt; - }; }; &i2c1 { diff --git a/arch/arm/boot/dts/nxp/imx/imx6qdl-phytec-phycore-som.dtsi b/arch/arm/boot/dts/nxp/imx/imx6qdl-phytec-phycore-som.dtsi index f2140dd8525f..85e278eb2016 100644 --- a/arch/arm/boot/dts/nxp/imx/imx6qdl-phytec-phycore-som.dtsi +++ b/arch/arm/boot/dts/nxp/imx/imx6qdl-phytec-phycore-som.dtsi @@ -73,12 +73,8 @@ ethphy: ethernet-phy@3 { &gpmi { pinctrl-names = "default"; pinctrl-0 = <&pinctrl_gpmi_nand>; + nand-on-flash-bbt; status = "disabled"; - - nand@0 { - reg = <0>; - nand-on-flash-bbt; - }; }; &i2c3 { diff --git a/arch/arm/boot/dts/nxp/imx/imx6qdl-skov-cpu.dtsi b/arch/arm/boot/dts/nxp/imx/imx6qdl-skov-cpu.dtsi index 131a3428ddb8..c93dbc595ef6 100644 --- a/arch/arm/boot/dts/nxp/imx/imx6qdl-skov-cpu.dtsi +++ b/arch/arm/boot/dts/nxp/imx/imx6qdl-skov-cpu.dtsi @@ -260,14 +260,10 @@ fixed-link { &gpmi { pinctrl-names = "default"; pinctrl-0 = <&pinctrl_gpmi_nand>; + nand-on-flash-bbt; #address-cells = <1>; #size-cells = <0>; status = "okay"; - - nand@0 { - reg = <0>; - nand-on-flash-bbt; - }; }; &i2c3 { diff --git a/arch/arm/boot/dts/nxp/imx/imx6qdl-tx6.dtsi b/arch/arm/boot/dts/nxp/imx/imx6qdl-tx6.dtsi index d29adfef5fdb..57297d6521cf 100644 --- a/arch/arm/boot/dts/nxp/imx/imx6qdl-tx6.dtsi +++ b/arch/arm/boot/dts/nxp/imx/imx6qdl-tx6.dtsi @@ -252,13 +252,9 @@ etnphy: ethernet-phy@0 { &gpmi { pinctrl-names = "default"; pinctrl-0 = <&pinctrl_gpmi_nand>; + nand-on-flash-bbt; fsl,no-blockmark-swap; status = "okay"; - - nand@0 { - reg = <0>; - nand-on-flash-bbt; - }; }; &i2c1 { diff --git a/arch/arm/boot/dts/nxp/imx/imx6ul-geam.dts b/arch/arm/boot/dts/nxp/imx/imx6ul-geam.dts index 40d530c1dc29..2a6bb5ff808a 100644 --- a/arch/arm/boot/dts/nxp/imx/imx6ul-geam.dts +++ b/arch/arm/boot/dts/nxp/imx/imx6ul-geam.dts @@ -133,12 +133,8 @@ ethphy1: ethernet-phy@1 { &gpmi { pinctrl-names = "default"; pinctrl-0 = <&pinctrl_gpmi_nand>; + nand-on-flash-bbt; status = "okay"; - - nand@0 { - reg = <0>; - nand-on-flash-bbt; - }; }; &i2c1 { diff --git a/arch/arm/boot/dts/nxp/imx/imx6ul-isiot.dtsi b/arch/arm/boot/dts/nxp/imx/imx6ul-isiot.dtsi index 776f6f78ee46..e34c8cbe36ae 100644 --- a/arch/arm/boot/dts/nxp/imx/imx6ul-isiot.dtsi +++ b/arch/arm/boot/dts/nxp/imx/imx6ul-isiot.dtsi @@ -101,12 +101,8 @@ ethphy0: ethernet-phy@0 { &gpmi { pinctrl-names = "default"; pinctrl-0 = <&pinctrl_gpmi_nand>; + nand-on-flash-bbt; status = "disabled"; - - nand@0 { - reg = <0>; - nand-on-flash-bbt; - }; }; &i2c1 { diff --git a/arch/arm/boot/dts/nxp/imx/imx6ul-phytec-phycore-som.dtsi b/arch/arm/boot/dts/nxp/imx/imx6ul-phytec-phycore-som.dtsi index 27e4d2aec137..a3ea1b208462 100644 --- a/arch/arm/boot/dts/nxp/imx/imx6ul-phytec-phycore-som.dtsi +++ b/arch/arm/boot/dts/nxp/imx/imx6ul-phytec-phycore-som.dtsi @@ -63,12 +63,8 @@ ethphy1: ethernet-phy@1 { &gpmi { pinctrl-names = "default"; pinctrl-0 = <&pinctrl_gpmi_nand>; + nand-on-flash-bbt; status = "disabled"; - - nand@0 { - reg = <0>; - nand-on-flash-bbt; - }; }; &i2c1 { diff --git a/arch/arm/boot/dts/nxp/imx/imx6ul-tx6ul.dtsi b/arch/arm/boot/dts/nxp/imx/imx6ul-tx6ul.dtsi index dc53f9286ffe..1992dfb53b45 100644 --- a/arch/arm/boot/dts/nxp/imx/imx6ul-tx6ul.dtsi +++ b/arch/arm/boot/dts/nxp/imx/imx6ul-tx6ul.dtsi @@ -296,13 +296,9 @@ &fec2 { &gpmi { pinctrl-names = "default"; pinctrl-0 = <&pinctrl_gpmi_nand>; + nand-on-flash-bbt; fsl,no-blockmark-swap; status = "okay"; - - nand@0 { - reg = <0>; - nand-on-flash-bbt; - }; }; &i2c2 { diff --git a/arch/arm/boot/dts/nxp/imx/imx6ull-colibri.dtsi b/arch/arm/boot/dts/nxp/imx/imx6ull-colibri.dtsi index eaed2cbf0c82..ec3c1e7301f4 100644 --- a/arch/arm/boot/dts/nxp/imx/imx6ull-colibri.dtsi +++ b/arch/arm/boot/dts/nxp/imx/imx6ull-colibri.dtsi @@ -160,15 +160,11 @@ &gpmi { pinctrl-names = "default"; pinctrl-0 = <&pinctrl_gpmi_nand>; fsl,use-minimum-ecc; + nand-on-flash-bbt; + nand-ecc-mode = "hw"; + nand-ecc-strength = <8>; + nand-ecc-step-size = <512>; status = "okay"; - - nand@0 { - reg = <0>; - nand-on-flash-bbt; - nand-ecc-mode = "hw"; - nand-ecc-strength = <8>; - nand-ecc-step-size = <512>; - }; }; /* I2C3_SDA/SCL on SODIMM 194/196 (e.g. RTC on carrier board) */ diff --git a/arch/arm/boot/dts/nxp/imx/imx6ull-engicam-microgea.dtsi b/arch/arm/boot/dts/nxp/imx/imx6ull-engicam-microgea.dtsi index 3dfd43b32055..43518bf07602 100644 --- a/arch/arm/boot/dts/nxp/imx/imx6ull-engicam-microgea.dtsi +++ b/arch/arm/boot/dts/nxp/imx/imx6ull-engicam-microgea.dtsi @@ -43,15 +43,11 @@ ethphy0: ethernet-phy@0 { &gpmi { pinctrl-names = "default"; pinctrl-0 = <&pinctrl_gpmi_nand>; + nand-ecc-mode = "hw"; + nand-ecc-strength = <0>; + nand-ecc-step-size = <0>; + nand-on-flash-bbt; status = "okay"; - - nand@0 { - reg = <0>; - nand-ecc-mode = "hw"; - nand-ecc-strength = <0>; - nand-ecc-step-size = <0>; - nand-on-flash-bbt; - }; }; &iomuxc { diff --git a/arch/arm/boot/dts/nxp/imx/imx6ull-myir-mys-6ulx.dtsi b/arch/arm/boot/dts/nxp/imx/imx6ull-myir-mys-6ulx.dtsi index fc298f57bfff..83b9de17cee2 100644 --- a/arch/arm/boot/dts/nxp/imx/imx6ull-myir-mys-6ulx.dtsi +++ b/arch/arm/boot/dts/nxp/imx/imx6ull-myir-mys-6ulx.dtsi @@ -60,12 +60,8 @@ ethphy0: ethernet-phy@0 { &gpmi { pinctrl-names = "default"; pinctrl-0 = <&pinctrl_gpmi_nand>; + nand-on-flash-bbt; status = "disabled"; - - nand@0 { - reg = <0>; - nand-on-flash-bbt; - }; }; &uart1 { diff --git a/arch/arm/boot/dts/nxp/imx/imx6ulz-bsh-smm-m2.dts b/arch/arm/boot/dts/nxp/imx/imx6ulz-bsh-smm-m2.dts index 8ec18eae98a4..2d9f495660c9 100644 --- a/arch/arm/boot/dts/nxp/imx/imx6ulz-bsh-smm-m2.dts +++ b/arch/arm/boot/dts/nxp/imx/imx6ulz-bsh-smm-m2.dts @@ -25,12 +25,8 @@ usdhc2_pwrseq: usdhc2-pwrseq { &gpmi { pinctrl-names = "default"; pinctrl-0 = <&pinctrl_gpmi_nand>; + nand-on-flash-bbt; status = "okay"; - - nand@0 { - reg = <0>; - nand-on-flash-bbt; - }; }; &snvs_poweroff { diff --git a/arch/arm/boot/dts/nxp/imx/imx7-colibri.dtsi b/arch/arm/boot/dts/nxp/imx/imx7-colibri.dtsi index a41dc4edfc0d..8666dcd7fe97 100644 --- a/arch/arm/boot/dts/nxp/imx/imx7-colibri.dtsi +++ b/arch/arm/boot/dts/nxp/imx/imx7-colibri.dtsi @@ -375,14 +375,10 @@ &gpio7 { /* NAND on such SKUs */ &gpmi { fsl,use-minimum-ecc; + nand-ecc-mode = "hw"; + nand-on-flash-bbt; pinctrl-names = "default"; pinctrl-0 = <&pinctrl_gpmi_nand>; - - nand@0 { - reg = <0>; - nand-ecc-mode = "hw"; - nand-on-flash-bbt; - }; }; /* On-module Power I2C */ From 4cd46ea0eb4504f7f4fea92cb4601c5c9a3e545e Mon Sep 17 00:00:00 2001 From: Sebastian Krzyszkowiak Date: Sat, 21 Feb 2026 19:15:18 +0100 Subject: [PATCH 020/248] Revert "arm64: dts: imx8mq-librem5: Set the DVS voltages lower" This reverts commit c24a9b698fb02cd0723fa8375abab07f94b97b10. It's been found that there's a significant per-unit variance in accepted supply voltages and the current set still makes some units unstable. Revert back to nominal values. Cc: stable@vger.kernel.org Fixes: c24a9b698fb0 ("arm64: dts: imx8mq-librem5: Set the DVS voltages lower") Signed-off-by: Sebastian Krzyszkowiak Signed-off-by: Frank Li --- .../boot/dts/freescale/imx8mq-librem5-r3.dts | 2 +- .../boot/dts/freescale/imx8mq-librem5.dtsi | 22 +++++-------------- 2 files changed, 7 insertions(+), 17 deletions(-) diff --git a/arch/arm64/boot/dts/freescale/imx8mq-librem5-r3.dts b/arch/arm64/boot/dts/freescale/imx8mq-librem5-r3.dts index 077c5cd2586f..4533a84fb0b9 100644 --- a/arch/arm64/boot/dts/freescale/imx8mq-librem5-r3.dts +++ b/arch/arm64/boot/dts/freescale/imx8mq-librem5-r3.dts @@ -7,7 +7,7 @@ &a53_opp_table { opp-1000000000 { - opp-microvolt = <950000>; + opp-microvolt = <1000000>; }; }; diff --git a/arch/arm64/boot/dts/freescale/imx8mq-librem5.dtsi b/arch/arm64/boot/dts/freescale/imx8mq-librem5.dtsi index eee390c27210..7818d84f25a7 100644 --- a/arch/arm64/boot/dts/freescale/imx8mq-librem5.dtsi +++ b/arch/arm64/boot/dts/freescale/imx8mq-librem5.dtsi @@ -880,8 +880,8 @@ buck1_reg: BUCK1 { regulator-max-microvolt = <1300000>; regulator-boot-on; regulator-ramp-delay = <1250>; - rohm,dvs-run-voltage = <880000>; - rohm,dvs-idle-voltage = <820000>; + rohm,dvs-run-voltage = <900000>; + rohm,dvs-idle-voltage = <850000>; rohm,dvs-suspend-voltage = <810000>; regulator-always-on; }; @@ -892,8 +892,8 @@ buck2_reg: BUCK2 { regulator-max-microvolt = <1300000>; regulator-boot-on; regulator-ramp-delay = <1250>; - rohm,dvs-run-voltage = <950000>; - rohm,dvs-idle-voltage = <850000>; + rohm,dvs-run-voltage = <1000000>; + rohm,dvs-idle-voltage = <900000>; regulator-always-on; }; @@ -902,14 +902,14 @@ buck3_reg: BUCK3 { regulator-min-microvolt = <700000>; regulator-max-microvolt = <1300000>; regulator-boot-on; - rohm,dvs-run-voltage = <850000>; + rohm,dvs-run-voltage = <900000>; }; buck4_reg: BUCK4 { regulator-name = "buck4"; regulator-min-microvolt = <700000>; regulator-max-microvolt = <1300000>; - rohm,dvs-run-voltage = <930000>; + rohm,dvs-run-voltage = <1000000>; }; buck5_reg: BUCK5 { @@ -1448,13 +1448,3 @@ &wdog1 { fsl,ext-reset-output; status = "okay"; }; - -&a53_opp_table { - opp-1000000000 { - opp-microvolt = <850000>; - }; - - opp-1500000000 { - opp-microvolt = <950000>; - }; -}; From 511f76bf1dce5acf8907b65a7d1bc8f7e7c0d637 Mon Sep 17 00:00:00 2001 From: Sebastian Krzyszkowiak Date: Sat, 21 Feb 2026 19:15:19 +0100 Subject: [PATCH 021/248] arm64: dts: imx8mq-librem5: Bump BUCK1 suspend voltage up to 0.85V MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The minimal voltage of VDD_SOC sourced from BUCK1 is 0.81V, which is the currently set value. However, BD71837 only guarantees accuracy of ±0.01V, and this still doesn't factor other reasons for actual voltage to slightly drop in, resulting in the possibility of running out of the operational range. Bump the voltage up to 0.85V, which should give enough headroom. Cc: stable@vger.kernel.org Fixes: 8f0216b006e5 ("arm64: dts: Add a device tree for the Librem 5 phone") Signed-off-by: Sebastian Krzyszkowiak Signed-off-by: Frank Li --- arch/arm64/boot/dts/freescale/imx8mq-librem5.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/boot/dts/freescale/imx8mq-librem5.dtsi b/arch/arm64/boot/dts/freescale/imx8mq-librem5.dtsi index 7818d84f25a7..f5d529c5baf3 100644 --- a/arch/arm64/boot/dts/freescale/imx8mq-librem5.dtsi +++ b/arch/arm64/boot/dts/freescale/imx8mq-librem5.dtsi @@ -882,7 +882,7 @@ buck1_reg: BUCK1 { regulator-ramp-delay = <1250>; rohm,dvs-run-voltage = <900000>; rohm,dvs-idle-voltage = <850000>; - rohm,dvs-suspend-voltage = <810000>; + rohm,dvs-suspend-voltage = <850000>; regulator-always-on; }; From ed532d738a82269dfe490436b9b8def2274cfb6e Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Wed, 11 Mar 2026 11:00:31 +0200 Subject: [PATCH 022/248] tools/power turbostat: Consistently use print_float_value() Fix the PMT thread code to use print_float_value(), to be consistent with the PMT core and package code. [lenb: commit message] Signed-off-by: Artem Bityutskiy Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index b985bce69142..9744f9caac9a 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -3489,12 +3489,12 @@ int format_counters(PER_THREAD_PARAMS) case PMT_TYPE_XTAL_TIME: value_converted = pct(value_raw / crystal_hz, interval_float); - outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), value_converted); + outp += print_float_value(&printed, delim, value_converted); break; case PMT_TYPE_TCORE_CLOCK: value_converted = pct(value_raw / tcore_clock_freq_hz, interval_float); - outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), value_converted); + outp += print_float_value(&printed, delim, value_converted); } } From 23cb4f5c81766e70e5f32ed0987ee8fb5ab2e00a Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Wed, 11 Mar 2026 11:00:32 +0200 Subject: [PATCH 023/248] tools/power turbostat: Fix incorrect format variable In the perf thread, core, and package counter loops, an incorrect 'mp->format' variable is used instead of 'pp->format'. [lenb: edit commit message] Fixes: 696d15cbd8c2 ("tools/power turbostat: Refactor floating point printout code") Signed-off-by: Artem Bityutskiy Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index 9744f9caac9a..4d954533c71d 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -3468,7 +3468,7 @@ int format_counters(PER_THREAD_PARAMS) for (i = 0, pp = sys.perf_tp; pp; ++i, pp = pp->next) { if (pp->format == FORMAT_RAW) outp += print_hex_value(pp->width, &printed, delim, t->perf_counter[i]); - else if (pp->format == FORMAT_DELTA || mp->format == FORMAT_AVERAGE) + else if (pp->format == FORMAT_DELTA || pp->format == FORMAT_AVERAGE) outp += print_decimal_value(pp->width, &printed, delim, t->perf_counter[i]); else if (pp->format == FORMAT_PERCENT) { if (pp->type == COUNTER_USEC) @@ -3538,7 +3538,7 @@ int format_counters(PER_THREAD_PARAMS) for (i = 0, pp = sys.perf_cp; pp; i++, pp = pp->next) { if (pp->format == FORMAT_RAW) outp += print_hex_value(pp->width, &printed, delim, c->perf_counter[i]); - else if (pp->format == FORMAT_DELTA || mp->format == FORMAT_AVERAGE) + else if (pp->format == FORMAT_DELTA || pp->format == FORMAT_AVERAGE) outp += print_decimal_value(pp->width, &printed, delim, c->perf_counter[i]); else if (pp->format == FORMAT_PERCENT) outp += print_float_value(&printed, delim, pct(c->perf_counter[i], tsc)); @@ -3694,7 +3694,7 @@ int format_counters(PER_THREAD_PARAMS) outp += print_hex_value(pp->width, &printed, delim, p->perf_counter[i]); else if (pp->type == COUNTER_K2M) outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), (unsigned int)p->perf_counter[i] / 1000); - else if (pp->format == FORMAT_DELTA || mp->format == FORMAT_AVERAGE) + else if (pp->format == FORMAT_DELTA || pp->format == FORMAT_AVERAGE) outp += print_decimal_value(pp->width, &printed, delim, p->perf_counter[i]); else if (pp->format == FORMAT_PERCENT) outp += print_float_value(&printed, delim, pct(p->perf_counter[i], tsc)); From b6398bc2ef3a78f1be37ba01ae0a5eedaee47803 Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Wed, 11 Mar 2026 11:00:33 +0200 Subject: [PATCH 024/248] tools/power turbostat: Fix --show/--hide for individual cpuidle counters Problem: individual swidle counter names (C1, C1+, C1-, etc.) cannot be selected via --show/--hide due to two bugs in probe_cpuidle_counts(): 1. The function returns immediately when BIC_cpuidle is not enabled, without checking deferred_add_index. 2. The deferred name check runs against name_buf before the trailing newline is stripped, so is_deferred_add("C1\n") never matches "C1". Fix: 1. Relax the early return to pass through when deferred names are queued. 2. Strip the trailing newline from name_buf before performing deferred name checks. 3. Check each suffixed variant (C1+, C1, C1-) individually so that e.g. "--show C1+" enables only the requested metric. In addition, introduce a helper function to avoid repeating the condition (readability cleanup). Fixes: ec4acd3166d8 ("tools/power turbostat: disable "cpuidle" invocation counters, by default") Signed-off-by: Artem Bityutskiy Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.c | 35 ++++++++++++++++----------- 1 file changed, 21 insertions(+), 14 deletions(-) diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index 4d954533c71d..3487548841e1 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -11285,6 +11285,14 @@ void probe_cpuidle_residency(void) } } +static bool cpuidle_counter_wanted(char *name) +{ + if (is_deferred_skip(name)) + return false; + + return DO_BIC(BIC_cpuidle) || is_deferred_add(name); +} + void probe_cpuidle_counts(void) { char path[64]; @@ -11294,7 +11302,7 @@ void probe_cpuidle_counts(void) int min_state = 1024, max_state = 0; char *sp; - if (!DO_BIC(BIC_cpuidle)) + if (!DO_BIC(BIC_cpuidle) && !deferred_add_index) return; for (state = 10; state >= 0; --state) { @@ -11309,12 +11317,6 @@ void probe_cpuidle_counts(void) remove_underbar(name_buf); - if (!DO_BIC(BIC_cpuidle) && !is_deferred_add(name_buf)) - continue; - - if (is_deferred_skip(name_buf)) - continue; - /* truncate "C1-HSW\n" to "C1", or truncate "C1\n" to "C1" */ sp = strchr(name_buf, '-'); if (!sp) @@ -11329,16 +11331,19 @@ void probe_cpuidle_counts(void) * Add 'C1+' for C1, and so on. The 'below' sysfs file always contains 0 for * the last state, so do not add it. */ - *sp = '+'; *(sp + 1) = '\0'; - sprintf(path, "cpuidle/state%d/below", state); - add_counter(0, path, name_buf, 64, SCOPE_CPU, COUNTER_ITEMS, FORMAT_DELTA, SYSFS_PERCPU, 0); + if (cpuidle_counter_wanted(name_buf)) { + sprintf(path, "cpuidle/state%d/below", state); + add_counter(0, path, name_buf, 64, SCOPE_CPU, COUNTER_ITEMS, FORMAT_DELTA, SYSFS_PERCPU, 0); + } } *sp = '\0'; - sprintf(path, "cpuidle/state%d/usage", state); - add_counter(0, path, name_buf, 64, SCOPE_CPU, COUNTER_ITEMS, FORMAT_DELTA, SYSFS_PERCPU, 0); + if (cpuidle_counter_wanted(name_buf)) { + sprintf(path, "cpuidle/state%d/usage", state); + add_counter(0, path, name_buf, 64, SCOPE_CPU, COUNTER_ITEMS, FORMAT_DELTA, SYSFS_PERCPU, 0); + } /* * The 'above' sysfs file always contains 0 for the shallowest state (smallest @@ -11347,8 +11352,10 @@ void probe_cpuidle_counts(void) if (state != min_state) { *sp = '-'; *(sp + 1) = '\0'; - sprintf(path, "cpuidle/state%d/above", state); - add_counter(0, path, name_buf, 64, SCOPE_CPU, COUNTER_ITEMS, FORMAT_DELTA, SYSFS_PERCPU, 0); + if (cpuidle_counter_wanted(name_buf)) { + sprintf(path, "cpuidle/state%d/above", state); + add_counter(0, path, name_buf, 64, SCOPE_CPU, COUNTER_ITEMS, FORMAT_DELTA, SYSFS_PERCPU, 0); + } } } } From 85d98669fa7f1d3041d962515e45ee6e392db6f8 Mon Sep 17 00:00:00 2001 From: Loic Poulain Date: Mon, 2 Mar 2026 15:26:03 +0100 Subject: [PATCH 025/248] arm64: dts: qcom: monaco: Reserve full Gunyah metadata region MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We observe spurious "Synchronous External Abort" exceptions (ESR=0x96000010) and kernel crashes on Monaco-based platforms. These faults are caused by the kernel inadvertently accessing hypervisor-owned memory that is not properly marked as reserved. >From boot log, The Qualcomm hypervisor reports the memory range at 0x91a80000 of size 0x80000 (512 KiB) as hypervisor-owned: qhee_hyp_assign_remove_memory: 0x91a80000/0x80000 -> ret 0 However, the EFI memory map provided by firmware only reserves the subrange 0x91a40000–0x91a87fff (288 KiB). The remaining portion (0x91a88000–0x91afffff) is incorrectly reported as conventional memory (from efi debug): efi: 0x000091a40000-0x000091a87fff [Reserved...] efi: 0x000091a88000-0x0000938fffff [Conventional...] As a result, the allocator may hand out PFNs inside the hypervisor owned region, causing fatal aborts when the kernel accesses those addresses. Add a reserved-memory carveout for the Gunyah hypervisor metadata at 0x91a80000 (512 KiB) and mark it as no-map so Linux does not map or allocate from this area. For the record: Hyp version: gunyah-e78adb36e debug (2025-11-17 05:38:05 UTC) UEFI Ver: 6.0.260122.BOOT.MXF.1.0.c1-00449-KODIAKLA-1 Fixes: 7be190e4bdd2 ("arm64: dts: qcom: add QCS8300 platform") Signed-off-by: Loic Poulain Reviewed-by: Konrad Dybcio Reviewed-by: Dmitry Baryshkov Link: https://lore.kernel.org/r/20260302142603.1113355-1-loic.poulain@oss.qualcomm.com Signed-off-by: Bjorn Andersson --- arch/arm64/boot/dts/qcom/monaco.dtsi | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/arch/arm64/boot/dts/qcom/monaco.dtsi b/arch/arm64/boot/dts/qcom/monaco.dtsi index dbb2273b0ee8..0cb9fd154b68 100644 --- a/arch/arm64/boot/dts/qcom/monaco.dtsi +++ b/arch/arm64/boot/dts/qcom/monaco.dtsi @@ -765,6 +765,11 @@ smem_mem: smem@90900000 { hwlocks = <&tcsr_mutex 3>; }; + gunyah_md_mem: gunyah-md-region@91a80000 { + reg = <0x0 0x91a80000 0x0 0x80000>; + no-map; + }; + lpass_machine_learning_mem: lpass-machine-learning-region@93b00000 { reg = <0x0 0x93b00000 0x0 0xf00000>; no-map; From cdbefe9d4029d4834d404f7ba13a960b38a69e88 Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Wed, 11 Mar 2026 11:00:34 +0200 Subject: [PATCH 026/248] tools/power turbostat: Fix delimiter bug in print functions Commands that add counters, such as 'turbostat --show C1,C1+' display merged columns without a delimiter. This is caused by the bad syntax: '(*printed++ ? delim : "")', shared by print_name()/print_hex_value()/print_decimal_value()/print_float_value() Use '((*printed)++ ? delim : "")' to correctly increment the value at *printed. [lenb: fix code and commit message typo, re-word] Fixes: 56dbb878507b ("tools/power turbostat: Refactor added column header printing") Signed-off-by: Artem Bityutskiy Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index 3487548841e1..34e2143cd4b3 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -2837,29 +2837,29 @@ static inline int print_name(int width, int *printed, char *delim, char *name, e UNUSED(type); if (format == FORMAT_RAW && width >= 64) - return (sprintf(outp, "%s%-8s", (*printed++ ? delim : ""), name)); + return (sprintf(outp, "%s%-8s", ((*printed)++ ? delim : ""), name)); else - return (sprintf(outp, "%s%s", (*printed++ ? delim : ""), name)); + return (sprintf(outp, "%s%s", ((*printed)++ ? delim : ""), name)); } static inline int print_hex_value(int width, int *printed, char *delim, unsigned long long value) { if (width <= 32) - return (sprintf(outp, "%s%08x", (*printed++ ? delim : ""), (unsigned int)value)); + return (sprintf(outp, "%s%08x", ((*printed)++ ? delim : ""), (unsigned int)value)); else - return (sprintf(outp, "%s%016llx", (*printed++ ? delim : ""), value)); + return (sprintf(outp, "%s%016llx", ((*printed)++ ? delim : ""), value)); } static inline int print_decimal_value(int width, int *printed, char *delim, unsigned long long value) { UNUSED(width); - return (sprintf(outp, "%s%lld", (*printed++ ? delim : ""), value)); + return (sprintf(outp, "%s%lld", ((*printed)++ ? delim : ""), value)); } static inline int print_float_value(int *printed, char *delim, double value) { - return (sprintf(outp, "%s%0.2f", (*printed++ ? delim : ""), value)); + return (sprintf(outp, "%s%0.2f", ((*printed)++ ? delim : ""), value)); } void print_header(char *delim) From a343fb1e03cfc9f6dc83a5efb2a8d33e9cdaf6b9 Mon Sep 17 00:00:00 2001 From: Konrad Dybcio Date: Tue, 17 Mar 2026 15:14:40 +0100 Subject: [PATCH 027/248] soc: qcom: pmic_glink_altmode: Fix TBT->SAFE->!TBT transition Similar to the case of commit d48708500610 ("soc: qcom: pmic_glink_altmode: Fix SVID=DP && unconnected edge case"), leaving the TBT altmode makes pmic_glink_altmode report a SVID=TBT && mux_ctrl=0 message. Said commit reordered the check such that the SVID is processed before checking for NO_CONN. Rework this to take into account valid values of mux_ctrl first and hopefully solve this for good.. Fixes: d48708500610 ("soc: qcom: pmic_glink_altmode: Fix SVID=DP && unconnected edge case") Signed-off-by: Konrad Dybcio Reviewed-by: Dmitry Baryshkov Link: https://lore.kernel.org/r/20260317-topic-tbt_pg_fixup-v1-1-325b8647bc82@oss.qualcomm.com Signed-off-by: Bjorn Andersson --- drivers/soc/qcom/pmic_glink_altmode.c | 23 ++++++++++++++++++----- 1 file changed, 18 insertions(+), 5 deletions(-) diff --git a/drivers/soc/qcom/pmic_glink_altmode.c b/drivers/soc/qcom/pmic_glink_altmode.c index b496b88842a2..619bad2c27ee 100644 --- a/drivers/soc/qcom/pmic_glink_altmode.c +++ b/drivers/soc/qcom/pmic_glink_altmode.c @@ -62,6 +62,9 @@ struct usbc_notify { u8 orientation; u8 mux_ctrl; #define MUX_CTRL_STATE_NO_CONN 0 +#define MUX_CTRL_STATE_USB3_ONLY 1 +#define MUX_CTRL_STATE_DP4LN 2 +#define MUX_CTRL_STATE_USB3_DP 3 #define MUX_CTRL_STATE_TUNNELING 4 u8 res; @@ -350,9 +353,12 @@ static void pmic_glink_altmode_worker(struct work_struct *work) typec_switch_set(alt_port->typec_switch, alt_port->orientation); - if (alt_port->svid == USB_TYPEC_TBT_SID) { - pmic_glink_altmode_enable_tbt(altmode, alt_port); - } else if (alt_port->svid == USB_TYPEC_DP_SID) { + /* + * MUX_CTRL_STATE_DP4LN/USB3_DP may only be set if SVID=DP, but we need + * to special-case the SVID=DP && mux_ctrl=NO_CONN case to deliver a + * HPD notification + */ + if (alt_port->svid == USB_TYPEC_DP_SID) { if (alt_port->mux_ctrl == MUX_CTRL_STATE_NO_CONN) { pmic_glink_altmode_safe(altmode, alt_port); } else { @@ -369,11 +375,18 @@ static void pmic_glink_altmode_worker(struct work_struct *work) drm_aux_hpd_bridge_notify(&alt_port->bridge->dev, conn_status); } else if (alt_port->mux_ctrl == MUX_CTRL_STATE_TUNNELING) { - pmic_glink_altmode_enable_usb4(altmode, alt_port); + if (alt_port->svid == USB_TYPEC_TBT_SID) + pmic_glink_altmode_enable_tbt(altmode, alt_port); + else + pmic_glink_altmode_enable_usb4(altmode, alt_port); + } else if (alt_port->mux_ctrl == MUX_CTRL_STATE_USB3_ONLY) { + pmic_glink_altmode_enable_usb(altmode, alt_port); } else if (alt_port->mux_ctrl == MUX_CTRL_STATE_NO_CONN) { pmic_glink_altmode_safe(altmode, alt_port); } else { - pmic_glink_altmode_enable_usb(altmode, alt_port); + dev_err(altmode->dev, "Got unknown mux_ctrl: %u on port %u, forcing safe mode\n", + alt_port->mux_ctrl, alt_port->index); + pmic_glink_altmode_safe(altmode, alt_port); } pmic_glink_altmode_request(altmode, ALTMODE_PAN_ACK, alt_port->index); From 340bba73c545bfc7e8fcbc5ee4c02f85088f024d Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Wed, 11 Mar 2026 20:30:25 +0100 Subject: [PATCH 028/248] pinctrl: intel: Improve capability support The register space of a certain capability starts at the offset just after the respective node in the capability list. It means that there are no fixed offsets for them from SoC to SoC generation and they have to be calculated at run-time. Improve capability support by adding the respective calculation algorithm and in the result enable PWM on more platforms that currently may use the wrong register. Signed-off-by: Andy Shevchenko --- drivers/pinctrl/intel/pinctrl-intel.c | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/drivers/pinctrl/intel/pinctrl-intel.c b/drivers/pinctrl/intel/pinctrl-intel.c index 9d32bb8bc13a..adaa37a42754 100644 --- a/drivers/pinctrl/intel/pinctrl-intel.c +++ b/drivers/pinctrl/intel/pinctrl-intel.c @@ -53,8 +53,6 @@ #define PADOWN_MASK(p) (GENMASK(3, 0) << PADOWN_SHIFT(p)) #define PADOWN_GPP(p) ((p) / 8) -#define PWMC 0x204 - /* Offset from pad_regs */ #define PADCFG0 0x000 #define PADCFG0_RXEVCFG_MASK GENMASK(26, 25) @@ -1549,8 +1547,10 @@ static int intel_pinctrl_pm_init(struct intel_pinctrl *pctrl) } static int intel_pinctrl_probe_pwm(struct intel_pinctrl *pctrl, - struct intel_community *community) + struct intel_community *community, + unsigned short capability_offset) { + void __iomem *base = community->regs + capability_offset + 4; static const struct pwm_lpss_boardinfo info = { .clk_rate = 19200000, .npwm = 1, @@ -1564,7 +1564,7 @@ static int intel_pinctrl_probe_pwm(struct intel_pinctrl *pctrl, if (!IS_REACHABLE(CONFIG_PWM_LPSS)) return 0; - chip = devm_pwm_lpss_probe(pctrl->dev, community->regs + PWMC, &info); + chip = devm_pwm_lpss_probe(pctrl->dev, base, &info); return PTR_ERR_OR_ZERO(chip); } @@ -1595,6 +1595,7 @@ int intel_pinctrl_probe(struct platform_device *pdev, for (i = 0; i < pctrl->ncommunities; i++) { struct intel_community *community = &pctrl->communities[i]; + unsigned short capability_offset[6]; void __iomem *regs; u32 offset; u32 value; @@ -1622,15 +1623,19 @@ int intel_pinctrl_probe(struct platform_device *pdev, switch ((value & CAPLIST_ID_MASK) >> CAPLIST_ID_SHIFT) { case CAPLIST_ID_GPIO_HW_INFO: community->features |= PINCTRL_FEATURE_GPIO_HW_INFO; + capability_offset[CAPLIST_ID_GPIO_HW_INFO] = offset; break; case CAPLIST_ID_PWM: community->features |= PINCTRL_FEATURE_PWM; + capability_offset[CAPLIST_ID_PWM] = offset; break; case CAPLIST_ID_BLINK: community->features |= PINCTRL_FEATURE_BLINK; + capability_offset[CAPLIST_ID_BLINK] = offset; break; case CAPLIST_ID_EXP: community->features |= PINCTRL_FEATURE_EXP; + capability_offset[CAPLIST_ID_EXP] = offset; break; default: break; @@ -1653,7 +1658,7 @@ int intel_pinctrl_probe(struct platform_device *pdev, if (ret) return ret; - ret = intel_pinctrl_probe_pwm(pctrl, community); + ret = intel_pinctrl_probe_pwm(pctrl, community, capability_offset[CAPLIST_ID_PWM]); if (ret) return ret; } From a4337a24d13e9e3b98a113e71d6b80dc5ed5f8c4 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Wed, 11 Mar 2026 18:14:04 +0100 Subject: [PATCH 029/248] pinctrl: intel: Fix the revision for new features (1kOhm PD, HW debouncer) The 1kOhm pull down and hardware debouncer are features of the revision 0.92 of the Chassis specification. Fix that in the code accordingly. Signed-off-by: Andy Shevchenko --- drivers/pinctrl/intel/pinctrl-intel.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/pinctrl/intel/pinctrl-intel.c b/drivers/pinctrl/intel/pinctrl-intel.c index adaa37a42754..a5a264ba6fbb 100644 --- a/drivers/pinctrl/intel/pinctrl-intel.c +++ b/drivers/pinctrl/intel/pinctrl-intel.c @@ -1611,7 +1611,7 @@ int intel_pinctrl_probe(struct platform_device *pdev, value = readl(regs + REVID); if (value == ~0u) return -ENODEV; - if (((value & REVID_MASK) >> REVID_SHIFT) >= 0x94) { + if (((value & REVID_MASK) >> REVID_SHIFT) >= 0x92) { community->features |= PINCTRL_FEATURE_DEBOUNCE; community->features |= PINCTRL_FEATURE_1K_PD; } From 1ca468e78ea97c3365befdd408f71bda4b295134 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Wed, 11 Mar 2026 18:15:58 +0100 Subject: [PATCH 030/248] pinctrl: intel: Enable 3-bit PAD_OWN feature Starting from revision 1.1 of the Chassis specification the PAD_OWN is represented by 3 bits instead of 2 bits in the previous revisions. Update the driver to support this feature. Signed-off-by: Andy Shevchenko --- drivers/pinctrl/intel/pinctrl-intel.c | 21 ++++++++++++++++----- drivers/pinctrl/intel/pinctrl-intel.h | 1 + 2 files changed, 17 insertions(+), 5 deletions(-) diff --git a/drivers/pinctrl/intel/pinctrl-intel.c b/drivers/pinctrl/intel/pinctrl-intel.c index a5a264ba6fbb..97bf5ec78db4 100644 --- a/drivers/pinctrl/intel/pinctrl-intel.c +++ b/drivers/pinctrl/intel/pinctrl-intel.c @@ -203,19 +203,25 @@ static bool intel_pad_owned_by_host(const struct intel_pinctrl *pctrl, unsigned community = intel_get_community(pctrl, pin); if (!community) return false; - if (!community->padown_offset) + + /* If padown_offset is not provided, assume host ownership */ + padown = community->regs + community->padown_offset; + if (padown == community->regs) return true; + /* New HW generations have extended PAD_OWN registers */ + if (community->features & PINCTRL_FEATURE_3BIT_PAD_OWN) + return !(readl(padown + pin_to_padno(community, pin) * 4) & 7); + padgrp = intel_community_get_padgroup(community, pin); if (!padgrp) return false; gpp_offset = padgroup_offset(padgrp, pin); gpp = PADOWN_GPP(gpp_offset); - offset = community->padown_offset + padgrp->padown_num * 4 + gpp * 4; - padown = community->regs + offset; + offset = padgrp->padown_num * 4 + gpp * 4; - return !(readl(padown) & PADOWN_MASK(gpp_offset)); + return !(readl(padown + offset) & PADOWN_MASK(gpp_offset)); } static bool intel_pad_acpi_mode(const struct intel_pinctrl *pctrl, unsigned int pin) @@ -1597,6 +1603,7 @@ int intel_pinctrl_probe(struct platform_device *pdev, struct intel_community *community = &pctrl->communities[i]; unsigned short capability_offset[6]; void __iomem *regs; + u32 revision; u32 offset; u32 value; @@ -1611,10 +1618,14 @@ int intel_pinctrl_probe(struct platform_device *pdev, value = readl(regs + REVID); if (value == ~0u) return -ENODEV; - if (((value & REVID_MASK) >> REVID_SHIFT) >= 0x92) { + + revision = (value & REVID_MASK) >> REVID_SHIFT; + if (revision >= 0x092) { community->features |= PINCTRL_FEATURE_DEBOUNCE; community->features |= PINCTRL_FEATURE_1K_PD; } + if (revision >= 0x110) + community->features |= PINCTRL_FEATURE_3BIT_PAD_OWN; /* Determine community features based on the capabilities */ offset = CAPLIST; diff --git a/drivers/pinctrl/intel/pinctrl-intel.h b/drivers/pinctrl/intel/pinctrl-intel.h index 2f37109d5860..b5476b9de0db 100644 --- a/drivers/pinctrl/intel/pinctrl-intel.h +++ b/drivers/pinctrl/intel/pinctrl-intel.h @@ -150,6 +150,7 @@ struct intel_community { #define PINCTRL_FEATURE_PWM BIT(3) #define PINCTRL_FEATURE_BLINK BIT(4) #define PINCTRL_FEATURE_EXP BIT(5) +#define PINCTRL_FEATURE_3BIT_PAD_OWN BIT(6) #define __INTEL_COMMUNITY(b, s, e, g, n, gs, gn, soc) \ { \ From 27459f86a43792d5c29f267a41dbd387601e772b Mon Sep 17 00:00:00 2001 From: Felix Gu Date: Mon, 9 Mar 2026 20:16:14 +0800 Subject: [PATCH 031/248] soc: microchip: mpfs-control-scb: Fix resource leak on driver unbind Use devm_mfd_add_devices() instead of mfd_add_devices() to ensure child devices are properly removed when the driver unbinds. Fixes: 4aac11c9a6e7 ("soc: microchip: add mfd drivers for two syscon regions on PolarFire SoC") Signed-off-by: Felix Gu Signed-off-by: Conor Dooley --- drivers/soc/microchip/mpfs-control-scb.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/soc/microchip/mpfs-control-scb.c b/drivers/soc/microchip/mpfs-control-scb.c index f0b84b1f49cb..8dda5704a389 100644 --- a/drivers/soc/microchip/mpfs-control-scb.c +++ b/drivers/soc/microchip/mpfs-control-scb.c @@ -14,8 +14,10 @@ static int mpfs_control_scb_probe(struct platform_device *pdev) { struct device *dev = &pdev->dev; - return mfd_add_devices(dev, PLATFORM_DEVID_NONE, mpfs_control_scb_devs, - ARRAY_SIZE(mpfs_control_scb_devs), NULL, 0, NULL); + return devm_mfd_add_devices(dev, PLATFORM_DEVID_NONE, + mpfs_control_scb_devs, + ARRAY_SIZE(mpfs_control_scb_devs), NULL, 0, + NULL); } static const struct of_device_id mpfs_control_scb_of_match[] = { From 3bfc213d4675736567a4e263c51c25144d565949 Mon Sep 17 00:00:00 2001 From: Felix Gu Date: Mon, 9 Mar 2026 20:16:15 +0800 Subject: [PATCH 032/248] soc: microchip: mpfs-mss-top-sysreg: Fix resource leak on driver unbind Use devm_mfd_add_devices() instead of mfd_add_devices() to ensure child devices are properly removed when the driver unbinds. Fixes: 4aac11c9a6e7 ("soc: microchip: add mfd drivers for two syscon regions on PolarFire SoC") Signed-off-by: Felix Gu Signed-off-by: Conor Dooley --- drivers/soc/microchip/mpfs-mss-top-sysreg.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/soc/microchip/mpfs-mss-top-sysreg.c b/drivers/soc/microchip/mpfs-mss-top-sysreg.c index b2244e44ff0f..b0f42b8dd3ed 100644 --- a/drivers/soc/microchip/mpfs-mss-top-sysreg.c +++ b/drivers/soc/microchip/mpfs-mss-top-sysreg.c @@ -16,8 +16,10 @@ static int mpfs_mss_top_sysreg_probe(struct platform_device *pdev) struct device *dev = &pdev->dev; int ret; - ret = mfd_add_devices(dev, PLATFORM_DEVID_NONE, mpfs_mss_top_sysreg_devs, - ARRAY_SIZE(mpfs_mss_top_sysreg_devs) , NULL, 0, NULL); + ret = devm_mfd_add_devices(dev, PLATFORM_DEVID_NONE, + mpfs_mss_top_sysreg_devs, + ARRAY_SIZE(mpfs_mss_top_sysreg_devs), NULL, + 0, NULL); if (ret) return ret; From c961cc86af01246a7ce706bbc29072d314e00880 Mon Sep 17 00:00:00 2001 From: Biju Das Date: Wed, 18 Mar 2026 12:08:52 +0000 Subject: [PATCH 033/248] reset: rzg2l-usbphy-ctrl: Fix malformed MODULE_AUTHOR string Fix a malformed MODULE_AUTHOR macro in the RZ/G2L USBPHY control driver where the author's name and opening angle bracket were missing, leaving only the email address with a stray closing >. Correct it to the standard Name format. Signed-off-by: Biju Das Reviewed-by: Philipp Zabel Signed-off-by: Philipp Zabel --- drivers/reset/reset-rzg2l-usbphy-ctrl.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/reset/reset-rzg2l-usbphy-ctrl.c b/drivers/reset/reset-rzg2l-usbphy-ctrl.c index 05dd9b4a02df..fd75d9601a3b 100644 --- a/drivers/reset/reset-rzg2l-usbphy-ctrl.c +++ b/drivers/reset/reset-rzg2l-usbphy-ctrl.c @@ -350,4 +350,4 @@ module_platform_driver(rzg2l_usbphy_ctrl_driver); MODULE_LICENSE("GPL v2"); MODULE_DESCRIPTION("Renesas RZ/G2L USBPHY Control"); -MODULE_AUTHOR("biju.das.jz@bp.renesas.com>"); +MODULE_AUTHOR("Biju Das "); From c1f2b0f2b5e37b2c27540a175aea2755a3799433 Mon Sep 17 00:00:00 2001 From: Shawn Guo Date: Fri, 27 Feb 2026 15:19:58 +0800 Subject: [PATCH 034/248] arm64: dts: hisilicon: poplar: Correct PCIe reset GPIO polarity The PCIe reset GPIO on Poplar is actually active low. The active high worked before because kernel driver didn't respect the setting from DT. This is changed since commit 1d26a55fbeb9 ("PCI: histb: Switch to using gpiod API"), and thus PCIe on Poplar got brken since then. Fix the problem by correcting the polarity. Fixes: 32fa01761bd9 ("arm64: dts: hi3798cv200: enable PCIe support for poplar board") Cc: stable@vger.kernel.org Signed-off-by: Shawn Guo Signed-off-by: Wei Xu --- arch/arm64/boot/dts/hisilicon/hi3798cv200-poplar.dts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/boot/dts/hisilicon/hi3798cv200-poplar.dts b/arch/arm64/boot/dts/hisilicon/hi3798cv200-poplar.dts index 7d370dac4c85..579d55daa7d0 100644 --- a/arch/arm64/boot/dts/hisilicon/hi3798cv200-poplar.dts +++ b/arch/arm64/boot/dts/hisilicon/hi3798cv200-poplar.dts @@ -179,7 +179,7 @@ &ohci { }; &pcie { - reset-gpios = <&gpio4 4 GPIO_ACTIVE_HIGH>; + reset-gpios = <&gpio4 4 GPIO_ACTIVE_LOW>; vpcie-supply = <®_pcie>; status = "okay"; }; From 1af997cad473d505248df6d9577183bb91f69670 Mon Sep 17 00:00:00 2001 From: Shawn Guo Date: Fri, 27 Feb 2026 15:22:10 +0800 Subject: [PATCH 035/248] arm64: dts: hisilicon: hi3798cv200: Add missing dma-ranges Reboot starts failing on Poplar since commit 8424ecdde7df ("arm64: mm: Set ZONE_DMA size based on devicetree's dma-ranges"), which effectively changes zone_dma_bits from 30 to 32 for arm64 platforms that do not properly define dma-ranges in device tree. It's unclear how Poplar reboot gets broken by this change exactly, but a dma-ranges limiting zone_dma to the first 1 GB fixes the regression. Fixes: 2f20182ed670 ("arm64: dts: hisilicon: add dts files for hi3798cv200-poplar board") Cc: stable@vger.kernel.org Signed-off-by: Shawn Guo Signed-off-by: Wei Xu --- arch/arm64/boot/dts/hisilicon/hi3798cv200.dtsi | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm64/boot/dts/hisilicon/hi3798cv200.dtsi b/arch/arm64/boot/dts/hisilicon/hi3798cv200.dtsi index f6bc001c3832..2f4ad5da5e33 100644 --- a/arch/arm64/boot/dts/hisilicon/hi3798cv200.dtsi +++ b/arch/arm64/boot/dts/hisilicon/hi3798cv200.dtsi @@ -122,6 +122,7 @@ soc: soc@f0000000 { #address-cells = <1>; #size-cells = <1>; ranges = <0x0 0x0 0xf0000000 0x10000000>; + dma-ranges = <0x0 0x0 0x0 0x40000000>; crg: clock-reset-controller@8a22000 { compatible = "hisilicon,hi3798cv200-crg", "syscon", "simple-mfd"; From 907150bbe566e23714a25d7bcb910f236c3c44c0 Mon Sep 17 00:00:00 2001 From: Mihai Sain Date: Mon, 9 Feb 2026 11:07:35 +0200 Subject: [PATCH 036/248] ARM: dts: microchip: sam9x7: fix gpio-lines count for pioB The pioB controller on the SAM9X7 SoC actually supports 27 GPIO lines. The previous value of 26 was incorrect, leading to the last pin being unavailable for use by the GPIO subsystem. Update the #gpio-lines property to reflect the correct hardware specification. Fixes: 41af45af8bc3 ("ARM: dts: at91: sam9x7: add device tree for SoC") Signed-off-by: Mihai Sain Link: https://lore.kernel.org/r/20260209090735.2016-1-mihai.sain@microchip.com Signed-off-by: Claudiu Beznea --- arch/arm/boot/dts/microchip/sam9x7.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/boot/dts/microchip/sam9x7.dtsi b/arch/arm/boot/dts/microchip/sam9x7.dtsi index 46dacbbd201d..d242d7a934d0 100644 --- a/arch/arm/boot/dts/microchip/sam9x7.dtsi +++ b/arch/arm/boot/dts/microchip/sam9x7.dtsi @@ -1226,7 +1226,7 @@ pioB: gpio@fffff600 { interrupt-controller; #gpio-cells = <2>; gpio-controller; - #gpio-lines = <26>; + #gpio-lines = <27>; clocks = <&pmc PMC_TYPE_PERIPHERAL 3>; }; From fbffb8c7c7bb4d38e9f65e0bee446685011de5d8 Mon Sep 17 00:00:00 2001 From: Guangshuo Li Date: Sat, 21 Mar 2026 15:42:40 +0800 Subject: [PATCH 037/248] reset: gpio: fix double free in reset_add_gpio_aux_device() error path When __auxiliary_device_add() fails, reset_add_gpio_aux_device() calls auxiliary_device_uninit(adev). The device release callback reset_gpio_aux_device_release() frees adev, but the current error path then calls kfree(adev) again, causing a double free. Keep kfree(adev) for the auxiliary_device_init() failure path, but avoid freeing adev after auxiliary_device_uninit(). Fixes: 5fc4e4cf7a22 ("reset: gpio: use software nodes to setup the GPIO lookup") Cc: stable@vger.kernel.org Signed-off-by: Guangshuo Li Reviewed-by: Bartosz Golaszewski Signed-off-by: Philipp Zabel --- drivers/reset/core.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/reset/core.c b/drivers/reset/core.c index fceec45c8afc..352c2360603b 100644 --- a/drivers/reset/core.c +++ b/drivers/reset/core.c @@ -856,7 +856,6 @@ static int reset_add_gpio_aux_device(struct device *parent, ret = __auxiliary_device_add(adev, "reset"); if (ret) { auxiliary_device_uninit(adev); - kfree(adev); return ret; } From a0e0c2f8c5f32b675f58e25a9338283cedb5ad2b Mon Sep 17 00:00:00 2001 From: Yixun Lan Date: Fri, 20 Mar 2026 11:06:17 +0000 Subject: [PATCH 038/248] reset: spacemit: k3: Decouple composite reset lines Instead of grouping several different reset lines into one composite reset, decouple them to individual ones which make it more aligned with underlying hardware. And for DWC USB driver, it will match well with the number of the reset property in the DT bindings. The DWC3 USB host controller in K3 SoC has three reset lines - AHB, VCC, PHY. The PCIe controller also has three reset lines - DBI, Slave, Master. Also three reset lines each for UCIE and RCPU block. As an agreement with maintainer, the reset IDs has been rearranged as contiguous number but keep most part unchanged to avoid break patches which already sent to mailing list. The changes of DT binding header file and reset driver are merged together as one single commit to avoid git-bisect breakage. Fixes: 938ce3b16582 ("reset: spacemit: Add SpacemiT K3 reset driver") Fixes: 216e0a5e98e5 ("dt-bindings: soc: spacemit: Add K3 reset support and IDs") Signed-off-by: Yixun Lan Reviewed-by: Philipp Zabel Acked-by: Conor Dooley Signed-off-by: Philipp Zabel --- drivers/reset/spacemit/reset-spacemit-k3.c | 60 +++++++++++-------- .../dt-bindings/reset/spacemit,k3-resets.h | 48 +++++++++++---- 2 files changed, 72 insertions(+), 36 deletions(-) diff --git a/drivers/reset/spacemit/reset-spacemit-k3.c b/drivers/reset/spacemit/reset-spacemit-k3.c index e9e32e4c1ba5..9841f5e057b2 100644 --- a/drivers/reset/spacemit/reset-spacemit-k3.c +++ b/drivers/reset/spacemit/reset-spacemit-k3.c @@ -112,16 +112,21 @@ static const struct ccu_reset_data k3_apmu_resets[] = { [RESET_APMU_SDH0] = RESET_DATA(APMU_SDH0_CLK_RES_CTRL, 0, BIT(1)), [RESET_APMU_SDH1] = RESET_DATA(APMU_SDH1_CLK_RES_CTRL, 0, BIT(1)), [RESET_APMU_SDH2] = RESET_DATA(APMU_SDH2_CLK_RES_CTRL, 0, BIT(1)), - [RESET_APMU_USB2] = RESET_DATA(APMU_USB_CLK_RES_CTRL, 0, - BIT(1)|BIT(2)|BIT(3)), - [RESET_APMU_USB3_PORTA] = RESET_DATA(APMU_USB_CLK_RES_CTRL, 0, - BIT(5)|BIT(6)|BIT(7)), - [RESET_APMU_USB3_PORTB] = RESET_DATA(APMU_USB_CLK_RES_CTRL, 0, - BIT(9)|BIT(10)|BIT(11)), - [RESET_APMU_USB3_PORTC] = RESET_DATA(APMU_USB_CLK_RES_CTRL, 0, - BIT(13)|BIT(14)|BIT(15)), - [RESET_APMU_USB3_PORTD] = RESET_DATA(APMU_USB_CLK_RES_CTRL, 0, - BIT(17)|BIT(18)|BIT(19)), + [RESET_APMU_USB2_AHB] = RESET_DATA(APMU_USB_CLK_RES_CTRL, 0, BIT(1)), + [RESET_APMU_USB2_VCC] = RESET_DATA(APMU_USB_CLK_RES_CTRL, 0, BIT(2)), + [RESET_APMU_USB2_PHY] = RESET_DATA(APMU_USB_CLK_RES_CTRL, 0, BIT(3)), + [RESET_APMU_USB3_A_AHB] = RESET_DATA(APMU_USB_CLK_RES_CTRL, 0, BIT(5)), + [RESET_APMU_USB3_A_VCC] = RESET_DATA(APMU_USB_CLK_RES_CTRL, 0, BIT(6)), + [RESET_APMU_USB3_A_PHY] = RESET_DATA(APMU_USB_CLK_RES_CTRL, 0, BIT(7)), + [RESET_APMU_USB3_B_AHB] = RESET_DATA(APMU_USB_CLK_RES_CTRL, 0, BIT(9)), + [RESET_APMU_USB3_B_VCC] = RESET_DATA(APMU_USB_CLK_RES_CTRL, 0, BIT(10)), + [RESET_APMU_USB3_B_PHY] = RESET_DATA(APMU_USB_CLK_RES_CTRL, 0, BIT(11)), + [RESET_APMU_USB3_C_AHB] = RESET_DATA(APMU_USB_CLK_RES_CTRL, 0, BIT(13)), + [RESET_APMU_USB3_C_VCC] = RESET_DATA(APMU_USB_CLK_RES_CTRL, 0, BIT(14)), + [RESET_APMU_USB3_C_PHY] = RESET_DATA(APMU_USB_CLK_RES_CTRL, 0, BIT(15)), + [RESET_APMU_USB3_D_AHB] = RESET_DATA(APMU_USB_CLK_RES_CTRL, 0, BIT(17)), + [RESET_APMU_USB3_D_VCC] = RESET_DATA(APMU_USB_CLK_RES_CTRL, 0, BIT(18)), + [RESET_APMU_USB3_D_PHY] = RESET_DATA(APMU_USB_CLK_RES_CTRL, 0, BIT(19)), [RESET_APMU_QSPI] = RESET_DATA(APMU_QSPI_CLK_RES_CTRL, 0, BIT(1)), [RESET_APMU_QSPI_BUS] = RESET_DATA(APMU_QSPI_CLK_RES_CTRL, 0, BIT(0)), [RESET_APMU_DMA] = RESET_DATA(APMU_DMA_CLK_RES_CTRL, 0, BIT(0)), @@ -151,10 +156,12 @@ static const struct ccu_reset_data k3_apmu_resets[] = { [RESET_APMU_CPU7_SW] = RESET_DATA(APMU_PMU_CC2_AP, BIT(26), 0), [RESET_APMU_C1_MPSUB_SW] = RESET_DATA(APMU_PMU_CC2_AP, BIT(28), 0), [RESET_APMU_MPSUB_DBG] = RESET_DATA(APMU_PMU_CC2_AP, BIT(29), 0), - [RESET_APMU_UCIE] = RESET_DATA(APMU_UCIE_CTRL, - BIT(1) | BIT(2) | BIT(3), 0), - [RESET_APMU_RCPU] = RESET_DATA(APMU_RCPU_CLK_RES_CTRL, 0, - BIT(3) | BIT(2) | BIT(0)), + [RESET_APMU_UCIE_IP] = RESET_DATA(APMU_UCIE_CTRL, BIT(1), 0), + [RESET_APMU_UCIE_HOT] = RESET_DATA(APMU_UCIE_CTRL, BIT(2), 0), + [RESET_APMU_UCIE_MON] = RESET_DATA(APMU_UCIE_CTRL, BIT(3), 0), + [RESET_APMU_RCPU_AUDIO_SYS] = RESET_DATA(APMU_RCPU_CLK_RES_CTRL, 0, BIT(0)), + [RESET_APMU_RCPU_MCU_CORE] = RESET_DATA(APMU_RCPU_CLK_RES_CTRL, 0, BIT(2)), + [RESET_APMU_RCPU_AUDIO_APMU] = RESET_DATA(APMU_RCPU_CLK_RES_CTRL, 0, BIT(3)), [RESET_APMU_DSI4LN2_ESCCLK] = RESET_DATA(APMU_LCD_CLK_RES_CTRL3, 0, BIT(3)), [RESET_APMU_DSI4LN2_LCD_SW] = RESET_DATA(APMU_LCD_CLK_RES_CTRL3, 0, BIT(4)), [RESET_APMU_DSI4LN2_LCD_MCLK] = RESET_DATA(APMU_LCD_CLK_RES_CTRL4, 0, BIT(9)), @@ -164,16 +171,21 @@ static const struct ccu_reset_data k3_apmu_resets[] = { [RESET_APMU_UFS_ACLK] = RESET_DATA(APMU_UFS_CLK_RES_CTRL, 0, BIT(0)), [RESET_APMU_EDP0] = RESET_DATA(APMU_LCD_EDP_CTRL, 0, BIT(0)), [RESET_APMU_EDP1] = RESET_DATA(APMU_LCD_EDP_CTRL, 0, BIT(16)), - [RESET_APMU_PCIE_PORTA] = RESET_DATA(APMU_PCIE_CLK_RES_CTRL_A, 0, - BIT(5) | BIT(4) | BIT(3)), - [RESET_APMU_PCIE_PORTB] = RESET_DATA(APMU_PCIE_CLK_RES_CTRL_B, 0, - BIT(5) | BIT(4) | BIT(3)), - [RESET_APMU_PCIE_PORTC] = RESET_DATA(APMU_PCIE_CLK_RES_CTRL_C, 0, - BIT(5) | BIT(4) | BIT(3)), - [RESET_APMU_PCIE_PORTD] = RESET_DATA(APMU_PCIE_CLK_RES_CTRL_D, 0, - BIT(5) | BIT(4) | BIT(3)), - [RESET_APMU_PCIE_PORTE] = RESET_DATA(APMU_PCIE_CLK_RES_CTRL_E, 0, - BIT(5) | BIT(4) | BIT(3)), + [RESET_APMU_PCIE_A_DBI] = RESET_DATA(APMU_PCIE_CLK_RES_CTRL_A, 0, BIT(3)), + [RESET_APMU_PCIE_A_SLAVE] = RESET_DATA(APMU_PCIE_CLK_RES_CTRL_A, 0, BIT(4)), + [RESET_APMU_PCIE_A_MASTER] = RESET_DATA(APMU_PCIE_CLK_RES_CTRL_A, 0, BIT(5)), + [RESET_APMU_PCIE_B_DBI] = RESET_DATA(APMU_PCIE_CLK_RES_CTRL_B, 0, BIT(3)), + [RESET_APMU_PCIE_B_SLAVE] = RESET_DATA(APMU_PCIE_CLK_RES_CTRL_B, 0, BIT(4)), + [RESET_APMU_PCIE_B_MASTER] = RESET_DATA(APMU_PCIE_CLK_RES_CTRL_B, 0, BIT(5)), + [RESET_APMU_PCIE_C_DBI] = RESET_DATA(APMU_PCIE_CLK_RES_CTRL_C, 0, BIT(3)), + [RESET_APMU_PCIE_C_SLAVE] = RESET_DATA(APMU_PCIE_CLK_RES_CTRL_C, 0, BIT(4)), + [RESET_APMU_PCIE_C_MASTER] = RESET_DATA(APMU_PCIE_CLK_RES_CTRL_C, 0, BIT(5)), + [RESET_APMU_PCIE_D_DBI] = RESET_DATA(APMU_PCIE_CLK_RES_CTRL_D, 0, BIT(3)), + [RESET_APMU_PCIE_D_SLAVE] = RESET_DATA(APMU_PCIE_CLK_RES_CTRL_D, 0, BIT(4)), + [RESET_APMU_PCIE_D_MASTER] = RESET_DATA(APMU_PCIE_CLK_RES_CTRL_D, 0, BIT(5)), + [RESET_APMU_PCIE_E_DBI] = RESET_DATA(APMU_PCIE_CLK_RES_CTRL_E, 0, BIT(3)), + [RESET_APMU_PCIE_E_SLAVE] = RESET_DATA(APMU_PCIE_CLK_RES_CTRL_E, 0, BIT(4)), + [RESET_APMU_PCIE_E_MASTER] = RESET_DATA(APMU_PCIE_CLK_RES_CTRL_E, 0, BIT(5)), [RESET_APMU_EMAC0] = RESET_DATA(APMU_EMAC0_CLK_RES_CTRL, 0, BIT(1)), [RESET_APMU_EMAC1] = RESET_DATA(APMU_EMAC1_CLK_RES_CTRL, 0, BIT(1)), [RESET_APMU_EMAC2] = RESET_DATA(APMU_EMAC2_CLK_RES_CTRL, 0, BIT(1)), diff --git a/include/dt-bindings/reset/spacemit,k3-resets.h b/include/dt-bindings/reset/spacemit,k3-resets.h index 79ac1c22b7b5..dc1ef009ba79 100644 --- a/include/dt-bindings/reset/spacemit,k3-resets.h +++ b/include/dt-bindings/reset/spacemit,k3-resets.h @@ -97,11 +97,11 @@ #define RESET_APMU_SDH0 13 #define RESET_APMU_SDH1 14 #define RESET_APMU_SDH2 15 -#define RESET_APMU_USB2 16 -#define RESET_APMU_USB3_PORTA 17 -#define RESET_APMU_USB3_PORTB 18 -#define RESET_APMU_USB3_PORTC 19 -#define RESET_APMU_USB3_PORTD 20 +#define RESET_APMU_USB2_AHB 16 +#define RESET_APMU_USB2_VCC 17 +#define RESET_APMU_USB2_PHY 18 +#define RESET_APMU_USB3_A_AHB 19 +#define RESET_APMU_USB3_A_VCC 20 #define RESET_APMU_QSPI 21 #define RESET_APMU_QSPI_BUS 22 #define RESET_APMU_DMA 23 @@ -132,8 +132,8 @@ #define RESET_APMU_CPU7_SW 48 #define RESET_APMU_C1_MPSUB_SW 49 #define RESET_APMU_MPSUB_DBG 50 -#define RESET_APMU_UCIE 51 -#define RESET_APMU_RCPU 52 +#define RESET_APMU_USB3_A_PHY 51 /* USB3 A */ +#define RESET_APMU_USB3_B_AHB 52 #define RESET_APMU_DSI4LN2_ESCCLK 53 #define RESET_APMU_DSI4LN2_LCD_SW 54 #define RESET_APMU_DSI4LN2_LCD_MCLK 55 @@ -143,16 +143,40 @@ #define RESET_APMU_UFS_ACLK 59 #define RESET_APMU_EDP0 60 #define RESET_APMU_EDP1 61 -#define RESET_APMU_PCIE_PORTA 62 -#define RESET_APMU_PCIE_PORTB 63 -#define RESET_APMU_PCIE_PORTC 64 -#define RESET_APMU_PCIE_PORTD 65 -#define RESET_APMU_PCIE_PORTE 66 +#define RESET_APMU_USB3_B_VCC 62 /* USB3 B */ +#define RESET_APMU_USB3_B_PHY 63 +#define RESET_APMU_USB3_C_AHB 64 +#define RESET_APMU_USB3_C_VCC 65 +#define RESET_APMU_USB3_C_PHY 66 #define RESET_APMU_EMAC0 67 #define RESET_APMU_EMAC1 68 #define RESET_APMU_EMAC2 69 #define RESET_APMU_ESPI_MCLK 70 #define RESET_APMU_ESPI_SCLK 71 +#define RESET_APMU_USB3_D_AHB 72 /* USB3 D */ +#define RESET_APMU_USB3_D_VCC 73 +#define RESET_APMU_USB3_D_PHY 74 +#define RESET_APMU_UCIE_IP 75 +#define RESET_APMU_UCIE_HOT 76 +#define RESET_APMU_UCIE_MON 77 +#define RESET_APMU_RCPU_AUDIO_SYS 78 +#define RESET_APMU_RCPU_MCU_CORE 79 +#define RESET_APMU_RCPU_AUDIO_APMU 80 +#define RESET_APMU_PCIE_A_DBI 81 +#define RESET_APMU_PCIE_A_SLAVE 82 +#define RESET_APMU_PCIE_A_MASTER 83 +#define RESET_APMU_PCIE_B_DBI 84 +#define RESET_APMU_PCIE_B_SLAVE 85 +#define RESET_APMU_PCIE_B_MASTER 86 +#define RESET_APMU_PCIE_C_DBI 87 +#define RESET_APMU_PCIE_C_SLAVE 88 +#define RESET_APMU_PCIE_C_MASTER 89 +#define RESET_APMU_PCIE_D_DBI 90 +#define RESET_APMU_PCIE_D_SLAVE 91 +#define RESET_APMU_PCIE_D_MASTER 92 +#define RESET_APMU_PCIE_E_DBI 93 +#define RESET_APMU_PCIE_E_SLAVE 94 +#define RESET_APMU_PCIE_E_MASTER 95 /* DCIU resets*/ #define RESET_DCIU_HDMA 0 From 6896ca5a9d05275fbeb38640c9bbdb95698de188 Mon Sep 17 00:00:00 2001 From: Chen-Yu Tsai Date: Tue, 24 Mar 2026 01:19:26 +0800 Subject: [PATCH 039/248] arm64: dts: allwinner: sun55i: Fix r-spi DMA r-spi has DRQs for both the main and MCU DMA controllers on the A523 SoC family, however it seems it that it is mainly routed to the MCU DMA controller, with no obvious way to change it. Change the DMA channels of r-spi to the MCU so that it works properly. Fixes: 1bec3bd1f839 ("arm64: dts: allwinner: sun55i: Add SPI controllers") Acked-by: Jernej Skrabec Link: https://patch.msgid.link/20260323171927.1256507-1-wens@kernel.org Signed-off-by: Chen-Yu Tsai --- arch/arm64/boot/dts/allwinner/sun55i-a523.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/boot/dts/allwinner/sun55i-a523.dtsi b/arch/arm64/boot/dts/allwinner/sun55i-a523.dtsi index 9335977751e2..a4230205c02b 100644 --- a/arch/arm64/boot/dts/allwinner/sun55i-a523.dtsi +++ b/arch/arm64/boot/dts/allwinner/sun55i-a523.dtsi @@ -901,7 +901,7 @@ r_spi0: spi@7092000 { interrupts = ; clocks = <&r_ccu CLK_BUS_R_SPI>, <&r_ccu CLK_R_SPI>; clock-names = "ahb", "mod"; - dmas = <&dma 53>, <&dma 53>; + dmas = <&mcu_dma 13>, <&mcu_dma 13>; dma-names = "rx", "tx"; resets = <&r_ccu RST_BUS_R_SPI>; status = "disabled"; From 966a08c293cb9290d3fe932961404e87b3f81327 Mon Sep 17 00:00:00 2001 From: Sumit Garg Date: Thu, 22 Jan 2026 17:40:40 +0530 Subject: [PATCH 040/248] dt-bindings: display: msm: qcm2290-mdss: Fix iommus property Fix IOMMU DT propety for display via dropping SMMU stream IDs which relates to secure context bank. Assigning Linux kernel (HLOS) VMID to secure context bank stream IDs is incorrect. The maximum value for iommus property is updated accordingly. These DT bindings changes should be backwards compatible. Signed-off-by: Sumit Garg Reviewed-by: Rob Herring (Arm) Link: https://lore.kernel.org/r/20260122121042.579270-2-sumit.garg@kernel.org Signed-off-by: Bjorn Andersson --- .../devicetree/bindings/display/msm/qcom,qcm2290-mdss.yaml | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/Documentation/devicetree/bindings/display/msm/qcom,qcm2290-mdss.yaml b/Documentation/devicetree/bindings/display/msm/qcom,qcm2290-mdss.yaml index f0cdb5422688..2772cdec7e42 100644 --- a/Documentation/devicetree/bindings/display/msm/qcom,qcm2290-mdss.yaml +++ b/Documentation/devicetree/bindings/display/msm/qcom,qcm2290-mdss.yaml @@ -33,7 +33,7 @@ properties: - const: core iommus: - maxItems: 2 + maxItems: 1 interconnects: items: @@ -107,9 +107,7 @@ examples: interconnect-names = "mdp0-mem", "cpu-cfg"; - iommus = <&apps_smmu 0x420 0x2>, - <&apps_smmu 0x421 0x0>; - ranges; + iommus = <&apps_smmu 0x420 0x2>; display-controller@5e01000 { compatible = "qcom,qcm2290-dpu"; From 7e59cd4ad586afd87f67491cf91fa1141292cf57 Mon Sep 17 00:00:00 2001 From: Sumit Garg Date: Thu, 22 Jan 2026 17:40:41 +0530 Subject: [PATCH 041/248] dt-bindings: media: venus: Fix iommus property Fix IOMMU DT propety for venus via dropping SMMU stream IDs which relates to secure context bank. Assigning Linux kernel (HLOS) VMID to secure context bank stream IDs is incorrect. The maximum value for iommus property is updated accordingly. These DT bindings changes should be backwards compatible. Signed-off-by: Sumit Garg Reviewed-by: Rob Herring (Arm) Link: https://lore.kernel.org/r/20260122121042.579270-3-sumit.garg@kernel.org Signed-off-by: Bjorn Andersson --- .../devicetree/bindings/media/qcom,qcm2290-venus.yaml | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/Documentation/devicetree/bindings/media/qcom,qcm2290-venus.yaml b/Documentation/devicetree/bindings/media/qcom,qcm2290-venus.yaml index 3f3ee82fc878..7e6dc410c2d2 100644 --- a/Documentation/devicetree/bindings/media/qcom,qcm2290-venus.yaml +++ b/Documentation/devicetree/bindings/media/qcom,qcm2290-venus.yaml @@ -42,7 +42,7 @@ properties: - const: vcodec0_bus iommus: - maxItems: 5 + maxItems: 2 interconnects: maxItems: 2 @@ -102,10 +102,7 @@ examples: memory-region = <&pil_video_mem>; iommus = <&apps_smmu 0x860 0x0>, - <&apps_smmu 0x880 0x0>, - <&apps_smmu 0x861 0x04>, - <&apps_smmu 0x863 0x0>, - <&apps_smmu 0x804 0xe0>; + <&apps_smmu 0x880 0x0>; interconnects = <&mmnrt_virt MASTER_VIDEO_P0 RPM_ALWAYS_TAG &bimc SLAVE_EBI1 RPM_ALWAYS_TAG>, From 2c409e03fc04b5cded81b7add9ce509706c922e3 Mon Sep 17 00:00:00 2001 From: Sumit Garg Date: Thu, 22 Jan 2026 17:40:42 +0530 Subject: [PATCH 042/248] arm64: dts: qcom: agatti: Fix IOMMU DT properties Fix IOMMU DT propeties for GPU, display and video peripherals via dropping SMMU stream IDs which relates to secure context bank. This problem only surfaced when the Gunyah based firmware stack is ported on Agatti replacing the legacy QHEE based firmware stack. Assigning Linux kernel (HLOS) VMID to secure context bank stream IDs is treated as a fault by Gunyah hypervisor which were previously ignored by QHEE hypervisor. The DT changes should be backwards compatible with legacy QHEE based firmware stack too. Suggested-by: Prakash Gupta Reviewed-by: Konrad Dybcio Reviewed-by: Akhil P Oommen Reviewed-by: Dmitry Baryshkov Signed-off-by: Sumit Garg Link: https://lore.kernel.org/r/20260122121042.579270-4-sumit.garg@kernel.org Signed-off-by: Bjorn Andersson --- arch/arm64/boot/dts/qcom/agatti.dtsi | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/arch/arm64/boot/dts/qcom/agatti.dtsi b/arch/arm64/boot/dts/qcom/agatti.dtsi index 76b93b7bd50f..893cb0689013 100644 --- a/arch/arm64/boot/dts/qcom/agatti.dtsi +++ b/arch/arm64/boot/dts/qcom/agatti.dtsi @@ -1669,8 +1669,7 @@ gpu: gpu@5900000 { &bimc SLAVE_EBI1 RPM_ALWAYS_TAG>; interconnect-names = "gfx-mem"; - iommus = <&adreno_smmu 0 1>, - <&adreno_smmu 2 0>; + iommus = <&adreno_smmu 0 1>; operating-points-v2 = <&gpu_opp_table>; power-domains = <&rpmpd QCM2290_VDDCX>; qcom,gmu = <&gmu_wrapper>; @@ -1951,8 +1950,7 @@ mdss: display-subsystem@5e00000 { power-domains = <&dispcc MDSS_GDSC>; - iommus = <&apps_smmu 0x420 0x2>, - <&apps_smmu 0x421 0x0>; + iommus = <&apps_smmu 0x420 0x2>; interconnects = <&mmrt_virt MASTER_MDP0 RPM_ALWAYS_TAG &bimc SLAVE_EBI1 RPM_ALWAYS_TAG>, <&bimc MASTER_APPSS_PROC RPM_ALWAYS_TAG @@ -2436,10 +2434,7 @@ venus: video-codec@5a00000 { memory-region = <&pil_video_mem>; iommus = <&apps_smmu 0x860 0x0>, - <&apps_smmu 0x880 0x0>, - <&apps_smmu 0x861 0x04>, - <&apps_smmu 0x863 0x0>, - <&apps_smmu 0x804 0xe0>; + <&apps_smmu 0x880 0x0>; interconnects = <&mmnrt_virt MASTER_VIDEO_P0 RPM_ALWAYS_TAG &bimc SLAVE_EBI1 RPM_ALWAYS_TAG>, From ed8444006df9863ffa682e315352c44a49d9f4cb Mon Sep 17 00:00:00 2001 From: Marek Vasut Date: Tue, 24 Mar 2026 15:33:28 +0100 Subject: [PATCH 043/248] arm64: dts: renesas: sparrow-hawk: Reserve first 128 MiB of DRAM Mark the first 128 MiB of DRAM as reserved. The first 128 MiB of DRAM may optionally be used by TFA and other firmware for its own purposes, and in such case, Linux must not use this memory. On this platform, U-Boot runs in EL3 and starts TFA BL31 and Linux from a single combined fitImage. U-Boot has full access to all memory in the 0x40000000..0xbfffffff range, as well memory in the memory banks in the 64-bit address ranges, and therefore U-Boot patches this full complete view of platform memory layout into the DT that is passed to the next stage. The next stage is TFA BL31 and then the Linux kernel. The TFA BL31 does not modify the DT passed from U-Boot to TFA BL31 and then to Linux with any new reserved-memory {} node to reserve memory areas used by the TFA BL31 to prevent the next stage from using those areas, which lets Linux to use all of the available DRAM as described in the DT that was passed in by U-Boot, including the areas that are newly utilized by TFA BL31. In case of high DRAM utilization, for example in case of four instances of "memtester 3900M" running in parallel, unless the memory used by TFA BL31 is properly reserved, Linux may use and corrupt the memory used by TFA BL31, which would often lead to system becoming unresponsive. Until TFA BL31 can properly fill its own reserved-memory node into the DT, and to assure older versions of TFA BL31 do not cause problems, add explicitly reserved-memory {} node which prevents Linux from using the first 128 MiB of DRAM. Note that TFA BL31 can be adjusted to use different memory areas, this newly added reserved-memory {} node follows longer-term practice on the R-Car SoCs where the first 128 MiB of DRAM is reserved for firmware use. In case user does modify TFA BL31 to use different memory ranges, they must either use a future version of TFA BL31 which properly patches a reserved-memory {} node into the DT, or they must adjust the address ranges of this reserved-memory {} node accordingly. Fixes: a719915e76f2 ("arm64: dts: renesas: r8a779g3: Add Retronix R-Car V4H Sparrow Hawk board support") Cc: stable@vger.kernel.org Signed-off-by: Marek Vasut Reviewed-by: Geert Uytterhoeven Link: https://patch.msgid.link/20260324143342.17872-1-marek.vasut+renesas@mailbox.org Signed-off-by: Geert Uytterhoeven --- arch/arm64/boot/dts/renesas/r8a779g3-sparrow-hawk.dts | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/arch/arm64/boot/dts/renesas/r8a779g3-sparrow-hawk.dts b/arch/arm64/boot/dts/renesas/r8a779g3-sparrow-hawk.dts index ff07d984cbf2..812b133cf29e 100644 --- a/arch/arm64/boot/dts/renesas/r8a779g3-sparrow-hawk.dts +++ b/arch/arm64/boot/dts/renesas/r8a779g3-sparrow-hawk.dts @@ -118,6 +118,17 @@ memory@600000000 { reg = <0x6 0x00000000 0x1 0x00000000>; }; + reserved-memory { + #address-cells = <2>; + #size-cells = <2>; + ranges; + + tfa@40000000 { + reg = <0x0 0x40000000 0x0 0x8000000>; + no-map; + }; + }; + /* Page 27 / DSI to Display */ dp-con { compatible = "dp-connector"; From 7b3b1e5a87b2f5e35c52b5386d7c327be869454f Mon Sep 17 00:00:00 2001 From: Long Li Date: Mon, 16 Mar 2026 14:07:42 -0700 Subject: [PATCH 044/248] PCI: hv: Set default NUMA node to 0 for devices without affinity info When hv_pci_assign_numa_node() processes a device that does not have HV_PCI_DEVICE_FLAG_NUMA_AFFINITY set or has an out-of-range virtual_numa_node, the device NUMA node is left unset. On x86_64, the uninitialized default happens to be 0, but on ARM64 it is NUMA_NO_NODE (-1). Tests show that when no NUMA information is available from the Hyper-V host, devices perform best when assigned to node 0. With NUMA_NO_NODE the kernel may spread work across NUMA nodes, which degrades performance on Hyper-V, particularly for high-throughput devices like MANA. Always set the device NUMA node to 0 before the conditional NUMA affinity check, so that devices get a performant default when the host provides no NUMA information, and behavior is consistent on both x86_64 and ARM64. Fixes: 999dd956d838 ("PCI: hv: Add support for protocol 1.3 and support PCI_BUS_RELATIONS2") Signed-off-by: Long Li Reviewed-by: Michael Kelley Signed-off-by: Wei Liu --- drivers/pci/controller/pci-hyperv.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/pci/controller/pci-hyperv.c b/drivers/pci/controller/pci-hyperv.c index 2c7a406b4ba8..38a790f642a1 100644 --- a/drivers/pci/controller/pci-hyperv.c +++ b/drivers/pci/controller/pci-hyperv.c @@ -2485,6 +2485,14 @@ static void hv_pci_assign_numa_node(struct hv_pcibus_device *hbus) if (!hv_dev) continue; + /* + * If the Hyper-V host doesn't provide a NUMA node for the + * device, default to node 0. With NUMA_NO_NODE the kernel + * may spread work across NUMA nodes, which degrades + * performance on Hyper-V. + */ + set_dev_node(&dev->dev, 0); + if (hv_dev->desc.flags & HV_PCI_DEVICE_FLAG_NUMA_AFFINITY && hv_dev->desc.virtual_numa_node < num_possible_nodes()) /* From c7596f9001e2b83293e3658e4e1addde69bb335d Mon Sep 17 00:00:00 2001 From: Conor Dooley Date: Tue, 3 Mar 2026 11:24:06 +0000 Subject: [PATCH 045/248] firmware: microchip: fail auto-update probe if no flash found There's no point letting the driver probe if there is no flash, as trying to do a firmware upload will fail. Move the code that attempts to get the flash from firmware upload to probe, and let it emit a message to users stating why auto-update is not supported. The code currently could have a problem if there's a flash in devicetree, but the system controller driver fails to get a pointer to it from the mtd subsystem, which will cause mpfs_sys_controller_get_flash() to return an error. Check for errors and null, instead of just null, in the new clause. CC: stable@vger.kernel.org Fixes: ec5b0f1193ad4 ("firmware: microchip: add PolarFire SoC Auto Update support") Signed-off-by: Conor Dooley --- drivers/firmware/microchip/mpfs-auto-update.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/firmware/microchip/mpfs-auto-update.c b/drivers/firmware/microchip/mpfs-auto-update.c index e194f7acb2a9..8fc3749d4a70 100644 --- a/drivers/firmware/microchip/mpfs-auto-update.c +++ b/drivers/firmware/microchip/mpfs-auto-update.c @@ -113,10 +113,6 @@ static enum fw_upload_err mpfs_auto_update_prepare(struct fw_upload *fw_uploader * be added here. */ - priv->flash = mpfs_sys_controller_get_flash(priv->sys_controller); - if (!priv->flash) - return FW_UPLOAD_ERR_HW_ERROR; - erase_size = round_up(erase_size, (u64)priv->flash->erasesize); /* @@ -427,6 +423,12 @@ static int mpfs_auto_update_probe(struct platform_device *pdev) return dev_err_probe(dev, PTR_ERR(priv->sys_controller), "Could not register as a sub device of the system controller\n"); + priv->flash = mpfs_sys_controller_get_flash(priv->sys_controller); + if (IS_ERR_OR_NULL(priv->flash)) { + dev_dbg(dev, "No flash connected to the system controller, auto-update not supported\n"); + return -ENODEV; + } + priv->dev = dev; platform_set_drvdata(pdev, priv); From 88bdac5443e5269bb39c4968d5ee0becbffe3f82 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Wed, 25 Mar 2026 13:22:10 +0100 Subject: [PATCH 046/248] dt-bindings: display/msm: qcm2290-mdss: Fix missing ranges in example Device node has children with MMIO addressing, so must have ranges: msm/qcom,qcm2290-mdss.example.dtb: display-subsystem@5e00000 (qcom,qcm2290-mdss): 'ranges' is a required property Fixes: 966a08c293cb ("dt-bindings: display: msm: qcm2290-mdss: Fix iommus property") Signed-off-by: Krzysztof Kozlowski Reviewed-by: Sumit Garg Link: https://lore.kernel.org/r/20260325122209.147128-2-krzysztof.kozlowski@oss.qualcomm.com Signed-off-by: Bjorn Andersson --- .../devicetree/bindings/display/msm/qcom,qcm2290-mdss.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/Documentation/devicetree/bindings/display/msm/qcom,qcm2290-mdss.yaml b/Documentation/devicetree/bindings/display/msm/qcom,qcm2290-mdss.yaml index 2772cdec7e42..bb09ecd1a5b4 100644 --- a/Documentation/devicetree/bindings/display/msm/qcom,qcm2290-mdss.yaml +++ b/Documentation/devicetree/bindings/display/msm/qcom,qcm2290-mdss.yaml @@ -108,6 +108,7 @@ examples: "cpu-cfg"; iommus = <&apps_smmu 0x420 0x2>; + ranges; display-controller@5e01000 { compatible = "qcom,qcm2290-dpu"; From d802d848308b35220f21a8025352f0c0aba15c12 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Beno=C3=AEt=20Sevens?= Date: Mon, 23 Mar 2026 16:11:07 +0000 Subject: [PATCH 047/248] HID: roccat: fix use-after-free in roccat_report_event MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit roccat_report_event() iterates over the device->readers list without holding the readers_lock. This allows a concurrent roccat_release() to remove and free a reader while it's still being accessed, leading to a use-after-free. Protect the readers list traversal with the readers_lock mutex. Signed-off-by: Benoît Sevens Reviewed-by: Silvan Jegen Signed-off-by: Jiri Kosina --- drivers/hid/hid-roccat.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/hid/hid-roccat.c b/drivers/hid/hid-roccat.c index c7f7562e22e5..e413662f7508 100644 --- a/drivers/hid/hid-roccat.c +++ b/drivers/hid/hid-roccat.c @@ -257,6 +257,7 @@ int roccat_report_event(int minor, u8 const *data) if (!new_value) return -ENOMEM; + mutex_lock(&device->readers_lock); mutex_lock(&device->cbuf_lock); report = &device->cbuf[device->cbuf_end]; @@ -279,6 +280,7 @@ int roccat_report_event(int minor, u8 const *data) } mutex_unlock(&device->cbuf_lock); + mutex_unlock(&device->readers_lock); wake_up_interruptible(&device->wait); return 0; From 532743944324a873bbaf8620fcabcd0e69e30c36 Mon Sep 17 00:00:00 2001 From: leo vriska Date: Wed, 4 Mar 2026 13:36:59 -0500 Subject: [PATCH 048/248] HID: quirks: add HID_QUIRK_ALWAYS_POLL for 8BitDo Pro 3 According to a mailing list report [1], this controller's predecessor has the same issue. However, it uses the xpad driver instead of HID, so this quirk wouldn't apply. [1]: https://lore.kernel.org/linux-input/unufo3$det$1@ciao.gmane.io/ Signed-off-by: leo vriska Signed-off-by: Jiri Kosina --- drivers/hid/hid-ids.h | 3 +++ drivers/hid/hid-quirks.c | 1 + 2 files changed, 4 insertions(+) diff --git a/drivers/hid/hid-ids.h b/drivers/hid/hid-ids.h index afcee13bad61..e432cc0df680 100644 --- a/drivers/hid/hid-ids.h +++ b/drivers/hid/hid-ids.h @@ -22,6 +22,9 @@ #define USB_DEVICE_ID_3M2256 0x0502 #define USB_DEVICE_ID_3M3266 0x0506 +#define USB_VENDOR_ID_8BITDO 0x2dc8 +#define USB_DEVICE_ID_8BITDO_PRO_3 0x6009 + #define USB_VENDOR_ID_A4TECH 0x09da #define USB_DEVICE_ID_A4TECH_WCP32PU 0x0006 #define USB_DEVICE_ID_A4TECH_X5_005D 0x000a diff --git a/drivers/hid/hid-quirks.c b/drivers/hid/hid-quirks.c index 3217e436c052..f6be3ffee023 100644 --- a/drivers/hid/hid-quirks.c +++ b/drivers/hid/hid-quirks.c @@ -25,6 +25,7 @@ */ static const struct hid_device_id hid_quirks[] = { + { HID_USB_DEVICE(USB_VENDOR_ID_8BITDO, USB_DEVICE_ID_8BITDO_PRO_3), HID_QUIRK_ALWAYS_POLL }, { HID_USB_DEVICE(USB_VENDOR_ID_AASHIMA, USB_DEVICE_ID_AASHIMA_GAMEPAD), HID_QUIRK_BADPAD }, { HID_USB_DEVICE(USB_VENDOR_ID_AASHIMA, USB_DEVICE_ID_AASHIMA_PREDATOR), HID_QUIRK_BADPAD }, { HID_USB_DEVICE(USB_VENDOR_ID_ADATA_XPG, USB_VENDOR_ID_ADATA_XPG_WL_GAMING_MOUSE), HID_QUIRK_ALWAYS_POLL }, From e1415b9418eb22b4a7a1ef4b4aec9dd0a49e3fa7 Mon Sep 17 00:00:00 2001 From: Srinivas Pandruvada Date: Wed, 25 Mar 2026 12:26:38 -0700 Subject: [PATCH 049/248] platform/x86: ISST: Reset core count to 0 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Based on feature revision, number of buckets can be less than the TRL_MAX_BUCKETS. In that case core counts in the remaining buckets can be set to some invalid values. Hence reset core count to 0 for all buckets before assigning correct values. Fixes: 885d1c2a30b7 ("platform/x86: ISST: Support SST-TF revision 2") Signed-off-by: Srinivas Pandruvada Cc: stable@vger.kernel.org Link: https://patch.msgid.link/20260325192638.3417281-1-srinivas.pandruvada@linux.intel.com Reviewed-by: Ilpo Järvinen Signed-off-by: Ilpo Järvinen --- drivers/platform/x86/intel/speed_select_if/isst_tpmi_core.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/platform/x86/intel/speed_select_if/isst_tpmi_core.c b/drivers/platform/x86/intel/speed_select_if/isst_tpmi_core.c index e238c3105c78..63f08d66703a 100644 --- a/drivers/platform/x86/intel/speed_select_if/isst_tpmi_core.c +++ b/drivers/platform/x86/intel/speed_select_if/isst_tpmi_core.c @@ -1461,6 +1461,8 @@ static int isst_if_get_turbo_freq_info(void __user *argp) SST_MUL_FACTOR_FREQ) } + memset(turbo_freq.bucket_core_counts, 0, sizeof(turbo_freq.bucket_core_counts)); + if (feature_rev >= 2) { bool has_tf_info_8 = false; From 4ab604b3f3aa8dcccc7505f5d310016682a99d5f Mon Sep 17 00:00:00 2001 From: Srinivas Pandruvada Date: Wed, 25 Mar 2026 12:29:09 -0700 Subject: [PATCH 050/248] platform/x86/intel-uncore-freq: Handle autonomous UFS status bit MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When the AUTONOMOUS_UFS_DISABLED bit is set in the header, the ELC (Efficiency Latency Control) feature is non-functional. Hence, return error for read or write to ELC attributes. Fixes: bb516dc79c4a ("platform/x86/intel-uncore-freq: Add support for efficiency latency control") Signed-off-by: Srinivas Pandruvada Cc: stable@vger.kernel.org Link: https://patch.msgid.link/20260325192909.3417322-1-srinivas.pandruvada@linux.intel.com Reviewed-by: Ilpo Järvinen Signed-off-by: Ilpo Järvinen --- .../x86/intel/uncore-frequency/uncore-frequency-tpmi.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/platform/x86/intel/uncore-frequency/uncore-frequency-tpmi.c b/drivers/platform/x86/intel/uncore-frequency/uncore-frequency-tpmi.c index 1237d9570886..4c7e64db478c 100644 --- a/drivers/platform/x86/intel/uncore-frequency/uncore-frequency-tpmi.c +++ b/drivers/platform/x86/intel/uncore-frequency/uncore-frequency-tpmi.c @@ -537,6 +537,7 @@ static void set_cdie_id(int domain_id, struct tpmi_uncore_cluster_info *cluster_ #define UNCORE_VERSION_MASK GENMASK_ULL(7, 0) #define UNCORE_LOCAL_FABRIC_CLUSTER_ID_MASK GENMASK_ULL(15, 8) #define UNCORE_CLUSTER_OFF_MASK GENMASK_ULL(7, 0) +#define UNCORE_AUTONOMOUS_UFS_DISABLED BIT(32) #define UNCORE_MAX_CLUSTER_PER_DOMAIN 8 static int uncore_probe(struct auxiliary_device *auxdev, const struct auxiliary_device_id *id) @@ -598,6 +599,7 @@ static int uncore_probe(struct auxiliary_device *auxdev, const struct auxiliary_ for (i = 0; i < num_resources; ++i) { struct tpmi_uncore_power_domain_info *pd_info; + bool auto_ufs_enabled; struct resource *res; u64 cluster_offset; u8 cluster_mask; @@ -647,6 +649,8 @@ static int uncore_probe(struct auxiliary_device *auxdev, const struct auxiliary_ continue; } + auto_ufs_enabled = !(header & UNCORE_AUTONOMOUS_UFS_DISABLED); + /* Find out number of clusters in this resource */ pd_info->cluster_count = hweight8(cluster_mask); @@ -689,7 +693,9 @@ static int uncore_probe(struct auxiliary_device *auxdev, const struct auxiliary_ cluster_info->uncore_root = tpmi_uncore; - if (TPMI_MINOR_VERSION(pd_info->ufs_header_ver) >= UNCORE_ELC_SUPPORTED_VERSION) + if ((TPMI_MINOR_VERSION(pd_info->ufs_header_ver) >= + UNCORE_ELC_SUPPORTED_VERSION) && + auto_ufs_enabled) cluster_info->elc_supported = true; ret = uncore_freq_add_entry(&cluster_info->uncore_data, 0); From ba7d46e0a384398c58eee34579986f830260d37b Mon Sep 17 00:00:00 2001 From: Srinivas Pandruvada Date: Wed, 25 Mar 2026 12:31:57 -0700 Subject: [PATCH 051/248] platform/x86: ISST: Increase minor version MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The current implementation doesn't require any changes to support minor version 3, hence increment it to avoid "Unsupported minor version:3" message. Signed-off-by: Srinivas Pandruvada Link: https://patch.msgid.link/20260325193158.3417382-1-srinivas.pandruvada@linux.intel.com Reviewed-by: Ilpo Järvinen Signed-off-by: Ilpo Järvinen --- drivers/platform/x86/intel/speed_select_if/isst_tpmi_core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/platform/x86/intel/speed_select_if/isst_tpmi_core.c b/drivers/platform/x86/intel/speed_select_if/isst_tpmi_core.c index 63f08d66703a..b804cb753f94 100644 --- a/drivers/platform/x86/intel/speed_select_if/isst_tpmi_core.c +++ b/drivers/platform/x86/intel/speed_select_if/isst_tpmi_core.c @@ -36,7 +36,7 @@ /* Supported SST hardware version by this driver */ #define ISST_MAJOR_VERSION 0 -#define ISST_MINOR_VERSION 2 +#define ISST_MINOR_VERSION 3 /* * Used to indicate if value read from MMIO needs to get multiplied From b7f4e7babc122426e8802dffdd8ff1501db6adf4 Mon Sep 17 00:00:00 2001 From: Srinivas Pandruvada Date: Wed, 25 Mar 2026 12:31:58 -0700 Subject: [PATCH 052/248] platform/x86/intel-uncore-freq: Increase minor version MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The current implementation doesn't require any changes to support minor version 3, hence increment it to avoid "Unsupported minor version:3" message. Signed-off-by: Srinivas Pandruvada Link: https://patch.msgid.link/20260325193158.3417382-2-srinivas.pandruvada@linux.intel.com Reviewed-by: Ilpo Järvinen Signed-off-by: Ilpo Järvinen --- .../platform/x86/intel/uncore-frequency/uncore-frequency-tpmi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/platform/x86/intel/uncore-frequency/uncore-frequency-tpmi.c b/drivers/platform/x86/intel/uncore-frequency/uncore-frequency-tpmi.c index 4c7e64db478c..88015a2c6a0d 100644 --- a/drivers/platform/x86/intel/uncore-frequency/uncore-frequency-tpmi.c +++ b/drivers/platform/x86/intel/uncore-frequency/uncore-frequency-tpmi.c @@ -31,7 +31,7 @@ #include "uncore-frequency-common.h" #define UNCORE_MAJOR_VERSION 0 -#define UNCORE_MINOR_VERSION 2 +#define UNCORE_MINOR_VERSION 3 #define UNCORE_ELC_SUPPORTED_VERSION 2 #define UNCORE_HEADER_INDEX 0 #define UNCORE_FABRIC_CLUSTER_OFFSET 8 From 1a9452c428a6b76f0b797bae21daa454fccef1a2 Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Tue, 24 Mar 2026 16:16:41 -0500 Subject: [PATCH 053/248] platform/x86/amd: pmc: Add Thinkpad L14 Gen3 to quirk_s2idle_bug MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This platform is a similar vintage of platforms that had a BIOS bug leading to a 10s delay at resume from s0i3. Add a quirk for it. Reported-by: Imrane Closes: https://bugzilla.kernel.org/show_bug.cgi?id=221273 Tested-by: Imrane Signed-off-by: Mario Limonciello Link: https://patch.msgid.link/20260324211647.357924-1-mario.limonciello@amd.com Reviewed-by: Ilpo Järvinen Signed-off-by: Ilpo Järvinen --- drivers/platform/x86/amd/pmc/pmc-quirks.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/platform/x86/amd/pmc/pmc-quirks.c b/drivers/platform/x86/amd/pmc/pmc-quirks.c index ed285afaf9b0..24506e342943 100644 --- a/drivers/platform/x86/amd/pmc/pmc-quirks.c +++ b/drivers/platform/x86/amd/pmc/pmc-quirks.c @@ -203,6 +203,15 @@ static const struct dmi_system_id fwbug_list[] = { DMI_MATCH(DMI_PRODUCT_NAME, "82XQ"), } }, + /* https://bugzilla.kernel.org/show_bug.cgi?id=221273 */ + { + .ident = "Thinkpad L14 Gen3", + .driver_data = &quirk_s2idle_bug, + .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "LENOVO"), + DMI_MATCH(DMI_PRODUCT_NAME, "21C6"), + } + }, /* https://gitlab.freedesktop.org/drm/amd/-/issues/4434 */ { .ident = "Lenovo Yoga 6 13ALC6", From 743677a8cb30b09f16a7f167f497c2c927891b5a Mon Sep 17 00:00:00 2001 From: Maximilian Pezzullo Date: Wed, 4 Mar 2026 09:25:22 +0100 Subject: [PATCH 054/248] HID: amd_sfh: don't log error when device discovery fails with -EOPNOTSUPP When sensor discovery fails on systems without AMD SFH sensors, the code already emits a warning via dev_warn() in amd_sfh_hid_client_init(). The subsequent dev_err() in sfh_init_work() for the same -EOPNOTSUPP return value is redundant and causes unnecessary alarm. Suppress the dev_err() for -EOPNOTSUPP to avoid confusing users who have no AMD SFH sensors. Fixes: 2105e8e00da4 ("HID: amd_sfh: Improve boot time when SFH is available") Reported-by: Casey Croy Closes: https://bugzilla.kernel.org/show_bug.cgi?id=221099 Signed-off-by: Maximilian Pezzullo Acked-by: Basavaraj Natikar Signed-off-by: Jiri Kosina --- drivers/hid/amd-sfh-hid/amd_sfh_pcie.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/hid/amd-sfh-hid/amd_sfh_pcie.c b/drivers/hid/amd-sfh-hid/amd_sfh_pcie.c index 1d9f955573aa..4b81cebdc335 100644 --- a/drivers/hid/amd-sfh-hid/amd_sfh_pcie.c +++ b/drivers/hid/amd-sfh-hid/amd_sfh_pcie.c @@ -413,7 +413,8 @@ static void sfh_init_work(struct work_struct *work) rc = amd_sfh_hid_client_init(mp2); if (rc) { amd_sfh_clear_intr(mp2); - dev_err(&pdev->dev, "amd_sfh_hid_client_init failed err %d\n", rc); + if (rc != -EOPNOTSUPP) + dev_err(&pdev->dev, "amd_sfh_hid_client_init failed err %d\n", rc); return; } From 1a2d30aa73140e008e62dc5898e4c2c3b59b35e4 Mon Sep 17 00:00:00 2001 From: Lode Willems Date: Tue, 24 Mar 2026 21:43:06 +0100 Subject: [PATCH 055/248] HID: Kysona: Add support for VXE Dragonfly R1 Pro Apparently this same protocol is used by more mice from different brands. This patch adds support for the VXE Dragonfly R1 Pro. Tested-by: Dominykas Svetikas Signed-off-by: Lode Willems Signed-off-by: Jiri Kosina --- drivers/hid/hid-ids.h | 4 ++++ drivers/hid/hid-kysona.c | 2 ++ 2 files changed, 6 insertions(+) diff --git a/drivers/hid/hid-ids.h b/drivers/hid/hid-ids.h index e432cc0df680..c1e4a6ce9631 100644 --- a/drivers/hid/hid-ids.h +++ b/drivers/hid/hid-ids.h @@ -1474,6 +1474,10 @@ #define USB_VENDOR_ID_VTL 0x0306 #define USB_DEVICE_ID_VTL_MULTITOUCH_FF3F 0xff3f +#define USB_VENDOR_ID_VXE 0x3554 +#define USB_DEVICE_ID_VXE_DRAGONFLY_R1_PRO_DONGLE 0xf58a +#define USB_DEVICE_ID_VXE_DRAGONFLY_R1_PRO_WIRED 0xf58c + #define USB_VENDOR_ID_WACOM 0x056a #define USB_DEVICE_ID_WACOM_GRAPHIRE_BLUETOOTH 0x81 #define USB_DEVICE_ID_WACOM_INTUOS4_BLUETOOTH 0x00BD diff --git a/drivers/hid/hid-kysona.c b/drivers/hid/hid-kysona.c index 09bfe30d02cb..ccbd8380064e 100644 --- a/drivers/hid/hid-kysona.c +++ b/drivers/hid/hid-kysona.c @@ -272,6 +272,8 @@ static void kysona_remove(struct hid_device *hdev) static const struct hid_device_id kysona_devices[] = { { HID_USB_DEVICE(USB_VENDOR_ID_KYSONA, USB_DEVICE_ID_KYSONA_M600_DONGLE) }, { HID_USB_DEVICE(USB_VENDOR_ID_KYSONA, USB_DEVICE_ID_KYSONA_M600_WIRED) }, + { HID_USB_DEVICE(USB_VENDOR_ID_VXE, USB_DEVICE_ID_VXE_DRAGONFLY_R1_PRO_DONGLE) }, + { HID_USB_DEVICE(USB_VENDOR_ID_VXE, USB_DEVICE_ID_VXE_DRAGONFLY_R1_PRO_WIRED) }, { } }; MODULE_DEVICE_TABLE(hid, kysona_devices); From 45065a5095c7773fb98c35d60c20c3b513540597 Mon Sep 17 00:00:00 2001 From: Akshai Murari Date: Fri, 27 Mar 2026 06:54:45 +0000 Subject: [PATCH 056/248] Input: add keycodes for contextual AI usages (HUTRR119) HUTRR119 introduces new usages for keys intended to invoke AI agents based on the current context. These are useful with the increasing number of operating systems with integrated Large Language Models Add new key definitions for KEY_ACTION_ON_SELECTION, KEY_CONTEXTUAL_INSERT and KEY_CONTEXTUAL_QUERY Signed-off-by: Akshai Murari Acked-by: Dmitry Torokhov Signed-off-by: Jiri Kosina --- drivers/hid/hid-debug.c | 6 ++++++ drivers/hid/hid-input.c | 3 +++ include/uapi/linux/input-event-codes.h | 4 ++++ 3 files changed, 13 insertions(+) diff --git a/drivers/hid/hid-debug.c b/drivers/hid/hid-debug.c index c5865b0d2aaa..a8d2b36a7852 100644 --- a/drivers/hid/hid-debug.c +++ b/drivers/hid/hid-debug.c @@ -990,6 +990,9 @@ static const struct hid_usage_entry hid_usage_table[] = { { 0x0c, 0x01c9, "ALContactSync" }, { 0x0c, 0x01ca, "ALNavigation" }, { 0x0c, 0x01cb, "ALContextawareDesktopAssistant" }, + { 0x0c, 0x01cc, "ALActionOnSelection" }, + { 0x0c, 0x01cd, "ALContextualInsertion" }, + { 0x0c, 0x01ce, "ALContextualQuery" }, { 0x0c, 0x0200, "GenericGUIApplicationControls" }, { 0x0c, 0x0201, "ACNew" }, { 0x0c, 0x0202, "ACOpen" }, @@ -3375,6 +3378,9 @@ static const char *keys[KEY_MAX + 1] = { [KEY_BRIGHTNESS_MIN] = "BrightnessMin", [KEY_BRIGHTNESS_MAX] = "BrightnessMax", [KEY_BRIGHTNESS_AUTO] = "BrightnessAuto", + [KEY_ACTION_ON_SELECTION] = "ActionOnSelection", + [KEY_CONTEXTUAL_INSERT] = "ContextualInsert", + [KEY_CONTEXTUAL_QUERY] = "ContextualQuery", [KEY_KBDINPUTASSIST_PREV] = "KbdInputAssistPrev", [KEY_KBDINPUTASSIST_NEXT] = "KbdInputAssistNext", [KEY_KBDINPUTASSIST_PREVGROUP] = "KbdInputAssistPrevGroup", diff --git a/drivers/hid/hid-input.c b/drivers/hid/hid-input.c index 8fc20df99b97..ce9c3251287d 100644 --- a/drivers/hid/hid-input.c +++ b/drivers/hid/hid-input.c @@ -1227,6 +1227,9 @@ static void hidinput_configure_usage(struct hid_input *hidinput, struct hid_fiel case 0x1bc: map_key_clear(KEY_MESSENGER); break; case 0x1bd: map_key_clear(KEY_INFO); break; case 0x1cb: map_key_clear(KEY_ASSISTANT); break; + case 0x1cc: map_key_clear(KEY_ACTION_ON_SELECTION); break; + case 0x1cd: map_key_clear(KEY_CONTEXTUAL_INSERT); break; + case 0x1ce: map_key_clear(KEY_CONTEXTUAL_QUERY); break; case 0x201: map_key_clear(KEY_NEW); break; case 0x202: map_key_clear(KEY_OPEN); break; case 0x203: map_key_clear(KEY_CLOSE); break; diff --git a/include/uapi/linux/input-event-codes.h b/include/uapi/linux/input-event-codes.h index 4bdb6a165987..3528168f7c6d 100644 --- a/include/uapi/linux/input-event-codes.h +++ b/include/uapi/linux/input-event-codes.h @@ -643,6 +643,10 @@ #define KEY_EPRIVACY_SCREEN_ON 0x252 #define KEY_EPRIVACY_SCREEN_OFF 0x253 +#define KEY_ACTION_ON_SELECTION 0x254 /* AL Action on Selection (HUTRR119) */ +#define KEY_CONTEXTUAL_INSERT 0x255 /* AL Contextual Insertion (HUTRR119) */ +#define KEY_CONTEXTUAL_QUERY 0x256 /* AL Contextual Query (HUTRR119) */ + #define KEY_KBDINPUTASSIST_PREV 0x260 #define KEY_KBDINPUTASSIST_NEXT 0x261 #define KEY_KBDINPUTASSIST_PREVGROUP 0x262 From 742de64b62b690a368dbeb846499eb8ac8ceedb9 Mon Sep 17 00:00:00 2001 From: Janne Grunau Date: Fri, 27 Mar 2026 09:30:29 +0100 Subject: [PATCH 057/248] kbuild: modules-cpio-pkg: Respect INSTALL_MOD_PATH MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The modules-cpio-pkg target added in commit 2a9c8c0b59d3 ("kbuild: add target to build a cpio containing modules") is incompatible with initramfs with merged /lib and /usr/lib directories [1]. "/lib" cannot be a link and directory at the same time. Respect a non-empty INSTALL_MOD_PATH in the modules-cpio-pkg target so that `make INSTALL_MOD_PATH=/usr modules-cpio-pkg` results in the same module install location as `make INSTALL_MOD_PATH=/usr modules_install`. Tested with Fedora distribution initramfs produced by dracut. Link: https://systemd.io/THE_CASE_FOR_THE_USR_MERGE/ [1] Fixes: 2a9c8c0b59d3 ("kbuild: add target to build a cpio containing modules") Cc: stable@vger.kernel.org Reviewed-by: Simon Glass Reviewed-by: Nathan Chancellor Reviewed-by: Thomas Weißschuh Signed-off-by: Janne Grunau Reviewed-by: Nicolas Schier Tested-by: Nicolas Schier Reviewed-by: Ahmad Fatoum Link: https://patch.msgid.link/20260327-kbuild-modules-cpio-pkg-usr-merge-v3-1-ef507dfa006c@jannau.net Signed-off-by: Nathan Chancellor --- scripts/Makefile.package | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/scripts/Makefile.package b/scripts/Makefile.package index 0ec946f9b905..6d36786ba31c 100644 --- a/scripts/Makefile.package +++ b/scripts/Makefile.package @@ -195,7 +195,7 @@ tar%-pkg: linux-$(KERNELRELEASE)-$(ARCH).tar.% FORCE .tmp_modules_cpio: FORCE $(Q)$(MAKE) -f $(srctree)/Makefile $(Q)rm -rf $@ - $(Q)$(MAKE) -f $(srctree)/Makefile INSTALL_MOD_PATH=$@ modules_install + $(Q)$(MAKE) -f $(srctree)/Makefile INSTALL_MOD_PATH=$@/$(INSTALL_MOD_PATH) modules_install quiet_cmd_cpio = CPIO $@ cmd_cpio = $(CONFIG_SHELL) $(srctree)/usr/gen_initramfs.sh -o $@ $< @@ -264,6 +264,7 @@ help: @echo ' tarxz-pkg - Build the kernel as a xz compressed tarball' @echo ' tarzst-pkg - Build the kernel as a zstd compressed tarball' @echo ' modules-cpio-pkg - Build the kernel modules as cpio archive' + @echo ' (uses INSTALL_MOD_PATH inside the archive)' @echo ' perf-tar-src-pkg - Build the perf source tarball with no compression' @echo ' perf-targz-src-pkg - Build the perf source tarball with gzip compression' @echo ' perf-tarbz2-src-pkg - Build the perf source tarball with bz2 compression' From deb4605671cfae3b2803cfbbf4739e7245248398 Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Wed, 25 Mar 2026 18:20:30 -0700 Subject: [PATCH 058/248] modpost: Declare extra_warn with unused attribute A recent strengthening of -Wunused-but-set-variable (enabled with -Wall) in clang under a new subwarning, -Wunused-but-set-global, points out an unused static global variable in scripts/mod/modpost.c: scripts/mod/modpost.c:59:13: error: variable 'extra_warn' set but not used [-Werror,-Wunused-but-set-global] 59 | static bool extra_warn; | ^ This variable has been unused since commit 6c6c1fc09de3 ("modpost: require a MODULE_DESCRIPTION()") but that is expected, as there are currently no extra warnings at W=1 right now. Declare the variable with the unused attribute to make it clear to the compiler that this variable may be unused. Cc: stable@vger.kernel.org Fixes: 6c6c1fc09de3 ("modpost: require a MODULE_DESCRIPTION()") Link: https://patch.msgid.link/20260325-modpost-extra_warn-unused-but-set-global-v1-1-2e84003b7e81@kernel.org Reviewed-by: Nicolas Schier Signed-off-by: Nathan Chancellor --- scripts/mod/modpost.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c index 0c25b5ad497b..c3bc801d8b2d 100644 --- a/scripts/mod/modpost.c +++ b/scripts/mod/modpost.c @@ -56,7 +56,7 @@ static bool allow_missing_ns_imports; static bool error_occurred; -static bool extra_warn; +static bool extra_warn __attribute__((unused)); bool target_is_big_endian; bool host_is_big_endian; From 71a98248c63c535eaa4d4c22f099b68d902006d0 Mon Sep 17 00:00:00 2001 From: Yasuaki Torimaru Date: Thu, 26 Mar 2026 14:58:00 +0900 Subject: [PATCH 059/248] xfrm: clear trailing padding in build_polexpire() build_expire() clears the trailing padding bytes of struct xfrm_user_expire after setting the hard field via memset_after(), but the analogous function build_polexpire() does not do this for struct xfrm_user_polexpire. The padding bytes after the __u8 hard field are left uninitialized from the heap allocation, and are then sent to userspace via netlink multicast to XFRMNLGRP_EXPIRE listeners, leaking kernel heap memory contents. Add the missing memset_after() call, matching build_expire(). Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Cc: stable@vger.kernel.org Signed-off-by: Yasuaki Torimaru Reviewed-by: Simon Horman Reviewed-by: Breno Leitao Signed-off-by: Steffen Klassert --- net/xfrm/xfrm_user.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index 1656b487f833..5d59c11fc01e 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -3960,6 +3960,8 @@ static int build_polexpire(struct sk_buff *skb, struct xfrm_policy *xp, return err; } upe->hard = !!hard; + /* clear the padding bytes */ + memset_after(upe, 0, hard); nlmsg_end(skb, nlh); return 0; From 7081d46d32312f1a31f0e0e99c6835a394037599 Mon Sep 17 00:00:00 2001 From: Keenan Dong Date: Thu, 26 Mar 2026 20:36:39 +0800 Subject: [PATCH 060/248] xfrm: account XFRMA_IF_ID in aevent size calculation xfrm_get_ae() allocates the reply skb with xfrm_aevent_msgsize(), then build_aevent() appends attributes including XFRMA_IF_ID when x->if_id is set. xfrm_aevent_msgsize() does not include space for XFRMA_IF_ID. For states with if_id, build_aevent() can fail with -EMSGSIZE and hit BUG_ON(err < 0) in xfrm_get_ae(), turning a malformed netlink interaction into a kernel panic. Account XFRMA_IF_ID in the size calculation unconditionally and replace the BUG_ON with normal error unwinding. Fixes: 7e6526404ade ("xfrm: Add a new lookup key to match xfrm interfaces.") Reported-by: Keenan Dong Signed-off-by: Keenan Dong Signed-off-by: Steffen Klassert --- net/xfrm/xfrm_user.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index 5d59c11fc01e..a779590c985a 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -2677,7 +2677,8 @@ static inline unsigned int xfrm_aevent_msgsize(struct xfrm_state *x) + nla_total_size(4) /* XFRM_AE_RTHR */ + nla_total_size(4) /* XFRM_AE_ETHR */ + nla_total_size(sizeof(x->dir)) /* XFRMA_SA_DIR */ - + nla_total_size(4); /* XFRMA_SA_PCPU */ + + nla_total_size(4) /* XFRMA_SA_PCPU */ + + nla_total_size(sizeof(x->if_id)); /* XFRMA_IF_ID */ } static int build_aevent(struct sk_buff *skb, struct xfrm_state *x, const struct km_event *c) @@ -2789,7 +2790,12 @@ static int xfrm_get_ae(struct sk_buff *skb, struct nlmsghdr *nlh, c.portid = nlh->nlmsg_pid; err = build_aevent(r_skb, x, &c); - BUG_ON(err < 0); + if (err < 0) { + spin_unlock_bh(&x->lock); + xfrm_state_put(x); + kfree_skb(r_skb); + return err; + } err = nlmsg_unicast(xfrm_net_nlsk(net, skb), r_skb, NETLINK_CB(skb).portid); spin_unlock_bh(&x->lock); From 3d48c9fd78dd0b1809669ec49c4d0997b8127512 Mon Sep 17 00:00:00 2001 From: Mikhail Gavrilov Date: Fri, 27 Mar 2026 17:41:56 +0500 Subject: [PATCH 061/248] dma-debug: suppress cacheline overlap warning when arch has no DMA alignment requirement When CONFIG_DMA_API_DEBUG is enabled, the DMA debug infrastructure tracks active mappings per cacheline and warns if two different DMA mappings share the same cacheline ("cacheline tracking EEXIST, overlapping mappings aren't supported"). On x86_64, ARCH_KMALLOC_MINALIGN defaults to 8, so small kmalloc allocations (e.g. the 8-byte hub->buffer and hub->status in the USB hub driver) frequently land in the same 64-byte cacheline. When both are DMA-mapped, this triggers a false positive warning. This has been reported repeatedly since v5.14 (when the EEXIST check was added) across various USB host controllers and devices including xhci_hcd with USB hubs, USB audio devices, and USB ethernet adapters. The cacheline overlap is only a real concern on architectures that require DMA buffer alignment to cacheline boundaries (i.e. where ARCH_DMA_MINALIGN >= L1_CACHE_BYTES). On architectures like x86_64 where dma_get_cache_alignment() returns 1, the hardware is cache-coherent and overlapping cacheline mappings are harmless. Suppress the EEXIST warning when dma_get_cache_alignment() is less than L1_CACHE_BYTES, indicating the architecture does not require cacheline-aligned DMA buffers. Verified with a kernel module reproducer that performs two kmalloc(8) allocations back-to-back and DMA-maps both: Before: allocations share a cacheline, EEXIST fires within ~50 pairs After: same cacheline pair found, but no warning emitted Fixes: 2b4bbc6231d7 ("dma-debug: report -EEXIST errors in add_dma_entry") Link: https://bugzilla.kernel.org/show_bug.cgi?id=215740 Suggested-by: Harry Yoo Tested-by: Mikhail Gavrilov Signed-off-by: Mikhail Gavrilov Signed-off-by: Marek Szyprowski Link: https://lore.kernel.org/r/20260327124156.24820-1-mikhail.v.gavrilov@gmail.com --- kernel/dma/debug.c | 1 + 1 file changed, 1 insertion(+) diff --git a/kernel/dma/debug.c b/kernel/dma/debug.c index 0677918f06a8..1a725edbbbf6 100644 --- a/kernel/dma/debug.c +++ b/kernel/dma/debug.c @@ -615,6 +615,7 @@ static void add_dma_entry(struct dma_debug_entry *entry, unsigned long attrs) } else if (rc == -EEXIST && !(attrs & DMA_ATTR_SKIP_CPU_SYNC) && !(entry->is_cache_clean && overlap_cache_clean) && + dma_get_cache_alignment() >= L1_CACHE_BYTES && !(IS_ENABLED(CONFIG_DMA_BOUNCE_UNALIGNED_KMALLOC) && is_swiotlb_active(entry->dev))) { err_printk(entry->dev, entry, From 11b72b1ca9891c77bc876ef9fc39d6825847ffee Mon Sep 17 00:00:00 2001 From: Ziyue Zhang Date: Mon, 30 Mar 2026 10:09:34 +0800 Subject: [PATCH 062/248] arm64: dts: qcom: hamoa: Fix incomplete Root Port property migration Historically, the Qualcomm PCIe controller node (Host bridge) described all Root Port properties, such as PHY, PERST#, and WAKE#. But to provide a more accurate hardware description and to support future multi-Root Port controllers, these properties were moved to the Root Port node in the devicetree bindings. Commit 960609b22be5 ("arm64: dts: qcom: hamoa: Move PHY, PERST, and Wake GPIOs to PCIe port nodes and add port Nodes for all PCIe ports") initiated this transition for the Hamoa platform by moving the PHY property to the Root Port node in hamoa.dtsi. However, it only updated some platform specific DTS files for PERST# and WAKE#, leaving others in a "mixed" binding state. While the PCIe controller driver supports both legacy and Root Port bindings, It cannot correctly handle a mix of both. In these cases, the driver parses the PHY from the Root Port node, but fails to find the PERST# property (which it then assumes is not present, as it is optional). Consequently, the controller probe succeeds, but PERST# remains uncontrolled, preventing PCIe endpoints from functioning. So, fix the incomplete migration by moving the PERST# and WAKE# properties from the controller node to the Root Port node in all remaining Hamoa platform DTS files. Fixes: 960609b22be5 ("arm64: dts: qcom: hamoa: Move PHY, PERST, and Wake GPIOs to PCIe port nodes and add port Nodes for all PCIe ports") Signed-off-by: Ziyue Zhang Reviewed-by: Manivannan Sadhasivam Reviewed-by: Konrad Dybcio Link: https://lore.kernel.org/r/20260330020934.3501247-1-ziyue.zhang@oss.qualcomm.com Signed-off-by: Bjorn Andersson --- .../boot/dts/qcom/x1-asus-zenbook-a14.dtsi | 16 ++++++++----- arch/arm64/boot/dts/qcom/x1-crd.dtsi | 24 ++++++++++++------- arch/arm64/boot/dts/qcom/x1-dell-thena.dtsi | 14 ++++++----- .../boot/dts/qcom/x1-hp-omnibook-x14.dtsi | 14 ++++++----- .../boot/dts/qcom/x1-microsoft-denali.dtsi | 8 ++++--- .../dts/qcom/x1e80100-lenovo-yoga-slim7x.dts | 6 ++--- .../qcom/x1e80100-medion-sprchrgd-14-s1.dts | 15 ++++++------ .../dts/qcom/x1p42100-lenovo-thinkbook-16.dts | 14 ++++++----- 8 files changed, 65 insertions(+), 46 deletions(-) diff --git a/arch/arm64/boot/dts/qcom/x1-asus-zenbook-a14.dtsi b/arch/arm64/boot/dts/qcom/x1-asus-zenbook-a14.dtsi index 8e5c5575a532..0a382cc9e643 100644 --- a/arch/arm64/boot/dts/qcom/x1-asus-zenbook-a14.dtsi +++ b/arch/arm64/boot/dts/qcom/x1-asus-zenbook-a14.dtsi @@ -1032,9 +1032,6 @@ &mdss_dp3_phy { }; &pcie4 { - perst-gpios = <&tlmm 146 GPIO_ACTIVE_LOW>; - wake-gpios = <&tlmm 148 GPIO_ACTIVE_LOW>; - pinctrl-0 = <&pcie4_default>; pinctrl-names = "default"; @@ -1048,10 +1045,12 @@ &pcie4_phy { status = "okay"; }; -&pcie6a { - perst-gpios = <&tlmm 152 GPIO_ACTIVE_LOW>; - wake-gpios = <&tlmm 154 GPIO_ACTIVE_LOW>; +&pcie4_port0 { + reset-gpios = <&tlmm 146 GPIO_ACTIVE_LOW>; + wake-gpios = <&tlmm 148 GPIO_ACTIVE_LOW>; +}; +&pcie6a { vddpe-3v3-supply = <&vreg_nvme>; pinctrl-0 = <&pcie6a_default>; @@ -1067,6 +1066,11 @@ &pcie6a_phy { status = "okay"; }; +&pcie6a_port0 { + reset-gpios = <&tlmm 152 GPIO_ACTIVE_LOW>; + wake-gpios = <&tlmm 154 GPIO_ACTIVE_LOW>; +}; + &pm8550_gpios { rtmr0_default: rtmr0-reset-n-active-state { pins = "gpio10"; diff --git a/arch/arm64/boot/dts/qcom/x1-crd.dtsi b/arch/arm64/boot/dts/qcom/x1-crd.dtsi index ded96fb43489..2fbf9ec66fb8 100644 --- a/arch/arm64/boot/dts/qcom/x1-crd.dtsi +++ b/arch/arm64/boot/dts/qcom/x1-crd.dtsi @@ -1216,15 +1216,17 @@ &mdss_dp3_phy { }; &pcie4 { - perst-gpios = <&tlmm 146 GPIO_ACTIVE_LOW>; - wake-gpios = <&tlmm 148 GPIO_ACTIVE_LOW>; - pinctrl-0 = <&pcie4_default>; pinctrl-names = "default"; status = "okay"; }; +&pcie4_port0 { + reset-gpios = <&tlmm 146 GPIO_ACTIVE_LOW>; + wake-gpios = <&tlmm 148 GPIO_ACTIVE_LOW>; +}; + &pcie4_phy { vdda-phy-supply = <&vreg_l3i_0p8>; vdda-pll-supply = <&vreg_l3e_1p2>; @@ -1233,9 +1235,6 @@ &pcie4_phy { }; &pcie5 { - perst-gpios = <&tlmm 149 GPIO_ACTIVE_LOW>; - wake-gpios = <&tlmm 151 GPIO_ACTIVE_LOW>; - vddpe-3v3-supply = <&vreg_wwan>; pinctrl-0 = <&pcie5_default>; @@ -1251,10 +1250,12 @@ &pcie5_phy { status = "okay"; }; -&pcie6a { - perst-gpios = <&tlmm 152 GPIO_ACTIVE_LOW>; - wake-gpios = <&tlmm 154 GPIO_ACTIVE_LOW>; +&pcie5_port0 { + reset-gpios = <&tlmm 149 GPIO_ACTIVE_LOW>; + wake-gpios = <&tlmm 151 GPIO_ACTIVE_LOW>; +}; +&pcie6a { vddpe-3v3-supply = <&vreg_nvme>; pinctrl-names = "default"; @@ -1270,6 +1271,11 @@ &pcie6a_phy { status = "okay"; }; +&pcie6a_port0 { + reset-gpios = <&tlmm 152 GPIO_ACTIVE_LOW>; + wake-gpios = <&tlmm 154 GPIO_ACTIVE_LOW>; +}; + &pm8550_gpios { kypd_vol_up_n: kypd-vol-up-n-state { pins = "gpio6"; diff --git a/arch/arm64/boot/dts/qcom/x1-dell-thena.dtsi b/arch/arm64/boot/dts/qcom/x1-dell-thena.dtsi index bf04a12b16bc..217ca8c7d81d 100644 --- a/arch/arm64/boot/dts/qcom/x1-dell-thena.dtsi +++ b/arch/arm64/boot/dts/qcom/x1-dell-thena.dtsi @@ -1081,9 +1081,6 @@ &mdss_dp3_phy { }; &pcie4 { - perst-gpios = <&tlmm 146 GPIO_ACTIVE_LOW>; - wake-gpios = <&tlmm 148 GPIO_ACTIVE_LOW>; - pinctrl-0 = <&pcie4_default>; pinctrl-names = "default"; @@ -1098,6 +1095,9 @@ &pcie4_phy { }; &pcie4_port0 { + reset-gpios = <&tlmm 146 GPIO_ACTIVE_LOW>; + wake-gpios = <&tlmm 148 GPIO_ACTIVE_LOW>; + wifi@0 { compatible = "pci17cb,1107"; reg = <0x10000 0x0 0x0 0x0 0x0>; @@ -1115,9 +1115,6 @@ wifi@0 { }; &pcie6a { - perst-gpios = <&tlmm 152 GPIO_ACTIVE_LOW>; - wake-gpios = <&tlmm 154 GPIO_ACTIVE_LOW>; - vddpe-3v3-supply = <&vreg_nvme>; pinctrl-0 = <&pcie6a_default>; @@ -1126,6 +1123,11 @@ &pcie6a { status = "okay"; }; +&pcie6a_port0 { + reset-gpios = <&tlmm 152 GPIO_ACTIVE_LOW>; + wake-gpios = <&tlmm 154 GPIO_ACTIVE_LOW>; +}; + &pcie6a_phy { vdda-phy-supply = <&vreg_l1d_0p8>; vdda-pll-supply = <&vreg_l2j_1p2>; diff --git a/arch/arm64/boot/dts/qcom/x1-hp-omnibook-x14.dtsi b/arch/arm64/boot/dts/qcom/x1-hp-omnibook-x14.dtsi index a4075434162a..41063948c583 100644 --- a/arch/arm64/boot/dts/qcom/x1-hp-omnibook-x14.dtsi +++ b/arch/arm64/boot/dts/qcom/x1-hp-omnibook-x14.dtsi @@ -1065,9 +1065,6 @@ &mdss_dp3_phy { }; &pcie4 { - perst-gpios = <&tlmm 146 GPIO_ACTIVE_LOW>; - wake-gpios = <&tlmm 148 GPIO_ACTIVE_LOW>; - pinctrl-0 = <&pcie4_default>; pinctrl-names = "default"; @@ -1082,6 +1079,9 @@ &pcie4_phy { }; &pcie4_port0 { + reset-gpios = <&tlmm 146 GPIO_ACTIVE_LOW>; + wake-gpios = <&tlmm 148 GPIO_ACTIVE_LOW>; + wifi@0 { compatible = "pci17cb,1107"; reg = <0x10000 0x0 0x0 0x0 0x0>; @@ -1099,9 +1099,6 @@ wifi@0 { }; &pcie6a { - perst-gpios = <&tlmm 152 GPIO_ACTIVE_LOW>; - wake-gpios = <&tlmm 154 GPIO_ACTIVE_LOW>; - vddpe-3v3-supply = <&vreg_nvme>; pinctrl-0 = <&pcie6a_default>; @@ -1110,6 +1107,11 @@ &pcie6a { status = "okay"; }; +&pcie6a_port0 { + reset-gpios = <&tlmm 152 GPIO_ACTIVE_LOW>; + wake-gpios = <&tlmm 154 GPIO_ACTIVE_LOW>; +}; + &pcie6a_phy { vdda-phy-supply = <&vreg_l1d_0p8>; vdda-pll-supply = <&vreg_l2j_1p2>; diff --git a/arch/arm64/boot/dts/qcom/x1-microsoft-denali.dtsi b/arch/arm64/boot/dts/qcom/x1-microsoft-denali.dtsi index d77be02848b5..ba6b7b5a9191 100644 --- a/arch/arm64/boot/dts/qcom/x1-microsoft-denali.dtsi +++ b/arch/arm64/boot/dts/qcom/x1-microsoft-denali.dtsi @@ -964,9 +964,6 @@ wifi@0 { }; &pcie6a { - perst-gpios = <&tlmm 152 GPIO_ACTIVE_LOW>; - wake-gpios = <&tlmm 154 GPIO_ACTIVE_LOW>; - vddpe-3v3-supply = <&vreg_nvme>; pinctrl-0 = <&pcie6a_default>; @@ -982,6 +979,11 @@ &pcie6a_phy { status = "okay"; }; +&pcie6a_port0 { + reset-gpios = <&tlmm 152 GPIO_ACTIVE_LOW>; + wake-gpios = <&tlmm 154 GPIO_ACTIVE_LOW>; +}; + &pm8550_gpios { rtmr0_default: rtmr0-reset-n-active-state { pins = "gpio10"; diff --git a/arch/arm64/boot/dts/qcom/x1e80100-lenovo-yoga-slim7x.dts b/arch/arm64/boot/dts/qcom/x1e80100-lenovo-yoga-slim7x.dts index d6472e5a3f9f..d7938d349205 100644 --- a/arch/arm64/boot/dts/qcom/x1e80100-lenovo-yoga-slim7x.dts +++ b/arch/arm64/boot/dts/qcom/x1e80100-lenovo-yoga-slim7x.dts @@ -1126,9 +1126,6 @@ &mdss_dp3_phy { }; &pcie4 { - perst-gpios = <&tlmm 146 GPIO_ACTIVE_LOW>; - wake-gpios = <&tlmm 148 GPIO_ACTIVE_LOW>; - pinctrl-0 = <&pcie4_default>; pinctrl-names = "default"; @@ -1143,6 +1140,9 @@ &pcie4_phy { }; &pcie4_port0 { + reset-gpios = <&tlmm 146 GPIO_ACTIVE_LOW>; + wake-gpios = <&tlmm 148 GPIO_ACTIVE_LOW>; + wifi@0 { compatible = "pci17cb,1107"; reg = <0x10000 0x0 0x0 0x0 0x0>; diff --git a/arch/arm64/boot/dts/qcom/x1e80100-medion-sprchrgd-14-s1.dts b/arch/arm64/boot/dts/qcom/x1e80100-medion-sprchrgd-14-s1.dts index 20a33e6f27ee..eec5f2f1f75d 100644 --- a/arch/arm64/boot/dts/qcom/x1e80100-medion-sprchrgd-14-s1.dts +++ b/arch/arm64/boot/dts/qcom/x1e80100-medion-sprchrgd-14-s1.dts @@ -1033,9 +1033,6 @@ &mdss_dp3_phy { }; &pcie4 { - perst-gpios = <&tlmm 146 GPIO_ACTIVE_LOW>; - wake-gpios = <&tlmm 148 GPIO_ACTIVE_LOW>; - pinctrl-0 = <&pcie4_default>; pinctrl-names = "default"; @@ -1050,6 +1047,9 @@ &pcie4_phy { }; &pcie4_port0 { + reset-gpios = <&tlmm 146 GPIO_ACTIVE_LOW>; + wake-gpios = <&tlmm 148 GPIO_ACTIVE_LOW>; + wifi@0 { compatible = "pci17cb,1107"; reg = <0x10000 0x0 0x0 0x0 0x0>; @@ -1067,10 +1067,6 @@ wifi@0 { }; &pcie6a { - perst-gpios = <&tlmm 152 GPIO_ACTIVE_LOW>; - - wake-gpios = <&tlmm 154 GPIO_ACTIVE_LOW>; - vddpe-3v3-supply = <&vreg_nvme>; pinctrl-0 = <&pcie6a_default>; @@ -1086,6 +1082,11 @@ &pcie6a_phy { status = "okay"; }; +&pcie6a_port0 { + reset-gpios = <&tlmm 152 GPIO_ACTIVE_LOW>; + wake-gpios = <&tlmm 154 GPIO_ACTIVE_LOW>; +}; + &pm8550_gpios { rtmr0_default: rtmr0-reset-n-active-state { pins = "gpio10"; diff --git a/arch/arm64/boot/dts/qcom/x1p42100-lenovo-thinkbook-16.dts b/arch/arm64/boot/dts/qcom/x1p42100-lenovo-thinkbook-16.dts index 1e5eb8c5dc98..06747b54a38e 100644 --- a/arch/arm64/boot/dts/qcom/x1p42100-lenovo-thinkbook-16.dts +++ b/arch/arm64/boot/dts/qcom/x1p42100-lenovo-thinkbook-16.dts @@ -1131,9 +1131,6 @@ &mdss_dp3_phy { }; &pcie4 { - perst-gpios = <&tlmm 146 GPIO_ACTIVE_LOW>; - wake-gpios = <&tlmm 148 GPIO_ACTIVE_LOW>; - pinctrl-0 = <&pcie4_default>; pinctrl-names = "default"; @@ -1148,6 +1145,9 @@ &pcie4_phy { }; &pcie4_port0 { + reset-gpios = <&tlmm 146 GPIO_ACTIVE_LOW>; + wake-gpios = <&tlmm 148 GPIO_ACTIVE_LOW>; + wifi@0 { compatible = "pci17cb,1107"; reg = <0x10000 0x0 0x0 0x0 0x0>; @@ -1165,9 +1165,6 @@ wifi@0 { }; &pcie6a { - perst-gpios = <&tlmm 152 GPIO_ACTIVE_LOW>; - wake-gpios = <&tlmm 154 GPIO_ACTIVE_LOW>; - vddpe-3v3-supply = <&vreg_nvme>; pinctrl-0 = <&pcie6a_default>; @@ -1183,6 +1180,11 @@ &pcie6a_phy { status = "okay"; }; +&pcie6a_port0 { + reset-gpios = <&tlmm 152 GPIO_ACTIVE_LOW>; + wake-gpios = <&tlmm 154 GPIO_ACTIVE_LOW>; +}; + &pm8550_pwm { status = "okay"; }; From 1635c2acdde86c4f555b627aec873c8677c421ed Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Fri, 27 Mar 2026 09:18:21 +1100 Subject: [PATCH 063/248] cachefiles: fix incorrect dentry refcount in cachefiles_cull() The patch mentioned below changed cachefiles_bury_object() to expect 2 references to the 'rep' dentry. Three of the callers were changed to use start_removing_dentry() which takes an extra reference so in those cases the call gets the expected references. However there is another call to cachefiles_bury_object() in cachefiles_cull() which did not need to be changed to use start_removing_dentry() and so was not properly considered. It still passed the dentry with just one reference so the net result is that a reference is lost. To meet the expectations of cachefiles_bury_object(), cachefiles_cull() must take an extra reference before the call. It will be dropped by cachefiles_bury_object(). Reported-by: Marc Dionne Fixes: 7bb1eb45e43c ("VFS: introduce start_removing_dentry()") Signed-off-by: NeilBrown Link: https://patch.msgid.link/177456350181.1851489.16359967086642190170@noble.neil.brown.name Acked-by: Paulo Alcantara (Red Hat) Signed-off-by: Christian Brauner --- fs/cachefiles/namei.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/fs/cachefiles/namei.c b/fs/cachefiles/namei.c index e5ec90dccc27..eb9eb7683e3c 100644 --- a/fs/cachefiles/namei.c +++ b/fs/cachefiles/namei.c @@ -810,6 +810,11 @@ int cachefiles_cull(struct cachefiles_cache *cache, struct dentry *dir, if (ret < 0) goto error_unlock; + /* + * cachefiles_bury_object() expects 2 references to 'victim', + * and drops one. + */ + dget(victim); ret = cachefiles_bury_object(cache, NULL, dir, victim, FSCACHE_OBJECT_WAS_CULLED); dput(victim); From dff34ef879c5e73298443956a8b391311ba78d57 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Fri, 27 Mar 2026 11:52:05 +0100 Subject: [PATCH 064/248] mmc: vub300: fix NULL-deref on disconnect Make sure to deregister the controller before dropping the reference to the driver data on disconnect to avoid NULL-pointer dereferences or use-after-free. Fixes: 88095e7b473a ("mmc: Add new VUB300 USB-to-SD/SDIO/MMC driver") Cc: stable@vger.kernel.org # 3.0+ Signed-off-by: Johan Hovold Signed-off-by: Ulf Hansson --- drivers/mmc/host/vub300.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/mmc/host/vub300.c b/drivers/mmc/host/vub300.c index ff49d0770506..f173c7cf4e1a 100644 --- a/drivers/mmc/host/vub300.c +++ b/drivers/mmc/host/vub300.c @@ -2365,8 +2365,8 @@ static void vub300_disconnect(struct usb_interface *interface) usb_set_intfdata(interface, NULL); /* prevent more I/O from starting */ vub300->interface = NULL; - kref_put(&vub300->kref, vub300_delete); mmc_remove_host(mmc); + kref_put(&vub300->kref, vub300_delete); pr_info("USB vub300 remote SDIO host controller[%d]" " now disconnected", ifnum); return; From 8f4d20a710225ec7a565f6a0459862d3b1f32330 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Fri, 27 Mar 2026 11:52:06 +0100 Subject: [PATCH 065/248] mmc: vub300: fix use-after-free on disconnect The vub300 driver maintains an explicit reference count for the controller and its driver data and the last reference can in theory be dropped after the driver has been unbound. This specifically means that the controller allocation must not be device managed as that can lead to use-after-free. Note that the lifetime is currently also incorrectly tied the parent USB device rather than interface, which can lead to memory leaks if the driver is unbound without its device being physically disconnected (e.g. on probe deferral). Fix both issues by reverting to non-managed allocation of the controller. Fixes: dcfdd698dc52 ("mmc: vub300: Use devm_mmc_alloc_host() helper") Cc: stable@vger.kernel.org # 6.17+ Cc: Binbin Zhou Signed-off-by: Johan Hovold Signed-off-by: Ulf Hansson --- drivers/mmc/host/vub300.c | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/drivers/mmc/host/vub300.c b/drivers/mmc/host/vub300.c index f173c7cf4e1a..3c9df27f9fa7 100644 --- a/drivers/mmc/host/vub300.c +++ b/drivers/mmc/host/vub300.c @@ -369,11 +369,14 @@ struct vub300_mmc_host { static void vub300_delete(struct kref *kref) { /* kref callback - softirq */ struct vub300_mmc_host *vub300 = kref_to_vub300_mmc_host(kref); + struct mmc_host *mmc = vub300->mmc; + usb_free_urb(vub300->command_out_urb); vub300->command_out_urb = NULL; usb_free_urb(vub300->command_res_urb); vub300->command_res_urb = NULL; usb_put_dev(vub300->udev); + mmc_free_host(mmc); /* * and hence also frees vub300 * which is contained at the end of struct mmc @@ -2112,7 +2115,7 @@ static int vub300_probe(struct usb_interface *interface, goto error1; } /* this also allocates memory for our VUB300 mmc host device */ - mmc = devm_mmc_alloc_host(&udev->dev, sizeof(*vub300)); + mmc = mmc_alloc_host(sizeof(*vub300), &udev->dev); if (!mmc) { retval = -ENOMEM; dev_err(&udev->dev, "not enough memory for the mmc_host\n"); @@ -2269,7 +2272,7 @@ static int vub300_probe(struct usb_interface *interface, dev_err(&vub300->udev->dev, "Could not find two sets of bulk-in/out endpoint pairs\n"); retval = -EINVAL; - goto error4; + goto err_free_host; } retval = usb_control_msg(vub300->udev, usb_rcvctrlpipe(vub300->udev, 0), @@ -2278,14 +2281,14 @@ static int vub300_probe(struct usb_interface *interface, 0x0000, 0x0000, &vub300->hc_info, sizeof(vub300->hc_info), 1000); if (retval < 0) - goto error4; + goto err_free_host; retval = usb_control_msg(vub300->udev, usb_sndctrlpipe(vub300->udev, 0), SET_ROM_WAIT_STATES, USB_DIR_OUT | USB_TYPE_VENDOR | USB_RECIP_DEVICE, firmware_rom_wait_states, 0x0000, NULL, 0, 1000); if (retval < 0) - goto error4; + goto err_free_host; dev_info(&vub300->udev->dev, "operating_mode = %s %s %d MHz %s %d byte USB packets\n", (mmc->caps & MMC_CAP_SDIO_IRQ) ? "IRQs" : "POLL", @@ -2300,7 +2303,7 @@ static int vub300_probe(struct usb_interface *interface, 0x0000, 0x0000, &vub300->system_port_status, sizeof(vub300->system_port_status), 1000); if (retval < 0) { - goto error4; + goto err_free_host; } else if (sizeof(vub300->system_port_status) == retval) { vub300->card_present = (0x0001 & vub300->system_port_status.port_flags) ? 1 : 0; @@ -2308,7 +2311,7 @@ static int vub300_probe(struct usb_interface *interface, (0x0010 & vub300->system_port_status.port_flags) ? 1 : 0; } else { retval = -EINVAL; - goto error4; + goto err_free_host; } usb_set_intfdata(interface, vub300); INIT_DELAYED_WORK(&vub300->pollwork, vub300_pollwork_thread); @@ -2338,6 +2341,8 @@ static int vub300_probe(struct usb_interface *interface, return 0; error6: timer_delete_sync(&vub300->inactivity_timer); +err_free_host: + mmc_free_host(mmc); /* * and hence also frees vub300 * which is contained at the end of struct mmc From 82cbae59263e3941a8bb79f0b25e0b0f085132d4 Mon Sep 17 00:00:00 2001 From: Denis Benato Date: Sun, 29 Mar 2026 14:46:57 +0200 Subject: [PATCH 066/248] platform/x86: asus-armoury: add support for GV302XU MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add TDP data for laptop model GV302XU. Signed-off-by: Denis Benato Link: https://patch.msgid.link/20260329124659.3967495-2-denis.benato@linux.dev Reviewed-by: Ilpo Järvinen Signed-off-by: Ilpo Järvinen --- drivers/platform/x86/asus-armoury.h | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/drivers/platform/x86/asus-armoury.h b/drivers/platform/x86/asus-armoury.h index 569743746347..a53204c24d43 100644 --- a/drivers/platform/x86/asus-armoury.h +++ b/drivers/platform/x86/asus-armoury.h @@ -1376,6 +1376,33 @@ static const struct dmi_system_id power_limits[] = { }, }, }, + { + .matches = { + DMI_MATCH(DMI_BOARD_NAME, "GV302XU"), + }, + .driver_data = &(struct power_data) { + .ac_data = &(struct power_limits) { + .ppt_pl1_spl_min = 15, + .ppt_pl1_spl_max = 55, + .ppt_pl2_sppt_min = 25, + .ppt_pl2_sppt_max = 60, + .ppt_pl3_fppt_min = 35, + .ppt_pl3_fppt_max = 65, + .nv_temp_target_min = 75, + .nv_temp_target_max = 87, + }, + .dc_data = &(struct power_limits) { + .ppt_pl1_spl_min = 15, + .ppt_pl1_spl_max = 35, + .ppt_pl2_sppt_min = 25, + .ppt_pl2_sppt_max = 35, + .ppt_pl3_fppt_min = 35, + .ppt_pl3_fppt_max = 65, + .nv_temp_target_min = 75, + .nv_temp_target_max = 87, + }, + }, + }, { .matches = { DMI_MATCH(DMI_BOARD_NAME, "GV302XV"), From b4f04a6038fa97f7ceea33e99456b9838c79dde7 Mon Sep 17 00:00:00 2001 From: Denis Benato Date: Sun, 29 Mar 2026 14:46:58 +0200 Subject: [PATCH 067/248] platform/x86: asus-armoury: add support for FA607NU MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add TDP data for laptop model FA607NU. Signed-off-by: Denis Benato Link: https://patch.msgid.link/20260329124659.3967495-3-denis.benato@linux.dev Reviewed-by: Ilpo Järvinen Signed-off-by: Ilpo Järvinen --- drivers/platform/x86/asus-armoury.h | 31 +++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/drivers/platform/x86/asus-armoury.h b/drivers/platform/x86/asus-armoury.h index a53204c24d43..ed02574139fa 100644 --- a/drivers/platform/x86/asus-armoury.h +++ b/drivers/platform/x86/asus-armoury.h @@ -592,6 +592,37 @@ static const struct dmi_system_id power_limits[] = { }, }, }, + { + .matches = { + DMI_MATCH(DMI_BOARD_NAME, "FA607NU"), + }, + .driver_data = &(struct power_data) { + .ac_data = &(struct power_limits) { + .ppt_pl1_spl_min = 15, + .ppt_pl1_spl_max = 80, + .ppt_pl2_sppt_min = 35, + .ppt_pl2_sppt_max = 80, + .ppt_pl3_fppt_min = 35, + .ppt_pl3_fppt_max = 80, + .nv_dynamic_boost_min = 5, + .nv_dynamic_boost_max = 25, + .nv_temp_target_min = 75, + .nv_temp_target_max = 87, + }, + .dc_data = &(struct power_limits) { + .ppt_pl1_spl_min = 25, + .ppt_pl1_spl_def = 45, + .ppt_pl1_spl_max = 65, + .ppt_pl2_sppt_min = 25, + .ppt_pl2_sppt_def = 54, + .ppt_pl2_sppt_max = 65, + .ppt_pl3_fppt_min = 25, + .ppt_pl3_fppt_max = 65, + .nv_temp_target_min = 75, + .nv_temp_target_max = 87, + }, + }, + }, { .matches = { DMI_MATCH(DMI_BOARD_NAME, "FA607P"), From 5d486669b3db11ccc2d1b9f4e42c11c2766f35ba Mon Sep 17 00:00:00 2001 From: Denis Benato Date: Sun, 29 Mar 2026 14:46:59 +0200 Subject: [PATCH 068/248] platform/x86: asus-armoury: add support for GU605MU MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add TDP data for laptop model GU605MU. Signed-off-by: Denis Benato Link: https://patch.msgid.link/20260329124659.3967495-4-denis.benato@linux.dev Reviewed-by: Ilpo Järvinen Signed-off-by: Ilpo Järvinen --- drivers/platform/x86/asus-armoury.h | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/drivers/platform/x86/asus-armoury.h b/drivers/platform/x86/asus-armoury.h index ed02574139fa..c30d2b451e01 100644 --- a/drivers/platform/x86/asus-armoury.h +++ b/drivers/platform/x86/asus-armoury.h @@ -1340,6 +1340,34 @@ static const struct dmi_system_id power_limits[] = { .requires_fan_curve = true, }, }, + { + .matches = { + DMI_MATCH(DMI_BOARD_NAME, "GU605MU"), + }, + .driver_data = &(struct power_data) { + .ac_data = &(struct power_limits) { + .ppt_pl1_spl_min = 28, + .ppt_pl1_spl_max = 90, + .ppt_pl2_sppt_min = 28, + .ppt_pl2_sppt_max = 135, + .nv_dynamic_boost_min = 5, + .nv_dynamic_boost_max = 20, + .nv_temp_target_min = 75, + .nv_temp_target_max = 87, + .nv_tgp_min = 55, + .nv_tgp_max = 85, + }, + .dc_data = &(struct power_limits) { + .ppt_pl1_spl_min = 25, + .ppt_pl1_spl_max = 35, + .ppt_pl2_sppt_min = 38, + .ppt_pl2_sppt_max = 53, + .nv_temp_target_min = 75, + .nv_temp_target_max = 87, + }, + .requires_fan_curve = true, + }, + }, { .matches = { DMI_MATCH(DMI_BOARD_NAME, "GU605M"), From 88c4bd90725557796c15878b7cb70066e9e6b5ab Mon Sep 17 00:00:00 2001 From: Michal Wilczynski Date: Thu, 3 Apr 2025 15:10:51 +0200 Subject: [PATCH 069/248] firmware: thead: Fix buffer overflow and use standard endian macros Addresses two issues in the TH1520 AON firmware protocol driver: 1. Fix a potential buffer overflow where the code used unsafe pointer arithmetic to access the 'mode' field through the 'resource' pointer with an offset. This was flagged by Smatch static checker as: "buffer overflow 'data' 2 <= 3" 2. Replace custom RPC_SET_BE* and RPC_GET_BE* macros with standard kernel endianness conversion macros (cpu_to_be16, etc.) for better portability and maintainability. The functionality was re-tested with the GPU power-up sequence, confirming the GPU powers up correctly and the driver probes successfully. [ 12.702370] powervr ffef400000.gpu: [drm] loaded firmware powervr/rogue_36.52.104.182_v1.fw [ 12.711043] powervr ffef400000.gpu: [drm] FW version v1.0 (build 6645434 OS) [ 12.719787] [drm] Initialized powervr 1.0.0 for ffef400000.gpu on minor 0 Fixes: e4b3cbd840e5 ("firmware: thead: Add AON firmware protocol driver") Reported-by: Dan Carpenter Closes: https://lore.kernel.org/all/17a0ccce-060b-4b9d-a3c4-8d5d5823b1c9@stanley.mountain/ Signed-off-by: Michal Wilczynski Reviewed-by: Dan Carpenter Acked-by: Drew Fustini Cc: stable@vger.kernel.org Signed-off-by: Ulf Hansson --- drivers/firmware/thead,th1520-aon.c | 7 +- .../linux/firmware/thead/thead,th1520-aon.h | 74 ------------------- 2 files changed, 3 insertions(+), 78 deletions(-) diff --git a/drivers/firmware/thead,th1520-aon.c b/drivers/firmware/thead,th1520-aon.c index a35fd5e2a07f..d7f19b5b5f46 100644 --- a/drivers/firmware/thead,th1520-aon.c +++ b/drivers/firmware/thead,th1520-aon.c @@ -170,10 +170,9 @@ int th1520_aon_power_update(struct th1520_aon_chan *aon_chan, u16 rsrc, hdr->func = TH1520_AON_PM_FUNC_SET_RESOURCE_POWER_MODE; hdr->size = TH1520_AON_RPC_MSG_NUM; - RPC_SET_BE16(&msg.resource, 0, rsrc); - RPC_SET_BE16(&msg.resource, 2, - (power_on ? TH1520_AON_PM_PW_MODE_ON : - TH1520_AON_PM_PW_MODE_OFF)); + msg.resource = cpu_to_be16(rsrc); + msg.mode = cpu_to_be16(power_on ? TH1520_AON_PM_PW_MODE_ON : + TH1520_AON_PM_PW_MODE_OFF); ret = th1520_aon_call_rpc(aon_chan, &msg); if (ret) diff --git a/include/linux/firmware/thead/thead,th1520-aon.h b/include/linux/firmware/thead/thead,th1520-aon.h index dae132b66873..d81f5f6f5b90 100644 --- a/include/linux/firmware/thead/thead,th1520-aon.h +++ b/include/linux/firmware/thead/thead,th1520-aon.h @@ -97,80 +97,6 @@ struct th1520_aon_rpc_ack_common { #define RPC_GET_SVC_FLAG_ACK_TYPE(MESG) (((MESG)->svc & 0x40) >> 6) #define RPC_SET_SVC_FLAG_ACK_TYPE(MESG, ACK) ((MESG)->svc |= (ACK) << 6) -#define RPC_SET_BE64(MESG, OFFSET, SET_DATA) \ - do { \ - u8 *data = (u8 *)(MESG); \ - u64 _offset = (OFFSET); \ - u64 _set_data = (SET_DATA); \ - data[_offset + 7] = _set_data & 0xFF; \ - data[_offset + 6] = (_set_data & 0xFF00) >> 8; \ - data[_offset + 5] = (_set_data & 0xFF0000) >> 16; \ - data[_offset + 4] = (_set_data & 0xFF000000) >> 24; \ - data[_offset + 3] = (_set_data & 0xFF00000000) >> 32; \ - data[_offset + 2] = (_set_data & 0xFF0000000000) >> 40; \ - data[_offset + 1] = (_set_data & 0xFF000000000000) >> 48; \ - data[_offset + 0] = (_set_data & 0xFF00000000000000) >> 56; \ - } while (0) - -#define RPC_SET_BE32(MESG, OFFSET, SET_DATA) \ - do { \ - u8 *data = (u8 *)(MESG); \ - u64 _offset = (OFFSET); \ - u64 _set_data = (SET_DATA); \ - data[_offset + 3] = (_set_data) & 0xFF; \ - data[_offset + 2] = (_set_data & 0xFF00) >> 8; \ - data[_offset + 1] = (_set_data & 0xFF0000) >> 16; \ - data[_offset + 0] = (_set_data & 0xFF000000) >> 24; \ - } while (0) - -#define RPC_SET_BE16(MESG, OFFSET, SET_DATA) \ - do { \ - u8 *data = (u8 *)(MESG); \ - u64 _offset = (OFFSET); \ - u64 _set_data = (SET_DATA); \ - data[_offset + 1] = (_set_data) & 0xFF; \ - data[_offset + 0] = (_set_data & 0xFF00) >> 8; \ - } while (0) - -#define RPC_SET_U8(MESG, OFFSET, SET_DATA) \ - do { \ - u8 *data = (u8 *)(MESG); \ - data[OFFSET] = (SET_DATA) & 0xFF; \ - } while (0) - -#define RPC_GET_BE64(MESG, OFFSET, PTR) \ - do { \ - u8 *data = (u8 *)(MESG); \ - u64 _offset = (OFFSET); \ - *(u32 *)(PTR) = \ - (data[_offset + 7] | data[_offset + 6] << 8 | \ - data[_offset + 5] << 16 | data[_offset + 4] << 24 | \ - data[_offset + 3] << 32 | data[_offset + 2] << 40 | \ - data[_offset + 1] << 48 | data[_offset + 0] << 56); \ - } while (0) - -#define RPC_GET_BE32(MESG, OFFSET, PTR) \ - do { \ - u8 *data = (u8 *)(MESG); \ - u64 _offset = (OFFSET); \ - *(u32 *)(PTR) = \ - (data[_offset + 3] | data[_offset + 2] << 8 | \ - data[_offset + 1] << 16 | data[_offset + 0] << 24); \ - } while (0) - -#define RPC_GET_BE16(MESG, OFFSET, PTR) \ - do { \ - u8 *data = (u8 *)(MESG); \ - u64 _offset = (OFFSET); \ - *(u16 *)(PTR) = (data[_offset + 1] | data[_offset + 0] << 8); \ - } while (0) - -#define RPC_GET_U8(MESG, OFFSET, PTR) \ - do { \ - u8 *data = (u8 *)(MESG); \ - *(u8 *)(PTR) = (data[OFFSET]); \ - } while (0) - /* * Defines for SC PM Power Mode */ From e91d5f94acf68618ea3ad9c92ac28614e791ae7d Mon Sep 17 00:00:00 2001 From: Jacky Bai Date: Fri, 20 Mar 2026 16:43:46 +0800 Subject: [PATCH 070/248] pmdomain: imx8mp-blk-ctrl: Keep the NOC_HDCP clock enabled Keep the NOC_HDCP clock always enabled to fix the potential hang caused by the NoC ADB400 port power down handshake. Fixes: 77b0ddb42add ("soc: imx: add i.MX8MP HDMI blk ctrl HDCP/HRV_MWR") Signed-off-by: Jacky Bai Cc: stable@vger.kernel.org Signed-off-by: Ulf Hansson --- drivers/pmdomain/imx/imx8mp-blk-ctrl.c | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/drivers/pmdomain/imx/imx8mp-blk-ctrl.c b/drivers/pmdomain/imx/imx8mp-blk-ctrl.c index 8fc79f9723f0..3f5b9499d30a 100644 --- a/drivers/pmdomain/imx/imx8mp-blk-ctrl.c +++ b/drivers/pmdomain/imx/imx8mp-blk-ctrl.c @@ -352,9 +352,6 @@ static void imx8mp_hdmi_blk_ctrl_power_on(struct imx8mp_blk_ctrl *bc, regmap_set_bits(bc->regmap, HDMI_RTX_RESET_CTL0, BIT(12)); regmap_clear_bits(bc->regmap, HDMI_TX_CONTROL0, BIT(3)); break; - case IMX8MP_HDMIBLK_PD_HDCP: - regmap_set_bits(bc->regmap, HDMI_RTX_CLK_CTL0, BIT(11)); - break; case IMX8MP_HDMIBLK_PD_HRV: regmap_set_bits(bc->regmap, HDMI_RTX_CLK_CTL1, BIT(3) | BIT(4) | BIT(5)); regmap_set_bits(bc->regmap, HDMI_RTX_RESET_CTL0, BIT(15)); @@ -408,9 +405,6 @@ static void imx8mp_hdmi_blk_ctrl_power_off(struct imx8mp_blk_ctrl *bc, regmap_clear_bits(bc->regmap, HDMI_RTX_CLK_CTL0, BIT(7)); regmap_clear_bits(bc->regmap, HDMI_RTX_CLK_CTL1, BIT(22) | BIT(24)); break; - case IMX8MP_HDMIBLK_PD_HDCP: - regmap_clear_bits(bc->regmap, HDMI_RTX_CLK_CTL0, BIT(11)); - break; case IMX8MP_HDMIBLK_PD_HRV: regmap_clear_bits(bc->regmap, HDMI_RTX_RESET_CTL0, BIT(15)); regmap_clear_bits(bc->regmap, HDMI_RTX_CLK_CTL1, BIT(3) | BIT(4) | BIT(5)); @@ -439,7 +433,7 @@ static int imx8mp_hdmi_power_notifier(struct notifier_block *nb, regmap_write(bc->regmap, HDMI_RTX_CLK_CTL0, 0x0); regmap_write(bc->regmap, HDMI_RTX_CLK_CTL1, 0x0); regmap_set_bits(bc->regmap, HDMI_RTX_CLK_CTL0, - BIT(0) | BIT(1) | BIT(10)); + BIT(0) | BIT(1) | BIT(10) | BIT(11)); regmap_set_bits(bc->regmap, HDMI_RTX_RESET_CTL0, BIT(0)); /* From 703ccb63ae9f7444d6ff876d024e17f628103c69 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Tue, 31 Mar 2026 18:07:39 -0700 Subject: [PATCH 071/248] workqueue: Add pool_workqueue to pending_pwqs list when unplugging multiple inactive works MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In unplug_oldest_pwq(), the first inactive work item on the pool_workqueue is activated correctly. However, if multiple inactive works exist on the same pool_workqueue, subsequent works fail to activate because wq_node_nr_active.pending_pwqs is empty — the list insertion is skipped when the pool_workqueue is plugged. Fix this by checking for additional inactive works in unplug_oldest_pwq() and updating wq_node_nr_active.pending_pwqs accordingly. Fixes: 4c065dbce1e8 ("workqueue: Enable unbound cpumask update on ordered workqueues") Cc: stable@vger.kernel.org Cc: Carlos Santa Cc: Ryan Neph Cc: Lai Jiangshan Cc: Waiman Long Cc: linux-kernel@vger.kernel.org Signed-off-by: Matthew Brost Signed-off-by: Tejun Heo Acked-by: Waiman Long --- kernel/workqueue.c | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/kernel/workqueue.c b/kernel/workqueue.c index eda756556341..c6ea96d5b716 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -1849,8 +1849,20 @@ static void unplug_oldest_pwq(struct workqueue_struct *wq) raw_spin_lock_irq(&pwq->pool->lock); if (pwq->plugged) { pwq->plugged = false; - if (pwq_activate_first_inactive(pwq, true)) + if (pwq_activate_first_inactive(pwq, true)) { + /* + * While plugged, queueing skips activation which + * includes bumping the nr_active count and adding the + * pwq to nna->pending_pwqs if the count can't be + * obtained. We need to restore both for the pwq being + * unplugged. The first call activates the first + * inactive work item and the second, if there are more + * inactive, puts the pwq on pending_pwqs. + */ + pwq_activate_first_inactive(pwq, false); + kick_pool(pwq->pool); + } } raw_spin_unlock_irq(&pwq->pool->lock); } From 39ed7d89b973329cc5c764b65ba6302b17b1907e Mon Sep 17 00:00:00 2001 From: Anthony Pighin Date: Tue, 31 Mar 2026 14:26:32 -0400 Subject: [PATCH 072/248] i2c: imx: zero-initialize dma_slave_config for eDMA commit 66d88e16f204 ("dmaengine: fsl-edma: read/write multiple registers in cyclic transactions") causes fsl_edma_fill_tcd() to read dst_port_window_size and src_port_window_size when building transfer control descriptors. Initialize the structure so unset fields are explicitly zero. Fixes: 66d88e16f204 ("dmaengine: fsl-edma: read/write multiple registers in cyclic transactions") Signed-off-by: Anthony Pighin Cc: # v6.14+ Signed-off-by: Andi Shyti Link: https://lore.kernel.org/r/20260331182632.888110-1-anthony.pighin@nokia.com --- drivers/i2c/busses/i2c-imx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/i2c/busses/i2c-imx.c b/drivers/i2c/busses/i2c-imx.c index 452d120a210b..a208fefd3c3b 100644 --- a/drivers/i2c/busses/i2c-imx.c +++ b/drivers/i2c/busses/i2c-imx.c @@ -401,7 +401,7 @@ static void i2c_imx_reset_regs(struct imx_i2c_struct *i2c_imx) static int i2c_imx_dma_request(struct imx_i2c_struct *i2c_imx, dma_addr_t phy_addr) { struct imx_i2c_dma *dma; - struct dma_slave_config dma_sconfig; + struct dma_slave_config dma_sconfig = {}; struct device *dev = i2c_imx->adapter.dev.parent; int ret; From aaefbdde9abdc43699e110679c0e10972a5e1c59 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ma=C3=ADra=20Canal?= Date: Mon, 30 Mar 2026 14:51:43 -0300 Subject: [PATCH 073/248] drm/vc4: Release runtime PM reference after binding V3D MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The vc4_v3d_bind() function acquires a runtime PM reference via pm_runtime_resume_and_get() to access V3D registers during setup. However, this reference is never released after a successful bind. This prevents the device from ever runtime suspending, since the reference count never reaches zero. Release the runtime PM reference by adding pm_runtime_put_autosuspend() after autosuspend is configured, allowing the device to runtime suspend after the delay. Fixes: 266cff37d7fc ("drm/vc4: v3d: Rework the runtime_pm setup") Reviewed-by: Melissa Wen Link: https://patch.msgid.link/20260330-vc4-misc-fixes-v1-1-92defc940a29@igalia.com Signed-off-by: Maíra Canal --- drivers/gpu/drm/vc4/vc4_v3d.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/vc4/vc4_v3d.c b/drivers/gpu/drm/vc4/vc4_v3d.c index 3ffe09bc89d2..d31b906cb8e7 100644 --- a/drivers/gpu/drm/vc4/vc4_v3d.c +++ b/drivers/gpu/drm/vc4/vc4_v3d.c @@ -481,6 +481,7 @@ static int vc4_v3d_bind(struct device *dev, struct device *master, void *data) pm_runtime_use_autosuspend(dev); pm_runtime_set_autosuspend_delay(dev, 40); /* a little over 2 frames. */ + pm_runtime_put_autosuspend(dev); return 0; From f4dfd6847b3e5d24e336bca6057485116d17aea4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ma=C3=ADra=20Canal?= Date: Mon, 30 Mar 2026 14:51:44 -0300 Subject: [PATCH 074/248] drm/vc4: Fix memory leak of BO array in hang state MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The hang state's BO array is allocated separately with kzalloc() in vc4_save_hang_state() but never freed in vc4_free_hang_state(). Add the missing kfree() for the BO array before freeing the hang state struct. Fixes: 214613656b51 ("drm/vc4: Add an interface for capturing the GPU state after a hang.") Reviewed-by: Melissa Wen Link: https://patch.msgid.link/20260330-vc4-misc-fixes-v1-2-92defc940a29@igalia.com Signed-off-by: Maíra Canal --- drivers/gpu/drm/vc4/vc4_gem.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/vc4/vc4_gem.c b/drivers/gpu/drm/vc4/vc4_gem.c index ad8cbd727b80..a64d5a962452 100644 --- a/drivers/gpu/drm/vc4/vc4_gem.c +++ b/drivers/gpu/drm/vc4/vc4_gem.c @@ -62,6 +62,7 @@ vc4_free_hang_state(struct drm_device *dev, struct vc4_hang_state *state) for (i = 0; i < state->user_state.bo_count; i++) drm_gem_object_put(state->bo[i]); + kfree(state->bo); kfree(state); } From 9525d169e5fd481538cf8c663cc5839e54f2e481 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ma=C3=ADra=20Canal?= Date: Mon, 30 Mar 2026 14:51:45 -0300 Subject: [PATCH 075/248] drm/vc4: Fix a memory leak in hang state error path MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When vc4_save_hang_state() encounters an early return condition, it returns without freeing the previously allocated `kernel_state`, leaking memory. Add the missing kfree() calls by consolidating the early return paths into a single place. Fixes: 214613656b51 ("drm/vc4: Add an interface for capturing the GPU state after a hang.") Reviewed-by: Melissa Wen Link: https://patch.msgid.link/20260330-vc4-misc-fixes-v1-3-92defc940a29@igalia.com Signed-off-by: Maíra Canal --- drivers/gpu/drm/vc4/vc4_gem.c | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/vc4/vc4_gem.c b/drivers/gpu/drm/vc4/vc4_gem.c index a64d5a962452..34bdac107ce1 100644 --- a/drivers/gpu/drm/vc4/vc4_gem.c +++ b/drivers/gpu/drm/vc4/vc4_gem.c @@ -171,10 +171,8 @@ vc4_save_hang_state(struct drm_device *dev) spin_lock_irqsave(&vc4->job_lock, irqflags); exec[0] = vc4_first_bin_job(vc4); exec[1] = vc4_first_render_job(vc4); - if (!exec[0] && !exec[1]) { - spin_unlock_irqrestore(&vc4->job_lock, irqflags); - return; - } + if (!exec[0] && !exec[1]) + goto err_free_state; /* Get the bos from both binner and renderer into hang state. */ state->bo_count = 0; @@ -191,10 +189,8 @@ vc4_save_hang_state(struct drm_device *dev) kernel_state->bo = kzalloc_objs(*kernel_state->bo, state->bo_count, GFP_ATOMIC); - if (!kernel_state->bo) { - spin_unlock_irqrestore(&vc4->job_lock, irqflags); - return; - } + if (!kernel_state->bo) + goto err_free_state; k = 0; for (i = 0; i < 2; i++) { @@ -286,6 +282,12 @@ vc4_save_hang_state(struct drm_device *dev) vc4->hang_state = kernel_state; spin_unlock_irqrestore(&vc4->job_lock, irqflags); } + + return; + +err_free_state: + spin_unlock_irqrestore(&vc4->job_lock, irqflags); + kfree(kernel_state); } static void From 338c56050d8e892604da97f67bfa8cc4015a955f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ma=C3=ADra=20Canal?= Date: Mon, 30 Mar 2026 14:51:46 -0300 Subject: [PATCH 076/248] drm/vc4: Protect madv read in vc4_gem_object_mmap() with madv_lock MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The mmap callback reads bo->madv without holding madv_lock, racing with concurrent DRM_IOCTL_VC4_GEM_MADVISE calls that modify the field under the same lock. Add the missing locking to prevent the data race. Fixes: b9f19259b84d ("drm/vc4: Add the DRM_IOCTL_VC4_GEM_MADVISE ioctl") Reviewed-by: Melissa Wen Link: https://patch.msgid.link/20260330-vc4-misc-fixes-v1-4-92defc940a29@igalia.com Signed-off-by: Maíra Canal --- drivers/gpu/drm/vc4/vc4_bo.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/vc4/vc4_bo.c b/drivers/gpu/drm/vc4/vc4_bo.c index 1f93bc5a3d02..cb2b62a41972 100644 --- a/drivers/gpu/drm/vc4/vc4_bo.c +++ b/drivers/gpu/drm/vc4/vc4_bo.c @@ -738,12 +738,15 @@ static int vc4_gem_object_mmap(struct drm_gem_object *obj, struct vm_area_struct return -EINVAL; } + mutex_lock(&bo->madv_lock); if (bo->madv != VC4_MADV_WILLNEED) { DRM_DEBUG("mmapping of %s BO not allowed\n", bo->madv == VC4_MADV_DONTNEED ? "purgeable" : "purged"); + mutex_unlock(&bo->madv_lock); return -EINVAL; } + mutex_unlock(&bo->madv_lock); return drm_gem_dma_mmap(&bo->base, vma); } From 3a359bf5c61d52e7f09754108309d637532164a6 Mon Sep 17 00:00:00 2001 From: Ruide Cao Date: Thu, 2 Apr 2026 23:12:31 +0800 Subject: [PATCH 077/248] batman-adv: reject oversized global TT response buffers batadv_tt_prepare_tvlv_global_data() builds the allocation length for a global TT response in 16-bit temporaries. When a remote originator advertises a large enough global TT, the TT payload length plus the VLAN header offset can exceed 65535 and wrap before kmalloc(). The full-table response path still uses the original TT payload length when it fills tt_change, so the wrapped allocation is too small and batadv_tt_prepare_tvlv_global_data() writes past the end of the heap object before the later packet-size check runs. Fix this by rejecting TT responses whose TVLV value length cannot fit in the 16-bit TVLV payload length field. Fixes: 7ea7b4a14275 ("batman-adv: make the TT CRC logic VLAN specific") Cc: stable@vger.kernel.org Reported-by: Yifan Wu Reported-by: Juefei Pu Co-developed-by: Yuan Tan Signed-off-by: Yuan Tan Suggested-by: Xin Liu Tested-by: Ren Wei Signed-off-by: Ruide Cao Signed-off-by: Ren Wei Signed-off-by: Sven Eckelmann Signed-off-by: Simon Wunderlich --- net/batman-adv/translation-table.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/net/batman-adv/translation-table.c b/net/batman-adv/translation-table.c index 6e95e883c2bf..05cddcf994f6 100644 --- a/net/batman-adv/translation-table.c +++ b/net/batman-adv/translation-table.c @@ -798,8 +798,8 @@ batadv_tt_prepare_tvlv_global_data(struct batadv_orig_node *orig_node, { u16 num_vlan = 0; u16 num_entries = 0; - u16 change_offset; - u16 tvlv_len; + u16 tvlv_len = 0; + unsigned int change_offset; struct batadv_tvlv_tt_vlan_data *tt_vlan; struct batadv_orig_node_vlan *vlan; u8 *tt_change_ptr; @@ -816,6 +816,11 @@ batadv_tt_prepare_tvlv_global_data(struct batadv_orig_node *orig_node, if (*tt_len < 0) *tt_len = batadv_tt_len(num_entries); + if (change_offset > U16_MAX || *tt_len > U16_MAX - change_offset) { + *tt_len = 0; + goto out; + } + tvlv_len = *tt_len; tvlv_len += change_offset; From 07712db80857d5d09ae08f3df85a708ecfc3b61f Mon Sep 17 00:00:00 2001 From: Nicholas Carlini Date: Tue, 31 Mar 2026 15:25:32 +0200 Subject: [PATCH 078/248] eventpoll: defer struct eventpoll free to RCU grace period In certain situations, ep_free() in eventpoll.c will kfree the epi->ep eventpoll struct while it still being used by another concurrent thread. Defer the kfree() to an RCU callback to prevent UAF. Fixes: f2e467a48287 ("eventpoll: Fix semi-unbounded recursion") Signed-off-by: Nicholas Carlini Signed-off-by: Christian Brauner --- fs/eventpoll.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/fs/eventpoll.c b/fs/eventpoll.c index 5714e900567c..4b43bf41296d 100644 --- a/fs/eventpoll.c +++ b/fs/eventpoll.c @@ -226,6 +226,9 @@ struct eventpoll { */ refcount_t refcount; + /* used to defer freeing past ep_get_upwards_depth_proc() RCU walk */ + struct rcu_head rcu; + #ifdef CONFIG_NET_RX_BUSY_POLL /* used to track busy poll napi_id */ unsigned int napi_id; @@ -819,7 +822,8 @@ static void ep_free(struct eventpoll *ep) mutex_destroy(&ep->mtx); free_uid(ep->user); wakeup_source_unregister(ep->ws); - kfree(ep); + /* ep_get_upwards_depth_proc() may still hold epi->ep under RCU */ + kfree_rcu(ep, rcu); } /* From 857fa8f2a5b184c206c703a3d9ce05cea683cfed Mon Sep 17 00:00:00 2001 From: Jean Delvare Date: Wed, 1 Apr 2026 12:23:23 +0200 Subject: [PATCH 079/248] accel: ethosu: Add hardware dependency hint The Ethos-U NPU is only available on ARM systems, so add a hardware dependency hint to prevent this driver from being needlessly included in kernels built for other architectures. Signed-off-by: Jean Delvare Link: https://patch.msgid.link/20260401122323.6127a77c@endymion Signed-off-by: Rob Herring (Arm) --- drivers/accel/ethosu/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/accel/ethosu/Kconfig b/drivers/accel/ethosu/Kconfig index d25f9b3eb317..f68e6e286903 100644 --- a/drivers/accel/ethosu/Kconfig +++ b/drivers/accel/ethosu/Kconfig @@ -4,6 +4,7 @@ config DRM_ACCEL_ARM_ETHOSU tristate "Arm Ethos-U65/U85 NPU" depends on HAS_IOMEM depends on DRM_ACCEL + depends on ARM || ARM64 || COMPILE_TEST select DRM_GEM_DMA_HELPER select DRM_SCHED select GENERIC_ALLOCATOR From 8eceab19eba9dcbfd2a0daec72e1bf48aa100170 Mon Sep 17 00:00:00 2001 From: Douya Le Date: Thu, 2 Apr 2026 23:34:55 +0800 Subject: [PATCH 080/248] crypto: af_alg - limit RX SG extraction by receive buffer budget Make af_alg_get_rsgl() limit each RX scatterlist extraction to the remaining receive buffer budget. af_alg_get_rsgl() currently uses af_alg_readable() only as a gate before extracting data into the RX scatterlist. Limit each extraction to the remaining af_alg_rcvbuf(sk) budget so that receive-side accounting matches the amount of data attached to the request. If skcipher cannot obtain enough RX space for at least one chunk while more data remains to be processed, reject the recvmsg call instead of rounding the request length down to zero. Fixes: e870456d8e7c8d57c059ea479b5aadbb55ff4c3a ("crypto: algif_skcipher - overhaul memory management") Reported-by: Yifan Wu Reported-by: Juefei Pu Co-developed-by: Yuan Tan Signed-off-by: Yuan Tan Suggested-by: Xin Liu Signed-off-by: Douya Le Signed-off-by: Ren Wei Signed-off-by: Herbert Xu --- crypto/af_alg.c | 2 ++ crypto/algif_skcipher.c | 5 +++++ 2 files changed, 7 insertions(+) diff --git a/crypto/af_alg.c b/crypto/af_alg.c index 8e0199394984..437f3e77c7e0 100644 --- a/crypto/af_alg.c +++ b/crypto/af_alg.c @@ -1229,6 +1229,8 @@ int af_alg_get_rsgl(struct sock *sk, struct msghdr *msg, int flags, seglen = min_t(size_t, (maxsize - len), msg_data_left(msg)); + /* Never pin more pages than the remaining RX accounting budget. */ + seglen = min_t(size_t, seglen, af_alg_rcvbuf(sk)); if (list_empty(&areq->rsgl_list)) { rsgl = &areq->first_rsgl; diff --git a/crypto/algif_skcipher.c b/crypto/algif_skcipher.c index 82735e51be10..ba0a17fd95ac 100644 --- a/crypto/algif_skcipher.c +++ b/crypto/algif_skcipher.c @@ -130,6 +130,11 @@ static int _skcipher_recvmsg(struct socket *sock, struct msghdr *msg, * full block size buffers. */ if (ctx->more || len < ctx->used) { + if (len < bs) { + err = -EINVAL; + goto free; + } + len -= len % bs; cflags |= CRYPTO_SKCIPHER_REQ_NOTFINAL; } From 2fd68c7ea2ae741a4446d54c8a461255022e2dc7 Mon Sep 17 00:00:00 2001 From: Qingfang Deng Date: Wed, 1 Apr 2026 10:28:39 +0800 Subject: [PATCH 081/248] MAINTAINERS: orphan PPP over Ethernet driver We haven't seen activities from Michal Ostrowski for quite a long time. The last commit from him is fb64bb560e18 ("PPPoE: Fix flush/close races."), which was in 2009. Email to mostrows@earthlink.net also bounces. Signed-off-by: Qingfang Deng Link: https://patch.msgid.link/20260401022842.15082-1-dqfext@gmail.com Signed-off-by: Jakub Kicinski --- MAINTAINERS | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index c3fe46d7c4bc..c6e226ae109d 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -21075,8 +21075,7 @@ F: include/uapi/linux/atmppp.h F: net/atm/pppoatm.c PPP OVER ETHERNET -M: Michal Ostrowski -S: Maintained +S: Orphan F: drivers/net/ppp/pppoe.c F: drivers/net/ppp/pppox.c From e9c9f084cd78a58e2331fbf83c3d5625fb86e33a Mon Sep 17 00:00:00 2001 From: Allison Henderson Date: Wed, 1 Apr 2026 17:58:33 -0700 Subject: [PATCH 082/248] MAINTAINERS: Update email for Allison Henderson Switch active email address to kernel.org alias Signed-off-by: Allison Henderson Link: https://patch.msgid.link/20260402005833.38376-1-achender@kernel.org Signed-off-by: Jakub Kicinski --- MAINTAINERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index c6e226ae109d..4601903a8f9b 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -22129,7 +22129,7 @@ S: Supported F: drivers/infiniband/sw/rdmavt RDS - RELIABLE DATAGRAM SOCKETS -M: Allison Henderson +M: Allison Henderson L: netdev@vger.kernel.org L: linux-rdma@vger.kernel.org L: rds-devel@oss.oracle.com (moderated for non-subscribers) From 6dede3967619b5944003227a5d09fdc21ed57d10 Mon Sep 17 00:00:00 2001 From: David Carlier Date: Wed, 1 Apr 2026 22:12:18 +0100 Subject: [PATCH 083/248] net: altera-tse: fix skb leak on DMA mapping error in tse_start_xmit() When dma_map_single() fails in tse_start_xmit(), the function returns NETDEV_TX_OK without freeing the skb. Since NETDEV_TX_OK tells the stack the packet was consumed, the skb is never freed, leaking memory on every DMA mapping failure. Add dev_kfree_skb_any() before returning to properly free the skb. Fixes: bbd2190ce96d ("Altera TSE: Add main and header file for Altera Ethernet Driver") Cc: stable@vger.kernel.org Signed-off-by: David Carlier Link: https://patch.msgid.link/20260401211218.279185-1-devnexen@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/altera/altera_tse_main.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/altera/altera_tse_main.c b/drivers/net/ethernet/altera/altera_tse_main.c index 4342e2d026f8..9eed0be4411e 100644 --- a/drivers/net/ethernet/altera/altera_tse_main.c +++ b/drivers/net/ethernet/altera/altera_tse_main.c @@ -570,6 +570,7 @@ static netdev_tx_t tse_start_xmit(struct sk_buff *skb, struct net_device *dev) DMA_TO_DEVICE); if (dma_mapping_error(priv->device, dma_addr)) { netdev_err(priv->dev, "%s: DMA mapping error\n", __func__); + dev_kfree_skb_any(skb); ret = NETDEV_TX_OK; goto out; } From 51f4e090b9f87b40c21b6daadb5c06e6c0a07b67 Mon Sep 17 00:00:00 2001 From: Tyllis Xu Date: Tue, 31 Mar 2026 23:47:07 -0500 Subject: [PATCH 084/248] net: stmmac: fix integer underflow in chain mode The jumbo_frm() chain-mode implementation unconditionally computes len = nopaged_len - bmax; where nopaged_len = skb_headlen(skb) (linear bytes only) and bmax is BUF_SIZE_8KiB or BUF_SIZE_2KiB. However, the caller stmmac_xmit() decides to invoke jumbo_frm() based on skb->len (total length including page fragments): is_jumbo = stmmac_is_jumbo_frm(priv, skb->len, enh_desc); When a packet has a small linear portion (nopaged_len <= bmax) but a large total length due to page fragments (skb->len > bmax), the subtraction wraps as an unsigned integer, producing a huge len value (~0xFFFFxxxx). This causes the while (len != 0) loop to execute hundreds of thousands of iterations, passing skb->data + bmax * i pointers far beyond the skb buffer to dma_map_single(). On IOMMU-less SoCs (the typical deployment for stmmac), this maps arbitrary kernel memory to the DMA engine, constituting a kernel memory disclosure and potential memory corruption from hardware. Fix this by introducing a buf_len local variable clamped to min(nopaged_len, bmax). Computing len = nopaged_len - buf_len is then always safe: it is zero when the linear portion fits within a single descriptor, causing the while (len != 0) loop to be skipped naturally, and the fragment loop in stmmac_xmit() handles page fragments afterward. Fixes: 286a83721720 ("stmmac: add CHAINED descriptor mode support (V4)") Cc: stable@vger.kernel.org Signed-off-by: Tyllis Xu Link: https://patch.msgid.link/20260401044708.1386919-1-LivelyCarpet87@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/stmicro/stmmac/chain_mode.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/chain_mode.c b/drivers/net/ethernet/stmicro/stmmac/chain_mode.c index 120a009c9992..37f9417c7c0e 100644 --- a/drivers/net/ethernet/stmicro/stmmac/chain_mode.c +++ b/drivers/net/ethernet/stmicro/stmmac/chain_mode.c @@ -20,7 +20,7 @@ static int jumbo_frm(struct stmmac_tx_queue *tx_q, struct sk_buff *skb, unsigned int nopaged_len = skb_headlen(skb); struct stmmac_priv *priv = tx_q->priv_data; unsigned int entry = tx_q->cur_tx; - unsigned int bmax, des2; + unsigned int bmax, buf_len, des2; unsigned int i = 1, len; struct dma_desc *desc; @@ -31,17 +31,18 @@ static int jumbo_frm(struct stmmac_tx_queue *tx_q, struct sk_buff *skb, else bmax = BUF_SIZE_2KiB; - len = nopaged_len - bmax; + buf_len = min_t(unsigned int, nopaged_len, bmax); + len = nopaged_len - buf_len; des2 = dma_map_single(priv->device, skb->data, - bmax, DMA_TO_DEVICE); + buf_len, DMA_TO_DEVICE); desc->des2 = cpu_to_le32(des2); if (dma_mapping_error(priv->device, des2)) return -1; tx_q->tx_skbuff_dma[entry].buf = des2; - tx_q->tx_skbuff_dma[entry].len = bmax; + tx_q->tx_skbuff_dma[entry].len = buf_len; /* do not close the descriptor and do not set own bit */ - stmmac_prepare_tx_desc(priv, desc, 1, bmax, csum, STMMAC_CHAIN_MODE, + stmmac_prepare_tx_desc(priv, desc, 1, buf_len, csum, STMMAC_CHAIN_MODE, 0, false, skb->len); while (len != 0) { From c842743d073bdd683606cb414eb0ca84465dd834 Mon Sep 17 00:00:00 2001 From: Ruide Cao Date: Thu, 2 Apr 2026 22:46:20 +0800 Subject: [PATCH 085/248] net: sched: act_csum: validate nested VLAN headers tcf_csum_act() walks nested VLAN headers directly from skb->data when an skb still carries in-payload VLAN tags. The current code reads vlan->h_vlan_encapsulated_proto and then pulls VLAN_HLEN bytes without first ensuring that the full VLAN header is present in the linear area. If only part of an inner VLAN header is linearized, accessing h_vlan_encapsulated_proto reads past the linear area, and the following skb_pull(VLAN_HLEN) may violate skb invariants. Fix this by requiring pskb_may_pull(skb, VLAN_HLEN) before accessing and pulling each nested VLAN header. If the header still is not fully available, drop the packet through the existing error path. Fixes: 2ecba2d1e45b ("net: sched: act_csum: Fix csum calc for tagged packets") Reported-by: Yifan Wu Reported-by: Juefei Pu Co-developed-by: Yuan Tan Signed-off-by: Yuan Tan Suggested-by: Xin Liu Tested-by: Ren Wei Signed-off-by: Ruide Cao Signed-off-by: Ren Wei Reviewed-by: Simon Horman Link: https://patch.msgid.link/22df2fcb49f410203eafa5d97963dd36089f4ecf.1774892775.git.caoruide123@gmail.com Signed-off-by: Jakub Kicinski --- net/sched/act_csum.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/net/sched/act_csum.c b/net/sched/act_csum.c index 213e1ce9d2da..a9e4635d899e 100644 --- a/net/sched/act_csum.c +++ b/net/sched/act_csum.c @@ -604,8 +604,12 @@ TC_INDIRECT_SCOPE int tcf_csum_act(struct sk_buff *skb, protocol = skb->protocol; orig_vlan_tag_present = true; } else { - struct vlan_hdr *vlan = (struct vlan_hdr *)skb->data; + struct vlan_hdr *vlan; + if (!pskb_may_pull(skb, VLAN_HLEN)) + goto drop; + + vlan = (struct vlan_hdr *)skb->data; protocol = vlan->h_vlan_encapsulated_proto; skb_pull(skb, VLAN_HLEN); skb_reset_network_header(skb); From e16a0d36777b572196de4944aaa196adf828eb8e Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 2 Apr 2026 16:10:40 +0200 Subject: [PATCH 086/248] net: fec: make FIXED_PHY dependency unconditional When CONFIG_FIXED_PHY is in a loadable module, the fec driver cannot be built-in any more: x86_64-linux-ld: vmlinux.o: in function `fec_enet_mii_probe': fec_main.c:(.text+0xc4f367): undefined reference to `fixed_phy_unregister' x86_64-linux-ld: vmlinux.o: in function `fec_enet_close': fec_main.c:(.text+0xc59591): undefined reference to `fixed_phy_unregister' x86_64-linux-ld: vmlinux.o: in function `fec_enet_mii_probe.cold': Select the fixed phy support on all targets to make this build correctly, not just on coldfire. Notat that Essentially the stub helpers in include/linux/phy_fixed.h cannot be used correctly because of this build time dependency, and we could just remove them to hit the build failure more often when a driver uses them without the 'select FIXED_PHY'. Fixes: dc86b621e1b4 ("net: fec: register a fixed phy using fixed_phy_register_100fd if needed") Signed-off-by: Arnd Bergmann Reviewed-by: Simon Horman Link: https://patch.msgid.link/20260402141048.2713445-1-arnd@kernel.org Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/freescale/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/freescale/Kconfig b/drivers/net/ethernet/freescale/Kconfig index e2a591cf9601..11edbb46a118 100644 --- a/drivers/net/ethernet/freescale/Kconfig +++ b/drivers/net/ethernet/freescale/Kconfig @@ -28,7 +28,7 @@ config FEC depends on PTP_1588_CLOCK_OPTIONAL select CRC32 select PHYLIB - select FIXED_PHY if M5272 + select FIXED_PHY select PAGE_POOL imply PAGE_POOL_STATS imply NET_SELFTESTS From b120e4432f9f56c7103133d6a11245e617695adb Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 2 Apr 2026 10:35:19 +0000 Subject: [PATCH 087/248] net: lapbether: handle NETDEV_PRE_TYPE_CHANGE lapbeth_data_transmit() expects the underlying device type to be ARPHRD_ETHER. Returning NOTIFY_BAD from lapbeth_device_event() makes sure bonding driver can not break this expectation. Fixes: 872254dd6b1f ("net/bonding: Enable bonding to enslave non ARPHRD_ETHER") Reported-by: syzbot+d8c285748fa7292580a9@syzkaller.appspotmail.com Closes: https://lore.kernel.org/netdev/69cd22a1.050a0220.70c3a.0002.GAE@google.com/T/#u Signed-off-by: Eric Dumazet Cc: Martin Schiller Reviewed-by: Simon Horman Link: https://patch.msgid.link/20260402103519.1201565-1-edumazet@google.com Signed-off-by: Jakub Kicinski --- drivers/net/wan/lapbether.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/drivers/net/wan/lapbether.c b/drivers/net/wan/lapbether.c index f357a7ac70ac..9861c99ea56c 100644 --- a/drivers/net/wan/lapbether.c +++ b/drivers/net/wan/lapbether.c @@ -446,33 +446,36 @@ static void lapbeth_free_device(struct lapbethdev *lapbeth) static int lapbeth_device_event(struct notifier_block *this, unsigned long event, void *ptr) { - struct lapbethdev *lapbeth; struct net_device *dev = netdev_notifier_info_to_dev(ptr); + struct lapbethdev *lapbeth; if (dev_net(dev) != &init_net) return NOTIFY_DONE; - if (!dev_is_ethdev(dev) && !lapbeth_get_x25_dev(dev)) + lapbeth = lapbeth_get_x25_dev(dev); + if (!dev_is_ethdev(dev) && !lapbeth) return NOTIFY_DONE; switch (event) { case NETDEV_UP: /* New ethernet device -> new LAPB interface */ - if (!lapbeth_get_x25_dev(dev)) + if (!lapbeth) lapbeth_new_device(dev); break; case NETDEV_GOING_DOWN: /* ethernet device closes -> close LAPB interface */ - lapbeth = lapbeth_get_x25_dev(dev); if (lapbeth) dev_close(lapbeth->axdev); break; case NETDEV_UNREGISTER: /* ethernet device disappears -> remove LAPB interface */ - lapbeth = lapbeth_get_x25_dev(dev); if (lapbeth) lapbeth_free_device(lapbeth); break; + case NETDEV_PRE_TYPE_CHANGE: + /* Our underlying device type must not change. */ + if (lapbeth) + return NOTIFY_BAD; } return NOTIFY_DONE; From 285fa6b1e03cff78ead0383e1b259c44b95faf90 Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Thu, 2 Apr 2026 14:57:10 +0200 Subject: [PATCH 088/248] net: airoha: Fix memory leak in airoha_qdma_rx_process() If an error occurs on the subsequents buffers belonging to the non-linear part of the skb (e.g. due to an error in the payload length reported by the NIC or if we consumed all the available fragments for the skb), the page_pool fragment will not be linked to the skb so it will not return to the pool in the airoha_qdma_rx_process() error path. Fix the memory leak partially reverting commit 'd6d2b0e1538d ("net: airoha: Fix page recycling in airoha_qdma_rx_process()")' and always running page_pool_put_full_page routine in the airoha_qdma_rx_process() error path. Fixes: d6d2b0e1538d ("net: airoha: Fix page recycling in airoha_qdma_rx_process()") Signed-off-by: Lorenzo Bianconi Reviewed-by: Simon Horman Link: https://patch.msgid.link/20260402-airoha_qdma_rx_process-mem-leak-fix-v1-1-b5706f402d3c@kernel.org Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/airoha/airoha_eth.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/net/ethernet/airoha/airoha_eth.c b/drivers/net/ethernet/airoha/airoha_eth.c index 95ba99b89428..91cb63a32d99 100644 --- a/drivers/net/ethernet/airoha/airoha_eth.c +++ b/drivers/net/ethernet/airoha/airoha_eth.c @@ -697,9 +697,8 @@ static int airoha_qdma_rx_process(struct airoha_queue *q, int budget) if (q->skb) { dev_kfree_skb(q->skb); q->skb = NULL; - } else { - page_pool_put_full_page(q->page_pool, page, true); } + page_pool_put_full_page(q->page_pool, page, true); } airoha_qdma_fill_rx_queue(q); From 4e65a8b8daa18d63255ec58964dd192c7fdd9f8b Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 2 Apr 2026 10:17:32 +0000 Subject: [PATCH 089/248] ipv6: ioam: fix potential NULL dereferences in __ioam6_fill_trace_data() We need to check __in6_dev_get() for possible NULL value, as suggested by Yiming Qian. Also add skb_dst_dev_rcu() instead of skb_dst_dev(), and two missing READ_ONCE(). Note that @dev can't be NULL. Fixes: 9ee11f0fff20 ("ipv6: ioam: Data plane support for Pre-allocated Trace") Reported-by: Yiming Qian Signed-off-by: Eric Dumazet Reviewed-by: Justin Iurman Link: https://patch.msgid.link/20260402101732.1188059-1-edumazet@google.com Signed-off-by: Jakub Kicinski --- net/ipv6/ioam6.c | 27 ++++++++++++++++----------- 1 file changed, 16 insertions(+), 11 deletions(-) diff --git a/net/ipv6/ioam6.c b/net/ipv6/ioam6.c index 3978773bec42..05a0b7d7e2aa 100644 --- a/net/ipv6/ioam6.c +++ b/net/ipv6/ioam6.c @@ -710,7 +710,9 @@ static void __ioam6_fill_trace_data(struct sk_buff *skb, struct ioam6_schema *sc, unsigned int sclen, bool is_input) { - struct net_device *dev = skb_dst_dev(skb); + /* Note: skb_dst_dev_rcu() can't be NULL at this point. */ + struct net_device *dev = skb_dst_dev_rcu(skb); + struct inet6_dev *i_skb_dev, *idev; struct timespec64 ts; ktime_t tstamp; u64 raw64; @@ -721,13 +723,16 @@ static void __ioam6_fill_trace_data(struct sk_buff *skb, data = trace->data + trace->remlen * 4 - trace->nodelen * 4 - sclen * 4; + i_skb_dev = skb->dev ? __in6_dev_get(skb->dev) : NULL; + idev = __in6_dev_get(dev); + /* hop_lim and node_id */ if (trace->type.bit0) { byte = ipv6_hdr(skb)->hop_limit; if (is_input) byte--; - raw32 = dev_net(dev)->ipv6.sysctl.ioam6_id; + raw32 = READ_ONCE(dev_net(dev)->ipv6.sysctl.ioam6_id); *(__be32 *)data = cpu_to_be32((byte << 24) | raw32); data += sizeof(__be32); @@ -735,18 +740,18 @@ static void __ioam6_fill_trace_data(struct sk_buff *skb, /* ingress_if_id and egress_if_id */ if (trace->type.bit1) { - if (!skb->dev) + if (!i_skb_dev) raw16 = IOAM6_U16_UNAVAILABLE; else - raw16 = (__force u16)READ_ONCE(__in6_dev_get(skb->dev)->cnf.ioam6_id); + raw16 = (__force u16)READ_ONCE(i_skb_dev->cnf.ioam6_id); *(__be16 *)data = cpu_to_be16(raw16); data += sizeof(__be16); - if (dev->flags & IFF_LOOPBACK) + if ((dev->flags & IFF_LOOPBACK) || !idev) raw16 = IOAM6_U16_UNAVAILABLE; else - raw16 = (__force u16)READ_ONCE(__in6_dev_get(dev)->cnf.ioam6_id); + raw16 = (__force u16)READ_ONCE(idev->cnf.ioam6_id); *(__be16 *)data = cpu_to_be16(raw16); data += sizeof(__be16); @@ -822,7 +827,7 @@ static void __ioam6_fill_trace_data(struct sk_buff *skb, if (is_input) byte--; - raw64 = dev_net(dev)->ipv6.sysctl.ioam6_id_wide; + raw64 = READ_ONCE(dev_net(dev)->ipv6.sysctl.ioam6_id_wide); *(__be64 *)data = cpu_to_be64(((u64)byte << 56) | raw64); data += sizeof(__be64); @@ -830,18 +835,18 @@ static void __ioam6_fill_trace_data(struct sk_buff *skb, /* ingress_if_id and egress_if_id (wide) */ if (trace->type.bit9) { - if (!skb->dev) + if (!i_skb_dev) raw32 = IOAM6_U32_UNAVAILABLE; else - raw32 = READ_ONCE(__in6_dev_get(skb->dev)->cnf.ioam6_id_wide); + raw32 = READ_ONCE(i_skb_dev->cnf.ioam6_id_wide); *(__be32 *)data = cpu_to_be32(raw32); data += sizeof(__be32); - if (dev->flags & IFF_LOOPBACK) + if ((dev->flags & IFF_LOOPBACK) || !idev) raw32 = IOAM6_U32_UNAVAILABLE; else - raw32 = READ_ONCE(__in6_dev_get(dev)->cnf.ioam6_id_wide); + raw32 = READ_ONCE(idev->cnf.ioam6_id_wide); *(__be32 *)data = cpu_to_be32(raw32); data += sizeof(__be32); From 1979645e1842cb7017525a61a0e0e0beb924d02a Mon Sep 17 00:00:00 2001 From: Zijing Yin Date: Thu, 2 Apr 2026 07:01:53 -0700 Subject: [PATCH 090/248] bridge: guard local VLAN-0 FDB helpers against NULL vlan group When CONFIG_BRIDGE_VLAN_FILTERING is not set, br_vlan_group() and nbp_vlan_group() return NULL (br_private.h stub definitions). The BR_BOOLOPT_FDB_LOCAL_VLAN_0 toggle code is compiled unconditionally and reaches br_fdb_delete_locals_per_vlan_port() and br_fdb_insert_locals_per_vlan_port(), where the NULL vlan group pointer is dereferenced via list_for_each_entry(v, &vg->vlan_list, vlist). The observed crash is in the delete path, triggered when creating a bridge with IFLA_BR_MULTI_BOOLOPT containing BR_BOOLOPT_FDB_LOCAL_VLAN_0 via RTM_NEWLINK. The insert helper has the same bug pattern. Oops: general protection fault, probably for non-canonical address 0xdffffc0000000056: 0000 [#1] KASAN NOPTI KASAN: null-ptr-deref in range [0x00000000000002b0-0x00000000000002b7] RIP: 0010:br_fdb_delete_locals_per_vlan+0x2b9/0x310 Call Trace: br_fdb_toggle_local_vlan_0+0x452/0x4c0 br_toggle_fdb_local_vlan_0+0x31/0x80 net/bridge/br.c:276 br_boolopt_toggle net/bridge/br.c:313 br_boolopt_multi_toggle net/bridge/br.c:364 br_changelink net/bridge/br_netlink.c:1542 br_dev_newlink net/bridge/br_netlink.c:1575 Add NULL checks for the vlan group pointer in both helpers, returning early when there are no VLANs to iterate. This matches the existing pattern used by other bridge FDB functions such as br_fdb_add() and br_fdb_delete(). Fixes: 21446c06b441 ("net: bridge: Introduce UAPI for BR_BOOLOPT_FDB_LOCAL_VLAN_0") Signed-off-by: Zijing Yin Reviewed-by: Ido Schimmel Acked-by: Nikolay Aleksandrov Link: https://patch.msgid.link/20260402140153.3925663-1-yzjaurora@gmail.com Signed-off-by: Jakub Kicinski --- net/bridge/br_fdb.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c index 0501ffcb8a3d..e2c17f620f00 100644 --- a/net/bridge/br_fdb.c +++ b/net/bridge/br_fdb.c @@ -597,6 +597,9 @@ static void br_fdb_delete_locals_per_vlan_port(struct net_bridge *br, dev = br->dev; } + if (!vg) + return; + list_for_each_entry(v, &vg->vlan_list, vlist) br_fdb_find_delete_local(br, p, dev->dev_addr, v->vid); } @@ -630,6 +633,9 @@ static int br_fdb_insert_locals_per_vlan_port(struct net_bridge *br, dev = br->dev; } + if (!vg) + return 0; + list_for_each_entry(v, &vg->vlan_list, vlist) { if (!br_vlan_should_use(v)) continue; From 7b735ef81286007794a227ce2539419479c02a5f Mon Sep 17 00:00:00 2001 From: Nikolaos Gkarlis Date: Thu, 2 Apr 2026 20:14:32 +0200 Subject: [PATCH 091/248] rtnetlink: add missing netlink_ns_capable() check for peer netns rtnl_newlink() lacks a CAP_NET_ADMIN capability check on the peer network namespace when creating paired devices (veth, vxcan, netkit). This allows an unprivileged user with a user namespace to create interfaces in arbitrary network namespaces, including init_net. Add a netlink_ns_capable() check for CAP_NET_ADMIN in the peer namespace before allowing device creation to proceed. Fixes: 81adee47dfb6 ("net: Support specifying the network namespace upon device creation.") Signed-off-by: Nikolaos Gkarlis Reviewed-by: Kuniyuki Iwashima Link: https://patch.msgid.link/20260402181432.4126920-1-nickgarlis@gmail.com Signed-off-by: Jakub Kicinski --- net/core/rtnetlink.c | 40 +++++++++++++++++++++++++++------------- 1 file changed, 27 insertions(+), 13 deletions(-) diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index fae8034efbff..69daba3ddaf0 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -3894,28 +3894,42 @@ static int rtnl_newlink_create(struct sk_buff *skb, struct ifinfomsg *ifm, goto out; } -static struct net *rtnl_get_peer_net(const struct rtnl_link_ops *ops, +static struct net *rtnl_get_peer_net(struct sk_buff *skb, + const struct rtnl_link_ops *ops, struct nlattr *tbp[], struct nlattr *data[], struct netlink_ext_ack *extack) { - struct nlattr *tb[IFLA_MAX + 1]; + struct nlattr *tb[IFLA_MAX + 1], **attrs; + struct net *net; int err; - if (!data || !data[ops->peer_type]) - return rtnl_link_get_net_ifla(tbp); - - err = rtnl_nla_parse_ifinfomsg(tb, data[ops->peer_type], extack); - if (err < 0) - return ERR_PTR(err); - - if (ops->validate) { - err = ops->validate(tb, NULL, extack); + if (!data || !data[ops->peer_type]) { + attrs = tbp; + } else { + err = rtnl_nla_parse_ifinfomsg(tb, data[ops->peer_type], extack); if (err < 0) return ERR_PTR(err); + + if (ops->validate) { + err = ops->validate(tb, NULL, extack); + if (err < 0) + return ERR_PTR(err); + } + + attrs = tb; } - return rtnl_link_get_net_ifla(tb); + net = rtnl_link_get_net_ifla(attrs); + if (IS_ERR_OR_NULL(net)) + return net; + + if (!netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN)) { + put_net(net); + return ERR_PTR(-EPERM); + } + + return net; } static int __rtnl_newlink(struct sk_buff *skb, struct nlmsghdr *nlh, @@ -4054,7 +4068,7 @@ static int rtnl_newlink(struct sk_buff *skb, struct nlmsghdr *nlh, } if (ops->peer_type) { - peer_net = rtnl_get_peer_net(ops, tb, data, extack); + peer_net = rtnl_get_peer_net(skb, ops, tb, data, extack); if (IS_ERR(peer_net)) { ret = PTR_ERR(peer_net); goto put_ops; From 48a5fe38772b6f039522469ee6131a67838221a8 Mon Sep 17 00:00:00 2001 From: Oleh Konko Date: Thu, 2 Apr 2026 09:48:57 +0000 Subject: [PATCH 092/248] tipc: fix bc_ackers underflow on duplicate GRP_ACK_MSG The GRP_ACK_MSG handler in tipc_group_proto_rcv() currently decrements bc_ackers on every inbound group ACK, even when the same member has already acknowledged the current broadcast round. Because bc_ackers is a u16, a duplicate ACK received after the last legitimate ACK wraps the counter to 65535. Once wrapped, tipc_group_bc_cong() keeps reporting congestion and later group broadcasts on the affected socket stay blocked until the group is recreated. Fix this by ignoring duplicate or stale ACKs before touching bc_acked or bc_ackers. This makes repeated GRP_ACK_MSG handling idempotent and prevents the underflow path. Fixes: 2f487712b893 ("tipc: guarantee that group broadcast doesn't bypass group unicast") Cc: stable@vger.kernel.org Signed-off-by: Oleh Konko Reviewed-by: Tung Nguyen Reviewed-by: Simon Horman Link: https://patch.msgid.link/41a4833f368641218e444fdcff822039.security@1seal.org Signed-off-by: Jakub Kicinski --- net/tipc/group.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/net/tipc/group.c b/net/tipc/group.c index e0e6227b433b..14e6732624e2 100644 --- a/net/tipc/group.c +++ b/net/tipc/group.c @@ -746,6 +746,7 @@ void tipc_group_proto_rcv(struct tipc_group *grp, bool *usr_wakeup, u32 port = msg_origport(hdr); struct tipc_member *m, *pm; u16 remitted, in_flight; + u16 acked; if (!grp) return; @@ -798,7 +799,10 @@ void tipc_group_proto_rcv(struct tipc_group *grp, bool *usr_wakeup, case GRP_ACK_MSG: if (!m) return; - m->bc_acked = msg_grp_bc_acked(hdr); + acked = msg_grp_bc_acked(hdr); + if (less_eq(acked, m->bc_acked)) + return; + m->bc_acked = acked; if (--grp->bc_ackers) return; list_del_init(&m->small_win); From b76254c55dc8f23edc089027dd3f8792554c69fb Mon Sep 17 00:00:00 2001 From: Pengpeng Hou Date: Thu, 2 Apr 2026 15:12:07 +0800 Subject: [PATCH 093/248] net: qualcomm: qca_uart: report the consumed byte on RX skb allocation failure qca_tty_receive() consumes each input byte before checking whether a completed frame needs a fresh receive skb. When the current byte completes a frame, the driver delivers that frame and then allocates a new skb for the next one. If that allocation fails, the current code returns i even though data[i] has already been consumed and may already have completed the delivered frame. Since serdev interprets the return value as the number of accepted bytes, this under-reports progress by one byte and can replay the final byte of the completed frame into a fresh parser state on the next call. Return i + 1 in that failure path so the accepted-byte count matches the actual receive-state progress. Fixes: dfc768fbe618 ("net: qualcomm: add QCA7000 UART driver") Cc: stable@vger.kernel.org Signed-off-by: Pengpeng Hou Reviewed-by: Stefan Wahren Reviewed-by: Simon Horman Link: https://patch.msgid.link/20260402071207.4036-1-pengpeng@iscas.ac.cn Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/qualcomm/qca_uart.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/qualcomm/qca_uart.c b/drivers/net/ethernet/qualcomm/qca_uart.c index 37efb1ea9fcd..847a5f928e41 100644 --- a/drivers/net/ethernet/qualcomm/qca_uart.c +++ b/drivers/net/ethernet/qualcomm/qca_uart.c @@ -100,7 +100,7 @@ qca_tty_receive(struct serdev_device *serdev, const u8 *data, size_t count) if (!qca->rx_skb) { netdev_dbg(netdev, "recv: out of RX resources\n"); n_stats->rx_errors++; - return i; + return i + 1; } } } From 06aaf04ca815f7a1f17762fd847b7bc14b8833fb Mon Sep 17 00:00:00 2001 From: Fernando Fernandez Mancera Date: Thu, 2 Apr 2026 09:26:12 +0200 Subject: [PATCH 094/248] ipv4: nexthop: avoid duplicate NHA_HW_STATS_ENABLE on nexthop group dump Currently NHA_HW_STATS_ENABLE is included twice everytime a dump of nexthop group is performed with NHA_OP_FLAG_DUMP_STATS. As all the stats querying were moved to nla_put_nh_group_stats(), leave only that instance of the attribute querying. Fixes: 5072ae00aea4 ("net: nexthop: Expose nexthop group HW stats to user space") Signed-off-by: Fernando Fernandez Mancera Reviewed-by: Eric Dumazet Reviewed-by: Ido Schimmel Link: https://patch.msgid.link/20260402072613.25262-1-fmancera@suse.de Signed-off-by: Jakub Kicinski --- net/ipv4/nexthop.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/net/ipv4/nexthop.c b/net/ipv4/nexthop.c index c942f1282236..a0c694583299 100644 --- a/net/ipv4/nexthop.c +++ b/net/ipv4/nexthop.c @@ -902,8 +902,7 @@ static int nla_put_nh_group(struct sk_buff *skb, struct nexthop *nh, goto nla_put_failure; if (op_flags & NHA_OP_FLAG_DUMP_STATS && - (nla_put_u32(skb, NHA_HW_STATS_ENABLE, nhg->hw_stats) || - nla_put_nh_group_stats(skb, nh, op_flags))) + nla_put_nh_group_stats(skb, nh, op_flags)) goto nla_put_failure; return 0; From 14cf0cd35361f4e94824bf8a42f72713d7702a73 Mon Sep 17 00:00:00 2001 From: Fernando Fernandez Mancera Date: Thu, 2 Apr 2026 09:26:13 +0200 Subject: [PATCH 095/248] ipv4: nexthop: allocate skb dynamically in rtm_get_nexthop() When querying a nexthop object via RTM_GETNEXTHOP, the kernel currently allocates a fixed-size skb using NLMSG_GOODSIZE. While sufficient for single nexthops and small Equal-Cost Multi-Path groups, this fixed allocation fails for large nexthop groups like 512 nexthops. This results in the following warning splat: WARNING: net/ipv4/nexthop.c:3395 at rtm_get_nexthop+0x176/0x1c0, CPU#20: rep/4608 [...] RIP: 0010:rtm_get_nexthop (net/ipv4/nexthop.c:3395) [...] Call Trace: rtnetlink_rcv_msg (net/core/rtnetlink.c:6989) netlink_rcv_skb (net/netlink/af_netlink.c:2550) netlink_unicast (net/netlink/af_netlink.c:1319 net/netlink/af_netlink.c:1344) netlink_sendmsg (net/netlink/af_netlink.c:1894) ____sys_sendmsg (net/socket.c:721 net/socket.c:736 net/socket.c:2585) ___sys_sendmsg (net/socket.c:2641) __sys_sendmsg (net/socket.c:2671) do_syscall_64 (arch/x86/entry/syscall_64.c:63 arch/x86/entry/syscall_64.c:94) entry_SYSCALL_64_after_hwframe (arch/x86/entry/entry_64.S:130) Fix this by allocating the size dynamically using nh_nlmsg_size() and using nlmsg_new(), this is consistent with nexthop_notify() behavior. In addition, adjust nh_nlmsg_size_grp() so it calculates the size needed based on flags passed. While at it, also add the size of NHA_FDB for nexthop group size calculation as it was missing too. This cannot be reproduced via iproute2 as the group size is currently limited and the command fails as follows: addattr_l ERROR: message exceeded bound of 1048 Fixes: 430a049190de ("nexthop: Add support for nexthop groups") Reported-by: Yiming Qian Closes: https://lore.kernel.org/netdev/CAL_bE8Li2h4KO+AQFXW4S6Yb_u5X4oSKnkywW+LPFjuErhqELA@mail.gmail.com/ Signed-off-by: Fernando Fernandez Mancera Reviewed-by: Eric Dumazet Reviewed-by: Ido Schimmel Link: https://patch.msgid.link/20260402072613.25262-2-fmancera@suse.de Signed-off-by: Jakub Kicinski --- net/ipv4/nexthop.c | 38 +++++++++++++++++++++++++++----------- 1 file changed, 27 insertions(+), 11 deletions(-) diff --git a/net/ipv4/nexthop.c b/net/ipv4/nexthop.c index a0c694583299..2c9036c719b6 100644 --- a/net/ipv4/nexthop.c +++ b/net/ipv4/nexthop.c @@ -1003,16 +1003,32 @@ static size_t nh_nlmsg_size_grp_res(struct nh_group *nhg) nla_total_size_64bit(8);/* NHA_RES_GROUP_UNBALANCED_TIME */ } -static size_t nh_nlmsg_size_grp(struct nexthop *nh) +static size_t nh_nlmsg_size_grp(struct nexthop *nh, u32 op_flags) { struct nh_group *nhg = rtnl_dereference(nh->nh_grp); size_t sz = sizeof(struct nexthop_grp) * nhg->num_nh; size_t tot = nla_total_size(sz) + - nla_total_size(2); /* NHA_GROUP_TYPE */ + nla_total_size(2) + /* NHA_GROUP_TYPE */ + nla_total_size(0); /* NHA_FDB */ if (nhg->resilient) tot += nh_nlmsg_size_grp_res(nhg); + if (op_flags & NHA_OP_FLAG_DUMP_STATS) { + tot += nla_total_size(0) + /* NHA_GROUP_STATS */ + nla_total_size(4); /* NHA_HW_STATS_ENABLE */ + tot += nhg->num_nh * + (nla_total_size(0) + /* NHA_GROUP_STATS_ENTRY */ + nla_total_size(4) + /* NHA_GROUP_STATS_ENTRY_ID */ + nla_total_size_64bit(8)); /* NHA_GROUP_STATS_ENTRY_PACKETS */ + + if (op_flags & NHA_OP_FLAG_DUMP_HW_STATS) { + tot += nhg->num_nh * + nla_total_size_64bit(8); /* NHA_GROUP_STATS_ENTRY_PACKETS_HW */ + tot += nla_total_size(4); /* NHA_HW_STATS_USED */ + } + } + return tot; } @@ -1047,14 +1063,14 @@ static size_t nh_nlmsg_size_single(struct nexthop *nh) return sz; } -static size_t nh_nlmsg_size(struct nexthop *nh) +static size_t nh_nlmsg_size(struct nexthop *nh, u32 op_flags) { size_t sz = NLMSG_ALIGN(sizeof(struct nhmsg)); sz += nla_total_size(4); /* NHA_ID */ if (nh->is_group) - sz += nh_nlmsg_size_grp(nh) + + sz += nh_nlmsg_size_grp(nh, op_flags) + nla_total_size(4) + /* NHA_OP_FLAGS */ 0; else @@ -1070,7 +1086,7 @@ static void nexthop_notify(int event, struct nexthop *nh, struct nl_info *info) struct sk_buff *skb; int err = -ENOBUFS; - skb = nlmsg_new(nh_nlmsg_size(nh), gfp_any()); + skb = nlmsg_new(nh_nlmsg_size(nh, 0), gfp_any()); if (!skb) goto errout; @@ -3376,15 +3392,15 @@ static int rtm_get_nexthop(struct sk_buff *in_skb, struct nlmsghdr *nlh, if (err) return err; - err = -ENOBUFS; - skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); - if (!skb) - goto out; - err = -ENOENT; nh = nexthop_find_by_id(net, id); if (!nh) - goto errout_free; + goto out; + + err = -ENOBUFS; + skb = nlmsg_new(nh_nlmsg_size(nh, op_flags), GFP_KERNEL); + if (!skb) + goto out; err = nh_fill_node(skb, nh, RTM_NEWNEXTHOP, NETLINK_CB(in_skb).portid, nlh->nlmsg_seq, 0, op_flags); From fde29fd9349327acc50d19a0b5f3d5a6c964dfd8 Mon Sep 17 00:00:00 2001 From: Yiqi Sun Date: Thu, 2 Apr 2026 15:04:19 +0800 Subject: [PATCH 096/248] ipv4: icmp: fix null-ptr-deref in icmp_build_probe() ipv6_stub->ipv6_dev_find() may return ERR_PTR(-EAFNOSUPPORT) when the IPv6 stack is not active (CONFIG_IPV6=m and not loaded), and passing this error pointer to dev_hold() will cause a kernel crash with null-ptr-deref. Instead, silently discard the request. RFC 8335 does not appear to define a specific response for the case where an IPv6 interface identifier is syntactically valid but the implementation cannot perform the lookup at runtime, and silently dropping the request may safer than misreporting "No Such Interface". Fixes: d329ea5bd884 ("icmp: add response to RFC 8335 PROBE messages") Signed-off-by: Yiqi Sun Link: https://patch.msgid.link/20260402070419.2291578-1-sunyiqixm@gmail.com Signed-off-by: Jakub Kicinski --- net/ipv4/icmp.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index 568bd1e95d44..4e2a6c70dcd8 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -1346,6 +1346,13 @@ bool icmp_build_probe(struct sk_buff *skb, struct icmphdr *icmphdr) if (iio->ident.addr.ctype3_hdr.addrlen != sizeof(struct in6_addr)) goto send_mal_query; dev = ipv6_stub->ipv6_dev_find(net, &iio->ident.addr.ip_addr.ipv6_addr, dev); + /* + * If IPv6 identifier lookup is unavailable, silently + * discard the request instead of misreporting NO_IF. + */ + if (IS_ERR(dev)) + return false; + dev_hold(dev); break; #endif From 77facb35227c421467cdb49268de433168c2dcef Mon Sep 17 00:00:00 2001 From: Chris J Arges Date: Thu, 2 Apr 2026 17:23:16 -0500 Subject: [PATCH 097/248] net: increase IP_TUNNEL_RECURSION_LIMIT to 5 In configurations with multiple tunnel layers and MPLS lwtunnel routing, a single tunnel hop can increment the counter beyond this limit. This causes packets to be dropped with the "Dead loop on virtual device" message even when a routing loop doesn't exist. Increase IP_TUNNEL_RECURSION_LIMIT from 4 to 5 to handle this use-case. Fixes: 6f1a9140ecda ("net: add xmit recursion limit to tunnel xmit functions") Link: https://lore.kernel.org/netdev/88deb91b-ef1b-403c-8eeb-0f971f27e34f@redhat.com/ Signed-off-by: Chris J Arges Link: https://patch.msgid.link/20260402222401.3408368-1-carges@cloudflare.com Signed-off-by: Jakub Kicinski --- include/net/ip_tunnels.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h index 1f577a4f8ce9..d708b66e55cd 100644 --- a/include/net/ip_tunnels.h +++ b/include/net/ip_tunnels.h @@ -32,7 +32,7 @@ * recursion involves route lookups and full IP output, consuming much * more stack per level, so a lower limit is needed. */ -#define IP_TUNNEL_RECURSION_LIMIT 4 +#define IP_TUNNEL_RECURSION_LIMIT 5 /* Keep error state on tunnel for 30 sec */ #define IPTUNNEL_ERR_TIMEO (30*HZ) From 5c14a19d5b1645cce1cb1252833d70b23635b632 Mon Sep 17 00:00:00 2001 From: Pengpeng Hou Date: Thu, 2 Apr 2026 12:21:48 +0800 Subject: [PATCH 098/248] nfc: s3fwrn5: allocate rx skb before consuming bytes s3fwrn82_uart_read() reports the number of accepted bytes to the serdev core. The current code consumes bytes into recv_skb and may already deliver a complete frame before allocating a fresh receive buffer. If that alloc_skb() fails, the callback returns 0 even though it has already consumed bytes, and it leaves recv_skb as NULL for the next receive callback. That breaks the receive_buf() accounting contract and can also lead to a NULL dereference on the next skb_put_u8(). Allocate the receive skb lazily before consuming the next byte instead. If allocation fails, return the number of bytes already accepted. Fixes: 3f52c2cb7e3a ("nfc: s3fwrn5: Support a UART interface") Signed-off-by: Pengpeng Hou Link: https://patch.msgid.link/20260402042148.65236-1-pengpeng@iscas.ac.cn Signed-off-by: Jakub Kicinski --- drivers/nfc/s3fwrn5/uart.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/drivers/nfc/s3fwrn5/uart.c b/drivers/nfc/s3fwrn5/uart.c index 9c09c10c2a46..4ee481bd7e96 100644 --- a/drivers/nfc/s3fwrn5/uart.c +++ b/drivers/nfc/s3fwrn5/uart.c @@ -58,6 +58,12 @@ static size_t s3fwrn82_uart_read(struct serdev_device *serdev, size_t i; for (i = 0; i < count; i++) { + if (!phy->recv_skb) { + phy->recv_skb = alloc_skb(NCI_SKB_BUFF_LEN, GFP_KERNEL); + if (!phy->recv_skb) + return i; + } + skb_put_u8(phy->recv_skb, *data++); if (phy->recv_skb->len < S3FWRN82_NCI_HEADER) @@ -69,9 +75,7 @@ static size_t s3fwrn82_uart_read(struct serdev_device *serdev, s3fwrn5_recv_frame(phy->common.ndev, phy->recv_skb, phy->common.mode); - phy->recv_skb = alloc_skb(NCI_SKB_BUFF_LEN, GFP_KERNEL); - if (!phy->recv_skb) - return 0; + phy->recv_skb = NULL; } return i; From 1345e9f4e3f3bc7d8a0a2138ae29e205a857a555 Mon Sep 17 00:00:00 2001 From: Jon Hunter Date: Wed, 1 Apr 2026 11:29:39 +0100 Subject: [PATCH 099/248] net: stmmac: Fix PTP ref clock for Tegra234 Since commit 030ce919e114 ("net: stmmac: make sure that ptp_rate is not 0 before configuring timestamping") was added the following error is observed on Tegra234: ERR KERN tegra-mgbe 6800000.ethernet eth0: Invalid PTP clock rate WARNING KERN tegra-mgbe 6800000.ethernet eth0: PTP init failed It turns out that the Tegra234 device-tree binding defines the PTP ref clock name as 'ptp-ref' and not 'ptp_ref' and the above commit now exposes this and that the PTP clock is not configured correctly. In order to update device-tree to use the correct 'ptp_ref' name, update the Tegra MGBE driver to use 'ptp_ref' by default and fallback to using 'ptp-ref' if this clock name is present. Fixes: d8ca113724e7 ("net: stmmac: tegra: Add MGBE support") Signed-off-by: Jon Hunter Reviewed-by: Simon Horman Link: https://patch.msgid.link/20260401102941.17466-2-jonathanh@nvidia.com Signed-off-by: Jakub Kicinski --- .../net/ethernet/stmicro/stmmac/dwmac-tegra.c | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-tegra.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-tegra.c index d765acbe3754..21a0a11fc011 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-tegra.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-tegra.c @@ -9,7 +9,7 @@ #include "stmmac_platform.h" static const char *const mgbe_clks[] = { - "rx-pcs", "tx", "tx-pcs", "mac-divider", "mac", "mgbe", "ptp-ref", "mac" + "rx-pcs", "tx", "tx-pcs", "mac-divider", "mac", "mgbe", "ptp_ref", "mac" }; struct tegra_mgbe { @@ -215,6 +215,7 @@ static int tegra_mgbe_probe(struct platform_device *pdev) { struct plat_stmmacenet_data *plat; struct stmmac_resources res; + bool use_legacy_ptp = false; struct tegra_mgbe *mgbe; int irq, err, i; u32 value; @@ -257,9 +258,23 @@ static int tegra_mgbe_probe(struct platform_device *pdev) if (!mgbe->clks) return -ENOMEM; - for (i = 0; i < ARRAY_SIZE(mgbe_clks); i++) + /* Older device-trees use 'ptp-ref' rather than 'ptp_ref'. + * Fall back when the legacy name is present. + */ + if (of_property_match_string(pdev->dev.of_node, "clock-names", + "ptp-ref") >= 0) + use_legacy_ptp = true; + + for (i = 0; i < ARRAY_SIZE(mgbe_clks); i++) { mgbe->clks[i].id = mgbe_clks[i]; + if (use_legacy_ptp && !strcmp(mgbe_clks[i], "ptp_ref")) { + dev_warn(mgbe->dev, + "Device-tree update needed for PTP clock!\n"); + mgbe->clks[i].id = "ptp-ref"; + } + } + err = devm_clk_bulk_get(mgbe->dev, ARRAY_SIZE(mgbe_clks), mgbe->clks); if (err < 0) return err; From fb22b1fc5bca3c0aad95388933497ceb30f1fb26 Mon Sep 17 00:00:00 2001 From: Jon Hunter Date: Wed, 1 Apr 2026 11:29:40 +0100 Subject: [PATCH 100/248] dt-bindings: net: Fix Tegra234 MGBE PTP clock The PTP clock for the Tegra234 MGBE device is incorrectly named 'ptp-ref' and should be 'ptp_ref'. This is causing the following warning to be observed on Tegra234 platforms that use this device: ERR KERN tegra-mgbe 6800000.ethernet eth0: Invalid PTP clock rate WARNING KERN tegra-mgbe 6800000.ethernet eth0: PTP init failed Although this constitutes an ABI breakage in the binding for this device, PTP support has clearly never worked and so fix this now so we can correct the device-tree for this device. Note that the MGBE driver still supports the legacy 'ptp-ref' clock name and so older/existing device-trees will still work, but given that this is not the correct name, there is no point to advertise this in the binding. Fixes: 189c2e5c7669 ("dt-bindings: net: Add Tegra234 MGBE") Signed-off-by: Jon Hunter Reviewed-by: Krzysztof Kozlowski Link: https://patch.msgid.link/20260401102941.17466-3-jonathanh@nvidia.com Signed-off-by: Jakub Kicinski --- .../devicetree/bindings/net/nvidia,tegra234-mgbe.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Documentation/devicetree/bindings/net/nvidia,tegra234-mgbe.yaml b/Documentation/devicetree/bindings/net/nvidia,tegra234-mgbe.yaml index 2bd3efff2485..215f14d1897d 100644 --- a/Documentation/devicetree/bindings/net/nvidia,tegra234-mgbe.yaml +++ b/Documentation/devicetree/bindings/net/nvidia,tegra234-mgbe.yaml @@ -42,7 +42,7 @@ properties: - const: mgbe - const: mac - const: mac-divider - - const: ptp-ref + - const: ptp_ref - const: rx-input-m - const: rx-input - const: tx @@ -133,7 +133,7 @@ examples: <&bpmp TEGRA234_CLK_MGBE0_RX_PCS_M>, <&bpmp TEGRA234_CLK_MGBE0_RX_PCS>, <&bpmp TEGRA234_CLK_MGBE0_TX_PCS>; - clock-names = "mgbe", "mac", "mac-divider", "ptp-ref", "rx-input-m", + clock-names = "mgbe", "mac", "mac-divider", "ptp_ref", "rx-input-m", "rx-input", "tx", "eee-pcs", "rx-pcs-input", "rx-pcs-m", "rx-pcs", "tx-pcs"; resets = <&bpmp TEGRA234_RESET_MGBE0_MAC>, From b6422dff0e518245019233432b6bccfc30b73e2f Mon Sep 17 00:00:00 2001 From: Sahil Chandna Date: Fri, 3 Apr 2026 05:09:29 -0700 Subject: [PATCH 101/248] PCI: hv: Fix double ida_free in hv_pci_probe error path If hv_pci_probe() fails after storing the domain number in hbus->bridge->domain_nr, there is a call to free this domain_nr via pci_bus_release_emul_domain_nr(), however, during cleanup, the bridge release callback pci_release_host_bridge_dev() also frees the domain_nr causing ida_free to be called on same ID twice and triggering following warning: ida_free called for id=28971 which is not allocated. WARNING: lib/idr.c:594 at ida_free+0xdf/0x160, CPU#0: kworker/0:2/198 Call Trace: pci_bus_release_emul_domain_nr+0x17/0x20 pci_release_host_bridge_dev+0x4b/0x60 device_release+0x3b/0xa0 kobject_put+0x8e/0x220 devm_pci_alloc_host_bridge_release+0xe/0x20 devres_release_all+0x9a/0xd0 device_unbind_cleanup+0x12/0xa0 really_probe+0x1c5/0x3f0 vmbus_add_channel_work+0x135/0x1a0 Fix this by letting pci core handle the free domain_nr and remove the explicit free called in pci-hyperv driver. Fixes: bcce8c74f1ce ("PCI: Enable host bridge emulation for PCI_DOMAINS_GENERIC platforms") Signed-off-by: Sahil Chandna Reviewed-by: Manivannan Sadhasivam Reviewed-by: Saurabh Sengar Signed-off-by: Wei Liu --- drivers/pci/controller/pci-hyperv.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/pci/controller/pci-hyperv.c b/drivers/pci/controller/pci-hyperv.c index 38a790f642a1..49c0a2d51162 100644 --- a/drivers/pci/controller/pci-hyperv.c +++ b/drivers/pci/controller/pci-hyperv.c @@ -3786,7 +3786,7 @@ static int hv_pci_probe(struct hv_device *hdev, hbus->bridge->domain_nr); if (!hbus->wq) { ret = -ENOMEM; - goto free_dom; + goto free_bus; } hdev->channel->next_request_id_callback = vmbus_next_request_id; @@ -3882,8 +3882,6 @@ static int hv_pci_probe(struct hv_device *hdev, vmbus_close(hdev->channel); destroy_wq: destroy_workqueue(hbus->wq); -free_dom: - pci_bus_release_emul_domain_nr(hbus->bridge->domain_nr); free_bus: kfree(hbus); return ret; From 16cbec24897624051b324aa3a85859c38ca65fde Mon Sep 17 00:00:00 2001 From: Stanislav Kinsburskii Date: Tue, 24 Mar 2026 23:57:40 +0000 Subject: [PATCH 102/248] mshv: Fix infinite fault loop on permission-denied GPA intercepts Prevent infinite fault loops when guests access memory regions without proper permissions. Currently, mshv_handle_gpa_intercept() attempts to remap pages for all faults on movable memory regions, regardless of whether the access type is permitted. When a guest writes to a read-only region, the remap succeeds but the region remains read-only, causing immediate re-fault and spinning the vCPU indefinitely. Validate intercept access type against region permissions before attempting remaps. Reject writes to non-writable regions and executes to non-executable regions early, returning false to let the VMM handle the intercept appropriately. This also closes a potential DoS vector where malicious guests could intentionally trigger these fault loops to consume host resources. Fixes: b9a66cd5ccbb ("mshv: Add support for movable memory regions") Signed-off-by: Stanislav Kinsburskii Reviewed-by: Anirudh Rayabharam (Microsoft) Signed-off-by: Wei Liu --- drivers/hv/mshv_root_main.c | 15 ++++++++++++--- include/hyperv/hvgdk_mini.h | 6 ++++++ include/hyperv/hvhdk.h | 4 ++-- 3 files changed, 20 insertions(+), 5 deletions(-) diff --git a/drivers/hv/mshv_root_main.c b/drivers/hv/mshv_root_main.c index 6f42423f7faa..c8e5523a52b6 100644 --- a/drivers/hv/mshv_root_main.c +++ b/drivers/hv/mshv_root_main.c @@ -630,7 +630,7 @@ static bool mshv_handle_gpa_intercept(struct mshv_vp *vp) { struct mshv_partition *p = vp->vp_partition; struct mshv_mem_region *region; - bool ret; + bool ret = false; u64 gfn; #if defined(CONFIG_X86_64) struct hv_x64_memory_intercept_message *msg = @@ -641,6 +641,8 @@ static bool mshv_handle_gpa_intercept(struct mshv_vp *vp) (struct hv_arm64_memory_intercept_message *) vp->vp_intercept_msg_page->u.payload; #endif + enum hv_intercept_access_type access_type = + msg->header.intercept_access_type; gfn = HVPFN_DOWN(msg->guest_physical_address); @@ -648,12 +650,19 @@ static bool mshv_handle_gpa_intercept(struct mshv_vp *vp) if (!region) return false; + if (access_type == HV_INTERCEPT_ACCESS_WRITE && + !(region->hv_map_flags & HV_MAP_GPA_WRITABLE)) + goto put_region; + + if (access_type == HV_INTERCEPT_ACCESS_EXECUTE && + !(region->hv_map_flags & HV_MAP_GPA_EXECUTABLE)) + goto put_region; + /* Only movable memory ranges are supported for GPA intercepts */ if (region->mreg_type == MSHV_REGION_TYPE_MEM_MOVABLE) ret = mshv_region_handle_gfn_fault(region, gfn); - else - ret = false; +put_region: mshv_region_put(region); return ret; diff --git a/include/hyperv/hvgdk_mini.h b/include/hyperv/hvgdk_mini.h index 1823a290a7b7..f9600f87186a 100644 --- a/include/hyperv/hvgdk_mini.h +++ b/include/hyperv/hvgdk_mini.h @@ -1533,4 +1533,10 @@ struct hv_mmio_write_input { u8 data[HV_HYPERCALL_MMIO_MAX_DATA_LENGTH]; } __packed; +enum hv_intercept_access_type { + HV_INTERCEPT_ACCESS_READ = 0, + HV_INTERCEPT_ACCESS_WRITE = 1, + HV_INTERCEPT_ACCESS_EXECUTE = 2 +}; + #endif /* _HV_HVGDK_MINI_H */ diff --git a/include/hyperv/hvhdk.h b/include/hyperv/hvhdk.h index 245f3db53bf1..5e83d3714966 100644 --- a/include/hyperv/hvhdk.h +++ b/include/hyperv/hvhdk.h @@ -779,7 +779,7 @@ struct hv_x64_intercept_message_header { u32 vp_index; u8 instruction_length:4; u8 cr8:4; /* Only set for exo partitions */ - u8 intercept_access_type; + u8 intercept_access_type; /* enum hv_intercept_access_type */ union hv_x64_vp_execution_state execution_state; struct hv_x64_segment_register cs_segment; u64 rip; @@ -825,7 +825,7 @@ union hv_arm64_vp_execution_state { struct hv_arm64_intercept_message_header { u32 vp_index; u8 instruction_length; - u8 intercept_access_type; + u8 intercept_access_type; /* enum hv_intercept_access_type */ union hv_arm64_vp_execution_state execution_state; u64 pc; u64 cpsr; From a621d9cdc8d08bd2fe8dfe6fa6897d256de8248f Mon Sep 17 00:00:00 2001 From: Paul Walmsley Date: Sat, 4 Apr 2026 18:40:57 -0600 Subject: [PATCH 103/248] riscv: ptrace: cfi: fix "PRACE" typo in uapi header A CFI-related macro defined in arch/riscv/uapi/asm/ptrace.h misspells "PTRACE" as "PRACE"; fix this. Fixes: 2af7c9cf021c ("riscv/ptrace: expose riscv CFI status and state via ptrace and in core files") Cc: Deepak Gupta Signed-off-by: Paul Walmsley --- arch/riscv/include/uapi/asm/ptrace.h | 2 +- arch/riscv/kernel/ptrace.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/riscv/include/uapi/asm/ptrace.h b/arch/riscv/include/uapi/asm/ptrace.h index 70a74adad914..69d71443e111 100644 --- a/arch/riscv/include/uapi/asm/ptrace.h +++ b/arch/riscv/include/uapi/asm/ptrace.h @@ -146,7 +146,7 @@ struct __sc_riscv_cfi_state { #define PTRACE_CFI_SS_LOCK_STATE _BITUL(PTRACE_CFI_SS_LOCK_BIT) #define PTRACE_CFI_SS_PTR_STATE _BITUL(PTRACE_CFI_SS_PTR_BIT) -#define PRACE_CFI_STATE_INVALID_MASK ~(PTRACE_CFI_LP_EN_STATE | \ +#define PTRACE_CFI_STATE_INVALID_MASK ~(PTRACE_CFI_LP_EN_STATE | \ PTRACE_CFI_LP_LOCK_STATE | \ PTRACE_CFI_ELP_STATE | \ PTRACE_CFI_SS_EN_STATE | \ diff --git a/arch/riscv/kernel/ptrace.c b/arch/riscv/kernel/ptrace.c index e592bd6b7665..2704a532e916 100644 --- a/arch/riscv/kernel/ptrace.c +++ b/arch/riscv/kernel/ptrace.c @@ -351,7 +351,7 @@ static int riscv_cfi_set(struct task_struct *target, if ((user_cfi.cfi_status.cfi_state & (PTRACE_CFI_LP_EN_STATE | PTRACE_CFI_LP_LOCK_STATE | PTRACE_CFI_SS_EN_STATE | PTRACE_CFI_SS_LOCK_STATE)) || - (user_cfi.cfi_status.cfi_state & PRACE_CFI_STATE_INVALID_MASK)) + (user_cfi.cfi_status.cfi_state & PTRACE_CFI_STATE_INVALID_MASK)) return -EINVAL; /* If lpad is enabled on target and ptrace requests to set / clear elp, do that */ From a6ede084c4b7cd6ecd0d31d5292336e556901bd7 Mon Sep 17 00:00:00 2001 From: Zong Li Date: Sat, 4 Apr 2026 18:40:58 -0600 Subject: [PATCH 104/248] riscv: cfi: clear CFI lock status in start_thread() When libc locks the CFI status through the following prctl: - PR_LOCK_SHADOW_STACK_STATUS - PR_LOCK_INDIR_BR_LP_STATUS A newly execd address space will inherit the lock status if it does not clear the lock bits. Since the lock bits remain set, libc will later fail to enable the landing pad and shadow stack. Signed-off-by: Zong Li Link: https://patch.msgid.link/20260323065640.4045713-1-zong.li@sifive.com [pjw@kernel.org: ensure we unlock before changing state; cleaned up subject line] Signed-off-by: Paul Walmsley --- arch/riscv/include/asm/usercfi.h | 8 ++++---- arch/riscv/kernel/process.c | 2 ++ arch/riscv/kernel/usercfi.c | 12 ++++++------ 3 files changed, 12 insertions(+), 10 deletions(-) diff --git a/arch/riscv/include/asm/usercfi.h b/arch/riscv/include/asm/usercfi.h index 7495baae1e3c..f56966edbf5c 100644 --- a/arch/riscv/include/asm/usercfi.h +++ b/arch/riscv/include/asm/usercfi.h @@ -39,7 +39,7 @@ void set_active_shstk(struct task_struct *task, unsigned long shstk_addr); bool is_shstk_enabled(struct task_struct *task); bool is_shstk_locked(struct task_struct *task); bool is_shstk_allocated(struct task_struct *task); -void set_shstk_lock(struct task_struct *task); +void set_shstk_lock(struct task_struct *task, bool lock); void set_shstk_status(struct task_struct *task, bool enable); unsigned long get_active_shstk(struct task_struct *task); int restore_user_shstk(struct task_struct *tsk, unsigned long shstk_ptr); @@ -47,7 +47,7 @@ int save_user_shstk(struct task_struct *tsk, unsigned long *saved_shstk_ptr); bool is_indir_lp_enabled(struct task_struct *task); bool is_indir_lp_locked(struct task_struct *task); void set_indir_lp_status(struct task_struct *task, bool enable); -void set_indir_lp_lock(struct task_struct *task); +void set_indir_lp_lock(struct task_struct *task, bool lock); #define PR_SHADOW_STACK_SUPPORTED_STATUS_MASK (PR_SHADOW_STACK_ENABLE) @@ -69,7 +69,7 @@ void set_indir_lp_lock(struct task_struct *task); #define is_shstk_allocated(task) false -#define set_shstk_lock(task) do {} while (0) +#define set_shstk_lock(task, lock) do {} while (0) #define set_shstk_status(task, enable) do {} while (0) @@ -79,7 +79,7 @@ void set_indir_lp_lock(struct task_struct *task); #define set_indir_lp_status(task, enable) do {} while (0) -#define set_indir_lp_lock(task) do {} while (0) +#define set_indir_lp_lock(task, lock) do {} while (0) #define restore_user_shstk(tsk, shstk_ptr) -EINVAL diff --git a/arch/riscv/kernel/process.c b/arch/riscv/kernel/process.c index 5957effab57c..b2df7f72241a 100644 --- a/arch/riscv/kernel/process.c +++ b/arch/riscv/kernel/process.c @@ -160,6 +160,7 @@ void start_thread(struct pt_regs *regs, unsigned long pc, * clear shadow stack state on exec. * libc will set it later via prctl. */ + set_shstk_lock(current, false); set_shstk_status(current, false); set_shstk_base(current, 0, 0); set_active_shstk(current, 0); @@ -167,6 +168,7 @@ void start_thread(struct pt_regs *regs, unsigned long pc, * disable indirect branch tracking on exec. * libc will enable it later via prctl. */ + set_indir_lp_lock(current, false); set_indir_lp_status(current, false); #ifdef CONFIG_64BIT diff --git a/arch/riscv/kernel/usercfi.c b/arch/riscv/kernel/usercfi.c index 1adba746f164..9052171c1a8c 100644 --- a/arch/riscv/kernel/usercfi.c +++ b/arch/riscv/kernel/usercfi.c @@ -74,9 +74,9 @@ void set_shstk_status(struct task_struct *task, bool enable) csr_write(CSR_ENVCFG, task->thread.envcfg); } -void set_shstk_lock(struct task_struct *task) +void set_shstk_lock(struct task_struct *task, bool lock) { - task->thread_info.user_cfi_state.ubcfi_locked = 1; + task->thread_info.user_cfi_state.ubcfi_locked = lock; } bool is_indir_lp_enabled(struct task_struct *task) @@ -104,9 +104,9 @@ void set_indir_lp_status(struct task_struct *task, bool enable) csr_write(CSR_ENVCFG, task->thread.envcfg); } -void set_indir_lp_lock(struct task_struct *task) +void set_indir_lp_lock(struct task_struct *task, bool lock) { - task->thread_info.user_cfi_state.ufcfi_locked = 1; + task->thread_info.user_cfi_state.ufcfi_locked = lock; } /* * If size is 0, then to be compatible with regular stack we want it to be as big as @@ -452,7 +452,7 @@ int arch_lock_shadow_stack_status(struct task_struct *task, !is_shstk_enabled(task) || arg != 0) return -EINVAL; - set_shstk_lock(task); + set_shstk_lock(task, true); return 0; } @@ -502,7 +502,7 @@ int arch_lock_indir_br_lp_status(struct task_struct *task, !is_indir_lp_enabled(task) || arg != 0) return -EINVAL; - set_indir_lp_lock(task); + set_indir_lp_lock(task, true); return 0; } From ac4e61c730d778f8d8b8455da052952dbf8e0317 Mon Sep 17 00:00:00 2001 From: Paul Walmsley Date: Sat, 4 Apr 2026 18:40:58 -0600 Subject: [PATCH 105/248] riscv: ptrace: expand "LP" references to "branch landing pads" in uapi headers Per Linus' comments about the unreadability of abbreviations such as "LP", rename the RISC-V ptrace landing pad CFI macro names to be more explicit. This primarily involves expanding "LP" in the names to some variant of "branch landing pad." Link: https://lore.kernel.org/linux-riscv/CAHk-=whhSLGZAx3N5jJpb4GLFDqH_QvS07D+6BnkPWmCEzTAgw@mail.gmail.com/ Cc: Deepak Gupta Signed-off-by: Paul Walmsley --- arch/riscv/include/uapi/asm/ptrace.h | 20 +++++++++++--------- arch/riscv/kernel/ptrace.c | 10 +++++----- tools/testing/selftests/riscv/cfi/cfitests.c | 5 +++-- 3 files changed, 19 insertions(+), 16 deletions(-) diff --git a/arch/riscv/include/uapi/asm/ptrace.h b/arch/riscv/include/uapi/asm/ptrace.h index 69d71443e111..9985bd58148e 100644 --- a/arch/riscv/include/uapi/asm/ptrace.h +++ b/arch/riscv/include/uapi/asm/ptrace.h @@ -132,23 +132,25 @@ struct __sc_riscv_cfi_state { unsigned long ss_ptr; /* shadow stack pointer */ }; -#define PTRACE_CFI_LP_EN_BIT 0 -#define PTRACE_CFI_LP_LOCK_BIT 1 -#define PTRACE_CFI_ELP_BIT 2 +#define PTRACE_CFI_BRANCH_LANDING_PAD_EN_BIT 0 +#define PTRACE_CFI_BRANCH_LANDING_PAD_LOCK_BIT 1 +#define PTRACE_CFI_BRANCH_EXPECTED_LANDING_PAD_BIT 2 #define PTRACE_CFI_SS_EN_BIT 3 #define PTRACE_CFI_SS_LOCK_BIT 4 #define PTRACE_CFI_SS_PTR_BIT 5 -#define PTRACE_CFI_LP_EN_STATE _BITUL(PTRACE_CFI_LP_EN_BIT) -#define PTRACE_CFI_LP_LOCK_STATE _BITUL(PTRACE_CFI_LP_LOCK_BIT) -#define PTRACE_CFI_ELP_STATE _BITUL(PTRACE_CFI_ELP_BIT) +#define PTRACE_CFI_BRANCH_LANDING_PAD_EN_STATE _BITUL(PTRACE_CFI_BRANCH_LANDING_PAD_EN_BIT) +#define PTRACE_CFI_BRANCH_LANDING_PAD_LOCK_STATE \ + _BITUL(PTRACE_CFI_BRANCH_LANDING_PAD_LOCK_BIT) +#define PTRACE_CFI_BRANCH_EXPECTED_LANDING_PAD_STATE \ + _BITUL(PTRACE_CFI_BRANCH_EXPECTED_LANDING_PAD_BIT) #define PTRACE_CFI_SS_EN_STATE _BITUL(PTRACE_CFI_SS_EN_BIT) #define PTRACE_CFI_SS_LOCK_STATE _BITUL(PTRACE_CFI_SS_LOCK_BIT) #define PTRACE_CFI_SS_PTR_STATE _BITUL(PTRACE_CFI_SS_PTR_BIT) -#define PTRACE_CFI_STATE_INVALID_MASK ~(PTRACE_CFI_LP_EN_STATE | \ - PTRACE_CFI_LP_LOCK_STATE | \ - PTRACE_CFI_ELP_STATE | \ +#define PTRACE_CFI_STATE_INVALID_MASK ~(PTRACE_CFI_BRANCH_LANDING_PAD_EN_STATE | \ + PTRACE_CFI_BRANCH_LANDING_PAD_LOCK_STATE | \ + PTRACE_CFI_BRANCH_EXPECTED_LANDING_PAD_STATE | \ PTRACE_CFI_SS_EN_STATE | \ PTRACE_CFI_SS_LOCK_STATE | \ PTRACE_CFI_SS_PTR_STATE) diff --git a/arch/riscv/kernel/ptrace.c b/arch/riscv/kernel/ptrace.c index 2704a532e916..b6156625d6f8 100644 --- a/arch/riscv/kernel/ptrace.c +++ b/arch/riscv/kernel/ptrace.c @@ -303,11 +303,11 @@ static int riscv_cfi_get(struct task_struct *target, regs = task_pt_regs(target); if (is_indir_lp_enabled(target)) { - user_cfi.cfi_status.cfi_state |= PTRACE_CFI_LP_EN_STATE; + user_cfi.cfi_status.cfi_state |= PTRACE_CFI_BRANCH_LANDING_PAD_EN_STATE; user_cfi.cfi_status.cfi_state |= is_indir_lp_locked(target) ? - PTRACE_CFI_LP_LOCK_STATE : 0; + PTRACE_CFI_BRANCH_LANDING_PAD_LOCK_STATE : 0; user_cfi.cfi_status.cfi_state |= (regs->status & SR_ELP) ? - PTRACE_CFI_ELP_STATE : 0; + PTRACE_CFI_BRANCH_EXPECTED_LANDING_PAD_STATE : 0; } if (is_shstk_enabled(target)) { @@ -349,7 +349,7 @@ static int riscv_cfi_set(struct task_struct *target, * rsvd field should be set to zero so that if those fields are needed in future */ if ((user_cfi.cfi_status.cfi_state & - (PTRACE_CFI_LP_EN_STATE | PTRACE_CFI_LP_LOCK_STATE | + (PTRACE_CFI_BRANCH_LANDING_PAD_EN_STATE | PTRACE_CFI_BRANCH_LANDING_PAD_LOCK_STATE | PTRACE_CFI_SS_EN_STATE | PTRACE_CFI_SS_LOCK_STATE)) || (user_cfi.cfi_status.cfi_state & PTRACE_CFI_STATE_INVALID_MASK)) return -EINVAL; @@ -357,7 +357,7 @@ static int riscv_cfi_set(struct task_struct *target, /* If lpad is enabled on target and ptrace requests to set / clear elp, do that */ if (is_indir_lp_enabled(target)) { if (user_cfi.cfi_status.cfi_state & - PTRACE_CFI_ELP_STATE) /* set elp state */ + PTRACE_CFI_BRANCH_EXPECTED_LANDING_PAD_STATE) /* set elp state */ regs->status |= SR_ELP; else regs->status &= ~SR_ELP; /* clear elp state */ diff --git a/tools/testing/selftests/riscv/cfi/cfitests.c b/tools/testing/selftests/riscv/cfi/cfitests.c index 298544854415..68374a27ec81 100644 --- a/tools/testing/selftests/riscv/cfi/cfitests.c +++ b/tools/testing/selftests/riscv/cfi/cfitests.c @@ -94,7 +94,7 @@ bool cfi_ptrace_test(void) } switch (ptrace_test_num) { -#define CFI_ENABLE_MASK (PTRACE_CFI_LP_EN_STATE | \ +#define CFI_ENABLE_MASK (PTRACE_CFI_BRANCH_LANDING_PAD_EN_STATE | \ PTRACE_CFI_SS_EN_STATE | \ PTRACE_CFI_SS_PTR_STATE) case 0: @@ -106,7 +106,8 @@ bool cfi_ptrace_test(void) __func__); break; case 1: - if (!(cfi_reg.cfi_status.cfi_state & PTRACE_CFI_ELP_STATE)) + if (!(cfi_reg.cfi_status.cfi_state & + PTRACE_CFI_BRANCH_EXPECTED_LANDING_PAD_STATE)) ksft_exit_fail_msg("%s: elp must have been set\n", __func__); /* clear elp state. not interested in anything else */ cfi_reg.cfi_status.cfi_state = 0; From adfc80dd0d7831335b5105fb3d8747094bf42878 Mon Sep 17 00:00:00 2001 From: Paul Walmsley Date: Sat, 4 Apr 2026 18:40:58 -0600 Subject: [PATCH 106/248] prctl: rename branch landing pad implementation functions to be more explicit Per Linus' comments about the unreadability of abbreviations such as "indir_br_lp", rename the three prctl() implementation functions to be more explicit. This involves renaming "indir_br_lp_status" in the function names to "branch_landing_pad_state". While here, add _prctl_ into the function names, following the speculation control prctl implementation functions. Link: https://lore.kernel.org/linux-riscv/CAHk-=whhSLGZAx3N5jJpb4GLFDqH_QvS07D+6BnkPWmCEzTAgw@mail.gmail.com/ Cc: Deepak Gupta Cc: Linus Torvalds Cc: Mark Brown Signed-off-by: Paul Walmsley --- arch/riscv/kernel/usercfi.c | 16 ++++++++-------- include/linux/cpu.h | 6 +++--- kernel/sys.c | 15 ++++++++------- 3 files changed, 19 insertions(+), 18 deletions(-) diff --git a/arch/riscv/kernel/usercfi.c b/arch/riscv/kernel/usercfi.c index 9052171c1a8c..04ab1eb8df29 100644 --- a/arch/riscv/kernel/usercfi.c +++ b/arch/riscv/kernel/usercfi.c @@ -457,7 +457,8 @@ int arch_lock_shadow_stack_status(struct task_struct *task, return 0; } -int arch_get_indir_br_lp_status(struct task_struct *t, unsigned long __user *status) +int arch_prctl_get_branch_landing_pad_state(struct task_struct *t, + unsigned long __user *state) { unsigned long fcfi_status = 0; @@ -467,10 +468,10 @@ int arch_get_indir_br_lp_status(struct task_struct *t, unsigned long __user *sta /* indirect branch tracking is enabled on the task or not */ fcfi_status |= (is_indir_lp_enabled(t) ? PR_INDIR_BR_LP_ENABLE : 0); - return copy_to_user(status, &fcfi_status, sizeof(fcfi_status)) ? -EFAULT : 0; + return copy_to_user(state, &fcfi_status, sizeof(fcfi_status)) ? -EFAULT : 0; } -int arch_set_indir_br_lp_status(struct task_struct *t, unsigned long status) +int arch_prctl_set_branch_landing_pad_state(struct task_struct *t, unsigned long state) { bool enable_indir_lp = false; @@ -482,24 +483,23 @@ int arch_set_indir_br_lp_status(struct task_struct *t, unsigned long status) return -EINVAL; /* Reject unknown flags */ - if (status & ~PR_INDIR_BR_LP_ENABLE) + if (state & ~PR_INDIR_BR_LP_ENABLE) return -EINVAL; - enable_indir_lp = (status & PR_INDIR_BR_LP_ENABLE); + enable_indir_lp = (state & PR_INDIR_BR_LP_ENABLE); set_indir_lp_status(t, enable_indir_lp); return 0; } -int arch_lock_indir_br_lp_status(struct task_struct *task, - unsigned long arg) +int arch_prctl_lock_branch_landing_pad_state(struct task_struct *task) { /* * If indirect branch tracking is not supported or not enabled on task, * nothing to lock here */ if (!is_user_lpad_enabled() || - !is_indir_lp_enabled(task) || arg != 0) + !is_indir_lp_enabled(task)) return -EINVAL; set_indir_lp_lock(task, true); diff --git a/include/linux/cpu.h b/include/linux/cpu.h index 8239cd95a005..9b6b0d87fdb0 100644 --- a/include/linux/cpu.h +++ b/include/linux/cpu.h @@ -229,8 +229,8 @@ static inline bool cpu_attack_vector_mitigated(enum cpu_attack_vectors v) #define smt_mitigations SMT_MITIGATIONS_OFF #endif -int arch_get_indir_br_lp_status(struct task_struct *t, unsigned long __user *status); -int arch_set_indir_br_lp_status(struct task_struct *t, unsigned long status); -int arch_lock_indir_br_lp_status(struct task_struct *t, unsigned long status); +int arch_prctl_get_branch_landing_pad_state(struct task_struct *t, unsigned long __user *state); +int arch_prctl_set_branch_landing_pad_state(struct task_struct *t, unsigned long state); +int arch_prctl_lock_branch_landing_pad_state(struct task_struct *t); #endif /* _LINUX_CPU_H_ */ diff --git a/kernel/sys.c b/kernel/sys.c index c86eba9aa7e9..a5e0c187bbbf 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -2388,17 +2388,18 @@ int __weak arch_lock_shadow_stack_status(struct task_struct *t, unsigned long st return -EINVAL; } -int __weak arch_get_indir_br_lp_status(struct task_struct *t, unsigned long __user *status) +int __weak arch_prctl_get_branch_landing_pad_state(struct task_struct *t, + unsigned long __user *state) { return -EINVAL; } -int __weak arch_set_indir_br_lp_status(struct task_struct *t, unsigned long status) +int __weak arch_prctl_set_branch_landing_pad_state(struct task_struct *t, unsigned long state) { return -EINVAL; } -int __weak arch_lock_indir_br_lp_status(struct task_struct *t, unsigned long status) +int __weak arch_prctl_lock_branch_landing_pad_state(struct task_struct *t) { return -EINVAL; } @@ -2891,17 +2892,17 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3, case PR_GET_INDIR_BR_LP_STATUS: if (arg3 || arg4 || arg5) return -EINVAL; - error = arch_get_indir_br_lp_status(me, (unsigned long __user *)arg2); + error = arch_prctl_get_branch_landing_pad_state(me, (unsigned long __user *)arg2); break; case PR_SET_INDIR_BR_LP_STATUS: if (arg3 || arg4 || arg5) return -EINVAL; - error = arch_set_indir_br_lp_status(me, arg2); + error = arch_prctl_set_branch_landing_pad_state(me, arg2); break; case PR_LOCK_INDIR_BR_LP_STATUS: - if (arg3 || arg4 || arg5) + if (arg2 || arg3 || arg4 || arg5) return -EINVAL; - error = arch_lock_indir_br_lp_status(me, arg2); + error = arch_prctl_lock_branch_landing_pad_state(me); break; default: trace_task_prctl_unknown(option, arg2, arg3, arg4, arg5); From e5342fe2c1bb5b4fab6ed531a0122c6417e57ecf Mon Sep 17 00:00:00 2001 From: Paul Walmsley Date: Sat, 4 Apr 2026 18:40:58 -0600 Subject: [PATCH 107/248] riscv: ptrace: cfi: expand "SS" references to "shadow stack" in uapi headers Similar to the recent change to expand "LP" to "branch landing pad", let's expand "SS" in the ptrace uapi macros to "shadow stack" as well. This aligns with the existing prctl() arguments, which use the expanded "shadow stack" names, rather than just the abbreviation. Link: https://lore.kernel.org/linux-riscv/CAHk-=whhSLGZAx3N5jJpb4GLFDqH_QvS07D+6BnkPWmCEzTAgw@mail.gmail.com/ Cc: Deepak Gupta Signed-off-by: Paul Walmsley --- arch/riscv/include/uapi/asm/ptrace.h | 18 +++++++++--------- arch/riscv/kernel/ptrace.c | 10 +++++----- tools/testing/selftests/riscv/cfi/cfitests.c | 4 ++-- 3 files changed, 16 insertions(+), 16 deletions(-) diff --git a/arch/riscv/include/uapi/asm/ptrace.h b/arch/riscv/include/uapi/asm/ptrace.h index 9985bd58148e..3de2b7124aff 100644 --- a/arch/riscv/include/uapi/asm/ptrace.h +++ b/arch/riscv/include/uapi/asm/ptrace.h @@ -135,25 +135,25 @@ struct __sc_riscv_cfi_state { #define PTRACE_CFI_BRANCH_LANDING_PAD_EN_BIT 0 #define PTRACE_CFI_BRANCH_LANDING_PAD_LOCK_BIT 1 #define PTRACE_CFI_BRANCH_EXPECTED_LANDING_PAD_BIT 2 -#define PTRACE_CFI_SS_EN_BIT 3 -#define PTRACE_CFI_SS_LOCK_BIT 4 -#define PTRACE_CFI_SS_PTR_BIT 5 +#define PTRACE_CFI_SHADOW_STACK_EN_BIT 3 +#define PTRACE_CFI_SHADOW_STACK_LOCK_BIT 4 +#define PTRACE_CFI_SHADOW_STACK_PTR_BIT 5 #define PTRACE_CFI_BRANCH_LANDING_PAD_EN_STATE _BITUL(PTRACE_CFI_BRANCH_LANDING_PAD_EN_BIT) #define PTRACE_CFI_BRANCH_LANDING_PAD_LOCK_STATE \ _BITUL(PTRACE_CFI_BRANCH_LANDING_PAD_LOCK_BIT) #define PTRACE_CFI_BRANCH_EXPECTED_LANDING_PAD_STATE \ _BITUL(PTRACE_CFI_BRANCH_EXPECTED_LANDING_PAD_BIT) -#define PTRACE_CFI_SS_EN_STATE _BITUL(PTRACE_CFI_SS_EN_BIT) -#define PTRACE_CFI_SS_LOCK_STATE _BITUL(PTRACE_CFI_SS_LOCK_BIT) -#define PTRACE_CFI_SS_PTR_STATE _BITUL(PTRACE_CFI_SS_PTR_BIT) +#define PTRACE_CFI_SHADOW_STACK_EN_STATE _BITUL(PTRACE_CFI_SHADOW_STACK_EN_BIT) +#define PTRACE_CFI_SHADOW_STACK_LOCK_STATE _BITUL(PTRACE_CFI_SHADOW_STACK_LOCK_BIT) +#define PTRACE_CFI_SHADOW_STACK_PTR_STATE _BITUL(PTRACE_CFI_SHADOW_STACK_PTR_BIT) #define PTRACE_CFI_STATE_INVALID_MASK ~(PTRACE_CFI_BRANCH_LANDING_PAD_EN_STATE | \ PTRACE_CFI_BRANCH_LANDING_PAD_LOCK_STATE | \ PTRACE_CFI_BRANCH_EXPECTED_LANDING_PAD_STATE | \ - PTRACE_CFI_SS_EN_STATE | \ - PTRACE_CFI_SS_LOCK_STATE | \ - PTRACE_CFI_SS_PTR_STATE) + PTRACE_CFI_SHADOW_STACK_EN_STATE | \ + PTRACE_CFI_SHADOW_STACK_LOCK_STATE | \ + PTRACE_CFI_SHADOW_STACK_PTR_STATE) struct __cfi_status { __u64 cfi_state; diff --git a/arch/riscv/kernel/ptrace.c b/arch/riscv/kernel/ptrace.c index b6156625d6f8..93de2e7a3074 100644 --- a/arch/riscv/kernel/ptrace.c +++ b/arch/riscv/kernel/ptrace.c @@ -311,10 +311,10 @@ static int riscv_cfi_get(struct task_struct *target, } if (is_shstk_enabled(target)) { - user_cfi.cfi_status.cfi_state |= (PTRACE_CFI_SS_EN_STATE | - PTRACE_CFI_SS_PTR_STATE); + user_cfi.cfi_status.cfi_state |= (PTRACE_CFI_SHADOW_STACK_EN_STATE | + PTRACE_CFI_SHADOW_STACK_PTR_STATE); user_cfi.cfi_status.cfi_state |= is_shstk_locked(target) ? - PTRACE_CFI_SS_LOCK_STATE : 0; + PTRACE_CFI_SHADOW_STACK_LOCK_STATE : 0; user_cfi.shstk_ptr = get_active_shstk(target); } @@ -350,7 +350,7 @@ static int riscv_cfi_set(struct task_struct *target, */ if ((user_cfi.cfi_status.cfi_state & (PTRACE_CFI_BRANCH_LANDING_PAD_EN_STATE | PTRACE_CFI_BRANCH_LANDING_PAD_LOCK_STATE | - PTRACE_CFI_SS_EN_STATE | PTRACE_CFI_SS_LOCK_STATE)) || + PTRACE_CFI_SHADOW_STACK_EN_STATE | PTRACE_CFI_SHADOW_STACK_LOCK_STATE)) || (user_cfi.cfi_status.cfi_state & PTRACE_CFI_STATE_INVALID_MASK)) return -EINVAL; @@ -365,7 +365,7 @@ static int riscv_cfi_set(struct task_struct *target, /* If shadow stack enabled on target, set new shadow stack pointer */ if (is_shstk_enabled(target) && - (user_cfi.cfi_status.cfi_state & PTRACE_CFI_SS_PTR_STATE)) + (user_cfi.cfi_status.cfi_state & PTRACE_CFI_SHADOW_STACK_PTR_STATE)) set_active_shstk(target, user_cfi.shstk_ptr); return 0; diff --git a/tools/testing/selftests/riscv/cfi/cfitests.c b/tools/testing/selftests/riscv/cfi/cfitests.c index 68374a27ec81..0dac74b8553c 100644 --- a/tools/testing/selftests/riscv/cfi/cfitests.c +++ b/tools/testing/selftests/riscv/cfi/cfitests.c @@ -95,8 +95,8 @@ bool cfi_ptrace_test(void) switch (ptrace_test_num) { #define CFI_ENABLE_MASK (PTRACE_CFI_BRANCH_LANDING_PAD_EN_STATE | \ - PTRACE_CFI_SS_EN_STATE | \ - PTRACE_CFI_SS_PTR_STATE) + PTRACE_CFI_SHADOW_STACK_EN_STATE | \ + PTRACE_CFI_SHADOW_STACK_PTR_STATE) case 0: if ((cfi_reg.cfi_status.cfi_state & CFI_ENABLE_MASK) != CFI_ENABLE_MASK) ksft_exit_fail_msg("%s: ptrace_getregset failed, %llu\n", __func__, From 08ee1559052be302f1d3752f48360b89517d9f8d Mon Sep 17 00:00:00 2001 From: Paul Walmsley Date: Sat, 4 Apr 2026 18:40:58 -0600 Subject: [PATCH 108/248] prctl: cfi: change the branch landing pad prctl()s to be more descriptive Per Linus' comments requesting the replacement of "INDIR_BR_LP" in the indirect branch tracking prctl()s with something more readable, and suggesting the use of the speculation control prctl()s as an exemplar, reimplement the prctl()s and related constants that control per-task forward-edge control flow integrity. This primarily involves two changes. First, the prctls are restructured to resemble the style of the speculative execution workaround control prctls PR_{GET,SET}_SPECULATION_CTRL, to make them easier to extend in the future. Second, the "indir_br_lp" abbrevation is expanded to "branch_landing_pads" to be less telegraphic. The kselftest and documentation is adjusted accordingly. Link: https://lore.kernel.org/linux-riscv/CAHk-=whhSLGZAx3N5jJpb4GLFDqH_QvS07D+6BnkPWmCEzTAgw@mail.gmail.com/ Cc: Deepak Gupta Cc: Linus Torvalds Cc: Mark Brown Signed-off-by: Paul Walmsley --- Documentation/arch/riscv/zicfilp.rst | 57 ++++++++++++------- arch/riscv/kernel/usercfi.c | 15 +++-- include/uapi/linux/prctl.h | 37 +++++------- kernel/sys.c | 23 ++++---- .../trace/beauty/include/uapi/linux/prctl.h | 36 +++++------- tools/testing/selftests/riscv/cfi/cfitests.c | 4 +- 6 files changed, 88 insertions(+), 84 deletions(-) diff --git a/Documentation/arch/riscv/zicfilp.rst b/Documentation/arch/riscv/zicfilp.rst index 78a3e01ff68c..ab7d8e62ddaf 100644 --- a/Documentation/arch/riscv/zicfilp.rst +++ b/Documentation/arch/riscv/zicfilp.rst @@ -76,34 +76,49 @@ the program. 4. prctl() enabling -------------------- -:c:macro:`PR_SET_INDIR_BR_LP_STATUS` / :c:macro:`PR_GET_INDIR_BR_LP_STATUS` / -:c:macro:`PR_LOCK_INDIR_BR_LP_STATUS` are three prctls added to manage indirect -branch tracking. These prctls are architecture-agnostic and return -EINVAL if -the underlying functionality is not supported. +Per-task indirect branch tracking state can be monitored and +controlled via the :c:macro:`PR_GET_CFI` and :c:macro:`PR_SET_CFI` +``prctl()` arguments (respectively), by supplying +:c:macro:`PR_CFI_BRANCH_LANDING_PADS` as the second argument. These +are architecture-agnostic, and will return -EINVAL if the underlying +functionality is not supported. -* prctl(PR_SET_INDIR_BR_LP_STATUS, unsigned long arg) +* prctl(:c:macro:`PR_SET_CFI`, :c:macro:`PR_CFI_BRANCH_LANDING_PADS`, unsigned long arg) -If arg1 is :c:macro:`PR_INDIR_BR_LP_ENABLE` and if CPU supports -``zicfilp`` then the kernel will enable indirect branch tracking for the -task. The dynamic loader can issue this :c:macro:`prctl` once it has +arg is a bitmask. + +If :c:macro:`PR_CFI_ENABLE` is set in arg, and the CPU supports +``zicfilp``, then the kernel will enable indirect branch tracking for +the task. The dynamic loader can issue this ``prctl()`` once it has determined that all the objects loaded in the address space support -indirect branch tracking. Additionally, if there is a `dlopen` to an -object which wasn't compiled with ``zicfilp``, the dynamic loader can -issue this prctl with arg1 set to 0 (i.e. :c:macro:`PR_INDIR_BR_LP_ENABLE` -cleared). +indirect branch tracking. -* prctl(PR_GET_INDIR_BR_LP_STATUS, unsigned long * arg) +Indirect branch tracking state can also be locked once enabled. This +prevents the task from subsequently disabling it. This is done by +setting the bit :c:macro:`PR_CFI_LOCK` in arg. Either indirect branch +tracking must already be enabled for the task, or the bit +:c:macro:`PR_CFI_ENABLE` must also be set in arg. This is intended +for environments that wish to run with a strict security posture that +do not wish to load objects without ``zicfilp`` support. -Returns the current status of indirect branch tracking. If enabled -it'll return :c:macro:`PR_INDIR_BR_LP_ENABLE` +Indirect branch tracking can also be disabled for the task, assuming +that it has not previously been enabled and locked. If there is a +``dlopen()`` to an object which wasn't compiled with ``zicfilp``, the +dynamic loader can issue this ``prctl()`` with arg set to +:c:macro:`PR_CFI_DISABLE`. Disabling indirect branch tracking for the +task is not possible if it has previously been enabled and locked. -* prctl(PR_LOCK_INDIR_BR_LP_STATUS, unsigned long arg) -Locks the current status of indirect branch tracking on the task. User -space may want to run with a strict security posture and wouldn't want -loading of objects without ``zicfilp`` support in them, to disallow -disabling of indirect branch tracking. In this case, user space can -use this prctl to lock the current settings. +* prctl(:c:macro:`PR_GET_CFI`, :c:macro:`PR_CFI_BRANCH_LANDING_PADS`, unsigned long * arg) + +Returns the current status of indirect branch tracking into a bitmask +stored into the memory location pointed to by arg. The bitmask will +have the :c:macro:`PR_CFI_ENABLE` bit set if indirect branch tracking +is currently enabled for the task, and if it is locked, will +additionally have the :c:macro:`PR_CFI_LOCK` bit set. If indirect +branch tracking is currently disabled for the task, the +:c:macro:`PR_CFI_DISABLE` bit will be set. + 5. violations related to indirect branch tracking -------------------------------------------------- diff --git a/arch/riscv/kernel/usercfi.c b/arch/riscv/kernel/usercfi.c index 04ab1eb8df29..2c535737511d 100644 --- a/arch/riscv/kernel/usercfi.c +++ b/arch/riscv/kernel/usercfi.c @@ -465,16 +465,14 @@ int arch_prctl_get_branch_landing_pad_state(struct task_struct *t, if (!is_user_lpad_enabled()) return -EINVAL; - /* indirect branch tracking is enabled on the task or not */ - fcfi_status |= (is_indir_lp_enabled(t) ? PR_INDIR_BR_LP_ENABLE : 0); + fcfi_status = (is_indir_lp_enabled(t) ? PR_CFI_ENABLE : PR_CFI_DISABLE); + fcfi_status |= (is_indir_lp_locked(t) ? PR_CFI_LOCK : 0); return copy_to_user(state, &fcfi_status, sizeof(fcfi_status)) ? -EFAULT : 0; } int arch_prctl_set_branch_landing_pad_state(struct task_struct *t, unsigned long state) { - bool enable_indir_lp = false; - if (!is_user_lpad_enabled()) return -EINVAL; @@ -482,12 +480,13 @@ int arch_prctl_set_branch_landing_pad_state(struct task_struct *t, unsigned long if (is_indir_lp_locked(t)) return -EINVAL; - /* Reject unknown flags */ - if (state & ~PR_INDIR_BR_LP_ENABLE) + if (!(state & (PR_CFI_ENABLE | PR_CFI_DISABLE))) return -EINVAL; - enable_indir_lp = (state & PR_INDIR_BR_LP_ENABLE); - set_indir_lp_status(t, enable_indir_lp); + if (state & PR_CFI_ENABLE && state & PR_CFI_DISABLE) + return -EINVAL; + + set_indir_lp_status(t, !!(state & PR_CFI_ENABLE)); return 0; } diff --git a/include/uapi/linux/prctl.h b/include/uapi/linux/prctl.h index 55b0446fff9d..b6ec6f693719 100644 --- a/include/uapi/linux/prctl.h +++ b/include/uapi/linux/prctl.h @@ -397,30 +397,23 @@ struct prctl_mm_map { # define PR_RSEQ_SLICE_EXT_ENABLE 0x01 /* - * Get the current indirect branch tracking configuration for the current - * thread, this will be the value configured via PR_SET_INDIR_BR_LP_STATUS. + * Get or set the control flow integrity (CFI) configuration for the + * current thread. + * + * Some per-thread control flow integrity settings are not yet + * controlled through this prctl(); see for example + * PR_{GET,SET,LOCK}_SHADOW_STACK_STATUS */ -#define PR_GET_INDIR_BR_LP_STATUS 80 - +#define PR_GET_CFI 80 +#define PR_SET_CFI 81 /* - * Set the indirect branch tracking configuration. PR_INDIR_BR_LP_ENABLE will - * enable cpu feature for user thread, to track all indirect branches and ensure - * they land on arch defined landing pad instruction. - * x86 - If enabled, an indirect branch must land on an ENDBRANCH instruction. - * arch64 - If enabled, an indirect branch must land on a BTI instruction. - * riscv - If enabled, an indirect branch must land on an lpad instruction. - * PR_INDIR_BR_LP_DISABLE will disable feature for user thread and indirect - * branches will no more be tracked by cpu to land on arch defined landing pad - * instruction. + * Forward-edge CFI variants (excluding ARM64 BTI, which has its own + * prctl()s). */ -#define PR_SET_INDIR_BR_LP_STATUS 81 -# define PR_INDIR_BR_LP_ENABLE (1UL << 0) - -/* - * Prevent further changes to the specified indirect branch tracking - * configuration. All bits may be locked via this call, including - * undefined bits. - */ -#define PR_LOCK_INDIR_BR_LP_STATUS 82 +#define PR_CFI_BRANCH_LANDING_PADS 0 +/* Return and control values for PR_{GET,SET}_CFI */ +# define PR_CFI_ENABLE _BITUL(0) +# define PR_CFI_DISABLE _BITUL(1) +# define PR_CFI_LOCK _BITUL(2) #endif /* _LINUX_PRCTL_H */ diff --git a/kernel/sys.c b/kernel/sys.c index a5e0c187bbbf..62e842055cc9 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -2889,20 +2889,23 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3, return -EINVAL; error = rseq_slice_extension_prctl(arg2, arg3); break; - case PR_GET_INDIR_BR_LP_STATUS: - if (arg3 || arg4 || arg5) + case PR_GET_CFI: + if (arg2 != PR_CFI_BRANCH_LANDING_PADS) return -EINVAL; - error = arch_prctl_get_branch_landing_pad_state(me, (unsigned long __user *)arg2); + if (arg4 || arg5) + return -EINVAL; + error = arch_prctl_get_branch_landing_pad_state(me, (unsigned long __user *)arg3); break; - case PR_SET_INDIR_BR_LP_STATUS: - if (arg3 || arg4 || arg5) + case PR_SET_CFI: + if (arg2 != PR_CFI_BRANCH_LANDING_PADS) return -EINVAL; - error = arch_prctl_set_branch_landing_pad_state(me, arg2); - break; - case PR_LOCK_INDIR_BR_LP_STATUS: - if (arg2 || arg3 || arg4 || arg5) + if (arg4 || arg5) return -EINVAL; - error = arch_prctl_lock_branch_landing_pad_state(me); + error = arch_prctl_set_branch_landing_pad_state(me, arg3); + if (error) + break; + if (arg3 & PR_CFI_LOCK && !(arg3 & PR_CFI_DISABLE)) + error = arch_prctl_lock_branch_landing_pad_state(me); break; default: trace_task_prctl_unknown(option, arg2, arg3, arg4, arg5); diff --git a/tools/perf/trace/beauty/include/uapi/linux/prctl.h b/tools/perf/trace/beauty/include/uapi/linux/prctl.h index 55b0446fff9d..560f99bc4782 100644 --- a/tools/perf/trace/beauty/include/uapi/linux/prctl.h +++ b/tools/perf/trace/beauty/include/uapi/linux/prctl.h @@ -397,30 +397,24 @@ struct prctl_mm_map { # define PR_RSEQ_SLICE_EXT_ENABLE 0x01 /* - * Get the current indirect branch tracking configuration for the current - * thread, this will be the value configured via PR_SET_INDIR_BR_LP_STATUS. + * Get or set the control flow integrity (CFI) configuration for the + * current thread. + * + * Some per-thread control flow integrity settings are not yet + * controlled through this prctl(); see for example + * PR_{GET,SET,LOCK}_SHADOW_STACK_STATUS */ -#define PR_GET_INDIR_BR_LP_STATUS 80 - +#define PR_GET_CFI 80 +#define PR_SET_CFI 81 /* - * Set the indirect branch tracking configuration. PR_INDIR_BR_LP_ENABLE will - * enable cpu feature for user thread, to track all indirect branches and ensure - * they land on arch defined landing pad instruction. - * x86 - If enabled, an indirect branch must land on an ENDBRANCH instruction. - * arch64 - If enabled, an indirect branch must land on a BTI instruction. - * riscv - If enabled, an indirect branch must land on an lpad instruction. - * PR_INDIR_BR_LP_DISABLE will disable feature for user thread and indirect - * branches will no more be tracked by cpu to land on arch defined landing pad - * instruction. + * Forward-edge CFI variants (excluding ARM64 BTI, which has its own + * prctl()s). */ -#define PR_SET_INDIR_BR_LP_STATUS 81 -# define PR_INDIR_BR_LP_ENABLE (1UL << 0) +#define PR_CFI_BRANCH_LANDING_PADS 0 +/* Return and control values for PR_{GET,SET}_CFI */ +# define PR_CFI_ENABLE _BITUL(0) +# define PR_CFI_DISABLE _BITUL(1) +# define PR_CFI_LOCK _BITUL(2) -/* - * Prevent further changes to the specified indirect branch tracking - * configuration. All bits may be locked via this call, including - * undefined bits. - */ -#define PR_LOCK_INDIR_BR_LP_STATUS 82 #endif /* _LINUX_PRCTL_H */ diff --git a/tools/testing/selftests/riscv/cfi/cfitests.c b/tools/testing/selftests/riscv/cfi/cfitests.c index 0dac74b8553c..39d097b6881f 100644 --- a/tools/testing/selftests/riscv/cfi/cfitests.c +++ b/tools/testing/selftests/riscv/cfi/cfitests.c @@ -146,11 +146,11 @@ int main(int argc, char *argv[]) * pads for user mode except lighting up a bit in senvcfg via a prctl. * Enable landing pad support throughout the execution of the test binary. */ - ret = my_syscall5(__NR_prctl, PR_GET_INDIR_BR_LP_STATUS, &lpad_status, 0, 0, 0); + ret = my_syscall5(__NR_prctl, PR_GET_CFI, PR_CFI_BRANCH_LANDING_PADS, &lpad_status, 0, 0); if (ret) ksft_exit_fail_msg("Get landing pad status failed with %d\n", ret); - if (!(lpad_status & PR_INDIR_BR_LP_ENABLE)) + if (!(lpad_status & PR_CFI_ENABLE)) ksft_exit_fail_msg("Landing pad is not enabled, should be enabled via glibc\n"); ret = my_syscall5(__NR_prctl, PR_GET_SHADOW_STACK_STATUS, &ss_status, 0, 0, 0); From 0422b07bc4c296b736e240d95d21fbfebbfaa2ca Mon Sep 17 00:00:00 2001 From: Yazen Ghannam Date: Sat, 28 Feb 2026 09:08:14 -0500 Subject: [PATCH 109/248] x86/mce/amd: Filter bogus hardware errors on Zen3 clients MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Users have been observing multiple L3 cache deferred errors after recent kernel rework of deferred error handling.¹ ⁴ The errors are bogus due to inconsistent status values. Also, user verified that bogus MCA_DESTAT values are present on the system even with an older kernel.² The errors seem to be garbage values present in the MCA_DESTAT of some L3 cache banks. These were implicitly ignored before the recent kernel rework because these do not generate a deferred error interrupt. A later revision of the rework patch was merged for v6.19. This naturally filtered out most of the bogus error logs. However, a few signatures still remain.³ Minimize the scope of the filter to the reported CPU family/model/stepping and only for errors which don't have the Enabled bit in the MCi status MSR. ¹ https://lore.kernel.org/20250915010010.3547-1-spasswolf@web.de ² https://lore.kernel.org/6e1eda7dd55f6fa30405edf7b0f75695cf55b237.camel@web.de ³ https://lore.kernel.org/21ba47fa8893b33b94370c2a42e5084cf0d2e975.camel@web.de ⁴ https://lore.kernel.org/r/CAKFB093B2k3sKsGJ_QNX1jVQsaXVFyy=wNwpzCGLOXa_vSDwXw@mail.gmail.com [ bp: Generalize the condition according to which errors are bogus. ] Fixes: 7cb735d7c0cb ("x86/mce: Unify AMD DFR handler with MCA Polling") Closes: https://lore.kernel.org/20250915010010.3547-1-spasswolf@web.de Reported-by: Bert Karwatzki Signed-off-by: Yazen Ghannam Signed-off-by: Borislav Petkov (AMD) Reviewed-by: Mario Limonciello Tested-By: Bert Karwatzki Cc: stable@vger.kernel.org Link: https://lore.kernel.org/20250915010010.3547-1-spasswolf@web.de --- arch/x86/kernel/cpu/mce/amd.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/arch/x86/kernel/cpu/mce/amd.c b/arch/x86/kernel/cpu/mce/amd.c index a030ee4cecc2..28deaba08833 100644 --- a/arch/x86/kernel/cpu/mce/amd.c +++ b/arch/x86/kernel/cpu/mce/amd.c @@ -604,6 +604,14 @@ bool amd_filter_mce(struct mce *m) enum smca_bank_types bank_type = smca_get_bank_type(m->extcpu, m->bank); struct cpuinfo_x86 *c = &boot_cpu_data; + /* Bogus hw errors on Cezanne A0. */ + if (c->x86 == 0x19 && + c->x86_model == 0x50 && + c->x86_stepping == 0x0) { + if (!(m->status & MCI_STATUS_EN)) + return true; + } + /* See Family 17h Models 10h-2Fh Erratum #1114. */ if (c->x86 == 0x17 && c->x86_model >= 0x10 && c->x86_model <= 0x2F && From 51520e03e70d6c73e33ee7cbe0319767d05764fe Mon Sep 17 00:00:00 2001 From: "Borislav Petkov (AMD)" Date: Tue, 31 Mar 2026 14:16:23 +0200 Subject: [PATCH 110/248] EDAC/mc: Fix error path ordering in edac_mc_alloc() When the mci->pvt_info allocation in edac_mc_alloc() fails, the error path will call put_device() which will end up calling the device's release function. However, the init ordering is wrong such that device_initialize() happens *after* the failed allocation and thus the device itself and the release function pointer are not initialized yet when they're called: MCE: In-kernel MCE decoding enabled. ------------[ cut here ]------------ kobject: '(null)': is not initialized, yet kobject_put() is being called. WARNING: lib/kobject.c:734 at kobject_put, CPU#22: systemd-udevd CPU: 22 UID: 0 PID: 538 Comm: systemd-udevd Not tainted 7.0.0-rc1+ #2 PREEMPT(full) RIP: 0010:kobject_put Call Trace: edac_mc_alloc+0xbe/0xe0 [edac_core] amd64_edac_init+0x7a4/0xff0 [amd64_edac] ? __pfx_amd64_edac_init+0x10/0x10 [amd64_edac] do_one_initcall ... Reorder the calling sequence so that the device is initialized and thus the release function pointer is properly set before it can be used. This was found by Claude while reviewing another EDAC patch. Fixes: 0bbb265f7089 ("EDAC/mc: Get rid of silly one-shot struct allocation in edac_mc_alloc()") Reported-by: Claude Code:claude-opus-4.5 Signed-off-by: Borislav Petkov (AMD) Reviewed-by: Qiuxu Zhuo Cc: stable@kernel.org Link: https://patch.msgid.link/20260331121623.4871-1-bp@kernel.org --- drivers/edac/edac_mc.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/edac/edac_mc.c b/drivers/edac/edac_mc.c index 29e9828422bb..11d66333d93b 100644 --- a/drivers/edac/edac_mc.c +++ b/drivers/edac/edac_mc.c @@ -369,13 +369,13 @@ struct mem_ctl_info *edac_mc_alloc(unsigned int mc_num, if (!mci->layers) goto error; + mci->dev.release = mci_release; + device_initialize(&mci->dev); + mci->pvt_info = kzalloc(sz_pvt, GFP_KERNEL); if (!mci->pvt_info) goto error; - mci->dev.release = mci_release; - device_initialize(&mci->dev); - /* setup index and various internal pointers */ mci->mc_idx = mc_num; mci->tot_dimms = tot_dimms; From 4346be6577aaa04586167402ae87bbdbe32484a4 Mon Sep 17 00:00:00 2001 From: Pengpeng Hou Date: Thu, 2 Apr 2026 00:03:15 +0800 Subject: [PATCH 111/248] tracing/probe: reject non-closed empty immediate strings parse_probe_arg() accepts quoted immediate strings and passes the body after the opening quote to __parse_imm_string(). That helper currently computes strlen(str) and immediately dereferences str[len - 1], which underflows when the body is empty and not closed with double-quotation. Reject empty non-closed immediate strings before checking for the closing quote. Link: https://lore.kernel.org/all/20260401160315.88518-1-pengpeng@iscas.ac.cn/ Fixes: a42e3c4de964 ("tracing/probe: Add immediate string parameter support") Signed-off-by: Pengpeng Hou Reviewed-by: Steven Rostedt (Google) Signed-off-by: Masami Hiramatsu (Google) --- kernel/trace/trace_probe.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/trace/trace_probe.c b/kernel/trace/trace_probe.c index e0a5dc86c07e..e1c73065dae5 100644 --- a/kernel/trace/trace_probe.c +++ b/kernel/trace/trace_probe.c @@ -1068,7 +1068,7 @@ static int __parse_imm_string(char *str, char **pbuf, int offs) { size_t len = strlen(str); - if (str[len - 1] != '"') { + if (!len || str[len - 1] != '"') { trace_probe_log_err(offs + len, IMMSTR_NO_CLOSE); return -EINVAL; } From 82d8701b2c930d0e96b0dbc9115a218d791cb0d2 Mon Sep 17 00:00:00 2001 From: Haoze Xie Date: Mon, 6 Apr 2026 21:17:28 +0800 Subject: [PATCH 112/248] batman-adv: hold claim backbone gateways by reference batadv_bla_add_claim() can replace claim->backbone_gw and drop the old gateway's last reference while readers still follow the pointer. The netlink claim dump path dereferences claim->backbone_gw->orig and takes claim->backbone_gw->crc_lock without pinning the underlying backbone gateway. batadv_bla_check_claim() still has the same naked pointer access pattern. Reuse batadv_bla_claim_get_backbone_gw() in both readers so they operate on a stable gateway reference until the read-side work is complete. This keeps the dump and claim-check paths aligned with the lifetime rules introduced for the other BLA claim readers. Fixes: 23721387c409 ("batman-adv: add basic bridge loop avoidance code") Fixes: 04f3f5bf1883 ("batman-adv: add B.A.T.M.A.N. Dump BLA claims via netlink") Cc: stable@vger.kernel.org Reported-by: Yifan Wu Reported-by: Juefei Pu Co-developed-by: Yuan Tan Signed-off-by: Yuan Tan Suggested-by: Xin Liu Signed-off-by: Haoze Xie Signed-off-by: Ao Zhou Signed-off-by: Sven Eckelmann Signed-off-by: Simon Wunderlich --- net/batman-adv/bridge_loop_avoidance.c | 27 +++++++++++++++++--------- 1 file changed, 18 insertions(+), 9 deletions(-) diff --git a/net/batman-adv/bridge_loop_avoidance.c b/net/batman-adv/bridge_loop_avoidance.c index 3dc791c15bf7..648fa97ea913 100644 --- a/net/batman-adv/bridge_loop_avoidance.c +++ b/net/batman-adv/bridge_loop_avoidance.c @@ -2130,6 +2130,7 @@ batadv_bla_claim_dump_entry(struct sk_buff *msg, u32 portid, struct batadv_bla_claim *claim) { const u8 *primary_addr = primary_if->net_dev->dev_addr; + struct batadv_bla_backbone_gw *backbone_gw; u16 backbone_crc; bool is_own; void *hdr; @@ -2145,32 +2146,35 @@ batadv_bla_claim_dump_entry(struct sk_buff *msg, u32 portid, genl_dump_check_consistent(cb, hdr); - is_own = batadv_compare_eth(claim->backbone_gw->orig, - primary_addr); + backbone_gw = batadv_bla_claim_get_backbone_gw(claim); - spin_lock_bh(&claim->backbone_gw->crc_lock); - backbone_crc = claim->backbone_gw->crc; - spin_unlock_bh(&claim->backbone_gw->crc_lock); + is_own = batadv_compare_eth(backbone_gw->orig, primary_addr); + + spin_lock_bh(&backbone_gw->crc_lock); + backbone_crc = backbone_gw->crc; + spin_unlock_bh(&backbone_gw->crc_lock); if (is_own) if (nla_put_flag(msg, BATADV_ATTR_BLA_OWN)) { genlmsg_cancel(msg, hdr); - goto out; + goto put_backbone_gw; } if (nla_put(msg, BATADV_ATTR_BLA_ADDRESS, ETH_ALEN, claim->addr) || nla_put_u16(msg, BATADV_ATTR_BLA_VID, claim->vid) || nla_put(msg, BATADV_ATTR_BLA_BACKBONE, ETH_ALEN, - claim->backbone_gw->orig) || + backbone_gw->orig) || nla_put_u16(msg, BATADV_ATTR_BLA_CRC, backbone_crc)) { genlmsg_cancel(msg, hdr); - goto out; + goto put_backbone_gw; } genlmsg_end(msg, hdr); ret = 0; +put_backbone_gw: + batadv_backbone_gw_put(backbone_gw); out: return ret; } @@ -2448,6 +2452,7 @@ int batadv_bla_backbone_dump(struct sk_buff *msg, struct netlink_callback *cb) bool batadv_bla_check_claim(struct batadv_priv *bat_priv, u8 *addr, unsigned short vid) { + struct batadv_bla_backbone_gw *backbone_gw; struct batadv_bla_claim search_claim; struct batadv_bla_claim *claim = NULL; struct batadv_hard_iface *primary_if = NULL; @@ -2470,9 +2475,13 @@ bool batadv_bla_check_claim(struct batadv_priv *bat_priv, * return false. */ if (claim) { - if (!batadv_compare_eth(claim->backbone_gw->orig, + backbone_gw = batadv_bla_claim_get_backbone_gw(claim); + + if (!batadv_compare_eth(backbone_gw->orig, primary_if->net_dev->dev_addr)) ret = false; + + batadv_backbone_gw_put(backbone_gw); batadv_claim_put(claim); } From f58df566524ebcdfa394329c64f47e3c9257516e Mon Sep 17 00:00:00 2001 From: Baolin Wang Date: Tue, 17 Mar 2026 17:29:55 +0800 Subject: [PATCH 113/248] mm: filemap: fix nr_pages calculation overflow in filemap_map_pages() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When running stress-ng on my Arm64 machine with v7.0-rc3 kernel, I encountered some very strange crash issues showing up as "Bad page state": " [ 734.496287] BUG: Bad page state in process stress-ng-env pfn:415735fb [ 734.496427] page: refcount:0 mapcount:1 mapping:0000000000000000 index:0x4cf316 pfn:0x415735fb [ 734.496434] flags: 0x57fffe000000800(owner_2|node=1|zone=2|lastcpupid=0x3ffff) [ 734.496439] raw: 057fffe000000800 0000000000000000 dead000000000122 0000000000000000 [ 734.496440] raw: 00000000004cf316 0000000000000000 0000000000000000 0000000000000000 [ 734.496442] page dumped because: nonzero mapcount " After analyzing this page’s state, it is hard to understand why the mapcount is not 0 while the refcount is 0, since this page is not where the issue first occurred. By enabling the CONFIG_DEBUG_VM config, I can reproduce the crash as well and captured the first warning where the issue appears: " [ 734.469226] page: refcount:33 mapcount:0 mapping:00000000bef2d187 index:0x81a0 pfn:0x415735c0 [ 734.469304] head: order:5 mapcount:0 entire_mapcount:0 nr_pages_mapped:0 pincount:0 [ 734.469315] memcg:ffff000807a8ec00 [ 734.469320] aops:ext4_da_aops ino:100b6f dentry name(?):"stress-ng-mmaptorture-9397-0-2736200540" [ 734.469335] flags: 0x57fffe400000069(locked|uptodate|lru|head|node=1|zone=2|lastcpupid=0x3ffff) ...... [ 734.469364] page dumped because: VM_WARN_ON_FOLIO((_Generic((page + nr_pages - 1), const struct page *: (const struct folio *)_compound_head(page + nr_pages - 1), struct page *: (struct folio *)_compound_head(page + nr_pages - 1))) != folio) [ 734.469390] ------------[ cut here ]------------ [ 734.469393] WARNING: ./include/linux/rmap.h:351 at folio_add_file_rmap_ptes+0x3b8/0x468, CPU#90: stress-ng-mlock/9430 [ 734.469551] folio_add_file_rmap_ptes+0x3b8/0x468 (P) [ 734.469555] set_pte_range+0xd8/0x2f8 [ 734.469566] filemap_map_folio_range+0x190/0x400 [ 734.469579] filemap_map_pages+0x348/0x638 [ 734.469583] do_fault_around+0x140/0x198 ...... [ 734.469640] el0t_64_sync+0x184/0x188 " The code that triggers the warning is: "VM_WARN_ON_FOLIO(page_folio(page + nr_pages - 1) != folio, folio)", which indicates that set_pte_range() tried to map beyond the large folio’s size. By adding more debug information, I found that 'nr_pages' had overflowed in filemap_map_pages(), causing set_pte_range() to establish mappings for a range exceeding the folio size, potentially corrupting fields of pages that do not belong to this folio (e.g., page->_mapcount). After above analysis, I think the possible race is as follows: CPU 0 CPU 1 filemap_map_pages() ext4_setattr() //get and lock folio with old inode->i_size next_uptodate_folio() ....... //shrink the inode->i_size i_size_write(inode, attr->ia_size); //calculate the end_pgoff with the new inode->i_size file_end = DIV_ROUND_UP(i_size_read(mapping->host), PAGE_SIZE) - 1; end_pgoff = min(end_pgoff, file_end); ...... //nr_pages can be overflowed, cause xas.xa_index > end_pgoff end = folio_next_index(folio) - 1; nr_pages = min(end, end_pgoff) - xas.xa_index + 1; ...... //map large folio filemap_map_folio_range() ...... //truncate folios truncate_pagecache(inode, inode->i_size); To fix this issue, move the 'end_pgoff' calculation before next_uptodate_folio(), so the retrieved folio stays consistent with the file end to avoid 'nr_pages' calculation overflow. After this patch, the crash issue is gone. Link: https://lkml.kernel.org/r/1cf1ac59018fc647a87b0dad605d4056a71c14e4.1773739704.git.baolin.wang@linux.alibaba.com Fixes: 743a2753a02e ("filemap: cap PTE range to be created to allowed zero fill in folio_map_range()") Signed-off-by: Baolin Wang Reported-by: Yuanhe Shu Tested-by: Yuanhe Shu Acked-by: Kiryl Shutsemau (Meta) Acked-by: David Hildenbrand (Arm) Cc: Christian Brauner Cc: Daniel Gomez Cc: "Darrick J. Wong" Cc: Dave Chinner Cc: David Howells Cc: Hannes Reinecke Cc: Lorenzo Stoakes (Oracle) Cc: Luis Chamberalin Cc: Matthew Wilcox (Oracle) Cc: Pankaj Raghav Cc: Signed-off-by: Andrew Morton --- mm/filemap.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/mm/filemap.c b/mm/filemap.c index 406cef06b684..3c1e785542dd 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -3883,14 +3883,19 @@ vm_fault_t filemap_map_pages(struct vm_fault *vmf, unsigned int nr_pages = 0, folio_type; unsigned short mmap_miss = 0, mmap_miss_saved; + /* + * Recalculate end_pgoff based on file_end before calling + * next_uptodate_folio() to avoid races with concurrent + * truncation. + */ + file_end = DIV_ROUND_UP(i_size_read(mapping->host), PAGE_SIZE) - 1; + end_pgoff = min(end_pgoff, file_end); + rcu_read_lock(); folio = next_uptodate_folio(&xas, mapping, end_pgoff); if (!folio) goto out; - file_end = DIV_ROUND_UP(i_size_read(mapping->host), PAGE_SIZE) - 1; - end_pgoff = min(end_pgoff, file_end); - /* * Do not allow to map with PMD across i_size to preserve * SIGBUS semantics. From 307e0c5859b0aecc34180468b1aa76684adcf539 Mon Sep 17 00:00:00 2001 From: Leo Timmins Date: Wed, 25 Mar 2026 12:46:07 +0800 Subject: [PATCH 114/248] liveupdate: propagate file deserialization failures luo_session_deserialize() ignored the return value from luo_file_deserialize(). As a result, a session could be left partially restored even though the /dev/liveupdate open path treats deserialization failures as fatal. Propagate the error so a failed file deserialization aborts session deserialization instead of silently continuing. Link: https://lkml.kernel.org/r/20260325044608.8407-1-leotimmins1974@gmail.com Link: https://lkml.kernel.org/r/20260325044608.8407-2-leotimmins1974@gmail.com Fixes: 16cec0d26521 ("liveupdate: luo_session: add ioctls for file preservation") Signed-off-by: Leo Timmins Reviewed-by: Pasha Tatashin Reviewed-by: Pratyush Yadav Cc: Mike Rapoport Cc: Signed-off-by: Andrew Morton --- kernel/liveupdate/luo_session.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/kernel/liveupdate/luo_session.c b/kernel/liveupdate/luo_session.c index 783677295640..25ae704d7787 100644 --- a/kernel/liveupdate/luo_session.c +++ b/kernel/liveupdate/luo_session.c @@ -558,8 +558,13 @@ int luo_session_deserialize(void) } scoped_guard(mutex, &session->mutex) { - luo_file_deserialize(&session->file_set, - &sh->ser[i].file_set_ser); + err = luo_file_deserialize(&session->file_set, + &sh->ser[i].file_set_ser); + } + if (err) { + pr_warn("Failed to deserialize files for session [%s] %pe\n", + session->name, ERR_PTR(err)); + return err; } } From cece9dc61daab6006d3ac9d36a0df2dd58fef18d Mon Sep 17 00:00:00 2001 From: Joanne Koong Date: Thu, 26 Mar 2026 14:51:27 -0700 Subject: [PATCH 115/248] mm: reinstate unconditional writeback start in balance_dirty_pages() Commit 64dd89ae01f2 ("mm/block/fs: remove laptop_mode") removed this unconditional writeback start from balance_dirty_pages(): if (unlikely(!writeback_in_progress(wb))) wb_start_background_writeback(wb); This logic needs to be reinstated to prevent performance regressions for strictlimited BDIs and memcg setups. The problem occurs because: a) For strictlimited BDIs, throttling is calculated using per-wb thresholds. The per-wb threshold can be exceeded even when the global dirty threshold was not exceeded (nr_dirty < gdtc->bg_thresh) b) For memcg-based throttling, memcg uses its own dirty count / thresholds and can trigger throttling even when the global threshold isn't exceeded Without the unconditional writeback start, IO is throttled as it waits for dirty pages to be written back but there is no writeback running. This leads to severe stalls. On fuse, buffered write performance dropped from 1400 MiB/s to 2000 KiB/s. Reinstate the unconditional writeback start so that writeback is guaranteed to be running whenever IO needs to be throttled. Link: https://lkml.kernel.org/r/20260326215127.3857682-2-joannelkoong@gmail.com Fixes: 64dd89ae01f2 ("mm/block/fs: remove laptop_mode") Signed-off-by: Joanne Koong Reviewed-by: Christoph Hellwig Reviewed-by: Jan Kara Acked-by: Johannes Weiner Cc: Matthew Wilcox (Oracle) Cc: Signed-off-by: Andrew Morton --- mm/page-writeback.c | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/mm/page-writeback.c b/mm/page-writeback.c index 601a5e048d12..c1a4b32af1a7 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -1858,6 +1858,27 @@ static int balance_dirty_pages(struct bdi_writeback *wb, break; } + /* + * Unconditionally start background writeback if it's not + * already in progress. We need to do this because the global + * dirty threshold check above (nr_dirty > gdtc->bg_thresh) + * doesn't account for these cases: + * + * a) strictlimit BDIs: throttling is calculated using per-wb + * thresholds. The per-wb threshold can be exceeded even when + * nr_dirty < gdtc->bg_thresh + * + * b) memcg-based throttling: memcg uses its own dirty count and + * thresholds and can trigger throttling even when global + * nr_dirty < gdtc->bg_thresh + * + * Writeback needs to be started else the writer stalls in the + * throttle loop waiting for dirty pages to be written back + * while no writeback is running. + */ + if (unlikely(!writeback_in_progress(wb))) + wb_start_background_writeback(wb); + mem_cgroup_flush_foreign(wb); /* From 0199390a6b92fc21860e1b858abf525c7e73b956 Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Thu, 26 Mar 2026 17:32:22 -0700 Subject: [PATCH 116/248] mm/damon/sysfs: dealloc repeat_call_control if damon_call() fails damon_call() for repeat_call_control of DAMON_SYSFS could fail if somehow the kdamond is stopped before the damon_call(). It could happen, for example, when te damon context was made for monitroing of a virtual address processes, and the process is terminated immediately, before the damon_call() invocation. In the case, the dyanmically allocated repeat_call_control is not deallocated and leaked. Fix the leak by deallocating the repeat_call_control under the damon_call() failure. This issue is discovered by sashiko [1]. Link: https://lkml.kernel.org/r/20260327003224.55752-1-sj@kernel.org Link: https://lore.kernel.org/20260320020630.962-1-sj@kernel.org [1] Fixes: 04a06b139ec0 ("mm/damon/sysfs: use dynamically allocated repeat mode damon_call_control") Signed-off-by: SeongJae Park Cc: [6.17+] Signed-off-by: Andrew Morton --- mm/damon/sysfs.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/mm/damon/sysfs.c b/mm/damon/sysfs.c index 6a44a2f3d8fc..eefa959aa30a 100644 --- a/mm/damon/sysfs.c +++ b/mm/damon/sysfs.c @@ -1670,7 +1670,8 @@ static int damon_sysfs_turn_damon_on(struct damon_sysfs_kdamond *kdamond) repeat_call_control->data = kdamond; repeat_call_control->repeat = true; repeat_call_control->dealloc_on_cancel = true; - damon_call(ctx, repeat_call_control); + if (damon_call(ctx, repeat_call_control)) + kfree(repeat_call_control); return err; } From 2ecbe06abf9bfb2261cd6464a6bc3a3615625402 Mon Sep 17 00:00:00 2001 From: Hao Li Date: Mon, 30 Mar 2026 11:57:49 +0800 Subject: [PATCH 117/248] mm/memory_hotplug: maintain N_NORMAL_MEMORY during hotplug N_NORMAL_MEMORY is initialized from zone population at boot, but memory hotplug currently only updates N_MEMORY. As a result, a node that gains normal memory via hotplug can remain invisible to users iterating over N_NORMAL_MEMORY, while a node that loses its last normal memory can stay incorrectly marked as such. The most visible effect is that /sys/devices/system/node/has_normal_memory does not report a node even after that node has gained normal memory via hotplug. Also, list_lru-based shrinkers can undercount objects on such a node and may skip reclaim on that node entirely, which can lead to a higher memory footprint than expected. Restore N_NORMAL_MEMORY maintenance directly in online_pages() and offline_pages(). Set the bit when a node that currently lacks normal memory onlines pages into a zone <= ZONE_NORMAL, and clear it when offlining removes the last present pages from zones <= ZONE_NORMAL. This restores the intended semantics without bringing back the old status_change_nid_normal notifier plumbing which was removed in 8d2882a8edb8. Current users that benefit include list_lru, zswap, nfsd filecache, hugetlb_cgroup, and has_normal_memory sysfs reporting. Link: https://lkml.kernel.org/r/20260330035941.518186-1-hao.li@linux.dev Fixes: 8d2882a8edb8 ("mm,memory_hotplug: remove status_change_nid_normal and update documentation") Signed-off-by: Hao Li Reviewed-by: Harry Yoo (Oracle) Acked-by: Vlastimil Babka (SUSE) Reviewed-by: Joshua Hahn Acked-by: David Hildenbrand (Arm) Cc: Oscar Salvador Cc: Vlastimil Babka Cc: Signed-off-by: Andrew Morton --- mm/memory_hotplug.c | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index bc805029da51..05a47953ef21 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -1209,6 +1209,13 @@ int online_pages(unsigned long pfn, unsigned long nr_pages, if (node_arg.nid >= 0) node_set_state(nid, N_MEMORY); + /* + * Check whether we are adding normal memory to the node for the first + * time. + */ + if (!node_state(nid, N_NORMAL_MEMORY) && zone_idx(zone) <= ZONE_NORMAL) + node_set_state(nid, N_NORMAL_MEMORY); + if (need_zonelists_rebuild) build_all_zonelists(NULL); @@ -1908,6 +1915,8 @@ int offline_pages(unsigned long start_pfn, unsigned long nr_pages, unsigned long flags; char *reason; int ret; + unsigned long normal_pages = 0; + enum zone_type zt; /* * {on,off}lining is constrained to full memory sections (or more @@ -2055,6 +2064,17 @@ int offline_pages(unsigned long start_pfn, unsigned long nr_pages, /* reinitialise watermarks and update pcp limits */ init_per_zone_wmark_min(); + /* + * Check whether this operation removes the last normal memory from + * the node. We do this before clearing N_MEMORY to avoid the possible + * transient "!N_MEMORY && N_NORMAL_MEMORY" state. + */ + if (zone_idx(zone) <= ZONE_NORMAL) { + for (zt = 0; zt <= ZONE_NORMAL; zt++) + normal_pages += pgdat->node_zones[zt].present_pages; + if (!normal_pages) + node_clear_state(node, N_NORMAL_MEMORY); + } /* * Make sure to mark the node as memory-less before rebuilding the zone * list. Otherwise this node would still appear in the fallback lists. From 894f99eb535edc4514f756818f3c4f688ba53a59 Mon Sep 17 00:00:00 2001 From: Sechang Lim Date: Tue, 31 Mar 2026 18:08:11 +0000 Subject: [PATCH 118/248] mm/vma: fix memory leak in __mmap_region() commit 605f6586ecf7 ("mm/vma: do not leak memory when .mmap_prepare swaps the file") handled the success path by skipping get_file() via file_doesnt_need_get, but missed the error path. When /dev/zero is mmap'd with MAP_SHARED, mmap_zero_prepare() calls shmem_zero_setup_desc() which allocates a new shmem file to back the mapping. If __mmap_new_vma() subsequently fails, this replacement file is never fput()'d - the original is released by ksys_mmap_pgoff(), but nobody releases the new one. Add fput() for the swapped file in the error path. Reproducible with fault injection. FAULT_INJECTION: forcing a failure. name failslab, interval 1, probability 0, space 0, times 1 CPU: 2 UID: 0 PID: 366 Comm: syz.7.14 Not tainted 7.0.0-rc6 #2 PREEMPT(full) Hardware name: QEMU Ubuntu 24.04 PC v2 (i440FX + PIIX, arch_caps fix, 1996), BIOS 1.16.3-debian-1.16.3-2 04/01/2014 Call Trace: dump_stack_lvl+0x164/0x1f0 should_fail_ex+0x525/0x650 should_failslab+0xdf/0x140 kmem_cache_alloc_noprof+0x78/0x630 vm_area_alloc+0x24/0x160 __mmap_region+0xf6b/0x2660 mmap_region+0x2eb/0x3a0 do_mmap+0xc79/0x1240 vm_mmap_pgoff+0x252/0x4c0 ksys_mmap_pgoff+0xf8/0x120 __x64_sys_mmap+0x12a/0x190 do_syscall_64+0xa9/0x580 entry_SYSCALL_64_after_hwframe+0x76/0x7e kmemleak: 1 new suspected memory leaks (see /sys/kernel/debug/kmemleak) BUG: memory leak unreferenced object 0xffff8881118aca80 (size 360): comm "syz.7.14", pid 366, jiffies 4294913255 hex dump (first 32 bytes): 00 00 00 00 ad 4e ad de ff ff ff ff 00 00 00 00 .....N.......... ff ff ff ff ff ff ff ff c0 28 4d ae ff ff ff ff .........(M..... backtrace (crc db0f53bc): kmem_cache_alloc_noprof+0x3ab/0x630 alloc_empty_file+0x5a/0x1e0 alloc_file_pseudo+0x135/0x220 __shmem_file_setup+0x274/0x420 shmem_zero_setup_desc+0x9c/0x170 mmap_zero_prepare+0x123/0x140 __mmap_region+0xdda/0x2660 mmap_region+0x2eb/0x3a0 do_mmap+0xc79/0x1240 vm_mmap_pgoff+0x252/0x4c0 ksys_mmap_pgoff+0xf8/0x120 __x64_sys_mmap+0x12a/0x190 do_syscall_64+0xa9/0x580 entry_SYSCALL_64_after_hwframe+0x76/0x7e Found by syzkaller. Link: https://lkml.kernel.org/r/20260331180811.1333348-1-rhkrqnwk98@gmail.com Fixes: 605f6586ecf7 ("mm/vma: do not leak memory when .mmap_prepare swaps the file") Signed-off-by: Sechang Lim Reviewed-by: Lorenzo Stoakes (Oracle) Acked-by: Vlastimil Babka (SUSE) Cc: Jann Horn Cc: Liam Howlett Cc: Lorenzo Stoakes (Oracle) Cc: Pedro Falcato Cc: Signed-off-by: Andrew Morton --- mm/vma.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/mm/vma.c b/mm/vma.c index be64f781a3aa..c8df5f561ad7 100644 --- a/mm/vma.c +++ b/mm/vma.c @@ -2781,6 +2781,13 @@ static unsigned long __mmap_region(struct file *file, unsigned long addr, if (map.charged) vm_unacct_memory(map.charged); abort_munmap: + /* + * This indicates that .mmap_prepare has set a new file, differing from + * desc->vm_file. But since we're aborting the operation, only the + * original file will be cleaned up. Ensure we clean up both. + */ + if (map.file_doesnt_need_get) + fput(map.file); vms_abort_munmap_vmas(&map.vms, &map.mas_detach); return error; } From 4c04c6b47c361612b1d70cec8f7a60b1482d1400 Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Thu, 2 Apr 2026 06:44:17 -0700 Subject: [PATCH 119/248] mm/damon/stat: deallocate damon_call() failure leaking damon_ctx MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit damon_stat_start() always allocates the module's damon_ctx object (damon_stat_context). Meanwhile, if damon_call() in the function fails, the damon_ctx object is not deallocated. Hence, if the damon_call() is failed, and the user writes Y to “enabled” again, the previously allocated damon_ctx object is leaked. This cannot simply be fixed by deallocating the damon_ctx object when damon_call() fails. That's because damon_call() failure doesn't guarantee the kdamond main function, which accesses the damon_ctx object, is completely finished. In other words, if damon_stat_start() deallocates the damon_ctx object after damon_call() failure, the not-yet-terminated kdamond could access the freed memory (use-after-free). Fix the leak while avoiding the use-after-free by keeping returning damon_stat_start() without deallocating the damon_ctx object after damon_call() failure, but deallocating it when the function is invoked again and the kdamond is completely terminated. If the kdamond is not yet terminated, simply return -EAGAIN, as the kdamond will soon be terminated. The issue was discovered [1] by sashiko. Link: https://lkml.kernel.org/r/20260402134418.74121-1-sj@kernel.org Link: https://lore.kernel.org/20260401012428.86694-1-sj@kernel.org [1] Fixes: 405f61996d9d ("mm/damon/stat: use damon_call() repeat mode instead of damon_callback") Signed-off-by: SeongJae Park Cc: # 6.17.x Signed-off-by: Andrew Morton --- mm/damon/stat.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/mm/damon/stat.c b/mm/damon/stat.c index cf2c5a541eee..60351a719460 100644 --- a/mm/damon/stat.c +++ b/mm/damon/stat.c @@ -245,6 +245,12 @@ static int damon_stat_start(void) { int err; + if (damon_stat_context) { + if (damon_is_running(damon_stat_context)) + return -EAGAIN; + damon_destroy_ctx(damon_stat_context); + } + damon_stat_context = damon_stat_build_ctx(); if (!damon_stat_context) return -ENOMEM; @@ -261,6 +267,7 @@ static void damon_stat_stop(void) { damon_stop(&damon_stat_context, 1); damon_destroy_ctx(damon_stat_context); + damon_stat_context = NULL; } static int damon_stat_enabled_store( From 7bc5da4842bed3252d26e742213741a4d0ac1b14 Mon Sep 17 00:00:00 2001 From: Joseph Qi Date: Fri, 3 Apr 2026 14:38:30 +0800 Subject: [PATCH 120/248] ocfs2: fix out-of-bounds write in ocfs2_write_end_inline KASAN reports a use-after-free write of 4086 bytes in ocfs2_write_end_inline, called from ocfs2_write_end_nolock during a copy_file_range splice fallback on a corrupted ocfs2 filesystem mounted on a loop device. The actual bug is an out-of-bounds write past the inode block buffer, not a true use-after-free. The write overflows into an adjacent freed page, which KASAN reports as UAF. The root cause is that ocfs2_try_to_write_inline_data trusts the on-disk id_count field to determine whether a write fits in inline data. On a corrupted filesystem, id_count can exceed the physical maximum inline data capacity, causing writes to overflow the inode block buffer. Call trace (crash path): vfs_copy_file_range (fs/read_write.c:1634) do_splice_direct splice_direct_to_actor iter_file_splice_write ocfs2_file_write_iter generic_perform_write ocfs2_write_end ocfs2_write_end_nolock (fs/ocfs2/aops.c:1949) ocfs2_write_end_inline (fs/ocfs2/aops.c:1915) memcpy_from_folio <-- KASAN: write OOB So add id_count upper bound check in ocfs2_validate_inode_block() to alongside the existing i_size check to fix it. Link: https://lkml.kernel.org/r/20260403063830.3662739-1-joseph.qi@linux.alibaba.com Signed-off-by: Joseph Qi Reported-by: syzbot+62c1793956716ea8b28a@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=62c1793956716ea8b28a Cc: Mark Fasheh Cc: Joel Becker Cc: Junxiao Bi Cc: Changwei Ge Cc: Jun Piao Cc: Heming Zhao Cc: Signed-off-by: Andrew Morton --- fs/ocfs2/inode.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/fs/ocfs2/inode.c b/fs/ocfs2/inode.c index 03a51662ea8e..a2ccd8011706 100644 --- a/fs/ocfs2/inode.c +++ b/fs/ocfs2/inode.c @@ -1505,6 +1505,16 @@ int ocfs2_validate_inode_block(struct super_block *sb, goto bail; } + if (le16_to_cpu(data->id_count) > + ocfs2_max_inline_data_with_xattr(sb, di)) { + rc = ocfs2_error(sb, + "Invalid dinode #%llu: inline data id_count %u exceeds max %d\n", + (unsigned long long)bh->b_blocknr, + le16_to_cpu(data->id_count), + ocfs2_max_inline_data_with_xattr(sb, di)); + goto bail; + } + if (le64_to_cpu(di->i_size) > le16_to_cpu(data->id_count)) { rc = ocfs2_error(sb, "Invalid dinode #%llu: inline data i_size %llu exceeds id_count %u\n", From 591478118293c1bd628de330a99eb1eb2ef8d76b Mon Sep 17 00:00:00 2001 From: Emil Tantilov Date: Thu, 19 Mar 2026 14:13:33 -0700 Subject: [PATCH 121/248] idpf: fix PREEMPT_RT raw/bh spinlock nesting for async VC handling Switch from using the completion's raw spinlock to a local lock in the idpf_vc_xn struct. The conversion is safe because complete/_all() are called outside the lock and there is no reason to share the completion lock in the current logic. This avoids invalid wait context reported by the kernel due to the async handler taking BH spinlock: [ 805.726977] ============================= [ 805.726991] [ BUG: Invalid wait context ] [ 805.727006] 7.0.0-rc2-net-devq-031026+ #28 Tainted: G S OE [ 805.727026] ----------------------------- [ 805.727038] kworker/u261:0/572 is trying to lock: [ 805.727051] ff190da6a8dbb6a0 (&vport_config->mac_filter_list_lock){+...}-{3:3}, at: idpf_mac_filter_async_handler+0xe9/0x260 [idpf] [ 805.727099] other info that might help us debug this: [ 805.727111] context-{5:5} [ 805.727119] 3 locks held by kworker/u261:0/572: [ 805.727132] #0: ff190da6db3e6148 ((wq_completion)idpf-0000:83:00.0-mbx){+.+.}-{0:0}, at: process_one_work+0x4b5/0x730 [ 805.727163] #1: ff3c6f0a6131fe50 ((work_completion)(&(&adapter->mbx_task)->work)){+.+.}-{0:0}, at: process_one_work+0x1e5/0x730 [ 805.727191] #2: ff190da765190020 (&x->wait#34){+.+.}-{2:2}, at: idpf_recv_mb_msg+0xc8/0x710 [idpf] [ 805.727218] stack backtrace: ... [ 805.727238] Workqueue: idpf-0000:83:00.0-mbx idpf_mbx_task [idpf] [ 805.727247] Call Trace: [ 805.727249] [ 805.727251] dump_stack_lvl+0x77/0xb0 [ 805.727259] __lock_acquire+0xb3b/0x2290 [ 805.727268] ? __irq_work_queue_local+0x59/0x130 [ 805.727275] lock_acquire+0xc6/0x2f0 [ 805.727277] ? idpf_mac_filter_async_handler+0xe9/0x260 [idpf] [ 805.727284] ? _printk+0x5b/0x80 [ 805.727290] _raw_spin_lock_bh+0x38/0x50 [ 805.727298] ? idpf_mac_filter_async_handler+0xe9/0x260 [idpf] [ 805.727303] idpf_mac_filter_async_handler+0xe9/0x260 [idpf] [ 805.727310] idpf_recv_mb_msg+0x1c8/0x710 [idpf] [ 805.727317] process_one_work+0x226/0x730 [ 805.727322] worker_thread+0x19e/0x340 [ 805.727325] ? __pfx_worker_thread+0x10/0x10 [ 805.727328] kthread+0xf4/0x130 [ 805.727333] ? __pfx_kthread+0x10/0x10 [ 805.727336] ret_from_fork+0x32c/0x410 [ 805.727345] ? __pfx_kthread+0x10/0x10 [ 805.727347] ret_from_fork_asm+0x1a/0x30 [ 805.727354] Fixes: 34c21fa894a1 ("idpf: implement virtchnl transaction manager") Cc: stable@vger.kernel.org Suggested-by: Sebastian Andrzej Siewior Reported-by: Ray Zhang Signed-off-by: Emil Tantilov Reviewed-by: Aleksandr Loktionov Acked-by: Sebastian Andrzej Siewior Tested-by: Samuel Salin Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/idpf/idpf_virtchnl.c | 14 +++++--------- drivers/net/ethernet/intel/idpf/idpf_virtchnl.h | 5 +++-- 2 files changed, 8 insertions(+), 11 deletions(-) diff --git a/drivers/net/ethernet/intel/idpf/idpf_virtchnl.c b/drivers/net/ethernet/intel/idpf/idpf_virtchnl.c index 113ecfc16dd7..582e0c8e9dc0 100644 --- a/drivers/net/ethernet/intel/idpf/idpf_virtchnl.c +++ b/drivers/net/ethernet/intel/idpf/idpf_virtchnl.c @@ -287,26 +287,21 @@ int idpf_send_mb_msg(struct idpf_adapter *adapter, struct idpf_ctlq_info *asq, return err; } -/* API for virtchnl "transaction" support ("xn" for short). - * - * We are reusing the completion lock to serialize the accesses to the - * transaction state for simplicity, but it could be its own separate synchro - * as well. For now, this API is only used from within a workqueue context; - * raw_spin_lock() is enough. - */ +/* API for virtchnl "transaction" support ("xn" for short). */ + /** * idpf_vc_xn_lock - Request exclusive access to vc transaction * @xn: struct idpf_vc_xn* to access */ #define idpf_vc_xn_lock(xn) \ - raw_spin_lock(&(xn)->completed.wait.lock) + spin_lock(&(xn)->lock) /** * idpf_vc_xn_unlock - Release exclusive access to vc transaction * @xn: struct idpf_vc_xn* to access */ #define idpf_vc_xn_unlock(xn) \ - raw_spin_unlock(&(xn)->completed.wait.lock) + spin_unlock(&(xn)->lock) /** * idpf_vc_xn_release_bufs - Release reference to reply buffer(s) and @@ -338,6 +333,7 @@ static void idpf_vc_xn_init(struct idpf_vc_xn_manager *vcxn_mngr) xn->state = IDPF_VC_XN_IDLE; xn->idx = i; idpf_vc_xn_release_bufs(xn); + spin_lock_init(&xn->lock); init_completion(&xn->completed); } diff --git a/drivers/net/ethernet/intel/idpf/idpf_virtchnl.h b/drivers/net/ethernet/intel/idpf/idpf_virtchnl.h index fe065911ad5a..6876e3ed9d1b 100644 --- a/drivers/net/ethernet/intel/idpf/idpf_virtchnl.h +++ b/drivers/net/ethernet/intel/idpf/idpf_virtchnl.h @@ -42,8 +42,8 @@ typedef int (*async_vc_cb) (struct idpf_adapter *, struct idpf_vc_xn *, * struct idpf_vc_xn - Data structure representing virtchnl transactions * @completed: virtchnl event loop uses that to signal when a reply is * available, uses kernel completion API - * @state: virtchnl event loop stores the data below, protected by the - * completion's lock. + * @lock: protects the transaction state fields below + * @state: virtchnl event loop stores the data below, protected by @lock * @reply_sz: Original size of reply, may be > reply_buf.iov_len; it will be * truncated on its way to the receiver thread according to * reply_buf.iov_len. @@ -58,6 +58,7 @@ typedef int (*async_vc_cb) (struct idpf_adapter *, struct idpf_vc_xn *, */ struct idpf_vc_xn { struct completion completed; + spinlock_t lock; enum idpf_vc_xn_state state; size_t reply_sz; struct kvec reply; From d086fae65006368618104ba4c57779440eab2217 Mon Sep 17 00:00:00 2001 From: Emil Tantilov Date: Thu, 19 Mar 2026 14:13:34 -0700 Subject: [PATCH 122/248] idpf: improve locking around idpf_vc_xn_push_free() Protect the set_bit() operation for the free_xn bitmask in idpf_vc_xn_push_free(), to make the locking consistent with rest of the code and avoid potential races in that logic. Fixes: 34c21fa894a1 ("idpf: implement virtchnl transaction manager") Cc: stable@vger.kernel.org Reported-by: Ray Zhang Signed-off-by: Emil Tantilov Reviewed-by: Aleksandr Loktionov Acked-by: Sebastian Andrzej Siewior Tested-by: Samuel Salin Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/idpf/idpf_virtchnl.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/intel/idpf/idpf_virtchnl.c b/drivers/net/ethernet/intel/idpf/idpf_virtchnl.c index 582e0c8e9dc0..fbd5a15b015c 100644 --- a/drivers/net/ethernet/intel/idpf/idpf_virtchnl.c +++ b/drivers/net/ethernet/intel/idpf/idpf_virtchnl.c @@ -402,7 +402,9 @@ static void idpf_vc_xn_push_free(struct idpf_vc_xn_manager *vcxn_mngr, struct idpf_vc_xn *xn) { idpf_vc_xn_release_bufs(xn); + spin_lock_bh(&vcxn_mngr->xn_bm_lock); set_bit(xn->idx, vcxn_mngr->free_xn_bm); + spin_unlock_bh(&vcxn_mngr->xn_bm_lock); } /** From 8e2a2420e267a515f6db56a6e9570b5cacd92919 Mon Sep 17 00:00:00 2001 From: Emil Tantilov Date: Thu, 19 Mar 2026 14:13:35 -0700 Subject: [PATCH 123/248] idpf: set the payload size before calling the async handler Set the payload size before forwarding the reply to the async handler. Without this, xn->reply_sz will be 0 and idpf_mac_filter_async_handler() will never get past the size check. Fixes: 34c21fa894a1 ("idpf: implement virtchnl transaction manager") Cc: stable@vger.kernel.org Signed-off-by: Emil Tantilov Reviewed-by: Aleksandr Loktionov Reviewed-by: Li Li Acked-by: Sebastian Andrzej Siewior Tested-by: Samuel Salin Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/idpf/idpf_virtchnl.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/ethernet/intel/idpf/idpf_virtchnl.c b/drivers/net/ethernet/intel/idpf/idpf_virtchnl.c index fbd5a15b015c..be66f9b2e101 100644 --- a/drivers/net/ethernet/intel/idpf/idpf_virtchnl.c +++ b/drivers/net/ethernet/intel/idpf/idpf_virtchnl.c @@ -615,6 +615,10 @@ idpf_vc_xn_forward_reply(struct idpf_adapter *adapter, err = -ENXIO; goto out_unlock; case IDPF_VC_XN_ASYNC: + /* Set reply_sz from the actual payload so that async_handler + * can evaluate the response. + */ + xn->reply_sz = ctlq_msg->data_len; err = idpf_vc_xn_forward_async(adapter, xn, ctlq_msg); idpf_vc_xn_unlock(xn); return err; From bb3f21edc7056cdf44a7f7bd7ba65af40741838c Mon Sep 17 00:00:00 2001 From: Kohei Enju Date: Sun, 1 Feb 2026 14:14:00 +0000 Subject: [PATCH 124/248] ice: ptp: don't WARN when controlling PF is unavailable In VFIO passthrough setups, it is possible to pass through only a PF which doesn't own the source timer. In that case the PTP controlling PF (adapter->ctrl_pf) is never initialized in the VM, so ice_get_ctrl_ptp() returns NULL and triggers WARN_ON() in ice_ptp_setup_pf(). Since this is an expected behavior in that configuration, replace WARN_ON() with an informational message and return -EOPNOTSUPP. Fixes: e800654e85b5 ("ice: Use ice_adapter for PTP shared data instead of auxdev") Signed-off-by: Kohei Enju Reviewed-by: Aleksandr Loktionov Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_ptp.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/ice/ice_ptp.c b/drivers/net/ethernet/intel/ice/ice_ptp.c index 094e96219f45..a3c5f0a5e09b 100644 --- a/drivers/net/ethernet/intel/ice/ice_ptp.c +++ b/drivers/net/ethernet/intel/ice/ice_ptp.c @@ -3080,7 +3080,13 @@ static int ice_ptp_setup_pf(struct ice_pf *pf) struct ice_ptp *ctrl_ptp = ice_get_ctrl_ptp(pf); struct ice_ptp *ptp = &pf->ptp; - if (WARN_ON(!ctrl_ptp) || pf->hw.mac_type == ICE_MAC_UNKNOWN) + if (!ctrl_ptp) { + dev_info(ice_pf_to_dev(pf), + "PTP unavailable: no controlling PF\n"); + return -EOPNOTSUPP; + } + + if (pf->hw.mac_type == ICE_MAC_UNKNOWN) return -ENODEV; INIT_LIST_HEAD(&ptp->port.list_node); From bf6dbadb72b997721e9b67348652926c076878d1 Mon Sep 17 00:00:00 2001 From: Petr Oros Date: Fri, 27 Mar 2026 08:46:58 +0100 Subject: [PATCH 125/248] ice: fix PTP timestamping broken by SyncE code on E825C The E825C SyncE support added in commit ad1df4f2d591 ("ice: dpll: Support E825-C SyncE and dynamic pin discovery") introduced a SyncE reconfiguration block in ice_ptp_link_change() that prevents ice_ptp_port_phy_restart() from being called in several error paths. Without the PHY restart, PTP timestamps stop working after any link change event. There are three ways the PHY restart gets blocked: 1. When DPLL initialization fails (e.g. missing ACPI firmware node properties), ICE_FLAG_DPLL is not set and the function returns early before reaching the PHY restart. 2. When ice_tspll_bypass_mux_active_e825c() fails to read the CGU register, WARN_ON_ONCE fires and the function returns early. 3. When ice_tspll_cfg_synce_ethdiv_e825c() fails to configure the clock divider for an active pin, same early return. SyncE and PTP are independent features. SyncE reconfiguration failures must not prevent the PTP PHY restart that is essential for timestamp recovery after link changes. Fix by making the entire SyncE block conditional on ICE_FLAG_DPLL without an early return, and replacing the WARN_ON_ONCE + return error handling inside the loop with dev_err_once + break. The function always proceeds to ice_ptp_port_phy_restart() regardless of SyncE errors. Fixes: ad1df4f2d591 ("ice: dpll: Support E825-C SyncE and dynamic pin discovery") Signed-off-by: Petr Oros Reviewed-by: Grzegorz Nitka Reviewed-by: Aleksandr Loktionov Tested-by: Sunitha Mekala (A Contingent worker at Intel) Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_ptp.c | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice_ptp.c b/drivers/net/ethernet/intel/ice/ice_ptp.c index a3c5f0a5e09b..6cb0cf7a9891 100644 --- a/drivers/net/ethernet/intel/ice/ice_ptp.c +++ b/drivers/net/ethernet/intel/ice/ice_ptp.c @@ -1296,12 +1296,10 @@ void ice_ptp_link_change(struct ice_pf *pf, bool linkup) if (pf->hw.reset_ongoing) return; - if (hw->mac_type == ICE_MAC_GENERIC_3K_E825) { + if (hw->mac_type == ICE_MAC_GENERIC_3K_E825 && + test_bit(ICE_FLAG_DPLL, pf->flags)) { int pin, err; - if (!test_bit(ICE_FLAG_DPLL, pf->flags)) - return; - mutex_lock(&pf->dplls.lock); for (pin = 0; pin < ICE_SYNCE_CLK_NUM; pin++) { enum ice_synce_clk clk_pin; @@ -1314,15 +1312,19 @@ void ice_ptp_link_change(struct ice_pf *pf, bool linkup) port_num, &active, clk_pin); - if (WARN_ON_ONCE(err)) { - mutex_unlock(&pf->dplls.lock); - return; + if (err) { + dev_err_once(ice_pf_to_dev(pf), + "Failed to read SyncE bypass mux for pin %d, err %d\n", + pin, err); + break; } err = ice_tspll_cfg_synce_ethdiv_e825c(hw, clk_pin); - if (active && WARN_ON_ONCE(err)) { - mutex_unlock(&pf->dplls.lock); - return; + if (active && err) { + dev_err_once(ice_pf_to_dev(pf), + "Failed to configure SyncE ETH divider for pin %d, err %d\n", + pin, err); + break; } } mutex_unlock(&pf->dplls.lock); From d8ae40dc20cbd7bb6e6b36a928e2db2296060ad2 Mon Sep 17 00:00:00 2001 From: Aleksandr Loktionov Date: Wed, 4 Mar 2026 09:42:32 +0100 Subject: [PATCH 126/248] ixgbe: stop re-reading flash on every get_drvinfo for e610 ixgbe_get_drvinfo() calls ixgbe_refresh_fw_version() on every ethtool query for e610 adapters. That ends up in ixgbe_discover_flash_size(), which bisects the full 16 MB NVM space issuing one ACI command per step (~20 ms each, ~24 steps total = ~500 ms). Profiling on an idle E610-XAT2 system with telegraf scraping ethtool stats every 10 seconds: kretprobe:ixgbe_get_drvinfo took 527603 us kretprobe:ixgbe_get_drvinfo took 523978 us kretprobe:ixgbe_get_drvinfo took 552975 us kretprobe:ice_get_drvinfo took 3 us kretprobe:igb_get_drvinfo took 2 us kretprobe:i40e_get_drvinfo took 5 us The half-second stall happens under the RTNL lock, causing visible latency on ip-link and friends. The FW version can only change after an EMPR reset. All flash data is already populated at probe time and the cached adapter->eeprom_id is what get_drvinfo should be returning. The only place that needs to trigger a re-read is ixgbe_devlink_reload_empr_finish(), right after the EMPR completes and new firmware is running. Additionally, refresh the FW version in ixgbe_reinit_locked() so that any PF that undergoes a reinit after an EMPR (e.g. triggered by another PF's devlink reload) also picks up the new version in adapter->eeprom_id. ixgbe_devlink_info_get() keeps its refresh call for explicit "devlink dev info" queries, which is fine given those are user-initiated. Fixes: c9e563cae19e ("ixgbe: add support for devlink reload") Co-developed-by: Jedrzej Jagielski Signed-off-by: Jedrzej Jagielski Signed-off-by: Aleksandr Loktionov Reviewed-by: Simon Horman Tested-by: Rinitha S (A Contingent worker at Intel) Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ixgbe/devlink/devlink.c | 2 +- drivers/net/ethernet/intel/ixgbe/ixgbe.h | 2 +- drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c | 13 +++++++------ drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 10 ++++++++++ 4 files changed, 19 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/intel/ixgbe/devlink/devlink.c b/drivers/net/ethernet/intel/ixgbe/devlink/devlink.c index b0404f37271a..cf8908b82f8a 100644 --- a/drivers/net/ethernet/intel/ixgbe/devlink/devlink.c +++ b/drivers/net/ethernet/intel/ixgbe/devlink/devlink.c @@ -474,7 +474,7 @@ static int ixgbe_devlink_reload_empr_finish(struct devlink *devlink, adapter->flags2 &= ~(IXGBE_FLAG2_API_MISMATCH | IXGBE_FLAG2_FW_ROLLBACK); - return 0; + return ixgbe_refresh_fw_version(adapter); } static const struct devlink_ops ixgbe_devlink_ops = { diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe.h b/drivers/net/ethernet/intel/ixgbe/ixgbe.h index dce4936708eb..047f04045585 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe.h +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe.h @@ -973,7 +973,7 @@ int ixgbe_init_interrupt_scheme(struct ixgbe_adapter *adapter); bool ixgbe_wol_supported(struct ixgbe_adapter *adapter, u16 device_id, u16 subdevice_id); void ixgbe_set_fw_version_e610(struct ixgbe_adapter *adapter); -void ixgbe_refresh_fw_version(struct ixgbe_adapter *adapter); +int ixgbe_refresh_fw_version(struct ixgbe_adapter *adapter); #ifdef CONFIG_PCI_IOV void ixgbe_full_sync_mac_table(struct ixgbe_adapter *adapter); #endif diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c index 56aabaa5caec..ba049b3a9609 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c @@ -1155,12 +1155,17 @@ static int ixgbe_set_eeprom(struct net_device *netdev, return ret_val; } -void ixgbe_refresh_fw_version(struct ixgbe_adapter *adapter) +int ixgbe_refresh_fw_version(struct ixgbe_adapter *adapter) { struct ixgbe_hw *hw = &adapter->hw; + int err; + + err = ixgbe_get_flash_data(hw); + if (err) + return err; - ixgbe_get_flash_data(hw); ixgbe_set_fw_version_e610(adapter); + return 0; } static void ixgbe_get_drvinfo(struct net_device *netdev, @@ -1168,10 +1173,6 @@ static void ixgbe_get_drvinfo(struct net_device *netdev, { struct ixgbe_adapter *adapter = ixgbe_from_netdev(netdev); - /* need to refresh info for e610 in case fw reloads in runtime */ - if (adapter->hw.mac.type == ixgbe_mac_e610) - ixgbe_refresh_fw_version(adapter); - strscpy(drvinfo->driver, ixgbe_driver_name, sizeof(drvinfo->driver)); strscpy(drvinfo->fw_version, adapter->eeprom_id, diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index e4101c59074d..59801187a839 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -6289,6 +6289,16 @@ void ixgbe_reinit_locked(struct ixgbe_adapter *adapter) if (adapter->flags & IXGBE_FLAG_SRIOV_ENABLED) msleep(2000); ixgbe_up(adapter); + + /* E610 has no FW event to notify all PFs of an EMPR reset, so + * refresh the FW version here to pick up any new FW version after + * a hardware reset (e.g. EMPR triggered by another PF's devlink + * reload). ixgbe_refresh_fw_version() updates both hw->flash and + * adapter->eeprom_id so ethtool -i reports the correct string. + */ + if (adapter->hw.mac.type == ixgbe_mac_e610) + (void)ixgbe_refresh_fw_version(adapter); + clear_bit(__IXGBE_RESETTING, &adapter->state); } From 4821d563cd7f251ae728be1a6d04af82a294a5b9 Mon Sep 17 00:00:00 2001 From: Michal Schmidt Date: Fri, 13 Mar 2026 09:22:29 +0100 Subject: [PATCH 127/248] ixgbevf: add missing negotiate_features op to Hyper-V ops table Commit a7075f501bd3 ("ixgbevf: fix mailbox API compatibility by negotiating supported features") added the .negotiate_features callback to ixgbe_mac_operations and populated it in ixgbevf_mac_ops, but forgot to add it to ixgbevf_hv_mac_ops. This leaves the function pointer NULL on Hyper-V VMs. During probe, ixgbevf_negotiate_api() calls ixgbevf_set_features(), which unconditionally dereferences hw->mac.ops.negotiate_features(). On Hyper-V this results in a NULL pointer dereference: BUG: kernel NULL pointer dereference, address: 0000000000000000 [...] Hardware name: Microsoft Corporation Virtual Machine/Virtual Machine [...] Workqueue: events work_for_cpu_fn RIP: 0010:0x0 [...] Call Trace: ixgbevf_negotiate_api+0x66/0x160 [ixgbevf] ixgbevf_sw_init+0xe4/0x1f0 [ixgbevf] ixgbevf_probe+0x20f/0x4a0 [ixgbevf] local_pci_probe+0x50/0xa0 work_for_cpu_fn+0x1a/0x30 [...] Add ixgbevf_hv_negotiate_features_vf() that returns -EOPNOTSUPP and wire it into ixgbevf_hv_mac_ops. The caller already handles -EOPNOTSUPP gracefully. Fixes: a7075f501bd3 ("ixgbevf: fix mailbox API compatibility by negotiating supported features") Reported-by: Xiaoqiang Xiong Closes: https://issues.redhat.com/browse/RHEL-155455 Assisted-by: Claude:claude-4.6-opus-high Cursor Tested-by: Xiaoqiang Xiong Signed-off-by: Michal Schmidt Reviewed-by: Aleksandr Loktionov Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ixgbevf/vf.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/net/ethernet/intel/ixgbevf/vf.c b/drivers/net/ethernet/intel/ixgbevf/vf.c index b67b580f7f1c..f6df86d124b9 100644 --- a/drivers/net/ethernet/intel/ixgbevf/vf.c +++ b/drivers/net/ethernet/intel/ixgbevf/vf.c @@ -709,6 +709,12 @@ static int ixgbevf_negotiate_features_vf(struct ixgbe_hw *hw, u32 *pf_features) return err; } +static int ixgbevf_hv_negotiate_features_vf(struct ixgbe_hw *hw, + u32 *pf_features) +{ + return -EOPNOTSUPP; +} + /** * ixgbevf_set_vfta_vf - Set/Unset VLAN filter table address * @hw: pointer to the HW structure @@ -1142,6 +1148,7 @@ static const struct ixgbe_mac_operations ixgbevf_hv_mac_ops = { .setup_link = ixgbevf_setup_mac_link_vf, .check_link = ixgbevf_hv_check_mac_link_vf, .negotiate_api_version = ixgbevf_hv_negotiate_api_version_vf, + .negotiate_features = ixgbevf_hv_negotiate_features_vf, .set_rar = ixgbevf_hv_set_rar_vf, .update_mc_addr_list = ixgbevf_hv_update_mc_addr_list_vf, .update_xcast_mode = ixgbevf_hv_update_xcast_mode, From b1e067240379f950a0022208e0685f3465c211cb Mon Sep 17 00:00:00 2001 From: Alex Dvoretsky Date: Thu, 12 Mar 2026 14:52:55 +0100 Subject: [PATCH 128/248] igb: remove napi_synchronize() in igb_down() When an AF_XDP zero-copy application terminates abruptly (e.g., kill -9), the XSK buffer pool is destroyed but NAPI polling continues. igb_clean_rx_irq_zc() repeatedly returns the full budget, preventing napi_complete_done() from clearing NAPI_STATE_SCHED. igb_down() calls napi_synchronize() before napi_disable() for each queue vector. napi_synchronize() spins waiting for NAPI_STATE_SCHED to clear, which never happens. igb_down() blocks indefinitely, the TX watchdog fires, and the TX queue remains permanently stalled. napi_disable() already handles this correctly: it sets NAPI_STATE_DISABLE. After a full-budget poll, __napi_poll() checks napi_disable_pending(). If set, it forces completion and clears NAPI_STATE_SCHED, breaking the loop that napi_synchronize() cannot. napi_synchronize() was added in commit 41f149a285da ("igb: Fix possible panic caused by Rx traffic arrival while interface is down"). napi_disable() provides stronger guarantees: it prevents further scheduling and waits for any active poll to exit. Other Intel drivers (ixgbe, ice, i40e) use napi_disable() without a preceding napi_synchronize() in their down paths. Remove redundant napi_synchronize() call and reorder napi_disable() before igb_set_queue_napi() so the queue-to-NAPI mapping is only cleared after polling has fully stopped. Fixes: 2c6196013f84 ("igb: Add AF_XDP zero-copy Rx support") Cc: stable@vger.kernel.org Suggested-by: Maciej Fijalkowski Reviewed-by: Aleksandr Loktionov Signed-off-by: Alex Dvoretsky Reviewed-by: Maciej Fijalkowski Tested-by: Patryk Holda Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/igb/igb_main.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c index ee99fd8fd513..ce91dda00ec0 100644 --- a/drivers/net/ethernet/intel/igb/igb_main.c +++ b/drivers/net/ethernet/intel/igb/igb_main.c @@ -2203,9 +2203,8 @@ void igb_down(struct igb_adapter *adapter) for (i = 0; i < adapter->num_q_vectors; i++) { if (adapter->q_vector[i]) { - napi_synchronize(&adapter->q_vector[i]->napi); - igb_set_queue_napi(adapter, i, NULL); napi_disable(&adapter->q_vector[i]->napi); + igb_set_queue_napi(adapter, i, NULL); } } From d3baa34a470771399c1495bc04b1e26ac15d598e Mon Sep 17 00:00:00 2001 From: Agalakov Daniil Date: Wed, 18 Mar 2026 15:05:05 +0300 Subject: [PATCH 129/248] e1000: check return value of e1000_read_eeprom [Why] e1000_set_eeprom() performs a read-modify-write operation when the write range is not word-aligned. This requires reading the first and last words of the range from the EEPROM to preserve the unmodified bytes. However, the code does not check the return value of e1000_read_eeprom(). If the read fails, the operation continues using uninitialized data from eeprom_buff. This results in corrupted data being written back to the EEPROM for the boundary words. Add the missing error checks and abort the operation if reading fails. Found by Linux Verification Center (linuxtesting.org) with SVACE. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Co-developed-by: Iskhakov Daniil Signed-off-by: Iskhakov Daniil Signed-off-by: Agalakov Daniil Reviewed-by: Aleksandr Loktionov Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/e1000/e1000_ethtool.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/e1000/e1000_ethtool.c b/drivers/net/ethernet/intel/e1000/e1000_ethtool.c index ab232b3fbbd0..4dcbeabb3ad2 100644 --- a/drivers/net/ethernet/intel/e1000/e1000_ethtool.c +++ b/drivers/net/ethernet/intel/e1000/e1000_ethtool.c @@ -496,14 +496,19 @@ static int e1000_set_eeprom(struct net_device *netdev, */ ret_val = e1000_read_eeprom(hw, first_word, 1, &eeprom_buff[0]); + if (ret_val) + goto out; + ptr++; } - if (((eeprom->offset + eeprom->len) & 1) && (ret_val == 0)) { + if ((eeprom->offset + eeprom->len) & 1) { /* need read/modify/write of last changed EEPROM word * only the first byte of the word is being modified */ ret_val = e1000_read_eeprom(hw, last_word, 1, &eeprom_buff[last_word - first_word]); + if (ret_val) + goto out; } /* Device's eeprom is always little-endian, word addressable */ @@ -522,6 +527,7 @@ static int e1000_set_eeprom(struct net_device *netdev, if ((ret_val == 0) && (first_word <= EEPROM_CHECKSUM_REG)) e1000_update_eeprom_checksum(hw); +out: kfree(eeprom_buff); return ret_val; } From a315e022a72d95ef5f1d4e58e903cb492b0ad931 Mon Sep 17 00:00:00 2001 From: Maciej Fijalkowski Date: Thu, 2 Apr 2026 17:49:51 +0200 Subject: [PATCH 130/248] xsk: tighten UMEM headroom validation to account for tailroom and min frame MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The current headroom validation in xdp_umem_reg() could leave us with insufficient space dedicated to even receive minimum-sized ethernet frame. Furthermore if multi-buffer would come to play then skb_shared_info stored at the end of XSK frame would be corrupted. HW typically works with 128-aligned sizes so let us provide this value as bare minimum. Multi-buffer setting is known later in the configuration process so besides accounting for 128 bytes, let us also take care of tailroom space upfront. Reviewed-by: Björn Töpel Acked-by: Stanislav Fomichev Fixes: 99e3a236dd43 ("xsk: Add missing check on user supplied headroom size") Signed-off-by: Maciej Fijalkowski Link: https://patch.msgid.link/20260402154958.562179-2-maciej.fijalkowski@intel.com Signed-off-by: Jakub Kicinski --- net/xdp/xdp_umem.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/xdp/xdp_umem.c b/net/xdp/xdp_umem.c index 066ce07c506d..58da2f4f4397 100644 --- a/net/xdp/xdp_umem.c +++ b/net/xdp/xdp_umem.c @@ -203,7 +203,8 @@ static int xdp_umem_reg(struct xdp_umem *umem, struct xdp_umem_reg *mr) if (!unaligned_chunks && chunks_rem) return -EINVAL; - if (headroom >= chunk_size - XDP_PACKET_HEADROOM) + if (headroom > chunk_size - XDP_PACKET_HEADROOM - + SKB_DATA_ALIGN(sizeof(struct skb_shared_info)) - 128) return -EINVAL; if (mr->flags & XDP_UMEM_TX_METADATA_LEN) { From 1ee1605138fc94cc8f8f273321dd2471c64977f9 Mon Sep 17 00:00:00 2001 From: Maciej Fijalkowski Date: Thu, 2 Apr 2026 17:49:52 +0200 Subject: [PATCH 131/248] xsk: respect tailroom for ZC setups MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Multi-buffer XDP stores information about frags in skb_shared_info that sits at the tailroom of a packet. The storage space is reserved via xdp_data_hard_end(): ((xdp)->data_hard_start + (xdp)->frame_sz - \ SKB_DATA_ALIGN(sizeof(struct skb_shared_info))) and then we refer to it via macro below: static inline struct skb_shared_info * xdp_get_shared_info_from_buff(const struct xdp_buff *xdp) { return (struct skb_shared_info *)xdp_data_hard_end(xdp); } Currently we do not respect this tailroom space in multi-buffer AF_XDP ZC scenario. To address this, introduce xsk_pool_get_tailroom() and use it within xsk_pool_get_rx_frame_size() which is used in ZC drivers to configure length of HW Rx buffer. Typically drivers on Rx Hw buffers side work on 128 byte alignment so let us align the value returned by xsk_pool_get_rx_frame_size() in order to avoid addressing this on driver's side. This addresses the fact that idpf uses mentioned function *before* pool->dev being set so we were at risk that after subtracting tailroom we would not provide 128-byte aligned value to HW. Since xsk_pool_get_rx_frame_size() is actively used in xsk_rcv_check() and __xsk_rcv(), add a variant of this routine that will not include 128 byte alignment and therefore old behavior is preserved. Reviewed-by: Björn Töpel Acked-by: Stanislav Fomichev Fixes: 24ea50127ecf ("xsk: support mbuf on ZC RX") Signed-off-by: Maciej Fijalkowski Link: https://patch.msgid.link/20260402154958.562179-3-maciej.fijalkowski@intel.com Signed-off-by: Jakub Kicinski --- include/net/xdp_sock_drv.h | 23 ++++++++++++++++++++++- net/xdp/xsk.c | 4 ++-- 2 files changed, 24 insertions(+), 3 deletions(-) diff --git a/include/net/xdp_sock_drv.h b/include/net/xdp_sock_drv.h index 6b9ebae2dc95..46797645a0c2 100644 --- a/include/net/xdp_sock_drv.h +++ b/include/net/xdp_sock_drv.h @@ -41,16 +41,37 @@ static inline u32 xsk_pool_get_headroom(struct xsk_buff_pool *pool) return XDP_PACKET_HEADROOM + pool->headroom; } +static inline u32 xsk_pool_get_tailroom(bool mbuf) +{ + return mbuf ? SKB_DATA_ALIGN(sizeof(struct skb_shared_info)) : 0; +} + static inline u32 xsk_pool_get_chunk_size(struct xsk_buff_pool *pool) { return pool->chunk_size; } -static inline u32 xsk_pool_get_rx_frame_size(struct xsk_buff_pool *pool) +static inline u32 __xsk_pool_get_rx_frame_size(struct xsk_buff_pool *pool) { return xsk_pool_get_chunk_size(pool) - xsk_pool_get_headroom(pool); } +static inline u32 xsk_pool_get_rx_frame_size(struct xsk_buff_pool *pool) +{ + u32 frame_size = __xsk_pool_get_rx_frame_size(pool); + struct xdp_umem *umem = pool->umem; + bool mbuf; + + /* Reserve tailroom only for zero-copy pools that opted into + * multi-buffer. The reserved area is used for skb_shared_info, + * matching the XDP core's xdp_data_hard_end() layout. + */ + mbuf = pool->dev && (umem->flags & XDP_UMEM_SG_FLAG); + frame_size -= xsk_pool_get_tailroom(mbuf); + + return ALIGN_DOWN(frame_size, 128); +} + static inline u32 xsk_pool_get_rx_frag_step(struct xsk_buff_pool *pool) { return pool->unaligned ? 0 : xsk_pool_get_chunk_size(pool); diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c index 6149f6a79897..c8ef9e427c9c 100644 --- a/net/xdp/xsk.c +++ b/net/xdp/xsk.c @@ -239,7 +239,7 @@ static u32 xsk_copy_xdp(void *to, void **from, u32 to_len, static int __xsk_rcv(struct xdp_sock *xs, struct xdp_buff *xdp, u32 len) { - u32 frame_size = xsk_pool_get_rx_frame_size(xs->pool); + u32 frame_size = __xsk_pool_get_rx_frame_size(xs->pool); void *copy_from = xsk_copy_xdp_start(xdp), *copy_to; u32 from_len, meta_len, rem, num_desc; struct xdp_buff_xsk *xskb; @@ -338,7 +338,7 @@ static int xsk_rcv_check(struct xdp_sock *xs, struct xdp_buff *xdp, u32 len) if (xs->dev != xdp->rxq->dev || xs->queue_id != xdp->rxq->queue_index) return -EINVAL; - if (len > xsk_pool_get_rx_frame_size(xs->pool) && !xs->sg) { + if (len > __xsk_pool_get_rx_frame_size(xs->pool) && !xs->sg) { xs->rx_dropped++; return -ENOSPC; } From 93e84fe45b752d17a5a46b306ed78f0133bbc719 Mon Sep 17 00:00:00 2001 From: Maciej Fijalkowski Date: Thu, 2 Apr 2026 17:49:53 +0200 Subject: [PATCH 132/248] xsk: fix XDP_UMEM_SG_FLAG issues MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently xp_assign_dev_shared() is missing XDP_USE_SG being propagated to flags so set it in order to preserve mtu check that is supposed to be done only when no multi-buffer setup is in picture. Also, this flag has the same value as XDP_UMEM_TX_SW_CSUM so we could get unexpected SG setups for software Tx checksums. Since csum flag is UAPI, modify value of XDP_UMEM_SG_FLAG. Fixes: d609f3d228a8 ("xsk: add multi-buffer support for sockets sharing umem") Reviewed-by: Björn Töpel Signed-off-by: Maciej Fijalkowski Link: https://patch.msgid.link/20260402154958.562179-4-maciej.fijalkowski@intel.com Signed-off-by: Jakub Kicinski --- include/net/xdp_sock.h | 2 +- net/xdp/xsk_buff_pool.c | 4 ++++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/include/net/xdp_sock.h b/include/net/xdp_sock.h index 23e8861e8b25..ebac60a3d8a1 100644 --- a/include/net/xdp_sock.h +++ b/include/net/xdp_sock.h @@ -14,7 +14,7 @@ #include #include -#define XDP_UMEM_SG_FLAG (1 << 1) +#define XDP_UMEM_SG_FLAG BIT(3) struct net_device; struct xsk_queue; diff --git a/net/xdp/xsk_buff_pool.c b/net/xdp/xsk_buff_pool.c index 37b7a68b89b3..729602a3cec0 100644 --- a/net/xdp/xsk_buff_pool.c +++ b/net/xdp/xsk_buff_pool.c @@ -247,6 +247,10 @@ int xp_assign_dev_shared(struct xsk_buff_pool *pool, struct xdp_sock *umem_xs, struct xdp_umem *umem = umem_xs->umem; flags = umem->zc ? XDP_ZEROCOPY : XDP_COPY; + + if (umem->flags & XDP_UMEM_SG_FLAG) + flags |= XDP_USE_SG; + if (umem_xs->pool->uses_need_wakeup) flags |= XDP_USE_NEED_WAKEUP; From 36ee60b569ba0dfb6f961333b90d19ab5b323fa9 Mon Sep 17 00:00:00 2001 From: Maciej Fijalkowski Date: Thu, 2 Apr 2026 17:49:54 +0200 Subject: [PATCH 133/248] xsk: validate MTU against usable frame size on bind MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit AF_XDP bind currently accepts zero-copy pool configurations without verifying that the device MTU fits into the usable frame space provided by the UMEM chunk. This becomes a problem since we started to respect tailroom which is subtracted from chunk_size (among with headroom). 2k chunk size might not provide enough space for standard 1500 MTU, so let us catch such settings at bind time. Furthermore, validate whether underlying HW will be able to satisfy configured MTU wrt XSK's frame size multiplied by supported Rx buffer chain length (that is exposed via net_device::xdp_zc_max_segs). Fixes: 24ea50127ecf ("xsk: support mbuf on ZC RX") Reviewed-by: Björn Töpel Signed-off-by: Maciej Fijalkowski Link: https://patch.msgid.link/20260402154958.562179-5-maciej.fijalkowski@intel.com Signed-off-by: Jakub Kicinski --- net/xdp/xsk_buff_pool.c | 28 +++++++++++++++++++++++++--- 1 file changed, 25 insertions(+), 3 deletions(-) diff --git a/net/xdp/xsk_buff_pool.c b/net/xdp/xsk_buff_pool.c index 729602a3cec0..cd7bc50872f6 100644 --- a/net/xdp/xsk_buff_pool.c +++ b/net/xdp/xsk_buff_pool.c @@ -10,6 +10,8 @@ #include "xdp_umem.h" #include "xsk.h" +#define ETH_PAD_LEN (ETH_HLEN + 2 * VLAN_HLEN + ETH_FCS_LEN) + void xp_add_xsk(struct xsk_buff_pool *pool, struct xdp_sock *xs) { if (!xs->tx) @@ -157,8 +159,12 @@ static void xp_disable_drv_zc(struct xsk_buff_pool *pool) int xp_assign_dev(struct xsk_buff_pool *pool, struct net_device *netdev, u16 queue_id, u16 flags) { + u32 needed = netdev->mtu + ETH_PAD_LEN; + u32 segs = netdev->xdp_zc_max_segs; + bool mbuf = flags & XDP_USE_SG; bool force_zc, force_copy; struct netdev_bpf bpf; + u32 frame_size; int err = 0; ASSERT_RTNL(); @@ -178,7 +184,7 @@ int xp_assign_dev(struct xsk_buff_pool *pool, if (err) return err; - if (flags & XDP_USE_SG) + if (mbuf) pool->umem->flags |= XDP_UMEM_SG_FLAG; if (flags & XDP_USE_NEED_WAKEUP) @@ -200,8 +206,24 @@ int xp_assign_dev(struct xsk_buff_pool *pool, goto err_unreg_pool; } - if (netdev->xdp_zc_max_segs == 1 && (flags & XDP_USE_SG)) { - err = -EOPNOTSUPP; + if (mbuf) { + if (segs == 1) { + err = -EOPNOTSUPP; + goto err_unreg_pool; + } + } else { + segs = 1; + } + + /* open-code xsk_pool_get_rx_frame_size() as pool->dev is not + * set yet at this point; we are before getting down to driver + */ + frame_size = __xsk_pool_get_rx_frame_size(pool) - + xsk_pool_get_tailroom(mbuf); + frame_size = ALIGN_DOWN(frame_size, 128); + + if (needed > frame_size * segs) { + err = -EINVAL; goto err_unreg_pool; } From c5866a6be47207b05c6eead6292c8551c8a91669 Mon Sep 17 00:00:00 2001 From: Maciej Fijalkowski Date: Thu, 2 Apr 2026 17:49:55 +0200 Subject: [PATCH 134/248] selftests: bpf: introduce a common routine for reading procfs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Parametrize current way of getting MAX_SKB_FRAGS value from {sys,proc}fs so that it can be re-used to get cache line size of system's CPU. All that just to mimic and compute size of kernel's struct skb_shared_info which for xsk and test suite interpret as tailroom. Introduce two variables to ifobject struct that will carry count of skb frags and tailroom size. Do the reading and computing once, at the beginning of test suite execution in xskxceiver, but for test_progs such way is not possible as in this environment each test setups and torns down ifobject structs. Reviewed-by: Björn Töpel Signed-off-by: Maciej Fijalkowski Link: https://patch.msgid.link/20260402154958.562179-6-maciej.fijalkowski@intel.com Signed-off-by: Jakub Kicinski --- .../selftests/bpf/prog_tests/test_xsk.c | 25 +------------------ .../selftests/bpf/prog_tests/test_xsk.h | 23 +++++++++++++++++ tools/testing/selftests/bpf/prog_tests/xsk.c | 19 ++++++++++++++ tools/testing/selftests/bpf/xskxceiver.c | 23 +++++++++++++++++ 4 files changed, 66 insertions(+), 24 deletions(-) diff --git a/tools/testing/selftests/bpf/prog_tests/test_xsk.c b/tools/testing/selftests/bpf/prog_tests/test_xsk.c index 7e38ec6e656b..62118ffba661 100644 --- a/tools/testing/selftests/bpf/prog_tests/test_xsk.c +++ b/tools/testing/selftests/bpf/prog_tests/test_xsk.c @@ -179,25 +179,6 @@ int xsk_configure_socket(struct xsk_socket_info *xsk, struct xsk_umem_info *umem return xsk_socket__create(&xsk->xsk, ifobject->ifindex, 0, umem->umem, rxr, txr, &cfg); } -#define MAX_SKB_FRAGS_PATH "/proc/sys/net/core/max_skb_frags" -static unsigned int get_max_skb_frags(void) -{ - unsigned int max_skb_frags = 0; - FILE *file; - - file = fopen(MAX_SKB_FRAGS_PATH, "r"); - if (!file) { - ksft_print_msg("Error opening %s\n", MAX_SKB_FRAGS_PATH); - return 0; - } - - if (fscanf(file, "%u", &max_skb_frags) != 1) - ksft_print_msg("Error reading %s\n", MAX_SKB_FRAGS_PATH); - - fclose(file); - return max_skb_frags; -} - static int set_ring_size(struct ifobject *ifobj) { int ret; @@ -2242,11 +2223,7 @@ int testapp_too_many_frags(struct test_spec *test) if (test->mode == TEST_MODE_ZC) { max_frags = test->ifobj_tx->xdp_zc_max_segs; } else { - max_frags = get_max_skb_frags(); - if (!max_frags) { - ksft_print_msg("Can't get MAX_SKB_FRAGS from system, using default (17)\n"); - max_frags = 17; - } + max_frags = test->ifobj_tx->max_skb_frags; max_frags += 1; } diff --git a/tools/testing/selftests/bpf/prog_tests/test_xsk.h b/tools/testing/selftests/bpf/prog_tests/test_xsk.h index 8fc78a057de0..1ab8aee4ce56 100644 --- a/tools/testing/selftests/bpf/prog_tests/test_xsk.h +++ b/tools/testing/selftests/bpf/prog_tests/test_xsk.h @@ -31,6 +31,9 @@ #define SOCK_RECONF_CTR 10 #define USLEEP_MAX 10000 +#define MAX_SKB_FRAGS_PATH "/proc/sys/net/core/max_skb_frags" +#define SMP_CACHE_BYTES_PATH "/sys/devices/system/cpu/cpu0/cache/index0/coherency_line_size" + extern bool opt_verbose; #define print_verbose(x...) do { if (opt_verbose) ksft_print_msg(x); } while (0) @@ -45,6 +48,24 @@ static inline u64 ceil_u64(u64 a, u64 b) return (a + b - 1) / b; } +static inline unsigned int read_procfs_val(const char *path) +{ + unsigned int read_val = 0; + FILE *file; + + file = fopen(path, "r"); + if (!file) { + ksft_print_msg("Error opening %s\n", path); + return 0; + } + + if (fscanf(file, "%u", &read_val) != 1) + ksft_print_msg("Error reading %s\n", path); + + fclose(file); + return read_val; +} + /* Simple test */ enum test_mode { TEST_MODE_SKB, @@ -115,6 +136,8 @@ struct ifobject { int mtu; u32 bind_flags; u32 xdp_zc_max_segs; + u32 umem_tailroom; + u32 max_skb_frags; bool tx_on; bool rx_on; bool use_poll; diff --git a/tools/testing/selftests/bpf/prog_tests/xsk.c b/tools/testing/selftests/bpf/prog_tests/xsk.c index dd4c35c0e428..6e2f63ee2a6c 100644 --- a/tools/testing/selftests/bpf/prog_tests/xsk.c +++ b/tools/testing/selftests/bpf/prog_tests/xsk.c @@ -62,6 +62,7 @@ int configure_ifobj(struct ifobject *tx, struct ifobject *rx) static void test_xsk(const struct test_spec *test_to_run, enum test_mode mode) { + u32 max_frags, umem_tailroom, cache_line_size; struct ifobject *ifobj_tx, *ifobj_rx; struct test_spec test; int ret; @@ -84,6 +85,24 @@ static void test_xsk(const struct test_spec *test_to_run, enum test_mode mode) ifobj_tx->set_ring.default_rx = ifobj_tx->ring.rx_pending; } + cache_line_size = read_procfs_val(SMP_CACHE_BYTES_PATH); + if (!cache_line_size) + cache_line_size = 64; + + max_frags = read_procfs_val(MAX_SKB_FRAGS_PATH); + if (!max_frags) + max_frags = 17; + + ifobj_tx->max_skb_frags = max_frags; + ifobj_rx->max_skb_frags = max_frags; + + /* 48 bytes is a part of skb_shared_info w/o frags array; + * 16 bytes is sizeof(skb_frag_t) + */ + umem_tailroom = ALIGN(48 + (max_frags * 16), cache_line_size); + ifobj_tx->umem_tailroom = umem_tailroom; + ifobj_rx->umem_tailroom = umem_tailroom; + if (!ASSERT_OK(init_iface(ifobj_rx, worker_testapp_validate_rx), "init RX")) goto delete_rx; if (!ASSERT_OK(init_iface(ifobj_tx, worker_testapp_validate_tx), "init TX")) diff --git a/tools/testing/selftests/bpf/xskxceiver.c b/tools/testing/selftests/bpf/xskxceiver.c index 05b3cebc5ca9..7dad8556a722 100644 --- a/tools/testing/selftests/bpf/xskxceiver.c +++ b/tools/testing/selftests/bpf/xskxceiver.c @@ -80,6 +80,7 @@ #include #include #include +#include #include #include #include @@ -333,6 +334,7 @@ static void print_tests(void) int main(int argc, char **argv) { const size_t total_tests = ARRAY_SIZE(tests) + ARRAY_SIZE(ci_skip_tests); + u32 cache_line_size, max_frags, umem_tailroom; struct pkt_stream *rx_pkt_stream_default; struct pkt_stream *tx_pkt_stream_default; struct ifobject *ifobj_tx, *ifobj_rx; @@ -354,6 +356,27 @@ int main(int argc, char **argv) setlocale(LC_ALL, ""); + cache_line_size = read_procfs_val(SMP_CACHE_BYTES_PATH); + if (!cache_line_size) { + ksft_print_msg("Can't get SMP_CACHE_BYTES from system, using default (64)\n"); + cache_line_size = 64; + } + + max_frags = read_procfs_val(MAX_SKB_FRAGS_PATH); + if (!max_frags) { + ksft_print_msg("Can't get MAX_SKB_FRAGS from system, using default (17)\n"); + max_frags = 17; + } + ifobj_tx->max_skb_frags = max_frags; + ifobj_rx->max_skb_frags = max_frags; + + /* 48 bytes is a part of skb_shared_info w/o frags array; + * 16 bytes is sizeof(skb_frag_t) + */ + umem_tailroom = ALIGN(48 + (max_frags * 16), cache_line_size); + ifobj_tx->umem_tailroom = umem_tailroom; + ifobj_rx->umem_tailroom = umem_tailroom; + parse_command_line(ifobj_tx, ifobj_rx, argc, argv); if (opt_print_tests) { From 3197c51ce2fa00410f86f4829e4f9223553632f6 Mon Sep 17 00:00:00 2001 From: Maciej Fijalkowski Date: Thu, 2 Apr 2026 17:49:56 +0200 Subject: [PATCH 135/248] selftests: bpf: fix pkt grow tests MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Skip tail adjust tests in xskxceiver for SKB mode as it is not very friendly for it. multi-buffer case does not work as xdp_rxq_info that is registered for generic XDP does not report ::frag_size. The non-mbuf path copies packet via skb_pp_cow_data() which only accounts for headroom, leaving us with no tailroom and causing underlying XDP prog to drop packets therefore. For multi-buffer test on other modes, change the amount of bytes we use for growth, assume worst-case scenario and take care of headroom and tailroom. Reviewed-by: Björn Töpel Signed-off-by: Maciej Fijalkowski Link: https://patch.msgid.link/20260402154958.562179-7-maciej.fijalkowski@intel.com Signed-off-by: Jakub Kicinski --- .../selftests/bpf/prog_tests/test_xsk.c | 24 ++++++++++++++++--- 1 file changed, 21 insertions(+), 3 deletions(-) diff --git a/tools/testing/selftests/bpf/prog_tests/test_xsk.c b/tools/testing/selftests/bpf/prog_tests/test_xsk.c index 62118ffba661..ee60bcc22ee4 100644 --- a/tools/testing/selftests/bpf/prog_tests/test_xsk.c +++ b/tools/testing/selftests/bpf/prog_tests/test_xsk.c @@ -2528,16 +2528,34 @@ int testapp_adjust_tail_shrink_mb(struct test_spec *test) int testapp_adjust_tail_grow(struct test_spec *test) { + if (test->mode == TEST_MODE_SKB) + return TEST_SKIP; + /* Grow by 4 bytes for testing purpose */ return testapp_adjust_tail(test, 4, MIN_PKT_SIZE * 2); } int testapp_adjust_tail_grow_mb(struct test_spec *test) { + u32 grow_size; + + if (test->mode == TEST_MODE_SKB) + return TEST_SKIP; + + /* worst case scenario is when underlying setup will work on 3k + * buffers, let us account for it; given that we will use 6k as + * pkt_len, expect that it will be broken down to 2 descs each + * with 3k payload; + * + * 4k is truesize, 3k payload, 256 HR, 320 TR; + */ + grow_size = XSK_UMEM__MAX_FRAME_SIZE - + XSK_UMEM__LARGE_FRAME_SIZE - + XDP_PACKET_HEADROOM - + test->ifobj_tx->umem_tailroom; test->mtu = MAX_ETH_JUMBO_SIZE; - /* Grow by (frag_size - last_frag_Size) - 1 to stay inside the last fragment */ - return testapp_adjust_tail(test, (XSK_UMEM__MAX_FRAME_SIZE / 2) - 1, - XSK_UMEM__LARGE_FRAME_SIZE * 2); + + return testapp_adjust_tail(test, grow_size, XSK_UMEM__LARGE_FRAME_SIZE * 2); } int testapp_tx_queue_consumer(struct test_spec *test) From 16546954e117e65661109e788682891b15d4e3ce Mon Sep 17 00:00:00 2001 From: Maciej Fijalkowski Date: Thu, 2 Apr 2026 17:49:57 +0200 Subject: [PATCH 136/248] selftests: bpf: have a separate variable for drop test MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently two different XDP programs share a static variable for different purposes (picking where to redirect on shared umem test & whether to drop a packet). This can be a problem when running full test suite - idx can be written by shared umem test and this value can cause a false behavior within XDP drop half test. Introduce a dedicated variable for drop half test so that these two don't step on each other toes. There is no real need for using __sync_fetch_and_add here as XSK tests are executed on single CPU. Reviewed-by: Björn Töpel Signed-off-by: Maciej Fijalkowski Link: https://patch.msgid.link/20260402154958.562179-8-maciej.fijalkowski@intel.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/bpf/progs/xsk_xdp_progs.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/bpf/progs/xsk_xdp_progs.c b/tools/testing/selftests/bpf/progs/xsk_xdp_progs.c index 683306db8594..023d8befd4ca 100644 --- a/tools/testing/selftests/bpf/progs/xsk_xdp_progs.c +++ b/tools/testing/selftests/bpf/progs/xsk_xdp_progs.c @@ -26,8 +26,10 @@ SEC("xdp.frags") int xsk_def_prog(struct xdp_md *xdp) SEC("xdp.frags") int xsk_xdp_drop(struct xdp_md *xdp) { + static unsigned int drop_idx; + /* Drop every other packet */ - if (idx++ % 2) + if (drop_idx++ % 2) return XDP_DROP; return bpf_redirect_map(&xsk, 0, XDP_DROP); From 62838e363e4f0753d43b2b78e9d3f6ad3c9102ec Mon Sep 17 00:00:00 2001 From: Maciej Fijalkowski Date: Thu, 2 Apr 2026 17:49:58 +0200 Subject: [PATCH 137/248] selftests: bpf: adjust rx_dropped xskxceiver's test to respect tailroom MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Since we have changed how big user defined headroom in umem can be, change the logic in testapp_stats_rx_dropped() so we pass updated headroom validation in xdp_umem_reg() and still drop half of frames. Test works on non-mbuf setup so __xsk_pool_get_rx_frame_size() that is called on xsk_rcv_check() will not account skb_shared_info size. Taking the tailroom size into account in test being fixed is needed as xdp_umem_reg() defaults to respect it. Reviewed-by: Björn Töpel Signed-off-by: Maciej Fijalkowski Link: https://patch.msgid.link/20260402154958.562179-9-maciej.fijalkowski@intel.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/bpf/prog_tests/test_xsk.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/bpf/prog_tests/test_xsk.c b/tools/testing/selftests/bpf/prog_tests/test_xsk.c index ee60bcc22ee4..7950c504ed28 100644 --- a/tools/testing/selftests/bpf/prog_tests/test_xsk.c +++ b/tools/testing/selftests/bpf/prog_tests/test_xsk.c @@ -1959,15 +1959,17 @@ int testapp_headroom(struct test_spec *test) int testapp_stats_rx_dropped(struct test_spec *test) { + u32 umem_tr = test->ifobj_tx->umem_tailroom; + if (test->mode == TEST_MODE_ZC) { ksft_print_msg("Can not run RX_DROPPED test for ZC mode\n"); return TEST_SKIP; } - if (pkt_stream_replace_half(test, MIN_PKT_SIZE * 4, 0)) + if (pkt_stream_replace_half(test, (MIN_PKT_SIZE * 3) + umem_tr, 0)) return TEST_FAILURE; test->ifobj_rx->umem->frame_headroom = test->ifobj_rx->umem->frame_size - - XDP_PACKET_HEADROOM - MIN_PKT_SIZE * 3; + XDP_PACKET_HEADROOM - (MIN_PKT_SIZE * 2) - umem_tr; if (pkt_stream_receive_half(test)) return TEST_FAILURE; test->ifobj_rx->validation_func = validate_rx_dropped; From 24ad7ff668896325591fa0b570f2cca6c55f136f Mon Sep 17 00:00:00 2001 From: Stefano Garzarella Date: Fri, 3 Apr 2026 11:32:51 +0200 Subject: [PATCH 138/248] vsock/test: fix send_buf()/recv_buf() EINTR handling When send() or recv() returns -1 with errno == EINTR, the code skips the break but still adds the return value to nwritten/nread, making it decrease by 1. This leads to wrong buffer offsets and wrong bytes count. Fix it by explicitly continuing the loop on EINTR, so the return value is only added when it is positive. Fixes: a8ed71a27ef5 ("vsock/test: add recv_buf() utility function") Fixes: 12329bd51fdc ("vsock/test: add send_buf() utility function") Signed-off-by: Stefano Garzarella Reviewed-by: Luigi Leonardi Link: https://patch.msgid.link/20260403093251.30662-1-sgarzare@redhat.com Signed-off-by: Jakub Kicinski --- tools/testing/vsock/util.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/tools/testing/vsock/util.c b/tools/testing/vsock/util.c index 9430ef5b8bc3..1fe1338c79cd 100644 --- a/tools/testing/vsock/util.c +++ b/tools/testing/vsock/util.c @@ -344,7 +344,9 @@ void send_buf(int fd, const void *buf, size_t len, int flags, ret = send(fd, buf + nwritten, len - nwritten, flags); timeout_check("send"); - if (ret == 0 || (ret < 0 && errno != EINTR)) + if (ret < 0 && errno == EINTR) + continue; + if (ret <= 0) break; nwritten += ret; @@ -396,7 +398,9 @@ void recv_buf(int fd, void *buf, size_t len, int flags, ssize_t expected_ret) ret = recv(fd, buf + nread, len - nread, flags); timeout_check("recv"); - if (ret == 0 || (ret < 0 && errno != EINTR)) + if (ret < 0 && errno == EINTR) + continue; + if (ret <= 0) break; nread += ret; From 0f42e3f4fe2a58394e37241d02d9ca6ab7b7d516 Mon Sep 17 00:00:00 2001 From: Jiayuan Chen Date: Fri, 3 Apr 2026 09:45:12 +0800 Subject: [PATCH 139/248] net: skb: fix cross-cache free of KFENCE-allocated skb head SKB_SMALL_HEAD_CACHE_SIZE is intentionally set to a non-power-of-2 value (e.g. 704 on x86_64) to avoid collisions with generic kmalloc bucket sizes. This ensures that skb_kfree_head() can reliably use skb_end_offset to distinguish skb heads allocated from skb_small_head_cache vs. generic kmalloc caches. However, when KFENCE is enabled, kfence_ksize() returns the exact requested allocation size instead of the slab bucket size. If a caller (e.g. bpf_test_init) allocates skb head data via kzalloc() and the requested size happens to equal SKB_SMALL_HEAD_CACHE_SIZE, then slab_build_skb() -> ksize() returns that exact value. After subtracting skb_shared_info overhead, skb_end_offset ends up matching SKB_SMALL_HEAD_HEADROOM, causing skb_kfree_head() to incorrectly free the object to skb_small_head_cache instead of back to the original kmalloc cache, resulting in a slab cross-cache free: kmem_cache_free(skbuff_small_head): Wrong slab cache. Expected skbuff_small_head but got kmalloc-1k Fix this by always calling kfree(head) in skb_kfree_head(). This keeps the free path generic and avoids allocator-specific misclassification for KFENCE objects. Fixes: bf9f1baa279f ("net: add dedicated kmem_cache for typical/small skb->head") Reported-by: Antonius Closes: https://lore.kernel.org/netdev/CAK8a0jxC5L5N7hq-DT2_NhUyjBxrPocoiDazzsBk4TGgT1r4-A@mail.gmail.com/ Signed-off-by: Jiayuan Chen Reviewed-by: Eric Dumazet Link: https://patch.msgid.link/20260403014517.142550-1-jiayuan.chen@linux.dev Signed-off-by: Jakub Kicinski --- net/core/skbuff.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 0e217041958a..43ee86dcf2ea 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -1083,10 +1083,7 @@ static int skb_pp_frag_ref(struct sk_buff *skb) static void skb_kfree_head(void *head, unsigned int end_offset) { - if (end_offset == SKB_SMALL_HEAD_HEADROOM) - kmem_cache_free(net_hotdata.skb_small_head_cache, head); - else - kfree(head); + kfree(head); } static void skb_free_head(struct sk_buff *skb) From a9d4f4f6e65e0bf9bbddedecc84d67249991979c Mon Sep 17 00:00:00 2001 From: Michael Guralnik Date: Fri, 3 Apr 2026 12:17:56 +0300 Subject: [PATCH 140/248] net/mlx5: Update the list of the PCI supported devices Add the upcoming ConnectX-10 NVLink-C2C device ID to the table of supported PCI device IDs. Cc: stable@vger.kernel.org Signed-off-by: Michael Guralnik Reviewed-by: Patrisious Haddad Signed-off-by: Tariq Toukan Reviewed-by: Simon Horman Link: https://patch.msgid.link/20260403091756.139583-1-tariqt@nvidia.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlx5/core/main.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index fdc3ba20912e..3f73d9b1115d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -2267,6 +2267,7 @@ static const struct pci_device_id mlx5_core_pci_table[] = { { PCI_VDEVICE(MELLANOX, 0x1023) }, /* ConnectX-8 */ { PCI_VDEVICE(MELLANOX, 0x1025) }, /* ConnectX-9 */ { PCI_VDEVICE(MELLANOX, 0x1027) }, /* ConnectX-10 */ + { PCI_VDEVICE(MELLANOX, 0x2101) }, /* ConnectX-10 NVLink-C2C */ { PCI_VDEVICE(MELLANOX, 0xa2d2) }, /* BlueField integrated ConnectX-5 network controller */ { PCI_VDEVICE(MELLANOX, 0xa2d3), MLX5_PCI_DEV_IS_VF}, /* BlueField integrated ConnectX-5 network controller VF */ { PCI_VDEVICE(MELLANOX, 0xa2d6) }, /* BlueField-2 integrated ConnectX-6 Dx network controller */ From e2a39d1a88f00ed83ebc7a19b7673d4ffd50b173 Mon Sep 17 00:00:00 2001 From: Zide Chen Date: Fri, 13 Mar 2026 10:40:47 -0700 Subject: [PATCH 141/248] perf/x86/intel/uncore: Fix iounmap() leak on global_init failure Kernel test robot reported: Unverified Error/Warning (likely false positive, kindly check if interested): arch/x86/events/intel/uncore_discovery.c:293:2-8: ERROR: missing iounmap; ioremap on line 288 and execution via conditional on line 292 If domain->global_init() fails in __parse_discovery_table(), the ioremap'ed MMIO region is not released before returning, resulting in an MMIO mapping leak. Fixes: b575fc0e3357 ("perf/x86/intel/uncore: Add domain global init callback") Reported-by: kernel test robot Signed-off-by: Zide Chen Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Dapeng Mi Link: https://patch.msgid.link/20260313174050.171704-2-zide.chen@intel.com --- arch/x86/events/intel/uncore_discovery.c | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/arch/x86/events/intel/uncore_discovery.c b/arch/x86/events/intel/uncore_discovery.c index 12e259a988dd..427939be2fb4 100644 --- a/arch/x86/events/intel/uncore_discovery.c +++ b/arch/x86/events/intel/uncore_discovery.c @@ -264,6 +264,7 @@ static int __parse_discovery_table(struct uncore_discovery_domain *domain, struct uncore_unit_discovery unit; void __iomem *io_addr; unsigned long size; + int ret = 0; int i; size = UNCORE_DISCOVERY_GLOBAL_MAP_SIZE; @@ -273,21 +274,23 @@ static int __parse_discovery_table(struct uncore_discovery_domain *domain, /* Read Global Discovery State */ memcpy_fromio(&global, io_addr, sizeof(struct uncore_global_discovery)); + iounmap(io_addr); + if (uncore_discovery_invalid_unit(global)) { pr_info("Invalid Global Discovery State: 0x%llx 0x%llx 0x%llx\n", global.table1, global.ctl, global.table3); - iounmap(io_addr); return -EINVAL; } - iounmap(io_addr); size = (1 + global.max_units) * global.stride * 8; io_addr = ioremap(addr, size); if (!io_addr) return -ENOMEM; - if (domain->global_init && domain->global_init(global.ctl)) - return -ENODEV; + if (domain->global_init && domain->global_init(global.ctl)) { + ret = -ENODEV; + goto out; + } /* Parsing Unit Discovery State */ for (i = 0; i < global.max_units; i++) { @@ -307,8 +310,10 @@ static int __parse_discovery_table(struct uncore_discovery_domain *domain, } *parsed = true; + +out: iounmap(io_addr); - return 0; + return ret; } static int parse_discovery_table(struct uncore_discovery_domain *domain, From 7b568e9eba2fad89a696f22f0413d44cf4a1f892 Mon Sep 17 00:00:00 2001 From: Zide Chen Date: Fri, 13 Mar 2026 10:40:48 -0700 Subject: [PATCH 142/248] perf/x86/intel/uncore: Skip discovery table for offline dies This warning can be triggered if NUMA is disabled and the system boots with fewer CPUs than the number of CPUs in die 0. WARNING: CPU: 9 PID: 7257 at uncore.c:1157 uncore_pci_pmu_register+0x136/0x160 [intel_uncore] Currently, the discovery table continues to be parsed even if all CPUs in the associated die are offline. This can lead to an array overflow at "pmu->boxes[die] = box" in uncore_pci_pmu_register(), which may trigger the warning above or cause other issues. Fixes: edae1f06c2cd ("perf/x86/intel/uncore: Parse uncore discovery tables") Reported-by: Steve Wahl Signed-off-by: Zide Chen Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Dapeng Mi Tested-by: Steve Wahl Link: https://patch.msgid.link/20260313174050.171704-3-zide.chen@intel.com --- arch/x86/events/intel/uncore_discovery.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/events/intel/uncore_discovery.c b/arch/x86/events/intel/uncore_discovery.c index 427939be2fb4..583cbd06b9b8 100644 --- a/arch/x86/events/intel/uncore_discovery.c +++ b/arch/x86/events/intel/uncore_discovery.c @@ -371,7 +371,7 @@ static bool uncore_discovery_pci(struct uncore_discovery_domain *domain) (val & UNCORE_DISCOVERY_DVSEC2_BIR_MASK) * UNCORE_DISCOVERY_BIR_STEP; die = get_device_die_id(dev); - if (die < 0) + if ((die < 0) || (die >= uncore_max_dies())) continue; parse_discovery_table(domain, dev, die, bar_offset, &parsed); From a16d1ec4dd0cdcf689f324adde6067083bce9099 Mon Sep 17 00:00:00 2001 From: Zide Chen Date: Fri, 13 Mar 2026 10:40:49 -0700 Subject: [PATCH 143/248] perf/x86/intel/uncore: Fix die ID init and look up bugs In snbep_pci2phy_map_init(), in the nr_node_ids > 8 path, uncore_device_to_die() may return -1 when all CPUs associated with the UBOX device are offline. Remove the WARN_ON_ONCE(die_id == -1) check for two reasons: - The current code breaks out of the loop. This is incorrect because pci_get_device() does not guarantee iteration in domain or bus order, so additional UBOX devices may be skipped during the scan. - Returning -EINVAL is incorrect, since marking offline buses with die_id == -1 is expected and should not be treated as an error. Separately, when NUMA is disabled on a NUMA-capable platform, pcibus_to_node() returns NUMA_NO_NODE, causing uncore_device_to_die() to return -1 for all PCI devices. As a result, spr_update_device_location(), used on Intel SPR and EMR, ignores the corresponding PMON units and does not add them to the RB tree. Fix this by using uncore_pcibus_to_dieid(), which retrieves topology from the UBOX GIDNIDMAP register and works regardless of whether NUMA is enabled in Linux. This requires snbep_pci2phy_map_init() to be added in spr_uncore_pci_init(). Keep uncore_device_to_die() only for the nr_node_ids > 8 case, where NUMA is expected to be enabled. Fixes: 9a7832ce3d92 ("perf/x86/intel/uncore: With > 8 nodes, get pci bus die id from NUMA info") Fixes: 65248a9a9ee1 ("perf/x86/uncore: Add a quirk for UPI on SPR") Signed-off-by: Zide Chen Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Dapeng Mi Tested-by: Steve Wahl Link: https://patch.msgid.link/20260313174050.171704-4-zide.chen@intel.com --- arch/x86/events/intel/uncore.c | 1 + arch/x86/events/intel/uncore_snbep.c | 13 ++++++------- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/arch/x86/events/intel/uncore.c b/arch/x86/events/intel/uncore.c index 786bd51a0d89..e9cc1ba921c5 100644 --- a/arch/x86/events/intel/uncore.c +++ b/arch/x86/events/intel/uncore.c @@ -67,6 +67,7 @@ int uncore_die_to_segment(int die) return bus ? pci_domain_nr(bus) : -EINVAL; } +/* Note: This API can only be used when NUMA information is available. */ int uncore_device_to_die(struct pci_dev *dev) { int node = pcibus_to_node(dev->bus); diff --git a/arch/x86/events/intel/uncore_snbep.c b/arch/x86/events/intel/uncore_snbep.c index 0a1d08136cc1..a4b337bf83c0 100644 --- a/arch/x86/events/intel/uncore_snbep.c +++ b/arch/x86/events/intel/uncore_snbep.c @@ -1459,13 +1459,7 @@ static int snbep_pci2phy_map_init(int devid, int nodeid_loc, int idmap_loc, bool } map->pbus_to_dieid[bus] = die_id = uncore_device_to_die(ubox_dev); - raw_spin_unlock(&pci2phy_map_lock); - - if (WARN_ON_ONCE(die_id == -1)) { - err = -EINVAL; - break; - } } } @@ -6420,7 +6414,7 @@ static void spr_update_device_location(int type_id) while ((dev = pci_get_device(PCI_VENDOR_ID_INTEL, device, dev)) != NULL) { - die = uncore_device_to_die(dev); + die = uncore_pcibus_to_dieid(dev->bus); if (die < 0) continue; @@ -6444,6 +6438,11 @@ static void spr_update_device_location(int type_id) int spr_uncore_pci_init(void) { + int ret = snbep_pci2phy_map_init(0x3250, SKX_CPUNODEID, SKX_GIDNIDMAP, true); + + if (ret) + return ret; + /* * The discovery table of UPI on some SPR variant is broken, * which impacts the detection of both UPI and M3UPI uncore PMON. From 16bcbe6738bea7b4aee0a29324ce12c21c4b0ea0 Mon Sep 17 00:00:00 2001 From: Zide Chen Date: Fri, 13 Mar 2026 10:40:50 -0700 Subject: [PATCH 144/248] perf/x86/intel/uncore: Remove extra double quote mark The third argument in INTEL_UNCORE_FR_EVENT_DESC() is subject to __stringify(), and the extra double quote marks can result in the expansion "3.814697266e-6" in the sysfs knobs, instead of 3.814697266e-6. This is incorrect, though it may still work for perf, e.g. perf stat -e uncore_iio_free_running_0/bw_in_port0/ Fixes: d8987048f665 ("perf/x86/intel/uncore: Support IIO free-running counters on DMR") Closes: https://lore.kernel.org/all/20251231224233.113839-1-zide.chen@intel.com/ Reported-by: Chun-Tse Shao Signed-off-by: Zide Chen Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Chun-Tse Shao Reviewed-by: Dapeng Mi Link: https://patch.msgid.link/20260313174050.171704-5-zide.chen@intel.com --- arch/x86/events/intel/uncore_snbep.c | 48 ++++++++++++++-------------- 1 file changed, 24 insertions(+), 24 deletions(-) diff --git a/arch/x86/events/intel/uncore_snbep.c b/arch/x86/events/intel/uncore_snbep.c index a4b337bf83c0..215d33e260ed 100644 --- a/arch/x86/events/intel/uncore_snbep.c +++ b/arch/x86/events/intel/uncore_snbep.c @@ -6934,34 +6934,34 @@ static struct freerunning_counters dmr_iio_freerunning[] = { static struct uncore_event_desc dmr_uncore_iio_freerunning_events[] = { /* ITC Free Running Data BW counter for inbound traffic */ - INTEL_UNCORE_FR_EVENT_DESC(inb_data_port0, 0x10, "3.814697266e-6"), - INTEL_UNCORE_FR_EVENT_DESC(inb_data_port1, 0x11, "3.814697266e-6"), - INTEL_UNCORE_FR_EVENT_DESC(inb_data_port2, 0x12, "3.814697266e-6"), - INTEL_UNCORE_FR_EVENT_DESC(inb_data_port3, 0x13, "3.814697266e-6"), - INTEL_UNCORE_FR_EVENT_DESC(inb_data_port4, 0x14, "3.814697266e-6"), - INTEL_UNCORE_FR_EVENT_DESC(inb_data_port5, 0x15, "3.814697266e-6"), - INTEL_UNCORE_FR_EVENT_DESC(inb_data_port6, 0x16, "3.814697266e-6"), - INTEL_UNCORE_FR_EVENT_DESC(inb_data_port7, 0x17, "3.814697266e-6"), + INTEL_UNCORE_FR_EVENT_DESC(inb_data_port0, 0x10, 3.814697266e-6), + INTEL_UNCORE_FR_EVENT_DESC(inb_data_port1, 0x11, 3.814697266e-6), + INTEL_UNCORE_FR_EVENT_DESC(inb_data_port2, 0x12, 3.814697266e-6), + INTEL_UNCORE_FR_EVENT_DESC(inb_data_port3, 0x13, 3.814697266e-6), + INTEL_UNCORE_FR_EVENT_DESC(inb_data_port4, 0x14, 3.814697266e-6), + INTEL_UNCORE_FR_EVENT_DESC(inb_data_port5, 0x15, 3.814697266e-6), + INTEL_UNCORE_FR_EVENT_DESC(inb_data_port6, 0x16, 3.814697266e-6), + INTEL_UNCORE_FR_EVENT_DESC(inb_data_port7, 0x17, 3.814697266e-6), /* ITC Free Running BW IN counters */ - INTEL_UNCORE_FR_EVENT_DESC(bw_in_port0, 0x20, "3.814697266e-6"), - INTEL_UNCORE_FR_EVENT_DESC(bw_in_port1, 0x21, "3.814697266e-6"), - INTEL_UNCORE_FR_EVENT_DESC(bw_in_port2, 0x22, "3.814697266e-6"), - INTEL_UNCORE_FR_EVENT_DESC(bw_in_port3, 0x23, "3.814697266e-6"), - INTEL_UNCORE_FR_EVENT_DESC(bw_in_port4, 0x24, "3.814697266e-6"), - INTEL_UNCORE_FR_EVENT_DESC(bw_in_port5, 0x25, "3.814697266e-6"), - INTEL_UNCORE_FR_EVENT_DESC(bw_in_port6, 0x26, "3.814697266e-6"), - INTEL_UNCORE_FR_EVENT_DESC(bw_in_port7, 0x27, "3.814697266e-6"), + INTEL_UNCORE_FR_EVENT_DESC(bw_in_port0, 0x20, 3.814697266e-6), + INTEL_UNCORE_FR_EVENT_DESC(bw_in_port1, 0x21, 3.814697266e-6), + INTEL_UNCORE_FR_EVENT_DESC(bw_in_port2, 0x22, 3.814697266e-6), + INTEL_UNCORE_FR_EVENT_DESC(bw_in_port3, 0x23, 3.814697266e-6), + INTEL_UNCORE_FR_EVENT_DESC(bw_in_port4, 0x24, 3.814697266e-6), + INTEL_UNCORE_FR_EVENT_DESC(bw_in_port5, 0x25, 3.814697266e-6), + INTEL_UNCORE_FR_EVENT_DESC(bw_in_port6, 0x26, 3.814697266e-6), + INTEL_UNCORE_FR_EVENT_DESC(bw_in_port7, 0x27, 3.814697266e-6), /* ITC Free Running BW OUT counters */ - INTEL_UNCORE_FR_EVENT_DESC(bw_out_port0, 0x30, "3.814697266e-6"), - INTEL_UNCORE_FR_EVENT_DESC(bw_out_port1, 0x31, "3.814697266e-6"), - INTEL_UNCORE_FR_EVENT_DESC(bw_out_port2, 0x32, "3.814697266e-6"), - INTEL_UNCORE_FR_EVENT_DESC(bw_out_port3, 0x33, "3.814697266e-6"), - INTEL_UNCORE_FR_EVENT_DESC(bw_out_port4, 0x34, "3.814697266e-6"), - INTEL_UNCORE_FR_EVENT_DESC(bw_out_port5, 0x35, "3.814697266e-6"), - INTEL_UNCORE_FR_EVENT_DESC(bw_out_port6, 0x36, "3.814697266e-6"), - INTEL_UNCORE_FR_EVENT_DESC(bw_out_port7, 0x37, "3.814697266e-6"), + INTEL_UNCORE_FR_EVENT_DESC(bw_out_port0, 0x30, 3.814697266e-6), + INTEL_UNCORE_FR_EVENT_DESC(bw_out_port1, 0x31, 3.814697266e-6), + INTEL_UNCORE_FR_EVENT_DESC(bw_out_port2, 0x32, 3.814697266e-6), + INTEL_UNCORE_FR_EVENT_DESC(bw_out_port3, 0x33, 3.814697266e-6), + INTEL_UNCORE_FR_EVENT_DESC(bw_out_port4, 0x34, 3.814697266e-6), + INTEL_UNCORE_FR_EVENT_DESC(bw_out_port5, 0x35, 3.814697266e-6), + INTEL_UNCORE_FR_EVENT_DESC(bw_out_port6, 0x36, 3.814697266e-6), + INTEL_UNCORE_FR_EVENT_DESC(bw_out_port7, 0x37, 3.814697266e-6), /* Free Running Clock Counter */ INTEL_UNCORE_EVENT_DESC(clockticks, "event=0xff,umask=0x40"), From 105c42566a550e2d05fc14f763216a8765ee5d0e Mon Sep 17 00:00:00 2001 From: Arthur Husband Date: Mon, 6 Apr 2026 15:23:35 -0700 Subject: [PATCH 145/248] ata: ahci: force 32-bit DMA for JMicron JMB582/JMB585 The JMicron JMB585 (and JMB582) SATA controllers advertise 64-bit DMA support via the S64A bit in the AHCI CAP register, but their 64-bit DMA implementation is defective. Under sustained I/O, DMA transfers targeting addresses above 4GB silently corrupt data -- writes land at incorrect memory addresses with no errors logged. The failure pattern is similar to the ASMedia ASM1061 (commit 20730e9b2778 ("ahci: add 43-bit DMA address quirk for ASMedia ASM1061 controllers")), which also falsely advertised full 64-bit DMA support. However, the JMB585 requires a stricter 32-bit DMA mask rather than 43-bit, as corruption occurs with any address above 4GB. On the Minisforum N5 Pro specifically, the combination of the JMB585's broken 64-bit DMA with the AMD Family 1Ah (Strix Point) IOMMU causes silent data corruption that is only detectable via checksumming filesystems (BTRFS/ZFS scrub). The corruption occurs when 32-bit IOVA space is exhausted and the kernel transparently switches to 64-bit DMA addresses. Add device-specific PCI ID entries for the JMB582 (0x0582) and JMB585 (0x0585) before the generic JMicron class match, using a new board type that combines AHCI_HFLAG_IGN_IRQ_IF_ERR (preserving existing behavior) with AHCI_HFLAG_32BIT_ONLY to force 32-bit DMA masks. Signed-off-by: Arthur Husband Reviewed-by: Damien Le Moal Signed-off-by: Niklas Cassel --- drivers/ata/ahci.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/drivers/ata/ahci.c b/drivers/ata/ahci.c index 931d0081169b..1d73a53370cf 100644 --- a/drivers/ata/ahci.c +++ b/drivers/ata/ahci.c @@ -68,6 +68,7 @@ enum board_ids { /* board IDs for specific chipsets in alphabetical order */ board_ahci_al, board_ahci_avn, + board_ahci_jmb585, board_ahci_mcp65, board_ahci_mcp77, board_ahci_mcp89, @@ -212,6 +213,15 @@ static const struct ata_port_info ahci_port_info[] = { .udma_mask = ATA_UDMA6, .port_ops = &ahci_avn_ops, }, + /* JMicron JMB582/585: 64-bit DMA is broken, force 32-bit */ + [board_ahci_jmb585] = { + AHCI_HFLAGS (AHCI_HFLAG_IGN_IRQ_IF_ERR | + AHCI_HFLAG_32BIT_ONLY), + .flags = AHCI_FLAG_COMMON, + .pio_mask = ATA_PIO4, + .udma_mask = ATA_UDMA6, + .port_ops = &ahci_ops, + }, [board_ahci_mcp65] = { AHCI_HFLAGS (AHCI_HFLAG_NO_FPDMA_AA | AHCI_HFLAG_NO_PMP | AHCI_HFLAG_YES_NCQ), @@ -439,6 +449,10 @@ static const struct pci_device_id ahci_pci_tbl[] = { /* Elkhart Lake IDs 0x4b60 & 0x4b62 https://sata-io.org/product/8803 not tested yet */ { PCI_VDEVICE(INTEL, 0x4b63), board_ahci_pcs_quirk }, /* Elkhart Lake AHCI */ + /* JMicron JMB582/585: force 32-bit DMA (broken 64-bit implementation) */ + { PCI_VDEVICE(JMICRON, 0x0582), board_ahci_jmb585 }, + { PCI_VDEVICE(JMICRON, 0x0585), board_ahci_jmb585 }, + /* JMicron 360/1/3/5/6, match class to avoid IDE function */ { PCI_VENDOR_ID_JMICRON, PCI_ANY_ID, PCI_ANY_ID, PCI_ANY_ID, PCI_CLASS_STORAGE_SATA_AHCI, 0xffffff, board_ahci_ign_iferr }, From 75519f5df2a9b23f7bf305e12dc9a6e3e65c24b7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jouni=20H=C3=B6gander?= Date: Fri, 27 Mar 2026 13:45:53 +0200 Subject: [PATCH 146/248] drm/i915/psr: Do not use pipe_src as borders for SU area MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This far using crtc_state->pipe_src as borders for Selective Update area haven't caused visible problems as drm_rect_width(crtc_state->pipe_src) == crtc_state->hw.adjusted_mode.crtc_hdisplay and drm_rect_height(crtc_state->pipe_src) == crtc_state->hw.adjusted_mode.crtc_vdisplay when pipe scaling is not used. On the other hand using pipe scaling is forcing full frame updates and all the Selective Update area calculations are skipped. Now this improper usage of crtc_state->pipe_src is causing following warnings: <4> [7771.978166] xe 0000:00:02.0: [drm] drm_WARN_ON_ONCE(su_lines % vdsc_cfg->slice_height) after WARN_ON_ONCE was added by commit: "drm/i915/dsc: Add helper for writing DSC Selective Update ET parameters" These warnings are seen when DSC and pipe scaling are enabled simultaneously. This is because on full frame update SU area is improperly set as pipe_src which is not aligned with DSC slice height. Fix these by creating local rectangle using crtc_state->hw.adjusted_mode.crtc_hdisplay and crtc_state->hw.adjusted_mode.crtc_vdisplay. Use this local rectangle as borders for SU area. Fixes: d6774b8c3c58 ("drm/i915: Ensure damage clip area is within pipe area") Cc: # v6.0+ Signed-off-by: Jouni Högander Reviewed-by: Mika Kahola Link: https://patch.msgid.link/20260327114553.195285-1-jouni.hogander@intel.com (cherry picked from commit da0cdc1c329dd2ff09c41fbbe9fbd9c92c5d2c6e) Signed-off-by: Joonas Lahtinen --- drivers/gpu/drm/i915/display/intel_psr.c | 30 +++++++++++++++--------- 1 file changed, 19 insertions(+), 11 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_psr.c b/drivers/gpu/drm/i915/display/intel_psr.c index 3791944389db..097e18c1adb2 100644 --- a/drivers/gpu/drm/i915/display/intel_psr.c +++ b/drivers/gpu/drm/i915/display/intel_psr.c @@ -2678,9 +2678,9 @@ static u32 psr2_pipe_srcsz_early_tpt_calc(struct intel_crtc_state *crtc_state, static void clip_area_update(struct drm_rect *overlap_damage_area, struct drm_rect *damage_area, - struct drm_rect *pipe_src) + struct drm_rect *display_area) { - if (!drm_rect_intersect(damage_area, pipe_src)) + if (!drm_rect_intersect(damage_area, display_area)) return; if (overlap_damage_area->y1 == -1) { @@ -2731,6 +2731,7 @@ static bool intel_psr2_sel_fetch_pipe_alignment(struct intel_crtc_state *crtc_st static void intel_psr2_sel_fetch_et_alignment(struct intel_atomic_state *state, struct intel_crtc *crtc, + struct drm_rect *display_area, bool *cursor_in_su_area) { struct intel_crtc_state *crtc_state = intel_atomic_get_new_crtc_state(state, crtc); @@ -2758,7 +2759,7 @@ intel_psr2_sel_fetch_et_alignment(struct intel_atomic_state *state, continue; clip_area_update(&crtc_state->psr2_su_area, &new_plane_state->uapi.dst, - &crtc_state->pipe_src); + display_area); *cursor_in_su_area = true; } } @@ -2855,6 +2856,12 @@ int intel_psr2_sel_fetch_update(struct intel_atomic_state *state, struct intel_crtc_state *crtc_state = intel_atomic_get_new_crtc_state(state, crtc); struct intel_plane_state *new_plane_state, *old_plane_state; struct intel_plane *plane; + struct drm_rect display_area = { + .x1 = 0, + .y1 = 0, + .x2 = crtc_state->hw.adjusted_mode.crtc_hdisplay, + .y2 = crtc_state->hw.adjusted_mode.crtc_vdisplay, + }; bool full_update = false, su_area_changed; int i, ret; @@ -2868,7 +2875,7 @@ int intel_psr2_sel_fetch_update(struct intel_atomic_state *state, crtc_state->psr2_su_area.x1 = 0; crtc_state->psr2_su_area.y1 = -1; - crtc_state->psr2_su_area.x2 = drm_rect_width(&crtc_state->pipe_src); + crtc_state->psr2_su_area.x2 = drm_rect_width(&display_area); crtc_state->psr2_su_area.y2 = -1; /* @@ -2906,14 +2913,14 @@ int intel_psr2_sel_fetch_update(struct intel_atomic_state *state, damaged_area.y1 = old_plane_state->uapi.dst.y1; damaged_area.y2 = old_plane_state->uapi.dst.y2; clip_area_update(&crtc_state->psr2_su_area, &damaged_area, - &crtc_state->pipe_src); + &display_area); } if (new_plane_state->uapi.visible) { damaged_area.y1 = new_plane_state->uapi.dst.y1; damaged_area.y2 = new_plane_state->uapi.dst.y2; clip_area_update(&crtc_state->psr2_su_area, &damaged_area, - &crtc_state->pipe_src); + &display_area); } continue; } else if (new_plane_state->uapi.alpha != old_plane_state->uapi.alpha) { @@ -2921,7 +2928,7 @@ int intel_psr2_sel_fetch_update(struct intel_atomic_state *state, damaged_area.y1 = new_plane_state->uapi.dst.y1; damaged_area.y2 = new_plane_state->uapi.dst.y2; clip_area_update(&crtc_state->psr2_su_area, &damaged_area, - &crtc_state->pipe_src); + &display_area); continue; } @@ -2937,7 +2944,7 @@ int intel_psr2_sel_fetch_update(struct intel_atomic_state *state, damaged_area.x1 += new_plane_state->uapi.dst.x1 - src.x1; damaged_area.x2 += new_plane_state->uapi.dst.x1 - src.x1; - clip_area_update(&crtc_state->psr2_su_area, &damaged_area, &crtc_state->pipe_src); + clip_area_update(&crtc_state->psr2_su_area, &damaged_area, &display_area); } /* @@ -2972,7 +2979,8 @@ int intel_psr2_sel_fetch_update(struct intel_atomic_state *state, * cursor is added into affected planes even when * cursor is not updated by itself. */ - intel_psr2_sel_fetch_et_alignment(state, crtc, &cursor_in_su_area); + intel_psr2_sel_fetch_et_alignment(state, crtc, &display_area, + &cursor_in_su_area); su_area_changed = intel_psr2_sel_fetch_pipe_alignment(crtc_state); @@ -3048,8 +3056,8 @@ int intel_psr2_sel_fetch_update(struct intel_atomic_state *state, skip_sel_fetch_set_loop: if (full_update) - clip_area_update(&crtc_state->psr2_su_area, &crtc_state->pipe_src, - &crtc_state->pipe_src); + clip_area_update(&crtc_state->psr2_su_area, &display_area, + &display_area); psr2_man_trk_ctl_calc(crtc_state, full_update); crtc_state->pipe_srcsz_early_tpt = From 069daad4f2ae9c5c108131995529d5f02392c446 Mon Sep 17 00:00:00 2001 From: Steffen Klassert Date: Thu, 2 Apr 2026 13:31:04 +0200 Subject: [PATCH 147/248] xfrm: Wait for RCU readers during policy netns exit xfrm_policy_fini() frees the policy_bydst hash tables after flushing the policy work items and deleting all policies, but it does not wait for concurrent RCU readers to leave their read-side critical sections first. The policy_bydst tables are published via rcu_assign_pointer() and are looked up through rcu_dereference_check(), so netns teardown must also wait for an RCU grace period before freeing the table memory. Fix this by adding synchronize_rcu() before freeing the policy hash tables. Fixes: e1e551bc5630 ("xfrm: policy: prepare policy_bydst hash for rcu lookups") Signed-off-by: Steffen Klassert Reviewed-by: Florian Westphal --- net/xfrm/xfrm_policy.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 362939aa56cf..8f0188e763c7 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -4290,6 +4290,8 @@ static void xfrm_policy_fini(struct net *net) #endif xfrm_policy_flush(net, XFRM_POLICY_TYPE_MAIN, false); + synchronize_rcu(); + WARN_ON(!list_empty(&net->xfrm.policy_all)); for (dir = 0; dir < XFRM_POLICY_MAX; dir++) { From 1c428b03840094410c5fb6a5db30640486bbbfcb Mon Sep 17 00:00:00 2001 From: Qi Tang Date: Thu, 2 Apr 2026 19:44:01 +0800 Subject: [PATCH 148/248] xfrm: hold dev ref until after transport_finish NF_HOOK After async crypto completes, xfrm_input_resume() calls dev_put() immediately on re-entry before the skb reaches transport_finish. The skb->dev pointer is then used inside NF_HOOK and its okfn, which can race with device teardown. Remove the dev_put from the async resumption entry and instead drop the reference after the NF_HOOK call in transport_finish, using a saved device pointer since NF_HOOK may consume the skb. This covers NF_DROP, NF_QUEUE and NF_STOLEN paths that skip the okfn. For non-transport exits (decaps, gro, drop) and secondary async return points, release the reference inline when async is set. Suggested-by: Florian Westphal Fixes: acf568ee859f ("xfrm: Reinject transport-mode packets through tasklet") Cc: stable@vger.kernel.org Signed-off-by: Qi Tang Signed-off-by: Steffen Klassert --- net/ipv4/xfrm4_input.c | 5 ++++- net/ipv6/xfrm6_input.c | 5 ++++- net/xfrm/xfrm_input.c | 18 ++++++++++++++---- 3 files changed, 22 insertions(+), 6 deletions(-) diff --git a/net/ipv4/xfrm4_input.c b/net/ipv4/xfrm4_input.c index f28cfd88eaf5..c2eac844bcdb 100644 --- a/net/ipv4/xfrm4_input.c +++ b/net/ipv4/xfrm4_input.c @@ -50,6 +50,7 @@ int xfrm4_transport_finish(struct sk_buff *skb, int async) { struct xfrm_offload *xo = xfrm_offload(skb); struct iphdr *iph = ip_hdr(skb); + struct net_device *dev = skb->dev; iph->protocol = XFRM_MODE_SKB_CB(skb)->protocol; @@ -73,8 +74,10 @@ int xfrm4_transport_finish(struct sk_buff *skb, int async) } NF_HOOK(NFPROTO_IPV4, NF_INET_PRE_ROUTING, - dev_net(skb->dev), NULL, skb, skb->dev, NULL, + dev_net(dev), NULL, skb, dev, NULL, xfrm4_rcv_encap_finish); + if (async) + dev_put(dev); return 0; } diff --git a/net/ipv6/xfrm6_input.c b/net/ipv6/xfrm6_input.c index 9005fc156a20..699a001ac166 100644 --- a/net/ipv6/xfrm6_input.c +++ b/net/ipv6/xfrm6_input.c @@ -43,6 +43,7 @@ static int xfrm6_transport_finish2(struct net *net, struct sock *sk, int xfrm6_transport_finish(struct sk_buff *skb, int async) { struct xfrm_offload *xo = xfrm_offload(skb); + struct net_device *dev = skb->dev; int nhlen = -skb_network_offset(skb); skb_network_header(skb)[IP6CB(skb)->nhoff] = @@ -68,8 +69,10 @@ int xfrm6_transport_finish(struct sk_buff *skb, int async) } NF_HOOK(NFPROTO_IPV6, NF_INET_PRE_ROUTING, - dev_net(skb->dev), NULL, skb, skb->dev, NULL, + dev_net(dev), NULL, skb, dev, NULL, xfrm6_transport_finish2); + if (async) + dev_put(dev); return 0; } diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c index dc1312ed5a09..f65291eba1f6 100644 --- a/net/xfrm/xfrm_input.c +++ b/net/xfrm/xfrm_input.c @@ -506,7 +506,6 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type) /* An encap_type of -1 indicates async resumption. */ if (encap_type == -1) { async = 1; - dev_put(skb->dev); seq = XFRM_SKB_CB(skb)->seq.input.low; spin_lock(&x->lock); goto resume; @@ -659,8 +658,11 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type) dev_hold(skb->dev); nexthdr = x->type->input(x, skb); - if (nexthdr == -EINPROGRESS) + if (nexthdr == -EINPROGRESS) { + if (async) + dev_put(skb->dev); return 0; + } dev_put(skb->dev); spin_lock(&x->lock); @@ -695,9 +697,11 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type) XFRM_MODE_SKB_CB(skb)->protocol = nexthdr; err = xfrm_inner_mode_input(x, skb); - if (err == -EINPROGRESS) + if (err == -EINPROGRESS) { + if (async) + dev_put(skb->dev); return 0; - else if (err) { + } else if (err) { XFRM_INC_STATS(net, LINUX_MIB_XFRMINSTATEMODEERROR); goto drop; } @@ -734,6 +738,8 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type) sp->olen = 0; if (skb_valid_dst(skb)) skb_dst_drop(skb); + if (async) + dev_put(skb->dev); gro_cells_receive(&gro_cells, skb); return 0; } else { @@ -753,6 +759,8 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type) sp->olen = 0; if (skb_valid_dst(skb)) skb_dst_drop(skb); + if (async) + dev_put(skb->dev); gro_cells_receive(&gro_cells, skb); return err; } @@ -763,6 +771,8 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type) drop_unlock: spin_unlock(&x->lock); drop: + if (async) + dev_put(skb->dev); xfrm_rcv_cb(skb, family, x && x->type ? x->type->proto : nexthdr, -1); kfree_skb(skb); return 0; From 83317cce60a032c49480dcdabe146435bd689d03 Mon Sep 17 00:00:00 2001 From: Kotlyarov Mihail Date: Sat, 4 Apr 2026 12:05:20 +0300 Subject: [PATCH 149/248] xfrm: fix refcount leak in xfrm_migrate_policy_find syzkaller reported a memory leak in xfrm_policy_alloc: BUG: memory leak unreferenced object 0xffff888114d79000 (size 1024): comm "syz.1.17", pid 931 ... xfrm_policy_alloc+0xb3/0x4b0 net/xfrm/xfrm_policy.c:432 The root cause is a double call to xfrm_pol_hold_rcu() in xfrm_migrate_policy_find(). The lookup function already returns a policy with held reference, making the second call redundant. Remove the redundant xfrm_pol_hold_rcu() call to fix the refcount imbalance and prevent the memory leak. Found by Linux Verification Center (linuxtesting.org) with Syzkaller. Fixes: 563d5ca93e88 ("xfrm: switch migrate to xfrm_policy_lookup_bytype") Signed-off-by: Kotlyarov Mihail Reviewed-by: Florian Westphal Signed-off-by: Steffen Klassert --- net/xfrm/xfrm_policy.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 8f0188e763c7..a872af5610dc 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -4528,9 +4528,6 @@ static struct xfrm_policy *xfrm_migrate_policy_find(const struct xfrm_selector * pol = xfrm_policy_lookup_bytype(net, type, &fl, sel->family, dir, if_id); if (IS_ERR_OR_NULL(pol)) goto out_unlock; - - if (!xfrm_pol_hold_rcu(pol)) - pol = NULL; out_unlock: rcu_read_unlock(); return pol; From 1beb76b2053b68c491b78370794b8ff63c8f8c02 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Mon, 6 Apr 2026 17:33:03 +0200 Subject: [PATCH 150/248] xfrm_user: fix info leak in build_mapping() struct xfrm_usersa_id has a one-byte padding hole after the proto field, which ends up never getting set to zero before copying out to userspace. Fix that up by zeroing out the whole structure before setting individual variables. Fixes: 3a2dfbe8acb1 ("xfrm: Notify changes in UDP encapsulation via netlink") Cc: Steffen Klassert Cc: Herbert Xu Cc: "David S. Miller" Cc: Eric Dumazet Cc: Jakub Kicinski Cc: Paolo Abeni Cc: Simon Horman Assisted-by: gregkh_clanker_t1000 Signed-off-by: Greg Kroah-Hartman Signed-off-by: Steffen Klassert --- net/xfrm/xfrm_user.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index a779590c985a..baa43c325da2 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -4172,6 +4172,7 @@ static int build_mapping(struct sk_buff *skb, struct xfrm_state *x, um = nlmsg_data(nlh); + memset(&um->id, 0, sizeof(um->id)); memcpy(&um->id.daddr, &x->id.daddr, sizeof(um->id.daddr)); um->id.spi = x->id.spi; um->id.family = x->props.family; From d10119968d0e1f2b669604baf2a8b5fdb72fa6b4 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Mon, 6 Apr 2026 17:34:22 +0200 Subject: [PATCH 151/248] xfrm_user: fix info leak in build_report() struct xfrm_user_report is a __u8 proto field followed by a struct xfrm_selector which means there is three "empty" bytes of padding, but the padding is never zeroed before copying to userspace. Fix that up by zeroing the structure before setting individual member variables. Cc: stable Cc: Steffen Klassert Cc: Herbert Xu Cc: "David S. Miller" Cc: Eric Dumazet Cc: Jakub Kicinski Cc: Paolo Abeni Cc: Simon Horman Assisted-by: gregkh_clanker_t1000 Signed-off-by: Greg Kroah-Hartman Signed-off-by: Steffen Klassert --- net/xfrm/xfrm_user.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index baa43c325da2..d56450f61669 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -4125,6 +4125,7 @@ static int build_report(struct sk_buff *skb, u8 proto, return -EMSGSIZE; ur = nlmsg_data(nlh); + memset(ur, 0, sizeof(*ur)); ur->proto = proto; memcpy(&ur->sel, sel, sizeof(ur->sel)); From 426c355742f02cf743b347d9d7dbdc1bfbfa31ef Mon Sep 17 00:00:00 2001 From: Zhengchuan Liang Date: Sun, 22 Mar 2026 11:46:08 -0700 Subject: [PATCH 152/248] net: af_key: zero aligned sockaddr tail in PF_KEY exports PF_KEY export paths use `pfkey_sockaddr_size()` when reserving sockaddr payload space, so IPv6 addresses occupy 32 bytes on the wire. However, `pfkey_sockaddr_fill()` initializes only the first 28 bytes of `struct sockaddr_in6`, leaving the final 4 aligned bytes uninitialized. Not every PF_KEY message is affected. The state and policy dump builders already zero the whole message buffer before filling the sockaddr payloads. Keep the fix to the export paths that still append aligned sockaddr payloads with plain `skb_put()`: - `SADB_ACQUIRE` - `SADB_X_NAT_T_NEW_MAPPING` - `SADB_X_MIGRATE` Fix those paths by clearing only the aligned sockaddr tail after `pfkey_sockaddr_fill()`. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Fixes: 08de61beab8a ("[PFKEYV2]: Extension for dynamic update of endpoint address(es)") Reported-by: Yifan Wu Reported-by: Juefei Pu Co-developed-by: Yuan Tan Signed-off-by: Yuan Tan Suggested-by: Xin Liu Tested-by: Xiao Liu Signed-off-by: Zhengchuan Liang Signed-off-by: Steffen Klassert --- net/key/af_key.c | 52 +++++++++++++++++++++++++++++++----------------- 1 file changed, 34 insertions(+), 18 deletions(-) diff --git a/net/key/af_key.c b/net/key/af_key.c index 72ac2ace419d..5d480ae39405 100644 --- a/net/key/af_key.c +++ b/net/key/af_key.c @@ -757,6 +757,22 @@ static unsigned int pfkey_sockaddr_fill(const xfrm_address_t *xaddr, __be16 port return 0; } +static unsigned int pfkey_sockaddr_fill_zero_tail(const xfrm_address_t *xaddr, + __be16 port, + struct sockaddr *sa, + unsigned short family) +{ + unsigned int prefixlen; + int sockaddr_len = pfkey_sockaddr_len(family); + int sockaddr_size = pfkey_sockaddr_size(family); + + prefixlen = pfkey_sockaddr_fill(xaddr, port, sa, family); + if (sockaddr_size > sockaddr_len) + memset((u8 *)sa + sockaddr_len, 0, sockaddr_size - sockaddr_len); + + return prefixlen; +} + static struct sk_buff *__pfkey_xfrm_state2msg(const struct xfrm_state *x, int add_keys, int hsc) { @@ -3206,9 +3222,9 @@ static int pfkey_send_acquire(struct xfrm_state *x, struct xfrm_tmpl *t, struct addr->sadb_address_proto = 0; addr->sadb_address_reserved = 0; addr->sadb_address_prefixlen = - pfkey_sockaddr_fill(&x->props.saddr, 0, - (struct sockaddr *) (addr + 1), - x->props.family); + pfkey_sockaddr_fill_zero_tail(&x->props.saddr, 0, + (struct sockaddr *)(addr + 1), + x->props.family); if (!addr->sadb_address_prefixlen) BUG(); @@ -3221,9 +3237,9 @@ static int pfkey_send_acquire(struct xfrm_state *x, struct xfrm_tmpl *t, struct addr->sadb_address_proto = 0; addr->sadb_address_reserved = 0; addr->sadb_address_prefixlen = - pfkey_sockaddr_fill(&x->id.daddr, 0, - (struct sockaddr *) (addr + 1), - x->props.family); + pfkey_sockaddr_fill_zero_tail(&x->id.daddr, 0, + (struct sockaddr *)(addr + 1), + x->props.family); if (!addr->sadb_address_prefixlen) BUG(); @@ -3421,9 +3437,9 @@ static int pfkey_send_new_mapping(struct xfrm_state *x, xfrm_address_t *ipaddr, addr->sadb_address_proto = 0; addr->sadb_address_reserved = 0; addr->sadb_address_prefixlen = - pfkey_sockaddr_fill(&x->props.saddr, 0, - (struct sockaddr *) (addr + 1), - x->props.family); + pfkey_sockaddr_fill_zero_tail(&x->props.saddr, 0, + (struct sockaddr *)(addr + 1), + x->props.family); if (!addr->sadb_address_prefixlen) BUG(); @@ -3443,9 +3459,9 @@ static int pfkey_send_new_mapping(struct xfrm_state *x, xfrm_address_t *ipaddr, addr->sadb_address_proto = 0; addr->sadb_address_reserved = 0; addr->sadb_address_prefixlen = - pfkey_sockaddr_fill(ipaddr, 0, - (struct sockaddr *) (addr + 1), - x->props.family); + pfkey_sockaddr_fill_zero_tail(ipaddr, 0, + (struct sockaddr *)(addr + 1), + x->props.family); if (!addr->sadb_address_prefixlen) BUG(); @@ -3474,15 +3490,15 @@ static int set_sadb_address(struct sk_buff *skb, int sasize, int type, switch (type) { case SADB_EXT_ADDRESS_SRC: addr->sadb_address_prefixlen = sel->prefixlen_s; - pfkey_sockaddr_fill(&sel->saddr, 0, - (struct sockaddr *)(addr + 1), - sel->family); + pfkey_sockaddr_fill_zero_tail(&sel->saddr, 0, + (struct sockaddr *)(addr + 1), + sel->family); break; case SADB_EXT_ADDRESS_DST: addr->sadb_address_prefixlen = sel->prefixlen_d; - pfkey_sockaddr_fill(&sel->daddr, 0, - (struct sockaddr *)(addr + 1), - sel->family); + pfkey_sockaddr_fill_zero_tail(&sel->daddr, 0, + (struct sockaddr *)(addr + 1), + sel->family); break; default: return -EINVAL; From db5b8cecbdf479ad13156af750377e5b43853fab Mon Sep 17 00:00:00 2001 From: Francesco Lavra Date: Mon, 30 Mar 2026 18:19:14 +0200 Subject: [PATCH 153/248] pinctrl: mcp23s08: Disable all pin interrupts during probe A chip being probed may have the interrupt-on-change feature enabled on some of its pins, for example after a reboot. This can cause the chip to generate interrupts for pins that don't have a registered nested handler, which leads to a kernel crash such as below: [ 7.928897] Unable to handle kernel read from unreadable memory at virtual address 00000000000000ac [ 7.932314] Mem abort info: [ 7.935081] ESR = 0x0000000096000004 [ 7.938808] EC = 0x25: DABT (current EL), IL = 32 bits [ 7.944094] SET = 0, FnV = 0 [ 7.947127] EA = 0, S1PTW = 0 [ 7.950247] FSC = 0x04: level 0 translation fault [ 7.955101] Data abort info: [ 7.957961] ISV = 0, ISS = 0x00000004, ISS2 = 0x00000000 [ 7.963421] CM = 0, WnR = 0, TnD = 0, TagAccess = 0 [ 7.968447] GCS = 0, Overlay = 0, DirtyBit = 0, Xs = 0 [ 7.973734] user pgtable: 4k pages, 48-bit VAs, pgdp=00000000089b7000 [ 7.980148] [00000000000000ac] pgd=0000000000000000, p4d=0000000000000000 [ 7.986913] Internal error: Oops: 0000000096000004 [#1] SMP [ 7.992545] Modules linked in: [ 8.073678] CPU: 0 UID: 0 PID: 81 Comm: irq/18-4-0025 Not tainted 7.0.0-rc6-gd2b5a1f931c8-dirty #199 [ 8.073689] Hardware name: Khadas VIM3 (DT) [ 8.073692] pstate: 604000c5 (nZCv daIF +PAN -UAO -TCO -DIT -SSBS BTYPE=--) [ 8.094639] pc : _raw_spin_lock_irq+0x40/0x80 [ 8.098970] lr : handle_nested_irq+0x2c/0x168 [ 8.098979] sp : ffff800082b2bd20 [ 8.106599] x29: ffff800082b2bd20 x28: ffff800080107920 x27: ffff800080104d88 [ 8.106611] x26: ffff000003298080 x25: 0000000000000001 x24: 000000000000ff00 [ 8.113707] x23: 0000000000000001 x22: 0000000000000000 x21: 000000000000000e [ 8.120850] x20: 0000000000000000 x19: 00000000000000ac x18: 0000000000000000 [ 8.135046] x17: 0000000000000000 x16: 0000000000000000 x15: 0000000000000000 [ 8.135062] x14: ffff800081567ea8 x13: ffffffffffffffff x12: 0000000000000000 [ 8.135070] x11: 00000000000000c0 x10: 0000000000000b60 x9 : ffff800080109e0c [ 8.135078] x8 : 1fffe0000069dbc1 x7 : 0000000000000001 x6 : ffff0000034ede00 [ 8.135086] x5 : 0000000000000000 x4 : ffff0000034ede08 x3 : 0000000000000001 [ 8.163460] x2 : 0000000000000000 x1 : 0000000000000001 x0 : 00000000000000ac [ 8.170560] Call trace: [ 8.180094] _raw_spin_lock_irq+0x40/0x80 (P) [ 8.184443] mcp23s08_irq+0x248/0x358 [ 8.184462] irq_thread_fn+0x34/0xb8 [ 8.184470] irq_thread+0x1a4/0x310 [ 8.195093] kthread+0x13c/0x150 [ 8.198309] ret_from_fork+0x10/0x20 [ 8.201850] Code: d65f03c0 d2800002 52800023 f9800011 (885ffc01) [ 8.207931] ---[ end trace 0000000000000000 ]--- This issue has always been present, but has been latent until commit "f9f4fda15e72" ("pinctrl: mcp23s08: init reg_defaults from HW at probe and switch cache type"), which correctly removed reg_defaults from the regmap and as a side effect changed the behavior of the interrupt handler so that the real value of the MCP_GPINTEN register is now being read from the chip instead of using a bogus 0 default value; a non-zero value for this register can trigger the invocation of a nested handler which may not exist (yet). Fix this issue by disabling all pin interrupts during initialization. Fixes: f9f4fda15e72 ("pinctrl: mcp23s08: init reg_defaults from HW at probe and switch cache type") Signed-off-by: Francesco Lavra Signed-off-by: Linus Walleij --- drivers/pinctrl/pinctrl-mcp23s08.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/pinctrl/pinctrl-mcp23s08.c b/drivers/pinctrl/pinctrl-mcp23s08.c index 586f2f67c617..b89b3169e8be 100644 --- a/drivers/pinctrl/pinctrl-mcp23s08.c +++ b/drivers/pinctrl/pinctrl-mcp23s08.c @@ -664,6 +664,15 @@ int mcp23s08_probe_one(struct mcp23s08 *mcp, struct device *dev, if (mcp->irq && mcp->irq_controller) { struct gpio_irq_chip *girq = &mcp->chip.irq; + /* + * Disable all pin interrupts, to prevent the interrupt handler from + * calling nested handlers for any currently-enabled interrupts that + * do not (yet) have an actual handler. + */ + ret = mcp_write(mcp, MCP_GPINTEN, 0); + if (ret < 0) + return dev_err_probe(dev, ret, "can't disable interrupts\n"); + gpio_irq_chip_set_chip(girq, &mcp23s08_irq_chip); /* This will let us handle the parent IRQ in the driver */ girq->parent_handler = NULL; From 12cd7632757a54ce586e36040210b1a738a0fc53 Mon Sep 17 00:00:00 2001 From: Thomas Fourier Date: Wed, 18 Feb 2026 14:07:37 +0100 Subject: [PATCH 154/248] wifi: brcmsmac: Fix dma_free_coherent() size dma_alloc_consistent() may change the size to align it. The new size is saved in alloced. Change the free size to match the allocation size. Fixes: 5b435de0d786 ("net: wireless: add brcm80211 drivers") Cc: Signed-off-by: Thomas Fourier Acked-by: Arend van Spriel Link: https://patch.msgid.link/20260218130741.46566-3-fourier.thomas@gmail.com Signed-off-by: Johannes Berg --- drivers/net/wireless/broadcom/brcm80211/brcmsmac/dma.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmsmac/dma.c b/drivers/net/wireless/broadcom/brcm80211/brcmsmac/dma.c index 9f6ef7ce1b58..a329c20e92fb 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmsmac/dma.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmsmac/dma.c @@ -483,7 +483,7 @@ static void *dma_ringalloc(struct dma_info *di, u32 boundary, uint size, if (((desc_strtaddr + size - 1) & boundary) != (desc_strtaddr & boundary)) { *alignbits = dma_align_sizetobits(size); - dma_free_coherent(di->dmadev, size, va, *descpa); + dma_free_coherent(di->dmadev, *alloced, va, *descpa); va = dma_alloc_consistent(di, size, *alignbits, alloced, descpa); } From 304950a467d83678bd0b0f46331882e2ac23b12d Mon Sep 17 00:00:00 2001 From: Pengpeng Hou Date: Mon, 23 Mar 2026 15:45:51 +0800 Subject: [PATCH 155/248] wifi: brcmfmac: validate bsscfg indices in IF events brcmf_fweh_handle_if_event() validates the firmware-provided interface index before it touches drvr->iflist[], but it still uses the raw bsscfgidx field as an array index without a matching range check. Reject IF events whose bsscfg index does not fit in drvr->iflist[] before indexing the interface array. Signed-off-by: Pengpeng Hou Acked-by: Arend van Spriel Link: https://patch.msgid.link/20260323074551.93530-1-pengpeng@iscas.ac.cn [add missing wifi prefix] Signed-off-by: Johannes Berg --- drivers/net/wireless/broadcom/brcm80211/brcmfmac/fweh.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/fweh.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/fweh.c index 984886481f4e..1cff4ba76943 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/fweh.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/fweh.c @@ -153,6 +153,11 @@ static void brcmf_fweh_handle_if_event(struct brcmf_pub *drvr, bphy_err(drvr, "invalid interface index: %u\n", ifevent->ifidx); return; } + if (ifevent->bsscfgidx >= BRCMF_MAX_IFS) { + bphy_err(drvr, "invalid bsscfg index: %u\n", + ifevent->bsscfgidx); + return; + } ifp = drvr->iflist[ifevent->bsscfgidx]; From 25369b22223d1c56e42a0cd4ac9137349d5a898e Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Fri, 27 Mar 2026 12:32:19 +0100 Subject: [PATCH 156/248] wifi: rt2x00usb: fix devres lifetime USB drivers bind to USB interfaces and any device managed resources should have their lifetime tied to the interface rather than parent USB device. This avoids issues like memory leaks when drivers are unbound without their devices being physically disconnected (e.g. on probe deferral or configuration changes). Fix the USB anchor lifetime so that it is released on driver unbind. Fixes: 8b4c0009313f ("rt2x00usb: Use usb anchor to manage URB") Cc: stable@vger.kernel.org # 4.7 Cc: Vishal Thanki Signed-off-by: Johan Hovold Acked-by: Stanislaw Gruszka Reviewed-by: Greg Kroah-Hartman Link: https://patch.msgid.link/20260327113219.1313748-1-johan@kernel.org Signed-off-by: Johannes Berg --- drivers/net/wireless/ralink/rt2x00/rt2x00usb.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/ralink/rt2x00/rt2x00usb.c b/drivers/net/wireless/ralink/rt2x00/rt2x00usb.c index 54599cad78f9..aa5ecade2a40 100644 --- a/drivers/net/wireless/ralink/rt2x00/rt2x00usb.c +++ b/drivers/net/wireless/ralink/rt2x00/rt2x00usb.c @@ -828,7 +828,7 @@ int rt2x00usb_probe(struct usb_interface *usb_intf, if (retval) goto exit_free_device; - rt2x00dev->anchor = devm_kmalloc(&usb_dev->dev, + rt2x00dev->anchor = devm_kmalloc(&usb_intf->dev, sizeof(struct usb_anchor), GFP_KERNEL); if (!rt2x00dev->anchor) { From ea245d78dec594372e27d8c79616baf49e98a4a1 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Mon, 30 Mar 2026 11:14:13 +0200 Subject: [PATCH 157/248] net: rfkill: prevent unlimited numbers of rfkill events from being created Userspace can create an unlimited number of rfkill events if the system is so configured, while not consuming them from the rfkill file descriptor, causing a potential out of memory situation. Prevent this from bounding the number of pending rfkill events at a "large" number (i.e. 1000) to prevent abuses like this. Cc: Johannes Berg Reported-by: Yuan Tan Reported-by: Yifan Wu Reported-by: Juefei Pu Reported-by: Xin Liu Cc: stable Signed-off-by: Greg Kroah-Hartman Link: https://patch.msgid.link/2026033013-disfigure-scroll-e25e@gregkh Signed-off-by: Johannes Berg --- net/rfkill/core.c | 35 ++++++++++++++++++++++++----------- 1 file changed, 24 insertions(+), 11 deletions(-) diff --git a/net/rfkill/core.c b/net/rfkill/core.c index 2444237bc36a..4827e1fb8804 100644 --- a/net/rfkill/core.c +++ b/net/rfkill/core.c @@ -73,11 +73,14 @@ struct rfkill_int_event { struct rfkill_event_ext ev; }; +/* Max rfkill events that can be "in-flight" for one data source */ +#define MAX_RFKILL_EVENT 1000 struct rfkill_data { struct list_head list; struct list_head events; struct mutex mtx; wait_queue_head_t read_wait; + u32 event_count; bool input_handler; u8 max_size; }; @@ -255,10 +258,12 @@ static void rfkill_global_led_trigger_unregister(void) } #endif /* CONFIG_RFKILL_LEDS */ -static void rfkill_fill_event(struct rfkill_event_ext *ev, - struct rfkill *rfkill, - enum rfkill_operation op) +static int rfkill_fill_event(struct rfkill_int_event *int_ev, + struct rfkill *rfkill, + struct rfkill_data *data, + enum rfkill_operation op) { + struct rfkill_event_ext *ev = &int_ev->ev; unsigned long flags; ev->idx = rfkill->idx; @@ -271,6 +276,15 @@ static void rfkill_fill_event(struct rfkill_event_ext *ev, RFKILL_BLOCK_SW_PREV)); ev->hard_block_reasons = rfkill->hard_block_reasons; spin_unlock_irqrestore(&rfkill->lock, flags); + + scoped_guard(mutex, &data->mtx) { + if (data->event_count++ > MAX_RFKILL_EVENT) { + data->event_count--; + return -ENOSPC; + } + list_add_tail(&int_ev->list, &data->events); + } + return 0; } static void rfkill_send_events(struct rfkill *rfkill, enum rfkill_operation op) @@ -282,10 +296,10 @@ static void rfkill_send_events(struct rfkill *rfkill, enum rfkill_operation op) ev = kzalloc_obj(*ev); if (!ev) continue; - rfkill_fill_event(&ev->ev, rfkill, op); - mutex_lock(&data->mtx); - list_add_tail(&ev->list, &data->events); - mutex_unlock(&data->mtx); + if (rfkill_fill_event(ev, rfkill, data, op)) { + kfree(ev); + continue; + } wake_up_interruptible(&data->read_wait); } } @@ -1186,10 +1200,8 @@ static int rfkill_fop_open(struct inode *inode, struct file *file) if (!ev) goto free; rfkill_sync(rfkill); - rfkill_fill_event(&ev->ev, rfkill, RFKILL_OP_ADD); - mutex_lock(&data->mtx); - list_add_tail(&ev->list, &data->events); - mutex_unlock(&data->mtx); + if (rfkill_fill_event(ev, rfkill, data, RFKILL_OP_ADD)) + kfree(ev); } list_add(&data->list, &rfkill_fds); mutex_unlock(&rfkill_global_mutex); @@ -1259,6 +1271,7 @@ static ssize_t rfkill_fop_read(struct file *file, char __user *buf, ret = -EFAULT; list_del(&ev->list); + data->event_count--; kfree(ev); out: mutex_unlock(&data->mtx); From 5a1140404cbf7ba40137dfb1fb96893aa9a67d68 Mon Sep 17 00:00:00 2001 From: Nathan Rebello Date: Tue, 7 Apr 2026 02:39:58 -0400 Subject: [PATCH 158/248] usb: typec: ucsi: skip connector validation before init Notifications can arrive before ucsi_init() has populated ucsi->cap.num_connectors via GET_CAPABILITY. At that point num_connectors is still 0, causing all valid connector numbers to be incorrectly rejected as bogus. Skip the bounds check when num_connectors is 0 (not yet initialized). Pre-init notifications are already handled safely by the early-event guard in ucsi_connector_change(). Reported-by: Takashi Iwai Fixes: d2d8c17ac01a ("usb: typec: ucsi: validate connector number in ucsi_notify_common()") Cc: stable@vger.kernel.org Signed-off-by: Nathan Rebello Tested-by: Takashi Iwai Link: https://patch.msgid.link/20260407063958.863-1-nathan.c.rebello@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/typec/ucsi/ucsi.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/usb/typec/ucsi/ucsi.c b/drivers/usb/typec/ucsi/ucsi.c index 8333bdaf5566..46262ee0d192 100644 --- a/drivers/usb/typec/ucsi/ucsi.c +++ b/drivers/usb/typec/ucsi/ucsi.c @@ -44,7 +44,8 @@ void ucsi_notify_common(struct ucsi *ucsi, u32 cci) return; if (UCSI_CCI_CONNECTOR(cci)) { - if (UCSI_CCI_CONNECTOR(cci) <= ucsi->cap.num_connectors) + if (!ucsi->cap.num_connectors || + UCSI_CCI_CONNECTOR(cci) <= ucsi->cap.num_connectors) ucsi_connector_change(ucsi, UCSI_CCI_CONNECTOR(cci)); else dev_err(ucsi->dev, "bogus connector number in CCI: %lu\n", From 1f8fd9490e3184e9a2394df2e682901a1d57ce71 Mon Sep 17 00:00:00 2001 From: Pei Xiao Date: Tue, 7 Apr 2026 17:55:08 +0800 Subject: [PATCH 159/248] spi: zynq-qspi: Simplify clock handling with devm_clk_get_enabled() Replace devm_clk_get() followed by clk_prepare_enable() with devm_clk_get_enabled() for both "pclk" and "ref_clk". This removes the need for explicit clock enable and disable calls, as the managed API automatically disables the clocks on device removal or probe failure. Remove the now-unnecessary clk_disable_unprepare() calls from the probe error paths and the remove callback. Simplify error handling by jumping directly to the remove_ctlr label. Signed-off-by: Pei Xiao Acked-by: Michal Simek Link: https://patch.msgid.link/24043625f89376da36feca2408f990a85be7ab36.1775555500.git.xiaopei01@kylinos.cn Signed-off-by: Mark Brown --- drivers/spi/spi-zynq-qspi.c | 42 ++++++------------------------------- 1 file changed, 6 insertions(+), 36 deletions(-) diff --git a/drivers/spi/spi-zynq-qspi.c b/drivers/spi/spi-zynq-qspi.c index 5232483c4a3a..af252500195c 100644 --- a/drivers/spi/spi-zynq-qspi.c +++ b/drivers/spi/spi-zynq-qspi.c @@ -381,21 +381,10 @@ static int zynq_qspi_setup_op(struct spi_device *spi) { struct spi_controller *ctlr = spi->controller; struct zynq_qspi *qspi = spi_controller_get_devdata(ctlr); - int ret; if (ctlr->busy) return -EBUSY; - ret = clk_enable(qspi->refclk); - if (ret) - return ret; - - ret = clk_enable(qspi->pclk); - if (ret) { - clk_disable(qspi->refclk); - return ret; - } - zynq_qspi_write(qspi, ZYNQ_QSPI_ENABLE_OFFSET, ZYNQ_QSPI_ENABLE_ENABLE_MASK); @@ -661,7 +650,7 @@ static int zynq_qspi_probe(struct platform_device *pdev) goto remove_ctlr; } - xqspi->pclk = devm_clk_get(&pdev->dev, "pclk"); + xqspi->pclk = devm_clk_get_enabled(&pdev->dev, "pclk"); if (IS_ERR(xqspi->pclk)) { dev_err(&pdev->dev, "pclk clock not found.\n"); ret = PTR_ERR(xqspi->pclk); @@ -670,36 +659,24 @@ static int zynq_qspi_probe(struct platform_device *pdev) init_completion(&xqspi->data_completion); - xqspi->refclk = devm_clk_get(&pdev->dev, "ref_clk"); + xqspi->refclk = devm_clk_get_enabled(&pdev->dev, "ref_clk"); if (IS_ERR(xqspi->refclk)) { dev_err(&pdev->dev, "ref_clk clock not found.\n"); ret = PTR_ERR(xqspi->refclk); goto remove_ctlr; } - ret = clk_prepare_enable(xqspi->pclk); - if (ret) { - dev_err(&pdev->dev, "Unable to enable APB clock.\n"); - goto remove_ctlr; - } - - ret = clk_prepare_enable(xqspi->refclk); - if (ret) { - dev_err(&pdev->dev, "Unable to enable device clock.\n"); - goto clk_dis_pclk; - } - xqspi->irq = platform_get_irq(pdev, 0); if (xqspi->irq < 0) { ret = xqspi->irq; - goto clk_dis_all; + goto remove_ctlr; } ret = devm_request_irq(&pdev->dev, xqspi->irq, zynq_qspi_irq, 0, pdev->name, xqspi); if (ret != 0) { ret = -ENXIO; dev_err(&pdev->dev, "request_irq failed\n"); - goto clk_dis_all; + goto remove_ctlr; } ret = of_property_read_u32(np, "num-cs", @@ -709,7 +686,7 @@ static int zynq_qspi_probe(struct platform_device *pdev) } else if (num_cs > ZYNQ_QSPI_MAX_NUM_CS) { ret = -EINVAL; dev_err(&pdev->dev, "only 2 chip selects are available\n"); - goto clk_dis_all; + goto remove_ctlr; } else { ctlr->num_chipselect = num_cs; } @@ -728,15 +705,11 @@ static int zynq_qspi_probe(struct platform_device *pdev) ret = devm_spi_register_controller(&pdev->dev, ctlr); if (ret) { dev_err(&pdev->dev, "devm_spi_register_controller failed\n"); - goto clk_dis_all; + goto remove_ctlr; } return ret; -clk_dis_all: - clk_disable_unprepare(xqspi->refclk); -clk_dis_pclk: - clk_disable_unprepare(xqspi->pclk); remove_ctlr: spi_controller_put(ctlr); @@ -758,9 +731,6 @@ static void zynq_qspi_remove(struct platform_device *pdev) struct zynq_qspi *xqspi = platform_get_drvdata(pdev); zynq_qspi_write(xqspi, ZYNQ_QSPI_ENABLE_OFFSET, 0); - - clk_disable_unprepare(xqspi->refclk); - clk_disable_unprepare(xqspi->pclk); } static const struct of_device_id zynq_qspi_of_match[] = { From c2812c0cb909211a1d2e7cec862406e32833b9de Mon Sep 17 00:00:00 2001 From: Ulf Hansson Date: Tue, 7 Apr 2026 14:14:53 +0200 Subject: [PATCH 160/248] MAINTAINERS, mailmap: Change Ulf Hansson's email Change my email in MAINTAINERS and add a few entries in mailmap to start using ulfh@kernel.org. Signed-off-by: Ulf Hansson --- .mailmap | 2 ++ MAINTAINERS | 14 +++++++------- 2 files changed, 9 insertions(+), 7 deletions(-) diff --git a/.mailmap b/.mailmap index 2d04aeba68b4..22c5ab1c5d55 100644 --- a/.mailmap +++ b/.mailmap @@ -849,6 +849,8 @@ Tvrtko Ursulin Tvrtko Ursulin Tycho Andersen Tzung-Bi Shih +Ulf Hansson +Ulf Hansson Umang Jain Uwe Kleine-König Uwe Kleine-König diff --git a/MAINTAINERS b/MAINTAINERS index c3fe46d7c4bc..7167dcea737d 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -6716,7 +6716,7 @@ F: include/linux/platform_data/cpuidle-exynos.h CPUIDLE DRIVER - ARM PSCI M: Lorenzo Pieralisi M: Sudeep Holla -M: Ulf Hansson +M: Ulf Hansson L: linux-pm@vger.kernel.org L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) S: Supported @@ -6724,7 +6724,7 @@ T: git git://git.kernel.org/pub/scm/linux/kernel/git/ulfh/linux-pm.git F: drivers/cpuidle/cpuidle-psci.c CPUIDLE DRIVER - ARM PSCI PM DOMAIN -M: Ulf Hansson +M: Ulf Hansson L: linux-pm@vger.kernel.org L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) S: Supported @@ -6733,7 +6733,7 @@ F: drivers/cpuidle/cpuidle-psci-domain.c F: drivers/cpuidle/cpuidle-psci.h CPUIDLE DRIVER - DT IDLE PM DOMAIN -M: Ulf Hansson +M: Ulf Hansson L: linux-pm@vger.kernel.org S: Supported T: git git://git.kernel.org/pub/scm/linux/kernel/git/ulfh/linux-pm.git @@ -10729,7 +10729,7 @@ F: Documentation/devicetree/bindings/i2c/i2c-demux-pinctrl.yaml F: drivers/i2c/muxes/i2c-demux-pinctrl.c GENERIC PM DOMAINS -M: Ulf Hansson +M: Ulf Hansson L: linux-pm@vger.kernel.org S: Supported F: Documentation/devicetree/bindings/power/power?domain* @@ -18089,7 +18089,7 @@ F: drivers/mmc/host/mmc_spi.c F: include/linux/spi/mmc_spi.h MULTIMEDIA CARD (MMC), SECURE DIGITAL (SD) AND SDIO SUBSYSTEM -M: Ulf Hansson +M: Ulf Hansson L: linux-mmc@vger.kernel.org S: Maintained T: git git://git.kernel.org/pub/scm/linux/kernel/git/ulfh/mmc.git @@ -24696,7 +24696,7 @@ F: drivers/media/i2c/imx415.c SONY MEMORYSTICK SUBSYSTEM M: Maxim Levitsky M: Alex Dubov -M: Ulf Hansson +M: Ulf Hansson L: linux-mmc@vger.kernel.org S: Maintained T: git git://git.kernel.org/pub/scm/linux/kernel/git/ulfh/mmc.git @@ -27615,7 +27615,7 @@ F: Documentation/fb/uvesafb.rst F: drivers/video/fbdev/uvesafb.* Ux500 CLOCK DRIVERS -M: Ulf Hansson +M: Ulf Hansson L: linux-clk@vger.kernel.org L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) S: Maintained From a9b8b18364fffce4c451e6f6fd218fa4ab646705 Mon Sep 17 00:00:00 2001 From: Muhammad Alifa Ramdhan Date: Fri, 3 Apr 2026 09:36:17 +0800 Subject: [PATCH 161/248] net/tls: fix use-after-free in -EBUSY error path of tls_do_encryption The -EBUSY handling in tls_do_encryption(), introduced by commit 859054147318 ("net: tls: handle backlogging of crypto requests"), has a use-after-free due to double cleanup of encrypt_pending and the scatterlist entry. When crypto_aead_encrypt() returns -EBUSY, the request is enqueued to the cryptd backlog and the async callback tls_encrypt_done() will be invoked upon completion. That callback unconditionally restores the scatterlist entry (sge->offset, sge->length) and decrements ctx->encrypt_pending. However, if tls_encrypt_async_wait() returns an error, the synchronous error path in tls_do_encryption() performs the same cleanup again, double-decrementing encrypt_pending and double-restoring the scatterlist. The double-decrement corrupts the encrypt_pending sentinel (initialized to 1), making tls_encrypt_async_wait() permanently skip the wait for pending async callbacks. A subsequent sendmsg can then free the tls_rec via bpf_exec_tx_verdict() while a cryptd callback is still pending, resulting in a use-after-free when the callback fires on the freed record. Fix this by skipping the synchronous cleanup when the -EBUSY async wait returns an error, since the callback has already handled encrypt_pending and sge restoration. Fixes: 859054147318 ("net: tls: handle backlogging of crypto requests") Cc: stable@vger.kernel.org Signed-off-by: Muhammad Alifa Ramdhan Reviewed-by: Sabrina Dubroca Link: https://patch.msgid.link/20260403013617.2838875-1-ramdhan@starlabs.sg Signed-off-by: Paolo Abeni --- net/tls/tls_sw.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c index dd9dda759bbb..83e78a3d1e65 100644 --- a/net/tls/tls_sw.c +++ b/net/tls/tls_sw.c @@ -584,6 +584,16 @@ static int tls_do_encryption(struct sock *sk, if (rc == -EBUSY) { rc = tls_encrypt_async_wait(ctx); rc = rc ?: -EINPROGRESS; + /* + * The async callback tls_encrypt_done() has already + * decremented encrypt_pending and restored the sge on + * both success and error. Skip the synchronous cleanup + * below on error, just remove the record and return. + */ + if (rc != -EINPROGRESS) { + list_del(&rec->list); + return rc; + } } if (!rc || rc != -EINPROGRESS) { atomic_dec(&ctx->encrypt_pending); From 82b915051d32a68ea3bbe261c93f5620699ff047 Mon Sep 17 00:00:00 2001 From: Josh Snyder Date: Thu, 2 Apr 2026 16:23:38 -0700 Subject: [PATCH 162/248] tick/nohz: Fix inverted return value in check_tick_dependency() fast path Commit 56534673cea7f ("tick/nohz: Optimize check_tick_dependency() with early return") added a fast path that returns !val when the tick_stop tracepoint is disabled. This is inverted: the slow path returns true when a dependency IS found (val != 0), but !val returns true when val is zero (no dependency). The result is that can_stop_full_tick() sees "dependency found" when there are none, and the tick never stops on nohz_full CPUs. Fix this by returning !!val instead of !val, matching the slow-path semantics. Fixes: 56534673cea7f ("tick/nohz: Optimize check_tick_dependency() with early return") Signed-off-by: Josh Snyder Signed-off-by: Thomas Gleixner Assisted-by: Claude:claude-opus-4-6 Link: https://patch.msgid.link/20260402-fix-idle-tick2-v1-1-eecb589649d3@code406.com --- kernel/time/tick-sched.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index f7907fadd63f..36449f0010a4 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -345,7 +345,7 @@ static bool check_tick_dependency(atomic_t *dep) int val = atomic_read(dep); if (likely(!tracepoint_enabled(tick_stop))) - return !val; + return !!val; if (val & TICK_DEP_MASK_POSIX_TIMER) { trace_tick_stop(0, TICK_DEP_MASK_POSIX_TIMER); From b1baaff4e5231f0d7db209f72a43112cd2d29257 Mon Sep 17 00:00:00 2001 From: Matti Vaittinen Date: Tue, 7 Apr 2026 16:27:19 +0300 Subject: [PATCH 163/248] regulator: bd71828-regulator.c: Fix LDON-HEAD mode The ROHM BD72720 supports so called LDON-HEAD -mode, in which the buck10 is expected to be supplying power for an LDO. In this mode, the buck10 voltage will follow what is set for the LDO, on order to lower the power-loss in the LDO. This hardware configuration can be adverticed via the device-tree. When this is done, the Linux driver should omit registering the voltage control operations for the buck10, because the voltage control is now done by the hardware. This is done by modifying the buck10 regulator descriptor, before passing it to the regulator registration functions. There is an off-by-one error when the regulator descriptor array is indexed, and wrong descriptor is modified causing the LDO1 operations to be modified instead of the BUCK10 operations. Fix this by correcting the indexing. Signed-off-by: Matti Vaittinen Fixes: f16a9d76a71d ("regulator: bd71828: Support ROHM BD72720") Link: https://patch.msgid.link/e7eef0bd407522ae5d9b7d0c4ec43f40b1dba833.1775565148.git.mazziesaccount@gmail.com Signed-off-by: Mark Brown --- drivers/regulator/bd71828-regulator.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/regulator/bd71828-regulator.c b/drivers/regulator/bd71828-regulator.c index c8f3343cfe23..473beb4399d9 100644 --- a/drivers/regulator/bd71828-regulator.c +++ b/drivers/regulator/bd71828-regulator.c @@ -785,7 +785,7 @@ static const struct bd71828_regulator_data bd71828_rdata[] = { }, }; -#define BD72720_BUCK10_DESC_INDEX 10 +#define BD72720_BUCK10_DESC_INDEX 9 #define BD72720_NUM_BUCK_VOLTS 0x100 #define BD72720_NUM_LDO_VOLTS 0x100 #define BD72720_NUM_LDO12346_VOLTS 0x80 From 3ffe5eb4a5f248c0d4b849f050af973396656f85 Mon Sep 17 00:00:00 2001 From: Claudio Imbrenda Date: Tue, 7 Apr 2026 18:17:21 +0200 Subject: [PATCH 164/248] KVM: s390: vsie: Fix races with partial gmap invalidations Introduce a new boolean flag, used for shadow gmaps, to keep track of whether the gmap has been invalidated, either partially or totally. Use the new flag to check whether shadow gmap invalidations happened during shadowing. In such cases, abort whatever was going on, return -EAGAIN and let the caller try again. Fixes: 19d6c5b80443 ("KVM: s390: vsie: Fix unshadowing while shadowing") Signed-off-by: Claudio Imbrenda Message-ID: <20260407161721.247044-1-imbrenda@linux.ibm.com> --- arch/s390/kvm/gaccess.c | 9 +++++---- arch/s390/kvm/gmap.c | 3 +++ arch/s390/kvm/gmap.h | 1 + 3 files changed, 9 insertions(+), 4 deletions(-) diff --git a/arch/s390/kvm/gaccess.c b/arch/s390/kvm/gaccess.c index 53a8550e7102..290e03a13a95 100644 --- a/arch/s390/kvm/gaccess.c +++ b/arch/s390/kvm/gaccess.c @@ -1449,7 +1449,7 @@ static int _do_shadow_pte(struct gmap *sg, gpa_t raddr, union pte *ptep_h, union pgste_set_unlock(ptep_h, pgste); if (rc) return rc; - if (!sg->parent) + if (sg->invalidated) return -EAGAIN; newpte = _pte(f->pfn, 0, !p, 0); @@ -1479,7 +1479,7 @@ static int _do_shadow_crste(struct gmap *sg, gpa_t raddr, union crste *host, uni do { /* _gmap_crstep_xchg_atomic() could have unshadowed this shadow gmap */ - if (!sg->parent) + if (sg->invalidated) return -EAGAIN; oldcrste = READ_ONCE(*host); newcrste = _crste_fc1(f->pfn, oldcrste.h.tt, f->writable, !p); @@ -1492,7 +1492,7 @@ static int _do_shadow_crste(struct gmap *sg, gpa_t raddr, union crste *host, uni if (!newcrste.h.p && !f->writable) return -EOPNOTSUPP; } while (!_gmap_crstep_xchg_atomic(sg->parent, host, oldcrste, newcrste, f->gfn, false)); - if (!sg->parent) + if (sg->invalidated) return -EAGAIN; newcrste = _crste_fc1(f->pfn, oldcrste.h.tt, 0, !p); @@ -1545,7 +1545,7 @@ static int _gaccess_do_shadow(struct kvm_s390_mmu_cache *mc, struct gmap *sg, entries[i].pfn, i + 1, entries[i].writable); if (rc) return rc; - if (!sg->parent) + if (sg->invalidated) return -EAGAIN; } @@ -1601,6 +1601,7 @@ static inline int _gaccess_shadow_fault(struct kvm_vcpu *vcpu, struct gmap *sg, scoped_guard(spinlock, &parent->children_lock) { if (READ_ONCE(sg->parent) != parent) return -EAGAIN; + sg->invalidated = false; rc = _gaccess_do_shadow(vcpu->arch.mc, sg, saddr, walk); } if (rc == -ENOMEM) diff --git a/arch/s390/kvm/gmap.c b/arch/s390/kvm/gmap.c index 645c32c767d2..0111d31e0386 100644 --- a/arch/s390/kvm/gmap.c +++ b/arch/s390/kvm/gmap.c @@ -181,6 +181,7 @@ void gmap_remove_child(struct gmap *child) list_del(&child->list); child->parent = NULL; + child->invalidated = true; } /** @@ -1069,6 +1070,7 @@ static void gmap_unshadow_level(struct gmap *sg, gfn_t r_gfn, int level) if (level > TABLE_TYPE_PAGE_TABLE) align = 1UL << (11 * level + _SEGMENT_SHIFT); kvm_s390_vsie_gmap_notifier(sg, ALIGN_DOWN(gaddr, align), ALIGN(gaddr + 1, align)); + sg->invalidated = true; if (dat_entry_walk(NULL, r_gfn, sg->asce, 0, level, &crstep, &ptep)) return; if (ptep) { @@ -1174,6 +1176,7 @@ static inline int __gmap_protect_asce_top_level(struct kvm_s390_mmu_cache *mc, s scoped_guard(spinlock, &parent->children_lock) { if (READ_ONCE(sg->parent) != parent) return -EAGAIN; + sg->invalidated = false; for (i = 0; i < CRST_TABLE_PAGES; i++) { if (!context->f[i].valid) continue; diff --git a/arch/s390/kvm/gmap.h b/arch/s390/kvm/gmap.h index 579399ef5480..31ea13fda142 100644 --- a/arch/s390/kvm/gmap.h +++ b/arch/s390/kvm/gmap.h @@ -60,6 +60,7 @@ enum gmap_flags { struct gmap { unsigned long flags; unsigned char edat_level; + bool invalidated; struct kvm *kvm; union asce asce; struct list_head list; From 705355a82b8ea86e0afe44560e970eedc74c05d3 Mon Sep 17 00:00:00 2001 From: Raju Rangoju Date: Mon, 6 Apr 2026 14:40:42 +0530 Subject: [PATCH 165/248] MAINTAINERS: Update AMD SPI driver maintainers Due to additional responsibilities, Raju Rangoju will no longer be supporting AMD SPI driver. Maintenance will be handled by Krishnamoorthi going forward. Cc: Krishnamoorthi M Signed-off-by: Raju Rangoju Link: https://patch.msgid.link/20260406091042.4065767-1-Raju.Rangoju@amd.com Signed-off-by: Mark Brown --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index c3fe46d7c4bc..213f6a294a9f 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -1272,6 +1272,7 @@ F: drivers/hid/amd-sfh-hid/ AMD SPI DRIVER M: Raju Rangoju +M: Krishnamoorthi M L: linux-spi@vger.kernel.org S: Supported F: drivers/spi/spi-amd-pci.c From 7596459f3c93d8d45a1bf12d4d7526b50c15baa2 Mon Sep 17 00:00:00 2001 From: Vinay Belgaumkar Date: Tue, 31 Mar 2026 18:27:10 -0700 Subject: [PATCH 166/248] drm/xe: Fix bug in idledly unit conversion We only need to convert to picosecond units before writing to RING_IDLEDLY. Fixes: 7c53ff050ba8 ("drm/xe: Apply Wa_16023105232") Cc: Tangudu Tilak Tirumalesh Acked-by: Tangudu Tilak Tirumalesh Signed-off-by: Vinay Belgaumkar Link: https://patch.msgid.link/20260401012710.4165547-1-vinay.belgaumkar@intel.com (cherry picked from commit 13743bd628bc9d9a0e2fe53488b2891aedf7cc74) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_hw_engine.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_hw_engine.c b/drivers/gpu/drm/xe/xe_hw_engine.c index 688d645e0e73..aa0b7a427f0b 100644 --- a/drivers/gpu/drm/xe/xe_hw_engine.c +++ b/drivers/gpu/drm/xe/xe_hw_engine.c @@ -595,9 +595,8 @@ static void adjust_idledly(struct xe_hw_engine *hwe) maxcnt *= maxcnt_units_ns; if (xe_gt_WARN_ON(gt, idledly >= maxcnt || inhibit_switch)) { - idledly = DIV_ROUND_CLOSEST(((maxcnt - 1) * maxcnt_units_ns), + idledly = DIV_ROUND_CLOSEST(((maxcnt - 1) * 1000), idledly_units_ps); - idledly = DIV_ROUND_CLOSEST(idledly, 1000); xe_mmio_write32(>->mmio, RING_IDLEDLY(hwe->mmio_base), idledly); } } From 95aca8602ef70ffd3d971675751c81826e124f90 Mon Sep 17 00:00:00 2001 From: John Pavlick Date: Mon, 6 Apr 2026 13:23:33 +0000 Subject: [PATCH 167/248] net: sfp: add quirks for Hisense and HSGQ GPON ONT SFP modules Several GPON ONT SFP sticks based on Realtek RTL960x report 1000BASE-LX at 1300MBd in their EEPROM but can operate at 2500base-X. On hosts capable of 2500base-X (e.g. Banana Pi R3 / MT7986), the kernel negotiates only 1G because it trusts the incorrect EEPROM data. Add quirks for: - Hisense-Leox LXT-010S-H - Hisense ZNID-GPON-2311NA - HSGQ HSGQ-XPON-Stick Each quirk advertises 2500base-X and ignores TX_FAULT during the module's ~40s Linux boot time. Tested on Banana Pi R3 (MT7986) with OpenWrt 25.12.1, confirmed 2.5Gbps link and full throughput with flow offloading. Reviewed-by: Russell King (Oracle) Suggested-by: Marcin Nita Signed-off-by: John Pavlick Link: https://patch.msgid.link/20260406132321.72563-1-jspavlick@posteo.net Signed-off-by: Jakub Kicinski --- drivers/net/phy/sfp.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/drivers/net/phy/sfp.c b/drivers/net/phy/sfp.c index ce8924613363..6b7b8ae15d10 100644 --- a/drivers/net/phy/sfp.c +++ b/drivers/net/phy/sfp.c @@ -543,6 +543,22 @@ static const struct sfp_quirk sfp_quirks[] = { SFP_QUIRK("HUAWEI", "MA5671A", sfp_quirk_2500basex, sfp_fixup_ignore_tx_fault_and_los), + // Hisense LXT-010S-H is a GPON ONT SFP (sold as LEOX LXT-010S-H) that + // can operate at 2500base-X, but reports 1000BASE-LX / 1300MBd in its + // EEPROM + SFP_QUIRK("Hisense-Leox", "LXT-010S-H", sfp_quirk_2500basex, + sfp_fixup_ignore_tx_fault), + + // Hisense ZNID-GPON-2311NA can operate at 2500base-X, but reports + // 1000BASE-LX / 1300MBd in its EEPROM + SFP_QUIRK("Hisense", "ZNID-GPON-2311NA", sfp_quirk_2500basex, + sfp_fixup_ignore_tx_fault), + + // HSGQ HSGQ-XPON-Stick can operate at 2500base-X, but reports + // 1000BASE-LX / 1300MBd in its EEPROM + SFP_QUIRK("HSGQ", "HSGQ-XPON-Stick", sfp_quirk_2500basex, + sfp_fixup_ignore_tx_fault), + // Lantech 8330-262D-E and 8330-265D can operate at 2500base-X, but // incorrectly report 2500MBd NRZ in their EEPROM. // Some 8330-265D modules have inverted LOS, while all of them report From f2777d5cb5c094e20f2323d953b1740baee5f8e8 Mon Sep 17 00:00:00 2001 From: Johan Alvarado Date: Mon, 6 Apr 2026 07:44:25 +0000 Subject: [PATCH 168/248] net: stmmac: dwmac-motorcomm: fix eFUSE MAC address read failure This patch fixes an issue where reading the MAC address from the eFUSE fails due to a race condition. The root cause was identified by comparing the driver's behavior with a custom U-Boot port. In U-Boot, the MAC address was read successfully every time because the driver was loaded later in the boot process, giving the hardware ample time to initialize. In Linux, reading the eFUSE immediately returns all zeros, resulting in a fallback to a random MAC address. Hardware cold-boot testing revealed that the eFUSE controller requires a short settling time to load its internal data. Adding a 2000-5000us delay after the reset ensures the hardware is fully ready, allowing the native MAC address to be read consistently. Fixes: 02ff155ea281 ("net: stmmac: Add glue driver for Motorcomm YT6801 ethernet controller") Reported-by: Georg Gottleuber Closes: https://lore.kernel.org/24cfefff-1233-4745-8c47-812b502d5d19@tuxedocomputers.com Signed-off-by: Johan Alvarado Reviewed-by: Yao Zi Reviewed-by: Andrew Lunn Link: https://patch.msgid.link/fc5992a4-9532-49c3-8ec1-c2f8c5b84ca1@smtp-relay.sendinblue.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/stmicro/stmmac/dwmac-motorcomm.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-motorcomm.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-motorcomm.c index 8b45b9cf7202..663d87ccfa0f 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-motorcomm.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-motorcomm.c @@ -6,6 +6,7 @@ */ #include +#include #include #include #include @@ -334,6 +335,13 @@ static int motorcomm_probe(struct pci_dev *pdev, const struct pci_device_id *id) motorcomm_reset(priv); + /* + * After system reset, the eFuse controller needs time to load + * its internal data. Without this delay, eFuse reads return + * all zeros, causing MAC address detection to fail. + */ + usleep_range(2000, 5000); + ret = motorcomm_efuse_read_mac(&pdev->dev, priv, res.mac); if (ret == -ENOENT) { dev_warn(&pdev->dev, "eFuse contains no valid MAC address\n"); From 944b3b734cfbbe9502274c092bc3b8220764cc92 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Fri, 3 Apr 2026 17:19:38 -0700 Subject: [PATCH 169/248] net: avoid nul-deref trying to bind mp to incapable device Sashiko points out that we use qops in __net_mp_open_rxq() but never validate they are null. This was introduced when check was moved from netdev_rx_queue_restart(). Look at ops directly instead of the locking config. qops imply netdev_need_ops_lock(). We used netdev_need_ops_lock() initially to signify that the real_num_rx_queues check below is safe without rtnl_lock, but I'm not sure if this is actually clear to most people, anyway. Fixes: da7772a2b4ad ("net: move mp->rx_page_size validation to __net_mp_open_rxq()") Acked-by: Daniel Borkmann Reviewed-by: Mina Almasry Link: https://patch.msgid.link/20260404001938.2425670-1-kuba@kernel.org Signed-off-by: Jakub Kicinski --- net/core/netdev_rx_queue.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/core/netdev_rx_queue.c b/net/core/netdev_rx_queue.c index 668a90658f25..05fd2875d725 100644 --- a/net/core/netdev_rx_queue.c +++ b/net/core/netdev_rx_queue.c @@ -117,7 +117,7 @@ int __net_mp_open_rxq(struct net_device *dev, unsigned int rxq_idx, struct netdev_rx_queue *rxq; int ret; - if (!netdev_need_ops_lock(dev)) + if (!qops) return -EOPNOTSUPP; if (rxq_idx >= dev->real_num_rx_queues) { From efaa71faf212324ecbf6d5339e9717fe53254f58 Mon Sep 17 00:00:00 2001 From: Daniel Golle Date: Sun, 5 Apr 2026 22:29:19 +0100 Subject: [PATCH 170/248] selftests: net: bridge_vlan_mcast: wait for h1 before querier check The querier-interval test adds h1 (currently a slave of the VRF created by simple_if_init) to a temporary bridge br1 acting as an outside IGMP querier. The kernel VRF driver (drivers/net/vrf.c) calls cycle_netdev() on every slave add and remove, toggling the interface admin-down then up. Phylink takes the PHY down during the admin-down half of that cycle. Since h1 and swp1 are cable-connected, swp1 also loses its link may need several seconds to re-negotiate. Use setup_wait_dev $h1 0 which waits for h1 to return to UP state, so the test can rely on the link being back up at this point. Fixes: 4d8610ee8bd77 ("selftests: net: bridge: add vlan mcast_querier_interval tests") Signed-off-by: Daniel Golle Reviewed-by: Alexander Sverdlin Link: https://patch.msgid.link/c830f130860fd2efae08bfb9e5b25fd028e58ce5.1775424423.git.daniel@makrotopia.org Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/forwarding/bridge_vlan_mcast.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/testing/selftests/net/forwarding/bridge_vlan_mcast.sh b/tools/testing/selftests/net/forwarding/bridge_vlan_mcast.sh index 72dfbeaf56b9..e8031f68200a 100755 --- a/tools/testing/selftests/net/forwarding/bridge_vlan_mcast.sh +++ b/tools/testing/selftests/net/forwarding/bridge_vlan_mcast.sh @@ -414,6 +414,7 @@ vlmc_querier_intvl_test() bridge vlan add vid 10 dev br1 self pvid untagged ip link set dev $h1 master br1 ip link set dev br1 up + setup_wait_dev $h1 0 bridge vlan add vid 10 dev $h1 master bridge vlan global set vid 10 dev br1 mcast_snooping 1 mcast_querier 1 sleep 2 From c3812651b522fe8437ebb7063b75ddb95b571643 Mon Sep 17 00:00:00 2001 From: Andrea Mayer Date: Sat, 4 Apr 2026 02:44:04 +0200 Subject: [PATCH 171/248] seg6: separate dst_cache for input and output paths in seg6 lwtunnel The seg6 lwtunnel uses a single dst_cache per encap route, shared between seg6_input_core() and seg6_output_core(). These two paths can perform the post-encap SID lookup in different routing contexts (e.g., ip rules matching on the ingress interface, or VRF table separation). Whichever path runs first populates the cache, and the other reuses it blindly, bypassing its own lookup. Fix this by splitting the cache into cache_input and cache_output, so each path maintains its own cached dst independently. Fixes: 6c8702c60b88 ("ipv6: sr: add support for SRH encapsulation and injection with lwtunnels") Cc: stable@vger.kernel.org Signed-off-by: Andrea Mayer Reviewed-by: Nicolas Dichtel Reviewed-by: Justin Iurman Link: https://patch.msgid.link/20260404004405.4057-2-andrea.mayer@uniroma2.it Signed-off-by: Jakub Kicinski --- net/ipv6/seg6_iptunnel.c | 34 +++++++++++++++++++++++----------- 1 file changed, 23 insertions(+), 11 deletions(-) diff --git a/net/ipv6/seg6_iptunnel.c b/net/ipv6/seg6_iptunnel.c index 3e1b9991131a..d6a0f7df9080 100644 --- a/net/ipv6/seg6_iptunnel.c +++ b/net/ipv6/seg6_iptunnel.c @@ -48,7 +48,8 @@ static size_t seg6_lwt_headroom(struct seg6_iptunnel_encap *tuninfo) } struct seg6_lwt { - struct dst_cache cache; + struct dst_cache cache_input; + struct dst_cache cache_output; struct seg6_iptunnel_encap tuninfo[]; }; @@ -488,7 +489,7 @@ static int seg6_input_core(struct net *net, struct sock *sk, slwt = seg6_lwt_lwtunnel(lwtst); local_bh_disable(); - dst = dst_cache_get(&slwt->cache); + dst = dst_cache_get(&slwt->cache_input); local_bh_enable(); err = seg6_do_srh(skb, dst); @@ -504,7 +505,7 @@ static int seg6_input_core(struct net *net, struct sock *sk, /* cache only if we don't create a dst reference loop */ if (!dst->error && lwtst != dst->lwtstate) { local_bh_disable(); - dst_cache_set_ip6(&slwt->cache, dst, + dst_cache_set_ip6(&slwt->cache_input, dst, &ipv6_hdr(skb)->saddr); local_bh_enable(); } @@ -564,7 +565,7 @@ static int seg6_output_core(struct net *net, struct sock *sk, slwt = seg6_lwt_lwtunnel(orig_dst->lwtstate); local_bh_disable(); - dst = dst_cache_get(&slwt->cache); + dst = dst_cache_get(&slwt->cache_output); local_bh_enable(); err = seg6_do_srh(skb, dst); @@ -591,7 +592,7 @@ static int seg6_output_core(struct net *net, struct sock *sk, /* cache only if we don't create a dst reference loop */ if (orig_dst->lwtstate != dst->lwtstate) { local_bh_disable(); - dst_cache_set_ip6(&slwt->cache, dst, &fl6.saddr); + dst_cache_set_ip6(&slwt->cache_output, dst, &fl6.saddr); local_bh_enable(); } @@ -701,11 +702,13 @@ static int seg6_build_state(struct net *net, struct nlattr *nla, slwt = seg6_lwt_lwtunnel(newts); - err = dst_cache_init(&slwt->cache, GFP_ATOMIC); - if (err) { - kfree(newts); - return err; - } + err = dst_cache_init(&slwt->cache_input, GFP_ATOMIC); + if (err) + goto err_free_newts; + + err = dst_cache_init(&slwt->cache_output, GFP_ATOMIC); + if (err) + goto err_destroy_input; memcpy(&slwt->tuninfo, tuninfo, tuninfo_len); @@ -720,11 +723,20 @@ static int seg6_build_state(struct net *net, struct nlattr *nla, *ts = newts; return 0; + +err_destroy_input: + dst_cache_destroy(&slwt->cache_input); +err_free_newts: + kfree(newts); + return err; } static void seg6_destroy_state(struct lwtunnel_state *lwt) { - dst_cache_destroy(&seg6_lwt_lwtunnel(lwt)->cache); + struct seg6_lwt *slwt = seg6_lwt_lwtunnel(lwt); + + dst_cache_destroy(&slwt->cache_input); + dst_cache_destroy(&slwt->cache_output); } static int seg6_fill_encap_info(struct sk_buff *skb, From 32dfd742f06a68fac6499a58f52025990c854031 Mon Sep 17 00:00:00 2001 From: Andrea Mayer Date: Sat, 4 Apr 2026 02:44:05 +0200 Subject: [PATCH 172/248] selftests: seg6: add test for dst_cache isolation in seg6 lwtunnel Add a selftest that verifies the dst_cache in seg6 lwtunnel is not shared between the input (forwarding) and output (locally generated) paths. The test creates three namespaces (ns_src, ns_router, ns_dst) connected in a line. An SRv6 encap route on ns_router encapsulates traffic destined to cafe::1 with SID fc00::100. The SID is reachable only for forwarded traffic (from ns_src) via an ip rule matching the ingress interface (iif veth-r0 lookup 100), and blackholed in the main table. The test verifies that: 1. A packet generated locally on ns_router does not reach ns_dst with an empty cache, since the SID is blackholed; 2. A forwarded packet from ns_src populates the input cache from table 100 and reaches ns_dst; 3. A packet generated locally on ns_router still does not reach ns_dst after the input cache is populated, confirming the output path does not reuse the input cache entry. Both the forwarded and local packets are pinned to the same CPU with taskset, since dst_cache is per-cpu. Cc: Shuah Khan Signed-off-by: Andrea Mayer Reviewed-by: Nicolas Dichtel Reviewed-by: Justin Iurman Link: https://patch.msgid.link/20260404004405.4057-3-andrea.mayer@uniroma2.it Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/Makefile | 1 + .../selftests/net/srv6_iptunnel_cache.sh | 197 ++++++++++++++++++ 2 files changed, 198 insertions(+) create mode 100755 tools/testing/selftests/net/srv6_iptunnel_cache.sh diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile index 605c54c0e8a3..c709523c99c6 100644 --- a/tools/testing/selftests/net/Makefile +++ b/tools/testing/selftests/net/Makefile @@ -89,6 +89,7 @@ TEST_PROGS := \ srv6_end_x_next_csid_l3vpn_test.sh \ srv6_hencap_red_l3vpn_test.sh \ srv6_hl2encap_red_l2vpn_test.sh \ + srv6_iptunnel_cache.sh \ stress_reuseport_listen.sh \ tcp_fastopen_backup_key.sh \ test_bpf.sh \ diff --git a/tools/testing/selftests/net/srv6_iptunnel_cache.sh b/tools/testing/selftests/net/srv6_iptunnel_cache.sh new file mode 100755 index 000000000000..62638ab679d9 --- /dev/null +++ b/tools/testing/selftests/net/srv6_iptunnel_cache.sh @@ -0,0 +1,197 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# author: Andrea Mayer + +# This test verifies that the seg6 lwtunnel does not share the dst_cache +# between the input (forwarding) and output (locally generated) paths. +# +# A shared dst_cache allows a forwarded packet to populate the cache and a +# subsequent locally generated packet to silently reuse that entry, bypassing +# its own route lookup. To expose this, the SID is made reachable only for +# forwarded traffic (via an ip rule matching iif) and blackholed for everything +# else. A local ping on ns_router must always hit the blackhole; +# if it succeeds after a forwarded packet has populated the +# cache, the bug is confirmed. +# +# Both forwarded and local packets are pinned to the same CPU with taskset, +# since dst_cache is per-cpu. +# +# +# +--------------------+ +--------------------+ +# | ns_src | | ns_dst | +# | | | | +# | veth-s0 | | veth-d0 | +# | fd00::1/64 | | fd01::2/64 | +# +-------+------------+ +----------+---------+ +# | | +# | +--------------------+ | +# | | ns_router | | +# | | | | +# +------------+ veth-r0 veth-r1 +--------------+ +# | fd00::2 fd01::1 | +# +--------------------+ +# +# +# ns_router: encap (main table) +# +---------+---------------------------------------+ +# | dst | action | +# +---------+---------------------------------------+ +# | cafe::1 | encap seg6 mode encap segs fc00::100 | +# +---------+---------------------------------------+ +# +# ns_router: post-encap SID resolution +# +-------+------------+----------------------------+ +# | table | dst | action | +# +-------+------------+----------------------------+ +# | 100 | fc00::100 | via fd01::2 dev veth-r1 | +# +-------+------------+----------------------------+ +# | main | fc00::100 | blackhole | +# +-------+------------+----------------------------+ +# +# ns_router: ip rule +# +------------------+------------------------------+ +# | match | action | +# +------------------+------------------------------+ +# | iif veth-r0 | lookup 100 | +# +------------------+------------------------------+ +# +# ns_dst: SRv6 decap (main table) +# +--------------+----------------------------------+ +# | SID | action | +# +--------------+----------------------------------+ +# | fc00::100 | End.DT6 table 255 (local) | +# +--------------+----------------------------------+ + +source lib.sh + +readonly SID="fc00::100" +readonly DEST="cafe::1" + +readonly SRC_MAC="02:00:00:00:00:01" +readonly RTR_R0_MAC="02:00:00:00:00:02" +readonly RTR_R1_MAC="02:00:00:00:00:03" +readonly DST_MAC="02:00:00:00:00:04" + +cleanup() +{ + cleanup_ns "${NS_SRC}" "${NS_RTR}" "${NS_DST}" +} + +check_prerequisites() +{ + if ! command -v ip &>/dev/null; then + echo "SKIP: ip tool not found" + exit "${ksft_skip}" + fi + + if ! command -v ping &>/dev/null; then + echo "SKIP: ping not found" + exit "${ksft_skip}" + fi + + if ! command -v sysctl &>/dev/null; then + echo "SKIP: sysctl not found" + exit "${ksft_skip}" + fi + + if ! command -v taskset &>/dev/null; then + echo "SKIP: taskset not found" + exit "${ksft_skip}" + fi +} + +setup() +{ + setup_ns NS_SRC NS_RTR NS_DST + + ip link add veth-s0 netns "${NS_SRC}" type veth \ + peer name veth-r0 netns "${NS_RTR}" + ip link add veth-r1 netns "${NS_RTR}" type veth \ + peer name veth-d0 netns "${NS_DST}" + + ip -n "${NS_SRC}" link set veth-s0 address "${SRC_MAC}" + ip -n "${NS_RTR}" link set veth-r0 address "${RTR_R0_MAC}" + ip -n "${NS_RTR}" link set veth-r1 address "${RTR_R1_MAC}" + ip -n "${NS_DST}" link set veth-d0 address "${DST_MAC}" + + # ns_src + ip -n "${NS_SRC}" link set veth-s0 up + ip -n "${NS_SRC}" addr add fd00::1/64 dev veth-s0 nodad + ip -n "${NS_SRC}" -6 route add "${DEST}"/128 via fd00::2 + + # ns_router + ip -n "${NS_RTR}" link set veth-r0 up + ip -n "${NS_RTR}" addr add fd00::2/64 dev veth-r0 nodad + ip -n "${NS_RTR}" link set veth-r1 up + ip -n "${NS_RTR}" addr add fd01::1/64 dev veth-r1 nodad + ip netns exec "${NS_RTR}" sysctl -qw net.ipv6.conf.all.forwarding=1 + + ip -n "${NS_RTR}" -6 route add "${DEST}"/128 \ + encap seg6 mode encap segs "${SID}" dev veth-r0 + ip -n "${NS_RTR}" -6 route add "${SID}"/128 table 100 \ + via fd01::2 dev veth-r1 + ip -n "${NS_RTR}" -6 route add blackhole "${SID}"/128 + ip -n "${NS_RTR}" -6 rule add iif veth-r0 lookup 100 + + # ns_dst + ip -n "${NS_DST}" link set veth-d0 up + ip -n "${NS_DST}" addr add fd01::2/64 dev veth-d0 nodad + ip -n "${NS_DST}" addr add "${DEST}"/128 dev lo nodad + ip -n "${NS_DST}" -6 route add "${SID}"/128 \ + encap seg6local action End.DT6 table 255 dev veth-d0 + ip -n "${NS_DST}" -6 route add fd00::/64 via fd01::1 + + # static neighbors + ip -n "${NS_SRC}" -6 neigh add fd00::2 dev veth-s0 \ + lladdr "${RTR_R0_MAC}" nud permanent + ip -n "${NS_RTR}" -6 neigh add fd00::1 dev veth-r0 \ + lladdr "${SRC_MAC}" nud permanent + ip -n "${NS_RTR}" -6 neigh add fd01::2 dev veth-r1 \ + lladdr "${DST_MAC}" nud permanent + ip -n "${NS_DST}" -6 neigh add fd01::1 dev veth-d0 \ + lladdr "${RTR_R1_MAC}" nud permanent +} + +test_cache_isolation() +{ + RET=0 + + # local ping with empty cache: must fail (SID is blackholed) + if ip netns exec "${NS_RTR}" taskset -c 0 \ + ping -c 1 -W 2 "${DEST}" &>/dev/null; then + echo "SKIP: local ping succeeded, topology broken" + exit "${ksft_skip}" + fi + + # forward from ns_src to populate the input cache + if ! ip netns exec "${NS_SRC}" taskset -c 0 \ + ping -c 1 -W 2 "${DEST}" &>/dev/null; then + echo "SKIP: forwarded ping failed, topology broken" + exit "${ksft_skip}" + fi + + # local ping again: must still fail; if the output path reuses + # the input cache, it bypasses the blackhole and the ping succeeds + if ip netns exec "${NS_RTR}" taskset -c 0 \ + ping -c 1 -W 2 "${DEST}" &>/dev/null; then + echo "FAIL: output path used dst cached by input path" + RET="${ksft_fail}" + else + echo "PASS: output path dst_cache is independent" + fi + + return "${RET}" +} + +if [ "$(id -u)" -ne 0 ]; then + echo "SKIP: Need root privileges" + exit "${ksft_skip}" +fi + +trap cleanup EXIT + +check_prerequisites +setup +test_cache_isolation +exit "${RET}" From 4cda78d6f8bf2b700529f2fbccb994c3e826d7c2 Mon Sep 17 00:00:00 2001 From: Mikhail Gavrilov Date: Tue, 7 Apr 2026 12:50:31 +0500 Subject: [PATCH 173/248] Input: uinput - fix circular locking dependency with ff-core A lockdep circular locking dependency warning can be triggered reproducibly when using a force-feedback gamepad with uinput (for example, playing ELDEN RING under Wine with a Flydigi Vader 5 controller): ff->mutex -> udev->mutex -> input_mutex -> dev->mutex -> ff->mutex The cycle is caused by four lock acquisition paths: 1. ff upload: input_ff_upload() holds ff->mutex and calls uinput_dev_upload_effect() -> uinput_request_submit() -> uinput_request_send(), which acquires udev->mutex. 2. device create: uinput_ioctl_handler() holds udev->mutex and calls uinput_create_device() -> input_register_device(), which acquires input_mutex. 3. device register: input_register_device() holds input_mutex and calls kbd_connect() -> input_register_handle(), which acquires dev->mutex. 4. evdev release: evdev_release() calls input_flush_device() under dev->mutex, which calls input_ff_flush() acquiring ff->mutex. Fix this by introducing a new state_lock spinlock to protect udev->state and udev->dev access in uinput_request_send() instead of acquiring udev->mutex. The function only needs to atomically check device state and queue an input event into the ring buffer via uinput_dev_event() -- both operations are safe under a spinlock (ktime_get_ts64() and wake_up_interruptible() do not sleep). This breaks the ff->mutex -> udev->mutex link since a spinlock is a leaf in the lock ordering and cannot form cycles with mutexes. To keep state transitions visible to uinput_request_send(), protect writes to udev->state in uinput_create_device() and uinput_destroy_device() with the same state_lock spinlock. Additionally, move init_completion(&request->done) from uinput_request_send() to uinput_request_submit() before uinput_request_reserve_slot(). Once the slot is allocated, uinput_flush_requests() may call complete() on it at any time from the destroy path, so the completion must be initialised before the request becomes visible. Lock ordering after the fix: ff->mutex -> state_lock (spinlock, leaf) udev->mutex -> state_lock (spinlock, leaf) udev->mutex -> input_mutex -> dev->mutex -> ff->mutex (no back-edge) Fixes: ff462551235d ("Input: uinput - switch to the new FF interface") Cc: stable@vger.kernel.org Link: https://lore.kernel.org/all/CABXGCsMoxag+kEwHhb7KqhuyxfmGGd0P=tHZyb1uKE0pLr8Hkg@mail.gmail.com/ Signed-off-by: Mikhail Gavrilov Link: https://patch.msgid.link/20260407075031.38351-1-mikhail.v.gavrilov@gmail.com Signed-off-by: Dmitry Torokhov --- drivers/input/misc/uinput.c | 28 +++++++++++++++++++++------- 1 file changed, 21 insertions(+), 7 deletions(-) diff --git a/drivers/input/misc/uinput.c b/drivers/input/misc/uinput.c index 13336a2fd49c..a973e82205b5 100644 --- a/drivers/input/misc/uinput.c +++ b/drivers/input/misc/uinput.c @@ -57,6 +57,7 @@ struct uinput_device { struct input_dev *dev; struct mutex mutex; enum uinput_state state; + spinlock_t state_lock; wait_queue_head_t waitq; unsigned char ready; unsigned char head; @@ -146,19 +147,15 @@ static void uinput_request_release_slot(struct uinput_device *udev, static int uinput_request_send(struct uinput_device *udev, struct uinput_request *request) { - int retval; + int retval = 0; - retval = mutex_lock_interruptible(&udev->mutex); - if (retval) - return retval; + spin_lock(&udev->state_lock); if (udev->state != UIST_CREATED) { retval = -ENODEV; goto out; } - init_completion(&request->done); - /* * Tell our userspace application about this new request * by queueing an input event. @@ -166,7 +163,7 @@ static int uinput_request_send(struct uinput_device *udev, uinput_dev_event(udev->dev, EV_UINPUT, request->code, request->id); out: - mutex_unlock(&udev->mutex); + spin_unlock(&udev->state_lock); return retval; } @@ -175,6 +172,13 @@ static int uinput_request_submit(struct uinput_device *udev, { int retval; + /* + * Initialize completion before allocating the request slot. + * Once the slot is allocated, uinput_flush_requests() may + * complete it at any time, so it must be initialized first. + */ + init_completion(&request->done); + retval = uinput_request_reserve_slot(udev, request); if (retval) return retval; @@ -289,7 +293,14 @@ static void uinput_destroy_device(struct uinput_device *udev) struct input_dev *dev = udev->dev; enum uinput_state old_state = udev->state; + /* + * Update state under state_lock so that concurrent + * uinput_request_send() sees the state change before we + * flush pending requests and tear down the device. + */ + spin_lock(&udev->state_lock); udev->state = UIST_NEW_DEVICE; + spin_unlock(&udev->state_lock); if (dev) { name = dev->name; @@ -366,7 +377,9 @@ static int uinput_create_device(struct uinput_device *udev) if (error) goto fail2; + spin_lock(&udev->state_lock); udev->state = UIST_CREATED; + spin_unlock(&udev->state_lock); return 0; @@ -384,6 +397,7 @@ static int uinput_open(struct inode *inode, struct file *file) return -ENOMEM; mutex_init(&newdev->mutex); + spin_lock_init(&newdev->state_lock); spin_lock_init(&newdev->requests_lock); init_waitqueue_head(&newdev->requests_waitq); init_waitqueue_head(&newdev->waitq); From 828ec7f803f41588a120e6d804297e74a482ab9d Mon Sep 17 00:00:00 2001 From: Matti Vaittinen Date: Tue, 7 Apr 2026 15:41:48 +0300 Subject: [PATCH 174/248] gpio: bd72720: handle missing regmap Currently the probe does not check whether getting the regmap succeeded. This can cause crash when regmap is used, if it wasn't successfully obtained. Failing to get the regmap is unlikely, especially since this driver is expected to be kicked by the MFD driver only after registering the regmap - but it is still better to handle this gracefully. Signed-off-by: Matti Vaittinen Fixes: e7eef0bd4075 ("regulator: bd71828-regulator.c: Fix LDON-HEAD mode") Link: https://patch.msgid.link/5bfffee380863bcf24f3062e48094c8eb7b1342f.1775565381.git.mazziesaccount@gmail.com Signed-off-by: Bartosz Golaszewski --- drivers/gpio/gpio-bd72720.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpio/gpio-bd72720.c b/drivers/gpio/gpio-bd72720.c index 6549dbf4c7ad..d0f936ed80af 100644 --- a/drivers/gpio/gpio-bd72720.c +++ b/drivers/gpio/gpio-bd72720.c @@ -256,6 +256,8 @@ static int gpo_bd72720_probe(struct platform_device *pdev) g->dev = dev; g->chip.parent = parent; g->regmap = dev_get_regmap(parent, NULL); + if (!g->regmap) + return -ENODEV; return devm_gpiochip_add_data(dev, &g->chip, g); } From 14a857056466be9d3d907a94e92a704ac1be149b Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Sat, 4 Apr 2026 12:22:44 +0200 Subject: [PATCH 175/248] sched/deadline: Use revised wakeup rule for dl_server John noted that commit 115135422562 ("sched/deadline: Fix 'stuck' dl_server") unfixed the issue from commit a3a70caf7906 ("sched/deadline: Fix dl_server behaviour"). The issue in commit 115135422562 was for wakeups of the server after the deadline; in which case you *have* to start a new period. The case for a3a70caf7906 is wakeups before the deadline. Now, because the server is effectively running a least-laxity policy, it means that any wakeup during the runnable phase means dl_entity_overflow() will be true. This means we need to adjust the runtime to allow it to still run until the existing deadline expires. Use the revised wakeup rule for dl_defer entities. Fixes: 115135422562 ("sched/deadline: Fix 'stuck' dl_server") Reported-by: John Stultz Signed-off-by: Peter Zijlstra (Intel) Acked-by: Juri Lelli Tested-by: John Stultz Link: https://patch.msgid.link/20260404102244.GB22575@noisy.programming.kicks-ass.net --- kernel/sched/deadline.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c index d08b00429323..674de6a48551 100644 --- a/kernel/sched/deadline.c +++ b/kernel/sched/deadline.c @@ -1027,7 +1027,7 @@ static void update_dl_entity(struct sched_dl_entity *dl_se) if (dl_time_before(dl_se->deadline, rq_clock(rq)) || dl_entity_overflow(dl_se, rq_clock(rq))) { - if (unlikely(!dl_is_implicit(dl_se) && + if (unlikely((!dl_is_implicit(dl_se) || dl_se->dl_defer) && !dl_time_before(dl_se->deadline, rq_clock(rq)) && !is_dl_boosted(dl_se))) { update_dl_revised_wakeup(dl_se, rq); From 4c71fd099513bfa8acab529b626e1f0097b76061 Mon Sep 17 00:00:00 2001 From: Sebastian Brzezinka Date: Wed, 1 Apr 2026 12:10:07 +0200 Subject: [PATCH 176/248] drm/i915/gt: fix refcount underflow in intel_engine_park_heartbeat A use-after-free / refcount underflow is possible when the heartbeat worker and intel_engine_park_heartbeat() race to release the same engine->heartbeat.systole request. The heartbeat worker reads engine->heartbeat.systole and calls i915_request_put() on it when the request is complete, but clears the pointer in a separate, non-atomic step. Concurrently, a request retirement on another CPU can drop the engine wakeref to zero, triggering __engine_park() -> intel_engine_park_heartbeat(). If the heartbeat timer is pending at that point, cancel_delayed_work() returns true and intel_engine_park_heartbeat() reads the stale non-NULL systole pointer and calls i915_request_put() on it again, causing a refcount underflow: ``` <4> [487.221889] Workqueue: i915-unordered engine_retire [i915] <4> [487.222640] RIP: 0010:refcount_warn_saturate+0x68/0xb0 ... <4> [487.222707] Call Trace: <4> [487.222711] <4> [487.222716] intel_engine_park_heartbeat.part.0+0x6f/0x80 [i915] <4> [487.223115] intel_engine_park_heartbeat+0x25/0x40 [i915] <4> [487.223566] __engine_park+0xb9/0x650 [i915] <4> [487.223973] ____intel_wakeref_put_last+0x2e/0xb0 [i915] <4> [487.224408] __intel_wakeref_put_last+0x72/0x90 [i915] <4> [487.224797] intel_context_exit_engine+0x7c/0x80 [i915] <4> [487.225238] intel_context_exit+0xf1/0x1b0 [i915] <4> [487.225695] i915_request_retire.part.0+0x1b9/0x530 [i915] <4> [487.226178] i915_request_retire+0x1c/0x40 [i915] <4> [487.226625] engine_retire+0x122/0x180 [i915] <4> [487.227037] process_one_work+0x239/0x760 <4> [487.227060] worker_thread+0x200/0x3f0 <4> [487.227068] ? __pfx_worker_thread+0x10/0x10 <4> [487.227075] kthread+0x10d/0x150 <4> [487.227083] ? __pfx_kthread+0x10/0x10 <4> [487.227092] ret_from_fork+0x3d4/0x480 <4> [487.227099] ? __pfx_kthread+0x10/0x10 <4> [487.227107] ret_from_fork_asm+0x1a/0x30 <4> [487.227141] ``` Fix this by replacing the non-atomic pointer read + separate clear with xchg() in both racing paths. xchg() is a single indivisible hardware instruction that atomically reads the old pointer and writes NULL. This guarantees only one of the two concurrent callers obtains the non-NULL pointer and performs the put, the other gets NULL and skips it. Closes: https://gitlab.freedesktop.org/drm/i915/kernel/-/work_items/15880 Fixes: 058179e72e09 ("drm/i915/gt: Replace hangcheck by heartbeats") Cc: # v5.5+ Signed-off-by: Sebastian Brzezinka Reviewed-by: Krzysztof Karas Reviewed-by: Andi Shyti Signed-off-by: Andi Shyti Link: https://lore.kernel.org/r/d4c1c14255688dd07cc8044973c4f032a8d1559e.1775038106.git.sebastian.brzezinka@intel.com (cherry picked from commit 13238dc0ee4f9ab8dafa2cca7295736191ae2f42) Signed-off-by: Joonas Lahtinen --- .../gpu/drm/i915/gt/intel_engine_heartbeat.c | 26 +++++++++++++------ 1 file changed, 18 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c b/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c index b279878dca29..6424ecce8bcb 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c @@ -148,10 +148,12 @@ static void heartbeat(struct work_struct *wrk) /* Just in case everything has gone horribly wrong, give it a kick */ intel_engine_flush_submission(engine); - rq = engine->heartbeat.systole; - if (rq && i915_request_completed(rq)) { - i915_request_put(rq); - engine->heartbeat.systole = NULL; + rq = xchg(&engine->heartbeat.systole, NULL); + if (rq) { + if (i915_request_completed(rq)) + i915_request_put(rq); + else + engine->heartbeat.systole = rq; } if (!intel_engine_pm_get_if_awake(engine)) @@ -232,8 +234,11 @@ static void heartbeat(struct work_struct *wrk) unlock: mutex_unlock(&ce->timeline->mutex); out: - if (!engine->i915->params.enable_hangcheck || !next_heartbeat(engine)) - i915_request_put(fetch_and_zero(&engine->heartbeat.systole)); + if (!engine->i915->params.enable_hangcheck || !next_heartbeat(engine)) { + rq = xchg(&engine->heartbeat.systole, NULL); + if (rq) + i915_request_put(rq); + } intel_engine_pm_put(engine); } @@ -247,8 +252,13 @@ void intel_engine_unpark_heartbeat(struct intel_engine_cs *engine) void intel_engine_park_heartbeat(struct intel_engine_cs *engine) { - if (cancel_delayed_work(&engine->heartbeat.work)) - i915_request_put(fetch_and_zero(&engine->heartbeat.systole)); + if (cancel_delayed_work(&engine->heartbeat.work)) { + struct i915_request *rq; + + rq = xchg(&engine->heartbeat.systole, NULL); + if (rq) + i915_request_put(rq); + } } void intel_gt_unpark_heartbeats(struct intel_gt *gt) From 9a91797e61d286805ae10a92cc48959c30800556 Mon Sep 17 00:00:00 2001 From: Weiming Shi Date: Wed, 1 Apr 2026 15:58:01 +0800 Subject: [PATCH 177/248] ipvs: fix NULL deref in ip_vs_add_service error path When ip_vs_bind_scheduler() succeeds in ip_vs_add_service(), the local variable sched is set to NULL. If ip_vs_start_estimator() subsequently fails, the out_err cleanup calls ip_vs_unbind_scheduler(svc, sched) with sched == NULL. ip_vs_unbind_scheduler() passes the cur_sched NULL check (because svc->scheduler was set by the successful bind) but then dereferences the NULL sched parameter at sched->done_service, causing a kernel panic at offset 0x30 from NULL. Oops: general protection fault, [..] [#1] PREEMPT SMP KASAN NOPTI KASAN: null-ptr-deref in range [0x0000000000000030-0x0000000000000037] RIP: 0010:ip_vs_unbind_scheduler (net/netfilter/ipvs/ip_vs_sched.c:69) Call Trace: ip_vs_add_service.isra.0 (net/netfilter/ipvs/ip_vs_ctl.c:1500) do_ip_vs_set_ctl (net/netfilter/ipvs/ip_vs_ctl.c:2809) nf_setsockopt (net/netfilter/nf_sockopt.c:102) [..] Fix by simply not clearing the local sched variable after a successful bind. ip_vs_unbind_scheduler() already detects whether a scheduler is installed via svc->scheduler, and keeping sched non-NULL ensures the error path passes the correct pointer to both ip_vs_unbind_scheduler() and ip_vs_scheduler_put(). While the bug is older, the problem popups in more recent kernels (6.2), when the new error path is taken after the ip_vs_start_estimator() call. Fixes: 705dd3444081 ("ipvs: use kthreads for stats estimation") Reported-by: Xiang Mei Signed-off-by: Weiming Shi Acked-by: Simon Horman Acked-by: Julian Anastasov Signed-off-by: Florian Westphal --- net/netfilter/ipvs/ip_vs_ctl.c | 1 - 1 file changed, 1 deletion(-) diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index 35642de2a0fe..2aaf50f52c8e 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -1452,7 +1452,6 @@ ip_vs_add_service(struct netns_ipvs *ipvs, struct ip_vs_service_user_kern *u, ret = ip_vs_bind_scheduler(svc, sched); if (ret) goto out_err; - sched = NULL; } ret = ip_vs_start_estimator(ipvs, &svc->stats); From 1f3083aec8836213da441270cdb1ab612dd82cf4 Mon Sep 17 00:00:00 2001 From: Xiang Mei Date: Wed, 1 Apr 2026 14:20:57 -0700 Subject: [PATCH 178/248] netfilter: nfnetlink_log: initialize nfgenmsg in NLMSG_DONE terminator When batching multiple NFLOG messages (inst->qlen > 1), __nfulnl_send() appends an NLMSG_DONE terminator with sizeof(struct nfgenmsg) payload via nlmsg_put(), but never initializes the nfgenmsg bytes. The nlmsg_put() helper only zeroes alignment padding after the payload, not the payload itself, so four bytes of stale kernel heap data are leaked to userspace in the NLMSG_DONE message body. Use nfnl_msg_put() to build the NLMSG_DONE terminator, which initializes the nfgenmsg payload via nfnl_fill_hdr(), consistent with how __build_packet_message() already constructs NFULNL_MSG_PACKET headers. Fixes: 29c5d4afba51 ("[NETFILTER]: nfnetlink_log: fix sending of multipart messages") Reported-by: Weiming Shi Signed-off-by: Xiang Mei Signed-off-by: Florian Westphal --- net/netfilter/nfnetlink_log.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c index f80978c06fa0..0db908518b2f 100644 --- a/net/netfilter/nfnetlink_log.c +++ b/net/netfilter/nfnetlink_log.c @@ -361,10 +361,10 @@ static void __nfulnl_send(struct nfulnl_instance *inst) { if (inst->qlen > 1) { - struct nlmsghdr *nlh = nlmsg_put(inst->skb, 0, 0, - NLMSG_DONE, - sizeof(struct nfgenmsg), - 0); + struct nlmsghdr *nlh = nfnl_msg_put(inst->skb, 0, 0, + NLMSG_DONE, 0, + AF_UNSPEC, NFNETLINK_V0, + htons(inst->group_num)); if (WARN_ONCE(!nlh, "bad nlskb size: %u, tailroom %d\n", inst->skb->len, skb_tailroom(inst->skb))) { kfree_skb(inst->skb); From ff64c5bfef12461df8450e0f50bb693b5269c720 Mon Sep 17 00:00:00 2001 From: Ren Wei Date: Fri, 3 Apr 2026 23:52:52 +0800 Subject: [PATCH 179/248] netfilter: xt_multiport: validate range encoding in checkentry ports_match_v1() treats any non-zero pflags entry as the start of a port range and unconditionally consumes the next ports[] element as the range end. The checkentry path currently validates protocol, flags and count, but it does not validate the range encoding itself. As a result, malformed rules can mark the last slot as a range start or place two range starts back to back, leaving ports_match_v1() to step past the last valid ports[] element while interpreting the rule. Reject malformed multiport v1 rules in checkentry by validating that each range start has a following element and that the following element is not itself marked as another range start. Fixes: a89ecb6a2ef7 ("[NETFILTER]: x_tables: unify IPv4/IPv6 multiport match") Reported-by: Yifan Wu Reported-by: Juefei Pu Co-developed-by: Yuan Tan Signed-off-by: Yuan Tan Suggested-by: Xin Liu Tested-by: Yuhang Zheng Signed-off-by: Ren Wei Signed-off-by: Florian Westphal --- net/netfilter/xt_multiport.c | 34 ++++++++++++++++++++++++++++++---- 1 file changed, 30 insertions(+), 4 deletions(-) diff --git a/net/netfilter/xt_multiport.c b/net/netfilter/xt_multiport.c index 44a00f5acde8..a1691ff405d3 100644 --- a/net/netfilter/xt_multiport.c +++ b/net/netfilter/xt_multiport.c @@ -105,6 +105,28 @@ multiport_mt(const struct sk_buff *skb, struct xt_action_param *par) return ports_match_v1(multiinfo, ntohs(pptr[0]), ntohs(pptr[1])); } +static bool +multiport_valid_ranges(const struct xt_multiport_v1 *multiinfo) +{ + unsigned int i; + + for (i = 0; i < multiinfo->count; i++) { + if (!multiinfo->pflags[i]) + continue; + + if (++i >= multiinfo->count) + return false; + + if (multiinfo->pflags[i]) + return false; + + if (multiinfo->ports[i - 1] > multiinfo->ports[i]) + return false; + } + + return true; +} + static inline bool check(u_int16_t proto, u_int8_t ip_invflags, @@ -127,8 +149,10 @@ static int multiport_mt_check(const struct xt_mtchk_param *par) const struct ipt_ip *ip = par->entryinfo; const struct xt_multiport_v1 *multiinfo = par->matchinfo; - return check(ip->proto, ip->invflags, multiinfo->flags, - multiinfo->count) ? 0 : -EINVAL; + if (!check(ip->proto, ip->invflags, multiinfo->flags, multiinfo->count)) + return -EINVAL; + + return multiport_valid_ranges(multiinfo) ? 0 : -EINVAL; } static int multiport_mt6_check(const struct xt_mtchk_param *par) @@ -136,8 +160,10 @@ static int multiport_mt6_check(const struct xt_mtchk_param *par) const struct ip6t_ip6 *ip = par->entryinfo; const struct xt_multiport_v1 *multiinfo = par->matchinfo; - return check(ip->proto, ip->invflags, multiinfo->flags, - multiinfo->count) ? 0 : -EINVAL; + if (!check(ip->proto, ip->invflags, multiinfo->flags, multiinfo->count)) + return -EINVAL; + + return multiport_valid_ranges(multiinfo) ? 0 : -EINVAL; } static struct xt_match multiport_mt_reg[] __read_mostly = { From fdce0b3590f724540795b874b4c8850c90e6b0a8 Mon Sep 17 00:00:00 2001 From: Zhengchuan Liang Date: Sat, 4 Apr 2026 17:39:47 +0800 Subject: [PATCH 180/248] netfilter: ip6t_eui64: reject invalid MAC header for all packets `eui64_mt6()` derives a modified EUI-64 from the Ethernet source address and compares it with the low 64 bits of the IPv6 source address. The existing guard only rejects an invalid MAC header when `par->fragoff != 0`. For packets with `par->fragoff == 0`, `eui64_mt6()` can still reach `eth_hdr(skb)` even when the MAC header is not valid. Fix this by removing the `par->fragoff != 0` condition so that packets with an invalid MAC header are rejected before accessing `eth_hdr(skb)`. Fixes: 1da177e4c3f41 ("Linux-2.6.12-rc2") Reported-by: Yifan Wu Reported-by: Juefei Pu Co-developed-by: Yuan Tan Signed-off-by: Yuan Tan Suggested-by: Xin Liu Tested-by: Ren Wei Signed-off-by: Zhengchuan Liang Signed-off-by: Ren Wei Signed-off-by: Florian Westphal --- net/ipv6/netfilter/ip6t_eui64.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/net/ipv6/netfilter/ip6t_eui64.c b/net/ipv6/netfilter/ip6t_eui64.c index d704f7ed300c..da69a27e8332 100644 --- a/net/ipv6/netfilter/ip6t_eui64.c +++ b/net/ipv6/netfilter/ip6t_eui64.c @@ -22,8 +22,7 @@ eui64_mt6(const struct sk_buff *skb, struct xt_action_param *par) unsigned char eui64[8]; if (!(skb_mac_header(skb) >= skb->head && - skb_mac_header(skb) + ETH_HLEN <= skb->data) && - par->fragoff != 0) { + skb_mac_header(skb) + ETH_HLEN <= skb->data)) { par->hotdrop = true; return false; } From f8dca15a1b190787bbd03285304b569631160eda Mon Sep 17 00:00:00 2001 From: Tuan Do Date: Fri, 3 Apr 2026 00:33:17 -0700 Subject: [PATCH 181/248] netfilter: nft_ct: fix use-after-free in timeout object destroy nft_ct_timeout_obj_destroy() frees the timeout object with kfree() immediately after nf_ct_untimeout(), without waiting for an RCU grace period. Concurrent packet processing on other CPUs may still hold RCU-protected references to the timeout object obtained via rcu_dereference() in nf_ct_timeout_data(). Add an rcu_head to struct nf_ct_timeout and use kfree_rcu() to defer freeing until after an RCU grace period, matching the approach already used in nfnetlink_cttimeout.c. KASAN report: BUG: KASAN: slab-use-after-free in nf_conntrack_tcp_packet+0x1381/0x29d0 Read of size 4 at addr ffff8881035fe19c by task exploit/80 Call Trace: nf_conntrack_tcp_packet+0x1381/0x29d0 nf_conntrack_in+0x612/0x8b0 nf_hook_slow+0x70/0x100 __ip_local_out+0x1b2/0x210 tcp_sendmsg_locked+0x722/0x1580 __sys_sendto+0x2d8/0x320 Allocated by task 75: nft_ct_timeout_obj_init+0xf6/0x290 nft_obj_init+0x107/0x1b0 nf_tables_newobj+0x680/0x9c0 nfnetlink_rcv_batch+0xc29/0xe00 Freed by task 26: nft_obj_destroy+0x3f/0xa0 nf_tables_trans_destroy_work+0x51c/0x5c0 process_one_work+0x2c4/0x5a0 Fixes: 7e0b2b57f01d ("netfilter: nft_ct: add ct timeout support") Cc: stable@vger.kernel.org Signed-off-by: Tuan Do Signed-off-by: Florian Westphal --- include/net/netfilter/nf_conntrack_timeout.h | 1 + net/netfilter/nft_ct.c | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/include/net/netfilter/nf_conntrack_timeout.h b/include/net/netfilter/nf_conntrack_timeout.h index 9fdaba911de6..3a66d4abb6d6 100644 --- a/include/net/netfilter/nf_conntrack_timeout.h +++ b/include/net/netfilter/nf_conntrack_timeout.h @@ -14,6 +14,7 @@ struct nf_ct_timeout { __u16 l3num; const struct nf_conntrack_l4proto *l4proto; + struct rcu_head rcu; char data[]; }; diff --git a/net/netfilter/nft_ct.c b/net/netfilter/nft_ct.c index 128ff8155b5d..04c74ccf9b84 100644 --- a/net/netfilter/nft_ct.c +++ b/net/netfilter/nft_ct.c @@ -1020,7 +1020,7 @@ static void nft_ct_timeout_obj_destroy(const struct nft_ctx *ctx, nf_queue_nf_hook_drop(ctx->net); nf_ct_untimeout(ctx->net, timeout); nf_ct_netns_put(ctx->net, ctx->family); - kfree(priv->timeout); + kfree_rcu(priv->timeout, rcu); } static int nft_ct_timeout_obj_dump(struct sk_buff *skb, From 936206e3f6ff411581e615e930263d6f8b78df9d Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 7 Apr 2026 17:00:01 +0200 Subject: [PATCH 182/248] netfilter: nfnetlink_queue: make hash table per queue Sharing a global hash table among all queues is tempting, but it can cause crash: BUG: KASAN: slab-use-after-free in nfqnl_recv_verdict+0x11ac/0x15e0 [nfnetlink_queue] [..] nfqnl_recv_verdict+0x11ac/0x15e0 [nfnetlink_queue] nfnetlink_rcv_msg+0x46a/0x930 kmem_cache_alloc_node_noprof+0x11e/0x450 struct nf_queue_entry is freed via kfree, but parallel cpu can still encounter such an nf_queue_entry when walking the list. Alternative fix is to free the nf_queue_entry via kfree_rcu() instead, but as we have to alloc/free for each skb this will cause more mem pressure. Cc: Scott Mitchell Fixes: e19079adcd26 ("netfilter: nfnetlink_queue: optimize verdict lookup with hash table") Signed-off-by: Florian Westphal --- include/net/netfilter/nf_queue.h | 1 - net/netfilter/nfnetlink_queue.c | 139 +++++++++++-------------------- 2 files changed, 49 insertions(+), 91 deletions(-) diff --git a/include/net/netfilter/nf_queue.h b/include/net/netfilter/nf_queue.h index 45eb26b2e95b..d17035d14d96 100644 --- a/include/net/netfilter/nf_queue.h +++ b/include/net/netfilter/nf_queue.h @@ -23,7 +23,6 @@ struct nf_queue_entry { struct nf_hook_state state; bool nf_ct_is_unconfirmed; u16 size; /* sizeof(entry) + saved route keys */ - u16 queue_num; /* extra space to store route keys */ }; diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c index 47f7f62906e2..8e02f84784da 100644 --- a/net/netfilter/nfnetlink_queue.c +++ b/net/netfilter/nfnetlink_queue.c @@ -49,8 +49,8 @@ #endif #define NFQNL_QMAX_DEFAULT 1024 -#define NFQNL_HASH_MIN 1024 -#define NFQNL_HASH_MAX 1048576 +#define NFQNL_HASH_MIN 8 +#define NFQNL_HASH_MAX 32768 /* We're using struct nlattr which has 16bit nla_len. Note that nla_len * includes the header length. Thus, the maximum packet length that we @@ -60,29 +60,10 @@ */ #define NFQNL_MAX_COPY_RANGE (0xffff - NLA_HDRLEN) -/* Composite key for packet lookup: (net, queue_num, packet_id) */ -struct nfqnl_packet_key { - possible_net_t net; - u32 packet_id; - u16 queue_num; -} __aligned(sizeof(u32)); /* jhash2 requires 32-bit alignment */ - -/* Global rhashtable - one for entire system, all netns */ -static struct rhashtable nfqnl_packet_map __read_mostly; - -/* Helper to initialize composite key */ -static inline void nfqnl_init_key(struct nfqnl_packet_key *key, - struct net *net, u32 packet_id, u16 queue_num) -{ - memset(key, 0, sizeof(*key)); - write_pnet(&key->net, net); - key->packet_id = packet_id; - key->queue_num = queue_num; -} - struct nfqnl_instance { struct hlist_node hlist; /* global list of queues */ - struct rcu_head rcu; + struct rhashtable nfqnl_packet_map; + struct rcu_work rwork; u32 peer_portid; unsigned int queue_maxlen; @@ -106,6 +87,7 @@ struct nfqnl_instance { typedef int (*nfqnl_cmpfn)(struct nf_queue_entry *, unsigned long); +static struct workqueue_struct *nfq_cleanup_wq __read_mostly; static unsigned int nfnl_queue_net_id __read_mostly; #define INSTANCE_BUCKETS 16 @@ -124,34 +106,10 @@ static inline u_int8_t instance_hashfn(u_int16_t queue_num) return ((queue_num >> 8) ^ queue_num) % INSTANCE_BUCKETS; } -/* Extract composite key from nf_queue_entry for hashing */ -static u32 nfqnl_packet_obj_hashfn(const void *data, u32 len, u32 seed) -{ - const struct nf_queue_entry *entry = data; - struct nfqnl_packet_key key; - - nfqnl_init_key(&key, entry->state.net, entry->id, entry->queue_num); - - return jhash2((u32 *)&key, sizeof(key) / sizeof(u32), seed); -} - -/* Compare stack-allocated key against entry */ -static int nfqnl_packet_obj_cmpfn(struct rhashtable_compare_arg *arg, - const void *obj) -{ - const struct nfqnl_packet_key *key = arg->key; - const struct nf_queue_entry *entry = obj; - - return !net_eq(entry->state.net, read_pnet(&key->net)) || - entry->queue_num != key->queue_num || - entry->id != key->packet_id; -} - static const struct rhashtable_params nfqnl_rhashtable_params = { .head_offset = offsetof(struct nf_queue_entry, hash_node), - .key_len = sizeof(struct nfqnl_packet_key), - .obj_hashfn = nfqnl_packet_obj_hashfn, - .obj_cmpfn = nfqnl_packet_obj_cmpfn, + .key_offset = offsetof(struct nf_queue_entry, id), + .key_len = sizeof(u32), .automatic_shrinking = true, .min_size = NFQNL_HASH_MIN, .max_size = NFQNL_HASH_MAX, @@ -190,6 +148,10 @@ instance_create(struct nfnl_queue_net *q, u_int16_t queue_num, u32 portid) spin_lock_init(&inst->lock); INIT_LIST_HEAD(&inst->queue_list); + err = rhashtable_init(&inst->nfqnl_packet_map, &nfqnl_rhashtable_params); + if (err < 0) + goto out_free; + spin_lock(&q->instances_lock); if (instance_lookup(q, queue_num)) { err = -EEXIST; @@ -210,6 +172,8 @@ instance_create(struct nfnl_queue_net *q, u_int16_t queue_num, u32 portid) out_unlock: spin_unlock(&q->instances_lock); + rhashtable_destroy(&inst->nfqnl_packet_map); +out_free: kfree(inst); return ERR_PTR(err); } @@ -217,15 +181,18 @@ instance_create(struct nfnl_queue_net *q, u_int16_t queue_num, u32 portid) static void nfqnl_flush(struct nfqnl_instance *queue, nfqnl_cmpfn cmpfn, unsigned long data); -static void -instance_destroy_rcu(struct rcu_head *head) +static void instance_destroy_work(struct work_struct *work) { - struct nfqnl_instance *inst = container_of(head, struct nfqnl_instance, - rcu); + struct nfqnl_instance *inst; + inst = container_of(to_rcu_work(work), struct nfqnl_instance, + rwork); rcu_read_lock(); nfqnl_flush(inst, NULL, 0); rcu_read_unlock(); + + rhashtable_destroy(&inst->nfqnl_packet_map); + kfree(inst); module_put(THIS_MODULE); } @@ -234,7 +201,9 @@ static void __instance_destroy(struct nfqnl_instance *inst) { hlist_del_rcu(&inst->hlist); - call_rcu(&inst->rcu, instance_destroy_rcu); + + INIT_RCU_WORK(&inst->rwork, instance_destroy_work); + queue_rcu_work(nfq_cleanup_wq, &inst->rwork); } static void @@ -250,9 +219,7 @@ __enqueue_entry(struct nfqnl_instance *queue, struct nf_queue_entry *entry) { int err; - entry->queue_num = queue->queue_num; - - err = rhashtable_insert_fast(&nfqnl_packet_map, &entry->hash_node, + err = rhashtable_insert_fast(&queue->nfqnl_packet_map, &entry->hash_node, nfqnl_rhashtable_params); if (unlikely(err)) return err; @@ -266,23 +233,19 @@ __enqueue_entry(struct nfqnl_instance *queue, struct nf_queue_entry *entry) static void __dequeue_entry(struct nfqnl_instance *queue, struct nf_queue_entry *entry) { - rhashtable_remove_fast(&nfqnl_packet_map, &entry->hash_node, + rhashtable_remove_fast(&queue->nfqnl_packet_map, &entry->hash_node, nfqnl_rhashtable_params); list_del(&entry->list); queue->queue_total--; } static struct nf_queue_entry * -find_dequeue_entry(struct nfqnl_instance *queue, unsigned int id, - struct net *net) +find_dequeue_entry(struct nfqnl_instance *queue, unsigned int id) { - struct nfqnl_packet_key key; struct nf_queue_entry *entry; - nfqnl_init_key(&key, net, id, queue->queue_num); - spin_lock_bh(&queue->lock); - entry = rhashtable_lookup_fast(&nfqnl_packet_map, &key, + entry = rhashtable_lookup_fast(&queue->nfqnl_packet_map, &id, nfqnl_rhashtable_params); if (entry) @@ -1531,7 +1494,7 @@ static int nfqnl_recv_verdict(struct sk_buff *skb, const struct nfnl_info *info, verdict = ntohl(vhdr->verdict); - entry = find_dequeue_entry(queue, ntohl(vhdr->id), info->net); + entry = find_dequeue_entry(queue, ntohl(vhdr->id)); if (entry == NULL) return -ENOENT; @@ -1880,40 +1843,38 @@ static int __init nfnetlink_queue_init(void) { int status; - status = rhashtable_init(&nfqnl_packet_map, &nfqnl_rhashtable_params); - if (status < 0) - return status; + nfq_cleanup_wq = alloc_ordered_workqueue("nfq_workqueue", 0); + if (!nfq_cleanup_wq) + return -ENOMEM; status = register_pernet_subsys(&nfnl_queue_net_ops); - if (status < 0) { - pr_err("failed to register pernet ops\n"); - goto cleanup_rhashtable; - } + if (status < 0) + goto cleanup_pernet_subsys; - netlink_register_notifier(&nfqnl_rtnl_notifier); - status = nfnetlink_subsys_register(&nfqnl_subsys); - if (status < 0) { - pr_err("failed to create netlink socket\n"); - goto cleanup_netlink_notifier; - } + status = netlink_register_notifier(&nfqnl_rtnl_notifier); + if (status < 0) + goto cleanup_rtnl_notifier; status = register_netdevice_notifier(&nfqnl_dev_notifier); - if (status < 0) { - pr_err("failed to register netdevice notifier\n"); - goto cleanup_netlink_subsys; - } + if (status < 0) + goto cleanup_dev_notifier; + + status = nfnetlink_subsys_register(&nfqnl_subsys); + if (status < 0) + goto cleanup_nfqnl_subsys; nf_register_queue_handler(&nfqh); return status; -cleanup_netlink_subsys: - nfnetlink_subsys_unregister(&nfqnl_subsys); -cleanup_netlink_notifier: +cleanup_nfqnl_subsys: + unregister_netdevice_notifier(&nfqnl_dev_notifier); +cleanup_dev_notifier: netlink_unregister_notifier(&nfqnl_rtnl_notifier); +cleanup_rtnl_notifier: unregister_pernet_subsys(&nfnl_queue_net_ops); -cleanup_rhashtable: - rhashtable_destroy(&nfqnl_packet_map); +cleanup_pernet_subsys: + destroy_workqueue(nfq_cleanup_wq); return status; } @@ -1924,9 +1885,7 @@ static void __exit nfnetlink_queue_fini(void) nfnetlink_subsys_unregister(&nfqnl_subsys); netlink_unregister_notifier(&nfqnl_rtnl_notifier); unregister_pernet_subsys(&nfnl_queue_net_ops); - - rhashtable_destroy(&nfqnl_packet_map); - + destroy_workqueue(nfq_cleanup_wq); rcu_barrier(); /* Wait for completion of call_rcu()'s */ } From dde1a6084c5ca9d143a562540d5453454d79ea15 Mon Sep 17 00:00:00 2001 From: Fernando Fernandez Mancera Date: Mon, 6 Apr 2026 23:18:31 +0200 Subject: [PATCH 183/248] selftests: nft_queue.sh: add a parallel stress test Introduce a new stress test to check for race conditions in the nfnetlink_queue subsystem, where an entry is freed while another CPU is concurrently walking the global rhashtable. To trigger this, `nf_queue.c` is extended with two new flags: * -O (out-of-order): Buffers packet IDs and flushes them in reverse. * -b (bogus verdicts): Floods the kernel with non-existent packet IDs. The bogus verdict loop forces the kernel's lookup function to perform full rhashtable bucket traversals (-ENOENT). Combined with reverse-order flushing and heavy parallel UDP/ping flooding across 8 queues, this puts the nfnetlink_queue code under pressure. Joint work with Florian Westphal. Signed-off-by: Fernando Fernandez Mancera Signed-off-by: Florian Westphal --- .../selftests/net/netfilter/nf_queue.c | 50 +++++++++-- .../selftests/net/netfilter/nft_queue.sh | 83 ++++++++++++++++--- 2 files changed, 115 insertions(+), 18 deletions(-) diff --git a/tools/testing/selftests/net/netfilter/nf_queue.c b/tools/testing/selftests/net/netfilter/nf_queue.c index 116c0ca0eabb..8bbec37f5356 100644 --- a/tools/testing/selftests/net/netfilter/nf_queue.c +++ b/tools/testing/selftests/net/netfilter/nf_queue.c @@ -19,6 +19,8 @@ struct options { bool count_packets; bool gso_enabled; bool failopen; + bool out_of_order; + bool bogus_verdict; int verbose; unsigned int queue_num; unsigned int timeout; @@ -31,7 +33,7 @@ static struct options opts; static void help(const char *p) { - printf("Usage: %s [-c|-v [-vv] ] [-o] [-t timeout] [-q queue_num] [-Qdst_queue ] [ -d ms_delay ] [-G]\n", p); + printf("Usage: %s [-c|-v [-vv] ] [-o] [-O] [-b] [-t timeout] [-q queue_num] [-Qdst_queue ] [ -d ms_delay ] [-G]\n", p); } static int parse_attr_cb(const struct nlattr *attr, void *data) @@ -275,7 +277,9 @@ static int mainloop(void) unsigned int buflen = 64 * 1024 + MNL_SOCKET_BUFFER_SIZE; struct mnl_socket *nl; struct nlmsghdr *nlh; + uint32_t ooo_ids[16]; unsigned int portid; + int ooo_count = 0; char *buf; int ret; @@ -308,6 +312,9 @@ static int mainloop(void) ret = mnl_cb_run(buf, ret, 0, portid, queue_cb, NULL); if (ret < 0) { + /* bogus verdict mode will generate ENOENT error messages */ + if (opts.bogus_verdict && errno == ENOENT) + continue; perror("mnl_cb_run"); exit(EXIT_FAILURE); } @@ -316,10 +323,35 @@ static int mainloop(void) if (opts.delay_ms) sleep_ms(opts.delay_ms); - nlh = nfq_build_verdict(buf, id, opts.queue_num, opts.verdict); - if (mnl_socket_sendto(nl, nlh, nlh->nlmsg_len) < 0) { - perror("mnl_socket_sendto"); - exit(EXIT_FAILURE); + if (opts.bogus_verdict) { + for (int i = 0; i < 50; i++) { + nlh = nfq_build_verdict(buf, id + 0x7FFFFFFF + i, + opts.queue_num, opts.verdict); + mnl_socket_sendto(nl, nlh, nlh->nlmsg_len); + } + } + + if (opts.out_of_order) { + ooo_ids[ooo_count] = id; + if (ooo_count >= 15) { + for (ooo_count; ooo_count >= 0; ooo_count--) { + nlh = nfq_build_verdict(buf, ooo_ids[ooo_count], + opts.queue_num, opts.verdict); + if (mnl_socket_sendto(nl, nlh, nlh->nlmsg_len) < 0) { + perror("mnl_socket_sendto"); + exit(EXIT_FAILURE); + } + } + ooo_count = 0; + } else { + ooo_count++; + } + } else { + nlh = nfq_build_verdict(buf, id, opts.queue_num, opts.verdict); + if (mnl_socket_sendto(nl, nlh, nlh->nlmsg_len) < 0) { + perror("mnl_socket_sendto"); + exit(EXIT_FAILURE); + } } } @@ -332,7 +364,7 @@ static void parse_opts(int argc, char **argv) { int c; - while ((c = getopt(argc, argv, "chvot:q:Q:d:G")) != -1) { + while ((c = getopt(argc, argv, "chvoObt:q:Q:d:G")) != -1) { switch (c) { case 'c': opts.count_packets = true; @@ -375,6 +407,12 @@ static void parse_opts(int argc, char **argv) case 'v': opts.verbose++; break; + case 'O': + opts.out_of_order = true; + break; + case 'b': + opts.bogus_verdict = true; + break; } } diff --git a/tools/testing/selftests/net/netfilter/nft_queue.sh b/tools/testing/selftests/net/netfilter/nft_queue.sh index ea766bdc5d04..d80390848e85 100755 --- a/tools/testing/selftests/net/netfilter/nft_queue.sh +++ b/tools/testing/selftests/net/netfilter/nft_queue.sh @@ -11,6 +11,7 @@ ret=0 timeout=5 SCTP_TEST_TIMEOUT=60 +STRESS_TEST_TIMEOUT=30 cleanup() { @@ -719,6 +720,74 @@ EOF fi } +check_tainted() +{ + local msg="$1" + + if [ "$tainted_then" -ne 0 ];then + return + fi + + read tainted_now < /proc/sys/kernel/tainted + if [ "$tainted_now" -eq 0 ];then + echo "PASS: $msg" + else + echo "TAINT: $msg" + dmesg + ret=1 + fi +} + +test_queue_stress() +{ + read tainted_then < /proc/sys/kernel/tainted + local i + + ip netns exec "$nsrouter" nft -f /dev/stdin < /dev/null & + + timeout "$STRESS_TEST_TIMEOUT" ip netns exec "$ns3" \ + socat -u UDP-LISTEN:12345,fork,pf=ipv4 STDOUT > /dev/null & + + for i in $(seq 0 7); do + ip netns exec "$nsrouter" timeout "$STRESS_TEST_TIMEOUT" \ + ./nf_queue -q $i -t 2 -O -b > /dev/null & + done + + ip netns exec "$ns1" timeout "$STRESS_TEST_TIMEOUT" \ + ping -q -f 10.0.2.99 > /dev/null 2>&1 & + ip netns exec "$ns1" timeout "$STRESS_TEST_TIMEOUT" \ + ping -q -f 10.0.3.99 > /dev/null 2>&1 & + ip netns exec "$ns1" timeout "$STRESS_TEST_TIMEOUT" \ + ping -q -f "dead:2::99" > /dev/null 2>&1 & + ip netns exec "$ns1" timeout "$STRESS_TEST_TIMEOUT" \ + ping -q -f "dead:3::99" > /dev/null 2>&1 & + + busywait "$BUSYWAIT_TIMEOUT" udp_listener_ready "$ns2" 12345 + busywait "$BUSYWAIT_TIMEOUT" udp_listener_ready "$ns3" 12345 + + for i in $(seq 1 4);do + ip netns exec "$ns1" timeout "$STRESS_TEST_TIMEOUT" \ + socat -u STDIN UDP-DATAGRAM:10.0.2.99:12345 < /dev/zero > /dev/null & + ip netns exec "$ns1" timeout "$STRESS_TEST_TIMEOUT" \ + socat -u STDIN UDP-DATAGRAM:10.0.3.99:12345 < /dev/zero > /dev/null & + done + + wait + + check_tainted "concurrent queueing" +} + test_queue_removal() { read tainted_then < /proc/sys/kernel/tainted @@ -742,18 +811,7 @@ EOF ip netns exec "$ns1" nft flush ruleset - if [ "$tainted_then" -ne 0 ];then - return - fi - - read tainted_now < /proc/sys/kernel/tainted - if [ "$tainted_now" -eq 0 ];then - echo "PASS: queue program exiting while packets queued" - else - echo "TAINT: queue program exiting while packets queued" - dmesg - ret=1 - fi + check_tainted "queue program exiting while packets queued" } ip netns exec "$nsrouter" sysctl net.ipv6.conf.all.forwarding=1 > /dev/null @@ -799,6 +857,7 @@ test_sctp_forward test_sctp_output test_udp_nat_race test_udp_gro_ct +test_queue_stress # should be last, adds vrf device in ns1 and changes routes test_icmp_vrf From 5241c2ca33bb181bf7abb7cb4bba1cc67d1b6278 Mon Sep 17 00:00:00 2001 From: Thomas Zimmermann Date: Thu, 2 Apr 2026 11:09:16 +0200 Subject: [PATCH 184/248] firmware: efi: Never declare sysfb_primary_display on x86 The x86 architecture comes with its own instance of the global state variable sysfb_primary_display. Never declare it in the EFI subsystem. Fix the test for CONFIG_FIRMWARE_EDID accordingly. Signed-off-by: Thomas Zimmermann Fixes: e65ca1646311 ("efi: export sysfb_primary_display for EDID") Signed-off-by: Ard Biesheuvel --- drivers/firmware/efi/efi-init.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/firmware/efi/efi-init.c b/drivers/firmware/efi/efi-init.c index 5a595d026f58..6103b1a082d2 100644 --- a/drivers/firmware/efi/efi-init.c +++ b/drivers/firmware/efi/efi-init.c @@ -60,7 +60,7 @@ extern __weak const efi_config_table_type_t efi_arch_tables[]; * x86 defines its own instance of sysfb_primary_display and uses * it even without EFI, everything else can get them from here. */ -#if !defined(CONFIG_X86) && (defined(CONFIG_SYSFB) || defined(CONFIG_EFI_EARLYCON)) || defined(CONFIG_FIRMWARE_EDID) +#if !defined(CONFIG_X86) && (defined(CONFIG_SYSFB) || defined(CONFIG_EFI_EARLYCON) || defined(CONFIG_FIRMWARE_EDID)) struct sysfb_display_info sysfb_primary_display __section(".data"); EXPORT_SYMBOL_GPL(sysfb_primary_display); #endif From ff14dafde15c11403fac61367a34fea08926e9ee Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Tue, 7 Apr 2026 22:16:27 -0700 Subject: [PATCH 185/248] Input: uinput - take event lock when submitting FF request "event" To avoid racing with FF playback events and corrupting device's event queue take event_lock spinlock when calling uinput_dev_event() when submitting a FF upload or erase "event". Tested-by: Mikhail Gavrilov Link: https://patch.msgid.link/adXkf6MWzlB8LA_s@google.com Cc: stable@vger.kernel.org Signed-off-by: Dmitry Torokhov --- drivers/input/misc/uinput.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/input/misc/uinput.c b/drivers/input/misc/uinput.c index a973e82205b5..0e9544a98e67 100644 --- a/drivers/input/misc/uinput.c +++ b/drivers/input/misc/uinput.c @@ -25,8 +25,10 @@ #include #include #include +#include #include #include +#include #include #include "../input-compat.h" @@ -76,6 +78,8 @@ static int uinput_dev_event(struct input_dev *dev, struct uinput_device *udev = input_get_drvdata(dev); struct timespec64 ts; + lockdep_assert_held(&dev->event_lock); + ktime_get_ts64(&ts); udev->buff[udev->head] = (struct input_event) { @@ -147,6 +151,7 @@ static void uinput_request_release_slot(struct uinput_device *udev, static int uinput_request_send(struct uinput_device *udev, struct uinput_request *request) { + unsigned long flags; int retval = 0; spin_lock(&udev->state_lock); @@ -160,7 +165,9 @@ static int uinput_request_send(struct uinput_device *udev, * Tell our userspace application about this new request * by queueing an input event. */ + spin_lock_irqsave(&udev->dev->event_lock, flags); uinput_dev_event(udev->dev, EV_UINPUT, request->code, request->id); + spin_unlock_irqrestore(&udev->dev->event_lock, flags); out: spin_unlock(&udev->state_lock); From 52f657e34d7b21b47434d9d8b26fa7f6778b63a0 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Wed, 8 Apr 2026 13:18:57 -0700 Subject: [PATCH 186/248] x86: shadow stacks: proper error handling for mmap lock MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 김영민 reports that shstk_pop_sigframe() doesn't check for errors from mmap_read_lock_killable(), which is a silly oversight, and also shows that we haven't marked those functions with "__must_check", which would have immediately caught it. So let's fix both issues. Reported-by: 김영민 Acked-by: Oleg Nesterov Acked-by: Dave Hansen Acked-by: Rick Edgecombe Signed-off-by: Linus Torvalds --- arch/x86/kernel/shstk.c | 3 ++- include/linux/mmap_lock.h | 6 +++--- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/arch/x86/kernel/shstk.c b/arch/x86/kernel/shstk.c index 978232b6d48d..ff8edea8511b 100644 --- a/arch/x86/kernel/shstk.c +++ b/arch/x86/kernel/shstk.c @@ -351,7 +351,8 @@ static int shstk_pop_sigframe(unsigned long *ssp) need_to_check_vma = PAGE_ALIGN(*ssp) == *ssp; if (need_to_check_vma) - mmap_read_lock_killable(current->mm); + if (mmap_read_lock_killable(current->mm)) + return -EINTR; err = get_shstk_data(&token_addr, (unsigned long __user *)*ssp); if (unlikely(err)) diff --git a/include/linux/mmap_lock.h b/include/linux/mmap_lock.h index 93eca48bc443..04b8f61ece5d 100644 --- a/include/linux/mmap_lock.h +++ b/include/linux/mmap_lock.h @@ -546,7 +546,7 @@ static inline void mmap_write_lock_nested(struct mm_struct *mm, int subclass) __mmap_lock_trace_acquire_returned(mm, true, true); } -static inline int mmap_write_lock_killable(struct mm_struct *mm) +static inline int __must_check mmap_write_lock_killable(struct mm_struct *mm) { int ret; @@ -593,7 +593,7 @@ static inline void mmap_read_lock(struct mm_struct *mm) __mmap_lock_trace_acquire_returned(mm, false, true); } -static inline int mmap_read_lock_killable(struct mm_struct *mm) +static inline int __must_check mmap_read_lock_killable(struct mm_struct *mm) { int ret; @@ -603,7 +603,7 @@ static inline int mmap_read_lock_killable(struct mm_struct *mm) return ret; } -static inline bool mmap_read_trylock(struct mm_struct *mm) +static inline bool __must_check mmap_read_trylock(struct mm_struct *mm) { bool ret; From c09ea768bdb975e828f8e17293c397c3d14ad85d Mon Sep 17 00:00:00 2001 From: Felix Gu Date: Sun, 5 Apr 2026 14:51:52 +0800 Subject: [PATCH 187/248] net: mdio: realtek-rtl9300: use scoped device_for_each_child_node loop Switch to device_for_each_child_node_scoped() to auto-release fwnode references on early exit. Fixes: 24e31e474769 ("net: mdio: Add RTL9300 MDIO driver") Signed-off-by: Felix Gu Reviewed-by: Andrew Lunn Link: https://patch.msgid.link/20260405-rtl9300-v1-1-08e4499cf944@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/mdio/mdio-realtek-rtl9300.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/net/mdio/mdio-realtek-rtl9300.c b/drivers/net/mdio/mdio-realtek-rtl9300.c index 405a07075dd1..8d5fb014ca06 100644 --- a/drivers/net/mdio/mdio-realtek-rtl9300.c +++ b/drivers/net/mdio/mdio-realtek-rtl9300.c @@ -466,7 +466,6 @@ static int rtl9300_mdiobus_probe(struct platform_device *pdev) { struct device *dev = &pdev->dev; struct rtl9300_mdio_priv *priv; - struct fwnode_handle *child; int err; priv = devm_kzalloc(dev, sizeof(*priv), GFP_KERNEL); @@ -487,7 +486,7 @@ static int rtl9300_mdiobus_probe(struct platform_device *pdev) if (err) return err; - device_for_each_child_node(dev, child) { + device_for_each_child_node_scoped(dev, child) { err = rtl9300_mdiobus_probe_one(dev, priv, child); if (err) return err; From bdbfead6d38979475df0c2f4bad2b19394fe9bdc Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 8 Apr 2026 13:12:29 +0100 Subject: [PATCH 188/248] rxrpc: Fix key quota calculation for multitoken keys In the rxrpc key preparsing, every token extracted sets the proposed quota value, but for multitoken keys, this will overwrite the previous proposed quota, losing it. Fix this by adding to the proposed quota instead. Fixes: 8a7a3eb4ddbe ("KEYS: RxRPC: Use key preparsing") Closes: https://sashiko.dev/#/patchset/20260319150150.4189381-1-dhowells%40redhat.com Signed-off-by: David Howells cc: Marc Dionne cc: Jeffrey Altman cc: Simon Horman cc: linux-afs@lists.infradead.org cc: stable@kernel.org Link: https://patch.msgid.link/20260408121252.2249051-2-dhowells@redhat.com Signed-off-by: Jakub Kicinski --- net/rxrpc/key.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/net/rxrpc/key.c b/net/rxrpc/key.c index 85078114b2dd..af403f0ccab5 100644 --- a/net/rxrpc/key.c +++ b/net/rxrpc/key.c @@ -72,7 +72,7 @@ static int rxrpc_preparse_xdr_rxkad(struct key_preparsed_payload *prep, return -EKEYREJECTED; plen = sizeof(*token) + sizeof(*token->kad) + tktlen; - prep->quotalen = datalen + plen; + prep->quotalen += datalen + plen; plen -= sizeof(*token); token = kzalloc_obj(*token); @@ -199,7 +199,7 @@ static int rxrpc_preparse_xdr_yfs_rxgk(struct key_preparsed_payload *prep, } plen = sizeof(*token) + sizeof(*token->rxgk) + tktlen + keylen; - prep->quotalen = datalen + plen; + prep->quotalen += datalen + plen; plen -= sizeof(*token); token = kzalloc_obj(*token); @@ -460,6 +460,7 @@ static int rxrpc_preparse(struct key_preparsed_payload *prep) memcpy(&kver, prep->data, sizeof(kver)); prep->data += sizeof(kver); prep->datalen -= sizeof(kver); + prep->quotalen = 0; _debug("KEY I/F VERSION: %u", kver); @@ -497,7 +498,7 @@ static int rxrpc_preparse(struct key_preparsed_payload *prep) goto error; plen = sizeof(*token->kad) + v1->ticket_length; - prep->quotalen = plen + sizeof(*token); + prep->quotalen += plen + sizeof(*token); ret = -ENOMEM; token = kzalloc_obj(*token); From b555912b9b21075e8298015f888ffe3ff60b1a97 Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 8 Apr 2026 13:12:30 +0100 Subject: [PATCH 189/248] rxrpc: Fix key parsing memleak In rxrpc_preparse_xdr_yfs_rxgk(), the memory attached to token->rxgk can be leaked in a few error paths after it's allocated. Fix this by freeing it in the "reject_token:" case. Fixes: 0ca100ff4df6 ("rxrpc: Add YFS RxGK (GSSAPI) security class") Closes: https://sashiko.dev/#/patchset/20260319150150.4189381-1-dhowells%40redhat.com Signed-off-by: David Howells cc: Marc Dionne cc: Jeffrey Altman cc: Simon Horman cc: linux-afs@lists.infradead.org cc: stable@kernel.org Link: https://patch.msgid.link/20260408121252.2249051-3-dhowells@redhat.com Signed-off-by: Jakub Kicinski --- net/rxrpc/key.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/rxrpc/key.c b/net/rxrpc/key.c index af403f0ccab5..26d4336a4a02 100644 --- a/net/rxrpc/key.c +++ b/net/rxrpc/key.c @@ -274,6 +274,7 @@ static int rxrpc_preparse_xdr_yfs_rxgk(struct key_preparsed_payload *prep, nomem: return -ENOMEM; reject_token: + kfree(token->rxgk); kfree(token); reject: return -EKEYREJECTED; From 6a59d84b4fc2f27f7b40e348506cc686712e260b Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 8 Apr 2026 13:12:31 +0100 Subject: [PATCH 190/248] rxrpc: Fix anonymous key handling In rxrpc_new_client_call_for_sendmsg(), a key with no payload is meant to be substituted for a NULL key pointer, but the variable this is done with is subsequently not used. Fix this by using "key" rather than "rx->key" when filling in the connection parameters. Note that this only affects direct use of AF_RXRPC; the kAFS filesystem doesn't use sendmsg() directly and so bypasses the issue. Further, AF_RXRPC passes a NULL key in if no key is set, so using an anonymous key in that manner works. Since this hasn't been noticed to this point, it might be better just to remove the "key" variable and the code that sets it - and, arguably, rxrpc_init_client_call_security() would be a better place to handle it. Fixes: 19ffa01c9c45 ("rxrpc: Use structs to hold connection params and protocol info") Closes: https://sashiko.dev/#/patchset/20260319150150.4189381-1-dhowells%40redhat.com Signed-off-by: David Howells cc: Marc Dionne cc: Jeffrey Altman cc: Simon Horman cc: linux-afs@lists.infradead.org cc: stable@kernel.org Link: https://patch.msgid.link/20260408121252.2249051-4-dhowells@redhat.com Signed-off-by: Jakub Kicinski --- net/rxrpc/sendmsg.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/rxrpc/sendmsg.c b/net/rxrpc/sendmsg.c index 04f9c5f2dc24..c35de4fd75e3 100644 --- a/net/rxrpc/sendmsg.c +++ b/net/rxrpc/sendmsg.c @@ -637,7 +637,7 @@ rxrpc_new_client_call_for_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, memset(&cp, 0, sizeof(cp)); cp.local = rx->local; cp.peer = peer; - cp.key = rx->key; + cp.key = key; cp.security_level = rx->min_sec_level; cp.exclusive = rx->exclusive | p->exclusive; cp.upgrade = p->upgrade; From 146d4ab94cf129ee06cd467cb5c71368a6b5bad6 Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 8 Apr 2026 13:12:32 +0100 Subject: [PATCH 191/248] rxrpc: Fix call removal to use RCU safe deletion Fix rxrpc call removal from the rxnet->calls list to use list_del_rcu() rather than list_del_init() to prevent stuffing up reading /proc/net/rxrpc/calls from potentially getting into an infinite loop. This, however, means that list_empty() no longer works on an entry that's been deleted from the list, making it harder to detect prior deletion. Fix this by: Firstly, make rxrpc_destroy_all_calls() only dump the first ten calls that are unexpectedly still on the list. Limiting the number of steps means there's no need to call cond_resched() or to remove calls from the list here, thereby eliminating the need for rxrpc_put_call() to check for that. rxrpc_put_call() can then be fixed to unconditionally delete the call from the list as it is the only place that the deletion occurs. Fixes: 2baec2c3f854 ("rxrpc: Support network namespacing") Closes: https://sashiko.dev/#/patchset/20260319150150.4189381-1-dhowells%40redhat.com Signed-off-by: David Howells cc: Marc Dionne cc: Jeffrey Altman cc: Linus Torvalds cc: Simon Horman cc: linux-afs@lists.infradead.org cc: stable@kernel.org Link: https://patch.msgid.link/20260408121252.2249051-5-dhowells@redhat.com Signed-off-by: Jakub Kicinski --- include/trace/events/rxrpc.h | 2 +- net/rxrpc/call_object.c | 24 +++++++++--------------- 2 files changed, 10 insertions(+), 16 deletions(-) diff --git a/include/trace/events/rxrpc.h b/include/trace/events/rxrpc.h index 869f97c9bf73..a826cd80007b 100644 --- a/include/trace/events/rxrpc.h +++ b/include/trace/events/rxrpc.h @@ -347,7 +347,7 @@ EM(rxrpc_call_see_release, "SEE release ") \ EM(rxrpc_call_see_userid_exists, "SEE u-exists") \ EM(rxrpc_call_see_waiting_call, "SEE q-conn ") \ - E_(rxrpc_call_see_zap, "SEE zap ") + E_(rxrpc_call_see_still_live, "SEE !still-l") #define rxrpc_txqueue_traces \ EM(rxrpc_txqueue_await_reply, "AWR") \ diff --git a/net/rxrpc/call_object.c b/net/rxrpc/call_object.c index 918f41d97a2f..59329cfe1532 100644 --- a/net/rxrpc/call_object.c +++ b/net/rxrpc/call_object.c @@ -654,11 +654,9 @@ void rxrpc_put_call(struct rxrpc_call *call, enum rxrpc_call_trace why) if (dead) { ASSERTCMP(__rxrpc_call_state(call), ==, RXRPC_CALL_COMPLETE); - if (!list_empty(&call->link)) { - spin_lock(&rxnet->call_lock); - list_del_init(&call->link); - spin_unlock(&rxnet->call_lock); - } + spin_lock(&rxnet->call_lock); + list_del_rcu(&call->link); + spin_unlock(&rxnet->call_lock); rxrpc_cleanup_call(call); } @@ -730,24 +728,20 @@ void rxrpc_destroy_all_calls(struct rxrpc_net *rxnet) _enter(""); if (!list_empty(&rxnet->calls)) { + int shown = 0; + spin_lock(&rxnet->call_lock); - while (!list_empty(&rxnet->calls)) { - call = list_entry(rxnet->calls.next, - struct rxrpc_call, link); - _debug("Zapping call %p", call); - - rxrpc_see_call(call, rxrpc_call_see_zap); - list_del_init(&call->link); + list_for_each_entry(call, &rxnet->calls, link) { + rxrpc_see_call(call, rxrpc_call_see_still_live); pr_err("Call %p still in use (%d,%s,%lx,%lx)!\n", call, refcount_read(&call->ref), rxrpc_call_states[__rxrpc_call_state(call)], call->flags, call->events); - spin_unlock(&rxnet->call_lock); - cond_resched(); - spin_lock(&rxnet->call_lock); + if (++shown >= 10) + break; } spin_unlock(&rxnet->call_lock); From d179a868dd755b0cfcf7582e00943d702b9943b8 Mon Sep 17 00:00:00 2001 From: Oleh Konko Date: Wed, 8 Apr 2026 13:12:33 +0100 Subject: [PATCH 192/248] rxrpc: Fix RxGK token loading to check bounds rxrpc_preparse_xdr_yfs_rxgk() reads the raw key length and ticket length from the XDR token as u32 values and passes each through round_up(x, 4) before using the rounded value for validation and allocation. When the raw length is >= 0xfffffffd, round_up() wraps to 0, so the bounds check and kzalloc both use 0 while the subsequent memcpy still copies the original ~4 GiB value, producing a heap buffer overflow reachable from an unprivileged add_key() call. Fix this by: (1) Rejecting raw key lengths above AFSTOKEN_GK_KEY_MAX and raw ticket lengths above AFSTOKEN_GK_TOKEN_MAX before rounding, consistent with the caps that the RxKAD path already enforces via AFSTOKEN_RK_TIX_MAX. (2) Sizing the flexible-array allocation from the validated raw key length via struct_size_t() instead of the rounded value. (3) Caching the raw lengths so that the later field assignments and memcpy calls do not re-read from the token, eliminating a class of TOCTOU re-parse. The control path (valid token with lengths within bounds) is unaffected. Fixes: 0ca100ff4df6 ("rxrpc: Add YFS RxGK (GSSAPI) security class") Signed-off-by: Oleh Konko Signed-off-by: David Howells Reviewed-by: Jeffrey Altman cc: Marc Dionne cc: Simon Horman cc: linux-afs@lists.infradead.org cc: stable@kernel.org Link: https://patch.msgid.link/20260408121252.2249051-6-dhowells@redhat.com Signed-off-by: Jakub Kicinski --- net/rxrpc/key.c | 30 +++++++++++++++++------------- 1 file changed, 17 insertions(+), 13 deletions(-) diff --git a/net/rxrpc/key.c b/net/rxrpc/key.c index 26d4336a4a02..77237a82be3b 100644 --- a/net/rxrpc/key.c +++ b/net/rxrpc/key.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include @@ -171,7 +172,7 @@ static int rxrpc_preparse_xdr_yfs_rxgk(struct key_preparsed_payload *prep, size_t plen; const __be32 *ticket, *key; s64 tmp; - u32 tktlen, keylen; + size_t raw_keylen, raw_tktlen, keylen, tktlen; _enter(",{%x,%x,%x,%x},%x", ntohl(xdr[0]), ntohl(xdr[1]), ntohl(xdr[2]), ntohl(xdr[3]), @@ -181,18 +182,22 @@ static int rxrpc_preparse_xdr_yfs_rxgk(struct key_preparsed_payload *prep, goto reject; key = xdr + (6 * 2 + 1); - keylen = ntohl(key[-1]); - _debug("keylen: %x", keylen); - keylen = round_up(keylen, 4); + raw_keylen = ntohl(key[-1]); + _debug("keylen: %zx", raw_keylen); + if (raw_keylen > AFSTOKEN_GK_KEY_MAX) + goto reject; + keylen = round_up(raw_keylen, 4); if ((6 * 2 + 2) * 4 + keylen > toklen) goto reject; ticket = xdr + (6 * 2 + 1 + (keylen / 4) + 1); - tktlen = ntohl(ticket[-1]); - _debug("tktlen: %x", tktlen); - tktlen = round_up(tktlen, 4); + raw_tktlen = ntohl(ticket[-1]); + _debug("tktlen: %zx", raw_tktlen); + if (raw_tktlen > AFSTOKEN_GK_TOKEN_MAX) + goto reject; + tktlen = round_up(raw_tktlen, 4); if ((6 * 2 + 2) * 4 + keylen + tktlen != toklen) { - kleave(" = -EKEYREJECTED [%x!=%x, %x,%x]", + kleave(" = -EKEYREJECTED [%zx!=%x, %zx,%zx]", (6 * 2 + 2) * 4 + keylen + tktlen, toklen, keylen, tktlen); goto reject; @@ -206,7 +211,7 @@ static int rxrpc_preparse_xdr_yfs_rxgk(struct key_preparsed_payload *prep, if (!token) goto nomem; - token->rxgk = kzalloc(sizeof(*token->rxgk) + keylen, GFP_KERNEL); + token->rxgk = kzalloc(struct_size_t(struct rxgk_key, _key, raw_keylen), GFP_KERNEL); if (!token->rxgk) goto nomem_token; @@ -221,9 +226,9 @@ static int rxrpc_preparse_xdr_yfs_rxgk(struct key_preparsed_payload *prep, token->rxgk->enctype = tmp = xdr_dec64(xdr + 5 * 2); if (tmp < 0 || tmp > UINT_MAX) goto reject_token; - token->rxgk->key.len = ntohl(key[-1]); + token->rxgk->key.len = raw_keylen; token->rxgk->key.data = token->rxgk->_key; - token->rxgk->ticket.len = ntohl(ticket[-1]); + token->rxgk->ticket.len = raw_tktlen; if (token->rxgk->endtime != 0) { expiry = rxrpc_s64_to_time64(token->rxgk->endtime); @@ -236,8 +241,7 @@ static int rxrpc_preparse_xdr_yfs_rxgk(struct key_preparsed_payload *prep, memcpy(token->rxgk->key.data, key, token->rxgk->key.len); /* Pad the ticket so that we can use it directly in XDR */ - token->rxgk->ticket.data = kzalloc(round_up(token->rxgk->ticket.len, 4), - GFP_KERNEL); + token->rxgk->ticket.data = kzalloc(tktlen, GFP_KERNEL); if (!token->rxgk->ticket.data) goto nomem_yrxgk; memcpy(token->rxgk->ticket.data, ticket, token->rxgk->ticket.len); From b33f5741bb187db8ff32e8f5b96def77cc94dfca Mon Sep 17 00:00:00 2001 From: Alok Tiwari Date: Wed, 8 Apr 2026 13:12:34 +0100 Subject: [PATCH 193/248] rxrpc: Fix use of wrong skb when comparing queued RESP challenge serial In rxrpc_post_response(), the code should be comparing the challenge serial number from the cached response before deciding to switch to a newer response, but looks at the newer packet private data instead, rendering the comparison always false. Fix this by switching to look at the older packet. Fix further[1] to substitute the new packet in place of the old one if newer and also to release whichever we don't use. Fixes: 5800b1cf3fd8 ("rxrpc: Allow CHALLENGEs to the passed to the app for a RESPONSE") Signed-off-by: Alok Tiwari Signed-off-by: David Howells Reviewed-by: Jeffrey Altman cc: Marc Dionne cc: Simon Horman cc: linux-afs@lists.infradead.org cc: stable@kernel.org Link: https://sashiko.dev/#/patchset/20260319150150.4189381-1-dhowells%40redhat.com [1] Link: https://patch.msgid.link/20260408121252.2249051-7-dhowells@redhat.com Signed-off-by: Jakub Kicinski --- include/trace/events/rxrpc.h | 1 + net/rxrpc/conn_event.c | 5 +++-- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/include/trace/events/rxrpc.h b/include/trace/events/rxrpc.h index a826cd80007b..f7f559204b87 100644 --- a/include/trace/events/rxrpc.h +++ b/include/trace/events/rxrpc.h @@ -185,6 +185,7 @@ EM(rxrpc_skb_put_input, "PUT input ") \ EM(rxrpc_skb_put_jumbo_subpacket, "PUT jumbo-sub") \ EM(rxrpc_skb_put_oob, "PUT oob ") \ + EM(rxrpc_skb_put_old_response, "PUT old-resp ") \ EM(rxrpc_skb_put_purge, "PUT purge ") \ EM(rxrpc_skb_put_purge_oob, "PUT purge-oob") \ EM(rxrpc_skb_put_response, "PUT response ") \ diff --git a/net/rxrpc/conn_event.c b/net/rxrpc/conn_event.c index 98ad9b51ca2c..c50cbfc5a313 100644 --- a/net/rxrpc/conn_event.c +++ b/net/rxrpc/conn_event.c @@ -557,11 +557,11 @@ void rxrpc_post_response(struct rxrpc_connection *conn, struct sk_buff *skb) spin_lock_irq(&local->lock); old = conn->tx_response; if (old) { - struct rxrpc_skb_priv *osp = rxrpc_skb(skb); + struct rxrpc_skb_priv *osp = rxrpc_skb(old); /* Always go with the response to the most recent challenge. */ if (after(sp->resp.challenge_serial, osp->resp.challenge_serial)) - conn->tx_response = old; + conn->tx_response = skb; else old = skb; } else { @@ -569,4 +569,5 @@ void rxrpc_post_response(struct rxrpc_connection *conn, struct sk_buff *skb) } spin_unlock_irq(&local->lock); rxrpc_poke_conn(conn, rxrpc_conn_get_poke_response); + rxrpc_free_skb(old, rxrpc_skb_put_old_response); } From 65b3ffe0972ed023acc3981a0f7e1ae5d0208bd3 Mon Sep 17 00:00:00 2001 From: Alok Tiwari Date: Wed, 8 Apr 2026 13:12:35 +0100 Subject: [PATCH 194/248] rxrpc: Fix rack timer warning to report unexpected mode rxrpc_rack_timer_expired() clears call->rack_timer_mode to OFF before the switch. The default case warning therefore always prints OFF and doesn't identify the unexpected timer mode. Log the saved mode value instead so the warning reports the actual unexpected rack timer mode. Fixes: 7c482665931b ("rxrpc: Implement RACK/TLP to deal with transmission stalls [RFC8985]") Signed-off-by: Alok Tiwari Signed-off-by: David Howells Reviewed-by: Simon Horman Reviewed-by: Jeffrey Altman cc: Marc Dionne cc: linux-afs@lists.infradead.org cc: stable@kernel.org Link: https://patch.msgid.link/20260408121252.2249051-8-dhowells@redhat.com Signed-off-by: Jakub Kicinski --- net/rxrpc/input_rack.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/rxrpc/input_rack.c b/net/rxrpc/input_rack.c index 13c371261e0a..9eb109ffba56 100644 --- a/net/rxrpc/input_rack.c +++ b/net/rxrpc/input_rack.c @@ -413,6 +413,6 @@ void rxrpc_rack_timer_expired(struct rxrpc_call *call, ktime_t overran_by) break; //case RXRPC_CALL_RACKTIMER_ZEROWIN: default: - pr_warn("Unexpected rack timer %u", call->rack_timer_mode); + pr_warn("Unexpected rack timer %u", mode); } } From d666540d217e8d420544ebdfbadeedd623562733 Mon Sep 17 00:00:00 2001 From: Anderson Nascimento Date: Wed, 8 Apr 2026 13:12:36 +0100 Subject: [PATCH 195/248] rxrpc: Fix key reference count leak from call->key When creating a client call in rxrpc_alloc_client_call(), the code obtains a reference to the key. This is never cleaned up and gets leaked when the call is destroyed. Fix this by freeing call->key in rxrpc_destroy_call(). Before the patch, it shows the key reference counter elevated: $ cat /proc/keys | grep afs@54321 1bffe9cd I--Q--i 8053480 4169w 3b010000 1000 1000 rxrpc afs@54321: ka $ After the patch, the invalidated key is removed when the code exits: $ cat /proc/keys | grep afs@54321 $ Fixes: f3441d4125fc ("rxrpc: Copy client call parameters into rxrpc_call earlier") Signed-off-by: Anderson Nascimento Co-developed-by: David Howells Signed-off-by: David Howells Reviewed-by: Jeffrey Altman cc: Marc Dionne cc: Simon Horman cc: linux-afs@lists.infradead.org cc: stable@kernel.org Link: https://patch.msgid.link/20260408121252.2249051-9-dhowells@redhat.com Signed-off-by: Jakub Kicinski --- net/rxrpc/call_object.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/rxrpc/call_object.c b/net/rxrpc/call_object.c index 59329cfe1532..f035f486c139 100644 --- a/net/rxrpc/call_object.c +++ b/net/rxrpc/call_object.c @@ -692,6 +692,7 @@ static void rxrpc_destroy_call(struct work_struct *work) rxrpc_put_bundle(call->bundle, rxrpc_bundle_put_call); rxrpc_put_peer(call->peer, rxrpc_peer_put_call); rxrpc_put_local(call->local, rxrpc_local_put_call); + key_put(call->key); call_rcu(&call->rcu, rxrpc_rcu_free_call); } From 0cd3e3f3f2ec1a45aa559e2c0f3d57fac5eb3c25 Mon Sep 17 00:00:00 2001 From: Marc Dionne Date: Wed, 8 Apr 2026 13:12:37 +0100 Subject: [PATCH 196/248] rxrpc: Fix to request an ack if window is limited Peers may only send immediate acks for every 2 UDP packets received. When sending a jumbogram, it is important to check that there is sufficient window space to send another same sized jumbogram following the current one, and request an ack if there isn't. Failure to do so may cause the call to stall waiting for an ack until the resend timer fires. Where jumbograms are in use this causes a very significant drop in performance. Fixes: fe24a5494390 ("rxrpc: Send jumbo DATA packets") Signed-off-by: Marc Dionne Signed-off-by: David Howells cc: Jeffrey Altman cc: Simon Horman cc: linux-afs@lists.infradead.org cc: stable@kernel.org Link: https://patch.msgid.link/20260408121252.2249051-10-dhowells@redhat.com Signed-off-by: Jakub Kicinski --- include/trace/events/rxrpc.h | 1 + net/rxrpc/ar-internal.h | 2 +- net/rxrpc/output.c | 2 ++ net/rxrpc/proc.c | 5 +++-- 4 files changed, 7 insertions(+), 3 deletions(-) diff --git a/include/trace/events/rxrpc.h b/include/trace/events/rxrpc.h index f7f559204b87..578b8038b211 100644 --- a/include/trace/events/rxrpc.h +++ b/include/trace/events/rxrpc.h @@ -521,6 +521,7 @@ #define rxrpc_req_ack_traces \ EM(rxrpc_reqack_ack_lost, "ACK-LOST ") \ EM(rxrpc_reqack_app_stall, "APP-STALL ") \ + EM(rxrpc_reqack_jumbo_win, "JUMBO-WIN ") \ EM(rxrpc_reqack_more_rtt, "MORE-RTT ") \ EM(rxrpc_reqack_no_srv_last, "NO-SRVLAST") \ EM(rxrpc_reqack_old_rtt, "OLD-RTT ") \ diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index 36d6ca0d1089..96ecb83c9071 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -117,7 +117,7 @@ struct rxrpc_net { atomic_t stat_tx_jumbo[10]; atomic_t stat_rx_jumbo[10]; - atomic_t stat_why_req_ack[8]; + atomic_t stat_why_req_ack[9]; atomic_t stat_io_loop; }; diff --git a/net/rxrpc/output.c b/net/rxrpc/output.c index d70db367e358..870e59bf06af 100644 --- a/net/rxrpc/output.c +++ b/net/rxrpc/output.c @@ -479,6 +479,8 @@ static size_t rxrpc_prepare_data_subpacket(struct rxrpc_call *call, why = rxrpc_reqack_old_rtt; else if (!last && !after(READ_ONCE(call->send_top), txb->seq)) why = rxrpc_reqack_app_stall; + else if (call->tx_winsize <= (2 * req->n) || call->cong_cwnd <= (2 * req->n)) + why = rxrpc_reqack_jumbo_win; else goto dont_set_request_ack; diff --git a/net/rxrpc/proc.c b/net/rxrpc/proc.c index 59292f7f9205..7755fca5beb8 100644 --- a/net/rxrpc/proc.c +++ b/net/rxrpc/proc.c @@ -518,11 +518,12 @@ int rxrpc_stats_show(struct seq_file *seq, void *v) atomic_read(&rxnet->stat_rx_acks[RXRPC_ACK_IDLE]), atomic_read(&rxnet->stat_rx_acks[0])); seq_printf(seq, - "Why-Req-A: acklost=%u mrtt=%u ortt=%u stall=%u\n", + "Why-Req-A: acklost=%u mrtt=%u ortt=%u stall=%u jwin=%u\n", atomic_read(&rxnet->stat_why_req_ack[rxrpc_reqack_ack_lost]), atomic_read(&rxnet->stat_why_req_ack[rxrpc_reqack_more_rtt]), atomic_read(&rxnet->stat_why_req_ack[rxrpc_reqack_old_rtt]), - atomic_read(&rxnet->stat_why_req_ack[rxrpc_reqack_app_stall])); + atomic_read(&rxnet->stat_why_req_ack[rxrpc_reqack_app_stall]), + atomic_read(&rxnet->stat_why_req_ack[rxrpc_reqack_jumbo_win])); seq_printf(seq, "Why-Req-A: nolast=%u retx=%u slows=%u smtxw=%u\n", atomic_read(&rxnet->stat_why_req_ack[rxrpc_reqack_no_srv_last]), From 6331f1b24a3e85465f6454e003a3e6c22005a5c5 Mon Sep 17 00:00:00 2001 From: Douya Le Date: Wed, 8 Apr 2026 13:12:38 +0100 Subject: [PATCH 197/248] rxrpc: Only put the call ref if one was acquired rxrpc_input_packet_on_conn() can process a to-client packet after the current client call on the channel has already been torn down. In that case chan->call is NULL, rxrpc_try_get_call() returns NULL and there is no reference to drop. The client-side implicit-end error path does not account for that and unconditionally calls rxrpc_put_call(). This turns a protocol error path into a kernel crash instead of rejecting the packet. Only drop the call reference if one was actually acquired. Keep the existing protocol error handling unchanged. Fixes: 5e6ef4f1017c ("rxrpc: Make the I/O thread take over the call and local processor work") Reported-by: Yifan Wu Reported-by: Juefei Pu Signed-off-by: Douya Le Co-developed-by: Yuan Tan Signed-off-by: Yuan Tan Suggested-by: Xin Liu Signed-off-by: Ao Zhou Signed-off-by: David Howells cc: Marc Dionne cc: Simon Horman cc: linux-afs@lists.infradead.org cc: stable@kernel.org Link: https://patch.msgid.link/20260408121252.2249051-11-dhowells@redhat.com Signed-off-by: Jakub Kicinski --- net/rxrpc/io_thread.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/rxrpc/io_thread.c b/net/rxrpc/io_thread.c index e939ecf417c4..697956931925 100644 --- a/net/rxrpc/io_thread.c +++ b/net/rxrpc/io_thread.c @@ -419,7 +419,8 @@ static int rxrpc_input_packet_on_conn(struct rxrpc_connection *conn, if (sp->hdr.callNumber > chan->call_id) { if (rxrpc_to_client(sp)) { - rxrpc_put_call(call, rxrpc_call_put_input); + if (call) + rxrpc_put_call(call, rxrpc_call_put_input); return rxrpc_protocol_error(skb, rxrpc_eproto_unexpected_implicit_end); } From fe4447cd95623b1cfacc15f280aab73a6d7340b2 Mon Sep 17 00:00:00 2001 From: Yuqi Xu Date: Wed, 8 Apr 2026 13:12:39 +0100 Subject: [PATCH 198/248] rxrpc: reject undecryptable rxkad response tickets rxkad_decrypt_ticket() decrypts the RXKAD response ticket and then parses the buffer as plaintext without checking whether crypto_skcipher_decrypt() succeeded. A malformed RESPONSE can therefore use a non-block-aligned ticket length, make the decrypt operation fail, and still drive the ticket parser with attacker-controlled bytes. Check the decrypt result and abort the connection with RXKADBADTICKET when ticket decryption fails. Fixes: 17926a79320a ("[AF_RXRPC]: Provide secure RxRPC sockets for use by userspace and kernel both") Reported-by: Yifan Wu Reported-by: Juefei Pu Co-developed-by: Yuan Tan Signed-off-by: Yuan Tan Suggested-by: Xin Liu Tested-by: Ren Wei Signed-off-by: Yuqi Xu Signed-off-by: Ren Wei Signed-off-by: David Howells cc: Marc Dionne cc: Simon Horman cc: linux-afs@lists.infradead.org cc: stable@kernel.org Link: https://patch.msgid.link/20260408121252.2249051-12-dhowells@redhat.com Signed-off-by: Jakub Kicinski --- net/rxrpc/rxkad.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/net/rxrpc/rxkad.c b/net/rxrpc/rxkad.c index e923d6829008..0f79d694cb08 100644 --- a/net/rxrpc/rxkad.c +++ b/net/rxrpc/rxkad.c @@ -958,6 +958,7 @@ static int rxkad_decrypt_ticket(struct rxrpc_connection *conn, struct in_addr addr; unsigned int life; time64_t issue, now; + int ret; bool little_endian; u8 *p, *q, *name, *end; @@ -977,8 +978,11 @@ static int rxkad_decrypt_ticket(struct rxrpc_connection *conn, sg_init_one(&sg[0], ticket, ticket_len); skcipher_request_set_callback(req, 0, NULL, NULL); skcipher_request_set_crypt(req, sg, sg, ticket_len, iv.x); - crypto_skcipher_decrypt(req); + ret = crypto_skcipher_decrypt(req); skcipher_request_free(req); + if (ret < 0) + return rxrpc_abort_conn(conn, skb, RXKADBADTICKET, -EPROTO, + rxkad_abort_resp_tkt_short); p = ticket; end = p + ticket_len; From 3e3138007887504ee9206d0bfb5acb062c600025 Mon Sep 17 00:00:00 2001 From: Keenan Dong Date: Wed, 8 Apr 2026 13:12:40 +0100 Subject: [PATCH 199/248] rxrpc: fix RESPONSE authenticator parser OOB read rxgk_verify_authenticator() copies auth_len bytes into a temporary buffer and then passes p + auth_len as the parser limit to rxgk_do_verify_authenticator(). Since p is a __be32 *, that inflates the parser end pointer by a factor of four and lets malformed RESPONSE authenticators read past the kmalloc() buffer. Decoded from the original latest-net reproduction logs with scripts/decode_stacktrace.sh: BUG: KASAN: slab-out-of-bounds in rxgk_verify_response() Call Trace: dump_stack_lvl() [lib/dump_stack.c:123] print_report() [mm/kasan/report.c:379 mm/kasan/report.c:482] kasan_report() [mm/kasan/report.c:597] rxgk_verify_response() [net/rxrpc/rxgk.c:1103 net/rxrpc/rxgk.c:1167 net/rxrpc/rxgk.c:1274] rxrpc_process_connection() [net/rxrpc/conn_event.c:266 net/rxrpc/conn_event.c:364 net/rxrpc/conn_event.c:386] process_one_work() [kernel/workqueue.c:3281] worker_thread() [kernel/workqueue.c:3353 kernel/workqueue.c:3440] kthread() [kernel/kthread.c:436] ret_from_fork() [arch/x86/kernel/process.c:164] Allocated by task 54: rxgk_verify_response() [include/linux/slab.h:954 net/rxrpc/rxgk.c:1155 net/rxrpc/rxgk.c:1274] rxrpc_process_connection() [net/rxrpc/conn_event.c:266 net/rxrpc/conn_event.c:364 net/rxrpc/conn_event.c:386] Convert the byte count to __be32 units before constructing the parser limit. Fixes: 9d1d2b59341f ("rxrpc: rxgk: Implement the yfs-rxgk security class (GSSAPI)") Signed-off-by: Keenan Dong Signed-off-by: David Howells cc: Marc Dionne cc: Simon Horman cc: Willy Tarreau cc: linux-afs@lists.infradead.org cc: stable@kernel.org Link: https://patch.msgid.link/20260408121252.2249051-13-dhowells@redhat.com Signed-off-by: Jakub Kicinski --- net/rxrpc/rxgk.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/rxrpc/rxgk.c b/net/rxrpc/rxgk.c index f9f5a2dc62ed..01dbdf0b5cf2 100644 --- a/net/rxrpc/rxgk.c +++ b/net/rxrpc/rxgk.c @@ -1164,7 +1164,8 @@ static int rxgk_verify_authenticator(struct rxrpc_connection *conn, } p = auth; - ret = rxgk_do_verify_authenticator(conn, krb5, skb, p, p + auth_len); + ret = rxgk_do_verify_authenticator(conn, krb5, skb, p, + p + auth_len / sizeof(*p)); error: kfree(auth); return ret; From a2567217ade970ecc458144b6be469bc015b23e5 Mon Sep 17 00:00:00 2001 From: Keenan Dong Date: Wed, 8 Apr 2026 13:12:41 +0100 Subject: [PATCH 200/248] rxrpc: fix oversized RESPONSE authenticator length check rxgk_verify_response() decodes auth_len from the packet and is supposed to verify that it fits in the remaining bytes. The existing check is inverted, so oversized RESPONSE authenticators are accepted and passed to rxgk_decrypt_skb(), which can later reach skb_to_sgvec() with an impossible length and hit BUG_ON(len). Decoded from the original latest-net reproduction logs with scripts/decode_stacktrace.sh: RIP: __skb_to_sgvec() [net/core/skbuff.c:5285 (discriminator 1)] Call Trace: skb_to_sgvec() [net/core/skbuff.c:5305] rxgk_decrypt_skb() [net/rxrpc/rxgk_common.h:81] rxgk_verify_response() [net/rxrpc/rxgk.c:1268] rxrpc_process_connection() [net/rxrpc/conn_event.c:266 net/rxrpc/conn_event.c:364 net/rxrpc/conn_event.c:386] process_one_work() [kernel/workqueue.c:3281] worker_thread() [kernel/workqueue.c:3353 kernel/workqueue.c:3440] kthread() [kernel/kthread.c:436] ret_from_fork() [arch/x86/kernel/process.c:164] Reject authenticator lengths that exceed the remaining packet payload. Fixes: 9d1d2b59341f ("rxrpc: rxgk: Implement the yfs-rxgk security class (GSSAPI)") Signed-off-by: Keenan Dong Signed-off-by: David Howells cc: Marc Dionne cc: Simon Horman cc: Willy Tarreau cc: linux-afs@lists.infradead.org cc: stable@kernel.org Link: https://patch.msgid.link/20260408121252.2249051-14-dhowells@redhat.com Signed-off-by: Jakub Kicinski --- net/rxrpc/rxgk.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/rxrpc/rxgk.c b/net/rxrpc/rxgk.c index 01dbdf0b5cf2..9e4a4ff28913 100644 --- a/net/rxrpc/rxgk.c +++ b/net/rxrpc/rxgk.c @@ -1224,7 +1224,7 @@ static int rxgk_verify_response(struct rxrpc_connection *conn, auth_offset = offset; auth_len = ntohl(xauth_len); - if (auth_len < len) + if (auth_len > len) goto short_packet; if (auth_len & 3) goto inconsistent; From f125846ee79fcae537a964ce66494e96fa54a6de Mon Sep 17 00:00:00 2001 From: Luxiao Xu Date: Wed, 8 Apr 2026 13:12:42 +0100 Subject: [PATCH 201/248] rxrpc: fix reference count leak in rxrpc_server_keyring() This patch fixes a reference count leak in rxrpc_server_keyring() by checking if rx->securities is already set. Fixes: 17926a79320a ("[AF_RXRPC]: Provide secure RxRPC sockets for use by userspace and kernel both") Reported-by: Yifan Wu Reported-by: Juefei Pu Co-developed-by: Yuan Tan Signed-off-by: Yuan Tan Suggested-by: Xin Liu Tested-by: Ren Wei Signed-off-by: Luxiao Xu Signed-off-by: Ren Wei Signed-off-by: David Howells cc: Marc Dionne cc: Simon Horman cc: linux-afs@lists.infradead.org cc: stable@kernel.org Link: https://patch.msgid.link/20260408121252.2249051-15-dhowells@redhat.com Signed-off-by: Jakub Kicinski --- net/rxrpc/server_key.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/rxrpc/server_key.c b/net/rxrpc/server_key.c index 36b05fd842a7..27491f1e1273 100644 --- a/net/rxrpc/server_key.c +++ b/net/rxrpc/server_key.c @@ -125,6 +125,9 @@ int rxrpc_server_keyring(struct rxrpc_sock *rx, sockptr_t optval, int optlen) _enter(""); + if (rx->securities) + return -EINVAL; + if (optlen <= 0 || optlen > PAGE_SIZE - 1) return -EINVAL; From 2afd86ccbb2082a3c4258aea8c07e5bb6267bc2f Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 8 Apr 2026 13:12:43 +0100 Subject: [PATCH 202/248] rxrpc: Fix key/keyring checks in setsockopt(RXRPC_SECURITY_KEY/KEYRING) An AF_RXRPC socket can be both client and server at the same time. When sending new calls (ie. it's acting as a client), it uses rx->key to set the security, and when accepting incoming calls (ie. it's acting as a server), it uses rx->securities. setsockopt(RXRPC_SECURITY_KEY) sets rx->key to point to an rxrpc-type key and setsockopt(RXRPC_SECURITY_KEYRING) sets rx->securities to point to a keyring of rxrpc_s-type keys. Now, it should be possible to use both rx->key and rx->securities on the same socket - but for userspace AF_RXRPC sockets rxrpc_setsockopt() prevents that. Fix this by: (1) Remove the incorrect check rxrpc_setsockopt(RXRPC_SECURITY_KEYRING) makes on rx->key. (2) Move the check that rxrpc_setsockopt(RXRPC_SECURITY_KEY) makes on rx->key down into rxrpc_request_key(). (3) Remove rxrpc_request_key()'s check on rx->securities. This (in combination with a previous patch) pushes the checks down into the functions that set those pointers and removes the cross-checks that prevent both key and keyring being set. Fixes: 17926a79320a ("[AF_RXRPC]: Provide secure RxRPC sockets for use by userspace and kernel both") Closes: https://sashiko.dev/#/patchset/20260401105614.1696001-10-dhowells@redhat.com Signed-off-by: David Howells cc: Marc Dionne cc: Anderson Nascimento cc: Luxiao Xu cc: Yuan Tan cc: Simon Horman cc: linux-afs@lists.infradead.org cc: stable@kernel.org Link: https://patch.msgid.link/20260408121252.2249051-16-dhowells@redhat.com Signed-off-by: Jakub Kicinski --- net/rxrpc/af_rxrpc.c | 6 ------ net/rxrpc/key.c | 2 +- 2 files changed, 1 insertion(+), 7 deletions(-) diff --git a/net/rxrpc/af_rxrpc.c b/net/rxrpc/af_rxrpc.c index 0f90272ac254..32ec91fa938f 100644 --- a/net/rxrpc/af_rxrpc.c +++ b/net/rxrpc/af_rxrpc.c @@ -654,9 +654,6 @@ static int rxrpc_setsockopt(struct socket *sock, int level, int optname, goto success; case RXRPC_SECURITY_KEY: - ret = -EINVAL; - if (rx->key) - goto error; ret = -EISCONN; if (rx->sk.sk_state != RXRPC_UNBOUND) goto error; @@ -664,9 +661,6 @@ static int rxrpc_setsockopt(struct socket *sock, int level, int optname, goto error; case RXRPC_SECURITY_KEYRING: - ret = -EINVAL; - if (rx->key) - goto error; ret = -EISCONN; if (rx->sk.sk_state != RXRPC_UNBOUND) goto error; diff --git a/net/rxrpc/key.c b/net/rxrpc/key.c index 77237a82be3b..6301d79ee35a 100644 --- a/net/rxrpc/key.c +++ b/net/rxrpc/key.c @@ -622,7 +622,7 @@ int rxrpc_request_key(struct rxrpc_sock *rx, sockptr_t optval, int optlen) _enter(""); - if (optlen <= 0 || optlen > PAGE_SIZE - 1 || rx->securities) + if (optlen <= 0 || optlen > PAGE_SIZE - 1 || rx->key) return -EINVAL; description = memdup_sockptr_nul(optval, optlen); From f93af41b9f5f798823d0d0fb8765c2a936d76270 Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 8 Apr 2026 13:12:44 +0100 Subject: [PATCH 203/248] rxrpc: Fix missing error checks for rxkad encryption/decryption failure Add error checking for failure of crypto_skcipher_en/decrypt() to various rxkad function as the crypto functions can fail with ENOMEM at least. Fixes: 17926a79320a ("[AF_RXRPC]: Provide secure RxRPC sockets for use by userspace and kernel both") Closes: https://sashiko.dev/#/patchset/20260401105614.1696001-10-dhowells@redhat.com Signed-off-by: David Howells cc: Marc Dionne cc: Jeffrey Altman cc: Simon Horman cc: linux-afs@lists.infradead.org cc: stable@kernel.org Link: https://patch.msgid.link/20260408121252.2249051-17-dhowells@redhat.com Signed-off-by: Jakub Kicinski --- net/rxrpc/rxkad.c | 57 +++++++++++++++++++++++++++++++---------------- 1 file changed, 38 insertions(+), 19 deletions(-) diff --git a/net/rxrpc/rxkad.c b/net/rxrpc/rxkad.c index 0f79d694cb08..eb7f2769d2b1 100644 --- a/net/rxrpc/rxkad.c +++ b/net/rxrpc/rxkad.c @@ -197,6 +197,7 @@ static int rxkad_prime_packet_security(struct rxrpc_connection *conn, struct rxrpc_crypt iv; __be32 *tmpbuf; size_t tmpsize = 4 * sizeof(__be32); + int ret; _enter(""); @@ -225,13 +226,13 @@ static int rxkad_prime_packet_security(struct rxrpc_connection *conn, skcipher_request_set_sync_tfm(req, ci); skcipher_request_set_callback(req, 0, NULL, NULL); skcipher_request_set_crypt(req, &sg, &sg, tmpsize, iv.x); - crypto_skcipher_encrypt(req); + ret = crypto_skcipher_encrypt(req); skcipher_request_free(req); memcpy(&conn->rxkad.csum_iv, tmpbuf + 2, sizeof(conn->rxkad.csum_iv)); kfree(tmpbuf); - _leave(" = 0"); - return 0; + _leave(" = %d", ret); + return ret; } /* @@ -264,6 +265,7 @@ static int rxkad_secure_packet_auth(const struct rxrpc_call *call, struct scatterlist sg; size_t pad; u16 check; + int ret; _enter(""); @@ -286,11 +288,11 @@ static int rxkad_secure_packet_auth(const struct rxrpc_call *call, skcipher_request_set_sync_tfm(req, call->conn->rxkad.cipher); skcipher_request_set_callback(req, 0, NULL, NULL); skcipher_request_set_crypt(req, &sg, &sg, 8, iv.x); - crypto_skcipher_encrypt(req); + ret = crypto_skcipher_encrypt(req); skcipher_request_zero(req); - _leave(" = 0"); - return 0; + _leave(" = %d", ret); + return ret; } /* @@ -345,7 +347,7 @@ static int rxkad_secure_packet(struct rxrpc_call *call, struct rxrpc_txbuf *txb) union { __be32 buf[2]; } crypto __aligned(8); - u32 x, y; + u32 x, y = 0; int ret; _enter("{%d{%x}},{#%u},%u,", @@ -376,8 +378,10 @@ static int rxkad_secure_packet(struct rxrpc_call *call, struct rxrpc_txbuf *txb) skcipher_request_set_sync_tfm(req, call->conn->rxkad.cipher); skcipher_request_set_callback(req, 0, NULL, NULL); skcipher_request_set_crypt(req, &sg, &sg, 8, iv.x); - crypto_skcipher_encrypt(req); + ret = crypto_skcipher_encrypt(req); skcipher_request_zero(req); + if (ret < 0) + goto out; y = ntohl(crypto.buf[1]); y = (y >> 16) & 0xffff; @@ -413,6 +417,7 @@ static int rxkad_secure_packet(struct rxrpc_call *call, struct rxrpc_txbuf *txb) memset(p + txb->pkt_len, 0, gap); } +out: skcipher_request_free(req); _leave(" = %d [set %x]", ret, y); return ret; @@ -453,8 +458,10 @@ static int rxkad_verify_packet_1(struct rxrpc_call *call, struct sk_buff *skb, skcipher_request_set_sync_tfm(req, call->conn->rxkad.cipher); skcipher_request_set_callback(req, 0, NULL, NULL); skcipher_request_set_crypt(req, sg, sg, 8, iv.x); - crypto_skcipher_decrypt(req); + ret = crypto_skcipher_decrypt(req); skcipher_request_zero(req); + if (ret < 0) + return ret; /* Extract the decrypted packet length */ if (skb_copy_bits(skb, sp->offset, &sechdr, sizeof(sechdr)) < 0) @@ -531,10 +538,14 @@ static int rxkad_verify_packet_2(struct rxrpc_call *call, struct sk_buff *skb, skcipher_request_set_sync_tfm(req, call->conn->rxkad.cipher); skcipher_request_set_callback(req, 0, NULL, NULL); skcipher_request_set_crypt(req, sg, sg, sp->len, iv.x); - crypto_skcipher_decrypt(req); + ret = crypto_skcipher_decrypt(req); skcipher_request_zero(req); if (sg != _sg) kfree(sg); + if (ret < 0) { + WARN_ON_ONCE(ret != -ENOMEM); + return ret; + } /* Extract the decrypted packet length */ if (skb_copy_bits(skb, sp->offset, &sechdr, sizeof(sechdr)) < 0) @@ -602,8 +613,10 @@ static int rxkad_verify_packet(struct rxrpc_call *call, struct sk_buff *skb) skcipher_request_set_sync_tfm(req, call->conn->rxkad.cipher); skcipher_request_set_callback(req, 0, NULL, NULL); skcipher_request_set_crypt(req, &sg, &sg, 8, iv.x); - crypto_skcipher_encrypt(req); + ret = crypto_skcipher_encrypt(req); skcipher_request_zero(req); + if (ret < 0) + goto out; y = ntohl(crypto.buf[1]); cksum = (y >> 16) & 0xffff; @@ -1077,21 +1090,23 @@ static int rxkad_decrypt_ticket(struct rxrpc_connection *conn, /* * decrypt the response packet */ -static void rxkad_decrypt_response(struct rxrpc_connection *conn, - struct rxkad_response *resp, - const struct rxrpc_crypt *session_key) +static int rxkad_decrypt_response(struct rxrpc_connection *conn, + struct rxkad_response *resp, + const struct rxrpc_crypt *session_key) { struct skcipher_request *req = rxkad_ci_req; struct scatterlist sg[1]; struct rxrpc_crypt iv; + int ret; _enter(",,%08x%08x", ntohl(session_key->n[0]), ntohl(session_key->n[1])); mutex_lock(&rxkad_ci_mutex); - if (crypto_sync_skcipher_setkey(rxkad_ci, session_key->x, - sizeof(*session_key)) < 0) - BUG(); + ret = crypto_sync_skcipher_setkey(rxkad_ci, session_key->x, + sizeof(*session_key)); + if (ret < 0) + goto unlock; memcpy(&iv, session_key, sizeof(iv)); @@ -1100,12 +1115,14 @@ static void rxkad_decrypt_response(struct rxrpc_connection *conn, skcipher_request_set_sync_tfm(req, rxkad_ci); skcipher_request_set_callback(req, 0, NULL, NULL); skcipher_request_set_crypt(req, sg, sg, sizeof(resp->encrypted), iv.x); - crypto_skcipher_decrypt(req); + ret = crypto_skcipher_decrypt(req); skcipher_request_zero(req); +unlock: mutex_unlock(&rxkad_ci_mutex); _leave(""); + return ret; } /* @@ -1198,7 +1215,9 @@ static int rxkad_verify_response(struct rxrpc_connection *conn, /* use the session key from inside the ticket to decrypt the * response */ - rxkad_decrypt_response(conn, response, &session_key); + ret = rxkad_decrypt_response(conn, response, &session_key); + if (ret < 0) + goto temporary_error_free_ticket; if (ntohl(response->encrypted.epoch) != conn->proto.epoch || ntohl(response->encrypted.cid) != conn->proto.cid || From 699e52180f4231c257821c037ed5c99d5eb0edb8 Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 8 Apr 2026 13:12:45 +0100 Subject: [PATCH 204/248] rxrpc: Fix integer overflow in rxgk_verify_response() In rxgk_verify_response(), there's a potential integer overflow due to rounding up token_len before checking it, thereby allowing the length check to be bypassed. Fix this by checking the unrounded value against len too (len is limited as the response must fit in a single UDP packet). Fixes: 9d1d2b59341f ("rxrpc: rxgk: Implement the yfs-rxgk security class (GSSAPI)") Closes: https://sashiko.dev/#/patchset/20260401105614.1696001-10-dhowells@redhat.com Signed-off-by: David Howells cc: Marc Dionne cc: Jeffrey Altman cc: Simon Horman cc: linux-afs@lists.infradead.org cc: stable@kernel.org Link: https://patch.msgid.link/20260408121252.2249051-18-dhowells@redhat.com Signed-off-by: Jakub Kicinski --- net/rxrpc/rxgk.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/rxrpc/rxgk.c b/net/rxrpc/rxgk.c index 9e4a4ff28913..064c1531fc99 100644 --- a/net/rxrpc/rxgk.c +++ b/net/rxrpc/rxgk.c @@ -1209,7 +1209,8 @@ static int rxgk_verify_response(struct rxrpc_connection *conn, token_offset = offset; token_len = ntohl(rhdr.token_len); - if (xdr_round_up(token_len) + sizeof(__be32) > len) + if (token_len > len || + xdr_round_up(token_len) + sizeof(__be32) > len) goto short_packet; trace_rxrpc_rx_response(conn, sp->hdr.serial, 0, sp->hdr.cksum, token_len); From 7e1876caa8363056f58a21d3b31b82c2daf7e608 Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 8 Apr 2026 13:12:46 +0100 Subject: [PATCH 205/248] rxrpc: Fix leak of rxgk context in rxgk_verify_response() Fix rxgk_verify_response() to clean up the rxgk context it creates. Fixes: 9d1d2b59341f ("rxrpc: rxgk: Implement the yfs-rxgk security class (GSSAPI)") Closes: https://sashiko.dev/#/patchset/20260401105614.1696001-10-dhowells@redhat.com Signed-off-by: David Howells cc: Marc Dionne cc: Jeffrey Altman cc: Simon Horman cc: linux-afs@lists.infradead.org cc: stable@kernel.org Link: https://patch.msgid.link/20260408121252.2249051-19-dhowells@redhat.com Signed-off-by: Jakub Kicinski --- net/rxrpc/rxgk.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/net/rxrpc/rxgk.c b/net/rxrpc/rxgk.c index 064c1531fc99..c67e3c2ca871 100644 --- a/net/rxrpc/rxgk.c +++ b/net/rxrpc/rxgk.c @@ -1270,16 +1270,18 @@ static int rxgk_verify_response(struct rxrpc_connection *conn, if (ret < 0) { rxrpc_abort_conn(conn, skb, RXGK_SEALEDINCON, ret, rxgk_abort_resp_auth_dec); - goto out; + goto out_gk; } ret = rxgk_verify_authenticator(conn, krb5, skb, auth_offset, auth_len); if (ret < 0) - goto out; + goto out_gk; conn->key = key; key = NULL; ret = 0; +out_gk: + rxgk_put(gk); out: key_put(key); _leave(" = %d", ret); From f564af387c8c28238f8ebc13314c589d7ba8475d Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 8 Apr 2026 13:12:47 +0100 Subject: [PATCH 206/248] rxrpc: Fix buffer overread in rxgk_do_verify_authenticator() Fix rxgk_do_verify_authenticator() to check the buffer size before checking the nonce. Fixes: 9d1d2b59341f ("rxrpc: rxgk: Implement the yfs-rxgk security class (GSSAPI)") Closes: https://sashiko.dev/#/patchset/20260401105614.1696001-10-dhowells@redhat.com Signed-off-by: David Howells cc: Marc Dionne cc: Jeffrey Altman cc: Simon Horman cc: linux-afs@lists.infradead.org cc: stable@kernel.org Link: https://patch.msgid.link/20260408121252.2249051-20-dhowells@redhat.com Signed-off-by: Jakub Kicinski --- net/rxrpc/rxgk.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/net/rxrpc/rxgk.c b/net/rxrpc/rxgk.c index c67e3c2ca871..0d5e654da918 100644 --- a/net/rxrpc/rxgk.c +++ b/net/rxrpc/rxgk.c @@ -1085,6 +1085,9 @@ static int rxgk_do_verify_authenticator(struct rxrpc_connection *conn, _enter(""); + if ((end - p) * sizeof(__be32) < 24) + return rxrpc_abort_conn(conn, skb, RXGK_NOTAUTH, -EPROTO, + rxgk_abort_resp_short_auth); if (memcmp(p, conn->rxgk.nonce, 20) != 0) return rxrpc_abort_conn(conn, skb, RXGK_NOTAUTH, -EPROTO, rxgk_abort_resp_bad_nonce); @@ -1098,7 +1101,7 @@ static int rxgk_do_verify_authenticator(struct rxrpc_connection *conn, p += xdr_round_up(app_len) / sizeof(__be32); if (end - p < 4) return rxrpc_abort_conn(conn, skb, RXGK_NOTAUTH, -EPROTO, - rxgk_abort_resp_short_applen); + rxgk_abort_resp_short_auth); level = ntohl(*p++); epoch = ntohl(*p++); From c43ffdcfdbb5567b1f143556df8a04b4eeea041c Mon Sep 17 00:00:00 2001 From: Wang Jie Date: Wed, 8 Apr 2026 13:12:48 +0100 Subject: [PATCH 207/248] rxrpc: only handle RESPONSE during service challenge Only process RESPONSE packets while the service connection is still in RXRPC_CONN_SERVICE_CHALLENGING. Check that state under state_lock before running response verification and security initialization, then use a local secured flag to decide whether to queue the secured-connection work after the state transition. This keeps duplicate or late RESPONSE packets from re-running the setup path and removes the unlocked post-transition state test. Fixes: 17926a79320a ("[AF_RXRPC]: Provide secure RxRPC sockets for use by userspace and kernel both") Reported-by: Yifan Wu Reported-by: Juefei Pu Co-developed-by: Yuan Tan Signed-off-by: Yuan Tan Suggested-by: Xin Liu Signed-off-by: Jie Wang Signed-off-by: Yang Yang Signed-off-by: David Howells cc: Marc Dionne cc: Jeffrey Altman cc: Simon Horman cc: linux-afs@lists.infradead.org cc: stable@kernel.org Link: https://patch.msgid.link/20260408121252.2249051-21-dhowells@redhat.com Signed-off-by: Jakub Kicinski --- net/rxrpc/conn_event.c | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/net/rxrpc/conn_event.c b/net/rxrpc/conn_event.c index c50cbfc5a313..9a41ec708aeb 100644 --- a/net/rxrpc/conn_event.c +++ b/net/rxrpc/conn_event.c @@ -247,6 +247,7 @@ static int rxrpc_process_event(struct rxrpc_connection *conn, struct sk_buff *skb) { struct rxrpc_skb_priv *sp = rxrpc_skb(skb); + bool secured = false; int ret; if (conn->state == RXRPC_CONN_ABORTED) @@ -262,6 +263,13 @@ static int rxrpc_process_event(struct rxrpc_connection *conn, return ret; case RXRPC_PACKET_TYPE_RESPONSE: + spin_lock_irq(&conn->state_lock); + if (conn->state != RXRPC_CONN_SERVICE_CHALLENGING) { + spin_unlock_irq(&conn->state_lock); + return 0; + } + spin_unlock_irq(&conn->state_lock); + ret = conn->security->verify_response(conn, skb); if (ret < 0) return ret; @@ -272,11 +280,13 @@ static int rxrpc_process_event(struct rxrpc_connection *conn, return ret; spin_lock_irq(&conn->state_lock); - if (conn->state == RXRPC_CONN_SERVICE_CHALLENGING) + if (conn->state == RXRPC_CONN_SERVICE_CHALLENGING) { conn->state = RXRPC_CONN_SERVICE; + secured = true; + } spin_unlock_irq(&conn->state_lock); - if (conn->state == RXRPC_CONN_SERVICE) { + if (secured) { /* Offload call state flipping to the I/O thread. As * we've already received the packet, put it on the * front of the queue. From a44ce6aa2efb61fe44f2cfab72bb01544bbca272 Mon Sep 17 00:00:00 2001 From: Pengpeng Hou Date: Wed, 8 Apr 2026 13:12:49 +0100 Subject: [PATCH 208/248] rxrpc: proc: size address buffers for %pISpc output The AF_RXRPC procfs helpers format local and remote socket addresses into fixed 50-byte stack buffers with "%pISpc". That is too small for the longest current-tree IPv6-with-port form the formatter can produce. In lib/vsprintf.c, the compressed IPv6 path uses a dotted-quad tail not only for v4mapped addresses, but also for ISATAP addresses via ipv6_addr_is_isatap(). As a result, a case such as [ffff:ffff:ffff:ffff:0:5efe:255.255.255.255]:65535 is possible with the current formatter. That is 50 visible characters, so 51 bytes including the trailing NUL, which does not fit in the existing char[50] buffers used by net/rxrpc/proc.c. Size the buffers from the formatter's maximum textual form and switch the call sites to scnprintf(). Changes since v1: - correct the changelog to cite the actual maximum current-tree case explicitly - frame the proof around the ISATAP formatting path instead of the earlier mapped-v4 example Fixes: 75b54cb57ca3 ("rxrpc: Add IPv6 support") Signed-off-by: Pengpeng Hou Signed-off-by: David Howells cc: Marc Dionne cc: Anderson Nascimento cc: Simon Horman cc: linux-afs@lists.infradead.org cc: stable@kernel.org Link: https://patch.msgid.link/20260408121252.2249051-22-dhowells@redhat.com Signed-off-by: Jakub Kicinski --- net/rxrpc/proc.c | 32 ++++++++++++++++++-------------- 1 file changed, 18 insertions(+), 14 deletions(-) diff --git a/net/rxrpc/proc.c b/net/rxrpc/proc.c index 7755fca5beb8..e9a27fa7b25d 100644 --- a/net/rxrpc/proc.c +++ b/net/rxrpc/proc.c @@ -10,6 +10,10 @@ #include #include "ar-internal.h" +#define RXRPC_PROC_ADDRBUF_SIZE \ + (sizeof("[xxxx:xxxx:xxxx:xxxx:xxxx:xxxx:255.255.255.255]") + \ + sizeof(":12345")) + static const char *const rxrpc_conn_states[RXRPC_CONN__NR_STATES] = { [RXRPC_CONN_UNUSED] = "Unused ", [RXRPC_CONN_CLIENT_UNSECURED] = "ClUnsec ", @@ -53,7 +57,7 @@ static int rxrpc_call_seq_show(struct seq_file *seq, void *v) struct rxrpc_net *rxnet = rxrpc_net(seq_file_net(seq)); enum rxrpc_call_state state; rxrpc_seq_t tx_bottom; - char lbuff[50], rbuff[50]; + char lbuff[RXRPC_PROC_ADDRBUF_SIZE], rbuff[RXRPC_PROC_ADDRBUF_SIZE]; long timeout = 0; if (v == &rxnet->calls) { @@ -69,11 +73,11 @@ static int rxrpc_call_seq_show(struct seq_file *seq, void *v) local = call->local; if (local) - sprintf(lbuff, "%pISpc", &local->srx.transport); + scnprintf(lbuff, sizeof(lbuff), "%pISpc", &local->srx.transport); else strcpy(lbuff, "no_local"); - sprintf(rbuff, "%pISpc", &call->dest_srx.transport); + scnprintf(rbuff, sizeof(rbuff), "%pISpc", &call->dest_srx.transport); state = rxrpc_call_state(call); if (state != RXRPC_CALL_SERVER_PREALLOC) @@ -142,7 +146,7 @@ static int rxrpc_connection_seq_show(struct seq_file *seq, void *v) struct rxrpc_connection *conn; struct rxrpc_net *rxnet = rxrpc_net(seq_file_net(seq)); const char *state; - char lbuff[50], rbuff[50]; + char lbuff[RXRPC_PROC_ADDRBUF_SIZE], rbuff[RXRPC_PROC_ADDRBUF_SIZE]; if (v == &rxnet->conn_proc_list) { seq_puts(seq, @@ -161,8 +165,8 @@ static int rxrpc_connection_seq_show(struct seq_file *seq, void *v) goto print; } - sprintf(lbuff, "%pISpc", &conn->local->srx.transport); - sprintf(rbuff, "%pISpc", &conn->peer->srx.transport); + scnprintf(lbuff, sizeof(lbuff), "%pISpc", &conn->local->srx.transport); + scnprintf(rbuff, sizeof(rbuff), "%pISpc", &conn->peer->srx.transport); print: state = rxrpc_is_conn_aborted(conn) ? rxrpc_call_completions[conn->completion] : @@ -228,7 +232,7 @@ static int rxrpc_bundle_seq_show(struct seq_file *seq, void *v) { struct rxrpc_bundle *bundle; struct rxrpc_net *rxnet = rxrpc_net(seq_file_net(seq)); - char lbuff[50], rbuff[50]; + char lbuff[RXRPC_PROC_ADDRBUF_SIZE], rbuff[RXRPC_PROC_ADDRBUF_SIZE]; if (v == &rxnet->bundle_proc_list) { seq_puts(seq, @@ -242,8 +246,8 @@ static int rxrpc_bundle_seq_show(struct seq_file *seq, void *v) bundle = list_entry(v, struct rxrpc_bundle, proc_link); - sprintf(lbuff, "%pISpc", &bundle->local->srx.transport); - sprintf(rbuff, "%pISpc", &bundle->peer->srx.transport); + scnprintf(lbuff, sizeof(lbuff), "%pISpc", &bundle->local->srx.transport); + scnprintf(rbuff, sizeof(rbuff), "%pISpc", &bundle->peer->srx.transport); seq_printf(seq, "UDP %-47.47s %-47.47s %4x %3u %3d" " %c%c%c %08x | %08x %08x %08x %08x %08x\n", @@ -279,7 +283,7 @@ static int rxrpc_peer_seq_show(struct seq_file *seq, void *v) { struct rxrpc_peer *peer; time64_t now; - char lbuff[50], rbuff[50]; + char lbuff[RXRPC_PROC_ADDRBUF_SIZE], rbuff[RXRPC_PROC_ADDRBUF_SIZE]; if (v == SEQ_START_TOKEN) { seq_puts(seq, @@ -290,9 +294,9 @@ static int rxrpc_peer_seq_show(struct seq_file *seq, void *v) peer = list_entry(v, struct rxrpc_peer, hash_link); - sprintf(lbuff, "%pISpc", &peer->local->srx.transport); + scnprintf(lbuff, sizeof(lbuff), "%pISpc", &peer->local->srx.transport); - sprintf(rbuff, "%pISpc", &peer->srx.transport); + scnprintf(rbuff, sizeof(rbuff), "%pISpc", &peer->srx.transport); now = ktime_get_seconds(); seq_printf(seq, @@ -401,7 +405,7 @@ const struct seq_operations rxrpc_peer_seq_ops = { static int rxrpc_local_seq_show(struct seq_file *seq, void *v) { struct rxrpc_local *local; - char lbuff[50]; + char lbuff[RXRPC_PROC_ADDRBUF_SIZE]; if (v == SEQ_START_TOKEN) { seq_puts(seq, @@ -412,7 +416,7 @@ static int rxrpc_local_seq_show(struct seq_file *seq, void *v) local = hlist_entry(v, struct rxrpc_local, link); - sprintf(lbuff, "%pISpc", &local->srx.transport); + scnprintf(lbuff, sizeof(lbuff), "%pISpc", &local->srx.transport); seq_printf(seq, "UDP %-47.47s %3u %3u %3u\n", From b30b1675aa2bcf0491fd3830b051df4e08a7c8ca Mon Sep 17 00:00:00 2001 From: Justin Iurman Date: Sat, 4 Apr 2026 15:41:37 +0200 Subject: [PATCH 209/248] net: ioam6: fix OOB and missing lock When trace->type.bit6 is set: if (trace->type.bit6) { ... queue = skb_get_tx_queue(dev, skb); qdisc = rcu_dereference(queue->qdisc); This code can lead to an out-of-bounds access of the dev->_tx[] array when is_input is true. In such a case, the packet is on the RX path and skb->queue_mapping contains the RX queue index of the ingress device. If the ingress device has more RX queues than the egress device (dev) has TX queues, skb_get_queue_mapping(skb) will exceed dev->num_tx_queues. Add a check to avoid this situation since skb_get_tx_queue() does not clamp the index. This issue has also revealed that per queue visibility cannot be accurate and will be replaced later as a new feature. While at it, add missing lock around qdisc_qstats_qlen_backlog(). The function __ioam6_fill_trace_data() is called from both softirq and process contexts, hence the use of spin_lock_bh() here. Fixes: b63c5478e9cb ("ipv6: ioam: Support for Queue depth data field") Reported-by: Jakub Kicinski Closes: https://lore.kernel.org/netdev/20260403214418.2233266-2-kuba@kernel.org/ Signed-off-by: Justin Iurman Link: https://patch.msgid.link/20260404134137.24553-1-justin.iurman@gmail.com Signed-off-by: Jakub Kicinski --- net/ipv6/ioam6.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/net/ipv6/ioam6.c b/net/ipv6/ioam6.c index 05a0b7d7e2aa..e963a71858a7 100644 --- a/net/ipv6/ioam6.c +++ b/net/ipv6/ioam6.c @@ -803,12 +803,16 @@ static void __ioam6_fill_trace_data(struct sk_buff *skb, struct Qdisc *qdisc; __u32 qlen, backlog; - if (dev->flags & IFF_LOOPBACK) { + if (dev->flags & IFF_LOOPBACK || + skb_get_queue_mapping(skb) >= dev->num_tx_queues) { *(__be32 *)data = cpu_to_be32(IOAM6_U32_UNAVAILABLE); } else { queue = skb_get_tx_queue(dev, skb); qdisc = rcu_dereference(queue->qdisc); + + spin_lock_bh(qdisc_lock(qdisc)); qdisc_qstats_qlen_backlog(qdisc, &qlen, &backlog); + spin_unlock_bh(qdisc_lock(qdisc)); *(__be32 *)data = cpu_to_be32(backlog); } From 5a37d228799b0ec2c277459c83c814a59d310bc3 Mon Sep 17 00:00:00 2001 From: Fabio Baltieri Date: Sun, 5 Apr 2026 23:20:13 +0100 Subject: [PATCH 210/248] net: txgbe: leave space for null terminators on property_entry Lists of struct property_entry are supposed to be terminated with an empty property, this driver currently seems to be allocating exactly the amount of entry used. Change the struct definition to leave an extra element for all property_entry. Fixes: c3e382ad6d15 ("net: txgbe: Add software nodes to support phylink") Signed-off-by: Fabio Baltieri Tested-by: Jiawen Wu Link: https://patch.msgid.link/20260405222013.5347-1-fabio.baltieri@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/wangxun/txgbe/txgbe_type.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/wangxun/txgbe/txgbe_type.h b/drivers/net/ethernet/wangxun/txgbe/txgbe_type.h index 82433e9cb0e3..6b05f32b4a01 100644 --- a/drivers/net/ethernet/wangxun/txgbe/txgbe_type.h +++ b/drivers/net/ethernet/wangxun/txgbe/txgbe_type.h @@ -424,10 +424,10 @@ struct txgbe_nodes { char i2c_name[32]; char sfp_name[32]; char phylink_name[32]; - struct property_entry gpio_props[1]; - struct property_entry i2c_props[3]; - struct property_entry sfp_props[8]; - struct property_entry phylink_props[2]; + struct property_entry gpio_props[2]; + struct property_entry i2c_props[4]; + struct property_entry sfp_props[9]; + struct property_entry phylink_props[3]; struct software_node_ref_args i2c_ref[1]; struct software_node_ref_args gpio0_ref[1]; struct software_node_ref_args gpio1_ref[1]; From 9b55b253907e7431210483519c5ad711a37dafa1 Mon Sep 17 00:00:00 2001 From: Jiayuan Chen Date: Mon, 6 Apr 2026 11:15:10 +0800 Subject: [PATCH 211/248] mptcp: fix slab-use-after-free in __inet_lookup_established The ehash table lookups are lockless and rely on SLAB_TYPESAFE_BY_RCU to guarantee socket memory stability during RCU read-side critical sections. Both tcp_prot and tcpv6_prot have their slab caches created with this flag via proto_register(). However, MPTCP's mptcp_subflow_init() copies tcpv6_prot into tcpv6_prot_override during inet_init() (fs_initcall, level 5), before inet6_init() (module_init/device_initcall, level 6) has called proto_register(&tcpv6_prot). At that point, tcpv6_prot.slab is still NULL, so tcpv6_prot_override.slab remains NULL permanently. This causes MPTCP v6 subflow child sockets to be allocated via kmalloc (falling into kmalloc-4k) instead of the TCPv6 slab cache. The kmalloc-4k cache lacks SLAB_TYPESAFE_BY_RCU, so when these sockets are freed without SOCK_RCU_FREE (which is cleared for child sockets by design), the memory can be immediately reused. Concurrent ehash lookups under rcu_read_lock can then access freed memory, triggering a slab-use-after-free in __inet_lookup_established. Fix this by splitting the IPv6-specific initialization out of mptcp_subflow_init() into a new mptcp_subflow_v6_init(), called from mptcp_proto_v6_init() before protocol registration. This ensures tcpv6_prot_override.slab correctly inherits the SLAB_TYPESAFE_BY_RCU slab cache. Fixes: b19bc2945b40 ("mptcp: implement delegated actions") Cc: stable@vger.kernel.org Signed-off-by: Jiayuan Chen Reviewed-by: Matthieu Baerts (NGI0) Link: https://patch.msgid.link/20260406031512.189159-1-jiayuan.chen@linux.dev Signed-off-by: Jakub Kicinski --- net/mptcp/protocol.c | 2 ++ net/mptcp/protocol.h | 1 + net/mptcp/subflow.c | 15 +++++++++------ 3 files changed, 12 insertions(+), 6 deletions(-) diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index 65c3bb8016f4..614c3f583ca0 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -4660,6 +4660,8 @@ int __init mptcp_proto_v6_init(void) { int err; + mptcp_subflow_v6_init(); + mptcp_v6_prot = mptcp_prot; strscpy(mptcp_v6_prot.name, "MPTCPv6", sizeof(mptcp_v6_prot.name)); mptcp_v6_prot.slab = NULL; diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index 0bd1ee860316..ec15e503da8b 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -875,6 +875,7 @@ static inline void mptcp_subflow_tcp_fallback(struct sock *sk, void __init mptcp_proto_init(void); #if IS_ENABLED(CONFIG_MPTCP_IPV6) int __init mptcp_proto_v6_init(void); +void __init mptcp_subflow_v6_init(void); #endif struct sock *mptcp_sk_clone_init(const struct sock *sk, diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c index 6716970693e9..4ff5863aa9fd 100644 --- a/net/mptcp/subflow.c +++ b/net/mptcp/subflow.c @@ -2165,7 +2165,15 @@ void __init mptcp_subflow_init(void) tcp_prot_override.psock_update_sk_prot = NULL; #endif + mptcp_diag_subflow_init(&subflow_ulp_ops); + + if (tcp_register_ulp(&subflow_ulp_ops) != 0) + panic("MPTCP: failed to register subflows to ULP\n"); +} + #if IS_ENABLED(CONFIG_MPTCP_IPV6) +void __init mptcp_subflow_v6_init(void) +{ /* In struct mptcp_subflow_request_sock, we assume the TCP request sock * structures for v4 and v6 have the same size. It should not changed in * the future but better to make sure to be warned if it is no longer @@ -2204,10 +2212,5 @@ void __init mptcp_subflow_init(void) /* Disable sockmap processing for subflows */ tcpv6_prot_override.psock_update_sk_prot = NULL; #endif -#endif - - mptcp_diag_subflow_init(&subflow_ulp_ops); - - if (tcp_register_ulp(&subflow_ulp_ops) != 0) - panic("MPTCP: failed to register subflows to ULP\n"); } +#endif From 8e2760eaab778494fc1fa257031e0e1799647f46 Mon Sep 17 00:00:00 2001 From: "Matthieu Baerts (NGI0)" Date: Tue, 7 Apr 2026 10:41:41 +0200 Subject: [PATCH 212/248] Revert "mptcp: add needs_id for netlink appending addr" This commit was originally adding the ability to add MPTCP endpoints with ID 0 by accident. The in-kernel PM, handling MPTCP endpoints at the net namespace level, is not supposed to handle endpoints with such ID, because this ID 0 is reserved to the initial subflow, as mentioned in the MPTCPv1 protocol [1], a per-connection setting. Note that 'ip mptcp endpoint add id 0' stops early with an error, but other tools might still request the in-kernel PM to create MPTCP endpoints with this restricted ID 0. In other words, it was wrong to call the mptcp_pm_has_addr_attr_id helper to check whether the address ID attribute is set: if it was set to 0, a new MPTCP endpoint would be created with ID 0, which is not expected, and might cause various issues later. Fixes: 584f38942626 ("mptcp: add needs_id for netlink appending addr") Cc: stable@vger.kernel.org Link: https://datatracker.ietf.org/doc/html/rfc8684#section-3.2-9 [1] Reviewed-by: Geliang Tang Signed-off-by: Matthieu Baerts (NGI0) Link: https://patch.msgid.link/20260407-net-mptcp-revert-pm-needs-id-v2-1-7a25cbc324f8@kernel.org Signed-off-by: Jakub Kicinski --- net/mptcp/pm_kernel.c | 24 +++++------------------- 1 file changed, 5 insertions(+), 19 deletions(-) diff --git a/net/mptcp/pm_kernel.c b/net/mptcp/pm_kernel.c index 82e59f9c6dd9..0ebf43be9939 100644 --- a/net/mptcp/pm_kernel.c +++ b/net/mptcp/pm_kernel.c @@ -720,7 +720,7 @@ static void __mptcp_pm_release_addr_entry(struct mptcp_pm_addr_entry *entry) static int mptcp_pm_nl_append_new_local_addr(struct pm_nl_pernet *pernet, struct mptcp_pm_addr_entry *entry, - bool needs_id, bool replace) + bool replace) { struct mptcp_pm_addr_entry *cur, *del_entry = NULL; int ret = -EINVAL; @@ -779,7 +779,7 @@ static int mptcp_pm_nl_append_new_local_addr(struct pm_nl_pernet *pernet, } } - if (!entry->addr.id && needs_id) { + if (!entry->addr.id) { find_next: entry->addr.id = find_next_zero_bit(pernet->id_bitmap, MPTCP_PM_MAX_ADDR_ID + 1, @@ -790,7 +790,7 @@ static int mptcp_pm_nl_append_new_local_addr(struct pm_nl_pernet *pernet, } } - if (!entry->addr.id && needs_id) + if (!entry->addr.id) goto out; __set_bit(entry->addr.id, pernet->id_bitmap); @@ -923,7 +923,7 @@ int mptcp_pm_nl_get_local_id(struct mptcp_sock *msk, return -ENOMEM; entry->addr.port = 0; - ret = mptcp_pm_nl_append_new_local_addr(pernet, entry, true, false); + ret = mptcp_pm_nl_append_new_local_addr(pernet, entry, false); if (ret < 0) kfree(entry); @@ -977,18 +977,6 @@ static int mptcp_nl_add_subflow_or_signal_addr(struct net *net, return 0; } -static bool mptcp_pm_has_addr_attr_id(const struct nlattr *attr, - struct genl_info *info) -{ - struct nlattr *tb[MPTCP_PM_ADDR_ATTR_MAX + 1]; - - if (!nla_parse_nested_deprecated(tb, MPTCP_PM_ADDR_ATTR_MAX, attr, - mptcp_pm_address_nl_policy, info->extack) && - tb[MPTCP_PM_ADDR_ATTR_ID]) - return true; - return false; -} - /* Add an MPTCP endpoint */ int mptcp_pm_nl_add_addr_doit(struct sk_buff *skb, struct genl_info *info) { @@ -1037,9 +1025,7 @@ int mptcp_pm_nl_add_addr_doit(struct sk_buff *skb, struct genl_info *info) goto out_free; } } - ret = mptcp_pm_nl_append_new_local_addr(pernet, entry, - !mptcp_pm_has_addr_attr_id(attr, info), - true); + ret = mptcp_pm_nl_append_new_local_addr(pernet, entry, true); if (ret < 0) { GENL_SET_ERR_MSG_FMT(info, "too many addresses or duplicate one: %d", ret); goto out_free; From 39897df386376912d561d4946499379effa1e7ef Mon Sep 17 00:00:00 2001 From: Jiexun Wang Date: Tue, 7 Apr 2026 16:00:14 +0800 Subject: [PATCH 213/248] af_unix: read UNIX_DIAG_VFS data under unix_state_lock Exact UNIX diag lookups hold a reference to the socket, but not to u->path. Meanwhile, unix_release_sock() clears u->path under unix_state_lock() and drops the path reference after unlocking. Read the inode and device numbers for UNIX_DIAG_VFS while holding unix_state_lock(), then emit the netlink attribute after dropping the lock. This keeps the VFS data stable while the reply is being built. Fixes: 5f7b0569460b ("unix_diag: Unix inode info NLA") Reported-by: Yifan Wu Reported-by: Juefei Pu Co-developed-by: Yuan Tan Signed-off-by: Yuan Tan Suggested-by: Xin Liu Tested-by: Ren Wei Signed-off-by: Jiexun Wang Signed-off-by: Ren Wei Reviewed-by: Kuniyuki Iwashima Link: https://patch.msgid.link/20260407080015.1744197-1-n05ec@lzu.edu.cn Signed-off-by: Jakub Kicinski --- net/unix/diag.c | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/net/unix/diag.c b/net/unix/diag.c index ca3473026151..c9c1e51c4419 100644 --- a/net/unix/diag.c +++ b/net/unix/diag.c @@ -28,18 +28,23 @@ static int sk_diag_dump_name(struct sock *sk, struct sk_buff *nlskb) static int sk_diag_dump_vfs(struct sock *sk, struct sk_buff *nlskb) { - struct dentry *dentry = unix_sk(sk)->path.dentry; + struct unix_diag_vfs uv; + struct dentry *dentry; + bool have_vfs = false; + unix_state_lock(sk); + dentry = unix_sk(sk)->path.dentry; if (dentry) { - struct unix_diag_vfs uv = { - .udiag_vfs_ino = d_backing_inode(dentry)->i_ino, - .udiag_vfs_dev = dentry->d_sb->s_dev, - }; - - return nla_put(nlskb, UNIX_DIAG_VFS, sizeof(uv), &uv); + uv.udiag_vfs_ino = d_backing_inode(dentry)->i_ino; + uv.udiag_vfs_dev = dentry->d_sb->s_dev; + have_vfs = true; } + unix_state_unlock(sk); - return 0; + if (!have_vfs) + return 0; + + return nla_put(nlskb, UNIX_DIAG_VFS, sizeof(uv), &uv); } static int sk_diag_dump_peer(struct sock *sk, struct sk_buff *nlskb) From 0006c6f1091bbeea88b8a88a6548b9fb2f803c74 Mon Sep 17 00:00:00 2001 From: Li RongQing Date: Mon, 6 Apr 2026 22:27:30 -0400 Subject: [PATCH 214/248] devlink: Fix incorrect skb socket family dumping The devlink_fmsg_dump_skb function was incorrectly using the socket type (sk->sk_type) instead of the socket family (sk->sk_family) when filling the "family" field in the fast message dump. This patch fixes this to properly display the socket family. Fixes: 3dbfde7f6bc7b8 ("devlink: add devlink_fmsg_dump_skb() function") Signed-off-by: Li RongQing Link: https://patch.msgid.link/20260407022730.2393-1-lirongqing@baidu.com Signed-off-by: Jakub Kicinski --- net/devlink/health.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/devlink/health.c b/net/devlink/health.c index 449c7611c640..ea7a334e939b 100644 --- a/net/devlink/health.c +++ b/net/devlink/health.c @@ -1327,7 +1327,7 @@ void devlink_fmsg_dump_skb(struct devlink_fmsg *fmsg, const struct sk_buff *skb) if (sk) { devlink_fmsg_pair_nest_start(fmsg, "sk"); devlink_fmsg_obj_nest_start(fmsg); - devlink_fmsg_put(fmsg, "family", sk->sk_type); + devlink_fmsg_put(fmsg, "family", sk->sk_family); devlink_fmsg_put(fmsg, "type", sk->sk_type); devlink_fmsg_put(fmsg, "proto", sk->sk_protocol); devlink_fmsg_obj_nest_end(fmsg); From 30f3b767aed45fdaf8f887e66e1f19bd0cbd4483 Mon Sep 17 00:00:00 2001 From: Raju Rangoju Date: Mon, 6 Apr 2026 13:08:16 +0530 Subject: [PATCH 215/248] MAINTAINERS: Add Prashanth as additional maintainer for amd-xgbe driver Add Prashanth as an additional maintainer for the amd-xgbe Ethernet driver to help with ongoing development and maintenance. Cc: Prashanth Kumar K R Signed-off-by: Raju Rangoju Link: https://patch.msgid.link/20260406073816.3218387-1-Raju.Rangoju@amd.com Signed-off-by: Jakub Kicinski --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index 4601903a8f9b..dc935838e516 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -1291,6 +1291,7 @@ F: include/uapi/drm/amdxdna_accel.h AMD XGBE DRIVER M: Raju Rangoju +M: Prashanth Kumar K R L: netdev@vger.kernel.org S: Maintained F: arch/arm64/boot/dts/amd/amd-seattle-xgbe*.dtsi From 9709b56d908acc120fe8b4ae250b3c9d749ea832 Mon Sep 17 00:00:00 2001 From: Alexander Koskovich Date: Fri, 3 Apr 2026 18:43:47 +0200 Subject: [PATCH 216/248] net: ipa: fix GENERIC_CMD register field masks for IPA v5.0+ Fix the field masks to match the hardware layout documented in downstream GSI (GSI_V3_0_EE_n_GSI_EE_GENERIC_CMD_*). Notably this fixes a WARN I was seeing when I tried to send "stop" to the MPSS remoteproc while IPA was up. Fixes: faf0678ec8a0 ("net: ipa: add IPA v5.0 GSI register definitions") Signed-off-by: Alexander Koskovich Signed-off-by: Luca Weiss Reviewed-by: Simon Horman Link: https://patch.msgid.link/20260403-milos-ipa-v1-1-01e9e4e03d3e@fairphone.com Signed-off-by: Paolo Abeni --- drivers/net/ipa/reg/gsi_reg-v5.0.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/net/ipa/reg/gsi_reg-v5.0.c b/drivers/net/ipa/reg/gsi_reg-v5.0.c index 36d1e65df71b..3334d8e20ad2 100644 --- a/drivers/net/ipa/reg/gsi_reg-v5.0.c +++ b/drivers/net/ipa/reg/gsi_reg-v5.0.c @@ -156,9 +156,10 @@ REG_FIELDS(EV_CH_CMD, ev_ch_cmd, 0x00025010 + 0x12000 * GSI_EE_AP); static const u32 reg_generic_cmd_fmask[] = { [GENERIC_OPCODE] = GENMASK(4, 0), - [GENERIC_CHID] = GENMASK(9, 5), - [GENERIC_EE] = GENMASK(13, 10), - /* Bits 14-31 reserved */ + [GENERIC_CHID] = GENMASK(12, 5), + [GENERIC_EE] = GENMASK(16, 13), + /* Bits 17-23 reserved */ + [GENERIC_PARAMS] = GENMASK(31, 24), }; REG_FIELDS(GENERIC_CMD, generic_cmd, 0x00025018 + 0x12000 * GSI_EE_AP); From 56007972c0b1e783ca714d6f1f4d6e66e531d21f Mon Sep 17 00:00:00 2001 From: Alexander Koskovich Date: Fri, 3 Apr 2026 18:43:48 +0200 Subject: [PATCH 217/248] net: ipa: fix event ring index not programmed for IPA v5.0+ For IPA v5.0+, the event ring index field moved from CH_C_CNTXT_0 to CH_C_CNTXT_1. The v5.0 register definition intended to define this field in the CH_C_CNTXT_1 fmask array but used the old identifier of ERINDEX instead of CH_ERINDEX. Without a valid event ring, GSI channels could never signal transfer completions. This caused gsi_channel_trans_quiesce() to block forever in wait_for_completion(). At least for IPA v5.2 this resolves an issue seen where runtime suspend, system suspend, and remoteproc stop all hanged forever. It also meant the IPA data path was completely non functional. Fixes: faf0678ec8a0 ("net: ipa: add IPA v5.0 GSI register definitions") Signed-off-by: Alexander Koskovich Signed-off-by: Luca Weiss Reviewed-by: Simon Horman Link: https://patch.msgid.link/20260403-milos-ipa-v1-2-01e9e4e03d3e@fairphone.com Signed-off-by: Paolo Abeni --- drivers/net/ipa/reg/gsi_reg-v5.0.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ipa/reg/gsi_reg-v5.0.c b/drivers/net/ipa/reg/gsi_reg-v5.0.c index 3334d8e20ad2..6c4a7fbe4de9 100644 --- a/drivers/net/ipa/reg/gsi_reg-v5.0.c +++ b/drivers/net/ipa/reg/gsi_reg-v5.0.c @@ -30,7 +30,7 @@ REG_STRIDE_FIELDS(CH_C_CNTXT_0, ch_c_cntxt_0, static const u32 reg_ch_c_cntxt_1_fmask[] = { [CH_R_LENGTH] = GENMASK(23, 0), - [ERINDEX] = GENMASK(31, 24), + [CH_ERINDEX] = GENMASK(31, 24), }; REG_STRIDE_FIELDS(CH_C_CNTXT_1, ch_c_cntxt_1, From ebe560ea5f54134279356703e73b7f867c89db13 Mon Sep 17 00:00:00 2001 From: Alice Mikityanska Date: Fri, 3 Apr 2026 20:49:49 +0300 Subject: [PATCH 218/248] l2tp: Drop large packets with UDP encap syzbot reported a WARN on my patch series [1]. The actual issue is an overflow of 16-bit UDP length field, and it exists in the upstream code. My series added a debug WARN with an overflow check that exposed the issue, that's why syzbot tripped on my patches, rather than on upstream code. syzbot's repro: r0 = socket$pppl2tp(0x18, 0x1, 0x1) r1 = socket$inet6_udp(0xa, 0x2, 0x0) connect$inet6(r1, &(0x7f00000000c0)={0xa, 0x0, 0x0, @loopback, 0xfffffffc}, 0x1c) connect$pppl2tp(r0, &(0x7f0000000240)=@pppol2tpin6={0x18, 0x1, {0x0, r1, 0x4, 0x0, 0x0, 0x0, {0xa, 0x4e22, 0xffff, @ipv4={'\x00', '\xff\xff', @empty}}}}, 0x32) writev(r0, &(0x7f0000000080)=[{&(0x7f0000000000)="ee", 0x34000}], 0x1) It basically sends an oversized (0x34000 bytes) PPPoL2TP packet with UDP encapsulation, and l2tp_xmit_core doesn't check for overflows when it assigns the UDP length field. The value gets trimmed to 16 bites. Add an overflow check that drops oversized packets and avoids sending packets with trimmed UDP length to the wire. syzbot's stack trace (with my patch applied): len >= 65536u WARNING: ./include/linux/udp.h:38 at udp_set_len_short include/linux/udp.h:38 [inline], CPU#1: syz.0.17/5957 WARNING: ./include/linux/udp.h:38 at l2tp_xmit_core net/l2tp/l2tp_core.c:1293 [inline], CPU#1: syz.0.17/5957 WARNING: ./include/linux/udp.h:38 at l2tp_xmit_skb+0x1204/0x18d0 net/l2tp/l2tp_core.c:1327, CPU#1: syz.0.17/5957 Modules linked in: CPU: 1 UID: 0 PID: 5957 Comm: syz.0.17 Not tainted syzkaller #0 PREEMPT(full) Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.16.2-debian-1.16.2-1 04/01/2014 RIP: 0010:udp_set_len_short include/linux/udp.h:38 [inline] RIP: 0010:l2tp_xmit_core net/l2tp/l2tp_core.c:1293 [inline] RIP: 0010:l2tp_xmit_skb+0x1204/0x18d0 net/l2tp/l2tp_core.c:1327 Code: 0f 0b 90 e9 21 f9 ff ff e8 e9 05 ec f6 90 0f 0b 90 e9 8d f9 ff ff e8 db 05 ec f6 90 0f 0b 90 e9 cc f9 ff ff e8 cd 05 ec f6 90 <0f> 0b 90 e9 de fa ff ff 44 89 f1 80 e1 07 80 c1 03 38 c1 0f 8c 4f RSP: 0018:ffffc90003d67878 EFLAGS: 00010293 RAX: ffffffff8ad985e3 RBX: ffff8881a6400090 RCX: ffff8881697f0000 RDX: 0000000000000000 RSI: 0000000000034010 RDI: 000000000000ffff RBP: dffffc0000000000 R08: 0000000000000003 R09: 0000000000000004 R10: dffffc0000000000 R11: fffff520007acf00 R12: ffff8881baf20900 R13: 0000000000034010 R14: ffff8881a640008e R15: ffff8881760f7000 FS: 000055557e81f500(0000) GS:ffff8882a9467000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000200000033000 CR3: 00000001612f4000 CR4: 00000000000006f0 Call Trace: pppol2tp_sendmsg+0x40a/0x5f0 net/l2tp/l2tp_ppp.c:302 sock_sendmsg_nosec net/socket.c:727 [inline] __sock_sendmsg net/socket.c:742 [inline] sock_write_iter+0x503/0x550 net/socket.c:1195 do_iter_readv_writev+0x619/0x8c0 fs/read_write.c:-1 vfs_writev+0x33c/0x990 fs/read_write.c:1059 do_writev+0x154/0x2e0 fs/read_write.c:1105 do_syscall_x64 arch/x86/entry/syscall_64.c:63 [inline] do_syscall_64+0x14d/0xf80 arch/x86/entry/syscall_64.c:94 entry_SYSCALL_64_after_hwframe+0x77/0x7f RIP: 0033:0x7f636479c629 Code: ff c3 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 44 00 00 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 c7 c1 e8 ff ff ff f7 d8 64 89 01 48 RSP: 002b:00007ffffd4241c8 EFLAGS: 00000246 ORIG_RAX: 0000000000000014 RAX: ffffffffffffffda RBX: 00007f6364a15fa0 RCX: 00007f636479c629 RDX: 0000000000000001 RSI: 0000200000000080 RDI: 0000000000000003 RBP: 00007f6364832b39 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000000 R13: 00007f6364a15fac R14: 00007f6364a15fa0 R15: 00007f6364a15fa0 [1]: https://lore.kernel.org/all/20260226201600.222044-1-alice.kernel@fastmail.im/ Fixes: 3557baabf280 ("[L2TP]: PPP over L2TP driver core") Reported-by: syzbot+ci3edea60a44225dec@syzkaller.appspotmail.com Closes: https://lore.kernel.org/netdev/69a1dfba.050a0220.3a55be.0026.GAE@google.com/ Signed-off-by: Alice Mikityanska Link: https://patch.msgid.link/20260403174949.843941-1-alice.kernel@fastmail.im Signed-off-by: Paolo Abeni --- net/l2tp/l2tp_core.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c index c89ae52764b8..157fc23ce4e1 100644 --- a/net/l2tp/l2tp_core.c +++ b/net/l2tp/l2tp_core.c @@ -1290,6 +1290,11 @@ static int l2tp_xmit_core(struct l2tp_session *session, struct sk_buff *skb, uns uh->source = inet->inet_sport; uh->dest = inet->inet_dport; udp_len = uhlen + session->hdr_len + data_len; + if (udp_len > U16_MAX) { + kfree_skb(skb); + ret = NET_XMIT_DROP; + goto out_unlock; + } uh->len = htons(udp_len); /* Calculate UDP checksum if configured to do so */ From 1561d96f5f55c1bca9ff047ace5813f4f244eea6 Mon Sep 17 00:00:00 2001 From: Samasth Norway Ananda Date: Tue, 7 Apr 2026 14:02:47 -0700 Subject: [PATCH 219/248] gpio: tegra: fix irq_release_resources calling enable instead of disable tegra_gpio_irq_release_resources() erroneously calls tegra_gpio_enable() instead of tegra_gpio_disable(). When IRQ resources are released, the GPIO configuration bit (CNF) should be cleared to deconfigure the pin as a GPIO. Leaving it enabled wastes power and can cause unexpected behavior if the pin is later reused for an alternate function via pinctrl. Fixes: 66fecef5bde0 ("gpio: tegra: Convert to gpio_irq_chip") Signed-off-by: Samasth Norway Ananda Link: https://patch.msgid.link/20260407210247.1737938-1-samasth.norway.ananda@oracle.com Signed-off-by: Bartosz Golaszewski --- drivers/gpio/gpio-tegra.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpio/gpio-tegra.c b/drivers/gpio/gpio-tegra.c index 15a5762a82c2..b14052fe64ac 100644 --- a/drivers/gpio/gpio-tegra.c +++ b/drivers/gpio/gpio-tegra.c @@ -595,7 +595,7 @@ static void tegra_gpio_irq_release_resources(struct irq_data *d) struct tegra_gpio_info *tgi = gpiochip_get_data(chip); gpiochip_relres_irq(chip, d->hwirq); - tegra_gpio_enable(tgi, d->hwirq); + tegra_gpio_disable(tgi, d->hwirq); } static void tegra_gpio_irq_print_chip(struct irq_data *d, struct seq_file *s) From a13edf9b92fc4700b3020d7ea547a3d64dd33b63 Mon Sep 17 00:00:00 2001 From: Joonas Lahtinen Date: Thu, 9 Apr 2026 08:31:11 +0300 Subject: [PATCH 220/248] drm/i915/gem: Drop check for changed VM in EXECBUF MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Since the introduction of d4433c7600f7 ("drm/i915/gem: Use the proto-context to handle create parameters (v5)") it has not been possible for VM to change after context creation so the check will never fail. Sima's analysis: This check was added in f7ce8639f6ff ("drm/i915/gem: Split the context's obj:vma lut into its own mutex") but without any hint in the commit message as to why. In another hunk of that commit there's a hint though in __eb_add_lut: /* user racing with ctx set-vm */ This would mean that this bug was introduced in e0695db7298e ("drm/i915: Create/destroy VM (ppGTT) for use with contexts"), which allowed to change the gem_ctx->vm at runtime, opening up the race that was partially fixed in the earlier referenced commit about a year later. But it cannot be exploited anymore in anything remotely recent because with the introduction of proto-contexts we've made gem_ctx->vm invariant again, exactly to preemptively close all these potential issues. Specifically d4433c7600f7 ("drm/i915/gem: Use the proto-context to handle create parameters (v5)") is the vm specific part of the proto-context work. v3: - Include Sima's analysis and WARN_ON_ONCE v4: - Focus only on latest mainline codebase References: https://lore.kernel.org/all/20260324151741.29338-1-sosohero200@gmail.com/ Reported-by: Ville Syrjälä Cc: Linus Torvalds Cc: Simona Vetter Cc: Tvrtko Ursulin Cc: Andi Shyti Signed-off-by: Joonas Lahtinen Reviewed-by: Tvrtko Ursulin Link: https://patch.msgid.link/20260409053111.8914-1-joonas.lahtinen@linux.intel.com (cherry picked from commit f6d4afc9ec6a0bc326151b35a7a3369369180079) Signed-off-by: Joonas Lahtinen --- drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c index 942f4eed817f..65ce54b20ec2 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c @@ -896,7 +896,7 @@ static struct i915_vma *eb_lookup_vma(struct i915_execbuffer *eb, u32 handle) rcu_read_lock(); vma = radix_tree_lookup(&eb->gem_context->handles_vma, handle); - if (likely(vma && vma->vm == vm)) + if (likely(vma)) vma = i915_vma_tryget(vma); else vma = NULL; From c71ba669b570c7b3f86ec875be222ea11dacb352 Mon Sep 17 00:00:00 2001 From: Pengpeng Hou Date: Sun, 5 Apr 2026 08:40:00 +0800 Subject: [PATCH 221/248] nfc: pn533: allocate rx skb before consuming bytes pn532_receive_buf() reports the number of accepted bytes to the serdev core. The current code consumes bytes into recv_skb and may already hand a complete frame to pn533_recv_frame() before allocating a fresh receive buffer. If that alloc_skb() fails, the callback returns 0 even though it has already consumed bytes, and it leaves recv_skb as NULL for the next receive callback. That breaks the receive_buf() accounting contract and can also lead to a NULL dereference on the next skb_put_u8(). Allocate the receive skb lazily before consuming the next byte instead. If allocation fails, return the number of bytes already accepted. Fixes: c656aa4c27b1 ("nfc: pn533: add UART phy driver") Cc: stable@vger.kernel.org Signed-off-by: Pengpeng Hou Link: https://patch.msgid.link/20260405094003.3-pn533-v2-pengpeng@iscas.ac.cn Signed-off-by: Paolo Abeni --- drivers/nfc/pn533/uart.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/drivers/nfc/pn533/uart.c b/drivers/nfc/pn533/uart.c index 1b82b7b2a5fa..e0d67cd2ac9b 100644 --- a/drivers/nfc/pn533/uart.c +++ b/drivers/nfc/pn533/uart.c @@ -211,6 +211,13 @@ static size_t pn532_receive_buf(struct serdev_device *serdev, timer_delete(&dev->cmd_timeout); for (i = 0; i < count; i++) { + if (!dev->recv_skb) { + dev->recv_skb = alloc_skb(PN532_UART_SKB_BUFF_LEN, + GFP_KERNEL); + if (!dev->recv_skb) + return i; + } + if (unlikely(!skb_tailroom(dev->recv_skb))) skb_trim(dev->recv_skb, 0); @@ -219,9 +226,7 @@ static size_t pn532_receive_buf(struct serdev_device *serdev, continue; pn533_recv_frame(dev->priv, dev->recv_skb, 0); - dev->recv_skb = alloc_skb(PN532_UART_SKB_BUFF_LEN, GFP_KERNEL); - if (!dev->recv_skb) - return 0; + dev->recv_skb = NULL; } return i; From 7e0548525abd2bff9694e016b6a469ccd2d5a053 Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Wed, 8 Apr 2026 15:40:57 +0100 Subject: [PATCH 222/248] iommu: Ensure .iotlb_sync is called correctly Many drivers have no reason to use the iotlb_gather mechanism, but do still depend on .iotlb_sync being called to properly complete an unmap. Since the core code is now relying on the gather to detect when there is legitimately something to sync, it should also take care of encoding a successful unmap when the driver does not touch the gather itself. Fixes: 90c5def10bea ("iommu: Do not call drivers for empty gathers") Reported-by: Jon Hunter Closes: https://lore.kernel.org/r/8800a38b-8515-4bbe-af15-0dae81274bf7@nvidia.com Signed-off-by: Robin Murphy Tested-by: Jon Hunter Reviewed-by: Jason Gunthorpe Tested-by: Russell King (Oracle) Signed-off-by: Will Deacon --- drivers/iommu/iommu.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index 50718ab810a4..ee83850c7060 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -2717,6 +2717,12 @@ static size_t __iommu_unmap(struct iommu_domain *domain, pr_debug("unmapped: iova 0x%lx size 0x%zx\n", iova, unmapped_page); + /* + * If the driver itself isn't using the gather, make sure + * it looks non-empty so iotlb_sync will still be called. + */ + if (iotlb_gather->start >= iotlb_gather->end) + iommu_iotlb_gather_add_range(iotlb_gather, iova, size); iova += unmapped_page; unmapped += unmapped_page; From e3b2cf6e5dba416a03152f299d99982dfe1e861d Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Wed, 1 Apr 2026 12:15:58 +0200 Subject: [PATCH 223/248] kernfs: pass struct ns_common instead of const void * for namespace tags kernfs has historically used const void * to pass around namespace tags used for directory-level namespace filtering. The only current user of this is sysfs network namespace tagging where struct net pointers are cast to void *. Replace all const void * namespace parameters with const struct ns_common * throughout the kernfs, sysfs, and kobject namespace layers. This includes the kobj_ns_type_operations callbacks, kobject_namespace(), and all sysfs/kernfs APIs that accept or return namespace tags. Passing struct ns_common is needed because various codepaths require access to the underlying namespace. A struct ns_common can always be converted back to the concrete namespace type (e.g., struct net) via container_of() or to_ns_common() in the reverse direction. This is a preparatory change for switching to ns_id-based directory iteration to prevent a KASLR pointer leak through the current use of raw namespace pointers as hash seeds and comparison keys. Signed-off-by: Christian Brauner --- drivers/base/class.c | 4 +-- drivers/base/core.c | 7 ++-- drivers/infiniband/core/device.c | 5 +-- drivers/infiniband/ulp/srp/ib_srp.c | 7 ++-- drivers/net/bonding/bond_sysfs.c | 4 +-- drivers/net/ipvlan/ipvtap.c | 5 +-- drivers/net/macvtap.c | 5 +-- fs/kernfs/dir.c | 30 +++++++++-------- fs/kernfs/file.c | 2 +- fs/kernfs/kernfs-internal.h | 2 +- fs/kernfs/mount.c | 2 +- fs/nfs/sysfs.c | 16 +++++---- fs/sysfs/dir.c | 6 ++-- fs/sysfs/file.c | 8 ++--- fs/sysfs/mount.c | 10 +++--- fs/sysfs/symlink.c | 7 ++-- fs/sysfs/sysfs.h | 4 +-- include/linux/device/class.h | 6 ++-- include/linux/kernfs.h | 40 ++++++++++++++--------- include/linux/kobject.h | 4 +-- include/linux/kobject_ns.h | 13 ++++---- include/linux/netdevice.h | 4 +-- include/linux/sysfs.h | 24 +++++++------- include/net/net_namespace.h | 8 ++--- lib/kobject.c | 8 ++--- lib/kobject_uevent.c | 13 +++++--- net/core/net-sysfs.c | 50 ++++++++++++++--------------- net/core/net_namespace.c | 8 ++--- net/sunrpc/sysfs.c | 17 ++++++---- net/wireless/sysfs.c | 4 +-- 30 files changed, 175 insertions(+), 148 deletions(-) diff --git a/drivers/base/class.c b/drivers/base/class.c index 827fc7adacc7..ffab0a9c8ccb 100644 --- a/drivers/base/class.c +++ b/drivers/base/class.c @@ -127,7 +127,7 @@ static const struct kobj_type class_ktype = { }; int class_create_file_ns(const struct class *cls, const struct class_attribute *attr, - const void *ns) + const struct ns_common *ns) { struct subsys_private *sp = class_to_subsys(cls); int error; @@ -143,7 +143,7 @@ int class_create_file_ns(const struct class *cls, const struct class_attribute * EXPORT_SYMBOL_GPL(class_create_file_ns); void class_remove_file_ns(const struct class *cls, const struct class_attribute *attr, - const void *ns) + const struct ns_common *ns) { struct subsys_private *sp = class_to_subsys(cls); diff --git a/drivers/base/core.c b/drivers/base/core.c index 09b98f02f559..0613de0fbe44 100644 --- a/drivers/base/core.c +++ b/drivers/base/core.c @@ -2570,15 +2570,14 @@ static void device_release(struct kobject *kobj) kfree(p); } -static const void *device_namespace(const struct kobject *kobj) +static const struct ns_common *device_namespace(const struct kobject *kobj) { const struct device *dev = kobj_to_dev(kobj); - const void *ns = NULL; if (dev->class && dev->class->namespace) - ns = dev->class->namespace(dev); + return dev->class->namespace(dev); - return ns; + return NULL; } static void device_get_ownership(const struct kobject *kobj, kuid_t *uid, kgid_t *gid) diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c index 558b73940d66..7945614be36d 100644 --- a/drivers/infiniband/core/device.c +++ b/drivers/infiniband/core/device.c @@ -509,12 +509,13 @@ static int ib_device_uevent(const struct device *device, return 0; } -static const void *net_namespace(const struct device *d) +static const struct ns_common *net_namespace(const struct device *d) { const struct ib_core_device *coredev = container_of(d, struct ib_core_device, dev); + struct net *net = read_pnet(&coredev->rdma_net); - return read_pnet(&coredev->rdma_net); + return net ? to_ns_common(net) : NULL; } static struct class ib_class = { diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c index 30339dcabb4d..b58868e1cf11 100644 --- a/drivers/infiniband/ulp/srp/ib_srp.c +++ b/drivers/infiniband/ulp/srp/ib_srp.c @@ -43,6 +43,7 @@ #include #include #include +#include #include #include @@ -1048,7 +1049,7 @@ static void srp_remove_target(struct srp_target_port *target) scsi_remove_host(target->scsi_host); srp_stop_rport_timers(target->rport); srp_disconnect_target(target); - kobj_ns_drop(KOBJ_NS_TYPE_NET, target->net); + kobj_ns_drop(KOBJ_NS_TYPE_NET, to_ns_common(target->net)); for (i = 0; i < target->ch_count; i++) { ch = &target->ch[i]; srp_free_ch_ib(target, ch); @@ -3713,7 +3714,7 @@ static ssize_t add_target_store(struct device *dev, target = host_to_target(target_host); - target->net = kobj_ns_grab_current(KOBJ_NS_TYPE_NET); + target->net = to_net_ns(kobj_ns_grab_current(KOBJ_NS_TYPE_NET)); target->io_class = SRP_REV16A_IB_IO_CLASS; target->scsi_host = target_host; target->srp_host = host; @@ -3905,7 +3906,7 @@ static ssize_t add_target_store(struct device *dev, * earlier in this function. */ if (target->state != SRP_TARGET_REMOVED) - kobj_ns_drop(KOBJ_NS_TYPE_NET, target->net); + kobj_ns_drop(KOBJ_NS_TYPE_NET, to_ns_common(target->net)); scsi_host_put(target->scsi_host); } diff --git a/drivers/net/bonding/bond_sysfs.c b/drivers/net/bonding/bond_sysfs.c index 9a75ad3181ab..eaba44c76a5e 100644 --- a/drivers/net/bonding/bond_sysfs.c +++ b/drivers/net/bonding/bond_sysfs.c @@ -808,7 +808,7 @@ int __net_init bond_create_sysfs(struct bond_net *bn) sysfs_attr_init(&bn->class_attr_bonding_masters.attr); ret = netdev_class_create_file_ns(&bn->class_attr_bonding_masters, - bn->net); + to_ns_common(bn->net)); /* Permit multiple loads of the module by ignoring failures to * create the bonding_masters sysfs file. Bonding devices * created by second or subsequent loads of the module will @@ -835,7 +835,7 @@ int __net_init bond_create_sysfs(struct bond_net *bn) /* Remove /sys/class/net/bonding_masters. */ void __net_exit bond_destroy_sysfs(struct bond_net *bn) { - netdev_class_remove_file_ns(&bn->class_attr_bonding_masters, bn->net); + netdev_class_remove_file_ns(&bn->class_attr_bonding_masters, to_ns_common(bn->net)); } /* Initialize sysfs for each bond. This sets up and registers diff --git a/drivers/net/ipvlan/ipvtap.c b/drivers/net/ipvlan/ipvtap.c index edd13916831a..2d6bbddd1edd 100644 --- a/drivers/net/ipvlan/ipvtap.c +++ b/drivers/net/ipvlan/ipvtap.c @@ -30,10 +30,11 @@ static dev_t ipvtap_major; static struct cdev ipvtap_cdev; -static const void *ipvtap_net_namespace(const struct device *d) +static const struct ns_common *ipvtap_net_namespace(const struct device *d) { const struct net_device *dev = to_net_dev(d->parent); - return dev_net(dev); + + return to_ns_common(dev_net(dev)); } static struct class ipvtap_class = { diff --git a/drivers/net/macvtap.c b/drivers/net/macvtap.c index b391a0f740a3..cc975dfb7380 100644 --- a/drivers/net/macvtap.c +++ b/drivers/net/macvtap.c @@ -35,10 +35,11 @@ struct macvtap_dev { */ static dev_t macvtap_major; -static const void *macvtap_net_namespace(const struct device *d) +static const struct ns_common *macvtap_net_namespace(const struct device *d) { const struct net_device *dev = to_net_dev(d->parent); - return dev_net(dev); + + return to_ns_common(dev_net(dev)); } static struct class macvtap_class = { diff --git a/fs/kernfs/dir.c b/fs/kernfs/dir.c index 8d40c4b1db9f..be262145ae08 100644 --- a/fs/kernfs/dir.c +++ b/fs/kernfs/dir.c @@ -313,7 +313,8 @@ struct kernfs_node *kernfs_get_parent(struct kernfs_node *kn) * * Return: 31-bit hash of ns + name (so it fits in an off_t) */ -static unsigned int kernfs_name_hash(const char *name, const void *ns) +static unsigned int kernfs_name_hash(const char *name, + const struct ns_common *ns) { unsigned long hash = init_name_hash(ns); unsigned int len = strlen(name); @@ -330,7 +331,7 @@ static unsigned int kernfs_name_hash(const char *name, const void *ns) } static int kernfs_name_compare(unsigned int hash, const char *name, - const void *ns, const struct kernfs_node *kn) + const struct ns_common *ns, const struct kernfs_node *kn) { if (hash < kn->hash) return -1; @@ -856,7 +857,7 @@ int kernfs_add_one(struct kernfs_node *kn) */ static struct kernfs_node *kernfs_find_ns(struct kernfs_node *parent, const unsigned char *name, - const void *ns) + const struct ns_common *ns) { struct rb_node *node = parent->dir.children.rb_node; bool has_ns = kernfs_ns_enabled(parent); @@ -889,7 +890,7 @@ static struct kernfs_node *kernfs_find_ns(struct kernfs_node *parent, static struct kernfs_node *kernfs_walk_ns(struct kernfs_node *parent, const unsigned char *path, - const void *ns) + const struct ns_common *ns) { ssize_t len; char *p, *name; @@ -930,7 +931,8 @@ static struct kernfs_node *kernfs_walk_ns(struct kernfs_node *parent, * Return: pointer to the found kernfs_node on success, %NULL on failure. */ struct kernfs_node *kernfs_find_and_get_ns(struct kernfs_node *parent, - const char *name, const void *ns) + const char *name, + const struct ns_common *ns) { struct kernfs_node *kn; struct kernfs_root *root = kernfs_root(parent); @@ -956,7 +958,8 @@ EXPORT_SYMBOL_GPL(kernfs_find_and_get_ns); * Return: pointer to the found kernfs_node on success, %NULL on failure. */ struct kernfs_node *kernfs_walk_and_get_ns(struct kernfs_node *parent, - const char *path, const void *ns) + const char *path, + const struct ns_common *ns) { struct kernfs_node *kn; struct kernfs_root *root = kernfs_root(parent); @@ -1079,7 +1082,8 @@ struct kernfs_node *kernfs_root_to_node(struct kernfs_root *root) struct kernfs_node *kernfs_create_dir_ns(struct kernfs_node *parent, const char *name, umode_t mode, kuid_t uid, kgid_t gid, - void *priv, const void *ns) + void *priv, + const struct ns_common *ns) { struct kernfs_node *kn; int rc; @@ -1221,7 +1225,7 @@ static struct dentry *kernfs_iop_lookup(struct inode *dir, struct kernfs_node *kn; struct kernfs_root *root; struct inode *inode = NULL; - const void *ns = NULL; + const struct ns_common *ns = NULL; root = kernfs_root(parent); down_read(&root->kernfs_rwsem); @@ -1702,7 +1706,7 @@ bool kernfs_remove_self(struct kernfs_node *kn) * Return: %0 on success, -ENOENT if such entry doesn't exist. */ int kernfs_remove_by_name_ns(struct kernfs_node *parent, const char *name, - const void *ns) + const struct ns_common *ns) { struct kernfs_node *kn; struct kernfs_root *root; @@ -1741,7 +1745,7 @@ int kernfs_remove_by_name_ns(struct kernfs_node *parent, const char *name, * Return: %0 on success, -errno on failure. */ int kernfs_rename_ns(struct kernfs_node *kn, struct kernfs_node *new_parent, - const char *new_name, const void *new_ns) + const char *new_name, const struct ns_common *new_ns) { struct kernfs_node *old_parent; struct kernfs_root *root; @@ -1832,7 +1836,7 @@ static int kernfs_dir_fop_release(struct inode *inode, struct file *filp) return 0; } -static struct kernfs_node *kernfs_dir_pos(const void *ns, +static struct kernfs_node *kernfs_dir_pos(const struct ns_common *ns, struct kernfs_node *parent, loff_t hash, struct kernfs_node *pos) { if (pos) { @@ -1867,7 +1871,7 @@ static struct kernfs_node *kernfs_dir_pos(const void *ns, return pos; } -static struct kernfs_node *kernfs_dir_next_pos(const void *ns, +static struct kernfs_node *kernfs_dir_next_pos(const struct ns_common *ns, struct kernfs_node *parent, ino_t ino, struct kernfs_node *pos) { pos = kernfs_dir_pos(ns, parent, ino, pos); @@ -1889,7 +1893,7 @@ static int kernfs_fop_readdir(struct file *file, struct dir_context *ctx) struct kernfs_node *parent = kernfs_dentry_node(dentry); struct kernfs_node *pos = file->private_data; struct kernfs_root *root; - const void *ns = NULL; + const struct ns_common *ns = NULL; if (!dir_emit_dots(file, ctx)) return 0; diff --git a/fs/kernfs/file.c b/fs/kernfs/file.c index e32406d62c0d..1163aa769738 100644 --- a/fs/kernfs/file.c +++ b/fs/kernfs/file.c @@ -1045,7 +1045,7 @@ struct kernfs_node *__kernfs_create_file(struct kernfs_node *parent, umode_t mode, kuid_t uid, kgid_t gid, loff_t size, const struct kernfs_ops *ops, - void *priv, const void *ns, + void *priv, const struct ns_common *ns, struct lock_class_key *key) { struct kernfs_node *kn; diff --git a/fs/kernfs/kernfs-internal.h b/fs/kernfs/kernfs-internal.h index 6061b6f70d2a..b1fd9622a5e3 100644 --- a/fs/kernfs/kernfs-internal.h +++ b/fs/kernfs/kernfs-internal.h @@ -97,7 +97,7 @@ struct kernfs_super_info { * instance. If multiple tags become necessary, make the following * an array and compare kernfs_node tag against every entry. */ - const void *ns; + const struct ns_common *ns; /* anchored at kernfs_root->supers, protected by kernfs_rwsem */ struct list_head node; diff --git a/fs/kernfs/mount.c b/fs/kernfs/mount.c index 048f00b73b71..6e3217b6e481 100644 --- a/fs/kernfs/mount.c +++ b/fs/kernfs/mount.c @@ -345,7 +345,7 @@ static int kernfs_set_super(struct super_block *sb, struct fs_context *fc) * * Return: the namespace tag associated with kernfs super_block @sb. */ -const void *kernfs_super_ns(struct super_block *sb) +const struct ns_common *kernfs_super_ns(struct super_block *sb) { struct kernfs_super_info *info = kernfs_info(sb); diff --git a/fs/nfs/sysfs.c b/fs/nfs/sysfs.c index 7d8921f524a6..1da4f707f9ef 100644 --- a/fs/nfs/sysfs.c +++ b/fs/nfs/sysfs.c @@ -11,6 +11,7 @@ #include #include #include +#include #include #include @@ -127,9 +128,10 @@ static void nfs_netns_client_release(struct kobject *kobj) kfree(rcu_dereference_raw(c->identifier)); } -static const void *nfs_netns_client_namespace(const struct kobject *kobj) +static const struct ns_common *nfs_netns_client_namespace(const struct kobject *kobj) { - return container_of(kobj, struct nfs_netns_client, kobject)->net; + return to_ns_common(container_of(kobj, struct nfs_netns_client, + kobject)->net); } static struct kobj_attribute nfs_netns_client_id = __ATTR(identifier, @@ -156,9 +158,10 @@ static void nfs_netns_object_release(struct kobject *kobj) kfree(c); } -static const void *nfs_netns_namespace(const struct kobject *kobj) +static const struct ns_common *nfs_netns_namespace(const struct kobject *kobj) { - return container_of(kobj, struct nfs_netns_client, nfs_net_kobj)->net; + return to_ns_common(container_of(kobj, struct nfs_netns_client, + nfs_net_kobj)->net); } static struct kobj_type nfs_netns_object_type = { @@ -350,9 +353,10 @@ static void nfs_sysfs_sb_release(struct kobject *kobj) /* no-op: why? see lib/kobject.c kobject_cleanup() */ } -static const void *nfs_netns_server_namespace(const struct kobject *kobj) +static const struct ns_common *nfs_netns_server_namespace(const struct kobject *kobj) { - return container_of(kobj, struct nfs_server, kobj)->nfs_client->cl_net; + return to_ns_common(container_of(kobj, struct nfs_server, + kobj)->nfs_client->cl_net); } static struct kobj_type nfs_sb_ktype = { diff --git a/fs/sysfs/dir.c b/fs/sysfs/dir.c index 94e12efd92f2..ffdcd4153c58 100644 --- a/fs/sysfs/dir.c +++ b/fs/sysfs/dir.c @@ -37,7 +37,7 @@ void sysfs_warn_dup(struct kernfs_node *parent, const char *name) * @kobj: object we're creating directory for * @ns: the namespace tag to use */ -int sysfs_create_dir_ns(struct kobject *kobj, const void *ns) +int sysfs_create_dir_ns(struct kobject *kobj, const struct ns_common *ns) { struct kernfs_node *parent, *kn; kuid_t uid; @@ -103,7 +103,7 @@ void sysfs_remove_dir(struct kobject *kobj) } int sysfs_rename_dir_ns(struct kobject *kobj, const char *new_name, - const void *new_ns) + const struct ns_common *new_ns) { struct kernfs_node *parent; int ret; @@ -115,7 +115,7 @@ int sysfs_rename_dir_ns(struct kobject *kobj, const char *new_name, } int sysfs_move_dir_ns(struct kobject *kobj, struct kobject *new_parent_kobj, - const void *new_ns) + const struct ns_common *new_ns) { struct kernfs_node *kn = kobj->sd; struct kernfs_node *new_parent; diff --git a/fs/sysfs/file.c b/fs/sysfs/file.c index a8176c875f55..5709cede1d75 100644 --- a/fs/sysfs/file.c +++ b/fs/sysfs/file.c @@ -272,7 +272,7 @@ static const struct kernfs_ops sysfs_bin_kfops_mmap = { int sysfs_add_file_mode_ns(struct kernfs_node *parent, const struct attribute *attr, umode_t mode, kuid_t uid, - kgid_t gid, const void *ns) + kgid_t gid, const struct ns_common *ns) { struct kobject *kobj = parent->priv; const struct sysfs_ops *sysfs_ops = kobj->ktype->sysfs_ops; @@ -322,7 +322,7 @@ int sysfs_add_file_mode_ns(struct kernfs_node *parent, int sysfs_add_bin_file_mode_ns(struct kernfs_node *parent, const struct bin_attribute *battr, umode_t mode, size_t size, - kuid_t uid, kgid_t gid, const void *ns) + kuid_t uid, kgid_t gid, const struct ns_common *ns) { const struct attribute *attr = &battr->attr; struct lock_class_key *key = NULL; @@ -362,7 +362,7 @@ int sysfs_add_bin_file_mode_ns(struct kernfs_node *parent, * @ns: namespace the new file should belong to */ int sysfs_create_file_ns(struct kobject *kobj, const struct attribute *attr, - const void *ns) + const struct ns_common *ns) { kuid_t uid; kgid_t gid; @@ -505,7 +505,7 @@ EXPORT_SYMBOL_GPL(sysfs_unbreak_active_protection); * Hash the attribute name and namespace tag and kill the victim. */ void sysfs_remove_file_ns(struct kobject *kobj, const struct attribute *attr, - const void *ns) + const struct ns_common *ns) { struct kernfs_node *parent = kobj->sd; diff --git a/fs/sysfs/mount.c b/fs/sysfs/mount.c index e65c60158a04..b199e8ff79b1 100644 --- a/fs/sysfs/mount.c +++ b/fs/sysfs/mount.c @@ -55,7 +55,7 @@ static const struct fs_context_operations sysfs_fs_context_ops = { static int sysfs_init_fs_context(struct fs_context *fc) { struct kernfs_fs_context *kfc; - struct net *netns; + struct ns_common *ns; if (!(fc->sb_flags & SB_KERNMOUNT)) { if (!kobj_ns_current_may_mount(KOBJ_NS_TYPE_NET)) @@ -66,12 +66,14 @@ static int sysfs_init_fs_context(struct fs_context *fc) if (!kfc) return -ENOMEM; - kfc->ns_tag = netns = kobj_ns_grab_current(KOBJ_NS_TYPE_NET); + kfc->ns_tag = ns = kobj_ns_grab_current(KOBJ_NS_TYPE_NET); kfc->root = sysfs_root; kfc->magic = SYSFS_MAGIC; fc->fs_private = kfc; fc->ops = &sysfs_fs_context_ops; - if (netns) { + if (ns) { + struct net *netns = to_net_ns(ns); + put_user_ns(fc->user_ns); fc->user_ns = get_user_ns(netns->user_ns); } @@ -81,7 +83,7 @@ static int sysfs_init_fs_context(struct fs_context *fc) static void sysfs_kill_sb(struct super_block *sb) { - void *ns = (void *)kernfs_super_ns(sb); + struct ns_common *ns = (struct ns_common *)kernfs_super_ns(sb); kernfs_kill_sb(sb); kobj_ns_drop(KOBJ_NS_TYPE_NET, ns); diff --git a/fs/sysfs/symlink.c b/fs/sysfs/symlink.c index 5603530a1a52..5f9c05fb1394 100644 --- a/fs/sysfs/symlink.c +++ b/fs/sysfs/symlink.c @@ -121,7 +121,7 @@ EXPORT_SYMBOL_GPL(sysfs_create_link_nowarn); void sysfs_delete_link(struct kobject *kobj, struct kobject *targ, const char *name) { - const void *ns = NULL; + const struct ns_common *ns = NULL; /* * We don't own @target and it may be removed at any time. @@ -164,10 +164,11 @@ EXPORT_SYMBOL_GPL(sysfs_remove_link); * A helper function for the common rename symlink idiom. */ int sysfs_rename_link_ns(struct kobject *kobj, struct kobject *targ, - const char *old, const char *new, const void *new_ns) + const char *old, const char *new, + const struct ns_common *new_ns) { struct kernfs_node *parent, *kn = NULL; - const void *old_ns = NULL; + const struct ns_common *old_ns = NULL; int result; if (!kobj) diff --git a/fs/sysfs/sysfs.h b/fs/sysfs/sysfs.h index 8e012f25e1c0..f4583dcafcd1 100644 --- a/fs/sysfs/sysfs.h +++ b/fs/sysfs/sysfs.h @@ -29,10 +29,10 @@ void sysfs_warn_dup(struct kernfs_node *parent, const char *name); */ int sysfs_add_file_mode_ns(struct kernfs_node *parent, const struct attribute *attr, umode_t amode, kuid_t uid, - kgid_t gid, const void *ns); + kgid_t gid, const struct ns_common *ns); int sysfs_add_bin_file_mode_ns(struct kernfs_node *parent, const struct bin_attribute *battr, umode_t mode, size_t size, - kuid_t uid, kgid_t gid, const void *ns); + kuid_t uid, kgid_t gid, const struct ns_common *ns); /* * symlink.c diff --git a/include/linux/device/class.h b/include/linux/device/class.h index 65880e60c720..021da0d61796 100644 --- a/include/linux/device/class.h +++ b/include/linux/device/class.h @@ -62,7 +62,7 @@ struct class { int (*shutdown_pre)(struct device *dev); const struct kobj_ns_type_operations *ns_type; - const void *(*namespace)(const struct device *dev); + const struct ns_common *(*namespace)(const struct device *dev); void (*get_ownership)(const struct device *dev, kuid_t *uid, kgid_t *gid); @@ -180,9 +180,9 @@ struct class_attribute { struct class_attribute class_attr_##_name = __ATTR_WO(_name) int __must_check class_create_file_ns(const struct class *class, const struct class_attribute *attr, - const void *ns); + const struct ns_common *ns); void class_remove_file_ns(const struct class *class, const struct class_attribute *attr, - const void *ns); + const struct ns_common *ns); static inline int __must_check class_create_file(const struct class *class, const struct class_attribute *attr) diff --git a/include/linux/kernfs.h b/include/linux/kernfs.h index b5a5f32fdfd1..4f0ab88a1b31 100644 --- a/include/linux/kernfs.h +++ b/include/linux/kernfs.h @@ -23,6 +23,7 @@ struct file; struct dentry; struct iattr; +struct ns_common; struct seq_file; struct vm_area_struct; struct vm_operations_struct; @@ -209,7 +210,7 @@ struct kernfs_node { struct rb_node rb; - const void *ns; /* namespace tag */ + const struct ns_common *ns; /* namespace tag */ unsigned int hash; /* ns + name hash */ unsigned short flags; umode_t mode; @@ -331,7 +332,7 @@ struct kernfs_ops { */ struct kernfs_fs_context { struct kernfs_root *root; /* Root of the hierarchy being mounted */ - void *ns_tag; /* Namespace tag of the mount (or NULL) */ + struct ns_common *ns_tag; /* Namespace tag of the mount (or NULL) */ unsigned long magic; /* File system specific magic number */ /* The following are set/used by kernfs_mount() */ @@ -406,9 +407,11 @@ void pr_cont_kernfs_name(struct kernfs_node *kn); void pr_cont_kernfs_path(struct kernfs_node *kn); struct kernfs_node *kernfs_get_parent(struct kernfs_node *kn); struct kernfs_node *kernfs_find_and_get_ns(struct kernfs_node *parent, - const char *name, const void *ns); + const char *name, + const struct ns_common *ns); struct kernfs_node *kernfs_walk_and_get_ns(struct kernfs_node *parent, - const char *path, const void *ns); + const char *path, + const struct ns_common *ns); void kernfs_get(struct kernfs_node *kn); void kernfs_put(struct kernfs_node *kn); @@ -426,7 +429,8 @@ unsigned int kernfs_root_flags(struct kernfs_node *kn); struct kernfs_node *kernfs_create_dir_ns(struct kernfs_node *parent, const char *name, umode_t mode, kuid_t uid, kgid_t gid, - void *priv, const void *ns); + void *priv, + const struct ns_common *ns); struct kernfs_node *kernfs_create_empty_dir(struct kernfs_node *parent, const char *name); struct kernfs_node *__kernfs_create_file(struct kernfs_node *parent, @@ -434,7 +438,8 @@ struct kernfs_node *__kernfs_create_file(struct kernfs_node *parent, kuid_t uid, kgid_t gid, loff_t size, const struct kernfs_ops *ops, - void *priv, const void *ns, + void *priv, + const struct ns_common *ns, struct lock_class_key *key); struct kernfs_node *kernfs_create_link(struct kernfs_node *parent, const char *name, @@ -446,9 +451,9 @@ void kernfs_break_active_protection(struct kernfs_node *kn); void kernfs_unbreak_active_protection(struct kernfs_node *kn); bool kernfs_remove_self(struct kernfs_node *kn); int kernfs_remove_by_name_ns(struct kernfs_node *parent, const char *name, - const void *ns); + const struct ns_common *ns); int kernfs_rename_ns(struct kernfs_node *kn, struct kernfs_node *new_parent, - const char *new_name, const void *new_ns); + const char *new_name, const struct ns_common *new_ns); int kernfs_setattr(struct kernfs_node *kn, const struct iattr *iattr); __poll_t kernfs_generic_poll(struct kernfs_open_file *of, struct poll_table_struct *pt); @@ -459,7 +464,7 @@ int kernfs_xattr_get(struct kernfs_node *kn, const char *name, int kernfs_xattr_set(struct kernfs_node *kn, const char *name, const void *value, size_t size, int flags); -const void *kernfs_super_ns(struct super_block *sb); +const struct ns_common *kernfs_super_ns(struct super_block *sb); int kernfs_get_tree(struct fs_context *fc); void kernfs_free_fs_context(struct fs_context *fc); void kernfs_kill_sb(struct super_block *sb); @@ -494,11 +499,11 @@ static inline struct kernfs_node *kernfs_get_parent(struct kernfs_node *kn) static inline struct kernfs_node * kernfs_find_and_get_ns(struct kernfs_node *parent, const char *name, - const void *ns) + const struct ns_common *ns) { return NULL; } static inline struct kernfs_node * kernfs_walk_and_get_ns(struct kernfs_node *parent, const char *path, - const void *ns) + const struct ns_common *ns) { return NULL; } static inline void kernfs_get(struct kernfs_node *kn) { } @@ -526,14 +531,15 @@ static inline unsigned int kernfs_root_flags(struct kernfs_node *kn) static inline struct kernfs_node * kernfs_create_dir_ns(struct kernfs_node *parent, const char *name, umode_t mode, kuid_t uid, kgid_t gid, - void *priv, const void *ns) + void *priv, const struct ns_common *ns) { return ERR_PTR(-ENOSYS); } static inline struct kernfs_node * __kernfs_create_file(struct kernfs_node *parent, const char *name, umode_t mode, kuid_t uid, kgid_t gid, loff_t size, const struct kernfs_ops *ops, - void *priv, const void *ns, struct lock_class_key *key) + void *priv, const struct ns_common *ns, + struct lock_class_key *key) { return ERR_PTR(-ENOSYS); } static inline struct kernfs_node * @@ -549,12 +555,14 @@ static inline bool kernfs_remove_self(struct kernfs_node *kn) { return false; } static inline int kernfs_remove_by_name_ns(struct kernfs_node *kn, - const char *name, const void *ns) + const char *name, + const struct ns_common *ns) { return -ENOSYS; } static inline int kernfs_rename_ns(struct kernfs_node *kn, struct kernfs_node *new_parent, - const char *new_name, const void *new_ns) + const char *new_name, + const struct ns_common *new_ns) { return -ENOSYS; } static inline int kernfs_setattr(struct kernfs_node *kn, @@ -575,7 +583,7 @@ static inline int kernfs_xattr_set(struct kernfs_node *kn, const char *name, const void *value, size_t size, int flags) { return -ENOSYS; } -static inline const void *kernfs_super_ns(struct super_block *sb) +static inline const struct ns_common *kernfs_super_ns(struct super_block *sb) { return NULL; } static inline int kernfs_get_tree(struct fs_context *fc) diff --git a/include/linux/kobject.h b/include/linux/kobject.h index c8219505a79f..bcb5d4e32001 100644 --- a/include/linux/kobject.h +++ b/include/linux/kobject.h @@ -109,7 +109,7 @@ struct kobject *kobject_get(struct kobject *kobj); struct kobject * __must_check kobject_get_unless_zero(struct kobject *kobj); void kobject_put(struct kobject *kobj); -const void *kobject_namespace(const struct kobject *kobj); +const struct ns_common *kobject_namespace(const struct kobject *kobj); void kobject_get_ownership(const struct kobject *kobj, kuid_t *uid, kgid_t *gid); char *kobject_get_path(const struct kobject *kobj, gfp_t flag); @@ -118,7 +118,7 @@ struct kobj_type { const struct sysfs_ops *sysfs_ops; const struct attribute_group **default_groups; const struct kobj_ns_type_operations *(*child_ns_type)(const struct kobject *kobj); - const void *(*namespace)(const struct kobject *kobj); + const struct ns_common *(*namespace)(const struct kobject *kobj); void (*get_ownership)(const struct kobject *kobj, kuid_t *uid, kgid_t *gid); }; diff --git a/include/linux/kobject_ns.h b/include/linux/kobject_ns.h index 150fe2ae1b6b..4f0990e09b93 100644 --- a/include/linux/kobject_ns.h +++ b/include/linux/kobject_ns.h @@ -16,6 +16,7 @@ #ifndef _LINUX_KOBJECT_NS_H #define _LINUX_KOBJECT_NS_H +struct ns_common; struct sock; struct kobject; @@ -39,10 +40,10 @@ enum kobj_ns_type { struct kobj_ns_type_operations { enum kobj_ns_type type; bool (*current_may_mount)(void); - void *(*grab_current_ns)(void); - const void *(*netlink_ns)(struct sock *sk); - const void *(*initial_ns)(void); - void (*drop_ns)(void *); + struct ns_common *(*grab_current_ns)(void); + const struct ns_common *(*netlink_ns)(struct sock *sk); + const struct ns_common *(*initial_ns)(void); + void (*drop_ns)(struct ns_common *); }; int kobj_ns_type_register(const struct kobj_ns_type_operations *ops); @@ -51,7 +52,7 @@ const struct kobj_ns_type_operations *kobj_child_ns_ops(const struct kobject *pa const struct kobj_ns_type_operations *kobj_ns_ops(const struct kobject *kobj); bool kobj_ns_current_may_mount(enum kobj_ns_type type); -void *kobj_ns_grab_current(enum kobj_ns_type type); -void kobj_ns_drop(enum kobj_ns_type type, void *ns); +struct ns_common *kobj_ns_grab_current(enum kobj_ns_type type); +void kobj_ns_drop(enum kobj_ns_type type, struct ns_common *ns); #endif /* _LINUX_KOBJECT_NS_H */ diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 7ca01eb3f7d2..85c20bdd36fb 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -5339,9 +5339,9 @@ static inline netdev_tx_t netdev_start_xmit(struct sk_buff *skb, struct net_devi } int netdev_class_create_file_ns(const struct class_attribute *class_attr, - const void *ns); + const struct ns_common *ns); void netdev_class_remove_file_ns(const struct class_attribute *class_attr, - const void *ns); + const struct ns_common *ns); extern const struct kobj_ns_type_operations net_ns_type_operations; diff --git a/include/linux/sysfs.h b/include/linux/sysfs.h index 99b775f3ff46..468259fb6049 100644 --- a/include/linux/sysfs.h +++ b/include/linux/sysfs.h @@ -396,13 +396,13 @@ struct sysfs_ops { #ifdef CONFIG_SYSFS -int __must_check sysfs_create_dir_ns(struct kobject *kobj, const void *ns); +int __must_check sysfs_create_dir_ns(struct kobject *kobj, const struct ns_common *ns); void sysfs_remove_dir(struct kobject *kobj); int __must_check sysfs_rename_dir_ns(struct kobject *kobj, const char *new_name, - const void *new_ns); + const struct ns_common *new_ns); int __must_check sysfs_move_dir_ns(struct kobject *kobj, struct kobject *new_parent_kobj, - const void *new_ns); + const struct ns_common *new_ns); int __must_check sysfs_create_mount_point(struct kobject *parent_kobj, const char *name); void sysfs_remove_mount_point(struct kobject *parent_kobj, @@ -410,7 +410,7 @@ void sysfs_remove_mount_point(struct kobject *parent_kobj, int __must_check sysfs_create_file_ns(struct kobject *kobj, const struct attribute *attr, - const void *ns); + const struct ns_common *ns); int __must_check sysfs_create_files(struct kobject *kobj, const struct attribute * const *attr); int __must_check sysfs_chmod_file(struct kobject *kobj, @@ -419,7 +419,7 @@ struct kernfs_node *sysfs_break_active_protection(struct kobject *kobj, const struct attribute *attr); void sysfs_unbreak_active_protection(struct kernfs_node *kn); void sysfs_remove_file_ns(struct kobject *kobj, const struct attribute *attr, - const void *ns); + const struct ns_common *ns); bool sysfs_remove_file_self(struct kobject *kobj, const struct attribute *attr); void sysfs_remove_files(struct kobject *kobj, const struct attribute * const *attr); @@ -437,7 +437,7 @@ void sysfs_remove_link(struct kobject *kobj, const char *name); int sysfs_rename_link_ns(struct kobject *kobj, struct kobject *target, const char *old_name, const char *new_name, - const void *new_ns); + const struct ns_common *new_ns); void sysfs_delete_link(struct kobject *dir, struct kobject *targ, const char *name); @@ -502,7 +502,7 @@ ssize_t sysfs_bin_attr_simple_read(struct file *file, struct kobject *kobj, #else /* CONFIG_SYSFS */ -static inline int sysfs_create_dir_ns(struct kobject *kobj, const void *ns) +static inline int sysfs_create_dir_ns(struct kobject *kobj, const struct ns_common *ns) { return 0; } @@ -512,14 +512,14 @@ static inline void sysfs_remove_dir(struct kobject *kobj) } static inline int sysfs_rename_dir_ns(struct kobject *kobj, - const char *new_name, const void *new_ns) + const char *new_name, const struct ns_common *new_ns) { return 0; } static inline int sysfs_move_dir_ns(struct kobject *kobj, struct kobject *new_parent_kobj, - const void *new_ns) + const struct ns_common *new_ns) { return 0; } @@ -537,7 +537,7 @@ static inline void sysfs_remove_mount_point(struct kobject *parent_kobj, static inline int sysfs_create_file_ns(struct kobject *kobj, const struct attribute *attr, - const void *ns) + const struct ns_common *ns) { return 0; } @@ -567,7 +567,7 @@ static inline void sysfs_unbreak_active_protection(struct kernfs_node *kn) static inline void sysfs_remove_file_ns(struct kobject *kobj, const struct attribute *attr, - const void *ns) + const struct ns_common *ns) { } @@ -612,7 +612,7 @@ static inline void sysfs_remove_link(struct kobject *kobj, const char *name) static inline int sysfs_rename_link_ns(struct kobject *k, struct kobject *t, const char *old_name, - const char *new_name, const void *ns) + const char *new_name, const struct ns_common *ns) { return 0; } diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h index d7bec49ee9ea..80de5e98a66d 100644 --- a/include/net/net_namespace.h +++ b/include/net/net_namespace.h @@ -264,14 +264,14 @@ void ipx_unregister_sysctl(void); #define ipx_unregister_sysctl() #endif -#ifdef CONFIG_NET_NS -void __put_net(struct net *net); - static inline struct net *to_net_ns(struct ns_common *ns) { return container_of(ns, struct net, ns); } +#ifdef CONFIG_NET_NS +void __put_net(struct net *net); + /* Try using get_net_track() instead */ static inline struct net *get_net(struct net *net) { @@ -309,7 +309,7 @@ static inline int check_net(const struct net *net) return ns_ref_read(net) != 0; } -void net_drop_ns(void *); +void net_drop_ns(struct ns_common *); void net_passive_dec(struct net *net); #else diff --git a/lib/kobject.c b/lib/kobject.c index cfdb2c3f20a2..9c9ff0f5175f 100644 --- a/lib/kobject.c +++ b/lib/kobject.c @@ -27,7 +27,7 @@ * and thus @kobj should have a namespace tag associated with it. Returns * %NULL otherwise. */ -const void *kobject_namespace(const struct kobject *kobj) +const struct ns_common *kobject_namespace(const struct kobject *kobj) { const struct kobj_ns_type_operations *ns_ops = kobj_ns_ops(kobj); @@ -1083,9 +1083,9 @@ bool kobj_ns_current_may_mount(enum kobj_ns_type type) return may_mount; } -void *kobj_ns_grab_current(enum kobj_ns_type type) +struct ns_common *kobj_ns_grab_current(enum kobj_ns_type type) { - void *ns = NULL; + struct ns_common *ns = NULL; spin_lock(&kobj_ns_type_lock); if (kobj_ns_type_is_valid(type) && kobj_ns_ops_tbl[type]) @@ -1096,7 +1096,7 @@ void *kobj_ns_grab_current(enum kobj_ns_type type) } EXPORT_SYMBOL_GPL(kobj_ns_grab_current); -void kobj_ns_drop(enum kobj_ns_type type, void *ns) +void kobj_ns_drop(enum kobj_ns_type type, struct ns_common *ns) { spin_lock(&kobj_ns_type_lock); if (kobj_ns_type_is_valid(type) && diff --git a/lib/kobject_uevent.c b/lib/kobject_uevent.c index 871941c9830c..ddbc4d7482d2 100644 --- a/lib/kobject_uevent.c +++ b/lib/kobject_uevent.c @@ -238,7 +238,7 @@ static int kobj_usermode_filter(struct kobject *kobj) ops = kobj_ns_ops(kobj); if (ops) { - const void *init_ns, *ns; + const struct ns_common *init_ns, *ns; ns = kobj->ktype->namespace(kobj); init_ns = ops->initial_ns(); @@ -388,7 +388,7 @@ static int kobject_uevent_net_broadcast(struct kobject *kobj, #ifdef CONFIG_NET const struct kobj_ns_type_operations *ops; - const struct net *net = NULL; + const struct ns_common *ns = NULL; ops = kobj_ns_ops(kobj); if (!ops && kobj->kset) { @@ -404,14 +404,17 @@ static int kobject_uevent_net_broadcast(struct kobject *kobj, */ if (ops && ops->netlink_ns && kobj->ktype->namespace) if (ops->type == KOBJ_NS_TYPE_NET) - net = kobj->ktype->namespace(kobj); + ns = kobj->ktype->namespace(kobj); - if (!net) + if (!ns) ret = uevent_net_broadcast_untagged(env, action_string, devpath); - else + else { + const struct net *net = container_of(ns, struct net, ns); + ret = uevent_net_broadcast_tagged(net->uevent_sock->sk, env, action_string, devpath); + } #endif return ret; diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index 07624b682b08..b9740a397f55 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -1181,24 +1181,24 @@ static void rx_queue_release(struct kobject *kobj) netdev_put(queue->dev, &queue->dev_tracker); } -static const void *rx_queue_namespace(const struct kobject *kobj) +static const struct ns_common *rx_queue_namespace(const struct kobject *kobj) { struct netdev_rx_queue *queue = to_rx_queue(kobj); struct device *dev = &queue->dev->dev; - const void *ns = NULL; if (dev->class && dev->class->namespace) - ns = dev->class->namespace(dev); + return dev->class->namespace(dev); - return ns; + return NULL; } static void rx_queue_get_ownership(const struct kobject *kobj, kuid_t *uid, kgid_t *gid) { - const struct net *net = rx_queue_namespace(kobj); + const struct ns_common *ns = rx_queue_namespace(kobj); - net_ns_get_ownership(net, uid, gid); + net_ns_get_ownership(ns ? container_of(ns, struct net, ns) : NULL, + uid, gid); } static const struct kobj_type rx_queue_ktype = { @@ -1931,24 +1931,24 @@ static void netdev_queue_release(struct kobject *kobj) netdev_put(queue->dev, &queue->dev_tracker); } -static const void *netdev_queue_namespace(const struct kobject *kobj) +static const struct ns_common *netdev_queue_namespace(const struct kobject *kobj) { struct netdev_queue *queue = to_netdev_queue(kobj); struct device *dev = &queue->dev->dev; - const void *ns = NULL; if (dev->class && dev->class->namespace) - ns = dev->class->namespace(dev); + return dev->class->namespace(dev); - return ns; + return NULL; } static void netdev_queue_get_ownership(const struct kobject *kobj, kuid_t *uid, kgid_t *gid) { - const struct net *net = netdev_queue_namespace(kobj); + const struct ns_common *ns = netdev_queue_namespace(kobj); - net_ns_get_ownership(net, uid, gid); + net_ns_get_ownership(ns ? container_of(ns, struct net, ns) : NULL, + uid, gid); } static const struct kobj_type netdev_queue_ktype = { @@ -2185,24 +2185,24 @@ static bool net_current_may_mount(void) return ns_capable(net->user_ns, CAP_SYS_ADMIN); } -static void *net_grab_current_ns(void) +static struct ns_common *net_grab_current_ns(void) { - struct net *ns = current->nsproxy->net_ns; + struct net *net = current->nsproxy->net_ns; #ifdef CONFIG_NET_NS - if (ns) - refcount_inc(&ns->passive); + if (net) + refcount_inc(&net->passive); #endif - return ns; + return net ? to_ns_common(net) : NULL; } -static const void *net_initial_ns(void) +static const struct ns_common *net_initial_ns(void) { - return &init_net; + return to_ns_common(&init_net); } -static const void *net_netlink_ns(struct sock *sk) +static const struct ns_common *net_netlink_ns(struct sock *sk) { - return sock_net(sk); + return to_ns_common(sock_net(sk)); } const struct kobj_ns_type_operations net_ns_type_operations = { @@ -2252,11 +2252,11 @@ static void netdev_release(struct device *d) kvfree(dev); } -static const void *net_namespace(const struct device *d) +static const struct ns_common *net_namespace(const struct device *d) { const struct net_device *dev = to_net_dev(d); - return dev_net(dev); + return to_ns_common(dev_net(dev)); } static void net_get_ownership(const struct device *d, kuid_t *uid, kgid_t *gid) @@ -2402,14 +2402,14 @@ int netdev_change_owner(struct net_device *ndev, const struct net *net_old, } int netdev_class_create_file_ns(const struct class_attribute *class_attr, - const void *ns) + const struct ns_common *ns) { return class_create_file_ns(&net_class, class_attr, ns); } EXPORT_SYMBOL(netdev_class_create_file_ns); void netdev_class_remove_file_ns(const struct class_attribute *class_attr, - const void *ns) + const struct ns_common *ns) { class_remove_file_ns(&net_class, class_attr, ns); } diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c index 1057d16d5dd2..24aa10a1d0ea 100644 --- a/net/core/net_namespace.c +++ b/net/core/net_namespace.c @@ -540,12 +540,10 @@ void net_passive_dec(struct net *net) } } -void net_drop_ns(void *p) +void net_drop_ns(struct ns_common *ns) { - struct net *net = (struct net *)p; - - if (net) - net_passive_dec(net); + if (ns) + net_passive_dec(to_net_ns(ns)); } struct net *copy_net_ns(u64 flags, diff --git a/net/sunrpc/sysfs.c b/net/sunrpc/sysfs.c index af8fac9cedd4..a90480f80154 100644 --- a/net/sunrpc/sysfs.c +++ b/net/sunrpc/sysfs.c @@ -6,6 +6,7 @@ #include #include #include +#include #include "sysfs.h" @@ -553,20 +554,22 @@ static void rpc_sysfs_xprt_release(struct kobject *kobj) kfree(xprt); } -static const void *rpc_sysfs_client_namespace(const struct kobject *kobj) +static const struct ns_common *rpc_sysfs_client_namespace(const struct kobject *kobj) { - return container_of(kobj, struct rpc_sysfs_client, kobject)->net; + return to_ns_common(container_of(kobj, struct rpc_sysfs_client, + kobject)->net); } -static const void *rpc_sysfs_xprt_switch_namespace(const struct kobject *kobj) +static const struct ns_common *rpc_sysfs_xprt_switch_namespace(const struct kobject *kobj) { - return container_of(kobj, struct rpc_sysfs_xprt_switch, kobject)->net; + return to_ns_common(container_of(kobj, struct rpc_sysfs_xprt_switch, + kobject)->net); } -static const void *rpc_sysfs_xprt_namespace(const struct kobject *kobj) +static const struct ns_common *rpc_sysfs_xprt_namespace(const struct kobject *kobj) { - return container_of(kobj, struct rpc_sysfs_xprt, - kobject)->xprt->xprt_net; + return to_ns_common(container_of(kobj, struct rpc_sysfs_xprt, + kobject)->xprt->xprt_net); } static struct kobj_attribute rpc_sysfs_clnt_version = __ATTR(rpc_version, diff --git a/net/wireless/sysfs.c b/net/wireless/sysfs.c index 2e0ea69b9604..0b9abe70d39d 100644 --- a/net/wireless/sysfs.c +++ b/net/wireless/sysfs.c @@ -154,11 +154,11 @@ static SIMPLE_DEV_PM_OPS(wiphy_pm_ops, wiphy_suspend, wiphy_resume); #define WIPHY_PM_OPS NULL #endif -static const void *wiphy_namespace(const struct device *d) +static const struct ns_common *wiphy_namespace(const struct device *d) { struct wiphy *wiphy = container_of(d, struct wiphy, dev); - return wiphy_net(wiphy); + return to_ns_common(wiphy_net(wiphy)); } struct class ieee80211_class = { From 1fe989e1c42a315c7e7918e7b812377137085036 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Wed, 1 Apr 2026 12:21:16 +0200 Subject: [PATCH 224/248] kernfs: use namespace id instead of pointer for hashing and comparison kernfs uses the namespace tag as both a hash seed (via init_name_hash()) and a comparison key in the rbtree. The resulting hash values are exposed to userspace through directory seek positions (ctx->pos), and the raw pointer comparisons in kernfs_name_compare() encode kernel pointer ordering into the rbtree layout. This constitutes a KASLR information leak since the hash and ordering derived from kernel pointers can be observed from userspace. Fix this by using the 64-bit namespace id (ns_common::ns_id) instead of the raw pointer value for both hashing and comparison. The namespace id is a stable, non-secret identifier that is already exposed to userspace through other interfaces (e.g., /proc/pid/ns/, ioctl NS_GET_NSID). Introduce kernfs_ns_id() as a helper that extracts the namespace id from a potentially-NULL ns_common pointer, returning 0 for the no-namespace case. All namespace equality checks in the directory iteration and dentry revalidation paths are also switched from pointer comparison to ns_id comparison for consistency. Signed-off-by: Christian Brauner --- fs/kernfs/dir.c | 33 ++++++++++++++++++++++++++------- 1 file changed, 26 insertions(+), 7 deletions(-) diff --git a/fs/kernfs/dir.c b/fs/kernfs/dir.c index be262145ae08..f5cf1d74b5f1 100644 --- a/fs/kernfs/dir.c +++ b/fs/kernfs/dir.c @@ -14,6 +14,7 @@ #include #include #include +#include #include "kernfs-internal.h" @@ -306,6 +307,18 @@ struct kernfs_node *kernfs_get_parent(struct kernfs_node *kn) return parent; } +/* + * kernfs_ns_id - return the namespace id for a given namespace + * @ns: namespace tag (may be NULL) + * + * Use the 64-bit namespace id instead of raw pointers for hashing + * and comparison to avoid leaking kernel addresses to userspace. + */ +static u64 kernfs_ns_id(const struct ns_common *ns) +{ + return ns ? ns->ns_id : 0; +} + /** * kernfs_name_hash - calculate hash of @ns + @name * @name: Null terminated string to hash @@ -316,7 +329,7 @@ struct kernfs_node *kernfs_get_parent(struct kernfs_node *kn) static unsigned int kernfs_name_hash(const char *name, const struct ns_common *ns) { - unsigned long hash = init_name_hash(ns); + unsigned long hash = init_name_hash(kernfs_ns_id(ns)); unsigned int len = strlen(name); while (len--) hash = partial_name_hash(*name++, hash); @@ -333,13 +346,16 @@ static unsigned int kernfs_name_hash(const char *name, static int kernfs_name_compare(unsigned int hash, const char *name, const struct ns_common *ns, const struct kernfs_node *kn) { + u64 ns_id = kernfs_ns_id(ns); + u64 kn_ns_id = kernfs_ns_id(kn->ns); + if (hash < kn->hash) return -1; if (hash > kn->hash) return 1; - if (ns < kn->ns) + if (ns_id < kn_ns_id) return -1; - if (ns > kn->ns) + if (ns_id > kn_ns_id) return 1; return strcmp(name, kernfs_rcu_name(kn)); } @@ -1203,7 +1219,7 @@ static int kernfs_dop_revalidate(struct inode *dir, const struct qstr *name, /* The kernfs node has been moved to a different namespace */ if (parent && kernfs_ns_enabled(parent) && - kernfs_info(dentry->d_sb)->ns != kn->ns) + kernfs_ns_id(kernfs_info(dentry->d_sb)->ns) != kernfs_ns_id(kn->ns)) goto out_bad; up_read(&root->kernfs_rwsem); @@ -1775,7 +1791,8 @@ int kernfs_rename_ns(struct kernfs_node *kn, struct kernfs_node *new_parent, old_name = kernfs_rcu_name(kn); if (!new_name) new_name = old_name; - if ((old_parent == new_parent) && (kn->ns == new_ns) && + if ((old_parent == new_parent) && + (kernfs_ns_id(kn->ns) == kernfs_ns_id(new_ns)) && (strcmp(old_name, new_name) == 0)) goto out; /* nothing to rename */ @@ -1861,7 +1878,8 @@ static struct kernfs_node *kernfs_dir_pos(const struct ns_common *ns, } } /* Skip over entries which are dying/dead or in the wrong namespace */ - while (pos && (!kernfs_active(pos) || pos->ns != ns)) { + while (pos && (!kernfs_active(pos) || + kernfs_ns_id(pos->ns) != kernfs_ns_id(ns))) { struct rb_node *node = rb_next(&pos->rb); if (!node) pos = NULL; @@ -1882,7 +1900,8 @@ static struct kernfs_node *kernfs_dir_next_pos(const struct ns_common *ns, pos = NULL; else pos = rb_to_kn(node); - } while (pos && (!kernfs_active(pos) || pos->ns != ns)); + } while (pos && (!kernfs_active(pos) || + kernfs_ns_id(pos->ns) != kernfs_ns_id(ns))); } return pos; } From cb76a81c7cec37bdf525164561b02665cd763421 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Thu, 2 Apr 2026 09:12:11 +0200 Subject: [PATCH 225/248] kernfs: make directory seek namespace-aware The rbtree backing kernfs directories is ordered by (hash, ns_id, name) but kernfs_dir_pos() only searches by hash when seeking to a position during readdir. When two nodes from different namespaces share the same hash value, the binary search can land on a node in the wrong namespace. The subsequent skip-forward loop walks rb_next() and may overshoot the correct node, silently dropping an entry from the readdir results. With the recent switch from raw namespace pointers to public namespace ids as hash seeds, computing hash collisions became an offline operation. An unprivileged user could unshare into a new network namespace, create a single interface whose name-hash collides with a target entry in init_net, and cause a victim's seekdir/readdir on /sys/class/net to miss that entry. Fix this by extending the rbtree search in kernfs_dir_pos() to also compare namespace ids when hashes match. Since the rbtree is already ordered by (hash, ns_id, name), this makes the seek land directly in the correct namespace's range, eliminating the wrong-namespace overshoot. Signed-off-by: Christian Brauner --- fs/kernfs/dir.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/fs/kernfs/dir.c b/fs/kernfs/dir.c index f5cf1d74b5f1..22a4dff2a3af 100644 --- a/fs/kernfs/dir.c +++ b/fs/kernfs/dir.c @@ -1866,6 +1866,7 @@ static struct kernfs_node *kernfs_dir_pos(const struct ns_common *ns, } if (!pos && (hash > 1) && (hash < INT_MAX)) { struct rb_node *node = parent->dir.children.rb_node; + u64 ns_id = kernfs_ns_id(ns); while (node) { pos = rb_to_kn(node); @@ -1873,6 +1874,10 @@ static struct kernfs_node *kernfs_dir_pos(const struct ns_common *ns, node = node->rb_left; else if (hash > pos->hash) node = node->rb_right; + else if (ns_id < kernfs_ns_id(pos->ns)) + node = node->rb_left; + else if (ns_id > kernfs_ns_id(pos->ns)) + node = node->rb_right; else break; } From 3fd0da4fd8851a7e62d009b7db6c4a05b092bc19 Mon Sep 17 00:00:00 2001 From: David Carlier Date: Sun, 5 Apr 2026 06:52:39 +0100 Subject: [PATCH 226/248] net: lan966x: fix page_pool error handling in lan966x_fdma_rx_alloc_page_pool() page_pool_create() can return an ERR_PTR on failure. The return value is used unconditionally in the loop that follows, passing the error pointer through xdp_rxq_info_reg_mem_model() into page_pool_use_xdp_mem(), which dereferences it, causing a kernel oops. Add an IS_ERR check after page_pool_create() to return early on failure. Fixes: 11871aba1974 ("net: lan96x: Use page_pool API") Cc: stable@vger.kernel.org Signed-off-by: David Carlier Link: https://patch.msgid.link/20260405055241.35767-2-devnexen@gmail.com Signed-off-by: Paolo Abeni --- drivers/net/ethernet/microchip/lan966x/lan966x_fdma.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/microchip/lan966x/lan966x_fdma.c b/drivers/net/ethernet/microchip/lan966x/lan966x_fdma.c index 7b6369e43451..74851c63e46a 100644 --- a/drivers/net/ethernet/microchip/lan966x/lan966x_fdma.c +++ b/drivers/net/ethernet/microchip/lan966x/lan966x_fdma.c @@ -91,6 +91,8 @@ static int lan966x_fdma_rx_alloc_page_pool(struct lan966x_rx *rx) pp_params.dma_dir = DMA_BIDIRECTIONAL; rx->page_pool = page_pool_create(&pp_params); + if (unlikely(IS_ERR(rx->page_pool))) + return PTR_ERR(rx->page_pool); for (int i = 0; i < lan966x->num_phys_ports; ++i) { struct lan966x_port *port; From 076344a6ad9d1308faaed1402fdcfdda68b604ab Mon Sep 17 00:00:00 2001 From: David Carlier Date: Sun, 5 Apr 2026 06:52:40 +0100 Subject: [PATCH 227/248] net: lan966x: fix page pool leak in error paths lan966x_fdma_rx_alloc() creates a page pool but does not destroy it if the subsequent fdma_alloc_coherent() call fails, leaking the pool. Similarly, lan966x_fdma_init() frees the coherent DMA memory when lan966x_fdma_tx_alloc() fails but does not destroy the page pool that was successfully created by lan966x_fdma_rx_alloc(), leaking it. Add the missing page_pool_destroy() calls in both error paths. Fixes: 11871aba1974 ("net: lan96x: Use page_pool API") Cc: stable@vger.kernel.org Signed-off-by: David Carlier Link: https://patch.msgid.link/20260405055241.35767-3-devnexen@gmail.com Signed-off-by: Paolo Abeni --- drivers/net/ethernet/microchip/lan966x/lan966x_fdma.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/microchip/lan966x/lan966x_fdma.c b/drivers/net/ethernet/microchip/lan966x/lan966x_fdma.c index 74851c63e46a..10773fe93d4d 100644 --- a/drivers/net/ethernet/microchip/lan966x/lan966x_fdma.c +++ b/drivers/net/ethernet/microchip/lan966x/lan966x_fdma.c @@ -119,8 +119,10 @@ static int lan966x_fdma_rx_alloc(struct lan966x_rx *rx) return PTR_ERR(rx->page_pool); err = fdma_alloc_coherent(lan966x->dev, fdma); - if (err) + if (err) { + page_pool_destroy(rx->page_pool); return err; + } fdma_dcbs_init(fdma, FDMA_DCB_INFO_DATAL(fdma->db_size), FDMA_DCB_STATUS_INTR); @@ -957,6 +959,7 @@ int lan966x_fdma_init(struct lan966x *lan966x) err = lan966x_fdma_tx_alloc(&lan966x->tx); if (err) { fdma_free_coherent(lan966x->dev, &lan966x->rx.fdma); + page_pool_destroy(lan966x->rx.page_pool); return err; } From 59c3d55a946cacdb4181600723c20ac4f4c20c84 Mon Sep 17 00:00:00 2001 From: David Carlier Date: Sun, 5 Apr 2026 06:52:41 +0100 Subject: [PATCH 228/248] net: lan966x: fix use-after-free and leak in lan966x_fdma_reload() When lan966x_fdma_reload() fails to allocate new RX buffers, the restore path restarts DMA using old descriptors whose pages were already freed via lan966x_fdma_rx_free_pages(). Since page_pool_put_full_page() can release pages back to the buddy allocator, the hardware may DMA into memory now owned by other kernel subsystems. Additionally, on the restore path, the newly created page pool (if allocation partially succeeded) is overwritten without being destroyed, leaking it. Fix both issues by deferring the release of old pages until after the new allocation succeeds. Save the old page array before the allocation so old pages can be freed on the success path. On the failure path, the old descriptors, pages and page pool are all still valid, making the restore safe. Also ensure the restore path re-enables NAPI and wakes the netdev, matching the success path. Fixes: 89ba464fcf54 ("net: lan966x: refactor buffer reload function") Cc: stable@vger.kernel.org Signed-off-by: David Carlier Link: https://patch.msgid.link/20260405055241.35767-4-devnexen@gmail.com Signed-off-by: Paolo Abeni --- .../ethernet/microchip/lan966x/lan966x_fdma.c | 21 ++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/microchip/lan966x/lan966x_fdma.c b/drivers/net/ethernet/microchip/lan966x/lan966x_fdma.c index 10773fe93d4d..f8ce735a7fc0 100644 --- a/drivers/net/ethernet/microchip/lan966x/lan966x_fdma.c +++ b/drivers/net/ethernet/microchip/lan966x/lan966x_fdma.c @@ -812,9 +812,15 @@ static int lan966x_qsys_sw_status(struct lan966x *lan966x) static int lan966x_fdma_reload(struct lan966x *lan966x, int new_mtu) { + struct page *(*old_pages)[FDMA_RX_DCB_MAX_DBS]; struct page_pool *page_pool; struct fdma fdma_rx_old; - int err; + int err, i, j; + + old_pages = kmemdup(lan966x->rx.page, sizeof(lan966x->rx.page), + GFP_KERNEL); + if (!old_pages) + return -ENOMEM; /* Store these for later to free them */ memcpy(&fdma_rx_old, &lan966x->rx.fdma, sizeof(struct fdma)); @@ -825,7 +831,6 @@ static int lan966x_fdma_reload(struct lan966x *lan966x, int new_mtu) lan966x_fdma_stop_netdev(lan966x); lan966x_fdma_rx_disable(&lan966x->rx); - lan966x_fdma_rx_free_pages(&lan966x->rx); lan966x->rx.page_order = round_up(new_mtu, PAGE_SIZE) / PAGE_SIZE - 1; lan966x->rx.max_mtu = new_mtu; err = lan966x_fdma_rx_alloc(&lan966x->rx); @@ -833,6 +838,11 @@ static int lan966x_fdma_reload(struct lan966x *lan966x, int new_mtu) goto restore; lan966x_fdma_rx_start(&lan966x->rx); + for (i = 0; i < fdma_rx_old.n_dcbs; ++i) + for (j = 0; j < fdma_rx_old.n_dbs; ++j) + page_pool_put_full_page(page_pool, + old_pages[i][j], false); + fdma_free_coherent(lan966x->dev, &fdma_rx_old); page_pool_destroy(page_pool); @@ -840,12 +850,17 @@ static int lan966x_fdma_reload(struct lan966x *lan966x, int new_mtu) lan966x_fdma_wakeup_netdev(lan966x); napi_enable(&lan966x->napi); - return err; + kfree(old_pages); + return 0; restore: lan966x->rx.page_pool = page_pool; memcpy(&lan966x->rx.fdma, &fdma_rx_old, sizeof(struct fdma)); lan966x_fdma_rx_start(&lan966x->rx); + lan966x_fdma_wakeup_netdev(lan966x); + napi_enable(&lan966x->napi); + + kfree(old_pages); return err; } From 57df6923ca53b524d06d2347b896d9de74b3bc86 Mon Sep 17 00:00:00 2001 From: Samasth Norway Ananda Date: Thu, 9 Apr 2026 11:58:53 -0700 Subject: [PATCH 229/248] gpio: tegra: return -ENOMEM on allocation failure in probe devm_kzalloc() failure in tegra_gpio_probe() returns -ENODEV, which indicates "no such device". The correct error code for a memory allocation failure is -ENOMEM. Signed-off-by: Samasth Norway Ananda Link: https://patch.msgid.link/20260409185853.2163034-1-samasth.norway.ananda@oracle.com Signed-off-by: Bartosz Golaszewski --- drivers/gpio/gpio-tegra.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpio/gpio-tegra.c b/drivers/gpio/gpio-tegra.c index b14052fe64ac..df06b56a2ade 100644 --- a/drivers/gpio/gpio-tegra.c +++ b/drivers/gpio/gpio-tegra.c @@ -698,7 +698,7 @@ static int tegra_gpio_probe(struct platform_device *pdev) tgi = devm_kzalloc(&pdev->dev, sizeof(*tgi), GFP_KERNEL); if (!tgi) - return -ENODEV; + return -ENOMEM; tgi->soc = of_device_get_match_data(&pdev->dev); tgi->dev = &pdev->dev; From ba893caead54745595e29953f0531cf3651610aa Mon Sep 17 00:00:00 2001 From: David Arcari Date: Thu, 19 Mar 2026 10:03:07 -0400 Subject: [PATCH 230/248] tools/power turbostat: Allow execution to continue after perf_l2_init() failure Currently, if perf_l2_init() fails turbostat exits after issuing the following error (which was encountered on AlderLake): turbostat: perf_l2_init(cpu0, 0x0, 0xff24) REFS: Invalid argument This occurs because perf_l2_init() calls err(). However, the code has been written in such a manner that it is able to perform cleanup and continue. Therefore, this issue can be addressed by changing the appropriate calls to err() to warnx(). Additionally, correct the PMU type arguments passed to the warning strings in the ecore and lcore blocks so the logs accurately reflect the failing counter type. Signed-off-by: David Arcari Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index 34e2143cd4b3..e9e8ef72395a 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -9405,13 +9405,13 @@ void perf_l2_init(void) if (!is_hybrid) { fd_l2_percpu[cpu] = open_perf_counter(cpu, perf_pmu_types.uniform, perf_model_support->first.refs, -1, PERF_FORMAT_GROUP); if (fd_l2_percpu[cpu] == -1) { - err(-1, "%s(cpu%d, 0x%x, 0x%llx) REFS", __func__, cpu, perf_pmu_types.uniform, perf_model_support->first.refs); + warnx("%s(cpu%d, 0x%x, 0x%llx) REFS", __func__, cpu, perf_pmu_types.uniform, perf_model_support->first.refs); free_fd_l2_percpu(); return; } retval = open_perf_counter(cpu, perf_pmu_types.uniform, perf_model_support->first.hits, fd_l2_percpu[cpu], PERF_FORMAT_GROUP); if (retval == -1) { - err(-1, "%s(cpu%d, 0x%x, 0x%llx) HITS", __func__, cpu, perf_pmu_types.uniform, perf_model_support->first.hits); + warnx("%s(cpu%d, 0x%x, 0x%llx) HITS", __func__, cpu, perf_pmu_types.uniform, perf_model_support->first.hits); free_fd_l2_percpu(); return; } @@ -9420,39 +9420,39 @@ void perf_l2_init(void) if (perf_pcore_set && CPU_ISSET_S(cpu, cpu_possible_setsize, perf_pcore_set)) { fd_l2_percpu[cpu] = open_perf_counter(cpu, perf_pmu_types.pcore, perf_model_support->first.refs, -1, PERF_FORMAT_GROUP); if (fd_l2_percpu[cpu] == -1) { - err(-1, "%s(cpu%d, 0x%x, 0x%llx) REFS", __func__, cpu, perf_pmu_types.pcore, perf_model_support->first.refs); + warnx("%s(cpu%d, 0x%x, 0x%llx) REFS", __func__, cpu, perf_pmu_types.pcore, perf_model_support->first.refs); free_fd_l2_percpu(); return; } retval = open_perf_counter(cpu, perf_pmu_types.pcore, perf_model_support->first.hits, fd_l2_percpu[cpu], PERF_FORMAT_GROUP); if (retval == -1) { - err(-1, "%s(cpu%d, 0x%x, 0x%llx) HITS", __func__, cpu, perf_pmu_types.pcore, perf_model_support->first.hits); + warnx("%s(cpu%d, 0x%x, 0x%llx) HITS", __func__, cpu, perf_pmu_types.pcore, perf_model_support->first.hits); free_fd_l2_percpu(); return; } } else if (perf_ecore_set && CPU_ISSET_S(cpu, cpu_possible_setsize, perf_ecore_set)) { fd_l2_percpu[cpu] = open_perf_counter(cpu, perf_pmu_types.ecore, perf_model_support->second.refs, -1, PERF_FORMAT_GROUP); if (fd_l2_percpu[cpu] == -1) { - err(-1, "%s(cpu%d, 0x%x, 0x%llx) REFS", __func__, cpu, perf_pmu_types.pcore, perf_model_support->second.refs); + warnx("%s(cpu%d, 0x%x, 0x%llx) REFS", __func__, cpu, perf_pmu_types.ecore, perf_model_support->second.refs); free_fd_l2_percpu(); return; } retval = open_perf_counter(cpu, perf_pmu_types.ecore, perf_model_support->second.hits, fd_l2_percpu[cpu], PERF_FORMAT_GROUP); if (retval == -1) { - err(-1, "%s(cpu%d, 0x%x, 0x%llx) HITS", __func__, cpu, perf_pmu_types.pcore, perf_model_support->second.hits); + warnx("%s(cpu%d, 0x%x, 0x%llx) HITS", __func__, cpu, perf_pmu_types.ecore, perf_model_support->second.hits); free_fd_l2_percpu(); return; } } else if (perf_lcore_set && CPU_ISSET_S(cpu, cpu_possible_setsize, perf_lcore_set)) { fd_l2_percpu[cpu] = open_perf_counter(cpu, perf_pmu_types.lcore, perf_model_support->third.refs, -1, PERF_FORMAT_GROUP); if (fd_l2_percpu[cpu] == -1) { - err(-1, "%s(cpu%d, 0x%x, 0x%llx) REFS", __func__, cpu, perf_pmu_types.pcore, perf_model_support->third.refs); + warnx("%s(cpu%d, 0x%x, 0x%llx) REFS", __func__, cpu, perf_pmu_types.lcore, perf_model_support->third.refs); free_fd_l2_percpu(); return; } retval = open_perf_counter(cpu, perf_pmu_types.lcore, perf_model_support->third.hits, fd_l2_percpu[cpu], PERF_FORMAT_GROUP); if (retval == -1) { - err(-1, "%s(cpu%d, 0x%x, 0x%llx) HITS", __func__, cpu, perf_pmu_types.pcore, perf_model_support->third.hits); + warnx("%s(cpu%d, 0x%x, 0x%llx) HITS", __func__, cpu, perf_pmu_types.lcore, perf_model_support->third.hits); free_fd_l2_percpu(); return; } From d6e152d905bdb1f32f9d99775e2f453350399a6a Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 7 Apr 2026 10:54:17 +0200 Subject: [PATCH 231/248] clockevents: Prevent timer interrupt starvation Calvin reported an odd NMI watchdog lockup which claims that the CPU locked up in user space. He provided a reproducer, which sets up a timerfd based timer and then rearms it in a loop with an absolute expiry time of 1ns. As the expiry time is in the past, the timer ends up as the first expiring timer in the per CPU hrtimer base and the clockevent device is programmed with the minimum delta value. If the machine is fast enough, this ends up in a endless loop of programming the delta value to the minimum value defined by the clock event device, before the timer interrupt can fire, which starves the interrupt and consequently triggers the lockup detector because the hrtimer callback of the lockup mechanism is never invoked. As a first step to prevent this, avoid reprogramming the clock event device when: - a forced minimum delta event is pending - the new expiry delta is less then or equal to the minimum delta Thanks to Calvin for providing the reproducer and to Borislav for testing and providing data from his Zen5 machine. The problem is not limited to Zen5, but depending on the underlying clock event device (e.g. TSC deadline timer on Intel) and the CPU speed not necessarily observable. This change serves only as the last resort and further changes will be made to prevent this scenario earlier in the call chain as far as possible. [ tglx: Updated to restore the old behaviour vs. !force and delta <= 0 and fixed up the tick-broadcast handlers as pointed out by Borislav ] Fixes: d316c57ff6bf ("[PATCH] clockevents: add core functionality") Reported-by: Calvin Owens Signed-off-by: Thomas Gleixner Tested-by: Calvin Owens Tested-by: Borislav Petkov Link: https://lore.kernel.org/lkml/acMe-QZUel-bBYUh@mozart.vkv.me/ Link: https://patch.msgid.link/20260407083247.562657657@kernel.org --- include/linux/clockchips.h | 2 ++ kernel/time/clockevents.c | 27 +++++++++++++++++++-------- kernel/time/hrtimer.c | 1 + kernel/time/tick-broadcast.c | 8 +++++++- kernel/time/tick-common.c | 1 + kernel/time/tick-sched.c | 1 + 6 files changed, 31 insertions(+), 9 deletions(-) diff --git a/include/linux/clockchips.h b/include/linux/clockchips.h index b0df28ddd394..50cdc9da8d32 100644 --- a/include/linux/clockchips.h +++ b/include/linux/clockchips.h @@ -80,6 +80,7 @@ enum clock_event_state { * @shift: nanoseconds to cycles divisor (power of two) * @state_use_accessors:current state of the device, assigned by the core code * @features: features + * @next_event_forced: True if the last programming was a forced event * @retries: number of forced programming retries * @set_state_periodic: switch state to periodic * @set_state_oneshot: switch state to oneshot @@ -108,6 +109,7 @@ struct clock_event_device { u32 shift; enum clock_event_state state_use_accessors; unsigned int features; + unsigned int next_event_forced; unsigned long retries; int (*set_state_periodic)(struct clock_event_device *); diff --git a/kernel/time/clockevents.c b/kernel/time/clockevents.c index eaae1ce9f060..38570998a19b 100644 --- a/kernel/time/clockevents.c +++ b/kernel/time/clockevents.c @@ -172,6 +172,7 @@ void clockevents_shutdown(struct clock_event_device *dev) { clockevents_switch_state(dev, CLOCK_EVT_STATE_SHUTDOWN); dev->next_event = KTIME_MAX; + dev->next_event_forced = 0; } /** @@ -305,7 +306,6 @@ int clockevents_program_event(struct clock_event_device *dev, ktime_t expires, { unsigned long long clc; int64_t delta; - int rc; if (WARN_ON_ONCE(expires < 0)) return -ETIME; @@ -324,16 +324,27 @@ int clockevents_program_event(struct clock_event_device *dev, ktime_t expires, return dev->set_next_ktime(expires, dev); delta = ktime_to_ns(ktime_sub(expires, ktime_get())); - if (delta <= 0) - return force ? clockevents_program_min_delta(dev) : -ETIME; - delta = min(delta, (int64_t) dev->max_delta_ns); - delta = max(delta, (int64_t) dev->min_delta_ns); + /* Required for tick_periodic() during early boot */ + if (delta <= 0 && !force) + return -ETIME; - clc = ((unsigned long long) delta * dev->mult) >> dev->shift; - rc = dev->set_next_event((unsigned long) clc, dev); + if (delta > (int64_t)dev->min_delta_ns) { + delta = min(delta, (int64_t) dev->max_delta_ns); + clc = ((unsigned long long) delta * dev->mult) >> dev->shift; + if (!dev->set_next_event((unsigned long) clc, dev)) + return 0; + } - return (rc && force) ? clockevents_program_min_delta(dev) : rc; + if (dev->next_event_forced) + return 0; + + if (dev->set_next_event(dev->min_delta_ticks, dev)) { + if (!force || clockevents_program_min_delta(dev)) + return -ETIME; + } + dev->next_event_forced = 1; + return 0; } /* diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c index 860af7a58428..1e37142fe52f 100644 --- a/kernel/time/hrtimer.c +++ b/kernel/time/hrtimer.c @@ -1888,6 +1888,7 @@ void hrtimer_interrupt(struct clock_event_device *dev) BUG_ON(!cpu_base->hres_active); cpu_base->nr_events++; dev->next_event = KTIME_MAX; + dev->next_event_forced = 0; raw_spin_lock_irqsave(&cpu_base->lock, flags); entry_time = now = hrtimer_update_base(cpu_base); diff --git a/kernel/time/tick-broadcast.c b/kernel/time/tick-broadcast.c index f63c65881364..7e57fa31ee26 100644 --- a/kernel/time/tick-broadcast.c +++ b/kernel/time/tick-broadcast.c @@ -76,8 +76,10 @@ const struct clock_event_device *tick_get_wakeup_device(int cpu) */ static void tick_broadcast_start_periodic(struct clock_event_device *bc) { - if (bc) + if (bc) { + bc->next_event_forced = 0; tick_setup_periodic(bc, 1); + } } /* @@ -403,6 +405,7 @@ static void tick_handle_periodic_broadcast(struct clock_event_device *dev) bool bc_local; raw_spin_lock(&tick_broadcast_lock); + tick_broadcast_device.evtdev->next_event_forced = 0; /* Handle spurious interrupts gracefully */ if (clockevent_state_shutdown(tick_broadcast_device.evtdev)) { @@ -696,6 +699,7 @@ static void tick_handle_oneshot_broadcast(struct clock_event_device *dev) raw_spin_lock(&tick_broadcast_lock); dev->next_event = KTIME_MAX; + tick_broadcast_device.evtdev->next_event_forced = 0; next_event = KTIME_MAX; cpumask_clear(tmpmask); now = ktime_get(); @@ -1063,6 +1067,7 @@ static void tick_broadcast_setup_oneshot(struct clock_event_device *bc, bc->event_handler = tick_handle_oneshot_broadcast; + bc->next_event_forced = 0; bc->next_event = KTIME_MAX; /* @@ -1175,6 +1180,7 @@ void hotplug_cpu__broadcast_tick_pull(int deadcpu) } /* This moves the broadcast assignment to this CPU: */ + bc->next_event_forced = 0; clockevents_program_event(bc, bc->next_event, 1); } raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags); diff --git a/kernel/time/tick-common.c b/kernel/time/tick-common.c index d305d8521896..6a9198a4279b 100644 --- a/kernel/time/tick-common.c +++ b/kernel/time/tick-common.c @@ -110,6 +110,7 @@ void tick_handle_periodic(struct clock_event_device *dev) int cpu = smp_processor_id(); ktime_t next = dev->next_event; + dev->next_event_forced = 0; tick_periodic(cpu); /* diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index 36449f0010a4..d1f27df1e60e 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -1513,6 +1513,7 @@ static void tick_nohz_lowres_handler(struct clock_event_device *dev) struct tick_sched *ts = this_cpu_ptr(&tick_cpu_sched); dev->next_event = KTIME_MAX; + dev->next_event_forced = 0; if (likely(tick_nohz_handler(&ts->sched_timer) == HRTIMER_RESTART)) tick_program_event(hrtimer_get_expires(&ts->sched_timer), 1); From 31d00156e50ecad37f2cb6cbf04aaa9a260505ef Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Sat, 4 Apr 2026 08:29:58 +0800 Subject: [PATCH 232/248] crypto: af_alg - Fix page reassignment overflow in af_alg_pull_tsgl When page reassignment was added to af_alg_pull_tsgl the original loop wasn't updated so it may try to reassign one more page than necessary. Add the check to the reassignment so that this does not happen. Also update the comment which still refers to the obsolete offset argument. Reported-by: syzbot+d23888375c2737c17ba5@syzkaller.appspotmail.com Fixes: e870456d8e7c ("crypto: algif_skcipher - overhaul memory management") Signed-off-by: Herbert Xu --- crypto/af_alg.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/crypto/af_alg.c b/crypto/af_alg.c index 437f3e77c7e0..dd0e5be4d8c0 100644 --- a/crypto/af_alg.c +++ b/crypto/af_alg.c @@ -705,8 +705,8 @@ void af_alg_pull_tsgl(struct sock *sk, size_t used, struct scatterlist *dst) * Assumption: caller created af_alg_count_tsgl(len) * SG entries in dst. */ - if (dst) { - /* reassign page to dst after offset */ + if (dst && plen) { + /* reassign page to dst */ get_page(page); sg_set_page(dst + j, page, plen, sg[i].offset); j++; From d702c3408213bb12bd570bb97204d8340d141c51 Mon Sep 17 00:00:00 2001 From: Lukas Wunner Date: Tue, 7 Apr 2026 12:58:18 +0200 Subject: [PATCH 233/248] X.509: Fix out-of-bounds access when parsing extensions Leo reports an out-of-bounds access when parsing a certificate with empty Basic Constraints or Key Usage extension because the first byte of the extension is read before checking its length. Fix it. The bug can be triggered by an unprivileged user by submitting a specially crafted certificate to the kernel through the keyrings(7) API. Leo has demonstrated this with a proof-of-concept program responsibly disclosed off-list. Fixes: 30eae2b037af ("KEYS: X.509: Parse Basic Constraints for CA") Fixes: 567671281a75 ("KEYS: X.509: Parse Key Usage") Reported-by: Leo Lin # off-list Signed-off-by: Lukas Wunner Reviewed-by: Ignat Korchagin Cc: stable@vger.kernel.org # v6.4+ Signed-off-by: Herbert Xu --- crypto/asymmetric_keys/x509_cert_parser.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/crypto/asymmetric_keys/x509_cert_parser.c b/crypto/asymmetric_keys/x509_cert_parser.c index 37e4fb9da106..bfd10f0195e0 100644 --- a/crypto/asymmetric_keys/x509_cert_parser.c +++ b/crypto/asymmetric_keys/x509_cert_parser.c @@ -609,10 +609,10 @@ int x509_process_extension(void *context, size_t hdrlen, * 0x04 is where keyCertSign lands in this bit string * 0x80 is where digitalSignature lands in this bit string */ - if (v[0] != ASN1_BTS) - return -EBADMSG; if (vlen < 4) return -EBADMSG; + if (v[0] != ASN1_BTS) + return -EBADMSG; if (v[2] >= 8) return -EBADMSG; if (v[3] & 0x80) @@ -645,10 +645,10 @@ int x509_process_extension(void *context, size_t hdrlen, * (Expect 0xFF if the CA is TRUE) * vlen should match the entire extension size */ - if (v[0] != (ASN1_CONS_BIT | ASN1_SEQ)) - return -EBADMSG; if (vlen < 2) return -EBADMSG; + if (v[0] != (ASN1_CONS_BIT | ASN1_SEQ)) + return -EBADMSG; if (v[1] != vlen - 2) return -EBADMSG; /* Empty SEQUENCE means CA:FALSE (default value omitted per DER) */ From 3d14bd48e3a77091cbce637a12c2ae31b4a1687c Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Sun, 12 Apr 2026 13:32:21 +0800 Subject: [PATCH 234/248] crypto: algif_aead - Fix minimum RX size check for decryption The check for the minimum receive buffer size did not take the tag size into account during decryption. Fix this by adding the required extra length. Reported-by: syzbot+aa11561819dc42ebbc7c@syzkaller.appspotmail.com Reported-by: Daniel Pouzzner Fixes: d887c52d6ae4 ("crypto: algif_aead - overhaul memory management") Signed-off-by: Herbert Xu --- crypto/algif_aead.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crypto/algif_aead.c b/crypto/algif_aead.c index dda15bb05e89..f8bd45f7dc83 100644 --- a/crypto/algif_aead.c +++ b/crypto/algif_aead.c @@ -144,7 +144,7 @@ static int _aead_recvmsg(struct socket *sock, struct msghdr *msg, if (usedpages < outlen) { size_t less = outlen - usedpages; - if (used < less) { + if (used < less + (ctx->enc ? 0 : as)) { err = -EINVAL; goto free; } From 028ef9c96e96197026887c0f092424679298aae8 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 12 Apr 2026 13:48:06 -0700 Subject: [PATCH 235/248] Linux 7.0 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 4f54c5685638..36d0a32fbe49 100644 --- a/Makefile +++ b/Makefile @@ -2,7 +2,7 @@ VERSION = 7 PATCHLEVEL = 0 SUBLEVEL = 0 -EXTRAVERSION = -rc7 +EXTRAVERSION = NAME = Baby Opossum Posse # *DOCUMENTATION* From bc9b1ebaa7624edde54d4ae842d11cebb26db09b Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 13 Apr 2026 09:00:45 +0200 Subject: [PATCH 236/248] ASoC: tas2781: fix unused-const-variable warning When both CONFIG_OF and CONFIG_ACPI are disabled, the ID table is not referenced any more: sound/soc/codecs/tas2781-i2c.c:102:35: error: 'tasdevice_id' defined but not used [-Werror=unused-const-variable=] 102 | static const struct i2c_device_id tasdevice_id[] = { | ^~~~~~~~~~~~ Remove the #ifdef checks and just include the ID tables unconditionally to get a clean build in all configurations. The code already uses IS_ENABLED() checks for both to benefit from dead code elimination and the ID tables are small enough that they can just be included all the time. Fixes: 9a52d1b7cb4a ("ASoC: tas2781: Explicit association of Device, Device Name, and Device ID") Signed-off-by: Arnd Bergmann Link: https://patch.msgid.link/20260413070059.3828364-1-arnd@kernel.org Signed-off-by: Mark Brown --- sound/soc/codecs/tas2781-i2c.c | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/sound/soc/codecs/tas2781-i2c.c b/sound/soc/codecs/tas2781-i2c.c index c593f9da0c5b..8af30f4d68da 100644 --- a/sound/soc/codecs/tas2781-i2c.c +++ b/sound/soc/codecs/tas2781-i2c.c @@ -122,7 +122,6 @@ static const struct i2c_device_id tasdevice_id[] = { {} }; -#ifdef CONFIG_OF static const struct of_device_id tasdevice_of_match[] = { { .compatible = "ti,tas2020", .data = &tasdevice_id[TAS2020] }, { .compatible = "ti,tas2118", .data = &tasdevice_id[TAS2118] }, @@ -146,7 +145,6 @@ static const struct of_device_id tasdevice_of_match[] = { {}, }; MODULE_DEVICE_TABLE(of, tasdevice_of_match); -#endif /** * tas2781_digital_getvol - get the volum control @@ -2083,7 +2081,6 @@ static void tasdevice_i2c_remove(struct i2c_client *client) tasdevice_remove(tas_priv); } -#ifdef CONFIG_ACPI static const struct acpi_device_id tasdevice_acpi_match[] = { { "TXNW2020", (kernel_ulong_t)&tasdevice_id[TAS2020] }, { "TXNW2118", (kernel_ulong_t)&tasdevice_id[TAS2118] }, @@ -2108,15 +2105,12 @@ static const struct acpi_device_id tasdevice_acpi_match[] = { }; MODULE_DEVICE_TABLE(acpi, tasdevice_acpi_match); -#endif static struct i2c_driver tasdevice_i2c_driver = { .driver = { .name = "tasdev-codec", - .of_match_table = of_match_ptr(tasdevice_of_match), -#ifdef CONFIG_ACPI - .acpi_match_table = ACPI_PTR(tasdevice_acpi_match), -#endif + .of_match_table = tasdevice_of_match, + .acpi_match_table = tasdevice_acpi_match, }, .probe = tasdevice_i2c_probe, .remove = tasdevice_i2c_remove, From 54a032d3e62fd1792cb16d8096aaf00397589c5f Mon Sep 17 00:00:00 2001 From: Peng Fan Date: Mon, 13 Apr 2026 18:52:43 +0800 Subject: [PATCH 237/248] ASoC: pxa2xx-ac97: fix error handling for reset GPIO descriptor The reset GPIO obtained via devm_gpiod_get() may return an ERR_PTR() when the GPIO is missing or an error occurs. The current code unconditionally assigns PTR_ERR() to ret and later dereferences rst_gpio via desc_to_gpio(), which is incorrect when rst_gpio is an error pointer. Rework the logic to first check IS_ERR(rst_gpio) before converting the descriptor. Handle -ENOENT by disabling reset GPIO support, and return other errors to the caller as expected. Fixes: c76d50b71e89 ("ASoC: ac97: Convert to GPIO descriptors") Reported-by: kernel test robot Reported-by: Dan Carpenter Closes: https://lore.kernel.org/r/202604041426.i2C1xqHk-lkp@intel.com/ Signed-off-by: Peng Fan Link: https://patch.msgid.link/20260413-ac97-v1-1-b44b9e084307@nxp.com Signed-off-by: Mark Brown --- sound/arm/pxa2xx-ac97-lib.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/sound/arm/pxa2xx-ac97-lib.c b/sound/arm/pxa2xx-ac97-lib.c index 1e114dbcf93c..79eb557d4942 100644 --- a/sound/arm/pxa2xx-ac97-lib.c +++ b/sound/arm/pxa2xx-ac97-lib.c @@ -331,12 +331,15 @@ int pxa2xx_ac97_hw_probe(struct platform_device *dev) if (dev->dev.of_node) { /* Assert reset using GPIOD_OUT_HIGH, because reset is GPIO_ACTIVE_LOW */ rst_gpio = devm_gpiod_get(&dev->dev, "reset", GPIOD_OUT_HIGH); - ret = PTR_ERR(rst_gpio); - if (ret == -ENOENT) - reset_gpio = -1; - else if (ret) - return ret; - reset_gpio = desc_to_gpio(rst_gpio); + if (IS_ERR(rst_gpio)) { + ret = PTR_ERR(rst_gpio); + if (ret == -ENOENT) + reset_gpio = -1; + else if (ret) + return ret; + } else { + reset_gpio = desc_to_gpio(rst_gpio); + } } else { if (cpu_is_pxa27x()) reset_gpio = 113; From cf162476f7e082662691e75f96bfc99c6a64810d Mon Sep 17 00:00:00 2001 From: Shuming Fan Date: Tue, 14 Apr 2026 15:14:41 +0800 Subject: [PATCH 238/248] ASoC: rt1320: fix the warning 'rae_fw' from request_firmware() not released New smatch warnings: sound/soc/codecs/rt1320-sdw.c:1575 rt1320_rae_load() warn: 'rae_fw' from request_firmware() not released on lines: 1575. Fixes: 22937af75abb ("ASoC: rt1320: support RAE parameters loading") Reported-by: kernel test robot Reported-by: Dan Carpenter Closes: https://lore.kernel.org/r/202604111548.EL450PMb-lkp@intel.com/ Signed-off-by: Shuming Fan Link: https://patch.msgid.link/20260414071441.1524039-1-shumingf@realtek.com Signed-off-by: Mark Brown --- sound/soc/codecs/rt1320-sdw.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/soc/codecs/rt1320-sdw.c b/sound/soc/codecs/rt1320-sdw.c index b0aeeab26bd9..192faa431b5e 100644 --- a/sound/soc/codecs/rt1320-sdw.c +++ b/sound/soc/codecs/rt1320-sdw.c @@ -1498,6 +1498,7 @@ static int rt1320_rae_load(struct rt1320_sdw_priv *rt1320) } if (!retry && !(value & 0x40)) { dev_err(dev, "%s: RAE is not ready to load\n", __func__); + release_firmware(rae_fw); return -ETIMEDOUT; } From ab463b4655857b1865c611ff5fbcb752cd804b0b Mon Sep 17 00:00:00 2001 From: Bard Liao Date: Mon, 13 Apr 2026 14:07:59 +0800 Subject: [PATCH 239/248] ASoC: SOF: Intel: NVL: add platform name MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The platform name will be used in the topology name. Fixes: 1800bcdc68ead ("ASoC: SOF: Intel: add support for Nova Lake NVL") Signed-off-by: Bard Liao Reviewed-by: Péter Ujfalusi Link: https://patch.msgid.link/20260413060800.3156425-2-yung-chuan.liao@linux.intel.com Signed-off-by: Mark Brown --- sound/soc/sof/intel/nvl.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/soc/sof/intel/nvl.c b/sound/soc/sof/intel/nvl.c index 0d763998558f..0ee3507824e5 100644 --- a/sound/soc/sof/intel/nvl.c +++ b/sound/soc/sof/intel/nvl.c @@ -48,6 +48,7 @@ const struct sof_intel_dsp_desc nvl_chip_info = { .power_down_dsp = mtl_power_down_dsp, .disable_interrupts = lnl_dsp_disable_interrupts, .hw_ip_version = SOF_INTEL_ACE_4_0, + .platform = "nvl", }; const struct sof_intel_dsp_desc nvl_s_chip_info = { From a158fe7b0c817eebcf2871fe1306347a376030e8 Mon Sep 17 00:00:00 2001 From: Bard Liao Date: Mon, 13 Apr 2026 14:08:00 +0800 Subject: [PATCH 240/248] ASoC: SOF: Intel: NVL-S: add platform name MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The platform name will be used in the topology name. Fixes: d3df422f66e8a ("ASoC: SOF: Intel: add initial support for NVL-S") Signed-off-by: Bard Liao Reviewed-by: Péter Ujfalusi Link: https://patch.msgid.link/20260413060800.3156425-3-yung-chuan.liao@linux.intel.com Signed-off-by: Mark Brown --- sound/soc/sof/intel/nvl.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/soc/sof/intel/nvl.c b/sound/soc/sof/intel/nvl.c index 0ee3507824e5..86d2e1aa0eec 100644 --- a/sound/soc/sof/intel/nvl.c +++ b/sound/soc/sof/intel/nvl.c @@ -73,6 +73,7 @@ const struct sof_intel_dsp_desc nvl_s_chip_info = { .power_down_dsp = mtl_power_down_dsp, .disable_interrupts = lnl_dsp_disable_interrupts, .hw_ip_version = SOF_INTEL_ACE_4_0, + .platform = "nvl", }; MODULE_IMPORT_NS("SND_SOC_SOF_INTEL_MTL"); From 37e9faf21670cf86d36eebc3b4d27afe6819983a Mon Sep 17 00:00:00 2001 From: Hsieh Hung-En Date: Wed, 15 Apr 2026 11:02:51 +0800 Subject: [PATCH 241/248] ASoC: es8311: Check regcache_sync() error in resume The es8311_resume() function currently ignores the return value of regcache_sync(). If syncing the cache fails, the function still returns 0, leaving the codec in a potentially incorrect state. Check the return value and propagate it to the ASoC core to ensure resume failures are properly handled. Signed-off-by: Hsieh Hung-En Link: https://patch.msgid.link/20260415030252.5547-2-hungen3108@gmail.com Signed-off-by: Mark Brown --- sound/soc/codecs/es8311.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/sound/soc/codecs/es8311.c b/sound/soc/codecs/es8311.c index 0b07a53cc792..9e371e2d6eae 100644 --- a/sound/soc/codecs/es8311.c +++ b/sound/soc/codecs/es8311.c @@ -862,13 +862,18 @@ static int es8311_suspend(struct snd_soc_component *component) static int es8311_resume(struct snd_soc_component *component) { struct es8311_priv *es8311; + int ret; es8311 = snd_soc_component_get_drvdata(component); es8311_reset(component, false); regcache_cache_only(es8311->regmap, false); - regcache_sync(es8311->regmap); + ret = regcache_sync(es8311->regmap); + if (ret) { + dev_err(component->dev, "unable to sync regcache\n"); + return ret; + } return 0; } From c15bc1681045f158811643d6c990f87c590dd693 Mon Sep 17 00:00:00 2001 From: Hsieh Hung-En Date: Wed, 15 Apr 2026 11:02:52 +0800 Subject: [PATCH 242/248] ASoC: es8311: Fix clock leak and check update_bits in set_bias_level() In es8311_set_bias_level(), the return value of snd_soc_component_update_bits() was ignored. If this fails, not only is the VMID selection not applied, but the previously enabled mclk is left running, leading to an unbalanced clock reference count (clock leak). Check the return value and ensure clk_disable_unprepare() is called on failure to maintain proper resource management. Signed-off-by: Hsieh Hung-En Link: https://patch.msgid.link/20260415030252.5547-3-hungen3108@gmail.com Signed-off-by: Mark Brown --- sound/soc/codecs/es8311.c | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/sound/soc/codecs/es8311.c b/sound/soc/codecs/es8311.c index 9e371e2d6eae..564af5c04dbb 100644 --- a/sound/soc/codecs/es8311.c +++ b/sound/soc/codecs/es8311.c @@ -761,6 +761,7 @@ static int es8311_set_bias_level(struct snd_soc_component *component, { struct es8311_priv *es8311 = snd_soc_component_get_drvdata(component); struct snd_soc_dapm_context *dapm = snd_soc_component_to_dapm(component); + int ret; switch (level) { case SND_SOC_BIAS_ON: @@ -769,17 +770,21 @@ static int es8311_set_bias_level(struct snd_soc_component *component, break; case SND_SOC_BIAS_STANDBY: if (snd_soc_dapm_get_bias_level(dapm) == SND_SOC_BIAS_OFF) { - int ret = clk_prepare_enable(es8311->mclk); + ret = clk_prepare_enable(es8311->mclk); if (ret) { dev_err(component->dev, "unable to prepare mclk\n"); return ret; } - snd_soc_component_update_bits( - component, ES8311_SYS3, - ES8311_SYS3_PDN_VMIDSEL_MASK, - ES8311_SYS3_PDN_VMIDSEL_STARTUP_NORMAL_SPEED); + ret = snd_soc_component_update_bits( + component, ES8311_SYS3, + ES8311_SYS3_PDN_VMIDSEL_MASK, + ES8311_SYS3_PDN_VMIDSEL_STARTUP_NORMAL_SPEED); + if (ret < 0) { + clk_disable_unprepare(es8311->mclk); + return ret; + } } break; From ca1b11b36d8231a748c77e4732e40de9998fa9d8 Mon Sep 17 00:00:00 2001 From: Charles Keepax Date: Mon, 13 Apr 2026 13:46:20 +0100 Subject: [PATCH 243/248] regmap: sdw-mbq: Allow defers on undeferrable controls It is a fairly common DisCo issue to have the deferrability of controls marked incorrectly and Windows seems very permissive in this regard. As there isn't really any down side to trying a defer even if the control isn't deferrable, allow this but add a warning message. Signed-off-by: Charles Keepax Link: https://patch.msgid.link/20260413124621.1345315-2-ckeepax@opensource.cirrus.com Signed-off-by: Mark Brown --- drivers/base/regmap/regmap-sdw-mbq.c | 36 ++++++++++++++-------------- 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/drivers/base/regmap/regmap-sdw-mbq.c b/drivers/base/regmap/regmap-sdw-mbq.c index 6a61629f5f89..4533fe793c5f 100644 --- a/drivers/base/regmap/regmap-sdw-mbq.c +++ b/drivers/base/regmap/regmap-sdw-mbq.c @@ -74,7 +74,7 @@ static int regmap_sdw_mbq_poll_busy(struct sdw_slave *slave, unsigned int reg, static int regmap_sdw_mbq_write_impl(struct sdw_slave *slave, unsigned int reg, unsigned int val, - int mbq_size, bool deferrable) + int mbq_size) { int shift = mbq_size * BITS_PER_BYTE; int ret; @@ -88,17 +88,14 @@ static int regmap_sdw_mbq_write_impl(struct sdw_slave *slave, return ret; } - ret = sdw_write_no_pm(slave, reg, val & 0xff); - if (deferrable && ret == -ENODATA) - return -EAGAIN; - - return ret; + return sdw_write_no_pm(slave, reg, val & 0xff); } static int regmap_sdw_mbq_write(void *context, unsigned int reg, unsigned int val) { struct regmap_mbq_context *ctx = context; struct sdw_slave *slave = ctx->sdw; + struct device *dev = ctx->dev; bool deferrable = regmap_sdw_mbq_deferrable(ctx, reg); int mbq_size = regmap_sdw_mbq_size(ctx, reg); int ret; @@ -113,13 +110,16 @@ static int regmap_sdw_mbq_write(void *context, unsigned int reg, unsigned int va * process a single wait/timeout on function busy and a single retry * of the transaction. */ - ret = regmap_sdw_mbq_write_impl(slave, reg, val, mbq_size, deferrable); - if (ret == -EAGAIN) { + ret = regmap_sdw_mbq_write_impl(slave, reg, val, mbq_size); + if (ret == -ENODATA) { + if (!deferrable) + dev_warn(dev, "Defer on undeferrable control: %x\n", reg); + ret = regmap_sdw_mbq_poll_busy(slave, reg, ctx); if (ret) return ret; - ret = regmap_sdw_mbq_write_impl(slave, reg, val, mbq_size, false); + ret = regmap_sdw_mbq_write_impl(slave, reg, val, mbq_size); } return ret; @@ -127,18 +127,14 @@ static int regmap_sdw_mbq_write(void *context, unsigned int reg, unsigned int va static int regmap_sdw_mbq_read_impl(struct sdw_slave *slave, unsigned int reg, unsigned int *val, - int mbq_size, bool deferrable) + int mbq_size) { int shift = BITS_PER_BYTE; int read; read = sdw_read_no_pm(slave, reg); - if (read < 0) { - if (deferrable && read == -ENODATA) - return -EAGAIN; - + if (read < 0) return read; - } *val = read; @@ -158,6 +154,7 @@ static int regmap_sdw_mbq_read(void *context, unsigned int reg, unsigned int *va { struct regmap_mbq_context *ctx = context; struct sdw_slave *slave = ctx->sdw; + struct device *dev = ctx->dev; bool deferrable = regmap_sdw_mbq_deferrable(ctx, reg); int mbq_size = regmap_sdw_mbq_size(ctx, reg); int ret; @@ -172,13 +169,16 @@ static int regmap_sdw_mbq_read(void *context, unsigned int reg, unsigned int *va * process a single wait/timeout on function busy and a single retry * of the transaction. */ - ret = regmap_sdw_mbq_read_impl(slave, reg, val, mbq_size, deferrable); - if (ret == -EAGAIN) { + ret = regmap_sdw_mbq_read_impl(slave, reg, val, mbq_size); + if (ret == -ENODATA) { + if (!deferrable) + dev_warn(dev, "Defer on undeferable control: %x\n", reg); + ret = regmap_sdw_mbq_poll_busy(slave, reg, ctx); if (ret) return ret; - ret = regmap_sdw_mbq_read_impl(slave, reg, val, mbq_size, false); + ret = regmap_sdw_mbq_read_impl(slave, reg, val, mbq_size); } return ret; From 956c032be7ca3f440d4786ea37e941bf862bb170 Mon Sep 17 00:00:00 2001 From: Charles Keepax Date: Mon, 13 Apr 2026 13:46:21 +0100 Subject: [PATCH 244/248] ASoC: SDCA: Fix reading of mipi-sdca-control-deferrable The discussion in [1] highlighted that the SDCA code shouldn't be using fwnode_property_read_bool() for DisCo controls, as the spec allows setting the value to zero meaning the property should not be used. Correct a small bug in the SDCA code that will mark such controls as deferrable. Link: https://lore.kernel.org/linux-sound/20260311142153.2201761-1-rf@opensource.cirrus.com/ [1] Fixes: 42b144cb6a2d ("ASoC: SDCA: Add SDCA Control parsing") Signed-off-by: Charles Keepax Link: https://patch.msgid.link/20260413124621.1345315-3-ckeepax@opensource.cirrus.com Signed-off-by: Mark Brown --- sound/soc/sdca/sdca_functions.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/sound/soc/sdca/sdca_functions.c b/sound/soc/sdca/sdca_functions.c index dca60ee8e62c..c5fe1a471c36 100644 --- a/sound/soc/sdca/sdca_functions.c +++ b/sound/soc/sdca/sdca_functions.c @@ -1006,8 +1006,11 @@ static int find_sdca_entity_control(struct device *dev, struct sdca_entity *enti control->has_fixed = true; fallthrough; case SDCA_ACCESS_MODE_RO: - control->deferrable = fwnode_property_read_bool(control_node, - "mipi-sdca-control-deferrable"); + ret = fwnode_property_read_u32(control_node, + "mipi-sdca-control-deferrable", + &tmp); + if (ret == 0) + control->deferrable = !!tmp; break; default: break; From 0a5ee0e520eff98ee2b4568194562870877b050f Mon Sep 17 00:00:00 2001 From: Tobias Heider Date: Wed, 22 Apr 2026 15:30:59 +0200 Subject: [PATCH 245/248] ASoC: qcom: x1e80100: limit speaker volumes Limit the digital gain and PA volumes to a combined -3 dB in the machine driver to reduce the risk of speaker damage until we have active speaker protection in place (or higher safe levels have been established). Based on commit c481016bb4f8 ("ASoC: qcom: sc8280xp: limit speaker volumes") which addressed the same issue on the sc8280x SoC with some minor changes as explained below. The Digital Volume behaves almost identical to sc8280x since both use the same lpass-wsa-macro, but x1e80100 has two sets of controls prefixed with WSA and WSA2. For PA x1e80100 machines use wsa884x amplifiers which expose a linear scale from -9 dB to 9 dB with a 1.5 dB step size giving us 0 dB = -9 dB + 6 * 1.5 dB. On x1e80100 there are two different speaker topologies we need to handle: 2-Speakers: SpkrLeft, Spkr Right 4-Speakers: WooferLeft, WooferRight, TweeterLeft, TweeterRight Signed-off-by: Tobias Heider Tested-by: Srinivas Kandagatla Reviewed-by: Srinivas Kandagatla Link: https://patch.msgid.link/20260422-x1e80100-audio-limit-v2-1-333258b97697@canonical.com Signed-off-by: Mark Brown --- sound/soc/qcom/x1e80100.c | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/sound/soc/qcom/x1e80100.c b/sound/soc/qcom/x1e80100.c index a3f4785c4bbe..c81df41ace88 100644 --- a/sound/soc/qcom/x1e80100.c +++ b/sound/soc/qcom/x1e80100.c @@ -27,10 +27,29 @@ static int x1e80100_snd_init(struct snd_soc_pcm_runtime *rtd) { struct x1e80100_snd_data *data = snd_soc_card_get_drvdata(rtd->card); struct snd_soc_dai *cpu_dai = snd_soc_rtd_to_cpu(rtd, 0); + struct snd_soc_card *card = rtd->card; struct snd_soc_jack *dp_jack = NULL; int dp_pcm_id = 0; switch (cpu_dai->id) { + case WSA_CODEC_DMA_RX_0: + case WSA_CODEC_DMA_RX_1: + /* + * Set limit of -3 dB on Digital Volume and 0 dB on PA Volume + * to reduce the risk of speaker damage until we have active + * speaker protection in place. + */ + snd_soc_limit_volume(card, "WSA WSA_RX0 Digital Volume", 81); + snd_soc_limit_volume(card, "WSA WSA_RX1 Digital Volume", 81); + snd_soc_limit_volume(card, "WSA2 WSA_RX0 Digital Volume", 81); + snd_soc_limit_volume(card, "WSA2 WSA_RX1 Digital Volume", 81); + snd_soc_limit_volume(card, "SpkrLeft PA Volume", 6); + snd_soc_limit_volume(card, "SpkrRight PA Volume", 6); + snd_soc_limit_volume(card, "WooferLeft PA Volume", 6); + snd_soc_limit_volume(card, "TweeterLeft PA Volume", 6); + snd_soc_limit_volume(card, "WooferRight PA Volume", 6); + snd_soc_limit_volume(card, "TweeterRight PA Volume", 6); + break; case DISPLAY_PORT_RX_0: dp_pcm_id = 0; dp_jack = &data->dp_jack[dp_pcm_id]; From 87a3f5c8ac2096e9406ce2ed3bf5b9bc1589a92d Mon Sep 17 00:00:00 2001 From: Maciej Strozek Date: Mon, 20 Apr 2026 12:48:17 +0100 Subject: [PATCH 246/248] ASoC: sdw_utils: cs42l43: allow spk component names to be combined Move handling of cs42l43-spk component string into SOF mechanism [1] which will allow it to be aggregated with other speakers. Likewise handle the cs35l56-bridge special case which should not be combined to keep compatibility with UCM. Link: https://github.com/thesofproject/linux/pull/5445 [1] Link: https://github.com/alsa-project/alsa-ucm-conf/pull/747 Reviewed-by: Bard Liao Signed-off-by: Maciej Strozek Suggested-by: Aaron Ma Tested-by: Aaron Ma Link: https://patch.msgid.link/20260420114823.194226-1-mstrozek@opensource.cirrus.com Signed-off-by: Mark Brown --- sound/soc/sdw_utils/soc_sdw_bridge_cs35l56.c | 6 ------ sound/soc/sdw_utils/soc_sdw_cs42l43.c | 12 +----------- sound/soc/sdw_utils/soc_sdw_utils.c | 20 ++++++++++++++++---- 3 files changed, 17 insertions(+), 21 deletions(-) diff --git a/sound/soc/sdw_utils/soc_sdw_bridge_cs35l56.c b/sound/soc/sdw_utils/soc_sdw_bridge_cs35l56.c index 2a7109d53cbe..e0e32a279787 100644 --- a/sound/soc/sdw_utils/soc_sdw_bridge_cs35l56.c +++ b/sound/soc/sdw_utils/soc_sdw_bridge_cs35l56.c @@ -40,12 +40,6 @@ static int asoc_sdw_bridge_cs35l56_asp_init(struct snd_soc_pcm_runtime *rtd) struct snd_soc_dai *codec_dai; struct snd_soc_dai *cpu_dai; - card->components = devm_kasprintf(card->dev, GFP_KERNEL, - "%s spk:cs35l56-bridge", - card->components); - if (!card->components) - return -ENOMEM; - ret = snd_soc_dapm_new_controls(dapm, bridge_widgets, ARRAY_SIZE(bridge_widgets)); if (ret) { diff --git a/sound/soc/sdw_utils/soc_sdw_cs42l43.c b/sound/soc/sdw_utils/soc_sdw_cs42l43.c index 4a451b9d4f13..e99ea3c4e5dd 100644 --- a/sound/soc/sdw_utils/soc_sdw_cs42l43.c +++ b/sound/soc/sdw_utils/soc_sdw_cs42l43.c @@ -107,21 +107,11 @@ EXPORT_SYMBOL_NS(asoc_sdw_cs42l43_hs_rtd_init, "SND_SOC_SDW_UTILS"); int asoc_sdw_cs42l43_spk_rtd_init(struct snd_soc_pcm_runtime *rtd, struct snd_soc_dai *dai) { - struct snd_soc_component *component = snd_soc_rtd_to_codec(rtd, 0)->component; + struct snd_soc_component *component = dai->component; struct snd_soc_card *card = rtd->card; struct snd_soc_dapm_context *dapm = snd_soc_card_to_dapm(card); - struct asoc_sdw_mc_private *ctx = snd_soc_card_get_drvdata(card); int ret; - if (!(ctx->mc_quirk & SOC_SDW_SIDECAR_AMPS)) { - /* Will be set by the bridge code in this case */ - card->components = devm_kasprintf(card->dev, GFP_KERNEL, - "%s spk:cs42l43-spk", - card->components); - if (!card->components) - return -ENOMEM; - } - ret = snd_soc_limit_volume(card, "cs42l43 Speaker Digital Volume", CS42L43_SPK_VOLUME_0DB); if (ret) diff --git a/sound/soc/sdw_utils/soc_sdw_utils.c b/sound/soc/sdw_utils/soc_sdw_utils.c index 2807f536eef0..1637cc3f3d59 100644 --- a/sound/soc/sdw_utils/soc_sdw_utils.c +++ b/sound/soc/sdw_utils/soc_sdw_utils.c @@ -758,6 +758,7 @@ struct asoc_sdw_codec_info codec_info_list[] = { { .direction = {true, false}, .codec_name = "cs42l43-codec", + .component_name = "cs42l43-spk", .dai_name = "cs42l43-dp6", .dai_type = SOC_SDW_DAI_TYPE_AMP, .dailink = {SOC_SDW_AMP_OUT_DAI_ID, SOC_SDW_UNUSED_DAI_ID}, @@ -1104,6 +1105,7 @@ static int asoc_sdw_find_codec_info_dai_index(const struct asoc_sdw_codec_info * int asoc_sdw_rtd_init(struct snd_soc_pcm_runtime *rtd) { struct snd_soc_card *card = rtd->card; + struct asoc_sdw_mc_private *ctx = snd_soc_card_get_drvdata(card); struct snd_soc_dapm_context *dapm = snd_soc_card_to_dapm(card); struct asoc_sdw_codec_info *codec_info; struct snd_soc_dai *dai; @@ -1179,16 +1181,26 @@ int asoc_sdw_rtd_init(struct snd_soc_pcm_runtime *rtd) /* Generate the spk component string for card->components string */ if (codec_info->dais[dai_index].dai_type == SOC_SDW_DAI_TYPE_AMP && codec_info->dais[dai_index].component_name) { + const char *component; + + /* + * For the special case of cs42l43 with sidecar amps, use only + * "cs35l56-bridge" as the component name in card->components + */ + if (ctx->mc_quirk & SOC_SDW_SIDECAR_AMPS && + !strcmp(codec_info->dais[dai_index].component_name, "cs42l43-spk")) + component = "cs35l56-bridge"; + else + component = codec_info->dais[dai_index].component_name; + if (strlen (spk_components) == 0) spk_components = - devm_kasprintf(card->dev, GFP_KERNEL, "%s", - codec_info->dais[dai_index].component_name); + devm_kasprintf(card->dev, GFP_KERNEL, "%s", component); else /* Append component name to spk_components */ spk_components = devm_kasprintf(card->dev, GFP_KERNEL, - "%s+%s", spk_components, - codec_info->dais[dai_index].component_name); + "%s+%s", spk_components, component); } codec_info->dais[dai_index].rtd_init_done = true; From 6d619f73970397e13d2d3f830b183fcd9f58e749 Mon Sep 17 00:00:00 2001 From: Baojun Xu Date: Tue, 14 Apr 2026 09:54:40 +0800 Subject: [PATCH 247/248] ASoC: dt-bindings: ti,tas2781: Add TAS5832 support TAS5832 is in same family with TAS5827/28/30. Signed-off-by: Baojun Xu Reviewed-by: Krzysztof Kozlowski Link: https://patch.msgid.link/20260414015441.2439-1-baojun.xu@ti.com Signed-off-by: Mark Brown --- Documentation/devicetree/bindings/sound/ti,tas2781.yaml | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/Documentation/devicetree/bindings/sound/ti,tas2781.yaml b/Documentation/devicetree/bindings/sound/ti,tas2781.yaml index f3a5638f4239..b21466bb0730 100644 --- a/Documentation/devicetree/bindings/sound/ti,tas2781.yaml +++ b/Documentation/devicetree/bindings/sound/ti,tas2781.yaml @@ -1,5 +1,5 @@ # SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) -# Copyright (C) 2022 - 2025 Texas Instruments Incorporated +# Copyright (C) 2022 - 2026 Texas Instruments Incorporated %YAML 1.2 --- $id: http://devicetree.org/schemas/sound/ti,tas2781.yaml# @@ -107,6 +107,9 @@ properties: ti,tas5830: 65-W Stereo, Digital Input, High Efficiency Closed-Loop Class-D Amplifier with Class-H Algorithm + + ti,tas5832: 81-W Stereo, Digital Input, High Efficiency Closed-Loop + Class-D Amplifier with Class-H Algorithm oneOf: - items: - enum: @@ -128,6 +131,7 @@ properties: - ti,tas5827 - ti,tas5828 - ti,tas5830 + - ti,tas5832 - const: ti,tas2781 - enum: - ti,tas2781 @@ -264,6 +268,7 @@ allOf: - ti,tas5827 - ti,tas5828 - ti,tas5830 + - ti,tas5832 then: properties: reg: From 1f95fdef685ee76393981de062e6b26210d88a9c Mon Sep 17 00:00:00 2001 From: Baojun Xu Date: Tue, 14 Apr 2026 09:54:41 +0800 Subject: [PATCH 248/248] ASoC: tas2781: Add tas5832 support TAS5832 is in same family with TAS5827/28/30. Signed-off-by: Baojun Xu Link: https://patch.msgid.link/20260414015441.2439-2-baojun.xu@ti.com Signed-off-by: Mark Brown --- include/sound/tas2781.h | 1 + sound/soc/codecs/tas2781-i2c.c | 5 +++++ 2 files changed, 6 insertions(+) diff --git a/include/sound/tas2781.h b/include/sound/tas2781.h index e847cf51878c..95296bb4a33a 100644 --- a/include/sound/tas2781.h +++ b/include/sound/tas2781.h @@ -131,6 +131,7 @@ enum audio_device { TAS5827, TAS5828, TAS5830, + TAS5832, TAS_OTHERS, }; diff --git a/sound/soc/codecs/tas2781-i2c.c b/sound/soc/codecs/tas2781-i2c.c index 8af30f4d68da..a78a8f9b9833 100644 --- a/sound/soc/codecs/tas2781-i2c.c +++ b/sound/soc/codecs/tas2781-i2c.c @@ -119,6 +119,7 @@ static const struct i2c_device_id tasdevice_id[] = { { "tas5827", TAS5827 }, { "tas5828", TAS5828 }, { "tas5830", TAS5830 }, + { "tas5832", TAS5832 }, {} }; @@ -142,6 +143,7 @@ static const struct of_device_id tasdevice_of_match[] = { { .compatible = "ti,tas5827", .data = &tasdevice_id[TAS5827] }, { .compatible = "ti,tas5828", .data = &tasdevice_id[TAS5828] }, { .compatible = "ti,tas5830", .data = &tasdevice_id[TAS5830] }, + { .compatible = "ti,tas5832", .data = &tasdevice_id[TAS5832] }, {}, }; MODULE_DEVICE_TABLE(of, tasdevice_of_match); @@ -1744,6 +1746,7 @@ static void tasdevice_fw_ready(const struct firmware *fmw, case TAS5827: case TAS5828: case TAS5830: + case TAS5832: /* If DSP FW fail, DSP kcontrol won't be created. */ tasdevice_dsp_remove(tas_priv); } @@ -1915,6 +1918,7 @@ static int tasdevice_codec_probe(struct snd_soc_component *codec) case TAS5827: case TAS5828: case TAS5830: + case TAS5832: p = (struct snd_kcontrol_new *)tas5825_snd_controls; size = ARRAY_SIZE(tas5825_snd_controls); break; @@ -2101,6 +2105,7 @@ static const struct acpi_device_id tasdevice_acpi_match[] = { { "TXNW5827", (kernel_ulong_t)&tasdevice_id[TAS5827] }, { "TXNW5828", (kernel_ulong_t)&tasdevice_id[TAS5828] }, { "TXNW5830", (kernel_ulong_t)&tasdevice_id[TAS5830] }, + { "TXNW5832", (kernel_ulong_t)&tasdevice_id[TAS5832] }, {}, };